From 459f6c643cc56b708fad559ba9f708e52c89706d Mon Sep 17 00:00:00 2001 From: unspeaker Date: Sat, 22 Feb 2025 17:14:48 +0200 Subject: [PATCH] disassemble call targets --- crates/vestal/src/main.rs | 352 ++++++++++++++++++++++---------------- crates/vestal/src/util.rs | 3 +- 2 files changed, 204 insertions(+), 151 deletions(-) diff --git a/crates/vestal/src/main.rs b/crates/vestal/src/main.rs index f23f867..a4faaac 100644 --- a/crates/vestal/src/main.rs +++ b/crates/vestal/src/main.rs @@ -26,56 +26,9 @@ fn main () -> Usually<()> { } let path = rebuilder.find(path.to_str().expect("path must be unicode"), false)? .unwrap_or_else(||panic!("Could not find: {path:?}")); - let main = rebuilder.load(&path, true)?; - println!("\n{main}"); - let main = rebuilder.dlls.get(&main).unwrap(); - //println!("{:?}", main.code[0..1024.min(main.code.len())].hex_conf(HexConfig { - //title: false, - //width: 16, - //group: 4, - //chunk: 1, - //display_offset: main.code_base as usize, - //..HexConfig::default() - //})); - for (addr, call) in main.calls_by_source.iter() { - let start = (addr - main.code_base) as usize; - let end = start + call.length; - let mut decoder = Decoder::with_ip(64, &main.code[start..end], 0, DecoderOptions::NONE); - let instruction = decoder.decode(); - let cfg = HexConfig { - title: false, - width: 16, - group: 4, - chunk: 1, - display_offset: main.code_base as usize, - ..HexConfig::default() - }; - let end = start + 16; - let snap = |x|(x/16)*16; - let a = snap(start - 32); - let b = start; - let c = snap(end); - let d = snap(c + 32); - println!("\n{BOLD}0x{addr:x}{RESET} {} {:?} {:?}\n{:?}\n{BOLD}{:?}{RESET}\n{:?}", - instruction, - call.module, - call.method, - &main.code[a..b].hex_conf(HexConfig { display_offset: main.code_base as usize + a, ..cfg }), - &main.code[b..c].hex_conf(HexConfig { display_offset: main.code_base as usize + b, ..cfg }), - &main.code[c..d].hex_conf(HexConfig { display_offset: main.code_base as usize + c, ..cfg })); - if let (Some(module_name), Some(method_name)) = (&call.module, &call.method) { - if let Some(module) = main.deps_by_library.get(module_name) { - if let Some(method) = module.get(method_name) { - println!("0x{method:>08x} {module_name} {method_name}"); - if let Some(dll) = rebuilder.dlls.get(module_name) { - if let Some(address) = dll.exports.get(method_name) { - println!("# found"); - } - } - } - } - } - } + let name = rebuilder.load(&path, true)?; + let main = rebuilder.dlls.get(&name).unwrap(); + rebuilder.resolve_calls(&main, true, true)?; //println!("{:#?}", &main.calls_by_source); for (name, dll) in rebuilder.dlls.iter() { } @@ -99,48 +52,6 @@ struct Rebuilder { visited: BTreeSet>, } -#[derive(Debug)] -struct Dll { - /// Canonical name like `xxx.dll` (always lowercase) - name: Arc, - /// Path to DLL on host filesystem. - path: Arc, - /// Bytes of `#!`-instruction - bang: Arc<[u8]>, - /// Parsed portable executable - pe: Arc, - /// Bytes of `.text` section - code: Arc<[u8]>, - /// Start of `.text` section - code_base: u32, - /// Addresses of imported methods by library - deps_by_library: BTreeMap, BTreeMap, u32>>, - /// Imported methods by address - deps_by_address: BTreeMap, Arc)>, - /// Calls to dependencies by source address - calls_by_source: BTreeMap>, - /// Calls to dependencies by target address - calls_by_target: BTreeMap>>, - /// Addresses of exported methods by name - exports: BTreeMap, u32>, -} - -#[derive(Debug)] -struct Call { - /// Address on disk - offset: u32, - /// Address in memory - source: u32, - /// Length of call in opcodes - length: usize, - /// Call trampoline address - target: u32, - /// Library being called - module: Option>, - /// Method being called - method: Option>, -} - impl Rebuilder { fn new (paths: &[impl AsRef]) -> Self { Self { @@ -179,12 +90,97 @@ impl Rebuilder { if std::fs::exists(&path)? { if verbose { println!("# found {name} at {path:?}"); + } return Ok(Some(canonicalize(&path)?)) } } Ok(None) } + fn resolve_calls (&self, dll: &Dll, recurse: bool, verbose: bool) -> Usually<()> { + for (addr, call) in dll.calls_by_source.iter() { + self.resolve_call(dll, *addr, call, recurse, verbose)?; + } + Ok(()) + } + fn resolve_call ( + &self, dll: &Dll, addr: u32, call: &Arc, recurse: bool, verbose: bool, + ) -> Usually<()> { + let addr = (addr - dll.code_base) as usize; + if verbose { + println!("--------------------------------"); + dll.print_call(addr, call.module.as_ref(), call.method.as_ref()); + dll.print_hex(addr, 1); + } + if let Some(method) = dll.parse_call(call) { + let module_name = call.module.as_ref().unwrap(); + let method_name = call.method.as_ref().unwrap(); + println!("0x{method:>08x} {module_name:20} {method_name}"); + if let Some(path) = self.find(module_name, false)? { + let name = path.file_name().expect("no file name"); + let name: Arc = name.to_str().map(Arc::from).expect("non-unicode filename"); + if let Some(dll) = self.dlls.get(&name) { + if let Some(thunk) = dll.exports.get(method_name) { + if let ThunkData::Function(rva) = thunk { + println!("# found {:?}::{} at 0x{:>08x}", &dll.name, method_name, rva.0); + let addr = (rva.0 - dll.code_base) as usize; + dll.print_call(addr, None, None); + dll.print_hex(addr, 1); + if recurse { + let mut decoder = Decoder::with_ip( + 64, &dll.code[addr..], 0, DecoderOptions::NONE + ); + while decoder.can_decode() { + let position = decoder.position(); + let instruction = decoder.decode(); + //println!("..."); + dll.print_call(addr + position, None, None); + //dll.print_hex(addr, 0); + } + } + //println!("{:?}", &dll.code[addr..addr + 128].hex_dump()); + //if let Some(path) = self.find(&dll.name, true)? { + //println!("# at {path:?}"); + //} else { + //panic!("# not found {:?}", &dll.name); + //} + } else { + panic!("# unsupported {thunk:?}"); + } + } + } + } else { + println!("# not found {call:?}"); + } + } + Ok(()) + } +} + +#[derive(Debug)] +struct Dll { + /// Canonical name like `xxx.dll` (always lowercase) + name: Arc, + /// Path to DLL on host filesystem. + path: Arc, + /// Bytes of `#!`-instruction + bang: Arc<[u8]>, + /// Parsed portable executable + pe: Arc, + /// Bytes of `.text` section + code: Arc<[u8]>, + /// Start of `.text` section + code_base: u32, + /// Addresses of imported methods by library + deps_by_library: BTreeMap, BTreeMap, u32>>, + /// Imported methods by address + deps_by_address: BTreeMap, Arc)>, + /// Calls to dependencies by source address + calls_by_source: BTreeMap>, + /// Calls to dependencies by target address + calls_by_target: BTreeMap>>, + /// Addresses of exported methods by name + exports: BTreeMap, ThunkData>, } impl Dll { @@ -204,13 +200,14 @@ impl Dll { let mut calls_by_target = Default::default(); let mut deps_by_library = Default::default(); let mut deps_by_address = Default::default(); + let exports = Self::exports(&pe).unwrap_or_default(); let (modules_count, methods_count) = Self::deps( &pe, &mut deps_by_library, &mut deps_by_address, false )?; - let calls = Self::calls( + let calls = Call::calls( &name, &pe, start, @@ -233,7 +230,7 @@ impl Dll { deps_by_address: deps_by_address.clone(), calls_by_source, calls_by_target, - exports: Default::default(), + exports, pe, }) } @@ -301,6 +298,117 @@ impl Dll { } Ok((modules, methods)) } + fn exports (pe: &VecPE) -> Usually, ThunkData>> { + Ok(ImageExportDirectory::parse(pe)? + .get_export_map(pe)? + .into_iter() + .map(|(k, v)|(k.into(), v)) + .collect()) + } + fn parse_call (&self, call: &Arc) -> Option { + self.deps_by_library.get(call.module.as_ref()?)?.get(call.method.as_ref()?).map(|x|*x) + } + fn print_call (&self, addr: usize, module: Option<&Arc>, method: Option<&Arc>) { + let mut decoder = Decoder::with_ip(64, &self.code[addr..], 0, DecoderOptions::NONE); + let instruction = decoder.decode(); + let opcodes = &self.code[addr..addr+instruction.len()].iter() + .map(|x|format!("{x:02x}")) + .join(" "); + println!("{BOLD}0x{addr:>08x}{RESET} {:20} {DIM}{opcodes}{RESET} {BOLD}{instruction}{RESET} {module:?} {method:?}", &self.name); + } + fn print_hex (&self, addr: usize, n: usize) { + let cfg = HexConfig {title: false, width: 16, group: 4, chunk: 1, ..HexConfig::default()}; + let snap = |x|(x/16)*16; + let a = snap(addr - 16*n); + let b = addr; + let c = snap(addr + 16); + let d = snap(c + 16*n); + if n > 0 { + println!("{DIM}{:?}{RESET}", &self.code[a..b].hex_conf(HexConfig { + display_offset: self.code_base as usize + a, ..cfg + })); + } + println!("{BOLD}{:?}{RESET}", &self.code[b..c].hex_conf(HexConfig { + display_offset: self.code_base as usize + b, ..cfg + })); + if n > 0 { + println!("{DIM}{:?}{RESET}", &self.code[c..d].hex_conf(HexConfig { + display_offset: self.code_base as usize + c, ..cfg + })); + } + } +} + +#[derive(Debug)] +struct Call { + /// Address on disk + offset: u32, + /// Address in memory + source: u32, + /// Length of call in opcodes + length: usize, + /// Call trampoline address + target: u32, + /// Library being called + module: Option>, + /// Method being called + method: Option>, +} + +impl Call { + fn matches (instruction: &Instruction) -> bool { + instruction.op0_kind() == OpKind::Memory && ( + instruction.flow_control() == FlowControl::IndirectBranch || + instruction.flow_control() == FlowControl::IndirectCall + ) + } + fn skip (opcodes: &[u8]) -> bool { + match opcodes[0] { + 0x41 | 0x42 | 0x43 | 0x49 => match opcodes[1] { + 0xff => return true, + _ => {} + }, + 0x48 => match opcodes[2] { + 0x20 | 0x60 | 0x62 | 0xa0 | 0xa2 => return true, + _ => {} + }, + 0xff => match opcodes[1] { + 0x10 | 0x12 | 0x13 | + 0x50 | 0x51 | 0x52 | 0x53 | 0x54 | 0x55 | 0x56 | 0x57 | + 0x60 | 0x90 | 0x92 | 0x93 | 0x94 | 0x97 => return true, + _ => {} + }, + _ => {} + } + false + } + fn target (opcodes: &[u8], offset_rva: u32) -> Option { + match opcodes[0] { + 0xff => match opcodes[1] { + 0x15 | 0x25 => return Some(offset_rva + opcodes.len() as u32 + u32::from_le_bytes([ + opcodes[2], + opcodes[3], + opcodes[4], + opcodes[5] + ])), + _ => {} + }, + 0x48 => match opcodes[1] { + 0xff => match opcodes[2] { + 0x15 | 0x25 => return Some(offset_rva + opcodes.len() as u32 + u32::from_le_bytes([ + opcodes[3], + opcodes[4], + opcodes[5], + opcodes[6] + ])), + _ => {} + }, + _ => {} + } + _ => {} + } + None + } fn dep_name (deps: Option<&BTreeMap, Arc)>>, target: u32) -> (Option>, Option>, Arc) { @@ -384,59 +492,3 @@ impl Dll { Ok(None) } } - -impl Call { - fn matches (instruction: &Instruction) -> bool { - instruction.op0_kind() == OpKind::Memory && ( - instruction.flow_control() == FlowControl::IndirectBranch || - instruction.flow_control() == FlowControl::IndirectCall - ) - } - fn skip (opcodes: &[u8]) -> bool { - match opcodes[0] { - 0x41 | 0x42 | 0x43 | 0x49 => match opcodes[1] { - 0xff => return true, - _ => {} - }, - 0x48 => match opcodes[2] { - 0x20 | 0x60 | 0x62 | 0xa0 | 0xa2 => return true, - _ => {} - }, - 0xff => match opcodes[1] { - 0x10 | 0x12 | 0x13 | - 0x50 | 0x51 | 0x52 | 0x53 | 0x54 | 0x55 | 0x56 | 0x57 | - 0x60 | 0x90 | 0x92 | 0x93 | 0x94 | 0x97 => return true, - _ => {} - }, - _ => {} - } - false - } - fn target (opcodes: &[u8], offset_rva: u32) -> Option { - match opcodes[0] { - 0xff => match opcodes[1] { - 0x15 | 0x25 => return Some(offset_rva + opcodes.len() as u32 + u32::from_le_bytes([ - opcodes[2], - opcodes[3], - opcodes[4], - opcodes[5] - ])), - _ => {} - }, - 0x48 => match opcodes[1] { - 0xff => match opcodes[2] { - 0x15 | 0x25 => return Some(offset_rva + opcodes.len() as u32 + u32::from_le_bytes([ - opcodes[3], - opcodes[4], - opcodes[5], - opcodes[6] - ])), - _ => {} - }, - _ => {} - } - _ => {} - } - None - } -} diff --git a/crates/vestal/src/util.rs b/crates/vestal/src/util.rs index ca76faa..66eb174 100644 --- a/crates/vestal/src/util.rs +++ b/crates/vestal/src/util.rs @@ -6,7 +6,7 @@ pub(crate) use std::os::unix::fs::OpenOptionsExt; pub(crate) use std::path::{Path, PathBuf}; pub(crate) use std::pin::Pin; pub(crate) use std::sync::Arc; -pub(crate) use itertools::izip; +pub(crate) use itertools::{Itertools, izip}; //pub(crate) use ::lancelot::loader::pe::{PE, reloc::apply_relocations}; //pub(crate) use ::goblin::{error, Object, pe::{import::Import, export::Export}}; pub(crate) use ::object::endian::LittleEndian; @@ -19,6 +19,7 @@ pub(crate) use ::iced_x86::{Encoder, Decoder, DecoderOptions, Instruction, OpKin pub(crate) type Usually = Result>; pub const RESET: &str = "\u{001b}[0m"; pub const BOLD: &str = "\u{001b}[1m"; +pub const DIM: &str = "\u{001b}[2m"; pub enum Verbosity { Silent, Terse,