From 68bd7d46c4c9774676d19c31711f47dad4e46b8b Mon Sep 17 00:00:00 2001
From: dd86k
Date: Mon, 23 Sep 2024 17:13:33 -0400
Subject: [PATCH] disassembler: Add more compatible machine targets, fixes
- machine: Remove "riscv" dummy entry
- dumper: Made --machine switch override object's machine value at disassembly
---
common/cli.d | 20 +++---
dumper/dumper.d | 3 +-
dumper/main.d | 1 -
src/adbg/disassembler.d | 127 +++++++++++++++------------------
src/adbg/include/capstone/v4.d | 32 ++++-----
src/adbg/machines.d | 6 +-
src/adbg/objects/elf.d | 8 ++-
7 files changed, 92 insertions(+), 105 deletions(-)
diff --git a/common/cli.d b/common/cli.d
index c04d9b57..71c405d7 100644
--- a/common/cli.d
+++ b/common/cli.d
@@ -98,24 +98,22 @@ private:
//
int cli_march(const(char) *val) {
- immutable(AdbgMachine)* machines = adbg_dis_machines();
if (wantsHelp(val)) {
puts("Available machine architectures:");
- for (; *machines; ++machines) {
- immutable(adbg_machine_t)* m = adbg_machine(*machines);
- printf("- %-8s %s\n", m.alias1, m.name);
+ immutable(AdbgMachine)* mach = void;
+ for (size_t i; (mach = adbg_dis_machines(i++)) != null;) {
+ immutable(adbg_machine_t)* m = adbg_machine(*mach);
+ printf("- %*s", -8, m.alias1);
+ if (m.alias2) printf(" (%s)", m.alias2);
+ else putchar('\t');
+ printf("\t%s\n", m.name);
}
exit(0);
}
immutable(adbg_machine_t)* m = adbg_machine_select(val);
- // Selected machine is valid
if (m) {
- // Selected machine is in accepted list for disassembler
- for (; *machines; ++machines)
- if (*machines == m.machine) {
- opt_machine = m.machine;
- return EXIT_SUCCESS;
- }
+ opt_machine = m.machine;
+ return EXIT_SUCCESS;
}
return EXIT_FAILURE;
}
diff --git a/dumper/dumper.d b/dumper/dumper.d
index 1316c131..42ef72ca 100644
--- a/dumper/dumper.d
+++ b/dumper/dumper.d
@@ -507,7 +507,8 @@ int dump_disassemble_object(adbg_object_t *o,
}
int dump_disassemble(AdbgMachine machine, void* data, ulong size, ulong base_address) {
- adbg_disassembler_t *dis = adbg_dis_open(machine);
+ // A machine selected on the command line (opt_machine) overrides the caller's machine
+ adbg_disassembler_t *dis = adbg_dis_open(opt_machine ? opt_machine : machine);
if (dis == null)
panic_adbg();
scope(exit) adbg_dis_close(dis);
diff --git a/dumper/main.d b/dumper/main.d
index 957dbc0d..c736b587 100644
--- a/dumper/main.d
+++ b/dumper/main.d
@@ -10,7 +10,6 @@ import adbg.include.c.stdlib : exit;
import adbg.process.base : adbg_process_t;
import adbg.process.exception : adbg_exception_t, adbg_exception_name;
import adbg.self;
-import adbg.machines : adbg_machine_default;
import adbg.disassembler;
import adbg.error;
import core.stdc.stdlib : EXIT_FAILURE;
diff --git a/src/adbg/disassembler.d b/src/adbg/disassembler.d
index 2e867f1d..0b21f2a6 100644
--- a/src/adbg/disassembler.d
+++ b/src/adbg/disassembler.d
@@ -36,8 +36,13 @@ import core.stdc.stdlib : malloc, free;
// IA64: 16 bytes
// Alpha: 4 bytes
+// NOTE: Instruction buffer
+// Some decoders, like Capstone, may go over an architecture's
+// instruction size limit in some cases. For example, x86 has an
+// architectural limit of 15 bytes, but Capstone may exceed it, so
+// the buffer is made larger than that limit to leave headroom.
/// Maximum instruction size in bytes.
-enum MAX_INSTR_SIZE = 16;
+enum MAX_INSTR_SIZE = 24;
version (X86) { // CS_OPT_SYNTAX_DEFAULT
private enum {
@@ -177,71 +182,58 @@ enum AdbgDisOpt {
//mode = 3,
}
+private
+struct dismachine_t {
+ AdbgMachine mach;
+ int cs_arch;
+ int cs_mode;
+}
+
+// TODO: Machine aliases?
+// like sparc8p to sparc9
+private // "tested" here means that the MODE values work for CS with a sample
+immutable dismachine_t[] machmap_capstone = [
+ { AdbgMachine.i8086, CS_ARCH_X86, CS_MODE_16 }, // tested
+ { AdbgMachine.i386, CS_ARCH_X86, CS_MODE_32 }, // tested
+ { AdbgMachine.amd64, CS_ARCH_X86, CS_MODE_64 }, // tested
+ { AdbgMachine.thumb, CS_ARCH_ARM, CS_MODE_THUMB },
+ { AdbgMachine.thumb32, CS_ARCH_ARM, CS_MODE_THUMB | CS_MODE_V8 },
+ { AdbgMachine.arm, CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_V8 }, // tested
+ { AdbgMachine.aarch64, CS_ARCH_ARM64, 0 }, // tested
+ { AdbgMachine.ppc, CS_ARCH_PPC, CS_MODE_32 | CS_MODE_BIG_ENDIAN }, // tested
+ { AdbgMachine.ppcle, CS_ARCH_PPC, CS_MODE_32 | CS_MODE_LITTLE_ENDIAN }, // tested
+ { AdbgMachine.ppc64, CS_ARCH_PPC, CS_MODE_64 | CS_MODE_BIG_ENDIAN },
+ { AdbgMachine.ppc64le, CS_ARCH_PPC, CS_MODE_64 | CS_MODE_LITTLE_ENDIAN },
+ { AdbgMachine.mips, CS_ARCH_MIPS, CS_MODE_32 | CS_MODE_BIG_ENDIAN }, // tested
+ { AdbgMachine.mipsle, CS_ARCH_MIPS, CS_MODE_32 | CS_MODE_LITTLE_ENDIAN },
+ { AdbgMachine.mipsii, CS_ARCH_MIPS, CS_MODE_32 | CS_MODE_MIPS2 | CS_MODE_BIG_ENDIAN },
+ { AdbgMachine.mipsiii, CS_ARCH_MIPS, CS_MODE_32 | CS_MODE_MIPS3 | CS_MODE_BIG_ENDIAN },
+ { AdbgMachine.mipsiv, CS_ARCH_MIPS, CS_MODE_32 | CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN },
+ { AdbgMachine.sparc, CS_ARCH_SPARC, 0 },
+ { AdbgMachine.sparc9, CS_ARCH_SPARC, CS_MODE_V9 }, // tested
+ { AdbgMachine.systemz, CS_ARCH_SYSZ, 0 }, // tested
+];
+
// Platform to CS' ARCH and MODE types
private
int adbg_dis_lib_a2cs(ref int cs_arch, ref int cs_mode, AdbgMachine platform) {
- switch (platform) with (AdbgMachine) {
- case unknown: // No explicit choice, use target defaults
+ if (platform == AdbgMachine.unknown) {
cs_arch = CS_DEFAULT_PLATFORM;
cs_mode = CS_DEFAULT_MODE;
- break;
- //
- // x86
- //
- case i8086:
- cs_arch = CS_ARCH_X86;
- cs_mode = CS_MODE_16;
- break;
- case i386:
- cs_arch = CS_ARCH_X86;
- cs_mode = CS_MODE_32;
- break;
- case amd64:
- cs_arch = CS_ARCH_X86;
- cs_mode = CS_MODE_64;
- break;
- //
- // Arm
- //
- case thumb:
- cs_arch = CS_ARCH_ARM;
- cs_mode = CS_MODE_THUMB;
- break;
- case thumb32:
- cs_arch = CS_ARCH_ARM;
- cs_mode = CS_MODE_THUMB | CS_MODE_V8;
- break;
- case arm:
- cs_arch = CS_ARCH_ARM;
- cs_mode = CS_MODE_ARM | CS_MODE_V8;
- break;
- case aarch64:
- cs_arch = CS_ARCH_ARM64;
- cs_mode = CS_MODE_ARM | CS_MODE_V8;
- break;
- //
- // PowerISA
- //
- case ppc:
- cs_arch = CS_ARCH_PPC;
- cs_mode = CS_MODE_32;
- break;
- case ppc64:
- cs_arch = CS_ARCH_PPC;
- cs_mode = CS_MODE_64;
- break;
- //
- // Others
- //
- default:
- return adbg_oops(AdbgError.disasmUnsupportedMachine);
+ return 0;
}
- return 0;
+ foreach (ref immutable(dismachine_t) dismach; machmap_capstone) {
+ if (platform != dismach.mach)
+ continue;
+ cs_arch = dismach.cs_arch;
+ cs_mode = dismach.cs_mode;
+ return 0;
+ }
+ return adbg_oops(AdbgError.disasmUnsupportedMachine);
}
/// Open a disassembler instance.
-/// Params:
-/// machine = Machine architecture.
+/// Params: machine = Machine architecture.
/// Returns: Error code.
adbg_disassembler_t* adbg_dis_open(AdbgMachine machine = AdbgMachine.unknown) {
//TODO: static if (CAPSTONE_DYNAMIC)
@@ -259,15 +251,15 @@ adbg_disassembler_t* adbg_dis_open(AdbgMachine machine = AdbgMachine.unknown) {
}
if (cs_open(cs_arch, cs_mode, &dasm.cs_handle)) {
- free(dasm);
adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
+ free(dasm);
return null;
}
dasm.cs_inst = cs_malloc(dasm.cs_handle);
if (dasm.cs_inst == null) {
- free(dasm);
adbg_oops(AdbgError.libCapstone, &dasm.cs_handle);
+ free(dasm);
return null;
}
@@ -290,20 +282,13 @@ void adbg_dis_close(adbg_disassembler_t *dasm) {
free(dasm);
}
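+// HACK: Takes an index and returns one entry per call (null once past the end), because the machine values live inside machmap_capstone entries and cannot be handed out as one contiguous list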
/// Returns a null-terminated list of machines that the disassembler supports.
/// Returns: Pointer to null-terminated list.
-immutable(AdbgMachine)* adbg_dis_machines() {
- static immutable AdbgMachine[] mlist = [
- AdbgMachine.i8086,
- AdbgMachine.i386,
- AdbgMachine.amd64,
- AdbgMachine.thumb,
- AdbgMachine.thumb32,
- AdbgMachine.arm,
- AdbgMachine.aarch64,
- AdbgMachine.unknown, // null-terminator
- ];
- return mlist.ptr;
+immutable(AdbgMachine)* adbg_dis_machines(size_t i) {
+ if (i >= machmap_capstone.length)
+ return null;
+ return &machmap_capstone[i].mach;
}
/// Configure an option to the disassembler.
diff --git a/src/adbg/include/capstone/v4.d b/src/adbg/include/capstone/v4.d
index 5f6c7324..53f23be1 100644
--- a/src/adbg/include/capstone/v4.d
+++ b/src/adbg/include/capstone/v4.d
@@ -17,8 +17,6 @@ import adbg.include.capstone.tms320c64x;
import adbg.include.capstone.x86;
import adbg.include.capstone.xcore;
-extern (C):
-
/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh , 2013-2016 */
@@ -37,11 +35,13 @@ enum CS_VERSION_EXTRA = 0;
/// Macro to create combined version which can be compared to
/// result of cs_version() API.
-extern (D) auto CS_MAKE_VERSION(T0, T1)(auto ref T0 major, auto ref T1 minor)
+template CS_MAKE_VERSION(int major, int minor)
{
- return (major << 8) + minor;
+ enum CS_MAKE_VERSION = (major << 8) + minor;
}
+extern (C):
+
/// Maximum size of an instruction mnemonic string.
enum CS_MNEMONIC_SIZE = 32;
@@ -51,18 +51,18 @@ alias csh = size_t;
/// Architecture type
enum
{
- CS_ARCH_ARM = 0, ///< ARM architecture (including Thumb, Thumb-2)
- CS_ARCH_ARM64 = 1, ///< ARM-64, also called AArch64
- CS_ARCH_MIPS = 2, ///< Mips architecture
- CS_ARCH_X86 = 3, ///< X86 architecture (including x86 & x86-64)
- CS_ARCH_PPC = 4, ///< PowerPC architecture
- CS_ARCH_SPARC = 5, ///< Sparc architecture
- CS_ARCH_SYSZ = 6, ///< SystemZ architecture
- CS_ARCH_XCORE = 7, ///< XCore architecture
- CS_ARCH_M68K = 8, ///< 68K architecture
- CS_ARCH_TMS320C64X = 9, ///< TMS320C64x architecture
- CS_ARCH_M680X = 10, ///< 680X architecture
- CS_ARCH_EVM = 11, ///< Ethereum architecture
+ CS_ARCH_ARM = 0, /// ARM architecture (including Thumb, Thumb-2)
+ CS_ARCH_ARM64 = 1, /// ARM-64, also called AArch64
+ CS_ARCH_MIPS = 2, /// Mips architecture
+ CS_ARCH_X86 = 3, /// X86 architecture (including x86 & x86-64)
+ CS_ARCH_PPC = 4, /// PowerPC architecture
+ CS_ARCH_SPARC = 5, /// Sparc architecture
+ CS_ARCH_SYSZ = 6, /// SystemZ architecture
+ CS_ARCH_XCORE = 7, /// XCore architecture
+ CS_ARCH_M68K = 8, /// 68K architecture
+ CS_ARCH_TMS320C64X = 9, /// TMS320C64x architecture
+ CS_ARCH_M680X = 10, /// 680X architecture
+ CS_ARCH_EVM = 11, /// Ethereum architecture
CS_ARCH_MAX = 12,
CS_ARCH_ALL = 0xFFFF // All architectures - for cs_support()
}
diff --git a/src/adbg/machines.d b/src/adbg/machines.d
index c0243a83..3e29694e 100644
--- a/src/adbg/machines.d
+++ b/src/adbg/machines.d
@@ -85,9 +85,7 @@ enum AdbgMachine {
sparc8p,
/// SPARC Version 9
sparc9,
-
- /// RISC-V (any)
- riscv,
+
/// RISC-V RV32
riscv32,
/// RISC-V RV64
@@ -528,7 +526,6 @@ immutable adbg_machine_t[] machines = [
{ AdbgMachine.sparc9, "sparc9", "sparc64", "SPARC Version 9" },
// RISC-V
- { AdbgMachine.riscv, "riscv", null, "RISC-V" },
{ AdbgMachine.riscv32, "riscv32", null, "RISC-V 32-bit" },
{ AdbgMachine.riscv64, "riscv64", null, "RISC-V 64-bit" },
{ AdbgMachine.riscv128, "riscv128", null, "RISC-V 128-bit" },
@@ -858,4 +855,5 @@ immutable(adbg_machine_t)* adbg_machine_select(const(char) *alias_) {
assert(adbg_machine_select("i386").machine == AdbgMachine.i386);
assert(adbg_machine_select("amd64").machine == AdbgMachine.amd64);
assert(adbg_machine_select("mips").machine == AdbgMachine.mips);
+ assert(adbg_machine_select("sparc64").machine == AdbgMachine.sparc9);
}
\ No newline at end of file
diff --git a/src/adbg/objects/elf.d b/src/adbg/objects/elf.d
index f759c474..7fe086b6 100644
--- a/src/adbg/objects/elf.d
+++ b/src/adbg/objects/elf.d
@@ -1797,7 +1797,13 @@ AdbgMachine adbg_object_elf_machine(adbg_object_t *o) {
case ELF_EM_FT32: return AdbgMachine.ftdi;
case ELF_EM_MOXIE: return AdbgMachine.moxie;
case ELF_EM_AMDGPU: return AdbgMachine.amdgpu;
- case ELF_EM_RISCV: return AdbgMachine.riscv;
+ case ELF_EM_RISCV:
+ switch (internal.eheader32.e_ident[ELF_EI_CLASS]) {
+ case ELF_CLASS_32: return AdbgMachine.riscv32;
+ case ELF_CLASS_64: return AdbgMachine.riscv64;
+ default:
+ }
+ goto default;
case ELF_EM_LOONGARCH:
switch (internal.eheader32.e_ident[ELF_EI_CLASS]) {
case ELF_CLASS_32: return AdbgMachine.loongarch32;