impr: Refactor disassembler system to make it more modular

This commit is contained in:
WerWolv
2024-12-26 18:41:34 +01:00
parent f11205bba7
commit a76c6c653d
11 changed files with 570 additions and 323 deletions

View File

@@ -47,19 +47,8 @@ namespace hex::plugin::disasm {
return;
m_disassemblerTask = TaskManager::createTask("hex.disassembler.view.disassembler.disassembling"_lang, m_regionToDisassemble.getSize(), [this](auto &task) {
csh capstoneHandle;
cs_mode mode = m_mode;
// Create a capstone disassembler instance
if (cs_open(Disassembler::toCapstoneArchitecture(m_architecture), mode, &capstoneHandle) == CS_ERR_OK) {
auto *instruction = cs_malloc(capstoneHandle);
ON_SCOPE_EXIT { cs_free(instruction, 1); };
// Tell capstone to skip data bytes
cs_option(capstoneHandle, CS_OPT_SKIPDATA, CS_OPT_ON);
if (m_currArchitecture->start()) {
auto provider = ImHexApi::Provider::get();
std::vector<u8> buffer(1_MiB, 0x00);
@@ -75,34 +64,32 @@ namespace hex::plugin::disasm {
size_t bufferSize = std::min<u64>(buffer.size(), (m_regionToDisassemble.getEndAddress() - instructionDataAddress));
provider->read(instructionDataAddress, buffer.data(), bufferSize);
auto code = std::span(buffer.data(), bufferSize);
// Ask capstone to disassemble the data
const u8 *code = buffer.data();
while (cs_disasm_iter(capstoneHandle, &code, &bufferSize, &instructionLoadAddress, instruction)) {
while (true) {
auto instruction = m_currArchitecture->disassemble(m_imageBaseAddress, instructionLoadAddress, instructionDataAddress, code);
if (!instruction.has_value())
break;
task.update(instructionDataAddress);
// Convert the capstone instructions to our disassembly format
Disassembly disassembly = { };
disassembly.address = instruction->address;
disassembly.offset = instructionDataAddress - m_imageBaseAddress;
disassembly.size = instruction->size;
disassembly.mnemonic = instruction->mnemonic;
disassembly.operators = instruction->op_str;
for (u16 j = 0; j < instruction->size; j++)
disassembly.bytes += hex::format("{0:02X} ", instruction->bytes[j]);
disassembly.bytes.pop_back();
m_disassembly.push_back(disassembly);
m_disassembly.push_back(instruction.value());
code = code.subspan(instruction->size);
instructionDataAddress += instruction->size;
instructionLoadAddress += instruction->size;
hadError = false;
if (code.empty())
break;
}
if (hadError) break;
hadError = true;
}
cs_close(&capstoneHandle);
m_currArchitecture->end();
}
});
}
@@ -122,15 +109,15 @@ namespace hex::plugin::disasm {
}
// As disassembly code can be quite long, we prefer writing each disassembled instruction to file
for (const Disassembly& d : m_disassembly) {
for (const ContentRegistry::Disassembler::Instruction& instruction : m_disassembly) {
// We test for a "bugged" case that should never happen - the instruction should always have a mnemonic
if (d.mnemonic.empty())
if (instruction.mnemonic.empty())
continue;
if (d.operators.empty())
file.writeString(hex::format("{}\n", d.mnemonic));
if (instruction.operators.empty())
file.writeString(hex::format("{}\n", instruction.mnemonic));
else
file.writeString(hex::format("{} {}\n", d.mnemonic, d.operators));
file.writeString(hex::format("{} {}\n", instruction.mnemonic, instruction.operators));
}
});
});
@@ -164,278 +151,29 @@ namespace hex::plugin::disasm {
ImGuiExt::Header("hex.ui.common.settings"_lang);
// Draw architecture selector
if (ImGui::Combo("hex.disassembler.view.disassembler.arch"_lang, reinterpret_cast<int *>(&m_architecture), Disassembler::ArchitectureNames.data(), Disassembler::getArchitectureSupportedCount()))
m_mode = cs_mode(0);
// Draw sub-settings for each architecture
if (ImGuiExt::BeginBox()) {
// Draw endian radio buttons. This setting is available for all architectures
static int littleEndian = true;
ImGui::RadioButton("hex.ui.common.little_endian"_lang, &littleEndian, true);
ImGui::SameLine();
ImGui::RadioButton("hex.ui.common.big_endian"_lang, &littleEndian, false);
ImGui::NewLine();
// Draw architecture specific settings
switch (m_architecture) {
case Architecture::ARM:
{
static int mode = CS_MODE_ARM;
ImGui::RadioButton("hex.disassembler.view.disassembler.arm.arm"_lang, &mode, CS_MODE_ARM);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.arm.thumb"_lang, &mode, CS_MODE_THUMB);
static int extraMode = 0;
ImGui::RadioButton("hex.disassembler.view.disassembler.arm.default"_lang, &extraMode, 0);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.arm.cortex_m"_lang, &extraMode, CS_MODE_MCLASS);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.arm.armv8"_lang, &extraMode, CS_MODE_V8);
m_mode = cs_mode(mode | extraMode);
}
break;
case Architecture::MIPS:
{
static int mode = CS_MODE_MIPS32;
ImGui::RadioButton("hex.disassembler.view.disassembler.mips.mips32"_lang, &mode, CS_MODE_MIPS32);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.mips.mips64"_lang, &mode, CS_MODE_MIPS64);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.mips.mips32R6"_lang, &mode, CS_MODE_MIPS32R6);
ImGui::RadioButton("hex.disassembler.view.disassembler.mips.mips2"_lang, &mode, CS_MODE_MIPS2);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.mips.mips3"_lang, &mode, CS_MODE_MIPS3);
static bool microMode;
ImGui::Checkbox("hex.disassembler.view.disassembler.mips.micro"_lang, &microMode);
m_mode = cs_mode(mode | (microMode ? CS_MODE_MICRO : cs_mode(0)));
}
break;
case Architecture::X86:
{
static int mode = CS_MODE_32;
ImGui::RadioButton("hex.disassembler.view.disassembler.16bit"_lang, &mode, CS_MODE_16);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.32bit"_lang, &mode, CS_MODE_32);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.64bit"_lang, &mode, CS_MODE_64);
m_mode = cs_mode(mode);
}
break;
case Architecture::PPC:
{
static int mode = CS_MODE_32;
ImGui::RadioButton("hex.disassembler.view.disassembler.32bit"_lang, &mode, CS_MODE_32);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.64bit"_lang, &mode, CS_MODE_64);
static bool qpx = false;
ImGui::Checkbox("hex.disassembler.view.disassembler.ppc.qpx"_lang, &qpx);
#if CS_API_MAJOR >= 5
static bool spe = false;
ImGui::Checkbox("hex.disassembler.view.disassembler.ppc.spe"_lang, &spe);
static bool booke = false;
ImGui::Checkbox("hex.disassembler.view.disassembler.ppc.booke"_lang, &booke);
m_mode = cs_mode(mode | (qpx ? CS_MODE_QPX : cs_mode(0)) | (spe ? CS_MODE_SPE : cs_mode(0)) | (booke ? CS_MODE_BOOKE : cs_mode(0)));
#else
m_mode = cs_mode(mode | (qpx ? CS_MODE_QPX : cs_mode(0)));
#endif
}
break;
case Architecture::SPARC:
{
static bool v9Mode = false;
ImGui::Checkbox("hex.disassembler.view.disassembler.sparc.v9"_lang, &v9Mode);
m_mode = cs_mode(v9Mode ? CS_MODE_V9 : cs_mode(0));
}
break;
#if CS_API_MAJOR >= 5
case Architecture::RISCV:
{
static int mode = CS_MODE_RISCV32;
ImGui::RadioButton("hex.disassembler.view.disassembler.32bit"_lang, &mode, CS_MODE_RISCV32);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.64bit"_lang, &mode, CS_MODE_RISCV64);
static bool compressed = false;
ImGui::Checkbox("hex.disassembler.view.disassembler.riscv.compressed"_lang, &compressed);
m_mode = cs_mode(mode | (compressed ? CS_MODE_RISCVC : cs_mode(0)));
}
break;
#endif
case Architecture::M68K:
{
static int selectedMode = 0;
std::pair<const char *, cs_mode> modes[] = {
{"hex.disassembler.view.disassembler.m68k.000"_lang, CS_MODE_M68K_000},
{ "hex.disassembler.view.disassembler.m68k.010"_lang, CS_MODE_M68K_010},
{ "hex.disassembler.view.disassembler.m68k.020"_lang, CS_MODE_M68K_020},
{ "hex.disassembler.view.disassembler.m68k.030"_lang, CS_MODE_M68K_030},
{ "hex.disassembler.view.disassembler.m68k.040"_lang, CS_MODE_M68K_040},
{ "hex.disassembler.view.disassembler.m68k.060"_lang, CS_MODE_M68K_060},
};
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.settings.mode"_lang, modes[selectedMode].first)) {
for (u32 i = 0; i < IM_ARRAYSIZE(modes); i++) {
if (ImGui::Selectable(modes[i].first))
selectedMode = i;
}
ImGui::EndCombo();
}
m_mode = cs_mode(modes[selectedMode].second);
}
break;
case Architecture::M680X:
{
static int selectedMode = 0;
std::pair<const char *, cs_mode> modes[] = {
{"hex.disassembler.view.disassembler.m680x.6301"_lang, CS_MODE_M680X_6301 },
{ "hex.disassembler.view.disassembler.m680x.6309"_lang, CS_MODE_M680X_6309 },
{ "hex.disassembler.view.disassembler.m680x.6800"_lang, CS_MODE_M680X_6800 },
{ "hex.disassembler.view.disassembler.m680x.6801"_lang, CS_MODE_M680X_6801 },
{ "hex.disassembler.view.disassembler.m680x.6805"_lang, CS_MODE_M680X_6805 },
{ "hex.disassembler.view.disassembler.m680x.6808"_lang, CS_MODE_M680X_6808 },
{ "hex.disassembler.view.disassembler.m680x.6809"_lang, CS_MODE_M680X_6809 },
{ "hex.disassembler.view.disassembler.m680x.6811"_lang, CS_MODE_M680X_6811 },
{ "hex.disassembler.view.disassembler.m680x.cpu12"_lang, CS_MODE_M680X_CPU12},
{ "hex.disassembler.view.disassembler.m680x.hcs08"_lang, CS_MODE_M680X_HCS08},
};
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.settings.mode"_lang, modes[selectedMode].first)) {
for (u32 i = 0; i < IM_ARRAYSIZE(modes); i++) {
if (ImGui::Selectable(modes[i].first))
selectedMode = i;
}
ImGui::EndCombo();
}
m_mode = cs_mode(modes[selectedMode].second);
}
break;
#if CS_API_MAJOR >= 5
case Architecture::MOS65XX:
{
static int selectedMode = 0;
std::pair<const char *, cs_mode> modes[] = {
{"hex.disassembler.view.disassembler.mos65xx.6502"_lang, CS_MODE_MOS65XX_6502 },
{ "hex.disassembler.view.disassembler.mos65xx.65c02"_lang, CS_MODE_MOS65XX_65C02 },
{ "hex.disassembler.view.disassembler.mos65xx.w65c02"_lang, CS_MODE_MOS65XX_W65C02 },
{ "hex.disassembler.view.disassembler.mos65xx.65816"_lang, CS_MODE_MOS65XX_65816 },
{ "hex.disassembler.view.disassembler.mos65xx.65816_long_m"_lang, CS_MODE_MOS65XX_65816_LONG_M },
{ "hex.disassembler.view.disassembler.mos65xx.65816_long_x"_lang, CS_MODE_MOS65XX_65816_LONG_X },
{ "hex.disassembler.view.disassembler.mos65xx.65816_long_mx"_lang, CS_MODE_MOS65XX_65816_LONG_MX},
};
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.settings.mode"_lang, modes[selectedMode].first)) {
for (u32 i = 0; i < IM_ARRAYSIZE(modes); i++) {
if (ImGui::Selectable(modes[i].first))
selectedMode = i;
}
ImGui::EndCombo();
}
m_mode = cs_mode(modes[selectedMode].second);
}
break;
#endif
#if CS_API_MAJOR >= 5
case Architecture::BPF:
{
static int mode = CS_MODE_BPF_CLASSIC;
ImGui::RadioButton("hex.disassembler.view.disassembler.bpf.classic"_lang, &mode, CS_MODE_BPF_CLASSIC);
ImGui::SameLine();
ImGui::RadioButton("hex.disassembler.view.disassembler.bpf.extended"_lang, &mode, CS_MODE_BPF_EXTENDED);
m_mode = cs_mode(mode);
}
break;
case Architecture::SH:
{
static u32 selectionMode = 0;
static bool fpu = false;
static bool dsp = false;
std::pair<const char*, cs_mode> modes[] = {
{ "hex.disassembler.view.disassembler.sh.sh2"_lang, CS_MODE_SH2 },
{ "hex.disassembler.view.disassembler.sh.sh2a"_lang, CS_MODE_SH2A },
{ "hex.disassembler.view.disassembler.sh.sh3"_lang, CS_MODE_SH3 },
{ "hex.disassembler.view.disassembler.sh.sh4"_lang, CS_MODE_SH4 },
{ "hex.disassembler.view.disassembler.sh.sh4a"_lang, CS_MODE_SH4A },
};
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.settings.mode"_lang, modes[selectionMode].first)) {
for (u32 i = 0; i < IM_ARRAYSIZE(modes); i++) {
if (ImGui::Selectable(modes[i].first))
selectionMode = i;
}
ImGui::EndCombo();
}
ImGui::Checkbox("hex.disassembler.view.disassembler.sh.fpu"_lang, &fpu);
ImGui::SameLine();
ImGui::Checkbox("hex.disassembler.view.disassembler.sh.dsp"_lang, &dsp);
m_mode = cs_mode(modes[selectionMode].second | (fpu ? CS_MODE_SHFPU : cs_mode(0)) | (dsp ? CS_MODE_SHDSP : cs_mode(0)));
}
break;
case Architecture::TRICORE:
{
static u32 selectionMode = 0;
std::pair<const char*, cs_mode> modes[] = {
{ "hex.disassembler.view.disassembler.tricore.110"_lang, CS_MODE_TRICORE_110 },
{ "hex.disassembler.view.disassembler.tricore.120"_lang, CS_MODE_TRICORE_120 },
{ "hex.disassembler.view.disassembler.tricore.130"_lang, CS_MODE_TRICORE_130 },
{ "hex.disassembler.view.disassembler.tricore.131"_lang, CS_MODE_TRICORE_131 },
{ "hex.disassembler.view.disassembler.tricore.160"_lang, CS_MODE_TRICORE_160 },
{ "hex.disassembler.view.disassembler.tricore.161"_lang, CS_MODE_TRICORE_161 },
{ "hex.disassembler.view.disassembler.tricore.162"_lang, CS_MODE_TRICORE_162 },
};
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.settings.mode"_lang, modes[selectionMode].first)) {
for (u32 i = 0; i < IM_ARRAYSIZE(modes); i++) {
if (ImGui::Selectable(modes[i].first))
selectionMode = i;
}
ImGui::EndCombo();
}
m_mode = cs_mode(modes[selectionMode].second);
}
break;
case Architecture::WASM:
#endif
case Architecture::EVM:
case Architecture::TMS320C64X:
case Architecture::ARM64:
case Architecture::SYSZ:
case Architecture::XCORE:
m_mode = cs_mode(0);
break;
const auto &architectures = ContentRegistry::Disassembler::impl::getArchitectures();
if (architectures.empty()) {
ImGuiExt::TextSpinner("hex.disassembler.view.disassembler.arch"_lang);
} else {
if (m_currArchitecture == nullptr) {
m_currArchitecture = architectures.begin()->second();
}
if (littleEndian) {
m_mode = cs_mode(u32(m_mode) | CS_MODE_LITTLE_ENDIAN);
} else {
m_mode = cs_mode(u32(m_mode) | CS_MODE_BIG_ENDIAN);
if (ImGui::BeginCombo("hex.disassembler.view.disassembler.arch"_lang, m_currArchitecture->getName().c_str())) {
for (const auto &[name, creator] : architectures) {
if (ImGui::Selectable(name.c_str(), name == m_currArchitecture->getName())) {
m_currArchitecture = creator();
}
}
ImGui::EndCombo();
}
// Draw sub-settings for each architecture
if (ImGuiExt::BeginBox()) {
m_currArchitecture->drawSettings();
}
ImGuiExt::EndBox();
}
ImGuiExt::EndBox();
}
// Draw disassemble button