feat: Added various custom built-in types to the pattern language (#1991)

This commit is contained in:
Nik
2024-12-14 16:52:36 +01:00
committed by GitHub
parent 13e079d1b8
commit 040a606b39
14 changed files with 618 additions and 13 deletions

View File

@@ -24,6 +24,7 @@ add_imhex_plugin(
source/content/views/view_disassembler.cpp
source/content/pl_visualizers/disassembler.cpp
source/content/pl_builtin_types.cpp
INCLUDES
include
${CAPSTONE_INCLUDE_DIR}

View File

@@ -5,6 +5,7 @@
#include <array>
#include <capstone/capstone.h>
#include <wolv/utils/string.hpp>
namespace hex::plugin::disasm {
@@ -91,5 +92,174 @@ namespace hex::plugin::disasm {
return supportedCount;
}
/**
 * @brief Converts a textual disassembler description into capstone settings
 *
 * The string has to be in the form of `arch;option1,option2,option3,no-option4`.
 * The part in front of the `;` names the architecture, optionally followed by an
 * endianness suffix (`be`/`eb` for big endian, `le`/`el` for little endian).
 * Everything after the `;` is a comma separated list of mode options. An option
 * prefixed with `no-` clears the corresponding mode bits instead of setting them.
 * Not all results might make sense for capstone.
 *
 * @param string Settings string to parse
 * @return The selected architecture and the combined mode flags
 * @throws std::runtime_error if the architecture name or one of the options is not known
 */
static std::pair<cs_arch, cs_mode> stringToSettings(std::string_view string) {
    const auto archSeparator = string.find_first_of(';');

    std::string_view archName;
    std::string_view options;
    if (archSeparator == std::string_view::npos) {
        archName = wolv::util::trim(string);
        options = "";
    } else {
        // substr() takes a character count, so `archSeparator` itself selects everything in front of the ';'
        archName = wolv::util::trim(string.substr(0, archSeparator));
        options = wolv::util::trim(string.substr(archSeparator + 1));
    }

    u32 arch = {};
    u32 mode = {};

    // An endianness suffix is stripped before the architecture name is looked up
    if (archName.ends_with("be") || archName.ends_with("eb")) {
        mode |= CS_MODE_BIG_ENDIAN;
        archName.remove_suffix(2);
    } else if (archName.ends_with("le") || archName.ends_with("el")) {
        mode |= CS_MODE_LITTLE_ENDIAN;
        archName.remove_suffix(2);
    }

    if (equalsIgnoreCase(archName, "arm")) {
        arch = CS_ARCH_ARM;
        mode |= CS_MODE_ARM;
    }
    else if (equalsIgnoreCase(archName, "thumb")) {
        arch = CS_ARCH_ARM;
        mode |= CS_MODE_THUMB;
    }
    else if (equalsIgnoreCase(archName, "aarch64") || equalsIgnoreCase(archName, "arm64"))
        arch = CS_ARCH_ARM64;
    else if (equalsIgnoreCase(archName, "mips"))
        arch = CS_ARCH_MIPS;
    else if (equalsIgnoreCase(archName, "x86"))
        arch = CS_ARCH_X86;
    else if (equalsIgnoreCase(archName, "x86_64") || equalsIgnoreCase(archName, "x64")) {
        arch = CS_ARCH_X86;
        // NOTE(review): this is an assignment, so an endianness flag parsed from the suffix above is discarded here
        mode = CS_MODE_64;
    }
    else if (equalsIgnoreCase(archName, "ppc") || equalsIgnoreCase(archName, "powerpc"))
        arch = CS_ARCH_PPC;
    else if (equalsIgnoreCase(archName, "sparc"))
        arch = CS_ARCH_SPARC;
    else if (equalsIgnoreCase(archName, "sysz"))
        arch = CS_ARCH_SYSZ;
    else if (equalsIgnoreCase(archName, "xcore"))
        arch = CS_ARCH_XCORE;
    else if (equalsIgnoreCase(archName, "m68k"))
        arch = CS_ARCH_M68K;
    else if (equalsIgnoreCase(archName, "m680x"))
        arch = CS_ARCH_M680X;
    else if (equalsIgnoreCase(archName, "tms320c64x"))
        arch = CS_ARCH_TMS320C64X;
    else if (equalsIgnoreCase(archName, "evm"))
        arch = CS_ARCH_EVM;
#if CS_API_MAJOR >= 5
    else if (equalsIgnoreCase(archName, "wasm"))
        arch = CS_ARCH_WASM;
    else if (equalsIgnoreCase(archName, "riscv"))
        arch = CS_ARCH_RISCV;
    else if (equalsIgnoreCase(archName, "mos65xx"))
        arch = CS_ARCH_MOS65XX;
    else if (equalsIgnoreCase(archName, "bpf"))
        arch = CS_ARCH_BPF;
    else if (equalsIgnoreCase(archName, "sh"))
        arch = CS_ARCH_SH;
    else if (equalsIgnoreCase(archName, "tricore"))
        arch = CS_ARCH_TRICORE;
#endif
    else
        throw std::runtime_error("Invalid disassembler architecture");

    while (!options.empty()) {
        std::string_view option;

        // Split off the next option and advance past it (and its separator, if there is one).
        // The remaining view is never shortened by more characters than it holds.
        const auto separatorPos = options.find_first_of(',');
        if (separatorPos == std::string_view::npos) {
            option = options;
            options = {};
        } else {
            option = options.substr(0, separatorPos);
            options.remove_prefix(separatorPos + 1);
        }

        option = wolv::util::trim(option);

        // A leading "no-" turns the option into a request to clear its mode bits
        bool shouldAdd = true;
        if (option.starts_with("no-")) {
            shouldAdd = false;
            option.remove_prefix(3);
        }

        constexpr static std::array<std::pair<std::string_view, cs_mode>, 53> Options = {{
            { "16bit", CS_MODE_16 },
            { "32bit", CS_MODE_32 },
            { "64bit", CS_MODE_64 },
            { "cortex-m", CS_MODE_MCLASS },
            { "armv8", CS_MODE_V8 },
            { "micromips", CS_MODE_MICRO },
            { "mips2", CS_MODE_MIPS2 },
            { "mips3", CS_MODE_MIPS3 },
            { "mips32r6", CS_MODE_MIPS32R6 },
            { "sparcv9", CS_MODE_V9 },
            { "qpx", CS_MODE_QPX },
            { "spe", CS_MODE_SPE },
            { "ps", CS_MODE_PS },
            { "68000", CS_MODE_M68K_000 },
            { "68010", CS_MODE_M68K_010 },
            { "68020", CS_MODE_M68K_020 },
            { "68030", CS_MODE_M68K_030 },
            { "68040", CS_MODE_M68K_040 },
            { "68060", CS_MODE_M68K_060 },
            { "6301", CS_MODE_M680X_6301 },
            { "6309", CS_MODE_M680X_6309 },
            { "6800", CS_MODE_M680X_6800 },
            { "6801", CS_MODE_M680X_6801 },
            { "6805", CS_MODE_M680X_6805 },
            { "6808", CS_MODE_M680X_6808 },
            { "6809", CS_MODE_M680X_6809 },
            { "6811", CS_MODE_M680X_6811 },
            { "cpu12", CS_MODE_M680X_CPU12 },
            { "hcs08", CS_MODE_M680X_HCS08 },
            { "bpfe", CS_MODE_BPF_EXTENDED },
            { "rv32g", CS_MODE_RISCV32 },
            { "rv64g", CS_MODE_RISCV64 },
            { "riscvc", CS_MODE_RISCVC },
            { "6502", CS_MODE_MOS65XX_6502 },
            { "65c02", CS_MODE_MOS65XX_65C02 },
            { "w65c02", CS_MODE_MOS65XX_W65C02 },
            { "65816", CS_MODE_MOS65XX_65816 },
            { "long-m", CS_MODE_MOS65XX_65816_LONG_M },
            { "long-x", CS_MODE_MOS65XX_65816_LONG_X },
            { "sh2", CS_MODE_SH2 },
            { "sh2a", CS_MODE_SH2A },
            { "sh3", CS_MODE_SH3 },
            { "sh4", CS_MODE_SH4 },
            { "sh4a", CS_MODE_SH4A },
            { "shfpu", CS_MODE_SHFPU },
            { "shdsp", CS_MODE_SHDSP },
            { "1.1", CS_MODE_TRICORE_110 },
            { "1.2", CS_MODE_TRICORE_120 },
            { "1.3", CS_MODE_TRICORE_130 },
            { "1.3.1", CS_MODE_TRICORE_131 },
            { "1.6", CS_MODE_TRICORE_160 },
            { "1.6.1", CS_MODE_TRICORE_161 },
            { "1.6.2", CS_MODE_TRICORE_162 },
        }};

        bool optionFound = false;
        for (const auto &[optionName, optionValue] : Options) {
            if (equalsIgnoreCase(option, optionName)) {
                if (shouldAdd) mode |= optionValue;
                else mode &= ~optionValue;

                optionFound = true;
                break;
            }
        }

        if (!optionFound) {
            throw std::runtime_error(fmt::format("Unknown disassembler option '{}'", option));
        }
    }

    return { cs_arch(arch), cs_mode(mode) };
}
};
}

View File

@@ -0,0 +1,129 @@
#include <hex/api/imhex_api.hpp>
#include <hex/api/content_registry.hpp>
#include <hex/helpers/http_requests.hpp>
#include <hex/helpers/utils.hpp>
#include <hex/helpers/fmt.hpp>
#include <pl/core/evaluator.hpp>
#include <pl/patterns/pattern.hpp>
#include <capstone/capstone.h>
#include <content/helpers/disassembler.hpp>
namespace hex::plugin::disasm {
class PatternInstruction : public pl::ptrn::Pattern {
public:
PatternInstruction(pl::core::Evaluator *evaluator, u64 offset, size_t size, u32 line)
: Pattern(evaluator, offset, size, line) { }
[[nodiscard]] std::unique_ptr<Pattern> clone() const override {
return std::unique_ptr<Pattern>(new PatternInstruction(*this));
}
[[nodiscard]] std::string getFormattedName() const override {
return this->getTypeName();
}
[[nodiscard]] bool operator==(const Pattern &other) const override { return compareCommonProperties<decltype(*this)>(other); }
void accept(pl::PatternVisitor &v) override {
v.visit(*this);
}
std::vector<pl::u8> getRawBytes() override {
std::vector<u8> result;
result.resize(this->getSize());
this->getEvaluator()->readData(this->getOffset(), result.data(), result.size(), this->getSection());
if (this->getEndian() != std::endian::native)
std::reverse(result.begin(), result.end());
return result;
}
void setInstructionString(std::string instructionString) {
m_instructionString = std::move(instructionString);
}
protected:
[[nodiscard]] std::string formatDisplayValue() override {
return m_instructionString;
}
private:
std::string m_instructionString;
};
/**
 * @brief Registers the built-in pattern language types provided by this plugin
 *
 * Adds the type `Instruction` in the namespace `builtin::hex::dec`. It takes exactly
 * three parameters:
 *  - params[0]: settings string that is passed to Disassembler::stringToSettings()
 *  - params[1]: syntax name, one of "intel", "at&t", "masm" or "motorola"
 *  - params[2]: address that is passed to capstone as the address of the instruction
 *
 * The type disassembles a single instruction at the evaluator's current read offset
 * and yields a PatternInstruction that covers the bytes of that instruction.
 * Every failure is reported through err::E0012.
 */
void registerPatternLanguageTypes() {
    using namespace pl::core;
    using FunctionParameterCount = pl::api::FunctionParameterCount;

    {
        const pl::api::Namespace nsHexDec = { "builtin", "hex", "dec" };

        /* Instruction<settings, syntax, relocated_address> */
        ContentRegistry::PatternLanguage::addType(nsHexDec, "Instruction", FunctionParameterCount::exactly(3), [](Evaluator *evaluator, auto params) -> std::unique_ptr<pl::ptrn::Pattern> {
            cs_arch arch;
            cs_mode mode;

            // Parser failures are re-reported as evaluator errors
            try {
                std::tie(arch, mode) = Disassembler::stringToSettings(params[0].toString());
            } catch (const std::exception &e) {
                err::E0012.throwError(e.what());
            }

            const auto syntaxString = params[1].toString();
            const auto relocatedAddress = params[2].toUnsigned();

            const auto address = evaluator->getReadOffset();

            csh capstone;
            if (cs_open(arch, mode, &capstone) != CS_ERR_OK) {
                err::E0012.throwError("Failed to disassemble instruction");
            }

            // Closes the capstone handle on every way out of this scope, including the
            // exceptions raised by throwError() and readData() below
            struct HandleGuard {
                csh *handle;
                ~HandleGuard() { cs_close(handle); }
            } handleGuard { &capstone };

            cs_opt_value syntax;
            if (equalsIgnoreCase(syntaxString, "intel"))
                syntax = CS_OPT_SYNTAX_INTEL;
            else if (equalsIgnoreCase(syntaxString, "at&t"))
                syntax = CS_OPT_SYNTAX_ATT;
            else if (equalsIgnoreCase(syntaxString, "masm"))
                syntax = CS_OPT_SYNTAX_MASM;
            else if (equalsIgnoreCase(syntaxString, "motorola"))
                syntax = CS_OPT_SYNTAX_MOTOROLA;
            else
                err::E0012.throwError(hex::format("Invalid disassembler syntax name '{}'", syntaxString));

            cs_option(capstone, CS_OPT_SYNTAX, syntax);
            cs_option(capstone, CS_OPT_SKIPDATA, CS_OPT_ON);

            // At most 32 bytes are handed to capstone
            // NOTE(review): the subtraction wraps around if `address` is larger than the section size — confirm whether that can happen
            const auto sectionId = evaluator->getSectionId();
            std::vector<u8> data(std::min<u64>(32, evaluator->getSectionSize(sectionId) - address));
            evaluator->readData(address, data.data(), data.size(), sectionId);

            cs_insn *instruction = nullptr;
            const size_t instructionCount = cs_disasm(capstone, data.data(), data.size(), relocatedAddress, 1, &instruction);
            if (instructionCount != 1) {
                err::E0012.throwError("Failed to disassemble instruction");
            }

            // Copy everything that is needed out of the capstone-owned instruction so it can be released right away
            const size_t instructionSize = instruction->size;

            std::string instructionString;
            if (instruction->mnemonic[0] != '\x00')
                instructionString += instruction->mnemonic;
            if (instruction->op_str[0] != '\x00') {
                instructionString += ' ';
                instructionString += instruction->op_str;
            }

            cs_free(instruction, instructionCount);

            auto result = std::make_unique<PatternInstruction>(evaluator, address, instructionSize, 0);
            result->setInstructionString(std::move(instructionString));

            return result;
        });
    }
}
}

View File

@@ -6,6 +6,7 @@
#include <imgui.h>
#include <capstone/capstone.h>
#include <content/helpers/disassembler.hpp>
#include <hex/ui/imgui_imhex_extensions.h>
#include <hex/api/localization_manager.hpp>
@@ -23,13 +24,12 @@ namespace hex::plugin::disasm {
if (shouldReset) {
auto pattern = arguments[0].toPattern();
auto baseAddress = arguments[1].toUnsigned();
auto architecture = arguments[2].toUnsigned();
auto mode = arguments[3].toUnsigned();
const auto [arch, mode] = Disassembler::stringToSettings(arguments[2].toString());
disassembly.clear();
csh capstone;
if (cs_open(static_cast<cs_arch>(architecture), static_cast<cs_mode>(mode), &capstone) == CS_ERR_OK) {
if (cs_open(arch, mode, &capstone) == CS_ERR_OK) {
cs_option(capstone, CS_OPT_SKIPDATA, CS_OPT_ON);
auto data = pattern->getBytes();

View File

@@ -15,6 +15,7 @@ using namespace hex::plugin::disasm;
namespace hex::plugin::disasm {
void drawDisassemblyVisualizer(pl::ptrn::Pattern &, bool, std::span<const pl::core::Token::Literal> arguments);
void registerPatternLanguageTypes();
}
@@ -27,7 +28,7 @@ namespace {
void registerPlVisualizers() {
using ParamCount = pl::api::FunctionParameterCount;
ContentRegistry::PatternLanguage::addVisualizer("disassembler", drawDisassemblyVisualizer, ParamCount::exactly(4));
ContentRegistry::PatternLanguage::addVisualizer("disassembler", drawDisassemblyVisualizer, ParamCount::exactly(3));
}
}
@@ -40,4 +41,5 @@ IMHEX_PLUGIN_SETUP("Disassembler", "WerWolv", "Disassembler support") {
registerViews();
registerPlVisualizers();
registerPatternLanguageTypes();
}