mirror of
https://github.com/WerWolv/ImHex-Patterns.git
synced 2026-03-28 07:47:02 -05:00
pattern/protobuf: Allow parsing of nested messages (#378)
This change mainly enables best-effort recursive parsing of submessages. It is inherently impossible to determine the underlying data type of a LengthDelimited field with certainty, so the nested parse is only an attempt. The user can disable recursive submessage parsing via Settings.

Other minor changes:
* added #pragma MIME and #pragma endian directives
* enabled UTF-8 display for LengthDelimited types
* added signed LEB128 display for Varint types (although this doesn't seem to be working on my test case)
* swapped if/else-if structure for match
* fail upon receiving unknown or unsupported WireType

Co-authored-by: Nik <werwolv98@gmail.com>
This commit is contained in:
@@ -1,12 +1,25 @@
|
||||
#pragma author WerWolv
|
||||
#pragma author WerWolv and Glenn Hartmann
|
||||
#pragma description Google Protobuf wire encoding (.pb)
|
||||
|
||||
#pragma MIME application/protobuf
|
||||
#pragma MIME application/vnd.google.protobuf
|
||||
|
||||
#pragma endian little
|
||||
|
||||
import std.core;
|
||||
import std.io;
|
||||
import std.mem;
|
||||
import std.string;
|
||||
import std.sys;
|
||||
|
||||
import type.leb128;
|
||||
|
||||
// Attempting to recursively parse submessages is a guess-and-check process
|
||||
// since it's inherently impossible to tell for sure what type a
|
||||
// LengthDelimited field is. This could be imprecise and could be slow for
|
||||
// large or ambiguous files, so we give the user an option to disable it.
|
||||
// User-facing `in` variable; _LengthDelimitedData checks it before attempting a nested Message parse.
bool disable_recursive_submessage_parsing in;
|
||||
|
||||
// Wraps a single u32 whose displayed value is produced by the function named
// "format_zigzag32" (defined outside this excerpt).
// NOTE(review): presumably `value` holds a zigzag-encoded sint32 — confirm against format_zigzag32.
struct ZigZag32 {
|
||||
u32 value;
|
||||
} [[sealed, format("format_zigzag32")]];
|
||||
@@ -32,7 +45,6 @@ enum WireType : u8 {
|
||||
_32Bit = 5
|
||||
};
|
||||
|
||||
|
||||
struct Key {
|
||||
type::uLEB128 keyDec;
|
||||
u32 field_number = u32(keyDec) >> 3;
|
||||
@@ -55,23 +67,55 @@ union _32Bit {
|
||||
float flt;
|
||||
};
|
||||
|
||||
using Field;
|
||||
|
||||
// A message spanning `Size` bytes: Field entries are parsed back-to-back until
// the read position reaches addressof(this) + Size.
struct Message<auto Size> {
|
||||
// The loop is bounded by this message's own end address rather than end-of-file,
// which is what allows a Message to be placed inside a LengthDelimited payload.
Field fields[while(!std::mem::reached(addressof(this) + Size))];
|
||||
};
|
||||
|
||||
// `Length` chars of string data. Both the format and the transform attribute
// point at std::string::impl::format_string, so the struct is presented as a
// single string value instead of a char array.
struct Utf8String<auto Length> {
|
||||
char data[Length];
|
||||
} [[sealed, format("std::string::impl::format_string"), transform("std::string::impl::format_string")]];
|
||||
|
||||
// Overlapping views of the same `Length`-byte payload of a LengthDelimited
// field: raw bytes, a string view, and (unless disabled by the user) a
// tentative nested Message.
union _LengthDelimitedData<auto Length> {
|
||||
u8 bytes[Length];
|
||||
Utf8String<Length> utf8;
|
||||
|
||||
// Skipped entirely when the user sets disable_recursive_submessage_parsing.
if (!disable_recursive_submessage_parsing) {
|
||||
// NOTE(review): there is no catch block; presumably a failed attempt simply
// leaves `msg` out of the union — confirm against the pattern language's try semantics.
try {
|
||||
// Attempt to parse binary data as an embedded Message. This is
|
||||
// expected to fail often, as the proto format uses LengthDelimited
|
||||
// for several different data types.
|
||||
Message<Length> msg;
|
||||
// Treat the attempt as failed unless the Message covered exactly Length bytes.
std::assert(sizeof(msg) == Length, "Attempted parse of Message consumed wrong number of bytes.");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// A length-prefixed field value: an unsigned LEB128 byte count followed by
// that many bytes of payload.
// NOTE(review): this is a diff rendering without +/- markers. `char data[length];`
// appears to be the removed line, replaced by the bounds assert and the
// `_LengthDelimitedData<length> data;` declaration — the two `data` members are
// not meant to coexist.
struct LengthDelimited {
|
||||
type::uLEB128 length;
|
||||
char data[length];
|
||||
|
||||
// Fail with a clear message if the declared length runs past the end of the data,
// checked before the payload is placed.
std::assert($ + length <= std::mem::size(), "Attempting to parse _LengthDelimitedData would exceed file length.");
|
||||
_LengthDelimitedData<length> data;
|
||||
};
|
||||
|
||||
// Shows the same varint bytes under both an unsigned and a signed LEB128
// interpretation; used as the value type for WireType::Varint fields.
union _LEB128 {
|
||||
type::uLEB128 uLEB128;
|
||||
type::sLEB128 sLEB128; // NOTE: the signed version doesn't seem to be working properly
|
||||
};
|
||||
|
||||
// One key/value pair of a message: a Key followed by a value whose layout is
// selected by key.wire_type.
// NOTE(review): this is a diff rendering without +/- markers. `struct Entry {`
// and the if/else-if chain appear to be the removed version; `struct Field {`
// and the match block appear to be their replacement. The two forms are not
// meant to coexist.
struct Entry {
|
||||
struct Field {
|
||||
Key key;
|
||||
|
||||
if (key.wire_type == WireType::Varint)
|
||||
type::uLEB128 value;
|
||||
else if (key.wire_type == WireType::_64Bit)
|
||||
_64Bit value;
|
||||
else if (key.wire_type == WireType::LengthDelimited)
|
||||
LengthDelimited value;
|
||||
else if (key.wire_type == WireType::_32Bit)
|
||||
_32Bit value;
|
||||
// Unlike the if/else-if chain above, the match has arms for the group wire types
// (std::unimplemented) and for any other value (std::error) instead of
// leaving the field without a value.
match (key.wire_type) {
|
||||
(WireType::Varint): _LEB128 value;
|
||||
(WireType::_64Bit): _64Bit value;
|
||||
(WireType::LengthDelimited): LengthDelimited value;
|
||||
(WireType::_32Bit): _32Bit value;
|
||||
(WireType::StartGroup | WireType::EndGroup): std::unimplemented();
|
||||
(_): std::error("Unknown WireType.");
|
||||
}
|
||||
};
|
||||
|
||||
// Top-level placement: parse the data from offset 0x00.
// NOTE(review): this is a diff rendering without +/- markers. The
// `Entry entries[...]` line appears to be the removed placement; the
// `Message<std::mem::size()> msg` line and the assert appear to be its replacement.
Entry entries[while(!std::mem::eof())] @ 0x00;
|
||||
// Treat the whole input as one Message whose size is the full data length.
Message<std::mem::size()> msg @ 0x00;
|
||||
// Fail if the cursor is not at end-of-file after the top-level message.
std::assert(std::mem::eof(), "Parsing did not consume whole file.");
|
||||
|
||||
Reference in New Issue
Block a user