diff --git a/patterns/protobuf.hexpat b/patterns/protobuf.hexpat
index 4e36b61..f37f0d4 100644
--- a/patterns/protobuf.hexpat
+++ b/patterns/protobuf.hexpat
@@ -1,12 +1,25 @@
-#pragma author WerWolv
+#pragma author WerWolv and Glenn Hartmann
 #pragma description Google Protobuf wire encoding (.pb)
 
+#pragma MIME application/protobuf
+#pragma MIME application/vnd.google.protobuf
+
+#pragma endian little
+
 import std.core;
 import std.io;
 import std.mem;
+import std.string;
+import std.sys;
 
 import type.leb128;
 
+// Attempting to recursively parse submessages is a guess-and-check process
+// since it's inherently impossible to tell for sure what type a
+// LengthDelimited field is. This could be imprecise and could be slow for
+// large or ambiguous files, so we give the user an option to disable it.
+bool disable_recursive_submessage_parsing in;
+
 struct ZigZag32 {
     u32 value;
 } [[sealed, format("format_zigzag32")]];
@@ -32,7 +45,6 @@ enum WireType : u8 {
     _32Bit = 5
 };
 
-
 struct Key {
     type::uLEB128 keyDec;
     u32 field_number = u32(keyDec) >> 3;
@@ -55,23 +67,62 @@ union _32Bit {
     float flt;
 };
 
+using Field;
+
+// Fields are read until the cursor reaches Size bytes past this struct's
+// own address.
+struct Message<auto Size> {
+    Field fields[while(!std::mem::reached(addressof(this) + Size))];
+};
+
+// Length bytes of character data, formatted through std::string's helper.
+struct Utf8String<auto Length> {
+    char data[Length];
+} [[sealed, format("std::string::impl::format_string"), transform("std::string::impl::format_string")]];
+
+// Overlapping views of one Length-byte payload: raw bytes, a string, and
+// (unless disabled by the user) a nested Message.
+union _LengthDelimitedData<auto Length> {
+    u8 bytes[Length];
+    Utf8String<Length> utf8;
+
+    if (!disable_recursive_submessage_parsing) {
+        try {
+            // Attempt to parse binary data as an embedded Message. This is
+            // expected to fail often, as the proto format uses LengthDelimited
+            // for several different data types.
+            Message<Length> msg;
+            std::assert(sizeof(msg) == Length, "Attempted parse of Message consumed wrong number of bytes.");
+        }
+    }
+};
+
 struct LengthDelimited {
     type::uLEB128 length;
-    char data[length];
+
+    std::assert($ + length <= std::mem::size(), "Attempting to parse _LengthDelimitedData would exceed file length.");
+    _LengthDelimitedData<length> data;
 };
 
+// Unsigned and signed views of the same varint.
+union _LEB128 {
+    type::uLEB128 uLEB128;
+    type::sLEB128 sLEB128; // NOTE: the signed version doesn't seem to be working properly
+};
 
-struct Entry {
+// One key/value record; the key's wire type selects how the value is parsed.
+struct Field {
     Key key;
 
-    if (key.wire_type == WireType::Varint)
-        type::uLEB128 value;
-    else if (key.wire_type == WireType::_64Bit)
-        _64Bit value;
-    else if (key.wire_type == WireType::LengthDelimited)
-        LengthDelimited value;
-    else if (key.wire_type == WireType::_32Bit)
-        _32Bit value;
+    match (key.wire_type) {
+        (WireType::Varint): _LEB128 value;
+        (WireType::_64Bit): _64Bit value;
+        (WireType::LengthDelimited): LengthDelimited value;
+        (WireType::_32Bit): _32Bit value;
+        (WireType::StartGroup | WireType::EndGroup): std::unimplemented();
+        (_): std::error("Unknown WireType.");
+    }
 };
 
-Entry entries[while(!std::mem::eof())] @ 0x00;
\ No newline at end of file
+Message<std::mem::size()> msg @ 0x00;
+std::assert(std::mem::eof(), "Parsing did not consume whole file.");