diff --git a/README.md b/README.md index ea49e7e..35abe78 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ Everything will immediately show up in ImHex's Content Store and gets bundled wi | PRODINFO | | [`patterns/prodinfo.hexpat`](patterns/prodinfo.hexpat) | Nintendo Switch PRODINFO | | Protobuf | | [`patterns/protobuf.hexpat`](patterns/protobuf.hexpat) | Google Protobuf encoding | | PyInstaller | | [`patterns/pyinstaller.hexpat`](patterns/pyinstaller.hexpat) | PyInstaller binray files | +| PYC | | [`patterns/pyc.hexpat`](patterns/pyc.hexpat) | Python bytecode files | | QBCL | | [`patterns/qbcl.hexpat`](patterns/qbcl.hexpat) | Qubicle voxel scene project file | | QOI | `image/qoi` | [`patterns/qoi.hexpat`](patterns/qoi.hexpat) | QOI image files | | Shell Link | `application/x-ms-shortcut` | [`patterns/lnk.hexpat`](patterns/lnk.hexpat) | Windows Shell Link file format | diff --git a/patterns/pyc.hexpat b/patterns/pyc.hexpat new file mode 100644 index 0000000..e98037e --- /dev/null +++ b/patterns/pyc.hexpat @@ -0,0 +1,338 @@ +#include +#include + +#define FLAG_REF 0x80 +#define TYPE_NULL '0' +#define TYPE_NONE 'N' +#define TYPE_FALSE 'F' +#define TYPE_TRUE 'T' +#define TYPE_STOPITER 'S' +#define TYPE_ELLIPSIS '.' +#define TYPE_INT 'i' +#define TYPE_INT64 'I' +#define TYPE_FLOAT 'f' +#define TYPE_BINARY_FLOAT 'g' +#define TYPE_COMPLEX 'x' +#define TYPE_BINARY_COMPLEX 'y' +#define TYPE_LONG 'l' +#define TYPE_STRING 's' +#define TYPE_INTERNED 't' +#define TYPE_REF 'r' +#define TYPE_STRINGREF 'R' +#define TYPE_TUPLE '(' +#define TYPE_LIST '[' +#define TYPE_DICT '{' +#define TYPE_CODE 'c' +#define TYPE_UNICODE 'u' +#define TYPE_UNKNOWN '?' +#define TYPE_SET '<' +#define TYPE_FROZENSET '>' +#define TYPE_ASCII 'a' +#define TYPE_ASCII_INTERNED 'A' +#define TYPE_SMALL_TUPLE ')' +#define TYPE_SHORT_ASCII 'z' +#define TYPE_SHORT_ASCII_INTERNED 'Z' +#define MAX_REFS 2048 + +using MarshalObject; +u8 _major; +u8 _minor; + +fn above(u8 major, u8 minor) { + return _major >= major && _minor >= minor; +}; + +fn below(u8 major, u8 minor) { + return _major <=major && _minor < minor; +}; + +fn is(u8 major, u8 minor) { + return _major == major && _minor == minor; +}; + +fn between(u8 major, u8 minor, u8 otherMajor, u8 otherMinor) { + return above(major, minor) && below(otherMajor, otherMinor); +}; + +struct NoneObject {}; + +struct MarshalString { + if(parent.type == TYPE_SHORT_ASCII || parent.type == TYPE_SHORT_ASCII_INTERNED) + u8 length; + else + u32 length; + + char data[length]; +}; + +struct MarshalSequence { + if(parent.type == TYPE_SMALL_TUPLE) + u8 length; + else + u32 length; + MarshalObject values[length]; +}; + +struct MarshalCode { + if(between(1, 3, 2, 3)) u16 numArgs; + else if(above(2, 3)) u32 numArgs; + + if(above(3, 8)) u32 numPosOnlyArgs; + + if(_major >= 3) u32 numKwOnlyArgs; + + if(between(1, 3, 2, 3)) u16 numLocals; + else if(between(2, 3, 3, 11)) u32 numLocals; + + if(between(1, 5, 2, 3)) u16 numStack; + else if(above(2, 3)) u32 numStack; + + if(between(1, 3, 2, 3)) u16 flags; + else if(above(2, 3)) u32 flags; + + MarshalObject code; + MarshalObject constants; + MarshalObject names; + + if(above(1, 3)) MarshalObject localNames; + + if(above(3, 11)) MarshalObject localKinds; + + if(between(2, 1, 3, 11)) { + MarshalObject freeVars; + MarshalObject cellVars; + } + + MarshalObject fileName; + MarshalObject name; + + if(above(3, 11)) MarshalObject qualifiedName; + + if(between(1, 5, 2, 3)) u16 firstLine; + else if(above(2, 3)) u32 firstLine; + + if(above(1, 5)) MarshalObject lnTable; + + if(above(3, 11)) MarshalObject exceptionTable; +}; + +struct DictEntry { + MarshalObject key; + if(key.type == TYPE_NULL) break; + MarshalObject value; +}; + +struct MarshalDict { + DictEntry entries[while(true)]; +}; + +struct LongInt { + u32 len; + u16 buffer[len]; +}; + +struct LongFloat { + u8 len; + u8 buffer[len]; +}; + +struct LongComplex { + LongFloat real; + LongFloat imag; +}; + +auto section = std::mem::create_section("ref-address"); +u128 refs[MAX_REFS] @ 0x0 in section; +u32 refIndex; + +fn refMember(u128 address) { + refs[refIndex] = address; + refIndex += 1; +}; + +fn getRef(u32 index) { + return refs[index]; +}; + +struct MarshalObject { + u8 _type [[hidden]]; + u8 flag = _type & FLAG_REF; + u8 type = _type & ~FLAG_REF; + + if(flag) { + refMember($-1); + } + + match(type) { + (TYPE_NULL | TYPE_NONE | TYPE_STOPITER | TYPE_ELLIPSIS | TYPE_FALSE | TYPE_TRUE): continue; + (TYPE_CODE): { + MarshalCode code [[inline]]; + } + (TYPE_STRING | + TYPE_UNICODE | + TYPE_ASCII | + TYPE_ASCII_INTERNED | + TYPE_SHORT_ASCII | + TYPE_SHORT_ASCII_INTERNED): { + MarshalString string [[inline]]; + } + (TYPE_SMALL_TUPLE | TYPE_TUPLE | TYPE_LIST | TYPE_SET | TYPE_FROZENSET): { + MarshalSequence sequence [[inline]]; + } + (TYPE_DICT): { + MarshalDict dict [[inline]]; + } + (TYPE_INT): { + u32 int; + } + (TYPE_INT64): { + u64 long; + } + (TYPE_FLOAT): { + LongFloat number; + } + (TYPE_BINARY_FLOAT): { + double number; + } + (TYPE_COMPLEX): { + LongComplex complex; + } + (TYPE_BINARY_COMPLEX): { + double real; + double imag; + } + (TYPE_LONG): { + LongInt long; + } + (TYPE_REF): { + u32 index; + MarshalObject obj @ getRef(index); + } + (_): { + std::print("Unknown marshal object type: {:c}", type); + } + } + +} [[format("format_marshal")]]; + +fn format_marshal(ref auto obj) { + match(obj.type) { + (TYPE_NONE): return "None"; + (TYPE_STOPITER): return "StopIteration"; + (TYPE_ELLIPSIS): return "..."; + (TYPE_FALSE): return "False"; + (TYPE_TRUE): return "True"; + (TYPE_INT): return "int"; + (TYPE_INT64): return "int64"; + (TYPE_FLOAT): return "float"; + (TYPE_BINARY_FLOAT): return "float"; + (TYPE_COMPLEX): return "complex"; + (TYPE_BINARY_COMPLEX): return "complex"; + (TYPE_LONG): return "long"; + (TYPE_STRING): return "string"; + (TYPE_INTERNED): return "interned"; + (TYPE_REF): return "ref"; + (TYPE_TUPLE): return "tuple"; + (TYPE_LIST): return "list"; + (TYPE_DICT): return "dict"; + (TYPE_CODE): return "code"; + (TYPE_UNICODE): return "unicode"; + (TYPE_UNKNOWN): return "unknown"; + (TYPE_SET): return "set"; + (TYPE_FROZENSET): return "frozenset"; + (TYPE_ASCII): return "ascii"; + (TYPE_ASCII_INTERNED): return "ascii_interned"; + (TYPE_SHORT_ASCII): return "short_ascii"; + (TYPE_SHORT_ASCII_INTERNED): return "short_ascii_interned"; + (TYPE_SMALL_TUPLE): return "small_tuple"; + } +}; + +enum Magic : u32 { + MAGIC_1_0 = 0x00999902, + MAGIC_1_1 = 0x00999903, /* Also covers 1.2 */ + MAGIC_1_3 = 0x0A0D2E89, + MAGIC_1_4 = 0x0A0D1704, + MAGIC_1_5 = 0x0A0D4E99, + MAGIC_1_6 = 0x0A0DC4FC, + + MAGIC_2_0 = 0x0A0DC687, + MAGIC_2_1 = 0x0A0DEB2A, + MAGIC_2_2 = 0x0A0DED2D, + MAGIC_2_3 = 0x0A0DF23B, + MAGIC_2_4 = 0x0A0DF26D, + MAGIC_2_5 = 0x0A0DF2B3, + MAGIC_2_6 = 0x0A0DF2D1, + MAGIC_2_7 = 0x0A0DF303, + + MAGIC_3_0 = 0x0A0D0C3A, + MAGIC_3_1 = 0x0A0D0C4E, + MAGIC_3_2 = 0x0A0D0C6C, + MAGIC_3_3 = 0x0A0D0C9E, + MAGIC_3_4 = 0x0A0D0CEE, + MAGIC_3_5 = 0x0A0D0D16, + MAGIC_3_5_3 = 0x0A0D0D17, + MAGIC_3_6 = 0x0A0D0D33, + MAGIC_3_7 = 0x0A0D0D42, + MAGIC_3_8 = 0x0A0D0D55, + MAGIC_3_9 = 0x0A0D0D61, + MAGIC_3_10 = 0x0A0D0D6F, + MAGIC_3_11 = 0x0A0D0DA7, + + INVALID = 0, +}; + +fn getMajor(Magic magic) { + match(magic) { + (Magic::MAGIC_1_0 | Magic::MAGIC_1_1 | Magic::MAGIC_1_3 | Magic::MAGIC_1_4 | Magic::MAGIC_1_5 | Magic::MAGIC_1_6): return 1; + (Magic::MAGIC_2_0 | Magic::MAGIC_2_1 | Magic::MAGIC_2_2 | Magic::MAGIC_2_3 | Magic::MAGIC_2_4 | Magic::MAGIC_2_5 | Magic::MAGIC_2_6 | Magic::MAGIC_2_7): return 2; + (Magic::MAGIC_3_0 | Magic::MAGIC_3_1 | Magic::MAGIC_3_2 | Magic::MAGIC_3_3 | Magic::MAGIC_3_4 | Magic::MAGIC_3_5 | Magic::MAGIC_3_5_3 | Magic::MAGIC_3_6 | Magic::MAGIC_3_7 | Magic::MAGIC_3_8 | Magic::MAGIC_3_9 | Magic::MAGIC_3_10 | Magic::MAGIC_3_11): return 3; + (Magic::INVALID): return 0; + } +}; + +fn getMinor(Magic magic) { + match(magic) { + (Magic::MAGIC_1_0 | Magic::MAGIC_2_0 | Magic::MAGIC_3_0): return 0; + (Magic::MAGIC_1_1 | Magic::MAGIC_2_1 | Magic::MAGIC_3_1): return 1; + (Magic::MAGIC_1_3 | Magic::MAGIC_2_2 | Magic::MAGIC_3_2): return 2; + (Magic::MAGIC_1_4 | Magic::MAGIC_2_3 | Magic::MAGIC_3_3): return 3; + (Magic::MAGIC_1_5 | Magic::MAGIC_2_4 | Magic::MAGIC_3_4): return 4; + (Magic::MAGIC_1_6 | Magic::MAGIC_2_5 | Magic::MAGIC_3_5 | Magic::MAGIC_3_5_3): return 5; + (Magic::MAGIC_2_6 | Magic::MAGIC_3_6): return 6; + (Magic::MAGIC_2_7 | Magic::MAGIC_3_7): return 7; + (Magic::MAGIC_3_8): return 8; + (Magic::MAGIC_3_9): return 9; + (Magic::MAGIC_3_10): return 10; + (Magic::MAGIC_3_11): return 11; + (Magic::INVALID): return 0; + } +}; + +struct PYCHeader { + Magic magic; + _major = getMajor(magic); + _minor = getMinor(magic); + u8 major = _major [[export]]; + u8 minor = _minor [[export]]; + + if(above(3, 7)) { + u32 flags; + } else { + u32 flags; + } + + if(flags & 0x1) { + u64 checksum; + } else { + type::time32_t timestamp; + + if(above(3, 3)) { + u32 size; + } + } + + MarshalObject object; +}; + +PYCHeader header @ 0x0; \ No newline at end of file diff --git a/tests/patterns/test_data/pyc.hexpat.pyc b/tests/patterns/test_data/pyc.hexpat.pyc new file mode 100644 index 0000000..5b69b1f Binary files /dev/null and b/tests/patterns/test_data/pyc.hexpat.pyc differ