Files
ImHex-Patterns/patterns/gguf.hexpat
applecuckoo c5aaac25dd patterns/uf2: Added Family ID enum to uf2.hexpat, Added missing description pragmas and other minor fixes (#282)
* patterns/uf2: add family ID enum

* patterns/uf2: Fix enum spacing

* patterns: add missing description pragmas and README entries, etc.

* patterns/uf2: add reference for Family ID enum
2024-08-03 10:47:40 +02:00

216 lines
9.0 KiB
Rust

// https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
// https://github.com/openxla/iree/blob/main/runtime/src/iree/io/formats/gguf/gguf_parser.c
#pragma description GGUF v3 File Format Pattern
#pragma authors @leonjza, jessie @ imhex discord
#pragma pattern_limit 300000

import std.mem;
// Tensor element / quantization type tag, stored as a u32 in every tensor info entry.
//
// The numeric values follow `enum ggml_type` in ggml.h. Ids shown as comments were
// retired upstream and are deliberately left unnamed so that the remaining entries
// keep their on-disk values.
enum ggml_type: u32 {
    GGML_TYPE_F32 = 0,
    GGML_TYPE_F16 = 1,
    GGML_TYPE_Q4_0 = 2,
    GGML_TYPE_Q4_1 = 3,
    // GGML_TYPE_Q4_2 = 4, support has been removed
    // GGML_TYPE_Q4_3 = 5, support has been removed
    GGML_TYPE_Q5_0 = 6,
    GGML_TYPE_Q5_1 = 7,
    GGML_TYPE_Q8_0 = 8,
    GGML_TYPE_Q8_1 = 9,
    GGML_TYPE_Q2_K = 10,
    GGML_TYPE_Q3_K = 11,
    GGML_TYPE_Q4_K = 12,
    GGML_TYPE_Q5_K = 13,
    GGML_TYPE_Q6_K = 14,
    GGML_TYPE_Q8_K = 15,
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S = 19,
    GGML_TYPE_IQ4_NL = 20,
    GGML_TYPE_IQ3_S = 21,
    GGML_TYPE_IQ2_S = 22,
    GGML_TYPE_IQ4_XS = 23,
    GGML_TYPE_I8 = 24,
    GGML_TYPE_I16 = 25,
    GGML_TYPE_I32 = 26,
    GGML_TYPE_I64 = 27,
    GGML_TYPE_F64 = 28,
    GGML_TYPE_IQ1_M = 29,
    GGML_TYPE_BF16 = 30,
    // GGML_TYPE_Q4_0_4_4 = 31, support has been removed
    // GGML_TYPE_Q4_0_4_8 = 32, support has been removed
    // GGML_TYPE_Q4_0_8_8 = 33, support has been removed
    GGML_TYPE_TQ1_0 = 34,
    GGML_TYPE_TQ2_0 = 35,
    // GGML_TYPE_IQ4_NL_4_4 = 36, support has been removed
    // GGML_TYPE_IQ4_NL_4_8 = 37, support has been removed
    // GGML_TYPE_IQ4_NL_8_8 = 38, support has been removed
    GGML_TYPE_MXFP4 = 39,
    // Sentinel, not a real tensor type. Given an explicit value so that it cannot
    // silently take over the id of a real type when entries are added above it.
    GGML_TYPE_COUNT = 40,
};
// Type tag (u32) that precedes every metadata value and every array payload.
enum gguf_metadata_value_type: u32 {
    // 8-bit unsigned integer.
    GGUF_METADATA_VALUE_TYPE_UINT8 = 0,

    // 8-bit signed integer.
    GGUF_METADATA_VALUE_TYPE_INT8 = 1,

    // 16-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT16 = 2,

    // 16-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT16 = 3,

    // 32-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT32 = 4,

    // 32-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT32 = 5,

    // 32-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,

    // Boolean stored in one byte: 0 is false, 1 is true.
    // Any other byte is invalid and means either the model file is broken
    // or the reader is buggy.
    GGUF_METADATA_VALUE_TYPE_BOOL = 7,

    // UTF-8 string without a null terminator, preceded by its length.
    GGUF_METADATA_VALUE_TYPE_STRING = 8,

    // Array of other values, preceded by the element type and the length.
    // Arrays may be nested. The length counts elements, not bytes.
    GGUF_METADATA_VALUE_TYPE_ARRAY = 9,

    // 64-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT64 = 10,

    // 64-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT64 = 11,

    // 64-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
};
// Length-prefixed GGUF string: a u64 byte count followed by that many bytes.
struct gguf_string_t {
    // Byte length of the text that follows.
    u64 len;

    // The text itself: UTF-8, exactly `len` bytes, no null terminator.
    char string[len];
};
// Payload of an array-typed metadata value: an element type tag, an element
// count, and then `length` elements of that type stored back to back.
struct gguf_metadata_value_t {
    // Type shared by every element of the array.
    gguf_metadata_value_type type;
    // Number of elements (not bytes) that follow.
    u64 length;
    match(type) {
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT8): u8 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT8): s8 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT16): u16 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT16): s16 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT32): u32 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT32): s32 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT32): float value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_BOOL): bool value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_STRING): gguf_string_t value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT64): u64 value[length];
        // Signed 64-bit elements were previously unhandled, which left the array
        // body unparsed and shifted everything read after it.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT64): s64 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT64): double value[length];
        // Nested arrays: each element is itself a complete array payload with its
        // own type tag and length, so it is parsed recursively rather than as raw
        // 8-byte numbers.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_ARRAY): gguf_metadata_value_t value[length];
    }
};
// A single metadata value: a type tag followed by the payload that the tag selects.
struct gguf_metadata_value {
    // Selects which of the payload layouts below follows.
    gguf_metadata_value_type type;
    match(type) {
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT8): u8 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT8): s8 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT16): u16 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT16): s16 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT32): u32 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT32): s32 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT32): float value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_BOOL): bool value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_STRING): gguf_string_t value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT64): u64 value;
        // Signed 64-bit values were previously unhandled: no payload was consumed,
        // so every key/value pair after such an entry was read from the wrong offset.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT64): s64 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT64): double value;
        // Array payloads carry their own element type and length header.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_ARRAY): gguf_metadata_value_t value;
    }
};
// One metadata entry: a string key followed by a typed value.
struct gguf_metadata_kv_t {
    // Key of the entry. A standard GGUF string with these extra rules:
    // - valid ASCII only;
    // - hierarchical, each segment in `lower_snake_case`, segments joined by `.`;
    // - no longer than 2^16-1 (65535) bytes.
    // A key that breaks any of these rules is invalid.
    gguf_string_t key;

    // Value of the entry. There is no separate type field at this level:
    // the `gguf_metadata_value_type` tag is the first member of
    // `gguf_metadata_value` and is read there.
    gguf_metadata_value value;
};
// Fixed header at the start of a GGUF file, followed by the metadata entries.
struct gguf_header_t {
    // Magic number identifying a GGUF file.
    // At the byte level it must be `GGUF`: `0x47` `0x47` `0x55` `0x46`.
    // A reader that loads this field as a little-endian u32 would therefore
    // compare against 0x46554747; be explicit about byte order when checking it.
    u32 magic;

    // Version of the format implemented.
    // Must be `3` for the version described by this spec, which introduces
    // big-endian support.
    //
    // The version is only bumped for structural changes to the format; changes
    // that leave the file structure untouched are signalled through metadata.
    u32 version;

    // Number of tensors in the file. Kept as an explicit header field rather
    // than metadata so it is always available when loading tensors.
    u64 tensor_count;

    // Number of metadata key-value pairs.
    u64 metadata_kv_count;

    // The metadata key-value pairs themselves.
    gguf_metadata_kv_t metadata_kv[metadata_kv_count];
};
// Descriptor of one tensor: name, shape, element type and data offset.
struct gguf_tensor_info_t {
    // Tensor name. A standard GGUF string, limited to at most 64 bytes.
    gguf_string_t name;

    // Number of dimensions. Currently at most 4; this may change in the future.
    u32 n_dimensions;

    // Size of each dimension, `n_dimensions` entries.
    u64 dimensions[n_dimensions];

    // Element type of the tensor.
    ggml_type type;

    // Byte offset of this tensor's data.
    //
    // The offset is relative to `tensor_data`, not to the start of the file,
    // which makes files easier to write. Readers should consider exposing it
    // relative to the file instead, to make the data easier to locate.
    //
    // Must be a multiple of `ALIGNMENT`, i.e. `align_offset(offset) == offset`.
    u64 offset;
};
// Whole-file layout: header (with metadata), tensor infos, alignment padding,
// then the raw tensor data running to the end of the file.
struct gguf_file_t {
    // The header of the file.
    gguf_header_t header;

    // Tensor infos, which can be used to locate the tensor data.
    gguf_tensor_info_t tensor_infos[header.tensor_count];

    // Local variable (not read from the file): alignment of the tensor data
    // section. 32 is the GGUF default. A file can override it through the
    // `general.alignment` metadata key, which this pattern does not look up,
    // so such files need this value adjusted by hand.
    u64 alignment = 32;

    // Padding to the nearest multiple of `ALIGNMENT`.
    //
    // That is, if `sizeof(header) + sizeof(tensor_infos)` is not a multiple of
    // `ALIGNMENT`, this padding is added to make it so. It is computed as
    // `align_offset(position) - position`, where `position` is the end of
    // `tensor_infos`, which is the current offset `$` at this point.
    //
    // The size is spelled out because an unsized `u8 x[]` array stops at the
    // first zero byte instead of covering the padding region.
    u8 _padding[(alignment - ($ % alignment)) % alignment];

    // Tensor data.
    //
    // Arbitrary binary data corresponding to the weights of the model. It should
    // be close or identical to the data in the original model file, but may
    // differ due to quantization or other optimizations for inference. Any such
    // deviations should be recorded in the metadata or as part of the
    // architecture definition.
    //
    // Each tensor's data must be stored within this array and is located through
    // its `tensor_infos` entry. The offset of each tensor's data must be a
    // multiple of `ALIGNMENT`, and the space between tensors should be padded to
    // `ALIGNMENT` bytes.
    //
    // Covers everything from the current offset to the end of the loaded data.
    u8 tensor_data[std::mem::size() - $];
};
// Entry point: place the whole-file pattern at offset 0 of the loaded data.
gguf_file_t GGUF @ 0x00;