// Mirror of https://github.com/WerWolv/ImHex-Patterns.git
// Synced 2026-03-27 23:37:04 -05:00 (217 lines, 9.0 KiB).
// The mirror labels this file "Rust"; the content is an ImHex pattern.
// Format specification:
//   https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
// Reference parser:
//   https://github.com/openxla/iree/blob/main/runtime/src/iree/io/formats/gguf/gguf_parser.c

#pragma description ggml GGUF v3
#pragma authors @leonjza, jessie @ imhex discord
// The magic bytes below are ASCII "GGUF" at the very start of the file.
#pragma magic [ 47 47 55 46 ] @ 0x00

#pragma pattern_limit 300000
|
|
|
|
// Tensor element encodings. Stored as a 32-bit tag; used as the `type`
// field of `gguf_tensor_info_t`.
enum ggml_type : u32 {
    GGML_TYPE_F32     = 0,
    GGML_TYPE_F16     = 1,
    GGML_TYPE_Q4_0    = 2,
    GGML_TYPE_Q4_1    = 3,
    // 4 (GGML_TYPE_Q4_2): support has been removed
    // 5 (GGML_TYPE_Q4_3): support has been removed
    GGML_TYPE_Q5_0    = 6,
    GGML_TYPE_Q5_1    = 7,
    GGML_TYPE_Q8_0    = 8,
    GGML_TYPE_Q8_1    = 9,
    GGML_TYPE_Q2_K    = 10,
    GGML_TYPE_Q3_K    = 11,
    GGML_TYPE_Q4_K    = 12,
    GGML_TYPE_Q5_K    = 13,
    GGML_TYPE_Q6_K    = 14,
    GGML_TYPE_Q8_K    = 15,
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS  = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S   = 19,
    GGML_TYPE_IQ4_NL  = 20,
    GGML_TYPE_IQ3_S   = 21,
    GGML_TYPE_IQ2_S   = 22,
    GGML_TYPE_IQ4_XS  = 23,
    GGML_TYPE_I8      = 24,
    GGML_TYPE_I16     = 25,
    GGML_TYPE_I32     = 26,
    GGML_TYPE_I64     = 27,
    GGML_TYPE_F64     = 28,
    GGML_TYPE_IQ1_M   = 29,
    // Deliberately left without an explicit value.
    GGML_TYPE_COUNT,
};
|
|
|
|
// Tag identifying how a metadata value is encoded. Stored as a u32.
enum gguf_metadata_value_type : u32 {
    // 8-bit unsigned integer.
    GGUF_METADATA_VALUE_TYPE_UINT8   = 0,
    // 8-bit signed integer.
    GGUF_METADATA_VALUE_TYPE_INT8    = 1,
    // 16-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT16  = 2,
    // 16-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT16   = 3,
    // 32-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT32  = 4,
    // 32-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT32   = 5,
    // 32-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
    // Boolean stored in one byte: 0 is false, 1 is true.
    // Any other byte is invalid and indicates either a broken model file
    // or a buggy reader.
    GGUF_METADATA_VALUE_TYPE_BOOL    = 7,
    // UTF-8 string without a null terminator, preceded by its length.
    GGUF_METADATA_VALUE_TYPE_STRING  = 8,
    // Array of other values, preceded by its element type and length.
    // Arrays may be nested. The length counts elements, not bytes.
    GGUF_METADATA_VALUE_TYPE_ARRAY   = 9,
    // 64-bit unsigned integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_UINT64  = 10,
    // 64-bit signed integer, little-endian.
    GGUF_METADATA_VALUE_TYPE_INT64   = 11,
    // 64-bit IEEE754 floating point number.
    GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
};
|
|
|
|
// Length-prefixed GGUF string: a u64 byte count followed by that many
// characters.
struct gguf_string_t {
    // Size of the string in bytes.
    u64 len;

    // String contents: UTF-8, exactly `len` bytes, no null terminator.
    char string[len];
};
|
|
|
|
|
|
// Forward declaration so the struct can name itself for nested arrays.
using gguf_metadata_value_t;

// Payload of an array-typed metadata value: the element type, the element
// count, then `length` elements of that type stored back to back.
struct gguf_metadata_value_t {
    // Type tag shared by every element of the array.
    gguf_metadata_value_type type;

    // Number of elements (not bytes).
    u64 length;

    // Every value type needs an arm here: an unmatched type would consume
    // no bytes and misalign everything parsed after this array.
    match (type) {
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT8):   u8 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT8):    s8 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT16):  u16 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT16):   s16 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT32):  u32 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT32):   s32 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT32): float value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_BOOL):    bool value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_STRING):  gguf_string_t value[length];
        // Nested arrays: each element carries its own type, length and
        // elements, so it is decoded with this same struct.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_ARRAY):   gguf_metadata_value_t value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT64):  u64 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT64):   s64 value[length];
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT64): double value[length];
    }
};
|
|
|
|
// A single metadata value: a type tag followed by a payload whose layout
// is selected by that tag.
struct gguf_metadata_value {
    // Tag selecting the payload layout below.
    gguf_metadata_value_type type;

    // Every value type needs an arm here: an unmatched type would consume
    // no bytes and misalign every field parsed after this value.
    match (type) {
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT8):   u8 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT8):    s8 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT16):  u16 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT16):   s16 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT32):  u32 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT32):   s32 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT32): float value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_BOOL):    bool value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_STRING):  gguf_string_t value;
        // Arrays carry their own element type and length.
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_ARRAY):   gguf_metadata_value_t value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_UINT64):  u64 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_INT64):   s64 value;
        (gguf_metadata_value_type::GGUF_METADATA_VALUE_TYPE_FLOAT64): double value;
    }
};
|
|
|
|
// One metadata entry: a key string immediately followed by a typed value.
struct gguf_metadata_kv_t {
    // Key, stored as a standard GGUF string. To be valid, a key must:
    //   - be an ASCII string,
    //   - be hierarchical: `lower_snake_case` segments separated by `.`,
    //   - be at most 65535 (2^16-1) bytes long.
    // Keys that break any of these rules are invalid.
    gguf_string_t key;

    // The value. Its type tag (one of the `gguf_metadata_value_type`
    // values) is not a separate field here: it is read as the first member
    // of `gguf_metadata_value`.
    gguf_metadata_value value;
};
|
|
|
|
// File header: magic, format version, the two counts, and the metadata
// key-value table.
struct gguf_header_t {
    // Magic number announcing a GGUF file.
    // At the byte level it must be `0x47` `0x47` `0x55` `0x46` ("GGUF").
    // A reader that loads it as a little-endian integer sees 0x46554747,
    // so be *very* explicit about byte order when comparing it.
    u32 magic;

    // Version of the format implemented.
    // Must be `3` for the version described by the spec, which introduces
    // big-endian support.
    //
    // The version is only meant to increase for structural changes to the
    // format; changes that leave the file structure intact should update
    // the metadata instead.
    u32 version;

    // Number of tensors in the file. Kept explicit, rather than in the
    // metadata, so it is always available when loading the tensors.
    u64 tensor_count;

    // Number of metadata key-value pairs.
    u64 metadata_kv_count;

    // The metadata key-value pairs, `metadata_kv_count` of them.
    gguf_metadata_kv_t metadata_kv[metadata_kv_count];
};
|
|
|
|
// Descriptor for one tensor: name, shape, element type and data offset.
struct gguf_tensor_info_t {
    // Tensor name: a standard GGUF string of at most 64 bytes.
    gguf_string_t name;

    // Number of dimensions. Currently at most 4; this may change in the
    // future.
    u32 n_dimensions;

    // Size of each dimension, `n_dimensions` entries.
    u64 dimensions[n_dimensions];

    // Element type of the tensor.
    ggml_type type;

    // Byte offset of this tensor's data.
    //
    // The offset is relative to `tensor_data`, not to the start of the
    // file, which makes files easier to write. Readers should consider
    // exposing it relative to the file to make the data easier to read.
    //
    // Must be a multiple of `ALIGNMENT`, i.e. `align_offset(offset) == offset`.
    u64 offset;
};
|
|
|
|
// Whole-file layout: header, tensor descriptors, alignment padding, then
// the raw tensor data.
struct gguf_file_t {
    // The file header.
    gguf_header_t header;

    // One descriptor per tensor; used to locate each tensor's data.
    gguf_tensor_info_t tensor_infos[header.tensor_count];

    // Padding up to the nearest multiple of `ALIGNMENT`.
    //
    // It is present when `sizeof(header) + sizeof(tensor_infos)` is not
    // already a multiple of `ALIGNMENT`. Its size is
    // `align_offset(position) - position`, where `position` is the end of
    // `tensor_infos`.
    //
    // NOTE(review): this array and `tensor_data` are declared without a
    // size. The pattern language documents unsized arrays as extending
    // until an all-zero entry, which may not match the alignment rule
    // described above. Confirm.
    u8 _padding[];

    // Tensor data.
    //
    // Arbitrary binary data holding the model weights. It should be close
    // or identical to the original model file's data, but may differ due
    // to quantization or other inference optimizations. Such deviations
    // should be recorded in the metadata or the architecture definition.
    //
    // Every tensor's data lives inside this array and is located through
    // its `tensor_infos` entry. Each tensor's offset must be a multiple of
    // `ALIGNMENT`, and gaps between tensors should be padded to
    // `ALIGNMENT` bytes.
    u8 tensor_data[];
};
|
|
|
|
// Place a `gguf_file_t` pattern named GGUF at offset 0x00.
gguf_file_t GGUF @ 0x00;
|