"""Inspect useful GGUF model metadata, both from a loaded llama-cpp-python
model object and directly from a GGUF file on disk."""
import sys
from typing import Optional, Any
from gguf import GGUFReader
from llama_cpp import Llama
# Subset of GGUF metadata keys worth surfacing when inspecting a model.
# Grouped as: general file info, llama architecture hyperparameters, and
# tokenizer configuration (including chat-template fields, when present).
USEFUL_GGUF_METADATA_KEYS = (
    # General file-level information.
    "general.name",
    "general.architecture",
    "general.file_type",
    "general.quantization_version",
    # Llama architecture hyperparameters.
    "llama.context_length",
    "llama.block_count",
    "llama.embedding_length",
    "llama.feed_forward_length",
    "llama.attention.head_count",
    "llama.attention.head_count_kv",
    "llama.vocab_size",
    # Tokenizer configuration.
    "tokenizer.ggml.model",
    "tokenizer.ggml.pre",
    "tokenizer.ggml.bos_token_id",
    "tokenizer.ggml.eos_token_id",
    "tokenizer.ggml.add_bos_token",
    "tokenizer.ggml.add_eos_token",
    "tokenizer.ggml.add_space_prefix",
    # Chat template(s); the plural key is an array field in the GGUF file.
    "tokenizer.chat_template",
    "tokenizer.chat_templates",
)
def _gguf_field_to_python_value(field: Any) -> Any:
"""
Convert a GGUFReader field to a readable Python value.
"""
try:
value = field.contents()
except Exception:
return "<unreadable>"
# Normalize bytes-ish values
if isinstance(value, bytes):
return value.decode("utf-8", errors="replace")
if isinstance(value, list):
out = []
for item in value:
if isinstance(item, bytes):
out.append(item.decode("utf-8", errors="replace"))
else:
out.append(item)
return out
return value
def get_useful_metadata_from_llama(llm: Llama) -> dict[str, Any]:
    """
    Read useful GGUF metadata exposed by llama-cpp-python on the loaded model.

    Note: llama.cpp only exposes scalar metadata here, so array fields such as
    tokenizer.chat_templates may be missing even when they exist in the GGUF file.
    """
    snapshot: dict[str, Any] = {
        key: llm.metadata.get(key, "<missing>") for key in USEFUL_GGUF_METADATA_KEYS
    }
    # Named per-template entries (tokenizer.chat_template.<name>) are scalars,
    # so they show up here even though the array field may not.
    for name in sorted(llm.metadata):
        if name.startswith("tokenizer.chat_template."):
            snapshot[name] = llm.metadata[name]
    if snapshot["tokenizer.chat_templates"] == "<missing>":
        snapshot["tokenizer.chat_templates"] = (
            "<missing or not exposed by llama.cpp metadata API>"
        )
    # Extra runtime values resolved by llama-cpp-python itself.
    snapshot["llama_cpp.chat_format"] = llm.chat_format
    snapshot["llama_cpp.token_bos"] = llm.token_bos()
    snapshot["llama_cpp.token_eos"] = llm.token_eos()
    return snapshot
def get_useful_metadata_from_gguf(model_path: str) -> dict[str, Any]:
    """
    Read useful GGUF metadata directly from the GGUF file.
    """
    fields = GGUFReader(model_path).fields
    snapshot: dict[str, Any] = {}
    # Curated keys first, with an explicit marker for anything absent.
    for key in USEFUL_GGUF_METADATA_KEYS:
        if key in fields:
            snapshot[key] = _gguf_field_to_python_value(fields[key])
        else:
            snapshot[key] = "<missing>"
    # Then any named per-template entries present in the file.
    for key in sorted(fields):
        if key.startswith("tokenizer.chat_template."):
            snapshot[key] = _gguf_field_to_python_value(fields[key])
    return snapshot
def print_useful_metadata_from_llama(model_path: str, llm: Optional[Llama] = None) -> None:
    """
    Print useful metadata both from the loaded Llama object and directly from GGUF.
    """
    def _print_section(header: str, pairs: dict[str, Any], footer: str) -> None:
        # One "key = value" line per entry, framed by section markers.
        print(header)
        for name, val in pairs.items():
            print(f"{name} = {val}")
        print(footer)

    # The Llama-object section is optional; the GGUF-file section always prints.
    if llm is not None:
        _print_section(
            "=== Useful metadata from loaded Llama object ===",
            get_useful_metadata_from_llama(llm),
            "=== End of loaded Llama metadata ===\n",
        )
    _print_section(
        "=== Useful metadata read directly from GGUF file ===",
        get_useful_metadata_from_gguf(model_path),
        "=== End of GGUF file metadata ===\n",
    )
def print_useless_metadata_from_gguf(model_path: str) -> None:
    """
    Print all GGUF metadata key/value pairs found in the file.
    """
    print("=== GGUF metadata ===")
    # Dump every field in the file, converted to a readable value.
    for name, raw_field in GGUFReader(model_path).fields.items():
        print(f"{name} = {_gguf_field_to_python_value(raw_field)}")
    print("=== End of GGUF metadata ===\n")