"""Inspect useful GGUF model metadata, both from a loaded llama-cpp-python
model object and directly from a GGUF file on disk."""
import sys
from typing import Optional, Any
from gguf import GGUFReader
from llama_cpp import Llama
# Subset of GGUF metadata keys worth surfacing when inspecting a model.
# Grouped as: general file info, llama architecture hyperparameters, and
# tokenizer configuration (including chat-template fields, when present).
USEFUL_GGUF_METADATA_KEYS = (
    # General file-level information.
    "general.name",
    "general.architecture",
    "general.file_type",
    "general.quantization_version",
    # Llama architecture hyperparameters.
    "llama.context_length",
    "llama.block_count",
    "llama.embedding_length",
    "llama.feed_forward_length",
    "llama.attention.head_count",
    "llama.attention.head_count_kv",
    "llama.vocab_size",
    # Tokenizer configuration.
    "tokenizer.ggml.model",
    "tokenizer.ggml.pre",
    "tokenizer.ggml.bos_token_id",
    "tokenizer.ggml.eos_token_id",
    "tokenizer.ggml.add_bos_token",
    "tokenizer.ggml.add_eos_token",
    "tokenizer.ggml.add_space_prefix",
    # Chat template(s); the plural key is an array field in the GGUF file.
    "tokenizer.chat_template",
    "tokenizer.chat_templates",
)
def _gguf_field_to_python_value(field: Any) -> Any:
"""
Convert a GGUFReader field to a readable Python value.
"""
try:
value = field.contents()
except Exception:
return "<unreadable>"
# Normalize bytes-ish values
if isinstance(value, bytes):
return value.decode("utf-8", errors="replace")
if isinstance(value, list):
out = []
for item in value:
if isinstance(item, bytes):
out.append(item.decode("utf-8", errors="replace"))
else:
out.append(item)
return out
return value
def get_useful_metadata_from_llama(llm: Llama) -> dict[str, Any]:
    """
    Read useful GGUF metadata exposed by llama-cpp-python on the loaded model.

    Note: llama.cpp only exposes scalar metadata here, so array fields such as
    tokenizer.chat_templates may be missing even when they exist in the GGUF file.
    """
    snapshot: dict[str, Any] = {
        key: llm.metadata.get(key, "<missing>") for key in USEFUL_GGUF_METADATA_KEYS
    }
    # Named per-template entries (tokenizer.chat_template.<name>) are scalars,
    # so they show up here even though the array field may not.
    for name in sorted(llm.metadata):
        if name.startswith("tokenizer.chat_template."):
            snapshot[name] = llm.metadata[name]
    if snapshot["tokenizer.chat_templates"] == "<missing>":
        snapshot["tokenizer.chat_templates"] = (
            "<missing or not exposed by llama.cpp metadata API>"
        )
    # Extra runtime values resolved by llama-cpp-python itself.
    snapshot["llama_cpp.chat_format"] = llm.chat_format
    snapshot["llama_cpp.token_bos"] = llm.token_bos()
    snapshot["llama_cpp.token_eos"] = llm.token_eos()
    return snapshot
def get_useful_metadata_from_gguf(model_path: str) -> dict[str, Any]:
    """
    Read useful GGUF metadata directly from the GGUF file.
    """
    fields = GGUFReader(model_path).fields
    snapshot: dict[str, Any] = {}
    # Curated keys first, with an explicit marker for anything absent.
    for key in USEFUL_GGUF_METADATA_KEYS:
        if key in fields:
            snapshot[key] = _gguf_field_to_python_value(fields[key])
        else:
            snapshot[key] = "<missing>"
    # Then any named per-template entries present in the file.
    for key in sorted(fields):
        if key.startswith("tokenizer.chat_template."):
            snapshot[key] = _gguf_field_to_python_value(fields[key])
    return snapshot
def print_useful_metadata_from_llama(model_path: str, llm: Optional[Llama] = None) -> None:
    """
    Print useful metadata both from the loaded Llama object and directly from GGUF.
    """
    def _print_section(header: str, pairs: dict[str, Any], footer: str) -> None:
        # One "key = value" line per entry, framed by section markers.
        print(header)
        for name, val in pairs.items():
            print(f"{name} = {val}")
        print(footer)

    # The Llama-object section is optional; the GGUF-file section always prints.
    if llm is not None:
        _print_section(
            "=== Useful metadata from loaded Llama object ===",
            get_useful_metadata_from_llama(llm),
            "=== End of loaded Llama metadata ===\n",
        )
    _print_section(
        "=== Useful metadata read directly from GGUF file ===",
        get_useful_metadata_from_gguf(model_path),
        "=== End of GGUF file metadata ===\n",
    )
def print_useless_metadata_from_gguf(model_path: str) -> None:
    """
    Print all GGUF metadata key/value pairs found in the file.
    """
    print("=== GGUF metadata ===")
    # Dump every field in the file, converted to a readable value.
    for name, raw_field in GGUFReader(model_path).fields.items():
        print(f"{name} = {_gguf_field_to_python_value(raw_field)}")
    print("=== End of GGUF metadata ===\n")