import sys
from typing import Any, Optional

from gguf import GGUFReader
from llama_cpp import Llama

# GGUF metadata keys worth surfacing when inspecting a model.  The scalar
# keys come from the GGUF header / llama.cpp; the tokenizer.chat_template*
# keys describe the model's built-in chat template(s).
USEFUL_GGUF_METADATA_KEYS = (
    "general.name",
    "general.architecture",
    "general.file_type",
    "general.quantization_version",
    "llama.context_length",
    "llama.block_count",
    "llama.embedding_length",
    "llama.feed_forward_length",
    "llama.attention.head_count",
    "llama.attention.head_count_kv",
    "llama.vocab_size",
    "tokenizer.ggml.model",
    "tokenizer.ggml.pre",
    "tokenizer.ggml.bos_token_id",
    "tokenizer.ggml.eos_token_id",
    "tokenizer.ggml.add_bos_token",
    "tokenizer.ggml.add_eos_token",
    "tokenizer.ggml.add_space_prefix",
    "tokenizer.chat_template",
    "tokenizer.chat_templates",
)

# Prefix under which GGUF stores each named chat template as a scalar string.
_CHAT_TEMPLATE_PREFIX = "tokenizer.chat_template."


def _gguf_field_to_python_value(field: Any) -> Any:
    """Convert a GGUFReader field to a readable Python value.

    Bytes are decoded as UTF-8 (with replacement characters for invalid
    sequences), and lists have their bytes elements decoded the same way;
    any other value is returned unchanged.

    Returns "" when the field's contents cannot be read at all — this is a
    deliberate best-effort fallback so a single malformed field does not
    abort a whole metadata dump.
    """
    try:
        value = field.contents()
    except Exception:
        # Best-effort: unreadable/unsupported field types become "".
        return ""
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    if isinstance(value, list):
        return [
            item.decode("utf-8", errors="replace") if isinstance(item, bytes) else item
            for item in value
        ]
    return value


def get_useful_metadata_from_llama(llm: Llama) -> dict[str, Any]:
    """Read useful GGUF metadata exposed by llama-cpp-python on the loaded model.

    Note: llama.cpp only exposes scalar metadata here, so array fields such
    as tokenizer.chat_templates may be missing even when they exist in the
    GGUF file.

    Args:
        llm: An already-loaded ``Llama`` model instance.

    Returns:
        Mapping of metadata key to value; keys absent from ``llm.metadata``
        map to "".  Also includes the ``llama_cpp.*`` runtime values
        (chat format, BOS/EOS token ids).
    """
    metadata_snapshot: dict[str, Any] = {}
    for key in USEFUL_GGUF_METADATA_KEYS:
        metadata_snapshot[key] = llm.metadata.get(key, "")

    # Named chat templates are stored one per scalar key:
    # tokenizer.chat_template.<name>.  Collect them (sorted for stable
    # output) and remember the names.
    template_names: list[str] = []
    for key in sorted(llm.metadata):
        if key.startswith(_CHAT_TEMPLATE_PREFIX):
            metadata_snapshot[key] = llm.metadata[key]
            template_names.append(key[len(_CHAT_TEMPLATE_PREFIX):])

    # llama.cpp does not expose array metadata, so tokenizer.chat_templates
    # is typically missing from llm.metadata.  Reconstruct it from the
    # per-name keys found above.  (The original code assigned "" to an
    # entry already known to be "" — a no-op; this realizes the evident
    # intent of that check.)
    if metadata_snapshot["tokenizer.chat_templates"] == "" and template_names:
        metadata_snapshot["tokenizer.chat_templates"] = template_names

    metadata_snapshot["llama_cpp.chat_format"] = llm.chat_format
    metadata_snapshot["llama_cpp.token_bos"] = llm.token_bos()
    metadata_snapshot["llama_cpp.token_eos"] = llm.token_eos()
    return metadata_snapshot


def get_useful_metadata_from_gguf(model_path: str) -> dict[str, Any]:
    """Read useful GGUF metadata directly from the GGUF file.

    Unlike :func:`get_useful_metadata_from_llama`, this sees array fields
    too, because it parses the file itself rather than going through
    llama.cpp's scalar-only metadata map.

    Args:
        model_path: Filesystem path to the .gguf file.

    Returns:
        Mapping of metadata key to decoded value; keys missing from the
        file map to "".
    """
    reader = GGUFReader(model_path)
    metadata_snapshot: dict[str, Any] = {}
    for key in USEFUL_GGUF_METADATA_KEYS:
        field = reader.fields.get(key)
        metadata_snapshot[key] = (
            _gguf_field_to_python_value(field) if field is not None else ""
        )
    # Pick up every named chat template as well (sorted for stable output).
    for key, field in sorted(reader.fields.items()):
        if key.startswith(_CHAT_TEMPLATE_PREFIX):
            metadata_snapshot[key] = _gguf_field_to_python_value(field)
    return metadata_snapshot


def print_useful_metadata_from_llama(model_path: str, llm: Optional[Llama] = None) -> None:
    """Print useful metadata both from the loaded Llama object and directly from GGUF.

    Args:
        model_path: Filesystem path to the .gguf file.
        llm: Optional already-loaded ``Llama`` instance; when given, its
            runtime view of the metadata is printed first.
    """
    if llm is not None:
        print("=== Useful metadata from loaded Llama object ===")
        for key, value in get_useful_metadata_from_llama(llm).items():
            print(f"{key} = {value}")
        print("=== End of loaded Llama metadata ===\n")
    print("=== Useful metadata read directly from GGUF file ===")
    for key, value in get_useful_metadata_from_gguf(model_path).items():
        print(f"{key} = {value}")
    print("=== End of GGUF file metadata ===\n")


def print_useless_metadata_from_gguf(model_path: str) -> None:
    """Print all GGUF metadata key/value pairs found in the file.

    Args:
        model_path: Filesystem path to the .gguf file.
    """
    reader = GGUFReader(model_path)
    print("=== GGUF metadata ===")
    for key, field in reader.fields.items():
        value = _gguf_field_to_python_value(field)
        print(f"{key} = {value}")
    print("=== End of GGUF metadata ===\n")