gguf : enforce that tensor names are unique (#6905)

* do not allow adding a duplicated tensor name
* do not allow duplicated tensors while reading gguf
* fix typo
* throw exception inside llama_model_loader

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-08-19 06:25:15 -04:00 · 2024-04-28 17:36:18 +02:00
parent ce023f6f2f
commit 7bb36ccf91
4 changed files with 32 additions and 1 deletions
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -234,8 +234,14 @@ class GGUFReader:

    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        tensors = []
+        tensor_names = set() # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+            # check if there's any tensor having same name already in the list
+            tensor_name = str(bytes(name_data), encoding = 'utf-8')
+            if tensor_name in tensor_names:
+                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = np.prod(dims)
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +273,7 @@ class GGUFReader:
                item_count = n_bytes
                item_type = np.uint8
            tensors.append(ReaderTensor(
-                name = str(bytes(name_data), encoding = 'utf-8'),
+                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,