From 7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sun, 28 Apr 2024 17:36:18 +0200
Subject: [PATCH] gguf : enforce that tensor names are unique (#6905)

* not allow adding duplicated tensor name

* no duplicated tensor while reading gguf

* typo

* throw exception inside llama_model_loader

Co-authored-by: slaren

---------

Co-authored-by: slaren
---
 ggml.c                      | 12 ++++++++++++
 gguf-py/gguf/gguf_reader.py |  8 +++++++-
 gguf-py/gguf/gguf_writer.py |  5 +++++
 llama.cpp                   |  8 ++++++++
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/ggml.c b/ggml.c
index 34eef23fcf90f..cb273061c5c53 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20819,6 +20819,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             // TODO: return an error instead of crashing with GGML_ASSERT
             gguf_tensor_info_sanitize(info);
 
+            // make sure there is no duplicated tensor names
+            for (uint64_t j = 0; j < i; ++j) {
+                if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
+                    fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
+                    ok = false;
+                }
+            }
+
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor info\n", __func__);
                 fclose(file);
@@ -21355,6 +21363,10 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
 void gguf_add_tensor(
              struct gguf_context * ctx,
         const struct ggml_tensor * tensor) {
+    if (gguf_find_tensor(ctx, tensor->name) != -1) {
+        GGML_ASSERT(false && "duplicated tensor name");
+    }
+
     const int idx = ctx->header.n_tensors;
     ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
 
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 33afac552ca75..48ef6d4ae45df 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -234,8 +234,14 @@ def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[Reader
 
     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
         tensors = []
+        tensor_names = set() # keep track of name to prevent duplicated tensors
         for field in fields:
             _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+            # check if there's any tensor having same name already in the list
+            tensor_name = str(bytes(name_data), encoding = 'utf-8')
+            if tensor_name in tensor_names:
+                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+            tensor_names.add(tensor_name)
             ggml_type = GGMLQuantizationType(raw_dtype[0])
             n_elems = np.prod(dims)
             block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +273,7 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
                 item_count = n_bytes
                 item_type = np.uint8
             tensors.append(ReaderTensor(
-                name = str(bytes(name_data), encoding = 'utf-8'),
+                name = tensor_name,
                 tensor_type = ggml_type,
                 shape = dims,
                 n_elements = n_elems,
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index e3dbca454ae05..ec44ac9f3813d 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -63,6 +63,7 @@ def __init__(
         self.kv_data_count = 0
         self.ti_data = bytearray()
         self.ti_data_count = 0
+        self.ti_names = set()
         self.use_temp_file = use_temp_file
         self.temp_file = None
         self.tensors = []
@@ -197,6 +198,10 @@ def add_tensor_info(
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')
 
+        if name in self.ti_names:
+            raise ValueError(f'Duplicated tensor name {name}')
+        self.ti_names.add(name)
+
         encoded_name = name.encode("utf8")
         self.ti_data += self._pack("Q", len(encoded_name))
         self.ti_data += encoded_name
diff --git a/llama.cpp b/llama.cpp
index 49f2b559e965e..3c64622d7c8dc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3120,9 +3120,17 @@ struct llama_model_loader {
 
         fver = (enum llama_fver) gguf_get_version(meta);
 
+        std::set<std::string> tensor_names;
         for (auto & w : weights) {
             n_elements += ggml_nelements(w.tensor);
             n_bytes += ggml_nbytes(w.tensor);
+            // make sure there is no duplicated tensor names
+            const std::string name(w.tensor->name);
+            auto found = tensor_names.find(name);
+            if (found != tensor_names.end()) {
+                throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", w.tensor->name));
+            }
+            tensor_names.insert(name);
         }
 
         LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
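Note: a minimal sketch of how the new gguf-py check surfaces to callers. The output
path, architecture string, tensor name, and tensor values below are illustrative only;
the duplicate is rejected by add_tensor_info (reached here via add_tensor), which now
raises ValueError as in the gguf_writer.py hunk above.

    # sketch: adding two tensors with the same name is now rejected
    import numpy as np
    import gguf

    writer = gguf.GGUFWriter("dummy.gguf", "llama")
    writer.add_tensor("blk.0.attn_q.weight", np.zeros((4, 4), dtype=np.float32))
    try:
        # same name again -> ValueError from the new ti_names check
        writer.add_tensor("blk.0.attn_q.weight", np.zeros((4, 4), dtype=np.float32))
    except ValueError as err:
        print(err)  # Duplicated tensor name blk.0.attn_q.weight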