Skip to content

Commit

Permalink
convert-*.py: add tensor hash general.hash.sha256 to kv store
Browse files Browse the repository at this point in the history
  • Loading branch information
mofosyne committed Jul 23, 2024
1 parent 751fcfc commit 60d4789
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 0 deletions.
6 changes: 6 additions & 0 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,12 @@ def prepare_metadata(self, vocab_only: bool):

self.set_type()

# Hash the tensor contents with SHA-256 and record the digest in the metadata (skipped for vocab-only conversion)
if not vocab_only:
hash_sha256 = self.gguf_writer.calculate_tensor_hash_sha256()
self.gguf_writer.add_hash_sha256(hash_sha256)
logger.info(f"tensor hash (sha256): {hash_sha256}")

logger.info("Set meta model")
self.metadata.set_gguf_meta_model(self.gguf_writer)

Expand Down
3 changes: 3 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class General:
ALIGNMENT = "general.alignment"
FILE_TYPE = "general.file_type"

# Tensor hash (SHA-256 hex digest of the tensor data)
HASH_SHA256 = "general.hash.sha256"

# Authorship Metadata
NAME = "general.name"
AUTHOR = "general.author"
Expand Down
16 changes: 16 additions & 0 deletions gguf-py/gguf/gguf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os
import hashlib
import shutil
import struct
import tempfile
Expand Down Expand Up @@ -417,6 +418,18 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:

self.state = WriterState.WEIGHTS

def calculate_tensor_hash_sha256(self) -> str:
    """Return the hex SHA-256 digest of every tensor held in ``self.tensors``.

    Each tensor's bytes (C order) are fed into one running hash, visiting
    the mappings in ``self.tensors`` in sequence and the entries of each
    mapping in insertion order.
    """
    digest = hashlib.sha256()

    # Dicts keep insertion order (Python 3.7+), so this traversal -- and
    # therefore the resulting digest -- is stable for a given add order.
    ordered_infos = (
        info
        for tensor_map in self.tensors
        for info in tensor_map.values()
    )

    for info in ordered_infos:
        assert info.tensor is not None
        assert info.tensor.nbytes == info.nbytes
        digest.update(info.tensor.tobytes('C'))

    return digest.hexdigest()

def write_tensors_to_file(self, *, progress: bool = False) -> None:
self.write_ti_data_to_file()

Expand Down Expand Up @@ -491,6 +504,9 @@ def add_custom_alignment(self, alignment: int) -> None:
def add_file_type(self, ftype: int) -> None:
    """Store ``ftype`` as a uint32 value under ``Keys.General.FILE_TYPE``."""
    key = Keys.General.FILE_TYPE
    self.add_uint32(key, ftype)

def add_hash_sha256(self, hash: str) -> None:
    """Store ``hash`` as a string value under ``Keys.General.HASH_SHA256``."""
    key = Keys.General.HASH_SHA256
    self.add_string(key, hash)

def add_name(self, name: str) -> None:
    """Store ``name`` as a string value under ``Keys.General.NAME``."""
    key = Keys.General.NAME
    self.add_string(key, name)

Expand Down

0 comments on commit 60d4789

Please sign in to comment.