diff --git a/src/levanter/compat/hf_checkpoints.py b/src/levanter/compat/hf_checkpoints.py index 49e020d77..5822c3fba 100644 --- a/src/levanter/compat/hf_checkpoints.py +++ b/src/levanter/compat/hf_checkpoints.py @@ -30,7 +30,6 @@ import haliax from haliax import Axis -from haliax._src.state_dict import ModuleWithStateDictSerialization from haliax.partitioning import ResourceMapping from haliax.state_dict import from_torch_compatible_state_dict, save_state_dict, to_torch_compatible_state_dict @@ -129,7 +128,7 @@ def hf_checkpoint_converter(cls) -> "HFCheckpointConverter": MConfig = TypeVar("MConfig", bound=HFCompatConfig) -class ModelWithHfSerializationMixin(Generic[MConfig], ModuleWithStateDictSerialization): +class ModelWithHfSerializationMixin(Generic[MConfig]): def get_hf_config(self): return self.config.to_hf_config(self.Vocab.size) diff --git a/tests/test_hf_gpt2_serialize.py b/tests/test_hf_gpt2_serialize.py index bf92088ae..24d87ce0b 100644 --- a/tests/test_hf_gpt2_serialize.py +++ b/tests/test_hf_gpt2_serialize.py @@ -205,8 +205,8 @@ def test_hf_save_to_fs_spec(): loaded_model = converter.load_pretrained(Gpt2LMHeadModel, ref=f"{tmpdir}/test") - simple_dict = simple_model.to_state_dict() - loaded_dict = loaded_model.to_state_dict() + simple_dict = hax.state_dict.to_torch_compatible_state_dict(simple_model) + loaded_dict = hax.state_dict.to_torch_compatible_state_dict(loaded_model) assert simple_dict.keys() == loaded_dict.keys()