new config
ahmeda14960 committed Sep 11, 2024
1 parent 0bfcf1c commit 12025a6
Showing 1 changed file with 32 additions and 13 deletions.
src/levanter/models/llama.py
@@ -112,19 +112,38 @@ def from_hf_config(cls, hf_config: HfConfig):
         print(f'\n hf_config: {hf_config}')
         print(type(hf_config))
         print(dir(hf_config))
-        return LlamaConfig(
-            seq_len=hf_config.max_position_embeddings,
-            hidden_dim=hf_config.hidden_size,
-            intermediate_dim=hf_config.intermediate_size,
-            num_layers=hf_config.num_hidden_layers,
-            num_heads=hf_config.num_attention_heads,
-            num_kv_heads=hf_config.num_key_value_heads,
-            activation_function=hf_config.hidden_act,
-            initializer_range=hf_config.initializer_range,
-            layer_norm_epsilon=hf_config.rms_norm_eps,
-            rope_scaling=hf_config.rope_scaling,
-            rope_theta=hf_config.rope_theta,
-        )
+
+        if 'olmo' in hf_config._name_or_path:
+            return LlamaConfig(
+                seq_len=hf_config.max_position_embeddings,
+                hidden_dim=hf_config.hidden_size,
+                intermediate_dim=hf_config.intermediate_size,
+                num_layers=hf_config.num_hidden_layers,
+                num_heads=hf_config.num_attention_heads,
+                num_kv_heads=hf_config.num_key_value_heads,
+                activation_function=hf_config.hidden_act,
+                initializer_range=hf_config.initializer_range,
+                layer_norm_epsilon=0,
+                use_layer_norm_weight=False,
+                use_bias=False,
+                rope_scaling=hf_config.rope_scaling,
+                rope_theta=hf_config.rope_theta,
+            )
+
+        else:
+            return LlamaConfig(
+                seq_len=hf_config.max_position_embeddings,
+                hidden_dim=hf_config.hidden_size,
+                intermediate_dim=hf_config.intermediate_size,
+                num_layers=hf_config.num_hidden_layers,
+                num_heads=hf_config.num_attention_heads,
+                num_kv_heads=hf_config.num_key_value_heads,
+                activation_function=hf_config.hidden_act,
+                initializer_range=hf_config.initializer_range,
+                layer_norm_epsilon=hf_config.rms_norm_eps,
+                rope_scaling=hf_config.rope_scaling,
+                rope_theta=hf_config.rope_theta,
+            )
 
     def to_hf_config(self, vocab_size: int, config_overrides: Optional[Dict] = None) -> HfLlamaConfig:
         """Convert to HuggingFace's LlamaConfig
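For context, a minimal sketch of how the changed classmethod might be exercised (not part of this commit; the checkpoint paths are illustrative, and the comments infer the meaning of use_layer_norm_weight/use_bias from their names):

    # Usage sketch, assuming a transformers install and a Levanter checkout.
    from transformers import AutoConfig

    from levanter.models.llama import LlamaConfig

    # A path containing "olmo" takes the new branch: layer norm with eps=0 and
    # no learned weight, and no bias terms on the linear layers.
    olmo_hf = AutoConfig.from_pretrained("path/to/olmo-1b-hf")
    olmo_config = LlamaConfig.from_hf_config(olmo_hf)

    # Any other checkpoint falls through to the standard Llama mapping, which
    # reads rms_norm_eps from the HF config.
    llama_hf = AutoConfig.from_pretrained("meta-llama/Llama-2-7b-hf")
    llama_config = LlamaConfig.from_hf_config(llama_hf)

Note that the dispatch is a case-sensitive substring match on _name_or_path, so a path spelled "OLMo" (as in the official Hugging Face checkpoints, e.g. allenai/OLMo-1B-hf) would fall through to the else branch; lowercase "olmo" must appear in the path for the new settings to apply.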
