diff --git a/configs/stories/llama2/100k.json b/configs/stories/llama2/100k.json index 601c3809..57eecf05 100644 --- a/configs/stories/llama2/100k.json +++ b/configs/stories/llama2/100k.json @@ -1,7 +1,7 @@ { "model_config": { "hidden_size": 12, - "intermediate_size": 48, + "intermediate_size": 32, "num_attention_heads": 2, "num_hidden_layers": 1, "num_key_value_heads": 1 diff --git a/configs/stories/llama2/10m.json b/configs/stories/llama2/10m.json index 224b4674..a281c95e 100644 --- a/configs/stories/llama2/10m.json +++ b/configs/stories/llama2/10m.json @@ -1,9 +1,9 @@ { "model_config": { - "hidden_size": 332, - "intermediate_size": 896, - "num_attention_heads": 12, + "hidden_size": 340, + "intermediate_size": 907, + "num_attention_heads": 10, "num_hidden_layers": 6, - "num_key_value_heads": 6 + "num_key_value_heads": 5 } } \ No newline at end of file diff --git a/configs/stories/llama2/1m.json b/configs/stories/llama2/1m.json index 4d3d7cbb..c83c064c 100644 --- a/configs/stories/llama2/1m.json +++ b/configs/stories/llama2/1m.json @@ -1,9 +1,9 @@ { "model_config": { - "hidden_size": 82, - "intermediate_size": 256, - "num_attention_heads": 8, + "hidden_size": 84, + "intermediate_size": 244, + "num_attention_heads": 6, "num_hidden_layers": 4, - "num_key_value_heads": 4 + "num_key_value_heads": 3 } } \ No newline at end of file diff --git a/configs/stories/llama2/2.5m.json b/configs/stories/llama2/2.5m.json index 0be616ec..169c8f67 100644 --- a/configs/stories/llama2/2.5m.json +++ b/configs/stories/llama2/2.5m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 176, - "intermediate_size": 352, + "hidden_size": 160, + "intermediate_size": 426, "num_attention_heads": 8, "num_hidden_layers": 4, "num_key_value_heads": 4 diff --git a/configs/stories/llama2/250k.json b/configs/stories/llama2/250k.json index baa81f2e..3b9f8ca5 100644 --- a/configs/stories/llama2/250k.json +++ b/configs/stories/llama2/250k.json @@ -1,9 +1,9 @@ { "model_config": { - "hidden_size": 30, - "intermediate_size": 68, - "num_attention_heads": 4, + "hidden_size": 28, + "intermediate_size": 75, + "num_attention_heads": 2, "num_hidden_layers": 2, - "num_key_value_heads": 2 + "num_key_value_heads": 1 } } \ No newline at end of file diff --git a/configs/stories/llama2/25m.json b/configs/stories/llama2/25m.json index 813d2b63..95d1edd4 100644 --- a/configs/stories/llama2/25m.json +++ b/configs/stories/llama2/25m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 484, - "intermediate_size": 1332, + "hidden_size": 480, + "intermediate_size": 1280, "num_attention_heads": 16, "num_hidden_layers": 8, "num_key_value_heads": 8 diff --git a/configs/stories/llama2/500k.json b/configs/stories/llama2/500k.json index 060062d8..489cb210 100644 --- a/configs/stories/llama2/500k.json +++ b/configs/stories/llama2/500k.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 54, - "intermediate_size": 144, + "hidden_size": 56, + "intermediate_size": 149, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2 diff --git a/configs/stories/llama2/50k.json b/configs/stories/llama2/50k.json index 73060f6f..8a02b571 100644 --- a/configs/stories/llama2/50k.json +++ b/configs/stories/llama2/50k.json @@ -1,8 +1,8 @@ { "model_config": { - "hidden_size": 8, + "hidden_size": 6, "intermediate_size": 16, - "num_attention_heads": 2, + "num_attention_heads": 3, "num_hidden_layers": 1, "num_key_value_heads": 1 } diff --git a/configs/stories/llama2/50m.json b/configs/stories/llama2/50m.json index 3fa95022..8b963c85 100644 --- a/configs/stories/llama2/50m.json +++ b/configs/stories/llama2/50m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 708, - "intermediate_size": 1896, + "hidden_size": 704, + "intermediate_size": 1877, "num_attention_heads": 16, "num_hidden_layers": 8, "num_key_value_heads": 8 diff --git a/configs/stories/llama2/5m.json b/configs/stories/llama2/5m.json index 795cb768..15ffdc0a 100644 --- a/configs/stories/llama2/5m.json +++ b/configs/stories/llama2/5m.json @@ -1,9 +1,9 @@ { "model_config": { "hidden_size": 240, - "intermediate_size": 480, - "num_attention_heads": 12, - "num_hidden_layers": 6, - "num_key_value_heads": 6 + "intermediate_size": 640, + "num_attention_heads": 10, + "num_hidden_layers": 5, + "num_key_value_heads": 5 } } \ No newline at end of file