diff --git a/configs/stories/llama2/100k.json b/configs/stories/llama2/100k.json index 17515653..601c3809 100644 --- a/configs/stories/llama2/100k.json +++ b/configs/stories/llama2/100k.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 24, - "intermediate_size": 64, + "hidden_size": 12, + "intermediate_size": 48, "num_attention_heads": 2, "num_hidden_layers": 1, "num_key_value_heads": 1 diff --git a/configs/stories/llama2/10m.json b/configs/stories/llama2/10m.json index db00936f..224b4674 100644 --- a/configs/stories/llama2/10m.json +++ b/configs/stories/llama2/10m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 362, - "intermediate_size": 965, + "hidden_size": 332, + "intermediate_size": 896, "num_attention_heads": 12, "num_hidden_layers": 6, "num_key_value_heads": 6 diff --git a/configs/stories/llama2/1m.json b/configs/stories/llama2/1m.json index b7a87931..52f1c893 100644 --- a/configs/stories/llama2/1m.json +++ b/configs/stories/llama2/1m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 112, - "intermediate_size": 298, + "hidden_size": 84, + "intermediate_size": 256, "num_attention_heads": 8, "num_hidden_layers": 4, "num_key_value_heads": 4 diff --git a/configs/stories/llama2/2.5m.json b/configs/stories/llama2/2.5m.json index 843158e2..4d55904c 100644 --- a/configs/stories/llama2/2.5m.json +++ b/configs/stories/llama2/2.5m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 196, - "intermediate_size": 522, + "hidden_size": 168, + "intermediate_size": 384, "num_attention_heads": 8, "num_hidden_layers": 4, "num_key_value_heads": 4 diff --git a/configs/stories/llama2/250k.json b/configs/stories/llama2/250k.json index 8a9d9796..7a4ed066 100644 --- a/configs/stories/llama2/250k.json +++ b/configs/stories/llama2/250k.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 48, - "intermediate_size": 128, + "hidden_size": 28, + "intermediate_size": 96, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2 diff --git a/configs/stories/llama2/25m.json b/configs/stories/llama2/25m.json index 5e22a658..813d2b63 100644 --- a/configs/stories/llama2/25m.json +++ b/configs/stories/llama2/25m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 512, - "intermediate_size": 1365, + "hidden_size": 484, + "intermediate_size": 1332, "num_attention_heads": 16, "num_hidden_layers": 8, "num_key_value_heads": 8 diff --git a/configs/stories/llama2/500k.json b/configs/stories/llama2/500k.json index f08e8126..c4e0ec8e 100644 --- a/configs/stories/llama2/500k.json +++ b/configs/stories/llama2/500k.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 84, - "intermediate_size": 224, + "hidden_size": 52, + "intermediate_size": 184, "num_attention_heads": 4, "num_hidden_layers": 2, "num_key_value_heads": 2 diff --git a/configs/stories/llama2/50k.json b/configs/stories/llama2/50k.json index 57eecf05..53afb500 100644 --- a/configs/stories/llama2/50k.json +++ b/configs/stories/llama2/50k.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 12, - "intermediate_size": 32, + "hidden_size": 6, + "intermediate_size": 24, "num_attention_heads": 2, "num_hidden_layers": 1, "num_key_value_heads": 1 diff --git a/configs/stories/llama2/50m.json b/configs/stories/llama2/50m.json index 5120c64f..3fa95022 100644 --- a/configs/stories/llama2/50m.json +++ b/configs/stories/llama2/50m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 726, - "intermediate_size": 1952, + "hidden_size": 708, + "intermediate_size": 1896, "num_attention_heads": 16, "num_hidden_layers": 8, "num_key_value_heads": 8 diff --git a/configs/stories/llama2/5m.json b/configs/stories/llama2/5m.json index 30e9dd74..839221f6 100644 --- a/configs/stories/llama2/5m.json +++ b/configs/stories/llama2/5m.json @@ -1,7 +1,7 @@ { "model_config": { - "hidden_size": 246, - "intermediate_size": 656, + "hidden_size": 232, + "intermediate_size": 512, "num_attention_heads": 12, "num_hidden_layers": 6, "num_key_value_heads": 6