diff --git a/configs/stories/llama2/100k.json b/configs/stories/llama2/100k.json
index 17515653..601c3809 100644
--- a/configs/stories/llama2/100k.json
+++ b/configs/stories/llama2/100k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 24,
-    "intermediate_size": 64,
+    "hidden_size": 12,
+    "intermediate_size": 48,
     "num_attention_heads": 2,
     "num_hidden_layers": 1,
     "num_key_value_heads": 1
diff --git a/configs/stories/llama2/10m.json b/configs/stories/llama2/10m.json
index db00936f..224b4674 100644
--- a/configs/stories/llama2/10m.json
+++ b/configs/stories/llama2/10m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 362,
-    "intermediate_size": 965,
+    "hidden_size": 332,
+    "intermediate_size": 896,
     "num_attention_heads": 12,
     "num_hidden_layers": 6,
     "num_key_value_heads": 6
diff --git a/configs/stories/llama2/1m.json b/configs/stories/llama2/1m.json
index b7a87931..52f1c893 100644
--- a/configs/stories/llama2/1m.json
+++ b/configs/stories/llama2/1m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 112,
-    "intermediate_size": 298,
+    "hidden_size": 84,
+    "intermediate_size": 256,
     "num_attention_heads": 8,
     "num_hidden_layers": 4,
     "num_key_value_heads": 4
diff --git a/configs/stories/llama2/2.5m.json b/configs/stories/llama2/2.5m.json
index 843158e2..4d55904c 100644
--- a/configs/stories/llama2/2.5m.json
+++ b/configs/stories/llama2/2.5m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 196,
-    "intermediate_size": 522,
+    "hidden_size": 168,
+    "intermediate_size": 384,
     "num_attention_heads": 8,
     "num_hidden_layers": 4,
     "num_key_value_heads": 4
diff --git a/configs/stories/llama2/250k.json b/configs/stories/llama2/250k.json
index 8a9d9796..7a4ed066 100644
--- a/configs/stories/llama2/250k.json
+++ b/configs/stories/llama2/250k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 48,
-    "intermediate_size": 128,
+    "hidden_size": 28,
+    "intermediate_size": 96,
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2
diff --git a/configs/stories/llama2/25m.json b/configs/stories/llama2/25m.json
index 5e22a658..813d2b63 100644
--- a/configs/stories/llama2/25m.json
+++ b/configs/stories/llama2/25m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 512,
-    "intermediate_size": 1365,
+    "hidden_size": 484,
+    "intermediate_size": 1332,
     "num_attention_heads": 16,
     "num_hidden_layers": 8,
     "num_key_value_heads": 8
diff --git a/configs/stories/llama2/500k.json b/configs/stories/llama2/500k.json
index f08e8126..c4e0ec8e 100644
--- a/configs/stories/llama2/500k.json
+++ b/configs/stories/llama2/500k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 84,
-    "intermediate_size": 224,
+    "hidden_size": 52,
+    "intermediate_size": 184,
     "num_attention_heads": 4,
     "num_hidden_layers": 2,
     "num_key_value_heads": 2
diff --git a/configs/stories/llama2/50k.json b/configs/stories/llama2/50k.json
index 57eecf05..53afb500 100644
--- a/configs/stories/llama2/50k.json
+++ b/configs/stories/llama2/50k.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 12,
-    "intermediate_size": 32,
+    "hidden_size": 6,
+    "intermediate_size": 24,
     "num_attention_heads": 2,
     "num_hidden_layers": 1,
     "num_key_value_heads": 1
diff --git a/configs/stories/llama2/50m.json b/configs/stories/llama2/50m.json
index 5120c64f..3fa95022 100644
--- a/configs/stories/llama2/50m.json
+++ b/configs/stories/llama2/50m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 726,
-    "intermediate_size": 1952,
+    "hidden_size": 708,
+    "intermediate_size": 1896,
     "num_attention_heads": 16,
     "num_hidden_layers": 8,
     "num_key_value_heads": 8
diff --git a/configs/stories/llama2/5m.json b/configs/stories/llama2/5m.json
index 30e9dd74..839221f6 100644
--- a/configs/stories/llama2/5m.json
+++ b/configs/stories/llama2/5m.json
@@ -1,7 +1,7 @@
 {
   "model_config": {
-    "hidden_size": 246,
-    "intermediate_size": 656,
+    "hidden_size": 232,
+    "intermediate_size": 512,
     "num_attention_heads": 12,
     "num_hidden_layers": 6,
     "num_key_value_heads": 6