From aadf2ee0520dea30571105adb2279f650ef85006 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 26 Nov 2024 14:00:34 -0800 Subject: [PATCH] Use linux.4xlarge.memory instead of linux.12xlarge (#6896) * Try linux.4xlarge.memory * Testing * Ready to land * More tests * Forget one line change * Use linux.8xlarge.memory for llama3_2_text_decoder * Increase timeout value * Skip llama3_2_text_decoder because it takes too long to export * Ready to land --- .ci/scripts/gather_test_models.py | 20 +++++++++++--------- examples/models/__init__.py | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.ci/scripts/gather_test_models.py b/.ci/scripts/gather_test_models.py index 87ed31af3d..d02213b9fa 100755 --- a/.ci/scripts/gather_test_models.py +++ b/.ci/scripts/gather_test_models.py @@ -20,16 +20,16 @@ CUSTOM_RUNNERS = { "linux": { # This one runs OOM on smaller runner, the root cause is unclear (T163016365) - "w2l": "linux.12xlarge", - "ic4": "linux.12xlarge", - "resnet50": "linux.12xlarge", - "llava": "linux.12xlarge", - "llama3_2_vision_encoder": "linux.12xlarge", - # "llama3_2_text_decoder": "linux.12xlarge", # TODO: re-enable test when Huy's change is in / model gets smaller. 
+ "w2l": "linux.4xlarge.memory", + "ic4": "linux.4xlarge.memory", + "resnet50": "linux.4xlarge.memory", + "llava": "linux.4xlarge.memory", + "llama3_2_vision_encoder": "linux.4xlarge.memory", + "llama3_2_text_decoder": "linux.4xlarge.memory", # This one causes timeout on smaller runner, the root cause is unclear (T161064121) - "dl3": "linux.12xlarge", - "emformer_join": "linux.12xlarge", - "emformer_predict": "linux.12xlarge", + "dl3": "linux.4xlarge.memory", + "emformer_join": "linux.4xlarge.memory", + "emformer_predict": "linux.4xlarge.memory", } } @@ -39,10 +39,12 @@ "linux": { "mobilebert": 90, "emformer_predict": 360, + "llama3_2_text_decoder": 360, }, "macos": { "mobilebert": 90, "emformer_predict": 360, + "llama3_2_text_decoder": 360, }, } diff --git a/examples/models/__init__.py b/examples/models/__init__.py index 706b0105af..c78106668e 100644 --- a/examples/models/__init__.py +++ b/examples/models/__init__.py @@ -19,6 +19,7 @@ "llama2": ("llama", "Llama2Model"), "llama": ("llama", "Llama2Model"), "llama3_2_vision_encoder": ("llama3_2_vision", "FlamingoVisionEncoderModel"), + # TODO: This takes too long to export on both Linux and MacOS (> 6 hours) # "llama3_2_text_decoder": ("llama3_2_vision", "Llama3_2Decoder"), "lstm": ("lstm", "LSTMModel"), "mobilebert": ("mobilebert", "MobileBertModelExample"),