
[WIP] Enable GPTQModel to handle GraniteMoeParallelExperts #122

Draft
wants to merge 12 commits into base: main
@@ -22,6 +22,7 @@
from .gpt_bigcode import GPTBigCodeGPTQ
from .gpt_neox import GPTNeoXGPTQ
from .granite import GraniteGPTQ
from .granitemoe import GraniteMoeGPTQ
from .llama import LlamaGPTQ
from .mistral import MistralGPTQ
from .mixtral import MixtralGPTQ
@@ -28,6 +28,7 @@
"granite",
"gemma",
"dbrx_converted",
"granitemoe"
]

EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
@@ -29,6 +29,7 @@
from .gpt_bigcode import GPTBigCodeGPTQ
from .gpt_neox import GPTNeoXGPTQ
from .granite import GraniteGPTQ
from .granitemoe import GraniteMoeGPTQ
from .llama import LlamaGPTQ
from .mistral import MistralGPTQ
from .mixtral import MixtralGPTQ
@@ -43,6 +44,7 @@
"granite": GraniteGPTQ,
"dbrx": DbrxGPTQ,
"dbrx_converted": DbrxConvertedGPTQ,
"granitemoe": GraniteMoeGPTQ
}

at_least_one_cuda_v6 = any(
@@ -558,7 +558,7 @@ def save_quantized(
self.quantize_config.meta_set_versionable(
key=META_FIELD_QUANTIZER,
value=META_QUANTIZER_GPTQMODEL,
-version=__version__,
+version="1.0.0",
Contributor:
why does this need to be changed?

)

# The config, quantize_config and model may be edited in place in save_quantized.
@@ -0,0 +1,30 @@
###############################################################################
# Adapted from https://github.com/ModelCloud/GPTQModel
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###############################################################################
# Local
from .base import BaseGPTQModel


class GraniteMoeGPTQ(BaseGPTQModel):
    base_modules = ["model.embed_tokens", "model.norm"]

    layers_node = "model.layers"
    layer_type = "GraniteMoeDecoderLayer"
Contributor:
I suggest you add some simple key to inform the format of input_linear and output_linear, i.e. that these are 3D tensors.

Also, in the granitemoe case, another complication is that input_linear fuses w1 and w3. It might be OK for a first cut just to leave them as fused.
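
For context, a minimal sketch of the 3D layout under discussion, assuming the Hugging Face GraniteMoeParallelExperts convention of a single weight parameter shaped (num_experts, output_size, input_size); the sizes and names below are illustrative only, not taken from the PR:

import torch

# illustrative sizes only; not taken from any specific Granite MoE checkpoint
num_experts, hidden_size, intermediate_size = 8, 1024, 2048

# input_linear fuses w1 and w3, so its per-expert output dimension is 2 * intermediate_size
input_linear_weight = torch.empty(num_experts, 2 * intermediate_size, hidden_size)

for expert_idx in range(num_experts):
    fused = input_linear_weight[expert_idx]  # 2D slice: (2 * intermediate_size, hidden_size)
    w1, w3 = fused.chunk(2, dim=0)           # the two fused halves, if they ever need separating
    # for a first cut, as suggested above, the fused 2D slice could be quantized as-is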

Contributor @fabianlim (Jan 28, 2025):
So basically the simple key needs to know what to look for to convert the 3D tensor, and then when you write layer_modules you write it as though they have been converted:

class GraniteMoeGPTQ(BaseGPTQModel):

    convert3dToModuleList = ["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"]

    layer_modules = [
        [
            "block_sparse_moe.input_linear.0.weight",
            "block_sparse_moe.input_linear.1.weight",
            ...
        ],
        [
            "block_sparse_moe.output_linear.0.weight",
            "block_sparse_moe.output_linear.1.weight",
            ...
        ]
    ]
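
convert3dToModuleList is a proposed key rather than an existing GPTQModel attribute. A rough sketch of one way the implied conversion could work, assuming the parallel-experts module stores a single 3D weight of shape (num_experts, out_features, in_features); the helper name below is hypothetical:

import torch
from torch import nn


def split_parallel_experts(module: nn.Module) -> nn.ModuleList:
    # Copy each expert's 2D slice of the 3D weight into its own nn.Linear
    # so that per-expert submodules (".0", ".1", ...) exist by name.
    num_experts, out_features, in_features = module.weight.shape
    experts = nn.ModuleList()
    for idx in range(num_experts):
        linear = nn.Linear(in_features, out_features, bias=False)
        with torch.no_grad():
            linear.weight.copy_(module.weight[idx])
        experts.append(linear)
    return experts

After replacing block_sparse_moe.input_linear / output_linear with such a ModuleList (and packing the quantized weights back into the 3D tensor afterwards), names like "block_sparse_moe.input_linear.0" would resolve to ordinary Linear modules of the kind GPTQ already quantizes.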

    layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["block_sparse_moe.input_linear", "block_sparse_moe.output_linear"],
Contributor @fabianlim (Jan 28, 2025):
Reference MixtralGPTQ: you will see that they split up w1 + w3 and w2, which means we should split "block_sparse_moe.input_linear" and "block_sparse_moe.output_linear"; see above.

["input_layernorm", "post_attention_layernorm"]
]
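
Following the comment above about mirroring MixtralGPTQ's split of w1 + w3 and w2, a hedged sketch of how layer_modules might look once per-expert modules exist; the two-expert ".0"/".1" entries are illustrative and assume the 3D-to-ModuleList conversion sketched earlier:

layer_modules = [
    ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
    ["self_attn.o_proj"],
    # fused w1 + w3, one entry per expert
    ["block_sparse_moe.input_linear.0", "block_sparse_moe.input_linear.1"],
    # w2, one entry per expert
    ["block_sparse_moe.output_linear.0", "block_sparse_moe.output_linear.1"],
    ["input_layernorm", "post_attention_layernorm"],
]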