instructlab · fabiendupont · Oct 10, 2024
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -27,7 +27,7 @@ on:
       - '.github/**'
 
 env:
-  PYTHON_VERSION: 3.11
+  PYTHON_VERSION: 3.12
 
 jobs:
   lint:
@@ -45,10 +45,10 @@ jobs:
           fetch-depth: 0
           submodules: true
 
-      - name: Setup Python 3.11
+      - name: Setup Python 3.12
         uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
         with:
-          python-version: 3.11
+          python-version: 3.12
           cache: pip
           cache-dependency-path: |
             **/pyproject.toml

diff --git a/src/instructlab/dolomite/hf_models/__init__.py b/src/instructlab/dolomite/hf_models/__init__.py
@@ -2,9 +2,9 @@
 # Extracted from https://github.com/ibm-granite/dolomite-engine
 # ----------------------------------------------------------------
 # Local
-from .models.gpt_dolomite.config import GPTDolomiteConfig
 from .model_conversion import export_to_huggingface, import_from_huggingface
 from .models import GPTDolomiteForCausalLM, GPTDolomiteModel
+from .models.gpt_dolomite.config import GPTDolomiteConfig
 from .register_hf import register_model_classes
 
 register_model_classes()
diff --git a/src/instructlab/dolomite/hf_models/config.py b/src/instructlab/dolomite/hf_models/config.py
@@ -1,5 +1,7 @@
+# Third Party
 from transformers import PretrainedConfig
 
+# Local
 from .enums import AttentionHeadType, InitMethod, PositionEmbeddingType
 
 
@@ -98,7 +100,9 @@ def __init__(
             if self.num_key_value_heads is None:
                 self.num_key_value_heads = 1
 
-            assert self.num_key_value_heads == 1, "MultiQueryAttention should have 1 head for keys and values"
+            assert (
+                self.num_key_value_heads == 1
+            ), "MultiQueryAttention should have 1 head for keys and values"
         elif attention_head_type == AttentionHeadType.gqa:
             assert (
                 self.num_key_value_heads is not None
@@ -108,4 +112,9 @@ def __init__(
                 self.n_head % self.num_key_value_heads == 0
             ), "GroupedQueryAttention should have more than 1 head for keys and values"
 
-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, pad_token_id=pad_token_id, **kwargs)
+        super().__init__(
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            pad_token_id=pad_token_id,
+            **kwargs,
+        )
diff --git a/src/instructlab/dolomite/hf_models/enums.py b/src/instructlab/dolomite/hf_models/enums.py
@@ -1,3 +1,4 @@
+# Standard
 from enum import Enum
 
 

diff --git a/src/instructlab/dolomite/hf_models/mixins/__init__.py b/src/instructlab/dolomite/hf_models/mixins/__init__.py
@@ -1,4 +1,7 @@
+# Local
 from .dense import BaseModelMixin, CausalLMModelMixin, PreTrainedModelMixin
-#from .dense_TP import BaseModelMixin_TP, CausalLMModelMixin_TP, PreTrainedModelMixin_TP
+
+# from .dense_TP import BaseModelMixin_TP, CausalLMModelMixin_TP, PreTrainedModelMixin_TP
 from .moe import BaseMoEModelMixin, CausalLMMoEModelMixin, PreTrainedMoEModelMixin
-#from .moe_TP import BaseMoEModelMixin_TP, CausalLMMoEModelMixin_TP, PreTrainedMoEModelMixin_TP
+
+# from .moe_TP import BaseMoEModelMixin_TP, CausalLMMoEModelMixin_TP, PreTrainedMoEModelMixin_TP
diff --git a/src/instructlab/dolomite/hf_models/mixins/dense/__init__.py b/src/instructlab/dolomite/hf_models/mixins/dense/__init__.py
@@ -1,2 +1,3 @@
+# Local
 from .base import BaseModelMixin, PreTrainedModelMixin
 from .main import CausalLMModelMixin