Skip to content

Commit

Permalink
Merge branch 'main' into shashank/flexattention
Browse files Browse the repository at this point in the history
  • Loading branch information
ShashankMosaicML authored Jan 23, 2025
2 parents 369e818 + 63a733d commit 8a62ca4
Show file tree
Hide file tree
Showing 17 changed files with 46 additions and 15 deletions.
2 changes: 1 addition & 1 deletion llmfoundry/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

"""The LLM Foundry Version."""

__version__ = '0.16.0.dev0'
__version__ = '0.17.0.dev0'
8 changes: 8 additions & 0 deletions llmfoundry/command_utils/data_prep/convert_delta_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,14 @@ def fetch_DT(
message=
f'The data preparation cluster you provided is not usable. Please retry with a cluster that is healthy and alive. {e}',
) from e
if isinstance(
e,
spark_errors.SparkConnectGrpcException,
) and 'do not have permission to attach to cluster' in str(e):
raise FaultyDataPrepCluster(
message=
f'You do not have permission to attach to the data preparation cluster you provided. {e}',
) from e
if isinstance(e, grpc.RpcError) and e.code(
) == grpc.StatusCode.INTERNAL and 'Job aborted due to stage failure' in e.details(
):
Expand Down
7 changes: 7 additions & 0 deletions llmfoundry/data/finetuning/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ class StreamingFinetuningDataset(StreamingDataset):
replication (int, optional): Determines how many consecutive devices will receive the same
samples. Useful for training with tensor or sequence parallelism, where multiple
devices need to see the same partition of the dataset. Defaults to ``None``.
stream_name (str): The name of the Stream to use, which is registered in
streaming.base.stream.streams_registry. Defaults to ``stream``.
stream_config (dict[str, Any]): Additional arguments to pass to the Stream constructor.
"""

def __init__(
Expand Down Expand Up @@ -632,6 +635,8 @@ def __init__(
allow_unsafe_types: bool = False,
replication: Optional[int] = None,
packing_ratio: Optional[float] = None,
stream_name: str = 'stream',
stream_config: Optional[dict[str, Any]] = None,
**kwargs: Any,
):

Expand Down Expand Up @@ -675,6 +680,8 @@ def __init__(
batching_method=batching_method,
allow_unsafe_types=allow_unsafe_types,
replication=replication,
stream_name=stream_name,
stream_config=stream_config,
**kwargs,
)

Expand Down
7 changes: 7 additions & 0 deletions llmfoundry/data/text_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ class StreamingTextDataset(StreamingDataset):
replication (int, optional): Determines how many consecutive devices will receive the same
samples. Useful for training with tensor or sequence parallelism, where multiple
devices need to see the same partition of the dataset. Defaults to ``None``.
stream_name (str): The name of the Stream to use, which is registered in
streaming.base.stream.streams_registry. Defaults to ``stream``.
stream_config (dict[str, Any]): Additional arguments to pass to the Stream constructor.
"""

def __init__(
Expand Down Expand Up @@ -135,6 +138,8 @@ def __init__(
batching_method: str = 'random',
allow_unsafe_types: bool = False,
replication: Optional[int] = None,
stream_name: str = 'stream',
stream_config: Optional[dict[str, Any]] = None,
**kwargs: Any,
):

Expand Down Expand Up @@ -183,6 +188,8 @@ def __init__(
batching_method=batching_method,
allow_unsafe_types=allow_unsafe_types,
replication=replication,
stream_name=stream_name,
stream_config=stream_config,
**kwargs,
)
self.tokenizer = tokenizer
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b-max-seq-len-8k.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-benchmark-mpt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ image: mosaicml/llm-foundry:2.5.1_cu124-latest
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]

Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-convert-composer-to-hf.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-hf-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-hf-generate.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-llama2-finetune.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-llama3-70b-instruct-finetune.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-openai-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu,openai]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-pretokenize-oci-upload.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ integrations:
- oci-cli==3.23.2
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .
ssh_clone: false # Should be true if using a private repo
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@
'mlflow>=2.14.1,<2.19',
'accelerate>=0.25,<1.2', # for HF inference `device_map`
'transformers>=4.43.2,<4.47',
'mosaicml-streaming>=0.10.0,<0.11',
'mosaicml-streaming>=0.11.0,<0.12',
'torch>=2.5.1,<2.5.2',
'datasets>=2.20.0,<3.2',
'datasets>=2.20.0,<3.3',
'fsspec==2023.6.0', # newer version results in a bug in datasets that duplicates data
'sentencepiece==0.2.0',
'einops==0.8.0',
Expand All @@ -79,7 +79,7 @@
extra_deps = {}

extra_deps['dev'] = [
'coverage[toml]==7.6.4',
'coverage[toml]==7.6.10',
'pre-commit>=3.4.0,<4',
'pytest>=7.2.1,<9',
'pytest_codeblocks>=0.16.1,<0.18',
Expand Down
9 changes: 9 additions & 0 deletions tests/a_scripts/data_prep/test_convert_delta_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,15 @@ def test_fetch_DT_catches_grpc_errors(
'The data preparation cluster you provided is not usable. Please retry with a cluster that is healthy and alive.',
],
),
(
SparkConnectGrpcException(
'do not have permission to attach to cluster etc...',
),
FaultyDataPrepCluster,
[
'You do not have permission to attach to the data preparation cluster you provided.',
],
),
(
grpc_lib_error,
FaultyDataPrepCluster,
Expand Down

0 comments on commit 8a62ca4

Please sign in to comment.