Skip to content

Commit

Permalink
Merge branch 'main' into shashank/flexattention
Browse files Browse the repository at this point in the history
  • Loading branch information
ShashankMosaicML authored Jan 23, 2025
2 parents 369e818 + 63a733d commit 8a62ca4
Show file tree
Hide file tree
Showing 17 changed files with 46 additions and 15 deletions.
2 changes: 1 addition & 1 deletion llmfoundry/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

"""The LLM Foundry Version."""

__version__ = '0.16.0.dev0'
__version__ = '0.17.0.dev0'
8 changes: 8 additions & 0 deletions llmfoundry/command_utils/data_prep/convert_delta_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,14 @@ def fetch_DT(
message=
f'The data preparation cluster you provided is not usable. Please retry with a cluster that is healthy and alive. {e}',
) from e
if isinstance(
e,
spark_errors.SparkConnectGrpcException,
) and 'do not have permission to attach to cluster' in str(e):
raise FaultyDataPrepCluster(
message=
f'You do not have permission to attach to the data preparation cluster you provided. {e}',
) from e
if isinstance(e, grpc.RpcError) and e.code(
) == grpc.StatusCode.INTERNAL and 'Job aborted due to stage failure' in e.details(
):
Expand Down
7 changes: 7 additions & 0 deletions llmfoundry/data/finetuning/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ class StreamingFinetuningDataset(StreamingDataset):
replication (int, optional): Determines how many consecutive devices will receive the same
samples. Useful for training with tensor or sequence parallelism, where multiple
devices need to see the same partition of the dataset. Defaults to ``None``.
stream_name (str): The name of the Stream to use, which is registered in
streaming.base.stream.streams_registry. Defaults to ``stream``.
stream_config (dict[str, Any]): Additional arguments to pass to the Stream constructor.
"""

def __init__(
Expand Down Expand Up @@ -632,6 +635,8 @@ def __init__(
allow_unsafe_types: bool = False,
replication: Optional[int] = None,
packing_ratio: Optional[float] = None,
stream_name: str = 'stream',
stream_config: Optional[dict[str, Any]] = None,
**kwargs: Any,
):

Expand Down Expand Up @@ -675,6 +680,8 @@ def __init__(
batching_method=batching_method,
allow_unsafe_types=allow_unsafe_types,
replication=replication,
stream_name=stream_name,
stream_config=stream_config,
**kwargs,
)

Expand Down
7 changes: 7 additions & 0 deletions llmfoundry/data/text_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ class StreamingTextDataset(StreamingDataset):
replication (int, optional): Determines how many consecutive devices will receive the same
samples. Useful for training with tensor or sequence parallelism, where multiple
devices need to see the same partition of the dataset. Defaults to ``None``.
stream_name (str): The name of the Stream to use, which is registered in
streaming.base.stream.streams_registry. Defaults to ``stream``.
stream_config (dict[str, Any]): Additional arguments to pass to the Stream constructor.
"""

def __init__(
Expand Down Expand Up @@ -135,6 +138,8 @@ def __init__(
batching_method: str = 'random',
allow_unsafe_types: bool = False,
replication: Optional[int] = None,
stream_name: str = 'stream',
stream_config: Optional[dict[str, Any]] = None,
**kwargs: Any,
):

Expand Down Expand Up @@ -183,6 +188,8 @@ def __init__(
batching_method=batching_method,
allow_unsafe_types=allow_unsafe_types,
replication=replication,
stream_name=stream_name,
stream_config=stream_config,
**kwargs,
)
self.tokenizer = tokenizer
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b-max-seq-len-8k.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-1b.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-benchmark-mpt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ image: mosaicml/llm-foundry:2.5.1_cu124-latest
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]

Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-convert-composer-to-hf.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-hf-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-hf-generate.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-llama2-finetune.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-llama3-70b-instruct-finetune.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-openai-eval.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .[gpu,openai]
ssh_clone: false # Should be true if using a private repo
Expand Down
2 changes: 1 addition & 1 deletion mcli/mcli-pretokenize-oci-upload.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ integrations:
- oci-cli==3.23.2
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: v0.15.1
git_branch: v0.16.0
# git_commit: # OR use your commit hash
pip_install: .
ssh_clone: false # Should be true if using a private repo
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@
'mlflow>=2.14.1,<2.19',
'accelerate>=0.25,<1.2', # for HF inference `device_map`
'transformers>=4.43.2,<4.47',
'mosaicml-streaming>=0.10.0,<0.11',
'mosaicml-streaming>=0.11.0,<0.12',
'torch>=2.5.1,<2.5.2',
'datasets>=2.20.0,<3.2',
'datasets>=2.20.0,<3.3',
'fsspec==2023.6.0', # newer version results in a bug in datasets that duplicates data
'sentencepiece==0.2.0',
'einops==0.8.0',
Expand All @@ -79,7 +79,7 @@
extra_deps = {}

extra_deps['dev'] = [
'coverage[toml]==7.6.4',
'coverage[toml]==7.6.10',
'pre-commit>=3.4.0,<4',
'pytest>=7.2.1,<9',
'pytest_codeblocks>=0.16.1,<0.18',
Expand Down
9 changes: 9 additions & 0 deletions tests/a_scripts/data_prep/test_convert_delta_to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,15 @@ def test_fetch_DT_catches_grpc_errors(
'The data preparation cluster you provided is not usable. Please retry with a cluster that is healthy and alive.',
],
),
(
SparkConnectGrpcException(
'do not have permission to attach to cluster etc...',
),
FaultyDataPrepCluster,
[
'You do not have permission to attach to the data preparation cluster you provided.',
],
),
(
grpc_lib_error,
FaultyDataPrepCluster,
Expand Down

0 comments on commit 8a62ca4

Please sign in to comment.