Skip to content

Commit

Permalink
Merge branch 'main' into 105-fix-dataset-download-for-its-tokenization2
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuawe authored Apr 10, 2024
2 parents 6fb0bb3 + cc7a6c8 commit 42012f1
Show file tree
Hide file tree
Showing 13 changed files with 177 additions and 139 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,7 @@ cython_debug/
notebooks/scratch.ipynb

# dsstore
.DS_Store
.DS_Store

# vscode debug configs
**/launch.json
42 changes: 0 additions & 42 deletions .vscode/launch.json

This file was deleted.

4 changes: 2 additions & 2 deletions notebooks/training_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
"metadata": {},
"outputs": [],
"source": [
"from delphi.train.config.utils import get_presets_by_name\n",
"from delphi.train.config.utils import load_preset\n",
"from delphi.train.training import run_training\n",
"from delphi.train.utils import ModelTrainingState\n",
"from delphi.train.run_context import RunContext\n",
"\n",
"\n",
"def train() -> tuple[ModelTrainingState, RunContext]:\n",
" config = get_presets_by_name()[\"v0-llama2-100k\"]\n",
" config = load_preset(\"v0-llama2-100k\")\n",
" config.wandb_config.entity = \"jaiwithani\"\n",
" return run_training(config)\n",
"\n",
Expand Down
24 changes: 8 additions & 16 deletions scripts/run_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from pathlib import Path
from typing import Any

from delphi.train.config import build_config_from_files_and_overrides
from delphi.train.config import (
build_config_from_files_and_overrides,
dot_notation_to_dict,
)
from delphi.train.training import run_training
from delphi.train.utils import save_results

Expand Down Expand Up @@ -52,6 +55,8 @@ def setup_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Train a delphi model")
parser.add_argument(
"--config_files",
"--config_file",
"-c",
help=(
"Path to json file(s) containing config values. Specific values can be overridden with --overrides. "
"e.g. `--config_files primary_config.json secondary_config.json"
Expand All @@ -76,22 +81,9 @@ def setup_parser() -> argparse.ArgumentParser:


def overrides_to_dict(overrides: list[str]) -> dict[str, Any]:
# {"--overrides a.b.c=4 foo=false} to {"a": {"b": {"c": 4}}, "foo": False}
# ["a.b.c=4", "foo=false"] to {"a": {"b": {"c": 4}}, "foo": False}
config_vars = {k: v for k, v in [x.split("=") for x in overrides if "=" in x]}
d = {}
for k, v in config_vars.items():
if v is None:
continue
# the laziest, most dangerous type conversion you've seen today
v = eval(v)
cur = d
subkeys = k.split(".")
for subkey in subkeys[:-1]:
if subkey not in cur:
cur[subkey] = {}
cur = cur[subkey]
cur[subkeys[-1]] = v
return d
return dot_notation_to_dict(config_vars)


def main():
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"priority": -1,
"vocab_size": 4096,
"max_seq_len": 512,
"max_epochs": 10,
Expand Down
4 changes: 3 additions & 1 deletion src/delphi/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from importlib.resources import files
from pathlib import Path
from typing import cast

STATIC_ASSETS_DIR = files("delphi.static")
CONFIG_PRESETS_DIR = STATIC_ASSETS_DIR / "configs"
CONFIG_PRESETS_DIR = cast(Path, STATIC_ASSETS_DIR / "configs")

CORPUS_DATASET = "delphi-suite/stories"
TINYSTORIES_TOKENIZED_HF_DATASET = "delphi-suite/v0-tinystories-v2-clean-tokenized"
1 change: 0 additions & 1 deletion src/delphi/static/configs/debug.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"priority": -1,
"vocab_size": 4096,
"max_seq_len": 512,
"max_epochs": 2,
Expand Down
1 change: 0 additions & 1 deletion src/delphi/static/configs/debug_mamba.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"priority": -1,
"vocab_size": 4096,
"max_seq_len": 512,
"max_epochs": 2,
Expand Down
1 change: 0 additions & 1 deletion src/delphi/static/configs/debug_transformers_bloom.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"priority": -1,
"vocab_size": 4096,
"max_seq_len": 512,
"max_epochs": 2,
Expand Down
4 changes: 1 addition & 3 deletions src/delphi/train/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
from .training_config import TrainingConfig
from .utils import (
build_config_dict_from_files,
build_config_from_files,
build_config_from_files_and_overrides,
get_config_dicts_from_files,
dot_notation_to_dict,
get_preset_paths,
get_presets_by_name,
get_user_config_path,
load_preset,
)
Expand Down
Loading

0 comments on commit 42012f1

Please sign in to comment.