From 64c67c6984f1f6eee18a440135a4ac0142a5b368 Mon Sep 17 00:00:00 2001
From: JaiDhyani
Date: Sat, 20 Apr 2024 15:28:57 -0700
Subject: [PATCH] Updating test configs to work with recent changes

---
 configs/debug.json                            |  5 ----
 configs/debug_mamba.json                      |  5 ----
 configs/debug_transformers_bloom.json         |  5 ----
 configs/sample_config.json                    |  1 -
 configs/sample_mamba.json                     |  1 -
 configs/sample_transformers_bloom.json        |  2 --
 src/delphi/test_configs/debug.json            |  6 ++---
 src/delphi/test_configs/debug_mamba.json      | 10 ++++----
 .../debug_transformers_bloom.json             | 10 ++++----
 src/delphi/test_configs/v0-llama2-1.6m.json   | 24 -------------------
 .../test_configs/v0-llama2-100k-quick.json    | 24 -------------------
 src/delphi/test_configs/v0-llama2-100k.json   |  6 +++++
 src/delphi/test_configs/v0-llama2-12.8m.json  | 24 -------------------
 src/delphi/test_configs/v0-llama2-200k.json   | 24 -------------------
 src/delphi/test_configs/v0-llama2-25.6m.json  | 24 -------------------
 src/delphi/test_configs/v0-llama2-3.2m.json   | 24 -------------------
 src/delphi/test_configs/v0-llama2-400k.json   | 24 -------------------
 src/delphi/test_configs/v0-llama2-6.4m.json   | 24 -------------------
 src/delphi/test_configs/v0-llama2-800k.json   | 24 -------------------
 tests/train/config/test_config_utils.py       |  6 +++++
 20 files changed, 25 insertions(+), 248 deletions(-)
 delete mode 100644 src/delphi/test_configs/v0-llama2-1.6m.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-100k-quick.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-12.8m.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-200k.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-25.6m.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-3.2m.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-400k.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-6.4m.json
 delete mode 100644 src/delphi/test_configs/v0-llama2-800k.json

diff --git a/configs/debug.json b/configs/debug.json
index 0aa6abed..b8e2b9b6 100644
--- a/configs/debug.json
+++ b/configs/debug.json
@@ -1,12 +1,7 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 2,
-    "eval_interval": 1,
     "eval_iters": 1,
-    "data_config": {
-        "train_sample_limit": 256
-    },
     "batch_size": 64,
     "model_config": {
         "model_class": "LlamaForCausalLM",
diff --git a/configs/debug_mamba.json b/configs/debug_mamba.json
index 8f502135..37de97e3 100644
--- a/configs/debug_mamba.json
+++ b/configs/debug_mamba.json
@@ -1,13 +1,8 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 2,
-    "eval_interval": 1,
     "log_interval": 1,
     "eval_iters": 10,
-    "data_config": {
-        "train_sample_limit": 64
-    },
     "batch_size": 8,
     "model_config": {
         "model_class": "MambaForCausalLM",
diff --git a/configs/debug_transformers_bloom.json b/configs/debug_transformers_bloom.json
index 8ec0f5b0..fd1385c2 100644
--- a/configs/debug_transformers_bloom.json
+++ b/configs/debug_transformers_bloom.json
@@ -1,12 +1,7 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 2,
-    "eval_interval": 1,
     "eval_iters": 1,
-    "data_config": {
-        "train_sample_limit": 256
-    },
     "batch_size": 64,
     "model_config": {
         "model_class": "BloomForCausalLM",
diff --git a/configs/sample_config.json b/configs/sample_config.json
index 3f326687..6a100764 100644
--- a/configs/sample_config.json
+++ b/configs/sample_config.json
@@ -2,7 +2,6 @@
     "run_name": "2024_03_15_17_28_14",
     "output_dir": "/Users/jaidhyani/Library/Application Support/delphi",
     "device": "auto",
-    "eval_interval": 2000,
     "log_interval": 1,
     "eval_iters": 100,
     "eval_only": false,
diff --git a/configs/sample_mamba.json b/configs/sample_mamba.json
index da089c4f..8a3cb2bb 100644
--- a/configs/sample_mamba.json
+++ b/configs/sample_mamba.json
@@ -2,7 +2,6 @@
     "run_name": "2024_03_15_21_56_35",
     "output_dir": "/Users/jaidhyani/Library/Application Support/delphi",
     "device": "auto",
-    "eval_interval": 2000,
     "log_interval": 1,
     "eval_iters": 100,
     "eval_only": false,
diff --git a/configs/sample_transformers_bloom.json b/configs/sample_transformers_bloom.json
index 9a81ac89..9fdc04ed 100644
--- a/configs/sample_transformers_bloom.json
+++ b/configs/sample_transformers_bloom.json
@@ -1,8 +1,6 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 10,
-    "eval_interval": 10,
     "eval_iters": 8,
     "batch_size": 64,
     "model_config": {
diff --git a/src/delphi/test_configs/debug.json b/src/delphi/test_configs/debug.json
index e0037e43..bdfd6308 100644
--- a/src/delphi/test_configs/debug.json
+++ b/src/delphi/test_configs/debug.json
@@ -2,9 +2,6 @@
     "max_seq_len": 512,
     "max_epochs": 2,
     "eval_iters": 1,
-    "dataset": {
-        "name": "delphi-suite/stories-tokenized"
-    },
     "batch_ordering_seed": 42,
     "torch_seed": 1337,
     "batch_size": 64,
@@ -16,5 +13,8 @@
         "num_hidden_layers": 2,
         "num_key_value_heads": 2,
         "vocab_size": 4096
+    },
+    "dataset": {
+        "name": "delphi-suite/v0-tinystories-v2-clean-tokenized"
     }
 }
\ No newline at end of file
diff --git a/src/delphi/test_configs/debug_mamba.json b/src/delphi/test_configs/debug_mamba.json
index 8f502135..7fddcb26 100644
--- a/src/delphi/test_configs/debug_mamba.json
+++ b/src/delphi/test_configs/debug_mamba.json
@@ -1,13 +1,8 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 2,
-    "eval_interval": 1,
     "log_interval": 1,
     "eval_iters": 10,
-    "data_config": {
-        "train_sample_limit": 64
-    },
     "batch_size": 8,
     "model_config": {
         "model_class": "MambaForCausalLM",
@@ -18,5 +13,10 @@
         "conv_kernel": 2,
         "expand": 2,
         "time_step_rank": 2
+    },
+    "batch_ordering_seed": 42,
+    "torch_seed": 1337,
+    "dataset": {
+        "name": "delphi-suite/v0-tinystories-v2-clean-tokenized"
     }
 }
\ No newline at end of file
diff --git a/src/delphi/test_configs/debug_transformers_bloom.json b/src/delphi/test_configs/debug_transformers_bloom.json
index 8ec0f5b0..793f6a8a 100644
--- a/src/delphi/test_configs/debug_transformers_bloom.json
+++ b/src/delphi/test_configs/debug_transformers_bloom.json
@@ -1,12 +1,7 @@
 {
-    "vocab_size": 4096,
     "max_seq_len": 512,
     "max_epochs": 2,
-    "eval_interval": 1,
     "eval_iters": 1,
-    "data_config": {
-        "train_sample_limit": 256
-    },
     "batch_size": 64,
     "model_config": {
         "model_class": "BloomForCausalLM",
@@ -24,5 +19,10 @@
         "slow_but_exact": false,
         "use_cache": true,
         "vocab_size": 4096
+    },
+    "batch_ordering_seed": 42,
+    "torch_seed": 1337,
+    "dataset": {
+        "name": "delphi-suite/v0-tinystories-v2-clean-tokenized"
     }
 }
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-1.6m.json b/src/delphi/test_configs/v0-llama2-1.6m.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-1.6m.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-100k-quick.json b/src/delphi/test_configs/v0-llama2-100k-quick.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-100k-quick.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-100k.json b/src/delphi/test_configs/v0-llama2-100k.json
index 95261935..b89872ce 100644
--- a/src/delphi/test_configs/v0-llama2-100k.json
+++ b/src/delphi/test_configs/v0-llama2-100k.json
@@ -1,4 +1,5 @@
 {
+    "max_seq_len": 512,
     "model_config": {
         "model_class": "LlamaForCausalLM",
         "attention_bias": false,
@@ -20,5 +21,10 @@
         "tie_word_embeddings": true,
         "use_cache": true,
         "vocab_size": 4096
+    },
+    "batch_ordering_seed": 42,
+    "torch_seed": 1337,
+    "dataset": {
+        "name": "delphi-suite/v0-tinystories-v2-clean-tokenized"
     }
 }
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-12.8m.json b/src/delphi/test_configs/v0-llama2-12.8m.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-12.8m.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-200k.json b/src/delphi/test_configs/v0-llama2-200k.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-200k.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-25.6m.json b/src/delphi/test_configs/v0-llama2-25.6m.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-25.6m.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-3.2m.json b/src/delphi/test_configs/v0-llama2-3.2m.json
deleted file mode 100644
index 7a2c9689..00000000
--- a/src/delphi/test_configs/v0-llama2-3.2m.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 0.00001,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-400k.json b/src/delphi/test_configs/v0-llama2-400k.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-400k.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-6.4m.json b/src/delphi/test_configs/v0-llama2-6.4m.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-6.4m.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/src/delphi/test_configs/v0-llama2-800k.json b/src/delphi/test_configs/v0-llama2-800k.json
deleted file mode 100644
index 95261935..00000000
--- a/src/delphi/test_configs/v0-llama2-800k.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "model_config": {
-        "model_class": "LlamaForCausalLM",
-        "attention_bias": false,
-        "attention_dropout": 0.0,
-        "bos_token_id": 1,
-        "eos_token_id": 2,
-        "hidden_act": "silu",
-        "hidden_size": 48,
-        "initializer_range": 0.02,
-        "intermediate_size": 128,
-        "max_position_embeddings": 512,
-        "num_attention_heads": 8,
-        "num_hidden_layers": 4,
-        "num_key_value_heads": 4,
-        "pretraining_tp": 1,
-        "rms_norm_eps": 1e-05,
-        "rope_scaling": null,
-        "rope_theta": 10000.0,
-        "tie_word_embeddings": true,
-        "use_cache": true,
-        "vocab_size": 4096
-    }
-}
\ No newline at end of file
diff --git a/tests/train/config/test_config_utils.py b/tests/train/config/test_config_utils.py
index b0931fe6..6639e503 100644
--- a/tests/train/config/test_config_utils.py
+++ b/tests/train/config/test_config_utils.py
@@ -12,6 +12,12 @@
 )
 
 
+def test_configs():
+    test_configs = list(TEST_CONFIGS_DIR.glob("*.json"))
+    for config in test_configs:
+        build_config_from_files_and_overrides([config], {})
+
+
 def test_merge_two_dicts():
     dict1 = {"a": 1, "b": 2, "c": {"d": 3, "e": 4}}
     dict2 = {"a": 5, "c": {"d": 6}}