From c24291efea95e049512facc921cc7346d8429e43 Mon Sep 17 00:00:00 2001
From: zsw256
Date: Fri, 1 Mar 2024 03:25:21 +0000
Subject: [PATCH] fix path

---
 projects/Llama/adapter/adapter_config.py | 2 +-
 projects/Llama/adapter/adapter_sft.py    | 1 +
 projects/Llama/configs/llama_config.py   | 2 +-
 projects/Llama/configs/llama_sft.py      | 1 +
 projects/Llama/readme.md                 | 2 +-
 projects/Llama/utils/prepare_alpaca.py   | 5 +++++
 6 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/projects/Llama/adapter/adapter_config.py b/projects/Llama/adapter/adapter_config.py
index 7381e64af..80f13cb71 100644
--- a/projects/Llama/adapter/adapter_config.py
+++ b/projects/Llama/adapter/adapter_config.py
@@ -11,7 +11,7 @@
     hidden_size=4096,
     initializer_range=0.02,
     intermediate_size=11008,
-    max_position_embeddings=4096,
+    max_position_embeddings=2048,
     num_attention_heads=32,
     hidden_layers=32,
     pretraining_tp=1,
diff --git a/projects/Llama/adapter/adapter_sft.py b/projects/Llama/adapter/adapter_sft.py
index e95e012bb..50e7512cb 100644
--- a/projects/Llama/adapter/adapter_sft.py
+++ b/projects/Llama/adapter/adapter_sft.py
@@ -39,6 +39,7 @@
 
 # model
 cfg.use_cache = False
+cfg.pretrained_model_path = pretrained_model_path
 model = LazyCall(LlamaForCausalLM)(cfg=cfg)
 
 # datasets
diff --git a/projects/Llama/configs/llama_config.py b/projects/Llama/configs/llama_config.py
index 58b86ecd6..01d208016 100644
--- a/projects/Llama/configs/llama_config.py
+++ b/projects/Llama/configs/llama_config.py
@@ -12,7 +12,7 @@
     hidden_size=4096,
     initializer_range=0.02,
     intermediate_size=11008,
-    max_position_embeddings=4096,
+    max_position_embeddings=2048,
     num_attention_heads=32,
     hidden_layers=32,
     pretraining_tp=1,
diff --git a/projects/Llama/configs/llama_sft.py b/projects/Llama/configs/llama_sft.py
index e767d84d7..2185835de 100644
--- a/projects/Llama/configs/llama_sft.py
+++ b/projects/Llama/configs/llama_sft.py
@@ -39,6 +39,7 @@
 )
 
 # model
+cfg.pretrained_model_path = pretrained_model_path
 model = LazyCall(LlamaForCausalLM)(cfg=cfg)
 
 # datasets
diff --git a/projects/Llama/readme.md b/projects/Llama/readme.md
index 9adb3d925..d4a086fc5 100644
--- a/projects/Llama/readme.md
+++ b/projects/Llama/readme.md
@@ -33,7 +33,7 @@ bash tools/train.sh projects/Llama/adapter/train_net.py projects/Llama/adapter/a
 
 ## Evaluate
 
-> set the eval parameters in `/data/home/xiezipeng/libai/projects/Llama/utils/eval_adapter.py`, and running:
+> set the eval parameters in `libai/projects/Llama/utils/eval_adapter.py`, and running:
 ```python3
 python projects/Llama/utils/eval_adapter.py
 ```
diff --git a/projects/Llama/utils/prepare_alpaca.py b/projects/Llama/utils/prepare_alpaca.py
index c21f505fb..25a0682fd 100644
--- a/projects/Llama/utils/prepare_alpaca.py
+++ b/projects/Llama/utils/prepare_alpaca.py
@@ -13,7 +13,9 @@
 
 from libai.config import instantiate
 from libai.utils.logger import setup_logger
+from libai.config import LazyCall
 from projects.Llama.configs.llama_config import tokenization
+from projects.Llama.tokenizer import LlamaTokenizer
 
 logger = setup_logger()
 
@@ -46,6 +48,9 @@ def prepare(
         data = json.load(file)
 
     logger.info("Loading tokenizer...")
+    tokenization.tokenizer = LazyCall(LlamaTokenizer)(
+        pretrained_model_path=os.path.join(checkpoint_dir, "tokenizer.model")
+    )
     tokenizer = instantiate(tokenization.tokenizer)
 
     # Partition the dataset into train and test