diff --git a/libai/inference/basic.py b/libai/inference/basic.py
index 752f4578a..4de70d010 100644
--- a/libai/inference/basic.py
+++ b/libai/inference/basic.py
@@ -16,6 +16,7 @@
 import logging
 from abc import ABCMeta, abstractmethod
 from typing import Any, Dict
+from pathlib import Path
 
 import oneflow as flow
 
@@ -62,12 +63,20 @@ def __init__(
             pipeline_num_layers,
         )
         self.device = device
-        if device:
-            self.cfg.train.dist.device_type = device
+        self.cfg.train.dist.device_type = device
         dist.setup_dist_util(self.cfg.train.dist)
         logger.info(self.cfg.train.dist)
 
         # initial and load model
+        self.model_path = model_path
+        if self.model_path is not None:
+            # If a model_path is provided in BasePipeline,
+            # we use it with priority, overwrite the pretrained_model_path in config
+            self.cfg.model.cfg.pretrained_model_path = self.model_path
+        else:
+            # If the model_path in BasePipeline is None, then use the one from the config
+            assert "pretrained_model_path" in self.cfg.model.cfg
+            self.model_path = self.cfg.model.cfg.pretrained_model_path
         self.model = self.load_pretrain_weight(self.cfg.model, model_path, mode=mode)
         self.model._apply(dist.convert_to_distributed_default_setting)
 
@@ -138,6 +147,13 @@ def load_pretrain_weight(
     def build_tokenizer(self, cfg):
         tokenizer = None
         if try_get_key(cfg, "tokenization") is not None:
+            tokenizer_cfg = cfg.tokenization.tokenizer
+            if "pretrained_model_path" not in tokenizer_cfg:
+                # If "pretrained_model_path" does not exist in the tokenizer's config,
+                # set it to default as f"{model_path}/tokenizer.model"
+                tokenizer_cfg.pretrained_model_path = str(
+                    Path(self.model_path).joinpath("tokenizer.model")
+                )
             tokenizer = DefaultTrainer.build_tokenizer(cfg)
         return tokenizer
 
diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py
index f84313fd7..5fab501de 100644
--- a/libai/utils/distributed.py
+++ b/libai/utils/distributed.py
@@ -72,6 +72,22 @@ def _init_distributed_env(self, cfg):
 
         # Add set device type
         self._device_type = try_get_key(cfg, "device_type", default="cuda")
+        if self._device_type == "npu":
+            try:
+                import oneflow_npu
+            except ImportError:
+                raise ImportError(
+                    "The module 'oneflow_npu' is not installed. Please install it to use NPU devices."
+                )
+        elif self._device_type == "xpu":
+            try:
+                import oneflow_xpu
+            except ImportError:
+                raise ImportError(
+                    "The module 'oneflow_xpu' is not installed. Please install it to use XPU devices."
+                )
+        elif self._device_type not in ("cuda", "npu", "xpu", "cpu"):
+            raise NotImplementedError(f"Unsupported device {self._device_type}")
 
     def _init_parallel_size(self, cfg):
 
diff --git a/projects/Llama/README.md b/projects/Llama/README.md
index 8c6dd862d..f58e416c1 100644
--- a/projects/Llama/README.md
+++ b/projects/Llama/README.md
@@ -50,11 +50,11 @@ bash tools/infer.sh projects/Llama/pipeline.py 8
 
 - npu
 ```bash
-python projects/Llama/pipeline.py --device=npu --mode=huggingface --config_file=projects/Llama/configs/llama_config_npu.py
+python projects/Llama/pipeline.py --device=npu --mode=huggingface --model_path /your/model/path
 ```
 
 - xpu
 ```bash
-python projects/Llama/pipeline.py --device=xpu --mode=huggingface --config_file=projects/Llama/configs/llama_config_xpu.py
+python projects/Llama/pipeline.py --device=xpu --mode=huggingface --model_path /your/model/path
 ```
 
diff --git a/projects/Llama/configs/llama_config.py b/projects/Llama/configs/llama_config.py
index 01d208016..36f95d126 100644
--- a/projects/Llama/configs/llama_config.py
+++ b/projects/Llama/configs/llama_config.py
@@ -57,5 +57,5 @@
 tokenization = OmegaConf.create()
 tokenization.make_vocab_size_divisible_by = 1
 tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
+    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
 )
diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py
deleted file mode 100644
index e06ca1bee..000000000
--- a/projects/Llama/configs/llama_config_npu.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from omegaconf import DictConfig, OmegaConf
-
-from libai.config import LazyCall
-from projects.Llama.llama import LlamaForCausalLM
-from projects.Llama.tokenizer import LlamaTokenizer
-from configs.common.train import train
-
-import oneflow_npu
-
-cfg = dict(
-    # Model
-    hidden_act="silu",
-    hidden_size=4096,
-    initializer_range=0.02,
-    intermediate_size=11008,
-    max_position_embeddings=2048,
-    num_attention_heads=32,
-    hidden_layers=32,
-    pretraining_tp=1,
-    rms_norm_eps=1e-05,
-    rope_scaling=None,
-    tie_word_embeddings=False,
-    vocab_size=32000,
-    use_scaled_init_for_output_weights=False,
-    scale_mask_softmax_fusion=False,
-    amp_enabled=True,
-    # Inference
-    is_encoder_decoder=False,
-    max_length=256,
-    min_length=0,
-    do_sample=False,
-    early_stopping=False,
-    num_beams=1,
-    num_beam_groups=1,
-    diversity_penalty=0.0,
-    temperature=0.9,
-    top_k=50,
-    top_p=0.6,
-    typical_p=1.0,
-    repetition_penalty=1.0,
-    length_penalty=1.0,
-    no_repeat_ngram_size=0,
-    encoder_no_repeat_ngram_size=0,
-    num_return_sequences=1,
-    chunk_size_feed_forward=0,
-    output_scores=False,
-    use_cache=True,
-    bos_token_id=1,
-    eos_token_id=2,
-    pad_token_id=0,
-    # train
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf",
-    pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf",
-)
-
-cfg = DictConfig(cfg)
-
-model = LazyCall(LlamaForCausalLM)(cfg=cfg)
-tokenization = OmegaConf.create()
-tokenization.make_vocab_size_divisible_by = 1
-tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
-    pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf/tokenizer.model"
-)
diff --git a/projects/Llama/configs/llama_config_xpu.py b/projects/Llama/configs/llama_config_xpu.py
deleted file mode 100644
index 0f9fa66c2..000000000
--- a/projects/Llama/configs/llama_config_xpu.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from omegaconf import DictConfig, OmegaConf
-
-from libai.config import LazyCall
-from projects.Llama.llama import LlamaForCausalLM
-from projects.Llama.tokenizer import LlamaTokenizer
-from configs.common.train import train
-
-import oneflow_xpu
-
-cfg = dict(
-    # Model
-    hidden_act="silu",
-    hidden_size=4096,
-    initializer_range=0.02,
-    intermediate_size=11008,
-    max_position_embeddings=2048,
-    num_attention_heads=32,
-    hidden_layers=32,
-    pretraining_tp=1,
-    rms_norm_eps=1e-05,
-    rope_scaling=None,
-    tie_word_embeddings=False,
-    vocab_size=32000,
-    use_scaled_init_for_output_weights=False,
-    scale_mask_softmax_fusion=False,
-    amp_enabled=True,
-    # Inference
-    is_encoder_decoder=False,
-    max_length=256,
-    min_length=0,
-    do_sample=False,
-    early_stopping=False,
-    num_beams=1,
-    num_beam_groups=1,
-    diversity_penalty=0.0,
-    temperature=0.9,
-    top_k=50,
-    top_p=0.6,
-    typical_p=1.0,
-    repetition_penalty=1.0,
-    length_penalty=1.0,
-    no_repeat_ngram_size=0,
-    encoder_no_repeat_ngram_size=0,
-    num_return_sequences=1,
-    chunk_size_feed_forward=0,
-    output_scores=False,
-    use_cache=True,
-    bos_token_id=1,
-    eos_token_id=2,
-    pad_token_id=0,
-    # train
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf",
-    pretrained_model_path="/root/models/Llama-2-7b-chat-hf",
-)
-
-cfg = DictConfig(cfg)
-
-model = LazyCall(LlamaForCausalLM)(cfg=cfg)
-tokenization = OmegaConf.create()
-tokenization.make_vocab_size_divisible_by = 1
-tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
-    pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model"
-)
diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py
index 3014f6d40..4b65d2895 100644
--- a/projects/Llama/pipeline.py
+++ b/projects/Llama/pipeline.py
@@ -95,7 +95,7 @@ def postprocess(self, model_output_dict, **kwargs) -> dict:
     default="projects/Llama/configs/llama_config.py",
     help="Path to the configuration file.",
 )
-@click.option("--model_path", default="", help="Path to the model checkpoint.")
+@click.option("--model_path", default=None, help="Path to the model checkpoint.")
 @click.option(
     "--mode",
     default="libai",
@@ -105,12 +105,6 @@ def postprocess(self, model_output_dict, **kwargs) -> dict:
     "--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'."
 )
 def main(config_file, model_path, mode, device):
-    if model_path:
-        print(
-            "Note: The '--model_path' option is for the model checkpoint only. "
-            "Please configure 'tokenization.tokenizer.pretrained_model_path' "
-            "directly in the config file."
-        )
     pipeline = TextGenerationPipeline(
         config_file,
         data_parallel=1,