diff --git a/libai/inference/basic.py b/libai/inference/basic.py
index 752f4578a..4de70d010 100644
--- a/libai/inference/basic.py
+++ b/libai/inference/basic.py
@@ -16,6 +16,7 @@
 import logging
 from abc import ABCMeta, abstractmethod
 from typing import Any, Dict
+from pathlib import Path
 
 import oneflow as flow
 
@@ -62,12 +63,20 @@ def __init__(
             pipeline_num_layers,
         )
         self.device = device
-        if device:
-            self.cfg.train.dist.device_type = device
+        self.cfg.train.dist.device_type = device
         dist.setup_dist_util(self.cfg.train.dist)
         logger.info(self.cfg.train.dist)
 
         # initial and load model
+        self.model_path = model_path
+        if self.model_path is not None:
+            # If a model_path is provided in BasePipeline,
+            # we use it with priority, overwrite the pretrained_model_path in config
+            self.cfg.model.cfg.pretrained_model_path = self.model_path
+        else:
+            # If the model_path in BasePipeline is None, then use the one from the config
+            assert "pretrained_model_path" in self.cfg.model.cfg
+            self.model_path = self.cfg.model.cfg.pretrained_model_path
         self.model = self.load_pretrain_weight(self.cfg.model, model_path, mode=mode)
         self.model._apply(dist.convert_to_distributed_default_setting)
 
@@ -138,6 +147,13 @@ def load_pretrain_weight(
     def build_tokenizer(self, cfg):
         tokenizer = None
         if try_get_key(cfg, "tokenization") is not None:
+            tokenizer_cfg = cfg.tokenization.tokenizer
+            if "pretrained_model_path" not in tokenizer_cfg:
+                # If "pretrained_model_path" does not exist in the tokenizer's config,
+                # set it to default as f"{model_path}/tokenizer.model"
+                tokenizer_cfg.pretrained_model_path = str(
+                    Path(self.model_path).joinpath("tokenizer.model")
+                )
             tokenizer = DefaultTrainer.build_tokenizer(cfg)
         return tokenizer
 
diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py
index f84313fd7..5fab501de 100644
--- a/libai/utils/distributed.py
+++ b/libai/utils/distributed.py
@@ -72,6 +72,22 @@ def _init_distributed_env(self, cfg):
 
         # Add set device type
         self._device_type = try_get_key(cfg, "device_type", default="cuda")
+        if self._device_type == "npu":
+            try:
+                import oneflow_npu
+            except ImportError:
+                raise ImportError(
+                    "The module 'oneflow_npu' is not installed. Please install it to use NPU devices."
+                )
+        elif self._device_type == "xpu":
+            try:
+                import oneflow_xpu
+            except ImportError:
+                raise ImportError(
+                    "The module 'oneflow_xpu' is not installed. Please install it to use XPU devices."
+                )
+        elif self._device_type not in ("cuda", "npu", "xpu", "cpu"):
+            raise NotImplementedError(f"Unsupported device {self._device_type}")
 
     def _init_parallel_size(self, cfg):
 
diff --git a/projects/Llama/README.md b/projects/Llama/README.md
index 8c6dd862d..f58e416c1 100644
--- a/projects/Llama/README.md
+++ b/projects/Llama/README.md
@@ -50,11 +50,11 @@ bash tools/infer.sh projects/Llama/pipeline.py 8
 
 - npu
 ```bash
-python projects/Llama/pipeline.py --device=npu --mode=huggingface --config_file=projects/Llama/configs/llama_config_npu.py
+python projects/Llama/pipeline.py --device=npu --mode=huggingface --model_path /your/model/path
 ```
 
 - xpu
 ```bash
-python projects/Llama/pipeline.py --device=xpu --mode=huggingface --config_file=projects/Llama/configs/llama_config_xpu.py
+python projects/Llama/pipeline.py --device=xpu --mode=huggingface --model_path /your/model/path
 ```
 
diff --git a/projects/Llama/configs/llama_config.py b/projects/Llama/configs/llama_config.py
index 01d208016..36f95d126 100644
--- a/projects/Llama/configs/llama_config.py
+++ b/projects/Llama/configs/llama_config.py
@@ -57,5 +57,5 @@
 tokenization = OmegaConf.create()
 tokenization.make_vocab_size_divisible_by = 1
 tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
+    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
 )
diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py
deleted file mode 100644
index e06ca1bee..000000000
--- a/projects/Llama/configs/llama_config_npu.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from omegaconf import DictConfig, OmegaConf
-
-from libai.config import LazyCall
-from projects.Llama.llama import LlamaForCausalLM
-from projects.Llama.tokenizer import LlamaTokenizer
-from configs.common.train import train
-
-import oneflow_npu
-
-cfg = dict(
-    # Model
-    hidden_act="silu",
-    hidden_size=4096,
-    initializer_range=0.02,
-    intermediate_size=11008,
-    max_position_embeddings=2048,
-    num_attention_heads=32,
-    hidden_layers=32,
-    pretraining_tp=1,
-    rms_norm_eps=1e-05,
-    rope_scaling=None,
-    tie_word_embeddings=False,
-    vocab_size=32000,
-    use_scaled_init_for_output_weights=False,
-    scale_mask_softmax_fusion=False,
-    amp_enabled=True,
-    # Inference
-    is_encoder_decoder=False,
-    max_length=256,
-    min_length=0,
-    do_sample=False,
-    early_stopping=False,
-    num_beams=1,
-    num_beam_groups=1,
-    diversity_penalty=0.0,
-    temperature=0.9,
-    top_k=50,
-    top_p=0.6,
-    typical_p=1.0,
-    repetition_penalty=1.0,
-    length_penalty=1.0,
-    no_repeat_ngram_size=0,
-    encoder_no_repeat_ngram_size=0,
-    num_return_sequences=1,
-    chunk_size_feed_forward=0,
-    output_scores=False,
-    use_cache=True,
-    bos_token_id=1,
-    eos_token_id=2,
-    pad_token_id=0,
-    # train
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf",
-    pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf",
-)
-
-cfg = DictConfig(cfg)
-
-model = LazyCall(LlamaForCausalLM)(cfg=cfg)
-tokenization = OmegaConf.create()
-tokenization.make_vocab_size_divisible_by = 1
-tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
-    pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf/tokenizer.model"
-)
diff --git a/projects/Llama/configs/llama_config_xpu.py b/projects/Llama/configs/llama_config_xpu.py
deleted file mode 100644
index 0f9fa66c2..000000000
--- a/projects/Llama/configs/llama_config_xpu.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from omegaconf import DictConfig, OmegaConf
-
-from libai.config import LazyCall
-from projects.Llama.llama import LlamaForCausalLM
-from projects.Llama.tokenizer import LlamaTokenizer
-from configs.common.train import train
-
-import oneflow_xpu
-
-cfg = dict(
-    # Model
-    hidden_act="silu",
-    hidden_size=4096,
-    initializer_range=0.02,
-    intermediate_size=11008,
-    max_position_embeddings=2048,
-    num_attention_heads=32,
-    hidden_layers=32,
-    pretraining_tp=1,
-    rms_norm_eps=1e-05,
-    rope_scaling=None,
-    tie_word_embeddings=False,
-    vocab_size=32000,
-    use_scaled_init_for_output_weights=False,
-    scale_mask_softmax_fusion=False,
-    amp_enabled=True,
-    # Inference
-    is_encoder_decoder=False,
-    max_length=256,
-    min_length=0,
-    do_sample=False,
-    early_stopping=False,
-    num_beams=1,
-    num_beam_groups=1,
-    diversity_penalty=0.0,
-    temperature=0.9,
-    top_k=50,
-    top_p=0.6,
-    typical_p=1.0,
-    repetition_penalty=1.0,
-    length_penalty=1.0,
-    no_repeat_ngram_size=0,
-    encoder_no_repeat_ngram_size=0,
-    num_return_sequences=1,
-    chunk_size_feed_forward=0,
-    output_scores=False,
-    use_cache=True,
-    bos_token_id=1,
-    eos_token_id=2,
-    pad_token_id=0,
-    # train
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf",
-    pretrained_model_path="/root/models/Llama-2-7b-chat-hf",
-)
-
-cfg = DictConfig(cfg)
-
-model = LazyCall(LlamaForCausalLM)(cfg=cfg)
-tokenization = OmegaConf.create()
-tokenization.make_vocab_size_divisible_by = 1
-tokenization.tokenizer = LazyCall(LlamaTokenizer)(
-    # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
-    pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model"
-)
diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py
index 3014f6d40..4b65d2895 100644
--- a/projects/Llama/pipeline.py
+++ b/projects/Llama/pipeline.py
@@ -95,7 +95,7 @@ def postprocess(self, model_output_dict, **kwargs) -> dict:
     default="projects/Llama/configs/llama_config.py",
     help="Path to the configuration file.",
 )
-@click.option("--model_path", default="", help="Path to the model checkpoint.")
+@click.option("--model_path", default=None, help="Path to the model checkpoint.")
 @click.option(
     "--mode",
     default="libai",
@@ -105,12 +105,6 @@ def postprocess(self, model_output_dict, **kwargs) -> dict:
     "--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'."
 )
 def main(config_file, model_path, mode, device):
-    if model_path:
-        print(
-            "Note: The '--model_path' option is for the model checkpoint only. "
-            "Please configure 'tokenization.tokenizer.pretrained_model_path' "
-            "directly in the config file."
-        )
     pipeline = TextGenerationPipeline(
         config_file,
         data_parallel=1,