Fix NPU LLM example save/load tokenizer (#12485)
JinBridger authored Dec 3, 2024
1 parent 5fe7667 · commit 7082844
Showing 11 changed files with 33 additions and 11 deletions.
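
The change is the same across the NPU LLM examples: the branch that converts and saves the low-bit model now also calls tokenizer.save_pretrained(args.save_directory), and the branch that reloads a previously saved model reads the tokenizer back from the save directory instead of the original model path. Below is a minimal sketch of the resulting pattern; the import path, model identifier, and low-bit setting are illustrative assumptions, and NPU-specific kwargs that appear in the examples (max_context_len, max_prompt_len, transpose_value_cache, pipeline, ...) are omitted for brevity, while the from_pretrained/load_low_bit/save_pretrained calls mirror the diff.

import os
from transformers import AutoTokenizer
# Assumed import path for the NPU-optimized AutoModelForCausalLM used by these examples.
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model_path = "meta-llama/Llama-2-7b-chat-hf"   # hypothetical model id / local path
save_directory = "./llama2-npu-low-bit"        # hypothetical save location

if not os.path.exists(save_directory):
    # First run: convert the model and save both the low-bit weights and the
    # tokenizer into save_directory.
    model = AutoModelForCausalLM.from_pretrained(model_path,
                                                 load_in_low_bit="sym_int4",  # assumed setting
                                                 trust_remote_code=True,
                                                 save_directory=save_directory)
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(save_directory)
else:
    # Later runs: reload model and tokenizer entirely from save_directory,
    # so the original model_path no longer has to be present.
    model = AutoModelForCausalLM.load_low_bit(save_directory, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(save_directory, trust_remote_code=True)

Run an example once to produce save_directory; subsequent runs take the else branch and no longer touch the original model path.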
File 1 of 11:
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True,
 save_directory=args.save_directory)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if args.disable_streaming:
 streamer = None

File 2 of 11:
@@ -78,6 +78,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 attn_implementation="eager",
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -88,8 +90,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 pipeline=True,
 transpose_value_cache=not args.disable_transpose_value_cache,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if args.disable_streaming:
 streamer = None

File 3 of 11:
@@ -84,6 +84,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
 attn_implementation="eager",
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -94,8 +96,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
 pipeline=True,
 transpose_value_cache=not args.disable_transpose_value_cache,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if args.disable_streaming:
 streamer = None

File 4 of 11:
@@ -66,6 +66,8 @@
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True,
 save_directory=args.save_directory)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -77,8 +79,8 @@
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if args.disable_streaming:
 streamer = None

File 5 of 11:
@@ -70,6 +70,8 @@
 mixed_precision=True,
 trust_remote_code=True,
 save_directory=args.save_directory)
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -79,8 +81,8 @@
 max_prompt_len=args.max_prompt_len,
 pipeline=True,
 transpose_value_cache=not args.disable_transpose_value_cache)
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if args.disable_streaming:
 streamer = None

File 6 of 11:
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 DEFAULT_SYSTEM_PROMPT = """\
 """

File 7 of 11:
@@ -43,7 +43,6 @@
 args = parser.parse_args()
 model_path = args.repo_id_or_model_path

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 if not args.lowbit_path or not os.path.exists(args.lowbit_path):
 model = AutoModelForCausalLM.from_pretrained(
@@ -52,13 +51,16 @@
 load_in_low_bit=args.load_in_low_bit,
 attn_implementation="eager"
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.lowbit_path)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.lowbit_path,
 trust_remote_code=True,
 bigdl_transformers_low_bit=args.load_in_low_bit,
 attn_implementation="eager"
 )
+tokenizer = AutoTokenizer.from_pretrained(args.lowbit_path, trust_remote_code=True)

 print(model)

File 8 of 11:
@@ -79,6 +79,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -89,8 +91,8 @@ def get_prompt(message: str, chat_history: list[tuple[str, str]],
 max_prompt_len=args.max_prompt_len,
 transpose_value_cache=not args.disable_transpose_value_cache,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 DEFAULT_SYSTEM_PROMPT = """\
 """

File 9 of 11:
@@ -80,6 +80,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -90,8 +92,8 @@ def get_prompt(user_input: str, chat_history: list[tuple[str, str]],
 max_prompt_len=args.max_prompt_len,
 transpose_value_cache=not args.disable_transpose_value_cache,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 DEFAULT_SYSTEM_PROMPT = """\
 """

File 10 of 11:
@@ -65,6 +65,8 @@
 transpose_value_cache=not args.disable_transpose_value_cache,
 save_directory=args.save_directory
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -76,7 +78,7 @@
 transpose_value_cache=not args.disable_transpose_value_cache,
 trust_remote_code=True,
 )
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

 print("-" * 80)
 print("done")

File 11 of 11:
@@ -71,6 +71,8 @@
 quantization_group_size=args.quantization_group_size,
 save_directory=args.save_directory
 )
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer.save_pretrained(args.save_directory)
 else:
 model = AutoModelForCausalLM.load_low_bit(
 args.save_directory,
@@ -81,8 +83,8 @@
 max_prompt_len=args.max_prompt_len,
 transpose_value_cache=not args.disable_transpose_value_cache,
 )
+tokenizer = AutoTokenizer.from_pretrained(args.save_directory, trust_remote_code=True)

-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

 print("-" * 80)
 print("done")
