Commit 18662dc

change 5 pytorch/huggingface models to fp16 (intel-analytics#11894)

JinheTang authored Aug 22, 2024
1 parent 5c4ed00 commit 18662dc
Showing 7 changed files with 7 additions and 7 deletions.
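
All seven changed files make the same one-line edit: cast the model's weights to fp16 with `.half()` before moving the model to the Intel GPU device `'xpu'`. The first three examples load through ipex-llm's `AutoModelForCausalLM`; here is a minimal sketch of that pattern, with a hypothetical model path and prompt (not taken from this commit):

import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical model path

# Load with ipex-llm's low-bit optimization, as in these examples
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True,
                                             use_cache=True)
# The change this commit makes: cast the remaining non-quantized
# layers to fp16 before moving the model to the Intel GPU
model = model.half().to('xpu')

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
with torch.inference_mode():
    input_ids = tokenizer.encode("What is AI?", return_tensors="pt").to('xpu')
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))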
@@ -47,7 +47,7 @@
                                              optimize_model=False,
                                              trust_remote_code=True,
                                              use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,
@@ -47,7 +47,7 @@
                                              optimize_model=False,
                                              trust_remote_code=True,
                                              use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,
python/llm/example/GPU/HuggingFace/LLM/solar/generate.py (2 changes: 1 addition & 1 deletion)
@@ -47,7 +47,7 @@
                                              load_in_4bit=True,
                                              trust_remote_code=True,
                                              use_cache=True)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -50,7 +50,7 @@
 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
 model = optimize_model(model)

-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
@@ -46,7 +46,7 @@
                                              use_cache=True)
 model = optimize_model(model)

-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path,
@@ -49,7 +49,7 @@
 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
 model = optimize_model(model)

-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
@@ -49,7 +49,7 @@
 # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
 model = optimize_model(model)
-model = model.to('xpu')
+model = model.half().to('xpu')

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
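
The last four hunks use ipex-llm's other loading path: a stock transformers model is optimized in place with `optimize_model`, then gets the same fp16 cast. A sketch of that variant, again with a hypothetical model path:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from ipex_llm import optimize_model

model_path = "openbmb/MiniCPM-2B-sft-bf16"  # hypothetical model path

model = AutoModelForCausalLM.from_pretrained(model_path,
                                             trust_remote_code=True,
                                             use_cache=True)
# optimize_model applies ipex-llm's low-bit optimizations in place;
# on iGPUs, cpu_embedding=True keeps the embedding layer on the CPU
model = optimize_model(model)

# The fp16 cast this commit adds, then move to the Intel GPU
model = model.half().to('xpu')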
