From 03735890f122b7d3338aa4f96f628f35c40fffed Mon Sep 17 00:00:00 2001
From: jinbridge <2635480475@qq.com>
Date: Fri, 21 Jun 2024 10:43:42 +0800
Subject: [PATCH] fix

---
 .../CPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py | 5 +++--
 .../GPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
index d4f31ad8094..384799f884a 100644
--- a/python/llm/example/CPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
+++ b/python/llm/example/CPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
@@ -40,7 +40,6 @@
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
     image_path = args.image_url_or_path
-    device = "cpu"
 
     # Load model in 4 bit,
     # which convert the relevant layers in the model into INT4 format
@@ -57,12 +56,14 @@
         image = Image.open(image_path)
     else:
         image = Image.open(requests.get(image_path, stream=True).raw)
+
+    # here the prompt tuning refers to https://huggingface.co/THUDM/glm-4v-9b/blob/main/README.md
     inputs = tokenizer.apply_chat_template([{"role": "user", "image": image, "content": query}],
                                            add_generation_prompt=True, tokenize=True,
                                            return_tensors="pt", return_dict=True) # chat mode
-    inputs = inputs.to(device)
+    inputs = inputs.to('cpu')
 
     # Generate predicted tokens
     with torch.inference_mode():

diff --git a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
index 82fe1ea1dd6..6a1dd035e9e 100644
--- a/python/llm/example/GPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
+++ b/python/llm/example/GPU/HF-Transformers-AutoModels/Model/glm-4v/generate.py
@@ -40,7 +40,6 @@
     args = parser.parse_args()
     model_path = args.repo_id_or_model_path
    image_path = args.image_url_or_path
-    device = "xpu"
 
     # Load model in 4 bit,
     # which convert the relevant layers in the model into INT4 format
@@ -59,12 +58,14 @@
         image = Image.open(image_path)
     else:
         image = Image.open(requests.get(image_path, stream=True).raw)
+
+    # here the prompt tuning refers to https://huggingface.co/THUDM/glm-4v-9b/blob/main/README.md
     inputs = tokenizer.apply_chat_template([{"role": "user", "image": image, "content": query}],
                                            add_generation_prompt=True, tokenize=True,
                                            return_tensors="pt", return_dict=True) # chat mode
-    inputs = inputs.to(device)
+    inputs = inputs.to('xpu')
 
     # Generate predicted tokens
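
For context, the patched CPU example reduces to the flow sketched below. This is a minimal sketch, not the full generate.py: the imports, the 4-bit loading call, the `max_new_tokens` value, and the literal `model_path`, `image_path`, and `query` values are assumptions standing in for the example's surrounding code and CLI arguments, which this diff does not show.

# Minimal sketch of the patched CPU path; imports and loading call are
# assumed from the example's surrounding code, not shown in the diff above.
import os
import torch
import requests
from PIL import Image
from transformers import AutoTokenizer
from ipex_llm.transformers import AutoModel  # assumed ipex-llm example import

model_path = "THUDM/glm-4v-9b"   # stands in for args.repo_id_or_model_path
image_path = "demo.jpg"          # stands in for args.image_url_or_path
query = "What is in the image?"  # stands in for the example's prompt argument

# Load model in 4 bit, which converts the relevant layers into INT4 format
model = AutoModel.from_pretrained(model_path, load_in_4bit=True,
                                  trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Accept either a local image file or an image URL
if os.path.exists(image_path):
    image = Image.open(image_path)
else:
    image = Image.open(requests.get(image_path, stream=True).raw)

# here the prompt tuning refers to https://huggingface.co/THUDM/glm-4v-9b/blob/main/README.md
inputs = tokenizer.apply_chat_template([{"role": "user", "image": image, "content": query}],
                                       add_generation_prompt=True, tokenize=True,
                                       return_tensors="pt", return_dict=True)  # chat mode
inputs = inputs.to('cpu')  # the patch inlines the device here; 'xpu' in the GPU example

# Generate predicted tokens
with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=32)  # max_new_tokens assumed
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

The patch does not change behavior: dropping the single-use `device` variable and spelling the target device out at the one call site that needs it keeps the CPU and GPU example scripts symmetric.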