update doc

intel-analytics · Jun 4, 2024 · 6243f60 · 6243f60
1 parent 3e1dd3c
commit 6243f60
Showing 1 changed file with 4 additions and 6 deletions.
diff --git a/docs/docs/integrations/llms/ipex_llm_gpu.ipynb b/docs/docs/integrations/llms/ipex_llm_gpu.ipynb
@@ -125,7 +125,7 @@
     "\n",
     "## Basic Usage\n",
     "\n",
-    "Setting `device_map` to `\"xpu\"` when initializing `IpexLLM` will put the LLM model on Intel GPU and benefit from IPEX-LLM optimizations:"
+    "Setting `device` to `\"xpu\"` in `model_kwargs` when initializing `IpexLLM` places the LLM on the Intel GPU so that it benefits from IPEX-LLM optimizations:"
    ]
   },
   {
@@ -146,8 +146,7 @@
     "\n",
     "llm = IpexLLM.from_model_id(\n",
     "    model_id=\"lmsys/vicuna-7b-v1.5\",\n",
-    "    model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True},\n",
-    "    device_map=\"xpu\",\n",
+    "    model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True, \"device\":\"xpu\"},\n",
     ")"
    ]
   },
@@ -175,7 +174,7 @@
    "metadata": {},
    "source": [
     "## Save/Load Low-bit Model\n",
-    "Alternatively, you might save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use - even across different machines. It is space-efficient, as the low-bit model demands significantly less disk space than the original model. And `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step. You can similarly set `device_map` to `xpu` in order to load the LLM model to Intel GPU. "
+    "Alternatively, you can save the low-bit model to disk once and use `from_model_id_low_bit` instead of `from_model_id` to reload it for later use, even across different machines. This is space-efficient, as the low-bit model requires significantly less disk space than the original model. `from_model_id_low_bit` is also more efficient than `from_model_id` in terms of speed and memory usage, as it skips the model conversion step. You can similarly set `device` to `\"xpu\"` in `model_kwargs` to load the LLM onto the Intel GPU. "
    ]
   },
   {
@@ -214,8 +213,7 @@
     "    model_id=saved_lowbit_model_path,\n",
     "    tokenizer_id=\"lmsys/vicuna-7b-v1.5\",\n",
     "    # tokenizer_name=saved_lowbit_model_path,  # copy the tokenizers to saved path if you want to use it this way\n",
-    "    model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True},\n",
-    "    device_map=\"xpu\",\n",
+    "    model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True, \"device\":\"xpu\"},\n",
     ")"
    ]
   },