Update requirements.txt #5

Merged: 2 commits, Sep 11, 2024
166 changes: 8 additions & 158 deletions ipex_llm_gpu.ipynb
@@ -5,7 +5,7 @@
"id": "652ea6c8-8d13-4228-853e-fad46db470f5",
"metadata": {},
"source": [
"# IPEX_LLM using Llamacpp on Intel GPUs"
"# Inference using Llamacpp on Intel GPUs"
]
},
{
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
"This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
"This notebook demonstrates how to run an LLM inference on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -67,7 +67,7 @@
"id": "8040fd21-7782-4b97-a0eb-327816328f17",
"metadata": {},
"source": [
"## Step 2: Install IPEX-LLM\n",
"## Step 2: Setup the environment and install required libraries\n",
"\n",
"### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n",
" ```\n",
@@ -83,7 +83,7 @@
"\n",
"<img src=\"Assets/llm4.png\">\n",
"\n",
"### With the llm-cpp environment active, use pip to install ipex-llm for GPU. \n",
"### With the llm-cpp environment active, use pip to install required libraries for suppport. \n",
Contributor comment: Please remove extra space
"\n",
"```\n",
"pip install --pre --upgrade ipex-llm[cpp]\n",
@@ -116,7 +116,7 @@
"set SYCL_CACHE_PERSISTENT=1\n",
"\n",
"```\n",
"### Below shows a simple example to show how to run a community GGUF model with IPEX-LLM\n",
"### Below shows a simple example to show how to run a community GGUF model\n",
"* Download and run the model for example as below \n",
"\n",
"```\n",
@@ -145,156 +145,6 @@
"! C:\\workshop\\llama-cpp\\main.exe -m ../models/llama-2-7b-chat.Q5_K_M.gguf -n 100 --prompt \"What is AI\" -t 16 -ngl 999 --color -e "
]
},
{
"cell_type": "markdown",
"id": "ec180ac3-e74a-41d9-a9b9-65478dcea556",
"metadata": {},
"source": [
"## Complete code snippet"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33b94504-fcc8-454f-8a8d-b7312b7c0d8e",
"metadata": {},
"outputs": [],
"source": [
"%%writefile src/st_ipexllm_native.py\n",
"import streamlit as st\n",
"import subprocess\n",
"import os\n",
"import threading\n",
"import time\n",
"\n",
"st.title(\"Chat with me!\")\n",
"\n",
"# Get the inputs from the text fields with required logs\n",
"exe_path = st.text_input(\"Enter the path to the main.exe binary generated by the steps outlined:\",value=\"..\\llama-cpp\\main.exe\", key=\"exe_path\")\n",
"print(f\"{exe_path}\\n\")\n",
"if exe_path:\n",
" if os.path.exists(exe_path):\n",
" if os.path.isfile(exe_path):\n",
" print(f\"valid file path: {exe_path}\")\n",
" else:\n",
" st.error(f\"The path {exe_path} is not a file\")\n",
" else:\n",
" st.error(f\"The path {exe_path} does not exist\")\n",
"else:\n",
" print(\"Please enter the file path\")\n",
"\n",
"model_path = st.text_input(\"Enter model file path:\", value=\"..\\models\\llama-2-7b-chat.Q5_K_M.gguf\", key=\"model_name\")\n",
"print(f\"{model_path}\\n\")\n",
"if model_path:\n",
" if os.path.exists(model_path):\n",
" if os.path.isfile(model_path):\n",
" print(f\"valid file path: {model_path}\")\n",
" else:\n",
" st.error(f\"The path {model_path} is not a file\")\n",
" else:\n",
" st.error(f\"The path {model_path} does not exist\")\n",
"else:\n",
" print(\"Please enter the file path\")\n",
"\n",
"\n",
"num_words = st.text_input(\"Enter the number of words you'd expect to see in your answer:\", value=\"100\", key=\"num_words\")\n",
"print(f\"{num_words}\\n\")\n",
"\n",
"question = st.text_input(\"Enter your question\", value=\"What is AI\", key=\"question\")\n",
"question = f'\"{question}\"'\n",
"print(f\"{question}\\n\")\n",
"num_cores = st.text_input(\"Enter the number of cores\", value=\"16\", key=\"num_cores\")\n",
"print(f\"{num_cores}\\n\")\n",
" \n",
"gpu_layers = st.text_input(\"Enter number of GPU layers:\", value=\"999\", key=\"gpu_layers\")\n",
"print(f\"{gpu_layers}\\n\")\n",
"\n",
"def stdout_typewriter_effect(stdout_container, current_stdout):\n",
" current_char = \"\"\n",
" for char in current_stdout:\n",
" current_char+=char\n",
" stdout_container.markdown(current_char)\n",
" time.sleep(0.01)\n",
"\n",
"def launch_exe():\n",
" stdout_chunks = []\n",
" stderr_llama_time = []\n",
" \n",
" def append_stdout(pipe, stdout_lines):\n",
" for line in iter(pipe.readline, ''):\n",
" if line:\n",
" print(line.strip())\n",
" stdout_lines.append(line.strip())\n",
" pipe.close()\n",
"\n",
" def append_stderr(pipe, stderr_lines):\n",
" for line in iter(pipe.readline, ''):\n",
" if line.startswith(\"llama_print_timings\"):\n",
" print(line.strip())\n",
" stderr_lines.append(line.strip())\n",
" pipe.close()\n",
"\n",
" filter_command = '| findstr \"^\"'\n",
" # command to run \n",
" commandparams = exe_path + \" \" + \"-m\" + \" \" + model_path + \" \" + \"-n \" + \" \" + num_words + \" \" + \"--prompt \" + \" \" + question + \" \" + \"-t \" + \" \" + num_cores + \" \" + \"-e -ngl\" + \" \" + gpu_layers + \" \" + filter_command\n",
" # logging command for easy debugging\n",
" print(f\"{commandparams}\")\n",
" try:\n",
" # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time\n",
" result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True)\n",
"\n",
" stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks))\n",
" stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time))\n",
" stdout_thread.start()\n",
" stderr_thread.start()\n",
" stdout_container = st.empty()\n",
" stderr_container = st.empty()\n",
"\n",
" # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess\n",
" # this method is not waiting for subprocess to complete as it only checks for the current status \n",
" while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive():\n",
" # stdout_container.markdown('\\n'.join(stdout_lines))\n",
" stdout_typewriter_effect(stdout_container, '\\n'.join(stdout_chunks))\n",
" stderr_container.text('\\n'.join(stderr_llama_time))\n",
" stdout_thread.join(timeout=0.1)\n",
" stderr_thread.join(timeout=0.1)\n",
" \n",
" stdout_thread.join()\n",
" stderr_thread.join()\n",
"\n",
" except FileNotFoundError:\n",
" st.error(\"The specified EXE file does not exist.\")\n",
" \n",
"if st.button(\"Generate\"):\n",
" with st.spinner(\"Running....Please wait..🐎\"): \n",
" launch_exe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e49d3f11-c86f-4971-ad9e-4562eb76b005",
"metadata": {},
"outputs": [],
"source": [
"! streamlit run src/st_ipexllm_native.py"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "0e741522-23ff-41e9-8bc0-59e0ea126069",
"metadata": {},
"source": [
"### Streamlit sample output\n",
"\n",
"Below is the output of a sample run from the streamlit application and offloaded to iGPU\n",
"\n",
"<img src=\"Assets/llm11.png\"> <img src=\"Assets/output2.png\">\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "92387fa9-2376-49a7-a94b-a29f254a0471",
@@ -314,9 +164,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "llm-cpp",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "llm-cpp"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -328,7 +178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.5"
}
},
"nbformat": 4,
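
Note on the surviving example in this notebook: the diff drops the Streamlit wrapper cells and keeps the direct llama.cpp invocation. A minimal Python sketch of that call (not part of the diff; the binary path, model file, and flag values mirror the notebook's example and should be treated as placeholders):

```python
import subprocess

# Mirror of the notebook's llama.cpp command; adjust paths for your setup.
cmd = [
    r"C:\workshop\llama-cpp\main.exe",                 # llama.cpp binary built earlier in the notebook
    "-m", r"..\models\llama-2-7b-chat.Q5_K_M.gguf",    # community GGUF model file
    "-n", "100",                                        # number of tokens to generate
    "--prompt", "What is AI",                           # prompt text
    "-t", "16",                                         # CPU threads
    "-ngl", "999",                                      # layers offloaded to the Intel GPU
    "-e",                                               # process escape sequences in the prompt
]
result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
```
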
10 changes: 5 additions & 5 deletions ipex_llm_ollama_gpu.ipynb
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
"This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
"This notebook demonstrates how to install Ollama on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -81,7 +81,7 @@
"* Now that we have set up the environment, Intel GPU drivers, and runtime libraries, we can configure ollama to leverage the on-chip GPU.\n",
"* Open miniforge prompt and run the below commands. We Install IPEX-LLM for llama.cpp and to use llama.cpp with IPEX-LLM, first ensure that ipex-llm[cpp] is installed.\n",
"\n",
"### With the ollama environment active, use pip to install ipex-llm for GPU. \n",
"### With the ollama environment active, use pip to install required libraries for GPU. \n",
"```\n",
"conda activate llm-ollama\n",
"pip install --pre --upgrade ipex-llm[cpp]\n",
@@ -259,9 +259,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "llm-ollama",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "llm-ollama"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -273,7 +273,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.5"
}
},
"nbformat": 4,
12 changes: 6 additions & 6 deletions ipex_llm_pytorch_gpu.ipynb
@@ -5,7 +5,7 @@
"id": "4bdf80ae-10bd-438b-a5ae-76a5c5f99a6d",
"metadata": {},
"source": [
"# Inference using Pytorch on Intel GPUs -- Intel LLM Library for Pytorch"
"# Inference using Pytorch on Intel GPUs"
]
},
{
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
"This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
"This notebook demonstrates how to run LLM inference using pytorch on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -66,7 +66,7 @@
"id": "8040fd21-7782-4b97-a0eb-327816328f17",
"metadata": {},
"source": [
"## Step 2: Install IPEX-LLM\n",
"## Step 2: Setup the environment and install required libraries\n",
"\n",
"### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n",
" ```\n",
@@ -486,9 +486,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "llm",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "llm"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -500,7 +500,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.5"
}
},
"nbformat": 4,
6 changes: 3 additions & 3 deletions llm-rag.ipynb
@@ -498,9 +498,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "llm-ollama",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "llm-ollama"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -512,7 +512,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.5"
},
"openvino_notebooks": {
"imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/304aa048-f10c-41c6-bb31-6d2bfdf49cf5",
2 changes: 2 additions & 0 deletions requirements.txt
@@ -9,3 +9,5 @@ numexpr==2.10.1
numpy==1.26.4
huggingface-hub==0.24.3
wikipedia==1.4.0
ollama==0.3.2
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
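
The two lines added here pin the `ollama` Python client (presumably for the Ollama-based notebooks) and point pip at Intel's XPU wheel index; pip honors `--extra-index-url` inside a requirements file, so Intel-hosted packages can resolve during a normal install. A minimal sketch of consuming the updated file, assuming the notebook environment is active:

```
pip install -r requirements.txt
```
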