From f0a06f0d31d0c6b4b8ace17769fb685f1aee47bb Mon Sep 17 00:00:00 2001 From: praveenkk123 Date: Thu, 5 Sep 2024 09:21:45 -0700 Subject: [PATCH 1/2] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index d1a9f67..84d1f35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ numexpr==2.10.1 numpy==1.26.4 huggingface-hub==0.24.3 wikipedia==1.4.0 +ollama==0.3.2 +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ From cb69576509cef9481057ef68bd3b1f6472b63a35 Mon Sep 17 00:00:00 2001 From: Praveen Date: Tue, 10 Sep 2024 22:37:39 +0000 Subject: [PATCH 2/2] updated the notebooks --- ipex_llm_gpu.ipynb | 166 ++----------------------------- ipex_llm_ollama_gpu.ipynb | 10 +-- ipex_llm_pytorch_gpu.ipynb | 12 +-- llm-rag.ipynb | 6 +- src/st_ipexllm_native.py | 107 ------------------------ src/st_rag_chromadb.py | 2 +- 6 files changed, 23 insertions(+), 280 deletions(-) delete mode 100644 src/st_ipexllm_native.py diff --git a/ipex_llm_gpu.ipynb b/ipex_llm_gpu.ipynb index da241a1..94e8d21 100644 --- a/ipex_llm_gpu.ipynb +++ b/ipex_llm_gpu.ipynb @@ -5,7 +5,7 @@ "id": "652ea6c8-8d13-4228-853e-fad46db470f5", "metadata": {}, "source": [ - "# IPEX_LLM using Llamacpp on Intel GPUs" + "# Inference using Llamacpp on Intel GPUs" ] }, { @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to run LLM inference on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -67,7 +67,7 @@ "id": "8040fd21-7782-4b97-a0eb-327816328f17", "metadata": {}, "source": [ - "## Step 2: Install IPEX-LLM\n", + "## Step 2: Set up the environment and install required libraries\n", "\n", "### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n", " ```\n", @@ -83,7 +83,7 @@ "\n", "\n", "\n", - "### With the llm-cpp environment active, use pip to install ipex-llm for GPU. \n", + "### With the llm-cpp environment active, use pip to install the required libraries for GPU support. \n", "\n", "```\n", "pip install --pre --upgrade ipex-llm[cpp]\n", @@ -116,7 +116,7 @@ "set SYCL_CACHE_PERSISTENT=1\n", "\n", "```\n", - "### Below shows a simple example to show how to run a community GGUF model with IPEX-LLM\n", + "### Below is a simple example showing how to run a community GGUF model\n", "* Download and run the model for example as below \n", "\n", "```\n", @@ -145,156 +145,6 @@ "! 
C:\\workshop\\llama-cpp\\main.exe -m ../models/llama-2-7b-chat.Q5_K_M.gguf -n 100 --prompt \"What is AI\" -t 16 -ngl 999 --color -e " ] }, - { - "cell_type": "markdown", - "id": "ec180ac3-e74a-41d9-a9b9-65478dcea556", - "metadata": {}, - "source": [ - "## Complete code snippet" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33b94504-fcc8-454f-8a8d-b7312b7c0d8e", - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile src/st_ipexllm_native.py\n", - "import streamlit as st\n", - "import subprocess\n", - "import os\n", - "import threading\n", - "import time\n", - "\n", - "st.title(\"Chat with me!\")\n", - "\n", - "# Get the inputs from the text fields with required logs\n", - "exe_path = st.text_input(\"Enter the path to the main.exe binary generated by the steps outlined:\",value=\"..\\llama-cpp\\main.exe\", key=\"exe_path\")\n", - "print(f\"{exe_path}\\n\")\n", - "if exe_path:\n", - " if os.path.exists(exe_path):\n", - " if os.path.isfile(exe_path):\n", - " print(f\"valid file path: {exe_path}\")\n", - " else:\n", - " st.error(f\"The path {exe_path} is not a file\")\n", - " else:\n", - " st.error(f\"The path {exe_path} does not exist\")\n", - "else:\n", - " print(\"Please enter the file path\")\n", - "\n", - "model_path = st.text_input(\"Enter model file path:\", value=\"..\\models\\llama-2-7b-chat.Q5_K_M.gguf\", key=\"model_name\")\n", - "print(f\"{model_path}\\n\")\n", - "if model_path:\n", - " if os.path.exists(model_path):\n", - " if os.path.isfile(model_path):\n", - " print(f\"valid file path: {model_path}\")\n", - " else:\n", - " st.error(f\"The path {model_path} is not a file\")\n", - " else:\n", - " st.error(f\"The path {model_path} does not exist\")\n", - "else:\n", - " print(\"Please enter the file path\")\n", - "\n", - "\n", - "num_words = st.text_input(\"Enter the number of words you'd expect to see in your answer:\", value=\"100\", key=\"num_words\")\n", - "print(f\"{num_words}\\n\")\n", - "\n", - "question = st.text_input(\"Enter your question\", value=\"What is AI\", key=\"question\")\n", - "question = f'\"{question}\"'\n", - "print(f\"{question}\\n\")\n", - "num_cores = st.text_input(\"Enter the number of cores\", value=\"16\", key=\"num_cores\")\n", - "print(f\"{num_cores}\\n\")\n", - " \n", - "gpu_layers = st.text_input(\"Enter number of GPU layers:\", value=\"999\", key=\"gpu_layers\")\n", - "print(f\"{gpu_layers}\\n\")\n", - "\n", - "def stdout_typewriter_effect(stdout_container, current_stdout):\n", - " current_char = \"\"\n", - " for char in current_stdout:\n", - " current_char+=char\n", - " stdout_container.markdown(current_char)\n", - " time.sleep(0.01)\n", - "\n", - "def launch_exe():\n", - " stdout_chunks = []\n", - " stderr_llama_time = []\n", - " \n", - " def append_stdout(pipe, stdout_lines):\n", - " for line in iter(pipe.readline, ''):\n", - " if line:\n", - " print(line.strip())\n", - " stdout_lines.append(line.strip())\n", - " pipe.close()\n", - "\n", - " def append_stderr(pipe, stderr_lines):\n", - " for line in iter(pipe.readline, ''):\n", - " if line.startswith(\"llama_print_timings\"):\n", - " print(line.strip())\n", - " stderr_lines.append(line.strip())\n", - " pipe.close()\n", - "\n", - " filter_command = '| findstr \"^\"'\n", - " # command to run \n", - " commandparams = exe_path + \" \" + \"-m\" + \" \" + model_path + \" \" + \"-n \" + \" \" + num_words + \" \" + \"--prompt \" + \" \" + question + \" \" + \"-t \" + \" \" + num_cores + \" \" + \"-e -ngl\" + \" \" + gpu_layers + \" \" + filter_command\n", - " # logging 
command for easy debugging\n", - " print(f\"{commandparams}\")\n", - " try:\n", - " # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time\n", - " result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True)\n", - "\n", - " stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks))\n", - " stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time))\n", - " stdout_thread.start()\n", - " stderr_thread.start()\n", - " stdout_container = st.empty()\n", - " stderr_container = st.empty()\n", - "\n", - " # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess\n", - " # this method is not waiting for subprocess to complete as it only checks for the current status \n", - " while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive():\n", - " # stdout_container.markdown('\\n'.join(stdout_lines))\n", - " stdout_typewriter_effect(stdout_container, '\\n'.join(stdout_chunks))\n", - " stderr_container.text('\\n'.join(stderr_llama_time))\n", - " stdout_thread.join(timeout=0.1)\n", - " stderr_thread.join(timeout=0.1)\n", - " \n", - " stdout_thread.join()\n", - " stderr_thread.join()\n", - "\n", - " except FileNotFoundError:\n", - " st.error(\"The specified EXE file does not exist.\")\n", - " \n", - "if st.button(\"Generate\"):\n", - " with st.spinner(\"Running....Please wait..🐎\"): \n", - " launch_exe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e49d3f11-c86f-4971-ad9e-4562eb76b005", - "metadata": {}, - "outputs": [], - "source": [ - "! streamlit run src/st_ipexllm_native.py" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "0e741522-23ff-41e9-8bc0-59e0ea126069", - "metadata": {}, - "source": [ - "### Streamlit sample output\n", - "\n", - "Below is the output of a sample run from the streamlit application and offloaded to iGPU\n", - "\n", - " \n", - "\n", - "\n" - ] - }, { "cell_type": "markdown", "id": "92387fa9-2376-49a7-a94b-a29f254a0471", @@ -314,9 +164,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-cpp", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-cpp" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -328,7 +178,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/ipex_llm_ollama_gpu.ipynb b/ipex_llm_ollama_gpu.ipynb index 6f1b118..d133c5d 100644 --- a/ipex_llm_ollama_gpu.ipynb +++ b/ipex_llm_ollama_gpu.ipynb @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to install Ollama on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -81,7 +81,7 @@ "* Now that we have set up the environment, Intel GPU drivers, and runtime libraries, we can configure ollama to leverage the on-chip GPU.\n", "* Open miniforge prompt and run the below commands. 
We Install IPEX-LLM for llama.cpp and to use llama.cpp with IPEX-LLM, first ensure that ipex-llm[cpp] is installed.\n", "\n", - "### With the ollama environment active, use pip to install ipex-llm for GPU. \n", + "### With the ollama environment active, use pip to install required libraries for GPU. \n", "```\n", "conda activate llm-ollama\n", "pip install --pre --upgrade ipex-llm[cpp]\n", @@ -259,9 +259,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-ollama", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-ollama" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -273,7 +273,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/ipex_llm_pytorch_gpu.ipynb b/ipex_llm_pytorch_gpu.ipynb index 1bfca94..2d5d55c 100644 --- a/ipex_llm_pytorch_gpu.ipynb +++ b/ipex_llm_pytorch_gpu.ipynb @@ -5,7 +5,7 @@ "id": "4bdf80ae-10bd-438b-a5ae-76a5c5f99a6d", "metadata": {}, "source": [ - "# Inference using Pytorch on Intel GPUs -- Intel LLM Library for Pytorch" + "# Inference using Pytorch on Intel GPUs" ] }, { @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to run LLM inference using pytorch on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -66,7 +66,7 @@ "id": "8040fd21-7782-4b97-a0eb-327816328f17", "metadata": {}, "source": [ - "## Step 2: Install IPEX-LLM\n", + "## Step 2: Setup the environment and install required libraries\n", "\n", "### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n", " ```\n", @@ -486,9 +486,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -500,7 +500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/llm-rag.ipynb b/llm-rag.ipynb index 606e6a6..327e68f 100644 --- a/llm-rag.ipynb +++ b/llm-rag.ipynb @@ -498,9 +498,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-ollama", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-ollama" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -512,7 +512,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" }, "openvino_notebooks": { "imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/304aa048-f10c-41c6-bb31-6d2bfdf49cf5", diff --git a/src/st_ipexllm_native.py b/src/st_ipexllm_native.py deleted file mode 100644 index b8f9424..0000000 --- a/src/st_ipexllm_native.py +++ /dev/null @@ -1,107 +0,0 @@ -import streamlit as st -import subprocess -import os -import threading -import time - -st.title("Chat with me!") - -# Get the inputs from the text fields with required logs -exe_path = st.text_input("Enter the path to the main.exe binary generated by the steps outlined:",value="..\llama-cpp\main.exe", key="exe_path") -print(f"{exe_path}\n") -if exe_path: - if os.path.exists(exe_path): - 
if os.path.isfile(exe_path): - print(f"valid file path: {exe_path}") - else: - st.error(f"The path {exe_path} is not a file") - else: - st.error(f"The path {exe_path} does not exist") -else: - print("Please enter the file path") - -model_path = st.text_input("Enter model file path:", value="..\models\llama-2-7b-chat.Q5_K_M.gguf", key="model_name") -print(f"{model_path}\n") -if model_path: - if os.path.exists(model_path): - if os.path.isfile(model_path): - print(f"valid file path: {model_path}") - else: - st.error(f"The path {model_path} is not a file") - else: - st.error(f"The path {model_path} does not exist") -else: - print("Please enter the file path") - - -num_words = st.text_input("Enter the number of words you'd expect to see in your answer:", value="100", key="num_words") -print(f"{num_words}\n") - -question = st.text_input("Enter your question", value="What is AI", key="question") -question = f'"{question}"' -print(f"{question}\n") -num_cores = st.text_input("Enter the number of cores", value="16", key="num_cores") -print(f"{num_cores}\n") - -gpu_layers = st.text_input("Enter number of GPU layers:", value="999", key="gpu_layers") -print(f"{gpu_layers}\n") - -def stdout_typewriter_effect(stdout_container, current_stdout): - current_char = "" - for char in current_stdout: - current_char+=char - stdout_container.markdown(current_char) - time.sleep(0.01) - -def launch_exe(): - stdout_chunks = [] - stderr_llama_time = [] - - def append_stdout(pipe, stdout_lines): - for line in iter(pipe.readline, ''): - if line: - print(line.strip()) - stdout_lines.append(line.strip()) - pipe.close() - - def append_stderr(pipe, stderr_lines): - for line in iter(pipe.readline, ''): - if line.startswith("llama_print_timings"): - print(line.strip()) - stderr_lines.append(line.strip()) - pipe.close() - - filter_command = '| findstr "^"' - # command to run - commandparams = exe_path + " " + "-m" + " " + model_path + " " + "-n " + " " + num_words + " " + "--prompt " + " " + question + " " + "-t " + " " + num_cores + " " + "-e -ngl" + " " + gpu_layers + " " + filter_command - # logging command for easy debugging - print(f"{commandparams}") - try: - # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time - result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True) - - stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks)) - stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time)) - stdout_thread.start() - stderr_thread.start() - stdout_container = st.empty() - stderr_container = st.empty() - - # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess - # this method is not waiting for subprocess to complete as it only checks for the current status - while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive(): - # stdout_container.markdown('\n'.join(stdout_lines)) - stdout_typewriter_effect(stdout_container, '\n'.join(stdout_chunks)) - stderr_container.text('\n'.join(stderr_llama_time)) - stdout_thread.join(timeout=0.1) - stderr_thread.join(timeout=0.1) - - stdout_thread.join() - stderr_thread.join() - - except FileNotFoundError: - st.error("The specified EXE file does not exist.") - -if st.button("Generate"): - with st.spinner("Running....Please wait..🐎"): - launch_exe() diff --git a/src/st_rag_chromadb.py b/src/st_rag_chromadb.py index 
b07a6b1..9a157e9 100644 --- a/src/st_rag_chromadb.py +++ b/src/st_rag_chromadb.py @@ -20,7 +20,7 @@ model = st.selectbox("Choose a model from the list", models) # Input text to load the document -url_path = st.text_input("Enter the URL to load for RAG:",value="https://www.gutenberg.org/files/1727/1727-h/1727-h.htm", key="url_path") +url_path = st.text_input("Enter the URL to load for RAG:", key="url_path") # Select embedding type embedding_type = st.selectbox("Please select an embedding type", ("ollama", "huggingface", "nomic", "fastembed"),index=1)
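For context on the `ollama==0.3.2` dependency added by the first patch: below is a minimal, illustrative sketch of exercising the Ollama Python client against a locally served model. It assumes an `ollama serve` process is already running and that a model has been pulled; the model name used here is a placeholder, not something taken from this patch.

```python
# Minimal sketch, assuming an Ollama server is already running locally
# ("ollama serve") and the chosen model has been pulled beforehand.
# The model name below is an assumption for illustration only.
import ollama

def ask(question: str, model: str = "llama2") -> str:
    # ollama.chat sends a chat-style request to the local server and returns
    # a mapping whose reply text lives under ["message"]["content"].
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": question}],
    )
    return response["message"]["content"]

if __name__ == "__main__":
    print(ask("What is AI?"))
```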