diff --git a/ipex_llm_gpu.ipynb b/ipex_llm_gpu.ipynb
index da241a1..94e8d21 100644
--- a/ipex_llm_gpu.ipynb
+++ b/ipex_llm_gpu.ipynb
@@ -5,7 +5,7 @@
"id": "652ea6c8-8d13-4228-853e-fad46db470f5",
"metadata": {},
"source": [
- "# IPEX_LLM using Llamacpp on Intel GPUs"
+ "# Inference using Llamacpp on Intel GPUs"
]
},
{
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
- "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
+ "This notebook demonstrates how to run an LLM inference on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -67,7 +67,7 @@
"id": "8040fd21-7782-4b97-a0eb-327816328f17",
"metadata": {},
"source": [
- "## Step 2: Install IPEX-LLM\n",
+ "## Step 2: Setup the environment and install required libraries\n",
"\n",
"### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n",
" ```\n",
@@ -83,7 +83,7 @@
"\n",
"
\n",
"\n",
- "### With the llm-cpp environment active, use pip to install ipex-llm for GPU. \n",
+ "### With the llm-cpp environment active, use pip to install required libraries for suppport. \n",
"\n",
"```\n",
"pip install --pre --upgrade ipex-llm[cpp]\n",
@@ -116,7 +116,7 @@
"set SYCL_CACHE_PERSISTENT=1\n",
"\n",
"```\n",
- "### Below shows a simple example to show how to run a community GGUF model with IPEX-LLM\n",
+ "### Below shows a simple example to show how to run a community GGUF model\n",
"* Download and run the model for example as below \n",
"\n",
"```\n",
@@ -145,156 +145,6 @@
"! C:\\workshop\\llama-cpp\\main.exe -m ../models/llama-2-7b-chat.Q5_K_M.gguf -n 100 --prompt \"What is AI\" -t 16 -ngl 999 --color -e "
]
},
- {
- "cell_type": "markdown",
- "id": "ec180ac3-e74a-41d9-a9b9-65478dcea556",
- "metadata": {},
- "source": [
- "## Complete code snippet"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "33b94504-fcc8-454f-8a8d-b7312b7c0d8e",
- "metadata": {},
- "outputs": [],
- "source": [
- "%%writefile src/st_ipexllm_native.py\n",
- "import streamlit as st\n",
- "import subprocess\n",
- "import os\n",
- "import threading\n",
- "import time\n",
- "\n",
- "st.title(\"Chat with me!\")\n",
- "\n",
- "# Get the inputs from the text fields with required logs\n",
- "exe_path = st.text_input(\"Enter the path to the main.exe binary generated by the steps outlined:\",value=\"..\\llama-cpp\\main.exe\", key=\"exe_path\")\n",
- "print(f\"{exe_path}\\n\")\n",
- "if exe_path:\n",
- " if os.path.exists(exe_path):\n",
- " if os.path.isfile(exe_path):\n",
- " print(f\"valid file path: {exe_path}\")\n",
- " else:\n",
- " st.error(f\"The path {exe_path} is not a file\")\n",
- " else:\n",
- " st.error(f\"The path {exe_path} does not exist\")\n",
- "else:\n",
- " print(\"Please enter the file path\")\n",
- "\n",
- "model_path = st.text_input(\"Enter model file path:\", value=\"..\\models\\llama-2-7b-chat.Q5_K_M.gguf\", key=\"model_name\")\n",
- "print(f\"{model_path}\\n\")\n",
- "if model_path:\n",
- " if os.path.exists(model_path):\n",
- " if os.path.isfile(model_path):\n",
- " print(f\"valid file path: {model_path}\")\n",
- " else:\n",
- " st.error(f\"The path {model_path} is not a file\")\n",
- " else:\n",
- " st.error(f\"The path {model_path} does not exist\")\n",
- "else:\n",
- " print(\"Please enter the file path\")\n",
- "\n",
- "\n",
- "num_words = st.text_input(\"Enter the number of words you'd expect to see in your answer:\", value=\"100\", key=\"num_words\")\n",
- "print(f\"{num_words}\\n\")\n",
- "\n",
- "question = st.text_input(\"Enter your question\", value=\"What is AI\", key=\"question\")\n",
- "question = f'\"{question}\"'\n",
- "print(f\"{question}\\n\")\n",
- "num_cores = st.text_input(\"Enter the number of cores\", value=\"16\", key=\"num_cores\")\n",
- "print(f\"{num_cores}\\n\")\n",
- " \n",
- "gpu_layers = st.text_input(\"Enter number of GPU layers:\", value=\"999\", key=\"gpu_layers\")\n",
- "print(f\"{gpu_layers}\\n\")\n",
- "\n",
- "def stdout_typewriter_effect(stdout_container, current_stdout):\n",
- " current_char = \"\"\n",
- " for char in current_stdout:\n",
- " current_char+=char\n",
- " stdout_container.markdown(current_char)\n",
- " time.sleep(0.01)\n",
- "\n",
- "def launch_exe():\n",
- " stdout_chunks = []\n",
- " stderr_llama_time = []\n",
- " \n",
- " def append_stdout(pipe, stdout_lines):\n",
- " for line in iter(pipe.readline, ''):\n",
- " if line:\n",
- " print(line.strip())\n",
- " stdout_lines.append(line.strip())\n",
- " pipe.close()\n",
- "\n",
- " def append_stderr(pipe, stderr_lines):\n",
- " for line in iter(pipe.readline, ''):\n",
- " if line.startswith(\"llama_print_timings\"):\n",
- " print(line.strip())\n",
- " stderr_lines.append(line.strip())\n",
- " pipe.close()\n",
- "\n",
- " filter_command = '| findstr \"^\"'\n",
- " # command to run \n",
- " commandparams = exe_path + \" \" + \"-m\" + \" \" + model_path + \" \" + \"-n \" + \" \" + num_words + \" \" + \"--prompt \" + \" \" + question + \" \" + \"-t \" + \" \" + num_cores + \" \" + \"-e -ngl\" + \" \" + gpu_layers + \" \" + filter_command\n",
- " # logging command for easy debugging\n",
- " print(f\"{commandparams}\")\n",
- " try:\n",
- " # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time\n",
- " result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True)\n",
- "\n",
- " stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks))\n",
- " stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time))\n",
- " stdout_thread.start()\n",
- " stderr_thread.start()\n",
- " stdout_container = st.empty()\n",
- " stderr_container = st.empty()\n",
- "\n",
- " # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess\n",
- " # this method is not waiting for subprocess to complete as it only checks for the current status \n",
- " while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive():\n",
- " # stdout_container.markdown('\\n'.join(stdout_lines))\n",
- " stdout_typewriter_effect(stdout_container, '\\n'.join(stdout_chunks))\n",
- " stderr_container.text('\\n'.join(stderr_llama_time))\n",
- " stdout_thread.join(timeout=0.1)\n",
- " stderr_thread.join(timeout=0.1)\n",
- " \n",
- " stdout_thread.join()\n",
- " stderr_thread.join()\n",
- "\n",
- " except FileNotFoundError:\n",
- " st.error(\"The specified EXE file does not exist.\")\n",
- " \n",
- "if st.button(\"Generate\"):\n",
- " with st.spinner(\"Running....Please wait..🐎\"): \n",
- " launch_exe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e49d3f11-c86f-4971-ad9e-4562eb76b005",
- "metadata": {},
- "outputs": [],
- "source": [
- "! streamlit run src/st_ipexllm_native.py"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "0e741522-23ff-41e9-8bc0-59e0ea126069",
- "metadata": {},
- "source": [
- "### Streamlit sample output\n",
- "\n",
- "Below is the output of a sample run from the streamlit application and offloaded to iGPU\n",
- "\n",
- "
\n",
- "\n",
- "\n"
- ]
- },
{
"cell_type": "markdown",
"id": "92387fa9-2376-49a7-a94b-a29f254a0471",
@@ -314,9 +164,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "llm-cpp",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "llm-cpp"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -328,7 +178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/ipex_llm_ollama_gpu.ipynb b/ipex_llm_ollama_gpu.ipynb
index 6f1b118..d133c5d 100644
--- a/ipex_llm_ollama_gpu.ipynb
+++ b/ipex_llm_ollama_gpu.ipynb
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
- "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
+ "This notebook demonstrates how to install Ollama on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -81,7 +81,7 @@
"* Now that we have set up the environment, Intel GPU drivers, and runtime libraries, we can configure ollama to leverage the on-chip GPU.\n",
"* Open miniforge prompt and run the below commands. We Install IPEX-LLM for llama.cpp and to use llama.cpp with IPEX-LLM, first ensure that ipex-llm[cpp] is installed.\n",
"\n",
- "### With the ollama environment active, use pip to install ipex-llm for GPU. \n",
+ "### With the ollama environment active, use pip to install required libraries for GPU. \n",
"```\n",
"conda activate llm-ollama\n",
"pip install --pre --upgrade ipex-llm[cpp]\n",
@@ -259,9 +259,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "llm-ollama",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "llm-ollama"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -273,7 +273,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/ipex_llm_pytorch_gpu.ipynb b/ipex_llm_pytorch_gpu.ipynb
index 1bfca94..2d5d55c 100644
--- a/ipex_llm_pytorch_gpu.ipynb
+++ b/ipex_llm_pytorch_gpu.ipynb
@@ -5,7 +5,7 @@
"id": "4bdf80ae-10bd-438b-a5ae-76a5c5f99a6d",
"metadata": {},
"source": [
- "# Inference using Pytorch on Intel GPUs -- Intel LLM Library for Pytorch"
+ "# Inference using Pytorch on Intel GPUs"
]
},
{
@@ -15,7 +15,7 @@
"source": [
"## Introduction\n",
"\n",
- "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
+ "This notebook demonstrates how to run LLM inference using pytorch on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU."
]
},
{
@@ -66,7 +66,7 @@
"id": "8040fd21-7782-4b97-a0eb-327816328f17",
"metadata": {},
"source": [
- "## Step 2: Install IPEX-LLM\n",
+ "## Step 2: Setup the environment and install required libraries\n",
"\n",
"### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n",
" ```\n",
@@ -486,9 +486,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "llm",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "llm"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -500,7 +500,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.11.5"
}
},
"nbformat": 4,
diff --git a/llm-rag.ipynb b/llm-rag.ipynb
index 606e6a6..327e68f 100644
--- a/llm-rag.ipynb
+++ b/llm-rag.ipynb
@@ -498,9 +498,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "llm-ollama",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "llm-ollama"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -512,7 +512,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.9"
+ "version": "3.11.5"
},
"openvino_notebooks": {
"imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/304aa048-f10c-41c6-bb31-6d2bfdf49cf5",
diff --git a/requirements.txt b/requirements.txt
index d1a9f67..84d1f35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,5 @@ numexpr==2.10.1
numpy==1.26.4
huggingface-hub==0.24.3
wikipedia==1.4.0
+ollama==0.3.2
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
diff --git a/src/st_ipexllm_native.py b/src/st_ipexllm_native.py
deleted file mode 100644
index b8f9424..0000000
--- a/src/st_ipexllm_native.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import streamlit as st
-import subprocess
-import os
-import threading
-import time
-
-st.title("Chat with me!")
-
-# Get the inputs from the text fields with required logs
-exe_path = st.text_input("Enter the path to the main.exe binary generated by the steps outlined:",value="..\llama-cpp\main.exe", key="exe_path")
-print(f"{exe_path}\n")
-if exe_path:
- if os.path.exists(exe_path):
- if os.path.isfile(exe_path):
- print(f"valid file path: {exe_path}")
- else:
- st.error(f"The path {exe_path} is not a file")
- else:
- st.error(f"The path {exe_path} does not exist")
-else:
- print("Please enter the file path")
-
-model_path = st.text_input("Enter model file path:", value="..\models\llama-2-7b-chat.Q5_K_M.gguf", key="model_name")
-print(f"{model_path}\n")
-if model_path:
- if os.path.exists(model_path):
- if os.path.isfile(model_path):
- print(f"valid file path: {model_path}")
- else:
- st.error(f"The path {model_path} is not a file")
- else:
- st.error(f"The path {model_path} does not exist")
-else:
- print("Please enter the file path")
-
-
-num_words = st.text_input("Enter the number of words you'd expect to see in your answer:", value="100", key="num_words")
-print(f"{num_words}\n")
-
-question = st.text_input("Enter your question", value="What is AI", key="question")
-question = f'"{question}"'
-print(f"{question}\n")
-num_cores = st.text_input("Enter the number of cores", value="16", key="num_cores")
-print(f"{num_cores}\n")
-
-gpu_layers = st.text_input("Enter number of GPU layers:", value="999", key="gpu_layers")
-print(f"{gpu_layers}\n")
-
-def stdout_typewriter_effect(stdout_container, current_stdout):
- current_char = ""
- for char in current_stdout:
- current_char+=char
- stdout_container.markdown(current_char)
- time.sleep(0.01)
-
-def launch_exe():
- stdout_chunks = []
- stderr_llama_time = []
-
- def append_stdout(pipe, stdout_lines):
- for line in iter(pipe.readline, ''):
- if line:
- print(line.strip())
- stdout_lines.append(line.strip())
- pipe.close()
-
- def append_stderr(pipe, stderr_lines):
- for line in iter(pipe.readline, ''):
- if line.startswith("llama_print_timings"):
- print(line.strip())
- stderr_lines.append(line.strip())
- pipe.close()
-
- filter_command = '| findstr "^"'
- # command to run
- commandparams = exe_path + " " + "-m" + " " + model_path + " " + "-n " + " " + num_words + " " + "--prompt " + " " + question + " " + "-t " + " " + num_cores + " " + "-e -ngl" + " " + gpu_layers + " " + filter_command
- # logging command for easy debugging
- print(f"{commandparams}")
- try:
- # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time
- result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True)
-
- stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks))
- stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time))
- stdout_thread.start()
- stderr_thread.start()
- stdout_container = st.empty()
- stderr_container = st.empty()
-
- # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess
- # this method is not waiting for subprocess to complete as it only checks for the current status
- while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive():
- # stdout_container.markdown('\n'.join(stdout_lines))
- stdout_typewriter_effect(stdout_container, '\n'.join(stdout_chunks))
- stderr_container.text('\n'.join(stderr_llama_time))
- stdout_thread.join(timeout=0.1)
- stderr_thread.join(timeout=0.1)
-
- stdout_thread.join()
- stderr_thread.join()
-
- except FileNotFoundError:
- st.error("The specified EXE file does not exist.")
-
-if st.button("Generate"):
- with st.spinner("Running....Please wait..🐎"):
- launch_exe()
diff --git a/src/st_rag_chromadb.py b/src/st_rag_chromadb.py
index b07a6b1..9a157e9 100644
--- a/src/st_rag_chromadb.py
+++ b/src/st_rag_chromadb.py
@@ -20,7 +20,7 @@
model = st.selectbox("Choose a model from the list", models)
# Input text to load the document
-url_path = st.text_input("Enter the URL to load for RAG:",value="https://www.gutenberg.org/files/1727/1727-h/1727-h.htm", key="url_path")
+url_path = st.text_input("Enter the URL to load for RAG:", key="url_path")
# Select embedding type
embedding_type = st.selectbox("Please select an embedding type", ("ollama", "huggingface", "nomic", "fastembed"),index=1)
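Because this change removes the default Gutenberg URL, `url_path` can now be empty when the app first renders. A hypothetical guard along the following lines (not part of this diff) keeps the rest of the RAG pipeline from running on a blank or malformed URL; it mirrors the path checks used elsewhere in these scripts.

```python
# Hypothetical guard (not part of this diff): stop the Streamlit run early when the
# RAG URL is missing or malformed, now that the text input has no default value.
from urllib.parse import urlparse

import streamlit as st

url_path = st.text_input("Enter the URL to load for RAG:", key="url_path")

if not url_path:
    st.info("Please enter a URL to load for RAG.")
    st.stop()  # nothing below this point runs until a URL is provided

parsed = urlparse(url_path)
if parsed.scheme not in ("http", "https") or not parsed.netloc:
    st.error(f"{url_path} does not look like a valid http(s) URL.")
    st.stop()
```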