From f0a06f0d31d0c6b4b8ace17769fb685f1aee47bb Mon Sep 17 00:00:00 2001 From: praveenkk123 Date: Thu, 5 Sep 2024 09:21:45 -0700 Subject: [PATCH 1/2] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index d1a9f67..84d1f35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ numexpr==2.10.1 numpy==1.26.4 huggingface-hub==0.24.3 wikipedia==1.4.0 +ollama==0.3.2 +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ From cb69576509cef9481057ef68bd3b1f6472b63a35 Mon Sep 17 00:00:00 2001 From: Praveen Date: Tue, 10 Sep 2024 22:37:39 +0000 Subject: [PATCH 2/2] updated the notebooks --- ipex_llm_gpu.ipynb | 166 ++----------------------------- ipex_llm_ollama_gpu.ipynb | 10 +-- ipex_llm_pytorch_gpu.ipynb | 12 +-- llm-rag.ipynb | 6 +- src/st_ipexllm_native.py | 107 ------------------------ src/st_rag_chromadb.py | 2 +- 6 files changed, 23 insertions(+), 280 deletions(-) delete mode 100644 src/st_ipexllm_native.py diff --git a/ipex_llm_gpu.ipynb b/ipex_llm_gpu.ipynb index da241a1..94e8d21 100644 --- a/ipex_llm_gpu.ipynb +++ b/ipex_llm_gpu.ipynb @@ -5,7 +5,7 @@ "id": "652ea6c8-8d13-4228-853e-fad46db470f5", "metadata": {}, "source": [ - "# IPEX_LLM using Llamacpp on Intel GPUs" + "# Inference using Llamacpp on Intel GPUs" ] }, { @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to run LLM inference on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -67,7 +67,7 @@ "id": "8040fd21-7782-4b97-a0eb-327816328f17", "metadata": {}, "source": [ - "## Step 2: Install IPEX-LLM\n", + "## Step 2: Set up the environment and install required libraries\n", "\n", "### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n", " ```\n", @@ -83,7 +83,7 @@ "\n", "\n", "\n", - "### With the llm-cpp environment active, use pip to install ipex-llm for GPU. \n", + "### With the llm-cpp environment active, use pip to install the required libraries for GPU support. \n", "\n", "```\n", "pip install --pre --upgrade ipex-llm[cpp]\n", @@ -116,7 +116,7 @@ "set SYCL_CACHE_PERSISTENT=1\n", "\n", "```\n", - "### Below shows a simple example to show how to run a community GGUF model with IPEX-LLM\n", + "### Below is a simple example showing how to run a community GGUF model\n", "* Download and run the model for example as below \n", "\n", "```\n", @@ -145,156 +145,6 @@ "! 
C:\\workshop\\llama-cpp\\main.exe -m ../models/llama-2-7b-chat.Q5_K_M.gguf -n 100 --prompt \"What is AI\" -t 16 -ngl 999 --color -e " ] }, - { - "cell_type": "markdown", - "id": "ec180ac3-e74a-41d9-a9b9-65478dcea556", - "metadata": {}, - "source": [ - "## Complete code snippet" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33b94504-fcc8-454f-8a8d-b7312b7c0d8e", - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile src/st_ipexllm_native.py\n", - "import streamlit as st\n", - "import subprocess\n", - "import os\n", - "import threading\n", - "import time\n", - "\n", - "st.title(\"Chat with me!\")\n", - "\n", - "# Get the inputs from the text fields with required logs\n", - "exe_path = st.text_input(\"Enter the path to the main.exe binary generated by the steps outlined:\",value=\"..\\llama-cpp\\main.exe\", key=\"exe_path\")\n", - "print(f\"{exe_path}\\n\")\n", - "if exe_path:\n", - " if os.path.exists(exe_path):\n", - " if os.path.isfile(exe_path):\n", - " print(f\"valid file path: {exe_path}\")\n", - " else:\n", - " st.error(f\"The path {exe_path} is not a file\")\n", - " else:\n", - " st.error(f\"The path {exe_path} does not exist\")\n", - "else:\n", - " print(\"Please enter the file path\")\n", - "\n", - "model_path = st.text_input(\"Enter model file path:\", value=\"..\\models\\llama-2-7b-chat.Q5_K_M.gguf\", key=\"model_name\")\n", - "print(f\"{model_path}\\n\")\n", - "if model_path:\n", - " if os.path.exists(model_path):\n", - " if os.path.isfile(model_path):\n", - " print(f\"valid file path: {model_path}\")\n", - " else:\n", - " st.error(f\"The path {model_path} is not a file\")\n", - " else:\n", - " st.error(f\"The path {model_path} does not exist\")\n", - "else:\n", - " print(\"Please enter the file path\")\n", - "\n", - "\n", - "num_words = st.text_input(\"Enter the number of words you'd expect to see in your answer:\", value=\"100\", key=\"num_words\")\n", - "print(f\"{num_words}\\n\")\n", - "\n", - "question = st.text_input(\"Enter your question\", value=\"What is AI\", key=\"question\")\n", - "question = f'\"{question}\"'\n", - "print(f\"{question}\\n\")\n", - "num_cores = st.text_input(\"Enter the number of cores\", value=\"16\", key=\"num_cores\")\n", - "print(f\"{num_cores}\\n\")\n", - " \n", - "gpu_layers = st.text_input(\"Enter number of GPU layers:\", value=\"999\", key=\"gpu_layers\")\n", - "print(f\"{gpu_layers}\\n\")\n", - "\n", - "def stdout_typewriter_effect(stdout_container, current_stdout):\n", - " current_char = \"\"\n", - " for char in current_stdout:\n", - " current_char+=char\n", - " stdout_container.markdown(current_char)\n", - " time.sleep(0.01)\n", - "\n", - "def launch_exe():\n", - " stdout_chunks = []\n", - " stderr_llama_time = []\n", - " \n", - " def append_stdout(pipe, stdout_lines):\n", - " for line in iter(pipe.readline, ''):\n", - " if line:\n", - " print(line.strip())\n", - " stdout_lines.append(line.strip())\n", - " pipe.close()\n", - "\n", - " def append_stderr(pipe, stderr_lines):\n", - " for line in iter(pipe.readline, ''):\n", - " if line.startswith(\"llama_print_timings\"):\n", - " print(line.strip())\n", - " stderr_lines.append(line.strip())\n", - " pipe.close()\n", - "\n", - " filter_command = '| findstr \"^\"'\n", - " # command to run \n", - " commandparams = exe_path + \" \" + \"-m\" + \" \" + model_path + \" \" + \"-n \" + \" \" + num_words + \" \" + \"--prompt \" + \" \" + question + \" \" + \"-t \" + \" \" + num_cores + \" \" + \"-e -ngl\" + \" \" + gpu_layers + \" \" + filter_command\n", - " # logging 
command for easy debugging\n", - " print(f\"{commandparams}\")\n", - " try:\n", - " # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time\n", - " result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True)\n", - "\n", - " stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks))\n", - " stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time))\n", - " stdout_thread.start()\n", - " stderr_thread.start()\n", - " stdout_container = st.empty()\n", - " stderr_container = st.empty()\n", - "\n", - " # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess\n", - " # this method is not waiting for subprocess to complete as it only checks for the current status \n", - " while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive():\n", - " # stdout_container.markdown('\\n'.join(stdout_lines))\n", - " stdout_typewriter_effect(stdout_container, '\\n'.join(stdout_chunks))\n", - " stderr_container.text('\\n'.join(stderr_llama_time))\n", - " stdout_thread.join(timeout=0.1)\n", - " stderr_thread.join(timeout=0.1)\n", - " \n", - " stdout_thread.join()\n", - " stderr_thread.join()\n", - "\n", - " except FileNotFoundError:\n", - " st.error(\"The specified EXE file does not exist.\")\n", - " \n", - "if st.button(\"Generate\"):\n", - " with st.spinner(\"Running....Please wait..🐎\"): \n", - " launch_exe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e49d3f11-c86f-4971-ad9e-4562eb76b005", - "metadata": {}, - "outputs": [], - "source": [ - "! streamlit run src/st_ipexllm_native.py" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "0e741522-23ff-41e9-8bc0-59e0ea126069", - "metadata": {}, - "source": [ - "### Streamlit sample output\n", - "\n", - "Below is the output of a sample run from the streamlit application and offloaded to iGPU\n", - "\n", - " \n", - "\n", - "\n" - ] - }, { "cell_type": "markdown", "id": "92387fa9-2376-49a7-a94b-a29f254a0471", @@ -314,9 +164,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-cpp", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-cpp" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -328,7 +178,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/ipex_llm_ollama_gpu.ipynb b/ipex_llm_ollama_gpu.ipynb index 6f1b118..d133c5d 100644 --- a/ipex_llm_ollama_gpu.ipynb +++ b/ipex_llm_ollama_gpu.ipynb @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to install Ollama on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -81,7 +81,7 @@ "* Now that we have set up the environment, Intel GPU drivers, and runtime libraries, we can configure ollama to leverage the on-chip GPU.\n", "* Open miniforge prompt and run the below commands. 
We Install IPEX-LLM for llama.cpp and to use llama.cpp with IPEX-LLM, first ensure that ipex-llm[cpp] is installed.\n", "\n", - "### With the ollama environment active, use pip to install ipex-llm for GPU. \n", + "### With the ollama environment active, use pip to install required libraries for GPU. \n", "```\n", "conda activate llm-ollama\n", "pip install --pre --upgrade ipex-llm[cpp]\n", @@ -259,9 +259,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-ollama", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-ollama" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -273,7 +273,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/ipex_llm_pytorch_gpu.ipynb b/ipex_llm_pytorch_gpu.ipynb index 1bfca94..2d5d55c 100644 --- a/ipex_llm_pytorch_gpu.ipynb +++ b/ipex_llm_pytorch_gpu.ipynb @@ -5,7 +5,7 @@ "id": "4bdf80ae-10bd-438b-a5ae-76a5c5f99a6d", "metadata": {}, "source": [ - "# Inference using Pytorch on Intel GPUs -- Intel LLM Library for Pytorch" + "# Inference using Pytorch on Intel GPUs" ] }, { @@ -15,7 +15,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook demonstrates how to install IPEX-LLM on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." + "This notebook demonstrates how to run LLM inference using pytorch on Windows with Intel GPUs. It applies to Intel Core Ultra and Core 11 - 14 gen integrated GPUs (iGPUs), as well as Intel Arc Series GPU." ] }, { @@ -66,7 +66,7 @@ "id": "8040fd21-7782-4b97-a0eb-327816328f17", "metadata": {}, "source": [ - "## Step 2: Install IPEX-LLM\n", + "## Step 2: Setup the environment and install required libraries\n", "\n", "### After installation of conda-forge, open the Miniforge Prompt, and create a new python environment:\n", " ```\n", @@ -486,9 +486,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -500,7 +500,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/llm-rag.ipynb b/llm-rag.ipynb index 606e6a6..327e68f 100644 --- a/llm-rag.ipynb +++ b/llm-rag.ipynb @@ -498,9 +498,9 @@ ], "metadata": { "kernelspec": { - "display_name": "llm-ollama", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "llm-ollama" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -512,7 +512,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.5" }, "openvino_notebooks": { "imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/304aa048-f10c-41c6-bb31-6d2bfdf49cf5", diff --git a/src/st_ipexllm_native.py b/src/st_ipexllm_native.py deleted file mode 100644 index b8f9424..0000000 --- a/src/st_ipexllm_native.py +++ /dev/null @@ -1,107 +0,0 @@ -import streamlit as st -import subprocess -import os -import threading -import time - -st.title("Chat with me!") - -# Get the inputs from the text fields with required logs -exe_path = st.text_input("Enter the path to the main.exe binary generated by the steps outlined:",value="..\llama-cpp\main.exe", key="exe_path") -print(f"{exe_path}\n") -if exe_path: - if os.path.exists(exe_path): - 
if os.path.isfile(exe_path): - print(f"valid file path: {exe_path}") - else: - st.error(f"The path {exe_path} is not a file") - else: - st.error(f"The path {exe_path} does not exist") -else: - print("Please enter the file path") - -model_path = st.text_input("Enter model file path:", value="..\models\llama-2-7b-chat.Q5_K_M.gguf", key="model_name") -print(f"{model_path}\n") -if model_path: - if os.path.exists(model_path): - if os.path.isfile(model_path): - print(f"valid file path: {model_path}") - else: - st.error(f"The path {model_path} is not a file") - else: - st.error(f"The path {model_path} does not exist") -else: - print("Please enter the file path") - - -num_words = st.text_input("Enter the number of words you'd expect to see in your answer:", value="100", key="num_words") -print(f"{num_words}\n") - -question = st.text_input("Enter your question", value="What is AI", key="question") -question = f'"{question}"' -print(f"{question}\n") -num_cores = st.text_input("Enter the number of cores", value="16", key="num_cores") -print(f"{num_cores}\n") - -gpu_layers = st.text_input("Enter number of GPU layers:", value="999", key="gpu_layers") -print(f"{gpu_layers}\n") - -def stdout_typewriter_effect(stdout_container, current_stdout): - current_char = "" - for char in current_stdout: - current_char+=char - stdout_container.markdown(current_char) - time.sleep(0.01) - -def launch_exe(): - stdout_chunks = [] - stderr_llama_time = [] - - def append_stdout(pipe, stdout_lines): - for line in iter(pipe.readline, ''): - if line: - print(line.strip()) - stdout_lines.append(line.strip()) - pipe.close() - - def append_stderr(pipe, stderr_lines): - for line in iter(pipe.readline, ''): - if line.startswith("llama_print_timings"): - print(line.strip()) - stderr_lines.append(line.strip()) - pipe.close() - - filter_command = '| findstr "^"' - # command to run - commandparams = exe_path + " " + "-m" + " " + model_path + " " + "-n " + " " + num_words + " " + "--prompt " + " " + question + " " + "-t " + " " + num_cores + " " + "-e -ngl" + " " + gpu_layers + " " + filter_command - # logging command for easy debugging - print(f"{commandparams}") - try: - # Use subprocess.Popen() to execute the EXE file with command-line parameters and capture the output in real-time - result = subprocess.Popen(commandparams, shell=True, stdout=subprocess.PIPE, stderr = subprocess.PIPE, text=True) - - stdout_thread = threading.Thread(target=append_stdout, args=(result.stdout, stdout_chunks)) - stderr_thread = threading.Thread(target=append_stderr, args=(result.stderr, stderr_llama_time)) - stdout_thread.start() - stderr_thread.start() - stdout_container = st.empty() - stderr_container = st.empty() - - # result.poll() returns None only if the subprocess is still running otherwise it returns the return code of subprocess - # this method is not waiting for subprocess to complete as it only checks for the current status - while result.poll() is None and stdout_thread.is_alive or stderr_thread.is_alive(): - # stdout_container.markdown('\n'.join(stdout_lines)) - stdout_typewriter_effect(stdout_container, '\n'.join(stdout_chunks)) - stderr_container.text('\n'.join(stderr_llama_time)) - stdout_thread.join(timeout=0.1) - stderr_thread.join(timeout=0.1) - - stdout_thread.join() - stderr_thread.join() - - except FileNotFoundError: - st.error("The specified EXE file does not exist.") - -if st.button("Generate"): - with st.spinner("Running....Please wait..🐎"): - launch_exe() diff --git a/src/st_rag_chromadb.py b/src/st_rag_chromadb.py index 
b07a6b1..9a157e9 100644 --- a/src/st_rag_chromadb.py +++ b/src/st_rag_chromadb.py @@ -20,7 +20,7 @@ model = st.selectbox("Choose a model from the list", models) # Input text to load the document -url_path = st.text_input("Enter the URL to load for RAG:",value="https://www.gutenberg.org/files/1727/1727-h/1727-h.htm", key="url_path") +url_path = st.text_input("Enter the URL to load for RAG:", key="url_path") # Select embedding type embedding_type = st.selectbox("Please select an embedding type", ("ollama", "huggingface", "nomic", "fastembed"),index=1)
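For context on the `ollama==0.3.2` dependency added by the first patch: below is a minimal, illustrative sketch of exercising the Ollama Python client against a locally served model. It assumes an `ollama serve` process is already running and that a model has been pulled; the model name used here is a placeholder, not something taken from this patch.

```python
# Minimal sketch, assuming an Ollama server is already running locally
# ("ollama serve") and the chosen model has been pulled beforehand.
# The model name below is an assumption for illustration only.
import ollama

def ask(question: str, model: str = "llama2") -> str:
    # ollama.chat sends a chat-style request to the local server and returns
    # a mapping whose reply text lives under ["message"]["content"].
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": question}],
    )
    return response["message"]["content"]

if __name__ == "__main__":
    print(ask("What is AI?"))
```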