diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 07ba6ac49f0..7c88e36eff3 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -805,7 +805,6 @@ jobs:
call conda activate igpu-perf
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -841,7 +840,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -875,7 +873,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -910,7 +907,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -947,7 +943,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1003,7 +998,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1039,7 +1033,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1073,7 +1066,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1108,7 +1100,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1145,7 +1136,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1200,7 +1190,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1236,7 +1225,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1270,7 +1258,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1305,7 +1292,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1342,7 +1328,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1397,7 +1382,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1435,7 +1419,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1474,7 +1457,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1522,7 +1504,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1572,7 +1553,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1642,7 +1622,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1680,7 +1659,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1717,7 +1695,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1756,7 +1733,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1814,7 +1790,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1852,7 +1827,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1888,7 +1862,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1925,7 +1898,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -1964,7 +1936,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2021,7 +1992,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2059,7 +2029,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2095,7 +2064,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2132,7 +2100,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
@@ -2171,7 +2138,6 @@ jobs:
if "${{ matrix.platform }}"=="perf-mtl" (
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
)
if "${{ matrix.platform }}"=="perf-lnl" (
set SYCL_CACHE_PERSISTENT=1
diff --git a/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md b/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
index ec73bb84f15..7d36f35af82 100644
--- a/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
+++ b/docs/mddocs/DockerGuides/docker_pytorch_inference_gpu.md
@@ -97,10 +97,6 @@ root@arda-arc12:/# sycl-ls
> # Reduce memory accesses by fusing SDP ops.
> # Recommended for use on Intel Data Center GPU Max Series.
> export ENABLE_SDP_FUSION=1
->
-> # Disable XMX computation.
-> # Recommended for use on integrated GPUs.
-> export BIGDL_LLM_XMX_DISABLED=1
> ```
diff --git a/docs/mddocs/Overview/install_gpu.md b/docs/mddocs/Overview/install_gpu.md
index 2de066e4f7b..38a5e16ceb5 100644
--- a/docs/mddocs/Overview/install_gpu.md
+++ b/docs/mddocs/Overview/install_gpu.md
@@ -150,13 +150,7 @@ call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
Please also set the following environment variable if you would like to run LLMs on: -->
-- For **Intel iGPU**:
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
- ```
-
-- For **Intel Arc™ A-Series Graphics**:
+- For **Intel iGPU** and **Intel Arc™ A-Series Graphics**:
```cmd
set SYCL_CACHE_PERSISTENT=1
```
@@ -596,7 +590,6 @@ To use GPU acceleration on Linux, several environment variables are required or
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/benchmark_quickstart.md b/docs/mddocs/Quickstart/benchmark_quickstart.md
index fc5ce949a15..63cf120369d 100644
--- a/docs/mddocs/Quickstart/benchmark_quickstart.md
+++ b/docs/mddocs/Quickstart/benchmark_quickstart.md
@@ -80,34 +80,19 @@ Some parameters in the yaml file that you can configure:
Please refer to [here](../Overview/install_gpu.md#runtime-configuration) to configure oneAPI environment variables. Choose corresponding commands base on your device.
-- For **Intel iGPU**:
+- For **Intel iGPU** and **Intel Arc™ A-Series Graphics**:
```bash
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
python run.py
```
-- For **Intel Arc™ A300-Series or Pro A60**:
-
- ```bash
- set SYCL_CACHE_PERSISTENT=1
- python run.py
- ```
-
-- For **Other Intel dGPU Series**:
-
- ```bash
- # e.g. Arc™ A770
- python run.py
- ```
-
## Run on Linux
Please choose corresponding commands base on your device.
-- For **Intel Arc™ A-Series and Intel Data Center GPU Flex**:
+- For **Intel Arc™ A-Series** and **Intel Data Center GPU Flex**:
For Intel Arc™ A-Series Graphics and Intel Data Center GPU Flex Series, we recommend:
diff --git a/docs/mddocs/Quickstart/install_linux_gpu.md b/docs/mddocs/Quickstart/install_linux_gpu.md
index b32dd346780..e3aea7f418d 100644
--- a/docs/mddocs/Quickstart/install_linux_gpu.md
+++ b/docs/mddocs/Quickstart/install_linux_gpu.md
@@ -422,7 +422,6 @@ To use GPU acceleration on Linux, several environment variables are required or
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md b/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
index 302f3cb4a9f..15cc9577296 100644
--- a/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
+++ b/docs/mddocs/Quickstart/install_linux_gpu.zh-CN.md
@@ -390,7 +390,6 @@ conda activate llm
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
- export BIGDL_LLM_XMX_DISABLED=1
```
> [!NOTE]
diff --git a/docs/mddocs/Quickstart/install_windows_gpu.md b/docs/mddocs/Quickstart/install_windows_gpu.md
index 99895dc1983..32a13d0bbf8 100644
--- a/docs/mddocs/Quickstart/install_windows_gpu.md
+++ b/docs/mddocs/Quickstart/install_windows_gpu.md
@@ -109,19 +109,12 @@ You can verify if `ipex-llm` is successfully installed following below steps.
- Set the following environment variables according to your device:
- - For **Intel iGPU**:
+ - For **Intel iGPU** and **Intel Arc™ A770**:
```cmd
set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
```
-
- - For **Intel Arc™ A770**:
-
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- ```
-
+
> [!TIP]
> For other Intel dGPU Series, please refer to [this guide](../Overview/install_gpu.md#runtime-configuration) for more details regarding runtime configuration.
diff --git a/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md b/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
index 5ee45481024..81c2b7b7d69 100644
--- a/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
+++ b/docs/mddocs/Quickstart/install_windows_gpu.zh-CN.md
@@ -108,14 +108,7 @@ conda activate llm
- 根据你的设备,设置以下环境参数:
- - **Intel iGPU**:
-
- ```cmd
- set SYCL_CACHE_PERSISTENT=1
- set BIGDL_LLM_XMX_DISABLED=1
- ```
-
- - **Intel Arc™ A770**:
+ - **Intel iGPU** 和 **Intel Arc™ A770**:
```cmd
set SYCL_CACHE_PERSISTENT=1
diff --git a/docs/mddocs/Quickstart/webui_quickstart.md b/docs/mddocs/Quickstart/webui_quickstart.md
index 6600c6c0bcd..7f06fb5b7a3 100644
--- a/docs/mddocs/Quickstart/webui_quickstart.md
+++ b/docs/mddocs/Quickstart/webui_quickstart.md
@@ -70,11 +70,6 @@ Configure oneAPI variables by running the following command in **Miniforge Promp
set SYCL_CACHE_PERSISTENT=1
```
-If you're running on iGPU, set additional environment variables by running the following commands:
-```cmd
-set BIGDL_LLM_XMX_DISABLED=1
-```
-
#### Launch the Server
In **Miniforge Prompt** with the conda environment `llm` activated, navigate to the `text-generation-webui` folder and execute the following commands (You can optionally lanch the server with or without the API service):
diff --git a/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md b/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
index c8f4e701993..1a08475ac39 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/aquila/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md b/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
index 84bd3a20029..141109dfddd 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/aquila2/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md b/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
index 9d3b4d78a50..16b4346de67 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/baichuan/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md b/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
index 2cd38f577f9..09e55443fb1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/baichuan2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md b/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
index 3cf8120d5ed..3c11ab08a56 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/bluelm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md b/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
index 407534acbc5..a9fdc69b87d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chatglm2/README.md
@@ -70,7 +70,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -78,24 +77,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md b/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
index 70b66b022a0..111c628c526 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chatglm3/README.md
@@ -70,7 +70,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -78,24 +77,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md b/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
index 974bca2784a..90ebd4f161a 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/chinese-llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md b/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
index 1bcdef0f391..7e44b89b2b9 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codegeex2/README.md
@@ -84,7 +84,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -92,24 +91,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md b/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
index d0f9f5e0f65..dd4a78c18a8 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codegemma/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,24 +88,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md b/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
index 40cf921c3fc..1880608b55d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/codellama/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md b/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
index 728d534b07b..cc1f768eea3 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/deciLM-7b/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md b/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
index 04ef2859a6e..47843525d9c 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/deepseek/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md b/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
index cc45b2583c4..3e07019a931 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/dolly-v1/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md b/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
index ace4489b472..f0e05b0bd58 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/dolly-v2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md b/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
index 0749d29426d..5c2e79ea535 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/falcon/README.md
@@ -101,7 +101,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -109,24 +108,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 4.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md b/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
index a309c8e5bff..ef2426c6165 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/flan-t5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md b/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
index eff2f97ab73..6a3a19a79c3 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gemma/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md b/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
index b3167e8c997..e079593051c 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gemma2/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,24 +88,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md b/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
index 541ae806639..9cf550c2096 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/glm4/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md b/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
index 25b3fa86562..a08a3cf194d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/gpt-j/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md b/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
index 72dfccb6d61..b978133940d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/internlm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md b/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
index 6d16158f10d..0745014efa8 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/internlm2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
index cdd0648498f..06bae6ac670 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
index 1e006c0826b..e55c4713e02 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3.1/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
index 156c662287f..cff5fc9d462 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3.2/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md b/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
index 3280ead62a6..2e5a47ecb59 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/llama3/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md b/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
index eeac47da4a7..e77fd4b037d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/minicpm/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md b/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
index 0f008dd0649..56c58d61c07 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/minicpm3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md b/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
index 63542bcfb76..ab0006b8143 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mistral/README.md
@@ -72,7 +72,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -80,24 +79,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md b/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
index 5e9449681ec..1d116a3ab50 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mixtral/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md b/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
index 192d31b8977..2cd964b4763 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/mpt/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
index f34889aad0f..5d50e0530fb 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-1_5/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
index 7d96882d49c..45f75fa1f0b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-2/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md b/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
index c034c9fd5e4..d03ac331dba 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phi-3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md b/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
index ab690af323e..d40d9052da2 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/phixtral/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
index 8311f7f1369..a0192e71271 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
index 681f035a347..d146e69f72b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen1.5/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
index 12052b33856..ed4778551db 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen2.5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md b/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
index 8ade27f6b80..829139d943d 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/qwen2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md b/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
index 1376e42d811..3e713025e9b 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/redpajama/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/replit/README.md b/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
index 644de85aa3e..e43b6a6dad1 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/replit/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md b/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
index 55d94f6d11f..15871c8912c 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/rwkv4/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md b/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
index 63cff014792..18e3c6ce2ba 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/rwkv5/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/solar/README.md b/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
index b86044bb854..e847c67eaf8 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/solar/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md b/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
index 5b5f18ddd51..9746b44a799 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/stablelm/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md b/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
index 2787a13dd35..f11b0df33e3 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/starcoder/readme.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md b/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
index 7f4b9806a6c..bd847eb3937 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/vicuna/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/yi/README.md b/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
index 080e2676fdc..793263333fa 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/yi/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md b/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
index dd42cec1360..72b40fa5b59 100644
--- a/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
+++ b/python/llm/example/GPU/HuggingFace/LLM/yuan2/README.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,24 +85,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
index a11e1061d21..9d2e43f414e 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-Llama3-V-2_5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
index aed936fb277..dee4bc78a9b 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
index 7e0ea2eafcc..0af1ba5c9cf 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V-2_6/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
index fdae240d943..91d7b0aa41d 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-V/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
index 3d39402d646..6c722e91aa5 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/StableDiffusion/README.md
@@ -57,7 +57,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -65,24 +64,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
index 954e74600d2..89c2035a31a 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/distil-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,24 +83,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
index c37a99f8183..7464c7e7751 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/glm-4v/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md b/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
index 4183d309f0d..ad5bf92207a 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/internvl2/readme.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
index 543e2f488e7..b3b56963ab9 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/phi-3-vision/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
index 737232661fd..90a2e3a0ef6 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen-vl/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
index b201467a138..243ab04610d 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/qwen2-audio/README.md
@@ -82,7 +82,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -90,24 +89,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
index 7dea109b078..46f7bccd3e5 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/voiceassistant/README.md
@@ -86,7 +86,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -94,24 +93,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
index ac664fb0a36..56c7828eec0 100644
--- a/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
+++ b/python/llm/example/GPU/HuggingFace/Multimodal/whisper/readme.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,24 +85,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/HuggingFace/Save-Load/README.md b/python/llm/example/GPU/HuggingFace/Save-Load/README.md
index 9aae6c8c6cf..acc1237038d 100644
--- a/python/llm/example/GPU/HuggingFace/Save-Load/README.md
+++ b/python/llm/example/GPU/HuggingFace/Save-Load/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/LangChain/README.md b/python/llm/example/GPU/LangChain/README.md
index 6df739768e6..5e84fbfb584 100644
--- a/python/llm/example/GPU/LangChain/README.md
+++ b/python/llm/example/GPU/LangChain/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Lightweight-Serving/README.md b/python/llm/example/GPU/Lightweight-Serving/README.md
index 3e67b1e579c..2db7cd97d66 100644
--- a/python/llm/example/GPU/Lightweight-Serving/README.md
+++ b/python/llm/example/GPU/Lightweight-Serving/README.md
@@ -97,7 +97,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -105,24 +104,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/LlamaIndex/README.md b/python/llm/example/GPU/LlamaIndex/README.md
index a56ed793bba..abdc67289d4 100644
--- a/python/llm/example/GPU/LlamaIndex/README.md
+++ b/python/llm/example/GPU/LlamaIndex/README.md
@@ -130,7 +130,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -138,24 +137,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 4.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
index b030eb9ea39..7929cbfa3f4 100644
--- a/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/Chatglm3-32K/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
index 20529a0d30b..09a3ed08922 100644
--- a/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
+++ b/python/llm/example/GPU/Long-Context/LLaMA2-32K/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/ModelScope-Models/README.md b/python/llm/example/GPU/ModelScope-Models/README.md
index 2b5ecacbdbb..d8cdf9e6798 100644
--- a/python/llm/example/GPU/ModelScope-Models/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/README.md
@@ -74,7 +74,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -82,24 +81,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
index ccd3a115598..8a9a3d7acea 100644
--- a/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
+++ b/python/llm/example/GPU/ModelScope-Models/Save-Load/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
index da481b37854..136bef0f381 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/aquila2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
index d786bc7f98f..ac4d0b0e150 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
index d4f120f5a22..741e916ddc6 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/baichuan2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
index 5455050eac7..06500cfb008 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bark/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
index db198645eab..ddb7b7e0dc8 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/bluelm/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
index f3a10f37a6b..78f47d2bb58 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm2/README.md
@@ -69,7 +69,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -77,24 +76,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
index 6c198bfd5dd..5c8ffa9aaf4 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/chatglm3/README.md
@@ -69,7 +69,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -77,24 +76,14 @@ export BIGDL_LLM_XMX_DISABLED=1
### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
index 6dd5e0799b5..35d4f316994 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codegeex2/README.md
@@ -84,7 +84,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -92,24 +91,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
index 33da3966df0..25b092bf475 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codegemma/README.md
@@ -81,7 +81,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -89,24 +88,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
index ff68817eca4..af26fbcfd65 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/codellama/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
index a9e66f54732..a3a8704efba 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deciLM-7b/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
index 4c1a9898088..8dbe600a4ea 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/deepseek/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
index 8268a0c0d47..cce12c3f79b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/distil-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,24 +83,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
index c35fbd4fc52..d70f6b67027 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v1/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
index 2c88b488688..1c0d7ce66a3 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/dolly-v2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
index f7a3c9ad75a..9c530fa6c91 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/flan-t5/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md b/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
index 961b71c004a..b5ca3c01d9c 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/glm4/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
index f8906fb2880..1c499291076 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/internlm2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
index bde37043ad2..8f634a2fcd2 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama2/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
index 74b315340b1..543fbf9fc59 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama3.2-vision/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
index 545a628842e..b633381df53 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llama3/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
index fa75e826770..813c8d407fa 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/llava/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
index 2efc010d488..5a77dd6b837 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mamba/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
index e441212e2fe..b2478cf843f 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/minicpm/README.md
@@ -73,7 +73,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -81,24 +80,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
index 4f3e58b045c..dd8342da3ad 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mistral/README.md
@@ -72,7 +72,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -80,24 +79,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
index 3179f431723..6adf2f553d7 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/mixtral/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md b/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
index 5fdd8969b81..b40a9b850f7 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/openai-whisper/README.md
@@ -76,7 +76,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -84,24 +83,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
index 3a4c88dcf7e..41fcc59701c 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-1_5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
index bbd276b9612..3824652bdd0 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
index cc4542c984d..873ca7f300b 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phi-3/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
index 1458f2ab908..688d4ef7b7c 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/phixtral/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
index c480c545366..1cec10e1a19 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen-vl/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
index 801fdd591e4..1b6b0388cb9 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen1.5/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
index 67cacad860d..c9e78c68795 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen2-vl/README.md
@@ -79,7 +79,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -87,24 +86,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
index 9a3e3e03504..809d03ebfad 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/qwen2/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
index 3bfbf245655..06ad2ac1ad1 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/replit/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
index 4d157d19bf3..e97c4121458 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/solar/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
index fd487a38dae..af002746ab3 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/speech-t5/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
index 3c1952956d6..24befdd5a9d 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/stablelm/README.md
@@ -77,7 +77,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -85,24 +84,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
index e1ffd7d612c..09719a8c207 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/starcoder/README.md
@@ -71,7 +71,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -79,24 +78,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
index 2b500175575..0970a46a651 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yi/README.md
@@ -75,7 +75,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -83,24 +82,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.
diff --git a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
index 87c1ccd9a56..6055fe3a775 100644
--- a/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
+++ b/python/llm/example/GPU/PyTorch-Models/Model/yuan2/README.md
@@ -78,7 +78,6 @@ export ENABLE_SDP_FUSION=1
```bash
export SYCL_CACHE_PERSISTENT=1
-export BIGDL_LLM_XMX_DISABLED=1
```
@@ -86,24 +85,14 @@ export BIGDL_LLM_XMX_DISABLED=1
#### 3.2 Configurations for Windows
-For Intel iGPU
+For Intel iGPU and Intel Arc™ A-Series Graphics
```cmd
set SYCL_CACHE_PERSISTENT=1
-set BIGDL_LLM_XMX_DISABLED=1
```
-
-
-For Intel Arc™ A-Series Graphics
-
-```cmd
-set SYCL_CACHE_PERSISTENT=1
-```
-
-
> [!NOTE]
> For the first time that each model runs on Intel iGPU/Intel Arc™ A300-Series or Pro A60, it may take several minutes to compile.