add new benchmark_util.py (#11713)

* add new benchmark_util.py
intel · Aug 5, 2024 · 8fb36b9 · 8fb36b9
1 parent 493cbd9
commit 8fb36b9
Show file tree

Hide file tree

Showing 11 changed files with 4,607 additions and 9 deletions.
diff --git a/python/llm/dev/benchmark/README.md b/python/llm/dev/benchmark/README.md
@@ -9,7 +9,7 @@ Take `chatglm-6b` as an example:
 import torch
 from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
-from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+from ipex_llm.utils import BenchmarkWrapper
 
 model_path ='THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
@@ -37,7 +37,7 @@ import torch
 import intel_extension_for_pytorch as ipex
 from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
-from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+from ipex_llm.utils import BenchmarkWrapper
 
 model_path ='THUDM/chatglm-6b'
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True, load_in_4bit=True)
@@ -66,7 +66,7 @@ For example, just need to apply following code patch on [Deepspeed Autotp exampl
  import torch
  import transformers
  import deepspeed
-+from ipex_llm.utils.benchmark_util import BenchmarkWrapper
++from ipex_llm.utils import BenchmarkWrapper
 
  def get_int_from_env(env_keys, default):
      """Returns the first positive env value found in the `env_keys` list or the default."""

diff --git a/python/llm/dev/benchmark/all-in-one/config.yaml b/python/llm/dev/benchmark/all-in-one/config.yaml
@@ -1,6 +1,7 @@
 repo_id:
   # - 'THUDM/chatglm2-6b'
   - 'meta-llama/Llama-2-7b-chat-hf'
+  # - 'meta-llama/Meta-Llama-3.1-8B-Instruct'
   # - 'liuhaotian/llava-v1.5-7b' # requires a LLAVA_REPO_DIR env variables pointing to the llava dir; added only for gpu win related test_api now
 local_model_hub: 'path to your local model hub'
 warm_up: 1 # must set >=2 when run "pipeline_parallel_gpu" test_api

diff --git a/python/llm/dev/benchmark/all-in-one/run-stress-test.py b/python/llm/dev/benchmark/all-in-one/run-stress-test.py
@@ -28,7 +28,7 @@
 import os
 current_dir = os.path.dirname(os.path.realpath(__file__))
 import sys
-from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+from ipex_llm.utils import BenchmarkWrapper
 from ipex_llm.utils.common.log4Error import invalidInputError
 
 LLAMA_IDS = ['meta-llama/Llama-2-7b-chat-hf','meta-llama/Llama-2-13b-chat-hf',

diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
@@ -29,7 +29,7 @@
 import os
 current_dir = os.path.dirname(os.path.realpath(__file__))
 import sys
-from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+from ipex_llm.utils import BenchmarkWrapper
 from ipex_llm.utils.common.log4Error import invalidInputError
 from ipex_llm.utils.common import invalidInputError
 

diff --git a/python/llm/dev/test/lint-python b/python/llm/dev/test/lint-python
@@ -21,7 +21,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
 PYTHON_ROOT_DIR="$SCRIPT_DIR/.."
 echo $PYTHON_ROOT_DIR
 PATHS_TO_CHECK="$SCRIPT_DIR/../../src"
-PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*,benchmark_util.py,tgi_api_server.py"
+PATTERNS_TO_EXCLUDE="__init__.py,log4Error.py,$SCRIPT_DIR/../../src/ipex_llm/langchain/*,$SCRIPT_DIR/../../src/ipex_llm/transformers/gguf/models/model_implement/yuan2/*,benchmark_util_4_29.py,benchmark_util_4_43.py,tgi_api_server.py"
 PEP8_REPORT_PATH="$PYTHON_ROOT_DIR/test/pep8-report.txt"
 PYLINT_REPORT_PATH="$PYTHON_ROOT_DIR/test/pylint-report.txt"
 PYLINT_INSTALL_INFO="$PYTHON_ROOT_DIR/test/pylint-info.txt"

diff --git a/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/serving.py b/python/llm/example/GPU/Deepspeed-AutoTP-FastAPI/serving.py
@@ -39,7 +39,7 @@
 
 logger = logging.get_logger(__name__)
 
-from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+from ipex_llm.utils import BenchmarkWrapper
 
 
 def get_int_from_env(env_keys, default):

diff --git a/python/llm/src/ipex_llm/serving/fastapi/model_worker.py b/python/llm/src/ipex_llm/serving/fastapi/model_worker.py
@@ -27,7 +27,7 @@ def __init__(self, checkpoint, low_bit, torch_dtype=torch.float16):
         self.dtype = torch_dtype
         start = time.perf_counter()
         model = self.load_model(checkpoint, low_bit)
-        from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+        from ipex_llm.utils import BenchmarkWrapper
         self.model = BenchmarkWrapper(model, do_print=True)
         end = time.perf_counter()
         logger.info(f"Time to load weights: {end - start:.2f}s")

diff --git a/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py b/python/llm/src/ipex_llm/serving/fastchat/ipex_llm_worker.py
@@ -106,7 +106,7 @@ def __init__(
             load_low_bit_model,
         )
         if benchmark.lower() == "true" and not speculative:
-            from ipex_llm.utils.benchmark_util import BenchmarkWrapper
+            from ipex_llm.utils import BenchmarkWrapper
             self.model = BenchmarkWrapper(self.model, do_print=True)
             logger.info(f"enable benchmark successfully")
         self.stream_interval = stream_interval

diff --git a/python/llm/src/ipex_llm/utils/__init__.py b/python/llm/src/ipex_llm/utils/__init__.py
@@ -18,3 +18,23 @@
 # physically located elsewhere.
 # Otherwise there would be module not found error in non-pip's setting as Python would
 # only search the first bigdl package and end up finding only one sub-package.
+import re
+
+import transformers
+
+trans_version = transformers.__version__
+
+# Select the BenchmarkWrapper implementation matching the installed transformers
+# release. The version is compared as a tuple of integers: a plain string
+# comparison is lexicographic, so e.g. "4.5.0" >= "4.43.1" would wrongly be True.
+_version_match = re.match(r"(\d+)\.(\d+)(?:\.(\d+))?", trans_version)
+# Versions that cannot be parsed fall back to the older (4.29) implementation.
+_version_tuple = (
+    tuple(int(part or 0) for part in _version_match.groups())
+    if _version_match else (0, 0, 0)
+)
+
+if _version_tuple >= (4, 43, 1):
+    from .benchmark_util_4_43 import BenchmarkWrapper
+else:
+    from .benchmark_util_4_29 import BenchmarkWrapper
diff --git a/.../llm/src/ipex_llm/utils/benchmark_util.py → ...src/ipex_llm/utils/benchmark_util_4_29.py b/.../llm/src/ipex_llm/utils/benchmark_util.py → ...src/ipex_llm/utils/benchmark_util_4_29.py