
Commit

Merge remote-tracking branch 'origin' into zack-dev
zhiyuan8 committed Aug 25, 2024
2 parents 1abc6a0 + c00ce6c commit 4e444e4
Showing 24 changed files with 938 additions and 1,211 deletions.
20 changes: 14 additions & 6 deletions README.md
@@ -84,12 +84,20 @@ If pre-built wheels cannot meet your requirements, you can install Nexa SDK from
pip install nexaai
```

- > [!IMPORTANT]
- > If you are using a Mac with Intel chips, run the following command:
- >
- > ```bash
- > CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
- > ```
+ <details>
+ <summary><strong>FAQ: Building Issues for llava</strong></summary>
+
+ If you encounter the following issue while building:
+
+ ![](docs/.media/error.jpeg)
+
+ try the following command:
+
+ ```bash
+ CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
+ ```
+
+ </details>

#### GPU (Metal)

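Background note (an editorial gloss, not part of the diff): `CMAKE_ARGS` forwards extra flags to the CMake build that compiles the bundled C++ backend during `pip install`, and `-fopenmp` asks the compiler to link OpenMP explicitly, which some toolchains on Intel Macs do not do by default.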
Binary file added docs/.media/error.jpeg
2 changes: 1 addition & 1 deletion nexa/__init__.py
@@ -1 +1 @@
__version__ = "0.0.6.post2"
__version__ = "0.0.7"
3 changes: 1 addition & 2 deletions nexa/cli/entry.py
@@ -102,11 +102,10 @@ def main():
image_group = run_parser.add_argument_group('Image generation options')
image_group.add_argument("-i2i", "--img2img", action="store_true", help="Whether to run image-to-image generation")
image_group.add_argument("-ns", "--num_inference_steps", type=int, help="Number of inference steps")
image_group.add_argument("-np", "--num_images_per_prompt", type=int, help="Number of images to generate per prompt")
image_group.add_argument("-H", "--height", type=int, help="Height of the output image")
image_group.add_argument("-W", "--width", type=int, help="Width of the output image")
image_group.add_argument("-g", "--guidance_scale", type=float, help="Guidance scale for diffusion")
image_group.add_argument("-o", "--output", type=str, help="Output path for the generated image")
image_group.add_argument("-o", "--output", type=str, default="generated_images/image.png", help="Output path for the generated image")
image_group.add_argument("-s", "--random_seed", type=int, help="Random seed for image generation")
image_group.add_argument("--lora_dir", type=str, help="Path to directory containing LoRA files")
image_group.add_argument("--wtype", type=str, help="Weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)")
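A minimal sketch of what the new `--output` default means at parse time (parser wiring reconstructed from the argument group above, not taken verbatim from the file):

```python
import argparse

# Reconstructed fragment: with a default in place, args.output is always a
# usable path, so downstream code needs no None check when -o is omitted.
parser = argparse.ArgumentParser()
image_group = parser.add_argument_group('Image generation options')
image_group.add_argument("-o", "--output", type=str,
                         default="generated_images/image.png",
                         help="Output path for the generated image")

args = parser.parse_args([])      # simulate a run with no -o flag
print(args.output)                # -> generated_images/image.png
```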
3 changes: 0 additions & 3 deletions nexa/constants.py
@@ -181,7 +181,6 @@

DEFAULT_IMG_GEN_PARAMS = {
"num_inference_steps": 20,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 7.5,
@@ -191,7 +190,6 @@

DEFAULT_IMG_GEN_PARAMS_LCM = {
"num_inference_steps": 4,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 1.0,
@@ -201,7 +199,6 @@

DEFAULT_IMG_GEN_PARAMS_TURBO = {
"num_inference_steps": 5,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 5.0,
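With `num_images_per_prompt` dropped from all three presets, here is a sketch of how defaults like these are typically merged with CLI values (the helper below is illustrative, not code from this commit):

```python
# Hypothetical merge helper: CLI values that were actually set win;
# anything left as None falls back to the preset defaults.
DEFAULT_IMG_GEN_PARAMS = {
    "num_inference_steps": 20,
    "height": 512,
    "width": 512,
    "guidance_scale": 7.5,
}

def resolve_params(cli_args: dict, defaults: dict) -> dict:
    overrides = {k: v for k, v in cli_args.items() if v is not None}
    return {**defaults, **overrides}

params = resolve_params({"guidance_scale": 9.0, "height": None},
                        DEFAULT_IMG_GEN_PARAMS)
print(params)  # guidance_scale=9.0; height/width/steps from the preset
```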
27 changes: 18 additions & 9 deletions nexa/gguf/lib_utils.py
@@ -18,7 +18,6 @@ def is_gpu_available():
def load_library(lib_base_name: str):
# Construct the paths to the possible shared library names
_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
logging.debug(f"Base path for libraries: {_base_path}")
# Searching for the library in the current directory under the name "libllama" (default name
# for llamacpp) and "llama" (default name for this repo)
_lib_paths: List[pathlib.Path] = []
@@ -29,18 +28,16 @@ def load_library(lib_base_name: str):
]
elif sys.platform == "darwin":
_lib_paths += [
_base_path / f"lib{lib_base_name}.dylib",
_base_path / f"lib{lib_base_name}.so",
_base_path / f"lib{lib_base_name}.dylib",
]
elif sys.platform == "win32":
_lib_paths += [
_base_path / f"{lib_base_name}.dll",
_base_path / f"lib{lib_base_name}.dll",
]
- _add_windows_dll_directories(_base_path)
else:
raise RuntimeError("Unsupported platform")
logging.debug(f"Possible shared library paths: {_lib_paths}")

if "LLAMA_CPP_LIB" in os.environ:
lib_base_name = os.environ["LLAMA_CPP_LIB"]
@@ -50,19 +47,31 @@ def load_library(lib_base_name: str):

cdll_args = dict() # type: ignore

+ # Add the library directory to the DLL search path on Windows (if needed)
+ if sys.platform == "win32":
+ os.add_dll_directory(str(_base_path))
+ os.environ["PATH"] = str(_base_path) + os.pathsep + os.environ["PATH"]

if sys.platform == "win32" and sys.version_info >= (3, 8):
os.add_dll_directory(str(_base_path))
if "CUDA_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
if "HIP_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
cdll_args["winmode"] = ctypes.RTLD_GLOBAL

+ # Try to load the shared library, handling potential errors
for _lib_path in _lib_paths:
logging.debug(f"Trying to load shared library from: {_lib_path}")
if _lib_path.exists():
try:
- loaded_lib = ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore
- logging.debug(f"Successfully loaded shared library: {_lib_path}")
- return loaded_lib
+ return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore
except Exception as e:
raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}")

raise FileNotFoundError(
f"Shared library with base name '{lib_base_name}' not found in paths: {_lib_paths}"
f"Shared library with base name '{lib_base_name}' not found"
)


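A usage sketch for the loader above; the symbol and its return type are assumptions for illustration, not bindings defined in this file:

```python
import ctypes
from nexa.gguf.lib_utils import load_library

# load_library resolves lib<name>.so/.dylib or <name>.dll under gguf/lib,
# then hands back a ctypes.CDLL; callers bind symbols themselves.
lib = load_library("llama")
lib.llama_max_devices.restype = ctypes.c_size_t  # assumed symbol/signature
print(lib.llama_max_devices())
```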
18 changes: 5 additions & 13 deletions nexa/gguf/llama/_internals_transformers.py
@@ -179,11 +179,11 @@ def token_eot(self) -> int:
assert self.model is not None
return llama_cpp.llama_token_eot(self.model)

- def add_bos_token(self) -> int:
+ def add_bos_token(self) -> bool:
assert self.model is not None
return llama_cpp.llama_add_bos_token(self.model)

- def add_eos_token(self) -> int:
+ def add_eos_token(self) -> bool:
assert self.model is not None
return llama_cpp.llama_add_eos_token(self.model)

@@ -343,14 +343,6 @@ def get_state_size(self) -> int:
assert self.ctx is not None
return llama_cpp.llama_get_state_size(self.ctx)

- # TODO: copy_state_data
-
- # TODO: set_state_data
-
- # TODO: llama_load_session_file
-
- # TODO: llama_save_session_file
-
def decode(self, batch: "_LlamaBatch"):
assert self.ctx is not None
assert batch.batch is not None
@@ -511,7 +503,7 @@ def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
def grammar_accept_token(self, grammar: LlamaGrammar, token: int):
assert self.ctx is not None
assert grammar.grammar is not None
- llama_cpp.llama_grammar_accept_token(self.ctx, grammar.grammar, token)
+ llama_cpp.llama_grammar_accept_token(grammar.grammar, self.ctx, token)

def reset_timings(self):
assert self.ctx is not None
@@ -691,8 +683,8 @@ def _detokenize_bpe(model: _LlamaModel, tokens: List[int]) -> str:
def _should_add_bos(model: _LlamaModel) -> bool:
assert model.model is not None
add_bos = llama_cpp.llama_add_bos_token(model.model)
- if add_bos != -1:
- return add_bos != 0
+ if add_bos:
+ return add_bos
else:
return llama_cpp.llama_vocab_type(model.model) == llama_cpp.LLAMA_VOCAB_TYPE_SPM

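Two semantic shifts in this file are worth a sketch: `llama_add_bos_token` now returns a plain bool instead of an int whose -1 meant "unknown" (hence the dropped `!= -1` check in `_should_add_bos`), and `llama_grammar_accept_token` now takes the grammar before the context, which appears to track a reordered signature in the bundled llama.cpp. A minimal illustration with stand-in values:

```python
# Stand-in values only; no calls into the real library.
add_bos = True        # stands in for llama_cpp.llama_add_bos_token(model)
vocab_is_spm = False  # stands in for the SPM vocab-type fallback check

# New logic: trust the bool when it is set, else fall back to vocab type.
should_add = add_bos if add_bos else vocab_is_spm
# Old logic, for contrast: add_bos != 0 if add_bos != -1 else vocab_is_spm
print(should_add)  # -> True
```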
2 changes: 1 addition & 1 deletion nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os

- def __init__(self, disable: bool = False):
+ def __init__(self, disable: bool = True):
self.disable = disable

# Oddly enough this works better than the contextlib version
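Flipping the default to `disable=True` makes the context manager a pass-through unless suppression is requested explicitly. A usage sketch, assuming the module imports under the path shown in the diff header:

```python
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

with suppress_stdout_stderr(disable=False):
    print("hidden")    # suppression explicitly requested

with suppress_stdout_stderr():
    print("visible")   # new default: output passes through untouched
```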
