
Commit

Merge remote-tracking branch 'origin' into zack-dev
zhiyuan8 committed Aug 25, 2024
2 parents 1abc6a0 + c00ce6c commit 4e444e4
Showing 24 changed files with 938 additions and 1,211 deletions.
20 changes: 14 additions & 6 deletions README.md
@@ -84,12 +84,20 @@ If pre-built wheels cannot meet your requirements, you can install Nexa SDK from
pip install nexaai
```

- > [!IMPORTANT]
- > If you are using a Mac with Intel chips, run the following command:
- >
- > ```bash
- > CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
- > ```
+ <details>
+ <summary><strong>FAQ: Building Issues for llava</strong></summary>
+
+ If you encounter the following issue while building:
+
+ ![](docs/.media/error.jpeg)
+
+ try the following command:
+
+ ```bash
+ CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
+ ```
+
+ </details>

#### GPU (Metal)

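Background note (an editorial gloss, not part of the diff): `CMAKE_ARGS` forwards extra flags to the CMake build that compiles the bundled C++ backend during `pip install`, and `-fopenmp` asks the compiler to link OpenMP explicitly, which some toolchains on Intel Macs do not do by default.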
Binary file added docs/.media/error.jpeg
2 changes: 1 addition & 1 deletion nexa/__init__.py
@@ -1 +1 @@
__version__ = "0.0.6.post2"
__version__ = "0.0.7"
3 changes: 1 addition & 2 deletions nexa/cli/entry.py
@@ -102,11 +102,10 @@ def main():
image_group = run_parser.add_argument_group('Image generation options')
image_group.add_argument("-i2i", "--img2img", action="store_true", help="Whether to run image-to-image generation")
image_group.add_argument("-ns", "--num_inference_steps", type=int, help="Number of inference steps")
image_group.add_argument("-np", "--num_images_per_prompt", type=int, help="Number of images to generate per prompt")
image_group.add_argument("-H", "--height", type=int, help="Height of the output image")
image_group.add_argument("-W", "--width", type=int, help="Width of the output image")
image_group.add_argument("-g", "--guidance_scale", type=float, help="Guidance scale for diffusion")
image_group.add_argument("-o", "--output", type=str, help="Output path for the generated image")
image_group.add_argument("-o", "--output", type=str, default="generated_images/image.png", help="Output path for the generated image")
image_group.add_argument("-s", "--random_seed", type=int, help="Random seed for image generation")
image_group.add_argument("--lora_dir", type=str, help="Path to directory containing LoRA files")
image_group.add_argument("--wtype", type=str, help="Weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)")
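A minimal sketch of what the new `--output` default means at parse time (parser wiring reconstructed from the argument group above, not taken verbatim from the file):

```python
import argparse

# Reconstructed fragment: with a default in place, args.output is always a
# usable path, so downstream code needs no None check when -o is omitted.
parser = argparse.ArgumentParser()
image_group = parser.add_argument_group('Image generation options')
image_group.add_argument("-o", "--output", type=str,
                         default="generated_images/image.png",
                         help="Output path for the generated image")

args = parser.parse_args([])      # simulate a run with no -o flag
print(args.output)                # -> generated_images/image.png
```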
3 changes: 0 additions & 3 deletions nexa/constants.py
@@ -181,7 +181,6 @@

DEFAULT_IMG_GEN_PARAMS = {
"num_inference_steps": 20,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 7.5,
@@ -191,7 +190,6 @@

DEFAULT_IMG_GEN_PARAMS_LCM = {
"num_inference_steps": 4,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 1.0,
@@ -201,7 +199,6 @@

DEFAULT_IMG_GEN_PARAMS_TURBO = {
"num_inference_steps": 5,
"num_images_per_prompt": 1,
"height": 512,
"width": 512,
"guidance_scale": 5.0,
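With `num_images_per_prompt` dropped from all three presets, here is a sketch of how defaults like these are typically merged with CLI values (the helper below is illustrative, not code from this commit):

```python
# Hypothetical merge helper: CLI values that were actually set win;
# anything left as None falls back to the preset defaults.
DEFAULT_IMG_GEN_PARAMS = {
    "num_inference_steps": 20,
    "height": 512,
    "width": 512,
    "guidance_scale": 7.5,
}

def resolve_params(cli_args: dict, defaults: dict) -> dict:
    overrides = {k: v for k, v in cli_args.items() if v is not None}
    return {**defaults, **overrides}

params = resolve_params({"guidance_scale": 9.0, "height": None},
                        DEFAULT_IMG_GEN_PARAMS)
print(params)  # guidance_scale=9.0; height/width/steps from the preset
```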
27 changes: 18 additions & 9 deletions nexa/gguf/lib_utils.py
@@ -18,7 +18,6 @@ def is_gpu_available():
def load_library(lib_base_name: str):
# Construct the paths to the possible shared library names
_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
logging.debug(f"Base path for libraries: {_base_path}")
# Searching for the library in the current directory under the name "libllama" (default name
# for llamacpp) and "llama" (default name for this repo)
_lib_paths: List[pathlib.Path] = []
@@ -29,18 +28,16 @@ def load_library(lib_base_name: str):
]
elif sys.platform == "darwin":
_lib_paths += [
_base_path / f"lib{lib_base_name}.dylib",
_base_path / f"lib{lib_base_name}.so",
_base_path / f"lib{lib_base_name}.dylib",
]
elif sys.platform == "win32":
_lib_paths += [
_base_path / f"{lib_base_name}.dll",
_base_path / f"lib{lib_base_name}.dll",
]
- _add_windows_dll_directories(_base_path)
else:
raise RuntimeError("Unsupported platform")
logging.debug(f"Possible shared library paths: {_lib_paths}")

if "LLAMA_CPP_LIB" in os.environ:
lib_base_name = os.environ["LLAMA_CPP_LIB"]
@@ -50,19 +47,31 @@ def load_library(lib_base_name: str):

cdll_args = dict() # type: ignore

+ # Add the library directory to the DLL search path on Windows (if needed)
+ if sys.platform == "win32":
+ os.add_dll_directory(str(_base_path))
+ os.environ["PATH"] = str(_base_path) + os.pathsep + os.environ["PATH"]

if sys.platform == "win32" and sys.version_info >= (3, 8):
os.add_dll_directory(str(_base_path))
if "CUDA_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
if "HIP_PATH" in os.environ:
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
cdll_args["winmode"] = ctypes.RTLD_GLOBAL

+ # Try to load the shared library, handling potential errors
for _lib_path in _lib_paths:
logging.debug(f"Trying to load shared library from: {_lib_path}")
if _lib_path.exists():
try:
- loaded_lib = ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore
- logging.debug(f"Successfully loaded shared library: {_lib_path}")
- return loaded_lib
+ return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore
except Exception as e:
raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}")

raise FileNotFoundError(
f"Shared library with base name '{lib_base_name}' not found in paths: {_lib_paths}"
f"Shared library with base name '{lib_base_name}' not found"
)


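A usage sketch for the loader above; the symbol and its return type are assumptions for illustration, not bindings defined in this file:

```python
import ctypes
from nexa.gguf.lib_utils import load_library

# load_library resolves lib<name>.so/.dylib or <name>.dll under gguf/lib,
# then hands back a ctypes.CDLL; callers bind symbols themselves.
lib = load_library("llama")
lib.llama_max_devices.restype = ctypes.c_size_t  # assumed symbol/signature
print(lib.llama_max_devices())
```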
18 changes: 5 additions & 13 deletions nexa/gguf/llama/_internals_transformers.py
@@ -179,11 +179,11 @@ def token_eot(self) -> int:
assert self.model is not None
return llama_cpp.llama_token_eot(self.model)

- def add_bos_token(self) -> int:
+ def add_bos_token(self) -> bool:
assert self.model is not None
return llama_cpp.llama_add_bos_token(self.model)

- def add_eos_token(self) -> int:
+ def add_eos_token(self) -> bool:
assert self.model is not None
return llama_cpp.llama_add_eos_token(self.model)

@@ -343,14 +343,6 @@ def get_state_size(self) -> int:
assert self.ctx is not None
return llama_cpp.llama_get_state_size(self.ctx)

- # TODO: copy_state_data
-
- # TODO: set_state_data
-
- # TODO: llama_load_session_file
-
- # TODO: llama_save_session_file
-
def decode(self, batch: "_LlamaBatch"):
assert self.ctx is not None
assert batch.batch is not None
@@ -511,7 +503,7 @@ def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
def grammar_accept_token(self, grammar: LlamaGrammar, token: int):
assert self.ctx is not None
assert grammar.grammar is not None
- llama_cpp.llama_grammar_accept_token(self.ctx, grammar.grammar, token)
+ llama_cpp.llama_grammar_accept_token(grammar.grammar, self.ctx, token)

def reset_timings(self):
assert self.ctx is not None
@@ -691,8 +683,8 @@ def _detokenize_bpe(model: _LlamaModel, tokens: List[int]) -> str:
def _should_add_bos(model: _LlamaModel) -> bool:
assert model.model is not None
add_bos = llama_cpp.llama_add_bos_token(model.model)
- if add_bos != -1:
- return add_bos != 0
+ if add_bos:
+ return add_bos
else:
return llama_cpp.llama_vocab_type(model.model) == llama_cpp.LLAMA_VOCAB_TYPE_SPM

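Two semantic shifts in this file are worth a sketch: `llama_add_bos_token` now returns a plain bool instead of an int whose -1 meant "unknown" (hence the dropped `!= -1` check in `_should_add_bos`), and `llama_grammar_accept_token` now takes the grammar before the context, which appears to track a reordered signature in the bundled llama.cpp. A minimal illustration with stand-in values:

```python
# Stand-in values only; no calls into the real library.
add_bos = True        # stands in for llama_cpp.llama_add_bos_token(model)
vocab_is_spm = False  # stands in for the SPM vocab-type fallback check

# New logic: trust the bool when it is set, else fall back to vocab type.
should_add = add_bos if add_bos else vocab_is_spm
# Old logic, for contrast: add_bos != 0 if add_bos != -1 else vocab_is_spm
print(should_add)  # -> True
```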
2 changes: 1 addition & 1 deletion nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os

- def __init__(self, disable: bool = False):
+ def __init__(self, disable: bool = True):
self.disable = disable

# Oddly enough this works better than the contextlib version
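Flipping the default to `disable=True` makes the context manager a pass-through unless suppression is requested explicitly. A usage sketch, assuming the module imports under the path shown in the diff header:

```python
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

with suppress_stdout_stderr(disable=False):
    print("hidden")    # suppression explicitly requested

with suppress_stdout_stderr():
    print("visible")   # new default: output passes through untouched
```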
