Skip to content

Commit

Permalink
Merge branch 'main' into xingyu/unified_lib
Browse files Browse the repository at this point in the history
  • Loading branch information
xyyimian committed Aug 22, 2024
2 parents b2ba114 + 72d93b2 commit 32452de
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 89 deletions.
52 changes: 0 additions & 52 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,55 +132,3 @@ if (LLAMA_BUILD)
endif()
endif()
endif()

# Project: stable_diffusion_cpp
#
# Builds the vendored stable-diffusion.cpp sources (dependency/stable-diffusion.cpp)
# as a shared library and installs the resulting `stable-diffusion` target under
# nexa/gguf/lib, both in the scikit-build platlib directory and in the source tree.
project(stable_diffusion_cpp)

# STABLE_DIFFUSION_BUILD: master switch for this whole section (default ON).
option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp shared library and install alongside python package" ON)

if (STABLE_DIFFUSION_BUILD)
    # Ask for shared libraries, both through the generic CMake switch and through
    # SD_BUILD_SHARED_LIBS (presumably the option read by stable-diffusion.cpp's own
    # CMakeLists -- confirm against the vendored version).
    set(BUILD_SHARED_LIBS "ON")
    option(SD_BUILD_SHARED_LIBS "stable-diffusion.cpp: build shared libraries" ON)

    # Building stable-diffusion.cpp
    if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
        # Need to disable these ggml flags on Apple x86_64,
        # otherwise users may encounter invalid instruction errors.
        # FORCE is deliberate here: the cached values must be overridden.
        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
        set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
        set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
    endif()

    add_subdirectory(dependency/stable-diffusion.cpp)

    message(STATUS "SKBUILD_PLATLIB_DIR: ${SKBUILD_PLATLIB_DIR}")

    # The library is installed twice:
    #   1. into the scikit-build platlib directory (the wheel contents), and
    #   2. into the source tree, as a temporary fix for
    #      https://github.com/scikit-build/scikit-build-core/issues/374
    # NOTE(review): if SKBUILD_PLATLIB_DIR is unset the first entry expands to
    # the absolute path "/nexa/gguf/lib" -- confirm this file is only ever
    # configured through scikit-build-core.
    set(sd_install_dirs
        "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib"
        "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib"
    )

    foreach(sd_install_dir IN LISTS sd_install_dirs)
        install(
            TARGETS stable-diffusion
            LIBRARY DESTINATION "${sd_install_dir}"
            RUNTIME DESTINATION "${sd_install_dir}"
            ARCHIVE DESTINATION "${sd_install_dir}"
            FRAMEWORK DESTINATION "${sd_install_dir}"
            RESOURCE DESTINATION "${sd_install_dir}"
        )

        # Workaround for Windows + CUDA: also copy the DLLs the target depends
        # on at runtime next to it ($<TARGET_RUNTIME_DLLS:...> needs CMake >= 3.21).
        if (WIN32)
            install(
                FILES $<TARGET_RUNTIME_DLLS:stable-diffusion>
                DESTINATION "${sd_install_dir}"
            )
        endif()
    endforeach()
endif()
19 changes: 16 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Detailed API documentation is available [here](docs/index.html).

## Installation

**GPU version(optional)**
**GPU version(optional)**

check if you have GPU acceleration (torch required)
<details>
Expand All @@ -40,16 +40,24 @@ check if you have GPU acceleration (torch required)
```
CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai-gpu
```
Or, if you prefer, install our pre-built wheel:
```bash
pip install nexaai-cuda --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
```
</details>
<details>
<summary>Apple M Chip:</summary>
Apple icon -> about this mac -> Graphics

if True:

```
CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai-gpu
```
Or, if you prefer, install our pre-built wheel:
```bash
pip install nexaai-metal --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
```
</details>

<details>
Expand Down Expand Up @@ -77,7 +85,12 @@ check if you have GPU acceleration (torch required)
```
pip install nexaai
```
<details>
</details>

Or, if you prefer, install the pre-built wheel:
```bash
pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
```

## Nexa CLI commands

Expand Down
24 changes: 12 additions & 12 deletions nexa/gguf/nexa_inference_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class NexaImageInference:
streamlit (bool): Run the inference in Streamlit UI.
"""


def __init__(self, model_path, **kwargs):
self.model_path = None
Expand Down Expand Up @@ -81,7 +81,7 @@ def __init__(self, model_path, **kwargs):
logging.error("Failed to load the model or pipeline.")
exit(1)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self, model_path: str):
with suppress_stdout_stderr():
from nexa.gguf.sd.stable_diffusion import StableDiffusion
Expand All @@ -108,9 +108,9 @@ def _save_images(self, images):
file_path = os.path.join(output_dir, file_name)
image.save(file_path)
logging.info(f"\nImage {i+1} saved to: {file_path}")
def txt2img(self,
prompt,

def txt2img(self,
prompt,
negative_prompt="",
cfg_scale=7.5,
width=512,
Expand Down Expand Up @@ -151,7 +151,7 @@ def run_txt2img(self):
)
try:
images = self.txt2img(
prompt,
prompt,
negative_prompt,
cfg_scale=self.params["guidance_scale"],
width=self.params["width"],
Expand All @@ -169,9 +169,9 @@ def run_txt2img(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)

def img2img(self,
image_path,
prompt,
def img2img(self,
image_path,
prompt,
negative_prompt="",
cfg_scale=7.5,
width=512,
Expand Down Expand Up @@ -213,8 +213,8 @@ def run_img2img(self):
negative_prompt = nexa_prompt(
"Enter your negative prompt (press Enter to skip): "
)
images = self.img2img(image_path,
prompt,
images = self.img2img(image_path,
prompt,
negative_prompt,
cfg_scale=self.params["guidance_scale"],
width=self.params["width"],
Expand All @@ -224,7 +224,7 @@ def run_img2img(self):
control_cond=self.params.get("control_image_path", ""),
control_strength=self.params.get("control_strength", 0.9),
)

self._save_images(images)
except KeyboardInterrupt:
print(EXIT_REMINDER)
Expand Down
10 changes: 5 additions & 5 deletions nexa/gguf/nexa_inference_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class NexaTextInference:
top_k (int): Top-k sampling parameter.
top_p (float): Top-p sampling parameter
"""

def __init__(self, model_path, stop_words=None, **kwargs):
self.params = DEFAULT_TEXT_GEN_PARAMS
self.params.update(kwargs)
Expand Down Expand Up @@ -98,7 +98,7 @@ def create_embedding(
"""
return self.model.create_embedding(input)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
logging.debug(f"Loading model from {self.downloaded_path}")
start_time = time.time()
Expand Down Expand Up @@ -140,7 +140,7 @@ def _load_model(self):

def run(self):
"""
CLI interactive session. Not for SDK.
CLI interactive session. Not for SDK.
"""
while True:
generated_text = ""
Expand Down Expand Up @@ -189,7 +189,7 @@ def run(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)
print("\n")

def create_chat_completion(self, messages, temperature=0.7, max_tokens=2048, top_k=50, top_p=1.0, stream=False, stop=None):
"""
Used for SDK. Generate completion for a chat conversation.
Expand All @@ -207,7 +207,7 @@ def create_chat_completion(self, messages, temperature=0.7, max_tokens=2048, top
Iterator: Iterator for the completion.
"""
return self.model.create_chat_completion(messages=messages, temperature=temperature, max_tokens=max_tokens, top_k=top_k, top_p=top_p, stream=stream, stop=stop)

def create_completion(self, prompt, temperature=0.7, max_tokens=2048, top_k=50, top_p=1.0, echo=False, stream=False, stop=None):
"""
Used for SDK. Generate completion for a given prompt.
Expand Down
24 changes: 12 additions & 12 deletions nexa/gguf/nexa_inference_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class NexaVLMInference:
top_k (int): Top-k sampling parameter.
top_p (float): Top-p sampling parameter
"""


def __init__(self, model_path, stop_words=None, **kwargs):
self.params = DEFAULT_TEXT_GEN_PARAMS
Expand Down Expand Up @@ -146,7 +146,7 @@ def __init__(self, model_path, stop_words=None, **kwargs):
)
exit(1)

# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
logging.debug(f"Loading model from {self.downloaded_path}")
start_time = time.time()
Expand Down Expand Up @@ -240,18 +240,18 @@ def run(self):
except Exception as e:
logging.error(f"Error during generation: {e}", exc_info=True)
print("\n")
def create_chat_completion(self,
messages,
max_tokens:int = 2048,

def create_chat_completion(self,
messages,
max_tokens:int = 2048,
temperature: float = 0.2,
top_p: float = 0.95,
top_k: int = 40,
stream=False,
stream=False,
stop=[]):
"""
Generate text completion for a given chat prompt.
Args:
messages (list): List of messages in the chat prompt.
temperature (float): Temperature for sampling.
Expand All @@ -260,7 +260,7 @@ def create_chat_completion(self,
top_p (float): Top-p sampling parameter.
stream (bool): Stream the output.
stop (list): List of stop words for early stopping.
Returns:
Iterator: An iterator of the generated text completion
return format:
Expand All @@ -285,9 +285,9 @@ def create_chat_completion(self,
"prompt_tokens": 57,
"total_tokens": 74
}
}
usage: message = completion.choices[0].message.content
}
usage: message = completion.choices[0].message.content
"""
return self.model.create_chat_completion(
messages=messages,
Expand Down
6 changes: 3 additions & 3 deletions nexa/gguf/nexa_inference_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def __init__(self, model_path, **kwargs):
exit(1)


# @SpinningCursorAnimation()
@SpinningCursorAnimation()
def _load_model(self):
from faster_whisper import WhisperModel

Expand All @@ -91,7 +91,7 @@ def run(self):
print(EXIT_REMINDER)
except Exception as e:
logging.error(f"Error during text generation: {e}", exc_info=True)

def transcribe(self, audio, **kwargs):
"""
Transcribe the audio file.
Expand Down Expand Up @@ -171,7 +171,7 @@ def transcribe(self, audio, **kwargs):
audio,
**kwargs,
)


def _transcribe_audio(self, audio_path):
logging.debug(f"Transcribing audio from: {audio_path}")
Expand Down
8 changes: 7 additions & 1 deletion nexa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,13 @@ def _spin(self):

def __enter__(self):
if self._use_alternate_stream:
self.stream = open("/dev/tty", "w")
if sys.platform == "win32": # Windows
self.stream = open('CONOUT$', "w")
else:
try:
self.stream = open('/dev/tty', "w")
except (FileNotFoundError, OSError):
self.stream = open('/dev/stdout', "w")
self.thread = threading.Thread(target=self._spin)
self.thread.start()
return self
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"

[project]
name = "nexaai"
version = "0.0.1"
version = "0.0.2.dev"
description = "Nexa AI SDK"
readme = "README.md"
license = { text = "MIT" }
Expand Down

0 comments on commit 32452de

Please sign in to comment.