Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-enable TTS (bark.cpp) Inference #359

Merged
merged 5 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 74 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ endif()

# bark_cpp project
# Previously disabled starting with v0.0.9.3; now built by default again.
option(BARK_BUILD "Build bark.cpp" OFF)
option(BARK_BUILD "Build bark.cpp" ON)
if(BARK_BUILD)
# Filter out HIPBLAS and Vulkan options for bark.cpp since it doesn't support them
set(BARK_CMAKE_OPTIONS ${USER_DEFINED_OPTIONS})
Expand Down Expand Up @@ -219,10 +219,26 @@ if(WIN32)
# Install the stable-diffusion DLLs found in the Release build output into
# nexa/gguf/lib/stable_diffusion under SKBUILD_PLATLIB_DIR.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; only files matching *.dll are copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/"
  DESTINATION "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/stable_diffusion"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING PATTERN "*.dll"
)

# Install the llama DLLs found in the Release build output into
# nexa/gguf/lib/llama under SKBUILD_PLATLIB_DIR.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; only files matching *.dll are copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/"
  DESTINATION "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/llama"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING PATTERN "*.dll"
)

install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/
${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/bark.cpp
USE_SOURCE_PERMISSIONS
FILES_MATCHING
PATTERN "*.dll"
Expand All @@ -231,10 +247,26 @@ if(WIN32)
# Install the stable-diffusion DLLs found in the Release build output into
# nexa/gguf/lib/stable_diffusion inside the source tree.
# NOTE(review): presumably the non-packaging (in-place) install path; the
# enclosing condition is not visible here, so confirm before relying on it.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; only files matching *.dll are copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/"
  DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/stable_diffusion"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING PATTERN "*.dll"
)

# Install the llama DLLs found in the Release build output into
# nexa/gguf/lib/llama inside the source tree.
# NOTE(review): presumably the non-packaging (in-place) install path; the
# enclosing condition is not visible here, so confirm before relying on it.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; only files matching *.dll are copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/"
  DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/llama"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING PATTERN "*.dll"
)

install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/
${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/bark.cpp
USE_SOURCE_PERMISSIONS
FILES_MATCHING
PATTERN "*.dll"
Expand All @@ -243,9 +275,27 @@ else()
# Install the stable-diffusion shared libraries (*.so / *.dylib) from the
# build's bin directory into nexa/gguf/lib/stable_diffusion under
# SKBUILD_PLATLIB_DIR.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; files matching neither pattern are not copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/"
  DESTINATION "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/stable_diffusion"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING
    PATTERN "*.so"
    PATTERN "*.dylib"
)

# Install the llama shared libraries (*.so / *.dylib) from the llama install
# tree's lib directory into nexa/gguf/lib/llama under SKBUILD_PLATLIB_DIR.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; files matching neither pattern are not copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/"
  DESTINATION "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/llama"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING
    PATTERN "*.so"
    PATTERN "*.dylib"
)

install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib
DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/bark.cpp
USE_SOURCE_PERMISSIONS
FILES_MATCHING
PATTERN "*.so"
Expand All @@ -255,12 +305,30 @@ else()
# Install the stable-diffusion shared libraries (*.so / *.dylib) from the
# build's bin directory into nexa/gguf/lib/stable_diffusion inside the source
# tree.
# NOTE(review): presumably the non-packaging (in-place) install path; the
# enclosing condition is not visible here, so confirm before relying on it.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; files matching neither pattern are not copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/"
  DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/stable_diffusion"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING
    PATTERN "*.so"
    PATTERN "*.dylib"
)

# Install the llama shared libraries (*.so / *.dylib) from the llama install
# tree's lib directory into nexa/gguf/lib/llama inside the source tree.
# NOTE(review): presumably the non-packaging (in-place) install path; the
# enclosing condition is not visible here, so confirm before relying on it.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; files matching neither pattern are not copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/"
  DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/llama"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING
    PATTERN "*.so"
    PATTERN "*.dylib"
)

# Install the bark.cpp shared libraries (*.so / *.dylib) from the bark install
# tree's lib directory into nexa/gguf/lib/bark.cpp inside the source tree.
# The destination is the per-library "bark.cpp" subdirectory, matching the
# lib/<subdir> layout used for llama and stable_diffusion and the
# "bark.cpp" subdirectory name the bark loader searches.
# NOTE(review): presumably the non-packaging (in-place) install path; the
# enclosing condition is not visible here, so confirm before relying on it.
# The trailing slash on the source directory installs its contents rather than
# the directory itself; files matching neither pattern are not copied.
install(
  DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/"
  DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/bark.cpp"
  USE_SOURCE_PERMISSIONS
  FILES_MATCHING
    PATTERN "*.so"
    PATTERN "*.dylib"
)
endif()
endif()
2 changes: 1 addition & 1 deletion dependency/bark.cpp
Submodule bark.cpp updated 1 files
+4 −0 CMakeLists.txt
6 changes: 2 additions & 4 deletions nexa/gguf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
from .nexa_inference_text import NexaTextInference
from .nexa_inference_vlm import NexaVLMInference
from .nexa_inference_voice import NexaVoiceInference

# Temporarily disabled since version v0.0.9.3
# from .nexa_inference_tts import NexaTTSInference
from .nexa_inference_tts import NexaTTSInference

__all__ = [
"NexaImageInference",
"NexaTextInference",
"NexaVLMInference",
"NexaVoiceInference",
#"NexaTTSInference",
"NexaTTSInference",
"NexaAudioLMInference"
]
36 changes: 29 additions & 7 deletions nexa/gguf/bark/bark_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,19 @@


# Load the library
def _load_shared_library(lib_base_name: str):
def _load_shared_library(lib_base_name: str, lib_subdir_name: str = ''):
"""
Loads a shared library for bark.cpp using ctypes.

Args:
lib_base_name (str): The base name of the shared library. For example, if your shared library file is named 'libbark.so',
this parameter should be set to 'bark'. The function automatically handles the 'lib' prefix and
platform-specific file extensions (e.g., .so, .dll, .dylib).
lib_subdir_name (str): The name of the subdirectory where the shared library is located. By default, the function looks
for libraries in the root directory without recursion. If a non-empty string is
provided, the function will search in '<root_dir>/<lib_subdir_name>/' instead.
"""

# Determine the file extension based on the platform
if sys.platform.startswith("linux"):
lib_ext = ".so"
Expand All @@ -19,10 +31,16 @@ def _load_shared_library(lib_base_name: str):

# Construct the paths to the possible shared library names
_base_path = pathlib.Path(__file__).parent.parent.resolve()
_lib_paths = [
_base_path / f"lib/lib{lib_base_name}{lib_ext}",
_base_path / f"lib/{lib_base_name}{lib_ext}",
]
if len(lib_subdir_name) == 0:
_lib_paths = [
_base_path / f"lib/lib{lib_base_name}{lib_ext}",
_base_path / f"lib/{lib_base_name}{lib_ext}",
]
else:
_lib_paths = [
_base_path / "lib" / lib_subdir_name / f"lib{lib_base_name}{lib_ext}",
_base_path / "lib" / lib_subdir_name / f"{lib_base_name}{lib_ext}",
]

if "BARK_CPP_LIB" in os.environ:
lib_base_name = os.environ["BARK_CPP_LIB"]
Expand All @@ -32,7 +50,10 @@ def _load_shared_library(lib_base_name: str):

# Add the library directory to the DLL search path on Windows (if needed)
if sys.platform == "win32" and sys.version_info >= (3, 8):
os.add_dll_directory(str(_base_path))
if len(lib_subdir_name) == 0:
os.add_dll_directory(str(_base_path / 'lib'))
else:
os.add_dll_directory(str(_base_path / 'lib' / lib_subdir_name))

# Try to load the shared library, handling potential errors
for _lib_path in _lib_paths:
Expand All @@ -49,9 +70,10 @@ def _load_shared_library(lib_base_name: str):

# Specify the base name of the shared library to load
_lib_base_name = "bark"
_lib_subdir_name = "bark.cpp"

# Load the library
_lib = _load_shared_library(_lib_base_name)
_lib = _load_shared_library(_lib_base_name, _lib_subdir_name)



Expand Down
4 changes: 3 additions & 1 deletion nexa/gguf/lib_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ def is_gpu_available():
return sentinel_file_exists

# Load the library
def load_library(lib_base_name: str):
def load_library(lib_base_name: str, lib_subdir_name: str = ''):
# Construct the paths to the possible shared library names
_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
if len(lib_subdir_name) != 0:
_base_path = _base_path / lib_subdir_name
# Searching for the library in the current directory under the name "libllama" (default name
# for llamacpp) and "llama" (default name for this repo)
_lib_paths: List[pathlib.Path] = []
Expand Down
3 changes: 2 additions & 1 deletion nexa/gguf/llama/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@

# Specify the base name of the shared library to load
_lib_base_name = "llama"
_lib_subdir_name = 'llama'
# Load the library
_lib = load_library(_lib_base_name)
_lib = load_library(_lib_base_name, _lib_subdir_name)

ctypes_function = ctypes_function_for_shared_library(_lib)

Expand Down
3 changes: 2 additions & 1 deletion nexa/gguf/sd/stable_diffusion_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@

# Specify the base name of the shared library to load
_lib_base_name = "stable-diffusion"
_lib_subdir_name = 'stable_diffusion'

# Load the library
_lib = load_library(_lib_base_name)
_lib = load_library(_lib_base_name, _lib_subdir_name)

# ctypes sane type hint helpers
#
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ cmake.args = [
"-DCMAKE_BUILD_PARALLEL_LEVEL=16",
"-DSTABLE_DIFFUSION_BUILD=ON",
"-DLLAMA_BUILD=ON",
"-DBARK_BUILD=OFF",
"-DBARK_BUILD=ON",
]

[tool.scikit-build.metadata.version]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_image_generation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from nexa.gguf import NexaImageInference
from tempfile import TemporaryDirectory
from .utils import download_model
from tests.utils import download_model

sd = NexaImageInference(
model_path="sd1-4",
Expand Down
36 changes: 18 additions & 18 deletions tests/test_tts_generation.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
# Re-enabled; this test was temporarily disabled starting with version v0.0.9.3.

# from nexa.gguf import NexaTTSInference
from nexa.gguf import NexaTTSInference

# def test_tts_generation():
# tts = NexaTTSInference(
# model_path="bark-small",
# local_path=None,
# n_threads=4,
# seed=42,
# sampling_rate=24000,
# verbosity=2
# )
def test_tts_generation():
tts = NexaTTSInference(
model_path="bark-small",
local_path=None,
n_threads=4,
seed=42,
sampling_rate=24000,
verbosity=2
)

# # Generate audio from prompt
# prompt = "Hello, this is a test of the Bark text to speech system."
# audio_data = tts.audio_generation(prompt)
# Generate audio from prompt
prompt = "Hello, this is a test of the Bark text to speech system."
audio_data = tts.audio_generation(prompt)

# # Save the generated audio
# tts._save_audio(audio_data, tts.sampling_rate, "tts_output")
# print("TTS generation test completed successfully!")
# Save the generated audio
tts._save_audio(audio_data, tts.sampling_rate, "tts_output")
print("TTS generation test completed successfully!")

# if __name__ == "__main__":
# test_tts_generation()
if __name__ == "__main__":
test_tts_generation()