Skip to content

Commit

Permalink
feat(transformers): split in separate backend
Browse files Browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler committed Nov 20, 2023
1 parent caedb4e commit 54501d4
Show file tree
Hide file tree
Showing 23 changed files with 777 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
sudo rm -rfv /usr/bin/conda || true
PATH=$PATH:/opt/conda/bin make -C backend/python/huggingface
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
# Pre-build piper before we start tests in order to have shared libraries in place
make sources/go-piper && \
Expand Down
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
ARG GO_TAGS="stablediffusion tts"

Expand Down Expand Up @@ -169,7 +169,10 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/huggingface \
PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \
Expand Down
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ test: prepare test-models/testmodel grpcs
@echo 'Running tests'
export GO_TAGS="tts stablediffusion"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/huggingface/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts 5 --fail-fast -v -r ./api ./pkg
$(MAKE) test-gpt4all
$(MAKE) test-llama
Expand Down Expand Up @@ -367,7 +367,8 @@ protogen-go:
backend/backend.proto

protogen-python:
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/huggingface/ --grpc_python_out=backend/python/huggingface/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
Expand All @@ -382,7 +383,8 @@ prepare-extra-conda-environments:
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/diffusers
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/huggingface
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/exllama

Expand Down
2 changes: 1 addition & 1 deletion api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ var _ = Describe("API test", func() {
})

Context("External gRPC calls", func() {
It("calculate embeddings with huggingface", func() {
It("calculate embeddings with sentencetransformers", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
Expand Down
5 changes: 0 additions & 5 deletions backend/python/huggingface/README.md

This file was deleted.

18 changes: 18 additions & 0 deletions backend/python/sentencetransformers/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
.PONY: sentencetransformers
sentencetransformers:
@echo "Creating virtual environment..."
@conda env create --name sentencetransformers --file sentencetransformers.yml
@echo "Virtual environment created."

.PONY: run
run:
@echo "Running sentencetransformers..."
bash run.sh
@echo "sentencetransformers run."

# It is not working well by using command line. It only6 works with IDE like VSCode.
.PONY: test
test:
@echo "Testing sentencetransformers..."
bash test.sh
@echo "sentencetransformers tested."
5 changes: 5 additions & 0 deletions backend/python/sentencetransformers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Creating a separate environment for the sentencetransformers project

```
make sentencetransformers
```
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: huggingface
name: sentencetransformers
channels:
- defaults
dependencies:
Expand Down Expand Up @@ -74,4 +74,4 @@ dependencies:
- triton==2.1.0
- typing-extensions==4.8.0
- urllib3==2.0.6
prefix: /opt/conda/envs/huggingface
prefix: /opt/conda/envs/sentencetransformers
14 changes: 14 additions & 0 deletions backend/python/sentencetransformers/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

##
## A bash script wrapper that runs the sentencetransformers server with conda

export PATH=$PATH:/opt/conda/bin

# Activate conda environment
source activate sentencetransformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python $DIR/sentencetransformers.py $@
114 changes: 114 additions & 0 deletions backend/python/sentencetransformers/sentencetransformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Extra gRPC server for HuggingFace SentenceTransformer models.
"""
#!/usr/bin/env python3
from concurrent import futures

import argparse
import signal
import sys
import os

import time
import backend_pb2
import backend_pb2_grpc

import grpc

from sentence_transformers import SentenceTransformer

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
A gRPC servicer for the backend service.
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
"""
def Health(self, request, context):
"""
A gRPC method that returns the health status of the backend service.
Args:
request: A HealthRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Reply object that contains the health status of the backend service.
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

def LoadModel(self, request, context):
"""
A gRPC method that loads a model into memory.
Args:
request: A LoadModelRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
A Result object that contains the result of the LoadModel operation.
"""
model_name = request.Model
try:
self.model = SentenceTransformer(model_name)
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

# Implement your logic here for the LoadModel service
# Replace this with your desired response
return backend_pb2.Result(message="Model loaded successfully", success=True)

def Embedding(self, request, context):
"""
A gRPC method that calculates embeddings for a given sentence.
Args:
request: An EmbeddingRequest object that contains the request parameters.
context: A grpc.ServicerContext object that provides information about the RPC.
Returns:
An EmbeddingResult object that contains the calculated embeddings.
"""
# Implement your logic here for the Embedding service
# Replace this with your desired response
print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
sentence_embeddings = self.model.encode(request.Embeddings)
return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)


def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)

# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)

# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)

try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()

serve(args.addr)
11 changes: 11 additions & 0 deletions backend/python/sentencetransformers/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
##
## A bash script wrapper that runs the sentencetransformers server with conda

# Activate conda environment
source activate sentencetransformers

# get the directory where the bash script is located
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

python -m unittest $DIR/test_sentencetransformers.py
81 changes: 81 additions & 0 deletions backend/python/sentencetransformers/test_sentencetransformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
"""
TestBackendServicer is the class that tests the gRPC service
"""
def setUp(self):
"""
This method sets up the gRPC service by starting the server
"""
self.service = subprocess.Popen(["python3", "sentencetransformers.py", "--addr", "localhost:50051"])

def tearDown(self) -> None:
"""
This method tears down the gRPC service by terminating the server
"""
self.service.terminate()
self.service.wait()

def test_server_startup(self):
"""
This method tests if the server starts up successfully
"""
time.sleep(2)
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
print(err)
self.fail("Server failed to start")
finally:
self.tearDown()

def test_load_model(self):
"""
This method tests if the model is loaded successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
print(err)
self.fail("LoadModel service failed")
finally:
self.tearDown()

def test_embedding(self):
"""
This method tests if the embeddings are generated successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
self.assertTrue(response.success)
embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
embedding_response = stub.Embedding(embedding_request)
self.assertIsNotNone(embedding_response.embeddings)
except Exception as err:
print(err)
self.fail("Embedding service failed")
finally:
self.tearDown()
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
.PONY: huggingface
huggingface:
.PONY: transformers
transformers:
@echo "Creating virtual environment..."
@conda env create --name huggingface --file huggingface.yml
@conda env create --name transformers --file transformers.yml
@echo "Virtual environment created."

.PONY: run
run:
@echo "Running huggingface..."
@echo "Running transformers..."
bash run.sh
@echo "huggingface run."
@echo "transformers run."

# It is not working well by using command line. It only6 works with IDE like VSCode.
.PONY: test
test:
@echo "Testing huggingface..."
@echo "Testing transformers..."
bash test.sh
@echo "huggingface tested."
@echo "transformers tested."
5 changes: 5 additions & 0 deletions backend/python/transformers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Creating a separate environment for the transformers project

```
make transformers
```
Loading

0 comments on commit 54501d4

Please sign in to comment.