feat(transformers): split in separate backend

Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler · Nov 20, 2023 · 54501d4 · 54501d4
1 parent caedb4e
commit 54501d4
Show file tree

Hide file tree

Showing 23 changed files with 777 additions and 28 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -78,7 +78,7 @@ jobs:
           sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
           
           sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C backend/python/huggingface
+          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
 
           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \

diff --git a/Dockerfile b/Dockerfile
@@ -12,7 +12,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
-ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/huggingface/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh"
 ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
 ARG GO_TAGS="stablediffusion tts"
 
@@ -169,7 +169,10 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 	PATH=$PATH:/opt/conda/bin make -C backend/python/vllm \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
-	PATH=$PATH:/opt/conda/bin make -C backend/python/huggingface \
+	PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers \
+    ; fi
+RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
+	PATH=$PATH:/opt/conda/bin make -C backend/python/transformers \
     ; fi
 RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
 	PATH=$PATH:/opt/conda/bin make -C backend/python/vall-e-x \

diff --git a/Makefile b/Makefile
@@ -296,7 +296,7 @@ test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/huggingface/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf"  --flake-attempts 5 --fail-fast -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
@@ -367,7 +367,8 @@ protogen-go:
     backend/backend.proto
 
 protogen-python:
-	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/huggingface/ --grpc_python_out=backend/python/huggingface/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/sentencetransformers/ --grpc_python_out=backend/python/sentencetransformers/ backend/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/transformers/ --grpc_python_out=backend/python/transformers/ backend/backend.proto
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/autogptq/ --grpc_python_out=backend/python/autogptq/ backend/backend.proto
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/exllama/ --grpc_python_out=backend/python/exllama/ backend/backend.proto
 	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=backend/python/bark/ --grpc_python_out=backend/python/bark/ backend/backend.proto
@@ -382,7 +383,8 @@ prepare-extra-conda-environments:
 	$(MAKE) -C backend/python/bark
 	$(MAKE) -C backend/python/diffusers
 	$(MAKE) -C backend/python/vllm
-	$(MAKE) -C backend/python/huggingface
+	$(MAKE) -C backend/python/sentencetransformers
+	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/vall-e-x
 	$(MAKE) -C backend/python/exllama
 

diff --git a/api/api_test.go b/api/api_test.go
@@ -704,7 +704,7 @@ var _ = Describe("API test", func() {
 		})
 
 		Context("External gRPC calls", func() {
-			It("calculate embeddings with huggingface", func() {
+			It("calculate embeddings with sentencetransformers", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}

diff --git a/backend/python/huggingface/README.md b/backend/python/huggingface/README.md
diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile
@@ -0,0 +1,18 @@
+.PHONY: sentencetransformers
+sentencetransformers:
+	@echo "Creating virtual environment..."
+	@conda env create --name sentencetransformers --file sentencetransformers.yml
+	@echo "Virtual environment created."
+
+.PHONY: run
+run:
+	@echo "Running sentencetransformers..."
+	bash run.sh
+	@echo "sentencetransformers run."
+
+# The tests do not run reliably from the command line; they only work from an IDE such as VSCode.
+.PHONY: test
+test:
+	@echo "Testing sentencetransformers..."
+	bash test.sh
+	@echo "sentencetransformers tested."
diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md
@@ -0,0 +1,5 @@
+# Creating a separate environment for the sentencetransformers project
+
+```
+make sentencetransformers
+```
diff --git a/backend/python/huggingface/backend_pb2.py → ...ython/sentencetransformers/backend_pb2.py b/backend/python/huggingface/backend_pb2.py → ...ython/sentencetransformers/backend_pb2.py
diff --git a/...nd/python/huggingface/backend_pb2_grpc.py → .../sentencetransformers/backend_pb2_grpc.py b/...nd/python/huggingface/backend_pb2_grpc.py → .../sentencetransformers/backend_pb2_grpc.py
diff --git a/backend/python/huggingface/huggingface.yml → ...thon/sentencetransformers/huggingface.yml b/backend/python/huggingface/huggingface.yml → ...thon/sentencetransformers/huggingface.yml
@@ -1,4 +1,4 @@
-name: huggingface
+name: sentencetransformers
 channels:
   - defaults
 dependencies:
@@ -74,4 +74,4 @@ dependencies:
       - triton==2.1.0
       - typing-extensions==4.8.0
       - urllib3==2.0.6
-prefix: /opt/conda/envs/huggingface
+prefix: /opt/conda/envs/sentencetransformers
diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+##
+## A bash script wrapper that runs the sentencetransformers server with conda
+
+export PATH=$PATH:/opt/conda/bin
+
+# Activate conda environment
+source activate sentencetransformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+# Quote the path and pass "$@" so arguments containing spaces are forwarded intact
+python "$DIR/sentencetransformers.py" "$@"
diff --git a/backend/python/sentencetransformers/sentencetransformers.py b/backend/python/sentencetransformers/sentencetransformers.py
@@ -0,0 +1,114 @@
+"""
+Extra gRPC server for HuggingFace SentenceTransformer models.
+"""
+#!/usr/bin/env python3
+from concurrent import futures
+
+import argparse
+import signal
+import sys
+import os
+
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+from sentence_transformers import SentenceTransformer
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    A gRPC servicer for the backend service.
+
+    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
+    """
+    def Health(self, request, context):
+        """
+        A gRPC method that returns the health status of the backend service.
+
+        Args:
+            request: A HealthMessage object that contains the request parameters.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Reply object that contains the health status of the backend service.
+        """
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+
+    def LoadModel(self, request, context):
+        """
+        A gRPC method that loads a SentenceTransformer model into memory.
+
+        Args:
+            request: A ModelOptions object; its Model field names the model to load.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            A Result object with success=True on load, or success=False and the
+            error description when SentenceTransformer raises.
+        """
+        model_name = request.Model
+        try:
+            self.model = SentenceTransformer(model_name)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def Embedding(self, request, context):
+        """
+        A gRPC method that calculates embeddings for a given sentence.
+
+        Args:
+            request: A PredictOptions object; its Embeddings field holds the input text.
+            context: A grpc.ServicerContext object that provides information about the RPC.
+
+        Returns:
+            An EmbeddingResult object that contains the calculated embeddings.
+        """
+        # LoadModel must have succeeded first; otherwise self.model does not exist.
+        if getattr(self, "model", None) is None:
+            context.abort(grpc.StatusCode.FAILED_PRECONDITION, "no model loaded: call LoadModel first")
+        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
+        sentence_embeddings = self.model.encode(request.Embeddings)
+        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
+
+
+def serve(address):
+    """Start the gRPC backend on `address` and block until SIGINT or SIGTERM arrives."""
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    def signal_handler(sig, frame):
+        print("Received termination signal. Shutting down...", file=sys.stderr)
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    # Command-line entry point; --addr selects the address the server binds to.
+    cli = argparse.ArgumentParser(description="Run the gRPC server.")
+    cli.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    options = cli.parse_args()
+    serve(options.addr)
diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+##
+## A bash script wrapper that runs the sentencetransformers tests with conda
+
+# Activate conda environment
+source activate sentencetransformers
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python -m unittest "$DIR/test_sentencetransformers.py"
diff --git a/backend/python/sentencetransformers/test_sentencetransformers.py b/backend/python/sentencetransformers/test_sentencetransformers.py
@@ -0,0 +1,81 @@
+"""
+A test script to test the gRPC service
+"""
+import unittest
+import subprocess
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
+
+
+class TestBackendServicer(unittest.TestCase):
+    """
+    TestBackendServicer is the class that tests the gRPC service
+
+    unittest itself calls setUp()/tearDown() around every test, so the tests
+    must not call them again (that would start a second, leaked server).
+    """
+    def setUp(self):
+        """
+        This method starts the server and waits until it accepts connections
+        """
+        self.service = subprocess.Popen(["python3", "sentencetransformers.py", "--addr", "localhost:50051"])
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                grpc.channel_ready_future(channel).result(timeout=60)
+        except grpc.FutureTimeoutError:
+            # unittest skips tearDown() when setUp() fails, so stop the server here
+            self.tearDown()
+            self.fail("Server failed to start")
+
+    def tearDown(self) -> None:
+        """
+        This method tears down the gRPC service by terminating the server
+        """
+        self.service.terminate()
+        self.service.wait()
+
+    def test_server_startup(self):
+        """
+        This method tests if the server starts up successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.Health(backend_pb2.HealthMessage())
+                self.assertEqual(response.message, b'OK')
+        except grpc.RpcError as err:
+            print(err)
+            self.fail("Server failed to start")
+
+    def test_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except grpc.RpcError as err:
+            print(err)
+            self.fail("LoadModel service failed")
+
+    def test_embedding(self):
+        """
+        This method tests if the embeddings are generated successfully
+        """
+        try:
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
+                self.assertTrue(response.success)
+                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
+                embedding_response = stub.Embedding(embedding_request)
+                self.assertIsNotNone(embedding_response.embeddings)
+        except grpc.RpcError as err:
+            print(err)
+            self.fail("Embedding service failed")
diff --git a/backend/python/huggingface/Makefile → backend/python/transformers/Makefile b/backend/python/huggingface/Makefile → backend/python/transformers/Makefile
@@ -1,18 +1,18 @@
-.PONY: huggingface
-huggingface:
+.PHONY: transformers
+transformers:
 	@echo "Creating virtual environment..."
-	@conda env create --name huggingface --file huggingface.yml
+	@conda env create --name transformers --file transformers.yml
 	@echo "Virtual environment created."
 
-.PONY: run
+.PHONY: run
 run:
-	@echo "Running huggingface..."
+	@echo "Running transformers..."
 	bash run.sh
-	@echo "huggingface run."
+	@echo "transformers run."
 
-# It is not working well by using command line. It only6 works with IDE like VSCode.
-.PONY: test
+# The tests do not run reliably from the command line; they only work from an IDE such as VSCode.
+.PHONY: test
 test:
-	@echo "Testing huggingface..."
+	@echo "Testing transformers..."
 	bash test.sh
-	@echo "huggingface tested."
+	@echo "transformers tested."
diff --git a/backend/python/transformers/README.md b/backend/python/transformers/README.md
@@ -0,0 +1,5 @@
+# Creating a separate environment for the transformers project
+
+```
+make transformers
+```