update output format

Striveworks · Mar 11, 2024 · aec3eaf · aec3eaf
1 parent c2b2d69
commit aec3eaf
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 11 deletions.
diff --git a/runtimes/huggingface/mlserver_huggingface/__init__.py b/runtimes/huggingface/mlserver_huggingface/__init__.py
@@ -4,7 +4,7 @@
 
 # Added Custom pipeline
 PIPELINE_REGISTRY.register_pipeline(
-    "sentence_embedding",
+    "sentence-embedding",
     pipeline_class=StEmbeddingPipeline,
     type="text",
 )

diff --git a/runtimes/huggingface/mlserver_huggingface/pipelines.py b/runtimes/huggingface/mlserver_huggingface/pipelines.py
@@ -41,25 +41,68 @@ def _sanitize_parameters(self, **kwargs):
             forward_kwargs["normalize_embeddings"] = kwargs["normalize_embeddings"]
         return {}, forward_kwargs, {}
 
-    def preprocess(self, inputs):
-        return inputs
+    def preprocess(self, sentences: Union[str, List[str]]):
+        if isinstance(sentences, str):
+            sentences = [sentences]
+        return sentences
 
-    def _forward(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
+    def _forward(self, sentences: List[str], batch_size=32, **kwargs):
         outputs = self.model.encode(sentences, batch_size=batch_size, **kwargs)
         return outputs
 
-    def forward(
-        self, sentences: Union[str, List[str]], batch_size=32, **forward_params
-    ):
+    def forward(self, sentences: List[str], batch_size=32, **forward_params):
+
         model_outputs = self._forward(
             sentences, batch_size=batch_size, **forward_params
         )
         return model_outputs
 
     def postprocess(self, model_outputs):
-        return model_outputs
+        outputs = {"embeddings": model_outputs}
+        return outputs
 
     def __call__(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
+        """
+        Computes sentence embeddings.
+
+        Parameters
+        ----------
+        sentences: str or List[str]
+            the sentences to embed.
+        prompt_name: str
+            The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary
+            which is either set in the constructor or loaded from the model configuration. For example if
+            `prompt_name` is ``"query"`` and the `prompts` is ``{"query": "query: ", ...}``, then the sentence "What
+            is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
+            is appended to the prompt. If `prompt` is also set, this argument is ignored.
+        prompt: str
+            The prompt to use for encoding. For example, if the prompt is ``"query: "``, then the
+            sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
+            because the sentence is appended to the prompt. If `prompt` is set, `prompt_name` is ignored.
+        batch_size: int
+            the batch size used for the computation.
+        show_progress_bar: bool
+            Whether to output a progress bar when encoding sentences.
+        output_value: str
+            The type of embeddings to return: "sentence_embedding" to get sentence embeddings,
+            "token_embeddings" to get wordpiece token embeddings, and `None`, to get all output values. Defaults
+            to "sentence_embedding".
+        convert_to_numpy: bool
+            Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors.
+        convert_to_tensor: bool
+            Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
+        device: str
+            Which `torch.device` to use for the computation.
+
+        normalize_embeddings: bool
+            Whether to normalize returned vectors to have length 1. In that case,
+            the faster dot-product (util.dot_score) instead of cosine similarity can be used.
+        Returns
+        -------
+            A dict whose "embeddings" key holds the encoder output: by default, a list of tensors. If convert_to_tensor,
+            a stacked tensor is held instead. If convert_to_numpy, a numpy matrix is held instead.
+
+        """
         (
             preprocess_params,
             forward_params,
@@ -69,8 +112,10 @@ def __call__(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
         preprocess_params = {**self._preprocess_params, **preprocess_params}
         forward_params = {**self._forward_params, **forward_params}
         postprocess_params = {**self._postprocess_params, **postprocess_params}
+        sentences = self.preprocess(sentences, **preprocess_params)
         model_outputs = self.forward(sentences, batch_size=batch_size, **forward_params)
-        return model_outputs
+        outputs = self.postprocess(model_outputs, **postprocess_params)
+        return outputs
 
     def predict(self, X, batch_size=32, **kwargs):
         return self(X, batch_size=batch_size, **kwargs)
diff --git a/runtimes/huggingface/tests/test_pipeline.py b/runtimes/huggingface/tests/test_pipeline.py
@@ -12,7 +12,7 @@ def test_sentence_transformers_pipeline():
     st_embeder = SentenceTransformer(pretrained_model)
 
     hf_settings = HuggingFaceSettings(
-        pretrained_model=pretrained_model, task="sentence_embedding"
+        pretrained_model=pretrained_model, task="sentence-embedding"
     )
     model_settings = ModelSettings(name="foo", implementation=HuggingFaceRuntime)
     pipeline = load_pipeline_from_settings(hf_settings, model_settings)
@@ -22,5 +22,5 @@ def test_sentence_transformers_pipeline():
         "The quick brown fox jumps over the lazy dog.",
     ]
     st_pred = st_embeder.encode(sentences)
-    pipeline_pred = pipeline.predict(sentences)
+    pipeline_pred = pipeline.predict(sentences)["embeddings"]
     assert np.array_equal(st_pred, pipeline_pred)