Skip to content

Commit

Permalink
update output format
Browse files Browse the repository at this point in the history
  • Loading branch information
Nanbo Liu committed Mar 11, 2024
1 parent c2b2d69 commit aec3eaf
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 11 deletions.
2 changes: 1 addition & 1 deletion runtimes/huggingface/mlserver_huggingface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Added Custom pipeline
PIPELINE_REGISTRY.register_pipeline(
"sentence_embedding",
"sentence-embedding",
pipeline_class=StEmbeddingPipeline,
type="text",
)
Expand Down
61 changes: 53 additions & 8 deletions runtimes/huggingface/mlserver_huggingface/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,68 @@ def _sanitize_parameters(self, **kwargs):
forward_kwargs["normalize_embeddings"] = kwargs["normalize_embeddings"]
return {}, forward_kwargs, {}

def preprocess(self, inputs):
return inputs
def preprocess(self, sentences: Union[str, List[str]]):
if isinstance(sentences, str):
sentences = [sentences]
return sentences

def _forward(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
def _forward(self, sentences: List[str], batch_size=32, **kwargs):
outputs = self.model.encode(sentences, batch_size=batch_size, **kwargs)
return outputs

def forward(
self, sentences: Union[str, List[str]], batch_size=32, **forward_params
):
def forward(self, sentences: List[str], batch_size=32, **forward_params):

model_outputs = self._forward(
sentences, batch_size=batch_size, **forward_params
)
return model_outputs

def postprocess(self, model_outputs):
return model_outputs
outputs = {"embeddings": model_outputs}
return outputs

def __call__(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
"""
Computes sentence embeddings.
Parameters
----------
sentences: str or list of str
the sentences to embed.
prompt_name: str
The name of the prompt to use for encoding. Must be a key in the `prompts` dictionary
which is either set in the constructor or loaded from the model configuration. For example if
`prompt_name` is ``"query"`` and the `prompts` is ``{"query": "query: ", ...}``, then the sentence "What
is the capital of France?" will be encoded as "query: What is the capital of France?" because the sentence
is appended to the prompt. If `prompt` is also set, this argument is ignored.
prompt: str
The prompt to use for encoding. For example, if the prompt is ``"query: "``, then the
sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?"
because the sentence is appended to the prompt. If `prompt` is set, `prompt_name` is ignored.
batch_size: int
the batch size used for the computation.
show_progress_bar: bool
Whether to output a progress bar when encoding sentences.
output_value: str
The type of embeddings to return: "sentence_embedding" to get sentence embeddings,
"token_embeddings" to get wordpiece token embeddings, and `None`, to get all output values. Defaults
to "sentence_embedding".
convert_to_numpy: bool
Whether the output should be a list of numpy vectors. If False, it is a list of PyTorch tensors.
convert_to_tensor: bool
Whether the output should be one large tensor. Overwrites `convert_to_numpy`.
device: str
Which `torch.device` to use for the computation.
normalize_embeddings: bool
Whether to normalize returned vectors to have length 1. In that case,
the faster dot-product (util.dot_score) instead of cosine similarity can be used.
Returns
-------
A dict whose ``"embeddings"`` entry holds the encoded output. By default, a list of tensors is returned. If convert_to_tensor, a stacked tensor is returned.
If convert_to_numpy, a numpy matrix is returned.
"""
(
preprocess_params,
forward_params,
Expand All @@ -69,8 +112,10 @@ def __call__(self, sentences: Union[str, List[str]], batch_size=32, **kwargs):
preprocess_params = {**self._preprocess_params, **preprocess_params}
forward_params = {**self._forward_params, **forward_params}
postprocess_params = {**self._postprocess_params, **postprocess_params}
sentences = self.preprocess(sentences, **preprocess_params)
model_outputs = self.forward(sentences, batch_size=batch_size, **forward_params)
return model_outputs
outputs = self.postprocess(model_outputs, **postprocess_params)
return outputs

def predict(self, X, batch_size=32, **kwargs):
return self(X, batch_size=batch_size, **kwargs)
4 changes: 2 additions & 2 deletions runtimes/huggingface/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_sentence_transformers_pipeline():
st_embeder = SentenceTransformer(pretrained_model)

hf_settings = HuggingFaceSettings(
pretrained_model=pretrained_model, task="sentence_embedding"
pretrained_model=pretrained_model, task="sentence-embedding"
)
model_settings = ModelSettings(name="foo", implementation=HuggingFaceRuntime)
pipeline = load_pipeline_from_settings(hf_settings, model_settings)
Expand All @@ -22,5 +22,5 @@ def test_sentence_transformers_pipeline():
"The quick brown fox jumps over the lazy dog.",
]
st_pred = st_embeder.encode(sentences)
pipeline_pred = pipeline.predict(sentences)
pipeline_pred = pipeline.predict(sentences)["embeddings"]
assert np.array_equal(st_pred, pipeline_pred)

0 comments on commit aec3eaf

Please sign in to comment.