

Update inference types (automated commit)
Wauplin authored and github-actions[bot] committed Nov 26, 2024
1 parent 7dc41b1 commit 9b5de39
Showing 33 changed files with 134 additions and 215 deletions.
14 changes: 5 additions & 9 deletions docs/source/en/package_reference/inference_types.md
@@ -65,7 +65,9 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions

[[autodoc]] huggingface_hub.ChatCompletionInputToolType
[[autodoc]] huggingface_hub.ChatCompletionInputTool

[[autodoc]] huggingface_hub.ChatCompletionInputToolChoiceClass

[[autodoc]] huggingface_hub.ChatCompletionInputURL

@@ -105,8 +107,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage

[[autodoc]] huggingface_hub.ToolElement



## depth_estimation
@@ -369,8 +369,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -381,8 +379,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -395,6 +391,6 @@ This part of the lib is still under development and will be improved in future r

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
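These entries track the rename of the chat-completion tool types: `ChatCompletionInputToolType` becomes `ChatCompletionInputToolChoiceClass`, the tool definition formerly documented as `ToolElement` becomes `ChatCompletionInputTool`, and the `ZeroShot*InputData` helper classes are dropped. A minimal import sketch, assuming a `huggingface_hub` build that includes this commit:

```python
# Sketch only: assumes a huggingface_hub version that ships this change.
from huggingface_hub import (
    ChatCompletionInputTool,             # previously documented as ToolElement
    ChatCompletionInputToolChoiceClass,  # previously ChatCompletionInputToolType
)
```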
14 changes: 5 additions & 9 deletions docs/source/ko/package_reference/inference_types.md
@@ -64,7 +64,9 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ChatCompletionInputStreamOptions

[[autodoc]] huggingface_hub.ChatCompletionInputToolType
[[autodoc]] huggingface_hub.ChatCompletionInputTool

[[autodoc]] huggingface_hub.ChatCompletionInputToolChoiceClass

[[autodoc]] huggingface_hub.ChatCompletionInputURL

@@ -104,8 +106,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ChatCompletionStreamOutputUsage

[[autodoc]] huggingface_hub.ToolElement



## depth_estimation[[huggingface_hub.DepthEstimationInput]]
@@ -368,8 +368,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotClassificationInput

[[autodoc]] huggingface_hub.ZeroShotClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotClassificationParameters
@@ -380,8 +378,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInput

[[autodoc]] huggingface_hub.ZeroShotImageClassificationInputData

[[autodoc]] huggingface_hub.ZeroShotImageClassificationOutputElement

[[autodoc]] huggingface_hub.ZeroShotImageClassificationParameters
@@ -394,6 +390,6 @@ rendered properly in your Markdown viewer.

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInput

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionInputData

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionOutputElement

[[autodoc]] huggingface_hub.ZeroShotObjectDetectionParameters
18 changes: 9 additions & 9 deletions src/huggingface_hub/inference/_client.py
@@ -348,7 +348,7 @@ def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
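The reworded docstring clarifies that `function_to_apply` is the transform applied to the raw model outputs to obtain the scores. A hedged usage sketch; the file path is a placeholder and the accepted values are typically `"sigmoid"`, `"softmax"`, or `"none"`:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()  # picks up a locally saved token, if any

# "sample.flac" is a placeholder; any local audio file, raw bytes, or URL works.
predictions = client.audio_classification(
    "sample.flac",
    top_k=3,                      # keep the 3 most probable classes
    function_to_apply="softmax",  # transform applied to the outputs to get scores
)
for item in predictions:
    print(item.label, item.score)
```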
@@ -1131,7 +1131,7 @@ def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
@@ -1814,7 +1814,7 @@ def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2494,11 +2494,11 @@ def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
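The corrected lines fix a copy-paste slip: `max_new_tokens` takes precedence over `max_length`, and `min_new_tokens` over `min_length` (the old text said `maxLength` in both places). A hedged sketch of passing one of these generation controls; whether a given text-to-speech model honors it depends on the serving backend:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

audio = client.text_to_speech(
    "Hello, this is a quick synthesis test.",
    max_new_tokens=256,  # wins over max_length if both are set
)
with open("speech.flac", "wb") as f:  # output format depends on the model/backend
    f.write(audio)
```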
@@ -2801,8 +2801,8 @@ def zero_shot_classification(
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
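The reworded docstring refers to `candidate_labels` (the snake_case parameter the labels are sent as) instead of the camelCase `candidateLabels`. A hedged sketch; the labels and template are illustrative, and the `{}` placeholder in `hypothesis_template` is filled with each candidate label:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

results = client.zero_shot_classification(
    "The invoice I received last week was charged twice.",
    ["billing", "technical issue", "legal"],  # candidate labels
    hypothesis_template="This customer message is about {}.",
)
print(results[0].label, results[0].score)
```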
@@ -2918,8 +2918,8 @@ def zero_shot_image_classification(
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
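The same fix applies to the image variant, whose docstring now also says "image classification" rather than "text classification". A hedged sketch with placeholder image and labels:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

results = client.zero_shot_image_classification(
    "cat_or_dog.jpg",  # placeholder local path or URL
    ["a cat", "a dog", "something else"],
    hypothesis_template="This is a photo of {}.",
)
print(results[0].label, results[0].score)
```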
18 changes: 9 additions & 9 deletions src/huggingface_hub/inference/_generated/_async_client.py
@@ -381,7 +381,7 @@ async def audio_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -1176,7 +1176,7 @@ async def image_classification(
The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
Returns:
@@ -1876,7 +1876,7 @@ async def text_classification(
top_k (`int`, *optional*):
When specified, limits the output to the top K most probable classes.
function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
The function to apply to the output.
The function to apply to the model outputs in order to retrieve the scores.
Returns:
`List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2559,11 +2559,11 @@ async def text_to_speech(
max_length (`int`, *optional*):
The maximum length (in tokens) of the generated text, including the input.
max_new_tokens (`int`, *optional*):
The maximum number of tokens to generate. Takes precedence over maxLength.
The maximum number of tokens to generate. Takes precedence over max_length.
min_length (`int`, *optional*):
The minimum length (in tokens) of the generated text, including the input.
min_new_tokens (`int`, *optional*):
The minimum number of tokens to generate. Takes precedence over maxLength.
The minimum number of tokens to generate. Takes precedence over min_length.
num_beam_groups (`int`, *optional*):
Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -2870,8 +2870,8 @@ async def zero_shot_classification(
the label likelihoods for each sequence is 1. If true, the labels are considered independent and
probabilities are normalized for each candidate.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the text classification by
replacing the placeholder with the candidate labels.
model (`str`, *optional*):
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
@@ -2989,8 +2989,8 @@ async def zero_shot_image_classification(
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
hypothesis_template (`str`, *optional*):
The sentence used in conjunction with candidateLabels to attempt the text classification by replacing
the placeholder with the candidate labels.
The sentence used in conjunction with `candidate_labels` to attempt the image classification by
replacing the placeholder with the candidate labels.
Returns:
`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
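The same docstring fixes are mirrored in the generated `AsyncInferenceClient`, whose methods take the same parameters but are awaitable. A hedged sketch of the async variant:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.text_classification(
        "I really enjoyed this movie!",
        top_k=2,
        function_to_apply="softmax",  # same semantics as in the sync client above
    )
    print(results)


asyncio.run(main())
```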
9 changes: 4 additions & 5 deletions src/huggingface_hub/inference/_generated/types/__init__.py
@@ -29,7 +29,9 @@
ChatCompletionInputMessageChunk,
ChatCompletionInputMessageChunkType,
ChatCompletionInputStreamOptions,
ChatCompletionInputToolType,
ChatCompletionInputTool,
ChatCompletionInputToolChoiceClass,
ChatCompletionInputToolChoiceEnum,
ChatCompletionInputURL,
ChatCompletionOutput,
ChatCompletionOutputComplete,
@@ -49,7 +51,6 @@
ChatCompletionStreamOutputLogprobs,
ChatCompletionStreamOutputTopLogprob,
ChatCompletionStreamOutputUsage,
ToolElement,
)
from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
from .document_question_answering import (
@@ -167,19 +168,17 @@
)
from .zero_shot_classification import (
ZeroShotClassificationInput,
ZeroShotClassificationInputData,
ZeroShotClassificationOutputElement,
ZeroShotClassificationParameters,
)
from .zero_shot_image_classification import (
ZeroShotImageClassificationInput,
ZeroShotImageClassificationInputData,
ZeroShotImageClassificationOutputElement,
ZeroShotImageClassificationParameters,
)
from .zero_shot_object_detection import (
ZeroShotObjectDetectionBoundingBox,
ZeroShotObjectDetectionInput,
ZeroShotObjectDetectionInputData,
ZeroShotObjectDetectionOutputElement,
ZeroShotObjectDetectionParameters,
)
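A hedged summary of what these import changes mean for downstream code: the `ZeroShot*InputData` helpers and `ToolElement` no longer exist, while the tool-choice enum is new. The module path below is the one shown in this diff:

```python
# Names that import cleanly after this commit (sketch; not an exhaustive list).
from huggingface_hub.inference._generated.types import (
    ChatCompletionInputTool,             # replaces ToolElement
    ChatCompletionInputToolChoiceClass,  # replaces ChatCompletionInputToolType
    ChatCompletionInputToolChoiceEnum,   # new: Literal["auto", "none", "required"]
    ZeroShotClassificationInput,         # ZeroShotClassificationInputData is gone
    ZeroShotImageClassificationInput,
    ZeroShotObjectDetectionInput,
)
```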
src/huggingface_hub/inference/_generated/types/audio_classification.py
@@ -14,12 +14,10 @@

@dataclass
class AudioClassificationParameters(BaseInferenceType):
"""Additional inference parameters
Additional inference parameters for Audio Classification
"""
"""Additional inference parameters for Audio Classification"""

function_to_apply: Optional["AudioClassificationOutputTransform"] = None
"""The function to apply to the output."""
"""The function to apply to the model outputs in order to retrieve the scores."""
top_k: Optional[int] = None
"""When specified, limits the output to the top K most probable classes."""

@@ -33,7 +31,7 @@ class AudioClassificationInput(BaseInferenceType):
also provide the audio data as a raw bytes payload.
"""
parameters: Optional[AudioClassificationParameters] = None
"""Additional inference parameters"""
"""Additional inference parameters for Audio Classification"""


@dataclass
src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -14,9 +14,7 @@

@dataclass
class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
"""Parametrization of the text generation process
Ad-hoc parametrization of the text generation process
"""
"""Parametrization of the text generation process"""

do_sample: Optional[bool] = None
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
@@ -76,11 +74,9 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):

@dataclass
class AutomaticSpeechRecognitionParameters(BaseInferenceType):
"""Additional inference parameters
Additional inference parameters for Automatic Speech Recognition
"""
"""Additional inference parameters for Automatic Speech Recognition"""

generate: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
"""Parametrization of the text generation process"""
return_timestamps: Optional[bool] = None
"""Whether to output corresponding timestamps with the generated text"""
@@ -95,7 +91,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
also provide the audio data as a raw bytes payload.
"""
parameters: Optional[AutomaticSpeechRecognitionParameters] = None
"""Additional inference parameters"""
"""Additional inference parameters for Automatic Speech Recognition"""


@dataclass
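The `generate` field on `AutomaticSpeechRecognitionParameters` is renamed to `generation_parameters`. A hedged construction sketch; it assumes `AutomaticSpeechRecognitionGenerationParameters` is exported from the same generated-types module and that `max_new_tokens` is among its fields (only `do_sample` is visible in this excerpt):

```python
from huggingface_hub.inference._generated.types import (
    AutomaticSpeechRecognitionGenerationParameters,
    AutomaticSpeechRecognitionParameters,
)

params = AutomaticSpeechRecognitionParameters(
    # this field was previously called `generate`
    generation_parameters=AutomaticSpeechRecognitionGenerationParameters(
        do_sample=False,
        max_new_tokens=128,  # assumed field, per the shared generation-parameter schema
    ),
    return_timestamps=True,
)
```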
src/huggingface_hub/inference/_generated/types/chat_completion.py
@@ -60,8 +60,11 @@ class ChatCompletionInputFunctionName(BaseInferenceType):


@dataclass
class ChatCompletionInputToolType(BaseInferenceType):
function: Optional[ChatCompletionInputFunctionName] = None
class ChatCompletionInputToolChoiceClass(BaseInferenceType):
function: ChatCompletionInputFunctionName


ChatCompletionInputToolChoiceEnum = Literal["auto", "none", "required"]


@dataclass
@@ -72,7 +75,7 @@ class ChatCompletionInputFunctionDefinition(BaseInferenceType):


@dataclass
class ToolElement(BaseInferenceType):
class ChatCompletionInputTool(BaseInferenceType):
function: ChatCompletionInputFunctionDefinition
type: str

@@ -138,10 +141,10 @@ class ChatCompletionInput(BaseInferenceType):
lower values like 0.2 will make it more focused and deterministic.
We generally recommend altering this or `top_p` but not both.
"""
tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None
tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None
tool_prompt: Optional[str] = None
"""A prompt to be appended before the tools"""
tools: Optional[List[ToolElement]] = None
tools: Optional[List[ChatCompletionInputTool]] = None
"""A list of tools the model may call. Currently, only functions are supported as a tool.
Use this to provide a list of
functions the model may generate JSON inputs for.
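After the rename, `tools` expects entries shaped like `ChatCompletionInputTool` and `tool_choice` accepts either a `ChatCompletionInputToolChoiceClass` or one of the `"auto"` / `"none"` / `"required"` literals. A hedged sketch using plain dicts; the tool schema, function name, and model ID are illustrative, and the exact JSON-schema layout expected for the function definition depends on the serving backend:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical function, for illustration only
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat_completion(
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model id
    tools=tools,
    tool_choice="auto",  # one of the new ChatCompletionInputToolChoiceEnum values
    max_tokens=200,
)
print(response.choices[0].message)
```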
src/huggingface_hub/inference/_generated/types/depth_estimation.py
@@ -16,7 +16,7 @@ class DepthEstimationInput(BaseInferenceType):
inputs: Any
"""The input image data"""
parameters: Optional[Dict[str, Any]] = None
"""Additional inference parameters"""
"""Additional inference parameters for Depth Estimation"""


@dataclass
src/huggingface_hub/inference/_generated/types/document_question_answering.py
@@ -21,9 +21,7 @@ class DocumentQuestionAnsweringInputData(BaseInferenceType):

@dataclass
class DocumentQuestionAnsweringParameters(BaseInferenceType):
"""Additional inference parameters
Additional inference parameters for Document Question Answering
"""
"""Additional inference parameters for Document Question Answering"""

doc_stride: Optional[int] = None
"""If the words in the document are too long to fit with the question for the model, it will
@@ -62,7 +60,7 @@ class DocumentQuestionAnsweringInput(BaseInferenceType):
inputs: DocumentQuestionAnsweringInputData
"""One (document, question) pair to answer"""
parameters: Optional[DocumentQuestionAnsweringParameters] = None
"""Additional inference parameters"""
"""Additional inference parameters for Document Question Answering"""


@dataclass
@@ -81,5 +79,3 @@ class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
"""The start word index of the answer (in the OCR’d version of the input or provided word
boxes).
"""
words: List[int]
"""The index of each word/box pair that is in the answer"""
src/huggingface_hub/inference/_generated/types/feature_extraction.py
@@ -26,7 +26,7 @@ class FeatureExtractionInput(BaseInferenceType):
prompt_name: Optional[str] = None
"""The name of the prompt that should be used by for encoding. If not set, no prompt
will be applied.
Must be a key in the `Sentence Transformers` configuration `prompts` dictionary.
Must be a key in the `sentence-transformers` configuration `prompts` dictionary.
For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ",
...},
then the sentence "What is the capital of France?" will be encoded as
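The docstring now names the `sentence-transformers` package rather than "Sentence Transformers". A hedged sketch of `prompt_name` in practice; it assumes the client method exposes the `prompt_name` parameter from the payload type shown here, the model ID is a placeholder, and the parameter is only honored by backends that read the model's `prompts` configuration:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

embedding = client.feature_extraction(
    "What is the capital of France?",
    model="sentence-transformers/multi-qa-mpnet-base-dot-v1",  # placeholder model id
    prompt_name="query",  # must be a key of the model's `prompts` dict, e.g. {"query": "query: "}
)
print(embedding.shape)  # returned as a numpy array
```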

