Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Azure AI Model Inference API - Embeddings and Chat Completions (#32145) #32165

Merged
11 commits merged on Feb 12, 2025
Previous commit
Next commit
fix: audio input descriptions and types
  • Loading branch information
santiagxf committed Jan 17, 2025
commit 5b3aa7df00ef2819ec9d59f13bdac06d166c2e5a
Original file line number Diff line number Diff line change
@@ -18,10 +18,10 @@
 "role": "user",
 "content": [
 {
-"type": "audio_input",
-"audio_input": {
+"type": "input_audio",
+"input_audio": {
 "data": "<base64 encoded audio data>",
-"mime_type": "audio/wav"
+"format": "wav"
 }
 }
 ]
@@ -37,10 +37,10 @@
 "role": "user",
 "content": [
 {
-"type": "audio_input",
-"audio_input": {
+"type": "input_audio",
+"input_audio": {
 "data": "<base64 encoded audio data>",
-"mime_type": "audio/wav"
+"format": "wav"
 }
 }
 ]
@@ -84,7 +84,7 @@
 "tool_calls": null,
 "audio": {
 "id": "abcdef1234",
-"mime_type": "audio/wav",
+"format": "wav",
 "data": "<base64 encoded audio data>",
 "expires_at": 1896522361,
 "transcript": "This is a sample transcript"
17 changes: 9 additions & 8 deletions specification/ai/ModelInference/models/chat_completions.tsp
Original file line number Diff line number Diff line change
@@ -680,13 +680,12 @@ model ChatMessageInputAudio {
 @doc("Base64 encoded audio data")
 data: string;

-@doc("The MIME type of the audio file.")
-@encodedName("application/json", "mime_type")
-mimeType: AudioMimeType;
+@doc("The audio format of the audio content.")
+format: AudioContentFormat;
 }

-@doc("A representation of the possible audio MIME types for audio.")
-union AudioMimeType {
+@doc("A representation of the possible audio formats for audio.")
+union AudioContentFormat {
 string,

 @doc("Specifies audio in WAV format.")
@@ -715,9 +714,11 @@ model ChatCompletionsAudio {
 @doc("Base64 encoded audio data")
 data: string;

-@doc("The MIME type of the audio file.")
-@encodedName("application/json", "mime_type")
-mimeType: AudioMimeType;
+@doc("""
+The format of the audio content. If format is not provided, it will match the format used in the
+input audio request.
+""")
+format?: AudioContentFormat;

 @doc("The transcript of the audio file.")
 transcript: string;
Original file line number Diff line number Diff line change
@@ -18,10 +18,10 @@
 "role": "user",
 "content": [
 {
-"type": "audio_input",
-"audio_input": {
+"type": "input_audio",
+"input_audio": {
 "data": "<base64 encoded audio data>",
-"mime_type": "audio/wav"
+"format": "wav"
 }
 }
 ]
@@ -37,10 +37,10 @@
 "role": "user",
 "content": [
 {
-"type": "audio_input",
-"audio_input": {
+"type": "input_audio",
+"input_audio": {
 "data": "<base64 encoded audio data>",
-"mime_type": "audio/wav"
+"format": "wav"
 }
 }
 ]
@@ -84,7 +84,7 @@
 "tool_calls": null,
 "audio": {
 "id": "abcdef1234",
-"mime_type": "audio/wav",
+"format": "wav",
 "data": "<base64 encoded audio data>",
 "expires_at": 1896522361,
 "transcript": "This is a sample transcript"
Original file line number Diff line number Diff line change
@@ -332,15 +332,15 @@
 }
 },
 "definitions": {
-"AudioMimeType": {
+"AudioContentFormat": {
 "type": "string",
-"description": "A representation of the possible audio MIME types for audio.",
+"description": "A representation of the possible audio formats for audio.",
 "enum": [
 "wav",
 "mp3"
 ],
 "x-ms-enum": {
-"name": "AudioMimeType",
+"name": "AudioContentFormat",
 "modelAsString": true,
 "values": [
 {
@@ -510,10 +510,9 @@
 "type": "string",
 "description": "Base64 encoded audio data"
 },
-"mime_type": {
-"$ref": "#/definitions/AudioMimeType",
-"description": "The MIME type of the audio file.",
-"x-ms-client-name": "mimeType"
+"format": {
+"$ref": "#/definitions/AudioContentFormat",
+"description": "The format of the audio content. If format is not provided, it will match the format used in the\ninput audio request."
 },
 "transcript": {
 "type": "string",
@@ -524,7 +523,6 @@
 "id",
 "expires_at",
 "data",
-"mime_type",
 "transcript"
 ]
 },
@@ -964,15 +962,14 @@
 "type": "string",
 "description": "Base64 encoded audio data"
 },
-"mime_type": {
-"$ref": "#/definitions/AudioMimeType",
-"description": "The MIME type of the audio file.",
-"x-ms-client-name": "mimeType"
+"format": {
+"$ref": "#/definitions/AudioContentFormat",
+"description": "The audio format of the audio content."
 }
 },
 "required": [
 "data",
-"mime_type"
+"format"
 ]
 },
 "ChatMessageTextContentItem": {
Original file line number Diff line number Diff line change
@@ -178,14 +178,14 @@ components:
 minLength: 1
 explode: false
 schemas:
-AudioMimeType:
+AudioContentFormat:
 anyOf:
 - type: string
 - type: string
 enum:
 - wav
 - mp3
-description: A representation of the possible audio MIME types for audio.
+description: A representation of the possible audio formats for audio.
 Azure.Core.Foundations.Error:
 type: object
 required:
@@ -306,7 +306,6 @@ components:
 - id
 - expires_at
 - data
-- mime_type
 - transcript
 properties:
 id:
@@ -323,10 +322,12 @@ components:
 data:
 type: string
 description: Base64 encoded audio data
-mime_type:
+format:
 allOf:
-- $ref: '#/components/schemas/AudioMimeType'
-description: The MIME type of the audio file.
+- $ref: '#/components/schemas/AudioContentFormat'
+description: |-
+The format of the audio content. If format is not provided, it will match the format used in the
+input audio request.
 transcript:
 type: string
 description: The transcript of the audio file.
@@ -704,15 +705,15 @@ components:
 type: object
 required:
 - data
-- mime_type
+- format
 properties:
 data:
 type: string
 description: Base64 encoded audio data
-mime_type:
+format:
 allOf:
-- $ref: '#/components/schemas/AudioMimeType'
-description: The MIME type of the audio file.
+- $ref: '#/components/schemas/AudioContentFormat'
+description: The audio format of the audio content.
 description: The details of an audio chat message content part.
 ChatMessageTextContentItem:
 type: object