Skip to content

Commit

Permalink
OpenAI-DotNet 8.6.0 (#421)
Browse files Browse the repository at this point in the history
- Deprecated OpenAI.Realtime.Options
- Added OpenAI.Realtime.SessionConfiguration to specify the options of the RealtimeSession when creating a new session
- Added OpenAI.Realtime.RealtimeResponseCreateParams to specify the options of the CreateResponseRequest
- Added VoiceActivityDetectionSettingsConverter to better handle disabled voice activity detection
- Added VoiceActivityDetectionSettings.CreateResponse property
  • Loading branch information
StephenHodgson authored Feb 17, 2025
1 parent 94f671d commit 9607121
Show file tree
Hide file tree
Showing 21 changed files with 891 additions and 97 deletions.
140 changes: 111 additions & 29 deletions OpenAI-DotNet-Tests/TestFixture_13_Realtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,38 @@ namespace OpenAI.Tests
internal class TestFixture_13_Realtime : AbstractTestFixture
{
[Test]
public async Task Test_01_RealtimeSession()
public async Task Test_01_01_RealtimeSession()
{
RealtimeSession session = null;

try
{
Assert.IsNotNull(OpenAIClient.RealtimeEndpoint);
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(20));
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
var wasGoodbyeCalled = false;
var tools = new List<Tool>
{
Tool.FromFunc("goodbye", () =>
{
cts.Cancel();
wasGoodbyeCalled = true;
cts.Cancel();
return "Goodbye!";
})
};

var options = new Options(Model.GPT4oRealtime, tools: tools);
session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(options, cts.Token);
var configuration = new SessionConfiguration(Model.GPT4oRealtime, tools: tools);
session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(configuration, cts.Token);
Assert.IsNotNull(session);
Assert.IsNotNull(session.Options);
Assert.AreEqual(Model.GPT4oRealtime.Id, options.Model);
Assert.AreEqual(options.Model, session.Options.Model);
Assert.IsNotNull(options.Tools);
Assert.IsNotEmpty(options.Tools);
Assert.AreEqual(1, options.Tools.Count);
Assert.AreEqual(options.Tools.Count, session.Options.Tools.Count);
Assert.AreEqual(options.Tools[0].Name, session.Options.Tools[0].Name);
Assert.AreEqual(Modality.Audio | Modality.Text, options.Modalities);
Assert.AreEqual(Modality.Audio | Modality.Text, session.Options.Modalities);
Assert.IsNotNull(session.Configuration);
Assert.AreEqual(Model.GPT4oRealtime.Id, configuration.Model);
Assert.AreEqual(configuration.Model, session.Configuration.Model);
Assert.IsNotNull(configuration.Tools);
Assert.IsNotEmpty(configuration.Tools);
Assert.AreEqual(1, configuration.Tools.Count);
Assert.AreEqual(configuration.Tools.Count, session.Configuration.Tools.Count);
Assert.AreEqual(configuration.Tools[0].Name, session.Configuration.Tools[0].Name);
Assert.AreEqual(Modality.Audio | Modality.Text, configuration.Modalities);
Assert.AreEqual(Modality.Audio | Modality.Text, session.Configuration.Modalities);
var responseTask = session.ReceiveUpdatesAsync<IServerEvent>(SessionEvents, cts.Token);

await session.SendAsync(new ConversationItemCreateRequest("Hello!"), cts.Token);
Expand Down Expand Up @@ -93,14 +93,14 @@ void SessionEvents(IServerEvent @event)
}

[Test]
public async Task Test_01_RealtimeSession_IAsyncEnumerable()
public async Task Test_01_02_RealtimeSession_IAsyncEnumerable()
{
RealtimeSession session = null;

try
{
Assert.IsNotNull(OpenAIClient.RealtimeEndpoint);
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(20));
var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
var wasGoodbyeCalled = false;
var tools = new List<Tool>
{
Expand All @@ -112,19 +112,19 @@ public async Task Test_01_RealtimeSession_IAsyncEnumerable()
})
};

var options = new Options(Model.GPT4oRealtime, tools: tools);
session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(options, cts.Token);
var configuration = new SessionConfiguration(Model.GPT4oRealtime, tools: tools);
session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(configuration, cts.Token);
Assert.IsNotNull(session);
Assert.IsNotNull(session.Options);
Assert.AreEqual(Model.GPT4oRealtime.Id, options.Model);
Assert.AreEqual(options.Model, session.Options.Model);
Assert.IsNotNull(options.Tools);
Assert.IsNotEmpty(options.Tools);
Assert.AreEqual(1, options.Tools.Count);
Assert.AreEqual(options.Tools.Count, session.Options.Tools.Count);
Assert.AreEqual(options.Tools[0].Name, session.Options.Tools[0].Name);
Assert.AreEqual(Modality.Audio | Modality.Text, options.Modalities);
Assert.AreEqual(Modality.Audio | Modality.Text, session.Options.Modalities);
Assert.IsNotNull(session.Configuration);
Assert.AreEqual(Model.GPT4oRealtime.Id, configuration.Model);
Assert.AreEqual(configuration.Model, session.Configuration.Model);
Assert.IsNotNull(configuration.Tools);
Assert.IsNotEmpty(configuration.Tools);
Assert.AreEqual(1, configuration.Tools.Count);
Assert.AreEqual(configuration.Tools.Count, session.Configuration.Tools.Count);
Assert.AreEqual(configuration.Tools[0].Name, session.Configuration.Tools[0].Name);
Assert.AreEqual(Modality.Audio | Modality.Text, configuration.Modalities);
Assert.AreEqual(Modality.Audio | Modality.Text, session.Configuration.Modalities);

await foreach (var @event in session.ReceiveUpdatesAsync<IServerEvent>(cts.Token).ConfigureAwait(false))
{
Expand Down Expand Up @@ -175,5 +175,87 @@ public async Task Test_01_RealtimeSession_IAsyncEnumerable()
session?.Dispose();
}
}

/// <summary>
/// Creates a realtime session with voice activity detection disabled, then drives the
/// conversation manually (text items, an audio buffer append/commit and explicit
/// response requests) until the model invokes the "goodbye" tool.
/// </summary>
[Test]
public async Task Test_02_RealtimeSession_VAD_Disabled()
{
    RealtimeSession session = null;

    try
    {
        Assert.IsNotNull(OpenAIClient.RealtimeEndpoint);
        // Bound the run time the same way the other realtime tests in this fixture do;
        // without a timeout the test never finishes if the model does not call "goodbye".
        var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
        var wasGoodbyeCalled = false;
        var tools = new List<Tool>
        {
            Tool.FromFunc("goodbye", () =>
            {
                // Record the call before cancelling so the final assertion can tell
                // a real tool invocation apart from the timeout firing.
                wasGoodbyeCalled = true;
                cts.Cancel();
                return "Goodbye!";
            })
        };

        var configuration = new SessionConfiguration(
            model: Model.GPT4oRealtime,
            tools: tools,
            turnDetectionSettings: VoiceActivityDetectionSettings.Disabled());
        session = await OpenAIClient.RealtimeEndpoint.CreateSessionAsync(configuration, cts.Token);
        Assert.IsNotNull(session);
        Assert.IsNotNull(session.Configuration);
        Assert.AreEqual(Model.GPT4oRealtime.Id, configuration.Model);
        Assert.AreEqual(configuration.Model, session.Configuration.Model);
        Assert.IsNotNull(configuration.Tools);
        Assert.IsNotEmpty(configuration.Tools);
        Assert.AreEqual(1, configuration.Tools.Count);
        Assert.AreEqual(configuration.Tools.Count, session.Configuration.Tools.Count);
        Assert.AreEqual(configuration.Tools[0].Name, session.Configuration.Tools[0].Name);
        Assert.AreEqual(Modality.Audio | Modality.Text, configuration.Modalities);
        Assert.AreEqual(Modality.Audio | Modality.Text, session.Configuration.Modalities);
        // With detection disabled, the session is expected to report no turn detection settings.
        Assert.IsNull(session.Configuration.VoiceActivityDetectionSettings);
        var responseTask = session.ReceiveUpdatesAsync<IServerEvent>(SessionEvents, cts.Token);

        // Every response is requested explicitly in this test.
        await session.SendAsync(new ConversationItemCreateRequest("Hello!"), cts.Token);
        await session.SendAsync(new CreateResponseRequest(), cts.Token);
        // 8 KiB of zeroed bytes is appended as audio input and then committed by hand.
        await session.SendAsync(new InputAudioBufferAppendRequest(new ReadOnlyMemory<byte>(new byte[1024 * 8])), cts.Token);
        await session.SendAsync(new InputAudioBufferCommitRequest(), cts.Token);
        await session.SendAsync(new ConversationItemCreateRequest("Goodbye!"), cts.Token);
        await session.SendAsync(new CreateResponseRequest(), cts.Token);

        void SessionEvents(IServerEvent @event)
        {
            switch (@event)
            {
                case ResponseAudioTranscriptResponse transcriptResponse:
                    Console.WriteLine(transcriptResponse.ToString());
                    break;
                case ResponseFunctionCallArgumentsResponse functionCallResponse:
                    if (functionCallResponse.IsDone)
                    {
                        // Run the requested tool once its arguments are complete.
                        ToolCall toolCall = functionCallResponse;
                        toolCall.InvokeFunction();
                    }

                    break;
            }
        }

        await responseTask.ConfigureAwait(true);
        // The receive loop can now also end because the timeout fired; require the tool call.
        Assert.IsTrue(wasGoodbyeCalled);
    }
    catch (Exception e)
    {
        switch (e)
        {
            case ObjectDisposedException:
                // ignore
                break;
            default:
                Console.WriteLine(e);
                throw;
        }
    }
    finally
    {
        session?.Dispose();
    }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using OpenAI.Realtime;
using System;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace OpenAI
{
/// <summary>
/// JSON converter for <see cref="VoiceActivityDetectionSettings"/> that represents
/// disabled turn detection as a JSON <c>null</c> value.
/// </summary>
internal class VoiceActivityDetectionSettingsConverter : JsonConverter<VoiceActivityDetectionSettings>
{
    /// <summary>
    /// Maps a JSON null token to <see cref="VoiceActivityDetectionSettings.Disabled"/>;
    /// any other token is deserialized with the supplied options.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>HandleNull</c> is not overridden in this class, so confirm whether
    /// the serializer actually calls this method for null tokens.
    /// </remarks>
    public override VoiceActivityDetectionSettings Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        if (reader.TokenType == JsonTokenType.Null)
        {
            return VoiceActivityDetectionSettings.Disabled();
        }

        return JsonSerializer.Deserialize<VoiceActivityDetectionSettings>(ref reader, options);
    }

    /// <summary>
    /// Writes JSON null when the settings type is <see cref="TurnDetectionType.Disabled"/>;
    /// every other type is serialized with the supplied options.
    /// </summary>
    public override void Write(Utf8JsonWriter writer, VoiceActivityDetectionSettings value, JsonSerializerOptions options)
    {
        if (value.Type == TurnDetectionType.Disabled)
        {
            writer.WriteNullValue();
            return;
        }

        // Server_VAD and any other detection type take the regular serialization path.
        JsonSerializer.Serialize(writer, value, options);
    }
}
}
8 changes: 7 additions & 1 deletion OpenAI-DotNet/OpenAI-DotNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,14 @@ More context [on Roger Pincombe's blog](https://rogerpincombe.com/openai-dotnet-
<AssemblyOriginatorKeyFile>OpenAI-DotNet.pfx</AssemblyOriginatorKeyFile>
<IncludeSymbols>true</IncludeSymbols>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>8.5.4</Version>
<Version>8.6.0</Version>
<PackageReleaseNotes>
Version 8.6.0
- Deprecated OpenAI.Realtime.Options
- Added OpenAI.Realtime.SessionConfiguration to specify the options of the RealtimeSession when creating a new session
- Added OpenAI.Realtime.RealtimeResponseCreateParams to specify the options of the CreateResponseRequest
- Added VoiceActivityDetectionSettingsConverter to better handle disabled voice activity detection
- Added VoiceActivityDetectionSettings.CreateResponse property
Version 8.5.4
- Update ChatRequest to support o3 series reasoning models
Version 8.5.3
Expand Down
22 changes: 11 additions & 11 deletions OpenAI-DotNet/Realtime/ConversationItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ public ConversationItem(Role role, IEnumerable<RealtimeContent> content)
}

public ConversationItem(Role role, RealtimeContent content)
: this(role, new[] { content })
: this(role, [content])
{
}

public ConversationItem(RealtimeContent content)
: this(Role.User, new[] { content })
: this(Role.User, [content])
{
}

Expand All @@ -67,22 +67,22 @@ public ConversationItem(Tool tool)
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public string Id { get; private set; }

/// <summary>
/// The object type, must be "realtime.item".
/// </summary>
[JsonInclude]
[JsonPropertyName("object")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public string Object { get; private set; }

/// <summary>
/// The type of the item ("message", "function_call", "function_call_output").
/// </summary>
[JsonInclude]
[JsonPropertyName("type")]
[JsonIgnore(Condition = JsonIgnoreCondition.Never)]
[JsonConverter(typeof(Extensions.JsonStringEnumConverter<ConversationItemType>))]
public ConversationItemType Type { get; private set; }
public ConversationItemType Type { get; internal set; }

/// <summary>
/// The object type, must be "realtime.item".
/// </summary>
[JsonInclude]
[JsonPropertyName("object")]
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingDefault)]
public string Object { get; private set; }

/// <summary>
/// The status of the item ("completed", "in_progress", "incomplete").
Expand Down
4 changes: 3 additions & 1 deletion OpenAI-DotNet/Realtime/ConversationItemType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ public enum ConversationItemType
[EnumMember(Value = "function_call")]
FunctionCall,
[EnumMember(Value = "function_call_output")]
FunctionCallOutput
FunctionCallOutput,
[EnumMember(Value = "item_reference")]
ItemReference
}
}
14 changes: 14 additions & 0 deletions OpenAI-DotNet/Realtime/ConversationResponseType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System.Runtime.Serialization;

namespace OpenAI.Realtime
{
/// <summary>
/// Conversation response mode; the <see cref="EnumMemberAttribute"/> values give the
/// string associated with each member ("auto" or "none").
/// </summary>
/// <remarks>
/// NOTE(review): presumably selects whether a created response is attached to the
/// session's conversation - confirm against the Realtime API reference.
/// </remarks>
public enum ConversationResponseType
{
/// <summary>
/// Associated string value: "auto". This is the default (zero) value of the enum.
/// </summary>
[EnumMember(Value = "auto")]
Auto = 0,
/// <summary>
/// Associated string value: "none".
/// </summary>
[EnumMember(Value = "none")]
None = 1
}
}
9 changes: 6 additions & 3 deletions OpenAI-DotNet/Realtime/CreateResponseRequest.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using System.Text.Json.Serialization;

namespace OpenAI.Realtime
Expand All @@ -20,8 +21,10 @@ public CreateResponseRequest() { }
/// <summary>
/// Constructor.
/// </summary>
/// <param name="options">Inference configuration <see cref="Realtime.Options"/> to override the <see cref="RealtimeSession.Options"/> for this response only.</param>
public CreateResponseRequest(Options options)
/// <param name="options">
/// Create a new Realtime response with these parameters.
/// </param>
public CreateResponseRequest(RealtimeResponseCreateParams options)
{
Options = options;
}
Expand All @@ -38,6 +41,6 @@ public CreateResponseRequest(Options options)

[JsonInclude]
[JsonPropertyName("response")]
public Options Options { get; private set; }
public RealtimeResponseCreateParams Options { get; private set; }
}
}
28 changes: 28 additions & 0 deletions OpenAI-DotNet/Realtime/Options.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,35 @@

namespace OpenAI.Realtime
{
[Obsolete("use SessionConfiguration or RealtimeResponseCreateParams")]
public sealed class Options
{
/// <summary>
/// Converts the deprecated <see cref="Options"/> into a <see cref="SessionConfiguration"/>
/// by passing its property values to the <see cref="SessionConfiguration"/> constructor.
/// </summary>
/// <param name="options">The legacy options to convert. May be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="options"/> is <c>null</c>; otherwise a new
/// <see cref="SessionConfiguration"/> built from the option values.
/// </returns>
public static implicit operator SessionConfiguration(Options options)
    // An implicit conversion should not throw: pass a null reference through unchanged
    // instead of failing with a NullReferenceException at the call site.
    => options is null
        ? null
        : new SessionConfiguration(
            options.Model,
            options.Modalities,
            options.Voice,
            options.Instructions,
            options.InputAudioFormat,
            options.OutputAudioFormat,
            options.InputAudioTranscriptionSettings,
            options.VoiceActivityDetectionSettings,
            options.Tools,
            options.ToolChoice,
            options.Temperature,
            options.MaxResponseOutputTokens);

/// <summary>
/// Converts the deprecated <see cref="Options"/> into a <see cref="RealtimeResponseCreateParams"/>
/// by passing the response-related property values to its constructor.
/// </summary>
/// <param name="options">The legacy options to convert. May be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="options"/> is <c>null</c>; otherwise a new
/// <see cref="RealtimeResponseCreateParams"/> built from the option values.
/// </returns>
public static implicit operator RealtimeResponseCreateParams(Options options)
    // An implicit conversion should not throw: pass a null reference through unchanged
    // instead of failing with a NullReferenceException at the call site.
    => options is null
        ? null
        : new RealtimeResponseCreateParams(
            options.Modalities,
            options.Instructions,
            options.Voice,
            options.OutputAudioFormat,
            options.Tools,
            options.ToolChoice,
            options.Temperature,
            options.MaxResponseOutputTokens);

public Options() { }

public Options(
Expand Down Expand Up @@ -155,6 +182,7 @@ public Options(

[JsonInclude]
[JsonPropertyName("turn_detection")]
[JsonConverter(typeof(VoiceActivityDetectionSettingsConverter))]
public VoiceActivityDetectionSettings VoiceActivityDetectionSettings { get; private set; }

[JsonInclude]
Expand Down
Loading

0 comments on commit 9607121

Please sign in to comment.