diff --git a/docs/Test.md b/docs/Test.md index 317e71d..d565d3b 100644 --- a/docs/Test.md +++ b/docs/Test.md @@ -153,6 +153,11 @@ See [Caching speech-to-text transcriptions](#caching-speech-to-text-transcriptio This is currently only used for LUIS, see the section on LUIS prebuilt entities in [Configuring prebuilt entities](LuisModelConfiguration.md#configuring-prebuilt-entities). +### `--timestamp` +(Optional) Signals whether to add a timestamp to each NLU test result. + +See the documentation on the [`timestamp` property](UtteranceExtensions.md#returning-timestamps-for-each-query) for more details. + ### `-i, --include` (Optional) Path to custom NLU provider DLL. See documentation about [Specifying the include path](https://github.com/microsoft/NLU.DevOps/blob/master/docs/CliExtensions.md#specifying-the-include-path) for more details. diff --git a/docs/UtteranceExtensions.md b/docs/UtteranceExtensions.md index 03721c6..dc03889 100644 --- a/docs/UtteranceExtensions.md +++ b/docs/UtteranceExtensions.md @@ -22,7 +22,7 @@ When an NLU provider in NLU.DevOps returns a prediction result, the value will b ``` In this case, the intent confidence score was `0.99` and the text transcription confidence score was `0.95`. This is useful context when debugging false predictions, as a low confidence score may indicate that the model could be improved with more training examples. The recognized `genre` entity also includes a confidence score of `0.80`, although it should be noted that only the LUIS provider currently returns confidence score for entity types trained from examples. -## Labeled utterance timestamps +## Returning timestamps for each query When analyzing results for a set of NLU predictions, it is often important context to understand when the test was run. For example, for Dialogflow `date` and `time` entities, the service only returns a date time string, and no indication of what token(s) triggered that entity to be recognized. For example, the result from a query like `"Call a taxi in 15 minutes"` may look like the following: ```json @@ -38,7 +38,7 @@ When analyzing results for a set of NLU predictions, it is often important conte "timestamp": "2020-01-01T00:00:00-04:00" } ``` -Without the context provided by the `timestamp` property, we wouldn't be able to make any assertion about the correctness of the `entityValue` property for time. Currently, LUIS, Lex, and Dialogflow return a timestamp for each prediction result. +Without the context provided by the `timestamp` property, we wouldn't be able to make any assertion about the correctness of the `entityValue` property for time. Currently, you must specify the [`--timestamp`](Test.md#--timestamp) option to ensure a timestamp is assigned to each NLU prediction result. ## Utterance Extension Properties diff --git a/src/NLU.DevOps.CommandLine/Test/TestCommand.cs b/src/NLU.DevOps.CommandLine/Test/TestCommand.cs index 5b60496..9638b50 100644 --- a/src/NLU.DevOps.CommandLine/Test/TestCommand.cs +++ b/src/NLU.DevOps.CommandLine/Test/TestCommand.cs @@ -7,7 +7,9 @@ namespace NLU.DevOps.CommandLine.Test using System.Collections.Generic; using System.IO; using System.Linq; + using System.Threading; using System.Threading.Tasks; + using Core; using Models; using Newtonsoft.Json.Linq; using static Serializer; @@ -32,7 +34,8 @@ public override int Main() protected override INLUTestClient CreateNLUTestClient() { - return NLUClientFactory.CreateTestInstance(this.Options, this.Configuration, this.Options.SettingsPath); + var client = NLUClientFactory.CreateTestInstance(this.Options, this.Configuration, this.Options.SettingsPath); + return this.Options.Timestamp ? new TimestampNLUTestClient(client) : client; } private static void EnsureDirectory(string filePath) @@ -146,5 +149,34 @@ public LabeledUtteranceWithSpeechFile(string text, string intent, string speechF public string SpeechFile { get; } } + + private class TimestampNLUTestClient : INLUTestClient + { + public TimestampNLUTestClient(INLUTestClient client) + { + this.Client = client; + } + + private INLUTestClient Client { get; } + + public async Task TestAsync(JToken query, CancellationToken cancellationToken) + { + var timestamp = DateTimeOffset.Now; + var result = await this.Client.TestAsync(query, cancellationToken).ConfigureAwait(false); + return result.WithTimestamp(timestamp); + } + + public async Task TestSpeechAsync(string speechFile, JToken query, CancellationToken cancellationToken) + { + var timestamp = DateTimeOffset.Now; + var result = await this.Client.TestSpeechAsync(speechFile, query, cancellationToken).ConfigureAwait(false); + return result.WithTimestamp(timestamp); + } + + public void Dispose() + { + this.Client.Dispose(); + } + } } } diff --git a/src/NLU.DevOps.CommandLine/Test/TestOptions.cs b/src/NLU.DevOps.CommandLine/Test/TestOptions.cs index 83fc523..756aaf6 100644 --- a/src/NLU.DevOps.CommandLine/Test/TestOptions.cs +++ b/src/NLU.DevOps.CommandLine/Test/TestOptions.cs @@ -28,5 +28,8 @@ internal class TestOptions : BaseOptions [Option('p', "parallelism", HelpText = "Numeric value to determine the numer of parallel tests. Default value is 3.", Required = false)] public int Parallelism { get; set; } = 3; + + [Option("timestamp", HelpText = "Assign a timestamp to each utterance result.", Required = false)] + public bool Timestamp { get; set; } } } diff --git a/src/NLU.DevOps.Dialogflow/DialogflowNLUTestClient.cs b/src/NLU.DevOps.Dialogflow/DialogflowNLUTestClient.cs index 4bbdf50..95940b6 100644 --- a/src/NLU.DevOps.Dialogflow/DialogflowNLUTestClient.cs +++ b/src/NLU.DevOps.Dialogflow/DialogflowNLUTestClient.cs @@ -78,8 +78,7 @@ protected override async Task TestAsync(string utterance, Canc result.QueryResult.Intent.DisplayName, result.QueryResult.Parameters?.Fields.SelectMany(GetEntities).ToList()) .WithScore(result.QueryResult.IntentDetectionConfidence) - .WithTextScore(result.QueryResult.SpeechRecognitionConfidence) - .WithTimestamp(DateTimeOffset.Now); + .WithTextScore(result.QueryResult.SpeechRecognitionConfidence); }, cancellationToken) .ConfigureAwait(false); @@ -115,8 +114,7 @@ protected override async Task TestSpeechAsync(string speechFil result.QueryResult.Intent.DisplayName, result.QueryResult.Parameters?.Fields.SelectMany(GetEntities).ToList()) .WithScore(result.QueryResult.IntentDetectionConfidence) - .WithTextScore(result.QueryResult.SpeechRecognitionConfidence) - .WithTimestamp(DateTimeOffset.Now); + .WithTextScore(result.QueryResult.SpeechRecognitionConfidence); }, cancellationToken) .ConfigureAwait(false); diff --git a/src/NLU.DevOps.Lex.Tests/LexNLUTestClientTests.cs b/src/NLU.DevOps.Lex.Tests/LexNLUTestClientTests.cs index be9491d..92ef73a 100644 --- a/src/NLU.DevOps.Lex.Tests/LexNLUTestClientTests.cs +++ b/src/NLU.DevOps.Lex.Tests/LexNLUTestClientTests.cs @@ -88,9 +88,6 @@ public static async Task TestsWithSpeech(string slots, string entityType, string // assert reads content from file (file contents are "hello world") content.Should().Be("hello world"); - // assert result type - result.Should().BeOfType(); - // assert intent and text result.Intent.Should().Be(intent); result.Text.Should().Be(transcript); @@ -123,7 +120,6 @@ public static async Task CreatesLabeledUtterances() using (var lex = new LexNLUTestClient(string.Empty, string.Empty, new LexSettings(), mockClient.Object)) { var response = await lex.TestAsync(text).ConfigureAwait(false); - response.Should().BeOfType(); response.Text.Should().Be(text); response.Intent.Should().Be(intent); response.Entities.Should().BeEmpty(); diff --git a/src/NLU.DevOps.Lex/LexNLUTestClient.cs b/src/NLU.DevOps.Lex/LexNLUTestClient.cs index 569a869..e3ff843 100644 --- a/src/NLU.DevOps.Lex/LexNLUTestClient.cs +++ b/src/NLU.DevOps.Lex/LexNLUTestClient.cs @@ -94,8 +94,7 @@ protected override async Task TestAsync(string utterance, Canc .Select(slot => new Entity(slot.Key, slot.Value, null, 0)) .ToArray(); - return new LabeledUtterance(utterance, postTextResponse.IntentName, entities) - .WithTimestamp(DateTimeOffset.Now); + return new LabeledUtterance(utterance, postTextResponse.IntentName, entities); } /// @@ -125,8 +124,7 @@ protected override async Task TestSpeechAsync(string speechFil .ToArray() : null; - return new JsonLabeledUtterance(postContentResponse.InputTranscript, postContentResponse.IntentName, slots) - .WithTimestamp(DateTimeOffset.Now); + return new LabeledUtterance(postContentResponse.InputTranscript, postContentResponse.IntentName, slots); } } diff --git a/src/NLU.DevOps.Luis.Tests/LuisNLUTestClientTests.cs b/src/NLU.DevOps.Luis.Tests/LuisNLUTestClientTests.cs index ce82e6f..3b4fbc8 100644 --- a/src/NLU.DevOps.Luis.Tests/LuisNLUTestClientTests.cs +++ b/src/NLU.DevOps.Luis.Tests/LuisNLUTestClientTests.cs @@ -65,7 +65,6 @@ public static async Task TestModel() using (var luis = builder.Build()) { var result = await luis.TestAsync(test).ConfigureAwait(false); - result.Should().BeOfType(); result.Text.Should().Be(test); result.Intent.Should().Be("intent"); result.Entities.Count.Should().Be(1); diff --git a/src/NLU.DevOps.Luis/LuisNLUTestClient.cs b/src/NLU.DevOps.Luis/LuisNLUTestClient.cs index 79de5d1..61949b3 100644 --- a/src/NLU.DevOps.Luis/LuisNLUTestClient.cs +++ b/src/NLU.DevOps.Luis/LuisNLUTestClient.cs @@ -147,8 +147,7 @@ Entity getEntity(EntityModel entity) speechLuisResult.LuisResult.TopScoringIntent?.Intent, speechLuisResult.LuisResult.Entities?.Select(getEntity).ToList()) .WithScore(speechLuisResult.LuisResult.TopScoringIntent?.Score) - .WithTextScore(speechLuisResult.TextScore) - .WithTimestamp(DateTimeOffset.Now); + .WithTextScore(speechLuisResult.TextScore); } } } diff --git a/src/NLU.DevOps.LuisV3.Tests/LuisNLUTestClientTests.cs b/src/NLU.DevOps.LuisV3.Tests/LuisNLUTestClientTests.cs index 80658ba..4c31fb0 100644 --- a/src/NLU.DevOps.LuisV3.Tests/LuisNLUTestClientTests.cs +++ b/src/NLU.DevOps.LuisV3.Tests/LuisNLUTestClientTests.cs @@ -69,7 +69,6 @@ public static async Task TestModel() using (var luis = builder.Build()) { var result = await luis.TestAsync(test).ConfigureAwait(false); - result.Should().BeOfType(); result.Text.Should().Be(test); result.Intent.Should().Be("intent"); result.Entities.Count.Should().Be(1); diff --git a/src/NLU.DevOps.LuisV3/LuisNLUTestClient.cs b/src/NLU.DevOps.LuisV3/LuisNLUTestClient.cs index 519a391..e9efe4c 100644 --- a/src/NLU.DevOps.LuisV3/LuisNLUTestClient.cs +++ b/src/NLU.DevOps.LuisV3/LuisNLUTestClient.cs @@ -178,8 +178,7 @@ private LabeledUtterance LuisResultToLabeledUtterance(SpeechPredictionResponse s speechPredictionResponse.PredictionResponse.Prediction.Intents?.TryGetValue(intent, out intentData); return new LabeledUtterance(query, intent, entities) .WithScore(intentData?.Score) - .WithTextScore(speechPredictionResponse.TextScore) - .WithTimestamp(DateTimeOffset.Now); + .WithTextScore(speechPredictionResponse.TextScore); } } }