diff --git a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.h b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.h
index 1a94340347..e95382b39e 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.h
+++ b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.h
@@ -43,9 +43,9 @@ NS_SWIFT_NAME(AudioClassifierResult)
  * @return An instance of `AudioClassifierResult` initialized with the given
  * `ClassificationResult` and timestamp (in milliseconds).
  */
-- (instancetype)initWithClassificationResult:
+- (instancetype)initWithClassificationResults:
                     (NSArray<MPPClassificationResult *> *)classificationResults
-                     timestampInMilliseconds:(NSInteger)timestampInMilliseconds;
+                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds;
 
 @end
 
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.m b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.m
index 491cce1e52..b46cbc8f8a 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.m
+++ b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.m
@@ -16,9 +16,9 @@
 
 @implementation MPPAudioClassifierResult
 
-- (instancetype)initWithClassificationResult:
+- (instancetype)initWithClassificationResults:
                     (NSArray<MPPClassificationResult *> *)classificationResults
-                     timestampInMilliseconds:(NSInteger)timestampInMilliseconds {
+                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds {
   self = [super initWithTimestampInMilliseconds:timestampInMilliseconds];
   if (self) {
     _classificationResults = classificationResults;
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/BUILD b/mediapipe/tasks/ios/audio/audio_classifier/utils/BUILD
new file mode 100644
index 0000000000..1fbaa1dc6a
--- /dev/null
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/BUILD
@@ -0,0 +1,52 @@
+# Copyright 2023 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"])
+
+objc_library(
+    name = "MPPAudioClassifierOptionsHelpers",
+    srcs = ["sources/MPPAudioClassifierOptions+Helpers.mm"],
+    hdrs = ["sources/MPPAudioClassifierOptions+Helpers.h"],
+    copts = [
+        "-ObjC++",
+        "-std=c++17",
+    ],
+    deps = [
+        "//mediapipe/framework:calculator_options_cc_proto",
+        "//mediapipe/tasks/cc/audio/audio_classifier/proto:audio_classifier_graph_options_cc_proto",
+        "//mediapipe/tasks/cc/components/processors/proto:classifier_options_cc_proto",
+        "//mediapipe/tasks/ios/audio/audio_classifier:MPPAudioClassifierOptions",
+        "//mediapipe/tasks/ios/common/utils:NSStringHelpers",
+        "//mediapipe/tasks/ios/core:MPPTaskOptionsProtocol",
+        "//mediapipe/tasks/ios/core/utils:MPPBaseOptionsHelpers",
+    ],
+)
+
+objc_library(
+    name = "MPPAudioClassifierResultHelpers",
+    srcs = ["sources/MPPAudioClassifierResult+Helpers.mm"],
+    hdrs = ["sources/MPPAudioClassifierResult+Helpers.h"],
+    copts = [
+        "-ObjC++",
+        "-std=c++17",
+    ],
+    deps = [
+        "//mediapipe/framework:packet",
+        "//mediapipe/tasks/cc/components/containers/proto:classifications_cc_proto",
+        "//mediapipe/tasks/ios/audio/audio_classifier:MPPAudioClassifierResult",
+        "//mediapipe/tasks/ios/components/containers/utils:MPPClassificationResultHelpers",
+    ],
+)
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.h
b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.h
new file mode 100644
index 0000000000..3157e4e095
--- /dev/null
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.h
@@ -0,0 +1,32 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/framework/calculator_options.pb.h"
+#import "mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierOptions.h"
+#import "mediapipe/tasks/ios/core/sources/MPPTaskOptionsProtocol.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface MPPAudioClassifierOptions (Helpers) <MPPTaskOptionsProtocol>
+
+/**
+ * Populates the provided `CalculatorOptions` proto container with the current settings.
+ *
+ * @param optionsProto The `CalculatorOptions` proto object to copy the settings to.
+ */
+- (void)copyToProto:(::mediapipe::CalculatorOptions *)optionsProto;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.mm b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.mm
new file mode 100644
index 0000000000..c623bdfbcb
--- /dev/null
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.mm
@@ -0,0 +1,60 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierOptions+Helpers.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.h"
+
+#include "mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options.pb.h"
+#include "mediapipe/tasks/cc/components/processors/proto/classifier_options.pb.h"
+
+namespace {
+using CalculatorOptionsProto = ::mediapipe::CalculatorOptions;
+using AudioClassifierGraphOptionsProto =
+    ::mediapipe::tasks::audio::audio_classifier::proto::AudioClassifierGraphOptions;
+using ClassifierOptionsProto = ::mediapipe::tasks::components::processors::proto::ClassifierOptions;
+}  // namespace
+
+@implementation MPPAudioClassifierOptions (Helpers)
+
+- (void)copyToProto:(CalculatorOptionsProto *)optionsProto {
+  AudioClassifierGraphOptionsProto *graphOptions =
+      optionsProto->MutableExtension(AudioClassifierGraphOptionsProto::ext);
+
+  // Stream mode must be requested only when the task runs in the audio stream running mode.
+  [self.baseOptions copyToProto:graphOptions->mutable_base_options()
+              withUseStreamMode:self.runningMode == MPPAudioRunningModeAudioStream];
+
+  // Reset the classifier options so that only the values copied below are present.
+  ClassifierOptionsProto *classifierOptionsProto = graphOptions->mutable_classifier_options();
+  classifierOptionsProto->Clear();
+
+  if (self.displayNamesLocale) {
+    classifierOptionsProto->set_display_names_locale(self.displayNamesLocale.cppString);
+  }
+
+  classifierOptionsProto->set_max_results((int)self.maxResults);
+  classifierOptionsProto->set_score_threshold(self.scoreThreshold);
+
+  for (NSString *category in self.categoryAllowlist) {
+    classifierOptionsProto->add_category_allowlist(category.cppString);
+  }
+
+  for (NSString *category in self.categoryDenylist) {
+    classifierOptionsProto->add_category_denylist(category.cppString);
+  }
+}
+
+@end
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.h b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.h
new file mode 100644
index 0000000000..ba687fba28
--- /dev/null
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.h
@@ -0,0 +1,36 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifierResult.h"
+
+#include "mediapipe/framework/packet.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface MPPAudioClassifierResult (Helpers)
+
+/**
+ * Creates an `MPPAudioClassifierResult` from a MediaPipe packet containing a
+ * `std::vector<ClassificationResultProto>`.
+ *
+ * @param packet a MediaPipe packet wrapping a `std::vector<ClassificationResultProto>`.
+ *
+ * @return An `MPPAudioClassifierResult` object that contains a list of audio classifications.
+ */
++ (nullable MPPAudioClassifierResult *)audioClassifierResultWithClassificationsPacket:
+    (const mediapipe::Packet &)packet;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm
new file mode 100644
index 0000000000..01615c95ee
--- /dev/null
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm
@@ -0,0 +1,59 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.h"
+#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPClassificationResult+Helpers.h"
+
+#include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"
+
+static const int kMicrosecondsPerMillisecond = 1000;
+
+namespace {
+using ClassificationResultProto =
+    ::mediapipe::tasks::components::containers::proto::ClassificationResult;
+using ::mediapipe::Packet;
+}  // namespace
+
+@implementation MPPAudioClassifierResult (Helpers)
+
++ (nullable MPPAudioClassifierResult *)audioClassifierResultWithClassificationsPacket:
+    (const Packet &)packet {
+  // Even if packet does not validate as the expected type, you can safely access the timestamp.
+  NSInteger timestampInMilliseconds =
+      (NSInteger)(packet.Timestamp().Value() / kMicrosecondsPerMillisecond);
+
+  // A packet of an unexpected type yields a result with no classifications rather than `nil`.
+  if (!packet.ValidateAsType<std::vector<ClassificationResultProto>>().ok()) {
+    return [[MPPAudioClassifierResult alloc] initWithClassificationResults:@[]
+                                                   timestampInMilliseconds:timestampInMilliseconds];
+  }
+
+  // Bind by reference: the packet keeps the vector alive for the duration of this call.
+  const std::vector<ClassificationResultProto> &cppClassificationResults =
+      packet.Get<std::vector<ClassificationResultProto>>();
+
+  NSMutableArray<MPPClassificationResult *> *classificationResults =
+      [NSMutableArray arrayWithCapacity:cppClassificationResults.size()];
+
+  for (const auto &cppClassificationResult : cppClassificationResults) {
+    MPPClassificationResult *classificationResult =
+        [MPPClassificationResult classificationResultWithProto:cppClassificationResult];
+    [classificationResults addObject:classificationResult];
+  }
+
+  return [[MPPAudioClassifierResult alloc] initWithClassificationResults:classificationResults
+                                                 timestampInMilliseconds:timestampInMilliseconds];
+}
+
+@end
diff --git a/mediapipe/tasks/ios/audio/core/BUILD b/mediapipe/tasks/ios/audio/core/BUILD
index 7924f24f04..464e6d3842 100644
--- a/mediapipe/tasks/ios/audio/core/BUILD
+++ b/mediapipe/tasks/ios/audio/core/BUILD
@@ -38,7 +38,6 @@ objc_library(
     name = "MPPAudioDataFormat",
     srcs = ["sources/MPPAudioDataFormat.m"],
     hdrs = ["sources/MPPAudioDataFormat.h"],
-    deps = ["//third_party/apple_frameworks:AVFAudio"],
 )
 
 objc_library(
@@ -113,6 +112,7 @@ objc_library(
     deps = [
         ":MPPAudioData",
         ":MPPAudioPacketCreator",
+        ":MPPAudioRecord",
         ":MPPAudioRunningMode",
         "//mediapipe/framework:packet",
         "//mediapipe/tasks/ios/common:MPPCommon",
@@ -121,6 +121,5 @@ objc_library(
         "//mediapipe/tasks/ios/core:MPPPacketCreator",
         "//mediapipe/tasks/ios/core:MPPTaskInfo",
         "//mediapipe/tasks/ios/core:MPPTaskRunner",
-        "//third_party/apple_frameworks:UIKit",
     ],
 )
diff --git a/mediapipe/tasks/ios/audio/core/sources/MPPAudioRunningMode.h b/mediapipe/tasks/ios/audio/core/sources/MPPAudioRunningMode.h
index 3b50591b15..8d67119e70 100644
--- a/mediapipe/tasks/ios/audio/core/sources/MPPAudioRunningMode.h
+++ 
b/mediapipe/tasks/ios/audio/core/sources/MPPAudioRunningMode.h
@@ -41,7 +41,7 @@ NS_INLINE NSString *MPPAudioRunningModeDisplayName(MPPAudioRunningMode runningMo
     case MPPAudioRunningModeAudioStream:
       return @"Audio Stream";
     default:
-      return nil;
+      return @"";
   }
 }
 
diff --git a/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.h b/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.h
index a300f45c76..5ca03d1144 100644
--- a/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.h
+++ b/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.h
@@ -1,4 +1,4 @@
-// Copyright 2023 The MediaPipe Authors.
+// Copyright 2024 The MediaPipe Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,9 +13,9 @@
 // limitations under the License.
 
 #import <Foundation/Foundation.h>
-#import <UIKit/UIKit.h>
 
 #import "mediapipe/tasks/ios/audio/core/sources/MPPAudioData.h"
+#import "mediapipe/tasks/ios/audio/core/sources/MPPAudioRecord.h"
 #import "mediapipe/tasks/ios/audio/core/sources/MPPAudioRunningMode.h"
 #import "mediapipe/tasks/ios/core/sources/MPPTaskRunner.h"
 
@@ -96,6 +96,31 @@ NS_ASSUME_NONNULL_BEGIN
        timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                          error:(NSError **)error;
 
+/**
+ * Creates an `MPPAudioRecord` instance to get samples from the audio stream produced by the
+ * microphone.
+ *
+ * The client must call appropriate methods from the audio record to start receiving samples from
+ * the microphone.
+ *
+ * Note that MediaPipe Audio tasks will up/down sample automatically to fit the sample rate required
+ * by the model. The default sample rate of the MediaPipe pretrained audio model, Yamnet, is 16kHz.
+ *
+ * @param channelCount Number of channels expected by the client.
+ * @param sampleRate Sample rate of the audio expected by the client.
+ * @param bufferLength Maximum number of elements the internal buffer of `AudioRecord` can hold at
+ * any given point of time. The buffer length must be a multiple of `channelCount`.
+ * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
+ * error will be saved.
+ *
+ * @return A new instance of `MPPAudioRecord` with the given audio format and buffer length. `nil`
+ * if there is an error in initializing `MPPAudioRecord`.
+ */
+- (nullable MPPAudioRecord *)createAudioRecordWithChannelCount:(NSUInteger)channelCount
+                                                    sampleRate:(double)sampleRate
+                                                  bufferLength:(NSUInteger)bufferLength
+                                                         error:(NSError **)error;
+
 - (instancetype)initWithTaskInfo:(MPPTaskInfo *)taskInfo
                  packetsCallback:(mediapipe::tasks::core::PacketsCallback)packetsCallback
                            error:(NSError **)error NS_UNAVAILABLE;
diff --git a/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.mm b/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.mm
index b740b19f6a..4099f55a82 100644
--- a/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.mm
+++ b/mediapipe/tasks/ios/audio/core/sources/MPPAudioTaskRunner.mm
@@ -196,6 +196,18 @@ - (BOOL)checkOrSetSampleRate:(double)sampleRate error:(NSError **)error {
   return YES;
 }
 
+- (nullable MPPAudioRecord *)createAudioRecordWithChannelCount:(NSUInteger)channelCount
+                                                    sampleRate:(double)sampleRate
+                                                  bufferLength:(NSUInteger)bufferLength
+                                                         error:(NSError **)error {
+  // Describe the requested audio layout; the record is initialized from this format.
+  MPPAudioDataFormat *format = [[MPPAudioDataFormat alloc] initWithChannelCount:channelCount
+                                                                     sampleRate:sampleRate];
+  return [[MPPAudioRecord alloc] initWithAudioDataFormat:format
+                                            bufferLength:bufferLength
+                                                   error:error];
+}
+
 - (std::optional<PacketMap>)inputPacketMapWithMPPAudioData:(MPPAudioData *)audioData
                                                      error:(NSError **)error {
   PacketMap inputPacketMap;