Merge pull request google-ai-edge#5670 from priankakariatyml:ios-audio-classifier-audio-record-fixes

PiperOrigin-RevId: 684172808
copybara-github committed Oct 9, 2024
2 parents c4f475e + e72a28f commit 59f8ae3
Showing 12 changed files with 343 additions and 30 deletions.
47 changes: 47 additions & 0 deletions mediapipe/tasks/ios/BUILD
@@ -70,6 +70,7 @@ TENSORFLOW_LITE_C_DEPS = [

CALCULATORS_AND_GRAPHS = [
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/tasks/cc/audio/audio_classifier:audio_classifier_graph",
"//mediapipe/tasks/cc/text/text_classifier:text_classifier_graph",
"//mediapipe/tasks/cc/text/text_embedder:text_embedder_graph",
"//mediapipe/tasks/cc/vision/face_detector:face_detector_graph",
@@ -112,6 +113,14 @@ strip_api_include_path_prefix(
"//mediapipe/tasks/ios/core:sources/MPPBaseOptions.h",
"//mediapipe/tasks/ios/core:sources/MPPTaskOptions.h",
"//mediapipe/tasks/ios/core:sources/MPPTaskResult.h",
"//mediapipe/tasks/ios/audio/audio_classifier:sources/MPPAudioClassifier.h",
"//mediapipe/tasks/ios/audio/audio_classifier:sources/MPPAudioClassifierOptions.h",
"//mediapipe/tasks/ios/audio/audio_classifier:sources/MPPAudioClassifierResult.h",
"//mediapipe/tasks/ios/audio/core:sources/MPPAudioData.h",
"//mediapipe/tasks/ios/audio/core:sources/MPPAudioDataFormat.h",
"//mediapipe/tasks/ios/audio/core:sources/MPPAudioRecord.h",
"//mediapipe/tasks/ios/audio/core:sources/MPPAudioRunningMode.h",
"//mediapipe/tasks/ios/audio/core:sources/MPPFloatBuffer.h",
"//mediapipe/tasks/ios/text/text_classifier:sources/MPPTextClassifier.h",
"//mediapipe/tasks/ios/text/text_classifier:sources/MPPTextClassifierOptions.h",
"//mediapipe/tasks/ios/text/text_classifier:sources/MPPTextClassifierResult.h",
@@ -164,6 +173,44 @@ strip_api_include_path_prefix(
],
)

apple_static_xcframework(
name = "MediaPipeTasksAudio_framework",
# Avoid dependencies of ":MediaPipeTasksCommon_framework" and
# ":MediaPipeTaskGraphs_library in order to prevent duplicate symbols error
# when the frameworks are imported in iOS projects.
avoid_deps = MEDIAPIPE_TASKS_COMMON_DEPS + CALCULATORS_AND_GRAPHS,
bundle_name = "MediaPipeTasksAudio",
ios = {
"simulator": [
"arm64",
"x86_64",
],
"device": ["arm64"],
},
minimum_os_versions = {
"ios": MPP_TASK_MINIMUM_OS_VERSION,
},
public_hdrs = [
":MPPAudioClassifier.h",
":MPPAudioClassifierOptions.h",
":MPPAudioClassifierResult.h",
":MPPAudioData.h",
":MPPAudioDataFormat.h",
":MPPAudioRecord.h",
":MPPAudioRunningMode.h",
":MPPBaseOptions.h",
":MPPCategory.h",
":MPPClassificationResult.h",
":MPPCommon.h",
":MPPFloatBuffer.h",
":MPPTaskOptions.h",
":MPPTaskResult.h",
],
deps = [
"//mediapipe/tasks/ios/audio/audio_classifier:MPPAudioClassifier",
],
)

apple_static_xcframework(
name = "MediaPipeTasksText_framework",
# Avoid dependencies of ":MediaPipeTasksCommon_framework" and
18 changes: 18 additions & 0 deletions mediapipe/tasks/ios/MediaPipeTasksAudio.podspec.template
@@ -0,0 +1,18 @@
Pod::Spec.new do |s|
s.name = 'MediaPipeTasksAudio'
s.version = '${MPP_BUILD_VERSION}'
s.authors = 'Google Inc.'
s.license = { :type => 'Apache',:file => "LICENSE" }
s.homepage = 'https://github.com/google/mediapipe'
s.source = { :http => '${MPP_VISION_DOWNLOAD_URL}' }
s.summary = 'MediaPipe Task Library - Audio'
s.description = 'The audio APIs of the MediaPipe Task Library'

s.ios.deployment_target = '12.0'

s.module_name = 'MediaPipeTasksAudio'
s.static_framework = true
s.dependency 'MediaPipeTasksCommon', '${MPP_TASKS_COMMON_VERSION}'
s.library = 'c++'
s.vendored_frameworks = 'frameworks/MediaPipeTasksAudio.xcframework'
end
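
The new podspec lets apps pull in the audio tasks with CocoaPods (for example, `pod 'MediaPipeTasksAudio'` in a Podfile). As a rough, hypothetical sketch of what a consumer of this pod could look like — the class, property, and enum names below (`AudioClassifier`, `AudioClassifierOptions`, `baseOptions.modelAssetPath`, `.audioClips`) are assumed to mirror the other MediaPipe Tasks iOS APIs and are not taken from this diff:

```swift
import MediaPipeTasksAudio

// Hypothetical consumer of the MediaPipeTasksAudio pod. All names below are
// assumptions modelled on the other MediaPipe Tasks iOS APIs (e.g. the vision
// and text classifiers), not confirmed by this diff.
func makeAudioClassifier() throws -> AudioClassifier {
  let options = AudioClassifierOptions()
  options.baseOptions.modelAssetPath = "classifier.tflite"  // hypothetical model file
  options.runningMode = .audioClips                         // assumed running-mode case
  return try AudioClassifier(options: options)
}
```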
28 changes: 24 additions & 4 deletions mediapipe/tasks/ios/audio/core/sources/MPPAudioData.m
@@ -40,16 +40,36 @@ - (BOOL)loadBuffer:(MPPFloatBuffer *)buffer
}

- (BOOL)loadAudioRecord:(MPPAudioRecord *)audioRecord error:(NSError **)error {
if (![audioRecord.audioDataFormat isEqual:self.format]) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"The provided audio record has incompatible audio format"];
if (![self isValidAudioRecordFormat:audioRecord.audioDataFormat error:error]) {
return NO;
}

MPPFloatBuffer *audioRecordBuffer = [audioRecord readAtOffset:0
withLength:audioRecord.bufferLength
error:error];
return [self loadRingBufferWithAudioRecordBuffer:audioRecordBuffer error:error];
}

- (BOOL)isValidAudioRecordFormat:(MPPAudioDataFormat *)format error:(NSError **)error {
if (![format isEqual:self.format]) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"The provided audio record has incompatible audio format"];
return NO;
}

return YES;
}

- (BOOL)loadRingBufferWithAudioRecordBuffer:(MPPFloatBuffer *)audioRecordBuffer
error:(NSError **)error {
// Returns `NO` without populating an error, since the function that created `audioRecordBuffer` is
// expected to have populated the error param that the caller (`loadAudioRecord`) passed into this
// function. This helper exists to make the logic of `loadAudioRecord` easier to mock in tests.
if (!audioRecordBuffer) {
return NO;
}
return [_ringBuffer loadFloatBuffer:audioRecordBuffer
offset:0
length:audioRecordBuffer.length
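
The refactored `loadAudioRecord` above validates the record's format and then copies the record's samples into the internal ring buffer. A minimal Swift sketch of that call from the app side — the Swift names `AudioData`, `AudioRecord`, and `load(audioRecord:)` are assumed mappings of the Objective-C API shown in this file:

```swift
import MediaPipeTasksAudio

// Copies the latest microphone samples held by an AudioRecord into an
// AudioData ring buffer. Throws an invalid-argument error when the two formats
// differ, matching the isValidAudioRecordFormat: check above.
// load(audioRecord:) is the assumed Swift name of -loadAudioRecord:error:.
func refresh(_ audioData: AudioData, from audioRecord: AudioRecord) throws {
  try audioData.load(audioRecord: audioRecord)
}
```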
71 changes: 69 additions & 2 deletions mediapipe/tasks/ios/audio/core/sources/MPPAudioRecord.h
@@ -23,6 +23,38 @@ NS_ASSUME_NONNULL_BEGIN
* A wrapper class to record the device's microphone continuously. Currently this class only
* supports recording up to 2 channels. If the number of channels is 2, then the mono microphone
* input is duplicated to provide dual channel data.
*
* Internally the class manages an instance of `AVAudioEngine` for tapping the microphone samples.
*
* Recording sets the following parameters on the `AVAudioSession.sharedInstance()`:
*
* 1. `category` = .playAndRecord
* 2. `mode` = .default
* 3. `categoryOptions` = [.overrideMutedMicrophoneInterruption]
*
* If you need more control, use the native `AVAudioEngine` directly for tapping the microphone.
* `AudioRecord` does not actively monitor audio interruptions, route changes, resetting of media
* services etc. They can be handled in the app using the notifications provided by `AVFAudio`.
* Refer to the official docs for the following:
* 1. Handling audio interruptions
* https://developer.apple.com/documentation/avfaudio/handling_audio_interruptions
*
* 2. Responding to audio route changes
* https://developer.apple.com/documentation/avfaudio/responding_to_audio_route_change
*
* 3. Responding to resetting of media services
* https://developer.apple.com/documentation/avfaudio/avaudiosessionmediaserviceswereresetnotification
*
* iOS may automatically stop the audio engine in some cases. A few examples of such events are a
* system interrupt, route change, etc. In such cases, `read(offset:length:)` returns
* `TasksErrorCode.audioRecordNotTappingMicError`. You can restart recording using
* `startRecording()`.
*
* Note: Avoid setting the `AVAudioSession.sharedInstance()` category, mode, options, channel
* configuration (enabling stereo) or other parameters while audio is being actively recorded using
* the audio record. Setting these parameters or changing the configuration of
* `AVAudioSession.sharedInstance()` while a recording is in progress will result in undefined
* behaviour.
*/
NS_SWIFT_NAME(AudioRecord)
@interface MPPAudioRecord : NSObject
@@ -63,20 +95,55 @@ NS_SWIFT_NAME(AudioRecord)
*
* Use `stop()` to stop recording audio sample from the microphone.
*
* @return Boolean value indicating if audio recording started successfully.
* Internally this function activates the `AVAudioSession.sharedInstance()` with the following
* parameters before tapping the microphone using the `AVAudioEngine`:
* 1. `category` = .playAndRecord
* 2. `mode` = .default
* 3. `categoryOptions` = [.overrideMutedMicrophoneInterruption]
*
* If you need more control, use the native `AVAudioEngine` directly for tapping the microphone.
* `AudioRecord` does not actively monitor audio interruptions, route changes, resetting of media
* services etc. They can be handled in the app using the notifications provided by `AVFAudio`.
* Refer to the official docs for the following:
* 1. Handling audio interruptions
* https://developer.apple.com/documentation/avfaudio/handling_audio_interruptions
*
* 2. Responding to audio route changes
* https://developer.apple.com/documentation/avfaudio/responding_to_audio_route_change
*
* 3. Responding to resetting of media services
* https://developer.apple.com/documentation/avfaudio/avaudiosessionmediaserviceswereresetnotification
*
* @return Returns successfully if audio recording has started.
*/
- (BOOL)startRecordingWithError:(NSError **)error NS_SWIFT_NAME(startRecording());

/**
* Stops recording audio from the microphone. All elements in the internal buffer of `AudioRecord`
* will also be set to zero.
*
* Internally, this function deactivates the `AVAudioSession.sharedInstance()` after recording is
* stopped. If some audio resources are running in the app when the session is deactivated, this
* function throws an error. The session gets deactivated by the OS irrespective of the return
* status of this function. The error thrown is a warning to indicate the termination of any running
* audio resources. Refer to the official documentation for more details:
* https://developer.apple.com/documentation/avfaudio/avaudiosession/1616627-setactive
*
* @return Returns successfully if recording was stopped and the deactivation of
* `AVAudioSession.sharedInstance()` succeeded. Throws any error encountered when deactivating
* `AVAudioSession.sharedInstance()`.
*/
- (void)stop;
- (BOOL)stopWithError:(NSError **)error;

/**
* Returns the `length` number of elements in the internal buffer of `AudioRecord` starting at
* `offset`, i.e, `buffer[offset:offset+length]`.
*
* iOS may automatically stop the audio engine in some cases. A few examples of such events are a
* system interrupt, route change, etc. In such cases, this function returns
* `TasksErrorCode.audioRecordNotTappingMicError`. You can restart recording using
* `startRecording()`.
*
* @param offset Index in the buffer from which elements are to be read.
* @param length Number of elements to be returned.
*
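
Taken together, the new header comments describe a start → read → restart loop: `startRecording()` configures and activates the shared `AVAudioSession`, `read(offset:length:)` fails with `TasksErrorCode.audioRecordNotTappingMicError` once the OS has stopped the engine, and recording can be resumed by calling `startRecording()` again. A hedged Swift sketch of that flow; the `AudioRecord` and `FloatBuffer` Swift names and the `UInt` parameter types are assumed from the `NS_SWIFT_NAME` annotations and default bridging:

```swift
import MediaPipeTasksAudio

// Sketch of the usage pattern documented in MPPAudioRecord.h: start the tap,
// read periodically, and restart when the OS has stopped the engine
// (interruption, route change, media-services reset).
final class MicrophoneReader {
  private let audioRecord: AudioRecord  // created elsewhere with a valid AudioDataFormat

  init(audioRecord: AudioRecord) {
    self.audioRecord = audioRecord
  }

  func start() throws {
    try audioRecord.startRecording()
  }

  func latestSamples(length: UInt) -> FloatBuffer? {
    do {
      return try audioRecord.read(offset: 0, length: length)
    } catch {
      // Most likely audioRecordNotTappingMicError: the engine is no longer
      // running, so try to restart the tap as the header suggests.
      try? audioRecord.startRecording()
      return nil
    }
  }
}
```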
80 changes: 74 additions & 6 deletions mediapipe/tasks/ios/audio/core/sources/MPPAudioRecord.m
@@ -93,6 +93,16 @@ - (nullable instancetype)initWithAudioDataFormat:(MPPAudioDataFormat *)audioData
}

- (BOOL)startRecordingWithError:(NSError **)error {
// The audio engine's running state will be set to `NO` when a system interrupt happens and the
// user did not explicitly invoke `stop()`. This method allows restarting recording in such cases.
if (_audioEngine.isRunning) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeAudioRecordNotTappingMicError
description:@"Recording of microphone samples is already in progress. "
@"You can stop recording using `stopRecording()`."];
return NO;
}

// TODO: This API is deprecated from iOS 17.0. Update to new APIs and restrict the following
// code's use to versions below iOS 17.0.
switch ([AVAudioSession sharedInstance].recordPermission) {
@@ -115,17 +125,15 @@ - (BOOL)startRecordingWithError:(NSError **)error {
}

case AVAudioSessionRecordPermissionGranted: {
[self startTappingMicrophoneWithError:error];
return YES;
return [self startTappingMicrophoneWithError:error];
}
}

return NO;
}

- (void)stop {
[[_audioEngine inputNode] removeTapOnBus:0];
[_audioEngine stop];
- (BOOL)stopWithError:(NSError **)error {
[self stopAndResetAudioEngine];

// Using strong `self` (instance variable is available through strong self) is okay since the
// block is shortlived and it'll release its strong reference to `self` when it finishes
Expand All @@ -136,11 +144,38 @@ - (void)stop {
dispatch_barrier_async(_convertLoadAndReadBufferQueue, ^{
[_floatRingBuffer clear];
});

// If any audio resources outside this audio record are currently running in the app (e.g., an
// audio player), an error may be thrown when deactivating `AVAudioSession.sharedInstance()`.
// Irrespective of whether an error is returned, the session will be deactivated by iOS. The
// official docs explain the reasons for such an error:
// https://developer.apple.com/documentation/avfaudio/avaudiosession/1616627-setactive.
return [[AVAudioSession sharedInstance]
setActive:NO
withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
error:error];
}

- (nullable MPPFloatBuffer *)readAtOffset:(NSUInteger)offset
withLength:(NSUInteger)length
error:(NSError **)error {
if (!_audioEngine.isRunning) {
[MPPCommonUtils
createCustomError:error
withCode:MPPTasksErrorCodeAudioRecordNotTappingMicError
description:
@"Recording of microphone samples is not in progress. You may not have started a "
@"recording or OS may have stopped the engine due to an interrupt, route change "
@"etc. You can start recording microphone samples using `startRecording()`."];
return nil;
}

return [self internalReadAtOffset:offset withLength:length error:error];
}

- (nullable MPPFloatBuffer *)internalReadAtOffset:(NSUInteger)offset
withLength:(NSUInteger)length
error:(NSError **)error {
__block MPPFloatBuffer *bufferToReturn = nil;
__block NSError *readError = nil;

Expand All @@ -165,7 +200,31 @@ - (nullable MPPFloatBuffer *)readAtOffset:(NSUInteger)offset
return bufferToReturn;
}

- (void)startTappingMicrophoneWithError:(NSError **)error {
- (BOOL)startTappingMicrophoneWithError:(NSError **)error {
// Stop and reset the audio engine to handle the case where the user may be resuming an audio
// engine that was stopped by the OS due to an interrupt (e.g., a phone call) or a route change.
// In such cases, installing a new tap without removing the existing tap would crash the app.
[self stopAndResetAudioEngine];

// For tapping the microphone, `AVAudioSession`'s `category` must be set and the session must be
// activated. The audio record does not tap the microphone with any custom option, including
// `.allowBluetooth`. This disallows microphone route changes in most scenarios, ensuring that the
// recording happens through the device microphone. If users need more control over the recording
// devices or OS interruptions, the native `AVAudioEngine` can be used directly. `category` is set
// to `AVAudioSessionCategoryPlayAndRecord` so that audio playback can still be configured in the
// app while an audio record is running.
// TODO: Investigate safe starting of `AVAudioEngine` without any side effects to enable this
// class to accept a custom category, mode, and options.
if (!([[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayAndRecord
error:error] &&
[[AVAudioSession sharedInstance]
setActive:YES
withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
error:error])) {
return NO;
}

AVAudioNode *inputNode = [_audioEngine inputNode];
AVAudioFormat *format = [inputNode outputFormatForBus:0];

@@ -209,6 +268,15 @@

[_audioEngine prepare];
[_audioEngine startAndReturnError:error];

return YES;
}

// Stops the audio engine internally without deactivating `AVAudioSession.sharedInstance()`.
- (void)stopAndResetAudioEngine {
[[_audioEngine inputNode] removeTapOnBus:0];
[_audioEngine stop];
[_audioEngine reset];
}

- (BOOL)loadAudioPCMBuffer:(AVAudioPCMBuffer *)pcmBuffer error:(NSError **)error {
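
On the teardown side, the implementation above clears the ring buffer and then deactivates `AVAudioSession.sharedInstance()`, surfacing any deactivation error even though iOS deactivates the session regardless. A small Swift sketch of handling that, assuming `-stopWithError:` bridges to `stop() throws`:

```swift
import MediaPipeTasksAudio

// Stops the microphone tap. A thrown error only signals that another audio
// resource (e.g. a player) was still running when the shared AVAudioSession
// was deactivated; iOS deactivates the session either way, so treat it as a
// warning rather than a failure.
func stopMicrophone(_ audioRecord: AudioRecord) {
  do {
    try audioRecord.stop()
  } catch {
    print("AVAudioSession deactivation warning: \(error)")
  }
}
```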
4 changes: 4 additions & 0 deletions mediapipe/tasks/ios/audio/core/sources/MPPFloatBuffer.h
@@ -48,6 +48,10 @@ NS_ASSUME_NONNULL_BEGIN
/** Clears the `FloatBuffer` by setting all elements to zero */
- (void)clear;

- (instancetype)init NS_UNAVAILABLE;

+ (instancetype)new NS_UNAVAILABLE;

@end

NS_ASSUME_NONNULL_END
8 changes: 6 additions & 2 deletions mediapipe/tasks/ios/build_ios_framework.sh
@@ -16,7 +16,9 @@
# Set the following variables as appropriate.
# * BAZEL: path to bazel. defaults to the first one available in PATH
# * FRAMEWORK_NAME: name of the iOS framework to be built. Currently the
# * accepted values are MediaPipeTasksCommon, MediaPipeTasksText, MediaPipeTasksVision.
# * accepted values are MediaPipeTasksCommon, MediaPipeTasksText,
# * MediaPipeTasksVision, MediaPipeTasksAudio, MediaPipeTasksGenAIC,
# * MediaPipeTasksGenAI.
# * MPP_BUILD_VERSION: to specify the release version. defaults to 0.0.1-dev
# * IS_RELEASE_BUILD: set as true if this build should be a release build
# * ARCHIVE_FRAMEWORK: set as true if the framework should be archived
@@ -56,12 +58,14 @@ case $FRAMEWORK_NAME in
;;
"MediaPipeTasksText")
;;
"MediaPipeTasksAudio")
;;
"MediaPipeTasksGenAIC")
;;
"MediaPipeTasksGenAI")
;;
*)
echo "Wrong framework name. The following framework names are allowed: MediaPipeTasksText, MediaPipeTasksVision, MediaPipeTasksCommon, MediaPipeTasksGenAI, MediaPipeTasksGenAIC"
echo "Wrong framework name. The following framework names are allowed: MediaPipeTasksText, MediaPipeTasksVision, MediaPipeTasksAudio, MediaPipeTasksCommon, MediaPipeTasksGenAI, MediaPipeTasksGenAIC"
exit 1
;;
esac