diff --git a/.gitignore b/.gitignore
index 6e728e1..90976f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,4 +54,9 @@ yarn-debug.log
 yarn-error.log
 
 # Expo
-.expo/*
\ No newline at end of file
+.expo/*
+
+# Example
+/example/ios/*
+/example/android/*
+example/ios/expoaudiostreamexample.xcodeproj/project.pbxproj
diff --git a/README.md b/README.md
index 6209ced..c52915e 100644
--- a/README.md
+++ b/README.md
@@ -1,51 +1,150 @@
 # Expo Play Audio Stream 🎶
 
-The Expo Play Audio Stream module is a powerful tool for streaming audio data in your Expo-based React Native applications. It provides a seamless way to play audio chunks in real-time, allowing you to build audio-centric features like voice assistants, audio players, and more.
+The Expo Play Audio Stream module is a powerful tool for recording and streaming audio data in your Expo-based React Native applications. It provides a seamless way to record audio from the microphone and play audio chunks in real time, allowing you to build audio-centric features such as voice assistants, audio players, voice recorders, and more.
 
 ## Motivation 🎯
 
-Expo's built-in audio capabilities are limited to playing pre-loaded audio files. The Expo Audio Stream module was created to address this limitation, enabling developers to stream audio data dynamically and have more control over the audio playback process.
+Expo's built-in audio capabilities are limited to playing pre-loaded audio files and basic recording. The Expo Play Audio Stream module was created to address these limitations, enabling developers to record high-quality audio with real-time streaming capabilities and have more control over both the recording and playback process. The module provides features like dual-stream output (original and 16 kHz versions), which is particularly useful for voice activity detection and speech recognition applications.
 
 ## Example Usage 🚀
 
-Here's an example of how you can use the Expo Audio Stream module to play a sequence of audio chunks:
+Here's how you can use the Expo Play Audio Stream module for different scenarios:
+
+### Standard Recording and Playback
 
 ```javascript
 import { ExpoPlayAudioStream } from 'expo-audio-stream';
 
-// Assuming you have some audio data in base64 format
-const sampleA = 'base64EncodedAudioDataA';
-const sampleB = 'base64EncodedAudioDataB';
-
-useEffect(() => {
-  async function playAudioChunks() {
-    try {
-      await ExpoPlayAudioStream.setVolume(100);
-      await ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleA);
-      console.log('Streamed A');
-      await ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB);
-      console.log('Streamed B');
-      console.log('Streaming A & B');
-      ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleA);
-      ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB);
-    } catch (error) {
-      console.error(error);
-    }
+// Example of standard recording and playback
+async function handleStandardRecording() {
+  try {
+    // Set volume for playback
+    await ExpoPlayAudioStream.setVolume(0.8);
+
+    // Start recording with configuration
+    const { recordingResult, subscription } = await ExpoPlayAudioStream.startRecording({
+      onAudioStream: (event) => {
+        console.log('Received audio stream:', {
+          audioDataBase64: event.data,
+          audioData16kHzBase64: event.data16kHz, // typically used for voice activity detection, e.g. Silero VAD models
+          position: event.position,
+          eventDataSize: event.eventDataSize,
+          totalSize: event.totalSize
+        });
+      }
+    });
+
+    // After some time, stop recording
+    setTimeout(async () => {
+      const recording = await ExpoPlayAudioStream.stopRecording();
+      console.log('Recording stopped:', recording);
+
+      // Read the file from recording.fileUri and convert it to base64 (e.g. with expo-file-system) to get base64Content
+
+      // Play the recorded audio
+      const turnId = 'example-turn-1';
+      await ExpoPlayAudioStream.playAudio(base64Content, turnId);
+
+      // Clean up
+      subscription?.remove();
+    }, 5000);
+
+  } catch (error) {
+    console.error('Audio handling error:', error);
   }
+}
+
+// You can also subscribe to audio events from anywhere
+const audioSubscription = ExpoPlayAudioStream.subscribeToAudioEvents(async (event) => {
+  console.log('Audio event received:', {
+    data: event.data,
+    data16kHz: event.data16kHz
+  });
+});
+// Don't forget to clean up when done
+// audioSubscription.remove();
 ```
-  playAudioChunks();
-}, []);
+### Simultaneous Recording and Playback
+
+```javascript
+import { ExpoPlayAudioStream } from 'expo-audio-stream';
+
+// Example of simultaneous recording and playback with voice processing
+async function handleSimultaneousRecordAndPlay() {
+  try {
+    // Start microphone with voice processing
+    const { recordingResult, subscription } = await ExpoPlayAudioStream.startMicrophone({
+      onAudioStream: (event) => {
+        console.log('Received audio stream with voice processing:', {
+          audioDataBase64: event.data,
+          audioData16kHz: event.data16kHz
+        });
+      }
+    });
+
+    // Play audio while recording is active
+    const turnId = 'response-turn-1';
+    await ExpoPlayAudioStream.playSound(someAudioBase64, turnId);
+
+    // Example of controlling playback during recording
+    setTimeout(async () => {
+      // Interrupt current playback
+      await ExpoPlayAudioStream.interruptSound();
+
+      // Resume playback
+      await ExpoPlayAudioStream.resumeSound();
+
+      // Stop microphone recording
+      await ExpoPlayAudioStream.stopMicrophone();
+
+      // Clean up
+      subscription?.remove();
+    }, 5000);
+
+  } catch (error) {
+    console.error('Simultaneous audio handling error:', error);
+  }
+}
 ```
 
 ## API 📚
 
-The Expo Play Audio Stream module provides the following API:
+The Expo Play Audio Stream module provides the following methods:
+
+### Standard Audio Operations
+
+- `startRecording(recordingConfig: RecordingConfig)`: Starts microphone recording with the specified configuration. Returns a promise that resolves to the recording result and an audio event subscription.
+
+- `stopRecording()`: Stops the current microphone recording and returns the audio recording data.
+
+- `playAudio(base64Chunk: string, turnId: string)`: Plays a base64-encoded audio chunk with the specified turn ID.
+
+- `pauseAudio()`: Pauses the current audio playback.
+
+- `stopAudio()`: Stops the currently playing audio.
-- `streamRiff16Khz16BitMonoPcmChunk(base64Chunk: string): Promise`: Streams a base64-encoded audio chunk in the RIFF format with 16 kHz, 16-bit, mono PCM encoding.
-- `setVolume(volume: number): Promise`: Sets the volume of the audio playback, where `volume` is a value between 0 and 100.
-- `pause(): Promise`: Pauses the audio playback.
-- `start(): Promise`: Starts the audio playback.
-- `stop(): Promise`: Stops the audio playback and clears any remaining audio data.
+- `setVolume(volume: number)`: Sets the volume for audio playback (0.0 to 1.0).
+
+- `clearPlaybackQueueByTurnId(turnId: string)`: Clears the playback queue for a specific turn ID.
+
+- `subscribeToAudioEvents(onMicrophoneStream: (event: AudioDataEvent) => Promise<void>)`: Subscribes to recording events from anywhere in your application. Returns a subscription that should be cleaned up when no longer needed.
+
+### Simultaneous Recording and Playback
+
+These methods are specifically designed for scenarios where you need to record and play audio at the same time:
+
+- `startMicrophone(recordingConfig: RecordingConfig)`: Starts microphone streaming with voice processing enabled. Returns a promise that resolves to the recording result and an audio event subscription.
+
+- `stopMicrophone()`: Stops microphone streaming when in simultaneous mode.
+
+- `playSound(audio: string, turnId: string)`: Plays a sound while recording is active. Uses voice processing to prevent feedback.
+
+- `interruptSound()`: Interrupts the current sound playback in simultaneous mode.
+
+- `resumeSound()`: Resumes the current sound playback in simultaneous mode.
+
+All methods are static, and most return Promises that resolve when the operation is complete. Error handling is built into each method, with descriptive error messages if an operation fails.
 
 ## Swift Implementation 🍎
 
@@ -55,10 +154,22 @@ The Swift implementation of the Expo Audio Stream module uses the `AVFoundation`
 The Kotlin implementation of the Expo Audio Stream module uses the `AudioTrack` class from the Android framework to handle audio playback. It uses a concurrent queue to manage the audio chunks and a coroutine-based playback loop to ensure efficient and asynchronous processing of the audio data.
 
+## Voice Processing and Isolation 🎤
+
+The module implements several audio optimizations for voice recording:
+
+- On iOS 15 and later, users are prompted with the system voice isolation options (`microphoneModes`), allowing them to choose their preferred voice isolation level.
+- When simultaneous recording and playback is enabled, the module uses iOS voice processing, which includes:
+  - Noise reduction
+  - Echo cancellation
+  - Voice optimization
+
+Note: Voice processing may result in lower audio levels as it optimizes for voice clarity over volume.
This is a trade-off made to ensure better voice quality and reduce background noise. + ## Limitations and Considerations ⚠️ - The Expo Play Audio Stream module is designed to work with specific audio formats (RIFF, 16 kHz, 16-bit, mono PCM). If your audio data is in a different format, you may need to convert it before using the module. -- The module does not provide advanced features like audio effects, mixing, or recording. It is primarily focused on real-time audio streaming. +- The module does not provide advanced features like audio effects or mixing. It is primarily focused on real-time audio streaming and recording. - The performance of the module may depend on the device's hardware capabilities and the complexity of the audio data being streamed. ## Contributions 🤝 diff --git a/example/.gitignore b/example/.gitignore index 05647d5..ad5130d 100644 --- a/example/.gitignore +++ b/example/.gitignore @@ -33,3 +33,7 @@ yarn-error.* # typescript *.tsbuildinfo + +# Example +ios/* +android/* diff --git a/example/App.tsx b/example/App.tsx index 11aff12..0727248 100644 --- a/example/App.tsx +++ b/example/App.tsx @@ -11,10 +11,9 @@ import { Audio } from 'expo-av'; const ANDROID_SAMPLE_RATE = 16000; const IOS_SAMPLE_RATE = 48000; -const BIT_DEPTH = 16; const CHANNELS = 1; const ENCODING = "pcm_16bit"; -const RECORDING_INTERVAL = 50; +const RECORDING_INTERVAL = 100; const turnId1 = 'turnId1'; const turnId2 = 'turnId2'; @@ -23,30 +22,7 @@ const turnId2 = 'turnId2'; export default function App() { - const eventListenerSubscriptionRef = useRef(null); - - useEffect(() => { - async function run() { - try { - // console.log("setPlayAndRecord"); - // //await ExpoPlayAudioStream.setVolume(100); - // await ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB); - // await ExpoPlayAudioStream.setPlayAndRecord(); - // console.log("after setPlayAndRecord"); - // //await new Promise((resolve) => setTimeout(resolve, 2000)); - // await ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB); - // console.log("streamed A"); - // await ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB); - // console.log("streamed B"); - // console.log("streaming A & B"); - //ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleA); - //ExpoPlayAudioStream.streamRiff16Khz16BitMonoPcmChunk(sampleB); - } catch (error) { - console.error(error); - } - } - run(); - }, []); + const eventListenerSubscriptionRef = useRef(undefined); const onAudioCallback = async (audio: AudioDataEvent) => { console.log(audio.data.slice(0, 100)); @@ -59,7 +35,7 @@ export default function App() { onPress={async () => { await ExpoPlayAudioStream.playAudio(sampleB, turnId1); }} - title="Stream B" + title="Play sample B" /> ==================== @@ -68,7 +44,7 @@ export default function App() { onPress={async () => { await ExpoPlayAudioStream.pauseAudio(); }} - title="Pause" + title="Pause Audio" /> ==================== @@ -77,7 +53,7 @@ export default function App() { onPress={async () => { await ExpoPlayAudioStream.playAudio(sampleA, turnId2); }} - title="Stream A" + title="Play sample A" /> ==================== @@ -113,7 +89,7 @@ export default function App() { await ExpoPlayAudioStream.stopRecording(); if (eventListenerSubscriptionRef.current) { eventListenerSubscriptionRef.current.remove(); - eventListenerSubscriptionRef.current = null; + eventListenerSubscriptionRef.current = undefined; } }} title="Stop Recording" diff --git a/ios/ExpoPlayAudioStreamModule.swift b/ios/ExpoPlayAudioStreamModule.swift index 540686a..405877d 
100644 --- a/ios/ExpoPlayAudioStreamModule.swift +++ b/ios/ExpoPlayAudioStreamModule.swift @@ -4,9 +4,20 @@ import ExpoModulesCore let audioDataEvent: String = "AudioData" -public class ExpoPlayAudioStreamModule: Module, AudioStreamManagerDelegate { +public class ExpoPlayAudioStreamModule: Module, AudioStreamManagerDelegate, MicrophoneDataDelegate { private let audioController = AudioController() private let audioSessionManager = AudioSessionManager() + private lazy var microphone: Microphone = { + let microphone = Microphone() + return microphone + }() + + private lazy var soundPlayer: SoundPlayer = { + let soundPlayer = SoundPlayer() + return soundPlayer + }() + + private var inittedAudioSession: Bool = false public func definition() -> ModuleDefinition { Name("ExpoPlayAudioStream") @@ -17,6 +28,7 @@ public class ExpoPlayAudioStreamModule: Module, AudioStreamManagerDelegate { OnCreate { print("Setting up Audio Session Manager") audioSessionManager.delegate = self + microphone.delegate = self } /// Asynchronously starts audio recording with the given settings. @@ -146,12 +158,100 @@ public class ExpoPlayAudioStreamModule: Module, AudioStreamManagerDelegate { promise.resolve(result) } + AsyncFunction("playSound") { (base64Chunk: String, turnId: String, promise: Promise) in + Logger.debug("Play sound") + do { + if !inittedAudioSession { + try ensureInittedAudioSession() + } + try soundPlayer.play(audioChunk: base64Chunk, turnId: turnId, resolver: { + _ in promise.resolve(nil) + }, rejecter: {code, message, error in + promise.reject(code ?? "ERR_UNKNOWN", message ?? "Unknown error") + }) + } catch { + print("Error enqueuing audio: \(error.localizedDescription)") + } + } + + AsyncFunction("stopSound") { (promise: Promise) in + soundPlayer.stop(promise) + } + + AsyncFunction("interruptSound") { (promise: Promise) in + soundPlayer.interrupt(promise) + } + + Function("resumeSound") { + soundPlayer.resume() + } + + AsyncFunction("startMicrophone") { (options: [String: Any], promise: Promise) in + + if !inittedAudioSession { + do { + try ensureInittedAudioSession() + } catch { + promise.reject("ERROR", "Failed to init audio session \(error.localizedDescription)") + return + } + } + // Extract settings from provided options, using default values if necessary + let sampleRate = options["sampleRate"] as? Double ?? 16000.0 // it fails if not 48000, why? + let numberOfChannels = options["channelConfig"] as? Int ?? 1 // Mono channel configuration + let bitDepth = options["audioFormat"] as? Int ?? 16 // 16bits + let interval = options["interval"] as? Int ?? 1000 + + + // Create recording settings + let settings = RecordingSettings( + sampleRate: sampleRate, + desiredSampleRate: sampleRate, + numberOfChannels: numberOfChannels, + bitDepth: bitDepth, + maxRecentDataDuration: nil, + pointsPerSecond: nil + ) + + if let result = self.microphone.startRecording(settings: settings, intervalMilliseconds: interval) { + if let resError = result.error { + promise.reject("ERROR", resError) + } else { + let resultDict: [String: Any] = [ + "fileUri": result.fileUri ?? "", + "channels": result.channels ?? 1, + "bitDepth": result.bitDepth ?? 16, + "sampleRate": result.sampleRate ?? 48000, + "mimeType": result.mimeType ?? "", + ] + promise.resolve(resultDict) + } + } else { + promise.reject("ERROR", "Failed to start recording.") + } + } + + AsyncFunction("stopMicrophone") { (promise: Promise) in + microphone.stopRecording() + promise.resolve(nil) + } + /// Clears all audio files stored in the document directory. 
Function("clearAudioFiles") { clearAudioFiles() } } + private func ensureInittedAudioSession() throws { + if self.inittedAudioSession { return } + let audioSession = AVAudioSession.sharedInstance() + try audioSession.setCategory( + .playAndRecord, mode: .voiceChat, + options: [.defaultToSpeaker, .allowBluetooth, .allowBluetoothA2DP, .mixWithOthers]) + try audioSession.setActive(true) + inittedAudioSession = true + } + /// Handles the reception of audio data from the AudioStreamManager. /// /// - Parameters: @@ -245,4 +345,22 @@ public class ExpoPlayAudioStreamModule: Module, AudioStreamManagerDelegate { return [] } } + + func onMicrophoneData(_ microphoneData: Data, _ microphoneData16kHz: Data) { + let encodedData = microphoneData.base64EncodedString() + let encodedData16kHz = microphoneData16kHz.base64EncodedString() + // Construct the event payload similar to Android + let eventBody: [String: Any] = [ + "fileUri": "", + "lastEmittedSize": 0, + "position": 0, // Add position of the chunk in ms since + "encoded": encodedData, + "encoded16kHz": encodedData16kHz, + "deltaSize": 0, + "totalSize": 0, + "mimeType": "" + ] + // Emit the event to JavaScript + sendEvent(audioDataEvent, eventBody) + } } diff --git a/ios/Microphone.swift b/ios/Microphone.swift new file mode 100644 index 0000000..31b9ca1 --- /dev/null +++ b/ios/Microphone.swift @@ -0,0 +1,406 @@ +import AVFoundation +import ExpoModulesCore +// RealTimeAudioManager.swift +// Pods +// +// Created by Alexander Demchuk on 15/12/2024. +// + +public enum SoundPlayerError: Error { + case invalidBase64String + case couldNotPlayAudio + case decodeError(details: String) +} + +class Microphone { + weak var delegate: MicrophoneDataDelegate? + + private var audioEngine: AVAudioEngine! + private var audioConverter: AVAudioConverter! + private var inputNode: AVAudioInputNode! + private var audioPlayerNode: AVAudioPlayerNode = AVAudioPlayerNode() + + private var isMuted = false + private var isVoiceProcessingEnabled: Bool = false + + + internal var lastEmissionTime: Date? + internal var lastEmittedSize: Int64 = 0 + private var emissionInterval: TimeInterval = 1.0 // Default to 1 second + private var totalDataSize: Int64 = 0 + private var isPaused = false + private var pausedDuration = 0 + private var fileManager = FileManager.default + internal var recordingSettings: RecordingSettings? + internal var recordingUUID: UUID? + internal var mimeType: String = "audio/wav" + private var lastBufferTime: AVAudioTime? + private var accumulatedData = Data() + private var accumulatedData16kHz = Data() + private var recentData = [Float]() // This property stores the recent audio data + + internal var recordingFileURL: URL? + private var startTime: Date? + private var pauseStartTime: Date? + + + private var inittedAudioSession = false + private var isRecording: Bool = false + + public static let sampleRate: Double = 44100 + public static let isLinear16PCM: Bool = true + // Linear16 PCM is a standard format well-supported by EVI (although you must send + // a `session_settings` message to inform EVI of the sample rate). Because there is + // a wide variance of the native format/ sample rate from input devices, we use the + // AVAudioConverter API to convert the audio to this standard format in order to + // remove all guesswork. + private static let desiredInputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: sampleRate, channels: 1, interleaved: false)! 
+ private let audioPlaybackFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000.0, channels: 1, interleaved: false) + + init() { + if #available(iOS 15.0, *) { + AVCaptureDevice.showSystemUserInterface(.microphoneModes) + } else { + // Fallback on earlier versions + } + } + + private func setupVoiceProcessing() { + self.isMuted = false + self.isVoiceProcessingEnabled = true + audioEngine = AVAudioEngine() + + do { + let outputNode: AVAudioOutputNode = audioEngine.outputNode + self.inputNode = audioEngine.inputNode + let mainMixerNode: AVAudioMixerNode = audioEngine.mainMixerNode + audioEngine.connect(mainMixerNode, to: outputNode, format: nil) + + // This step, importantly, tells iOS to enable "voice processing" i.e. noise reduction / echo cancellation + // to optimize the audio input for voice processing. Note that this is simply a + // *request* to the operating system to enable these features, and there is no guarantee + // that they will be supported in all environments. + // Notably, echo cancellation doesn't seem to work in the iOS simulator. + try self.inputNode.setVoiceProcessingEnabled(true) + //try outputNode.setVoiceProcessingEnabled(true) + } catch { + print("Error setting voice processing: \(error)") + return + } + } + + private func ensureInittedAudioSession() throws { + if self.inittedAudioSession { return } + let audioSession = AVAudioSession.sharedInstance() + try audioSession.setCategory( + .playAndRecord, mode: .voiceChat, + options: [.defaultToSpeaker, .allowBluetooth, .allowBluetoothA2DP, .mixWithOthers]) + try audioSession.setActive(true) + inittedAudioSession = true + } + + public func startRecording() throws { + guard !self.isRecording else { return } + if !self.inittedAudioSession { + try ensureInittedAudioSession() + } + if !self.isVoiceProcessingEnabled { + setupVoiceProcessing() + } + let nativeInputFormat = self.inputNode.inputFormat(forBus: 0) + // The sample rate is "samples per second", so multiplying by 0.1 should get us chunks of about 100ms + let inputBufferSize = UInt32(nativeInputFormat.sampleRate * 0.1) + self.inputNode.installTap(onBus: 0, bufferSize: inputBufferSize, format: nativeInputFormat) { (buffer, time) in + let convertedBuffer = AVAudioPCMBuffer(pcmFormat: Microphone.desiredInputFormat, frameCapacity: 1024)! + + var error: NSError? = nil + + let silence = Data(repeating: 0, count: Int(convertedBuffer.frameCapacity) * Int(convertedBuffer.format.streamDescription.pointee.mBytesPerFrame)) + if self.isMuted { + // The standard behavior for muting is to send audio frames filled with empty data + // (versus not sending anything during mute). This helps audio systems distinguish + // between muted-but-still-active streams and streams that have become disconnected. + + self.delegate?.onMicrophoneData(silence, silence) + return + } + let inputAudioConverter = AVAudioConverter(from: nativeInputFormat, to: Microphone.desiredInputFormat)! 
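+                // Pull-based conversion: the input block below hands the tapped buffer to the converter,
+                // which reformats it into 16-bit mono PCM at Microphone.desiredInputFormat.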
+ let status = inputAudioConverter.convert(to: convertedBuffer, error: &error, withInputFrom: {inNumPackets, outStatus in + outStatus.pointee = .haveData + buffer.frameLength = inNumPackets + return buffer + }) + + if status == .haveData { + let byteLength = Int(convertedBuffer.frameLength) * Int(convertedBuffer.format.streamDescription.pointee.mBytesPerFrame) + let audioData = Data(bytes: convertedBuffer.audioBufferList.pointee.mBuffers.mData!, count: byteLength) + self.delegate?.onMicrophoneData(audioData, silence) + return + } + if error != nil { + print("Error during audio conversion: \(error!.localizedDescription)") + return + } + print( "Unexpected status during audio conversion: \(status)") + } + + if (!audioEngine.isRunning) { + try audioEngine.start() + } + self.isRecording = true + } + + + func startRecording(settings: RecordingSettings, intervalMilliseconds: Int) -> StartRecordingResult? { + if !self.isVoiceProcessingEnabled { + setupVoiceProcessing() + } + guard !isRecording else { + Logger.debug("Debug: Recording is already in progress.") + return StartRecordingResult(error: "Recording is already in progress.") + } + + if audioEngine.isRunning { + Logger.debug("Debug: Audio engine already running.") + audioEngine.stop() + } + + var newSettings = settings // Make settings mutable + + // Determine the commonFormat based on bitDepth + let commonFormat: AVAudioCommonFormat + switch newSettings.bitDepth { + case 16: + commonFormat = .pcmFormatInt16 + case 32: + commonFormat = .pcmFormatInt32 + default: + Logger.debug("Unsupported bit depth. Defaulting to 16-bit PCM") + commonFormat = .pcmFormatInt16 + newSettings.bitDepth = 16 + } + + emissionInterval = max(100.0, Double(intervalMilliseconds)) / 1000.0 + lastEmissionTime = Date() + accumulatedData.removeAll() + totalDataSize = 0 + pausedDuration = 0 + isPaused = false + + do { + let session = AVAudioSession.sharedInstance() + Logger.debug("Debug: Configuring audio session with sample rate: \(settings.sampleRate) Hz") + + // Check if the input node supports the desired format + let inputNode = audioEngine.inputNode + let hardwareFormat = inputNode.inputFormat(forBus: 0) + if hardwareFormat.sampleRate != newSettings.sampleRate { + Logger.debug("Debug: Preferred sample rate not supported. Falling back to hardware sample rate \(session.sampleRate).") + newSettings.sampleRate = session.sampleRate + } + + try session.setPreferredSampleRate(settings.sampleRate) + try session.setPreferredIOBufferDuration(1024 / settings.sampleRate) + try session.setActive(true) + + let actualSampleRate = session.sampleRate + if actualSampleRate != newSettings.sampleRate { + Logger.debug("Debug: Preferred sample rate not set. Falling back to hardware sample rate: \(actualSampleRate) Hz") + newSettings.sampleRate = actualSampleRate + } + Logger.debug("Debug: Audio session is successfully configured. 
Actual sample rate is \(actualSampleRate) Hz") + + recordingSettings = newSettings // Update the class property with the new settings + } catch { + Logger.debug("Error: Failed to set up audio session with preferred settings: \(error.localizedDescription)") + return StartRecordingResult(error: "Error: Failed to set up audio session with preferred settings: \(error.localizedDescription)") + } + + // Correct the format to use 16-bit integer (PCM) + guard let audioFormat = AVAudioFormat(commonFormat: commonFormat, sampleRate: newSettings.sampleRate, channels: UInt32(newSettings.numberOfChannels), interleaved: true) else { + Logger.debug("Error: Failed to create audio format with the specified bit depth.") + return StartRecordingResult(error: "Error: Failed to create audio format with the specified bit depth.") + } + + audioEngine.inputNode.installTap(onBus: 0, bufferSize: 1024, format: audioFormat) { [weak self] (buffer, time) in + guard let self = self else { + Logger.debug("Error: File URL or self is nil during buffer processing.") + return + } + + // Processing the current buffer + self.processAudioBuffer(buffer) + self.lastBufferTime = time + } + + do { + startTime = Date() + try audioEngine.start() + isRecording = true + Logger.debug("Debug: Recording started successfully.") + return StartRecordingResult( + fileUri: "", + mimeType: mimeType, + channels: settings.numberOfChannels, + bitDepth: settings.bitDepth, + sampleRate: settings.sampleRate + ) + } catch { + Logger.debug("Error: Could not start the audio engine: \(error.localizedDescription)") + isRecording = false + return StartRecordingResult(error: "Error: Could not start the audio engine: \(error.localizedDescription)") + } + } + + public func stopRecording() { + guard self.isRecording else { return } + self.isRecording = false + self.isVoiceProcessingEnabled = false + audioEngine.stop() + self.inputNode.removeTap(onBus: 0) + } + + + /// Resamples the audio buffer using vDSP. If it fails, falls back to manual resampling. + /// - Parameters: + /// - buffer: The original audio buffer to be resampled. + /// - originalSampleRate: The sample rate of the original audio buffer. + /// - targetSampleRate: The desired sample rate to resample to. + /// - Returns: A new audio buffer resampled to the target sample rate, or nil if resampling fails. + private func resampleAudioBuffer(_ buffer: AVAudioPCMBuffer, from originalSampleRate: Double, to targetSampleRate: Double) -> AVAudioPCMBuffer? { + guard let channelData = buffer.floatChannelData else { return nil } + + let sourceFrameCount = Int(buffer.frameLength) + let sourceChannels = Int(buffer.format.channelCount) + + // Calculate the number of frames in the target buffer + let targetFrameCount = Int(Double(sourceFrameCount) * targetSampleRate / originalSampleRate) + + // Create a new audio buffer for the resampled data + guard let targetBuffer = AVAudioPCMBuffer(pcmFormat: buffer.format, frameCapacity: AVAudioFrameCount(targetFrameCount)) else { return nil } + targetBuffer.frameLength = AVAudioFrameCount(targetFrameCount) + + let resamplingFactor = Float(targetSampleRate / originalSampleRate) // Factor to resample the audio + + for channel in 0.. AVAudioPCMBuffer? { + var error: NSError? = nil + var commonFormat: AVAudioCommonFormat = .pcmFormatInt16 + switch recordingSettings?.bitDepth { + case 16: + commonFormat = .pcmFormatInt16 + case 32: + commonFormat = .pcmFormatInt32 + default: + Logger.debug("Unsupported bit depth. 
Defaulting to 16-bit PCM") + commonFormat = .pcmFormatInt16 + } + guard let nativeInputFormat = AVAudioFormat(commonFormat: commonFormat, sampleRate: buffer.format.sampleRate, channels: 1, interleaved: true) else { + Logger.debug("AudioSessionManager: Failed to convert to desired format. AudioFormat is corrupted.") + return nil + } + let desiredFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: sampleRate, channels: channels, interleaved: false)! + let inputAudioConverter = AVAudioConverter(from: nativeInputFormat, to: desiredFormat)! + + let convertedBuffer = AVAudioPCMBuffer(pcmFormat: desiredFormat, frameCapacity: 1024)! + let status = inputAudioConverter.convert(to: convertedBuffer, error: &error, withInputFrom: {inNumPackets, outStatus in + outStatus.pointee = .haveData + buffer.frameLength = inNumPackets + return buffer + }) + if status == .haveData { + return convertedBuffer + } + return nil + } + + + + /// Processes the audio buffer and writes data to the file. Also handles audio processing if enabled. + /// - Parameters: + /// - buffer: The audio buffer to process. + /// - fileURL: The URL of the file to write the data to. + private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) { + let targetSampleRate = recordingSettings?.desiredSampleRate ?? buffer.format.sampleRate + let finalBuffer: AVAudioPCMBuffer + + if buffer.format.sampleRate != targetSampleRate { + // Resample the audio buffer if the target sample rate is different from the input sample rate + if let resampledBuffer = resampleAudioBuffer(buffer, from: buffer.format.sampleRate, to: targetSampleRate) { + finalBuffer = resampledBuffer + } else { + Logger.debug("Fallback to AVAudioConverter. Converting from \(buffer.format.sampleRate) Hz to \(targetSampleRate) Hz") + + if let convertedBuffer = self.tryConvertToFormat(inputBuffer: buffer, desiredSampleRate: targetSampleRate, desiredChannel: 1) { + finalBuffer = convertedBuffer + } else { + Logger.debug("Failed to convert to desired format.") + finalBuffer = buffer + } + } + } else { + // Use the original buffer if the sample rates are the same + finalBuffer = buffer + } + + + + let audioData = finalBuffer.audioBufferList.pointee.mBuffers + guard let bufferData = audioData.mData else { + Logger.debug("Buffer data is nil.") + return + } + var data = Data(bytes: bufferData, count: Int(audioData.mDataByteSize)) + + // Accumulate new data + accumulatedData.append(data) + + let pmcBuffer16kHz = self.tryConvertToFormat(inputBuffer: buffer, desiredSampleRate: 16000, desiredChannel: 1)! 
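+        // Extract the bytes of the 16 kHz buffer; this second stream is accumulated and emitted
+        // alongside the original-rate data (e.g. for voice activity detection consumers).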
+ let audioData16kHz = pmcBuffer16kHz.audioBufferList.pointee.mBuffers + guard let bufferData16kHz = audioData16kHz.mData else { + Logger.debug("Buffer data is nil.") + return + } + + var data16kHz = Data(bytes: bufferData16kHz, count: Int(audioData16kHz.mDataByteSize)) + accumulatedData16kHz.append(data16kHz) + + + totalDataSize += Int64(data.count) + // print("Total data size written: \(totalDataSize) bytes") // Debug: Check total data written + + let currentTime = Date() + if let lastEmissionTime = lastEmissionTime, currentTime.timeIntervalSince(lastEmissionTime) >= emissionInterval { + if let startTime = startTime { + let recordingTime = currentTime.timeIntervalSince(startTime) + // Copy accumulated data for processing + let dataToProcess = accumulatedData + let dataToProcess16kHz = accumulatedData16kHz + + // Emit the processed audio data + self.delegate?.onMicrophoneData(dataToProcess, dataToProcess16kHz) + + self.lastEmissionTime = currentTime // Update last emission time + self.lastEmittedSize = totalDataSize + accumulatedData.removeAll() // Reset accumulated data after emission + accumulatedData16kHz.removeAll() + } + } + } +} diff --git a/ios/MicrophoneDataDelegate.swift b/ios/MicrophoneDataDelegate.swift new file mode 100644 index 0000000..ce7d394 --- /dev/null +++ b/ios/MicrophoneDataDelegate.swift @@ -0,0 +1,3 @@ +protocol MicrophoneDataDelegate: AnyObject { + func onMicrophoneData(_ microphoneData: Data, _ microphoneData16kHz: Data) +} diff --git a/ios/SoundPlayer.swift b/ios/SoundPlayer.swift new file mode 100644 index 0000000..7373e8c --- /dev/null +++ b/ios/SoundPlayer.swift @@ -0,0 +1,147 @@ +import AVFoundation +import ExpoModulesCore + +class SoundPlayer { + private var audioEngine: AVAudioEngine! + + private var inputNode: AVAudioInputNode! + private var audioPlayerNode: AVAudioPlayerNode! 
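+    // Player node attached to the engine; decoded PCM chunks are scheduled onto it in queue order.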
+ + private var isMuted = false + private var isVoiceProcessingEnabled: Bool = false + + private let bufferAccessQueue = DispatchQueue(label: "com.kinexpoaudiostream.bufferAccessQueue") + + private var audioQueue: [(buffer: AVAudioPCMBuffer, promise: RCTPromiseResolveBlock, turnId: String)] = [] // Queue for audio segments + private var isPlaying: Bool = false // Tracks if audio is currently playing + private var isInterrupted: Bool = false + private var isAudioEngineIsSetup: Bool = false + public static let isLinear16PCM: Bool = true + + private let audioPlaybackFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 16000.0, channels: 1, interleaved: false) + + + private func ensureAudioEngineIsSetup() throws { + self.audioEngine = AVAudioEngine() + + audioPlayerNode = AVAudioPlayerNode() + if let playerNode = self.audioPlayerNode { + audioEngine.attach(playerNode) + audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: self.audioPlaybackFormat) + } + self.isAudioEngineIsSetup = true + + try self.audioEngine.start() + } + + func clearAudioQueue(_ promise: Promise) { + Logger.debug("[SoundPlayer] Clearing Audio Queue...") + if !self.audioQueue.isEmpty { + Logger.debug("[SoundPlayer] Queue is not empty clearing") + self.audioQueue.removeAll() + } else { + Logger.debug("[SoundPlayer] Queue is empty") + } + promise.resolve(nil) + } + + + func stop(_ promise: Promise) { + Logger.debug("[SoundPlayer] Stopping Audio") + if !self.audioQueue.isEmpty { + Logger.debug("[SoundPlayer] Queue is not empty clearing") + self.audioQueue.removeAll() + } + // Stop the audio player node + if self.audioPlayerNode != nil && self.audioPlayerNode.isPlaying { + Logger.debug("[SoundPlayer] Player is playing stopping") + self.audioPlayerNode.pause() + self.audioPlayerNode.stop() + + self.isPlaying = false + } else { + Logger.debug("Player is not playing") + } + promise.resolve(nil) + } + + func interrupt(_ promise: Promise) { + self.isInterrupted = true + self.stop(promise) + } + + func resume() { + self.isInterrupted = false + } + + + public func play( + audioChunk base64String: String, + turnId strTurnId: String, + resolver: @escaping RCTPromiseResolveBlock, + rejecter: @escaping RCTPromiseRejectBlock + ) throws { + Logger.debug("New play chunk \(self.isInterrupted)") + guard !self.isInterrupted else { + resolver(nil) + return + } + do { + if !self.isAudioEngineIsSetup { + try ensureAudioEngineIsSetup() + } + + guard let data = Data(base64Encoded: base64String) else { + Logger.debug("[SoundPlayer] Failed to decode base64 string") + throw SoundPlayerError.invalidBase64String + } + guard let pcmData = AudioUtils.removeRIFFHeaderIfNeeded(from: data), + let pcmBuffer = AudioUtils.convertPCMDataToBuffer(pcmData, audioFormat: self.audioPlaybackFormat!) 
else { + Logger.debug("[SoundPlayer] Failed to process audio chunk") + return + } + let bufferTuple = (buffer: pcmBuffer, promise: resolver, turnId: strTurnId) + audioQueue.append(bufferTuple) + print("New Chunk \(isPlaying)") + // If not already playing, start playback + playNextInQueue() + } catch { + Logger.debug("[SoundPlayer] Failed to enqueue audio chunk: \(error.localizedDescription)") + rejecter("ERROR_SOUND_PLAYER", "Failed to enqueue audio chunk: \(error.localizedDescription)", nil) + } + } + + + private func playNextInQueue() { + guard !audioQueue.isEmpty else { + return + } + guard !isPlaying else { + return + } + + Logger.debug("[SoundPlayer] Playing audio [ \(audioQueue.count)]") + + + if !self.audioPlayerNode.isPlaying { + Logger.debug("[SoundPlayer] Starting Player") + self.audioPlayerNode.play() + } + self.bufferAccessQueue.async { + if let (buffer, promise, _) = self.audioQueue.first { + self.audioQueue.removeFirst() + + self.audioPlayerNode.scheduleBuffer(buffer) { + promise(nil) + + + let bufferDuration = Double(buffer.frameLength) / buffer.format.sampleRate + if !self.isInterrupted && !self.audioQueue.isEmpty { + self.playNextInQueue() + } + } + } + } + } +} + diff --git a/package.json b/package.json index cce4b4d..7d6bfea 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@mykin-ai/expo-audio-stream", - "version": "0.2.4", + "version": "0.2.5", "description": "Expo Play Audio Stream module", "main": "build/index.js", "types": "build/index.d.ts", diff --git a/src/events.ts b/src/events.ts index 6b9546e..3a0e810 100644 --- a/src/events.ts +++ b/src/events.ts @@ -10,6 +10,7 @@ const emitter = new EventEmitter(ExpoPlayAudioStreamModule) export interface AudioEventPayload { encoded?: string + encoded16kHz?: string buffer?: Float32Array fileUri: string lastEmittedSize: number diff --git a/src/index.ts b/src/index.ts index f6856b2..75e59bc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,20 +1,22 @@ import { Subscription } from "expo-modules-core"; import ExpoPlayAudioStreamModule from "./ExpoPlayAudioStreamModule"; -import { AudioRecording, RecordingConfig, StartRecordingResult } from "./types"; +import { + AudioDataEvent, + AudioRecording, + RecordingConfig, + StartRecordingResult, +} from "./types"; import { addAudioEventListener, AudioEventPayload } from "./events"; export class ExpoPlayAudioStream { - /** * Starts microphone recording. * @param {RecordingConfig} recordingConfig - Configuration for the recording. * @returns {Promise<{recordingResult: StartRecordingResult, subscription: Subscription}>} A promise that resolves to an object containing the recording result and a subscription to audio events. * @throws {Error} If the recording fails to start. 
*/ - static async startRecording( - recordingConfig: RecordingConfig - ): Promise<{ + static async startRecording(recordingConfig: RecordingConfig): Promise<{ recordingResult: StartRecordingResult; subscription?: Subscription; }> { @@ -22,25 +24,29 @@ export class ExpoPlayAudioStream { let subscription: Subscription | undefined; - if (onAudioStream && typeof onAudioStream == 'function') { - subscription = addAudioEventListener( - async (event: AudioEventPayload) => { - const { fileUri, deltaSize, totalSize, position, encoded } = event; - if (!encoded) { - console.error( - `[ExpoPlayAudioStream] Encoded audio data is missing` - ); - throw new Error("Encoded audio data is missing"); - } - onAudioStream?.({ - data: encoded, - position, - fileUri, - eventDataSize: deltaSize, - totalSize, - }); + if (onAudioStream && typeof onAudioStream == "function") { + subscription = addAudioEventListener(async (event: AudioEventPayload) => { + const { + fileUri, + deltaSize, + totalSize, + position, + encoded, + encoded16kHz, + } = event; + if (!encoded) { + console.error(`[ExpoPlayAudioStream] Encoded audio data is missing`); + throw new Error("Encoded audio data is missing"); } - ); + onAudioStream?.({ + data: encoded, + data16kHz: encoded16kHz, + position, + fileUri, + eventDataSize: deltaSize, + totalSize, + }); + }); } try { @@ -75,15 +81,9 @@ export class ExpoPlayAudioStream { * @returns {Promise} * @throws {Error} If the audio chunk fails to stream. */ - static async playAudio( - base64Chunk: string, - turnId: string - ): Promise { + static async playAudio(base64Chunk: string, turnId: string): Promise { try { - return ExpoPlayAudioStreamModule.playAudio( - base64Chunk, - turnId - ); + return ExpoPlayAudioStreamModule.playAudio(base64Chunk, turnId); } catch (error) { console.error(error); throw new Error(`Failed to stream audio chunk: ${error}`); @@ -135,10 +135,158 @@ export class ExpoPlayAudioStream { static async clearPlaybackQueueByTurnId(turnId: string): Promise { try { - return await ExpoPlayAudioStreamModule.clearPlaybackQueueByTurnId(turnId); + await ExpoPlayAudioStreamModule.clearPlaybackQueueByTurnId(turnId); } catch (error) { console.error(error); throw new Error(`Failed to clear playback queue: ${error}`); } } + + /** + * Plays a sound. + * @param {string} audio - The audio to play. + * @param {string} turnId - The turn ID. + * @returns {Promise} + * @throws {Error} If the sound fails to play. + */ + static async playSound(audio: string, turnId: string): Promise { + try { + await ExpoPlayAudioStreamModule.playSound(audio, turnId); + } catch (error) { + console.error(error); + throw new Error(`Failed to enqueue audio: ${error}`); + } + } + + static async stopSound(): Promise { + try { + await ExpoPlayAudioStreamModule.stopSound(); + } catch (error) { + console.error(error); + throw new Error(`Failed to stop enqueued audio: ${error}`); + } + } + + /** + * Interrupts the current sound. + * @returns {Promise} + * @throws {Error} If the sound fails to interrupt. + */ + static async interruptSound(): Promise { + try { + await ExpoPlayAudioStreamModule.interruptSound(); + } catch (error) { + console.error(error); + throw new Error(`Failed to stop enqueued audio: ${error}`); + } + } + + /** + * Resumes the current sound. + * @returns {Promise} + * @throws {Error} If the sound fails to resume. 
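+   * @example
+   * // One possible flow: interrupt the current sound, then resume it later.
+   * await ExpoPlayAudioStream.interruptSound();
+   * ExpoPlayAudioStream.resumeSound();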
+ */ + static resumeSound(): void { + try { + ExpoPlayAudioStreamModule.resumeSound(); + } catch (error) { + console.error(error); + throw new Error(`Failed to resume sound: ${error}`); + } + } + + /** + * Starts microphone streaming. + * @param {RecordingConfig} recordingConfig - The recording configuration. + * @returns {Promise<{recordingResult: StartRecordingResult, subscription: Subscription}>} A promise that resolves to an object containing the recording result and a subscription to audio events. + * @throws {Error} If the recording fails to start. + */ + static async startMicrophone(recordingConfig: RecordingConfig): Promise<{ + recordingResult: StartRecordingResult; + subscription?: Subscription; + }> { + let subscription: Subscription | undefined; + try { + const { onAudioStream, ...options } = recordingConfig; + + if (onAudioStream && typeof onAudioStream == "function") { + subscription = addAudioEventListener( + async (event: AudioEventPayload) => { + const { + fileUri, + deltaSize, + totalSize, + position, + encoded, + encoded16kHz, + } = event; + if (!encoded) { + console.error( + `[ExpoPlayAudioStream] Encoded audio data is missing` + ); + throw new Error("Encoded audio data is missing"); + } + onAudioStream?.({ + data: encoded, + data16kHz: encoded16kHz, + position, + fileUri, + eventDataSize: deltaSize, + totalSize, + }); + } + ); + } + + const result = await ExpoPlayAudioStreamModule.startMicrophone(options); + + return { recordingResult: result, subscription }; + } catch (error) { + console.error(error); + subscription?.remove(); + throw new Error(`Failed to start recording: ${error}`); + } + } + + /** + * Stops the current microphone streaming. + * @returns {Promise} + * @throws {Error} If the microphone streaming fails to stop. + */ + static async stopMicrophone(): Promise { + try { + return await ExpoPlayAudioStreamModule.stopMicrophone(); + } catch (error) { + console.error(error); + throw new Error(`Failed to stop mic stream: ${error}`); + } + } + + static subscribeToAudioEvents( + onMicrophoneStream: (event: AudioDataEvent) => Promise + ): Subscription { + return addAudioEventListener(async (event: AudioEventPayload) => { + const { fileUri, deltaSize, totalSize, position, encoded, encoded16kHz } = + event; + if (!encoded) { + console.error(`[ExpoPlayAudioStream] Encoded audio data is missing`); + throw new Error("Encoded audio data is missing"); + } + onMicrophoneStream?.({ + data: encoded, + data16kHz: encoded16kHz, + position, + fileUri, + eventDataSize: deltaSize, + totalSize, + }); + }); + } } + +export { + AudioDataEvent, + AudioRecording, + RecordingConfig, + StartRecordingResult, +}; diff --git a/src/types.ts b/src/types.ts index b3d40a2..68c7250 100644 --- a/src/types.ts +++ b/src/types.ts @@ -12,6 +12,7 @@ export interface StartRecordingResult { export interface AudioDataEvent { data: string | Float32Array + data16kHz?: string | Float32Array position: number fileUri: string eventDataSize: number
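As a usage sketch for the dual-stream output introduced above (not part of the diff itself): the snippet below subscribes to audio events and routes the 16 kHz companion stream to a voice-activity-detection callback. The `runVad` helper is a hypothetical placeholder for whatever VAD you use (for example a Silero model wrapper); everything else relies only on the APIs added in this change.

```typescript
import { ExpoPlayAudioStream, AudioDataEvent } from "@mykin-ai/expo-audio-stream";

// Hypothetical placeholder: swap in a real VAD (e.g. a Silero model wrapper).
// Accepts a base64-encoded 16 kHz PCM chunk and reports whether speech was detected.
const runVad = async (base64Pcm16kHz: string): Promise<boolean> => {
  return base64Pcm16kHz.length > 0; // stand-in logic only
};

const subscription = ExpoPlayAudioStream.subscribeToAudioEvents(
  async (event: AudioDataEvent) => {
    // `data` always carries the original-rate chunk; `data16kHz` is the optional 16 kHz companion.
    if (typeof event.data16kHz === "string") {
      const speechDetected = await runVad(event.data16kHz);
      console.log("Speech detected:", speechDetected);
    }
  }
);

// Remove the listener when it is no longer needed.
// subscription.remove();
```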