diff --git a/src/main/java/tech/indus340/complexa/AudioProcessorThread.java b/src/main/java/tech/indus340/complexa/AudioProcessorThread.java
index 107ee19..6aad8d1 100644
--- a/src/main/java/tech/indus340/complexa/AudioProcessorThread.java
+++ b/src/main/java/tech/indus340/complexa/AudioProcessorThread.java
@@ -4,6 +4,7 @@
 import org.springframework.stereotype.Service;
 import tech.indus340.complexa.chatbot.Assistant;
 import tech.indus340.complexa.service.*;
+import tech.indus340.complexa.utils.WavMerger;
 import tech.indus340.complexa.utils.WavPlayer;
 
 import java.io.File;
@@ -18,9 +19,10 @@ public class AudioProcessorThread {
     private final WavPlayer wavPlayer;
     private final Assistant assistant;
     private final Text2SpeechService text2SpeechService;
+    private final WavMerger wavMerger;
     private long timeSinceLastResponse;
 
-    public AudioProcessorThread(AudioCaptureService audioCaptureService, NoiseDetectorService noiseDetectorService, AudioRecorderService audioRecorderService, TranscriptionService transcriptionService, WavPlayer wavPlayer, Assistant assistant, Text2SpeechService text2SpeechService) {
+    public AudioProcessorThread(AudioCaptureService audioCaptureService, NoiseDetectorService noiseDetectorService, AudioRecorderService audioRecorderService, TranscriptionService transcriptionService, WavPlayer wavPlayer, Assistant assistant, Text2SpeechService text2SpeechService, WavMerger wavMerger) {
         this.audioCaptureService = audioCaptureService;
         this.noiseDetectorService = noiseDetectorService;
         this.transcriptionService = transcriptionService;
@@ -28,6 +30,7 @@ public AudioProcessorThread(AudioCaptureService audioCaptureService, NoiseDetect
         this.wavPlayer = wavPlayer;
         this.assistant = assistant;
         this.text2SpeechService = text2SpeechService;
+        this.wavMerger = wavMerger;
     }
 
     @Scheduled(fixedRate = 2000) // Run every 2 seconds to allow time for recording and processing
@@ -35,25 +38,32 @@ public void processAudio() {
         System.out.println("scanning");
         File audioFile = audioCaptureService.captureAudio();
         if (noiseDetectorService.detectNoise(audioFile)) {
-            String transscribed = transcriptionService.transcribe(audioFile);
-            boolean timeElapsedTillLastResponse = System.currentTimeMillis() < timeSinceLastResponse + 10000;
-            if (transscribed.toLowerCase().contains("omplex") || timeElapsedTillLastResponse) {
-                File instructionsWav = null;
-                if (!timeElapsedTillLastResponse) {
+            boolean continuousDialogueMode = System.currentTimeMillis() < timeSinceLastResponse + 5000;
+            File instructionsWav;
+            if (!continuousDialogueMode) {
+                String transscribed = transcriptionService.transcribe(audioFile);
+                if (transscribed.toLowerCase().contains("omplex")) {
                     wavPlayer.playAcc();
-                    System.out.println("recording started");
-                    instructionsWav = audioRecorderService.recordAudio();
-                    System.out.println("recording stopped");
+                    System.out.println("recording intention started");
+                    instructionsWav = audioRecorderService.recordAudioIntention();
+                    System.out.println("recording intention stopped");
                 } else {
-                    instructionsWav = audioFile;
+                    return;
                 }
-                String message = transcriptionService.transcribe(instructionsWav);
-                String complexaResponse = assistant.chat(message);
-                System.out.println(complexaResponse);
-                text2SpeechService.tts(complexaResponse);
-                wavPlayer.playResponse();
-                timeSinceLastResponse = System.currentTimeMillis();
+            } else {
+                System.out.println("recording dialog mode started");
+                File continuationWav = audioRecorderService.recordAudioContinuous();
+                System.out.println("recording dialog mode stopped");
+                // merge() returns null on failure; fall back to the continuation clip instead of transcribing null
+                File mergedWav = wavMerger.merge(audioFile, continuationWav);
+                instructionsWav = mergedWav != null ? mergedWav : continuationWav;
             }
+            String message = transcriptionService.transcribe(instructionsWav);
+            String complexaResponse = assistant.chat(message);
+            System.out.println(complexaResponse);
+            text2SpeechService.tts(complexaResponse);
+            wavPlayer.playResponse();
+            timeSinceLastResponse = System.currentTimeMillis();
         }
     }
 }
diff --git a/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java b/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java
index f738e85..f8859ea 100644
--- a/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java
+++ b/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java
@@ -10,9 +10,14 @@
 public class AudioRecorderService {
 
     // Use AudioUtils to record audio with the default format and threshold values
-    public File recordAudio() {
+    public File recordAudioIntention() {
         AudioFormat format = AudioUtils.getDefaultAudioFormat();
         return AudioUtils.recordAudio(format, AudioUtils.DEFAULT_SILENCE_THRESHOLD, AudioUtils.DEFAULT_SILENCE_THRESHOLD_MS);
     }
 
+    // Use AudioUtils to record audio with the default format and level threshold, but the continuous-dialogue silence duration
+    public File recordAudioContinuous() {
+        AudioFormat format = AudioUtils.getDefaultAudioFormat();
+        return AudioUtils.recordAudio(format, AudioUtils.DEFAULT_SILENCE_THRESHOLD, AudioUtils.DEFAULT_SILENCE_CONTINUOUS_THRESHOLD_MS);
+    }
 }
diff --git a/src/main/java/tech/indus340/complexa/utils/AudioUtils.java b/src/main/java/tech/indus340/complexa/utils/AudioUtils.java
index 5f259da..9d9b2f8 100644
--- a/src/main/java/tech/indus340/complexa/utils/AudioUtils.java
+++ b/src/main/java/tech/indus340/complexa/utils/AudioUtils.java
@@ -8,6 +8,7 @@ public class AudioUtils {
     public static final double DEFAULT_SILENCE_THRESHOLD = 0.02; // Default threshold for detecting noise
     public static final int DEFAULT_BUFFER_SIZE = 1024; // Default buffer size
     public static final long DEFAULT_SILENCE_THRESHOLD_MS = 2000; // Silence threshold duration in milliseconds
+    public static final long DEFAULT_SILENCE_CONTINUOUS_THRESHOLD_MS = 500; // Silence threshold duration in milliseconds for continuous dialogue recordings
     private static long silenceStartTime;
 
     // Public method for recording audio until a certain duration of silence is detected
@@ -123,10 +124,10 @@ public static double calculateRMSLevel(byte[] audioData, int bytesRead, AudioFor
             } else {
                 value = audioData[i];
             }
-            sum += value * value;
+            sum += (long) value * value;
         }
 
-        double rms = Math.sqrt(sum / (bytesRead / sampleSizeInBytes));
+        double rms = Math.sqrt(((double) sum / (double) bytesRead) * sampleSizeInBytes);
         return rms / (1 << (format.getSampleSizeInBits() - 1));
     }
 
diff --git a/src/main/java/tech/indus340/complexa/utils/WavMerger.java b/src/main/java/tech/indus340/complexa/utils/WavMerger.java
new file mode 100644
index 0000000..5247546
--- /dev/null
+++ b/src/main/java/tech/indus340/complexa/utils/WavMerger.java
@@ -0,0 +1,55 @@
+package tech.indus340.complexa.utils;
+
+import org.springframework.stereotype.Service;
+
+import javax.sound.sampled.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.SequenceInputStream;
+
+/** Concatenates two WAV files into a single WAV file. */
+@Service
+public class WavMerger {
+
+    /**
+     * Writes the audio of {@code file1} followed by the audio of {@code file2} to {@code merged.wav}.
+     *
+     * @param file1 WAV file whose audio comes first
+     * @param file2 WAV file whose audio is appended
+     * @return the merged file, or {@code null} if the formats differ or the merge fails
+     */
+    public File merge(File file1, File file2) {
+        File outputWavFile = new File("merged.wav");
+
+        // try-with-resources closes both inputs on every path, including the early return and exceptions
+        try (AudioInputStream audioStream1 = AudioSystem.getAudioInputStream(file1);
+             AudioInputStream audioStream2 = AudioSystem.getAudioInputStream(file2)) {
+            // Concatenation is only valid when both files share the same audio format
+            AudioFormat format1 = audioStream1.getFormat();
+            if (!format1.matches(audioStream2.getFormat())) {
+                System.out.println("Audio formats do not match.");
+                return null;
+            }
+
+            // The total is only known if both inputs report a length; never add the NOT_SPECIFIED sentinel
+            long frames1 = audioStream1.getFrameLength();
+            long frames2 = audioStream2.getFrameLength();
+            long totalFrames = (frames1 == AudioSystem.NOT_SPECIFIED || frames2 == AudioSystem.NOT_SPECIFIED)
+                    ? AudioSystem.NOT_SPECIFIED
+                    : frames1 + frames2;
+
+            // Write the concatenated audio to a new WAV file
+            try (AudioInputStream appendedStream = new AudioInputStream(
+                    new SequenceInputStream(audioStream1, audioStream2), format1, totalFrames)) {
+                AudioSystem.write(appendedStream, AudioFileFormat.Type.WAVE, outputWavFile);
+            }
+
+            System.out.println("WAV files merged successfully.");
+            return outputWavFile;
+        } catch (UnsupportedAudioFileException | IOException e) {
+            e.printStackTrace();
+            // Do not return merged.wav after a failure: it may be missing, partial, or left from an earlier merge
+            return null;
+        }
+    }
+}