Change dialog mode.

ice09 · Sep 15, 2024 · e26e73f · e26e73f
1 parent cb2a077
commit e26e73f
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 19 deletions.
diff --git a/src/main/java/tech/indus340/complexa/AudioProcessorThread.java b/src/main/java/tech/indus340/complexa/AudioProcessorThread.java
@@ -4,6 +4,7 @@
 import org.springframework.stereotype.Service;
 import tech.indus340.complexa.chatbot.Assistant;
 import tech.indus340.complexa.service.*;
+import tech.indus340.complexa.utils.WavMerger;
 import tech.indus340.complexa.utils.WavPlayer;
 
 import java.io.File;
@@ -18,42 +19,49 @@ public class AudioProcessorThread {
     private final WavPlayer wavPlayer;
     private final Assistant assistant;
     private final Text2SpeechService text2SpeechService;
+    private final WavMerger wavMerger;
     private long timeSinceLastResponse;
 
-    public AudioProcessorThread(AudioCaptureService audioCaptureService, NoiseDetectorService noiseDetectorService, AudioRecorderService audioRecorderService, TranscriptionService transcriptionService, WavPlayer wavPlayer, Assistant assistant, Text2SpeechService text2SpeechService) {
+    public AudioProcessorThread(AudioCaptureService audioCaptureService, NoiseDetectorService noiseDetectorService, AudioRecorderService audioRecorderService, TranscriptionService transcriptionService, WavPlayer wavPlayer, Assistant assistant, Text2SpeechService text2SpeechService, WavMerger wavMerger) {
         this.audioCaptureService = audioCaptureService;
         this.noiseDetectorService = noiseDetectorService;
         this.transcriptionService = transcriptionService;
         this.audioRecorderService = audioRecorderService;
         this.wavPlayer = wavPlayer;
         this.assistant = assistant;
         this.text2SpeechService = text2SpeechService;
+        this.wavMerger = wavMerger;
     }
 
     @Scheduled(fixedRate = 2000) // Run every 2 seconds to allow time for recording and processing
     public void processAudio() {
         System.out.println("scanning");
         File audioFile = audioCaptureService.captureAudio();
         if (noiseDetectorService.detectNoise(audioFile)) {
-            String transscribed = transcriptionService.transcribe(audioFile);
-            boolean timeElapsedTillLastResponse = System.currentTimeMillis() < timeSinceLastResponse + 10000;
-            if (transscribed.toLowerCase().contains("omplex") || timeElapsedTillLastResponse) {
-                File instructionsWav = null;
-                if (!timeElapsedTillLastResponse) {
+            boolean continuousDialogueMode = System.currentTimeMillis() < timeSinceLastResponse + 5000;
+            File instructionsWav;
+            if (!continuousDialogueMode) {
+                String transscribed = transcriptionService.transcribe(audioFile);
+                if (transscribed.toLowerCase().contains("omplex")) {
                     wavPlayer.playAcc();
-                    System.out.println("recording started");
-                    instructionsWav = audioRecorderService.recordAudio();
-                    System.out.println("recording stopped");
+                    System.out.println("recording intention started");
+                    instructionsWav = audioRecorderService.recordAudioIntention();
+                    System.out.println("recording intention stopped");
                 } else {
-                    instructionsWav = audioFile;
+                    return;
                 }
-                String message = transcriptionService.transcribe(instructionsWav);
-                String complexaResponse = assistant.chat(message);
-                System.out.println(complexaResponse);
-                text2SpeechService.tts(complexaResponse);
-                wavPlayer.playResponse();
-                timeSinceLastResponse = System.currentTimeMillis();
+            } else {
+                System.out.println("recording dialog mode started");
+                instructionsWav = audioRecorderService.recordAudioContinuous();
+                System.out.println("recording dialog mode stopped");
+                instructionsWav = wavMerger.merge(audioFile, instructionsWav);
             }
+            String message = transcriptionService.transcribe(instructionsWav);
+            String complexaResponse = assistant.chat(message);
+            System.out.println(complexaResponse);
+            text2SpeechService.tts(complexaResponse);
+            wavPlayer.playResponse();
+            timeSinceLastResponse = System.currentTimeMillis();
         }
     }
 }
diff --git a/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java b/src/main/java/tech/indus340/complexa/service/AudioRecorderService.java
@@ -10,9 +10,14 @@
 public class AudioRecorderService {
 
     // Use AudioUtils to record audio with the default format and threshold values
-    public File recordAudio() {
+    public File recordAudioIntention() {
         AudioFormat format = AudioUtils.getDefaultAudioFormat();
         return AudioUtils.recordAudio(format, AudioUtils.DEFAULT_SILENCE_THRESHOLD, AudioUtils.DEFAULT_SILENCE_THRESHOLD_MS);
     }
 
+    // Use AudioUtils to record audio with the default format and silence level, but the shorter continuous-dialogue silence duration
+    public File recordAudioContinuous() {
+        AudioFormat format = AudioUtils.getDefaultAudioFormat();
+        return AudioUtils.recordAudio(format, AudioUtils.DEFAULT_SILENCE_THRESHOLD, AudioUtils.DEFAULT_SILENCE_CONTINUOUS_THRESHOLD_MS);
+    }
 }
diff --git a/src/main/java/tech/indus340/complexa/utils/AudioUtils.java b/src/main/java/tech/indus340/complexa/utils/AudioUtils.java
@@ -8,6 +8,7 @@ public class AudioUtils {
     public static final double DEFAULT_SILENCE_THRESHOLD = 0.02; // Default threshold for detecting noise
     public static final int DEFAULT_BUFFER_SIZE = 1024; // Default buffer size
     public static final long DEFAULT_SILENCE_THRESHOLD_MS = 2000; // Silence threshold duration in milliseconds
+    public static final long DEFAULT_SILENCE_CONTINUOUS_THRESHOLD_MS = 500; // Silence duration in milliseconds that ends a recording in continuous dialogue mode
     private static long silenceStartTime;
 
     // Public method for recording audio until a certain duration of silence is detected
@@ -123,10 +124,10 @@ public static double calculateRMSLevel(byte[] audioData, int bytesRead, AudioFor
             } else {
                 value = audioData[i];
             }
-            sum += value * value;
+            sum += (long) value * value;
         }
 
-        double rms = Math.sqrt(sum / (bytesRead / sampleSizeInBytes));
+        double rms = Math.sqrt(((double) sum / (double) bytesRead) * sampleSizeInBytes);
         return rms / (1 << (format.getSampleSizeInBits() - 1));
     }
 

diff --git a/src/main/java/tech/indus340/complexa/utils/WavMerger.java b/src/main/java/tech/indus340/complexa/utils/WavMerger.java
@@ -0,0 +1,51 @@
+package tech.indus340.complexa.utils;
+
+import org.springframework.stereotype.Service;
+
+import javax.sound.sampled.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.SequenceInputStream;
+
+@Service
+public class WavMerger {
+
+    public File merge(File file1, File file2) {
+        File outputWavFile = new File("merged.wav");
+
+        try {
+            // Obtain audio input streams from both WAV files
+            AudioInputStream audioStream1 = AudioSystem.getAudioInputStream(file1);
+            AudioInputStream audioStream2 = AudioSystem.getAudioInputStream(file2);
+
+            // Check if the audio formats of both files are the same
+            AudioFormat format1 = audioStream1.getFormat();
+            AudioFormat format2 = audioStream2.getFormat();
+            if (!format1.matches(format2)) {
+                System.out.println("Audio formats do not match.");
+                return null;
+            }
+
+            // Concatenate the audio streams
+            AudioInputStream appendedStream = 
+                new AudioInputStream(
+                    new SequenceInputStream(audioStream1, audioStream2),
+                    format1, 
+                    audioStream1.getFrameLength() + audioStream2.getFrameLength()
+                );
+
+            // Write the result to a new WAV file
+            AudioSystem.write(appendedStream, AudioFileFormat.Type.WAVE, outputWavFile);
+
+            // Close the streams
+            audioStream1.close();
+            audioStream2.close();
+            appendedStream.close();
+
+            System.out.println("WAV files merged successfully.");
+        } catch (UnsupportedAudioFileException | IOException e) {
+            e.printStackTrace();
+        }
+        return outputWavFile;
+    }
+}