OpenVoiceOS · JarbasAl · Oct 2, 2023
diff --git a/ovos_stt_plugin_fasterwhisper/__init__.py b/ovos_stt_plugin_fasterwhisper/__init__.py
@@ -189,7 +189,7 @@ def __init__(self, *args, **kwargs):
         self.compute_type = self.config.get("compute_type", "int8")
         self.use_cuda = self.config.get("use_cuda", False)
         self.cpu_threads = self.config.get("cpu_threads", 4)
-
+        self.vad_filter = self.config.get("vad_filter", False)
         if self.use_cuda:
             device = "cuda"
         else:
@@ -203,8 +203,18 @@ def audiodata2array(audio_data):
 
     def execute(self, audio, language=None):
+        """Transcribe ``audio`` and return the recognized text.
+
+        ``language`` overrides ``self.lang`` when given. Only the part before
+        the first "-" is passed to the engine, lowercased.
+        """
         lang = language or self.lang
-        segments, _ = self.engine.transcribe(self.audiodata2array(audio), beam_size=self.beam_size,
-                                             condition_on_previous_text=False, language=lang.split("-")[0].lower())
+        samples = self.audiodata2array(audio)
+        lang_code = lang.split("-")[0].lower()
+        segments, _ = self.engine.transcribe(samples,
+                                             beam_size=self.beam_size,
+                                             vad_filter=self.vad_filter,
+                                             condition_on_previous_text=False,
+                                             language=lang_code)
         # segments is an iterator, transcription only happens here
         transcription = "".join(segment.text for segment in segments).strip()
         return transcription

diff --git a/ovos_stt_plugin_fasterwhisper/transcribe.py b/ovos_stt_plugin_fasterwhisper/transcribe.py
@@ -0,0 +1,81 @@
+import click
+import os
+from speech_recognition import Recognizer, AudioFile
+
+from ovos_stt_plugin_fasterwhisper import FasterWhisperSTT
+
+
+def _transcript_path(audio_path: str) -> str:
+    """Return the ``.txt`` path that sits next to ``audio_path``."""
+    # splitext swaps only the final extension, so the result can never be
+    # the audio file itself, whatever --format says
+    return os.path.splitext(audio_path)[0] + ".txt"
+
+
+def _transcribe_file(stt, audio_path: str, lang: str) -> None:
+    """Transcribe one audio file, print the text and save it as ``.txt``.
+
+    Failures are reported on stdout and swallowed so a directory run keeps
+    going; KeyboardInterrupt is not caught and still aborts the run.
+    """
+    try:
+        with AudioFile(audio_path) as source:
+            try:
+                audio = Recognizer().record(source)
+                text = stt.execute(audio, language=lang)
+                print(text)
+                with open(_transcript_path(audio_path), "w") as out:
+                    out.write(text)
+            except Exception as e:
+                print("failed to transcribe file", audio_path, e)
+    except Exception as e:
+        print("failed to open file", audio_path, e)
+
+
+@click.command()
+@click.option("--path", required=True)
+@click.option("--lang", default="en-us")
+@click.option("--model", default="base")
+@click.option("--format", default="wav")
+@click.option("--beam", default=5)
+@click.option("--cuda", default=False)
+@click.option("--compute", default="int8")
+@click.option("--vad", default=False)
+def transcribe(path: str, lang: str, model: str, format: str, beam: int, cuda: bool, compute: str, vad: bool):
+    """Transcribe an audio file, or every matching file under a directory.
+
+    Each transcript is printed and written next to its audio file with a
+    .txt extension. In directory mode only files ending in ".<format>" are
+    processed, and files that already have a transcript are skipped.
+    """
+    if not (os.path.isfile(path) or os.path.isdir(path)):
+        # bail out before the model is loaded
+        print("path is not a file or directory", path)
+        return
+
+    config = {
+        "lang": lang,
+        "model": model,
+        "beam_size": beam,
+        "use_cuda": cuda,
+        "compute_type": compute,
+        "vad_filter": vad
+    }
+    stt = FasterWhisperSTT(config=config)
+
+    if os.path.isfile(path):
+        _transcribe_file(stt, path, lang)
+        return
+
+    suffix = f".{format}"
+    for root, _dirs, files in os.walk(path):
+        for name in files:
+            audio_path = os.path.join(root, name)
+            if not name.endswith(suffix) or os.path.isfile(_transcript_path(audio_path)):
+                continue
+            print(root, name)
+            _transcribe_file(stt, audio_path, lang)
+
+
+if __name__ == "__main__":
+    transcribe()
diff --git a/setup.py b/setup.py
@@ -85,5 +85,9 @@ def required(requirements_file):
     keywords='mycroft ovos plugin stt',
     entry_points={'mycroft.plugin.stt': PLUGIN_ENTRY_POINT,
                   'mycroft.plugin.stt.config': CONFIG_ENTRY_POINT,
-                  'neon.plugin.audio': LANG_PLUGIN_ENTRY_POINT}
+                  'neon.plugin.audio': LANG_PLUGIN_ENTRY_POINT,
+                  'console_scripts': [
+                      'fw-transcribe=ovos_stt_plugin_fasterwhisper.transcribe:transcribe'
+                  ]
+                  }
 )