zoglmannk · zoglmannk · Feb 25, 2024 · Feb 25, 2024
diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@ The software used to generate Morse Code Ninja practice sets as found on
 These must be installed and available in your Shell's PATH.
 * [ebook2cw](https://fkurz.net/ham/ebook2cw.html)
 * [ffmpeg](https://ffmpeg.org)
+* [sox](https://sourceforge.net/projects/sox/)
 * [lame](https://lame.sourceforge.io/)
 * [Perl 5](https://www.perl.org)
 * [Python 3](https://www.python.org)
@@ -64,12 +65,14 @@ Uses AWS Polly and requires valid credentials in the aws.properties file.<br/><b
     --nocourtesytone      exclude the courtesy tone
     --tone                include the courtesy tone
     -e, --engine          name of Polly speech engine to use: NEURAL or STANDARD
-    --sm, --silencemorse  length of silence between Morse code and spoken voice. Default 1 second.
-    --ss, --silencesets   length of silence between courtesy tone and next practice set. Default 1 second.
-    --sv, --silencevoice  length of silence between spoken voice and repeated morse code. Default 1 second.
-    --sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.
-    --st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.
+    -sm, --silencemorse  length of silence between Morse code and spoken voice. Default 1 second.
+    -ss, --silencesets   length of silence between courtesy tone and next practice set. Default 1 second.
+    -sv, --silencevoice  length of silence between spoken voice and repeated morse code. Default 1 second.
+    -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.
+    -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.
     -x, --extraspace      0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
+    --precise             trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.
+                          ***Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.
     -l, --lang            language: ENGLISH or SWEDISH
 
 # General Notes

diff --git a/generators/generate-single-letter-number.pl b/generators/generate-single-letter-number.pl
@@ -11,7 +11,13 @@
 #
 # Mind Melt
 # ./render.pl -i single-letter-number-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2
-
+#
+# Warp -- Be sure to clear cache
+# ./render.pl -i single-letter-number-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
+#
+# ICR Territory -- Be sure to clear cache
+# ./render.pl -i single-letter-number-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.2 -sv 0.5 --precise
+#
 # Check distribution ./generate-single-letter-number.pl | sort | uniq -c
 
 my $number_of_runs = 5000;

diff --git a/generators/generate-single-letter.pl b/generators/generate-single-letter.pl
@@ -11,6 +11,12 @@
 #
 # Mind Melt
 # ./render.pl -i single-letters-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2
+#
+# Warp -- Be sure to clear cache
+# ./render.pl -i single-letters-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
+#
+# ICR Territory -- Be sure to clear cache
+# ./render.pl -i single-letters-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.0 -sm 0.2 -sv 0.5 --precise
 
 my $number_of_runs = 5000;
 

diff --git a/render.pl b/render.pl
@@ -38,6 +38,7 @@
   'sv|silencevoice=s' => \(my $silence_between_voice_and_repeat = "1"), # typically 1 second
   'sc|silencecontext=s' => \(my $silence_between_context_and_morse_code = "1"),
   'x|extraspace=s'    => \(my $extra_word_spacing = 0), # 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
+  'precise'           => \(my $precise = ''), # flag. 1 = precise timing -- useful if using very tight times between morse code and spoken answer
   'l|lang=s'          => \(my $lang = "ENGLISH"), # ENGLISH | GERMAN | SWEDISH
   'p|pitchtone=i'     => \(my $pitch_tone = 700), # tone in Hz for pitch
   'pr|pitchrandom'    => \(my $pitch_tone_random = '0'), # flag. 0 == false, random pitch tone
@@ -55,6 +56,14 @@
   print_usage();
 }
 
+# There is some overhead in the concatenation process, so we'll subtract it out
+if($precise) {
+  $silence_between_morse_code_and_spoken_voice -= "0.11";
+  if($silence_between_morse_code_and_spoken_voice <= 0) {
+    $silence_between_morse_code_and_spoken_voice = "0.03";
+  }
+}
+
 my $speed_racing_multiplier = 1.5;
 my $speed_racing_iterations = 3;
 
@@ -435,7 +444,11 @@ sub split_on_spoken_directive {
         while ($exit_code != 0 && (!$no_spoken || $filename_map_key =~ m/context/)) {
           my $textFile = File::Spec->rel2abs("$filename_base-${counter}");
 
-          my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory";
+          my $trim_silence = 0;
+          if($precise) {
+            $trim_silence = 1;
+          }
+          my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory $trim_silence";
           print "execute $cmd\n";
 
           my $output = `$cmd`;
@@ -569,7 +582,14 @@ sub split_on_spoken_directive {
               $pitch_tone = $random_tones[ rand @random_tones ];
             }
 
-            my $ebookCmdBase = "ebook2cw $lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
+            my $ebookCmdBase = "";
+            if($precise) {
+              $ebookCmdBase = "./ebook2cw-trim.bash ";
+            } else {
+              $ebookCmdBase = "ebook2cw ";
+            }
+
+            $ebookCmdBase = $ebookCmdBase . "$lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
                 "$extra_word_spacing_option -f $pitch_tone -w $speed -s 44100 ";
             if ($farnsworth != 0) {
               $ebookCmdBase = $ebookCmdBase . "-e $farnsworth ";
@@ -750,7 +770,11 @@ sub split_on_spoken_directive {
         rename("$output_directory/sentence.txt ", '$filename_base-$counter-full.txt');
         my $exit_code = -1;
         while($exit_code != 0 && $no_spoken != 0) {
-          my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory";
+          my $trim_silence = 0;
+          if($precise) {
+            $trim_silence = 1;
+          }
+          my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory $trim_silence";
           my $output = `$cmd`;
           $output =~ m/^Cached filename:(.*)\n/;
           my $full_voiced_filename = $1;
@@ -1010,7 +1034,9 @@ sub split_on_spoken_directive {
     else {
         $speed = $_;
     }
-    unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3",  "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3";
+    unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3",
+           "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3", "$output_directory/sentence-${speed}.txt",
+           "$output_directory/sentence-${speed}-orig0000.mp3";
   }
   unlink "$filename_base-structure.txt", "$filename_base-sentences.txt";
   unlink glob("$output_directory/silence*.mp3");
@@ -1052,12 +1078,14 @@ sub print_usage {
   print "    --nospoken           exclude spoken\n";
   print "    --nocourtesytone     exclude the courtesy tone\n";
   print "    -e, --engine         name of Polly speech engine to use: NEURAL or STANDARD\n";
-  print "    --sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
-  print "    --ss, --silencesets  length of silence between courtesy tone and next practice set. Default 1 second.\n";
-  print "    --sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
-  print "    --sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
-  print "    --st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.\n";
+  print "    -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
+  print "    -ss, --silencesets  length of silence between courtesy tone and next practice set. Default 1 second.\n";
+  print "    -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
+  print "    -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
+  print "    -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.\n";
   print "    -x, --extraspace     0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc\n";
+  print "    --precise            trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.\n";
+  print "                         ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.\n";
   print "    -l, --lang           language: ENGLISH, GERMAN, or SWEDISH\n\n";
   die "";
 }
diff --git a/text2speech.py b/text2speech.py
@@ -6,14 +6,14 @@
 import hashlib
 import os.path
 from os import environ
-import shutil
 import subprocess
 
 sentence_filename = sys.argv[1]
 engine_type = sys.argv[2].lower()  # needs to be: standard | neural
 language = sys.argv[3]
 #cache_directory = 'cache/'
 cache_directory = sys.argv[4]
+trim_silence = sys.argv[5]
 
 # ERROR return codes (coordinate with render.pl for intelligent error handling)
 ioError = 2
@@ -57,7 +57,8 @@
 
 hex_digest = hashlib.sha256(sentence.encode('utf-8')).hexdigest()
 base_filename = engine_type + '-' + hex_digest + ".mp3"
-temp_filename = cache_directory + engine_type + "-" + hex_digest + "-temp.mp3"
+temp_resample_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-resample.mp3"
+temp_sox_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-sox.mp3"
 cache_filename = cache_directory + hex_digest + ".mp3"
 
 def render(cache_filename, voice_id, text_type, text):
@@ -71,17 +72,29 @@ def render(cache_filename, voice_id, text_type, text):
             response = polly_client.synthesize_speech(Engine=engine_type, VoiceId=voice_id, OutputFormat='mp3',
                                                       TextType=text_type, Text=text)
 
-        file = open(temp_filename, 'wb')
-        file.write(response['AudioStream'].read())
-        file.close()
+        if trim_silence == 1:
+            file = open(temp_sox_filename, 'wb')
+            file.write(response['AudioStream'].read())
+            file.close()
+
+            result = subprocess.run(['sox', temp_sox_filename, temp_resample_filename, 'silence', '1', '0.001', '1%'],
+                                    stdout=subprocess.PIPE,
+                                    universal_newlines=True)
+            os.remove(temp_sox_filename)
+        else:
+            file = open(temp_resample_filename, 'wb')
+            file.write(response['AudioStream'].read())
+            file.close()
+
 
         subprocess.run(['lame', '--resample', '44.1', '-a', '-b', '256',
-                        temp_filename,
+                        temp_resample_filename,
                         cache_filename],
                         stdout=subprocess.PIPE,
                         universal_newlines=True)
 
-        os.remove(temp_filename)
+        os.remove(temp_resample_filename)
+
 
     print("Cached filename:" + cache_filename)
 
@@ -90,7 +103,7 @@ def render(cache_filename, voice_id, text_type, text):
 #          render.pl
 if language == "ENGLISH":
     # short individual words are easier to understand spoken more slowly
-    if re.match(r"<speak>.*?</speak>", sentence):
+    if re.match(r"\s*<speak>.*?</speak>\s*", sentence):
         print("Pronouncing exactly as specified")
         ssml = sentence
         cache_filename = cache_directory + "Mathew-exact-" + base_filename