Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Precise timing #52

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The software used to generate Morse Code Ninja practice sets as found on
These must be installed and available in your Shell's PATH.
* [ebook2cw](https://fkurz.net/ham/ebook2cw.html)
* [ffmpeg](https://ffmpeg.org)
* [sox](https://sourceforge.net/projects/sox/)
* [lame](https://lame.sourceforge.io/)
* [Perl 5](https://www.perl.org)
* [Python 3](https://www.python.org)
Expand Down Expand Up @@ -64,12 +65,14 @@ Uses AWS Polly and requires valid credentials in the aws.properties file.<br/><b
--nocourtesytone exclude the courtesy tone
--tone include the courtesy tone
-e, --engine name of Polly speech engine to use: NEURAL or STANDARD
--sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.
--ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.
--sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.
--sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.
--st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.
-sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.
-ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.
-sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.
-sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.
-st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.
-x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
--precise trim AWS Polly and ebook2cw audio -- useful when specifying a very short time with -sm, --silencemorse (length of silence between Morse code and spoken voice).
***Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.
-l, --lang language: ENGLISH, GERMAN, or SWEDISH

# General Notes
Expand Down
8 changes: 7 additions & 1 deletion generators/generate-single-letter-number.pl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
#
# Mind Melt
# ./render.pl -i single-letter-number-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2

#
# Warp -- Be sure to clear cache
# ./render.pl -i single-letter-number-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
#
# ICR Territory -- Be sure to clear cache
# ./render.pl -i single-letter-number-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.2 -sv 0.5 --precise
#
# Check distribution ./generate-single-letter-number.pl | sort | uniq -c

my $number_of_runs = 5000;
Expand Down
6 changes: 6 additions & 0 deletions generators/generate-single-letter.pl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
#
# Mind Melt
# ./render.pl -i single-letters-mind-melt.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.2 -sm 0.2 -sv 0.2
#
# Warp -- Be sure to clear cache
# ./render.pl -i single-letters-warp.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.5 -sm 0.5 -sv 0.5 --precise
#
# ICR Territory -- Be sure to clear cache
# ./render.pl -i single-letters-icr-territory.txt -s 15 17 20 22 25 28 30 35 40 45 50 --norepeat --nocourtesytone -ss 0.0 -sm 0.2 -sv 0.5 --precise

my $number_of_runs = 5000;

Expand Down
46 changes: 37 additions & 9 deletions render.pl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
'sv|silencevoice=s' => \(my $silence_between_voice_and_repeat = "1"), # typically 1 second
'sc|silencecontext=s' => \(my $silence_between_context_and_morse_code = "1"),
'x|extraspace=s' => \(my $extra_word_spacing = 0), # 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc
'precise' => \(my $precise = ''), # flag. 1 = precise timing -- useful if using very tight times between morse code and spoken answer
'l|lang=s' => \(my $lang = "ENGLISH"), # ENGLISH | GERMAN | SWEDISH
'p|pitchtone=i' => \(my $pitch_tone = 700), # tone in Hz for pitch
'pr|pitchrandom' => \(my $pitch_tone_random = '0'), # flag. 0 == false, random pitch tone
Expand All @@ -55,6 +56,14 @@
print_usage();
}

# There is some overhead in the concatenation process, so we'll subtract it out
if($precise) {
$silence_between_morse_code_and_spoken_voice -= "0.11";
if($silence_between_morse_code_and_spoken_voice <= 0) {
$silence_between_morse_code_and_spoken_voice = "0.03";
}
}

my $speed_racing_multiplier = 1.5;
my $speed_racing_iterations = 3;

Expand Down Expand Up @@ -435,7 +444,11 @@ sub split_on_spoken_directive {
while ($exit_code != 0 && (!$no_spoken || $filename_map_key =~ m/context/)) {
my $textFile = File::Spec->rel2abs("$filename_base-${counter}");

my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory";
my $trim_silence = 0;
if($precise) {
$trim_silence = 1;
}
my $cmd = "./text2speech.py \"$textFile\" $text_to_speech_engine $lang $cache_directory $trim_silence";
print "execute $cmd\n";

my $output = `$cmd`;
Expand Down Expand Up @@ -569,7 +582,14 @@ sub split_on_spoken_directive {
$pitch_tone = $random_tones[ rand @random_tones ];
}

my $ebookCmdBase = "ebook2cw $lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
my $ebookCmdBase = "";
if($precise) {
$ebookCmdBase = "./ebook2cw-trim.bash ";
} else {
$ebookCmdBase = "ebook2cw ";
}

$ebookCmdBase = $ebookCmdBase . "$lang_option -R $rise_and_fall_time -F $rise_and_fall_time " .
"$extra_word_spacing_option -f $pitch_tone -w $speed -s 44100 ";
if ($farnsworth != 0) {
$ebookCmdBase = $ebookCmdBase . "-e $farnsworth ";
Expand Down Expand Up @@ -750,7 +770,11 @@ sub split_on_spoken_directive {
rename("$output_directory/sentence.txt ", '$filename_base-$counter-full.txt');
my $exit_code = -1;
while($exit_code != 0 && $no_spoken != 0) {
my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory";
my $trim_silence = 0;
if($precise) {
$trim_silence = 1;
}
my $cmd = './text2speech.py '."$filename_base-${counter}-full $text_to_speech_engine $lang $cache_directory $trim_silence";
my $output = `$cmd`;
$output =~ m/^Cached filename:(.*)\n/;
my $full_voiced_filename = $1;
Expand Down Expand Up @@ -1010,7 +1034,9 @@ sub split_on_spoken_directive {
else {
$speed = $_;
}
unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3", "$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3";
unlink "$output_directory/sentence-${speed}0000.mp3", "$output_directory/sentence-repeat-${speed}0000.mp3",
"$filename_base-list-${speed}wpm.txt", "$output_directory/silence.mp3", "$output_directory/sentence-${speed}.txt",
"$output_directory/sentence-${speed}-orig0000.mp3";
}
unlink "$filename_base-structure.txt", "$filename_base-sentences.txt";
unlink glob("$output_directory/silence*.mp3");
Expand Down Expand Up @@ -1052,12 +1078,14 @@ sub print_usage {
print " --nospoken exclude spoken\n";
print " --nocourtesytone exclude the courtesy tone\n";
print " -e, --engine name of Polly speech engine to use: NEURAL or STANDARD\n";
print " --sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
print " --ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n";
print " --sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
print " --sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
print " --st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.\n";
print " -sm, --silencemorse length of silence between Morse code and spoken voice. Default 1 second.\n";
print " -ss, --silencesets length of silence between courtesy tone and next practice set. Default 1 second.\n";
print " -sv, --silencevoice length of silence between spoken voice and repeated morse code. Default 1 second.\n";
print " -sc, --silencecontext length of silence between spoken context and morse code. Default 1 second.\n";
print " -st, --silencemanualcourtesytone length of silence between Morse code and manually specified courtesy tone <courtesyTone>. Default 1 second.\n";
print " -x, --extraspace 0 is no extra spacing. 0.5 is half word extra spacing. 1 is twice the word space. 1.5 is 2.5x the word space. etc\n";
print " --precise trim AWS Polly and ebook2cw audio -- useful when specifying very short time with -sm, --silencemorse length of silence between Morse code and spoken voice.\n";
print " ****Be sure*** to clear the cache directory if you are switching between precise and non-precise timing.\n";
print " -l, --lang language: ENGLISH, GERMAN, or SWEDISH\n\n";
die "";
}
29 changes: 21 additions & 8 deletions text2speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
import hashlib
import os.path
from os import environ
import shutil
import subprocess

# Positional command-line arguments (passed in by render.pl when it builds
# the ./text2speech.py command line).
sentence_filename = sys.argv[1]
engine_type = sys.argv[2].lower() # needs to be: standard | neural
language = sys.argv[3]
#cache_directory = 'cache/'
cache_directory = sys.argv[4]
# Optional fifth argument: 1 to trim silence from the synthesized speech,
# 0 otherwise. argv values are strings, so convert to int -- the value is
# later compared against the integer 1, and "1" == 1 is always False.
# Defaults to 0 (no trimming) when the argument is omitted so that callers
# using the older four-argument form keep working.
trim_silence = int(sys.argv[5]) if len(sys.argv) > 5 else 0

# ERROR return codes (coordinate with render.pl for intelligent error handling)
ioError = 2
Expand Down Expand Up @@ -57,7 +57,8 @@

hex_digest = hashlib.sha256(sentence.encode('utf-8')).hexdigest()
base_filename = engine_type + '-' + hex_digest + ".mp3"
temp_filename = cache_directory + engine_type + "-" + hex_digest + "-temp.mp3"
temp_resample_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-resample.mp3"
temp_sox_filename = cache_directory + engine_type + "-" + hex_digest + "-temp-sox.mp3"
cache_filename = cache_directory + hex_digest + ".mp3"

def render(cache_filename, voice_id, text_type, text):
Expand All @@ -71,17 +72,29 @@ def render(cache_filename, voice_id, text_type, text):
response = polly_client.synthesize_speech(Engine=engine_type, VoiceId=voice_id, OutputFormat='mp3',
TextType=text_type, Text=text)

file = open(temp_filename, 'wb')
file.write(response['AudioStream'].read())
file.close()
if trim_silence == 1:
file = open(temp_sox_filename, 'wb')
file.write(response['AudioStream'].read())
file.close()

result = subprocess.run(['sox', temp_sox_filename, temp_resample_filename, 'silence', '1', '0.001', '1%'],
stdout=subprocess.PIPE,
universal_newlines=True)
os.remove(temp_sox_filename)
else:
file = open(temp_resample_filename, 'wb')
file.write(response['AudioStream'].read())
file.close()


subprocess.run(['lame', '--resample', '44.1', '-a', '-b', '256',
temp_filename,
temp_resample_filename,
cache_filename],
stdout=subprocess.PIPE,
universal_newlines=True)

os.remove(temp_filename)
os.remove(temp_resample_filename)


print("Cached filename:" + cache_filename)

Expand All @@ -90,7 +103,7 @@ def render(cache_filename, voice_id, text_type, text):
# render.pl
if language == "ENGLISH":
# short individual words are easier to understand spoken more slowly
if re.match(r"<speak>.*?</speak>", sentence):
if re.match(r"\s*<speak>.*?</speak>\s*", sentence):
print("Pronouncing exactly as specified")
ssml = sentence
cache_filename = cache_directory + "Mathew-exact-" + base_filename
Expand Down