diff --git a/.gitignore b/.gitignore
index f88488b..ad598f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
# ignore generated output files
-output/
\ No newline at end of file
+output/
+.idea/
+.idea/misc.xml
+.DS_Store
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 26d3352..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
deleted file mode 100644
index 105ce2d..0000000
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/karpik-poc-py.iml b/.idea/karpik-poc-py.iml
deleted file mode 100644
index 74d515a..0000000
--- a/.idea/karpik-poc-py.iml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 8cdbed9..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index ad7b78b..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/generate-audio.py b/generate-audio.py
index 1dfd201..2075c14 100755
--- a/generate-audio.py
+++ b/generate-audio.py
@@ -33,7 +33,7 @@ def synthesize(text, config):
response = polly_client.synthesize_speech(
Engine='neural', # standard|neural - neural nie obsługuje max-duration
- VoiceId= config.voice,
+ VoiceId=config.voice,
LanguageCode='en-US',
OutputFormat='mp3',
TextType='ssml', # or text
@@ -52,6 +52,12 @@ def caption_start(caption):
return seconds
+def caption_end(caption):
+    """Return the end time of *caption* in seconds.
+
+    ``caption.end`` is split on ``:`` and its first three fields are read as
+    hours, minutes and seconds (the seconds field may carry a fraction).
+    """
+    fields = list(map(float, caption.end.split(':')))
+    return fields[0] * 3600 + fields[1] * 60 + fields[2]
+
+
def load_captions(config):
if config.captions_format == 'vtt':
return webvtt.read(f'input/{config.captions_file_name}')
@@ -61,6 +67,33 @@ def load_captions(config):
raise Exception('Unsupported subtitles format')
+# TODO figure out better way of defining break length
+def define_break(diff_length, num_of_pauses):
+    """Pick the length of each pause to insert, based on the time available.
+
+    The returned number is presumably milliseconds (TODO confirm against the
+    place where the value is turned into a break).
+
+    :param diff_length: spare time, in seconds, to spread over the pauses
+    :param num_of_pauses: number of pauses sharing that time (must not be 0)
+    :return: 1000 when each pause has more than 2 s available, 800 when it
+        has more than 1 s and at most 2 s, otherwise 500
+    :raises ZeroDivisionError: if ``num_of_pauses`` is 0
+    """
+    length_of_pause = diff_length / num_of_pauses
+    if length_of_pause > 2:
+        return 1000
+    # Upper bound is inclusive: exactly 2 s per pause belongs to this tier
+    # instead of falling through to the shortest break.
+    if length_of_pause > 1:
+        return 800
+    return 500
+
+
+def extend_sentence_audio(sentence_audio, caption):
+    """Re-synthesize a caption with pauses so its audio better fills its slot.
+
+    The caption text is split on commas and an SSML ``<break>`` is placed
+    between consecutive fragments (the commas themselves are dropped). The
+    break length comes from ``define_break`` and depends on how much shorter
+    the audio is than the caption's time slot.
+
+    Relies on the module-level ``config`` object for the ``synthesize`` call.
+
+    :param sentence_audio: already synthesized audio for ``caption.text``;
+        only its ``duration_seconds`` is read
+    :param caption: caption providing ``text`` plus the start/end times
+    :return: ``sentence_audio`` unchanged when there is no comma to pause at
+        or no spare time to fill, otherwise the result of ``synthesize``
+    """
+    fragments = caption.text.split(',')
+    if len(fragments) == 1:
+        return sentence_audio
+
+    slot_length = caption_end(caption) - caption_start(caption)
+    diff = round(slot_length - sentence_audio.duration_seconds, 3)
+    if diff <= 0:
+        # Audio already fills (or overruns) the slot; pauses would only make
+        # it longer.
+        return sentence_audio
+
+    # One break per gap between fragments - none after the last fragment.
+    break_tag = '<break time="{}ms"/>'.format(define_break(diff, len(fragments) - 1))
+    return synthesize(break_tag.join(fragments), config)
+
+
if __name__ == '__main__':
config = InlineClass({
'captions_file_name': 'udemy_sample_01.vtt',
@@ -80,6 +113,8 @@ def load_captions(config):
print(f'Processing {caption}')
sentence_audio = synthesize(caption.text, config)
+ sentence_audio = extend_sentence_audio(sentence_audio, caption)
+
start = caption_start(caption)
if audio.duration_seconds < start:
break_length = (start - audio.duration_seconds) * 1000
@@ -92,4 +127,4 @@ def load_captions(config):
new_audio = mpe.AudioFileClip(f'output/{config.audio_file_name}')
# new_audio = mpe.CompositeAudioClip([input_clip.audio, new_audio])
final_clip = input_clip.set_audio(new_audio)
- final_clip.write_videofile(f'output/{config.movie_file_name}')
+ final_clip.write_videofile(f'output/output_{config.movie_file_name}')