-
Notifications
You must be signed in to change notification settings - Fork 0
/
wav2text.py
59 lines (47 loc) · 1.43 KB
/
wav2text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import subprocess
import sys

import speech_recognition as sr
def audio2text(audio):
    """
    Transcribe an audio file to text.

    The file is opened through ``sr.AudioFile``, captured with
    ``Recognizer.listen`` and passed to ``Recognizer.recognize_google`` using
    the language code ``de_DE.utf8``. The recognised text is printed together
    with the path of the audio file and then returned.

    NOTE(review): the EXIF image description is said to be ASCII based, but
    no umlaut replacement or ASCII conversion happens in this function -- the
    text is returned exactly as delivered by the recogniser.

    NOTE(review): ``listen`` is kept from the original; confirm that it
    captures the whole recording and not only the first phrase.

    :param audio: path of the audio file to transcribe
    :return: the recognised text
    :raises: exceptions raised by the recogniser are not caught here
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        # Use a separate name for the captured data so that the path in
        # `audio` is still available for the log line below.
        recording = recognizer.listen(source)
        text = recognizer.recognize_google(recording, language="de_DE.utf8")
    # Report the file that was actually transcribed instead of relying on an
    # unrelated module-level variable.
    print(f'fetched {audio}: {text}')
    return text
def updateExif(file, text):
    """
    Write ``text`` into the ImageDescription tag of ``file`` using exiftool.

    exiftool is started directly with an argument list (no shell), so quotes,
    ``$``, backticks or spaces in ``text`` or ``file`` reach exiftool
    unchanged instead of being interpreted by a shell.

    The exit status of exiftool is ignored. If exiftool cannot be started at
    all, a message is printed and the function returns normally.

    :param file: path of the image file to update
    :param text: description to store in the ImageDescription tag
    :return: None
    """
    command = ['exiftool', f'-ImageDescription={text}', file]
    try:
        # check=False: a failing exiftool run must not abort the caller.
        subprocess.run(command, check=False)
    except OSError as err:
        # Raised e.g. when the exiftool executable is not installed.
        print(f'could not run exiftool for {file}: {err}')
# Command-line entry: the single argument is either one audio file or a
# directory whose *.WAV files are all processed.
if len(sys.argv) == 1:
    cmd = sys.argv[0]
    print(f'usage {cmd} dir|file')
else:
    file = sys.argv[1]
    if os.path.isdir(file):
        # os.path.join adds the path separator only when it is missing;
        # sorting makes the processing order independent of os.listdir.
        audios = [
            os.path.join(file, name)
            for name in sorted(os.listdir(file))
            if name.endswith('.WAV')
        ]
    else:
        audios = [file]

    # loop over all WAV files
    for audio in audios:
        if not os.path.exists(audio):
            print(f'file not found: {audio}')
            continue

        # find all related images (RAF and JPG) sharing the audio's base name
        stem = os.path.splitext(audio)[0]
        image_files = [
            stem + ext for ext in ('.RAF', '.JPG') if os.path.exists(stem + ext)
        ]
        if not image_files:
            print(f'.. no picture found for audio {audio}')
            continue

        # A recording that cannot be transcribed must not abort the
        # remaining files of the batch.
        try:
            text = audio2text(audio)
        except (sr.UnknownValueError, sr.RequestError) as err:
            print(f'.. could not transcribe {audio}: {err!r}')
            continue

        for image in image_files:
            updateExif(image, text)