-
Notifications
You must be signed in to change notification settings - Fork 0
/
gcs.py
57 lines (44 loc) · 1.78 KB
/
gcs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from google.cloud import speech
from google.cloud import storage
from google.cloud import storage
def upload_file(bucket, filename, destination, project):
    """Upload a local file to a Cloud Storage bucket.

    Args:
        bucket: Name of the target bucket, passed to ``Client.bucket``.
        filename: Path of the local file to upload.
        destination: Object (blob) name to create inside the bucket.
        project: Project passed to ``storage.Client``.

    Prints a confirmation line once the upload call returns.
    """
    client = storage.Client(project=project)
    target_blob = client.bucket(bucket).blob(destination)
    target_blob.upload_from_filename(filename)
    print(f"File {filename} uploaded to {destination}.")
def transcribe_speech(
    uri,
    *,
    sample_rate_hertz=44100,
    language_code="en-US",
    timeout=90,
):
    """Transcribe the audio at ``uri`` and return per-word timing records.

    Submits a long-running recognition request with word time offsets
    enabled, waits for it to complete, and hands the response to
    ``annotation_info``.

    Args:
        uri: Location of the audio, passed to ``speech.RecognitionAudio``
            (presumably a ``gs://`` Cloud Storage URI -- confirm with caller).
        sample_rate_hertz: Sample rate of the audio. Defaults to 44100.
        language_code: Language tag of the spoken audio. Defaults to
            ``"en-US"``.
        timeout: Seconds to wait for the operation result. Defaults to 90.

    Returns:
        The list produced by ``annotation_info`` for the recognition response.

    Note:
        The audio encoding is fixed to LINEAR16. Any exception raised by
        ``operation.result`` (for example when ``timeout`` elapses) is
        propagated to the caller.
    """
    client = speech.SpeechClient()
    audio = speech.RecognitionAudio(uri=uri)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
        # Word offsets are required: annotation_info reads start/end times
        # from every recognized word.
        enable_word_time_offsets=True,
    )
    # The long-running variant is used instead of the synchronous
    # `recognize` call; block here until it finishes or `timeout` elapses.
    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result(timeout=timeout)
    return annotation_info(response)
def annotation_info(transcription):
    """Flatten a recognition response into per-word timing records.

    For every result in ``transcription.results`` the first alternative is
    used: its transcript and confidence are printed, and each of its words
    becomes one record.

    Args:
        transcription: Object exposing ``results``; each result exposes
            ``alternatives``, and each alternative exposes ``transcript``,
            ``confidence`` and ``words``. Every word exposes ``word``,
            ``start_time`` and ``end_time``, where the times provide
            ``total_seconds()``.

    Returns:
        A list of dicts ``{"start": int, "end": int, "token": str}`` with
        start/end expressed in whole milliseconds, in response order.
    """
    print(transcription)
    split_labels = []
    for result in transcription.results:
        # A result without alternatives has no words to report; skip it
        # instead of failing on the [0] lookup below.
        if not result.alternatives:
            continue
        alternative = result.alternatives[0]
        print("Transcript: {}".format(alternative.transcript))
        print("Confidence: {}".format(alternative.confidence))
        split_labels.extend(
            {
                "start": _duration_to_ms(word_info.start_time),
                "end": _duration_to_ms(word_info.end_time),
                "token": word_info.word,
            }
            for word_info in alternative.words
        )
    return split_labels


def _duration_to_ms(duration):
    """Return ``duration`` as whole milliseconds, truncating sub-ms parts."""
    # Multiplying the float seconds by 1000 and truncating loses a
    # millisecond to representation error (1.001 s -> 1000.9999... -> 1000).
    # Round to whole microseconds first, then divide in integer arithmetic.
    microseconds = round(duration.total_seconds() * 1_000_000)
    return microseconds // 1000