Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add headers for multiple language identification #99

Merged
merged 14 commits into from
Feb 17, 2025
Merged
2 changes: 2 additions & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
coverage:
ignore:
- "tests/*"
status:
patch:
default:
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- name: Checkout Repository
Expand All @@ -29,12 +29,11 @@ jobs:
run: |
tox -e py
- name: Upload coverage
if: matrix.python-version == 3.7
uses: codecov/codecov-action@v1
if: matrix.python-version == 3.11
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
files: ./coverage.xml
flags: unittests
name: codecov-umbrella
yml: ./codecov.yml
fail_ci_if_error: true
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ on:

jobs:
lint:
runs-on: ubuntu-20.04
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
- name: Set up Python 3.11
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: 3.11
- name: Run pre-commit
uses: pre-commit/[email protected]
6 changes: 3 additions & 3 deletions .github/workflows/typecheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ jobs:
steps:
- name: Checkout Repository
uses: actions/checkout@v1
- name: Set Up Python 3.7
- name: Set Up Python 3.11
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.11
- name: Install tox
run: |
python3.7 -m pip install tox
python3.11 -m pip install tox
- name: Run type checks
run: |
tox -e type
42 changes: 39 additions & 3 deletions amazon_transcribe/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import re
from binascii import unhexlify
from typing import Optional
from typing import Optional, List

from amazon_transcribe import AWSCRTEventLoop
from amazon_transcribe.auth import AwsCrtCredentialResolver, CredentialResolver
Expand Down Expand Up @@ -75,15 +75,21 @@ async def start_stream_transcription(
media_sample_rate_hz: int,
media_encoding: str,
vocabulary_name: Optional[str] = None,
vocabulary_names: Optional[List[str]] = None,
session_id: Optional[str] = None,
vocab_filter_method: Optional[str] = None,
vocab_filter_name: Optional[str] = None,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add VocabularyFilterNames

vocab_filter_names: Optional[List[str]] = None,
show_speaker_label: Optional[bool] = None,
enable_channel_identification: Optional[bool] = None,
number_of_channels: Optional[int] = None,
enable_partial_results_stabilization: Optional[bool] = None,
partial_results_stability: Optional[str] = None,
language_model_name: Optional[str] = None,
identify_language: Optional[bool] = None,
preferred_language: Optional[str] = None,
identify_multiple_languages: Optional[bool] = None,
language_options: Optional[List[str]] = None,
) -> StartStreamTranscriptionEventStream:
"""Coordinate transcription settings and start stream.

Expand All @@ -100,14 +106,18 @@ async def start_stream_transcription(
than 5 minutes.

:param language_code:
Indicates the source language used in the input audio stream.
Indicates the source language used in the input audio stream. Set to
None if identify_language or identify_multiple_languages is set to True.
:param media_sample_rate_hz:
The sample rate, in Hertz, of the input audio. We suggest that you
use 8000 Hz for low quality audio and 16000 Hz for high quality audio.
:param media_encoding:
The encoding used for the input audio.
:param vocabulary_name:
The name of the vocabulary to use when processing the transcription job.
:param vocabulary_names:
When using language identification, the name of the vocabulary to
use for each language option.
:param session_id:
An identifier for the transcription session. Use this parameter when you
want to retry a session. If you don't provide a session ID,
Expand All @@ -118,7 +128,11 @@ async def start_stream_transcription(
:param vocab_filter_name:
The name of the vocabulary filter you've created that is unique to
your AWS account. Provide the name in this field to successfully
use it in a stream.
use it in a stream. Use only when identify_language and
identify_multiple_languages are set to None.
:param vocab_filter_names:
The names of the vocabulary filters to use for each language option. To be
used in conjunction with identify_language or identify_multiple_languages.
:param show_speaker_label:
When true, enables speaker identification in your real-time stream.
:param enable_channel_identification:
Expand All @@ -144,21 +158,43 @@ async def start_stream_transcription(
overall transcription accuracy. Defaults to "high" if not set explicitly.
:param language_model_name:
The name of the language model you want to use.
:param identify_language:
If True, the language of the stream will be automatically detected. Set
language_code to None and provide at least two language_options when
identify_language is True.
:param preferred_language:
Adding a preferred language can speed up the language identification
process, which is helpful for short audio clips.
:param identify_multiple_languages:
If true, all languages spoken in the stream are identified. A multilingual
transcript is created using each identified language.
You must also provide at least two language_options and set
language_code to None
:param language_options:
A list of possible languages to use when identify_language or
identify_multiple_languages is set to True. Note that not all languages
supported by Transcribe are supported for multiple language identification
"""
transcribe_streaming_request = StartStreamTranscriptionRequest(
language_code,
media_sample_rate_hz,
media_encoding,
vocabulary_name,
vocabulary_names,
session_id,
vocab_filter_method,
vocab_filter_name,
vocab_filter_names,
show_speaker_label,
enable_channel_identification,
number_of_channels,
enable_partial_results_stabilization,
partial_results_stability,
language_model_name,
identify_language,
preferred_language,
identify_multiple_languages,
language_options,
)
endpoint = await self._endpoint_resolver.resolve(self.region)

Expand Down
1 change: 1 addition & 0 deletions amazon_transcribe/deserialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def _parse_result(self, current_node: Any) -> Result:
is_partial=current_node.get("IsPartial"),
alternatives=alternatives,
channel_id=current_node.get("ChannelId"),
language_code=current_node.get("LanguageCode"),
)

def _parse_alternative_list(self, current_node: Any) -> List[Alternative]:
Expand Down
28 changes: 27 additions & 1 deletion amazon_transcribe/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,23 @@ def __init__(
is_partial: Optional[bool] = None,
alternatives: Optional[List[Alternative]] = None,
channel_id: Optional[str] = None,
language_code: Optional[str] = None,
):
self.result_id = result_id
self.start_time = start_time
self.end_time = end_time
self.is_partial = is_partial
self.alternatives = alternatives
self.channel_id = channel_id
self.language_code = language_code


class StartStreamTranscriptionRequest:
"""Transcription Request

:param language_code:
Indicates the source language used in the input audio stream.
Indicates the source language used in the input audio stream. Set to
None if identify_language or identify_multiple_languages is set to True.

:param media_sample_rate_hz:
The sample rate, in Hertz, of the input audio. We suggest that you
Expand Down Expand Up @@ -226,6 +229,15 @@ class StartStreamTranscriptionRequest:
overall transcription accuracy.
:param language_model_name:
The name of the language model you want to use.
:param identify_multiple_languages:
If true, all languages spoken in the stream are identified. A multilingual
transcript is created using each identified language.
You must also provide at least two language_options and set
language_code to None
:param language_options:
A list of possible languages to use when identify_language or identify_multiple_languages is
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should mention identify_language as well
NIT: Space after :

set to True. Note that not all languages supported by Transcribe are
supported for multiple language identification
"""

def __init__(
Expand All @@ -234,24 +246,32 @@ def __init__(
media_sample_rate_hz=None,
media_encoding=None,
vocabulary_name=None,
vocabulary_names=None,
session_id=None,
vocab_filter_method=None,
vocab_filter_name=None,
vocab_filter_names=None,
show_speaker_label=None,
enable_channel_identification=None,
number_of_channels=None,
enable_partial_results_stabilization=None,
partial_results_stability=None,
language_model_name=None,
identify_language=None,
preferred_language=None,
identify_multiple_languages=False,
language_options=None,
):

self.language_code: Optional[str] = language_code
self.media_sample_rate_hz: Optional[int] = media_sample_rate_hz
self.media_encoding: Optional[str] = media_encoding
self.vocabulary_name: Optional[str] = vocabulary_name
self.vocabulary_names: Optional[List[str]] = vocabulary_names
self.session_id: Optional[str] = session_id
self.vocab_filter_method: Optional[str] = vocab_filter_method
self.vocab_filter_name: Optional[str] = vocab_filter_name
self.vocab_filter_names: Optional[List[str]] = vocab_filter_names
self.show_speaker_label: Optional[bool] = show_speaker_label
self.enable_channel_identification: Optional[
bool
Expand All @@ -262,6 +282,10 @@ def __init__(
] = enable_partial_results_stabilization
self.partial_results_stability: Optional[str] = partial_results_stability
self.language_model_name: Optional[str] = language_model_name
self.identify_language: Optional[bool] = identify_language
self.preferred_language: Optional[str] = preferred_language
self.identify_multiple_languages: Optional[bool] = identify_multiple_languages
self.language_options: Optional[List[str]] = language_options or []


class StartStreamTranscriptionResponse:
Expand Down Expand Up @@ -324,6 +348,7 @@ def __init__(
media_sample_rate_hz=None,
media_encoding=None,
vocabulary_name=None,
vocabulary_names=None,
session_id=None,
vocab_filter_name=None,
vocab_filter_method=None,
Expand All @@ -339,6 +364,7 @@ def __init__(
self.media_sample_rate_hz: Optional[int] = media_sample_rate_hz
self.media_encoding: Optional[str] = media_encoding
self.vocabulary_name: Optional[str] = vocabulary_name
self.vocabulary_names: Optional[List[str]] = vocabulary_names
self.session_id: Optional[str] = session_id
self.transcript_result_stream: TranscriptResultStream = transcript_result_stream
self.vocab_filter_name: Optional[str] = vocab_filter_name
Expand Down
51 changes: 50 additions & 1 deletion amazon_transcribe/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# language governing permissions and limitations under the License.


from typing import Any, Dict, Tuple, Optional
from typing import Any, Dict, Tuple, Optional, List

from amazon_transcribe.request import Request
from amazon_transcribe.structures import BufferableByteStream
Expand Down Expand Up @@ -56,6 +56,10 @@ def _serialize_bool_header(
) -> Dict[str, str]:
return self._serialize_header(header, value)

def _serialize_list_header(self, header: str, value: List[str]) -> Dict[str, str]:
languages = ",".join(value)
return self._serialize_str_header(header, languages)

def serialize_start_stream_transcription_request(
self, endpoint: str, request_shape: StartStreamTranscriptionRequest
) -> Request:
Expand All @@ -78,6 +82,14 @@ def serialize_start_stream_transcription_request(
headers.update(
self._serialize_str_header("vocabulary-name", request_shape.vocabulary_name)
)

if request_shape.vocabulary_names:
headers.update(
self._serialize_list_header(
"vocabulary-names",
request_shape.vocabulary_names,
)
)
headers.update(
self._serialize_str_header("session-id", request_shape.session_id)
)
Expand All @@ -93,6 +105,13 @@ def serialize_start_stream_transcription_request(
request_shape.vocab_filter_name,
)
)
if request_shape.vocab_filter_names:
headers.update(
self._serialize_list_header(
"vocabulary-filter-names",
request_shape.vocab_filter_names,
)
)
headers.update(
self._serialize_bool_header(
"show-speaker-label",
Expand Down Expand Up @@ -130,6 +149,36 @@ def serialize_start_stream_transcription_request(
)
)

headers.update(
self._serialize_bool_header(
"identify-language",
request_shape.identify_language,
)
)

headers.update(
self._serialize_str_header(
"preferred-language",
request_shape.preferred_language,
)
)

if request_shape.identify_multiple_languages:
headers.update(
self._serialize_bool_header(
"identify-multiple-languages",
request_shape.identify_multiple_languages,
)
)

if request_shape.language_options:
headers.update(
self._serialize_list_header(
"language-options",
request_shape.language_options,
)
)

_add_required_headers(endpoint, headers)

body = BufferableByteStream()
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@
#html_static_path = ['_static']

intersphinx_mapping = {
'python': ('https://docs.python.org/3.7', None),}
'python': ('https://docs.python.org/3.11', None),}
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def find_version(*file_paths):
include_package_data=True,
install_requires=requires,
extras_require={},
python_requires=">= 3.7",
python_requires=">= 3.8",
license="Apache License 2.0",
classifiers=[
"Development Status :: 2 - Pre-Alpha",
Expand All @@ -46,9 +46,9 @@ def find_version(*file_paths):
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
],
)
Loading