Skip to content

Commit

Permalink
feat: models from upstream plugin (#9)
Browse files Browse the repository at this point in the history
* feat: models from upstream plugin

Includes better error handling and logging around model choice

* Update requirements.txt

* revert requirements change

* fix unit tests

FasterWhisper doesn't support Python 3.7

* allow no tests 🥲

* ok fine, add tests

* exclude tqdm from license tests
mikejgray authored Sep 13, 2024
1 parent cebac27 commit 2a32cc7
Showing 6 changed files with 270 additions and 55 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/license_tests.yml
Original file line number Diff line number Diff line change
@@ -12,9 +12,9 @@ jobs:
license_tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v1
uses: actions/setup-python@v5
with:
python-version: 3.8
- name: Install Build Tools
@@ -32,13 +32,13 @@ jobs:
pip freeze > requirements-all.txt
- name: Check python
id: license_check_report
uses: pilosus/action-pip-license-checker@v0.5.0
uses: pilosus/action-pip-license-checker@v2
with:
requirements: 'requirements-all.txt'
fail: 'Copyleft,Other,Error'
requirements: "requirements-all.txt"
fail: "Copyleft,Other,Error"
fails-only: true
exclude: '^(ovos-stt-plugin-fasterwhisper).*'
exclude-license: '^(Mozilla).*$'
exclude: "^(ovos-stt-plugin-fasterwhisper|tqdm).*"
exclude-license: "^(Mozilla).*$"
- name: Print report
if: ${{ always() }}
run: echo "${{ steps.license_check_report.outputs.report }}"
run: echo "${{ steps.license_check_report.outputs.report }}"
50 changes: 25 additions & 25 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -4,43 +4,43 @@ on:
branches:
- dev
paths-ignore:
- 'ovos_stt_plugin_fasterwhisper/version.py'
- 'requirements/**'
- 'examples/**'
- '.github/**'
- '.gitignore'
- 'LICENSE'
- 'CHANGELOG.md'
- 'MANIFEST.in'
- 'README.md'
- 'scripts/**'
- "ovos_stt_plugin_fasterwhisper/version.py"
- "requirements/**"
- "examples/**"
- ".github/**"
- ".gitignore"
- "LICENSE"
- "CHANGELOG.md"
- "MANIFEST.in"
- "README.md"
- "scripts/**"
push:
branches:
- master
paths-ignore:
- 'ovos_stt_plugin_fasterwhisper/version.py'
- 'requirements/**'
- 'examples/**'
- '.github/**'
- '.gitignore'
- 'LICENSE'
- 'CHANGELOG.md'
- 'MANIFEST.in'
- 'README.md'
- 'scripts/**'
- "ovos_stt_plugin_fasterwhisper/version.py"
- "requirements/**"
- "examples/**"
- ".github/**"
- ".gitignore"
- "LICENSE"
- "CHANGELOG.md"
- "MANIFEST.in"
- "README.md"
- "scripts/**"
workflow_dispatch:

jobs:
unit_tests:
strategy:
max-parallel: 2
matrix:
python-version: [ 3.7, 3.8, 3.9, "3.10" ]
python-version: [3.8, 3.9, "3.10", "3.11"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Set up python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install System Dependencies
@@ -56,11 +56,11 @@ jobs:
pip install pytest pytest-timeout pytest-cov
- name: Run unittests
run: |
pytest --cov=ovos_stt_plugin_fasterwhisper --cov-report xml test/unittests
pytest --cov=ovos_stt_plugin_fasterwhisper --cov-report xml test
# NOTE: additional pytest invocations should also add the --cov-append flag
# or they will overwrite previous invocations' coverage reports
# (for an example, see OVOS Skill Manager's workflow)
- name: Upload coverage
env:
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v4
163 changes: 163 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -18,3 +18,166 @@ dist

# Created by unit tests
.pytest_cache/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
36 changes: 14 additions & 22 deletions ovos_stt_plugin_fasterwhisper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import numpy as np
from faster_whisper import WhisperModel, decode_audio
from faster_whisper import WhisperModel, decode_audio, available_models
from ovos_plugin_manager.templates.stt import STT
from ovos_plugin_manager.templates.transformers import AudioLanguageDetector
from speech_recognition import AudioData

from ovos_utils.log import LOG

class FasterWhisperLangClassifier(AudioLanguageDetector):
def __init__(self, config=None):
config = config or {}
super().__init__("ovos-audio-transformer-plugin-fasterwhisper", 10, config)
model = self.config.get("model")
if not model:
valid_model = model in FasterWhisperSTT.MODELS
if not model or not valid_model:
LOG.warning(f"{model} is not a valid model ({FasterWhisperSTT.MODELS}), using 'small' instead")
model = "small"

assert model in FasterWhisperSTT.MODELS # TODO - better error handling
self.config["model"] = "small"

self.compute_type = self.config.get("compute_type", "int8")
self.use_cuda = self.config.get("use_cuda", False)
@@ -67,19 +68,7 @@ def detect(self, audio_data: bytes, valid_langs=None):


class FasterWhisperSTT(STT):
MODELS = (
"tiny.en",
"tiny",
"base.en",
"base",
"small.en",
"small",
"medium.en",
"medium",
"large",
"large-v2",
"large-v3",
)
MODELS = available_models()
LANGUAGES = {
"en": "english",
"zh": "chinese",
@@ -185,9 +174,11 @@ class FasterWhisperSTT(STT):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
model = self.config.get("model")
if not model:
valid_model = model in FasterWhisperSTT.MODELS
if not model or not valid_model:
LOG.warning(f"{model} is not a valid model ({FasterWhisperSTT.MODELS}), using 'small' instead")
model = "small"
assert model in self.MODELS # TODO - better error handling
self.config["model"] = "small"

self.beam_size = self.config.get("beam_size", 5)
self.compute_type = self.config.get("compute_type", "int8")
@@ -234,7 +225,7 @@ def available_languages(self) -> set:
"lang": lang,
"meta": {
"priority": 50,
"display_name": f"FasterWhisper (Tiny)",
"display_name": "FasterWhisper (Tiny)",
"offline": True,
},
},
@@ -262,6 +253,7 @@ def available_languages(self) -> set:

if __name__ == "__main__":
b = FasterWhisperSTT()

from speech_recognition import Recognizer, AudioFile

jfk = "/home/miro/PycharmProjects/ovos-stt-plugin-fasterwhisper/jfk.wav"
@@ -275,4 +267,4 @@ def available_languages(self) -> set:

l = FasterWhisperLangClassifier()
lang, prob = l.detect(audio.get_wav_data())
print(lang, prob)
print(lang, prob)
Empty file added test/__init__.py
Empty file.
60 changes: 60 additions & 0 deletions test/test_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import numpy as np
import pytest
from speech_recognition import AudioFile, Recognizer

from ovos_stt_plugin_fasterwhisper import FasterWhisperLangClassifier, FasterWhisperSTT


@pytest.fixture
def audio_data():
    """Load ``jfk.wav`` and return what ``Recognizer.record`` produces for it.

    The path is relative, so the file is looked up in the current working
    directory of the test run.
    """
    rec = Recognizer()
    with AudioFile("jfk.wav") as wav_source:
        recorded = rec.record(wav_source)
    return recorded


def test_faster_whisper_stt_execute(audio_data):
    """Transcribing the fixture audio as English must give a non-empty string."""
    text = FasterWhisperSTT().execute(audio_data, language="en")
    assert isinstance(text, str)
    assert len(text) > 0


def test_faster_whisper_stt_available_languages():
    """``available_languages`` must be a set that contains ``"en"``."""
    langs = FasterWhisperSTT().available_languages
    assert isinstance(langs, set)
    assert "en" in langs


def test_faster_whisper_lang_classifier_detect(audio_data):
    """``detect`` on the fixture's WAV bytes returns ``(str, float in [0, 1])``."""
    detector = FasterWhisperLangClassifier()
    detected = detector.detect(audio_data.get_wav_data())
    lang, prob = detected
    assert isinstance(lang, str)
    assert isinstance(prob, float)
    assert 0.0 <= prob <= 1.0


def test_faster_whisper_lang_classifier_audiochunk2array():
    """``audiochunk2array`` turns raw bytes into a float32 NumPy array."""
    raw_chunk = b"\x00\x01\x02\x03"
    converted = FasterWhisperLangClassifier.audiochunk2array(raw_chunk)
    assert isinstance(converted, np.ndarray)
    assert converted.dtype == np.float32


def test_faster_whisper_stt_audiodata2array(audio_data):
    """``audiodata2array`` turns the fixture audio into a float32 NumPy array."""
    converted = FasterWhisperSTT.audiodata2array(audio_data)
    assert isinstance(converted, np.ndarray)
    assert converted.dtype == np.float32


def test_faster_whisper_stt_invalid_model():
    """An unrecognised model name must leave ``config["model"]`` set to ``"small"``."""
    bad_config = {"model": "invalid_model"}
    plugin = FasterWhisperSTT(config=bad_config)
    assert plugin.config["model"] == "small"


def test_faster_whisper_lang_classifier_invalid_model():
    """An unrecognised model name must leave ``config["model"]`` set to ``"small"``."""
    bad_config = {"model": "invalid_model"}
    detector = FasterWhisperLangClassifier(config=bad_config)
    assert detector.config["model"] == "small"

# Allow running this test module directly as a script by handing control to pytest.
if __name__ == "__main__":
    pytest.main()

0 comments on commit 2a32cc7

Please sign in to comment.