From 12b6696fbdb6f7b033a6dcd0e7751534382f8103 Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Tue, 3 Oct 2023 17:06:30 +1000 Subject: [PATCH 1/8] Make datasets preprocessing scalable for large datasets. --- elpis/datasets/dataset.py | 73 ++++++++------ elpis/datasets/extract_annotations.py | 4 - elpis/datasets/preprocessing.py | 2 - elpis/datasets/processing.py | 25 ++++- tests/datasets/test_dataset.py | 137 ++++++++++++++++---------- 5 files changed, 150 insertions(+), 91 deletions(-) diff --git a/elpis/datasets/dataset.py b/elpis/datasets/dataset.py index 0323bbc..3ec2711 100644 --- a/elpis/datasets/dataset.py +++ b/elpis/datasets/dataset.py @@ -1,8 +1,10 @@ from __future__ import annotations from dataclasses import dataclass, field, fields +from functools import cached_property, reduce +from itertools import chain, groupby from pathlib import Path -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple from elpis.models import ElanOptions @@ -82,15 +84,29 @@ def is_valid(self) -> bool: return ( not self.is_empty() and len(self.files) % 2 == 0 - and len(self.mismatched_files()) == 0 - and len(self.colliding_files()) == 0 + and len(self.mismatched_files) == 0 + and len(self.colliding_files) == 0 ) + @staticmethod + def is_audio(file: Path) -> bool: + return file.suffix == ".wav" + + @staticmethod + def is_transcript(file: Path) -> bool: + return file.suffix in TRANSCRIPTION_EXTENSIONS + @staticmethod def corresponding_audio_name(transcript_file: Path) -> Path: """Gets the corresponding audio file name for a given transcript file.""" return Path(transcript_file).parent / (transcript_file.stem + ".wav") + @property + def transcript_files(self) -> Iterable[Path]: + """Returns a set of all transcription files within the dataset.""" + return filter(Dataset.is_transcript, self.files) + + @cached_property def mismatched_files(self) -> Set[Path]: """Returns the list of transcript files with no 
corresponding audio and vice versa. @@ -101,18 +117,19 @@ def mismatched_files(self) -> Set[Path]: Returns: A list of the mismatched file names. """ - transcripts_with_audio = set( - filter( - lambda file: Dataset.corresponding_audio_name(file) in self.files, - self._transcript_files(), - ) - ) - matched_files = transcripts_with_audio | set( - Dataset.corresponding_audio_name(file) for file in transcripts_with_audio - ) + grouped_by_stems = groupby(self.files, lambda path: path.stem) + + def mismatches(files: Iterable[Path]) -> list[Path]: + files = list(files) + has_audio = any(Dataset.is_audio(file) for file in files) + has_transcript = any(Dataset.is_transcript(file) for file in files) + return [] if has_transcript == has_audio else files - return set(self.files).difference(matched_files) + groups = (mismatches(g) for _, g in grouped_by_stems) + result = set(chain.from_iterable(groups)) + return result + @cached_property def colliding_files(self) -> Set[Path]: """Returns the list of transcript file names that collide. @@ -122,19 +139,14 @@ def colliding_files(self) -> Set[Path]: Returns: A list of the colliding file names. 
""" + grouped_by_stems = groupby(self.transcript_files, lambda path: path.stem) - def would_collide(transcript_file: Path) -> bool: - other_files = self._transcript_files().difference({transcript_file}) - other_file_names = map(lambda file: Path(file).stem, other_files) - return Path(transcript_file).stem in other_file_names + def collisions(files: Iterable[Path]) -> list[Path]: + files = list(files) + return files if len(files) >= 2 else [] - return set(filter(would_collide, self._transcript_files())) - - def _transcript_files(self) -> Set[Path]: - """Returns a set of all transcription files within the dataset.""" - return set( - filter(lambda file: file.suffix in TRANSCRIPTION_EXTENSIONS, self.files) - ) + collision_groups = (collisions(g) for _, g in grouped_by_stems) + return set(chain.from_iterable(collision_groups)) @classmethod def from_dict(cls, data: Dict[str, Any]) -> Dataset: @@ -155,17 +167,16 @@ def from_dict(cls, data: Dict[str, Any]) -> Dataset: @property def valid_transcriptions(self): - return ( - self._transcript_files() - .difference(self.mismatched_files()) - .difference(self.colliding_files()) + is_valid = lambda path: path not in ( + self.mismatched_files | self.colliding_files ) + return filter(is_valid, self.transcript_files) - def to_batches(self) -> List[ProcessingBatch]: + def to_batches(self) -> Iterable[ProcessingBatch]: """Converts a valid dataset to a list of processing jobs, matching transcript and audio files. 
""" - return [ + return ( ProcessingBatch( transcription_file=transcription_file, audio_file=self.corresponding_audio_name(transcription_file), @@ -173,7 +184,7 @@ def to_batches(self) -> List[ProcessingBatch]: elan_options=self.elan_options, ) for transcription_file in self.valid_transcriptions - ] + ) def to_dict(self) -> Dict[str, Any]: result = { diff --git a/elpis/datasets/extract_annotations.py b/elpis/datasets/extract_annotations.py index 4f83828..16c8739 100644 --- a/elpis/datasets/extract_annotations.py +++ b/elpis/datasets/extract_annotations.py @@ -81,10 +81,6 @@ def extract_elan_annotations( A list of the annotations contained for the supplied data. Returns an empty list if the given selection isn't found. """ - logger.info( - f"processing eaf {elan_file_path} using {selection_type}: {selection_data}" - ) - match selection_type: case ElanTierSelector.NAME: return get_annotations_by_tier_name(elan_file_path, selection_data) diff --git a/elpis/datasets/preprocessing.py b/elpis/datasets/preprocessing.py index c8dd6bb..29307d1 100644 --- a/elpis/datasets/preprocessing.py +++ b/elpis/datasets/preprocessing.py @@ -5,8 +5,6 @@ from pathlib import Path from typing import Iterable, List, Tuple -from loguru import logger - import elpis.utils.audio as audio from elpis.datasets.clean_text import clean_text from elpis.datasets.dataset import CleaningOptions, ProcessingBatch diff --git a/elpis/datasets/processing.py b/elpis/datasets/processing.py index 942670b..6975ee2 100644 --- a/elpis/datasets/processing.py +++ b/elpis/datasets/processing.py @@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional import numpy as np +import sounddevice as sd from datasets import Audio, DatasetDict, load_dataset from loguru import logger from transformers import Wav2Vec2Processor @@ -54,6 +55,13 @@ def resolve_audio_path(row: Dict[str, Any]) -> Dict[str, Any]: logger.debug(f"Dataset audio file paths post-resolution: {dataset['train'][AUDIO_COLUMN][:4]}") # type: ignore dataset 
= dataset.cast_column(AUDIO_COLUMN, Audio(sampling_rate=SAMPLING_RATE)) + logger.debug(f"Sample audio col values: {dataset['train'][AUDIO_COLUMN][0]}") # type: ignore + + # Play some test audio + logger.debug(f"Playing test audio file") + data = dataset["train"][AUDIO_COLUMN][0]["array"] # type: ignore + sd.play(data, SAMPLING_RATE, blocking=True) + return dataset["train"].train_test_split(test_size=test_size) # type: ignore @@ -63,8 +71,7 @@ def prepare_dataset(dataset: DatasetDict, processor: Wav2Vec2Processor) -> Datas TODO: I'm going to be honest, I have no idea what this does, and need some smart ML knight in shining armour to write a propert description. - Parameters: - dataset: The dataset to apply the preprocessing + Parameters: dataset: The dataset to apply the preprocessing processor: The processor to apply over the dataset """ @@ -74,11 +81,11 @@ def prepare_dataset(dataset: DatasetDict, processor: Wav2Vec2Processor) -> Datas f'Input array shape:, {np.asarray(dataset["train"][0]["audio"]["array"]).shape}' ) logger.debug(f'Sampling rate:, {dataset["train"][0]["audio"]["sampling_rate"]}') - logger.debug(f"Tokenizer vocab: {processor.tokenizer.vocab}") # type: ignore + logger.debug(f"Tokenizer vocab: {processor.tokenizer.get_vocab()}") # type: ignore def _prepare_dataset(batch: Dict) -> Dict[str, List]: # Also from https://huggingface.co/blog/fine-tune-xlsr-wav2vec2 - audio = batch["audio"] + audio = batch[AUDIO_COLUMN] batch["input_values"] = processor( audio["array"], sampling_rate=audio["sampling_rate"] @@ -93,6 +100,11 @@ def _prepare_dataset(batch: Dict) -> Dict[str, List]: # flatten and make unique between datasets columns_to_remove = list(set(chain.from_iterable(columns))) + # Play some test audio + logger.debug(f"Playing test audio file before dataset preparation") + data = dataset["train"]["audio"][0]["array"] # type: ignore + sd.play(data, SAMPLING_RATE, blocking=True) + dataset = dataset.map( _prepare_dataset, remove_columns=columns_to_remove, @@ 
-101,5 +113,10 @@ def _prepare_dataset(batch: Dict) -> Dict[str, List]: logger.debug(f"Dataset post prep: {dataset}") logger.debug(f"Training labels: {dataset['train']['labels'][0]}") + + # Play some test audio + logger.debug(f"Playing test audio file after dataset preparation") + data = dataset["train"]["input_values"][0] # type: ignore + sd.play(data, SAMPLING_RATE, blocking=True) # logger.debug(f"Training inputs: {dataset['train']['input_values'][0]}") return dataset diff --git a/tests/datasets/test_dataset.py b/tests/datasets/test_dataset.py index c76e76d..d2d33f0 100644 --- a/tests/datasets/test_dataset.py +++ b/tests/datasets/test_dataset.py @@ -1,8 +1,13 @@ +from enum import Enum +from itertools import groupby from pathlib import Path -from typing import List +from typing import List, Optional + +import pytest from elpis.datasets import CleaningOptions, Dataset, ProcessingBatch from elpis.models import ElanOptions +from elpis.models.elan_options import ElanTierSelector # ====== Elan Options ====== ELAN_OPTIONS_DICT = { @@ -43,20 +48,68 @@ def test_serialize_dataset_options(): # ====== Dataset ====== -FILES_WITH_ELAN = ["1.eaf", "1.wav"] -FILES_WITHOUT_ELAN = ["1.txt", "1.wav"] -MISMATCHED_FILES = ["1.eaf", "1.wav", "2.wav", "3.txt"] -COLLIDING_FILES = ["1.eaf", "1.wav", "1.txt"] -MESSY_FILES = ["1.eaf", "1.wav", "2.eaf", "2.txt", "2.wav", "3.eaf", "4.wav"] +class Files(Enum): + ELAN = ["1.eaf", "1.wav"] + TEXT = ["1.txt", "1.wav"] + MISMATCHED = ["1.eaf", "1.wav", "2.wav", "3.txt"] + COLLIDING = ["1.eaf", "1.wav", "1.txt"] + MESSY = ["1.eaf", "1.wav", "2.eaf", "2.txt", "2.wav", "3.eaf", "4.wav"] + + +def create_dataset(files: Files, elan_options: Optional[ElanOptions] = None) -> Dataset: + paths = [Path(x) for x in files.value] + print(f"Creating Dataset for: {files}: with {paths}") + return Dataset( + name="dataset", + files=paths, + cleaning_options=CleaningOptions.from_dict(CLEANING_OPTIONS_DICT), + elan_options=elan_options, + ) + + +@pytest.fixture +def 
elan_options(): + return ElanOptions.from_dict(ELAN_OPTIONS_DICT) + + +@pytest.fixture +def dataset(): + return create_dataset(Files.ELAN) + + +@pytest.fixture +def text_dataset(): + return create_dataset(Files.TEXT) + + +@pytest.fixture +def elan_dataset(elan_options): + return create_dataset(Files.ELAN, elan_options=elan_options) + + +@pytest.fixture +def mismatched_dataset(elan_options): + return create_dataset(Files.MISMATCHED, elan_options=elan_options) + + +@pytest.fixture +def colliding_dataset(elan_options): + return create_dataset(Files.COLLIDING, elan_options=elan_options) + + +@pytest.fixture +def messy_dataset(elan_options): + return create_dataset(Files.MESSY, elan_options=elan_options) + DATASET_DICT = { "name": "dataset", - "files": FILES_WITH_ELAN, + "files": Files.ELAN.value, "cleaning_options": CLEANING_OPTIONS_DICT, } MESSY_DATASET_DICT = { "name": "dataset", - "files": MESSY_FILES, + "files": Files.MESSY.value, "cleaning_options": CLEANING_OPTIONS_DICT, } @@ -66,11 +119,10 @@ def test_serialize_dataset_options(): def to_paths(names: List[str]) -> List[Path]: return [Path(name) for name in names] - def test_build_dataset(): dataset = Dataset.from_dict(DATASET_DICT) assert dataset.name == "dataset" - assert dataset.files == to_paths(FILES_WITH_ELAN) + assert dataset.files == to_paths(Files.ELAN.value) assert dataset.cleaning_options == CleaningOptions.from_dict(CLEANING_OPTIONS_DICT) assert dataset.elan_options is None @@ -78,26 +130,22 @@ def test_build_dataset(): def test_build_dataset_with_elan(): dataset = Dataset.from_dict(DATASET_DICT_ELAN) assert dataset.name == "dataset" - assert dataset.files == to_paths(FILES_WITH_ELAN) + assert dataset.files == to_paths(Files.ELAN.value) assert dataset.cleaning_options == CleaningOptions.from_dict(CLEANING_OPTIONS_DICT) assert dataset.elan_options == ElanOptions.from_dict(ELAN_OPTIONS_DICT) -def test_serialize_dataset(): - dataset = Dataset.from_dict(DATASET_DICT) +def test_serialize_dataset(dataset: 
Dataset, elan_dataset: Dataset): assert dataset.to_dict() == DATASET_DICT - - dataset = Dataset.from_dict(DATASET_DICT_ELAN) - assert dataset.to_dict() == DATASET_DICT_ELAN + assert elan_dataset.to_dict() == DATASET_DICT_ELAN -def test_dataset_is_valid(): - dataset = Dataset.from_dict(DATASET_DICT) +def test_dataset_is_valid(dataset: Dataset, messy_dataset: Dataset): assert dataset.is_valid() + assert not messy_dataset.is_valid() -def test_dataset_is_empty(): - dataset = Dataset.from_dict(DATASET_DICT) +def test_dataset_is_empty(dataset: Dataset): assert not dataset.is_empty() dataset.files = [] @@ -105,41 +153,30 @@ def test_dataset_is_empty(): assert not dataset.is_valid() -def test_dataset_has_elan(): - dataset = Dataset.from_dict(DATASET_DICT) - assert dataset.has_elan() - - dataset.files = to_paths(FILES_WITHOUT_ELAN) - assert not dataset.has_elan() +def test_dataset_has_elan(elan_dataset: Dataset, text_dataset: Dataset): + assert elan_dataset.has_elan() + assert not text_dataset.has_elan() -def test_dataset_mismatched_files(): - dataset = Dataset.from_dict(DATASET_DICT) - assert len(dataset.mismatched_files()) == 0 +def test_dataset_mismatched_files(dataset: Dataset, mismatched_dataset: Dataset): + assert len(dataset.mismatched_files) == 0 + assert mismatched_dataset.mismatched_files == {Path("2.wav"), Path("3.txt")} - dataset.files = to_paths(MISMATCHED_FILES) - assert set(dataset.mismatched_files()) == {Path("2.wav"), Path("3.txt")} +def test_duplicate_files(dataset: Dataset, colliding_dataset: Dataset): + assert len(dataset.colliding_files) == 0 + assert colliding_dataset.colliding_files == {Path("1.eaf"), Path("1.txt")} -def test_duplicate_files(): - dataset = Dataset.from_dict(DATASET_DICT) - assert len(dataset.colliding_files()) == 0 - dataset.files = to_paths(COLLIDING_FILES) - assert set(dataset.colliding_files()) == {Path("1.eaf"), Path("1.txt")} +def test_valid_transcriptions(messy_dataset: Dataset): + assert 
len(list(messy_dataset.valid_transcriptions)) == 1 -def test_valid_transcriptions(): - dataset = Dataset.from_dict(MESSY_DATASET_DICT) - assert len(dataset.valid_transcriptions) == 1 - - -def test_dataset_batching(): - dataset = Dataset.from_dict(DATASET_DICT) - batch = dataset.to_batches() - assert len(batch) == 1 - job = batch[0] - transcript_file, audio_file = to_paths(FILES_WITH_ELAN) +def test_dataset_batching(dataset: Dataset): + batches = list(dataset.to_batches()) + assert len(batches) == 1 + job = batches[0] + transcript_file, audio_file = to_paths(Files.ELAN.value) assert job.transcription_file == transcript_file assert job.audio_file == audio_file assert job.cleaning_options == dataset.cleaning_options @@ -148,8 +185,8 @@ def test_dataset_batching(): # ====== Processing Job ====== VALID_BATCH_DICT = { - "transcription_file": FILES_WITH_ELAN[0], - "audio_file": FILES_WITH_ELAN[1], + "transcription_file": Files.ELAN.value[0], + "audio_file": Files.ELAN.value[1], "cleaning_options": CLEANING_OPTIONS_DICT, "elan_options": ELAN_OPTIONS_DICT, } @@ -157,7 +194,7 @@ def test_dataset_batching(): def test_build_processing_job(): job = ProcessingBatch.from_dict(VALID_BATCH_DICT) - transcript_file, audio_file = to_paths(FILES_WITH_ELAN) + transcript_file, audio_file = to_paths(Files.ELAN.value) assert job.transcription_file == transcript_file assert job.audio_file == audio_file assert job.cleaning_options == CleaningOptions.from_dict(CLEANING_OPTIONS_DICT) From b4d438b197e40e539ab958a60965caf1b4937010 Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Thu, 5 Oct 2023 10:41:23 +1000 Subject: [PATCH 2/8] Improve large dataset performance. 
--- elpis/datasets/dataset.py | 5 ++- tests/datasets/test_dataset.py | 59 +++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/elpis/datasets/dataset.py b/elpis/datasets/dataset.py index 3ec2711..a0e3333 100644 --- a/elpis/datasets/dataset.py +++ b/elpis/datasets/dataset.py @@ -71,6 +71,9 @@ class Dataset: cleaning_options: CleaningOptions elan_options: Optional[ElanOptions] + def __post_init__(self): + self.files = sorted(self.files) + def is_empty(self) -> bool: """Returns true iff the dataset contains no files.""" return len(self.files) == 0 @@ -103,7 +106,7 @@ def corresponding_audio_name(transcript_file: Path) -> Path: @property def transcript_files(self) -> Iterable[Path]: - """Returns a set of all transcription files within the dataset.""" + """Returns an iterable of all transcription files within the dataset.""" return filter(Dataset.is_transcript, self.files) @cached_property diff --git a/tests/datasets/test_dataset.py b/tests/datasets/test_dataset.py index d2d33f0..ca09c18 100644 --- a/tests/datasets/test_dataset.py +++ b/tests/datasets/test_dataset.py @@ -54,10 +54,16 @@ class Files(Enum): MISMATCHED = ["1.eaf", "1.wav", "2.wav", "3.txt"] COLLIDING = ["1.eaf", "1.wav", "1.txt"] MESSY = ["1.eaf", "1.wav", "2.eaf", "2.txt", "2.wav", "3.eaf", "4.wav"] + LARGE = ["1.eaf", "1.wav", "2.eaf", "2.wav", "3.eaf", "3.wav"] -def create_dataset(files: Files, elan_options: Optional[ElanOptions] = None) -> Dataset: +def create_dataset( + files: Files, elan_options: Optional[ElanOptions] = None, absolute_paths=False +) -> Dataset: paths = [Path(x) for x in files.value] + if absolute_paths: + paths = [x.absolute() for x in paths] + print(f"Creating Dataset for: {files}: with {paths}") return Dataset( name="dataset", @@ -102,6 +108,16 @@ def messy_dataset(elan_options): return create_dataset(Files.MESSY, elan_options=elan_options) +@pytest.fixture +def large_dataset(elan_options): + return create_dataset(Files.LARGE, 
elan_options=elan_options) + + +@pytest.fixture +def abs_messy_dataset(elan_options): + return create_dataset(Files.MESSY, elan_options=elan_options, absolute_paths=True) + + DATASET_DICT = { "name": "dataset", "files": Files.ELAN.value, @@ -119,6 +135,7 @@ def messy_dataset(elan_options): def to_paths(names: List[str]) -> List[Path]: return [Path(name) for name in names] + def test_build_dataset(): dataset = Dataset.from_dict(DATASET_DICT) assert dataset.name == "dataset" @@ -158,21 +175,37 @@ def test_dataset_has_elan(elan_dataset: Dataset, text_dataset: Dataset): assert not text_dataset.has_elan() -def test_dataset_mismatched_files(dataset: Dataset, mismatched_dataset: Dataset): +def test_dataset_mismatched_files( + dataset: Dataset, mismatched_dataset: Dataset, abs_messy_dataset: Dataset +): assert len(dataset.mismatched_files) == 0 assert mismatched_dataset.mismatched_files == {Path("2.wav"), Path("3.txt")} + assert abs_messy_dataset.mismatched_files == { + Path("4.wav").absolute(), + Path("3.eaf").absolute(), + } -def test_duplicate_files(dataset: Dataset, colliding_dataset: Dataset): +def test_duplicate_files( + dataset: Dataset, colliding_dataset: Dataset, abs_messy_dataset: Dataset +): assert len(dataset.colliding_files) == 0 assert colliding_dataset.colliding_files == {Path("1.eaf"), Path("1.txt")} + assert abs_messy_dataset.colliding_files == { + Path("2.eaf").absolute(), + Path("2.txt").absolute(), + } -def test_valid_transcriptions(messy_dataset: Dataset): +def test_valid_transcriptions( + dataset: Dataset, messy_dataset: Dataset, abs_messy_dataset: Dataset +): + assert len(list(dataset.valid_transcriptions)) == 1 assert len(list(messy_dataset.valid_transcriptions)) == 1 + assert len(list(abs_messy_dataset.valid_transcriptions)) == 1 -def test_dataset_batching(dataset: Dataset): +def test_basic_dataset_batching(dataset: Dataset): batches = list(dataset.to_batches()) assert len(batches) == 1 job = batches[0] @@ -183,6 +216,22 @@ def 
test_dataset_batching(dataset: Dataset): assert job.elan_options == dataset.elan_options +def test_messy_batching(messy_dataset: Dataset): + batches = list(messy_dataset.to_batches()) + assert len(batches) == 1 + job = batches[0] + transcript_file, audio_file = to_paths(Files.MESSY.value[:2]) + assert job.transcription_file == transcript_file + assert job.audio_file == audio_file + assert job.cleaning_options == messy_dataset.cleaning_options + assert job.elan_options == messy_dataset.elan_options + + +def test_multiple_batches(large_dataset: Dataset): + batches = list(large_dataset.to_batches()) + assert len(batches) == len(Files.LARGE.value) / 2 + + # ====== Processing Job ====== VALID_BATCH_DICT = { "transcription_file": Files.ELAN.value[0], From 06a11d39d458373a59da8788adb9c77136d23995 Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Mon, 9 Oct 2023 14:35:07 +1000 Subject: [PATCH 3/8] Make model-specific arguments flexible, and specifiable in the training job. - Explicitly set up optimizer. 
--- elpis/trainer/job.py | 14 ++++++++------ elpis/trainer/trainer.py | 22 ++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/elpis/trainer/job.py b/elpis/trainer/job.py index bb4f456..73a2efa 100644 --- a/elpis/trainer/job.py +++ b/elpis/trainer/job.py @@ -1,6 +1,6 @@ from __future__ import annotations -from dataclasses import dataclass, fields +from dataclasses import dataclass, field, fields from enum import Enum from pathlib import Path from typing import Any, Dict, Tuple @@ -49,7 +49,8 @@ class TrainingJob: model_name: str dataset_name: str - options: TrainingOptions + options: TrainingOptions # TODO - rename to training_options next major V + model_options: Dict[str, Any] = field(default_factory=dict) status: TrainingStatus = TrainingStatus.WAITING base_model: str = BASE_MODEL sampling_rate: int = SAMPLING_RATE @@ -68,10 +69,10 @@ def to_training_args(self, output_dir: Path, **kwargs) -> TrainingArguments: gradient_checkpointing=True, learning_rate=self.options.learning_rate, weight_decay=0.005, - save_steps=400, - eval_steps=400, - logging_steps=400, - warmup_steps=500, + save_steps=10, + eval_steps=10, + logging_steps=10, + warmup_steps=10, save_total_limit=2, overwrite_output_dir=True, do_train=True, @@ -85,6 +86,7 @@ def from_dict(data: Dict[str, Any]) -> TrainingJob: model_name=data["model_name"], dataset_name=data["dataset_name"], options=TrainingOptions.from_dict(data["options"]), + model_options=data.get("model_options", {}), status=TrainingStatus(data.get("status", TrainingStatus.WAITING)), base_model=data.get("base_model", BASE_MODEL), sampling_rate=data.get("sampling_rate", SAMPLING_RATE), diff --git a/elpis/trainer/trainer.py b/elpis/trainer/trainer.py index bedb880..42bed05 100644 --- a/elpis/trainer/trainer.py +++ b/elpis/trainer/trainer.py @@ -33,7 +33,7 @@ def create_processor( pad_token="[PAD]", word_delimiter_token="|", ) -> Wav2Vec2Processor: - config = AutoConfig.from_pretrained(job.base_model) + config 
= AutoConfig.from_pretrained(job.base_model, cache_dir=cache_dir) tokenizer_type = config.model_type if config.tokenizer_class is None else None config = config if config.tokenizer_class is not None else None @@ -44,7 +44,7 @@ def create_processor( vocab.add(unk_token) vocab.add(pad_token) - vocab.replace(" ", word_delimiter_token) # feels a little restrictive? + vocab.replace(" ", word_delimiter_token) logger.info(f"Vocab: {vocab.vocab}") vocab.save(output_dir) @@ -57,11 +57,11 @@ def create_processor( word_delimiter_token=word_delimiter_token, cache_dir=cache_dir, ) - feature_extractor = AutoFeatureExtractor.from_pretrained( job.base_model, cache_dir=cache_dir ) + AutoProcessor.from_pretrained return Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer) @@ -99,20 +99,16 @@ def train( cache_dir=cache_dir, ctc_loss_reduction="mean", pad_token_id=processor.tokenizer.pad_token_id, # type: ignore - # Wav2vec2 specific hyperparams copied from docs. - attention_dropout=0.1, - hidden_dropout=0.1, - feat_proj_dropout=0.0, - mask_time_prob=0.05, - layerdrop=0.1, - vocab_size=len(processor.tokenizer), # type: ignore - # For Ash -> errors if below param not set. 
+ bos_token_id=processor.tokenizer.bos_token_id, # type: ignore + eos_token_id=processor.tokenizer.eos_token_id, # type: ignore + vocab_size=len(processor.tokenizer.get_vocab()), # type: ignore ignore_mismatched_sizes=True, + **job.model_options ) logger.info("Downloaded model.") if job.options.freeze_feature_extractor: - model.freeze_feature_extractor() + model.freeze_feature_encoder() data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True) output_dir.mkdir(exist_ok=True, parents=True) @@ -127,6 +123,8 @@ def train( compute_metrics=create_metrics(job.metrics, processor), ) + trainer.create_optimizer() + logger.info(f"Begin training model...") trainer.train() logger.info(f"Finished training!") From 7a64f0f1aa16bdb00fa0505e7b1a13cdfcd99632 Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Mon, 9 Oct 2023 14:35:53 +1000 Subject: [PATCH 4/8] Fix bug where replacing vocab character with the same character would cause it to remove that character entirely from the vocab. --- elpis/models/vocab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elpis/models/vocab.py b/elpis/models/vocab.py index 588431b..581bb09 100644 --- a/elpis/models/vocab.py +++ b/elpis/models/vocab.py @@ -42,7 +42,7 @@ def add(self, char: str) -> None: def replace(self, original: str, replacement: str) -> None: """Replaces the supplied character mapping in the vocab.""" - if original not in self.vocab: + if original not in self.vocab or original == replacement: return self.vocab[replacement] = self.vocab[original] From 0d0c8645d869476a1f00735c9adc4ab66db598df Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Mon, 9 Oct 2023 14:36:42 +1000 Subject: [PATCH 5/8] Cleanup dataset processing - Force absolute audio path resolution for dataset files - Manually load in audio from paths with librosa - More logging . 
--- elpis/datasets/processing.py | 55 ++++++++++++++++-------------------- elpis/trainer/job.py | 8 +++--- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/elpis/datasets/processing.py b/elpis/datasets/processing.py index 6975ee2..97feed5 100644 --- a/elpis/datasets/processing.py +++ b/elpis/datasets/processing.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional +import librosa import numpy as np import sounddevice as sd from datasets import Audio, DatasetDict, load_dataset @@ -12,10 +13,13 @@ PROCESSOR_COUNT = 4 AUDIO_COLUMN = "audio" SAMPLING_RATE = 16_000 +LOGGING_TRANSCRIPT_SAMPLE = 2 def create_dataset( - dataset_path: Path, cache_dir: Optional[Path] = None, test_size: float = 0.2 + dataset_path: Path, + cache_dir: Optional[Path] = None, + test_size: float = 0.2, ) -> DatasetDict: """Creates a dataset with test/train splits from the data within a given directory. @@ -33,7 +37,9 @@ def create_dataset( for file in os.listdir(dataset_path) if (dataset_path / file).suffix == ".json" ] - logger.debug(f"Transcript file paths sample: {transcript_files[:4]}") + logger.debug( + f"Transcript file paths sample: {transcript_files[:LOGGING_TRANSCRIPT_SAMPLE]}" + ) # Annoying hack if cache_dir is not None: @@ -43,45 +49,44 @@ def create_dataset( # Convert the audio file name column into the matching audio data dataset = dataset.rename_column("audio_file", AUDIO_COLUMN) - logger.debug(f"Dataset audio file paths sample: {dataset['train'][AUDIO_COLUMN][:4]}") # type: ignore + logger.debug(f"Dataset audio file paths sample: {dataset['train'][AUDIO_COLUMN][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore def resolve_audio_path(row: Dict[str, Any]) -> Dict[str, Any]: # Forcefully resolve to same dir as dataset. 
path = dataset_path / Path(row[AUDIO_COLUMN]).name - row[AUDIO_COLUMN] = str(path) + row[AUDIO_COLUMN] = str(path.absolute()) return row dataset = dataset.map(resolve_audio_path) - logger.debug(f"Dataset audio file paths post-resolution: {dataset['train'][AUDIO_COLUMN][:4]}") # type: ignore - dataset = dataset.cast_column(AUDIO_COLUMN, Audio(sampling_rate=SAMPLING_RATE)) + logger.debug(f"Dataset audio file paths post-resolution: {dataset['train'][AUDIO_COLUMN][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore - logger.debug(f"Sample audio col values: {dataset['train'][AUDIO_COLUMN][0]}") # type: ignore + def load_audio(batch: Dict) -> Dict: + path = batch[AUDIO_COLUMN] + data, sr = librosa.load(path, sr=SAMPLING_RATE, mono=True) - # Play some test audio - logger.debug(f"Playing test audio file") - data = dataset["train"][AUDIO_COLUMN][0]["array"] # type: ignore - sd.play(data, SAMPLING_RATE, blocking=True) + batch["audio"] = {"path": path, "array": data, "sampling_rate": SAMPLING_RATE} + return batch + # dataset = dataset.cast_column(AUDIO_COLUMN, Audio(sampling_rate=SAMPLING_RATE)) + dataset = dataset.map(load_audio) + + # logger.debug(f"Sample audio col values: {dataset['train'][AUDIO_COLUMN][0]}") # type: ignore return dataset["train"].train_test_split(test_size=test_size) # type: ignore def prepare_dataset(dataset: DatasetDict, processor: Wav2Vec2Processor) -> DatasetDict: """Runs some preprocessing over the given dataset. - TODO: I'm going to be honest, I have no idea what this does, and need some - smart ML knight in shining armour to write a propert description. 
- - Parameters: dataset: The dataset to apply the preprocessing + Parameters: + dataset: The dataset on which to apply the preprocessing processor: The processor to apply over the dataset """ - - logger.debug(f"Dataset pre prep: {dataset}") - logger.debug(f"Dataset[train] pre prep: {dataset['train']['transcript'][0]}") + logger.debug(f"Dataset pre-prep: {dataset}") + logger.debug(f"Transcript sample: {dataset['train']['transcript'][0]}") logger.debug( f'Input array shape:, {np.asarray(dataset["train"][0]["audio"]["array"]).shape}' ) logger.debug(f'Sampling rate:, {dataset["train"][0]["audio"]["sampling_rate"]}') - logger.debug(f"Tokenizer vocab: {processor.tokenizer.get_vocab()}") # type: ignore def _prepare_dataset(batch: Dict) -> Dict[str, List]: # Also from https://huggingface.co/blog/fine-tune-xlsr-wav2vec2 @@ -91,7 +96,6 @@ def _prepare_dataset(batch: Dict) -> Dict[str, List]: audio["array"], sampling_rate=audio["sampling_rate"] ).input_values[0] batch["input_length"] = len(batch["input_values"]) - batch["labels"] = processor(text=batch["transcript"]).input_ids return batch @@ -100,23 +104,12 @@ def _prepare_dataset(batch: Dict) -> Dict[str, List]: # flatten and make unique between datasets columns_to_remove = list(set(chain.from_iterable(columns))) - # Play some test audio - logger.debug(f"Playing test audio file before dataset preparation") - data = dataset["train"]["audio"][0]["array"] # type: ignore - sd.play(data, SAMPLING_RATE, blocking=True) - dataset = dataset.map( _prepare_dataset, remove_columns=columns_to_remove, num_proc=PROCESSOR_COUNT, ) - logger.debug(f"Dataset post prep: {dataset}") logger.debug(f"Training labels: {dataset['train']['labels'][0]}") - # Play some test audio - logger.debug(f"Playing test audio file after dataset preparation") - data = dataset["train"]["input_values"][0] # type: ignore - sd.play(data, SAMPLING_RATE, blocking=True) - # logger.debug(f"Training inputs: {dataset['train']['input_values'][0]}") return dataset diff --git 
a/elpis/trainer/job.py b/elpis/trainer/job.py index 73a2efa..6c77ae7 100644 --- a/elpis/trainer/job.py +++ b/elpis/trainer/job.py @@ -69,10 +69,10 @@ def to_training_args(self, output_dir: Path, **kwargs) -> TrainingArguments: gradient_checkpointing=True, learning_rate=self.options.learning_rate, weight_decay=0.005, - save_steps=10, - eval_steps=10, - logging_steps=10, - warmup_steps=10, + save_steps=400, + eval_steps=400, + logging_steps=400, + warmup_steps=400, save_total_limit=2, overwrite_output_dir=True, do_train=True, From 15ed84765cf04bd2bd5877efc8abb396589b69ae Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Tue, 10 Oct 2023 09:56:27 +1000 Subject: [PATCH 6/8] Bad attempt at upgrading all dependencies --- poetry.lock | 679 +++++++++++++++++++++++++++++-------------------- pyproject.toml | 8 +- 2 files changed, 408 insertions(+), 279 deletions(-) diff --git a/poetry.lock b/poetry.lock index a8acf12..d960003 100644 --- a/poetry.lock +++ b/poetry.lock @@ -431,7 +431,7 @@ tests = ["contextlib2", "matplotlib (>=3.3.0)", "pytest", "pytest-cov", "pytest- [[package]] name = "llvmlite" -version = "0.40.1" +version = "0.41.0" description = "lightweight wrapper around basic LLVM functionality" category = "main" optional = false @@ -557,23 +557,23 @@ setuptools = "*" [[package]] name = "numba" -version = "0.57.1" +version = "0.58.0" description = "compiling Python code using LLVM" category = "main" optional = false python-versions = ">=3.8" [package.dependencies] -llvmlite = ">=0.40.0dev0,<0.41" -numpy = ">=1.21,<1.25" +llvmlite = ">=0.41.0dev0,<0.42" +numpy = ">=1.21,<1.26" [[package]] name = "numpy" -version = "1.24.4" +version = "1.25.2" description = "Fundamental package for array computing in Python" category = "main" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" [[package]] name = "packaging" @@ -646,7 +646,7 @@ markdown = ">=3.0" [[package]] name = "pedalboard" -version = "0.6.9" +version = "0.8.3" description = "A Python library 
for adding effects to audio." category = "main" optional = false @@ -884,23 +884,23 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=4.6)", "pytest-cov", [[package]] name = "safetensors" -version = "0.3.3" -description = "Fast and Safe Tensor serialization" +version = "0.4.0" +description = "" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.7" [package.extras] -all = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"] -dev = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"] -jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)"] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] numpy = ["numpy (>=1.21.6)"] -paddlepaddle = ["numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)"] -pinned-tf = ["tensorflow (==2.11.0)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] -tensorflow = ["numpy (>=1.21.6)", "tensorflow (>=2.11.0)"] -testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "numpy (>=1.21.6)", "pytest 
(>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)"] -torch = ["numpy (>=1.21.6)", "torch (>=1.10)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] [[package]] name = "scikit-learn" @@ -959,6 +959,20 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "sounddevice" +version = "0.4.6" +description = "Play and Record Sound with Python" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +CFFI = ">=1.0" + +[package.extras] +numpy = ["NumPy"] + [[package]] name = "soundfile" version = "0.12.1" @@ -994,15 +1008,18 @@ python-versions = ">=3.8" [[package]] name = "tokenizers" -version = "0.13.3" -description = "Fast and Customizable Tokenizers" +version = "0.14.1" +description = "" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.7" + +[package.dependencies] +huggingface_hub = ">=0.16.4,<0.18" [package.extras] -dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] -docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +dev = ["tokenizers[testing]"] +docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] [[package]] @@ -1015,7 +1032,7 @@ python-versions = ">=3.7" [[package]] name = "torch" -version = "2.0.1" +version = "2.1.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" category = "main" optional = false @@ -1023,6 +1040,7 @@ python-versions = ">=3.8.0" [package.dependencies] filelock = "*" +fsspec = "*" jinja2 = "*" networkx = "*" sympy = "*" @@ -1050,7 +1068,7 @@ telegram = ["requests"] [[package]] name = "transformers" -version = "4.33.2" 
+version = "4.34.0" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" category = "main" optional = false @@ -1058,28 +1076,28 @@ python-versions = ">=3.8.0" [package.dependencies] filelock = "*" -huggingface-hub = ">=0.15.1,<1.0" +huggingface-hub = ">=0.16.4,<1.0" numpy = ">=1.17" packaging = ">=20.0" pyyaml = ">=5.1" regex = "!=2019.12.17" requests = "*" safetensors = ">=0.3.1" -tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tokenizers = ">=0.14,<0.15" tqdm = ">=4.27" [package.extras] accelerate = ["accelerate (>=0.20.3)"] agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"] -all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] codecarbon = ["codecarbon (==1.2.0)"] deepspeed 
= ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] -dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", 
"dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] -dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] -docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", 
"keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] +dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", 
"onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.15)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", 
"tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"] docs-specific = ["hf-doc-builder"] fairscale = ["fairscale (>0.3)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"] @@ -1106,11 +1124,11 @@ tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"] tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] timm = ["timm"] -tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +tokenizers = ["tokenizers (>=0.14,<0.15)"] torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-vision = ["Pillow (<10.0.0)", "torchvision"] -torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"] +torchhub = ["filelock", "huggingface-hub (>=0.16.4,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"] video = ["av (==9.2.0)", "decord (==0.6.0)"] vision = ["Pillow (<10.0.0)"] @@ -1178,7 +1196,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "1a7eeb7c2dfab661be19a738a54e7e4845cd234302338c15b75b7cffbb44acf5" +content-hash = "3184bb6d2c863964bbdafedcab7616714537ee961561fb0f39bd405a1e350bc4" [metadata.files] accelerate = [ @@ -1630,30 +1648,30 @@ librosa = [ {file = "librosa-0.9.2.tar.gz", hash = 
"sha256:5b576b5efdce428e90bc988bdd5a953d12a727e5f931f30d74c53b63abbe3c89"}, ] llvmlite = [ - {file = "llvmlite-0.40.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:84ce9b1c7a59936382ffde7871978cddcda14098e5a76d961e204523e5c372fb"}, - {file = "llvmlite-0.40.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3673c53cb21c65d2ff3704962b5958e967c6fc0bd0cff772998face199e8d87b"}, - {file = "llvmlite-0.40.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bba2747cf5b4954e945c287fe310b3fcc484e2a9d1b0c273e99eb17d103bb0e6"}, - {file = "llvmlite-0.40.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbd5e82cc990e5a3e343a3bf855c26fdfe3bfae55225f00efd01c05bbda79918"}, - {file = "llvmlite-0.40.1-cp310-cp310-win32.whl", hash = "sha256:09f83ea7a54509c285f905d968184bba00fc31ebf12f2b6b1494d677bb7dde9b"}, - {file = "llvmlite-0.40.1-cp310-cp310-win_amd64.whl", hash = "sha256:7b37297f3cbd68d14a97223a30620589d98ad1890e5040c9e5fc181063f4ed49"}, - {file = "llvmlite-0.40.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a66a5bd580951751b4268f4c3bddcef92682814d6bc72f3cd3bb67f335dd7097"}, - {file = "llvmlite-0.40.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:467b43836b388eaedc5a106d76761e388dbc4674b2f2237bc477c6895b15a634"}, - {file = "llvmlite-0.40.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c23edd196bd797dc3a7860799054ea3488d2824ecabc03f9135110c2e39fcbc"}, - {file = "llvmlite-0.40.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a36d9f244b6680cb90bbca66b146dabb2972f4180c64415c96f7c8a2d8b60a36"}, - {file = "llvmlite-0.40.1-cp311-cp311-win_amd64.whl", hash = "sha256:5b3076dc4e9c107d16dc15ecb7f2faf94f7736cd2d5e9f4dc06287fd672452c1"}, - {file = "llvmlite-0.40.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4a7525db121f2e699809b539b5308228854ccab6693ecb01b52c44a2f5647e20"}, - {file = "llvmlite-0.40.1-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:84747289775d0874e506f907a4513db889471607db19b04de97d144047fec885"}, - {file = "llvmlite-0.40.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e35766e42acef0fe7d1c43169a8ffc327a47808fae6a067b049fe0e9bbf84dd5"}, - {file = "llvmlite-0.40.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cda71de10a1f48416309e408ea83dab5bf36058f83e13b86a2961defed265568"}, - {file = "llvmlite-0.40.1-cp38-cp38-win32.whl", hash = "sha256:96707ebad8b051bbb4fc40c65ef93b7eeee16643bd4d579a14d11578e4b7a647"}, - {file = "llvmlite-0.40.1-cp38-cp38-win_amd64.whl", hash = "sha256:e44f854dc11559795bcdeaf12303759e56213d42dabbf91a5897aa2d8b033810"}, - {file = "llvmlite-0.40.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f643d15aacd0b0b0dc8b74b693822ba3f9a53fa63bc6a178c2dba7cc88f42144"}, - {file = "llvmlite-0.40.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:39a0b4d0088c01a469a5860d2e2d7a9b4e6a93c0f07eb26e71a9a872a8cadf8d"}, - {file = "llvmlite-0.40.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9329b930d699699846623054121ed105fd0823ed2180906d3b3235d361645490"}, - {file = "llvmlite-0.40.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2dbbb8424037ca287983b115a29adf37d806baf7e1bf4a67bd2cffb74e085ed"}, - {file = "llvmlite-0.40.1-cp39-cp39-win32.whl", hash = "sha256:e74e7bec3235a1e1c9ad97d897a620c5007d0ed80c32c84c1d787e7daa17e4ec"}, - {file = "llvmlite-0.40.1-cp39-cp39-win_amd64.whl", hash = "sha256:ff8f31111bb99d135ff296757dc81ab36c2dee54ed4bd429158a96da9807c316"}, - {file = "llvmlite-0.40.1.tar.gz", hash = "sha256:5cdb0d45df602099d833d50bd9e81353a5e036242d3c003c5b294fc61d1986b4"}, + {file = "llvmlite-0.41.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acc81c1279f858e5eab460844cc381e30d6666bc8eea04724b54d4eeb1fd1e54"}, + {file = "llvmlite-0.41.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:013000a11df84a8b5e4f7fbf2513896ca48441c527d9ae8e375da92bc5575d08"}, + 
{file = "llvmlite-0.41.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b5df30581eb8dbdee0e17a1217debb1d7dcd61a092a09726afff441dad5a67"}, + {file = "llvmlite-0.41.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe265129ecd18957d3653cfb17df1632fa2c57fd0bac1960bc20a8c3ca961197"}, + {file = "llvmlite-0.41.0-cp310-cp310-win32.whl", hash = "sha256:6e477d23afbdddb3dde789d29a771e23bcfa1b12485156370dba9df05d529d94"}, + {file = "llvmlite-0.41.0-cp310-cp310-win_amd64.whl", hash = "sha256:93ce07a0a6d98ff2fcc34e7d2d315d8d09f6a737539e089f1a8cbe4a3a0313bf"}, + {file = "llvmlite-0.41.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dabfb1a28d26b8c01228f59aec90a61324203dda6b1465c596d577d6380545e8"}, + {file = "llvmlite-0.41.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:741bb2ab7712c4763483189f0684163fb3ac44087c617698c50654c7d7ab6a24"}, + {file = "llvmlite-0.41.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7b7022f1e2f652722ddd5697987f1aeaf0c9a64f2ee324e03f6e060b28a1bbd"}, + {file = "llvmlite-0.41.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70feadac822f8840f2db6cbb662f1b349fe5d375d8ceb9c907f3919e005dc705"}, + {file = "llvmlite-0.41.0-cp311-cp311-win_amd64.whl", hash = "sha256:21191c6a9fb4a86d71ec72debbaf39db49590a950c8a2a4ac792c41d16b0a61a"}, + {file = "llvmlite-0.41.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0d94e531c763340344198f2c31af6af7b665e9cd2b354e31afa5cf4abfce0a8e"}, + {file = "llvmlite-0.41.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d8997264291e822689f7d6df4716638f35ff586bef5b8be40e2ba77d6bd9405c"}, + {file = "llvmlite-0.41.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de31585b867e8d9bae0c15f03e8bf541afcff66ffa5f61e401a738274702bdcd"}, + {file = "llvmlite-0.41.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:57c0a3fd031936461f9f24f4cace80a86c9ba09d8b02fa87c209607aae2463cb"}, + {file = "llvmlite-0.41.0-cp38-cp38-win32.whl", hash = "sha256:0c79cb7e88403d6c64385bf1e63797af0884caf1f4afa3c8c4bbef1920e28148"}, + {file = "llvmlite-0.41.0-cp38-cp38-win_amd64.whl", hash = "sha256:6c40e290d930b09bbebe0d05c750b8a9e20af147e8cec8d62aa42e874f46dbfa"}, + {file = "llvmlite-0.41.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:24b3f7e258ea7c07ebf9f70c772e25619de8d207192254beb7644b818a97440b"}, + {file = "llvmlite-0.41.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:876cd5f53cfe51d3a5cf7952dc1a25bd6158f5795739b1f8159c3591b32ed3cb"}, + {file = "llvmlite-0.41.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8218d307bd89535207fea1cc1ef5498afcb6d0203153dba214058715fecdb699"}, + {file = "llvmlite-0.41.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27d9d11c8dcdb8a8e14e92d0be5bba60f15bdf2fc116b8d27cab40221093a1b0"}, + {file = "llvmlite-0.41.0-cp39-cp39-win32.whl", hash = "sha256:a4af8722ad6cb0dd2d5454ebc5a7bf90867df5f3fcb0787396a3261052caefda"}, + {file = "llvmlite-0.41.0-cp39-cp39-win_amd64.whl", hash = "sha256:f150e127d6bc0e74633b8ba210776b0b6fdc82af6dfebf0794318ea97634acd0"}, + {file = "llvmlite-0.41.0.tar.gz", hash = "sha256:7d41db345d76d2dfa31871178ce0d8e9fd8aa015aa1b7d4dab84b5cb393901e0"}, ] loguru = [ {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"}, @@ -1830,60 +1848,54 @@ nodeenv = [ {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, ] numba = [ - {file = "numba-0.57.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db8268eb5093cae2288942a8cbd69c9352f6fe6e0bfa0a9a27679436f92e4248"}, - {file = "numba-0.57.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:643cb09a9ba9e1bd8b060e910aeca455e9442361e80fce97690795ff9840e681"}, - {file = 
"numba-0.57.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:53e9fab973d9e82c9f8449f75994a898daaaf821d84f06fbb0b9de2293dd9306"}, - {file = "numba-0.57.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c0602e4f896e6a6d844517c3ab434bc978e7698a22a733cc8124465898c28fa8"}, - {file = "numba-0.57.1-cp310-cp310-win32.whl", hash = "sha256:3d6483c27520d16cf5d122868b79cad79e48056ecb721b52d70c126bed65431e"}, - {file = "numba-0.57.1-cp310-cp310-win_amd64.whl", hash = "sha256:a32ee263649aa3c3587b833d6311305379529570e6c20deb0c6f4fb5bc7020db"}, - {file = "numba-0.57.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c078f84b5529a7fdb8413bb33d5100f11ec7b44aa705857d9eb4e54a54ff505"}, - {file = "numba-0.57.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e447c4634d1cc99ab50d4faa68f680f1d88b06a2a05acf134aa6fcc0342adeca"}, - {file = "numba-0.57.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4838edef2df5f056cb8974670f3d66562e751040c448eb0b67c7e2fec1726649"}, - {file = "numba-0.57.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b17fbe4a69dcd9a7cd49916b6463cd9a82af5f84911feeb40793b8bce00dfa7"}, - {file = "numba-0.57.1-cp311-cp311-win_amd64.whl", hash = "sha256:93df62304ada9b351818ba19b1cfbddaf72cd89348e81474326ca0b23bf0bae1"}, - {file = "numba-0.57.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8e00ca63c5d0ad2beeb78d77f087b3a88c45ea9b97e7622ab2ec411a868420ee"}, - {file = "numba-0.57.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ff66d5b022af6c7d81ddbefa87768e78ed4f834ab2da6ca2fd0d60a9e69b94f5"}, - {file = "numba-0.57.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:60ec56386076e9eed106a87c96626d5686fbb16293b9834f0849cf78c9491779"}, - {file = "numba-0.57.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6c057ccedca95df23802b6ccad86bb318be624af45b5a38bb8412882be57a681"}, - {file = 
"numba-0.57.1-cp38-cp38-win32.whl", hash = "sha256:5a82bf37444039c732485c072fda21a361790ed990f88db57fd6941cd5e5d307"}, - {file = "numba-0.57.1-cp38-cp38-win_amd64.whl", hash = "sha256:9bcc36478773ce838f38afd9a4dfafc328d4ffb1915381353d657da7f6473282"}, - {file = "numba-0.57.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae50c8c90c2ce8057f9618b589223e13faa8cbc037d8f15b4aad95a2c33a0582"}, - {file = "numba-0.57.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9a1b2b69448e510d672ff9a6b18d2db9355241d93c6a77677baa14bec67dc2a0"}, - {file = "numba-0.57.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3cf78d74ad9d289fbc1e5b1c9f2680fca7a788311eb620581893ab347ec37a7e"}, - {file = "numba-0.57.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f47dd214adc5dcd040fe9ad2adbd2192133c9075d2189ce1b3d5f9d72863ef05"}, - {file = "numba-0.57.1-cp39-cp39-win32.whl", hash = "sha256:a3eac19529956185677acb7f01864919761bfffbb9ae04bbbe5e84bbc06cfc2b"}, - {file = "numba-0.57.1-cp39-cp39-win_amd64.whl", hash = "sha256:9587ba1bf5f3035575e45562ada17737535c6d612df751e811d702693a72d95e"}, - {file = "numba-0.57.1.tar.gz", hash = "sha256:33c0500170d213e66d90558ad6aca57d3e03e97bb11da82e6d87ab793648cb17"}, + {file = "numba-0.58.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f146c11af62ad25021d93fccf48715a96d1ea76d43c1c3bc97dca561c6a2693"}, + {file = "numba-0.58.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8059ee491651885f89655f08856a107aa610e3355b373f3b7437f1da96f09703"}, + {file = "numba-0.58.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bd9edd27ab29e80bcf4083f9955c4a8871075a13a370b3bef99f81e184541fa"}, + {file = "numba-0.58.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ee9f5fd962e0ada0e68df67a6ff881f95b45e0ae7cb96141e913337040d490b"}, + {file = "numba-0.58.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:398ab539257df8e980ec2f9cdfae836bb965fadc2dd30db3fcfbf3aefa542836"}, + {file = "numba-0.58.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e61a1fa0ab7d290f0a43d8523b372f96765db6ceb6a691660c17e9ed609cb470"}, + {file = "numba-0.58.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8a9b69cc6259131791822c5eb893b03cd9372f4aae669d020500565b6d5d80bc"}, + {file = "numba-0.58.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e7b42b23c36cf08fcfe1a8f2acf3a0af95b41f9ee07fc81b28d7b9b5ada85d8c"}, + {file = "numba-0.58.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0d7a5e81e4047a23986f816b48ac46616ceb4eadbff6bbe739944d36b3bdbfe7"}, + {file = "numba-0.58.0-cp311-cp311-win_amd64.whl", hash = "sha256:0ce322178ff7006b7f50dad25b042ef64c6393f2fafafa79c0498d789b1aac27"}, + {file = "numba-0.58.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3934eab4eb1c07c8f067e99350b99f70b2ca77d5aa3911d365643171f771157"}, + {file = "numba-0.58.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cee5f22f7fbb2ef445e422aeafe5d38bf71a52c8bb34d22c1e145afa4034d6b"}, + {file = "numba-0.58.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:121bf98a2b02e0611af3bfab3995fed990db58c4bfc6c225332ccdaf37e312e7"}, + {file = "numba-0.58.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0734614d3e92eb01f848b8595be116f9c8ad997f8cf77672f3ba53c511f1429d"}, + {file = "numba-0.58.0-cp38-cp38-win_amd64.whl", hash = "sha256:48bcaae337ee450e38bf3796b4e1a166909c339f1757b6110e6adcf42c1e6c3e"}, + {file = "numba-0.58.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a5f99806d5c9671dc927a8a489bc0c88e79be51e9775d6a3c68dbfdf585cd7e9"}, + {file = "numba-0.58.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9dade55ee5f1b8c5e3e0db95449fdc5b7b4244c1a7fa133bd664cbfc1027bafe"}, + {file = "numba-0.58.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:7e182f3296dfcbafcd23b9263baeb350ad5adcacd081f1b3ec927a9fb325cca8"}, + {file = "numba-0.58.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f80aee7889e82ab9c4770e02b21ca4e3ca15cc8c829c173fc27b77ab0529b5cb"}, + {file = "numba-0.58.0-cp39-cp39-win_amd64.whl", hash = "sha256:477f429bb593dd3fc8d84b44f199e8e30268a7cfeb96c8464cb393d401de4f45"}, + {file = "numba-0.58.0.tar.gz", hash = "sha256:e5d5a318dc65a101ef846d7fd93f3cf2f7942494019e8342e51238b360739125"}, ] numpy = [ - {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, - {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, - {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, - {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, - {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, - {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, - {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, - {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, - {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, - {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, - {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, - {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, - {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, - {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, - {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, - {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, - {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, - {file = 
"numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, - {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, - {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + 
{file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] packaging = [ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, @@ -1919,50 +1931,62 @@ pdoc3 = [ {file = "pdoc3-0.10.0.tar.gz", hash = "sha256:5f22e7bcb969006738e1aa4219c75a32f34c2d62d46dc9d2fb2d3e0b0287e4b7"}, ] pedalboard = [ - {file = "pedalboard-0.6.9-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:1e1bf950edac2a7279b0154635cf63551eaf8e32bfecaae293f28c0de03f5189"}, - {file = "pedalboard-0.6.9-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:bca130da91e33ba674fa6e7e3bc93660d5e4a72e173bbd3210c67564a776b73c"}, - {file = "pedalboard-0.6.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c724e13fb74e7997333610876502479ee43915cd6c663095d4855941620af72a"}, - {file = "pedalboard-0.6.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b616349ce8258a6e88b6bc6ce53b9a90b0c3770aaa64be1d8db0f63b95437335"}, - {file = "pedalboard-0.6.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d14e6812847368cea4374ea12bacbbf1400580dbe92b6fcf2c887574634eab13"}, - {file = "pedalboard-0.6.9-cp310-cp310-win_amd64.whl", hash = "sha256:b84864660134d7f6c03278405a8699d90e82461bdba9a140fe887190685ab2b9"}, - {file = "pedalboard-0.6.9-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:9185f2f1e8d5952d413bdc9af0e41d71e978d7f989cf3c51a4552ba1dc6bf975"}, - {file = "pedalboard-0.6.9-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:40b8f05ff52a8f1afd7e0f8ed5d2add9414958055b122e67493101119ff5d772"}, - {file = 
"pedalboard-0.6.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e365fd1057008a01d4473d568f543f4e4f7d1d82d498351b240575465966bb2d"}, - {file = "pedalboard-0.6.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4f14bf7e3a33ae360aa895c1a87d0ffc8df17ec93f4660bae15aa4e097fa272"}, - {file = "pedalboard-0.6.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58559445d9bc598fad76241bb24867a7d50db702b37d48da8dd0a745e3d1d31e"}, - {file = "pedalboard-0.6.9-cp311-cp311-win_amd64.whl", hash = "sha256:f2270c56e2223d3df35dce474aff5d28cd2085f805a2c0cd49bc3abb93728da5"}, - {file = "pedalboard-0.6.9-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:2b90723c75ac1a566600895ba05e46d673f4c27dfd541d60295f82fc533d228b"}, - {file = "pedalboard-0.6.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:228fc8a976160d731371eafcecd7ddf02daa716b6b6399972d007d7da93ff7f2"}, - {file = "pedalboard-0.6.9-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c65dea5d3a7dfa46041d0fdfd77b4abcc89541438888adf3163036efa3e72df6"}, - {file = "pedalboard-0.6.9-cp36-cp36m-win_amd64.whl", hash = "sha256:865be52a0986890bd40598a2f7cf865e82cc19c0e1416789d6b750ad07b66dde"}, - {file = "pedalboard-0.6.9-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:cbf73ae589e04b4d90ab1ea004a3fe1e3d6e9a37b29502e08f1e31ca5fdf9ffd"}, - {file = "pedalboard-0.6.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f3ddc859251bb05aa28e6bf9d6e6203034fc0a3608f5c6a1d0938f059beec40"}, - {file = "pedalboard-0.6.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9cb165bc714fad3c284d79ce8e84ae2b0cb55c206e5b6b38cbee2c425ec1d444"}, - {file = "pedalboard-0.6.9-cp37-cp37m-win_amd64.whl", hash = "sha256:6f43987156010dfbcdcab1ddbb340cec50780f77386d045d962587fae40bc82b"}, - {file = "pedalboard-0.6.9-cp38-cp38-macosx_10_13_universal2.whl", hash = 
"sha256:9519596708f5513b0e249567dd94fd23f076d759ae5e95efb6bec03d465424c2"}, - {file = "pedalboard-0.6.9-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:f2e4a0605bfa7470a961f4596a9c0d948af0eab651cd4cad81f7a4471a060d23"}, - {file = "pedalboard-0.6.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:29434e9323bc7905689e6bad460cf73366f0b465c550b90ee842c661d7c2dba8"}, - {file = "pedalboard-0.6.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f450958ad01031e0118336c9c77f091ef63f2b3e7b500b1eed734ad9c17bee4"}, - {file = "pedalboard-0.6.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d890d531690e5f1c57d7cd26ce961b0cacd4484c3c623c954fa59590346adaa4"}, - {file = "pedalboard-0.6.9-cp38-cp38-win_amd64.whl", hash = "sha256:abf27ddcdf889ed4621a2ea1a509c977c5dcb88039f34ac2c73219f3a334cf02"}, - {file = "pedalboard-0.6.9-cp39-cp39-macosx_10_13_universal2.whl", hash = "sha256:a861d6acd7bee1bdb69538cf3360889852018645c6c929c8c2dac83e87a6e89b"}, - {file = "pedalboard-0.6.9-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:d2af8e61937f3e737b6be62fb0881c8cb6a4c9759029256cc4a1cca90257caed"}, - {file = "pedalboard-0.6.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c1ad0d6517fff4f8357b1388dd5a65a96fe76f1906b373d42e56f55ba5d53d27"}, - {file = "pedalboard-0.6.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e41c84d3abbd31fc8bb0ad654be96b67abb40c19329b4bf3c74db7cf8649cba"}, - {file = "pedalboard-0.6.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bff9c7f4d87602ffe5c830b70264315e8fa91369fb13e148fa13f5591562f6b8"}, - {file = "pedalboard-0.6.9-cp39-cp39-win_amd64.whl", hash = "sha256:2352699d3096bf3993c829e11c409a775397cf89f025d1e945619beea1dd26be"}, - {file = "pedalboard-0.6.9-pp37-pypy37_pp73-macosx_10_13_x86_64.whl", hash = "sha256:de0804202009f2c42a359adbf48fcc1f2df00ca888333a0a7e3e36b11c1b177f"}, - {file = 
"pedalboard-0.6.9-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c15198832d1552edbd8b35426be0ea8747ff6d606219d813a71e935a24dd02d6"}, - {file = "pedalboard-0.6.9-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13060b4052b7aa1b5bc9d8173dabdfdca3f4582a60bc71902328299a7909d6a3"}, - {file = "pedalboard-0.6.9-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:885438727326602b7f02347c6a6dd72d7deb523e1b4406da0cd3fb519101eb33"}, - {file = "pedalboard-0.6.9-pp38-pypy38_pp73-macosx_10_13_x86_64.whl", hash = "sha256:3c5820d37badad420fddb1e730771e177dcd3ba4beafae659f51b35e86df3794"}, - {file = "pedalboard-0.6.9-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:032379e02a46005dfc98b09ad3822013563243ec025592f6836ce0354f485817"}, - {file = "pedalboard-0.6.9-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c9be3cf70c175789983eed282f3fb59e32adc29c20d13d2077bab1963d874d2"}, - {file = "pedalboard-0.6.9-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baa874994585d7c715d461a885abe24909d8826e8094e8bba656f565d8969ab8"}, - {file = "pedalboard-0.6.9-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:319a3677f1e12e954758d163403e84c4bc50a0699e4cb005bf0f976a73253b18"}, - {file = "pedalboard-0.6.9-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ef9f5e35aa2c07ba018370abc8269f61a969383d8b44746fe1fbf5dbb1bca6d"}, - {file = "pedalboard-0.6.9-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3605299afd9bacb24f679d81d5f74f512d0a542e07477d34f9838930b43b8bcb"}, - {file = "pedalboard-0.6.9-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7246943ed1613d3719a5d2f7029f730225295fbde0dd75c536be654b405e8cb6"}, + {file = "pedalboard-0.8.3-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:3aae30436c55e100f1487faaf83b99c0860b00592ca2e7e9311f4778fa8d4f32"}, + {file = 
"pedalboard-0.8.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:988a91eca3ff85d2f2e09fb79ebfd39a76c1ca0193b7b711da219f0667613346"}, + {file = "pedalboard-0.8.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:20a1c66f1c5f3bbfbb19c1a365851a4ac349e63d28e24d516c34f4c2080df12e"}, + {file = "pedalboard-0.8.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb8ce61dd96c98379c2f550e77e736ff5196a278a704e44a717a4b7e0ae39f5b"}, + {file = "pedalboard-0.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4bdbd6476f5be5a0aa7fb06ae37334099430e0eccad762316634f2743e7f03d"}, + {file = "pedalboard-0.8.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9ccb05deba14881477c90e215f261c8c0460a528cd4ad5d8464fdd4688fb8e85"}, + {file = "pedalboard-0.8.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:457517d1b7b3756fbc07d65010d54ce41153a7ceafa39b58091d7cf9b55c8a4c"}, + {file = "pedalboard-0.8.3-cp310-cp310-win_amd64.whl", hash = "sha256:7455104fe3b24f769ad96355010747c0536c4d1a8e58a175512b6e3988cb7197"}, + {file = "pedalboard-0.8.3-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:bd257fe602ed771b7c6d7342e715192d5103f11c1118e676badda8f4b1db0e2b"}, + {file = "pedalboard-0.8.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:c8a094ae6298947acb008600f6125bceafb919f8e88ccd291bb48a800193458e"}, + {file = "pedalboard-0.8.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b2af29dfac931df8e918b893dd25a76c1c2fe0148ce2bd69776a6b8b9851f51f"}, + {file = "pedalboard-0.8.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d5913af790d5fa6c82a2151a4ab715f0b024f1a95eb62a9d3481920b49ad1a3"}, + {file = "pedalboard-0.8.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8584f34b3735adee75b3c1c2dc47486b18f262cf104b15d63e1a8a49436f504"}, + {file = "pedalboard-0.8.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:8824dcc12cbb3d3ee4d140efff22f172603162927064293869ef97156c067cf7"}, + {file = "pedalboard-0.8.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:068af69efb0128f39a3bac173a370e0ba268004fe73e68b0f4fe08e8ef80a142"}, + {file = "pedalboard-0.8.3-cp311-cp311-win_amd64.whl", hash = "sha256:0a49e8cc00b8b29dc4dc6e0810a6cf3d385b9243a89bf9c7a7b4385ae8a70b1a"}, + {file = "pedalboard-0.8.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:13a92fc17d211917e5e9dd7a7e8ed3fa804d99d4786a6871d8c07b2cc6a9dac0"}, + {file = "pedalboard-0.8.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a28f81f06375fce6db2bd7a3aabf759141b2e1ba78ebc1edc936cd0471369fea"}, + {file = "pedalboard-0.8.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86df057b47662a81c13281709d5dcd70fdd9f7cf7f9f9382d832ec2db537219a"}, + {file = "pedalboard-0.8.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c8d6d8d8aac4f3dc86228d6508aefd5f64d34661fedee43ded332c31801d04a"}, + {file = "pedalboard-0.8.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5c67e3704e0b7956e3db0f9085a407b888a6c0aa31c5c09d8cf52a0c858821e"}, + {file = "pedalboard-0.8.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:40060e639aec947c117f55b95a028d8c5347bf285a473cb3a79de886033f3beb"}, + {file = "pedalboard-0.8.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cd3f9de1cd4ec50643acb89f9db63ee569fee859e2975c335ae68b3e07b3e162"}, + {file = "pedalboard-0.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:a53a721ca66d3f785abb963b609545667378bab6da36f9f571fddae9afd47546"}, + {file = "pedalboard-0.8.3-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:17f4622dd151b78de6ba4b4517a2b8372ce32ca3a66f807230a6336bef056420"}, + {file = "pedalboard-0.8.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c3bcf34555a8b71ff2e0cb08df047bfeae8e2b846c4a1307a4ff6a108e76b8f"}, + {file = 
"pedalboard-0.8.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b5104c5c93d4932b78e8bb5f7c3b5f857411a2e918b64bd5a3d4c706bd59c38"}, + {file = "pedalboard-0.8.3-cp36-cp36m-win_amd64.whl", hash = "sha256:890c52166abaf160cefb486366488ed46cdeb3921c4aef42e8c5719c4d3d2cc3"}, + {file = "pedalboard-0.8.3-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:80bbb68218746fd373787135cc256f8b9b7272b20e1f8d111538923756e01d7e"}, + {file = "pedalboard-0.8.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f302182ac91c8008467f4823bc5f45fb469b9e7796fbf03c6e2c7b1f826b407"}, + {file = "pedalboard-0.8.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1fe73b85f45c63c96d3f638def928a9354a33ce725efd205112efd1158bbe87"}, + {file = "pedalboard-0.8.3-cp37-cp37m-win_amd64.whl", hash = "sha256:8f45accc179b70d39a43c58b5d29b8fc51e18050a20f3ce0ae3b8d820ed575e0"}, + {file = "pedalboard-0.8.3-cp38-cp38-macosx_10_13_universal2.whl", hash = "sha256:43e0f04fd3ca2888134255b58dfefe11597dd376a2e70f4206afc916a58182e2"}, + {file = "pedalboard-0.8.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:e02d123cbcdf31a61b63adb7e8e94cf2bfb5b6704413d6795afe25b653acca0e"}, + {file = "pedalboard-0.8.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9f3bcc724f45b034d6af98b87af2fdc545a0de78471af88fbccf09d24b803e27"}, + {file = "pedalboard-0.8.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d1303e258a893e66fcf0af0efe30a72402d2164405bb25c1ef52135581566db"}, + {file = "pedalboard-0.8.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97d179f819c0fc02b016918d958090dd515d00149183e4501ca13c680c7b8054"}, + {file = "pedalboard-0.8.3-cp38-cp38-win_amd64.whl", hash = "sha256:0065bba007aaf31980614a7f0b9951959822c25b0f98aae52882d225e019e375"}, + {file = "pedalboard-0.8.3-cp39-cp39-macosx_10_13_universal2.whl", hash = 
"sha256:23e0c0516d8fd7103895d81c64943d5a0bd64652a86ee0716517b620eebd766a"}, + {file = "pedalboard-0.8.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:7e75380fc91efe6940d0cb28d8ce07f2c9661ad8bea1664128cf18ff1036d4c8"}, + {file = "pedalboard-0.8.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63381808a8b775211b447599ca7871f5a2dc986ad4b2eb725db6bd2b1d0518ff"}, + {file = "pedalboard-0.8.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eefb9a865289da5f3e8bbb2fd457de20e038c058a85bd99c2bb0c9949faac7ec"}, + {file = "pedalboard-0.8.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0616bffd9a5d0ff89c843dcc5182b0ef957c4c3b177890f1004229174c16c"}, + {file = "pedalboard-0.8.3-cp39-cp39-win_amd64.whl", hash = "sha256:dc8ae01474977184fffc037837aee34f83423b994802a58786fb62cc34b73db3"}, + {file = "pedalboard-0.8.3-pp37-pypy37_pp73-macosx_10_13_x86_64.whl", hash = "sha256:694b9836498aa3a573d813feb23367ff2d135f7517b051760b906b66fb4c0c22"}, + {file = "pedalboard-0.8.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8931fc5b62da9b52c16b4a76dfa91345fec4da3a280538d6e6c849868b73a6af"}, + {file = "pedalboard-0.8.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0aef54d4078cb03a8f8df9c5b8730f10db52499523358a19cfe2fc7b74351"}, + {file = "pedalboard-0.8.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:d19ebda38f2f8ab54db8fc0e61a1d0ceaab53f55f8888242578aec6b7048b234"}, + {file = "pedalboard-0.8.3-pp38-pypy38_pp73-macosx_10_13_x86_64.whl", hash = "sha256:f848be0db2691927f69dca5f3bf1bd216377fc8673437d224d70d184ff2e40b5"}, + {file = "pedalboard-0.8.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43a7d1033d1f2892d49a70eea113d05d139b3b6f862d23623df534350e65333c"}, + {file = "pedalboard-0.8.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:14d9183d4a8ab8f778f9bb00cc0b34d408c64ea14785062cfe7b40946b461a6b"}, + {file = "pedalboard-0.8.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f8f0a2d85d5f5af08ce0ce72235b688a4772fd26c445753b6404278da27b4f68"}, + {file = "pedalboard-0.8.3-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:7641a8991069abf455c13c959841762e94cdbd513462e3d227f3a324ebea4905"}, + {file = "pedalboard-0.8.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8592d9e126151c1e4ebdccb4b78c8106a0978503faca445d1082eed071c96b8e"}, + {file = "pedalboard-0.8.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc0bae55fd0ca7715ab54fdbf2d639bba817eec94799d44ef7ca0d59bd92bee0"}, + {file = "pedalboard-0.8.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4c59190e0340966aa27a8389c839df173299292fc40b4dcd961b9553e807f704"}, ] platformdirs = [ {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, @@ -2290,64 +2314,104 @@ responses = [ {file = "responses-0.18.0.tar.gz", hash = "sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff"}, ] safetensors = [ - {file = "safetensors-0.3.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:92e4d0c8b2836120fddd134474c5bda8963f322333941f8b9f643e5b24f041eb"}, - {file = "safetensors-0.3.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3dcadb6153c42addc9c625a622ebde9293fabe1973f9ef31ba10fb42c16e8536"}, - {file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:08f26b61e1b0a14dc959aa9d568776bd038805f611caef1de04a80c468d4a7a4"}, - {file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:17f41344d9a075f2f21b289a49a62e98baff54b5754240ba896063bce31626bf"}, - {file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f1045f798e1a16a6ced98d6a42ec72936d367a2eec81dc5fade6ed54638cd7d2"}, - {file = 
"safetensors-0.3.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:eaf0e4bc91da13f21ac846a39429eb3f3b7ed06295a32321fa3eb1a59b5c70f3"}, - {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25149180d4dc8ca48bac2ac3852a9424b466e36336a39659b35b21b2116f96fc"}, - {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9e943bf78c39de8865398a71818315e7d5d1af93c7b30d4da3fc852e62ad9bc"}, - {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cccfcac04a010354e87c7a2fe16a1ff004fc4f6e7ef8efc966ed30122ce00bc7"}, - {file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07121f427e646a50d18c1be0fa1a2cbf6398624c31149cd7e6b35486d72189e"}, - {file = "safetensors-0.3.3-cp310-cp310-win32.whl", hash = "sha256:a85e29cbfddfea86453cc0f4889b4bcc6b9c155be9a60e27be479a34e199e7ef"}, - {file = "safetensors-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:e13adad4a3e591378f71068d14e92343e626cf698ff805f61cdb946e684a218e"}, - {file = "safetensors-0.3.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:cbc3312f134baf07334dd517341a4b470b2931f090bd9284888acb7dfaf4606f"}, - {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d15030af39d5d30c22bcbc6d180c65405b7ea4c05b7bab14a570eac7d7d43722"}, - {file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:f84a74cbe9859b28e3d6d7715ac1dd3097bebf8d772694098f6d42435245860c"}, - {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:10d637423d98ab2e6a4ad96abf4534eb26fcaf8ca3115623e64c00759374e90d"}, - {file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:3b46f5de8b44084aff2e480874c550c399c730c84b2e8ad1bddb062c94aa14e9"}, - {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e76da691a82dfaf752854fa6d17c8eba0c8466370c5ad8cf1bfdf832d3c7ee17"}, - {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4e342fd54e66aa9512dd13e410f791e47aa4feeb5f4c9a20882c72f3d272f29"}, - {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:178fd30b5dc73bce14a39187d948cedd0e5698e2f055b7ea16b5a96c9b17438e"}, - {file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e8fdf7407dba44587ed5e79d5de3533d242648e1f2041760b21474bd5ea5c8c"}, - {file = "safetensors-0.3.3-cp311-cp311-win32.whl", hash = "sha256:7d3b744cee8d7a46ffa68db1a2ff1a1a432488e3f7a5a97856fe69e22139d50c"}, - {file = "safetensors-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f579877d30feec9b6ba409d05fa174633a4fc095675a4a82971d831a8bb60b97"}, - {file = "safetensors-0.3.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:2fff5b19a1b462c17322998b2f4b8bce43c16fe208968174d2f3a1446284ceed"}, - {file = "safetensors-0.3.3-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:41adb1d39e8aad04b16879e3e0cbcb849315999fad73bc992091a01e379cb058"}, - {file = "safetensors-0.3.3-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:0f2b404250b3b877b11d34afcc30d80e7035714a1116a3df56acaca6b6c00096"}, - {file = "safetensors-0.3.3-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:b43956ef20e9f4f2e648818a9e7b3499edd6b753a0f5526d4f6a6826fbee8446"}, - {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d61a99b34169981f088ccfbb2c91170843efc869a0a0532f422db7211bf4f474"}, - {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0008aab36cd20e9a051a68563c6f80d40f238c2611811d7faa5a18bf3fd3984"}, - {file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93d54166072b143084fdcd214a080a088050c1bb1651016b55942701b31334e4"}, - {file = 
"safetensors-0.3.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c32ee08f61cea56a5d62bbf94af95df6040c8ab574afffaeb7b44ae5da1e9e3"}, - {file = "safetensors-0.3.3-cp37-cp37m-win32.whl", hash = "sha256:351600f367badd59f7bfe86d317bb768dd8c59c1561c6fac43cafbd9c1af7827"}, - {file = "safetensors-0.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:034717e297849dae1af0a7027a14b8647bd2e272c24106dced64d83e10d468d1"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8530399666748634bc0b301a6a5523756931b0c2680d188e743d16304afe917a"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:9d741c1f1621e489ba10aa3d135b54202684f6e205df52e219d5eecd673a80c9"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:0c345fd85b4d2093a5109596ff4cd9dfc2e84992e881b4857fbc4a93a3b89ddb"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:69ccee8d05f55cdf76f7e6c87d2bdfb648c16778ef8acfd2ecc495e273e9233e"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:c08a9a4b7a4ca389232fa8d097aebc20bbd4f61e477abc7065b5c18b8202dede"}, - {file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:a002868d2e3f49bbe81bee2655a411c24fa1f8e68b703dec6629cb989d6ae42e"}, - {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bd2704cb41faa44d3ec23e8b97330346da0395aec87f8eaf9c9e2c086cdbf13"}, - {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2951bf3f0ad63df5e6a95263652bd6c194a6eb36fd4f2d29421cd63424c883"}, - {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07114cec116253ca2e7230fdea30acf76828f21614afd596d7b5438a2f719bd8"}, - {file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6ab43aeeb9eadbb6b460df3568a662e6f1911ecc39387f8752afcb6a7d96c087"}, - {file = "safetensors-0.3.3-cp38-cp38-win32.whl", hash = "sha256:f2f59fce31dd3429daca7269a6b06f65e6547a0c248f5116976c3f1e9b73f251"}, - {file = "safetensors-0.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:c31ca0d8610f57799925bf08616856b39518ab772c65093ef1516762e796fde4"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:59a596b3225c96d59af412385981f17dd95314e3fffdf359c7e3f5bb97730a19"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:82a16e92210a6221edd75ab17acdd468dd958ef5023d9c6c1289606cc30d1479"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:98a929e763a581f516373ef31983ed1257d2d0da912a8e05d5cd12e9e441c93a"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:12b83f1986cd16ea0454c636c37b11e819d60dd952c26978310a0835133480b7"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:f439175c827c2f1bbd54df42789c5204a10983a30bc4242bc7deaf854a24f3f0"}, - {file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:0085be33b8cbcb13079b3a8e131656e05b0bc5e6970530d4c24150f7afd76d70"}, - {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3ec70c87b1e910769034206ad5efc051069b105aac1687f6edcd02526767f4"}, - {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f490132383e5e490e710608f4acffcb98ed37f91b885c7217d3f9f10aaff9048"}, - {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79d1b6c7ed5596baf79c80fbce5198c3cdcc521ae6a157699f427aba1a90082d"}, - {file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad3cc8006e7a86ee7c88bd2813ec59cd7cc75b03e6fa4af89b9c7b235b438d68"}, - {file = "safetensors-0.3.3-cp39-cp39-win32.whl", hash = 
"sha256:ab29f54c6b8c301ca05fa014728996bd83aac6e21528f893aaf8945c71f42b6d"}, - {file = "safetensors-0.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:0fa82004eae1a71e2aa29843ef99de9350e459a0fc2f65fc6ee0da9690933d2d"}, - {file = "safetensors-0.3.3.tar.gz", hash = "sha256:edb7072d788c4f929d0f5735d3a2fb51e5a27f833587828583b7f5747af1a2b8"}, + {file = "safetensors-0.4.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:2289ae6dbe6d027ecee016b28ced13a2e21a0b3a3a757a23033a2d1c0b1bad55"}, + {file = "safetensors-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bf6458959f310f551cbbeef2255527ade5f783f952738e73e4d0136198cc3bfe"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6b60a58a8f7cc7aed3b5b73dce1f5259a53c83d9ba43a76a874e6ad868c1b4d"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:491b3477e4d0d4599bb75d79da4b75af2e6ed9b1f6ec2b715991f0bc927bf09a"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59d2e10b7e0cd18bb73ed7c17c624a5957b003b81345e18159591771c26ee428"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f667a4c12fb593f5f66ce966cb1b14a7148898b2b1a7f79e0761040ae1e3c51"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f9909512bcb6f712bdd04c296cdfb0d8ff73d258ffc5af884bb62ea02d221e0"}, + {file = "safetensors-0.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33d29e846821f0e4f92614022949b09ccf063cb36fe2f9fe099cde1efbfbb87"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4d512525a8e05a045ce6698066ba0c5378c174a83e0b3720a8c7799dc1bb06f3"}, + {file = "safetensors-0.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0219cea445177f6ad1f9acd3a8d025440c8ff436d70a4a7c7ba9c36066aa9474"}, + {file = 
"safetensors-0.4.0-cp310-none-win32.whl", hash = "sha256:67ab171eeaad6972d3971c53d29d53353c67f6743284c6d637b59fa3e54c8a94"}, + {file = "safetensors-0.4.0-cp310-none-win_amd64.whl", hash = "sha256:7ffc736039f08a9ca1f09816a7481b8e4469c06e8f8a5ffa8cb67ddd79e6d77f"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:4fe9e3737b30de458225a23926219ca30b902ee779b6a3df96eaab2b6d625ec2"}, + {file = "safetensors-0.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7916e814a90008de767b1c164a1d83803693c661ffe9af5a697b22e2752edb0"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cbc4a4da01143472323c145f3c289e5f6fabde0ac0a3414dabf912a21692fff4"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a54c21654a47669b38e359e8f852af754b786c9da884bb61ad5e9af12bd71ccb"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:25cd407955bad5340ba17f9f8ac789a0d751601a311e2f7b2733f9384478c95e"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82e8fc4e3503cd738fd40718a430fe0e5ce6e7ff91a73d6ce628bbb89c41e8ce"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48b92059b1a4ad163024d4f526e0e73ebe2bb3ae70537e15e347820b4de5dc27"}, + {file = "safetensors-0.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5daa05058f7dce85b5f9f60c4eab483ed7859d63978f08a76e52e78859ff20ca"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a86565a5c112dd855909e20144947b4f53abb78c4de207f36ca71ee63ba5b90d"}, + {file = "safetensors-0.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38032078ed9fea52d06584e441bccc73fb475c4581600c6d6166de2fe2deb3d1"}, + {file = "safetensors-0.4.0-cp311-none-win32.whl", hash = 
"sha256:2f99d90c91b7c76b40a862acd9085bc77f7974a27dee7cfcebe46149af5a99a1"}, + {file = "safetensors-0.4.0-cp311-none-win_amd64.whl", hash = "sha256:74e2a448ffe19be188b457b130168190ee73b5a75e45ba96796320c1f5ae35d2"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:1e2f9c69b41d03b4826ffb96b29e07444bb6b34a78a7bafd0b88d59e8ec75b8a"}, + {file = "safetensors-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3910fb5bf747413b59f1a34e6d2a993b589fa7d919709518823c70efaaa350bd"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8fdca709b2470a35a59b1e6dffea75cbe1214b22612b5dd4c93947697aea8b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f27b8ef814c5fb43456caeb7f3cbb889b76115180aad1f42402839c14a47c5b"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b2d6101eccc43c7be0cb052f13ceda64288b3d8b344b988ed08d7133cbce2f3"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdc34027b545a69be3d4220c140b276129523e4e46db06ad1a0b60d6a4cf9214"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db7bb48ca9e90bb9526c71b388d38d8de160c0354f4c5126df23e8701a870dcb"}, + {file = "safetensors-0.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a78ffc0795d3595cd9e4d453502e35f764276c49e434b25556a15a337db4dafc"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8e735b0f79090f6855b55e205e820b7b595502ffca0009a5c13eef3661ce465b"}, + {file = "safetensors-0.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f8d2416734e850d5392afffbcb2b8985ea29fb171f1cb197e2ae51b8e35d6438"}, + {file = "safetensors-0.4.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:e853e189ba7d47eaf561094586692ba2bbdd258c096f1755805cac098de0e6ab"}, + 
{file = "safetensors-0.4.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:4b2aa57b5a4d576f3d1dd6e56980026340f156f8a13c13016bfac4e25295b53f"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b6c1316ffde6cb4bf22c7445bc9fd224b4d1b9dd7320695f5611c89e802e4b6"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:003077ec85261d00061058fa12e3c1d2055366b02ce8f2938929359ffbaff2b8"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd63d83a92f1437a8b0431779320376030ae43ace980bea5686d515de0784100"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2077801800b4b13301d8d6290c7fb5bd60737320001717153ebc4371776643b5"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7abe0e157a49a75aeeccfbc4f3dac38d8f98512d3cdb35c200f8e628dc5773cf"}, + {file = "safetensors-0.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bfed574f6b1e7e7fe1f17213278875ef6c6e8b1582ab6eda93947db1178cae6"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:964ef166a286ce3b023d0d0bd0e21d440a1c8028981c8abdb136bc7872ba9b3d"}, + {file = "safetensors-0.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:44f84373e42183bd56a13a1f2d8acb1db7fedaeffbd83e79cec861477eee1af4"}, + {file = "safetensors-0.4.0-cp37-none-win32.whl", hash = "sha256:c68132727dd86fb641102e494d445f705efe402f4d5e24b278183a15499ab400"}, + {file = "safetensors-0.4.0-cp37-none-win_amd64.whl", hash = "sha256:1db87155454c168aef118d5657a403aee48a4cb08d8851a981157f07351ea317"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:9e583fa68e5a07cc859c4e13c1ebff12029904aa2e27185cf04a1f57fe9a81c4"}, + {file = "safetensors-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:73e7696dcf3f72f99545eb1abe6106ad65ff1f62381d6ce4b34be3272552897a"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4936096a57c62e84e200f92620a536be067fc5effe46ecc7f230ebb496ecd579"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87b328ee1591adac332543e1f5fc2c2d7f149b745ebb0d58d7850818ff9cee27"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b69554c143336256260eceff1d3c0969172a641b54d4668489a711b05f92a2c0"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ebf6bcece5d5d1bd6416472f94604d2c834ca752ac60ed42dba7157e595a990"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6686ce01b8602d55a7d9903c90d4a6e6f90aeb6ddced7cf4605892d0ba94bcb8"}, + {file = "safetensors-0.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9b8fd6cc2f3bda444a048b541c843c7b7fefc89c4120d7898ea7d5b026e93891"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6abfe67692f81b8bdb99c837f28351c17e624ebf136970c850ee989c720446"}, + {file = "safetensors-0.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:27a24ca8822c469ee452db4c13418ba983315a0d863c018a9af15f2305eac38c"}, + {file = "safetensors-0.4.0-cp38-none-win32.whl", hash = "sha256:c4a0a47c8640167792d8261ee21b26430bbc39130a7edaad7f4c0bc05669d00e"}, + {file = "safetensors-0.4.0-cp38-none-win_amd64.whl", hash = "sha256:a738970a367f39249e2abb900d9441a8a86d7ff50083e5eaa6e7760a9f216014"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:806379f37e1abd5d302288c4b2f4186dd7ea7143d4c7811f90a8077f0ae8967b"}, + {file = "safetensors-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b9b94133ed2ae9dda0e95dcace7b7556eba023ffa4c4ae6df8f99377f571d6a"}, + {file = 
"safetensors-0.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b563a14c43614815a6b524d2e4edeaace50b717f7e7487bb227dd5b68350f5a"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00a9b157be660fb7ba88fa2eedd05ec93793a5b61e43e783e10cb0b995372802"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8f194f45ab6aa767993c24f0aeb950af169dbc5d611b94c9021a1d13b8a1a34"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:469360b9451db10bfed3881378d5a71b347ecb1ab4f42367d77b8164a13af70b"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5f75fa97ccf32a3c7af476c6a0e851023197d3c078f6de3612008fff94735f9"}, + {file = "safetensors-0.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:acf0180283c2efae72f1d8c0a4a7974662091df01be3aa43b5237b1e52ed0a01"}, + {file = "safetensors-0.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cd02b495ba0814619f40bda46771bb06dbbf1d42524b66fa03b2a736c77e4515"}, + {file = "safetensors-0.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c42bdea183dbaa99e2f0e6120dc524df79cf4289a6f90f30a534444ef20f49fa"}, + {file = "safetensors-0.4.0-cp39-none-win32.whl", hash = "sha256:cef7bb5d9feae7146c3c3c7b3aef7d2c8b39ba7f5ff4252d368eb69462a47076"}, + {file = "safetensors-0.4.0-cp39-none-win_amd64.whl", hash = "sha256:79dd46fb1f19282fd12f544471efb97823ede927cedbf9cf35550d92b349fdd2"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:002301c1afa32909f83745b0c124d002e7ae07e15671f3b43cbebd0ffc5e6037"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:67762d36ae088c73d4a3c96bfc4ea8d31233554f35b6cace3a18533238d462ea"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:0f45230f20a206e5e4c7f7bbf9342178410c6f8b0af889843aa99045a76f7691"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f2ca939bbd8fb2f4dfa28e39a146dad03bc9325e9fc831b68f7b98f69a5a2f1"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:61a00f281391fae5ce91df70918bb61c12d2d514a493fd8056e12114be729911"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:435fd136a42492b280cb55126f9ce9535b35dd49df2c5d572a5945455a439448"}, + {file = "safetensors-0.4.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f0daa788273d683258fb1e4a5e16bef4486b2fca536451a2591bc0f4a6488895"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0620ab0d41e390ccb1c4ea8f63dc00cb5f0b96a5cdd3cd0d64c21765720c074a"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1fa8d067733cb67f22926689ee808f08afacf7700d2ffb44efae90a0693eb1"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dcaa40bc363edda145db75cd030f3b1822e5478d550c3500a42502ecef32c959"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b561fbc044db7beff2ece0ec219a291809d45a38d30c6b38e7cc46482582f4ba"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:79a983b09782dacf9a1adb19bb98f4a8f6c3144108939f572c047b5797e43cf5"}, + {file = "safetensors-0.4.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:10b65cd3ad79f5d0daf281523b4146bc271a34bb7430d4e03212e0de8622dab8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:114decacc475a6a9e2f9102a00c171d113ddb5d35cb0bda0db2c0c82b2eaa9ce"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:72ddb741dd5fe42521db76a70e012f76995516a12e7e0ef26be03ea9be77802a"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c5556c2ec75f5a6134866eddd7341cb36062e6edaea343478a279591b63ddba"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed50f239b0ce7ae85b078395593b4a351ede7e6f73af25f4873e3392336f64c9"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:495dcaea8fbab70b927d2274e2547824462737acbf98ccd851a71124f779a5c6"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3f4d90c79a65ba2fe2ff0876f6140748f0a3ce6a21e27a35190f4f96321803f8"}, + {file = "safetensors-0.4.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7a524382b5c55b5fbb168e0e9d3f502450c8cf3fb81b93e880018437c206a482"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:9849ea60c7e840bfdd6030ad454d4a6ba837b3398c902f15a30460dd6961c28c"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:6c42623ae7045615d9eaa6877b9df1db4e9cc71ecc14bcc721ea1e475dddd595"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80cb8342f00f3c41b3b93b1a599b84723280d3ac90829bc62262efc03ab28793"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8c4f5ed4ede384dea8c99bae76b0718a828dbf7b2c8ced1f44e3b9b1a124475"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40d7cf03493bfe75ef62e2c716314474b28d9ba5bf4909763e4b8dd14330c01a"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:232029f0a9fa6fa1f737324eda98a700409811186888536a2333cbbf64e41741"}, + {file = "safetensors-0.4.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:9ed55f4a20c78ff3e8477efb63c8303c2152cdfb3bfea4d025a80f54d38fd628"}, + {file = "safetensors-0.4.0.tar.gz", hash = "sha256:b985953c3cf11e942eac4317ef3db3da713e274109cf7cfb6076d877054f013e"}, ] scikit-learn = [ {file = "scikit-learn-1.3.0.tar.gz", hash = "sha256:8be549886f5eda46436b6e555b0e4873b4f10aa21c07df45c4bc1735afbccd7a"}, @@ -2403,6 +2467,13 @@ six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +sounddevice = [ + {file = "sounddevice-0.4.6-py3-none-any.whl", hash = "sha256:5de768ba6fe56ad2b5aaa2eea794b76b73e427961c95acad2ee2ed7f866a4b20"}, + {file = "sounddevice-0.4.6-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:8b0b806c205dd3e3cd5a97262b2482624fd21db7d47083b887090148a08051c8"}, + {file = "sounddevice-0.4.6-py3-none-win32.whl", hash = "sha256:e3ba6e674ffa8f79a591d744a1d4ab922fe5bdfd4faf8b25069a08e051010b7b"}, + {file = "sounddevice-0.4.6-py3-none-win_amd64.whl", hash = "sha256:7830d4f8f8570f2e5552942f81d96999c5fcd9a0b682d6fc5d5c5529df23be2c"}, + {file = "sounddevice-0.4.6.tar.gz", hash = "sha256:3236b78f15f0415bdf006a620cef073d0c0522851d66f4a961ed6d8eb1482fe9"}, +] soundfile = [ {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, @@ -2422,80 +2493,138 @@ threadpoolctl = [ {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"}, ] tokenizers = [ - {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, - {file = 
"tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, - {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, - {file = "tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, - {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, - {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, - {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, - {file = 
"tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, - {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, - {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, - {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, - {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, - {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, - {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, - {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, - {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, - {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, - {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, - {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, - {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, - {file = 
"tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, - {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, + {file = "tokenizers-0.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:04ec1134a18ede355a05641cdc7700f17280e01f69f2f315769f02f7e295cf1e"}, + {file = "tokenizers-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:638abedb39375f0ddce2de536fc9c976639b2d1b7202d715c2e7a25f0ebfd091"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:901635098565773a44f74068639d265f19deaaca47ea77b428fd9bee13a61d87"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e95184bf5b9a4c08153ed07c16c130ff174835c9a1e6ee2b311be758c8b3ef"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ebefbc26ccff5e96ae7d40772172e7310174f9aa3683d2870a1882313ec3a4d5"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3a6330c9f1deda22873e8b4ac849cc06d3ff33d60b3217ac0bb397b541e1509"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6cba7483ba45600346a35c466bde32327b108575022f73c35a0f7170b5a71ae2"}, + {file = "tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60fec380778d75cbb492f14ca974f11f37b41d53c057b9c8ba213315b86e1f84"}, + {file = "tokenizers-0.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:930c19b699dd7e1077eac98967adc2fe5f0b104bd96cc1f26778ab82b31ceb24"}, + {file = "tokenizers-0.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a1e30a13376db5329570e09b14c8eb36c017909ed7e88591ca3aa81f3c7d6f32"}, + {file = "tokenizers-0.14.1-cp310-none-win32.whl", hash = 
"sha256:370b5b86da9bddbe65fa08711f0e8ffdf8b0036558178d1a31dfcb44efcde72a"}, + {file = "tokenizers-0.14.1-cp310-none-win_amd64.whl", hash = "sha256:c2c659f2106b6d154f118ad1b700e68148c46c59b720f04867b1fc5f26a85060"}, + {file = "tokenizers-0.14.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:00df4c5bf25c153b432b98689609b426ae701a44f3d8074dcb619f410bc2a870"}, + {file = "tokenizers-0.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fee553657dcdb7e73df8823c49e8611457ba46e9d7026b7e9c44820c08c327c3"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a480bd902e327dfcaa52b7dd14fdc71e7aa45d73a3d6e41e028a75891d2823cf"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e448b2be0430ab839cf7954715c39d6f34ff6cf2b49393f336283b7a59f485af"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c11444984aecd342f0cf160c3320288edeb1763871fbb560ed466654b2a7016c"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe164a1c72c6be3c5c26753c6c412f81412f4dae0d7d06371e0b396a9cc0fc9"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72d9967fb1f927542cfb5347207fde01b29f25c9bb8cbc7ced280decfa015983"}, + {file = "tokenizers-0.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37cc955c84ec67c2d11183d372044399342b20a1fa447b7a33040f4889bba318"}, + {file = "tokenizers-0.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:db96cf092d86d4cb543daa9148e299011e0a40770380bb78333b9fd700586fcb"}, + {file = "tokenizers-0.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c84d3cb1349936c2b96ca6175b50f5a9518170bffd76464219ee0ea6022a64a7"}, + {file = "tokenizers-0.14.1-cp311-none-win32.whl", hash = "sha256:8db3a6f3d430ac3dc3793c53fa8e5e665c23ba359484d365a191027ad8b65a30"}, + {file = 
"tokenizers-0.14.1-cp311-none-win_amd64.whl", hash = "sha256:c65d76052561c60e17cb4fa289885ed00a9995d59e97019fac2138bd45142057"}, + {file = "tokenizers-0.14.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:c375161b588982be381c43eb7158c250f430793d0f708ce379a0f196164c6778"}, + {file = "tokenizers-0.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50f03d2330a153a9114c2429061137bd323736059f384de8348d7cb1ca1baa15"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0c8ee283b249c3c3c201c41bc23adc3be2514ae4121eacdb5c5250a461eaa8c6"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9f27399b8d50c5d3f08f0aae961bcc66a1dead1cd0ae9401e4c2a43a623322a"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89cbeec7e9d5d8773ec4779c64e3cbcbff53d234ca6ad7b1a3736588003bba48"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08e55920b453c30b46d58accc68a38e8e7488d0c03babfdb29c55d3f39dd2052"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91d32bd1056c0e83a0f90e4ffa213c25096b2d8b9f0e2d172a45f138c7d8c081"}, + {file = "tokenizers-0.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44f1748035c36c939848c935715bde41734d9249ab7b844ff9bfbe984be8952c"}, + {file = "tokenizers-0.14.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1ff516d129f01bb7a4aa95bc6aae88e4d86dd63bfc2d57db9302c2624d1be7cb"}, + {file = "tokenizers-0.14.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:acfc8db61c6e919d932448cc7985b85e330c8d745528e12fce6e62d40d268bce"}, + {file = "tokenizers-0.14.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:ba336bc9107acbc1da2ad30967df7b2db93448ca66538ad86aa1fbb91116f631"}, + {file = "tokenizers-0.14.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = 
"sha256:f77371b5030e53f8bf92197640af437539e3bba1bc8342b97888c8e26567bfdc"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d72d25c57a9c814240802d188ff0a808b701e2dd2bf1c64721c7088ceeeb1ed7"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caf0df8657277e32671aa8a4d3cc05f2050ab19d9b49447f2265304168e9032c"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb3c6bc6e599e46a26ad559ad5dec260ffdf705663cc9b894033d64a69314e86"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8cf2fcdc2368df4317e05571e33810eeed24cd594acc9dfc9788b21dac6b3a8"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f475d5eda41d2ed51ca775a07c80529a923dd759fcff7abf03ccdd83d9f7564e"}, + {file = "tokenizers-0.14.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cce4d1a97a7eb2253b5d3f29f4a478d8c37ba0303ea34024eb9e65506d4209f8"}, + {file = "tokenizers-0.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ff66577ae55114f7d0f6aa0d4d335f27cae96bf245962a745b718ec887bbe7eb"}, + {file = "tokenizers-0.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a687099e085f5162e5b88b3402adb6c2b41046180c015c5075c9504440b6e971"}, + {file = "tokenizers-0.14.1-cp37-none-win32.whl", hash = "sha256:49f5336b82e315a33bef1025d247ca08d95719715b29e33f0e9e8cf15ff1dfb6"}, + {file = "tokenizers-0.14.1-cp37-none-win_amd64.whl", hash = "sha256:117c8da60d1bd95a6df2692926f36de7971baa1d89ff702fae47b6689a4465ad"}, + {file = "tokenizers-0.14.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:01d2bd5935642de22a6c6778bb2307f9949cd6eaeeb5c77f9b98f0060b69f0db"}, + {file = "tokenizers-0.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b05ec04132394c20bd6bcb692d557a8eb8ab1bac1646d28e49c67c00907d17c8"}, + {file = 
"tokenizers-0.14.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7d9025b185465d9d18679406f6f394850347d5ed2681efc203539d800f36f459"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2539831838ab5393f78a893d7bbf27d5c36e43baf77e91dc9992922b2b97e09d"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec8f46d533092d8e20bc742c47918cbe24b8641dbfbbcb83177c5de3c9d4decb"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b019c4810903fdea3b230f358b9d27377c0f38454778b607676c9e1b57d14b7"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e8984114fd83ed3913d89526c992395920930c9620a2feee61faf035f41d7b9a"}, + {file = "tokenizers-0.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11284b32f0036fe7ef4b8b00201dda79c00f3fcea173bc0e5c599e09c937ab0f"}, + {file = "tokenizers-0.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:53614f44f36917282a583180e402105bc63d61d1aca067d51cb7f051eb489901"}, + {file = "tokenizers-0.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e3b6082e9532309727273443c8943bb9558d52e36788b246aa278bda7c642116"}, + {file = "tokenizers-0.14.1-cp38-none-win32.whl", hash = "sha256:7560fca3e17a6bc876d20cd825d7721c101fa2b1cd0bfa0abf9a2e781e49b37b"}, + {file = "tokenizers-0.14.1-cp38-none-win_amd64.whl", hash = "sha256:c318a5acb429ca38f632577754235140bbb8c5a27faca1c51b43fbf575596e34"}, + {file = "tokenizers-0.14.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:b886e0f5c72aa4249c609c24b9610a9ca83fd963cbb5066b19302723ea505279"}, + {file = "tokenizers-0.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f522f28c88a0d5b2f9e895cf405dd594cd518e99d61905406aec74d30eb6383b"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:5bef76c4d9329913cef2fe79ce1f4dab98f77fa4887e5f0420ffc9386941de32"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c7df2103052b30b7c76d4fa8251326c9f82689578a912698a127dc1737f43e"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:232445e7b85255ccfe68dfd42185db8a3f3349b34ad7068404856c4a5f67c355"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e63781da85aa8948864970e529af10abc4084a990d30850c41bbdb5f83eee45"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5760a831c0f3c6d3229b50ef3fafa4c164ec99d7e8c2237fe144e67a9d33b120"}, + {file = "tokenizers-0.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c84b456ff8525ec3ff09762e32ccc27888d036dcd0ba2883e1db491e164dd725"}, + {file = "tokenizers-0.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:463ee5f3afbfec29cbf5652752c9d1032bdad63daf48bb8cb9970064cc81d5f9"}, + {file = "tokenizers-0.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ee6b63aecf929a7bcf885bdc8a8aec96c43bc4442f63fe8c6d48f24fc992b05b"}, + {file = "tokenizers-0.14.1-cp39-none-win32.whl", hash = "sha256:aae42798ba1da3bc1572b2048fe42e61dd6bacced2b424cb0f5572c5432f79c2"}, + {file = "tokenizers-0.14.1-cp39-none-win_amd64.whl", hash = "sha256:68c4699147dded6926a3d2c2f948d435d54d027f69909e0ef3c6587933723ed2"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:5f9afdcf701a1aa3c41e0e748c152d2162434d61639a1e5d8523ecf60ae35aea"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6859d81243cd09854be9054aca3ecab14a2dee5b3c9f6d7ef12061d478ca0c57"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7975178f9478ccedcf613332d5d6f37b67c74ef4e2e47e0c965597506b921f04"}, + {file 
= "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ce2f0ff2e5f12ac5bebaa690606395725239265d7ffa35f35c243a379316297"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7cfc3d42e81cda802f93aa9e92caf79feaa1711426e28ce620560b8aaf5e4d"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:67d3adff654dc7f7c7091dd259b3b847fe119c08d0bda61db91e2ea2b61c38c0"}, + {file = "tokenizers-0.14.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:956729b7dd599020e57133fb95b777e4f81ee069ff0a70e80f6eeac82658972f"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:fe2ea1177146a7ab345ab61e90a490eeea25d5f063e1cb9d4eb1425b169b64d7"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9930f31f603ecc6ea54d5c6dfa299f926ab3e921f72f94babcb02598c32b57c6"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d49567a2754e9991c05c2b5a7e6650b56e24365b7cab504558e58033dcf0edc4"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3678be5db330726f19c1949d8ae1b845a02eeb2a2e1d5a8bb8eaa82087ae25c1"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:42b180ed1bec58ab9bdc65d406577e0c0fb7241b74b8c032846073c7743c9f86"}, + {file = "tokenizers-0.14.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:319e4367596fb0d52be645b3de1616faf0fadaf28507ce1c7595bebd9b4c402c"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:2cda65b689aec63b7c76a77f43a08044fa90bbc6ad9849267cedfee9795913f3"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:ca0bfc79b27d84fcb7fa09339b2ee39077896738d9a30ff99c0332376e985072"}, + {file = 
"tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a7093767e070269e22e2c5f845e46510304f124c32d2cd249633c0f27eb29d86"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad759ba39cd32c2c2247864d02c84ea5883b5f6cc6a4ee0c95602a3dde52268f"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26fee36a6d8f2bd9464f3566b95e3e3fb7fd7dad723f775c500aac8204ec98c6"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d091c62cb7abbd32e527a85c41f7c8eb4526a926251891fc4ecbe5f974142ffb"}, + {file = "tokenizers-0.14.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ca304402ea66d58f99c05aa3d7a6052faea61e5a8313b94f6bc36fbf27960e2d"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:102f118fa9b720b93c3217c1e239ed7bc1ae1e8dbfe9b4983a4f2d7b4ce6f2ec"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:df4f058e96e8b467b7742e5dba7564255cd482d3c1e6cf81f8cb683bb0433340"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:040ee44efc1806900de72b13c1c3036154077d9cde189c9a7e7a50bbbdcbf39f"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7618b84118ae704f7fa23c4a190bd80fc605671841a4427d5ca14b9b8d9ec1a3"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ecdfe9736c4a73343f629586016a137a10faed1a29c6dc699d8ab20c2d3cf64"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:92c34de04fec7f4ff95f7667d4eb085c4e4db46c31ef44c3d35c38df128430da"}, + {file = "tokenizers-0.14.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:628b654ba555b2ba9111c0936d558b14bfc9d5f57b8c323b02fc846036b38b2f"}, + {file = 
"tokenizers-0.14.1.tar.gz", hash = "sha256:ea3b3f8908a9a5b9d6fc632b5f012ece7240031c44c6d4764809f33736534166"}, ] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] torch = [ - {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, - {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, - {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, - {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"}, - {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"}, - {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"}, - {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"}, - {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"}, - {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"}, - {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"}, - {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"}, - {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = 
"sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"}, - {file = "torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"}, - {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"}, - {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"}, - {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"}, - {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"}, - {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"}, - {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, - {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, + {file = "torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bf57f8184b2c317ef81fb33dc233ce4d850cd98ef3f4a38be59c7c1572d175db"}, + {file = "torch-2.1.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a04a0296d47f28960f51c18c5489a8c3472f624ec3b5bcc8e2096314df8c3342"}, + {file = "torch-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0bd691efea319b14ef239ede16d8a45c246916456fa3ed4f217d8af679433cc6"}, + {file = "torch-2.1.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:101c139152959cb20ab370fc192672c50093747906ee4ceace44d8dd703f29af"}, + {file = "torch-2.1.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a6b7438a90a870e4cdeb15301519ae6c043c883fcd224d303c5b118082814767"}, + {file = "torch-2.1.0-cp311-cp311-manylinux1_x86_64.whl", hash = 
"sha256:2224622407ca52611cbc5b628106fde22ed8e679031f5a99ce286629fc696128"}, + {file = "torch-2.1.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:8132efb782cd181cc2dcca5e58effbe4217cdb2581206ac71466d535bf778867"}, + {file = "torch-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:5c3bfa91ce25ba10116c224c59d5b64cdcce07161321d978bd5a1f15e1ebce72"}, + {file = "torch-2.1.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:601b0a2a9d9233fb4b81f7d47dca9680d4f3a78ca3f781078b6ad1ced8a90523"}, + {file = "torch-2.1.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:3cd1dedff13884d890f18eea620184fb4cd8fd3c68ce3300498f427ae93aa962"}, + {file = "torch-2.1.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fb7bf0cc1a3db484eb5d713942a93172f3bac026fcb377a0cd107093d2eba777"}, + {file = "torch-2.1.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:761822761fffaa1c18a62c5deb13abaa780862577d3eadc428f1daa632536905"}, + {file = "torch-2.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:458a6d6d8f7d2ccc348ac4d62ea661b39a3592ad15be385bebd0a31ced7e00f4"}, + {file = "torch-2.1.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:c8bf7eaf9514465e5d9101e05195183470a6215bb50295c61b52302a04edb690"}, + {file = "torch-2.1.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:05661c32ec14bc3a157193d0f19a7b19d8e61eb787b33353cad30202c295e83b"}, + {file = "torch-2.1.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:556d8dd3e0c290ed9d4d7de598a213fb9f7c59135b4fee144364a8a887016a55"}, + {file = "torch-2.1.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:de7d63c6ecece118684415a3dbd4805af4a4c1ee1490cccf7405d8c240a481b4"}, + {file = "torch-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:2419cf49aaf3b2336c7aa7a54a1b949fa295b1ae36f77e2aecb3a74e3a947255"}, + {file = "torch-2.1.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6ad491e70dbe4288d17fdbfc7fbfa766d66cbe219bc4871c7a8096f4a37c98df"}, + {file = "torch-2.1.0-cp39-none-macosx_11_0_arm64.whl", hash = 
"sha256:421739685eba5e0beba42cb649740b15d44b0d565c04e6ed667b41148734a75b"}, ] tqdm = [ {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, ] transformers = [ - {file = "transformers-4.33.2-py3-none-any.whl", hash = "sha256:5a9a757bea5b5a1b94796805bcb5978b552208a3ac193f46edda66be6f4a5488"}, - {file = "transformers-4.33.2.tar.gz", hash = "sha256:47dd36f302afec86d9cdcacab61bbd0296e6bb02e64d2ed7855daaab14ee290e"}, + {file = "transformers-4.34.0-py3-none-any.whl", hash = "sha256:3f0187183a7f22c51ecbbc9eac5145df666c5b86bec6feed10e11f0363f3a1f9"}, + {file = "transformers-4.34.0.tar.gz", hash = "sha256:cc2ae61bfbfaa45337fd9017326669fc60e4f55125f589d50da47819e3d6f504"}, ] typing-extensions = [ {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, diff --git a/pyproject.toml b/pyproject.toml index 4cf07db..c4f97fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,17 +11,17 @@ keywords = ["Elpis", "huggingface", "ASR", "Automatic Speech Recognition", "CoED [tool.poetry.dependencies] python = "^3.10" -torch = "^2.0.1" -transformers = "^4.33.2" +transformers = "^4.34.0" datasets = "^2.6.1" loguru = "^0.6.0" pympi-ling = "^1.70.2" -pedalboard = "^0.6.2" librosa = "^0.9.2" evaluate = "^0.4.0" -numba = "^0.57.1" accelerate = "^0.22.0" jiwer = "^3.0.3" +sounddevice = "^0.4.6" +torch = "^2.1.0" +pedalboard = "^0.8.3" [tool.poetry.dev-dependencies] pytest = "^7.1.3" From f3e3a07fb9e891721511eaea1e4b40f00eaa5cab Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Sat, 14 Oct 2023 18:52:41 +1000 Subject: [PATCH 7/8] Trainer upgrades (#13) * Refactor trainer code to expose as much flexibility to user as possible w.r.t. training options. * Update tests * Finally get a working wav2vec2 training run. 
* Add example script for working wav2vec2 run. --- .gitignore | 1 - elpis/datasets/clean_text.py | 2 +- elpis/datasets/processing.py | 130 +++--- elpis/models/job.py | 311 ++++++++++++++ elpis/trainer/__init__.py | 5 +- elpis/trainer/data_collator.py | 104 ++++- elpis/trainer/guide.py | 582 +++++++++++++++++++++++++++ elpis/trainer/job.py | 99 ----- elpis/trainer/trainer.py | 364 ++++++++++++----- example.py | 139 +++++++ tests/datasets/test_processing.py | 32 +- tests/test_elpis.py | 28 +- tests/trainer/test_job.py | 23 -- tests/trainer/test_trainer.py | 36 +- tests/transcriber/test_transcribe.py | 2 +- 15 files changed, 1539 insertions(+), 319 deletions(-) create mode 100644 elpis/models/job.py create mode 100644 elpis/trainer/guide.py delete mode 100644 elpis/trainer/job.py create mode 100644 example.py delete mode 100644 tests/trainer/test_job.py diff --git a/.gitignore b/.gitignore index 49cbf63..11e335c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ *.pyc testdir -testscript.py # Packages *.egg diff --git a/elpis/datasets/clean_text.py b/elpis/datasets/clean_text.py index f18a3cc..76b445d 100644 --- a/elpis/datasets/clean_text.py +++ b/elpis/datasets/clean_text.py @@ -17,7 +17,7 @@ def clean_text( Returns: The cleaned text """ - words = text.lower().split() + words = text.upper().split() if words_to_remove is not None: words = filter(lambda word: word not in words_to_remove, words) diff --git a/elpis/datasets/processing.py b/elpis/datasets/processing.py index 97feed5..085821a 100644 --- a/elpis/datasets/processing.py +++ b/elpis/datasets/processing.py @@ -1,37 +1,37 @@ import os -from itertools import chain from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List -import librosa -import numpy as np -import sounddevice as sd from datasets import Audio, DatasetDict, load_dataset from loguru import logger -from transformers import Wav2Vec2Processor +from transformers import AutoFeatureExtractor, 
AutoTokenizer + +from elpis.models.job import Job -PROCESSOR_COUNT = 4 -AUDIO_COLUMN = "audio" -SAMPLING_RATE = 16_000 LOGGING_TRANSCRIPT_SAMPLE = 2 def create_dataset( - dataset_path: Path, - cache_dir: Optional[Path] = None, + job: Job, test_size: float = 0.2, ) -> DatasetDict: """Creates a dataset with test/train splits from the data within a given directory. Parameters: - dataset_path: The path to the unprocessed dataset files. - cache_dir: The path to save the processed dataset. + job: The training job to run. test_size: The percentage of the dataset to allocate as the test set. Returns: A dataset dictionary with test and train splits. """ + dataset_path = Path(job.data_args.dataset_name_or_path) + if not dataset_path.is_dir(): + raise ValueError( + f"Attempting to create local dataset from non-existent " + f"directory: {dataset_path}." + ) + transcript_files = [ str(dataset_path / file) for file in os.listdir(dataset_path) @@ -41,75 +41,93 @@ def create_dataset( f"Transcript file paths sample: {transcript_files[:LOGGING_TRANSCRIPT_SAMPLE]}" ) - # Annoying hack - if cache_dir is not None: - cache_dir = str(cache_dir) # type: ignore - - dataset = load_dataset("json", data_files=transcript_files, cache_dir=cache_dir) # type: ignore + dataset = load_dataset("json", data_files=transcript_files, cache_dir=job.model_args.cache_dir) # type: ignore # Convert the audio file name column into the matching audio data - dataset = dataset.rename_column("audio_file", AUDIO_COLUMN) - logger.debug(f"Dataset audio file paths sample: {dataset['train'][AUDIO_COLUMN][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore + audio_column = job.data_args.audio_column_name + dataset = dataset.rename_column("audio_file", audio_column) + logger.debug(f"Dataset audio file paths sample: {dataset['train'][audio_column][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore def resolve_audio_path(row: Dict[str, Any]) -> Dict[str, Any]: # Forcefully resolve to same dir as dataset. 
- path = dataset_path / Path(row[AUDIO_COLUMN]).name - row[AUDIO_COLUMN] = str(path.absolute()) + path = dataset_path / Path(row[audio_column]).name + row[audio_column] = str(path.absolute()) return row dataset = dataset.map(resolve_audio_path) - logger.debug(f"Dataset audio file paths post-resolution: {dataset['train'][AUDIO_COLUMN][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore - - def load_audio(batch: Dict) -> Dict: - path = batch[AUDIO_COLUMN] - data, sr = librosa.load(path, sr=SAMPLING_RATE, mono=True) + logger.debug(f"Dataset audio file paths post-resolution: {dataset['train'][audio_column][:LOGGING_TRANSCRIPT_SAMPLE]}") # type: ignore - batch["audio"] = {"path": path, "array": data, "sampling_rate": SAMPLING_RATE} - return batch - - # dataset = dataset.cast_column(AUDIO_COLUMN, Audio(sampling_rate=SAMPLING_RATE)) - dataset = dataset.map(load_audio) + dataset = dataset["train"].train_test_split(test_size=test_size, seed=job.training_args.seed) # type: ignore + # rename test to eval + dataset["eval"] = dataset["test"] + dataset.pop("test") - # logger.debug(f"Sample audio col values: {dataset['train'][AUDIO_COLUMN][0]}") # type: ignore - return dataset["train"].train_test_split(test_size=test_size) # type: ignore + return dataset -def prepare_dataset(dataset: DatasetDict, processor: Wav2Vec2Processor) -> DatasetDict: +def prepare_dataset( + job: Job, + tokenizer: AutoTokenizer, + feature_extractor: AutoFeatureExtractor, + dataset: DatasetDict, +) -> DatasetDict: """Runs some preprocessing over the given dataset. Parameters: dataset: The dataset on which to apply the preprocessing processor: The processor to apply over the dataset """ - logger.debug(f"Dataset pre-prep: {dataset}") - logger.debug(f"Transcript sample: {dataset['train']['transcript'][0]}") - logger.debug( - f'Input array shape:, {np.asarray(dataset["train"][0]["audio"]["array"]).shape}' + + # Load the audio data and resample if necessary. 
+ dataset = dataset.cast_column( + job.data_args.audio_column_name, + Audio(sampling_rate=feature_extractor.sampling_rate), # type: ignore ) - logger.debug(f'Sampling rate:, {dataset["train"][0]["audio"]["sampling_rate"]}') def _prepare_dataset(batch: Dict) -> Dict[str, List]: - # Also from https://huggingface.co/blog/fine-tune-xlsr-wav2vec2 - audio = batch[AUDIO_COLUMN] - - batch["input_values"] = processor( + audio = batch[job.data_args.audio_column_name] + inputs = feature_extractor( # type: ignore audio["array"], sampling_rate=audio["sampling_rate"] - ).input_values[0] + ) + + batch["input_values"] = inputs.input_values[0] batch["input_length"] = len(batch["input_values"]) - batch["labels"] = processor(text=batch["transcript"]).input_ids - return batch + # encode targets + additional_kwargs = {} + phoneme_language = job.data_args.phoneme_language + if phoneme_language is not None: + additional_kwargs["phonemizer_lang"] = phoneme_language - columns = dataset.column_names.values() - # flatten and make unique between datasets - columns_to_remove = list(set(chain.from_iterable(columns))) + batch["labels"] = tokenizer(batch[job.data_args.text_column_name], **additional_kwargs).input_ids # type: ignore + return batch - dataset = dataset.map( - _prepare_dataset, - remove_columns=columns_to_remove, - num_proc=PROCESSOR_COUNT, + max_input_length = ( + job.data_args.max_duration_in_seconds * feature_extractor.sampling_rate # type: ignore ) - logger.debug(f"Dataset post prep: {dataset}") - logger.debug(f"Training labels: {dataset['train']['labels'][0]}") + min_input_length = ( + job.data_args.min_duration_in_seconds * feature_extractor.sampling_rate # type: ignore + ) + + def is_audio_in_length_range(length: int): + return length >= min_input_length and length <= max_input_length + + with job.training_args.main_process_first(desc="dataset map preprocessing"): + worker_count = job.data_args.preprocessing_num_workers + dataset = dataset.map( + _prepare_dataset, + 
remove_columns=next(iter(dataset.values())).column_names, + num_proc=worker_count, + desc="preprocess datasets", + ) + + # filter data that is shorter than min_input_length + dataset = dataset.filter( + is_audio_in_length_range, + num_proc=worker_count, + input_columns=["input_length"], + ) + + logger.info(f"Test encoding labels: {dataset['train'][0]['labels']}") return dataset diff --git a/elpis/models/job.py b/elpis/models/job.py new file mode 100644 index 0000000..fa146a8 --- /dev/null +++ b/elpis/models/job.py @@ -0,0 +1,311 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional + +from transformers import HfArgumentParser, TrainingArguments + + +def list_field(default=None, metadata=None): + return field(default_factory=lambda: default, metadata=metadata) + + +DEFAULT_METRICS = ("wer", "cer") + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={ + "help": "Path to pretrained model or model identifier from huggingface.co/models" + } + ) + tokenizer_name_or_path: Optional[str] = field( + default=None, + metadata={ + "help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models" + }, + ) + cache_dir: Optional[str] = field( + default=None, + metadata={ + "help": "Where do you want to store the pretrained models downloaded from huggingface.co" + }, + ) + freeze_feature_encoder: bool = field( + default=True, + metadata={"help": "Whether to freeze the feature encoder layers of the model."}, + ) + attention_dropout: float = field( + default=0.0, + metadata={"help": "The dropout ratio for the attention probabilities."}, + ) + activation_dropout: float = field( + default=0.0, + metadata={ + "help": "The dropout ratio for activations inside the fully connected layer." 
+ }, + ) + feat_proj_dropout: float = field( + default=0.0, metadata={"help": "The dropout ratio for the projected features."} + ) + hidden_dropout: float = field( + default=0.0, + metadata={ + "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler." + }, + ) + final_dropout: float = field( + default=0.0, + metadata={"help": "The dropout probability for the final projection layer."}, + ) + mask_time_prob: float = field( + default=0.05, + metadata={ + "help": ( + "Probability of each feature vector along the time axis to be chosen as the start of the vector" + "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature" + "vectors will be masked along the time axis." + ) + }, + ) + mask_time_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the time axis."}, + ) + mask_feature_prob: float = field( + default=0.0, + metadata={ + "help": ( + "Probability of each feature vector along the feature axis to be chosen as the start of the vectorspan" + " to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature" + " bins will be masked along the time axis." + ) + }, + ) + mask_feature_length: int = field( + default=10, + metadata={"help": "Length of vector span to mask along the feature axis."}, + ) + layerdrop: float = field( + default=0.0, metadata={"help": "The LayerDrop probability."} + ) + ctc_loss_reduction: Optional[str] = field( + default="mean", + metadata={ + "help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'." + }, + ) + ctc_zero_infinity: bool = field( + default=False, + metadata={ + "help": "Whether to zero infinite losses and the associated gradients of `torch.nn.CTCLoss`. " + "Infinite losses mainly occur when the inputs are too short to be aligned to the targets. " + "Only relevant when training an instance of Wav2Vec2ForCTC." 
+ }, + ) + + +@dataclass +class DataArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + + Using `HfArgumentParser` we can turn this class + into argparse arguments to be able to specify them on + the command line. + """ + + dataset_name_or_path: str = field( + metadata={ + "help": "If a path, the path to a directory containing the dataset files. " + "Otherwise- the name of the dataset to use (via the datasets library)." + } + ) + dataset_config_name: Optional[str] = field( + default=None, + metadata={ + "help": "The configuration name of the dataset to use (via the datasets library)." + }, + ) + train_split_name: str = field( + default="train+validation", + metadata={ + "help": ( + "The name of the training data set split to use (via the datasets library). Defaults to " + "'train+validation'" + ) + }, + ) + eval_split_name: str = field( + default="test", + metadata={ + "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'" + }, + ) + audio_column_name: str = field( + default="audio", + metadata={ + "help": "The name of the dataset column containing the audio data. Defaults to 'audio'" + }, + ) + text_column_name: str = field( + default="text", + metadata={ + "help": "The name of the dataset column containing the text data. Defaults to 'text'" + }, + ) + overwrite_cache: bool = field( + default=False, + metadata={"help": "Overwrite the cached preprocessed datasets or not."}, + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." 
+ ) + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": ( + "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + ) + }, + ) + chars_to_ignore: Optional[List[str]] = list_field( + default=None, + metadata={"help": "A list of characters to remove from the transcripts."}, + ) + eval_metrics: List[str] = list_field( + default=DEFAULT_METRICS, + metadata={ + "help": "A list of metrics the model should be evaluated on. E.g. `('wer', 'cer')`" + }, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": ( + "Filter audio files that are longer than `max_duration_in_seconds` seconds to" + " 'max_duration_in_seconds`" + ) + }, + ) + min_duration_in_seconds: float = field( + default=0.0, + metadata={ + "help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds" + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": ( + "Whether to only do data preprocessing and skip training. This is especially useful when data" + " preprocessing errors out in distributed training due to timeout. In this case, one should run the" + " preprocessing in a non-distributed setup with `preprocessing_only=True` so that the cached datasets" + " can consequently be loaded in distributed training" + ) + }, + ) + token: Optional[str] = field( + default=None, + metadata={ + "help": ( + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." + ) + }, + ) + use_auth_token: Optional[bool] = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." 
+ }, + ) + trust_remote_code: bool = field( + default=False, + metadata={ + "help": ( + "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option" + "should only be set to `True` for repositories you trust and in which you have read the code, as it will" + "execute code present on the Hub on your local machine." + ) + }, + ) + unk_token: str = field( + default="[UNK]", + metadata={"help": "The unk token for the tokenizer"}, + ) + pad_token: str = field( + default="[PAD]", + metadata={"help": "The padding token for the tokenizer"}, + ) + word_delimiter_token: str = field( + default="|", + metadata={"help": "The word delimiter token for the tokenizer"}, + ) + phoneme_language: Optional[str] = field( + default=None, + metadata={ + "help": ( + "The target language that should be used be" + " passed to the tokenizer for tokenization. Note that" + " this is only relevant if the model classifies the" + " input audio to a sequence of phoneme sequences." 
+ ) + }, + ) + do_lower_case: Optional[bool] = field( + default=None, + metadata={"help": "Whether the target text should be lower cased."}, + ) + + +@dataclass +class Job: + """Generic class which encapsulates elpis training functionality""" + + model_args: ModelArguments + data_args: DataArguments + training_args: TrainingArguments + + @staticmethod + def parser(): + return HfArgumentParser((ModelArguments, DataArguments, TrainingArguments)) # type: ignore + + @classmethod + def from_args(cls) -> Job: + ( + model_args, + data_args, + training_args, + ) = Job.parser().parse_args_into_dataclasses() + return cls( + model_args=model_args, data_args=data_args, training_args=training_args + ) + + @classmethod + def from_json(cls, file: Path) -> Job: + ( + model_args, + data_args, + training_args, + ) = Job.parser().parse_json_file(str(file)) + return cls( + model_args=model_args, data_args=data_args, training_args=training_args + ) diff --git a/elpis/trainer/__init__.py b/elpis/trainer/__init__.py index fcf7177..3041737 100644 --- a/elpis/trainer/__init__.py +++ b/elpis/trainer/__init__.py @@ -1,4 +1,3 @@ -from elpis.trainer.job import TrainingJob, TrainingOptions, TrainingStatus -from elpis.trainer.trainer import train +from elpis.trainer.trainer import run_job -__all__ = ["TrainingJob", "TrainingOptions", "TrainingStatus", "train"] +__all__ = ["run_job"] diff --git a/elpis/trainer/data_collator.py b/elpis/trainer/data_collator.py index 8738210..79a27f0 100644 --- a/elpis/trainer/data_collator.py +++ b/elpis/trainer/data_collator.py @@ -2,15 +2,37 @@ from typing import Dict, List, Optional, Union import torch -from transformers import Wav2Vec2Processor +from transformers import AutoProcessor @dataclass class DataCollatorCTCWithPadding: - processor: Wav2Vec2Processor - padding: Union[bool, str] = True - max_length: Optional[int] = None - max_length_labels: Optional[int] = None + """ + Data collator that will dynamically pad the inputs received. 
+ Args: + processor (:class:`~transformers.AutoProcessor`) + The processor used for proccessing the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the ``input_values`` of the returned list and optionally padding length (see above). + max_length_labels (:obj:`int`, `optional`): + Maximum length of the ``labels`` returned list and optionally padding length (see above). + pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). 
+ """ + + processor: AutoProcessor + padding: Union[bool, str] = "longest" pad_to_multiple_of: Optional[int] = None pad_to_multiple_of_labels: Optional[int] = None @@ -24,27 +46,81 @@ def __call__( ] label_features = [{"input_ids": feature["labels"]} for feature in features] - batch = self.processor.pad( + batch = self.processor.pad( # type: ignore input_features, padding=self.padding, - max_length=self.max_length, pad_to_multiple_of=self.pad_to_multiple_of, return_tensors="pt", ) - with self.processor.as_target_processor(): - labels_batch = self.processor.pad( - label_features, - padding=self.padding, - max_length=self.max_length_labels, - pad_to_multiple_of=self.pad_to_multiple_of_labels, - return_tensors="pt", + + labels_batch = self.processor.pad( # type: ignore + labels=label_features, + padding=self.padding, + pad_to_multiple_of=self.pad_to_multiple_of_labels, + return_tensors="pt", + ) + + # replace padding with -100 to ignore loss correctly + labels = labels_batch["input_ids"].masked_fill( + labels_batch.attention_mask.ne(1), -100 + ) + + batch["labels"] = labels + if "attention_mask" in batch: + batch["attention_mask"] = batch["attention_mask"].to(torch.long) + + return batch + + +@dataclass +class DataCollatorSpeechSeq2SeqWithPadding: + """ + Data collator that will dynamically pad the inputs received. + Args: + processor ([`WhisperProcessor`]) + The processor used for processing the data. + decoder_start_token_id (`int`) + The begin-of-sentence of the decoder. + forward_attention_mask (`bool`) + Whether to return attention_mask. 
+ """ + + processor: AutoProcessor + decoder_start_token_id: int + forward_attention_mask: bool + + def __call__( + self, features: List[Dict[str, Union[List[int], torch.Tensor]]] + ) -> Dict[str, torch.Tensor]: + # split inputs and labels since they have to be of different lengths and need + # different padding methods + model_input_name = self.processor.model_input_names[0] # type: ignore + input_features = [ + {model_input_name: feature[model_input_name]} for feature in features + ] + label_features = [{"input_ids": feature["labels"]} for feature in features] + + batch = self.processor.feature_extractor.pad( # type: ignore + input_features, return_tensors="pt" + ) + + if self.forward_attention_mask: + batch["attention_mask"] = torch.LongTensor( + [feature["attention_mask"] for feature in features] ) + labels_batch = self.processor.tokenizer.pad(label_features, return_tensors="pt") # type: ignore + # replace padding with -100 to ignore loss correctly labels = labels_batch["input_ids"].masked_fill( labels_batch.attention_mask.ne(1), -100 ) + # if bos token is appended in previous tokenization step, + # cut bos token here as it's append later anyways + if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item(): + labels = labels[:, 1:] + batch["labels"] = labels return batch diff --git a/elpis/trainer/guide.py b/elpis/trainer/guide.py new file mode 100644 index 0000000..fa515fd --- /dev/null +++ b/elpis/trainer/guide.py @@ -0,0 +1,582 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition""" + +import functools +import json +import logging +import os +import re +import sys +import warnings +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Union + +import datasets +import evaluate +import numpy as np +import torch +import transformers +from datasets import DatasetDict, load_dataset +from transformers import ( + AutoConfig, + AutoFeatureExtractor, + AutoModelForCTC, + AutoProcessor, + AutoTokenizer, + HfArgumentParser, + Trainer, + TrainingArguments, + Wav2Vec2Processor, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version, send_example_telemetry +from transformers.utils.versions import require_version + +from elpis.trainer.data_collator import DataCollatorCTCWithPadding +from elpis.trainer.job import DataArguments, ModelArguments + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
+check_min_version("4.35.0.dev0") + +require_version( + "datasets>=1.18.0", + "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt", +) + + +logger = logging.getLogger(__name__) + + +def create_vocabulary_from_data( + datasets: DatasetDict, + word_delimiter_token: Optional[str] = None, + unk_token: Optional[str] = None, + pad_token: Optional[str] = None, +): + # Given training and test labels create vocabulary + def extract_all_chars(batch): + all_text = " ".join(batch["target_text"]) + vocab = list(set(all_text)) + return {"vocab": [vocab], "all_text": [all_text]} + + vocabs = datasets.map( + extract_all_chars, + batched=True, + batch_size=-1, + keep_in_memory=True, + remove_columns=datasets["train"].column_names, + ) + + # take union of all unique characters in each dataset + vocab_set = functools.reduce( + lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), + vocabs.values(), + ) + + vocab_dict = {v: k for k, v in enumerate(sorted(vocab_set))} + + # replace white space with delimiter token + if word_delimiter_token is not None: + vocab_dict[word_delimiter_token] = vocab_dict[" "] + del vocab_dict[" "] + + # add unk and pad token + if unk_token is not None: + vocab_dict[unk_token] = len(vocab_dict) + + if pad_token is not None: + vocab_dict[pad_token] = len(vocab_dict) + + return vocab_dict + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser( + (ModelArguments, DataArguments, TrainingArguments) + ) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args, training_args = parser.parse_json_file( + json_file=os.path.abspath(sys.argv[1]) + ) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + if data_args.use_auth_token is not None: + warnings.warn( + "The `use_auth_token` argument is deprecated and will be removed in v4.34.", + FutureWarning, + ) + if data_args.token is not None: + raise ValueError( + "`token` and `use_auth_token` are both specified. Please set only the argument `token`." + ) + data_args.token = data_args.use_auth_token + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_speech_recognition_ctc", model_args, data_args) + + # Detecting last checkpoint. + last_checkpoint = None + if ( + os.path.isdir(training_args.output_dir) + and training_args.do_train + and not training_args.overwrite_output_dir + ): + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
+ ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel( + logging.INFO if is_main_process(training_args.local_rank) else logging.WARN + ) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + logger.info("Training/evaluation parameters %s", training_args) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # 1. First, let's load the dataset + raw_datasets = DatasetDict() + + if training_args.do_train: + raw_datasets["train"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=data_args.train_split_name, + token=data_args.token, + ) + + if data_args.audio_column_name not in raw_datasets["train"].column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'." + " Make sure to set `--audio_column_name` to the correct audio column - one of" + f" {', '.join(raw_datasets['train'].column_names)}." + ) + + if data_args.text_column_name not in raw_datasets["train"].column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(raw_datasets['train'].column_names)}." 
+ ) + + if data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select( + range(data_args.max_train_samples) + ) + + if training_args.do_eval: + raw_datasets["eval"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=data_args.eval_split_name, + token=data_args.token, + ) + + if data_args.max_eval_samples is not None: + raw_datasets["eval"] = raw_datasets["eval"].select( + range(data_args.max_eval_samples) + ) + + # 2. We remove some special characters from the datasets + # that make training complicated and do not help in transcribing the speech + # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic + # that could be easily picked up by the model + chars_to_ignore_regex = ( + f'[{"".join(data_args.chars_to_ignore)}]' + if data_args.chars_to_ignore is not None + else None + ) + text_column_name = data_args.text_column_name + + def remove_special_characters(batch): + if chars_to_ignore_regex is not None: + batch["target_text"] = ( + re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " " + ) + else: + batch["target_text"] = batch[text_column_name].lower() + " " + return batch + + with training_args.main_process_first( + desc="dataset map special characters removal" + ): + raw_datasets = raw_datasets.map( + remove_special_characters, + remove_columns=[text_column_name], + desc="remove special characters from datasets", + ) + + # save special tokens for tokenizer + word_delimiter_token = data_args.word_delimiter_token + unk_token = data_args.unk_token + pad_token = data_args.pad_token + + # 3. Next, let's load the config as we might need it to create + # the tokenizer + # load config + config = AutoConfig.from_pretrained( + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, + trust_remote_code=data_args.trust_remote_code, + ) + + # 4. 
Next, if no tokenizer file is defined, + # we create the vocabulary of the model by extracting all unique characters from + # the training and evaluation datasets + # We need to make sure that only first rank saves vocabulary + # make sure all processes wait until vocab is created + tokenizer_name_or_path = model_args.tokenizer_name_or_path + tokenizer_kwargs = {} + if tokenizer_name_or_path is None: + # save vocab in training output dir + tokenizer_name_or_path = training_args.output_dir + + vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json") + + with training_args.main_process_first(): + if training_args.overwrite_output_dir and os.path.isfile(vocab_file): + try: + os.remove(vocab_file) + except OSError: + # in shared file-systems it might be the case that + # two processes try to delete the vocab file at the same time + pass + + with training_args.main_process_first(desc="dataset map vocabulary creation"): + if not os.path.isfile(vocab_file): + os.makedirs(tokenizer_name_or_path, exist_ok=True) + vocab_dict = create_vocabulary_from_data( + raw_datasets, + word_delimiter_token=word_delimiter_token, + unk_token=unk_token, + pad_token=pad_token, + ) + + # save vocab dict to be loaded into tokenizer + with open(vocab_file, "w") as file: + json.dump(vocab_dict, file) + + # if tokenizer has just been created + # it is defined by `tokenizer_class` if present in config else by `model_type` + tokenizer_kwargs = { + "config": config if config.tokenizer_class is not None else None, + "tokenizer_type": config.model_type + if config.tokenizer_class is None + else None, + "unk_token": unk_token, + "pad_token": pad_token, + "word_delimiter_token": word_delimiter_token, + } + + # 5. Now we can instantiate the feature extractor, tokenizer and model + # Note for distributed training, the .from_pretrained methods guarantee that only + # one local process can concurrently download model & vocab.
+ + # load feature_extractor and tokenizer + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_name_or_path, + token=data_args.token, + trust_remote_code=data_args.trust_remote_code, + **tokenizer_kwargs, + ) + feature_extractor = AutoFeatureExtractor.from_pretrained( + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, + trust_remote_code=data_args.trust_remote_code, + ) + + # adapt config + config.update( + { + "feat_proj_dropout": model_args.feat_proj_dropout, + "attention_dropout": model_args.attention_dropout, + "hidden_dropout": model_args.hidden_dropout, + "final_dropout": model_args.final_dropout, + "mask_time_prob": model_args.mask_time_prob, + "mask_time_length": model_args.mask_time_length, + "mask_feature_prob": model_args.mask_feature_prob, + "mask_feature_length": model_args.mask_feature_length, + "gradient_checkpointing": training_args.gradient_checkpointing, + "layerdrop": model_args.layerdrop, + "ctc_loss_reduction": model_args.ctc_loss_reduction, + "pad_token_id": tokenizer.pad_token_id, + "vocab_size": len(tokenizer), + "activation_dropout": model_args.activation_dropout, + } + ) + + # create model + model = AutoModelForCTC.from_pretrained( + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + config=config, + token=data_args.token, + trust_remote_code=data_args.trust_remote_code, + ) + + # freeze encoder + if model_args.freeze_feature_encoder: + model.freeze_feature_encoder() + + # 6. 
Now we preprocess the datasets including loading the audio, resampling and normalization + # Thankfully, `datasets` takes care of automatically loading and resampling the audio, + # so that we just need to set the correct target sampling rate and normalize the input + # via the `feature_extractor` + + # make sure that dataset decodes audio with correct sampling rate + dataset_sampling_rate = ( + next(iter(raw_datasets.values())) + .features[data_args.audio_column_name] + .sampling_rate + ) + if dataset_sampling_rate != feature_extractor.sampling_rate: + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, + datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate), + ) + + # derive max & min input length for sample rate & max duration + max_input_length = ( + data_args.max_duration_in_seconds * feature_extractor.sampling_rate + ) + min_input_length = ( + data_args.min_duration_in_seconds * feature_extractor.sampling_rate + ) + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + + # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification + phoneme_language = data_args.phoneme_language + + # Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. 
+ def prepare_dataset(batch): + # load audio + sample = batch[audio_column_name] + + inputs = feature_extractor( + sample["array"], sampling_rate=sample["sampling_rate"] + ) + batch["input_values"] = inputs.input_values[0] + batch["input_length"] = len(batch["input_values"]) + + # encode targets + additional_kwargs = {} + if phoneme_language is not None: + additional_kwargs["phonemizer_lang"] = phoneme_language + + batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids + return batch + + with training_args.main_process_first(desc="dataset map preprocessing"): + vectorized_datasets = raw_datasets.map( + prepare_dataset, + remove_columns=next(iter(raw_datasets.values())).column_names, + num_proc=num_workers, + desc="preprocess datasets", + ) + + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + # filter data that is shorter than min_input_length + vectorized_datasets = vectorized_datasets.filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_length"], + ) + + # 7. Next, we can prepare the training. + # Let's use word error rate (WER) as our evaluation metric, + # instantiate a data collator and the trainer + + # Define evaluation metrics during training, *i.e.* word error rate, character error rate + eval_metrics = {metric: evaluate.load(metric) for metric in data_args.eval_metrics} + + # for large datasets it is advised to run the preprocessing on a + # single machine first with ``args.preprocessing_only`` since there will most likely + # be a timeout when running the script in distributed mode. + # In a second step ``args.preprocessing_only`` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + logger.info( + f"Data preprocessing finished.
Files cached at {vectorized_datasets.cache_files}" + ) + return + + def compute_metrics(pred): + pred_logits = pred.predictions + pred_ids = np.argmax(pred_logits, axis=-1) + + pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id + + pred_str = tokenizer.batch_decode(pred_ids) + # we do not want to group tokens when computing the metrics + label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False) + + metrics = { + k: v.compute(predictions=pred_str, references=label_str) + for k, v in eval_metrics.items() + } + + return metrics + + # Now save everything to be able to create a single processor later + # make sure all processes wait until data is saved + with training_args.main_process_first(): + # only the main process saves them + if is_main_process(training_args.local_rank): + # save feature extractor, tokenizer and config + feature_extractor.save_pretrained(training_args.output_dir) + tokenizer.save_pretrained(training_args.output_dir) + config.save_pretrained(training_args.output_dir) + + try: + processor = AutoProcessor.from_pretrained(training_args.output_dir) + except (OSError, KeyError): + warnings.warn( + "Loading a processor from a feature extractor config that does not" + " include a `processor_class` attribute is deprecated and will be removed in v5. 
Please add the following " + " attribute to your `preprocessor_config.json` file to suppress this warning: " + " `'processor_class': 'Wav2Vec2Processor'`", + FutureWarning, + ) + processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir) + + # Instantiate custom data collator + data_collator = DataCollatorCTCWithPadding(processor=processor) + + # Initialize Trainer + trainer = Trainer( + model=model, + data_collator=data_collator, + args=training_args, + compute_metrics=compute_metrics, + train_dataset=vectorized_datasets["train"] if training_args.do_train else None, + eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None, + tokenizer=processor, + ) + + # 8. Finally, we can start training + + # Training + if training_args.do_train: + # use last checkpoint if exist + if last_checkpoint is not None: + checkpoint = last_checkpoint + elif os.path.isdir(model_args.model_name_or_path): + checkpoint = model_args.model_name_or_path + else: + checkpoint = None + + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() + + metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples + if data_args.max_train_samples is not None + else len(vectorized_datasets["train"]) + ) + metrics["train_samples"] = min( + max_train_samples, len(vectorized_datasets["train"]) + ) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + results = {} + if training_args.do_eval: + logger.info("*** Evaluate ***") + metrics = trainer.evaluate() + max_eval_samples = ( + data_args.max_eval_samples + if data_args.max_eval_samples is not None + else len(vectorized_datasets["eval"]) + ) + metrics["eval_samples"] = min( + max_eval_samples, len(vectorized_datasets["eval"]) + ) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + + # Write model card and (optionally) push to hub + config_name = ( + 
data_args.dataset_config_name + if data_args.dataset_config_name is not None + else "na" + ) + kwargs = { + "finetuned_from": model_args.model_name_or_path, + "tasks": "automatic-speech-recognition", + "tags": ["automatic-speech-recognition", data_args.dataset_name], + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}" + ), + "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", + } + if "common_voice" in data_args.dataset_name: + kwargs["language"] = config_name + + if training_args.push_to_hub: + trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) + + return results + + +if __name__ == "__main__": + main() diff --git a/elpis/trainer/job.py b/elpis/trainer/job.py deleted file mode 100644 index 6c77ae7..0000000 --- a/elpis/trainer/job.py +++ /dev/null @@ -1,99 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field, fields -from enum import Enum -from pathlib import Path -from typing import Any, Dict, Tuple - -import torch -from transformers import TrainingArguments - -BASE_MODEL = "facebook/wav2vec2-base-960h" -SAMPLING_RATE = 16_000 -METRICS = ("wer", "cer") - - -class TrainingStatus(Enum): - WAITING = "waiting" - TRAINING = "training" - FINISHED = "finished" - ERROR = "error" - - -@dataclass -class TrainingOptions: - """A class representing some commonly changed training options""" - - batch_size: int = 4 - epochs: int = 2 - learning_rate: float = 1e-4 - min_duration: int = 0 - max_duration: int = 60 - # word_delimiter_token: str = " " # Note: This might interfere with the tokenizer? - test_size: float = 0.2 # TODO: link with dataset? 
- freeze_feature_extractor: bool = True - - @staticmethod - def from_dict(data: Dict[str, Any]) -> "TrainingOptions": - field_names = [field.name for field in fields(TrainingOptions)] - kwargs = {key: data[key] for key in data if key in field_names} - return TrainingOptions(**kwargs) - - def to_dict(self) -> Dict[str, Any]: - return dict(self.__dict__) - - -@dataclass -class TrainingJob: - """A class representing a training job for a model""" - - model_name: str - dataset_name: str - options: TrainingOptions # TODO - rename to training_options next major V - model_options: Dict[str, Any] = field(default_factory=dict) - status: TrainingStatus = TrainingStatus.WAITING - base_model: str = BASE_MODEL - sampling_rate: int = SAMPLING_RATE - metrics: Tuple[str, ...] = METRICS - - def to_training_args(self, output_dir: Path, **kwargs) -> TrainingArguments: - return TrainingArguments( - output_dir=str(output_dir), - group_by_length=True, - num_train_epochs=self.options.epochs, - per_device_train_batch_size=self.options.batch_size, - per_device_eval_batch_size=self.options.batch_size, - gradient_accumulation_steps=2, - evaluation_strategy="steps", - fp16=True if torch.cuda.is_available() else False, - gradient_checkpointing=True, - learning_rate=self.options.learning_rate, - weight_decay=0.005, - save_steps=400, - eval_steps=400, - logging_steps=400, - warmup_steps=400, - save_total_limit=2, - overwrite_output_dir=True, - do_train=True, - do_eval=True, - **kwargs, - ) - - @staticmethod - def from_dict(data: Dict[str, Any]) -> TrainingJob: - return TrainingJob( - model_name=data["model_name"], - dataset_name=data["dataset_name"], - options=TrainingOptions.from_dict(data["options"]), - model_options=data.get("model_options", {}), - status=TrainingStatus(data.get("status", TrainingStatus.WAITING)), - base_model=data.get("base_model", BASE_MODEL), - sampling_rate=data.get("sampling_rate", SAMPLING_RATE), - metrics=data.get("metrics", METRICS), - ) - - def to_dict(self) -> 
Dict[str, Any]: - result = dict(self.__dict__) - result |= dict(options=self.options.to_dict(), status=self.status.value) - return result diff --git a/elpis/trainer/trainer.py b/elpis/trainer/trainer.py index 42bed05..eb1e2dd 100644 --- a/elpis/trainer/trainer.py +++ b/elpis/trainer/trainer.py @@ -1,3 +1,4 @@ +import warnings from contextlib import nullcontext from pathlib import Path from typing import Optional @@ -11,134 +12,315 @@ AutoProcessor, AutoTokenizer, Trainer, - Wav2Vec2CTCTokenizer, - Wav2Vec2FeatureExtractor, Wav2Vec2Processor, ) +from transformers.trainer_utils import get_last_checkpoint, is_main_process from elpis.datasets import create_dataset, prepare_dataset +from elpis.models.job import Job from elpis.models.vocab import VOCAB_FILE, Vocab from elpis.trainer.data_collator import DataCollatorCTCWithPadding -from elpis.trainer.job import TrainingJob from elpis.trainer.metrics import create_metrics from elpis.trainer.utils import log_to_file -def create_processor( - job: TrainingJob, - output_dir: Path, - dataset: DatasetDict, - cache_dir: Optional[Path], - unk_token="[UNK]", - pad_token="[PAD]", - word_delimiter_token="|", -) -> Wav2Vec2Processor: - config = AutoConfig.from_pretrained(job.base_model, cache_dir=cache_dir) - tokenizer_type = config.model_type if config.tokenizer_class is None else None - config = config if config.tokenizer_class is not None else None - - # Build up a vocab from the dataset. 
- train_vocab = Vocab.from_strings(dataset["train"]["transcript"]) - test_vocab = Vocab.from_strings(dataset["test"]["transcript"]) - vocab = train_vocab.merge(test_vocab) - - vocab.add(unk_token) - vocab.add(pad_token) - vocab.replace(" ", word_delimiter_token) - logger.info(f"Vocab: {vocab.vocab}") - vocab.save(output_dir) - - tokenizer = AutoTokenizer.from_pretrained( - output_dir, - config=config, - tokenizer_type=tokenizer_type, - unk_token=unk_token, - pad_token=pad_token, - word_delimiter_token=word_delimiter_token, - cache_dir=cache_dir, - ) - feature_extractor = AutoFeatureExtractor.from_pretrained( - job.base_model, cache_dir=cache_dir - ) - - AutoProcessor.from_pretrained - return Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer) - - -def train( - job: TrainingJob, - output_dir: Path, - dataset_dir: Path, - cache_dir: Optional[Path] = None, +def run_job( + job: Job, log_file: Optional[Path] = None, ) -> Path: """Fine-tunes a model for use in transcription. Parameters: job: Info about the training job, e.g. training options. - output_dir: Where to save the trained model. dataset_dir: A directory containing the preprocessed dataset to train with. - cache_dir: A directory to use for caching HFT downloads and datasets. log_file: An optional file to write training logs to. Returns: A path to the folder containing the trained model. """ - context = log_to_file(log_file) if log_file is not None else nullcontext() - with context: + logging_context = log_to_file(log_file) if log_file is not None else nullcontext() + with logging_context: + # Setup required directories. 
+ output_dir = job.training_args.output_dir + cache_dir = job.model_args.cache_dir + Path(output_dir).mkdir(exist_ok=True, parents=True) + logger.info("Preparing Datasets...") - dataset = create_dataset(dataset_dir, cache_dir) - processor = create_processor(job, output_dir, dataset, cache_dir) - dataset = prepare_dataset(dataset, processor) - logger.info("Finished Preparing Datasets") + config = create_config(job) + dataset = create_dataset(job) - logger.info("Downloading pretrained model...") - model = AutoModelForCTC.from_pretrained( - job.base_model, + tokenizer = create_tokenizer(job, config, dataset) + logger.info(f"Tokenizer: {tokenizer}") # type: ignore + feature_extractor = AutoFeatureExtractor.from_pretrained( + job.model_args.model_name_or_path, cache_dir=cache_dir, - ctc_loss_reduction="mean", - pad_token_id=processor.tokenizer.pad_token_id, # type: ignore - bos_token_id=processor.tokenizer.bos_token_id, # type: ignore - eos_token_id=processor.tokenizer.eos_token_id, # type: ignore - vocab_size=len(processor.tokenizer.get_vocab()), # type: ignore - ignore_mismatched_sizes=True, - **job.model_options + token=job.data_args.token, + trust_remote_code=job.data_args.trust_remote_code, ) + dataset = prepare_dataset(job, tokenizer, feature_extractor, dataset) + logger.info("Finished Preparing Datasets") + + update_config(job, config, tokenizer) + + logger.info("Downloading pretrained model...") + model = create_ctc_model(job, config) logger.info("Downloaded model.") - if job.options.freeze_feature_extractor: - model.freeze_feature_encoder() + # Now save everything to be able to create a single processor later + # make sure all processes wait until data is saved + logger.info("Saving config, tokenizer and feature extractor.") + with job.training_args.main_process_first(): + # only the main process saves them + if is_main_process(job.training_args.local_rank): + feature_extractor.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) # type: ignore + 
config.save_pretrained(output_dir) # type: ignore - data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True) - output_dir.mkdir(exist_ok=True, parents=True) + try: + processor = AutoProcessor.from_pretrained(job.training_args.output_dir) + except (OSError, KeyError): + warnings.warn( + "Loading a processor from a feature extractor config that does not" + " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following " + " attribute to your `preprocessor_config.json` file to suppress this warning: " + " `'processor_class': 'Wav2Vec2Processor'`", + FutureWarning, + ) + processor = Wav2Vec2Processor.from_pretrained(job.training_args.output_dir) + data_collator = DataCollatorCTCWithPadding(processor=processor) # type: ignore + + # Initialize Trainer trainer = Trainer( - model=model, - args=job.to_training_args(output_dir), - train_dataset=dataset["train"], # type: ignore - eval_dataset=dataset["test"], # type: ignore - tokenizer=processor.feature_extractor, # type: ignore + model=model, # type: ignore data_collator=data_collator, - compute_metrics=create_metrics(job.metrics, processor), + args=job.training_args, + compute_metrics=create_metrics(job.data_args.eval_metrics, processor), + train_dataset=dataset["train"] if job.training_args.do_train else None, # type: ignore + eval_dataset=dataset["eval"] if job.training_args.do_eval else None, # type: ignore + tokenizer=processor, # type: ignore ) - trainer.create_optimizer() - logger.info(f"Begin training model...") - trainer.train() + train(job, trainer, dataset) logger.info(f"Finished training!") - logger.info(f"Saving model @ {output_dir}") - trainer.save_model() - trainer.save_state() - processor.save_pretrained(output_dir) - logger.info(f"Model written to disk.") + evaluate(job, trainer, dataset) + clean_up(job, trainer) + + return Path(output_dir) + + +def create_config(job: Job) -> AutoConfig: + return AutoConfig.from_pretrained( + 
job.model_args.model_name_or_path, + cache_dir=job.model_args.cache_dir, + token=job.data_args.token, + trust_remote_code=job.data_args.trust_remote_code, + ) + + +def create_tokenizer( + job: Job, config: AutoConfig, dataset: DatasetDict +) -> AutoTokenizer: + tokenizer_name_or_path = job.model_args.tokenizer_name_or_path + if tokenizer_name_or_path is not None: + return AutoTokenizer.from_pretrained( + tokenizer_name_or_path, + token=job.data_args.token, + trust_remote_code=job.data_args.trust_remote_code, + ) + + training_args = job.training_args + + # save vocab in training output dir + tokenizer_name_or_path = job.training_args.output_dir + vocab_file = Path(tokenizer_name_or_path) / VOCAB_FILE + + # Delete existing vocab file if overwriting + with training_args.main_process_first(): + if training_args.overwrite_output_dir and vocab_file.is_file(): + try: + vocab_file.unlink() + except OSError: + # in shared file-systems it might be the case that + # two processes try to delete the vocab file at the same time + pass + + # Build up a vocab from the dataset.
+ with training_args.main_process_first(desc="dataset map vocabulary creation"): + if not vocab_file.is_file(): + Path(tokenizer_name_or_path).mkdir(exist_ok=True, parents=True) + text_column = job.data_args.text_column_name + + vocab = Vocab.from_strings(dataset["train"][text_column]) + if "test" in dataset: + test_vocab = Vocab.from_strings(dataset["test"][text_column]) + vocab = vocab.merge(test_vocab) + + vocab.add(job.data_args.unk_token) + vocab.add(job.data_args.pad_token) + vocab.replace(" ", job.data_args.word_delimiter_token) + logger.info(f"Vocab: {vocab.vocab}") + vocab.save(vocab_file) + + # If the tokenizer has just been created, + # it is defined by `tokenizer_class` if present in config else by `model_type` + tokenizer_kwargs = { + "config": config if config.tokenizer_class is not None else None, # type: ignore + "tokenizer_type": config.model_type if config.tokenizer_class is None else None, # type: ignore + "unk_token": job.data_args.unk_token, + "pad_token": job.data_args.pad_token, + "word_delimiter_token": job.data_args.word_delimiter_token, + "do_lower_case": job.data_args.do_lower_case, + } + + return AutoTokenizer.from_pretrained( + tokenizer_name_or_path, + token=job.data_args.token, + trust_remote_code=job.data_args.trust_remote_code, + **tokenizer_kwargs, + ) + + +def update_config(job: Job, config: AutoConfig, tokenizer: AutoTokenizer) -> None: + config.update( # type: ignore + { + "feat_proj_dropout": job.model_args.feat_proj_dropout, + "attention_dropout": job.model_args.attention_dropout, + "hidden_dropout": job.model_args.hidden_dropout, + "final_dropout": job.model_args.final_dropout, + "mask_time_prob": job.model_args.mask_time_prob, + "mask_time_length": job.model_args.mask_time_length, + "mask_feature_prob": job.model_args.mask_feature_prob, + "mask_feature_length": job.model_args.mask_feature_length, + "gradient_checkpointing": job.training_args.gradient_checkpointing, + "layerdrop": job.model_args.layerdrop, + 
"ctc_loss_reduction": job.model_args.ctc_loss_reduction, + "ctc_zero_infinity": job.model_args.ctc_zero_infinity, + "pad_token_id": tokenizer.pad_token_id, # type: ignore + "bos_token_id": tokenizer.bos_token_id, # type: ignore + "eos_token_id": tokenizer.eos_token_id, # type: ignore + "vocab_size": len(tokenizer), # type: ignore + "activation_dropout": job.model_args.activation_dropout, + } + ) + + +def create_ctc_model(job: Job, config: AutoConfig) -> AutoModelForCTC: + model = AutoModelForCTC.from_pretrained( + job.model_args.model_name_or_path, + cache_dir=job.model_args.cache_dir, + config=config, + token=job.data_args.token, + trust_remote_code=job.data_args.trust_remote_code, + ) + + # freeze encoder + if job.model_args.freeze_feature_encoder: + model.freeze_feature_encoder() + + return model + + +def last_checkpoint(job: Job) -> Optional[str]: + """Returns the string corresponding to the path or name of the last + training checkpoint, if it exists.""" + training_args = job.training_args + output_dir = Path(training_args.output_dir) + + if not output_dir.is_dir(): + return None + if not training_args.do_train: + return None + if training_args.overwrite_output_dir: + return None + + checkpoint = get_last_checkpoint(training_args.output_dir) + checkpoint_folders = [path for path in output_dir.iterdir() if path.is_dir()] + + if checkpoint is None and len(checkpoint_folders) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Set `overwrite_output_dir` in training_args to overcome." + ) + elif checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
+ ) - metrics = trainer.evaluate() - logger.info("==== Metrics ====") - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - logger.info(metrics) + return checkpoint + + +def train(job: Job, trainer: Trainer, dataset: DatasetDict): + if not job.training_args.do_train: + return + + checkpoint = last_checkpoint(job) + if checkpoint is None and Path(job.model_args.model_name_or_path).is_dir(): + checkpoint = job.model_args.model_name_or_path + + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() + + metrics = train_result.metrics + + # Add training samples to metrics + max_train_samples = ( + job.data_args.max_train_samples + if job.data_args.max_train_samples is not None + else len(dataset["train"]) + ) + metrics["train_samples"] = min(max_train_samples, len(dataset["train"])) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_model() + trainer.save_state() + + +def evaluate(job: Job, trainer: Trainer, dataset: DatasetDict): + if not job.training_args.do_eval: + return + + logger.info("*** Evaluate ***") + metrics = trainer.evaluate() + max_eval_samples = ( + job.data_args.max_eval_samples + if job.data_args.max_eval_samples is not None + else len(dataset["eval"]) + ) + metrics["eval_samples"] = min(max_eval_samples, len(dataset["eval"])) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + logger.info(metrics) + + +def clean_up(job: Job, trainer: Trainer): + """Writes a model card, and optionally pushes the trained model to the + huggingface hub.""" + config_name = ( + job.data_args.dataset_config_name + if job.data_args.dataset_config_name is not None + else "na" + ) + kwargs = { + "finetuned_from": job.model_args.model_name_or_path, + "tasks": "automatic-speech-recognition", + "tags": ["automatic-speech-recognition", job.data_args.dataset_name_or_path], + "dataset_args": ( + f"Config: {config_name}, Training split: 
{job.data_args.train_split_name}, Eval split:" + f" {job.data_args.eval_split_name}" + ), + "dataset": f"{job.data_args.dataset_name_or_path.upper()} - {config_name.upper()}", + } + if "common_voice" in job.data_args.dataset_name_or_path: + kwargs["language"] = config_name - return output_dir + if job.training_args.push_to_hub: + trainer.push_to_hub(**kwargs) + else: + trainer.create_model_card(**kwargs) diff --git a/example.py b/example.py new file mode 100644 index 0000000..458b397 --- /dev/null +++ b/example.py @@ -0,0 +1,139 @@ +import shutil +from itertools import groupby, takewhile +from pathlib import Path +from pprint import pprint + +from loguru import logger +from tqdm import tqdm +from transformers import TrainingArguments, training_args + +from elpis.datasets import Dataset +from elpis.datasets.dataset import CleaningOptions +from elpis.datasets.preprocessing import process_batch +from elpis.models import ElanOptions, ElanTierSelector +from elpis.models.job import DataArguments, Job, ModelArguments +from elpis.trainer.trainer import run_job +from elpis.transcriber.results import build_elan, build_text +from elpis.transcriber.transcribe import build_pipeline, transcribe + +DATASETS_PATH = Path(__file__).parent.parent.parent / "datasets" +TIMIT_PATH = DATASETS_PATH / "timit" +DIGITS_PATH = DATASETS_PATH / "digits-preview" + +TRAINING_FILES = list((DIGITS_PATH / "train").rglob("*.*")) +TRANSCRIBE_AUDIO = DIGITS_PATH / "test/audio2.wav" + +TIMIT_TIER_NAME = "default" +TIER_NAME = "tx" + +print("------ Training files ------") +# print(training_files) +DIGITS_DATASET = Dataset( + name="my_dataset", + files=TRAINING_FILES, + cleaning_options=CleaningOptions(), # Default cleaning options + # Elan data extraction info - required if dataset includes .eaf files. 
+ elan_options=ElanOptions( + selection_mechanism=ElanTierSelector.NAME, selection_value=TIER_NAME + ), +) + +TIMIT_DATASET = Dataset( + name="my_dataset", + files=list(TIMIT_PATH.rglob("*.*")), + cleaning_options=CleaningOptions(), # Default cleaning options + # Elan data extraction info - required if dataset includes .eaf files. + elan_options=ElanOptions( + selection_mechanism=ElanTierSelector.NAME, selection_value="default" + ), +) + +dataset = TIMIT_DATASET + +# Setup +tmp_path = Path("testdir") +# tmp_path = Path("/tmp") / "testscript" +dataset_dir = tmp_path / "dataset" +model_dir = tmp_path / "model" +output_dir = tmp_path / "output" +cache_dir = tmp_path / "cache" + +# Reset dir between runs +if tmp_path.exists(): + shutil.rmtree(tmp_path) + +# Make all directories +for directory in dataset_dir, model_dir, output_dir: + directory.mkdir(exist_ok=True, parents=True) + +# Preprocessing +logger.info("Creating batches") +batches = dataset.to_batches() +logger.info("Processing batches") +for batch in tqdm(batches, unit="batch"): + process_batch(batch, dataset_dir) + +# Train the model +job = Job( + model_args=ModelArguments( + "facebook/wav2vec2-base", + # ctc_zero_infinity=True, + attention_dropout=0.1, + # hidden_dropout=0.1, + # mask_time_prob=0.05, + layerdrop=0.0, + freeze_feature_encoder=True, + ), + data_args=DataArguments( + dataset_name_or_path=str(dataset_dir), + text_column_name="transcript", + min_duration_in_seconds=1, + max_duration_in_seconds=5, + ), + training_args=TrainingArguments( + output_dir=str(model_dir), + overwrite_output_dir=True, + # evaluation_strategy="epoch", + do_train=True, + do_eval=True, + per_device_train_batch_size=16, + per_device_eval_batch_size=1, + num_train_epochs=20, + learning_rate=1e-4, + group_by_length=True, + weight_decay=0.005, + warmup_steps=1000, + logging_steps=10, + eval_steps=100, + save_steps=400, + save_total_limit=2, + ), +) + + +print("------ JOB ------") +print(job) +run_job(job) +print("------ TRAINED 
------") + +# Perform inference with pipeline +print("------ INFER ------") +asr = build_pipeline( + pretrained_location=str(model_dir.absolute()), +) + +annotations = transcribe(TRANSCRIBE_AUDIO, asr) +print(build_text(annotations)) + +# Build output files +print("------ OUTPUT ------") +text_file = output_dir / "test.txt" + +with open(text_file, "w") as output_file: + output_file.write(build_text(annotations)) + +elan_file = output_dir / "test.eaf" +eaf = build_elan(annotations) +eaf.to_file(str(elan_file)) + +print("voila ;)") diff --git a/tests/datasets/test_processing.py b/tests/datasets/test_processing.py index 5fb88bb..f671e60 100644 --- a/tests/datasets/test_processing.py +++ b/tests/datasets/test_processing.py @@ -3,20 +3,42 @@ from pathlib import Path from loguru import logger +from transformers import TrainingArguments from elpis.datasets.processing import create_dataset +from elpis.models.job import DataArguments, Job, ModelArguments DATA_PATH = Path(__file__).parent.parent / "data" / "processing" def test_create_dataset(tmp_path: Path): + cache_dir = tmp_path / "cache" + dataset_dir = tmp_path / "dataset" + model_dir = tmp_path / "model" + output_dir = tmp_path / "out" + + for directory in cache_dir, dataset_dir, model_dir, output_dir: + directory.mkdir(exist_ok=True, parents=True) + logger.info(DATA_PATH) for file in os.listdir(DATA_PATH): if Path(file).suffix in [".wav", ".json"]: - shutil.copy(DATA_PATH / file, tmp_path) + shutil.copy(DATA_PATH / file, dataset_dir) + + logger.info(os.listdir(dataset_dir)) - logger.info(os.listdir(tmp_path)) + job = Job( + model_args=ModelArguments( + model_name_or_path="facebook/wav2vec2-base", cache_dir=str(cache_dir) + ), + data_args=DataArguments( + dataset_name_or_path=str(dataset_dir), text_column_name="transcript" + ), + training_args=TrainingArguments( + output_dir=str(model_dir), + ), + ) - result = create_dataset(tmp_path, tmp_path / "cache") - assert "test" in result - assert "train" in result + dataset = 
create_dataset(job) + assert "train" in dataset + assert "eval" in dataset diff --git a/tests/test_elpis.py b/tests/test_elpis.py index 9986be1..4b22c45 100644 --- a/tests/test_elpis.py +++ b/tests/test_elpis.py @@ -3,14 +3,15 @@ from loguru import logger from pytest import mark +from transformers import TrainingArguments from elpis import __version__ from elpis.datasets import Dataset from elpis.datasets.dataset import CleaningOptions from elpis.datasets.preprocessing import process_batch from elpis.models.elan_options import ElanOptions, ElanTierSelector -from elpis.trainer.job import TrainingJob, TrainingOptions -from elpis.trainer.trainer import train +from elpis.models.job import DataArguments, Job, ModelArguments +from elpis.trainer.trainer import run_job, train from elpis.transcriber.results import build_elan, build_text from elpis.transcriber.transcribe import build_pipeline, transcribe @@ -49,17 +50,22 @@ def test_everything(tmp_path: Path): process_batch(batch, dataset_dir) # Train the model - job = TrainingJob( - model_name="model", - dataset_name="dataset", - options=TrainingOptions(epochs=2, learning_rate=0.001), - ) - train( - job=job, - output_dir=model_dir, - dataset_dir=dataset_dir, + job = Job( + model_args=ModelArguments(model_name_or_path="facebook/wav2vec2-base"), + data_args=DataArguments( + dataset_name_or_path=str(dataset_dir), text_column_name="transcript" + ), + training_args=TrainingArguments( + output_dir=str(model_dir), + num_train_epochs=2, + learning_rate=1e-4, + do_train=True, + do_eval=True, + ), ) + run_job(job=job) + # Perform inference with pipeline asr = build_pipeline( pretrained_location=str(model_dir.absolute()), diff --git a/tests/trainer/test_job.py b/tests/trainer/test_job.py deleted file mode 100644 index b821722..0000000 --- a/tests/trainer/test_job.py +++ /dev/null @@ -1,23 +0,0 @@ -from elpis.trainer import TrainingJob, TrainingOptions, TrainingStatus -from elpis.trainer.job import BASE_MODEL, SAMPLING_RATE - - -def 
test_training_options_serialization_round_trip(): - expected = TrainingOptions() - assert expected == TrainingOptions.from_dict(expected.to_dict()) - - -def test_training_job_serialization_round_trip(): - expected = TrainingJob("model", "dataset", TrainingOptions()) - assert expected == TrainingJob.from_dict(expected.to_dict()) - - -def test_job_from_basic_dict(): - data = dict(model_name="a", dataset_name="b", options=TrainingOptions().to_dict()) - job = TrainingJob.from_dict(data) - assert job.model_name == "a" - assert job.dataset_name == "b" - assert job.options == TrainingOptions() - assert job.status == TrainingStatus.WAITING - assert job.sampling_rate == SAMPLING_RATE - assert job.base_model == BASE_MODEL diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 85b07c7..77782b9 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -3,33 +3,41 @@ from pathlib import Path from pytest import mark +from transformers import TrainingArguments -from elpis.trainer import TrainingJob, TrainingOptions, train +from elpis.models.job import DataArguments, Job, ModelArguments +from elpis.trainer import run_job DATA_PATH = Path(__file__).parent.parent / "data" DATASET_PATH = DATA_PATH / "processing" -JOB = TrainingJob( - model_name="test", - dataset_name="test", - options=TrainingOptions(epochs=1, max_duration=10), -) - @mark.integration -def test_training(tmp_path: Path): +def test_training_succeeds(tmp_path: Path): dataset_dir = tmp_path / "dataset" - dataset_dir.mkdir(exist_ok=True, parents=True) - + model_dir = tmp_path / "model" output_dir = tmp_path / "out" - output_dir.mkdir(exist_ok=True, parents=True) log_file = tmp_path / "logs.txt" + for directory in dataset_dir, model_dir, output_dir: + directory.mkdir(exist_ok=True, parents=True) + for file in os.listdir(DATASET_PATH): shutil.copy(DATASET_PATH / file, dataset_dir) - model_path = train( - job=JOB, output_dir=output_dir, dataset_dir=dataset_dir, 
log_file=log_file + job = Job( + model_args=ModelArguments(model_name_or_path="facebook/wav2vec2-base"), + data_args=DataArguments( + dataset_name_or_path=str(dataset_dir), text_column_name="transcript" + ), + training_args=TrainingArguments( + output_dir=str(model_dir), + num_train_epochs=2, + learning_rate=1e-4, + do_train=True, + ), ) - assert model_path == output_dir + + model_path = run_job(job, log_file) + assert model_path == model_dir diff --git a/tests/transcriber/test_transcribe.py b/tests/transcriber/test_transcribe.py index 760065a..a59b7b5 100644 --- a/tests/transcriber/test_transcribe.py +++ b/tests/transcriber/test_transcribe.py @@ -3,11 +3,11 @@ from loguru import logger from pytest import mark -from elpis.trainer.job import BASE_MODEL from elpis.transcriber.transcribe import build_pipeline, transcribe DATA_DIR = Path(__file__).parent.parent / "data" AUDIO = DATA_DIR / "oily_rag.wav" +BASE_MODEL = "facebook/wav2vec2-base" @mark.integration From c8b40b92e43e06256a7cfc6e91a4f8f703517605 Mon Sep 17 00:00:00 2001 From: Harry Keightley Date: Sat, 14 Oct 2023 19:22:01 +1000 Subject: [PATCH 8/8] Update version and format. --- elpis/trainer/guide.py | 4 +--- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/elpis/trainer/guide.py b/elpis/trainer/guide.py index fa515fd..58254de 100644 --- a/elpis/trainer/guide.py +++ b/elpis/trainer/guide.py @@ -111,9 +111,7 @@ def main(): # or by passing the --help flag to this script. # We now keep distinct sets of args, for a cleaner separation of concerns. - parser = HfArgumentParser( - (ModelArguments, DataArguments, TrainingArguments) - ) + parser = HfArgumentParser((ModelArguments, DataArguments, TrainingArguments)) if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): # If we pass only one argument to the script and it's the path to a json file, # let's parse it to get our arguments. 
diff --git a/pyproject.toml b/pyproject.toml index c4f97fb..14cd95a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "elpis" -version = "0.1.8" +version = "0.2.0" description = """\ A library to perform automatic speech recognition with huggingface transformers.\ """