diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 253c4750..7a4a13ec 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,10 +5,11 @@ on: [ push, pull_request ] jobs: build: runs-on: ubuntu-latest + continue-on-error: ${{ matrix.python-version == '3.12' }} strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11" ] + python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/README.md b/README.md index 2108092b..dda511d2 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ [![Codecov][codecov_badge]][codecov_link] [![Documentation][docs_badge]][docs_link] -[github_tests_badge]: https://github.com/Toloka/crowdlib/workflows/Tests/badge.svg?branch=main -[github_tests_link]: https://github.com/Toloka/crowdlib/actions?query=workflow:Tests +[github_tests_badge]: https://github.com/Toloka/crowd-kit/actions/workflows/tests.yml/badge.svg?branch=main +[github_tests_link]: https://github.com/Toloka/crowd-kit/actions/workflows/tests.yml [codecov_badge]: https://codecov.io/gh/Toloka/crowd-kit/branch/main/graph/badge.svg [codecov_link]: https://codecov.io/gh/Toloka/crowd-kit [docs_badge]: https://img.shields.io/badge/docs-toloka.ai-1E2126 diff --git a/crowdkit/aggregation/base/__init__.py b/crowdkit/aggregation/base/__init__.py index 393c903b..db14817d 100644 --- a/crowdkit/aggregation/base/__init__.py +++ b/crowdkit/aggregation/base/__init__.py @@ -6,7 +6,7 @@ "BasePairwiseAggregator", ] -from typing import Optional +from typing import Any, Optional import attr import pandas as pd @@ -24,7 +24,7 @@ class BaseClassificationAggregator: is the tasks's most likely true label. """ - labels_: Optional[pd.Series] = named_series_attrib(name="agg_label") + labels_: Optional["pd.Series[Any]"] = named_series_attrib(name="agg_label") def fit(self, data: pd.DataFrame) -> "BaseClassificationAggregator": """Args: @@ -36,7 +36,7 @@ def fit(self, data: pd.DataFrame) -> "BaseClassificationAggregator": """ raise NotImplementedError() - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' labeling results. A pandas.DataFrame containing `task`, `worker` and `label` columns. @@ -58,7 +58,7 @@ class BaseImageSegmentationAggregator: is the tasks's aggregated segmentation. """ - segmentations_: pd.Series = named_series_attrib(name="agg_segmentation") + segmentations_: "pd.Series[Any]" = named_series_attrib(name="agg_segmentation") def fit(self, data: pd.DataFrame) -> "BaseImageSegmentationAggregator": """Args: @@ -70,7 +70,7 @@ def fit(self, data: pd.DataFrame) -> "BaseImageSegmentationAggregator": """ raise NotImplementedError() - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' segmentations. A pandas.DataFrame containing `worker`, `task` and `segmentation` columns'. @@ -122,7 +122,7 @@ class BaseTextsAggregator: is the task's text. """ - texts_: pd.Series = named_series_attrib(name="agg_text") + texts_: "pd.Series[Any]" = named_series_attrib(name="agg_text") def fit(self, data: pd.DataFrame) -> "BaseTextsAggregator": """Args: @@ -133,7 +133,7 @@ def fit(self, data: pd.DataFrame) -> "BaseTextsAggregator": """ raise NotImplementedError() - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' text outputs. A pandas.DataFrame containing `task`, `worker` and `text` columns. @@ -153,7 +153,7 @@ class BasePairwiseAggregator: A pandas.Series index by labels and holding corresponding label's scores """ - scores_: pd.Series = named_series_attrib(name="agg_score") + scores_: "pd.Series[Any]" = named_series_attrib(name="agg_score") def fit(self, data: pd.DataFrame) -> "BasePairwiseAggregator": """Args: @@ -166,7 +166,7 @@ def fit(self, data: pd.DataFrame) -> "BasePairwiseAggregator": """ raise NotImplementedError() - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' pairwise comparison results. A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'. diff --git a/crowdkit/aggregation/classification/dawid_skene.py b/crowdkit/aggregation/classification/dawid_skene.py index 8ae9aa67..4ef006ce 100644 --- a/crowdkit/aggregation/classification/dawid_skene.py +++ b/crowdkit/aggregation/classification/dawid_skene.py @@ -1,6 +1,6 @@ __all__ = ["DawidSkene", "OneCoinDawidSkene"] -from typing import List, Optional +from typing import Any, List, Optional, cast import attr import numpy as np @@ -81,7 +81,7 @@ class DawidSkene(BaseClassificationAggregator): tol: float = attr.ib(default=1e-5) probas_: Optional[pd.DataFrame] = attr.ib(init=False) - priors_: Optional[pd.Series] = named_series_attrib(name="prior") + priors_: Optional["pd.Series[Any]"] = named_series_attrib(name="prior") # labels_ errors_: Optional[pd.DataFrame] = attr.ib(init=False) loss_history_: List[float] = attr.ib(init=False) @@ -103,7 +103,7 @@ def _m_step(data: pd.DataFrame, probas: pd.DataFrame) -> pd.DataFrame: @staticmethod def _e_step( - data: pd.DataFrame, priors: pd.Series, errors: pd.DataFrame + data: pd.DataFrame, priors: "pd.Series[Any]", errors: pd.DataFrame ) -> pd.DataFrame: """ Performs E-step of the Dawid-Skene algorithm. @@ -115,7 +115,7 @@ def _e_step( # We have to multiply lots of probabilities and such products are known to converge # to zero exponentially fast. To avoid floating-point precision problems we work with # logs of original values - joined = data.join(np.log2(errors), on=["worker", "label"]) + joined = data.join(np.log2(errors), on=["worker", "label"]) # type: ignore joined.drop(columns=["worker", "label"], inplace=True) log_likelihoods = np.log2(priors) + joined.groupby("task", sort=False).sum() log_likelihoods.rename_axis("label", axis=1, inplace=True) @@ -135,23 +135,23 @@ def _e_step( scaled_likelihoods.columns = pd.Index( scaled_likelihoods.columns, name="label", dtype=data.label.dtype ) - return scaled_likelihoods + return cast(pd.DataFrame, scaled_likelihoods) def _evidence_lower_bound( self, data: pd.DataFrame, probas: pd.DataFrame, - priors: pd.Series, + priors: "pd.Series[Any]", errors: pd.DataFrame, ) -> float: # calculate joint probability log-likelihood expectation over probas - joined = data.join(np.log(errors), on=["worker", "label"]) + joined = data.join(np.log(errors), on=["worker", "label"]) # type: ignore # escape boolean index/column names to prevent confusion between indexing by boolean array and iterable of names joined = joined.rename(columns={True: "True", False: "False"}, copy=False) priors = priors.rename(index={True: "True", False: "False"}, copy=False) - joined.loc[:, priors.index] = joined.loc[:, priors.index].add(np.log(priors)) + joined.loc[:, priors.index] = joined.loc[:, priors.index].add(np.log(priors)) # type: ignore joined.set_index(["task", "worker"], inplace=True) joint_expectation = ( @@ -223,9 +223,11 @@ def fit_predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: Each probability is in he range from 0 to 1, all task probabilities must sum up to 1. """ - return self.fit(data).probas_ + self.fit(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: data (DataFrame): The training dataset of workers' labeling results @@ -234,7 +236,9 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: Series: Task labels. The `pandas.Series` data is indexed by `task` so that `labels.loc[task]` is the most likely true label of tasks. """ - return self.fit(data).labels_ + self.fit(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ @attr.s @@ -307,37 +311,37 @@ class OneCoinDawidSkene(DawidSkene): tol: float = attr.ib(default=1e-5) probas_: Optional[pd.DataFrame] = attr.ib(init=False) - priors_: Optional[pd.Series] = named_series_attrib(name="prior") + priors_: Optional["pd.Series[Any]"] = named_series_attrib(name="prior") errors_: Optional[pd.DataFrame] = attr.ib(init=False) - skills_: Optional[pd.Series] = attr.ib(init=False) + skills_: Optional["pd.Series[Any]"] = attr.ib(init=False) loss_history_: List[float] = attr.ib(init=False) @staticmethod - def _assign_skills(row: pd.Series, skills: pd.DataFrame) -> pd.DataFrame: + def _assign_skills(row: "pd.Series[Any]", skills: pd.DataFrame) -> pd.DataFrame: """ Assigns user skills to error matrix row by row. """ num_categories = len(row) for column_name, _ in row.items(): - if column_name == row.name[1]: - row[column_name] = skills[row.name[0]] + if column_name == row.name[1]: # type: ignore + row[column_name] = skills[row.name[0]] # type: ignore else: - row[column_name] = (1 - skills[row.name[0]]) / (num_categories - 1) - return row + row[column_name] = (1 - skills[row.name[0]]) / (num_categories - 1) # type: ignore + return row # type: ignore @staticmethod def _process_skills_to_errors( - data: pd.DataFrame, probas: pd.DataFrame, skills: pd.Series + data: pd.DataFrame, probas: pd.DataFrame, skills: "pd.Series[Any]" ) -> pd.DataFrame: errors = DawidSkene._m_step(data, probas) - errors = errors.apply(OneCoinDawidSkene._assign_skills, args=(skills,), axis=1) + errors = errors.apply(OneCoinDawidSkene._assign_skills, args=(skills,), axis=1) # type: ignore errors.clip(lower=_EPS, upper=1 - _EPS, inplace=True) return errors @staticmethod - def _m_step(data: pd.DataFrame, probas: pd.DataFrame) -> pd.Series: + def _m_step(data: pd.DataFrame, probas: pd.DataFrame) -> "pd.Series[Any]": # type: ignore """Performs M-step of Homogeneous Dawid-Skene algorithm. Calculates a worker skill as their accuracy according to the label probability. diff --git a/crowdkit/aggregation/classification/glad.py b/crowdkit/aggregation/classification/glad.py index badd4f25..d3bf0e25 100644 --- a/crowdkit/aggregation/classification/glad.py +++ b/crowdkit/aggregation/classification/glad.py @@ -100,21 +100,25 @@ class GLAD(BaseClassificationAggregator): n_iter: int = attr.ib(default=100) tol: float = attr.ib(default=1e-5) silent: bool = attr.ib(default=True) - labels_priors: Optional[pd.Series] = attr.ib(default=None) - alphas_priors_mean: Optional[pd.Series] = attr.ib(default=None) - betas_priors_mean: Optional[pd.Series] = attr.ib(default=None) + labels_priors: Optional["pd.Series[Any]"] = attr.ib(default=None) + alphas_priors_mean: Optional["pd.Series[Any]"] = attr.ib(default=None) + betas_priors_mean: Optional["pd.Series[Any]"] = attr.ib(default=None) m_step_max_iter: int = attr.ib(default=25) m_step_tol: float = attr.ib(default=1e-2) # Available after fit # labels_ probas_: Optional[pd.DataFrame] = attr.ib(init=False) - alphas_: pd.Series = named_series_attrib(name="alpha") - betas_: pd.Series = named_series_attrib(name="beta") + alphas_: "pd.Series[Any]" = named_series_attrib(name="alpha") + betas_: "pd.Series[Any]" = named_series_attrib(name="beta") loss_history_: List[float] = attr.ib(init=False) def _join_all( - self, data: pd.DataFrame, alphas: pd.Series, betas: pd.Series, priors: pd.Series + self, + data: pd.DataFrame, + alphas: "pd.Series[Any]", + betas: "pd.Series[Any]", + priors: "pd.Series[Any]", ) -> pd.DataFrame: """Makes a data frame with format `(task, worker, label, variable) -> (alpha, beta, posterior, delta)`""" labels = list(priors.index) @@ -152,7 +156,7 @@ def _e_step(self, data: pd.DataFrame) -> pd.DataFrame: # sum up by workers probas = data.groupby(["task", "variable"]).sum(numeric_only=True)["posterior"] # add priors to every label - probas = probas.add(np.log(cast(pd.Series, self.priors_)), level=1) + probas = probas.add(np.log(cast("pd.Series[Any]", self.priors_)), level=1) # exponentiate and normalize probas = probas.groupby(["task"]).transform(self._softmax) # put posterior in data['posterior'] @@ -166,7 +170,7 @@ def _e_step(self, data: pd.DataFrame) -> pd.DataFrame: def _gradient_Q( self, data: pd.DataFrame - ) -> Tuple[npt.NDArray[Any], npt.NDArray[Any]]: + ) -> Tuple["pd.Series[Any]", "pd.Series[Any]"]: """Computes gradient of loss function""" sigma = scipy.special.expit(data["alpha"] * np.exp(data["beta"])) @@ -178,12 +182,17 @@ def _gradient_Q( * np.exp(data["beta"]) ) dQbeta = data.groupby("task").sum(numeric_only=True)["dQb"] + # gradient of priors on betas + assert self.betas_ is not None, "betas_ is None" + assert self.betas_priors_mean_ is not None, "betas_priors_mean_ is None" dQbeta -= self.betas_ - self.betas_priors_mean_ data["dQa"] = data["posterior"] * (data["delta"] - sigma) * np.exp(data["beta"]) dQalpha = data.groupby("worker").sum(numeric_only=True)["dQa"] # gradient of priors on alphas + assert self.alphas_ is not None, "alphas_ is None" + assert self.alphas_priors_mean_ is not None, "alphas_priors_mean_ is None" dQalpha -= self.alphas_ - self.alphas_priors_mean_ return dQalpha, dQbeta @@ -201,6 +210,10 @@ def _compute_Q(self, data: pd.DataFrame) -> float: Q = data["task_expectation"].sum() # priors on alphas and betas + assert self.alphas_ is not None, "alphas_ is None" + assert self.alphas_priors_mean_ is not None, "alphas_priors_mean_ is None" + assert self.betas_ is not None, "betas_ is None" + assert self.betas_priors_mean_ is not None, "betas_priors_mean_ is None" Q += np.log(scipy.stats.norm.pdf(self.alphas_ - self.alphas_priors_mean_)).sum() Q += np.log(scipy.stats.norm.pdf(self.betas_ - self.betas_priors_mean_)).sum() if np.isnan(Q): @@ -220,11 +233,13 @@ def _optimize_df(self, x: npt.NDArray[Any]) -> npt.NDArray[Any]: dQalpha, dQbeta = self._gradient_Q(self._current_data) minus_grad = np.zeros_like(x) - minus_grad[: len(self.workers_)] = -dQalpha[self.workers_].values - minus_grad[len(self.workers_) :] = -dQbeta[self.tasks_].values + minus_grad[: len(self.workers_)] = -dQalpha[self.workers_].values # type: ignore + minus_grad[len(self.workers_) :] = -dQbeta[self.tasks_].values # type: ignore return minus_grad - def _update_alphas_betas(self, alphas: pd.Series, betas: pd.Series) -> None: + def _update_alphas_betas( + self, alphas: "pd.Series[Any]", betas: "pd.Series[Any]" + ) -> None: self.alphas_ = alphas self.betas_ = betas self._current_data.set_index("worker", inplace=True) @@ -236,16 +251,16 @@ def _update_alphas_betas(self, alphas: pd.Series, betas: pd.Series) -> None: def _get_alphas_betas_by_point( self, x: npt.NDArray[Any] - ) -> Tuple[pd.Series, pd.Series]: - alphas = pd.Series(x[: len(self.workers_)], index=self.workers_, name="alpha") + ) -> Tuple["pd.Series[Any]", "pd.Series[Any]"]: + alphas = pd.Series(x[: len(self.workers_)], index=self.workers_, name="alpha") # type: ignore alphas.index.name = "worker" - betas = pd.Series(x[len(self.workers_) :], index=self.tasks_, name="beta") + betas = pd.Series(x[len(self.workers_) :], index=self.tasks_, name="beta") # type: ignore betas.index.name = "task" return alphas, betas def _m_step(self, data: pd.DataFrame) -> pd.DataFrame: """Optimizes the alpha and beta parameters using the conjugate gradient method.""" - x_0 = np.concatenate([self.alphas_.values, self.betas_.values]) # type: ignore + x_0 = np.concatenate([self.alphas_.values, self.betas_.values]) self._current_data = data res = minimize( self._optimize_f, @@ -260,18 +275,18 @@ def _m_step(self, data: pd.DataFrame) -> pd.DataFrame: return self._current_data def _init(self, data: pd.DataFrame) -> None: - self.alphas_ = pd.Series(1.0, index=pd.unique(data.worker)) - self.betas_ = pd.Series(1.0, index=pd.unique(data.task)) + self.alphas_ = pd.Series(1.0, index=pd.unique(data.worker)) # type: ignore + self.betas_ = pd.Series(1.0, index=pd.unique(data.task)) # type: ignore self.tasks_ = pd.unique(data["task"]) self.workers_ = pd.unique(data["worker"]) self.priors_ = self.labels_priors if self.priors_ is None: self.prior_labels_ = pd.unique(data["label"]) self.priors_ = pd.Series( - 1.0 / len(self.prior_labels_), index=self.prior_labels_ + 1.0 / len(self.prior_labels_), index=self.prior_labels_ # type: ignore ) else: - self.prior_labels_ = self.priors_.index + self.prior_labels_ = self.priors_.index # type: ignore self.alphas_priors_mean_ = self.alphas_priors_mean if self.alphas_priors_mean_ is None: self.alphas_priors_mean_ = pd.Series(1.0, index=self.alphas_.index) @@ -280,7 +295,7 @@ def _init(self, data: pd.DataFrame) -> None: self.betas_priors_mean_ = pd.Series(1.0, index=self.betas_.index) @staticmethod - def _softplus(x: pd.Series, limit: int = 30) -> npt.NDArray[Any]: + def _softplus(x: "pd.Series[Any]", limit: int = 30) -> npt.NDArray[Any]: """log(1 + exp(x)) stable version For x > 30 or x < -30 error is less than 1e-13 @@ -312,6 +327,11 @@ def fit(self, data: pd.DataFrame) -> "GLAD": # Initialization data = data.filter(["task", "worker", "label"]) self._init(data) + + assert self.alphas_ is not None, "no alphas_" + assert self.betas_ is not None, "no betas_" + assert self.priors_ is not None, "no priors_" + data = self._join_all(data, self.alphas_, self.betas_, self.priors_) data = self._e_step(data) Q = self._compute_Q(data) @@ -353,9 +373,11 @@ def fit_predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: Each probability is in he range from 0 to 1, all task probabilities must sum up to 1. """ - return self.fit(data).probas_ + self.fit(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: @@ -367,4 +389,6 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self.fit(data).labels_ + self.fit(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ diff --git a/crowdkit/aggregation/classification/gold_majority_vote.py b/crowdkit/aggregation/classification/gold_majority_vote.py index 9c33a333..052660d1 100644 --- a/crowdkit/aggregation/classification/gold_majority_vote.py +++ b/crowdkit/aggregation/classification/gold_majority_vote.py @@ -1,6 +1,6 @@ __all__ = ["GoldMajorityVote"] -from typing import Optional +from typing import Any, Optional import attr import pandas as pd @@ -58,7 +58,7 @@ class GoldMajorityVote(BaseClassificationAggregator): """ # Available after fit - skills_: Optional[pd.Series] = named_series_attrib(name="skill") + skills_: Optional["pd.Series[Any]"] = named_series_attrib(name="skill") # Available after predict or predict_proba # labels_ @@ -71,7 +71,7 @@ def _apply(self, data: pd.DataFrame) -> "GoldMajorityVote": self.probas_ = mv.probas_ return self - def fit(self, data: pd.DataFrame, true_labels: pd.Series) -> "GoldMajorityVote": # type: ignore + def fit(self, data: pd.DataFrame, true_labels: "pd.Series[Any]") -> "GoldMajorityVote": # type: ignore """Fits the model to the training data. Args: @@ -89,7 +89,7 @@ def fit(self, data: pd.DataFrame, true_labels: pd.Series) -> "GoldMajorityVote": self.skills_ = get_accuracy(data, true_labels=true_labels, by="worker") return self - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Predicts the true labels of tasks when the model is fitted. Args: @@ -101,7 +101,9 @@ def predict(self, data: pd.DataFrame) -> pd.Series: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self._apply(data).labels_ + self._apply(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: """Returns probability distributions of labels for each task when the model is fitted. @@ -116,9 +118,11 @@ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: Each probability is in he range from 0 to 1, all task probabilities must sum up to 1. """ - return self._apply(data).probas_ + self._apply(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ - def fit_predict(self, data: pd.DataFrame, true_labels: pd.Series) -> pd.Series: # type: ignore + def fit_predict(self, data: pd.DataFrame, true_labels: "pd.Series[Any]") -> "pd.Series[Any]": # type: ignore """Fits the model to the training data and returns the aggregated results. Args: data (DataFrame): The training dataset of workers' labeling results @@ -135,7 +139,7 @@ def fit_predict(self, data: pd.DataFrame, true_labels: pd.Series) -> pd.Series: return self.fit(data, true_labels).predict(data) def fit_predict_proba( - self, data: pd.DataFrame, true_labels: pd.Series + self, data: pd.DataFrame, true_labels: "pd.Series[Any]" ) -> pd.DataFrame: """Fits the model to the training data and returns probability distributions of labels for each task. diff --git a/crowdkit/aggregation/classification/kos.py b/crowdkit/aggregation/classification/kos.py index b5e4e7d6..e0c3d490 100644 --- a/crowdkit/aggregation/classification/kos.py +++ b/crowdkit/aggregation/classification/kos.py @@ -1,5 +1,7 @@ __all__ = ["KOS"] +from typing import Any, cast + import attr import numpy as np import pandas as pd @@ -109,10 +111,12 @@ def fit(self, data: pd.DataFrame) -> "KOS": kos_data["inferred"] = kos_data.label * kos_data.reliabilities inferred_labels = np.sign(kos_data.groupby("task")["inferred"].sum()) back_mapping = {v: k for k, v in mapping.items()} - self.labels_ = inferred_labels.apply(lambda x: back_mapping[x]) + self.labels_ = cast( + "pd.Series[Any]", inferred_labels.apply(lambda x: back_mapping[x]) + ) return self - def fit_predict(self, data: pd.DataFrame) -> pd.DataFrame: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: data (DataFrame): The training dataset of workers' labeling results @@ -123,4 +127,6 @@ def fit_predict(self, data: pd.DataFrame) -> pd.DataFrame: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self.fit(data).labels_ + self.fit(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ diff --git a/crowdkit/aggregation/classification/m_msr.py b/crowdkit/aggregation/classification/m_msr.py index e2ae6185..cc927a5d 100644 --- a/crowdkit/aggregation/classification/m_msr.py +++ b/crowdkit/aggregation/classification/m_msr.py @@ -87,7 +87,7 @@ class MMSR(BaseClassificationAggregator): _tasks_mapping: Dict[Any, int] = attr.ib(factory=dict) # Available after fit - skills_: Optional[pd.Series] = named_series_attrib(name="skill") + skills_: Optional["pd.Series[Any]"] = named_series_attrib(name="skill") # Available after predict or predict_score # labels_ @@ -117,7 +117,7 @@ def fit(self, data: pd.DataFrame) -> "MMSR": self._m_msr() return self - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Predicts the true labels of tasks when the model is fitted. Args: @@ -129,7 +129,9 @@ def predict(self, data: pd.DataFrame) -> pd.Series: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self._apply(data).labels_ + self._apply(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ def predict_score(self, data: pd.DataFrame) -> pd.DataFrame: """Returns the total sum of weights for each label when the model is fitted. @@ -143,9 +145,11 @@ def predict_score(self, data: pd.DataFrame) -> pd.DataFrame: so that `result.loc[task, label]` is a score of `label` for `task`. """ - return self._apply(data).scores_ + self._apply(data) + assert self.scores_ is not None, "no scores_" + return self.scores_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: @@ -182,8 +186,8 @@ def _m_msr(self) -> None: X = np.abs(self._covariation_matrix) self.loss_history_ = [] for _ in range(self.n_iter): - v_prev = np.copy(v) # type: ignore - u_prev = np.copy(u) # type: ignore + v_prev = np.copy(v) + u_prev = np.copy(u) for j in range(n): target_v = X[:, j].reshape(-1, 1) target_v = target_v[observed_entries[:, j]] / u[observed_entries[:, j]] @@ -207,12 +211,12 @@ def _m_msr(self) -> None: else: u[i][0] = y.mean() - loss = np.linalg.norm(u @ v.T - u_prev @ v_prev.T, ord="fro") # type: ignore + loss = np.linalg.norm(u @ v.T - u_prev @ v_prev.T, ord="fro") self.loss_history_.append(float(loss)) if loss < self.tol: break - k = np.sqrt(np.linalg.norm(u) / np.linalg.norm(v)) # type: ignore + k = np.sqrt(np.linalg.norm(u) / np.linalg.norm(v)) x_track_1 = u / k x_track_2 = self._sign_determination_valid(self._covariation_matrix, x_track_1) x_track_3 = np.minimum(x_track_2, 1 - 1.0 / np.sqrt(self._n_tasks)) @@ -228,7 +232,7 @@ def _m_msr(self) -> None: self.skills_ = self._get_skills_from_array(skills) - def _get_skills_from_array(self, array: npt.NDArray[Any]) -> pd.Series: + def _get_skills_from_array(self, array: npt.NDArray[Any]) -> "pd.Series[Any]": inverse_workers_mapping = { ind: worker for worker, ind in self._workers_mapping.items() } @@ -245,7 +249,7 @@ def _sign_determination_valid( valid_idx = np.where(np.sum(C, axis=1) != 0)[0] S_valid = S[valid_idx[:, None], valid_idx] k = S_valid.shape[0] - upper_idx = np.triu(np.ones(shape=(k, k))) # type: ignore + upper_idx = np.triu(np.ones(shape=(k, k))) S_valid_upper = S_valid * upper_idx new_node_end_I, new_node_end_J = np.where(S_valid_upper == 1) S_valid[S_valid == 1] = 0 @@ -283,7 +287,7 @@ def _remove_largest_and_smallest_F_value( if np.sum(y > a) < F: y = y[y[:, 0] <= a] else: - y = np.concatenate((y[: m - F], y[m:]), axis=0) # type: ignore + y = np.concatenate((y[: m - F], y[m:]), axis=0) if len(y) == 1 and y[0][0] == 0: y[0][0] = 1 / np.sqrt(n_tasks) return y @@ -310,8 +314,8 @@ def _construnct_covariation_matrix(self, answers: pd.DataFrame) -> None: self._n_common_tasks = ( np.sign(self._observation_matrix) @ np.sign(self._observation_matrix).T ) - self._n_common_tasks -= np.diag(np.diag(self._n_common_tasks)) # type: ignore - self._sparsity = np.min(np.sign(self._n_common_tasks).sum(axis=0)) # type: ignore + self._n_common_tasks -= np.diag(np.diag(self._n_common_tasks)) + self._sparsity = np.min(np.sign(self._n_common_tasks).sum(axis=0)) # Can we rewrite it in matrix operations? self._covariation_matrix = np.zeros(shape=(self._n_workers, self._n_workers)) diff --git a/crowdkit/aggregation/classification/mace.py b/crowdkit/aggregation/classification/mace.py index 07a1d48d..65af71f8 100644 --- a/crowdkit/aggregation/classification/mace.py +++ b/crowdkit/aggregation/classification/mace.py @@ -1,6 +1,6 @@ __all__ = ["MACE"] -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union import attr import numpy as np @@ -23,8 +23,8 @@ def normalize(x: NDArray[np.float64], smoothing: float) -> NDArray[np.float64]: Returns: np.ndarray: Normalized array """ - norm = (x + smoothing).sum(axis=1) # type: ignore - return np.divide( # type: ignore + norm = (x + smoothing).sum(axis=1) + return np.divide( x + smoothing, norm[:, np.newaxis], out=np.zeros_like(x), @@ -44,9 +44,9 @@ def variational_normalize( Returns: np.ndarray: Normalized array """ - norm = (x + hparams).sum(axis=1) # type: ignore + norm = (x + hparams).sum(axis=1) norm = np.exp(digamma(norm)) - return np.divide( # type: ignore + return np.divide( np.exp(digamma(x + hparams)), norm[:, np.newaxis], out=np.zeros_like(x), @@ -238,7 +238,7 @@ def fit(self, data: pd.DataFrame) -> "MACE": return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """ Fits the model to the training data and returns the aggregated results. @@ -250,7 +250,9 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: Series: Task labels. The `pandas.Series` data is indexed by `task` so that `labels.loc[task]` is the most likely true label of tasks. """ - return self.fit(data).labels_ + self.fit(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ def fit_predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: """ @@ -265,7 +267,9 @@ def fit_predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: The `pandas.DataFrame` data is indexed by `task` so that `result.loc[task, label]` is the probability that the `task` true label is equal to `label`. Each probability is in he range from 0 to 1, all task probabilities must sum up to 1. """ - return self.fit(data).probas_ + self.fit(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ def _initialize(self, n_workers: int, n_labels: int) -> None: """Initializes the MACE parameters. @@ -299,9 +303,9 @@ def _initialize(self, n_workers: int, n_labels: int) -> None: def _e_step( self, annotation: pd.DataFrame, - task_names: List[Any], - worker_names: List[Any], - label_names: List[Any], + task_names: Union[List[Any], "pd.Index[Any]"], + worker_names: Union[List[Any], "pd.Index[Any]"], + label_names: Union[List[Any], "pd.Index[Any]"], tasks: NDArray[np.int64], workers: NDArray[np.int64], labels: NDArray[np.int64], diff --git a/crowdkit/aggregation/classification/majority_vote.py b/crowdkit/aggregation/classification/majority_vote.py index d5ff1078..04fa76e4 100644 --- a/crowdkit/aggregation/classification/majority_vote.py +++ b/crowdkit/aggregation/classification/majority_vote.py @@ -1,6 +1,6 @@ __all__ = ["MajorityVote"] -from typing import Optional +from typing import Any, Optional import attr import pandas as pd @@ -78,13 +78,15 @@ class MajorityVote(BaseClassificationAggregator): """ # TODO: remove skills_ - skills_: Optional[pd.Series] = named_series_attrib(name="skill") + skills_: Optional["pd.Series[Any]"] = named_series_attrib(name="skill") probas_: Optional[pd.DataFrame] = attr.ib(init=False) # labels_ on_missing_skill: str = attr.ib(default="error") default_skill: Optional[float] = attr.ib(default=None) - def fit(self, data: pd.DataFrame, skills: pd.Series = None) -> "MajorityVote": + def fit( + self, data: pd.DataFrame, skills: Optional["pd.Series[Any]"] = None + ) -> "MajorityVote": """Fits the model to the training data. Args: @@ -115,7 +117,7 @@ def fit(self, data: pd.DataFrame, skills: pd.Series = None) -> "MajorityVote": return self def fit_predict_proba( - self, data: pd.DataFrame, skills: Optional[pd.Series] = None + self, data: pd.DataFrame, skills: Optional["pd.Series[Any]"] = None ) -> pd.DataFrame: """Fits the model to the training data and returns probability distributions of labels for each task. @@ -131,10 +133,13 @@ def fit_predict_proba( The `pandas.DataFrame` data is indexed by `task` so that `result.loc[task, label]` is the probability that the `task` true label is equal to `label`. Each probability is in the range from 0 to 1, all task probabilities must sum up to 1. """ + self.fit(data, skills) + assert self.probas_ is not None, "no probas_" + return self.probas_ - return self.fit(data, skills).probas_ - - def fit_predict(self, data: pd.DataFrame, skills: pd.Series = None) -> pd.Series: + def fit_predict( + self, data: pd.DataFrame, skills: Optional["pd.Series[Any]"] = None + ) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: @@ -149,4 +154,6 @@ def fit_predict(self, data: pd.DataFrame, skills: pd.Series = None) -> pd.Series so that `labels.loc[task]` is the most likely true label of tasks. """ - return self.fit(data, skills).labels_ + self.fit(data, skills) + assert self.labels_ is not None, "no labels_" + return self.labels_ diff --git a/crowdkit/aggregation/classification/wawa.py b/crowdkit/aggregation/classification/wawa.py index ca274e4d..bd4e7df5 100644 --- a/crowdkit/aggregation/classification/wawa.py +++ b/crowdkit/aggregation/classification/wawa.py @@ -1,6 +1,6 @@ __all__ = ["Wawa"] -from typing import Optional +from typing import Any, Optional import attr import pandas as pd @@ -36,7 +36,7 @@ class Wawa(BaseClassificationAggregator): Each probability is in the range from 0 to 1, all task probabilities must sum up to 1. """ - skills_: Optional[pd.Series] = named_series_attrib(name="skill") + skills_: Optional["pd.Series[Any]"] = named_series_attrib(name="skill") probas_: Optional[pd.DataFrame] = attr.ib(init=False) # labels_ @@ -62,10 +62,11 @@ def fit(self, data: pd.DataFrame) -> "Wawa": # TODO: support weights? data = data[["task", "worker", "label"]] mv = MajorityVote().fit(data) + assert mv.labels_ is not None, "no labels_" self.skills_ = get_accuracy(data, true_labels=mv.labels_, by="worker") return self - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Predicts the true labels of tasks when the model is fitted. Args: @@ -77,7 +78,9 @@ def predict(self, data: pd.DataFrame) -> pd.Series: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self._apply(data).labels_ + self._apply(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: """Returns probability distributions of labels for each task when the model is fitted. @@ -92,9 +95,11 @@ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: Each probability is in he range from 0 to 1, all task probabilities must sum up to 1. """ - return self._apply(data).probas_ + self._apply(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: diff --git a/crowdkit/aggregation/classification/zero_based_skill.py b/crowdkit/aggregation/classification/zero_based_skill.py index 1264b056..89bce52c 100644 --- a/crowdkit/aggregation/classification/zero_based_skill.py +++ b/crowdkit/aggregation/classification/zero_based_skill.py @@ -1,6 +1,6 @@ __all__ = ["ZeroBasedSkill"] -from typing import Optional +from typing import Any, Optional import attr import pandas as pd @@ -59,13 +59,13 @@ class ZeroBasedSkill(BaseClassificationAggregator): eps: float = 1e-5 # Available after fit - skills_: Optional[pd.Series] = named_series_attrib(name="skill") + skills_: Optional["pd.Series[Any]"] = named_series_attrib(name="skill") # Available after predict or predict_proba # labels_ probas_: Optional[pd.DataFrame] = attr.ib(init=False) - def _init_skills(self, data: pd.DataFrame) -> pd.Series: + def _init_skills(self, data: pd.DataFrame) -> "pd.Series[Any]": skill_value = 1 / data.label.unique().size + self.eps skill_index = pd.Index(data.worker.unique(), name="worker") return pd.Series(skill_value, index=skill_index) @@ -99,6 +99,7 @@ def fit(self, data: pd.DataFrame) -> "ZeroBasedSkill": if iteration % self.lr_steps_to_reduce == 0: learning_rate *= self.lr_reduce_factor mv.fit(data, skills=skills) + assert mv.labels_ is not None, "no labels_" skills = skills + learning_rate * ( get_accuracy(data, mv.labels_, by="worker") - skills ) @@ -108,7 +109,7 @@ def fit(self, data: pd.DataFrame) -> "ZeroBasedSkill": return self - def predict(self, data: pd.DataFrame) -> pd.Series: + def predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Predicts the true labels of tasks when the model is fitted. Args: @@ -120,7 +121,9 @@ def predict(self, data: pd.DataFrame) -> pd.Series: so that `labels.loc[task]` is the most likely true label of tasks. """ - return self._apply(data).labels_ + self._apply(data) + assert self.labels_ is not None, "no labels_" + return self.labels_ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: """Returns probability distributions of labels for each task when the model is fitted. @@ -135,9 +138,11 @@ def predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: Each probability is in the range from 0 to 1, all task probabilities must sum up to 1. """ - return self._apply(data).probas_ + self._apply(data) + assert self.probas_ is not None, "no probas_" + return self.probas_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated results. Args: data (DataFrame): The training dataset of workers' labeling results @@ -149,7 +154,7 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: return self.fit(data).predict(data) - def fit_predict_proba(self, data: pd.DataFrame) -> pd.Series: + def fit_predict_proba(self, data: pd.DataFrame) -> pd.DataFrame: """Fits the model to the training data and returns the aggregated results. Args: data (DataFrame): The training dataset of workers' labeling results diff --git a/crowdkit/aggregation/embeddings/closest_to_average.py b/crowdkit/aggregation/embeddings/closest_to_average.py index 5ab3ec09..682e1168 100644 --- a/crowdkit/aggregation/embeddings/closest_to_average.py +++ b/crowdkit/aggregation/embeddings/closest_to_average.py @@ -40,8 +40,8 @@ class ClosestToAverage(BaseEmbeddingsAggregator): def fit( self, data: pd.DataFrame, - aggregated_embeddings: Optional[pd.Series] = None, - true_embeddings: pd.Series = None, + aggregated_embeddings: Optional["pd.Series[Any]"] = None, + true_embeddings: Optional["pd.Series[Any]"] = None, ) -> "ClosestToAverage": """Fits the model to the training data. @@ -68,7 +68,7 @@ def fit( group = data.groupby("task") # we don't use .mean() because it does not work with np.array in older pandas versions avg_embeddings = group.embedding.apply(np.sum) / group.worker.count() - avg_embeddings.update(true_embeddings) + avg_embeddings.update(true_embeddings) # type: ignore else: avg_embeddings = aggregated_embeddings @@ -94,7 +94,9 @@ def fit( return self def fit_predict_scores( - self, data: pd.DataFrame, aggregated_embeddings: pd.Series = None + self, + data: pd.DataFrame, + aggregated_embeddings: Optional["pd.Series[Any]"] = None, ) -> pd.DataFrame: """Fits the model to the training data and returns the estimated scores. @@ -113,7 +115,9 @@ def fit_predict_scores( return self.fit(data, aggregated_embeddings).scores_ def fit_predict( - self, data: pd.DataFrame, aggregated_embeddings: Optional[pd.Series] = None + self, + data: pd.DataFrame, + aggregated_embeddings: Optional["pd.Series[Any]"] = None, ) -> pd.DataFrame: """ Fits the model to the training data and returns the aggregated outputs. diff --git a/crowdkit/aggregation/embeddings/hrrasa.py b/crowdkit/aggregation/embeddings/hrrasa.py index 52bfdd60..e0363226 100644 --- a/crowdkit/aggregation/embeddings/hrrasa.py +++ b/crowdkit/aggregation/embeddings/hrrasa.py @@ -113,7 +113,9 @@ class HRRASA(BaseClassificationAggregator): # embeddings_and_outputs_ loss_history_: List[float] = attr.ib(init=False) - def fit(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "HRRASA": + def fit( + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None + ) -> "HRRASA": """Fits the model to the training data. Args: @@ -177,7 +179,7 @@ def fit(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "HRRASA" return self def fit_predict_scores( - self, data: pd.DataFrame, true_embeddings: pd.Series = None + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None ) -> pd.DataFrame: """Fits the model to the training data and returns the estimated scores. @@ -196,8 +198,8 @@ def fit_predict_scores( return self.fit(data, true_embeddings)._apply(data, true_embeddings).scores_ - def fit_predict( - self, data: pd.DataFrame, true_embeddings: Optional[pd.Series] = None + def fit_predict( # type: ignore + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None ) -> pd.DataFrame: """Fits the model to the training data and returns the aggregated outputs. @@ -227,7 +229,9 @@ def _cosine_distance( return float("inf") return float(distance.cosine(embedding, avg_embedding)) - def _apply(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "HRRASA": + def _apply( + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None + ) -> "HRRASA": cta = ClosestToAverage(distance=self._cosine_distance) cta.fit( data, @@ -242,8 +246,8 @@ def _apply(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "HRRA def _aggregate_embeddings( data: pd.DataFrame, weights: pd.DataFrame, - true_embeddings: Optional[pd.Series] = None, - ) -> pd.Series: + true_embeddings: Optional["pd.Series[Any]"] = None, + ) -> "pd.Series[Any]": """Calculates the weighted average of embeddings for each task.""" data = data.join(weights, on=["task", "worker"]) data["weighted_embedding"] = data.weight * data.embedding @@ -269,7 +273,9 @@ def _distance_from_aggregated(self, answers: pd.DataFrame) -> pd.DataFrame: ) # avoid division by zero return with_task_aggregate.reset_index() - def _rank_outputs(self, data: pd.DataFrame, skills: pd.Series) -> pd.DataFrame: + def _rank_outputs( + self, data: pd.DataFrame, skills: "pd.Series[Any]" + ) -> pd.DataFrame: """Returns the ranking score for each record in the `data` data frame.""" if not data.size: @@ -287,7 +293,9 @@ def _rank_outputs(self, data: pd.DataFrame, skills: pd.Series) -> pd.DataFrame: return data[["task", "output", "rank"]] @staticmethod - def _calc_weights(data: pd.DataFrame, worker_skills: pd.Series) -> pd.DataFrame: + def _calc_weights( + data: pd.DataFrame, worker_skills: "pd.Series[Any]" + ) -> pd.DataFrame: """Calculates the weight for every embedding according to its local and global skills.""" data = data.set_index("worker") data["worker_skill"] = worker_skills @@ -297,8 +305,10 @@ def _calc_weights(data: pd.DataFrame, worker_skills: pd.Series) -> pd.DataFrame: @staticmethod def _update_skills( - data: pd.DataFrame, aggregated_embeddings: pd.Series, prior_skills: pd.Series - ) -> pd.Series: + data: pd.DataFrame, + aggregated_embeddings: "pd.Series[Any]", + prior_skills: "pd.Series[Any]", + ) -> "pd.Series[Any]": """Estimates the global reliabilities by aggregated embeddings.""" data = data.join( aggregated_embeddings.rename("aggregated_embedding"), on="task" @@ -323,7 +333,7 @@ def _get_local_skills(self, data: pd.DataFrame) -> pd.DataFrame: index.append((task, worker)) processed_pairs.add((task, worker)) data = data.set_index(["task", "worker"]) - local_skills = pd.Series( + local_skills = pd.Series( # type: ignore local_skills, index=pd.MultiIndex.from_tuples(index, names=["task", "worker"]), dtype=float, @@ -408,4 +418,4 @@ def _fill_single_overlap_tasks_info( ) self.weights_ = pd.concat([self.weights_, pd.DataFrame(weights_to_append)]) if hasattr(self, "ranks_"): - self.ranks_ = self.ranks_.append(pd.DataFrame(ranks_to_append)) + self.ranks_ = self.ranks_.append(pd.DataFrame(ranks_to_append)) # type: ignore diff --git a/crowdkit/aggregation/embeddings/rasa.py b/crowdkit/aggregation/embeddings/rasa.py index 0efe7b95..ce1059a2 100644 --- a/crowdkit/aggregation/embeddings/rasa.py +++ b/crowdkit/aggregation/embeddings/rasa.py @@ -3,7 +3,7 @@ ] from functools import partial -from typing import Any, List +from typing import Any, List, Optional import attr import numpy as np @@ -73,8 +73,10 @@ class RASA(BaseEmbeddingsAggregator): @staticmethod def _aggregate_embeddings( - data: pd.DataFrame, skills: pd.Series, true_embeddings: pd.Series = None - ) -> pd.Series: + data: pd.DataFrame, + skills: "pd.Series[Any]", + true_embeddings: Optional["pd.Series[Any]"] = None, + ) -> "pd.Series[Any]": """Calculates the weighted average of embeddings for each task.""" data = data.join(skills.rename("skill"), on="worker") data["weighted_embedding"] = data.skill * data.embedding @@ -89,8 +91,10 @@ def _aggregate_embeddings( @staticmethod def _update_skills( - data: pd.DataFrame, aggregated_embeddings: pd.Series, prior_skills: pd.Series - ) -> pd.Series: + data: pd.DataFrame, + aggregated_embeddings: "pd.Series[Any]", + prior_skills: "pd.Series[Any]", + ) -> "pd.Series[Any]": """Estimates the global reliabilities by aggregated embeddings.""" data = data.join( aggregated_embeddings.rename("aggregated_embedding"), on="task" @@ -110,7 +114,9 @@ def _cosine_distance( return float("inf") return float(distance.cosine(embedding, avg_embedding)) - def _apply(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "RASA": + def _apply( + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None + ) -> "RASA": cta = ClosestToAverage(distance=self._cosine_distance) cta.fit( data, @@ -121,7 +127,9 @@ def _apply(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "RASA self.embeddings_and_outputs_ = cta.embeddings_and_outputs_ return self - def fit(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "RASA": + def fit( + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None + ) -> "RASA": """Fits the model to the training data. Args: @@ -171,7 +179,7 @@ def fit(self, data: pd.DataFrame, true_embeddings: pd.Series = None) -> "RASA": return self def fit_predict_scores( - self, data: pd.DataFrame, true_embeddings: pd.Series = None + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None ) -> pd.DataFrame: """Fits the model to the training data and returns the estimated scores. @@ -191,7 +199,7 @@ def fit_predict_scores( return self.fit(data, true_embeddings)._apply(data, true_embeddings).scores_ def fit_predict( - self, data: pd.DataFrame, true_embeddings: pd.Series = None + self, data: pd.DataFrame, true_embeddings: Optional["pd.Series[Any]"] = None ) -> pd.DataFrame: """Fits the model to the training data and returns the aggregated outputs. diff --git a/crowdkit/aggregation/image_segmentation/segmentation_em.py b/crowdkit/aggregation/image_segmentation/segmentation_em.py index 1fcc69d0..5e491c4e 100644 --- a/crowdkit/aggregation/image_segmentation/segmentation_em.py +++ b/crowdkit/aggregation/image_segmentation/segmentation_em.py @@ -82,7 +82,7 @@ class SegmentationEM(BaseImageSegmentationAggregator): @staticmethod def _e_step( - segmentations: pd.Series, + segmentations: npt.NDArray[Any], errors: npt.NDArray[Any], priors: Union[float, npt.NDArray[Any]], ) -> npt.NDArray[Any]: @@ -105,7 +105,7 @@ def _e_step( # division by the denominator in the Bayes formula posteriors: npt.NDArray[Any] = np.nan_to_num( np.exp(pos_log_prob) - / (np.exp(pos_log_prob) + np.exp(neg_log_prob)), # type: ignore + / (np.exp(pos_log_prob) + np.exp(neg_log_prob)), nan=0, ) @@ -113,7 +113,7 @@ def _e_step( @staticmethod def _m_step( - segmentations: pd.Series, + segmentations: npt.NDArray[Any], posteriors: npt.NDArray[Any], segmentation_region_size: int, segmentations_sizes: npt.NDArray[Any], @@ -134,7 +134,7 @@ def _m_step( def _evidence_lower_bound( self, - segmentations: pd.Series, + segmentations: npt.NDArray[Any], priors: Union[float, npt.NDArray[Any]], posteriors: npt.NDArray[Any], errors: npt.NDArray[Any], @@ -147,44 +147,54 @@ def _evidence_lower_bound( # we handle log(0) * 0 == 0 case with nan_to_num so warnings are irrelevant here with np.errstate(divide="ignore", invalid="ignore"): log_likelihood_expectation: float = ( - np.nan_to_num( # type: ignore + np.nan_to_num( (np.log(weighted_seg) + np.log(priors)[None, ...]) * posteriors, nan=0, ).sum() - + np.nan_to_num( # type: ignore + + np.nan_to_num( (np.log(1 - weighted_seg) + np.log(1 - priors)[None, ...]) * (1 - posteriors), nan=0, ).sum() ) - return log_likelihood_expectation - float(np.nan_to_num(np.log(posteriors) * posteriors, nan=0).sum()) # type: ignore + return log_likelihood_expectation - float( + np.nan_to_num(np.log(posteriors) * posteriors, nan=0).sum() + ) - def _aggregate_one(self, segmentations: pd.Series) -> npt.NDArray[np.bool_]: + def _aggregate_one(self, segmentations: "pd.Series[Any]") -> npt.NDArray[np.bool_]: """ Performs the Expectation-Maximization algorithm for a single image. """ priors = sum(segmentations) / len(segmentations) - segmentations = np.stack(segmentations.values) - segmentation_region_size = segmentations.any(axis=0).sum() + segmentations_np: npt.NDArray[Any] = np.stack(segmentations.values) # type: ignore + segmentation_region_size = segmentations_np.any(axis=0).sum() if segmentation_region_size == 0: - return np.zeros_like(segmentations[0]) + return np.zeros_like(segmentations_np[0]) - segmentations_sizes = segmentations.sum(axis=(1, 2)) + segmentations_sizes = segmentations_np.sum(axis=(1, 2)) # initialize with errors assuming that ground truth segmentation is majority vote - errors = self._m_step(segmentations, np.round(priors), segmentation_region_size, segmentations_sizes) # type: ignore + errors = self._m_step( + segmentations_np, + np.round(priors), + segmentation_region_size, + segmentations_sizes, + ) loss = -np.inf self.loss_history_ = [] for _ in range(self.n_iter): - posteriors = self._e_step(segmentations, errors, priors) + posteriors = self._e_step(segmentations_np, errors, priors) posteriors[posteriors < self.eps] = 0 errors = self._m_step( - segmentations, posteriors, segmentation_region_size, segmentations_sizes + segmentations_np, + posteriors, + segmentation_region_size, + segmentations_sizes, ) new_loss = self._evidence_lower_bound( - segmentations, priors, posteriors, errors - ) / (len(segmentations) * segmentations[0].size) + segmentations_np, priors, posteriors, errors + ) / (len(segmentations_np) * segmentations_np[0].size) priors = posteriors self.loss_history_.append(new_loss) if new_loss - loss < self.tol: @@ -213,7 +223,7 @@ def fit(self, data: pd.DataFrame) -> "SegmentationEM": ) return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated segmentations. Args: diff --git a/crowdkit/aggregation/image_segmentation/segmentation_majority_vote.py b/crowdkit/aggregation/image_segmentation/segmentation_majority_vote.py index 416a6545..3d1d2445 100644 --- a/crowdkit/aggregation/image_segmentation/segmentation_majority_vote.py +++ b/crowdkit/aggregation/image_segmentation/segmentation_majority_vote.py @@ -1,6 +1,6 @@ __all__ = ["SegmentationMajorityVote"] -from typing import Optional +from typing import Any, Optional import attr import numpy as np @@ -68,7 +68,7 @@ class SegmentationMajorityVote(BaseImageSegmentationAggregator): default_skill: Optional[float] = attr.ib(default=None) def fit( - self, data: pd.DataFrame, skills: pd.Series = None + self, data: pd.DataFrame, skills: Optional["pd.Series[Any]"] = None ) -> "SegmentationMajorityVote": """ Fits the model to the training data. @@ -102,8 +102,8 @@ def fit( return self def fit_predict( - self, data: pd.DataFrame, skills: Optional[pd.Series] = None - ) -> pd.Series: + self, data: pd.DataFrame, skills: Optional["pd.Series[Any]"] = None + ) -> "pd.Series[Any]": """ Fits the model to the training data and returns the aggregated segmentations. diff --git a/crowdkit/aggregation/image_segmentation/segmentation_rasa.py b/crowdkit/aggregation/image_segmentation/segmentation_rasa.py index 5fa4fda1..e45077ae 100644 --- a/crowdkit/aggregation/image_segmentation/segmentation_rasa.py +++ b/crowdkit/aggregation/image_segmentation/segmentation_rasa.py @@ -68,7 +68,7 @@ class SegmentationRASA(BaseImageSegmentationAggregator): @staticmethod def _segmentation_weighted( - segmentations: pd.Series, weights: npt.NDArray[Any] + segmentations: "pd.Series[Any]", weights: npt.NDArray[Any] ) -> npt.NDArray[Any]: """ Performs the weighted Majority Vote algorithm. @@ -81,36 +81,36 @@ def _segmentation_weighted( @staticmethod def _calculate_weights( - segmentations: pd.Series, mv: npt.NDArray[Any] + segmentations: "pd.Series[Any]", mv: npt.NDArray[Any] ) -> npt.NDArray[Any]: """ Calculates weights for each worker from the current Majority Vote estimation. """ intersection = (segmentations & mv).astype(float) union = (segmentations | mv).astype(float) - distances = 1 - intersection.sum(axis=(1, 2)) / union.sum(axis=(1, 2)) + distances = 1 - intersection.sum(axis=(1, 2)) / union.sum(axis=(1, 2)) # type: ignore # add a small bias for more # numerical stability and correctness of transform. weights = np.log(1 / (distances + _EPS) + 1) return cast(npt.NDArray[Any], weights / np.sum(weights)) - def _aggregate_one(self, segmentations: pd.Series) -> npt.NDArray[Any]: + def _aggregate_one(self, segmentations: "pd.Series[Any]") -> npt.NDArray[Any]: """ Performs Segmentation RASA algorithm for a single image. """ size = len(segmentations) - segmentations = np.stack(segmentations.values) + segmentations_np = np.stack(segmentations.values) # type: ignore weights = np.full(size, 1 / size) - mv = self._segmentation_weighted(segmentations, weights) + mv = self._segmentation_weighted(segmentations_np, weights) last_aggregated = None self.loss_history_ = [] for _ in range(self.n_iter): - weighted = self._segmentation_weighted(segmentations, weights) + weighted = self._segmentation_weighted(segmentations_np, weights) mv = weighted >= 0.5 - weights = self._calculate_weights(segmentations, mv) + weights = self._calculate_weights(segmentations_np, mv) if last_aggregated is not None: delta = weighted - last_aggregated @@ -146,7 +146,7 @@ def fit(self, data: pd.DataFrame) -> "SegmentationRASA": return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fits the model to the training data and returns the aggregated segmentations. Args: diff --git a/crowdkit/aggregation/multilabel/binary_relevance.py b/crowdkit/aggregation/multilabel/binary_relevance.py index 04a88d58..741118b1 100644 --- a/crowdkit/aggregation/multilabel/binary_relevance.py +++ b/crowdkit/aggregation/multilabel/binary_relevance.py @@ -1,6 +1,6 @@ __all__ = ["BinaryRelevance"] -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Union, cast import attr import pandas as pd @@ -99,9 +99,9 @@ def fit(self, data: pd.DataFrame) -> "BinaryRelevance": if label_aggregator.labels_ is not None: # for mypy correct work for task, label_value in label_aggregator.labels_.items(): if task not in task_to_labels: - task_to_labels[task] = list() + task_to_labels[cast(Union[str, float], task)] = list() if label_value: - task_to_labels[task].append(label) + task_to_labels[cast(Union[str, float], task)].append(label) if not task_to_labels: self.labels_ = pd.Series(task_to_labels, dtype=float) else: @@ -110,7 +110,7 @@ def fit(self, data: pd.DataFrame) -> "BinaryRelevance": self.labels_.index.name = "task" return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fit the model and return aggregated results. Args: @@ -122,4 +122,6 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: A pandas.Series indexed by `task` such that `labels.loc[task]` is a list with the task's aggregated labels. """ - return self.fit(data).labels_ + self.fit(data) + assert self.labels_ is not None, "no labels_ produced" + return self.labels_ diff --git a/crowdkit/aggregation/pairwise/bradley_terry.py b/crowdkit/aggregation/pairwise/bradley_terry.py index a668b2b3..364a6856 100644 --- a/crowdkit/aggregation/pairwise/bradley_terry.py +++ b/crowdkit/aggregation/pairwise/bradley_terry.py @@ -1,6 +1,6 @@ __all__ = ["BradleyTerry"] -from typing import List, Tuple +from typing import Any, List, Tuple import attr import numpy as np @@ -104,7 +104,7 @@ def fit(self, data: pd.DataFrame) -> "BradleyTerry": self.loss_history_ = [] for _ in range(self.n_iter): - P: npt.NDArray[np.float_] = np.broadcast_to(p, M.shape) # type: ignore + P: npt.NDArray[np.float_] = np.broadcast_to(p, M.shape) Z[active] = T[active] / (P[active] + P.T[active]) @@ -125,7 +125,7 @@ def fit(self, data: pd.DataFrame) -> "BradleyTerry": return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' pairwise comparison results. A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'. @@ -143,7 +143,7 @@ def _build_win_matrix( ) -> Tuple[npt.NDArray[np.int_], npt.NDArray[np.int_]]: data = data[["left", "right", "label"]] - unique_labels, np_data = np.unique(data.values, return_inverse=True) # type: ignore + unique_labels, np_data = np.unique(data.values, return_inverse=True) np_data = np_data.reshape(data.shape) left_wins = np_data[np_data[:, 0] == np_data[:, 2], :2].T diff --git a/crowdkit/aggregation/pairwise/noisy_bt.py b/crowdkit/aggregation/pairwise/noisy_bt.py index 6794ab47..81afc95d 100644 --- a/crowdkit/aggregation/pairwise/noisy_bt.py +++ b/crowdkit/aggregation/pairwise/noisy_bt.py @@ -62,8 +62,8 @@ class NoisyBradleyTerry(BasePairwiseAggregator): tol: float = attr.ib(default=1e-5) regularization_ratio: float = attr.ib(default=1e-5) random_state: int = attr.ib(default=0) - skills_: pd.Series = named_series_attrib(name="skill") - biases_: pd.Series = named_series_attrib(name="bias") + skills_: "pd.Series[Any]" = named_series_attrib(name="skill") + biases_: "pd.Series[Any]" = named_series_attrib(name="bias") # scores_ @@ -78,7 +78,7 @@ def fit(self, data: pd.DataFrame) -> "NoisyBradleyTerry": """ unique_labels, np_data = factorize(data[["left", "right", "label"]].values) - unique_workers, np_workers = factorize(data.worker.values) + unique_workers, np_workers = factorize(data.worker.values) # type: ignore np.random.seed(self.random_state) x_0 = np.random.rand(1 + unique_labels.size + 2 * unique_workers.size) np_data += 1 @@ -113,7 +113,7 @@ def fit(self, data: pd.DataFrame) -> "NoisyBradleyTerry": return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Args: data (DataFrame): Workers' pairwise comparison results. A pandas.DataFrame containing `worker`, `left`, `right`, and `label` columns'. diff --git a/crowdkit/aggregation/texts/rover.py b/crowdkit/aggregation/texts/rover.py index 653ed8da..19b484fd 100644 --- a/crowdkit/aggregation/texts/rover.py +++ b/crowdkit/aggregation/texts/rover.py @@ -4,7 +4,7 @@ from copy import deepcopy from enum import Enum, unique -from typing import Callable, Dict, List, Optional, Tuple, cast +from typing import Any, Callable, Dict, List, Optional, Tuple, cast import attr import numpy as np @@ -100,7 +100,7 @@ def fit(self, data: pd.DataFrame) -> "ROVER": return self - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Fit the model and return the aggregated texts. Args: @@ -227,7 +227,7 @@ def _align( ) ) - distance[i, j], memoization[i][j] = min(options, key=lambda t: t[0]) # type: ignore + distance[i, j], memoization[i][j] = min(options, key=lambda t: t[0]) alignment = [] i = len(hyp_edges) diff --git a/crowdkit/aggregation/texts/text_hrrasa.py b/crowdkit/aggregation/texts/text_hrrasa.py index ff272932..5984e3b4 100644 --- a/crowdkit/aggregation/texts/text_hrrasa.py +++ b/crowdkit/aggregation/texts/text_hrrasa.py @@ -69,7 +69,7 @@ def __getattr__(self, name: str) -> Any: return getattr(self._hrrasa, name) def fit_predict_scores( - self, data: pd.DataFrame, true_objects: pd.Series = None + self, data: pd.DataFrame, true_objects: "pd.Series[Any]" ) -> pd.DataFrame: """Fit the model and return scores. @@ -90,9 +90,9 @@ def fit_predict_scores( self._encode_data(data), self._encode_true_objects(true_objects) ) - def fit_predict( - self, data: pd.DataFrame, true_objects: pd.Series = None - ) -> pd.Series: + def fit_predict( # type: ignore + self, data: pd.DataFrame, true_objects: "pd.Series[Any]" + ) -> "pd.Series[Any]": """Fit the model and return aggregated texts. Args: @@ -112,7 +112,7 @@ def fit_predict( self._encode_data(data), self._encode_true_objects(true_objects) ) self.texts_ = ( - hrrasa_results.reset_index()[["task", "output"]] + hrrasa_results.reset_index()[["task", "output"]] # type: ignore .rename(columns={"output": "text"}) .set_index("task") ) @@ -120,8 +120,8 @@ def fit_predict( def _encode_data(self, data: pd.DataFrame) -> pd.DataFrame: data = data[["task", "worker", "text"]].rename(columns={"text": "output"}) - data["embedding"] = data.output.apply(self.encoder) + data["embedding"] = data.output.apply(self.encoder) # type: ignore return data - def _encode_true_objects(self, true_objects: pd.Series) -> pd.Series: - return true_objects and true_objects.apply(self.encoder) + def _encode_true_objects(self, true_objects: "pd.Series[Any]") -> "pd.Series[Any]": + return true_objects and true_objects.apply(self.encoder) # type: ignore diff --git a/crowdkit/aggregation/texts/text_rasa.py b/crowdkit/aggregation/texts/text_rasa.py index 760ee812..2fedc16d 100644 --- a/crowdkit/aggregation/texts/text_rasa.py +++ b/crowdkit/aggregation/texts/text_rasa.py @@ -1,6 +1,6 @@ __all__ = ["TextRASA"] -from typing import Any, Callable, List, Optional +from typing import Any, Callable, List import numpy.typing as npt import pandas as pd @@ -52,8 +52,8 @@ def __init__( def __getattr__(self, name: str) -> Any: return getattr(self._rasa, name) - def fit( - self, data: pd.DataFrame, true_objects: Optional[pd.Series] = None + def fit( # type: ignore + self, data: pd.DataFrame, true_objects: "pd.Series[Any]" ) -> "TextRASA": """Fit the model. Args: @@ -71,7 +71,7 @@ def fit( return self def fit_predict_scores( - self, data: pd.DataFrame, true_objects: Optional[pd.Series] = None + self, data: pd.DataFrame, true_objects: "pd.Series[Any]" ) -> pd.DataFrame: """Fit the model and return scores. @@ -92,9 +92,9 @@ def fit_predict_scores( self._encode_data(data), self._encode_true_objects(true_objects) ) - def fit_predict( - self, data: pd.DataFrame, true_objects: Optional[pd.Series] = None - ) -> pd.Series: + def fit_predict( # type: ignore + self, data: pd.DataFrame, true_objects: "pd.Series[Any]" + ) -> "pd.Series[Any]": """Fit the model and return aggregated texts. Args: @@ -114,7 +114,7 @@ def fit_predict( self._encode_data(data), self._encode_true_objects(true_objects) ) self.texts_ = ( - rasa_results.reset_index()[["task", "output"]] + rasa_results.reset_index()[["task", "output"]] # type: ignore .rename(columns={"output": "text"}) .set_index("task") ) @@ -122,8 +122,8 @@ def fit_predict( def _encode_data(self, data: pd.DataFrame) -> pd.DataFrame: data = data[["task", "worker", "text"]].rename(columns={"text": "output"}) - data["embedding"] = data.output.apply(self.encoder) + data["embedding"] = data.output.apply(self.encoder) # type: ignore return data - def _encode_true_objects(self, true_objects: pd.Series) -> pd.Series: - return true_objects and true_objects.apply(self.encoder) + def _encode_true_objects(self, true_objects: "pd.Series[Any]") -> "pd.Series[Any]": + return true_objects and true_objects.apply(self.encoder) # type: ignore diff --git a/crowdkit/aggregation/utils.py b/crowdkit/aggregation/utils.py index bede4c0c..648e9486 100644 --- a/crowdkit/aggregation/utils.py +++ b/crowdkit/aggregation/utils.py @@ -52,18 +52,18 @@ def _argmax_random_ties(array: npt.NDArray[Any]) -> int: return int(np.random.choice(np.flatnonzero(array == array.max()))) -def evaluate_in(row: pd.Series) -> int: +def evaluate_in(row: "pd.Series[Any]") -> int: return int(row["label_pred"] in row["label_true"]) -def evaluate_equal(row: pd.Series) -> int: +def evaluate_equal(row: "pd.Series[Any]") -> int: return int(row["label_pred"] == row["label_true"]) def evaluate( df_true: pd.DataFrame, df_pred: pd.DataFrame, - evaluate_func: Callable[[pd.Series], int] = evaluate_in, + evaluate_func: Callable[["pd.Series[Any]"], int] = evaluate_in, ) -> Union[str, float]: df = df_true.merge(df_pred, on="task", suffixes=("_true", "_pred")) @@ -76,11 +76,11 @@ def evaluate( def factorize(data: npt.NDArray[Any]) -> Tuple[npt.NDArray[Any], npt.NDArray[Any]]: - unique_values, coded = np.unique(data, return_inverse=True) # type: ignore + unique_values, coded = np.unique(data, return_inverse=True) return unique_values, coded.reshape(data.shape) -def get_most_probable_labels(proba: pd.DataFrame) -> pd.Series: +def get_most_probable_labels(proba: pd.DataFrame) -> "pd.Series[Any]": """Returns most probable labels Args: @@ -113,7 +113,9 @@ def normalize_rows(scores: pd.DataFrame) -> pd.DataFrame: def manage_data( - data: pd.DataFrame, weights: Optional[pd.Series] = None, skills: pd.Series = None + data: pd.DataFrame, + weights: Optional["pd.Series[Any]"] = None, + skills: Optional["pd.Series[Any]"] = None, ) -> pd.DataFrame: """ Args: @@ -138,8 +140,8 @@ def manage_data( def get_accuracy( - data: pd.DataFrame, true_labels: pd.Series, by: Optional[str] = None -) -> pd.Series: + data: pd.DataFrame, true_labels: "pd.Series[Any]", by: Optional[str] = None +) -> "pd.Series[Any]": """ Args: data (DataFrame): Workers' labeling results. @@ -173,15 +175,16 @@ def get_accuracy( ) if by is not None: - data = data.groupby(by) - - return data.score.sum() / data.weight.sum() + group = data.groupby(by) + return group.score.sum() / group.weight.sum() + else: + return data.score.sum() / data.weight.sum() # type: ignore -def named_series_attrib(name: str) -> pd.Series: +def named_series_attrib(name: str) -> "pd.Series[Any]": """Attrs attribute with converter and setter which preserves specified attribute name""" - def converter(series: pd.Series) -> pd.Series: + def converter(series: "pd.Series[Any]") -> "pd.Series[Any]": series.name = name return series @@ -190,7 +193,7 @@ def converter(series: pd.Series) -> pd.Series: def add_skills_to_data( data: pd.DataFrame, - skills: pd.Series, + skills: "pd.Series[Any]", on_missing_skill: str, default_skill: Optional[float], ) -> pd.DataFrame: diff --git a/crowdkit/datasets/_loaders.py b/crowdkit/datasets/_loaders.py index 9f37c3d0..49554944 100644 --- a/crowdkit/datasets/_loaders.py +++ b/crowdkit/datasets/_loaders.py @@ -1,5 +1,5 @@ from os.path import exists, join -from typing import Callable, Dict, Optional, Tuple, Union +from typing import Any, Callable, Dict, Optional, Tuple, Union import numpy as np import pandas as pd @@ -20,7 +20,7 @@ def _load_dataset( return full_data_path -def _load_ms_coco_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: +def _load_ms_coco_dataframes(data_path: str) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = np.load(join(data_path, "crowd_labels.npz")) rows = [] for key in labels.files: @@ -39,12 +39,14 @@ def _load_ms_coco_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: return labels, true_labels -def load_relevance2(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]: +def load_relevance2( + data_dir: Optional[str] = None, +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "relevance-2" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance-2.zip" checksum_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance-2.md5" - def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: + def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = pd.read_csv(join(data_path, "crowd_labels.csv")).rename( columns={"performer": "worker"} ) @@ -61,12 +63,14 @@ def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: return load_dataframes(full_data_path) -def load_relevance5(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]: +def load_relevance5( + data_dir: Optional[str] = None, +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "relevance-5" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance-5.zip" checksum_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance-5.md5" - def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: + def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = pd.read_csv(join(data_path, "crowd_labels.csv")).rename( columns={"performer": "worker"} ) @@ -83,7 +87,9 @@ def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: return load_dataframes(full_data_path) -def load_mscoco(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]: +def load_mscoco( + data_dir: Optional[str] = None, +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "mscoco" data_url = "https://huggingface.co/datasets/toloka/crowdkit-datasets/resolve/af2c00549cc026eaea80c18c54a686d98a58fd6e/mscoco.zip" checksum_url = "https://huggingface.co/datasets/toloka/crowdkit-datasets/resolve/79d5468d12d233153c0fdcee0dd61b98980ff7a4/mscoco.md5" @@ -93,7 +99,9 @@ def load_mscoco(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series return _load_ms_coco_dataframes(full_data_path) -def load_mscoco_small(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd.Series]: +def load_mscoco_small( + data_dir: Optional[str] = None, +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "mscoco_small" data_url = "https://huggingface.co/datasets/toloka/crowdkit-datasets/resolve/0e0cac7f51869d4b20d83842c578ca3d013af7b7/mscoco_small.zip" checksum_url = "https://huggingface.co/datasets/toloka/crowdkit-datasets/resolve/bb48658b78db95845ff2a8d3db3e533a493ab819/mscoco_small.md5" @@ -103,7 +111,9 @@ def load_mscoco_small(data_dir: Optional[str] = None) -> Tuple[pd.DataFrame, pd. return _load_ms_coco_dataframes(full_data_path) -def load_crowdspeech_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: +def load_crowdspeech_dataframes( + data_path: str, +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = pd.read_csv(join(data_path, "crowd_labels.csv")).rename( columns={"output": "text", "performer": "worker"} ) @@ -118,7 +128,7 @@ def load_crowdspeech_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series def load_crowdspeech_dev_clean( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "crowdspeech-dev-clean" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/crowdspeech-dev-clean.zip" checksum_url = ( @@ -131,7 +141,7 @@ def load_crowdspeech_dev_clean( def load_crowdspeech_dev_other( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "crowdspeech-dev-other" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/crowdspeech-dev-other.zip" checksum_url = ( @@ -144,7 +154,7 @@ def load_crowdspeech_dev_other( def load_crowdspeech_test_clean( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "crowdspeech-test-clean" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/crowdspeech-test-clean.zip" checksum_url = ( @@ -157,7 +167,7 @@ def load_crowdspeech_test_clean( def load_crowdspeech_test_other( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "crowdspeech-test-other" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/crowdspeech-test-other.zip" checksum_url = ( @@ -170,12 +180,12 @@ def load_crowdspeech_test_other( def load_imdb_wiki_sbs( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "imdb-wiki-sbs" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/imdb-wiki-sbs.zip" checksum_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/imdb-wiki-sbs.md5" - def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: + def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = pd.read_csv(join(data_path, "crowd_labels.csv")).rename( columns={"performer": "worker"} ) @@ -197,12 +207,12 @@ def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: def load_nist_trec_relevance( data_dir: Optional[str] = None, -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: data_name = "nist-trec-relevance" data_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance.zip" checksum_url = "https://tlk.s3.yandex.net/dataset/crowd-kit/relevance.md5" - def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: + def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: labels = pd.read_csv(join(data_path, "crowd_labels.csv")).rename( columns={"performer": "worker"} ) @@ -221,7 +231,10 @@ def load_dataframes(data_path: str) -> Tuple[pd.DataFrame, pd.Series]: DATA_LOADERS: Dict[ str, - Dict[str, Union[str, Callable[[Optional[str]], Tuple[pd.DataFrame, pd.Series]]]], + Dict[ + str, + Union[str, Callable[[Optional[str]], Tuple[pd.DataFrame, "pd.Series[Any]"]]], + ], ] = { "relevance-2": { "loader": load_relevance2, diff --git a/crowdkit/datasets/load_dataset.py b/crowdkit/datasets/load_dataset.py index 3bf8400c..5a089891 100644 --- a/crowdkit/datasets/load_dataset.py +++ b/crowdkit/datasets/load_dataset.py @@ -3,7 +3,7 @@ "get_datasets_list", ] -from typing import Callable, Dict, List, Optional, Tuple, cast +from typing import Any, Callable, Dict, List, Optional, Tuple, cast import pandas as pd @@ -12,7 +12,7 @@ def load_dataset( dataset: str, data_dir: Optional[str] = None -) -> Tuple[pd.DataFrame, pd.Series]: +) -> Tuple[pd.DataFrame, "pd.Series[Any]"]: """Downloads a dataset from remote and loads it into Pandas objects. If a dataset is already downloaded, loads it from cache. @@ -29,7 +29,7 @@ def load_dataset( raise ValueError("This dataset does not exist") return cast( - Dict[str, Callable[[Optional[str]], Tuple[pd.DataFrame, pd.Series]]], + Dict[str, Callable[[Optional[str]], Tuple[pd.DataFrame, "pd.Series[Any]"]]], DATA_LOADERS[dataset], )["loader"](data_dir) diff --git a/crowdkit/learning/conal.py b/crowdkit/learning/conal.py index cbb56c31..0f499b32 100644 --- a/crowdkit/learning/conal.py +++ b/crowdkit/learning/conal.py @@ -36,7 +36,7 @@ def _identity_init(shape: Union[Tuple[int, int], Tuple[int, int, int]]) -> torch return torch.Tensor(out) -class CoNAL(nn.Module): # type: ignore +class CoNAL(nn.Module): """ Common Noise Adaptation Layers (CoNAL). This method introduces two types of confusions: worker-specific and global. Each is parameterized by a confusion matrix. The ratio of the two confusions is determined by the diff --git a/crowdkit/learning/crowd_layer.py b/crowdkit/learning/crowd_layer.py index b4a14240..fefc8493 100644 --- a/crowdkit/learning/crowd_layer.py +++ b/crowdkit/learning/crowd_layer.py @@ -2,7 +2,7 @@ "CrowdLayer", ] -from typing import Optional +from typing import Optional, cast import torch from torch import nn @@ -25,9 +25,10 @@ def crowd_layer_mw( Returns: torch.Tensor: Tensor of shape (batch_size, input_dim) """ - return torch.einsum( - "lij,ljk->lik", weight[workers], outputs.unsqueeze(-1) - ).squeeze() + return cast( + torch.Tensor, + torch.einsum("lij,ljk->lik", weight[workers], outputs.unsqueeze(-1)).squeeze(), + ) def crowd_layer_vw( @@ -44,7 +45,7 @@ def crowd_layer_vw( Returns: torch.Tensor: Tensor of shape (batch_size, input_dim) """ - return weight[workers] * outputs + return cast(torch.Tensor, weight[workers] * outputs) def crowd_layer_vb( @@ -61,7 +62,7 @@ def crowd_layer_vb( Returns: torch.Tensor: Tensor of shape (batch_size, input_dim) """ - return outputs + weight[workers] + return cast(torch.Tensor, outputs + weight[workers]) def crowd_layer_vw_b( @@ -82,10 +83,10 @@ def crowd_layer_vw_b( Returns: torch.Tensor: Tensor of shape (batch_size, input_dim) """ - return scale[workers] * outputs + bias[workers] + return cast(torch.Tensor, scale[workers] * outputs + bias[workers]) -class CrowdLayer(nn.Module): # type: ignore +class CrowdLayer(nn.Module): """ CrowdLayer module for classification tasks. diff --git a/crowdkit/learning/text_summarization.py b/crowdkit/learning/text_summarization.py index 4a660034..d6fe9477 100644 --- a/crowdkit/learning/text_summarization.py +++ b/crowdkit/learning/text_summarization.py @@ -3,12 +3,12 @@ ] import itertools -from typing import Optional, cast +from typing import Any, Iterable, Optional, Union, cast import attr import numpy as np import pandas as pd -from transformers import PreTrainedModel, PreTrainedTokenizer # type: ignore +from transformers import PreTrainedModel, PreTrainedTokenizer from crowdkit.aggregation.base import BaseTextsAggregator @@ -75,7 +75,7 @@ class TextSummarization(BaseTextsAggregator): # texts_ - def fit_predict(self, data: pd.DataFrame) -> pd.Series: + def fit_predict(self, data: pd.DataFrame) -> "pd.Series[Any]": """Run the aggregation and return the aggregated texts. Args: data (DataFrame): Workers' text outputs. @@ -88,11 +88,11 @@ def fit_predict(self, data: pd.DataFrame) -> pd.Series: data = data[["task", "worker", "text"]] - self.model = self.model.to(self.device) # type: ignore + self.model = self.model.to(self.device) self.texts_ = data.groupby("task")["text"].apply(self._aggregate_one) return self.texts_ - def _aggregate_one(self, outputs: pd.Series) -> str: + def _aggregate_one(self, outputs: "pd.Series[Any]") -> str: if not self.n_permutations: return self._generate_output(outputs) @@ -116,10 +116,12 @@ def _aggregate_one(self, outputs: pd.Series) -> str: return cast(str, data.text.mode()) - def _generate_output(self, permutation: pd.Series) -> str: + def _generate_output( + self, permutation: Union[Iterable[Any], "pd.Series[Any]"] + ) -> str: input_text = self.concat_token.join(permutation) input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to( self.device ) - outputs = self.model.generate(input_ids, num_beams=self.num_beams) # type: ignore + outputs = self.model.generate(input_ids, num_beams=self.num_beams) return cast(str, self.tokenizer.decode(outputs[0], skip_special_tokens=True)) diff --git a/crowdkit/learning/utils.py b/crowdkit/learning/utils.py index e23375f2..646ef27d 100644 --- a/crowdkit/learning/utils.py +++ b/crowdkit/learning/utils.py @@ -3,7 +3,7 @@ "batch_identity_matrices", ] -from typing import Optional +from typing import Optional, cast import torch import torch.nn.functional as F @@ -50,4 +50,4 @@ def batch_identity_matrices( """ x = torch.eye(dim_size, dtype=dtype, device=device) x = x.reshape((1, dim_size, dim_size)) - return x.repeat(batch_size, 1, 1) + return cast(torch.Tensor, x.repeat(batch_size, 1, 1)) diff --git a/crowdkit/metrics/data/_classification.py b/crowdkit/metrics/data/_classification.py index e6404e3f..69ab0a41 100644 --- a/crowdkit/metrics/data/_classification.py +++ b/crowdkit/metrics/data/_classification.py @@ -4,7 +4,7 @@ "alpha_krippendorff", ] -from typing import Any, Callable, Hashable, List, Optional, Tuple, Union +from typing import Any, Callable, Hashable, List, Optional, Tuple, Union, cast import numpy as np import pandas as pd @@ -24,7 +24,7 @@ def _check_answers(answers: pd.DataFrame) -> None: assert "label" in answers, 'There is no "label" column in answers' -def _label_probability(row: pd.Series, label: Any, n_labels: int) -> float: +def _label_probability(row: "pd.Series[Any]", label: Any, n_labels: int) -> float: """Numerator in the Bayes formula""" if row["label"] == label: return float(row["skill"]) @@ -32,7 +32,7 @@ def _label_probability(row: pd.Series, label: Any, n_labels: int) -> float: return (1.0 - float(row["skill"])) / (n_labels - 1) -def _task_consistency(row: pd.Series) -> float: +def _task_consistency(row: "pd.Series[Any]") -> float: """Posterior probability for a single task""" if row["denominator"] != 0: return float(row[row["aggregated_label"]]) / float(row["denominator"]) @@ -42,13 +42,13 @@ def _task_consistency(row: pd.Series) -> float: def consistency( answers: pd.DataFrame, - workers_skills: Optional[pd.Series] = None, + workers_skills: Optional["pd.Series[Any]"] = None, aggregator: BaseClassificationAggregator = MajorityVote(), by_task: bool = False, -) -> Union[float, pd.Series]: +) -> Union[float, "pd.Series[Any]"]: """ Consistency metric: posterior probability of aggregated label given workers skills - calculated using standard Dawid-Skene model. + calculated using the standard Dawid-Skene model. Args: answers (pandas.DataFrame): A data frame containing `task`, `worker` and `label` columns. @@ -64,7 +64,7 @@ def consistency( aggregated = aggregator.fit_predict(answers) if workers_skills is None: if hasattr(aggregator, "skills_"): - workers_skills = aggregator.skills_ # type: ignore + workers_skills = aggregator.skills_ else: raise AssertionError( "This aggregator is not supported. Please, provide workers skills." @@ -94,7 +94,7 @@ def consistency( return consistencies.mean() -def _task_uncertainty(row: pd.Series, labels: List[Hashable]) -> float: +def _task_uncertainty(row: "pd.Series[Any]", labels: List[str]) -> float: if row["denominator"] == 0: row[labels] = 1 / len(labels) else: @@ -106,11 +106,11 @@ def _task_uncertainty(row: pd.Series, labels: List[Hashable]) -> float: def uncertainty( answers: pd.DataFrame, - workers_skills: Optional[pd.Series] = None, + workers_skills: Optional["pd.Series[Any]"] = None, aggregator: Optional[BaseClassificationAggregator] = None, compute_by: str = "task", aggregate: bool = True, -) -> Union[float, pd.Series]: +) -> Union[float, "pd.Series[Any]"]: r"""Label uncertainty metric: entropy of labels probability distribution. Computed as Shannon's Entropy with label probabilities computed either for tasks or workers: $$H(L) = -\sum_{label_i \in L} p(label_i) \cdot \log(p(label_i))$$ @@ -188,7 +188,7 @@ def uncertainty( if workers_skills is None and aggregator is not None: aggregator.fit(answers) if hasattr(aggregator, "skills_"): - workers_skills = aggregator.skills_ # type: ignore + workers_skills = aggregator.skills_ else: raise AssertionError( "This aggregator is not supported. Please, provide workers skills." @@ -214,9 +214,11 @@ def uncertainty( uncertainties = labels_proba.apply( lambda row: entropy(row[labels] / (sum(row[labels]) + 1e-6)), axis=1 ) + if aggregate: - return uncertainties.mean() - return uncertainties + return cast(float, uncertainties.mean()) + + return cast("pd.Series[Any]", uncertainties) def alpha_krippendorff( diff --git a/crowdkit/metrics/workers/accuracy_on_aggregates.py b/crowdkit/metrics/workers/accuracy_on_aggregates.py index f04d4c7f..8df6448d 100644 --- a/crowdkit/metrics/workers/accuracy_on_aggregates.py +++ b/crowdkit/metrics/workers/accuracy_on_aggregates.py @@ -2,7 +2,7 @@ "accuracy_on_aggregates", ] -from typing import Optional, Union +from typing import Any, Optional, Union import pandas as pd @@ -14,9 +14,9 @@ def accuracy_on_aggregates( answers: pd.DataFrame, aggregator: Optional[BaseClassificationAggregator] = MajorityVote(), - aggregates: Optional[pd.Series] = None, + aggregates: Optional["pd.Series[Any]"] = None, by: Optional[str] = None, -) -> Union[float, pd.Series]: +) -> Union[float, "pd.Series[Any]"]: """ Accuracy on aggregates: a fraction of worker's answers that match the aggregated one. diff --git a/crowdkit/postprocessing/entropy_threshold.py b/crowdkit/postprocessing/entropy_threshold.py index 6785158c..108781b9 100644 --- a/crowdkit/postprocessing/entropy_threshold.py +++ b/crowdkit/postprocessing/entropy_threshold.py @@ -3,7 +3,7 @@ ] import warnings -from typing import Optional, cast +from typing import Any, Optional, cast import numpy as np import pandas as pd @@ -13,7 +13,7 @@ def entropy_threshold( answers: pd.DataFrame, - workers_skills: Optional[pd.Series] = None, + workers_skills: Optional["pd.Series[Any]"] = None, percentile: int = 10, min_answers: int = 2, ) -> pd.DataFrame: @@ -64,7 +64,7 @@ def entropy_threshold( answers_for_filtration = answers[answers.worker.isin(answers_per_worker.index)] uncertainties = cast( - pd.Series, + "pd.Series[Any]", uncertainty( answers_for_filtration, workers_skills, @@ -73,7 +73,7 @@ def entropy_threshold( ), ) - cutoff = np.percentile(uncertainties, percentile) # type: ignore + cutoff = np.percentile(uncertainties, percentile) removed_workers = uncertainties[uncertainties <= cutoff].index diff --git a/setup.cfg b/setup.cfg index ea4b7f84..c238e1bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ dev = notebook ipywidgets stubmaker @ git+https://github.com/Toloka/stubmaker.git@main + pandas-stubs learning = torch>=1.6.0 @@ -72,7 +73,7 @@ learning = [mypy] ignore_missing_imports = True -warn_unused_ignores = False +plugins = numpy.typing.mypy_plugin strict = True [flake8] diff --git a/tests/aggregation/data_gold_mv.py b/tests/aggregation/data_gold_mv.py index aea1192d..b8e4274c 100644 --- a/tests/aggregation/data_gold_mv.py +++ b/tests/aggregation/data_gold_mv.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -5,12 +7,12 @@ @pytest.fixture -def toy_labels_result_gold(toy_ground_truth_df: pd.Series) -> pd.Series: +def toy_labels_result_gold(toy_ground_truth_df: "pd.Series[Any]") -> "pd.Series[Any]": return toy_ground_truth_df @pytest.fixture -def toy_skills_result_gold() -> pd.Series: +def toy_skills_result_gold() -> "pd.Series[Any]": return pd.Series( [0.5, 1.0, 1.0, 0.5, 0.0], pd.Index(["w1", "w2", "w3", "w4", "w5"], name="worker"), @@ -72,12 +74,14 @@ def toy_answers_on_gold_df_cannot_predict() -> pd.DataFrame: @pytest.fixture -def simple_labels_result_gold(simple_ground_truth: pd.Series) -> pd.Series: +def simple_labels_result_gold( + simple_ground_truth: "pd.Series[Any]", +) -> "pd.Series[Any]": return simple_ground_truth @pytest.fixture -def simple_skills_result_gold() -> pd.Series: +def simple_skills_result_gold() -> "pd.Series[Any]": skills = pd.Series( { "0c3eb7d5fcc414db137c4180a654c06e": 0.5, @@ -145,12 +149,12 @@ def multiple_gt_df() -> pd.DataFrame: @pytest.fixture -def multiple_gt_gt() -> pd.Series: +def multiple_gt_gt() -> "pd.Series[Any]": return pd.Series(["l1", "l2", "l2"], index=["t1", "t1", "t3"]) @pytest.fixture -def multiple_gt_aggregated() -> pd.Series: +def multiple_gt_aggregated() -> "pd.Series[Any]": aggregated = pd.Series(["l2", "l1", "l2"], index=["t1", "t2", "t3"]) aggregated.index.name = "task" aggregated.name = "agg_label" @@ -158,7 +162,7 @@ def multiple_gt_aggregated() -> pd.Series: @pytest.fixture -def multiple_gt_skills() -> pd.Series: +def multiple_gt_skills() -> "pd.Series[Any]": skills = pd.Series( [0.5, 1.0, 0.0], index=["w1", "w2", "w3"], diff --git a/tests/aggregation/data_image.py b/tests/aggregation/data_image.py index cb7c7d4c..7e34666e 100644 --- a/tests/aggregation/data_image.py +++ b/tests/aggregation/data_image.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Any, Tuple import numpy as np import pandas as pd @@ -86,7 +86,7 @@ def simple_image_df() -> pd.DataFrame: @pytest.fixture -def image_with_skills_df() -> Tuple[pd.DataFrame, pd.Series]: +def image_with_skills_df() -> Tuple[pd.DataFrame, "pd.Series[Any]"]: im1_seg1 = np.array([[1, 1, 0, 0, 0], [1, 0, 0, 0, 0]], dtype=bool) im1_seg2 = np.array([[1, 0, 1, 0, 1], [0, 0, 0, 0, 0]], dtype=bool) @@ -121,7 +121,7 @@ def image_with_skills_df() -> Tuple[pd.DataFrame, pd.Series]: @pytest.fixture -def simple_image_mv_result() -> pd.Series: +def simple_image_mv_result() -> "pd.Series[Any]": return pd.Series( [ np.array( @@ -147,7 +147,7 @@ def simple_image_mv_result() -> pd.Series: @pytest.fixture -def image_with_skills_mv_result() -> pd.Series: +def image_with_skills_mv_result() -> "pd.Series[Any]": return pd.Series( [np.array([[0, 1, 1, 1, 0], [0, 0, 0, 0, 0]], dtype=bool)], index=pd.Index([1], name="task"), @@ -156,7 +156,7 @@ def image_with_skills_mv_result() -> pd.Series: @pytest.fixture -def simple_image_rasa_result() -> pd.Series: +def simple_image_rasa_result() -> "pd.Series[Any]": return pd.Series( [ np.array( @@ -182,7 +182,7 @@ def simple_image_rasa_result() -> pd.Series: @pytest.fixture -def simple_image_em_result() -> pd.Series: +def simple_image_em_result() -> "pd.Series[Any]": return pd.Series( [ np.array( diff --git a/tests/aggregation/data_mmsr.py b/tests/aggregation/data_mmsr.py index 7fba6e20..7e4d38c5 100644 --- a/tests/aggregation/data_mmsr.py +++ b/tests/aggregation/data_mmsr.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -5,7 +7,7 @@ @pytest.fixture -def toy_labels_result_mmsr() -> pd.Series: +def toy_labels_result_mmsr() -> "pd.Series[Any]": return pd.Series( ["yes", "no", "no", "yes", "no"], index=pd.Index(["t1", "t2", "t3", "t4", "t5"], name="task"), @@ -14,7 +16,7 @@ def toy_labels_result_mmsr() -> pd.Series: @pytest.fixture -def toy_skills_result_mmsr() -> pd.Series: +def toy_skills_result_mmsr() -> "pd.Series[Any]": return pd.Series( [ -0.9486439852160969, @@ -47,12 +49,14 @@ def toy_scores_result_mmsr() -> pd.DataFrame: @pytest.fixture -def simple_labels_result_mmsr(simple_ground_truth: pd.Series) -> pd.Series: +def simple_labels_result_mmsr( + simple_ground_truth: "pd.Series[Any]", +) -> "pd.Series[Any]": return simple_ground_truth @pytest.fixture -def simple_skills_result_mmsr() -> pd.Series: +def simple_skills_result_mmsr() -> "pd.Series[Any]": skills = pd.Series( { "0c3eb7d5fcc414db137c4180a654c06e": -0.6268515139467665, diff --git a/tests/aggregation/data_mv.py b/tests/aggregation/data_mv.py index 00f2b2bf..00d067da 100644 --- a/tests/aggregation/data_mv.py +++ b/tests/aggregation/data_mv.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -5,7 +7,7 @@ @pytest.fixture -def toy_labels_result_mv() -> pd.Series: +def toy_labels_result_mv() -> "pd.Series[Any]": return pd.Series( ["no", "yes", "no", "yes", "no"], pd.Index(["t1", "t2", "t3", "t4", "t5"], name="task"), @@ -14,7 +16,7 @@ def toy_labels_result_mv() -> pd.Series: @pytest.fixture -def toy_skills_result_mv() -> pd.Series: +def toy_skills_result_mv() -> "pd.Series[Any]": return pd.Series( [0.6, 0.8, 1.0, 0.4, 0.8], pd.Index(["w1", "w2", "w3", "w4", "w5"], name="worker"), @@ -44,12 +46,12 @@ def toy_probas_result_mv() -> pd.DataFrame: @pytest.fixture -def simple_labels_result_mv(simple_ground_truth: pd.Series) -> pd.Series: +def simple_labels_result_mv(simple_ground_truth: "pd.Series[Any]") -> "pd.Series[Any]": return simple_ground_truth @pytest.fixture -def simple_skills_result_mv() -> pd.Series: +def simple_skills_result_mv() -> "pd.Series[Any]": skills = pd.Series( { "0c3eb7d5fcc414db137c4180a654c06e": 0.333333, diff --git a/tests/aggregation/data_rover.py b/tests/aggregation/data_rover.py index ff17d4b5..8c24ec18 100644 --- a/tests/aggregation/data_rover.py +++ b/tests/aggregation/data_rover.py @@ -1,9 +1,11 @@ +from typing import Any + import pandas as pd import pytest @pytest.fixture -def simple_text_result_rover() -> pd.Series: +def simple_text_result_rover() -> "pd.Series[Any]": return pd.Series( [ "as soon as you donned my worst is stocking sweetheart", diff --git a/tests/aggregation/data_wawa.py b/tests/aggregation/data_wawa.py index 991d9ea4..22189fa2 100644 --- a/tests/aggregation/data_wawa.py +++ b/tests/aggregation/data_wawa.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -5,7 +7,7 @@ @pytest.fixture -def toy_labels_result_wawa() -> pd.Series: +def toy_labels_result_wawa() -> "pd.Series[Any]": return pd.Series( ["no", "yes", "no", "yes", "no"], pd.Index(["t1", "t2", "t3", "t4", "t5"], name="task"), @@ -14,7 +16,7 @@ def toy_labels_result_wawa() -> pd.Series: @pytest.fixture -def toy_skills_result_wawa() -> pd.Series: +def toy_skills_result_wawa() -> "pd.Series[Any]": return pd.Series( [0.6, 0.8, 1.0, 0.4, 0.8], pd.Index(["w1", "w2", "w3", "w4", "w5"], name="worker"), @@ -44,12 +46,14 @@ def toy_probas_result_wawa() -> pd.DataFrame: @pytest.fixture -def simple_labels_result_wawa(simple_ground_truth: pd.Series) -> pd.Series: +def simple_labels_result_wawa( + simple_ground_truth: "pd.Series[Any]", +) -> "pd.Series[Any]": return simple_ground_truth @pytest.fixture -def simple_skills_result_wawa() -> pd.Series: +def simple_skills_result_wawa() -> "pd.Series[Any]": skills = pd.Series( { "0c3eb7d5fcc414db137c4180a654c06e": 1 / 3, diff --git a/tests/aggregation/data_zbs.py b/tests/aggregation/data_zbs.py index dc995e98..e71ad8b4 100644 --- a/tests/aggregation/data_zbs.py +++ b/tests/aggregation/data_zbs.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -5,7 +7,7 @@ @pytest.fixture -def toy_labels_result_zbs() -> pd.Series: +def toy_labels_result_zbs() -> "pd.Series[Any]": return pd.Series( ["no", "yes", "no", "yes", "no"], index=pd.Index(["t1", "t2", "t3", "t4", "t5"], name="task"), @@ -14,7 +16,7 @@ def toy_labels_result_zbs() -> pd.Series: @pytest.fixture -def toy_skills_result_zbs() -> pd.Series: +def toy_skills_result_zbs() -> "pd.Series[Any]": return pd.Series( [0.6, 0.8, 1.0, 0.4, 0.8], index=pd.Index(["w1", "w2", "w3", "w4", "w5"], name="worker"), @@ -41,12 +43,12 @@ def toy_probas_result_zbs() -> pd.DataFrame: @pytest.fixture -def simple_labels_result_zbs(simple_ground_truth: pd.Series) -> pd.Series: +def simple_labels_result_zbs(simple_ground_truth: "pd.Series[Any]") -> "pd.Series[Any]": return simple_ground_truth @pytest.fixture -def simple_skills_result_zbs() -> pd.Series: +def simple_skills_result_zbs() -> "pd.Series[Any]": skills = pd.Series( { "0c3eb7d5fcc414db137c4180a654c06e": 0.333333, diff --git a/tests/aggregation/test_binary_relevance_aggregation.py b/tests/aggregation/test_binary_relevance_aggregation.py index 98111d3c..47d33bfb 100644 --- a/tests/aggregation/test_binary_relevance_aggregation.py +++ b/tests/aggregation/test_binary_relevance_aggregation.py @@ -1,6 +1,8 @@ """ Simple aggregation tests. """ +from typing import Any + import pandas as pd import pytest from pandas.testing import assert_series_equal @@ -25,7 +27,7 @@ def data_toy_binary_relevance() -> pd.DataFrame: @pytest.fixture -def binary_relevance_toy_result() -> pd.Series: +def binary_relevance_toy_result() -> "pd.Series[Any]": result = pd.Series( [["house", "tree"], ["car"]], index=["t1", "t2"], name="agg_label" ) @@ -37,7 +39,7 @@ def binary_relevance_toy_result() -> pd.Series: @pytest.mark.filterwarnings("ignore:In a future version") def test_binary_relevance_aggregation_on_toy_data( aggregator: BaseClassificationAggregator, - binary_relevance_toy_result: pd.Series, + binary_relevance_toy_result: "pd.Series[Any]", data_toy_binary_relevance: pd.DataFrame, ) -> None: mb = BinaryRelevance(aggregator) diff --git a/tests/aggregation/test_bt_aggregation.py b/tests/aggregation/test_bt_aggregation.py index 91057365..66322c69 100644 --- a/tests/aggregation/test_bt_aggregation.py +++ b/tests/aggregation/test_bt_aggregation.py @@ -38,17 +38,17 @@ def data_equal() -> pd.DataFrame: @pytest.fixture -def result_empty() -> pd.Series: +def result_empty() -> "pd.Series[Any]": return pd.Series([], dtype=np.float64, name="agg_score") @pytest.fixture -def result_equal() -> pd.Series: +def result_equal() -> "pd.Series[Any]": return pd.Series([1 / 3, 1 / 3, 1 / 3], index=["a", "b", "c"], name="agg_score") @pytest.fixture -def noisy_bt_result() -> pd.Series: +def noisy_bt_result() -> "pd.Series[Any]": return pd.Series( [1.0, 1.0, 1.497123058531228e-45], index=pd.Index(["a", "b", "c"], name="label"), @@ -57,7 +57,7 @@ def noisy_bt_result() -> pd.Series: @pytest.fixture -def noisy_bt_result_equal() -> pd.Series: +def noisy_bt_result_equal() -> "pd.Series[Any]": return pd.Series( [0.6715468044437242, 0.6462882683525435, 0.632947637600415], index=pd.Index(["a", "b", "c"], name="label"), @@ -66,23 +66,25 @@ def noisy_bt_result_equal() -> pd.Series: @pytest.fixture -def result_iter_0() -> pd.Series: +def result_iter_0() -> "pd.Series[Any]": return pd.Series([1 / 3, 1 / 3, 1 / 3], index=["a", "b", "c"], name="agg_score") @pytest.fixture -def result_iter_10() -> pd.Series: +def result_iter_10() -> "pd.Series[Any]": return pd.Series([0.934, 0.065, 0.0], index=["a", "b", "c"], name="agg_score") -def test_bradley_terry_empty(result_empty: pd.Series, data_empty: pd.DataFrame) -> None: +def test_bradley_terry_empty( + result_empty: "pd.Series[Any]", data_empty: pd.DataFrame +) -> None: bt = BradleyTerry(n_iter=1).fit(data_empty) assert_series_equal(result_empty, bt.scores_) @pytest.mark.parametrize("n_iter, tol", [(10, 0), (100500, 1e-5)]) def test_bradley_terry_equal( - n_iter: int, tol: float, result_equal: pd.Series, data_equal: pd.DataFrame + n_iter: int, tol: float, result_equal: "pd.Series[Any]", data_equal: pd.DataFrame ) -> None: bt = BradleyTerry(n_iter=n_iter, tol=tol).fit(data_equal) assert_series_equal(result_equal, bt.scores_, atol=0.005) @@ -99,7 +101,7 @@ def test_bradley_terry_step_by_step( @pytest.mark.parametrize("n_iter, tol", [(10, 0), (100500, 1e-5)]) def test_noisy_bradley_terry( - n_iter: int, tol: float, data_abc: pd.DataFrame, noisy_bt_result: pd.Series + n_iter: int, tol: float, data_abc: pd.DataFrame, noisy_bt_result: "pd.Series[Any]" ) -> None: with pytest.warns(RuntimeWarning): noisy_bt = NoisyBradleyTerry(n_iter=n_iter, tol=tol).fit(data_abc) @@ -109,7 +111,7 @@ def test_noisy_bradley_terry( def test_noisy_bradley_terry_equal( - data_equal: pd.DataFrame, noisy_bt_result_equal: pd.Series + data_equal: pd.DataFrame, noisy_bt_result_equal: "pd.Series[Any]" ) -> None: noisy_bt = NoisyBradleyTerry().fit(data_equal) assert_series_equal(noisy_bt.scores_, noisy_bt_result_equal, atol=0.005) @@ -117,7 +119,7 @@ def test_noisy_bradley_terry_equal( @pytest.mark.parametrize("agg_class", [BradleyTerry, NoisyBradleyTerry]) def test_zero_iter( - agg_class: Any, data_equal: pd.DataFrame, result_equal: pd.Series + agg_class: Any, data_equal: pd.DataFrame, result_equal: "pd.Series[Any]" ) -> None: aggregator = agg_class(n_iter=0) answers = aggregator.fit_predict(data_equal) diff --git a/tests/aggregation/test_classification_aggregation.py b/tests/aggregation/test_classification_aggregation.py index 151073a6..8d4c8aa9 100644 --- a/tests/aggregation/test_classification_aggregation.py +++ b/tests/aggregation/test_classification_aggregation.py @@ -91,7 +91,7 @@ def test_fit_predict_classification_aggregations_methods( fit_method: str, predict_method: str, dataset: pd.DataFrame, - results_dataset: pd.Series, + results_dataset: "pd.Series[Any]", ) -> None: """ Tests all aggregation methods, that fit->predict chain works well, and at each step we have the correct values for: diff --git a/tests/aggregation/test_classification_aggregation_edge_cases.py b/tests/aggregation/test_classification_aggregation_edge_cases.py index 66cefa77..ae024700 100644 --- a/tests/aggregation/test_classification_aggregation_edge_cases.py +++ b/tests/aggregation/test_classification_aggregation_edge_cases.py @@ -146,7 +146,7 @@ def test_agg_raise_on_less_columns( def test_gold_mv_raise_in_fit( request: Any, not_random: Callable[[], None], - toy_gold_df: pd.Series, + toy_gold_df: "pd.Series[Any]", exception: Type[Exception], answers_on_gold_dataset: str, ) -> None: @@ -192,7 +192,7 @@ def test_gold_mv_raise_in_fit( def test_gold_mv_raise_in_predict( request: Any, not_random: Callable[[], None], - toy_gold_df: pd.Series, + toy_gold_df: "pd.Series[Any]", predict_method: str, exception: Type[Exception], answers_on_gold_dataset: str, @@ -220,7 +220,9 @@ def test_gold_mv_empty() -> None: @pytest.mark.parametrize("agg_class", [MMSR, ZeroBasedSkill, DawidSkene, GLAD]) def test_zero_iter( - agg_class: Any, simple_answers_df: pd.DataFrame, simple_ground_truth: pd.Series + agg_class: Any, + simple_answers_df: pd.DataFrame, + simple_ground_truth: "pd.Series[Any]", ) -> None: aggregator = agg_class(n_iter=0) answers = aggregator.fit_predict(simple_answers_df) diff --git a/tests/aggregation/test_ds_aggregation.py b/tests/aggregation/test_ds_aggregation.py index 299ae2f6..80a9fd35 100644 --- a/tests/aggregation/test_ds_aggregation.py +++ b/tests/aggregation/test_ds_aggregation.py @@ -2,7 +2,7 @@ Simplest aggregation algorithms tests on toy YSDA dataset Testing all boundary conditions and asserts """ -from typing import Any, List +from typing import Any, List, cast import numpy as np import pandas as pd @@ -17,7 +17,7 @@ def test_aggregate_ds_on_toy_ysda( n_iter: int, tol: float, toy_answers_df: pd.DataFrame, - toy_ground_truth_df: pd.Series, + toy_ground_truth_df: "pd.Series[Any]", ) -> None: np.random.seed(42) assert_series_equal( @@ -31,7 +31,7 @@ def test_aggregate_hds_on_toy_ysda( n_iter: int, tol: float, toy_answers_df: pd.DataFrame, - toy_ground_truth_df: pd.Series, + toy_ground_truth_df: "pd.Series[Any]", ) -> None: np.random.seed(42) assert_series_equal( @@ -45,7 +45,7 @@ def test_aggregate_ds_on_simple( n_iter: int, tol: float, simple_answers_df: pd.DataFrame, - simple_ground_truth: pd.Series, + simple_ground_truth: "pd.Series[Any]", ) -> None: np.random.seed(42) assert_series_equal( @@ -59,7 +59,7 @@ def test_aggregate_hds_on_simple( n_iter: int, tol: float, simple_answers_df: pd.DataFrame, - simple_ground_truth: pd.Series, + simple_ground_truth: "pd.Series[Any]", ) -> None: np.random.seed(42) assert_series_equal( @@ -76,11 +76,12 @@ def _make_probas(data: List[List[Any]]) -> pd.DataFrame: def _make_tasks_labels(data: List[List[Any]]) -> pd.DataFrame: # TODO: should task be indexed? - return ( + return cast( + pd.DataFrame, pd.DataFrame(data, columns=["task", "label"]) .set_index("task") .squeeze() - .rename("agg_label") + .rename("agg_label"), ) @@ -139,7 +140,7 @@ def probas_iter_0() -> pd.DataFrame: @pytest.fixture -def priors_iter_0() -> pd.Series: +def priors_iter_0() -> "pd.Series[Any]": return pd.Series([0.46, 0.54], pd.Index(["no", "yes"], name="label"), name="prior") @@ -188,7 +189,7 @@ def probas_iter_1() -> pd.DataFrame: @pytest.fixture -def priors_iter_1() -> pd.Series: +def priors_iter_1() -> "pd.Series[Any]": return pd.Series([0.49, 0.51], pd.Index(["no", "yes"], name="label"), name="prior") @@ -233,6 +234,10 @@ def test_dawid_skene_step_by_step( priors = request.getfixturevalue(f"priors_iter_{n_iter}") ds = DawidSkene(n_iter).fit(data) + assert ds.probas_ is not None, "no probas_" + assert ds.errors_ is not None, "no errors_" + assert ds.priors_ is not None, "no priors_" + assert ds.labels_ is not None, "no labels_" assert_frame_equal(probas, ds.probas_, check_like=True, atol=0.005) assert_frame_equal(errors, ds.errors_, check_like=True, atol=0.005) assert_series_equal(priors, ds.priors_, atol=0.005) @@ -241,6 +246,10 @@ def test_dawid_skene_step_by_step( def test_dawid_skene_on_empty_input(request: Any, data: pd.DataFrame) -> None: ds = DawidSkene(10).fit(pd.DataFrame([], columns=["task", "worker", "label"])) + assert ds.probas_ is not None, "no probas_" + assert ds.errors_ is not None, "no errors_" + assert ds.priors_ is not None, "no priors_" + assert ds.labels_ is not None, "no labels_" assert_frame_equal(pd.DataFrame(), ds.probas_, check_like=True, atol=0.005) assert_frame_equal(pd.DataFrame(), ds.errors_, check_like=True, atol=0.005) assert_series_equal(pd.Series(dtype=float, name="prior"), ds.priors_, atol=0.005) @@ -269,8 +278,12 @@ def test_dawid_skene_overlap(overlap: int) -> None: expected_labels = _make_tasks_labels([[task_id, "yes"] for task_id in range(3)]) # TODO: check errors_ + assert ds.probas_ is not None, "no probas_" + assert ds.errors_ is not None, "no errors_" + assert ds.priors_ is not None, "no priors_" + assert ds.labels_ is not None, "no labels_" assert_frame_equal(expected_probas, ds.probas_, check_like=True, atol=0.005) - assert_series_equal(expected_labels, ds.labels_, atol=0.005) + assert_series_equal(expected_labels, ds.labels_, atol=0.005) # type: ignore assert_series_equal( pd.Series([1 / 3, 2 / 3], pd.Index(["no", "yes"], name="label"), name="prior"), ds.priors_, @@ -279,14 +292,16 @@ def test_dawid_skene_overlap(overlap: int) -> None: def test_ds_on_bool_labels( - data_with_bool_labels: pd.DataFrame, bool_labels_ground_truth: pd.Series + data_with_bool_labels: pd.DataFrame, bool_labels_ground_truth: "pd.Series[Any]" ) -> None: ds = DawidSkene(20).fit(data_with_bool_labels) + assert ds.labels_ is not None, "no labels_" assert_series_equal(bool_labels_ground_truth, ds.labels_, atol=0.005) def test_hds_on_bool_labels( - data_with_bool_labels: pd.DataFrame, bool_labels_ground_truth: pd.Series + data_with_bool_labels: pd.DataFrame, bool_labels_ground_truth: "pd.Series[Any]" ) -> None: hds = OneCoinDawidSkene(20).fit(data_with_bool_labels) + assert hds.labels_ is not None, "no labels_" assert_series_equal(bool_labels_ground_truth, hds.labels_, atol=0.005) diff --git a/tests/aggregation/test_glad_aggregation.py b/tests/aggregation/test_glad_aggregation.py index ef0a4432..f2184b82 100644 --- a/tests/aggregation/test_glad_aggregation.py +++ b/tests/aggregation/test_glad_aggregation.py @@ -19,7 +19,7 @@ def test_aggregate_glad_on_toy_ysda( n_iter: int, tol: float, toy_answers_df: pd.DataFrame, - toy_ground_truth_df: pd.Series, + toy_ground_truth_df: "pd.Series[Any]", ) -> None: np.random.seed(42) predict_df = GLAD(n_iter=n_iter, tol=tol).fit_predict(toy_answers_df) @@ -36,7 +36,7 @@ def test_aggregate_glad_on_simple( n_iter: int, tol: float, simple_answers_df: pd.DataFrame, - simple_ground_truth: pd.Series, + simple_ground_truth: "pd.Series[Any]", ) -> None: np.random.seed(42) predict_df = GLAD(n_iter=n_iter, tol=tol).fit_predict(simple_answers_df) @@ -66,6 +66,9 @@ def single_task_initialized_glad( ) -> Tuple[pd.DataFrame, GLAD]: glad = GLAD() glad._init(single_task_simple_df) + assert glad.alphas_ is not None, "no alphas_" + assert glad.betas_ is not None, "no betas_" + assert glad.priors_ is not None, "no priors_" data = glad._join_all( single_task_simple_df, glad.alphas_, glad.betas_, glad.priors_ ) @@ -85,7 +88,7 @@ def test_glad_derivative( ) -> None: data, glad = single_task_initialized_glad glad._current_data = data - x_0 = np.concatenate([glad.alphas_.values, glad.betas_.values]) # type: ignore + x_0 = np.concatenate([glad.alphas_.values, glad.betas_.values]) def Q_by_alpha_beta(x: npt.NDArray[Any]) -> float: glad._update_alphas_betas(*glad._get_alphas_betas_by_point(x)) @@ -96,5 +99,5 @@ def Q_by_alpha_beta(x: npt.NDArray[Any]) -> float: eps = np.sqrt(np.finfo(float).eps) numerical_grad = np.sort(approx_fprime(x_0, Q_by_alpha_beta, eps)) dQalpha, dQbeta = glad._gradient_Q(data) - analytical_grad = np.sort(np.concatenate([dQalpha.values, dQbeta.values])) # type: ignore + analytical_grad = np.sort(np.concatenate([dQalpha.values, dQbeta.values])) assert np.allclose(analytical_grad, numerical_grad) diff --git a/tests/aggregation/test_image_aggregation.py b/tests/aggregation/test_image_aggregation.py index 1a289aca..ee79d92c 100644 --- a/tests/aggregation/test_image_aggregation.py +++ b/tests/aggregation/test_image_aggregation.py @@ -14,14 +14,14 @@ def test_simple_segmentation_mv( - simple_image_df: pd.DataFrame, simple_image_mv_result: pd.Series + simple_image_df: pd.DataFrame, simple_image_mv_result: "pd.Series[Any]" ) -> None: output = SegmentationMajorityVote().fit_predict(simple_image_df) assert_series_equal(output, simple_image_mv_result) def test_skills_segmentation_mv( - image_with_skills_df: pd.DataFrame, image_with_skills_mv_result: pd.Series + image_with_skills_df: pd.DataFrame, image_with_skills_mv_result: "pd.Series[Any]" ) -> None: output = SegmentationMajorityVote().fit_predict(*image_with_skills_df) assert_series_equal(output, image_with_skills_mv_result) @@ -32,7 +32,7 @@ def test_simple_segmentation_rasa_iter( n_iter: int, tol: float, simple_image_df: pd.DataFrame, - simple_image_rasa_result: pd.Series, + simple_image_rasa_result: "pd.Series[Any]", ) -> None: output = SegmentationRASA(n_iter=n_iter, tol=tol).fit_predict(simple_image_df) assert_series_equal(output, simple_image_rasa_result) @@ -43,7 +43,7 @@ def test_simple_segmentation_em_iter( n_iter: int, tol: float, simple_image_df: pd.DataFrame, - simple_image_em_result: pd.Series, + simple_image_em_result: "pd.Series[Any]", ) -> None: output = SegmentationEM(n_iter=n_iter, tol=tol).fit_predict(simple_image_df) assert_series_equal(output, simple_image_em_result) @@ -51,7 +51,9 @@ def test_simple_segmentation_em_iter( @pytest.mark.parametrize("agg_class", [SegmentationEM, SegmentationRASA]) def test_zero_iter( - agg_class: Any, simple_image_df: pd.DataFrame, simple_image_mv_result: pd.Series + agg_class: Any, + simple_image_df: pd.DataFrame, + simple_image_mv_result: "pd.Series[Any]", ) -> None: aggregator = agg_class(n_iter=0) answers = aggregator.fit_predict(simple_image_df) diff --git a/tests/aggregation/test_kos_aggregation.py b/tests/aggregation/test_kos_aggregation.py index 92715761..2f79d18d 100644 --- a/tests/aggregation/test_kos_aggregation.py +++ b/tests/aggregation/test_kos_aggregation.py @@ -2,6 +2,8 @@ Simplest aggregation algorithms tests on toy YSDA dataset Testing all boundary conditions and asserts """ +from typing import Any + import numpy as np import pandas as pd import pytest @@ -14,19 +16,22 @@ def test_aggregate_kos_on_data_with_bool_labels( n_iter: int, data_with_bool_labels: pd.DataFrame, - bool_labels_ground_truth: pd.Series, + bool_labels_ground_truth: "pd.Series[Any]", ) -> None: np.random.seed(42) + kos = KOS(n_iter=n_iter).fit(data_with_bool_labels) + assert kos.labels_ is not None, "no labels_" assert_series_equal( - KOS(n_iter=n_iter).fit(data_with_bool_labels).labels_, + kos.labels_, bool_labels_ground_truth, ) def test_kos_on_empty_input() -> None: - result = KOS(n_iter=10).fit(pd.DataFrame([], columns=["task", "worker", "label"])) + kos = KOS(n_iter=10).fit(pd.DataFrame([], columns=["task", "worker", "label"])) + assert kos.labels_ is not None, "no labels_" assert_series_equal( - pd.Series([], dtype="O", name="agg_label"), result.labels_, atol=0.005 + pd.Series([], dtype="O", name="agg_label"), kos.labels_, atol=0.005 ) diff --git a/tests/aggregation/test_mace_aggregation.py b/tests/aggregation/test_mace_aggregation.py index 06ff0a2f..9048a9b3 100644 --- a/tests/aggregation/test_mace_aggregation.py +++ b/tests/aggregation/test_mace_aggregation.py @@ -1,6 +1,8 @@ """ Simple aggregation tests. """ +from typing import Any + import numpy as np import pandas as pd import pytest @@ -12,7 +14,7 @@ @pytest.mark.parametrize("method", ["vb", "em"]) def test_aggregate_mace_on_toy_ysda( - method: str, toy_answers_df: pd.DataFrame, toy_ground_truth_df: pd.Series + method: str, toy_answers_df: pd.DataFrame, toy_ground_truth_df: "pd.Series[Any]" ) -> None: np.random.seed(42) predict_df = MACE(n_restarts=1, n_iter=5, method=method).fit_predict(toy_answers_df) @@ -26,7 +28,7 @@ def test_aggregate_mace_on_toy_ysda( @pytest.mark.parametrize("method", ["vb", "em"]) def test_aggregate_mace_on_simple( - method: str, simple_answers_df: pd.DataFrame, simple_ground_truth: pd.Series + method: str, simple_answers_df: pd.DataFrame, simple_ground_truth: "pd.Series[Any]" ) -> None: np.random.seed(42) predict_df = MACE(n_restarts=1, n_iter=5, method=method).fit_predict( diff --git a/tests/aggregation/test_majority_vote.py b/tests/aggregation/test_majority_vote.py index adcbe253..bd93202f 100644 --- a/tests/aggregation/test_majority_vote.py +++ b/tests/aggregation/test_majority_vote.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Any, Tuple import pandas as pd import pytest @@ -18,15 +18,18 @@ def test_majority_vote_with_skills( - simple_answers_df: pd.DataFrame, simple_skills_result_mv: pd.Series # noqa F811 + simple_answers_df: pd.DataFrame, + simple_skills_result_mv: "pd.Series[Any]", # noqa F811 ) -> None: mv = MajorityVote() mv.fit_predict(simple_answers_df, skills=simple_skills_result_mv) + assert mv.skills_ is not None, "no skills_" assert_series_equal(mv.skills_, simple_skills_result_mv) def test_majority_vote_with_missing_skills_value( - simple_answers_df: pd.DataFrame, simple_skills_result_mv: pd.Series # noqa F811 + simple_answers_df: pd.DataFrame, + simple_skills_result_mv: "pd.Series[Any]", # noqa F811 ) -> None: mv = MajorityVote(on_missing_skill="value", default_skill=1000000) simple_skills_result_mv = simple_skills_result_mv.drop( @@ -42,7 +45,8 @@ def test_majority_vote_with_missing_skills_value( def test_majority_vote_with_missing_skills_error( - simple_answers_df: pd.DataFrame, simple_skills_result_mv: pd.Series # noqa F811 + simple_answers_df: pd.DataFrame, + simple_skills_result_mv: "pd.Series[Any]", # noqa F811 ) -> None: mv = MajorityVote(on_missing_skill="error") simple_skills_result_mv = simple_skills_result_mv.drop( @@ -53,7 +57,8 @@ def test_majority_vote_with_missing_skills_error( def test_majority_vote_with_missing_skills_ignore( - simple_answers_df: pd.DataFrame, simple_skills_result_mv: pd.Series # noqa F811 + simple_answers_df: pd.DataFrame, + simple_skills_result_mv: "pd.Series[Any]", # noqa F811 ) -> None: mv = MajorityVote(on_missing_skill="ignore") simple_skills_result_mv = simple_skills_result_mv.drop( @@ -64,7 +69,8 @@ def test_majority_vote_with_missing_skills_ignore( def test_majority_vote_with_missing_skills_ignore_all( - simple_answers_df: pd.DataFrame, simple_skills_result_mv: pd.Series # noqa F811 + simple_answers_df: pd.DataFrame, + simple_skills_result_mv: "pd.Series[Any]", # noqa F811 ) -> None: mv = MajorityVote(on_missing_skill="ignore") with pytest.raises(ValueError): @@ -72,8 +78,8 @@ def test_majority_vote_with_missing_skills_ignore_all( def test_segmentation_majority_vote_with_missing_skills_value( - image_with_skills_df: Tuple[pd.DataFrame, pd.Series], # noqa F811 - image_with_skills_mv_result: pd.Series, # noqa F811 + image_with_skills_df: Tuple[pd.DataFrame, "pd.Series[Any]"], # noqa F811 + image_with_skills_mv_result: "pd.Series[Any]", # noqa F811 ) -> None: answers_df, skills = image_with_skills_df mv = SegmentationMajorityVote(on_missing_skill="value", default_skill=3) @@ -84,7 +90,7 @@ def test_segmentation_majority_vote_with_missing_skills_value( def test_segmentation_majority_vote_with_missing_skills_error( - image_with_skills_df: Tuple[pd.DataFrame, pd.Series] # noqa F811 + image_with_skills_df: Tuple[pd.DataFrame, "pd.Series[Any]"] # noqa F811 ) -> None: answers_df, skills = image_with_skills_df mv = SegmentationMajorityVote(on_missing_skill="error", default_skill=3) @@ -94,7 +100,7 @@ def test_segmentation_majority_vote_with_missing_skills_error( def test_segmentation_majority_vote_with_missing_skills_ignore( - image_with_skills_df: Tuple[pd.DataFrame, pd.Series] # noqa F811 + image_with_skills_df: Tuple[pd.DataFrame, "pd.Series[Any]"] # noqa F811 ) -> None: answers_df, skills = image_with_skills_df mv = SegmentationMajorityVote(on_missing_skill="ignore") @@ -106,7 +112,7 @@ def test_segmentation_majority_vote_with_missing_skills_ignore( def test_segmentation_majority_vote_with_missing_skills_ignore_all( - image_with_skills_df: Tuple[pd.DataFrame, pd.Series] # noqa F811 + image_with_skills_df: Tuple[pd.DataFrame, "pd.Series[Any]"] # noqa F811 ) -> None: answers_df, skills = image_with_skills_df mv = SegmentationMajorityVote(on_missing_skill="ignore") @@ -116,12 +122,14 @@ def test_segmentation_majority_vote_with_missing_skills_ignore_all( def test_gold_mv_multiple_gt( multiple_gt_df: pd.DataFrame, # noqa F811 - multiple_gt_gt: pd.Series, # noqa F811 - multiple_gt_skills: pd.Series, # noqa F811 - multiple_gt_aggregated: pd.Series, # noqa F811 + multiple_gt_gt: "pd.Series[Any]", # noqa F811 + multiple_gt_skills: "pd.Series[Any]", # noqa F811 + multiple_gt_aggregated: "pd.Series[Any]", # noqa F811 ) -> None: gmv = GoldMajorityVote() aggregated = gmv.fit_predict(multiple_gt_df, true_labels=multiple_gt_gt) + assert aggregated is not None, "no aggregated" skills = gmv.skills_ + assert skills is not None, "no skills" assert_series_equal(skills, multiple_gt_skills) assert_series_equal(aggregated, multiple_gt_aggregated) diff --git a/tests/aggregation/test_rover_aggregation.py b/tests/aggregation/test_rover_aggregation.py index bf66e763..6c49da96 100644 --- a/tests/aggregation/test_rover_aggregation.py +++ b/tests/aggregation/test_rover_aggregation.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest from pandas.testing import assert_series_equal @@ -20,13 +22,15 @@ def data_toy() -> pd.DataFrame: @pytest.fixture -def rover_toy_result() -> pd.Series: +def rover_toy_result() -> "pd.Series[Any]": result = pd.Series(["b c d e"], index=["t1"], name="agg_text") result.index.name = "task" return result -def test_rover_aggregation(rover_toy_result: pd.Series, data_toy: pd.DataFrame) -> None: +def test_rover_aggregation( + rover_toy_result: "pd.Series[Any]", data_toy: pd.DataFrame +) -> None: rover = ROVER(tokenizer=lambda x: x.split(" "), detokenizer=lambda x: " ".join(x)) assert_series_equal(rover_toy_result, rover.fit_predict(data_toy)) @@ -42,14 +46,15 @@ def rover_single_overlap_data() -> pd.DataFrame: @pytest.fixture -def rover_single_overlap_result() -> pd.Series: +def rover_single_overlap_result() -> "pd.Series[Any]": result = pd.Series(["a b c d"], index=["t1"], name="agg_text") result.index.name = "task" return result def test_rover_single_overlap( - rover_single_overlap_data: pd.DataFrame, rover_single_overlap_result: pd.Series + rover_single_overlap_data: pd.DataFrame, + rover_single_overlap_result: "pd.Series[Any]", ) -> None: rover = ROVER(tokenizer=lambda x: x.split(" "), detokenizer=lambda x: " ".join(x)) assert_series_equal( @@ -58,7 +63,8 @@ def test_rover_single_overlap( def test_rover_simple_text( - simple_text_df: pd.DataFrame, simple_text_result_rover: pd.Series # noqa F811 + simple_text_df: pd.DataFrame, + simple_text_result_rover: "pd.Series[Any]", # noqa F811 ) -> None: rover = ROVER(tokenizer=lambda x: x.split(" "), detokenizer=lambda x: " ".join(x)) predicted = rover.fit_predict(simple_text_df.rename(columns={"output": "text"})) diff --git a/tests/aggregation/test_text_aggregation.py b/tests/aggregation/test_text_aggregation.py index dd84ae3d..a777e6af 100644 --- a/tests/aggregation/test_text_aggregation.py +++ b/tests/aggregation/test_text_aggregation.py @@ -14,7 +14,7 @@ def test_rasa( n_iter: int, tol: float, simple_text_df: pd.DataFrame, - simple_text_true_embeddings: pd.Series, + simple_text_true_embeddings: "pd.Series[Any]", simple_text_result_rasa: pd.DataFrame, ) -> None: output = RASA(n_iter=n_iter, tol=tol).fit_predict( @@ -28,7 +28,7 @@ def test_hrrasa( n_iter: int, tol: float, simple_text_df: pd.DataFrame, - simple_text_true_embeddings: pd.Series, + simple_text_true_embeddings: "pd.Series[Any]", simple_text_result_hrrasa: pd.DataFrame, ) -> None: output = HRRASA(n_iter=n_iter, tol=tol).fit_predict( @@ -47,7 +47,7 @@ def test_hrrasa_single_overlap(simple_text_df: pd.DataFrame) -> None: def test_zero_iter( agg_class: Any, simple_text_df: pd.DataFrame, - simple_text_true_embeddings: pd.Series, + simple_text_true_embeddings: "pd.Series[Any]", simple_text_result_hrrasa: pd.DataFrame, ) -> None: aggregator = agg_class(n_iter=0) diff --git a/tests/conftest.py b/tests/conftest.py index 9af90ad6..898c3bfe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import random +from typing import Any import numpy as np import pandas as pd @@ -48,7 +49,7 @@ def toy_answers_df() -> pd.DataFrame: @pytest.fixture -def toy_ground_truth_df() -> pd.Series: +def toy_ground_truth_df() -> "pd.Series[Any]": return pd.Series( ["yes", "yes", "no", "yes", "no"], pd.Index(["t1", "t2", "t3", "t4", "t5"], name="task"), @@ -57,7 +58,7 @@ def toy_ground_truth_df() -> pd.Series: @pytest.fixture -def toy_gold_df() -> pd.Series: +def toy_gold_df() -> "pd.Series[Any]": return pd.Series( { "t1": "yes", @@ -247,7 +248,7 @@ def simple_answers_df() -> pd.DataFrame: @pytest.fixture -def simple_ground_truth() -> pd.Series: +def simple_ground_truth() -> "pd.Series[Any]": ground_truth = pd.Series( { "1231239876--5fac0d234ffb2f3b00893eec": "goose", @@ -268,7 +269,7 @@ def simple_ground_truth() -> pd.Series: @pytest.fixture -def simple_gold_df() -> pd.Series: +def simple_gold_df() -> "pd.Series[Any]": true_labels = pd.Series( { "1231239876--5fac0d234ffb2f3b00893eec": "goose", @@ -738,7 +739,7 @@ def simple_text_df() -> pd.DataFrame: @pytest.fixture -def simple_text_true_embeddings() -> pd.Series: +def simple_text_true_embeddings() -> "pd.Series[Any]": return pd.Series( [np.array([0.8619265, 0.3983395]), np.array([10.686009, 17.633106])], index=pd.Index(["1255-74899-0020", "7601-175351-0021"], name="task"), @@ -761,7 +762,7 @@ def data_with_bool_labels() -> pd.DataFrame: @pytest.fixture -def bool_labels_ground_truth() -> pd.Series: +def bool_labels_ground_truth() -> "pd.Series[Any]": return pd.Series( [True, True], index=pd.Index(["t1", "t2"], name="task"), name="agg_label" ) diff --git a/tests/datasets/test_datasets.py b/tests/datasets/test_datasets.py index fc730e46..58604a58 100644 --- a/tests/datasets/test_datasets.py +++ b/tests/datasets/test_datasets.py @@ -14,7 +14,7 @@ def test_get_datasets_list() -> None: def collect_stats_for_dataset( - crowd_labels: pd.DataFrame, gt: pd.Series + crowd_labels: pd.DataFrame, gt: "pd.Series[Any]" ) -> Dict[str, Any]: return { "rows": len(crowd_labels), diff --git a/tests/learning/test_conal.py b/tests/learning/test_conal.py index 3959e941..4f75445a 100644 --- a/tests/learning/test_conal.py +++ b/tests/learning/test_conal.py @@ -21,6 +21,6 @@ def test_conal( out = conal(embeddings, toy_logits, toy_workers) assert out.shape == (3, 5) out.sum().backward() - assert conal.annotator_confusion_matrices.grad.shape == (2, 5, 5) # type: ignore - assert conal.common_confusion_matrix.grad.shape == (5, 5) # type: ignore + assert conal.annotator_confusion_matrices.grad.shape == (2, 5, 5) + assert conal.common_confusion_matrix.grad.shape == (5, 5) # I'm not sure that we can properly test the values of the output because the module is randomly initialized. diff --git a/tests/learning/test_crowdlayer.py b/tests/learning/test_crowdlayer.py index 4242b2bb..7f4b3b41 100644 --- a/tests/learning/test_crowdlayer.py +++ b/tests/learning/test_crowdlayer.py @@ -13,7 +13,7 @@ def test_crowdlayer_vb( assert out.shape == (3, 5) assert torch.allclose(out, toy_logits) out.sum().backward() - assert crowd_layer.weight.grad.shape == (2, 5) # type: ignore + assert crowd_layer.weight.grad.shape == (2, 5) def test_crowdlayer_vw( @@ -24,7 +24,7 @@ def test_crowdlayer_vw( assert out.shape == (3, 5) assert torch.allclose(out, toy_logits) out.sum().backward() - assert crowd_layer.weight.grad.shape == (2, 5) # type: ignore + assert crowd_layer.weight.grad.shape == (2, 5) def test_crowdlayer_vw_b( @@ -35,8 +35,8 @@ def test_crowdlayer_vw_b( assert out.shape == (3, 5) assert torch.allclose(out, toy_logits) out.sum().backward() - assert crowd_layer.scale.grad.shape == (2, 5) # type: ignore - assert crowd_layer.bias.grad.shape == (2, 5) # type: ignore + assert crowd_layer.scale.grad.shape == (2, 5) + assert crowd_layer.bias.grad.shape == (2, 5) def test_crowdlayer_mw( @@ -47,4 +47,4 @@ def test_crowdlayer_mw( assert out.shape == (3, 5) assert torch.allclose(out, toy_logits) out.sum().backward() - assert crowd_layer.weight.grad.shape == (2, 5, 5) # type: ignore + assert crowd_layer.weight.grad.shape == (2, 5, 5) diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py index 6d1d6ecc..bd4c0d0b 100644 --- a/tests/metrics/test_metrics.py +++ b/tests/metrics/test_metrics.py @@ -1,3 +1,5 @@ +from typing import Any + import numpy as np import pandas as pd import pytest @@ -60,7 +62,7 @@ def test_uncertainty_docstring_examples(self) -> None: == 1.0986122886681096 ) - np.testing.assert_allclose( # type: ignore + np.testing.assert_allclose( uncertainty( pd.DataFrame.from_records( [ @@ -77,7 +79,7 @@ def test_uncertainty_docstring_examples(self) -> None: atol=1e-3, ) - np.testing.assert_allclose( # type: ignore + np.testing.assert_allclose( uncertainty( pd.DataFrame.from_records( [ @@ -139,7 +141,7 @@ def test_uncertainty_per_worker(self) -> None: ) assert isinstance(entropies, pd.Series) - assert sorted(np.unique(entropies.index).tolist()) == ["A", "B", "C"] # type: ignore + assert sorted(np.unique(entropies.index).tolist()) == ["A", "B", "C"] # B always answers the same, entropy = 0 np.testing.assert_allclose(entropies["B"], 0, atol=1e-6) # type: ignore @@ -185,14 +187,14 @@ def test_uncertainty_per_task(self) -> None: ) assert isinstance(entropies, pd.Series) - assert sorted(np.unique(entropies.index).tolist()) == ["1", "2", "3", "4", "5"] # type: ignore + assert sorted(np.unique(entropies.index).tolist()) == ["1", "2", "3", "4", "5"] # Everybody answered same on tasks 2 and 4 np.testing.assert_allclose(entropies["2"], 0, atol=1e-6) # type: ignore np.testing.assert_allclose(entropies["4"], 0, atol=1e-6) # type: ignore # On tasks 1 and 3, 2 workers agreed and one answered differently - assert entropies["1"] > 0 + assert entropies["1"] > 0 # type: ignore np.testing.assert_allclose(entropies["1"], entropies["3"], atol=1e-6) # type: ignore # Complete disagreement on task 5, max possible entropy @@ -204,7 +206,7 @@ def test_uncertainty_per_task(self) -> None: def test_golden_set_accuracy( - toy_answers_df: pd.DataFrame, toy_gold_df: pd.Series + toy_answers_df: pd.DataFrame, toy_gold_df: "pd.Series[Any]" ) -> None: assert get_accuracy(toy_answers_df, toy_gold_df) == 5 / 9 assert get_accuracy(toy_answers_df, toy_gold_df, by="worker").equals( @@ -220,8 +222,9 @@ def test_accuracy_on_aggregates(toy_answers_df: pd.DataFrame) -> None: [0.6, 0.8, 1.0, 0.4, 0.8], index=pd.Index(["w1", "w2", "w3", "w4", "w5"], name="worker"), ) + assert toy_answers_df is not None, "no toy_answers_df" assert_series_equal( - accuracy_on_aggregates(toy_answers_df, by="worker"), expected_workers_accuracy + accuracy_on_aggregates(toy_answers_df, by="worker"), expected_workers_accuracy # type: ignore ) assert accuracy_on_aggregates(toy_answers_df) == 0.7083333333333334 diff --git a/tests/postprocessing/test_entropy_threshold.py b/tests/postprocessing/test_entropy_threshold.py index 5af9ccb8..e5566603 100644 --- a/tests/postprocessing/test_entropy_threshold.py +++ b/tests/postprocessing/test_entropy_threshold.py @@ -1,3 +1,5 @@ +from typing import Any + import pandas as pd import pytest @@ -96,7 +98,7 @@ def test_entropy_threshold_min_answers(self) -> None: assert "C" not in filtered_answers.worker.values def test_entropy_threshold_simple_answers( - self, simple_answers_df: pd.DataFrame, simple_ground_truth: pd.Series + self, simple_answers_df: pd.DataFrame, simple_ground_truth: "pd.Series[Any]" ) -> None: aggregated = MajorityVote().fit_predict(simple_answers_df) base_accuracy = sum(