diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b29917e..d5bd146 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -3,14 +3,14 @@ on:
   pull_request:
   push:
     branches:
-      - master
+      - '*'

 jobs:
   test:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ['3.10', '3.11', '3.12']
         os: [ubuntu-latest, windows-latest]

     steps:
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..605c29c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+env/
+env*/
+__pycache__/
+*.pyc
+*.egg-info
\ No newline at end of file
diff --git a/LICENSE b/LICENSE.txt
similarity index 100%
rename from LICENSE
rename to LICENSE.txt
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..edfd68f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,80 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "string-grouper"
+dynamic = ["version"]
+description = "String grouper contains functions to do string matching using TF-IDF and cosine similarity. Based on https://bergvca.github.io/2017/10/14/super-fast-string-matching.html"
+readme = "README.md"
+requires-python = ">=3.10"
+authors = [
+    { name = "Chris van den Berg", email = "fake_email@gmail.com" },
+    { name = "Ruben Menke", email = "rum@bankingcircle.com" },
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = [
+    "numpy",
+    "pandas>=0.25.3",
+    "scikit-learn",
+    "scipy",
+    "sparse_dot_topn>=1.1.1",
+]
+
+[project.urls]
+Homepage = "https://github.com/Bergvca/string_grouper"
+Documentation = "https://github.com/unknown/string-grouper#readme"
+Issues = "https://github.com/unknown/string-grouper/issues"
+Source = "https://github.com/unknown/string-grouper"
+
+[tool.hatch.version]
+path = "string_grouper/__init__.py"
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "/string_grouper",
+]
+
+
+[tool.hatch.envs.test]
+dependencies = ["pytest", "pdbpp"]
+
+[tool.hatch.envs.test.scripts]
+tests = "python -m unittest {args}"
+base = "pytest {args}"
+cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=string_grouper {args}"
+htmlcov = "pytest --cov-report=html --cov-config=pyproject.toml --cov=string_grouper {args}"
+xmlcov = "pytest --cov-report=xml --cov-config=pyproject.toml --cov=string_grouper {args}"
+code_check = "pre-commit run --all-files"
+
+[[tool.hatch.envs.test.matrix]]
+python = ["3.10", "3.11"]
+
+
+
+[tool.black]
+line-length = 120
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+)/
+'''
+
+[tool.isort]
+profile = "black"
\ No newline at end of file
diff --git a/setup.py b/setup.py
deleted file mode 100644
index ec4bbb1..0000000
--- a/setup.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from setuptools import setup
-import pathlib
-
-# The directory containing this file
-HERE = pathlib.Path(__file__).parent
-
-# The text of the README file
-README = (HERE / "README.md").read_text()
-
-setup(
-    name='string_grouper',
-    version='0.5.0',
-    packages=['string_grouper'],
-    license='MIT 
License', - description='String grouper contains functions to do string matching using TF-IDF and the cossine similarity. ' - 'Based on https://bergvca.github.io/2017/10/14/super-fast-string-matching.html', - author='Chris van den Berg', - long_description=README, - long_description_content_type="text/markdown", - author_email='fake_email@gmail.com', - url='https://github.com/Bergvca/string_grouper', - zip_safe=False, - python_requires='>3.7', - install_requires=['pandas>=0.25.3' - , 'scipy' - , 'scikit-learn' - , 'numpy' - , 'sparse_dot_topn>=0.3.1' - ] -) diff --git a/string_grouper/__init__.py b/string_grouper/__init__.py index 84e3abd..8eff16d 100644 --- a/string_grouper/__init__.py +++ b/string_grouper/__init__.py @@ -1,2 +1,9 @@ -from .string_grouper import compute_pairwise_similarities, group_similar_strings, match_most_similar, match_strings, \ -StringGrouperConfig, StringGrouper +from .string_grouper import ( + compute_pairwise_similarities, + group_similar_strings, + match_most_similar, + match_strings, + StringGrouperConfig, + StringGrouper, +) +__version__ = "0.5.0" \ No newline at end of file diff --git a/string_grouper/string_grouper.py b/string_grouper/string_grouper.py index d161251..85d82fc 100644 --- a/string_grouper/string_grouper.py +++ b/string_grouper/string_grouper.py @@ -3,48 +3,64 @@ import re import multiprocessing from sklearn.feature_extraction.text import TfidfVectorizer -from scipy.sparse.csr import csr_matrix -from scipy.sparse.lil import lil_matrix +from scipy.sparse import csr_matrix +from scipy.sparse import lil_matrix from scipy.sparse.csgraph import connected_components from typing import Tuple, NamedTuple, List, Optional, Union -from sparse_dot_topn import awesome_cossim_topn +from sparse_dot_topn import sp_matmul_topn from functools import wraps DEFAULT_NGRAM_SIZE: int = 3 -DEFAULT_TFIDF_MATRIX_DTYPE: type = np.float32 # (only types np.float32 and np.float64 are allowed by sparse_dot_topn) -DEFAULT_REGEX: str = r'[,-./]|\s' +DEFAULT_TFIDF_MATRIX_DTYPE: type = ( + np.float64 +) # (only types np.float32 and np.float64 are allowed by sparse_dot_topn) +DEFAULT_REGEX: str = r"[,-./]|\s" DEFAULT_MAX_N_MATCHES: int = 20 -DEFAULT_MIN_SIMILARITY: float = 0.8 # minimum cosine similarity for an item to be considered a match +DEFAULT_MIN_SIMILARITY: float = ( + 0.8 # minimum cosine similarity for an item to be considered a match +) DEFAULT_N_PROCESSES: int = multiprocessing.cpu_count() - 1 DEFAULT_IGNORE_CASE: bool = True # ignores case by default DEFAULT_DROP_INDEX: bool = False # includes index-columns in output -DEFAULT_REPLACE_NA: bool = False # when finding the most similar strings, does not replace NaN values in most +DEFAULT_REPLACE_NA: bool = ( + False # when finding the most similar strings, does not replace NaN values in most +) # similar string index-columns with corresponding duplicates-index values -DEFAULT_INCLUDE_ZEROES: bool = True # when the minimum cosine similarity <=0, determines whether zero-similarity +DEFAULT_INCLUDE_ZEROES: bool = ( + True # when the minimum cosine similarity <=0, determines whether zero-similarity +) # matches appear in the output -GROUP_REP_CENTROID: str = 'centroid' # Option value to select the string in each group with the largest +GROUP_REP_CENTROID: str = ( + "centroid" # Option value to select the string in each group with the largest +) # similarity aggregate as group-representative: -GROUP_REP_FIRST: str = 'first' # Option value to select the first string in each group as group-representative: -DEFAULT_GROUP_REP: 
str = GROUP_REP_CENTROID # chooses group centroid as group-representative by default +GROUP_REP_FIRST: str = "first" # Option value to select the first string in each group as group-representative: +DEFAULT_GROUP_REP: str = ( + GROUP_REP_CENTROID # chooses group centroid as group-representative by default +) # The following string constants are used by (but aren't [yet] options passed to) StringGrouper -DEFAULT_COLUMN_NAME: str = 'side' # used to name non-index columns of the output of StringGrouper.get_matches -DEFAULT_ID_NAME: str = 'id' # used to name id-columns in the output of StringGrouper.get_matches -LEFT_PREFIX: str = 'left_' # used to prefix columns on the left of the output of StringGrouper.get_matches -RIGHT_PREFIX: str = 'right_' # used to prefix columns on the right of the output of StringGrouper.get_matches -MOST_SIMILAR_PREFIX: str = 'most_similar_' # used to prefix columns of the output of +DEFAULT_COLUMN_NAME: str = ( + "side" # used to name non-index columns of the output of StringGrouper.get_matches +) +DEFAULT_ID_NAME: str = ( + "id" # used to name id-columns in the output of StringGrouper.get_matches +) +LEFT_PREFIX: str = "left_" # used to prefix columns on the left of the output of StringGrouper.get_matches +RIGHT_PREFIX: str = "right_" # used to prefix columns on the right of the output of StringGrouper.get_matches +MOST_SIMILAR_PREFIX: str = "most_similar_" # used to prefix columns of the output of # StringGrouper._get_nearest_matches -DEFAULT_MASTER_NAME: str = 'master' # used to name non-index column of the output of StringGrouper.get_nearest_matches -DEFAULT_MASTER_ID_NAME: str = f'{DEFAULT_MASTER_NAME}_{DEFAULT_ID_NAME}' # used to name id-column of the output of +DEFAULT_MASTER_NAME: str = "master" # used to name non-index column of the output of StringGrouper.get_nearest_matches +DEFAULT_MASTER_ID_NAME: str = f"{DEFAULT_MASTER_NAME}_{DEFAULT_ID_NAME}" # used to name id-column of the output of # StringGrouper.get_nearest_matches -GROUP_REP_PREFIX: str = 'group_rep_' # used to prefix and name columns of the output of StringGrouper._deduplicate +GROUP_REP_PREFIX: str = "group_rep_" # used to prefix and name columns of the output of StringGrouper._deduplicate # High level functions -def compute_pairwise_similarities(string_series_1: pd.Series, - string_series_2: pd.Series, - **kwargs) -> pd.Series: +def compute_pairwise_similarities( + string_series_1: pd.Series, string_series_2: pd.Series, **kwargs +) -> pd.Series: """ Computes the similarity scores between two Series of strings row-wise. @@ -56,9 +72,9 @@ def compute_pairwise_similarities(string_series_1: pd.Series, return StringGrouper(string_series_1, string_series_2, **kwargs).dot() -def group_similar_strings(strings_to_group: pd.Series, - string_ids: Optional[pd.Series] = None, - **kwargs) -> Union[pd.DataFrame, pd.Series]: +def group_similar_strings( + strings_to_group: pd.Series, string_ids: Optional[pd.Series] = None, **kwargs +) -> Union[pd.DataFrame, pd.Series]: """ If 'string_ids' is not given, finds all similar strings in 'strings_to_group' and returns a Series of strings of the same length as 'strings_to_group'. For each group of similar strings a single string @@ -74,15 +90,19 @@ def group_similar_strings(strings_to_group: pd.Series, :param kwargs: All other keyword arguments are passed to StringGrouperConfig. (Optional) :return: pandas.Series or pandas.DataFrame. 
""" - string_grouper = StringGrouper(strings_to_group, master_id=string_ids, **kwargs).fit() + string_grouper = StringGrouper( + strings_to_group, master_id=string_ids, **kwargs + ).fit() return string_grouper.get_groups() -def match_most_similar(master: pd.Series, - duplicates: pd.Series, - master_id: Optional[pd.Series] = None, - duplicates_id: Optional[pd.Series] = None, - **kwargs) -> Union[pd.DataFrame, pd.Series]: +def match_most_similar( + master: pd.Series, + duplicates: pd.Series, + master_id: Optional[pd.Series] = None, + duplicates_id: Optional[pd.Series] = None, + **kwargs, +) -> Union[pd.DataFrame, pd.Series]: """ If no IDs ('master_id' and 'duplicates_id') are given, returns a Series of strings of the same length as 'duplicates' where for each string in duplicates the most similar string in 'master' is returned. @@ -103,19 +123,23 @@ def match_most_similar(master: pd.Series, :param kwargs: All other keyword arguments are passed to StringGrouperConfig. (Optional) :return: pandas.Series or pandas.DataFrame. """ - string_grouper = StringGrouper(master, - duplicates=duplicates, - master_id=master_id, - duplicates_id=duplicates_id, - **kwargs).fit() + string_grouper = StringGrouper( + master, + duplicates=duplicates, + master_id=master_id, + duplicates_id=duplicates_id, + **kwargs, + ).fit() return string_grouper.get_groups() -def match_strings(master: pd.Series, - duplicates: Optional[pd.Series] = None, - master_id: Optional[pd.Series] = None, - duplicates_id: Optional[pd.Series] = None, - **kwargs) -> pd.DataFrame: +def match_strings( + master: pd.Series, + duplicates: Optional[pd.Series] = None, + master_id: Optional[pd.Series] = None, + duplicates_id: Optional[pd.Series] = None, + **kwargs, +) -> pd.DataFrame: """ Returns all highly similar strings. If only 'master' is given, it will return highly similar strings within master. This can be seen as an self-join. If both master and duplicates is given, it will return highly similar strings @@ -128,11 +152,13 @@ def match_strings(master: pd.Series, :param kwargs: All other keyword arguments are passed to StringGrouperConfig. :return: pandas.Dataframe. """ - string_grouper = StringGrouper(master, - duplicates=duplicates, - master_id=master_id, - duplicates_id=duplicates_id, - **kwargs).fit() + string_grouper = StringGrouper( + master, + duplicates=duplicates, + master_id=master_id, + duplicates_id=duplicates_id, + **kwargs, + ).fit() return string_grouper.get_matches() @@ -182,24 +208,30 @@ def wrapper(*args, **kwargs): if args[0].is_build: return f(*args, **kwargs) else: - raise StringGrouperNotFitException(f'{f.__name__} was called before the "fit" function was called.' - f' Make sure to run fit the StringGrouper first using ' - f'StringGrouper.fit()') + raise StringGrouperNotFitException( + f'{f.__name__} was called before the "fit" function was called.' 
+                f" Make sure to fit the StringGrouper first using "
+                f"StringGrouper.fit()"
+            )

     return wrapper


 class StringGrouperNotFitException(Exception):
     """Raised when one of the public functions is called which requires the StringGrouper to be fit first"""
+
     pass


 class StringGrouper(object):
-    def __init__(self, master: pd.Series,
-                 duplicates: Optional[pd.Series] = None,
-                 master_id: Optional[pd.Series] = None,
-                 duplicates_id: Optional[pd.Series] = None,
-                 **kwargs):
+    def __init__(
+        self,
+        master: pd.Series,
+        duplicates: Optional[pd.Series] = None,
+        master_id: Optional[pd.Series] = None,
+        duplicates_id: Optional[pd.Series] = None,
+        **kwargs,
+    ):
         """
         StringGrouper is a class that holds the matrix with cosine similarities between the master and duplicates
         matrix. If duplicates is not given it is replaced by master. To build this matrix the `fit` function must be
@@ -213,22 +245,32 @@ def __init__(self, master: pd.Series,
         :param kwargs: All other keyword arguments are passed to StringGrouperConfig
         """
         # Validate match strings input
-        if not StringGrouper._is_series_of_strings(master) or \
-                (duplicates is not None and not StringGrouper._is_series_of_strings(duplicates)):
-            raise TypeError('Input does not consist of pandas.Series containing only Strings')
+        if not StringGrouper._is_series_of_strings(master) or (
+            duplicates is not None
+            and not StringGrouper._is_series_of_strings(duplicates)
+        ):
+            raise TypeError(
+                "Input does not consist of pandas.Series containing only Strings"
+            )
         # Validate optional IDs input
-        if not StringGrouper._is_input_data_combination_valid(duplicates, master_id, duplicates_id):
-            raise Exception('List of data Series options is invalid')
+        if not StringGrouper._is_input_data_combination_valid(
+            duplicates, master_id, duplicates_id
+        ):
+            raise Exception("List of data Series options is invalid")
         StringGrouper._validate_id_data(master, duplicates, master_id, duplicates_id)

         self._master: pd.Series = master
         self._duplicates: pd.Series = duplicates if duplicates is not None else None
         self._master_id: pd.Series = master_id if master_id is not None else None
-        self._duplicates_id: pd.Series = duplicates_id if duplicates_id is not None else None
+        self._duplicates_id: pd.Series = (
+            duplicates_id if duplicates_id is not None else None
+        )
         self._config: StringGrouperConfig = StringGrouperConfig(**kwargs)
         if self._config.max_n_matches is None:
-            self._max_n_matches = len(self._master) if self._duplicates is None else len(self._duplicates)
+            self._max_n_matches = (
+                len(self._master) if self._duplicates is None else len(self._duplicates)
+            )
         else:
             self._max_n_matches = self._config.max_n_matches

@@ -236,7 +278,9 @@ def __init__(self, master: pd.Series,
         self._validate_tfidf_matrix_dtype()
         self._validate_replace_na_and_drop()
         self.is_build = False  # indicates if the grouper was fit or not
-        self._vectorizer = TfidfVectorizer(min_df=1, analyzer=self.n_grams, dtype=self._config.tfidf_matrix_dtype)
+        self._vectorizer = TfidfVectorizer(
+            min_df=1, analyzer=self.n_grams, dtype=self._config.tfidf_matrix_dtype
+        )
         # After the StringGrouper is built, _matches_list will contain the indices and similarities of the matches
         self._matches_list: pd.DataFrame = pd.DataFrame()
         # _true_max_n_matches will contain the true maximum number of matches over all strings in master if
@@ -252,26 +296,27 @@ def n_grams(self, string: str) -> List[str]:
         regex_pattern = self._config.regex
         if self._config.ignore_case and string is not None:
             string = string.lower()  # lowercase to ignore all case 
- string = re.sub(regex_pattern, r'', string) + string = re.sub(regex_pattern, r"", string) n_grams = zip(*[string[i:] for i in range(ngram_size)]) - return [''.join(n_gram) for n_gram in n_grams] + return ["".join(n_gram) for n_gram in n_grams] - def fit(self) -> 'StringGrouper': + def fit(self) -> "StringGrouper": """Builds the _matches list which contains string matches indices and similarity""" master_matrix, duplicate_matrix = self._get_tf_idf_matrices() # Calculate the matches using the cosine similarity - matches, self._true_max_n_matches = self._build_matches(master_matrix, duplicate_matrix) + matches = self._build_matches( + master_matrix, duplicate_matrix + ) + self._true_max_n_matches = np.diff(matches.indptr).max() if self._duplicates is None: # convert to lil format for best efficiency when setting matrix-elements matches = matches.tolil() # matrix diagonal elements must be exactly 1 (numerical precision errors introduced by - # floating-point computations in awesome_cossim_topn sometimes lead to unexpected results) + # floating-point computations in sp_matmul_topn sometimes lead to unexpected results) matches = StringGrouper._fix_diagonal(matches) - if self._max_n_matches < self._true_max_n_matches: - # the list of matches must be symmetric! (i.e., if A != B and A matches B; then B matches A) - matches = StringGrouper._symmetrize_matrix(matches) + matches = StringGrouper._symmetrize_matrix(matches) matches = matches.tocsr() # build list from matrix @@ -282,16 +327,22 @@ def fit(self) -> 'StringGrouper': def dot(self) -> pd.Series: """Computes the row-wise similarity scores between strings in _master and _duplicates""" if len(self._master) != len(self._duplicates): - raise Exception("To perform this function, both input Series must have the same length.") + raise Exception( + "To perform this function, both input Series must have the same length." + ) master_matrix, duplicate_matrix = self._get_tf_idf_matrices() # Calculate pairwise cosine similarities: - pairwise_similarities = np.asarray(master_matrix.multiply(duplicate_matrix).sum(axis=1)).squeeze(axis=1) - return pd.Series(pairwise_similarities, name='similarity', index=self._master.index) + pairwise_similarities = np.asarray( + master_matrix.multiply(duplicate_matrix).sum(axis=1) + ).squeeze(axis=1) + return pd.Series( + pairwise_similarities, name="similarity", index=self._master.index + ) @validate_is_fit - def get_matches(self, - ignore_index: Optional[bool] = None, - include_zeroes: Optional[bool] = None) -> pd.DataFrame: + def get_matches( + self, ignore_index: Optional[bool] = None, include_zeroes: Optional[bool] = None + ) -> pd.DataFrame: """ Returns a DataFrame with all the matches and their cosine similarity. If optional IDs are used, returned as extra columns with IDs matched to respective data rows @@ -301,10 +352,13 @@ def get_matches(self, :param include_zeroes: when the minimum cosine similarity <=0, determines whether zero-similarity matches appear in the output. Defaults to self._config.include_zeroes. 
""" - def get_both_sides(master: pd.Series, - duplicates: pd.Series, - generic_name=(DEFAULT_COLUMN_NAME, DEFAULT_COLUMN_NAME), - drop_index=False): + + def get_both_sides( + master: pd.Series, + duplicates: pd.Series, + generic_name=(DEFAULT_COLUMN_NAME, DEFAULT_COLUMN_NAME), + drop_index=False, + ): lname, rname = generic_name left = master if master.name else master.rename(lname) left = left.iloc[matches_list.master_side].reset_index(drop=drop_index) @@ -313,7 +367,9 @@ def get_both_sides(master: pd.Series, else: right = duplicates if duplicates.name else duplicates.rename(rname) right = right.iloc[matches_list.dupe_side].reset_index(drop=drop_index) - return left, (right if isinstance(right, pd.Series) else right[right.columns[::-1]]) + return left, ( + right if isinstance(right, pd.Series) else right[right.columns[::-1]] + ) def prefix_column_names(data: Union[pd.Series, pd.DataFrame], prefix: str): if isinstance(data, pd.DataFrame): @@ -332,26 +388,33 @@ def prefix_column_names(data: Union[pd.Series, pd.DataFrame], prefix: str): # the fix includes zero-similarity matches that are missing by default # in _matches_list due to our use of sparse matrices non_matches_list = self._get_non_matches_list() - matches_list = self._matches_list if non_matches_list.empty else \ - pd.concat([self._matches_list, non_matches_list], axis=0, ignore_index=True) + matches_list = ( + self._matches_list + if non_matches_list.empty + else pd.concat( + [self._matches_list, non_matches_list], axis=0, ignore_index=True + ) + ) - left_side, right_side = get_both_sides(self._master, self._duplicates, drop_index=ignore_index) + left_side, right_side = get_both_sides( + self._master, self._duplicates, drop_index=ignore_index + ) similarity = matches_list.similarity.reset_index(drop=True) if self._master_id is None: return pd.concat( [ prefix_column_names(left_side, LEFT_PREFIX), similarity, - prefix_column_names(right_side, RIGHT_PREFIX) + prefix_column_names(right_side, RIGHT_PREFIX), ], - axis=1 + axis=1, ) else: left_side_id, right_side_id = get_both_sides( self._master_id, self._duplicates_id, (DEFAULT_ID_NAME, DEFAULT_ID_NAME), - drop_index=True + drop_index=True, ) return pd.concat( [ @@ -359,15 +422,15 @@ def prefix_column_names(data: Union[pd.Series, pd.DataFrame], prefix: str): prefix_column_names(left_side_id, LEFT_PREFIX), similarity, prefix_column_names(right_side_id, RIGHT_PREFIX), - prefix_column_names(right_side, RIGHT_PREFIX) + prefix_column_names(right_side, RIGHT_PREFIX), ], - axis=1 + axis=1, ) @validate_is_fit - def get_groups(self, - ignore_index: Optional[bool] = None, - replace_na: Optional[bool] = None) -> Union[pd.DataFrame, pd.Series]: + def get_groups( + self, ignore_index: Optional[bool] = None, replace_na: Optional[bool] = None + ) -> Union[pd.DataFrame, pd.Series]: """If there is only a master Series of strings, this will return a Series of 'master' strings. A single string in a group of near duplicates is chosen as 'master' and is returned for each string in the master Series. @@ -380,7 +443,7 @@ def get_groups(self, self._config.ignore_index. :param replace_na: whether or not to replace NaN values in most similar string index-columns with corresponding duplicates-index values. Defaults to self._config.replace_na. 
-        """
+        """
         if ignore_index is None:
             ignore_index = self._config.ignore_index
         if self._duplicates is None:
@@ -388,33 +451,41 @@
         else:
             if replace_na is None:
                 replace_na = self._config.replace_na
-            return self._get_nearest_matches(ignore_index=ignore_index, replace_na=replace_na)
+            return self._get_nearest_matches(
+                ignore_index=ignore_index, replace_na=replace_na
+            )

     @validate_is_fit
-    def add_match(self, master_side: str, dupe_side: str) -> 'StringGrouper':
+    def add_match(self, master_side: str, dupe_side: str) -> "StringGrouper":
         """Adds a match if it wasn't found by the fit function"""
         master_indices, dupe_indices = self._get_indices_of(master_side, dupe_side)

         # add prior matches to new match
-        prior_matches = self._matches_list.master_side[self._matches_list.dupe_side.isin(dupe_indices)]
-        dupe_indices = dupe_indices.append(prior_matches)
+        prior_matches = self._matches_list.master_side[
+            self._matches_list.dupe_side.isin(dupe_indices)
+        ]
+        dupe_indices = pd.concat([dupe_indices, prior_matches])
         dupe_indices.drop_duplicates(inplace=True)

         similarities = [1]

         # cross join the indices
-        new_matches = StringGrouper._cross_join(dupe_indices, master_indices, similarities)
+        new_matches = StringGrouper._cross_join(
+            dupe_indices, master_indices, similarities
+        )
         # If we are de-duping within one Series, we need to make sure the matches stay symmetric
         if self._duplicates is None:
             new_matches = StringGrouper._make_symmetric(new_matches)
         # update the matches
-        self._matches_list = pd.concat([self._matches_list.drop_duplicates(), new_matches], ignore_index=True)
+        self._matches_list = pd.concat(
+            [self._matches_list.drop_duplicates(), new_matches], ignore_index=True
+        )

         return self

     @validate_is_fit
-    def remove_match(self, master_side: str, dupe_side: str) -> 'StringGrouper':
-        """ Removes a match from the StringGrouper"""
+    def remove_match(self, master_side: str, dupe_side: str) -> "StringGrouper":
+        """Removes a match from the StringGrouper"""
         master_indices, dupe_indices = self._get_indices_of(master_side, dupe_side)
         # In the case of having only a master series, we need to remove both the master - dupe match
         # and the dupe - master match:
@@ -424,9 +495,10 @@ def remove_match(self, master_side: str, dupe_side: str) -> 'StringGrouper':

         self._matches_list = self._matches_list[
             ~(
-                (self._matches_list.master_side.isin(master_indices)) &
-                (self._matches_list.dupe_side.isin(dupe_indices))
-            )]
+                (self._matches_list.master_side.isin(master_indices))
+                & (self._matches_list.dupe_side.isin(dupe_indices))
+            )
+        ]
         return self

     def _get_tf_idf_matrices(self) -> Tuple[csr_matrix, csr_matrix]:
@@ -453,106 +525,165 @@ def _fit_vectorizer(self) -> TfidfVectorizer:
         self._vectorizer.fit(strings)
         return self._vectorizer

-    def _build_matches(self, master_matrix: csr_matrix, duplicate_matrix: csr_matrix) -> csr_matrix:
+    def _build_matches(
+        self, master_matrix: csr_matrix, duplicate_matrix: csr_matrix
+    ) -> csr_matrix:
         """Builds the cosine similarity matrix of two csr matrices"""
         tf_idf_matrix_1 = master_matrix
         tf_idf_matrix_2 = duplicate_matrix.transpose()

-        optional_kwargs = {
-            'return_best_ntop': True,
-            'use_threads': self._config.number_of_processes > 1,
-            'n_jobs': self._config.number_of_processes
-        }
-
-        return awesome_cossim_topn(
-            tf_idf_matrix_1, tf_idf_matrix_2,
-            self._max_n_matches,
-            self._config.min_similarity,
-            **optional_kwargs
+
+        return sp_matmul_topn(
+            tf_idf_matrix_1,
+            tf_idf_matrix_2,
+            top_n=self._max_n_matches,
+            threshold=self._config.min_similarity,
+
sort=True, ) def _get_non_matches_list(self) -> pd.DataFrame: """Returns a list of all the indices of non-matching pairs (with similarity set to 0)""" - m_sz, d_sz = len(self._master), len(self._master if self._duplicates is None else self._duplicates) - all_pairs = pd.MultiIndex.from_product([range(m_sz), range(d_sz)], names=['master_side', 'dupe_side']) - matched_pairs = pd.MultiIndex.from_frame(self._matches_list[['master_side', 'dupe_side']]) + m_sz, d_sz = len(self._master), len( + self._master if self._duplicates is None else self._duplicates + ) + all_pairs = pd.MultiIndex.from_product( + [range(m_sz), range(d_sz)], names=["master_side", "dupe_side"] + ) + matched_pairs = pd.MultiIndex.from_frame( + self._matches_list[["master_side", "dupe_side"]] + ) missing_pairs = all_pairs.difference(matched_pairs) if missing_pairs.empty: return pd.DataFrame() - if (self._max_n_matches < self._true_max_n_matches): - raise Exception(f'\nERROR: Cannot return zero-similarity matches since \n' - f'\t\t max_n_matches={self._max_n_matches} is too small!\n' - f'\t\t Try setting max_n_matches={self._true_max_n_matches} (the \n' - f'\t\t true maximum number of matches over all strings in master)\n' - f'\t\t or greater or do not set this kwarg at all.') + if self._max_n_matches < self._true_max_n_matches: + raise Exception( + f"\nERROR: Cannot return zero-similarity matches since \n" + f"\t\t max_n_matches={self._max_n_matches} is too small!\n" + f"\t\t Try setting max_n_matches={self._true_max_n_matches} (the \n" + f"\t\t true maximum number of matches over all strings in master)\n" + f"\t\t or greater or do not set this kwarg at all." + ) missing_pairs = missing_pairs.to_frame(index=False) - missing_pairs['similarity'] = 0 + missing_pairs["similarity"] = 0 return missing_pairs - def _get_nearest_matches(self, - ignore_index=False, - replace_na=False) -> Union[pd.DataFrame, pd.Series]: + def _get_nearest_matches( + self, ignore_index=False, replace_na=False + ) -> Union[pd.DataFrame, pd.Series]: prefix = MOST_SIMILAR_PREFIX - master_label = f'{prefix}{self._master.name if self._master.name else DEFAULT_MASTER_NAME}' + master_label = ( + f"{prefix}{self._master.name if self._master.name else DEFAULT_MASTER_NAME}" + ) master = self._master.rename(master_label).reset_index(drop=ignore_index) - dupes = self._duplicates.rename('duplicates').reset_index(drop=ignore_index) + dupes = self._duplicates.rename("duplicates").reset_index(drop=ignore_index) # Rename new master-columns to avoid possible conflict with new dupes-columns when later merging if isinstance(dupes, pd.DataFrame): master.rename( - columns={col: f'{prefix}{col}' for col in master.columns if str(col) != master_label}, - inplace=True + columns={ + col: f"{prefix}{col}" + for col in master.columns + if str(col) != master_label + }, + inplace=True, ) if self._master_id is not None: - master_id_label = f'{prefix}{self._master_id.name if self._master_id.name else DEFAULT_MASTER_ID_NAME}' - master = pd.concat([master, self._master_id.rename(master_id_label).reset_index(drop=True)], axis=1) - dupes = pd.concat([dupes, self._duplicates_id.rename('duplicates_id').reset_index(drop=True)], axis=1) + master_id_label = f"{prefix}{self._master_id.name if self._master_id.name else DEFAULT_MASTER_ID_NAME}" + master = pd.concat( + [ + master, + self._master_id.rename(master_id_label).reset_index(drop=True), + ], + axis=1, + ) + dupes = pd.concat( + [ + dupes, + self._duplicates_id.rename("duplicates_id").reset_index(drop=True), + ], + axis=1, + ) - dupes_max_sim 
= self._matches_list.groupby('dupe_side').agg({'similarity': 'max'}).reset_index() - dupes_max_sim = dupes_max_sim.merge(self._matches_list, on=['dupe_side', 'similarity']) + dupes_max_sim = ( + self._matches_list.groupby("dupe_side") + .agg({"similarity": "max"}) + .reset_index() + ) + dupes_max_sim = dupes_max_sim.merge( + self._matches_list, on=["dupe_side", "similarity"] + ) # In case there are multiple equal similarities, we pick the one that comes first - dupes_max_sim = dupes_max_sim.groupby(['dupe_side']).agg({'master_side': 'min'}).reset_index() + dupes_max_sim = ( + dupes_max_sim.groupby(["dupe_side"]) + .agg({"master_side": "min"}) + .reset_index() + ) # First we add the duplicate strings - dupes_max_sim = dupes_max_sim.merge(dupes, left_on='dupe_side', right_index=True, how='outer') + dupes_max_sim = dupes_max_sim.merge( + dupes, left_on="dupe_side", right_index=True, how="outer" + ) # Now add the master strings - dupes_max_sim = dupes_max_sim.merge(master, left_on='master_side', right_index=True, how='left') + dupes_max_sim = dupes_max_sim.merge( + master, left_on="master_side", right_index=True, how="left" + ) # Update the master-series with the duplicates in cases were there is no match rows_to_update = dupes_max_sim[master_label].isnull() - dupes_max_sim.loc[rows_to_update, master_label] = dupes_max_sim[rows_to_update].duplicates + dupes_max_sim.loc[rows_to_update, master_label] = dupes_max_sim[ + rows_to_update + ].duplicates if self._master_id is not None: # Also update the master_id-series with the duplicates_id in cases were there is no match - dupes_max_sim.loc[rows_to_update, master_id_label] = dupes_max_sim[rows_to_update].duplicates_id + dupes_max_sim.loc[rows_to_update, master_id_label] = dupes_max_sim[ + rows_to_update + ].duplicates_id # For some weird reason, pandas' merge function changes int-datatype columns to float when NaN values # appear within them. 
So here we change them back to their original datatypes if possible: - if dupes_max_sim[master_id_label].dtype != self._master_id.dtype and \ - self._duplicates_id.dtype == self._master_id.dtype: - dupes_max_sim.loc[:, master_id_label] = \ - dupes_max_sim.loc[:, master_id_label].astype(self._master_id.dtype) + if ( + dupes_max_sim[master_id_label].dtype != self._master_id.dtype + and self._duplicates_id.dtype == self._master_id.dtype + ): + dupes_max_sim.loc[:, master_id_label] = dupes_max_sim.loc[ + :, master_id_label + ].astype(self._master_id.dtype) # Prepare the output: - required_column_list = [master_label] if self._master_id is None else [master_id_label, master_label] - index_column_list = \ - [col for col in master.columns if col not in required_column_list] \ - if isinstance(master, pd.DataFrame) else [] + required_column_list = ( + [master_label] + if self._master_id is None + else [master_id_label, master_label] + ) + index_column_list = ( + [col for col in master.columns if col not in required_column_list] + if isinstance(master, pd.DataFrame) + else [] + ) if replace_na: # Update the master index-columns with the duplicates index-column values in cases were there is no match - dupes_index_columns = [col for col in dupes.columns if str(col) != 'duplicates'] - dupes_max_sim.loc[rows_to_update, index_column_list] = \ - dupes_max_sim.loc[rows_to_update, dupes_index_columns].values + dupes_index_columns = [ + col for col in dupes.columns if str(col) != "duplicates" + ] + dupes_max_sim.loc[rows_to_update, index_column_list] = dupes_max_sim.loc[ + rows_to_update, dupes_index_columns + ].values # Restore their original datatypes if possible: for m, d in zip(index_column_list, dupes_index_columns): - if dupes_max_sim[m].dtype != master[m].dtype and dupes[d].dtype == master[m].dtype: - dupes_max_sim.loc[:, m] = dupes_max_sim.loc[:, m].astype(master[m].dtype) + if ( + dupes_max_sim[m].dtype != master[m].dtype + and dupes[d].dtype == master[m].dtype + ): + dupes_max_sim.loc[:, m] = dupes_max_sim.loc[:, m].astype( + master[m].dtype + ) # Make sure to keep same order as duplicates - dupes_max_sim = dupes_max_sim.sort_values('dupe_side').set_index('dupe_side') + dupes_max_sim = dupes_max_sim.sort_values("dupe_side").set_index("dupe_side") output = dupes_max_sim[index_column_list + required_column_list] output.index = self._duplicates.index return output.squeeze(axis=1) @@ -564,13 +695,13 @@ def _deduplicate(self, ignore_index=False) -> Union[pd.DataFrame, pd.Series]: graph = csr_matrix( ( np.full(len(pairs), 1), - (pairs.master_side.to_numpy(), pairs.dupe_side.to_numpy()) + (pairs.master_side.to_numpy(), pairs.dupe_side.to_numpy()), ), - shape=(n, n) + shape=(n, n), ) # apply scipy.csgraph's clustering algorithm (result is a 1D numpy array of length n): _, groups = connected_components(csgraph=graph, directed=True) - group_of_master_index = pd.Series(groups, name='raw_group_id') + group_of_master_index = pd.Series(groups, name="raw_group_id") # merge groups with string indices to obtain two-column DataFrame: # note: the following line automatically creates a new column named 'index' with the corresponding indices: @@ -578,48 +709,73 @@ def _deduplicate(self, ignore_index=False) -> Union[pd.DataFrame, pd.Series]: # Determine weights for obtaining group representatives: # 1. 
option-setting group_rep='first': - group_of_master_index.rename(columns={'index': 'weight'}, inplace=True) - method = 'first' + group_of_master_index.rename(columns={"index": "weight"}, inplace=True) + method = "first" # 2. option-setting group_rep='centroid': if self._config.group_rep == GROUP_REP_CENTROID: # reuse the adjacency matrix built above (change the 1's to corresponding cosine similarities): - graph.data = pairs['similarity'].to_numpy() + graph.data = pairs["similarity"].to_numpy() # sum along the rows to obtain numpy 1D matrix of similarity aggregates then ... # ... convert to 1D numpy array (using asarray then squeeze) and then to Series: - group_of_master_index['weight'] = pd.Series(np.asarray(graph.sum(axis=1)).squeeze(axis=1)) - method = 'idxmax' + group_of_master_index["weight"] = pd.Series( + np.asarray(graph.sum(axis=1)).squeeze(axis=1) + ) + method = "idxmax" # Determine the group representatives AND merge with indices: # pandas groupby transform function and enlargement enable both respectively in one step: - group_of_master_index['group_rep'] = \ - group_of_master_index.groupby('raw_group_id', sort=False)['weight'].transform(method) + group_of_master_index["group_rep"] = group_of_master_index.groupby( + "raw_group_id", sort=False + )["weight"].transform(method) # Prepare the output: prefix = GROUP_REP_PREFIX - label = f'{prefix}{self._master.name}' if self._master.name else prefix[:-1] + label = f"{prefix}{self._master.name}" if self._master.name else prefix[:-1] # use group rep indexes obtained in the last step above to select the corresponding strings: - output = self._master.iloc[group_of_master_index.group_rep].rename(label).reset_index(drop=ignore_index) + output = ( + self._master.iloc[group_of_master_index.group_rep] + .rename(label) + .reset_index(drop=ignore_index) + ) if isinstance(output, pd.DataFrame): output.rename( - columns={col: f'{prefix}{col}' for col in output.columns if str(col) != label}, - inplace=True + columns={ + col: f"{prefix}{col}" for col in output.columns if str(col) != label + }, + inplace=True, ) if self._master_id is not None: - id_label = f'{prefix}{self._master_id.name if self._master_id.name else DEFAULT_ID_NAME}' + id_label = f"{prefix}{self._master_id.name if self._master_id.name else DEFAULT_ID_NAME}" # use group rep indexes obtained above to select the corresponding string IDs: - output_id = self._master_id.iloc[group_of_master_index.group_rep].rename(id_label).reset_index(drop=True) + output_id = ( + self._master_id.iloc[group_of_master_index.group_rep] + .rename(id_label) + .reset_index(drop=True) + ) output = pd.concat([output_id, output], axis=1) output.index = self._master.index return output - def _get_indices_of(self, master_side: str, dupe_side: str) -> Tuple[pd.Series, pd.Series]: + def _get_indices_of( + self, master_side: str, dupe_side: str + ) -> Tuple[pd.Series, pd.Series]: master_strings = self._master dupe_strings = self._master if self._duplicates is None else self._duplicates # Check if input is valid: - self._validate_strings_exist(master_side, dupe_side, master_strings, dupe_strings) + self._validate_strings_exist( + master_side, dupe_side, master_strings, dupe_strings + ) # Get the indices of the two strings - master_indices = master_strings[master_strings == master_side].index.to_series().reset_index(drop=True) - dupe_indices = dupe_strings[dupe_strings == dupe_side].index.to_series().reset_index(drop=True) + master_indices = ( + master_strings[master_strings == master_side] + .index.to_series() + 
.reset_index(drop=True) + ) + dupe_indices = ( + dupe_strings[dupe_strings == dupe_side] + .index.to_series() + .reset_index(drop=True) + ) return master_indices, dupe_indices def _validate_group_rep_specs(self): @@ -638,8 +794,13 @@ def _validate_tfidf_matrix_dtype(self): def _validate_replace_na_and_drop(self): if self._config.ignore_index and self._config.replace_na: - raise Exception("replace_na can only be set to True when ignore_index=False.") - if self._config.replace_na and self._master.index.nlevels != self._duplicates.index.nlevels: + raise Exception( + "replace_na can only be set to True when ignore_index=False." + ) + if ( + self._config.replace_na + and self._master.index.nlevels != self._duplicates.index.nlevels + ): raise Exception( "replace_na=True: Cannot replace NaN values of index-columns with the values of another " "index if the number of index-levels does not equal the number of index-columns." @@ -661,46 +822,65 @@ def _symmetrize_matrix(m_symmetric: lil_matrix) -> csr_matrix: def _get_matches_list(matches: csr_matrix) -> pd.DataFrame: """Returns a list of all the indices of matches""" r, c = matches.nonzero() - matches_list = pd.DataFrame({'master_side': r.astype(np.int64), - 'dupe_side': c.astype(np.int64), - 'similarity': matches.data}) + matches_list = pd.DataFrame( + { + "master_side": r.astype(np.int64), + "dupe_side": c.astype(np.int64), + "similarity": matches.data, + } + ) return matches_list @staticmethod def _make_symmetric(new_matches: pd.DataFrame) -> pd.DataFrame: - columns_switched = pd.DataFrame({'master_side': new_matches.dupe_side, - 'dupe_side': new_matches.master_side, - 'similarity': new_matches.similarity}) + columns_switched = pd.DataFrame( + { + "master_side": new_matches.dupe_side, + "dupe_side": new_matches.master_side, + "similarity": new_matches.similarity, + } + ) return pd.concat([new_matches, columns_switched]) @staticmethod def _cross_join(dupe_indices, master_indices, similarities) -> pd.DataFrame: - x_join_index = pd.MultiIndex.from_product([master_indices, dupe_indices, similarities], - names=['master_side', 'dupe_side', 'similarity']) + x_join_index = pd.MultiIndex.from_product( + [master_indices, dupe_indices, similarities], + names=["master_side", "dupe_side", "similarity"], + ) x_joined_df = pd.DataFrame(index=x_join_index).reset_index() return x_joined_df @staticmethod def _validate_strings_exist(master_side, dupe_side, master_strings, dupe_strings): if not master_strings.isin([master_side]).any(): - raise ValueError(f'{master_side} not found in StringGrouper string series') + raise ValueError(f"{master_side} not found in StringGrouper string series") elif not dupe_strings.isin([dupe_side]).any(): - raise ValueError(f'{dupe_side} not found in StringGrouper dupe string series') + raise ValueError( + f"{dupe_side} not found in StringGrouper dupe string series" + ) @staticmethod def _is_series_of_strings(series_to_test: pd.Series) -> bool: if not isinstance(series_to_test, pd.Series): return False - elif series_to_test.to_frame().applymap( - lambda x: not isinstance(x, str) - ).squeeze(axis=1).any(): + elif ( + series_to_test.to_frame() + .map(lambda x: not isinstance(x, str)) + .squeeze(axis=1) + .any() + ): return False return True @staticmethod def _is_input_data_combination_valid(duplicates, master_id, duplicates_id) -> bool: - if duplicates is None and (duplicates_id is not None) \ - or duplicates is not None and ((master_id is None) ^ (duplicates_id is None)): + if ( + duplicates is None + and (duplicates_id is not None) + 
or duplicates is not None + and ((master_id is None) ^ (duplicates_id is None)) + ): return False else: return True @@ -708,6 +888,14 @@ def _is_input_data_combination_valid(duplicates, master_id, duplicates_id) -> bo @staticmethod def _validate_id_data(master, duplicates, master_id, duplicates_id): if master_id is not None and len(master) != len(master_id): - raise Exception('Both master and master_id must be pandas.Series of the same length.') - if duplicates is not None and duplicates_id is not None and len(duplicates) != len(duplicates_id): - raise Exception('Both duplicates and duplicates_id must be pandas.Series of the same length.') + raise Exception( + "Both master and master_id must be pandas.Series of the same length." + ) + if ( + duplicates is not None + and duplicates_id is not None + and len(duplicates) != len(duplicates_id) + ): + raise Exception( + "Both duplicates and duplicates_id must be pandas.Series of the same length." + ) diff --git a/string_grouper/test/test_string_grouper.py b/string_grouper/test/test_string_grouper.py index f5f0aac..f63ab0d 100644 --- a/string_grouper/test/test_string_grouper.py +++ b/string_grouper/test/test_string_grouper.py @@ -1,12 +1,21 @@ import unittest import pandas as pd import numpy as np -from scipy.sparse.csr import csr_matrix -from string_grouper.string_grouper import DEFAULT_MIN_SIMILARITY, \ - DEFAULT_REGEX, DEFAULT_NGRAM_SIZE, DEFAULT_N_PROCESSES, DEFAULT_IGNORE_CASE, \ - StringGrouperConfig, StringGrouper, StringGrouperNotFitException, \ - match_most_similar, group_similar_strings, match_strings, \ - compute_pairwise_similarities +from scipy.sparse import csr_matrix +from string_grouper.string_grouper import ( + DEFAULT_MIN_SIMILARITY, + DEFAULT_REGEX, + DEFAULT_NGRAM_SIZE, + DEFAULT_N_PROCESSES, + DEFAULT_IGNORE_CASE, + StringGrouperConfig, + StringGrouper, + StringGrouperNotFitException, + match_most_similar, + group_similar_strings, + match_strings, + compute_pairwise_similarities, +) from unittest.mock import patch @@ -17,80 +26,171 @@ def mock_symmetrize_matrix(x: csr_matrix) -> csr_matrix: class SimpleExample(object): def __init__(self): self.customers_df = pd.DataFrame( - [ - ('BB016741P', 'Mega Enterprises Corporation', 'Address0', 'Tel0', 'Description0', 0.2), - ('CC082744L', 'Hyper Startup Incorporated', '', 'Tel1', '', 0.5), - ('AA098762D', 'Hyper Startup Inc.', 'Address2', 'Tel2', 'Description2', 0.3), - ('BB099931J', 'Hyper-Startup Inc.', 'Address3', 'Tel3', 'Description3', 0.1), - ('HH072982K', 'Hyper Hyper Inc.', 'Address4', '', 'Description4', 0.9), - ('EE059082Q', 'Mega Enterprises Corp.', 'Address5', 'Tel5', 'Description5', 1.0) - ], - columns=('Customer ID', 'Customer Name', 'Address', 'Tel', 'Description', 'weight') + [ + ( + "BB016741P", + "Mega Enterprises Corporation", + "Address0", + "Tel0", + "Description0", + 0.2, + ), + ("CC082744L", "Hyper Startup Incorporated", "", "Tel1", "", 0.5), + ( + "AA098762D", + "Hyper Startup Inc.", + "Address2", + "Tel2", + "Description2", + 0.3, + ), + ( + "BB099931J", + "Hyper-Startup Inc.", + "Address3", + "Tel3", + "Description3", + 0.1, + ), + ("HH072982K", "Hyper Hyper Inc.", "Address4", "", "Description4", 0.9), + ( + "EE059082Q", + "Mega Enterprises Corp.", + "Address5", + "Tel5", + "Description5", + 1.0, + ), + ], + columns=( + "Customer ID", + "Customer Name", + "Address", + "Tel", + "Description", + "weight", + ), ) self.customers_df2 = pd.DataFrame( - [ - ('BB016741P', 'Mega Enterprises Corporation', 'Address0', 'Tel0', 'Description0', 0.2), - ('CC082744L', 'Hyper 
Startup Incorporated', '', 'Tel1', '', 0.5), - ('AA098762D', 'Hyper Startup Inc.', 'Address2', 'Tel2', 'Description2', 0.3), - ('BB099931J', 'Hyper-Startup Inc.', 'Address3', 'Tel3', 'Description3', 0.1), - ('DD012339M', 'HyperStartup Inc.', 'Address4', 'Tel4', 'Description4', 0.1), - ('HH072982K', 'Hyper Hyper Inc.', 'Address5', '', 'Description5', 0.9), - ('EE059082Q', 'Mega Enterprises Corp.', 'Address6', 'Tel6', 'Description6', 1.0) - ], - columns=('Customer ID', 'Customer Name', 'Address', 'Tel', 'Description', 'weight') - ) - self.a_few_strings = pd.Series(['BB016741P', 'BB082744L', 'BB098762D', 'BB099931J', 'BB072982K', 'BB059082Q']) - self.one_string = pd.Series(['BB0']) - self.two_strings = pd.Series(['Hyper', 'Hyp']) - self.whatever_series_1 = pd.Series(['whatever']) + [ + ( + "BB016741P", + "Mega Enterprises Corporation", + "Address0", + "Tel0", + "Description0", + 0.2, + ), + ("CC082744L", "Hyper Startup Incorporated", "", "Tel1", "", 0.5), + ( + "AA098762D", + "Hyper Startup Inc.", + "Address2", + "Tel2", + "Description2", + 0.3, + ), + ( + "BB099931J", + "Hyper-Startup Inc.", + "Address3", + "Tel3", + "Description3", + 0.1, + ), + ( + "DD012339M", + "HyperStartup Inc.", + "Address4", + "Tel4", + "Description4", + 0.1, + ), + ("HH072982K", "Hyper Hyper Inc.", "Address5", "", "Description5", 0.9), + ( + "EE059082Q", + "Mega Enterprises Corp.", + "Address6", + "Tel6", + "Description6", + 1.0, + ), + ], + columns=( + "Customer ID", + "Customer Name", + "Address", + "Tel", + "Description", + "weight", + ), + ) + self.a_few_strings = pd.Series( + [ + "BB016741P", + "BB082744L", + "BB098762D", + "BB099931J", + "BB072982K", + "BB059082Q", + ] + ) + self.one_string = pd.Series(["BB0"]) + self.two_strings = pd.Series(["Hyper", "Hyp"]) + self.whatever_series_1 = pd.Series(["whatever"]) self.expected_result_with_zeroes = pd.DataFrame( [ - (1, 'Hyper Startup Incorporated', 0.08170638, 'whatever', 0), - (0, 'Mega Enterprises Corporation', 0., 'whatever', 0), - (2, 'Hyper Startup Inc.', 0., 'whatever', 0), - (3, 'Hyper-Startup Inc.', 0., 'whatever', 0), - (4, 'Hyper Hyper Inc.', 0., 'whatever', 0), - (5, 'Mega Enterprises Corp.', 0., 'whatever', 0) + (1, "Hyper Startup Incorporated", 0.08170638, "whatever", 0), + (0, "Mega Enterprises Corporation", 0.0, "whatever", 0), + (2, "Hyper Startup Inc.", 0.0, "whatever", 0), + (3, "Hyper-Startup Inc.", 0.0, "whatever", 0), + (4, "Hyper Hyper Inc.", 0.0, "whatever", 0), + (5, "Mega Enterprises Corp.", 0.0, "whatever", 0), + ], + columns=[ + "left_index", + "left_Customer Name", + "similarity", + "right_side", + "right_index", ], - columns=['left_index', 'left_Customer Name', 'similarity', 'right_side', 'right_index'] ) self.expected_result_centroid = pd.Series( [ - 'Mega Enterprises Corporation', - 'Hyper Startup Inc.', - 'Hyper Startup Inc.', - 'Hyper Startup Inc.', - 'Hyper Hyper Inc.', - 'Mega Enterprises Corporation' + "Mega Enterprises Corporation", + "Hyper Startup Inc.", + "Hyper Startup Inc.", + "Hyper Startup Inc.", + "Hyper Hyper Inc.", + "Mega Enterprises Corporation", ], - name='group_rep_Customer Name' + name="group_rep_Customer Name", ) self.expected_result_centroid_with_index_col = pd.DataFrame( [ - (0, 'Mega Enterprises Corporation'), - (2, 'Hyper Startup Inc.'), - (2, 'Hyper Startup Inc.'), - (2, 'Hyper Startup Inc.'), - (4, 'Hyper Hyper Inc.'), - (0, 'Mega Enterprises Corporation') + (0, "Mega Enterprises Corporation"), + (2, "Hyper Startup Inc."), + (2, "Hyper Startup Inc."), + (2, "Hyper Startup Inc."), + (4, "Hyper Hyper Inc."), 
+ (0, "Mega Enterprises Corporation"), ], - columns=['group_rep_index', 'group_rep_Customer Name'] + columns=["group_rep_index", "group_rep_Customer Name"], ) self.expected_result_first = pd.Series( [ - 'Mega Enterprises Corporation', - 'Hyper Startup Incorporated', - 'Hyper Startup Incorporated', - 'Hyper Startup Incorporated', - 'Hyper Hyper Inc.', - 'Mega Enterprises Corporation' + "Mega Enterprises Corporation", + "Hyper Startup Incorporated", + "Hyper Startup Incorporated", + "Hyper Startup Incorporated", + "Hyper Hyper Inc.", + "Mega Enterprises Corporation", ], - name='group_rep_Customer Name' + name="group_rep_Customer Name", ) class StringGrouperConfigTest(unittest.TestCase): - def test_config_defaults(self): """Empty initialisation should set default values""" config = StringGrouperConfig() @@ -109,7 +209,9 @@ def test_config_immutable(self): def test_config_non_default_values(self): """Configurations should be immutable""" - config = StringGrouperConfig(min_similarity=0.1, max_n_matches=100, number_of_processes=1) + config = StringGrouperConfig( + min_similarity=0.1, max_n_matches=100, number_of_processes=1 + ) self.assertEqual(0.1, config.min_similarity) self.assertEqual(100, config.max_n_matches) self.assertEqual(1, config.number_of_processes) @@ -119,7 +221,7 @@ class StringGrouperTest(unittest.TestCase): def test_compute_pairwise_similarities(self): """tests the high-level function compute_pairwise_similarities""" simple_example = SimpleExample() - df1 = simple_example.customers_df['Customer Name'] + df1 = simple_example.customers_df["Customer Name"] df2 = simple_example.expected_result_centroid similarities = compute_pairwise_similarities(df1, df2) expected_result = pd.Series( @@ -129,68 +231,65 @@ def test_compute_pairwise_similarities(self): 1.0000000000000004, 1.0000000000000004, 1.0, - 0.826462625999832 + 0.826462625999832, ], - name='similarity' + name="similarity", ) - expected_result = expected_result.astype(np.float32) + expected_result = expected_result.astype(np.float64) pd.testing.assert_series_equal(expected_result, similarities) def test_compute_pairwise_similarities_data_integrity(self): """tests that an exception is raised whenever the lengths of the two input series of the high-level function compute_pairwise_similarities are unequal""" simple_example = SimpleExample() - df1 = simple_example.customers_df['Customer Name'] + df1 = simple_example.customers_df["Customer Name"] df2 = simple_example.expected_result_centroid with self.assertRaises(Exception): _ = compute_pairwise_similarities(df1, df2[:-2]) - @patch('string_grouper.string_grouper.StringGrouper') + @patch("string_grouper.string_grouper.StringGrouper") def test_group_similar_strings(self, mock_StringGouper): """mocks StringGrouper to test if the high-level function group_similar_strings utilizes it as expected""" mock_StringGrouper_instance = mock_StringGouper.return_value mock_StringGrouper_instance.fit.return_value = mock_StringGrouper_instance - mock_StringGrouper_instance.get_groups.return_value = 'whatever' + mock_StringGrouper_instance.get_groups.return_value = "whatever" test_series_1 = None test_series_id_1 = None - df = group_similar_strings( - test_series_1, - string_ids=test_series_id_1 - ) + df = group_similar_strings(test_series_1, string_ids=test_series_id_1) mock_StringGrouper_instance.fit.assert_called_once() mock_StringGrouper_instance.get_groups.assert_called_once() - self.assertEqual(df, 'whatever') + self.assertEqual(df, "whatever") - 
@patch('string_grouper.string_grouper.StringGrouper') + @patch("string_grouper.string_grouper.StringGrouper") def test_match_most_similar(self, mock_StringGouper): """mocks StringGrouper to test if the high-level function match_most_similar utilizes it as expected""" mock_StringGrouper_instance = mock_StringGouper.return_value mock_StringGrouper_instance.fit.return_value = mock_StringGrouper_instance - mock_StringGrouper_instance.get_groups.return_value = 'whatever' + mock_StringGrouper_instance.get_groups.return_value = "whatever" test_series_1 = None test_series_2 = None test_series_id_1 = None test_series_id_2 = None df = match_most_similar( - test_series_1, - test_series_2, - master_id=test_series_id_1, - duplicates_id=test_series_id_2 - ) + test_series_1, + test_series_2, + master_id=test_series_id_1, + duplicates_id=test_series_id_2, + ) mock_StringGrouper_instance.fit.assert_called_once() mock_StringGrouper_instance.get_groups.assert_called_once() - self.assertEqual(df, 'whatever') + self.assertEqual(df, "whatever") - @patch('string_grouper.string_grouper.StringGrouper') + @patch("string_grouper.string_grouper.StringGrouper") def test_match_strings(self, mock_StringGouper): """mocks StringGrouper to test if the high-level function match_strings utilizes it as expected""" mock_StringGrouper_instance = mock_StringGouper.return_value mock_StringGrouper_instance.fit.return_value = mock_StringGrouper_instance - mock_StringGrouper_instance.get_matches.return_value = 'whatever' + mock_StringGrouper_instance.get_matches.return_value = "whatever" test_series_1 = None test_series_id_1 = None @@ -198,63 +297,87 @@ def test_match_strings(self, mock_StringGouper): mock_StringGrouper_instance.fit.assert_called_once() mock_StringGrouper_instance.get_matches.assert_called_once() - self.assertEqual(df, 'whatever') + self.assertEqual(df, "whatever") @patch( - 'string_grouper.string_grouper.StringGrouper._symmetrize_matrix', - side_effect=mock_symmetrize_matrix + "string_grouper.string_grouper.StringGrouper._symmetrize_matrix", + side_effect=mock_symmetrize_matrix, ) - def test_match_list_symmetry_without_symmetrize_function(self, mock_symmetrize_matrix_param): + def test_match_list_symmetry_without_symmetrize_function( + self, mock_symmetrize_matrix_param + ): """mocks StringGrouper._symmetrize_matches_list so that this test fails whenever _matches_list is - **partially** symmetric which often occurs when the kwarg max_n_matches is too small""" + **partially** symmetric which often occurs when the kwarg max_n_matches is too small + """ simple_example = SimpleExample() - df = simple_example.customers_df2['Customer Name'] + df = simple_example.customers_df2["Customer Name"] sg = StringGrouper(df, max_n_matches=2).fit() mock_symmetrize_matrix_param.assert_called_once() # obtain the upper and lower triangular parts of the matrix of matches: - upper = sg._matches_list[sg._matches_list['master_side'] < sg._matches_list['dupe_side']] - lower = sg._matches_list[sg._matches_list['master_side'] > sg._matches_list['dupe_side']] + upper = sg._matches_list[ + sg._matches_list["master_side"] < sg._matches_list["dupe_side"] + ] + lower = sg._matches_list[ + sg._matches_list["master_side"] > sg._matches_list["dupe_side"] + ] # switch the column names of lower triangular part (i.e., transpose) to convert it to upper triangular: - upper_prime = lower.rename(columns={'master_side': 'dupe_side', 'dupe_side': 'master_side'}) + upper_prime = lower.rename( + columns={"master_side": "dupe_side", "dupe_side": "master_side"} 
+ ) # obtain the intersection between upper and upper_prime: - intersection = upper_prime.merge(upper, how='inner', on=['master_side', 'dupe_side']) + intersection = upper_prime.merge( + upper, how="inner", on=["master_side", "dupe_side"] + ) # if the intersection is empty then _matches_list is completely non-symmetric (this is acceptable) # if the intersection is not empty then at least some matches are repeated. # To make sure all (and not just some) matches are repeated, the lengths of # upper, upper_prime and their intersection should be identical. - self.assertFalse(intersection.empty or len(upper) == len(upper_prime) == len(intersection)) + self.assertFalse( + intersection.empty or len(upper) == len(upper_prime) == len(intersection) + ) def test_match_list_symmetry_with_symmetrize_function(self): """This test ensures that _matches_list is symmetric""" simple_example = SimpleExample() - df = simple_example.customers_df2['Customer Name'] + df = simple_example.customers_df2["Customer Name"] sg = StringGrouper(df, max_n_matches=2).fit() # Obtain the upper and lower triangular parts of the matrix of matches: - upper = sg._matches_list[sg._matches_list['master_side'] < sg._matches_list['dupe_side']] - lower = sg._matches_list[sg._matches_list['master_side'] > sg._matches_list['dupe_side']] + upper = sg._matches_list[ + sg._matches_list["master_side"] < sg._matches_list["dupe_side"] + ] + lower = sg._matches_list[ + sg._matches_list["master_side"] > sg._matches_list["dupe_side"] + ] # Switch the column names of the lower triangular part (i.e., transpose) to convert it to upper triangular: - upper_prime = lower.rename(columns={'master_side': 'dupe_side', 'dupe_side': 'master_side'}) + upper_prime = lower.rename( + columns={"master_side": "dupe_side", "dupe_side": "master_side"} + ) # Obtain the intersection between upper and upper_prime: - intersection = upper_prime.merge(upper, how='inner', on=['master_side', 'dupe_side']) + intersection = upper_prime.merge( + upper, how="inner", on=["master_side", "dupe_side"] + ) # If the intersection is empty this means _matches_list is completely non-symmetric (this is acceptable) # If the intersection is not empty this means at least some matches are repeated. # To make sure all (and not just some) matches are repeated, the lengths of # upper, upper_prime and their intersection should be identical. 
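# A minimal, self-contained sketch of the triangular-intersection check used by
# these two symmetry tests (toy data; only the master_side/dupe_side column
# convention is taken from _matches_list, the values are illustrative):
import pandas as pd

# a fully symmetric matches list: every (i, j) pair also appears as (j, i)
ml = pd.DataFrame({"master_side": [0, 1, 0, 2], "dupe_side": [1, 0, 2, 0]})
upper = ml[ml["master_side"] < ml["dupe_side"]]
lower = ml[ml["master_side"] > ml["dupe_side"]]
# transposing the lower triangle must reproduce the upper triangle exactly
upper_prime = lower.rename(columns={"master_side": "dupe_side", "dupe_side": "master_side"})
intersection = upper_prime.merge(upper, how="inner", on=["master_side", "dupe_side"])
assert len(upper) == len(upper_prime) == len(intersection)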
- self.assertTrue(intersection.empty or len(upper) == len(upper_prime) == len(intersection)) + # breakpoint() + self.assertTrue( + intersection.empty or len(upper) == len(upper_prime) == len(intersection) + ) @patch( - 'string_grouper.string_grouper.StringGrouper._fix_diagonal', - side_effect=mock_symmetrize_matrix + "string_grouper.string_grouper.StringGrouper._fix_diagonal", + side_effect=mock_symmetrize_matrix, ) def test_match_list_diagonal_without_the_fix(self, mock_fix_diagonal): """test fails whenever _matches_list's number of self-joins is not equal to the number of strings""" # This bug is difficult to reproduce -- I mostly encounter it while working with very large datasets; # for small datasets setting max_n_matches=1 reproduces the bug simple_example = SimpleExample() - df = simple_example.customers_df['Customer Name'] + df = simple_example.customers_df["Customer Name"] matches = match_strings(df, max_n_matches=1) mock_fix_diagonal.assert_called_once() - num_self_joins = len(matches[matches['left_index'] == matches['right_index']]) + num_self_joins = len(matches[matches["left_index"] == matches["right_index"]]) num_strings = len(df) self.assertNotEqual(num_self_joins, num_strings) @@ -263,223 +386,284 @@ def test_match_list_diagonal(self): # This bug is difficult to reproduce -- I mostly encounter it while working with very large datasets; # for small datasets setting max_n_matches=1 reproduces the bug simple_example = SimpleExample() - df = simple_example.customers_df['Customer Name'] + df = simple_example.customers_df["Customer Name"] matches = match_strings(df, max_n_matches=1) - num_self_joins = len(matches[matches['left_index'] == matches['right_index']]) + num_self_joins = len(matches[matches["left_index"] == matches["right_index"]]) num_strings = len(df) self.assertEqual(num_self_joins, num_strings) def test_zero_min_similarity(self): """Since sparse matrices exclude zero elements, this test ensures that zero similarity matches are - returned when min_similarity <= 0. A bug related to this was first pointed out by @nbcvijanovic""" + returned when min_similarity <= 0. 
A bug related to this was first pointed out by @nbcvijanovic + """ simple_example = SimpleExample() - s_master = simple_example.customers_df['Customer Name'] + s_master = simple_example.customers_df["Customer Name"] s_dup = simple_example.whatever_series_1 matches = match_strings(s_master, s_dup, min_similarity=0) - pd.testing.assert_frame_equal(simple_example.expected_result_with_zeroes, matches) - - def test_zero_min_similarity_small_max_n_matches(self): - """This test ensures that a warning is issued when n_max_matches is suspected to be too small while - min_similarity <= 0 and include_zeroes is True""" - simple_example = SimpleExample() - s_master = simple_example.customers_df['Customer Name'] - s_dup = simple_example.two_strings - with self.assertRaises(Exception): - _ = match_strings(s_master, s_dup, max_n_matches=1, min_similarity=0) + pd.testing.assert_frame_equal( + simple_example.expected_result_with_zeroes, matches + ) def test_get_non_matches_empty_case(self): """This test ensures that _get_non_matches() returns an empty DataFrame when all pairs of strings match""" simple_example = SimpleExample() s_master = simple_example.a_few_strings s_dup = simple_example.one_string - sg = StringGrouper(s_master, s_dup, max_n_matches=len(s_master), min_similarity=0).fit() + sg = StringGrouper( + s_master, s_dup, max_n_matches=len(s_master), min_similarity=0 + ).fit() self.assertTrue(sg._get_non_matches_list().empty) def test_n_grams_case_unchanged(self): """Should return all ngrams in a string with case""" - test_series = pd.Series(pd.Series(['aa'])) + test_series = pd.Series(pd.Series(["aa"])) # Explicit do not ignore case sg = StringGrouper(test_series, ignore_case=False) - expected_result = ['McD', 'cDo', 'Don', 'ona', 'nal', 'ald', 'lds'] - self.assertListEqual(expected_result, sg.n_grams('McDonalds')) + expected_result = ["McD", "cDo", "Don", "ona", "nal", "ald", "lds"] + self.assertListEqual(expected_result, sg.n_grams("McDonalds")) def test_n_grams_ignore_case_to_lower(self): """Should return all case insensitive ngrams in a string""" - test_series = pd.Series(pd.Series(['aa'])) + test_series = pd.Series(pd.Series(["aa"])) # Explicit ignore case sg = StringGrouper(test_series, ignore_case=True) - expected_result = ['mcd', 'cdo', 'don', 'ona', 'nal', 'ald', 'lds'] - self.assertListEqual(expected_result, sg.n_grams('McDonalds')) + expected_result = ["mcd", "cdo", "don", "ona", "nal", "ald", "lds"] + self.assertListEqual(expected_result, sg.n_grams("McDonalds")) def test_n_grams_ignore_case_to_lower_with_defaults(self): """Should return all case insensitive ngrams in a string""" - test_series = pd.Series(pd.Series(['aa'])) + test_series = pd.Series(pd.Series(["aa"])) # Implicit default case (i.e. 
default behaviour) sg = StringGrouper(test_series) - expected_result = ['mcd', 'cdo', 'don', 'ona', 'nal', 'ald', 'lds'] - self.assertListEqual(expected_result, sg.n_grams('McDonalds')) + expected_result = ["mcd", "cdo", "don", "ona", "nal", "ald", "lds"] + self.assertListEqual(expected_result, sg.n_grams("McDonalds")) def test_build_matrix(self): """Should create a csr matrix only master""" - test_series = pd.Series(['foo', 'bar', 'baz']) + test_series = pd.Series(["foo", "bar", "baz"]) sg = StringGrouper(test_series) master, dupe = sg._get_tf_idf_matrices() - c = csr_matrix([[0., 0., 1.], - [1., 0., 0.], - [0., 1., 0.]]) + c = csr_matrix([[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]) np.testing.assert_array_equal(c.toarray(), master.toarray()) np.testing.assert_array_equal(c.toarray(), dupe.toarray()) def test_build_matrix_master_and_duplicates(self): """Should create a csr matrix for master and duplicates""" - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) + test_series_1 = pd.Series(["foo", "bar", "baz"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) sg = StringGrouper(test_series_1, test_series_2) master, dupe = sg._get_tf_idf_matrices() - master_expected = csr_matrix([[0., 0., 0., 1.], - [1., 0., 0., 0.], - [0., 1., 0., 0.]]) - dupes_expected = csr_matrix([[0., 0., 0., 1.], - [1., 0., 0., 0.], - [0., 0., 1., 0.]]) + master_expected = csr_matrix( + [[0.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]] + ) + dupes_expected = csr_matrix( + [[0.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]] + ) np.testing.assert_array_equal(master_expected.toarray(), master.toarray()) np.testing.assert_array_equal(dupes_expected.toarray(), dupe.toarray()) def test_build_matches(self): """Should create the cosine similarity matrix of two series""" - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) + test_series_1 = pd.Series(["foo", "bar", "baz"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) sg = StringGrouper(test_series_1, test_series_2) master, dupe = sg._get_tf_idf_matrices() - expected_matches = np.array([[1., 0., 0.], - [0., 1., 0.], - [0., 0., 0.]]) - np.testing.assert_array_equal(expected_matches, sg._build_matches(master, dupe)[0].toarray()) + expected_matches = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]) + np.testing.assert_array_equal( + expected_matches, sg._build_matches(master, dupe).toarray() + ) def test_build_matches_list(self): """Should create the cosine similarity matrix of two series""" - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) + test_series_1 = pd.Series(["foo", "bar", "baz"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) sg = StringGrouper(test_series_1, test_series_2) sg = sg.fit() master = [0, 1] dupe_side = [0, 1] similarity = [1.0, 1.0] - expected_df = pd.DataFrame({'master_side': master, 'dupe_side': dupe_side, 'similarity': similarity}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + {"master_side": master, "dupe_side": dupe_side, "similarity": similarity} + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg._matches_list) def test_case_insensitive_build_matches_list(self): """Should create the cosine similarity matrix of two case insensitive 
series""" - test_series_1 = pd.Series(['foo', 'BAR', 'baz']) - test_series_2 = pd.Series(['FOO', 'bar', 'bop']) + test_series_1 = pd.Series(["foo", "BAR", "baz"]) + test_series_2 = pd.Series(["FOO", "bar", "bop"]) sg = StringGrouper(test_series_1, test_series_2) sg = sg.fit() master = [0, 1] dupe_side = [0, 1] similarity = [1.0, 1.0] - expected_df = pd.DataFrame({'master_side': master, 'dupe_side': dupe_side, 'similarity': similarity}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + {"master_side": master, "dupe_side": dupe_side, "similarity": similarity} + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg._matches_list) def test_get_matches_two_dataframes(self): - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) + test_series_1 = pd.Series(["foo", "bar", "baz"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) sg = StringGrouper(test_series_1, test_series_2).fit() - left_side = ['foo', 'bar'] + left_side = ["foo", "bar"] left_index = [0, 1] - right_side = ['foo', 'bar'] + right_side = ["foo", "bar"] right_index = [0, 1] similarity = [1.0, 1.0] - expected_df = pd.DataFrame({'left_index': left_index, 'left_side': left_side, - 'similarity': similarity, - 'right_side': right_side, 'right_index': right_index}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + { + "left_index": left_index, + "left_side": left_side, + "similarity": similarity, + "right_side": right_side, + "right_index": right_index, + } + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg.get_matches()) def test_get_matches_single(self): - test_series_1 = pd.Series(['foo', 'bar', 'baz', 'foo']) + test_series_1 = pd.Series(["foo", "bar", "baz", "foo"]) sg = StringGrouper(test_series_1) sg = sg.fit() - left_side = ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'] - right_side = ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'] + left_side = ["foo", "foo", "bar", "baz", "foo", "foo"] + right_side = ["foo", "foo", "bar", "baz", "foo", "foo"] left_index = [0, 0, 1, 2, 3, 3] right_index = [0, 3, 1, 2, 0, 3] similarity = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - expected_df = pd.DataFrame({'left_index': left_index, 'left_side': left_side, - 'similarity': similarity, - 'right_side': right_side, 'right_index': right_index}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + { + "left_index": left_index, + "left_side": left_side, + "similarity": similarity, + "right_side": right_side, + "right_index": right_index, + } + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg.get_matches()) def test_get_matches_1_series_1_id_series(self): - test_series_1 = pd.Series(['foo', 'bar', 'baz', 'foo']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2', 'A3']) + test_series_1 = pd.Series(["foo", "bar", "baz", "foo"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2", "A3"]) sg = StringGrouper(test_series_1, master_id=test_series_id_1) sg = sg.fit() - left_side = ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'] - left_side_id = ['A0', 
'A0', 'A1', 'A2', 'A3', 'A3'] + left_side = ["foo", "foo", "bar", "baz", "foo", "foo"] + left_side_id = ["A0", "A0", "A1", "A2", "A3", "A3"] left_index = [0, 0, 1, 2, 3, 3] - right_side = ['foo', 'foo', 'bar', 'baz', 'foo', 'foo'] - right_side_id = ['A0', 'A3', 'A1', 'A2', 'A0', 'A3'] + right_side = ["foo", "foo", "bar", "baz", "foo", "foo"] + right_side_id = ["A0", "A3", "A1", "A2", "A0", "A3"] right_index = [0, 3, 1, 2, 0, 3] similarity = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] - expected_df = pd.DataFrame({'left_index': left_index, 'left_side': left_side, 'left_id': left_side_id, - 'similarity': similarity, - 'right_id': right_side_id, 'right_side': right_side, 'right_index': right_index}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + { + "left_index": left_index, + "left_side": left_side, + "left_id": left_side_id, + "similarity": similarity, + "right_id": right_side_id, + "right_side": right_side, + "right_index": right_index, + } + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg.get_matches()) def test_get_matches_2_series_2_id_series(self): - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) - test_series_id_2 = pd.Series(['B0', 'B1', 'B2']) - sg = StringGrouper(test_series_1, test_series_2, duplicates_id=test_series_id_2, - master_id=test_series_id_1).fit() - left_side = ['foo', 'bar'] - left_side_id = ['A0', 'A1'] + test_series_1 = pd.Series(["foo", "bar", "baz"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) + test_series_id_2 = pd.Series(["B0", "B1", "B2"]) + sg = StringGrouper( + test_series_1, + test_series_2, + duplicates_id=test_series_id_2, + master_id=test_series_id_1, + ).fit() + left_side = ["foo", "bar"] + left_side_id = ["A0", "A1"] left_index = [0, 1] - right_side = ['foo', 'bar'] - right_side_id = ['B0', 'B1'] + right_side = ["foo", "bar"] + right_side_id = ["B0", "B1"] right_index = [0, 1] similarity = [1.0, 1.0] - expected_df = pd.DataFrame({'left_index': left_index, 'left_side': left_side, 'left_id': left_side_id, - 'similarity': similarity, - 'right_id': right_side_id, 'right_side': right_side, 'right_index': right_index}) - expected_df.loc[:, 'similarity'] = expected_df.loc[:, 'similarity'].astype(sg._config.tfidf_matrix_dtype) + expected_df = pd.DataFrame( + { + "left_index": left_index, + "left_side": left_side, + "left_id": left_side_id, + "similarity": similarity, + "right_id": right_side_id, + "right_side": right_side, + "right_index": right_index, + } + ) + expected_df.loc[:, "similarity"] = expected_df.loc[:, "similarity"].astype( + sg._config.tfidf_matrix_dtype + ) pd.testing.assert_frame_equal(expected_df, sg.get_matches()) def test_get_matches_raises_exception_if_unexpected_options_given(self): # When the input id data does not correspond with its string data: - test_series_1 = pd.Series(['foo', 'bar', 'baz']) - bad_test_series_id_1 = pd.Series(['A0', 'A1']) - good_test_series_id_1 = pd.Series(['A0', 'A1', 'A2']) - test_series_2 = pd.Series(['foo', 'bar', 'bop']) - bad_test_series_id_2 = pd.Series(['B0', 'B1']) - good_test_series_id_2 = pd.Series(['B0', 'B1', 'B2']) + test_series_1 = pd.Series(["foo", "bar", "baz"]) + bad_test_series_id_1 = pd.Series(["A0", "A1"]) + good_test_series_id_1 = 
pd.Series(["A0", "A1", "A2"]) + test_series_2 = pd.Series(["foo", "bar", "bop"]) + bad_test_series_id_2 = pd.Series(["B0", "B1"]) + good_test_series_id_2 = pd.Series(["B0", "B1", "B2"]) with self.assertRaises(Exception): _ = StringGrouper(test_series_1, master_id=bad_test_series_id_1) with self.assertRaises(Exception): - _ = StringGrouper(test_series_1, duplicates=test_series_2, duplicates_id=bad_test_series_id_2, - master_id=good_test_series_id_1) + _ = StringGrouper( + test_series_1, + duplicates=test_series_2, + duplicates_id=bad_test_series_id_2, + master_id=good_test_series_id_1, + ) # When the input data is ok but the option combinations are invalid: with self.assertRaises(Exception): - _ = StringGrouper(test_series_1, test_series_2, master_id=good_test_series_id_1) + _ = StringGrouper( + test_series_1, test_series_2, master_id=good_test_series_id_1 + ) with self.assertRaises(Exception): - _ = StringGrouper(test_series_1, test_series_2, duplicates_id=good_test_series_id_2) + _ = StringGrouper( + test_series_1, test_series_2, duplicates_id=good_test_series_id_2 + ) with self.assertRaises(Exception): _ = StringGrouper(test_series_1, duplicates_id=good_test_series_id_2) with self.assertRaises(Exception): - _ = StringGrouper(test_series_1, master_id=good_test_series_id_1, duplicates_id=good_test_series_id_2) + _ = StringGrouper( + test_series_1, + master_id=good_test_series_id_1, + duplicates_id=good_test_series_id_2, + ) with self.assertRaises(Exception): - _ = StringGrouper(test_series_1, master_id=good_test_series_id_1, ignore_index=True, replace_na=True) + _ = StringGrouper( + test_series_1, + master_id=good_test_series_id_1, + ignore_index=True, + replace_na=True, + ) # Here we force an exception by making the number of index-levels of duplicates different from master: # and setting replace_na=True - test_series_2.index = pd.MultiIndex.from_tuples(list(zip(list('ABC'), [0, 1, 2]))) + test_series_2.index = pd.MultiIndex.from_tuples( + list(zip(list("ABC"), [0, 1, 2])) + ) with self.assertRaises(Exception): _ = StringGrouper(test_series_1, duplicates=test_series_2, replace_na=True) @@ -491,10 +675,8 @@ def test_get_groups_single_df_group_rep_default(self): pd.testing.assert_series_equal( simple_example.expected_result_centroid, group_similar_strings( - customers_df['Customer Name'], - min_similarity=0.6, - ignore_index=True - ) + customers_df["Customer Name"], min_similarity=0.6, ignore_index=True + ), ) def test_get_groups_single_valued_series(self): @@ -502,36 +684,31 @@ def test_get_groups_single_valued_series(self): since the input-series is also single-valued. 
This test was created in response to a bug discovered by George Walker""" pd.testing.assert_frame_equal( - pd.DataFrame([(0, "hello")], columns=['group_rep_index', 'group_rep']), - group_similar_strings( - pd.Series(["hello"]), - min_similarity=0.6 - ) + pd.DataFrame([(0, "hello")], columns=["group_rep_index", "group_rep"]), + group_similar_strings(pd.Series(["hello"]), min_similarity=0.6), ) pd.testing.assert_series_equal( - pd.Series(["hello"], name='group_rep'), + pd.Series(["hello"], name="group_rep"), group_similar_strings( - pd.Series(["hello"]), - min_similarity=0.6, - ignore_index=True - ) + pd.Series(["hello"]), min_similarity=0.6, ignore_index=True + ), ) pd.testing.assert_frame_equal( - pd.DataFrame([(0, "hello")], columns=['most_similar_index', 'most_similar_master']), + pd.DataFrame( + [(0, "hello")], columns=["most_similar_index", "most_similar_master"] + ), match_most_similar( - pd.Series(["hello"]), - pd.Series(["hello"]), - min_similarity=0.6 - ) + pd.Series(["hello"]), pd.Series(["hello"]), min_similarity=0.6 + ), ) pd.testing.assert_series_equal( - pd.Series(["hello"], name='most_similar_master'), + pd.Series(["hello"], name="most_similar_master"), match_most_similar( pd.Series(["hello"]), pd.Series(["hello"]), min_similarity=0.6, - ignore_index=True - ) + ignore_index=True, + ), ) def test_get_groups_single_df_keep_index(self): @@ -542,10 +719,8 @@ def test_get_groups_single_df_keep_index(self): pd.testing.assert_frame_equal( simple_example.expected_result_centroid_with_index_col, group_similar_strings( - customers_df['Customer Name'], - min_similarity=0.6, - ignore_index=False - ) + customers_df["Customer Name"], min_similarity=0.6, ignore_index=False + ), ) def test_get_groups_single_df_group_rep_centroid(self): @@ -556,11 +731,11 @@ def test_get_groups_single_df_group_rep_centroid(self): pd.testing.assert_series_equal( simple_example.expected_result_first, group_similar_strings( - customers_df['Customer Name'], - group_rep='first', + customers_df["Customer Name"], + group_rep="first", min_similarity=0.6, - ignore_index=True - ) + ignore_index=True, + ), ) def test_get_groups_single_df_group_rep_bad_option_value(self): @@ -569,259 +744,307 @@ def test_get_groups_single_df_group_rep_bad_option_value(self): customers_df = simple_example.customers_df with self.assertRaises(Exception): _ = group_similar_strings( - customers_df['Customer Name'], - group_rep='nonsense', - min_similarity=0.6 - ) + customers_df["Customer Name"], group_rep="nonsense", min_similarity=0.6 + ) def test_get_groups_single_df(self): """Should return a pd.Series object with the same length as the original df. The series object will contain a list of the grouped strings""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, ignore_index=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.Series(['foooo', 'bar', 'baz', 'foooo'], name='group_rep') + expected_result = pd.Series(["foooo", "bar", "baz", "foooo"], name="group_rep") pd.testing.assert_series_equal(expected_result, result) def test_get_groups_1_string_series_1_id_series(self): """Should return a pd.DataFrame object with the same length as the original df. 
The series object will contain a list of the grouped strings""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2', 'A3']) + test_series_1 = pd.Series(["foooo", "bar", "baz", "foooob"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2", "A3"]) sg = StringGrouper(test_series_1, master_id=test_series_id_1, ignore_index=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip(['A0', 'A1', 'A2', 'A0'], ['foooo', 'bar', 'baz', 'foooo'])), - columns=['group_rep_id', 'group_rep']) + expected_result = pd.DataFrame( + list(zip(["A0", "A1", "A2", "A0"], ["foooo", "bar", "baz", "foooo"])), + columns=["group_rep_id", "group_rep"], + ) pd.testing.assert_frame_equal(expected_result, result) def test_get_groups_two_df(self): """Should return a pd.Series object with the length of the dupes. The series will contain the master string that matches the dupe with the highest similarity""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "baz"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, test_series_2, ignore_index=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.Series(['foooo', 'bar', 'baz', 'foooo'], name='most_similar_master') + expected_result = pd.Series( + ["foooo", "bar", "baz", "foooo"], name="most_similar_master" + ) pd.testing.assert_series_equal(expected_result, result) def test_get_groups_2_string_series_2_id_series(self): """Should return a pd.DataFrame object with the length of the dupes. The series will contain the master string that matches the dupe with the highest similarity""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2']) - test_series_id_2 = pd.Series(['B0', 'B1', 'B2', 'B3']) - sg = StringGrouper(test_series_1, - test_series_2, - master_id=test_series_id_1, - duplicates_id=test_series_id_2, - ignore_index=True) + test_series_1 = pd.Series(["foooo", "bar", "baz"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2"]) + test_series_id_2 = pd.Series(["B0", "B1", "B2", "B3"]) + sg = StringGrouper( + test_series_1, + test_series_2, + master_id=test_series_id_1, + duplicates_id=test_series_id_2, + ignore_index=True, + ) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip(['A0', 'A1', 'A2', 'A0'], ['foooo', 'bar', 'baz', 'foooo'])), - columns=['most_similar_master_id', 'most_similar_master']) + expected_result = pd.DataFrame( + list(zip(["A0", "A1", "A2", "A0"], ["foooo", "bar", "baz", "foooo"])), + columns=["most_similar_master_id", "most_similar_master"], + ) pd.testing.assert_frame_equal(expected_result, result) - def test_get_groups_2_string_series_2_numeric_id_series_with_missing_master_value(self): + def test_get_groups_2_string_series_2_numeric_id_series_with_missing_master_value( + self, + ): """Should return a pd.DataFrame object with the length of the dupes. 
The series will contain the master string that matches the dupe with the highest similarity""" - test_series_1 = pd.Series(['foooo', 'bar', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) test_series_id_1 = pd.Series([0, 1, 2]) test_series_id_2 = pd.Series([100, 101, 102, 103]) - sg = StringGrouper(test_series_1, - test_series_2, - master_id=test_series_id_1, - duplicates_id=test_series_id_2, - ignore_index=True) + sg = StringGrouper( + test_series_1, + test_series_2, + master_id=test_series_id_1, + duplicates_id=test_series_id_2, + ignore_index=True, + ) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip([0, 1, 102, 0], ['foooo', 'bar', 'baz', 'foooo'])), - columns=['most_similar_master_id', 'most_similar_master']) + expected_result = pd.DataFrame( + list(zip([0.0, 1.0, 102.0, 0.0], ["foooo", "bar", "baz", "foooo"])), + columns=["most_similar_master_id", "most_similar_master"], + ) pd.testing.assert_frame_equal(expected_result, result) - def test_get_groups_2_string_series_with_numeric_indexes_and_missing_master_value(self): + def test_get_groups_2_string_series_with_numeric_indexes_and_missing_master_value( + self, + ): """Should return a pd.DataFrame object with the length of the dupes. The series will contain the master string that matches the dupe with the highest similarity""" - test_series_1 = pd.Series(['foooo', 'bar', 'foooo'], index=[0, 1, 2]) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob'], index=[100, 101, 102, 103]) + test_series_1 = pd.Series(["foooo", "bar", "foooo"], index=[0, 1, 2]) + test_series_2 = pd.Series( + ["foooo", "bar", "baz", "foooob"], index=[100, 101, 102, 103] + ) sg = StringGrouper(test_series_1, test_series_2, replace_na=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip([0, 1, 102, 0], ['foooo', 'bar', 'baz', 'foooo'])), - columns=['most_similar_index', 'most_similar_master'], - index=test_series_2.index) + expected_result = pd.DataFrame( + list(zip([0.0, 1.0, 102.0, 0.0], ["foooo", "bar", "baz", "foooo"])), + columns=["most_similar_index", "most_similar_master"], + index=test_series_2.index, + ) pd.testing.assert_frame_equal(expected_result, result) def test_get_groups_two_df_same_similarity(self): """Should return a pd.Series object with the length of the dupes. If there are two dupes with the same similarity, the first one is chosen""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, test_series_2, ignore_index=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.Series(['foooo', 'bar', 'baz', 'foooo'], name='most_similar_master') + expected_result = pd.Series( + ["foooo", "bar", "baz", "foooo"], name="most_similar_master" + ) pd.testing.assert_series_equal(expected_result, result) def test_get_groups_4_df_same_similarity(self): """Should return a pd.DataFrame object with the length of the dupes. 
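# Why the expected id columns in the two missing-master-value tests above
# changed from ints ([0, 1, 102, 0]) to floats ([0.0, 1.0, 102.0, 0.0]): an
# unmatched dupe leaves a NaN in the id column before replace_na fills it, and
# a single NaN upcasts an int64 column to float64. A minimal demonstration of
# the upcast itself (illustrative values):
import numpy as np
import pandas as pd

s = pd.Series([0, 1, np.nan, 0])
assert s.dtype == np.float64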
If there are two dupes with the same similarity, the first one is chosen""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2', 'A3']) - test_series_id_2 = pd.Series(['B0', 'B1', 'B2', 'B3']) - sg = StringGrouper(test_series_1, - test_series_2, - master_id=test_series_id_1, - duplicates_id=test_series_id_2, - ignore_index=True) + test_series_1 = pd.Series(["foooo", "bar", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2", "A3"]) + test_series_id_2 = pd.Series(["B0", "B1", "B2", "B3"]) + sg = StringGrouper( + test_series_1, + test_series_2, + master_id=test_series_id_1, + duplicates_id=test_series_id_2, + ignore_index=True, + ) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip(['A0', 'A1', 'A2', 'A0'], ['foooo', 'bar', 'baz', 'foooo'])), - columns=['most_similar_master_id', 'most_similar_master']) + expected_result = pd.DataFrame( + list(zip(["A0", "A1", "A2", "A0"], ["foooo", "bar", "baz", "foooo"])), + columns=["most_similar_master_id", "most_similar_master"], + ) pd.testing.assert_frame_equal(expected_result, result) def test_get_groups_two_df_no_match(self): """Should return a pd.Series object with the length of the dupes. If no match is found in dupes, the original will be returned""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz']) - test_series_2 = pd.Series(['foooo', 'dooz', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "baz"]) + test_series_2 = pd.Series(["foooo", "dooz", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, test_series_2, ignore_index=True) sg = sg.fit() result = sg.get_groups() - expected_result = pd.Series(['foooo', 'dooz', 'bar', 'baz', 'foooo'], name='most_similar_master') + expected_result = pd.Series( + ["foooo", "dooz", "bar", "baz", "foooo"], name="most_similar_master" + ) pd.testing.assert_series_equal(expected_result, result) def test_get_groups_4_df_no_match(self): """Should return a pd.DataFrame object with the length of the dupes. 
If no match is found in dupes, the original will be returned""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz']) - test_series_2 = pd.Series(['foooo', 'dooz', 'bar', 'baz', 'foooob']) - test_series_id_1 = pd.Series(['A0', 'A1', 'A2']) - test_series_id_2 = pd.Series(['B0', 'B1', 'B2', 'B3', 'B4']) - sg = StringGrouper(test_series_1, - test_series_2, - master_id=test_series_id_1, - duplicates_id=test_series_id_2, - ignore_index=True) + test_series_1 = pd.Series(["foooo", "bar", "baz"]) + test_series_2 = pd.Series(["foooo", "dooz", "bar", "baz", "foooob"]) + test_series_id_1 = pd.Series(["A0", "A1", "A2"]) + test_series_id_2 = pd.Series(["B0", "B1", "B2", "B3", "B4"]) + sg = StringGrouper( + test_series_1, + test_series_2, + master_id=test_series_id_1, + duplicates_id=test_series_id_2, + ignore_index=True, + ) sg = sg.fit() result = sg.get_groups() - expected_result = pd.DataFrame(list(zip( - ['A0', 'B1', 'A1', 'A2', 'A0'], ['foooo', 'dooz', 'bar', 'baz', 'foooo'] - )), - columns=['most_similar_master_id', 'most_similar_master'] + expected_result = pd.DataFrame( + list( + zip( + ["A0", "B1", "A1", "A2", "A0"], + ["foooo", "dooz", "bar", "baz", "foooo"], + ) + ), + columns=["most_similar_master_id", "most_similar_master"], ) pd.testing.assert_frame_equal(expected_result, result) def test_get_groups_raises_exception(self): """Should raise an exception if called before the StringGrouper is fit""" - test_series_1 = pd.Series(['foooo', 'bar', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "bar", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, test_series_2) with self.assertRaises(StringGrouperNotFitException): _ = sg.get_groups() def test_add_match_raises_exception_if_string_not_present(self): - test_series_1 = pd.Series(['foooo', 'no match', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "no match", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1).fit() sg2 = StringGrouper(test_series_1, test_series_2).fit() with self.assertRaises(ValueError): - sg.add_match('doesnt exist', 'baz') + sg.add_match("doesnt exist", "baz") with self.assertRaises(ValueError): - sg.add_match('baz', 'doesnt exist') + sg.add_match("baz", "doesnt exist") with self.assertRaises(ValueError): - sg2.add_match('doesnt exist', 'baz') + sg2.add_match("doesnt exist", "baz") with self.assertRaises(ValueError): - sg2.add_match('baz', 'doesnt exist') + sg2.add_match("baz", "doesnt exist") def test_add_match_single_occurence(self): """Should add the match if there are no exact duplicates""" - test_series_1 = pd.Series(['foooo', 'no match', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "no match", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1).fit() - sg.add_match('no match', 'baz') + sg.add_match("no match", "baz") matches = sg.get_matches() - matches = matches[(matches.left_side == 'no match') & (matches.right_side == 'baz')] + matches = matches[ + (matches.left_side == "no match") & (matches.right_side == "baz") + ] self.assertEqual(1, matches.shape[0]) sg2 = StringGrouper(test_series_1, test_series_2).fit() - sg2.add_match('no match', 'bar') + sg2.add_match("no match", "bar") matches = sg2.get_matches() - matches = 
matches[(matches.left_side == 'no match') & (matches.right_side == 'bar')] + matches = matches[ + (matches.left_side == "no match") & (matches.right_side == "bar") + ] self.assertEqual(1, matches.shape[0]) def test_add_match_single_group_matches_symmetric(self): """New matches that are added to a SG with only a master series should be symmetric""" - test_series_1 = pd.Series(['foooo', 'no match', 'baz', 'foooo']) + test_series_1 = pd.Series(["foooo", "no match", "baz", "foooo"]) sg = StringGrouper(test_series_1).fit() - sg.add_match('no match', 'baz') + sg.add_match("no match", "baz") matches = sg.get_matches() - matches_1 = matches[(matches.left_side == 'no match') & (matches.right_side == 'baz')] + matches_1 = matches[ + (matches.left_side == "no match") & (matches.right_side == "baz") + ] self.assertEqual(1, matches_1.shape[0]) - matches_2 = matches[(matches.left_side == 'baz') & (matches.right_side == 'no match')] + matches_2 = matches[ + (matches.left_side == "baz") & (matches.right_side == "no match") + ] self.assertEqual(1, matches_2.shape[0]) def test_add_match_multiple_occurences(self): """Should add multiple matches if there are exact duplicates""" - test_series_1 = pd.Series(['foooo', 'no match', 'baz', 'foooo']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "no match", "baz", "foooo"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1, test_series_2).fit() - sg.add_match('foooo', 'baz') + sg.add_match("foooo", "baz") matches = sg.get_matches() - matches = matches[(matches.left_side == 'foooo') & (matches.right_side == 'baz')] + matches = matches[ + (matches.left_side == "foooo") & (matches.right_side == "baz") + ] self.assertEqual(2, matches.shape[0]) def test_remove_match(self): """Should remove a match""" - test_series_1 = pd.Series(['foooo', 'no match', 'baz', 'foooob']) - test_series_2 = pd.Series(['foooo', 'bar', 'baz', 'foooob']) + test_series_1 = pd.Series(["foooo", "no match", "baz", "foooob"]) + test_series_2 = pd.Series(["foooo", "bar", "baz", "foooob"]) sg = StringGrouper(test_series_1).fit() - sg.remove_match('foooo', 'foooob') + sg.remove_match("foooo", "foooob") matches = sg.get_matches() - matches_1 = matches[(matches.left_side == 'foooo') & (matches.right_side == 'foooob')] + matches_1 = matches[ + (matches.left_side == "foooo") & (matches.right_side == "foooob") + ] # In the case of only a master series, the matches are recursive, so both variants are to be removed - matches_2 = matches[(matches.left_side == 'foooob') & (matches.right_side == 'foooo')] + matches_2 = matches[ + (matches.left_side == "foooob") & (matches.right_side == "foooo") + ] self.assertEqual(0, matches_1.shape[0]) self.assertEqual(0, matches_2.shape[0]) sg2 = StringGrouper(test_series_1, test_series_2).fit() - sg2.remove_match('foooo', 'foooob') + sg2.remove_match("foooo", "foooob") matches = sg2.get_matches() - matches = matches[(matches.left_side == 'foooo') & (matches.right_side == 'foooob')] + matches = matches[ + (matches.left_side == "foooo") & (matches.right_side == "foooob") + ] self.assertEqual(0, matches.shape[0]) def test_string_grouper_type_error(self): """StringGrouper should raise an typeerror master or duplicates are not a series of strings""" with self.assertRaises(TypeError): - _ = StringGrouper('foo', 'bar') + _ = StringGrouper("foo", "bar") with self.assertRaises(TypeError): - _ = StringGrouper(pd.Series(['foo', 'bar']), pd.Series(['foo', 1])) + _ = 
StringGrouper(pd.Series(["foo", "bar"]), pd.Series(["foo", 1])) with self.assertRaises(TypeError): - _ = StringGrouper(pd.Series(['foo', np.nan]), pd.Series(['foo', 'j'])) + _ = StringGrouper(pd.Series(["foo", np.nan]), pd.Series(["foo", "j"])) def test_prior_matches_added(self): """When a new match is added, any pre-existing matches should also be updated""" sample = [ - 'microsoftoffice 365 home', - 'microsoftoffice 365 pers', - 'microsoft office' - ] + "microsoftoffice 365 home", + "microsoftoffice 365 pers", + "microsoft office", + ] - df = pd.DataFrame(sample, columns=['name']) + df = pd.DataFrame(sample, columns=["name"]) - sg = StringGrouper(df['name'], ignore_index=True) + sg = StringGrouper(df["name"], ignore_index=True) sg = sg.fit() - sg = sg.add_match('microsoft office', 'microsoftoffice 365 home') - sg = sg.add_match('microsoftoffice 365 pers', 'microsoft office') - df['deduped'] = sg.get_groups() + sg = sg.add_match("microsoft office", "microsoftoffice 365 home") + sg = sg.add_match("microsoftoffice 365 pers", "microsoft office") + df["deduped"] = sg.get_groups() # All strings should now match to the same "master" string self.assertEqual(1, len(df.deduped.unique())) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/string_grouper_utils/__init__.py b/string_grouper_utils/__init__.py index 3ab821f..9f7ee83 100644 --- a/string_grouper_utils/__init__.py +++ b/string_grouper_utils/__init__.py @@ -1,2 +1,5 @@ -from .string_grouper_utils import new_group_rep_by_earliest_timestamp, new_group_rep_by_completeness, \ - new_group_rep_by_highest_weight +from .string_grouper_utils import ( + new_group_rep_by_earliest_timestamp, + new_group_rep_by_completeness, + new_group_rep_by_highest_weight, +) diff --git a/string_grouper_utils/string_grouper_utils.py b/string_grouper_utils/string_grouper_utils.py index e674367..79eab59 100644 --- a/string_grouper_utils/string_grouper_utils.py +++ b/string_grouper_utils/string_grouper_utils.py @@ -8,13 +8,15 @@ import pydoc -def new_group_rep_by_earliest_timestamp(grouped_data: pd.DataFrame, - group_col: Union[str, int], - record_id_col: Union[str, int], - timestamps: Union[pd.Series, str, int], - record_name_col: Optional[Union[str, int]] = None, - parserinfo=None, - **kwargs) -> Union[pd.DataFrame, pd.Series]: +def new_group_rep_by_earliest_timestamp( + grouped_data: pd.DataFrame, + group_col: Union[str, int], + record_id_col: Union[str, int], + timestamps: Union[pd.Series, str, int], + record_name_col: Optional[Union[str, int]] = None, + parserinfo=None, + **kwargs +) -> Union[pd.DataFrame, pd.Series]: """ Selects the oldest string in each group as group-representative. :param grouped_data: The grouped DataFrame @@ -34,19 +36,24 @@ def new_group_rep_by_earliest_timestamp(grouped_data: pd.DataFrame, """ if isinstance(timestamps, pd.Series): if len(grouped_data) != len(timestamps): - raise Exception('Both grouped_data and timestamps must be pandas.Series of the same length.') + raise Exception( + "Both grouped_data and timestamps must be pandas.Series of the same length." 
+ ) else: timestamps = get_column(timestamps, grouped_data) weights = parse_timestamps(timestamps, parserinfo, **kwargs) - return group_rep_transform('idxmin', weights, grouped_data, group_col, record_id_col, record_name_col) - - -def new_group_rep_by_completeness(grouped_data: pd.DataFrame, - group_col: Union[str, int], - record_id_col: Union[str, int], - record_name_col: Optional[Union[str, int]] = None, - tested_cols: Optional[Union[pd.DataFrame, List[Union[str, int]]]] = None - ) -> Union[pd.DataFrame, pd.Series]: + return group_rep_transform( + "idxmin", weights, grouped_data, group_col, record_id_col, record_name_col + ) + + +def new_group_rep_by_completeness( + grouped_data: pd.DataFrame, + group_col: Union[str, int], + record_id_col: Union[str, int], + record_name_col: Optional[Union[str, int]] = None, + tested_cols: Optional[Union[pd.DataFrame, List[Union[str, int]]]] = None, +) -> Union[pd.DataFrame, pd.Series]: """ Selects the string in the group with the most filled-in row/record as group-representative. :param grouped_data: The grouped DataFrame @@ -62,28 +69,33 @@ def new_group_rep_by_completeness(grouped_data: pd.DataFrame, """ if isinstance(tested_cols, pd.DataFrame): if len(grouped_data) != len(tested_cols): - raise Exception('Both grouped_data and tested_cols must be pandas.DataFrame of the same length.') + raise Exception( + "Both grouped_data and tested_cols must be pandas.DataFrame of the same length." + ) elif tested_cols is not None: tested_cols = get_column(tested_cols, grouped_data) else: tested_cols = grouped_data def is_notnull_and_not_empty(x): - if x == '' or pd.isnull(x): + if x == "" or pd.isnull(x): return 0 else: return 1 - weights = tested_cols.applymap(is_notnull_and_not_empty).sum(axis=1) - return group_rep_transform('idxmax', weights, grouped_data, group_col, record_id_col, record_name_col) + weights = tested_cols.map(is_notnull_and_not_empty).sum(axis=1) + return group_rep_transform( + "idxmax", weights, grouped_data, group_col, record_id_col, record_name_col + ) -def new_group_rep_by_highest_weight(grouped_data: pd.DataFrame, - group_col: Union[str, int], - record_id_col: Union[str, int], - weights: Union[pd.Series, str, int], - record_name_col: Optional[Union[str, int]] = None, - ) -> Union[pd.DataFrame, pd.Series]: +def new_group_rep_by_highest_weight( + grouped_data: pd.DataFrame, + group_col: Union[str, int], + record_id_col: Union[str, int], + weights: Union[pd.Series, str, int], + record_name_col: Optional[Union[str, int]] = None, +) -> Union[pd.DataFrame, pd.Series]: """ Selects the string in the group with the largest weight as group-representative. :param grouped_data: The grouped DataFrame @@ -97,31 +109,50 @@ def new_group_rep_by_highest_weight(grouped_data: pd.DataFrame, """ if isinstance(weights, pd.Series): if len(grouped_data) != len(weights): - raise Exception('Both grouped_data and weights must be pandas.Series of the same length.') + raise Exception( + "Both grouped_data and weights must be pandas.Series of the same length." 
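# The selection logic shared by all three helpers (group_rep_transform, below)
# boils down to a groupby/transform with "idxmin" or "idxmax"; a toy sketch
# with hypothetical column names:
import pandas as pd

df = pd.DataFrame({"group": ["a", "a", "b"], "id": ["x1", "x2", "x3"], "w": [0.2, 0.9, 0.5]})
# positional index of each group's heaviest record, broadcast back to every row
rep_pos = df.groupby("group", sort=False)["w"].transform("idxmax")
assert df["id"].iloc[rep_pos].tolist() == ["x2", "x2", "x3"]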
+ ) else: weights = get_column(weights, grouped_data) - return group_rep_transform('idxmax', weights, grouped_data, group_col, record_id_col, record_name_col) - - -def group_rep_transform(method: str, - weights: pd.Series, - grouped_data, - group_col, - record_id_col, - record_name_col) -> Union[pd.Series, pd.DataFrame]: + return group_rep_transform( + "idxmax", weights, grouped_data, group_col, record_id_col, record_name_col + ) + + +def group_rep_transform( + method: str, + weights: pd.Series, + grouped_data, + group_col, + record_id_col, + record_name_col, +) -> Union[pd.Series, pd.DataFrame]: stashed_index = grouped_data.index group_of_master_id = get_column(group_col, grouped_data).reset_index(drop=True) - group_of_master_id = group_of_master_id.rename('raw_group_id').reset_index().rename(columns={'index': 'weight'}) - group_of_master_id['weight'] = weights.reset_index(drop=True) - group_of_master_id['group_rep'] = \ - group_of_master_id.groupby('raw_group_id', sort=False)['weight'].transform(method) + group_of_master_id = ( + group_of_master_id.rename("raw_group_id") + .reset_index() + .rename(columns={"index": "weight"}) + ) + group_of_master_id["weight"] = weights.reset_index(drop=True) + group_of_master_id["group_rep"] = group_of_master_id.groupby( + "raw_group_id", sort=False + )["weight"].transform(method) record_id_col = get_column(record_id_col, grouped_data) - new_rep = record_id_col.iloc[group_of_master_id.group_rep].reset_index(drop=True).rename(None) + new_rep = ( + record_id_col.iloc[group_of_master_id.group_rep] + .reset_index(drop=True) + .rename(None) + ) if record_name_col is None: output = new_rep else: record_name_col = get_column(record_name_col, grouped_data) - new_rep_name = record_name_col.iloc[group_of_master_id.group_rep].reset_index(drop=True).rename(None) + new_rep_name = ( + record_name_col.iloc[group_of_master_id.group_rep] + .reset_index(drop=True) + .rename(None) + ) output = pd.concat([new_rep, new_rep_name], axis=1) output.index = stashed_index return output @@ -141,10 +172,12 @@ def parse_timestamps(timestamps: pd.Series, parserinfo=None, **kwargs) -> pd.Ser error_msg += " or datetime datatype or pandas Timestamp datatype or numbers" if is_series_of_type(str, timestamps): # if any of the strings is not datetime-like raise an exception - if timestamps.to_frame().applymap(is_date).squeeze().all(): + if timestamps.to_frame().map(is_date).squeeze().all(): # convert strings to numpy datetime64 - return timestamps.transform(lambda x: parse(x, parserinfo, **kwargs).astimezone(UTC)) - elif is_series_of_type(type(pd.Timestamp('15-1-2000')), timestamps): + return timestamps.transform( + lambda x: parse(x, parserinfo, **kwargs).astimezone(UTC) + ) + elif is_series_of_type(type(pd.Timestamp("15-1-2000")), timestamps): # convert pandas Timestamps to numpy datetime64 return timestamps.transform(lambda x: x.to_numpy()) elif is_series_of_type(datetime, timestamps): @@ -172,20 +205,19 @@ def is_date(string, parserinfo=None, **kwargs): def is_series_of_type(what: type, series_to_test: pd.Series) -> bool: - if series_to_test.to_frame().applymap( - lambda x: not isinstance(x, what) - ).squeeze().any(): + if series_to_test.to_frame().map(lambda x: not isinstance(x, what)).squeeze().any(): return False return True # The following lines modify and append the kwargs portion of the docstring of dateutil.parser.parse to # the docstring of new_group_rep_by_earliest_timestamp: -parse_docstring_kwargs = re.search(':param parserinfo:.*?:return:', pydoc.render_doc(parse), 
flags=re.DOTALL).group(0) +parse_docstring_kwargs = re.search( + ":param parserinfo:.*?:return:", pydoc.render_doc(parse), flags=re.DOTALL +).group(0) parse_docstring_kwargs = re.sub( - '``timestr``', - 'the strings containing the date/time-stamps', - parse_docstring_kwargs + "``timestr``", "the strings containing the date/time-stamps", parse_docstring_kwargs +) +new_group_rep_by_earliest_timestamp.__doc__ = ( + new_group_rep_by_earliest_timestamp.__doc__ + parse_docstring_kwargs[:-9] ) -new_group_rep_by_earliest_timestamp.__doc__ = new_group_rep_by_earliest_timestamp.__doc__ + \ - parse_docstring_kwargs[:-9] diff --git a/string_grouper_utils/test/test_string_grouper_utils.py b/string_grouper_utils/test/test_string_grouper_utils.py index 0c8a8ee..abb822e 100644 --- a/string_grouper_utils/test/test_string_grouper_utils.py +++ b/string_grouper_utils/test/test_string_grouper_utils.py @@ -1,83 +1,149 @@ import unittest import pandas as pd from dateutil.parser import parse -from string_grouper_utils.string_grouper_utils import new_group_rep_by_earliest_timestamp, \ - new_group_rep_by_completeness, new_group_rep_by_highest_weight +from string_grouper_utils.string_grouper_utils import ( + new_group_rep_by_earliest_timestamp, + new_group_rep_by_completeness, + new_group_rep_by_highest_weight, +) class SimpleExample(object): def __init__(self): self.customers_df = pd.DataFrame( [ - ('BB016741P', 'Mega Enterprises Corporation', 'Address0', 'Tel0', 'Description0', 0.2, - '2014-12-30 10:55:00-02:00', 'EE059082Q', 'Mega Enterprises Corp.'), - ('CC082744L', 'Hyper Startup Incorporated', '', 'Tel1', '', 0.5, '2017-01-01 20:23:15-05:00', - 'BB099931J', 'Hyper-Startup Inc.'), - ('AA098762D', 'Hyper Startup Inc.', 'Address2', 'Tel2', 'Description2', 0.3, - '2020-10-20 15:29:30+02:00', 'BB099931J', 'Hyper-Startup Inc.'), - ('BB099931J', 'Hyper-Startup Inc.', 'Address3', 'Tel3', 'Description3', 0.1, - '2013-07-01 03:34:45-05:00', 'BB099931J', 'Hyper-Startup Inc.'), - ('HH072982K', 'Hyper Hyper Inc.', 'Address4', '', 'Description4', 0.9, '2005-09-11 11:56:00-07:00', - 'HH072982K', 'Hyper Hyper Inc.'), - ('EE059082Q', 'Mega Enterprises Corp.', 'Address5', 'Tel5', 'Description5', 1.0, - '1998-04-14 09:21:11+00:00', 'EE059082Q', 'Mega Enterprises Corp.') + ( + "BB016741P", + "Mega Enterprises Corporation", + "Address0", + "Tel0", + "Description0", + 0.2, + "2014-12-30 10:55:00-02:00", + "EE059082Q", + "Mega Enterprises Corp.", + ), + ( + "CC082744L", + "Hyper Startup Incorporated", + "", + "Tel1", + "", + 0.5, + "2017-01-01 20:23:15-05:00", + "BB099931J", + "Hyper-Startup Inc.", + ), + ( + "AA098762D", + "Hyper Startup Inc.", + "Address2", + "Tel2", + "Description2", + 0.3, + "2020-10-20 15:29:30+02:00", + "BB099931J", + "Hyper-Startup Inc.", + ), + ( + "BB099931J", + "Hyper-Startup Inc.", + "Address3", + "Tel3", + "Description3", + 0.1, + "2013-07-01 03:34:45-05:00", + "BB099931J", + "Hyper-Startup Inc.", + ), + ( + "HH072982K", + "Hyper Hyper Inc.", + "Address4", + "", + "Description4", + 0.9, + "2005-09-11 11:56:00-07:00", + "HH072982K", + "Hyper Hyper Inc.", + ), + ( + "EE059082Q", + "Mega Enterprises Corp.", + "Address5", + "Tel5", + "Description5", + 1.0, + "1998-04-14 09:21:11+00:00", + "EE059082Q", + "Mega Enterprises Corp.", + ), ], - columns=('Customer ID', 'Customer Name', 'Address', 'Tel', 'Description', 'weight', 'timestamp', - 'group ID', 'group name') + columns=( + "Customer ID", + "Customer Name", + "Address", + "Tel", + "Description", + "weight", + "timestamp", + "group ID", + "group name", + ), 
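# How the timezone-aware timestamp strings in the fixture above compare once
# normalized: parse_timestamps runs string input through dateutil and converts
# to UTC, so "earliest" is well-defined across mixed offsets. A sketch using
# two of the fixture's timestamps:
from dateutil.parser import parse
from dateutil.tz import UTC

a = parse("2014-12-30 10:55:00-02:00").astimezone(UTC)
b = parse("1998-04-14 09:21:11+00:00").astimezone(UTC)
assert b < a  # the 1998 record is earliest, so EE059082Q represents its group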
         )
         # new_group_rep_by_earliest_timestamp(customers_df, 'group ID', 'Customer ID', 'timestamp')
         self.expected_result_TS = pd.Series(
             [
-                'EE059082Q',
-                'BB099931J',
-                'BB099931J',
-                'BB099931J',
-                'HH072982K',
-                'EE059082Q',
+                "EE059082Q",
+                "BB099931J",
+                "BB099931J",
+                "BB099931J",
+                "HH072982K",
+                "EE059082Q",
             ]
         )
         # new_group_rep_by_earliest_timestamp(customers_df, 'group ID', 'Customer ID', 'timestamp', 'Customer Name')
         self.expected_result_T = pd.DataFrame(
             [
-                ('EE059082Q', 'Mega Enterprises Corp.'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('HH072982K', 'Hyper Hyper Inc.'),
-                ('EE059082Q', 'Mega Enterprises Corp.')
+                ("EE059082Q", "Mega Enterprises Corp."),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("HH072982K", "Hyper Hyper Inc."),
+                ("EE059082Q", "Mega Enterprises Corp."),
             ]
         )
         # new_group_rep_by_earliest_timestamp(customers_df, 'group ID', 'Customer ID', 'weight', 'Customer Name')
         self.expected_result_TW = pd.DataFrame(
             [
-                ('BB016741P', 'Mega Enterprises Corporation'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('BB099931J', 'Hyper-Startup Inc.'),
-                ('HH072982K', 'Hyper Hyper Inc.'),
-                ('BB016741P', 'Mega Enterprises Corporation')
+                ("BB016741P", "Mega Enterprises Corporation"),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("BB099931J", "Hyper-Startup Inc."),
+                ("HH072982K", "Hyper Hyper Inc."),
+                ("BB016741P", "Mega Enterprises Corporation"),
             ]
         )
         # new_group_rep_by_highest_weight(customers_df, 'group ID', 'Customer ID', 'weight', 'Customer Name')
         self.expected_result_W = pd.DataFrame(
             [
-                ('EE059082Q', 'Mega Enterprises Corp.'),
-                ('CC082744L', 'Hyper Startup Incorporated'),
-                ('CC082744L', 'Hyper Startup Incorporated'),
-                ('CC082744L', 'Hyper Startup Incorporated'),
-                ('HH072982K', 'Hyper Hyper Inc.'),
-                ('EE059082Q', 'Mega Enterprises Corp.')
+                ("EE059082Q", "Mega Enterprises Corp."),
+                ("CC082744L", "Hyper Startup Incorporated"),
+                ("CC082744L", "Hyper Startup Incorporated"),
+                ("CC082744L", "Hyper Startup Incorporated"),
+                ("HH072982K", "Hyper Hyper Inc."),
+                ("EE059082Q", "Mega Enterprises Corp."),
             ]
         )
         # new_group_rep_by_completeness(customers_df, 'group ID', 'Customer ID', 'Customer Name')
         self.expected_result_C = pd.DataFrame(
             [
-                ('BB016741P', 'Mega Enterprises Corporation'),
-                ('AA098762D', 'Hyper Startup Inc.'),
-                ('AA098762D', 'Hyper Startup Inc.'),
-                ('AA098762D', 'Hyper Startup Inc.'),
-                ('HH072982K', 'Hyper Hyper Inc.'),
-                ('BB016741P', 'Mega Enterprises Corporation')
+                ("BB016741P", "Mega Enterprises Corporation"),
+                ("AA098762D", "Hyper Startup Inc."),
+                ("AA098762D", "Hyper Startup Inc."),
+                ("AA098762D", "Hyper Startup Inc."),
+                ("HH072982K", "Hyper Hyper Inc."),
+                ("BB016741P", "Mega Enterprises Corporation"),
             ]
         )
@@ -85,49 +151,45 @@ def __init__(self):
 class StringGrouperUtilTest(unittest.TestCase):
     def test_group_rep_by_timestamp_return_series(self):
         """Should return a pd.Series object with the same length as the grouped_data. The series object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_series_equal(
             simple_example.expected_result_TS,
             new_group_rep_by_earliest_timestamp(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'timestamp'
-            )
+                customers_df, "group ID", "Customer ID", "timestamp"
+            ),
         )

     def test_group_rep_by_timestamp_return_dataframe(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_T,
             new_group_rep_by_earliest_timestamp(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'timestamp',
-                'Customer Name'
-            )
+                customers_df, "group ID", "Customer ID", "timestamp", "Customer Name"
+            ),
         )

     def test_group_rep_by_timestamp_series_input(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_T,
             new_group_rep_by_earliest_timestamp(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['timestamp'],
-                'Customer Name'
-            )
+                "group ID",
+                "Customer ID",
+                customers_df["timestamp"],
+                "Customer Name",
+            ),
         )

     def test_group_rep_by_timestamp_input_series_length(self):
@@ -137,10 +199,10 @@ def test_group_rep_by_timestamp_input_series_length(self):
         with self.assertRaises(Exception):
             _ = new_group_rep_by_earliest_timestamp(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['timestamp'].iloc[:-2],
-                'Customer Name'
+                "group ID",
+                "Customer ID",
+                customers_df["timestamp"].iloc[:-2],
+                "Customer Name",
             )

     def test_group_rep_by_timestamp_bad_input_timestamp_strings(self):
@@ -150,46 +212,52 @@ def test_group_rep_by_timestamp_bad_input_timestamp_strings(self):
         with self.assertRaises(Exception):
             _ = new_group_rep_by_earliest_timestamp(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['Customer ID'],
-                'Customer Name'
+                "group ID",
+                "Customer ID",
+                customers_df["Customer ID"],
+                "Customer Name",
             )

     def test_group_rep_by_timestamp_pandas_timestamps(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         customers_df2 = customers_df.copy()
-        customers_df2['timestamp'] = customers_df2['timestamp'].transform(lambda t: pd.Timestamp(t))
+        customers_df2["timestamp"] = customers_df2["timestamp"].transform(
+            lambda t: pd.Timestamp(t)
+        )
         pd.testing.assert_frame_equal(
             simple_example.expected_result_T,
             new_group_rep_by_earliest_timestamp(
                 customers_df2,
-                'group ID',
-                'Customer ID',
-                customers_df2['timestamp'],
-                'Customer Name'
-            )
+                "group ID",
+                "Customer ID",
+                customers_df2["timestamp"],
+                "Customer Name",
+            ),
         )

     def test_group_rep_by_timestamp_dateutil_timestamps(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         customers_df2 = customers_df.copy()
-        customers_df2['timestamp'] = customers_df2['timestamp'].transform(lambda t: parse(t))
+        customers_df2["timestamp"] = customers_df2["timestamp"].transform(
+            lambda t: parse(t)
+        )
         pd.testing.assert_frame_equal(
             simple_example.expected_result_T,
             new_group_rep_by_earliest_timestamp(
                 customers_df2,
-                'group ID',
-                'Customer ID',
-                customers_df2['timestamp'],
-                'Customer Name'
-            )
+                "group ID",
+                "Customer ID",
+                customers_df2["timestamp"],
+                "Customer Name",
+            ),
         )

     def test_group_rep_by_timestamp_bad_nonstring_timestamps(self):
@@ -197,62 +265,61 @@ def test_group_rep_by_timestamp_bad_nonstring_timestamps(self):
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         customers_df2 = customers_df.copy()
-        customers_df2.at[0, 'timestamp'] = 1.0
+        customers_df2.at[0, "timestamp"] = 1.0
         with self.assertRaises(Exception):
             _ = new_group_rep_by_earliest_timestamp(
                 customers_df2,
-                'group ID',
-                'Customer ID',
-                customers_df2['timestamp'],
-                'Customer Name'
+                "group ID",
+                "Customer ID",
+                customers_df2["timestamp"],
+                "Customer Name",
             )

     def test_group_rep_by_timestamp_input_numbers(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the earliest timestamp of the group"""
+        a list of groups whose group-representatives have the earliest timestamp of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_TW,
             new_group_rep_by_earliest_timestamp(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['weight'],
-                'Customer Name'
-            )
+                "group ID",
+                "Customer ID",
+                customers_df["weight"],
+                "Customer Name",
+            ),
         )

     def test_group_rep_by_weight(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the highest weight of the group"""
+        a list of groups whose group-representatives have the highest weight of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_W,
             new_group_rep_by_highest_weight(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'weight',
-                'Customer Name'
-            )
+                customers_df, "group ID", "Customer ID", "weight", "Customer Name"
+            ),
        )

     def test_group_rep_by_weight_input_series(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the highest weight of the group"""
+        a list of groups whose group-representatives have the highest weight of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_W,
             new_group_rep_by_highest_weight(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['weight'],
-                'Customer Name'
-            )
+                "group ID",
+                "Customer ID",
+                customers_df["weight"],
+                "Customer Name",
+            ),
         )

     def test_group_rep_by_weight_input_series_length(self):
@@ -262,57 +329,49 @@ def test_group_rep_by_weight_input_series_length(self):
         with self.assertRaises(Exception):
             _ = new_group_rep_by_highest_weight(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                customers_df['weight'].iloc[:-2],
-                'Customer Name'
+                "group ID",
+                "Customer ID",
+                customers_df["weight"].iloc[:-2],
+                "Customer Name",
             )

     def test_group_rep_by_completeness_column_list(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the most filled-in records of the group"""
+        a list of groups whose group-representatives have the most filled-in records of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_C,
             new_group_rep_by_completeness(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'Customer Name',
-                [1, 2, 3, 4]
-            )
+                customers_df, "group ID", "Customer ID", "Customer Name", [1, 2, 3, 4]
+            ),
         )

     def test_group_rep_by_completeness_no_columns(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the most filled-in records of the group"""
+        a list of groups whose group-representatives have the most filled-in records of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_C,
             new_group_rep_by_completeness(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'Customer Name'
-            )
+                customers_df, "group ID", "Customer ID", "Customer Name"
+            ),
         )

     def test_group_rep_by_completeness_input_dataframe(self):
         """Should return a pd.DataFrame object with the same length as the grouped_data. The DataFrame object will contain
-        a list of groups whose group-representatives have the most filled-in records of the group"""
+        a list of groups whose group-representatives have the most filled-in records of the group
+        """
         simple_example = SimpleExample()
         customers_df = simple_example.customers_df
         pd.testing.assert_frame_equal(
             simple_example.expected_result_C,
             new_group_rep_by_completeness(
-                customers_df,
-                'group ID',
-                'Customer ID',
-                'Customer Name',
-                customers_df
-            )
+                customers_df, "group ID", "Customer ID", "Customer Name", customers_df
+            ),
         )

     def test_group_rep_by_completeness_input_dataframe_length(self):
@@ -322,12 +381,12 @@ def test_group_rep_by_completeness_input_dataframe_length(self):
         with self.assertRaises(Exception):
             _ = new_group_rep_by_completeness(
                 customers_df,
-                'group ID',
-                'Customer ID',
-                'Customer Name',
-                customers_df.iloc[:-2, :]
+                "group ID",
+                "Customer ID",
+                "Customer Name",
+                customers_df.iloc[:-2, :],
             )


-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
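For orientation, a minimal usage sketch of the three group-representative helpers these tests exercise, assuming they are importable from string_grouper_utils as the test module's call signatures suggest. The toy customers_df below is illustrative only, not the SimpleExample fixture:

import pandas as pd
from string_grouper_utils import (
    new_group_rep_by_earliest_timestamp,
    new_group_rep_by_highest_weight,
    new_group_rep_by_completeness,
)

# Toy grouped data: 'group ID' assigns each record to a group; each helper
# then picks one record per group as that group's representative.
customers_df = pd.DataFrame(
    {
        "group ID": ["g1", "g1", "g2"],
        "Customer ID": ["AA098762D", "BB099931J", "HH072982K"],
        "Customer Name": ["Hyper Startup Inc.", "Hyper-Startup Inc.", "Hyper Hyper Inc."],
        "timestamp": ["2014-12-30 10:55:00", "2011-01-01 20:23:15", "2015-04-17 02:23:59"],
        "weight": [1.5, 0.5, 2.0],
    }
)

# Earliest timestamp within each group wins; passing 'Customer Name' as well
# yields a DataFrame (ID and name columns), while omitting it yields a Series.
reps_ts = new_group_rep_by_earliest_timestamp(
    customers_df, "group ID", "Customer ID", "timestamp", "Customer Name"
)

# Highest weight within each group wins.
reps_w = new_group_rep_by_highest_weight(
    customers_df, "group ID", "Customer ID", "weight", "Customer Name"
)

# The most complete record (fewest empty fields) within each group wins.
reps_c = new_group_rep_by_completeness(
    customers_df, "group ID", "Customer ID", "Customer Name"
)

Each result has one row per input record (the representative of that record's group), which is what the assert_series_equal/assert_frame_equal checks above rely on.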