Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] More control on creating the output dataframe for conditional_join - PR no 1 #1396

Merged
merged 11 commits into from
Sep 14, 2024
1,577 changes: 858 additions & 719 deletions examples/notebooks/Pivoting Data from Wide to Long.ipynb

Large diffs are not rendered by default.

73 changes: 47 additions & 26 deletions examples/notebooks/anime.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
"filename = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-23/raw_anime.csv'\n",
"filename = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-23/raw_anime.csv\"\n",
"df = pd.read_csv(filename)\n",
"\n",
"\n",
Expand Down Expand Up @@ -124,10 +124,11 @@
" stop: int = None,\n",
" pat: str = \" \",\n",
" *args,\n",
" **kwargs\n",
" **kwargs,\n",
"):\n",
" \"\"\"\n",
" Wrapper around `df.str.split` with additional `start` and `end` arguments\n",
" Wrapper around `df.str.split`\n",
" with additional `start` and `end` arguments\n",
" to select a slice of the list of words.\n",
" \"\"\"\n",
"\n",
Expand All @@ -148,7 +149,12 @@
"\n",
"@pf.register_dataframe_method\n",
"def str_slice(\n",
" df, column_name: str, start: int = None, stop: int = None, *args, **kwargs\n",
" df, \n",
" column_name: str,\n",
" start: int = None, \n",
" stop: int = None, \n",
" *args, \n",
" **kwargs\n",
"):\n",
" \"\"\"\n",
"    Wrapper around `df.str.slice`\n",
Expand All @@ -175,7 +181,9 @@
" .str_word(column_name=\"aired\", start=0, stop=2, pat=\",\")\n",
" .str_join(column_name=\"aired\", sep=\",\")\n",
" .deconcatenate_column(\n",
" column_name=\"aired\", new_column_names=[\"start_date\", \"end_date\"], sep=\",\"\n",
" column_name=\"aired\",\n",
" new_column_names=[\"start_date\", \"end_date\"],\n",
" sep=\",\",\n",
" )\n",
" .remove_columns(column_names=[\"aired\"])\n",
" .str_remove(column_name=\"start_date\", pat=\"'\")\n",
Expand Down Expand Up @@ -858,19 +866,22 @@
"outputs": [],
"source": [
"@pf.register_dataframe_method\n",
"def str_remove(df, column_name: str, pat: str, *args, **kwargs):\n",
"def str_remove(df, column_name: str, pat: str, *args, **kwargs): # noqa: F811\n",
" \"\"\"\n",
" Wrapper around df.str.replace\n",
" The function will loop through regex patterns and remove them from the desired column.\n",
" The function will loop through regex patterns \n",
" and remove them from the desired column.\n",
"\n",
" :param df: A pandas DataFrame.\n",
" :param column_name: A `str` indicating which column the string removal action is to be made.\n",
" :param column_name: A `str` indicating which column \n",
" the string removal action is to be made.\n",
" :param pat: A regex pattern to match and remove.\n",
" \"\"\"\n",
"\n",
" if not isinstance(pat, str):\n",
" raise TypeError(\n",
" f\"Pattern should be a valid regex pattern. Received pattern: {pat} with dtype: {type(pat)}\"\n",
" f\"Pattern should be a valid regex pattern. \"\n",
" f\"Received pattern: {pat} with dtype: {type(pat)}\"\n",
" )\n",
" df[column_name] = df[column_name].str.replace(pat, \"\", *args, **kwargs)\n",
" return df"
Expand Down Expand Up @@ -939,13 +950,17 @@
"outputs": [],
"source": [
"@pf.register_dataframe_method\n",
"def explode(df: pd.DataFrame, column_name: str, sep: str):\n",
"def explode(df: pd.DataFrame, column_name: str, sep: str): # noqa: F811\n",
" \"\"\"\n",
" For rows with a list of values, this function will create new rows for each value in the list\n",
" For rows with a list of values,\n",
" this function will create new rows\n",
" for each value in the list\n",
"\n",
" :param df: A pandas DataFrame.\n",
" :param column_name: A `str` indicating which column the string removal action is to be made.\n",
" :param sep: The delimiter. Example delimiters include `|`, `, `, `,` etc.\n",
" :param column_name: A `str` indicating which column\n",
" the string removal action is to be made.\n",
" :param sep: The delimiter.\n",
" Example delimiters include `|`, `, `, `,` etc.\n",
" \"\"\"\n",
"\n",
" df[\"id\"] = df.index\n",
Expand Down Expand Up @@ -1046,7 +1061,7 @@
"outputs": [],
"source": [
"@pf.register_dataframe_method\n",
"def str_trim(df, column_name: str, *args, **kwargs):\n",
"def str_trim(df, column_name: str, *args, **kwargs): #noqa: F811\n",
" \"\"\"Remove trailing and leading characters, in a given column\"\"\"\n",
" df[column_name] = df[column_name].str.strip(*args, **kwargs)\n",
" return df"
Expand Down Expand Up @@ -1300,54 +1315,60 @@
"outputs": [],
"source": [
"@pf.register_dataframe_method\n",
"def str_word(\n",
"def str_word( #noqa: F811\n",
" df,\n",
" column_name: str,\n",
" start: int = None,\n",
" stop: int = None,\n",
" pat: str = \" \",\n",
" *args,\n",
" **kwargs\n",
"):\n",
"): #noqa: F811\n",
" \"\"\"\n",
" Wrapper around `df.str.split` with additional `start` and `end` arguments\n",
" Wrapper around `df.str.split`,\n",
" with additional `start` and `end` arguments\n",
" to select a slice of the list of words.\n",
"\n",
" :param df: A pandas DataFrame.\n",
" :param column_name: A `str` indicating which column the split action is to be made.\n",
" :param column_name: A `str` indicating which column \n",
" the split action is to be made.\n",
" :param start: optional An `int` for the start index of the slice\n",
" :param stop: optional An `int` for the end index of the slice\n",
" :param pat: String or regular expression to split on. If not specified, split on whitespace.\n",
" :param pat: String or regular expression to split on. \n",
" If not specified, split on whitespace.\n",
"\n",
" \"\"\"\n",
" df[column_name] = df[column_name].str.split(pat).str[start:stop]\n",
" return df\n",
"\n",
"\n",
"@pf.register_dataframe_method\n",
"def str_join(df, column_name: str, sep: str, *args, **kwargs):\n",
"def str_join(df, column_name: str, sep: str, *args, **kwargs): #noqa: F811\n",
" \"\"\"\n",
" Wrapper around `df.str.join`\n",
" Joins items in a list.\n",
"\n",
" :param df: A pandas DataFrame.\n",
" :param column_name: A `str` indicating which column the split action is to be made.\n",
" :param sep: The delimiter. Example delimiters include `|`, `, `, `,` etc.\n",
" :param column_name: A `str` indicating which column \n",
" the split action is to be made.\n",
" :param sep: The delimiter. Example delimiters \n",
" include `|`, `, `, `,` etc.\n",
" \"\"\"\n",
" df[column_name] = df[column_name].str.join(sep)\n",
" return df\n",
"\n",
"\n",
"@pf.register_dataframe_method\n",
"def str_slice(\n",
"def str_slice( #noqa: F811\n",
" df, column_name: str, start: int = None, stop: int = None, *args, **kwargs\n",
"):\n",
"): #noqa: F811\n",
" \"\"\"\n",
"    Wrapper around `df.str.slice`\n",
" Slices strings.\n",
"\n",
" :param df: A pandas DataFrame.\n",
" :param column_name: A `str` indicating which column the split action is to be made.\n",
" :param column_name: A `str` indicating which column \n",
" the split action is to be made.\n",
" :param start: 'int' indicating start of slice.\n",
" :param stop: 'int' indicating stop of slice.\n",
" \"\"\"\n",
Expand Down Expand Up @@ -1745,7 +1766,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
11 changes: 7 additions & 4 deletions examples/notebooks/bird_call.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,8 @@
"source": [
"clean_birds = (\n",
" raw_birds\n",
" .merge(clean_call, how='left') # merge the raw_birds dataframe with clean_raw dataframe\n",
" # merge the raw_birds dataframe with clean_raw dataframe\n",
" .merge(clean_call, how='left') \n",
" .select_columns(\n",
" [\n",
" \"Genus\",\n",
Expand All @@ -611,9 +612,11 @@
" ]\n",
" ) # include list of cols\n",
" .clean_names()\n",
" .rename_column(\"collisions\", \"family\") # rename 'collisions' column to 'family' in merged dataframe\n",
" # rename 'collisions' column to 'family' in merged dataframe\n",
" .rename_column(\"collisions\", \"family\") \n",
" .rename_column(\"call\", \"flight_call\")\n",
" .dropna() # drop all rows which contain a NaN\n",
" # drop all rows which contain a NaN\n",
" .dropna() \n",
")"
]
},
Expand Down Expand Up @@ -755,7 +758,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.16"
}
},
"nbformat": 4,
Expand Down
14 changes: 9 additions & 5 deletions examples/notebooks/board_games.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions examples/notebooks/complete.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@
}
],
"source": [
"new_year_values = lambda year: range(year.min(), year.max() + 1)\n",
"new_year_values = lambda year: range(year.min(), year.max() + 1) # noqa: E731\n",
"\n",
"df.complete({\"Year\": new_year_values}, \"Taxon\")"
]
Expand Down Expand Up @@ -963,7 +963,7 @@
}
],
"source": [
"new_year_values = lambda year: range(year.min(), year.max() + 1)\n",
"new_year_values = lambda year: range(year.min(), year.max() + 1) # noqa: E731\n",
"\n",
"df.complete(\n",
" {'year': new_year_values},\n",
Expand Down Expand Up @@ -1163,7 +1163,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.9.16"
},
"orig_nbformat": 4
},
Expand Down
17 changes: 11 additions & 6 deletions examples/notebooks/french_trains.ipynb

Large diffs are not rendered by default.

88 changes: 50 additions & 38 deletions examples/notebooks/medium_franchise.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Suppress user warnings when we try overwriting our custom pandas flavor functions\n",
"# Suppress user warnings \n",
"# when we try overwriting our custom pandas flavor functions\n",
"import warnings\n",
"\n",
"warnings.filterwarnings('ignore')"
Expand Down Expand Up @@ -191,7 +192,9 @@
}
],
"source": [
"fileurl = '../data/medium_franchise_raw_table.csv' # originally from https://en.wikipedia.org/wiki/List_of_highest-grossing_media_franchises\n",
"# originally from \n",
"# https://en.wikipedia.org/wiki/List_of_highest-grossing_media_franchises\n",
"fileurl = '../data/medium_franchise_raw_table.csv' \n",
"df_raw = pd.read_csv(fileurl)\n",
"df_raw.head(3)"
]
Expand Down Expand Up @@ -883,20 +886,23 @@
"source": [
"# Value mapper `revenue_category`\n",
"value_mapper = {\n",
" 'box office': 'Box Office',\n",
" 'dvd|blu|vhs|home video|video rentals|video sales|streaming|home entertainment': 'Home Video/Entertainment',\n",
" 'video game|computer game|mobile game|console|game|pachinko|pet|card': 'Video Games/Games',\n",
" 'comic|manga': 'Comic or Manga',\n",
" 'music|soundtrac': 'Music',\n",
" 'tv': 'TV',\n",
" 'merchandise|licens|mall|stage|retail': 'Merchandise, Licensing & Retail',\n",
" \"box office\": \"Box Office\",\n",
" \"dvd|blu|vhs|home video|video rentals|video sales|streaming|home entertainment\": \"Home Video/Entertainment\", # noqa: E501\n",
" \"video game|computer game|mobile game|console|game|pachinko|pet|card\": \"Video Games/Games\", # noqa: E501\n",
" \"comic|manga\": \"Comic or Manga\",\n",
" \"music|soundtrac\": \"Music\",\n",
" \"tv\": \"TV\",\n",
" \"merchandise|licens|mall|stage|retail\": \"Merchandise, Licensing & Retail\",\n",
"}\n",
"\n",
"column_name = 'revenue_category'\n",
"column_name = \"revenue_category\"\n",
"# [pyjanitor] convert to lower case\n",
"df_clean_category = (\n",
" df_clean_category.transform_column(column_name, str.lower) # [pyjanitor] convert to lower case\n",
" .transform_column(column_name, str.strip) # [pyjanitor] strip leading/trailing white space\n",
" .fuzzy_match_replace(column_name, mapper=value_mapper) # [pyjanitor + pandas_flavor]\n",
" df_clean_category.transform_column(column_name, str.lower)\n",
" # [pyjanitor] strip leading/trailing white space\n",
" .transform_column(column_name, str.strip)\n",
" # [pyjanitor + pandas_flavor]\n",
" .fuzzy_match_replace(column_name, mapper=value_mapper)\n",
")\n",
"df_clean_category.head(3)"
]
Expand Down Expand Up @@ -1127,31 +1133,37 @@
"df_clean = (\n",
" pd.read_csv(fileurl)\n",
" .rename(\n",
" columns={col_old: col_new for col_old, col_new in zip(df_raw.columns, colnames)}\n",
" columns={\n",
" col_old: col_new\n",
" for col_old, col_new in zip(df_raw.columns, colnames)\n",
" }\n",
" )\n",
" .str_remove('total_revenue', pattern='est.') # [pandas-flavor]\n",
" .str_trim('total_revenue') # [pandas-flavor]\n",
" .str_remove('total_revenue', pattern='\\$') # [pandas-flavor]\n",
" .str_slice('total_revenue', start=0, stop=2) # [pandas-flavor]\n",
" .change_type('total_revenue', float) # [pyjanitor]\n",
" .separate_rows('revenue_items', sep='\\[') # [pandas-flavor]\n",
" .filter_string('revenue_items', 'illion') # [pyjanitor]\n",
" .separate(\n",
" 'revenue_items', into=['revenue_category', 'revenue'], sep='\\$'\n",
" ) # [pyjanitor + pandas-flavor]\n",
" .str_remove('revenue_category', pattern=' – ') # [pandas-flavor]\n",
" .str_remove('revenue_category', pattern='.*\\]') # [pandas-flavor]\n",
" .str_remove('revenue_category', pattern='\\n') # [pandas-flavor]\n",
" .transform_column('revenue_category', str.lower) # [pyjanitor] convert to lower case\n",
" .transform_column('revenue_category', str.strip) # [pyjanitor] strip leading/trailing white space\n",
" .fuzzy_match_replace('revenue_category', mapper=value_mapper) # [pyjanitor + pandas_flavor]\n",
" .str_remove('revenue', 'illion') # [pandas-flavor]\n",
" .str_trim('revenue') # [pandas-flavor]\n",
" .str_remove('revenue', ' ') # [pandas-flavor]\n",
" .str_replace('revenue', '\\s*b', '') # [pandas-flavor]\n",
" .str_replace('revenue', '\\s*m', 'e-3') # [pandas-flavor]\n",
" .str_remove(\"total_revenue\", pattern=\"est.\") # [pandas-flavor]\n",
" .str_trim(\"total_revenue\") # [pandas-flavor]\n",
" .str_remove(\"total_revenue\", pattern=\"\\$\") # [pandas-flavor]\n",
" .str_slice(\"total_revenue\", start=0, stop=2) # [pandas-flavor]\n",
" .change_type(\"total_revenue\", float) # [pyjanitor]\n",
" .separate_rows(\"revenue_items\", sep=\"\\[\") # [pandas-flavor]\n",
" .filter_string(\"revenue_items\", \"illion\") # [pyjanitor]\n",
" # [pyjanitor + pandas-flavor]\n",
" .separate(\"revenue_items\", into=[\"revenue_category\", \"revenue\"], sep=\"\\$\")\n",
" # [pandas-flavor]\n",
" .str_remove(\"revenue_category\", pattern=\" – \")\n",
" .str_remove(\"revenue_category\", pattern=\".*\\]\")\n",
" .str_remove(\"revenue_category\", pattern=\"\\n\")\n",
" # [pyjanitor] convert to lower case\n",
" .transform_column(\"revenue_category\", str.lower)\n",
" # [pyjanitor] strip leading/trailing white space\n",
" .transform_column(\"revenue_category\", str.strip)\n",
" # [pyjanitor + pandas_flavor]\n",
" .fuzzy_match_replace(\"revenue_category\", mapper=value_mapper)\n",
" .str_remove(\"revenue\", \"illion\") # [pandas-flavor]\n",
" .str_trim(\"revenue\") # [pandas-flavor]\n",
" .str_remove(\"revenue\", \" \") # [pandas-flavor]\n",
" .str_replace(\"revenue\", \"\\s*b\", \"\") # [pandas-flavor]\n",
" .str_replace(\"revenue\", \"\\s*m\", \"e-3\") # [pandas-flavor]\n",
" .parse_number() # [pandas-flavor]\n",
" .str_remove('original_media', '\\[.+') # [pandas-flavor]\n",
" .str_remove(\"original_media\", \"\\[.+\") # [pandas-flavor]\n",
")"
]
},
Expand Down Expand Up @@ -1443,9 +1455,9 @@
"# Generate final dataframe\n",
"df_final = (\n",
" pd.merge(\n",
" df_sum, df_metadata, how='left', on=['franchise', 'revenue_category']\n",
" df_sum, df_metadata, how=\"left\", on=[\"franchise\", \"revenue_category\"]\n",
" )\n",
" .drop_duplicates(keep='first')\n",
" .drop_duplicates(keep=\"first\")\n",
" .reset_index(drop=True)\n",
")\n",
"df_final.head(3)"
Expand Down
8 changes: 5 additions & 3 deletions examples/notebooks/teacher_pupil.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,10 @@
"\n",
"@pf.register_dataframe_method\n",
"def drop_duplicated_column(df, column_name: str, column_order: int=0):\n",
" \"\"\"Remove duplicated columns and retain only a column given its order.\n",
" Order 0 is to remove the first column, Order 1 is to remove the second column, and etc\"\"\"\n",
" \"\"\"Remove duplicated columns \n",
" and retain only a column given its order.\n",
" Order 0 is to remove the first column, \n",
" Order 1 is to remove the second column, and etc\"\"\"\n",
"\n",
" cols = list(df.columns)\n",
" col_indexes = [\n",
Expand Down Expand Up @@ -467,7 +469,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.9.16"
},
"stem_cell": {
"cell_type": "raw",
Expand Down
Loading
Loading