From add386c49d37be46c5141814ed3a356c243d7387 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Mon, 4 Dec 2023 16:13:49 +0100 Subject: [PATCH 01/49] Added initial functions and groups --- src/ssb_arbmark_fagfunksjoner/functions.py | 414 ++++++++++++++++++++- src/ssb_arbmark_fagfunksjoner/groups.py | 352 ++++++++++++++++++ 2 files changed, 765 insertions(+), 1 deletion(-) create mode 100644 src/ssb_arbmark_fagfunksjoner/groups.py diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index cc54f7f..bd29f34 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -5,7 +5,6 @@ """ - def example_function(number1: int, number2: int) -> str: """Compare two integers. @@ -33,3 +32,416 @@ def example_function(number1: int, number2: int) -> str: return f"{number1} is less than {number2}" return f"{number1} is greater than or equal to {number2}" + +# Holidays to calculate the number of holidays +import holidays +# Pandas for table management +import pandas as pd +# Numpy for data wrangling +import numpy as np +# Itertools for functions creating iterators for efficient looping +import itertools + +def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: + """ + Parameters: + from_dates : List of first dates of period. + to_dates : List of last dates of period. + Returns: + Returns a list of number of workdays in period from first to last date. + """ + # Convert the from_dates and to_dates columns to numpy arrays + from_dates = from_dates.values + to_dates = to_dates.values + # Extract the year from the from_dates array + year = from_dates.astype('datetime64[Y]').astype(int) + 1970 + # Check if the year is the same in the to_dates array + if not np.all(year == to_dates.astype('datetime64[Y]').astype(int) + 1970): + # If the year is not the same, raise an error + raise ValueError("Function can only be applied to dates in the same year!") + # Check if there is more than one unique year in the array + if np.unique(year).size > 1: + # If there is more than one unique year, raise an error + raise ValueError("Function can only be applied to a single year!") + + # Convert from_dates and to_dates to datetime64 arrays + from_dates = from_dates.astype('datetime64[D]') + to_dates = to_dates.astype('datetime64[D]') + + # Find the max and min dates + min_date = np.min(from_dates) + max_date = np.max(to_dates) + + # Generate a range of dates between the min and max dates + dates = np.arange(min_date, max_date + np.timedelta64(1, 'D'), dtype='datetime64[D]') + + # Convert the holiday dates to a numpy array of datetime64 objects + holiday_dates = np.array(sorted(holidays.NO(years=year).keys()), dtype='datetime64[D]') + + # Filter the dates array to exclude holiday dates and weekends + workdays = dates[~np.isin(dates, holiday_dates) & ~np.isin((dates.astype('datetime64[D]').view('int64') - 4) % 7, [5, 6])] + + # Calculate the number of workdays for each from and to date pair + workdays_list = [] + for from_date, to_date in zip(from_dates, to_dates): + workdays_in_range = workdays[(workdays >= from_date) & (workdays <= to_date)] + workdays_list.append(len(workdays_in_range)) + + # Check if the length of the workdays_list is the same as the number of date pairs + if len(workdays_list) != len(from_dates): + raise ValueError("Unexpected error: length of workdays_list does not match the number of date pairs.") + + return np.array(workdays_list) + +def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple: + """ + Given a year and a quarter, this function calculates the + first and last dates of the specified quarter using pandas. + Args: + year_str (str): The year as a string. + quarter_str (str): The quarter as a string. + + Returns: + tuple: A tuple containing two strings, the first and + last dates of the specified quarter in 'YYYY-MM-DD' format. + """ + # Convert input year and quarter strings to integers + year = int(year_str) + quarter = int(quarter_str) + + # Calculate the starting month of the quarter + start_month = (quarter - 1) * 3 + 1 + + # Create the start date of the quarter + start_date = pd.Timestamp(year, start_month, 1) + + # Calculate the end date of the quarter + end_date = start_date + pd.offsets.QuarterEnd() + + # Format dates as strings in 'YYYY-MM-DD' format + start_date_str = start_date.strftime('%Y-%m-%d') + end_date_str = end_date.strftime('%Y-%m-%d') + + return start_date_str, end_date_str + +def format_invalids(df: pd.DataFrame, + col_name: str, + invalid: list, + label: str) -> None: + """ + Modify a column in a pandas DataFrame in-place by replacing values + specified in the 'invalid' list with a given 'label'. + + Parameters: + - df (pd.DataFrame): The pandas DataFrame containing the column to be modified. + - col_name (str): The name of the column in the DataFrame to be modified. + - invalid (list): List of values in the column to be replaced. + - label (str): The value to replace the invalid entries with. + + Returns: + None. The function modifies the DataFrame in-place. + """ + + # Identify which entries in the column are in the 'invalid' list + isinvalid = df[col_name].isin(invalid) + + # Get a list of unique invalid codes present in the column + invalid_codes = list(df[col_name][isinvalid].unique()) + + # Print the unique invalid codes found + print(f"The following invalid codes can be found in {col_name}: {invalid_codes}.") + + # Print the number of entries that will be changed to the label + print(f"Changes {len(df[col_name][isinvalid])} entries in {col_name} to '{label}'.") + + # Use .loc on the DataFrame to modify the column in-place + df.loc[isinvalid, col_name] = label + + # Print the total number of entries that have been labeled + print(f"There is a total of {(df[col_name] == label).sum()} entries labelled as '{label}'.") + +def indicate_merge(left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: list) -> pd.DataFrame: + """ + Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. + + The merge types are determined as follows (left-to-right): + - 'one-to-zero': Rows that exist only in the left DataFrame. + - 'zero-to-one': Rows that exist only in the right DataFrame. + - 'many-to-zero': Rows in the right DataFrame with multiple identical entries and no matching entries in the left DataFrame. + - 'zero-to-many': Rows in the left DataFrame with multiple identical entries and no matching entries in the right DataFrame. + - 'one-to-one': Rows that have a matching entry in both left and right DataFrames. + - 'many-to-one': Rows in the right DataFrame with multiple matching entries in the left DataFrame. + - 'one-to-many': Rows in the left DataFrame with multiple matching entries in the right DataFrame. + - 'many-to-many': Rows in both left and right DataFrames with multiple matching entries. + + Args: + left (pd.DataFrame): The left DataFrame to be merged. + right (pd.DataFrame): The right DataFrame to be merged. + how (str): The type of merge to be performed. Options are: 'inner', 'outer', 'left', 'right'. + on (list): A list of column names to merge on. + + Returns: + pd.DataFrame: The merged DataFrame. + """ + # Perform the merge operation + merged_df = pd.merge(left, right, how=how, on=on, indicator=True) + + # Convert _merge column to numpy + np_merge = merged_df['_merge'].to_numpy() + + # Identify duplicate rows in each DataFrame + duplicated_left = left.duplicated(subset=on, keep=False) + duplicated_right = right.duplicated(subset=on, keep=False) + + # Different treatment depending on if "on" is a single column or not + if isinstance(on, str): + duplicated_from_left = merged_df[on].isin(left.loc[duplicated_left, on].drop_duplicates()).to_numpy() + duplicated_from_right = merged_df[on].isin(right.loc[duplicated_right, on].drop_duplicates()).to_numpy() + else: + duplicated_from_left = merged_df[on].apply(tuple, axis=1).isin(left[on][duplicated_left].drop_duplicates().apply(tuple, axis=1)).to_numpy() + duplicated_from_right = merged_df[on].apply(tuple, axis=1).isin(right[on][duplicated_right].drop_duplicates().apply(tuple, axis=1)).to_numpy() + + # Define the conditions and choices for np.select + conditions = [ + (np_merge == 'left_only') & ~duplicated_from_left, + (np_merge == 'right_only') & ~duplicated_from_right, + (np_merge == 'left_only') & duplicated_from_left, + (np_merge == 'right_only') & duplicated_from_right, + (np_merge == 'both') & ~duplicated_from_left & ~duplicated_from_right, + (np_merge == 'both') & duplicated_from_left & ~duplicated_from_right, + (np_merge == 'both') & ~duplicated_from_left & duplicated_from_right, + (np_merge == 'both') & duplicated_from_right & duplicated_from_left + ] + + choices = ['one-to-zero', 'zero-to-one', 'many-to-zero', 'zero-to-many', 'one-to-one', 'many-to-one', 'one-to-many', 'many-to-many'] + + # Use np.select to create new column + merge_type = np.select(conditions, choices, default='unknown') + + # Print the frequency of each merge type + unique, counts = np.unique(merge_type, return_counts=True) + print(f"Sum of entries after merge: {merged_df.shape[0]}") + for i, j in zip(unique, counts): + print(f"Number of entries of type '{i}': {j}") + + # Drop the _merge column and return the result + merged_df.drop(columns='_merge', inplace=True) + + return merged_df + +def kv_intervall(start_p, slutt_p): + """ + This function generates a list of quarterly periods between two given periods. + + The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year + and Q is a quarter (1 to 4). The function handles cases where the start and end + periods are in the same year or in different years. + + Parameters: + start_p (str): The start period in the format 'YYYYkQ'. + slutt_p (str): The end period in the format 'YYYYkQ'. + + Returns: + list: A list of strings representing the quarterly periods from start_p to slutt_p, inclusive. + + Example: + >>> kv_intervall('2022k3', '2023k2') + ['2022k3', '2022k4', '2023k1', '2023k2'] + """ + # Extract the year and quarter from the start period + start_aar4 = int(start_p[:4]) + start_kv = int(start_p[-1]) + + # Extract the year and quarter from the end period + slutt_aar4 = int(slutt_p[:4]) + slutt_kv = int(slutt_p[-1]) + + # Initialize an empty list to store the periods + intervall = [] + + # Generate the periods + for i in range(start_aar4, slutt_aar4+1): + if (start_aar4 == slutt_aar4): + # If the start and end periods are in the same year + for j in range(start_kv, slutt_kv+1): + intervall.append(f"{i}k{j}") + elif (i == start_aar4): + # If the current year is the start year + for j in range(start_kv, 4+1): + intervall.append(f"{i}k{j}") + elif (start_aar4 < i and slutt_aar4 > i): + # If the current year is between the start and end years + for j in range(1, 4+1): + intervall.append(f"{i}k{j}") + elif (i == slutt_aar4): + # If the current year is the end year + for j in range(1, slutt_kv+1): + intervall.append(f"{i}k{j}") + + return intervall + +def proc_sums( + df: pd.DataFrame, + groups: list[str], + values: list[str], + agg_func: dict = None +) -> pd.DataFrame: + """ + Compute aggregations for all combinations of columns and return a new + DataFrame with these aggregations. + + Parameters: + df : pd.DataFrame + The input DataFrame. + groups : list[str] + List of columns to be considered for groupings. + values : list[str] + List of columns on which the aggregation functions will be applied. + agg_func : dict, optional + Dictionary mapping columns to aggregation functions corresponding to + the 'values' list. + Default is 'sum' for all columns in 'values'. + + Returns: + pd.DataFrame + A DataFrame containing aggregations for all combinations of 'columns'. + + Notes: + - The returned DataFrame also contains an additional column named 'level' + indicating the level of grouping. + - Columns not used in a particular level of grouping will have a value + 'Total'. + """ + + # All columns used from the input dataframe + required_columns = groups + values + + # Check that the parameters references columns in the dataframe + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + raise ValueError(f"Columns {', '.join(missing_columns)} are not present in the dataframe!") + + # Check if all columns in 'values' are numeric + non_numeric_cols = [col for col in values if not pd.api.types.is_numeric_dtype(df[col])] + + # Copy the dataframe and limit input to columns in the parameter + df = df[required_columns].copy() + + # Default aggregation: 'sum' for all 'values' columns. + if agg_func is None and not non_numeric_cols: + agg_func = {col: 'sum' for col in values} + elif agg_func is None and non_numeric_cols: + raise ValueError(f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!") + else: + # Correct a format causing error in agg-function + for col, funcs in agg_func.items(): + if isinstance(funcs, list) and len(funcs) == 1: + agg_func[col] = funcs[0] + + # Initialize empty datframe + sum_df = pd.DataFrame() + + # Convert columns lists to sets for easier set operations. + groups_set = set(groups) + + # Loop over all possible combinations of 'columns' for aggregation. + for i in reversed(range(1, len(groups) + 1)): + for subset in itertools.combinations(groups, i): + # Convert subset of columns list to a set. + subset_set = set(subset) + # Group by the current subset of columns and aggregate. + sub_sum = df.groupby(list(subset)).agg(agg_func).reset_index() + # Check if there are missing columns in the subset + sum_columns = list(groups_set - subset_set) + if sum_columns: + # For columns not in the current subset, fill with 'Total'. + sub_sum[sum_columns] = 'Total' + # Indicate level of grouping + sub_sum['level'] = i + # Append this subset's aggregation results to the final DataFrame. + sum_df = pd.concat([sum_df, sub_sum], ignore_index=True) + + return sum_df + +def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: + """ + Parameters: + from_dates : List of first dates of period. + to_dates : List of last dates of period. + Returns: + Returns a list of booleans for if the + reference day is between the from_dates and to_dates. + The reference day is defined as the 16th of each month. + """ + # Convert the from_dates and to_dates columns to numpy arrays + from_dates = from_dates.values + to_dates = to_dates.values + + # Extract the year from the from_dates array + year = from_dates.astype('datetime64[Y]').astype(int) + 1970 + + # Check if the year is the same in the to_dates array + if not np.all(year == to_dates.astype('datetime64[Y]').astype(int) + 1970): + # If the year is not the same, raise an error + raise ValueError("Function can only be applied to dates in the same year!") + + # Check if there is more than one unique year in the array + if np.unique(year).size > 1: + # If there is more than one unique year, raise an error + raise ValueError("Function can only be applied to a single year!") + + # Extract the month from the from_dates array + month = from_dates.astype('datetime64[M]').astype(int) % 12 + 1 + + # Check if the month is the same in the to_dates array + if not np.all(month == to_dates.astype('datetime64[M]').astype(int) % 12 + 1): + # If the month is not the same, raise an error + raise ValueError("Function can only be applied to dates in the same months!") + + # Create a reference day for each month + ref_days = np.array([f"{year[0]}-{m:02d}-16" for m in month], dtype='datetime64[D]') + + # Check if the reference day is within the range of the from_date and to_date + result = np.logical_and(from_dates <= ref_days, ref_days <= to_dates) + + # Return the result as an array of boolean values + return result + +def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: + """ + Parameters: + from_dates : List of first dates of period. + to_dates : List of last dates of period. + Returns: + Returns a list of booleans for if any of the dates between + from_dates and to_dates is in the reference week. The + reference week is defined as the week of the 16th of + each month. + """ + # Check if the year is the same in the to_dates array + if not np.all(from_dates.dt.year == to_dates.dt.year): + # If the year is not the same, raise an error + raise ValueError("Function can only be applied to dates in the same year!") + + # Check if the month is the same in the to_dates array + if not np.all(from_dates.dt.month == to_dates.dt.month): + # If the month is not the same, raise an error + raise ValueError("Function can only be applied to dates in the same months!") + + # Create a reference day for each month + ref_days = pd.to_datetime([f"{y}-{m:02d}-16" for y, m in zip(from_dates.dt.year, from_dates.dt.month)]) + + # Convert ref_days to a Series object to use the dt accessor + ref_days = pd.Series(ref_days) + + # Calculate the week numbers using pandas with Monday as the starting day + from_weeks = from_dates.dt.isocalendar().week + to_weeks = to_dates.dt.isocalendar().week + ref_weeks = ref_days.dt.isocalendar().week + + # Check if any of the weeks between from_dates and to_dates is the reference week + result = np.logical_and(from_weeks <= ref_weeks, ref_weeks <= to_weeks) + + # Return the result as a series of boolean values + return result + diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py new file mode 100644 index 0000000..ae0fcc7 --- /dev/null +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -0,0 +1,352 @@ +"""A collection of useful groups.""" + +# Pandas for table management +import pandas as pd +# Numpy for data wrangling +import numpy as np + +def alder_grp(alder: pd.Series, labels=True) -> pd.Series: + """ + Categorize a pandas Series of person ages into predefined groups. + + Parameters: + alder (pd.Series): A pandas Series containing the person ages. + labels (bool, optional): If True, returns group labels; if False, returns keys; + if 'combined', returns a combination of keys and labels. + + Returns: + pd.Series: A pandas Series where the original person ages are replaced by group labels or keys. + """ + + # Define the conditions for each group + conditions = [ + np.logical_and(alder >= 16, alder <= 19), # 16-19 år + np.logical_and(alder >= 20, alder <= 24), # 20-24 år + np.logical_and(alder >= 25, alder <= 29), # 25-29 år + np.logical_and(alder >= 30, alder <= 34), # 30-34 år + np.logical_and(alder >= 35, alder <= 39), # 35-39 år + np.logical_and(alder >= 40, alder <= 44), # 40-44 år + np.logical_and(alder >= 45, alder <= 49), # 45-49 år + np.logical_and(alder >= 50, alder <= 54), # 50-54 år + np.logical_and(alder >= 55, alder <= 59), # 55-59 år + np.logical_and(alder >= 60, alder <= 64), # 60-64 år + np.logical_or(alder == 65, alder == 66), # 65-66 år + alder == 67, # 67 år + alder == 68, # 68 år + alder == 69, # 69 år + ] + + # Define the group labels with string keys + groups = { + '1': '16-19 år', + '2': '20-24 år', + '3': '25-29 år', + '4': '30-34 år', + '5': '35-39 år', + '6': '40-44 år', + '7': '45-49 år', + '8': '50-54 år', + '9': '55-59 år', + '10': '60-64 år', + '11': '65-66 år', + '12': '67 år', + '13': '68 år', + '14': '69 år' + } + + # Determine the format of the results based on the labels parameter + if labels == 'combined': + results = [f"{key} {value}" for key, value in groups.items()] + elif labels: + results = list(groups.values()) + else: + results = list(groups.keys()) + + # Apply the selected format to the series + return np.select(conditions, results, default='.') + +def nace_sn07_47grp(nace_sn07: pd.Series, labels=True) -> pd.Series: + """ + Categorize a pandas Series of NACE-codes (SN07) into predefined groups. + + Parameters: + nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. + + Returns: + pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. + """ + + # Removes periods in the NACE codes (if any) + nace_sn07 = nace_sn07.replace('.', '') + + # Substring of NACE codes at length 2 and 3 + nace2 = pd.Series(nace_sn07.str[:2], name='nace2') + nace3 = pd.Series(nace_sn07.str[:3], name='nace3') + + # Define the conditions for each group + conditions = [ + np.isin(nace2, ['01', '02', '03']), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass + np.logical_or(np.isin(nace2, ['05', '07', '08']), nace3 == '099'), # Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning + np.logical_or(nace2 == '06', nace3 == '091'), # Olje- og gassutvinning; Uoppgitt utvinning av petroleum + np.isin(nace2, ['10', '11', '12']), # Næringsmiddel-,drikkev.,tobakkind. + np.isin(nace2, ['13', '14', '15']), # Tekstil-,bekledn.-,lærvareind. + np.isin(nace2, ['16', '17']), # Trelast- og trevareind. + nace2 == '18', # Trykking, grafisk industri + np.isin(nace2, ['19', '20', '21']), # Petrolieum, kull, kjemisk og farmasøytisk industri + np.isin(nace2, ['22', '23']), # Gummivare-, plast-,mineralproduktind. + nace2 == '24', # Metallindustri + nace2 == '25', # Metallvareindustri + np.isin(nace2, ['26', '27']), # Data- og elektronisk industri + nace2 == '28', # Maskinindustri + np.logical_or(np.isin(nace2, ['29', '33']), np.logical_and(nace3 >= '302', nace3 <= '309')), # Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler + nace3 == '301', # Produksjon av skip og båter, inkl. oljeplattformer + np.isin(nace2, ['31', '32']), # Møbel og annen industri + nace2 == '35', # Elekstrisitet, gass, damp, varmtvann + np.logical_and(nace2 >= '36', nace2 <= '39'), # Vann, avløp og renovasjon + np.isin(nace2, ['41', '42', '43']), # Bygge- og anleggsvirksomhet + nace2 == '45', # Motorvognrep og -handel + nace2 == '46', # Agentur- og engroshandel + nace2 == '47', # Detaljhandel, unntatt motorvogner + nace2 == '49', # Landtransport og rørtransport + nace2 == '50', # Sjøfart + nace2 == '51', # Lufttransport + nace2 == '52', # Lagring og tjenester tilknyttet transport + nace2 == '53', # Posttjenester + nace2 == '55', # Overnattingsvirksomhet + nace2 == '56', # Serveringsvirksomhet + np.isin(nace2, ['58', '59', '60']), # Forlag, film-, TV-pr, kringkasting + np.isin(nace2, ['61', '62', '63']), # IKT-virksomhet + nace2 == '64', # Finansieringsvirksomhet (bank, m.m.) + nace2 == '65', # Forsikringsvirksomhet og pensjonskasser + nace2 == '66', # Finansiell tjenesteyting + nace2 == '68', # Omsetning og drift av fast eiendom + np.isin(nace2, ['69', '70', '71']), # Juridisk-, hovedkontor-, konsulentj. + nace2 == '72', # Forskning og utviklingsarbeid + np.isin(nace2, ['73', '74', '75']), # Faglig, vitenskapelig og teknisk tjenesteyting ellers + np.logical_and(nace2 >= '77', nace2 <= '82'), # Forretningsmessig tjenesteyting ellers + nace2 == '84', # Off.adm., forsvar, sosialforsikring + nace2 == '85', # Undervining + nace2 == '86', # Helsetjenester + np.isin(nace2, ['87', '88']), # Pleie og omsorg; Fritids- og sportsaktiviteter + np.logical_and(nace2 >= '90', nace2 <= '93'), # Kultur, underholdning og fritid + np.isin(nace2, ['94', '95', '96']), # Annen tjenesteyting + nace2 == '97', # Lønnet husarbeid i private husholdninger + nace2 == '99', # Internasjonale organisasjoner + ] + + # Define the group labels with string keys + groups = { + '01': 'Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass', + '02': 'Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning', + '03': 'Olje- og gassutvinning; Uoppgitt utvinning av petroleum', + '04': 'Næringsmiddel-,drikkev.,tobakkind.', + '05': 'Tekstil-,bekledn.-,lærvareind.', + '06': 'Trelast- og trevareind.', + '07': 'Trykking, grafisk industri', + '08': 'Petrolieum, kull, kjemisk og farmasøytisk industri', + '09': 'Gummivare-, plast-,mineralproduktind.', + '10': 'Metallindustri', + '11': 'Metallvareindustri', + '12': 'Data- og elektronisk industri', + '13': 'Maskinindustri', + '14': 'Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler', + '15': 'Produksjon av skip og båter, inkl. oljeplattformer', + '16': 'Møbel og annen industri', + '17': 'Elekstrisitet, gass, damp, varmtvann', + '18': 'Vann, avløp og renovasjon', + '19': 'Bygge- og anleggsvirksomhet', + '20': 'Motorvognrep og -handel', + '21': 'Agentur- og engroshandel', + '22': 'Detaljhandel, unntatt motorvogner', + '23': 'Landtransport og rørtransport', + '24': 'Sjøfart', + '25': 'Lufttransport', + '26': 'Lagring og tjenester tilknyttet transport', + '27': 'Posttjenester', + '28': 'Overnattingsvirksomhet', + '29': 'Serveringsvirksomhet', + '30': 'Forlag, film-, TV-pr, kringkasting', + '31': 'IKT-virksomhet', + '32': 'Finansieringsvirksomhet (bank, m.m.)', + '33': 'Forsikringsvirksomhet og pensjonskasser', + '34': 'Finansiell tjenesteyting', + '35': 'Omsetning og drift av fast eiendom', + '36': 'Juridisk-, hovedkontor-, konsulentj.', + '37': 'Forskning og utviklingsarbeid', + '38': 'Faglig, vitenskapelig og teknisk tjenesteyting ellers', + '39': 'Forretningsmessig tjenesteyting ellers', + '40': 'Off.adm., forsvar, sosialforsikring', + '41': 'Undervisning', + '42': 'Helsetjenester', + '43': 'Pleie og omsorg; Fritids- og sportsaktiviteter', + '44': 'Kultur, underholdning og fritid', + '45': 'Annen tjenesteyting', + '46': 'Lønnet husarbeid i private husholdninger', + '47': 'Internasjonale organisasjoner' + } + + # Determine and apply the selected format based on the labels parameter + if labels == 'combined': + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default='99 Uoppgitt') + elif labels: + return np.select(conditions, list(groups.values()), default='Uoppgitt') + else: + return np.select(conditions, list(groups.keys()), default='99') + +def nace_sn07_17grp(nace_sn07: pd.Series) -> pd.Series: + """ + Categorize a pandas Series of NACE-codes (SN07) into predefined groups. + + Parameters: + nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. + + Returns: + pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. + """ + + # Removes labels (if any) + nace_str2 = nace_sn07.str[:2] + + # Counts the number of unique groups of nace codes + n_unique_grp = len(nace_str2.unique()) + + # Check if nace codes are already grouped into 47-groups + if n_unique_grp > 48: + print(f"Warning: There are {n_unique_grp} unique industry divisions on 2-number level. The function first groups the input into the 47 groups standard.") + nace_str2 = nace_sn07_47grp(nace_sn07, labels=False) + + # Define the conditions for each group + conditions = [ + nace_str2 == '01', # 01-03 Jordbruk, skogbruk og fiske + np.logical_and(nace_str2 >= '01', nace_str2 <= '03'), # 05-09 Bergverksdrift og utvinning + np.logical_and(nace_str2 >= '04', nace_str2 <= '16'), # 10-33 Industri + np.logical_and(nace_str2 >= '17', nace_str2 <= '18'), # 35-39 Elektrisitet, vann og renovasjon + nace_str2 == '19', # 41-43 Bygge- og anleggsvirksomhet + np.logical_and(nace_str2 >= '20', nace_str2 <= '22'), # 45-47 Varehandel, reparasjon av motorvogner + np.logical_and(nace_str2 >= '23', nace_str2 <= '27'), # 49-53 Transport og lagring + np.logical_and(nace_str2 >= '28', nace_str2 <= '29'), # 55-56 Overnattings- og serveringsvirksomhet + np.logical_and(nace_str2 >= '30', nace_str2 <= '31'), # 58-63 Informasjon og kommunikasjon + np.logical_and(nace_str2 >= '32', nace_str2 <= '34'), # 64-66 Finansiering og forsikring + np.logical_and(nace_str2 >= '35', nace_str2 <= '38'), # 68-75 Teknisk tjenesteyting, eiendomsdrift + nace_str2 == '39', # 77-82 Forretningsmessig tjenesteyting + nace_str2 == '40', # 84 Off.adm., forsvar, sosialforsikring + nace_str2 == '41', # 85 Undervisning + np.logical_and(nace_str2 >= '42', nace_str2 <= '43'), # 86-88 Helse- og sosialtjenester + np.logical_and(nace_str2 >= '44', nace_str2 <= '47') # 90-99 Personlig tjenesteyting + ] + + # Define the group labels with string keys + groups = { + '01-03': 'Jordbruk, skogbruk og fiske', + '05-09': 'Bergverksdrift og utvinning', + '10-33': 'Industri', + '35-39': 'Elektrisitet, vann og renovasjon', + '41-43': 'Bygge- og anleggsvirksomhet', + '45-47': 'Varehandel, reparasjon av motorvogner', + '49-53': 'Transport og lagring', + '55-56': 'Overnattings- og serveringsvirksomhet', + '58-63': 'Informasjon og kommunikasjon', + '64-66': 'Finansiering og forsikring', + '68-75': 'Teknisk tjenesteyting, eiendomsdrift', + '77-82': 'Forretningsmessig tjenesteyting', + '84': 'Off.adm., forsvar, sosialforsikring', + '85': 'Undervisning', + '86-88': 'Helse- og sosialtjenester', + '90-99': 'Personlig tjenesteyting' + } + + # Determine and apply the selected format based on the labels parameter + if labels == 'combined': + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default='999 Uoppgitt') + elif labels: + return np.select(conditions, list(groups.values()), default='Uoppgitt') + else: + return np.select(conditions, list(groups.keys()), default='999') + +def sektor2_grp(sektor: pd.Series, undersektor: pd.Series, labels=True) -> pd.Series: + """ + Categorize a pandas Series of sectors and subsectors into predefined groups. + + Parameters: + sektor (pd.Series): A pandas Series containing the sector codes. + undersektor (pd.Series): A pandas Series containing the subsector codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. + + Returns: + pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. + """ + + # Define the conditions for each group + conditions = [ + sektor == '6100', + np.logical_and(sektor == '6500', undersektor != '007'), + np.logical_and(sektor == '6500', undersektor == '007'), + sektor == '1510', + sektor == '1520', + ] + + groups = { + '110': 'Statlig forvaltning', + '550': 'Kommunal forvaltning', + '510': 'Fylkeskommunal forvaltning', + '660': 'Kommunale foretak med ubegrenset ansvar', + '680': 'Kommunalt eide aksjeselskaper m.v.' + } + + # Determine and apply the selected format based on the labels parameter + if labels == 'combined': + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default='999 Uoppgitt') + elif labels: + return np.select(conditions, list(groups.values()), default='Uoppgitt') + else: + return np.select(conditions, list(groups.keys()), default='999') + + def virk_str_8grp(ansatte: pd.Series, labels=True) -> pd.Series: + """ + Categorize a pandas Series of employee counts into predefined groups. + + Parameters: + ansatte (pd.Series): A pandas Series containing the employee counts. + labels (bool, optional): Whether to return group labels or keys. Default is True. + + Returns: + pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. + """ + + # Define the conditions for each group + conditions = [ + ansatte == 0, # No employees + np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees + np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees + np.logical_and(ansatte >= 10, ansatte <= 19), # 10-19 employees + np.logical_and(ansatte >= 20, ansatte <= 49), # 20-49 employees + np.logical_and(ansatte >= 50, ansatte <= 99), # 50-99 employees + np.logical_and(ansatte >= 100, ansatte <= 249), # 100-249 employees + ansatte >= 250, # 250 employees or more + ] + + # Define the group labels with string keys + groups = { + '1': 'Ingen ansatte', + '2': '1-4 ansatte', + '3': '5-9 ansatte', + '4': '10-19 ansatte', + '5': '20-49 ansatte', + '6': '50-99 ansatte', + '7': '100-249 ansatte', + '8': '250 ansatte og over' + } + + # Determine and apply the selected format based on the labels parameter + if labels == 'combined': + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default='99 Uoppgitt') + elif labels: + return np.select(conditions, list(groups.values()), default='Uoppgitt') + else: + return np.select(conditions, list(groups.keys()), default='99') \ No newline at end of file From 48b239cbfe8bcb9b1beee5c0832dd6d99046dc12 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Tue, 5 Dec 2023 14:52:49 +0100 Subject: [PATCH 02/49] Fixed unsorted import --- poetry.lock | 456 ++++++++++++++++++- pyproject.toml | 6 + src/ssb_arbmark_fagfunksjoner/functions.py | 363 +++++++++------ src/ssb_arbmark_fagfunksjoner/groups.py | 502 +++++++++++---------- 4 files changed, 936 insertions(+), 391 deletions(-) diff --git a/poetry.lock b/poetry.lock index f36fb20..7e6dfa3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -22,6 +22,39 @@ files = [ {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] +[[package]] +name = "argcomplete" +version = "3.1.6" +description = "Bash tab completion for argparse" +optional = false +python-versions = ">=3.8" +files = [ + {file = "argcomplete-3.1.6-py3-none-any.whl", hash = "sha256:71f4683bc9e6b0be85f2b2c1224c47680f210903e23512cfebfe5a41edfd883a"}, + {file = "argcomplete-3.1.6.tar.gz", hash = "sha256:3b1f07d133332547a53c79437527c00be48cca3807b1d4ca5cab1b26313386a6"}, +] + +[package.extras] +test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] + +[[package]] +name = "arrow" +version = "1.3.0" +description = "Better dates & times for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80"}, + {file = "arrow-1.3.0.tar.gz", hash = "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85"}, +] + +[package.dependencies] +python-dateutil = ">=2.7.0" +types-python-dateutil = ">=2.8.10" + +[package.extras] +doc = ["doc8", "sphinx (>=7.0.0)", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx_rtd_theme (>=1.3.0)"] +test = ["dateparser (==1.*)", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "pytz (==2021.1)", "simplejson (==3.*)"] + [[package]] name = "asttokens" version = "2.4.1" @@ -75,6 +108,20 @@ soupsieve = ">1.2" html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "binaryornot" +version = "0.4.4" +description = "Ultra-lightweight pure Python package to check if a file is binary or text." +optional = false +python-versions = "*" +files = [ + {file = "binaryornot-0.4.4-py2.py3-none-any.whl", hash = "sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4"}, + {file = "binaryornot-0.4.4.tar.gz", hash = "sha256:359501dfc9d40632edc9fac890e19542db1a287bbcfa58175b66658392018061"}, +] + +[package.dependencies] +chardet = ">=3.0.2" + [[package]] name = "black" version = "23.11.0" @@ -205,6 +252,17 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -329,6 +387,23 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "colorlog" +version = "6.8.0" +description = "Add colours to the output of Python's logging module." +optional = false +python-versions = ">=3.6" +files = [ + {file = "colorlog-6.8.0-py3-none-any.whl", hash = "sha256:4ed23b05a1154294ac99f511fabe8c1d6d4364ec1f7fc989c7fb515ccc29d375"}, + {file = "colorlog-6.8.0.tar.gz", hash = "sha256:fbb6fdf9d5685f2517f388fb29bb27d54e8654dd31f58bc2a3b217e967a95ca6"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} + +[package.extras] +development = ["black", "flake8", "mypy", "pytest", "types-colorama"] + [[package]] name = "comm" version = "0.2.0" @@ -346,6 +421,27 @@ traitlets = ">=4" [package.extras] test = ["pytest"] +[[package]] +name = "cookiecutter" +version = "2.5.0" +description = "A command-line utility that creates projects from project templates, e.g. creating a Python package project from a Python package project template." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cookiecutter-2.5.0-py3-none-any.whl", hash = "sha256:8aa2f12ed11bc05628651e9dc4353a10571dd9908aaaaeec959a2b9ea465a5d2"}, + {file = "cookiecutter-2.5.0.tar.gz", hash = "sha256:e61e9034748e3f41b8bd2c11f00d030784b48711c4d5c42363c50989a65331ec"}, +] + +[package.dependencies] +arrow = "*" +binaryornot = ">=0.4.4" +click = ">=7.0,<9.0.0" +Jinja2 = ">=2.7,<4.0.0" +python-slugify = ">=4.0.0" +pyyaml = ">=5.3.1" +requests = ">=2.23.0" +rich = "*" + [[package]] name = "coverage" version = "7.3.2" @@ -413,6 +509,26 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "cruft" +version = "2.15.0" +description = "Allows you to maintain all the necessary cruft for packaging and building projects separate from the code you intentionally write. Built on-top of CookieCutter." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cruft-2.15.0-py3-none-any.whl", hash = "sha256:2c1c6c7c512b8bc3afc66cefa829247dc067409a78ba2609d629d413444bc153"}, + {file = "cruft-2.15.0.tar.gz", hash = "sha256:9802af66037418655e7e4b6f30b531591e0761939b3ff5dd45d27c3a3f588abe"}, +] + +[package.dependencies] +click = ">=7.1.2" +cookiecutter = ">=1.7" +gitpython = ">=3.0" +typer = ">=0.4.0" + +[package.extras] +pyproject = ["toml (>=0.10)"] + [[package]] name = "darglint" version = "1.8.1" @@ -545,6 +661,51 @@ pygments = ">=2.7" sphinx = ">=6.0,<8.0" sphinx-basic-ng = "*" +[[package]] +name = "gitdb" +version = "4.0.11" +description = "Git Object Database" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.11-py3-none-any.whl", hash = "sha256:81a3407ddd2ee8df444cbacea00e2d038e40150acfa3001696fe0dcf1d3adfa4"}, + {file = "gitdb-4.0.11.tar.gz", hash = "sha256:bf5421126136d6d0af55bc1e7c1af1c397a34f5b7bd79e776cd3e89785c2b04b"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.40" +description = "GitPython is a Python library used to interact with Git repositories" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.40-py3-none-any.whl", hash = "sha256:cf14627d5a8049ffbf49915732e5eddbe8134c3bdb9d476e6182b676fc573f8a"}, + {file = "GitPython-3.1.40.tar.gz", hash = "sha256:22b126e9ffb671fdd0c129796343a02bf67bf2994b35449ffc9321aa755e18a4"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + +[package.extras] +test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] + +[[package]] +name = "holidays" +version = "0.37" +description = "Generate and work with holidays in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "holidays-0.37-py3-none-any.whl", hash = "sha256:5b8ff8c94c06e3b225762d495e51b8e51205d332f8ad092aab809c4bffa8d123"}, + {file = "holidays-0.37.tar.gz", hash = "sha256:712df71a8d97b04554fa1c9208d219fbf174bad2864263bef24c6dcfa1ded6ff"}, +] + +[package.dependencies] +python-dateutil = "*" + [[package]] name = "identify" version = "2.5.32" @@ -583,20 +744,20 @@ files = [ [[package]] name = "importlib-metadata" -version = "6.8.0" +version = "7.0.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, - {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, + {file = "importlib_metadata-7.0.0-py3-none-any.whl", hash = "sha256:d97503976bb81f40a193d41ee6570868479c69d5068651eb039c40d850c59d67"}, + {file = "importlib_metadata-7.0.0.tar.gz", hash = "sha256:7fc841f8b8332803464e5dc1c63a2e59121f46ca186c0e2e182e80bf8c1319f7"}, ] [package.dependencies] zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] @@ -1021,6 +1182,71 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "nox" +version = "2023.4.22" +description = "Flexible test automation." +optional = false +python-versions = ">=3.7" +files = [ + {file = "nox-2023.4.22-py3-none-any.whl", hash = "sha256:0b1adc619c58ab4fa57d6ab2e7823fe47a32e70202f287d78474adcc7bda1891"}, + {file = "nox-2023.4.22.tar.gz", hash = "sha256:46c0560b0dc609d7d967dc99e22cb463d3c4caf54a5fda735d6c11b5177e3a9f"}, +] + +[package.dependencies] +argcomplete = ">=1.9.4,<4.0" +colorlog = ">=2.6.1,<7.0.0" +packaging = ">=20.9" +virtualenv = ">=14" + +[package.extras] +tox-to-nox = ["jinja2", "tox (<4)"] + +[[package]] +name = "numpy" +version = "1.26.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"}, + {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"}, + {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"}, + {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"}, + {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"}, + {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"}, + {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"}, + {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"}, + {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"}, + {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"}, + {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"}, + {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"}, + {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"}, + {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"}, + {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"}, + {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"}, + {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"}, + {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"}, + {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"}, + {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"}, + {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"}, + {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"}, + {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"}, + {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"}, + {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"}, + {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"}, + {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"}, + {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"}, + {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"}, + {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"}, + {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"}, + {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"}, + {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"}, + {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"}, +] + [[package]] name = "packaging" version = "23.2" @@ -1032,6 +1258,74 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] +[[package]] +name = "pandas" +version = "2.1.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f"}, + {file = "pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb"}, + {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb"}, + {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4"}, + {file = "pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03"}, + {file = "pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e"}, + {file = "pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82"}, + {file = "pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042"}, + {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef"}, + {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58"}, + {file = "pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2"}, + {file = "pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f"}, + {file = "pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140"}, + {file = "pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d"}, + {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e"}, + {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683"}, + {file = "pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00"}, + {file = "pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549"}, + {file = "pandas-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b99c4e51ef2ed98f69099c72c75ec904dd610eb41a32847c4fcbc1a975f2d2b8"}, + {file = "pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7ea8ae8004de0381a2376662c0505bb0a4f679f4c61fbfd122aa3d1b0e5f09d"}, + {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd76d67ca2d48f56e2db45833cf9d58f548f97f61eecd3fdc74268417632b8a"}, + {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1329dbe93a880a3d7893149979caa82d6ba64a25e471682637f846d9dbc10dd2"}, + {file = "pandas-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:321ecdb117bf0f16c339cc6d5c9a06063854f12d4d9bc422a84bb2ed3207380a"}, + {file = "pandas-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:11a771450f36cebf2a4c9dbd3a19dfa8c46c4b905a3ea09dc8e556626060fe71"}, + {file = "pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + [[package]] name = "parso" version = "0.8.3" @@ -1072,6 +1366,25 @@ files = [ [package.dependencies] ptyprocess = ">=0.5" +[[package]] +name = "pipx" +version = "1.3.2" +description = "Install and Run Python Applications in Isolated Environments" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pipx-1.3.2-py3-none-any.whl", hash = "sha256:bb4bf6c052639f589901d7ae5a837a44bc09fa82c38b4f74a319d39cccc73b84"}, + {file = "pipx-1.3.2.tar.gz", hash = "sha256:704d01d04c67c2dd0c776c5bf5ed35c7b249055b0174568b8507f07d72ed7a7f"}, +] + +[package.dependencies] +argcomplete = ">=1.9.4" +colorama = {version = ">=0.4.4", markers = "sys_platform == \"win32\""} +packaging = ">=20" +platformdirs = ">=2.1" +tomli = {version = "*", markers = "python_version < \"3.11\""} +userpath = ">=1.6,<1.9.0 || >1.9.0" + [[package]] name = "platformdirs" version = "4.0.0" @@ -1264,6 +1577,34 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-slugify" +version = "8.0.1" +description = "A Python slugify application that also handles Unicode" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python-slugify-8.0.1.tar.gz", hash = "sha256:ce0d46ddb668b3be82f4ed5e503dbc33dd815d83e2eb6824211310d3fb172a27"}, + {file = "python_slugify-8.0.1-py2.py3-none-any.whl", hash = "sha256:70ca6ea68fe63ecc8fa4fcf00ae651fc8a5d02d93dcd12ae6d4fc7ca46c4d395"}, +] + +[package.dependencies] +text-unidecode = ">=1.3" + +[package.extras] +unidecode = ["Unidecode (>=1.1.1)"] + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, +] + [[package]] name = "pywin32" version = "306" @@ -1472,6 +1813,24 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rich" +version = "13.7.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.0-py3-none-any.whl", hash = "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235"}, + {file = "rich-13.7.0.tar.gz", hash = "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "ruamel-yaml" version = "0.18.5" @@ -1602,6 +1961,17 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smmap" +version = "5.0.1" +description = "A pure Python implementation of a sliding window memory map manager" +optional = false +python-versions = ">=3.7" +files = [ + {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, + {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, +] + [[package]] name = "snowballstemmer" version = "2.2.0" @@ -1853,6 +2223,17 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "text-unidecode" +version = "1.3" +description = "The most basic Text::Unidecode port" +optional = false +python-versions = "*" +files = [ + {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, + {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, +] + [[package]] name = "tokenize-rt" version = "5.2.0" @@ -1929,6 +2310,38 @@ typing-extensions = {version = ">=4.7.0", markers = "python_version < \"3.12\""} doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"] test = ["coverage[toml] (>=7)", "mypy (>=1.2.0)", "pytest (>=7)"] +[[package]] +name = "typer" +version = "0.9.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." +optional = false +python-versions = ">=3.6" +files = [ + {file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"}, + {file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + +[[package]] +name = "types-python-dateutil" +version = "2.8.19.14" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = "*" +files = [ + {file = "types-python-dateutil-2.8.19.14.tar.gz", hash = "sha256:1f4f10ac98bb8b16ade9dbee3518d9ace017821d94b057a425b069f834737f4b"}, + {file = "types_python_dateutil-2.8.19.14-py3-none-any.whl", hash = "sha256:f977b8de27787639986b4e28963263fd0e5158942b3ecef91b9335c130cb1ce9"}, +] + [[package]] name = "typing-extensions" version = "4.8.0" @@ -1940,6 +2353,17 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + [[package]] name = "urllib3" version = "2.1.0" @@ -1956,15 +2380,29 @@ brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "userpath" +version = "1.9.1" +description = "Cross-platform tool for adding locations to the user PATH" +optional = false +python-versions = ">=3.7" +files = [ + {file = "userpath-1.9.1-py3-none-any.whl", hash = "sha256:e085053e5161f82558793c41d60375289efceb4b77d96033ea9c84fc0893f772"}, + {file = "userpath-1.9.1.tar.gz", hash = "sha256:ce8176728d98c914b6401781bf3b23fccd968d1647539c8788c7010375e02796"}, +] + +[package.dependencies] +click = "*" + [[package]] name = "virtualenv" -version = "20.24.7" +version = "20.25.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.24.7-py3-none-any.whl", hash = "sha256:a18b3fd0314ca59a2e9f4b556819ed07183b3e9a3702ecfe213f593d44f7b3fd"}, - {file = "virtualenv-20.24.7.tar.gz", hash = "sha256:69050ffb42419c91f6c1284a7b24e0475d793447e35929b488bf6a0aade39353"}, + {file = "virtualenv-20.25.0-py3-none-any.whl", hash = "sha256:4238949c5ffe6876362d9c0180fc6c3a824a7b12b80604eeb8085f2ed7460de3"}, + {file = "virtualenv-20.25.0.tar.gz", hash = "sha256:bf51c0d9c7dd63ea8e44086fa1e4fb1093a31e963b86959257378aef020e1f1b"}, ] [package.dependencies] @@ -2032,4 +2470,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3e586bbc7ea7907f063b28917b06a5834b13b4d7683fa1673a8c71de2ea59882" +content-hash = "1027d02cb93ebdc4ee471c8377b8da7b7464d099b8492c5e7250fbbdb2f341a1" diff --git a/pyproject.toml b/pyproject.toml index 71e08ab..17bfc6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,12 @@ Changelog = "https://github.com/statisticsnorway/ssb-arbmark-fagfunksjoner/relea [tool.poetry.dependencies] python = "^3.9" click = ">=8.0.1" +pipx = "^1.3.2" +cruft = "^2.15.0" +nox = "^2023.4.22" +pandas = "^2.1.3" +numpy = "^1.26.2" +holidays = "^0.37" [tool.poetry.group.dev.dependencies] pygments = ">=2.10.0" diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index bd29f34..2e713a8 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -5,6 +5,7 @@ """ + def example_function(number1: int, number2: int) -> str: """Compare two integers. @@ -33,53 +34,83 @@ def example_function(number1: int, number2: int) -> str: return f"{number1} is greater than or equal to {number2}" + +# Itertools for functions creating iterators for efficient looping +import itertools + +# Optional for explicit type hint +from typing import Optional + # Holidays to calculate the number of holidays import holidays -# Pandas for table management -import pandas as pd + # Numpy for data wrangling import numpy as np -# Itertools for functions creating iterators for efficient looping -import itertools + +# Pandas for table management +import pandas as pd + def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: - """ - Parameters: - from_dates : List of first dates of period. - to_dates : List of last dates of period. + """Counts the number of workdays between pairs of dates in given series. + + This function calculates the number of workdays for each pair of start and end dates + provided in the `from_dates` and `to_dates` series. It handles date ranges spanning multiple + years and excludes weekends and holidays specific to Norway. The function dynamically + fetches Norwegian holidays for the relevant years based on the input dates. + + Args: + from_dates (pd.Series): A pandas Series containing the start dates of the periods. + to_dates (pd.Series): A pandas Series containing the end dates of the periods. + Returns: - Returns a list of number of workdays in period from first to last date. + np.ndarray: An array containing the number of workdays for each date pair. + + Raises: + ValueError: If the length of the calculated workdays list does not match the number of date pairs. + + Note: + - The function can handle date ranges spanning multiple years. + - Holidays are determined based on the Norwegian calendar for each year in the date range. """ # Convert the from_dates and to_dates columns to numpy arrays from_dates = from_dates.values to_dates = to_dates.values - # Extract the year from the from_dates array - year = from_dates.astype('datetime64[Y]').astype(int) + 1970 - # Check if the year is the same in the to_dates array - if not np.all(year == to_dates.astype('datetime64[Y]').astype(int) + 1970): - # If the year is not the same, raise an error - raise ValueError("Function can only be applied to dates in the same year!") - # Check if there is more than one unique year in the array - if np.unique(year).size > 1: - # If there is more than one unique year, raise an error - raise ValueError("Function can only be applied to a single year!") + + # Extract the year from the from_dates and to_dates arrays + from_years = from_dates.astype("datetime64[Y]").astype(int) + 1970 + to_years = to_dates.astype("datetime64[Y]").astype(int) + 1970 + + # Find the max and min years + min_year = np.min(from_years) + max_year = np.max(to_years) + + if min_year == max_year: + norwegian_holidays = holidays.NO(years=min_year) + else: + norwegian_holidays = holidays.NO(years=list(range(min_year, max_year + 1))) + + # Convert the holiday dates to a numpy array of datetime64 objects + holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") # Convert from_dates and to_dates to datetime64 arrays - from_dates = from_dates.astype('datetime64[D]') - to_dates = to_dates.astype('datetime64[D]') + from_dates = from_dates.astype("datetime64[D]") + to_dates = to_dates.astype("datetime64[D]") # Find the max and min dates min_date = np.min(from_dates) max_date = np.max(to_dates) # Generate a range of dates between the min and max dates - dates = np.arange(min_date, max_date + np.timedelta64(1, 'D'), dtype='datetime64[D]') - - # Convert the holiday dates to a numpy array of datetime64 objects - holiday_dates = np.array(sorted(holidays.NO(years=year).keys()), dtype='datetime64[D]') + dates = np.arange( + min_date, max_date + np.timedelta64(1, "D"), dtype="datetime64[D]" + ) # Filter the dates array to exclude holiday dates and weekends - workdays = dates[~np.isin(dates, holiday_dates) & ~np.isin((dates.astype('datetime64[D]').view('int64') - 4) % 7, [5, 6])] + workdays = dates[ + ~np.isin(dates, holiday_dates) + & ~np.isin((dates.astype("datetime64[D]").view("int64") - 4) % 7, [5, 6]) + ] # Calculate the number of workdays for each from and to date pair workdays_list = [] @@ -89,14 +120,16 @@ def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: # Check if the length of the workdays_list is the same as the number of date pairs if len(workdays_list) != len(from_dates): - raise ValueError("Unexpected error: length of workdays_list does not match the number of date pairs.") + raise ValueError( + "Unexpected error: length of workdays_list does not match the number of date pairs." + ) return np.array(workdays_list) + def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple: - """ - Given a year and a quarter, this function calculates the - first and last dates of the specified quarter using pandas. + """Given a year and a quarter, this function calculates the first and last dates of the specified quarter using pandas. + Args: year_str (str): The year as a string. quarter_str (str): The quarter as a string. @@ -119,50 +152,16 @@ def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple: end_date = start_date + pd.offsets.QuarterEnd() # Format dates as strings in 'YYYY-MM-DD' format - start_date_str = start_date.strftime('%Y-%m-%d') - end_date_str = end_date.strftime('%Y-%m-%d') + start_date_str = start_date.strftime("%Y-%m-%d") + end_date_str = end_date.strftime("%Y-%m-%d") return start_date_str, end_date_str -def format_invalids(df: pd.DataFrame, - col_name: str, - invalid: list, - label: str) -> None: - """ - Modify a column in a pandas DataFrame in-place by replacing values - specified in the 'invalid' list with a given 'label'. - - Parameters: - - df (pd.DataFrame): The pandas DataFrame containing the column to be modified. - - col_name (str): The name of the column in the DataFrame to be modified. - - invalid (list): List of values in the column to be replaced. - - label (str): The value to replace the invalid entries with. - - Returns: - None. The function modifies the DataFrame in-place. - """ - - # Identify which entries in the column are in the 'invalid' list - isinvalid = df[col_name].isin(invalid) - - # Get a list of unique invalid codes present in the column - invalid_codes = list(df[col_name][isinvalid].unique()) - - # Print the unique invalid codes found - print(f"The following invalid codes can be found in {col_name}: {invalid_codes}.") - - # Print the number of entries that will be changed to the label - print(f"Changes {len(df[col_name][isinvalid])} entries in {col_name} to '{label}'.") - - # Use .loc on the DataFrame to modify the column in-place - df.loc[isinvalid, col_name] = label - # Print the total number of entries that have been labeled - print(f"There is a total of {(df[col_name] == label).sum()} entries labelled as '{label}'.") - -def indicate_merge(left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: list) -> pd.DataFrame: - """ - Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. +def indicate_merge( + left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: list +) -> pd.DataFrame: + """Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. The merge types are determined as follows (left-to-right): - 'one-to-zero': Rows that exist only in the left DataFrame. @@ -187,7 +186,7 @@ def indicate_merge(left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: li merged_df = pd.merge(left, right, how=how, on=on, indicator=True) # Convert _merge column to numpy - np_merge = merged_df['_merge'].to_numpy() + np_merge = merged_df["_merge"].to_numpy() # Identify duplicate rows in each DataFrame duplicated_left = left.duplicated(subset=on, keep=False) @@ -195,28 +194,55 @@ def indicate_merge(left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: li # Different treatment depending on if "on" is a single column or not if isinstance(on, str): - duplicated_from_left = merged_df[on].isin(left.loc[duplicated_left, on].drop_duplicates()).to_numpy() - duplicated_from_right = merged_df[on].isin(right.loc[duplicated_right, on].drop_duplicates()).to_numpy() + duplicated_from_left = ( + merged_df[on] + .isin(left.loc[duplicated_left, on].drop_duplicates()) + .to_numpy() + ) + duplicated_from_right = ( + merged_df[on] + .isin(right.loc[duplicated_right, on].drop_duplicates()) + .to_numpy() + ) else: - duplicated_from_left = merged_df[on].apply(tuple, axis=1).isin(left[on][duplicated_left].drop_duplicates().apply(tuple, axis=1)).to_numpy() - duplicated_from_right = merged_df[on].apply(tuple, axis=1).isin(right[on][duplicated_right].drop_duplicates().apply(tuple, axis=1)).to_numpy() + duplicated_from_left = ( + merged_df[on] + .apply(tuple, axis=1) + .isin(left[on][duplicated_left].drop_duplicates().apply(tuple, axis=1)) + .to_numpy() + ) + duplicated_from_right = ( + merged_df[on] + .apply(tuple, axis=1) + .isin(right[on][duplicated_right].drop_duplicates().apply(tuple, axis=1)) + .to_numpy() + ) # Define the conditions and choices for np.select conditions = [ - (np_merge == 'left_only') & ~duplicated_from_left, - (np_merge == 'right_only') & ~duplicated_from_right, - (np_merge == 'left_only') & duplicated_from_left, - (np_merge == 'right_only') & duplicated_from_right, - (np_merge == 'both') & ~duplicated_from_left & ~duplicated_from_right, - (np_merge == 'both') & duplicated_from_left & ~duplicated_from_right, - (np_merge == 'both') & ~duplicated_from_left & duplicated_from_right, - (np_merge == 'both') & duplicated_from_right & duplicated_from_left + (np_merge == "left_only") & ~duplicated_from_left, + (np_merge == "right_only") & ~duplicated_from_right, + (np_merge == "left_only") & duplicated_from_left, + (np_merge == "right_only") & duplicated_from_right, + (np_merge == "both") & ~duplicated_from_left & ~duplicated_from_right, + (np_merge == "both") & duplicated_from_left & ~duplicated_from_right, + (np_merge == "both") & ~duplicated_from_left & duplicated_from_right, + (np_merge == "both") & duplicated_from_right & duplicated_from_left, ] - choices = ['one-to-zero', 'zero-to-one', 'many-to-zero', 'zero-to-many', 'one-to-one', 'many-to-one', 'one-to-many', 'many-to-many'] + choices = [ + "one-to-zero", + "zero-to-one", + "many-to-zero", + "zero-to-many", + "one-to-one", + "many-to-one", + "one-to-many", + "many-to-many", + ] # Use np.select to create new column - merge_type = np.select(conditions, choices, default='unknown') + merge_type = np.select(conditions, choices, default="unknown") # Print the frequency of each merge type unique, counts = np.unique(merge_type, return_counts=True) @@ -225,15 +251,15 @@ def indicate_merge(left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: li print(f"Number of entries of type '{i}': {j}") # Drop the _merge column and return the result - merged_df.drop(columns='_merge', inplace=True) + merged_df.drop(columns="_merge", inplace=True) return merged_df + def kv_intervall(start_p, slutt_p): - """ - This function generates a list of quarterly periods between two given periods. + """This function generates a list of quarterly periods between two given periods. - The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year + The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year and Q is a quarter (1 to 4). The function handles cases where the start and end periods are in the same year or in different years. @@ -260,78 +286,83 @@ def kv_intervall(start_p, slutt_p): intervall = [] # Generate the periods - for i in range(start_aar4, slutt_aar4+1): - if (start_aar4 == slutt_aar4): + for i in range(start_aar4, slutt_aar4 + 1): + if start_aar4 == slutt_aar4: # If the start and end periods are in the same year - for j in range(start_kv, slutt_kv+1): + for j in range(start_kv, slutt_kv + 1): intervall.append(f"{i}k{j}") - elif (i == start_aar4): + elif i == start_aar4: # If the current year is the start year - for j in range(start_kv, 4+1): + for j in range(start_kv, 4 + 1): intervall.append(f"{i}k{j}") - elif (start_aar4 < i and slutt_aar4 > i): + elif start_aar4 < i and slutt_aar4 > i: # If the current year is between the start and end years - for j in range(1, 4+1): + for j in range(1, 4 + 1): intervall.append(f"{i}k{j}") - elif (i == slutt_aar4): + elif i == slutt_aar4: # If the current year is the end year - for j in range(1, slutt_kv+1): + for j in range(1, slutt_kv + 1): intervall.append(f"{i}k{j}") return intervall + def proc_sums( df: pd.DataFrame, groups: list[str], values: list[str], - agg_func: dict = None + agg_func: Optional[dict] = None, ) -> pd.DataFrame: - """ - Compute aggregations for all combinations of columns and return a new - DataFrame with these aggregations. + """Compute aggregations for all combinations of columns and return a new DataFrame with these aggregations. Parameters: - df : pd.DataFrame - The input DataFrame. - groups : list[str] - List of columns to be considered for groupings. - values : list[str] - List of columns on which the aggregation functions will be applied. - agg_func : dict, optional - Dictionary mapping columns to aggregation functions corresponding to - the 'values' list. - Default is 'sum' for all columns in 'values'. + df : pd.DataFrame + The input DataFrame. + groups : list[str] + List of columns to be considered for groupings. + values : list[str] + List of columns on which the aggregation functions will be applied. + agg_func : Optional[dict], default None + Dictionary mapping columns to aggregation functions corresponding to + the 'values' list. If None, defaults to 'sum' for all columns in 'values'. Returns: - pd.DataFrame - A DataFrame containing aggregations for all combinations of 'columns'. + pd.DataFrame: A DataFrame containing aggregations for all combinations of 'columns'. + + Raises: + ValueError: If any of the specified columns in 'groups' or 'values' are not present in the DataFrame. + ValueError: If any columns in 'values' are not numeric and no aggregation function is provided. Notes: - - The returned DataFrame also contains an additional column named 'level' - indicating the level of grouping. - - Columns not used in a particular level of grouping will have a value - 'Total'. + - The returned DataFrame also contains an additional column named 'level' + indicating the level of grouping. + - Columns not used in a particular level of grouping will have a value 'Total'. """ - # All columns used from the input dataframe required_columns = groups + values # Check that the parameters references columns in the dataframe missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: - raise ValueError(f"Columns {', '.join(missing_columns)} are not present in the dataframe!") + raise ValueError( + f"Columns {', '.join(missing_columns)} are not present in the dataframe!" + ) # Check if all columns in 'values' are numeric - non_numeric_cols = [col for col in values if not pd.api.types.is_numeric_dtype(df[col])] + non_numeric_cols = [ + col for col in values if not pd.api.types.is_numeric_dtype(df[col]) + ] # Copy the dataframe and limit input to columns in the parameter df = df[required_columns].copy() # Default aggregation: 'sum' for all 'values' columns. if agg_func is None and not non_numeric_cols: - agg_func = {col: 'sum' for col in values} + agg_func = {col: "sum" for col in values} elif agg_func is None and non_numeric_cols: - raise ValueError(f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!") + raise ValueError( + f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" + ) else: # Correct a format causing error in agg-function for col, funcs in agg_func.items(): @@ -355,33 +386,51 @@ def proc_sums( sum_columns = list(groups_set - subset_set) if sum_columns: # For columns not in the current subset, fill with 'Total'. - sub_sum[sum_columns] = 'Total' + sub_sum[sum_columns] = "Total" # Indicate level of grouping - sub_sum['level'] = i + sub_sum["level"] = i # Append this subset's aggregation results to the final DataFrame. sum_df = pd.concat([sum_df, sub_sum], ignore_index=True) return sum_df + def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: - """ - Parameters: - from_dates : List of first dates of period. - to_dates : List of last dates of period. + """Determines if the reference day falls between given date ranges. + + This function checks if the 16th day of each month (reference day) is + within the range specified by the corresponding 'from_dates' and 'to_dates'. + It requires that both 'from_dates' and 'to_dates' are in the same year and month. + + Args: + from_dates (pd.Series): A Series of dates representing the start of a period. + These dates should be in the 'YYYY-MM-DD' format. + to_dates (pd.Series): A Series of dates representing the end of a period. + These dates should also be in the 'YYYY-MM-DD' format. + Returns: - Returns a list of booleans for if the - reference day is between the from_dates and to_dates. - The reference day is defined as the 16th of each month. + np.ndarray: An array of booleans. Each element corresponds to whether the + 16th day of the month for each period is within the respective date range. + + Raises: + ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if + they are not in the same month, or if multiple years are present across the dates. + + Example: + >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) + >>> to_dates = pd.Series(['2023-01-20', '2023-02-18']) + >>> ref_day(from_dates, to_dates) + array([True, True]) """ # Convert the from_dates and to_dates columns to numpy arrays from_dates = from_dates.values to_dates = to_dates.values # Extract the year from the from_dates array - year = from_dates.astype('datetime64[Y]').astype(int) + 1970 + year = from_dates.astype("datetime64[Y]").astype(int) + 1970 # Check if the year is the same in the to_dates array - if not np.all(year == to_dates.astype('datetime64[Y]').astype(int) + 1970): + if not np.all(year == to_dates.astype("datetime64[Y]").astype(int) + 1970): # If the year is not the same, raise an error raise ValueError("Function can only be applied to dates in the same year!") @@ -391,15 +440,15 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: raise ValueError("Function can only be applied to a single year!") # Extract the month from the from_dates array - month = from_dates.astype('datetime64[M]').astype(int) % 12 + 1 + month = from_dates.astype("datetime64[M]").astype(int) % 12 + 1 # Check if the month is the same in the to_dates array - if not np.all(month == to_dates.astype('datetime64[M]').astype(int) % 12 + 1): + if not np.all(month == to_dates.astype("datetime64[M]").astype(int) % 12 + 1): # If the month is not the same, raise an error raise ValueError("Function can only be applied to dates in the same months!") # Create a reference day for each month - ref_days = np.array([f"{year[0]}-{m:02d}-16" for m in month], dtype='datetime64[D]') + ref_days = np.array([f"{year[0]}-{m:02d}-16" for m in month], dtype="datetime64[D]") # Check if the reference day is within the range of the from_date and to_date result = np.logical_and(from_dates <= ref_days, ref_days <= to_dates) @@ -407,16 +456,35 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: # Return the result as an array of boolean values return result + def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: - """ - Parameters: - from_dates : List of first dates of period. - to_dates : List of last dates of period. + """Determines if any date in each date range falls in the reference week. + + This function checks if any date between the 'from_dates' and 'to_dates' + is within the reference week. The reference week is defined as the week + which includes the 16th day of each month. It requires that both + 'from_dates' and 'to_dates' are in the same year and the same month. + + Args: + from_dates (pd.Series): A Series of dates representing the start of a period. + These dates should be in the 'YYYY-MM-DD' format. + to_dates (pd.Series): A Series of dates representing the end of a period. + These dates should also be in the 'YYYY-MM-DD' format. + Returns: - Returns a list of booleans for if any of the dates between - from_dates and to_dates is in the reference week. The - reference week is defined as the week of the 16th of - each month. + pd.Series: A Series of booleans, where each boolean corresponds to whether + any date in the period from 'from_dates' to 'to_dates' falls within the + reference week of the month. + + Raises: + ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if + they are not in the same month. + + Example: + >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) + >>> to_dates = pd.Series(['2023-01-20', '2023-02-18']) + >>> ref_week(from_dates, to_dates) + pd.Series([True, True]) """ # Check if the year is the same in the to_dates array if not np.all(from_dates.dt.year == to_dates.dt.year): @@ -429,7 +497,9 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: raise ValueError("Function can only be applied to dates in the same months!") # Create a reference day for each month - ref_days = pd.to_datetime([f"{y}-{m:02d}-16" for y, m in zip(from_dates.dt.year, from_dates.dt.month)]) + ref_days = pd.to_datetime( + [f"{y}-{m:02d}-16" for y, m in zip(from_dates.dt.year, from_dates.dt.month)] + ) # Convert ref_days to a Series object to use the dt accessor ref_days = pd.Series(ref_days) @@ -444,4 +514,3 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: # Return the result as a series of boolean values return result - diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index ae0fcc7..e4144f1 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -1,23 +1,23 @@ """A collection of useful groups.""" -# Pandas for table management -import pandas as pd # Numpy for data wrangling import numpy as np +# Pandas for table management +import pandas as pd + + def alder_grp(alder: pd.Series, labels=True) -> pd.Series: - """ - Categorize a pandas Series of person ages into predefined groups. + """Categorize a pandas Series of person ages into predefined groups. Parameters: - alder (pd.Series): A pandas Series containing the person ages. - labels (bool, optional): If True, returns group labels; if False, returns keys; - if 'combined', returns a combination of keys and labels. + alder (pd.Series): A pandas Series containing the person ages. + labels (bool, optional): If True, returns group labels; if False, returns keys; + if 'combined', returns a combination of keys and labels. Returns: - pd.Series: A pandas Series where the original person ages are replaced by group labels or keys. + pd.Series: A pandas Series where the original person ages are replaced by group labels or keys. """ - # Define the conditions for each group conditions = [ np.logical_and(alder >= 16, alder <= 19), # 16-19 år @@ -30,323 +30,355 @@ def alder_grp(alder: pd.Series, labels=True) -> pd.Series: np.logical_and(alder >= 50, alder <= 54), # 50-54 år np.logical_and(alder >= 55, alder <= 59), # 55-59 år np.logical_and(alder >= 60, alder <= 64), # 60-64 år - np.logical_or(alder == 65, alder == 66), # 65-66 år - alder == 67, # 67 år - alder == 68, # 68 år - alder == 69, # 69 år + np.logical_or(alder == 65, alder == 66), # 65-66 år + alder == 67, # 67 år + alder == 68, # 68 år + alder == 69, # 69 år ] - + # Define the group labels with string keys groups = { - '1': '16-19 år', - '2': '20-24 år', - '3': '25-29 år', - '4': '30-34 år', - '5': '35-39 år', - '6': '40-44 år', - '7': '45-49 år', - '8': '50-54 år', - '9': '55-59 år', - '10': '60-64 år', - '11': '65-66 år', - '12': '67 år', - '13': '68 år', - '14': '69 år' + "1": "16-19 år", + "2": "20-24 år", + "3": "25-29 år", + "4": "30-34 år", + "5": "35-39 år", + "6": "40-44 år", + "7": "45-49 år", + "8": "50-54 år", + "9": "55-59 år", + "10": "60-64 år", + "11": "65-66 år", + "12": "67 år", + "13": "68 år", + "14": "69 år", } # Determine the format of the results based on the labels parameter - if labels == 'combined': + if labels == "combined": results = [f"{key} {value}" for key, value in groups.items()] elif labels: results = list(groups.values()) else: results = list(groups.keys()) - + # Apply the selected format to the series - return np.select(conditions, results, default='.') + return np.select(conditions, results, default=".") + def nace_sn07_47grp(nace_sn07: pd.Series, labels=True) -> pd.Series: - """ - Categorize a pandas Series of NACE-codes (SN07) into predefined groups. + """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: - nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. Returns: - pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. + pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. """ - # Removes periods in the NACE codes (if any) - nace_sn07 = nace_sn07.replace('.', '') + nace_sn07 = nace_sn07.replace(".", "") # Substring of NACE codes at length 2 and 3 - nace2 = pd.Series(nace_sn07.str[:2], name='nace2') - nace3 = pd.Series(nace_sn07.str[:3], name='nace3') + nace2 = pd.Series(nace_sn07.str[:2], name="nace2") + nace3 = pd.Series(nace_sn07.str[:3], name="nace3") # Define the conditions for each group conditions = [ - np.isin(nace2, ['01', '02', '03']), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass - np.logical_or(np.isin(nace2, ['05', '07', '08']), nace3 == '099'), # Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning - np.logical_or(nace2 == '06', nace3 == '091'), # Olje- og gassutvinning; Uoppgitt utvinning av petroleum - np.isin(nace2, ['10', '11', '12']), # Næringsmiddel-,drikkev.,tobakkind. - np.isin(nace2, ['13', '14', '15']), # Tekstil-,bekledn.-,lærvareind. - np.isin(nace2, ['16', '17']), # Trelast- og trevareind. - nace2 == '18', # Trykking, grafisk industri - np.isin(nace2, ['19', '20', '21']), # Petrolieum, kull, kjemisk og farmasøytisk industri - np.isin(nace2, ['22', '23']), # Gummivare-, plast-,mineralproduktind. - nace2 == '24', # Metallindustri - nace2 == '25', # Metallvareindustri - np.isin(nace2, ['26', '27']), # Data- og elektronisk industri - nace2 == '28', # Maskinindustri - np.logical_or(np.isin(nace2, ['29', '33']), np.logical_and(nace3 >= '302', nace3 <= '309')), # Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler - nace3 == '301', # Produksjon av skip og båter, inkl. oljeplattformer - np.isin(nace2, ['31', '32']), # Møbel og annen industri - nace2 == '35', # Elekstrisitet, gass, damp, varmtvann - np.logical_and(nace2 >= '36', nace2 <= '39'), # Vann, avløp og renovasjon - np.isin(nace2, ['41', '42', '43']), # Bygge- og anleggsvirksomhet - nace2 == '45', # Motorvognrep og -handel - nace2 == '46', # Agentur- og engroshandel - nace2 == '47', # Detaljhandel, unntatt motorvogner - nace2 == '49', # Landtransport og rørtransport - nace2 == '50', # Sjøfart - nace2 == '51', # Lufttransport - nace2 == '52', # Lagring og tjenester tilknyttet transport - nace2 == '53', # Posttjenester - nace2 == '55', # Overnattingsvirksomhet - nace2 == '56', # Serveringsvirksomhet - np.isin(nace2, ['58', '59', '60']), # Forlag, film-, TV-pr, kringkasting - np.isin(nace2, ['61', '62', '63']), # IKT-virksomhet - nace2 == '64', # Finansieringsvirksomhet (bank, m.m.) - nace2 == '65', # Forsikringsvirksomhet og pensjonskasser - nace2 == '66', # Finansiell tjenesteyting - nace2 == '68', # Omsetning og drift av fast eiendom - np.isin(nace2, ['69', '70', '71']), # Juridisk-, hovedkontor-, konsulentj. - nace2 == '72', # Forskning og utviklingsarbeid - np.isin(nace2, ['73', '74', '75']), # Faglig, vitenskapelig og teknisk tjenesteyting ellers - np.logical_and(nace2 >= '77', nace2 <= '82'), # Forretningsmessig tjenesteyting ellers - nace2 == '84', # Off.adm., forsvar, sosialforsikring - nace2 == '85', # Undervining - nace2 == '86', # Helsetjenester - np.isin(nace2, ['87', '88']), # Pleie og omsorg; Fritids- og sportsaktiviteter - np.logical_and(nace2 >= '90', nace2 <= '93'), # Kultur, underholdning og fritid - np.isin(nace2, ['94', '95', '96']), # Annen tjenesteyting - nace2 == '97', # Lønnet husarbeid i private husholdninger - nace2 == '99', # Internasjonale organisasjoner + np.isin( + nace2, ["01", "02", "03"] + ), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass + np.logical_or( + np.isin(nace2, ["05", "07", "08"]), nace3 == "099" + ), # Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning + np.logical_or( + nace2 == "06", nace3 == "091" + ), # Olje- og gassutvinning; Uoppgitt utvinning av petroleum + np.isin(nace2, ["10", "11", "12"]), # Næringsmiddel-,drikkev.,tobakkind. + np.isin(nace2, ["13", "14", "15"]), # Tekstil-,bekledn.-,lærvareind. + np.isin(nace2, ["16", "17"]), # Trelast- og trevareind. + nace2 == "18", # Trykking, grafisk industri + np.isin( + nace2, ["19", "20", "21"] + ), # Petrolieum, kull, kjemisk og farmasøytisk industri + np.isin(nace2, ["22", "23"]), # Gummivare-, plast-,mineralproduktind. + nace2 == "24", # Metallindustri + nace2 == "25", # Metallvareindustri + np.isin(nace2, ["26", "27"]), # Data- og elektronisk industri + nace2 == "28", # Maskinindustri + np.logical_or( + np.isin(nace2, ["29", "33"]), np.logical_and(nace3 >= "302", nace3 <= "309") + ), # Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler + nace3 == "301", # Produksjon av skip og båter, inkl. oljeplattformer + np.isin(nace2, ["31", "32"]), # Møbel og annen industri + nace2 == "35", # Elekstrisitet, gass, damp, varmtvann + np.logical_and(nace2 >= "36", nace2 <= "39"), # Vann, avløp og renovasjon + np.isin(nace2, ["41", "42", "43"]), # Bygge- og anleggsvirksomhet + nace2 == "45", # Motorvognrep og -handel + nace2 == "46", # Agentur- og engroshandel + nace2 == "47", # Detaljhandel, unntatt motorvogner + nace2 == "49", # Landtransport og rørtransport + nace2 == "50", # Sjøfart + nace2 == "51", # Lufttransport + nace2 == "52", # Lagring og tjenester tilknyttet transport + nace2 == "53", # Posttjenester + nace2 == "55", # Overnattingsvirksomhet + nace2 == "56", # Serveringsvirksomhet + np.isin(nace2, ["58", "59", "60"]), # Forlag, film-, TV-pr, kringkasting + np.isin(nace2, ["61", "62", "63"]), # IKT-virksomhet + nace2 == "64", # Finansieringsvirksomhet (bank, m.m.) + nace2 == "65", # Forsikringsvirksomhet og pensjonskasser + nace2 == "66", # Finansiell tjenesteyting + nace2 == "68", # Omsetning og drift av fast eiendom + np.isin(nace2, ["69", "70", "71"]), # Juridisk-, hovedkontor-, konsulentj. + nace2 == "72", # Forskning og utviklingsarbeid + np.isin( + nace2, ["73", "74", "75"] + ), # Faglig, vitenskapelig og teknisk tjenesteyting ellers + np.logical_and( + nace2 >= "77", nace2 <= "82" + ), # Forretningsmessig tjenesteyting ellers + nace2 == "84", # Off.adm., forsvar, sosialforsikring + nace2 == "85", # Undervining + nace2 == "86", # Helsetjenester + np.isin(nace2, ["87", "88"]), # Pleie og omsorg; Fritids- og sportsaktiviteter + np.logical_and(nace2 >= "90", nace2 <= "93"), # Kultur, underholdning og fritid + np.isin(nace2, ["94", "95", "96"]), # Annen tjenesteyting + nace2 == "97", # Lønnet husarbeid i private husholdninger + nace2 == "99", # Internasjonale organisasjoner ] # Define the group labels with string keys groups = { - '01': 'Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass', - '02': 'Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning', - '03': 'Olje- og gassutvinning; Uoppgitt utvinning av petroleum', - '04': 'Næringsmiddel-,drikkev.,tobakkind.', - '05': 'Tekstil-,bekledn.-,lærvareind.', - '06': 'Trelast- og trevareind.', - '07': 'Trykking, grafisk industri', - '08': 'Petrolieum, kull, kjemisk og farmasøytisk industri', - '09': 'Gummivare-, plast-,mineralproduktind.', - '10': 'Metallindustri', - '11': 'Metallvareindustri', - '12': 'Data- og elektronisk industri', - '13': 'Maskinindustri', - '14': 'Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler', - '15': 'Produksjon av skip og båter, inkl. oljeplattformer', - '16': 'Møbel og annen industri', - '17': 'Elekstrisitet, gass, damp, varmtvann', - '18': 'Vann, avløp og renovasjon', - '19': 'Bygge- og anleggsvirksomhet', - '20': 'Motorvognrep og -handel', - '21': 'Agentur- og engroshandel', - '22': 'Detaljhandel, unntatt motorvogner', - '23': 'Landtransport og rørtransport', - '24': 'Sjøfart', - '25': 'Lufttransport', - '26': 'Lagring og tjenester tilknyttet transport', - '27': 'Posttjenester', - '28': 'Overnattingsvirksomhet', - '29': 'Serveringsvirksomhet', - '30': 'Forlag, film-, TV-pr, kringkasting', - '31': 'IKT-virksomhet', - '32': 'Finansieringsvirksomhet (bank, m.m.)', - '33': 'Forsikringsvirksomhet og pensjonskasser', - '34': 'Finansiell tjenesteyting', - '35': 'Omsetning og drift av fast eiendom', - '36': 'Juridisk-, hovedkontor-, konsulentj.', - '37': 'Forskning og utviklingsarbeid', - '38': 'Faglig, vitenskapelig og teknisk tjenesteyting ellers', - '39': 'Forretningsmessig tjenesteyting ellers', - '40': 'Off.adm., forsvar, sosialforsikring', - '41': 'Undervisning', - '42': 'Helsetjenester', - '43': 'Pleie og omsorg; Fritids- og sportsaktiviteter', - '44': 'Kultur, underholdning og fritid', - '45': 'Annen tjenesteyting', - '46': 'Lønnet husarbeid i private husholdninger', - '47': 'Internasjonale organisasjoner' + "01": "Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass", + "02": "Annen utvinning; Bygging av skip og båter; Reparasjon og installasjon av maskiner og utstyr; Uoppgitt utvinning", + "03": "Olje- og gassutvinning; Uoppgitt utvinning av petroleum", + "04": "Næringsmiddel-,drikkev.,tobakkind.", + "05": "Tekstil-,bekledn.-,lærvareind.", + "06": "Trelast- og trevareind.", + "07": "Trykking, grafisk industri", + "08": "Petrolieum, kull, kjemisk og farmasøytisk industri", + "09": "Gummivare-, plast-,mineralproduktind.", + "10": "Metallindustri", + "11": "Metallvareindustri", + "12": "Data- og elektronisk industri", + "13": "Maskinindustri", + "14": "Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler", + "15": "Produksjon av skip og båter, inkl. oljeplattformer", + "16": "Møbel og annen industri", + "17": "Elekstrisitet, gass, damp, varmtvann", + "18": "Vann, avløp og renovasjon", + "19": "Bygge- og anleggsvirksomhet", + "20": "Motorvognrep og -handel", + "21": "Agentur- og engroshandel", + "22": "Detaljhandel, unntatt motorvogner", + "23": "Landtransport og rørtransport", + "24": "Sjøfart", + "25": "Lufttransport", + "26": "Lagring og tjenester tilknyttet transport", + "27": "Posttjenester", + "28": "Overnattingsvirksomhet", + "29": "Serveringsvirksomhet", + "30": "Forlag, film-, TV-pr, kringkasting", + "31": "IKT-virksomhet", + "32": "Finansieringsvirksomhet (bank, m.m.)", + "33": "Forsikringsvirksomhet og pensjonskasser", + "34": "Finansiell tjenesteyting", + "35": "Omsetning og drift av fast eiendom", + "36": "Juridisk-, hovedkontor-, konsulentj.", + "37": "Forskning og utviklingsarbeid", + "38": "Faglig, vitenskapelig og teknisk tjenesteyting ellers", + "39": "Forretningsmessig tjenesteyting ellers", + "40": "Off.adm., forsvar, sosialforsikring", + "41": "Undervisning", + "42": "Helsetjenester", + "43": "Pleie og omsorg; Fritids- og sportsaktiviteter", + "44": "Kultur, underholdning og fritid", + "45": "Annen tjenesteyting", + "46": "Lønnet husarbeid i private husholdninger", + "47": "Internasjonale organisasjoner", } # Determine and apply the selected format based on the labels parameter - if labels == 'combined': + if labels == "combined": combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default='99 Uoppgitt') + return np.select(conditions, combined_labels, default="99 Uoppgitt") elif labels: - return np.select(conditions, list(groups.values()), default='Uoppgitt') + return np.select(conditions, list(groups.values()), default="Uoppgitt") else: - return np.select(conditions, list(groups.keys()), default='99') + return np.select(conditions, list(groups.keys()), default="99") -def nace_sn07_17grp(nace_sn07: pd.Series) -> pd.Series: - """ - Categorize a pandas Series of NACE-codes (SN07) into predefined groups. + +def nace_sn07_17grp(nace_sn07: pd.Series, labels=True) -> pd.Series: + """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: - nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. Returns: - pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. + pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. """ - # Removes labels (if any) nace_str2 = nace_sn07.str[:2] - + # Counts the number of unique groups of nace codes n_unique_grp = len(nace_str2.unique()) - + # Check if nace codes are already grouped into 47-groups if n_unique_grp > 48: - print(f"Warning: There are {n_unique_grp} unique industry divisions on 2-number level. The function first groups the input into the 47 groups standard.") + print( + f"Warning: There are {n_unique_grp} unique industry divisions on 2-number level. The function first groups the input into the 47 groups standard." + ) nace_str2 = nace_sn07_47grp(nace_sn07, labels=False) # Define the conditions for each group conditions = [ - nace_str2 == '01', # 01-03 Jordbruk, skogbruk og fiske - np.logical_and(nace_str2 >= '01', nace_str2 <= '03'), # 05-09 Bergverksdrift og utvinning - np.logical_and(nace_str2 >= '04', nace_str2 <= '16'), # 10-33 Industri - np.logical_and(nace_str2 >= '17', nace_str2 <= '18'), # 35-39 Elektrisitet, vann og renovasjon - nace_str2 == '19', # 41-43 Bygge- og anleggsvirksomhet - np.logical_and(nace_str2 >= '20', nace_str2 <= '22'), # 45-47 Varehandel, reparasjon av motorvogner - np.logical_and(nace_str2 >= '23', nace_str2 <= '27'), # 49-53 Transport og lagring - np.logical_and(nace_str2 >= '28', nace_str2 <= '29'), # 55-56 Overnattings- og serveringsvirksomhet - np.logical_and(nace_str2 >= '30', nace_str2 <= '31'), # 58-63 Informasjon og kommunikasjon - np.logical_and(nace_str2 >= '32', nace_str2 <= '34'), # 64-66 Finansiering og forsikring - np.logical_and(nace_str2 >= '35', nace_str2 <= '38'), # 68-75 Teknisk tjenesteyting, eiendomsdrift - nace_str2 == '39', # 77-82 Forretningsmessig tjenesteyting - nace_str2 == '40', # 84 Off.adm., forsvar, sosialforsikring - nace_str2 == '41', # 85 Undervisning - np.logical_and(nace_str2 >= '42', nace_str2 <= '43'), # 86-88 Helse- og sosialtjenester - np.logical_and(nace_str2 >= '44', nace_str2 <= '47') # 90-99 Personlig tjenesteyting + nace_str2 == "01", # 01-03 Jordbruk, skogbruk og fiske + np.logical_and( + nace_str2 >= "01", nace_str2 <= "03" + ), # 05-09 Bergverksdrift og utvinning + np.logical_and(nace_str2 >= "04", nace_str2 <= "16"), # 10-33 Industri + np.logical_and( + nace_str2 >= "17", nace_str2 <= "18" + ), # 35-39 Elektrisitet, vann og renovasjon + nace_str2 == "19", # 41-43 Bygge- og anleggsvirksomhet + np.logical_and( + nace_str2 >= "20", nace_str2 <= "22" + ), # 45-47 Varehandel, reparasjon av motorvogner + np.logical_and( + nace_str2 >= "23", nace_str2 <= "27" + ), # 49-53 Transport og lagring + np.logical_and( + nace_str2 >= "28", nace_str2 <= "29" + ), # 55-56 Overnattings- og serveringsvirksomhet + np.logical_and( + nace_str2 >= "30", nace_str2 <= "31" + ), # 58-63 Informasjon og kommunikasjon + np.logical_and( + nace_str2 >= "32", nace_str2 <= "34" + ), # 64-66 Finansiering og forsikring + np.logical_and( + nace_str2 >= "35", nace_str2 <= "38" + ), # 68-75 Teknisk tjenesteyting, eiendomsdrift + nace_str2 == "39", # 77-82 Forretningsmessig tjenesteyting + nace_str2 == "40", # 84 Off.adm., forsvar, sosialforsikring + nace_str2 == "41", # 85 Undervisning + np.logical_and( + nace_str2 >= "42", nace_str2 <= "43" + ), # 86-88 Helse- og sosialtjenester + np.logical_and( + nace_str2 >= "44", nace_str2 <= "47" + ), # 90-99 Personlig tjenesteyting ] # Define the group labels with string keys groups = { - '01-03': 'Jordbruk, skogbruk og fiske', - '05-09': 'Bergverksdrift og utvinning', - '10-33': 'Industri', - '35-39': 'Elektrisitet, vann og renovasjon', - '41-43': 'Bygge- og anleggsvirksomhet', - '45-47': 'Varehandel, reparasjon av motorvogner', - '49-53': 'Transport og lagring', - '55-56': 'Overnattings- og serveringsvirksomhet', - '58-63': 'Informasjon og kommunikasjon', - '64-66': 'Finansiering og forsikring', - '68-75': 'Teknisk tjenesteyting, eiendomsdrift', - '77-82': 'Forretningsmessig tjenesteyting', - '84': 'Off.adm., forsvar, sosialforsikring', - '85': 'Undervisning', - '86-88': 'Helse- og sosialtjenester', - '90-99': 'Personlig tjenesteyting' + "01-03": "Jordbruk, skogbruk og fiske", + "05-09": "Bergverksdrift og utvinning", + "10-33": "Industri", + "35-39": "Elektrisitet, vann og renovasjon", + "41-43": "Bygge- og anleggsvirksomhet", + "45-47": "Varehandel, reparasjon av motorvogner", + "49-53": "Transport og lagring", + "55-56": "Overnattings- og serveringsvirksomhet", + "58-63": "Informasjon og kommunikasjon", + "64-66": "Finansiering og forsikring", + "68-75": "Teknisk tjenesteyting, eiendomsdrift", + "77-82": "Forretningsmessig tjenesteyting", + "84": "Off.adm., forsvar, sosialforsikring", + "85": "Undervisning", + "86-88": "Helse- og sosialtjenester", + "90-99": "Personlig tjenesteyting", } # Determine and apply the selected format based on the labels parameter - if labels == 'combined': + if labels == "combined": combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default='999 Uoppgitt') + return np.select(conditions, combined_labels, default="999 Uoppgitt") elif labels: - return np.select(conditions, list(groups.values()), default='Uoppgitt') + return np.select(conditions, list(groups.values()), default="Uoppgitt") else: - return np.select(conditions, list(groups.keys()), default='999') - + return np.select(conditions, list(groups.keys()), default="999") + + def sektor2_grp(sektor: pd.Series, undersektor: pd.Series, labels=True) -> pd.Series: - """ - Categorize a pandas Series of sectors and subsectors into predefined groups. + """Categorize a pandas Series of sectors and subsectors into predefined groups. Parameters: - sektor (pd.Series): A pandas Series containing the sector codes. - undersektor (pd.Series): A pandas Series containing the subsector codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + sektor (pd.Series): A pandas Series containing the sector codes. + undersektor (pd.Series): A pandas Series containing the subsector codes. + labels (bool, optional): Whether to return group labels or keys. Default is True. Returns: - pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. + pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. """ - # Define the conditions for each group conditions = [ - sektor == '6100', - np.logical_and(sektor == '6500', undersektor != '007'), - np.logical_and(sektor == '6500', undersektor == '007'), - sektor == '1510', - sektor == '1520', + sektor == "6100", + np.logical_and(sektor == "6500", undersektor != "007"), + np.logical_and(sektor == "6500", undersektor == "007"), + sektor == "1510", + sektor == "1520", ] - + groups = { - '110': 'Statlig forvaltning', - '550': 'Kommunal forvaltning', - '510': 'Fylkeskommunal forvaltning', - '660': 'Kommunale foretak med ubegrenset ansvar', - '680': 'Kommunalt eide aksjeselskaper m.v.' + "110": "Statlig forvaltning", + "550": "Kommunal forvaltning", + "510": "Fylkeskommunal forvaltning", + "660": "Kommunale foretak med ubegrenset ansvar", + "680": "Kommunalt eide aksjeselskaper m.v.", } # Determine and apply the selected format based on the labels parameter - if labels == 'combined': + if labels == "combined": combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default='999 Uoppgitt') + return np.select(conditions, combined_labels, default="999 Uoppgitt") elif labels: - return np.select(conditions, list(groups.values()), default='Uoppgitt') + return np.select(conditions, list(groups.values()), default="Uoppgitt") else: - return np.select(conditions, list(groups.keys()), default='999') - - def virk_str_8grp(ansatte: pd.Series, labels=True) -> pd.Series: - """ - Categorize a pandas Series of employee counts into predefined groups. + return np.select(conditions, list(groups.keys()), default="999") + + +def virk_str_8grp(ansatte: pd.Series, labels=True) -> pd.Series: + """Categorize a pandas Series of employee counts into predefined groups. Parameters: - ansatte (pd.Series): A pandas Series containing the employee counts. - labels (bool, optional): Whether to return group labels or keys. Default is True. + ansatte (pd.Series): A pandas Series containing the employee counts. + labels (bool, optional): Whether to return group labels or keys. Default is True. Returns: - pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. + pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. """ - # Define the conditions for each group conditions = [ - ansatte == 0, # No employees - np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees - np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees - np.logical_and(ansatte >= 10, ansatte <= 19), # 10-19 employees - np.logical_and(ansatte >= 20, ansatte <= 49), # 20-49 employees - np.logical_and(ansatte >= 50, ansatte <= 99), # 50-99 employees + ansatte == 0, # No employees + np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees + np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees + np.logical_and(ansatte >= 10, ansatte <= 19), # 10-19 employees + np.logical_and(ansatte >= 20, ansatte <= 49), # 20-49 employees + np.logical_and(ansatte >= 50, ansatte <= 99), # 50-99 employees np.logical_and(ansatte >= 100, ansatte <= 249), # 100-249 employees - ansatte >= 250, # 250 employees or more + ansatte >= 250, # 250 employees or more ] # Define the group labels with string keys groups = { - '1': 'Ingen ansatte', - '2': '1-4 ansatte', - '3': '5-9 ansatte', - '4': '10-19 ansatte', - '5': '20-49 ansatte', - '6': '50-99 ansatte', - '7': '100-249 ansatte', - '8': '250 ansatte og over' + "1": "Ingen ansatte", + "2": "1-4 ansatte", + "3": "5-9 ansatte", + "4": "10-19 ansatte", + "5": "20-49 ansatte", + "6": "50-99 ansatte", + "7": "100-249 ansatte", + "8": "250 ansatte og over", } - + # Determine and apply the selected format based on the labels parameter - if labels == 'combined': + if labels == "combined": combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default='99 Uoppgitt') + return np.select(conditions, combined_labels, default="99 Uoppgitt") elif labels: - return np.select(conditions, list(groups.values()), default='Uoppgitt') + return np.select(conditions, list(groups.values()), default="Uoppgitt") else: - return np.select(conditions, list(groups.keys()), default='99') \ No newline at end of file + return np.select(conditions, list(groups.keys()), default="99") From c64576d3582fa5bff2486d629e485cfdb5de6330 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Tue, 5 Dec 2023 15:23:14 +0100 Subject: [PATCH 03/49] nox run --- poetry.lock | 30 +++++++++++++++++++++++-- pyproject.toml | 1 + src/ssb_arbmark_fagfunksjoner/groups.py | 12 +++++----- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7e6dfa3..a0a6421 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "alabaster" @@ -1326,6 +1326,21 @@ sql-other = ["SQLAlchemy (>=1.4.36)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.8.0)"] +[[package]] +name = "pandas-stubs" +version = "2.1.1.230928" +description = "Type annotations for pandas" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas_stubs-2.1.1.230928-py3-none-any.whl", hash = "sha256:992d97159e054ca3175ebe8321ac5616cf6502dd8218b03bb0eaf3c4f6939037"}, + {file = "pandas_stubs-2.1.1.230928.tar.gz", hash = "sha256:ce1691c71c5d67b8f332da87763f7f54650f46895d99964d588c3a5d79e2cacc"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = "python_version < \"3.13\""} +types-pytz = ">=2022.1.1" + [[package]] name = "parso" version = "0.8.3" @@ -2342,6 +2357,17 @@ files = [ {file = "types_python_dateutil-2.8.19.14-py3-none-any.whl", hash = "sha256:f977b8de27787639986b4e28963263fd0e5158942b3ecef91b9335c130cb1ce9"}, ] +[[package]] +name = "types-pytz" +version = "2023.3.1.1" +description = "Typing stubs for pytz" +optional = false +python-versions = "*" +files = [ + {file = "types-pytz-2023.3.1.1.tar.gz", hash = "sha256:cc23d0192cd49c8f6bba44ee0c81e4586a8f30204970fc0894d209a6b08dab9a"}, + {file = "types_pytz-2023.3.1.1-py3-none-any.whl", hash = "sha256:1999a123a3dc0e39a2ef6d19f3f8584211de9e6a77fe7a0259f04a524e90a5cf"}, +] + [[package]] name = "typing-extensions" version = "4.8.0" @@ -2470,4 +2496,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "1027d02cb93ebdc4ee471c8377b8da7b7464d099b8492c5e7250fbbdb2f341a1" +content-hash = "c71ee25bdad9a15471d2f366fe7868e4a4916e807e29f447ce2788f9ff5d7bf4" diff --git a/pyproject.toml b/pyproject.toml index 17bfc6f..e25b16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ nox = "^2023.4.22" pandas = "^2.1.3" numpy = "^1.26.2" holidays = "^0.37" +pandas-stubs = "^2.1.1.230928" [tool.poetry.group.dev.dependencies] pygments = ">=2.10.0" diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index e4144f1..fb4cd60 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -7,7 +7,7 @@ import pandas as pd -def alder_grp(alder: pd.Series, labels=True) -> pd.Series: +def alder_grp(alder: pd.Series, labels: bool = True) -> pd.Series: """Categorize a pandas Series of person ages into predefined groups. Parameters: @@ -66,7 +66,7 @@ def alder_grp(alder: pd.Series, labels=True) -> pd.Series: return np.select(conditions, results, default=".") -def nace_sn07_47grp(nace_sn07: pd.Series, labels=True) -> pd.Series: +def nace_sn07_47grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: @@ -209,7 +209,7 @@ def nace_sn07_47grp(nace_sn07: pd.Series, labels=True) -> pd.Series: return np.select(conditions, list(groups.keys()), default="99") -def nace_sn07_17grp(nace_sn07: pd.Series, labels=True) -> pd.Series: +def nace_sn07_17grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: @@ -302,7 +302,9 @@ def nace_sn07_17grp(nace_sn07: pd.Series, labels=True) -> pd.Series: return np.select(conditions, list(groups.keys()), default="999") -def sektor2_grp(sektor: pd.Series, undersektor: pd.Series, labels=True) -> pd.Series: +def sektor2_grp( + sektor: pd.Series, undersektor: pd.Series, labels: bool = True +) -> pd.Series: """Categorize a pandas Series of sectors and subsectors into predefined groups. Parameters: @@ -340,7 +342,7 @@ def sektor2_grp(sektor: pd.Series, undersektor: pd.Series, labels=True) -> pd.Se return np.select(conditions, list(groups.keys()), default="999") -def virk_str_8grp(ansatte: pd.Series, labels=True) -> pd.Series: +def virk_str_8grp(ansatte: pd.Series, labels: bool = True) -> pd.Series: """Categorize a pandas Series of employee counts into predefined groups. Parameters: From 72767f16fc8765e65f523cb33ddc39a7a1d1e195 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Tue, 5 Dec 2023 15:29:01 +0100 Subject: [PATCH 04/49] Resolve conflict --- .github/workflows/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/constraints.txt b/.github/workflows/constraints.txt index 791bf6a..37bb30b 100644 --- a/.github/workflows/constraints.txt +++ b/.github/workflows/constraints.txt @@ -2,4 +2,4 @@ pip==23.3.1 nox==2023.4.22 nox-poetry==1.0.3 poetry==1.7.1 -virtualenv==20.24.7 +virtualenv==20.25.0 From 04b8ba7c620ea87dd6ad5822553858e59a298323 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Tue, 5 Dec 2023 16:06:38 +0100 Subject: [PATCH 05/49] dtype specifications --- src/ssb_arbmark_fagfunksjoner/functions.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 2e713a8..7e06fab 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -51,7 +51,7 @@ def example_function(number1: int, number2: int) -> str: import pandas as pd -def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: +def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Counts the number of workdays between pairs of dates in given series. This function calculates the number of workdays for each pair of start and end dates @@ -64,7 +64,7 @@ def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: to_dates (pd.Series): A pandas Series containing the end dates of the periods. Returns: - np.ndarray: An array containing the number of workdays for each date pair. + pd.Series: A Pandas Series containing the number of workdays for each date pair. Raises: ValueError: If the length of the calculated workdays list does not match the number of date pairs. @@ -86,9 +86,9 @@ def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: max_year = np.max(to_years) if min_year == max_year: - norwegian_holidays = holidays.NO(years=min_year) + norwegian_holidays = holidays.Norway(years=min_year) else: - norwegian_holidays = holidays.NO(years=list(range(min_year, max_year + 1))) + norwegian_holidays = holidays.Norway(years=range(min_year, max_year + 1)) # Convert the holiday dates to a numpy array of datetime64 objects holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") @@ -124,10 +124,10 @@ def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: "Unexpected error: length of workdays_list does not match the number of date pairs." ) - return np.array(workdays_list) + return pd.Series(workdays_list, dtype=int) -def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple: +def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[int, int]: """Given a year and a quarter, this function calculates the first and last dates of the specified quarter using pandas. Args: @@ -155,7 +155,7 @@ def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple: start_date_str = start_date.strftime("%Y-%m-%d") end_date_str = end_date.strftime("%Y-%m-%d") - return start_date_str, end_date_str + return tuple(start_date_str, end_date_str) def indicate_merge( @@ -395,7 +395,7 @@ def proc_sums( return sum_df -def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: +def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Determines if the reference day falls between given date ranges. This function checks if the 16th day of each month (reference day) is @@ -454,7 +454,7 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> np.ndarray: result = np.logical_and(from_dates <= ref_days, ref_days <= to_dates) # Return the result as an array of boolean values - return result + return pd.Series(result, dtype=bool) def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: @@ -513,4 +513,4 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: result = np.logical_and(from_weeks <= ref_weeks, ref_weeks <= to_weeks) # Return the result as a series of boolean values - return result + return pd.Series(result, dtype=bool) From 69afa383c381326b9ede01a13ddeab19a075e78d Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Wed, 6 Dec 2023 16:47:57 +0100 Subject: [PATCH 06/49] Fixed non-overlapping equality in groups --- src/ssb_arbmark_fagfunksjoner/groups.py | 78 +++++++++++++------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index fb4cd60..73a2c97 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -7,16 +7,16 @@ import pandas as pd -def alder_grp(alder: pd.Series, labels: bool = True) -> pd.Series: +def alder_grp(alder: pd.Series, display: str = "label") -> pd.Series: """Categorize a pandas Series of person ages into predefined groups. Parameters: alder (pd.Series): A pandas Series containing the person ages. - labels (bool, optional): If True, returns group labels; if False, returns keys; - if 'combined', returns a combination of keys and labels. + display (str, optional): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: - pd.Series: A pandas Series where the original person ages are replaced by group labels or keys. + pd.Series: A pandas Series where the original person ages are replaced by group labels, keys, or a combination. """ # Define the conditions for each group conditions = [ @@ -54,24 +54,25 @@ def alder_grp(alder: pd.Series, labels: bool = True) -> pd.Series: "14": "69 år", } - # Determine the format of the results based on the labels parameter - if labels == "combined": - results = [f"{key} {value}" for key, value in groups.items()] - elif labels: + # Determine the format of the results based on the display parameter + if display == "label": results = list(groups.values()) - else: + elif display == "number": results = list(groups.keys()) + else: + results = [f"{key} {value}" for key, value in groups.items()] # Apply the selected format to the series return np.select(conditions, results, default=".") -def nace_sn07_47grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: +def nace_sn07_47grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + display (str, optional): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. @@ -200,21 +201,22 @@ def nace_sn07_47grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: } # Determine and apply the selected format based on the labels parameter - if labels == "combined": - combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="99 Uoppgitt") - elif labels: + if display == "label": return np.select(conditions, list(groups.values()), default="Uoppgitt") - else: + elif display == "number": return np.select(conditions, list(groups.keys()), default="99") + else: + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default="99 Uoppgitt") -def nace_sn07_17grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: +def nace_sn07_17grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + display (str, optional): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. @@ -293,24 +295,25 @@ def nace_sn07_17grp(nace_sn07: pd.Series, labels: bool = True) -> pd.Series: } # Determine and apply the selected format based on the labels parameter - if labels == "combined": - combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="999 Uoppgitt") - elif labels: + if display == "label": return np.select(conditions, list(groups.values()), default="Uoppgitt") - else: + elif display == "number": return np.select(conditions, list(groups.keys()), default="999") + else: + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default="999 Uoppgitt") def sektor2_grp( - sektor: pd.Series, undersektor: pd.Series, labels: bool = True + sektor: pd.Series, undersektor: pd.Series, display: str = "label" ) -> pd.Series: """Categorize a pandas Series of sectors and subsectors into predefined groups. Parameters: sektor (pd.Series): A pandas Series containing the sector codes. undersektor (pd.Series): A pandas Series containing the subsector codes. - labels (bool, optional): Whether to return group labels or keys. Default is True. + display (str, optional): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. @@ -333,21 +336,22 @@ def sektor2_grp( } # Determine and apply the selected format based on the labels parameter - if labels == "combined": - combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="999 Uoppgitt") - elif labels: + if display == "label": return np.select(conditions, list(groups.values()), default="Uoppgitt") - else: + elif display == "number": return np.select(conditions, list(groups.keys()), default="999") + else: + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default="999 Uoppgitt") -def virk_str_8grp(ansatte: pd.Series, labels: bool = True) -> pd.Series: +def virk_str_8grp(ansatte: pd.Series, display: str = "label") -> pd.Series: """Categorize a pandas Series of employee counts into predefined groups. Parameters: ansatte (pd.Series): A pandas Series containing the employee counts. - labels (bool, optional): Whether to return group labels or keys. Default is True. + display (str, optional): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. @@ -377,10 +381,10 @@ def virk_str_8grp(ansatte: pd.Series, labels: bool = True) -> pd.Series: } # Determine and apply the selected format based on the labels parameter - if labels == "combined": - combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="99 Uoppgitt") - elif labels: + if display == "label": return np.select(conditions, list(groups.values()), default="Uoppgitt") - else: + elif display == "number": return np.select(conditions, list(groups.keys()), default="99") + else: + combined_labels = [f"{key} {value}" for key, value in groups.items()] + return np.select(conditions, combined_labels, default="99 Uoppgitt") From 72721763bed4d5850984783fd4d21bd83dc99867 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Wed, 6 Dec 2023 17:01:10 +0100 Subject: [PATCH 07/49] Fixed missing type parameters for Series and reformatted --- src/ssb_arbmark_fagfunksjoner/functions.py | 12 +++++++++--- src/ssb_arbmark_fagfunksjoner/groups.py | 16 ++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 7e06fab..4ea48c9 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -51,7 +51,9 @@ def example_function(number1: int, number2: int) -> str: import pandas as pd -def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: +def count_workdays( + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] +) -> pd.Series[int]: """Counts the number of workdays between pairs of dates in given series. This function calculates the number of workdays for each pair of start and end dates @@ -395,7 +397,9 @@ def proc_sums( return sum_df -def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: +def ref_day( + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] +) -> pd.Series[bool]: """Determines if the reference day falls between given date ranges. This function checks if the 16th day of each month (reference day) is @@ -457,7 +461,9 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: return pd.Series(result, dtype=bool) -def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: +def ref_week( + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] +) -> pd.Series[bool]: """Determines if any date in each date range falls in the reference week. This function checks if any date between the 'from_dates' and 'to_dates' diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 73a2c97..23e0118 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -7,7 +7,7 @@ import pandas as pd -def alder_grp(alder: pd.Series, display: str = "label") -> pd.Series: +def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: """Categorize a pandas Series of person ages into predefined groups. Parameters: @@ -66,7 +66,9 @@ def alder_grp(alder: pd.Series, display: str = "label") -> pd.Series: return np.select(conditions, results, default=".") -def nace_sn07_47grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: +def nace_sn07_47grp( + nace_sn07: pd.Series[str], display: str = "label" +) -> pd.Series[str]: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: @@ -210,7 +212,9 @@ def nace_sn07_47grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: return np.select(conditions, combined_labels, default="99 Uoppgitt") -def nace_sn07_17grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: +def nace_sn07_17grp( + nace_sn07: pd.Series[str], display: str = "label" +) -> pd.Series[str]: """Categorize a pandas Series of NACE-codes (SN07) into predefined groups. Parameters: @@ -305,8 +309,8 @@ def nace_sn07_17grp(nace_sn07: pd.Series, display: str = "label") -> pd.Series: def sektor2_grp( - sektor: pd.Series, undersektor: pd.Series, display: str = "label" -) -> pd.Series: + sektor: pd.Series[str], undersektor: pd.Series[str], display: str = "label" +) -> pd.Series[str]: """Categorize a pandas Series of sectors and subsectors into predefined groups. Parameters: @@ -345,7 +349,7 @@ def sektor2_grp( return np.select(conditions, combined_labels, default="999 Uoppgitt") -def virk_str_8grp(ansatte: pd.Series, display: str = "label") -> pd.Series: +def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[str]: """Categorize a pandas Series of employee counts into predefined groups. Parameters: From 696fb870c677e1334d6f628a69d376d374197dbd Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 08:56:05 +0100 Subject: [PATCH 08/49] Changed non-subscriptable type to datetime and sort imports --- src/ssb_arbmark_fagfunksjoner/functions.py | 38 ++++------------------ 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 4ea48c9..2121b0d 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -6,38 +6,12 @@ """ -def example_function(number1: int, number2: int) -> str: - """Compare two integers. - - This is merely an example function can be deleted. It is used to show and test generating - documentation from code, type hinting, testing, and testing examples - in the code. - - - Args: - number1: The first number. - number2: The second number, which will be compared to number1. - - Returns: - A string describing which number is the greatest. - - Examples: - Examples should be written in doctest format, and should illustrate how - to use the function. - - >>> example_function(1, 2) - 1 is less than 2 - - """ - if number1 < number2: - return f"{number1} is less than {number2}" - - return f"{number1} is greater than or equal to {number2}" - - # Itertools for functions creating iterators for efficient looping import itertools +# Datetime to handle datetime objects inside Series +from datetime import datetime + # Optional for explicit type hint from typing import Optional @@ -52,7 +26,7 @@ def example_function(number1: int, number2: int) -> str: def count_workdays( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] + from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] ) -> pd.Series[int]: """Counts the number of workdays between pairs of dates in given series. @@ -398,7 +372,7 @@ def proc_sums( def ref_day( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] + from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] ) -> pd.Series[bool]: """Determines if the reference day falls between given date ranges. @@ -462,7 +436,7 @@ def ref_day( def ref_week( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] + from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] ) -> pd.Series[bool]: """Determines if any date in each date range falls in the reference week. From 53cb822019e05648ec2ec8beb4f44fd9a44f4e80 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 09:02:37 +0100 Subject: [PATCH 09/49] Installed datetime --- poetry.lock | 70 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index a0a6421..7e255ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -540,6 +540,21 @@ files = [ {file = "darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da"}, ] +[[package]] +name = "datetime" +version = "5.3" +description = "This package provides a DateTime data type, as known from Zope. Unless you need to communicate with Zope APIs, you're probably better off using Python's built-in datetime module." +optional = false +python-versions = ">=3.7" +files = [ + {file = "DateTime-5.3-py3-none-any.whl", hash = "sha256:05669f035ec7ccb24443bda8572078c381edf79c813186f627e9e8e5c6e8e6e6"}, + {file = "DateTime-5.3.tar.gz", hash = "sha256:4762a9b371ce696b7ffb82b869d2906fad94fdecdb1685bfbec1e2d8f37e5a98"}, +] + +[package.dependencies] +pytz = "*" +"zope.interface" = "*" + [[package]] name = "debugpy" version = "1.8.0" @@ -2493,7 +2508,60 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[[package]] +name = "zope-interface" +version = "6.1" +description = "Interfaces for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zope.interface-6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:43b576c34ef0c1f5a4981163b551a8781896f2a37f71b8655fd20b5af0386abb"}, + {file = "zope.interface-6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:67be3ca75012c6e9b109860820a8b6c9a84bfb036fbd1076246b98e56951ca92"}, + {file = "zope.interface-6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b9bc671626281f6045ad61d93a60f52fd5e8209b1610972cf0ef1bbe6d808e3"}, + {file = "zope.interface-6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe81def9cf3e46f16ce01d9bfd8bea595e06505e51b7baf45115c77352675fd"}, + {file = "zope.interface-6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dc998f6de015723196a904045e5a2217f3590b62ea31990672e31fbc5370b41"}, + {file = "zope.interface-6.1-cp310-cp310-win_amd64.whl", hash = "sha256:239a4a08525c080ff833560171d23b249f7f4d17fcbf9316ef4159f44997616f"}, + {file = "zope.interface-6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ffdaa5290422ac0f1688cb8adb1b94ca56cee3ad11f29f2ae301df8aecba7d1"}, + {file = "zope.interface-6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:34c15ca9248f2e095ef2e93af2d633358c5f048c49fbfddf5fdfc47d5e263736"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b012d023b4fb59183909b45d7f97fb493ef7a46d2838a5e716e3155081894605"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97806e9ca3651588c1baaebb8d0c5ee3db95430b612db354c199b57378312ee8"}, + {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fddbab55a2473f1d3b8833ec6b7ac31e8211b0aa608df5ab09ce07f3727326de"}, + {file = "zope.interface-6.1-cp311-cp311-win_amd64.whl", hash = "sha256:a0da79117952a9a41253696ed3e8b560a425197d4e41634a23b1507efe3273f1"}, + {file = "zope.interface-6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e8bb9c990ca9027b4214fa543fd4025818dc95f8b7abce79d61dc8a2112b561a"}, + {file = "zope.interface-6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b51b64432eed4c0744241e9ce5c70dcfecac866dff720e746d0a9c82f371dfa7"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa6fd016e9644406d0a61313e50348c706e911dca29736a3266fc9e28ec4ca6d"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c8cf55261e15590065039696607f6c9c1aeda700ceee40c70478552d323b3ff"}, + {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e30506bcb03de8983f78884807e4fd95d8db6e65b69257eea05d13d519b83ac0"}, + {file = "zope.interface-6.1-cp312-cp312-win_amd64.whl", hash = "sha256:e33e86fd65f369f10608b08729c8f1c92ec7e0e485964670b4d2633a4812d36b"}, + {file = "zope.interface-6.1-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:2f8d89721834524a813f37fa174bac074ec3d179858e4ad1b7efd4401f8ac45d"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13b7d0f2a67eb83c385880489dbb80145e9d344427b4262c49fbf2581677c11c"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef43ee91c193f827e49599e824385ec7c7f3cd152d74cb1dfe02cb135f264d83"}, + {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e441e8b7d587af0414d25e8d05e27040d78581388eed4c54c30c0c91aad3a379"}, + {file = "zope.interface-6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f89b28772fc2562ed9ad871c865f5320ef761a7fcc188a935e21fe8b31a38ca9"}, + {file = "zope.interface-6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:70d2cef1bf529bff41559be2de9d44d47b002f65e17f43c73ddefc92f32bf00f"}, + {file = "zope.interface-6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad54ed57bdfa3254d23ae04a4b1ce405954969c1b0550cc2d1d2990e8b439de1"}, + {file = "zope.interface-6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef467d86d3cfde8b39ea1b35090208b0447caaabd38405420830f7fd85fbdd56"}, + {file = "zope.interface-6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af47f10cfc54c2ba2d825220f180cc1e2d4914d783d6fc0cd93d43d7bc1c78b"}, + {file = "zope.interface-6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9559138690e1bd4ea6cd0954d22d1e9251e8025ce9ede5d0af0ceae4a401e43"}, + {file = "zope.interface-6.1-cp38-cp38-win_amd64.whl", hash = "sha256:964a7af27379ff4357dad1256d9f215047e70e93009e532d36dcb8909036033d"}, + {file = "zope.interface-6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:387545206c56b0315fbadb0431d5129c797f92dc59e276b3ce82db07ac1c6179"}, + {file = "zope.interface-6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57d0a8ce40ce440f96a2c77824ee94bf0d0925e6089df7366c2272ccefcb7941"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ebc4d34e7620c4f0da7bf162c81978fce0ea820e4fa1e8fc40ee763839805f3"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a804abc126b33824a44a7aa94f06cd211a18bbf31898ba04bd0924fbe9d282d"}, + {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f294a15f7723fc0d3b40701ca9b446133ec713eafc1cc6afa7b3d98666ee1ac"}, + {file = "zope.interface-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a41f87bb93b8048fe866fa9e3d0c51e27fe55149035dcf5f43da4b56732c0a40"}, + {file = "zope.interface-6.1.tar.gz", hash = "sha256:2fdc7ccbd6eb6b7df5353012fbed6c3c5d04ceaca0038f75e601060e95345309"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx-rtd-theme"] +test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] +testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] + [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "c71ee25bdad9a15471d2f366fe7868e4a4916e807e29f447ce2788f9ff5d7bf4" +content-hash = "ef79a530afdfba78f70a827de02cda2feeb53502823a9ac0e3574938daa336b7" diff --git a/pyproject.toml b/pyproject.toml index e25b16d..c0b8ff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ pandas = "^2.1.3" numpy = "^1.26.2" holidays = "^0.37" pandas-stubs = "^2.1.1.230928" +datetime = "^5.3" [tool.poetry.group.dev.dependencies] pygments = ">=2.10.0" From 9acd3eb7a4ee41c6853a431b4c9657165b6cb5e5 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 09:24:59 +0100 Subject: [PATCH 10/49] added type to groups and nox --- src/ssb_arbmark_fagfunksjoner/functions.py | 2 +- src/ssb_arbmark_fagfunksjoner/groups.py | 30 ++++++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 2121b0d..7169d62 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -232,7 +232,7 @@ def indicate_merge( return merged_df -def kv_intervall(start_p, slutt_p): +def kv_intervall(start_p, slutt_p) -> list: """This function generates a list of quarterly periods between two given periods. The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 23e0118..7682785 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -63,7 +63,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: results = [f"{key} {value}" for key, value in groups.items()] # Apply the selected format to the series - return np.select(conditions, results, default=".") + return pd.Series(np.select(conditions, results, default="."), dtype=str) def nace_sn07_47grp( @@ -204,12 +204,13 @@ def nace_sn07_47grp( # Determine and apply the selected format based on the labels parameter if display == "label": - return np.select(conditions, list(groups.values()), default="Uoppgitt") + results = np.select(conditions, list(groups.values()), default="Uoppgitt") elif display == "number": - return np.select(conditions, list(groups.keys()), default="99") + results = np.select(conditions, list(groups.keys()), default="99") else: combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="99 Uoppgitt") + results = np.select(conditions, combined_labels, default="99 Uoppgitt") + return pd.Series(results, dtype=str) def nace_sn07_17grp( @@ -300,12 +301,13 @@ def nace_sn07_17grp( # Determine and apply the selected format based on the labels parameter if display == "label": - return np.select(conditions, list(groups.values()), default="Uoppgitt") + results = np.select(conditions, list(groups.values()), default="Uoppgitt") elif display == "number": - return np.select(conditions, list(groups.keys()), default="999") + results = np.select(conditions, list(groups.keys()), default="999") else: combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="999 Uoppgitt") + results = np.select(conditions, combined_labels, default="999 Uoppgitt") + return pd.Series(results, dtype=str) def sektor2_grp( @@ -341,12 +343,13 @@ def sektor2_grp( # Determine and apply the selected format based on the labels parameter if display == "label": - return np.select(conditions, list(groups.values()), default="Uoppgitt") + results = np.select(conditions, list(groups.values()), default="Uoppgitt") elif display == "number": - return np.select(conditions, list(groups.keys()), default="999") + results = np.select(conditions, list(groups.keys()), default="999") else: combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="999 Uoppgitt") + results = np.select(conditions, combined_labels, default="999 Uoppgitt") + return pd.Series(results, dtype=str) def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[str]: @@ -386,9 +389,10 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ # Determine and apply the selected format based on the labels parameter if display == "label": - return np.select(conditions, list(groups.values()), default="Uoppgitt") + results = np.select(conditions, list(groups.values()), default="Uoppgitt") elif display == "number": - return np.select(conditions, list(groups.keys()), default="99") + results = np.select(conditions, list(groups.keys()), default="99") else: combined_labels = [f"{key} {value}" for key, value in groups.items()] - return np.select(conditions, combined_labels, default="99 Uoppgitt") + results = np.select(conditions, combined_labels, default="99 Uoppgitt") + return pd.Series(results, dtype=str) From 146e31c6b67f178c051544604af907cc62e63eed Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 09:46:25 +0100 Subject: [PATCH 11/49] poetry update and removed timestamp type specifications --- poetry.lock | 314 ++++++++------------- pyproject.toml | 1 - src/ssb_arbmark_fagfunksjoner/functions.py | 21 +- src/ssb_arbmark_fagfunksjoner/groups.py | 12 +- 4 files changed, 135 insertions(+), 213 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7e255ce..6fd19f6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -540,21 +540,6 @@ files = [ {file = "darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da"}, ] -[[package]] -name = "datetime" -version = "5.3" -description = "This package provides a DateTime data type, as known from Zope. Unless you need to communicate with Zope APIs, you're probably better off using Python's built-in datetime module." -optional = false -python-versions = ">=3.7" -files = [ - {file = "DateTime-5.3-py3-none-any.whl", hash = "sha256:05669f035ec7ccb24443bda8572078c381edf79c813186f627e9e8e5c6e8e6e6"}, - {file = "DateTime-5.3.tar.gz", hash = "sha256:4762a9b371ce696b7ffb82b869d2906fad94fdecdb1685bfbec1e2d8f37e5a98"}, -] - -[package.dependencies] -pytz = "*" -"zope.interface" = "*" - [[package]] name = "debugpy" version = "1.8.0" @@ -723,13 +708,13 @@ python-dateutil = "*" [[package]] name = "identify" -version = "2.5.32" +version = "2.5.33" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.5.32-py2.py3-none-any.whl", hash = "sha256:0b7656ef6cba81664b783352c73f8c24b39cf82f926f78f4550eda928e5e0545"}, - {file = "identify-2.5.32.tar.gz", hash = "sha256:5d9979348ec1a21c768ae07e0a652924538e8bce67313a73cb0f681cf08ba407"}, + {file = "identify-2.5.33-py2.py3-none-any.whl", hash = "sha256:d40ce5fcd762817627670da8a7d8d8e65f24342d14539c59488dc603bf662e34"}, + {file = "identify-2.5.33.tar.gz", hash = "sha256:161558f9fe4559e1557e1bff323e8631f6a0e4837f7497767c1782832f16b62d"}, ] [package.extras] @@ -1398,13 +1383,13 @@ ptyprocess = ">=0.5" [[package]] name = "pipx" -version = "1.3.2" +version = "1.3.3" description = "Install and Run Python Applications in Isolated Environments" optional = false python-versions = ">=3.8" files = [ - {file = "pipx-1.3.2-py3-none-any.whl", hash = "sha256:bb4bf6c052639f589901d7ae5a837a44bc09fa82c38b4f74a319d39cccc73b84"}, - {file = "pipx-1.3.2.tar.gz", hash = "sha256:704d01d04c67c2dd0c776c5bf5ed35c7b249055b0174568b8507f07d72ed7a7f"}, + {file = "pipx-1.3.3-py3-none-any.whl", hash = "sha256:ce119a15f04da670d44ff1c493c7f9510639f610c720f0381abfed8aac5cef81"}, + {file = "pipx-1.3.3.tar.gz", hash = "sha256:6d5474e71e78c28d83570443e5418c56599aa8319a950ccf5984c5cb0a35f0a7"}, ] [package.dependencies] @@ -1417,13 +1402,13 @@ userpath = ">=1.6,<1.9.0 || >1.9.0" [[package]] name = "platformdirs" -version = "4.0.0" +version = "4.1.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, - {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, + {file = "platformdirs-4.1.0-py3-none-any.whl", hash = "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380"}, + {file = "platformdirs-4.1.0.tar.gz", hash = "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"}, ] [package.extras] @@ -1719,104 +1704,104 @@ files = [ [[package]] name = "pyzmq" -version = "25.1.1" +version = "25.1.2" description = "Python bindings for 0MQ" optional = false python-versions = ">=3.6" files = [ - {file = "pyzmq-25.1.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:381469297409c5adf9a0e884c5eb5186ed33137badcbbb0560b86e910a2f1e76"}, - {file = "pyzmq-25.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:955215ed0604dac5b01907424dfa28b40f2b2292d6493445dd34d0dfa72586a8"}, - {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:985bbb1316192b98f32e25e7b9958088431d853ac63aca1d2c236f40afb17c83"}, - {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:afea96f64efa98df4da6958bae37f1cbea7932c35878b185e5982821bc883369"}, - {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76705c9325d72a81155bb6ab48d4312e0032bf045fb0754889133200f7a0d849"}, - {file = "pyzmq-25.1.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:77a41c26205d2353a4c94d02be51d6cbdf63c06fbc1295ea57dad7e2d3381b71"}, - {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:12720a53e61c3b99d87262294e2b375c915fea93c31fc2336898c26d7aed34cd"}, - {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:57459b68e5cd85b0be8184382cefd91959cafe79ae019e6b1ae6e2ba8a12cda7"}, - {file = "pyzmq-25.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:292fe3fc5ad4a75bc8df0dfaee7d0babe8b1f4ceb596437213821f761b4589f9"}, - {file = "pyzmq-25.1.1-cp310-cp310-win32.whl", hash = "sha256:35b5ab8c28978fbbb86ea54958cd89f5176ce747c1fb3d87356cf698048a7790"}, - {file = "pyzmq-25.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:11baebdd5fc5b475d484195e49bae2dc64b94a5208f7c89954e9e354fc609d8f"}, - {file = "pyzmq-25.1.1-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:d20a0ddb3e989e8807d83225a27e5c2eb2260eaa851532086e9e0fa0d5287d83"}, - {file = "pyzmq-25.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e1c1be77bc5fb77d923850f82e55a928f8638f64a61f00ff18a67c7404faf008"}, - {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d89528b4943d27029a2818f847c10c2cecc79fa9590f3cb1860459a5be7933eb"}, - {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:90f26dc6d5f241ba358bef79be9ce06de58d477ca8485e3291675436d3827cf8"}, - {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2b92812bd214018e50b6380ea3ac0c8bb01ac07fcc14c5f86a5bb25e74026e9"}, - {file = "pyzmq-25.1.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:2f957ce63d13c28730f7fd6b72333814221c84ca2421298f66e5143f81c9f91f"}, - {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:047a640f5c9c6ade7b1cc6680a0e28c9dd5a0825135acbd3569cc96ea00b2505"}, - {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7f7e58effd14b641c5e4dec8c7dab02fb67a13df90329e61c869b9cc607ef752"}, - {file = "pyzmq-25.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c2910967e6ab16bf6fbeb1f771c89a7050947221ae12a5b0b60f3bca2ee19bca"}, - {file = "pyzmq-25.1.1-cp311-cp311-win32.whl", hash = "sha256:76c1c8efb3ca3a1818b837aea423ff8a07bbf7aafe9f2f6582b61a0458b1a329"}, - {file = "pyzmq-25.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:44e58a0554b21fc662f2712814a746635ed668d0fbc98b7cb9d74cb798d202e6"}, - {file = "pyzmq-25.1.1-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:e1ffa1c924e8c72778b9ccd386a7067cddf626884fd8277f503c48bb5f51c762"}, - {file = "pyzmq-25.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1af379b33ef33757224da93e9da62e6471cf4a66d10078cf32bae8127d3d0d4a"}, - {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cff084c6933680d1f8b2f3b4ff5bbb88538a4aac00d199ac13f49d0698727ecb"}, - {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2400a94f7dd9cb20cd012951a0cbf8249e3d554c63a9c0cdfd5cbb6c01d2dec"}, - {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d81f1ddae3858b8299d1da72dd7d19dd36aab654c19671aa8a7e7fb02f6638a"}, - {file = "pyzmq-25.1.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:255ca2b219f9e5a3a9ef3081512e1358bd4760ce77828e1028b818ff5610b87b"}, - {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a882ac0a351288dd18ecae3326b8a49d10c61a68b01419f3a0b9a306190baf69"}, - {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:724c292bb26365659fc434e9567b3f1adbdb5e8d640c936ed901f49e03e5d32e"}, - {file = "pyzmq-25.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ca1ed0bb2d850aa8471387882247c68f1e62a4af0ce9c8a1dbe0d2bf69e41fb"}, - {file = "pyzmq-25.1.1-cp312-cp312-win32.whl", hash = "sha256:b3451108ab861040754fa5208bca4a5496c65875710f76789a9ad27c801a0075"}, - {file = "pyzmq-25.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:eadbefd5e92ef8a345f0525b5cfd01cf4e4cc651a2cffb8f23c0dd184975d787"}, - {file = "pyzmq-25.1.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:db0b2af416ba735c6304c47f75d348f498b92952f5e3e8bff449336d2728795d"}, - {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7c133e93b405eb0d36fa430c94185bdd13c36204a8635470cccc200723c13bb"}, - {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:273bc3959bcbff3f48606b28229b4721716598d76b5aaea2b4a9d0ab454ec062"}, - {file = "pyzmq-25.1.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cbc8df5c6a88ba5ae385d8930da02201165408dde8d8322072e3e5ddd4f68e22"}, - {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:18d43df3f2302d836f2a56f17e5663e398416e9dd74b205b179065e61f1a6edf"}, - {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:73461eed88a88c866656e08f89299720a38cb4e9d34ae6bf5df6f71102570f2e"}, - {file = "pyzmq-25.1.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:34c850ce7976d19ebe7b9d4b9bb8c9dfc7aac336c0958e2651b88cbd46682123"}, - {file = "pyzmq-25.1.1-cp36-cp36m-win32.whl", hash = "sha256:d2045d6d9439a0078f2a34b57c7b18c4a6aef0bee37f22e4ec9f32456c852c71"}, - {file = "pyzmq-25.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:458dea649f2f02a0b244ae6aef8dc29325a2810aa26b07af8374dc2a9faf57e3"}, - {file = "pyzmq-25.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7cff25c5b315e63b07a36f0c2bab32c58eafbe57d0dce61b614ef4c76058c115"}, - {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1579413ae492b05de5a6174574f8c44c2b9b122a42015c5292afa4be2507f28"}, - {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3d0a409d3b28607cc427aa5c30a6f1e4452cc44e311f843e05edb28ab5e36da0"}, - {file = "pyzmq-25.1.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21eb4e609a154a57c520e3d5bfa0d97e49b6872ea057b7c85257b11e78068222"}, - {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:034239843541ef7a1aee0c7b2cb7f6aafffb005ede965ae9cbd49d5ff4ff73cf"}, - {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f8115e303280ba09f3898194791a153862cbf9eef722ad8f7f741987ee2a97c7"}, - {file = "pyzmq-25.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1a5d26fe8f32f137e784f768143728438877d69a586ddeaad898558dc971a5ae"}, - {file = "pyzmq-25.1.1-cp37-cp37m-win32.whl", hash = "sha256:f32260e556a983bc5c7ed588d04c942c9a8f9c2e99213fec11a031e316874c7e"}, - {file = "pyzmq-25.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:abf34e43c531bbb510ae7e8f5b2b1f2a8ab93219510e2b287a944432fad135f3"}, - {file = "pyzmq-25.1.1-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:87e34f31ca8f168c56d6fbf99692cc8d3b445abb5bfd08c229ae992d7547a92a"}, - {file = "pyzmq-25.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c9c6c9b2c2f80747a98f34ef491c4d7b1a8d4853937bb1492774992a120f475d"}, - {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5619f3f5a4db5dbb572b095ea3cb5cc035335159d9da950830c9c4db2fbb6995"}, - {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5a34d2395073ef862b4032343cf0c32a712f3ab49d7ec4f42c9661e0294d106f"}, - {file = "pyzmq-25.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f0e6b78220aba09815cd1f3a32b9c7cb3e02cb846d1cfc526b6595f6046618"}, - {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3669cf8ee3520c2f13b2e0351c41fea919852b220988d2049249db10046a7afb"}, - {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2d163a18819277e49911f7461567bda923461c50b19d169a062536fffe7cd9d2"}, - {file = "pyzmq-25.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:df27ffddff4190667d40de7beba4a950b5ce78fe28a7dcc41d6f8a700a80a3c0"}, - {file = "pyzmq-25.1.1-cp38-cp38-win32.whl", hash = "sha256:a382372898a07479bd34bda781008e4a954ed8750f17891e794521c3e21c2e1c"}, - {file = "pyzmq-25.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:52533489f28d62eb1258a965f2aba28a82aa747202c8fa5a1c7a43b5db0e85c1"}, - {file = "pyzmq-25.1.1-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:03b3f49b57264909aacd0741892f2aecf2f51fb053e7d8ac6767f6c700832f45"}, - {file = "pyzmq-25.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:330f9e188d0d89080cde66dc7470f57d1926ff2fb5576227f14d5be7ab30b9fa"}, - {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2ca57a5be0389f2a65e6d3bb2962a971688cbdd30b4c0bd188c99e39c234f414"}, - {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d457aed310f2670f59cc5b57dcfced452aeeed77f9da2b9763616bd57e4dbaae"}, - {file = "pyzmq-25.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c56d748ea50215abef7030c72b60dd723ed5b5c7e65e7bc2504e77843631c1a6"}, - {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8f03d3f0d01cb5a018debeb412441996a517b11c5c17ab2001aa0597c6d6882c"}, - {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:820c4a08195a681252f46926de10e29b6bbf3e17b30037bd4250d72dd3ddaab8"}, - {file = "pyzmq-25.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17ef5f01d25b67ca8f98120d5fa1d21efe9611604e8eb03a5147360f517dd1e2"}, - {file = "pyzmq-25.1.1-cp39-cp39-win32.whl", hash = "sha256:04ccbed567171579ec2cebb9c8a3e30801723c575601f9a990ab25bcac6b51e2"}, - {file = "pyzmq-25.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:e61f091c3ba0c3578411ef505992d356a812fb200643eab27f4f70eed34a29ef"}, - {file = "pyzmq-25.1.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ade6d25bb29c4555d718ac6d1443a7386595528c33d6b133b258f65f963bb0f6"}, - {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0c95ddd4f6e9fca4e9e3afaa4f9df8552f0ba5d1004e89ef0a68e1f1f9807c7"}, - {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48e466162a24daf86f6b5ca72444d2bf39a5e58da5f96370078be67c67adc978"}, - {file = "pyzmq-25.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abc719161780932c4e11aaebb203be3d6acc6b38d2f26c0f523b5b59d2fc1996"}, - {file = "pyzmq-25.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ccf825981640b8c34ae54231b7ed00271822ea1c6d8ba1090ebd4943759abf5"}, - {file = "pyzmq-25.1.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c2f20ce161ebdb0091a10c9ca0372e023ce24980d0e1f810f519da6f79c60800"}, - {file = "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:deee9ca4727f53464daf089536e68b13e6104e84a37820a88b0a057b97bba2d2"}, - {file = "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:aa8d6cdc8b8aa19ceb319aaa2b660cdaccc533ec477eeb1309e2a291eaacc43a"}, - {file = "pyzmq-25.1.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:019e59ef5c5256a2c7378f2fb8560fc2a9ff1d315755204295b2eab96b254d0a"}, - {file = "pyzmq-25.1.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:b9af3757495c1ee3b5c4e945c1df7be95562277c6e5bccc20a39aec50f826cd0"}, - {file = "pyzmq-25.1.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:548d6482dc8aadbe7e79d1b5806585c8120bafa1ef841167bc9090522b610fa6"}, - {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:057e824b2aae50accc0f9a0570998adc021b372478a921506fddd6c02e60308e"}, - {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2243700cc5548cff20963f0ca92d3e5e436394375ab8a354bbea2b12911b20b0"}, - {file = "pyzmq-25.1.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79986f3b4af059777111409ee517da24a529bdbd46da578b33f25580adcff728"}, - {file = "pyzmq-25.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:11d58723d44d6ed4dd677c5615b2ffb19d5c426636345567d6af82be4dff8a55"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:49d238cf4b69652257db66d0c623cd3e09b5d2e9576b56bc067a396133a00d4a"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fedbdc753827cf014c01dbbee9c3be17e5a208dcd1bf8641ce2cd29580d1f0d4"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc16ac425cc927d0a57d242589f87ee093884ea4804c05a13834d07c20db203c"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11c1d2aed9079c6b0c9550a7257a836b4a637feb334904610f06d70eb44c56d2"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e8a701123029cc240cea61dd2d16ad57cab4691804143ce80ecd9286b464d180"}, - {file = "pyzmq-25.1.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:61706a6b6c24bdece85ff177fec393545a3191eeda35b07aaa1458a027ad1304"}, - {file = "pyzmq-25.1.1.tar.gz", hash = "sha256:259c22485b71abacdfa8bf79720cd7bcf4b9d128b30ea554f01ae71fdbfdaa23"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, + {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d"}, + {file = "pyzmq-25.1.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979"}, + {file = "pyzmq-25.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08"}, + {file = "pyzmq-25.1.2-cp310-cp310-win32.whl", hash = "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886"}, + {file = "pyzmq-25.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c"}, + {file = "pyzmq-25.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840"}, + {file = "pyzmq-25.1.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b"}, + {file = "pyzmq-25.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3"}, + {file = "pyzmq-25.1.2-cp311-cp311-win32.whl", hash = "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097"}, + {file = "pyzmq-25.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a"}, + {file = "pyzmq-25.1.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee"}, + {file = "pyzmq-25.1.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe"}, + {file = "pyzmq-25.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737"}, + {file = "pyzmq-25.1.2-cp312-cp312-win32.whl", hash = "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d"}, + {file = "pyzmq-25.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0"}, + {file = "pyzmq-25.1.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a"}, + {file = "pyzmq-25.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win32.whl", hash = "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc"}, + {file = "pyzmq-25.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68"}, + {file = "pyzmq-25.1.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003"}, + {file = "pyzmq-25.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec"}, + {file = "pyzmq-25.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win32.whl", hash = "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7"}, + {file = "pyzmq-25.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41"}, + {file = "pyzmq-25.1.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7"}, + {file = "pyzmq-25.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f"}, + {file = "pyzmq-25.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755"}, + {file = "pyzmq-25.1.2-cp38-cp38-win32.whl", hash = "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07"}, + {file = "pyzmq-25.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6"}, + {file = "pyzmq-25.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add"}, + {file = "pyzmq-25.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6"}, + {file = "pyzmq-25.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2"}, + {file = "pyzmq-25.1.2-cp39-cp39-win32.whl", hash = "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289"}, + {file = "pyzmq-25.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4"}, + {file = "pyzmq-25.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96"}, + {file = "pyzmq-25.1.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611"}, + {file = "pyzmq-25.1.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc"}, + {file = "pyzmq-25.1.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314"}, + {file = "pyzmq-25.1.2.tar.gz", hash = "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226"}, ] [package.dependencies] @@ -1940,28 +1925,28 @@ files = [ [[package]] name = "ruff" -version = "0.1.6" +version = "0.1.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703"}, - {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35"}, - {file = "ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745"}, - {file = "ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff"}, - {file = "ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc"}, - {file = "ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543"}, - {file = "ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462"}, - {file = "ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a"}, - {file = "ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33"}, - {file = "ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc"}, - {file = "ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184"}, + {file = "ruff-0.1.7-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7f80496854fdc65b6659c271d2c26e90d4d401e6a4a31908e7e334fab4645aac"}, + {file = "ruff-0.1.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1ea109bdb23c2a4413f397ebd8ac32cb498bee234d4191ae1a310af760e5d287"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b0c2de9dd9daf5e07624c24add25c3a490dbf74b0e9bca4145c632457b3b42a"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:69a4bed13bc1d5dabf3902522b5a2aadfebe28226c6269694283c3b0cecb45fd"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de02ca331f2143195a712983a57137c5ec0f10acc4aa81f7c1f86519e52b92a1"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:45b38c3f8788a65e6a2cab02e0f7adfa88872696839d9882c13b7e2f35d64c5f"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c64cb67b2025b1ac6d58e5ffca8f7b3f7fd921f35e78198411237e4f0db8e73"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9dcc6bb2f4df59cb5b4b40ff14be7d57012179d69c6565c1da0d1f013d29951b"}, + {file = "ruff-0.1.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2bb4bb6bbe921f6b4f5b6fdd8d8468c940731cb9406f274ae8c5ed7a78c478"}, + {file = "ruff-0.1.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:276a89bcb149b3d8c1b11d91aa81898fe698900ed553a08129b38d9d6570e717"}, + {file = "ruff-0.1.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:90c958fe950735041f1c80d21b42184f1072cc3975d05e736e8d66fc377119ea"}, + {file = "ruff-0.1.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6b05e3b123f93bb4146a761b7a7d57af8cb7384ccb2502d29d736eaade0db519"}, + {file = "ruff-0.1.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:290ecab680dce94affebefe0bbca2322a6277e83d4f29234627e0f8f6b4fa9ce"}, + {file = "ruff-0.1.7-py3-none-win32.whl", hash = "sha256:416dfd0bd45d1a2baa3b1b07b1b9758e7d993c256d3e51dc6e03a5e7901c7d80"}, + {file = "ruff-0.1.7-py3-none-win_amd64.whl", hash = "sha256:4af95fd1d3b001fc41325064336db36e3d27d2004cdb6d21fd617d45a172dd96"}, + {file = "ruff-0.1.7-py3-none-win_arm64.whl", hash = "sha256:0683b7bfbb95e6df3c7c04fe9d78f631f8e8ba4868dfc932d43d690698057e2e"}, + {file = "ruff-0.1.7.tar.gz", hash = "sha256:dffd699d07abf54833e5f6cc50b85a6ff043715da8788c4a79bcd4ab4734d306"}, ] [[package]] @@ -2508,60 +2493,7 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] -[[package]] -name = "zope-interface" -version = "6.1" -description = "Interfaces for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "zope.interface-6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:43b576c34ef0c1f5a4981163b551a8781896f2a37f71b8655fd20b5af0386abb"}, - {file = "zope.interface-6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:67be3ca75012c6e9b109860820a8b6c9a84bfb036fbd1076246b98e56951ca92"}, - {file = "zope.interface-6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b9bc671626281f6045ad61d93a60f52fd5e8209b1610972cf0ef1bbe6d808e3"}, - {file = "zope.interface-6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbe81def9cf3e46f16ce01d9bfd8bea595e06505e51b7baf45115c77352675fd"}, - {file = "zope.interface-6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dc998f6de015723196a904045e5a2217f3590b62ea31990672e31fbc5370b41"}, - {file = "zope.interface-6.1-cp310-cp310-win_amd64.whl", hash = "sha256:239a4a08525c080ff833560171d23b249f7f4d17fcbf9316ef4159f44997616f"}, - {file = "zope.interface-6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ffdaa5290422ac0f1688cb8adb1b94ca56cee3ad11f29f2ae301df8aecba7d1"}, - {file = "zope.interface-6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:34c15ca9248f2e095ef2e93af2d633358c5f048c49fbfddf5fdfc47d5e263736"}, - {file = "zope.interface-6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b012d023b4fb59183909b45d7f97fb493ef7a46d2838a5e716e3155081894605"}, - {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97806e9ca3651588c1baaebb8d0c5ee3db95430b612db354c199b57378312ee8"}, - {file = "zope.interface-6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fddbab55a2473f1d3b8833ec6b7ac31e8211b0aa608df5ab09ce07f3727326de"}, - {file = "zope.interface-6.1-cp311-cp311-win_amd64.whl", hash = "sha256:a0da79117952a9a41253696ed3e8b560a425197d4e41634a23b1507efe3273f1"}, - {file = "zope.interface-6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e8bb9c990ca9027b4214fa543fd4025818dc95f8b7abce79d61dc8a2112b561a"}, - {file = "zope.interface-6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b51b64432eed4c0744241e9ce5c70dcfecac866dff720e746d0a9c82f371dfa7"}, - {file = "zope.interface-6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa6fd016e9644406d0a61313e50348c706e911dca29736a3266fc9e28ec4ca6d"}, - {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c8cf55261e15590065039696607f6c9c1aeda700ceee40c70478552d323b3ff"}, - {file = "zope.interface-6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e30506bcb03de8983f78884807e4fd95d8db6e65b69257eea05d13d519b83ac0"}, - {file = "zope.interface-6.1-cp312-cp312-win_amd64.whl", hash = "sha256:e33e86fd65f369f10608b08729c8f1c92ec7e0e485964670b4d2633a4812d36b"}, - {file = "zope.interface-6.1-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:2f8d89721834524a813f37fa174bac074ec3d179858e4ad1b7efd4401f8ac45d"}, - {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13b7d0f2a67eb83c385880489dbb80145e9d344427b4262c49fbf2581677c11c"}, - {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef43ee91c193f827e49599e824385ec7c7f3cd152d74cb1dfe02cb135f264d83"}, - {file = "zope.interface-6.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e441e8b7d587af0414d25e8d05e27040d78581388eed4c54c30c0c91aad3a379"}, - {file = "zope.interface-6.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f89b28772fc2562ed9ad871c865f5320ef761a7fcc188a935e21fe8b31a38ca9"}, - {file = "zope.interface-6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:70d2cef1bf529bff41559be2de9d44d47b002f65e17f43c73ddefc92f32bf00f"}, - {file = "zope.interface-6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad54ed57bdfa3254d23ae04a4b1ce405954969c1b0550cc2d1d2990e8b439de1"}, - {file = "zope.interface-6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef467d86d3cfde8b39ea1b35090208b0447caaabd38405420830f7fd85fbdd56"}, - {file = "zope.interface-6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af47f10cfc54c2ba2d825220f180cc1e2d4914d783d6fc0cd93d43d7bc1c78b"}, - {file = "zope.interface-6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9559138690e1bd4ea6cd0954d22d1e9251e8025ce9ede5d0af0ceae4a401e43"}, - {file = "zope.interface-6.1-cp38-cp38-win_amd64.whl", hash = "sha256:964a7af27379ff4357dad1256d9f215047e70e93009e532d36dcb8909036033d"}, - {file = "zope.interface-6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:387545206c56b0315fbadb0431d5129c797f92dc59e276b3ce82db07ac1c6179"}, - {file = "zope.interface-6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57d0a8ce40ce440f96a2c77824ee94bf0d0925e6089df7366c2272ccefcb7941"}, - {file = "zope.interface-6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ebc4d34e7620c4f0da7bf162c81978fce0ea820e4fa1e8fc40ee763839805f3"}, - {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a804abc126b33824a44a7aa94f06cd211a18bbf31898ba04bd0924fbe9d282d"}, - {file = "zope.interface-6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f294a15f7723fc0d3b40701ca9b446133ec713eafc1cc6afa7b3d98666ee1ac"}, - {file = "zope.interface-6.1-cp39-cp39-win_amd64.whl", hash = "sha256:a41f87bb93b8048fe866fa9e3d0c51e27fe55149035dcf5f43da4b56732c0a40"}, - {file = "zope.interface-6.1.tar.gz", hash = "sha256:2fdc7ccbd6eb6b7df5353012fbed6c3c5d04ceaca0038f75e601060e95345309"}, -] - -[package.dependencies] -setuptools = "*" - -[package.extras] -docs = ["Sphinx", "repoze.sphinx.autointerface", "sphinx-rtd-theme"] -test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] -testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] - [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "ef79a530afdfba78f70a827de02cda2feeb53502823a9ac0e3574938daa336b7" +content-hash = "c71ee25bdad9a15471d2f366fe7868e4a4916e807e29f447ce2788f9ff5d7bf4" diff --git a/pyproject.toml b/pyproject.toml index c0b8ff0..e25b16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ pandas = "^2.1.3" numpy = "^1.26.2" holidays = "^0.37" pandas-stubs = "^2.1.1.230928" -datetime = "^5.3" [tool.poetry.group.dev.dependencies] pygments = ">=2.10.0" diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 7169d62..ff81e5b 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -9,9 +9,6 @@ # Itertools for functions creating iterators for efficient looping import itertools -# Datetime to handle datetime objects inside Series -from datetime import datetime - # Optional for explicit type hint from typing import Optional @@ -25,9 +22,7 @@ import pandas as pd -def count_workdays( - from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] -) -> pd.Series[int]: +def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[int]: """Counts the number of workdays between pairs of dates in given series. This function calculates the number of workdays for each pair of start and end dates @@ -100,7 +95,7 @@ def count_workdays( "Unexpected error: length of workdays_list does not match the number of date pairs." ) - return pd.Series(workdays_list, dtype=int) + return pd.Series(workdays_list, dtype="Int64") def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[int, int]: @@ -371,9 +366,7 @@ def proc_sums( return sum_df -def ref_day( - from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] -) -> pd.Series[bool]: +def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[bool]: """Determines if the reference day falls between given date ranges. This function checks if the 16th day of each month (reference day) is @@ -432,12 +425,10 @@ def ref_day( result = np.logical_and(from_dates <= ref_days, ref_days <= to_dates) # Return the result as an array of boolean values - return pd.Series(result, dtype=bool) + return pd.Series(result, dtype="boolean") -def ref_week( - from_dates: pd.Series[datetime], to_dates: pd.Series[datetime] -) -> pd.Series[bool]: +def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[bool]: """Determines if any date in each date range falls in the reference week. This function checks if any date between the 'from_dates' and 'to_dates' @@ -493,4 +484,4 @@ def ref_week( result = np.logical_and(from_weeks <= ref_weeks, ref_weeks <= to_weeks) # Return the result as a series of boolean values - return pd.Series(result, dtype=bool) + return pd.Series(result, dtype="boolean") diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 7682785..54d79e8 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -63,7 +63,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: results = [f"{key} {value}" for key, value in groups.items()] # Apply the selected format to the series - return pd.Series(np.select(conditions, results, default="."), dtype=str) + return pd.Series(np.select(conditions, results, default="."), dtype="string") def nace_sn07_47grp( @@ -210,7 +210,7 @@ def nace_sn07_47grp( else: combined_labels = [f"{key} {value}" for key, value in groups.items()] results = np.select(conditions, combined_labels, default="99 Uoppgitt") - return pd.Series(results, dtype=str) + return pd.Series(results, dtype="string") def nace_sn07_17grp( @@ -237,7 +237,7 @@ def nace_sn07_17grp( print( f"Warning: There are {n_unique_grp} unique industry divisions on 2-number level. The function first groups the input into the 47 groups standard." ) - nace_str2 = nace_sn07_47grp(nace_sn07, labels=False) + nace_str2 = nace_sn07_47grp(nace_sn07, display="number") # Define the conditions for each group conditions = [ @@ -307,7 +307,7 @@ def nace_sn07_17grp( else: combined_labels = [f"{key} {value}" for key, value in groups.items()] results = np.select(conditions, combined_labels, default="999 Uoppgitt") - return pd.Series(results, dtype=str) + return pd.Series(results, dtype="string") def sektor2_grp( @@ -349,7 +349,7 @@ def sektor2_grp( else: combined_labels = [f"{key} {value}" for key, value in groups.items()] results = np.select(conditions, combined_labels, default="999 Uoppgitt") - return pd.Series(results, dtype=str) + return pd.Series(results, dtype="string") def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[str]: @@ -395,4 +395,4 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ else: combined_labels = [f"{key} {value}" for key, value in groups.items()] results = np.select(conditions, combined_labels, default="99 Uoppgitt") - return pd.Series(results, dtype=str) + return pd.Series(results, dtype="string") From c75896a93b2e01c749782afa63d8a5ce5fca29cb Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 10:28:22 +0100 Subject: [PATCH 12/49] Changed to list comprehension --- src/ssb_arbmark_fagfunksjoner/groups.py | 56 +++++++++++++++---------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 54d79e8..cc92892 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -56,9 +56,9 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: # Determine the format of the results based on the display parameter if display == "label": - results = list(groups.values()) + results = [str(value) for value in groups.values()] elif display == "number": - results = list(groups.keys()) + results = [str(key) for key in groups.keys()] else: results = [f"{key} {value}" for key, value in groups.items()] @@ -204,13 +204,16 @@ def nace_sn07_47grp( # Determine and apply the selected format based on the labels parameter if display == "label": - results = np.select(conditions, list(groups.values()), default="Uoppgitt") + results = [str(value) for value in groups.values()] + default_code = "Uoppgitt" elif display == "number": - results = np.select(conditions, list(groups.keys()), default="99") + results = [str(key) for key in groups.keys()] + default_code = "99" else: - combined_labels = [f"{key} {value}" for key, value in groups.items()] - results = np.select(conditions, combined_labels, default="99 Uoppgitt") - return pd.Series(results, dtype="string") + results = [f"{key} {value}" for key, value in groups.items()] + default_code = "99 Uoppgitt" + grouped = np.select(conditions, results, default=default_code) + return pd.Series(grouped, dtype="string") def nace_sn07_17grp( @@ -301,13 +304,16 @@ def nace_sn07_17grp( # Determine and apply the selected format based on the labels parameter if display == "label": - results = np.select(conditions, list(groups.values()), default="Uoppgitt") + results = [str(value) for value in groups.values()] + default_code = "Uoppgitt" elif display == "number": - results = np.select(conditions, list(groups.keys()), default="999") + results = [str(key) for key in groups.keys()] + default_code = "999" else: - combined_labels = [f"{key} {value}" for key, value in groups.items()] - results = np.select(conditions, combined_labels, default="999 Uoppgitt") - return pd.Series(results, dtype="string") + results = [f"{key} {value}" for key, value in groups.items()] + default_code = "999 Uoppgitt" + grouped = np.select(conditions, results, default=default_code) + return pd.Series(grouped, dtype="string") def sektor2_grp( @@ -343,13 +349,16 @@ def sektor2_grp( # Determine and apply the selected format based on the labels parameter if display == "label": - results = np.select(conditions, list(groups.values()), default="Uoppgitt") + results = [str(value) for value in groups.values()] + default_code = "Uoppgitt" elif display == "number": - results = np.select(conditions, list(groups.keys()), default="999") + results = [str(key) for key in groups.keys()] + default_code = "999" else: - combined_labels = [f"{key} {value}" for key, value in groups.items()] - results = np.select(conditions, combined_labels, default="999 Uoppgitt") - return pd.Series(results, dtype="string") + results = [f"{key} {value}" for key, value in groups.items()] + default_code = "999 Uoppgitt" + grouped = np.select(conditions, results, default=default_code) + return pd.Series(grouped, dtype="string") def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[str]: @@ -389,10 +398,13 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ # Determine and apply the selected format based on the labels parameter if display == "label": - results = np.select(conditions, list(groups.values()), default="Uoppgitt") + results = [str(value) for value in groups.values()] + default_code = "Uoppgitt" elif display == "number": - results = np.select(conditions, list(groups.keys()), default="99") + results = [str(key) for key in groups.keys()] + default_code = "99" else: - combined_labels = [f"{key} {value}" for key, value in groups.items()] - results = np.select(conditions, combined_labels, default="99 Uoppgitt") - return pd.Series(results, dtype="string") + results = [f"{key} {value}" for key, value in groups.items()] + default_code = "99 Uoppgitt" + grouped = np.select(conditions, results, default=default_code) + return pd.Series(grouped, dtype="string") From e8bea069b27989a74b2e5c597c1aac53ab0d14ec Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 11:15:32 +0100 Subject: [PATCH 13/49] Added type hints for conditions --- src/ssb_arbmark_fagfunksjoner/groups.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index cc92892..aba6426 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -19,7 +19,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: pd.Series: A pandas Series where the original person ages are replaced by group labels, keys, or a combination. """ # Define the conditions for each group - conditions = [ + conditions: list[np.ndarray] = [ np.logical_and(alder >= 16, alder <= 19), # 16-19 år np.logical_and(alder >= 20, alder <= 24), # 20-24 år np.logical_and(alder >= 25, alder <= 29), # 25-29 år @@ -87,7 +87,7 @@ def nace_sn07_47grp( nace3 = pd.Series(nace_sn07.str[:3], name="nace3") # Define the conditions for each group - conditions = [ + conditions: list[np.ndarray] = [ np.isin( nace2, ["01", "02", "03"] ), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass @@ -243,7 +243,7 @@ def nace_sn07_17grp( nace_str2 = nace_sn07_47grp(nace_sn07, display="number") # Define the conditions for each group - conditions = [ + conditions: list[np.ndarray] = [ nace_str2 == "01", # 01-03 Jordbruk, skogbruk og fiske np.logical_and( nace_str2 >= "01", nace_str2 <= "03" @@ -331,7 +331,7 @@ def sektor2_grp( pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. """ # Define the conditions for each group - conditions = [ + conditions: list[np.ndarray] = [ sektor == "6100", np.logical_and(sektor == "6500", undersektor != "007"), np.logical_and(sektor == "6500", undersektor == "007"), @@ -373,7 +373,7 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. """ # Define the conditions for each group - conditions = [ + conditions: list[np.ndarray] = [ ansatte == 0, # No employees np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees From b891d9e9e82db545511313fdb38855cf9d3a09fc Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 11:53:13 +0100 Subject: [PATCH 14/49] Type annotation for conditions --- src/ssb_arbmark_fagfunksjoner/groups.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index aba6426..20fdc0e 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -1,5 +1,7 @@ """A collection of useful groups.""" +# List for appropriate type annotation + # Numpy for data wrangling import numpy as np @@ -19,7 +21,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: pd.Series: A pandas Series where the original person ages are replaced by group labels, keys, or a combination. """ # Define the conditions for each group - conditions: list[np.ndarray] = [ + conditions: list[np.ndarray[np.bool_]] = [ np.logical_and(alder >= 16, alder <= 19), # 16-19 år np.logical_and(alder >= 20, alder <= 24), # 20-24 år np.logical_and(alder >= 25, alder <= 29), # 25-29 år @@ -87,7 +89,7 @@ def nace_sn07_47grp( nace3 = pd.Series(nace_sn07.str[:3], name="nace3") # Define the conditions for each group - conditions: list[np.ndarray] = [ + conditions: list[np.ndarray[np.bool_]] = [ np.isin( nace2, ["01", "02", "03"] ), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass @@ -243,7 +245,7 @@ def nace_sn07_17grp( nace_str2 = nace_sn07_47grp(nace_sn07, display="number") # Define the conditions for each group - conditions: list[np.ndarray] = [ + conditions: list[np.ndarray[np.bool_]] = [ nace_str2 == "01", # 01-03 Jordbruk, skogbruk og fiske np.logical_and( nace_str2 >= "01", nace_str2 <= "03" @@ -331,7 +333,7 @@ def sektor2_grp( pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. """ # Define the conditions for each group - conditions: list[np.ndarray] = [ + conditions: list[np.ndarray[np.bool_]] = [ sektor == "6100", np.logical_and(sektor == "6500", undersektor != "007"), np.logical_and(sektor == "6500", undersektor == "007"), @@ -373,7 +375,7 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. """ # Define the conditions for each group - conditions: list[np.ndarray] = [ + conditions: list[np.ndarray[np.bool_]] = [ ansatte == 0, # No employees np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees From 25536471f30da0f510672fa4fc1064a73c3423c8 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 12:19:04 +0100 Subject: [PATCH 15/49] more explicitly specified numpy format --- src/ssb_arbmark_fagfunksjoner/groups.py | 102 ++++++++++++------------ 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 20fdc0e..265e607 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -1,7 +1,5 @@ """A collection of useful groups.""" -# List for appropriate type annotation - # Numpy for data wrangling import numpy as np @@ -21,7 +19,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: pd.Series: A pandas Series where the original person ages are replaced by group labels, keys, or a combination. """ # Define the conditions for each group - conditions: list[np.ndarray[np.bool_]] = [ + conditions = [ np.logical_and(alder >= 16, alder <= 19), # 16-19 år np.logical_and(alder >= 20, alder <= 24), # 20-24 år np.logical_and(alder >= 25, alder <= 29), # 25-29 år @@ -33,9 +31,9 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: np.logical_and(alder >= 55, alder <= 59), # 55-59 år np.logical_and(alder >= 60, alder <= 64), # 60-64 år np.logical_or(alder == 65, alder == 66), # 65-66 år - alder == 67, # 67 år - alder == 68, # 68 år - alder == 69, # 69 år + np.ndarray(alder == 67), # 67 år + np.ndarray(alder == 68), # 68 år + np.ndarray(alder == 69), # 69 år ] # Define the group labels with string keys @@ -65,7 +63,9 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: results = [f"{key} {value}" for key, value in groups.items()] # Apply the selected format to the series - return pd.Series(np.select(conditions, results, default="."), dtype="string") + return pd.Series( + np.select(conditions, np.ndarray(results), default="."), dtype="string" + ) def nace_sn07_47grp( @@ -89,7 +89,7 @@ def nace_sn07_47grp( nace3 = pd.Series(nace_sn07.str[:3], name="nace3") # Define the conditions for each group - conditions: list[np.ndarray[np.bool_]] = [ + conditions = [ np.isin( nace2, ["01", "02", "03"] ), # Jordbruk, skogbruk, fiske; Bergverksdrift og utvinning, utenom olje og gass @@ -102,55 +102,57 @@ def nace_sn07_47grp( np.isin(nace2, ["10", "11", "12"]), # Næringsmiddel-,drikkev.,tobakkind. np.isin(nace2, ["13", "14", "15"]), # Tekstil-,bekledn.-,lærvareind. np.isin(nace2, ["16", "17"]), # Trelast- og trevareind. - nace2 == "18", # Trykking, grafisk industri + np.ndarray(nace2 == "18"), # Trykking, grafisk industri np.isin( nace2, ["19", "20", "21"] ), # Petrolieum, kull, kjemisk og farmasøytisk industri np.isin(nace2, ["22", "23"]), # Gummivare-, plast-,mineralproduktind. - nace2 == "24", # Metallindustri - nace2 == "25", # Metallvareindustri + np.ndarray(nace2 == "24"), # Metallindustri + np.ndarray(nace2 == "25"), # Metallvareindustri np.isin(nace2, ["26", "27"]), # Data- og elektronisk industri nace2 == "28", # Maskinindustri np.logical_or( np.isin(nace2, ["29", "33"]), np.logical_and(nace3 >= "302", nace3 <= "309") ), # Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler - nace3 == "301", # Produksjon av skip og båter, inkl. oljeplattformer + np.ndarray( + nace3 == "301" + ), # Produksjon av skip og båter, inkl. oljeplattformer np.isin(nace2, ["31", "32"]), # Møbel og annen industri - nace2 == "35", # Elekstrisitet, gass, damp, varmtvann + np.ndarray([nace2 == "35"]), # Elekstrisitet, gass, damp, varmtvann np.logical_and(nace2 >= "36", nace2 <= "39"), # Vann, avløp og renovasjon np.isin(nace2, ["41", "42", "43"]), # Bygge- og anleggsvirksomhet - nace2 == "45", # Motorvognrep og -handel - nace2 == "46", # Agentur- og engroshandel - nace2 == "47", # Detaljhandel, unntatt motorvogner - nace2 == "49", # Landtransport og rørtransport - nace2 == "50", # Sjøfart - nace2 == "51", # Lufttransport - nace2 == "52", # Lagring og tjenester tilknyttet transport - nace2 == "53", # Posttjenester - nace2 == "55", # Overnattingsvirksomhet - nace2 == "56", # Serveringsvirksomhet + np.ndarray(nace2 == "45"), # Motorvognrep og -handel + np.ndarray(nace2 == "46"), # Agentur- og engroshandel + np.ndarray(nace2 == "47"), # Detaljhandel, unntatt motorvogner + np.ndarray(nace2 == "49"), # Landtransport og rørtransport + np.ndarray(nace2 == "50"), # Sjøfart + np.ndarray(nace2 == "51"), # Lufttransport + np.ndarray(nace2 == "52"), # Lagring og tjenester tilknyttet transport + np.ndarray(nace2 == "53"), # Posttjenester + np.ndarray(nace2 == "55"), # Overnattingsvirksomhet + np.ndarray(nace2 == "56"), # Serveringsvirksomhet np.isin(nace2, ["58", "59", "60"]), # Forlag, film-, TV-pr, kringkasting np.isin(nace2, ["61", "62", "63"]), # IKT-virksomhet - nace2 == "64", # Finansieringsvirksomhet (bank, m.m.) - nace2 == "65", # Forsikringsvirksomhet og pensjonskasser - nace2 == "66", # Finansiell tjenesteyting - nace2 == "68", # Omsetning og drift av fast eiendom + np.ndarray(nace2 == "64"), # Finansieringsvirksomhet (bank, m.m.) + np.ndarray(nace2 == "65"), # Forsikringsvirksomhet og pensjonskasser + np.ndarray(nace2 == "66"), # Finansiell tjenesteyting + np.ndarray(nace2 == "68"), # Omsetning og drift av fast eiendom np.isin(nace2, ["69", "70", "71"]), # Juridisk-, hovedkontor-, konsulentj. - nace2 == "72", # Forskning og utviklingsarbeid + np.ndarray(nace2 == "72"), # Forskning og utviklingsarbeid np.isin( nace2, ["73", "74", "75"] ), # Faglig, vitenskapelig og teknisk tjenesteyting ellers np.logical_and( nace2 >= "77", nace2 <= "82" ), # Forretningsmessig tjenesteyting ellers - nace2 == "84", # Off.adm., forsvar, sosialforsikring - nace2 == "85", # Undervining - nace2 == "86", # Helsetjenester + np.ndarray(nace2 == "84"), # Off.adm., forsvar, sosialforsikring + np.ndarray(nace2 == "85"), # Undervining + np.ndarray(nace2 == "86"), # Helsetjenester np.isin(nace2, ["87", "88"]), # Pleie og omsorg; Fritids- og sportsaktiviteter np.logical_and(nace2 >= "90", nace2 <= "93"), # Kultur, underholdning og fritid np.isin(nace2, ["94", "95", "96"]), # Annen tjenesteyting - nace2 == "97", # Lønnet husarbeid i private husholdninger - nace2 == "99", # Internasjonale organisasjoner + np.ndarray(nace2 == "97"), # Lønnet husarbeid i private husholdninger + np.ndarray(nace2 == "99"), # Internasjonale organisasjoner ] # Define the group labels with string keys @@ -214,7 +216,7 @@ def nace_sn07_47grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, results, default=default_code) + grouped = np.select(conditions, results.to_numpy(), default=default_code) return pd.Series(grouped, dtype="string") @@ -245,8 +247,8 @@ def nace_sn07_17grp( nace_str2 = nace_sn07_47grp(nace_sn07, display="number") # Define the conditions for each group - conditions: list[np.ndarray[np.bool_]] = [ - nace_str2 == "01", # 01-03 Jordbruk, skogbruk og fiske + conditions = [ + np.ndarray(nace_str2 == "01"), # 01-03 Jordbruk, skogbruk og fiske np.logical_and( nace_str2 >= "01", nace_str2 <= "03" ), # 05-09 Bergverksdrift og utvinning @@ -254,7 +256,7 @@ def nace_sn07_17grp( np.logical_and( nace_str2 >= "17", nace_str2 <= "18" ), # 35-39 Elektrisitet, vann og renovasjon - nace_str2 == "19", # 41-43 Bygge- og anleggsvirksomhet + np.ndarray(nace_str2 == "19"), # 41-43 Bygge- og anleggsvirksomhet np.logical_and( nace_str2 >= "20", nace_str2 <= "22" ), # 45-47 Varehandel, reparasjon av motorvogner @@ -273,9 +275,9 @@ def nace_sn07_17grp( np.logical_and( nace_str2 >= "35", nace_str2 <= "38" ), # 68-75 Teknisk tjenesteyting, eiendomsdrift - nace_str2 == "39", # 77-82 Forretningsmessig tjenesteyting - nace_str2 == "40", # 84 Off.adm., forsvar, sosialforsikring - nace_str2 == "41", # 85 Undervisning + np.ndarray(nace_str2 == "39"), # 77-82 Forretningsmessig tjenesteyting + np.ndarray(nace_str2 == "40"), # 84 Off.adm., forsvar, sosialforsikring + np.ndarray(nace_str2 == "41"), # 85 Undervisning np.logical_and( nace_str2 >= "42", nace_str2 <= "43" ), # 86-88 Helse- og sosialtjenester @@ -314,7 +316,7 @@ def nace_sn07_17grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, results, default=default_code) + grouped = np.select(conditions, results.to_numpy(), default=default_code) return pd.Series(grouped, dtype="string") @@ -333,12 +335,12 @@ def sektor2_grp( pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. """ # Define the conditions for each group - conditions: list[np.ndarray[np.bool_]] = [ - sektor == "6100", + conditions = [ + np.ndarray(sektor == "6100"), np.logical_and(sektor == "6500", undersektor != "007"), np.logical_and(sektor == "6500", undersektor == "007"), - sektor == "1510", - sektor == "1520", + np.ndarray(sektor == "1510"), + np.ndarray(sektor == "1520"), ] groups = { @@ -359,7 +361,7 @@ def sektor2_grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, results, default=default_code) + grouped = np.select(conditions, results.to_numpy(), default=default_code) return pd.Series(grouped, dtype="string") @@ -375,15 +377,15 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. """ # Define the conditions for each group - conditions: list[np.ndarray[np.bool_]] = [ - ansatte == 0, # No employees + conditions = [ + np.ndarray(ansatte == 0), # No employees np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees np.logical_and(ansatte >= 10, ansatte <= 19), # 10-19 employees np.logical_and(ansatte >= 20, ansatte <= 49), # 20-49 employees np.logical_and(ansatte >= 50, ansatte <= 99), # 50-99 employees np.logical_and(ansatte >= 100, ansatte <= 249), # 100-249 employees - ansatte >= 250, # 250 employees or more + np.ndarray(ansatte >= 250), # 250 employees or more ] # Define the group labels with string keys @@ -408,5 +410,5 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, results, default=default_code) + grouped = np.select(conditions, results.to_numpy(), default=default_code) return pd.Series(grouped, dtype="string") From 00a0c2d3d9db85bdbc09fe907328ca1ba7b2c680 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 12:33:24 +0100 Subject: [PATCH 16/49] to_numpy handles series while np.array handles list --- src/ssb_arbmark_fagfunksjoner/groups.py | 90 ++++++++++++------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 265e607..98f6e37 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -31,9 +31,9 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: np.logical_and(alder >= 55, alder <= 59), # 55-59 år np.logical_and(alder >= 60, alder <= 64), # 60-64 år np.logical_or(alder == 65, alder == 66), # 65-66 år - np.ndarray(alder == 67), # 67 år - np.ndarray(alder == 68), # 68 år - np.ndarray(alder == 69), # 69 år + (alder == 67).to_numpy(), # 67 år + (alder == 68).to_numpy(), # 68 år + (alder == 69).to_numpy(), # 69 år ] # Define the group labels with string keys @@ -64,7 +64,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: # Apply the selected format to the series return pd.Series( - np.select(conditions, np.ndarray(results), default="."), dtype="string" + np.select(conditions, np.array(results), default="."), dtype="string" ) @@ -102,57 +102,57 @@ def nace_sn07_47grp( np.isin(nace2, ["10", "11", "12"]), # Næringsmiddel-,drikkev.,tobakkind. np.isin(nace2, ["13", "14", "15"]), # Tekstil-,bekledn.-,lærvareind. np.isin(nace2, ["16", "17"]), # Trelast- og trevareind. - np.ndarray(nace2 == "18"), # Trykking, grafisk industri + (nace2 == "18").to_numpy(), # Trykking, grafisk industri np.isin( nace2, ["19", "20", "21"] ), # Petrolieum, kull, kjemisk og farmasøytisk industri np.isin(nace2, ["22", "23"]), # Gummivare-, plast-,mineralproduktind. - np.ndarray(nace2 == "24"), # Metallindustri - np.ndarray(nace2 == "25"), # Metallvareindustri + (nace2 == "24").to_numpy(), # Metallindustri + (nace2 == "25").to_numpy(), # Metallvareindustri np.isin(nace2, ["26", "27"]), # Data- og elektronisk industri - nace2 == "28", # Maskinindustri + (nace2 == "28").to_numpy(), # Maskinindustri np.logical_or( np.isin(nace2, ["29", "33"]), np.logical_and(nace3 >= "302", nace3 <= "309") ), # Transportmidelindustri, utenom 30.1; Produksjon av kjøretøy og tilhengere, unntatt motorvogner og motorsykler - np.ndarray( + ( nace3 == "301" - ), # Produksjon av skip og båter, inkl. oljeplattformer + ).to_numpy(), # Produksjon av skip og båter, inkl. oljeplattformer np.isin(nace2, ["31", "32"]), # Møbel og annen industri - np.ndarray([nace2 == "35"]), # Elekstrisitet, gass, damp, varmtvann + (nace2 == "35").to_numpy(), # Elekstrisitet, gass, damp, varmtvann np.logical_and(nace2 >= "36", nace2 <= "39"), # Vann, avløp og renovasjon np.isin(nace2, ["41", "42", "43"]), # Bygge- og anleggsvirksomhet - np.ndarray(nace2 == "45"), # Motorvognrep og -handel - np.ndarray(nace2 == "46"), # Agentur- og engroshandel - np.ndarray(nace2 == "47"), # Detaljhandel, unntatt motorvogner - np.ndarray(nace2 == "49"), # Landtransport og rørtransport - np.ndarray(nace2 == "50"), # Sjøfart - np.ndarray(nace2 == "51"), # Lufttransport - np.ndarray(nace2 == "52"), # Lagring og tjenester tilknyttet transport - np.ndarray(nace2 == "53"), # Posttjenester - np.ndarray(nace2 == "55"), # Overnattingsvirksomhet - np.ndarray(nace2 == "56"), # Serveringsvirksomhet + (nace2 == "45").to_numpy(), # Motorvognrep og -handel + (nace2 == "46").to_numpy(), # Agentur- og engroshandel + (nace2 == "47").to_numpy(), # Detaljhandel, unntatt motorvogner + (nace2 == "49").to_numpy(), # Landtransport og rørtransport + (nace2 == "50").to_numpy(), # Sjøfart + (nace2 == "51").to_numpy(), # Lufttransport + (nace2 == "52").to_numpy(), # Lagring og tjenester tilknyttet transport + (nace2 == "53").to_numpy(), # Posttjenester + (nace2 == "55").to_numpy(), # Overnattingsvirksomhet + (nace2 == "56").to_numpy(), # Serveringsvirksomhet np.isin(nace2, ["58", "59", "60"]), # Forlag, film-, TV-pr, kringkasting np.isin(nace2, ["61", "62", "63"]), # IKT-virksomhet - np.ndarray(nace2 == "64"), # Finansieringsvirksomhet (bank, m.m.) - np.ndarray(nace2 == "65"), # Forsikringsvirksomhet og pensjonskasser - np.ndarray(nace2 == "66"), # Finansiell tjenesteyting - np.ndarray(nace2 == "68"), # Omsetning og drift av fast eiendom + (nace2 == "64").to_numpy(), # Finansieringsvirksomhet (bank, m.m.) + (nace2 == "65").to_numpy(), # Forsikringsvirksomhet og pensjonskasser + (nace2 == "66").to_numpy(), # Finansiell tjenesteyting + (nace2 == "68").to_numpy(), # Omsetning og drift av fast eiendom np.isin(nace2, ["69", "70", "71"]), # Juridisk-, hovedkontor-, konsulentj. - np.ndarray(nace2 == "72"), # Forskning og utviklingsarbeid + (nace2 == "72").to_numpy(), # Forskning og utviklingsarbeid np.isin( nace2, ["73", "74", "75"] ), # Faglig, vitenskapelig og teknisk tjenesteyting ellers np.logical_and( nace2 >= "77", nace2 <= "82" ), # Forretningsmessig tjenesteyting ellers - np.ndarray(nace2 == "84"), # Off.adm., forsvar, sosialforsikring - np.ndarray(nace2 == "85"), # Undervining - np.ndarray(nace2 == "86"), # Helsetjenester + (nace2 == "84").to_numpy(), # Off.adm., forsvar, sosialforsikring + (nace2 == "85").to_numpy(), # Undervining + (nace2 == "86").to_numpy(), # Helsetjenester np.isin(nace2, ["87", "88"]), # Pleie og omsorg; Fritids- og sportsaktiviteter np.logical_and(nace2 >= "90", nace2 <= "93"), # Kultur, underholdning og fritid np.isin(nace2, ["94", "95", "96"]), # Annen tjenesteyting - np.ndarray(nace2 == "97"), # Lønnet husarbeid i private husholdninger - np.ndarray(nace2 == "99"), # Internasjonale organisasjoner + (nace2 == "97").to_numpy(), # Lønnet husarbeid i private husholdninger + (nace2 == "99").to_numpy(), # Internasjonale organisasjoner ] # Define the group labels with string keys @@ -216,7 +216,7 @@ def nace_sn07_47grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, results.to_numpy(), default=default_code) + grouped = np.select(conditions, np.array(results), default=default_code) return pd.Series(grouped, dtype="string") @@ -248,7 +248,7 @@ def nace_sn07_17grp( # Define the conditions for each group conditions = [ - np.ndarray(nace_str2 == "01"), # 01-03 Jordbruk, skogbruk og fiske + (nace_str2 == "01").to_numpy(), # 01-03 Jordbruk, skogbruk og fiske np.logical_and( nace_str2 >= "01", nace_str2 <= "03" ), # 05-09 Bergverksdrift og utvinning @@ -256,7 +256,7 @@ def nace_sn07_17grp( np.logical_and( nace_str2 >= "17", nace_str2 <= "18" ), # 35-39 Elektrisitet, vann og renovasjon - np.ndarray(nace_str2 == "19"), # 41-43 Bygge- og anleggsvirksomhet + (nace_str2 == "19").to_numpy(), # 41-43 Bygge- og anleggsvirksomhet np.logical_and( nace_str2 >= "20", nace_str2 <= "22" ), # 45-47 Varehandel, reparasjon av motorvogner @@ -275,9 +275,9 @@ def nace_sn07_17grp( np.logical_and( nace_str2 >= "35", nace_str2 <= "38" ), # 68-75 Teknisk tjenesteyting, eiendomsdrift - np.ndarray(nace_str2 == "39"), # 77-82 Forretningsmessig tjenesteyting - np.ndarray(nace_str2 == "40"), # 84 Off.adm., forsvar, sosialforsikring - np.ndarray(nace_str2 == "41"), # 85 Undervisning + (nace_str2 == "39").to_numpy(), # 77-82 Forretningsmessig tjenesteyting + (nace_str2 == "40").to_numpy(), # 84 Off.adm., forsvar, sosialforsikring + (nace_str2 == "41").to_numpy(), # 85 Undervisning np.logical_and( nace_str2 >= "42", nace_str2 <= "43" ), # 86-88 Helse- og sosialtjenester @@ -316,7 +316,7 @@ def nace_sn07_17grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, results.to_numpy(), default=default_code) + grouped = np.select(conditions, np.array(results), default=default_code) return pd.Series(grouped, dtype="string") @@ -336,11 +336,11 @@ def sektor2_grp( """ # Define the conditions for each group conditions = [ - np.ndarray(sektor == "6100"), + (sektor == "6100").to_numpy(), np.logical_and(sektor == "6500", undersektor != "007"), np.logical_and(sektor == "6500", undersektor == "007"), - np.ndarray(sektor == "1510"), - np.ndarray(sektor == "1520"), + (sektor == "1510").to_numpy(), + (sektor == "1520").to_numpy(), ] groups = { @@ -361,7 +361,7 @@ def sektor2_grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, results.to_numpy(), default=default_code) + grouped = np.select(conditions, np.array(results), default=default_code) return pd.Series(grouped, dtype="string") @@ -378,14 +378,14 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ """ # Define the conditions for each group conditions = [ - np.ndarray(ansatte == 0), # No employees + (ansatte == 0).to_numpy(), # No employees np.logical_and(ansatte >= 1, ansatte <= 4), # 1-4 employees np.logical_and(ansatte >= 5, ansatte <= 9), # 5-9 employees np.logical_and(ansatte >= 10, ansatte <= 19), # 10-19 employees np.logical_and(ansatte >= 20, ansatte <= 49), # 20-49 employees np.logical_and(ansatte >= 50, ansatte <= 99), # 50-99 employees np.logical_and(ansatte >= 100, ansatte <= 249), # 100-249 employees - np.ndarray(ansatte >= 250), # 250 employees or more + (ansatte >= 250).to_numpy(), # 250 employees or more ] # Define the group labels with string keys @@ -410,5 +410,5 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, results.to_numpy(), default=default_code) + grouped = np.select(conditions, np.array(results), default=default_code) return pd.Series(grouped, dtype="string") From dd762f4d4f36f9f6c54fb4d35659a59b61546a9f Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 12:38:12 +0100 Subject: [PATCH 17/49] second arg in select is list not array --- src/ssb_arbmark_fagfunksjoner/groups.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 98f6e37..8de7677 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -63,9 +63,7 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: results = [f"{key} {value}" for key, value in groups.items()] # Apply the selected format to the series - return pd.Series( - np.select(conditions, np.array(results), default="."), dtype="string" - ) + return pd.Series(np.select(conditions, results, default="."), dtype="string") def nace_sn07_47grp( @@ -216,7 +214,7 @@ def nace_sn07_47grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, np.array(results), default=default_code) + grouped = np.select(conditions, results, default=default_code) return pd.Series(grouped, dtype="string") @@ -316,7 +314,7 @@ def nace_sn07_17grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, np.array(results), default=default_code) + grouped = np.select(conditions, results, default=default_code) return pd.Series(grouped, dtype="string") @@ -361,7 +359,7 @@ def sektor2_grp( else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "999 Uoppgitt" - grouped = np.select(conditions, np.array(results), default=default_code) + grouped = np.select(conditions, results, default=default_code) return pd.Series(grouped, dtype="string") @@ -410,5 +408,5 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ else: results = [f"{key} {value}" for key, value in groups.items()] default_code = "99 Uoppgitt" - grouped = np.select(conditions, np.array(results), default=default_code) + grouped = np.select(conditions, results, default=default_code) return pd.Series(grouped, dtype="string") From d600e03cfcea78b0bff00c7ef2a095386ad6af91 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 12:58:48 +0100 Subject: [PATCH 18/49] test --- src/ssb_arbmark_fagfunksjoner/functions.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index ff81e5b..3bebda6 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -8,6 +8,7 @@ # Itertools for functions creating iterators for efficient looping import itertools +from datetime import datetime # Optional for explicit type hint from typing import Optional @@ -22,7 +23,9 @@ import pandas as pd -def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[int]: +def count_workdays( + from_dates: pd.Series["datetime"], to_dates: pd.Series[datetime] +) -> pd.Series[int]: """Counts the number of workdays between pairs of dates in given series. This function calculates the number of workdays for each pair of start and end dates @@ -366,7 +369,9 @@ def proc_sums( return sum_df -def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[bool]: +def ref_day( + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] +) -> pd.Series[bool]: """Determines if the reference day falls between given date ranges. This function checks if the 16th day of each month (reference day) is @@ -428,7 +433,9 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[bool]: return pd.Series(result, dtype="boolean") -def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series[bool]: +def ref_week( + from_dates: pd.Series[pd.datetime], to_dates: pd.Series[pd.datetime] +) -> pd.Series[bool]: """Determines if any date in each date range falls in the reference week. This function checks if any date between the 'from_dates' and 'to_dates' From f514f38c0626fc49c57348589abc673373923177 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:02:05 +0100 Subject: [PATCH 19/49] timestamp type for series --- src/ssb_arbmark_fagfunksjoner/functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 3bebda6..238966b 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -8,7 +8,6 @@ # Itertools for functions creating iterators for efficient looping import itertools -from datetime import datetime # Optional for explicit type hint from typing import Optional @@ -24,7 +23,7 @@ def count_workdays( - from_dates: pd.Series["datetime"], to_dates: pd.Series[datetime] + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] ) -> pd.Series[int]: """Counts the number of workdays between pairs of dates in given series. @@ -434,7 +433,7 @@ def ref_day( def ref_week( - from_dates: pd.Series[pd.datetime], to_dates: pd.Series[pd.datetime] + from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] ) -> pd.Series[bool]: """Determines if any date in each date range falls in the reference week. From 4054b5b19f651047c6e4b160270d806fc033e7c5 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:14:57 +0100 Subject: [PATCH 20/49] dates and numpy --- src/ssb_arbmark_fagfunksjoner/functions.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 238966b..37c9875 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -47,12 +47,12 @@ def count_workdays( - Holidays are determined based on the Norwegian calendar for each year in the date range. """ # Convert the from_dates and to_dates columns to numpy arrays - from_dates = from_dates.values - to_dates = to_dates.values + from_dates_np = from_dates.to_numpy() + to_dates_np = to_dates.to_numpy() # Extract the year from the from_dates and to_dates arrays - from_years = from_dates.astype("datetime64[Y]").astype(int) + 1970 - to_years = to_dates.astype("datetime64[Y]").astype(int) + 1970 + from_years = from_dates_np.astype("datetime64[Y]").astype(int) + 1970 + to_years = to_dates_np.astype("datetime64[Y]").astype(int) + 1970 # Find the max and min years min_year = np.min(from_years) @@ -67,12 +67,12 @@ def count_workdays( holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") # Convert from_dates and to_dates to datetime64 arrays - from_dates = from_dates.astype("datetime64[D]") - to_dates = to_dates.astype("datetime64[D]") + from_dates_d = from_dates_np.astype("datetime64[D]") + to_dates_d = to_dates_np.astype("datetime64[D]") # Find the max and min dates - min_date = np.min(from_dates) - max_date = np.max(to_dates) + min_date = np.min(from_dates_d) + max_date = np.max(to_dates_d) # Generate a range of dates between the min and max dates dates = np.arange( @@ -87,7 +87,7 @@ def count_workdays( # Calculate the number of workdays for each from and to date pair workdays_list = [] - for from_date, to_date in zip(from_dates, to_dates): + for from_date, to_date in zip(from_dates_d, to_dates_d): workdays_in_range = workdays[(workdays >= from_date) & (workdays <= to_date)] workdays_list.append(len(workdays_in_range)) From 0d61eabc35bcc84a31441b5e40a3e5cb82393c02 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:27:02 +0100 Subject: [PATCH 21/49] Tried importing module that was not recognised --- src/ssb_arbmark_fagfunksjoner/functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 37c9875..53c09a0 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -12,15 +12,15 @@ # Optional for explicit type hint from typing import Optional -# Holidays to calculate the number of holidays -import holidays - # Numpy for data wrangling import numpy as np # Pandas for table management import pandas as pd +# Holidays to calculate the number of holidays +from holidays import Norway + def count_workdays( from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] @@ -59,9 +59,9 @@ def count_workdays( max_year = np.max(to_years) if min_year == max_year: - norwegian_holidays = holidays.Norway(years=min_year) + norwegian_holidays = Norway(years=min_year) else: - norwegian_holidays = holidays.Norway(years=range(min_year, max_year + 1)) + norwegian_holidays = Norway(years=range(min_year, max_year + 1)) # Convert the holiday dates to a numpy array of datetime64 objects holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") From e34adc7cb6795b09e80f4dd09737860e4ccd4048 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:34:55 +0100 Subject: [PATCH 22/49] Changed to country code --- src/ssb_arbmark_fagfunksjoner/functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 53c09a0..ba616e8 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -18,8 +18,8 @@ # Pandas for table management import pandas as pd -# Holidays to calculate the number of holidays -from holidays import Norway +# Holidays in Norway +from holidays import NO def count_workdays( @@ -59,9 +59,9 @@ def count_workdays( max_year = np.max(to_years) if min_year == max_year: - norwegian_holidays = Norway(years=min_year) + norwegian_holidays = NO(years=min_year) else: - norwegian_holidays = Norway(years=range(min_year, max_year + 1)) + norwegian_holidays = NO(years=range(min_year, max_year + 1)) # Convert the holiday dates to a numpy array of datetime64 objects holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") From 8b53f4cfa075233a26be53051eb9820663e7eec0 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:42:47 +0100 Subject: [PATCH 23/49] Fix tuple in first_last_date function --- src/ssb_arbmark_fagfunksjoner/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index ba616e8..8f6ca54 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -100,7 +100,7 @@ def count_workdays( return pd.Series(workdays_list, dtype="Int64") -def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[int, int]: +def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[str, str]: """Given a year and a quarter, this function calculates the first and last dates of the specified quarter using pandas. Args: @@ -128,7 +128,7 @@ def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[int, int]: start_date_str = start_date.strftime("%Y-%m-%d") end_date_str = end_date.strftime("%Y-%m-%d") - return tuple(start_date_str, end_date_str) + return start_date_str, end_date_str def indicate_merge( From 892c6ddd5634cbbac88158ab3b024d081babd1e4 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 13:56:14 +0100 Subject: [PATCH 24/49] fix indicate merge function --- src/ssb_arbmark_fagfunksjoner/functions.py | 30 +++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 8f6ca54..7d9cef2 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -132,7 +132,7 @@ def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[str, str]: def indicate_merge( - left: pd.DataFrame(), right: pd.DataFrame(), how: str, on: list + left: pd.DataFrame, right: pd.DataFrame, how: str, on: list[str] ) -> pd.DataFrame: """Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. @@ -193,14 +193,26 @@ def indicate_merge( # Define the conditions and choices for np.select conditions = [ - (np_merge == "left_only") & ~duplicated_from_left, - (np_merge == "right_only") & ~duplicated_from_right, - (np_merge == "left_only") & duplicated_from_left, - (np_merge == "right_only") & duplicated_from_right, - (np_merge == "both") & ~duplicated_from_left & ~duplicated_from_right, - (np_merge == "both") & duplicated_from_left & ~duplicated_from_right, - (np_merge == "both") & ~duplicated_from_left & duplicated_from_right, - (np_merge == "both") & duplicated_from_right & duplicated_from_left, + np.logical_and(np_merge == "left_only", ~duplicated_from_left), + np.logical_and(np_merge == "right_only", ~duplicated_from_right), + np.logical_and(np_merge == "left_only", duplicated_from_left), + np.logical_and(np_merge == "right_only", duplicated_from_right), + np.logical_and( + np_merge == "both", + np.logical_and(~duplicated_from_left, ~duplicated_from_right), + ), + np.logical_and( + np_merge == "both", + np.logical_and(duplicated_from_left, ~duplicated_from_right), + ), + np.logical_and( + np_merge == "both", + np.logical_and(~duplicated_from_left, duplicated_from_right), + ), + np.logical_and( + np_merge == "both", + np.logical_and(duplicated_from_right, duplicated_from_left), + ), ] choices = [ From 15f49ec08b8028b8b906f7061b3d9d510214fd28 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 14:12:50 +0100 Subject: [PATCH 25/49] Added Literal and Union --- src/ssb_arbmark_fagfunksjoner/functions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 7d9cef2..a49c7d3 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -9,8 +9,10 @@ # Itertools for functions creating iterators for efficient looping import itertools -# Optional for explicit type hint +# Literal, Optional, Union and List for type hints +from typing import Literal from typing import Optional +from typing import Union # Numpy for data wrangling import numpy as np @@ -132,7 +134,10 @@ def first_last_date_quarter(year_str: str, quarter_str: str) -> tuple[str, str]: def indicate_merge( - left: pd.DataFrame, right: pd.DataFrame, how: str, on: list[str] + left: pd.DataFrame, + right: pd.DataFrame, + how: Literal["left", "right", "outer", "inner", "cross"], + on: Union[str, list[str]], ) -> pd.DataFrame: """Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. From 319126de51d89573c1ea23a9a405349394932295 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 14:35:09 +0100 Subject: [PATCH 26/49] type hints to proc_sums --- src/ssb_arbmark_fagfunksjoner/functions.py | 23 ++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index a49c7d3..75c76cd 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -6,23 +6,24 @@ """ +# Holidays in Norway # Itertools for functions creating iterators for efficient looping import itertools -# Literal, Optional, Union and List for type hints +# Type hints +from typing import Callable from typing import Literal from typing import Optional from typing import Union +import holidays + # Numpy for data wrangling import numpy as np # Pandas for table management import pandas as pd -# Holidays in Norway -from holidays import NO - def count_workdays( from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] @@ -61,9 +62,11 @@ def count_workdays( max_year = np.max(to_years) if min_year == max_year: - norwegian_holidays = NO(years=min_year) + norwegian_holidays = holidays.country_holidays("NO", years=min_year) else: - norwegian_holidays = NO(years=range(min_year, max_year + 1)) + norwegian_holidays = holidays.country_holidays( + "NO", years=range(min_year, max_year + 1) + ) # Convert the holiday dates to a numpy array of datetime64 objects holiday_dates = np.array(sorted(norwegian_holidays.keys()), dtype="datetime64[D]") @@ -246,7 +249,7 @@ def indicate_merge( return merged_df -def kv_intervall(start_p, slutt_p) -> list: +def kv_intervall(start_p, slutt_p) -> list[str]: """This function generates a list of quarterly periods between two given periods. The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year @@ -301,7 +304,7 @@ def proc_sums( df: pd.DataFrame, groups: list[str], values: list[str], - agg_func: Optional[dict] = None, + agg_func: Optional[dict[str, Callable]] = None, ) -> pd.DataFrame: """Compute aggregations for all combinations of columns and return a new DataFrame with these aggregations. @@ -312,7 +315,7 @@ def proc_sums( List of columns to be considered for groupings. values : list[str] List of columns on which the aggregation functions will be applied. - agg_func : Optional[dict], default None + agg_func : Optional[Dict[str, Callable]], default None Dictionary mapping columns to aggregation functions corresponding to the 'values' list. If None, defaults to 'sum' for all columns in 'values'. @@ -353,7 +356,7 @@ def proc_sums( raise ValueError( f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" ) - else: + elif agg_func is not None: # Correct a format causing error in agg-function for col, funcs in agg_func.items(): if isinstance(funcs, list) and len(funcs) == 1: From faa4dd78ae78d689b79aef9bae2cb5c63b9db1d0 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 14:39:58 +0100 Subject: [PATCH 27/49] added type to kv_intervall --- src/ssb_arbmark_fagfunksjoner/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 75c76cd..0ee1b2c 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -6,7 +6,6 @@ """ -# Holidays in Norway # Itertools for functions creating iterators for efficient looping import itertools @@ -16,6 +15,7 @@ from typing import Optional from typing import Union +# Holidays in Norway import holidays # Numpy for data wrangling @@ -249,7 +249,7 @@ def indicate_merge( return merged_df -def kv_intervall(start_p, slutt_p) -> list[str]: +def kv_intervall(start_p: str, slutt_p: str) -> list[str]: """This function generates a list of quarterly periods between two given periods. The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year From fcd4f1dd3e5c9db5112bcaf6307e7f3430abc0a7 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 15:02:17 +0100 Subject: [PATCH 28/49] changes to proc_sums --- src/ssb_arbmark_fagfunksjoner/functions.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 0ee1b2c..943b90e 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -350,13 +350,14 @@ def proc_sums( df = df[required_columns].copy() # Default aggregation: 'sum' for all 'values' columns. - if agg_func is None and not non_numeric_cols: - agg_func = {col: "sum" for col in values} - elif agg_func is None and non_numeric_cols: - raise ValueError( - f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" - ) - elif agg_func is not None: + if agg_func is None: + if not non_numeric_cols: + agg_func = {col: np.sum for col in values} + else: + raise ValueError( + f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" + ) + else: # Correct a format causing error in agg-function for col, funcs in agg_func.items(): if isinstance(funcs, list) and len(funcs) == 1: From e383329cb089eef432bead080795a4df6334b09d Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 17:47:49 +0100 Subject: [PATCH 29/49] added typing Any --- src/ssb_arbmark_fagfunksjoner/functions.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 943b90e..92fe5fc 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -10,6 +10,7 @@ import itertools # Type hints +from typing import Any from typing import Callable from typing import Literal from typing import Optional @@ -304,7 +305,7 @@ def proc_sums( df: pd.DataFrame, groups: list[str], values: list[str], - agg_func: Optional[dict[str, Callable]] = None, + agg_func: Optional[dict[str, Callable[..., Any]]] = None, ) -> pd.DataFrame: """Compute aggregations for all combinations of columns and return a new DataFrame with these aggregations. @@ -350,18 +351,18 @@ def proc_sums( df = df[required_columns].copy() # Default aggregation: 'sum' for all 'values' columns. - if agg_func is None: + if agg_func is not None: + for col, funcs in list(agg_func.items()): + if isinstance(funcs, list) and len(funcs) == 1: + # Directly assign the single function instead of the list + agg_func[col] = funcs[0] + elif agg_func is None: if not non_numeric_cols: agg_func = {col: np.sum for col in values} else: raise ValueError( f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" ) - else: - # Correct a format causing error in agg-function - for col, funcs in agg_func.items(): - if isinstance(funcs, list) and len(funcs) == 1: - agg_func[col] = funcs[0] # Initialize empty datframe sum_df = pd.DataFrame() From 21be3a87fdadad5ec4bb0a469c23f39c8f9163d2 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 18:02:05 +0100 Subject: [PATCH 30/49] Reference dates --- src/ssb_arbmark_fagfunksjoner/functions.py | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 92fe5fc..cb93eef 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -420,14 +420,14 @@ def ref_day( array([True, True]) """ # Convert the from_dates and to_dates columns to numpy arrays - from_dates = from_dates.values - to_dates = to_dates.values + from_dates_np = from_dates.to_numpy() + to_dates_np = to_dates.to_numpy() # Extract the year from the from_dates array - year = from_dates.astype("datetime64[Y]").astype(int) + 1970 + year = from_dates_np.astype("datetime64[Y]").astype(int) + 1970 # Check if the year is the same in the to_dates array - if not np.all(year == to_dates.astype("datetime64[Y]").astype(int) + 1970): + if not np.all(year == to_dates_np.astype("datetime64[Y]").astype(int) + 1970): # If the year is not the same, raise an error raise ValueError("Function can only be applied to dates in the same year!") @@ -437,10 +437,10 @@ def ref_day( raise ValueError("Function can only be applied to a single year!") # Extract the month from the from_dates array - month = from_dates.astype("datetime64[M]").astype(int) % 12 + 1 + month = from_dates_np.astype("datetime64[M]").astype(int) % 12 + 1 # Check if the month is the same in the to_dates array - if not np.all(month == to_dates.astype("datetime64[M]").astype(int) % 12 + 1): + if not np.all(month == to_dates_np.astype("datetime64[M]").astype(int) % 12 + 1): # If the month is not the same, raise an error raise ValueError("Function can only be applied to dates in the same months!") @@ -448,7 +448,7 @@ def ref_day( ref_days = np.array([f"{year[0]}-{m:02d}-16" for m in month], dtype="datetime64[D]") # Check if the reference day is within the range of the from_date and to_date - result = np.logical_and(from_dates <= ref_days, ref_days <= to_dates) + result = np.logical_and(from_dates_np <= ref_days, ref_days <= to_dates_np) # Return the result as an array of boolean values return pd.Series(result, dtype="boolean") @@ -496,13 +496,12 @@ def ref_week( raise ValueError("Function can only be applied to dates in the same months!") # Create a reference day for each month - ref_days = pd.to_datetime( - [f"{y}-{m:02d}-16" for y, m in zip(from_dates.dt.year, from_dates.dt.month)] + ref_days = pd.Series( + pd.to_datetime( + [f"{y}-{m:02d}-16" for y, m in zip(from_dates.dt.year, from_dates.dt.month)] + ) ) - # Convert ref_days to a Series object to use the dt accessor - ref_days = pd.Series(ref_days) - # Calculate the week numbers using pandas with Monday as the starting day from_weeks = from_dates.dt.isocalendar().week to_weeks = to_dates.dt.isocalendar().week From 79726694f9eabb692e13d0e595520d3c13b3528a Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 20:05:37 +0100 Subject: [PATCH 31/49] len logic for agg func --- src/ssb_arbmark_fagfunksjoner/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index cb93eef..c506116 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -353,7 +353,7 @@ def proc_sums( # Default aggregation: 'sum' for all 'values' columns. if agg_func is not None: for col, funcs in list(agg_func.items()): - if isinstance(funcs, list) and len(funcs) == 1: + if isinstance(funcs, list) and not len(funcs) > 1: # Directly assign the single function instead of the list agg_func[col] = funcs[0] elif agg_func is None: From 332f10ff3470e24a0860c295edcb5460ca5c464a Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 20:17:48 +0100 Subject: [PATCH 32/49] len logic for agg func --- src/ssb_arbmark_fagfunksjoner/functions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index c506116..07f6204 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -353,9 +353,10 @@ def proc_sums( # Default aggregation: 'sum' for all 'values' columns. if agg_func is not None: for col, funcs in list(agg_func.items()): - if isinstance(funcs, list) and not len(funcs) > 1: - # Directly assign the single function instead of the list - agg_func[col] = funcs[0] + if isinstance(funcs, list): + if len(funcs) == 1: + # Directly assign the single function instead of the list + agg_func[col] = str(funcs[0]) elif agg_func is None: if not non_numeric_cols: agg_func = {col: np.sum for col in values} From fa5f5536e9351d0c14b99f17de386564699d32c8 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 20:34:13 +0100 Subject: [PATCH 33/49] len logic for agg func --- src/ssb_arbmark_fagfunksjoner/functions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 07f6204..e44c1fb 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -350,15 +350,16 @@ def proc_sums( # Copy the dataframe and limit input to columns in the parameter df = df[required_columns].copy() - # Default aggregation: 'sum' for all 'values' columns. if agg_func is not None: for col, funcs in list(agg_func.items()): - if isinstance(funcs, list): - if len(funcs) == 1: + if not isinstance(funcs, str): + n_funcs = len(funcs) + if n_funcs == 1: # Directly assign the single function instead of the list agg_func[col] = str(funcs[0]) elif agg_func is None: if not non_numeric_cols: + # Default aggregation: 'sum' for all 'values' columns. agg_func = {col: np.sum for col in values} else: raise ValueError( From fd722e441b236f9850b611edfca3cf2f48533676 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 20:44:53 +0100 Subject: [PATCH 34/49] len logic for agg func --- src/ssb_arbmark_fagfunksjoner/functions.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index e44c1fb..c9202cf 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -351,12 +351,10 @@ def proc_sums( df = df[required_columns].copy() if agg_func is not None: - for col, funcs in list(agg_func.items()): - if not isinstance(funcs, str): - n_funcs = len(funcs) - if n_funcs == 1: - # Directly assign the single function instead of the list - agg_func[col] = str(funcs[0]) + for col, funcs in agg_func.items(): + if len(list(funcs)) == 1: + # Directly assign the single function instead of the list + agg_func[col] = str(next(iter(funcs))) elif agg_func is None: if not non_numeric_cols: # Default aggregation: 'sum' for all 'values' columns. From 84560b64034100fcc5ddae45511fe8cad7c110a7 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Thu, 7 Dec 2023 21:01:35 +0100 Subject: [PATCH 35/49] len logic for agg func --- src/ssb_arbmark_fagfunksjoner/functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index c9202cf..b9c825b 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -11,7 +11,6 @@ # Type hints from typing import Any -from typing import Callable from typing import Literal from typing import Optional from typing import Union @@ -305,7 +304,7 @@ def proc_sums( df: pd.DataFrame, groups: list[str], values: list[str], - agg_func: Optional[dict[str, Callable[..., Any]]] = None, + agg_func: Optional[dict[str, Union[Any, list[Any]]]] = None, ) -> pd.DataFrame: """Compute aggregations for all combinations of columns and return a new DataFrame with these aggregations. @@ -352,9 +351,10 @@ def proc_sums( if agg_func is not None: for col, funcs in agg_func.items(): - if len(list(funcs)) == 1: - # Directly assign the single function instead of the list - agg_func[col] = str(next(iter(funcs))) + # Check if funcs is a list + if isinstance(funcs, list) and len(funcs) == 1: + # If funcs is a list with exactly one item, extract that item + agg_func[col] = funcs[0] elif agg_func is None: if not non_numeric_cols: # Default aggregation: 'sum' for all 'values' columns. From 7f430c8e172cd906ee5dbe367b7486d8e29c0604 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 08:47:37 +0100 Subject: [PATCH 36/49] replaced Union with | --- src/ssb_arbmark_fagfunksjoner/functions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index b9c825b..df49c2c 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -13,7 +13,6 @@ from typing import Any from typing import Literal from typing import Optional -from typing import Union # Holidays in Norway import holidays @@ -140,7 +139,7 @@ def indicate_merge( left: pd.DataFrame, right: pd.DataFrame, how: Literal["left", "right", "outer", "inner", "cross"], - on: Union[str, list[str]], + on: str | list[str], ) -> pd.DataFrame: """Perform a merge of two DataFrames and prints a frequency table indicating the merge type for each row. @@ -304,7 +303,7 @@ def proc_sums( df: pd.DataFrame, groups: list[str], values: list[str], - agg_func: Optional[dict[str, Union[Any, list[Any]]]] = None, + agg_func: Optional[dict[str, Any | list[Any]]] = None, ) -> pd.DataFrame: """Compute aggregations for all combinations of columns and return a new DataFrame with these aggregations. From 1eb45b84323f14b1013d4193d71991a72a1bff45 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 12:59:04 +0100 Subject: [PATCH 37/49] Added tests for functions --- src/ssb_arbmark_fagfunksjoner/functions.py | 14 +- tests/test_functions.py | 159 ++++++++++++++++++++- tests/test_groups.py | 0 3 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 tests/test_groups.py diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index df49c2c..2aacb5a 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -24,9 +24,7 @@ import pandas as pd -def count_workdays( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] -) -> pd.Series[int]: +def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Counts the number of workdays between pairs of dates in given series. This function calculates the number of workdays for each pair of start and end dates @@ -357,7 +355,7 @@ def proc_sums( elif agg_func is None: if not non_numeric_cols: # Default aggregation: 'sum' for all 'values' columns. - agg_func = {col: np.sum for col in values} + agg_func = {col: "sum" for col in values} else: raise ValueError( f"Values {', '.join(non_numeric_cols)} are not numeric! Specify aggregation functions!" @@ -389,9 +387,7 @@ def proc_sums( return sum_df -def ref_day( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] -) -> pd.Series[bool]: +def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Determines if the reference day falls between given date ranges. This function checks if the 16th day of each month (reference day) is @@ -453,9 +449,7 @@ def ref_day( return pd.Series(result, dtype="boolean") -def ref_week( - from_dates: pd.Series[pd.Timestamp], to_dates: pd.Series[pd.Timestamp] -) -> pd.Series[bool]: +def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Determines if any date in each date range falls in the reference week. This function checks if any date between the 'from_dates' and 'to_dates' diff --git a/tests/test_functions.py b/tests/test_functions.py index 190f521..62a29e3 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1,6 +1,157 @@ -from ssb_arbmark_fagfunksjoner.functions import example_function +import pandas as pd +from ssb_arbmark_fagfunksjoner.functions import count_workdays +from ssb_arbmark_fagfunksjoner.functions import first_last_date_quarter +from ssb_arbmark_fagfunksjoner.functions import kv_intervall +from ssb_arbmark_fagfunksjoner.functions import proc_sums +from ssb_arbmark_fagfunksjoner.functions import ref_day +from ssb_arbmark_fagfunksjoner.functions import ref_week -def test_example_function() -> None: - assert example_function(1, 2) == "1 is less than 2" - assert example_function(1, 0) == "1 is greater than or equal to 0" + +def test_count_workdays() -> None: + test1_from_dates = pd.Series( + pd.to_datetime(["2023-01-01", "2023-12-20", "2024-05-03"]) + ) + test1_to_dates = pd.Series( + pd.to_datetime(["2023-01-10", "2024-01-05", "2024-05-31"]) + ) + test1_result = count_workdays(test1_from_dates, test1_to_dates).to_list() + test1_expected = [7, 10, 18] + + assert ( + test1_result == test1_expected + ), f"Expected {test1_expected}, but got {test1_result}" + + +def test_first_last_date_quarter() -> None: + test_cases = [ + ("2023", "1", ("2023-01-01", "2023-03-31")), + ("2023", "2", ("2023-04-01", "2023-06-30")), + ("2023", "3", ("2023-07-01", "2023-09-30")), + ("2023", "4", ("2023-10-01", "2023-12-31")), + # Add more test cases if necessary + ] + + for year, quarter, expected in test_cases: + result = first_last_date_quarter(year, quarter) + assert ( + result == expected + ), f"For {year} Q{quarter}, expected {expected}, but got {result}" + + +def test_kv_intervall() -> None: + test_cases = [ + ("2022k1", "2022k4", ["2022k1", "2022k2", "2022k3", "2022k4"]), + ("2022k2", "2023k1", ["2022k2", "2022k3", "2022k4", "2023k1"]), + ("2022k3", "2023k2", ["2022k3", "2022k4", "2023k1", "2023k2"]), + # Add more test cases as necessary + ] + + for start_p, slutt_p, expected in test_cases: + result = kv_intervall(start_p, slutt_p) + assert ( + result == expected + ), f"For {start_p} to {slutt_p}, expected {expected}, but got {result}" + + +def sample_df(): + return pd.DataFrame( + { + "A": ["foo", "foo", "foo", "bar", "bar", "bar"], + "B": ["one", "one", "two", "two", "two", "two"], + "C": [1, 2, 3, 4, 5, 6], + "D": [10, 20, 30, 40, 50, 60], + "E": ["1", "0", "1", "1", "0", "1"], + } + ) + + +def test_proc_sums_count_nunique(sample_df): + test1_result = proc_sums( + sample_df, groups=["B"], values=["E"], agg_func={"E": ["count", "nunique"]} + ).to_dict() + test1_expected = { + ("B", ""): {0: "one", 1: "two"}, + ("E", "count"): {0: 2, 1: 4}, + ("E", "nunique"): {0: 2, 1: 2}, + ("level", ""): {0: 1, 1: 1}, + } + assert test1_result == test1_expected, "Test 1 failed" + + +def test_proc_sums_default_sum(sample_df): + test2_result = proc_sums(sample_df, groups=["A", "B"], values=["C"]).to_dict() + test2_expected = { + "A": {0: "bar", 1: "foo", 2: "foo", 3: "bar", 4: "foo", 5: "Total", 6: "Total"}, + "B": {0: "two", 1: "one", 2: "two", 3: "Total", 4: "Total", 5: "one", 6: "two"}, + "C": {0: 15, 1: 3, 2: 3, 3: 15, 4: 6, 5: 3, 6: 18}, + "level": {0: 2, 1: 2, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1}, + } + assert test2_result == test2_expected, "Test 2 failed" + + +def test_proc_sums_custom_aggregations(sample_df): + test3_result = proc_sums( + sample_df, + groups=["A", "B"], + values=["C", "D"], + agg_func={"C": "sum", "D": "mean"}, + ).to_dict() + test3_expected = { + "A": {0: "bar", 1: "foo", 2: "foo", 3: "bar", 4: "foo", 5: "Total", 6: "Total"}, + "B": {0: "two", 1: "one", 2: "two", 3: "Total", 4: "Total", 5: "one", 6: "two"}, + "C": {0: 15, 1: 3, 2: 3, 3: 15, 4: 6, 5: 3, 6: 18}, + "D": {0: 50.0, 1: 15.0, 2: 30.0, 3: 50.0, 4: 20.0, 5: 15.0, 6: 45.0}, + "level": {0: 2, 1: 2, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1}, + } + assert test3_result == test3_expected, "Test 3 failed" + + +def test_ref_day_within_range(): + from_dates = pd.Series(["2023-01-01", "2023-02-10"]) + to_dates = pd.Series(["2023-01-20", "2023-02-18"]) + expected = pd.Series([True, True]) + assert ref_day(from_dates, to_dates).equals( + expected + ), "16th day within range test failed" + + +def test_ref_day_outside_range(): + from_dates = pd.Series(["2023-03-17", "2023-04-18"]) + to_dates = pd.Series(["2023-03-30", "2023-04-25"]) + expected = pd.Series([False, False]) + assert ref_day(from_dates, to_dates).equals( + expected + ), "16th day outside range test failed" + + +def test_ref_week_within_range(): + from_dates = pd.Series( + pd.to_datetime( + [ + "2023-01-22", + "2023-04-01", + ] + ) + ) + to_dates = pd.Series( + pd.to_datetime( + [ + "2023-01-31", + "2023-04-15", + ] + ) + ) + expected = pd.Series([True, True]) + assert ref_week(from_dates, to_dates).equals( + expected + ), "Reference week within range test failed" + + +def test_ref_week_outside_range(): + from_dates = pd.Series(pd.to_datetime(["2023-01-01", "2023-04-17"])) + to_dates = pd.Series(pd.to_datetime(["2023-01-15", "2023-04-30"])) + expected = pd.Series([False, False]) + assert ref_week(from_dates, to_dates).equals( + expected + ), "Reference week outside range test failed" diff --git a/tests/test_groups.py b/tests/test_groups.py new file mode 100644 index 0000000..e69de29 From 226f53b8dbafe3a48748e1262febb20e7fefe5a3 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:02:32 +0100 Subject: [PATCH 38/49] Added tests for functions --- tests/test_functions.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index 62a29e3..a6bf58c 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -54,7 +54,7 @@ def test_kv_intervall() -> None: ), f"For {start_p} to {slutt_p}, expected {expected}, but got {result}" -def sample_df(): +def sample_df() -> pd.DataFrame: return pd.DataFrame( { "A": ["foo", "foo", "foo", "bar", "bar", "bar"], @@ -66,7 +66,7 @@ def sample_df(): ) -def test_proc_sums_count_nunique(sample_df): +def test_proc_sums_count_nunique(sample_df) -> None: test1_result = proc_sums( sample_df, groups=["B"], values=["E"], agg_func={"E": ["count", "nunique"]} ).to_dict() @@ -79,7 +79,7 @@ def test_proc_sums_count_nunique(sample_df): assert test1_result == test1_expected, "Test 1 failed" -def test_proc_sums_default_sum(sample_df): +def test_proc_sums_default_sum(sample_df) -> None: test2_result = proc_sums(sample_df, groups=["A", "B"], values=["C"]).to_dict() test2_expected = { "A": {0: "bar", 1: "foo", 2: "foo", 3: "bar", 4: "foo", 5: "Total", 6: "Total"}, @@ -90,7 +90,7 @@ def test_proc_sums_default_sum(sample_df): assert test2_result == test2_expected, "Test 2 failed" -def test_proc_sums_custom_aggregations(sample_df): +def test_proc_sums_custom_aggregations(sample_df) -> None: test3_result = proc_sums( sample_df, groups=["A", "B"], @@ -107,7 +107,7 @@ def test_proc_sums_custom_aggregations(sample_df): assert test3_result == test3_expected, "Test 3 failed" -def test_ref_day_within_range(): +def test_ref_day_within_range() -> None: from_dates = pd.Series(["2023-01-01", "2023-02-10"]) to_dates = pd.Series(["2023-01-20", "2023-02-18"]) expected = pd.Series([True, True]) @@ -116,7 +116,7 @@ def test_ref_day_within_range(): ), "16th day within range test failed" -def test_ref_day_outside_range(): +def test_ref_day_outside_range() -> None: from_dates = pd.Series(["2023-03-17", "2023-04-18"]) to_dates = pd.Series(["2023-03-30", "2023-04-25"]) expected = pd.Series([False, False]) @@ -125,7 +125,7 @@ def test_ref_day_outside_range(): ), "16th day outside range test failed" -def test_ref_week_within_range(): +def test_ref_week_within_range() -> None: from_dates = pd.Series( pd.to_datetime( [ @@ -148,7 +148,7 @@ def test_ref_week_within_range(): ), "Reference week within range test failed" -def test_ref_week_outside_range(): +def test_ref_week_outside_range() -> None: from_dates = pd.Series(pd.to_datetime(["2023-01-01", "2023-04-17"])) to_dates = pd.Series(pd.to_datetime(["2023-01-15", "2023-04-30"])) expected = pd.Series([False, False]) From 6a4cc48bc0c232b05ff3132c86ef5d644df94384 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:05:06 +0100 Subject: [PATCH 39/49] Added tests for functions --- tests/test_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index a6bf58c..9503f9d 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -66,7 +66,7 @@ def sample_df() -> pd.DataFrame: ) -def test_proc_sums_count_nunique(sample_df) -> None: +def test_proc_sums_count_nunique(sample_df: pd.DataFrame) -> None: test1_result = proc_sums( sample_df, groups=["B"], values=["E"], agg_func={"E": ["count", "nunique"]} ).to_dict() @@ -79,7 +79,7 @@ def test_proc_sums_count_nunique(sample_df) -> None: assert test1_result == test1_expected, "Test 1 failed" -def test_proc_sums_default_sum(sample_df) -> None: +def test_proc_sums_default_sum(sample_df: pd.DataFrame) -> None: test2_result = proc_sums(sample_df, groups=["A", "B"], values=["C"]).to_dict() test2_expected = { "A": {0: "bar", 1: "foo", 2: "foo", 3: "bar", 4: "foo", 5: "Total", 6: "Total"}, @@ -90,7 +90,7 @@ def test_proc_sums_default_sum(sample_df) -> None: assert test2_result == test2_expected, "Test 2 failed" -def test_proc_sums_custom_aggregations(sample_df) -> None: +def test_proc_sums_custom_aggregations(sample_df: pd.DataFrame) -> None: test3_result = proc_sums( sample_df, groups=["A", "B"], From bb1e82567334e168992f05402e0d64b835dc014f Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:12:36 +0100 Subject: [PATCH 40/49] Removed wrong optional --- src/ssb_arbmark_fagfunksjoner/groups.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/groups.py b/src/ssb_arbmark_fagfunksjoner/groups.py index 8de7677..3acc849 100644 --- a/src/ssb_arbmark_fagfunksjoner/groups.py +++ b/src/ssb_arbmark_fagfunksjoner/groups.py @@ -12,8 +12,8 @@ def alder_grp(alder: pd.Series[int], display: str = "label") -> pd.Series[str]: Parameters: alder (pd.Series): A pandas Series containing the person ages. - display (str, optional): If 'label', returns group labels; if 'number', returns keys; - for any other string, returns a combination of keys and labels. + display (str): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original person ages are replaced by group labels, keys, or a combination. @@ -73,8 +73,8 @@ def nace_sn07_47grp( Parameters: nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - display (str, optional): If 'label', returns group labels; if 'number', returns keys; - for any other string, returns a combination of keys and labels. + display (str): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. @@ -225,8 +225,8 @@ def nace_sn07_17grp( Parameters: nace_sn07 (pd.Series): A pandas Series containing the NACE-codes. - display (str, optional): If 'label', returns group labels; if 'number', returns keys; - for any other string, returns a combination of keys and labels. + display (str): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original NACE-codes are replaced by group labels or keys. @@ -326,8 +326,8 @@ def sektor2_grp( Parameters: sektor (pd.Series): A pandas Series containing the sector codes. undersektor (pd.Series): A pandas Series containing the subsector codes. - display (str, optional): If 'label', returns group labels; if 'number', returns keys; - for any other string, returns a combination of keys and labels. + display (str): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original sector and subsectors are replaced by group labels or keys. @@ -368,8 +368,8 @@ def virk_str_8grp(ansatte: pd.Series[int], display: str = "label") -> pd.Series[ Parameters: ansatte (pd.Series): A pandas Series containing the employee counts. - display (str, optional): If 'label', returns group labels; if 'number', returns keys; - for any other string, returns a combination of keys and labels. + display (str): If 'label', returns group labels; if 'number', returns keys; + for any other string, returns a combination of keys and labels. Returns: pd.Series: A pandas Series where the original employee counts are replaced by group labels or keys. From 0e1332c770e7db7ae28c1ce18057b95de3ddcc23 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:20:34 +0100 Subject: [PATCH 41/49] DAR003 --- src/ssb_arbmark_fagfunksjoner/functions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 2aacb5a..07a8156 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -405,8 +405,7 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: 16th day of the month for each period is within the respective date range. Raises: - ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if - they are not in the same month, or if multiple years are present across the dates. + ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month, or if multiple years are present across the dates. Example: >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) @@ -469,8 +468,7 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: reference week of the month. Raises: - ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if - they are not in the same month. + ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month. Example: >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) From b81a7f92b632461ad79fb8f1141f38545b5baff7 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:22:48 +0100 Subject: [PATCH 42/49] DAR003 --- src/ssb_arbmark_fagfunksjoner/functions.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 07a8156..1493e65 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -27,10 +27,7 @@ def count_workdays(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Counts the number of workdays between pairs of dates in given series. - This function calculates the number of workdays for each pair of start and end dates - provided in the `from_dates` and `to_dates` series. It handles date ranges spanning multiple - years and excludes weekends and holidays specific to Norway. The function dynamically - fetches Norwegian holidays for the relevant years based on the input dates. + This function calculates the number of workdays for each pair of start and end dates provided in the `from_dates` and `to_dates` series. It handles date ranges spanning multiple years and excludes weekends and holidays specific to Norway. The function dynamically fetches Norwegian holidays for the relevant years based on the input dates. Args: from_dates (pd.Series): A pandas Series containing the start dates of the periods. @@ -249,9 +246,7 @@ def indicate_merge( def kv_intervall(start_p: str, slutt_p: str) -> list[str]: """This function generates a list of quarterly periods between two given periods. - The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year - and Q is a quarter (1 to 4). The function handles cases where the start and end - periods are in the same year or in different years. + The periods are strings in the format 'YYYYkQ', where YYYY is a 4-digit year and Q is a quarter (1 to 4). The function handles cases where the start and end periods are in the same year or in different years. Parameters: start_p (str): The start period in the format 'YYYYkQ'. @@ -390,9 +385,7 @@ def proc_sums( def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Determines if the reference day falls between given date ranges. - This function checks if the 16th day of each month (reference day) is - within the range specified by the corresponding 'from_dates' and 'to_dates'. - It requires that both 'from_dates' and 'to_dates' are in the same year and month. + This function checks if the 16th day of each month (reference day) is within the range specified by the corresponding 'from_dates' and 'to_dates'. It requires that both 'from_dates' and 'to_dates' are in the same year and month. Args: from_dates (pd.Series): A Series of dates representing the start of a period. @@ -451,10 +444,7 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: """Determines if any date in each date range falls in the reference week. - This function checks if any date between the 'from_dates' and 'to_dates' - is within the reference week. The reference week is defined as the week - which includes the 16th day of each month. It requires that both - 'from_dates' and 'to_dates' are in the same year and the same month. + This function checks if any date between the 'from_dates' and 'to_dates' is within the reference week. The reference week is defined as the week which includes the 16th day of each month. It requires that both 'from_dates' and 'to_dates' are in the same year and the same month. Args: from_dates (pd.Series): A Series of dates representing the start of a period. @@ -463,9 +453,7 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: These dates should also be in the 'YYYY-MM-DD' format. Returns: - pd.Series: A Series of booleans, where each boolean corresponds to whether - any date in the period from 'from_dates' to 'to_dates' falls within the - reference week of the month. + pd.Series: A Series of booleans, where each boolean corresponds to whether any date in the period from 'from_dates' to 'to_dates' falls within the reference week of the month. Raises: ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month. From f4ba10542d76b5eaea47fdd70371ad6a0bc8fcc6 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:41:32 +0100 Subject: [PATCH 43/49] datetime specified in example --- src/ssb_arbmark_fagfunksjoner/functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index 1493e65..f618975 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -401,8 +401,8 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month, or if multiple years are present across the dates. Example: - >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) - >>> to_dates = pd.Series(['2023-01-20', '2023-02-18']) + >>> from_dates = pd.Series(pd.to_datetime(['2023-01-01', '2023-02-10'])) + >>> to_dates = pd.Series(pd.to_datetime(['2023-01-20', '2023-02-18'])) >>> ref_day(from_dates, to_dates) array([True, True]) """ @@ -459,8 +459,8 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month. Example: - >>> from_dates = pd.Series(['2023-01-01', '2023-02-10']) - >>> to_dates = pd.Series(['2023-01-20', '2023-02-18']) + >>> from_dates = pd.Series(pd.to_datetime(['2023-01-01', '2023-02-10'])) + >>> to_dates = pd.Series(pd.to_datetime(['2023-01-20', '2023-02-18'])) >>> ref_week(from_dates, to_dates) pd.Series([True, True]) """ From ecb0a8e94bbac05f3e7c46b4c712ddcf4f0a60ad Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 13:46:58 +0100 Subject: [PATCH 44/49] fixture --- tests/test_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_functions.py b/tests/test_functions.py index 9503f9d..70849b6 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -1,4 +1,5 @@ import pandas as pd +import pytest from ssb_arbmark_fagfunksjoner.functions import count_workdays from ssb_arbmark_fagfunksjoner.functions import first_last_date_quarter @@ -54,6 +55,7 @@ def test_kv_intervall() -> None: ), f"For {start_p} to {slutt_p}, expected {expected}, but got {result}" +@pytest.fixture def sample_df() -> pd.DataFrame: return pd.DataFrame( { From 293f6c8828a4b42e6e1b38c3ee8b9df36398961b Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 14:29:37 +0100 Subject: [PATCH 45/49] type error --- src/ssb_arbmark_fagfunksjoner/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index f618975..fbf6ce7 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -407,8 +407,8 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: array([True, True]) """ # Convert the from_dates and to_dates columns to numpy arrays - from_dates_np = from_dates.to_numpy() - to_dates_np = to_dates.to_numpy() + from_dates_np = from_dates.to_numpy().astype("datetime64[D]") + to_dates_np = to_dates.to_numpy().astype("datetime64[D]") # Extract the year from the from_dates array year = from_dates_np.astype("datetime64[Y]").astype(int) + 1970 From 265bde4d8bcc2b573987471123a96daf46a7f088 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 14:38:43 +0100 Subject: [PATCH 46/49] test equal operator --- tests/test_functions.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index 70849b6..fdf150a 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -113,8 +113,8 @@ def test_ref_day_within_range() -> None: from_dates = pd.Series(["2023-01-01", "2023-02-10"]) to_dates = pd.Series(["2023-01-20", "2023-02-18"]) expected = pd.Series([True, True]) - assert ref_day(from_dates, to_dates).equals( - expected + assert ( + ref_day(from_dates, to_dates) == expected ), "16th day within range test failed" @@ -122,31 +122,17 @@ def test_ref_day_outside_range() -> None: from_dates = pd.Series(["2023-03-17", "2023-04-18"]) to_dates = pd.Series(["2023-03-30", "2023-04-25"]) expected = pd.Series([False, False]) - assert ref_day(from_dates, to_dates).equals( - expected + assert ( + ref_day(from_dates, to_dates) == expected ), "16th day outside range test failed" def test_ref_week_within_range() -> None: - from_dates = pd.Series( - pd.to_datetime( - [ - "2023-01-22", - "2023-04-01", - ] - ) - ) - to_dates = pd.Series( - pd.to_datetime( - [ - "2023-01-31", - "2023-04-15", - ] - ) - ) + from_dates = pd.Series(pd.to_datetime(["2023-01-22", "2023-04-01"])) + to_dates = pd.Series(pd.to_datetime(["2023-01-31", "2023-04-15"])) expected = pd.Series([True, True]) - assert ref_week(from_dates, to_dates).equals( - expected + assert ( + ref_week(from_dates, to_dates) == expected ), "Reference week within range test failed" @@ -154,6 +140,6 @@ def test_ref_week_outside_range() -> None: from_dates = pd.Series(pd.to_datetime(["2023-01-01", "2023-04-17"])) to_dates = pd.Series(pd.to_datetime(["2023-01-15", "2023-04-30"])) expected = pd.Series([False, False]) - assert ref_week(from_dates, to_dates).equals( - expected + assert ( + ref_week(from_dates, to_dates) == expected ), "Reference week outside range test failed" From de247f7720b6c1d8597ec28b0fa5a0a1e6d2512a Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 14:43:32 +0100 Subject: [PATCH 47/49] all to fix ambiguouity --- tests/test_functions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_functions.py b/tests/test_functions.py index fdf150a..9db8143 100644 --- a/tests/test_functions.py +++ b/tests/test_functions.py @@ -115,7 +115,7 @@ def test_ref_day_within_range() -> None: expected = pd.Series([True, True]) assert ( ref_day(from_dates, to_dates) == expected - ), "16th day within range test failed" + ).all(), "16th day within range test failed" def test_ref_day_outside_range() -> None: @@ -124,7 +124,7 @@ def test_ref_day_outside_range() -> None: expected = pd.Series([False, False]) assert ( ref_day(from_dates, to_dates) == expected - ), "16th day outside range test failed" + ).all(), "16th day outside range test failed" def test_ref_week_within_range() -> None: @@ -133,7 +133,7 @@ def test_ref_week_within_range() -> None: expected = pd.Series([True, True]) assert ( ref_week(from_dates, to_dates) == expected - ), "Reference week within range test failed" + ).all(), "Reference week within range test failed" def test_ref_week_outside_range() -> None: @@ -142,4 +142,4 @@ def test_ref_week_outside_range() -> None: expected = pd.Series([False, False]) assert ( ref_week(from_dates, to_dates) == expected - ), "Reference week outside range test failed" + ).all(), "Reference week outside range test failed" From 34fcc4cdd66b7d552e4fffa36bd6bcf74e82973d Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Fri, 8 Dec 2023 14:49:15 +0100 Subject: [PATCH 48/49] removed some docstring example texts --- src/ssb_arbmark_fagfunksjoner/functions.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/ssb_arbmark_fagfunksjoner/functions.py b/src/ssb_arbmark_fagfunksjoner/functions.py index fbf6ce7..f93e957 100644 --- a/src/ssb_arbmark_fagfunksjoner/functions.py +++ b/src/ssb_arbmark_fagfunksjoner/functions.py @@ -399,12 +399,6 @@ def ref_day(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: Raises: ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month, or if multiple years are present across the dates. - - Example: - >>> from_dates = pd.Series(pd.to_datetime(['2023-01-01', '2023-02-10'])) - >>> to_dates = pd.Series(pd.to_datetime(['2023-01-20', '2023-02-18'])) - >>> ref_day(from_dates, to_dates) - array([True, True]) """ # Convert the from_dates and to_dates columns to numpy arrays from_dates_np = from_dates.to_numpy().astype("datetime64[D]") @@ -457,12 +451,6 @@ def ref_week(from_dates: pd.Series, to_dates: pd.Series) -> pd.Series: Raises: ValueError: If 'from_dates' and 'to_dates' are not in the same year, or if they are not in the same month. - - Example: - >>> from_dates = pd.Series(pd.to_datetime(['2023-01-01', '2023-02-10'])) - >>> to_dates = pd.Series(pd.to_datetime(['2023-01-20', '2023-02-18'])) - >>> ref_week(from_dates, to_dates) - pd.Series([True, True]) """ # Check if the year is the same in the to_dates array if not np.all(from_dates.dt.year == to_dates.dt.year): From c27263213d4865e5d959bb1985dc7b9be53e8844 Mon Sep 17 00:00:00 2001 From: Jan Sebastian Rothe Date: Wed, 13 Dec 2023 13:31:18 +0100 Subject: [PATCH 49/49] Satt strict til false --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e25b16d..ed04dc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ show_missing = true fail_under = 50 [tool.mypy] -strict = true +strict = false warn_unreachable = true pretty = true show_column_numbers = true