Skip to content

Commit

Permalink
Merge pull request #51 from dsi-clinic/MN_abstract_class
Browse files Browse the repository at this point in the history
Mn abstract class
  • Loading branch information
averyschoen authored Dec 5, 2023
2 parents 261fa60 + 40f92a1 commit 57390b3
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 6 deletions.
22 changes: 22 additions & 0 deletions utils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## Minnesota Util:
#### MN_util.py

Utility functions for Minnesota (MN) exploratory data analysis (EDA):
1. datasets_col_consistent (deprecated)
2. preprocess_candidate_df (deprecated)
3. preprocess_noncandidate_df (deprecated)
4. preprocess_contribution_df (deprecated)
5. drop_nonclassifiable (deprecated)
6. preprocess_expenditure (deprecated)
7. drop_nonclassifiable_expenditure (deprecated)

#### minnesota.py
1. entity_name_dictionary
2. preprocess_candidate_contribution
3. preprocess_noncandidate_contribution
4. preprocess_expenditure
5. preprocess
6. clean
7. standardize
8. create_tables
9. clean_state
1 change: 1 addition & 0 deletions utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@
"fundraiser",
]


PA_MAIN_URL = "https://www.dos.pa.gov"
PA_ZIPPED_URL = (
"/VotingElections/CandidatesCommittees/CampaignFinance/Resources/Documents/"
Expand Down
26 changes: 20 additions & 6 deletions utils/minnesota.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import numpy as np
import pandas as pd
from clean import StateCleaner
from constants import (
from utils.clean import StateCleaner
from utils.constants import (
MN_CANDIDATE_CONTRIBUTION_COL,
MN_CANDIDATE_CONTRIBUTION_MAP,
MN_INDEPENDENT_EXPENDITURE_COL,
Expand Down Expand Up @@ -242,11 +242,19 @@ def standardize(self, data: list[pd.DataFrame]) -> list[pd.DataFrame]:
Returns: A list of 1 standardized DataFrame matching database schema
"""

df = data[0]

df = data[0].copy() # Create a copy to avoid modifying the original DataFrame
df["company"] = None # MN dataset has no company information
df["party"] = None # MN dataset has no party information
df["transaction_id"] = None
df["office_sought"] = df["office_sought"].replace(MN_RACE_MAP)

# Standardize entity names to match other states in the database schema
entity_map = self.entity_name_dictionary()
df["recipient_type"] = df["recipient_type"].map(entity_map)
df["donor_type"] = df["donor_type"].map(entity_map)
id_mapping = {}

# Standardize entity names to match other states in the database schema
df["recipient_type"] = df["recipient_type"].replace(self.entity_name_dictionary)
df["donor_type"] = df["donor_type"].replace(self.entity_name_dictionary)
Expand All @@ -255,6 +263,7 @@ def standardize(self, data: list[pd.DataFrame]) -> list[pd.DataFrame]:
recipient_uuid = str(uuid.uuid4())
donor_uuid = str(uuid.uuid4())
transaction_uuid = str(uuid.uuid4())

# MN has partial recipient id, generate uuid, map them to original id
if row["recipient_id"]:
if (
Expand All @@ -271,7 +280,9 @@ def standardize(self, data: list[pd.DataFrame]) -> list[pd.DataFrame]:
row["recipient_id"],
recipient_uuid,
)
df["recipient_id"].iloc[index] = recipient_uuid

df.at[index, "recipient_id"] = recipient_uuid

# MN has partial donor id, generate uuid, map them to original id
if row["donor_id"]:
if row["donor_type"] == "Individual" or row["donor_type"] == "Lobbyist":
Expand All @@ -285,8 +296,11 @@ def standardize(self, data: list[pd.DataFrame]) -> list[pd.DataFrame]:
row["donor_id"],
donor_uuid,
)
df["donor_id"].iloc[index] = donor_uuid
df["transaction_id"].iloc[index] = transaction_uuid

df.at[index, "donor_id"] = donor_uuid

df.at[index, "transaction_id"] = transaction_uuid


# Convert id_mapping to DataFrame and save to CSV
id_mapping_df = pd.DataFrame.from_dict(
Expand Down

0 comments on commit 57390b3

Please sign in to comment.