From 59e9ad04a36d73f3919dca0be494a1e0b329817a Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:24:23 -0600 Subject: [PATCH 001/214] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1340d12d..708b1e82 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 2023-fall-clinic-climate-cabinet +# 2024-winter-clinic-climate-cabinet ## Data Science Clinic Project Goals @@ -37,7 +37,7 @@ If you prefer to develop inside a container with VS Code then do the following s ### Project Pipeline 1. Collect the data through **one** of the steps below a. Collect state's finance campaign data either from web scraping (AZ, MI, PA) or direct download (MN) OR - b. Go to the [Project's Google Drive]('https://drive.google.com/drive/u/2/folders/1HUbOU0KRZy85mep2SHMU48qUQ1ZOSNce') to download each state's data to their local repo following this format: repo_root / "data" / "raw" / / "file" + b. Go to the [Project's Google Drive]('https://drive.google.com/drive/u/2/folders/1HUbOU0KRZy85mep2SHMU48qUQ1ZOSNce') to download each state's data to their local repo following this format: repo_root / "data" / "raw" / state acronym / "file" 2. Open in development container which installs all necessary packages. 3. Run the project by running ```python utils/pipeline.py``` or ```python3 utils/pipeline.py``` run the processing pipeline that cleans, standardizes, and creates the individuals, organizations, and transactions concatenated into one comprehensive database. 5. running ```pipeline.py``` returns the tables to the output folder as csv files containing the complete individuals, organizations, and transactions DataFrames combining the AZ, MI, MN, and PA datasets. 
From 82eaba790ab2e80d51c05b4dd2cebc3f3b606bfc Mon Sep 17 00:00:00 2001 From: alankagiri Date: Thu, 11 Jan 2024 09:24:18 -0600 Subject: [PATCH 002/214] changing all references from 2023 fall to 2024 winter --- .devcontainer/devcontainer.json | 2 +- Makefile | 4 ++-- README.md | 10 +--------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index f17fc8e3..ae86d984 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "2023-fall-clinic-climate-cabinet-devcontainer", + "name": "2024-winter-clinic-climate-cabinet-devcontainer", "build": { "dockerfile": "../Dockerfile", "context": "..", diff --git a/Makefile b/Makefile index eb3ba0c3..e210fb2c 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ current_abs_path := $(subst Makefile,,$(mkfile_path)) # pipeline constants # PROJECT_NAME -project_image_name := "2023-fall-clinic-climate-cabinet" -project_container_name := "2023-fall-clinic-climate-cabinet-container" +project_image_name := "2024-winter-clinic-climate-cabinet" +project_container_name := "2024-winter-clinic-climate-cabinet-container" project_dir := "$(current_abs_path)" # environment variables diff --git a/README.md b/README.md index d4c582bf..6fd45479 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 2023-fall-clinic-climate-cabinet +# 2024-winter-clinic-climate-cabinet ## Project Background @@ -54,14 +54,6 @@ Should contain work product generated by the analysis. Keep in mind that results ## Team Member -Student Name: April Wang -Student Email: yuzhouw@uchicago.edu - -Student Name: Nicolas Posner -Student Email: nrposner@uchicago.edu - -Student Name: Aïcha Camara -Student Email: aichacamara@uchicago.edu Student Name: Alan Kagiri Student Email: alankagiri@uchicago.edu. 
From 893ebe6bf7c7c5397c8035ac349aa214fdd422ab Mon Sep 17 00:00:00 2001 From: npashilkar <102933200+npashilkar@users.noreply.github.com> Date: Thu, 11 Jan 2024 11:30:23 -0600 Subject: [PATCH 003/214] Update README.md added name and email to readme file --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 8a65c359..7dec9c8c 100644 --- a/README.md +++ b/README.md @@ -75,3 +75,6 @@ Student Email: alankagiri@uchicago.edu. Student Name: Adil Kassim Student Email: adilk@uchicago.edu + +Student Name: Nayna Pashilkar +Student Email: npashilkar@uchicago.edu From 026f83fce89831b2726ced83990fdbad3f1cdd3b Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 16 Jan 2024 09:56:37 -0600 Subject: [PATCH 004/214] using jaro-winkler, see if it passes pytest --- requirements.txt | 1 + utils/linkage.py | 82 ++---------------------------------------------- 2 files changed, 4 insertions(+), 79 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6658f0ea..846beb4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ beautifulsoup4==4.11.1 numpy==1.25.0 Requests==2.31.0 setuptools==68.0.0 +textdistance==4.6.1 \ No newline at end of file diff --git a/utils/linkage.py b/utils/linkage.py index aa56307e..bca99460 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,3 +1,5 @@ +import textdistance as td + """ Module for performing record linkage on state campaign finance dataset """ @@ -27,83 +29,5 @@ def calculate_string_similarity(string1: str, string2: str) -> float: >>> similar_socre > different_score True """ - pass - - -def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: - """Given name related columns, return a person's likely name - - Given different formatting used accross states, errors in data entry - and missing data, it can be difficult to determine someone's actual - name. 
For example, some states have a last name column with values like - "Doe, Jane", where the person's first name appears to have been erroneously - included. - - Args: - first_name: raw value of first name column - last_name: raw value last name column - full_name: raw value of name or full_name column - Returns: - The most likely full name of the person listed - - Sample Usage: - >>> get_likely_name("Jane", "Doe", "") - "Jane Doe" - >>> get_likely_name("", "", "Jane Doe") - "Jane Doe" - >>> get_likely_name("", "Doe, Jane", "") - "Jane Doe" - >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") - "Jane Doe" - """ - pass - - -def get_address_line_1_from_full_address(address: str) -> str: - """Given a full address, return the first line of the formatted address - - Address line 1 usually includes street address or PO Box information. - Args: - address: raw string representing full address - Returns: - address_line_1 - - Sample Usage: - >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - "6727 W. Corrine Dr." - >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - "P.O. Box 5456" - >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - "119 S 5th St" - >>> get_address_line_1_from_full_address( - ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" - ... ) - "1415 PARKER STREET" - """ - pass - - -def get_street_from_address_line_1(address_line_1: str) -> str: - """Given an address line 1, return the street name - - Args: - address_line_1: either street information or PO box - Returns: - street name - Raises: - ValueError: if string is malformed and no street can be reasonably - found. - - >>> get_street_from_address_line_1("5645 N. UBER ST") - "UBER ST" - >>> get_street_from_address_line_1("") - Traceback (most recent call last): - ... - ValueError: address_line_1 must have whitespace - >>> get_street_from_address_line_1("PO Box 1111") - Traceback (most recent call last): - ... 
- ValueError: address_line_1 is PO Box - """ - pass + return td.jaro_winkler(string1, string2) From e2cd758e6ffb9b999a64a26ccbe4d9143e192292 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 16 Jan 2024 20:31:57 -0600 Subject: [PATCH 005/214] change data type for string similarity to float --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index bca99460..e3398c2c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -30,4 +30,4 @@ def calculate_string_similarity(string1: str, string2: str) -> float: True """ - return td.jaro_winkler(string1, string2) + return float(td.jaro_winkler(string1, string2)) From b51b30bc1e4bef62d9f48db03373d15595d7a223 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 16 Jan 2024 20:55:32 -0600 Subject: [PATCH 006/214] fixed spelling error in test --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index e3398c2c..2edfdeac 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -26,7 +26,7 @@ def calculate_string_similarity(string1: str, string2: str) -> float: 0.0 >>> similar_score = calculate_string_similarity("very similar", "vary similar") >>> different_score = calculate_string_similarity("very similar", "very not close") - >>> similar_socre > different_score + >>> similar_score > different_score True """ From b9d4d6429152fb2bc00a9ba9b9e52b12fa893c9b Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 03:58:22 +0000 Subject: [PATCH 007/214] get_street_from_address_line_1 initial function --- utils/linkage.py | 101 +++++++++-------------------------------------- 1 file changed, 19 insertions(+), 82 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index aa56307e..433656bb 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -3,87 +3,6 @@ """ -def calculate_string_similarity(string1: str, string2: str) -> float: - """Returns how similar two strings are 
on a scale of 0 to 1 - - The exact meaning of the metric is open, but the following must hold true: - 1. equivalent strings must return 1 - 2. strings with no similar characters must return 0 - 3. strings with higher intuitive similarity must return higher scores - - Args: - string1: any string - string2: any string - Returns: - similarity score - - Sample Usage: - >>> calculate_string_similarity("exact match", "exact match") - 1.0 - >>> calculate_string_similarity("aaaaaa", "bbbbbbbbbbb") - 0.0 - >>> similar_score = calculate_string_similarity("very similar", "vary similar") - >>> different_score = calculate_string_similarity("very similar", "very not close") - >>> similar_socre > different_score - True - """ - pass - - -def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: - """Given name related columns, return a person's likely name - - Given different formatting used accross states, errors in data entry - and missing data, it can be difficult to determine someone's actual - name. For example, some states have a last name column with values like - "Doe, Jane", where the person's first name appears to have been erroneously - included. - - Args: - first_name: raw value of first name column - last_name: raw value last name column - full_name: raw value of name or full_name column - Returns: - The most likely full name of the person listed - - Sample Usage: - >>> get_likely_name("Jane", "Doe", "") - "Jane Doe" - >>> get_likely_name("", "", "Jane Doe") - "Jane Doe" - >>> get_likely_name("", "Doe, Jane", "") - "Jane Doe" - >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") - "Jane Doe" - """ - pass - - -def get_address_line_1_from_full_address(address: str) -> str: - """Given a full address, return the first line of the formatted address - - Address line 1 usually includes street address or PO Box information. 
- - Args: - address: raw string representing full address - Returns: - address_line_1 - - Sample Usage: - >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - "6727 W. Corrine Dr." - >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - "P.O. Box 5456" - >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - "119 S 5th St" - >>> get_address_line_1_from_full_address( - ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" - ... ) - "1415 PARKER STREET" - """ - pass - - def get_street_from_address_line_1(address_line_1: str) -> str: """Given an address line 1, return the street name @@ -106,4 +25,22 @@ def get_street_from_address_line_1(address_line_1: str) -> str: ... ValueError: address_line_1 is PO Box """ - pass + + if not address_line_1 or address_line_1.isspace(): + raise ValueError("address_line_1 must have whitespace") + + address_line_lower = address_line_1.lower() + + if "po box" in address_line_lower: + raise ValueError("address_line_1 is PO Box") + + parts = address_line_1.split() + + string = [] + for i, part in enumerate(parts): + part_lower = part.lower() + if part.isdigit() or "." 
in part_lower: + continue + else: + string += [part] + return " ".join(string) From 6e86bfead8157794fc13fa0e191451cbb164c205 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 06:00:32 +0000 Subject: [PATCH 008/214] updated work --- utils/linkage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 433656bb..e15421ca 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -43,4 +43,6 @@ def get_street_from_address_line_1(address_line_1: str) -> str: continue else: string += [part] - return " ".join(string) + + string = " ".join(string) + print('"{}"'.format(string)) From e2e898cf1a5e5c61f9880405505c0a137b9f3195 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 15:47:15 +0000 Subject: [PATCH 009/214] slight edit in function --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index e15421ca..ccc5d32c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -37,7 +37,7 @@ def get_street_from_address_line_1(address_line_1: str) -> str: parts = address_line_1.split() string = [] - for i, part in enumerate(parts): + for part in enumerate(parts): part_lower = part.lower() if part.isdigit() or "." in part_lower: continue From c41428098d387aa222a592d3e5422b918b7cf87c Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 15:51:39 +0000 Subject: [PATCH 010/214] slight edit in function loop --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index ccc5d32c..10711fb9 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -37,7 +37,7 @@ def get_street_from_address_line_1(address_line_1: str) -> str: parts = address_line_1.split() string = [] - for part in enumerate(parts): + for _, part in enumerate(parts): part_lower = part.lower() if part.isdigit() or "." 
in part_lower: continue From 7f9fd677e132c1aa5526ed820ffa9fdda06d00a0 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 17 Jan 2024 09:59:37 -0600 Subject: [PATCH 011/214] made case insensitive --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 2edfdeac..c7504a82 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -30,4 +30,4 @@ def calculate_string_similarity(string1: str, string2: str) -> float: True """ - return float(td.jaro_winkler(string1, string2)) + return float(td.jaro_winkler(string1.lower(), string2.lower())) From d1b52b3616b8d100f57eaba920b9483d5dce817e Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 16:05:28 +0000 Subject: [PATCH 012/214] edited test case and function --- utils/linkage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 10711fb9..d7f69742 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -15,7 +15,7 @@ def get_street_from_address_line_1(address_line_1: str) -> str: found. >>> get_street_from_address_line_1("5645 N. UBER ST") - "UBER ST" + 'UBER ST' >>> get_street_from_address_line_1("") Traceback (most recent call last): ... @@ -37,12 +37,11 @@ def get_street_from_address_line_1(address_line_1: str) -> str: parts = address_line_1.split() string = [] - for _, part in enumerate(parts): + for part in parts: part_lower = part.lower() if part.isdigit() or "." 
in part_lower: continue else: string += [part] - string = " ".join(string) - print('"{}"'.format(string)) + return " ".join(string) From e16fd40c2e6edb0d4ed3e64b311196aec7e428bc Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 17 Jan 2024 16:18:25 +0000 Subject: [PATCH 013/214] slight changes in function --- utils/linkage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d7f69742..6a37c405 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -38,8 +38,7 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string = [] for part in parts: - part_lower = part.lower() - if part.isdigit() or "." in part_lower: + if part.isdigit() or "." in part: continue else: string += [part] From f1d6e83e35ffde5005779049ff8887902f2573ca Mon Sep 17 00:00:00 2001 From: npashilkar <102933200+npashilkar@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:08:21 -0600 Subject: [PATCH 014/214] finished line 1 from full address function --- utils/linkage.py | 145 ++++++++++++++--------------------------------- 1 file changed, 43 insertions(+), 102 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index aa56307e..3c98a90e 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -2,108 +2,49 @@ Module for performing record linkage on state campaign finance dataset """ - -def calculate_string_similarity(string1: str, string2: str) -> float: - """Returns how similar two strings are on a scale of 0 to 1 - - The exact meaning of the metric is open, but the following must hold true: - 1. equivalent strings must return 1 - 2. strings with no similar characters must return 0 - 3. 
strings with higher intuitive similarity must return higher scores - - Args: - string1: any string - string2: any string - Returns: - similarity score - - Sample Usage: - >>> calculate_string_similarity("exact match", "exact match") - 1.0 - >>> calculate_string_similarity("aaaaaa", "bbbbbbbbbbb") - 0.0 - >>> similar_score = calculate_string_similarity("very similar", "vary similar") - >>> different_score = calculate_string_similarity("very similar", "very not close") - >>> similar_socre > different_score - True - """ - pass - - -def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: - """Given name related columns, return a person's likely name - - Given different formatting used accross states, errors in data entry - and missing data, it can be difficult to determine someone's actual - name. For example, some states have a last name column with values like - "Doe, Jane", where the person's first name appears to have been erroneously - included. - - Args: - first_name: raw value of first name column - last_name: raw value last name column - full_name: raw value of name or full_name column - Returns: - The most likely full name of the person listed - - Sample Usage: - >>> get_likely_name("Jane", "Doe", "") - "Jane Doe" - >>> get_likely_name("", "", "Jane Doe") - "Jane Doe" - >>> get_likely_name("", "Doe, Jane", "") - "Jane Doe" - >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") - "Jane Doe" - """ - pass +import usaddress + + +def address_components(address_tuples): + add = [] + for value in address_tuples: + if value[1] == "PlaceName": + break # Stop when reaching 'PlaceName' + elif value[1] in ( + "AddressNumber", + "StreetNamePreDirectional", + "StreetName", + "StreetNamePostType", + "USPSBoxType", + "USPSBoxID", + ): + add.append(value[0]) + return " ".join(add) def get_address_line_1_from_full_address(address: str) -> str: - """Given a full address, return the first line of the formatted address - - Address line 1 usually includes street 
address or PO Box information. - - Args: - address: raw string representing full address - Returns: - address_line_1 - - Sample Usage: - >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - "6727 W. Corrine Dr." - >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - "P.O. Box 5456" - >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - "119 S 5th St" - >>> get_address_line_1_from_full_address( - ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" - ... ) - "1415 PARKER STREET" - """ - pass - - -def get_street_from_address_line_1(address_line_1: str) -> str: - """Given an address line 1, return the street name - - Args: - address_line_1: either street information or PO box - Returns: - street name - Raises: - ValueError: if string is malformed and no street can be reasonably - found. - - >>> get_street_from_address_line_1("5645 N. UBER ST") - "UBER ST" - >>> get_street_from_address_line_1("") - Traceback (most recent call last): - ... - ValueError: address_line_1 must have whitespace - >>> get_street_from_address_line_1("PO Box 1111") - Traceback (most recent call last): - ... - ValueError: address_line_1 is PO Box - """ - pass + line1 = usaddress.parse(address) + return address_components(line1) + + # """Given a full address, return the first line of the formatted address + + # Address line 1 usually includes street address or PO Box information. + + # Args: + # address: raw string representing full address + # Returns: + # address_line_1 + + # Sample Usage: + # >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") + # "6727 W. Corrine Dr." + # >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") + # "P.O. Box 5456" + # >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") + # "119 S 5th St" + # >>> get_address_line_1_from_full_address( + # ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" + # ... 
) + # "1415 PARKER STREET" + # """ + # pass From de741914a00d0024b0567f990860d0a7828dc853 Mon Sep 17 00:00:00 2001 From: npashilkar <102933200+npashilkar@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:08:58 -0600 Subject: [PATCH 015/214] added usaddress library to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6658f0ea..49c29ba0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ beautifulsoup4==4.11.1 numpy==1.25.0 Requests==2.31.0 setuptools==68.0.0 +usaddress==0.5.4 From 46a5c3b8d297598d561da2aaecc3d90493f8427e Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 18 Jan 2024 00:05:53 -0600 Subject: [PATCH 016/214] get_likely function done --- setup.py | 2 +- utils/linkage.py | 120 +++++++++++++++++------------------------------ 2 files changed, 44 insertions(+), 78 deletions(-) diff --git a/setup.py b/setup.py index 63ef672a..07404acd 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages, setup setup( - name="2023-fall-clinic-climate-cabinet", + name="2024-winter-clinic-climate-cabinet", version="0.1.0", packages=find_packages( include=[ diff --git a/utils/linkage.py b/utils/linkage.py index aa56307e..c3ddf1b7 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -3,33 +3,6 @@ """ -def calculate_string_similarity(string1: str, string2: str) -> float: - """Returns how similar two strings are on a scale of 0 to 1 - - The exact meaning of the metric is open, but the following must hold true: - 1. equivalent strings must return 1 - 2. strings with no similar characters must return 0 - 3. 
strings with higher intuitive similarity must return higher scores - - Args: - string1: any string - string2: any string - Returns: - similarity score - - Sample Usage: - >>> calculate_string_similarity("exact match", "exact match") - 1.0 - >>> calculate_string_similarity("aaaaaa", "bbbbbbbbbbb") - 0.0 - >>> similar_score = calculate_string_similarity("very similar", "vary similar") - >>> different_score = calculate_string_similarity("very similar", "very not close") - >>> similar_socre > different_score - True - """ - pass - - def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: """Given name related columns, return a person's likely name @@ -56,54 +29,47 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") "Jane Doe" """ - pass - - -def get_address_line_1_from_full_address(address: str) -> str: - """Given a full address, return the first line of the formatted address - - Address line 1 usually includes street address or PO Box information. - - Args: - address: raw string representing full address - Returns: - address_line_1 - - Sample Usage: - >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - "6727 W. Corrine Dr." - >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - "P.O. Box 5456" - >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - "119 S 5th St" - >>> get_address_line_1_from_full_address( - ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" - ... ) - "1415 PARKER STREET" - """ - pass - -def get_street_from_address_line_1(address_line_1: str) -> str: - """Given an address line 1, return the street name - - Args: - address_line_1: either street information or PO box - Returns: - street name - Raises: - ValueError: if string is malformed and no street can be reasonably - found. - - >>> get_street_from_address_line_1("5645 N. 
UBER ST") - "UBER ST" - >>> get_street_from_address_line_1("") - Traceback (most recent call last): - ... - ValueError: address_line_1 must have whitespace - >>> get_street_from_address_line_1("PO Box 1111") - Traceback (most recent call last): - ... - ValueError: address_line_1 is PO Box - """ - pass + # if data is clean: + if first_name + " " + last_name == full_name: + return full_name + + # some names have titles or professions associated with the name. We need to + # remove those from the name. + titles = [ + "mr", + "ms", + "mrs", + "miss", + "prof", + "dr", + "doctor", + "sir", + "madam", + "professor", + ] + names = [first_name, last_name, full_name] + + for i in range(len(names)): + # if there is a ',' switch around the names + if "," in names[i]: + index = names[i].find(",") + first_part = names[i][index + 1 :] + last_part = names[i][0:index] + names[i] = first_part + " " + last_part + + names[i] = names[i].lower().replace(".", "").split(" ") + names[i] = [ + name_part for name_part in names[i] if name_part not in titles + ] + names[i] = " ".join(names[i]) + + names = " ".join(names) + names = names.split(" ") + final_name = [] + [ + final_name.append(x) + for x in names + if ((x not in final_name) & (len(x) != 0)) + ] + return " ".join(final_name) From 073c935e3861ebc12a086ae5bc01fee4acadc373 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 18 Jan 2024 00:15:34 -0600 Subject: [PATCH 017/214] added .title() function to return proper name format --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index c3ddf1b7..7461049b 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -72,4 +72,4 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: for x in names if ((x not in final_name) & (len(x) != 0)) ] - return " ".join(final_name) + return " ".join(final_name).title() From 16a51dc7ba80ecd3ecdc4653f8623c1b5a8fb9a1 Mon Sep 17 00:00:00 2001 From: Alan 
Mburu Kagiri Date: Thu, 18 Jan 2024 00:55:07 -0600 Subject: [PATCH 018/214] struggling with converting single quotes into double quotes for function output --- utils/linkage.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 7461049b..f695a0a9 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -67,9 +67,5 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names = " ".join(names) names = names.split(" ") final_name = [] - [ - final_name.append(x) - for x in names - if ((x not in final_name) & (len(x) != 0)) - ] - return " ".join(final_name).title() + [final_name.append(x) for x in names if x not in final_name] + return " ".join(final_name).title().strip() From c446aaf79b5843a416d6951cc19a7f1554347f1a Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 20 Jan 2024 03:03:36 -0600 Subject: [PATCH 019/214] updates to get_likely_name function after feedback to consider generational suffixes and handle more edge cases --- utils/linkage.py | 64 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index f695a0a9..f43e09fd 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,47 @@ -""" -Module for performing record linkage on state campaign finance dataset -""" +def determine_comma_role(name: str) -> str: + """Given a string (someone's name), attempts to determine the role of the + comma in the name and where it ought to belong. 
+ + Some assumptions are made: + * If a suffix is included in the name and the name is not just the last + name(i.e "Doe, Jr), the format is + (last_name suffix, first and middle name) i.e Doe iv, Jane Elisabeth + + * If a comma is used anywhere else, it is in the format of + (last_name, first and middle name) i.e Doe, Jane Elisabeth + + Args: + name: a string representing a name/names of individuals + Returns: + the name with or without a comma based on some conditions + """ + suffixes = [ + "sr", + "jr", + "i", + "ii", + "iii", + "iv", + "v", + "vi", + "vii", + "viii", + "ix", + "x", + ] + name_parts = name.split(",") + # if the comma is just in the end as a typo: + if len(name_parts[1]) == 0: + return name_parts[0] + # if just the suffix in the end, leave the name as it is + if name_parts[1].strip() in suffixes: + return name + # at this point either it's just poor name placement, or the suffix is + # in the beginning of the name. Either way, the first part of the list is the + # true last name. 
+ last_part = name_parts.pop(0) + first_part = " ".join(name_parts) + return first_part + " " + last_part def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: @@ -29,6 +70,10 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") "Jane Doe" """ + # first ensure clean input by deleting spaces: + first_name, last_name, full_name = list( + map(lambda x: x.lower().strip(), [first_name, last_name, full_name]) + ) # if data is clean: if first_name + " " + last_name == full_name: @@ -51,20 +96,19 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names = [first_name, last_name, full_name] for i in range(len(names)): - # if there is a ',' switch around the names + # if there is a ',' deal with it accordingly if "," in names[i]: - index = names[i].find(",") - first_part = names[i][index + 1 :] - last_part = names[i][0:index] - names[i] = first_part + " " + last_part - - names[i] = names[i].lower().replace(".", "").split(" ") + names[i] = determine_comma_role(names[i]) + print(names[i]) + names[i] = names[i].replace(".", "").split(" ") names[i] = [ name_part for name_part in names[i] if name_part not in titles ] names[i] = " ".join(names[i]) + print(names[i]) names = " ".join(names) + print("after comma: ", names) names = names.split(" ") final_name = [] [final_name.append(x) for x in names if x not in final_name] From efc02e22ebc298095c2abf7a4adcd13db02b2a2d Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 20 Jan 2024 03:11:05 -0600 Subject: [PATCH 020/214] adjusted the sample usage output to single quotes as per Avery's suggestion --- utils/linkage.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index f43e09fd..9b9ba220 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -62,13 +62,19 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) 
-> str: Sample Usage: >>> get_likely_name("Jane", "Doe", "") - "Jane Doe" + 'Jane Doe' >>> get_likely_name("", "", "Jane Doe") - "Jane Doe" + 'Jane Doe' >>> get_likely_name("", "Doe, Jane", "") - "Jane Doe" + 'Jane Doe' >>> get_likely_name("Jane Doe", "Doe", "Jane Doe") - "Jane Doe" + 'Jane Doe' + >>> get_likely_name("Jane","","Doe, Sr") + 'Jane Doe, Sr' + >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV) + 'Jane Elisabeth Doe, Iv' + >>> get_likely_name("","",Jane Elisabeth Doe, IV") + 'Jane Elisabeth Doe Iv' """ # first ensure clean input by deleting spaces: first_name, last_name, full_name = list( @@ -99,16 +105,14 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: # if there is a ',' deal with it accordingly if "," in names[i]: names[i] = determine_comma_role(names[i]) - print(names[i]) + names[i] = names[i].replace(".", "").split(" ") names[i] = [ name_part for name_part in names[i] if name_part not in titles ] names[i] = " ".join(names[i]) - print(names[i]) names = " ".join(names) - print("after comma: ", names) names = names.split(" ") final_name = [] [final_name.append(x) for x in names if x not in final_name] From 6c37c4576c39ec2d1ac6856c036ed6dceef6c628 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 20 Jan 2024 03:25:32 -0600 Subject: [PATCH 021/214] took care of empty strings that were adding extra whitespace to o output --- utils/linkage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 9b9ba220..521c75c5 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -111,7 +111,9 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: name_part for name_part in names[i] if name_part not in titles ] names[i] = " ".join(names[i]) - + + #one last check to remove any pieces that might add extra whitespace + names = list(filter(lambda x: x != '', names)) names = " ".join(names) names = names.split(" ") final_name = [] 
From 81e52dbdb537e4ee6caae02462c49ba7a2ef1d1a Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 20 Jan 2024 03:27:12 -0600 Subject: [PATCH 022/214] took care of empty strings that were adding extra whitespace to output --- utils/linkage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 521c75c5..4c1d24ff 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -111,9 +111,9 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: name_part for name_part in names[i] if name_part not in titles ] names[i] = " ".join(names[i]) - - #one last check to remove any pieces that might add extra whitespace - names = list(filter(lambda x: x != '', names)) + + # one last check to remove any pieces that might add extra whitespace + names = list(filter(lambda x: x != "", names)) names = " ".join(names) names = names.split(" ") final_name = [] From 2dcb7d9592be19be15e688101509a25581848dcc Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 20 Jan 2024 03:30:06 -0600 Subject: [PATCH 023/214] fixed error in sample usage output --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 4c1d24ff..df15117d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -71,7 +71,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: 'Jane Doe' >>> get_likely_name("Jane","","Doe, Sr") 'Jane Doe, Sr' - >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV) + >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV") 'Jane Elisabeth Doe, Iv' >>> get_likely_name("","",Jane Elisabeth Doe, IV") 'Jane Elisabeth Doe Iv' From a51c33777d06cf5aca0097ed456ad5d908771b6d Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 23 Jan 2024 20:37:46 -0600 Subject: [PATCH 024/214] added explanation of jaro-winkler and reversed strings --- utils/linkage.py | 12 +++++++++++- 1 file changed, 
11 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index c7504a82..b7b95ef5 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -8,6 +8,16 @@ def calculate_string_similarity(string1: str, string2: str) -> float: """Returns how similar two strings are on a scale of 0 to 1 + This version utilizes Jaro-Winkler distance, which is a metric of + edit distance. Jaro-Winkler specially prioritizes the early + characters in a string. + + Since the ends of strings are often more valuable in matching names + and addresses, we reverse the strings before matching them. + + https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance + https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro-winkler.js + The exact meaning of the metric is open, but the following must hold true: 1. equivalent strings must return 1 2. strings with no similar characters must return 0 @@ -30,4 +40,4 @@ def calculate_string_similarity(string1: str, string2: str) -> float: True """ - return float(td.jaro_winkler(string1.lower(), string2.lower())) + return float(td.jaro_winkler(string1.lower()[::-1], string2.lower()[::-1])) From 98a10586a4cb66d8a239e12015f1c5636245696d Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 23 Jan 2024 20:50:24 -0600 Subject: [PATCH 025/214] fixing linter error --- utils/linkage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index b7b95ef5..568125df 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -8,9 +8,9 @@ def calculate_string_similarity(string1: str, string2: str) -> float: """Returns how similar two strings are on a scale of 0 to 1 - This version utilizes Jaro-Winkler distance, which is a metric of - edit distance. Jaro-Winkler specially prioritizes the early - characters in a string. + This version utilizes Jaro-Winkler distance, which is a metric of + edit distance. Jaro-Winkler specially prioritizes the early + characters in a string. 
Since the ends of strings are often more valuable in matching names and addresses, we reverse the strings before matching them. From d711a26d05bb2793f74c7d356b96688a03911876 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 24 Jan 2024 03:49:30 +0000 Subject: [PATCH 026/214] adding usaddress to requirements.txt file --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6658f0ea..49c29ba0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ beautifulsoup4==4.11.1 numpy==1.25.0 Requests==2.31.0 setuptools==68.0.0 +usaddress==0.5.4 From 0ddd8b0abe76f01f8ac7bff25c0dd9aa6fe2906c Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 24 Jan 2024 05:07:18 +0000 Subject: [PATCH 027/214] updated function with additional test cases --- utils/linkage.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 6a37c405..fe4dfd3c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,7 @@ """ Module for performing record linkage on state campaign finance dataset """ +import usaddress def get_street_from_address_line_1(address_line_1: str) -> str: @@ -24,8 +25,13 @@ def get_street_from_address_line_1(address_line_1: str) -> str: Traceback (most recent call last): ... ValueError: address_line_1 is PO Box + >>> get_street_from_address_line_1("300 59 St.") + '59 St.' + >>> get_street_from_address_line_1("Uber St.") + 'Uber St.' + >>> get_street_from_address_line_1("3NW 59th St") + '59th St' """ - if not address_line_1 or address_line_1.isspace(): raise ValueError("address_line_1 must have whitespace") @@ -34,13 +40,10 @@ def get_street_from_address_line_1(address_line_1: str) -> str: if "po box" in address_line_lower: raise ValueError("address_line_1 is PO Box") - parts = address_line_1.split() - string = [] - for part in parts: - if part.isdigit() or "." 
in part: - continue - else: - string += [part] + address = usaddress.parse(address_line_1) + for key, val in address: + if val in ["StreetName", "StreetNamePostType"]: + string.append(key) return " ".join(string) From 6f882ddf9735c86a379348d760ba026ef98aee73 Mon Sep 17 00:00:00 2001 From: Avery Date: Wed, 24 Jan 2024 09:54:40 -0600 Subject: [PATCH 028/214] fix for linter --- utils/linkage.py | 2 +- utils/pipeline.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index b738998d..44f24e59 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -5,6 +5,7 @@ Module for performing record linkage on state campaign finance dataset """ + def calculate_string_similarity(string1: str, string2: str) -> float: """Returns how similar two strings are on a scale of 0 to 1 @@ -86,4 +87,3 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string.append(key) return " ".join(string) - diff --git a/utils/pipeline.py b/utils/pipeline.py index 7a288fd4..e6b7a120 100644 --- a/utils/pipeline.py +++ b/utils/pipeline.py @@ -18,6 +18,7 @@ single_state_organizations_tables = [] single_state_transactions_tables = [] for state_cleaner in state_cleaners: + print("Cleaning...") ( individuals_table, organizations_table, From c8cdcaf10a9121fee065e67b138cf6c923ebb664 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 18:20:31 -0600 Subject: [PATCH 029/214] added row similarity and row match functions. 
these do not yet have tests --- utils/linkage.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index aa56307e..ed6a8264 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,8 @@ """ Module for performing record linkage on state campaign finance dataset """ +import numpy as np +import pandas as pd def calculate_string_similarity(string1: str, string2: str) -> float: @@ -107,3 +109,68 @@ def get_street_from_address_line_1(address_line_1: str) -> str: ValueError: address_line_1 is PO Box """ pass + + +def calculate_row_similarity( + row1: pd.DataFrame, row2: pd.DataFrame, weights: np.array, comparison_func +) -> float: + """Find weighted similarity of two rows in a dataframe + + The length of the weights vector must be the same as + the number of selected columns. + + This version is slow and not optimized, and will be + revised in order to make it more efficient. It + exists as to provide basic functionality. Once we have + the comparison function locked in, using .apply will + likely be easier and more efficient. + """ + + row_length = len(weights) + if not (row1.shape[1] == row2.shape[1] == row_length): + raise ValueError("Number of columns and weights must be the same") + + similarity = np.zeros(row_length) + + for i in range(row_length): + similarity[i] = comparison_func(row1.iloc[:, i], row2.iloc[:, i]) + + return sum(similarity * weights) + + +def row_matches(df: pd.DataFrame, weights: np.array, threshold: float, comparison_func) -> dict: + """Get weighted similarity score of two rows + + Run through the rows using indices: if two rows have a comparison score + greater than a threshold, we assign the later row to the former. Any + row which is matched to any other row is not examined again. Matches are + stored in a dictionary object, with each index appearing no more than once. 
+ + This is not optimized + """ + + all_indices = np.array(list(df.index)) + + index_dict = {} + [index_dict.setdefault(x, []) for x in all_indices] + + discard_indices = [] + + end = max(all_indices) + for i in all_indices: + # Skip indices that have been stored in the discard_indices list + if i in discard_indices: + continue + + # Iterate through the remaining numbers + for j in range(i + 1, end): + if j in discard_indices: + continue + + # Our conditional + if calculate_row_similarity(df.iloc[[i]], df.iloc[[j]], weights, comparison_func) > threshold: + # Store the other index and mark it for skipping in future iterations + discard_indices.append(j) + index_dict[i].append[j] + + return index_dict From edfda3e48ae6a5d147b232e50aba883acc23a25b Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 19:38:01 -0600 Subject: [PATCH 030/214] fixing linter error --- utils/linkage.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index ed6a8264..ee856a84 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -138,7 +138,9 @@ def calculate_row_similarity( return sum(similarity * weights) -def row_matches(df: pd.DataFrame, weights: np.array, threshold: float, comparison_func) -> dict: +def row_matches( + df: pd.DataFrame, weights: np.array, threshold: float, comparison_func +) -> dict: """Get weighted similarity score of two rows Run through the rows using indices: if two rows have a comparison score @@ -168,7 +170,12 @@ def row_matches(df: pd.DataFrame, weights: np.array, threshold: float, compariso continue # Our conditional - if calculate_row_similarity(df.iloc[[i]], df.iloc[[j]], weights, comparison_func) > threshold: + if ( + calculate_row_similarity( + df.iloc[[i]], df.iloc[[j]], weights, comparison_func + ) + > threshold + ): # Store the other index and mark it for skipping in future iterations discard_indices.append(j) index_dict[i].append[j] From 
fa78221415d1031c19b3bade004f1fb1952ad753 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 19:53:07 -0600 Subject: [PATCH 031/214] fixing linter errors --- utils/linkage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 90feb034..a7bce17a 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,5 +1,6 @@ import textdistance as td -import usaddress + +# import usaddress """ Module for performing record linkage on state campaign finance dataset From 427d5b6da5d18d54fc802372aed43bf6114abee6 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 19:57:15 -0600 Subject: [PATCH 032/214] fixing pytest errors --- utils/linkage.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index a7bce17a..a56cea60 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -47,31 +47,6 @@ def calculate_string_similarity(string1: str, string2: str) -> float: return float(td.jaro_winkler(string1.lower()[::-1], string2.lower()[::-1])) -def get_street_from_address_line_1(address_line_1: str) -> str: - """Given an address line 1, return the street name - - Args: - address_line_1: either street information or PO box - Returns: - street name - Raises: - ValueError: if string is malformed and no street can be reasonably - found. - - >>> get_street_from_address_line_1("5645 N. UBER ST") - 'UBER ST' - >>> get_street_from_address_line_1("") - Traceback (most recent call last): - ... - ValueError: address_line_1 must have whitespace - >>> get_street_from_address_line_1("PO Box 1111") - Traceback (most recent call last): - ... 
- ValueError: address_line_1 is PO Box - """ - pass - - def calculate_row_similarity( row1: pd.DataFrame, row2: pd.DataFrame, weights: np.array, comparison_func ) -> float: From 236bfc75d7b18723a054800e7414b3e8af06dd91 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 20:31:25 -0600 Subject: [PATCH 033/214] revised and added test case for calculate_row_similarity --- utils/linkage.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index a56cea60..66e99138 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -60,6 +60,23 @@ def calculate_row_similarity( exists as to provide basic functionality. Once we have the comparison function locked in, using .apply will likely be easier and more efficient. + + >>> d = {'name': ["bob von rosevich", "anantarya smith", "bob j + vonrosevich"],'address': ["3 Circle Drive, Chicago, Illinois", + "4 Circle Drive, Chicago, Illinois", "8 Fancy Way, Chicago, Illinois"]} + >>> df = pd.DataFrame(data = d) + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + np.array([.8, .2]), calculate_string_similarity) + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + np.array([.8, .2]), calculate_string_similarity) + >>> right > wrong + True + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + np.array([.2, .8]), calculate_string_similarity) + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + np.array([.2, .8]), calculate_string_similarity) + >>> right > wrong + False """ row_length = len(weights) @@ -69,7 +86,10 @@ def calculate_row_similarity( similarity = np.zeros(row_length) for i in range(row_length): - similarity[i] = comparison_func(row1.iloc[:, i], row2.iloc[:, i]) + similarity[i] = comparison_func( + row1.reset_index().drop(columns="index").iloc[:, i][0], + row2.reset_index().drop(columns="index").iloc[:, i][0], + ) return sum(similarity * weights) @@ -84,7 +104,8 @@ def 
row_matches( row which is matched to any other row is not examined again. Matches are stored in a dictionary object, with each index appearing no more than once. - This is not optimized + This is not optimized. Not presently sure how to make a good test case + for this, will submit and ask in mentor session. """ all_indices = np.array(list(df.index)) From f3e3b228c9cfaba62ced82a48230edb15ea2a13e Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 20:35:04 -0600 Subject: [PATCH 034/214] fixing pytest string error --- utils/linkage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 66e99138..1fb6df78 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -61,8 +61,8 @@ def calculate_row_similarity( the comparison function locked in, using .apply will likely be easier and more efficient. - >>> d = {'name': ["bob von rosevich", "anantarya smith", "bob j - vonrosevich"],'address': ["3 Circle Drive, Chicago, Illinois", + >>> d = {'name': ["bob von rosevich", "anantarya smith", + "bob j vonrosevich"],'address': ["3 Circle Drive, Chicago, Illinois", "4 Circle Drive, Chicago, Illinois", "8 Fancy Way, Chicago, Illinois"]} >>> df = pd.DataFrame(data = d) >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], From 26b9095677b3dedab6f8ddad9d1e6aa5c2ea2514 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 24 Jan 2024 20:39:10 -0600 Subject: [PATCH 035/214] fixed some more pytest errors --- utils/linkage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1fb6df78..a15eaacb 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -61,9 +61,10 @@ def calculate_row_similarity( the comparison function locked in, using .apply will likely be easier and more efficient. 
- >>> d = {'name': ["bob von rosevich", "anantarya smith", - "bob j vonrosevich"],'address': ["3 Circle Drive, Chicago, Illinois", - "4 Circle Drive, Chicago, Illinois", "8 Fancy Way, Chicago, Illinois"]} + >>> d = {'name': + ["bob von rosevich", "anantarya smith","bob j vonrosevich"], + 'address': + ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", "8 Fancy Way, Chicago"]} >>> df = pd.DataFrame(data = d) >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], np.array([.8, .2]), calculate_string_similarity) From 20f4e938e09fa98d1f5acddf7e6eee5c8c2684b5 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 25 Jan 2024 05:16:07 +0000 Subject: [PATCH 036/214] adding cleaning_company_column function --- utils/linkage.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index fe4dfd3c..86485d3b 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,7 @@ """ Module for performing record linkage on state campaign finance dataset """ +import pandas as pd import usaddress @@ -47,3 +48,54 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string.append(key) return " ".join(string) + + +""" +Module for standardizing the 'company' columnn of the state campaign finance dataset +""" + + +def cleaning_company_column(company: str) -> str: + """ + Given a string, check if it contains a variation of self employed, unemployed, + or retired and return the standardized version. 
+ + Args: + company: string of inputted company names + Returns: + standardized for retired, self employed, and unemployed, + or original string if no match or empty string + + >>> cleaning_company_column("Retireed") + 'Retired' + >>> cleaning_company_column("self") + 'Self Employed' + >>> cleaning_company_column("None") + 'Unemployed' + """ + if pd.isnull(company): + return company + + company_edited = company.lower() + company_edited = company_edited.strip() + company_edited = company_edited.replace(".", " ") + company_edited = company_edited.replace(",", " ") + company_edited = company_edited.replace("-", " ") + + if "retire" in company_edited: + return "Retired" + elif "self employe" in company_edited or company_edited == "self": + return "Self Employed" + elif ( + "unemploye" in company_edited + or company_edited == "none" + or company_edited == "not employed" + ): + return "Unemployed" + + else: + return company + + +# Example implementation of the function standardize_company_column for a dataframe +# df['standardized_company'] = df['company'].apply(standardize_company_column) From 7e7fba38657e8521956b88d8fbb60f3415a29f38 Mon Sep 17 00:00:00 2001 From: npashilkar <102933200+npashilkar@users.noreply.github.com> Date: Thu, 25 Jan 2024 18:53:39 -0600 Subject: [PATCH 037/214] Updated linkage.py updated variable names + additional description --- utils/linkage.py | 72 +++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 3c98a90e..89a3cf1e 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,16 +1,39 @@ """ Module for performing record linkage on state campaign finance dataset """ - import usaddress - -def address_components(address_tuples): - add = [] - for value in address_tuples: - if value[1] == "PlaceName": - break # Stop when reaching 'PlaceName' - elif value[1] in ( +def get_address_line_1_from_full_address(address: str) -> str: + + """Given a full 
address, return the first line of the formatted address + + Address line 1 usually includes street address or PO Box information. + + Args: + address: raw string representing full address + Returns: + address_line_1 + + Sample Usage: + >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") + "6727 W. Corrine Dr." + >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") + "P.O. Box 5456" + >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") + "119 S 5th St" + >>> get_address_line_1_from_full_address( + ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" + ... ) + "1415 PARKER STREET" + """ + pass + + address_tuples = usaddress.parse(address) #takes a string address and put them into value,key pairs as tuples + line1_components = [] + for value,key in address_tuples: + if key == "PlaceName": + break + elif key in ( "AddressNumber", "StreetNamePreDirectional", "StreetName", @@ -18,33 +41,6 @@ def address_components(address_tuples): "USPSBoxType", "USPSBoxID", ): - add.append(value[0]) - return " ".join(add) - - -def get_address_line_1_from_full_address(address: str) -> str: - line1 = usaddress.parse(address) - return address_components(line1) - - # """Given a full address, return the first line of the formatted address - - # Address line 1 usually includes street address or PO Box information. - - # Args: - # address: raw string representing full address - # Returns: - # address_line_1 - - # Sample Usage: - # >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - # "6727 W. Corrine Dr." - # >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - # "P.O. Box 5456" - # >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - # "119 S 5th St" - # >>> get_address_line_1_from_full_address( - # ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" - # ... 
) - # "1415 PARKER STREET" - # """ - # pass + line1_components.append(value) + line1 = " ".join(line1_components) + return line1 From 87be7e2a711687ee9b000cc9a71cc58e1c3e5880 Mon Sep 17 00:00:00 2001 From: Avery Date: Fri, 26 Jan 2024 14:41:58 -0600 Subject: [PATCH 038/214] run precommit --- utils/linkage.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 8b619d21..2bd15653 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -5,8 +5,8 @@ Module for performing record linkage on state campaign finance dataset """ + def get_address_line_1_from_full_address(address: str) -> str: - """Given a full address, return the first line of the formatted address Address line 1 usually includes street address or PO Box information. @@ -30,11 +30,13 @@ def get_address_line_1_from_full_address(address: str) -> str: """ pass - address_tuples = usaddress.parse(address) #takes a string address and put them into value,key pairs as tuples + address_tuples = usaddress.parse( + address + ) # takes a string address and put them into value,key pairs as tuples line1_components = [] - for value,key in address_tuples: + for value, key in address_tuples: if key == "PlaceName": - break + break elif key in ( "AddressNumber", "StreetNamePreDirectional", @@ -48,7 +50,6 @@ def get_address_line_1_from_full_address(address: str) -> str: return line1 - def calculate_string_similarity(string1: str, string2: str) -> float: """Returns how similar two strings are on a scale of 0 to 1 @@ -129,4 +130,4 @@ def get_street_from_address_line_1(address_line_1: str) -> str: if val in ["StreetName", "StreetNamePostType"]: string.append(key) - return " ".join(string) \ No newline at end of file + return " ".join(string) From baf56f5707c31b222f97322c2b244892982873a5 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 29 Jan 2024 10:26:49 -0600 Subject: [PATCH 039/214] testing if merge was done correctly after git pull --- 
requirements.txt | 2 ++ utils/linkage.py | 86 +++++++++++++++++++++++++++++++++++++++++++++-- utils/pipeline.py | 1 + 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6658f0ea..944e1c53 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,5 @@ beautifulsoup4==4.11.1 numpy==1.25.0 Requests==2.31.0 setuptools==68.0.0 +textdistance==4.6.1 +usaddress==0.5.4 \ No newline at end of file diff --git a/utils/linkage.py b/utils/linkage.py index df15117d..e88a4a37 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,3 +1,41 @@ +""" +Module for performing record linkage on state campaign finance dataset +""" +import textdistance as td +import usaddress + +def calculate_string_similarity(string1: str, string2: str) -> float: + """Returns how similar two strings are on a scale of 0 to 1 + + This version utilizes Jaro-Winkler distance, which is a metric of + edit distance. Jaro-Winkler specially prioritizes the early + characters in a string. + + Since the ends of strings are often more valuable in matching names + and addresses, we reverse the strings before matching them. + + https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance + https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro-winkler.js + + The exact meaning of the metric is open, but the following must hold true: + 1. equivalent strings must return 1 + 2. strings with no similar characters must return 0 + 3. 
strings with higher intuitive similarity must return higher scores + similarity score + + Sample Usage: + >>> calculate_string_similarity("exact match", "exact match") + 1.0 + >>> calculate_string_similarity("aaaaaa", "bbbbbbbbbbb") + 0.0 + >>> similar_score = calculate_string_similarity("very similar", "vary similar") + >>> different_score = calculate_string_similarity("very similar", "very not close") + >>> similar_score > different_score + True + """ + + return float(td.jaro_winkler(string1.lower()[::-1], string2.lower()[::-1])) + def determine_comma_role(name: str) -> str: """Given a string (someone's name), attempts to determine the role of the comma in the name and where it ought to belong. @@ -9,7 +47,6 @@ def determine_comma_role(name: str) -> str: * If a comma is used anywhere else, it is in the format of (last_name, first and middle name) i.e Doe, Jane Elisabeth - Args: name: a string representing a name/names of individuals Returns: @@ -73,7 +110,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: 'Jane Doe, Sr' >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV") 'Jane Elisabeth Doe, Iv' - >>> get_likely_name("","",Jane Elisabeth Doe, IV") + >>> get_likely_name("","","Jane Elisabeth Doe, IV") 'Jane Elisabeth Doe Iv' """ # first ensure clean input by deleting spaces: @@ -119,3 +156,48 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: final_name = [] [final_name.append(x) for x in names if x not in final_name] return " ".join(final_name).title().strip() + + +def get_street_from_address_line_1(address_line_1: str) -> str: + """Given an address line 1, return the street name + + Args: + address_line_1: either street information or PO box + Returns: + street name + Raises: + ValueError: if string is malformed and no street can be reasonably + found. + + >>> get_street_from_address_line_1("5645 N. 
UBER ST") + 'UBER ST' + >>> get_street_from_address_line_1("") + Traceback (most recent call last): + ... + ValueError: address_line_1 must have whitespace + >>> get_street_from_address_line_1("PO Box 1111") + Traceback (most recent call last): + ... + ValueError: address_line_1 is PO Box + >>> get_street_from_address_line_1("300 59 St.") + '59 St.' + >>> get_street_from_address_line_1("Uber St.") + 'Uber St.' + >>> get_street_from_address_line_1("3NW 59th St") + '59th St' + """ + if not address_line_1 or address_line_1.isspace(): + raise ValueError("address_line_1 must have whitespace") + + address_line_lower = address_line_1.lower() + + if "po box" in address_line_lower: + raise ValueError("address_line_1 is PO Box") + + string = [] + address = usaddress.parse(address_line_1) + for key, val in address: + if val in ["StreetName", "StreetNamePostType"]: + string.append(key) + + return " ".join(string) diff --git a/utils/pipeline.py b/utils/pipeline.py index 7a288fd4..e6b7a120 100644 --- a/utils/pipeline.py +++ b/utils/pipeline.py @@ -18,6 +18,7 @@ single_state_organizations_tables = [] single_state_transactions_tables = [] for state_cleaner in state_cleaners: + print("Cleaning...") ( individuals_table, organizations_table, From 6e5f656c802eb33675bfd2bc8cd323a46326af7e Mon Sep 17 00:00:00 2001 From: npashilkar Date: Mon, 29 Jan 2024 11:01:55 -0600 Subject: [PATCH 040/214] updated sample usage --- utils/linkage.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 2bd15653..d013bfd7 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -17,16 +17,16 @@ def get_address_line_1_from_full_address(address: str) -> str: address_line_1 Sample Usage: - >>> get_address_line_1_from_full_address("6727 W. Corrine Dr. Peoria,AZ 85381") - "6727 W. Corrine Dr." - >>> get_address_line_1_from_full_address("P.O. Box 5456 Sun City West ,AZ 85375") - "P.O. 
Box 5456" - >>> get_address_line_1_from_full_address("119 S 5th St Niles,MI 49120") - "119 S 5th St" + >>> get_address_line_1_from_full_address('6727 W. Corrine Dr. Peoria,AZ 85381') + '6727 W. Corrine Dr.' + >>> get_address_line_1_from_full_address('P.O. Box 5456 Sun City West ,AZ 85375') + 'P.O. Box 5456' + >>> get_address_line_1_from_full_address('119 S 5th St Niles,MI 49120') + '119 S 5th St' >>> get_address_line_1_from_full_address( - ... "1415 PARKER STREET APT 251 DETROIT MI 48214-0000" + ... '1415 PARKER STREET APT 251 DETROIT MI 48214-0000' ... ) - "1415 PARKER STREET" + '1415 PARKER STREET' """ pass From 3d6500cfb5ef60aa3a745c593802cb605f840800 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 29 Jan 2024 11:17:58 -0600 Subject: [PATCH 041/214] undoing the mistake of previous commit where I committed files from the data and output directories --- utils/linkage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1500dc08..e419c6f5 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -4,6 +4,7 @@ import textdistance as td import usaddress + def calculate_string_similarity(string1: str, string2: str) -> float: """Returns how similar two strings are on a scale of 0 to 1 @@ -75,8 +76,8 @@ def determine_comma_role(name: str) -> str: if name_parts[1].strip() in suffixes: return name # at this point either it's just poor name placement, or the suffix is - # in the beginning of the name. Either way, the first part of the list is the - # true last name. + # in the beginning of the name. Either way, the first part of the list is + # the true last name. 
last_part = name_parts.pop(0) first_part = " ".join(name_parts) return first_part + " " + last_part @@ -157,7 +158,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: final_name = [] [final_name.append(x) for x in names if x not in final_name] return " ".join(final_name).title().strip() - + def get_street_from_address_line_1(address_line_1: str) -> str: """Given an address line 1, return the street name From 90872c091017a5d60704966cab579f08ef351673 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 29 Jan 2024 11:30:24 -0600 Subject: [PATCH 042/214] fixing last linter error for function sample output --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index e419c6f5..2ec26e04 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -113,7 +113,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV") 'Jane Elisabeth Doe, Iv' >>> get_likely_name("","","Jane Elisabeth Doe, IV") - 'Jane Elisabeth Doe Iv' + 'Jane Elisabeth Doe, Iv' """ # first ensure clean input by deleting spaces: first_name, last_name, full_name = list( From ca8b3f7aa83262e8c8de1064d962a2f19f16da86 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Mon, 29 Jan 2024 23:21:27 -0600 Subject: [PATCH 043/214] standardizing corporate names function --- utils/linkage.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index 44f24e59..f99ab5a0 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -87,3 +87,69 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string.append(key) return " ".join(string) + + +def standardize_corp_names(company_name: str) -> str: + """Given an employer name, return the standardized version + + Args: + company_name: corporate name + Returns: + standardized company name + + 
>>>standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC') + 'MI BEER WINE WHOLESALERS ASSOCIATION' + + >>>standardize_corp_names('MI COMMUNITY COLLEGE ASSOCIATION') + 'MI COMMUNITY COLLEGE ASSOCIATION' + + >>>standardize_corp_names('STEPHANIES CHANGEMAKER FUND') + 'STEPHANIES CHANGEMAKER FUND' + + """ + + + company_name_split = company_name.upper().split(' ') + + company_types = { + 'CORP': 'CORPORATION', + 'CO': 'CORPORATION', + 'LLC': 'LIMITED LIABILITY COMPANY', + 'PTNR': 'PARTNERSHIP', + 'LP': 'LIMITED PARTNERSHIP', + 'LLP': 'LIMITED LIABILITY PARTNERSHIP', + 'SOLE PROP': 'SOLE PROPRIETORSHIP', + 'SP': 'SOLE PROPRIETORSHIP', + 'NPO': 'NONPROFIT ORGANIZATION', + 'PC': 'PROFESSIONAL CORPORATION', + 'CO-OP': 'COOPERATIVE', + 'LTD': 'LIMITED COMPANY', + 'JSC': 'JOINT STOCK COMPANY', + 'HOLDCO': 'HOLDING COMPANY', + 'PLC': 'PUBLIC LIMITED COMPANY', + 'PVT LTD': 'PRIVATE LIMITED COMPANY', + 'INC': 'INCORPORATED', + 'ASSOC': 'ASSOCIATION', + 'FDN': 'FOUNDATION', + 'TR': 'TRUST', + 'SOC': 'SOCIETY', + 'CONSORT': 'CONSORTIUM', + 'SYND': 'SYNDICATE', + 'GRP': 'GROUP', + 'CORP SOLE': 'CORPORATION SOLE', + 'JV': 'JOINT VENTURE', + 'SUB': 'SUBSIDIARY', + 'FRANCHISE': 'FRANCHISE', + 'PA': 'PROFESSIONAL ASSOCIATION', + 'CIC': 'COMMUNITY INTEREST COMPANY', + + 'PAC': 'POLITICAL ACTION COMMITTEE' +} + + for i in range(len(company_name_split)): + if company_name_split[i] in list(company_types.keys()): + hold = company_name_split[i] + company_name_split[i] = company_types[hold] + + new_company_name = ' '.join(company_name_split) + return new_company_name \ No newline at end of file From d3d3ebf8ebad7ac6c0979da3c54d8bd48181024d Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 11:20:37 -0600 Subject: [PATCH 044/214] fixing row similarity test syntax --- utils/linkage.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index a15eaacb..d9a131b9 100644 --- a/utils/linkage.py +++ 
b/utils/linkage.py @@ -61,21 +61,21 @@ def calculate_row_similarity( the comparison function locked in, using .apply will likely be easier and more efficient. - >>> d = {'name': - ["bob von rosevich", "anantarya smith","bob j vonrosevich"], - 'address': - ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", "8 Fancy Way, Chicago"]} + >>> d = { + ... 'name':["bob von rosevich", "anantarya smith","bob j vonrosevich"], + ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, + ... Chicago", "8 Fancy Way, Chicago"]} >>> df = pd.DataFrame(data = d) >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - np.array([.8, .2]), calculate_string_similarity) + ... np.array([.8, .2]), calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - np.array([.8, .2]), calculate_string_similarity) + ... np.array([.8, .2]), calculate_string_similarity) >>> right > wrong True >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - np.array([.2, .8]), calculate_string_similarity) + ... np.array([.2, .8]), calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - np.array([.2, .8]), calculate_string_similarity) + ... np.array([.2, .8]), calculate_string_similarity) >>> right > wrong False """ From 85021df354bcb29ae2d4343a02fa4532a3f2d80c Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 11:25:11 -0600 Subject: [PATCH 045/214] adding backspaces to fix string literals --- utils/linkage.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d9a131b9..1880affe 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -61,21 +61,21 @@ def calculate_row_similarity( the comparison function locked in, using .apply will likely be easier and more efficient. - >>> d = { - ... 'name':["bob von rosevich", "anantarya smith","bob j vonrosevich"], - ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, + >>> d = {\ + ... 
'name':["bob von rosevich", "anantarya smith","bob j vonrosevich"],\ + ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive,\ ... Chicago", "8 Fancy Way, Chicago"]} >>> df = pd.DataFrame(data = d) - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]],\ ... np.array([.8, .2]), calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]],\ ... np.array([.8, .2]), calculate_string_similarity) >>> right > wrong True - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]],\ ... np.array([.2, .8]), calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.2, .8]), calculate_string_similarity) + ... np.array([.2, .8]), calculate_string_similarity)\ >>> right > wrong False """ From 44b89b3c27da1d67bd5d684f69b8c3924ef62b7b Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 11:34:10 -0600 Subject: [PATCH 046/214] fixing typos --- utils/linkage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1880affe..16aeb27e 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -63,8 +63,8 @@ def calculate_row_similarity( >>> d = {\ ... 'name':["bob von rosevich", "anantarya smith","bob j vonrosevich"],\ - ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive,\ - ... Chicago", "8 Fancy Way, Chicago"]} + ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive,Chicago",\ + ... "8 Fancy Way, Chicago"]} >>> df = pd.DataFrame(data = d) >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]],\ ... 
np.array([.8, .2]), calculate_string_similarity) From 1a39b72885c80115d8d5d2edf7331bf21e220a10 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 15:50:37 -0600 Subject: [PATCH 047/214] trying out chatpgts recommendation to fix the pytest error --- utils/linkage.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 16aeb27e..80ef39c1 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -61,21 +61,22 @@ def calculate_row_similarity( the comparison function locked in, using .apply will likely be easier and more efficient. - >>> d = {\ - ... 'name':["bob von rosevich", "anantarya smith","bob j vonrosevich"],\ - ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive,Chicago",\ - ... "8 Fancy Way, Chicago"]} - >>> df = pd.DataFrame(data = d) - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]],\ - ... np.array([.8, .2]), calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]],\ - ... np.array([.8, .2]), calculate_string_similarity) + >>> d = { + ... 'name': ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], + ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", + ... "8 Fancy Way, Chicago"] + ... } + >>> df = pd.DataFrame(data=d) + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + ... np.array([.8, .2]), calculate_string_similarity) + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + ... np.array([.8, .2]), calculate_string_similarity) >>> right > wrong True - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]],\ - ... np.array([.2, .8]), calculate_string_similarity) + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + ... np.array([.2, .8]), calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.2, .8]), calculate_string_similarity)\ + ... 
np.array([.2, .8]), calculate_string_similarity) >>> right > wrong False """ From 498009fadedc76a05ec4df587e5b57c8aaf41865 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 15:53:04 -0600 Subject: [PATCH 048/214] resolving linter error --- utils/linkage.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 80ef39c1..c6cc5292 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -62,21 +62,25 @@ def calculate_row_similarity( likely be easier and more efficient. >>> d = { - ... 'name': ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], + ... 'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"], ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", ... "8 Fancy Way, Chicago"] ... } >>> df = pd.DataFrame(data=d) >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.8, .2]), calculate_string_similarity) + ... np.array([.8, .2]), + ... calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.8, .2]), calculate_string_similarity) + ... np.array([.8, .2]), + ... calculate_string_similarity) >>> right > wrong True >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.2, .8]), calculate_string_similarity) + ... np.array([.2, .8]), + ... calculate_string_similarity) >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.2, .8]), calculate_string_similarity) + ... np.array([.2, .8]), + ... 
calculate_string_similarity) >>> right > wrong False """ From 663f08daf061f79cfba23a97cbaadfd9ff67d6a6 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 20:02:15 -0600 Subject: [PATCH 049/214] corp names function update --- utils/linkage.py | 82 +++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 42 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index f99ab5a0..65f4cb48 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -96,60 +96,58 @@ def standardize_corp_names(company_name: str) -> str: company_name: corporate name Returns: standardized company name - + >>>standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC') 'MI BEER WINE WHOLESALERS ASSOCIATION' - + >>>standardize_corp_names('MI COMMUNITY COLLEGE ASSOCIATION') 'MI COMMUNITY COLLEGE ASSOCIATION' - + >>>standardize_corp_names('STEPHANIES CHANGEMAKER FUND') - 'STEPHANIES CHANGEMAKER FUND' - - """ + 'STEPHANIES CHANGEMAKER FUND' + """ - company_name_split = company_name.upper().split(' ') + company_name_split = company_name.upper().split(" ") company_types = { - 'CORP': 'CORPORATION', - 'CO': 'CORPORATION', - 'LLC': 'LIMITED LIABILITY COMPANY', - 'PTNR': 'PARTNERSHIP', - 'LP': 'LIMITED PARTNERSHIP', - 'LLP': 'LIMITED LIABILITY PARTNERSHIP', - 'SOLE PROP': 'SOLE PROPRIETORSHIP', - 'SP': 'SOLE PROPRIETORSHIP', - 'NPO': 'NONPROFIT ORGANIZATION', - 'PC': 'PROFESSIONAL CORPORATION', - 'CO-OP': 'COOPERATIVE', - 'LTD': 'LIMITED COMPANY', - 'JSC': 'JOINT STOCK COMPANY', - 'HOLDCO': 'HOLDING COMPANY', - 'PLC': 'PUBLIC LIMITED COMPANY', - 'PVT LTD': 'PRIVATE LIMITED COMPANY', - 'INC': 'INCORPORATED', - 'ASSOC': 'ASSOCIATION', - 'FDN': 'FOUNDATION', - 'TR': 'TRUST', - 'SOC': 'SOCIETY', - 'CONSORT': 'CONSORTIUM', - 'SYND': 'SYNDICATE', - 'GRP': 'GROUP', - 'CORP SOLE': 'CORPORATION SOLE', - 'JV': 'JOINT VENTURE', - 'SUB': 'SUBSIDIARY', - 'FRANCHISE': 'FRANCHISE', - 'PA': 'PROFESSIONAL ASSOCIATION', - 'CIC': 'COMMUNITY INTEREST COMPANY', - - 'PAC': 'POLITICAL 
ACTION COMMITTEE' -} + "CORP": "CORPORATION", + "CO": "CORPORATION", + "LLC": "LIMITED LIABILITY COMPANY", + "PTNR": "PARTNERSHIP", + "LP": "LIMITED PARTNERSHIP", + "LLP": "LIMITED LIABILITY PARTNERSHIP", + "SOLE PROP": "SOLE PROPRIETORSHIP", + "SP": "SOLE PROPRIETORSHIP", + "NPO": "NONPROFIT ORGANIZATION", + "PC": "PROFESSIONAL CORPORATION", + "CO-OP": "COOPERATIVE", + "LTD": "LIMITED COMPANY", + "JSC": "JOINT STOCK COMPANY", + "HOLDCO": "HOLDING COMPANY", + "PLC": "PUBLIC LIMITED COMPANY", + "PVT LTD": "PRIVATE LIMITED COMPANY", + "INC": "INCORPORATED", + "ASSOC": "ASSOCIATION", + "FDN": "FOUNDATION", + "TR": "TRUST", + "SOC": "SOCIETY", + "CONSORT": "CONSORTIUM", + "SYND": "SYNDICATE", + "GRP": "GROUP", + "CORP SOLE": "CORPORATION SOLE", + "JV": "JOINT VENTURE", + "SUB": "SUBSIDIARY", + "FRANCHISE": "FRANCHISE", + "PA": "PROFESSIONAL ASSOCIATION", + "CIC": "COMMUNITY INTEREST COMPANY", + "PAC": "POLITICAL ACTION COMMITTEE", + } for i in range(len(company_name_split)): if company_name_split[i] in list(company_types.keys()): hold = company_name_split[i] company_name_split[i] = company_types[hold] - new_company_name = ' '.join(company_name_split) - return new_company_name \ No newline at end of file + new_company_name = " ".join(company_name_split) + return new_company_name From 1ab1d4277f03f68a0fdf9b887af729f24f4e1d2c Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 20:07:25 -0600 Subject: [PATCH 050/214] updated corp names --- utils/linkage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 65f4cb48..49d120b3 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -97,13 +97,13 @@ def standardize_corp_names(company_name: str) -> str: Returns: standardized company name - >>>standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC') + >>> standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC') 'MI BEER WINE WHOLESALERS ASSOCIATION' - >>>standardize_corp_names('MI COMMUNITY COLLEGE 
ASSOCIATION') + >>> standardize_corp_names('MI COMMUNITY COLLEGE ASSOCIATION') 'MI COMMUNITY COLLEGE ASSOCIATION' - >>>standardize_corp_names('STEPHANIES CHANGEMAKER FUND') + >>> standardize_corp_names('STEPHANIES CHANGEMAKER FUND') 'STEPHANIES CHANGEMAKER FUND' """ From b5f764a955030fac17adb73bdac9394c2968ed74 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 20:40:12 -0600 Subject: [PATCH 051/214] addressed comments, but address function back in --- utils/linkage.py | 52 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index c6cc5292..35c69429 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,12 +1,11 @@ import textdistance as td - -# import usaddress +import numpy as np +import pandas as pd +import usaddress """ Module for performing record linkage on state campaign finance dataset """ -import numpy as np -import pandas as pd def calculate_string_similarity(string1: str, string2: str) -> float: @@ -144,3 +143,48 @@ def row_matches( index_dict[i].append[j] return index_dict + + +def get_street_from_address_line_1(address_line_1: str) -> str: + """Given an address line 1, return the street name + + Args: + address_line_1: either street information or PO box + Returns: + street name + Raises: + ValueError: if string is malformed and no street can be reasonably + found. + + >>> get_street_from_address_line_1("5645 N. UBER ST") + 'UBER ST' + >>> get_street_from_address_line_1("") + Traceback (most recent call last): + ... + ValueError: address_line_1 must have whitespace + >>> get_street_from_address_line_1("PO Box 1111") + Traceback (most recent call last): + ... + ValueError: address_line_1 is PO Box + >>> get_street_from_address_line_1("300 59 St.") + '59 St.' + >>> get_street_from_address_line_1("Uber St.") + 'Uber St.' 
+ >>> get_street_from_address_line_1("3NW 59th St") + '59th St' + """ + if not address_line_1 or address_line_1.isspace(): + raise ValueError("address_line_1 must have whitespace") + + address_line_lower = address_line_1.lower() + + if "po box" in address_line_lower: + raise ValueError("address_line_1 is PO Box") + + string = [] + address = usaddress.parse(address_line_1) + for key, val in address: + if val in ["StreetName", "StreetNamePostType"]: + string.append(key) + + return " ".join(string) From df1dbd1912a4a5bb0a52a02527bde31fbf99292b Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 30 Jan 2024 20:41:33 -0600 Subject: [PATCH 052/214] fixing linter error --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 35c69429..c0b5028c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,6 @@ -import textdistance as td import numpy as np import pandas as pd +import textdistance as td import usaddress """ From 6aad87ef2d9598a5745abf64d5eaf3326122041c Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 21:29:16 -0600 Subject: [PATCH 053/214] moved dict to constants file --- utils/constants.py | 36 ++++++++++++++++++++++++++++++++++++ utils/linkage.py | 42 ++++++------------------------------------ 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index b87d39d3..f259db36 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -605,3 +605,39 @@ " WV ", " WY ", ] + +# utils/linkage.py constants + +COMPANY_TYPES = { + "CORP": "CORPORATION", + "CO": "CORPORATION", + "LLC": "LIMITED LIABILITY COMPANY", + "PTNR": "PARTNERSHIP", + "LP": "LIMITED PARTNERSHIP", + "LLP": "LIMITED LIABILITY PARTNERSHIP", + "SOLE PROP": "SOLE PROPRIETORSHIP", + "SP": "SOLE PROPRIETORSHIP", + "NPO": "NONPROFIT ORGANIZATION", + "PC": "PROFESSIONAL CORPORATION", + "CO-OP": "COOPERATIVE", + "LTD": "LIMITED COMPANY", + "JSC": "JOINT STOCK 
COMPANY", + "HOLDCO": "HOLDING COMPANY", + "PLC": "PUBLIC LIMITED COMPANY", + "PVT LTD": "PRIVATE LIMITED COMPANY", + "INC": "INCORPORATED", + "ASSOC": "ASSOCIATION", + "FDN": "FOUNDATION", + "TR": "TRUST", + "SOC": "SOCIETY", + "CONSORT": "CONSORTIUM", + "SYND": "SYNDICATE", + "GRP": "GROUP", + "CORP SOLE": "CORPORATION SOLE", + "JV": "JOINT VENTURE", + "SUB": "SUBSIDIARY", + "FRANCHISE": "FRANCHISE", + "PA": "PROFESSIONAL ASSOCIATION", + "CIC": "COMMUNITY INTEREST COMPANY", + "PAC": "POLITICAL ACTION COMMITTEE", +} diff --git a/utils/linkage.py b/utils/linkage.py index 49d120b3..34b25791 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,3 +1,4 @@ +import constants import textdistance as td import usaddress @@ -110,44 +111,13 @@ def standardize_corp_names(company_name: str) -> str: company_name_split = company_name.upper().split(" ") - company_types = { - "CORP": "CORPORATION", - "CO": "CORPORATION", - "LLC": "LIMITED LIABILITY COMPANY", - "PTNR": "PARTNERSHIP", - "LP": "LIMITED PARTNERSHIP", - "LLP": "LIMITED LIABILITY PARTNERSHIP", - "SOLE PROP": "SOLE PROPRIETORSHIP", - "SP": "SOLE PROPRIETORSHIP", - "NPO": "NONPROFIT ORGANIZATION", - "PC": "PROFESSIONAL CORPORATION", - "CO-OP": "COOPERATIVE", - "LTD": "LIMITED COMPANY", - "JSC": "JOINT STOCK COMPANY", - "HOLDCO": "HOLDING COMPANY", - "PLC": "PUBLIC LIMITED COMPANY", - "PVT LTD": "PRIVATE LIMITED COMPANY", - "INC": "INCORPORATED", - "ASSOC": "ASSOCIATION", - "FDN": "FOUNDATION", - "TR": "TRUST", - "SOC": "SOCIETY", - "CONSORT": "CONSORTIUM", - "SYND": "SYNDICATE", - "GRP": "GROUP", - "CORP SOLE": "CORPORATION SOLE", - "JV": "JOINT VENTURE", - "SUB": "SUBSIDIARY", - "FRANCHISE": "FRANCHISE", - "PA": "PROFESSIONAL ASSOCIATION", - "CIC": "COMMUNITY INTEREST COMPANY", - "PAC": "POLITICAL ACTION COMMITTEE", - } - for i in range(len(company_name_split)): - if company_name_split[i] in list(company_types.keys()): + if company_name_split[i] in list(constants.COMPANY_TYPES.keys()): hold = company_name_split[i] - 
company_name_split[i] = company_types[hold] + company_name_split[i] = constants.COMPANY_TYPES[hold] new_company_name = " ".join(company_name_split) return new_company_name + + +print(standardize_corp_names("MI BEER WINE WHOLESALERS ASSOCIATION")) From 5b4de8c3c2c27164cf47df72d3eaa6101335cac1 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 21:47:56 -0600 Subject: [PATCH 054/214] updated constants file --- utils/linkage.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 34b25791..a26a9fe3 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -118,6 +118,3 @@ def standardize_corp_names(company_name: str) -> str: new_company_name = " ".join(company_name_split) return new_company_name - - -print(standardize_corp_names("MI BEER WINE WHOLESALERS ASSOCIATION")) From e4fe9fc354e4429d17b754c32b083b0eaae6a4c6 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 21:53:36 -0600 Subject: [PATCH 055/214] updated constants file --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index a26a9fe3..faa8860c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,4 +1,4 @@ -import constants +import utils.constants import textdistance as td import usaddress From 844d20e5ddccd35514da4ef52fb321677c15e919 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 30 Jan 2024 22:17:08 -0600 Subject: [PATCH 056/214] updated constants file --- utils/linkage.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index faa8860c..9866a9b9 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,4 +1,6 @@ -import utils.constants +from utils.constants import ( + COMPANY_TYPES +) import textdistance as td import usaddress @@ -112,9 +114,11 @@ def standardize_corp_names(company_name: str) -> str: company_name_split = company_name.upper().split(" ") for i in range(len(company_name_split)): - if 
company_name_split[i] in list(constants.COMPANY_TYPES.keys()): + if company_name_split[i] in list(COMPANY_TYPES.keys()): hold = company_name_split[i] - company_name_split[i] = constants.COMPANY_TYPES[hold] + company_name_split[i] = COMPANY_TYPES[hold] new_company_name = " ".join(company_name_split) return new_company_name + +print(standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC')) From 976fc3ff4608874a9259977b21397027073ecfd6 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 31 Jan 2024 14:15:18 +0000 Subject: [PATCH 057/214] updated function --- utils/linkage.py | 52 +++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 86485d3b..1dbf54b6 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,7 +1,8 @@ """ Module for performing record linkage on state campaign finance dataset """ -import pandas as pd +import re + import usaddress @@ -50,12 +51,7 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) -""" -Module for standardizing the 'company' columnn of the state campaign finance dataset -""" - - -def cleaning_company_column(company: str) -> str: +def cleaning_company_column(company_entry: str) -> str: """ Given a string, check if it contains a variation of self employed, unemployed, or retired and return the standardized version. 
@@ -72,30 +68,44 @@ def cleaning_company_column(company: str) -> str: 'Self Employed' >>> cleaning_company_column("None") 'Unemployed' + >>> cleaning_company_column("N/A") + 'Unemployed' + >>> cleaning_company_column("nan") + 'Unemployed' """ - if pd.isnull(company): - return company - company_edited = company.lower() - company_edited = company_edited.strip() - company_edited = company_edited.replace(".", " ") - company_edited = company_edited.replace(",", " ") - company_edited = company_edited.replace("-", " ") + if not company_entry: + return company_entry - if "retire" in company_edited: + company_edited = company_entry.lower() + + if company_edited == "n/a": + return "Unemployed" + + company_edited = re.sub(r"[^\w\s]", "", company_edited) + + if ( + company_edited == "retired" + or company_edited == "retiree" + or company_edited == "retire" + or "retiree" in company_edited + ): return "Retired" - elif "self employe" in company_edited or company_edited == "self": + + elif ( + "self employe" in company_edited + or "freelance" in company_edited + or company_edited == "self" + or company_edited == "independent contractor" + ): return "Self Employed" elif ( "unemploye" in company_edited or company_edited == "none" or company_edited == "not employed" + or company_edited == "nan" ): return "Unemployed" else: - return company - - -# Example implementation of the function standardize_company_column for a dataframe -# df['standardized_company'] = df['company'].apply(standardize_company_column) + return company_edited From 87ea3da197ea722b5c54f99b7f5cdd29b890060d Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 31 Jan 2024 09:02:44 -0600 Subject: [PATCH 058/214] Adding Avery's feedback --- utils/linkage.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index e419c6f5..2cdd11b6 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -53,6 +53,16 @@ def determine_comma_role(name: 
str) -> str: name: a string representing a name/names of individuals Returns: the name with or without a comma based on some conditions + + Sample Usage: + >>> determine_comma_role("Jane Doe, Jr") + 'Jane Doe, Jr' + >>> determine_comma_role("Doe, Jane Elisabeth") + ' Jane Elisabeth Doe' + >>> determine_comma_role("Jane Doe,") + 'Jane Doe' + >>> determine_comma_role("DOe, Jane") + ' Jane Doe' """ suffixes = [ "sr", @@ -68,19 +78,19 @@ def determine_comma_role(name: str) -> str: "ix", "x", ] - name_parts = name.split(",") + name_parts = name.lower().split(",") # if the comma is just in the end as a typo: if len(name_parts[1]) == 0: - return name_parts[0] + return name_parts[0].title() # if just the suffix in the end, leave the name as it is if name_parts[1].strip() in suffixes: - return name + return name.title() # at this point either it's just poor name placement, or the suffix is # in the beginning of the name. Either way, the first part of the list is # the true last name. last_part = name_parts.pop(0) first_part = " ".join(name_parts) - return first_part + " " + last_part + return first_part.title() + " " + last_part.title() def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: @@ -114,6 +124,8 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: 'Jane Elisabeth Doe, Iv' >>> get_likely_name("","","Jane Elisabeth Doe, IV") 'Jane Elisabeth Doe Iv' + >>> get_likely_name("Jane","","Doe, Jane, Elisabeth") + 'Jane Elisabeth Doe' """ # first ensure clean input by deleting spaces: first_name, last_name, full_name = list( @@ -154,10 +166,10 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: # one last check to remove any pieces that might add extra whitespace names = list(filter(lambda x: x != "", names)) names = " ".join(names) - names = names.split(" ") + names = names.title().replace(" ", " ").split(" ") final_name = [] [final_name.append(x) for x in names if x not in final_name] - return " 
".join(final_name).title().strip() + return " ".join(final_name).strip() def get_street_from_address_line_1(address_line_1: str) -> str: From 23a8c1ffca9935aeef5b74341c7562eb1f020fe2 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 31 Jan 2024 09:07:26 -0600 Subject: [PATCH 059/214] Adding Avery's feedback --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 2cdd11b6..0450fca8 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -123,7 +123,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane Elisabeth Doe, IV","Elisabeth","Doe, IV") 'Jane Elisabeth Doe, Iv' >>> get_likely_name("","","Jane Elisabeth Doe, IV") - 'Jane Elisabeth Doe Iv' + 'Jane Elisabeth Doe, Iv' >>> get_likely_name("Jane","","Doe, Jane, Elisabeth") 'Jane Elisabeth Doe' """ From 4081715a2d4b83875c3def1c086f3d9f1b579e78 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 31 Jan 2024 09:40:58 -0600 Subject: [PATCH 060/214] saving personal work before merging, no need to look or review @Avery @Trevor --- notebooks/Test.ipynb | 421 ++++++++++++++++++++++++++++++++++++++++++- utils/linkage.py | 13 ++ 2 files changed, 433 insertions(+), 1 deletion(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index 5df942e1..1176ab75 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -12,6 +12,425 @@ "\n", "save_random_dataframe(Path(\"../output\"), Path(\"test.csv\"))" ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def determine_comma_role(name: str) -> str:\n", + " \"\"\"Given a string (someone's name), attempts to determine the role of the\n", + " comma in the name and where it ought to belong.\n", + "\n", + " Some assumptions are made:\n", + " * If a suffix is included in the name and the name is not just the last\n", + " name(i.e \"Doe, Jr), the format is\n", + " (last_name 
suffix, first and middle name) i.e Doe iv, Jane Elisabeth\n", + "\n", + " * If a comma is used anywhere else, it is in the format of\n", + " (last_name, first and middle name) i.e Doe, Jane Elisabeth\n", + " Args:\n", + " name: a string representing a name/names of individuals\n", + " Returns:\n", + " the name with or without a comma based on some conditions\n", + " \"\"\"\n", + " suffixes = [\n", + " \"sr\",\n", + " \"jr\",\n", + " \"i\",\n", + " \"ii\",\n", + " \"iii\",\n", + " \"iv\",\n", + " \"v\",\n", + " \"vi\",\n", + " \"vii\",\n", + " \"viii\",\n", + " \"ix\",\n", + " \"x\",\n", + " ]\n", + " name_parts = name.lower().split(\",\")\n", + " # if the comma is just in the end as a typo:\n", + " if len(name_parts[1]) == 0:\n", + " return name_parts[0].title()\n", + " # if just the suffix in the end, leave the name as it is\n", + " if name_parts[1].strip() in suffixes:\n", + " return name.title()\n", + " # at this point either it's just poor name placement, or the suffix is\n", + " # in the beginning of the name. Either way, the first part of the list is\n", + " # the true last name.\n", + " last_part = name_parts.pop(0)\n", + " first_part = \" \".join(name_parts)\n", + " return first_part.title() + \" \" + last_part.title()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' Jane Jr Doe'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "determine_comma_role(\"DOe, Jane, Jr\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "def get_likely_name(first_name: str, last_name: str, full_name: str) -> str:\n", + " \"\"\"Given name related columns, return a person's likely name\n", + "\n", + " Given different formatting used accross states, errors in data entry\n", + " and missing data, it can be difficult to determine someone's actual\n", + " name. 
For example, some states have a last name column with values like\n", + " \"Doe, Jane\", where the person's first name appears to have been erroneously\n", + " included.\n", + "\n", + " Args:\n", + " first_name: raw value of first name column\n", + " last_name: raw value last name column\n", + " full_name: raw value of name or full_name column\n", + " Returns:\n", + " The most likely full name of the person listed\n", + "\n", + " Sample Usage:\n", + " >>> get_likely_name(\"Jane\", \"Doe\", \"\")\n", + " 'Jane Doe'\n", + " >>> get_likely_name(\"\", \"\", \"Jane Doe\")\n", + " 'Jane Doe'\n", + " >>> get_likely_name(\"\", \"Doe, Jane\", \"\")\n", + " 'Jane Doe'\n", + " >>> get_likely_name(\"Jane Doe\", \"Doe\", \"Jane Doe\")\n", + " 'Jane Doe'\n", + " >>> get_likely_name(\"Jane\",\"\",\"Doe, Sr\")\n", + " 'Jane Doe, Sr'\n", + " >>> get_likely_name(\"Jane Elisabeth Doe, IV\",\"Elisabeth\",\"Doe, IV\")\n", + " 'Jane Elisabeth Doe, Iv'\n", + " >>> get_likely_name(\"\",\"\",\"Jane Elisabeth Doe, IV\")\n", + " 'Jane Elisabeth Doe Iv'\n", + " \"\"\"\n", + " # first ensure clean input by deleting spaces:\n", + " first_name, last_name, full_name = list(\n", + " map(lambda x: x.lower().strip(), [first_name, last_name, full_name])\n", + " )\n", + "\n", + " # if data is clean:\n", + " if first_name + \" \" + last_name == full_name:\n", + " return full_name\n", + "\n", + " # some names have titles or professions associated with the name. 
We need to\n", + " # remove those from the name.\n", + " titles = [\n", + " \"mr\",\n", + " \"ms\",\n", + " \"mrs\",\n", + " \"miss\",\n", + " \"prof\",\n", + " \"dr\",\n", + " \"doctor\",\n", + " \"sir\",\n", + " \"madam\",\n", + " \"professor\",\n", + " ]\n", + " names = [first_name, last_name, full_name]\n", + "\n", + " for i in range(len(names)):\n", + " # if there is a ',' deal with it accordingly\n", + " if \",\" in names[i]:\n", + " names[i] = determine_comma_role(names[i])\n", + "\n", + " names[i] = names[i].replace(\".\", \"\").split(\" \")\n", + " names[i] = [\n", + " name_part for name_part in names[i] if name_part not in titles\n", + " ]\n", + " names[i] = \" \".join(names[i])\n", + "\n", + " # one last check to remove any pieces that might add extra whitespace\n", + " names = list(filter(lambda x: x != \"\", names))\n", + " names = \" \".join(names)\n", + " names = names.title().replace(\" \",\" \").split(\" \")\n", + " final_name = []\n", + " [final_name.append(x) for x in names if x not in final_name]\n", + " return \" \".join(final_name).strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_4143866/1500712151.py:2: DtypeWarning: Columns (7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " ind_df = pd.read_csv(\"../output/complete_individuals_table.csv\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0idfirst_namelast_namefull_nameentity_typestatepartycompany
001869727NaNNaNwilliam \bstonerindividualNaNNaNNaN
111779679NaNNaNrm coulonindividualNaNNaNarea agency on aging
222277221NaNNaNjames engelsonindividualNaNNaNretired
332277156NaNNaNmarivic franciaskinnerindividualNaNNaNfibre source international corp
442341373NaNNaNanthony grindleindividualNaNNaNzimmerbiomet
..............................
25053418612606acfa74b-d5e1-4afd-b020-dbe429eb1c3fNaNNaNMelissa HartCandidatePAREPNaN
2505342861271f111045d-bc3d-4050-9ad7-b3b1e6d72e56NaNNaNHeather MillerCandidatePADEMNaN
2505343861277d40859d7-b523-4ef5-895b-c3a947ab582fNaNNaNChristopher M. GebhardCandidatePAREPNaN
2505344861775f5d76d43-86f4-40f9-aeb9-3df97ca8cdf0NaNNaNApril WeaverCandidatePAREPNaN
25053458619201a0cf90d-3252-4c8d-b109-dea084a01f69NaNNaNKrista PaolucciCandidatePAREPNaN
\n", + "

2505346 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 id first_name \\\n", + "0 0 1869727 NaN \n", + "1 1 1779679 NaN \n", + "2 2 2277221 NaN \n", + "3 3 2277156 NaN \n", + "4 4 2341373 NaN \n", + "... ... ... ... \n", + "2505341 861260 6acfa74b-d5e1-4afd-b020-dbe429eb1c3f NaN \n", + "2505342 861271 f111045d-bc3d-4050-9ad7-b3b1e6d72e56 NaN \n", + "2505343 861277 d40859d7-b523-4ef5-895b-c3a947ab582f NaN \n", + "2505344 861775 f5d76d43-86f4-40f9-aeb9-3df97ca8cdf0 NaN \n", + "2505345 861920 1a0cf90d-3252-4c8d-b109-dea084a01f69 NaN \n", + "\n", + " last_name full_name entity_type state party \\\n", + "0 NaN william \bstoner individual NaN NaN \n", + "1 NaN rm coulon individual NaN NaN \n", + "2 NaN james engelson individual NaN NaN \n", + "3 NaN marivic franciaskinner individual NaN NaN \n", + "4 NaN anthony grindle individual NaN NaN \n", + "... ... ... ... ... ... \n", + "2505341 NaN Melissa Hart Candidate PA REP \n", + "2505342 NaN Heather Miller Candidate PA DEM \n", + "2505343 NaN Christopher M. Gebhard Candidate PA REP \n", + "2505344 NaN April Weaver Candidate PA REP \n", + "2505345 NaN Krista Paolucci Candidate PA REP \n", + "\n", + " company \n", + "0 NaN \n", + "1 area agency on aging \n", + "2 retired \n", + "3 fibre source international corp \n", + "4 zimmerbiomet \n", + "... ... 
\n", + "2505341 NaN \n", + "2505342 NaN \n", + "2505343 NaN \n", + "2505344 NaN \n", + "2505345 NaN \n", + "\n", + "[2505346 rows x 9 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "ind_df = pd.read_csv(\"../output/complete_individuals_table.csv\")\n", + "ind_df.sample(1000)\n", + "ind_df" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Doe, Jr, Jane'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "determine_comma_role(\"Doe, Jr, Jane\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -30,7 +449,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.7" }, "orig_nbformat": 4 }, diff --git a/utils/linkage.py b/utils/linkage.py index 0450fca8..f5018979 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -3,6 +3,7 @@ """ import textdistance as td import usaddress +import pandas as pd def calculate_string_similarity(string1: str, string2: str) -> float: @@ -215,3 +216,15 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string.append(key) return " ".join(string) + +def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: + '''Given a dataframe, remove rows that have identical entry data beyond + UUIDs, and output a file mapping an entry to other the UUIDs of the + deduplicated rows + + Args: + a pandas dataframe containing contribution data + Returns: + a deduplicated pandas dataframe containing contribution data + ''' + pass \ No newline at end of file From 50537f9e620400630d1a92f1b4f7962a48955fa8 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 31 Jan 2024 15:43:13 +0000 Subject: [PATCH 061/214] updating requirements.txt to include 
names-dataset package --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index db05b66f..fa82b105 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ Requests==2.31.0 setuptools==68.0.0 textdistance==4.6.1 usaddress==0.5.4 +names-dataset==3.1.0 From 3fcbc5b6539edc5fdf1102c9ec9d3727552c57ee Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 31 Jan 2024 09:51:34 -0600 Subject: [PATCH 062/214] precommit checks --- utils/linkage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 9866a9b9..5788eb0b 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,9 +1,8 @@ -from utils.constants import ( - COMPANY_TYPES -) import textdistance as td import usaddress +from utils.constants import COMPANY_TYPES + """ Module for performing record linkage on state campaign finance dataset """ @@ -121,4 +120,5 @@ def standardize_corp_names(company_name: str) -> str: new_company_name = " ".join(company_name_split) return new_company_name -print(standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC')) + +print(standardize_corp_names("MI BEER WINE WHOLESALERS ASSOC")) From f07dae2a96ebc9ed00d7056721361d3684165b5c Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 31 Jan 2024 10:23:53 -0600 Subject: [PATCH 063/214] get address number from line 1 function --- utils/linkage.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index a96b8167..1333024f 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -165,4 +165,32 @@ def standardize_corp_names(company_name: str) -> str: return new_company_name -print(standardize_corp_names("MI BEER WINE WHOLESALERS ASSOC")) +def get_address_number_from_address_line_1(address_line_1: str) -> str: + """Given an address line 1, return the building number or po box + + Args: + address_line_1: either street information or PO box 
+ Returns: + address or po box number + + Sample Usage: + >>> get_building_from_address_line_1('6727 W. Corrine Dr. Peoria,AZ 85381') + '6727' + >>> get_building_from_address_line_1('P.O. Box 5456 Sun City West ,AZ 85375') + 'P.O. Box 5456' + >>> get_building_from_address_line_1('119 S 5th St Niles,MI 49120') + '119' + >>> get_building_from_address_line_1( + ... '1415 PARKER STREET APT 251 DETROIT MI 48214-0000' + ... ) + '1415' + """ + + address_line_1_components = usaddress.parse(address_line_1) + + for i in range(len(address_line_1_components)): + if address_line_1_components[i][1] == "AddressNumber": + return address_line_1_components[i][0] + elif address_line_1_components[i][1] == "USPSBoxID": + return address_line_1_components[i][0] + raise ValueError("Can not find Address Number") From b21fd5299d3351f28a8f3896c11729fc14390629 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 31 Jan 2024 16:24:03 +0000 Subject: [PATCH 064/214] initial name_rank function --- utils/linkage.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index d013bfd7..d910587c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,5 +1,9 @@ import textdistance as td import usaddress +from names_dataset import NameDataset + +nd = NameDataset() +# 'The library takes time to initialize because the database is massive.' 
""" Module for performing record linkage on state campaign finance dataset @@ -131,3 +135,36 @@ def get_street_from_address_line_1(address_line_1: str) -> str: string.append(key) return " ".join(string) + + +def name_rank(first_name: str, last_name: str) -> list: + """Returns a score for the rank of a first name and last name in the US + https://github.com/philipperemy/name-dataset + + Args: + first_name: any string + last_name: any string + Returns: + name rank for first name and last names + 1 is the most common name, only for names in the 'United States' + first element is the element corresponds to the rank of the first name + second element is the element corresponds to the rank of the last name + """ + + first_name_result = nd.search(first_name) + last_name_result = nd.search(last_name) + first_name_rank = 0 + last_name_rank = 0 + try: + first_name_rank = first_name_result["first_name"]["rank"][ + "United States" + ] + except KeyError: + pass + + try: + last_name_rank = last_name_result["last_name"]["rank"]["United States"] + except KeyError: + pass + + return [first_name_rank, last_name_rank] From 8849f462925bbc3064f5f5539513cb16cf7c20b7 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 31 Jan 2024 10:29:06 -0600 Subject: [PATCH 065/214] get address number from line 1 function --- utils/linkage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1333024f..379e6d4c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -174,13 +174,13 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: address or po box number Sample Usage: - >>> get_building_from_address_line_1('6727 W. Corrine Dr. Peoria,AZ 85381') + >>> get_address_number_from_address_line_1('6727 W. Corrine Dr. Peoria,AZ 85381') '6727' - >>> get_building_from_address_line_1('P.O. Box 5456 Sun City West ,AZ 85375') + >>> get_address_number_from_address_line_1('P.O. Box 5456 Sun City West ,AZ 85375') 'P.O. 
Box 5456' - >>> get_building_from_address_line_1('119 S 5th St Niles,MI 49120') + >>> get_address_number_from_address_line_1('119 S 5th St Niles,MI 49120') '119' - >>> get_building_from_address_line_1( + >>> get_address_number_from_address_line_1( ... '1415 PARKER STREET APT 251 DETROIT MI 48214-0000' ... ) '1415' From d0086ef22db122a6e8bd6add3f7e2fdfcc9fb221 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 31 Jan 2024 11:07:37 -0600 Subject: [PATCH 066/214] get address number from line 1 function --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 379e6d4c..ac11a5ac 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -177,7 +177,7 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: >>> get_address_number_from_address_line_1('6727 W. Corrine Dr. Peoria,AZ 85381') '6727' >>> get_address_number_from_address_line_1('P.O. Box 5456 Sun City West ,AZ 85375') - 'P.O. Box 5456' + '5456' >>> get_address_number_from_address_line_1('119 S 5th St Niles,MI 49120') '119' >>> get_address_number_from_address_line_1( From 5f65159fbe7d8752755e814878486d8f50697b48 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 31 Jan 2024 23:48:08 -0600 Subject: [PATCH 067/214] attempt so far at dedup --- utils/linkage.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 6e8e6a5b..f8ea7bb0 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -271,4 +271,13 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: Returns: a deduplicated pandas dataframe containing contribution data ''' - pass \ No newline at end of file + #first remove all duplicate entries: + new_df = df.drop_duplicates() + + # now find the duplicates along all columns but the ID + cols = new_df.columns[1:] + duplicates = new_df[new_df.duplicated(cols)] + new_df = new_df.drop(index=duplicates.index.tolist()) + #for index in 
duplicates.index: + + return new_df \ No newline at end of file From 28c003433545676a9f09827e29814d40543ff4c4 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 1 Feb 2024 06:02:46 +0000 Subject: [PATCH 068/214] edited function --- utils/linkage.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d910587c..5370b306 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -151,20 +151,25 @@ def name_rank(first_name: str, last_name: str) -> list: second element is the element corresponds to the rank of the last name """ + if first_name is None or last_name is None: + return [None, None] + + if not isinstance(first_name, str) or not isinstance(last_name, str): + return [None, None] + first_name_result = nd.search(first_name) last_name_result = nd.search(last_name) - first_name_rank = 0 - last_name_rank = 0 - try: - first_name_rank = first_name_result["first_name"]["rank"][ - "United States" - ] - except KeyError: - pass - - try: - last_name_rank = last_name_result["last_name"]["rank"]["United States"] - except KeyError: - pass + first_name_rank = None + last_name_rank = None + + if first_name_result and isinstance(first_name_result, dict): + first_name_data = first_name_result.get("first_name") + if first_name_data and "rank" in first_name_data: + first_name_rank = first_name_data["rank"].get("United States", None) + + if last_name_result and isinstance(last_name_result, dict): + last_name_data = last_name_result.get("last_name") + if last_name_data and "rank" in last_name_data: + last_name_rank = last_name_data["rank"].get("United States", None) return [first_name_rank, last_name_rank] From 71a3174aabda2137f4980cb8df7952374f3ca7a5 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 1 Feb 2024 00:12:47 -0600 Subject: [PATCH 069/214] attempt so far at dedup --- utils/linkage.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git 
a/utils/linkage.py b/utils/linkage.py index f8ea7bb0..bc2f062b 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,9 +1,9 @@ """ Module for performing record linkage on state campaign finance dataset """ +import pandas as pd import textdistance as td import usaddress -import pandas as pd def get_address_line_1_from_full_address(address: str) -> str: @@ -261,23 +261,24 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) + def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: - '''Given a dataframe, remove rows that have identical entry data beyond + """Given a dataframe, remove rows that have identical entry data beyond UUIDs, and output a file mapping an entry to other the UUIDs of the deduplicated rows - + Args: a pandas dataframe containing contribution data Returns: a deduplicated pandas dataframe containing contribution data - ''' - #first remove all duplicate entries: + """ + # first remove all duplicate entries: new_df = df.drop_duplicates() # now find the duplicates along all columns but the ID cols = new_df.columns[1:] - duplicates = new_df[new_df.duplicated(cols)] + duplicates = new_df[new_df.duplicated(cols)] new_df = new_df.drop(index=duplicates.index.tolist()) - #for index in duplicates.index: + # for index in duplicates.index: - return new_df \ No newline at end of file + return new_df From 56cde5f003a2e3a49817e3c04e2305252110ef96 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 1 Feb 2024 00:13:15 -0600 Subject: [PATCH 070/214] attempt so far at dedup --- utils/linkage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index bc2f062b..25e110da 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -279,6 +279,5 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: cols = new_df.columns[1:] duplicates = new_df[new_df.duplicated(cols)] new_df = new_df.drop(index=duplicates.index.tolist()) - # for index in duplicates.index: return 
new_df From 161a175c8f31bf79fea702d7b7497cb33218bd0b Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 1 Feb 2024 01:54:17 -0600 Subject: [PATCH 071/214] updates on linkage doc, ignore notebooks/Test.ipynb --- notebooks/Test.ipynb | 287 +++++++++++++++++++++++++++++++++++++------ utils/linkage.py | 27 +++- 2 files changed, 276 insertions(+), 38 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index cf4679fb..e4cac62f 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -191,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -403,7 +403,7 @@ "18 Paa Pac PA Organization " ] }, - "execution_count": 5, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -433,10 +433,29 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 46, "metadata": {}, "outputs": [], "source": [ + "from utils.constants import repo_root\n", + "def convert_duplicates_to_dict(df: pd.DataFrame)->pd.DataFrame:\n", + " '''Takes a dataframe whose indexes are UUIDs, and a column that is a list of\n", + " all other UUIDs that have duplicate values. The function then outputs a\n", + " dictionary file where the deduped UUIDs map to the dataframe main UUID\n", + " \n", + " Args:\n", + " A pandas dataframe with UUIDs as indexes and deduplicated UUIDs\n", + " matching up to the index in the same row\n", + " \n", + " Returns\n", + " None. 
However it outputs a dictionary\n", + " '''\n", + " #for index in df.index:\n", + " \n", + " #entities.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False)\n", + " pass\n", + "\n", + "\n", "def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame:\n", " '''Given a dataframe, remove rows that have identical entry data beyond\n", " UUIDs, and output a file mapping an entry to other the UUIDs of the\n", @@ -451,17 +470,20 @@ " new_df = df.drop_duplicates()\n", "\n", " # now find the duplicates along all columns but the ID\n", - " cols = new_df.columns[1:]\n", - " duplicates = new_df[new_df.duplicated(cols)] \n", - " new_df = new_df.drop(index=duplicates.index.tolist())\n", - " #for index in duplicates.index:\n", + " new_df=new_df.groupby(df.columns[1:].tolist())[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", + " new_df.index=new_df[\"duplicated\"].str[0].tolist()\n", + " new_df[\"duplicated\"]=new_df[\"duplicated\"].str[1:]\n", "\n", + " # now convert the duplicated column into a dictionary that can will be\n", + " # an output\n", + " convert_duplicates_to_dict(new_df[['duplicated']])\n", + " #new_df = new_df.drop(['duplicated'], axis=1)\n", " return new_df" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -485,63 +507,151 @@ " \n", " \n", " \n", - " id\n", " name\n", " state\n", " entity_type\n", + " duplicated\n", " \n", " \n", " \n", " \n", - " 16\n", - " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe\n", - " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", + " d31df1ca-714e-4a82-9e88-1892c0451a71\n", + " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", " MI\n", " committee\n", + " []\n", " \n", " \n", - " 17\n", - " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff\n", + " 910c4d36-b036-469e-aa2a-ea4ff8855a6c\n", + " Citizens For Kail\n", + " PA\n", + " Organization\n", + " []\n", + " \n", + " \n", + " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd\n", " MICHIGAN ASSOCIATION OF 
NURSE ANESTHETISTS PAC\n", " MI\n", " committee\n", + " [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc...\n", " \n", " \n", - " 18\n", - " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd\n", + " c875d7de-94be-42f1-b994-dd89b114d51e\n", + " Pa Fraternal Order Of Police Pac\n", + " PA\n", + " Organization\n", + " []\n", + " \n", + " \n", + " 60d454d1-3773-4d88-80e9-132c161da0f0\n", " Paa Pac\n", " PA\n", " Organization\n", + " [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd]\n", + " \n", + " \n", + " f71341d7-d27e-47eb-9b66-903af39d6cb5\n", + " Pabar Pac (Pa Bar Assn)\n", + " PA\n", + " Organization\n", + " []\n", + " \n", + " \n", + " 50c7d9a1-b448-46a5-8e2d-cd15b3097360\n", + " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", + " MI\n", + " committee\n", + " []\n", + " \n", + " \n", + " 62ea1e9c-ac12-400c-b3dc-519389c0f7d3\n", + " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", + " MI\n", + " committee\n", + " []\n", + " \n", + " \n", + " 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7\n", + " Ugi Utilities Inc/Ugi Energy Services Llc Pac\n", + " PA\n", + " Organization\n", + " []\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id \\\n", - "16 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe \n", - "17 1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff \n", - "18 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", + " name \\\n", + "d31df1ca-714e-4a82-9e88-1892c0451a71 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", + "910c4d36-b036-469e-aa2a-ea4ff8855a6c Citizens For Kail \n", + "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "c875d7de-94be-42f1-b994-dd89b114d51e Pa Fraternal Order Of Police Pac \n", + "60d454d1-3773-4d88-80e9-132c161da0f0 Paa Pac \n", + "f71341d7-d27e-47eb-9b66-903af39d6cb5 Pabar Pac (Pa Bar Assn) \n", + "50c7d9a1-b448-46a5-8e2d-cd15b3097360 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... \n", + "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 
\n", + "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 Ugi Utilities Inc/Ugi Energy Services Llc Pac \n", + "\n", + " state entity_type \\\n", + "d31df1ca-714e-4a82-9e88-1892c0451a71 MI committee \n", + "910c4d36-b036-469e-aa2a-ea4ff8855a6c PA Organization \n", + "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MI committee \n", + "c875d7de-94be-42f1-b994-dd89b114d51e PA Organization \n", + "60d454d1-3773-4d88-80e9-132c161da0f0 PA Organization \n", + "f71341d7-d27e-47eb-9b66-903af39d6cb5 PA Organization \n", + "50c7d9a1-b448-46a5-8e2d-cd15b3097360 MI committee \n", + "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 MI committee \n", + "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 PA Organization \n", "\n", - " name state entity_type \n", - "16 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "17 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "18 Paa Pac PA Organization " + " duplicated \n", + "d31df1ca-714e-4a82-9e88-1892c0451a71 [] \n", + "910c4d36-b036-469e-aa2a-ea4ff8855a6c [] \n", + "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... 
\n", + "c875d7de-94be-42f1-b994-dd89b114d51e [] \n", + "60d454d1-3773-4d88-80e9-132c161da0f0 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] \n", + "f71341d7-d27e-47eb-9b66-903af39d6cb5 [] \n", + "50c7d9a1-b448-46a5-8e2d-cd15b3097360 [] \n", + "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 [] \n", + "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 [] " ] }, - "execution_count": 7, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x = deduplicate_perfect_matches(sample_df)\n", - "for i in range(len(x)):\n", - " curr_row = x.loc[i]\n", - " sample_df.loc[(sample_df.name == 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC') &\n", - "# (sample_df.state == 'MI') &\n", - "# (sample_df.entity_type == 'committee')]\n", - "x\n" + "x#[['duplicated']]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[]\n", + "['1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe', '1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff']\n", + "[]\n", + "['1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd']\n", + "[]\n", + "[]\n", + "[]\n", + "[]\n" + ] + } + ], + "source": [ + "y = x[['duplicated']]\n", + "for i in range(len(y)):\n", + " #print(y.iloc[i]['duplicated'])\n", + " print(y.iloc[i]['duplicated'])" ] }, { @@ -637,21 +747,128 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namestateentity_type
d31df1ca-714e-4a82-9e88-1892c0451a71COMMITTEE TO ELECT DR PATRICIA BERNARDMIcommittee
910c4d36-b036-469e-aa2a-ea4ff8855a6cCitizens For KailPAOrganization
1d2b5bc0-9385-4cd7-ac48-df43b3eca6fdMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee
c875d7de-94be-42f1-b994-dd89b114d51ePa Fraternal Order Of Police PacPAOrganization
60d454d1-3773-4d88-80e9-132c161da0f0Paa PacPAOrganization
f71341d7-d27e-47eb-9b66-903af39d6cb5Pabar Pac (Pa Bar Assn)PAOrganization
50c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee
62ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee
4db76e6e-f0d5-40eb-82de-6dbcdb562dd7Ugi Utilities Inc/Ugi Energy Services Llc PacPAOrganization
\n", + "
" + ], "text/plain": [ - "['name', 'state', 'entity_type']" + " name \\\n", + "d31df1ca-714e-4a82-9e88-1892c0451a71 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", + "910c4d36-b036-469e-aa2a-ea4ff8855a6c Citizens For Kail \n", + "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "c875d7de-94be-42f1-b994-dd89b114d51e Pa Fraternal Order Of Police Pac \n", + "60d454d1-3773-4d88-80e9-132c161da0f0 Paa Pac \n", + "f71341d7-d27e-47eb-9b66-903af39d6cb5 Pabar Pac (Pa Bar Assn) \n", + "50c7d9a1-b448-46a5-8e2d-cd15b3097360 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... \n", + "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", + "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 Ugi Utilities Inc/Ugi Energy Services Llc Pac \n", + "\n", + " state entity_type \n", + "d31df1ca-714e-4a82-9e88-1892c0451a71 MI committee \n", + "910c4d36-b036-469e-aa2a-ea4ff8855a6c PA Organization \n", + "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MI committee \n", + "c875d7de-94be-42f1-b994-dd89b114d51e PA Organization \n", + "60d454d1-3773-4d88-80e9-132c161da0f0 PA Organization \n", + "f71341d7-d27e-47eb-9b66-903af39d6cb5 PA Organization \n", + "50c7d9a1-b448-46a5-8e2d-cd15b3097360 MI committee \n", + "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 MI committee \n", + "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 PA Organization " ] }, - "execution_count": 19, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "#for index in x.index:\n", + "# print(index)\n", + "x" + ] }, { "cell_type": "code", diff --git a/utils/linkage.py b/utils/linkage.py index 6ee0de05..01d05dea 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -270,6 +270,22 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) +def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: + """Takes a dataframe whose indexes are UUIDs, and a column that is a list of + all other UUIDs that have 
duplicate values. The function then outputs a + dictionary file where the deduped UUIDs map to the dataframe main UUID + + Args: + A pandas dataframe with UUIDs as indexes and deduplicated UUIDs + matching up to the index in the same row + + Returns + None. However it outputs a dictionary + """ + # df.to_csv(repo_root / "output" / "deduplicated_UUIDs.csv", index=False) + pass + + def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: """Given a dataframe, remove rows that have identical entry data beyond UUIDs, and output a file mapping an entry to other the UUIDs of the @@ -284,9 +300,14 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: new_df = df.drop_duplicates() # now find the duplicates along all columns but the ID - cols = new_df.columns[1:] - duplicates = new_df[new_df.duplicated(cols)] - new_df = new_df.drop(index=duplicates.index.tolist()) + new_df = ( + new_df.groupby(df.columns[1:].tolist())["id"] + .agg(list) + .reset_index() + .rename(columns={"id": "duplicated"}) + ) + new_df.index = new_df["duplicated"].str[0].tolist() + new_df["duplicated"] = new_df["duplicated"].str[1:] return new_df From b519fa164babf8498930abcddfcc0aa4abd8f135 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 1 Feb 2024 09:22:40 -0600 Subject: [PATCH 072/214] modifications to dedup function, not yet done, no need to review yet --- notebooks/Test.ipynb | 195 +++++++++++++++++++++++++------------------ utils/linkage.py | 9 ++ 2 files changed, 124 insertions(+), 80 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index e4cac62f..bc731855 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -191,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -403,7 +403,7 @@ "18 Paa Pac PA Organization " ] }, - "execution_count": 45, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +433,7 @@ }, { "cell_type": "code", 
- "execution_count": 46, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -475,15 +475,16 @@ " new_df[\"duplicated\"]=new_df[\"duplicated\"].str[1:]\n", "\n", " # now convert the duplicated column into a dictionary that can will be\n", - " # an output\n", - " convert_duplicates_to_dict(new_df[['duplicated']])\n", - " #new_df = new_df.drop(['duplicated'], axis=1)\n", + " # an output by only feeding the entries with duplicates\n", + " new_df = new_df.reset_index().rename(columns = {'index':'id'})\n", + " convert_duplicates_to_dict(new_df[new_df['duplicated'].apply(lambda x: len(x))>0][['id','duplicated']])\n", + " new_df = new_df.drop(['duplicated'], axis=1)\n", " return new_df" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -507,74 +508,120 @@ " \n", " \n", " \n", - " name\n", - " state\n", - " entity_type\n", + " id\n", " duplicated\n", " \n", " \n", " \n", " \n", - " d31df1ca-714e-4a82-9e88-1892c0451a71\n", - " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", - " MI\n", - " committee\n", + " 2\n", + " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd\n", + " [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc...\n", + " \n", + " \n", + " 4\n", + " 60d454d1-3773-4d88-80e9-132c161da0f0\n", + " [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd]\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " id \\\n", + "2 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", + "4 60d454d1-3773-4d88-80e9-132c161da0f0 \n", + "\n", + " duplicated \n", + "2 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... 
\n", + "4 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = deduplicate_perfect_matches(sample_df)\n", + "#len(x.iloc[2]['duplicated'])\n", + "x[x['duplicated'].apply(lambda x: len(x)) > 0][['id','duplicated']]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -582,48 +629,36 @@ "" ], "text/plain": [ - " name \\\n", - "d31df1ca-714e-4a82-9e88-1892c0451a71 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", - "910c4d36-b036-469e-aa2a-ea4ff8855a6c Citizens For Kail \n", - "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", - "c875d7de-94be-42f1-b994-dd89b114d51e Pa Fraternal Order Of Police Pac \n", - "60d454d1-3773-4d88-80e9-132c161da0f0 Paa Pac \n", - "f71341d7-d27e-47eb-9b66-903af39d6cb5 Pabar Pac (Pa Bar Assn) \n", - "50c7d9a1-b448-46a5-8e2d-cd15b3097360 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... \n", - "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 
\n", - "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 Ugi Utilities Inc/Ugi Energy Services Llc Pac \n", - "\n", - " state entity_type \\\n", - "d31df1ca-714e-4a82-9e88-1892c0451a71 MI committee \n", - "910c4d36-b036-469e-aa2a-ea4ff8855a6c PA Organization \n", - "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MI committee \n", - "c875d7de-94be-42f1-b994-dd89b114d51e PA Organization \n", - "60d454d1-3773-4d88-80e9-132c161da0f0 PA Organization \n", - "f71341d7-d27e-47eb-9b66-903af39d6cb5 PA Organization \n", - "50c7d9a1-b448-46a5-8e2d-cd15b3097360 MI committee \n", - "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 MI committee \n", - "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 PA Organization \n", + " id \\\n", + "0 d31df1ca-714e-4a82-9e88-1892c0451a71 \n", + "1 910c4d36-b036-469e-aa2a-ea4ff8855a6c \n", + "2 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", + "3 c875d7de-94be-42f1-b994-dd89b114d51e \n", + "4 60d454d1-3773-4d88-80e9-132c161da0f0 \n", + "5 f71341d7-d27e-47eb-9b66-903af39d6cb5 \n", + "6 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "7 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", + "8 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 \n", "\n", - " duplicated \n", - "d31df1ca-714e-4a82-9e88-1892c0451a71 [] \n", - "910c4d36-b036-469e-aa2a-ea4ff8855a6c [] \n", - "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... \n", - "c875d7de-94be-42f1-b994-dd89b114d51e [] \n", - "60d454d1-3773-4d88-80e9-132c161da0f0 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] \n", - "f71341d7-d27e-47eb-9b66-903af39d6cb5 [] \n", - "50c7d9a1-b448-46a5-8e2d-cd15b3097360 [] \n", - "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 [] \n", - "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 [] " + " duplicated \n", + "0 [] \n", + "1 [] \n", + "2 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... 
\n", + "3 [] \n", + "4 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] \n", + "5 [] \n", + "6 [] \n", + "7 [] \n", + "8 [] " ] }, - "execution_count": 47, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = deduplicate_perfect_matches(sample_df)\n", - "x#[['duplicated']]" + "x[['id','duplicated']]" ] }, { diff --git a/utils/linkage.py b/utils/linkage.py index 01d05dea..0d2ebf79 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -309,6 +309,15 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: new_df.index = new_df["duplicated"].str[0].tolist() new_df["duplicated"] = new_df["duplicated"].str[1:] + # now convert the duplicated column into a dictionary that can will be + # an output by only feeding the entries with duplicates + new_df = new_df.reset_index().rename(columns={"index": "id"}) + convert_duplicates_to_dict( + new_df[new_df["duplicated"].apply(lambda x: len(x)) > 0][ + ["id", "duplicated"] + ] + ) + new_df = new_df.drop(["duplicated"], axis=1) return new_df From 4ac551fa498be733717a7f50af2084cb28e6c321 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Fri, 2 Feb 2024 02:34:42 +0000 Subject: [PATCH 073/214] passing pre-commits and doctests --- utils/linkage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1dbf54b6..d2236171 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -70,8 +70,6 @@ def cleaning_company_column(company_entry: str) -> str: 'Unemployed' >>> cleaning_company_column("N/A") 'Unemployed' - >>> cleaning_company_column("nan") - 'Unemployed' """ if not company_entry: From 37dcbf76a638c0007ff0de1620b93f6ec2f24ec3 Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:02:34 -0600 Subject: [PATCH 074/214] Update linkage.py --- utils/linkage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index c8842380..74cbc93c 100644 
--- a/utils/linkage.py +++ b/utils/linkage.py @@ -252,4 +252,5 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: return address_line_1_components[i][0] elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] - raise ValueError("Can not find Address Number") \ No newline at end of file + raise ValueError("Can not find Address Number") + From 7f9135f7acc77ee429557bc48d19d3d9a5f69cf6 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sun, 4 Feb 2024 14:31:07 -0600 Subject: [PATCH 075/214] finished dedup function with helper function to output to a csv_file in the output directory --- utils/linkage.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index ad5589ac..f2242da0 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -5,7 +5,7 @@ import textdistance as td import usaddress -from utils.constants import COMPANY_TYPES +from utils.constants import COMPANY_TYPES, repo_root def get_address_line_1_from_full_address(address: str) -> str: @@ -280,10 +280,25 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: matching up to the index in the same row Returns - None. However it outputs a dictionary + None. However it outputs a dictionary to the output directory, with 2 + columns. The first, which indicates the deduplicated UUIDs, is labeled + 'duplicated_uuids', and the 2nd, which shows the uuids to which the + deduplicated entries match two, is labeled 'mapped_uuids'. 
""" - # df.to_csv(repo_root / "output" / "deduplicated_UUIDs.csv", index=False) - pass + deduped_dict = {} + for i in range(len(df)): + deduped_uudis = df.iloc[i]["duplicated"] + for j in range(len(deduped_uudis)): + deduped_dict.update({deduped_uudis[j]: df.iloc[i]["id"]}) + + # now convert dictionary into a csv file + deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") + deduped_df = deduped_df.reset_index().rename( + columns={"index": "duplicated_uuids", 0: "mapped_uuids"} + ) + deduped_df.to_csv( + repo_root / "output" / "deduplicated_UUIDs.csv", index=False + ) def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: From fb106545507614b4306c7652589eb3dbf93a7059 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 5 Feb 2024 01:13:27 +0000 Subject: [PATCH 076/214] updated function --- utils/linkage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 74cbc93c..26fbd5b5 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,7 @@ +import re + import textdistance as td import usaddress -import re from utils.constants import COMPANY_TYPES @@ -253,4 +254,3 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") - From 29ee6bb63e198256d83a22019f98561f303a764b Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 6 Feb 2024 09:57:53 -0600 Subject: [PATCH 077/214] made modifications to the deduplication function --- utils/linkage.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index f2242da0..5db87454 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -271,19 +271,21 @@ def get_street_from_address_line_1(address_line_1: str) -> str: def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: - """Takes a dataframe 
whose indexes are UUIDs, and a column that is a list of - all other UUIDs that have duplicate values. The function then outputs a - dictionary file where the deduped UUIDs map to the dataframe main UUID + """Saves to the "output" directory a file mapping multiple strings to one + string + + Given a dataframe where each row contains one string in a column and a list + of strings in another column, the function maps each string in the list to + the single string. Args: - A pandas dataframe with UUIDs as indexes and deduplicated UUIDs - matching up to the index in the same row + A pandas dataframe Returns - None. However it outputs a dictionary to the output directory, with 2 - columns. The first, which indicates the deduplicated UUIDs, is labeled + None. However it outputs a file to the output directory, with 2 + columns. The first, which indicates the duplicated UUIDs, is labeled 'duplicated_uuids', and the 2nd, which shows the uuids to which the - deduplicated entries match two, is labeled 'mapped_uuids'. + deduplicated entries match to, is labeled 'mapped_uuids'. """ deduped_dict = {} for i in range(len(df)): @@ -297,14 +299,17 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: columns={"index": "duplicated_uuids", 0: "mapped_uuids"} ) deduped_df.to_csv( - repo_root / "output" / "deduplicated_UUIDs.csv", index=False + repo_root / "output" / "deduplicated_UUIDs.csv", index=False, mode="a" ) def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: - """Given a dataframe, remove rows that have identical entry data beyond - UUIDs, and output a file mapping an entry to other the UUIDs of the - deduplicated rows + """Return a dataframe with duplicated entries removed. + + Given a dataframe, combines rows that have identical data beyond their + UUIDs, keeps the first UUID amond the similarly grouped UUIDs, and saves the + rest of the UUIDS to a file in the "output" directory linking them to the + first selected UUID. 
Args: a pandas dataframe containing contribution data @@ -316,7 +321,7 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: # now find the duplicates along all columns but the ID new_df = ( - new_df.groupby(df.columns[1:].tolist())["id"] + new_df.groupby(df.columns[1:].tolist(), dropna=False)["id"] .agg(list) .reset_index() .rename(columns={"id": "duplicated"}) From cfa15d079459a30032a61325fa2f1dcf8a74e3f8 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 6 Feb 2024 10:07:25 -0600 Subject: [PATCH 078/214] received a git push error stating that the tip of my branch is behind its remote counterpart...commiting my changes before rebasing --- notebooks/Test.ipynb | 667 ++++++++++++++++++++++++++++--------------- 1 file changed, 431 insertions(+), 236 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index bc731855..188591d1 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -66,27 +66,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "' Jane Jr Doe'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "determine_comma_role(\"DOe, Jane, Jr\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -169,29 +149,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import pandas as pd\n", - "orgs = pd.read_csv(\"../output/complete_organizations_table.csv\")\n", - "type(orgs.id.tolist()[1000])" + "orgs_sample = 
pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0).sample(10000)\n", + "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False).sample(10000)\n" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -403,7 +372,7 @@ "18 Paa Pac PA Organization " ] }, - "execution_count": 28, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -448,12 +417,21 @@ " matching up to the index in the same row\n", " \n", " Returns\n", - " None. However it outputs a dictionary\n", + " None. However it outputs a dictionary to the output directory, with 2\n", + " columns. The first, which indicates the deduplicated UUIDs, is labeled\n", + " 'duplicated_uuids', and the 2nd, which shows the uuids to which the\n", + " deduplicated entries match two, is labeled 'mapped_uuids'.\n", " '''\n", - " #for index in df.index:\n", - " \n", - " #entities.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False)\n", - " pass\n", + " deduped_dict = {}\n", + " for i in range(len(df)):\n", + " deduped_uudis = df.iloc[i]['duplicated']\n", + " for j in range(len(deduped_uudis)):\n", + " deduped_dict.update({deduped_uudis[j]:df.iloc[i]['id']})\n", + " \n", + " # now convert dictionary into a csv file\n", + " deduped_df = pd.DataFrame.from_dict(deduped_dict,'index') \n", + " deduped_df = deduped_df.reset_index().rename(columns={\"index\":\"duplicated_uuids\", 0:\"mapped_uuids\"})\n", + " deduped_df.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False)\n", "\n", "\n", "def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame:\n", @@ -470,7 +448,7 @@ " new_df = df.drop_duplicates()\n", "\n", " # now find the duplicates along all columns but the ID\n", - " 
new_df=new_df.groupby(df.columns[1:].tolist())[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", + " new_df=new_df.groupby(df.columns[1:].tolist(),dropna=False)[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", " new_df.index=new_df[\"duplicated\"].str[0].tolist()\n", " new_df[\"duplicated\"]=new_df[\"duplicated\"].str[1:]\n", "\n", @@ -484,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -509,48 +487,138 @@ " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
idduplicated
0d31df1ca-714e-4a82-9e88-1892c0451a71[]
910c4d36-b036-469e-aa2a-ea4ff8855a6cCitizens For KailPAOrganization1910c4d36-b036-469e-aa2a-ea4ff8855a6c[]
1d2b5bc0-9385-4cd7-ac48-df43b3eca6fdMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee21d2b5bc0-9385-4cd7-ac48-df43b3eca6fd[1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc...
c875d7de-94be-42f1-b994-dd89b114d51ePa Fraternal Order Of Police PacPAOrganization3c875d7de-94be-42f1-b994-dd89b114d51e[]
60d454d1-3773-4d88-80e9-132c161da0f0Paa PacPAOrganization460d454d1-3773-4d88-80e9-132c161da0f0[1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd]
f71341d7-d27e-47eb-9b66-903af39d6cb5Pabar Pac (Pa Bar Assn)PAOrganization5f71341d7-d27e-47eb-9b66-903af39d6cb5[]
50c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee650c7d9a1-b448-46a5-8e2d-cd15b3097360[]
62ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee762ea1e9c-ac12-400c-b3dc-519389c0f7d3[]
4db76e6e-f0d5-40eb-82de-6dbcdb562dd7Ugi Utilities Inc/Ugi Energy Services Llc PacPAOrganization84db76e6e-f0d5-40eb-82de-6dbcdb562dd7[]
idduplicatednamestateentity_type
043a79b93-fed7-4f3c-a279-0441cdc7e72214TH DISTRICT DEMOCRATIC PARTYMIcorporation
1215f3104-2df0-4799-9a13-d0c5ec27d6f214TH DISTRICT DEMOCRATSMIcorporation
21d2b5bc0-9385-4cd7-ac48-df43b3eca6fd[1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc...022d2951-8fe9-42d6-a6ac-01e82d90fa6521ST CENTURY MEDIA - MICHIGANMIcorporation
3e1150dce-219c-4eef-995d-ee2759a92923360 TOUCHMIcorporation
460d454d1-3773-4d88-80e9-132c161da0f0[1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd]88c3b805-e0f1-42d5-8b77-536734731c4a50+1 STRATEGIES LLCMIcorporation
...............
2135f5fbf8f5-bd03-43f6-bfdd-42113bdd02a8ZoomPAOrganization
2136616c47f1-39cc-4b12-a93d-f7d3bdc88047Zoom Video CommunicationsPAOrganization
2137df101e29-4adf-4496-8d96-9732d9f7dbc8Zoom.UsPAOrganization
2138d02d1f6d-4a13-428e-a040-d35bd5cfcf9fZupancich, Andrea Senate CommitteeGACommittee
2139df42f2ec-9ee0-49d0-9020-d1a441ef8b42womenwinning State PACMNCommittee
\n", + "

2140 rows × 4 columns

\n", "
" ], "text/plain": [ - " id \\\n", - "2 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", - "4 60d454d1-3773-4d88-80e9-132c161da0f0 \n", + " id \\\n", + "0 43a79b93-fed7-4f3c-a279-0441cdc7e722 \n", + "1 215f3104-2df0-4799-9a13-d0c5ec27d6f2 \n", + "2 022d2951-8fe9-42d6-a6ac-01e82d90fa65 \n", + "3 e1150dce-219c-4eef-995d-ee2759a92923 \n", + "4 88c3b805-e0f1-42d5-8b77-536734731c4a \n", + "... ... \n", + "2135 f5fbf8f5-bd03-43f6-bfdd-42113bdd02a8 \n", + "2136 616c47f1-39cc-4b12-a93d-f7d3bdc88047 \n", + "2137 df101e29-4adf-4496-8d96-9732d9f7dbc8 \n", + "2138 d02d1f6d-4a13-428e-a040-d35bd5cfcf9f \n", + "2139 df42f2ec-9ee0-49d0-9020-d1a441ef8b42 \n", "\n", - " duplicated \n", - "2 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... \n", - "4 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] " + " name state entity_type \n", + "0 14TH DISTRICT DEMOCRATIC PARTY MI corporation \n", + "1 14TH DISTRICT DEMOCRATS MI corporation \n", + "2 21ST CENTURY MEDIA - MICHIGAN MI corporation \n", + "3 360 TOUCH MI corporation \n", + "4 50+1 STRATEGIES LLC MI corporation \n", + "... ... ... ... 
\n", + "2135 Zoom PA Organization \n", + "2136 Zoom Video Communications PA Organization \n", + "2137 Zoom.Us PA Organization \n", + "2138 Zupancich, Andrea Senate Committee GA Committee \n", + "2139 womenwinning State PAC MN Committee \n", + "\n", + "[2140 rows x 4 columns]" ] }, - "execution_count": 32, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = deduplicate_perfect_matches(sample_df)\n", + "x = deduplicate_perfect_matches(orgs_sample)\n", "#len(x.iloc[2]['duplicated'])\n", - "x[x['duplicated'].apply(lambda x: len(x)) > 0][['id','duplicated']]" + "x" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -575,123 +643,257 @@ " \n", " \n", " id\n", + " first_name\n", + " last_name\n", + " full_name\n", + " entity_type\n", + " state\n", + " party\n", + " company\n", " duplicated\n", " \n", " \n", " \n", " \n", " 0\n", - " d31df1ca-714e-4a82-9e88-1892c0451a71\n", + " 6c833843-2f4f-416c-9092-f1d95d9b27dc\n", + " 'JESSE' PHILIP\n", + " SHERMAN\n", + " 'JESSE' PHILIP SHERMAN ...\n", + " Individual\n", + " CA\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", " 1\n", - " 910c4d36-b036-469e-aa2a-ea4ff8855a6c\n", + " cdbe7cd4-f57b-4b89-b85d-d0b812e76aa4\n", + " AARON\n", + " AEBIG\n", + " AARON AEBIG ...\n", + " Individual\n", + " MI\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", " 2\n", - " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd\n", - " [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc...\n", + " a7304cd4-76ae-4223-86c3-f50da82a62aa\n", + " AARON\n", + " BATES\n", + " AARON BATES ...\n", + " Individual\n", + " MI\n", + " NaN\n", + " NaN\n", + " []\n", " \n", " \n", " 3\n", - " c875d7de-94be-42f1-b994-dd89b114d51e\n", + " cce5ccc0-cd28-4a6a-afdf-8a08ce31b94d\n", + " AARON\n", + " BIRD\n", + " AARON BIRD ...\n", + " Individual\n", + " WA\n", + " NaN\n", + " L0021\n", " []\n", " \n", " \n", " 4\n", - " 60d454d1-3773-4d88-80e9-132c161da0f0\n", - " 
[1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd]\n", + " 1302bf1f-393b-43ed-a15d-8cf6e121223c\n", + " AARON\n", + " COHEN\n", + " AARON COHEN ...\n", + " Individual\n", + " IL\n", + " NaN\n", + " NaN\n", + " []\n", " \n", " \n", - " 5\n", - " f71341d7-d27e-47eb-9b66-903af39d6cb5\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 7182\n", + " 160a5c9e-d04a-40c9-a0fd-c28e21dd70dc\n", + " NaN\n", + " NaN\n", + " Wilkinson, James\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", - " 6\n", - " 50c7d9a1-b448-46a5-8e2d-cd15b3097360\n", + " 7183\n", + " 7a19cbb7-d681-46a5-8f9f-1e7be7071f06\n", + " NaN\n", + " NaN\n", + " Wolf, Linda\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", - " 7\n", - " 62ea1e9c-ac12-400c-b3dc-519389c0f7d3\n", + " 7184\n", + " ce5156f8-23d4-40e0-8711-f19bff942543\n", + " NaN\n", + " NaN\n", + " Wollenburg, George\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", - " 8\n", - " 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7\n", + " 7185\n", + " 1948661\n", + " NaN\n", + " NaN\n", + " richard 3033 shoreham\n", + " individual\n", + " NaN\n", + " NaN\n", + " NaN\n", + " []\n", + " \n", + " \n", + " 7186\n", + " 69744565-e7e4-47e1-8555-ede565fca705\n", + " NaN\n", + " NaN\n", + " wark, david\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", " []\n", " \n", " \n", "\n", + "

7187 rows × 9 columns

\n", "" ], "text/plain": [ - " id \\\n", - "0 d31df1ca-714e-4a82-9e88-1892c0451a71 \n", - "1 910c4d36-b036-469e-aa2a-ea4ff8855a6c \n", - "2 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", - "3 c875d7de-94be-42f1-b994-dd89b114d51e \n", - "4 60d454d1-3773-4d88-80e9-132c161da0f0 \n", - "5 f71341d7-d27e-47eb-9b66-903af39d6cb5 \n", - "6 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "7 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "8 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 \n", + " id first_name \\\n", + "0 6c833843-2f4f-416c-9092-f1d95d9b27dc 'JESSE' PHILIP \n", + "1 cdbe7cd4-f57b-4b89-b85d-d0b812e76aa4 AARON \n", + "2 a7304cd4-76ae-4223-86c3-f50da82a62aa AARON \n", + "3 cce5ccc0-cd28-4a6a-afdf-8a08ce31b94d AARON \n", + "4 1302bf1f-393b-43ed-a15d-8cf6e121223c AARON \n", + "... ... ... \n", + "7182 160a5c9e-d04a-40c9-a0fd-c28e21dd70dc NaN \n", + "7183 7a19cbb7-d681-46a5-8f9f-1e7be7071f06 NaN \n", + "7184 ce5156f8-23d4-40e0-8711-f19bff942543 NaN \n", + "7185 1948661 NaN \n", + "7186 69744565-e7e4-47e1-8555-ede565fca705 NaN \n", + "\n", + " last_name \\\n", + "0 SHERMAN \n", + "1 AEBIG \n", + "2 BATES \n", + "3 BIRD \n", + "4 COHEN \n", + "... ... \n", + "7182 NaN \n", + "7183 NaN \n", + "7184 NaN \n", + "7185 NaN \n", + "7186 NaN \n", "\n", - " duplicated \n", - "0 [] \n", - "1 [] \n", - "2 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe, 1d2b5bc... \n", - "3 [] \n", - "4 [1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd] \n", - "5 [] \n", - "6 [] \n", - "7 [] \n", - "8 [] " + " full_name entity_type state \\\n", + "0 'JESSE' PHILIP SHERMAN ... Individual CA \n", + "1 AARON AEBIG ... Individual MI \n", + "2 AARON BATES ... Individual MI \n", + "3 AARON BIRD ... Individual WA \n", + "4 AARON COHEN ... Individual IL \n", + "... ... ... ... 
\n", + "7182 Wilkinson, James Individual MN \n", + "7183 Wolf, Linda Individual MN \n", + "7184 Wollenburg, George Individual MN \n", + "7185 richard 3033 shoreham individual NaN \n", + "7186 wark, david Individual MN \n", + "\n", + " party company duplicated \n", + "0 NaN NaN [] \n", + "1 NaN NaN [] \n", + "2 NaN NaN [] \n", + "3 NaN L0021 [] \n", + "4 NaN NaN [] \n", + "... ... ... ... \n", + "7182 NaN NaN [] \n", + "7183 NaN NaN [] \n", + "7184 NaN NaN [] \n", + "7185 NaN NaN [] \n", + "7186 NaN NaN [] \n", + "\n", + "[7187 rows x 9 columns]" ] }, - "execution_count": 20, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x[['id','duplicated']]" + "y = inds_sample.drop_duplicates()\n", + "\n", + "# now find the duplicates along all columns but the ID\n", + "y=y.groupby(inds_sample.columns[1:].tolist(),dropna=False)[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", + "y.index=y[\"duplicated\"].str[0].tolist()\n", + "y[\"duplicated\"]=y[\"duplicated\"].str[1:]\n", + "\n", + "# now convert the duplicated column into a dictionary that can will be\n", + "# an output by only feeding the entries with duplicates\n", + "y = y.reset_index().rename(columns = {'index':'id'})\n", + "convert_duplicates_to_dict(y[y['duplicated'].apply(lambda x: len(x))>0][['id','duplicated']])\n", + "new_df = y.drop(['duplicated'], axis=1)\n", + "#return new_df\n", + "y" ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 9, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "[]\n", - "[]\n", - "['1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe', '1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff']\n", - "[]\n", - "['1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd']\n", - "[]\n", - "[]\n", - "[]\n", - "[]\n" - ] + "data": { + "text/plain": [ + "Index(['first_name', 'last_name', 'full_name', 'entity_type', 'state', 'party',\n", + " 'company'],\n", + " dtype='object')" + ] + }, + "execution_count": 
9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "y = x[['duplicated']]\n", - "for i in range(len(y)):\n", - " #print(y.iloc[i]['duplicated'])\n", - " print(y.iloc[i]['duplicated'])" + "inds_sample.columns[1:]" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -715,74 +917,92 @@ " \n", " \n", " \n", - " name\n", - " state\n", - " entity_type\n", - " id\n", + " Max Speed\n", + " Animal\n", + " Color\n", + " Age\n", " \n", " \n", " \n", " \n", " 0\n", - " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", - " MI\n", - " committee\n", - " 2\n", + " 380.0\n", + " None\n", + " green\n", + " 2.0\n", " \n", " \n", - " 2\n", - " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", - " MI\n", - " committee\n", - " 4\n", - " \n", - " \n", - " 4\n", - " Paa Pac\n", - " PA\n", - " Organization\n", - " 2\n", + " 1\n", + " 370.0\n", + " Falcon\n", + " None\n", + " NaN\n", " \n", " \n", - " 6\n", - " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", - " MI\n", - " committee\n", - " 3\n", + " 2\n", + " NaN\n", + " None\n", + " yellow\n", + " 5.0\n", " \n", " \n", - " 7\n", - " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", - " MI\n", - " committee\n", - " 4\n", + " 3\n", + " NaN\n", + " Parrot\n", + " blue\n", + " 6.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name state entity_type id\n", - "0 COMMITTEE TO ELECT DR PATRICIA BERNARD MI committee 2\n", - "2 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee 4\n", - "4 Paa Pac PA Organization 2\n", - "6 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee 3\n", - "7 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 
MI committee 4" + " Max Speed Animal Color Age\n", + "0 380.0 None green 2.0\n", + "1 370.0 Falcon None NaN\n", + "2 NaN None yellow 5.0\n", + "3 NaN Parrot blue 6.0" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = sample_df.groupby(sample_df.columns[1:].tolist()).count().reset_index()\n", - "x.loc[x.id >1]" + "import numpy as np\n", + "import pandas as pd\n", + "df = pd.DataFrame({'Max Speed': [380., 370., np.nan, np.nan],\n", + " 'Animal': ['None', 'Falcon', 'None', 'Parrot'],\n", + " 'Color':['green',None,'yellow','blue'],\n", + " 'Age':[2,np.nan,5,6]})\n", + "df" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df= df.groupby(df.columns[1:].tolist(), dropna=False)[\"Max Speed\"]#.agg(list)#.reset_index()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -806,114 +1026,89 @@ " \n", " \n", " \n", - " name\n", - " state\n", - " entity_type\n", + " Age\n", + " Animal\n", + " Color\n", + " Max Speed\n", " \n", " \n", " \n", " \n", - " d31df1ca-714e-4a82-9e88-1892c0451a71\n", - " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", - " MI\n", - " committee\n", - " \n", - " \n", - " 910c4d36-b036-469e-aa2a-ea4ff8855a6c\n", - " Citizens For Kail\n", - " PA\n", - " Organization\n", - " \n", - " \n", - " 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd\n", - " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", - " MI\n", - " committee\n", - " \n", - " \n", - " c875d7de-94be-42f1-b994-dd89b114d51e\n", - " Pa Fraternal Order Of Police Pac\n", - " PA\n", - " Organization\n", - " \n", - " \n", - " 60d454d1-3773-4d88-80e9-132c161da0f0\n", - " Paa Pac\n", - " PA\n", - " Organization\n", - " \n", - " \n", - " f71341d7-d27e-47eb-9b66-903af39d6cb5\n", - " Pabar 
Pac (Pa Bar Assn)\n", - " PA\n", - " Organization\n", + " 0\n", + " 2.0\n", + " None\n", + " green\n", + " [380.0]\n", " \n", " \n", - " 50c7d9a1-b448-46a5-8e2d-cd15b3097360\n", - " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", - " MI\n", - " committee\n", + " 1\n", + " 5.0\n", + " None\n", + " yellow\n", + " [nan]\n", " \n", " \n", - " 62ea1e9c-ac12-400c-b3dc-519389c0f7d3\n", - " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", - " MI\n", - " committee\n", + " 2\n", + " 6.0\n", + " Parrot\n", + " blue\n", + " [nan]\n", " \n", " \n", - " 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7\n", - " Ugi Utilities Inc/Ugi Energy Services Llc Pac\n", - " PA\n", - " Organization\n", + " 3\n", + " NaN\n", + " Falcon\n", + " NaN\n", + " [370.0]\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name \\\n", - "d31df1ca-714e-4a82-9e88-1892c0451a71 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", - "910c4d36-b036-469e-aa2a-ea4ff8855a6c Citizens For Kail \n", - "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", - "c875d7de-94be-42f1-b994-dd89b114d51e Pa Fraternal Order Of Police Pac \n", - "60d454d1-3773-4d88-80e9-132c161da0f0 Paa Pac \n", - "f71341d7-d27e-47eb-9b66-903af39d6cb5 Pabar Pac (Pa Bar Assn) \n", - "50c7d9a1-b448-46a5-8e2d-cd15b3097360 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... \n", - "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 
\n", - "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 Ugi Utilities Inc/Ugi Energy Services Llc Pac \n", - "\n", - " state entity_type \n", - "d31df1ca-714e-4a82-9e88-1892c0451a71 MI committee \n", - "910c4d36-b036-469e-aa2a-ea4ff8855a6c PA Organization \n", - "1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd MI committee \n", - "c875d7de-94be-42f1-b994-dd89b114d51e PA Organization \n", - "60d454d1-3773-4d88-80e9-132c161da0f0 PA Organization \n", - "f71341d7-d27e-47eb-9b66-903af39d6cb5 PA Organization \n", - "50c7d9a1-b448-46a5-8e2d-cd15b3097360 MI committee \n", - "62ea1e9c-ac12-400c-b3dc-519389c0f7d3 MI committee \n", - "4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 PA Organization " + " Age Animal Color Max Speed\n", + "0 2.0 None green [380.0]\n", + "1 5.0 None yellow [nan]\n", + "2 6.0 Parrot blue [nan]\n", + "3 NaN Falcon NaN [370.0]" ] }, - "execution_count": 41, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#for index in x.index:\n", - "# print(index)\n", - "x" + "df = df.groupby((df.columns.difference(['Max Speed'])).tolist(),dropna=False)['Max Speed'].agg(list).reset_index()\n", + "df" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from utils.constants import repo_root\n", - "entities.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False)\n" + "df" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 3d26fdef9d0d56459c36f61cb7b4d9fa309f7925 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 7 Feb 2024 09:47:25 -0600 Subject: [PATCH 079/214] trying to see what the git branch issues are...no need to review this commit --- notebooks/Test.ipynb | 333 ++++++++++++++++++++----------------------- 1 file changed, 
154 insertions(+), 179 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index 188591d1..26d98b5f 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -149,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -160,7 +160,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -372,7 +372,7 @@ "18 Paa Pac PA Organization " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -402,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -431,7 +431,7 @@ " # now convert dictionary into a csv file\n", " deduped_df = pd.DataFrame.from_dict(deduped_dict,'index') \n", " deduped_df = deduped_df.reset_index().rename(columns={\"index\":\"duplicated_uuids\", 0:\"mapped_uuids\"})\n", - " deduped_df.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False)\n", + " deduped_df.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False, mode='a')\n", "\n", "\n", "def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame:\n", @@ -462,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -495,36 +495,36 @@ " \n", " \n", " 0\n", - " 43a79b93-fed7-4f3c-a279-0441cdc7e722\n", - " 14TH DISTRICT DEMOCRATIC PARTY\n", + " 3246120d-45fc-4d19-adee-d2aa2c5be6db\n", + " 1 BOLD STEP\n", " MI\n", " corporation\n", " \n", " \n", " 1\n", - " 215f3104-2df0-4799-9a13-d0c5ec27d6f2\n", - " 14TH DISTRICT DEMOCRATS\n", + " 
8fc7e5d5-558d-42ea-bd9a-8e48a4a9a4bd\n", + " 12CDRC\n", " MI\n", " corporation\n", " \n", " \n", " 2\n", - " 022d2951-8fe9-42d6-a6ac-01e82d90fa65\n", - " 21ST CENTURY MEDIA - MICHIGAN\n", + " a5379930-7324-4f1d-b216-84d9e9ddea40\n", + " 303 MANAGEMENT INC.\n", " MI\n", " corporation\n", " \n", " \n", " 3\n", - " e1150dce-219c-4eef-995d-ee2759a92923\n", - " 360 TOUCH\n", + " 9064112f-ef40-4690-9d0a-782a2375feb0\n", + " 314 ACTION FUND\n", " MI\n", " corporation\n", " \n", " \n", " 4\n", - " 88c3b805-e0f1-42d5-8b77-536734731c4a\n", - " 50+1 STRATEGIES LLC\n", + " 9e11e7ae-ee29-4a50-9720-41c6ac556a1f\n", + " A T AND T MICHIGAN PAC\n", " MI\n", " corporation\n", " \n", @@ -536,76 +536,76 @@ " ...\n", " \n", " \n", - " 2135\n", - " f5fbf8f5-bd03-43f6-bfdd-42113bdd02a8\n", - " Zoom\n", + " 2149\n", + " d79f9729-c9af-4347-868a-ae6e6814a295\n", + " Zach Kirk\n", " PA\n", " Organization\n", " \n", " \n", - " 2136\n", - " 616c47f1-39cc-4b12-a93d-f7d3bdc88047\n", - " Zoom Video Communications\n", + " 2150\n", + " fbfea472-e183-4479-b869-90eddfa5198c\n", + " Zest Kitchen\n", " PA\n", " Organization\n", " \n", " \n", - " 2137\n", - " df101e29-4adf-4496-8d96-9732d9f7dbc8\n", - " Zoom.Us\n", + " 2151\n", + " c105a4af-9fd4-4a5b-a7b8-1e8738ff39c6\n", + " Zoom Us\n", " PA\n", " Organization\n", " \n", " \n", - " 2138\n", - " d02d1f6d-4a13-428e-a040-d35bd5cfcf9f\n", - " Zupancich, Andrea Senate Committee\n", - " GA\n", - " Committee\n", + " 2152\n", + " 59cc8db9-607e-4e1b-ba41-0850b6019360\n", + " Zoom Video Communications Inc.\n", + " PA\n", + " Organization\n", " \n", " \n", - " 2139\n", - " df42f2ec-9ee0-49d0-9020-d1a441ef8b42\n", - " womenwinning State PAC\n", - " MN\n", - " Committee\n", + " 2153\n", + " NaN\n", + " NaN\n", + " MI\n", + " corporation\n", " \n", " \n", "\n", - "

2140 rows × 4 columns

\n", + "

2154 rows × 4 columns

\n", "" ], "text/plain": [ " id \\\n", - "0 43a79b93-fed7-4f3c-a279-0441cdc7e722 \n", - "1 215f3104-2df0-4799-9a13-d0c5ec27d6f2 \n", - "2 022d2951-8fe9-42d6-a6ac-01e82d90fa65 \n", - "3 e1150dce-219c-4eef-995d-ee2759a92923 \n", - "4 88c3b805-e0f1-42d5-8b77-536734731c4a \n", + "0 3246120d-45fc-4d19-adee-d2aa2c5be6db \n", + "1 8fc7e5d5-558d-42ea-bd9a-8e48a4a9a4bd \n", + "2 a5379930-7324-4f1d-b216-84d9e9ddea40 \n", + "3 9064112f-ef40-4690-9d0a-782a2375feb0 \n", + "4 9e11e7ae-ee29-4a50-9720-41c6ac556a1f \n", "... ... \n", - "2135 f5fbf8f5-bd03-43f6-bfdd-42113bdd02a8 \n", - "2136 616c47f1-39cc-4b12-a93d-f7d3bdc88047 \n", - "2137 df101e29-4adf-4496-8d96-9732d9f7dbc8 \n", - "2138 d02d1f6d-4a13-428e-a040-d35bd5cfcf9f \n", - "2139 df42f2ec-9ee0-49d0-9020-d1a441ef8b42 \n", + "2149 d79f9729-c9af-4347-868a-ae6e6814a295 \n", + "2150 fbfea472-e183-4479-b869-90eddfa5198c \n", + "2151 c105a4af-9fd4-4a5b-a7b8-1e8738ff39c6 \n", + "2152 59cc8db9-607e-4e1b-ba41-0850b6019360 \n", + "2153 NaN \n", "\n", " name state entity_type \n", - "0 14TH DISTRICT DEMOCRATIC PARTY MI corporation \n", - "1 14TH DISTRICT DEMOCRATS MI corporation \n", - "2 21ST CENTURY MEDIA - MICHIGAN MI corporation \n", - "3 360 TOUCH MI corporation \n", - "4 50+1 STRATEGIES LLC MI corporation \n", + "0 1 BOLD STEP MI corporation \n", + "1 12CDRC MI corporation \n", + "2 303 MANAGEMENT INC. MI corporation \n", + "3 314 ACTION FUND MI corporation \n", + "4 A T AND T MICHIGAN PAC MI corporation \n", "... ... ... ... \n", - "2135 Zoom PA Organization \n", - "2136 Zoom Video Communications PA Organization \n", - "2137 Zoom.Us PA Organization \n", - "2138 Zupancich, Andrea Senate Committee GA Committee \n", - "2139 womenwinning State PAC MN Committee \n", + "2149 Zach Kirk PA Organization \n", + "2150 Zest Kitchen PA Organization \n", + "2151 Zoom Us PA Organization \n", + "2152 Zoom Video Communications Inc. 
PA Organization \n", + "2153 NaN MI corporation \n", "\n", - "[2140 rows x 4 columns]" + "[2154 rows x 4 columns]" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -618,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -650,69 +650,63 @@ " state\n", " party\n", " company\n", - " duplicated\n", " \n", " \n", " \n", " \n", " 0\n", - " 6c833843-2f4f-416c-9092-f1d95d9b27dc\n", - " 'JESSE' PHILIP\n", - " SHERMAN\n", - " 'JESSE' PHILIP SHERMAN ...\n", + " f6df631a-e626-4861-b62b-e09512887bd3\n", + " A SCOTT\n", + " PARIS\n", + " A SCOTT PARIS ...\n", " Individual\n", - " CA\n", - " NaN\n", + " MI\n", " NaN\n", - " []\n", + " NOT EMPLOYED\n", " \n", " \n", " 1\n", - " cdbe7cd4-f57b-4b89-b85d-d0b812e76aa4\n", - " AARON\n", - " AEBIG\n", - " AARON AEBIG ...\n", + " 075fb1c6-6c70-4ec6-a439-fcebb76c4e0a\n", + " A. MARK\n", + " GLICKSTEIN\n", + " A. MARK GLICKSTEIN ...\n", " Individual\n", - " MI\n", - " NaN\n", + " CA\n", " NaN\n", - " []\n", + " PARTNERSHIP HEALTH PLAN OF CA\n", " \n", " \n", " 2\n", - " a7304cd4-76ae-4223-86c3-f50da82a62aa\n", - " AARON\n", - " BATES\n", - " AARON BATES ...\n", + " 4a3968f5-7f5e-4ed1-8f39-bfc70bc67af8\n", + " A. MICHAEL\n", + " PALIZZI\n", + " A. 
MICHAEL PALIZZI ...\n", " Individual\n", " MI\n", " NaN\n", - " NaN\n", - " []\n", + " MILLER CANFIELD\n", " \n", " \n", " 3\n", - " cce5ccc0-cd28-4a6a-afdf-8a08ce31b94d\n", + " bb952efc-3dba-4449-9405-ea65202fbbea\n", " AARON\n", - " BIRD\n", - " AARON BIRD ...\n", + " ALDRICH\n", + " AARON ALDRICH ...\n", " Individual\n", - " WA\n", + " MI\n", " NaN\n", - " L0021\n", - " []\n", + " MILLER PIPELINE CORP.\n", " \n", " \n", " 4\n", - " 1302bf1f-393b-43ed-a15d-8cf6e121223c\n", + " 79ec4a73-f688-479a-a4e3-0b0a3813188a\n", " AARON\n", - " COHEN\n", - " AARON COHEN ...\n", + " BLAND\n", + " AARON BLAND ...\n", " Individual\n", - " IL\n", + " MI\n", " NaN\n", " NaN\n", - " []\n", " \n", " \n", " ...\n", @@ -724,59 +718,54 @@ " ...\n", " ...\n", " ...\n", - " ...\n", " \n", " \n", - " 7182\n", - " 160a5c9e-d04a-40c9-a0fd-c28e21dd70dc\n", + " 7122\n", + " a1a6ff3b-cfa4-4b84-bf8c-20984f9871f0\n", " NaN\n", " NaN\n", - " Wilkinson, James\n", + " Trone, Robert\n", " Individual\n", " MN\n", " NaN\n", " NaN\n", - " []\n", " \n", " \n", - " 7183\n", - " 7a19cbb7-d681-46a5-8f9f-1e7be7071f06\n", + " 7123\n", + " 37ab55f5-3613-469c-8b66-ac8888f5bcae\n", " NaN\n", " NaN\n", - " Wolf, Linda\n", + " Wark, Mary Ann\n", " Individual\n", " MN\n", " NaN\n", " NaN\n", - " []\n", " \n", " \n", - " 7184\n", - " ce5156f8-23d4-40e0-8711-f19bff942543\n", + " 7124\n", + " 92d5ac7c-4702-420c-97a7-656111677f5a\n", " NaN\n", " NaN\n", - " Wollenburg, George\n", + " Wenstrom, Gene\n", " Individual\n", " MN\n", " NaN\n", " NaN\n", - " []\n", " \n", " \n", - " 7185\n", - " 1948661\n", + " 7125\n", + " fa934bf1-f611-4cd3-9bff-451bdf2e5bd2\n", " NaN\n", " NaN\n", - " richard 3033 shoreham\n", - " individual\n", - " NaN\n", + " Wika, Kevin\n", + " Individual\n", + " MN\n", " NaN\n", " NaN\n", - " []\n", " \n", " \n", - " 7186\n", - " 69744565-e7e4-47e1-8555-ede565fca705\n", + " 7126\n", + " fb8bb833-7010-418a-9f24-1a29771e0b67\n", " NaN\n", " NaN\n", " wark, david\n", @@ -784,111 +773,97 @@ " MN\n", " 
NaN\n", " NaN\n", - " []\n", " \n", " \n", "\n", - "

7187 rows × 9 columns

\n", + "

7127 rows × 8 columns

\n", "" ], "text/plain": [ " id first_name \\\n", - "0 6c833843-2f4f-416c-9092-f1d95d9b27dc 'JESSE' PHILIP \n", - "1 cdbe7cd4-f57b-4b89-b85d-d0b812e76aa4 AARON \n", - "2 a7304cd4-76ae-4223-86c3-f50da82a62aa AARON \n", - "3 cce5ccc0-cd28-4a6a-afdf-8a08ce31b94d AARON \n", - "4 1302bf1f-393b-43ed-a15d-8cf6e121223c AARON \n", + "0 f6df631a-e626-4861-b62b-e09512887bd3 A SCOTT \n", + "1 075fb1c6-6c70-4ec6-a439-fcebb76c4e0a A. MARK \n", + "2 4a3968f5-7f5e-4ed1-8f39-bfc70bc67af8 A. MICHAEL \n", + "3 bb952efc-3dba-4449-9405-ea65202fbbea AARON \n", + "4 79ec4a73-f688-479a-a4e3-0b0a3813188a AARON \n", "... ... ... \n", - "7182 160a5c9e-d04a-40c9-a0fd-c28e21dd70dc NaN \n", - "7183 7a19cbb7-d681-46a5-8f9f-1e7be7071f06 NaN \n", - "7184 ce5156f8-23d4-40e0-8711-f19bff942543 NaN \n", - "7185 1948661 NaN \n", - "7186 69744565-e7e4-47e1-8555-ede565fca705 NaN \n", + "7122 a1a6ff3b-cfa4-4b84-bf8c-20984f9871f0 NaN \n", + "7123 37ab55f5-3613-469c-8b66-ac8888f5bcae NaN \n", + "7124 92d5ac7c-4702-420c-97a7-656111677f5a NaN \n", + "7125 fa934bf1-f611-4cd3-9bff-451bdf2e5bd2 NaN \n", + "7126 fb8bb833-7010-418a-9f24-1a29771e0b67 NaN \n", "\n", " last_name \\\n", - "0 SHERMAN \n", - "1 AEBIG \n", - "2 BATES \n", - "3 BIRD \n", - "4 COHEN \n", + "0 PARIS \n", + "1 GLICKSTEIN \n", + "2 PALIZZI \n", + "3 ALDRICH \n", + "4 BLAND \n", "... ... \n", - "7182 NaN \n", - "7183 NaN \n", - "7184 NaN \n", - "7185 NaN \n", - "7186 NaN \n", + "7122 NaN \n", + "7123 NaN \n", + "7124 NaN \n", + "7125 NaN \n", + "7126 NaN \n", "\n", " full_name entity_type state \\\n", - "0 'JESSE' PHILIP SHERMAN ... Individual CA \n", - "1 AARON AEBIG ... Individual MI \n", - "2 AARON BATES ... Individual MI \n", - "3 AARON BIRD ... Individual WA \n", - "4 AARON COHEN ... Individual IL \n", + "0 A SCOTT PARIS ... Individual MI \n", + "1 A. MARK GLICKSTEIN ... Individual CA \n", + "2 A. MICHAEL PALIZZI ... Individual MI \n", + "3 AARON ALDRICH ... Individual MI \n", + "4 AARON BLAND ... Individual MI \n", "... ... ... ... 
\n", - "7182 Wilkinson, James Individual MN \n", - "7183 Wolf, Linda Individual MN \n", - "7184 Wollenburg, George Individual MN \n", - "7185 richard 3033 shoreham individual NaN \n", - "7186 wark, david Individual MN \n", + "7122 Trone, Robert Individual MN \n", + "7123 Wark, Mary Ann Individual MN \n", + "7124 Wenstrom, Gene Individual MN \n", + "7125 Wika, Kevin Individual MN \n", + "7126 wark, david Individual MN \n", "\n", - " party company duplicated \n", - "0 NaN NaN [] \n", - "1 NaN NaN [] \n", - "2 NaN NaN [] \n", - "3 NaN L0021 [] \n", - "4 NaN NaN [] \n", - "... ... ... ... \n", - "7182 NaN NaN [] \n", - "7183 NaN NaN [] \n", - "7184 NaN NaN [] \n", - "7185 NaN NaN [] \n", - "7186 NaN NaN [] \n", + " party company \n", + "0 NaN NOT EMPLOYED \n", + "1 NaN PARTNERSHIP HEALTH PLAN OF CA \n", + "2 NaN MILLER CANFIELD \n", + "3 NaN MILLER PIPELINE CORP. \n", + "4 NaN NaN \n", + "... ... ... \n", + "7122 NaN NaN \n", + "7123 NaN NaN \n", + "7124 NaN NaN \n", + "7125 NaN NaN \n", + "7126 NaN NaN \n", "\n", - "[7187 rows x 9 columns]" + "[7127 rows x 8 columns]" ] }, - "execution_count": 13, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "y = inds_sample.drop_duplicates()\n", - "\n", - "# now find the duplicates along all columns but the ID\n", - "y=y.groupby(inds_sample.columns[1:].tolist(),dropna=False)[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", - "y.index=y[\"duplicated\"].str[0].tolist()\n", - "y[\"duplicated\"]=y[\"duplicated\"].str[1:]\n", - "\n", - "# now convert the duplicated column into a dictionary that can will be\n", - "# an output by only feeding the entries with duplicates\n", - "y = y.reset_index().rename(columns = {'index':'id'})\n", - "convert_duplicates_to_dict(y[y['duplicated'].apply(lambda x: len(x))>0][['id','duplicated']])\n", - "new_df = y.drop(['duplicated'], axis=1)\n", - "#return new_df\n", + "y=deduplicate_perfect_matches(inds_sample)\n", "y" ] }, { 
"cell_type": "code", - "execution_count": 9, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['first_name', 'last_name', 'full_name', 'entity_type', 'state', 'party',\n", - " 'company'],\n", - " dtype='object')" + "7207" ] }, - "execution_count": 9, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_sample.columns[1:]" + "a = inds_sample.drop_duplicates()\n", + "len(a)" ] }, { From 5843485fbeb48f4adb4a20a86a79cece154e10c0 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 7 Feb 2024 23:50:38 -0600 Subject: [PATCH 080/214] implementing PR feedback --- utils/linkage.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 5db87454..1b27a844 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -4,6 +4,7 @@ import pandas as pd import textdistance as td import usaddress +import os.path from utils.constants import COMPANY_TYPES, repo_root @@ -270,9 +271,9 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) -def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: - """Saves to the "output" directory a file mapping multiple strings to one - string +def convert_duplicates_to_dict(df: pd.DataFrame) -> None: + """Saves to the "output" directory a file where each row represents a string + matching to another string Given a dataframe where each row contains one string in a column and a list of strings in another column, the function maps each string in the list to @@ -296,11 +297,9 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> pd.DataFrame: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "duplicated_uuids", 0: "mapped_uuids"} - ) + columns={"index": "duplicated_uuids", 0: "mapped_uuids"}) deduped_df.to_csv( - repo_root / "output" / 
"deduplicated_UUIDs.csv", index=False, mode="a" - ) + repo_root / "output" / "deduplicated_UUIDs.csv", index=False, mode="a", header= not os.path.exists('../output/deduplicated_UUIDs.csv')) def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: From 97b89dd7dba65a71b0c3ba31225e559d16c21617 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 7 Feb 2024 23:51:44 -0600 Subject: [PATCH 081/214] addressing linter tests failure due to formatting --- utils/linkage.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 1b27a844..0b8459d9 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,10 +1,11 @@ """ Module for performing record linkage on state campaign finance dataset """ +import os.path + import pandas as pd import textdistance as td import usaddress -import os.path from utils.constants import COMPANY_TYPES, repo_root @@ -297,9 +298,14 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "duplicated_uuids", 0: "mapped_uuids"}) + columns={"index": "duplicated_uuids", 0: "mapped_uuids"} + ) deduped_df.to_csv( - repo_root / "output" / "deduplicated_UUIDs.csv", index=False, mode="a", header= not os.path.exists('../output/deduplicated_UUIDs.csv')) + repo_root / "output" / "deduplicated_UUIDs.csv", + index=False, + mode="a", + header=not os.path.exists("../output/deduplicated_UUIDs.csv"), + ) def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: From 7dc5b7054cd6f525c6c542d12bb7bb4b68487d11 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 8 Feb 2024 07:40:57 +0000 Subject: [PATCH 082/214] updating requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index db05b66f..0b21babc 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -19,3 +19,4 @@ Requests==2.31.0 setuptools==68.0.0 textdistance==4.6.1 usaddress==0.5.4 +nameparser==1.1.3 From a3310a1871b0415140a4dad8bd41ab46d47fa97b Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 8 Feb 2024 07:41:35 +0000 Subject: [PATCH 083/214] adding pre_process pipeline funcion --- utils/linkage.py | 144 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index ac11a5ac..97b0ad6e 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,5 +1,10 @@ +import re +from typing import Tuple + +import pandas as pd import textdistance as td import usaddress +from nameparser import HumanName from utils.constants import COMPANY_TYPES @@ -194,3 +199,142 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") + + +def cleaning_company_column(company_entry: str) -> str: + """ + Given a string, check if it contains a variation of self employed, unemployed, + or retired and return the standardized version. 
+ + Args: + company: string of inputted company names + Returns: + standardized for retired, self employed, and unemployed, + or original string if no match or empty string + + >>> cleaning_company_column("Retireed") + 'Retired' + >>> cleaning_company_column("self") + 'Self Employed' + >>> cleaning_company_column("None") + 'Unemployed' + >>> cleaning_company_column("N/A") + 'Unemployed' + """ + + if not company_entry: + return company_entry + + company_edited = company_entry.lower() + + if company_edited == "n/a": + return "Unemployed" + + company_edited = re.sub(r"[^\w\s]", "", company_edited) + + if ( + company_edited == "retired" + or company_edited == "retiree" + or company_edited == "retire" + or "retiree" in company_edited + ): + return "Retired" + + elif ( + "self employe" in company_edited + or "freelance" in company_edited + or company_edited == "self" + or company_edited == "independent contractor" + ): + return "Self Employed" + elif ( + "unemploye" in company_edited + or company_edited == "none" + or company_edited == "not employed" + or company_edited == "nan" + ): + return "Unemployed" + + else: + return company_edited + + +def preprocess_pipeline( + individuals: pd.DataFrame, + Address: str, + organizations: pd.DataFrame, + transactions: pd.DataFrame, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Preprocesses data for record linkage + + Args: + Individuals: dataframe of individual contributions + Address: column name of address + Organizations: dataframe of organization contributions + Transactions: dataframe of transactions + Returns: + preprocessed tuple of dataframes + first element is the individuals dataframe, + second element is the organizations dataframe, + third element is the transactions dataframe + """ + # Preprocess organizations dataframe + organizations["name"] = ( + organizations["name"].astype(str).apply(standardize_corp_names) + ) + + # Preprocess individuals dataframe + if "Unnamed: 0" in individuals.columns: + 
individuals.drop(columns="Unnamed: 0", inplace=True) + + individuals = individuals.astype( + {"first_name": str, "last_name": str, "full_name": str, "company": str} + ) + + # Standardize company names in individuals dataframe + individuals["company"] = individuals["company"].apply( + standardize_corp_names + ) + individuals["company"] = individuals["company"].apply( + cleaning_company_column + ) + + # Address functions, assuming address column is named 'address' + individuals["Address Line 1"] = individuals[Address].apply( + get_address_line_1_from_full_address + ) + individuals["Street Name"] = individuals["Address Line 1"].apply( + get_street_from_address_line_1 + ) + individuals["Address Number"] = individuals["Address Line 1"].apply( + get_address_number_from_address_line_1 + ) + + # Check if first name or last names are empty, if so, extract from full name column + individuals["full_name"] = individuals["full_name"].astype(str) + if individuals["first_name"].isnull().any(): + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) + first_name = name.apply(lambda x: x["first"]) + individuals["first_name"] = first_name + + if individuals["last_name"].isnull().any(): + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) + last_name = name.apply(lambda x: x["last"]) + individuals["last_name"] = last_name + + # Transactions + if "Unnamed: 0" in transactions.columns: + transactions.drop(columns="Unnamed: 0", inplace=True) + + transactions["purpose"] = transactions["purpose"].str.upper() + + return individuals, organizations, transactions From 270d532bbf7cde467172343e65d4a39ef6c196ed Mon Sep 17 00:00:00 2001 From: nrposner <100233813+nrposner@users.noreply.github.com> Date: Tue, 13 Feb 2024 10:40:53 -0600 Subject: [PATCH 084/214] fixed error in row_matches --- utils/linkage.py | 2 +- utils/tests/test_linkage.py | 220 ++++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 1 
deletion(-) create mode 100644 utils/tests/test_linkage.py diff --git a/utils/linkage.py b/utils/linkage.py index c0b5028c..255d7e2a 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -140,7 +140,7 @@ def row_matches( ): # Store the other index and mark it for skipping in future iterations discard_indices.append(j) - index_dict[i].append[j] + index_dict[i].append(j) return index_dict diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py new file mode 100644 index 00000000..76a37431 --- /dev/null +++ b/utils/tests/test_linkage.py @@ -0,0 +1,220 @@ +import json +import pytest +import numpy as np +import pandas as pd + + + +#creating a test for calculate_row_similarity and row_matches + + +from utils.linkage import( + calculate_string_similarity, calculate_row_similarity, row_matches, +) + + +#maybe this will just be a csv for us? +def open_test_data_json(filename: str) -> dict: + """Open json in tests/data dir into a python dict""" + with open(test_data_directory / filename, "r") as f: + return json.load(f) + + + + + + +#to put in data: +d = {'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"],'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago","8 Fancy Way, Chicago"]} +test_df = pd.DataFrame(data=d) + + +@pytest.fixture +def row_similarity_scen_1(): + return open_test_data_json(data) + +@pytest.fixture +def row_similarity_scen_2(): + return open_test_data_json(data) + + +def test_row_similarity_scen_1( + row_similarity_scen_1 +): + result = calculate_row_similarity( + row_similarity_scen_1 + ) + wrong = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[1]],np.array([.8, .2]),calculate_string_similarity) + right = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[2]],np.array([.8, .2]),calculate_string_similarity) + + assert right > wrong + + + +def test_row_similarity_scen_2( + row_similarity_scen_2 +): + result = calculate_row_similarity( + 
row_similarity_scen_2 + ) + wrong = calculate_row_similarity(row_similarity_scen_2.iloc[[0]], row_similarity_scen_2.iloc[[1]],np.array([.2, .8]),calculate_string_similarity) + right = calculate_row_similarity(row_similarity_scen_2.iloc[[0]], row_similarity_scen_2.iloc[[2]],np.array([.2, .8]),calculate_string_similarity) + + assert right < wrong + + + +#how about for row matches? First pull into a notebook and see how a test should work + + + + + +def test_create_record_from_labelstudio_results( + labelstudio_simple_results_filtered, +): + result = create_record_from_labelstudio_results( + labelstudio_simple_results_filtered + ) + expected = { + "borrower": "Wisconsin Public Service Corporation", + "borrower_start_idx": 23, + "borrower_end_idx": 59, + "interest_rate": "5.35%", + "interest_rate_start_idx": 220, + "interest_rate_end_idx": 225, + "name": "Senior\\n\\nNotes", + "name_start_idx": 205, + "name_end_idx": 218, + "end_date": "November 10, 2025", + "end_date_start_idx": 237, + "end_date_end_idx": 254, + "type": "Bond", + "governed_by": "eVtWJ7O08t", + } + assert result == expected + + + + +#calculate + +def calculate_row_similarity( + row1: pd.DataFrame, row2: pd.DataFrame, weights: np.array, comparison_func +) -> float: + """Find weighted similarity of two rows in a dataframe + + The length of the weights vector must be the same as + the number of selected columns. + + This version is slow and not optimized, and will be + revised in order to make it more efficient. It + exists as to provide basic functionality. Once we have + the comparison function locked in, using .apply will + likely be easier and more efficient. + + >>> d = { + ... 'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"], + ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", + ... "8 Fancy Way, Chicago"] + ... } + >>> df = pd.DataFrame(data=d) + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + ... np.array([.8, .2]), + ... 
calculate_string_similarity) + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + ... np.array([.8, .2]), + ... calculate_string_similarity) + >>> right > wrong + True + >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], + ... np.array([.2, .8]), + ... calculate_string_similarity) + >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], + ... np.array([.2, .8]), + ... calculate_string_similarity) + >>> right > wrong + False + """ + + row_length = len(weights) + if not (row1.shape[1] == row2.shape[1] == row_length): + raise ValueError("Number of columns and weights must be the same") + + similarity = np.zeros(row_length) + + for i in range(row_length): + similarity[i] = comparison_func( + row1.reset_index().drop(columns="index").iloc[:, i][0], + row2.reset_index().drop(columns="index").iloc[:, i][0], + ) + + return sum(similarity * weights) + + + + + + + + + + +# from banktrack.annotation.convert import ( +# create_record_from_labelstudio_results, +# get_unique_entity_ids_from_labelstudio_results, +# ) +# from banktrack.pipeline.constants import ROOT_DIR + +test_data_directory = ROOT_DIR / "tests" / "data" + + +def open_test_data_json(filename: str) -> dict: + """Open json in tests/data dir into a python dict""" + with open(test_data_directory / filename, "r") as f: + return json.load(f) + + +@pytest.fixture +def labelstudio_simple_results(): + return open_test_data_json("simple_labelstudio_results.json") + + +@pytest.fixture +def labelstudio_simple_results_filtered(): + return open_test_data_json("simple_labelstudio_results_filtered.json") + + +def test_create_record_from_labelstudio_results( + labelstudio_simple_results_filtered, +): + result = create_record_from_labelstudio_results( + labelstudio_simple_results_filtered + ) + expected = { + "borrower": "Wisconsin Public Service Corporation", + "borrower_start_idx": 23, + "borrower_end_idx": 59, + "interest_rate": "5.35%", + "interest_rate_start_idx": 220, + 
"interest_rate_end_idx": 225, + "name": "Senior\\n\\nNotes", + "name_start_idx": 205, + "name_end_idx": 218, + "end_date": "November 10, 2025", + "end_date_start_idx": 237, + "end_date_end_idx": 254, + "type": "Bond", + "governed_by": "eVtWJ7O08t", + } + assert result == expected + + +def test_get_unique_entity_ids_from_labelstudio_results( + labelstudio_simple_results, +): + result = get_unique_entity_ids_from_labelstudio_results( + labelstudio_simple_results + ) + expected = ["eVtWJ7O08t", "b9izoYopAS"] + assert result == expected \ No newline at end of file From 7b3e8f0dd434b92cfb42e772e8ccf00f4c2f1d1c Mon Sep 17 00:00:00 2001 From: nrposner <100233813+nrposner@users.noreply.github.com> Date: Tue, 13 Feb 2024 22:12:58 -0600 Subject: [PATCH 085/214] fixing linter errors --- utils/classify.py | 47 +++++++++ utils/tests/test_linkage.py | 185 ++---------------------------------- 2 files changed, 56 insertions(+), 176 deletions(-) create mode 100644 utils/classify.py diff --git a/utils/classify.py b/utils/classify.py new file mode 100644 index 00000000..c50741bf --- /dev/null +++ b/utils/classify.py @@ -0,0 +1,47 @@ +import pandas as pd + +from utils.linkage import calculate_string_similarity + +#we want to run down a list of people and, hopefully, their adresses, plus a list of +#corporations, groups, etc, and classify them, basically just looking for matches + +#do we want to just input all the names/people (there's not many, less than 200 for sure), +#give a string similarity match score, and extract the top ten for manual review? 
+#thsi should give us a feeling for how to set our threhsold +#we might also, once we have all the data, buckle down and just classify some of them manually + +inds_list = [] + +#a list of individual names + + +def similarity_calculator(df: pd.DataFrame, suspect): + """Run through a pandas dataframe column and compare elements to a constant + + """ + # this needs to output somehting useful + + similarities = df['column1'].apply(lambda x: calculate_string_similarity(x, suspect)) + return similarities + + +#crawl through list automatically once a threshold has been set +def + +for i in inds_list: + similarities = similarity_calculator(data, i) + + similarities + # + + + + + df.apply(calculate_string_similarity, inds_list) #very psuedocode + #get top n, maybe just ten, and output + + #we can use the indices and/or select manually, just add a new column to the subjects table + #that marks fossil fuels, green energy, or neither + + + diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 76a37431..bf9a558f 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -2,16 +2,12 @@ import pytest import numpy as np import pandas as pd - - - -#creating a test for calculate_row_similarity and row_matches - - from utils.linkage import( - calculate_string_similarity, calculate_row_similarity, row_matches, + calculate_string_similarity, calculate_row_similarity, ) +#creating a test for calculate_row_similarity and row_matches + #maybe this will just be a csv for us? 
def open_test_data_json(filename: str) -> dict: @@ -20,30 +16,23 @@ def open_test_data_json(filename: str) -> dict: return json.load(f) - - - - #to put in data: d = {'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"],'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago","8 Fancy Way, Chicago"]} test_df = pd.DataFrame(data=d) -@pytest.fixture -def row_similarity_scen_1(): - return open_test_data_json(data) +# @pytest.fixture +# def row_similarity_scen_1(): +# return open_test_data_json(data) -@pytest.fixture -def row_similarity_scen_2(): - return open_test_data_json(data) +# @pytest.fixture +# def row_similarity_scen_2(): +# return open_test_data_json(data) def test_row_similarity_scen_1( row_similarity_scen_1 ): - result = calculate_row_similarity( - row_similarity_scen_1 - ) wrong = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[1]],np.array([.8, .2]),calculate_string_similarity) right = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[2]],np.array([.8, .2]),calculate_string_similarity) @@ -62,159 +51,3 @@ def test_row_similarity_scen_2( assert right < wrong - - -#how about for row matches? 
First pull into a notebook and see how a test should work - - - - - -def test_create_record_from_labelstudio_results( - labelstudio_simple_results_filtered, -): - result = create_record_from_labelstudio_results( - labelstudio_simple_results_filtered - ) - expected = { - "borrower": "Wisconsin Public Service Corporation", - "borrower_start_idx": 23, - "borrower_end_idx": 59, - "interest_rate": "5.35%", - "interest_rate_start_idx": 220, - "interest_rate_end_idx": 225, - "name": "Senior\\n\\nNotes", - "name_start_idx": 205, - "name_end_idx": 218, - "end_date": "November 10, 2025", - "end_date_start_idx": 237, - "end_date_end_idx": 254, - "type": "Bond", - "governed_by": "eVtWJ7O08t", - } - assert result == expected - - - - -#calculate - -def calculate_row_similarity( - row1: pd.DataFrame, row2: pd.DataFrame, weights: np.array, comparison_func -) -> float: - """Find weighted similarity of two rows in a dataframe - - The length of the weights vector must be the same as - the number of selected columns. - - This version is slow and not optimized, and will be - revised in order to make it more efficient. It - exists as to provide basic functionality. Once we have - the comparison function locked in, using .apply will - likely be easier and more efficient. - - >>> d = { - ... 'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"], - ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", - ... "8 Fancy Way, Chicago"] - ... } - >>> df = pd.DataFrame(data=d) - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.8, .2]), - ... calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.8, .2]), - ... calculate_string_similarity) - >>> right > wrong - True - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.2, .8]), - ... calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... 
np.array([.2, .8]), - ... calculate_string_similarity) - >>> right > wrong - False - """ - - row_length = len(weights) - if not (row1.shape[1] == row2.shape[1] == row_length): - raise ValueError("Number of columns and weights must be the same") - - similarity = np.zeros(row_length) - - for i in range(row_length): - similarity[i] = comparison_func( - row1.reset_index().drop(columns="index").iloc[:, i][0], - row2.reset_index().drop(columns="index").iloc[:, i][0], - ) - - return sum(similarity * weights) - - - - - - - - - - -# from banktrack.annotation.convert import ( -# create_record_from_labelstudio_results, -# get_unique_entity_ids_from_labelstudio_results, -# ) -# from banktrack.pipeline.constants import ROOT_DIR - -test_data_directory = ROOT_DIR / "tests" / "data" - - -def open_test_data_json(filename: str) -> dict: - """Open json in tests/data dir into a python dict""" - with open(test_data_directory / filename, "r") as f: - return json.load(f) - - -@pytest.fixture -def labelstudio_simple_results(): - return open_test_data_json("simple_labelstudio_results.json") - - -@pytest.fixture -def labelstudio_simple_results_filtered(): - return open_test_data_json("simple_labelstudio_results_filtered.json") - - -def test_create_record_from_labelstudio_results( - labelstudio_simple_results_filtered, -): - result = create_record_from_labelstudio_results( - labelstudio_simple_results_filtered - ) - expected = { - "borrower": "Wisconsin Public Service Corporation", - "borrower_start_idx": 23, - "borrower_end_idx": 59, - "interest_rate": "5.35%", - "interest_rate_start_idx": 220, - "interest_rate_end_idx": 225, - "name": "Senior\\n\\nNotes", - "name_start_idx": 205, - "name_end_idx": 218, - "end_date": "November 10, 2025", - "end_date_start_idx": 237, - "end_date_end_idx": 254, - "type": "Bond", - "governed_by": "eVtWJ7O08t", - } - assert result == expected - - -def test_get_unique_entity_ids_from_labelstudio_results( - labelstudio_simple_results, -): - result = 
get_unique_entity_ids_from_labelstudio_results( - labelstudio_simple_results - ) - expected = ["eVtWJ7O08t", "b9izoYopAS"] - assert result == expected \ No newline at end of file From 665519241ceaafdd5361d69bbfc6162226ea46e9 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 14 Feb 2024 02:27:03 -0600 Subject: [PATCH 086/214] updates to dedup file and beginning steps on netorkx --- requirements.txt | 1 + utils/linkage.py | 9 ++------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index db05b66f..d28ae9f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ Requests==2.31.0 setuptools==68.0.0 textdistance==4.6.1 usaddress==0.5.4 +networkx~=3.1 \ No newline at end of file diff --git a/utils/linkage.py b/utils/linkage.py index f3231886..ee8dcd60 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -190,7 +190,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: # if data is clean: if first_name + " " + last_name == full_name: - return full_name + return full_name.title() # some names have titles or professions associated with the name. We need to # remove those from the name. 
@@ -333,16 +333,11 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: .rename(columns={"id": "duplicated"}) ) new_df.index = new_df["duplicated"].str[0].tolist() - new_df["duplicated"] = new_df["duplicated"].str[1:] # now convert the duplicated column into a dictionary that can will be # an output by only feeding the entries with duplicates new_df = new_df.reset_index().rename(columns={"index": "id"}) - convert_duplicates_to_dict( - new_df[new_df["duplicated"].apply(lambda x: len(x)) > 0][ - ["id", "duplicated"] - ] - ) + convert_duplicates_to_dict(new_df[["id", "duplicated"]]) new_df = new_df.drop(["duplicated"], axis=1) return new_df From b24041d9b532a1c3e363e3ab8c70d8a7fd2d9d79 Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Wed, 14 Feb 2024 10:01:12 -0600 Subject: [PATCH 087/214] Delete notebooks/Test.ipynb --- notebooks/Test.ipynb | 1111 ------------------------------------------ 1 file changed, 1111 deletions(-) delete mode 100644 notebooks/Test.ipynb diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb deleted file mode 100644 index 26d98b5f..00000000 --- a/notebooks/Test.ipynb +++ /dev/null @@ -1,1111 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Example Notebook file demonstrating how to use the file structure\n", - "from utils.preprocess_util_lib_example import save_random_dataframe\n", - "from pathlib import Path\n", - "\n", - "save_random_dataframe(Path(\"../output\"), Path(\"test.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "def determine_comma_role(name: str) -> str:\n", - " \"\"\"Given a string (someone's name), attempts to determine the role of the\n", - " comma in the name and where it ought to belong.\n", - "\n", - " Some assumptions are made:\n", - " * If a suffix is included in the name and the name is not just the 
last\n", - " name(i.e \"Doe, Jr), the format is\n", - " (last_name suffix, first and middle name) i.e Doe iv, Jane Elisabeth\n", - "\n", - " * If a comma is used anywhere else, it is in the format of\n", - " (last_name, first and middle name) i.e Doe, Jane Elisabeth\n", - " Args:\n", - " name: a string representing a name/names of individuals\n", - " Returns:\n", - " the name with or without a comma based on some conditions\n", - " \"\"\"\n", - " suffixes = [\n", - " \"sr\",\n", - " \"jr\",\n", - " \"i\",\n", - " \"ii\",\n", - " \"iii\",\n", - " \"iv\",\n", - " \"v\",\n", - " \"vi\",\n", - " \"vii\",\n", - " \"viii\",\n", - " \"ix\",\n", - " \"x\",\n", - " ]\n", - " name_parts = name.lower().split(\",\")\n", - " # if the comma is just in the end as a typo:\n", - " if len(name_parts[1]) == 0:\n", - " return name_parts[0].title()\n", - " # if just the suffix in the end, leave the name as it is\n", - " if name_parts[1].strip() in suffixes:\n", - " return name.title()\n", - " # at this point either it's just poor name placement, or the suffix is\n", - " # in the beginning of the name. Either way, the first part of the list is\n", - " # the true last name.\n", - " last_part = name_parts.pop(0)\n", - " first_part = \" \".join(name_parts)\n", - " return first_part.title() + \" \" + last_part.title()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def get_likely_name(first_name: str, last_name: str, full_name: str) -> str:\n", - " \"\"\"Given name related columns, return a person's likely name\n", - "\n", - " Given different formatting used accross states, errors in data entry\n", - " and missing data, it can be difficult to determine someone's actual\n", - " name. 
For example, some states have a last name column with values like\n", - " \"Doe, Jane\", where the person's first name appears to have been erroneously\n", - " included.\n", - "\n", - " Args:\n", - " first_name: raw value of first name column\n", - " last_name: raw value last name column\n", - " full_name: raw value of name or full_name column\n", - " Returns:\n", - " The most likely full name of the person listed\n", - "\n", - " Sample Usage:\n", - " >>> get_likely_name(\"Jane\", \"Doe\", \"\")\n", - " 'Jane Doe'\n", - " >>> get_likely_name(\"\", \"\", \"Jane Doe\")\n", - " 'Jane Doe'\n", - " >>> get_likely_name(\"\", \"Doe, Jane\", \"\")\n", - " 'Jane Doe'\n", - " >>> get_likely_name(\"Jane Doe\", \"Doe\", \"Jane Doe\")\n", - " 'Jane Doe'\n", - " >>> get_likely_name(\"Jane\",\"\",\"Doe, Sr\")\n", - " 'Jane Doe, Sr'\n", - " >>> get_likely_name(\"Jane Elisabeth Doe, IV\",\"Elisabeth\",\"Doe, IV\")\n", - " 'Jane Elisabeth Doe, Iv'\n", - " >>> get_likely_name(\"\",\"\",\"Jane Elisabeth Doe, IV\")\n", - " 'Jane Elisabeth Doe Iv'\n", - " \"\"\"\n", - " # first ensure clean input by deleting spaces:\n", - " first_name, last_name, full_name = list(\n", - " map(lambda x: x.lower().strip(), [first_name, last_name, full_name])\n", - " )\n", - "\n", - " # if data is clean:\n", - " if first_name + \" \" + last_name == full_name:\n", - " return full_name\n", - "\n", - " # some names have titles or professions associated with the name. 
We need to\n", - " # remove those from the name.\n", - " titles = [\n", - " \"mr\",\n", - " \"ms\",\n", - " \"mrs\",\n", - " \"miss\",\n", - " \"prof\",\n", - " \"dr\",\n", - " \"doctor\",\n", - " \"sir\",\n", - " \"madam\",\n", - " \"professor\",\n", - " ]\n", - " names = [first_name, last_name, full_name]\n", - "\n", - " for i in range(len(names)):\n", - " # if there is a ',' deal with it accordingly\n", - " if \",\" in names[i]:\n", - " names[i] = determine_comma_role(names[i])\n", - "\n", - " names[i] = names[i].replace(\".\", \"\").split(\" \")\n", - " names[i] = [\n", - " name_part for name_part in names[i] if name_part not in titles\n", - " ]\n", - " names[i] = \" \".join(names[i])\n", - "\n", - " # one last check to remove any pieces that might add extra whitespace\n", - " names = list(filter(lambda x: x != \"\", names))\n", - " names = \" \".join(names)\n", - " names = names.title().replace(\" \",\" \").split(\" \")\n", - " final_name = []\n", - " [final_name.append(x) for x in names if x not in final_name]\n", - " return \" \".join(final_name).strip()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "orgs_sample = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0).sample(10000)\n", - "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False).sample(10000)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamestateentity_type
050c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee
150c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee
250c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee
362ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee
462ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee
562ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee
6d31df1ca-714e-4a82-9e88-1892c0451a71COMMITTEE TO ELECT DR PATRICIA BERNARDMIcommittee
7d31df1ca-714e-4a82-9e88-1892c0451a71COMMITTEE TO ELECT DR PATRICIA BERNARDMIcommittee
862ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee
94db76e6e-f0d5-40eb-82de-6dbcdb562dd7Ugi Utilities Inc/Ugi Energy Services Llc PacPAOrganization
10f71341d7-d27e-47eb-9b66-903af39d6cb5Pabar Pac (Pa Bar Assn)PAOrganization
11c875d7de-94be-42f1-b994-dd89b114d51ePa Fraternal Order Of Police PacPAOrganization
12910c4d36-b036-469e-aa2a-ea4ff8855a6cCitizens For KailPAOrganization
1360d454d1-3773-4d88-80e9-132c161da0f0Paa PacPAOrganization
141d2b5bc0-9385-4cd7-ac48-df43b3eca6fdMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee
151d2b5bc0-9385-4cd7-ac48-df43b3eca6fdMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee
161d2b5bc0-9385-4cd7-ac48-df43b3eca6feMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee
171d2b5bc0-9385-4cd7-ac48-df43b3eca6ffMICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PACMIcommittee
181d2b5bc0-9385-4cd7-ac48-df43b3eca6fdPaa PacPAOrganization
\n", - "
" - ], - "text/plain": [ - " id \\\n", - "0 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "1 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "2 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "3 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "4 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "5 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "6 d31df1ca-714e-4a82-9e88-1892c0451a71 \n", - "7 d31df1ca-714e-4a82-9e88-1892c0451a71 \n", - "8 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "9 4db76e6e-f0d5-40eb-82de-6dbcdb562dd7 \n", - "10 f71341d7-d27e-47eb-9b66-903af39d6cb5 \n", - "11 c875d7de-94be-42f1-b994-dd89b114d51e \n", - "12 910c4d36-b036-469e-aa2a-ea4ff8855a6c \n", - "13 60d454d1-3773-4d88-80e9-132c161da0f0 \n", - "14 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", - "15 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", - "16 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe \n", - "17 1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff \n", - "18 1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd \n", - "\n", - " name state entity_type \n", - "0 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "1 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "2 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", - "4 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", - "5 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", - "6 COMMITTEE TO ELECT DR PATRICIA BERNARD MI committee \n", - "7 COMMITTEE TO ELECT DR PATRICIA BERNARD MI committee \n", - "8 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 
MI committee \n", - "9 Ugi Utilities Inc/Ugi Energy Services Llc Pac PA Organization \n", - "10 Pabar Pac (Pa Bar Assn) PA Organization \n", - "11 Pa Fraternal Order Of Police Pac PA Organization \n", - "12 Citizens For Kail PA Organization \n", - "13 Paa Pac PA Organization \n", - "14 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "15 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "16 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "17 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC MI committee \n", - "18 Paa Pac PA Organization " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = {'id':['50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360',\n", - " '62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", - " 'd31df1ca-714e-4a82-9e88-1892c0451a71','d31df1ca-714e-4a82-9e88-1892c0451a71','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", - " '4db76e6e-f0d5-40eb-82de-6dbcdb562dd7','f71341d7-d27e-47eb-9b66-903af39d6cb5','c875d7de-94be-42f1-b994-dd89b114d51e',\n", - " '910c4d36-b036-469e-aa2a-ea4ff8855a6c','60d454d1-3773-4d88-80e9-132c161da0f0','1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd',\n", - " '1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd','1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe','1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff',\n", - " '1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd'],\n", - " 'name':['REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC','REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC',\n", - " 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC','UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB',\n", - " 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB','UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB',\n", - " 'COMMITTEE TO ELECT DR PATRICIA BERNARD','COMMITTEE TO ELECT DR PATRICIA BERNARD','UNITED FOOD AND COMMERCIAL 
WORKERS ACTIVE BALLOT CLUB',\n", - " 'Ugi Utilities Inc/Ugi Energy Services Llc Pac','Pabar Pac (Pa Bar Assn)','Pa Fraternal Order Of Police Pac','Citizens For Kail',\n", - " 'Paa Pac','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC',\n", - " 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','Paa Pac'],\n", - " 'state':['MI','MI','MI','MI','MI','MI','MI','MI','MI','PA','PA','PA','PA','PA','MI','MI','MI','MI','PA'],\n", - " 'entity_type':['committee','committee','committee','committee','committee','committee','committee','committee','committee',\n", - " 'Organization','Organization','Organization','Organization','Organization','committee','committee','committee','committee','Organization']}\n", - "\n", - "sample_df = pd.DataFrame(data)\n", - "sample_df" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from utils.constants import repo_root\n", - "def convert_duplicates_to_dict(df: pd.DataFrame)->pd.DataFrame:\n", - " '''Takes a dataframe whose indexes are UUIDs, and a column that is a list of\n", - " all other UUIDs that have duplicate values. The function then outputs a\n", - " dictionary file where the deduped UUIDs map to the dataframe main UUID\n", - " \n", - " Args:\n", - " A pandas dataframe with UUIDs as indexes and deduplicated UUIDs\n", - " matching up to the index in the same row\n", - " \n", - " Returns\n", - " None. However it outputs a dictionary to the output directory, with 2\n", - " columns. 
The first, which indicates the deduplicated UUIDs, is labeled\n", - " 'duplicated_uuids', and the 2nd, which shows the uuids to which the\n", - " deduplicated entries match two, is labeled 'mapped_uuids'.\n", - " '''\n", - " deduped_dict = {}\n", - " for i in range(len(df)):\n", - " deduped_uudis = df.iloc[i]['duplicated']\n", - " for j in range(len(deduped_uudis)):\n", - " deduped_dict.update({deduped_uudis[j]:df.iloc[i]['id']})\n", - " \n", - " # now convert dictionary into a csv file\n", - " deduped_df = pd.DataFrame.from_dict(deduped_dict,'index') \n", - " deduped_df = deduped_df.reset_index().rename(columns={\"index\":\"duplicated_uuids\", 0:\"mapped_uuids\"})\n", - " deduped_df.to_csv(repo_root / \"output\" / \"deduplicated_UUIDs.csv\", index=False, mode='a')\n", - "\n", - "\n", - "def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame:\n", - " '''Given a dataframe, remove rows that have identical entry data beyond\n", - " UUIDs, and output a file mapping an entry to other the UUIDs of the\n", - " deduplicated rows\n", - " \n", - " Args:\n", - " a pandas dataframe containing contribution data\n", - " Returns:\n", - " a deduplicated pandas dataframe containing contribution data\n", - " '''\n", - " #first remove all duplicate entries:\n", - " new_df = df.drop_duplicates()\n", - "\n", - " # now find the duplicates along all columns but the ID\n", - " new_df=new_df.groupby(df.columns[1:].tolist(),dropna=False)[\"id\"].agg(list).reset_index().rename(columns={\"id\": \"duplicated\"})\n", - " new_df.index=new_df[\"duplicated\"].str[0].tolist()\n", - " new_df[\"duplicated\"]=new_df[\"duplicated\"].str[1:]\n", - "\n", - " # now convert the duplicated column into a dictionary that can will be\n", - " # an output by only feeding the entries with duplicates\n", - " new_df = new_df.reset_index().rename(columns = {'index':'id'})\n", - " convert_duplicates_to_dict(new_df[new_df['duplicated'].apply(lambda x: len(x))>0][['id','duplicated']])\n", - " new_df = 
new_df.drop(['duplicated'], axis=1)\n", - " return new_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamestateentity_type
03246120d-45fc-4d19-adee-d2aa2c5be6db1 BOLD STEPMIcorporation
18fc7e5d5-558d-42ea-bd9a-8e48a4a9a4bd12CDRCMIcorporation
2a5379930-7324-4f1d-b216-84d9e9ddea40303 MANAGEMENT INC.MIcorporation
39064112f-ef40-4690-9d0a-782a2375feb0314 ACTION FUNDMIcorporation
49e11e7ae-ee29-4a50-9720-41c6ac556a1fA T AND T MICHIGAN PACMIcorporation
...............
2149d79f9729-c9af-4347-868a-ae6e6814a295Zach KirkPAOrganization
2150fbfea472-e183-4479-b869-90eddfa5198cZest KitchenPAOrganization
2151c105a4af-9fd4-4a5b-a7b8-1e8738ff39c6Zoom UsPAOrganization
215259cc8db9-607e-4e1b-ba41-0850b6019360Zoom Video Communications Inc.PAOrganization
2153NaNNaNMIcorporation
\n", - "

2154 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " id \\\n", - "0 3246120d-45fc-4d19-adee-d2aa2c5be6db \n", - "1 8fc7e5d5-558d-42ea-bd9a-8e48a4a9a4bd \n", - "2 a5379930-7324-4f1d-b216-84d9e9ddea40 \n", - "3 9064112f-ef40-4690-9d0a-782a2375feb0 \n", - "4 9e11e7ae-ee29-4a50-9720-41c6ac556a1f \n", - "... ... \n", - "2149 d79f9729-c9af-4347-868a-ae6e6814a295 \n", - "2150 fbfea472-e183-4479-b869-90eddfa5198c \n", - "2151 c105a4af-9fd4-4a5b-a7b8-1e8738ff39c6 \n", - "2152 59cc8db9-607e-4e1b-ba41-0850b6019360 \n", - "2153 NaN \n", - "\n", - " name state entity_type \n", - "0 1 BOLD STEP MI corporation \n", - "1 12CDRC MI corporation \n", - "2 303 MANAGEMENT INC. MI corporation \n", - "3 314 ACTION FUND MI corporation \n", - "4 A T AND T MICHIGAN PAC MI corporation \n", - "... ... ... ... \n", - "2149 Zach Kirk PA Organization \n", - "2150 Zest Kitchen PA Organization \n", - "2151 Zoom Us PA Organization \n", - "2152 Zoom Video Communications Inc. PA Organization \n", - "2153 NaN MI corporation \n", - "\n", - "[2154 rows x 4 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = deduplicate_perfect_matches(orgs_sample)\n", - "#len(x.iloc[2]['duplicated'])\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idfirst_namelast_namefull_nameentity_typestatepartycompany
0f6df631a-e626-4861-b62b-e09512887bd3A SCOTTPARISA SCOTT PARIS ...IndividualMINaNNOT EMPLOYED
1075fb1c6-6c70-4ec6-a439-fcebb76c4e0aA. MARKGLICKSTEINA. MARK GLICKSTEIN ...IndividualCANaNPARTNERSHIP HEALTH PLAN OF CA
24a3968f5-7f5e-4ed1-8f39-bfc70bc67af8A. MICHAELPALIZZIA. MICHAEL PALIZZI ...IndividualMINaNMILLER CANFIELD
3bb952efc-3dba-4449-9405-ea65202fbbeaAARONALDRICHAARON ALDRICH ...IndividualMINaNMILLER PIPELINE CORP.
479ec4a73-f688-479a-a4e3-0b0a3813188aAARONBLANDAARON BLAND ...IndividualMINaNNaN
...........................
7122a1a6ff3b-cfa4-4b84-bf8c-20984f9871f0NaNNaNTrone, RobertIndividualMNNaNNaN
712337ab55f5-3613-469c-8b66-ac8888f5bcaeNaNNaNWark, Mary AnnIndividualMNNaNNaN
712492d5ac7c-4702-420c-97a7-656111677f5aNaNNaNWenstrom, GeneIndividualMNNaNNaN
7125fa934bf1-f611-4cd3-9bff-451bdf2e5bd2NaNNaNWika, KevinIndividualMNNaNNaN
7126fb8bb833-7010-418a-9f24-1a29771e0b67NaNNaNwark, davidIndividualMNNaNNaN
\n", - "

7127 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " id first_name \\\n", - "0 f6df631a-e626-4861-b62b-e09512887bd3 A SCOTT \n", - "1 075fb1c6-6c70-4ec6-a439-fcebb76c4e0a A. MARK \n", - "2 4a3968f5-7f5e-4ed1-8f39-bfc70bc67af8 A. MICHAEL \n", - "3 bb952efc-3dba-4449-9405-ea65202fbbea AARON \n", - "4 79ec4a73-f688-479a-a4e3-0b0a3813188a AARON \n", - "... ... ... \n", - "7122 a1a6ff3b-cfa4-4b84-bf8c-20984f9871f0 NaN \n", - "7123 37ab55f5-3613-469c-8b66-ac8888f5bcae NaN \n", - "7124 92d5ac7c-4702-420c-97a7-656111677f5a NaN \n", - "7125 fa934bf1-f611-4cd3-9bff-451bdf2e5bd2 NaN \n", - "7126 fb8bb833-7010-418a-9f24-1a29771e0b67 NaN \n", - "\n", - " last_name \\\n", - "0 PARIS \n", - "1 GLICKSTEIN \n", - "2 PALIZZI \n", - "3 ALDRICH \n", - "4 BLAND \n", - "... ... \n", - "7122 NaN \n", - "7123 NaN \n", - "7124 NaN \n", - "7125 NaN \n", - "7126 NaN \n", - "\n", - " full_name entity_type state \\\n", - "0 A SCOTT PARIS ... Individual MI \n", - "1 A. MARK GLICKSTEIN ... Individual CA \n", - "2 A. MICHAEL PALIZZI ... Individual MI \n", - "3 AARON ALDRICH ... Individual MI \n", - "4 AARON BLAND ... Individual MI \n", - "... ... ... ... \n", - "7122 Trone, Robert Individual MN \n", - "7123 Wark, Mary Ann Individual MN \n", - "7124 Wenstrom, Gene Individual MN \n", - "7125 Wika, Kevin Individual MN \n", - "7126 wark, david Individual MN \n", - "\n", - " party company \n", - "0 NaN NOT EMPLOYED \n", - "1 NaN PARTNERSHIP HEALTH PLAN OF CA \n", - "2 NaN MILLER CANFIELD \n", - "3 NaN MILLER PIPELINE CORP. \n", - "4 NaN NaN \n", - "... ... ... 
\n", - "7122 NaN NaN \n", - "7123 NaN NaN \n", - "7124 NaN NaN \n", - "7125 NaN NaN \n", - "7126 NaN NaN \n", - "\n", - "[7127 rows x 8 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y=deduplicate_perfect_matches(inds_sample)\n", - "y" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "7207" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a = inds_sample.drop_duplicates()\n", - "len(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Max SpeedAnimalColorAge
0380.0Nonegreen2.0
1370.0FalconNoneNaN
2NaNNoneyellow5.0
3NaNParrotblue6.0
\n", - "
" - ], - "text/plain": [ - " Max Speed Animal Color Age\n", - "0 380.0 None green 2.0\n", - "1 370.0 Falcon None NaN\n", - "2 NaN None yellow 5.0\n", - "3 NaN Parrot blue 6.0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "df = pd.DataFrame({'Max Speed': [380., 370., np.nan, np.nan],\n", - " 'Animal': ['None', 'Falcon', 'None', 'Parrot'],\n", - " 'Color':['green',None,'yellow','blue'],\n", - " 'Age':[2,np.nan,5,6]})\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df= df.groupby(df.columns[1:].tolist(), dropna=False)[\"Max Speed\"]#.agg(list)#.reset_index()\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AgeAnimalColorMax Speed
02.0Nonegreen[380.0]
15.0Noneyellow[nan]
26.0Parrotblue[nan]
3NaNFalconNaN[370.0]
\n", - "
" - ], - "text/plain": [ - " Age Animal Color Max Speed\n", - "0 2.0 None green [380.0]\n", - "1 5.0 None yellow [nan]\n", - "2 6.0 Parrot blue [nan]\n", - "3 NaN Falcon NaN [370.0]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df.groupby((df.columns.difference(['Max Speed'])).tolist(),dropna=False)['Max Speed'].agg(list).reset_index()\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 869a2ea0e4602cc51cfbf0ec54cf70bac0e4c15c Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 14 Feb 2024 10:03:11 -0600 Subject: [PATCH 088/214] (not complete) splink --- notebooks/splink.ipynb | 653 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 653 insertions(+) create mode 100644 notebooks/splink.ipynb diff --git a/notebooks/splink.ipynb b/notebooks/splink.ipynb new file mode 100644 index 00000000..e8e53110 --- /dev/null +++ b/notebooks/splink.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1a863d3e-59b4-46c3-ad0f-7d192a61ebe2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).\n", + " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", + "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.2' currently installed).\n", + " from pandas.core import (\n", + "/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_14716/3040970222.py:1: DeprecationWarning: \n", + "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n", + "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n", + "but was not found to be installed on your system.\n", + "If this would cause problems for you,\n", + "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n", + " \n", + " import pandas as pd\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Define sample data\n", + "data = {\n", + " 'unique_id': range(1, 13),\n", + " 'first_name': ['John', 'Jane', 'David', 'Emily', 'Michael', 'Sarah', 'John', 'Jane', 'David', 'Emily', 'John', 'John'],\n", + " 'last_name': ['Doe', 'Smith', 'Johnson', 'Brown', 'Davis', 'Miller', 'Doe', 'Smith', 'Johnson', 'Brown', 'Miller', 'Jones'],\n", + " 'full_name': ['John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'Michael Davis', 'Sarah Miller', 'John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'John Miller', 'John Jones'],\n", + " 'entity_type': ['Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person'],\n", + " 'state': ['CA', 'NY', 'TX', 'FL', 'CA', 'NY', 'CA', 'TX', 'FL', 'NY', 'CA', 'FL'],\n", + " 'party': ['Democrat', 
'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent'],\n", + " 'company': ['Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Google', 'Microsoft']\n", + "}\n", + "\n", + "# Create DataFrame\n", + "df = pd.DataFrame(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "25334eac-e048-47e7-b911-571853e2a666", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RendererRegistry.enable('html')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from splink.duckdb.linker import DuckDBLinker\n", + "import altair as alt\n", + "alt.renderers.enable('html')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f604d9d4-577a-4ed7-988d-71d5dcb21eae", + "metadata": {}, + "outputs": [], + "source": [ + "settings = {\n", + " \"link_type\": \"dedupe_only\",\n", + " \"blocking_rules_to_generate_predictions\": [\n", + " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", + " ],\n", + "}\n", + "linker = DuckDBLinker(df, settings)\n", + "\n", + "# linker.profile_columns(\n", + "# [\"first_name\", \"last_name\"], top_n=10, bottom_n=5\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a5915f73-77a6-42c9-a7df-a8f0d396836c", + "metadata": {}, + "outputs": [], + "source": [ + "import splink.duckdb.comparison_template_library as ctl\n", + "import splink.duckdb.comparison_library as cl\n", + "\n", + "settings = {\n", + " \"link_type\": \"dedupe_only\",\n", + " \"blocking_rules_to_generate_predictions\": [\n", + " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", + " ],\n", + " \"comparisons\": [\n", + " ctl.name_comparison(\"first_name\", term_frequency_adjustments=True),\n", + " ctl.name_comparison(\"last_name\", term_frequency_adjustments=True),\n", + " 
cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", + " cl.levenshtein_at_thresholds(\"state\", 2),\n", + " cl.levenshtein_at_thresholds(\"party\", 2),\n", + " cl.levenshtein_at_thresholds(\"company\", 2),\n", + " ctl.name_comparison(\"full_name\", term_frequency_adjustments=True),\n", + " # Add more comparisons as needed\n", + " ],\n", + " \"retain_matching_columns\": True,\n", + " \"retain_intermediate_calculation_columns\": True,\n", + " \"max_iterations\": 10,\n", + " \"em_convergence\": 0.01\n", + "}\n", + "\n", + "linker = DuckDBLinker(df, settings)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1356e5a-d7e9-415d-8b7e-b5195b708755", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "217a07cf-eaa3-42a2-b43b-b2eecd740a7b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Probability two random records match is estimated to be 0.101.\n", + "This means that amongst all possible pairwise record comparisons, one in 9.90 are expected to match. With 66 total possible comparisons, we expect a total of around 6.67 matching pairs\n" + ] + } + ], + "source": [ + "linker.estimate_probability_two_random_records_match(\n", + " [\n", + " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", + " \"l.full_name = r.full_name and l.state = r.state\",\n", + " \"l.full_name = r.full_name and l.company = r.company\",\n", + " ],\n", + " recall=0.6,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "307ae7b8-b637-4451-aad3-9c848e8dff65", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "----- Estimating u probabilities using random sampling -----\n", + "u probability not trained for first_name - Damerau_levenshtein <= 1 (comparison vector value: 3). 
This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for first_name - Jaro_winkler_similarity >= 0.9 (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for first_name - Jaro_winkler_similarity >= 0.8 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for last_name - Damerau_levenshtein <= 1 (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for last_name - Jaro_winkler_similarity >= 0.9 (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for entity_type - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for state - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for party - Levenshtein <= 2 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for company - Levenshtein <= 2 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", + "u probability not trained for full_name - Damerau_levenshtein <= 1 (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", + "\n", + "Estimated u probabilities using random sampling\n", + "\n", + "Your model is not yet fully trained. 
Missing estimates for:\n", + " - first_name (some u values are not trained, no m values are trained).\n", + " - last_name (some u values are not trained, no m values are trained).\n", + " - entity_type (some u values are not trained, no m values are trained).\n", + " - state (some u values are not trained, no m values are trained).\n", + " - party (some u values are not trained, no m values are trained).\n", + " - company (some u values are not trained, no m values are trained).\n", + " - full_name (some u values are not trained, no m values are trained).\n" + ] + } + ], + "source": [ + "linker.estimate_u_using_random_sampling(max_pairs=5e6)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "813cbdfe-4b6c-4c20-8d0d-a5efee91bd8f", + "metadata": {}, + "outputs": [], + "source": [ + "# linker.match_weights_chart()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a8730290-d9c1-4fff-a8d0-5964284ffef7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " -- WARNING --\n", + "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. 
To produce predictions the following untrained trained parameters will use default values.\n", + "Comparison: 'first_name':\n", + " m values not fully trained\n", + "Comparison: 'first_name':\n", + " u values not fully trained\n", + "Comparison: 'last_name':\n", + " m values not fully trained\n", + "Comparison: 'last_name':\n", + " u values not fully trained\n", + "Comparison: 'entity_type':\n", + " m values not fully trained\n", + "Comparison: 'entity_type':\n", + " u values not fully trained\n", + "Comparison: 'state':\n", + " m values not fully trained\n", + "Comparison: 'state':\n", + " u values not fully trained\n", + "Comparison: 'party':\n", + " m values not fully trained\n", + "Comparison: 'party':\n", + " u values not fully trained\n", + "Comparison: 'company':\n", + " m values not fully trained\n", + "Comparison: 'company':\n", + " u values not fully trained\n", + "Comparison: 'full_name':\n", + " m values not fully trained\n", + "Comparison: 'full_name':\n", + " u values not fully trained\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
match_weightmatch_probabilityunique_id_lunique_id_rfirst_name_lfirst_name_rgamma_first_nametf_first_name_ltf_first_name_rbf_first_name...company_rgamma_companybf_companyfull_name_lfull_name_rgamma_full_nametf_full_name_ltf_full_name_rbf_full_namebf_tf_adj_full_name
0-4.0103300.05842828JaneJane40.1666670.1666676.966667...Microsoft00.028947Jane SmithJane Smith40.1666670.16666715.6750.363636
1-4.0103300.058428410EmilyEmily40.1666670.1666676.966667...Facebook00.028947Emily BrownEmily Brown40.1666670.16666715.6750.363636
2-4.0103300.05842839DavidDavid40.1666670.1666676.966667...Amazon00.028947David JohnsonDavid Johnson40.1666670.16666715.6750.363636
32.2650780.82778817JohnJohn40.3333330.3333336.966667...Google00.028947John DoeJohn Doe40.1666670.16666715.6750.363636
\n", + "

4 rows × 44 columns

\n", + "
" + ], + "text/plain": [ + " match_weight match_probability unique_id_l unique_id_r first_name_l \\\n", + "0 -4.010330 0.058428 2 8 Jane \n", + "1 -4.010330 0.058428 4 10 Emily \n", + "2 -4.010330 0.058428 3 9 David \n", + "3 2.265078 0.827788 1 7 John \n", + "\n", + " first_name_r gamma_first_name tf_first_name_l tf_first_name_r \\\n", + "0 Jane 4 0.166667 0.166667 \n", + "1 Emily 4 0.166667 0.166667 \n", + "2 David 4 0.166667 0.166667 \n", + "3 John 4 0.333333 0.333333 \n", + "\n", + " bf_first_name ... company_r gamma_company bf_company full_name_l \\\n", + "0 6.966667 ... Microsoft 0 0.028947 Jane Smith \n", + "1 6.966667 ... Facebook 0 0.028947 Emily Brown \n", + "2 6.966667 ... Amazon 0 0.028947 David Johnson \n", + "3 6.966667 ... Google 0 0.028947 John Doe \n", + "\n", + " full_name_r gamma_full_name tf_full_name_l tf_full_name_r \\\n", + "0 Jane Smith 4 0.166667 0.166667 \n", + "1 Emily Brown 4 0.166667 0.166667 \n", + "2 David Johnson 4 0.166667 0.166667 \n", + "3 John Doe 4 0.166667 0.166667 \n", + "\n", + " bf_full_name bf_tf_adj_full_name \n", + "0 15.675 0.363636 \n", + "1 15.675 0.363636 \n", + "2 15.675 0.363636 \n", + "3 15.675 0.363636 \n", + "\n", + "[4 rows x 44 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_predict = linker.predict()\n", + "df_e = df_predict.as_pandas_dataframe(limit=5)\n", + "df_e" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6ccb6eac-6985-4d8a-899d-d188ca980126", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Completed iteration 1, root rows count 0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cluster_idunique_idfirst_namelast_namefull_nameentity_typestatepartycompany__splink_salttf_first_nametf_last_nametf_full_nametf_entity_type
011JohnDoeJohn DoePersonCADemocratApple0.8834140.3333330.1666670.1666671.0
122JaneSmithJane SmithPersonNYRepublicanGoogle0.1463000.1666670.1666670.1666671.0
233DavidJohnsonDavid JohnsonPersonTXIndependentMicrosoft0.3555670.1666670.1666670.1666671.0
344EmilyBrownEmily BrownPersonFLDemocratAmazon0.6713640.1666670.1666670.1666671.0
455MichaelDavisMichael DavisPersonCARepublicanFacebook0.5406800.0833330.0833330.0833331.0
\n", + "
" + ], + "text/plain": [ + " cluster_id unique_id first_name last_name full_name entity_type \\\n", + "0 1 1 John Doe John Doe Person \n", + "1 2 2 Jane Smith Jane Smith Person \n", + "2 3 3 David Johnson David Johnson Person \n", + "3 4 4 Emily Brown Emily Brown Person \n", + "4 5 5 Michael Davis Michael Davis Person \n", + "\n", + " state party company __splink_salt tf_first_name tf_last_name \\\n", + "0 CA Democrat Apple 0.883414 0.333333 0.166667 \n", + "1 NY Republican Google 0.146300 0.166667 0.166667 \n", + "2 TX Independent Microsoft 0.355567 0.166667 0.166667 \n", + "3 FL Democrat Amazon 0.671364 0.166667 0.166667 \n", + "4 CA Republican Facebook 0.540680 0.083333 0.083333 \n", + "\n", + " tf_full_name tf_entity_type \n", + "0 0.166667 1.0 \n", + "1 0.166667 1.0 \n", + "2 0.166667 1.0 \n", + "3 0.166667 1.0 \n", + "4 0.083333 1.0 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7)\n", + "clusters.as_pandas_dataframe(limit=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28948465-fea2-433d-bace-d0627dfe348d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef568813-181b-42a7-b3a2-786fe87addfb", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 21e85755d2d2886ffbad86114516277c1469d4c4 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 14 Feb 2024 10:13:48 -0600 
Subject: [PATCH 089/214] trying to fix disconnect --- utils/classify.py | 9 ++---- utils/tests/test_linkage.py | 63 ++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index c50741bf..f7763678 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -2,10 +2,10 @@ from utils.linkage import calculate_string_similarity -#we want to run down a list of people and, hopefully, their adresses, plus a list of +#we want to run down a list of people and, hopefully, their adresses, plus a list of #corporations, groups, etc, and classify them, basically just looking for matches -#do we want to just input all the names/people (there's not many, less than 200 for sure), +#do we want to just input all the names/people (there's not many, less than 200 for sure), #give a string similarity match score, and extract the top ten for manual review? #thsi should give us a feeling for how to set our threhsold #we might also, once we have all the data, buckle down and just classify some of them manually @@ -17,7 +17,7 @@ def similarity_calculator(df: pd.DataFrame, suspect): """Run through a pandas dataframe column and compare elements to a constant - + """ # this needs to output somehting useful @@ -42,6 +42,3 @@ def similarity_calculator(df: pd.DataFrame, suspect): #we can use the indices and/or select manually, just add a new column to the subjects table #that marks fossil fuels, green energy, or neither - - - diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index bf9a558f..5899f36a 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,23 +1,30 @@ import json -import pytest + import numpy as np import pandas as pd -from utils.linkage import( - calculate_string_similarity, calculate_row_similarity, -) +import pytest -#creating a test for calculate_row_similarity and row_matches +from utils.linkage import calculate_row_similarity, 
calculate_string_similarity +# creating a test for calculate_row_similarity and row_matches -#maybe this will just be a csv for us? + +# maybe this will just be a csv for us? def open_test_data_json(filename: str) -> dict: """Open json in tests/data dir into a python dict""" with open(test_data_directory / filename, "r") as f: return json.load(f) -#to put in data: -d = {'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"],'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago","8 Fancy Way, Chicago"]} +# to put in data: +d = { + "name": ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + ], +} test_df = pd.DataFrame(data=d) @@ -30,24 +37,36 @@ def open_test_data_json(filename: str) -> dict: # return open_test_data_json(data) -def test_row_similarity_scen_1( - row_similarity_scen_1 -): - wrong = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[1]],np.array([.8, .2]),calculate_string_similarity) - right = calculate_row_similarity(row_similarity_scen_1.iloc[[0]], row_similarity_scen_1.iloc[[2]],np.array([.8, .2]),calculate_string_similarity) +def test_row_similarity_scen_1(row_similarity_scen_1): + wrong = calculate_row_similarity( + row_similarity_scen_1.iloc[[0]], + row_similarity_scen_1.iloc[[1]], + np.array([0.8, 0.2]), + calculate_string_similarity, + ) + right = calculate_row_similarity( + row_similarity_scen_1.iloc[[0]], + row_similarity_scen_1.iloc[[2]], + np.array([0.8, 0.2]), + calculate_string_similarity, + ) assert right > wrong - -def test_row_similarity_scen_2( - row_similarity_scen_2 -): - result = calculate_row_similarity( - row_similarity_scen_2 +def test_row_similarity_scen_2(row_similarity_scen_2): + result = calculate_row_similarity(row_similarity_scen_2) + wrong = calculate_row_similarity( + row_similarity_scen_2.iloc[[0]], + row_similarity_scen_2.iloc[[1]], + np.array([0.2, 0.8]), + 
calculate_string_similarity, + ) + right = calculate_row_similarity( + row_similarity_scen_2.iloc[[0]], + row_similarity_scen_2.iloc[[2]], + np.array([0.2, 0.8]), + calculate_string_similarity, ) - wrong = calculate_row_similarity(row_similarity_scen_2.iloc[[0]], row_similarity_scen_2.iloc[[1]],np.array([.2, .8]),calculate_string_similarity) - right = calculate_row_similarity(row_similarity_scen_2.iloc[[0]], row_similarity_scen_2.iloc[[2]],np.array([.2, .8]),calculate_string_similarity) assert right < wrong - From 495db8100b8bb945a201cd27b71d9123ab0b7a2a Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 14 Feb 2024 21:41:17 -0600 Subject: [PATCH 090/214] updated classify --- utils/classify.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index f7763678..fae19ff1 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -25,19 +25,9 @@ def similarity_calculator(df: pd.DataFrame, suspect): return similarities -#crawl through list automatically once a threshold has been set -def -for i in inds_list: - similarities = similarity_calculator(data, i) - similarities - # - - - - - df.apply(calculate_string_similarity, inds_list) #very psuedocode + #very psuedocode #get top n, maybe just ten, and output #we can use the indices and/or select manually, just add a new column to the subjects table From dbaad50d25540e680ffef004021f917fcf5b265d Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 15 Feb 2024 05:17:21 +0000 Subject: [PATCH 091/214] updated name_rank function --- utils/linkage.py | 62 ++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 5370b306..9f146da2 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -2,8 +2,8 @@ import usaddress from names_dataset import NameDataset +# Initialize the NameDataset class, takes too long to initialize within the function nd = NameDataset() -# 'The 
library takes time to initialize because the database is massive.' """ Module for performing record linkage on state campaign finance dataset @@ -138,38 +138,44 @@ def get_street_from_address_line_1(address_line_1: str) -> str: def name_rank(first_name: str, last_name: str) -> list: - """Returns a score for the rank of a first name and last name in the US + """Returns a score for the rank of a given first name and last name https://github.com/philipperemy/name-dataset - Args: first_name: any string last_name: any string Returns: name rank for first name and last names - 1 is the most common name, only for names in the 'United States' - first element is the element corresponds to the rank of the first name - second element is the element corresponds to the rank of the last name + 1 is the most common name, only for names in the United States + First element in the list corresponds to the rank of the first name + Second element in the list corresponds to the rank of the last name + Empty or non string values will return None + Names that are not found in the dataset will return 0 + + >>> name_rank("John", "Smith") + [5, 7] + >>> name_rank("Adil", "Kassim") + [0, 7392] + >>> name_rank(None, 9) + [None, None """ - - if first_name is None or last_name is None: - return [None, None] - - if not isinstance(first_name, str) or not isinstance(last_name, str): - return [None, None] - - first_name_result = nd.search(first_name) - last_name_result = nd.search(last_name) - first_name_rank = None - last_name_rank = None - - if first_name_result and isinstance(first_name_result, dict): - first_name_data = first_name_result.get("first_name") - if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get("United States", None) - - if last_name_result and isinstance(last_name_result, dict): - last_name_data = last_name_result.get("last_name") - if last_name_data and "rank" in last_name_data: - last_name_rank = last_name_data["rank"].get("United 
States", None) - + first_name_rank = 0 + last_name_rank = 0 + if isinstance(first_name, str): + first_name_result = nd.search(first_name) + if first_name_result and isinstance(first_name_result, dict): + first_name_data = first_name_result.get("first_name") + if first_name_data and "rank" in first_name_data: + first_name_rank = first_name_data["rank"].get( + "United States", 0 + ) + else: + first_name_rank = None + if isinstance(last_name, str): + last_name_result = nd.search(last_name) + if last_name_result and isinstance(last_name_result, dict): + last_name_data = last_name_result.get("last_name") + if last_name_data and "rank" in last_name_data: + last_name_rank = last_name_data["rank"].get("United States", 0) + else: + last_name_rank = None return [first_name_rank, last_name_rank] From fb484508e1aae9054cdbbcb5cddab41128dadb64 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Thu, 15 Feb 2024 03:38:11 -0600 Subject: [PATCH 092/214] splink to .py --- notebooks/splink.ipynb | 653 ----------------------------------------- utils/constants.py | 52 ++++ utils/linkage.py | 27 ++ 3 files changed, 79 insertions(+), 653 deletions(-) delete mode 100644 notebooks/splink.ipynb diff --git a/notebooks/splink.ipynb b/notebooks/splink.ipynb deleted file mode 100644 index e8e53110..00000000 --- a/notebooks/splink.ipynb +++ /dev/null @@ -1,653 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "1a863d3e-59b4-46c3-ad0f-7d192a61ebe2", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).\n", - " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", - "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' 
or newer of 'bottleneck' (version '1.3.2' currently installed).\n", - " from pandas.core import (\n", - "/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_14716/3040970222.py:1: DeprecationWarning: \n", - "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n", - "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n", - "but was not found to be installed on your system.\n", - "If this would cause problems for you,\n", - "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n", - " \n", - " import pandas as pd\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "# Define sample data\n", - "data = {\n", - " 'unique_id': range(1, 13),\n", - " 'first_name': ['John', 'Jane', 'David', 'Emily', 'Michael', 'Sarah', 'John', 'Jane', 'David', 'Emily', 'John', 'John'],\n", - " 'last_name': ['Doe', 'Smith', 'Johnson', 'Brown', 'Davis', 'Miller', 'Doe', 'Smith', 'Johnson', 'Brown', 'Miller', 'Jones'],\n", - " 'full_name': ['John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'Michael Davis', 'Sarah Miller', 'John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'John Miller', 'John Jones'],\n", - " 'entity_type': ['Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person', 'Person'],\n", - " 'state': ['CA', 'NY', 'TX', 'FL', 'CA', 'NY', 'CA', 'TX', 'FL', 'NY', 'CA', 'FL'],\n", - " 'party': ['Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent'],\n", - " 'company': ['Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Google', 'Microsoft']\n", - "}\n", - "\n", - "# Create DataFrame\n", - "df = pd.DataFrame(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 
2, - "id": "25334eac-e048-47e7-b911-571853e2a666", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RendererRegistry.enable('html')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from splink.duckdb.linker import DuckDBLinker\n", - "import altair as alt\n", - "alt.renderers.enable('html')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f604d9d4-577a-4ed7-988d-71d5dcb21eae", - "metadata": {}, - "outputs": [], - "source": [ - "settings = {\n", - " \"link_type\": \"dedupe_only\",\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", - " ],\n", - "}\n", - "linker = DuckDBLinker(df, settings)\n", - "\n", - "# linker.profile_columns(\n", - "# [\"first_name\", \"last_name\"], top_n=10, bottom_n=5\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a5915f73-77a6-42c9-a7df-a8f0d396836c", - "metadata": {}, - "outputs": [], - "source": [ - "import splink.duckdb.comparison_template_library as ctl\n", - "import splink.duckdb.comparison_library as cl\n", - "\n", - "settings = {\n", - " \"link_type\": \"dedupe_only\",\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", - " ],\n", - " \"comparisons\": [\n", - " ctl.name_comparison(\"first_name\", term_frequency_adjustments=True),\n", - " ctl.name_comparison(\"last_name\", term_frequency_adjustments=True),\n", - " cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", - " cl.levenshtein_at_thresholds(\"state\", 2),\n", - " cl.levenshtein_at_thresholds(\"party\", 2),\n", - " cl.levenshtein_at_thresholds(\"company\", 2),\n", - " ctl.name_comparison(\"full_name\", term_frequency_adjustments=True),\n", - " # Add more comparisons as needed\n", - " ],\n", - " \"retain_matching_columns\": True,\n", - " 
\"retain_intermediate_calculation_columns\": True,\n", - " \"max_iterations\": 10,\n", - " \"em_convergence\": 0.01\n", - "}\n", - "\n", - "linker = DuckDBLinker(df, settings)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a1356e5a-d7e9-415d-8b7e-b5195b708755", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "217a07cf-eaa3-42a2-b43b-b2eecd740a7b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Probability two random records match is estimated to be 0.101.\n", - "This means that amongst all possible pairwise record comparisons, one in 9.90 are expected to match. With 66 total possible comparisons, we expect a total of around 6.67 matching pairs\n" - ] - } - ], - "source": [ - "linker.estimate_probability_two_random_records_match(\n", - " [\n", - " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", - " \"l.full_name = r.full_name and l.state = r.state\",\n", - " \"l.full_name = r.full_name and l.company = r.company\",\n", - " ],\n", - " recall=0.6,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "307ae7b8-b637-4451-aad3-9c848e8dff65", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "----- Estimating u probabilities using random sampling -----\n", - "u probability not trained for first_name - Damerau_levenshtein <= 1 (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for first_name - Jaro_winkler_similarity >= 0.9 (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for first_name - Jaro_winkler_similarity >= 0.8 (comparison vector value: 1). 
This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for last_name - Damerau_levenshtein <= 1 (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for last_name - Jaro_winkler_similarity >= 0.9 (comparison vector value: 2). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for entity_type - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for state - All other comparisons (comparison vector value: 0). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for party - Levenshtein <= 2 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for company - Levenshtein <= 2 (comparison vector value: 1). This usually means the comparison level was never observed in the training data.\n", - "u probability not trained for full_name - Damerau_levenshtein <= 1 (comparison vector value: 3). This usually means the comparison level was never observed in the training data.\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is not yet fully trained. 
Missing estimates for:\n", - " - first_name (some u values are not trained, no m values are trained).\n", - " - last_name (some u values are not trained, no m values are trained).\n", - " - entity_type (some u values are not trained, no m values are trained).\n", - " - state (some u values are not trained, no m values are trained).\n", - " - party (some u values are not trained, no m values are trained).\n", - " - company (some u values are not trained, no m values are trained).\n", - " - full_name (some u values are not trained, no m values are trained).\n" - ] - } - ], - "source": [ - "linker.estimate_u_using_random_sampling(max_pairs=5e6)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "813cbdfe-4b6c-4c20-8d0d-a5efee91bd8f", - "metadata": {}, - "outputs": [], - "source": [ - "# linker.match_weights_chart()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a8730290-d9c1-4fff-a8d0-5964284ffef7", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " -- WARNING --\n", - "You have called predict(), but there are some parameter estimates which have neither been estimated or specified in your settings dictionary. 
To produce predictions the following untrained trained parameters will use default values.\n", - "Comparison: 'first_name':\n", - " m values not fully trained\n", - "Comparison: 'first_name':\n", - " u values not fully trained\n", - "Comparison: 'last_name':\n", - " m values not fully trained\n", - "Comparison: 'last_name':\n", - " u values not fully trained\n", - "Comparison: 'entity_type':\n", - " m values not fully trained\n", - "Comparison: 'entity_type':\n", - " u values not fully trained\n", - "Comparison: 'state':\n", - " m values not fully trained\n", - "Comparison: 'state':\n", - " u values not fully trained\n", - "Comparison: 'party':\n", - " m values not fully trained\n", - "Comparison: 'party':\n", - " u values not fully trained\n", - "Comparison: 'company':\n", - " m values not fully trained\n", - "Comparison: 'company':\n", - " u values not fully trained\n", - "Comparison: 'full_name':\n", - " m values not fully trained\n", - "Comparison: 'full_name':\n", - " u values not fully trained\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
match_weightmatch_probabilityunique_id_lunique_id_rfirst_name_lfirst_name_rgamma_first_nametf_first_name_ltf_first_name_rbf_first_name...company_rgamma_companybf_companyfull_name_lfull_name_rgamma_full_nametf_full_name_ltf_full_name_rbf_full_namebf_tf_adj_full_name
0-4.0103300.05842828JaneJane40.1666670.1666676.966667...Microsoft00.028947Jane SmithJane Smith40.1666670.16666715.6750.363636
1-4.0103300.058428410EmilyEmily40.1666670.1666676.966667...Facebook00.028947Emily BrownEmily Brown40.1666670.16666715.6750.363636
2-4.0103300.05842839DavidDavid40.1666670.1666676.966667...Amazon00.028947David JohnsonDavid Johnson40.1666670.16666715.6750.363636
32.2650780.82778817JohnJohn40.3333330.3333336.966667...Google00.028947John DoeJohn Doe40.1666670.16666715.6750.363636
\n", - "

4 rows × 44 columns

\n", - "
" - ], - "text/plain": [ - " match_weight match_probability unique_id_l unique_id_r first_name_l \\\n", - "0 -4.010330 0.058428 2 8 Jane \n", - "1 -4.010330 0.058428 4 10 Emily \n", - "2 -4.010330 0.058428 3 9 David \n", - "3 2.265078 0.827788 1 7 John \n", - "\n", - " first_name_r gamma_first_name tf_first_name_l tf_first_name_r \\\n", - "0 Jane 4 0.166667 0.166667 \n", - "1 Emily 4 0.166667 0.166667 \n", - "2 David 4 0.166667 0.166667 \n", - "3 John 4 0.333333 0.333333 \n", - "\n", - " bf_first_name ... company_r gamma_company bf_company full_name_l \\\n", - "0 6.966667 ... Microsoft 0 0.028947 Jane Smith \n", - "1 6.966667 ... Facebook 0 0.028947 Emily Brown \n", - "2 6.966667 ... Amazon 0 0.028947 David Johnson \n", - "3 6.966667 ... Google 0 0.028947 John Doe \n", - "\n", - " full_name_r gamma_full_name tf_full_name_l tf_full_name_r \\\n", - "0 Jane Smith 4 0.166667 0.166667 \n", - "1 Emily Brown 4 0.166667 0.166667 \n", - "2 David Johnson 4 0.166667 0.166667 \n", - "3 John Doe 4 0.166667 0.166667 \n", - "\n", - " bf_full_name bf_tf_adj_full_name \n", - "0 15.675 0.363636 \n", - "1 15.675 0.363636 \n", - "2 15.675 0.363636 \n", - "3 15.675 0.363636 \n", - "\n", - "[4 rows x 44 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_predict = linker.predict()\n", - "df_e = df_predict.as_pandas_dataframe(limit=5)\n", - "df_e" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "6ccb6eac-6985-4d8a-899d-d188ca980126", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Completed iteration 1, root rows count 0\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cluster_idunique_idfirst_namelast_namefull_nameentity_typestatepartycompany__splink_salttf_first_nametf_last_nametf_full_nametf_entity_type
011JohnDoeJohn DoePersonCADemocratApple0.8834140.3333330.1666670.1666671.0
122JaneSmithJane SmithPersonNYRepublicanGoogle0.1463000.1666670.1666670.1666671.0
233DavidJohnsonDavid JohnsonPersonTXIndependentMicrosoft0.3555670.1666670.1666670.1666671.0
344EmilyBrownEmily BrownPersonFLDemocratAmazon0.6713640.1666670.1666670.1666671.0
455MichaelDavisMichael DavisPersonCARepublicanFacebook0.5406800.0833330.0833330.0833331.0
\n", - "
" - ], - "text/plain": [ - " cluster_id unique_id first_name last_name full_name entity_type \\\n", - "0 1 1 John Doe John Doe Person \n", - "1 2 2 Jane Smith Jane Smith Person \n", - "2 3 3 David Johnson David Johnson Person \n", - "3 4 4 Emily Brown Emily Brown Person \n", - "4 5 5 Michael Davis Michael Davis Person \n", - "\n", - " state party company __splink_salt tf_first_name tf_last_name \\\n", - "0 CA Democrat Apple 0.883414 0.333333 0.166667 \n", - "1 NY Republican Google 0.146300 0.166667 0.166667 \n", - "2 TX Independent Microsoft 0.355567 0.166667 0.166667 \n", - "3 FL Democrat Amazon 0.671364 0.166667 0.166667 \n", - "4 CA Republican Facebook 0.540680 0.083333 0.083333 \n", - "\n", - " tf_full_name tf_entity_type \n", - "0 0.166667 1.0 \n", - "1 0.166667 1.0 \n", - "2 0.166667 1.0 \n", - "3 0.166667 1.0 \n", - "4 0.083333 1.0 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7)\n", - "clusters.as_pandas_dataframe(limit=5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28948465-fea2-433d-bace-d0627dfe348d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef568813-181b-42a7-b3a2-786fe87addfb", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/utils/constants.py b/utils/constants.py index f259db36..c7c040ac 100644 --- a/utils/constants.py +++ 
b/utils/constants.py @@ -641,3 +641,55 @@ "CIC": "COMMUNITY INTEREST COMPANY", "PAC": "POLITICAL ACTION COMMITTEE", } + +individual_settings = { + "link_type": "dedupe_only", + "blocking_rules_to_generate_predictions": [ + "l.first_name = r.first_name and l.last_name = r.last_name", + "l.full_name - r.full_name" + ], + "comparisons": [ + ctl.name_comparison("first_name"), #built in comparison function + ctl.name_comparison("last_name"), + ctl.name_comparison("full_name"), + ctl.forename_surname_comparison("first_name", "last_name"), #built in comparison function + cl.exact_match("entity_type", term_frequency_adjustments=True), + cl.jaro_winkler_at_thresholds("state", [0.9, 0.8]), #threshold will catch typos and shortenings + cl.jaro_winkler_at_thresholds("party", [0.9, 0.8]), + cl.jaro_winkler_at_thresholds("company", [0.9, 0.8]), + ], + + #DEFAULT + "retain_matching_columns": True, + "retain_intermediate_calculation_columns": True, + "max_iterations": 10, + "em_convergence": 0.01 +} + +i_blocking = [ + "l.first_name = r.first_name and l.last_name = r.last_name", + "l.full_name = r.full_name and l.state = r.state", + "l.full_name = r.full_name and l.company = r.company", + ] + +organizations_settings = { + "link_type": "dedupe_only", + "blocking_rules_to_generate_predictions": [ + "l.name = r.name", + ], + "comparisons": [ + ctl.name_comparison("name", term_frequency_adjustments=True), + cl.exact_match("entity_type", term_frequency_adjustments=True), + cl.jaro_winkler_at_thresholds("state", [0.9, 0.8]), #threshold will catch typos and shortenings + # Add more comparisons as needed + ], + "retain_matching_columns": True, + "retain_intermediate_calculation_columns": True, + "max_iterations": 10, + "em_convergence": 0.01 +} + +o_blocking = [ + "l.name = r.name", + "l.name = r.name and l.state = r.state", + ] \ No newline at end of file diff --git a/utils/linkage.py b/utils/linkage.py index 26fbd5b5..b24c53a3 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -5,6 
+5,10 @@ from utils.constants import COMPANY_TYPES +from splink.duckdb.linker import DuckDBLinker +import splink.duckdb.comparison_template_library as ctl +import splink.duckdb.comparison_library as cl + """ Module for performing record linkage on state campaign finance dataset """ @@ -254,3 +258,26 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") + + +def splink_dedupe(df, settings, blocking): + + """Given a dataframe, configuration settings, and blocking rules return a deduplicated dataframe + Args: + df: dataframe with potential duplicates + settings: model settings based on splink documentation + blocking: list of columns to block on + Returns: + dataframe with matched ids of matching rows""" + + linker = DuckDBLinker(df, settings) + linker.estimate_probability_two_random_records_match(blocking, recall=0.6) #default + linker.estimate_u_using_random_sampling(max_pairs=5e6) + + for i in blocking: + training_session_names = linker.estimate_parameters_using_expectation_maximisation(i) + + df_predict = linker.predict() + df_e = df_predict.as_pandas_dataframe() + clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7) #default + return clusters.as_pandas_dataframe() \ No newline at end of file From fbc579c0dfbd6a8e6b6d4a6a7a5ac418ff31d380 Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Thu, 15 Feb 2024 08:37:58 -0600 Subject: [PATCH 093/214] Update linkage.py --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 9f146da2..403ff165 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -156,7 +156,7 @@ def name_rank(first_name: str, last_name: str) -> list: >>> name_rank("Adil", "Kassim") [0, 7392] >>> name_rank(None, 9) - [None, 
None + [None, None] """ first_name_rank = 0 last_name_rank = 0 From 6a41aa0778912a24ffd89c720e2f72fa6f8451d6 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 15 Feb 2024 10:25:55 -0600 Subject: [PATCH 094/214] discovered logic error in dedup function...no need to review yet --- tests/test_dedup.py | 78 ++++++++++++ tests/tester.ipynb | 294 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 372 insertions(+) create mode 100644 tests/test_dedup.py create mode 100644 tests/tester.ipynb diff --git a/tests/test_dedup.py b/tests/test_dedup.py new file mode 100644 index 00000000..25ba0c5d --- /dev/null +++ b/tests/test_dedup.py @@ -0,0 +1,78 @@ +# import json +# import pytest +import pandas as pd + +from utils.constants import BASE_FILEPATH +from utils.linkage import deduplicate_perfect_matches + +inds_sample = pd.read_csv( + BASE_FILEPATH / "output" / "complete_individuals_table.csv" +) +orgs_sample = pd.read_csv( + BASE_FILEPATH / "output" / "complete_organizations_table.csv" +) + +deduplicated_inds = deduplicate_perfect_matches(inds_sample) +deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) + +output_dedup_ids = pd.read_csv( + BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv" +) +# outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs +# has + +dedup_inds_id = deduplicated_inds.id.tolist() +dedup_orgs_id = deduplicated_orgs.id.tolist() +unique_ids = output_dedup_ids.duplicated_uuids.tolist() + +assert all(x in unique_ids for x in dedup_inds_id) +assert all(x in unique_ids for x in dedup_orgs_id) + +''' +from banktrack.annotation.convert import ( + create_record_from_labelstudio_results, + get_unique_entity_ids_from_labelstudio_results, +) +from banktrack.pipeline.constants import ROOT_DIR + +test_data_directory = ROOT_DIR / "tests" / "data" + + +def open_test_data_json(filename: str) -> dict: + """Open json in tests/data dir into a python dict""" + with open(test_data_directory / filename, "r") as f: + return 
json.load(f) + + +@pytest.fixture +def labelstudio_simple_results(): + return open_test_data_json("simple_labelstudio_results.json") + + +@pytest.fixture +def labelstudio_simple_results_filtered(): + return open_test_data_json("simple_labelstudio_results_filtered.json") + + +def test_create_record_from_labelstudio_results( + labelstudio_simple_results_filtered, +): + result = create_record_from_labelstudio_results( + labelstudio_simple_results_filtered + ) + expected = { + "borrower": "Wisconsin Public Service Corporation", + } + assert result == expected + + +def test_get_unique_entity_ids_from_labelstudio_results( + labelstudio_simple_results, +): + result = get_unique_entity_ids_from_labelstudio_results( + labelstudio_simple_results + ) + expected = ["eVtWJ7O08t", "b9izoYopAS"] + assert result == expected + +''' diff --git a/tests/tester.ipynb b/tests/tester.ipynb new file mode 100644 index 00000000..7bbdc9dd --- /dev/null +++ b/tests/tester.ipynb @@ -0,0 +1,294 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from utils.linkage import deduplicate_perfect_matches, convert_duplicates_to_dict\n", + "from utils.constants import BASE_FILEPATH\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfirst_namelast_namefull_nameentity_typestatepartycompany
724734c87d5ea3-0000-4cae-ab4c-febce5b17fc2KIRSTINBRODERICKKIRSTIN BRODERICK ...IndividualNYNaNNaN
11872379af092ab-9e88-456f-9a27-5fa561d615ebSARAHLEVINESARAH LEVINE ...IndividualNYNaNNaN
32044889078c2f-964a-4044-a760-c0ea28e71c6eDUSTINDEMLOWDUSTIN DEMLOW ...IndividualMINaNNaN
238953e9b9112d-7927-4cc1-b316-0beb55e9c47aBRIANBARBASBRIAN BARBAS ...IndividualILNaNNaN
38632a22c70ab-464c-4d89-a473-7c97931e0155SUSAN MASSAROMCFARLANDSUSAN MASSARO MCFARLAND ...IndividualMINaNNaN
\n", + "
" + ], + "text/plain": [ + " id first_name \\\n", + "724734 c87d5ea3-0000-4cae-ab4c-febce5b17fc2 KIRSTIN \n", + "1187237 9af092ab-9e88-456f-9a27-5fa561d615eb SARAH \n", + "320448 89078c2f-964a-4044-a760-c0ea28e71c6e DUSTIN \n", + "238953 e9b9112d-7927-4cc1-b316-0beb55e9c47a BRIAN \n", + "38632 a22c70ab-464c-4d89-a473-7c97931e0155 SUSAN MASSARO \n", + "\n", + " last_name \\\n", + "724734 BRODERICK \n", + "1187237 LEVINE \n", + "320448 DEMLOW \n", + "238953 BARBAS \n", + "38632 MCFARLAND \n", + "\n", + " full_name entity_type state \\\n", + "724734 KIRSTIN BRODERICK ... Individual NY \n", + "1187237 SARAH LEVINE ... Individual NY \n", + "320448 DUSTIN DEMLOW ... Individual MI \n", + "238953 BRIAN BARBAS ... Individual IL \n", + "38632 SUSAN MASSARO MCFARLAND ... Individual MI \n", + "\n", + " party company \n", + "724734 NaN NaN \n", + "1187237 NaN NaN \n", + "320448 NaN NaN \n", + "238953 NaN NaN \n", + "38632 NaN NaN " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_individuals_table.csv\", index_col=0, low_memory=False).sample(10000)\n", + "orgs_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_organizations_table.csv\", index_col=0).sample(10000)\n", + "inds_sample.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "deduplicated_inds = deduplicate_perfect_matches(inds_sample)\n", + "deduplicated_orgs = deduplicate_perfect_matches(orgs_sample)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
duplicated_uuidsmapped_uuids
028557e06-b13b-46e1-ab44-768bd8b4ff1628557e06-b13b-46e1-ab44-768bd8b4ff16
166978565-4c37-432e-9c61-bb0ca125485066978565-4c37-432e-9c61-bb0ca1254850
251ca5ecb-6c20-4380-8562-36048ca72f4651ca5ecb-6c20-4380-8562-36048ca72f46
3735f45fd-4859-4285-a63c-29399250d20e735f45fd-4859-4285-a63c-29399250d20e
45130157e-1c68-4529-9638-d5727e8feb075130157e-1c68-4529-9638-d5727e8feb07
\n", + "
" + ], + "text/plain": [ + " duplicated_uuids mapped_uuids\n", + "0 28557e06-b13b-46e1-ab44-768bd8b4ff16 28557e06-b13b-46e1-ab44-768bd8b4ff16\n", + "1 66978565-4c37-432e-9c61-bb0ca1254850 66978565-4c37-432e-9c61-bb0ca1254850\n", + "2 51ca5ecb-6c20-4380-8562-36048ca72f46 51ca5ecb-6c20-4380-8562-36048ca72f46\n", + "3 735f45fd-4859-4285-a63c-29399250d20e 735f45fd-4859-4285-a63c-29399250d20e\n", + "4 5130157e-1c68-4529-9638-d5727e8feb07 5130157e-1c68-4529-9638-d5727e8feb07" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = pd.read_csv(BASE_FILEPATH / \"output\" / \"deduplicated_UUIDs.csv\")\n", + "test.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Yes\n", + "Yes\n" + ] + } + ], + "source": [ + "dedup_inds_id = deduplicated_inds.id.tolist()\n", + "dedup_orgs_id = deduplicated_orgs.id.tolist()\n", + "test_ids = test.duplicated_uuids.tolist()\n", + "\n", + "if all(x in test_ids for x in dedup_inds_id):\n", + " print(\"Yes\")\n", + "if all(x in test_ids for x in dedup_orgs_id):\n", + " print(\"Yes\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "climate_cabinet", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 2e28eecb84601d4542f1af9d4c6f7689fa4150c5 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 15 Feb 2024 12:25:18 -0600 Subject: [PATCH 095/214] file that tests my deduplicate function --- tests/test_dedup.py | 63 
+++++---------------------------------------- 1 file changed, 6 insertions(+), 57 deletions(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 25ba0c5d..327f2840 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -1,12 +1,10 @@ -# import json -# import pytest import pandas as pd from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches inds_sample = pd.read_csv( - BASE_FILEPATH / "output" / "complete_individuals_table.csv" + BASE_FILEPATH / "output" / "complete_individuals_table.csv", low_memory=False ) orgs_sample = pd.read_csv( BASE_FILEPATH / "output" / "complete_organizations_table.csv" @@ -21,58 +19,9 @@ # outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs # has -dedup_inds_id = deduplicated_inds.id.tolist() -dedup_orgs_id = deduplicated_orgs.id.tolist() -unique_ids = output_dedup_ids.duplicated_uuids.tolist() +dedup_inds_id = set(deduplicated_inds.id.tolist()) +dedup_orgs_id = set(deduplicated_orgs.id.tolist()) +unique_ids = set(output_dedup_ids.duplicated_uuids.tolist()) -assert all(x in unique_ids for x in dedup_inds_id) -assert all(x in unique_ids for x in dedup_orgs_id) - -''' -from banktrack.annotation.convert import ( - create_record_from_labelstudio_results, - get_unique_entity_ids_from_labelstudio_results, -) -from banktrack.pipeline.constants import ROOT_DIR - -test_data_directory = ROOT_DIR / "tests" / "data" - - -def open_test_data_json(filename: str) -> dict: - """Open json in tests/data dir into a python dict""" - with open(test_data_directory / filename, "r") as f: - return json.load(f) - - -@pytest.fixture -def labelstudio_simple_results(): - return open_test_data_json("simple_labelstudio_results.json") - - -@pytest.fixture -def labelstudio_simple_results_filtered(): - return open_test_data_json("simple_labelstudio_results_filtered.json") - - -def test_create_record_from_labelstudio_results( - labelstudio_simple_results_filtered, -): - result = 
create_record_from_labelstudio_results( - labelstudio_simple_results_filtered - ) - expected = { - "borrower": "Wisconsin Public Service Corporation", - } - assert result == expected - - -def test_get_unique_entity_ids_from_labelstudio_results( - labelstudio_simple_results, -): - result = get_unique_entity_ids_from_labelstudio_results( - labelstudio_simple_results - ) - expected = ["eVtWJ7O08t", "b9izoYopAS"] - assert result == expected - -''' +assert dedup_inds_id.issubset(unique_ids) +assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file From 5621652b143b03e8f514227b686c0be56ce954d4 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 15 Feb 2024 12:25:54 -0600 Subject: [PATCH 096/214] file that tests my deduplicate function --- tests/test_dedup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 327f2840..fc1710e5 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -4,7 +4,8 @@ from utils.linkage import deduplicate_perfect_matches inds_sample = pd.read_csv( - BASE_FILEPATH / "output" / "complete_individuals_table.csv", low_memory=False + BASE_FILEPATH / "output" / "complete_individuals_table.csv", + low_memory=False, ) orgs_sample = pd.read_csv( BASE_FILEPATH / "output" / "complete_organizations_table.csv" @@ -24,4 +25,4 @@ unique_ids = set(output_dedup_ids.duplicated_uuids.tolist()) assert dedup_inds_id.issubset(unique_ids) -assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file +assert dedup_orgs_id.issubset(unique_ids) From cfb6d261f6a2867b6c9521c96d0e7ca8ee4a0693 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sun, 18 Feb 2024 11:27:54 -0600 Subject: [PATCH 097/214] testing if path to complete_orgs_table.csv is working --- tests/test_dedup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index fc1710e5..86533450 100644 --- a/tests/test_dedup.py +++ 
b/tests/test_dedup.py @@ -3,15 +3,15 @@ from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches -inds_sample = pd.read_csv( - BASE_FILEPATH / "output" / "complete_individuals_table.csv", - low_memory=False, -) +# inds_sample = pd.read_csv( +# BASE_FILEPATH / "output" / "complete_individuals_table.csv", +# low_memory=False, +# ) orgs_sample = pd.read_csv( BASE_FILEPATH / "output" / "complete_organizations_table.csv" ) -deduplicated_inds = deduplicate_perfect_matches(inds_sample) +# deduplicated_inds = deduplicate_perfect_matches(inds_sample) deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) output_dedup_ids = pd.read_csv( @@ -20,9 +20,9 @@ # outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs # has -dedup_inds_id = set(deduplicated_inds.id.tolist()) +# dedup_inds_id = set(deduplicated_inds.id.tolist()) dedup_orgs_id = set(deduplicated_orgs.id.tolist()) unique_ids = set(output_dedup_ids.duplicated_uuids.tolist()) -assert dedup_inds_id.issubset(unique_ids) +# assert dedup_inds_id.issubset(unique_ids) assert dedup_orgs_id.issubset(unique_ids) From d9356a2a93f8cf0cf70a094801a5dba9546822f8 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:04:16 -0600 Subject: [PATCH 098/214] added test for row_matches --- utils/tests/test_linkage.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 5899f36a..401e70b6 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,20 +1,12 @@ -import json - import numpy as np import pandas as pd -import pytest - -from utils.linkage import calculate_row_similarity, calculate_string_similarity -# creating a test for calculate_row_similarity and row_matches +from utils.linkage import calculate_row_similarity, calculate_string_similarity, row_matches +# import pytest -# maybe this will just be a csv for us? 
-def open_test_data_json(filename: str) -> dict: - """Open json in tests/data dir into a python dict""" - with open(test_data_directory / filename, "r") as f: - return json.load(f) +# creating a test for calculate_row_similarity and row_matches # to put in data: d = { @@ -55,7 +47,6 @@ def test_row_similarity_scen_1(row_similarity_scen_1): def test_row_similarity_scen_2(row_similarity_scen_2): - result = calculate_row_similarity(row_similarity_scen_2) wrong = calculate_row_similarity( row_similarity_scen_2.iloc[[0]], row_similarity_scen_2.iloc[[1]], @@ -70,3 +61,16 @@ def test_row_similarity_scen_2(row_similarity_scen_2): ) assert right < wrong + + +d2 = {'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich", "missy elliot", "mr johnson", "quarantin directino", "missy eliot", "joseph johnson"],'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago","8 Fancy Way, Chicago", "8 Fancy Way, Evanston", "17 Regular Road, Chicago", "42 Hollywood Boulevard, Chicago", "8 Fancy Way, Evanston", "17 Regular Road, Chicago"]} +test_df2 = pd.DataFrame(data=d2) + + +def test_row_matches(row_match_scen1): + res = row_matches(test_df, np.array([.8, .2]), .9, calculate_string_similarity) + + assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} + + + From 256caf756a258742012a4609f7e5d654632bb36c Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:08:34 -0600 Subject: [PATCH 099/214] changes to linkage --- utils/linkage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index c4fac807..9a94ed36 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,7 @@ +import re + import numpy as np import pandas as pd -import re import textdistance as td import usaddress From 1d11b5233168261cd3ccc7f5a8b4ede568543bcc Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:14:37 -0600 Subject: [PATCH 100/214] fixing linter issues --- utils/classify.py | 35 
+++++++++++++++++----------------- utils/linkage.py | 4 ++-- utils/tests/test_linkage.py | 38 ++++++++++++++++++++++++++++++------- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index fae19ff1..cfd629c0 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -2,33 +2,32 @@ from utils.linkage import calculate_string_similarity -#we want to run down a list of people and, hopefully, their adresses, plus a list of -#corporations, groups, etc, and classify them, basically just looking for matches +# we want to run down a list of people and, hopefully, their adresses, plus a list of +# corporations, groups, etc, and classify them, basically just looking for matches -#do we want to just input all the names/people (there's not many, less than 200 for sure), -#give a string similarity match score, and extract the top ten for manual review? -#thsi should give us a feeling for how to set our threhsold -#we might also, once we have all the data, buckle down and just classify some of them manually +# do we want to just input all the names/people (there's not many, less than 200 +# for sure),give a string similarity match score, and extract the top ten for +# manual review? 
this should give us a feeling for how to set our threshold +# we might also, once we have all the data, buckle down and just classify +# some of them manually inds_list = [] -#a list of individual names +# a list of individual names def similarity_calculator(df: pd.DataFrame, suspect): - """Run through a pandas dataframe column and compare elements to a constant - - """ + """Run through a pandas dataframe column and compare elements to a constant""" # this needs to output somehting useful - similarities = df['column1'].apply(lambda x: calculate_string_similarity(x, suspect)) + similarities = df["column1"].apply( + lambda x: calculate_string_similarity(x, suspect) + ) return similarities + # very psuedocode + # get top n, maybe just ten, and output - - - #very psuedocode - #get top n, maybe just ten, and output - - #we can use the indices and/or select manually, just add a new column to the subjects table - #that marks fossil fuels, green energy, or neither + # we can use the indices and/or select manually, just add a new + # column to the subjects table + # that marks fossil fuels, green energy, or neither diff --git a/utils/linkage.py b/utils/linkage.py index bd2ad238..dd045753 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -2,8 +2,8 @@ Module for performing record linkage on state campaign finance dataset """ import os.path -import pandas as pd import re + import numpy as np import pandas as pd import textdistance as td @@ -193,7 +193,7 @@ def row_matches( return index_dict - + def determine_comma_role(name: str) -> str: """Given a string (someone's name), attempts to determine the role of the comma in the name and where it ought to belong. 
diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 401e70b6..a7bd0514 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,7 +1,11 @@ import numpy as np import pandas as pd -from utils.linkage import calculate_row_similarity, calculate_string_similarity, row_matches +from utils.linkage import ( + calculate_row_similarity, + calculate_string_similarity, + row_matches, +) # import pytest @@ -63,14 +67,34 @@ def test_row_similarity_scen_2(row_similarity_scen_2): assert right < wrong -d2 = {'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich", "missy elliot", "mr johnson", "quarantin directino", "missy eliot", "joseph johnson"],'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago","8 Fancy Way, Chicago", "8 Fancy Way, Evanston", "17 Regular Road, Chicago", "42 Hollywood Boulevard, Chicago", "8 Fancy Way, Evanston", "17 Regular Road, Chicago"]} +d2 = { + "name": [ + "bob von rosevich", + "anantarya smith", + "bob j vonrosevich", + "missy elliot", + "mr johnson", + "quarantin directino", + "missy eliot", + "joseph johnson", + ], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + "42 Hollywood Boulevard, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + ], +} test_df2 = pd.DataFrame(data=d2) def test_row_matches(row_match_scen1): - res = row_matches(test_df, np.array([.8, .2]), .9, calculate_string_similarity) - - assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} - - + res = row_matches( + test_df, np.array([0.8, 0.2]), 0.9, calculate_string_similarity + ) + assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} From 2c035484e4f37ed1ff669b5959fee65e6454d008 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:20:54 -0600 Subject: [PATCH 101/214] fixing test --- utils/tests/test_linkage.py | 17 +++++++++++------ 1 file 
changed, 11 insertions(+), 6 deletions(-) diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index a7bd0514..d495e10c 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from utils.linkage import ( calculate_row_similarity, @@ -24,13 +25,13 @@ test_df = pd.DataFrame(data=d) -# @pytest.fixture -# def row_similarity_scen_1(): -# return open_test_data_json(data) +@pytest.fixture +def row_similarity_scen_1(): + return test_df -# @pytest.fixture -# def row_similarity_scen_2(): -# return open_test_data_json(data) +@pytest.fixture +def row_similarity_scen_2(): + return test_df def test_row_similarity_scen_1(row_similarity_scen_1): @@ -91,6 +92,10 @@ def test_row_similarity_scen_2(row_similarity_scen_2): } test_df2 = pd.DataFrame(data=d2) +@pytest.fixture +def row_match_scen1(): + return test_df2 + def test_row_matches(row_match_scen1): res = row_matches( From 1b90fc4775dbc7461a3ffb1048aeb2188abeb982 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:23:15 -0600 Subject: [PATCH 102/214] fixing linter errors --- utils/tests/test_linkage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index d495e10c..60511727 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -29,6 +29,7 @@ def row_similarity_scen_1(): return test_df + @pytest.fixture def row_similarity_scen_2(): return test_df @@ -92,6 +93,7 @@ def test_row_similarity_scen_2(row_similarity_scen_2): } test_df2 = pd.DataFrame(data=d2) + @pytest.fixture def row_match_scen1(): return test_df2 From 3dd4b4df78ddcc56ab1157d92b6f88b1ab90c46a Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:26:32 -0600 Subject: [PATCH 103/214] fixing typo --- utils/tests/test_linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/tests/test_linkage.py 
b/utils/tests/test_linkage.py index 60511727..e964dade 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -101,7 +101,7 @@ def row_match_scen1(): def test_row_matches(row_match_scen1): res = row_matches( - test_df, np.array([0.8, 0.2]), 0.9, calculate_string_similarity + test_df2, np.array([0.8, 0.2]), 0.9, calculate_string_similarity ) assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} From 4df82365b8c06c99ef4d709005a0f78d6c9d7a56 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:27:44 -0600 Subject: [PATCH 104/214] fixing typo again --- utils/tests/test_linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index e964dade..3695a399 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -101,7 +101,7 @@ def row_match_scen1(): def test_row_matches(row_match_scen1): res = row_matches( - test_df2, np.array([0.8, 0.2]), 0.9, calculate_string_similarity + row_match_scen1, np.array([0.8, 0.2]), 0.9, calculate_string_similarity ) assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} From 7aab13e0245dd1cbaaf162a93d09b6863813f10e Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:39:47 -0600 Subject: [PATCH 105/214] added match_confidence function --- utils/linkage.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/utils/linkage.py b/utils/linkage.py index dd045753..d259c989 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -8,6 +8,7 @@ import pandas as pd import textdistance as td import usaddress +import math from utils.constants import COMPANY_TYPES, repo_root @@ -194,6 +195,52 @@ def row_matches( return index_dict +def match_confidence( + confidences: np.array(float), weights: np.array(float), weights_toggle: bool +) -> float: + """Combine confidences for row matches into a final confidence + + This is 
a weighted log-odds based combination of row match confidences + originating from various record linkage methods. Weights will be applied + to the linkage methods in order and must be of the same length. + + weights_toggle allows one to turn weights on and off when calling the + function. False cancels the use of weights. + + Since log-odds have undesirable behaviors at 0 and 1, we truncate at + +-5, which corresponds to around half a percent probability or + 1 - the same. + >>> match_confidence(np.array([.6, .9, .0001]), np.array([2,5.7,8]), True) + 2.627759082143462e-12 + >>> match_confidence(np.array([.6, .9, .0001]), np.array([2,5.7,8]), False) + 0.08337802853594725 + """ + + if (min(confidences) < 0) or (max(confidences) > 1): + raise ValueError("Probabilities must be bounded on [0, 1]") + + log_odds = [] + + for c in confidences: + l_o = np.log(c / (1 - c)) + + if l_o > 5: + l_o = 5 + + elif l_o < -5: + l_o = -5 + + log_odds.append(l_o) + + if weights_toggle: + log_odds = log_odds * weights + + l_o_sum = np.sum(log_odds) + + conf_sum = math.e ** (l_o_sum) / (1 + math.e ** (l_o_sum)) + return conf_sum + + def determine_comma_role(name: str) -> str: """Given a string (someone's name), attempts to determine the role of the comma in the name and where it ought to belong. 
From 8796fa65416d2fdd5075f27373c54147389d7f9c Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:41:52 -0600 Subject: [PATCH 106/214] fixing linter --- utils/linkage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d259c989..e4b0387d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,6 +1,7 @@ """ Module for performing record linkage on state campaign finance dataset """ +import math import os.path import re @@ -8,7 +9,6 @@ import pandas as pd import textdistance as td import usaddress -import math from utils.constants import COMPANY_TYPES, repo_root @@ -199,7 +199,7 @@ def match_confidence( confidences: np.array(float), weights: np.array(float), weights_toggle: bool ) -> float: """Combine confidences for row matches into a final confidence - + This is a weighted log-odds based combination of row match confidences originating from various record linkage methods. Weights will be applied to the linkage methods in order and must be of the same length. From f932563205e4428e1b005a8f4aa90ffebec00dad Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 19:53:26 -0600 Subject: [PATCH 107/214] removed duplicate test --- utils/linkage.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index e4b0387d..98844822 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -109,29 +109,6 @@ def calculate_row_similarity( exists as to provide basic functionality. Once we have the comparison function locked in, using .apply will likely be easier and more efficient. - - >>> d = { - ... 'name': ["bob von rosevich", "anantarya smith","bob j vonrosevich"], - ... 'address': ["3 Blue Drive, Chicago", "4 Blue Drive, Chicago", - ... "8 Fancy Way, Chicago"] - ... } - >>> df = pd.DataFrame(data=d) - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.8, .2]), - ... 
calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.8, .2]), - ... calculate_string_similarity) - >>> right > wrong - True - >>> wrong = calculate_row_similarity(df.iloc[[0]], df.iloc[[1]], - ... np.array([.2, .8]), - ... calculate_string_similarity) - >>> right = calculate_row_similarity(df.iloc[[0]], df.iloc[[2]], - ... np.array([.2, .8]), - ... calculate_string_similarity) - >>> right > wrong - False """ row_length = len(weights) From ce540957f2083ac4c3db5afd458f4ad8f3391d25 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 20:29:34 -0600 Subject: [PATCH 108/214] updating classifier --- utils/classify.py | 63 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index cfd629c0..86e671ca 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -1,7 +1,5 @@ import pandas as pd -from utils.linkage import calculate_string_similarity - # we want to run down a list of people and, hopefully, their adresses, plus a list of # corporations, groups, etc, and classify them, basically just looking for matches @@ -16,17 +14,62 @@ # a list of individual names -def similarity_calculator(df: pd.DataFrame, suspect): - """Run through a pandas dataframe column and compare elements to a constant""" - # this needs to output somehting useful +def similarity_calculator( + df: pd.DataFrame, subject: str, n: int, comparison_func +) -> pd.DataFrame: + """Find best matches to a subject name in a pandas dataframe + + For a given individual or organization, the subject, we search through the + 'name'column of a dataframe, select the n highest matches according to a + selected comparison function, and return those as a dataframe. This is meant + to be used manually to search for matches. For quick automated processing, see + automated_classifier(). 
+ + Note that the comparison function must take in two inputs, both strings, and + output a percentage match + """ + + similarities_df = df.copy() - similarities = df["column1"].apply( - lambda x: calculate_string_similarity(x, suspect) + similarities = similarities_df["name"].apply( + lambda x: comparison_func(x, subject) ) - return similarities - # very psuedocode - # get top n, maybe just ten, and output + similarities_df["similarities"] = similarities + + top_n_matches = similarities_df.sort_values( + by=["similarities"], ascending=False + )[0:n] + + return top_n_matches + + +def automated_classifier( + df: pd.DataFrame, subjects_dict: dict, threshold: float, comparison_func +): + """Using similarity_calculator, classify entities automatically + + Feeding a dictionary of names and the associated statuses, we compare + the string matches and, if they exceed a certain threshold, classify + them as belonging to some group specified in the subjects dictionary. + """ + + similarities_df = df.copy() + + for subject in subjects_dict: + similarities = similarities_df["name"].apply( + lambda x, sub=subject: comparison_func(x, sub) + + ) + matches = similarities >= threshold + + status = subjects_dict[subject] + + similarities_df["classification"] = pd.Series(matches).apply( + lambda x, stat=status: stat if x else "neutral" + ) + + return similarities_df # we can use the indices and/or select manually, just add a new # column to the subjects table From ff02e3deb0875ea1e517792dad7c63d7a8fdb2d1 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Sun, 18 Feb 2024 20:31:12 -0600 Subject: [PATCH 109/214] fixing linter --- utils/classify.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/classify.py b/utils/classify.py index 86e671ca..db574ace 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -59,7 +59,6 @@ def automated_classifier( for subject in subjects_dict: similarities = similarities_df["name"].apply( lambda x, sub=subject: comparison_func(x, sub) - ) 
matches = similarities >= threshold From 4e353273bd7535d96dec2d932d3b769baff8cc52 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 15:46:27 +0000 Subject: [PATCH 110/214] slight formatting changes --- utils/linkage.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 2e1f9c97..88108775 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -2,7 +2,6 @@ import usaddress from names_dataset import NameDataset - """ Module for performing record linkage on state campaign finance dataset """ @@ -10,8 +9,6 @@ import re import pandas as pd -import textdistance as td -import usaddress from utils.constants import COMPANY_TYPES, repo_root @@ -278,7 +275,6 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) - def name_rank(first_name: str, last_name: str) -> list: """Returns a score for the rank of a given first name and last name https://github.com/philipperemy/name-dataset @@ -300,10 +296,10 @@ def name_rank(first_name: str, last_name: str) -> list: >>> name_rank(None, 9) [None, None] """ - + # Initialize the NameDataset class nd = NameDataset() - + first_name_rank = 0 last_name_rank = 0 if isinstance(first_name, str): @@ -325,7 +321,8 @@ def name_rank(first_name: str, last_name: str) -> list: else: last_name_rank = None return [first_name_rank, last_name_rank] -======= + + def convert_duplicates_to_dict(df: pd.DataFrame) -> None: """Saves to the "output" directory a file where each row represents a string matching to another string From c3c8defec982adfea07ebba96b735f6cfd5ec29e Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 16:09:46 +0000 Subject: [PATCH 111/214] preprocess file and function initial commit --- utils/preprocess.py | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 utils/preprocess.py diff --git a/utils/preprocess.py b/utils/preprocess.py new file mode 100644 index 
00000000..2831996a --- /dev/null +++ b/utils/preprocess.py @@ -0,0 +1,81 @@ +from typing import Tuple + +import pandas as pd +from nameparser import HumanName + +from utils.linkage import ( + cleaning_company_column, + get_address_line_1_from_full_address, + get_address_number_from_address_line_1, + get_street_from_address_line_1, + standardize_corp_names, +) + + +def preprocess_pipeline( + individuals: pd.DataFrame, + organizations: pd.DataFrame, + transactions: pd.DataFrame, +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Preprocesses data for record linkage + + Args: + Individuals: dataframe of individual contributions + Organizations: dataframe of organization contributions + Transactions: dataframe of transactions + Returns: + preprocessed tuple of dataframes + first element is the individuals dataframe, + second element is the organizations dataframe, + third element is the transactions dataframe + """ + # Preprocess organizations dataframe + organizations["name"] = ( + organizations["name"].astype(str, skipna=True).apply(standardize_corp_names) + ) + + # Preprocess individuals dataframe + if "Unnamed: 0" in individuals.columns: + individuals.drop(columns="Unnamed: 0", inplace=True) + + individuals = individuals.astype( + {"first_name": str, "last_name": str, "full_name": str, "company": str} + ) + + # Standardize company names in individuals dataframe + individuals["company"] = individuals["company"].apply(standardize_corp_names) + individuals["company"] = individuals["company"].apply(cleaning_company_column) + + # Address functions, assuming address column is named 'address' + individuals["Address Line 1"] = individuals["Address"].apply( + get_address_line_1_from_full_address + ) + individuals["Street Name"] = individuals["Address Line 1"].apply( + get_street_from_address_line_1 + ) + individuals["Address Number"] = individuals["Address Line 1"].apply( + get_address_number_from_address_line_1 + ) + + # Check if first name or last names are 
empty, if so, extract from full name column + individuals["full_name"] = individuals["full_name"].astype(str)[ + individuals["full_name"].notnull() + ] + if individuals["first_name"].isnull().any(): + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + first_name = name.apply(lambda x: x["first"]) + individuals["first_name"] = first_name + + if individuals["last_name"].isnull().any(): + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + last_name = name.apply(lambda x: x["last"]) + individuals["last_name"] = last_name + + # Transactions + if "Unnamed: 0" in transactions.columns: + transactions.drop(columns="Unnamed: 0", inplace=True) + + transactions["purpose"] = transactions["purpose"].str.upper() + + return individuals, organizations, transactions From 97e78ae3a7de8ba91cf70f5b9f8a5325f49b86af Mon Sep 17 00:00:00 2001 From: Avery Date: Mon, 19 Feb 2024 10:16:52 -0600 Subject: [PATCH 112/214] update readme --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index c50d0115..498c0999 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,6 @@ If you prefer to develop inside a container with VS Code then do the following s 5. running ```pipeline.py``` returns the tables to the output folder as csv files containing the complete individuals, organizations, and transactions DataFrames combining the AZ, MI, MN, and PA datasets. 6. 
For future reference, the above pipeline also stores the information mapping given id to our database id (generated via uuid) in a csv file in the format of (state)IDMap.csv (example: ArizonaIDMap.csv) in the output folder -## Team Members - ## Repository Structure ### utils From 71cbb37694c7f972a50c8ac96a80562237b08103 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 10:17:44 -0600 Subject: [PATCH 113/214] adding tests to appropriate winter repo --- tests/test_dedup.py | 57 +++++++++++++++++++++++++++------------------ utils/linkage.py | 4 +++- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 86533450..04e21bce 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -1,28 +1,39 @@ import pandas as pd +import pytest from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches -# inds_sample = pd.read_csv( -# BASE_FILEPATH / "output" / "complete_individuals_table.csv", -# low_memory=False, -# ) -orgs_sample = pd.read_csv( - BASE_FILEPATH / "output" / "complete_organizations_table.csv" -) - -# deduplicated_inds = deduplicate_perfect_matches(inds_sample) -deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) - -output_dedup_ids = pd.read_csv( - BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv" -) -# outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs -# has - -# dedup_inds_id = set(deduplicated_inds.id.tolist()) -dedup_orgs_id = set(deduplicated_orgs.id.tolist()) -unique_ids = set(output_dedup_ids.duplicated_uuids.tolist()) - -# assert dedup_inds_id.issubset(unique_ids) -assert dedup_orgs_id.issubset(unique_ids) +@pytest.fixture +def return_data(filename): + path = BASE_FILEPATH / "output" / filename + df = pd.read_csv(path, low_memory=False ) + return df + +@pytest.fixture +def call_dedup_func(): + inds_sample = return_data("complete_individuals_table.csv") + orgs_sample = 
return_data("complete_organizations_table.csv") + + assert not orgs_sample.empty() + assert not inds_sample.empty() + + deduplicated_inds = deduplicate_perfect_matches(inds_sample) + deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) + + output_dedup_ids = return_data("deduplicated_UUIDs.csv") + # outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs + # has + + return deduplicated_inds, deduplicated_orgs, output_dedup_ids + +@pytest.fixture +def confirm_dedup_uuids(): + inds, orgs, output = call_dedup_func() + + dedup_inds_id = set(inds.id.tolist()) + dedup_orgs_id = set(orgs.id.tolist()) + unique_ids = set(output.duplicated_uuids.tolist()) + + assert dedup_inds_id.issubset(unique_ids) + assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file diff --git a/utils/linkage.py b/utils/linkage.py index ee8dcd60..97ec7ab4 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -327,7 +327,9 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: # now find the duplicates along all columns but the ID new_df = ( - new_df.groupby(df.columns[1:].tolist(), dropna=False)["id"] + new_df.groupby(df.columns.difference(["id"]).tolist(), dropna=False)[ + "id" + ] .agg(list) .reset_index() .rename(columns={"id": "duplicated"}) From cccc7cc2665793e4777974b6464d29e9b594feb5 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 16:21:54 +0000 Subject: [PATCH 114/214] slight edits --- utils/preprocess.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/utils/preprocess.py b/utils/preprocess.py index 2831996a..55a99810 100644 --- a/utils/preprocess.py +++ b/utils/preprocess.py @@ -32,7 +32,9 @@ def preprocess_pipeline( """ # Preprocess organizations dataframe organizations["name"] = ( - organizations["name"].astype(str, skipna=True).apply(standardize_corp_names) + organizations["name"] + .astype(str, skipna=True) + .apply(standardize_corp_names) ) # Preprocess 
individuals dataframe @@ -44,8 +46,12 @@ def preprocess_pipeline( ) # Standardize company names in individuals dataframe - individuals["company"] = individuals["company"].apply(standardize_corp_names) - individuals["company"] = individuals["company"].apply(cleaning_company_column) + individuals["company"] = individuals["company"].apply( + standardize_corp_names + ) + individuals["company"] = individuals["company"].apply( + cleaning_company_column + ) # Address functions, assuming address column is named 'address' individuals["Address Line 1"] = individuals["Address"].apply( @@ -63,12 +69,20 @@ def preprocess_pipeline( individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) last_name = name.apply(lambda x: x["last"]) individuals["last_name"] = last_name From 25eaf606604b4499b8b1c18d07e2b79ad0ddfeb2 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 10:22:23 -0600 Subject: [PATCH 115/214] fixing linter errors --- tests/test_dedup.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 04e21bce..f1c01690 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -4,20 +4,22 @@ from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches + @pytest.fixture def return_data(filename): path = BASE_FILEPATH / "output" / filename - df = pd.read_csv(path, low_memory=False ) + df = pd.read_csv(path, low_memory=False) return df + @pytest.fixture def 
call_dedup_func(): inds_sample = return_data("complete_individuals_table.csv") orgs_sample = return_data("complete_organizations_table.csv") - + assert not orgs_sample.empty() assert not inds_sample.empty() - + deduplicated_inds = deduplicate_perfect_matches(inds_sample) deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) @@ -27,6 +29,7 @@ def call_dedup_func(): return deduplicated_inds, deduplicated_orgs, output_dedup_ids + @pytest.fixture def confirm_dedup_uuids(): inds, orgs, output = call_dedup_func() @@ -36,4 +39,4 @@ def confirm_dedup_uuids(): unique_ids = set(output.duplicated_uuids.tolist()) assert dedup_inds_id.issubset(unique_ids) - assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file + assert dedup_orgs_id.issubset(unique_ids) From 57c6070bb8c85743a5ebb5b2584db5427b32a35a Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 16:23:12 +0000 Subject: [PATCH 116/214] removing preprocess function from linkage.py --- utils/linkage.py | 84 ------------------------------------------------ 1 file changed, 84 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 97b0ad6e..2c80939a 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,10 +1,7 @@ import re -from typing import Tuple -import pandas as pd import textdistance as td import usaddress -from nameparser import HumanName from utils.constants import COMPANY_TYPES @@ -257,84 +254,3 @@ def cleaning_company_column(company_entry: str) -> str: else: return company_edited - - -def preprocess_pipeline( - individuals: pd.DataFrame, - Address: str, - organizations: pd.DataFrame, - transactions: pd.DataFrame, -) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """ - Preprocesses data for record linkage - - Args: - Individuals: dataframe of individual contributions - Address: column name of address - Organizations: dataframe of organization contributions - Transactions: dataframe of transactions - Returns: - preprocessed tuple of dataframes - first element is 
the individuals dataframe, - second element is the organizations dataframe, - third element is the transactions dataframe - """ - # Preprocess organizations dataframe - organizations["name"] = ( - organizations["name"].astype(str).apply(standardize_corp_names) - ) - - # Preprocess individuals dataframe - if "Unnamed: 0" in individuals.columns: - individuals.drop(columns="Unnamed: 0", inplace=True) - - individuals = individuals.astype( - {"first_name": str, "last_name": str, "full_name": str, "company": str} - ) - - # Standardize company names in individuals dataframe - individuals["company"] = individuals["company"].apply( - standardize_corp_names - ) - individuals["company"] = individuals["company"].apply( - cleaning_company_column - ) - - # Address functions, assuming address column is named 'address' - individuals["Address Line 1"] = individuals[Address].apply( - get_address_line_1_from_full_address - ) - individuals["Street Name"] = individuals["Address Line 1"].apply( - get_street_from_address_line_1 - ) - individuals["Address Number"] = individuals["Address Line 1"].apply( - get_address_number_from_address_line_1 - ) - - # Check if first name or last names are empty, if so, extract from full name column - individuals["full_name"] = individuals["full_name"].astype(str) - if individuals["first_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) - first_name = name.apply(lambda x: x["first"]) - individuals["first_name"] = first_name - - if individuals["last_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) - last_name = name.apply(lambda x: x["last"]) - individuals["last_name"] = last_name - - # Transactions - if "Unnamed: 0" in transactions.columns: - transactions.drop(columns="Unnamed: 0", inplace=True) - - transactions["purpose"] = transactions["purpose"].str.upper() - - return individuals, organizations, transactions From 
277663672fcd1faf2cee83f51096d39e71dedbbe Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 16:24:17 +0000 Subject: [PATCH 117/214] slight changes --- utils/preprocess.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/utils/preprocess.py b/utils/preprocess.py index 55a99810..f3755eec 100644 --- a/utils/preprocess.py +++ b/utils/preprocess.py @@ -32,9 +32,7 @@ def preprocess_pipeline( """ # Preprocess organizations dataframe organizations["name"] = ( - organizations["name"] - .astype(str, skipna=True) - .apply(standardize_corp_names) + organizations["name"].astype(str, skipna=True).apply(standardize_corp_names) ) # Preprocess individuals dataframe @@ -46,14 +44,10 @@ def preprocess_pipeline( ) # Standardize company names in individuals dataframe - individuals["company"] = individuals["company"].apply( - standardize_corp_names - ) - individuals["company"] = individuals["company"].apply( - cleaning_company_column - ) + individuals["company"] = individuals["company"].apply(standardize_corp_names) + individuals["company"] = individuals["company"].apply(cleaning_company_column) - # Address functions, assuming address column is named 'address' + # Address functions, assuming address column is named 'Address' individuals["Address Line 1"] = individuals["Address"].apply( get_address_line_1_from_full_address ) @@ -69,20 +63,12 @@ def preprocess_pipeline( individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) last_name = 
name.apply(lambda x: x["last"]) individuals["last_name"] = last_name From d3df75b9141b0ae432ffe5713725f768b2924fc1 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 10:32:31 -0600 Subject: [PATCH 118/214] changing branches, no need to review --- tests/test_dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index f1c01690..4660d775 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -3,7 +3,7 @@ from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches - +print(BASE_FILEPATH) @pytest.fixture def return_data(filename): From 531453c1c91074f9307ee8f5228570a8ecd1e82f Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 10:32:51 -0600 Subject: [PATCH 119/214] changing branches, no need to review --- tests/tester.ipynb | 203 +++++++++++++++++++++++---------------------- 1 file changed, 104 insertions(+), 99 deletions(-) diff --git a/tests/tester.ipynb b/tests/tester.ipynb index 7bbdc9dd..ec7a5de6 100644 --- a/tests/tester.ipynb +++ b/tests/tester.ipynb @@ -2,18 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "from utils.linkage import deduplicate_perfect_matches, convert_duplicates_to_dict\n", + "from utils.linkage import deduplicate_perfect_matches\n", "from utils.constants import BASE_FILEPATH\n", "import pandas as pd" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -49,108 +49,94 @@ " \n", " \n", " \n", - " 724734\n", - " c87d5ea3-0000-4cae-ab4c-febce5b17fc2\n", - " KIRSTIN\n", - " BRODERICK\n", - " KIRSTIN BRODERICK ...\n", - " Individual\n", - " NY\n", + " 0\n", + " 1869727\n", + " NaN\n", + " NaN\n", + " william \bstoner\n", + " individual\n", + " NaN\n", " NaN\n", " NaN\n", " \n", " \n", - " 1187237\n", - " 9af092ab-9e88-456f-9a27-5fa561d615eb\n", - " SARAH\n", - " 
LEVINE\n", - " SARAH LEVINE ...\n", - " Individual\n", - " NY\n", + " 1\n", + " 1779679\n", + " NaN\n", + " NaN\n", + " rm coulon\n", + " individual\n", " NaN\n", " NaN\n", + " area agency on aging\n", " \n", " \n", - " 320448\n", - " 89078c2f-964a-4044-a760-c0ea28e71c6e\n", - " DUSTIN\n", - " DEMLOW\n", - " DUSTIN DEMLOW ...\n", - " Individual\n", - " MI\n", + " 2\n", + " 2277221\n", + " NaN\n", " NaN\n", + " james engelson\n", + " individual\n", " NaN\n", + " NaN\n", + " retired\n", " \n", " \n", - " 238953\n", - " e9b9112d-7927-4cc1-b316-0beb55e9c47a\n", - " BRIAN\n", - " BARBAS\n", - " BRIAN BARBAS ...\n", - " Individual\n", - " IL\n", + " 3\n", + " 2277156\n", + " NaN\n", " NaN\n", + " marivic franciaskinner\n", + " individual\n", " NaN\n", + " NaN\n", + " fibre source international corp\n", " \n", " \n", - " 38632\n", - " a22c70ab-464c-4d89-a473-7c97931e0155\n", - " SUSAN MASSARO\n", - " MCFARLAND\n", - " SUSAN MASSARO MCFARLAND ...\n", - " Individual\n", - " MI\n", + " 4\n", + " 2341373\n", " NaN\n", " NaN\n", + " anthony grindle\n", + " individual\n", + " NaN\n", + " NaN\n", + " zimmerbiomet\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id first_name \\\n", - "724734 c87d5ea3-0000-4cae-ab4c-febce5b17fc2 KIRSTIN \n", - "1187237 9af092ab-9e88-456f-9a27-5fa561d615eb SARAH \n", - "320448 89078c2f-964a-4044-a760-c0ea28e71c6e DUSTIN \n", - "238953 e9b9112d-7927-4cc1-b316-0beb55e9c47a BRIAN \n", - "38632 a22c70ab-464c-4d89-a473-7c97931e0155 SUSAN MASSARO \n", - "\n", - " last_name \\\n", - "724734 BRODERICK \n", - "1187237 LEVINE \n", - "320448 DEMLOW \n", - "238953 BARBAS \n", - "38632 MCFARLAND \n", + " id first_name last_name full_name entity_type state \\\n", + "0 1869727 NaN NaN william \bstoner individual NaN \n", + "1 1779679 NaN NaN rm coulon individual NaN \n", + "2 2277221 NaN NaN james engelson individual NaN \n", + "3 2277156 NaN NaN marivic franciaskinner individual NaN \n", + "4 2341373 NaN NaN anthony grindle individual NaN \n", "\n", - " 
full_name entity_type state \\\n", - "724734 KIRSTIN BRODERICK ... Individual NY \n", - "1187237 SARAH LEVINE ... Individual NY \n", - "320448 DUSTIN DEMLOW ... Individual MI \n", - "238953 BRIAN BARBAS ... Individual IL \n", - "38632 SUSAN MASSARO MCFARLAND ... Individual MI \n", - "\n", - " party company \n", - "724734 NaN NaN \n", - "1187237 NaN NaN \n", - "320448 NaN NaN \n", - "238953 NaN NaN \n", - "38632 NaN NaN " + " party company \n", + "0 NaN NaN \n", + "1 NaN area agency on aging \n", + "2 NaN retired \n", + "3 NaN fibre source international corp \n", + "4 NaN zimmerbiomet " ] }, - "execution_count": 6, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_individuals_table.csv\", index_col=0, low_memory=False).sample(10000)\n", - "orgs_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_organizations_table.csv\", index_col=0).sample(10000)\n", + "inds_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_individuals_table.csv\", index_col=0, low_memory=False)\n", + "orgs_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_organizations_table.csv\", index_col=0)\n", "inds_sample.head(5)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -160,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -191,28 +177,28 @@ " \n", " \n", " 0\n", - " 28557e06-b13b-46e1-ab44-768bd8b4ff16\n", - " 28557e06-b13b-46e1-ab44-768bd8b4ff16\n", + " af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b\n", + " af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b\n", " \n", " \n", " 1\n", - " 66978565-4c37-432e-9c61-bb0ca1254850\n", - " 66978565-4c37-432e-9c61-bb0ca1254850\n", + " d2ed5639-be25-40aa-bfbc-7b8bb3bba62a\n", + " d2ed5639-be25-40aa-bfbc-7b8bb3bba62a\n", " \n", " \n", " 2\n", - " 51ca5ecb-6c20-4380-8562-36048ca72f46\n", - " 
51ca5ecb-6c20-4380-8562-36048ca72f46\n", + " 054e4f1d-aec7-4188-968d-4429748f2fe6\n", + " 054e4f1d-aec7-4188-968d-4429748f2fe6\n", " \n", " \n", " 3\n", - " 735f45fd-4859-4285-a63c-29399250d20e\n", - " 735f45fd-4859-4285-a63c-29399250d20e\n", + " c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74\n", + " c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74\n", " \n", " \n", " 4\n", - " 5130157e-1c68-4529-9638-d5727e8feb07\n", - " 5130157e-1c68-4529-9638-d5727e8feb07\n", + " 81125483-336e-4d48-9f4b-5708e91c3835\n", + " 81125483-336e-4d48-9f4b-5708e91c3835\n", " \n", " \n", "\n", @@ -220,14 +206,14 @@ ], "text/plain": [ " duplicated_uuids mapped_uuids\n", - "0 28557e06-b13b-46e1-ab44-768bd8b4ff16 28557e06-b13b-46e1-ab44-768bd8b4ff16\n", - "1 66978565-4c37-432e-9c61-bb0ca1254850 66978565-4c37-432e-9c61-bb0ca1254850\n", - "2 51ca5ecb-6c20-4380-8562-36048ca72f46 51ca5ecb-6c20-4380-8562-36048ca72f46\n", - "3 735f45fd-4859-4285-a63c-29399250d20e 735f45fd-4859-4285-a63c-29399250d20e\n", - "4 5130157e-1c68-4529-9638-d5727e8feb07 5130157e-1c68-4529-9638-d5727e8feb07" + "0 af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b\n", + "1 d2ed5639-be25-40aa-bfbc-7b8bb3bba62a d2ed5639-be25-40aa-bfbc-7b8bb3bba62a\n", + "2 054e4f1d-aec7-4188-968d-4429748f2fe6 054e4f1d-aec7-4188-968d-4429748f2fe6\n", + "3 c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74 c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74\n", + "4 81125483-336e-4d48-9f4b-5708e91c3835 81125483-336e-4d48-9f4b-5708e91c3835" ] }, - "execution_count": 9, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -239,27 +225,46 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Yes\n", - "Yes\n" - ] + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "dedup_inds_id = deduplicated_inds.id.tolist()\n", - "dedup_orgs_id = 
deduplicated_orgs.id.tolist()\n", - "test_ids = test.duplicated_uuids.tolist()\n", + "dedup_inds_id = set(deduplicated_inds.id.tolist())\n", + "dedup_orgs_id = set(deduplicated_orgs.id.tolist())\n", + "test_ids = set(test.duplicated_uuids.tolist())\n", "\n", - "if all(x in test_ids for x in dedup_inds_id):\n", - " print(\"Yes\")\n", - "if all(x in test_ids for x in dedup_orgs_id):\n", - " print(\"Yes\")" + "dedup_orgs_id.issubset(test_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(612181, 526145)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(deduplicated_inds.id.tolist()), len(dedup_inds_id)" ] }, { From 75082e479222ef425f7550b8facf994af7dff3ce Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 13:17:57 -0600 Subject: [PATCH 120/214] finishing up with dedup func --- tests/test_dedup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dedup.py b/tests/test_dedup.py index 4660d775..f1c01690 100644 --- a/tests/test_dedup.py +++ b/tests/test_dedup.py @@ -3,7 +3,7 @@ from utils.constants import BASE_FILEPATH from utils.linkage import deduplicate_perfect_matches -print(BASE_FILEPATH) + @pytest.fixture def return_data(filename): From 1ea09b4034a687c458dad3d5cbe573c24b8bf59b Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 19 Feb 2024 13:50:39 -0600 Subject: [PATCH 121/214] Renaming File --- utils/{preprocess.py => linkage_pipeline.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename utils/{preprocess.py => linkage_pipeline.py} (100%) diff --git a/utils/preprocess.py b/utils/linkage_pipeline.py similarity index 100% rename from utils/preprocess.py rename to utils/linkage_pipeline.py From 6d5cee8de0c8b63e5937101f98efcfb2f5a4815a Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 16:21:12 -0600 Subject: [PATCH 122/214] update on 
function to add nodes and their attributes to graph --- utils/linkage.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index cae5024d..9138c2a2 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,14 +1,15 @@ -import textdistance as td -import usaddress -from names_dataset import NameDataset - """ Module for performing record linkage on state campaign finance dataset + """ + +import textdistance as td +import usaddress +from names_dataset import NameDataset import math import os.path import re - +import networkx as nx import numpy as np import pandas as pd @@ -633,3 +634,27 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") + +def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph : + ''' Takes in a dataframe and generates a MultiDiGraph where the nodes are + entity names, and the rest of the dataframe columns make the node attributes + + Args: + df: a pandas dataframe (complete_individuals_table / + complete_organizations_table) + + Returns: + A Networkx MultiDiGraph with nodes lacking any edges + ''' + G = nx.MultiDiGraph() + # first check if df is individuals or organizations dataset + if 'name' in df.columns: + node_name = 'name' + else: node_name = 'full_name' + + for _, row in df.iterrows(): + G.add_node(row[node_name]) + for column in df.columns: + nx.set_node_attributes(G, row[column], name=column) + + return G From e92192bfd75950307ef781c1e298b79a6d0ce8bb Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 19 Feb 2024 17:04:10 -0600 Subject: [PATCH 123/214] checking for issue with linter test --- utils/linkage.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 9138c2a2..14c57f6b 100644 --- 
a/utils/linkage.py +++ b/utils/linkage.py @@ -3,15 +3,16 @@ """ -import textdistance as td -import usaddress -from names_dataset import NameDataset import math import os.path import re + import networkx as nx import numpy as np import pandas as pd +import textdistance as td +import usaddress +from names_dataset import NameDataset from utils.constants import COMPANY_TYPES, repo_root @@ -635,8 +636,9 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: return address_line_1_components[i][0] raise ValueError("Can not find Address Number") -def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph : - ''' Takes in a dataframe and generates a MultiDiGraph where the nodes are + +def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: + """Takes in a dataframe and generates a MultiDiGraph where the nodes are entity names, and the rest of the dataframe columns make the node attributes Args: @@ -645,16 +647,17 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph : Returns: A Networkx MultiDiGraph with nodes lacking any edges - ''' + """ G = nx.MultiDiGraph() # first check if df is individuals or organizations dataset - if 'name' in df.columns: - node_name = 'name' - else: node_name = 'full_name' + if "name" in df.columns: + node_name = "name" + else: + node_name = "full_name" for _, row in df.iterrows(): G.add_node(row[node_name]) for column in df.columns: nx.set_node_attributes(G, row[column], name=column) - + return G From 6e043449b83e9fcd9520f08227f39c018d6f3c09 Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Mon, 19 Feb 2024 21:36:47 -0600 Subject: [PATCH 124/214] Delete tests/tester.ipynb --- tests/tester.ipynb | 299 --------------------------------------------- 1 file changed, 299 deletions(-) delete mode 100644 tests/tester.ipynb diff --git a/tests/tester.ipynb b/tests/tester.ipynb deleted file mode 100644 index ec7a5de6..00000000 --- a/tests/tester.ipynb +++ /dev/null @@ 
-1,299 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from utils.linkage import deduplicate_perfect_matches\n", - "from utils.constants import BASE_FILEPATH\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idfirst_namelast_namefull_nameentity_typestatepartycompany
01869727NaNNaNwilliam \bstonerindividualNaNNaNNaN
11779679NaNNaNrm coulonindividualNaNNaNarea agency on aging
22277221NaNNaNjames engelsonindividualNaNNaNretired
32277156NaNNaNmarivic franciaskinnerindividualNaNNaNfibre source international corp
42341373NaNNaNanthony grindleindividualNaNNaNzimmerbiomet
\n", - "
" - ], - "text/plain": [ - " id first_name last_name full_name entity_type state \\\n", - "0 1869727 NaN NaN william \bstoner individual NaN \n", - "1 1779679 NaN NaN rm coulon individual NaN \n", - "2 2277221 NaN NaN james engelson individual NaN \n", - "3 2277156 NaN NaN marivic franciaskinner individual NaN \n", - "4 2341373 NaN NaN anthony grindle individual NaN \n", - "\n", - " party company \n", - "0 NaN NaN \n", - "1 NaN area agency on aging \n", - "2 NaN retired \n", - "3 NaN fibre source international corp \n", - "4 NaN zimmerbiomet " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "inds_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_individuals_table.csv\", index_col=0, low_memory=False)\n", - "orgs_sample = pd.read_csv(BASE_FILEPATH / \"output\" / \"complete_organizations_table.csv\", index_col=0)\n", - "inds_sample.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "deduplicated_inds = deduplicate_perfect_matches(inds_sample)\n", - "deduplicated_orgs = deduplicate_perfect_matches(orgs_sample)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
duplicated_uuidsmapped_uuids
0af6dd8b5-21d4-4ab3-b0e9-b6e617e4837baf6dd8b5-21d4-4ab3-b0e9-b6e617e4837b
1d2ed5639-be25-40aa-bfbc-7b8bb3bba62ad2ed5639-be25-40aa-bfbc-7b8bb3bba62a
2054e4f1d-aec7-4188-968d-4429748f2fe6054e4f1d-aec7-4188-968d-4429748f2fe6
3c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74
481125483-336e-4d48-9f4b-5708e91c383581125483-336e-4d48-9f4b-5708e91c3835
\n", - "
" - ], - "text/plain": [ - " duplicated_uuids mapped_uuids\n", - "0 af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b af6dd8b5-21d4-4ab3-b0e9-b6e617e4837b\n", - "1 d2ed5639-be25-40aa-bfbc-7b8bb3bba62a d2ed5639-be25-40aa-bfbc-7b8bb3bba62a\n", - "2 054e4f1d-aec7-4188-968d-4429748f2fe6 054e4f1d-aec7-4188-968d-4429748f2fe6\n", - "3 c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74 c0a31a0d-fc53-42e5-9dfc-23dfbf8c3b74\n", - "4 81125483-336e-4d48-9f4b-5708e91c3835 81125483-336e-4d48-9f4b-5708e91c3835" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test = pd.read_csv(BASE_FILEPATH / \"output\" / \"deduplicated_UUIDs.csv\")\n", - "test.head(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dedup_inds_id = set(deduplicated_inds.id.tolist())\n", - "dedup_orgs_id = set(deduplicated_orgs.id.tolist())\n", - "test_ids = set(test.duplicated_uuids.tolist())\n", - "\n", - "dedup_orgs_id.issubset(test_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(612181, 526145)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(deduplicated_inds.id.tolist()), len(dedup_inds_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "climate_cabinet", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 
-} From 976d2af420317817143f45e6476ef41cbfaf0d43 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Wed, 21 Feb 2024 10:17:31 -0600 Subject: [PATCH 125/214] Saving notebook on networkx --- notebooks/Test.ipynb | 887 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 887 insertions(+) create mode 100644 notebooks/Test.ipynb diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb new file mode 100644 index 00000000..457fb6f0 --- /dev/null +++ b/notebooks/Test.ipynb @@ -0,0 +1,887 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "orgs_sample = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0).sample(10)\n", + "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False).sample(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_typedonationsdonations_toreceiveddonations_from
050c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee503Pabar Pac (Pa Bar Assn)5210MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC
150c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee2969REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...5768MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC
250c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee4592COMMITTEE TO ELECT DR PATRICIA BERNARD4274UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...
362ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee2459REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...2602UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...
462ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee4748MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC4153REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "1 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "2 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "3 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", + "4 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", + "\n", + " name state entity_type \\\n", + "0 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "1 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "2 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", + "4 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", + "\n", + " donations donations_to received \\\n", + "0 503 Pabar Pac (Pa Bar Assn) 5210 \n", + "1 2969 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 5768 \n", + "2 4592 COMMITTEE TO ELECT DR PATRICIA BERNARD 4274 \n", + "3 2459 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 2602 \n", + "4 4748 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 4153 \n", + "\n", + " donations_from \n", + "0 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "1 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "2 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", + "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", + "4 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 
" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {'id':['50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360',\n", + " '62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", + " 'd31df1ca-714e-4a82-9e88-1892c0451a71','d31df1ca-714e-4a82-9e88-1892c0451a71','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", + " '4db76e6e-f0d5-40eb-82de-6dbcdb562dd7','f71341d7-d27e-47eb-9b66-903af39d6cb5','c875d7de-94be-42f1-b994-dd89b114d51e',\n", + " '910c4d36-b036-469e-aa2a-ea4ff8855a6c','60d454d1-3773-4d88-80e9-132c161da0f0','1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd',\n", + " '1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd','1d2b5bc0-9385-4cd7-ac48-df43b3eca6fe','1d2b5bc0-9385-4cd7-ac48-df43b3eca6ff',\n", + " '1d2b5bc0-9385-4cd7-ac48-df43b3eca6fd'],\n", + " 'name':['REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC','REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC',\n", + " 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC','UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB',\n", + " 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB','UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB',\n", + " 'COMMITTEE TO ELECT DR PATRICIA BERNARD','COMMITTEE TO ELECT DR PATRICIA BERNARD','UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB',\n", + " 'Ugi Utilities Inc/Ugi Energy Services Llc Pac','Pabar Pac (Pa Bar Assn)','Pa Fraternal Order Of Police Pac','Citizens For Kail',\n", + " 'Paa Pac','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC',\n", + " 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC','Paa Pac'],\n", + " 'state':['MI','MI','MI','MI','MI','MI','MI','MI','MI','PA','PA','PA','PA','PA','MI','MI','MI','MI','PA'],\n", + " 
'entity_type':['committee','committee','committee','committee','committee','committee','committee','committee','committee',\n", + " 'Organization','Organization','Organization','Organization','Organization','committee','committee','committee','committee','Organization']}\n", + "\n", + "sample_df = pd.DataFrame(data)\n", + "sample_df['donations'] = np.random.randint(100, 6000, sample_df.shape[0])\n", + "sample_df['donations_to'] = np.random.choice(sample_df.name.tolist(), size=len(sample_df))\n", + "sample_df['received'] = np.random.randint(0, 6000, sample_df.shape[0])\n", + "sample_df['donations_from'] = np.random.choice(sample_df.name.tolist(), size=len(sample_df))\n", + "sample_df.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Some Considerations to Remember Moving Forward:\n", + "1. The 'get_likely_name' function takes in 3 string inputs. The data is not clean, and when there are NaN entries the function somehow treats the null values as strings, so a row that has \"Tim\", \"Walz\" and NaN in the first, last, and full name columns is combined as \"Tim Walz Nan\". When calling this function, account for this possibility." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Playing Around with Graphs\n", + "\n", + "**Some considerations**\n", + "1. What attributes do we want each node to have?\n", + "- UUID, Name, Entity Type, Address, {from transactions table: money_donated and money_given}, affiliation?\n", + "- Should transaction info also be included? If so, how would we show transaction info to multiple recipients / from multiple donors?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Notes for Graphs\n", + "**Generating Graphs**\n", + "* nx.Graph() → the simplest graph type: undirected (edges go both ways)\n", + "* nx.DiGraph() → a graph with directed edges\n", + "* nx.MultiGraph() → an undirected graph that allows multiple edges between the same pair of nodes\n", + "* nx.MultiDiGraph() → the MultiGraph equivalent for directed graphs\n", + "\n", + "**Finding Centrality**\n", + "There are 5 main ways to find the centrality of a node (how important or frequent a node is / how influential some donors potentially are)\n", + "* nx.degree_centrality : based on the assumption that important nodes have many connections\n", + "* nx.closeness_centrality : based on the assumption that important nodes are close to other nodes. It is calculated as the reciprocal of the average shortest-path length from the given node to all other reachable nodes. \n", + "* nx.eigenvector_centrality : assumes that important nodes are those connected to other important nodes, so a node's score depends on the scores of its neighbors. (By contrast, betweenness centrality, described next, considers the number of shortest paths between 2 nodes; for graphs with a large number of nodes, the unnormalized value of betweenness centrality can be very high.)\n", + "* nx.betweenness_centrality : a measure of centrality in a graph based on shortest paths. For every pair of vertices in a connected graph, there exists at least one shortest path between the vertices such that either the number of edges that the path passes through (for unweighted graphs) or the sum of the weights of the edges (for weighted graphs) is minimized. The betweenness centrality for each vertex is the number of these shortest paths that pass through the vertex\n", + "* nx.pagerank : Page Rank Algorithm (developed by Google founders to measure the importance of webpages) assigns a score of importance to each node. Important nodes are those with many inlinks from important pages. 
It mainly works for Directed Networks\n", + "\n", + "**Finding Connections**\n", + "* nx.find_cliques (undirected graphs): finds all maximal cliques, i.e. groups of nodes that are all connected to each other and that cannot be extended by adding another node\n", + "* nx.k_core : A k-core is a maximal subgraph that contains nodes of degree k or more. Groups clusters meeting the threshold k (can be used as a toggle)\n", + "\n", + "**Sources**\n", + "* https://www.youtube.com/watch?v=VetBkjcm9Go\n", + "* https://www.activestate.com/blog/graph-theory-using-python-introduction-and-implementation/ \n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_type
13516581ec10e00-c7a7-4bcc-861f-cd1ff43bfc04Friends Of Freedom & ConveniencePACommittee
11589606359974e-9e78-409c-b9dd-fe7415304560GRETCHEN WHITMER FOR GOVERNORMIcommittee
4742209e43c101-03ef-4083-ab60-b7fd76dea7b5TUDOR DIXON FOR GOVERNOR INCMIcommittee
257895fb7cb16-912f-4fec-ba37-f201465a5725LNAACK BEVERLEYMIcorporation
4956426359974e-9e78-409c-b9dd-fe7415304560GRETCHEN WHITMER FOR GOVERNORMIcommittee
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "1351658 1ec10e00-c7a7-4bcc-861f-cd1ff43bfc04 \n", + "1158960 6359974e-9e78-409c-b9dd-fe7415304560 \n", + "474220 9e43c101-03ef-4083-ab60-b7fd76dea7b5 \n", + "25789 5fb7cb16-912f-4fec-ba37-f201465a5725 \n", + "495642 6359974e-9e78-409c-b9dd-fe7415304560 \n", + "\n", + " name state entity_type \n", + "1351658 Friends Of Freedom & Convenience PA Committee \n", + "1158960 GRETCHEN WHITMER FOR GOVERNOR MI committee \n", + "474220 TUDOR DIXON FOR GOVERNOR INC MI committee \n", + "25789 LNAACK BEVERLEY MI corporation \n", + "495642 GRETCHEN WHITMER FOR GOVERNOR MI committee " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orgs_sample.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id 1ec10e00-c7a7-4bcc-861f-cd1ff43bfc04\n", + "name Friends Of Freedom & Convenience\n", + "state PA\n", + "entity_type Committee\n", + "Name: 1351658, dtype: object\n", + "id 6359974e-9e78-409c-b9dd-fe7415304560\n", + "name GRETCHEN WHITMER FOR GOVERNOR\n", + "state MI\n", + "entity_type committee\n", + "Name: 1158960, dtype: object\n", + "id 9e43c101-03ef-4083-ab60-b7fd76dea7b5\n", + "name TUDOR DIXON FOR GOVERNOR INC\n", + "state MI\n", + "entity_type committee\n", + "Name: 474220, dtype: object\n", + "id 5fb7cb16-912f-4fec-ba37-f201465a5725\n", + "name LNAACK BEVERLEY \n", + "state MI\n", + "entity_type corporation\n", + "Name: 25789, dtype: object\n", + "id 6359974e-9e78-409c-b9dd-fe7415304560\n", + "name GRETCHEN WHITMER FOR GOVERNOR\n", + "state MI\n", + "entity_type committee\n", + "Name: 495642, dtype: object\n", + "id f1df070b-a91b-4aab-b943-4f80e5c41026\n", + "name MICHIGAN LABORERS POLITICAL LEAGUE\n", + "state MI\n", + "entity_type committee\n", + "Name: 1939825, dtype: object\n", + "id 57fbfb3e-835c-4096-9dc9-1555816aff0d\n", + "name PLUMBERS AND PIPEFITTERS 
LOCAL 333 PAC\n", + "state MI\n", + "entity_type committee\n", + "Name: 1643401, dtype: object\n", + "id 357e354f-d81b-4eb5-af6e-574afd175672\n", + "name MICHIGAN FARM BUREAU POLITICAL ACTION COMMITTEE\n", + "state MI\n", + "entity_type committee\n", + "Name: 2088505, dtype: object\n", + "id 1a5d85e2-0382-4064-9606-8ee0a2be5ea1\n", + "name ANEDOT INC \n", + "state MI\n", + "entity_type corporation\n", + "Name: 157224, dtype: object\n", + "id 6d8e2e79-72c1-487e-835f-ededfe0aafaa\n", + "name DEMOCRATIC LEGISLATIVE CAMPAIGN COMMITTEE\n", + "state MI\n", + "entity_type committee\n", + "Name: 854930, dtype: object\n" + ] + } + ], + "source": [ + "for index, row in orgs_sample.iterrows():\n", + " print(row)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'color': 'blue', 'size': 2}" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G = nx.Graph()\n", + "G.add_node(0)\n", + "nx.set_node_attributes(G, \"red\", name=\"color\")\n", + "nx.set_node_attributes(G, 2, name=\"size\")\n", + "G.add_node(1)\n", + "nx.set_node_attributes(G, \"blue\", name='color')\n", + "G.nodes[0]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_type
297930e44b8553-0dff-4a6b-8335-d97849641ff8FRIENDS OF DANA NESSELMIcommittee
9455364f5b8fc4-c871-4774-a436-1622b8e26a44MALLORY MCMORROW FOR MICHIGANMIcommittee
\n", + "
" + ], + "text/plain": [ + " id name \\\n", + "297930 e44b8553-0dff-4a6b-8335-d97849641ff8 FRIENDS OF DANA NESSEL \n", + "945536 4f5b8fc4-c871-4774-a436-1622b8e26a44 MALLORY MCMORROW FOR MICHIGAN \n", + "\n", + " state entity_type \n", + "297930 MI committee \n", + "945536 MI committee " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + ".head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfirst_namelast_namefull_nameentity_typestatepartycompany
891077c94a0491-7ea1-45ce-a155-6153ea74da08BELALAHNERBELA LAHNER ...IndividualMINaNNOT EMPLOYED
617571c38816dd-8a47-4102-97cd-59d0f6bc42dcJANICESHAPIROJANICE SHAPIRO ...IndividualTXNaNNaN
\n", + "
" + ], + "text/plain": [ + " id first_name \\\n", + "891077 c94a0491-7ea1-45ce-a155-6153ea74da08 BELA \n", + "617571 c38816dd-8a47-4102-97cd-59d0f6bc42dc JANICE \n", + "\n", + " last_name \\\n", + "891077 LAHNER \n", + "617571 SHAPIRO \n", + "\n", + " full_name entity_type state \\\n", + "891077 BELA LAHNER ... Individual MI \n", + "617571 JANICE SHAPIRO ... Individual TX \n", + "\n", + " party company \n", + "891077 NaN NOT EMPLOYED \n", + "617571 NaN NaN " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_sample.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "def add_notes_from_df(df):\n", + " G = nx.MultiDiGraph()\n", + " if 'name' in df.columns:\n", + " node_name = 'name'\n", + " else: node_name = 'full_name'\n", + " for index, row in df.iterrows():\n", + " # if nodes 1 and 2 don't exist, this both creates the nodes and adds the edges to them\n", + " # the weight can be added to show the magnitude of the edge\n", + " G.add_node(row[node_name])\n", + " for column in df.columns:\n", + " nx.set_node_attributes(G, row[column], name=column)\n", + " nx.draw_random(G, with_labels=True)\n", + " plt.show()\n", + " return G" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'id': 'da441d41-1050-4505-a834-99d6023001e1',\n", + " 'first_name': 'AARON ',\n", + " 'last_name': 'KRAUSS ',\n", + " 'full_name': 'AARON KRAUSS ',\n", + " 'entity_type': 'Individual',\n", + " 'state': 'MI',\n", + " 'party': nan,\n", + " 'company': nan}" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = add_notes_from_df(inds_sample)\n", + "x.nodes['BELA LAHNER ']" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['BELA LAHNER ',\n", + " 'JANICE SHAPIRO ',\n", + " 'RAMON HAWKINS ',\n", + " 'LEAH CYGAN ',\n", + " 'ALLISON HATT ^ ',\n", + " 'ELLEN FEINGOLD ',\n", + " 'KEVIN HERTEL FOR SENATE',\n", + " 'SARA LAFORGE ^ ',\n", + " 'LOIS TACK ',\n", + " 'AARON KRAUSS ']" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_sample.full_name.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'MALLORY MCMORROW FOR MICHIGAN'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[94], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mG\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnodes\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMALLORY MCMORROW FOR MICHIGAN\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/classes/reportviews.py:194\u001b[0m, in \u001b[0;36mNodeView.__getitem__\u001b[0;34m(self, n)\u001b[0m\n\u001b[1;32m 
189\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(n, \u001b[38;5;28mslice\u001b[39m):\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m nx\u001b[38;5;241m.\u001b[39mNetworkXError(\n\u001b[1;32m 191\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not support slicing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtry list(G.nodes)[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstart\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstop\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m )\n\u001b[0;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_nodes\u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m]\u001b[49m\n", + "\u001b[0;31mKeyError\u001b[0m: 'MALLORY MCMORROW FOR MICHIGAN'" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "G = nx.petersen_graph()\n", + "subax1 = plt.subplot(121)\n", + "nx.draw(G, with_labels=True, font_weight='bold')\n", + "subax2 = plt.subplot(122)\n", + "nx.draw_shell(G, nlist=[range(5, 10), range(5)], with_labels=True, font_weight='light')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC': Text(-0.071782758799796, -0.3387166453182715, 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'),\n", + " 'Paa Pac': Text(0.06023249378587841, -0.07946204618171311, 'Paa Pac'),\n", + " 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB': Text(-0.12554712442237967, 0.08789304420689323, 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB'),\n", + " 'COMMITTEE TO ELECT DR PATRICIA BERNARD': Text(-0.40486733116122986, -0.04769565353200762, 'COMMITTEE TO ELECT DR PATRICIA BERNARD'),\n", + " 'Pabar Pac (Pa Bar Assn)': Text(-0.6714326170558735, 0.21693950702464565, 'Pabar Pac (Pa Bar Assn)'),\n", + " 'Ugi Utilities Inc/Ugi Energy Services Llc Pac': Text(1.0, -0.38838038123915186, 'Ugi Utilities Inc/Ugi Energy Services Llc Pac'),\n", + " 'Pa Fraternal Order Of Police Pac': Text(0.5897482153166077, -0.2569656851069028, 'Pa Fraternal Order Of Police Pac'),\n", + " 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC': Text(-0.27784326029554446, 0.2828712220763738, 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC'),\n", + " 'Citizens For Kail': Text(-0.09850761736766293, 0.5235166380701339, 'Citizens For Kail')}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "G = nx.from_pandas_edgelist(sample_df,source='name',target='donations_to',edge_attr=['donations','received'])\n", + "G.nodes()\n", + "pos=nx.spring_layout(G)\n", + "weights = list(nx.get_edge_attributes(G,'donations').values())\n", + "weights = [i/5000 for i in weights]\n", + "node_color = [G.degree(v) for v in G] \n", + "#node_size = [0.0005 * nx.get_node_attributes(G, 'donations')[v] for v in G] \n", + "nx.draw_networkx_nodes(G, pos, node_color=node_color)#, node_size=node_size) \n", + "nx.draw_networkx_edges(G, pos, width=weights)\n", + "nx.draw_networkx_labels(G, pos)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G.nodes['Citizens For Kail']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m node_color \u001b[38;5;241m=\u001b[39m [G\u001b[38;5;241m.\u001b[39mdegree(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# node colour is a list of degrees of nodes \u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m node_size \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[38;5;241;43m0.0005\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m 
\u001b[49m\u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_node_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpopulation\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mG\u001b[49m\u001b[43m]\u001b[49m \n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# size of node is a list of population of cities \u001b[39;00m\n\u001b[1;32m 10\u001b[0m edge_width \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0.0015\u001b[39m \u001b[38;5;241m*\u001b[39m G[u][v][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m u, v \u001b[38;5;129;01min\u001b[39;00m G\u001b[38;5;241m.\u001b[39medges()] \n", + "Cell \u001b[0;32mIn[8], line 7\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 4\u001b[0m node_color \u001b[38;5;241m=\u001b[39m [G\u001b[38;5;241m.\u001b[39mdegree(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# node colour is a list of degrees of nodes \u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m node_size \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0.0005\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_node_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpopulation\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] 
\n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# size of node is a list of population of cities \u001b[39;00m\n\u001b[1;32m 10\u001b[0m edge_width \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0.0015\u001b[39m \u001b[38;5;241m*\u001b[39m G[u][v][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m u, v \u001b[38;5;129;01min\u001b[39;00m G\u001b[38;5;241m.\u001b[39medges()] \n", + "\u001b[0;31mKeyError\u001b[0m: 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# fixing the size of the figure \n", + "plt.figure(figsize =(10, 7)) \n", + "\n", + "node_color = [G.degree(v) for v in G] \n", + "# node colour is a list of degrees of nodes \n", + "\n", + "node_size = [0.0005 * nx.get_node_attributes(G, 'population')[v] for v in G] \n", + "# size of node is a list of population of cities \n", + "\n", + "edge_width = [0.0015 * G[u][v]['weight'] for u, v in G.edges()] \n", + "# width of edge is a list of weight of edges \n", + "\n", + "nx.draw_networkx(G, node_size = node_size, \n", + "\t\t\t\tnode_color = node_color, alpha = 0.7, \n", + "\t\t\t\twith_labels = True, width = edge_width, \n", + "\t\t\t\tedge_color ='.4', cmap = plt.cm.Blues) \n", + "\n", + "plt.axis('off') \n", + "plt.tight_layout(); " + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'color': 'white'}" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G = nx.Graph()\n", + "G.add_node(0)\n", + "nx.set_node_attributes(G, \"red\", name=\"color\")\n", + "nx.set_node_attributes(G, 4, name = 'size')\n", + "G.add_node(2)\n", + "nx.set_node_attributes(G, \"white\", name='color')\n", + "G.nodes[2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "climate_cabinet", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9534af9550a6ffeaa16683cd375fb8696d057555 Mon Sep 17 00:00:00 2001 From: Avery Date: Wed, 21 Feb 2024 
10:24:19 -0600 Subject: [PATCH 126/214] combine test files --- utils/linkage.py | 9 +++---- utils/tests/test_linkage.py | 49 +++++++++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 7efdf024..d1b85447 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,19 +1,16 @@ import textdistance as td import usaddress from names_dataset import NameDataset - -""" -Module for performing record linkage on state campaign finance dataset -""" import math import os.path import re - import numpy as np import pandas as pd - from utils.constants import COMPANY_TYPES, repo_root +""" +Module for performing record linkage on state campaign finance dataset +""" def get_address_line_1_from_full_address(address: str) -> str: """Given a full address, return the first line of the formatted address diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 3695a399..4e57f1b0 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,17 +1,18 @@ import numpy as np import pandas as pd import pytest - +from utils.constants import BASE_FILEPATH from utils.linkage import ( calculate_row_similarity, calculate_string_similarity, - row_matches, + row_matches, deduplicate_perfect_matches ) -# import pytest - +""" +Module for testing functions in linkage.py +""" -# creating a test for calculate_row_similarity and row_matches +# Creating a test for calculate_row_similarity and row_matches # to put in data: d = { @@ -22,6 +23,7 @@ "8 Fancy Way, Chicago", ], } + test_df = pd.DataFrame(data=d) @@ -105,3 +107,40 @@ def test_row_matches(row_match_scen1): ) assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} + +# Test for dedupe function +@pytest.fixture +def return_data(filename): + path = BASE_FILEPATH / "output" / filename + df = pd.read_csv(path, low_memory=False) + return df + + +@pytest.fixture +def call_dedup_func(): + inds_sample = 
return_data("complete_individuals_table.csv") + orgs_sample = return_data("complete_organizations_table.csv") + + assert not orgs_sample.empty() + assert not inds_sample.empty() + + deduplicated_inds = deduplicate_perfect_matches(inds_sample) + deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) + + output_dedup_ids = return_data("deduplicated_UUIDs.csv") + # outpud_ids should have all the ids that deduplicated_inds and deduplicated_orgs + # has + + return deduplicated_inds, deduplicated_orgs, output_dedup_ids + + +@pytest.fixture +def confirm_dedup_uuids(): + inds, orgs, output = call_dedup_func() + + dedup_inds_id = set(inds.id.tolist()) + dedup_orgs_id = set(orgs.id.tolist()) + unique_ids = set(output.duplicated_uuids.tolist()) + + assert dedup_inds_id.issubset(unique_ids) + assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file From f91ee0e69cd311e911a0f976309b9e523aca705a Mon Sep 17 00:00:00 2001 From: Avery Date: Wed, 21 Feb 2024 10:26:51 -0600 Subject: [PATCH 127/214] precommit --- utils/linkage.py | 9 ++++++--- utils/tests/test_linkage.py | 7 +++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d1b85447..44d5e40c 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,17 +1,20 @@ -import textdistance as td -import usaddress -from names_dataset import NameDataset import math import os.path import re + import numpy as np import pandas as pd +import textdistance as td +import usaddress +from names_dataset import NameDataset + from utils.constants import COMPANY_TYPES, repo_root """ Module for performing record linkage on state campaign finance dataset """ + def get_address_line_1_from_full_address(address: str) -> str: """Given a full address, return the first line of the formatted address diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 4e57f1b0..4a5f73f2 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,11 +1,13 @@ 
import numpy as np import pandas as pd import pytest + from utils.constants import BASE_FILEPATH from utils.linkage import ( calculate_row_similarity, calculate_string_similarity, - row_matches, deduplicate_perfect_matches + deduplicate_perfect_matches, + row_matches, ) """ @@ -108,6 +110,7 @@ def test_row_matches(row_match_scen1): assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} + # Test for dedupe function @pytest.fixture def return_data(filename): @@ -143,4 +146,4 @@ def confirm_dedup_uuids(): unique_ids = set(output.duplicated_uuids.tolist()) assert dedup_inds_id.issubset(unique_ids) - assert dedup_orgs_id.issubset(unique_ids) \ No newline at end of file + assert dedup_orgs_id.issubset(unique_ids) From 2a30961daeda9accb4db453018eff4430a40135a Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:33:23 -0600 Subject: [PATCH 128/214] Delete tests directory --- tests/test_dedup.py | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 tests/test_dedup.py diff --git a/tests/test_dedup.py b/tests/test_dedup.py deleted file mode 100644 index f1c01690..00000000 --- a/tests/test_dedup.py +++ /dev/null @@ -1,42 +0,0 @@ -import pandas as pd -import pytest - -from utils.constants import BASE_FILEPATH -from utils.linkage import deduplicate_perfect_matches - - -@pytest.fixture -def return_data(filename): - path = BASE_FILEPATH / "output" / filename - df = pd.read_csv(path, low_memory=False) - return df - - -@pytest.fixture -def call_dedup_func(): - inds_sample = return_data("complete_individuals_table.csv") - orgs_sample = return_data("complete_organizations_table.csv") - - assert not orgs_sample.empty() - assert not inds_sample.empty() - - deduplicated_inds = deduplicate_perfect_matches(inds_sample) - deduplicated_orgs = deduplicate_perfect_matches(orgs_sample) - - output_dedup_ids = return_data("deduplicated_UUIDs.csv") - # outpud_ids should 
have all the ids that deduplicated_inds and deduplicated_orgs - # has - - return deduplicated_inds, deduplicated_orgs, output_dedup_ids - - -@pytest.fixture -def confirm_dedup_uuids(): - inds, orgs, output = call_dedup_func() - - dedup_inds_id = set(inds.id.tolist()) - dedup_orgs_id = set(orgs.id.tolist()) - unique_ids = set(output.duplicated_uuids.tolist()) - - assert dedup_inds_id.issubset(unique_ids) - assert dedup_orgs_id.issubset(unique_ids) From e007f3cfacc324963d970966d5c3fa733c4a9aa7 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 21 Feb 2024 14:03:40 -0600 Subject: [PATCH 129/214] splink notebook --- notebooks/splink.ipynb | 659 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 659 insertions(+) create mode 100644 notebooks/splink.ipynb diff --git a/notebooks/splink.ipynb b/notebooks/splink.ipynb new file mode 100644 index 00000000..c37813d0 --- /dev/null +++ b/notebooks/splink.ipynb @@ -0,0 +1,659 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1a863d3e-59b4-46c3-ad0f-7d192a61ebe2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).\n", + " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", + "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.2' currently installed).\n", + " from pandas.core import (\n", + "/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/3624639948.py:16: DtypeWarning: Columns (8,9,10,11,12) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", + " df_i = pd.read_csv('complete_individuals_table.csv')\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Define sample data\n", + "# i_data = {\n", + "# 'unique_id': range(1, 13),\n", + "# 'first_name': ['John', 'Jane', 'David', 'Emily', 'Michael', 'Sarah', 'John', 'Jane', 'David', 'Emily', 'John', 'John'],\n", + "# 'last_name': ['Doe', 'Smith', 'Johnson', 'Brown', 'Davis', 'Miller', 'Doe', 'Smith', 'Johnson', 'Brown', 'Miller', 'Jones'],\n", + "# 'full_name': ['John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'Michael Davis', 'Sarah Miller', 'John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'John Miller', 'John Jones'],\n", + "# 'entity_type': ['Person'] * 12,\n", + "# 'state': ['CA', 'NY', 'TX', 'FL', 'CA', 'NY', 'CA', 'TX', 'FL', 'NY', 'CA', 'FL'],\n", + "# 'party': ['Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent'],\n", + "# 'company': ['Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Google', 'Microsoft']\n", + "# }\n", + "\n", + "# Create DataFrame\n", + "df_i = pd.read_csv('complete_individuals_table.csv')\n", + "df_i.rename(columns={'id': 'unique_id'}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "61447af5-7270-4438-baf7-29ba08203019", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0unique_idfirst_namelast_namefull_nameentity_typestatepartycompanyoccupationaddresszipcity
00efa41f9a-a31c-4154-acfa-3707d2c7cc47FREDERICKBERGFREDERICK BERG ...IndividualMINaNBUTZEL LONGATTORNEY1033 YORKSHIRE48230-0000GROSSE POINTE PARK
113e7661e4-1557-4fc8-9cd7-e4381eaed2d8JENNIFERCONSIGLIOJENNIFER CONSIGLIO ...IndividualMINaNBUTZEL LONGATTORNEY7520 SHUMAN DRIVE48438-0000GOODRICH
22a3bbe060-5da0-4b1d-a61c-0a32ec18c2edVANESSACROCETTOVANESSA CROCETTO ...IndividualMINaNBUTZEL LONGCHIEF MARKETING OFFICER4104 ARLINGTON DRIVE48073-0000ROYAL OAK
33667ae9be-5aff-4ce2-a032-5947032c1a9aCAREY A.DEWITTCAREY A. DEWITT ...IndividualMINaNBUTZEL LONGATTORNEY770 HUPP CROSS ROAD48301-0000BLOOMFIELD TWP
44f02b191a-235a-4d79-9e00-3fd63a249e66JENNIFERDUKARSKIJENNIFER DUKARSKI ...IndividualMINaNNaNNaN11855 BECK ROAD48170-0000PLYMOUTH
..........................................
2486933248693367a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486934248693467a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486935248693567a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486936248693667a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486937248693767a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
\n", + "

2486938 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 unique_id \\\n", + "0 0 efa41f9a-a31c-4154-acfa-3707d2c7cc47 \n", + "1 1 3e7661e4-1557-4fc8-9cd7-e4381eaed2d8 \n", + "2 2 a3bbe060-5da0-4b1d-a61c-0a32ec18c2ed \n", + "3 3 667ae9be-5aff-4ce2-a032-5947032c1a9a \n", + "4 4 f02b191a-235a-4d79-9e00-3fd63a249e66 \n", + "... ... ... \n", + "2486933 2486933 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", + "2486934 2486934 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", + "2486935 2486935 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", + "2486936 2486936 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", + "2486937 2486937 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", + "\n", + " first_name last_name \\\n", + "0 FREDERICK BERG \n", + "1 JENNIFER CONSIGLIO \n", + "2 VANESSA CROCETTO \n", + "3 CAREY A. DEWITT \n", + "4 JENNIFER DUKARSKI \n", + "... ... ... \n", + "2486933 GRETCHEN WHITMER \n", + "2486934 GRETCHEN WHITMER \n", + "2486935 GRETCHEN WHITMER \n", + "2486936 GRETCHEN WHITMER \n", + "2486937 GRETCHEN WHITMER \n", + "\n", + " full_name entity_type state \\\n", + "0 FREDERICK BERG ... Individual MI \n", + "1 JENNIFER CONSIGLIO ... Individual MI \n", + "2 VANESSA CROCETTO ... Individual MI \n", + "3 CAREY A. DEWITT ... Individual MI \n", + "4 JENNIFER DUKARSKI ... Individual MI \n", + "... ... ... ... \n", + "2486933 GRETCHEN WHITMER Candidate MI \n", + "2486934 GRETCHEN WHITMER Candidate MI \n", + "2486935 GRETCHEN WHITMER Candidate MI \n", + "2486936 GRETCHEN WHITMER Candidate MI \n", + "2486937 GRETCHEN WHITMER Candidate MI \n", + "\n", + " party company occupation address \\\n", + "0 NaN BUTZEL LONG ATTORNEY 1033 YORKSHIRE \n", + "1 NaN BUTZEL LONG ATTORNEY 7520 SHUMAN DRIVE \n", + "2 NaN BUTZEL LONG CHIEF MARKETING OFFICER 4104 ARLINGTON DRIVE \n", + "3 NaN BUTZEL LONG ATTORNEY 770 HUPP CROSS ROAD \n", + "4 NaN NaN NaN 11855 BECK ROAD \n", + "... ... ... ... ... 
\n", + "2486933 NaN NaN NaN NaN \n", + "2486934 NaN NaN NaN NaN \n", + "2486935 NaN NaN NaN NaN \n", + "2486936 NaN NaN NaN NaN \n", + "2486937 NaN NaN NaN NaN \n", + "\n", + " zip city \n", + "0 48230-0000 GROSSE POINTE PARK \n", + "1 48438-0000 GOODRICH \n", + "2 48073-0000 ROYAL OAK \n", + "3 48301-0000 BLOOMFIELD TWP \n", + "4 48170-0000 PLYMOUTH \n", + "... ... ... \n", + "2486933 NaN NaN \n", + "2486934 NaN NaN \n", + "2486935 NaN NaN \n", + "2486936 NaN NaN \n", + "2486937 NaN NaN \n", + "\n", + "[2486938 rows x 13 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_i" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "25334eac-e048-47e7-b911-571853e2a666", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RendererRegistry.enable('html')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from splink.duckdb.linker import DuckDBLinker\n", + "import altair as alt\n", + "alt.renderers.enable('html')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a5915f73-77a6-42c9-a7df-a8f0d396836c", + "metadata": {}, + "outputs": [], + "source": [ + "import splink.duckdb.comparison_template_library as ctl\n", + "import splink.duckdb.comparison_library as cl\n", + "\n", + "individual_settings = {\n", + " \"link_type\": \"dedupe_only\",\n", + " \"blocking_rules_to_generate_predictions\": [\n", + " \"l.full_name - r.full_name\",\n", + " \"l.first_name = r.first_name and l.last_name = r.last_name\"\n", + " ],\n", + " # \"comparisons\": [\n", + " # ctl.name_comparison(\"first_name\"), #built in comparison function\n", + " # ctl.name_comparison(\"last_name\"),\n", + " # ctl.name_comparison(\"full_name\"),\n", + " # ctl.forename_surname_comparison(\"first_name\", \"last_name\"), #built in comparison function\n", + " # cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", + " # 
cl.jaro_winkler_at_thresholds(\"state\", [0.9, 0.8]), #threshold will catch typos and shortenings\n", + " # cl.jaro_winkler_at_thresholds(\"party\", [0.9, 0.8]),\n", + " # cl.jaro_winkler_at_thresholds(\"company\", [0.9, 0.8]),\n", + " # ],\n", + " \n", + " #DEFAULT\n", + " \"retain_matching_columns\": True,\n", + " \"retain_intermediate_calculation_columns\": True,\n", + " # \"max_iterations\": 10,\n", + " # \"em_convergence\": 0.01\n", + "}\n", + "\n", + "i_blocking = [\n", + " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", + " ]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "42ce12f9-1160-4e1e-848a-deb7882566a6", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3e162fc03ab041429b0c2b4143081ee6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linker = DuckDBLinker(df_i, individual_settings)\n", + "linker.count_num_comparisons_from_blocking_rule(\"l.first_name = r.first_name and l.last_name = r.last_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "217a07cf-eaa3-42a2-b43b-b2eecd740a7b", + "metadata": {}, + "outputs": [], + "source": [ + "def splink_dedupe(df, settings, blocking):\n", + " linker = DuckDBLinker(df, settings)\n", + " linker.estimate_probability_two_random_records_match(blocking, recall=0.6) #default\n", + " linker.estimate_u_using_random_sampling(max_pairs=5e6)\n", + " \n", + " for i in blocking:\n", + " training_session_names = linker.estimate_parameters_using_expectation_maximisation(i)\n", + " \n", + " df_predict = linker.predict()\n", + " df_e = df_predict.as_pandas_dataframe()\n", + " 
clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7) #default\n", + " return clusters.as_pandas_dataframe()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "307ae7b8-b637-4451-aad3-9c848e8dff65", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b530b6b922494872b2721019d974f23e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Deterministic matching rules led to no observed matches! This means that no possible record pairs are matches, and no records are linked to one another.\n", + "If this is truly the case then you do not need to run the linkage model.\n", + "However this is usually in error; expected rules to have recall of 60%. Consider revising rules as they may have an error.\n", + "Probability two random records match is estimated to be 0.\n", + "This means that amongst all possible pairwise record comparisons, one in Infinity are expected to match. With 3,092,429,064,453 total possible comparisons, we expect a total of around 0.00 matching pairs\n", + "----- Estimating u probabilities using random sampling -----\n", + "\n", + "Estimated u probabilities using random sampling\n", + "\n", + "Your model is fully trained. 
All comparisons have at least one estimate for their m and u values\n", + "\n", + "----- Starting EM training session -----\n", + "\n", + "Estimating the m probabilities of the model by blocking on:\n", + "l.first_name = r.first_name and l.last_name = r.last_name\n", + "\n", + "Parameter estimates will be made for the following comparison(s):\n", + "\n", + "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "31a41cbc2cde42819b137edfbd189831", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "EMTrainingException", + "evalue": "Training rule `l.first_name = r.first_name and l.last_name = r.last_name` resulted in no record pairs. This means that in the supplied data set there were no pairs of records for which `l.first_name = r.first_name and l.last_name = r.last_name` was `true`.\nExpectation maximisation requires a substantial number of record comparisons to produce accurate parameter estimates - usually at least a few hundred, but preferably at least a few thousand.\nYou must revise your training blocking rule so that the set of generated comparisons is not empty. 
You can use `linker.count_num_comparisons_from_blocking_rule()` to compute the number of comparisons that will be generated by a blocking rule.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mEMTrainingException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/657554030.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msplink_dedupe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindividual_settings\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi_blocking\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/2410222890.py\u001b[0m in \u001b[0;36msplink_dedupe\u001b[0;34m(df, settings, blocking)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mblocking\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtraining_session_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestimate_parameters_using_expectation_maximisation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdf_predict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/splink/linker.py\u001b[0m in 
\u001b[0;36mestimate_parameters_using_expectation_maximisation\u001b[0;34m(self, blocking_rule, comparisons_to_deactivate, comparison_levels_to_reverse_blocking_rule, estimate_without_term_frequencies, fix_probability_two_random_records_match, fix_m_probabilities, fix_u_probabilities, populate_probability_two_random_records_match_from_trained_values)\u001b[0m\n\u001b[1;32m 1704\u001b[0m )\n\u001b[1;32m 1705\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1706\u001b[0;31m \u001b[0mem_training_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1708\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_populate_m_u_from_trained_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/splink/em_training_session.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcvv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_record_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mbr_sql\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"`{self._blocking_rule_for_training.blocking_rule_sql}`\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m raise EMTrainingException(\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;34mf\"Training rule {br_sql} resulted in no record pairs. 
\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;34m\"This means that in the supplied data set \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mEMTrainingException\u001b[0m: Training rule `l.first_name = r.first_name and l.last_name = r.last_name` resulted in no record pairs. This means that in the supplied data set there were no pairs of records for which `l.first_name = r.first_name and l.last_name = r.last_name` was `true`.\nExpectation maximisation requires a substantial number of record comparisons to produce accurate parameter estimates - usually at least a few hundred, but preferably at least a few thousand.\nYou must revise your training blocking rule so that the set of generated comparisons is not empty. You can use `linker.count_num_comparisons_from_blocking_rule()` to compute the number of comparisons that will be generated by a blocking rule." + ] + } + ], + "source": [ + "splink_dedupe(df_i, individual_settings, i_blocking)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "890ba4ed-8e55-4128-bd36-cd8413cad00e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28948465-fea2-433d-bace-d0627dfe348d", + "metadata": {}, + "outputs": [], + "source": [ + "#--------------------------------------------" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef568813-181b-42a7-b3a2-786fe87addfb", + "metadata": {}, + "outputs": [], + "source": [ + "organizations_settings = {\n", + " \"link_type\": \"dedupe_only\",\n", + " \"blocking_rules_to_generate_predictions\": [\n", + " \"l.name = r.name\",\n", + " ],\n", + " \"comparisons\": [\n", + " ctl.name_comparison(\"name\", term_frequency_adjustments=True),\n", + " cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", + " cl.jaro_winkler_at_thresholds(\"state\", [0.9, 0.8]), #threshold will catch typos and 
shortenings\n", + " # Add more comparisons as needed\n", + " ],\n", + " \"retain_matching_columns\": True,\n", + " \"retain_intermediate_calculation_columns\": True,\n", + " \"max_iterations\": 10,\n", + " \"em_convergence\": 0.01\n", + "}\n", + "\n", + "o_blocking = [\n", + " \"l.name = r.name\",\n", + " \"l.name = r.name and l.state = r.state\",\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9354ec53-0aa3-40b7-968a-d6b5263182c9", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Define sample data\n", + "o_data = {\n", + " 'unique_id': range(1, 13),\n", + " 'name': ['Apple Inc.', 'Google LLC', 'Microsoft Corporation', 'Amazon.com Inc.', 'Facebook Inc.', \n", + " 'Apple Inc.', 'Google LLC', 'Microsoft Corporation', 'Amazon.com Inc.', 'Facebook Inc.', \n", + " 'Google LLC', 'Microsoft Corporation'],\n", + " 'entity_type': ['Organization'] * 12,\n", + " 'state': ['CA', 'NY', 'WA', 'WA', 'CA', 'CA', 'NY', 'WA', 'WA', 'CA', 'NY', 'WA'],\n", + "}\n", + "\n", + "# Create DataFrame\n", + "df_o = pd.DataFrame(o_data)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a1c1785-17b6-4aa2-9a10-b431c70e411d", + "metadata": {}, + "outputs": [], + "source": [ + "splink_dedupe(df_o, organizations_settings, o_blocking)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac3665d0-984c-4367-a7eb-62cd980dff16", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bffdeb09-0788-449d-9046-351d4a258537", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" 
+ } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 89af1cbf5652b762b4e9cc0ef59a50551897d23f Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 21 Feb 2024 17:57:41 -0600 Subject: [PATCH 130/214] splink function clean-up --- requirements.txt | 3 ++- utils/constants.py | 45 +++++++++++++++++++++------------------------ utils/linkage.py | 44 ++++++++++++++++++++++++++------------------ 3 files changed, 49 insertions(+), 43 deletions(-) diff --git a/requirements.txt b/requirements.txt index d28ae9f8..6fa9433b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,4 +19,5 @@ Requests==2.31.0 setuptools==68.0.0 textdistance==4.6.1 usaddress==0.5.4 -networkx~=3.1 \ No newline at end of file +networkx~=3.1 +splink==3.9.12 \ No newline at end of file diff --git a/utils/constants.py b/utils/constants.py index c7c040ac..91e1d87f 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -3,6 +3,9 @@ """ from pathlib import Path +import splink.duckdb.comparison_library as cl +import splink.duckdb.comparison_template_library as ctl + BASE_FILEPATH = Path(__file__).resolve().parent.parent # returns the base_path to the directory @@ -642,35 +645,29 @@ "PAC": "POLITICAL ACTION COMMITTEE", } -individual_settings = { +individuals_settings = { "link_type": "dedupe_only", - "blocking_rules_to_generate_predictions": [ - "l.first_name = r.first_name and l.last_name = r.last_name", - "l.full_name - r.full_name" - ], + "blocking_rules_to_generate_predictions": ["l.full_name - r.full_name"], "comparisons": [ - ctl.name_comparison("first_name"), #built in comparison function + ctl.name_comparison("first_name"), # built in comparison function ctl.name_comparison("last_name"), ctl.name_comparison("full_name"), - ctl.forename_surname_comparison("first_name", "last_name"), #built in comparison function cl.exact_match("entity_type", term_frequency_adjustments=True), - cl.jaro_winkler_at_thresholds("state", [0.9, 0.8]), #threshold will catch typos and shortenings + 
cl.jaro_winkler_at_thresholds( + "state", [0.9, 0.8] + ), # threshold will catch typos and shortenings cl.jaro_winkler_at_thresholds("party", [0.9, 0.8]), cl.jaro_winkler_at_thresholds("company", [0.9, 0.8]), ], - - #DEFAULT + # DEFAULT "retain_matching_columns": True, "retain_intermediate_calculation_columns": True, - "max_iterations": 10, - "em_convergence": 0.01 } -i_blocking = [ - "l.first_name = r.first_name and l.last_name = r.last_name", - "l.full_name = r.full_name and l.state = r.state", - "l.full_name = r.full_name and l.company = r.company", - ] +individuals_blocking = [ + "l.first_name = r.first_name and l.last_name = r.last_name", + "l.full_name = r.full_name", +] organizations_settings = { "link_type": "dedupe_only", @@ -680,16 +677,16 @@ "comparisons": [ ctl.name_comparison("name", term_frequency_adjustments=True), cl.exact_match("entity_type", term_frequency_adjustments=True), - cl.jaro_winkler_at_thresholds("state", [0.9, 0.8]), #threshold will catch typos and shortenings + cl.jaro_winkler_at_thresholds( + "state", [0.9, 0.8] + ), # threshold will catch typos and shortenings # Add more comparisons as needed ], "retain_matching_columns": True, "retain_intermediate_calculation_columns": True, - "max_iterations": 10, - "em_convergence": 0.01 } -o_blocking = [ - "l.name = r.name", - "l.name = r.name and l.state = r.state", - ] \ No newline at end of file +organizations_blocking = [ + "l.name = r.name", + "l.name = r.name and l.state = r.state", +] diff --git a/utils/linkage.py b/utils/linkage.py index 730c92fe..cd76a77f 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -7,17 +7,14 @@ import pandas as pd import textdistance as td import usaddress - from splink.duckdb.linker import DuckDBLinker -import splink.duckdb.comparison_template_library as ctl -import splink.duckdb.comparison_library as cl + +from utils.constants import COMPANY_TYPES, repo_root """ Module for performing record linkage on state campaign finance dataset """ -from 
utils.constants import COMPANY_TYPES, repo_root - def get_address_line_1_from_full_address(address: str) -> str: """Given a full address, return the first line of the formatted address @@ -470,23 +467,34 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: def splink_dedupe(df, settings, blocking): + """Given the individuals or organizations dataframes, the corresponding + configuration settings, and corresponding blocking rules return a + deduplicated dataframe + + Configuration settings and blocking can be found in constants.py as + individuals_settings, indivduals_blocking, organizations_settings, + organizations_blocking - """Given a dataframe, configuration settings, and blocking rules return a deduplicated dataframe Args: - df: dataframe with potential duplicates - settings: model settings based on splink documentation - blocking: list of columns to block on + df: individuals or organizations table + settings: individuald or configuration settings + (based on splink documentation) + blocking: list of columns to block on for the table + (cuts dataframe into parts based on above blocks ) Returns: - dataframe with matched ids of matching rows""" - + dataframe with matched ids of matching rows + """ linker = DuckDBLinker(df, settings) - linker.estimate_probability_two_random_records_match(blocking, recall=0.6) #default + linker.estimate_probability_two_random_records_match( + blocking, recall=0.6 + ) # default linker.estimate_u_using_random_sampling(max_pairs=5e6) - + for i in blocking: - training_session_names = linker.estimate_parameters_using_expectation_maximisation(i) - + linker.estimate_parameters_using_expectation_maximisation(i) + df_predict = linker.predict() - df_e = df_predict.as_pandas_dataframe() - clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7) #default - return clusters.as_pandas_dataframe() \ No newline at end of file + clusters = 
linker.cluster_pairwise_predictions_at_threshold( + df_predict, threshold_match_probability=0.7 + ) # default + return clusters.as_pandas_dataframe() From a5eb7a1e4d4eb9bb814fb7b954f0a2d220b39586 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 21 Feb 2024 18:05:43 -0600 Subject: [PATCH 131/214] splink function clean-up --- utils/linkage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index cd76a77f..90956b7a 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -466,7 +466,9 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: raise ValueError("Can not find Address Number") -def splink_dedupe(df, settings, blocking): +def splink_dedupe( + df: pd.DataFrame, settings: dict, blocking: list +) -> pd.DataFrame: """Given the individuals or organizations dataframes, the corresponding configuration settings, and corresponding blocking rules return a deduplicated dataframe From ae1db64ee715a58f34160672b1f90da27f69812e Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 21 Feb 2024 18:09:50 -0600 Subject: [PATCH 132/214] splink function clean-up2 --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 76fc565a..a8a40fd1 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -6,8 +6,8 @@ import pandas as pd import textdistance as td import usaddress -from splink.duckdb.linker import DuckDBLinker from names_dataset import NameDataset +from splink.duckdb.linker import DuckDBLinker from utils.constants import COMPANY_TYPES, repo_root From 21af2c951ae837a94a7603af71bdb267349b0f4d Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 22 Feb 2024 07:38:04 +0000 Subject: [PATCH 133/214] updates --- utils/linkage_pipeline.py | 87 ++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 16 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index f3755eec..0f7be5e5 100644 --- 
a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -3,10 +3,13 @@ import pandas as pd from nameparser import HumanName +from utils.constants import BASE_FILEPATH from utils.linkage import ( cleaning_company_column, + deduplicate_perfect_matches, get_address_line_1_from_full_address, get_address_number_from_address_line_1, + get_likely_name, get_street_from_address_line_1, standardize_corp_names, ) @@ -32,50 +35,102 @@ def preprocess_pipeline( """ # Preprocess organizations dataframe organizations["name"] = ( - organizations["name"].astype(str, skipna=True).apply(standardize_corp_names) + organizations["name"].astype(str).apply(standardize_corp_names) ) + if "Unnamed: 0" in organizations.columns: + organizations.drop(columns="Unnamed: 0", inplace=True) # Preprocess individuals dataframe if "Unnamed: 0" in individuals.columns: individuals.drop(columns="Unnamed: 0", inplace=True) individuals = individuals.astype( - {"first_name": str, "last_name": str, "full_name": str, "company": str} + { + "first_name": str, + "last_name": str, + "full_name": str, + "company": "string", + } ) # Standardize company names in individuals dataframe - individuals["company"] = individuals["company"].apply(standardize_corp_names) - individuals["company"] = individuals["company"].apply(cleaning_company_column) - - # Address functions, assuming address column is named 'Address' - individuals["Address Line 1"] = individuals["Address"].apply( - get_address_line_1_from_full_address - ) - individuals["Street Name"] = individuals["Address Line 1"].apply( - get_street_from_address_line_1 + individuals["company"] = ( + individuals["company"] + .loc[individuals["company"].notnull()] + .apply(standardize_corp_names) ) - individuals["Address Number"] = individuals["Address Line 1"].apply( - get_address_number_from_address_line_1 + individuals["company"] = ( + individuals["company"] + .loc[individuals["company"].notnull()] + .apply(cleaning_company_column) ) + # Address functions, assuming 
address column is named 'Address' + # If there is an "Address" column in the first place + if "Address" in individuals.columns: + individuals["Address"] = individuals["Address"].astype(str) + individuals["Address Line 1"] = individuals["Address"].apply( + get_address_line_1_from_full_address + ) + individuals["Street Name"] = individuals["Address Line 1"].apply( + get_street_from_address_line_1 + ) + individuals["Address Number"] = individuals["Address Line 1"].apply( + get_address_number_from_address_line_1 + ) + # Check if first name or last names are empty, if so, extract from full name column individuals["full_name"] = individuals["full_name"].astype(str)[ individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) last_name = name.apply(lambda x: x["last"]) individuals["last_name"] = last_name - # Transactions + individuals["full_name"] = individuals.apply( + lambda row: get_likely_name( + row["first_name"], row["last_name"], row["full_name"] + ), + axis=1, + ) + if "Unnamed: 0" in transactions.columns: transactions.drop(columns="Unnamed: 0", inplace=True) transactions["purpose"] = transactions["purpose"].str.upper() return individuals, organizations, transactions + + +organizations = pd.read_csv( + BASE_FILEPATH / "output" / "complete_organizations_table.csv" +) + +individuals = pd.read_csv( + BASE_FILEPATH / "output" / "complete_individuals_table.csv" +) + +transactions = pd.read_csv( + BASE_FILEPATH / "output" / "complete_transactions_table.csv" +) + +individuals, 
organizations, transactions = preprocess_pipeline( + individuals, organizations, transactions +) + +individuals = deduplicate_perfect_matches(individuals) +organizations = deduplicate_perfect_matches(organizations) From 4d7bdfb9cfe95b7c0c8e98314b2ca2977fb8c266 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 22 Feb 2024 07:46:05 +0000 Subject: [PATCH 134/214] adding output csv --- utils/linkage_pipeline.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 0f7be5e5..b5e4d451 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -134,3 +134,19 @@ def preprocess_pipeline( individuals = deduplicate_perfect_matches(individuals) organizations = deduplicate_perfect_matches(organizations) + +processed_individuals_output_path = ( + BASE_FILEPATH / "output" / "processed_individuals_table.csv" +) + +processed_organizations_output_path = ( + BASE_FILEPATH / "output" / "processed_organizations_table.csv" +) + +processed_transactions_output_path = ( + BASE_FILEPATH / "output" / "processed_transactions_table.csv" +) + +individuals.to_csv(processed_individuals_output_path) +organizations.to_csv(processed_organizations_output_path) +transactions.to_csv(processed_transactions_output_path) From fa8c0da373b88cdde630347ff630d80654da1c68 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 22 Feb 2024 13:16:09 -0600 Subject: [PATCH 135/214] Saving work on networkx branch --- Makefile | 5 + notebooks/Test.ipynb | 1469 ++++++++++++++++++++++++++++++++++-------- utils/linkage.py | 22 +- 3 files changed, 1214 insertions(+), 282 deletions(-) diff --git a/Makefile b/Makefile index e210fb2c..f0d93dd0 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,11 @@ project_dir := "$(current_abs_path)" build-only: docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) + # these are called directives + # run-pipeline: + # docker build -t $(project_image_name) -f Dockerfile 
$(current_abs_path) + # docker run -e python pipeline.py + run-interactive: docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) docker run -it -v $(current_abs_path):/project -t $(project_image_name) /bin/bash diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index 457fb6f0..b17aeb76 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -10,7 +10,9 @@ "import numpy as np\n", "import re\n", "import networkx as nx\n", - "import matplotlib.pyplot as plt" + "import matplotlib.pyplot as plt\n", + "\n", + "from utils.linkage import deduplicate_perfect_matches" ] }, { @@ -19,8 +21,365 @@ "metadata": {}, "outputs": [], "source": [ - "orgs_sample = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0).sample(10)\n", - "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False).sample(10)" + "orgs_sample = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0)#,nrows=10000).sample(10)\n", + "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False)#, nrows=10000).sample(10)\n", + "transactions = pd.read_csv(\"../output/complete_transactions_table.csv\",index_col=0, low_memory=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_type
01022#1022 arizona committee of automotive retailersAZpac
4100112314 action victory fund (fec id c00689828)DCpac
\n", + "
" + ], + "text/plain": [ + " id name state entity_type\n", + "0 1022 #1022 arizona committee of automotive retailers AZ pac\n", + "4 100112 314 action victory fund (fec id c00689828) DC pac" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orgs_sample.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
transaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
046406501005922021.025.01869727nonewr 9.13contribution from individualsNaNNaNNaN
181852572018003012020.0100.01779679noneabcontribution from individualsNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " transaction_id donor_id year amount recipient_id office_sought \\\n", + "0 4640650 100592 2021.0 25.0 1869727 none \n", + "1 8185257 201800301 2020.0 100.0 1779679 none \n", + "\n", + " purpose transaction_type donor_type recipient_type \\\n", + "0 wr 9.13 contribution from individuals NaN NaN \n", + "1 ab contribution from individuals NaN NaN \n", + "\n", + " donor_office \n", + "0 NaN \n", + "1 NaN " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transactions.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_type
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [id, name, state, entity_type]\n", + "Index: []" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orgs_sample.loc[orgs_sample['id']=='201800301']" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idfirst_namelast_namefull_nameentity_typestatepartycompany
01869727NaNNaNwilliam \bstonerindividualNaNNaNNaN
11779679NaNNaNrm coulonindividualNaNNaNarea agency on aging
\n", + "
" + ], + "text/plain": [ + " id first_name last_name full_name entity_type state party \\\n", + "0 1869727 NaN NaN william \bstoner individual NaN NaN \n", + "1 1779679 NaN NaN rm coulon individual NaN NaN \n", + "\n", + " company \n", + "0 NaN \n", + "1 area agency on aging " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_sample.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(542368, 102, 248318, 3)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_ids = set(inds_sample.id.tolist())\n", + "orgs_ids = set(orgs_sample.id.tolist())\n", + "trans_donorids = set(transactions.donor_id.tolist())\n", + "trans_recepids = set(transactions.recipient_id.tolist())\n", + "ind_id_there, org_id_there = [], []\n", + "for ind_id in inds_ids:\n", + " if ind_id in trans_donorids:\n", + " ind_id_there.append(ind_id)\n", + " elif ind_id in trans_recepids:\n", + " ind_id_there.append(ind_id)\n", + "\n", + "for org_id in orgs_ids:\n", + " if org_id in trans_donorids:\n", + " org_id_there.append(org_id)\n", + " elif org_id in trans_recepids:\n", + " org_id_there.append(org_id)\n", + "\n", + "len(inds_ids), len(ind_id_there), len(orgs_ids), len(org_id_there)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['100894', '100883']" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = []\n", + "for ind_id in inds_ids:\n", + " if ((ind_id in trans_donorids) and (ind_id in trans_recepids)):\n", + " a.append(ind_id)\n", + "a" ] }, { @@ -66,10 +425,10 @@ " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", " MI\n", " committee\n", - " 503\n", - " Pabar Pac (Pa Bar Assn)\n", - " 5210\n", - " MICHIGAN ASSOCIATION OF NURSE 
ANESTHETISTS PAC\n", + " 4249\n", + " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", + " 730\n", + " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", " \n", " \n", " 1\n", @@ -77,10 +436,10 @@ " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", " MI\n", " committee\n", - " 2969\n", - " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", - " 5768\n", + " 426\n", " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", + " 853\n", + " Pabar Pac (Pa Bar Assn)\n", " \n", " \n", " 2\n", @@ -88,10 +447,10 @@ " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", " MI\n", " committee\n", - " 4592\n", - " COMMITTEE TO ELECT DR PATRICIA BERNARD\n", - " 4274\n", - " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", + " 382\n", + " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", + " 620\n", + " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", " \n", " \n", " 3\n", @@ -99,10 +458,10 @@ " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", " MI\n", " committee\n", - " 2459\n", - " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", - " 2602\n", - " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", + " 2328\n", + " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", + " 4505\n", + " Paa Pac\n", " \n", " \n", " 4\n", @@ -110,10 +469,10 @@ " UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...\n", " MI\n", " committee\n", - " 4748\n", - " MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC\n", - " 4153\n", - " REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...\n", + " 3421\n", + " Paa Pac\n", + " 672\n", + " Paa Pac\n", " \n", " \n", "\n", @@ -135,18 +494,18 @@ "4 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", "\n", " donations donations_to received \\\n", - "0 503 Pabar Pac (Pa Bar Assn) 5210 \n", - "1 2969 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 5768 \n", - "2 4592 COMMITTEE TO ELECT DR PATRICIA BERNARD 4274 \n", - "3 2459 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 
2602 \n", - "4 4748 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 4153 \n", - "\n", - " donations_from \n", - "0 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", - "1 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", - "2 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", - "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", - "4 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... " + "0 4249 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 730 \n", + "1 426 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 853 \n", + "2 382 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 620 \n", + "3 2328 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 4505 \n", + "4 3421 Paa Pac 672 \n", + "\n", + " donations_from \n", + "0 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", + "1 Pabar Pac (Pa Bar Assn) \n", + "2 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "3 Paa Pac \n", + "4 Paa Pac " ] }, "execution_count": 3, @@ -229,9 +588,19 @@ "* https://www.activestate.com/blog/graph-theory-using-python-introduction-and-implementation/ \n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Things to think about\n", + "* Apply the deduplicated_uuids.csv info to the transactions table\n", + "* After doing a left join on the inds/orgs dataset with the transactions data, the recipient_id column needs to have a recipient_name column so that a new node can be created\n", + "* for ppl who have multiple donations {and so have various attributes like office_sought, purpose, transaction_type}, should this information be saved?" 
+ ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 97, "metadata": {}, "outputs": [ { @@ -256,172 +625,336 @@ " \n", " \n", " id\n", - " name\n", - " state\n", + " first_name\n", + " last_name\n", + " full_name\n", " entity_type\n", + " state\n", + " party\n", + " company\n", " \n", " \n", " \n", " \n", - " 1351658\n", - " 1ec10e00-c7a7-4bcc-861f-cd1ff43bfc04\n", - " Friends Of Freedom & Convenience\n", + " 0\n", + " 1869727\n", + " NaN\n", + " NaN\n", + " william \bstoner\n", + " individual\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 1\n", + " 1779679\n", + " NaN\n", + " NaN\n", + " rm coulon\n", + " individual\n", + " NaN\n", + " NaN\n", + " area agency on aging\n", + " \n", + " \n", + " 2\n", + " 2277221\n", + " NaN\n", + " NaN\n", + " james engelson\n", + " individual\n", + " NaN\n", + " NaN\n", + " retired\n", + " \n", + " \n", + " 3\n", + " 2277156\n", + " NaN\n", + " NaN\n", + " marivic franciaskinner\n", + " individual\n", + " NaN\n", + " NaN\n", + " fibre source international corp\n", + " \n", + " \n", + " 4\n", + " 2341373\n", + " NaN\n", + " NaN\n", + " anthony grindle\n", + " individual\n", + " NaN\n", + " NaN\n", + " zimmerbiomet\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 861260\n", + " 6acfa74b-d5e1-4afd-b020-dbe429eb1c3f\n", + " NaN\n", + " NaN\n", + " Melissa Hart\n", + " Candidate\n", " PA\n", - " Committee\n", + " REP\n", + " NaN\n", " \n", " \n", - " 1158960\n", - " 6359974e-9e78-409c-b9dd-fe7415304560\n", - " GRETCHEN WHITMER FOR GOVERNOR\n", - " MI\n", - " committee\n", + " 861271\n", + " f111045d-bc3d-4050-9ad7-b3b1e6d72e56\n", + " NaN\n", + " NaN\n", + " Heather Miller\n", + " Candidate\n", + " PA\n", + " DEM\n", + " NaN\n", " \n", " \n", - " 474220\n", - " 9e43c101-03ef-4083-ab60-b7fd76dea7b5\n", - " TUDOR DIXON FOR GOVERNOR INC\n", - " MI\n", - " committee\n", + " 861277\n", + " 
d40859d7-b523-4ef5-895b-c3a947ab582f\n", + " NaN\n", + " NaN\n", + " Christopher M. Gebhard\n", + " Candidate\n", + " PA\n", + " REP\n", + " NaN\n", " \n", " \n", - " 25789\n", - " 5fb7cb16-912f-4fec-ba37-f201465a5725\n", - " LNAACK BEVERLEY\n", - " MI\n", - " corporation\n", + " 861775\n", + " f5d76d43-86f4-40f9-aeb9-3df97ca8cdf0\n", + " NaN\n", + " NaN\n", + " April Weaver\n", + " Candidate\n", + " PA\n", + " REP\n", + " NaN\n", " \n", " \n", - " 495642\n", - " 6359974e-9e78-409c-b9dd-fe7415304560\n", - " GRETCHEN WHITMER FOR GOVERNOR\n", - " MI\n", - " committee\n", + " 861920\n", + " 1a0cf90d-3252-4c8d-b109-dea084a01f69\n", + " NaN\n", + " NaN\n", + " Krista Paolucci\n", + " Candidate\n", + " PA\n", + " REP\n", + " NaN\n", " \n", " \n", "\n", + "

2505346 rows × 8 columns

\n", "" ], "text/plain": [ - " id \\\n", - "1351658 1ec10e00-c7a7-4bcc-861f-cd1ff43bfc04 \n", - "1158960 6359974e-9e78-409c-b9dd-fe7415304560 \n", - "474220 9e43c101-03ef-4083-ab60-b7fd76dea7b5 \n", - "25789 5fb7cb16-912f-4fec-ba37-f201465a5725 \n", - "495642 6359974e-9e78-409c-b9dd-fe7415304560 \n", - "\n", - " name state entity_type \n", - "1351658 Friends Of Freedom & Convenience PA Committee \n", - "1158960 GRETCHEN WHITMER FOR GOVERNOR MI committee \n", - "474220 TUDOR DIXON FOR GOVERNOR INC MI committee \n", - "25789 LNAACK BEVERLEY MI corporation \n", - "495642 GRETCHEN WHITMER FOR GOVERNOR MI committee " + " id first_name last_name \\\n", + "0 1869727 NaN NaN \n", + "1 1779679 NaN NaN \n", + "2 2277221 NaN NaN \n", + "3 2277156 NaN NaN \n", + "4 2341373 NaN NaN \n", + "... ... ... ... \n", + "861260 6acfa74b-d5e1-4afd-b020-dbe429eb1c3f NaN NaN \n", + "861271 f111045d-bc3d-4050-9ad7-b3b1e6d72e56 NaN NaN \n", + "861277 d40859d7-b523-4ef5-895b-c3a947ab582f NaN NaN \n", + "861775 f5d76d43-86f4-40f9-aeb9-3df97ca8cdf0 NaN NaN \n", + "861920 1a0cf90d-3252-4c8d-b109-dea084a01f69 NaN NaN \n", + "\n", + " full_name entity_type state party \\\n", + "0 william \bstoner individual NaN NaN \n", + "1 rm coulon individual NaN NaN \n", + "2 james engelson individual NaN NaN \n", + "3 marivic franciaskinner individual NaN NaN \n", + "4 anthony grindle individual NaN NaN \n", + "... ... ... ... ... \n", + "861260 Melissa Hart Candidate PA REP \n", + "861271 Heather Miller Candidate PA DEM \n", + "861277 Christopher M. Gebhard Candidate PA REP \n", + "861775 April Weaver Candidate PA REP \n", + "861920 Krista Paolucci Candidate PA REP \n", + "\n", + " company \n", + "0 NaN \n", + "1 area agency on aging \n", + "2 retired \n", + "3 fibre source international corp \n", + "4 zimmerbiomet \n", + "... ... 
\n", + "861260 NaN \n", + "861271 NaN \n", + "861277 NaN \n", + "861775 NaN \n", + "861920 NaN \n", + "\n", + "[2505346 rows x 8 columns]" ] }, - "execution_count": 5, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "orgs_sample.head(5)" + "sample_inds" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id 1ec10e00-c7a7-4bcc-861f-cd1ff43bfc04\n", - "name Friends Of Freedom & Convenience\n", - "state PA\n", - "entity_type Committee\n", - "Name: 1351658, dtype: object\n", - "id 6359974e-9e78-409c-b9dd-fe7415304560\n", - "name GRETCHEN WHITMER FOR GOVERNOR\n", - "state MI\n", - "entity_type committee\n", - "Name: 1158960, dtype: object\n", - "id 9e43c101-03ef-4083-ab60-b7fd76dea7b5\n", - "name TUDOR DIXON FOR GOVERNOR INC\n", - "state MI\n", - "entity_type committee\n", - "Name: 474220, dtype: object\n", - "id 5fb7cb16-912f-4fec-ba37-f201465a5725\n", - "name LNAACK BEVERLEY \n", - "state MI\n", - "entity_type corporation\n", - "Name: 25789, dtype: object\n", - "id 6359974e-9e78-409c-b9dd-fe7415304560\n", - "name GRETCHEN WHITMER FOR GOVERNOR\n", - "state MI\n", - "entity_type committee\n", - "Name: 495642, dtype: object\n", - "id f1df070b-a91b-4aab-b943-4f80e5c41026\n", - "name MICHIGAN LABORERS POLITICAL LEAGUE\n", - "state MI\n", - "entity_type committee\n", - "Name: 1939825, dtype: object\n", - "id 57fbfb3e-835c-4096-9dc9-1555816aff0d\n", - "name PLUMBERS AND PIPEFITTERS LOCAL 333 PAC\n", - "state MI\n", - "entity_type committee\n", - "Name: 1643401, dtype: object\n", - "id 357e354f-d81b-4eb5-af6e-574afd175672\n", - "name MICHIGAN FARM BUREAU POLITICAL ACTION COMMITTEE\n", - "state MI\n", - "entity_type committee\n", - "Name: 2088505, dtype: object\n", - "id 1a5d85e2-0382-4064-9606-8ee0a2be5ea1\n", - "name ANEDOT INC \n", - "state MI\n", - "entity_type corporation\n", - "Name: 157224, dtype: object\n", - "id 
6d8e2e79-72c1-487e-835f-ededfe0aafaa\n", - "name DEMOCRATIC LEGISLATIVE CAMPAIGN COMMITTEE\n", - "state MI\n", - "entity_type committee\n", - "Name: 854930, dtype: object\n" - ] - } - ], - "source": [ - "for index, row in orgs_sample.iterrows():\n", - " print(row)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, + "execution_count": 63, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcompanyentity_typefirst_namefull_namelast_namepartystatetransaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
025625730individualNaNvarious 0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
11617483aggregate cashindividualNaNcash _small donationsNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], "text/plain": [ - "{'color': 'blue', 'size': 2}" + " id company entity_type first_name full_name \\\n", + "0 2562573 0 individual NaN various 0 \n", + "1 1617483 aggregate cash individual NaN cash _small donations \n", + "\n", + " last_name party state transaction_id donor_id year amount recipient_id \\\n", + "0 NaN NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " office_sought purpose transaction_type donor_type recipient_type \\\n", + "0 NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN \n", + "\n", + " donor_office \n", + "0 NaN \n", + "1 NaN " ] }, - "execution_count": 38, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "G = nx.Graph()\n", - "G.add_node(0)\n", - "nx.set_node_attributes(G, \"red\", name=\"color\")\n", - "nx.set_node_attributes(G, 2, name=\"size\")\n", - "G.add_node(1)\n", - "nx.set_node_attributes(G, \"blue\", name='color')\n", - "G.nodes[0]\n" + "sample_inds = inds_sample.loc[inds_sample['id'].isin(ind_id_there)]\n", + "# apply dedup\n", + "sample_inds = deduplicate_perfect_matches(sample_inds)\n", + "\n", + "# map the uuids in transaction donor and recipient columns to the deduplicated uuids\n", + "deduped = pd.read_csv(\"../output/deduplicated_UUIDs.csv\")\n", + "transactions[['donor_id','recipient_id']] = transactions[['donor_id','recipient_id']].replace(deduped)\n", + "\n", + "# add recipient name to transactions df: \n", + "# this step took more than 16 minutes to run...think of alternative way\n", + "# id_to_name = {id: name for id, name in zip(inds_sample.id.tolist(), inds_sample.full_name.tolist())} #the same would be applied to orgs\n", + "transactions['recipient_name'] = transactions['recipient_id'].apply(lambda x: sample_inds.loc[sample_inds.id == x] )\n", + "\n", + "# left merge according to ind_id and transaction donor_id. 
This was entities that only received money will still be there, no info from ind_dataset\n", + "# is lost\n", + "merged_inds_sample = pd.merge(sample_inds,transactions,how='left',left_on='id',right_on='donor_id')\n", + "merged_inds_sample.head(2)" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -446,52 +979,76 @@ " \n", " \n", " id\n", - " name\n", - " state\n", + " company\n", " entity_type\n", + " first_name\n", + " full_name\n", + " last_name\n", + " party\n", + " state\n", " \n", " \n", " \n", " \n", - " 297930\n", - " e44b8553-0dff-4a6b-8335-d97849641ff8\n", - " FRIENDS OF DANA NESSEL\n", - " MI\n", - " committee\n", - " \n", - " \n", - " 945536\n", - " 4f5b8fc4-c871-4774-a436-1622b8e26a44\n", - " MALLORY MCMORROW FOR MICHIGAN\n", - " MI\n", - " committee\n", + " 27\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id name \\\n", - "297930 e44b8553-0dff-4a6b-8335-d97849641ff8 FRIENDS OF DANA NESSEL \n", - "945536 4f5b8fc4-c871-4774-a436-1622b8e26a44 MALLORY MCMORROW FOR MICHIGAN \n", + " id company entity_type first_name full_name \\\n", + "27 100894 none (is a candidate) candidate NaN abdussamad, shams \n", "\n", - " state entity_type \n", - "297930 MI committee \n", - "945536 MI committee " + " last_name party state \n", + "27 NaN democratic AZ " ] }, - "execution_count": 42, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ - ".head(2)" + "sample_inds.loc[sample_inds.full_name == 'abdussamad, shams']" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "def add_notes_from_df(df):\n", + " G = nx.MultiDiGraph()\n", + " #inds or org...\n", + " if 'name' in df.columns:\n", + " node_name = 'name'\n", + " else: node_name = 'full_name'\n", + 
"\n", + " for _, row in df.iterrows():\n", + " G.add_node(row[node_name])\n", + " for column in df.columns:\n", + " # only add info that's present\n", + " if (row[column] != 'nan'):\n", + " nx.set_node_attributes(G, row[column], name=column)\n", + " #nx.set\n", + " nx.draw_random(G, with_labels=True)\n", + " plt.show()\n", + " return G" + ] + }, + { + "cell_type": "code", + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -516,99 +1073,360 @@ " \n", " \n", " id\n", + " company\n", + " entity_type\n", " first_name\n", - " last_name\n", " full_name\n", - " entity_type\n", - " state\n", + " last_name\n", " party\n", - " company\n", + " state\n", + " transaction_id\n", + " donor_id\n", + " year\n", + " amount\n", + " recipient_id\n", + " office_sought\n", + " purpose\n", + " transaction_type\n", + " donor_type\n", + " recipient_type\n", + " donor_office\n", " \n", " \n", " \n", " \n", - " 891077\n", - " c94a0491-7ea1-45ce-a155-6153ea74da08\n", - " BELA\n", - " LAHNER\n", - " BELA LAHNER ...\n", - " Individual\n", - " MI\n", + " 27\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5088079\n", + " 100894\n", + " 2022.0\n", + " 5.00\n", + " 750413\n", + " state representative - district 11\n", + " e-qual online qc\n", + " ccec $5 qualifying contribution\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 28\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5088080\n", + " 100894\n", + " 2022.0\n", + " 5.00\n", + " 2002235\n", + " state representative - district 11\n", + " NaN\n", + " ccec $5 qualifying contribution\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 29\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5088081\n", + " 100894\n", + 
" 2022.0\n", + " 100.00\n", + " 1942680\n", + " state representative - district 11\n", + " NaN\n", + " receive loan from candidate or family member\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 30\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5088083\n", + " 100894\n", + " 2022.0\n", + " 5.00\n", + " -1\n", + " state representative - district 11\n", + " NaN\n", + " in-state contributions $100 or less\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 31\n", + " 100894\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abdussamad, shams\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5088084\n", + " 100894\n", + " 2022.0\n", + " 20.00\n", + " -1\n", + " state representative - district 11\n", + " NaN\n", + " in-state contributions $100 or less\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 597\n", + " 100883\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abeytia, anna lynn\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5084100\n", + " 100883\n", + " 2022.0\n", + " 10.00\n", + " 2017053\n", + " state representative - district 11\n", + " NaN\n", + " contribution from individuals\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 598\n", + " 100883\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abeytia, anna lynn\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5084102\n", + " 100883\n", + " 2022.0\n", + " 180.00\n", + " 2017970\n", + " state representative - district 11\n", + " video production\n", + " in-kind cont. 
from individual\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 599\n", + " 100883\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abeytia, anna lynn\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5084103\n", + " 100883\n", + " 2022.0\n", + " 51.99\n", + " 2008747\n", + " state representative - district 11\n", + " NaN\n", + " contribution from individuals\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 600\n", + " 100883\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abeytia, anna lynn\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5084105\n", + " 100883\n", + " 2022.0\n", + " 10.80\n", + " 1193076\n", + " state representative - district 11\n", + " NaN\n", + " contribution from individuals\n", + " NaN\n", + " NaN\n", " NaN\n", - " NOT EMPLOYED\n", " \n", " \n", - " 617571\n", - " c38816dd-8a47-4102-97cd-59d0f6bc42dc\n", - " JANICE\n", - " SHAPIRO\n", - " JANICE SHAPIRO ...\n", - " Individual\n", - " TX\n", + " 601\n", + " 100883\n", + " none (is a candidate)\n", + " candidate\n", + " NaN\n", + " abeytia, anna lynn\n", + " NaN\n", + " democratic\n", + " AZ\n", + " 5084107\n", + " 100883\n", + " 2022.0\n", + " 51.99\n", + " 1691025\n", + " state representative - district 11\n", + " NaN\n", + " contribution from individuals\n", + " NaN\n", " NaN\n", " NaN\n", " \n", " \n", "\n", + "

575 rows × 19 columns

\n", "" ], "text/plain": [ - " id first_name \\\n", - "891077 c94a0491-7ea1-45ce-a155-6153ea74da08 BELA \n", - "617571 c38816dd-8a47-4102-97cd-59d0f6bc42dc JANICE \n", + " id company entity_type first_name \\\n", + "27 100894 none (is a candidate) candidate NaN \n", + "28 100894 none (is a candidate) candidate NaN \n", + "29 100894 none (is a candidate) candidate NaN \n", + "30 100894 none (is a candidate) candidate NaN \n", + "31 100894 none (is a candidate) candidate NaN \n", + ".. ... ... ... ... \n", + "597 100883 none (is a candidate) candidate NaN \n", + "598 100883 none (is a candidate) candidate NaN \n", + "599 100883 none (is a candidate) candidate NaN \n", + "600 100883 none (is a candidate) candidate NaN \n", + "601 100883 none (is a candidate) candidate NaN \n", + "\n", + " full_name last_name party state transaction_id donor_id \\\n", + "27 abdussamad, shams NaN democratic AZ 5088079 100894 \n", + "28 abdussamad, shams NaN democratic AZ 5088080 100894 \n", + "29 abdussamad, shams NaN democratic AZ 5088081 100894 \n", + "30 abdussamad, shams NaN democratic AZ 5088083 100894 \n", + "31 abdussamad, shams NaN democratic AZ 5088084 100894 \n", + ".. ... ... ... ... ... ... \n", + "597 abeytia, anna lynn NaN democratic AZ 5084100 100883 \n", + "598 abeytia, anna lynn NaN democratic AZ 5084102 100883 \n", + "599 abeytia, anna lynn NaN democratic AZ 5084103 100883 \n", + "600 abeytia, anna lynn NaN democratic AZ 5084105 100883 \n", + "601 abeytia, anna lynn NaN democratic AZ 5084107 100883 \n", "\n", - " last_name \\\n", - "891077 LAHNER \n", - "617571 SHAPIRO \n", + " year amount recipient_id office_sought \\\n", + "27 2022.0 5.00 750413 state representative - district 11 \n", + "28 2022.0 5.00 2002235 state representative - district 11 \n", + "29 2022.0 100.00 1942680 state representative - district 11 \n", + "30 2022.0 5.00 -1 state representative - district 11 \n", + "31 2022.0 20.00 -1 state representative - district 11 \n", + ".. ... ... ... ... 
\n", + "597 2022.0 10.00 2017053 state representative - district 11 \n", + "598 2022.0 180.00 2017970 state representative - district 11 \n", + "599 2022.0 51.99 2008747 state representative - district 11 \n", + "600 2022.0 10.80 1193076 state representative - district 11 \n", + "601 2022.0 51.99 1691025 state representative - district 11 \n", "\n", - " full_name entity_type state \\\n", - "891077 BELA LAHNER ... Individual MI \n", - "617571 JANICE SHAPIRO ... Individual TX \n", + " purpose transaction_type \\\n", + "27 e-qual online qc ccec $5 qualifying contribution \n", + "28 NaN ccec $5 qualifying contribution \n", + "29 NaN receive loan from candidate or family member \n", + "30 NaN in-state contributions $100 or less \n", + "31 NaN in-state contributions $100 or less \n", + ".. ... ... \n", + "597 NaN contribution from individuals \n", + "598 video production in-kind cont. from individual \n", + "599 NaN contribution from individuals \n", + "600 NaN contribution from individuals \n", + "601 NaN contribution from individuals \n", "\n", - " party company \n", - "891077 NaN NOT EMPLOYED \n", - "617571 NaN NaN " + " donor_type recipient_type donor_office \n", + "27 NaN NaN NaN \n", + "28 NaN NaN NaN \n", + "29 NaN NaN NaN \n", + "30 NaN NaN NaN \n", + "31 NaN NaN NaN \n", + ".. ... ... ... 
\n", + "597 NaN NaN NaN \n", + "598 NaN NaN NaN \n", + "599 NaN NaN NaN \n", + "600 NaN NaN NaN \n", + "601 NaN NaN NaN \n", + "\n", + "[575 rows x 19 columns]" ] }, - "execution_count": 43, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_sample.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [], - "source": [ - "def add_notes_from_df(df):\n", - " G = nx.MultiDiGraph()\n", - " if 'name' in df.columns:\n", - " node_name = 'name'\n", - " else: node_name = 'full_name'\n", - " for index, row in df.iterrows():\n", - " # if nodes 1 and 2 don't exist, this both creates the nodes and adds the edges to them\n", - " # the weight can be added to show the magnitude of the edge\n", - " G.add_node(row[node_name])\n", - " for column in df.columns:\n", - " nx.set_node_attributes(G, row[column], name=column)\n", - " nx.draw_random(G, with_labels=True)\n", - " plt.show()\n", - " return G" + "merged_inds_sample.loc[merged_inds_sample.donor_id.notnull()]" ] }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 91, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -619,74 +1437,159 @@ { "data": { "text/plain": [ - "{'id': 'da441d41-1050-4505-a834-99d6023001e1',\n", - " 'first_name': 'AARON ',\n", - " 'last_name': 'KRAUSS ',\n", - " 'full_name': 'AARON KRAUSS ',\n", - " 'entity_type': 'Individual',\n", - " 'state': 'MI',\n", + "{'id': '1869727',\n", + " 'company': nan,\n", + " 'entity_type': 'individual',\n", + " 'first_name': nan,\n", + " 'full_name': 'william \\x08stoner',\n", + " 'last_name': nan,\n", " 'party': nan,\n", - " 'company': nan}" + " 'state': nan,\n", + " 'transaction_id': nan,\n", + " 'donor_id': nan,\n", + " 'year': nan,\n", + " 'amount': nan,\n", + " 'recipient_id': nan,\n", + " 'office_sought': nan,\n", + " 'purpose': nan,\n", + " 'transaction_type': nan,\n", + " 'donor_type': nan,\n", + " 'recipient_type': nan,\n", + " 'donor_office': nan}" ] }, - "execution_count": 105, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = add_notes_from_df(inds_sample)\n", - "x.nodes['BELA LAHNER ']" + "x = add_notes_from_df(merged_inds_sample)\n", + "x.nodes['abdussamad, shams']" ] }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 79, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcompanyentity_typefirst_namefull_namelast_namepartystatetransaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
6631869727NaNindividualNaNwilliam \bstonerNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], "text/plain": [ - "['BELA LAHNER ',\n", - " 'JANICE SHAPIRO ',\n", - " 'RAMON HAWKINS ',\n", - " 'LEAH CYGAN ',\n", - " 'ALLISON HATT ^ ',\n", - " 'ELLEN FEINGOLD ',\n", - " 'KEVIN HERTEL FOR SENATE',\n", - " 'SARA LAFORGE ^ ',\n", - " 'LOIS TACK ',\n", - " 'AARON KRAUSS ']" + " id company entity_type first_name full_name last_name \\\n", + "663 1869727 NaN individual NaN william \bstoner NaN \n", + "\n", + " party state transaction_id donor_id year amount recipient_id \\\n", + "663 NaN NaN NaN NaN NaN NaN NaN \n", + "\n", + " office_sought purpose transaction_type donor_type recipient_type \\\n", + "663 NaN NaN NaN NaN NaN \n", + "\n", + " donor_office \n", + "663 NaN " ] }, - "execution_count": 104, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_sample.full_name.tolist()" + "merged_inds_sample.loc[merged_inds_sample.full_name == 'william \\x08stoner']" ] }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 65, "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "'MALLORY MCMORROW FOR MICHIGAN'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[94], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mG\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnodes\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mMALLORY MCMORROW FOR MICHIGAN\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/classes/reportviews.py:194\u001b[0m, in \u001b[0;36mNodeView.__getitem__\u001b[0;34m(self, n)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(n, \u001b[38;5;28mslice\u001b[39m):\n\u001b[1;32m 190\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m nx\u001b[38;5;241m.\u001b[39mNetworkXError(\n\u001b[1;32m 191\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not support slicing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtry list(G.nodes)[\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstart\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstop\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn\u001b[38;5;241m.\u001b[39mstep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m )\n\u001b[0;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_nodes\u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m]\u001b[49m\n", - "\u001b[0;31mKeyError\u001b[0m: 'MALLORY MCMORROW FOR MICHIGAN'" - ] + "data": { + "text/plain": [ + "{'color': nan, 'size': 2}" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" } ], - "source": [] + "source": [ + "G = nx.Graph()\n", + "G.add_node(0)\n", + "nx.set_node_attributes(G, \"red\", name=\"color\")\n", + "nx.set_node_attributes(G, 2, name=\"size\")\n", + "G.add_node(1)\n", + "nx.set_node_attributes(G, np.nan, name='color')\n", + "G.nodes[0]" + ] }, { "cell_type": "code", @@ -831,7 +1734,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -840,13 +1743,13 @@ "{'color': 'white'}" ] }, - "execution_count": 89, + "execution_count": 26, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ - "G = nx.Graph()\n", + "G = nx.MultiDiGraph()\n", "G.add_node(0)\n", "nx.set_node_attributes(G, \"red\", name=\"color\")\n", "nx.set_node_attributes(G, 4, name = 'size')\n", @@ -855,6 +1758,28 @@ "G.nodes[2]" ] }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'color': 'white', 'age': 4}" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G.add_node(2)\n", + "nx.set_node_attributes(G, 4, name='age')\n", + "G.nodes[2]" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/utils/linkage.py b/utils/linkage.py index e955f94e..93467519 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -449,21 +449,23 @@ def name_rank(first_name: str, last_name: str) -> list: def convert_duplicates_to_dict(df: pd.DataFrame) -> None: - """Saves to the "output" directory a file where each row represents a string - matching to another string + """For each uuid, maps it to all other uuids for which it has been deemed a + match. - Given a dataframe where each row contains one string in a column and a list - of strings in another column, the function maps each string in the list to - the single string. + Given a dataframe where the uuids of all rows deemed similar are stored in a + list and all but the first row of each paired uuid is dropped, this function + maps the matched uuids to a single uuid. Args: - A pandas dataframe + A pandas df containing a column called 'duplicated', where each row is a + list of all uuids deemed a match. In each list, all uuids but the first + have their rows already dropped. Returns None. However it outputs a file to the output directory, with 2 - columns. The first, which indicates the duplicated UUIDs, is labeled - 'duplicated_uuids', and the 2nd, which shows the uuids to which the - deduplicated entries match to, is labeled 'mapped_uuids'. + columns. 
The first lists all the uuids in df, and is labeled 'all_uuids' + The 2nd shows the uuids to which each entry is mapped to, and is labeled + 'mapped_uuids'. """ deduped_dict = {} for i in range(len(df)): @@ -474,7 +476,7 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "duplicated_uuids", 0: "mapped_uuids"} + columns={"index": "all_uuids", 0: "mapped_uuids"} ) deduped_df.to_csv( repo_root / "output" / "deduplicated_UUIDs.csv", From 1e4a550703613ff389835021faa18b83015a9d91 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 22 Feb 2024 13:16:28 -0600 Subject: [PATCH 136/214] Saving work on networkx branch --- utils/linkage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 93467519..49f10bf7 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -459,13 +459,13 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: Args: A pandas df containing a column called 'duplicated', where each row is a list of all uuids deemed a match. In each list, all uuids but the first - have their rows already dropped. + have their rows already dropped. Returns None. However it outputs a file to the output directory, with 2 - columns. The first lists all the uuids in df, and is labeled 'all_uuids' + columns. The first lists all the uuids in df, and is labeled 'all_uuids' The 2nd shows the uuids to which each entry is mapped to, and is labeled - 'mapped_uuids'. + 'mapped_uuids'. 
""" deduped_dict = {} for i in range(len(df)): From 0a043a9f999630445dfdac08f69c09048fd1fbe8 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Thu, 22 Feb 2024 13:18:15 -0600 Subject: [PATCH 137/214] updating docstring of dedup func based on feedback --- utils/linkage.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 44d5e40c..f71a2b5f 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -448,21 +448,23 @@ def name_rank(first_name: str, last_name: str) -> list: def convert_duplicates_to_dict(df: pd.DataFrame) -> None: - """Saves to the "output" directory a file where each row represents a string - matching to another string + """For each uuid, maps it to all other uuids for which it has been deemed a + match. - Given a dataframe where each row contains one string in a column and a list - of strings in another column, the function maps each string in the list to - the single string. + Given a dataframe where the uuids of all rows deemed similar are stored in a + list and all but the first row of each paired uuid is dropped, this function + maps the matched uuids to a single uuid. Args: - A pandas dataframe + A pandas df containing a column called 'duplicated', where each row is a + list of all uuids deemed a match. In each list, all uuids but the first + have their rows already dropped. Returns None. However it outputs a file to the output directory, with 2 - columns. The first, which indicates the duplicated UUIDs, is labeled - 'duplicated_uuids', and the 2nd, which shows the uuids to which the - deduplicated entries match to, is labeled 'mapped_uuids'. + columns. The first lists all the uuids in df, and is labeled 'all_uuids' + The 2nd shows the uuids to which each entry is mapped to, and is labeled + 'mapped_uuids'. 
""" deduped_dict = {} for i in range(len(df)): @@ -473,7 +475,7 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "duplicated_uuids", 0: "mapped_uuids"} + columns={"index": "all_uuids", 0: "mapped_uuids"} ) deduped_df.to_csv( repo_root / "output" / "deduplicated_UUIDs.csv", From cd94c0863510d5e4eb4f87a73e8397fadc537590 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 24 Feb 2024 14:47:36 -0600 Subject: [PATCH 138/214] pipeline progress so far on network linkage --- utils/linkage.py | 27 ------------ utils/network.py | 104 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 27 deletions(-) create mode 100644 utils/network.py diff --git a/utils/linkage.py b/utils/linkage.py index 49f10bf7..f71a2b5f 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -2,7 +2,6 @@ import os.path import re -import networkx as nx import numpy as np import pandas as pd import textdistance as td @@ -638,29 +637,3 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") - - -def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: - """Takes in a dataframe and generates a MultiDiGraph where the nodes are - entity names, and the rest of the dataframe columns make the node attributes - - Args: - df: a pandas dataframe (complete_individuals_table / - complete_organizations_table) - - Returns: - A Networkx MultiDiGraph with nodes lacking any edges - """ - G = nx.MultiDiGraph() - # first check if df is individuals or organizations dataset - if "name" in df.columns: - node_name = "name" - else: - node_name = "full_name" - - for _, row in df.iterrows(): - G.add_node(row[node_name]) - for column in df.columns: - 
nx.set_node_attributes(G, row[column], name=column) - - return G diff --git a/utils/network.py b/utils/network.py new file mode 100644 index 00000000..b0d1e905 --- /dev/null +++ b/utils/network.py @@ -0,0 +1,104 @@ +import networkx as nx +import pandas as pd + +from utils.linkage import deduplicate_perfect_matches + + +def deduplicate_datasets( + ind_df: pd.DataFrame, org_df: pd.DataFrame, transactions_df: pd.DataFrame +) -> tuple: + """Deduplicates the uuids in the inds and orgs dfs and updates the uuids in + transactions dataset to match those in the new inds and orgs dfs + + Args: + ind_df: A pandas df with individual information + org_df: A pandas df with organization information + transactions df: A pandas df with info on transactions between entities + + Returns: + A tuple of the ind_df, org_df, and transactions_df + """ + # apply dedup to both inds and orgs + inds_df = deduplicate_perfect_matches(ind_df) + orgs_df = deduplicate_perfect_matches(org_df) + + # update the deduplicated uuids in transaction donor and recipient columns + # to the uuids they are mapped to + deduped = pd.read_csv("../output/deduplicated_UUIDs.csv") + transactions_df[["donor_id", "recipient_id"]] = transactions_df[ + ["donor_id", "recipient_id"] + ].replace(deduped) + + return inds_df, orgs_df, transactions_df + +def name_identifier(uuid:str, orgs_df, inds_df) -> str: + '''Returns the name of the entity given the entity's uuid + + Args: + uuid: the uuid of the entity + orgs_df and inds_df: the dataframes from which the entities uuid + is queried + + Return: + The entity's name + ''' + # first, check orgs df: + name_in_org = orgs_df.loc[orgs_df['id']==uuid] + if len(name_in_org)> 0: + return name_in_org.iloc[0]['name'] + # theoretically it must be in inds if not in orgs, but for the sample data + # this might not be the case + name_in_ind = inds_df.loc[inds_df['id']==uuid] + if len(name_in_ind)> 0: + return name_in_ind.iloc[0]['full_name'] + else: return None + + +def 
network_prep_pipeline( + ind_df: pd.DataFrame, org_df: pd.DataFrame, transactions_df: pd.DataFrame +) -> tuple: + '''Pipeline for preparing the orgs, inds, and transactions dataframes for + network linkage + + Args: + ind_df, org_df, transactions_df: pandas dataframes with information + regarding campaign contributions between donors and recipients + + Returns: + a tuple containing the 3 dataframes ready for network building + ''' + + ind_df, org_df, transactions_df = deduplicate_datasets( + ind_df, org_df, transactions_df + ) + + # add recipient_name to the transactions dataset + transactions_df['recipient_name'] = transactions_df['recipient_id'].apply(name_identifier, args=(org_df, ind_df)) + return ind_df, org_df, transactions_df + + + +def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: + """Takes in a dataframe and generates a MultiDiGraph where the nodes are + entity names, and the rest of the dataframe columns make the node attributes + + Args: + df: a pandas dataframe (complete_individuals_table / + complete_organizations_table) + + Returns: + A Networkx MultiDiGraph with nodes lacking any edges + """ + G = nx.MultiDiGraph() + # first check if df is individuals or organizations dataset + if "name" in df.columns: + node_name = "name" + else: + node_name = "full_name" + + for _, row in df.iterrows(): + G.add_node(row[node_name]) + for column in df.columns: + nx.set_node_attributes(G, row[column], name=column) + + return G From 22607e7420cfd5ed80752110ea024a629945c76d Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 24 Feb 2024 15:01:27 -0600 Subject: [PATCH 139/214] saving changes in networkx, no need for review --- notebooks/Test.ipynb | 1052 ++++++++++++++++++++++++++++++++++-------- utils/network.py | 39 +- 2 files changed, 887 insertions(+), 204 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index b17aeb76..d188b444 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -8,7 +8,6 @@ "source": [ "import 
pandas as pd\n", "import numpy as np\n", - "import re\n", "import networkx as nx\n", "import matplotlib.pyplot as plt\n", "\n", @@ -21,8 +20,8 @@ "metadata": {}, "outputs": [], "source": [ - "orgs_sample = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0)#,nrows=10000).sample(10)\n", - "inds_sample = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False)#, nrows=10000).sample(10)\n", + "orgs_df = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0)#,nrows=10000).sample(10)\n", + "inds_df = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False)#, nrows=10000).sample(10)\n", "transactions = pd.read_csv(\"../output/complete_transactions_table.csv\",index_col=0, low_memory=False)" ] }, @@ -89,7 +88,7 @@ } ], "source": [ - "orgs_sample.head(2)" + "orgs_df.head(2)" ] }, { @@ -136,7 +135,7 @@ " 0\n", " 4640650\n", " 100592\n", - " 2021.0\n", + " 2021\n", " 25.0\n", " 1869727\n", " none\n", @@ -150,7 +149,7 @@ " 1\n", " 8185257\n", " 201800301\n", - " 2020.0\n", + " 2020\n", " 100.0\n", " 1779679\n", " none\n", @@ -165,17 +164,13 @@ "" ], "text/plain": [ - " transaction_id donor_id year amount recipient_id office_sought \\\n", - "0 4640650 100592 2021.0 25.0 1869727 none \n", - "1 8185257 201800301 2020.0 100.0 1779679 none \n", + " transaction_id donor_id year amount recipient_id office_sought purpose \\\n", + "0 4640650 100592 2021 25.0 1869727 none wr 9.13 \n", + "1 8185257 201800301 2020 100.0 1779679 none ab \n", "\n", - " purpose transaction_type donor_type recipient_type \\\n", - "0 wr 9.13 contribution from individuals NaN NaN \n", - "1 ab contribution from individuals NaN NaN \n", - "\n", - " donor_office \n", - "0 NaN \n", - "1 NaN " + " transaction_type donor_type recipient_type donor_office \n", + "0 contribution from individuals NaN NaN NaN \n", + "1 contribution from individuals NaN NaN NaN " ] }, "execution_count": 4, @@ -187,58 +182,6 @@ 
"transactions.head(2)" ] }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamestateentity_type
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [id, name, state, entity_type]\n", - "Index: []" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "orgs_sample.loc[orgs_sample['id']=='201800301']" - ] - }, { "cell_type": "code", "execution_count": 5, @@ -273,6 +216,10 @@ " state\n", " party\n", " company\n", + " occupation\n", + " address\n", + " zip\n", + " city\n", " \n", " \n", " \n", @@ -286,6 +233,10 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " 1\n", @@ -297,6 +248,10 @@ " NaN\n", " NaN\n", " area agency on aging\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -307,9 +262,9 @@ "0 1869727 NaN NaN william \bstoner individual NaN NaN \n", "1 1779679 NaN NaN rm coulon individual NaN NaN \n", "\n", - " company \n", - "0 NaN \n", - "1 area agency on aging " + " company occupation address zip city \n", + "0 NaN NaN NaN NaN NaN \n", + "1 area agency on aging NaN NaN NaN NaN " ] }, "execution_count": 5, @@ -318,28 +273,28 @@ } ], "source": [ - "inds_sample.head(2)" + "inds_df.head(2)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(542368, 102, 248318, 3)" + "(541803, 541150, 77611, 77611)" ] }, - "execution_count": 22, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_ids = set(inds_sample.id.tolist())\n", - "orgs_ids = set(orgs_sample.id.tolist())\n", + "inds_ids = set(inds_df.id.tolist())\n", + "orgs_ids = set(orgs_df.id.tolist())\n", "trans_donorids = set(transactions.donor_id.tolist())\n", "trans_recepids = set(transactions.recipient_id.tolist())\n", "ind_id_there, org_id_there = [], []\n", @@ -360,16 +315,16 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['100894', '100883']" + "['100883', 
'100894']" ] }, - "execution_count": 99, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -600,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -632,63 +587,87 @@ " state\n", " party\n", " company\n", + " occupation\n", + " address\n", + " zip\n", + " city\n", " \n", " \n", " \n", " \n", - " 0\n", - " 1869727\n", + " 102\n", + " 100894\n", " NaN\n", " NaN\n", - " william \bstoner\n", - " individual\n", + " abdussamad, shams\n", + " candidate\n", + " AZ\n", + " democratic\n", + " none (is a candidate)\n", + " NaN\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", - " 1\n", - " 1779679\n", + " 103\n", + " 100894\n", + " NaN\n", + " NaN\n", + " abdussamad, shams\n", + " candidate\n", + " AZ\n", + " democratic\n", + " none (is a candidate)\n", " NaN\n", " NaN\n", - " rm coulon\n", - " individual\n", " NaN\n", " NaN\n", - " area agency on aging\n", " \n", " \n", - " 2\n", - " 2277221\n", + " 104\n", + " 100883\n", + " NaN\n", + " NaN\n", + " abeytia, anna lynn\n", + " candidate\n", + " AZ\n", + " democratic\n", + " none (is a candidate)\n", " NaN\n", " NaN\n", - " james engelson\n", - " individual\n", " NaN\n", " NaN\n", - " retired\n", " \n", " \n", - " 3\n", - " 2277156\n", - " NaN\n", + " 105\n", + " 100883\n", " NaN\n", - " marivic franciaskinner\n", - " individual\n", " NaN\n", + " abeytia, anna lynn\n", + " candidate\n", + " AZ\n", + " democratic\n", + " none (is a candidate)\n", " NaN\n", - " fibre source international corp\n", - " \n", - " \n", - " 4\n", - " 2341373\n", " NaN\n", " NaN\n", - " anthony grindle\n", - " individual\n", " NaN\n", + " \n", + " \n", + " 0\n", + " b8fbed14-0766-49ab-8516-97952c654a12\n", + " FREDERICK\n", + " BERG\n", + " FREDERICK BERG ...\n", + " Individual\n", + " MI\n", " NaN\n", - " zimmerbiomet\n", + " BUTZEL LONG\n", + " ATTORNEY\n", + " 1033 YORKSHIRE\n", + " 48230-0000\n", + " GROSSE POINTE PARK\n", " \n", " \n", " ...\n", @@ 
-700,117 +679,826 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 17734\n", + " 75b99f42-e0d4-4c3c-89a6-16e11f6dd810\n", + " NaN\n", + " NaN\n", + " Rodriguez, Adrian\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 17735\n", + " 8b634b74-a6be-4280-a2c4-63e46a8f9bc9\n", + " NaN\n", + " NaN\n", + " O'Connor, Timothy J\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 17736\n", + " d7d5b121-015f-474f-8b76-7c6c865da557\n", + " NaN\n", + " NaN\n", + " Frenzel, Robert C\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 17737\n", + " b2eaaec4-30d5-46f4-9922-efc8d79c16d2\n", + " NaN\n", + " NaN\n", + " Enzminger, Peter\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 17738\n", + " de34f2c7-fa2f-4fa5-abea-b67f6c8fe35f\n", + " NaN\n", + " NaN\n", + " Bowler, Erin\n", + " Individual\n", + " MN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + "\n", + "

1760156 rows × 12 columns

\n", + "" + ], + "text/plain": [ + " id first_name \\\n", + "102 100894 NaN \n", + "103 100894 NaN \n", + "104 100883 NaN \n", + "105 100883 NaN \n", + "0 b8fbed14-0766-49ab-8516-97952c654a12 FREDERICK \n", + "... ... ... \n", + "17734 75b99f42-e0d4-4c3c-89a6-16e11f6dd810 NaN \n", + "17735 8b634b74-a6be-4280-a2c4-63e46a8f9bc9 NaN \n", + "17736 d7d5b121-015f-474f-8b76-7c6c865da557 NaN \n", + "17737 b2eaaec4-30d5-46f4-9922-efc8d79c16d2 NaN \n", + "17738 de34f2c7-fa2f-4fa5-abea-b67f6c8fe35f NaN \n", + "\n", + " last_name \\\n", + "102 NaN \n", + "103 NaN \n", + "104 NaN \n", + "105 NaN \n", + "0 BERG \n", + "... ... \n", + "17734 NaN \n", + "17735 NaN \n", + "17736 NaN \n", + "17737 NaN \n", + "17738 NaN \n", + "\n", + " full_name entity_type state \\\n", + "102 abdussamad, shams candidate AZ \n", + "103 abdussamad, shams candidate AZ \n", + "104 abeytia, anna lynn candidate AZ \n", + "105 abeytia, anna lynn candidate AZ \n", + "0 FREDERICK BERG ... Individual MI \n", + "... ... ... ... \n", + "17734 Rodriguez, Adrian Individual MN \n", + "17735 O'Connor, Timothy J Individual MN \n", + "17736 Frenzel, Robert C Individual MN \n", + "17737 Enzminger, Peter Individual MN \n", + "17738 Bowler, Erin Individual MN \n", + "\n", + " party company occupation address \\\n", + "102 democratic none (is a candidate) NaN NaN \n", + "103 democratic none (is a candidate) NaN NaN \n", + "104 democratic none (is a candidate) NaN NaN \n", + "105 democratic none (is a candidate) NaN NaN \n", + "0 NaN BUTZEL LONG ATTORNEY 1033 YORKSHIRE \n", + "... ... ... ... ... \n", + "17734 NaN NaN NaN NaN \n", + "17735 NaN NaN NaN NaN \n", + "17736 NaN NaN NaN NaN \n", + "17737 NaN NaN NaN NaN \n", + "17738 NaN NaN NaN NaN \n", + "\n", + " zip city \n", + "102 NaN NaN \n", + "103 NaN NaN \n", + "104 NaN NaN \n", + "105 NaN NaN \n", + "0 48230-0000 GROSSE POINTE PARK \n", + "... ... ... 
\n", + "17734 NaN NaN \n", + "17735 NaN NaN \n", + "17736 NaN NaN \n", + "17737 NaN NaN \n", + "17738 NaN NaN \n", + "\n", + "[1760156 rows x 12 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# for now only work with datasets \n", + "sample_inds = inds_df.loc[(inds_df['id'].isin(transactions.donor_id.tolist()))]\n", + "sample_inds\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", "
transaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
212637NaNb8fbed14-0766-49ab-8516-97952c654a122022100.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
212667NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
440542NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
440573NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
440607NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
440642NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
636312NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
636346NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
636382NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNNaN
839846NaNb8fbed14-0766-49ab-8516-97952c654a12202283.33f9fa8506-bfbb-4ef0-9e08-5c9c3e948121NaNNaNDIRECT/FUND RAISERNaNNaNNaN
840051NaNb8fbed14-0766-49ab-8516-97952c654a12202283.34389fe2ba-828a-41d4-815c-8efb2499ea11NaNNaNDIRECT/FUND RAISERNaNNaNNaN
8612606acfa74b-d5e1-4afd-b020-dbe429eb1c3f968402NaNb8fbed14-0766-49ab-8516-97952c654a12202283.33043a03b7-af31-4830-b12e-446b93fca9a0NaNNaNDIRECT/FUND RAISERNaNNaNMelissa HartCandidatePAREPNaN
861271f111045d-bc3d-4050-9ad7-b3b1e6d72e561414338NaNb8fbed14-0766-49ab-8516-97952c654a122022250.00ba06baf6-eae6-459f-b3a9-7261e4baa33eNaNNaNDIRECT/FUND RAISERNaNNaNHeather MillerCandidatePADEMNaN
861277d40859d7-b523-4ef5-895b-c3a947ab582f1502742NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNChristopher M. GebhardCandidatePAREPNaN
861775f5d76d43-86f4-40f9-aeb9-3df97ca8cdf01502777NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNApril WeaverCandidatePAREPNaN
8619201a0cf90d-3252-4c8d-b109-dea084a01f691502812NaNb8fbed14-0766-49ab-8516-97952c654a12202250.001d4ae24b-2814-4d0d-995e-28fd4c26785dNaNNaNDIRECTNaNNaNKrista PaolucciCandidatePAREPNaN
\n", - "

2505346 rows × 8 columns

\n", "
" ], "text/plain": [ - " id first_name last_name \\\n", - "0 1869727 NaN NaN \n", - "1 1779679 NaN NaN \n", - "2 2277221 NaN NaN \n", - "3 2277156 NaN NaN \n", - "4 2341373 NaN NaN \n", - "... ... ... ... \n", - "861260 6acfa74b-d5e1-4afd-b020-dbe429eb1c3f NaN NaN \n", - "861271 f111045d-bc3d-4050-9ad7-b3b1e6d72e56 NaN NaN \n", - "861277 d40859d7-b523-4ef5-895b-c3a947ab582f NaN NaN \n", - "861775 f5d76d43-86f4-40f9-aeb9-3df97ca8cdf0 NaN NaN \n", - "861920 1a0cf90d-3252-4c8d-b109-dea084a01f69 NaN NaN \n", + " transaction_id donor_id year amount \\\n", + "212637 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 100.00 \n", + "212667 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "440542 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "440573 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "440607 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "440642 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "636312 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "636346 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "636382 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "839846 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.33 \n", + "840051 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.34 \n", + "968402 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.33 \n", + "1414338 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 250.00 \n", + "1502742 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "1502777 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + "1502812 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", "\n", - " full_name entity_type state party \\\n", - "0 william \bstoner individual NaN NaN \n", - "1 rm coulon individual NaN NaN \n", - "2 james engelson individual NaN NaN \n", - "3 marivic franciaskinner individual NaN NaN \n", - "4 anthony grindle individual NaN NaN \n", - "... ... ... ... ... 
\n", - "861260 Melissa Hart Candidate PA REP \n", - "861271 Heather Miller Candidate PA DEM \n", - "861277 Christopher M. Gebhard Candidate PA REP \n", - "861775 April Weaver Candidate PA REP \n", - "861920 Krista Paolucci Candidate PA REP \n", + " recipient_id office_sought purpose \\\n", + "212637 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "212667 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "440542 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "440573 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "440607 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "440642 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "636312 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "636346 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "636382 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "839846 f9fa8506-bfbb-4ef0-9e08-5c9c3e948121 NaN NaN \n", + "840051 389fe2ba-828a-41d4-815c-8efb2499ea11 NaN NaN \n", + "968402 043a03b7-af31-4830-b12e-446b93fca9a0 NaN NaN \n", + "1414338 ba06baf6-eae6-459f-b3a9-7261e4baa33e NaN NaN \n", + "1502742 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "1502777 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + "1502812 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", "\n", - " company \n", - "0 NaN \n", - "1 area agency on aging \n", - "2 retired \n", - "3 fibre source international corp \n", - "4 zimmerbiomet \n", - "... ... 
\n", - "861260 NaN \n", - "861271 NaN \n", - "861277 NaN \n", - "861775 NaN \n", - "861920 NaN \n", + " transaction_type donor_type recipient_type donor_office \n", + "212637 DIRECT NaN NaN NaN \n", + "212667 DIRECT NaN NaN NaN \n", + "440542 DIRECT NaN NaN NaN \n", + "440573 DIRECT NaN NaN NaN \n", + "440607 DIRECT NaN NaN NaN \n", + "440642 DIRECT NaN NaN NaN \n", + "636312 DIRECT NaN NaN NaN \n", + "636346 DIRECT NaN NaN NaN \n", + "636382 DIRECT NaN NaN NaN \n", + "839846 DIRECT/FUND RAISER NaN NaN NaN \n", + "840051 DIRECT/FUND RAISER NaN NaN NaN \n", + "968402 DIRECT/FUND RAISER NaN NaN NaN \n", + "1414338 DIRECT/FUND RAISER NaN NaN NaN \n", + "1502742 DIRECT NaN NaN NaN \n", + "1502777 DIRECT NaN NaN NaN \n", + "1502812 DIRECT NaN NaN NaN " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transactions.loc[transactions['donor_id'] == 'b8fbed14-0766-49ab-8516-97952c654a12']" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'BUTZEL LONG POLITICAL ACTION COMMITTEE'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = orgs_df.loc[orgs_df['id']=='1d4ae24b-2814-4d0d-995e-28fd4c26785d']\n", + "x.iloc[0]['name']" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# apply dedup to both inds and orgs\n", + "inds_df = deduplicate_perfect_matches(inds_df)\n", + "orgs_df = deduplicate_perfect_matches(orgs_df)\n", + "\n", + "# map the uuids in transaction donor and recipient columns to the deduplicated uuids\n", + "deduped = pd.read_csv(\"../output/deduplicated_UUIDs.csv\")\n", + "transactions[['donor_id','recipient_id']] = transactions[['donor_id','recipient_id']].replace(deduped)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# add 
recipient name to transactions df: \n", + "def name_identifier(uuid:str, orgs_df, inds_df) -> str:\n", + " # 1st check orgs df:\n", + " name_in_org = orgs_df.loc[orgs_df['id']==uuid] \n", + " if len(name_in_org)> 0:\n", + " return name_in_org.iloc[0]['name']\n", + " # theoretically it must be in inds if not in orgs, but for the sample data\n", + " # this might not be the case\n", + " name_in_ind = inds_df.loc[inds_df['id']==uuid]\n", + " if len(name_in_ind)> 0:\n", + " return name_in_ind.iloc[0]['full_name']\n", + " else: return None" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
transaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_officerecipient_name
884875NaN6c2b94a2-4247-4bc4-b784-6b5a9a2ae9f220225.00533f6de5-5140-4799-be24-1d5f4e228d1bNaNNaNDIRECTNaNNaNNaNFRIENDS OF DANA NESSEL
122735NaNb906d3eb-3874-4789-b523-e2eaab41532820229.16f2fad7aa-a782-4d56-8343-049d2150c16fNaNMERCHANT SVCS FEESNaNNaNNaNNaNTHE JULIE BRIXIE BLUE WAVE FUND 2
458788NaN88740001-952f-477e-b2da-9b24f747f6ce20221.00a0619eff-155f-442f-ab71-b5c0ee942223NaNNaNDIRECTNaNNaNNaNCOMERICA INC POLITICAL ACTION COMMITTEE
1522918NaNda538e73-c823-48f8-b4ec-920aa1da458f202220.0081169dce-331e-44ad-b870-1b376d49cf2fNaNNaNDIRECTNaNNaNNaNWASTE MANAGEMENT EMPLOYEES BETTER GOVERNMENT F...
1933218NaN33814139-8442-4050-b15a-40aed1aa9db7202235.00abdf0530-e2fb-40b6-9a52-dea386cd60f4NaNNaNDIRECTNaNNaNNaNGRETCHEN WHITMER FOR GOVERNOR
465682NaN133431dd-41ef-4161-97ef-02d23fc05b4220227.50d582fba6-2a0c-4864-9fb2-5a4f898f26c2NaNNaNDIRECTNaNNaNNaNMI ASSOC OF COMMUNITY BANKERS OF MICHIGAN POLI...
761674NaN668d8471-ade6-469b-9e6b-71ddbfd1d8ba202225.001d05ca29-e97f-43cd-bd9e-f313573b324bNaNNaNDIRECTNaNNaNNaNEND CITIZENS UNITED NON-FEDERAL MI
993543NaNb9b66f08-4e99-43e7-9161-c75db92b0bb4202210.00ecebf482-f298-4777-bea6-e3451c75e3fcNaNNaNDIRECTNaNNaNNaNRESCARE INC DBA BRIGHTSPRING HEALTH SERVICES L...
1196687NaNf4942707-0d7f-4617-b478-56af7504123e202212.00a24e305e-a49b-4cb3-a857-d629f1162ce8NaNNaNDIRECTNaNNaNNaNMARATHON PETROLEUM CORPORATION EMPLOYEES PAC
334698NaN96be56db-56b6-48d0-9cf7-9d47da307388202211.809fc94e93-b6aa-400d-9a4a-d6501afb84dcNaNNaNDIRECTNaNNaNNaNMICHIGAN REGIONAL COUNCIL OF CARPENTERS POLITI...
\n", + "
" + ], + "text/plain": [ + " transaction_id donor_id year amount \\\n", + "884875 NaN 6c2b94a2-4247-4bc4-b784-6b5a9a2ae9f2 2022 5.00 \n", + "122735 NaN b906d3eb-3874-4789-b523-e2eaab415328 2022 9.16 \n", + "458788 NaN 88740001-952f-477e-b2da-9b24f747f6ce 2022 1.00 \n", + "1522918 NaN da538e73-c823-48f8-b4ec-920aa1da458f 2022 20.00 \n", + "1933218 NaN 33814139-8442-4050-b15a-40aed1aa9db7 2022 35.00 \n", + "465682 NaN 133431dd-41ef-4161-97ef-02d23fc05b42 2022 7.50 \n", + "761674 NaN 668d8471-ade6-469b-9e6b-71ddbfd1d8ba 2022 25.00 \n", + "993543 NaN b9b66f08-4e99-43e7-9161-c75db92b0bb4 2022 10.00 \n", + "1196687 NaN f4942707-0d7f-4617-b478-56af7504123e 2022 12.00 \n", + "334698 NaN 96be56db-56b6-48d0-9cf7-9d47da307388 2022 11.80 \n", + "\n", + " recipient_id office_sought \\\n", + "884875 533f6de5-5140-4799-be24-1d5f4e228d1b NaN \n", + "122735 f2fad7aa-a782-4d56-8343-049d2150c16f NaN \n", + "458788 a0619eff-155f-442f-ab71-b5c0ee942223 NaN \n", + "1522918 81169dce-331e-44ad-b870-1b376d49cf2f NaN \n", + "1933218 abdf0530-e2fb-40b6-9a52-dea386cd60f4 NaN \n", + "465682 d582fba6-2a0c-4864-9fb2-5a4f898f26c2 NaN \n", + "761674 1d05ca29-e97f-43cd-bd9e-f313573b324b NaN \n", + "993543 ecebf482-f298-4777-bea6-e3451c75e3fc NaN \n", + "1196687 a24e305e-a49b-4cb3-a857-d629f1162ce8 NaN \n", + "334698 9fc94e93-b6aa-400d-9a4a-d6501afb84dc NaN \n", + "\n", + " purpose transaction_type donor_type \\\n", + "884875 NaN DIRECT NaN \n", + "122735 MERCHANT SVCS FEES NaN NaN \n", + "458788 NaN DIRECT NaN \n", + "1522918 NaN DIRECT NaN \n", + "1933218 NaN DIRECT NaN \n", + "465682 NaN DIRECT NaN \n", + "761674 NaN DIRECT NaN \n", + "993543 NaN DIRECT NaN \n", + "1196687 NaN DIRECT NaN \n", + "334698 NaN DIRECT NaN \n", + "\n", + " recipient_type donor_office \\\n", + "884875 NaN NaN \n", + "122735 NaN NaN \n", + "458788 NaN NaN \n", + "1522918 NaN NaN \n", + "1933218 NaN NaN \n", + "465682 NaN NaN \n", + "761674 NaN NaN \n", + "993543 NaN NaN \n", + "1196687 NaN NaN \n", + "334698 NaN NaN \n", 
+ "\n", + " recipient_name \n", + "884875 FRIENDS OF DANA NESSEL \n", + "122735 THE JULIE BRIXIE BLUE WAVE FUND 2 \n", + "458788 COMERICA INC POLITICAL ACTION COMMITTEE \n", + "1522918 WASTE MANAGEMENT EMPLOYEES BETTER GOVERNMENT F... \n", + "1933218 GRETCHEN WHITMER FOR GOVERNOR \n", + "465682 MI ASSOC OF COMMUNITY BANKERS OF MICHIGAN POLI... \n", + "761674 END CITIZENS UNITED NON-FEDERAL MI \n", + "993543 RESCARE INC DBA BRIGHTSPRING HEALTH SERVICES L... \n", + "1196687 MARATHON PETROLEUM CORPORATION EMPLOYEES PAC \n", + "334698 MICHIGAN REGIONAL COUNCIL OF CARPENTERS POLITI... " ] }, - "execution_count": 97, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sample_inds" + "ex = transactions.sample(10)\n", + "ex['recipient_name'] = ex['recipient_id'].apply(name_identifier, args=(orgs_df, inds_df))\n", + "ex" ] }, { @@ -933,15 +1621,7 @@ } ], "source": [ - "sample_inds = inds_sample.loc[inds_sample['id'].isin(ind_id_there)]\n", - "# apply dedup\n", - "sample_inds = deduplicate_perfect_matches(sample_inds)\n", "\n", - "# map the uuids in transaction donor and recipient columns to the deduplicated uuids\n", - "deduped = pd.read_csv(\"../output/deduplicated_UUIDs.csv\")\n", - "transactions[['donor_id','recipient_id']] = transactions[['donor_id','recipient_id']].replace(deduped)\n", - "\n", - "# add recipient name to transactions df: \n", "# this step took more than 16 minutes to run...think of alternative way\n", "# id_to_name = {id: name for id, name in zip(inds_sample.id.tolist(), inds_sample.full_name.tolist())} #the same would be applied to orgs\n", "transactions['recipient_name'] = transactions['recipient_id'].apply(lambda x: sample_inds.loc[sample_inds.id == x] )\n", diff --git a/utils/network.py b/utils/network.py index b0d1e905..443f240d 100644 --- a/utils/network.py +++ b/utils/network.py @@ -22,7 +22,7 @@ def deduplicate_datasets( inds_df = deduplicate_perfect_matches(ind_df) orgs_df = 
deduplicate_perfect_matches(org_df) - # update the deduplicated uuids in transaction donor and recipient columns + # update the deduplicated uuids in transaction donor and recipient columns # to the uuids they are mapped to deduped = pd.read_csv("../output/deduplicated_UUIDs.csv") transactions_df[["donor_id", "recipient_id"]] = transactions_df[ @@ -31,9 +31,10 @@ def deduplicate_datasets( return inds_df, orgs_df, transactions_df -def name_identifier(uuid:str, orgs_df, inds_df) -> str: - '''Returns the name of the entity given the entity's uuid - + +def name_identifier(uuid: str, orgs_df, inds_df) -> str: + """Returns the name of the entity given the entity's uuid + Args: uuid: the uuid of the entity orgs_df and inds_df: the dataframes from which the entities uuid @@ -41,23 +42,24 @@ def name_identifier(uuid:str, orgs_df, inds_df) -> str: Return: The entity's name - ''' + """ # first, check orgs df: - name_in_org = orgs_df.loc[orgs_df['id']==uuid] - if len(name_in_org)> 0: - return name_in_org.iloc[0]['name'] + name_in_org = orgs_df.loc[orgs_df["id"] == uuid] + if len(name_in_org) > 0: + return name_in_org.iloc[0]["name"] # theoretically it must be in inds if not in orgs, but for the sample data # this might not be the case - name_in_ind = inds_df.loc[inds_df['id']==uuid] - if len(name_in_ind)> 0: - return name_in_ind.iloc[0]['full_name'] - else: return None + name_in_ind = inds_df.loc[inds_df["id"] == uuid] + if len(name_in_ind) > 0: + return name_in_ind.iloc[0]["full_name"] + else: + return None def network_prep_pipeline( ind_df: pd.DataFrame, org_df: pd.DataFrame, transactions_df: pd.DataFrame ) -> tuple: - '''Pipeline for preparing the orgs, inds, and transactions dataframes for + """Pipeline for preparing the orgs, inds, and transactions dataframes for network linkage Args: @@ -65,19 +67,20 @@ def network_prep_pipeline( regarding campaign contributions between donors and recipients Returns: - a tuple containing the 3 dataframes ready for network building - ''' 
- + a tuple containing the 3 dataframes ready for network building + """ + ind_df, org_df, transactions_df = deduplicate_datasets( ind_df, org_df, transactions_df ) # add recipient_name to the transactions dataset - transactions_df['recipient_name'] = transactions_df['recipient_id'].apply(name_identifier, args=(org_df, ind_df)) + transactions_df["recipient_name"] = transactions_df["recipient_id"].apply( + name_identifier, args=(org_df, ind_df) + ) return ind_df, org_df, transactions_df - def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: """Takes in a dataframe and generates a MultiDiGraph where the nodes are entity names, and the rest of the dataframe columns make the node attributes From 661feff7135078a6281495395efd0d436a04b391 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sat, 24 Feb 2024 15:02:45 -0600 Subject: [PATCH 140/214] updated column names and docstring of dedup func based on Avery's feedback --- utils/linkage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index f71a2b5f..c2df5515 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -462,9 +462,9 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: Returns None. However it outputs a file to the output directory, with 2 - columns. The first lists all the uuids in df, and is labeled 'all_uuids' - The 2nd shows the uuids to which each entry is mapped to, and is labeled - 'mapped_uuids'. + columns. The first lists all the uuids in df, and is labeled + 'original_uuids.' The 2nd shows the uuids to which each entry is mapped + to, and is labeled 'mapped_uuids'. 
""" deduped_dict = {} for i in range(len(df)): @@ -475,7 +475,7 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "all_uuids", 0: "mapped_uuids"} + columns={"index": "original_uuids", 0: "mapped_uuid"} ) deduped_df.to_csv( repo_root / "output" / "deduplicated_UUIDs.csv", From d58795b76ed228ecc4eda3b4888ce44b3bf4e85a Mon Sep 17 00:00:00 2001 From: npashilkar Date: Mon, 26 Feb 2024 00:28:34 -0600 Subject: [PATCH 141/214] splink output edit --- utils/linkage.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index a8a40fd1..10afff7f 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -641,7 +641,7 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: def splink_dedupe( df: pd.DataFrame, settings: dict, blocking: list ) -> pd.DataFrame: - """Given the individuals or organizations dataframes, the corresponding + """Given a dataframes, the corresponding configuration settings, and corresponding blocking rules return a deduplicated dataframe @@ -650,13 +650,15 @@ def splink_dedupe( organizations_blocking Args: - df: individuals or organizations table - settings: individuald or configuration settings - (based on splink documentation) + df: dataframe + settings: configuration settings + (based on splink documentation and dataframe columns) blocking: list of columns to block on for the table - (cuts dataframe into parts based on above blocks ) + (cuts dataframe into parts based on columns labeled blocks) Returns: - dataframe with matched ids of matching rows + deduplicated version of initial dataframe with column 'matching_id' + that holds list of matching unique_ids + """ linker = DuckDBLinker(df, settings) linker.estimate_probability_two_random_records_match( @@ -671,4 +673,24 @@ def 
splink_dedupe( clusters = linker.cluster_pairwise_predictions_at_threshold( df_predict, threshold_match_probability=0.7 ) # default - return clusters.as_pandas_dataframe() + clusters_df = ( + clusters.as_pandas_dataframe() + ) # dataframe where cluster_id maps unique_id to initial instance of row + + match_list_df = ( + clusters_df.groupby("cluster_id")["unique_id"].agg(list).reset_index() + ) + match_list_df.rename( + columns={"unique_id": "matching_list"}, inplace=True + ) # dataframe which matches cluster_id to a list of unique_ids + + first_instance_df = clusters_df.drop_duplicates(subset="cluster_id") + col_names = np.append("cluster_id", df.columns) + first_instance_df = first_instance_df[col_names] + + return pd.merge( + first_instance_df, + match_list_df[["cluster_id", "matching_list"]], + on="cluster_id", + how="left", + ) From d0f36b6c8c13708109f4be8ee8c7791dd30fea4c Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 26 Feb 2024 09:47:58 -0600 Subject: [PATCH 142/214] saving Networkx work before merge...no need to review --- utils/network.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/utils/network.py b/utils/network.py index 443f240d..8b4d4610 100644 --- a/utils/network.py +++ b/utils/network.py @@ -99,9 +99,31 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: else: node_name = "full_name" + transact_info = [ + "office_sought", + "purpose", + "transaction_type", + "recipient_id", + "transaction_id", + "recipient_type", + "donor_office", + "recipient_name", + "amount", + ] + for _, row in df.iterrows(): + # add node attributes based on the columns relevant to the entity G.add_node(row[node_name]) - for column in df.columns: - nx.set_node_attributes(G, row[column], name=column) + for column in df.columns.difference(transact_info): + if not pd.isnull(row[column]): + G.nodes[row[node_name]][column] = row[column] + + # link the donor node to the recipient node. 
add the attributes of the + # edge based on relevant nodes + for column in transact_info: + if not pd.isnull(row[column]): + G.add_edge( + row[node_name], row["recipient_name"], column=row[column] + ) return G From f8df69fcd07046f05899375f5705d66973bd3b62 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 26 Feb 2024 11:35:32 -0600 Subject: [PATCH 143/214] saving work for merge, no need to review --- utils/network.py | 87 ++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 70 deletions(-) diff --git a/utils/network.py b/utils/network.py index 8b4d4610..6e9dcd90 100644 --- a/utils/network.py +++ b/utils/network.py @@ -1,84 +1,31 @@ import networkx as nx import pandas as pd -from utils.linkage import deduplicate_perfect_matches - -def deduplicate_datasets( - ind_df: pd.DataFrame, org_df: pd.DataFrame, transactions_df: pd.DataFrame -) -> tuple: - """Deduplicates the uuids in the inds and orgs dfs and updates the uuids in - transactions dataset to match those in the new inds and orgs dfs - - Args: - ind_df: A pandas df with individual information - org_df: A pandas df with organization information - transactions df: A pandas df with info on transactions between entities - - Returns: - A tuple of the ind_df, org_df, and transactions_df - """ - # apply dedup to both inds and orgs - inds_df = deduplicate_perfect_matches(ind_df) - orgs_df = deduplicate_perfect_matches(org_df) - - # update the deduplicated uuids in transaction donor and recipient columns - # to the uuids they are mapped to - deduped = pd.read_csv("../output/deduplicated_UUIDs.csv") - transactions_df[["donor_id", "recipient_id"]] = transactions_df[ - ["donor_id", "recipient_id"] - ].replace(deduped) - - return inds_df, orgs_df, transactions_df - - -def name_identifier(uuid: str, orgs_df, inds_df) -> str: +def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: """Returns the name of the entity given the entity's uuid Args: uuid: the uuid of the entity - orgs_df 
and inds_df: the dataframes from which the entities uuid - is queried - + List of dfs: dataframes that have a uuid column, and a 'name' or + 'full_name' column Return: The entity's name """ - # first, check orgs df: - name_in_org = orgs_df.loc[orgs_df["id"] == uuid] - if len(name_in_org) > 0: - return name_in_org.iloc[0]["name"] - # theoretically it must be in inds if not in orgs, but for the sample data - # this might not be the case - name_in_ind = inds_df.loc[inds_df["id"] == uuid] - if len(name_in_ind) > 0: - return name_in_ind.iloc[0]["full_name"] - else: - return None - - -def network_prep_pipeline( - ind_df: pd.DataFrame, org_df: pd.DataFrame, transactions_df: pd.DataFrame -) -> tuple: - """Pipeline for preparing the orgs, inds, and transactions dataframes for - network linkage - - Args: - ind_df, org_df, transactions_df: pandas dataframes with information - regarding campaign contributions between donors and recipients - - Returns: - a tuple containing the 3 dataframes ready for network building - """ - - ind_df, org_df, transactions_df = deduplicate_datasets( - ind_df, org_df, transactions_df - ) - - # add recipient_name to the transactions dataset - transactions_df["recipient_name"] = transactions_df["recipient_id"].apply( - name_identifier, args=(org_df, ind_df) - ) - return ind_df, org_df, transactions_df + for df in dfs: + # first, check orgs df: + if "name" in df.columns: + name_in_org = df.loc[df["id"] == uuid] + if len(name_in_org) > 0: + return name_in_org.iloc[0]["name"] + # theoretically it must be in inds if not in orgs, but for the sample + # data this might not be the case + + if "full_name" in df.columns: + name_in_ind = df.loc[df["id"] == uuid] + if len(name_in_ind) > 0: + return name_in_ind.iloc[0]["full_name"] + return None def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: From 7b2ca080234a64f7a274f30a19949f52ea8db857 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 27 Feb 2024 01:47:26 -0600 Subject: [PATCH 144/214] 
splink output edits --- utils/constants.py | 21 +++++++++++---------- utils/linkage.py | 17 +++++++++++++++-- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index 91e1d87f..23ea161b 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -647,10 +647,11 @@ individuals_settings = { "link_type": "dedupe_only", - "blocking_rules_to_generate_predictions": ["l.full_name - r.full_name"], + "blocking_rules_to_generate_predictions": [ + "l.first_name = r.first_name", + "l.last_name = r.last_name", + ], "comparisons": [ - ctl.name_comparison("first_name"), # built in comparison function - ctl.name_comparison("last_name"), ctl.name_comparison("full_name"), cl.exact_match("entity_type", term_frequency_adjustments=True), cl.jaro_winkler_at_thresholds( @@ -662,11 +663,13 @@ # DEFAULT "retain_matching_columns": True, "retain_intermediate_calculation_columns": True, + "max_iterations": 10, + "em_convergence": 0.01, } individuals_blocking = [ - "l.first_name = r.first_name and l.last_name = r.last_name", - "l.full_name = r.full_name", + "l.first_name = r.first_name", + "l.last_name = r.last_name", ] organizations_settings = { @@ -675,7 +678,6 @@ "l.name = r.name", ], "comparisons": [ - ctl.name_comparison("name", term_frequency_adjustments=True), cl.exact_match("entity_type", term_frequency_adjustments=True), cl.jaro_winkler_at_thresholds( "state", [0.9, 0.8] @@ -684,9 +686,8 @@ ], "retain_matching_columns": True, "retain_intermediate_calculation_columns": True, + "max_iterations": 10, + "em_convergence": 0.01, } -organizations_blocking = [ - "l.name = r.name", - "l.name = r.name and l.state = r.state", -] +organizations_blocking = ["l.name = r.name"] diff --git a/utils/linkage.py b/utils/linkage.py index 10afff7f..0a575a1d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -688,9 +688,22 @@ def splink_dedupe( col_names = np.append("cluster_id", df.columns) first_instance_df = first_instance_df[col_names] - return 
pd.merge( + deduped_df = pd.merge( first_instance_df, - match_list_df[["cluster_id", "matching_list"]], + match_list_df[["cluster_id"]], on="cluster_id", how="left", ) + + match_list_df.rename( + columns={"unique_id": "mapped_uuids", "cluster_id": "original_ids"}, + inplace=True, + ) + deduped_df.to_csv( + repo_root / "output" / "splink_deduplicated_UUIDs.csv", + index=False, + mode="a", + header=not os.path.exists("../output/splink_deduplicated_UUIDs.csv"), + ) + + return deduped_df From 42ca58e75333f4d91836b7dc64134adc0247810b Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 04:13:59 +0000 Subject: [PATCH 145/214] pipeline changes --- utils/linkage_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index b5e4d451..613d3244 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -133,7 +133,6 @@ def preprocess_pipeline( ) individuals = deduplicate_perfect_matches(individuals) -organizations = deduplicate_perfect_matches(organizations) processed_individuals_output_path = ( BASE_FILEPATH / "output" / "processed_individuals_table.csv" From 77bc2b3e4ac276b2d290a092d8f1ae51dd6a41a4 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 04:19:36 +0000 Subject: [PATCH 146/214] adding removed files --- utils/classify.py | 75 +++++++++++++++++++++++++ utils/tests/test_linkage.py | 107 ++++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 utils/classify.py create mode 100644 utils/tests/test_linkage.py diff --git a/utils/classify.py b/utils/classify.py new file mode 100644 index 00000000..db574ace --- /dev/null +++ b/utils/classify.py @@ -0,0 +1,75 @@ +import pandas as pd + +# we want to run down a list of people and, hopefully, their adresses, plus a list of +# corporations, groups, etc, and classify them, basically just looking for matches + +# do we want to just input all the names/people (there's not many, less than 200 +# 
for sure),give a string similarity match score, and extract the top ten for +# manual review? this should give us a feeling for how to set our threshold +# we might also, once we have all the data, buckle down and just classify +# some of them manually + +inds_list = [] + +# a list of individual names + + +def similarity_calculator( + df: pd.DataFrame, subject: str, n: int, comparison_func +) -> pd.DataFrame: + """Find best matches to a subject name in a pandas dataframe + + For a given individual or organization, the subject, we search through the + 'name'column of a dataframe, select the n highest matches according to a + selected comparison function, and return those as a dataframe. This is meant + to be used manually to search for matches. For quick automated processing, see + automated_classifier(). + + Note that the comparison function must take in two inputs, both strings, and + output a percentage match + """ + + similarities_df = df.copy() + + similarities = similarities_df["name"].apply( + lambda x: comparison_func(x, subject) + ) + + similarities_df["similarities"] = similarities + + top_n_matches = similarities_df.sort_values( + by=["similarities"], ascending=False + )[0:n] + + return top_n_matches + + +def automated_classifier( + df: pd.DataFrame, subjects_dict: dict, threshold: float, comparison_func +): + """Using similarity_calculator, classify entities automatically + + Feeding a dictionary of names and the associated statuses, we compare + the string matches and, if they exceed a certain threshold, classify + them as belonging to some group specified in the subjects dictionary. 
+ """ + + similarities_df = df.copy() + + for subject in subjects_dict: + similarities = similarities_df["name"].apply( + lambda x, sub=subject: comparison_func(x, sub) + ) + matches = similarities >= threshold + + status = subjects_dict[subject] + + similarities_df["classification"] = pd.Series(matches).apply( + lambda x, stat=status: stat if x else "neutral" + ) + + return similarities_df + + # we can use the indices and/or select manually, just add a new + # column to the subjects table + # that marks fossil fuels, green energy, or neither diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py new file mode 100644 index 00000000..3695a399 --- /dev/null +++ b/utils/tests/test_linkage.py @@ -0,0 +1,107 @@ +import numpy as np +import pandas as pd +import pytest + +from utils.linkage import ( + calculate_row_similarity, + calculate_string_similarity, + row_matches, +) + +# import pytest + + +# creating a test for calculate_row_similarity and row_matches + +# to put in data: +d = { + "name": ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + ], +} +test_df = pd.DataFrame(data=d) + + +@pytest.fixture +def row_similarity_scen_1(): + return test_df + + +@pytest.fixture +def row_similarity_scen_2(): + return test_df + + +def test_row_similarity_scen_1(row_similarity_scen_1): + wrong = calculate_row_similarity( + row_similarity_scen_1.iloc[[0]], + row_similarity_scen_1.iloc[[1]], + np.array([0.8, 0.2]), + calculate_string_similarity, + ) + right = calculate_row_similarity( + row_similarity_scen_1.iloc[[0]], + row_similarity_scen_1.iloc[[2]], + np.array([0.8, 0.2]), + calculate_string_similarity, + ) + + assert right > wrong + + +def test_row_similarity_scen_2(row_similarity_scen_2): + wrong = calculate_row_similarity( + row_similarity_scen_2.iloc[[0]], + row_similarity_scen_2.iloc[[1]], + np.array([0.2, 0.8]), + calculate_string_similarity, + ) + 
right = calculate_row_similarity( + row_similarity_scen_2.iloc[[0]], + row_similarity_scen_2.iloc[[2]], + np.array([0.2, 0.8]), + calculate_string_similarity, + ) + + assert right < wrong + + +d2 = { + "name": [ + "bob von rosevich", + "anantarya smith", + "bob j vonrosevich", + "missy elliot", + "mr johnson", + "quarantin directino", + "missy eliot", + "joseph johnson", + ], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + "42 Hollywood Boulevard, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + ], +} +test_df2 = pd.DataFrame(data=d2) + + +@pytest.fixture +def row_match_scen1(): + return test_df2 + + +def test_row_matches(row_match_scen1): + res = row_matches( + row_match_scen1, np.array([0.8, 0.2]), 0.9, calculate_string_similarity + ) + + assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} From 485fe43aabee2a29ce9048a89760c1c7c4424bde Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 27 Feb 2024 22:28:15 -0600 Subject: [PATCH 147/214] basics of makefile and added classify fns --- utils/Makefile | 0 utils/classify.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 utils/Makefile diff --git a/utils/Makefile b/utils/Makefile new file mode 100644 index 00000000..e69de29b diff --git a/utils/classify.py b/utils/classify.py index db574ace..465351ea 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -9,6 +9,63 @@ # we might also, once we have all the data, buckle down and just classify # some of them manually + + + +#several kinds of lists, applied differently, actually they should be dictionaries + +#corp name lists +#employer name lists +#etc + + +#individuals compnay f names +f_companies = ["exxon", "chevron", "southwest gas", "petroleum", "koch industries", "koch companies", "oil & gas", "marathon oil", "shell oil", ] + +#organizations f names 
+f_org_names = ["koch industries", "koch pac", "kochpac", "southwest gas az", "pinnacle west", "americans for prosperity", "energy transfer"] + +#organizations c names +c_org_names = ["clean energy", "vote solar action", "renewable", "pattern energy", "beyond carbon", "lcv victory", "league of conservation"] + + + +def matcher(df, substring, column, category): + """ + """ + + bool_series = df[column].str.contains(substring, na=False) + + df.loc[bool_series, 'classification'] = category + + return df + + +def classification_pipeline(individuals_df, organizations_df): + """ + + """ + + individuals_df["classification"] = "neutral" + + organizations_df["classification"] = "neutral" + + for i in f_companies: + individuals_df = matcher(individuals_df, i, "company", "f") + + for i in f_org_names: + organizations_df = matcher(organizations_df, i, "name", "f") + + for i in c_org_names: + organizations_df = matcher(organizations_df, i, "name", "c") + + + return individuals_df, organizations_df + + + + + inds_list = [] # a list of individual names From e4b3a0afcef481f995e98dfcf7da7a11ca7b28b8 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 27 Feb 2024 22:37:39 -0600 Subject: [PATCH 148/214] linter fixes --- utils/Makefile | 18 +++++++++++ utils/classify.py | 78 ++++++++++++++++++++++++++++------------------- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/utils/Makefile b/utils/Makefile index e69de29b..4723bef1 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -0,0 +1,18 @@ +all: all_individuals.csv, all_organizations.csv, all_transactions.csv + +#set this up as a pipeline, check how far along Adil is on this +output deduplicated_individuals: all_individuals.csv + python deduplication_pipeline + +#likewise here +output deduplicated_organizations: all_organizations.csv + python deduplication_pipeline + +output classified_individuals: all_individuals.csv + python classifier_pipeline + +output classified_organizations: all_organizations.csv + python 
classifier_pipeline + +output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv + python network_graph_pipeline \ No newline at end of file diff --git a/utils/classify.py b/utils/classify.py index 465351ea..493773ec 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -10,41 +10,61 @@ # some of them manually +# several kinds of lists, applied differently, actually they should be dictionaries + +# corp name lists +# employer name lists +# etc + + +# individuals compnay f names +f_companies = [ + "exxon", + "chevron", + "southwest gas", + "petroleum", + "koch industries", + "koch companies", + "oil & gas", + "marathon oil", + "shell oil", +] + +# organizations f names +f_org_names = [ + "koch industries", + "koch pac", + "kochpac", + "southwest gas az", + "pinnacle west", + "americans for prosperity", + "energy transfer", +] + +# organizations c names +c_org_names = [ + "clean energy", + "vote solar action", + "renewable", + "pattern energy", + "beyond carbon", + "lcv victory", + "league of conservation", +] -#several kinds of lists, applied differently, actually they should be dictionaries - -#corp name lists -#employer name lists -#etc - - -#individuals compnay f names -f_companies = ["exxon", "chevron", "southwest gas", "petroleum", "koch industries", "koch companies", "oil & gas", "marathon oil", "shell oil", ] - -#organizations f names -f_org_names = ["koch industries", "koch pac", "kochpac", "southwest gas az", "pinnacle west", "americans for prosperity", "energy transfer"] - -#organizations c names -c_org_names = ["clean energy", "vote solar action", "renewable", "pattern energy", "beyond carbon", "lcv victory", "league of conservation"] +def matcher(df, substring, column, category): + """ """ + bool_series = df[column].str.contains(substring, na=False) + df.loc[bool_series, "classification"] = category -def matcher(df, substring, column, category): - """ - """ - - bool_series = df[column].str.contains(substring, na=False) - - 
df.loc[bool_series, 'classification'] = category - return df def classification_pipeline(individuals_df, organizations_df): - """ - - """ + """ """ individuals_df["classification"] = "neutral" @@ -55,17 +75,13 @@ def classification_pipeline(individuals_df, organizations_df): for i in f_org_names: organizations_df = matcher(organizations_df, i, "name", "f") - + for i in c_org_names: organizations_df = matcher(organizations_df, i, "name", "c") - return individuals_df, organizations_df - - - inds_list = [] # a list of individual names From 3ce0c50a52789547072d728397fded967496d27c Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 27 Feb 2024 23:15:54 -0600 Subject: [PATCH 149/214] modifying classify to fit makefile --- utils/classify.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 493773ec..a0437e8d 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -63,23 +63,32 @@ def matcher(df, substring, column, category): return df -def classification_pipeline(individuals_df, organizations_df): - """ """ +def classify_individuals(individuals_df): + """ + + """ individuals_df["classification"] = "neutral" - organizations_df["classification"] = "neutral" - for i in f_companies: individuals_df = matcher(individuals_df, i, "company", "f") + return individuals_df + + + +def classify_orgs(organizations_df): + """ """ + + organizations_df["classification"] = "neutral" + for i in f_org_names: organizations_df = matcher(organizations_df, i, "name", "f") for i in c_org_names: organizations_df = matcher(organizations_df, i, "name", "c") - return individuals_df, organizations_df + return organizations_df inds_list = [] From 517f909e62c8e76fc206b392c152784273772e68 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 27 Feb 2024 23:16:28 -0600 Subject: [PATCH 150/214] linter fixes --- utils/classify.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/utils/classify.py 
b/utils/classify.py index a0437e8d..fe26acf0 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -64,9 +64,7 @@ def matcher(df, substring, column, category): def classify_individuals(individuals_df): - """ - - """ + """ """ individuals_df["classification"] = "neutral" @@ -76,7 +74,6 @@ def classify_individuals(individuals_df): return individuals_df - def classify_orgs(organizations_df): """ """ From 244fe9414727613a3e8e25a90322824455308a06 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Tue, 27 Feb 2024 23:22:26 -0600 Subject: [PATCH 151/214] make should run classification properly --- utils/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/utils/Makefile b/utils/Makefile index 4723bef1..f0cda2ea 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -8,11 +8,13 @@ output deduplicated_individuals: all_individuals.csv output deduplicated_organizations: all_organizations.csv python deduplication_pipeline + output classified_individuals: all_individuals.csv - python classifier_pipeline + python classify.py classify_individuals output classified_organizations: all_organizations.csv - python classifier_pipeline + python classify.py classify_orgs +#just sketched out output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv python network_graph_pipeline \ No newline at end of file From c273a178f6d67aec7766dc2d0c76f67181667e22 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 09:15:15 -0600 Subject: [PATCH 152/214] moved names to constants --- utils/classify.py | 46 ++-------------------------------------------- utils/constants.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 44 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index fe26acf0..2002472d 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -1,5 +1,7 @@ import pandas as pd +from utils.constants import f_companies, f_org_names, c_org_names + # we want to run down a list of people 
and, hopefully, their adresses, plus a list of # corporations, groups, etc, and classify them, basically just looking for matches @@ -9,50 +11,6 @@ # we might also, once we have all the data, buckle down and just classify # some of them manually - -# several kinds of lists, applied differently, actually they should be dictionaries - -# corp name lists -# employer name lists -# etc - - -# individuals compnay f names -f_companies = [ - "exxon", - "chevron", - "southwest gas", - "petroleum", - "koch industries", - "koch companies", - "oil & gas", - "marathon oil", - "shell oil", -] - -# organizations f names -f_org_names = [ - "koch industries", - "koch pac", - "kochpac", - "southwest gas az", - "pinnacle west", - "americans for prosperity", - "energy transfer", -] - -# organizations c names -c_org_names = [ - "clean energy", - "vote solar action", - "renewable", - "pattern energy", - "beyond carbon", - "lcv victory", - "league of conservation", -] - - def matcher(df, substring, column, category): """ """ diff --git a/utils/constants.py b/utils/constants.py index f259db36..0384cba4 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -641,3 +641,38 @@ "CIC": "COMMUNITY INTEREST COMPANY", "PAC": "POLITICAL ACTION COMMITTEE", } + +# individuals compnay f names +f_companies = [ + "exxon", + "chevron", + "southwest gas", + "petroleum", + "koch industries", + "koch companies", + "oil & gas", + "marathon oil", + "shell oil", +] + +# organizations f names +f_org_names = [ + "koch industries", + "koch pac", + "kochpac", + "southwest gas az", + "pinnacle west", + "americans for prosperity", + "energy transfer", +] + +# organizations c names +c_org_names = [ + "clean energy", + "vote solar action", + "renewable", + "pattern energy", + "beyond carbon", + "lcv victory", + "league of conservation", +] \ No newline at end of file From 9519d67e03fab79999ac4313f2263af95de8a009 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 09:15:44 -0600 Subject: [PATCH 
153/214] linter fixes --- utils/classify.py | 3 ++- utils/constants.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 2002472d..5a729695 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -1,6 +1,6 @@ import pandas as pd -from utils.constants import f_companies, f_org_names, c_org_names +from utils.constants import c_org_names, f_companies, f_org_names # we want to run down a list of people and, hopefully, their adresses, plus a list of # corporations, groups, etc, and classify them, basically just looking for matches @@ -11,6 +11,7 @@ # we might also, once we have all the data, buckle down and just classify # some of them manually + def matcher(df, substring, column, category): """ """ diff --git a/utils/constants.py b/utils/constants.py index 0384cba4..c3bbcbc1 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -675,4 +675,4 @@ "beyond carbon", "lcv victory", "league of conservation", -] \ No newline at end of file +] From 0611585333a2eed4c7b8ed6e2431e6bec44752b8 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 09:52:27 -0600 Subject: [PATCH 154/214] added classification wrapper --- utils/classify.py | 50 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 5a729695..33c0c1bb 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -2,18 +2,38 @@ from utils.constants import c_org_names, f_companies, f_org_names -# we want to run down a list of people and, hopefully, their adresses, plus a list of -# corporations, groups, etc, and classify them, basically just looking for matches -# do we want to just input all the names/people (there's not many, less than 200 -# for sure),give a string similarity match score, and extract the top ten for -# manual review? 
this should give us a feeling for how to set our threshold -# we might also, once we have all the data, buckle down and just classify -# some of them manually +def classify_wrapper(individuals_df, organizations_df): + """Wrapper for classificaiton in linkage pipeline + + Initialize the classify column in both dataframes and + call sub-functions classifying individuals and organizations + + Args: individuals_df: cleaned and deduplicated dataframe of individuals + organizations_df: cleaned and deduplicated dataframe of organizations + + Returns: individuals and organizations datfarames with a new + 'classification' column containing 'neutral', 'f', or 'c' + """ + + individuals_df["classification"] = "neutral" + organizations_df["classification"] = "neutral" + + classified_individuals = classify_individuals(individuals_df) + classified_orgs = classify_orgs(organizations_df) + + return classified_individuals, classified_orgs def matcher(df, substring, column, category): - """ """ + """ Applies a label to the classification column based on substrings + + We run through a given column containing strings in the dataframe. We + seek out rows containing substrings, and apply a certain label to + the classification column. We initialize using the 'neutral' label and + use the 'f' and 'c' labels to denote fossil fuel and clean energy + entities respectively. + """ bool_series = df[column].str.contains(substring, na=False) @@ -23,9 +43,12 @@ def matcher(df, substring, column, category): def classify_individuals(individuals_df): - """ """ + """ Part of the classification pipeline - individuals_df["classification"] = "neutral" + We apply the matcher function to the individuals dataframe + repeatedly, using a variety of substrings to identify the + employees of fossil fuel companies. 
+ """ for i in f_companies: individuals_df = matcher(individuals_df, i, "company", "f") @@ -34,9 +57,12 @@ def classify_individuals(individuals_df): def classify_orgs(organizations_df): - """ """ + """ Part of the classification pipeline - organizations_df["classification"] = "neutral" + We apply the matcher function to the organizations dataframe + repeatedly, using a variety of substrings to identify fossil + fuel and clean energy companies. + """ for i in f_org_names: organizations_df = matcher(organizations_df, i, "name", "f") From f8c4dc1590efe975e7311983dca8e7cb2b8ee713 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 09:52:58 -0600 Subject: [PATCH 155/214] linter fix --- utils/classify.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 33c0c1bb..3c24f941 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -6,14 +6,14 @@ def classify_wrapper(individuals_df, organizations_df): """Wrapper for classificaiton in linkage pipeline - Initialize the classify column in both dataframes and + Initialize the classify column in both dataframes and call sub-functions classifying individuals and organizations Args: individuals_df: cleaned and deduplicated dataframe of individuals organizations_df: cleaned and deduplicated dataframe of organizations - Returns: individuals and organizations datfarames with a new - 'classification' column containing 'neutral', 'f', or 'c' + Returns: individuals and organizations datfarames with a new + 'classification' column containing 'neutral', 'f', or 'c' """ individuals_df["classification"] = "neutral" @@ -26,13 +26,13 @@ def classify_wrapper(individuals_df, organizations_df): def matcher(df, substring, column, category): - """ Applies a label to the classification column based on substrings + """Applies a label to the classification column based on substrings We run through a given column containing strings in the 
dataframe. We seek out rows containing substrings, and apply a certain label to - the classification column. We initialize using the 'neutral' label and - use the 'f' and 'c' labels to denote fossil fuel and clean energy - entities respectively. + the classification column. We initialize using the 'neutral' label and + use the 'f' and 'c' labels to denote fossil fuel and clean energy + entities respectively. """ bool_series = df[column].str.contains(substring, na=False) @@ -43,11 +43,11 @@ def matcher(df, substring, column, category): def classify_individuals(individuals_df): - """ Part of the classification pipeline + """Part of the classification pipeline We apply the matcher function to the individuals dataframe - repeatedly, using a variety of substrings to identify the - employees of fossil fuel companies. + repeatedly, using a variety of substrings to identify the + employees of fossil fuel companies. """ for i in f_companies: @@ -57,11 +57,11 @@ def classify_individuals(individuals_df): def classify_orgs(organizations_df): - """ Part of the classification pipeline + """Part of the classification pipeline We apply the matcher function to the organizations dataframe - repeatedly, using a variety of substrings to identify fossil - fuel and clean energy companies. + repeatedly, using a variety of substrings to identify fossil + fuel and clean energy companies. 
""" for i in f_org_names: From 3c619375b2a8a7a68a72fea4a97a2fee30360043 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 15:54:25 +0000 Subject: [PATCH 156/214] proper updates --- utils/linkage_pipeline.py | 146 +++++++++++++++++++++----------------- 1 file changed, 82 insertions(+), 64 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 613d3244..1f565446 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -1,5 +1,3 @@ -from typing import Tuple - import pandas as pd from nameparser import HumanName @@ -15,45 +13,29 @@ ) -def preprocess_pipeline( - individuals: pd.DataFrame, - organizations: pd.DataFrame, - transactions: pd.DataFrame, -) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: +def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: """ - Preprocesses data for record linkage + Given a dataframe of individual donors, preprocesses the data, + and return a cleaned dataframe. Args: - Individuals: dataframe of individual contributions - Organizations: dataframe of organization contributions - Transactions: dataframe of transactions + individuals: dataframe of individual contributions + Returns: - preprocessed tuple of dataframes - first element is the individuals dataframe, - second element is the organizations dataframe, - third element is the transactions dataframe + cleaned dataframe of individuals """ - # Preprocess organizations dataframe - organizations["name"] = ( - organizations["name"].astype(str).apply(standardize_corp_names) - ) - if "Unnamed: 0" in organizations.columns: - organizations.drop(columns="Unnamed: 0", inplace=True) - - # Preprocess individuals dataframe if "Unnamed: 0" in individuals.columns: individuals.drop(columns="Unnamed: 0", inplace=True) individuals = individuals.astype( { - "first_name": str, - "last_name": str, - "full_name": str, + "first_name": "string", + "last_name": "string", + "full_name": "string", "company": "string", } ) - # 
Standardize company names in individuals dataframe individuals["company"] = ( individuals["company"] .loc[individuals["company"].notnull()] @@ -66,7 +48,6 @@ def preprocess_pipeline( ) # Address functions, assuming address column is named 'Address' - # If there is an "Address" column in the first place if "Address" in individuals.columns: individuals["Address"] = individuals["Address"].astype(str) individuals["Address Line 1"] = individuals["Address"].apply( @@ -84,20 +65,12 @@ def preprocess_pipeline( individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) last_name = name.apply(lambda x: x["last"]) individuals["last_name"] = last_name @@ -108,44 +81,89 @@ def preprocess_pipeline( axis=1, ) + return individuals + + +def preprocess_organizations(organizations: pd.DataFrame) -> pd.DataFrame: + """ + Given a dataframe of organization donors, preprocesses the data, + and return a cleaned dataframe. + """ + if "Unnamed: 0" in organizations.columns: + organizations.drop(columns="Unnamed: 0", inplace=True) + + organizations["name"] = ( + organizations["name"] + .loc[organizations["name"].notnull()] + .apply(standardize_corp_names) + ) + + return organizations + + +def preprocess_transactions(transactions: pd.DataFrame) -> pd.DataFrame: + """ + Given a dataframe of transactions, preprocesses the data, + and return a cleaned dataframe. 
+ + Args: + transactions: dataframe of transactions + + Returns: + cleaned dataframe of transactions + """ if "Unnamed: 0" in transactions.columns: transactions.drop(columns="Unnamed: 0", inplace=True) transactions["purpose"] = transactions["purpose"].str.upper() - return individuals, organizations, transactions + return transactions -organizations = pd.read_csv( - BASE_FILEPATH / "output" / "complete_organizations_table.csv" -) +def main(): + organizations = pd.read_csv( + BASE_FILEPATH / "output" / "complete_organizations_table.csv" + ) -individuals = pd.read_csv( - BASE_FILEPATH / "output" / "complete_individuals_table.csv" -) + individuals = pd.read_csv( + BASE_FILEPATH / "output" / "complete_individuals_table.csv" + ) -transactions = pd.read_csv( - BASE_FILEPATH / "output" / "complete_transactions_table.csv" -) + transactions = pd.read_csv( + BASE_FILEPATH / "output" / "complete_transactions_table.csv" + ) -individuals, organizations, transactions = preprocess_pipeline( - individuals, organizations, transactions -) + individuals = preprocess_individuals(individuals) + organizations = preprocess_organizations(organizations) + transactions = preprocess_transactions(transactions) -individuals = deduplicate_perfect_matches(individuals) + # Deduplicates perfect matches and creates a new csv file + # in output titled "deduplicated_UUIDs.csv" + individuals = deduplicate_perfect_matches(individuals) + organizations = deduplicate_perfect_matches(organizations) -processed_individuals_output_path = ( - BASE_FILEPATH / "output" / "processed_individuals_table.csv" -) + cleaned_individuals_output_path = ( + BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" + ) -processed_organizations_output_path = ( - BASE_FILEPATH / "output" / "processed_organizations_table.csv" -) + cleaned_organizations_output_path = ( + BASE_FILEPATH / "output" / "cleaned_organizations_table.csv" + ) + + cleaned_transactions_output_path = ( + BASE_FILEPATH / "output" / 
"cleaned_transactions_table.csv" + ) + + deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") + + transactions[["donor_id", "recipient_id"]] = transactions[ + ["donor_id", "recipient_id"] + ].replace(deduped) + + individuals.to_csv(cleaned_individuals_output_path) + organizations.to_csv(cleaned_organizations_output_path) + transactions.to_csv(cleaned_transactions_output_path) -processed_transactions_output_path = ( - BASE_FILEPATH / "output" / "processed_transactions_table.csv" -) -individuals.to_csv(processed_individuals_output_path) -organizations.to_csv(processed_organizations_output_path) -transactions.to_csv(processed_transactions_output_path) +if __name__ == "__main__": + main() From 4e32543c82bec739f90cbf55a5749464d2a5851f Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 16:03:19 +0000 Subject: [PATCH 157/214] removing duplicated function --- utils/linkage.py | 60 ------------------------------------------------ 1 file changed, 60 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 29319907..cae5024d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,5 +1,3 @@ -import re - import textdistance as td import usaddress from names_dataset import NameDataset @@ -635,61 +633,3 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] raise ValueError("Can not find Address Number") - - -def cleaning_company_column(company_entry: str) -> str: - """ - Given a string, check if it contains a variation of self employed, unemployed, - or retired and return the standardized version. 
- - Args: - company: string of inputted company names - Returns: - standardized for retired, self employed, and unemployed, - or original string if no match or empty string - - >>> cleaning_company_column("Retireed") - 'Retired' - >>> cleaning_company_column("self") - 'Self Employed' - >>> cleaning_company_column("None") - 'Unemployed' - >>> cleaning_company_column("N/A") - 'Unemployed' - """ - - if not company_entry: - return company_entry - - company_edited = company_entry.lower() - - if company_edited == "n/a": - return "Unemployed" - - company_edited = re.sub(r"[^\w\s]", "", company_edited) - - if ( - company_edited == "retired" - or company_edited == "retiree" - or company_edited == "retire" - or "retiree" in company_edited - ): - return "Retired" - - elif ( - "self employe" in company_edited - or "freelance" in company_edited - or company_edited == "self" - or company_edited == "independent contractor" - ): - return "Self Employed" - elif ( - "unemploye" in company_edited - or company_edited == "none" - or company_edited == "not employed" - or company_edited == "nan" - ): - return "Unemployed" - - else: - return company_edited From d94243af71ebbcf97ef7ba50d1cc06f5e15a5ce4 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 16:14:33 +0000 Subject: [PATCH 158/214] attempting to pass dev checks --- utils/linkage.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index cae5024d..a6fcbdab 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -340,9 +340,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names[i] = determine_comma_role(names[i]) names[i] = names[i].replace(".", "").split(" ") - names[i] = [ - name_part for name_part in names[i] if name_part not in titles - ] + names[i] = [name_part for name_part in names[i] if name_part not in titles] names[i] = " ".join(names[i]) # one last check to remove any pieces that might add extra whitespace @@ 
-431,9 +429,7 @@ def name_rank(first_name: str, last_name: str) -> list: if first_name_result and isinstance(first_name_result, dict): first_name_data = first_name_result.get("first_name") if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get( - "United States", 0 - ) + first_name_rank = first_name_data["rank"].get("United States", 0) else: first_name_rank = None if isinstance(last_name, str): From 4336d3b752e7e558cafc87ef2af273f5cb014997 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 10:16:27 -0600 Subject: [PATCH 159/214] modified readme --- utils/README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/utils/README.md b/utils/README.md index 3b9172ad..ee0cd84d 100644 --- a/utils/README.md +++ b/utils/README.md @@ -70,4 +70,12 @@ Util functions for MN EDA classify the donor entities in the expenditures. 3. The Contributors datasets have 4 kinds of recipient entities: lobbyists, candidates, committees, and nan. In order to fit the entries within the - schema, I code nan entries as 'Organization' \ No newline at end of file + schema, I code nan entries as 'Organization' + +#### classify.py +1. These functions take in the deduplicated and cleaned individuals and organizations +dataframes from the deduplication and linkage pipeline. +2. We classify based on substrings known to indicate clean energy or fossil fuels groups. 
+In particular, individuals are classified based on their employment by fossil fuels companies, +and organizations are classified by their names, prioritizing high profile corporations/PACs +and those which were found by a manual search of the largest donors/recipients in the dataset \ No newline at end of file From df41e42d4134a50305f139ca0e7b53d181f31810 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 28 Feb 2024 16:21:24 +0000 Subject: [PATCH 160/214] reformatting files --- utils/linkage.py | 8 ++++++-- utils/linkage_pipeline.py | 12 ++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index a6fcbdab..cae5024d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -340,7 +340,9 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names[i] = determine_comma_role(names[i]) names[i] = names[i].replace(".", "").split(" ") - names[i] = [name_part for name_part in names[i] if name_part not in titles] + names[i] = [ + name_part for name_part in names[i] if name_part not in titles + ] names[i] = " ".join(names[i]) # one last check to remove any pieces that might add extra whitespace @@ -429,7 +431,9 @@ def name_rank(first_name: str, last_name: str) -> list: if first_name_result and isinstance(first_name_result, dict): first_name_data = first_name_result.get("first_name") if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get("United States", 0) + first_name_rank = first_name_data["rank"].get( + "United States", 0 + ) else: first_name_rank = None if isinstance(last_name, str): diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 1f565446..779469b5 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -65,12 +65,20 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = 
individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) + name = ( + individuals["full_name"] + .apply(HumanName) + .apply(lambda x: x.as_dict()) + ) last_name = name.apply(lambda x: x["last"]) individuals["last_name"] = last_name From c687295eee978d6488157235590dc0ea116542ad Mon Sep 17 00:00:00 2001 From: Avery Date: Wed, 28 Feb 2024 10:26:47 -0600 Subject: [PATCH 161/214] add usage instructions --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 498c0999..879a41e0 100644 --- a/README.md +++ b/README.md @@ -34,15 +34,20 @@ If you prefer to develop inside a container with VS Code then do the following s 3. Click the blue or green rectangle in the bottom left of VS code (should say something like `><` or `>< WSL`). Options should appear in the top center of your screen. Select `Reopen in Container`. -### Project Pipeline +### Data Collection and Standardization Pipeline 1. Collect the data through **one** of the steps below a. Collect state's finance campaign data either from web scraping (AZ, MI, PA) or direct download (MN) OR b. Go to the [Project's Google Drive]('https://drive.google.com/drive/u/2/folders/1HUbOU0KRZy85mep2SHMU48qUQ1ZOSNce') to download each state's data to their local repo following this format: repo_root / "data" / "raw" / state acronym / "file" 2. Open in development container which installs all necessary packages. 3. Run the project by running ```python utils/pipeline.py``` or ```python3 utils/pipeline.py``` run the processing pipeline that cleans, standardizes, and creates the individuals, organizations, and transactions concatenated into one comprehensive database. 
-5. running ```pipeline.py``` returns the tables to the output folder as csv files containing the complete individuals, organizations, and transactions DataFrames combining the AZ, MI, MN, and PA datasets. +5. Running ```pipeline.py``` returns the tables to the output folder as csv files containing the complete individuals, organizations, and transactions DataFrames combining the AZ, MI, MN, and PA datasets. 6. For future reference, the above pipeline also stores the information mapping given id to our database id (generated via uuid) in a csv file in the format of (state)IDMap.csv (example: ArizonaIDMap.csv) in the output folder +### Record Linkage and Network Pipeline +1. Save the standardized tables "complete_individuals_table.csv", "complete_organizations_table.csv", and "complete_transactions_table.csv" (collected from the above pipeline or data from the project's Google Drive) in the following format: repo_root / "output" / "file" +2. **UPDATE:** Run the pipeline by calling ```make run-linkage-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, and produce an interactive network visual. +3. The pipeline will output the deduplicated tables saved as "cleaned_individuals_table.csv", "cleaned_organizations_table.csv", and "cleaned_transactions_table.csv". A mapping file, "deduplicated_UUIDs.csv", tracks the UUIDs designated as duplicates. 
+ ## Repository Structure ### utils From f363bbebdfa2a4948d6a273ceda0fef017fedb99 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Wed, 28 Feb 2024 13:25:52 -0600 Subject: [PATCH 162/214] splink changes + deleted notebook --- notebooks/splink.ipynb | 659 ----------------------------------------- utils/linkage.py | 24 +- 2 files changed, 7 insertions(+), 676 deletions(-) delete mode 100644 notebooks/splink.ipynb diff --git a/notebooks/splink.ipynb b/notebooks/splink.ipynb deleted file mode 100644 index c37813d0..00000000 --- a/notebooks/splink.ipynb +++ /dev/null @@ -1,659 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "1a863d3e-59b4-46c3-ad0f-7d192a61ebe2", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).\n", - " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", - "/Users/naynapashilkar/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.2' currently installed).\n", - " from pandas.core import (\n", - "/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/3624639948.py:16: DtypeWarning: Columns (8,9,10,11,12) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " df_i = pd.read_csv('complete_individuals_table.csv')\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "# Define sample data\n", - "# i_data = {\n", - "# 'unique_id': range(1, 13),\n", - "# 'first_name': ['John', 'Jane', 'David', 'Emily', 'Michael', 'Sarah', 'John', 'Jane', 'David', 'Emily', 'John', 'John'],\n", - "# 'last_name': ['Doe', 'Smith', 'Johnson', 'Brown', 'Davis', 'Miller', 'Doe', 'Smith', 'Johnson', 'Brown', 'Miller', 'Jones'],\n", - "# 'full_name': ['John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'Michael Davis', 'Sarah Miller', 'John Doe', 'Jane Smith', 'David Johnson', 'Emily Brown', 'John Miller', 'John Jones'],\n", - "# 'entity_type': ['Person'] * 12,\n", - "# 'state': ['CA', 'NY', 'TX', 'FL', 'CA', 'NY', 'CA', 'TX', 'FL', 'NY', 'CA', 'FL'],\n", - "# 'party': ['Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent', 'Democrat', 'Republican', 'Independent'],\n", - "# 'company': ['Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Apple', 'Google', 'Microsoft', 'Amazon', 'Facebook', 'Google', 'Microsoft']\n", - "# }\n", - "\n", - "# Create DataFrame\n", - "df_i = pd.read_csv('complete_individuals_table.csv')\n", - "df_i.rename(columns={'id': 'unique_id'}, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "61447af5-7270-4438-baf7-29ba08203019", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0unique_idfirst_namelast_namefull_nameentity_typestatepartycompanyoccupationaddresszipcity
00efa41f9a-a31c-4154-acfa-3707d2c7cc47FREDERICKBERGFREDERICK BERG ...IndividualMINaNBUTZEL LONGATTORNEY1033 YORKSHIRE48230-0000GROSSE POINTE PARK
113e7661e4-1557-4fc8-9cd7-e4381eaed2d8JENNIFERCONSIGLIOJENNIFER CONSIGLIO ...IndividualMINaNBUTZEL LONGATTORNEY7520 SHUMAN DRIVE48438-0000GOODRICH
22a3bbe060-5da0-4b1d-a61c-0a32ec18c2edVANESSACROCETTOVANESSA CROCETTO ...IndividualMINaNBUTZEL LONGCHIEF MARKETING OFFICER4104 ARLINGTON DRIVE48073-0000ROYAL OAK
33667ae9be-5aff-4ce2-a032-5947032c1a9aCAREY A.DEWITTCAREY A. DEWITT ...IndividualMINaNBUTZEL LONGATTORNEY770 HUPP CROSS ROAD48301-0000BLOOMFIELD TWP
44f02b191a-235a-4d79-9e00-3fd63a249e66JENNIFERDUKARSKIJENNIFER DUKARSKI ...IndividualMINaNNaNNaN11855 BECK ROAD48170-0000PLYMOUTH
..........................................
2486933248693367a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486934248693467a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486935248693567a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486936248693667a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
2486937248693767a8b2e6-3ff2-4d9d-a049-545202df393eGRETCHENWHITMERGRETCHEN WHITMERCandidateMINaNNaNNaNNaNNaNNaN
\n", - "

2486938 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 unique_id \\\n", - "0 0 efa41f9a-a31c-4154-acfa-3707d2c7cc47 \n", - "1 1 3e7661e4-1557-4fc8-9cd7-e4381eaed2d8 \n", - "2 2 a3bbe060-5da0-4b1d-a61c-0a32ec18c2ed \n", - "3 3 667ae9be-5aff-4ce2-a032-5947032c1a9a \n", - "4 4 f02b191a-235a-4d79-9e00-3fd63a249e66 \n", - "... ... ... \n", - "2486933 2486933 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", - "2486934 2486934 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", - "2486935 2486935 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", - "2486936 2486936 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", - "2486937 2486937 67a8b2e6-3ff2-4d9d-a049-545202df393e \n", - "\n", - " first_name last_name \\\n", - "0 FREDERICK BERG \n", - "1 JENNIFER CONSIGLIO \n", - "2 VANESSA CROCETTO \n", - "3 CAREY A. DEWITT \n", - "4 JENNIFER DUKARSKI \n", - "... ... ... \n", - "2486933 GRETCHEN WHITMER \n", - "2486934 GRETCHEN WHITMER \n", - "2486935 GRETCHEN WHITMER \n", - "2486936 GRETCHEN WHITMER \n", - "2486937 GRETCHEN WHITMER \n", - "\n", - " full_name entity_type state \\\n", - "0 FREDERICK BERG ... Individual MI \n", - "1 JENNIFER CONSIGLIO ... Individual MI \n", - "2 VANESSA CROCETTO ... Individual MI \n", - "3 CAREY A. DEWITT ... Individual MI \n", - "4 JENNIFER DUKARSKI ... Individual MI \n", - "... ... ... ... \n", - "2486933 GRETCHEN WHITMER Candidate MI \n", - "2486934 GRETCHEN WHITMER Candidate MI \n", - "2486935 GRETCHEN WHITMER Candidate MI \n", - "2486936 GRETCHEN WHITMER Candidate MI \n", - "2486937 GRETCHEN WHITMER Candidate MI \n", - "\n", - " party company occupation address \\\n", - "0 NaN BUTZEL LONG ATTORNEY 1033 YORKSHIRE \n", - "1 NaN BUTZEL LONG ATTORNEY 7520 SHUMAN DRIVE \n", - "2 NaN BUTZEL LONG CHIEF MARKETING OFFICER 4104 ARLINGTON DRIVE \n", - "3 NaN BUTZEL LONG ATTORNEY 770 HUPP CROSS ROAD \n", - "4 NaN NaN NaN 11855 BECK ROAD \n", - "... ... ... ... ... 
\n", - "2486933 NaN NaN NaN NaN \n", - "2486934 NaN NaN NaN NaN \n", - "2486935 NaN NaN NaN NaN \n", - "2486936 NaN NaN NaN NaN \n", - "2486937 NaN NaN NaN NaN \n", - "\n", - " zip city \n", - "0 48230-0000 GROSSE POINTE PARK \n", - "1 48438-0000 GOODRICH \n", - "2 48073-0000 ROYAL OAK \n", - "3 48301-0000 BLOOMFIELD TWP \n", - "4 48170-0000 PLYMOUTH \n", - "... ... ... \n", - "2486933 NaN NaN \n", - "2486934 NaN NaN \n", - "2486935 NaN NaN \n", - "2486936 NaN NaN \n", - "2486937 NaN NaN \n", - "\n", - "[2486938 rows x 13 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_i" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "25334eac-e048-47e7-b911-571853e2a666", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RendererRegistry.enable('html')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from splink.duckdb.linker import DuckDBLinker\n", - "import altair as alt\n", - "alt.renderers.enable('html')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a5915f73-77a6-42c9-a7df-a8f0d396836c", - "metadata": {}, - "outputs": [], - "source": [ - "import splink.duckdb.comparison_template_library as ctl\n", - "import splink.duckdb.comparison_library as cl\n", - "\n", - "individual_settings = {\n", - " \"link_type\": \"dedupe_only\",\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"l.full_name - r.full_name\",\n", - " \"l.first_name = r.first_name and l.last_name = r.last_name\"\n", - " ],\n", - " # \"comparisons\": [\n", - " # ctl.name_comparison(\"first_name\"), #built in comparison function\n", - " # ctl.name_comparison(\"last_name\"),\n", - " # ctl.name_comparison(\"full_name\"),\n", - " # ctl.forename_surname_comparison(\"first_name\", \"last_name\"), #built in comparison function\n", - " # cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", - " # 
cl.jaro_winkler_at_thresholds(\"state\", [0.9, 0.8]), #threshold will catch typos and shortenings\n", - " # cl.jaro_winkler_at_thresholds(\"party\", [0.9, 0.8]),\n", - " # cl.jaro_winkler_at_thresholds(\"company\", [0.9, 0.8]),\n", - " # ],\n", - " \n", - " #DEFAULT\n", - " \"retain_matching_columns\": True,\n", - " \"retain_intermediate_calculation_columns\": True,\n", - " # \"max_iterations\": 10,\n", - " # \"em_convergence\": 0.01\n", - "}\n", - "\n", - "i_blocking = [\n", - " \"l.first_name = r.first_name and l.last_name = r.last_name\",\n", - " ]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "42ce12f9-1160-4e1e-848a-deb7882566a6", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3e162fc03ab041429b0c2b4143081ee6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linker = DuckDBLinker(df_i, individual_settings)\n", - "linker.count_num_comparisons_from_blocking_rule(\"l.first_name = r.first_name and l.last_name = r.last_name\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "217a07cf-eaa3-42a2-b43b-b2eecd740a7b", - "metadata": {}, - "outputs": [], - "source": [ - "def splink_dedupe(df, settings, blocking):\n", - " linker = DuckDBLinker(df, settings)\n", - " linker.estimate_probability_two_random_records_match(blocking, recall=0.6) #default\n", - " linker.estimate_u_using_random_sampling(max_pairs=5e6)\n", - " \n", - " for i in blocking:\n", - " training_session_names = linker.estimate_parameters_using_expectation_maximisation(i)\n", - " \n", - " df_predict = linker.predict()\n", - " df_e = df_predict.as_pandas_dataframe()\n", - " 
clusters = linker.cluster_pairwise_predictions_at_threshold(df_predict, threshold_match_probability=0.7) #default\n", - " return clusters.as_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "307ae7b8-b637-4451-aad3-9c848e8dff65", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b530b6b922494872b2721019d974f23e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Deterministic matching rules led to no observed matches! This means that no possible record pairs are matches, and no records are linked to one another.\n", - "If this is truly the case then you do not need to run the linkage model.\n", - "However this is usually in error; expected rules to have recall of 60%. Consider revising rules as they may have an error.\n", - "Probability two random records match is estimated to be 0.\n", - "This means that amongst all possible pairwise record comparisons, one in Infinity are expected to match. With 3,092,429,064,453 total possible comparisons, we expect a total of around 0.00 matching pairs\n", - "----- Estimating u probabilities using random sampling -----\n", - "\n", - "Estimated u probabilities using random sampling\n", - "\n", - "Your model is fully trained. 
All comparisons have at least one estimate for their m and u values\n", - "\n", - "----- Starting EM training session -----\n", - "\n", - "Estimating the m probabilities of the model by blocking on:\n", - "l.first_name = r.first_name and l.last_name = r.last_name\n", - "\n", - "Parameter estimates will be made for the following comparison(s):\n", - "\n", - "Parameter estimates cannot be made for the following comparison(s) since they are used in the blocking rules: \n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "31a41cbc2cde42819b137edfbd189831", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "EMTrainingException", - "evalue": "Training rule `l.first_name = r.first_name and l.last_name = r.last_name` resulted in no record pairs. This means that in the supplied data set there were no pairs of records for which `l.first_name = r.first_name and l.last_name = r.last_name` was `true`.\nExpectation maximisation requires a substantial number of record comparisons to produce accurate parameter estimates - usually at least a few hundred, but preferably at least a few thousand.\nYou must revise your training blocking rule so that the set of generated comparisons is not empty. 
You can use `linker.count_num_comparisons_from_blocking_rule()` to compute the number of comparisons that will be generated by a blocking rule.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mEMTrainingException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/657554030.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msplink_dedupe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindividual_settings\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi_blocking\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/var/folders/nk/h__9839s2k1_48m_z2g76vn40000gn/T/ipykernel_4396/2410222890.py\u001b[0m in \u001b[0;36msplink_dedupe\u001b[0;34m(df, settings, blocking)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mblocking\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtraining_session_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestimate_parameters_using_expectation_maximisation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdf_predict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlinker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/splink/linker.py\u001b[0m in 
\u001b[0;36mestimate_parameters_using_expectation_maximisation\u001b[0;34m(self, blocking_rule, comparisons_to_deactivate, comparison_levels_to_reverse_blocking_rule, estimate_without_term_frequencies, fix_probability_two_random_records_match, fix_m_probabilities, fix_u_probabilities, populate_probability_two_random_records_match_from_trained_values)\u001b[0m\n\u001b[1;32m 1704\u001b[0m )\n\u001b[1;32m 1705\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1706\u001b[0;31m \u001b[0mem_training_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1708\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_populate_m_u_from_trained_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/splink/em_training_session.py\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mcvv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_record_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mbr_sql\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"`{self._blocking_rule_for_training.blocking_rule_sql}`\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 181\u001b[0;31m raise EMTrainingException(\n\u001b[0m\u001b[1;32m 182\u001b[0m \u001b[0;34mf\"Training rule {br_sql} resulted in no record pairs. 
\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;34m\"This means that in the supplied data set \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mEMTrainingException\u001b[0m: Training rule `l.first_name = r.first_name and l.last_name = r.last_name` resulted in no record pairs. This means that in the supplied data set there were no pairs of records for which `l.first_name = r.first_name and l.last_name = r.last_name` was `true`.\nExpectation maximisation requires a substantial number of record comparisons to produce accurate parameter estimates - usually at least a few hundred, but preferably at least a few thousand.\nYou must revise your training blocking rule so that the set of generated comparisons is not empty. You can use `linker.count_num_comparisons_from_blocking_rule()` to compute the number of comparisons that will be generated by a blocking rule." - ] - } - ], - "source": [ - "splink_dedupe(df_i, individual_settings, i_blocking)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "890ba4ed-8e55-4128-bd36-cd8413cad00e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28948465-fea2-433d-bace-d0627dfe348d", - "metadata": {}, - "outputs": [], - "source": [ - "#--------------------------------------------" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef568813-181b-42a7-b3a2-786fe87addfb", - "metadata": {}, - "outputs": [], - "source": [ - "organizations_settings = {\n", - " \"link_type\": \"dedupe_only\",\n", - " \"blocking_rules_to_generate_predictions\": [\n", - " \"l.name = r.name\",\n", - " ],\n", - " \"comparisons\": [\n", - " ctl.name_comparison(\"name\", term_frequency_adjustments=True),\n", - " cl.exact_match(\"entity_type\", term_frequency_adjustments=True),\n", - " cl.jaro_winkler_at_thresholds(\"state\", [0.9, 0.8]), #threshold will catch typos and 
shortenings\n", - " # Add more comparisons as needed\n", - " ],\n", - " \"retain_matching_columns\": True,\n", - " \"retain_intermediate_calculation_columns\": True,\n", - " \"max_iterations\": 10,\n", - " \"em_convergence\": 0.01\n", - "}\n", - "\n", - "o_blocking = [\n", - " \"l.name = r.name\",\n", - " \"l.name = r.name and l.state = r.state\",\n", - " ]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9354ec53-0aa3-40b7-968a-d6b5263182c9", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# Define sample data\n", - "o_data = {\n", - " 'unique_id': range(1, 13),\n", - " 'name': ['Apple Inc.', 'Google LLC', 'Microsoft Corporation', 'Amazon.com Inc.', 'Facebook Inc.', \n", - " 'Apple Inc.', 'Google LLC', 'Microsoft Corporation', 'Amazon.com Inc.', 'Facebook Inc.', \n", - " 'Google LLC', 'Microsoft Corporation'],\n", - " 'entity_type': ['Organization'] * 12,\n", - " 'state': ['CA', 'NY', 'WA', 'WA', 'CA', 'CA', 'NY', 'WA', 'WA', 'CA', 'NY', 'WA'],\n", - "}\n", - "\n", - "# Create DataFrame\n", - "df_o = pd.DataFrame(o_data)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a1c1785-17b6-4aa2-9a10-b431c70e411d", - "metadata": {}, - "outputs": [], - "source": [ - "splink_dedupe(df_o, organizations_settings, o_blocking)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac3665d0-984c-4367-a7eb-62cd980dff16", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bffdeb09-0788-449d-9046-351d4a258537", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" 
- } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/utils/linkage.py b/utils/linkage.py index 0a575a1d..ca0c8b23 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -673,16 +673,12 @@ def splink_dedupe( clusters = linker.cluster_pairwise_predictions_at_threshold( df_predict, threshold_match_probability=0.7 ) # default - clusters_df = ( - clusters.as_pandas_dataframe() - ) # dataframe where cluster_id maps unique_id to initial instance of row + clusters_df = clusters.as_pandas_dataframe() match_list_df = ( clusters_df.groupby("cluster_id")["unique_id"].agg(list).reset_index() - ) - match_list_df.rename( - columns={"unique_id": "matching_list"}, inplace=True - ) # dataframe which matches cluster_id to a list of unique_ids + ) # dataframe where cluster_id maps unique_id to initial instance of row + match_list_df.rename(columns={"unique_id": "duplicated"}, inplace=True) first_instance_df = clusters_df.drop_duplicates(subset="cluster_id") col_names = np.append("cluster_id", df.columns) @@ -694,16 +690,10 @@ def splink_dedupe( on="cluster_id", how="left", ) + deduped_df.rename(columns={"cluster_id": "unique_id"}, inplace=True) - match_list_df.rename( - columns={"unique_id": "mapped_uuids", "cluster_id": "original_ids"}, - inplace=True, - ) - deduped_df.to_csv( - repo_root / "output" / "splink_deduplicated_UUIDs.csv", - index=False, - mode="a", - header=not os.path.exists("../output/splink_deduplicated_UUIDs.csv"), - ) + convert_duplicates_to_dict(deduped_df) + + deduped_df.drop(columns=["duplicated"]) return deduped_df From 19013415c1eb16c42ba3f355343a6eb6df72f7ea Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Wed, 28 Feb 2024 20:15:29 -0600 Subject: [PATCH 163/214] moved to original makefile --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index e210fb2c..36577581 100644 --- a/Makefile +++ b/Makefile @@ -29,3 +29,9 @@ run-notebooks: jupyter lab --port=8888 --ip='*' --NotebookApp.token='' 
--NotebookApp.password='' \ --no-browser --allow-root + +#running the linkage pipeline and creating the network graph +#still waiting on linkage_pipeline completion to get this into final shape + +output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv + python linkage_pipeline.py \ No newline at end of file From a7db7d81f6eb8c0d68cc13c7c2f2aae32b1cba03 Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Thu, 29 Feb 2024 08:11:52 -0600 Subject: [PATCH 164/214] Delete utils/Makefile --- utils/Makefile | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 utils/Makefile diff --git a/utils/Makefile b/utils/Makefile deleted file mode 100644 index f0cda2ea..00000000 --- a/utils/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -all: all_individuals.csv, all_organizations.csv, all_transactions.csv - -#set this up as a pipeline, check how far along Adil is on this -output deduplicated_individuals: all_individuals.csv - python deduplication_pipeline - -#likewise here -output deduplicated_organizations: all_organizations.csv - python deduplication_pipeline - - -output classified_individuals: all_individuals.csv - python classify.py classify_individuals - -output classified_organizations: all_organizations.csv - python classify.py classify_orgs - -#just sketched out -output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv - python network_graph_pipeline \ No newline at end of file From b626fc818e018a7d03031ac652389282bf561c9d Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Thu, 29 Feb 2024 08:17:02 -0600 Subject: [PATCH 165/214] Update linkage.py --- utils/linkage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index ca0c8b23..48e24b2d 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -641,8 +641,7 @@ def 
get_address_number_from_address_line_1(address_line_1: str) -> str: def splink_dedupe( df: pd.DataFrame, settings: dict, blocking: list ) -> pd.DataFrame: - """Given a dataframes, the corresponding - configuration settings, and corresponding blocking rules return a + """Given a dataframe and config settings, return a deduplicated dataframe Configuration settings and blocking can be found in constants.py as From 1fc2a2a8379a45f598ed422bebeca0b01393318f Mon Sep 17 00:00:00 2001 From: npashilkar Date: Thu, 29 Feb 2024 08:32:18 -0600 Subject: [PATCH 166/214] splink output edits --- utils/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/constants.py b/utils/constants.py index a67450b3..b4be2565 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -726,4 +726,4 @@ "beyond carbon", "lcv victory", "league of conservation", -] \ No newline at end of file +] From 26d47736e65212150aff8e619d73f3723b859bdc Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Thu, 29 Feb 2024 15:01:36 +0000 Subject: [PATCH 167/214] classify function --- utils/linkage_pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index f13b4235..e9fcf06c 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -1,6 +1,5 @@ import pandas as pd - -# from classify import classify_wrapper +from classify import classify_wrapper from nameparser import HumanName from utils.constants import BASE_FILEPATH @@ -172,6 +171,8 @@ def main(): ["donor_id", "recipient_id"] ].replace(deduped) + individuals, organizations = classify_wrapper(individuals, organizations) + individuals.to_csv(cleaned_individuals_output_path, index=False) organizations.to_csv(cleaned_organizations_output_path, index=False) transactions.to_csv(cleaned_transactions_output_path, index=False) From 609220d143cc710ad9404ee6e54fda16464e72eb Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Sun, 3 Mar 2024 
16:26:17 -0600 Subject: [PATCH 168/214] saving work for graph work. No need to review yet --- utils/network.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/utils/network.py b/utils/network.py index 6e9dcd90..8a076efa 100644 --- a/utils/network.py +++ b/utils/network.py @@ -50,14 +50,11 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: "office_sought", "purpose", "transaction_type", - "recipient_id", + "year", "transaction_id", - "recipient_type", "donor_office", - "recipient_name", "amount", ] - for _, row in df.iterrows(): # add node attributes based on the columns relevant to the entity G.add_node(row[node_name]) @@ -67,10 +64,28 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: # link the donor node to the recipient node. add the attributes of the # edge based on relevant nodes + edge_dictionary = {} for column in transact_info: if not pd.isnull(row[column]): - G.add_edge( - row[node_name], row["recipient_name"], column=row[column] - ) + edge_dictionary[column] = row[column] + G.add_edge(row[node_name], row["recipient_name"], **edge_dictionary) + + # the added 'recipient_name' node has no attributes at this moment + # for the final code this line won't be necessary, as each recipient + # should ideally be referenced later on. 
For now, all added nodes for + # the recipient will only have one default attribute: classification + G.nodes[row["recipient_name"]]["classification"] = "neutral" + + edge_labels = {(u, v): d["amount"] for u, v, d in G.edges(data=True)} + entity_colors = {"neutral": "green", "c": "blue", "f": "red"} + node_colors = [ + entity_colors[G.nodes[node]["classification"]] for node in G.nodes() + ] + + nx.draw_planar(G, with_labels=False, node_color=node_colors) + nx.draw_networkx_edge_labels( + G, pos=nx.spring_layout(G), edge_labels=edge_labels, label_pos=0.5 + ) + # nx.draw_planar(G, with_labels=False) return G From 3266ce7e965a09e816c875e2e79be1d4f062f2df Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 02:04:09 +0000 Subject: [PATCH 169/214] slight changes --- utils/linkage_pipeline.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index e9fcf06c..5f251f3b 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -68,20 +68,12 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: individuals["full_name"].notnull() ] if individuals["first_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) first_name = name.apply(lambda x: x["first"]) individuals["first_name"] = first_name if individuals["last_name"].isnull().any(): - name = ( - individuals["full_name"] - .apply(HumanName) - .apply(lambda x: x.as_dict()) - ) + name = individuals["full_name"].apply(HumanName).apply(lambda x: x.as_dict()) last_name = name.apply(lambda x: x["last"]) individuals["last_name"] = last_name @@ -167,12 +159,15 @@ def main(): deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") + # Classifies individuals and organizations with a new 'classification' + # column containing 'neutral', 'f', 
or 'c' + individuals, organizations = classify_wrapper(individuals, organizations) + + # Update the transactions table with the deduplicated UUIDs transactions[["donor_id", "recipient_id"]] = transactions[ ["donor_id", "recipient_id"] ].replace(deduped) - individuals, organizations = classify_wrapper(individuals, organizations) - individuals.to_csv(cleaned_individuals_output_path, index=False) organizations.to_csv(cleaned_organizations_output_path, index=False) transactions.to_csv(cleaned_transactions_output_path, index=False) From d262deeccc886c0103cc2c12866d83bb8b843370 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 04:00:12 +0000 Subject: [PATCH 170/214] possible splink implementation fix --- utils/linkage_pipeline.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 1b4db66e..9baa5204 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -171,9 +171,10 @@ def main(): deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") # Splink deduplication - individuals = splink_dedupe( - individuals, individuals_settings, individuals_blocking - ) + individuals["unique_id"] = individuals["id"] + organizations["unique_id"] = organizations["id"] + + individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) organizations = splink_dedupe( organizations, organizations_settings, organizations_blocking From 5a81b23c3e928b8d9b3aaf30a233e77af473d64f Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 02:39:54 -0600 Subject: [PATCH 171/214] graph work so far with plotly --- utils/network.py | 92 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/utils/network.py b/utils/network.py index 8a076efa..88572aff 100644 --- a/utils/network.py +++ b/utils/network.py @@ -1,5 +1,6 @@ import networkx as nx import pandas as pd +import 
plotly.graph_objects as go def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: @@ -28,7 +29,7 @@ def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: return None -def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: +def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: """Takes in a dataframe and generates a MultiDiGraph where the nodes are entity names, and the rest of the dataframe columns make the node attributes @@ -37,7 +38,7 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: complete_organizations_table) Returns: - A Networkx MultiDiGraph with nodes lacking any edges + A Networkx MultiDiGraph with nodes and edges """ G = nx.MultiDiGraph() # first check if df is individuals or organizations dataset @@ -46,7 +47,7 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: else: node_name = "full_name" - transact_info = [ + edge_columns = [ "office_sought", "purpose", "transaction_type", @@ -55,37 +56,68 @@ def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph: "donor_office", "amount", ] + for _, row in df.iterrows(): # add node attributes based on the columns relevant to the entity - G.add_node(row[node_name]) - for column in df.columns.difference(transact_info): - if not pd.isnull(row[column]): - G.nodes[row[node_name]][column] = row[column] - - # link the donor node to the recipient node. add the attributes of the - # edge based on relevant nodes - edge_dictionary = {} - for column in transact_info: - if not pd.isnull(row[column]): - edge_dictionary[column] = row[column] - G.add_edge(row[node_name], row["recipient_name"], **edge_dictionary) - - # the added 'recipient_name' node has no attributes at this moment - # for the final code this line won't be necessary, as each recipient - # should ideally be referenced later on. 
For now, all added nodes for - # the recipient will only have one default attribute: classification + G.add_node( + row[node_name], + **row[df.columns.difference(edge_columns)].dropna().to_dict(), + ) + # add the recipient as a node G.nodes[row["recipient_name"]]["classification"] = "neutral" - edge_labels = {(u, v): d["amount"] for u, v, d in G.edges(data=True)} - entity_colors = {"neutral": "green", "c": "blue", "f": "red"} - node_colors = [ - entity_colors[G.nodes[node]["classification"]] for node in G.nodes() - ] + # add the edge attributes between two nodes + edge_attributes = row[edge_columns].dropna().to_dict() + G.add_edge(row[node_name], row["recipient_name"], **edge_attributes) + + return G + - nx.draw_planar(G, with_labels=False, node_color=node_colors) - nx.draw_networkx_edge_labels( - G, pos=nx.spring_layout(G), edge_labels=edge_labels, label_pos=0.5 +def plot_network_graph(G: nx.MultiDiGraph): + """Given a networkX Graph, creates a plotly visualization of the nodes and + edges + + Args: + A networkX MultiDiGraph with edges including the attribute 'amount' + + Returns: None. Creates a plotly graph + """ + edge_trace = go.Scatter( + x=[], y=[], line=dict(color="#888"), hoverinfo="text", mode="lines" ) + hovertext = [] - # nx.draw_planar(G, with_labels=False) - return G + for edge in G.edges(data=True): + # donor = edge[0], recipient = edge[1] + hovertext.append(f"Amount: {edge[2]['amount']:.2f}") + + edge_trace["hovertext"] = hovertext + + node_trace = go.Scatter( + x=[], + y=[], + text=[], + mode="markers", + hoverinfo="text", + marker=dict(showscale=True, colorscale="YlGnBu", size=10), + ) + + for node in G.nodes(): + node_info = f"Name: {node}
" + for key, value in G.nodes[node].items(): + node_info += f"{key}: {value}
" + node_trace["text"] += tuple([node_info]) + + # Define layout settings + layout = go.Layout( + title="Network Graph Indicating Campaign Contributions from 2018-2022", + titlefont=dict(size=16), + showlegend=False, + hovermode="closest", + margin=dict(b=20, l=5, r=5, t=40), + xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + ) + + fig = go.Figure(data=[edge_trace, node_trace], layout=layout) + fig.show() From b377acdceb702ca0bd252699e3febd14f3bbb163 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 10:50:25 -0600 Subject: [PATCH 172/214] Test notebook with functions for merging datasets, no need to review, will delete later --- notebooks/Test.ipynb | 12403 +++++++++++++++++++++++++++++++++++------ 1 file changed, 10703 insertions(+), 1700 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index d188b444..b9ac1762 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -10,8 +10,8 @@ "import numpy as np\n", "import networkx as nx\n", "import matplotlib.pyplot as plt\n", - "\n", - "from utils.linkage import deduplicate_perfect_matches" + "import plotly.express as px\n", + "import plotly.graph_objects as go\n" ] }, { @@ -20,9 +20,9 @@ "metadata": {}, "outputs": [], "source": [ - "orgs_df = pd.read_csv(\"../output/complete_organizations_table.csv\",index_col=0)#,nrows=10000).sample(10)\n", - "inds_df = pd.read_csv(\"../output/complete_individuals_table.csv\",index_col=0, low_memory=False)#, nrows=10000).sample(10)\n", - "transactions = pd.read_csv(\"../output/complete_transactions_table.csv\",index_col=0, low_memory=False)" + "orgs_df = pd.read_csv(\"../data/classified_data/classified_organizations_v1\").sample(10000)\n", + "inds_df = pd.read_csv(\"../data/classified_data/classified_individuals_v1\", low_memory=False).sample(10000)\n", + "transactions = pd.read_csv(\"../data/classified_data/transactions_v1\", low_memory=False)" ] }, { 
@@ -55,31 +55,53 @@ " name\n", " state\n", " entity_type\n", + " classification\n", " \n", " \n", " \n", " \n", - " 0\n", - " 1022\n", - " #1022 arizona committee of automotive retailers\n", - " AZ\n", - " pac\n", + " 63128\n", + " 422065cd-0262-4ac9-a2a4-74136ddb99e2\n", + " floyd workman\n", + " MI\n", + " corporation\n", + " neutral\n", " \n", " \n", - " 4\n", - " 100112\n", - " 314 action victory fund (fec id c00689828)\n", - " DC\n", - " pac\n", + " 98258\n", + " dfd160b5-9389-44ef-a632-c08dc1a1d201\n", + " front 43\n", + " MI\n", + " corporation\n", + " neutral\n", + " \n", + " \n", + " 1712\n", + " 858415ce-d53f-4843-aee0-85560117bdc6\n", + " arizona federation of democratic women\n", + " NaN\n", + " vendor\n", + " neutral\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id name state entity_type\n", - "0 1022 #1022 arizona committee of automotive retailers AZ pac\n", - "4 100112 314 action victory fund (fec id c00689828) DC pac" + " id \\\n", + "63128 422065cd-0262-4ac9-a2a4-74136ddb99e2 \n", + "98258 dfd160b5-9389-44ef-a632-c08dc1a1d201 \n", + "1712 858415ce-d53f-4843-aee0-85560117bdc6 \n", + "\n", + " name state entity_type \\\n", + "63128 floyd workman MI corporation \n", + "98258 front 43 MI corporation \n", + "1712 arizona federation of democratic women NaN vendor \n", + "\n", + " classification \n", + "63128 neutral \n", + "98258 neutral \n", + "1712 neutral " ] }, "execution_count": 3, @@ -88,13 +110,33 @@ } ], "source": [ - "orgs_df.head(2)" + "orgs_df.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['neutral'], dtype=object)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orgs_df.classification.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [ { "data": { @@ -117,74 +159,36 @@ " \n", " \n", " \n", - " transaction_id\n", - " donor_id\n", - " year\n", - 
" amount\n", - " recipient_id\n", - " office_sought\n", - " purpose\n", - " transaction_type\n", - " donor_type\n", - " recipient_type\n", - " donor_office\n", + " id\n", + " name\n", + " state\n", + " entity_type\n", + " classification\n", " \n", " \n", " \n", - " \n", - " 0\n", - " 4640650\n", - " 100592\n", - " 2021\n", - " 25.0\n", - " 1869727\n", - " none\n", - " wr 9.13\n", - " contribution from individuals\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 1\n", - " 8185257\n", - " 201800301\n", - " 2020\n", - " 100.0\n", - " 1779679\n", - " none\n", - " ab\n", - " contribution from individuals\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " transaction_id donor_id year amount recipient_id office_sought purpose \\\n", - "0 4640650 100592 2021 25.0 1869727 none wr 9.13 \n", - "1 8185257 201800301 2020 100.0 1779679 none ab \n", - "\n", - " transaction_type donor_type recipient_type donor_office \n", - "0 contribution from individuals NaN NaN NaN \n", - "1 contribution from individuals NaN NaN NaN " + "Empty DataFrame\n", + "Columns: [id, name, state, entity_type, classification]\n", + "Index: []" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transactions.head(2)" + "orgs_df.loc[orgs_df.classification == 'f']" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -208,47 +212,44 @@ " \n", " \n", " \n", - " id\n", - " first_name\n", - " last_name\n", - " full_name\n", - " entity_type\n", - " state\n", - " party\n", - " company\n", - " occupation\n", - " address\n", - " zip\n", - " city\n", + " transaction_id\n", + " donor_id\n", + " year\n", + " amount\n", + " recipient_id\n", + " office_sought\n", + " purpose\n", + " transaction_type\n", + " donor_type\n", + " recipient_type\n", + " donor_office\n", " \n", " \n", " \n", " \n", " 0\n", - " 1869727\n", - " NaN\n", - " NaN\n", - " 
william \bstoner\n", - " individual\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 7773a71e-9f67-438e-8313-80b1b75deeb4\n", + " 4544b60d-da6b-4dd5-9efe-334152ccf1f1\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " bob worsley for state senate\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", " \n", " \n", " 1\n", - " 1779679\n", - " NaN\n", - " NaN\n", - " rm coulon\n", - " individual\n", - " NaN\n", - " NaN\n", - " area agency on aging\n", - " NaN\n", + " 95f74915-a945-491f-8751-8c970a76fc24\n", + " 946d7561-42a3-4a4b-b410-3a10271c9f18\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " drew john for state house\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", @@ -258,36 +259,64 @@ "" ], "text/plain": [ - " id first_name last_name full_name entity_type state party \\\n", - "0 1869727 NaN NaN william \bstoner individual NaN NaN \n", - "1 1779679 NaN NaN rm coulon individual NaN NaN \n", + " transaction_id donor_id \\\n", + "0 7773a71e-9f67-438e-8313-80b1b75deeb4 4544b60d-da6b-4dd5-9efe-334152ccf1f1 \n", + "1 95f74915-a945-491f-8751-8c970a76fc24 946d7561-42a3-4a4b-b410-3a10271c9f18 \n", "\n", - " company occupation address zip city \n", - "0 NaN NaN NaN NaN NaN \n", - "1 area agency on aging NaN NaN NaN NaN " + " year amount recipient_id office_sought \\\n", + "0 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "1 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "\n", + " purpose transaction_type \\\n", + "0 bob worsley for state senate contribute to a candidate committee \n", + "1 drew john for state house contribute to a candidate committee \n", + "\n", + " donor_type recipient_type donor_office \n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_df.head(2)" + 
"transactions.head(2)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(541803, 541150, 77611, 77611)" + "array(['neutral', 'f'], dtype=object)" ] }, - "execution_count": 6, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inds_df.classification.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9926, 9919, 10000, 10000)" + ] + }, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -315,16 +344,40 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['100883', '100894']" + "['242d019c-e0ab-405e-8e77-abae7418b87f',\n", + " '8b2ad550-64a1-4975-8b77-5eb1f24a8871',\n", + " 'aee69307-194f-4c40-af3d-a55a34e1068e',\n", + " '55e5e946-6261-4f19-9752-fb58219b2e99',\n", + " '4faf251a-73d9-46ef-9e17-d3cf0a3052ae',\n", + " '3b5c0a9e-c6f2-44e9-ad05-fde071447564',\n", + " '3936bdf5-9a7a-462c-9e8c-9124f2bd7f57',\n", + " '13882059-3c74-4d9e-825d-a03a72b43b08',\n", + " '50c78f1a-3e9b-4996-a319-eef4fe01ccfb',\n", + " 'ae96f38f-68c8-47e3-95b3-c6f096d3c22e',\n", + " '74ba8a8a-7256-4eb3-b0f8-995f7a6319fb',\n", + " '12823a76-78e2-4b09-b606-859efaa5c8ef',\n", + " '9de9bf03-8c4a-4d2f-9a95-283b230ddfad',\n", + " '588593b9-9bba-4597-94d9-1b3a7fd5b402',\n", + " '5277b642-6bf0-4423-9350-3602ae51c6ac',\n", + " 'd98985b4-f55d-4ada-b279-0497e3176512',\n", + " 'c8586d36-f188-4684-aa99-193407d4d068',\n", + " '3798fda1-83cd-4e48-974a-e1a390060198',\n", + " 'a536b509-f052-4984-a35d-10397308daec',\n", + " '80996477-ce99-4f34-b5fc-bab4d676fc77',\n", + " 'cd1a740c-b1d7-4334-b335-925bd5708753',\n", + " '46af8908-f4e4-4041-9d1e-5b442d051921',\n", + " '2969075a-86d2-4b04-a991-a81832e096a0',\n", + " 'd0337f72-b701-4524-891b-c48ef6f771ec',\n", + " 
'591aa72b-511b-4dbb-a161-80458f257471']" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -339,135 +392,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamestateentity_typedonationsdonations_toreceiveddonations_from
050c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee4249REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...730COMMITTEE TO ELECT DR PATRICIA BERNARD
150c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee426MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC853Pabar Pac (Pa Bar Assn)
250c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee382REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...620MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC
362ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee2328MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC4505Paa Pac
462ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee3421Paa Pac672Paa Pac
\n", - "
" - ], - "text/plain": [ - " id \\\n", - "0 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "1 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "2 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", - "3 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "4 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", - "\n", - " name state entity_type \\\n", - "0 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "1 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "2 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", - "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", - "4 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", - "\n", - " donations donations_to received \\\n", - "0 4249 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 730 \n", - "1 426 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 853 \n", - "2 382 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 620 \n", - "3 2328 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC 4505 \n", - "4 3421 Paa Pac 672 \n", - "\n", - " donations_from \n", - "0 COMMITTEE TO ELECT DR PATRICIA BERNARD \n", - "1 Pabar Pac (Pa Bar Assn) \n", - "2 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", - "3 Paa Pac \n", - "4 Paa Pac " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data = {'id':['50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360',\n", " '62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", @@ -555,7 +482,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -579,274 +506,180 @@ " \n", " \n", " \n", - " id\n", - " first_name\n", - " last_name\n", - " full_name\n", - " entity_type\n", - " state\n", - " party\n", - " company\n", - " occupation\n", - " address\n", - " zip\n", - " city\n", + " 
transaction_id\n", + " donor_id\n", + " year\n", + " amount\n", + " recipient_id\n", + " office_sought\n", + " purpose\n", + " transaction_type\n", + " donor_type\n", + " recipient_type\n", + " donor_office\n", + " recipient_name\n", " \n", " \n", " \n", " \n", - " 102\n", - " 100894\n", - " NaN\n", - " NaN\n", - " abdussamad, shams\n", - " candidate\n", - " AZ\n", - " democratic\n", - " none (is a candidate)\n", - " NaN\n", + " 0\n", + " 7773a71e-9f67-438e-8313-80b1b75deeb4\n", + " 4544b60d-da6b-4dd5-9efe-334152ccf1f1\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " bob worsley for state senate\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", + " #1022 arizona committee of automotive retailers\n", " \n", " \n", - " 103\n", - " 100894\n", - " NaN\n", - " NaN\n", - " abdussamad, shams\n", - " candidate\n", - " AZ\n", - " democratic\n", - " none (is a candidate)\n", - " NaN\n", + " 1\n", + " 95f74915-a945-491f-8751-8c970a76fc24\n", + " 946d7561-42a3-4a4b-b410-3a10271c9f18\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " drew john for state house\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", + " #1022 arizona committee of automotive retailers\n", " \n", " \n", - " 104\n", - " 100883\n", - " NaN\n", - " NaN\n", - " abeytia, anna lynn\n", - " candidate\n", - " AZ\n", - " democratic\n", - " none (is a candidate)\n", - " NaN\n", + " 2\n", + " d05f1763-132d-4717-addc-8ff6239ad4d9\n", + " c8f98436-9562-48ed-b51f-45b2b217aad1\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " elect karen fann ld1\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", + " #1022 arizona committee of automotive retailers\n", " \n", " \n", - " 105\n", - " 100883\n", - " NaN\n", - " NaN\n", - " abeytia, anna lynn\n", - " candidate\n", - " AZ\n", - " democratic\n", - " none (is a 
candidate)\n", - " NaN\n", + " 3\n", + " 3dc3da30-6562-4755-bfad-6a26f1baec15\n", + " b9965bc2-c94d-4f69-98d1-bc4f5ad701c5\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " elect noel campbell for house\n", + " contribute to a candidate committee\n", " NaN\n", " NaN\n", " NaN\n", + " #1022 arizona committee of automotive retailers\n", " \n", " \n", - " 0\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " FREDERICK\n", - " BERG\n", - " FREDERICK BERG ...\n", - " Individual\n", - " MI\n", - " NaN\n", - " BUTZEL LONG\n", - " ATTORNEY\n", - " 1033 YORKSHIRE\n", - " 48230-0000\n", - " GROSSE POINTE PARK\n", - " \n", - " \n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 17734\n", - " 75b99f42-e0d4-4c3c-89a6-16e11f6dd810\n", - " NaN\n", - " NaN\n", - " Rodriguez, Adrian\n", - " Individual\n", - " MN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 17735\n", - " 8b634b74-a6be-4280-a2c4-63e46a8f9bc9\n", - " NaN\n", - " NaN\n", - " O'Connor, Timothy J\n", - " Individual\n", - " MN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 17736\n", - " d7d5b121-015f-474f-8b76-7c6c865da557\n", - " NaN\n", - " NaN\n", - " Frenzel, Robert C\n", - " Individual\n", - " MN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 17737\n", - " b2eaaec4-30d5-46f4-9922-efc8d79c16d2\n", - " NaN\n", - " NaN\n", - " Enzminger, Peter\n", - " Individual\n", - " MN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 17738\n", - " de34f2c7-fa2f-4fa5-abea-b67f6c8fe35f\n", - " NaN\n", - " NaN\n", - " Bowler, Erin\n", - " Individual\n", - " MN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 4\n", + " 
a4340a2c-7b8a-4eeb-8290-746f0f436c83\n", + " 946d7561-42a3-4a4b-b410-3a10271c9f18\n", + " 2018\n", + " 1000.0\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " none\n", + " closed to new donations\n", + " refund from contrib to a cand committee\n", " NaN\n", + " NaN\n", + " NaN\n", + " #1022 arizona committee of automotive retailers\n", " \n", " \n", "\n", - "

1760156 rows × 12 columns

\n", "" ], "text/plain": [ - " id first_name \\\n", - "102 100894 NaN \n", - "103 100894 NaN \n", - "104 100883 NaN \n", - "105 100883 NaN \n", - "0 b8fbed14-0766-49ab-8516-97952c654a12 FREDERICK \n", - "... ... ... \n", - "17734 75b99f42-e0d4-4c3c-89a6-16e11f6dd810 NaN \n", - "17735 8b634b74-a6be-4280-a2c4-63e46a8f9bc9 NaN \n", - "17736 d7d5b121-015f-474f-8b76-7c6c865da557 NaN \n", - "17737 b2eaaec4-30d5-46f4-9922-efc8d79c16d2 NaN \n", - "17738 de34f2c7-fa2f-4fa5-abea-b67f6c8fe35f NaN \n", - "\n", - " last_name \\\n", - "102 NaN \n", - "103 NaN \n", - "104 NaN \n", - "105 NaN \n", - "0 BERG \n", - "... ... \n", - "17734 NaN \n", - "17735 NaN \n", - "17736 NaN \n", - "17737 NaN \n", - "17738 NaN \n", + " transaction_id donor_id \\\n", + "0 7773a71e-9f67-438e-8313-80b1b75deeb4 4544b60d-da6b-4dd5-9efe-334152ccf1f1 \n", + "1 95f74915-a945-491f-8751-8c970a76fc24 946d7561-42a3-4a4b-b410-3a10271c9f18 \n", + "2 d05f1763-132d-4717-addc-8ff6239ad4d9 c8f98436-9562-48ed-b51f-45b2b217aad1 \n", + "3 3dc3da30-6562-4755-bfad-6a26f1baec15 b9965bc2-c94d-4f69-98d1-bc4f5ad701c5 \n", + "4 a4340a2c-7b8a-4eeb-8290-746f0f436c83 946d7561-42a3-4a4b-b410-3a10271c9f18 \n", "\n", - " full_name entity_type state \\\n", - "102 abdussamad, shams candidate AZ \n", - "103 abdussamad, shams candidate AZ \n", - "104 abeytia, anna lynn candidate AZ \n", - "105 abeytia, anna lynn candidate AZ \n", - "0 FREDERICK BERG ... Individual MI \n", - "... ... ... ... 
\n", - "17734 Rodriguez, Adrian Individual MN \n", - "17735 O'Connor, Timothy J Individual MN \n", - "17736 Frenzel, Robert C Individual MN \n", - "17737 Enzminger, Peter Individual MN \n", - "17738 Bowler, Erin Individual MN \n", + " year amount recipient_id office_sought \\\n", + "0 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "1 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "2 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "3 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + "4 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", "\n", - " party company occupation address \\\n", - "102 democratic none (is a candidate) NaN NaN \n", - "103 democratic none (is a candidate) NaN NaN \n", - "104 democratic none (is a candidate) NaN NaN \n", - "105 democratic none (is a candidate) NaN NaN \n", - "0 NaN BUTZEL LONG ATTORNEY 1033 YORKSHIRE \n", - "... ... ... ... ... \n", - "17734 NaN NaN NaN NaN \n", - "17735 NaN NaN NaN NaN \n", - "17736 NaN NaN NaN NaN \n", - "17737 NaN NaN NaN NaN \n", - "17738 NaN NaN NaN NaN \n", + " purpose transaction_type \\\n", + "0 bob worsley for state senate contribute to a candidate committee \n", + "1 drew john for state house contribute to a candidate committee \n", + "2 elect karen fann ld1 contribute to a candidate committee \n", + "3 elect noel campbell for house contribute to a candidate committee \n", + "4 closed to new donations refund from contrib to a cand committee \n", "\n", - " zip city \n", - "102 NaN NaN \n", - "103 NaN NaN \n", - "104 NaN NaN \n", - "105 NaN NaN \n", - "0 48230-0000 GROSSE POINTE PARK \n", - "... ... ... 
\n", - "17734 NaN NaN \n", - "17735 NaN NaN \n", - "17736 NaN NaN \n", - "17737 NaN NaN \n", - "17738 NaN NaN \n", + " donor_type recipient_type donor_office \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", "\n", - "[1760156 rows x 12 columns]" + " recipient_name \n", + "0 #1022 arizona committee of automotive retailers \n", + "1 #1022 arizona committee of automotive retailers \n", + "2 #1022 arizona committee of automotive retailers \n", + "3 #1022 arizona committee of automotive retailers \n", + "4 #1022 arizona committee of automotive retailers " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# for now only work with datasets \n", - "sample_inds = inds_df.loc[(inds_df['id'].isin(transactions.donor_id.tolist()))]\n", - "sample_inds\n" + "from utils.network import name_identifier\n", + "from utils.linkage import deduplicate_perfect_matches\n", + "transactions = transactions.loc[(transactions.recipient_id.isin(inds_df.id)) | \n", + " (transactions.recipient_id.isin(orgs_df.id)) |\n", + " (transactions.donor_id.isin(inds_df.id)) |\n", + " (transactions.donor_id.isin(inds_df.id))]\n", + "inds = deduplicate_perfect_matches(inds_df) \n", + "orgs = deduplicate_perfect_matches(orgs_df)\n", + "transactions[\"recipient_name\"] = transactions[\"recipient_id\"].apply(name_identifier, args=([orgs, inds],))\n", + "\n", + "transactions.head(5)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "87" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = transactions.loc[transactions.donor_id.isin(inds_df.id)]\n", + "len(x.recipient_name.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -870,8 +703,17 @@ " \n", " \n", " \n", - " 
transaction_id\n", - " donor_id\n", + " id\n", + " first_name\n", + " last_name\n", + " full_name\n", + " entity_type\n", + " state\n", + " party\n", + " company\n", + " occupation\n", + " address\n", + " ...\n", " year\n", " amount\n", " recipient_id\n", @@ -881,361 +723,229 @@ " donor_type\n", " recipient_type\n", " donor_office\n", + " recipient_name\n", " \n", " \n", " \n", " \n", - " 212637\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 100.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 212667\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 440542\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 440573\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 440607\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 440642\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 636312\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " 
NaN\n", + " 55243\n", + " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", + " M.\n", + " TANG\n", + " m. tang ...\n", + " Individual\n", + " MD\n", " NaN\n", " NaN\n", - " \n", - " \n", - " 636346\n", " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", + " 6614 23RD PLACE\n", + " ...\n", + " 2022.0\n", + " 2.0\n", + " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", " NaN\n", " NaN\n", - " DIRECT\n", + " direct\n", " NaN\n", " NaN\n", " NaN\n", + " None\n", " \n", " \n", - " 636382\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", + " 55244\n", + " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", + " M.\n", + " TANG\n", + " m. tang ...\n", + " Individual\n", + " MD\n", " NaN\n", " NaN\n", - " \n", - " \n", - " 839846\n", " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 83.33\n", - " f9fa8506-bfbb-4ef0-9e08-5c9c3e948121\n", + " 6614 23RD PLACE\n", + " ...\n", + " 2022.0\n", + " 95.0\n", + " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", " NaN\n", " NaN\n", - " DIRECT/FUND RAISER\n", + " direct\n", " NaN\n", " NaN\n", " NaN\n", + " None\n", " \n", " \n", - " 840051\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 83.34\n", - " 389fe2ba-828a-41d4-815c-8efb2499ea11\n", - " NaN\n", - " NaN\n", - " DIRECT/FUND RAISER\n", - " NaN\n", + " 55245\n", + " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", + " M.\n", + " TANG\n", + " m. 
tang ...\n", + " Individual\n", + " MD\n", " NaN\n", " NaN\n", - " \n", - " \n", - " 968402\n", " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 83.33\n", - " 043a03b7-af31-4830-b12e-446b93fca9a0\n", + " 6614 23RD PLACE\n", + " ...\n", + " 2022.0\n", + " 10.0\n", + " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", " NaN\n", " NaN\n", - " DIRECT/FUND RAISER\n", + " direct\n", " NaN\n", " NaN\n", " NaN\n", + " None\n", " \n", " \n", - " 1414338\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 250.00\n", - " ba06baf6-eae6-459f-b3a9-7261e4baa33e\n", - " NaN\n", - " NaN\n", - " DIRECT/FUND RAISER\n", - " NaN\n", + " 55246\n", + " a23037f6-741c-43a5-8a6d-0f1db4371e1d\n", + " OLIVIA N\n", + " DALMASSO\n", + " olivia n dalmasso ...\n", + " Individual\n", + " IL\n", " NaN\n", " NaN\n", - " \n", - " \n", - " 1502742\n", " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", + " PO BOX 574\n", + " ...\n", + " 2022.0\n", + " 12.6\n", + " 6b33721f-3f6a-47c0-bce2-284fc58e0d2a\n", " NaN\n", " NaN\n", - " DIRECT\n", + " direct\n", " NaN\n", " NaN\n", " NaN\n", + " None\n", " \n", " \n", - " 1502777\n", - " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", + " 55247\n", + " a23037f6-741c-43a5-8a6d-0f1db4371e1d\n", + " OLIVIA N\n", + " DALMASSO\n", + " olivia n dalmasso ...\n", + " Individual\n", + " IL\n", " NaN\n", " NaN\n", - " \n", - " \n", - " 1502812\n", " NaN\n", - " b8fbed14-0766-49ab-8516-97952c654a12\n", - " 2022\n", - " 50.00\n", - " 1d4ae24b-2814-4d0d-995e-28fd4c26785d\n", + " PO BOX 574\n", + " ...\n", + " 2022.0\n", + " 4.2\n", + " 6b33721f-3f6a-47c0-bce2-284fc58e0d2a\n", " NaN\n", " NaN\n", - " DIRECT\n", + " direct\n", " NaN\n", " NaN\n", " NaN\n", + " None\n", " \n", " \n", "\n", + "

5 rows × 25 columns

\n", "" ], "text/plain": [ - " transaction_id donor_id year amount \\\n", - "212637 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 100.00 \n", - "212667 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "440542 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "440573 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "440607 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "440642 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "636312 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "636346 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "636382 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "839846 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.33 \n", - "840051 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.34 \n", - "968402 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 83.33 \n", - "1414338 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 250.00 \n", - "1502742 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "1502777 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", - "1502812 NaN b8fbed14-0766-49ab-8516-97952c654a12 2022 50.00 \n", + " id first_name \\\n", + "55243 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", + "55244 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", + "55245 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", + "55246 a23037f6-741c-43a5-8a6d-0f1db4371e1d OLIVIA N \n", + "55247 a23037f6-741c-43a5-8a6d-0f1db4371e1d OLIVIA N \n", + "\n", + " last_name \\\n", + "55243 TANG \n", + "55244 TANG \n", + "55245 TANG \n", + "55246 DALMASSO \n", + "55247 DALMASSO \n", + "\n", + " full_name entity_type state \\\n", + "55243 m. tang ... Individual MD \n", + "55244 m. tang ... Individual MD \n", + "55245 m. tang ... Individual MD \n", + "55246 olivia n dalmasso ... Individual IL \n", + "55247 olivia n dalmasso ... Individual IL \n", + "\n", + " party company occupation address ... year amount \\\n", + "55243 NaN NaN NaN 6614 23RD PLACE ... 
2022.0 2.0 \n", + "55244 NaN NaN NaN 6614 23RD PLACE ... 2022.0 95.0 \n", + "55245 NaN NaN NaN 6614 23RD PLACE ... 2022.0 10.0 \n", + "55246 NaN NaN NaN PO BOX 574 ... 2022.0 12.6 \n", + "55247 NaN NaN NaN PO BOX 574 ... 2022.0 4.2 \n", "\n", - " recipient_id office_sought purpose \\\n", - "212637 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "212667 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "440542 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "440573 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "440607 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "440642 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "636312 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "636346 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "636382 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "839846 f9fa8506-bfbb-4ef0-9e08-5c9c3e948121 NaN NaN \n", - "840051 389fe2ba-828a-41d4-815c-8efb2499ea11 NaN NaN \n", - "968402 043a03b7-af31-4830-b12e-446b93fca9a0 NaN NaN \n", - "1414338 ba06baf6-eae6-459f-b3a9-7261e4baa33e NaN NaN \n", - "1502742 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "1502777 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", - "1502812 1d4ae24b-2814-4d0d-995e-28fd4c26785d NaN NaN \n", + " recipient_id office_sought purpose \\\n", + "55243 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", + "55244 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", + "55245 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", + "55246 6b33721f-3f6a-47c0-bce2-284fc58e0d2a NaN NaN \n", + "55247 6b33721f-3f6a-47c0-bce2-284fc58e0d2a NaN NaN \n", "\n", - " transaction_type donor_type recipient_type donor_office \n", - "212637 DIRECT NaN NaN NaN \n", - "212667 DIRECT NaN NaN NaN \n", - "440542 DIRECT NaN NaN NaN \n", - "440573 DIRECT NaN NaN NaN \n", - "440607 DIRECT NaN NaN NaN \n", - "440642 DIRECT NaN NaN NaN \n", - "636312 DIRECT NaN NaN NaN \n", - "636346 DIRECT NaN NaN NaN \n", - "636382 DIRECT NaN NaN NaN \n", - "839846 DIRECT/FUND RAISER 
NaN NaN NaN \n", - "840051 DIRECT/FUND RAISER NaN NaN NaN \n", - "968402 DIRECT/FUND RAISER NaN NaN NaN \n", - "1414338 DIRECT/FUND RAISER NaN NaN NaN \n", - "1502742 DIRECT NaN NaN NaN \n", - "1502777 DIRECT NaN NaN NaN \n", - "1502812 DIRECT NaN NaN NaN " + " transaction_type donor_type recipient_type donor_office \\\n", + "55243 direct NaN NaN NaN \n", + "55244 direct NaN NaN NaN \n", + "55245 direct NaN NaN NaN \n", + "55246 direct NaN NaN NaN \n", + "55247 direct NaN NaN NaN \n", + "\n", + " recipient_name \n", + "55243 None \n", + "55244 None \n", + "55245 None \n", + "55246 None \n", + "55247 None \n", + "\n", + "[5 rows x 25 columns]" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "transactions.loc[transactions['donor_id'] == 'b8fbed14-0766-49ab-8516-97952c654a12']" + "# left merge according to ind_id and transaction donor_id. This was entities that only received money will still be there, no info from ind_dataset\n", + "# is lost\n", + "merged_inds_sample = pd.merge(inds_df,transactions,how='left',left_on='id',right_on='donor_id')\n", + "merged_inds_sample.dropna(subset = ['amount'], inplace=True)\n", + "merged_inds_sample.tail(5)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'BUTZEL LONG POLITICAL ACTION COMMITTEE'" + "Index(['id', 'first_name', 'last_name', 'full_name', 'entity_type', 'state',\n", + " 'party', 'company', 'occupation', 'address', 'zip', 'city',\n", + " 'classification', 'transaction_id', 'donor_id', 'year', 'amount',\n", + " 'recipient_id', 'office_sought', 'purpose', 'transaction_type',\n", + " 'donor_type', 'recipient_type', 'donor_office', 'recipient_name'],\n", + " dtype='object')" ] }, - "execution_count": 28, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "x = 
orgs_df.loc[orgs_df['id']=='1d4ae24b-2814-4d0d-995e-28fd4c26785d']\n", - "x.iloc[0]['name']" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# apply dedup to both inds and orgs\n", - "inds_df = deduplicate_perfect_matches(inds_df)\n", - "orgs_df = deduplicate_perfect_matches(orgs_df)\n", - "\n", - "# map the uuids in transaction donor and recipient columns to the deduplicated uuids\n", - "deduped = pd.read_csv(\"../output/deduplicated_UUIDs.csv\")\n", - "transactions[['donor_id','recipient_id']] = transactions[['donor_id','recipient_id']].replace(deduped)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# add recipient name to transactions df: \n", - "def name_identifier(uuid:str, orgs_df, inds_df) -> str:\n", - " # 1st check orgs df:\n", - " name_in_org = orgs_df.loc[orgs_df['id']==uuid] \n", - " if len(name_in_org)> 0:\n", - " return name_in_org.iloc[0]['name']\n", - " # theoretically it must be in inds if not in orgs, but for the sample data\n", - " # this might not be the case\n", - " name_in_ind = inds_df.loc[inds_df['id']==uuid]\n", - " if len(name_in_ind)> 0:\n", - " return name_in_ind.iloc[0]['full_name']\n", - " else: return None" + "merged_inds_sample.columns" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1259,630 +969,149 @@ " \n", " \n", " \n", - " transaction_id\n", " donor_id\n", - " year\n", - " amount\n", " recipient_id\n", + " full_name\n", + " recipient_name\n", + " address\n", + " amount\n", + " city\n", + " classification\n", + " company\n", + " donor_office\n", + " ...\n", + " occupation\n", " office_sought\n", + " party\n", " purpose\n", - " transaction_type\n", - " donor_type\n", " recipient_type\n", - " donor_office\n", - " recipient_name\n", - " \n", - " \n", - " \n", - " \n", - " 884875\n", - " NaN\n", - " 6c2b94a2-4247-4bc4-b784-6b5a9a2ae9f2\n", 
- " 2022\n", - " 5.00\n", - " 533f6de5-5140-4799-be24-1d5f4e228d1b\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " FRIENDS OF DANA NESSEL\n", - " \n", - " \n", - " 122735\n", - " NaN\n", - " b906d3eb-3874-4789-b523-e2eaab415328\n", - " 2022\n", - " 9.16\n", - " f2fad7aa-a782-4d56-8343-049d2150c16f\n", - " NaN\n", - " MERCHANT SVCS FEES\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " THE JULIE BRIXIE BLUE WAVE FUND 2\n", - " \n", - " \n", - " 458788\n", - " NaN\n", - " 88740001-952f-477e-b2da-9b24f747f6ce\n", - " 2022\n", - " 1.00\n", - " a0619eff-155f-442f-ab71-b5c0ee942223\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " COMERICA INC POLITICAL ACTION COMMITTEE\n", - " \n", - " \n", - " 1522918\n", - " NaN\n", - " da538e73-c823-48f8-b4ec-920aa1da458f\n", - " 2022\n", - " 20.00\n", - " 81169dce-331e-44ad-b870-1b376d49cf2f\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " WASTE MANAGEMENT EMPLOYEES BETTER GOVERNMENT F...\n", - " \n", - " \n", - " 1933218\n", - " NaN\n", - " 33814139-8442-4050-b15a-40aed1aa9db7\n", - " 2022\n", - " 35.00\n", - " abdf0530-e2fb-40b6-9a52-dea386cd60f4\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " GRETCHEN WHITMER FOR GOVERNOR\n", - " \n", - " \n", - " 465682\n", - " NaN\n", - " 133431dd-41ef-4161-97ef-02d23fc05b42\n", - " 2022\n", - " 7.50\n", - " d582fba6-2a0c-4864-9fb2-5a4f898f26c2\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " MI ASSOC OF COMMUNITY BANKERS OF MICHIGAN POLI...\n", - " \n", - " \n", - " 761674\n", - " NaN\n", - " 668d8471-ade6-469b-9e6b-71ddbfd1d8ba\n", - " 2022\n", - " 25.00\n", - " 1d05ca29-e97f-43cd-bd9e-f313573b324b\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " END CITIZENS UNITED NON-FEDERAL MI\n", - " \n", - " \n", - " 993543\n", - " NaN\n", - " b9b66f08-4e99-43e7-9161-c75db92b0bb4\n", - " 2022\n", 
- " 10.00\n", - " ecebf482-f298-4777-bea6-e3451c75e3fc\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " RESCARE INC DBA BRIGHTSPRING HEALTH SERVICES L...\n", - " \n", - " \n", - " 1196687\n", - " NaN\n", - " f4942707-0d7f-4617-b478-56af7504123e\n", - " 2022\n", - " 12.00\n", - " a24e305e-a49b-4cb3-a857-d629f1162ce8\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " MARATHON PETROLEUM CORPORATION EMPLOYEES PAC\n", - " \n", - " \n", - " 334698\n", - " NaN\n", - " 96be56db-56b6-48d0-9cf7-9d47da307388\n", - " 2022\n", - " 11.80\n", - " 9fc94e93-b6aa-400d-9a4a-d6501afb84dc\n", - " NaN\n", - " NaN\n", - " DIRECT\n", - " NaN\n", - " NaN\n", - " NaN\n", - " MICHIGAN REGIONAL COUNCIL OF CARPENTERS POLITI...\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " transaction_id donor_id year amount \\\n", - "884875 NaN 6c2b94a2-4247-4bc4-b784-6b5a9a2ae9f2 2022 5.00 \n", - "122735 NaN b906d3eb-3874-4789-b523-e2eaab415328 2022 9.16 \n", - "458788 NaN 88740001-952f-477e-b2da-9b24f747f6ce 2022 1.00 \n", - "1522918 NaN da538e73-c823-48f8-b4ec-920aa1da458f 2022 20.00 \n", - "1933218 NaN 33814139-8442-4050-b15a-40aed1aa9db7 2022 35.00 \n", - "465682 NaN 133431dd-41ef-4161-97ef-02d23fc05b42 2022 7.50 \n", - "761674 NaN 668d8471-ade6-469b-9e6b-71ddbfd1d8ba 2022 25.00 \n", - "993543 NaN b9b66f08-4e99-43e7-9161-c75db92b0bb4 2022 10.00 \n", - "1196687 NaN f4942707-0d7f-4617-b478-56af7504123e 2022 12.00 \n", - "334698 NaN 96be56db-56b6-48d0-9cf7-9d47da307388 2022 11.80 \n", - "\n", - " recipient_id office_sought \\\n", - "884875 533f6de5-5140-4799-be24-1d5f4e228d1b NaN \n", - "122735 f2fad7aa-a782-4d56-8343-049d2150c16f NaN \n", - "458788 a0619eff-155f-442f-ab71-b5c0ee942223 NaN \n", - "1522918 81169dce-331e-44ad-b870-1b376d49cf2f NaN \n", - "1933218 abdf0530-e2fb-40b6-9a52-dea386cd60f4 NaN \n", - "465682 d582fba6-2a0c-4864-9fb2-5a4f898f26c2 NaN \n", - "761674 1d05ca29-e97f-43cd-bd9e-f313573b324b NaN \n", - "993543 
ecebf482-f298-4777-bea6-e3451c75e3fc NaN \n", - "1196687 a24e305e-a49b-4cb3-a857-d629f1162ce8 NaN \n", - "334698 9fc94e93-b6aa-400d-9a4a-d6501afb84dc NaN \n", - "\n", - " purpose transaction_type donor_type \\\n", - "884875 NaN DIRECT NaN \n", - "122735 MERCHANT SVCS FEES NaN NaN \n", - "458788 NaN DIRECT NaN \n", - "1522918 NaN DIRECT NaN \n", - "1933218 NaN DIRECT NaN \n", - "465682 NaN DIRECT NaN \n", - "761674 NaN DIRECT NaN \n", - "993543 NaN DIRECT NaN \n", - "1196687 NaN DIRECT NaN \n", - "334698 NaN DIRECT NaN \n", - "\n", - " recipient_type donor_office \\\n", - "884875 NaN NaN \n", - "122735 NaN NaN \n", - "458788 NaN NaN \n", - "1522918 NaN NaN \n", - "1933218 NaN NaN \n", - "465682 NaN NaN \n", - "761674 NaN NaN \n", - "993543 NaN NaN \n", - "1196687 NaN NaN \n", - "334698 NaN NaN \n", - "\n", - " recipient_name \n", - "884875 FRIENDS OF DANA NESSEL \n", - "122735 THE JULIE BRIXIE BLUE WAVE FUND 2 \n", - "458788 COMERICA INC POLITICAL ACTION COMMITTEE \n", - "1522918 WASTE MANAGEMENT EMPLOYEES BETTER GOVERNMENT F... \n", - "1933218 GRETCHEN WHITMER FOR GOVERNOR \n", - "465682 MI ASSOC OF COMMUNITY BANKERS OF MICHIGAN POLI... \n", - "761674 END CITIZENS UNITED NON-FEDERAL MI \n", - "993543 RESCARE INC DBA BRIGHTSPRING HEALTH SERVICES L... \n", - "1196687 MARATHON PETROLEUM CORPORATION EMPLOYEES PAC \n", - "334698 MICHIGAN REGIONAL COUNCIL OF CARPENTERS POLITI... " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ex = transactions.sample(10)\n", - "ex['recipient_name'] = ex['recipient_id'].apply(name_identifier, args=(orgs_df, inds_df))\n", - "ex" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompanyentity_typefirst_namefull_namelast_namepartystatetransaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
025625730individualNaNvarious 0NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
11617483aggregate cashindividualNaNcash _small donationsNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " id company entity_type first_name full_name \\\n", - "0 2562573 0 individual NaN various 0 \n", - "1 1617483 aggregate cash individual NaN cash _small donations \n", - "\n", - " last_name party state transaction_id donor_id year amount recipient_id \\\n", - "0 NaN NaN NaN NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN NaN NaN NaN \n", - "\n", - " office_sought purpose transaction_type donor_type recipient_type \\\n", - "0 NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN \n", - "\n", - " donor_office \n", - "0 NaN \n", - "1 NaN " - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "# this step took more than 16 minutes to run...think of alternative way\n", - "# id_to_name = {id: name for id, name in zip(inds_sample.id.tolist(), inds_sample.full_name.tolist())} #the same would be applied to orgs\n", - "transactions['recipient_name'] = transactions['recipient_id'].apply(lambda x: sample_inds.loc[sample_inds.id == x] )\n", - "\n", - "# left merge according to ind_id and transaction donor_id. This was entities that only received money will still be there, no info from ind_dataset\n", - "# is lost\n", - "merged_inds_sample = pd.merge(sample_inds,transactions,how='left',left_on='id',right_on='donor_id')\n", - "merged_inds_sample.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompanyentity_typefirst_namefull_namelast_namepartystate
27100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ
\n", - "
" - ], - "text/plain": [ - " id company entity_type first_name full_name \\\n", - "27 100894 none (is a candidate) candidate NaN abdussamad, shams \n", - "\n", - " last_name party state \n", - "27 NaN democratic AZ " - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sample_inds.loc[sample_inds.full_name == 'abdussamad, shams']" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [], - "source": [ - "def add_notes_from_df(df):\n", - " G = nx.MultiDiGraph()\n", - " #inds or org...\n", - " if 'name' in df.columns:\n", - " node_name = 'name'\n", - " else: node_name = 'full_name'\n", - "\n", - " for _, row in df.iterrows():\n", - " G.add_node(row[node_name])\n", - " for column in df.columns:\n", - " # only add info that's present\n", - " if (row[column] != 'nan'):\n", - " nx.set_node_attributes(G, row[column], name=column)\n", - " #nx.set\n", - " nx.draw_random(G, with_labels=True)\n", - " plt.show()\n", - " return G" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1905,208 +1134,341 @@ " \n", " \n", " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", " \n", "
idcompanyentity_typefirst_namefull_namelast_namepartystatetransaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_officezip
27100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ508807910089400007b184-4e1d-401a-ba51-99733d2e13e7d461f2bd-9074-44b3-8948-e659bead3e58graham filler ...saginaw county republican committee12705 WARM CREEK500.00DEWITTneutralNoneNone...NoneNoneNoneNoneNoneMINonedirect2022.05.00750413state representative - district 11e-qual online qcccec $5 qualifying contributionNaNNaNNaN48820-0000
28100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ5088080100894100523627-46c7-4f76-ab42-fb2c1fbac1b16126e78b-4e80-4361-a019-9d99aa1623eddaniel millstone ...rooted in community leadership pac10518 ROUNTREE RD0.77LOS ANGELESneutralNoneNone...NoneNoneNoneNoneNoneCANonedirect2022.05.002002235state representative - district 11NaNccec $5 qualifying contributionNaNNaNNaN90064-0000
29100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ5088081100894200934782-86e5-4941-94cf-0a700100a2c02d1a0919-218e-4692-98ec-c4a73a126482josie petersheim ...mi greenstone pac7196 W. BRIGGS RD.25.00STANTONneutralNoneNone...NoneNoneNoneNoneNoneMINonedirect2022.0100.001942680state representative - district 11NaNreceive loan from candidate or family memberNaNNaNNaN48888-0000
30100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ5088083100894300f22bdd-96bf-4074-9620-4737e8444958af8417ee-5bca-49f5-91e9-d2de65d73631robert doerfler ...michigan senate democratic fund1534 NE 5TH AVE50.00FORT LAUDERDALEneutralNoneNone...NoneNoneNoneNoneNoneFLNonedirect2022.05.00-1state representative - district 11NaNin-state contributions $100 or lessNaNNaNNaN33304-1006
31100894none (is a candidate)candidateNaNabdussamad, shamsNaNdemocraticAZ508808410089440138403b-b5b9-453a-a1d2-b6ed9fa5fe586126e78b-4e80-4361-a019-9d99aa1623edjoseph martinez ...rooted in community leadership pac139 HURON AVE1.65MOUNT CLEMENSneutralNoneNone...NoneNoneNoneNoneNoneMINonedirect2022.020.00-1state representative - district 11NaNin-state contributions $100 or lessNaNNaNNaN48043-0000
..................
597100883none (is a candidate)candidateNaNabeytia, anna lynnNaNdemocraticAZ50841001008831120fdccce6b-e55f-4f1d-bd95-1714f2a667eda3fe20e2-8019-448e-9b54-bfdce4d87f2fmichael olthoff ...bumstead leadership fund1499 MIDDLEBROOK DR1000.00NORTON SHORESneutralnicholsNone...ceoNoneNoneNoneNoneMINonedirect2022.010.002017053state representative - district 11NaNcontribution from individualsNaNNaNNaN49441-0000
598100883none (is a candidate)candidateNaNabeytia, anna lynnNaNdemocraticAZ50841021008831121fe969829-b8a4-4d38-88e2-8314b340d5676126e78b-4e80-4361-a019-9d99aa1623edjoanna simon ...rooted in community leadership pac1546 POPLAR GROVE DR3.82RESTONneutralNoneNone...NoneNoneNoneNoneNoneVANonedirect2022.0180.002017970state representative - district 11video productionin-kind cont. from individualNaNNaNNaN20194-1731
599100883none (is a candidate)candidateNaNabeytia, anna lynnNaNdemocraticAZ50841031008831122ff1423ba-ff5e-4bc1-b864-303a9dcc9b326126e78b-4e80-4361-a019-9d99aa1623edadriana p{on ce ...rooted in community leadership pac9 BIRCH CT3.82NORMALneutralNoneNone...NoneNoneNoneNoneNoneILNonedirect2022.051.992008747state representative - district 11NaNcontribution from individualsNaNNaNNaN61761-3900
600100883none (is a candidate)candidateNaNabeytia, anna lynnNaNdemocraticAZ50841051008831123ff24644e-d64a-4a8a-a87f-cdb53b86dd636126e78b-4e80-4361-a019-9d99aa1623eddavid friedman ...rooted in community leadership pac8823 MOUNTAIN PATH CIR0.15AUSTINneutralNoneNone...NoneNoneNoneNoneNoneTXNonedirect2022.010.801193076state representative - district 11NaNcontribution from individualsNaNNaNNaN78759-0000
601100883none (is a candidate)candidateNaNabeytia, anna lynnNaNdemocraticAZ50841071008831124ffb25947-c03f-43b2-abb4-23531cdb73247f272fe4-d592-453c-9ca1-315ea3fdcff1dennis starner ...bill g schuette for state representative4612 CONGRESS DRIVE525.00MIDLANDneutralretiredNone...retiredNoneNoneNoneNoneMINonedirect/fund raiser2022.051.991691025state representative - district 11NaNcontribution from individualsNaNNaNNaN48642-0000
\n", - "

575 rows × 19 columns

\n", + "

1125 rows × 25 columns

\n", "
" ], "text/plain": [ - " id company entity_type first_name \\\n", - "27 100894 none (is a candidate) candidate NaN \n", - "28 100894 none (is a candidate) candidate NaN \n", - "29 100894 none (is a candidate) candidate NaN \n", - "30 100894 none (is a candidate) candidate NaN \n", - "31 100894 none (is a candidate) candidate NaN \n", - ".. ... ... ... ... \n", - "597 100883 none (is a candidate) candidate NaN \n", - "598 100883 none (is a candidate) candidate NaN \n", - "599 100883 none (is a candidate) candidate NaN \n", - "600 100883 none (is a candidate) candidate NaN \n", - "601 100883 none (is a candidate) candidate NaN \n", - "\n", - " full_name last_name party state transaction_id donor_id \\\n", - "27 abdussamad, shams NaN democratic AZ 5088079 100894 \n", - "28 abdussamad, shams NaN democratic AZ 5088080 100894 \n", - "29 abdussamad, shams NaN democratic AZ 5088081 100894 \n", - "30 abdussamad, shams NaN democratic AZ 5088083 100894 \n", - "31 abdussamad, shams NaN democratic AZ 5088084 100894 \n", - ".. ... ... ... ... ... ... \n", - "597 abeytia, anna lynn NaN democratic AZ 5084100 100883 \n", - "598 abeytia, anna lynn NaN democratic AZ 5084102 100883 \n", - "599 abeytia, anna lynn NaN democratic AZ 5084103 100883 \n", - "600 abeytia, anna lynn NaN democratic AZ 5084105 100883 \n", - "601 abeytia, anna lynn NaN democratic AZ 5084107 100883 \n", - "\n", - " year amount recipient_id office_sought \\\n", - "27 2022.0 5.00 750413 state representative - district 11 \n", - "28 2022.0 5.00 2002235 state representative - district 11 \n", - "29 2022.0 100.00 1942680 state representative - district 11 \n", - "30 2022.0 5.00 -1 state representative - district 11 \n", - "31 2022.0 20.00 -1 state representative - district 11 \n", - ".. ... ... ... ... 
\n", - "597 2022.0 10.00 2017053 state representative - district 11 \n", - "598 2022.0 180.00 2017970 state representative - district 11 \n", - "599 2022.0 51.99 2008747 state representative - district 11 \n", - "600 2022.0 10.80 1193076 state representative - district 11 \n", - "601 2022.0 51.99 1691025 state representative - district 11 \n", - "\n", - " purpose transaction_type \\\n", - "27 e-qual online qc ccec $5 qualifying contribution \n", - "28 NaN ccec $5 qualifying contribution \n", - "29 NaN receive loan from candidate or family member \n", - "30 NaN in-state contributions $100 or less \n", - "31 NaN in-state contributions $100 or less \n", - ".. ... ... \n", - "597 NaN contribution from individuals \n", - "598 video production in-kind cont. from individual \n", - "599 NaN contribution from individuals \n", - "600 NaN contribution from individuals \n", - "601 NaN contribution from individuals \n", - "\n", - " donor_type recipient_type donor_office \n", - "27 NaN NaN NaN \n", - "28 NaN NaN NaN \n", - "29 NaN NaN NaN \n", - "30 NaN NaN NaN \n", - "31 NaN NaN NaN \n", - ".. ... ... ... \n", - "597 NaN NaN NaN \n", - "598 NaN NaN NaN \n", - "599 NaN NaN NaN \n", - "600 NaN NaN NaN \n", - "601 NaN NaN NaN \n", - "\n", - "[575 rows x 19 columns]" + " donor_id \\\n", + "0 0007b184-4e1d-401a-ba51-99733d2e13e7 \n", + "1 00523627-46c7-4f76-ab42-fb2c1fbac1b1 \n", + "2 00934782-86e5-4941-94cf-0a700100a2c0 \n", + "3 00f22bdd-96bf-4074-9620-4737e8444958 \n", + "4 0138403b-b5b9-453a-a1d2-b6ed9fa5fe58 \n", + "... ... 
\n", + "1120 fdccce6b-e55f-4f1d-bd95-1714f2a667ed \n", + "1121 fe969829-b8a4-4d38-88e2-8314b340d567 \n", + "1122 ff1423ba-ff5e-4bc1-b864-303a9dcc9b32 \n", + "1123 ff24644e-d64a-4a8a-a87f-cdb53b86dd63 \n", + "1124 ffb25947-c03f-43b2-abb4-23531cdb7324 \n", + "\n", + " recipient_id \\\n", + "0 d461f2bd-9074-44b3-8948-e659bead3e58 \n", + "1 6126e78b-4e80-4361-a019-9d99aa1623ed \n", + "2 2d1a0919-218e-4692-98ec-c4a73a126482 \n", + "3 af8417ee-5bca-49f5-91e9-d2de65d73631 \n", + "4 6126e78b-4e80-4361-a019-9d99aa1623ed \n", + "... ... \n", + "1120 a3fe20e2-8019-448e-9b54-bfdce4d87f2f \n", + "1121 6126e78b-4e80-4361-a019-9d99aa1623ed \n", + "1122 6126e78b-4e80-4361-a019-9d99aa1623ed \n", + "1123 6126e78b-4e80-4361-a019-9d99aa1623ed \n", + "1124 7f272fe4-d592-453c-9ca1-315ea3fdcff1 \n", + "\n", + " full_name \\\n", + "0 graham filler ... \n", + "1 daniel millstone ... \n", + "2 josie petersheim ... \n", + "3 robert doerfler ... \n", + "4 joseph martinez ... \n", + "... ... \n", + "1120 michael olthoff ... \n", + "1121 joanna simon ... \n", + "1122 adriana p{on ce ... \n", + "1123 david friedman ... \n", + "1124 dennis starner ... \n", + "\n", + " recipient_name address \\\n", + "0 saginaw county republican committee 12705 WARM CREEK \n", + "1 rooted in community leadership pac 10518 ROUNTREE RD \n", + "2 mi greenstone pac 7196 W. BRIGGS RD. \n", + "3 michigan senate democratic fund 1534 NE 5TH AVE \n", + "4 rooted in community leadership pac 139 HURON AVE \n", + "... ... ... \n", + "1120 bumstead leadership fund 1499 MIDDLEBROOK DR \n", + "1121 rooted in community leadership pac 1546 POPLAR GROVE DR \n", + "1122 rooted in community leadership pac 9 BIRCH CT \n", + "1123 rooted in community leadership pac 8823 MOUNTAIN PATH CIR \n", + "1124 bill g schuette for state representative 4612 CONGRESS DRIVE \n", + "\n", + " amount city classification company donor_office ... \\\n", + "0 500.00 DEWITT neutral None None ... \n", + "1 0.77 LOS ANGELES neutral None None ... 
\n", + "2 25.00 STANTON neutral None None ... \n", + "3 50.00 FORT LAUDERDALE neutral None None ... \n", + "4 1.65 MOUNT CLEMENS neutral None None ... \n", + "... ... ... ... ... ... ... \n", + "1120 1000.00 NORTON SHORES neutral nichols None ... \n", + "1121 3.82 RESTON neutral None None ... \n", + "1122 3.82 NORMAL neutral None None ... \n", + "1123 0.15 AUSTIN neutral None None ... \n", + "1124 525.00 MIDLAND neutral retired None ... \n", + "\n", + " occupation office_sought party purpose recipient_type state \\\n", + "0 None None None None None MI \n", + "1 None None None None None CA \n", + "2 None None None None None MI \n", + "3 None None None None None FL \n", + "4 None None None None None MI \n", + "... ... ... ... ... ... ... \n", + "1120 ceo None None None None MI \n", + "1121 None None None None None VA \n", + "1122 None None None None None IL \n", + "1123 None None None None None TX \n", + "1124 retired None None None None MI \n", + "\n", + " transaction_id transaction_type year zip \n", + "0 None direct 2022.0 48820-0000 \n", + "1 None direct 2022.0 90064-0000 \n", + "2 None direct 2022.0 48888-0000 \n", + "3 None direct 2022.0 33304-1006 \n", + "4 None direct 2022.0 48043-0000 \n", + "... ... ... ... ... 
\n", + "1120 None direct 2022.0 49441-0000 \n", + "1121 None direct 2022.0 20194-1731 \n", + "1122 None direct 2022.0 61761-3900 \n", + "1123 None direct 2022.0 78759-0000 \n", + "1124 None direct/fund raiser 2022.0 48642-0000 \n", + "\n", + "[1125 rows x 25 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "attribute_cols = merged_inds_sample.columns.difference(['donor_id','recipient_id','full_name','recipient_name'])\n", + "agg_functions = {col: 'sum' if col == 'amount' else 'first' for col in attribute_cols}\n", + "grouped_sample = merged_inds_sample.groupby(['donor_id','recipient_id','full_name','recipient_name']).agg(agg_functions).reset_index()\n", + "grouped_sample" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph:\n", + " G = nx.MultiDiGraph()\n", + " # first check if df is individuals or organizations dataset\n", + " if \"name\" in df.columns:\n", + " node_name = \"name\"\n", + " else:\n", + " node_name = \"full_name\"\n", + " \n", + " transact_info = ['office_sought', 'purpose', 'transaction_type', 'year','transaction_id','donor_office','amount']\n", + " for _, row in df.iterrows(): \n", + " # add node attributes based on the columns relevant to the entity\n", + " G.add_node(row[node_name])\n", + " for column in df.columns.difference(transact_info):\n", + " if not pd.isnull(row[column]):\n", + " G.nodes[row[node_name]][column] = row[column]\n", + " \n", + " # link the donor node to the recipient node. 
add the attributes of the\n", + " # edge based on relevant nodes \n", + " edge_dictionary = {}\n", + " for column in transact_info:\n", + " if not pd.isnull(row[column]):\n", + " edge_dictionary[column] = row[column]\n", + " G.add_edge(row[node_name], row['recipient_name'], **edge_dictionary)\n", + "\n", + " # the added 'recipient_name' node has no attributes at this moment\n", + " # for the final code this line won't be necessary, as each recipient\n", + " # should ideally be referenced later on. For now, all added nodes for\n", + " # the recipient will only have one default attribute: classification\n", + " G.nodes[row['recipient_name']]['classification'] = 'neutral' \n", + " \n", + " edge_labels = {(u,v):d['amount'] for u,v,d in G.edges(data=True)}\n", + " entity_colors = {'neutral': 'green', 'c':'blue', 'f':'red'}\n", + " node_colors = [entity_colors[G.nodes[node]['classification']] for node in G.nodes()]\n", + "\n", + " nx.draw_planar(G, with_labels=False,node_color=node_colors)\n", + " plt.figure(3,figsize=(12,12)) \n", + " nx.draw_networkx_edge_labels(G, pos=nx.planar_layout(G),edge_labels=edge_labels, label_pos=0.5)\n", + "\n", + " #nx.draw_planar(G, with_labels=False)\n", + " plt.show()\n", + " return G" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#for u,v in G.nodes(data=True):\n", + " #print(u)#['classification'])\n", + " \n", + "G.nodes['michigan association of health plans political action committee']#['classification'])#['nancy davis ']['classification']" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['neutral', 'f'], dtype=object)" ] }, - "execution_count": 77, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"merged_inds_sample.loc[merged_inds_sample.donor_id.notnull()]" + "grouped_sample.classification.unique()" ] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -2116,171 +1478,1250 @@ }, { "data": { + "image/png": "", "text/plain": [ - "{'id': '1869727',\n", - " 'company': nan,\n", - " 'entity_type': 'individual',\n", - " 'first_name': nan,\n", - " 'full_name': 'william \\x08stoner',\n", - " 'last_name': nan,\n", - " 'party': nan,\n", - " 'state': nan,\n", - " 'transaction_id': nan,\n", - " 'donor_id': nan,\n", - " 'year': nan,\n", - " 'amount': nan,\n", - " 'recipient_id': nan,\n", - " 'office_sought': nan,\n", - " 'purpose': nan,\n", - " 'transaction_type': nan,\n", - " 'donor_type': nan,\n", - " 'recipient_type': nan,\n", - " 'donor_office': nan}" + "
" ] }, - "execution_count": 91, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'address': '3836 BRISTOL CT', 'city': 'CLARKSTON ', 'classification': 'neutral', 'donor_id': 'c7f7a9e5-2e9e-47d1-92f6-2238c7ce301a', 'entity_type': 'Individual', 'first_name': 'THERESA ', 'full_name': 'theresa fougnie ', 'id': 'c7f7a9e5-2e9e-47d1-92f6-2238c7ce301a', 'last_name': 'FOUGNIE ', 'recipient_id': '520c9ce3-c702-4926-8688-750984ee6c0d', 'recipient_name': 'friends of sarah may seward', 'state': 'MI', 'zip': '48348-3610'}\n", + "{'classification': 'neutral'}\n", + "{'address': '330 BROAD ST APT 1', 'city': 'SPRING CITY ', 'classification': 'neutral', 'donor_id': '318b9b37-369b-45ba-9802-27177198e694', 'entity_type': 'Individual', 'first_name': 'ERIC ', 'full_name': 'eric oconnor ', 'id': '318b9b37-369b-45ba-9802-27177198e694', 'last_name': 'OCONNOR ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'PA', 'zip': '19475-1763'}\n", + "{'classification': 'neutral'}\n", + "{'address': '15 W260 FILLMORE ST', 'city': 'ELMHURST ', 'classification': 'neutral', 'donor_id': '283c7a56-1298-4003-b4b3-e4519b6077b0', 'entity_type': 'Individual', 'first_name': 'EVELYN ', 'full_name': 'evelyn pape ', 'id': '283c7a56-1298-4003-b4b3-e4519b6077b0', 'last_name': 'PAPE ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'IL', 'zip': '60126-5349'}\n", + "{'classification': 'neutral'}\n", + "{'address': '16190 DOBBINS DR', 'city': 'ALBION ', 'classification': 'neutral', 'donor_id': '306d7309-ccc7-457e-a263-394b1143dacb', 'entity_type': 'Individual', 'first_name': 'STEPHANIE ', 'full_name': 'stephanie dobbins ', 'id': '306d7309-ccc7-457e-a263-394b1143dacb', 'last_name': 'DOBBINS ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 
'michigan senate democratic fund', 'state': 'MI', 'zip': '49224-9689'}\n", + "{'address': '3685 CREEKSIDE DRIVE', 'city': 'DORR ', 'classification': 'neutral', 'donor_id': '57069727-fd76-4630-9d36-b786d0992b4a', 'entity_type': 'Individual', 'first_name': 'ANNETTE ', 'full_name': 'annette magyar ', 'id': '57069727-fd76-4630-9d36-b786d0992b4a', 'last_name': 'MAGYAR ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MI', 'zip': '49323-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '13330 CAMINITO MAR VILLA', 'city': 'DEL MAR ', 'classification': 'neutral', 'donor_id': 'a4a903b8-a178-4fcc-ae7b-cd6852b447a0', 'entity_type': 'Individual', 'first_name': 'MICHAEL ', 'full_name': 'michael finley ', 'id': 'a4a903b8-a178-4fcc-ae7b-cd6852b447a0', 'last_name': 'FINLEY ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '92014-3614'}\n", + "{'address': '52 PINE HILL RD', 'city': 'ASHLAND ', 'classification': 'neutral', 'donor_id': 'fd303393-0697-48f6-b704-bce3a6b36e04', 'entity_type': 'Individual', 'first_name': 'JANE ', 'full_name': 'jane malick-nugent ', 'id': 'fd303393-0697-48f6-b704-bce3a6b36e04', 'last_name': 'MALICK-NUGENT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MA', 'zip': '01721-1169'}\n", + "{'address': '3708 OMAHA', 'city': 'GRANDVILLE ', 'classification': 'neutral', 'donor_id': '2ac954cd-d5a2-4d94-b087-adb400d05d25', 'entity_type': 'Individual', 'first_name': 'MARY ', 'full_name': 'mary bristol ', 'id': '2ac954cd-d5a2-4d94-b087-adb400d05d25', 'last_name': 'BRISTOL ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MI', 'zip': '49418-0000'}\n", + "{'address': '817 VERDALE DR', 'city': 'SPEARFISH ', 'classification': 'neutral', 'donor_id': 
'243d42aa-2d89-4df0-81c8-30b0eb2bb514', 'entity_type': 'Individual', 'first_name': 'TIARA ', 'full_name': 'tiara heckenlaible ', 'id': '243d42aa-2d89-4df0-81c8-30b0eb2bb514', 'last_name': 'HECKENLAIBLE ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'SD', 'zip': '57783-1636'}\n", + "{'address': '2954 BAY VILLAGE CIR APT 1074', 'city': 'SANTA ROSA ', 'classification': 'neutral', 'donor_id': '5fce81ac-a80a-4153-9893-a4f117312808', 'entity_type': 'Individual', 'first_name': 'JENNIFER ', 'full_name': 'jennifer ellis ', 'id': '5fce81ac-a80a-4153-9893-a4f117312808', 'last_name': 'ELLIS ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '95403-2288'}\n", + "{'address': '12606 CEDAR CROSSINGS DR', 'city': 'CHARLOTTE ', 'classification': 'neutral', 'donor_id': '0b7ab244-7d09-40f6-9da9-04492dca4c59', 'entity_type': 'Individual', 'first_name': 'MARGARET ', 'full_name': 'margaret johnson ', 'id': '0b7ab244-7d09-40f6-9da9-04492dca4c59', 'last_name': 'JOHNSON ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NC', 'zip': '28273-8868'}\n", + "{'address': '7730 BOHM RD', 'city': 'IMLAY CITY ', 'classification': 'neutral', 'donor_id': '519dfef0-05c0-4759-851a-8caa7f56ff1d', 'entity_type': 'Individual', 'first_name': 'BETTY ', 'full_name': 'betty burton ', 'id': '519dfef0-05c0-4759-851a-8caa7f56ff1d', 'last_name': 'BURTON ', 'recipient_id': '7e56adfa-c5e4-459d-b280-92a2c67e8602', 'recipient_name': 'lapeer county democratic party', 'state': 'MI', 'zip': '48444-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '107 MEYERS AVE', 'city': 'JACKSON ', 'classification': 'neutral', 'donor_id': '67ef676e-27a0-40d5-8f5c-9bfae6f80a88', 'entity_type': 'Individual', 'first_name': 'TERRY ', 'full_name': 'terry applegate ', 'id': 
'67ef676e-27a0-40d5-8f5c-9bfae6f80a88', 'last_name': 'APPLEGATE ', 'recipient_id': 'a9c205c4-6e86-465d-b9f8-55400317be37', 'recipient_name': 'sheet metal workers local 7 pac', 'state': 'MI', 'zip': '49203-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '4890 GARDENER RD.', 'city': 'METAMORIA ', 'classification': 'neutral', 'company': 'retired', 'donor_id': '31c2546b-6967-4625-8266-2ca498d7b0e1', 'entity_type': 'Individual', 'first_name': 'DIANE ', 'full_name': 'diane scott ', 'id': '31c2546b-6967-4625-8266-2ca498d7b0e1', 'last_name': 'SCOTT ', 'occupation': 'homemaker', 'recipient_id': '4a4659c5-77ec-4e8e-a171-48d9266cd78f', 'recipient_name': 'teamsters 406 political action committee', 'state': 'MI', 'zip': '48455-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '1033 N. PAULINA ST UNIT 1R', 'city': 'CHICAGO ', 'classification': 'neutral', 'company': 'capital area housing pship', 'donor_id': 'f2afa0d1-b1f9-4278-9df4-c5bf2c01c65b', 'entity_type': 'Individual', 'first_name': 'CURTIS ', 'full_name': 'curtis audette ', 'id': 'f2afa0d1-b1f9-4278-9df4-c5bf2c01c65b', 'last_name': 'AUDETTE ', 'occupation': 'marketing director', 'recipient_id': '9187a1f9-7b89-47cc-b136-04b272161da1', 'recipient_name': 'will snyder majority fund', 'state': 'IL', 'zip': '60622-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '1490 7TH ST NW APT 210', 'city': 'WASHINGTON ', 'classification': 'neutral', 'donor_id': '7ca75427-170a-4b3b-8e26-1fdd95e7590f', 'entity_type': 'Individual', 'first_name': 'RITA ', 'full_name': 'rita collins ', 'id': '7ca75427-170a-4b3b-8e26-1fdd95e7590f', 'last_name': 'COLLINS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'DC', 'zip': '20001-3389'}\n", + "{'address': '4358 FOXPOINTE DRIVE', 'city': 'WEST BLOOMFILED ', 'classification': 'neutral', 'donor_id': 'c2269438-d978-4732-a2c1-f2621514a1f1', 'entity_type': 'Individual', 'first_name': 'LAURA ', 
'full_name': 'laura noveck ', 'id': 'c2269438-d978-4732-a2c1-f2621514a1f1', 'last_name': 'NOVECK ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '48323-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '83 ANCHOR DR', 'city': 'INDIAN HARBOUR BEACH', 'classification': 'neutral', 'donor_id': '2800af86-a826-4ee1-a2b3-3b8d454b229d', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james bangerter ', 'id': '2800af86-a826-4ee1-a2b3-3b8d454b229d', 'last_name': 'BANGERTER ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'FL', 'zip': '32937-3563'}\n", + "{'address': '1978 EDGEWOOD BLVD', 'city': 'BERKLEY ', 'classification': 'neutral', 'donor_id': 'b61a2f45-5a13-401a-b0c2-470368e45a95', 'entity_type': 'Individual', 'first_name': 'LISA ', 'full_name': 'lisa turner ', 'id': 'b61a2f45-5a13-401a-b0c2-470368e45a95', 'last_name': 'TURNER ', 'recipient_id': '116b2364-8dc9-4ec5-83ad-0f43db55c764', 'recipient_name': 'committee to elect natalie price', 'state': 'MI', 'zip': '48072-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '1791 WALLACE ST', 'city': 'SIMI VALLEY ', 'classification': 'neutral', 'company': 'county of ventura', 'donor_id': '05fbf8f2-14e5-468f-ac3c-6d38cb79aea2', 'entity_type': 'Individual', 'first_name': 'REBECCA ', 'full_name': 'rebecca albarran ', 'id': '05fbf8f2-14e5-468f-ac3c-6d38cb79aea2', 'last_name': 'ALBARRAN ', 'occupation': 'hs client benefit spec iv', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '93065-0000'}\n", + "{'classification': 'neutral'}\n", + "{'classification': 'neutral'}\n", + "{'address': '4375 ELMWOOD DR', 'city': 'OKEMOS ', 'classification': 'neutral', 'donor_id': '2a66be20-50a9-4c95-a836-7dcdf6f85c53', 'entity_type': 
'Individual', 'first_name': 'MARY ', 'full_name': 'mary hardy ', 'id': '2a66be20-50a9-4c95-a836-7dcdf6f85c53', 'last_name': 'HARDY ', 'recipient_id': '3933a18f-92b6-4fb9-8ed9-a289ae65c09d', 'recipient_name': 'emily busch for state representative', 'state': 'MI', 'zip': '48864-0000'}\n", + "{'address': '9732 NW HENRY CT', 'city': 'PORTLAND ', 'classification': 'neutral', 'donor_id': '135321c7-d5f3-4496-8593-e3d92dc01b4f', 'entity_type': 'Individual', 'first_name': 'DAVID ', 'full_name': 'david evans ', 'id': '135321c7-d5f3-4496-8593-e3d92dc01b4f', 'last_name': 'EVANS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'OR', 'zip': '97229-8060'}\n", + "{'address': '6516 FOREST RIDGE DR', 'city': 'DURHAM ', 'classification': 'neutral', 'donor_id': '82f6e2a5-d1f6-40b1-ab48-b0ddd0d8b2ef', 'entity_type': 'Individual', 'first_name': 'VICTORIA ', 'full_name': 'victoria mathews ', 'id': '82f6e2a5-d1f6-40b1-ab48-b0ddd0d8b2ef', 'last_name': 'MATHEWS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NC', 'zip': '27713-6743'}\n", + "{'address': '434 FRANKLIN ST APT 2', 'city': 'CAMBRIDGE ', 'classification': 'neutral', 'donor_id': '983946cd-bd5f-49de-8d7d-5c7e5fc187df', 'entity_type': 'Individual', 'first_name': 'ALISON ', 'full_name': 'alison gassett ', 'id': '983946cd-bd5f-49de-8d7d-5c7e5fc187df', 'last_name': 'GASSETT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MA', 'zip': '02139-3261'}\n", + "{'address': '401 S LAKESHORE BLVD 314', 'city': 'MARQUETTE ', 'classification': 'neutral', 'donor_id': '59835b92-ae12-4c63-bcf5-bc4c15f49a1a', 'entity_type': 'Individual', 'first_name': 'LISA ', 'full_name': 'lisa stasiuk ', 'id': '59835b92-ae12-4c63-bcf5-bc4c15f49a1a', 'last_name': 'STASIUK ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 
'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '49855-0000'}\n", + "{'address': '1398 PARKVIEW DR', 'city': 'NEW RICHMOND ', 'classification': 'neutral', 'donor_id': 'a20e56d4-b16a-48d9-a572-dd5c20afb4ed', 'entity_type': 'Individual', 'first_name': 'STEPHEN ', 'full_name': 'stephen tornio ', 'id': 'a20e56d4-b16a-48d9-a572-dd5c20afb4ed', 'last_name': 'TORNIO ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'WI', 'zip': '54017-2339'}\n", + "{'address': '17367 NORTHWOOD HWY', 'city': 'ARCADIA ', 'classification': 'neutral', 'donor_id': 'd228df64-4788-45fa-8fad-495f05058201', 'entity_type': 'Individual', 'first_name': 'MARY ', 'full_name': 'mary williams ', 'id': 'd228df64-4788-45fa-8fad-495f05058201', 'last_name': 'WILLIAMS ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MI', 'zip': '49613-0000'}\n", + "{'address': '2175 W 25TH ST', 'city': 'LOS ANGELES ', 'classification': 'neutral', 'donor_id': '16817b6c-6455-49e3-aec7-ae3a1100a96a', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james haley ', 'id': '16817b6c-6455-49e3-aec7-ae3a1100a96a', 'last_name': 'HALEY ', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '90018-0000'}\n", + "{'address': 'PO BOX 410', 'city': 'MENDOCINO ', 'classification': 'neutral', 'donor_id': 'f1e3260d-301f-4ea5-b503-e0455e3f0f10', 'entity_type': 'Individual', 'first_name': 'SUSAN ', 'full_name': 'susan keller ', 'id': 'f1e3260d-301f-4ea5-b503-e0455e3f0f10', 'last_name': 'KELLER ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'CA', 'zip': '95460-0410'}\n", + "{'address': '1460 E POND DR APT 14', 'city': 'OKEMOS ', 'classification': 'neutral', 'donor_id': 
'a7d035e5-12cf-4e5a-8dc2-0d9552bc59d8', 'entity_type': 'Individual', 'first_name': 'RUSS ', 'full_name': 'russ kirkpatrick ', 'id': 'a7d035e5-12cf-4e5a-8dc2-0d9552bc59d8', 'last_name': 'KIRKPATRICK ', 'recipient_id': '520c9ce3-c702-4926-8688-750984ee6c0d', 'recipient_name': 'friends of sarah may seward', 'state': 'MI', 'zip': '48864-0000'}\n", + "{'address': '207 N. 5TH AVE. UNIT A', 'city': 'BARSTOW ', 'classification': 'neutral', 'donor_id': '7f16dd46-24ca-475c-9ee2-e5e49fe90048', 'entity_type': 'Individual', 'first_name': 'BRIDGET ', 'full_name': 'bridget breese ', 'id': '7f16dd46-24ca-475c-9ee2-e5e49fe90048', 'last_name': 'BREESE ', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '92311-0000'}\n", + "{'address': '1127 RANFIELD LANE', 'city': 'FLINT ', 'classification': 'neutral', 'donor_id': '4a2985a0-1033-49d7-bd6e-ff09983ed3b9', 'entity_type': 'Individual', 'first_name': 'DALE ', 'full_name': 'dale weighill ', 'id': '4a2985a0-1033-49d7-bd6e-ff09983ed3b9', 'last_name': 'WEIGHILL ', 'recipient_id': '7dbf96d7-7405-4f4e-8089-da6ecdf2197f', 'recipient_name': 'michigan community college association political action comm', 'state': 'MI', 'zip': '48532-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '2885 SLEEPING MEADOW LANE', 'city': 'MASON ', 'classification': 'neutral', 'company': 'consumers energy', 'donor_id': 'b8df5c77-6655-44d5-8efa-5a1cb02e0b7f', 'entity_type': 'Individual', 'first_name': 'BRIAN ', 'full_name': 'brian bushey ', 'id': 'b8df5c77-6655-44d5-8efa-5a1cb02e0b7f', 'last_name': 'BUSHEY ', 'occupation': 'dir egi analytics', 'recipient_id': '642c45b3-2610-4afe-a3b8-a611eaeb9e94', 'recipient_name': 'cms energy corp employees for better government', 'state': 'MI', 'zip': '48854-8709'}\n", + "{'classification': 'neutral'}\n", + "{'address': '1217 WHISPERING KNOLL LN', 'city': 'ROCHESTER HILLS ', 'classification': 'neutral', 
'company': 'blue cross blue shield of mich', 'donor_id': 'c818757b-5305-45c8-b024-30244cc46d21', 'entity_type': 'Individual', 'first_name': 'KATHRYN ', 'full_name': 'kathryn antoski ^ ', 'id': 'c818757b-5305-45c8-b024-30244cc46d21', 'last_name': 'ANTOSKI ^ ', 'occupation': 'analyst - senior', 'recipient_id': '5a56136a-8ea1-4027-918f-be7d7a66c373', 'recipient_name': 'blue cross blue shield of michigan political action committee', 'state': 'MI', 'zip': '48306-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '4608 OAKRIDGE DR', 'city': 'MIDLAND ', 'classification': 'neutral', 'donor_id': '4c1803dc-2633-4432-9d19-005d82aedf68', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james allen ', 'id': '4c1803dc-2633-4432-9d19-005d82aedf68', 'last_name': 'ALLEN ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '48640-1914'}\n", + "{'address': '1919 CURTIS ST', 'city': 'BERKELEY ', 'classification': 'neutral', 'donor_id': '514931c3-da83-44dd-bc30-4fece766d85e', 'entity_type': 'Individual', 'first_name': 'JOAQUIN ', 'full_name': 'joaquin carbonell ', 'id': '514931c3-da83-44dd-bc30-4fece766d85e', 'last_name': 'CARBONELL ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '94702-1648'}\n", + "{'address': '39842 GOLFVIEW DR.', 'city': 'NORTHVILLE ', 'classification': 'neutral', 'donor_id': '739bc866-c9cc-4360-ae52-9b15c22ca6b6', 'entity_type': 'Individual', 'first_name': 'DONALD ', 'full_name': 'donald gates ', 'id': '739bc866-c9cc-4360-ae52-9b15c22ca6b6', 'last_name': 'GATES ', 'recipient_id': 'e9e8bf7f-2d34-42c9-b155-b95481ca238f', 'recipient_name': 'committee to elect dave staudt', 'state': 'MI', 'zip': '48167-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '7300 KRAENZLEIN ROAD', 'city': 'BAY CITY ', 'classification': 'neutral', 'donor_id': 
'4ae0900b-eac4-4e41-b4a2-6727561db273', 'entity_type': 'Individual', 'first_name': 'JOAN ', 'full_name': 'joan wilson ', 'id': '4ae0900b-eac4-4e41-b4a2-6727561db273', 'last_name': 'WILSON ', 'recipient_id': 'c5bc157e-1eff-4db0-b26a-eea376cc3fd0', 'recipient_name': 'tamara d carlone for state board of education', 'state': 'MI', 'zip': '48706-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '753 PATRICIA PLACE DR', 'city': 'WESTLAND ', 'classification': 'neutral', 'company': 'blue cross blue shield of mich', 'donor_id': '184e5f13-aba5-44da-be09-572ac083b3e9', 'entity_type': 'Individual', 'first_name': 'SHUNDA ', 'full_name': 'shunda jones ^ ', 'id': '184e5f13-aba5-44da-be09-572ac083b3e9', 'last_name': 'JONES ^ ', 'occupation': 'manager - administrative', 'recipient_id': '5a56136a-8ea1-4027-918f-be7d7a66c373', 'recipient_name': 'blue cross blue shield of michigan political action committee', 'state': 'MI', 'zip': '48185-0000'}\n", + "{'address': '3830 33RD AVE SW UNIT A', 'city': 'SEATTLE ', 'classification': 'neutral', 'donor_id': '9a5a86bb-a480-42ad-913a-17f80efbfb86', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james sims ', 'id': '9a5a86bb-a480-42ad-913a-17f80efbfb86', 'last_name': 'SIMS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'WA', 'zip': '98126-2514'}\n", + "{'address': '204 HURON ST', 'city': 'BAY CITY ', 'classification': 'neutral', 'donor_id': '298c73fa-495f-4df0-a348-16a62d6464ee', 'entity_type': 'Individual', 'first_name': 'MATHEWS ', 'full_name': 'mathews gavin ', 'id': '298c73fa-495f-4df0-a348-16a62d6464ee', 'last_name': 'GAVIN ', 'recipient_id': 'a9c205c4-6e86-465d-b9f8-55400317be37', 'recipient_name': 'sheet metal workers local 7 pac', 'state': 'MI', 'zip': '48706-4931'}\n", + "{'address': '740 HEWITT LN', 'city': 'NEW WINDSOR ', 'classification': 'neutral', 'donor_id': 'a41724c3-f42d-42a0-bc7d-8973c2e3a0c8', 'entity_type': 
'Individual', 'first_name': 'MARY ', 'full_name': 'mary washburn ', 'id': 'a41724c3-f42d-42a0-bc7d-8973c2e3a0c8', 'last_name': 'WASHBURN ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'NY', 'zip': '12553-5462'}\n", + "{'address': '100 ROCKVIEW ST', 'city': 'JAMAICA PLAIN ', 'classification': 'neutral', 'donor_id': '1755fe5d-6210-4ecd-8075-de785b4a8a73', 'entity_type': 'Individual', 'first_name': 'TIMOTHY ', 'full_name': 'timothy havel ', 'id': '1755fe5d-6210-4ecd-8075-de785b4a8a73', 'last_name': 'HAVEL ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MA', 'zip': '02130-4660'}\n", + "{'address': '2260 POLISH LINE RD.', 'city': 'CHEBOYGAN ', 'classification': 'neutral', 'donor_id': '46b3649a-e403-4bd0-8ee2-d65a34d191f9', 'entity_type': 'Individual', 'first_name': 'STEVE ', 'full_name': 'steve downing ', 'id': '46b3649a-e403-4bd0-8ee2-d65a34d191f9', 'last_name': 'DOWNING ', 'recipient_id': 'b92fe9af-a5f5-4f15-8f35-d5536eb946eb', 'recipient_name': 'friends of marie fielder', 'state': 'MI', 'zip': '49721-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '10698 BEAR LAKE TRL', 'city': 'PORTAGE ', 'classification': 'neutral', 'donor_id': 'b0dafcd3-4ba2-4aa1-ac43-2298edc705e4', 'entity_type': 'Individual', 'first_name': 'MICHAEL ', 'full_name': 'michael anderson ', 'id': 'b0dafcd3-4ba2-4aa1-ac43-2298edc705e4', 'last_name': 'ANDERSON ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '49024-6206'}\n", + "{'address': '150 MARINE AVE', 'city': 'BROOKLYN ', 'classification': 'neutral', 'donor_id': '58988e4c-4376-4fd7-8c13-10bc9fc65335', 'entity_type': 'Individual', 'first_name': 'PAMELA L ', 'full_name': 'pamela l landberg ', 'id': '58988e4c-4376-4fd7-8c13-10bc9fc65335', 'last_name': 'LANDBERG ', 'recipient_id': 
'6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NY', 'zip': '11209-7744'}\n", + "{'address': '1701 PORTER SW SUITE 6', 'city': 'WYOMING ', 'classification': 'neutral', 'company': 'self emp;oyed', 'donor_id': '3dfd0b64-eb59-4475-9abc-8be958bd8182', 'entity_type': 'Individual', 'first_name': 'DANIEL ', 'full_name': 'daniel hibma ', 'id': '3dfd0b64-eb59-4475-9abc-8be958bd8182', 'last_name': 'HIBMA ', 'occupation': 'property management', 'recipient_id': 'b4b49f06-2c4d-42e4-83e8-fc63c95fad04', 'recipient_name': 'committee to protect voters rights', 'state': 'MI', 'zip': '49519-0000'}\n", + "{'classification': 'neutral'}\n", + "{'address': '1501 BRIDGEWATER DR', 'city': 'MELBOURNE ', 'classification': 'neutral', 'donor_id': 'd71d895c-b18c-45ed-9a13-ec025564fedb', 'entity_type': 'Individual', 'first_name': 'JUDITH ', 'full_name': 'judith behrendt ', 'id': 'd71d895c-b18c-45ed-9a13-ec025564fedb', 'last_name': 'BEHRENDT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'FL', 'zip': '32934-3215'}\n" + ] } ], "source": [ - "x = add_notes_from_df(merged_inds_sample)\n", - "x.nodes['abdussamad, shams']" + "matplot_G = create_network_nodes(grouped_sample.sample(50))\n", + "for v,d in matplot_G.nodes(data=True):\n", + " #print(u)\n", + " #print(v)\n", + " print(d)" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 118, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcompanyentity_typefirst_namefull_namelast_namepartystatetransaction_iddonor_idyearamountrecipient_idoffice_soughtpurposetransaction_typedonor_typerecipient_typedonor_office
6631869727NaNindividualNaNwilliam \bstonerNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "
" - ], "text/plain": [ - " id company entity_type first_name full_name last_name \\\n", - "663 1869727 NaN individual NaN william \bstoner NaN \n", - "\n", - " party state transaction_id donor_id year amount recipient_id \\\n", - "663 NaN NaN NaN NaN NaN NaN NaN \n", - "\n", - " office_sought purpose transaction_type donor_type recipient_type \\\n", - "663 NaN NaN NaN NaN NaN \n", - "\n", - " donor_office \n", - "663 NaN " + "['green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'red',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green',\n", + " 'green']" ] }, - "execution_count": 79, + "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "merged_inds_sample.loc[merged_inds_sample.full_name == 'william \\x08stoner']" + "#for a,b in G.nodes(data=True):\n", + " #print(G[node])#['classification'])\n", 
+ "# print(b)#['classification'])\n", + "entity_colors = {'neutral': 'green', 'c':'blue', 'f':'red'}\n", + "node_colors = [entity_colors.get(G.nodes[node].get('classification', 'neutral'), 'green') for node in G.nodes()]\n", + "node_colors" ] }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 36, "metadata": {}, "outputs": [ { "data": { + "image/png": "", "text/plain": [ - "{'color': nan, 'size': 2}" + "
" ] }, - "execution_count": 65, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('William Stoner', 'KALAMAZOO ANESTHESIOLOGY PC', {'amount': 10.0, 'year': 2017})\n", + "('KALAMAZOO ANESTHESIOLOGY PC', 'Bob Kushman', {'amount': 1530})\n", + "('Bob Kushman', 'KALAMAZOO ANESTHESIOLOGY PC', {'amount': 530})\n", + "('James Engelson', 'Bob Kushman', {'amount': 90.0, 'year': 2019})\n", + "('Allen Wolf', 'William Stoner', {'amount': 111.5, 'year': 2018})\n", + "('Allen Wolf', 'William Stoner', {'amount': 11100.5, 'year': 2018})\n" + ] } ], "source": [ - "G = nx.Graph()\n", - "G.add_node(0)\n", - "nx.set_node_attributes(G, \"red\", name=\"color\")\n", - "nx.set_node_attributes(G, 2, name=\"size\")\n", - "G.add_node(1)\n", - "nx.set_node_attributes(G, np.nan, name='color')\n", - "G.nodes[0]" + "G = nx.MultiDiGraph()\n", + " \n", + "G.add_node(\"William Stoner\", Age=10, Weight=110)\n", + "G.add_edge(\"William Stoner\",\"KALAMAZOO ANESTHESIOLOGY PC\",amount=10.00, year=2017)\n", + "G.add_node(\"KALAMAZOO ANESTHESIOLOGY PC\", Age=50, Weight=180)\n", + "G.add_edge(\"KALAMAZOO ANESTHESIOLOGY PC\",\"Bob Kushman\",amount=1530)\n", + "G.add_node(\"Bob Kushman\", Age=90, Weight=111)\n", + "G.add_edge(\"Bob Kushman\",\"KALAMAZOO ANESTHESIOLOGY PC\",amount=530)\n", + "G.add_node(\"James Engelson\", Age=40, Weight=10)\n", + "G.add_edge(\"James Engelson\",\"Bob Kushman\",amount=90.00, year=2019,)\n", + "G.add_node(\"Allen Wolf\", Age=30, Weight=1710)\n", + "G.add_edge(\"Allen Wolf\",\"William Stoner\",amount=111.50,year=2018)\n", + "G.add_edge(\"Allen Wolf\",\"William Stoner\",amount=11100.50,year=2018)\n", + "\n", + "\n", + "\n", + "edge_labels = {(u,v):d['amount'] for u,v,d in G.edges(data=True)}\n", + "nx.draw(G, with_labels=True,node_color='red')\n", + "pos = nx.planar_layout(G)\n", + "for edge, label in edge_labels.items():\n", + " nx.draw_networkx_edge_labels(G, pos=pos, 
edge_labels={edge: label}, label_pos=0.5, verticalalignment='center', horizontalalignment='center')\n", + "plt.show()\n", + "for edge in G.edges(data=True):\n", + " print(edge)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { - "image/png": "", - "text/plain": [ - "
" + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hoverinfo": "text", + "hovertext": [ + "Amount: 10.00, Weight: 10.00", + "Amount: 1530.00, Weight: 1530.00", + "Amount: 530.00, Weight: 530.00", + "Amount: 90.00, Weight: 90.00", + "Amount: 111.50, Weight: 111.50" + ], + "line": { + "color": "#888" + }, + "mode": "lines", + "type": "scatter", + "x": [ + 10, + 50, + null, + 50, + 90, + null, + 90, + 50, + null, + 40, + 90, + null, + 30, + 10, + null + ], + "y": [ + 110, + 180, + null, + 180, + 111, + null, + 111, + 180, + null, + 10, + 111, + null, + 1710, + 110, + null + ] + }, + { + "hoverinfo": "text", + "marker": { + "colorscale": [ + [ + 0, + "rgb(255,255,217)" + ], + [ + 0.125, + "rgb(237,248,177)" + ], + [ + 0.25, + "rgb(199,233,180)" + ], + [ + 0.375, + "rgb(127,205,187)" + ], + [ + 0.5, + "rgb(65,182,196)" + ], + [ + 0.625, + "rgb(29,145,192)" + ], + [ + 0.75, + "rgb(34,94,168)" + ], + [ + 0.875, + "rgb(37,52,148)" + ], + [ + 1, + "rgb(8,29,88)" + ] + ], + "showscale": true, + "size": 10 + }, + "mode": "markers", + "text": [ + "William Stoner
Age: 10
Weight: 110", + "KALAMAZOO ANESTHESIOLOGY PC
Age: 50
Weight: 180", + "Bob Kushman
Age: 90
Weight: 111", + "James Engelson
Age: 40
Weight: 10", + "Allen Wolf
Age: 30
Weight: 1710" + ], + "type": "scatter", + "x": [ + 10, + 50, + 90, + 40, + 30 + ], + "y": [ + 110, + 180, + 111, + 10, + 1710 + ] + } + ], + "layout": { + "hovermode": "closest", + "margin": { + "b": 20, + "l": 5, + "r": 5, + "t": 40 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 
0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 
0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + 
"#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 16 + }, + "text": "
Network graph made with Plotly" + }, + "xaxis": { + "showgrid": false, + "showticklabels": false, + "zeroline": false + }, + "yaxis": { + "showgrid": false, + "showticklabels": false, + "zeroline": false + } + } + }, + "text/html": [ + "
" ] }, "metadata": {}, @@ -2288,41 +2729,1184 @@ } ], "source": [ - "G = nx.petersen_graph()\n", - "subax1 = plt.subplot(121)\n", - "nx.draw(G, with_labels=True, font_weight='bold')\n", - "subax2 = plt.subplot(122)\n", - "nx.draw_shell(G, nlist=[range(5, 10), range(5)], with_labels=True, font_weight='light')\n" + "G = nx.MultiDiGraph()\n", + "\n", + "G.add_node(\"William Stoner\", Age=10, Weight=110)\n", + "G.add_node(\"KALAMAZOO ANESTHESIOLOGY PC\", Age=50, Weight=180)\n", + "G.add_node(\"Bob Kushman\", Age=90, Weight=111)\n", + "G.add_node(\"James Engelson\", Age=40, Weight=10)\n", + "G.add_node(\"Allen Wolf\", Age=30, Weight=1710)\n", + "\n", + "G.add_edge(\"William Stoner\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=10.00, amount=10.00, year=2017)\n", + "G.add_edge(\"KALAMAZOO ANESTHESIOLOGY PC\", \"Bob Kushman\", weight=1530, amount=1530, year=2017)\n", + "G.add_edge(\"Bob Kushman\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=530, amount=530, year=2017)\n", + "G.add_edge(\"James Engelson\", \"Bob Kushman\", weight=90.00, amount=90.00, year=2017)\n", + "G.add_edge(\"Allen Wolf\", \"William Stoner\", weight=111.50, amount=111.50, year=2017)\n", + "\n", + "# Create Plotly graph\n", + "edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", + "hovertext = []\n", + "\n", + "for edge in G.edges(data=True):\n", + " x0, y0 = G.nodes[edge[0]]['Age'], G.nodes[edge[0]]['Weight']\n", + " x1, y1 = G.nodes[edge[1]]['Age'], G.nodes[edge[1]]['Weight']\n", + " edge_trace['x'] += tuple([x0, x1, None])\n", + " edge_trace['y'] += tuple([y0, y1, None])\n", + " hovertext.append(f\"Amount: {edge[2]['amount']:.2f}, Weight: {edge[2]['weight']:.2f}\")\n", + "\n", + "edge_trace['hovertext'] = hovertext\n", + "\n", + "node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", + "\n", + "for node in G.nodes():\n", + " x, y = G.nodes[node]['Age'], 
G.nodes[node]['Weight']\n", + " node_trace['x'] += tuple([x])\n", + " node_trace['y'] += tuple([y])\n", + " node_info = node + '
' + 'Age: ' + str(G.nodes[node]['Age']) + '
' + 'Weight: ' + str(G.nodes[node]['Weight'])\n", + " node_trace['text'] += tuple([node_info])\n", + "\n", + "fig = go.Figure(data=[edge_trace, node_trace],\n", + " layout=go.Layout(\n", + " title='
Network graph made with Plotly',\n", + " titlefont=dict(size=16),\n", + " showlegend=False,\n", + " hovermode='closest',\n", + " margin=dict(b=20,l=5,r=5,t=40),\n", + " xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n", + " yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))\n", + "\n", + "fig.show()\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 58, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "{'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC': Text(-0.071782758799796, -0.3387166453182715, 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'),\n", - " 'Paa Pac': Text(0.06023249378587841, -0.07946204618171311, 'Paa Pac'),\n", - " 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB': Text(-0.12554712442237967, 0.08789304420689323, 'UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALLOT CLUB'),\n", - " 'COMMITTEE TO ELECT DR PATRICIA BERNARD': Text(-0.40486733116122986, -0.04769565353200762, 'COMMITTEE TO ELECT DR PATRICIA BERNARD'),\n", - " 'Pabar Pac (Pa Bar Assn)': Text(-0.6714326170558735, 0.21693950702464565, 'Pabar Pac (Pa Bar Assn)'),\n", - " 'Ugi Utilities Inc/Ugi Energy Services Llc Pac': Text(1.0, -0.38838038123915186, 'Ugi Utilities Inc/Ugi Energy Services Llc Pac'),\n", - " 'Pa Fraternal Order Of Police Pac': Text(0.5897482153166077, -0.2569656851069028, 'Pa Fraternal Order Of Police Pac'),\n", - " 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC': Text(-0.27784326029554446, 0.2828712220763738, 'MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC'),\n", - " 'Citizens For Kail': Text(-0.09850761736766293, 0.5235166380701339, 'Citizens For Kail')}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hoverinfo": "text", + "hovertext": [ + "Amount: 5.00", + "Amount: 100.00", + "Amount: 15.00", + "Amount: 151.76", + "Amount: 75.00", + "Amount: 11.12", + "Amount: 1.00", + "Amount: 1.00", + "Amount: 5.88", + "Amount: 250.00", + "Amount: 15.00", + "Amount: 273.00", + "Amount: 25.44", + "Amount: 100.00", + "Amount: 50.00", + "Amount: 400.00", + "Amount: 300.00", + "Amount: 1020.00", + "Amount: 100.00", + "Amount: 100.00", + "Amount: 5.00", + "Amount: 15.00", + "Amount: 100.00", + "Amount: 13.00", + "Amount: 750.00", + "Amount: 15.00", + "Amount: 500.00", + "Amount: 2.50", + "Amount: 1.00", + "Amount: 250.00", + "Amount: 35.00", + "Amount: 40.00", + "Amount: 9.29", + "Amount: 5.00", + "Amount: 19.00", + "Amount: 75.00", + "Amount: 25.15", + "Amount: 15.78", + "Amount: 1.00", + "Amount: 250.00", + "Amount: 1000.00", + "Amount: 2.87", + "Amount: 67.18", + "Amount: 150.00", + "Amount: 29.40", + "Amount: 1.00", + "Amount: 500.00", + "Amount: 60.00", + "Amount: 10.00", + "Amount: 76.32" + ], + "line": { + "color": "#888" + }, + "mode": "lines", + "type": "scatter", + "x": [], + "y": [] + }, + { + "hoverinfo": "text", + "marker": { + "color": [ + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green" + 
], + "colorscale": [ + [ + 0, + "rgb(255,255,217)" + ], + [ + 0.125, + "rgb(237,248,177)" + ], + [ + 0.25, + "rgb(199,233,180)" + ], + [ + 0.375, + "rgb(127,205,187)" + ], + [ + 0.5, + "rgb(65,182,196)" + ], + [ + 0.625, + "rgb(29,145,192)" + ], + [ + 0.75, + "rgb(34,94,168)" + ], + [ + 0.875, + "rgb(37,52,148)" + ], + [ + 1, + "rgb(8,29,88)" + ] + ], + "showscale": true, + "size": 10 + }, + "mode": "markers", + "text": [ + "Name: rachel puthuff
donor_id: 639646bf-5176-474c-b800-1afb34c55b53
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel puthuff
recipient_name: reproductive freedom for all
address: 3717 WHITAKER
city: SCHERTZ
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 639646bf-5176-474c-b800-1afb34c55b53
last_name: PUTHUFF
state: TX
zip: 78154-0000
", + "Name: reproductive freedom for all
classification: neutral
", + "Name: james bennett
donor_id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: james bennett
recipient_name: reproductive freedom for all
address: 533 W OAK ST
city: MASON
classification: neutral
entity_type: Individual
first_name: JAMES
id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
last_name: BENNETT
state: MI
zip: 48854-0000
", + "Name: sonny mandouh mr.^
donor_id: 34d28c8d-c0fe-463d-9afe-73269a47389b
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: sonny mandouh mr.^
recipient_name: realtors political action committee of michigan
address: 23760 HOLLANDER ST
city: DEARBORN
classification: neutral
entity_type: Individual
first_name: SONNY
id: 34d28c8d-c0fe-463d-9afe-73269a47389b
last_name: MANDOUH MR.^
state: MI
zip: 48128-0000
", + "Name: realtors political action committee of michigan
classification: neutral
", + "Name: charles crider
donor_id: e765ba37-66d2-4b65-9f42-3902dca518b6
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles crider
recipient_name: reproductive freedom for all
address: 1403 WEST HIGHLAND BLVD.
city: BATTLE CREEK
classification: neutral
entity_type: Individual
first_name: CHARLES
id: e765ba37-66d2-4b65-9f42-3902dca518b6
last_name: CRIDER
state: MI
zip: 49015-0000
", + "Name: michelle zukowski-serlin
donor_id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: michelle zukowski-serlin
recipient_name: reproductive freedom for all
address: 4853 LANDING WAY
city: KALAMAZOO
classification: neutral
company: choices for change counseling
entity_type: Individual
first_name: MICHELLE
id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
last_name: ZUKOWSKI-SERLIN
occupation: business owners and clinical s
state: MI
zip: 49048-6153
", + "Name: diana gibson-lee
donor_id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: diana gibson-lee
recipient_name: veronica klinefelt for state senate
address: 7450 W DYER RD
city: TWINING
classification: neutral
entity_type: Individual
first_name: DIANA
id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
last_name: GIBSON-LEE
state: MI
zip: 48766-9773
", + "Name: veronica klinefelt for state senate
classification: neutral
", + "Name: edward kazala
donor_id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: edward kazala
recipient_name: elect padma kuppa
address: 70 REVERE CT
city: LAFAYETTE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
last_name: KAZALA
state: CA
zip: 94549-0000
", + "Name: andrea kovalsky
donor_id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: andrea kovalsky
recipient_name: veronica klinefelt for state senate
address: 497 SAINT MARKS AVE APT 5P
city: BROOKLYN
classification: neutral
entity_type: Individual
first_name: ANDREA
id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
last_name: KOVALSKY
state: NY
zip: 11238-5792
", + "Name: colin palmer
donor_id: ad440dcd-79ad-4323-8f19-c7a491f897f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: colin palmer
recipient_name: veronica klinefelt for state senate
address: 531 E 20TH ST APT 10D
city: NEW YORK
classification: neutral
company: not employed
entity_type: Individual
first_name: COLIN
id: ad440dcd-79ad-4323-8f19-c7a491f897f7
last_name: PALMER
occupation: not employed
state: NY
zip: 10010-7604
", + "Name: julie svinicki ms.^
donor_id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: julie svinicki ms.^
recipient_name: realtors political action committee of michigan
address: 1608 KIRTLAND DRIVE
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: JULIE
id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
last_name: SVINICKI MS.^
state: MI
zip: 48103-0000
", + "Name: audrey lance
donor_id: e8ef0925-3f10-4ebf-b025-dea32e506a50
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: audrey lance
recipient_name: reproductive freedom for all
address: 3945 FORBES AVE APT 444
city: PITTSBURGH
classification: neutral
entity_type: Individual
first_name: AUDREY
id: e8ef0925-3f10-4ebf-b025-dea32e506a50
last_name: LANCE
occupation: physician
state: PA
zip: 15213-0000
", + "Name: walker c evans
donor_id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: walker c evans
recipient_name: reproductive freedom for all
address: 2810 NORTHVILLE DR NE
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: WALKER C
id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
last_name: EVANS
state: MI
zip: 49525-0000
", + "Name: lori henderson
donor_id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
recipient_id: 6b51e739-dd22-4556-8555-6e11264ef4ce
full_name: lori henderson
recipient_name: planned parenthood advocates of mi
address: 2401 HARDWOOD AVE
city: ROYAK OAK
classification: neutral
entity_type: Individual
first_name: LORI
id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
last_name: HENDERSON
state: MI
zip: 48067-0000
", + "Name: planned parenthood advocates of mi
classification: neutral
", + "Name: brett lundie
donor_id: 932450e5-f8fc-4cb2-baac-acfad686561f
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: brett lundie
recipient_name: citizens to support mi women and children
address: 7779 CIRCLE DR
city: LAINGSBURG
classification: neutral
entity_type: Individual
first_name: BRETT
id: 932450e5-f8fc-4cb2-baac-acfad686561f
last_name: LUNDIE
state: MI
zip: 48848-0000
", + "Name: citizens to support mi women and children
classification: neutral
", + "Name: ian robinson
donor_id: 757923ec-02e3-424e-81b9-4152f6dd165b
recipient_id: 06ebbb03-574c-445b-9416-7d2134a06d1f
full_name: ian robinson
recipient_name: committee to elect james e johnson jr
address: 3435 BRENTWOOD CT
city: ANN ARBOR
classification: neutral
company: university of michigan
entity_type: Individual
first_name: IAN
id: 757923ec-02e3-424e-81b9-4152f6dd165b
last_name: ROBINSON
occupation: faculty
state: MI
zip: 48108-1757
", + "Name: committee to elect james e johnson jr
classification: neutral
", + "Name: kelly bean
donor_id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: kelly bean
recipient_name: friends of jerry neyer
address: 1405 E BATTLE RD
city: ROSEBUSH
classification: neutral
entity_type: Individual
first_name: KELLY
id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
last_name: BEAN
state: MI
zip: 48878-9732
", + "Name: friends of jerry neyer
classification: neutral
", + "Name: sandra johnson
donor_id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
recipient_id: 7ee2db24-b832-4f1b-af2e-e9c8eaf706bd
full_name: sandra johnson
recipient_name: committee to elect charise anderson
address: 424 N 21ST ST 0
city: MONTEBELLO
classification: neutral
entity_type: Individual
first_name: SANDRA
id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
last_name: JOHNSON
occupation: eligibility worker
state: CA
zip: 90640-0000
", + "Name: committee to elect charise anderson
classification: neutral
", + "Name: christopher mishler
donor_id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: christopher mishler
recipient_name: citizens to support mi women and children
address: 3690 VORHIES ROAD
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: CHRISTOPHER
id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
last_name: MISHLER
state: MI
zip: 48105-0000
", + "Name: stacy leroy daniels
donor_id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: stacy leroy daniels
recipient_name: bill g schuette for state representative
address: 3901 ORCHARD DRIVE
city: MIDLAND
classification: neutral
entity_type: Individual
first_name: STACY LEROY
id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
last_name: DANIELS
state: MI
zip: 48640-0000
", + "Name: bill g schuette for state representative
classification: neutral
", + "Name: suzanne r weinheimer
donor_id: 029a23eb-d90f-405b-995c-c8dc266e255f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: suzanne r weinheimer
recipient_name: reproductive freedom for all
address: 11045 8TH AVENUE NE APT 826
city: SEATTLE
classification: neutral
entity_type: Individual
first_name: SUZANNE R
id: 029a23eb-d90f-405b-995c-c8dc266e255f
last_name: WEINHEIMER
state: WA
zip: 98125-0000
", + "Name: dustin shaeffer mr.^
donor_id: fc041110-7c11-47af-b1bf-5daca974e4ee
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: dustin shaeffer mr.^
recipient_name: realtors political action committee of michigan
address: 60451 MOJAVE LANE
city: WASHINGTON
classification: neutral
entity_type: Individual
first_name: DUSTIN
id: fc041110-7c11-47af-b1bf-5daca974e4ee
last_name: SHAEFFER MR.^
state: MI
zip: 48094-0000
", + "Name: debra byl
donor_id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: debra byl
recipient_name: reproductive freedom for all
address: 987 BRADFORD GREENS
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: DEBRA
id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
last_name: BYL
state: MI
zip: 49525-0000
", + "Name: pamela wimp
donor_id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: pamela wimp
recipient_name: reproductive freedom for all
address: 8030 MERCER CT NE
city: LACEY
classification: neutral
entity_type: Individual
first_name: PAMELA
id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
last_name: WIMP
state: WA
zip: 98516-6336
", + "Name: lori wortz
donor_id: 821a27dc-aa00-436e-80e2-655ce26bc830
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: lori wortz
recipient_name: bill g schuette for state representative
address: 4144 MERIDIAN RD
city: OKEMOS
classification: neutral
company: braenaru consulting
entity_type: Individual
first_name: LORI
id: 821a27dc-aa00-436e-80e2-655ce26bc830
last_name: WORTZ
occupation: consultant
state: MI
zip: 48864-0000
", + "Name: janet reid
donor_id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: janet reid
recipient_name: reproductive freedom for all
address: 2378 EATON GATE RD
city: LAKE ORION
classification: neutral
entity_type: Individual
first_name: JANET
id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
last_name: REID
state: MI
zip: 48360-1869
", + "Name: gary henderson
donor_id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: gary henderson
recipient_name: bill g schuette for state representative
address: 1601 KINGSWOOD DRIVE
city: LANSING
classification: neutral
company: aircraft precision prod. inc.
entity_type: Individual
first_name: GARY
id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
last_name: HENDERSON
occupation: sales purchasing manager
state: MI
zip: 48912-0000
", + "Name: claudette levesque
donor_id: 26d5e377-57c4-4f33-95ce-4209bff4242b
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: claudette levesque
recipient_name: reproductive freedom for all
address: 41 CATERPILLAR HILL RD
city: SARGENTVILLE
classification: neutral
entity_type: Individual
first_name: CLAUDETTE
id: 26d5e377-57c4-4f33-95ce-4209bff4242b
last_name: LEVESQUE
state: ME
zip: 04673-2464
", + "Name: graham chapman
donor_id: 8045638c-db65-4a13-9016-05e73766b5b1
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: graham chapman
recipient_name: reproductive freedom for all
address: 1914 CLINTON ST
city: LOS ANGELES
classification: neutral
entity_type: Individual
first_name: GRAHAM
id: 8045638c-db65-4a13-9016-05e73766b5b1
last_name: CHAPMAN
state: CA
zip: 90026-4137
", + "Name: john olson
donor_id: 1ff268c7-fbff-4f94-8810-48f31bb53681
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: john olson
recipient_name: friends of jerry neyer
address: 6025 VERDE TRL S APT K217
city: BOCA RATON
classification: neutral
entity_type: Individual
first_name: JOHN
id: 1ff268c7-fbff-4f94-8810-48f31bb53681
last_name: OLSON
state: FL
zip: 33433-4442
", + "Name: christina ridalls ms.^
donor_id: 9bea8116-83a3-486a-a457-50c0f80af060
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: christina ridalls ms.^
recipient_name: realtors political action committee of michigan
address: 3083 BEATTIE RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: CHRISTINA
id: 9bea8116-83a3-486a-a457-50c0f80af060
last_name: RIDALLS MS.^
state: MI
zip: 48843-0000
", + "Name: dylynn mclean
donor_id: a1943974-4abe-4093-be0b-edcc56a97ffe
recipient_id: bbe89315-1939-46e3-a5c0-2d6e5b28bc95
full_name: dylynn mclean
recipient_name: 1st congressional dist rep comm
address: 1531 W 20 MILE RD
city: SAULT STE MARIE
classification: neutral
entity_type: Individual
first_name: DYLYNN
id: a1943974-4abe-4093-be0b-edcc56a97ffe
last_name: MCLEAN
state: MI
zip: 49783-0000
", + "Name: 1st congressional dist rep comm
classification: neutral
", + "Name: andrew morris
donor_id: 767c512a-9c5a-4230-90ab-3fd40d731f60
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: andrew morris
recipient_name: elect padma kuppa
address: 1118 MORNINGSIDE AVE
city: SCHENECTADY
classification: neutral
entity_type: Individual
first_name: ANDREW
id: 767c512a-9c5a-4230-90ab-3fd40d731f60
last_name: MORRIS
state: NY
zip: 12309-5630
", + "Name: elect padma kuppa
classification: neutral
", + "Name: martha scoppa
donor_id: 78fcc760-825f-404a-b058-a88a99992d98
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: martha scoppa
recipient_name: reproductive freedom for all
address: 32 COLD SPRING RD
city: LIBERTY
classification: neutral
entity_type: Individual
first_name: MARTHA
id: 78fcc760-825f-404a-b058-a88a99992d98
last_name: SCOPPA
state: NY
zip: 12754-0000
", + "Name: carol woodard
donor_id: d4ba0589-99d6-4455-a978-315395322208
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: carol woodard
recipient_name: reproductive freedom for all
address: 5143 SPRING MEADOWS
city: TROY
classification: neutral
entity_type: Individual
first_name: CAROL
id: d4ba0589-99d6-4455-a978-315395322208
last_name: WOODARD
state: MI
zip: 48098-0000
", + "Name: rochelle albright
donor_id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
recipient_id: e3294ecb-f6df-48a0-b3b4-7048a9c650a7
full_name: rochelle albright
recipient_name: michael detmer for state senate
address: 1840 GRAY RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: ROCHELLE
id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
last_name: ALBRIGHT
state: MI
zip: 48843-0000
", + "Name: michael detmer for state senate
classification: neutral
", + "Name: richard mayfield
donor_id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: richard mayfield
recipient_name: veronica klinefelt for state senate
address: 3221 GRISCHY LN
city: CINCINNATI
classification: neutral
entity_type: Individual
first_name: RICHARD
id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
last_name: MAYFIELD
state: OH
zip: 45208-3109
", + "Name: charles risch
donor_id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles risch
recipient_name: reproductive freedom for all
address: 300 S WACKER DR
city: CHICAGO
classification: neutral
entity_type: Individual
first_name: CHARLES
id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
last_name: RISCH
state: IL
zip: 60606-6680
", + "Name: barbara miller
donor_id: 47043446-3b77-4a34-9d0d-a21786400d9b
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: barbara miller
recipient_name: veronica klinefelt for state senate
address: 820 W END AVE APT 6A
city: NEW YORK
classification: neutral
entity_type: Individual
first_name: BARBARA
id: 47043446-3b77-4a34-9d0d-a21786400d9b
last_name: MILLER
state: NY
zip: 10025-5330
", + "Name: kevin korpi
donor_id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
recipient_id: 5f7c53e3-d1be-47a9-acc4-70828a8c7a69
full_name: kevin korpi
recipient_name: committee to elect ed mcbroom
address: 220 MAC AVE APT 418
city: EAST LANSING
classification: neutral
company: acuitas
entity_type: Individual
first_name: KEVIN
id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
last_name: KORPI
occupation: lobbyist
state: MI
zip: 48823-0000
", + "Name: committee to elect ed mcbroom
classification: neutral
", + "Name: wayne miller
donor_id: 14208b99-1ecb-4b33-becf-c30882e9b302
recipient_id: f88fdd05-e3e4-4d51-8511-1ffd35965c8e
full_name: wayne miller
recipient_name: committee to elect jack richert
address: 27301 SCENIC HWY
city: FRANKLIN
classification: neutral
company: miller & tischler pc
entity_type: Individual
first_name: WAYNE
id: 14208b99-1ecb-4b33-becf-c30882e9b302
last_name: MILLER
occupation: attorney
state: MI
zip: 48025-0000
", + "Name: committee to elect jack richert
classification: neutral
", + "Name: mary soens
donor_id: 664b4540-8b50-44d3-8570-cb797a4859fe
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: mary soens
recipient_name: elect padma kuppa
address: 55 N HANCOCK ST
city: LEXINGTON
classification: neutral
entity_type: Individual
first_name: MARY
id: 664b4540-8b50-44d3-8570-cb797a4859fe
last_name: SOENS
state: MA
zip: 02420-0000
", + "Name: rebecca baskin
donor_id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rebecca baskin
recipient_name: reproductive freedom for all
address: 680 BERKSHIRE DR
city: SALINE
classification: neutral
entity_type: Individual
first_name: REBECCA
id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
last_name: BASKIN
state: MI
zip: 48176-1087
", + "Name: edward kaminski
donor_id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
recipient_id: 76a600c1-7ead-437a-85ad-0cca7573393b
full_name: edward kaminski
recipient_name: friends of brian hosticka
address: 8765 LEHMAN RD
city: MONTAGUE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
last_name: KAMINSKI
state: MI
zip: 49437-9326
", + "Name: friends of brian hosticka
classification: neutral
", + "Name: robert brown
donor_id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
recipient_id: 2e8c9124-2258-45e3-a198-e8c1798c49f2
full_name: robert brown
recipient_name: monroe plumbers and pipe fitters local 671 pac fund
address: 1207 SANDHURST DR
city: TALLAHASSEE
classification: neutral
entity_type: Individual
first_name: ROBERT
id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
last_name: BROWN
state: FL
zip: 32312-2527
", + "Name: monroe plumbers and pipe fitters local 671 pac fund
classification: neutral
", + "Name: sandra braddock
donor_id: e42e7230-02f0-4b28-ba39-7b68e796d510
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: sandra braddock
recipient_name: reproductive freedom for all
address: 20087 EDGEWATER DRIVE
city: CANYON COUNTRY
classification: neutral
entity_type: Individual
first_name: SANDRA
id: e42e7230-02f0-4b28-ba39-7b68e796d510
last_name: BRADDOCK
state: CA
zip: 91351-0000
", + "Name: dana fortier
donor_id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
recipient_id: 159692de-135a-45bd-8889-1ab1882ed54c
full_name: dana fortier
recipient_name: committee to elect vicki barnett to state senate
address: 23861 W LEBOST
city: NOVI
classification: neutral
entity_type: Individual
first_name: DANA
id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
last_name: FORTIER
state: MI
zip: 48375-0000
", + "Name: committee to elect vicki barnett to state senate
classification: neutral
", + "Name: rachel geiersbach
donor_id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel geiersbach
recipient_name: reproductive freedom for all
address: 3412 OLD KAWKAWLIN RD
city: BAY CITY
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
last_name: GEIERSBACH
state: MI
zip: 48706-0000
", + "Name: matthew burgess
donor_id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: matthew burgess
recipient_name: reproductive freedom for all
address: 8823 SPECTRUM CENTER BLVD 2313
city: SAN DIEGO
classification: neutral
entity_type: Individual
first_name: MATTHEW
id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
last_name: BURGESS
state: CA
zip: 92123-0000
", + "Name: teresa robertson
donor_id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: teresa robertson
recipient_name: reproductive freedom for all
address: 7101 RIVER GLEN DR SE
city: CALEDONIA
classification: neutral
entity_type: Individual
first_name: TERESA
id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
last_name: ROBERTSON
state: MI
zip: 49316-8136
" + ], + "type": "scatter", + "x": [], + "y": [] + } + ], + "layout": { + "hovermode": "closest", + "margin": { + "b": 20, + "l": 5, + "r": 5, + "t": 40 + }, + "showlegend": true, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 
0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + 
"#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + 
"lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 16 + }, + "text": "Network Graph Indicating Campaign Contributions from 2018-2022" + }, + "xaxis": { + "showgrid": true, + "showticklabels": 
false, + "zeroline": true + }, + "yaxis": { + "showgrid": true, + "showticklabels": false, + "zeroline": true + } + } + }, + "text/html": [ + "
" ] }, "metadata": {}, @@ -2330,65 +3914,7506 @@ } ], "source": [ - "G = nx.from_pandas_edgelist(sample_df,source='name',target='donations_to',edge_attr=['donations','received'])\n", - "G.nodes()\n", - "pos=nx.spring_layout(G)\n", - "weights = list(nx.get_edge_attributes(G,'donations').values())\n", - "weights = [i/5000 for i in weights]\n", - "node_color = [G.degree(v) for v in G] \n", - "#node_size = [0.0005 * nx.get_node_attributes(G, 'donations')[v] for v in G] \n", - "nx.draw_networkx_nodes(G, pos, node_color=node_color)#, node_size=node_size) \n", - "nx.draw_networkx_edges(G, pos, width=weights)\n", - "nx.draw_networkx_labels(G, pos)" + "def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph:\n", + " G = nx.MultiDiGraph()\n", + " \n", + " # Define columns for edge attributes\n", + " edge_columns = ['amount', 'donor_office', 'office_sought', 'party', 'purpose', 'transaction_id', 'transaction_type', 'year']\n", + " # Define columns for node attributes\n", + " node_columns = ['donor_id', 'recipient_id', 'full_name', 'recipient_name', 'address', 'city', 'classification', 'company', 'donor_type', 'entity_type', 'first_name', 'id', 'last_name', 'occupation', 'recipient_type', 'state', 'zip']\n", + " \n", + " for _, row in df.iterrows(): \n", + " # Add nodes\n", + " G.add_node(row['full_name'], **row[node_columns].dropna().to_dict())\n", + " G.add_node(row['recipient_name'], classification='neutral') # Adding recipient nodes with default classification\n", + "\n", + " # Add edges\n", + " edge_attributes = row[edge_columns].dropna().to_dict()\n", + " G.add_edge(row['full_name'], row['recipient_name'], **edge_attributes)\n", + " \n", + " return G\n", + "\n", + "def plot_network_graph(G: nx.MultiDiGraph):\n", + " edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", + " hovertext = []\n", + "\n", + " for edge in G.edges(data=True):\n", + " source = edge[0]\n", + " target = edge[1]\n", + " hovertext.append(f\"Amount: 
{edge[2]['amount']:.2f}\")\n", + "\n", + " edge_trace['hovertext'] = hovertext\n", + "\n", + " node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", + " node_trace['marker']['color'] = []\n", + "\n", + " for node in G.nodes():\n", + " node_info = f\"Name: {node}
\"\n", + " for key, value in G.nodes[node].items():\n", + " node_info += f\"{key}: {value}
\"\n", + " node_trace['text'] += tuple([node_info])\n", + " # Get the classification value for the node\n", + " classification = G.nodes[node].get('classification', 'neutral')\n", + " # Assign a color based on the classification value\n", + " if classification == 'c':\n", + " color = 'blue'\n", + " elif classification == 'f':\n", + " color = 'red'\n", + " else:\n", + " color = 'green' # Default color for unknown classification\n", + " node_trace['marker']['color'] += tuple([color])\n", + "\n", + " # Define layout settings\n", + " layout = go.Layout(\n", + " title='Network Graph Indicating Campaign Contributions from 2018-2022',\n", + " titlefont=dict(size=16),\n", + " showlegend=True,\n", + " hovermode='closest',\n", + " margin=dict(b=20, l=5, r=5, t=40),\n", + " xaxis=dict(showgrid=True, zeroline=True, showticklabels=False),\n", + " yaxis=dict(showgrid=True, zeroline=True, showticklabels=False)\n", + " )\n", + "\n", + " fig = go.Figure(data=[edge_trace, node_trace], layout=layout)\n", + "\n", + " # Log information about the figure\n", + "\n", + " fig.show()\n", + "\n", + "sample = grouped_sample.sample(50)\n", + "plot_network_graph(create_network_nodes(sample))\n" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "{}" + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hoverinfo": "none", + "line": { + "color": "#888", + "width": 0.5 + }, + "mode": "lines", + "type": "scatter", + "x": [ + 0.4182243125490408, + 0.3740122792611037, + null, + 0.4182243125490408, + 0.37848025459696877, + null, + 0.4182243125490408, + 0.3821391536049519, + null, + 0.4182243125490408, + 0.31305791514229697, + null, + 0.4182243125490408, + 0.3246624829381992, + null, + 0.4182243125490408, + 0.33203393677870674, + null, + 0.4182243125490408, + 0.4404718698088387, + null, + 0.4182243125490408, + 0.3393815448042514, + null, + 
0.4182243125490408, + 0.32444561774289593, + null, + 0.4182243125490408, + 0.33721825060791266, + null, + 0.4182243125490408, + 0.5201251204037126, + null, + 0.12286879065958844, + 0.23992481624351925, + null, + 0.12286879065958844, + 0.09276814106220677, + null, + 0.12286879065958844, + 0.07426685281627932, + null, + 0.12286879065958844, + 0.09471702229050472, + null, + 0.12286879065958844, + 0.06879886671193436, + null, + 0.12286879065958844, + 0.1823584228427031, + null, + 0.12286879065958844, + 0.19852054651169693, + null, + 0.12286879065958844, + 0.13747604708068628, + null, + 0.12286879065958844, + 0.22007362873840486, + null, + 0.12286879065958844, + 0.13940667248499528, + null, + 0.12286879065958844, + 0.0201693226965588, + null, + 0.12286879065958844, + 0.16862303760247477, + null, + 0.12286879065958844, + 0.12355952994556385, + null, + 0.12286879065958844, + 0.04781523934390508, + null, + 0.6730431696885844, + 0.6013564651959642, + null, + 0.6730431696885844, + 0.662108954544855, + null, + 0.6730431696885844, + 0.7007214129943925, + null, + 0.6730431696885844, + 0.7188906153197968, + null, + 0.6730431696885844, + 0.7255980413609877, + null, + 0.6730431696885844, + 0.6802728591951641, + null, + 0.6730431696885844, + 0.7518492361353024, + null, + 0.38165116541180344, + 0.32578353530864457, + null, + 0.38165116541180344, + 0.413948124857326, + null, + 0.38165116541180344, + 0.44119458804978295, + null, + 0.38165116541180344, + 0.3328704753356456, + null, + 0.38165116541180344, + 0.3499260998923053, + null, + 0.38165116541180344, + 0.37301066653863624, + null, + 0.38165116541180344, + 0.4277213938753692, + null, + 0.38165116541180344, + 0.3247821296168134, + null, + 0.38165116541180344, + 0.3187675293980876, + null, + 0.38165116541180344, + 0.34114125407236195, + null, + 0.6084965344664286, + 0.5531504465254558, + null, + 0.6084965344664286, + 0.587704695878027, + null, + 0.6084965344664286, + 0.5593951498649633, + null, + 0.6084965344664286, + 
0.5845953849421676, + null, + 0.6084965344664286, + 0.6058132814274794, + null, + 0.6084965344664286, + 0.6322124026692795, + null, + 0.6084965344664286, + 0.5201251204037126, + null, + 0.18155558675901884, + 0.2742000416622462, + null, + 0.18155558675901884, + 0.15570283642495664, + null, + 0.18155558675901884, + 0.19921682827804632, + null, + 0.18155558675901884, + 0.2955343345493908, + null, + 0.18155558675901884, + 0.298647499376007, + null, + 0.18155558675901884, + 0.0914406510425998, + null, + 0.18155558675901884, + 0.0875467755337247, + null, + 0.18155558675901884, + 0.08997327822205015, + null, + 0.18155558675901884, + 0.25656414507004344, + null, + 0.18155558675901884, + 0.20133087739958255, + null, + 0.7722862313192606, + 0.7408684543182315, + null, + 0.7722862313192606, + 0.8385234321105272, + null, + 0.7722862313192606, + 0.7333209824474588, + null, + 0.5368181409256901, + 0.595945044435614, + null, + 0.5368181409256901, + 0.6327007577432437, + null, + 0.5368181409256901, + 0.526779936668903, + null, + 0.5368181409256901, + 0.5433115547736789, + null, + 0.5368181409256901, + 0.5274116361492907, + null, + 0.5368181409256901, + 0.555788147264811, + null, + 0.5368181409256901, + 0.5805679633404117, + null, + 0.5368181409256901, + 0.5989925957177575, + null, + 0.5368181409256901, + 0.48218022499136737, + null, + 0.5368181409256901, + 0.6058132814274794, + null, + 0.5368181409256901, + 0.47443124751760235, + null, + 0.5368181409256901, + 0.5291812256005789, + null, + 0.5368181409256901, + 0.5621062195646831, + null, + 0.5368181409256901, + 0.5465171974419871, + null, + 0.8304626469521129, + 0.8266354543284289, + null, + 0.8304626469521129, + 0.7247552078664479, + null, + 0.8304626469521129, + 0.7827775151390383, + null, + 0.8304626469521129, + 0.9082570345357789, + null, + 0.8304626469521129, + 0.916634041055854, + null, + 0.8304626469521129, + 0.8613129225222332, + null, + 0.8304626469521129, + 0.7703024251104211, + null, + 0.8304626469521129, + 
0.9005048863870916, + null, + 0.8304626469521129, + 0.9240127894624793, + null, + 0.7924139234898422, + 0.800297854626628, + null, + 0.7924139234898422, + 0.7364515013041172, + null, + 0.7924139234898422, + 0.8589937476561325, + null, + 0.7924139234898422, + 0.8247840830312709, + null, + 0.7924139234898422, + 0.7948577020793985, + null, + 0.7924139234898422, + 0.7059759544943667, + null, + 0.7924139234898422, + 0.8846357375826375, + null, + 0.7924139234898422, + 0.8323549266756429, + null, + 0.8266354543284289, + 0.7247552078664479, + null, + 0.8266354543284289, + 0.7827775151390383, + null, + 0.8266354543284289, + 0.9082570345357789, + null, + 0.8266354543284289, + 0.7042334738295596, + null, + 0.8266354543284289, + 0.8613129225222332, + null, + 0.8266354543284289, + 0.7703024251104211, + null, + 0.8266354543284289, + 0.9240127894624793, + null, + 0.8266354543284289, + 0.8680862155815134, + null, + 0.4023039585223629, + 0.4611021425875542, + null, + 0.4023039585223629, + 0.44175944307536974, + null, + 0.4023039585223629, + 0.3318561006769827, + null, + 0.4023039585223629, + 0.4349682989231034, + null, + 0.4023039585223629, + 0.29978148854693865, + null, + 0.4023039585223629, + 0.4442228752887084, + null, + 0.5084198498293618, + 0.5436816885151938, + null, + 0.5084198498293618, + 0.5229468203255856, + null, + 0.5084198498293618, + 0.4611021425875542, + null, + 0.5084198498293618, + 0.44175944307536974, + null, + 0.5084198498293618, + 0.6234379896430121, + null, + 0.5084198498293618, + 0.4442228752887084, + null, + 0.23992481624351925, + 0.27440213390552737, + null, + 0.23992481624351925, + 0.2728250610713022, + null, + 0.23992481624351925, + 0.1823584228427031, + null, + 0.23992481624351925, + 0.19852054651169693, + null, + 0.23992481624351925, + 0.22007362873840486, + null, + 0.23992481624351925, + 0.13940667248499528, + null, + 0.23992481624351925, + 0.16862303760247477, + null, + 0.23992481624351925, + 0.12355952994556385, + null, + 0.2742000416622462, + 
0.15570283642495664, + null, + 0.2742000416622462, + 0.32578353530864457, + null, + 0.2742000416622462, + 0.3740122792611037, + null, + 0.2742000416622462, + 0.2955343345493908, + null, + 0.2742000416622462, + 0.31305791514229697, + null, + 0.2742000416622462, + 0.298647499376007, + null, + 0.2742000416622462, + 0.3328704753356456, + null, + 0.2742000416622462, + 0.3499260998923053, + null, + 0.2742000416622462, + 0.3181124346701171, + null, + 0.2742000416622462, + 0.3247821296168134, + null, + 0.2742000416622462, + 0.25656414507004344, + null, + 0.2742000416622462, + 0.3187675293980876, + null, + 0.2742000416622462, + 0.20133087739958255, + null, + 0.2742000416622462, + 0.34114125407236195, + null, + 0.15570283642495664, + 0.07513674080757637, + null, + 0.15570283642495664, + 0.05512117222879742, + null, + 0.15570283642495664, + 0.05194805532761382, + null, + 0.15570283642495664, + 0.06202421257916635, + null, + 0.15570283642495664, + 0.09053866681881584, + null, + 0.15570283642495664, + 0.1573630170264504, + null, + 0.15570283642495664, + 0.0852382135963593, + null, + 0.15570283642495664, + 0.0875467755337247, + null, + 0.15570283642495664, + 0.08997327822205015, + null, + 0.15570283642495664, + 0.20133087739958255, + null, + 0.15570283642495664, + 0.038579501382332126, + null, + 0.07513674080757637, + 0.1130639188502468, + null, + 0.07513674080757637, + 0.05512117222879742, + null, + 0.07513674080757637, + 0.07163295816605642, + null, + 0.07513674080757637, + 0.06202421257916635, + null, + 0.07513674080757637, + 0.09053866681881584, + null, + 0.07513674080757637, + 0.1573630170264504, + null, + 0.07513674080757637, + 0.0023771443647881974, + null, + 0.07513674080757637, + 0.0852382135963593, + null, + 0.07513674080757637, + 0.17086936775877049, + null, + 0.07513674080757637, + 0.0875467755337247, + null, + 0.07513674080757637, + 0.08997327822205015, + null, + 0.07513674080757637, + 0.020212382594376965, + null, + 0.07513674080757637, + 0.0897773631019545, + 
null, + 0.07513674080757637, + 0.038579501382332126, + null, + 0.7247552078664479, + 0.6327007577432437, + null, + 0.7247552078664479, + 0.662108954544855, + null, + 0.7247552078664479, + 0.7827775151390383, + null, + 0.7247552078664479, + 0.7007214129943925, + null, + 0.7247552078664479, + 0.7188906153197968, + null, + 0.7247552078664479, + 0.7042334738295596, + null, + 0.7247552078664479, + 0.7255980413609877, + null, + 0.7247552078664479, + 0.7703024251104211, + null, + 0.2586357176925591, + 0.3019474379086241, + null, + 0.2586357176925591, + 0.2121217358781844, + null, + 0.595945044435614, + 0.6327007577432437, + null, + 0.595945044435614, + 0.526779936668903, + null, + 0.595945044435614, + 0.662108954544855, + null, + 0.595945044435614, + 0.5433115547736789, + null, + 0.595945044435614, + 0.5274116361492907, + null, + 0.595945044435614, + 0.7042334738295596, + null, + 0.595945044435614, + 0.555788147264811, + null, + 0.595945044435614, + 0.5805679633404117, + null, + 0.595945044435614, + 0.5989925957177575, + null, + 0.595945044435614, + 0.6058132814274794, + null, + 0.595945044435614, + 0.5291812256005789, + null, + 0.595945044435614, + 0.5621062195646831, + null, + 0.9428542201780316, + 0.8511753697833563, + null, + 0.9428542201780316, + 0.89080246263295, + null, + 0.9428542201780316, + 0.9521646983336837, + null, + 0.9428542201780316, + 0.9663892923019699, + null, + 0.9428542201780316, + 0.9425745666137786, + null, + 0.9428542201780316, + 0.9851894520572745, + null, + 0.9428542201780316, + 0.9573079778783831, + null, + 0.9428542201780316, + 0.9473667691929577, + null, + 0.9428542201780316, + 0.838803404513024, + null, + 0.03304679952258993, + 0.05596958524873419, + null, + 0.03304679952258993, + 0.014269300880037306, + null, + 0.6013564651959642, + 0.662108954544855, + null, + 0.6013564651959642, + 0.7007214129943925, + null, + 0.6013564651959642, + 0.7188906153197968, + null, + 0.6013564651959642, + 0.555788147264811, + null, + 0.6013564651959642, + 
0.5293212253918783, + null, + 0.6013564651959642, + 0.5291812256005789, + null, + 0.6013564651959642, + 0.5191285820034173, + null, + 0.6013564651959642, + 0.5465171974419871, + null, + 0.1130639188502468, + 0.07163295816605642, + null, + 0.1130639188502468, + 0.09053866681881584, + null, + 0.1130639188502468, + 0.1573630170264504, + null, + 0.1130639188502468, + 0.13747604708068628, + null, + 0.1130639188502468, + 0.2275256207367028, + null, + 0.1130639188502468, + 0.18507593174525072, + null, + 0.1130639188502468, + 0.17086936775877049, + null, + 0.1130639188502468, + 0.0897773631019545, + null, + 0.5531504465254558, + 0.47055154706870017, + null, + 0.5531504465254558, + 0.5274116361492907, + null, + 0.5531504465254558, + 0.587704695878027, + null, + 0.5531504465254558, + 0.5989925957177575, + null, + 0.5531504465254558, + 0.5845953849421676, + null, + 0.5531504465254558, + 0.6058132814274794, + null, + 0.5531504465254558, + 0.4564806171162211, + null, + 0.5531504465254558, + 0.5201251204037126, + null, + 0.1635981270944994, + 0.19921682827804632, + null, + 0.1635981270944994, + 0.10310287300704979, + null, + 0.1635981270944994, + 0.05973078995013337, + null, + 0.1635981270944994, + 0.0914406510425998, + null, + 0.1635981270944994, + 0.14711158829428328, + null, + 0.1635981270944994, + 0.21535391032155426, + null, + 0.05512117222879742, + 0.07163295816605642, + null, + 0.05512117222879742, + 0.05194805532761382, + null, + 0.05512117222879742, + 0.06202421257916635, + null, + 0.05512117222879742, + 0.09053866681881584, + null, + 0.05512117222879742, + 0.0023771443647881974, + null, + 0.05512117222879742, + 0.0852382135963593, + null, + 0.05512117222879742, + 0.0875467755337247, + null, + 0.05512117222879742, + 0.08997327822205015, + null, + 0.05512117222879742, + 0.020212382594376965, + null, + 0.05512117222879742, + 0.02312833765025224, + null, + 0.05512117222879742, + 0.04237200971819888, + null, + 0.05512117222879742, + 0.038579501382332126, + null, + 
0.05512117222879742, + 0.01777064460825195, + null, + 0.32578353530864457, + 0.413948124857326, + null, + 0.32578353530864457, + 0.3328704753356456, + null, + 0.32578353530864457, + 0.3499260998923053, + null, + 0.32578353530864457, + 0.37301066653863624, + null, + 0.32578353530864457, + 0.2619562675328274, + null, + 0.32578353530864457, + 0.4277213938753692, + null, + 0.32578353530864457, + 0.3247821296168134, + null, + 0.32578353530864457, + 0.3187675293980876, + null, + 0.32578353530864457, + 0.34114125407236195, + null, + 0.27440213390552737, + 0.2728250610713022, + null, + 0.27440213390552737, + 0.1823584228427031, + null, + 0.27440213390552737, + 0.19852054651169693, + null, + 0.27440213390552737, + 0.22007362873840486, + null, + 0.27440213390552737, + 0.37301066653863624, + null, + 0.27440213390552737, + 0.2275256207367028, + null, + 0.27440213390552737, + 0.2619562675328274, + null, + 0.27440213390552737, + 0.3414075728554137, + null, + 0.2728250610713022, + 0.1823584228427031, + null, + 0.2728250610713022, + 0.37549158943196925, + null, + 0.2728250610713022, + 0.22007362873840486, + null, + 0.2728250610713022, + 0.16862303760247477, + null, + 0.6346565064837861, + 0.7364515013041172, + null, + 0.6346565064837861, + 0.5436816885151938, + null, + 0.6346565064837861, + 0.5461279353327784, + null, + 0.6346565064837861, + 0.7059759544943667, + null, + 0.6346565064837861, + 0.6149491168624189, + null, + 0.6346565064837861, + 0.5593069337955722, + null, + 0.6327007577432437, + 0.662108954544855, + null, + 0.6327007577432437, + 0.5433115547736789, + null, + 0.6327007577432437, + 0.5274116361492907, + null, + 0.6327007577432437, + 0.7042334738295596, + null, + 0.6327007577432437, + 0.555788147264811, + null, + 0.6327007577432437, + 0.5805679633404117, + null, + 0.6327007577432437, + 0.5989925957177575, + null, + 0.6327007577432437, + 0.5845953849421676, + null, + 0.6327007577432437, + 0.6058132814274794, + null, + 0.6327007577432437, + 0.5621062195646831, + null, + 
0.800297854626628, + 0.7364515013041172, + null, + 0.800297854626628, + 0.6953901849658966, + null, + 0.800297854626628, + 0.8589937476561325, + null, + 0.800297854626628, + 0.7204214783753378, + null, + 0.800297854626628, + 0.8247840830312709, + null, + 0.800297854626628, + 0.7948577020793985, + null, + 0.800297854626628, + 0.7059759544943667, + null, + 0.800297854626628, + 0.8846357375826375, + null, + 0.800297854626628, + 0.8323549266756429, + null, + 0.800297854626628, + 0.8505181106970376, + null, + 0.800297854626628, + 0.7607451357487841, + null, + 0.800297854626628, + 0.9110645875753355, + null, + 0.526779936668903, + 0.5433115547736789, + null, + 0.526779936668903, + 0.5274116361492907, + null, + 0.526779936668903, + 0.555788147264811, + null, + 0.526779936668903, + 0.5805679633404117, + null, + 0.526779936668903, + 0.48218022499136737, + null, + 0.526779936668903, + 0.5293212253918783, + null, + 0.526779936668903, + 0.47443124751760235, + null, + 0.526779936668903, + 0.5291812256005789, + null, + 0.526779936668903, + 0.5621062195646831, + null, + 0.526779936668903, + 0.5465171974419871, + null, + 0.413948124857326, + 0.44119458804978295, + null, + 0.413948124857326, + 0.3328704753356456, + null, + 0.413948124857326, + 0.3499260998923053, + null, + 0.413948124857326, + 0.47055154706870017, + null, + 0.413948124857326, + 0.5274116361492907, + null, + 0.413948124857326, + 0.4277213938753692, + null, + 0.413948124857326, + 0.3247821296168134, + null, + 0.413948124857326, + 0.4564806171162211, + null, + 0.413948124857326, + 0.3187675293980876, + null, + 0.413948124857326, + 0.34114125407236195, + null, + 0.09276814106220677, + 0.03187584930858911, + null, + 0.09276814106220677, + 0.07426685281627932, + null, + 0.09276814106220677, + 0.03446402354654854, + null, + 0.09276814106220677, + 0.06879886671193436, + null, + 0.09276814106220677, + 0.1823584228427031, + null, + 0.09276814106220677, + 0.13940667248499528, + null, + 0.09276814106220677, + 
0.0201693226965588, + null, + 0.09276814106220677, + 0.16862303760247477, + null, + 0.09276814106220677, + 0.12355952994556385, + null, + 0.662108954544855, + 0.7827775151390383, + null, + 0.662108954544855, + 0.7007214129943925, + null, + 0.662108954544855, + 0.7188906153197968, + null, + 0.662108954544855, + 0.7042334738295596, + null, + 0.662108954544855, + 0.555788147264811, + null, + 0.662108954544855, + 0.5805679633404117, + null, + 0.662108954544855, + 0.7255980413609877, + null, + 0.662108954544855, + 0.7518492361353024, + null, + 0.07163295816605642, + 0.06202421257916635, + null, + 0.07163295816605642, + 0.09053866681881584, + null, + 0.07163295816605642, + 0.1573630170264504, + null, + 0.07163295816605642, + 0.0023771443647881974, + null, + 0.07163295816605642, + 0.0852382135963593, + null, + 0.07163295816605642, + 0.17086936775877049, + null, + 0.07163295816605642, + 0.0875467755337247, + null, + 0.07163295816605642, + 0.08997327822205015, + null, + 0.07163295816605642, + 0.020212382594376965, + null, + 0.07163295816605642, + 0.0897773631019545, + null, + 0.07163295816605642, + 0.038579501382332126, + null, + 0.44119458804978295, + 0.3740122792611037, + null, + 0.44119458804978295, + 0.3328704753356456, + null, + 0.44119458804978295, + 0.3499260998923053, + null, + 0.44119458804978295, + 0.47055154706870017, + null, + 0.44119458804978295, + 0.5274116361492907, + null, + 0.44119458804978295, + 0.4277213938753692, + null, + 0.44119458804978295, + 0.4564806171162211, + null, + 0.44119458804978295, + 0.34114125407236195, + null, + 0.7364515013041172, + 0.8247840830312709, + null, + 0.7364515013041172, + 0.7948577020793985, + null, + 0.7364515013041172, + 0.7059759544943667, + null, + 0.7364515013041172, + 0.8323549266756429, + null, + 0.7827775151390383, + 0.7007214129943925, + null, + 0.7827775151390383, + 0.7188906153197968, + null, + 0.7827775151390383, + 0.7042334738295596, + null, + 0.7827775151390383, + 0.8613129225222332, + null, + 
0.7827775151390383, + 0.7255980413609877, + null, + 0.7827775151390383, + 0.7703024251104211, + null, + 0.7827775151390383, + 0.9005048863870916, + null, + 0.7827775151390383, + 0.7518492361353024, + null, + 0.9600359726880752, + 0.9998698320754983, + null, + 0.9600359726880752, + 0.9082570345357789, + null, + 0.9600359726880752, + 0.9503884723051484, + null, + 0.9600359726880752, + 0.916634041055854, + null, + 0.9600359726880752, + 0.8613129225222332, + null, + 0.9600359726880752, + 0.9005048863870916, + null, + 0.9600359726880752, + 0.9240127894624793, + null, + 0.9600359726880752, + 0.9636590456207981, + null, + 0.8511753697833563, + 0.89080246263295, + null, + 0.8511753697833563, + 0.9521646983336837, + null, + 0.8511753697833563, + 0.9663892923019699, + null, + 0.8511753697833563, + 0.9573079778783831, + null, + 0.8511753697833563, + 0.9473667691929577, + null, + 0.8511753697833563, + 0.838803404513024, + null, + 0.8511753697833563, + 0.7518492361353024, + null, + 0.05194805532761382, + 0.06202421257916635, + null, + 0.05194805532761382, + 0.0852382135963593, + null, + 0.05194805532761382, + 0.0914406510425998, + null, + 0.05194805532761382, + 0.0875467755337247, + null, + 0.05194805532761382, + 0.08997327822205015, + null, + 0.05194805532761382, + 0.020212382594376965, + null, + 0.05194805532761382, + 0.02312833765025224, + null, + 0.05194805532761382, + 0.04237200971819888, + null, + 0.05194805532761382, + 0.038579501382332126, + null, + 0.05194805532761382, + 0.01777064460825195, + null, + 0.03187584930858911, + 0.07426685281627932, + null, + 0.03187584930858911, + 0.03446402354654854, + null, + 0.03187584930858911, + 0.06879886671193436, + null, + 0.03187584930858911, + 0.13940667248499528, + null, + 0.03187584930858911, + 0.0201693226965588, + null, + 0.03187584930858911, + 0.12355952994556385, + null, + 0.07426685281627932, + 0.09471702229050472, + null, + 0.07426685281627932, + 0.06879886671193436, + null, + 0.07426685281627932, + 0.1823584228427031, + 
null, + 0.07426685281627932, + 0.13940667248499528, + null, + 0.07426685281627932, + 0.0201693226965588, + null, + 0.07426685281627932, + 0.16862303760247477, + null, + 0.07426685281627932, + 0.12355952994556385, + null, + 0.07426685281627932, + 0.04781523934390508, + null, + 0.5257999712304688, + 0.5593951498649633, + null, + 0.5257999712304688, + 0.4404718698088387, + null, + 0.5257999712304688, + 0.5201251204037126, + null, + 0.9998698320754983, + 0.9082570345357789, + null, + 0.9998698320754983, + 0.9636084967560627, + null, + 0.9998698320754983, + 0.9503884723051484, + null, + 0.9998698320754983, + 0.9240127894624793, + null, + 0.9998698320754983, + 0.9636590456207981, + null, + 0.09471702229050472, + 0.06879886671193436, + null, + 0.09471702229050472, + 0.1823584228427031, + null, + 0.09471702229050472, + 0.19852054651169693, + null, + 0.09471702229050472, + 0.13747604708068628, + null, + 0.09471702229050472, + 0.13940667248499528, + null, + 0.09471702229050472, + 0.0201693226965588, + null, + 0.09471702229050472, + 0.12355952994556385, + null, + 0.09471702229050472, + 0.0897773631019545, + null, + 0.09471702229050472, + 0.04781523934390508, + null, + 0.6953901849658966, + 0.7204214783753378, + null, + 0.6953901849658966, + 0.7181048560087516, + null, + 0.6953901849658966, + 0.7948577020793985, + null, + 0.6953901849658966, + 0.7059759544943667, + null, + 0.6953901849658966, + 0.6370268640561303, + null, + 0.6953901849658966, + 0.6149491168624189, + null, + 0.6953901849658966, + 0.7607451357487841, + null, + 0.6953901849658966, + 0.6234379896430121, + null, + 0.03446402354654854, + 0.05596958524873419, + null, + 0.03446402354654854, + 0.014269300880037306, + null, + 0.9082570345357789, + 0.9503884723051484, + null, + 0.9082570345357789, + 0.916634041055854, + null, + 0.9082570345357789, + 0.8613129225222332, + null, + 0.9082570345357789, + 0.9005048863870916, + null, + 0.9082570345357789, + 0.9240127894624793, + null, + 0.9082570345357789, + 
0.9636590456207981, + null, + 0.3740122792611037, + 0.37848025459696877, + null, + 0.3740122792611037, + 0.3821391536049519, + null, + 0.3740122792611037, + 0.2955343345493908, + null, + 0.3740122792611037, + 0.31305791514229697, + null, + 0.3740122792611037, + 0.298647499376007, + null, + 0.3740122792611037, + 0.3246624829381992, + null, + 0.3740122792611037, + 0.3328704753356456, + null, + 0.3740122792611037, + 0.33203393677870674, + null, + 0.3740122792611037, + 0.3499260998923053, + null, + 0.3740122792611037, + 0.3181124346701171, + null, + 0.3740122792611037, + 0.47055154706870017, + null, + 0.3740122792611037, + 0.4277213938753692, + null, + 0.3740122792611037, + 0.25656414507004344, + null, + 0.3740122792611037, + 0.4564806171162211, + null, + 0.977854801698089, + 0.9162463356603696, + null, + 0.5436816885151938, + 0.5461279353327784, + null, + 0.5436816885151938, + 0.5229468203255856, + null, + 0.5436816885151938, + 0.4611021425875542, + null, + 0.5436816885151938, + 0.6149491168624189, + null, + 0.5436816885151938, + 0.4349682989231034, + null, + 0.5436816885151938, + 0.6234379896430121, + null, + 0.5436816885151938, + 0.4442228752887084, + null, + 0.5436816885151938, + 0.5593069337955722, + null, + 0.06202421257916635, + 0.09053866681881584, + null, + 0.06202421257916635, + 0.1573630170264504, + null, + 0.06202421257916635, + 0.0023771443647881974, + null, + 0.06202421257916635, + 0.0852382135963593, + null, + 0.06202421257916635, + 0.0875467755337247, + null, + 0.06202421257916635, + 0.08997327822205015, + null, + 0.06202421257916635, + 0.020212382594376965, + null, + 0.06202421257916635, + 0.02312833765025224, + null, + 0.06202421257916635, + 0.04237200971819888, + null, + 0.06202421257916635, + 0.038579501382332126, + null, + 0.06202421257916635, + 0.01777064460825195, + null, + 0.8589937476561325, + 0.8247840830312709, + null, + 0.8589937476561325, + 0.7948577020793985, + null, + 0.8589937476561325, + 0.9210876029743161, + null, + 0.8589937476561325, 
+ 0.9694266665187994, + null, + 0.8589937476561325, + 0.8846357375826375, + null, + 0.8589937476561325, + 0.8323549266756429, + null, + 0.8589937476561325, + 0.8505181106970376, + null, + 0.8589937476561325, + 0.9110645875753355, + null, + 0.06879886671193436, + 0.1823584228427031, + null, + 0.06879886671193436, + 0.13940667248499528, + null, + 0.06879886671193436, + 0.0201693226965588, + null, + 0.06879886671193436, + 0.12355952994556385, + null, + 0.06879886671193436, + 0.04781523934390508, + null, + 0.19921682827804632, + 0.10310287300704979, + null, + 0.19921682827804632, + 0.0914406510425998, + null, + 0.19921682827804632, + 0.14711158829428328, + null, + 0.19921682827804632, + 0.21535391032155426, + null, + 0.19921682827804632, + 0.25656414507004344, + null, + 0.1823584228427031, + 0.19852054651169693, + null, + 0.1823584228427031, + 0.22007362873840486, + null, + 0.1823584228427031, + 0.13940667248499528, + null, + 0.1823584228427031, + 0.16862303760247477, + null, + 0.1823584228427031, + 0.12355952994556385, + null, + 0.37549158943196925, + 0.41808707877840445, + null, + 0.37549158943196925, + 0.42926818011737133, + null, + 0.37549158943196925, + 0.4363707938884992, + null, + 0.37549158943196925, + 0.42077304608666055, + null, + 0.5433115547736789, + 0.5274116361492907, + null, + 0.5433115547736789, + 0.555788147264811, + null, + 0.5433115547736789, + 0.5805679633404117, + null, + 0.5433115547736789, + 0.5989925957177575, + null, + 0.5433115547736789, + 0.48218022499136737, + null, + 0.5433115547736789, + 0.47443124751760235, + null, + 0.5433115547736789, + 0.5291812256005789, + null, + 0.5433115547736789, + 0.5621062195646831, + null, + 0.5433115547736789, + 0.5465171974419871, + null, + 0.37848025459696877, + 0.3821391536049519, + null, + 0.37848025459696877, + 0.31305791514229697, + null, + 0.37848025459696877, + 0.3246624829381992, + null, + 0.37848025459696877, + 0.33203393677870674, + null, + 0.37848025459696877, + 0.4404718698088387, + null, + 
0.37848025459696877, + 0.3393815448042514, + null, + 0.37848025459696877, + 0.32444561774289593, + null, + 0.37848025459696877, + 0.33721825060791266, + null, + 0.3821391536049519, + 0.2955343345493908, + null, + 0.3821391536049519, + 0.31305791514229697, + null, + 0.3821391536049519, + 0.298647499376007, + null, + 0.3821391536049519, + 0.3246624829381992, + null, + 0.3821391536049519, + 0.33203393677870674, + null, + 0.3821391536049519, + 0.3499260998923053, + null, + 0.3821391536049519, + 0.3181124346701171, + null, + 0.3821391536049519, + 0.47055154706870017, + null, + 0.3821391536049519, + 0.4404718698088387, + null, + 0.3821391536049519, + 0.3393815448042514, + null, + 0.3821391536049519, + 0.32444561774289593, + null, + 0.3821391536049519, + 0.4564806171162211, + null, + 0.3821391536049519, + 0.33721825060791266, + null, + 0.7204214783753378, + 0.7181048560087516, + null, + 0.7204214783753378, + 0.7948577020793985, + null, + 0.7204214783753378, + 0.7059759544943667, + null, + 0.7204214783753378, + 0.6370268640561303, + null, + 0.7204214783753378, + 0.7607451357487841, + null, + 0.7204214783753378, + 0.6234379896430121, + null, + 0.2955343345493908, + 0.31305791514229697, + null, + 0.2955343345493908, + 0.298647499376007, + null, + 0.2955343345493908, + 0.3246624829381992, + null, + 0.2955343345493908, + 0.3328704753356456, + null, + 0.2955343345493908, + 0.33203393677870674, + null, + 0.2955343345493908, + 0.3499260998923053, + null, + 0.2955343345493908, + 0.3181124346701171, + null, + 0.2955343345493908, + 0.25656414507004344, + null, + 0.2955343345493908, + 0.32444561774289593, + null, + 0.09053866681881584, + 0.1573630170264504, + null, + 0.09053866681881584, + 0.0023771443647881974, + null, + 0.09053866681881584, + 0.0852382135963593, + null, + 0.09053866681881584, + 0.17086936775877049, + null, + 0.09053866681881584, + 0.0875467755337247, + null, + 0.09053866681881584, + 0.08997327822205015, + null, + 0.09053866681881584, + 0.020212382594376965, + null, 
+ 0.09053866681881584, + 0.0897773631019545, + null, + 0.09053866681881584, + 0.02312833765025224, + null, + 0.09053866681881584, + 0.20133087739958255, + null, + 0.09053866681881584, + 0.038579501382332126, + null, + 0.09053866681881584, + 0.01777064460825195, + null, + 0.7181048560087516, + 0.6776948411821848, + null, + 0.7181048560087516, + 0.834199864808296, + null, + 0.7181048560087516, + 0.6370268640561303, + null, + 0.7181048560087516, + 0.6802728591951641, + null, + 0.7181048560087516, + 0.7607451357487841, + null, + 0.7181048560087516, + 0.6314926226168458, + null, + 0.10310287300704979, + 0.05973078995013337, + null, + 0.10310287300704979, + 0.0914406510425998, + null, + 0.10310287300704979, + 0.14711158829428328, + null, + 0.10310287300704979, + 0.21535391032155426, + null, + 0.10310287300704979, + 0.04237200971819888, + null, + 0.8247840830312709, + 0.7948577020793985, + null, + 0.8247840830312709, + 0.9210876029743161, + null, + 0.8247840830312709, + 0.7059759544943667, + null, + 0.8247840830312709, + 0.9186278106648778, + null, + 0.8247840830312709, + 0.8846357375826375, + null, + 0.8247840830312709, + 0.8323549266756429, + null, + 0.8247840830312709, + 0.8505181106970376, + null, + 0.8247840830312709, + 0.9110645875753355, + null, + 0.1573630170264504, + 0.2275256207367028, + null, + 0.1573630170264504, + 0.0852382135963593, + null, + 0.1573630170264504, + 0.18507593174525072, + null, + 0.1573630170264504, + 0.17086936775877049, + null, + 0.1573630170264504, + 0.0875467755337247, + null, + 0.1573630170264504, + 0.08997327822205015, + null, + 0.1573630170264504, + 0.0897773631019545, + null, + 0.1573630170264504, + 0.20133087739958255, + null, + 0.31305791514229697, + 0.298647499376007, + null, + 0.31305791514229697, + 0.3246624829381992, + null, + 0.31305791514229697, + 0.33203393677870674, + null, + 0.31305791514229697, + 0.3499260998923053, + null, + 0.31305791514229697, + 0.3181124346701171, + null, + 0.31305791514229697, + 0.3393815448042514, + 
null, + 0.31305791514229697, + 0.21535391032155426, + null, + 0.31305791514229697, + 0.25656414507004344, + null, + 0.31305791514229697, + 0.32444561774289593, + null, + 0.31305791514229697, + 0.33721825060791266, + null, + 0.298647499376007, + 0.3328704753356456, + null, + 0.298647499376007, + 0.33203393677870674, + null, + 0.298647499376007, + 0.3499260998923053, + null, + 0.298647499376007, + 0.3181124346701171, + null, + 0.298647499376007, + 0.3247821296168134, + null, + 0.298647499376007, + 0.25656414507004344, + null, + 0.298647499376007, + 0.3187675293980876, + null, + 0.298647499376007, + 0.20133087739958255, + null, + 0.298647499376007, + 0.34114125407236195, + null, + 0.3246624829381992, + 0.33203393677870674, + null, + 0.3246624829381992, + 0.4404718698088387, + null, + 0.3246624829381992, + 0.3393815448042514, + null, + 0.3246624829381992, + 0.21535391032155426, + null, + 0.3246624829381992, + 0.25656414507004344, + null, + 0.3246624829381992, + 0.32444561774289593, + null, + 0.3246624829381992, + 0.33721825060791266, + null, + 0.19852054651169693, + 0.13747604708068628, + null, + 0.19852054651169693, + 0.22007362873840486, + null, + 0.19852054651169693, + 0.2275256207367028, + null, + 0.19852054651169693, + 0.2619562675328274, + null, + 0.19852054651169693, + 0.18507593174525072, + null, + 0.3328704753356456, + 0.3499260998923053, + null, + 0.3328704753356456, + 0.3181124346701171, + null, + 0.3328704753356456, + 0.4277213938753692, + null, + 0.3328704753356456, + 0.3247821296168134, + null, + 0.3328704753356456, + 0.3187675293980876, + null, + 0.3328704753356456, + 0.34114125407236195, + null, + 0.33203393677870674, + 0.3181124346701171, + null, + 0.33203393677870674, + 0.4404718698088387, + null, + 0.33203393677870674, + 0.3393815448042514, + null, + 0.33203393677870674, + 0.21535391032155426, + null, + 0.33203393677870674, + 0.25656414507004344, + null, + 0.33203393677870674, + 0.32444561774289593, + null, + 0.33203393677870674, + 
0.33721825060791266, + null, + 0.5461279353327784, + 0.5229468203255856, + null, + 0.5461279353327784, + 0.4611021425875542, + null, + 0.5461279353327784, + 0.6149491168624189, + null, + 0.5461279353327784, + 0.4349682989231034, + null, + 0.5461279353327784, + 0.5593069337955722, + null, + 0.9636084967560627, + 0.9503884723051484, + null, + 0.9636084967560627, + 0.9162463356603696, + null, + 0.9636084967560627, + 0.8541827253649632, + null, + 0.9636084967560627, + 0.9636590456207981, + null, + 0.9636084967560627, + 0.8680862155815134, + null, + 0.9636084967560627, + 0.8668565351624634, + null, + 0.9503884723051484, + 0.9240127894624793, + null, + 0.9503884723051484, + 0.9636590456207981, + null, + 0.9503884723051484, + 0.8680862155815134, + null, + 0.13747604708068628, + 0.22007362873840486, + null, + 0.13747604708068628, + 0.2275256207367028, + null, + 0.13747604708068628, + 0.18507593174525072, + null, + 0.13747604708068628, + 0.17086936775877049, + null, + 0.13747604708068628, + 0.0897773631019545, + null, + 0.13747604708068628, + 0.04781523934390508, + null, + 0.3499260998923053, + 0.3181124346701171, + null, + 0.3499260998923053, + 0.4277213938753692, + null, + 0.3499260998923053, + 0.3247821296168134, + null, + 0.3499260998923053, + 0.4564806171162211, + null, + 0.3499260998923053, + 0.3187675293980876, + null, + 0.3499260998923053, + 0.34114125407236195, + null, + 0.3181124346701171, + 0.3247821296168134, + null, + 0.3181124346701171, + 0.25656414507004344, + null, + 0.3181124346701171, + 0.3187675293980876, + null, + 0.89080246263295, + 0.9521646983336837, + null, + 0.89080246263295, + 0.834199864808296, + null, + 0.89080246263295, + 0.9663892923019699, + null, + 0.89080246263295, + 0.9425745666137786, + null, + 0.89080246263295, + 0.9851894520572745, + null, + 0.89080246263295, + 0.9573079778783831, + null, + 0.89080246263295, + 0.9473667691929577, + null, + 0.89080246263295, + 0.838803404513024, + null, + 0.9521646983336837, + 0.916634041055854, + null, + 
0.9521646983336837, + 0.9663892923019699, + null, + 0.9521646983336837, + 0.9573079778783831, + null, + 0.9521646983336837, + 0.9473667691929577, + null, + 0.9521646983336837, + 0.9005048863870916, + null, + 0.6776948411821848, + 0.6802728591951641, + null, + 0.6776948411821848, + 0.6314926226168458, + null, + 0.0023771443647881974, + 0.0852382135963593, + null, + 0.0023771443647881974, + 0.0875467755337247, + null, + 0.0023771443647881974, + 0.020212382594376965, + null, + 0.0023771443647881974, + 0.0897773631019545, + null, + 0.0023771443647881974, + 0.038579501382332126, + null, + 0.7007214129943925, + 0.7188906153197968, + null, + 0.7007214129943925, + 0.7255980413609877, + null, + 0.7007214129943925, + 0.7518492361353024, + null, + 0.7188906153197968, + 0.7255980413609877, + null, + 0.7188906153197968, + 0.7518492361353024, + null, + 0.47055154706870017, + 0.5274116361492907, + null, + 0.47055154706870017, + 0.4277213938753692, + null, + 0.47055154706870017, + 0.5845953849421676, + null, + 0.47055154706870017, + 0.4564806171162211, + null, + 0.47055154706870017, + 0.5201251204037126, + null, + 0.19043749918150743, + 0.2121217358781844, + null, + 0.19043749918150743, + 0.29978148854693865, + null, + 0.5274116361492907, + 0.555788147264811, + null, + 0.5274116361492907, + 0.5805679633404117, + null, + 0.5274116361492907, + 0.5989925957177575, + null, + 0.5274116361492907, + 0.4277213938753692, + null, + 0.5274116361492907, + 0.5845953849421676, + null, + 0.5274116361492907, + 0.6058132814274794, + null, + 0.5274116361492907, + 0.5291812256005789, + null, + 0.5274116361492907, + 0.5621062195646831, + null, + 0.5274116361492907, + 0.4564806171162211, + null, + 0.9162463356603696, + 0.8385234321105272, + null, + 0.9162463356603696, + 0.8668565351624634, + null, + 0.7042334738295596, + 0.5989925957177575, + null, + 0.7042334738295596, + 0.7255980413609877, + null, + 0.7042334738295596, + 0.6058132814274794, + null, + 0.7042334738295596, + 0.7703024251104211, + null, 
+ 0.555788147264811, + 0.5805679633404117, + null, + 0.555788147264811, + 0.5989925957177575, + null, + 0.555788147264811, + 0.48218022499136737, + null, + 0.555788147264811, + 0.5293212253918783, + null, + 0.555788147264811, + 0.47443124751760235, + null, + 0.555788147264811, + 0.5291812256005789, + null, + 0.555788147264811, + 0.5621062195646831, + null, + 0.555788147264811, + 0.5465171974419871, + null, + 0.5805679633404117, + 0.5989925957177575, + null, + 0.5805679633404117, + 0.5845953849421676, + null, + 0.5805679633404117, + 0.6058132814274794, + null, + 0.5805679633404117, + 0.5291812256005789, + null, + 0.5805679633404117, + 0.5621062195646831, + null, + 0.587704695878027, + 0.5593951498649633, + null, + 0.587704695878027, + 0.5845953849421676, + null, + 0.587704695878027, + 0.6058132814274794, + null, + 0.587704695878027, + 0.6322124026692795, + null, + 0.587704695878027, + 0.5201251204037126, + null, + 0.916634041055854, + 0.8613129225222332, + null, + 0.916634041055854, + 0.9005048863870916, + null, + 0.916634041055854, + 0.9240127894624793, + null, + 0.7948577020793985, + 0.7059759544943667, + null, + 0.7948577020793985, + 0.8846357375826375, + null, + 0.7948577020793985, + 0.8323549266756429, + null, + 0.7948577020793985, + 0.8505181106970376, + null, + 0.7948577020793985, + 0.7607451357487841, + null, + 0.7948577020793985, + 0.9110645875753355, + null, + 0.9210876029743161, + 0.9694266665187994, + null, + 0.9210876029743161, + 0.9186278106648778, + null, + 0.9210876029743161, + 0.8846357375826375, + null, + 0.9210876029743161, + 0.8323549266756429, + null, + 0.9210876029743161, + 0.9110645875753355, + null, + 0.9210876029743161, + 0.992283435751248, + null, + 0.834199864808296, + 0.9425745666137786, + null, + 0.834199864808296, + 0.8505181106970376, + null, + 0.834199864808296, + 0.7607451357487841, + null, + 0.5989925957177575, + 0.5845953849421676, + null, + 0.5989925957177575, + 0.6058132814274794, + null, + 0.5989925957177575, + 
0.5621062195646831, + null, + 0.05973078995013337, + 0.0914406510425998, + null, + 0.05973078995013337, + 0.14711158829428328, + null, + 0.05973078995013337, + 0.04237200971819888, + null, + 0.5593951498649633, + 0.4404718698088387, + null, + 0.5593951498649633, + 0.6322124026692795, + null, + 0.5593951498649633, + 0.5201251204037126, + null, + 0.5229468203255856, + 0.4611021425875542, + null, + 0.5229468203255856, + 0.44175944307536974, + null, + 0.5229468203255856, + 0.6149491168624189, + null, + 0.5229468203255856, + 0.6234379896430121, + null, + 0.5229468203255856, + 0.4442228752887084, + null, + 0.5229468203255856, + 0.5593069337955722, + null, + 0.22007362873840486, + 0.2275256207367028, + null, + 0.22007362873840486, + 0.2619562675328274, + null, + 0.22007362873840486, + 0.18507593174525072, + null, + 0.22007362873840486, + 0.3414075728554137, + null, + 0.37301066653863624, + 0.2619562675328274, + null, + 0.37301066653863624, + 0.48218022499136737, + null, + 0.37301066653863624, + 0.47443124751760235, + null, + 0.37301066653863624, + 0.3414075728554137, + null, + 0.37301066653863624, + 0.42077304608666055, + null, + 0.37301066653863624, + 0.4039327719907384, + null, + 0.8613129225222332, + 0.7703024251104211, + null, + 0.8613129225222332, + 0.9005048863870916, + null, + 0.9663892923019699, + 0.9851894520572745, + null, + 0.9663892923019699, + 0.9573079778783831, + null, + 0.9663892923019699, + 0.9473667691929577, + null, + 0.2275256207367028, + 0.2619562675328274, + null, + 0.2275256207367028, + 0.18507593174525072, + null, + 0.2275256207367028, + 0.17086936775877049, + null, + 0.0852382135963593, + 0.0875467755337247, + null, + 0.0852382135963593, + 0.08997327822205015, + null, + 0.0852382135963593, + 0.020212382594376965, + null, + 0.0852382135963593, + 0.02312833765025224, + null, + 0.0852382135963593, + 0.20133087739958255, + null, + 0.0852382135963593, + 0.038579501382332126, + null, + 0.0852382135963593, + 0.01777064460825195, + null, + 
0.0914406510425998, + 0.14711158829428328, + null, + 0.0914406510425998, + 0.02312833765025224, + null, + 0.0914406510425998, + 0.04237200971819888, + null, + 0.0914406510425998, + 0.01777064460825195, + null, + 0.9425745666137786, + 0.9851894520572745, + null, + 0.9425745666137786, + 0.9473667691929577, + null, + 0.3019474379086241, + 0.2121217358781844, + null, + 0.3019474379086241, + 0.3318561006769827, + null, + 0.3019474379086241, + 0.29978148854693865, + null, + 0.2619562675328274, + 0.3247821296168134, + null, + 0.2619562675328274, + 0.18507593174525072, + null, + 0.2619562675328274, + 0.17086936775877049, + null, + 0.2619562675328274, + 0.3414075728554137, + null, + 0.2619562675328274, + 0.3187675293980876, + null, + 0.2619562675328274, + 0.34114125407236195, + null, + 0.48218022499136737, + 0.5293212253918783, + null, + 0.48218022499136737, + 0.47443124751760235, + null, + 0.48218022499136737, + 0.5291812256005789, + null, + 0.48218022499136737, + 0.5465171974419871, + null, + 0.5293212253918783, + 0.47443124751760235, + null, + 0.5293212253918783, + 0.5291812256005789, + null, + 0.5293212253918783, + 0.5191285820034173, + null, + 0.5293212253918783, + 0.42077304608666055, + null, + 0.5293212253918783, + 0.5465171974419871, + null, + 0.41808707877840445, + 0.42926818011737133, + null, + 0.41808707877840445, + 0.44175944307536974, + null, + 0.41808707877840445, + 0.4363707938884992, + null, + 0.14711158829428328, + 0.21535391032155426, + null, + 0.42926818011737133, + 0.4363707938884992, + null, + 0.9694266665187994, + 0.9186278106648778, + null, + 0.9694266665187994, + 0.8846357375826375, + null, + 0.9694266665187994, + 0.9110645875753355, + null, + 0.9694266665187994, + 0.992283435751248, + null, + 0.4404718698088387, + 0.3393815448042514, + null, + 0.4404718698088387, + 0.32444561774289593, + null, + 0.4404718698088387, + 0.33721825060791266, + null, + 0.4404718698088387, + 0.5201251204037126, + null, + 0.4277213938753692, + 0.3247821296168134, + null, + 
0.4277213938753692, + 0.4564806171162211, + null, + 0.4277213938753692, + 0.3187675293980876, + null, + 0.4277213938753692, + 0.34114125407236195, + null, + 0.7059759544943667, + 0.6149491168624189, + null, + 0.7059759544943667, + 0.6234379896430121, + null, + 0.4611021425875542, + 0.44175944307536974, + null, + 0.4611021425875542, + 0.4349682989231034, + null, + 0.4611021425875542, + 0.4442228752887084, + null, + 0.4611021425875542, + 0.5593069337955722, + null, + 0.13940667248499528, + 0.0201693226965588, + null, + 0.13940667248499528, + 0.16862303760247477, + null, + 0.13940667248499528, + 0.12355952994556385, + null, + 0.3393815448042514, + 0.21535391032155426, + null, + 0.3393815448042514, + 0.25656414507004344, + null, + 0.3393815448042514, + 0.32444561774289593, + null, + 0.3393815448042514, + 0.33721825060791266, + null, + 0.6370268640561303, + 0.7607451357487841, + null, + 0.6370268640561303, + 0.6234379896430121, + null, + 0.6370268640561303, + 0.6314926226168458, + null, + 0.9851894520572745, + 0.9573079778783831, + null, + 0.9851894520572745, + 0.9473667691929577, + null, + 0.3247821296168134, + 0.3187675293980876, + null, + 0.3247821296168134, + 0.34114125407236195, + null, + 0.9186278106648778, + 0.8846357375826375, + null, + 0.9186278106648778, + 0.8323549266756429, + null, + 0.9186278106648778, + 0.9110645875753355, + null, + 0.9186278106648778, + 0.992283435751248, + null, + 0.18507593174525072, + 0.17086936775877049, + null, + 0.18507593174525072, + 0.0897773631019545, + null, + 0.18507593174525072, + 0.20133087739958255, + null, + 0.5845953849421676, + 0.6058132814274794, + null, + 0.5845953849421676, + 0.5621062195646831, + null, + 0.44175944307536974, + 0.4442228752887084, + null, + 0.7255980413609877, + 0.7703024251104211, + null, + 0.7255980413609877, + 0.7518492361353024, + null, + 0.6058132814274794, + 0.5621062195646831, + null, + 0.47443124751760235, + 0.5291812256005789, + null, + 0.47443124751760235, + 0.5465171974419871, + null, + 
0.9573079778783831, + 0.9473667691929577, + null, + 0.9573079778783831, + 0.838803404513024, + null, + 0.0201693226965588, + 0.12355952994556385, + null, + 0.0201693226965588, + 0.04781523934390508, + null, + 0.17086936775877049, + 0.0897773631019545, + null, + 0.17086936775877049, + 0.20133087739958255, + null, + 0.5291812256005789, + 0.5621062195646831, + null, + 0.5291812256005789, + 0.5465171974419871, + null, + 0.16862303760247477, + 0.12355952994556385, + null, + 0.8846357375826375, + 0.8323549266756429, + null, + 0.8846357375826375, + 0.8505181106970376, + null, + 0.8846357375826375, + 0.9110645875753355, + null, + 0.8846357375826375, + 0.992283435751248, + null, + 0.0875467755337247, + 0.08997327822205015, + null, + 0.0875467755337247, + 0.020212382594376965, + null, + 0.0875467755337247, + 0.02312833765025224, + null, + 0.0875467755337247, + 0.20133087739958255, + null, + 0.0875467755337247, + 0.038579501382332126, + null, + 0.0875467755337247, + 0.01777064460825195, + null, + 0.9473667691929577, + 0.838803404513024, + null, + 0.8541827253649632, + 0.8680862155815134, + null, + 0.8541827253649632, + 0.8668565351624634, + null, + 0.3414075728554137, + 0.42077304608666055, + null, + 0.3414075728554137, + 0.4039327719907384, + null, + 0.3318561006769827, + 0.4349682989231034, + null, + 0.3318561006769827, + 0.29978148854693865, + null, + 0.7408684543182315, + 0.8385234321105272, + null, + 0.7408684543182315, + 0.6322124026692795, + null, + 0.7408684543182315, + 0.7333209824474588, + null, + 0.6149491168624189, + 0.6234379896430121, + null, + 0.6149491168624189, + 0.5593069337955722, + null, + 0.12355952994556385, + 0.04781523934390508, + null, + 0.08997327822205015, + 0.020212382594376965, + null, + 0.08997327822205015, + 0.02312833765025224, + null, + 0.08997327822205015, + 0.20133087739958255, + null, + 0.08997327822205015, + 0.04237200971819888, + null, + 0.08997327822205015, + 0.038579501382332126, + null, + 0.08997327822205015, + 0.01777064460825195, + 
null, + 0.21535391032155426, + 0.25656414507004344, + null, + 0.21535391032155426, + 0.32444561774289593, + null, + 0.21535391032155426, + 0.33721825060791266, + null, + 0.8323549266756429, + 0.9110645875753355, + null, + 0.8385234321105272, + 0.8668565351624634, + null, + 0.8385234321105272, + 0.7333209824474588, + null, + 0.9240127894624793, + 0.9636590456207981, + null, + 0.9240127894624793, + 0.8680862155815134, + null, + 0.6802728591951641, + 0.6314926226168458, + null, + 0.25656414507004344, + 0.32444561774289593, + null, + 0.25656414507004344, + 0.33721825060791266, + null, + 0.020212382594376965, + 0.02312833765025224, + null, + 0.020212382594376965, + 0.038579501382332126, + null, + 0.020212382594376965, + 0.01777064460825195, + null, + 0.32444561774289593, + 0.33721825060791266, + null, + 0.838803404513024, + 0.7518492361353024, + null, + 0.6322124026692795, + 0.5201251204037126, + null, + 0.6322124026692795, + 0.7333209824474588, + null, + 0.8505181106970376, + 0.7607451357487841, + null, + 0.8505181106970376, + 0.9110645875753355, + null, + 0.0897773631019545, + 0.04781523934390508, + null, + 0.02312833765025224, + 0.04237200971819888, + null, + 0.02312833765025224, + 0.038579501382332126, + null, + 0.02312833765025224, + 0.01777064460825195, + null, + 0.05596958524873419, + 0.014269300880037306, + null, + 0.3187675293980876, + 0.20133087739958255, + null, + 0.3187675293980876, + 0.34114125407236195, + null, + 0.5191285820034173, + 0.42077304608666055, + null, + 0.5191285820034173, + 0.5465171974419871, + null, + 0.4349682989231034, + 0.4442228752887084, + null, + 0.9636590456207981, + 0.8680862155815134, + null, + 0.8680862155815134, + 0.8668565351624634, + null, + 0.6234379896430121, + 0.5593069337955722, + null, + 0.04237200971819888, + 0.038579501382332126, + null, + 0.04237200971819888, + 0.01777064460825195, + null, + 0.038579501382332126, + 0.01777064460825195, + null, + 0.9110645875753355, + 0.992283435751248, + null, + 0.42077304608666055, + 
0.4039327719907384, + null + ], + "y": [ + 0.09053726824382247, + 0.17542400609184483, + null, + 0.09053726824382247, + 0.055894273053114896, + null, + 0.09053726824382247, + 0.14933184162295132, + null, + 0.09053726824382247, + 0.1278305132468397, + null, + 0.09053726824382247, + 0.0731473655342364, + null, + 0.09053726824382247, + 0.09533319097359638, + null, + 0.09053726824382247, + 0.055897802218322856, + null, + 0.09053726824382247, + 0.04153202488293273, + null, + 0.09053726824382247, + 0.06013197669987258, + null, + 0.09053726824382247, + 0.040563128366188694, + null, + 0.09053726824382247, + 0.09959517902538939, + null, + 0.571085214777101, + 0.5944498275635773, + null, + 0.571085214777101, + 0.6773365837969099, + null, + 0.571085214777101, + 0.6160873747407943, + null, + 0.571085214777101, + 0.5186581897030644, + null, + 0.571085214777101, + 0.5634679987017406, + null, + 0.571085214777101, + 0.6012106694454529, + null, + 0.571085214777101, + 0.4898861106787329, + null, + 0.571085214777101, + 0.45431497833000367, + null, + 0.571085214777101, + 0.5204579980957379, + null, + 0.571085214777101, + 0.6352288779182178, + null, + 0.571085214777101, + 0.5981086798045652, + null, + 0.571085214777101, + 0.6648266103848882, + null, + 0.571085214777101, + 0.6072525121642058, + null, + 0.571085214777101, + 0.5260776190209286, + null, + 0.5199666766946885, + 0.5219101415039136, + null, + 0.5199666766946885, + 0.4307004647175262, + null, + 0.5199666766946885, + 0.4834545718278357, + null, + 0.5199666766946885, + 0.4847615611240751, + null, + 0.5199666766946885, + 0.4318165589087314, + null, + 0.5199666766946885, + 0.6217058876501556, + null, + 0.5199666766946885, + 0.5097617399826666, + null, + 0.33766327379542094, + 0.33811323660241943, + null, + 0.33766327379542094, + 0.31304614249644347, + null, + 0.33766327379542094, + 0.2697998035002954, + null, + 0.33766327379542094, + 0.2695720924906413, + null, + 0.33766327379542094, + 0.24454670425362057, + null, + 
0.33766327379542094, + 0.4500538798110242, + null, + 0.33766327379542094, + 0.2880647319459674, + null, + 0.33766327379542094, + 0.3202314429055858, + null, + 0.33766327379542094, + 0.3169605131706372, + null, + 0.33766327379542094, + 0.32345881810688737, + null, + 0.17196466768963936, + 0.2009582712064717, + null, + 0.17196466768963936, + 0.15069304516745607, + null, + 0.17196466768963936, + 0.06016942899581168, + null, + 0.17196466768963936, + 0.24013807075121119, + null, + 0.17196466768963936, + 0.2693681584998491, + null, + 0.17196466768963936, + 0.10059463740220753, + null, + 0.17196466768963936, + 0.09959517902538939, + null, + 0.17708608014427518, + 0.2373268562908326, + null, + 0.17708608014427518, + 0.23741932367240448, + null, + 0.17708608014427518, + 0.05938145280899054, + null, + 0.17708608014427518, + 0.17619771419691865, + null, + 0.17708608014427518, + 0.21532966919867302, + null, + 0.17708608014427518, + 0.10782775946098799, + null, + 0.17708608014427518, + 0.249116699886752, + null, + 0.17708608014427518, + 0.23700988477155205, + null, + 0.17708608014427518, + 0.1341994714416056, + null, + 0.17708608014427518, + 0.29050814087118004, + null, + 0.04649454781195783, + 0.07011604000159166, + null, + 0.04649454781195783, + 0.019989772968585173, + null, + 0.04649454781195783, + 0.038844634468288675, + null, + 0.37080565676900146, + 0.3648985367210805, + null, + 0.37080565676900146, + 0.3343459796676115, + null, + 0.37080565676900146, + 0.4208812619135248, + null, + 0.37080565676900146, + 0.3900960314334032, + null, + 0.37080565676900146, + 0.3098874271134545, + null, + 0.37080565676900146, + 0.4295667428124167, + null, + 0.37080565676900146, + 0.35350564895305514, + null, + 0.37080565676900146, + 0.31541428705224306, + null, + 0.37080565676900146, + 0.4467311570808764, + null, + 0.37080565676900146, + 0.2693681584998491, + null, + 0.37080565676900146, + 0.4421375373865315, + null, + 0.37080565676900146, + 0.42641694849778966, + null, + 
0.37080565676900146, + 0.3333136626479075, + null, + 0.37080565676900146, + 0.4868902788925622, + null, + 0.3602866247185619, + 0.3061539627540061, + null, + 0.3602866247185619, + 0.3661437355856225, + null, + 0.3602866247185619, + 0.40557198035837094, + null, + 0.3602866247185619, + 0.3192831323823997, + null, + 0.3602866247185619, + 0.41535454584101794, + null, + 0.3602866247185619, + 0.40395348439090084, + null, + 0.3602866247185619, + 0.3340702546567942, + null, + 0.3602866247185619, + 0.4248880785102581, + null, + 0.3602866247185619, + 0.29119156039108685, + null, + 0.9483925173875926, + 0.8957623407464501, + null, + 0.9483925173875926, + 0.9727770125665405, + null, + 0.9483925173875926, + 0.8791466031622056, + null, + 0.9483925173875926, + 0.922341377568881, + null, + 0.9483925173875926, + 0.8821215709600496, + null, + 0.9483925173875926, + 0.9328536520894143, + null, + 0.9483925173875926, + 0.9344432405222354, + null, + 0.9483925173875926, + 0.9642772106357639, + null, + 0.3061539627540061, + 0.3661437355856225, + null, + 0.3061539627540061, + 0.40557198035837094, + null, + 0.3061539627540061, + 0.3192831323823997, + null, + 0.3061539627540061, + 0.32266487999330984, + null, + 0.3061539627540061, + 0.40395348439090084, + null, + 0.3061539627540061, + 0.3340702546567942, + null, + 0.3061539627540061, + 0.29119156039108685, + null, + 0.3061539627540061, + 0.19048093242734687, + null, + 0.9643804220706982, + 0.9298960866412943, + null, + 0.9643804220706982, + 0.8599268392047722, + null, + 0.9643804220706982, + 0.9435179236599912, + null, + 0.9643804220706982, + 0.9958360522915445, + null, + 0.9643804220706982, + 0.9756800437762957, + null, + 0.9643804220706982, + 0.8842114977564064, + null, + 0.8336885167043149, + 0.938767234846119, + null, + 0.8336885167043149, + 0.8867112408398291, + null, + 0.8336885167043149, + 0.9298960866412943, + null, + 0.8336885167043149, + 0.8599268392047722, + null, + 0.8336885167043149, + 0.8508124987550889, + null, + 
0.8336885167043149, + 0.8842114977564064, + null, + 0.5944498275635773, + 0.5216765314868881, + null, + 0.5944498275635773, + 0.6001026871900049, + null, + 0.5944498275635773, + 0.6012106694454529, + null, + 0.5944498275635773, + 0.4898861106787329, + null, + 0.5944498275635773, + 0.5204579980957379, + null, + 0.5944498275635773, + 0.6352288779182178, + null, + 0.5944498275635773, + 0.6648266103848882, + null, + 0.5944498275635773, + 0.6072525121642058, + null, + 0.2373268562908326, + 0.23741932367240448, + null, + 0.2373268562908326, + 0.33811323660241943, + null, + 0.2373268562908326, + 0.17542400609184483, + null, + 0.2373268562908326, + 0.17619771419691865, + null, + 0.2373268562908326, + 0.1278305132468397, + null, + 0.2373268562908326, + 0.21532966919867302, + null, + 0.2373268562908326, + 0.2695720924906413, + null, + 0.2373268562908326, + 0.24454670425362057, + null, + 0.2373268562908326, + 0.20002447568886628, + null, + 0.2373268562908326, + 0.3202314429055858, + null, + 0.2373268562908326, + 0.1341994714416056, + null, + 0.2373268562908326, + 0.3169605131706372, + null, + 0.2373268562908326, + 0.29050814087118004, + null, + 0.2373268562908326, + 0.32345881810688737, + null, + 0.23741932367240448, + 0.32127102230894566, + null, + 0.23741932367240448, + 0.2381682330796122, + null, + 0.23741932367240448, + 0.17296378957033465, + null, + 0.23741932367240448, + 0.24033413659841596, + null, + 0.23741932367240448, + 0.2981410655965283, + null, + 0.23741932367240448, + 0.3199684158322815, + null, + 0.23741932367240448, + 0.2660491488293679, + null, + 0.23741932367240448, + 0.249116699886752, + null, + 0.23741932367240448, + 0.23700988477155205, + null, + 0.23741932367240448, + 0.29050814087118004, + null, + 0.23741932367240448, + 0.2318219208408404, + null, + 0.32127102230894566, + 0.39453602200590676, + null, + 0.32127102230894566, + 0.2381682330796122, + null, + 0.32127102230894566, + 0.3309683982450944, + null, + 0.32127102230894566, + 0.24033413659841596, + 
null, + 0.32127102230894566, + 0.2981410655965283, + null, + 0.32127102230894566, + 0.3199684158322815, + null, + 0.32127102230894566, + 0.3355480553373167, + null, + 0.32127102230894566, + 0.2660491488293679, + null, + 0.32127102230894566, + 0.3635517670405215, + null, + 0.32127102230894566, + 0.249116699886752, + null, + 0.32127102230894566, + 0.23700988477155205, + null, + 0.32127102230894566, + 0.28871122138225125, + null, + 0.32127102230894566, + 0.42203254876563234, + null, + 0.32127102230894566, + 0.2318219208408404, + null, + 0.3661437355856225, + 0.3343459796676115, + null, + 0.3661437355856225, + 0.4307004647175262, + null, + 0.3661437355856225, + 0.40557198035837094, + null, + 0.3661437355856225, + 0.4834545718278357, + null, + 0.3661437355856225, + 0.4847615611240751, + null, + 0.3661437355856225, + 0.32266487999330984, + null, + 0.3661437355856225, + 0.4318165589087314, + null, + 0.3661437355856225, + 0.3340702546567942, + null, + 0.7791505090281524, + 0.8520196094107113, + null, + 0.7791505090281524, + 0.8848427298858184, + null, + 0.3648985367210805, + 0.3343459796676115, + null, + 0.3648985367210805, + 0.4208812619135248, + null, + 0.3648985367210805, + 0.4307004647175262, + null, + 0.3648985367210805, + 0.3900960314334032, + null, + 0.3648985367210805, + 0.3098874271134545, + null, + 0.3648985367210805, + 0.32266487999330984, + null, + 0.3648985367210805, + 0.4295667428124167, + null, + 0.3648985367210805, + 0.35350564895305514, + null, + 0.3648985367210805, + 0.31541428705224306, + null, + 0.3648985367210805, + 0.2693681584998491, + null, + 0.3648985367210805, + 0.42641694849778966, + null, + 0.3648985367210805, + 0.3333136626479075, + null, + 0.6244837238804738, + 0.5850986908522726, + null, + 0.6244837238804738, + 0.6267294109959968, + null, + 0.6244837238804738, + 0.5221172076712435, + null, + 0.6244837238804738, + 0.5717872069066212, + null, + 0.6244837238804738, + 0.7302384542961842, + null, + 0.6244837238804738, + 0.6710484758334021, + null, 
+ 0.6244837238804738, + 0.5492873750243871, + null, + 0.6244837238804738, + 0.6201266549140614, + null, + 0.6244837238804738, + 0.5752985482362863, + null, + 0.9012137046519791, + 0.8622415881936324, + null, + 0.9012137046519791, + 0.8350595230795331, + null, + 0.5219101415039136, + 0.4307004647175262, + null, + 0.5219101415039136, + 0.4834545718278357, + null, + 0.5219101415039136, + 0.4847615611240751, + null, + 0.5219101415039136, + 0.4295667428124167, + null, + 0.5219101415039136, + 0.5144551437666581, + null, + 0.5219101415039136, + 0.42641694849778966, + null, + 0.5219101415039136, + 0.6014235590484225, + null, + 0.5219101415039136, + 0.4868902788925622, + null, + 0.39453602200590676, + 0.3309683982450944, + null, + 0.39453602200590676, + 0.2981410655965283, + null, + 0.39453602200590676, + 0.3199684158322815, + null, + 0.39453602200590676, + 0.45431497833000367, + null, + 0.39453602200590676, + 0.42052616285893474, + null, + 0.39453602200590676, + 0.4107398412471005, + null, + 0.39453602200590676, + 0.3635517670405215, + null, + 0.39453602200590676, + 0.42203254876563234, + null, + 0.2009582712064717, + 0.20619722773579274, + null, + 0.2009582712064717, + 0.3098874271134545, + null, + 0.2009582712064717, + 0.15069304516745607, + null, + 0.2009582712064717, + 0.31541428705224306, + null, + 0.2009582712064717, + 0.24013807075121119, + null, + 0.2009582712064717, + 0.2693681584998491, + null, + 0.2009582712064717, + 0.22993075379681738, + null, + 0.2009582712064717, + 0.09959517902538939, + null, + 0.04224314617430658, + 0.05938145280899054, + null, + 0.04224314617430658, + 0.04283815208078323, + null, + 0.04224314617430658, + 0.010366221042083845, + null, + 0.04224314617430658, + 0.10782775946098799, + null, + 0.04224314617430658, + 0.03395115206665145, + null, + 0.04224314617430658, + 0.05477321631284726, + null, + 0.2381682330796122, + 0.3309683982450944, + null, + 0.2381682330796122, + 0.17296378957033465, + null, + 0.2381682330796122, + 
0.24033413659841596, + null, + 0.2381682330796122, + 0.2981410655965283, + null, + 0.2381682330796122, + 0.3355480553373167, + null, + 0.2381682330796122, + 0.2660491488293679, + null, + 0.2381682330796122, + 0.249116699886752, + null, + 0.2381682330796122, + 0.23700988477155205, + null, + 0.2381682330796122, + 0.28871122138225125, + null, + 0.2381682330796122, + 0.2002886163837997, + null, + 0.2381682330796122, + 0.13201947050262697, + null, + 0.2381682330796122, + 0.2318219208408404, + null, + 0.2381682330796122, + 0.20307680326083377, + null, + 0.33811323660241943, + 0.31304614249644347, + null, + 0.33811323660241943, + 0.2695720924906413, + null, + 0.33811323660241943, + 0.24454670425362057, + null, + 0.33811323660241943, + 0.4500538798110242, + null, + 0.33811323660241943, + 0.4140065537970282, + null, + 0.33811323660241943, + 0.2880647319459674, + null, + 0.33811323660241943, + 0.3202314429055858, + null, + 0.33811323660241943, + 0.3169605131706372, + null, + 0.33811323660241943, + 0.32345881810688737, + null, + 0.5216765314868881, + 0.6001026871900049, + null, + 0.5216765314868881, + 0.6012106694454529, + null, + 0.5216765314868881, + 0.4898861106787329, + null, + 0.5216765314868881, + 0.5204579980957379, + null, + 0.5216765314868881, + 0.4500538798110242, + null, + 0.5216765314868881, + 0.42052616285893474, + null, + 0.5216765314868881, + 0.4140065537970282, + null, + 0.5216765314868881, + 0.4937592635708411, + null, + 0.6001026871900049, + 0.6012106694454529, + null, + 0.6001026871900049, + 0.6705222836834548, + null, + 0.6001026871900049, + 0.5204579980957379, + null, + 0.6001026871900049, + 0.6648266103848882, + null, + 0.991844460003468, + 0.9727770125665405, + null, + 0.991844460003468, + 0.938767234846119, + null, + 0.991844460003468, + 0.9874110419208606, + null, + 0.991844460003468, + 0.9328536520894143, + null, + 0.991844460003468, + 0.9078978130468089, + null, + 0.991844460003468, + 0.9513646744432486, + null, + 0.3343459796676115, + 
0.4307004647175262, + null, + 0.3343459796676115, + 0.3900960314334032, + null, + 0.3343459796676115, + 0.3098874271134545, + null, + 0.3343459796676115, + 0.32266487999330984, + null, + 0.3343459796676115, + 0.4295667428124167, + null, + 0.3343459796676115, + 0.35350564895305514, + null, + 0.3343459796676115, + 0.31541428705224306, + null, + 0.3343459796676115, + 0.24013807075121119, + null, + 0.3343459796676115, + 0.2693681584998491, + null, + 0.3343459796676115, + 0.3333136626479075, + null, + 0.8957623407464501, + 0.9727770125665405, + null, + 0.8957623407464501, + 0.8423383207045981, + null, + 0.8957623407464501, + 0.8791466031622056, + null, + 0.8957623407464501, + 0.8151159149468827, + null, + 0.8957623407464501, + 0.922341377568881, + null, + 0.8957623407464501, + 0.8821215709600496, + null, + 0.8957623407464501, + 0.9328536520894143, + null, + 0.8957623407464501, + 0.9344432405222354, + null, + 0.8957623407464501, + 0.9642772106357639, + null, + 0.8957623407464501, + 0.8157570218353161, + null, + 0.8957623407464501, + 0.7925454632595156, + null, + 0.8957623407464501, + 0.888980486534156, + null, + 0.4208812619135248, + 0.3900960314334032, + null, + 0.4208812619135248, + 0.3098874271134545, + null, + 0.4208812619135248, + 0.4295667428124167, + null, + 0.4208812619135248, + 0.35350564895305514, + null, + 0.4208812619135248, + 0.4467311570808764, + null, + 0.4208812619135248, + 0.5144551437666581, + null, + 0.4208812619135248, + 0.4421375373865315, + null, + 0.4208812619135248, + 0.42641694849778966, + null, + 0.4208812619135248, + 0.3333136626479075, + null, + 0.4208812619135248, + 0.4868902788925622, + null, + 0.31304614249644347, + 0.2697998035002954, + null, + 0.31304614249644347, + 0.2695720924906413, + null, + 0.31304614249644347, + 0.24454670425362057, + null, + 0.31304614249644347, + 0.20619722773579274, + null, + 0.31304614249644347, + 0.3098874271134545, + null, + 0.31304614249644347, + 0.2880647319459674, + null, + 0.31304614249644347, + 
0.3202314429055858, + null, + 0.31304614249644347, + 0.22993075379681738, + null, + 0.31304614249644347, + 0.3169605131706372, + null, + 0.31304614249644347, + 0.32345881810688737, + null, + 0.6773365837969099, + 0.6628083689885368, + null, + 0.6773365837969099, + 0.6160873747407943, + null, + 0.6773365837969099, + 0.7537809293531343, + null, + 0.6773365837969099, + 0.5634679987017406, + null, + 0.6773365837969099, + 0.6012106694454529, + null, + 0.6773365837969099, + 0.6352288779182178, + null, + 0.6773365837969099, + 0.5981086798045652, + null, + 0.6773365837969099, + 0.6648266103848882, + null, + 0.6773365837969099, + 0.6072525121642058, + null, + 0.4307004647175262, + 0.40557198035837094, + null, + 0.4307004647175262, + 0.4834545718278357, + null, + 0.4307004647175262, + 0.4847615611240751, + null, + 0.4307004647175262, + 0.32266487999330984, + null, + 0.4307004647175262, + 0.4295667428124167, + null, + 0.4307004647175262, + 0.35350564895305514, + null, + 0.4307004647175262, + 0.4318165589087314, + null, + 0.4307004647175262, + 0.5097617399826666, + null, + 0.3309683982450944, + 0.24033413659841596, + null, + 0.3309683982450944, + 0.2981410655965283, + null, + 0.3309683982450944, + 0.3199684158322815, + null, + 0.3309683982450944, + 0.3355480553373167, + null, + 0.3309683982450944, + 0.2660491488293679, + null, + 0.3309683982450944, + 0.3635517670405215, + null, + 0.3309683982450944, + 0.249116699886752, + null, + 0.3309683982450944, + 0.23700988477155205, + null, + 0.3309683982450944, + 0.28871122138225125, + null, + 0.3309683982450944, + 0.42203254876563234, + null, + 0.3309683982450944, + 0.2318219208408404, + null, + 0.2697998035002954, + 0.17542400609184483, + null, + 0.2697998035002954, + 0.2695720924906413, + null, + 0.2697998035002954, + 0.24454670425362057, + null, + 0.2697998035002954, + 0.20619722773579274, + null, + 0.2697998035002954, + 0.3098874271134545, + null, + 0.2697998035002954, + 0.2880647319459674, + null, + 0.2697998035002954, + 
0.22993075379681738, + null, + 0.2697998035002954, + 0.32345881810688737, + null, + 0.9727770125665405, + 0.922341377568881, + null, + 0.9727770125665405, + 0.8821215709600496, + null, + 0.9727770125665405, + 0.9328536520894143, + null, + 0.9727770125665405, + 0.9642772106357639, + null, + 0.40557198035837094, + 0.4834545718278357, + null, + 0.40557198035837094, + 0.4847615611240751, + null, + 0.40557198035837094, + 0.32266487999330984, + null, + 0.40557198035837094, + 0.40395348439090084, + null, + 0.40557198035837094, + 0.4318165589087314, + null, + 0.40557198035837094, + 0.3340702546567942, + null, + 0.40557198035837094, + 0.4248880785102581, + null, + 0.40557198035837094, + 0.5097617399826666, + null, + 0.35532572275494023, + 0.24028581536328997, + null, + 0.35532572275494023, + 0.3192831323823997, + null, + 0.35532572275494023, + 0.2529891644068947, + null, + 0.35532572275494023, + 0.41535454584101794, + null, + 0.35532572275494023, + 0.40395348439090084, + null, + 0.35532572275494023, + 0.4248880785102581, + null, + 0.35532572275494023, + 0.29119156039108685, + null, + 0.35532572275494023, + 0.24102842320743, + null, + 0.5850986908522726, + 0.6267294109959968, + null, + 0.5850986908522726, + 0.5221172076712435, + null, + 0.5850986908522726, + 0.5717872069066212, + null, + 0.5850986908522726, + 0.5492873750243871, + null, + 0.5850986908522726, + 0.6201266549140614, + null, + 0.5850986908522726, + 0.5752985482362863, + null, + 0.5850986908522726, + 0.5097617399826666, + null, + 0.17296378957033465, + 0.24033413659841596, + null, + 0.17296378957033465, + 0.2660491488293679, + null, + 0.17296378957033465, + 0.10782775946098799, + null, + 0.17296378957033465, + 0.249116699886752, + null, + 0.17296378957033465, + 0.23700988477155205, + null, + 0.17296378957033465, + 0.28871122138225125, + null, + 0.17296378957033465, + 0.2002886163837997, + null, + 0.17296378957033465, + 0.13201947050262697, + null, + 0.17296378957033465, + 0.2318219208408404, + null, + 
0.17296378957033465, + 0.20307680326083377, + null, + 0.6628083689885368, + 0.6160873747407943, + null, + 0.6628083689885368, + 0.7537809293531343, + null, + 0.6628083689885368, + 0.5634679987017406, + null, + 0.6628083689885368, + 0.6352288779182178, + null, + 0.6628083689885368, + 0.5981086798045652, + null, + 0.6628083689885368, + 0.6072525121642058, + null, + 0.6160873747407943, + 0.5186581897030644, + null, + 0.6160873747407943, + 0.5634679987017406, + null, + 0.6160873747407943, + 0.6012106694454529, + null, + 0.6160873747407943, + 0.6352288779182178, + null, + 0.6160873747407943, + 0.5981086798045652, + null, + 0.6160873747407943, + 0.6648266103848882, + null, + 0.6160873747407943, + 0.6072525121642058, + null, + 0.6160873747407943, + 0.5260776190209286, + null, + 0.025297953521542405, + 0.06016942899581168, + null, + 0.025297953521542405, + 0.055897802218322856, + null, + 0.025297953521542405, + 0.09959517902538939, + null, + 0.24028581536328997, + 0.3192831323823997, + null, + 0.24028581536328997, + 0.1333966979371528, + null, + 0.24028581536328997, + 0.2529891644068947, + null, + 0.24028581536328997, + 0.29119156039108685, + null, + 0.24028581536328997, + 0.24102842320743, + null, + 0.5186581897030644, + 0.5634679987017406, + null, + 0.5186581897030644, + 0.6012106694454529, + null, + 0.5186581897030644, + 0.4898861106787329, + null, + 0.5186581897030644, + 0.45431497833000367, + null, + 0.5186581897030644, + 0.6352288779182178, + null, + 0.5186581897030644, + 0.5981086798045652, + null, + 0.5186581897030644, + 0.6072525121642058, + null, + 0.5186581897030644, + 0.42203254876563234, + null, + 0.5186581897030644, + 0.5260776190209286, + null, + 0.8423383207045981, + 0.8151159149468827, + null, + 0.8423383207045981, + 0.7334929583472656, + null, + 0.8423383207045981, + 0.8821215709600496, + null, + 0.8423383207045981, + 0.9328536520894143, + null, + 0.8423383207045981, + 0.7834166246251234, + null, + 0.8423383207045981, + 0.9078978130468089, + null, + 
0.8423383207045981, + 0.7925454632595156, + null, + 0.8423383207045981, + 0.8508124987550889, + null, + 0.7537809293531343, + 0.8622415881936324, + null, + 0.7537809293531343, + 0.8350595230795331, + null, + 0.3192831323823997, + 0.2529891644068947, + null, + 0.3192831323823997, + 0.41535454584101794, + null, + 0.3192831323823997, + 0.40395348439090084, + null, + 0.3192831323823997, + 0.4248880785102581, + null, + 0.3192831323823997, + 0.29119156039108685, + null, + 0.3192831323823997, + 0.24102842320743, + null, + 0.17542400609184483, + 0.055894273053114896, + null, + 0.17542400609184483, + 0.14933184162295132, + null, + 0.17542400609184483, + 0.17619771419691865, + null, + 0.17542400609184483, + 0.1278305132468397, + null, + 0.17542400609184483, + 0.21532966919867302, + null, + 0.17542400609184483, + 0.0731473655342364, + null, + 0.17542400609184483, + 0.2695720924906413, + null, + 0.17542400609184483, + 0.09533319097359638, + null, + 0.17542400609184483, + 0.24454670425362057, + null, + 0.17542400609184483, + 0.20002447568886628, + null, + 0.17542400609184483, + 0.20619722773579274, + null, + 0.17542400609184483, + 0.2880647319459674, + null, + 0.17542400609184483, + 0.1341994714416056, + null, + 0.17542400609184483, + 0.22993075379681738, + null, + 0.008409380348177398, + 0.04149975738749545, + null, + 0.938767234846119, + 0.9874110419208606, + null, + 0.938767234846119, + 0.8867112408398291, + null, + 0.938767234846119, + 0.9298960866412943, + null, + 0.938767234846119, + 0.9078978130468089, + null, + 0.938767234846119, + 0.9958360522915445, + null, + 0.938767234846119, + 0.8508124987550889, + null, + 0.938767234846119, + 0.8842114977564064, + null, + 0.938767234846119, + 0.9513646744432486, + null, + 0.24033413659841596, + 0.2981410655965283, + null, + 0.24033413659841596, + 0.3199684158322815, + null, + 0.24033413659841596, + 0.3355480553373167, + null, + 0.24033413659841596, + 0.2660491488293679, + null, + 0.24033413659841596, + 0.249116699886752, + null, + 
0.24033413659841596, + 0.23700988477155205, + null, + 0.24033413659841596, + 0.28871122138225125, + null, + 0.24033413659841596, + 0.2002886163837997, + null, + 0.24033413659841596, + 0.13201947050262697, + null, + 0.24033413659841596, + 0.2318219208408404, + null, + 0.24033413659841596, + 0.20307680326083377, + null, + 0.8791466031622056, + 0.922341377568881, + null, + 0.8791466031622056, + 0.8821215709600496, + null, + 0.8791466031622056, + 0.9542382277667263, + null, + 0.8791466031622056, + 0.9024846524956353, + null, + 0.8791466031622056, + 0.9344432405222354, + null, + 0.8791466031622056, + 0.9642772106357639, + null, + 0.8791466031622056, + 0.8157570218353161, + null, + 0.8791466031622056, + 0.888980486534156, + null, + 0.5634679987017406, + 0.6012106694454529, + null, + 0.5634679987017406, + 0.6352288779182178, + null, + 0.5634679987017406, + 0.5981086798045652, + null, + 0.5634679987017406, + 0.6072525121642058, + null, + 0.5634679987017406, + 0.5260776190209286, + null, + 0.05938145280899054, + 0.04283815208078323, + null, + 0.05938145280899054, + 0.10782775946098799, + null, + 0.05938145280899054, + 0.03395115206665145, + null, + 0.05938145280899054, + 0.05477321631284726, + null, + 0.05938145280899054, + 0.1341994714416056, + null, + 0.6012106694454529, + 0.4898861106787329, + null, + 0.6012106694454529, + 0.5204579980957379, + null, + 0.6012106694454529, + 0.6352288779182178, + null, + 0.6012106694454529, + 0.6648266103848882, + null, + 0.6012106694454529, + 0.6072525121642058, + null, + 0.6705222836834548, + 0.7454337953380579, + null, + 0.6705222836834548, + 0.7077207700167599, + null, + 0.6705222836834548, + 0.7005910562446783, + null, + 0.6705222836834548, + 0.5603277981830703, + null, + 0.3900960314334032, + 0.3098874271134545, + null, + 0.3900960314334032, + 0.4295667428124167, + null, + 0.3900960314334032, + 0.35350564895305514, + null, + 0.3900960314334032, + 0.31541428705224306, + null, + 0.3900960314334032, + 0.4467311570808764, + null, + 
0.3900960314334032, + 0.4421375373865315, + null, + 0.3900960314334032, + 0.42641694849778966, + null, + 0.3900960314334032, + 0.3333136626479075, + null, + 0.3900960314334032, + 0.4868902788925622, + null, + 0.055894273053114896, + 0.14933184162295132, + null, + 0.055894273053114896, + 0.1278305132468397, + null, + 0.055894273053114896, + 0.0731473655342364, + null, + 0.055894273053114896, + 0.09533319097359638, + null, + 0.055894273053114896, + 0.055897802218322856, + null, + 0.055894273053114896, + 0.04153202488293273, + null, + 0.055894273053114896, + 0.06013197669987258, + null, + 0.055894273053114896, + 0.040563128366188694, + null, + 0.14933184162295132, + 0.17619771419691865, + null, + 0.14933184162295132, + 0.1278305132468397, + null, + 0.14933184162295132, + 0.21532966919867302, + null, + 0.14933184162295132, + 0.0731473655342364, + null, + 0.14933184162295132, + 0.09533319097359638, + null, + 0.14933184162295132, + 0.24454670425362057, + null, + 0.14933184162295132, + 0.20002447568886628, + null, + 0.14933184162295132, + 0.20619722773579274, + null, + 0.14933184162295132, + 0.055897802218322856, + null, + 0.14933184162295132, + 0.04153202488293273, + null, + 0.14933184162295132, + 0.06013197669987258, + null, + 0.14933184162295132, + 0.22993075379681738, + null, + 0.14933184162295132, + 0.040563128366188694, + null, + 0.8151159149468827, + 0.7334929583472656, + null, + 0.8151159149468827, + 0.8821215709600496, + null, + 0.8151159149468827, + 0.9328536520894143, + null, + 0.8151159149468827, + 0.7834166246251234, + null, + 0.8151159149468827, + 0.7925454632595156, + null, + 0.8151159149468827, + 0.8508124987550889, + null, + 0.17619771419691865, + 0.1278305132468397, + null, + 0.17619771419691865, + 0.21532966919867302, + null, + 0.17619771419691865, + 0.0731473655342364, + null, + 0.17619771419691865, + 0.2695720924906413, + null, + 0.17619771419691865, + 0.09533319097359638, + null, + 0.17619771419691865, + 0.24454670425362057, + null, + 
0.17619771419691865, + 0.20002447568886628, + null, + 0.17619771419691865, + 0.1341994714416056, + null, + 0.17619771419691865, + 0.06013197669987258, + null, + 0.2981410655965283, + 0.3199684158322815, + null, + 0.2981410655965283, + 0.3355480553373167, + null, + 0.2981410655965283, + 0.2660491488293679, + null, + 0.2981410655965283, + 0.3635517670405215, + null, + 0.2981410655965283, + 0.249116699886752, + null, + 0.2981410655965283, + 0.23700988477155205, + null, + 0.2981410655965283, + 0.28871122138225125, + null, + 0.2981410655965283, + 0.42203254876563234, + null, + 0.2981410655965283, + 0.2002886163837997, + null, + 0.2981410655965283, + 0.29050814087118004, + null, + 0.2981410655965283, + 0.2318219208408404, + null, + 0.2981410655965283, + 0.20307680326083377, + null, + 0.7334929583472656, + 0.6512622326935055, + null, + 0.7334929583472656, + 0.7205270186163313, + null, + 0.7334929583472656, + 0.7834166246251234, + null, + 0.7334929583472656, + 0.6217058876501556, + null, + 0.7334929583472656, + 0.7925454632595156, + null, + 0.7334929583472656, + 0.6714278208298593, + null, + 0.04283815208078323, + 0.010366221042083845, + null, + 0.04283815208078323, + 0.10782775946098799, + null, + 0.04283815208078323, + 0.03395115206665145, + null, + 0.04283815208078323, + 0.05477321631284726, + null, + 0.04283815208078323, + 0.13201947050262697, + null, + 0.922341377568881, + 0.8821215709600496, + null, + 0.922341377568881, + 0.9542382277667263, + null, + 0.922341377568881, + 0.9328536520894143, + null, + 0.922341377568881, + 0.9961038345306213, + null, + 0.922341377568881, + 0.9344432405222354, + null, + 0.922341377568881, + 0.9642772106357639, + null, + 0.922341377568881, + 0.8157570218353161, + null, + 0.922341377568881, + 0.888980486534156, + null, + 0.3199684158322815, + 0.42052616285893474, + null, + 0.3199684158322815, + 0.2660491488293679, + null, + 0.3199684158322815, + 0.4107398412471005, + null, + 0.3199684158322815, + 0.3635517670405215, + null, + 
0.3199684158322815, + 0.249116699886752, + null, + 0.3199684158322815, + 0.23700988477155205, + null, + 0.3199684158322815, + 0.42203254876563234, + null, + 0.3199684158322815, + 0.29050814087118004, + null, + 0.1278305132468397, + 0.21532966919867302, + null, + 0.1278305132468397, + 0.0731473655342364, + null, + 0.1278305132468397, + 0.09533319097359638, + null, + 0.1278305132468397, + 0.24454670425362057, + null, + 0.1278305132468397, + 0.20002447568886628, + null, + 0.1278305132468397, + 0.04153202488293273, + null, + 0.1278305132468397, + 0.05477321631284726, + null, + 0.1278305132468397, + 0.1341994714416056, + null, + 0.1278305132468397, + 0.06013197669987258, + null, + 0.1278305132468397, + 0.040563128366188694, + null, + 0.21532966919867302, + 0.2695720924906413, + null, + 0.21532966919867302, + 0.09533319097359638, + null, + 0.21532966919867302, + 0.24454670425362057, + null, + 0.21532966919867302, + 0.20002447568886628, + null, + 0.21532966919867302, + 0.3202314429055858, + null, + 0.21532966919867302, + 0.1341994714416056, + null, + 0.21532966919867302, + 0.3169605131706372, + null, + 0.21532966919867302, + 0.29050814087118004, + null, + 0.21532966919867302, + 0.32345881810688737, + null, + 0.0731473655342364, + 0.09533319097359638, + null, + 0.0731473655342364, + 0.055897802218322856, + null, + 0.0731473655342364, + 0.04153202488293273, + null, + 0.0731473655342364, + 0.05477321631284726, + null, + 0.0731473655342364, + 0.1341994714416056, + null, + 0.0731473655342364, + 0.06013197669987258, + null, + 0.0731473655342364, + 0.040563128366188694, + null, + 0.4898861106787329, + 0.45431497833000367, + null, + 0.4898861106787329, + 0.5204579980957379, + null, + 0.4898861106787329, + 0.42052616285893474, + null, + 0.4898861106787329, + 0.4140065537970282, + null, + 0.4898861106787329, + 0.4107398412471005, + null, + 0.2695720924906413, + 0.24454670425362057, + null, + 0.2695720924906413, + 0.20002447568886628, + null, + 0.2695720924906413, + 
0.2880647319459674, + null, + 0.2695720924906413, + 0.3202314429055858, + null, + 0.2695720924906413, + 0.3169605131706372, + null, + 0.2695720924906413, + 0.32345881810688737, + null, + 0.09533319097359638, + 0.20002447568886628, + null, + 0.09533319097359638, + 0.055897802218322856, + null, + 0.09533319097359638, + 0.04153202488293273, + null, + 0.09533319097359638, + 0.05477321631284726, + null, + 0.09533319097359638, + 0.1341994714416056, + null, + 0.09533319097359638, + 0.06013197669987258, + null, + 0.09533319097359638, + 0.040563128366188694, + null, + 0.9874110419208606, + 0.8867112408398291, + null, + 0.9874110419208606, + 0.9298960866412943, + null, + 0.9874110419208606, + 0.9078978130468089, + null, + 0.9874110419208606, + 0.9958360522915445, + null, + 0.9874110419208606, + 0.9513646744432486, + null, + 0.1333966979371528, + 0.2529891644068947, + null, + 0.1333966979371528, + 0.04149975738749545, + null, + 0.1333966979371528, + 0.16781555203357146, + null, + 0.1333966979371528, + 0.24102842320743, + null, + 0.1333966979371528, + 0.19048093242734687, + null, + 0.1333966979371528, + 0.1294716874165911, + null, + 0.2529891644068947, + 0.29119156039108685, + null, + 0.2529891644068947, + 0.24102842320743, + null, + 0.2529891644068947, + 0.19048093242734687, + null, + 0.45431497833000367, + 0.5204579980957379, + null, + 0.45431497833000367, + 0.42052616285893474, + null, + 0.45431497833000367, + 0.4107398412471005, + null, + 0.45431497833000367, + 0.3635517670405215, + null, + 0.45431497833000367, + 0.42203254876563234, + null, + 0.45431497833000367, + 0.5260776190209286, + null, + 0.24454670425362057, + 0.20002447568886628, + null, + 0.24454670425362057, + 0.2880647319459674, + null, + 0.24454670425362057, + 0.3202314429055858, + null, + 0.24454670425362057, + 0.22993075379681738, + null, + 0.24454670425362057, + 0.3169605131706372, + null, + 0.24454670425362057, + 0.32345881810688737, + null, + 0.20002447568886628, + 0.3202314429055858, + null, + 
0.20002447568886628, + 0.1341994714416056, + null, + 0.20002447568886628, + 0.3169605131706372, + null, + 0.6267294109959968, + 0.5221172076712435, + null, + 0.6267294109959968, + 0.7205270186163313, + null, + 0.6267294109959968, + 0.5717872069066212, + null, + 0.6267294109959968, + 0.7302384542961842, + null, + 0.6267294109959968, + 0.6710484758334021, + null, + 0.6267294109959968, + 0.5492873750243871, + null, + 0.6267294109959968, + 0.6201266549140614, + null, + 0.6267294109959968, + 0.5752985482362863, + null, + 0.5221172076712435, + 0.41535454584101794, + null, + 0.5221172076712435, + 0.5717872069066212, + null, + 0.5221172076712435, + 0.5492873750243871, + null, + 0.5221172076712435, + 0.6201266549140614, + null, + 0.5221172076712435, + 0.4248880785102581, + null, + 0.6512622326935055, + 0.6217058876501556, + null, + 0.6512622326935055, + 0.6714278208298593, + null, + 0.3355480553373167, + 0.2660491488293679, + null, + 0.3355480553373167, + 0.249116699886752, + null, + 0.3355480553373167, + 0.28871122138225125, + null, + 0.3355480553373167, + 0.42203254876563234, + null, + 0.3355480553373167, + 0.2318219208408404, + null, + 0.4834545718278357, + 0.4847615611240751, + null, + 0.4834545718278357, + 0.4318165589087314, + null, + 0.4834545718278357, + 0.5097617399826666, + null, + 0.4847615611240751, + 0.4318165589087314, + null, + 0.4847615611240751, + 0.5097617399826666, + null, + 0.20619722773579274, + 0.3098874271134545, + null, + 0.20619722773579274, + 0.2880647319459674, + null, + 0.20619722773579274, + 0.24013807075121119, + null, + 0.20619722773579274, + 0.22993075379681738, + null, + 0.20619722773579274, + 0.09959517902538939, + null, + 0.9419075807648644, + 0.8848427298858184, + null, + 0.9419075807648644, + 0.9756800437762957, + null, + 0.3098874271134545, + 0.4295667428124167, + null, + 0.3098874271134545, + 0.35350564895305514, + null, + 0.3098874271134545, + 0.31541428705224306, + null, + 0.3098874271134545, + 0.2880647319459674, + null, + 
0.3098874271134545, + 0.24013807075121119, + null, + 0.3098874271134545, + 0.2693681584998491, + null, + 0.3098874271134545, + 0.42641694849778966, + null, + 0.3098874271134545, + 0.3333136626479075, + null, + 0.3098874271134545, + 0.22993075379681738, + null, + 0.04149975738749545, + 0.019989772968585173, + null, + 0.04149975738749545, + 0.1294716874165911, + null, + 0.32266487999330984, + 0.31541428705224306, + null, + 0.32266487999330984, + 0.4318165589087314, + null, + 0.32266487999330984, + 0.2693681584998491, + null, + 0.32266487999330984, + 0.3340702546567942, + null, + 0.4295667428124167, + 0.35350564895305514, + null, + 0.4295667428124167, + 0.31541428705224306, + null, + 0.4295667428124167, + 0.4467311570808764, + null, + 0.4295667428124167, + 0.5144551437666581, + null, + 0.4295667428124167, + 0.4421375373865315, + null, + 0.4295667428124167, + 0.42641694849778966, + null, + 0.4295667428124167, + 0.3333136626479075, + null, + 0.4295667428124167, + 0.4868902788925622, + null, + 0.35350564895305514, + 0.31541428705224306, + null, + 0.35350564895305514, + 0.24013807075121119, + null, + 0.35350564895305514, + 0.2693681584998491, + null, + 0.35350564895305514, + 0.42641694849778966, + null, + 0.35350564895305514, + 0.3333136626479075, + null, + 0.15069304516745607, + 0.06016942899581168, + null, + 0.15069304516745607, + 0.24013807075121119, + null, + 0.15069304516745607, + 0.2693681584998491, + null, + 0.15069304516745607, + 0.10059463740220753, + null, + 0.15069304516745607, + 0.09959517902538939, + null, + 0.41535454584101794, + 0.40395348439090084, + null, + 0.41535454584101794, + 0.4248880785102581, + null, + 0.41535454584101794, + 0.29119156039108685, + null, + 0.8821215709600496, + 0.9328536520894143, + null, + 0.8821215709600496, + 0.9344432405222354, + null, + 0.8821215709600496, + 0.9642772106357639, + null, + 0.8821215709600496, + 0.8157570218353161, + null, + 0.8821215709600496, + 0.7925454632595156, + null, + 0.8821215709600496, + 
0.888980486534156, + null, + 0.9542382277667263, + 0.9024846524956353, + null, + 0.9542382277667263, + 0.9961038345306213, + null, + 0.9542382277667263, + 0.9344432405222354, + null, + 0.9542382277667263, + 0.9642772106357639, + null, + 0.9542382277667263, + 0.888980486534156, + null, + 0.9542382277667263, + 0.9810704436128125, + null, + 0.7205270186163313, + 0.7302384542961842, + null, + 0.7205270186163313, + 0.8157570218353161, + null, + 0.7205270186163313, + 0.7925454632595156, + null, + 0.31541428705224306, + 0.24013807075121119, + null, + 0.31541428705224306, + 0.2693681584998491, + null, + 0.31541428705224306, + 0.3333136626479075, + null, + 0.010366221042083845, + 0.10782775946098799, + null, + 0.010366221042083845, + 0.03395115206665145, + null, + 0.010366221042083845, + 0.13201947050262697, + null, + 0.06016942899581168, + 0.055897802218322856, + null, + 0.06016942899581168, + 0.10059463740220753, + null, + 0.06016942899581168, + 0.09959517902538939, + null, + 0.8867112408398291, + 0.9298960866412943, + null, + 0.8867112408398291, + 0.8599268392047722, + null, + 0.8867112408398291, + 0.9078978130468089, + null, + 0.8867112408398291, + 0.8508124987550889, + null, + 0.8867112408398291, + 0.8842114977564064, + null, + 0.8867112408398291, + 0.9513646744432486, + null, + 0.5204579980957379, + 0.42052616285893474, + null, + 0.5204579980957379, + 0.4140065537970282, + null, + 0.5204579980957379, + 0.4107398412471005, + null, + 0.5204579980957379, + 0.4937592635708411, + null, + 0.4500538798110242, + 0.4140065537970282, + null, + 0.4500538798110242, + 0.4467311570808764, + null, + 0.4500538798110242, + 0.4421375373865315, + null, + 0.4500538798110242, + 0.4937592635708411, + null, + 0.4500538798110242, + 0.5603277981830703, + null, + 0.4500538798110242, + 0.547451424618544, + null, + 0.40395348439090084, + 0.3340702546567942, + null, + 0.40395348439090084, + 0.4248880785102581, + null, + 0.5717872069066212, + 0.6710484758334021, + null, + 0.5717872069066212, + 
0.5492873750243871, + null, + 0.5717872069066212, + 0.6201266549140614, + null, + 0.42052616285893474, + 0.4140065537970282, + null, + 0.42052616285893474, + 0.4107398412471005, + null, + 0.42052616285893474, + 0.3635517670405215, + null, + 0.2660491488293679, + 0.249116699886752, + null, + 0.2660491488293679, + 0.23700988477155205, + null, + 0.2660491488293679, + 0.28871122138225125, + null, + 0.2660491488293679, + 0.2002886163837997, + null, + 0.2660491488293679, + 0.29050814087118004, + null, + 0.2660491488293679, + 0.2318219208408404, + null, + 0.2660491488293679, + 0.20307680326083377, + null, + 0.10782775946098799, + 0.03395115206665145, + null, + 0.10782775946098799, + 0.2002886163837997, + null, + 0.10782775946098799, + 0.13201947050262697, + null, + 0.10782775946098799, + 0.20307680326083377, + null, + 0.7302384542961842, + 0.6710484758334021, + null, + 0.7302384542961842, + 0.6201266549140614, + null, + 0.8520196094107113, + 0.8848427298858184, + null, + 0.8520196094107113, + 0.9435179236599912, + null, + 0.8520196094107113, + 0.9756800437762957, + null, + 0.4140065537970282, + 0.3202314429055858, + null, + 0.4140065537970282, + 0.4107398412471005, + null, + 0.4140065537970282, + 0.3635517670405215, + null, + 0.4140065537970282, + 0.4937592635708411, + null, + 0.4140065537970282, + 0.3169605131706372, + null, + 0.4140065537970282, + 0.32345881810688737, + null, + 0.4467311570808764, + 0.5144551437666581, + null, + 0.4467311570808764, + 0.4421375373865315, + null, + 0.4467311570808764, + 0.42641694849778966, + null, + 0.4467311570808764, + 0.4868902788925622, + null, + 0.5144551437666581, + 0.4421375373865315, + null, + 0.5144551437666581, + 0.42641694849778966, + null, + 0.5144551437666581, + 0.6014235590484225, + null, + 0.5144551437666581, + 0.5603277981830703, + null, + 0.5144551437666581, + 0.4868902788925622, + null, + 0.7454337953380579, + 0.7077207700167599, + null, + 0.7454337953380579, + 0.8599268392047722, + null, + 0.7454337953380579, + 
0.7005910562446783, + null, + 0.03395115206665145, + 0.05477321631284726, + null, + 0.7077207700167599, + 0.7005910562446783, + null, + 0.9024846524956353, + 0.9961038345306213, + null, + 0.9024846524956353, + 0.9344432405222354, + null, + 0.9024846524956353, + 0.888980486534156, + null, + 0.9024846524956353, + 0.9810704436128125, + null, + 0.055897802218322856, + 0.04153202488293273, + null, + 0.055897802218322856, + 0.06013197669987258, + null, + 0.055897802218322856, + 0.040563128366188694, + null, + 0.055897802218322856, + 0.09959517902538939, + null, + 0.2880647319459674, + 0.3202314429055858, + null, + 0.2880647319459674, + 0.22993075379681738, + null, + 0.2880647319459674, + 0.3169605131706372, + null, + 0.2880647319459674, + 0.32345881810688737, + null, + 0.9328536520894143, + 0.9078978130468089, + null, + 0.9328536520894143, + 0.8508124987550889, + null, + 0.9298960866412943, + 0.8599268392047722, + null, + 0.9298960866412943, + 0.9958360522915445, + null, + 0.9298960866412943, + 0.8842114977564064, + null, + 0.9298960866412943, + 0.9513646744432486, + null, + 0.6352288779182178, + 0.5981086798045652, + null, + 0.6352288779182178, + 0.6648266103848882, + null, + 0.6352288779182178, + 0.6072525121642058, + null, + 0.04153202488293273, + 0.05477321631284726, + null, + 0.04153202488293273, + 0.1341994714416056, + null, + 0.04153202488293273, + 0.06013197669987258, + null, + 0.04153202488293273, + 0.040563128366188694, + null, + 0.7834166246251234, + 0.7925454632595156, + null, + 0.7834166246251234, + 0.8508124987550889, + null, + 0.7834166246251234, + 0.6714278208298593, + null, + 0.6710484758334021, + 0.5492873750243871, + null, + 0.6710484758334021, + 0.6201266549140614, + null, + 0.3202314429055858, + 0.3169605131706372, + null, + 0.3202314429055858, + 0.32345881810688737, + null, + 0.9961038345306213, + 0.9344432405222354, + null, + 0.9961038345306213, + 0.9642772106357639, + null, + 0.9961038345306213, + 0.888980486534156, + null, + 0.9961038345306213, + 
0.9810704436128125, + null, + 0.4107398412471005, + 0.3635517670405215, + null, + 0.4107398412471005, + 0.42203254876563234, + null, + 0.4107398412471005, + 0.29050814087118004, + null, + 0.24013807075121119, + 0.2693681584998491, + null, + 0.24013807075121119, + 0.3333136626479075, + null, + 0.8599268392047722, + 0.8842114977564064, + null, + 0.4318165589087314, + 0.3340702546567942, + null, + 0.4318165589087314, + 0.5097617399826666, + null, + 0.2693681584998491, + 0.3333136626479075, + null, + 0.4421375373865315, + 0.42641694849778966, + null, + 0.4421375373865315, + 0.4868902788925622, + null, + 0.5492873750243871, + 0.6201266549140614, + null, + 0.5492873750243871, + 0.5752985482362863, + null, + 0.5981086798045652, + 0.6072525121642058, + null, + 0.5981086798045652, + 0.5260776190209286, + null, + 0.3635517670405215, + 0.42203254876563234, + null, + 0.3635517670405215, + 0.29050814087118004, + null, + 0.42641694849778966, + 0.3333136626479075, + null, + 0.42641694849778966, + 0.4868902788925622, + null, + 0.6648266103848882, + 0.6072525121642058, + null, + 0.9344432405222354, + 0.9642772106357639, + null, + 0.9344432405222354, + 0.8157570218353161, + null, + 0.9344432405222354, + 0.888980486534156, + null, + 0.9344432405222354, + 0.9810704436128125, + null, + 0.249116699886752, + 0.23700988477155205, + null, + 0.249116699886752, + 0.28871122138225125, + null, + 0.249116699886752, + 0.2002886163837997, + null, + 0.249116699886752, + 0.29050814087118004, + null, + 0.249116699886752, + 0.2318219208408404, + null, + 0.249116699886752, + 0.20307680326083377, + null, + 0.6201266549140614, + 0.5752985482362863, + null, + 0.16781555203357146, + 0.19048093242734687, + null, + 0.16781555203357146, + 0.1294716874165911, + null, + 0.4937592635708411, + 0.5603277981830703, + null, + 0.4937592635708411, + 0.547451424618544, + null, + 0.9435179236599912, + 0.9958360522915445, + null, + 0.9435179236599912, + 0.9756800437762957, + null, + 0.07011604000159166, + 
0.019989772968585173, + null, + 0.07011604000159166, + 0.10059463740220753, + null, + 0.07011604000159166, + 0.038844634468288675, + null, + 0.9078978130468089, + 0.8508124987550889, + null, + 0.9078978130468089, + 0.9513646744432486, + null, + 0.6072525121642058, + 0.5260776190209286, + null, + 0.23700988477155205, + 0.28871122138225125, + null, + 0.23700988477155205, + 0.2002886163837997, + null, + 0.23700988477155205, + 0.29050814087118004, + null, + 0.23700988477155205, + 0.13201947050262697, + null, + 0.23700988477155205, + 0.2318219208408404, + null, + 0.23700988477155205, + 0.20307680326083377, + null, + 0.05477321631284726, + 0.1341994714416056, + null, + 0.05477321631284726, + 0.06013197669987258, + null, + 0.05477321631284726, + 0.040563128366188694, + null, + 0.9642772106357639, + 0.888980486534156, + null, + 0.019989772968585173, + 0.1294716874165911, + null, + 0.019989772968585173, + 0.038844634468288675, + null, + 0.29119156039108685, + 0.24102842320743, + null, + 0.29119156039108685, + 0.19048093242734687, + null, + 0.6217058876501556, + 0.6714278208298593, + null, + 0.1341994714416056, + 0.06013197669987258, + null, + 0.1341994714416056, + 0.040563128366188694, + null, + 0.28871122138225125, + 0.2002886163837997, + null, + 0.28871122138225125, + 0.2318219208408404, + null, + 0.28871122138225125, + 0.20307680326083377, + null, + 0.06013197669987258, + 0.040563128366188694, + null, + 0.5752985482362863, + 0.5097617399826666, + null, + 0.10059463740220753, + 0.09959517902538939, + null, + 0.10059463740220753, + 0.038844634468288675, + null, + 0.8157570218353161, + 0.7925454632595156, + null, + 0.8157570218353161, + 0.888980486534156, + null, + 0.42203254876563234, + 0.5260776190209286, + null, + 0.2002886163837997, + 0.13201947050262697, + null, + 0.2002886163837997, + 0.2318219208408404, + null, + 0.2002886163837997, + 0.20307680326083377, + null, + 0.8622415881936324, + 0.8350595230795331, + null, + 0.3169605131706372, + 0.29050814087118004, + null, 
+ 0.3169605131706372, + 0.32345881810688737, + null, + 0.6014235590484225, + 0.5603277981830703, + null, + 0.6014235590484225, + 0.4868902788925622, + null, + 0.9958360522915445, + 0.8842114977564064, + null, + 0.24102842320743, + 0.19048093242734687, + null, + 0.19048093242734687, + 0.1294716874165911, + null, + 0.8508124987550889, + 0.9513646744432486, + null, + 0.13201947050262697, + 0.2318219208408404, + null, + 0.13201947050262697, + 0.20307680326083377, + null, + 0.2318219208408404, + 0.20307680326083377, + null, + 0.888980486534156, + 0.9810704436128125, + null, + 0.5603277981830703, + 0.547451424618544, + null + ] + }, + { + "hoverinfo": "text", + "marker": { + "color": [ + 11, + 14, + 7, + 10, + 7, + 10, + 3, + 14, + 9, + 8, + 9, + 6, + 6, + 9, + 15, + 13, + 15, + 10, + 2, + 13, + 9, + 2, + 9, + 9, + 9, + 6, + 15, + 11, + 9, + 6, + 6, + 13, + 13, + 12, + 12, + 10, + 13, + 14, + 10, + 7, + 12, + 8, + 8, + 12, + 7, + 11, + 3, + 6, + 11, + 9, + 4, + 10, + 17, + 1, + 10, + 16, + 10, + 10, + 7, + 13, + 5, + 13, + 10, + 16, + 8, + 13, + 18, + 8, + 7, + 12, + 14, + 16, + 15, + 13, + 10, + 14, + 15, + 7, + 7, + 7, + 10, + 17, + 12, + 10, + 8, + 3, + 10, + 8, + 8, + 10, + 2, + 18, + 4, + 10, + 16, + 13, + 7, + 7, + 13, + 8, + 5, + 12, + 5, + 6, + 9, + 11, + 9, + 8, + 7, + 9, + 16, + 10, + 5, + 4, + 12, + 9, + 9, + 4, + 6, + 3, + 6, + 11, + 13, + 10, + 9, + 11, + 11, + 6, + 6, + 12, + 7, + 10, + 10, + 6, + 9, + 12, + 7, + 9, + 8, + 9, + 11, + 13, + 12, + 3, + 8, + 12, + 17, + 9, + 3, + 6, + 7, + 4, + 4, + 8, + 12, + 17, + 11, + 10, + 5, + 9, + 4, + 14, + 13, + 12, + 9, + 6, + 6, + 8, + 11, + 8, + 12, + 3, + 13, + 4, + 6, + 8, + 3, + 7, + 7, + 3, + 12, + 9, + 4, + 4, + 11, + 8, + 7, + 10, + 9, + 15, + 11, + 7, + 5, + 6, + 10, + 4, + 3, + 12, + 12, + 5 + ], + "colorbar": { + "thickness": 15, + "title": { + "side": "right", + "text": "Node Connections" + }, + "xanchor": "left" + }, + "colorscale": [ + [ + 0, + "rgb(255,255,217)" + ], + [ + 0.125, + "rgb(237,248,177)" + 
], + [ + 0.25, + "rgb(199,233,180)" + ], + [ + 0.375, + "rgb(127,205,187)" + ], + [ + 0.5, + "rgb(65,182,196)" + ], + [ + 0.625, + "rgb(29,145,192)" + ], + [ + 0.75, + "rgb(34,94,168)" + ], + [ + 0.875, + "rgb(37,52,148)" + ], + [ + 1, + "rgb(8,29,88)" + ] + ], + "line": { + "width": 2 + }, + "reversescale": true, + "showscale": true, + "size": 10 + }, + "mode": "markers", + "text": [ + "# of connections: 11", + "# of connections: 14", + "# of connections: 7", + "# of connections: 10", + "# of connections: 7", + "# of connections: 10", + "# of connections: 3", + "# of connections: 14", + "# of connections: 9", + "# of connections: 8", + "# of connections: 9", + "# of connections: 6", + "# of connections: 6", + "# of connections: 9", + "# of connections: 15", + "# of connections: 13", + "# of connections: 15", + "# of connections: 10", + "# of connections: 2", + "# of connections: 13", + "# of connections: 9", + "# of connections: 2", + "# of connections: 9", + "# of connections: 9", + "# of connections: 9", + "# of connections: 6", + "# of connections: 15", + "# of connections: 11", + "# of connections: 9", + "# of connections: 6", + "# of connections: 6", + "# of connections: 13", + "# of connections: 13", + "# of connections: 12", + "# of connections: 12", + "# of connections: 10", + "# of connections: 13", + "# of connections: 14", + "# of connections: 10", + "# of connections: 7", + "# of connections: 12", + "# of connections: 8", + "# of connections: 8", + "# of connections: 12", + "# of connections: 7", + "# of connections: 11", + "# of connections: 3", + "# of connections: 6", + "# of connections: 11", + "# of connections: 9", + "# of connections: 4", + "# of connections: 10", + "# of connections: 17", + "# of connections: 1", + "# of connections: 10", + "# of connections: 16", + "# of connections: 10", + "# of connections: 10", + "# of connections: 7", + "# of connections: 13", + "# of connections: 5", + "# of connections: 13", + "# of connections: 10", + 
"# of connections: 16", + "# of connections: 8", + "# of connections: 13", + "# of connections: 18", + "# of connections: 8", + "# of connections: 7", + "# of connections: 12", + "# of connections: 14", + "# of connections: 16", + "# of connections: 15", + "# of connections: 13", + "# of connections: 10", + "# of connections: 14", + "# of connections: 15", + "# of connections: 7", + "# of connections: 7", + "# of connections: 7", + "# of connections: 10", + "# of connections: 17", + "# of connections: 12", + "# of connections: 10", + "# of connections: 8", + "# of connections: 3", + "# of connections: 10", + "# of connections: 8", + "# of connections: 8", + "# of connections: 10", + "# of connections: 2", + "# of connections: 18", + "# of connections: 4", + "# of connections: 10", + "# of connections: 16", + "# of connections: 13", + "# of connections: 7", + "# of connections: 7", + "# of connections: 13", + "# of connections: 8", + "# of connections: 5", + "# of connections: 12", + "# of connections: 5", + "# of connections: 6", + "# of connections: 9", + "# of connections: 11", + "# of connections: 9", + "# of connections: 8", + "# of connections: 7", + "# of connections: 9", + "# of connections: 16", + "# of connections: 10", + "# of connections: 5", + "# of connections: 4", + "# of connections: 12", + "# of connections: 9", + "# of connections: 9", + "# of connections: 4", + "# of connections: 6", + "# of connections: 3", + "# of connections: 6", + "# of connections: 11", + "# of connections: 13", + "# of connections: 10", + "# of connections: 9", + "# of connections: 11", + "# of connections: 11", + "# of connections: 6", + "# of connections: 6", + "# of connections: 12", + "# of connections: 7", + "# of connections: 10", + "# of connections: 10", + "# of connections: 6", + "# of connections: 9", + "# of connections: 12", + "# of connections: 7", + "# of connections: 9", + "# of connections: 8", + "# of connections: 9", + "# of connections: 11", + "# of 
connections: 13", + "# of connections: 12", + "# of connections: 3", + "# of connections: 8", + "# of connections: 12", + "# of connections: 17", + "# of connections: 9", + "# of connections: 3", + "# of connections: 6", + "# of connections: 7", + "# of connections: 4", + "# of connections: 4", + "# of connections: 8", + "# of connections: 12", + "# of connections: 17", + "# of connections: 11", + "# of connections: 10", + "# of connections: 5", + "# of connections: 9", + "# of connections: 4", + "# of connections: 14", + "# of connections: 13", + "# of connections: 12", + "# of connections: 9", + "# of connections: 6", + "# of connections: 6", + "# of connections: 8", + "# of connections: 11", + "# of connections: 8", + "# of connections: 12", + "# of connections: 3", + "# of connections: 13", + "# of connections: 4", + "# of connections: 6", + "# of connections: 8", + "# of connections: 3", + "# of connections: 7", + "# of connections: 7", + "# of connections: 3", + "# of connections: 12", + "# of connections: 9", + "# of connections: 4", + "# of connections: 4", + "# of connections: 11", + "# of connections: 8", + "# of connections: 7", + "# of connections: 10", + "# of connections: 9", + "# of connections: 15", + "# of connections: 11", + "# of connections: 7", + "# of connections: 5", + "# of connections: 6", + "# of connections: 10", + "# of connections: 4", + "# of connections: 3", + "# of connections: 12", + "# of connections: 12", + "# of connections: 5" + ], + "type": "scatter", + "x": [ + 0.4182243125490408, + 0.12286879065958844, + 0.6730431696885844, + 0.38165116541180344, + 0.6084965344664286, + 0.18155558675901884, + 0.7722862313192606, + 0.5368181409256901, + 0.8304626469521129, + 0.7924139234898422, + 0.8266354543284289, + 0.4023039585223629, + 0.5084198498293618, + 0.23992481624351925, + 0.2742000416622462, + 0.15570283642495664, + 0.07513674080757637, + 0.7247552078664479, + 0.2586357176925591, + 0.595945044435614, + 0.9428542201780316, + 
0.03304679952258993, + 0.6013564651959642, + 0.1130639188502468, + 0.5531504465254558, + 0.1635981270944994, + 0.05512117222879742, + 0.32578353530864457, + 0.27440213390552737, + 0.2728250610713022, + 0.6346565064837861, + 0.6327007577432437, + 0.800297854626628, + 0.526779936668903, + 0.413948124857326, + 0.09276814106220677, + 0.662108954544855, + 0.07163295816605642, + 0.44119458804978295, + 0.7364515013041172, + 0.7827775151390383, + 0.9600359726880752, + 0.8511753697833563, + 0.05194805532761382, + 0.03187584930858911, + 0.07426685281627932, + 0.5257999712304688, + 0.9998698320754983, + 0.09471702229050472, + 0.6953901849658966, + 0.03446402354654854, + 0.9082570345357789, + 0.3740122792611037, + 0.977854801698089, + 0.5436816885151938, + 0.06202421257916635, + 0.8589937476561325, + 0.06879886671193436, + 0.19921682827804632, + 0.1823584228427031, + 0.37549158943196925, + 0.5433115547736789, + 0.37848025459696877, + 0.3821391536049519, + 0.7204214783753378, + 0.2955343345493908, + 0.09053866681881584, + 0.7181048560087516, + 0.10310287300704979, + 0.8247840830312709, + 0.1573630170264504, + 0.31305791514229697, + 0.298647499376007, + 0.3246624829381992, + 0.19852054651169693, + 0.3328704753356456, + 0.33203393677870674, + 0.5461279353327784, + 0.9636084967560627, + 0.9503884723051484, + 0.13747604708068628, + 0.3499260998923053, + 0.3181124346701171, + 0.89080246263295, + 0.9521646983336837, + 0.6776948411821848, + 0.0023771443647881974, + 0.7007214129943925, + 0.7188906153197968, + 0.47055154706870017, + 0.19043749918150743, + 0.5274116361492907, + 0.9162463356603696, + 0.7042334738295596, + 0.555788147264811, + 0.5805679633404117, + 0.587704695878027, + 0.916634041055854, + 0.7948577020793985, + 0.9210876029743161, + 0.834199864808296, + 0.5989925957177575, + 0.05973078995013337, + 0.5593951498649633, + 0.5229468203255856, + 0.22007362873840486, + 0.37301066653863624, + 0.8613129225222332, + 0.9663892923019699, + 0.2275256207367028, + 0.0852382135963593, + 
0.0914406510425998, + 0.9425745666137786, + 0.3019474379086241, + 0.2619562675328274, + 0.48218022499136737, + 0.5293212253918783, + 0.41808707877840445, + 0.14711158829428328, + 0.42926818011737133, + 0.9694266665187994, + 0.4404718698088387, + 0.4277213938753692, + 0.7059759544943667, + 0.4611021425875542, + 0.13940667248499528, + 0.3393815448042514, + 0.6370268640561303, + 0.9851894520572745, + 0.3247821296168134, + 0.9186278106648778, + 0.18507593174525072, + 0.5845953849421676, + 0.44175944307536974, + 0.7255980413609877, + 0.6058132814274794, + 0.7703024251104211, + 0.47443124751760235, + 0.9573079778783831, + 0.0201693226965588, + 0.17086936775877049, + 0.5291812256005789, + 0.5621062195646831, + 0.2121217358781844, + 0.16862303760247477, + 0.8846357375826375, + 0.0875467755337247, + 0.9473667691929577, + 0.8541827253649632, + 0.3414075728554137, + 0.9005048863870916, + 0.3318561006769827, + 0.7408684543182315, + 0.6149491168624189, + 0.12355952994556385, + 0.08997327822205015, + 0.21535391032155426, + 0.8323549266756429, + 0.8385234321105272, + 0.9240127894624793, + 0.6802728591951641, + 0.25656414507004344, + 0.020212382594376965, + 0.32444561774289593, + 0.4564806171162211, + 0.838803404513024, + 0.6322124026692795, + 0.8505181106970376, + 0.0897773631019545, + 0.7607451357487841, + 0.02312833765025224, + 0.05596958524873419, + 0.3187675293980876, + 0.5191285820034173, + 0.4349682989231034, + 0.04781523934390508, + 0.014269300880037306, + 0.9636590456207981, + 0.8680862155815134, + 0.4363707938884992, + 0.20133087739958255, + 0.6234379896430121, + 0.6314926226168458, + 0.29978148854693865, + 0.33721825060791266, + 0.7518492361353024, + 0.4442228752887084, + 0.04237200971819888, + 0.5201251204037126, + 0.038579501382332126, + 0.9110645875753355, + 0.5593069337955722, + 0.8668565351624634, + 0.42077304608666055, + 0.5465171974419871, + 0.7333209824474588, + 0.4039327719907384, + 0.34114125407236195, + 0.01777064460825195, + 0.992283435751248 + ], + "y": [ + 
0.09053726824382247, + 0.571085214777101, + 0.5199666766946885, + 0.33766327379542094, + 0.17196466768963936, + 0.17708608014427518, + 0.04649454781195783, + 0.37080565676900146, + 0.3602866247185619, + 0.9483925173875926, + 0.3061539627540061, + 0.9643804220706982, + 0.8336885167043149, + 0.5944498275635773, + 0.2373268562908326, + 0.23741932367240448, + 0.32127102230894566, + 0.3661437355856225, + 0.7791505090281524, + 0.3648985367210805, + 0.6244837238804738, + 0.9012137046519791, + 0.5219101415039136, + 0.39453602200590676, + 0.2009582712064717, + 0.04224314617430658, + 0.2381682330796122, + 0.33811323660241943, + 0.5216765314868881, + 0.6001026871900049, + 0.991844460003468, + 0.3343459796676115, + 0.8957623407464501, + 0.4208812619135248, + 0.31304614249644347, + 0.6773365837969099, + 0.4307004647175262, + 0.3309683982450944, + 0.2697998035002954, + 0.9727770125665405, + 0.40557198035837094, + 0.35532572275494023, + 0.5850986908522726, + 0.17296378957033465, + 0.6628083689885368, + 0.6160873747407943, + 0.025297953521542405, + 0.24028581536328997, + 0.5186581897030644, + 0.8423383207045981, + 0.7537809293531343, + 0.3192831323823997, + 0.17542400609184483, + 0.008409380348177398, + 0.938767234846119, + 0.24033413659841596, + 0.8791466031622056, + 0.5634679987017406, + 0.05938145280899054, + 0.6012106694454529, + 0.6705222836834548, + 0.3900960314334032, + 0.055894273053114896, + 0.14933184162295132, + 0.8151159149468827, + 0.17619771419691865, + 0.2981410655965283, + 0.7334929583472656, + 0.04283815208078323, + 0.922341377568881, + 0.3199684158322815, + 0.1278305132468397, + 0.21532966919867302, + 0.0731473655342364, + 0.4898861106787329, + 0.2695720924906413, + 0.09533319097359638, + 0.9874110419208606, + 0.1333966979371528, + 0.2529891644068947, + 0.45431497833000367, + 0.24454670425362057, + 0.20002447568886628, + 0.6267294109959968, + 0.5221172076712435, + 0.6512622326935055, + 0.3355480553373167, + 0.4834545718278357, + 0.4847615611240751, + 
0.20619722773579274, + 0.9419075807648644, + 0.3098874271134545, + 0.04149975738749545, + 0.32266487999330984, + 0.4295667428124167, + 0.35350564895305514, + 0.15069304516745607, + 0.41535454584101794, + 0.8821215709600496, + 0.9542382277667263, + 0.7205270186163313, + 0.31541428705224306, + 0.010366221042083845, + 0.06016942899581168, + 0.8867112408398291, + 0.5204579980957379, + 0.4500538798110242, + 0.40395348439090084, + 0.5717872069066212, + 0.42052616285893474, + 0.2660491488293679, + 0.10782775946098799, + 0.7302384542961842, + 0.8520196094107113, + 0.4140065537970282, + 0.4467311570808764, + 0.5144551437666581, + 0.7454337953380579, + 0.03395115206665145, + 0.7077207700167599, + 0.9024846524956353, + 0.055897802218322856, + 0.2880647319459674, + 0.9328536520894143, + 0.9298960866412943, + 0.6352288779182178, + 0.04153202488293273, + 0.7834166246251234, + 0.6710484758334021, + 0.3202314429055858, + 0.9961038345306213, + 0.4107398412471005, + 0.24013807075121119, + 0.8599268392047722, + 0.4318165589087314, + 0.2693681584998491, + 0.3340702546567942, + 0.4421375373865315, + 0.5492873750243871, + 0.5981086798045652, + 0.3635517670405215, + 0.42641694849778966, + 0.3333136626479075, + 0.8848427298858184, + 0.6648266103848882, + 0.9344432405222354, + 0.249116699886752, + 0.6201266549140614, + 0.16781555203357146, + 0.4937592635708411, + 0.4248880785102581, + 0.9435179236599912, + 0.07011604000159166, + 0.9078978130468089, + 0.6072525121642058, + 0.23700988477155205, + 0.05477321631284726, + 0.9642772106357639, + 0.019989772968585173, + 0.29119156039108685, + 0.6217058876501556, + 0.1341994714416056, + 0.28871122138225125, + 0.06013197669987258, + 0.22993075379681738, + 0.5752985482362863, + 0.10059463740220753, + 0.8157570218353161, + 0.42203254876563234, + 0.7925454632595156, + 0.2002886163837997, + 0.8622415881936324, + 0.3169605131706372, + 0.6014235590484225, + 0.9958360522915445, + 0.5260776190209286, + 0.8350595230795331, + 0.24102842320743, + 
0.19048093242734687, + 0.7005910562446783, + 0.29050814087118004, + 0.8508124987550889, + 0.6714278208298593, + 0.9756800437762957, + 0.040563128366188694, + 0.5097617399826666, + 0.8842114977564064, + 0.13201947050262697, + 0.09959517902538939, + 0.2318219208408404, + 0.888980486534156, + 0.9513646744432486, + 0.1294716874165911, + 0.5603277981830703, + 0.4868902788925622, + 0.038844634468288675, + 0.547451424618544, + 0.32345881810688737, + 0.20307680326083377, + 0.9810704436128125 + ] + } + ], + "layout": { + "annotations": [ + { + "showarrow": true, + "text": "graphs", + "x": 0.005, + "xref": "paper", + "y": -0.002, + "yref": "paper" + } + ], + "hovermode": "closest", + "margin": { + "b": 20, + "l": 5, + "r": 5, + "t": 40 + }, + "showlegend": false, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, 
+ "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 
0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + 
"ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + 
"#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + 
"x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 16 + }, + "text": "Network graph made with Python" + }, + "xaxis": { + "showgrid": false, + "showticklabels": false, + "zeroline": false + }, + "yaxis": { + "showgrid": false, + "showticklabels": false, + "zeroline": false + } + } + }, + "text/html": [ + "
" ] }, - "execution_count": 12, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "G.nodes['Citizens For Kail']" + "G = nx.random_geometric_graph(200, 0.125)\n", + "edge_x = []\n", + "edge_y = []\n", + "for edge in G.edges():\n", + " x0, y0 = G.nodes[edge[0]]['pos']\n", + " x1, y1 = G.nodes[edge[1]]['pos']\n", + " edge_x.append(x0)\n", + " edge_x.append(x1)\n", + " edge_x.append(None)\n", + " edge_y.append(y0)\n", + " edge_y.append(y1)\n", + " edge_y.append(None)\n", + "\n", + "edge_trace = go.Scatter(\n", + " x=edge_x, y=edge_y,\n", + " line=dict(width=0.5, color='#888'),\n", + " hoverinfo='none',\n", + " mode='lines')\n", + "\n", + "node_x = []\n", + "node_y = []\n", + "for node in G.nodes():\n", + " x, y = G.nodes[node]['pos']\n", + " node_x.append(x)\n", + " node_y.append(y)\n", + "\n", + "node_trace = go.Scatter(\n", + " x=node_x, y=node_y,\n", + " mode='markers',\n", + " hoverinfo='text',\n", + " marker=dict(\n", + " showscale=True,\n", + " # colorscale options\n", + " #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |\n", + " #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |\n", + " #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |\n", + " colorscale='YlGnBu',\n", + " reversescale=True,\n", + " color=[],\n", + " size=10,\n", + " colorbar=dict(\n", + " thickness=15,\n", + " title='Node Connections',\n", + " xanchor='left',\n", + " titleside='right'\n", + " ),\n", + " line_width=2))\n", + "\n", + "node_adjacencies = []\n", + "node_text = []\n", + "for node, adjacencies in enumerate(G.adjacency()):\n", + " node_adjacencies.append(len(adjacencies[1]))\n", + " node_text.append('# of connections: '+str(len(adjacencies[1])))\n", + "\n", + "node_trace.marker.color = node_adjacencies\n", + "node_trace.text = node_text\n", + "\n", + "\n", + "fig = go.Figure(data=[edge_trace, node_trace],\n", + " layout=go.Layout(\n", + " title='Network graph made with Python',\n", + " 
titlefont_size=16,\n", + " showlegend=False,\n", + " hovermode='closest',\n", + " margin=dict(b=20,l=5,r=5,t=40),\n", + " annotations=[ dict(\n", + " text=\"graphs\",\n", + " showarrow=True,\n", + " xref=\"paper\", yref=\"paper\",\n", + " x=0.005, y=-0.002 ) ],\n", + " xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n", + " yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))\n", + " )\n", + "fig.show()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "G = nx.Graph()\n", + "G.add_node(0)\n", + "nx.set_node_attributes(G, \"red\", name=\"color\")\n", + "nx.set_node_attributes(G, 2, name=\"size\")\n", + "G.add_node(1)\n", + "nx.set_node_attributes(G, np.nan, name='color')\n", + "G.nodes[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "metadata": {}, "outputs": [ { - "ename": "KeyError", - "evalue": "'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'", + "ename": "NetworkXError", + "evalue": "Invalid edge_attr argument: ['donations', 'received']", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m node_color \u001b[38;5;241m=\u001b[39m [G\u001b[38;5;241m.\u001b[39mdegree(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# node colour is a list of degrees of nodes \u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m node_size \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[38;5;241;43m0.0005\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_node_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpopulation\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mG\u001b[49m\u001b[43m]\u001b[49m \n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# size of node is a list of population of cities \u001b[39;00m\n\u001b[1;32m 10\u001b[0m edge_width \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0.0015\u001b[39m \u001b[38;5;241m*\u001b[39m G[u][v][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m u, v \u001b[38;5;129;01min\u001b[39;00m G\u001b[38;5;241m.\u001b[39medges()] \n", - "Cell \u001b[0;32mIn[8], line 7\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 4\u001b[0m node_color \u001b[38;5;241m=\u001b[39m [G\u001b[38;5;241m.\u001b[39mdegree(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] \n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# node colour is a list of degrees of nodes \u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m node_size \u001b[38;5;241m=\u001b[39m [\u001b[38;5;241m0.0005\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_node_attributes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mG\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mpopulation\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43mv\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m G] \n\u001b[1;32m 8\u001b[0m \u001b[38;5;66;03m# size of node is a list of population of cities \u001b[39;00m\n\u001b[1;32m 10\u001b[0m edge_width \u001b[38;5;241m=\u001b[39m 
[\u001b[38;5;241m0.0015\u001b[39m \u001b[38;5;241m*\u001b[39m G[u][v][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m u, v \u001b[38;5;129;01min\u001b[39;00m G\u001b[38;5;241m.\u001b[39medges()] \n", - "\u001b[0;31mKeyError\u001b[0m: 'REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN PAC'" + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/indexes/base.py:3653\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3652\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3653\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/_libs/index.pyx:147\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/_libs/index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'donations'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File 
\u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:455\u001b[0m, in \u001b[0;36mfrom_pandas_edgelist\u001b[0;34m(df, source, target, edge_attr, create_using, edge_key)\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 455\u001b[0m attribute_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcol\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mattr_col_headings\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:455\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 455\u001b[0m attribute_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;241m*\u001b[39m[\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m attr_col_headings])\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/frame.py:3761\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3760\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3761\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/indexes/base.py:3655\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3655\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3658\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n", + "\u001b[0;31mKeyError\u001b[0m: 'donations'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mNetworkXError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m G \u001b[38;5;241m=\u001b[39m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas_edgelist\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mname\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdonations_to\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43medge_attr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdonations\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreceived\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m G\u001b[38;5;241m.\u001b[39mnodes()\n\u001b[1;32m 3\u001b[0m pos\u001b[38;5;241m=\u001b[39mnx\u001b[38;5;241m.\u001b[39mspring_layout(G)\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/utils/backends.py:412\u001b[0m, in \u001b[0;36m_dispatch.__call__\u001b[0;34m(self, backend, *args, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m/\u001b[39m, \u001b[38;5;241m*\u001b[39margs, backend\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m backends:\n\u001b[1;32m 411\u001b[0m \u001b[38;5;66;03m# Fast path if no backends are installed\u001b[39;00m\n\u001b[0;32m--> 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43morig_func\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;66;03m# Use `backend_name` in this function instead of `backend`\u001b[39;00m\n\u001b[1;32m 415\u001b[0m backend_name \u001b[38;5;241m=\u001b[39m backend\n", + "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:458\u001b[0m, in \u001b[0;36mfrom_pandas_edgelist\u001b[0;34m(df, source, target, edge_attr, create_using, edge_key)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 457\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid edge_attr argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00medge_attr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 458\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m nx\u001b[38;5;241m.\u001b[39mNetworkXError(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 460\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m g\u001b[38;5;241m.\u001b[39mis_multigraph():\n\u001b[1;32m 461\u001b[0m \u001b[38;5;66;03m# => append the edge keys from the df to the bundled data\u001b[39;00m\n\u001b[1;32m 462\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m edge_key \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[0;31mNetworkXError\u001b[0m: Invalid edge_attr argument: ['donations', 'received']" ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], + "source": [ + "G = nx.from_pandas_edgelist(sample_df,source='name',target='donations_to',edge_attr=['donations','received'])\n", + "G.nodes()\n", + "pos=nx.spring_layout(G)\n", + "weights = list(nx.get_edge_attributes(G,'donations').values())\n", + "weights = [i/5000 for i in weights]\n", + "node_color = [G.degree(v) for v in G] \n", + "#node_size = [0.0005 * nx.get_node_attributes(G, 'donations')[v] for v in G] \n", + "nx.draw_networkx_nodes(G, pos, node_color=node_color)#, node_size=node_size) \n", + "nx.draw_networkx_edges(G, pos, width=weights)\n", + "nx.draw_networkx_labels(G, pos)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "\n", "# fixing the size of the figure \n", @@ -2414,20 +11439,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'color': 'white'}" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "G = nx.MultiDiGraph()\n", "G.add_node(0)\n", @@ -2440,20 +11454,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'color': 'white', 'age': 4}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "G.add_node(2)\n", "nx.set_node_attributes(G, 4, name='age')\n", From b8da98e509ad572dc736228a49d0f067eed063e2 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 17:26:11 +0000 Subject: [PATCH 173/214] updating splink function --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 5988a8e3..d7237037 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -683,7 +683,7 @@ def splink_dedupe( deduped_df = 
pd.merge( first_instance_df, - match_list_df[["cluster_id"]], + match_list_df[["cluster_id", "duplicated"]], on="cluster_id", how="left", ) From 0185093f0ca00189f9959399693d18127b530c0b Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 17:26:27 +0000 Subject: [PATCH 174/214] pipeline updates --- utils/linkage_pipeline.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 9baa5204..e80bd032 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -151,44 +151,40 @@ def main(): organizations = preprocess_organizations(organizations) transactions = preprocess_transactions(transactions) - # Deduplicates perfect matches and creates a new csv file - # in output titled "deduplicated_UUIDs.csv" + individuals, organizations = classify_wrapper(individuals, organizations) + individuals = deduplicate_perfect_matches(individuals) organizations = deduplicate_perfect_matches(organizations) - cleaned_individuals_output_path = ( - BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" - ) - - cleaned_organizations_output_path = ( - BASE_FILEPATH / "output" / "cleaned_organizations_table.csv" - ) - - cleaned_transactions_output_path = ( - BASE_FILEPATH / "output" / "cleaned_transactions_table.csv" - ) - deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") - # Splink deduplication individuals["unique_id"] = individuals["id"] organizations["unique_id"] = organizations["id"] - individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) - organizations = splink_dedupe( organizations, organizations_settings, organizations_blocking ) - # Classifies individuals and organizations with a new 'classification' - # column containing 'neutral', 'f', or 'c' - individuals, organizations = classify_wrapper(individuals, organizations) + individuals = splink_dedupe( + individuals, individuals_settings, 
individuals_blocking + ) - # Update the transactions table with the deduplicated UUIDs transactions[["donor_id", "recipient_id"]] = transactions[ ["donor_id", "recipient_id"] ].replace(deduped) + cleaned_individuals_output_path = ( + BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" + ) + + cleaned_organizations_output_path = ( + BASE_FILEPATH / "output" / "cleaned_organizations_table.csv" + ) + + cleaned_transactions_output_path = ( + BASE_FILEPATH / "output" / "cleaned_transactions_table.csv" + ) + individuals.to_csv(cleaned_individuals_output_path, index=False) organizations.to_csv(cleaned_organizations_output_path, index=False) transactions.to_csv(cleaned_transactions_output_path, index=False) From f05778b1171fe12f17d3d104311679b76f3a751d Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 17:29:21 +0000 Subject: [PATCH 175/214] passing linter --- utils/linkage_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index e80bd032..537e79d3 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -1,7 +1,7 @@ import pandas as pd -from classify import classify_wrapper from nameparser import HumanName +from utils.classify import classify_wrapper from utils.constants import ( BASE_FILEPATH, individuals_blocking, @@ -165,9 +165,7 @@ def main(): organizations, organizations_settings, organizations_blocking ) - individuals = splink_dedupe( - individuals, individuals_settings, individuals_blocking - ) + individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) transactions[["donor_id", "recipient_id"]] = transactions[ ["donor_id", "recipient_id"] From 6de450df8e8b91eb40b1803e4bbcd4f698dd9dea Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Mon, 4 Mar 2024 17:31:16 +0000 Subject: [PATCH 176/214] linter --- utils/linkage_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 537e79d3..ac911559 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -165,7 +165,9 @@ def main(): organizations, organizations_settings, organizations_blocking ) - individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) + individuals = splink_dedupe( + individuals, individuals_settings, individuals_blocking + ) transactions[["donor_id", "recipient_id"]] = transactions[ ["donor_id", "recipient_id"] From 96b8e0b33b6d7f9cd9b9ac70d5de1f1ededca7b8 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 12:21:22 -0600 Subject: [PATCH 177/214] updated network graph work --- utils/network.py | 83 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/utils/network.py b/utils/network.py index 88572aff..d6222cc8 100644 --- a/utils/network.py +++ b/utils/network.py @@ -29,24 +29,77 @@ def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: return None +def combine_datasets_for_network_graph(dfs: list[pd.DataFrame]) -> pd.DataFrame: + """Combines the 3 dataframes into a single dataframe to create the graph + + Given the inds, orgs, and transactions dataframes, the func first finds the + recipient_id in the transaction dataframe in either the org or inds + dataframes and adds the name of the recipient to the transaction df. Then, + the inds and orgs dfs are merged with the transaction df and concatenated + with the contributions amount aggregated, making a final dataframe of the + merged transactions and entity dataframes. 
+ + Args: + list of dataframes in the order: [inds_df, orgs_df, transactions_df] + Transactions dataframe with at least column: 'recipient_id' + Individuals dataframe with at least column: 'full_name' + Organizations dataframe with at least column: 'name' + + Returns + A merged dataframe with aggregate contribution amounts between entitites + """ + + inds_df, orgs_df, transactions_df = dfs + + # first update the transactions df to have a recipient name tied to id + transactions_df["recipient_name"] = transactions_df["recipient_id"].apply( + name_identifier, args=([orgs_df, inds_df],) + ) + + # next, merge the inds_df and orgs_df with the transactions_df + inds_trans_df = pd.merge( + inds_df, transactions_df, how="left", left_on="id", right_on="donor_id" + ) + inds_trans_df = inds_trans_df.dropna(subset=["amount"]) + orgs_trans_df = pd.merge( + orgs_df, transactions_df, how="left", left_on="id", right_on="donor_id" + ) + orgs_trans_df = orgs_trans_df.dropna(subset=["amount"]) + orgs_trans_df = orgs_trans_df.rename(columns={"name": "full_name"}) + + # concatenated the merged dfs + merged_df = pd.concat([orgs_trans_df, inds_trans_df]) + + # lastly, create the final dataframe with aggregated attributes + attribute_cols = merged_df.columns.difference( + ["donor_id", "recipient_id", "full_name", "recipient_name"] + ) + agg_functions = { + col: "sum" if col == "amount" else "first" for col in attribute_cols + } + aggreg_df = ( + merged_df.groupby( + ["donor_id", "recipient_id", "full_name", "recipient_name"] + ) + .agg(agg_functions) + .reset_index() + ) + + return aggreg_df + + def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: """Takes in a dataframe and generates a MultiDiGraph where the nodes are entity names, and the rest of the dataframe columns make the node attributes Args: - df: a pandas dataframe (complete_individuals_table / - complete_organizations_table) + df: a pandas dataframe with merged information from the inds, orgs, & + transactions 
dataframes Returns: A Networkx MultiDiGraph with nodes and edges """ G = nx.MultiDiGraph() - # first check if df is individuals or organizations dataset - if "name" in df.columns: - node_name = "name" - else: - node_name = "full_name" - edge_columns = [ "office_sought", "purpose", @@ -60,7 +113,7 @@ def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: for _, row in df.iterrows(): # add node attributes based on the columns relevant to the entity G.add_node( - row[node_name], + row["full_name"], **row[df.columns.difference(edge_columns)].dropna().to_dict(), ) # add the recipient as a node @@ -68,7 +121,7 @@ def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: # add the edge attributes between two nodes edge_attributes = row[edge_columns].dropna().to_dict() - G.add_edge(row[node_name], row["recipient_name"], **edge_attributes) + G.add_edge(row["full_name"], row["recipient_name"], **edge_attributes) return G @@ -102,11 +155,21 @@ def plot_network_graph(G: nx.MultiDiGraph): marker=dict(showscale=True, colorscale="YlGnBu", size=10), ) + node_trace["marker"]["color"] = [] for node in G.nodes(): node_info = f"Name: {node}
" for key, value in G.nodes[node].items(): node_info += f"{key}: {value}
" node_trace["text"] += tuple([node_info]) + classification = G.nodes[node].get("classification", "neutral") + # Assign a color based on the classification value + if classification == "c": + color = "blue" + elif classification == "f": + color = "red" + else: + color = "green" # Default color for unknown/neutral classification + node_trace["marker"]["color"] += tuple([color]) # Define layout settings layout = go.Layout( From 51cc9def6d793165c9022c21124e4a40e30a6c38 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 14:57:36 -0600 Subject: [PATCH 178/214] updated classify test --- utils/tests/test_classifier.py | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 utils/tests/test_classifier.py diff --git a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py new file mode 100644 index 00000000..602c52ac --- /dev/null +++ b/utils/tests/test_classifier.py @@ -0,0 +1,45 @@ +import numpy as np +import pandas as pd +import pytest + +from utils.classify import matcher + +d = { + "name": [ + "bob von rosevich", + "anantarya smith", + "bob j vonrosevich", + "missy elliot", + "mr johnson", + "quarantin directino", + "missy eliot", + "joseph johnson", + ], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + "42 Hollywood Boulevard, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + ], +} + +test_df = pd.DataFrame(data=d) + +test_df["classification"] = "neutral" + + +@pytest.fixture +def matcher_scen_1(): + return test_df + + +def test_matcher_scen_1(matcher_scen_1): + res = matcher(matcher_scen_1, "Fancy", "address", "f") + + assert np.all( + res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) + ) From 4cc7ce4c8a7d8edc50a7f032a96a25b0c74db60f Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 15:03:32 -0600 Subject: [PATCH 179/214] fix pytest --- 
utils/classify.py | 19 ++++++------------- utils/tests/test_classifier.py | 3 ++- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 3c24f941..4061970a 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -3,7 +3,9 @@ from utils.constants import c_org_names, f_companies, f_org_names -def classify_wrapper(individuals_df, organizations_df): +def classify_wrapper( + individuals_df: pd.DataFrame, organizations_df: pd.DataFrame +): """Wrapper for classificaiton in linkage pipeline Initialize the classify column in both dataframes and @@ -25,7 +27,7 @@ def classify_wrapper(individuals_df, organizations_df): return classified_individuals, classified_orgs -def matcher(df, substring, column, category): +def matcher(df: pd.DataFrame, substring: str, column: str, category: str): """Applies a label to the classification column based on substrings We run through a given column containing strings in the dataframe. We @@ -42,7 +44,7 @@ def matcher(df, substring, column, category): return df -def classify_individuals(individuals_df): +def classify_individuals(individuals_df: pd.DataFrame): """Part of the classification pipeline We apply the matcher function to the individuals dataframe @@ -56,7 +58,7 @@ def classify_individuals(individuals_df): return individuals_df -def classify_orgs(organizations_df): +def classify_orgs(organizations_df: pd.DataFrame): """Part of the classification pipeline We apply the matcher function to the organizations dataframe @@ -73,11 +75,6 @@ def classify_orgs(organizations_df): return organizations_df -inds_list = [] - -# a list of individual names - - def similarity_calculator( df: pd.DataFrame, subject: str, n: int, comparison_func ) -> pd.DataFrame: @@ -133,7 +130,3 @@ def automated_classifier( ) return similarities_df - - # we can use the indices and/or select manually, just add a new - # column to the subjects table - # that marks fossil fuels, green energy, or neither diff --git 
a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py index 602c52ac..b6bce883 100644 --- a/utils/tests/test_classifier.py +++ b/utils/tests/test_classifier.py @@ -38,7 +38,8 @@ def matcher_scen_1(): def test_matcher_scen_1(matcher_scen_1): - res = matcher(matcher_scen_1, "Fancy", "address", "f") + matcher(matcher_scen_1, "Fancy", "address", "f") + res = test_df[test_df["classification"] == "f"]["name"].values assert np.all( res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) From 7f3483cf4989799a50e4480d9d21594e0e66facc Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 15:09:53 -0600 Subject: [PATCH 180/214] updated classify and test_classifier --- utils/classify.py | 76 +++------------------------------- utils/tests/test_classifier.py | 46 ++++++++++++++++++++ 2 files changed, 52 insertions(+), 70 deletions(-) create mode 100644 utils/tests/test_classifier.py diff --git a/utils/classify.py b/utils/classify.py index 3c24f941..31e371ef 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -3,7 +3,9 @@ from utils.constants import c_org_names, f_companies, f_org_names -def classify_wrapper(individuals_df, organizations_df): +def classify_wrapper( + individuals_df: pd.DataFrame, organizations_df: pd.DataFrame +): """Wrapper for classificaiton in linkage pipeline Initialize the classify column in both dataframes and @@ -25,7 +27,7 @@ def classify_wrapper(individuals_df, organizations_df): return classified_individuals, classified_orgs -def matcher(df, substring, column, category): +def matcher(df: pd.DataFrame, substring: str, column: str, category: str): """Applies a label to the classification column based on substrings We run through a given column containing strings in the dataframe. 
We @@ -42,7 +44,7 @@ def matcher(df, substring, column, category): return df -def classify_individuals(individuals_df): +def classify_individuals(individuals_df: pd.DataFrame): """Part of the classification pipeline We apply the matcher function to the individuals dataframe @@ -56,7 +58,7 @@ def classify_individuals(individuals_df): return individuals_df -def classify_orgs(organizations_df): +def classify_orgs(organizations_df: pd.DataFrame): """Part of the classification pipeline We apply the matcher function to the organizations dataframe @@ -71,69 +73,3 @@ def classify_orgs(organizations_df): organizations_df = matcher(organizations_df, i, "name", "c") return organizations_df - - -inds_list = [] - -# a list of individual names - - -def similarity_calculator( - df: pd.DataFrame, subject: str, n: int, comparison_func -) -> pd.DataFrame: - """Find best matches to a subject name in a pandas dataframe - - For a given individual or organization, the subject, we search through the - 'name'column of a dataframe, select the n highest matches according to a - selected comparison function, and return those as a dataframe. This is meant - to be used manually to search for matches. For quick automated processing, see - automated_classifier(). 
- - Note that the comparison function must take in two inputs, both strings, and - output a percentage match - """ - - similarities_df = df.copy() - - similarities = similarities_df["name"].apply( - lambda x: comparison_func(x, subject) - ) - - similarities_df["similarities"] = similarities - - top_n_matches = similarities_df.sort_values( - by=["similarities"], ascending=False - )[0:n] - - return top_n_matches - - -def automated_classifier( - df: pd.DataFrame, subjects_dict: dict, threshold: float, comparison_func -): - """Using similarity_calculator, classify entities automatically - - Feeding a dictionary of names and the associated statuses, we compare - the string matches and, if they exceed a certain threshold, classify - them as belonging to some group specified in the subjects dictionary. - """ - - similarities_df = df.copy() - - for subject in subjects_dict: - similarities = similarities_df["name"].apply( - lambda x, sub=subject: comparison_func(x, sub) - ) - matches = similarities >= threshold - - status = subjects_dict[subject] - - similarities_df["classification"] = pd.Series(matches).apply( - lambda x, stat=status: stat if x else "neutral" - ) - - return similarities_df - - # we can use the indices and/or select manually, just add a new - # column to the subjects table - # that marks fossil fuels, green energy, or neither diff --git a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py new file mode 100644 index 00000000..b6bce883 --- /dev/null +++ b/utils/tests/test_classifier.py @@ -0,0 +1,46 @@ +import numpy as np +import pandas as pd +import pytest + +from utils.classify import matcher + +d = { + "name": [ + "bob von rosevich", + "anantarya smith", + "bob j vonrosevich", + "missy elliot", + "mr johnson", + "quarantin directino", + "missy eliot", + "joseph johnson", + ], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + "42 Hollywood 
Boulevard, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + ], +} + +test_df = pd.DataFrame(data=d) + +test_df["classification"] = "neutral" + + +@pytest.fixture +def matcher_scen_1(): + return test_df + + +def test_matcher_scen_1(matcher_scen_1): + matcher(matcher_scen_1, "Fancy", "address", "f") + res = test_df[test_df["classification"] == "f"]["name"].values + + assert np.all( + res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) + ) From 94f807c1693a04039c6f7f95114da81897ce489f Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 15:12:14 -0600 Subject: [PATCH 181/214] Revert "fix pytest" This reverts commit 4cc7ce4c8a7d8edc50a7f032a96a25b0c74db60f. i accidentally put this on the wrong branch --- utils/classify.py | 19 +++++++++++++------ utils/tests/test_classifier.py | 3 +-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 4061970a..3c24f941 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -3,9 +3,7 @@ from utils.constants import c_org_names, f_companies, f_org_names -def classify_wrapper( - individuals_df: pd.DataFrame, organizations_df: pd.DataFrame -): +def classify_wrapper(individuals_df, organizations_df): """Wrapper for classificaiton in linkage pipeline Initialize the classify column in both dataframes and @@ -27,7 +25,7 @@ def classify_wrapper( return classified_individuals, classified_orgs -def matcher(df: pd.DataFrame, substring: str, column: str, category: str): +def matcher(df, substring, column, category): """Applies a label to the classification column based on substrings We run through a given column containing strings in the dataframe. 
We @@ -44,7 +42,7 @@ def matcher(df: pd.DataFrame, substring: str, column: str, category: str): return df -def classify_individuals(individuals_df: pd.DataFrame): +def classify_individuals(individuals_df): """Part of the classification pipeline We apply the matcher function to the individuals dataframe @@ -58,7 +56,7 @@ def classify_individuals(individuals_df: pd.DataFrame): return individuals_df -def classify_orgs(organizations_df: pd.DataFrame): +def classify_orgs(organizations_df): """Part of the classification pipeline We apply the matcher function to the organizations dataframe @@ -75,6 +73,11 @@ def classify_orgs(organizations_df: pd.DataFrame): return organizations_df +inds_list = [] + +# a list of individual names + + def similarity_calculator( df: pd.DataFrame, subject: str, n: int, comparison_func ) -> pd.DataFrame: @@ -130,3 +133,7 @@ def automated_classifier( ) return similarities_df + + # we can use the indices and/or select manually, just add a new + # column to the subjects table + # that marks fossil fuels, green energy, or neither diff --git a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py index b6bce883..602c52ac 100644 --- a/utils/tests/test_classifier.py +++ b/utils/tests/test_classifier.py @@ -38,8 +38,7 @@ def matcher_scen_1(): def test_matcher_scen_1(matcher_scen_1): - matcher(matcher_scen_1, "Fancy", "address", "f") - res = test_df[test_df["classification"] == "f"]["name"].values + res = matcher(matcher_scen_1, "Fancy", "address", "f") assert np.all( res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) From d62f3b70049e55b6f26eaad2774d9bd7dca8c2e3 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 15:14:07 -0600 Subject: [PATCH 182/214] Revert "updated classify test" This reverts commit 51cc9def6d793165c9022c21124e4a40e30a6c38. 
accidentally on wrong branch --- utils/tests/test_classifier.py | 45 ---------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 utils/tests/test_classifier.py diff --git a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py deleted file mode 100644 index 602c52ac..00000000 --- a/utils/tests/test_classifier.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from utils.classify import matcher - -d = { - "name": [ - "bob von rosevich", - "anantarya smith", - "bob j vonrosevich", - "missy elliot", - "mr johnson", - "quarantin directino", - "missy eliot", - "joseph johnson", - ], - "address": [ - "3 Blue Drive, Chicago", - "4 Blue Drive, Chicago", - "8 Fancy Way, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - "42 Hollywood Boulevard, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - ], -} - -test_df = pd.DataFrame(data=d) - -test_df["classification"] = "neutral" - - -@pytest.fixture -def matcher_scen_1(): - return test_df - - -def test_matcher_scen_1(matcher_scen_1): - res = matcher(matcher_scen_1, "Fancy", "address", "f") - - assert np.all( - res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) - ) From 621e35ae28f38cb523ffb4b3f07c21c5a19781ab Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 15:21:56 -0600 Subject: [PATCH 183/214] expanded docstrings for classify --- utils/classify.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/utils/classify.py b/utils/classify.py index 31e371ef..915c30c3 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -11,11 +11,13 @@ def classify_wrapper( Initialize the classify column in both dataframes and call sub-functions classifying individuals and organizations - Args: individuals_df: cleaned and deduplicated dataframe of individuals - organizations_df: cleaned and deduplicated dataframe of organizations + Args: + 
individuals_df: cleaned and deduplicated dataframe of individuals + organizations_df: cleaned and deduplicated dataframe of organizations - Returns: individuals and organizations datfarames with a new - 'classification' column containing 'neutral', 'f', or 'c' + Returns: + individuals and organizations datfarames with a new + 'classification' column containing 'neutral', 'f', or 'c' """ individuals_df["classification"] = "neutral" @@ -35,6 +37,16 @@ def matcher(df: pd.DataFrame, substring: str, column: str, category: str): the classification column. We initialize using the 'neutral' label and use the 'f' and 'c' labels to denote fossil fuel and clean energy entities respectively. + + Args: + df: a pandas dataframe + substring: the string to search for + column: the column name in which to search + category: the category to assign the row, such as 'f' 'c' or 'neutral' + + Returns: + A pandas dataframe in which rows matching the substring conditions in + a certain column are marked with the appropriate category """ bool_series = df[column].str.contains(substring, na=False) @@ -50,6 +62,13 @@ def classify_individuals(individuals_df: pd.DataFrame): We apply the matcher function to the individuals dataframe repeatedly, using a variety of substrings to identify the employees of fossil fuel companies. + + Args: + individuals_df: a dataframe containing deduplicated + standardized individuals data + + Returns: + an individuals dataframe updated with the fossil fuels category """ for i in f_companies: @@ -64,6 +83,14 @@ def classify_orgs(organizations_df: pd.DataFrame): We apply the matcher function to the organizations dataframe repeatedly, using a variety of substrings to identify fossil fuel and clean energy companies. 
+ + Args: + organizations_df: a dataframe containing deduplicated + standardized organizations data + + Returns: + an organizations dataframe updated with the fossil fuels + and clean energy category """ for i in f_org_names: From cdf035a93e0ebac357bf73e9d45842af7ffc6770 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 16:15:40 -0600 Subject: [PATCH 184/214] updated visualizations for the graph --- utils/network.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/utils/network.py b/utils/network.py index d6222cc8..3fa86eaa 100644 --- a/utils/network.py +++ b/utils/network.py @@ -84,7 +84,7 @@ def combine_datasets_for_network_graph(dfs: list[pd.DataFrame]) -> pd.DataFrame: .agg(agg_functions) .reset_index() ) - + aggreg_df = aggreg_df.drop(["id"], axis=1) return aggreg_df @@ -136,16 +136,34 @@ def plot_network_graph(G: nx.MultiDiGraph): Returns: None. Creates a plotly graph """ edge_trace = go.Scatter( - x=[], y=[], line=dict(color="#888"), hoverinfo="text", mode="lines" + x=(), + y=(), + line=dict(color="#888", width=1.5), + hoverinfo="text", + mode="lines+markers", ) hovertext = [] + pos = nx.spring_layout(G) for edge in G.edges(data=True): - # donor = edge[0], recipient = edge[1] + source = edge[0] + target = edge[1] hovertext.append(f"Amount: {edge[2]['amount']:.2f}") + # Adding coordinates of source and target nodes to edge_trace + edge_trace["x"] += ( + pos[source][0], + pos[target][0], + None, + ) # None creates a gap between line segments + edge_trace["y"] += (pos[source][1], pos[target][1], None) edge_trace["hovertext"] = hovertext + # Define arrow symbol for edges + edge_trace["marker"] = dict( + symbol="arrow", color="#888", size=10, angleref="previous" + ) + node_trace = go.Scatter( x=[], y=[], @@ -154,13 +172,14 @@ def plot_network_graph(G: nx.MultiDiGraph): hoverinfo="text", marker=dict(showscale=True, colorscale="YlGnBu", size=10), ) - node_trace["marker"]["color"] = [] + 
for node in G.nodes(): node_info = f"Name: {node}
" for key, value in G.nodes[node].items(): node_info += f"{key}: {value}
" node_trace["text"] += tuple([node_info]) + # Get the classification value for the node classification = G.nodes[node].get("classification", "neutral") # Assign a color based on the classification value if classification == "c": @@ -168,18 +187,22 @@ def plot_network_graph(G: nx.MultiDiGraph): elif classification == "f": color = "red" else: - color = "green" # Default color for unknown/neutral classification + color = "green" # Default color for unknown classification node_trace["marker"]["color"] += tuple([color]) + # Add node positions to the trace + node_trace["x"] += tuple([pos[node][0]]) + node_trace["y"] += tuple([pos[node][1]]) + # Define layout settings layout = go.Layout( title="Network Graph Indicating Campaign Contributions from 2018-2022", titlefont=dict(size=16), - showlegend=False, + showlegend=True, hovermode="closest", margin=dict(b=20, l=5, r=5, t=40), - xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), - yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), + xaxis=dict(showgrid=True, zeroline=True, showticklabels=False), + yaxis=dict(showgrid=True, zeroline=True, showticklabels=False), ) fig = go.Figure(data=[edge_trace, node_trace], layout=layout) From 74996a56dc8e04007bef8986f139a9f658bdf7d5 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 17:47:39 -0600 Subject: [PATCH 185/214] updates to the README files under the output and data directories --- data/README.md | 9 +++++++++ output/README.md | 1 + 2 files changed, 10 insertions(+) diff --git a/data/README.md b/data/README.md index 5326bff8..9c154f71 100644 --- a/data/README.md +++ b/data/README.md @@ -160,3 +160,12 @@ contribution data and READMEs in a Google Drive for the duration of this project 3. 
The Finance Report states that a record must be kept for any contribution over \$10.00, but “Contributions and receipts of \$50.00 or less per contributor, during the reporting period, need not be itemized on the report” … this might mean that if 1,000 people for instance donate \$50 or less, there could be potentially thousands/tens of thousands of \$ not shown on the data, even though this information is recorded. This means that the total contributions that filers itemize does not necessarily reflect the total contributions they received. 4. Transparency USA has aggregated data on the contributions of individuals and committees. This could be a helpful source to cross-check the data and potentially help alleviate the debt-contribution issue. Pennsylvania' Dept. of State also offers a detailed website that shows all the aggregated contributions made and received, expenditures made, debts, and receipts. The catch is one must know which candidate they are looking for as it's a searchable database, but it can be very helpful for cross-matching and verification. Here's the link :https://www.campaignfinanceonline.pa.gov/Pages/CFReportSearch.aspx + +## classified_data +### Summary +- The classified_data subdirectory consists of 3 files: 'classified_individuals_v1', 'classified_organizations_v1' & 'transactions_v1'. These files are derived from the record_linkage pipeline, which mainly adds a classification column to the individuals and organizations entities that reflects the entity's affiliation with the fossil-fuel industry, clean-energy industry, or neutrality. These take the form of 'f' for fossil-fuel, 'c' for clean-energy, and 'neutral' for neutrality' + +### Format +- The 'classified_individuals_v1' dataset comprises of the following columns: ['id', 'first_name', 'last_name', 'full_name', 'entity_type', 'state','party', 'company', 'occupation', 'address', 'zip', 'city','classification']. As noted, the 'classification' column is the added column. 
+ +- The 'classified_organization_v1' dataset comprises of the following columns: ['id', 'name', 'state', 'entity_type', 'classification']. As noted, the 'classification' column is the added column diff --git a/output/README.md b/output/README.md index 932298fd..5c511d5e 100644 --- a/output/README.md +++ b/output/README.md @@ -1,2 +1,3 @@ # Output README --- +'deduplicated_UUIDs.csv' : Following record linkage work in the record_linkage pipeline, this file stores all the original uuids, and indicates the uuids to which the deduplicated uuids have been matched to. From 0cebc4ce893ba8fea4bbcd1767ef987af2aa0d6a Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 18:31:46 -0600 Subject: [PATCH 186/214] latest version of networkx work --- utils/network.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/utils/network.py b/utils/network.py index 3fa86eaa..5b95b2b4 100644 --- a/utils/network.py +++ b/utils/network.py @@ -207,3 +207,49 @@ def plot_network_graph(G: nx.MultiDiGraph): fig = go.Figure(data=[edge_trace, node_trace], layout=layout) fig.show() + + +# create pipeline + + +def construct_network_graph( + start_year: int, end_year: int, dfs: list[pd.DataFrame] +): + """Runs the network construction pipeline starting from 3 dataframes + + Args: + start_year & end_year: the range of the desired data + + Returns: + """ + inds_df, orgs_df, transactions_df = dfs + transactions_df = transactions_df.loc[ + (transactions_df.year >= start_year) + & (transactions_df.year <= end_year) + ] + + aggreg_df = combine_datasets_for_network_graph( + [inds_df, orgs_df, transactions_df] + ) + G = create_network_graph(aggreg_df) + plot_network_graph(G) + nx.write_adjlist(G, "Network Graph Node Data") + + +def main(): + """""" + text = input( + "Provide a range of desired years to extract data. Format is year1, \ + year2. 
Ex: 2018, 2023" + ) + + assert len(text == 2) + start_year, end_year = text.split(",") + construct_network_graph( + start_year, + end_year, + ) + + +if __name__ == "__main__": + construct_network_graph(1998, 2023) From 4625248cdffb6653a8f4401741daca8ef0f696a8 Mon Sep 17 00:00:00 2001 From: npashilkar Date: Mon, 4 Mar 2024 18:39:42 -0600 Subject: [PATCH 187/214] linkage.py clean up including additions to constants.py --- utils/constants.py | 28 +++++ utils/linkage.py | 268 +++++---------------------------------------- 2 files changed, 53 insertions(+), 243 deletions(-) diff --git a/utils/constants.py b/utils/constants.py index b4be2565..bc6d4cf6 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -727,3 +727,31 @@ "lcv victory", "league of conservation", ] + +suffixes = [ + "sr", + "jr", + "i", + "ii", + "iii", + "iv", + "v", + "vi", + "vii", + "viii", + "ix", + "x", +] + +titles = [ + "mr", + "ms", + "mrs", + "miss", + "prof", + "dr", + "doctor", + "sir", + "madam", + "professor", +] diff --git a/utils/linkage.py b/utils/linkage.py index 662d18ed..32a44dfc 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -1,15 +1,12 @@ -import math import os.path import re import numpy as np import pandas as pd -import textdistance as td import usaddress -from names_dataset import NameDataset from splink.duckdb.linker import DuckDBLinker -from utils.constants import COMPANY_TYPES, repo_root +from utils.constants import COMPANY_TYPES, repo_root, suffixes, titles """ Module for performing record linkage on state campaign finance dataset @@ -21,10 +18,14 @@ def get_address_line_1_from_full_address(address: str) -> str: Address line 1 usually includes street address or PO Box information. + Uses the usaddress libray which splits an address string into components, + and labels each component. 
+ https://usaddress.readthedocs.io/en/latest/ + Args: address: raw string representing full address Returns: - address_line_1 + address_line_1 as a string Sample Usage: >>> get_address_line_1_from_full_address('6727 W. Corrine Dr. Peoria,AZ 85381') @@ -42,7 +43,7 @@ def get_address_line_1_from_full_address(address: str) -> str: address_tuples = usaddress.parse( address - ) # takes a string address and put them into value,key pairs as tuples + ) # takes a string address and put them into value, key pairs as tuples line1_components = [] for value, key in address_tuples: if key == "PlaceName": @@ -60,167 +61,6 @@ def get_address_line_1_from_full_address(address: str) -> str: return line1 -def calculate_string_similarity(string1: str, string2: str) -> float: - """Returns how similar two strings are on a scale of 0 to 1 - - This version utilizes Jaro-Winkler distance, which is a metric of - edit distance. Jaro-Winkler specially prioritizes the early - characters in a string. - - Since the ends of strings are often more valuable in matching names - and addresses, we reverse the strings before matching them. - - https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance - https://github.com/Yomguithereal/talisman/blob/master/src/metrics/jaro-winkler.js - - The exact meaning of the metric is open, but the following must hold true: - 1. equivalent strings must return 1 - 2. strings with no similar characters must return 0 - 3. 
strings with higher intuitive similarity must return higher scores - similarity score - - Args: - string1: any string - string2: any string - Returns: - similarity score - - Sample Usage: - >>> calculate_string_similarity("exact match", "exact match") - 1.0 - >>> calculate_string_similarity("aaaaaa", "bbbbbbbbbbb") - 0.0 - >>> similar_score = calculate_string_similarity("very similar", "vary similar") - >>> different_score = calculate_string_similarity("very similar", "very not close") - >>> similar_score > different_score - True - """ - - return float(td.jaro_winkler(string1.lower()[::-1], string2.lower()[::-1])) - - -def calculate_row_similarity( - row1: pd.DataFrame, row2: pd.DataFrame, weights: np.array, comparison_func -) -> float: - """Find weighted similarity of two rows in a dataframe - - The length of the weights vector must be the same as - the number of selected columns. - - This version is slow and not optimized, and will be - revised in order to make it more efficient. It - exists as to provide basic functionality. Once we have - the comparison function locked in, using .apply will - likely be easier and more efficient. - """ - - row_length = len(weights) - if not (row1.shape[1] == row2.shape[1] == row_length): - raise ValueError("Number of columns and weights must be the same") - - similarity = np.zeros(row_length) - - for i in range(row_length): - similarity[i] = comparison_func( - row1.reset_index().drop(columns="index").iloc[:, i][0], - row2.reset_index().drop(columns="index").iloc[:, i][0], - ) - - return sum(similarity * weights) - - -def row_matches( - df: pd.DataFrame, weights: np.array, threshold: float, comparison_func -) -> dict: - """Get weighted similarity score of two rows - - Run through the rows using indices: if two rows have a comparison score - greater than a threshold, we assign the later row to the former. Any - row which is matched to any other row is not examined again. 
Matches are - stored in a dictionary object, with each index appearing no more than once. - - This is not optimized. Not presently sure how to make a good test case - for this, will submit and ask in mentor session. - """ - - all_indices = np.array(list(df.index)) - - index_dict = {} - [index_dict.setdefault(x, []) for x in all_indices] - - discard_indices = [] - - end = max(all_indices) - for i in all_indices: - # Skip indices that have been stored in the discard_indices list - if i in discard_indices: - continue - - # Iterate through the remaining numbers - for j in range(i + 1, end): - if j in discard_indices: - continue - - # Our conditional - if ( - calculate_row_similarity( - df.iloc[[i]], df.iloc[[j]], weights, comparison_func - ) - > threshold - ): - # Store the other index and mark it for skipping in future iterations - discard_indices.append(j) - index_dict[i].append(j) - - return index_dict - - -def match_confidence( - confidences: np.array(float), weights: np.array(float), weights_toggle: bool -) -> float: - """Combine confidences for row matches into a final confidence - - This is a weighted log-odds based combination of row match confidences - originating from various record linkage methods. Weights will be applied - to the linkage methods in order and must be of the same length. - - weights_toggle allows one to turn weights on and off when calling the - function. False cancels the use of weights. - - Since log-odds have undesirable behaviors at 0 and 1, we truncate at - +-5, which corresponds to around half a percent probability or - 1 - the same. 
- >>> match_confidence(np.array([.6, .9, .0001]), np.array([2,5.7,8]), True) - 2.627759082143462e-12 - >>> match_confidence(np.array([.6, .9, .0001]), np.array([2,5.7,8]), False) - 0.08337802853594725 - """ - - if (min(confidences) < 0) or (max(confidences) > 1): - raise ValueError("Probabilities must be bounded on [0, 1]") - - log_odds = [] - - for c in confidences: - l_o = np.log(c / (1 - c)) - - if l_o > 5: - l_o = 5 - - elif l_o < -5: - l_o = -5 - - log_odds.append(l_o) - - if weights_toggle: - log_odds = log_odds * weights - - l_o_sum = np.sum(log_odds) - - conf_sum = math.e ** (l_o_sum) / (1 + math.e ** (l_o_sum)) - return conf_sum - - def determine_comma_role(name: str) -> str: """Given a string (someone's name), attempts to determine the role of the comma in the name and where it ought to belong. @@ -247,20 +87,7 @@ def determine_comma_role(name: str) -> str: >>> determine_comma_role("DOe, Jane") ' Jane Doe' """ - suffixes = [ - "sr", - "jr", - "i", - "ii", - "iii", - "iv", - "v", - "vi", - "vii", - "viii", - "ix", - "x", - ] + name_parts = name.lower().split(",") # if the comma is just in the end as a typo: if len(name_parts[1]) == 0: @@ -326,18 +153,6 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: # some names have titles or professions associated with the name. We need to # remove those from the name. - titles = [ - "mr", - "ms", - "mrs", - "miss", - "prof", - "dr", - "doctor", - "sir", - "madam", - "professor", - ] names = [first_name, last_name, full_name] for i in range(len(names)): @@ -363,10 +178,14 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: def get_street_from_address_line_1(address_line_1: str) -> str: """Given an address line 1, return the street name + Uses the usaddress libray which splits an address string into components, + and labels each component. 
+ https://usaddress.readthedocs.io/en/latest/ + Args: - address_line_1: either street information or PO box + address_line_1: either street information or PO box as a string Returns: - street name + street name as a string Raises: ValueError: if string is malformed and no street can be reasonably found. @@ -405,54 +224,6 @@ def get_street_from_address_line_1(address_line_1: str) -> str: return " ".join(string) -def name_rank(first_name: str, last_name: str) -> list: - """Returns a score for the rank of a given first name and last name - https://github.com/philipperemy/name-dataset - Args: - first_name: any string - last_name: any string - Returns: - name rank for first name and last names - 1 is the most common name, only for names in the United States - First element in the list corresponds to the rank of the first name - Second element in the list corresponds to the rank of the last name - Empty or non string values will return None - Names that are not found in the dataset will return 0 - - >>> name_rank("John", "Smith") - [5, 7] - >>> name_rank("Adil", "Kassim") - [0, 7392] - >>> name_rank(None, 9) - [None, None] - """ - - # Initialize the NameDataset class - nd = NameDataset() - - first_name_rank = 0 - last_name_rank = 0 - if isinstance(first_name, str): - first_name_result = nd.search(first_name) - if first_name_result and isinstance(first_name_result, dict): - first_name_data = first_name_result.get("first_name") - if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get( - "United States", 0 - ) - else: - first_name_rank = None - if isinstance(last_name, str): - last_name_result = nd.search(last_name) - if last_name_result and isinstance(last_name_result, dict): - last_name_data = last_name_result.get("last_name") - if last_name_data and "rank" in last_name_data: - last_name_rank = last_name_data["rank"].get("United States", 0) - else: - last_name_rank = None - return [first_name_rank, last_name_rank] - - def 
convert_duplicates_to_dict(df: pd.DataFrame) -> None: """For each uuid, maps it to all other uuids for which it has been deemed a match. @@ -537,6 +308,7 @@ def cleaning_company_column(company_entry: str) -> str: standardized for retired, self employed, and unemployed, or original string if no match or empty string + Sample Usage: >>> cleaning_company_column("Retireed") 'Retired' >>> cleaning_company_column("self") @@ -592,6 +364,7 @@ def standardize_corp_names(company_name: str) -> str: Returns: standardized company name + Sample Usage: >>> standardize_corp_names('MI BEER WINE WHOLESALERS ASSOC') 'MI BEER WINE WHOLESALERS ASSOCIATION' @@ -617,6 +390,10 @@ def standardize_corp_names(company_name: str) -> str: def get_address_number_from_address_line_1(address_line_1: str) -> str: """Given an address line 1, return the building number or po box + Uses the usaddress libray which splits an address string into components, + and labels each component. + https://usaddress.readthedocs.io/en/latest/ + Args: address_line_1: either street information or PO box Returns: @@ -655,6 +432,11 @@ def splink_dedupe( individuals_settings, indivduals_blocking, organizations_settings, organizations_blocking + Uses the splink library which employs probabilistic matching for + record linkage + https://moj-analytical-services.github.io/splink/index.html + + Args: df: dataframe settings: configuration settings From 133dadcd22cb6dd28e6e968396574b8502f5bbc6 Mon Sep 17 00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 19:55:14 -0600 Subject: [PATCH 188/214] addressing comments on classify and test_classify --- utils/classify.py | 15 ++++++++---- utils/tests/test_classify.py | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 utils/tests/test_classify.py diff --git a/utils/classify.py b/utils/classify.py index 915c30c3..5475894b 100644 --- a/utils/classify.py +++ b/utils/classify.py @@ -6,7 +6,7 @@ def classify_wrapper( 
individuals_df: pd.DataFrame, organizations_df: pd.DataFrame ): - """Wrapper for classificaiton in linkage pipeline + """Wrapper for classification in linkage pipeline Initialize the classify column in both dataframes and call sub-functions classifying individuals and organizations @@ -17,7 +17,13 @@ def classify_wrapper( Returns: individuals and organizations datfarames with a new - 'classification' column containing 'neutral', 'f', or 'c' + 'classification' column containing 'neutral', 'f', or 'c'. + 'neutral' status is the default for all entities, and those tagged + as 'neutral' are entities which we could not confidently identify as + either fossil fuel or clean energy organizations or affiliates. + Classification is very conservative, and we are very confident that + entities classified as one group or another are related to them. + """ individuals_df["classification"] = "neutral" @@ -59,9 +65,8 @@ def matcher(df: pd.DataFrame, substring: str, column: str, category: str): def classify_individuals(individuals_df: pd.DataFrame): """Part of the classification pipeline - We apply the matcher function to the individuals dataframe - repeatedly, using a variety of substrings to identify the - employees of fossil fuel companies. + We check if individuals work for a known fossil fuel company + and categorize them using the matcher() function. 
Args: individuals_df: a dataframe containing deduplicated diff --git a/utils/tests/test_classify.py b/utils/tests/test_classify.py new file mode 100644 index 00000000..b6bce883 --- /dev/null +++ b/utils/tests/test_classify.py @@ -0,0 +1,46 @@ +import numpy as np +import pandas as pd +import pytest + +from utils.classify import matcher + +d = { + "name": [ + "bob von rosevich", + "anantarya smith", + "bob j vonrosevich", + "missy elliot", + "mr johnson", + "quarantin directino", + "missy eliot", + "joseph johnson", + ], + "address": [ + "3 Blue Drive, Chicago", + "4 Blue Drive, Chicago", + "8 Fancy Way, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + "42 Hollywood Boulevard, Chicago", + "8 Fancy Way, Evanston", + "17 Regular Road, Chicago", + ], +} + +test_df = pd.DataFrame(data=d) + +test_df["classification"] = "neutral" + + +@pytest.fixture +def matcher_scen_1(): + return test_df + + +def test_matcher_scen_1(matcher_scen_1): + matcher(matcher_scen_1, "Fancy", "address", "f") + res = test_df[test_df["classification"] == "f"]["name"].values + + assert np.all( + res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) + ) From feda102730544ce1c47c231ff0806d24abb20b3c Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Mon, 4 Mar 2024 20:19:14 -0600 Subject: [PATCH 189/214] Delete utils/tests/test_classifier.py --- utils/tests/test_classifier.py | 46 ---------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 utils/tests/test_classifier.py diff --git a/utils/tests/test_classifier.py b/utils/tests/test_classifier.py deleted file mode 100644 index b6bce883..00000000 --- a/utils/tests/test_classifier.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -from utils.classify import matcher - -d = { - "name": [ - "bob von rosevich", - "anantarya smith", - "bob j vonrosevich", - "missy elliot", - "mr johnson", - "quarantin 
directino", - "missy eliot", - "joseph johnson", - ], - "address": [ - "3 Blue Drive, Chicago", - "4 Blue Drive, Chicago", - "8 Fancy Way, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - "42 Hollywood Boulevard, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - ], -} - -test_df = pd.DataFrame(data=d) - -test_df["classification"] = "neutral" - - -@pytest.fixture -def matcher_scen_1(): - return test_df - - -def test_matcher_scen_1(matcher_scen_1): - matcher(matcher_scen_1, "Fancy", "address", "f") - res = test_df[test_df["classification"] == "f"]["name"].values - - assert np.all( - res == np.array(["bob j vonrosevich", "missy elliot", "missy eliot"]) - ) From 9c5ff3cfebaef805ee700263579c9c54ec3280b2 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 20:52:51 -0600 Subject: [PATCH 190/214] making revisions to data/README and network.py per Avery's feedback --- data/README.md | 9 --------- utils/network.py | 28 ++++++++++------------------ 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/data/README.md b/data/README.md index 9c154f71..5326bff8 100644 --- a/data/README.md +++ b/data/README.md @@ -160,12 +160,3 @@ contribution data and READMEs in a Google Drive for the duration of this project 3. The Finance Report states that a record must be kept for any contribution over \$10.00, but “Contributions and receipts of \$50.00 or less per contributor, during the reporting period, need not be itemized on the report” … this might mean that if 1,000 people for instance donate \$50 or less, there could be potentially thousands/tens of thousands of \$ not shown on the data, even though this information is recorded. This means that the total contributions that filers itemize does not necessarily reflect the total contributions they received. 4. Transparency USA has aggregated data on the contributions of individuals and committees. 
This could be a helpful source to cross-check the data and potentially help alleviate the debt-contribution issue. Pennsylvania' Dept. of State also offers a detailed website that shows all the aggregated contributions made and received, expenditures made, debts, and receipts. The catch is one must know which candidate they are looking for as it's a searchable database, but it can be very helpful for cross-matching and verification. Here's the link :https://www.campaignfinanceonline.pa.gov/Pages/CFReportSearch.aspx - -## classified_data -### Summary -- The classified_data subdirectory consists of 3 files: 'classified_individuals_v1', 'classified_organizations_v1' & 'transactions_v1'. These files are derived from the record_linkage pipeline, which mainly adds a classification column to the individuals and organizations entities that reflects the entity's affiliation with the fossil-fuel industry, clean-energy industry, or neutrality. These take the form of 'f' for fossil-fuel, 'c' for clean-energy, and 'neutral' for neutrality' - -### Format -- The 'classified_individuals_v1' dataset comprises of the following columns: ['id', 'first_name', 'last_name', 'full_name', 'entity_type', 'state','party', 'company', 'occupation', 'address', 'zip', 'city','classification']. As noted, the 'classification' column is the added column. - -- The 'classified_organization_v1' dataset comprises of the following columns: ['id', 'name', 'state', 'entity_type', 'classification']. 
As noted, the 'classification' column is the added column diff --git a/utils/network.py b/utils/network.py index 5b95b2b4..0dcc5a87 100644 --- a/utils/network.py +++ b/utils/network.py @@ -14,13 +14,10 @@ def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: The entity's name """ for df in dfs: - # first, check orgs df: if "name" in df.columns: name_in_org = df.loc[df["id"] == uuid] if len(name_in_org) > 0: return name_in_org.iloc[0]["name"] - # theoretically it must be in inds if not in orgs, but for the sample - # data this might not be the case if "full_name" in df.columns: name_in_ind = df.loc[df["id"] == uuid] @@ -30,20 +27,18 @@ def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: def combine_datasets_for_network_graph(dfs: list[pd.DataFrame]) -> pd.DataFrame: - """Combines the 3 dataframes into a single dataframe to create the graph + """Combines the 3 dataframes into a single dataframe to create a graph - Given the inds, orgs, and transactions dataframes, the func first finds the - recipient_id in the transaction dataframe in either the org or inds - dataframes and adds the name of the recipient to the transaction df. Then, - the inds and orgs dfs are merged with the transaction df and concatenated - with the contributions amount aggregated, making a final dataframe of the - merged transactions and entity dataframes. + Given 3 dataframes, the func adds a 'recipient_name' column in the + transactions df, merges the dfs together to record transaction info between + entities, then concatenates the dfs into a final df of the merged + transactions and entity dfs. 
Args: list of dataframes in the order: [inds_df, orgs_df, transactions_df] - Transactions dataframe with at least column: 'recipient_id' - Individuals dataframe with at least column: 'full_name' - Organizations dataframe with at least column: 'name' + Transactions dataframe with column: 'recipient_id' + Individuals dataframe with column: 'full_name' + Organizations dataframe with column: 'name' Returns A merged dataframe with aggregate contribution amounts between entitites @@ -56,7 +51,7 @@ def combine_datasets_for_network_graph(dfs: list[pd.DataFrame]) -> pd.DataFrame: name_identifier, args=([orgs_df, inds_df],) ) - # next, merge the inds_df and orgs_df with the transactions_df + # next, merge the inds_df and orgs_df ids with the transactions_df donor_id inds_trans_df = pd.merge( inds_df, transactions_df, how="left", left_on="id", right_on="donor_id" ) @@ -208,10 +203,6 @@ def plot_network_graph(G: nx.MultiDiGraph): fig = go.Figure(data=[edge_trace, node_trace], layout=layout) fig.show() - -# create pipeline - - def construct_network_graph( start_year: int, end_year: int, dfs: list[pd.DataFrame] ): @@ -219,6 +210,7 @@ def construct_network_graph( Args: start_year & end_year: the range of the desired data + dfs: dataframes in the order: inds_df, orgs_df, transactions_df Returns: """ From 18a52ff3acbb8f2819de6cb6c9eef7cc69fe35c9 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Mon, 4 Mar 2024 20:54:30 -0600 Subject: [PATCH 191/214] making revisions to data/README and network.py per Avery's feedback --- utils/network.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/network.py b/utils/network.py index 0dcc5a87..c3910034 100644 --- a/utils/network.py +++ b/utils/network.py @@ -203,6 +203,7 @@ def plot_network_graph(G: nx.MultiDiGraph): fig = go.Figure(data=[edge_trace, node_trace], layout=layout) fig.show() + def construct_network_graph( start_year: int, end_year: int, dfs: list[pd.DataFrame] ): From 743b30618689ec698101052c47c705e3476e85bc Mon Sep 17 
00:00:00 2001 From: Nicolas Posner Date: Mon, 4 Mar 2024 20:59:25 -0600 Subject: [PATCH 192/214] updating readme and makefile as well as location of data for linkage_pipeline --- Makefile | 7 +++---- README.md | 9 +++++---- data/README.md | 6 ++++++ output/README.md | 4 ++++ utils/linkage_pipeline.py | 6 +++--- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 36577581..3de9758d 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,7 @@ run-notebooks: --no-browser --allow-root -#running the linkage pipeline and creating the network graph -#still waiting on linkage_pipeline completion to get this into final shape +output_network_graph: + python linkage_pipeline.py -output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv - python linkage_pipeline.py \ No newline at end of file +.PHONY: output_network_graph \ No newline at end of file diff --git a/README.md b/README.md index 879a41e0..4be8c9ba 100644 --- a/README.md +++ b/README.md @@ -44,9 +44,9 @@ If you prefer to develop inside a container with VS Code then do the following s 6. For future reference, the above pipeline also stores the information mapping given id to our database id (generated via uuid) in a csv file in the format of (state)IDMap.csv (example: ArizonaIDMap.csv) in the output folder ### Record Linkage and Network Pipeline -1. Save the standardized tables "complete_individuals_table.csv", "complete_organizations_table.csv", and "complete_transactions_table.csv" (collected from the above pipeline or data from the project's Google Drive) in the following format: repo_root / "output" / "file" -2. **UPDATE:** Run the pipeline by calling ```make run-linkage-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, and an interactive network visual -3. 
The pipeline will output the deduplicated tables saved as "cleaned_individuals_table.csv", "cleaned_organizations_table.csv", and "cleaned_transactions_table.csv". A mapping file, "deduplicated_UUIDs" tracks the UUIDs designated as duplicates. +1. Download the complete_data.zip file from the Climate Cabinet Data Clinic Google Drive using this link: https://drive.google.com/file/d/1zbjt7iBU0NAWSBcUyEsjvuumn3VgI4z9/view?usp=sharing. After downloading this .zip, unzip it to find three files: complete_individuals.csv, complete_organizations.csv, and complete_transactions.csv. Upload these files into the data folder and ensure that their names are correct. They must follow this format: repo_root / "output" / "file" +2. Run the pipeline by calling ```make run-linkage-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, and create a NetworkX Graph object. +3. The pipeline will output a NetworkX Graph object and a txt file containing graph metrics into the output folder. ## Repository Structure @@ -65,7 +65,8 @@ If the data is larger than 50MB than you should not add it to the repo and inste This [README.md file](/data/README.md) should be kept up to date. ### output -Should contain work product generated by the analysis. Keep in mind that results should (generally) be excluded from the git repository. +This folder is empty by default. The final outputs of the Makefile will be placed here, consisting of a NetworkX Graph object and a txt file containing graph metrics. + ## Team Member diff --git a/data/README.md b/data/README.md index 5326bff8..df9336b7 100644 --- a/data/README.md +++ b/data/README.md @@ -2,6 +2,12 @@ This directory contains information for use in this project. +## Makefile and Final Pipeline +- This folder is empty by default. 
In order to run the Makefile, download the complete_data.zip file from the Climate Cabinet Data Clinic Google Drive using this link: https://drive.google.com/file/d/1zbjt7iBU0NAWSBcUyEsjvuumn3VgI4z9/view?usp=sharing + + - After downloading this .zip, unzip it to find three files: complete_individuals.csv, complete_organizations.csv, and complete_transactions.csv. Upload these files into the data folder and ensure that their names are correct. Once they are in place, you may run the Makefile. + + ## Arizona Campaign Finance Data ### Summary diff --git a/output/README.md b/output/README.md index 932298fd..06e91212 100644 --- a/output/README.md +++ b/output/README.md @@ -1,2 +1,6 @@ # Output README --- + +## Makefile and Final Pipeline + +- This folder is empty by default. The output of the Makefile process will be output into this folder, consisting of a NetworkX Graph object and a txt file containing graph metrics. diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index ac911559..499726e9 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -136,15 +136,15 @@ def preprocess_transactions(transactions: pd.DataFrame) -> pd.DataFrame: def main(): organizations = pd.read_csv( - BASE_FILEPATH / "output" / "complete_organizations_table.csv" + BASE_FILEPATH / "data" / "complete_organizations_table.csv" ) individuals = pd.read_csv( - BASE_FILEPATH / "output" / "complete_individuals_table.csv" + BASE_FILEPATH / "data" / "complete_individuals_table.csv" ) transactions = pd.read_csv( - BASE_FILEPATH / "output" / "complete_transactions_table.csv" + BASE_FILEPATH / "data" / "complete_transactions_table.csv" ) individuals = preprocess_individuals(individuals) From 793b8afe50a8708b5510fd159f85e425f2781c0e Mon Sep 17 00:00:00 2001 From: npashilkar Date: Tue, 5 Mar 2024 00:07:18 -0600 Subject: [PATCH 193/214] removing unneccessary tests --- utils/tests/test_linkage.py | 104 +----------------------------------- 1 file changed, 1 insertion(+), 103 
deletions(-) diff --git a/utils/tests/test_linkage.py b/utils/tests/test_linkage.py index 4a5f73f2..d96339d6 100644 --- a/utils/tests/test_linkage.py +++ b/utils/tests/test_linkage.py @@ -1,115 +1,13 @@ -import numpy as np import pandas as pd import pytest from utils.constants import BASE_FILEPATH -from utils.linkage import ( - calculate_row_similarity, - calculate_string_similarity, - deduplicate_perfect_matches, - row_matches, -) +from utils.linkage import deduplicate_perfect_matches """ Module for testing functions in linkage.py """ -# Creating a test for calculate_row_similarity and row_matches - -# to put in data: -d = { - "name": ["bob von rosevich", "anantarya smith", "bob j vonrosevich"], - "address": [ - "3 Blue Drive, Chicago", - "4 Blue Drive, Chicago", - "8 Fancy Way, Chicago", - ], -} - -test_df = pd.DataFrame(data=d) - - -@pytest.fixture -def row_similarity_scen_1(): - return test_df - - -@pytest.fixture -def row_similarity_scen_2(): - return test_df - - -def test_row_similarity_scen_1(row_similarity_scen_1): - wrong = calculate_row_similarity( - row_similarity_scen_1.iloc[[0]], - row_similarity_scen_1.iloc[[1]], - np.array([0.8, 0.2]), - calculate_string_similarity, - ) - right = calculate_row_similarity( - row_similarity_scen_1.iloc[[0]], - row_similarity_scen_1.iloc[[2]], - np.array([0.8, 0.2]), - calculate_string_similarity, - ) - - assert right > wrong - - -def test_row_similarity_scen_2(row_similarity_scen_2): - wrong = calculate_row_similarity( - row_similarity_scen_2.iloc[[0]], - row_similarity_scen_2.iloc[[1]], - np.array([0.2, 0.8]), - calculate_string_similarity, - ) - right = calculate_row_similarity( - row_similarity_scen_2.iloc[[0]], - row_similarity_scen_2.iloc[[2]], - np.array([0.2, 0.8]), - calculate_string_similarity, - ) - - assert right < wrong - - -d2 = { - "name": [ - "bob von rosevich", - "anantarya smith", - "bob j vonrosevich", - "missy elliot", - "mr johnson", - "quarantin directino", - "missy eliot", - "joseph johnson", - ], 
- "address": [ - "3 Blue Drive, Chicago", - "4 Blue Drive, Chicago", - "8 Fancy Way, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - "42 Hollywood Boulevard, Chicago", - "8 Fancy Way, Evanston", - "17 Regular Road, Chicago", - ], -} -test_df2 = pd.DataFrame(data=d2) - - -@pytest.fixture -def row_match_scen1(): - return test_df2 - - -def test_row_matches(row_match_scen1): - res = row_matches( - row_match_scen1, np.array([0.8, 0.2]), 0.9, calculate_string_similarity - ) - - assert res == {0: [2], 1: [], 2: [], 3: [6], 4: [], 5: [], 6: [], 7: []} - # Test for dedupe function @pytest.fixture From a571d91cb5238089aca9fde1f27878828cc7a08a Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Tue, 5 Mar 2024 06:21:34 +0000 Subject: [PATCH 194/214] slight update to splink_dedupe function --- utils/linkage.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index d7237037..484c1060 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -341,9 +341,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names[i] = determine_comma_role(names[i]) names[i] = names[i].replace(".", "").split(" ") - names[i] = [ - name_part for name_part in names[i] if name_part not in titles - ] + names[i] = [name_part for name_part in names[i] if name_part not in titles] names[i] = " ".join(names[i]) # one last check to remove any pieces that might add extra whitespace @@ -432,9 +430,7 @@ def name_rank(first_name: str, last_name: str) -> list: if first_name_result and isinstance(first_name_result, dict): first_name_data = first_name_result.get("first_name") if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get( - "United States", 0 - ) + first_name_rank = first_name_data["rank"].get("United States", 0) else: first_name_rank = None if isinstance(last_name, str): @@ -636,9 +632,7 @@ def get_address_number_from_address_line_1(address_line_1: 
str) -> str: raise ValueError("Can not find Address Number") -def splink_dedupe( - df: pd.DataFrame, settings: dict, blocking: list -) -> pd.DataFrame: +def splink_dedupe(df: pd.DataFrame, settings: dict, blocking: list) -> pd.DataFrame: """Given a dataframe and config settings, return a deduplicated dataframe @@ -689,6 +683,9 @@ def splink_dedupe( ) deduped_df.rename(columns={"cluster_id": "unique_id"}, inplace=True) + deduped_df["duplicated"] = deduped_df["duplicated"].apply( + lambda x: x if isinstance(x, list) else [x] + ) convert_duplicates_to_dict(deduped_df) deduped_df.drop(columns=["duplicated"]) From 1db28399005bb2c5ee38e9b4bfd3c6f2d3fb77c2 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Tue, 5 Mar 2024 06:22:21 +0000 Subject: [PATCH 195/214] pre-commit fixes --- utils/linkage.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 484c1060..43febf41 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -341,7 +341,9 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: names[i] = determine_comma_role(names[i]) names[i] = names[i].replace(".", "").split(" ") - names[i] = [name_part for name_part in names[i] if name_part not in titles] + names[i] = [ + name_part for name_part in names[i] if name_part not in titles + ] names[i] = " ".join(names[i]) # one last check to remove any pieces that might add extra whitespace @@ -430,7 +432,9 @@ def name_rank(first_name: str, last_name: str) -> list: if first_name_result and isinstance(first_name_result, dict): first_name_data = first_name_result.get("first_name") if first_name_data and "rank" in first_name_data: - first_name_rank = first_name_data["rank"].get("United States", 0) + first_name_rank = first_name_data["rank"].get( + "United States", 0 + ) else: first_name_rank = None if isinstance(last_name, str): @@ -632,7 +636,9 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: raise 
ValueError("Can not find Address Number") -def splink_dedupe(df: pd.DataFrame, settings: dict, blocking: list) -> pd.DataFrame: +def splink_dedupe( + df: pd.DataFrame, settings: dict, blocking: list +) -> pd.DataFrame: """Given a dataframe and config settings, return a deduplicated dataframe From 083f92f84554b5d60cbc5c7e38e4a8a1562f288a Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 01:23:19 -0600 Subject: [PATCH 196/214] last minute modifications to network file. final version --- utils/network.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/utils/network.py b/utils/network.py index c3910034..5f1ada0a 100644 --- a/utils/network.py +++ b/utils/network.py @@ -2,6 +2,16 @@ import pandas as pd import plotly.graph_objects as go +from utils.constants import BASE_FILEPATH + +inds_path = BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" +orgs_path = BASE_FILEPATH / "output" / "cleaned_organizations_table.csv" +transactions_path = BASE_FILEPATH / "output" / "cleaned_transactions_table" + +inds_df = pd.read_csv(inds_path, low_memory=False) +orgs_df = pd.read_csv(orgs_path, low_memory=False) +transactions_df = pd.read_csv(transactions_path, low_memory=False) + def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: """Returns the name of the entity given the entity's uuid @@ -112,7 +122,7 @@ def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: **row[df.columns.difference(edge_columns)].dropna().to_dict(), ) # add the recipient as a node - G.nodes[row["recipient_name"]]["classification"] = "neutral" + G.add_node(row["recipient_name"], classification = "neutral") # add the edge attributes between two nodes edge_attributes = row[edge_columns].dropna().to_dict() @@ -236,11 +246,14 @@ def main(): year2. Ex: 2018, 2023" ) - assert len(text == 2) + assert len(text == 2), ( + "Wrong input for range of years. Format should be" + + " year1, year2. 
Ex: 1998,2023" + ) + start_year, end_year = text.split(",") construct_network_graph( - start_year, - end_year, + start_year, end_year, [inds_df, orgs_df, transactions_path] ) From 09aca55f9d39d4b7707d00b53dd55bb083444a4c Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 11:34:34 -0600 Subject: [PATCH 197/214] removing main() from file --- utils/network.py | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/utils/network.py b/utils/network.py index 5f1ada0a..39785122 100644 --- a/utils/network.py +++ b/utils/network.py @@ -122,7 +122,7 @@ def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph: **row[df.columns.difference(edge_columns)].dropna().to_dict(), ) # add the recipient as a node - G.add_node(row["recipient_name"], classification = "neutral") + G.add_node(row["recipient_name"], classification="neutral") # add the edge attributes between two nodes edge_attributes = row[edge_columns].dropna().to_dict() @@ -237,25 +237,3 @@ def construct_network_graph( G = create_network_graph(aggreg_df) plot_network_graph(G) nx.write_adjlist(G, "Network Graph Node Data") - - -def main(): - """""" - text = input( - "Provide a range of desired years to extract data. Format is year1, \ - year2. Ex: 2018, 2023" - ) - - assert len(text == 2), ( - "Wrong input for range of years. Format should be" - + " year1, year2. 
Ex: 1998,2023" - ) - - start_year, end_year = text.split(",") - construct_network_graph( - start_year, end_year, [inds_df, orgs_df, transactions_path] - ) - - -if __name__ == "__main__": - construct_network_graph(1998, 2023) From 269998cf46a29cf362c8ae13f658542e308163e6 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 11:36:44 -0600 Subject: [PATCH 198/214] removing main() from file --- utils/network.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/utils/network.py b/utils/network.py index 39785122..90f12a69 100644 --- a/utils/network.py +++ b/utils/network.py @@ -2,16 +2,6 @@ import pandas as pd import plotly.graph_objects as go -from utils.constants import BASE_FILEPATH - -inds_path = BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" -orgs_path = BASE_FILEPATH / "output" / "cleaned_organizations_table.csv" -transactions_path = BASE_FILEPATH / "output" / "cleaned_transactions_table" - -inds_df = pd.read_csv(inds_path, low_memory=False) -orgs_df = pd.read_csv(orgs_path, low_memory=False) -transactions_df = pd.read_csv(transactions_path, low_memory=False) - def name_identifier(uuid: str, dfs: list[pd.DataFrame]) -> str: """Returns the name of the entity given the entity's uuid From d6167df68bd2e86a1f267807f044b6656ed88707 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 12:03:07 -0600 Subject: [PATCH 199/214] updated README.md to show networkX portion of the pipeline --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 879a41e0..0a5fe1a0 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ If you prefer to develop inside a container with VS Code then do the following s ### Record Linkage and Network Pipeline 1. 
Save the standardized tables "complete_individuals_table.csv", "complete_organizations_table.csv", and "complete_transactions_table.csv" (collected from the above pipeline or data from the project's Google Drive) in the following format: repo_root / "output" / "file" -2. **UPDATE:** Run the pipeline by calling ```make run-linkage-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, and an interactive network visual -3. The pipeline will output the deduplicated tables saved as "cleaned_individuals_table.csv", "cleaned_organizations_table.csv", and "cleaned_transactions_table.csv". A mapping file, "deduplicated_UUIDs" tracks the UUIDs designated as duplicates. +2. **UPDATE:** Run the pipeline by calling ```make run-linkage-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, convert the standardized tables into a NetworkX Graph, and show an interactive network visual. +3. The pipeline will output the deduplicated tables saved as "cleaned_individuals_table.csv", "cleaned_organizations_table.csv", and "cleaned_transactions_table.csv". A mapping file, "deduplicated_UUIDs" tracks the UUIDs designated as duplicates. The pipeline will also output "Network Graph Node Data", which is the NetworkX Graph object converted into an adjacency list. 
## Repository Structure From 8d629397a260f9650d9b2cc23128bb7a53dd7ac3 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 12:06:25 -0600 Subject: [PATCH 200/214] saving Test.ipynb work --- notebooks/Test.ipynb | 11490 +++++++---------------------------------- 1 file changed, 1935 insertions(+), 9555 deletions(-) diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb index b9ac1762..d242967e 100644 --- a/notebooks/Test.ipynb +++ b/notebooks/Test.ipynb @@ -10,8 +10,7 @@ "import numpy as np\n", "import networkx as nx\n", "import matplotlib.pyplot as plt\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n" + "import plotly.graph_objects as go" ] }, { @@ -60,25 +59,17 @@ " \n", " \n", " \n", - " 63128\n", - " 422065cd-0262-4ac9-a2a4-74136ddb99e2\n", - " floyd workman\n", + " 35663\n", + " 7d7d4521-b765-4501-b2da-4220578a5ce2\n", + " lucas gutgsell\n", " MI\n", " corporation\n", " neutral\n", " \n", " \n", - " 98258\n", - " dfd160b5-9389-44ef-a632-c08dc1a1d201\n", - " front 43\n", - " MI\n", - " corporation\n", - " neutral\n", - " \n", - " \n", - " 1712\n", - " 858415ce-d53f-4843-aee0-85560117bdc6\n", - " arizona federation of democratic women\n", + " 6274\n", + " 155e6b64-cada-4046-8d7e-799e1d520e98\n", + " manna bbq\n", " NaN\n", " vendor\n", " neutral\n", @@ -89,19 +80,12 @@ ], "text/plain": [ " id \\\n", - "63128 422065cd-0262-4ac9-a2a4-74136ddb99e2 \n", - "98258 dfd160b5-9389-44ef-a632-c08dc1a1d201 \n", - "1712 858415ce-d53f-4843-aee0-85560117bdc6 \n", - "\n", - " name state entity_type \\\n", - "63128 floyd workman MI corporation \n", - "98258 front 43 MI corporation \n", - "1712 arizona federation of democratic women NaN vendor \n", + "35663 7d7d4521-b765-4501-b2da-4220578a5ce2 \n", + "6274 155e6b64-cada-4046-8d7e-799e1d520e98 \n", "\n", - " classification \n", - "63128 neutral \n", - "98258 neutral \n", - "1712 neutral " + " name state entity_type classification \n", + "35663 lucas gutgsell MI corporation neutral 
\n", + "6274 manna bbq NaN vendor neutral " ] }, "execution_count": 3, @@ -110,7 +94,7 @@ } ], "source": [ - "orgs_df.head(3)" + "orgs_df.head(2)" ] }, { @@ -121,7 +105,7 @@ { "data": { "text/plain": [ - "array(['neutral'], dtype=object)" + "array(['neutral', 'f'], dtype=object)" ] }, "execution_count": 4, @@ -137,59 +121,6 @@ "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idnamestateentity_typeclassification
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [id, name, state, entity_type, classification]\n", - "Index: []" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "orgs_df.loc[orgs_df.classification == 'f']" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, "outputs": [ { "data": { @@ -276,7 +207,7 @@ "1 NaN NaN NaN " ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -287,36 +218,39 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['neutral', 'f'], dtype=object)" + "Index(['id', 'first_name', 'last_name', 'full_name', 'entity_type', 'state',\n", + " 'party', 'company', 'occupation', 'address', 'zip', 'city',\n", + " 'classification'],\n", + " dtype='object')" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inds_df.classification.unique()" + "inds_df.columns" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(9926, 9919, 10000, 10000)" + "(9929, 9924, 10000, 10000)" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -350,31 +284,17 @@ { "data": { "text/plain": [ - "['242d019c-e0ab-405e-8e77-abae7418b87f',\n", - " '8b2ad550-64a1-4975-8b77-5eb1f24a8871',\n", - " 'aee69307-194f-4c40-af3d-a55a34e1068e',\n", - " '55e5e946-6261-4f19-9752-fb58219b2e99',\n", - " '4faf251a-73d9-46ef-9e17-d3cf0a3052ae',\n", - " '3b5c0a9e-c6f2-44e9-ad05-fde071447564',\n", - " '3936bdf5-9a7a-462c-9e8c-9124f2bd7f57',\n", - " '13882059-3c74-4d9e-825d-a03a72b43b08',\n", - " '50c78f1a-3e9b-4996-a319-eef4fe01ccfb',\n", - " 'ae96f38f-68c8-47e3-95b3-c6f096d3c22e',\n", - " '74ba8a8a-7256-4eb3-b0f8-995f7a6319fb',\n", - " 
'12823a76-78e2-4b09-b606-859efaa5c8ef',\n", - " '9de9bf03-8c4a-4d2f-9a95-283b230ddfad',\n", - " '588593b9-9bba-4597-94d9-1b3a7fd5b402',\n", - " '5277b642-6bf0-4423-9350-3602ae51c6ac',\n", - " 'd98985b4-f55d-4ada-b279-0497e3176512',\n", - " 'c8586d36-f188-4684-aa99-193407d4d068',\n", - " '3798fda1-83cd-4e48-974a-e1a390060198',\n", - " 'a536b509-f052-4984-a35d-10397308daec',\n", - " '80996477-ce99-4f34-b5fc-bab4d676fc77',\n", - " 'cd1a740c-b1d7-4334-b335-925bd5708753',\n", - " '46af8908-f4e4-4041-9d1e-5b442d051921',\n", - " '2969075a-86d2-4b04-a991-a81832e096a0',\n", - " 'd0337f72-b701-4524-891b-c48ef6f771ec',\n", - " '591aa72b-511b-4dbb-a161-80458f257471']" + "['60387f5b-f134-4b11-a703-a8e49f5ddde2',\n", + " '3e212d00-396f-4e1c-9842-27a96083026c',\n", + " 'd3b9ee8e-e2e1-4946-b97b-173d6f79d578',\n", + " '8801a95f-4395-4702-98a5-d99229ffeb9b',\n", + " '93430d77-badf-4fb1-ad8d-09c551d8c4c2',\n", + " '2880f6d2-7ae1-4370-bb5d-7cacbbd0549f',\n", + " 'ad9ce7af-f69e-4e06-8a7e-11b48e95be16',\n", + " 'bf66ba56-d479-4ec3-bad7-c2279c9cd216',\n", + " 'f251ce50-480d-4449-99c3-f3ac6c37696b',\n", + " '61ee467f-437f-44cf-beea-3b3f05664484',\n", + " '4f65eea2-9923-4158-8354-4b81d3f4f549']" ] }, "execution_count": 9, @@ -392,9 +312,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamestateentity_typedonationsdonations_toreceiveddonations_from
050c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee456Pa Fraternal Order Of Police Pac177MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC
150c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee4395Pa Fraternal Order Of Police Pac3983UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...
250c7d9a1-b448-46a5-8e2d-cd15b3097360REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...MIcommittee1638Paa Pac437Pabar Pac (Pa Bar Assn)
362ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee3232UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...3213MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC
462ea1e9c-ac12-400c-b3dc-519389c0f7d3UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL...MIcommittee1251COMMITTEE TO ELECT DR PATRICIA BERNARD5912REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN...
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "1 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "2 50c7d9a1-b448-46a5-8e2d-cd15b3097360 \n", + "3 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", + "4 62ea1e9c-ac12-400c-b3dc-519389c0f7d3 \n", + "\n", + " name state entity_type \\\n", + "0 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "1 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "2 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... MI committee \n", + "3 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", + "4 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... MI committee \n", + "\n", + " donations donations_to received \\\n", + "0 456 Pa Fraternal Order Of Police Pac 177 \n", + "1 4395 Pa Fraternal Order Of Police Pac 3983 \n", + "2 1638 Paa Pac 437 \n", + "3 3232 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... 3213 \n", + "4 1251 COMMITTEE TO ELECT DR PATRICIA BERNARD 5912 \n", + "\n", + " donations_from \n", + "0 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "1 UNITED FOOD AND COMMERCIAL WORKERS ACTIVE BALL... \n", + "2 Pabar Pac (Pa Bar Assn) \n", + "3 MICHIGAN ASSOCIATION OF NURSE ANESTHETISTS PAC \n", + "4 REPUBLICAN STATE LEADERSHIP COMMITTEE MICHIGAN... 
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "data = {'id':['50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360','50c7d9a1-b448-46a5-8e2d-cd15b3097360',\n", " '62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3','62ea1e9c-ac12-400c-b3dc-519389c0f7d3',\n", @@ -482,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -522,122 +568,122 @@ " \n", " \n", " \n", - " 0\n", - " 7773a71e-9f67-438e-8313-80b1b75deeb4\n", - " 4544b60d-da6b-4dd5-9efe-334152ccf1f1\n", + " 7\n", + " e6f510a6-9a99-49f7-a702-ea47396c9f6c\n", + " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", " 2018\n", " 1000.0\n", - " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " 015b3bd6-fa5d-445e-9ccb-d249d4ef5d18\n", " none\n", - " bob worsley for state senate\n", - " contribute to a candidate committee\n", + " NaN\n", + " contribution from individuals\n", " NaN\n", " NaN\n", " NaN\n", - " #1022 arizona committee of automotive retailers\n", + " joseph w fisher,ii\n", " \n", " \n", - " 1\n", - " 95f74915-a945-491f-8751-8c970a76fc24\n", - " 946d7561-42a3-4a4b-b410-3a10271c9f18\n", + " 246\n", + " 9db8f578-6b1a-4d2c-9b7d-44cb5999116c\n", + " 262e24f9-69e0-4da5-ac52-b7aeb16d29bb\n", " 2018\n", - " 1000.0\n", - " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " 10.0\n", + " 304dce88-03a4-4107-813a-72b32a2398a6\n", " none\n", - " drew john for state house\n", - " contribute to a candidate committee\n", " NaN\n", + " contribution from individuals\n", " NaN\n", " NaN\n", - " #1022 arizona committee of automotive retailers\n", + " NaN\n", + " janet wilson\n", " \n", " \n", - " 2\n", - " d05f1763-132d-4717-addc-8ff6239ad4d9\n", - " c8f98436-9562-48ed-b51f-45b2b217aad1\n", + " 271\n", + " 6f032c4b-5032-44b1-be31-10629dd8b30c\n", + " 262e24f9-69e0-4da5-ac52-b7aeb16d29bb\n", " 2018\n", - " 1000.0\n", - " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " 
20.0\n", + " f31c772c-02e1-4b60-99fe-8a18195d04de\n", " none\n", - " elect karen fann ld1\n", - " contribute to a candidate committee\n", + " NaN\n", + " contribution from individuals\n", " NaN\n", " NaN\n", " NaN\n", - " #1022 arizona committee of automotive retailers\n", + " deanna melendez\n", " \n", " \n", - " 3\n", - " 3dc3da30-6562-4755-bfad-6a26f1baec15\n", - " b9965bc2-c94d-4f69-98d1-bc4f5ad701c5\n", + " 280\n", + " 8726a0a7-d293-4763-aefb-0afbd9b16ac6\n", + " 262e24f9-69e0-4da5-ac52-b7aeb16d29bb\n", " 2018\n", - " 1000.0\n", - " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " 15.0\n", + " f31c772c-02e1-4b60-99fe-8a18195d04de\n", " none\n", - " elect noel campbell for house\n", - " contribute to a candidate committee\n", " NaN\n", + " contribution from individuals\n", " NaN\n", " NaN\n", - " #1022 arizona committee of automotive retailers\n", + " NaN\n", + " deanna melendez\n", " \n", " \n", - " 4\n", - " a4340a2c-7b8a-4eeb-8290-746f0f436c83\n", - " 946d7561-42a3-4a4b-b410-3a10271c9f18\n", + " 588\n", + " d5ab6b6b-ad6f-4b9b-8b35-fd5f80475b40\n", + " 262e24f9-69e0-4da5-ac52-b7aeb16d29bb\n", " 2018\n", - " 1000.0\n", - " 981a0414-b738-4e20-91b8-a29ee2cc7edf\n", + " 20.0\n", + " 9cceeac2-3166-48e0-ae08-74e5a9d3c70c\n", " none\n", - " closed to new donations\n", - " refund from contrib to a cand committee\n", + " NaN\n", + " contribution from individuals\n", " NaN\n", " NaN\n", " NaN\n", - " #1022 arizona committee of automotive retailers\n", + " gail kamaras\n", " \n", " \n", "\n", "" ], "text/plain": [ - " transaction_id donor_id \\\n", - "0 7773a71e-9f67-438e-8313-80b1b75deeb4 4544b60d-da6b-4dd5-9efe-334152ccf1f1 \n", - "1 95f74915-a945-491f-8751-8c970a76fc24 946d7561-42a3-4a4b-b410-3a10271c9f18 \n", - "2 d05f1763-132d-4717-addc-8ff6239ad4d9 c8f98436-9562-48ed-b51f-45b2b217aad1 \n", - "3 3dc3da30-6562-4755-bfad-6a26f1baec15 b9965bc2-c94d-4f69-98d1-bc4f5ad701c5 \n", - "4 a4340a2c-7b8a-4eeb-8290-746f0f436c83 946d7561-42a3-4a4b-b410-3a10271c9f18 \n", + " 
transaction_id \\\n", + "7 e6f510a6-9a99-49f7-a702-ea47396c9f6c \n", + "246 9db8f578-6b1a-4d2c-9b7d-44cb5999116c \n", + "271 6f032c4b-5032-44b1-be31-10629dd8b30c \n", + "280 8726a0a7-d293-4763-aefb-0afbd9b16ac6 \n", + "588 d5ab6b6b-ad6f-4b9b-8b35-fd5f80475b40 \n", "\n", - " year amount recipient_id office_sought \\\n", - "0 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", - "1 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", - "2 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", - "3 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", - "4 2018 1000.0 981a0414-b738-4e20-91b8-a29ee2cc7edf none \n", + " donor_id year amount \\\n", + "7 981a0414-b738-4e20-91b8-a29ee2cc7edf 2018 1000.0 \n", + "246 262e24f9-69e0-4da5-ac52-b7aeb16d29bb 2018 10.0 \n", + "271 262e24f9-69e0-4da5-ac52-b7aeb16d29bb 2018 20.0 \n", + "280 262e24f9-69e0-4da5-ac52-b7aeb16d29bb 2018 15.0 \n", + "588 262e24f9-69e0-4da5-ac52-b7aeb16d29bb 2018 20.0 \n", "\n", - " purpose transaction_type \\\n", - "0 bob worsley for state senate contribute to a candidate committee \n", - "1 drew john for state house contribute to a candidate committee \n", - "2 elect karen fann ld1 contribute to a candidate committee \n", - "3 elect noel campbell for house contribute to a candidate committee \n", - "4 closed to new donations refund from contrib to a cand committee \n", + " recipient_id office_sought purpose \\\n", + "7 015b3bd6-fa5d-445e-9ccb-d249d4ef5d18 none NaN \n", + "246 304dce88-03a4-4107-813a-72b32a2398a6 none NaN \n", + "271 f31c772c-02e1-4b60-99fe-8a18195d04de none NaN \n", + "280 f31c772c-02e1-4b60-99fe-8a18195d04de none NaN \n", + "588 9cceeac2-3166-48e0-ae08-74e5a9d3c70c none NaN \n", "\n", - " donor_type recipient_type donor_office \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", + " transaction_type donor_type recipient_type donor_office \\\n", + "7 contribution from individuals NaN NaN NaN \n", + "246 
contribution from individuals NaN NaN NaN \n", + "271 contribution from individuals NaN NaN NaN \n", + "280 contribution from individuals NaN NaN NaN \n", + "588 contribution from individuals NaN NaN NaN \n", "\n", - " recipient_name \n", - "0 #1022 arizona committee of automotive retailers \n", - "1 #1022 arizona committee of automotive retailers \n", - "2 #1022 arizona committee of automotive retailers \n", - "3 #1022 arizona committee of automotive retailers \n", - "4 #1022 arizona committee of automotive retailers " + " recipient_name \n", + "7 joseph w fisher,ii \n", + "246 janet wilson \n", + "271 deanna melendez \n", + "280 deanna melendez \n", + "588 gail kamaras " ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -658,16 +704,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "87" + "105" ] }, - "execution_count": 13, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -679,7 +725,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -704,16 +750,12 @@ " \n", " \n", " id\n", - " first_name\n", - " last_name\n", " full_name\n", - " entity_type\n", " state\n", - " party\n", - " company\n", - " occupation\n", - " address\n", - " ...\n", + " entity_type\n", + " classification\n", + " transaction_id\n", + " donor_id\n", " year\n", " amount\n", " recipient_id\n", @@ -728,184 +770,161 @@ " \n", " \n", " \n", - " 55243\n", - " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", - " M.\n", - " TANG\n", - " m. 
tang ...\n", - " Individual\n", - " MD\n", - " NaN\n", - " NaN\n", + " 12011\n", + " 6c77a26b-7035-422f-bccb-4af4f400add8\n", + " mi act com for rural electrification\n", + " MI\n", + " corporation\n", + " neutral\n", " NaN\n", - " 6614 23RD PLACE\n", - " ...\n", + " 6c77a26b-7035-422f-bccb-4af4f400add8\n", " 2022.0\n", - " 2.0\n", - " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", + " 2000.00\n", + " ed2344d4-6570-4093-9326-13ee37d99e6d\n", " NaN\n", " NaN\n", " direct\n", " NaN\n", " NaN\n", " NaN\n", - " None\n", + " cte dan lauwers for senate\n", " \n", " \n", - " 55244\n", - " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", - " M.\n", - " TANG\n", - " m. tang ...\n", - " Individual\n", - " MD\n", - " NaN\n", - " NaN\n", + " 12037\n", + " 563fb08b-8811-4d33-9fcf-450e3fcfc8a9\n", + " operating engineers local #324\n", + " MI\n", + " corporation\n", + " neutral\n", " NaN\n", - " 6614 23RD PLACE\n", - " ...\n", + " 563fb08b-8811-4d33-9fcf-450e3fcfc8a9\n", " 2022.0\n", - " 95.0\n", - " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", + " 2500.00\n", + " 0b7514c0-c936-427c-8d0c-838534a1cb45\n", " NaN\n", " NaN\n", - " direct\n", + " direct/fund raiser\n", " NaN\n", " NaN\n", " NaN\n", - " None\n", + " committee to elect sara cambensy for 109th dis...\n", " \n", " \n", - " 55245\n", - " 0e24b503-b209-48b5-8edb-cca0cdaca78c\n", - " M.\n", - " TANG\n", - " m. 
tang ...\n", - " Individual\n", - " MD\n", - " NaN\n", - " NaN\n", + " 12039\n", + " cc7fcdbd-9fce-4aad-98ac-36441d4986fc\n", + " house republican camp committe\n", + " MI\n", + " corporation\n", + " neutral\n", " NaN\n", - " 6614 23RD PLACE\n", - " ...\n", + " cc7fcdbd-9fce-4aad-98ac-36441d4986fc\n", " 2022.0\n", - " 10.0\n", - " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", + " 500.00\n", + " 76d1b93c-bd7f-4c90-9940-e9b843e45ee2\n", " NaN\n", + " direct contribution\n", " NaN\n", - " direct\n", " NaN\n", " NaN\n", " NaN\n", - " None\n", + " michigan motorcycle riders pac\n", " \n", " \n", - " 55246\n", - " a23037f6-741c-43a5-8a6d-0f1db4371e1d\n", - " OLIVIA N\n", - " DALMASSO\n", - " olivia n dalmasso ...\n", - " Individual\n", - " IL\n", - " NaN\n", - " NaN\n", + " 12057\n", + " 3941536a-03a8-4049-b35b-6a975e1bbbf4\n", + " van dessel kathleen\n", + " MI\n", + " corporation\n", + " neutral\n", " NaN\n", - " PO BOX 574\n", - " ...\n", + " 3941536a-03a8-4049-b35b-6a975e1bbbf4\n", " 2022.0\n", - " 12.6\n", - " 6b33721f-3f6a-47c0-bce2-284fc58e0d2a\n", + " 26.01\n", + " 501217db-524f-4534-848e-78b9afe68961\n", " NaN\n", " NaN\n", " direct\n", " NaN\n", " NaN\n", " NaN\n", - " None\n", + " voters action committee (superpac)\n", " \n", " \n", - " 55247\n", - " a23037f6-741c-43a5-8a6d-0f1db4371e1d\n", - " OLIVIA N\n", - " DALMASSO\n", - " olivia n dalmasso ...\n", - " Individual\n", - " IL\n", - " NaN\n", - " NaN\n", + " 12058\n", + " 3941536a-03a8-4049-b35b-6a975e1bbbf4\n", + " van dessel kathleen\n", + " MI\n", + " corporation\n", + " neutral\n", " NaN\n", - " PO BOX 574\n", - " ...\n", + " 3941536a-03a8-4049-b35b-6a975e1bbbf4\n", " 2022.0\n", - " 4.2\n", - " 6b33721f-3f6a-47c0-bce2-284fc58e0d2a\n", + " 26.55\n", + " 501217db-524f-4534-848e-78b9afe68961\n", " NaN\n", " NaN\n", " direct\n", " NaN\n", " NaN\n", " NaN\n", - " None\n", + " voters action committee (superpac)\n", " \n", " \n", "\n", - "

5 rows × 25 columns

\n", "" ], "text/plain": [ - " id first_name \\\n", - "55243 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", - "55244 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", - "55245 0e24b503-b209-48b5-8edb-cca0cdaca78c M. \n", - "55246 a23037f6-741c-43a5-8a6d-0f1db4371e1d OLIVIA N \n", - "55247 a23037f6-741c-43a5-8a6d-0f1db4371e1d OLIVIA N \n", - "\n", - " last_name \\\n", - "55243 TANG \n", - "55244 TANG \n", - "55245 TANG \n", - "55246 DALMASSO \n", - "55247 DALMASSO \n", + " id \\\n", + "12011 6c77a26b-7035-422f-bccb-4af4f400add8 \n", + "12037 563fb08b-8811-4d33-9fcf-450e3fcfc8a9 \n", + "12039 cc7fcdbd-9fce-4aad-98ac-36441d4986fc \n", + "12057 3941536a-03a8-4049-b35b-6a975e1bbbf4 \n", + "12058 3941536a-03a8-4049-b35b-6a975e1bbbf4 \n", "\n", - " full_name entity_type state \\\n", - "55243 m. tang ... Individual MD \n", - "55244 m. tang ... Individual MD \n", - "55245 m. tang ... Individual MD \n", - "55246 olivia n dalmasso ... Individual IL \n", - "55247 olivia n dalmasso ... Individual IL \n", + " full_name state entity_type \\\n", + "12011 mi act com for rural electrification MI corporation \n", + "12037 operating engineers local #324 MI corporation \n", + "12039 house republican camp committe MI corporation \n", + "12057 van dessel kathleen MI corporation \n", + "12058 van dessel kathleen MI corporation \n", "\n", - " party company occupation address ... year amount \\\n", - "55243 NaN NaN NaN 6614 23RD PLACE ... 2022.0 2.0 \n", - "55244 NaN NaN NaN 6614 23RD PLACE ... 2022.0 95.0 \n", - "55245 NaN NaN NaN 6614 23RD PLACE ... 2022.0 10.0 \n", - "55246 NaN NaN NaN PO BOX 574 ... 2022.0 12.6 \n", - "55247 NaN NaN NaN PO BOX 574 ... 
2022.0 4.2 \n", + " classification transaction_id donor_id \\\n", + "12011 neutral NaN 6c77a26b-7035-422f-bccb-4af4f400add8 \n", + "12037 neutral NaN 563fb08b-8811-4d33-9fcf-450e3fcfc8a9 \n", + "12039 neutral NaN cc7fcdbd-9fce-4aad-98ac-36441d4986fc \n", + "12057 neutral NaN 3941536a-03a8-4049-b35b-6a975e1bbbf4 \n", + "12058 neutral NaN 3941536a-03a8-4049-b35b-6a975e1bbbf4 \n", "\n", - " recipient_id office_sought purpose \\\n", - "55243 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", - "55244 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", - "55245 49a2d46f-5e75-433c-94fa-f910e66d1a1e NaN NaN \n", - "55246 6b33721f-3f6a-47c0-bce2-284fc58e0d2a NaN NaN \n", - "55247 6b33721f-3f6a-47c0-bce2-284fc58e0d2a NaN NaN \n", + " year amount recipient_id office_sought \\\n", + "12011 2022.0 2000.00 ed2344d4-6570-4093-9326-13ee37d99e6d NaN \n", + "12037 2022.0 2500.00 0b7514c0-c936-427c-8d0c-838534a1cb45 NaN \n", + "12039 2022.0 500.00 76d1b93c-bd7f-4c90-9940-e9b843e45ee2 NaN \n", + "12057 2022.0 26.01 501217db-524f-4534-848e-78b9afe68961 NaN \n", + "12058 2022.0 26.55 501217db-524f-4534-848e-78b9afe68961 NaN \n", "\n", - " transaction_type donor_type recipient_type donor_office \\\n", - "55243 direct NaN NaN NaN \n", - "55244 direct NaN NaN NaN \n", - "55245 direct NaN NaN NaN \n", - "55246 direct NaN NaN NaN \n", - "55247 direct NaN NaN NaN \n", + " purpose transaction_type donor_type \\\n", + "12011 NaN direct NaN \n", + "12037 NaN direct/fund raiser NaN \n", + "12039 direct contribution NaN NaN \n", + "12057 NaN direct NaN \n", + "12058 NaN direct NaN \n", "\n", - " recipient_name \n", - "55243 None \n", - "55244 None \n", - "55245 None \n", - "55246 None \n", - "55247 None \n", + " recipient_type donor_office \\\n", + "12011 NaN NaN \n", + "12037 NaN NaN \n", + "12039 NaN NaN \n", + "12057 NaN NaN \n", + "12058 NaN NaN \n", "\n", - "[5 rows x 25 columns]" + " recipient_name \n", + "12011 cte dan lauwers for senate \n", + "12037 committee to elect sara cambensy for 
109th dis... \n", + "12039 michigan motorcycle riders pac \n", + "12057 voters action committee (superpac) \n", + "12058 voters action committee (superpac) " ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -915,37 +934,16 @@ "# is lost\n", "merged_inds_sample = pd.merge(inds_df,transactions,how='left',left_on='id',right_on='donor_id')\n", "merged_inds_sample.dropna(subset = ['amount'], inplace=True)\n", - "merged_inds_sample.tail(5)" + "merged_orgs_sample = pd.merge(orgs_df,transactions,how='left',left_on='id',right_on='donor_id')\n", + "merged_orgs_sample.dropna(subset = ['amount'], inplace=True)\n", + "merged_orgs_sample = merged_orgs_sample.rename(columns={'name':'full_name'})\n", + "\n", + "merged_orgs_sample.tail(5)" ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['id', 'first_name', 'last_name', 'full_name', 'entity_type', 'state',\n", - " 'party', 'company', 'occupation', 'address', 'zip', 'city',\n", - " 'classification', 'transaction_id', 'donor_id', 'year', 'amount',\n", - " 'recipient_id', 'office_sought', 'purpose', 'transaction_type',\n", - " 'donor_type', 'recipient_type', 'donor_office', 'recipient_name'],\n", - " dtype='object')" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "merged_inds_sample.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -969,149 +967,149 @@ " \n", " \n", " \n", + " id\n", + " full_name\n", + " state\n", + " entity_type\n", + " classification\n", + " transaction_id\n", " donor_id\n", + " year\n", + " amount\n", " recipient_id\n", - " full_name\n", + " ...\n", + " donor_office\n", " recipient_name\n", - " address\n", - " amount\n", - " city\n", - " classification\n", + " first_name\n", + " last_name\n", + " party\n", " company\n", - " 
donor_office\n", - " ...\n", " occupation\n", - " office_sought\n", - " party\n", - " purpose\n", - " recipient_type\n", - " state\n", - " transaction_id\n", - " transaction_type\n", - " year\n", + " address\n", " zip\n", + " city\n", " \n", " \n", " \n", " \n", - " 0\n", - " 0007b184-4e1d-401a-ba51-99733d2e13e7\n", - " d461f2bd-9074-44b3-8948-e659bead3e58\n", - " graham filler ...\n", - " saginaw county republican committee\n", - " 12705 WARM CREEK\n", - " 500.00\n", - " DEWITT\n", - " neutral\n", - " None\n", - " None\n", - " ...\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", + " 4\n", + " 041ae77f-bc8f-42e4-88d9-3db7177106ce\n", + " gale stephen\n", " MI\n", - " None\n", - " direct\n", + " corporation\n", + " neutral\n", + " NaN\n", + " 041ae77f-bc8f-42e4-88d9-3db7177106ce\n", " 2022.0\n", - " 48820-0000\n", + " 50.0\n", + " 501217db-524f-4534-848e-78b9afe68961\n", + " ...\n", + " NaN\n", + " voters action committee (superpac)\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 1\n", - " 00523627-46c7-4f76-ab42-fb2c1fbac1b1\n", - " 6126e78b-4e80-4361-a019-9d99aa1623ed\n", - " daniel millstone ...\n", - " rooted in community leadership pac\n", - " 10518 ROUNTREE RD\n", - " 0.77\n", - " LOS ANGELES\n", + " 22\n", + " a927c8d6-bfba-4a0c-97a6-72ab136a1dd5\n", + " connor loftus\n", + " MI\n", + " corporation\n", " neutral\n", - " None\n", - " None\n", - " ...\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " CA\n", - " None\n", - " direct\n", + " NaN\n", + " a927c8d6-bfba-4a0c-97a6-72ab136a1dd5\n", " 2022.0\n", - " 90064-0000\n", + " 500.0\n", + " 0b7514c0-c936-427c-8d0c-838534a1cb45\n", + " ...\n", + " NaN\n", + " committee to elect sara cambensy for 109th dis...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 2\n", - " 00934782-86e5-4941-94cf-0a700100a2c0\n", - " 
2d1a0919-218e-4692-98ec-c4a73a126482\n", - " josie petersheim ...\n", - " mi greenstone pac\n", - " 7196 W. BRIGGS RD.\n", - " 25.00\n", - " STANTON\n", - " neutral\n", - " None\n", - " None\n", - " ...\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", + " 23\n", + " a927c8d6-bfba-4a0c-97a6-72ab136a1dd5\n", + " connor loftus\n", " MI\n", - " None\n", - " direct\n", + " corporation\n", + " neutral\n", + " NaN\n", + " a927c8d6-bfba-4a0c-97a6-72ab136a1dd5\n", " 2022.0\n", - " 48888-0000\n", + " 450.0\n", + " 0b7514c0-c936-427c-8d0c-838534a1cb45\n", + " ...\n", + " NaN\n", + " committee to elect sara cambensy for 109th dis...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 3\n", - " 00f22bdd-96bf-4074-9620-4737e8444958\n", - " af8417ee-5bca-49f5-91e9-d2de65d73631\n", - " robert doerfler ...\n", - " michigan senate democratic fund\n", - " 1534 NE 5TH AVE\n", - " 50.00\n", - " FORT LAUDERDALE\n", + " 55\n", + " 1d5af863-be97-4601-93a0-e0d623281538\n", + " williamsjason\n", + " MI\n", + " corporation\n", " neutral\n", - " None\n", - " None\n", - " ...\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " FL\n", - " None\n", - " direct\n", + " NaN\n", + " 1d5af863-be97-4601-93a0-e0d623281538\n", " 2022.0\n", - " 33304-1006\n", + " 7.0\n", + " 44751f04-3ce1-4288-979c-8a35b9d5e89c\n", + " ...\n", + " NaN\n", + " teamsters 243 political action committee\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 4\n", - " 0138403b-b5b9-453a-a1d2-b6ed9fa5fe58\n", - " 6126e78b-4e80-4361-a019-9d99aa1623ed\n", - " joseph martinez ...\n", - " rooted in community leadership pac\n", - " 139 HURON AVE\n", - " 1.65\n", - " MOUNT CLEMENS\n", - " neutral\n", - " None\n", - " None\n", - " ...\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", + " 56\n", + " 1d5af863-be97-4601-93a0-e0d623281538\n", + " 
williamsjason\n", " MI\n", - " None\n", - " direct\n", + " corporation\n", + " neutral\n", + " NaN\n", + " 1d5af863-be97-4601-93a0-e0d623281538\n", " 2022.0\n", - " 48043-0000\n", + " 6.0\n", + " 44751f04-3ce1-4288-979c-8a35b9d5e89c\n", + " ...\n", + " NaN\n", + " teamsters 243 political action committee\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", " ...\n", @@ -1138,596 +1136,424 @@ " ...\n", " \n", " \n", - " 1120\n", - " fdccce6b-e55f-4f1d-bd95-1714f2a667ed\n", - " a3fe20e2-8019-448e-9b54-bfdce4d87f2f\n", - " michael olthoff ...\n", - " bumstead leadership fund\n", - " 1499 MIDDLEBROOK DR\n", - " 1000.00\n", - " NORTON SHORES\n", + " 34717\n", + " a79d898d-37a9-4dac-9434-4345c4491726\n", + " richard hill ...\n", + " MI\n", + " Individual\n", " neutral\n", - " nichols\n", - " None\n", + " NaN\n", + " a79d898d-37a9-4dac-9434-4345c4491726\n", + " 2022.0\n", + " 250.0\n", + " 666d21c4-f346-46e7-8949-1431dfeba9f6\n", " ...\n", - " ceo\n", - " None\n", - " None\n", - " None\n", - " None\n", - " MI\n", + " NaN\n", " None\n", - " direct\n", - " 2022.0\n", - " 49441-0000\n", + " RICHARD\n", + " HILL\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 31853 EDWARDS ST\n", + " 49047-9324\n", + " DOWAGIAC\n", " \n", " \n", - " 1121\n", - " fe969829-b8a4-4d38-88e2-8314b340d567\n", - " 6126e78b-4e80-4361-a019-9d99aa1623ed\n", - " joanna simon ...\n", - " rooted in community leadership pac\n", - " 1546 POPLAR GROVE DR\n", - " 3.82\n", - " RESTON\n", + " 34718\n", + " bd535dbd-d8b7-4d42-bfaf-99fa4c91aabe\n", + " fern slotman ^ ...\n", + " MI\n", + " Individual\n", " neutral\n", - " None\n", - " None\n", + " NaN\n", + " bd535dbd-d8b7-4d42-bfaf-99fa4c91aabe\n", + " 2022.0\n", + " 20.0\n", + " 2170560a-af48-498f-9b55-01f5b74a3fcd\n", " ...\n", + " NaN\n", " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " VA\n", - " None\n", - " direct\n", - " 2022.0\n", - " 20194-1731\n", + " FERN\n", + " SLOTMAN ^\n", + 
" NaN\n", + " jackie's place restaurant\n", + " waitress\n", + " 4354 127TH AVE\n", + " 49010-0000\n", + " ALLEGAN\n", " \n", " \n", - " 1122\n", - " ff1423ba-ff5e-4bc1-b864-303a9dcc9b32\n", - " 6126e78b-4e80-4361-a019-9d99aa1623ed\n", - " adriana p{on ce ...\n", - " rooted in community leadership pac\n", - " 9 BIRCH CT\n", - " 3.82\n", - " NORMAL\n", + " 34719\n", + " 7d719953-640a-43b3-9d6c-91951ba89863\n", + " karen macdonell ...\n", + " MI\n", + " Individual\n", " neutral\n", - " None\n", - " None\n", + " NaN\n", + " 7d719953-640a-43b3-9d6c-91951ba89863\n", + " 2022.0\n", + " 10.0\n", + " 092ed0bf-ecbd-40a5-aa14-e81b534939a9\n", " ...\n", + " NaN\n", " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " IL\n", - " None\n", - " direct\n", - " 2022.0\n", - " 61761-3900\n", + " KAREN\n", + " MACDONELL\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 21758 RATHLONE DR\n", + " 48167-2829\n", + " NORTHVILLE\n", " \n", " \n", - " 1123\n", - " ff24644e-d64a-4a8a-a87f-cdb53b86dd63\n", - " 6126e78b-4e80-4361-a019-9d99aa1623ed\n", - " david friedman ...\n", - " rooted in community leadership pac\n", - " 8823 MOUNTAIN PATH CIR\n", - " 0.15\n", - " AUSTIN\n", + " 34720\n", + " 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18\n", + " ellen trzaskowski ...\n", + " AZ\n", + " Individual\n", " neutral\n", - " None\n", - " None\n", + " NaN\n", + " 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18\n", + " 2022.0\n", + " 25.0\n", + " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", " ...\n", + " NaN\n", " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " TX\n", - " None\n", - " direct\n", - " 2022.0\n", - " 78759-0000\n", + " ELLEN\n", + " TRZASKOWSKI\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 1984 E BALBOA DR\n", + " 85282-0000\n", + " TEMPE\n", " \n", " \n", - " 1124\n", - " ffb25947-c03f-43b2-abb4-23531cdb7324\n", - " 7f272fe4-d592-453c-9ca1-315ea3fdcff1\n", - " dennis starner ...\n", - " bill g schuette for state representative\n", - " 4612 CONGRESS DRIVE\n", - " 525.00\n", - " 
MIDLAND\n", + " 34721\n", + " 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18\n", + " ellen trzaskowski ...\n", + " AZ\n", + " Individual\n", " neutral\n", - " retired\n", - " None\n", + " NaN\n", + " 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18\n", + " 2022.0\n", + " 15.0\n", + " 49a2d46f-5e75-433c-94fa-f910e66d1a1e\n", " ...\n", - " retired\n", - " None\n", - " None\n", - " None\n", - " None\n", - " MI\n", + " NaN\n", " None\n", - " direct/fund raiser\n", - " 2022.0\n", - " 48642-0000\n", + " ELLEN\n", + " TRZASKOWSKI\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 1984 E BALBOA DR\n", + " 85282-0000\n", + " TEMPE\n", " \n", " \n", "\n", - "

1125 rows × 25 columns

\n", + "

36946 rows × 25 columns

\n", "" ], "text/plain": [ - " donor_id \\\n", - "0 0007b184-4e1d-401a-ba51-99733d2e13e7 \n", - "1 00523627-46c7-4f76-ab42-fb2c1fbac1b1 \n", - "2 00934782-86e5-4941-94cf-0a700100a2c0 \n", - "3 00f22bdd-96bf-4074-9620-4737e8444958 \n", - "4 0138403b-b5b9-453a-a1d2-b6ed9fa5fe58 \n", - "... ... \n", - "1120 fdccce6b-e55f-4f1d-bd95-1714f2a667ed \n", - "1121 fe969829-b8a4-4d38-88e2-8314b340d567 \n", - "1122 ff1423ba-ff5e-4bc1-b864-303a9dcc9b32 \n", - "1123 ff24644e-d64a-4a8a-a87f-cdb53b86dd63 \n", - "1124 ffb25947-c03f-43b2-abb4-23531cdb7324 \n", + " id \\\n", + "4 041ae77f-bc8f-42e4-88d9-3db7177106ce \n", + "22 a927c8d6-bfba-4a0c-97a6-72ab136a1dd5 \n", + "23 a927c8d6-bfba-4a0c-97a6-72ab136a1dd5 \n", + "55 1d5af863-be97-4601-93a0-e0d623281538 \n", + "56 1d5af863-be97-4601-93a0-e0d623281538 \n", + "... ... \n", + "34717 a79d898d-37a9-4dac-9434-4345c4491726 \n", + "34718 bd535dbd-d8b7-4d42-bfaf-99fa4c91aabe \n", + "34719 7d719953-640a-43b3-9d6c-91951ba89863 \n", + "34720 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18 \n", + "34721 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18 \n", + "\n", + " full_name state entity_type \\\n", + "4 gale stephen MI corporation \n", + "22 connor loftus MI corporation \n", + "23 connor loftus MI corporation \n", + "55 williamsjason MI corporation \n", + "56 williamsjason MI corporation \n", + "... ... ... ... \n", + "34717 richard hill ... MI Individual \n", + "34718 fern slotman ^ ... MI Individual \n", + "34719 karen macdonell ... MI Individual \n", + "34720 ellen trzaskowski ... AZ Individual \n", + "34721 ellen trzaskowski ... AZ Individual \n", "\n", - " recipient_id \\\n", - "0 d461f2bd-9074-44b3-8948-e659bead3e58 \n", - "1 6126e78b-4e80-4361-a019-9d99aa1623ed \n", - "2 2d1a0919-218e-4692-98ec-c4a73a126482 \n", - "3 af8417ee-5bca-49f5-91e9-d2de65d73631 \n", - "4 6126e78b-4e80-4361-a019-9d99aa1623ed \n", - "... ... 
\n", - "1120 a3fe20e2-8019-448e-9b54-bfdce4d87f2f \n", - "1121 6126e78b-4e80-4361-a019-9d99aa1623ed \n", - "1122 6126e78b-4e80-4361-a019-9d99aa1623ed \n", - "1123 6126e78b-4e80-4361-a019-9d99aa1623ed \n", - "1124 7f272fe4-d592-453c-9ca1-315ea3fdcff1 \n", + " classification transaction_id donor_id \\\n", + "4 neutral NaN 041ae77f-bc8f-42e4-88d9-3db7177106ce \n", + "22 neutral NaN a927c8d6-bfba-4a0c-97a6-72ab136a1dd5 \n", + "23 neutral NaN a927c8d6-bfba-4a0c-97a6-72ab136a1dd5 \n", + "55 neutral NaN 1d5af863-be97-4601-93a0-e0d623281538 \n", + "56 neutral NaN 1d5af863-be97-4601-93a0-e0d623281538 \n", + "... ... ... ... \n", + "34717 neutral NaN a79d898d-37a9-4dac-9434-4345c4491726 \n", + "34718 neutral NaN bd535dbd-d8b7-4d42-bfaf-99fa4c91aabe \n", + "34719 neutral NaN 7d719953-640a-43b3-9d6c-91951ba89863 \n", + "34720 neutral NaN 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18 \n", + "34721 neutral NaN 628ddb92-06d8-4bf0-aa5c-0e3af4fa4c18 \n", "\n", - " full_name \\\n", - "0 graham filler ... \n", - "1 daniel millstone ... \n", - "2 josie petersheim ... \n", - "3 robert doerfler ... \n", - "4 joseph martinez ... \n", - "... ... \n", - "1120 michael olthoff ... \n", - "1121 joanna simon ... \n", - "1122 adriana p{on ce ... \n", - "1123 david friedman ... \n", - "1124 dennis starner ... \n", + " year amount recipient_id ... donor_office \\\n", + "4 2022.0 50.0 501217db-524f-4534-848e-78b9afe68961 ... NaN \n", + "22 2022.0 500.0 0b7514c0-c936-427c-8d0c-838534a1cb45 ... NaN \n", + "23 2022.0 450.0 0b7514c0-c936-427c-8d0c-838534a1cb45 ... NaN \n", + "55 2022.0 7.0 44751f04-3ce1-4288-979c-8a35b9d5e89c ... NaN \n", + "56 2022.0 6.0 44751f04-3ce1-4288-979c-8a35b9d5e89c ... NaN \n", + "... ... ... ... ... ... \n", + "34717 2022.0 250.0 666d21c4-f346-46e7-8949-1431dfeba9f6 ... NaN \n", + "34718 2022.0 20.0 2170560a-af48-498f-9b55-01f5b74a3fcd ... NaN \n", + "34719 2022.0 10.0 092ed0bf-ecbd-40a5-aa14-e81b534939a9 ... NaN \n", + "34720 2022.0 25.0 49a2d46f-5e75-433c-94fa-f910e66d1a1e ... 
NaN \n", + "34721 2022.0 15.0 49a2d46f-5e75-433c-94fa-f910e66d1a1e ... NaN \n", "\n", - " recipient_name address \\\n", - "0 saginaw county republican committee 12705 WARM CREEK \n", - "1 rooted in community leadership pac 10518 ROUNTREE RD \n", - "2 mi greenstone pac 7196 W. BRIGGS RD. \n", - "3 michigan senate democratic fund 1534 NE 5TH AVE \n", - "4 rooted in community leadership pac 139 HURON AVE \n", - "... ... ... \n", - "1120 bumstead leadership fund 1499 MIDDLEBROOK DR \n", - "1121 rooted in community leadership pac 1546 POPLAR GROVE DR \n", - "1122 rooted in community leadership pac 9 BIRCH CT \n", - "1123 rooted in community leadership pac 8823 MOUNTAIN PATH CIR \n", - "1124 bill g schuette for state representative 4612 CONGRESS DRIVE \n", + " recipient_name \\\n", + "4 voters action committee (superpac) \n", + "22 committee to elect sara cambensy for 109th dis... \n", + "23 committee to elect sara cambensy for 109th dis... \n", + "55 teamsters 243 political action committee \n", + "56 teamsters 243 political action committee \n", + "... ... \n", + "34717 None \n", + "34718 None \n", + "34719 None \n", + "34720 None \n", + "34721 None \n", "\n", - " amount city classification company donor_office ... \\\n", - "0 500.00 DEWITT neutral None None ... \n", - "1 0.77 LOS ANGELES neutral None None ... \n", - "2 25.00 STANTON neutral None None ... \n", - "3 50.00 FORT LAUDERDALE neutral None None ... \n", - "4 1.65 MOUNT CLEMENS neutral None None ... \n", - "... ... ... ... ... ... ... \n", - "1120 1000.00 NORTON SHORES neutral nichols None ... \n", - "1121 3.82 RESTON neutral None None ... \n", - "1122 3.82 NORMAL neutral None None ... \n", - "1123 0.15 AUSTIN neutral None None ... \n", - "1124 525.00 MIDLAND neutral retired None ... \n", + " first_name last_name party \\\n", + "4 NaN NaN NaN \n", + "22 NaN NaN NaN \n", + "23 NaN NaN NaN \n", + "55 NaN NaN NaN \n", + "56 NaN NaN NaN \n", + "... ... ... ... 
\n", + "34717 RICHARD HILL NaN \n", + "34718 FERN SLOTMAN ^ NaN \n", + "34719 KAREN MACDONELL NaN \n", + "34720 ELLEN TRZASKOWSKI NaN \n", + "34721 ELLEN TRZASKOWSKI NaN \n", "\n", - " occupation office_sought party purpose recipient_type state \\\n", - "0 None None None None None MI \n", - "1 None None None None None CA \n", - "2 None None None None None MI \n", - "3 None None None None None FL \n", - "4 None None None None None MI \n", - "... ... ... ... ... ... ... \n", - "1120 ceo None None None None MI \n", - "1121 None None None None None VA \n", - "1122 None None None None None IL \n", - "1123 None None None None None TX \n", - "1124 retired None None None None MI \n", + " company occupation address zip \\\n", + "4 NaN NaN NaN NaN \n", + "22 NaN NaN NaN NaN \n", + "23 NaN NaN NaN NaN \n", + "55 NaN NaN NaN NaN \n", + "56 NaN NaN NaN NaN \n", + "... ... ... ... ... \n", + "34717 NaN NaN 31853 EDWARDS ST 49047-9324 \n", + "34718 jackie's place restaurant waitress 4354 127TH AVE 49010-0000 \n", + "34719 NaN NaN 21758 RATHLONE DR 48167-2829 \n", + "34720 NaN NaN 1984 E BALBOA DR 85282-0000 \n", + "34721 NaN NaN 1984 E BALBOA DR 85282-0000 \n", "\n", - " transaction_id transaction_type year zip \n", - "0 None direct 2022.0 48820-0000 \n", - "1 None direct 2022.0 90064-0000 \n", - "2 None direct 2022.0 48888-0000 \n", - "3 None direct 2022.0 33304-1006 \n", - "4 None direct 2022.0 48043-0000 \n", - "... ... ... ... ... \n", - "1120 None direct 2022.0 49441-0000 \n", - "1121 None direct 2022.0 20194-1731 \n", - "1122 None direct 2022.0 61761-3900 \n", - "1123 None direct 2022.0 78759-0000 \n", - "1124 None direct/fund raiser 2022.0 48642-0000 \n", + " city \n", + "4 NaN \n", + "22 NaN \n", + "23 NaN \n", + "55 NaN \n", + "56 NaN \n", + "... ... 
\n", + "34717 DOWAGIAC \n", + "34718 ALLEGAN \n", + "34719 NORTHVILLE \n", + "34720 TEMPE \n", + "34721 TEMPE \n", "\n", - "[1125 rows x 25 columns]" + "[36946 rows x 25 columns]" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "attribute_cols = merged_inds_sample.columns.difference(['donor_id','recipient_id','full_name','recipient_name'])\n", - "agg_functions = {col: 'sum' if col == 'amount' else 'first' for col in attribute_cols}\n", - "grouped_sample = merged_inds_sample.groupby(['donor_id','recipient_id','full_name','recipient_name']).agg(agg_functions).reset_index()\n", - "grouped_sample" + "merged_sample = pd.concat([merged_orgs_sample, merged_inds_sample])\n", + "merged_sample" ] }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph:\n", - " G = nx.MultiDiGraph()\n", - " # first check if df is individuals or organizations dataset\n", - " if \"name\" in df.columns:\n", - " node_name = \"name\"\n", - " else:\n", - " node_name = \"full_name\"\n", - " \n", - " transact_info = ['office_sought', 'purpose', 'transaction_type', 'year','transaction_id','donor_office','amount']\n", - " for _, row in df.iterrows(): \n", - " # add node attributes based on the columns relevant to the entity\n", - " G.add_node(row[node_name])\n", - " for column in df.columns.difference(transact_info):\n", - " if not pd.isnull(row[column]):\n", - " G.nodes[row[node_name]][column] = row[column]\n", - " \n", - " # link the donor node to the recipient node. 
add the attributes of the\n", - " # edge based on relevant nodes \n", - " edge_dictionary = {}\n", - " for column in transact_info:\n", - " if not pd.isnull(row[column]):\n", - " edge_dictionary[column] = row[column]\n", - " G.add_edge(row[node_name], row['recipient_name'], **edge_dictionary)\n", - "\n", - " # the added 'recipient_name' node has no attributes at this moment\n", - " # for the final code this line won't be necessary, as each recipient\n", - " # should ideally be referenced later on. For now, all added nodes for\n", - " # the recipient will only have one default attribute: classification\n", - " G.nodes[row['recipient_name']]['classification'] = 'neutral' \n", - " \n", - " edge_labels = {(u,v):d['amount'] for u,v,d in G.edges(data=True)}\n", - " entity_colors = {'neutral': 'green', 'c':'blue', 'f':'red'}\n", - " node_colors = [entity_colors[G.nodes[node]['classification']] for node in G.nodes()]\n", - "\n", - " nx.draw_planar(G, with_labels=False,node_color=node_colors)\n", - " plt.figure(3,figsize=(12,12)) \n", - " nx.draw_networkx_edge_labels(G, pos=nx.planar_layout(G),edge_labels=edge_labels, label_pos=0.5)\n", - "\n", - " #nx.draw_planar(G, with_labels=False)\n", - " plt.show()\n", - " return G" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{}" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#for u,v in G.nodes(data=True):\n", - " #print(u)#['classification'])\n", - " \n", - "G.nodes['michigan association of health plans political action committee']#['classification'])#['nancy davis ']['classification']" - ] - }, - { - "cell_type": "code", - "execution_count": 66, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['neutral', 'f'], dtype=object)" + "Index(['donor_id', 'recipient_id', 'full_name', 'recipient_name', 'address',\n", + " 'amount', 'city', 
'classification', 'company', 'donor_office',\n", + " 'donor_type', 'entity_type', 'first_name', 'last_name', 'occupation',\n", + " 'office_sought', 'party', 'purpose', 'recipient_type', 'state',\n", + " 'transaction_id', 'transaction_type', 'year', 'zip'],\n", + " dtype='object')" ] }, - "execution_count": 66, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "grouped_sample.classification.unique()" + "attribute_cols = merged_sample.columns.difference(['donor_id','recipient_id','full_name','recipient_name'])\n", + "agg_functions = {col: 'sum' if col == 'amount' else 'first' for col in attribute_cols}\n", + "grouped_sample = merged_sample.groupby(['donor_id','recipient_id','full_name','recipient_name']).agg(agg_functions).reset_index()\n", + "grouped_sample = grouped_sample.drop(['id'], axis=1)\n", + "grouped_sample.columns" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 30, "metadata": {}, "outputs": [ { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "{'address': '3836 BRISTOL CT', 'city': 'CLARKSTON ', 'classification': 'neutral', 'donor_id': 'c7f7a9e5-2e9e-47d1-92f6-2238c7ce301a', 'entity_type': 'Individual', 'first_name': 'THERESA ', 'full_name': 'theresa fougnie ', 'id': 'c7f7a9e5-2e9e-47d1-92f6-2238c7ce301a', 'last_name': 'FOUGNIE ', 'recipient_id': '520c9ce3-c702-4926-8688-750984ee6c0d', 'recipient_name': 'friends of sarah may seward', 'state': 'MI', 'zip': '48348-3610'}\n", - "{'classification': 'neutral'}\n", - "{'address': '330 BROAD ST APT 1', 'city': 'SPRING CITY ', 'classification': 'neutral', 'donor_id': '318b9b37-369b-45ba-9802-27177198e694', 'entity_type': 'Individual', 'first_name': 'ERIC ', 'full_name': 'eric oconnor ', 'id': '318b9b37-369b-45ba-9802-27177198e694', 'last_name': 'OCONNOR ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'PA', 'zip': '19475-1763'}\n", - "{'classification': 'neutral'}\n", - "{'address': '15 W260 FILLMORE ST', 'city': 'ELMHURST ', 'classification': 'neutral', 'donor_id': '283c7a56-1298-4003-b4b3-e4519b6077b0', 'entity_type': 'Individual', 'first_name': 'EVELYN ', 'full_name': 'evelyn pape ', 'id': '283c7a56-1298-4003-b4b3-e4519b6077b0', 'last_name': 'PAPE ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'IL', 'zip': '60126-5349'}\n", - "{'classification': 'neutral'}\n", - "{'address': '16190 DOBBINS DR', 'city': 'ALBION ', 'classification': 'neutral', 'donor_id': '306d7309-ccc7-457e-a263-394b1143dacb', 'entity_type': 'Individual', 'first_name': 'STEPHANIE ', 'full_name': 'stephanie dobbins ', 'id': '306d7309-ccc7-457e-a263-394b1143dacb', 'last_name': 'DOBBINS ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 
'MI', 'zip': '49224-9689'}\n", - "{'address': '3685 CREEKSIDE DRIVE', 'city': 'DORR ', 'classification': 'neutral', 'donor_id': '57069727-fd76-4630-9d36-b786d0992b4a', 'entity_type': 'Individual', 'first_name': 'ANNETTE ', 'full_name': 'annette magyar ', 'id': '57069727-fd76-4630-9d36-b786d0992b4a', 'last_name': 'MAGYAR ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MI', 'zip': '49323-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '13330 CAMINITO MAR VILLA', 'city': 'DEL MAR ', 'classification': 'neutral', 'donor_id': 'a4a903b8-a178-4fcc-ae7b-cd6852b447a0', 'entity_type': 'Individual', 'first_name': 'MICHAEL ', 'full_name': 'michael finley ', 'id': 'a4a903b8-a178-4fcc-ae7b-cd6852b447a0', 'last_name': 'FINLEY ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '92014-3614'}\n", - "{'address': '52 PINE HILL RD', 'city': 'ASHLAND ', 'classification': 'neutral', 'donor_id': 'fd303393-0697-48f6-b704-bce3a6b36e04', 'entity_type': 'Individual', 'first_name': 'JANE ', 'full_name': 'jane malick-nugent ', 'id': 'fd303393-0697-48f6-b704-bce3a6b36e04', 'last_name': 'MALICK-NUGENT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MA', 'zip': '01721-1169'}\n", - "{'address': '3708 OMAHA', 'city': 'GRANDVILLE ', 'classification': 'neutral', 'donor_id': '2ac954cd-d5a2-4d94-b087-adb400d05d25', 'entity_type': 'Individual', 'first_name': 'MARY ', 'full_name': 'mary bristol ', 'id': '2ac954cd-d5a2-4d94-b087-adb400d05d25', 'last_name': 'BRISTOL ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MI', 'zip': '49418-0000'}\n", - "{'address': '817 VERDALE DR', 'city': 'SPEARFISH ', 'classification': 'neutral', 'donor_id': '243d42aa-2d89-4df0-81c8-30b0eb2bb514', 'entity_type': 
'Individual', 'first_name': 'TIARA ', 'full_name': 'tiara heckenlaible ', 'id': '243d42aa-2d89-4df0-81c8-30b0eb2bb514', 'last_name': 'HECKENLAIBLE ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'SD', 'zip': '57783-1636'}\n", - "{'address': '2954 BAY VILLAGE CIR APT 1074', 'city': 'SANTA ROSA ', 'classification': 'neutral', 'donor_id': '5fce81ac-a80a-4153-9893-a4f117312808', 'entity_type': 'Individual', 'first_name': 'JENNIFER ', 'full_name': 'jennifer ellis ', 'id': '5fce81ac-a80a-4153-9893-a4f117312808', 'last_name': 'ELLIS ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '95403-2288'}\n", - "{'address': '12606 CEDAR CROSSINGS DR', 'city': 'CHARLOTTE ', 'classification': 'neutral', 'donor_id': '0b7ab244-7d09-40f6-9da9-04492dca4c59', 'entity_type': 'Individual', 'first_name': 'MARGARET ', 'full_name': 'margaret johnson ', 'id': '0b7ab244-7d09-40f6-9da9-04492dca4c59', 'last_name': 'JOHNSON ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NC', 'zip': '28273-8868'}\n", - "{'address': '7730 BOHM RD', 'city': 'IMLAY CITY ', 'classification': 'neutral', 'donor_id': '519dfef0-05c0-4759-851a-8caa7f56ff1d', 'entity_type': 'Individual', 'first_name': 'BETTY ', 'full_name': 'betty burton ', 'id': '519dfef0-05c0-4759-851a-8caa7f56ff1d', 'last_name': 'BURTON ', 'recipient_id': '7e56adfa-c5e4-459d-b280-92a2c67e8602', 'recipient_name': 'lapeer county democratic party', 'state': 'MI', 'zip': '48444-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '107 MEYERS AVE', 'city': 'JACKSON ', 'classification': 'neutral', 'donor_id': '67ef676e-27a0-40d5-8f5c-9bfae6f80a88', 'entity_type': 'Individual', 'first_name': 'TERRY ', 'full_name': 'terry applegate ', 'id': '67ef676e-27a0-40d5-8f5c-9bfae6f80a88', 'last_name': 'APPLEGATE ', 
'recipient_id': 'a9c205c4-6e86-465d-b9f8-55400317be37', 'recipient_name': 'sheet metal workers local 7 pac', 'state': 'MI', 'zip': '49203-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '4890 GARDENER RD.', 'city': 'METAMORIA ', 'classification': 'neutral', 'company': 'retired', 'donor_id': '31c2546b-6967-4625-8266-2ca498d7b0e1', 'entity_type': 'Individual', 'first_name': 'DIANE ', 'full_name': 'diane scott ', 'id': '31c2546b-6967-4625-8266-2ca498d7b0e1', 'last_name': 'SCOTT ', 'occupation': 'homemaker', 'recipient_id': '4a4659c5-77ec-4e8e-a171-48d9266cd78f', 'recipient_name': 'teamsters 406 political action committee', 'state': 'MI', 'zip': '48455-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '1033 N. PAULINA ST UNIT 1R', 'city': 'CHICAGO ', 'classification': 'neutral', 'company': 'capital area housing pship', 'donor_id': 'f2afa0d1-b1f9-4278-9df4-c5bf2c01c65b', 'entity_type': 'Individual', 'first_name': 'CURTIS ', 'full_name': 'curtis audette ', 'id': 'f2afa0d1-b1f9-4278-9df4-c5bf2c01c65b', 'last_name': 'AUDETTE ', 'occupation': 'marketing director', 'recipient_id': '9187a1f9-7b89-47cc-b136-04b272161da1', 'recipient_name': 'will snyder majority fund', 'state': 'IL', 'zip': '60622-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '1490 7TH ST NW APT 210', 'city': 'WASHINGTON ', 'classification': 'neutral', 'donor_id': '7ca75427-170a-4b3b-8e26-1fdd95e7590f', 'entity_type': 'Individual', 'first_name': 'RITA ', 'full_name': 'rita collins ', 'id': '7ca75427-170a-4b3b-8e26-1fdd95e7590f', 'last_name': 'COLLINS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'DC', 'zip': '20001-3389'}\n", - "{'address': '4358 FOXPOINTE DRIVE', 'city': 'WEST BLOOMFILED ', 'classification': 'neutral', 'donor_id': 'c2269438-d978-4732-a2c1-f2621514a1f1', 'entity_type': 'Individual', 'first_name': 'LAURA ', 'full_name': 'laura noveck ', 'id': 
'c2269438-d978-4732-a2c1-f2621514a1f1', 'last_name': 'NOVECK ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '48323-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '83 ANCHOR DR', 'city': 'INDIAN HARBOUR BEACH', 'classification': 'neutral', 'donor_id': '2800af86-a826-4ee1-a2b3-3b8d454b229d', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james bangerter ', 'id': '2800af86-a826-4ee1-a2b3-3b8d454b229d', 'last_name': 'BANGERTER ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'FL', 'zip': '32937-3563'}\n", - "{'address': '1978 EDGEWOOD BLVD', 'city': 'BERKLEY ', 'classification': 'neutral', 'donor_id': 'b61a2f45-5a13-401a-b0c2-470368e45a95', 'entity_type': 'Individual', 'first_name': 'LISA ', 'full_name': 'lisa turner ', 'id': 'b61a2f45-5a13-401a-b0c2-470368e45a95', 'last_name': 'TURNER ', 'recipient_id': '116b2364-8dc9-4ec5-83ad-0f43db55c764', 'recipient_name': 'committee to elect natalie price', 'state': 'MI', 'zip': '48072-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '1791 WALLACE ST', 'city': 'SIMI VALLEY ', 'classification': 'neutral', 'company': 'county of ventura', 'donor_id': '05fbf8f2-14e5-468f-ac3c-6d38cb79aea2', 'entity_type': 'Individual', 'first_name': 'REBECCA ', 'full_name': 'rebecca albarran ', 'id': '05fbf8f2-14e5-468f-ac3c-6d38cb79aea2', 'last_name': 'ALBARRAN ', 'occupation': 'hs client benefit spec iv', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '93065-0000'}\n", - "{'classification': 'neutral'}\n", - "{'classification': 'neutral'}\n", - "{'address': '4375 ELMWOOD DR', 'city': 'OKEMOS ', 'classification': 'neutral', 'donor_id': '2a66be20-50a9-4c95-a836-7dcdf6f85c53', 'entity_type': 'Individual', 'first_name': 'MARY ', 
'full_name': 'mary hardy ', 'id': '2a66be20-50a9-4c95-a836-7dcdf6f85c53', 'last_name': 'HARDY ', 'recipient_id': '3933a18f-92b6-4fb9-8ed9-a289ae65c09d', 'recipient_name': 'emily busch for state representative', 'state': 'MI', 'zip': '48864-0000'}\n", - "{'address': '9732 NW HENRY CT', 'city': 'PORTLAND ', 'classification': 'neutral', 'donor_id': '135321c7-d5f3-4496-8593-e3d92dc01b4f', 'entity_type': 'Individual', 'first_name': 'DAVID ', 'full_name': 'david evans ', 'id': '135321c7-d5f3-4496-8593-e3d92dc01b4f', 'last_name': 'EVANS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'OR', 'zip': '97229-8060'}\n", - "{'address': '6516 FOREST RIDGE DR', 'city': 'DURHAM ', 'classification': 'neutral', 'donor_id': '82f6e2a5-d1f6-40b1-ab48-b0ddd0d8b2ef', 'entity_type': 'Individual', 'first_name': 'VICTORIA ', 'full_name': 'victoria mathews ', 'id': '82f6e2a5-d1f6-40b1-ab48-b0ddd0d8b2ef', 'last_name': 'MATHEWS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NC', 'zip': '27713-6743'}\n", - "{'address': '434 FRANKLIN ST APT 2', 'city': 'CAMBRIDGE ', 'classification': 'neutral', 'donor_id': '983946cd-bd5f-49de-8d7d-5c7e5fc187df', 'entity_type': 'Individual', 'first_name': 'ALISON ', 'full_name': 'alison gassett ', 'id': '983946cd-bd5f-49de-8d7d-5c7e5fc187df', 'last_name': 'GASSETT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'MA', 'zip': '02139-3261'}\n", - "{'address': '401 S LAKESHORE BLVD 314', 'city': 'MARQUETTE ', 'classification': 'neutral', 'donor_id': '59835b92-ae12-4c63-bcf5-bc4c15f49a1a', 'entity_type': 'Individual', 'first_name': 'LISA ', 'full_name': 'lisa stasiuk ', 'id': '59835b92-ae12-4c63-bcf5-bc4c15f49a1a', 'last_name': 'STASIUK ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic 
fund', 'state': 'MI', 'zip': '49855-0000'}\n", - "{'address': '1398 PARKVIEW DR', 'city': 'NEW RICHMOND ', 'classification': 'neutral', 'donor_id': 'a20e56d4-b16a-48d9-a572-dd5c20afb4ed', 'entity_type': 'Individual', 'first_name': 'STEPHEN ', 'full_name': 'stephen tornio ', 'id': 'a20e56d4-b16a-48d9-a572-dd5c20afb4ed', 'last_name': 'TORNIO ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'WI', 'zip': '54017-2339'}\n", - "{'address': '17367 NORTHWOOD HWY', 'city': 'ARCADIA ', 'classification': 'neutral', 'donor_id': 'd228df64-4788-45fa-8fad-495f05058201', 'entity_type': 'Individual', 'first_name': 'MARY ', 'full_name': 'mary williams ', 'id': 'd228df64-4788-45fa-8fad-495f05058201', 'last_name': 'WILLIAMS ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MI', 'zip': '49613-0000'}\n", - "{'address': '2175 W 25TH ST', 'city': 'LOS ANGELES ', 'classification': 'neutral', 'donor_id': '16817b6c-6455-49e3-aec7-ae3a1100a96a', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james haley ', 'id': '16817b6c-6455-49e3-aec7-ae3a1100a96a', 'last_name': 'HALEY ', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '90018-0000'}\n", - "{'address': 'PO BOX 410', 'city': 'MENDOCINO ', 'classification': 'neutral', 'donor_id': 'f1e3260d-301f-4ea5-b503-e0455e3f0f10', 'entity_type': 'Individual', 'first_name': 'SUSAN ', 'full_name': 'susan keller ', 'id': 'f1e3260d-301f-4ea5-b503-e0455e3f0f10', 'last_name': 'KELLER ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'CA', 'zip': '95460-0410'}\n", - "{'address': '1460 E POND DR APT 14', 'city': 'OKEMOS ', 'classification': 'neutral', 'donor_id': 'a7d035e5-12cf-4e5a-8dc2-0d9552bc59d8', 'entity_type': 
'Individual', 'first_name': 'RUSS ', 'full_name': 'russ kirkpatrick ', 'id': 'a7d035e5-12cf-4e5a-8dc2-0d9552bc59d8', 'last_name': 'KIRKPATRICK ', 'recipient_id': '520c9ce3-c702-4926-8688-750984ee6c0d', 'recipient_name': 'friends of sarah may seward', 'state': 'MI', 'zip': '48864-0000'}\n", - "{'address': '207 N. 5TH AVE. UNIT A', 'city': 'BARSTOW ', 'classification': 'neutral', 'donor_id': '7f16dd46-24ca-475c-9ee2-e5e49fe90048', 'entity_type': 'Individual', 'first_name': 'BRIDGET ', 'full_name': 'bridget breese ', 'id': '7f16dd46-24ca-475c-9ee2-e5e49fe90048', 'last_name': 'BREESE ', 'recipient_id': '0cf71bd1-086d-433d-bebc-02a1976da5fc', 'recipient_name': 'michigan corrections organization political action committee', 'state': 'CA', 'zip': '92311-0000'}\n", - "{'address': '1127 RANFIELD LANE', 'city': 'FLINT ', 'classification': 'neutral', 'donor_id': '4a2985a0-1033-49d7-bd6e-ff09983ed3b9', 'entity_type': 'Individual', 'first_name': 'DALE ', 'full_name': 'dale weighill ', 'id': '4a2985a0-1033-49d7-bd6e-ff09983ed3b9', 'last_name': 'WEIGHILL ', 'recipient_id': '7dbf96d7-7405-4f4e-8089-da6ecdf2197f', 'recipient_name': 'michigan community college association political action comm', 'state': 'MI', 'zip': '48532-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '2885 SLEEPING MEADOW LANE', 'city': 'MASON ', 'classification': 'neutral', 'company': 'consumers energy', 'donor_id': 'b8df5c77-6655-44d5-8efa-5a1cb02e0b7f', 'entity_type': 'Individual', 'first_name': 'BRIAN ', 'full_name': 'brian bushey ', 'id': 'b8df5c77-6655-44d5-8efa-5a1cb02e0b7f', 'last_name': 'BUSHEY ', 'occupation': 'dir egi analytics', 'recipient_id': '642c45b3-2610-4afe-a3b8-a611eaeb9e94', 'recipient_name': 'cms energy corp employees for better government', 'state': 'MI', 'zip': '48854-8709'}\n", - "{'classification': 'neutral'}\n", - "{'address': '1217 WHISPERING KNOLL LN', 'city': 'ROCHESTER HILLS ', 'classification': 'neutral', 'company': 'blue cross blue shield of mich', 'donor_id': 
'c818757b-5305-45c8-b024-30244cc46d21', 'entity_type': 'Individual', 'first_name': 'KATHRYN ', 'full_name': 'kathryn antoski ^ ', 'id': 'c818757b-5305-45c8-b024-30244cc46d21', 'last_name': 'ANTOSKI ^ ', 'occupation': 'analyst - senior', 'recipient_id': '5a56136a-8ea1-4027-918f-be7d7a66c373', 'recipient_name': 'blue cross blue shield of michigan political action committee', 'state': 'MI', 'zip': '48306-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '4608 OAKRIDGE DR', 'city': 'MIDLAND ', 'classification': 'neutral', 'donor_id': '4c1803dc-2633-4432-9d19-005d82aedf68', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james allen ', 'id': '4c1803dc-2633-4432-9d19-005d82aedf68', 'last_name': 'ALLEN ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '48640-1914'}\n", - "{'address': '1919 CURTIS ST', 'city': 'BERKELEY ', 'classification': 'neutral', 'donor_id': '514931c3-da83-44dd-bc30-4fece766d85e', 'entity_type': 'Individual', 'first_name': 'JOAQUIN ', 'full_name': 'joaquin carbonell ', 'id': '514931c3-da83-44dd-bc30-4fece766d85e', 'last_name': 'CARBONELL ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'CA', 'zip': '94702-1648'}\n", - "{'address': '39842 GOLFVIEW DR.', 'city': 'NORTHVILLE ', 'classification': 'neutral', 'donor_id': '739bc866-c9cc-4360-ae52-9b15c22ca6b6', 'entity_type': 'Individual', 'first_name': 'DONALD ', 'full_name': 'donald gates ', 'id': '739bc866-c9cc-4360-ae52-9b15c22ca6b6', 'last_name': 'GATES ', 'recipient_id': 'e9e8bf7f-2d34-42c9-b155-b95481ca238f', 'recipient_name': 'committee to elect dave staudt', 'state': 'MI', 'zip': '48167-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '7300 KRAENZLEIN ROAD', 'city': 'BAY CITY ', 'classification': 'neutral', 'donor_id': '4ae0900b-eac4-4e41-b4a2-6727561db273', 'entity_type': 'Individual', 'first_name': 
'JOAN ', 'full_name': 'joan wilson ', 'id': '4ae0900b-eac4-4e41-b4a2-6727561db273', 'last_name': 'WILSON ', 'recipient_id': 'c5bc157e-1eff-4db0-b26a-eea376cc3fd0', 'recipient_name': 'tamara d carlone for state board of education', 'state': 'MI', 'zip': '48706-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '753 PATRICIA PLACE DR', 'city': 'WESTLAND ', 'classification': 'neutral', 'company': 'blue cross blue shield of mich', 'donor_id': '184e5f13-aba5-44da-be09-572ac083b3e9', 'entity_type': 'Individual', 'first_name': 'SHUNDA ', 'full_name': 'shunda jones ^ ', 'id': '184e5f13-aba5-44da-be09-572ac083b3e9', 'last_name': 'JONES ^ ', 'occupation': 'manager - administrative', 'recipient_id': '5a56136a-8ea1-4027-918f-be7d7a66c373', 'recipient_name': 'blue cross blue shield of michigan political action committee', 'state': 'MI', 'zip': '48185-0000'}\n", - "{'address': '3830 33RD AVE SW UNIT A', 'city': 'SEATTLE ', 'classification': 'neutral', 'donor_id': '9a5a86bb-a480-42ad-913a-17f80efbfb86', 'entity_type': 'Individual', 'first_name': 'JAMES ', 'full_name': 'james sims ', 'id': '9a5a86bb-a480-42ad-913a-17f80efbfb86', 'last_name': 'SIMS ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'WA', 'zip': '98126-2514'}\n", - "{'address': '204 HURON ST', 'city': 'BAY CITY ', 'classification': 'neutral', 'donor_id': '298c73fa-495f-4df0-a348-16a62d6464ee', 'entity_type': 'Individual', 'first_name': 'MATHEWS ', 'full_name': 'mathews gavin ', 'id': '298c73fa-495f-4df0-a348-16a62d6464ee', 'last_name': 'GAVIN ', 'recipient_id': 'a9c205c4-6e86-465d-b9f8-55400317be37', 'recipient_name': 'sheet metal workers local 7 pac', 'state': 'MI', 'zip': '48706-4931'}\n", - "{'address': '740 HEWITT LN', 'city': 'NEW WINDSOR ', 'classification': 'neutral', 'donor_id': 'a41724c3-f42d-42a0-bc7d-8973c2e3a0c8', 'entity_type': 'Individual', 'first_name': 'MARY ', 'full_name': 'mary washburn ', 'id': 
'a41724c3-f42d-42a0-bc7d-8973c2e3a0c8', 'last_name': 'WASHBURN ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'NY', 'zip': '12553-5462'}\n", - "{'address': '100 ROCKVIEW ST', 'city': 'JAMAICA PLAIN ', 'classification': 'neutral', 'donor_id': '1755fe5d-6210-4ecd-8075-de785b4a8a73', 'entity_type': 'Individual', 'first_name': 'TIMOTHY ', 'full_name': 'timothy havel ', 'id': '1755fe5d-6210-4ecd-8075-de785b4a8a73', 'last_name': 'HAVEL ', 'recipient_id': '097002ca-1bbd-417a-bad9-9fd54887ebab', 'recipient_name': 'movement voter pac mi', 'state': 'MA', 'zip': '02130-4660'}\n", - "{'address': '2260 POLISH LINE RD.', 'city': 'CHEBOYGAN ', 'classification': 'neutral', 'donor_id': '46b3649a-e403-4bd0-8ee2-d65a34d191f9', 'entity_type': 'Individual', 'first_name': 'STEVE ', 'full_name': 'steve downing ', 'id': '46b3649a-e403-4bd0-8ee2-d65a34d191f9', 'last_name': 'DOWNING ', 'recipient_id': 'b92fe9af-a5f5-4f15-8f35-d5536eb946eb', 'recipient_name': 'friends of marie fielder', 'state': 'MI', 'zip': '49721-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '10698 BEAR LAKE TRL', 'city': 'PORTAGE ', 'classification': 'neutral', 'donor_id': 'b0dafcd3-4ba2-4aa1-ac43-2298edc705e4', 'entity_type': 'Individual', 'first_name': 'MICHAEL ', 'full_name': 'michael anderson ', 'id': 'b0dafcd3-4ba2-4aa1-ac43-2298edc705e4', 'last_name': 'ANDERSON ', 'recipient_id': 'af8417ee-5bca-49f5-91e9-d2de65d73631', 'recipient_name': 'michigan senate democratic fund', 'state': 'MI', 'zip': '49024-6206'}\n", - "{'address': '150 MARINE AVE', 'city': 'BROOKLYN ', 'classification': 'neutral', 'donor_id': '58988e4c-4376-4fd7-8c13-10bc9fc65335', 'entity_type': 'Individual', 'first_name': 'PAMELA L ', 'full_name': 'pamela l landberg ', 'id': '58988e4c-4376-4fd7-8c13-10bc9fc65335', 'last_name': 'LANDBERG ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'NY', 
'zip': '11209-7744'}\n", - "{'address': '1701 PORTER SW SUITE 6', 'city': 'WYOMING ', 'classification': 'neutral', 'company': 'self emp;oyed', 'donor_id': '3dfd0b64-eb59-4475-9abc-8be958bd8182', 'entity_type': 'Individual', 'first_name': 'DANIEL ', 'full_name': 'daniel hibma ', 'id': '3dfd0b64-eb59-4475-9abc-8be958bd8182', 'last_name': 'HIBMA ', 'occupation': 'property management', 'recipient_id': 'b4b49f06-2c4d-42e4-83e8-fc63c95fad04', 'recipient_name': 'committee to protect voters rights', 'state': 'MI', 'zip': '49519-0000'}\n", - "{'classification': 'neutral'}\n", - "{'address': '1501 BRIDGEWATER DR', 'city': 'MELBOURNE ', 'classification': 'neutral', 'donor_id': 'd71d895c-b18c-45ed-9a13-ec025564fedb', 'entity_type': 'Individual', 'first_name': 'JUDITH ', 'full_name': 'judith behrendt ', 'id': 'd71d895c-b18c-45ed-9a13-ec025564fedb', 'last_name': 'BEHRENDT ', 'recipient_id': '6126e78b-4e80-4361-a019-9d99aa1623ed', 'recipient_name': 'rooted in community leadership pac', 'state': 'FL', 'zip': '32934-3215'}\n" + "/home/alankagiri/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/algorithms/assortativity/correlation.py:282: RuntimeWarning: invalid value encountered in scalar divide\n", + " r = (t - s) / (1 - s)\n" ] } ], "source": [ - "matplot_G = create_network_nodes(grouped_sample.sample(50))\n", - "for v,d in matplot_G.nodes(data=True):\n", - " #print(u)\n", - " #print(v)\n", - " print(d)" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 
'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'red',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green',\n", - " 'green']" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#for a,b in G.nodes(data=True):\n", - " #print(G[node])#['classification'])\n", - "# print(b)#['classification'])\n", - "entity_colors = {'neutral': 'green', 'c':'blue', 'f':'red'}\n", - "node_colors = [entity_colors.get(G.nodes[node].get('classification', 'neutral'), 'green') for node in G.nodes()]\n", - "node_colors" + "import itertools\n", + "def create_network_graph(df: pd.DataFrame) -> nx.MultiDiGraph:\n", + " \"\"\"Takes in a dataframe and generates a MultiDiGraph where the nodes are\n", + " entity names, and the rest of the dataframe columns make the node attributes\n", + "\n", + " Args:\n", + " df: a pandas dataframe with merged information from the inds, orgs, &\n", + " transactions dataframes\n", + "\n", + " Returns:\n", + " A Networkx MultiDiGraph with nodes and edges\n", + " \"\"\"\n", + " G = nx.MultiDiGraph()\n", + " edge_columns = [\n", + " \"office_sought\",\n", + " \"purpose\",\n", + " \"transaction_type\",\n", + " \"year\",\n", + " \"transaction_id\",\n", + " \"donor_office\",\n", + " 
\"amount\",\n", + " ]\n", + "\n", + " for _, row in df.iterrows():\n", + " # add node attributes based on the columns relevant to the entity\n", + " G.add_node(\n", + " row[\"full_name\"],\n", + " **row[df.columns.difference(edge_columns)].dropna().to_dict(),\n", + " )\n", + " # add the recipient as a node\n", + " G.add_node(row[\"recipient_name\"], classification = \"neutral\")\n", + "\n", + " # add the edge attributes between two nodes\n", + " edge_attributes = row[edge_columns].dropna().to_dict()\n", + " G.add_edge(row[\"full_name\"], row[\"recipient_name\"], **edge_attributes)\n", + "\n", + " return G\n", + "\n", + "\n", + "sample = grouped_sample.sample(100)\n", + "G = create_network_graph(sample)\n", + "\n", + "def network_metrics(net_graph: nx.Graph) -> None:\n", + " \"\"\"Given a network graph, return a text files with list of nodes\n", + " with greatest calculated centrality\n", + " Args:\n", + " net_graph: network graph as defined by networkx\n", + " Returns:\n", + " a text file with list of nodes with greatest calculated\n", + " centrality for each metric: in degree, out degree,\n", + " eigenvector, and betweenness\n", + " \"\"\"\n", + " in_degree = nx.in_degree_centrality(net_graph) # calculates in degree centrality of nodes\n", + " out_degree = nx.out_degree_centrality(net_graph) # calculated out degree centrality of nodes\n", + " eigenvector = nx.eigenvector_centrality_numpy(net_graph, weight=\"amount\") # calculates eigenvector centrality of nodes\n", + " betweenness = nx.betweenness_centrality(net_graph, weight=\"amount\") # calculates betweenness centrality of nodes\n", + "\n", + " # sort + truncate dictionaries to 50 nodes with greatest centrality\n", + " in_degree = sorted(in_degree.items(), key=lambda x: x[1], reverse=True)[:50]\n", + " out_degree = sorted(out_degree.items(), key=lambda x: x[1], reverse=True)[:50]\n", + " eigenvector = sorted(eigenvector.items(), key=lambda x: x[1], reverse=True)[\n", + " :50\n", + " ]\n", + " betweenness = 
sorted(betweenness.items(), key=lambda x: x[1], reverse=True)[\n", + " :50\n", + " ]\n", + "\n", + " assortativity = nx.attribute_assortativity_coefficient(\n", + " net_graph, \"classification\"\n", + " ) # calculates assortativity of graph\n", + "\n", + " num_nodes = len(net_graph.nodes())\n", + " num_edges = len(net_graph.edges())\n", + " density = num_edges / (\n", + " num_nodes * (num_nodes - 1)\n", + " ) # calculates density of graph\n", + "\n", + " k = 5\n", + " comp = nx.community.girvan_newman(net_graph)\n", + " for communities in itertools.islice(comp, k):\n", + " communities = tuple(\n", + " sorted(c) for c in communities\n", + " ) # creates clusters of nodes with high interactions where granularity = 5\n", + "\n", + " with open(\"network_metrics.txt\", \"w\") as file:\n", + " file.write(f\"in degree centrality: {in_degree}\\n\")\n", + " file.write(f\"out degree centrality: {out_degree}\\n\")\n", + " file.write(f\"eigenvector centrality: {eigenvector}\\n\")\n", + " file.write(f\"betweenness centrality: {betweenness}\\n\\n\")\n", + "\n", + " file.write(f\"assortativity based on 'classification': {assortativity}\\n\")\n", + " file.write(f\"assortativity based on 'density': {density}\\n\")\n", + " file.write(f\"assortativity based on 'communities': {communities}\")\n", + "\n", + "\n", + "network_metrics(G)" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { - "image/png": "", - "text/plain": [ - "
" + "text/html": [ + " \n", + " " ] }, "metadata": {}, "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('William Stoner', 'KALAMAZOO ANESTHESIOLOGY PC', {'amount': 10.0, 'year': 2017})\n", - "('KALAMAZOO ANESTHESIOLOGY PC', 'Bob Kushman', {'amount': 1530})\n", - "('Bob Kushman', 'KALAMAZOO ANESTHESIOLOGY PC', {'amount': 530})\n", - "('James Engelson', 'Bob Kushman', {'amount': 90.0, 'year': 2019})\n", - "('Allen Wolf', 'William Stoner', {'amount': 111.5, 'year': 2018})\n", - "('Allen Wolf', 'William Stoner', {'amount': 11100.5, 'year': 2018})\n" - ] - } - ], - "source": [ - "G = nx.MultiDiGraph()\n", - " \n", - "G.add_node(\"William Stoner\", Age=10, Weight=110)\n", - "G.add_edge(\"William Stoner\",\"KALAMAZOO ANESTHESIOLOGY PC\",amount=10.00, year=2017)\n", - "G.add_node(\"KALAMAZOO ANESTHESIOLOGY PC\", Age=50, Weight=180)\n", - "G.add_edge(\"KALAMAZOO ANESTHESIOLOGY PC\",\"Bob Kushman\",amount=1530)\n", - "G.add_node(\"Bob Kushman\", Age=90, Weight=111)\n", - "G.add_edge(\"Bob Kushman\",\"KALAMAZOO ANESTHESIOLOGY PC\",amount=530)\n", - "G.add_node(\"James Engelson\", Age=40, Weight=10)\n", - "G.add_edge(\"James Engelson\",\"Bob Kushman\",amount=90.00, year=2019,)\n", - "G.add_node(\"Allen Wolf\", Age=30, Weight=1710)\n", - "G.add_edge(\"Allen Wolf\",\"William Stoner\",amount=111.50,year=2018)\n", - "G.add_edge(\"Allen Wolf\",\"William Stoner\",amount=11100.50,year=2018)\n", - "\n", - "\n", - "\n", - "edge_labels = {(u,v):d['amount'] for u,v,d in G.edges(data=True)}\n", - "nx.draw(G, with_labels=True,node_color='red')\n", - "pos = nx.planar_layout(G)\n", - "for edge, label in edge_labels.items():\n", - " nx.draw_networkx_edge_labels(G, pos=pos, edge_labels={edge: label}, label_pos=0.5, verticalalignment='center', horizontalalignment='center')\n", - "plt.show()\n", - "for edge in G.edges(data=True):\n", - " print(edge)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ { 
"data": { "application/vnd.plotly.v1+json": { @@ -1738,7782 +1564,721 @@ { "hoverinfo": "text", "hovertext": [ - "Amount: 10.00, Weight: 10.00", - "Amount: 1530.00, Weight: 1530.00", - "Amount: 530.00, Weight: 530.00", - "Amount: 90.00, Weight: 90.00", - "Amount: 111.50, Weight: 111.50" - ], - "line": { - "color": "#888" - }, - "mode": "lines", - "type": "scatter", - "x": [ - 10, - 50, - null, - 50, - 90, - null, - 90, - 50, - null, - 40, - 90, - null, - 30, - 10, - null - ], - "y": [ - 110, - 180, - null, - 180, - 111, - null, - 111, - 180, - null, - 10, - 111, - null, - 1710, - 110, - null - ] - }, - { - "hoverinfo": "text", - "marker": { - "colorscale": [ - [ - 0, - "rgb(255,255,217)" - ], - [ - 0.125, - "rgb(237,248,177)" - ], - [ - 0.25, - "rgb(199,233,180)" - ], - [ - 0.375, - "rgb(127,205,187)" - ], - [ - 0.5, - "rgb(65,182,196)" - ], - [ - 0.625, - "rgb(29,145,192)" - ], - [ - 0.75, - "rgb(34,94,168)" - ], - [ - 0.875, - "rgb(37,52,148)" - ], - [ - 1, - "rgb(8,29,88)" - ] - ], - "showscale": true, - "size": 10 - }, - "mode": "markers", - "text": [ - "William Stoner
Age: 10
Weight: 110", - "KALAMAZOO ANESTHESIOLOGY PC
Age: 50
Weight: 180", - "Bob Kushman
Age: 90
Weight: 111", - "James Engelson
Age: 40
Weight: 10", - "Allen Wolf
Age: 30
Weight: 1710" - ], - "type": "scatter", - "x": [ - 10, - 50, - 90, - 40, - 30 - ], - "y": [ - 110, - 180, - 111, - 10, - 1710 - ] - } - ], - "layout": { - "hovermode": "closest", - "margin": { - "b": 20, - "l": 5, - "r": 5, - "t": 40 - }, - "showlegend": false, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 
0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 
0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 
0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - 
"#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "font": { - "size": 16 - }, - "text": "
Network graph made with Plotly" - }, - "xaxis": { - "showgrid": false, - "showticklabels": false, - "zeroline": false - }, - "yaxis": { - "showgrid": false, - "showticklabels": false, - "zeroline": false - } - } - }, - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "G = nx.MultiDiGraph()\n", - "\n", - "G.add_node(\"William Stoner\", Age=10, Weight=110)\n", - "G.add_node(\"KALAMAZOO ANESTHESIOLOGY PC\", Age=50, Weight=180)\n", - "G.add_node(\"Bob Kushman\", Age=90, Weight=111)\n", - "G.add_node(\"James Engelson\", Age=40, Weight=10)\n", - "G.add_node(\"Allen Wolf\", Age=30, Weight=1710)\n", - "\n", - "G.add_edge(\"William Stoner\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=10.00, amount=10.00, year=2017)\n", - "G.add_edge(\"KALAMAZOO ANESTHESIOLOGY PC\", \"Bob Kushman\", weight=1530, amount=1530, year=2017)\n", - "G.add_edge(\"Bob Kushman\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=530, amount=530, year=2017)\n", - "G.add_edge(\"James Engelson\", \"Bob Kushman\", weight=90.00, amount=90.00, year=2017)\n", - "G.add_edge(\"Allen Wolf\", \"William Stoner\", weight=111.50, amount=111.50, year=2017)\n", - "\n", - "# Create Plotly graph\n", - "edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", - "hovertext = []\n", - "\n", - "for edge in G.edges(data=True):\n", - " x0, y0 = G.nodes[edge[0]]['Age'], G.nodes[edge[0]]['Weight']\n", - " x1, y1 = G.nodes[edge[1]]['Age'], G.nodes[edge[1]]['Weight']\n", - " edge_trace['x'] += tuple([x0, x1, None])\n", - " edge_trace['y'] += tuple([y0, y1, None])\n", - " hovertext.append(f\"Amount: {edge[2]['amount']:.2f}, Weight: {edge[2]['weight']:.2f}\")\n", - "\n", - "edge_trace['hovertext'] = hovertext\n", - "\n", - "node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", - "\n", - "for node in G.nodes():\n", - " x, y = G.nodes[node]['Age'], G.nodes[node]['Weight']\n", - " node_trace['x'] += tuple([x])\n", - " node_trace['y'] += tuple([y])\n", - " node_info = node + '
' + 'Age: ' + str(G.nodes[node]['Age']) + '
' + 'Weight: ' + str(G.nodes[node]['Weight'])\n", - " node_trace['text'] += tuple([node_info])\n", - "\n", - "fig = go.Figure(data=[edge_trace, node_trace],\n", - " layout=go.Layout(\n", - " title='
Network graph made with Plotly',\n", - " titlefont=dict(size=16),\n", - " showlegend=False,\n", - " hovermode='closest',\n", - " margin=dict(b=20,l=5,r=5,t=40),\n", - " xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n", - " yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))\n", - "\n", - "fig.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "text", - "hovertext": [ - "Amount: 5.00", - "Amount: 100.00", - "Amount: 15.00", - "Amount: 151.76", - "Amount: 75.00", - "Amount: 11.12", - "Amount: 1.00", - "Amount: 1.00", - "Amount: 5.88", - "Amount: 250.00", - "Amount: 15.00", - "Amount: 273.00", - "Amount: 25.44", - "Amount: 100.00", - "Amount: 50.00", - "Amount: 400.00", - "Amount: 300.00", - "Amount: 1020.00", - "Amount: 100.00", - "Amount: 100.00", - "Amount: 5.00", - "Amount: 15.00", - "Amount: 100.00", - "Amount: 13.00", - "Amount: 750.00", - "Amount: 15.00", - "Amount: 500.00", - "Amount: 2.50", - "Amount: 1.00", - "Amount: 250.00", - "Amount: 35.00", - "Amount: 40.00", - "Amount: 9.29", - "Amount: 5.00", - "Amount: 19.00", - "Amount: 75.00", - "Amount: 25.15", - "Amount: 15.78", - "Amount: 1.00", - "Amount: 250.00", - "Amount: 1000.00", - "Amount: 2.87", - "Amount: 67.18", - "Amount: 150.00", - "Amount: 29.40", - "Amount: 1.00", - "Amount: 500.00", - "Amount: 60.00", - "Amount: 10.00", - "Amount: 76.32" - ], - "line": { - "color": "#888" - }, - "mode": "lines", - "type": "scatter", - "x": [], - "y": [] - }, - { - "hoverinfo": "text", - "marker": { - "color": [ - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - 
"green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green" - ], - "colorscale": [ - [ - 0, - "rgb(255,255,217)" - ], - [ - 0.125, - "rgb(237,248,177)" - ], - [ - 0.25, - "rgb(199,233,180)" - ], - [ - 0.375, - "rgb(127,205,187)" - ], - [ - 0.5, - "rgb(65,182,196)" - ], - [ - 0.625, - "rgb(29,145,192)" - ], - [ - 0.75, - "rgb(34,94,168)" - ], - [ - 0.875, - "rgb(37,52,148)" - ], - [ - 1, - "rgb(8,29,88)" - ] - ], - "showscale": true, - "size": 10 - }, - "mode": "markers", - "text": [ - "Name: rachel puthuff
donor_id: 639646bf-5176-474c-b800-1afb34c55b53
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel puthuff
recipient_name: reproductive freedom for all
address: 3717 WHITAKER
city: SCHERTZ
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 639646bf-5176-474c-b800-1afb34c55b53
last_name: PUTHUFF
state: TX
zip: 78154-0000
", - "Name: reproductive freedom for all
classification: neutral
", - "Name: james bennett
donor_id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: james bennett
recipient_name: reproductive freedom for all
address: 533 W OAK ST
city: MASON
classification: neutral
entity_type: Individual
first_name: JAMES
id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
last_name: BENNETT
state: MI
zip: 48854-0000
", - "Name: sonny mandouh mr.^
donor_id: 34d28c8d-c0fe-463d-9afe-73269a47389b
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: sonny mandouh mr.^
recipient_name: realtors political action committee of michigan
address: 23760 HOLLANDER ST
city: DEARBORN
classification: neutral
entity_type: Individual
first_name: SONNY
id: 34d28c8d-c0fe-463d-9afe-73269a47389b
last_name: MANDOUH MR.^
state: MI
zip: 48128-0000
", - "Name: realtors political action committee of michigan
classification: neutral
", - "Name: charles crider
donor_id: e765ba37-66d2-4b65-9f42-3902dca518b6
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles crider
recipient_name: reproductive freedom for all
address: 1403 WEST HIGHLAND BLVD.
city: BATTLE CREEK
classification: neutral
entity_type: Individual
first_name: CHARLES
id: e765ba37-66d2-4b65-9f42-3902dca518b6
last_name: CRIDER
state: MI
zip: 49015-0000
", - "Name: michelle zukowski-serlin
donor_id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: michelle zukowski-serlin
recipient_name: reproductive freedom for all
address: 4853 LANDING WAY
city: KALAMAZOO
classification: neutral
company: choices for change counseling
entity_type: Individual
first_name: MICHELLE
id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
last_name: ZUKOWSKI-SERLIN
occupation: business owners and clinical s
state: MI
zip: 49048-6153
", - "Name: diana gibson-lee
donor_id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: diana gibson-lee
recipient_name: veronica klinefelt for state senate
address: 7450 W DYER RD
city: TWINING
classification: neutral
entity_type: Individual
first_name: DIANA
id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
last_name: GIBSON-LEE
state: MI
zip: 48766-9773
", - "Name: veronica klinefelt for state senate
classification: neutral
", - "Name: edward kazala
donor_id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: edward kazala
recipient_name: elect padma kuppa
address: 70 REVERE CT
city: LAFAYETTE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
last_name: KAZALA
state: CA
zip: 94549-0000
", - "Name: andrea kovalsky
donor_id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: andrea kovalsky
recipient_name: veronica klinefelt for state senate
address: 497 SAINT MARKS AVE APT 5P
city: BROOKLYN
classification: neutral
entity_type: Individual
first_name: ANDREA
id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
last_name: KOVALSKY
state: NY
zip: 11238-5792
", - "Name: colin palmer
donor_id: ad440dcd-79ad-4323-8f19-c7a491f897f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: colin palmer
recipient_name: veronica klinefelt for state senate
address: 531 E 20TH ST APT 10D
city: NEW YORK
classification: neutral
company: not employed
entity_type: Individual
first_name: COLIN
id: ad440dcd-79ad-4323-8f19-c7a491f897f7
last_name: PALMER
occupation: not employed
state: NY
zip: 10010-7604
", - "Name: julie svinicki ms.^
donor_id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: julie svinicki ms.^
recipient_name: realtors political action committee of michigan
address: 1608 KIRTLAND DRIVE
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: JULIE
id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
last_name: SVINICKI MS.^
state: MI
zip: 48103-0000
", - "Name: audrey lance
donor_id: e8ef0925-3f10-4ebf-b025-dea32e506a50
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: audrey lance
recipient_name: reproductive freedom for all
address: 3945 FORBES AVE APT 444
city: PITTSBURGH
classification: neutral
entity_type: Individual
first_name: AUDREY
id: e8ef0925-3f10-4ebf-b025-dea32e506a50
last_name: LANCE
occupation: physician
state: PA
zip: 15213-0000
", - "Name: walker c evans
donor_id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: walker c evans
recipient_name: reproductive freedom for all
address: 2810 NORTHVILLE DR NE
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: WALKER C
id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
last_name: EVANS
state: MI
zip: 49525-0000
", - "Name: lori henderson
donor_id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
recipient_id: 6b51e739-dd22-4556-8555-6e11264ef4ce
full_name: lori henderson
recipient_name: planned parenthood advocates of mi
address: 2401 HARDWOOD AVE
city: ROYAK OAK
classification: neutral
entity_type: Individual
first_name: LORI
id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
last_name: HENDERSON
state: MI
zip: 48067-0000
", - "Name: planned parenthood advocates of mi
classification: neutral
", - "Name: brett lundie
donor_id: 932450e5-f8fc-4cb2-baac-acfad686561f
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: brett lundie
recipient_name: citizens to support mi women and children
address: 7779 CIRCLE DR
city: LAINGSBURG
classification: neutral
entity_type: Individual
first_name: BRETT
id: 932450e5-f8fc-4cb2-baac-acfad686561f
last_name: LUNDIE
state: MI
zip: 48848-0000
", - "Name: citizens to support mi women and children
classification: neutral
", - "Name: ian robinson
donor_id: 757923ec-02e3-424e-81b9-4152f6dd165b
recipient_id: 06ebbb03-574c-445b-9416-7d2134a06d1f
full_name: ian robinson
recipient_name: committee to elect james e johnson jr
address: 3435 BRENTWOOD CT
city: ANN ARBOR
classification: neutral
company: university of michigan
entity_type: Individual
first_name: IAN
id: 757923ec-02e3-424e-81b9-4152f6dd165b
last_name: ROBINSON
occupation: faculty
state: MI
zip: 48108-1757
", - "Name: committee to elect james e johnson jr
classification: neutral
", - "Name: kelly bean
donor_id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: kelly bean
recipient_name: friends of jerry neyer
address: 1405 E BATTLE RD
city: ROSEBUSH
classification: neutral
entity_type: Individual
first_name: KELLY
id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
last_name: BEAN
state: MI
zip: 48878-9732
", - "Name: friends of jerry neyer
classification: neutral
", - "Name: sandra johnson
donor_id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
recipient_id: 7ee2db24-b832-4f1b-af2e-e9c8eaf706bd
full_name: sandra johnson
recipient_name: committee to elect charise anderson
address: 424 N 21ST ST 0
city: MONTEBELLO
classification: neutral
entity_type: Individual
first_name: SANDRA
id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
last_name: JOHNSON
occupation: eligibility worker
state: CA
zip: 90640-0000
", - "Name: committee to elect charise anderson
classification: neutral
", - "Name: christopher mishler
donor_id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: christopher mishler
recipient_name: citizens to support mi women and children
address: 3690 VORHIES ROAD
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: CHRISTOPHER
id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
last_name: MISHLER
state: MI
zip: 48105-0000
", - "Name: stacy leroy daniels
donor_id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: stacy leroy daniels
recipient_name: bill g schuette for state representative
address: 3901 ORCHARD DRIVE
city: MIDLAND
classification: neutral
entity_type: Individual
first_name: STACY LEROY
id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
last_name: DANIELS
state: MI
zip: 48640-0000
", - "Name: bill g schuette for state representative
classification: neutral
", - "Name: suzanne r weinheimer
donor_id: 029a23eb-d90f-405b-995c-c8dc266e255f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: suzanne r weinheimer
recipient_name: reproductive freedom for all
address: 11045 8TH AVENUE NE APT 826
city: SEATTLE
classification: neutral
entity_type: Individual
first_name: SUZANNE R
id: 029a23eb-d90f-405b-995c-c8dc266e255f
last_name: WEINHEIMER
state: WA
zip: 98125-0000
", - "Name: dustin shaeffer mr.^
donor_id: fc041110-7c11-47af-b1bf-5daca974e4ee
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: dustin shaeffer mr.^
recipient_name: realtors political action committee of michigan
address: 60451 MOJAVE LANE
city: WASHINGTON
classification: neutral
entity_type: Individual
first_name: DUSTIN
id: fc041110-7c11-47af-b1bf-5daca974e4ee
last_name: SHAEFFER MR.^
state: MI
zip: 48094-0000
", - "Name: debra byl
donor_id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: debra byl
recipient_name: reproductive freedom for all
address: 987 BRADFORD GREENS
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: DEBRA
id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
last_name: BYL
state: MI
zip: 49525-0000
", - "Name: pamela wimp
donor_id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: pamela wimp
recipient_name: reproductive freedom for all
address: 8030 MERCER CT NE
city: LACEY
classification: neutral
entity_type: Individual
first_name: PAMELA
id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
last_name: WIMP
state: WA
zip: 98516-6336
", - "Name: lori wortz
donor_id: 821a27dc-aa00-436e-80e2-655ce26bc830
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: lori wortz
recipient_name: bill g schuette for state representative
address: 4144 MERIDIAN RD
city: OKEMOS
classification: neutral
company: braenaru consulting
entity_type: Individual
first_name: LORI
id: 821a27dc-aa00-436e-80e2-655ce26bc830
last_name: WORTZ
occupation: consultant
state: MI
zip: 48864-0000
", - "Name: janet reid
donor_id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: janet reid
recipient_name: reproductive freedom for all
address: 2378 EATON GATE RD
city: LAKE ORION
classification: neutral
entity_type: Individual
first_name: JANET
id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
last_name: REID
state: MI
zip: 48360-1869
", - "Name: gary henderson
donor_id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: gary henderson
recipient_name: bill g schuette for state representative
address: 1601 KINGSWOOD DRIVE
city: LANSING
classification: neutral
company: aircraft precision prod. inc.
entity_type: Individual
first_name: GARY
id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
last_name: HENDERSON
occupation: sales purchasing manager
state: MI
zip: 48912-0000
", - "Name: claudette levesque
donor_id: 26d5e377-57c4-4f33-95ce-4209bff4242b
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: claudette levesque
recipient_name: reproductive freedom for all
address: 41 CATERPILLAR HILL RD
city: SARGENTVILLE
classification: neutral
entity_type: Individual
first_name: CLAUDETTE
id: 26d5e377-57c4-4f33-95ce-4209bff4242b
last_name: LEVESQUE
state: ME
zip: 04673-2464
", - "Name: graham chapman
donor_id: 8045638c-db65-4a13-9016-05e73766b5b1
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: graham chapman
recipient_name: reproductive freedom for all
address: 1914 CLINTON ST
city: LOS ANGELES
classification: neutral
entity_type: Individual
first_name: GRAHAM
id: 8045638c-db65-4a13-9016-05e73766b5b1
last_name: CHAPMAN
state: CA
zip: 90026-4137
", - "Name: john olson
donor_id: 1ff268c7-fbff-4f94-8810-48f31bb53681
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: john olson
recipient_name: friends of jerry neyer
address: 6025 VERDE TRL S APT K217
city: BOCA RATON
classification: neutral
entity_type: Individual
first_name: JOHN
id: 1ff268c7-fbff-4f94-8810-48f31bb53681
last_name: OLSON
state: FL
zip: 33433-4442
", - "Name: christina ridalls ms.^
donor_id: 9bea8116-83a3-486a-a457-50c0f80af060
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: christina ridalls ms.^
recipient_name: realtors political action committee of michigan
address: 3083 BEATTIE RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: CHRISTINA
id: 9bea8116-83a3-486a-a457-50c0f80af060
last_name: RIDALLS MS.^
state: MI
zip: 48843-0000
", - "Name: dylynn mclean
donor_id: a1943974-4abe-4093-be0b-edcc56a97ffe
recipient_id: bbe89315-1939-46e3-a5c0-2d6e5b28bc95
full_name: dylynn mclean
recipient_name: 1st congressional dist rep comm
address: 1531 W 20 MILE RD
city: SAULT STE MARIE
classification: neutral
entity_type: Individual
first_name: DYLYNN
id: a1943974-4abe-4093-be0b-edcc56a97ffe
last_name: MCLEAN
state: MI
zip: 49783-0000
", - "Name: 1st congressional dist rep comm
classification: neutral
", - "Name: andrew morris
donor_id: 767c512a-9c5a-4230-90ab-3fd40d731f60
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: andrew morris
recipient_name: elect padma kuppa
address: 1118 MORNINGSIDE AVE
city: SCHENECTADY
classification: neutral
entity_type: Individual
first_name: ANDREW
id: 767c512a-9c5a-4230-90ab-3fd40d731f60
last_name: MORRIS
state: NY
zip: 12309-5630
", - "Name: elect padma kuppa
classification: neutral
", - "Name: martha scoppa
donor_id: 78fcc760-825f-404a-b058-a88a99992d98
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: martha scoppa
recipient_name: reproductive freedom for all
address: 32 COLD SPRING RD
city: LIBERTY
classification: neutral
entity_type: Individual
first_name: MARTHA
id: 78fcc760-825f-404a-b058-a88a99992d98
last_name: SCOPPA
state: NY
zip: 12754-0000
", - "Name: carol woodard
donor_id: d4ba0589-99d6-4455-a978-315395322208
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: carol woodard
recipient_name: reproductive freedom for all
address: 5143 SPRING MEADOWS
city: TROY
classification: neutral
entity_type: Individual
first_name: CAROL
id: d4ba0589-99d6-4455-a978-315395322208
last_name: WOODARD
state: MI
zip: 48098-0000
", - "Name: rochelle albright
donor_id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
recipient_id: e3294ecb-f6df-48a0-b3b4-7048a9c650a7
full_name: rochelle albright
recipient_name: michael detmer for state senate
address: 1840 GRAY RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: ROCHELLE
id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
last_name: ALBRIGHT
state: MI
zip: 48843-0000
", - "Name: michael detmer for state senate
classification: neutral
", - "Name: richard mayfield
donor_id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: richard mayfield
recipient_name: veronica klinefelt for state senate
address: 3221 GRISCHY LN
city: CINCINNATI
classification: neutral
entity_type: Individual
first_name: RICHARD
id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
last_name: MAYFIELD
state: OH
zip: 45208-3109
", - "Name: charles risch
donor_id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles risch
recipient_name: reproductive freedom for all
address: 300 S WACKER DR
city: CHICAGO
classification: neutral
entity_type: Individual
first_name: CHARLES
id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
last_name: RISCH
state: IL
zip: 60606-6680
", - "Name: barbara miller
donor_id: 47043446-3b77-4a34-9d0d-a21786400d9b
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: barbara miller
recipient_name: veronica klinefelt for state senate
address: 820 W END AVE APT 6A
city: NEW YORK
classification: neutral
entity_type: Individual
first_name: BARBARA
id: 47043446-3b77-4a34-9d0d-a21786400d9b
last_name: MILLER
state: NY
zip: 10025-5330
", - "Name: kevin korpi
donor_id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
recipient_id: 5f7c53e3-d1be-47a9-acc4-70828a8c7a69
full_name: kevin korpi
recipient_name: committee to elect ed mcbroom
address: 220 MAC AVE APT 418
city: EAST LANSING
classification: neutral
company: acuitas
entity_type: Individual
first_name: KEVIN
id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
last_name: KORPI
occupation: lobbyist
state: MI
zip: 48823-0000
", - "Name: committee to elect ed mcbroom
classification: neutral
", - "Name: wayne miller
donor_id: 14208b99-1ecb-4b33-becf-c30882e9b302
recipient_id: f88fdd05-e3e4-4d51-8511-1ffd35965c8e
full_name: wayne miller
recipient_name: committee to elect jack richert
address: 27301 SCENIC HWY
city: FRANKLIN
classification: neutral
company: miller & tischler pc
entity_type: Individual
first_name: WAYNE
id: 14208b99-1ecb-4b33-becf-c30882e9b302
last_name: MILLER
occupation: attorney
state: MI
zip: 48025-0000
", - "Name: committee to elect jack richert
classification: neutral
", - "Name: mary soens
donor_id: 664b4540-8b50-44d3-8570-cb797a4859fe
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: mary soens
recipient_name: elect padma kuppa
address: 55 N HANCOCK ST
city: LEXINGTON
classification: neutral
entity_type: Individual
first_name: MARY
id: 664b4540-8b50-44d3-8570-cb797a4859fe
last_name: SOENS
state: MA
zip: 02420-0000
", - "Name: rebecca baskin
donor_id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rebecca baskin
recipient_name: reproductive freedom for all
address: 680 BERKSHIRE DR
city: SALINE
classification: neutral
entity_type: Individual
first_name: REBECCA
id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
last_name: BASKIN
state: MI
zip: 48176-1087
", - "Name: edward kaminski
donor_id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
recipient_id: 76a600c1-7ead-437a-85ad-0cca7573393b
full_name: edward kaminski
recipient_name: friends of brian hosticka
address: 8765 LEHMAN RD
city: MONTAGUE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
last_name: KAMINSKI
state: MI
zip: 49437-9326
", - "Name: friends of brian hosticka
classification: neutral
", - "Name: robert brown
donor_id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
recipient_id: 2e8c9124-2258-45e3-a198-e8c1798c49f2
full_name: robert brown
recipient_name: monroe plumbers and pipe fitters local 671 pac fund
address: 1207 SANDHURST DR
city: TALLAHASSEE
classification: neutral
entity_type: Individual
first_name: ROBERT
id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
last_name: BROWN
state: FL
zip: 32312-2527
", - "Name: monroe plumbers and pipe fitters local 671 pac fund
classification: neutral
", - "Name: sandra braddock
donor_id: e42e7230-02f0-4b28-ba39-7b68e796d510
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: sandra braddock
recipient_name: reproductive freedom for all
address: 20087 EDGEWATER DRIVE
city: CANYON COUNTRY
classification: neutral
entity_type: Individual
first_name: SANDRA
id: e42e7230-02f0-4b28-ba39-7b68e796d510
last_name: BRADDOCK
state: CA
zip: 91351-0000
", - "Name: dana fortier
donor_id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
recipient_id: 159692de-135a-45bd-8889-1ab1882ed54c
full_name: dana fortier
recipient_name: committee to elect vicki barnett to state senate
address: 23861 W LEBOST
city: NOVI
classification: neutral
entity_type: Individual
first_name: DANA
id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
last_name: FORTIER
state: MI
zip: 48375-0000
", - "Name: committee to elect vicki barnett to state senate
classification: neutral
", - "Name: rachel geiersbach
donor_id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel geiersbach
recipient_name: reproductive freedom for all
address: 3412 OLD KAWKAWLIN RD
city: BAY CITY
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
last_name: GEIERSBACH
state: MI
zip: 48706-0000
", - "Name: matthew burgess
donor_id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: matthew burgess
recipient_name: reproductive freedom for all
address: 8823 SPECTRUM CENTER BLVD 2313
city: SAN DIEGO
classification: neutral
entity_type: Individual
first_name: MATTHEW
id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
last_name: BURGESS
state: CA
zip: 92123-0000
", - "Name: teresa robertson
donor_id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: teresa robertson
recipient_name: reproductive freedom for all
address: 7101 RIVER GLEN DR SE
city: CALEDONIA
classification: neutral
entity_type: Individual
first_name: TERESA
id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
last_name: ROBERTSON
state: MI
zip: 49316-8136
" - ], - "type": "scatter", - "x": [], - "y": [] - } - ], - "layout": { - "hovermode": "closest", - "margin": { - "b": 20, - "l": 5, - "r": 5, - "t": 40 - }, - "showlegend": true, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 
0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - 
"#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 
0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - 
"lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "font": { - "size": 16 - }, - "text": "Network Graph Indicating Campaign Contributions from 2018-2022" - }, - "xaxis": { - "showgrid": true, - "showticklabels": 
false, - "zeroline": true - }, - "yaxis": { - "showgrid": true, - "showticklabels": false, - "zeroline": true - } - } - }, - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph:\n", - " G = nx.MultiDiGraph()\n", - " \n", - " # Define columns for edge attributes\n", - " edge_columns = ['amount', 'donor_office', 'office_sought', 'party', 'purpose', 'transaction_id', 'transaction_type', 'year']\n", - " # Define columns for node attributes\n", - " node_columns = ['donor_id', 'recipient_id', 'full_name', 'recipient_name', 'address', 'city', 'classification', 'company', 'donor_type', 'entity_type', 'first_name', 'id', 'last_name', 'occupation', 'recipient_type', 'state', 'zip']\n", - " \n", - " for _, row in df.iterrows(): \n", - " # Add nodes\n", - " G.add_node(row['full_name'], **row[node_columns].dropna().to_dict())\n", - " G.add_node(row['recipient_name'], classification='neutral') # Adding recipient nodes with default classification\n", - "\n", - " # Add edges\n", - " edge_attributes = row[edge_columns].dropna().to_dict()\n", - " G.add_edge(row['full_name'], row['recipient_name'], **edge_attributes)\n", - " \n", - " return G\n", - "\n", - "def plot_network_graph(G: nx.MultiDiGraph):\n", - " edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", - " hovertext = []\n", - "\n", - " for edge in G.edges(data=True):\n", - " source = edge[0]\n", - " target = edge[1]\n", - " hovertext.append(f\"Amount: {edge[2]['amount']:.2f}\")\n", - "\n", - " edge_trace['hovertext'] = hovertext\n", - "\n", - " node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", - " node_trace['marker']['color'] = []\n", - "\n", - " for node in G.nodes():\n", - " node_info = f\"Name: {node}
\"\n", - " for key, value in G.nodes[node].items():\n", - " node_info += f\"{key}: {value}
\"\n", - " node_trace['text'] += tuple([node_info])\n", - " # Get the classification value for the node\n", - " classification = G.nodes[node].get('classification', 'neutral')\n", - " # Assign a color based on the classification value\n", - " if classification == 'c':\n", - " color = 'blue'\n", - " elif classification == 'f':\n", - " color = 'red'\n", - " else:\n", - " color = 'green' # Default color for unknown classification\n", - " node_trace['marker']['color'] += tuple([color])\n", - "\n", - " # Define layout settings\n", - " layout = go.Layout(\n", - " title='Network Graph Indicating Campaign Contributions from 2018-2022',\n", - " titlefont=dict(size=16),\n", - " showlegend=True,\n", - " hovermode='closest',\n", - " margin=dict(b=20, l=5, r=5, t=40),\n", - " xaxis=dict(showgrid=True, zeroline=True, showticklabels=False),\n", - " yaxis=dict(showgrid=True, zeroline=True, showticklabels=False)\n", - " )\n", - "\n", - " fig = go.Figure(data=[edge_trace, node_trace], layout=layout)\n", - "\n", - " # Log information about the figure\n", - "\n", - " fig.show()\n", - "\n", - "sample = grouped_sample.sample(50)\n", - "plot_network_graph(create_network_nodes(sample))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "none", - "line": { - "color": "#888", - "width": 0.5 - }, - "mode": "lines", - "type": "scatter", - "x": [ - 0.4182243125490408, - 0.3740122792611037, - null, - 0.4182243125490408, - 0.37848025459696877, - null, - 0.4182243125490408, - 0.3821391536049519, - null, - 0.4182243125490408, - 0.31305791514229697, - null, - 0.4182243125490408, - 0.3246624829381992, - null, - 0.4182243125490408, - 0.33203393677870674, - null, - 0.4182243125490408, - 0.4404718698088387, - null, - 0.4182243125490408, - 0.3393815448042514, - null, - 0.4182243125490408, - 0.32444561774289593, - 
null, - 0.4182243125490408, - 0.33721825060791266, - null, - 0.4182243125490408, - 0.5201251204037126, - null, - 0.12286879065958844, - 0.23992481624351925, - null, - 0.12286879065958844, - 0.09276814106220677, - null, - 0.12286879065958844, - 0.07426685281627932, - null, - 0.12286879065958844, - 0.09471702229050472, - null, - 0.12286879065958844, - 0.06879886671193436, - null, - 0.12286879065958844, - 0.1823584228427031, - null, - 0.12286879065958844, - 0.19852054651169693, - null, - 0.12286879065958844, - 0.13747604708068628, - null, - 0.12286879065958844, - 0.22007362873840486, - null, - 0.12286879065958844, - 0.13940667248499528, - null, - 0.12286879065958844, - 0.0201693226965588, - null, - 0.12286879065958844, - 0.16862303760247477, - null, - 0.12286879065958844, - 0.12355952994556385, - null, - 0.12286879065958844, - 0.04781523934390508, - null, - 0.6730431696885844, - 0.6013564651959642, - null, - 0.6730431696885844, - 0.662108954544855, - null, - 0.6730431696885844, - 0.7007214129943925, - null, - 0.6730431696885844, - 0.7188906153197968, - null, - 0.6730431696885844, - 0.7255980413609877, - null, - 0.6730431696885844, - 0.6802728591951641, - null, - 0.6730431696885844, - 0.7518492361353024, - null, - 0.38165116541180344, - 0.32578353530864457, - null, - 0.38165116541180344, - 0.413948124857326, - null, - 0.38165116541180344, - 0.44119458804978295, - null, - 0.38165116541180344, - 0.3328704753356456, - null, - 0.38165116541180344, - 0.3499260998923053, - null, - 0.38165116541180344, - 0.37301066653863624, - null, - 0.38165116541180344, - 0.4277213938753692, - null, - 0.38165116541180344, - 0.3247821296168134, - null, - 0.38165116541180344, - 0.3187675293980876, - null, - 0.38165116541180344, - 0.34114125407236195, - null, - 0.6084965344664286, - 0.5531504465254558, - null, - 0.6084965344664286, - 0.587704695878027, - null, - 0.6084965344664286, - 0.5593951498649633, - null, - 0.6084965344664286, - 0.5845953849421676, - null, - 0.6084965344664286, - 
0.6058132814274794, - null, - 0.6084965344664286, - 0.6322124026692795, - null, - 0.6084965344664286, - 0.5201251204037126, - null, - 0.18155558675901884, - 0.2742000416622462, - null, - 0.18155558675901884, - 0.15570283642495664, - null, - 0.18155558675901884, - 0.19921682827804632, - null, - 0.18155558675901884, - 0.2955343345493908, - null, - 0.18155558675901884, - 0.298647499376007, - null, - 0.18155558675901884, - 0.0914406510425998, - null, - 0.18155558675901884, - 0.0875467755337247, - null, - 0.18155558675901884, - 0.08997327822205015, - null, - 0.18155558675901884, - 0.25656414507004344, - null, - 0.18155558675901884, - 0.20133087739958255, - null, - 0.7722862313192606, - 0.7408684543182315, - null, - 0.7722862313192606, - 0.8385234321105272, - null, - 0.7722862313192606, - 0.7333209824474588, - null, - 0.5368181409256901, - 0.595945044435614, - null, - 0.5368181409256901, - 0.6327007577432437, - null, - 0.5368181409256901, - 0.526779936668903, - null, - 0.5368181409256901, - 0.5433115547736789, - null, - 0.5368181409256901, - 0.5274116361492907, - null, - 0.5368181409256901, - 0.555788147264811, - null, - 0.5368181409256901, - 0.5805679633404117, - null, - 0.5368181409256901, - 0.5989925957177575, - null, - 0.5368181409256901, - 0.48218022499136737, - null, - 0.5368181409256901, - 0.6058132814274794, - null, - 0.5368181409256901, - 0.47443124751760235, - null, - 0.5368181409256901, - 0.5291812256005789, - null, - 0.5368181409256901, - 0.5621062195646831, - null, - 0.5368181409256901, - 0.5465171974419871, - null, - 0.8304626469521129, - 0.8266354543284289, - null, - 0.8304626469521129, - 0.7247552078664479, - null, - 0.8304626469521129, - 0.7827775151390383, - null, - 0.8304626469521129, - 0.9082570345357789, - null, - 0.8304626469521129, - 0.916634041055854, - null, - 0.8304626469521129, - 0.8613129225222332, - null, - 0.8304626469521129, - 0.7703024251104211, - null, - 0.8304626469521129, - 0.9005048863870916, - null, - 0.8304626469521129, - 
0.9240127894624793, - null, - 0.7924139234898422, - 0.800297854626628, - null, - 0.7924139234898422, - 0.7364515013041172, - null, - 0.7924139234898422, - 0.8589937476561325, - null, - 0.7924139234898422, - 0.8247840830312709, - null, - 0.7924139234898422, - 0.7948577020793985, - null, - 0.7924139234898422, - 0.7059759544943667, - null, - 0.7924139234898422, - 0.8846357375826375, - null, - 0.7924139234898422, - 0.8323549266756429, - null, - 0.8266354543284289, - 0.7247552078664479, - null, - 0.8266354543284289, - 0.7827775151390383, - null, - 0.8266354543284289, - 0.9082570345357789, - null, - 0.8266354543284289, - 0.7042334738295596, - null, - 0.8266354543284289, - 0.8613129225222332, - null, - 0.8266354543284289, - 0.7703024251104211, - null, - 0.8266354543284289, - 0.9240127894624793, - null, - 0.8266354543284289, - 0.8680862155815134, - null, - 0.4023039585223629, - 0.4611021425875542, - null, - 0.4023039585223629, - 0.44175944307536974, - null, - 0.4023039585223629, - 0.3318561006769827, - null, - 0.4023039585223629, - 0.4349682989231034, - null, - 0.4023039585223629, - 0.29978148854693865, - null, - 0.4023039585223629, - 0.4442228752887084, - null, - 0.5084198498293618, - 0.5436816885151938, - null, - 0.5084198498293618, - 0.5229468203255856, - null, - 0.5084198498293618, - 0.4611021425875542, - null, - 0.5084198498293618, - 0.44175944307536974, - null, - 0.5084198498293618, - 0.6234379896430121, - null, - 0.5084198498293618, - 0.4442228752887084, - null, - 0.23992481624351925, - 0.27440213390552737, - null, - 0.23992481624351925, - 0.2728250610713022, - null, - 0.23992481624351925, - 0.1823584228427031, - null, - 0.23992481624351925, - 0.19852054651169693, - null, - 0.23992481624351925, - 0.22007362873840486, - null, - 0.23992481624351925, - 0.13940667248499528, - null, - 0.23992481624351925, - 0.16862303760247477, - null, - 0.23992481624351925, - 0.12355952994556385, - null, - 0.2742000416622462, - 0.15570283642495664, - null, - 0.2742000416622462, - 
0.32578353530864457, - null, - 0.2742000416622462, - 0.3740122792611037, - null, - 0.2742000416622462, - 0.2955343345493908, - null, - 0.2742000416622462, - 0.31305791514229697, - null, - 0.2742000416622462, - 0.298647499376007, - null, - 0.2742000416622462, - 0.3328704753356456, - null, - 0.2742000416622462, - 0.3499260998923053, - null, - 0.2742000416622462, - 0.3181124346701171, - null, - 0.2742000416622462, - 0.3247821296168134, - null, - 0.2742000416622462, - 0.25656414507004344, - null, - 0.2742000416622462, - 0.3187675293980876, - null, - 0.2742000416622462, - 0.20133087739958255, - null, - 0.2742000416622462, - 0.34114125407236195, - null, - 0.15570283642495664, - 0.07513674080757637, - null, - 0.15570283642495664, - 0.05512117222879742, - null, - 0.15570283642495664, - 0.05194805532761382, - null, - 0.15570283642495664, - 0.06202421257916635, - null, - 0.15570283642495664, - 0.09053866681881584, - null, - 0.15570283642495664, - 0.1573630170264504, - null, - 0.15570283642495664, - 0.0852382135963593, - null, - 0.15570283642495664, - 0.0875467755337247, - null, - 0.15570283642495664, - 0.08997327822205015, - null, - 0.15570283642495664, - 0.20133087739958255, - null, - 0.15570283642495664, - 0.038579501382332126, - null, - 0.07513674080757637, - 0.1130639188502468, - null, - 0.07513674080757637, - 0.05512117222879742, - null, - 0.07513674080757637, - 0.07163295816605642, - null, - 0.07513674080757637, - 0.06202421257916635, - null, - 0.07513674080757637, - 0.09053866681881584, - null, - 0.07513674080757637, - 0.1573630170264504, - null, - 0.07513674080757637, - 0.0023771443647881974, - null, - 0.07513674080757637, - 0.0852382135963593, - null, - 0.07513674080757637, - 0.17086936775877049, - null, - 0.07513674080757637, - 0.0875467755337247, - null, - 0.07513674080757637, - 0.08997327822205015, - null, - 0.07513674080757637, - 0.020212382594376965, - null, - 0.07513674080757637, - 0.0897773631019545, - null, - 0.07513674080757637, - 0.038579501382332126, - 
null, - 0.7247552078664479, - 0.6327007577432437, - null, - 0.7247552078664479, - 0.662108954544855, - null, - 0.7247552078664479, - 0.7827775151390383, - null, - 0.7247552078664479, - 0.7007214129943925, - null, - 0.7247552078664479, - 0.7188906153197968, - null, - 0.7247552078664479, - 0.7042334738295596, - null, - 0.7247552078664479, - 0.7255980413609877, - null, - 0.7247552078664479, - 0.7703024251104211, - null, - 0.2586357176925591, - 0.3019474379086241, - null, - 0.2586357176925591, - 0.2121217358781844, - null, - 0.595945044435614, - 0.6327007577432437, - null, - 0.595945044435614, - 0.526779936668903, - null, - 0.595945044435614, - 0.662108954544855, - null, - 0.595945044435614, - 0.5433115547736789, - null, - 0.595945044435614, - 0.5274116361492907, - null, - 0.595945044435614, - 0.7042334738295596, - null, - 0.595945044435614, - 0.555788147264811, - null, - 0.595945044435614, - 0.5805679633404117, - null, - 0.595945044435614, - 0.5989925957177575, - null, - 0.595945044435614, - 0.6058132814274794, - null, - 0.595945044435614, - 0.5291812256005789, - null, - 0.595945044435614, - 0.5621062195646831, - null, - 0.9428542201780316, - 0.8511753697833563, - null, - 0.9428542201780316, - 0.89080246263295, - null, - 0.9428542201780316, - 0.9521646983336837, - null, - 0.9428542201780316, - 0.9663892923019699, - null, - 0.9428542201780316, - 0.9425745666137786, - null, - 0.9428542201780316, - 0.9851894520572745, - null, - 0.9428542201780316, - 0.9573079778783831, - null, - 0.9428542201780316, - 0.9473667691929577, - null, - 0.9428542201780316, - 0.838803404513024, - null, - 0.03304679952258993, - 0.05596958524873419, - null, - 0.03304679952258993, - 0.014269300880037306, - null, - 0.6013564651959642, - 0.662108954544855, - null, - 0.6013564651959642, - 0.7007214129943925, - null, - 0.6013564651959642, - 0.7188906153197968, - null, - 0.6013564651959642, - 0.555788147264811, - null, - 0.6013564651959642, - 0.5293212253918783, - null, - 0.6013564651959642, - 
0.5291812256005789, - null, - 0.6013564651959642, - 0.5191285820034173, - null, - 0.6013564651959642, - 0.5465171974419871, - null, - 0.1130639188502468, - 0.07163295816605642, - null, - 0.1130639188502468, - 0.09053866681881584, - null, - 0.1130639188502468, - 0.1573630170264504, - null, - 0.1130639188502468, - 0.13747604708068628, - null, - 0.1130639188502468, - 0.2275256207367028, - null, - 0.1130639188502468, - 0.18507593174525072, - null, - 0.1130639188502468, - 0.17086936775877049, - null, - 0.1130639188502468, - 0.0897773631019545, - null, - 0.5531504465254558, - 0.47055154706870017, - null, - 0.5531504465254558, - 0.5274116361492907, - null, - 0.5531504465254558, - 0.587704695878027, - null, - 0.5531504465254558, - 0.5989925957177575, - null, - 0.5531504465254558, - 0.5845953849421676, - null, - 0.5531504465254558, - 0.6058132814274794, - null, - 0.5531504465254558, - 0.4564806171162211, - null, - 0.5531504465254558, - 0.5201251204037126, - null, - 0.1635981270944994, - 0.19921682827804632, - null, - 0.1635981270944994, - 0.10310287300704979, - null, - 0.1635981270944994, - 0.05973078995013337, - null, - 0.1635981270944994, - 0.0914406510425998, - null, - 0.1635981270944994, - 0.14711158829428328, - null, - 0.1635981270944994, - 0.21535391032155426, - null, - 0.05512117222879742, - 0.07163295816605642, - null, - 0.05512117222879742, - 0.05194805532761382, - null, - 0.05512117222879742, - 0.06202421257916635, - null, - 0.05512117222879742, - 0.09053866681881584, - null, - 0.05512117222879742, - 0.0023771443647881974, - null, - 0.05512117222879742, - 0.0852382135963593, - null, - 0.05512117222879742, - 0.0875467755337247, - null, - 0.05512117222879742, - 0.08997327822205015, - null, - 0.05512117222879742, - 0.020212382594376965, - null, - 0.05512117222879742, - 0.02312833765025224, - null, - 0.05512117222879742, - 0.04237200971819888, - null, - 0.05512117222879742, - 0.038579501382332126, - null, - 0.05512117222879742, - 0.01777064460825195, - null, - 
0.32578353530864457, - 0.413948124857326, - null, - 0.32578353530864457, - 0.3328704753356456, - null, - 0.32578353530864457, - 0.3499260998923053, - null, - 0.32578353530864457, - 0.37301066653863624, - null, - 0.32578353530864457, - 0.2619562675328274, - null, - 0.32578353530864457, - 0.4277213938753692, - null, - 0.32578353530864457, - 0.3247821296168134, - null, - 0.32578353530864457, - 0.3187675293980876, - null, - 0.32578353530864457, - 0.34114125407236195, - null, - 0.27440213390552737, - 0.2728250610713022, - null, - 0.27440213390552737, - 0.1823584228427031, - null, - 0.27440213390552737, - 0.19852054651169693, - null, - 0.27440213390552737, - 0.22007362873840486, - null, - 0.27440213390552737, - 0.37301066653863624, - null, - 0.27440213390552737, - 0.2275256207367028, - null, - 0.27440213390552737, - 0.2619562675328274, - null, - 0.27440213390552737, - 0.3414075728554137, - null, - 0.2728250610713022, - 0.1823584228427031, - null, - 0.2728250610713022, - 0.37549158943196925, - null, - 0.2728250610713022, - 0.22007362873840486, - null, - 0.2728250610713022, - 0.16862303760247477, - null, - 0.6346565064837861, - 0.7364515013041172, - null, - 0.6346565064837861, - 0.5436816885151938, - null, - 0.6346565064837861, - 0.5461279353327784, - null, - 0.6346565064837861, - 0.7059759544943667, - null, - 0.6346565064837861, - 0.6149491168624189, - null, - 0.6346565064837861, - 0.5593069337955722, - null, - 0.6327007577432437, - 0.662108954544855, - null, - 0.6327007577432437, - 0.5433115547736789, - null, - 0.6327007577432437, - 0.5274116361492907, - null, - 0.6327007577432437, - 0.7042334738295596, - null, - 0.6327007577432437, - 0.555788147264811, - null, - 0.6327007577432437, - 0.5805679633404117, - null, - 0.6327007577432437, - 0.5989925957177575, - null, - 0.6327007577432437, - 0.5845953849421676, - null, - 0.6327007577432437, - 0.6058132814274794, - null, - 0.6327007577432437, - 0.5621062195646831, - null, - 0.800297854626628, - 0.7364515013041172, - null, - 
0.800297854626628, - 0.6953901849658966, - null, - 0.800297854626628, - 0.8589937476561325, - null, - 0.800297854626628, - 0.7204214783753378, - null, - 0.800297854626628, - 0.8247840830312709, - null, - 0.800297854626628, - 0.7948577020793985, - null, - 0.800297854626628, - 0.7059759544943667, - null, - 0.800297854626628, - 0.8846357375826375, - null, - 0.800297854626628, - 0.8323549266756429, - null, - 0.800297854626628, - 0.8505181106970376, - null, - 0.800297854626628, - 0.7607451357487841, - null, - 0.800297854626628, - 0.9110645875753355, - null, - 0.526779936668903, - 0.5433115547736789, - null, - 0.526779936668903, - 0.5274116361492907, - null, - 0.526779936668903, - 0.555788147264811, - null, - 0.526779936668903, - 0.5805679633404117, - null, - 0.526779936668903, - 0.48218022499136737, - null, - 0.526779936668903, - 0.5293212253918783, - null, - 0.526779936668903, - 0.47443124751760235, - null, - 0.526779936668903, - 0.5291812256005789, - null, - 0.526779936668903, - 0.5621062195646831, - null, - 0.526779936668903, - 0.5465171974419871, - null, - 0.413948124857326, - 0.44119458804978295, - null, - 0.413948124857326, - 0.3328704753356456, - null, - 0.413948124857326, - 0.3499260998923053, - null, - 0.413948124857326, - 0.47055154706870017, - null, - 0.413948124857326, - 0.5274116361492907, - null, - 0.413948124857326, - 0.4277213938753692, - null, - 0.413948124857326, - 0.3247821296168134, - null, - 0.413948124857326, - 0.4564806171162211, - null, - 0.413948124857326, - 0.3187675293980876, - null, - 0.413948124857326, - 0.34114125407236195, - null, - 0.09276814106220677, - 0.03187584930858911, - null, - 0.09276814106220677, - 0.07426685281627932, - null, - 0.09276814106220677, - 0.03446402354654854, - null, - 0.09276814106220677, - 0.06879886671193436, - null, - 0.09276814106220677, - 0.1823584228427031, - null, - 0.09276814106220677, - 0.13940667248499528, - null, - 0.09276814106220677, - 0.0201693226965588, - null, - 0.09276814106220677, - 
0.16862303760247477, - null, - 0.09276814106220677, - 0.12355952994556385, - null, - 0.662108954544855, - 0.7827775151390383, - null, - 0.662108954544855, - 0.7007214129943925, - null, - 0.662108954544855, - 0.7188906153197968, - null, - 0.662108954544855, - 0.7042334738295596, - null, - 0.662108954544855, - 0.555788147264811, - null, - 0.662108954544855, - 0.5805679633404117, - null, - 0.662108954544855, - 0.7255980413609877, - null, - 0.662108954544855, - 0.7518492361353024, - null, - 0.07163295816605642, - 0.06202421257916635, - null, - 0.07163295816605642, - 0.09053866681881584, - null, - 0.07163295816605642, - 0.1573630170264504, - null, - 0.07163295816605642, - 0.0023771443647881974, - null, - 0.07163295816605642, - 0.0852382135963593, - null, - 0.07163295816605642, - 0.17086936775877049, - null, - 0.07163295816605642, - 0.0875467755337247, - null, - 0.07163295816605642, - 0.08997327822205015, - null, - 0.07163295816605642, - 0.020212382594376965, - null, - 0.07163295816605642, - 0.0897773631019545, - null, - 0.07163295816605642, - 0.038579501382332126, - null, - 0.44119458804978295, - 0.3740122792611037, - null, - 0.44119458804978295, - 0.3328704753356456, - null, - 0.44119458804978295, - 0.3499260998923053, - null, - 0.44119458804978295, - 0.47055154706870017, - null, - 0.44119458804978295, - 0.5274116361492907, - null, - 0.44119458804978295, - 0.4277213938753692, - null, - 0.44119458804978295, - 0.4564806171162211, - null, - 0.44119458804978295, - 0.34114125407236195, - null, - 0.7364515013041172, - 0.8247840830312709, - null, - 0.7364515013041172, - 0.7948577020793985, - null, - 0.7364515013041172, - 0.7059759544943667, - null, - 0.7364515013041172, - 0.8323549266756429, - null, - 0.7827775151390383, - 0.7007214129943925, - null, - 0.7827775151390383, - 0.7188906153197968, - null, - 0.7827775151390383, - 0.7042334738295596, - null, - 0.7827775151390383, - 0.8613129225222332, - null, - 0.7827775151390383, - 0.7255980413609877, - null, - 0.7827775151390383, 
- 0.7703024251104211, - null, - 0.7827775151390383, - 0.9005048863870916, - null, - 0.7827775151390383, - 0.7518492361353024, - null, - 0.9600359726880752, - 0.9998698320754983, - null, - 0.9600359726880752, - 0.9082570345357789, - null, - 0.9600359726880752, - 0.9503884723051484, - null, - 0.9600359726880752, - 0.916634041055854, - null, - 0.9600359726880752, - 0.8613129225222332, - null, - 0.9600359726880752, - 0.9005048863870916, - null, - 0.9600359726880752, - 0.9240127894624793, - null, - 0.9600359726880752, - 0.9636590456207981, - null, - 0.8511753697833563, - 0.89080246263295, - null, - 0.8511753697833563, - 0.9521646983336837, - null, - 0.8511753697833563, - 0.9663892923019699, - null, - 0.8511753697833563, - 0.9573079778783831, - null, - 0.8511753697833563, - 0.9473667691929577, - null, - 0.8511753697833563, - 0.838803404513024, - null, - 0.8511753697833563, - 0.7518492361353024, - null, - 0.05194805532761382, - 0.06202421257916635, - null, - 0.05194805532761382, - 0.0852382135963593, - null, - 0.05194805532761382, - 0.0914406510425998, - null, - 0.05194805532761382, - 0.0875467755337247, - null, - 0.05194805532761382, - 0.08997327822205015, - null, - 0.05194805532761382, - 0.020212382594376965, - null, - 0.05194805532761382, - 0.02312833765025224, - null, - 0.05194805532761382, - 0.04237200971819888, - null, - 0.05194805532761382, - 0.038579501382332126, - null, - 0.05194805532761382, - 0.01777064460825195, - null, - 0.03187584930858911, - 0.07426685281627932, - null, - 0.03187584930858911, - 0.03446402354654854, - null, - 0.03187584930858911, - 0.06879886671193436, - null, - 0.03187584930858911, - 0.13940667248499528, - null, - 0.03187584930858911, - 0.0201693226965588, - null, - 0.03187584930858911, - 0.12355952994556385, - null, - 0.07426685281627932, - 0.09471702229050472, - null, - 0.07426685281627932, - 0.06879886671193436, - null, - 0.07426685281627932, - 0.1823584228427031, - null, - 0.07426685281627932, - 0.13940667248499528, - null, - 
0.07426685281627932, - 0.0201693226965588, - null, - 0.07426685281627932, - 0.16862303760247477, - null, - 0.07426685281627932, - 0.12355952994556385, - null, - 0.07426685281627932, - 0.04781523934390508, - null, - 0.5257999712304688, - 0.5593951498649633, - null, - 0.5257999712304688, - 0.4404718698088387, - null, - 0.5257999712304688, - 0.5201251204037126, - null, - 0.9998698320754983, - 0.9082570345357789, - null, - 0.9998698320754983, - 0.9636084967560627, - null, - 0.9998698320754983, - 0.9503884723051484, - null, - 0.9998698320754983, - 0.9240127894624793, - null, - 0.9998698320754983, - 0.9636590456207981, - null, - 0.09471702229050472, - 0.06879886671193436, - null, - 0.09471702229050472, - 0.1823584228427031, - null, - 0.09471702229050472, - 0.19852054651169693, - null, - 0.09471702229050472, - 0.13747604708068628, - null, - 0.09471702229050472, - 0.13940667248499528, - null, - 0.09471702229050472, - 0.0201693226965588, - null, - 0.09471702229050472, - 0.12355952994556385, - null, - 0.09471702229050472, - 0.0897773631019545, - null, - 0.09471702229050472, - 0.04781523934390508, - null, - 0.6953901849658966, - 0.7204214783753378, - null, - 0.6953901849658966, - 0.7181048560087516, - null, - 0.6953901849658966, - 0.7948577020793985, - null, - 0.6953901849658966, - 0.7059759544943667, - null, - 0.6953901849658966, - 0.6370268640561303, - null, - 0.6953901849658966, - 0.6149491168624189, - null, - 0.6953901849658966, - 0.7607451357487841, - null, - 0.6953901849658966, - 0.6234379896430121, - null, - 0.03446402354654854, - 0.05596958524873419, - null, - 0.03446402354654854, - 0.014269300880037306, - null, - 0.9082570345357789, - 0.9503884723051484, - null, - 0.9082570345357789, - 0.916634041055854, - null, - 0.9082570345357789, - 0.8613129225222332, - null, - 0.9082570345357789, - 0.9005048863870916, - null, - 0.9082570345357789, - 0.9240127894624793, - null, - 0.9082570345357789, - 0.9636590456207981, - null, - 0.3740122792611037, - 0.37848025459696877, - 
null, - 0.3740122792611037, - 0.3821391536049519, - null, - 0.3740122792611037, - 0.2955343345493908, - null, - 0.3740122792611037, - 0.31305791514229697, - null, - 0.3740122792611037, - 0.298647499376007, - null, - 0.3740122792611037, - 0.3246624829381992, - null, - 0.3740122792611037, - 0.3328704753356456, - null, - 0.3740122792611037, - 0.33203393677870674, - null, - 0.3740122792611037, - 0.3499260998923053, - null, - 0.3740122792611037, - 0.3181124346701171, - null, - 0.3740122792611037, - 0.47055154706870017, - null, - 0.3740122792611037, - 0.4277213938753692, - null, - 0.3740122792611037, - 0.25656414507004344, - null, - 0.3740122792611037, - 0.4564806171162211, - null, - 0.977854801698089, - 0.9162463356603696, - null, - 0.5436816885151938, - 0.5461279353327784, - null, - 0.5436816885151938, - 0.5229468203255856, - null, - 0.5436816885151938, - 0.4611021425875542, - null, - 0.5436816885151938, - 0.6149491168624189, - null, - 0.5436816885151938, - 0.4349682989231034, - null, - 0.5436816885151938, - 0.6234379896430121, - null, - 0.5436816885151938, - 0.4442228752887084, - null, - 0.5436816885151938, - 0.5593069337955722, - null, - 0.06202421257916635, - 0.09053866681881584, - null, - 0.06202421257916635, - 0.1573630170264504, - null, - 0.06202421257916635, - 0.0023771443647881974, - null, - 0.06202421257916635, - 0.0852382135963593, - null, - 0.06202421257916635, - 0.0875467755337247, - null, - 0.06202421257916635, - 0.08997327822205015, - null, - 0.06202421257916635, - 0.020212382594376965, - null, - 0.06202421257916635, - 0.02312833765025224, - null, - 0.06202421257916635, - 0.04237200971819888, - null, - 0.06202421257916635, - 0.038579501382332126, - null, - 0.06202421257916635, - 0.01777064460825195, - null, - 0.8589937476561325, - 0.8247840830312709, - null, - 0.8589937476561325, - 0.7948577020793985, - null, - 0.8589937476561325, - 0.9210876029743161, - null, - 0.8589937476561325, - 0.9694266665187994, - null, - 0.8589937476561325, - 0.8846357375826375, 
- null, - 0.8589937476561325, - 0.8323549266756429, - null, - 0.8589937476561325, - 0.8505181106970376, - null, - 0.8589937476561325, - 0.9110645875753355, - null, - 0.06879886671193436, - 0.1823584228427031, - null, - 0.06879886671193436, - 0.13940667248499528, - null, - 0.06879886671193436, - 0.0201693226965588, - null, - 0.06879886671193436, - 0.12355952994556385, - null, - 0.06879886671193436, - 0.04781523934390508, - null, - 0.19921682827804632, - 0.10310287300704979, - null, - 0.19921682827804632, - 0.0914406510425998, - null, - 0.19921682827804632, - 0.14711158829428328, - null, - 0.19921682827804632, - 0.21535391032155426, - null, - 0.19921682827804632, - 0.25656414507004344, - null, - 0.1823584228427031, - 0.19852054651169693, - null, - 0.1823584228427031, - 0.22007362873840486, - null, - 0.1823584228427031, - 0.13940667248499528, - null, - 0.1823584228427031, - 0.16862303760247477, - null, - 0.1823584228427031, - 0.12355952994556385, - null, - 0.37549158943196925, - 0.41808707877840445, - null, - 0.37549158943196925, - 0.42926818011737133, - null, - 0.37549158943196925, - 0.4363707938884992, - null, - 0.37549158943196925, - 0.42077304608666055, - null, - 0.5433115547736789, - 0.5274116361492907, - null, - 0.5433115547736789, - 0.555788147264811, - null, - 0.5433115547736789, - 0.5805679633404117, - null, - 0.5433115547736789, - 0.5989925957177575, - null, - 0.5433115547736789, - 0.48218022499136737, - null, - 0.5433115547736789, - 0.47443124751760235, - null, - 0.5433115547736789, - 0.5291812256005789, - null, - 0.5433115547736789, - 0.5621062195646831, - null, - 0.5433115547736789, - 0.5465171974419871, - null, - 0.37848025459696877, - 0.3821391536049519, - null, - 0.37848025459696877, - 0.31305791514229697, - null, - 0.37848025459696877, - 0.3246624829381992, - null, - 0.37848025459696877, - 0.33203393677870674, - null, - 0.37848025459696877, - 0.4404718698088387, - null, - 0.37848025459696877, - 0.3393815448042514, - null, - 0.37848025459696877, - 
0.32444561774289593, - null, - 0.37848025459696877, - 0.33721825060791266, - null, - 0.3821391536049519, - 0.2955343345493908, - null, - 0.3821391536049519, - 0.31305791514229697, - null, - 0.3821391536049519, - 0.298647499376007, - null, - 0.3821391536049519, - 0.3246624829381992, - null, - 0.3821391536049519, - 0.33203393677870674, - null, - 0.3821391536049519, - 0.3499260998923053, - null, - 0.3821391536049519, - 0.3181124346701171, - null, - 0.3821391536049519, - 0.47055154706870017, - null, - 0.3821391536049519, - 0.4404718698088387, - null, - 0.3821391536049519, - 0.3393815448042514, - null, - 0.3821391536049519, - 0.32444561774289593, - null, - 0.3821391536049519, - 0.4564806171162211, - null, - 0.3821391536049519, - 0.33721825060791266, - null, - 0.7204214783753378, - 0.7181048560087516, - null, - 0.7204214783753378, - 0.7948577020793985, - null, - 0.7204214783753378, - 0.7059759544943667, - null, - 0.7204214783753378, - 0.6370268640561303, - null, - 0.7204214783753378, - 0.7607451357487841, - null, - 0.7204214783753378, - 0.6234379896430121, - null, - 0.2955343345493908, - 0.31305791514229697, - null, - 0.2955343345493908, - 0.298647499376007, - null, - 0.2955343345493908, - 0.3246624829381992, - null, - 0.2955343345493908, - 0.3328704753356456, - null, - 0.2955343345493908, - 0.33203393677870674, - null, - 0.2955343345493908, - 0.3499260998923053, - null, - 0.2955343345493908, - 0.3181124346701171, - null, - 0.2955343345493908, - 0.25656414507004344, - null, - 0.2955343345493908, - 0.32444561774289593, - null, - 0.09053866681881584, - 0.1573630170264504, - null, - 0.09053866681881584, - 0.0023771443647881974, - null, - 0.09053866681881584, - 0.0852382135963593, - null, - 0.09053866681881584, - 0.17086936775877049, - null, - 0.09053866681881584, - 0.0875467755337247, - null, - 0.09053866681881584, - 0.08997327822205015, - null, - 0.09053866681881584, - 0.020212382594376965, - null, - 0.09053866681881584, - 0.0897773631019545, - null, - 0.09053866681881584, 
- 0.02312833765025224, - null, - 0.09053866681881584, - 0.20133087739958255, - null, - 0.09053866681881584, - 0.038579501382332126, - null, - 0.09053866681881584, - 0.01777064460825195, - null, - 0.7181048560087516, - 0.6776948411821848, - null, - 0.7181048560087516, - 0.834199864808296, - null, - 0.7181048560087516, - 0.6370268640561303, - null, - 0.7181048560087516, - 0.6802728591951641, - null, - 0.7181048560087516, - 0.7607451357487841, - null, - 0.7181048560087516, - 0.6314926226168458, - null, - 0.10310287300704979, - 0.05973078995013337, - null, - 0.10310287300704979, - 0.0914406510425998, - null, - 0.10310287300704979, - 0.14711158829428328, - null, - 0.10310287300704979, - 0.21535391032155426, - null, - 0.10310287300704979, - 0.04237200971819888, - null, - 0.8247840830312709, - 0.7948577020793985, - null, - 0.8247840830312709, - 0.9210876029743161, - null, - 0.8247840830312709, - 0.7059759544943667, - null, - 0.8247840830312709, - 0.9186278106648778, - null, - 0.8247840830312709, - 0.8846357375826375, - null, - 0.8247840830312709, - 0.8323549266756429, - null, - 0.8247840830312709, - 0.8505181106970376, - null, - 0.8247840830312709, - 0.9110645875753355, - null, - 0.1573630170264504, - 0.2275256207367028, - null, - 0.1573630170264504, - 0.0852382135963593, - null, - 0.1573630170264504, - 0.18507593174525072, - null, - 0.1573630170264504, - 0.17086936775877049, - null, - 0.1573630170264504, - 0.0875467755337247, - null, - 0.1573630170264504, - 0.08997327822205015, - null, - 0.1573630170264504, - 0.0897773631019545, - null, - 0.1573630170264504, - 0.20133087739958255, - null, - 0.31305791514229697, - 0.298647499376007, - null, - 0.31305791514229697, - 0.3246624829381992, - null, - 0.31305791514229697, - 0.33203393677870674, - null, - 0.31305791514229697, - 0.3499260998923053, - null, - 0.31305791514229697, - 0.3181124346701171, - null, - 0.31305791514229697, - 0.3393815448042514, - null, - 0.31305791514229697, - 0.21535391032155426, - null, - 
0.31305791514229697, - 0.25656414507004344, - null, - 0.31305791514229697, - 0.32444561774289593, - null, - 0.31305791514229697, - 0.33721825060791266, - null, - 0.298647499376007, - 0.3328704753356456, - null, - 0.298647499376007, - 0.33203393677870674, - null, - 0.298647499376007, - 0.3499260998923053, - null, - 0.298647499376007, - 0.3181124346701171, - null, - 0.298647499376007, - 0.3247821296168134, - null, - 0.298647499376007, - 0.25656414507004344, - null, - 0.298647499376007, - 0.3187675293980876, - null, - 0.298647499376007, - 0.20133087739958255, - null, - 0.298647499376007, - 0.34114125407236195, - null, - 0.3246624829381992, - 0.33203393677870674, - null, - 0.3246624829381992, - 0.4404718698088387, - null, - 0.3246624829381992, - 0.3393815448042514, - null, - 0.3246624829381992, - 0.21535391032155426, - null, - 0.3246624829381992, - 0.25656414507004344, - null, - 0.3246624829381992, - 0.32444561774289593, - null, - 0.3246624829381992, - 0.33721825060791266, - null, - 0.19852054651169693, - 0.13747604708068628, - null, - 0.19852054651169693, - 0.22007362873840486, - null, - 0.19852054651169693, - 0.2275256207367028, - null, - 0.19852054651169693, - 0.2619562675328274, - null, - 0.19852054651169693, - 0.18507593174525072, - null, - 0.3328704753356456, - 0.3499260998923053, - null, - 0.3328704753356456, - 0.3181124346701171, - null, - 0.3328704753356456, - 0.4277213938753692, - null, - 0.3328704753356456, - 0.3247821296168134, - null, - 0.3328704753356456, - 0.3187675293980876, - null, - 0.3328704753356456, - 0.34114125407236195, - null, - 0.33203393677870674, - 0.3181124346701171, - null, - 0.33203393677870674, - 0.4404718698088387, - null, - 0.33203393677870674, - 0.3393815448042514, - null, - 0.33203393677870674, - 0.21535391032155426, - null, - 0.33203393677870674, - 0.25656414507004344, - null, - 0.33203393677870674, - 0.32444561774289593, - null, - 0.33203393677870674, - 0.33721825060791266, - null, - 0.5461279353327784, - 0.5229468203255856, - null, 
- 0.5461279353327784, - 0.4611021425875542, - null, - 0.5461279353327784, - 0.6149491168624189, - null, - 0.5461279353327784, - 0.4349682989231034, - null, - 0.5461279353327784, - 0.5593069337955722, - null, - 0.9636084967560627, - 0.9503884723051484, - null, - 0.9636084967560627, - 0.9162463356603696, - null, - 0.9636084967560627, - 0.8541827253649632, - null, - 0.9636084967560627, - 0.9636590456207981, - null, - 0.9636084967560627, - 0.8680862155815134, - null, - 0.9636084967560627, - 0.8668565351624634, - null, - 0.9503884723051484, - 0.9240127894624793, - null, - 0.9503884723051484, - 0.9636590456207981, - null, - 0.9503884723051484, - 0.8680862155815134, - null, - 0.13747604708068628, - 0.22007362873840486, - null, - 0.13747604708068628, - 0.2275256207367028, - null, - 0.13747604708068628, - 0.18507593174525072, - null, - 0.13747604708068628, - 0.17086936775877049, - null, - 0.13747604708068628, - 0.0897773631019545, - null, - 0.13747604708068628, - 0.04781523934390508, - null, - 0.3499260998923053, - 0.3181124346701171, - null, - 0.3499260998923053, - 0.4277213938753692, - null, - 0.3499260998923053, - 0.3247821296168134, - null, - 0.3499260998923053, - 0.4564806171162211, - null, - 0.3499260998923053, - 0.3187675293980876, - null, - 0.3499260998923053, - 0.34114125407236195, - null, - 0.3181124346701171, - 0.3247821296168134, - null, - 0.3181124346701171, - 0.25656414507004344, - null, - 0.3181124346701171, - 0.3187675293980876, - null, - 0.89080246263295, - 0.9521646983336837, - null, - 0.89080246263295, - 0.834199864808296, - null, - 0.89080246263295, - 0.9663892923019699, - null, - 0.89080246263295, - 0.9425745666137786, - null, - 0.89080246263295, - 0.9851894520572745, - null, - 0.89080246263295, - 0.9573079778783831, - null, - 0.89080246263295, - 0.9473667691929577, - null, - 0.89080246263295, - 0.838803404513024, - null, - 0.9521646983336837, - 0.916634041055854, - null, - 0.9521646983336837, - 0.9663892923019699, - null, - 0.9521646983336837, - 
0.9573079778783831, - null, - 0.9521646983336837, - 0.9473667691929577, - null, - 0.9521646983336837, - 0.9005048863870916, - null, - 0.6776948411821848, - 0.6802728591951641, - null, - 0.6776948411821848, - 0.6314926226168458, - null, - 0.0023771443647881974, - 0.0852382135963593, - null, - 0.0023771443647881974, - 0.0875467755337247, - null, - 0.0023771443647881974, - 0.020212382594376965, - null, - 0.0023771443647881974, - 0.0897773631019545, - null, - 0.0023771443647881974, - 0.038579501382332126, - null, - 0.7007214129943925, - 0.7188906153197968, - null, - 0.7007214129943925, - 0.7255980413609877, - null, - 0.7007214129943925, - 0.7518492361353024, - null, - 0.7188906153197968, - 0.7255980413609877, - null, - 0.7188906153197968, - 0.7518492361353024, - null, - 0.47055154706870017, - 0.5274116361492907, - null, - 0.47055154706870017, - 0.4277213938753692, - null, - 0.47055154706870017, - 0.5845953849421676, - null, - 0.47055154706870017, - 0.4564806171162211, - null, - 0.47055154706870017, - 0.5201251204037126, - null, - 0.19043749918150743, - 0.2121217358781844, - null, - 0.19043749918150743, - 0.29978148854693865, - null, - 0.5274116361492907, - 0.555788147264811, - null, - 0.5274116361492907, - 0.5805679633404117, - null, - 0.5274116361492907, - 0.5989925957177575, - null, - 0.5274116361492907, - 0.4277213938753692, - null, - 0.5274116361492907, - 0.5845953849421676, - null, - 0.5274116361492907, - 0.6058132814274794, - null, - 0.5274116361492907, - 0.5291812256005789, - null, - 0.5274116361492907, - 0.5621062195646831, - null, - 0.5274116361492907, - 0.4564806171162211, - null, - 0.9162463356603696, - 0.8385234321105272, - null, - 0.9162463356603696, - 0.8668565351624634, - null, - 0.7042334738295596, - 0.5989925957177575, - null, - 0.7042334738295596, - 0.7255980413609877, - null, - 0.7042334738295596, - 0.6058132814274794, - null, - 0.7042334738295596, - 0.7703024251104211, - null, - 0.555788147264811, - 0.5805679633404117, - null, - 0.555788147264811, - 
0.5989925957177575, - null, - 0.555788147264811, - 0.48218022499136737, - null, - 0.555788147264811, - 0.5293212253918783, - null, - 0.555788147264811, - 0.47443124751760235, - null, - 0.555788147264811, - 0.5291812256005789, - null, - 0.555788147264811, - 0.5621062195646831, - null, - 0.555788147264811, - 0.5465171974419871, - null, - 0.5805679633404117, - 0.5989925957177575, - null, - 0.5805679633404117, - 0.5845953849421676, - null, - 0.5805679633404117, - 0.6058132814274794, - null, - 0.5805679633404117, - 0.5291812256005789, - null, - 0.5805679633404117, - 0.5621062195646831, - null, - 0.587704695878027, - 0.5593951498649633, - null, - 0.587704695878027, - 0.5845953849421676, - null, - 0.587704695878027, - 0.6058132814274794, - null, - 0.587704695878027, - 0.6322124026692795, - null, - 0.587704695878027, - 0.5201251204037126, - null, - 0.916634041055854, - 0.8613129225222332, - null, - 0.916634041055854, - 0.9005048863870916, - null, - 0.916634041055854, - 0.9240127894624793, - null, - 0.7948577020793985, - 0.7059759544943667, - null, - 0.7948577020793985, - 0.8846357375826375, - null, - 0.7948577020793985, - 0.8323549266756429, - null, - 0.7948577020793985, - 0.8505181106970376, - null, - 0.7948577020793985, - 0.7607451357487841, - null, - 0.7948577020793985, - 0.9110645875753355, - null, - 0.9210876029743161, - 0.9694266665187994, - null, - 0.9210876029743161, - 0.9186278106648778, - null, - 0.9210876029743161, - 0.8846357375826375, - null, - 0.9210876029743161, - 0.8323549266756429, - null, - 0.9210876029743161, - 0.9110645875753355, - null, - 0.9210876029743161, - 0.992283435751248, - null, - 0.834199864808296, - 0.9425745666137786, - null, - 0.834199864808296, - 0.8505181106970376, - null, - 0.834199864808296, - 0.7607451357487841, - null, - 0.5989925957177575, - 0.5845953849421676, - null, - 0.5989925957177575, - 0.6058132814274794, - null, - 0.5989925957177575, - 0.5621062195646831, - null, - 0.05973078995013337, - 0.0914406510425998, - null, - 
0.05973078995013337, - 0.14711158829428328, - null, - 0.05973078995013337, - 0.04237200971819888, - null, - 0.5593951498649633, - 0.4404718698088387, - null, - 0.5593951498649633, - 0.6322124026692795, - null, - 0.5593951498649633, - 0.5201251204037126, - null, - 0.5229468203255856, - 0.4611021425875542, - null, - 0.5229468203255856, - 0.44175944307536974, - null, - 0.5229468203255856, - 0.6149491168624189, - null, - 0.5229468203255856, - 0.6234379896430121, - null, - 0.5229468203255856, - 0.4442228752887084, - null, - 0.5229468203255856, - 0.5593069337955722, - null, - 0.22007362873840486, - 0.2275256207367028, - null, - 0.22007362873840486, - 0.2619562675328274, - null, - 0.22007362873840486, - 0.18507593174525072, - null, - 0.22007362873840486, - 0.3414075728554137, - null, - 0.37301066653863624, - 0.2619562675328274, - null, - 0.37301066653863624, - 0.48218022499136737, - null, - 0.37301066653863624, - 0.47443124751760235, - null, - 0.37301066653863624, - 0.3414075728554137, - null, - 0.37301066653863624, - 0.42077304608666055, - null, - 0.37301066653863624, - 0.4039327719907384, - null, - 0.8613129225222332, - 0.7703024251104211, - null, - 0.8613129225222332, - 0.9005048863870916, - null, - 0.9663892923019699, - 0.9851894520572745, - null, - 0.9663892923019699, - 0.9573079778783831, - null, - 0.9663892923019699, - 0.9473667691929577, - null, - 0.2275256207367028, - 0.2619562675328274, - null, - 0.2275256207367028, - 0.18507593174525072, - null, - 0.2275256207367028, - 0.17086936775877049, - null, - 0.0852382135963593, - 0.0875467755337247, - null, - 0.0852382135963593, - 0.08997327822205015, - null, - 0.0852382135963593, - 0.020212382594376965, - null, - 0.0852382135963593, - 0.02312833765025224, - null, - 0.0852382135963593, - 0.20133087739958255, - null, - 0.0852382135963593, - 0.038579501382332126, - null, - 0.0852382135963593, - 0.01777064460825195, - null, - 0.0914406510425998, - 0.14711158829428328, - null, - 0.0914406510425998, - 0.02312833765025224, - 
null, - 0.0914406510425998, - 0.04237200971819888, - null, - 0.0914406510425998, - 0.01777064460825195, - null, - 0.9425745666137786, - 0.9851894520572745, - null, - 0.9425745666137786, - 0.9473667691929577, - null, - 0.3019474379086241, - 0.2121217358781844, - null, - 0.3019474379086241, - 0.3318561006769827, - null, - 0.3019474379086241, - 0.29978148854693865, - null, - 0.2619562675328274, - 0.3247821296168134, - null, - 0.2619562675328274, - 0.18507593174525072, - null, - 0.2619562675328274, - 0.17086936775877049, - null, - 0.2619562675328274, - 0.3414075728554137, - null, - 0.2619562675328274, - 0.3187675293980876, - null, - 0.2619562675328274, - 0.34114125407236195, - null, - 0.48218022499136737, - 0.5293212253918783, - null, - 0.48218022499136737, - 0.47443124751760235, - null, - 0.48218022499136737, - 0.5291812256005789, - null, - 0.48218022499136737, - 0.5465171974419871, - null, - 0.5293212253918783, - 0.47443124751760235, - null, - 0.5293212253918783, - 0.5291812256005789, - null, - 0.5293212253918783, - 0.5191285820034173, - null, - 0.5293212253918783, - 0.42077304608666055, - null, - 0.5293212253918783, - 0.5465171974419871, - null, - 0.41808707877840445, - 0.42926818011737133, - null, - 0.41808707877840445, - 0.44175944307536974, - null, - 0.41808707877840445, - 0.4363707938884992, - null, - 0.14711158829428328, - 0.21535391032155426, - null, - 0.42926818011737133, - 0.4363707938884992, - null, - 0.9694266665187994, - 0.9186278106648778, - null, - 0.9694266665187994, - 0.8846357375826375, - null, - 0.9694266665187994, - 0.9110645875753355, - null, - 0.9694266665187994, - 0.992283435751248, - null, - 0.4404718698088387, - 0.3393815448042514, - null, - 0.4404718698088387, - 0.32444561774289593, - null, - 0.4404718698088387, - 0.33721825060791266, - null, - 0.4404718698088387, - 0.5201251204037126, - null, - 0.4277213938753692, - 0.3247821296168134, - null, - 0.4277213938753692, - 0.4564806171162211, - null, - 0.4277213938753692, - 0.3187675293980876, - 
null, - 0.4277213938753692, - 0.34114125407236195, - null, - 0.7059759544943667, - 0.6149491168624189, - null, - 0.7059759544943667, - 0.6234379896430121, - null, - 0.4611021425875542, - 0.44175944307536974, - null, - 0.4611021425875542, - 0.4349682989231034, - null, - 0.4611021425875542, - 0.4442228752887084, - null, - 0.4611021425875542, - 0.5593069337955722, - null, - 0.13940667248499528, - 0.0201693226965588, - null, - 0.13940667248499528, - 0.16862303760247477, - null, - 0.13940667248499528, - 0.12355952994556385, - null, - 0.3393815448042514, - 0.21535391032155426, - null, - 0.3393815448042514, - 0.25656414507004344, - null, - 0.3393815448042514, - 0.32444561774289593, - null, - 0.3393815448042514, - 0.33721825060791266, - null, - 0.6370268640561303, - 0.7607451357487841, - null, - 0.6370268640561303, - 0.6234379896430121, - null, - 0.6370268640561303, - 0.6314926226168458, - null, - 0.9851894520572745, - 0.9573079778783831, - null, - 0.9851894520572745, - 0.9473667691929577, - null, - 0.3247821296168134, - 0.3187675293980876, - null, - 0.3247821296168134, - 0.34114125407236195, - null, - 0.9186278106648778, - 0.8846357375826375, - null, - 0.9186278106648778, - 0.8323549266756429, - null, - 0.9186278106648778, - 0.9110645875753355, - null, - 0.9186278106648778, - 0.992283435751248, - null, - 0.18507593174525072, - 0.17086936775877049, - null, - 0.18507593174525072, - 0.0897773631019545, - null, - 0.18507593174525072, - 0.20133087739958255, - null, - 0.5845953849421676, - 0.6058132814274794, - null, - 0.5845953849421676, - 0.5621062195646831, - null, - 0.44175944307536974, - 0.4442228752887084, - null, - 0.7255980413609877, - 0.7703024251104211, - null, - 0.7255980413609877, - 0.7518492361353024, - null, - 0.6058132814274794, - 0.5621062195646831, - null, - 0.47443124751760235, - 0.5291812256005789, - null, - 0.47443124751760235, - 0.5465171974419871, - null, - 0.9573079778783831, - 0.9473667691929577, - null, - 0.9573079778783831, - 0.838803404513024, - null, 
- 0.0201693226965588, - 0.12355952994556385, - null, - 0.0201693226965588, - 0.04781523934390508, - null, - 0.17086936775877049, - 0.0897773631019545, - null, - 0.17086936775877049, - 0.20133087739958255, - null, - 0.5291812256005789, - 0.5621062195646831, - null, - 0.5291812256005789, - 0.5465171974419871, - null, - 0.16862303760247477, - 0.12355952994556385, - null, - 0.8846357375826375, - 0.8323549266756429, - null, - 0.8846357375826375, - 0.8505181106970376, - null, - 0.8846357375826375, - 0.9110645875753355, - null, - 0.8846357375826375, - 0.992283435751248, - null, - 0.0875467755337247, - 0.08997327822205015, - null, - 0.0875467755337247, - 0.020212382594376965, - null, - 0.0875467755337247, - 0.02312833765025224, - null, - 0.0875467755337247, - 0.20133087739958255, - null, - 0.0875467755337247, - 0.038579501382332126, - null, - 0.0875467755337247, - 0.01777064460825195, - null, - 0.9473667691929577, - 0.838803404513024, - null, - 0.8541827253649632, - 0.8680862155815134, - null, - 0.8541827253649632, - 0.8668565351624634, - null, - 0.3414075728554137, - 0.42077304608666055, - null, - 0.3414075728554137, - 0.4039327719907384, - null, - 0.3318561006769827, - 0.4349682989231034, - null, - 0.3318561006769827, - 0.29978148854693865, - null, - 0.7408684543182315, - 0.8385234321105272, - null, - 0.7408684543182315, - 0.6322124026692795, - null, - 0.7408684543182315, - 0.7333209824474588, - null, - 0.6149491168624189, - 0.6234379896430121, - null, - 0.6149491168624189, - 0.5593069337955722, - null, - 0.12355952994556385, - 0.04781523934390508, - null, - 0.08997327822205015, - 0.020212382594376965, - null, - 0.08997327822205015, - 0.02312833765025224, - null, - 0.08997327822205015, - 0.20133087739958255, - null, - 0.08997327822205015, - 0.04237200971819888, - null, - 0.08997327822205015, - 0.038579501382332126, - null, - 0.08997327822205015, - 0.01777064460825195, - null, - 0.21535391032155426, - 0.25656414507004344, - null, - 0.21535391032155426, - 
0.32444561774289593, - null, - 0.21535391032155426, - 0.33721825060791266, - null, - 0.8323549266756429, - 0.9110645875753355, - null, - 0.8385234321105272, - 0.8668565351624634, - null, - 0.8385234321105272, - 0.7333209824474588, - null, - 0.9240127894624793, - 0.9636590456207981, - null, - 0.9240127894624793, - 0.8680862155815134, - null, - 0.6802728591951641, - 0.6314926226168458, - null, - 0.25656414507004344, - 0.32444561774289593, - null, - 0.25656414507004344, - 0.33721825060791266, - null, - 0.020212382594376965, - 0.02312833765025224, - null, - 0.020212382594376965, - 0.038579501382332126, - null, - 0.020212382594376965, - 0.01777064460825195, - null, - 0.32444561774289593, - 0.33721825060791266, - null, - 0.838803404513024, - 0.7518492361353024, - null, - 0.6322124026692795, - 0.5201251204037126, - null, - 0.6322124026692795, - 0.7333209824474588, - null, - 0.8505181106970376, - 0.7607451357487841, - null, - 0.8505181106970376, - 0.9110645875753355, - null, - 0.0897773631019545, - 0.04781523934390508, - null, - 0.02312833765025224, - 0.04237200971819888, - null, - 0.02312833765025224, - 0.038579501382332126, - null, - 0.02312833765025224, - 0.01777064460825195, - null, - 0.05596958524873419, - 0.014269300880037306, - null, - 0.3187675293980876, - 0.20133087739958255, - null, - 0.3187675293980876, - 0.34114125407236195, - null, - 0.5191285820034173, - 0.42077304608666055, - null, - 0.5191285820034173, - 0.5465171974419871, - null, - 0.4349682989231034, - 0.4442228752887084, - null, - 0.9636590456207981, - 0.8680862155815134, - null, - 0.8680862155815134, - 0.8668565351624634, - null, - 0.6234379896430121, - 0.5593069337955722, - null, - 0.04237200971819888, - 0.038579501382332126, - null, - 0.04237200971819888, - 0.01777064460825195, - null, - 0.038579501382332126, - 0.01777064460825195, - null, - 0.9110645875753355, - 0.992283435751248, - null, - 0.42077304608666055, - 0.4039327719907384, - null - ], - "y": [ - 0.09053726824382247, - 0.17542400609184483, 
- null, - 0.09053726824382247, - 0.055894273053114896, - null, - 0.09053726824382247, - 0.14933184162295132, - null, - 0.09053726824382247, - 0.1278305132468397, - null, - 0.09053726824382247, - 0.0731473655342364, - null, - 0.09053726824382247, - 0.09533319097359638, - null, - 0.09053726824382247, - 0.055897802218322856, - null, - 0.09053726824382247, - 0.04153202488293273, - null, - 0.09053726824382247, - 0.06013197669987258, - null, - 0.09053726824382247, - 0.040563128366188694, - null, - 0.09053726824382247, - 0.09959517902538939, - null, - 0.571085214777101, - 0.5944498275635773, - null, - 0.571085214777101, - 0.6773365837969099, - null, - 0.571085214777101, - 0.6160873747407943, - null, - 0.571085214777101, - 0.5186581897030644, - null, - 0.571085214777101, - 0.5634679987017406, - null, - 0.571085214777101, - 0.6012106694454529, - null, - 0.571085214777101, - 0.4898861106787329, - null, - 0.571085214777101, - 0.45431497833000367, - null, - 0.571085214777101, - 0.5204579980957379, - null, - 0.571085214777101, - 0.6352288779182178, - null, - 0.571085214777101, - 0.5981086798045652, - null, - 0.571085214777101, - 0.6648266103848882, - null, - 0.571085214777101, - 0.6072525121642058, - null, - 0.571085214777101, - 0.5260776190209286, - null, - 0.5199666766946885, - 0.5219101415039136, - null, - 0.5199666766946885, - 0.4307004647175262, - null, - 0.5199666766946885, - 0.4834545718278357, - null, - 0.5199666766946885, - 0.4847615611240751, - null, - 0.5199666766946885, - 0.4318165589087314, - null, - 0.5199666766946885, - 0.6217058876501556, - null, - 0.5199666766946885, - 0.5097617399826666, - null, - 0.33766327379542094, - 0.33811323660241943, - null, - 0.33766327379542094, - 0.31304614249644347, - null, - 0.33766327379542094, - 0.2697998035002954, - null, - 0.33766327379542094, - 0.2695720924906413, - null, - 0.33766327379542094, - 0.24454670425362057, - null, - 0.33766327379542094, - 0.4500538798110242, - null, - 0.33766327379542094, - 0.2880647319459674, - 
null, - 0.33766327379542094, - 0.3202314429055858, - null, - 0.33766327379542094, - 0.3169605131706372, - null, - 0.33766327379542094, - 0.32345881810688737, - null, - 0.17196466768963936, - 0.2009582712064717, - null, - 0.17196466768963936, - 0.15069304516745607, - null, - 0.17196466768963936, - 0.06016942899581168, - null, - 0.17196466768963936, - 0.24013807075121119, - null, - 0.17196466768963936, - 0.2693681584998491, - null, - 0.17196466768963936, - 0.10059463740220753, - null, - 0.17196466768963936, - 0.09959517902538939, - null, - 0.17708608014427518, - 0.2373268562908326, - null, - 0.17708608014427518, - 0.23741932367240448, - null, - 0.17708608014427518, - 0.05938145280899054, - null, - 0.17708608014427518, - 0.17619771419691865, - null, - 0.17708608014427518, - 0.21532966919867302, - null, - 0.17708608014427518, - 0.10782775946098799, - null, - 0.17708608014427518, - 0.249116699886752, - null, - 0.17708608014427518, - 0.23700988477155205, - null, - 0.17708608014427518, - 0.1341994714416056, - null, - 0.17708608014427518, - 0.29050814087118004, - null, - 0.04649454781195783, - 0.07011604000159166, - null, - 0.04649454781195783, - 0.019989772968585173, - null, - 0.04649454781195783, - 0.038844634468288675, - null, - 0.37080565676900146, - 0.3648985367210805, - null, - 0.37080565676900146, - 0.3343459796676115, - null, - 0.37080565676900146, - 0.4208812619135248, - null, - 0.37080565676900146, - 0.3900960314334032, - null, - 0.37080565676900146, - 0.3098874271134545, - null, - 0.37080565676900146, - 0.4295667428124167, - null, - 0.37080565676900146, - 0.35350564895305514, - null, - 0.37080565676900146, - 0.31541428705224306, - null, - 0.37080565676900146, - 0.4467311570808764, - null, - 0.37080565676900146, - 0.2693681584998491, - null, - 0.37080565676900146, - 0.4421375373865315, - null, - 0.37080565676900146, - 0.42641694849778966, - null, - 0.37080565676900146, - 0.3333136626479075, - null, - 0.37080565676900146, - 0.4868902788925622, - null, - 
0.3602866247185619, - 0.3061539627540061, - null, - 0.3602866247185619, - 0.3661437355856225, - null, - 0.3602866247185619, - 0.40557198035837094, - null, - 0.3602866247185619, - 0.3192831323823997, - null, - 0.3602866247185619, - 0.41535454584101794, - null, - 0.3602866247185619, - 0.40395348439090084, - null, - 0.3602866247185619, - 0.3340702546567942, - null, - 0.3602866247185619, - 0.4248880785102581, - null, - 0.3602866247185619, - 0.29119156039108685, - null, - 0.9483925173875926, - 0.8957623407464501, - null, - 0.9483925173875926, - 0.9727770125665405, - null, - 0.9483925173875926, - 0.8791466031622056, - null, - 0.9483925173875926, - 0.922341377568881, - null, - 0.9483925173875926, - 0.8821215709600496, - null, - 0.9483925173875926, - 0.9328536520894143, - null, - 0.9483925173875926, - 0.9344432405222354, - null, - 0.9483925173875926, - 0.9642772106357639, - null, - 0.3061539627540061, - 0.3661437355856225, - null, - 0.3061539627540061, - 0.40557198035837094, - null, - 0.3061539627540061, - 0.3192831323823997, - null, - 0.3061539627540061, - 0.32266487999330984, - null, - 0.3061539627540061, - 0.40395348439090084, - null, - 0.3061539627540061, - 0.3340702546567942, - null, - 0.3061539627540061, - 0.29119156039108685, - null, - 0.3061539627540061, - 0.19048093242734687, - null, - 0.9643804220706982, - 0.9298960866412943, - null, - 0.9643804220706982, - 0.8599268392047722, - null, - 0.9643804220706982, - 0.9435179236599912, - null, - 0.9643804220706982, - 0.9958360522915445, - null, - 0.9643804220706982, - 0.9756800437762957, - null, - 0.9643804220706982, - 0.8842114977564064, - null, - 0.8336885167043149, - 0.938767234846119, - null, - 0.8336885167043149, - 0.8867112408398291, - null, - 0.8336885167043149, - 0.9298960866412943, - null, - 0.8336885167043149, - 0.8599268392047722, - null, - 0.8336885167043149, - 0.8508124987550889, - null, - 0.8336885167043149, - 0.8842114977564064, - null, - 0.5944498275635773, - 0.5216765314868881, - null, - 
0.5944498275635773, - 0.6001026871900049, - null, - 0.5944498275635773, - 0.6012106694454529, - null, - 0.5944498275635773, - 0.4898861106787329, - null, - 0.5944498275635773, - 0.5204579980957379, - null, - 0.5944498275635773, - 0.6352288779182178, - null, - 0.5944498275635773, - 0.6648266103848882, - null, - 0.5944498275635773, - 0.6072525121642058, - null, - 0.2373268562908326, - 0.23741932367240448, - null, - 0.2373268562908326, - 0.33811323660241943, - null, - 0.2373268562908326, - 0.17542400609184483, - null, - 0.2373268562908326, - 0.17619771419691865, - null, - 0.2373268562908326, - 0.1278305132468397, - null, - 0.2373268562908326, - 0.21532966919867302, - null, - 0.2373268562908326, - 0.2695720924906413, - null, - 0.2373268562908326, - 0.24454670425362057, - null, - 0.2373268562908326, - 0.20002447568886628, - null, - 0.2373268562908326, - 0.3202314429055858, - null, - 0.2373268562908326, - 0.1341994714416056, - null, - 0.2373268562908326, - 0.3169605131706372, - null, - 0.2373268562908326, - 0.29050814087118004, - null, - 0.2373268562908326, - 0.32345881810688737, - null, - 0.23741932367240448, - 0.32127102230894566, - null, - 0.23741932367240448, - 0.2381682330796122, - null, - 0.23741932367240448, - 0.17296378957033465, - null, - 0.23741932367240448, - 0.24033413659841596, - null, - 0.23741932367240448, - 0.2981410655965283, - null, - 0.23741932367240448, - 0.3199684158322815, - null, - 0.23741932367240448, - 0.2660491488293679, - null, - 0.23741932367240448, - 0.249116699886752, - null, - 0.23741932367240448, - 0.23700988477155205, - null, - 0.23741932367240448, - 0.29050814087118004, - null, - 0.23741932367240448, - 0.2318219208408404, - null, - 0.32127102230894566, - 0.39453602200590676, - null, - 0.32127102230894566, - 0.2381682330796122, - null, - 0.32127102230894566, - 0.3309683982450944, - null, - 0.32127102230894566, - 0.24033413659841596, - null, - 0.32127102230894566, - 0.2981410655965283, - null, - 0.32127102230894566, - 0.3199684158322815, - 
null, - 0.32127102230894566, - 0.3355480553373167, - null, - 0.32127102230894566, - 0.2660491488293679, - null, - 0.32127102230894566, - 0.3635517670405215, - null, - 0.32127102230894566, - 0.249116699886752, - null, - 0.32127102230894566, - 0.23700988477155205, - null, - 0.32127102230894566, - 0.28871122138225125, - null, - 0.32127102230894566, - 0.42203254876563234, - null, - 0.32127102230894566, - 0.2318219208408404, - null, - 0.3661437355856225, - 0.3343459796676115, - null, - 0.3661437355856225, - 0.4307004647175262, - null, - 0.3661437355856225, - 0.40557198035837094, - null, - 0.3661437355856225, - 0.4834545718278357, - null, - 0.3661437355856225, - 0.4847615611240751, - null, - 0.3661437355856225, - 0.32266487999330984, - null, - 0.3661437355856225, - 0.4318165589087314, - null, - 0.3661437355856225, - 0.3340702546567942, - null, - 0.7791505090281524, - 0.8520196094107113, - null, - 0.7791505090281524, - 0.8848427298858184, - null, - 0.3648985367210805, - 0.3343459796676115, - null, - 0.3648985367210805, - 0.4208812619135248, - null, - 0.3648985367210805, - 0.4307004647175262, - null, - 0.3648985367210805, - 0.3900960314334032, - null, - 0.3648985367210805, - 0.3098874271134545, - null, - 0.3648985367210805, - 0.32266487999330984, - null, - 0.3648985367210805, - 0.4295667428124167, - null, - 0.3648985367210805, - 0.35350564895305514, - null, - 0.3648985367210805, - 0.31541428705224306, - null, - 0.3648985367210805, - 0.2693681584998491, - null, - 0.3648985367210805, - 0.42641694849778966, - null, - 0.3648985367210805, - 0.3333136626479075, - null, - 0.6244837238804738, - 0.5850986908522726, - null, - 0.6244837238804738, - 0.6267294109959968, - null, - 0.6244837238804738, - 0.5221172076712435, - null, - 0.6244837238804738, - 0.5717872069066212, - null, - 0.6244837238804738, - 0.7302384542961842, - null, - 0.6244837238804738, - 0.6710484758334021, - null, - 0.6244837238804738, - 0.5492873750243871, - null, - 0.6244837238804738, - 0.6201266549140614, - null, - 
0.6244837238804738, - 0.5752985482362863, - null, - 0.9012137046519791, - 0.8622415881936324, - null, - 0.9012137046519791, - 0.8350595230795331, - null, - 0.5219101415039136, - 0.4307004647175262, - null, - 0.5219101415039136, - 0.4834545718278357, - null, - 0.5219101415039136, - 0.4847615611240751, - null, - 0.5219101415039136, - 0.4295667428124167, - null, - 0.5219101415039136, - 0.5144551437666581, - null, - 0.5219101415039136, - 0.42641694849778966, - null, - 0.5219101415039136, - 0.6014235590484225, - null, - 0.5219101415039136, - 0.4868902788925622, - null, - 0.39453602200590676, - 0.3309683982450944, - null, - 0.39453602200590676, - 0.2981410655965283, - null, - 0.39453602200590676, - 0.3199684158322815, - null, - 0.39453602200590676, - 0.45431497833000367, - null, - 0.39453602200590676, - 0.42052616285893474, - null, - 0.39453602200590676, - 0.4107398412471005, - null, - 0.39453602200590676, - 0.3635517670405215, - null, - 0.39453602200590676, - 0.42203254876563234, - null, - 0.2009582712064717, - 0.20619722773579274, - null, - 0.2009582712064717, - 0.3098874271134545, - null, - 0.2009582712064717, - 0.15069304516745607, - null, - 0.2009582712064717, - 0.31541428705224306, - null, - 0.2009582712064717, - 0.24013807075121119, - null, - 0.2009582712064717, - 0.2693681584998491, - null, - 0.2009582712064717, - 0.22993075379681738, - null, - 0.2009582712064717, - 0.09959517902538939, - null, - 0.04224314617430658, - 0.05938145280899054, - null, - 0.04224314617430658, - 0.04283815208078323, - null, - 0.04224314617430658, - 0.010366221042083845, - null, - 0.04224314617430658, - 0.10782775946098799, - null, - 0.04224314617430658, - 0.03395115206665145, - null, - 0.04224314617430658, - 0.05477321631284726, - null, - 0.2381682330796122, - 0.3309683982450944, - null, - 0.2381682330796122, - 0.17296378957033465, - null, - 0.2381682330796122, - 0.24033413659841596, - null, - 0.2381682330796122, - 0.2981410655965283, - null, - 0.2381682330796122, - 0.3355480553373167, 
- null, - 0.2381682330796122, - 0.2660491488293679, - null, - 0.2381682330796122, - 0.249116699886752, - null, - 0.2381682330796122, - 0.23700988477155205, - null, - 0.2381682330796122, - 0.28871122138225125, - null, - 0.2381682330796122, - 0.2002886163837997, - null, - 0.2381682330796122, - 0.13201947050262697, - null, - 0.2381682330796122, - 0.2318219208408404, - null, - 0.2381682330796122, - 0.20307680326083377, - null, - 0.33811323660241943, - 0.31304614249644347, - null, - 0.33811323660241943, - 0.2695720924906413, - null, - 0.33811323660241943, - 0.24454670425362057, - null, - 0.33811323660241943, - 0.4500538798110242, - null, - 0.33811323660241943, - 0.4140065537970282, - null, - 0.33811323660241943, - 0.2880647319459674, - null, - 0.33811323660241943, - 0.3202314429055858, - null, - 0.33811323660241943, - 0.3169605131706372, - null, - 0.33811323660241943, - 0.32345881810688737, - null, - 0.5216765314868881, - 0.6001026871900049, - null, - 0.5216765314868881, - 0.6012106694454529, - null, - 0.5216765314868881, - 0.4898861106787329, - null, - 0.5216765314868881, - 0.5204579980957379, - null, - 0.5216765314868881, - 0.4500538798110242, - null, - 0.5216765314868881, - 0.42052616285893474, - null, - 0.5216765314868881, - 0.4140065537970282, - null, - 0.5216765314868881, - 0.4937592635708411, - null, - 0.6001026871900049, - 0.6012106694454529, - null, - 0.6001026871900049, - 0.6705222836834548, - null, - 0.6001026871900049, - 0.5204579980957379, - null, - 0.6001026871900049, - 0.6648266103848882, - null, - 0.991844460003468, - 0.9727770125665405, - null, - 0.991844460003468, - 0.938767234846119, - null, - 0.991844460003468, - 0.9874110419208606, - null, - 0.991844460003468, - 0.9328536520894143, - null, - 0.991844460003468, - 0.9078978130468089, - null, - 0.991844460003468, - 0.9513646744432486, - null, - 0.3343459796676115, - 0.4307004647175262, - null, - 0.3343459796676115, - 0.3900960314334032, - null, - 0.3343459796676115, - 0.3098874271134545, - null, - 
0.3343459796676115, - 0.32266487999330984, - null, - 0.3343459796676115, - 0.4295667428124167, - null, - 0.3343459796676115, - 0.35350564895305514, - null, - 0.3343459796676115, - 0.31541428705224306, - null, - 0.3343459796676115, - 0.24013807075121119, - null, - 0.3343459796676115, - 0.2693681584998491, - null, - 0.3343459796676115, - 0.3333136626479075, - null, - 0.8957623407464501, - 0.9727770125665405, - null, - 0.8957623407464501, - 0.8423383207045981, - null, - 0.8957623407464501, - 0.8791466031622056, - null, - 0.8957623407464501, - 0.8151159149468827, - null, - 0.8957623407464501, - 0.922341377568881, - null, - 0.8957623407464501, - 0.8821215709600496, - null, - 0.8957623407464501, - 0.9328536520894143, - null, - 0.8957623407464501, - 0.9344432405222354, - null, - 0.8957623407464501, - 0.9642772106357639, - null, - 0.8957623407464501, - 0.8157570218353161, - null, - 0.8957623407464501, - 0.7925454632595156, - null, - 0.8957623407464501, - 0.888980486534156, - null, - 0.4208812619135248, - 0.3900960314334032, - null, - 0.4208812619135248, - 0.3098874271134545, - null, - 0.4208812619135248, - 0.4295667428124167, - null, - 0.4208812619135248, - 0.35350564895305514, - null, - 0.4208812619135248, - 0.4467311570808764, - null, - 0.4208812619135248, - 0.5144551437666581, - null, - 0.4208812619135248, - 0.4421375373865315, - null, - 0.4208812619135248, - 0.42641694849778966, - null, - 0.4208812619135248, - 0.3333136626479075, - null, - 0.4208812619135248, - 0.4868902788925622, - null, - 0.31304614249644347, - 0.2697998035002954, - null, - 0.31304614249644347, - 0.2695720924906413, - null, - 0.31304614249644347, - 0.24454670425362057, - null, - 0.31304614249644347, - 0.20619722773579274, - null, - 0.31304614249644347, - 0.3098874271134545, - null, - 0.31304614249644347, - 0.2880647319459674, - null, - 0.31304614249644347, - 0.3202314429055858, - null, - 0.31304614249644347, - 0.22993075379681738, - null, - 0.31304614249644347, - 0.3169605131706372, - null, - 
0.31304614249644347, - 0.32345881810688737, - null, - 0.6773365837969099, - 0.6628083689885368, - null, - 0.6773365837969099, - 0.6160873747407943, - null, - 0.6773365837969099, - 0.7537809293531343, - null, - 0.6773365837969099, - 0.5634679987017406, - null, - 0.6773365837969099, - 0.6012106694454529, - null, - 0.6773365837969099, - 0.6352288779182178, - null, - 0.6773365837969099, - 0.5981086798045652, - null, - 0.6773365837969099, - 0.6648266103848882, - null, - 0.6773365837969099, - 0.6072525121642058, - null, - 0.4307004647175262, - 0.40557198035837094, - null, - 0.4307004647175262, - 0.4834545718278357, - null, - 0.4307004647175262, - 0.4847615611240751, - null, - 0.4307004647175262, - 0.32266487999330984, - null, - 0.4307004647175262, - 0.4295667428124167, - null, - 0.4307004647175262, - 0.35350564895305514, - null, - 0.4307004647175262, - 0.4318165589087314, - null, - 0.4307004647175262, - 0.5097617399826666, - null, - 0.3309683982450944, - 0.24033413659841596, - null, - 0.3309683982450944, - 0.2981410655965283, - null, - 0.3309683982450944, - 0.3199684158322815, - null, - 0.3309683982450944, - 0.3355480553373167, - null, - 0.3309683982450944, - 0.2660491488293679, - null, - 0.3309683982450944, - 0.3635517670405215, - null, - 0.3309683982450944, - 0.249116699886752, - null, - 0.3309683982450944, - 0.23700988477155205, - null, - 0.3309683982450944, - 0.28871122138225125, - null, - 0.3309683982450944, - 0.42203254876563234, - null, - 0.3309683982450944, - 0.2318219208408404, - null, - 0.2697998035002954, - 0.17542400609184483, - null, - 0.2697998035002954, - 0.2695720924906413, - null, - 0.2697998035002954, - 0.24454670425362057, - null, - 0.2697998035002954, - 0.20619722773579274, - null, - 0.2697998035002954, - 0.3098874271134545, - null, - 0.2697998035002954, - 0.2880647319459674, - null, - 0.2697998035002954, - 0.22993075379681738, - null, - 0.2697998035002954, - 0.32345881810688737, - null, - 0.9727770125665405, - 0.922341377568881, - null, - 
0.9727770125665405, - 0.8821215709600496, - null, - 0.9727770125665405, - 0.9328536520894143, - null, - 0.9727770125665405, - 0.9642772106357639, - null, - 0.40557198035837094, - 0.4834545718278357, - null, - 0.40557198035837094, - 0.4847615611240751, - null, - 0.40557198035837094, - 0.32266487999330984, - null, - 0.40557198035837094, - 0.40395348439090084, - null, - 0.40557198035837094, - 0.4318165589087314, - null, - 0.40557198035837094, - 0.3340702546567942, - null, - 0.40557198035837094, - 0.4248880785102581, - null, - 0.40557198035837094, - 0.5097617399826666, - null, - 0.35532572275494023, - 0.24028581536328997, - null, - 0.35532572275494023, - 0.3192831323823997, - null, - 0.35532572275494023, - 0.2529891644068947, - null, - 0.35532572275494023, - 0.41535454584101794, - null, - 0.35532572275494023, - 0.40395348439090084, - null, - 0.35532572275494023, - 0.4248880785102581, - null, - 0.35532572275494023, - 0.29119156039108685, - null, - 0.35532572275494023, - 0.24102842320743, - null, - 0.5850986908522726, - 0.6267294109959968, - null, - 0.5850986908522726, - 0.5221172076712435, - null, - 0.5850986908522726, - 0.5717872069066212, - null, - 0.5850986908522726, - 0.5492873750243871, - null, - 0.5850986908522726, - 0.6201266549140614, - null, - 0.5850986908522726, - 0.5752985482362863, - null, - 0.5850986908522726, - 0.5097617399826666, - null, - 0.17296378957033465, - 0.24033413659841596, - null, - 0.17296378957033465, - 0.2660491488293679, - null, - 0.17296378957033465, - 0.10782775946098799, - null, - 0.17296378957033465, - 0.249116699886752, - null, - 0.17296378957033465, - 0.23700988477155205, - null, - 0.17296378957033465, - 0.28871122138225125, - null, - 0.17296378957033465, - 0.2002886163837997, - null, - 0.17296378957033465, - 0.13201947050262697, - null, - 0.17296378957033465, - 0.2318219208408404, - null, - 0.17296378957033465, - 0.20307680326083377, - null, - 0.6628083689885368, - 0.6160873747407943, - null, - 0.6628083689885368, - 
0.7537809293531343, - null, - 0.6628083689885368, - 0.5634679987017406, - null, - 0.6628083689885368, - 0.6352288779182178, - null, - 0.6628083689885368, - 0.5981086798045652, - null, - 0.6628083689885368, - 0.6072525121642058, - null, - 0.6160873747407943, - 0.5186581897030644, - null, - 0.6160873747407943, - 0.5634679987017406, - null, - 0.6160873747407943, - 0.6012106694454529, - null, - 0.6160873747407943, - 0.6352288779182178, - null, - 0.6160873747407943, - 0.5981086798045652, - null, - 0.6160873747407943, - 0.6648266103848882, - null, - 0.6160873747407943, - 0.6072525121642058, - null, - 0.6160873747407943, - 0.5260776190209286, - null, - 0.025297953521542405, - 0.06016942899581168, - null, - 0.025297953521542405, - 0.055897802218322856, - null, - 0.025297953521542405, - 0.09959517902538939, - null, - 0.24028581536328997, - 0.3192831323823997, - null, - 0.24028581536328997, - 0.1333966979371528, - null, - 0.24028581536328997, - 0.2529891644068947, - null, - 0.24028581536328997, - 0.29119156039108685, - null, - 0.24028581536328997, - 0.24102842320743, - null, - 0.5186581897030644, - 0.5634679987017406, - null, - 0.5186581897030644, - 0.6012106694454529, - null, - 0.5186581897030644, - 0.4898861106787329, - null, - 0.5186581897030644, - 0.45431497833000367, - null, - 0.5186581897030644, - 0.6352288779182178, - null, - 0.5186581897030644, - 0.5981086798045652, - null, - 0.5186581897030644, - 0.6072525121642058, - null, - 0.5186581897030644, - 0.42203254876563234, - null, - 0.5186581897030644, - 0.5260776190209286, - null, - 0.8423383207045981, - 0.8151159149468827, - null, - 0.8423383207045981, - 0.7334929583472656, - null, - 0.8423383207045981, - 0.8821215709600496, - null, - 0.8423383207045981, - 0.9328536520894143, - null, - 0.8423383207045981, - 0.7834166246251234, - null, - 0.8423383207045981, - 0.9078978130468089, - null, - 0.8423383207045981, - 0.7925454632595156, - null, - 0.8423383207045981, - 0.8508124987550889, - null, - 0.7537809293531343, - 
0.8622415881936324, - null, - 0.7537809293531343, - 0.8350595230795331, - null, - 0.3192831323823997, - 0.2529891644068947, - null, - 0.3192831323823997, - 0.41535454584101794, - null, - 0.3192831323823997, - 0.40395348439090084, - null, - 0.3192831323823997, - 0.4248880785102581, - null, - 0.3192831323823997, - 0.29119156039108685, - null, - 0.3192831323823997, - 0.24102842320743, - null, - 0.17542400609184483, - 0.055894273053114896, - null, - 0.17542400609184483, - 0.14933184162295132, - null, - 0.17542400609184483, - 0.17619771419691865, - null, - 0.17542400609184483, - 0.1278305132468397, - null, - 0.17542400609184483, - 0.21532966919867302, - null, - 0.17542400609184483, - 0.0731473655342364, - null, - 0.17542400609184483, - 0.2695720924906413, - null, - 0.17542400609184483, - 0.09533319097359638, - null, - 0.17542400609184483, - 0.24454670425362057, - null, - 0.17542400609184483, - 0.20002447568886628, - null, - 0.17542400609184483, - 0.20619722773579274, - null, - 0.17542400609184483, - 0.2880647319459674, - null, - 0.17542400609184483, - 0.1341994714416056, - null, - 0.17542400609184483, - 0.22993075379681738, - null, - 0.008409380348177398, - 0.04149975738749545, - null, - 0.938767234846119, - 0.9874110419208606, - null, - 0.938767234846119, - 0.8867112408398291, - null, - 0.938767234846119, - 0.9298960866412943, - null, - 0.938767234846119, - 0.9078978130468089, - null, - 0.938767234846119, - 0.9958360522915445, - null, - 0.938767234846119, - 0.8508124987550889, - null, - 0.938767234846119, - 0.8842114977564064, - null, - 0.938767234846119, - 0.9513646744432486, - null, - 0.24033413659841596, - 0.2981410655965283, - null, - 0.24033413659841596, - 0.3199684158322815, - null, - 0.24033413659841596, - 0.3355480553373167, - null, - 0.24033413659841596, - 0.2660491488293679, - null, - 0.24033413659841596, - 0.249116699886752, - null, - 0.24033413659841596, - 0.23700988477155205, - null, - 0.24033413659841596, - 0.28871122138225125, - null, - 
0.24033413659841596, - 0.2002886163837997, - null, - 0.24033413659841596, - 0.13201947050262697, - null, - 0.24033413659841596, - 0.2318219208408404, - null, - 0.24033413659841596, - 0.20307680326083377, - null, - 0.8791466031622056, - 0.922341377568881, - null, - 0.8791466031622056, - 0.8821215709600496, - null, - 0.8791466031622056, - 0.9542382277667263, - null, - 0.8791466031622056, - 0.9024846524956353, - null, - 0.8791466031622056, - 0.9344432405222354, - null, - 0.8791466031622056, - 0.9642772106357639, - null, - 0.8791466031622056, - 0.8157570218353161, - null, - 0.8791466031622056, - 0.888980486534156, - null, - 0.5634679987017406, - 0.6012106694454529, - null, - 0.5634679987017406, - 0.6352288779182178, - null, - 0.5634679987017406, - 0.5981086798045652, - null, - 0.5634679987017406, - 0.6072525121642058, - null, - 0.5634679987017406, - 0.5260776190209286, - null, - 0.05938145280899054, - 0.04283815208078323, - null, - 0.05938145280899054, - 0.10782775946098799, - null, - 0.05938145280899054, - 0.03395115206665145, - null, - 0.05938145280899054, - 0.05477321631284726, - null, - 0.05938145280899054, - 0.1341994714416056, - null, - 0.6012106694454529, - 0.4898861106787329, - null, - 0.6012106694454529, - 0.5204579980957379, - null, - 0.6012106694454529, - 0.6352288779182178, - null, - 0.6012106694454529, - 0.6648266103848882, - null, - 0.6012106694454529, - 0.6072525121642058, - null, - 0.6705222836834548, - 0.7454337953380579, - null, - 0.6705222836834548, - 0.7077207700167599, - null, - 0.6705222836834548, - 0.7005910562446783, - null, - 0.6705222836834548, - 0.5603277981830703, - null, - 0.3900960314334032, - 0.3098874271134545, - null, - 0.3900960314334032, - 0.4295667428124167, - null, - 0.3900960314334032, - 0.35350564895305514, - null, - 0.3900960314334032, - 0.31541428705224306, - null, - 0.3900960314334032, - 0.4467311570808764, - null, - 0.3900960314334032, - 0.4421375373865315, - null, - 0.3900960314334032, - 0.42641694849778966, - null, - 
0.3900960314334032, - 0.3333136626479075, - null, - 0.3900960314334032, - 0.4868902788925622, - null, - 0.055894273053114896, - 0.14933184162295132, - null, - 0.055894273053114896, - 0.1278305132468397, - null, - 0.055894273053114896, - 0.0731473655342364, - null, - 0.055894273053114896, - 0.09533319097359638, - null, - 0.055894273053114896, - 0.055897802218322856, - null, - 0.055894273053114896, - 0.04153202488293273, - null, - 0.055894273053114896, - 0.06013197669987258, - null, - 0.055894273053114896, - 0.040563128366188694, - null, - 0.14933184162295132, - 0.17619771419691865, - null, - 0.14933184162295132, - 0.1278305132468397, - null, - 0.14933184162295132, - 0.21532966919867302, - null, - 0.14933184162295132, - 0.0731473655342364, - null, - 0.14933184162295132, - 0.09533319097359638, - null, - 0.14933184162295132, - 0.24454670425362057, - null, - 0.14933184162295132, - 0.20002447568886628, - null, - 0.14933184162295132, - 0.20619722773579274, - null, - 0.14933184162295132, - 0.055897802218322856, - null, - 0.14933184162295132, - 0.04153202488293273, - null, - 0.14933184162295132, - 0.06013197669987258, - null, - 0.14933184162295132, - 0.22993075379681738, - null, - 0.14933184162295132, - 0.040563128366188694, - null, - 0.8151159149468827, - 0.7334929583472656, - null, - 0.8151159149468827, - 0.8821215709600496, - null, - 0.8151159149468827, - 0.9328536520894143, - null, - 0.8151159149468827, - 0.7834166246251234, - null, - 0.8151159149468827, - 0.7925454632595156, - null, - 0.8151159149468827, - 0.8508124987550889, - null, - 0.17619771419691865, - 0.1278305132468397, - null, - 0.17619771419691865, - 0.21532966919867302, - null, - 0.17619771419691865, - 0.0731473655342364, - null, - 0.17619771419691865, - 0.2695720924906413, - null, - 0.17619771419691865, - 0.09533319097359638, - null, - 0.17619771419691865, - 0.24454670425362057, - null, - 0.17619771419691865, - 0.20002447568886628, - null, - 0.17619771419691865, - 0.1341994714416056, - null, - 
0.17619771419691865, - 0.06013197669987258, - null, - 0.2981410655965283, - 0.3199684158322815, - null, - 0.2981410655965283, - 0.3355480553373167, - null, - 0.2981410655965283, - 0.2660491488293679, - null, - 0.2981410655965283, - 0.3635517670405215, - null, - 0.2981410655965283, - 0.249116699886752, - null, - 0.2981410655965283, - 0.23700988477155205, - null, - 0.2981410655965283, - 0.28871122138225125, - null, - 0.2981410655965283, - 0.42203254876563234, - null, - 0.2981410655965283, - 0.2002886163837997, - null, - 0.2981410655965283, - 0.29050814087118004, - null, - 0.2981410655965283, - 0.2318219208408404, - null, - 0.2981410655965283, - 0.20307680326083377, - null, - 0.7334929583472656, - 0.6512622326935055, - null, - 0.7334929583472656, - 0.7205270186163313, - null, - 0.7334929583472656, - 0.7834166246251234, - null, - 0.7334929583472656, - 0.6217058876501556, - null, - 0.7334929583472656, - 0.7925454632595156, - null, - 0.7334929583472656, - 0.6714278208298593, - null, - 0.04283815208078323, - 0.010366221042083845, - null, - 0.04283815208078323, - 0.10782775946098799, - null, - 0.04283815208078323, - 0.03395115206665145, - null, - 0.04283815208078323, - 0.05477321631284726, - null, - 0.04283815208078323, - 0.13201947050262697, - null, - 0.922341377568881, - 0.8821215709600496, - null, - 0.922341377568881, - 0.9542382277667263, - null, - 0.922341377568881, - 0.9328536520894143, - null, - 0.922341377568881, - 0.9961038345306213, - null, - 0.922341377568881, - 0.9344432405222354, - null, - 0.922341377568881, - 0.9642772106357639, - null, - 0.922341377568881, - 0.8157570218353161, - null, - 0.922341377568881, - 0.888980486534156, - null, - 0.3199684158322815, - 0.42052616285893474, - null, - 0.3199684158322815, - 0.2660491488293679, - null, - 0.3199684158322815, - 0.4107398412471005, - null, - 0.3199684158322815, - 0.3635517670405215, - null, - 0.3199684158322815, - 0.249116699886752, - null, - 0.3199684158322815, - 0.23700988477155205, - null, - 
0.3199684158322815, - 0.42203254876563234, - null, - 0.3199684158322815, - 0.29050814087118004, - null, - 0.1278305132468397, - 0.21532966919867302, - null, - 0.1278305132468397, - 0.0731473655342364, - null, - 0.1278305132468397, - 0.09533319097359638, - null, - 0.1278305132468397, - 0.24454670425362057, - null, - 0.1278305132468397, - 0.20002447568886628, - null, - 0.1278305132468397, - 0.04153202488293273, - null, - 0.1278305132468397, - 0.05477321631284726, - null, - 0.1278305132468397, - 0.1341994714416056, - null, - 0.1278305132468397, - 0.06013197669987258, - null, - 0.1278305132468397, - 0.040563128366188694, - null, - 0.21532966919867302, - 0.2695720924906413, - null, - 0.21532966919867302, - 0.09533319097359638, - null, - 0.21532966919867302, - 0.24454670425362057, - null, - 0.21532966919867302, - 0.20002447568886628, - null, - 0.21532966919867302, - 0.3202314429055858, - null, - 0.21532966919867302, - 0.1341994714416056, - null, - 0.21532966919867302, - 0.3169605131706372, - null, - 0.21532966919867302, - 0.29050814087118004, - null, - 0.21532966919867302, - 0.32345881810688737, - null, - 0.0731473655342364, - 0.09533319097359638, - null, - 0.0731473655342364, - 0.055897802218322856, - null, - 0.0731473655342364, - 0.04153202488293273, - null, - 0.0731473655342364, - 0.05477321631284726, - null, - 0.0731473655342364, - 0.1341994714416056, - null, - 0.0731473655342364, - 0.06013197669987258, - null, - 0.0731473655342364, - 0.040563128366188694, - null, - 0.4898861106787329, - 0.45431497833000367, - null, - 0.4898861106787329, - 0.5204579980957379, - null, - 0.4898861106787329, - 0.42052616285893474, - null, - 0.4898861106787329, - 0.4140065537970282, - null, - 0.4898861106787329, - 0.4107398412471005, - null, - 0.2695720924906413, - 0.24454670425362057, - null, - 0.2695720924906413, - 0.20002447568886628, - null, - 0.2695720924906413, - 0.2880647319459674, - null, - 0.2695720924906413, - 0.3202314429055858, - null, - 0.2695720924906413, - 
0.3169605131706372, - null, - 0.2695720924906413, - 0.32345881810688737, - null, - 0.09533319097359638, - 0.20002447568886628, - null, - 0.09533319097359638, - 0.055897802218322856, - null, - 0.09533319097359638, - 0.04153202488293273, - null, - 0.09533319097359638, - 0.05477321631284726, - null, - 0.09533319097359638, - 0.1341994714416056, - null, - 0.09533319097359638, - 0.06013197669987258, - null, - 0.09533319097359638, - 0.040563128366188694, - null, - 0.9874110419208606, - 0.8867112408398291, - null, - 0.9874110419208606, - 0.9298960866412943, - null, - 0.9874110419208606, - 0.9078978130468089, - null, - 0.9874110419208606, - 0.9958360522915445, - null, - 0.9874110419208606, - 0.9513646744432486, - null, - 0.1333966979371528, - 0.2529891644068947, - null, - 0.1333966979371528, - 0.04149975738749545, - null, - 0.1333966979371528, - 0.16781555203357146, - null, - 0.1333966979371528, - 0.24102842320743, - null, - 0.1333966979371528, - 0.19048093242734687, - null, - 0.1333966979371528, - 0.1294716874165911, - null, - 0.2529891644068947, - 0.29119156039108685, - null, - 0.2529891644068947, - 0.24102842320743, - null, - 0.2529891644068947, - 0.19048093242734687, - null, - 0.45431497833000367, - 0.5204579980957379, - null, - 0.45431497833000367, - 0.42052616285893474, - null, - 0.45431497833000367, - 0.4107398412471005, - null, - 0.45431497833000367, - 0.3635517670405215, - null, - 0.45431497833000367, - 0.42203254876563234, - null, - 0.45431497833000367, - 0.5260776190209286, - null, - 0.24454670425362057, - 0.20002447568886628, - null, - 0.24454670425362057, - 0.2880647319459674, - null, - 0.24454670425362057, - 0.3202314429055858, - null, - 0.24454670425362057, - 0.22993075379681738, - null, - 0.24454670425362057, - 0.3169605131706372, - null, - 0.24454670425362057, - 0.32345881810688737, - null, - 0.20002447568886628, - 0.3202314429055858, - null, - 0.20002447568886628, - 0.1341994714416056, - null, - 0.20002447568886628, - 0.3169605131706372, - null, - 
0.6267294109959968, - 0.5221172076712435, - null, - 0.6267294109959968, - 0.7205270186163313, - null, - 0.6267294109959968, - 0.5717872069066212, - null, - 0.6267294109959968, - 0.7302384542961842, - null, - 0.6267294109959968, - 0.6710484758334021, - null, - 0.6267294109959968, - 0.5492873750243871, - null, - 0.6267294109959968, - 0.6201266549140614, - null, - 0.6267294109959968, - 0.5752985482362863, - null, - 0.5221172076712435, - 0.41535454584101794, - null, - 0.5221172076712435, - 0.5717872069066212, - null, - 0.5221172076712435, - 0.5492873750243871, - null, - 0.5221172076712435, - 0.6201266549140614, - null, - 0.5221172076712435, - 0.4248880785102581, - null, - 0.6512622326935055, - 0.6217058876501556, - null, - 0.6512622326935055, - 0.6714278208298593, - null, - 0.3355480553373167, - 0.2660491488293679, - null, - 0.3355480553373167, - 0.249116699886752, - null, - 0.3355480553373167, - 0.28871122138225125, - null, - 0.3355480553373167, - 0.42203254876563234, - null, - 0.3355480553373167, - 0.2318219208408404, - null, - 0.4834545718278357, - 0.4847615611240751, - null, - 0.4834545718278357, - 0.4318165589087314, - null, - 0.4834545718278357, - 0.5097617399826666, - null, - 0.4847615611240751, - 0.4318165589087314, - null, - 0.4847615611240751, - 0.5097617399826666, - null, - 0.20619722773579274, - 0.3098874271134545, - null, - 0.20619722773579274, - 0.2880647319459674, - null, - 0.20619722773579274, - 0.24013807075121119, - null, - 0.20619722773579274, - 0.22993075379681738, - null, - 0.20619722773579274, - 0.09959517902538939, - null, - 0.9419075807648644, - 0.8848427298858184, - null, - 0.9419075807648644, - 0.9756800437762957, - null, - 0.3098874271134545, - 0.4295667428124167, - null, - 0.3098874271134545, - 0.35350564895305514, - null, - 0.3098874271134545, - 0.31541428705224306, - null, - 0.3098874271134545, - 0.2880647319459674, - null, - 0.3098874271134545, - 0.24013807075121119, - null, - 0.3098874271134545, - 0.2693681584998491, - null, - 
0.3098874271134545, - 0.42641694849778966, - null, - 0.3098874271134545, - 0.3333136626479075, - null, - 0.3098874271134545, - 0.22993075379681738, - null, - 0.04149975738749545, - 0.019989772968585173, - null, - 0.04149975738749545, - 0.1294716874165911, - null, - 0.32266487999330984, - 0.31541428705224306, - null, - 0.32266487999330984, - 0.4318165589087314, - null, - 0.32266487999330984, - 0.2693681584998491, - null, - 0.32266487999330984, - 0.3340702546567942, - null, - 0.4295667428124167, - 0.35350564895305514, - null, - 0.4295667428124167, - 0.31541428705224306, - null, - 0.4295667428124167, - 0.4467311570808764, - null, - 0.4295667428124167, - 0.5144551437666581, - null, - 0.4295667428124167, - 0.4421375373865315, - null, - 0.4295667428124167, - 0.42641694849778966, - null, - 0.4295667428124167, - 0.3333136626479075, - null, - 0.4295667428124167, - 0.4868902788925622, - null, - 0.35350564895305514, - 0.31541428705224306, - null, - 0.35350564895305514, - 0.24013807075121119, - null, - 0.35350564895305514, - 0.2693681584998491, - null, - 0.35350564895305514, - 0.42641694849778966, - null, - 0.35350564895305514, - 0.3333136626479075, - null, - 0.15069304516745607, - 0.06016942899581168, - null, - 0.15069304516745607, - 0.24013807075121119, - null, - 0.15069304516745607, - 0.2693681584998491, - null, - 0.15069304516745607, - 0.10059463740220753, - null, - 0.15069304516745607, - 0.09959517902538939, - null, - 0.41535454584101794, - 0.40395348439090084, - null, - 0.41535454584101794, - 0.4248880785102581, - null, - 0.41535454584101794, - 0.29119156039108685, - null, - 0.8821215709600496, - 0.9328536520894143, - null, - 0.8821215709600496, - 0.9344432405222354, - null, - 0.8821215709600496, - 0.9642772106357639, - null, - 0.8821215709600496, - 0.8157570218353161, - null, - 0.8821215709600496, - 0.7925454632595156, - null, - 0.8821215709600496, - 0.888980486534156, - null, - 0.9542382277667263, - 0.9024846524956353, - null, - 0.9542382277667263, - 
0.9961038345306213, - null, - 0.9542382277667263, - 0.9344432405222354, + "Amount: 1000.00", + "Amount: 38.50", + "Amount: 100.00", + "Amount: 20.00", + "Amount: 10.00", + "Amount: 102.70", + "Amount: 9.52", + "Amount: 50.00", + "Amount: 14.00", + "Amount: 446.57", + "Amount: 350.00", + "Amount: 43.50", + "Amount: 50.00", + "Amount: 19410.37", + "Amount: 3.00", + "Amount: 2.00", + "Amount: 458.43", + "Amount: 100.00", + "Amount: 427.17", + "Amount: 50.00", + "Amount: 100.00", + "Amount: 5.00", + "Amount: 7150.00", + "Amount: 20.00", + "Amount: 2000.00", + "Amount: 8.00", + "Amount: 750.00", + "Amount: 18.00", + "Amount: 42.00", + "Amount: 5.00", + "Amount: 75.00", + "Amount: 5.00", + "Amount: 14.00", + "Amount: 10.00", + "Amount: 5.00", + "Amount: 500.00", + "Amount: 5.00", + "Amount: 5.00", + "Amount: 52.56", + "Amount: 10.00", + "Amount: 19.00", + "Amount: 5.00", + "Amount: 300.00", + "Amount: 100.00", + "Amount: 73.19", + "Amount: 243.68", + "Amount: 100.00", + "Amount: 125.00", + "Amount: 1.19", + "Amount: 14.00", + "Amount: 100.00", + "Amount: 45.00", + "Amount: 20.00", + "Amount: 500.00", + "Amount: 100.00", + "Amount: 127.86", + "Amount: 15.00", + "Amount: 150.00", + "Amount: 5.00", + "Amount: 5.00", + "Amount: 400.00", + "Amount: 22.00", + "Amount: 51.93", + "Amount: 5.00", + "Amount: 5.00", + "Amount: 5.00", + "Amount: 15.00", + "Amount: 30.00", + "Amount: 5.00", + "Amount: 122.00", + "Amount: 3.00", + "Amount: 300.00", + "Amount: 100.00", + "Amount: 162.00", + "Amount: 19.08", + "Amount: 20.00", + "Amount: 30.37", + "Amount: 1000.00", + "Amount: 5.00", + "Amount: 250.00", + "Amount: 8.00", + "Amount: 13772.85", + "Amount: 420000.00", + "Amount: 9.90", + "Amount: 107.00", + "Amount: 2.00", + "Amount: 22.00", + "Amount: 25.00", + "Amount: 1504.28", + "Amount: 5.00", + "Amount: 252.00", + "Amount: 2642.04", + "Amount: 252.00", + "Amount: 5.00", + "Amount: 15.00", + "Amount: 3.00", + "Amount: 35.00", + "Amount: 65.00", + "Amount: 5.00", + "Amount: 75.00" + ], 
+ "line": { + "color": "#888", + "width": 1.5 + }, + "marker": { + "angleref": "previous", + "color": "#888", + "size": 10, + "symbol": "arrow" + }, + "mode": "lines+markers", + "type": "scatter", + "x": [ + -0.43213011477334773, + -0.4430121181061702, null, - 0.9542382277667263, - 0.9642772106357639, + 0.5112616608477374, + 0.5931128851827875, null, - 0.9542382277667263, - 0.888980486534156, + -0.5362524214379699, + -0.6830386686787555, null, - 0.9542382277667263, - 0.9810704436128125, + 0.8321531242995657, + 0.7947955540041922, null, - 0.7205270186163313, - 0.7302384542961842, + -0.015339979027039215, + -0.004598212759833403, null, - 0.7205270186163313, - 0.8157570218353161, + -0.5242232381582058, + -0.7859456498474336, null, - 0.7205270186163313, - 0.7925454632595156, + -0.010396805586450043, + -0.004598212759833403, null, - 0.31541428705224306, - 0.24013807075121119, + -0.5907762774639415, + -0.8824105534014709, null, - 0.31541428705224306, - 0.2693681584998491, + 0.37340208125254337, + 0.42289489293422566, null, - 0.31541428705224306, - 0.3333136626479075, + -0.06531798460926314, + -0.06282488255576262, null, - 0.010366221042083845, - 0.10782775946098799, + 0.4571954695717908, + 0.5782995282255566, null, - 0.010366221042083845, - 0.03395115206665145, + 0.7251331716127464, + 0.8930310568108012, null, - 0.010366221042083845, - 0.13201947050262697, + 0.3440714981019444, + 0.42289489293422566, null, - 0.06016942899581168, - 0.055897802218322856, + 0.6452743136225527, + 0.8930310568108012, null, - 0.06016942899581168, - 0.10059463740220753, + -0.6833691066109641, + -0.922556143412663, null, - 0.06016942899581168, - 0.09959517902538939, + -0.4578754972216337, + -0.5260075373652568, null, - 0.8867112408398291, - 0.9298960866412943, + -0.13576410630761374, + -0.19017196191254732, null, - 0.8867112408398291, - 0.8599268392047722, + -0.7794963369787193, + -0.8284631501006486, null, - 0.8867112408398291, - 0.9078978130468089, + 0.05049348978287421, + 0.04533793523578743, 
null, - 0.8867112408398291, - 0.8508124987550889, + 0.4307114594735427, + 0.6211510564756442, null, - 0.8867112408398291, - 0.8842114977564064, + -0.488645870104004, + -0.5159370768528078, null, - 0.8867112408398291, - 0.9513646744432486, + 0.8299385473651612, + 0.8835075264311746, null, - 0.5204579980957379, - 0.42052616285893474, + 0.21235561702753328, + 0.22007339811741142, null, - 0.5204579980957379, - 0.4140065537970282, + -0.7317074047395691, + -0.922556143412663, null, - 0.5204579980957379, - 0.4107398412471005, + -0.35898258388050897, + -0.5260075373652568, null, - 0.5204579980957379, - 0.4937592635708411, + 0.3513819881725677, + 0.42289489293422566, null, - 0.4500538798110242, - 0.4140065537970282, + 0.6251699386269727, + 0.5931128851827875, null, - 0.4500538798110242, - 0.4467311570808764, + 0.41070323859290586, + 0.42289489293422566, null, - 0.4500538798110242, - 0.4421375373865315, + 0.5885337748775, + 0.7476884190896049, null, - 0.4500538798110242, - 0.4937592635708411, + -0.7442157911045258, + -0.8182264745850724, null, - 0.4500538798110242, - 0.5603277981830703, + -0.4240986484371276, + -0.5260075373652568, null, - 0.4500538798110242, - 0.547451424618544, + -0.7884885800275052, + -0.8182264745850724, null, - 0.40395348439090084, - 0.3340702546567942, + 0.41998910604831713, + 0.42289489293422566, null, - 0.40395348439090084, - 0.4248880785102581, + -0.8304528603225746, + -0.922556143412663, null, - 0.5717872069066212, - 0.6710484758334021, + 0.9329149222041224, + 0.8835075264311746, null, - 0.5717872069066212, - 0.5492873750243871, + 0.12267587639275908, + 0.15152247306622144, null, - 0.5717872069066212, - 0.6201266549140614, + 0.7743223692717235, + 0.8835075264311746, null, - 0.42052616285893474, - 0.4140065537970282, + -0.020367479302382467, + -0.004598212759833403, null, - 0.42052616285893474, - 0.4107398412471005, + 0.5525550860920077, + 0.6211510564756442, null, - 0.42052616285893474, - 0.3635517670405215, + -0.7341525511916877, + 
-0.922556143412663, null, - 0.2660491488293679, - 0.249116699886752, + 0.3943452039362869, + 0.42289489293422566, null, - 0.2660491488293679, - 0.23700988477155205, + -0.697118996520046, + -0.922556143412663, null, - 0.2660491488293679, - 0.28871122138225125, + 0.3729152346875802, + 0.45108517908588003, null, - 0.2660491488293679, - 0.2002886163837997, + 0.44882023012427213, + 0.6211510564756442, null, - 0.2660491488293679, - 0.29050814087118004, + -0.522022411657337, + -0.5159370768528078, null, - 0.2660491488293679, - 0.2318219208408404, + -0.09362722072652985, + -0.12627985133706823, null, - 0.2660491488293679, - 0.20307680326083377, + 0.5297743915831321, + 0.6211510564756442, null, - 0.10782775946098799, - 0.03395115206665145, + 0.3733883300104818, + 0.42289489293422566, null, - 0.10782775946098799, - 0.2002886163837997, + 0.2098519168108027, + -0.004598212759833403, null, - 0.10782775946098799, - 0.13201947050262697, + 0.2098519168108027, + 0.42289489293422566, null, - 0.10782775946098799, - 0.20307680326083377, + -0.3787781527763615, + -0.41621756255078585, null, - 0.7302384542961842, - 0.6710484758334021, + 0.8894556041854287, + 0.9294865179016704, null, - 0.7302384542961842, - 0.6201266549140614, + -0.798347214605227, + -0.8824105534014709, null, - 0.8520196094107113, - 0.8848427298858184, + -0.18131224898913595, + -0.1595247701666436, null, - 0.8520196094107113, - 0.9435179236599912, + 0.6522974110153149, + 0.8567967077274671, null, - 0.8520196094107113, - 0.9756800437762957, + -0.7026192767091103, + -0.7884728651042877, null, - 0.4140065537970282, - 0.3202314429055858, + -0.8276350144938375, + -0.922556143412663, null, - 0.4140065537970282, - 0.4107398412471005, + -0.6098376430856963, + -0.5862824632127743, null, - 0.4140065537970282, - 0.3635517670405215, + 0.6554230284551037, + 0.8835075264311746, null, - 0.4140065537970282, - 0.4937592635708411, + 0.8922189766627708, + 0.8835075264311746, null, - 0.4140065537970282, - 0.3169605131706372, + 
0.4295097745598557, + 0.45108517908588003, null, - 0.4140065537970282, - 0.32345881810688737, + 0.43460925643764853, + 0.42289489293422566, null, - 0.4467311570808764, - 0.5144551437666581, + -0.3980280117268984, + -0.5260075373652568, null, - 0.4467311570808764, - 0.4421375373865315, + 0.7024852559248044, + 0.8835075264311746, null, - 0.4467311570808764, - 0.42641694849778966, + 0.7717529582442381, + 0.8835075264311746, null, - 0.4467311570808764, - 0.4868902788925622, + -0.6806600992316747, + -0.922556143412663, null, - 0.5144551437666581, - 0.4421375373865315, + -0.926723528147117, + -0.922556143412663, null, - 0.5144551437666581, - 0.42641694849778966, + -0.6740662992025501, + -0.922556143412663, null, - 0.5144551437666581, - 0.6014235590484225, + -0.645856740142745, + -0.8182264745850724, null, - 0.5144551437666581, - 0.5603277981830703, + -0.5177228561621657, + -0.7617659088160793, null, - 0.5144551437666581, - 0.4868902788925622, + -0.7442137071319165, + -0.8824105534014709, null, - 0.7454337953380579, - 0.7077207700167599, + -0.4309900961374921, + -0.5260075373652568, null, - 0.7454337953380579, - 0.8599268392047722, + 0.7245290699084046, + 0.7866931209891337, null, - 0.7454337953380579, - 0.7005910562446783, + -0.46411496857079576, + -0.5260075373652568, null, - 0.03395115206665145, - 0.05477321631284726, + 0.3232896688907876, + 0.42289489293422566, null, - 0.7077207700167599, - 0.7005910562446783, + 0.37115926386562975, + 0.42289489293422566, null, - 0.9024846524956353, - 0.9961038345306213, + 0.8144104230201399, + 0.8482266969909324, null, - 0.9024846524956353, - 0.9344432405222354, + -0.6862355564324931, + -0.8824105534014709, null, - 0.9024846524956353, - 0.888980486534156, + 0.8143822305059775, + 0.8835075264311746, null, - 0.9024846524956353, - 0.9810704436128125, + 0.1952194444171441, + 0.2613197935543201, null, - 0.055897802218322856, - 0.04153202488293273, + -0.44116587580288175, + -0.5260075373652568, null, - 0.055897802218322856, - 
0.06013197669987258, + 0.34219400777112563, + 0.42289489293422566, null, - 0.055897802218322856, - 0.040563128366188694, + 0.3482854537227453, + 0.3895535076931305, null, - 0.055897802218322856, - 0.09959517902538939, + -0.7845181509980911, + -0.8824105534014709, null, - 0.2880647319459674, - 0.3202314429055858, + -0.010628018143674797, + -0.004598212759833403, null, - 0.2880647319459674, - 0.22993075379681738, + -0.333303022046739, + -0.5159370768528078, null, - 0.2880647319459674, - 0.3169605131706372, + 0.391453184513052, + 0.42289489293422566, null, - 0.2880647319459674, - 0.32345881810688737, + -0.8187977817844195, + -0.8182264745850724, null, - 0.9328536520894143, - 0.9078978130468089, + 0.675110539921739, + 0.8930310568108012, null, - 0.9328536520894143, - 0.8508124987550889, + -0.3990251391372163, + -0.5260075373652568, null, - 0.9298960866412943, - 0.8599268392047722, + -0.30002656361791447, + -0.30032745656510185, null, - 0.9298960866412943, - 0.9958360522915445, + 0.32580390247949387, + 0.42289489293422566, null, - 0.9298960866412943, - 0.8842114977564064, + -0.41032900004231715, + -0.5260075373652568, null, - 0.9298960866412943, - 0.9513646744432486, + 0.7814157333397895, + 0.8835075264311746, null, - 0.6352288779182178, - 0.5981086798045652, + -0.6545012668193522, + -0.8182264745850724, null, - 0.6352288779182178, - 0.6648266103848882, + -0.8250766319170834, + -0.922556143412663, null, - 0.6352288779182178, - 0.6072525121642058, + 0.11642140741252306, + 0.15152247306622144, null, - 0.04153202488293273, - 0.05477321631284726, + 0.3584413729045867, + 0.42289489293422566, null, - 0.04153202488293273, - 0.1341994714416056, + 0.6893479879923822, + 0.8835075264311746, null, - 0.04153202488293273, - 0.06013197669987258, + -0.5817258086810362, + -0.6830386686787555, + null + ], + "y": [ + 0.7121785088398352, + 0.6637386367247285, null, - 0.04153202488293273, - 0.040563128366188694, + -0.598438979460561, + -0.6680444571236058, null, - 0.7834166246251234, - 
0.7925454632595156, + 0.44361523531892444, + 0.5765261392732328, null, - 0.7834166246251234, - 0.8508124987550889, + 0.26080967071708444, + 0.24096872716900972, null, - 0.7834166246251234, - 0.6714278208298593, + -0.7089068308054137, + -0.9755872442336094, null, - 0.6710484758334021, - 0.5492873750243871, + -0.34274934403468454, + -0.5420785313722902, null, - 0.6710484758334021, - 0.6201266549140614, + -0.7621228818670577, + -0.9755872442336094, null, - 0.3202314429055858, - 0.3169605131706372, + 0.2648966145363047, + 0.403612631630065, null, - 0.3202314429055858, - 0.32345881810688737, + 0.6789811252229511, + 0.7959212776137778, null, - 0.9961038345306213, - 0.9344432405222354, + 0.7900197865139884, + 0.8310972903269801, null, - 0.9961038345306213, - 0.9642772106357639, + -0.6713224168822408, + -0.8317527771805896, null, - 0.9961038345306213, - 0.888980486534156, + -0.21408032557683887, + -0.28978736522770937, null, - 0.9961038345306213, - 0.9810704436128125, + 0.604772500031668, + 0.7959212776137778, null, - 0.4107398412471005, - 0.3635517670405215, + -0.1954793343876051, + -0.28978736522770937, null, - 0.4107398412471005, - 0.42203254876563234, + -0.1310109485870865, + -0.23208476462159833, null, - 0.4107398412471005, - 0.29050814087118004, + 0.6489829876156022, + 0.7932680240853172, null, - 0.24013807075121119, - 0.2693681584998491, + 0.6211459206982282, + 0.8677949363592646, null, - 0.24013807075121119, - 0.3333136626479075, + -0.2060018213633483, + -0.2001301912924086, null, - 0.8599268392047722, - 0.8842114977564064, + 0.6598165306544911, + 0.7693710498570913, null, - 0.4318165589087314, - 0.3340702546567942, + 0.5144371164893282, + 0.7073620048724625, null, - 0.4318165589087314, - 0.5097617399826666, + -0.8104899180559856, + -0.8622970096309992, null, - 0.2693681584998491, - 0.3333136626479075, + -0.06816718399097474, + -0.027959167306021424, null, - 0.4421375373865315, - 0.42641694849778966, + 0.7449128610002976, + 0.8604966990942203, null, - 
0.4421375373865315, - 0.4868902788925622, + -0.1689229441285857, + -0.23208476462159833, null, - 0.5492873750243871, - 0.6201266549140614, + 0.5112080095793095, + 0.7932680240853172, null, - 0.5492873750243871, - 0.5752985482362863, + 0.5807404071548778, + 0.7959212776137778, null, - 0.5981086798045652, - 0.6072525121642058, + -0.6965363437404857, + -0.6680444571236058, null, - 0.5981086798045652, - 0.5260776190209286, + 0.6849817062815453, + 0.7959212776137778, null, - 0.3635517670405215, - 0.42203254876563234, + -0.35224437514754586, + -0.44360886803933014, null, - 0.3635517670405215, - 0.29050814087118004, + 0.09593694912577341, + 0.08889623846074263, null, - 0.42641694849778966, - 0.3333136626479075, + 0.6029760119136741, + 0.7932680240853172, null, - 0.42641694849778966, - 0.4868902788925622, + 0.0924254061247565, + 0.08889623846074263, null, - 0.6648266103848882, - 0.6072525121642058, + 0.7455745376985553, + 0.7959212776137778, null, - 0.9344432405222354, - 0.9642772106357639, + -0.21470076141573366, + -0.23208476462159833, null, - 0.9344432405222354, - 0.8157570218353161, + -0.02976484897864857, + -0.027959167306021424, null, - 0.9344432405222354, - 0.888980486534156, + -0.7796703040360509, + -0.9657058443610594, null, - 0.9344432405222354, - 0.9810704436128125, + -0.06769339318848107, + -0.027959167306021424, null, - 0.249116699886752, - 0.23700988477155205, + -0.7307241565679895, + -0.9755872442336094, null, - 0.249116699886752, - 0.28871122138225125, + 0.6450827404437204, + 0.7073620048724625, null, - 0.249116699886752, - 0.2002886163837997, + -0.15363835146843016, + -0.23208476462159833, null, - 0.249116699886752, - 0.29050814087118004, + 0.6724237806357076, + 0.7959212776137778, null, - 0.249116699886752, - 0.2318219208408404, + -0.15963946240330748, + -0.23208476462159833, null, - 0.249116699886752, - 0.20307680326083377, + -0.6981965728205204, + -0.8219907437087358, null, - 0.6201266549140614, - 0.5752985482362863, + 0.5099911479349106, + 
0.7073620048724625, null, - 0.16781555203357146, - 0.19048093242734687, + -0.8213284146465071, + -0.8622970096309992, null, - 0.16781555203357146, - 0.1294716874165911, + -0.7638667955160672, + -1, null, - 0.4937592635708411, - 0.5603277981830703, + 0.6051504434721513, + 0.7073620048724625, null, - 0.4937592635708411, - 0.547451424618544, + 0.630517583672798, + 0.7959212776137778, null, - 0.9435179236599912, - 0.9958360522915445, + -0.08910955067197081, + -0.9755872442336094, null, - 0.9435179236599912, - 0.9756800437762957, + -0.08910955067197081, + 0.7959212776137778, null, - 0.07011604000159166, - 0.019989772968585173, + -0.7157507162452663, + -0.7993949320290974, null, - 0.07011604000159166, - 0.10059463740220753, + 0.21748807668373418, + 0.23472763490409848, null, - 0.07011604000159166, - 0.038844634468288675, + 0.3744351904237455, + 0.403612631630065, null, - 0.9078978130468089, - 0.8508124987550889, + -0.912907039528394, + -0.9377070802046585, null, - 0.9078978130468089, - 0.9513646744432486, + 0.2987504763255712, + 0.394186370625655, null, - 0.6072525121642058, - 0.5260776190209286, + -0.3919634530446739, + -0.44844761451575715, null, - 0.23700988477155205, - 0.28871122138225125, + -0.18103128274552913, + -0.23208476462159833, null, - 0.23700988477155205, - 0.2002886163837997, + -0.6480839567003398, + -0.6714013657386378, null, - 0.23700988477155205, - 0.29050814087118004, + -0.05639446704405653, + -0.027959167306021424, null, - 0.23700988477155205, - 0.13201947050262697, + -0.06884618548241157, + -0.027959167306021424, null, - 0.23700988477155205, - 0.2318219208408404, + -0.7884482829492515, + -0.8219907437087358, null, - 0.23700988477155205, - 0.20307680326083377, + 0.7254199795788893, + 0.7959212776137778, null, - 0.05477321631284726, - 0.1341994714416056, + 0.5991531652056148, + 0.7932680240853172, null, - 0.05477321631284726, - 0.06013197669987258, + -0.06419545984665424, + -0.027959167306021424, null, - 0.05477321631284726, - 0.040563128366188694, + 
-0.03388402641622064, + -0.027959167306021424, null, - 0.9642772106357639, - 0.888980486534156, + -0.1428808457598657, + -0.23208476462159833, null, - 0.019989772968585173, - 0.1294716874165911, + -0.19435412457565315, + -0.23208476462159833, null, - 0.019989772968585173, - 0.038844634468288675, + -0.1603371299138783, + -0.23208476462159833, null, - 0.29119156039108685, - 0.24102842320743, + 0.08169793780859752, + 0.08889623846074263, null, - 0.29119156039108685, - 0.19048093242734687, + 0.39067339539297724, + 0.5799030308386766, null, - 0.6217058876501556, - 0.6714278208298593, + 0.345012671198623, + 0.403612631630065, null, - 0.1341994714416056, - 0.06013197669987258, + 0.6678716426804834, + 0.7932680240853172, null, - 0.1341994714416056, - 0.040563128366188694, + -0.5318696248666036, + -0.5727295276261749, null, - 0.28871122138225125, - 0.2002886163837997, + 0.6775596287803459, + 0.7932680240853172, null, - 0.28871122138225125, - 0.2318219208408404, + 0.5304054329523671, + 0.7959212776137778, null, - 0.28871122138225125, - 0.20307680326083377, + 0.7125808229017521, + 0.7959212776137778, null, - 0.06013197669987258, - 0.040563128366188694, + 0.5152301783287565, + 0.5153009464739415, null, - 0.5752985482362863, - 0.5097617399826666, + 0.32033499347523814, + 0.403612631630065, null, - 0.10059463740220753, - 0.09959517902538939, + -0.04159208903692613, + -0.027959167306021424, null, - 0.10059463740220753, - 0.038844634468288675, + -0.7188816814442675, + -0.9964621174020154, null, - 0.8157570218353161, - 0.7925454632595156, + 0.6293531161898335, + 0.7932680240853172, null, - 0.8157570218353161, - 0.888980486534156, + 0.5575985531739881, + 0.7959212776137778, null, - 0.42203254876563234, - 0.5260776190209286, + -0.8363676412114709, + -0.9280481286500764, null, - 0.2002886163837997, - 0.13201947050262697, + 0.3550572121642179, + 0.403612631630065, null, - 0.2002886163837997, - 0.2318219208408404, + -0.8792824988230634, + -0.9755872442336094, null, - 0.2002886163837997, 
- 0.20307680326083377, + -0.5662007883069198, + -0.8622970096309992, null, - 0.8622415881936324, - 0.8350595230795331, + 0.7030652464929427, + 0.7959212776137778, null, - 0.3169605131706372, - 0.29050814087118004, + 0.12658592510635783, + 0.08889623846074263, null, - 0.3169605131706372, - 0.32345881810688737, + -0.20166127879656748, + -0.28978736522770937, null, - 0.6014235590484225, - 0.5603277981830703, + 0.5570923883380997, + 0.7932680240853172, null, - 0.6014235590484225, - 0.4868902788925622, + -0.8222108886920065, + -0.7885877762376592, null, - 0.9958360522915445, - 0.8842114977564064, + 0.5627700325736714, + 0.7959212776137778, null, - 0.24102842320743, - 0.19048093242734687, + 0.6028638927072018, + 0.7932680240853172, null, - 0.19048093242734687, - 0.1294716874165911, + -0.05023816831269756, + -0.027959167306021424, null, - 0.8508124987550889, - 0.9513646744432486, + 0.09509796558003482, + 0.08889623846074263, null, - 0.13201947050262697, - 0.2318219208408404, + -0.165232208286361, + -0.23208476462159833, null, - 0.13201947050262697, - 0.20307680326083377, + -0.7309343925864511, + -0.9657058443610594, null, - 0.2318219208408404, - 0.20307680326083377, + 0.621621947321043, + 0.7959212776137778, null, - 0.888980486534156, - 0.9810704436128125, + -0.052931911634106425, + -0.027959167306021424, null, - 0.5603277981830703, - 0.547451424618544, + 0.48262281712395333, + 0.5765261392732328, null ] }, @@ -9521,215 +2286,143 @@ "hoverinfo": "text", "marker": { "color": [ - 11, - 14, - 7, - 10, - 7, - 10, - 3, - 14, - 9, - 8, - 9, - 6, - 6, - 9, - 15, - 13, - 15, - 10, - 2, - 13, - 9, - 2, - 9, - 9, - 9, - 6, - 15, - 11, - 9, - 6, - 6, - 13, - 13, - 12, - 12, - 10, - 13, - 14, - 10, - 7, - 12, - 8, - 8, - 12, - 7, - 11, - 3, - 6, - 11, - 9, - 4, - 10, - 17, - 1, - 10, - 16, - 10, - 10, - 7, - 13, - 5, - 13, - 10, - 16, - 8, - 13, - 18, - 8, - 7, - 12, - 14, - 16, - 15, - 13, - 10, - 14, - 15, - 7, - 7, - 7, - 10, - 17, - 12, - 10, - 8, - 3, - 10, - 8, - 8, - 10, - 2, 
- 18, - 4, - 10, - 16, - 13, - 7, - 7, - 13, - 8, - 5, - 12, - 5, - 6, - 9, - 11, - 9, - 8, - 7, - 9, - 16, - 10, - 5, - 4, - 12, - 9, - 9, - 4, - 6, - 3, - 6, - 11, - 13, - 10, - 9, - 11, - 11, - 6, - 6, - 12, - 7, - 10, - 10, - 6, - 9, - 12, - 7, - 9, - 8, - 9, - 11, - 13, - 12, - 3, - 8, - 12, - 17, - 9, - 3, - 6, - 7, - 4, - 4, - 8, - 12, - 17, - 11, - 10, - 5, - 9, - 4, - 14, - 13, - 12, - 9, - 6, - 6, - 8, - 11, - 8, - 12, - 3, - 13, - 4, - 6, - 8, - 3, - 7, - 7, - 3, - 12, - 9, - 4, - 4, - 11, - 8, - 7, - 10, - 9, - 15, - 11, - 7, - 5, - 6, - 10, - 4, - 3, - 12, - 12, - 5 + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + "green", + 
"green", + "green", + "green", + "green", + "green", + "green", + "green", + "green" ], - "colorbar": { - "thickness": 15, - "title": { - "side": "right", - "text": "Node Connections" - }, - "xanchor": "left" - }, "colorscale": [ [ 0, @@ -9768,634 +2461,428 @@ "rgb(8,29,88)" ] ], - "line": { - "width": 2 - }, - "reversescale": true, "showscale": true, "size": 10 }, "mode": "markers", "text": [ - "# of connections: 11", - "# of connections: 14", - "# of connections: 7", - "# of connections: 10", - "# of connections: 7", - "# of connections: 10", - "# of connections: 3", - "# of connections: 14", - "# of connections: 9", - "# of connections: 8", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 9", - "# of connections: 15", - "# of connections: 13", - "# of connections: 15", - "# of connections: 10", - "# of connections: 2", - "# of connections: 13", - "# of connections: 9", - "# of connections: 2", - "# of connections: 9", - "# of connections: 9", - "# of connections: 9", - "# of connections: 6", - "# of connections: 15", - "# of connections: 11", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 13", - "# of connections: 13", - "# of connections: 12", - "# of connections: 12", - "# of connections: 10", - "# of connections: 13", - "# of connections: 14", - "# of connections: 10", - "# of connections: 7", - "# of connections: 12", - "# of connections: 8", - "# of connections: 8", - "# of connections: 12", - "# of connections: 7", - "# of connections: 11", - "# of connections: 3", - "# of connections: 6", - "# of connections: 11", - "# of connections: 9", - "# of connections: 4", - "# of connections: 10", - "# of connections: 17", - "# of connections: 1", - "# of connections: 10", - "# of connections: 16", - "# of connections: 10", - "# of connections: 10", - "# of connections: 7", - "# of connections: 13", - "# of connections: 5", - "# of connections: 13", - "# of connections: 
10", - "# of connections: 16", - "# of connections: 8", - "# of connections: 13", - "# of connections: 18", - "# of connections: 8", - "# of connections: 7", - "# of connections: 12", - "# of connections: 14", - "# of connections: 16", - "# of connections: 15", - "# of connections: 13", - "# of connections: 10", - "# of connections: 14", - "# of connections: 15", - "# of connections: 7", - "# of connections: 7", - "# of connections: 7", - "# of connections: 10", - "# of connections: 17", - "# of connections: 12", - "# of connections: 10", - "# of connections: 8", - "# of connections: 3", - "# of connections: 10", - "# of connections: 8", - "# of connections: 8", - "# of connections: 10", - "# of connections: 2", - "# of connections: 18", - "# of connections: 4", - "# of connections: 10", - "# of connections: 16", - "# of connections: 13", - "# of connections: 7", - "# of connections: 7", - "# of connections: 13", - "# of connections: 8", - "# of connections: 5", - "# of connections: 12", - "# of connections: 5", - "# of connections: 6", - "# of connections: 9", - "# of connections: 11", - "# of connections: 9", - "# of connections: 8", - "# of connections: 7", - "# of connections: 9", - "# of connections: 16", - "# of connections: 10", - "# of connections: 5", - "# of connections: 4", - "# of connections: 12", - "# of connections: 9", - "# of connections: 9", - "# of connections: 4", - "# of connections: 6", - "# of connections: 3", - "# of connections: 6", - "# of connections: 11", - "# of connections: 13", - "# of connections: 10", - "# of connections: 9", - "# of connections: 11", - "# of connections: 11", - "# of connections: 6", - "# of connections: 6", - "# of connections: 12", - "# of connections: 7", - "# of connections: 10", - "# of connections: 10", - "# of connections: 6", - "# of connections: 9", - "# of connections: 12", - "# of connections: 7", - "# of connections: 9", - "# of connections: 8", - "# of connections: 9", - "# of connections: 11", - "# of 
connections: 13", - "# of connections: 12", - "# of connections: 3", - "# of connections: 8", - "# of connections: 12", - "# of connections: 17", - "# of connections: 9", - "# of connections: 3", - "# of connections: 6", - "# of connections: 7", - "# of connections: 4", - "# of connections: 4", - "# of connections: 8", - "# of connections: 12", - "# of connections: 17", - "# of connections: 11", - "# of connections: 10", - "# of connections: 5", - "# of connections: 9", - "# of connections: 4", - "# of connections: 14", - "# of connections: 13", - "# of connections: 12", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 8", - "# of connections: 11", - "# of connections: 8", - "# of connections: 12", - "# of connections: 3", - "# of connections: 13", - "# of connections: 4", - "# of connections: 6", - "# of connections: 8", - "# of connections: 3", - "# of connections: 7", - "# of connections: 7", - "# of connections: 3", - "# of connections: 12", - "# of connections: 9", - "# of connections: 4", - "# of connections: 4", - "# of connections: 11", - "# of connections: 8", - "# of connections: 7", - "# of connections: 10", - "# of connections: 9", - "# of connections: 15", - "# of connections: 11", - "# of connections: 7", - "# of connections: 5", - "# of connections: 6", - "# of connections: 10", - "# of connections: 4", - "# of connections: 3", - "# of connections: 12", - "# of connections: 12", - "# of connections: 5" + "Name: mike macdonald for senate
classification: neutral
donor_id: 76a5bf29-5b5c-4313-8d25-76d147183275
entity_type: corporation
full_name: mike macdonald for senate
recipient_id: 5982c37b-08f0-4bfe-84ef-fe8a5d163937
recipient_name: michigan petroleum association political action committee
state: MI
", + "Name: michigan petroleum association political action committee
classification: neutral
", + "Name: douglas harris
address: 12601 MINER ROAD
city: PARMA
classification: neutral
company: retired
donor_id: 2bed980b-c5df-4c3f-b242-bb5f6f5891d5
entity_type: Individual
first_name: DOUGLAS
full_name: douglas harris
last_name: HARRIS
occupation: not employed
recipient_id: 9f82ffff-0906-4554-a3c4-f23d22da925d
recipient_name: plumbers and steamfitters local 190 pac fund
state: MI
zip: 49269-9613
", + "Name: plumbers and steamfitters local 190 pac fund
classification: neutral
", + "Name: manny lentine
address: 230 NORTH WASHINGTON SQUARE SUITE 100
city: LANSING
classification: neutral
company: lentine & associates
donor_id: 091ac544-c755-4189-b6b4-690bbf3aab9c
entity_type: Individual
first_name: MANNY
full_name: manny lentine
last_name: LENTINE
occupation: self-employed
recipient_id: cf320278-7c53-4ae3-87d5-8b8382acda89
recipient_name: steve johnson for state rep
state: MI
zip: 48933-0000
", + "Name: steve johnson for state rep
classification: neutral
", + "Name: arizona secretary of state
classification: neutral
donor_id: 904ad1f0-8cf9-492d-b04c-08a2c749b147
entity_type: vendor
full_name: arizona secretary of state
recipient_id: 5b4de071-973d-4b43-97d4-598da37538ef
recipient_name: tucson firefighters local #479 fire pac
", + "Name: tucson firefighters local #479 fire pac
classification: neutral
", + "Name: erik schulwolf
address: 34 BONNY VIEW RD
city: WEST HARTFORD
classification: neutral
donor_id: 095db171-e7b9-4c63-9fec-e503a488eea6
entity_type: Individual
first_name: ERIK
full_name: erik schulwolf
last_name: SCHULWOLF
recipient_id: 4b5c27d8-f1bf-4ff6-b717-d67025ab2a1b
recipient_name: committee to elect maurice imhoff
state: CT
zip: 06107-3401
", + "Name: committee to elect maurice imhoff
classification: neutral
", + "Name: wonderwear/aveva
classification: neutral
donor_id: e8126d99-dd7a-4f36-99e0-bb2ed18f1ecd
entity_type: corporation
full_name: wonderwear/aveva
recipient_id: e3294ecb-f6df-48a0-b3b4-7048a9c650a7
recipient_name: michael detmer for state senate
state: MI
", + "Name: michael detmer for state senate
classification: neutral
", + "Name: mark lewis
address: 11 HIGHLAND AVE
city: REDDING
classification: neutral
donor_id: d64afe41-a308-4fc1-b88d-6e963b0f3e10
entity_type: Individual
first_name: MARK
full_name: mark lewis
last_name: LEWIS
recipient_id: 4b5c27d8-f1bf-4ff6-b717-d67025ab2a1b
recipient_name: committee to elect maurice imhoff
state: CT
zip: 06896-0000
", + "Name: edwin mcilvried
address: 3 FROG POND DR
city: GLEN ARBOR
classification: neutral
donor_id: 5b9e3f4e-cda1-49fb-99db-dac2fc050bd9
entity_type: Individual
first_name: EDWIN
full_name: edwin mcilvried
last_name: MCILVRIED
recipient_id: 9a96a76f-8b26-4db7-be95-b14987373991
recipient_name: committee to elect betsy coffia
state: MI
zip: 49636-0000
", + "Name: committee to elect betsy coffia
classification: neutral
", + "Name: jeanette roper
address: 315 W 70TH ST APT 5
city: NEW YORK
classification: neutral
donor_id: c20be49d-0bd3-4965-a2ba-cc73d147e566
entity_type: Individual
first_name: JEANETTE
full_name: jeanette roper
last_name: ROPER
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: NY
zip: 10023-3504
", + "Name: reproductive freedom for all
classification: neutral
", + "Name: scale to win
classification: neutral
donor_id: 4dc1a60d-c371-4699-adbd-6eaefad3301e
entity_type: corporation
full_name: scale to win
recipient_id: 89c7b2c1-abe8-4b59-98c2-c7b580b45a60
recipient_name: friends of james sklar
state: MI
", + "Name: friends of james sklar
classification: neutral
", + "Name: roberta cramer
address: 318 S.2ND ST
city: GRAND HAVEN
classification: neutral
donor_id: b4611504-1f85-4d50-ac6e-5f4ab87a5632
entity_type: Individual
first_name: ROBERTA
full_name: roberta cramer
last_name: CRAMER
recipient_id: 8072345f-9b00-47fc-be6c-66758ba6e5d5
recipient_name: christine baker
state: MI
zip: 49417-0000
", + "Name: christine baker
classification: neutral
", + "Name: la placita express
classification: neutral
donor_id: dc7b145f-e322-4f20-ac17-2a96a6ee5673
entity_type: vendor
full_name: la placita express
recipient_id: 349703c0-a6d9-45a1-8491-b7f9ba0d287d
recipient_name: unite here arizona
", + "Name: unite here arizona
classification: neutral
", + "Name: suzette sanders
address: 3632 WILLOWICK DRIVE
city: VENTURA
classification: neutral
donor_id: 8d0a34c2-fec8-443d-9bc1-267a653408aa
entity_type: Individual
first_name: SUZETTE
full_name: suzette sanders
last_name: SANDERS
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: CA
zip: 93003-0000
", + "Name: case action fund
classification: neutral
donor_id: abf0476c-1598-4290-8719-d36bfa4de87c
entity_type: vendor
full_name: case action fund
recipient_id: 349703c0-a6d9-45a1-8491-b7f9ba0d287d
recipient_name: unite here arizona
", + "Name: ronald smith
address: 4895 HELENA RD N
city: SAINT PAUL
classification: neutral
donor_id: 71f02d97-4f75-4b61-be52-43f42b6a205a
entity_type: Individual
first_name: RONALD
full_name: ronald smith
last_name: SMITH
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: MN
zip: 55128-8202
", + "Name: progressive turnout project
classification: neutral
", + "Name: christopher johnson
address: 8604 DARBEE RD
city: REESE
classification: neutral
donor_id: d33ce2a8-fd78-4f08-add3-6526d858383c
entity_type: Individual
first_name: CHRISTOPHER
full_name: christopher johnson
last_name: JOHNSON
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: MI
zip: 48757-0000
", + "Name: united food and commercial workers active ballot club
classification: neutral
", + "Name: tmobile
classification: neutral
donor_id: f1a7362b-f009-4a4c-aba9-4ec1b107902d
entity_type: vendor
full_name: tmobile
recipient_id: 77a286de-0d6c-4e00-a293-b7357d82f3c5
recipient_name: cochise county democratic committee
", + "Name: cochise county democratic committee
classification: neutral
", + "Name: owosso knights of columbus
classification: neutral
donor_id: b70d0c69-9df1-43db-b48c-274c27ff43b7
entity_type: corporation
full_name: owosso knights of columbus
recipient_id: 154ba5d3-e21d-41af-9e8b-c8a81ed3b270
recipient_name: brian begole for state rep
state: MI
", + "Name: brian begole for state rep
classification: neutral
", + "Name: vibe ink
classification: neutral
donor_id: 9c3bd8ce-2032-4cac-b25d-d0e70982949f
entity_type: corporation
full_name: vibe ink
recipient_id: 4a1bcd84-e553-4e6a-afe6-ff386a1b0257
recipient_name: ernest little for state rep
state: MI
", + "Name: ernest little for state rep
classification: neutral
", + "Name: beutler frederick
classification: neutral
donor_id: 554ab63a-812c-4c61-8a28-21d795889b66
entity_type: corporation
full_name: beutler frederick
recipient_id: 501217db-524f-4534-848e-78b9afe68961
recipient_name: voters action committee (superpac)
state: MI
", + "Name: voters action committee (superpac)
classification: neutral
", + "Name: lorraine lerner
address: 7118 SUNCREST RD
city: WEST BLOOMFIELD
classification: neutral
donor_id: 135aac23-535d-44fd-ae45-e99493a37df9
entity_type: Individual
first_name: LORRAINE
full_name: lorraine lerner
last_name: LERNER
recipient_id: 97b5db6e-a1b8-4013-8304-df0009d1a1b0
recipient_name: noah arbit for michigan
state: MI
zip: 48322-4340
", + "Name: noah arbit for michigan
classification: neutral
", + "Name: michael kotts
address: PO BOX 995
city: ROSCOMMON
classification: neutral
donor_id: 076bcb51-e328-462e-b58c-690c742255c3
entity_type: Individual
first_name: MICHAEL
full_name: michael kotts
last_name: KOTTS
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 48653-0995
", + "Name: michigan farm bureau political action committee
classification: neutral
", + "Name: j.c. huizenga
address: 3755 36TH STREET SE SUITE 100
city: GRAND RAPIDS
classification: neutral
company: huzinga group
donor_id: 09edd2a7-3653-4c64-b967-f16dca6383c5
entity_type: Individual
first_name: J.C.
full_name: j.c. huizenga
last_name: HUIZENGA
occupation: executive
recipient_id: 6a18b3b8-b810-4a49-ac9b-db89ea23fa2a
recipient_name: tom leonard for michigan
state: MI
zip: 49512-0000
", + "Name: tom leonard for michigan
classification: neutral
", + "Name: bittermann laura
address: 127 HILLSWOOD DR
city: FOLSOM
classification: neutral
donor_id: 7e5a60a4-829b-4851-a17c-bde180321158
entity_type: Individual
first_name: BITTERMANN
full_name: bittermann laura
last_name: LAURA
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: CA
zip: 95630-0000
", + "Name: jason morgan for state representativ
classification: neutral
donor_id: fb8c97ef-cbc2-4d9c-87b3-958f7a64bc0c
entity_type: corporation
full_name: jason morgan for state representativ
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: MI
", + "Name: marybeth webster
address: 727 SW ROGUE RIVER AVE. #74
city: GRANTS PASS
classification: neutral
donor_id: 0bd6955a-e95b-4af8-8ada-69fccd52ad11
entity_type: Individual
first_name: MARYBETH
full_name: marybeth webster
last_name: WEBSTER
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: OR
zip: 97526-0000
", + "Name: washtenaw cnty democratic part
classification: neutral
donor_id: db7f4263-c82a-4264-af7a-4a40db0f3083
entity_type: corporation
full_name: washtenaw cnty democratic part
recipient_id: 9f82ffff-0906-4554-a3c4-f23d22da925d
recipient_name: plumbers and steamfitters local 190 pac fund
state: MI
", + "Name: beth collins
address: 2013 EAST FRONT STREET
city: TRAVERSE CITY
classification: neutral
donor_id: 71b84806-a02b-4411-bc72-843ff882eb97
entity_type: Individual
first_name: BETH
full_name: beth collins
last_name: COLLINS
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 49686-0000
", + "Name: popeadam
classification: neutral
donor_id: 1184bfa5-3fce-4248-aa8d-a89639ce04b8
entity_type: corporation
full_name: popeadam
recipient_id: 44751f04-3ce1-4288-979c-8a35b9d5e89c
recipient_name: teamsters 243 political action committee
state: MI
", + "Name: teamsters 243 political action committee
classification: neutral
", + "Name: robert lisowski
address: 6 REGAL RD
city: EDISON
classification: neutral
donor_id: 59c6e536-1832-4ac1-ad7d-78aedac0c3f9
entity_type: Individual
first_name: ROBERT
full_name: robert lisowski
last_name: LISOWSKI
recipient_id: fa79b50e-9eb3-4c65-a25b-968fa5059e75
recipient_name: ivote mi pac
state: NJ
zip: 08820-0000
", + "Name: ivote mi pac
classification: neutral
", + "Name: tiffany ross
address: 7250 POE AVENUE SUITE 400
city: DAYTON
classification: neutral
company: ufcw local no. 75
donor_id: ca4383cc-c8de-46e9-ab3b-f8faca955a2c
entity_type: Individual
first_name: TIFFANY
full_name: tiffany ross
last_name: ROSS
occupation: l/u representative
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: OH
zip: 45414-2698
", + "Name: kathleen pruzek
address: 113 HOMESTEAD A D
city: ALBANY
classification: neutral
donor_id: 0fabe1f0-0913-4079-bde8-1164b3f375b8
entity_type: Individual
first_name: KATHLEEN
full_name: kathleen pruzek
last_name: PRUZEK
recipient_id: fa79b50e-9eb3-4c65-a25b-968fa5059e75
recipient_name: ivote mi pac
state: NY
zip: 12203-0000
", + "Name: ariel martin-cone
address: 92 LOCUST ST
city: DANVERS
classification: neutral
donor_id: b4e159ad-9d78-4826-893d-71349f8f1008
entity_type: Individual
first_name: ARIEL
full_name: ariel martin-cone
last_name: MARTIN-CONE
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MA
zip: 01923-0000
", + "Name: william hamilton
address: 1011 LAKE HEATHER RESERVE
city: BIRMINGHAM
classification: neutral
donor_id: c0417820-aec4-4e2e-900d-7c0d03cf703f
entity_type: Individual
first_name: WILLIAM
full_name: william hamilton
last_name: HAMILTON
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: AL
zip: 35242-0000
", + "Name: matthew curtis
address: 7375 VILLAGE SQUARE DR
city: WEST BLOOMFIELD
classification: neutral
donor_id: 64a16a5d-05c5-452b-aa17-4129808d5604
entity_type: Individual
first_name: MATTHEW
full_name: matthew curtis
last_name: CURTIS
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 48322-0000
", + "Name: riverview lapeer llc
classification: neutral
donor_id: 238b5103-2978-494e-ade9-bcc61831b5f3
entity_type: corporation
full_name: riverview lapeer llc
recipient_id: 27759108-2ed1-4059-a134-6fd6f901201a
recipient_name: lapeer county republican party
state: MI
", + "Name: lapeer county republican party
classification: neutral
", + "Name: john cooper
address: 8224 FIVE POINT HWY
city: EATON RAPIDS
classification: neutral
company: cooper
donor_id: 183332c5-4ee5-4601-917e-6603aa451107
entity_type: Individual
first_name: JOHN
full_name: john cooper
last_name: COOPER
occupation: retired
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 48827-0000
", + "Name: amy ream
address: 8785 SW WHITE PINE LN
city: PORTLAND
classification: neutral
donor_id: cd5f97b5-fd79-4162-9d10-34253ee57c52
entity_type: Individual
first_name: AMY
full_name: amy ream
last_name: REAM
recipient_id: 4b5c27d8-f1bf-4ff6-b717-d67025ab2a1b
recipient_name: committee to elect maurice imhoff
state: OR
zip: 97225-2440
", + "Name: van dessel kathleen
classification: neutral
donor_id: 3941536a-03a8-4049-b35b-6a975e1bbbf4
entity_type: corporation
full_name: van dessel kathleen
recipient_id: 501217db-524f-4534-848e-78b9afe68961
recipient_name: voters action committee (superpac)
state: MI
", + "Name: knight kent
address: 257 POINT VIEW DR
city: WASHINGTON
classification: neutral
donor_id: 9eb8804b-fda1-4bb3-a083-d149252a0036
entity_type: Individual
first_name: KNIGHT
full_name: knight kent
last_name: KENT
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: PA
zip: 15301-0000
", + "Name: carol wilson
address: 2501 N BELL AVE
city: DENTON
classification: neutral
donor_id: 87a28b81-0b11-48c4-aa97-a071f0947671
entity_type: Individual
first_name: CAROL
full_name: carol wilson
last_name: WILSON
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: TX
zip: 76209-1968
", + "Name: schwartz dorothy
address: 2101SE29THSTREET
city: OKEECHOBEE
classification: neutral
donor_id: 69c1e119-f836-44e7-aaa5-aa12b271c13f
entity_type: Individual
first_name: SCHWARTZ
full_name: schwartz dorothy
last_name: DOROTHY
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: FL
zip: 34974-0000
", + "Name: i360 llc
classification: neutral
donor_id: 59083df1-2a4f-4ba8-8f50-f7bf04103787
entity_type: corporation
full_name: i360 llc
recipient_id: f331c69e-69f6-401d-bbfc-10cc94f622ab
recipient_name: committee to elect rachelle smit for state representative
state: MI
", + "Name: committee to elect rachelle smit for state representative
classification: neutral
", + "Name: alessi galen j
classification: neutral
donor_id: 8b1a42ae-6ef8-4747-aa2b-f7dc8a98bff0
entity_type: corporation
full_name: alessi galen j
recipient_id: 501217db-524f-4534-848e-78b9afe68961
recipient_name: voters action committee (superpac)
state: MI
", + "Name: scott bennett
address: 2301 WILTON DR APT R401
city: WILTON MANORS
classification: neutral
company: not employed
donor_id: d26a9a30-d0ec-4b3c-bb56-2fab2cb42b0e
entity_type: Individual
first_name: SCOTT
full_name: scott bennett
last_name: BENNETT
occupation: not employed
recipient_id: 97b5db6e-a1b8-4013-8304-df0009d1a1b0
recipient_name: noah arbit for michigan
state: FL
zip: 33305-1202
", + "Name: nypd pizza
classification: neutral
donor_id: 7e431e50-d013-46e7-bef4-c12d1609475e
entity_type: vendor
full_name: nypd pizza
recipient_id: 00c11cf0-1cb8-47a1-8298-1e869a623b7c
recipient_name: maricopa county democratic party
", + "Name: maricopa county democratic party
classification: neutral
", + "Name: novak thomas william
classification: neutral
donor_id: 8d52ad60-68d6-4368-920e-ef02da078457
entity_type: corporation
full_name: novak thomas william
recipient_id: 501217db-524f-4534-848e-78b9afe68961
recipient_name: voters action committee (superpac)
state: MI
", + "Name: shelby timmis
address: 291 EDMUND PLACE 2
city: DETROIT
classification: neutral
donor_id: 1b28baed-5bd4-4439-9233-9bd4c682cdef
entity_type: Individual
first_name: SHELBY
full_name: shelby timmis
last_name: TIMMIS
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 48201-0000
", + "Name: karen bailey
address: 21727 GREGORY ST
city: DEARBORN
classification: neutral
donor_id: 66af7522-8ad3-4d94-9e7a-fdbcba0ecfac
entity_type: Individual
first_name: KAREN
full_name: karen bailey
last_name: BAILEY
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 48124-0000
", + "Name: michigan agri biz pac
classification: neutral
donor_id: 50f3a121-008f-495b-8243-3a7b333eae2b
entity_type: corporation
full_name: michigan agri biz pac
recipient_id: a4a95b69-a472-427c-a028-368d3260f2b8
recipient_name: joseph fox for state representative
state: MI
", + "Name: joseph fox for state representative
classification: neutral
", + "Name: michelle revard
address: 10145 HORNBILL DR
city: FREELAND
classification: neutral
donor_id: 1f995e9b-a29a-4c2b-bca5-cb28c394124e
entity_type: Individual
first_name: MICHELLE
full_name: michelle revard
last_name: REVARD
recipient_id: 4e3e4aca-ae3a-420a-a51e-1c2933182098
recipient_name: bay city educators public affairs council-mi education association
state: MI
zip: 48623-0000
", + "Name: bay city educators public affairs council-mi education association
classification: neutral
", + "Name: mary lou melville
address: 100 TIMBERHILL PL
city: CHAPEL HILL
classification: neutral
donor_id: 84380abe-454a-415f-a1ed-21786288166f
entity_type: Individual
first_name: MARY LOU
full_name: mary lou melville
last_name: MELVILLE
recipient_id: 9a96a76f-8b26-4db7-be95-b14987373991
recipient_name: committee to elect betsy coffia
state: NC
zip: 27514-1961
", + "Name: st. joseph county republican party
classification: neutral
donor_id: 4e9cecb3-dbc7-4da5-a59a-22320d7e5b09
entity_type: corporation
full_name: st. joseph county republican party
recipient_id: a1de9252-2708-4c1a-8282-4ca0b3f7dcea
recipient_name: david morgan
state: MI
", + "Name: david morgan
classification: neutral
", + "Name: dharma akmon
address: 1156 GLEN LEVEN RD
city: ANN ARBOR
classification: neutral
company: university of michigan
donor_id: d37e5d58-741b-4689-9dc3-49c9d72127c6
entity_type: Individual
first_name: DHARMA
full_name: dharma akmon
last_name: AKMON
occupation: university of michigan
recipient_id: 7b008f64-c423-430d-a2dd-945d46f83f1e
recipient_name: jeff irwin for state senate
state: MI
zip: 48103-5712
", + "Name: jeff irwin for state senate
classification: neutral
", + "Name: mailchimp
classification: neutral
donor_id: 83e72f72-5955-4222-a98d-7c951cf83c97
entity_type: vendor
full_name: mailchimp
recipient_id: 5be80df7-54ea-4aeb-880f-99902334ef84
recipient_name: oak & wren society
", + "Name: oak & wren society
classification: neutral
", + "Name: martin robert
address: 244 SPENCER RD
city: TOWANDA
classification: neutral
donor_id: 679795de-5106-4b65-9361-164263dd9c51
entity_type: Individual
first_name: MARTIN
full_name: martin robert
last_name: ROBERT
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: PA
zip: 18848-0000
", + "Name: gsci 21st century pac
classification: neutral
donor_id: a6f00fb6-eb32-4cf5-8acc-55e42c784609
entity_type: corporation
full_name: gsci 21st century pac
recipient_id: e54d2ae0-0c44-4bc8-bf1e-dcadc670d80d
recipient_name: bollin for michigan
state: MI
", + "Name: bollin for michigan
classification: neutral
", + "Name: kathryn johnson
address: N10290
city: IRONWOOD
classification: neutral
donor_id: 11724306-81a6-4416-86ef-33b011b83eaf
entity_type: Individual
first_name: KATHRYN
full_name: kathryn johnson
last_name: JOHNSON
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 49938-0000
", + "Name: roberta deroos-brockway
address: 759 WELLS ST NE
city: GRAND RAPIDS
classification: neutral
donor_id: d9ca981a-3384-4bf4-a50f-e921b7d1f354
entity_type: Individual
first_name: ROBERTA
full_name: roberta deroos-brockway
last_name: DEROOS-BROCKWAY
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 49525-2561
", + "Name: bryce doornbos
address: 8445 24TH AVE
city: JENISON
classification: neutral
company: ayers basement systems
donor_id: baee9837-26fe-4b49-99ab-16e48f54b6fc
entity_type: Individual
first_name: BRYCE
full_name: bryce doornbos
last_name: DOORNBOS
occupation: sales
recipient_id: f331c69e-69f6-401d-bbfc-10cc94f622ab
recipient_name: committee to elect rachelle smit for state representative
state: MI
zip: 49428-0000
", + "Name: jennifer anderson
address: 115 K PUGSLEY CTR
city: BROOKINGS
classification: neutral
donor_id: 02b5cf36-44dd-4176-9d11-e44b6d62a6fa
entity_type: Individual
first_name: JENNIFER
full_name: jennifer anderson
last_name: ANDERSON
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: SD
zip: 57007-0001
", + "Name: sarah sarahj
address: POST OFFICE BOX 23555
city: TIGARD
classification: neutral
company: ufcw local no. 555
donor_id: 6623e448-904a-4651-9e05-f80365c16e7c
entity_type: Individual
first_name: SARAH
full_name: sarah sarahj
last_name: SARAHJ
occupation: l/u representative
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: OR
zip: 97281-3555
", + "Name: lynn porter
address: 1715 ELLINCOURT DR APT 5
city: SOUTH PASADENA
classification: neutral
donor_id: c1faf71c-ba25-405f-8595-898b2dea440d
entity_type: Individual
first_name: LYNN
full_name: lynn porter
last_name: PORTER
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: CA
zip: 91030-0000
", + "Name: nancy smith
address: 2420 PRINCESS LN SE
city: MARIETTA
classification: neutral
donor_id: 799eae4a-20b7-41e3-a357-a1e3617f0c48
entity_type: Individual
first_name: NANCY
full_name: nancy smith
last_name: SMITH
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: GA
zip: 30067-6716
", + "Name: confer mary
address: 20503 TRUE RD
city: CALDWELL
classification: neutral
donor_id: 9e4563d2-d7a1-40c1-bc1e-37890a380e97
entity_type: Individual
first_name: CONFER
full_name: confer mary
last_name: MARY
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: ID
zip: 83607-0000
", + "Name: wiseheart katherine
address: 529 1/2 4TH ST
city: BARABOO
classification: neutral
donor_id: 929938ff-d8e3-40b5-9fe0-5c8eb0c95537
entity_type: Individual
first_name: WISEHEART
full_name: wiseheart katherine
last_name: KATHERINE
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: WI
zip: 53913-0000
", + "Name: riggin don
address: 1 BENT TREE DR
city: LITTLE ROCK
classification: neutral
donor_id: 16e32b84-4835-434c-a787-63967988cfa9
entity_type: Individual
first_name: RIGGIN
full_name: riggin don
last_name: DON
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: AR
zip: 72212-0000
", + "Name: david peterson
address: 6325 OTTAWA LN
city: PENTWATER
classification: neutral
donor_id: 083a5988-c1ad-4b1d-9ced-cd0e6939f96e
entity_type: Individual
first_name: DAVID
full_name: david peterson
last_name: PETERSON
recipient_id: fa79b50e-9eb3-4c65-a25b-968fa5059e75
recipient_name: ivote mi pac
state: MI
zip: 49449-9485
", + "Name: mary krueger
address: 1095 COUNTRYAIR DR
city: WAYLAND
classification: neutral
donor_id: 4256e027-0aad-47d7-9508-9c8a0bb369c2
entity_type: Individual
first_name: MARY
full_name: mary krueger
last_name: KRUEGER
recipient_id: eac1a086-d8a6-4f51-80f9-d92f51ee7e1b
recipient_name: muskegon county democratic party
state: MI
zip: 49348-0000
", + "Name: muskegon county democratic party
classification: neutral
", + "Name: robert j courson
address: 3812 RIDGEWAY PLACE
city: TRAVERSE CITY
classification: neutral
donor_id: 158ba0b1-6f60-4e65-92d4-7b8639e77b8e
entity_type: Individual
first_name: ROBERT
full_name: robert j courson
last_name: J COURSON
recipient_id: 9a96a76f-8b26-4db7-be95-b14987373991
recipient_name: committee to elect betsy coffia
state: MI
zip: 49684-0000
", + "Name: david williams
address: 1455 FOSTER RD
city: NAPA
classification: neutral
donor_id: 435cdf80-ab5c-403c-8716-1a6f041b6791
entity_type: Individual
first_name: DAVID
full_name: david williams
last_name: WILLIAMS
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: CA
zip: 94558-6529
", + "Name: committee to elect yousef rabhi
classification: neutral
donor_id: 11fb01ed-12ca-4719-b8f0-3139fb49dff0
entity_type: corporation
full_name: committee to elect yousef rabhi
recipient_id: 543224af-e8b7-4a5f-9203-b30ead8253cc
recipient_name: people for dylan wegela
state: MI
", + "Name: people for dylan wegela
classification: neutral
", + "Name: dalene mathias
address: 523 MYRTLE STREET
city: MILTON
classification: neutral
company: ufcw local no. 38
donor_id: 60e3854e-ffe7-4ca3-824a-dfa4a4fc2ad5
entity_type: Individual
first_name: DALENE
full_name: dalene mathias
last_name: MATHIAS
occupation: l/u representative
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: PA
zip: 17847-2358
", + "Name: brittany mahalick
address: 36640 MILL LAKE RD
city: GOBLES
classification: neutral
donor_id: dd8fbc71-9d9e-4217-a435-833cc1c0f4d1
entity_type: Individual
first_name: BRITTANY
full_name: brittany mahalick
last_name: MAHALICK
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 49055-0000
", + "Name: james porter
address: 220 RAILROAD S STREER
city: RUPERT
classification: neutral
donor_id: a3f6bba6-bcd6-4a1c-a052-c2acf0e4056e
entity_type: Individual
first_name: JAMES
full_name: james porter
last_name: PORTER
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: WV
zip: 25984-0000
", + "Name: jacob smith
address: 826 COUTANT ST.
city: FLUSHING
classification: neutral
donor_id: f2021f43-141f-49e1-9387-1a77bf3faeac
entity_type: Individual
first_name: JACOB
full_name: jacob smith
last_name: SMITH
recipient_id: 8de8f699-8855-4e6c-9039-0410c821802a
recipient_name: united association of plumbers and pipefitters local 370 pac
state: MI
zip: 48433-0000
", + "Name: united association of plumbers and pipefitters local 370 pac
classification: neutral
", + "Name: rebecca mang
address: 228 S SPRUCE ST
city: TRAVERSE CITY
classification: neutral
donor_id: 3f3bb07c-80f8-443b-a866-e69ac14c3ab9
entity_type: Individual
first_name: REBECCA
full_name: rebecca mang
last_name: MANG
occupation: not employed
recipient_id: 9a96a76f-8b26-4db7-be95-b14987373991
recipient_name: committee to elect betsy coffia
state: MI
zip: 49684-0000
", + "Name: ruth kingscott
address: 737 MASSACHUSETTS BLVD
city: ALMA
classification: neutral
donor_id: d48c8c7c-1eb9-4fa8-ba37-846b5d0686b9
entity_type: Individual
first_name: RUTH
full_name: ruth kingscott
last_name: KINGSCOTT
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 48801-2038
", + "Name: mike burns
classification: neutral
donor_id: 2018ef41-d2dc-49a7-86ee-93dd8777433b
entity_type: vendor
full_name: mike burns
recipient_id: 591aa72b-511b-4dbb-a161-80458f257471
recipient_name: hoffman, jake
", + "Name: hoffman, jake
classification: neutral
", + "Name: anthony king
address: 2024 ONTONAGON AVE SE
city: GRAND RAPIDS
classification: neutral
company: the wellness plan
donor_id: 632cc8c4-5539-45dd-bc79-bfbf12643c0d
entity_type: Individual
first_name: ANTHONY
full_name: anthony king
last_name: KING
occupation: healthcare
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: MI
zip: 49506-5364
", + "Name: michelle williams
address: 6142 ARROWCREST CT NE
city: ROCKFORD
classification: neutral
company: self
donor_id: 9f4d9e40-12d7-482f-b783-1cd037aec187
entity_type: Individual
first_name: MICHELLE
full_name: michelle williams
last_name: WILLIAMS
occupation: homemaker
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 49341-7368
", + "Name: advanced micro targeting
classification: neutral
donor_id: bb81fa88-8a06-42a0-ad5f-7b23f7e5f371
entity_type: vendor
full_name: advanced micro targeting
recipient_id: 2ca0910b-5556-4007-94d1-d7485e9ff876
recipient_name: invest in education committee
", + "Name: invest in education committee
classification: neutral
", + "Name: john staropoli
address: 790 BOYLSTON STREET
city: BOSTON
classification: neutral
donor_id: 787e7792-c390-4d86-8fb3-53061f78273b
entity_type: Individual
first_name: JOHN
full_name: john staropoli
last_name: STAROPOLI
recipient_id: 9a96a76f-8b26-4db7-be95-b14987373991
recipient_name: committee to elect betsy coffia
state: MA
zip: 02199-0000
", + "Name: act blue
classification: neutral
donor_id: b906d3eb-3874-4789-b523-e2eaab415328
entity_type: corporation
full_name: act blue
recipient_id: 4b5c27d8-f1bf-4ff6-b717-d67025ab2a1b
recipient_name: committee to elect maurice imhoff
state: MI
", + "Name: nicole kowalski
address: 756 MONTECILLO RD
city: SAN RAFAEL
classification: neutral
donor_id: 6e929d04-28b6-4c31-b6ff-8d958fe6bb97
entity_type: Individual
first_name: NICOLE
full_name: nicole kowalski
last_name: KOWALSKI
recipient_id: 97b5db6e-a1b8-4013-8304-df0009d1a1b0
recipient_name: noah arbit for michigan
state: CA
zip: 94903-0000
", + "Name: jaime calderone
address: 4749 WILLIS RD
city: GRASS LAKE
classification: neutral
donor_id: 1dfefa19-bb9f-4254-a8d8-538f9f6b7870
entity_type: Individual
first_name: JAIME
full_name: jaime calderone
last_name: CALDERONE
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 49240-9548
", + "Name: richard tully
address: 718 AFTON CT
city: REDLANDS
classification: neutral
company: loma linda physicians medical
donor_id: 67bed0cc-8d2a-4868-8879-dbfb5426914e
entity_type: Individual
first_name: RICHARD
full_name: richard tully
last_name: TULLY
occupation: physician
recipient_id: fa79b50e-9eb3-4c65-a25b-968fa5059e75
recipient_name: ivote mi pac
state: CA
zip: 92374-6343
", + "Name: best western
classification: neutral
donor_id: 2dc71e38-25c9-47df-8779-e02855fda9e0
entity_type: vendor
full_name: best western
recipient_id: 349703c0-a6d9-45a1-8491-b7f9ba0d287d
recipient_name: unite here arizona
", + "Name: brendan floyd
address: 5030 FIRST AVENUE SOUTH SUITE 200
city: SEATTLE
classification: neutral
company: l0021
donor_id: a8ceab02-2f26-4131-a053-4976c0170b1e
entity_type: Individual
first_name: BRENDAN
full_name: brendan floyd
last_name: FLOYD
occupation: l/u representative
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: WA
zip: 98134-2438
", + "Name: stephen brown
address: 1507 SHADFORD ROAD
city: ANN ARBOR
classification: neutral
company: secretorylga inc.
donor_id: 0e75f687-9a87-49bb-90d8-889c1d8b3755
entity_type: Individual
first_name: STEPHEN
full_name: stephen brown
last_name: BROWN
occupation: cso
recipient_id: 06ebbb03-574c-445b-9416-7d2134a06d1f
recipient_name: committee to elect james e johnson jr
state: MI
zip: 48104-0000
", + "Name: committee to elect james e johnson jr
classification: neutral
", + "Name: lori carpentier
address: 8033 IVY GLEN PARK
city: WHITE LAKE
classification: neutral
company: susan thompson buffett fund
donor_id: 9c905232-4d69-46d4-8517-8f21675bcfcf
entity_type: Individual
first_name: LORI
full_name: lori carpentier
last_name: CARPENTIER
occupation: director
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 48386-0000
", + "Name: jose leon
address: 1305 EAST 27TH STREET
city: KANSAS CITY
classification: neutral
company: ufcw local no. 2
donor_id: bf408bc4-21cd-4e71-93d9-0acf9bee7aca
entity_type: Individual
first_name: JOSE
full_name: jose leon
last_name: LEON
occupation: l/u representative
recipient_id: b3084657-ebf9-44db-8f15-1da48eaaa2d9
recipient_name: united food and commercial workers active ballot club
state: MO
zip: 64108-2921
", + "Name: douglas martin
address: 1100 DEER TRACT RD
city: OPELIKA
classification: neutral
company: none
donor_id: be3b87b2-4131-4d71-86f7-da986c906aa4
entity_type: Individual
first_name: DOUGLAS
full_name: douglas martin
last_name: MARTIN
occupation: retired
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: AL
zip: 36801-0000
", + "Name: gaile chiles
address: 5402 MAPLE ST
city: BELLAIRE
classification: neutral
donor_id: 1eb90df6-e4c6-4711-b05c-d94a633e7c16
entity_type: Individual
first_name: GAILE
full_name: gaile chiles
last_name: CHILES
recipient_id: fa79b50e-9eb3-4c65-a25b-968fa5059e75
recipient_name: ivote mi pac
state: TX
zip: 77401-4705
", + "Name: linda thompson
address: 111 BERNARD TER
city: CRIPPLE CREEK
classification: neutral
donor_id: 4c168995-73dd-4ac6-8ef0-8dab6fa6f828
entity_type: Individual
first_name: LINDA
full_name: linda thompson
last_name: THOMPSON
recipient_id: c00726af-3b4b-46c6-88df-8c62dfbe4733
recipient_name: progressive turnout project
state: CO
zip: 80813-9558
", + "Name: peter gavin
address: 2351 SPAULDING RD.
city: ATTICA
classification: neutral
donor_id: 5b3a5842-dad6-498d-b528-5d5f1b299da8
entity_type: Individual
first_name: PETER
full_name: peter gavin
last_name: GAVIN
recipient_id: 27759108-2ed1-4059-a134-6fd6f901201a
recipient_name: lapeer county republican party
state: MI
zip: 48412-0000
", + "Name: jennifer cox
address: 6145 LAKE ROAD
city: OSSEO
classification: neutral
donor_id: f23fe218-ef77-4e31-9058-01e336e82919
entity_type: Individual
first_name: JENNIFER
full_name: jennifer cox
last_name: COX
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
recipient_name: reproductive freedom for all
state: MI
zip: 49266-0000
", + "Name: duane maurer
address: 4213 KIPPER RD
city: BAD AXE
classification: neutral
donor_id: f435259a-2b8e-43b4-a148-826d18ab91a4
entity_type: Individual
first_name: DUANE
full_name: duane maurer
last_name: MAURER
recipient_id: 9056a6ee-a211-425a-9ea4-4203d9ec8276
recipient_name: michigan farm bureau political action committee
state: MI
zip: 48413-8851
", + "Name: michigan beer & wine wholesalers
classification: neutral
donor_id: be583d56-2245-460e-8633-8438c0d65a6e
entity_type: corporation
full_name: michigan beer & wine wholesalers
recipient_id: cf320278-7c53-4ae3-87d5-8b8382acda89
recipient_name: steve johnson for state rep
state: MI
" ], "type": "scatter", "x": [ - 0.4182243125490408, - 0.12286879065958844, - 0.6730431696885844, - 0.38165116541180344, - 0.6084965344664286, - 0.18155558675901884, - 0.7722862313192606, - 0.5368181409256901, - 0.8304626469521129, - 0.7924139234898422, - 0.8266354543284289, - 0.4023039585223629, - 0.5084198498293618, - 0.23992481624351925, - 0.2742000416622462, - 0.15570283642495664, - 0.07513674080757637, - 0.7247552078664479, - 0.2586357176925591, - 0.595945044435614, - 0.9428542201780316, - 0.03304679952258993, - 0.6013564651959642, - 0.1130639188502468, - 0.5531504465254558, - 0.1635981270944994, - 0.05512117222879742, - 0.32578353530864457, - 0.27440213390552737, - 0.2728250610713022, - 0.6346565064837861, - 0.6327007577432437, - 0.800297854626628, - 0.526779936668903, - 0.413948124857326, - 0.09276814106220677, - 0.662108954544855, - 0.07163295816605642, - 0.44119458804978295, - 0.7364515013041172, - 0.7827775151390383, - 0.9600359726880752, - 0.8511753697833563, - 0.05194805532761382, - 0.03187584930858911, - 0.07426685281627932, - 0.5257999712304688, - 0.9998698320754983, - 0.09471702229050472, - 0.6953901849658966, - 0.03446402354654854, - 0.9082570345357789, - 0.3740122792611037, - 0.977854801698089, - 0.5436816885151938, - 0.06202421257916635, - 0.8589937476561325, - 0.06879886671193436, - 0.19921682827804632, - 0.1823584228427031, - 0.37549158943196925, - 0.5433115547736789, - 0.37848025459696877, - 0.3821391536049519, - 0.7204214783753378, - 0.2955343345493908, - 0.09053866681881584, - 0.7181048560087516, - 0.10310287300704979, - 0.8247840830312709, - 0.1573630170264504, - 0.31305791514229697, - 0.298647499376007, - 0.3246624829381992, - 0.19852054651169693, - 0.3328704753356456, - 0.33203393677870674, - 0.5461279353327784, - 0.9636084967560627, - 0.9503884723051484, - 0.13747604708068628, - 0.3499260998923053, - 0.3181124346701171, - 0.89080246263295, - 0.9521646983336837, - 0.6776948411821848, - 0.0023771443647881974, - 0.7007214129943925, - 
0.7188906153197968, - 0.47055154706870017, - 0.19043749918150743, - 0.5274116361492907, - 0.9162463356603696, - 0.7042334738295596, - 0.555788147264811, - 0.5805679633404117, - 0.587704695878027, - 0.916634041055854, - 0.7948577020793985, - 0.9210876029743161, - 0.834199864808296, - 0.5989925957177575, - 0.05973078995013337, - 0.5593951498649633, - 0.5229468203255856, - 0.22007362873840486, - 0.37301066653863624, - 0.8613129225222332, - 0.9663892923019699, - 0.2275256207367028, - 0.0852382135963593, - 0.0914406510425998, - 0.9425745666137786, - 0.3019474379086241, - 0.2619562675328274, - 0.48218022499136737, - 0.5293212253918783, - 0.41808707877840445, - 0.14711158829428328, - 0.42926818011737133, - 0.9694266665187994, - 0.4404718698088387, - 0.4277213938753692, - 0.7059759544943667, - 0.4611021425875542, - 0.13940667248499528, - 0.3393815448042514, - 0.6370268640561303, - 0.9851894520572745, - 0.3247821296168134, - 0.9186278106648778, - 0.18507593174525072, - 0.5845953849421676, - 0.44175944307536974, - 0.7255980413609877, - 0.6058132814274794, - 0.7703024251104211, - 0.47443124751760235, - 0.9573079778783831, - 0.0201693226965588, - 0.17086936775877049, - 0.5291812256005789, - 0.5621062195646831, - 0.2121217358781844, - 0.16862303760247477, - 0.8846357375826375, - 0.0875467755337247, - 0.9473667691929577, - 0.8541827253649632, - 0.3414075728554137, - 0.9005048863870916, - 0.3318561006769827, - 0.7408684543182315, - 0.6149491168624189, - 0.12355952994556385, - 0.08997327822205015, - 0.21535391032155426, - 0.8323549266756429, - 0.8385234321105272, - 0.9240127894624793, - 0.6802728591951641, - 0.25656414507004344, - 0.020212382594376965, - 0.32444561774289593, - 0.4564806171162211, - 0.838803404513024, - 0.6322124026692795, - 0.8505181106970376, - 0.0897773631019545, - 0.7607451357487841, - 0.02312833765025224, - 0.05596958524873419, - 0.3187675293980876, - 0.5191285820034173, - 0.4349682989231034, - 0.04781523934390508, - 0.014269300880037306, - 0.9636590456207981, 
- 0.8680862155815134, - 0.4363707938884992, - 0.20133087739958255, - 0.6234379896430121, - 0.6314926226168458, - 0.29978148854693865, - 0.33721825060791266, - 0.7518492361353024, - 0.4442228752887084, - 0.04237200971819888, - 0.5201251204037126, - 0.038579501382332126, - 0.9110645875753355, - 0.5593069337955722, - 0.8668565351624634, - 0.42077304608666055, - 0.5465171974419871, - 0.7333209824474588, - 0.4039327719907384, - 0.34114125407236195, - 0.01777064460825195, - 0.992283435751248 + -0.43213011477334773, + -0.4430121181061702, + 0.5112616608477374, + 0.5931128851827875, + -0.5362524214379699, + -0.6830386686787555, + 0.8321531242995657, + 0.7947955540041922, + -0.015339979027039215, + -0.004598212759833403, + -0.5242232381582058, + -0.7859456498474336, + -0.010396805586450043, + -0.5907762774639415, + -0.8824105534014709, + 0.37340208125254337, + 0.42289489293422566, + -0.06531798460926314, + -0.06282488255576262, + 0.4571954695717908, + 0.5782995282255566, + 0.7251331716127464, + 0.8930310568108012, + 0.3440714981019444, + 0.6452743136225527, + -0.6833691066109641, + -0.922556143412663, + -0.4578754972216337, + -0.5260075373652568, + -0.13576410630761374, + -0.19017196191254732, + -0.7794963369787193, + -0.8284631501006486, + 0.05049348978287421, + 0.04533793523578743, + 0.4307114594735427, + 0.6211510564756442, + -0.488645870104004, + -0.5159370768528078, + 0.8299385473651612, + 0.8835075264311746, + 0.21235561702753328, + 0.22007339811741142, + -0.7317074047395691, + -0.35898258388050897, + 0.3513819881725677, + 0.6251699386269727, + 0.41070323859290586, + 0.5885337748775, + 0.7476884190896049, + -0.7442157911045258, + -0.8182264745850724, + -0.4240986484371276, + -0.7884885800275052, + 0.41998910604831713, + -0.8304528603225746, + 0.9329149222041224, + 0.12267587639275908, + 0.15152247306622144, + 0.7743223692717235, + -0.020367479302382467, + 0.5525550860920077, + -0.7341525511916877, + 0.3943452039362869, + -0.697118996520046, + 0.3729152346875802, + 
0.45108517908588003, + 0.44882023012427213, + -0.522022411657337, + -0.09362722072652985, + -0.12627985133706823, + 0.5297743915831321, + 0.3733883300104818, + 0.2098519168108027, + -0.3787781527763615, + -0.41621756255078585, + 0.8894556041854287, + 0.9294865179016704, + -0.798347214605227, + -0.18131224898913595, + -0.1595247701666436, + 0.6522974110153149, + 0.8567967077274671, + -0.7026192767091103, + -0.7884728651042877, + -0.8276350144938375, + -0.6098376430856963, + -0.5862824632127743, + 0.6554230284551037, + 0.8922189766627708, + 0.4295097745598557, + 0.43460925643764853, + -0.3980280117268984, + 0.7024852559248044, + 0.7717529582442381, + -0.6806600992316747, + -0.926723528147117, + -0.6740662992025501, + -0.645856740142745, + -0.5177228561621657, + -0.7617659088160793, + -0.7442137071319165, + -0.4309900961374921, + 0.7245290699084046, + 0.7866931209891337, + -0.46411496857079576, + 0.3232896688907876, + 0.37115926386562975, + 0.8144104230201399, + 0.8482266969909324, + -0.6862355564324931, + 0.8143822305059775, + 0.1952194444171441, + 0.2613197935543201, + -0.44116587580288175, + 0.34219400777112563, + 0.3482854537227453, + 0.3895535076931305, + -0.7845181509980911, + -0.010628018143674797, + -0.333303022046739, + 0.391453184513052, + -0.8187977817844195, + 0.675110539921739, + -0.3990251391372163, + -0.30002656361791447, + -0.30032745656510185, + 0.32580390247949387, + -0.41032900004231715, + 0.7814157333397895, + -0.6545012668193522, + -0.8250766319170834, + 0.11642140741252306, + 0.3584413729045867, + 0.6893479879923822, + -0.5817258086810362 ], "y": [ - 0.09053726824382247, - 0.571085214777101, - 0.5199666766946885, - 0.33766327379542094, - 0.17196466768963936, - 0.17708608014427518, - 0.04649454781195783, - 0.37080565676900146, - 0.3602866247185619, - 0.9483925173875926, - 0.3061539627540061, - 0.9643804220706982, - 0.8336885167043149, - 0.5944498275635773, - 0.2373268562908326, - 0.23741932367240448, - 0.32127102230894566, - 0.3661437355856225, - 
0.7791505090281524, - 0.3648985367210805, - 0.6244837238804738, - 0.9012137046519791, - 0.5219101415039136, - 0.39453602200590676, - 0.2009582712064717, - 0.04224314617430658, - 0.2381682330796122, - 0.33811323660241943, - 0.5216765314868881, - 0.6001026871900049, - 0.991844460003468, - 0.3343459796676115, - 0.8957623407464501, - 0.4208812619135248, - 0.31304614249644347, - 0.6773365837969099, - 0.4307004647175262, - 0.3309683982450944, - 0.2697998035002954, - 0.9727770125665405, - 0.40557198035837094, - 0.35532572275494023, - 0.5850986908522726, - 0.17296378957033465, - 0.6628083689885368, - 0.6160873747407943, - 0.025297953521542405, - 0.24028581536328997, - 0.5186581897030644, - 0.8423383207045981, - 0.7537809293531343, - 0.3192831323823997, - 0.17542400609184483, - 0.008409380348177398, - 0.938767234846119, - 0.24033413659841596, - 0.8791466031622056, - 0.5634679987017406, - 0.05938145280899054, - 0.6012106694454529, - 0.6705222836834548, - 0.3900960314334032, - 0.055894273053114896, - 0.14933184162295132, - 0.8151159149468827, - 0.17619771419691865, - 0.2981410655965283, - 0.7334929583472656, - 0.04283815208078323, - 0.922341377568881, - 0.3199684158322815, - 0.1278305132468397, - 0.21532966919867302, - 0.0731473655342364, - 0.4898861106787329, - 0.2695720924906413, - 0.09533319097359638, - 0.9874110419208606, - 0.1333966979371528, - 0.2529891644068947, - 0.45431497833000367, - 0.24454670425362057, - 0.20002447568886628, - 0.6267294109959968, - 0.5221172076712435, - 0.6512622326935055, - 0.3355480553373167, - 0.4834545718278357, - 0.4847615611240751, - 0.20619722773579274, - 0.9419075807648644, - 0.3098874271134545, - 0.04149975738749545, - 0.32266487999330984, - 0.4295667428124167, - 0.35350564895305514, - 0.15069304516745607, - 0.41535454584101794, - 0.8821215709600496, - 0.9542382277667263, - 0.7205270186163313, - 0.31541428705224306, - 0.010366221042083845, - 0.06016942899581168, - 0.8867112408398291, - 0.5204579980957379, - 0.4500538798110242, - 
0.40395348439090084, - 0.5717872069066212, - 0.42052616285893474, - 0.2660491488293679, - 0.10782775946098799, - 0.7302384542961842, - 0.8520196094107113, - 0.4140065537970282, - 0.4467311570808764, - 0.5144551437666581, - 0.7454337953380579, - 0.03395115206665145, - 0.7077207700167599, - 0.9024846524956353, - 0.055897802218322856, - 0.2880647319459674, - 0.9328536520894143, - 0.9298960866412943, - 0.6352288779182178, - 0.04153202488293273, - 0.7834166246251234, - 0.6710484758334021, - 0.3202314429055858, - 0.9961038345306213, - 0.4107398412471005, - 0.24013807075121119, - 0.8599268392047722, - 0.4318165589087314, - 0.2693681584998491, - 0.3340702546567942, - 0.4421375373865315, - 0.5492873750243871, - 0.5981086798045652, - 0.3635517670405215, - 0.42641694849778966, - 0.3333136626479075, - 0.8848427298858184, - 0.6648266103848882, - 0.9344432405222354, - 0.249116699886752, - 0.6201266549140614, - 0.16781555203357146, - 0.4937592635708411, - 0.4248880785102581, - 0.9435179236599912, - 0.07011604000159166, - 0.9078978130468089, - 0.6072525121642058, - 0.23700988477155205, - 0.05477321631284726, - 0.9642772106357639, - 0.019989772968585173, - 0.29119156039108685, - 0.6217058876501556, - 0.1341994714416056, - 0.28871122138225125, - 0.06013197669987258, - 0.22993075379681738, - 0.5752985482362863, - 0.10059463740220753, - 0.8157570218353161, - 0.42203254876563234, - 0.7925454632595156, - 0.2002886163837997, - 0.8622415881936324, - 0.3169605131706372, - 0.6014235590484225, - 0.9958360522915445, - 0.5260776190209286, - 0.8350595230795331, - 0.24102842320743, - 0.19048093242734687, - 0.7005910562446783, - 0.29050814087118004, - 0.8508124987550889, - 0.6714278208298593, - 0.9756800437762957, - 0.040563128366188694, - 0.5097617399826666, - 0.8842114977564064, - 0.13201947050262697, - 0.09959517902538939, - 0.2318219208408404, - 0.888980486534156, - 0.9513646744432486, - 0.1294716874165911, - 0.5603277981830703, - 0.4868902788925622, - 0.038844634468288675, - 
0.547451424618544, - 0.32345881810688737, - 0.20307680326083377, - 0.9810704436128125 + 0.7121785088398352, + 0.6637386367247285, + -0.598438979460561, + -0.6680444571236058, + 0.44361523531892444, + 0.5765261392732328, + 0.26080967071708444, + 0.24096872716900972, + -0.7089068308054137, + -0.9755872442336094, + -0.34274934403468454, + -0.5420785313722902, + -0.7621228818670577, + 0.2648966145363047, + 0.403612631630065, + 0.6789811252229511, + 0.7959212776137778, + 0.7900197865139884, + 0.8310972903269801, + -0.6713224168822408, + -0.8317527771805896, + -0.21408032557683887, + -0.28978736522770937, + 0.604772500031668, + -0.1954793343876051, + -0.1310109485870865, + -0.23208476462159833, + 0.6489829876156022, + 0.7932680240853172, + 0.6211459206982282, + 0.8677949363592646, + -0.2060018213633483, + -0.2001301912924086, + 0.6598165306544911, + 0.7693710498570913, + 0.5144371164893282, + 0.7073620048724625, + -0.8104899180559856, + -0.8622970096309992, + -0.06816718399097474, + -0.027959167306021424, + 0.7449128610002976, + 0.8604966990942203, + -0.1689229441285857, + 0.5112080095793095, + 0.5807404071548778, + -0.6965363437404857, + 0.6849817062815453, + -0.35224437514754586, + -0.44360886803933014, + 0.09593694912577341, + 0.08889623846074263, + 0.6029760119136741, + 0.0924254061247565, + 0.7455745376985553, + -0.21470076141573366, + -0.02976484897864857, + -0.7796703040360509, + -0.9657058443610594, + -0.06769339318848107, + -0.7307241565679895, + 0.6450827404437204, + -0.15363835146843016, + 0.6724237806357076, + -0.15963946240330748, + -0.6981965728205204, + -0.8219907437087358, + 0.5099911479349106, + -0.8213284146465071, + -0.7638667955160672, + -1, + 0.6051504434721513, + 0.630517583672798, + -0.08910955067197081, + -0.7157507162452663, + -0.7993949320290974, + 0.21748807668373418, + 0.23472763490409848, + 0.3744351904237455, + -0.912907039528394, + -0.9377070802046585, + 0.2987504763255712, + 0.394186370625655, + -0.3919634530446739, + -0.44844761451575715, 
+ -0.18103128274552913, + -0.6480839567003398, + -0.6714013657386378, + -0.05639446704405653, + -0.06884618548241157, + -0.7884482829492515, + 0.7254199795788893, + 0.5991531652056148, + -0.06419545984665424, + -0.03388402641622064, + -0.1428808457598657, + -0.19435412457565315, + -0.1603371299138783, + 0.08169793780859752, + 0.39067339539297724, + 0.5799030308386766, + 0.345012671198623, + 0.6678716426804834, + -0.5318696248666036, + -0.5727295276261749, + 0.6775596287803459, + 0.5304054329523671, + 0.7125808229017521, + 0.5152301783287565, + 0.5153009464739415, + 0.32033499347523814, + -0.04159208903692613, + -0.7188816814442675, + -0.9964621174020154, + 0.6293531161898335, + 0.5575985531739881, + -0.8363676412114709, + -0.9280481286500764, + 0.3550572121642179, + -0.8792824988230634, + -0.5662007883069198, + 0.7030652464929427, + 0.12658592510635783, + -0.20166127879656748, + 0.5570923883380997, + -0.8222108886920065, + -0.7885877762376592, + 0.5627700325736714, + 0.6028638927072018, + -0.05023816831269756, + 0.09509796558003482, + -0.165232208286361, + -0.7309343925864511, + 0.621621947321043, + -0.052931911634106425, + 0.48262281712395333 ] } ], "layout": { - "annotations": [ - { - "showarrow": true, - "text": "graphs", - "x": 0.005, - "xref": "paper", - "y": -0.002, - "yref": "paper" - } - ], "hovermode": "closest", "margin": { "b": 20, @@ -10403,7 +2890,7 @@ "r": 5, "t": 40 }, - "showlegend": false, + "showlegend": true, "template": { "data": { "bar": [ @@ -11224,24 +3711,24 @@ "font": { "size": 16 }, - "text": "Network graph made with Python" + "text": "Network Graph Indicating Campaign Contributions from 2018-2022" }, "xaxis": { - "showgrid": false, + "showgrid": true, "showticklabels": false, - "zeroline": false + "zeroline": true }, "yaxis": { - "showgrid": false, + "showgrid": true, "showticklabels": false, - "zeroline": false + "zeroline": true } } }, "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "G = nx.MultiDiGraph()\n", - "\n", - "G.add_node(\"William Stoner\", Age=10, Weight=110)\n", - "G.add_node(\"KALAMAZOO ANESTHESIOLOGY PC\", Age=50, Weight=180)\n", - "G.add_node(\"Bob Kushman\", Age=90, Weight=111)\n", - "G.add_node(\"James Engelson\", Age=40, Weight=10)\n", - "G.add_node(\"Allen Wolf\", Age=30, Weight=1710)\n", - "\n", - "G.add_edge(\"William Stoner\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=10.00, amount=10.00, year=2017)\n", - "G.add_edge(\"KALAMAZOO ANESTHESIOLOGY PC\", \"Bob Kushman\", weight=1530, amount=1530, year=2017)\n", - "G.add_edge(\"Bob Kushman\", \"KALAMAZOO ANESTHESIOLOGY PC\", weight=530, amount=530, year=2017)\n", - "G.add_edge(\"James Engelson\", \"Bob Kushman\", weight=90.00, amount=90.00, year=2017)\n", - "G.add_edge(\"Allen Wolf\", \"William Stoner\", weight=111.50, amount=111.50, year=2017)\n", - "\n", - "# Create Plotly graph\n", - "edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", - "hovertext = []\n", - "\n", - "for edge in G.edges(data=True):\n", - " x0, y0 = G.nodes[edge[0]]['Age'], G.nodes[edge[0]]['Weight']\n", - " x1, y1 = G.nodes[edge[1]]['Age'], G.nodes[edge[1]]['Weight']\n", - " edge_trace['x'] += tuple([x0, x1, None])\n", - " edge_trace['y'] += tuple([y0, y1, None])\n", - " hovertext.append(f\"Amount: {edge[2]['amount']:.2f}, Weight: {edge[2]['weight']:.2f}\")\n", - "\n", - "edge_trace['hovertext'] = hovertext\n", - "\n", - "node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", - "\n", - "for node in G.nodes():\n", - " x, y = G.nodes[node]['Age'], G.nodes[node]['Weight']\n", - " node_trace['x'] += tuple([x])\n", - " node_trace['y'] += tuple([y])\n", - " node_info = node + '
' + 'Age: ' + str(G.nodes[node]['Age']) + '
' + 'Weight: ' + str(G.nodes[node]['Weight'])\n", - " node_trace['text'] += tuple([node_info])\n", - "\n", - "fig = go.Figure(data=[edge_trace, node_trace],\n", - " layout=go.Layout(\n", - " title='
Network graph made with Plotly',\n", - " titlefont=dict(size=16),\n", - " showlegend=False,\n", - " hovermode='closest',\n", - " margin=dict(b=20,l=5,r=5,t=40),\n", - " xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n", - " yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))\n", - "\n", - "fig.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "text", - "hovertext": [ - "Amount: 5.00", - "Amount: 100.00", - "Amount: 15.00", - "Amount: 151.76", - "Amount: 75.00", - "Amount: 11.12", - "Amount: 1.00", - "Amount: 1.00", - "Amount: 5.88", - "Amount: 250.00", - "Amount: 15.00", - "Amount: 273.00", - "Amount: 25.44", - "Amount: 100.00", - "Amount: 50.00", - "Amount: 400.00", - "Amount: 300.00", - "Amount: 1020.00", - "Amount: 100.00", - "Amount: 100.00", - "Amount: 5.00", - "Amount: 15.00", - "Amount: 100.00", - "Amount: 13.00", - "Amount: 750.00", - "Amount: 15.00", - "Amount: 500.00", - "Amount: 2.50", - "Amount: 1.00", - "Amount: 250.00", - "Amount: 35.00", - "Amount: 40.00", - "Amount: 9.29", - "Amount: 5.00", - "Amount: 19.00", - "Amount: 75.00", - "Amount: 25.15", - "Amount: 15.78", - "Amount: 1.00", - "Amount: 250.00", - "Amount: 1000.00", - "Amount: 2.87", - "Amount: 67.18", - "Amount: 150.00", - "Amount: 29.40", - "Amount: 1.00", - "Amount: 500.00", - "Amount: 60.00", - "Amount: 10.00", - "Amount: 76.32" - ], - "line": { - "color": "#888" - }, - "mode": "lines", - "type": "scatter", - "x": [], - "y": [] - }, - { - "hoverinfo": "text", - "marker": { - "color": [ - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - 
"green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green", - "green" - ], - "colorscale": [ - [ - 0, - "rgb(255,255,217)" - ], - [ - 0.125, - "rgb(237,248,177)" - ], - [ - 0.25, - "rgb(199,233,180)" - ], - [ - 0.375, - "rgb(127,205,187)" - ], - [ - 0.5, - "rgb(65,182,196)" - ], - [ - 0.625, - "rgb(29,145,192)" - ], - [ - 0.75, - "rgb(34,94,168)" - ], - [ - 0.875, - "rgb(37,52,148)" - ], - [ - 1, - "rgb(8,29,88)" - ] - ], - "showscale": true, - "size": 10 - }, - "mode": "markers", - "text": [ - "Name: rachel puthuff
donor_id: 639646bf-5176-474c-b800-1afb34c55b53
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel puthuff
recipient_name: reproductive freedom for all
address: 3717 WHITAKER
city: SCHERTZ
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 639646bf-5176-474c-b800-1afb34c55b53
last_name: PUTHUFF
state: TX
zip: 78154-0000
", - "Name: reproductive freedom for all
classification: neutral
", - "Name: james bennett
donor_id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: james bennett
recipient_name: reproductive freedom for all
address: 533 W OAK ST
city: MASON
classification: neutral
entity_type: Individual
first_name: JAMES
id: 447b61fb-39cc-41a9-8dfc-2dbb4e2f3774
last_name: BENNETT
state: MI
zip: 48854-0000
", - "Name: sonny mandouh mr.^
donor_id: 34d28c8d-c0fe-463d-9afe-73269a47389b
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: sonny mandouh mr.^
recipient_name: realtors political action committee of michigan
address: 23760 HOLLANDER ST
city: DEARBORN
classification: neutral
entity_type: Individual
first_name: SONNY
id: 34d28c8d-c0fe-463d-9afe-73269a47389b
last_name: MANDOUH MR.^
state: MI
zip: 48128-0000
", - "Name: realtors political action committee of michigan
classification: neutral
", - "Name: charles crider
donor_id: e765ba37-66d2-4b65-9f42-3902dca518b6
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles crider
recipient_name: reproductive freedom for all
address: 1403 WEST HIGHLAND BLVD.
city: BATTLE CREEK
classification: neutral
entity_type: Individual
first_name: CHARLES
id: e765ba37-66d2-4b65-9f42-3902dca518b6
last_name: CRIDER
state: MI
zip: 49015-0000
", - "Name: michelle zukowski-serlin
donor_id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: michelle zukowski-serlin
recipient_name: reproductive freedom for all
address: 4853 LANDING WAY
city: KALAMAZOO
classification: neutral
company: choices for change counseling
entity_type: Individual
first_name: MICHELLE
id: 5c0fe744-23e3-4346-b112-0730c6d4b60c
last_name: ZUKOWSKI-SERLIN
occupation: business owners and clinical s
state: MI
zip: 49048-6153
", - "Name: diana gibson-lee
donor_id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: diana gibson-lee
recipient_name: veronica klinefelt for state senate
address: 7450 W DYER RD
city: TWINING
classification: neutral
entity_type: Individual
first_name: DIANA
id: df25775c-dad2-4f56-8fcd-b31171a7dcb0
last_name: GIBSON-LEE
state: MI
zip: 48766-9773
", - "Name: veronica klinefelt for state senate
classification: neutral
", - "Name: edward kazala
donor_id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: edward kazala
recipient_name: elect padma kuppa
address: 70 REVERE CT
city: LAFAYETTE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 74b522f4-6214-42cd-9d68-7abfe3e18a07
last_name: KAZALA
state: CA
zip: 94549-0000
", - "Name: andrea kovalsky
donor_id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: andrea kovalsky
recipient_name: veronica klinefelt for state senate
address: 497 SAINT MARKS AVE APT 5P
city: BROOKLYN
classification: neutral
entity_type: Individual
first_name: ANDREA
id: 3dc1360d-e9e8-4e55-ac2e-f608f489ab94
last_name: KOVALSKY
state: NY
zip: 11238-5792
", - "Name: colin palmer
donor_id: ad440dcd-79ad-4323-8f19-c7a491f897f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: colin palmer
recipient_name: veronica klinefelt for state senate
address: 531 E 20TH ST APT 10D
city: NEW YORK
classification: neutral
company: not employed
entity_type: Individual
first_name: COLIN
id: ad440dcd-79ad-4323-8f19-c7a491f897f7
last_name: PALMER
occupation: not employed
state: NY
zip: 10010-7604
", - "Name: julie svinicki ms.^
donor_id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: julie svinicki ms.^
recipient_name: realtors political action committee of michigan
address: 1608 KIRTLAND DRIVE
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: JULIE
id: 4cb88517-6bc4-45a1-ae2f-be0b76688898
last_name: SVINICKI MS.^
state: MI
zip: 48103-0000
", - "Name: audrey lance
donor_id: e8ef0925-3f10-4ebf-b025-dea32e506a50
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: audrey lance
recipient_name: reproductive freedom for all
address: 3945 FORBES AVE APT 444
city: PITTSBURGH
classification: neutral
entity_type: Individual
first_name: AUDREY
id: e8ef0925-3f10-4ebf-b025-dea32e506a50
last_name: LANCE
occupation: physician
state: PA
zip: 15213-0000
", - "Name: walker c evans
donor_id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: walker c evans
recipient_name: reproductive freedom for all
address: 2810 NORTHVILLE DR NE
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: WALKER C
id: 9853cee2-ff37-41bd-a469-0e338a4fefc9
last_name: EVANS
state: MI
zip: 49525-0000
", - "Name: lori henderson
donor_id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
recipient_id: 6b51e739-dd22-4556-8555-6e11264ef4ce
full_name: lori henderson
recipient_name: planned parenthood advocates of mi
address: 2401 HARDWOOD AVE
city: ROYAK OAK
classification: neutral
entity_type: Individual
first_name: LORI
id: 3042129c-b91e-4d6a-b723-74cd7ec55e75
last_name: HENDERSON
state: MI
zip: 48067-0000
", - "Name: planned parenthood advocates of mi
classification: neutral
", - "Name: brett lundie
donor_id: 932450e5-f8fc-4cb2-baac-acfad686561f
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: brett lundie
recipient_name: citizens to support mi women and children
address: 7779 CIRCLE DR
city: LAINGSBURG
classification: neutral
entity_type: Individual
first_name: BRETT
id: 932450e5-f8fc-4cb2-baac-acfad686561f
last_name: LUNDIE
state: MI
zip: 48848-0000
", - "Name: citizens to support mi women and children
classification: neutral
", - "Name: ian robinson
donor_id: 757923ec-02e3-424e-81b9-4152f6dd165b
recipient_id: 06ebbb03-574c-445b-9416-7d2134a06d1f
full_name: ian robinson
recipient_name: committee to elect james e johnson jr
address: 3435 BRENTWOOD CT
city: ANN ARBOR
classification: neutral
company: university of michigan
entity_type: Individual
first_name: IAN
id: 757923ec-02e3-424e-81b9-4152f6dd165b
last_name: ROBINSON
occupation: faculty
state: MI
zip: 48108-1757
", - "Name: committee to elect james e johnson jr
classification: neutral
", - "Name: kelly bean
donor_id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: kelly bean
recipient_name: friends of jerry neyer
address: 1405 E BATTLE RD
city: ROSEBUSH
classification: neutral
entity_type: Individual
first_name: KELLY
id: 8521781f-6ca7-43dc-90a6-c1af13da9e2a
last_name: BEAN
state: MI
zip: 48878-9732
", - "Name: friends of jerry neyer
classification: neutral
", - "Name: sandra johnson
donor_id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
recipient_id: 7ee2db24-b832-4f1b-af2e-e9c8eaf706bd
full_name: sandra johnson
recipient_name: committee to elect charise anderson
address: 424 N 21ST ST 0
city: MONTEBELLO
classification: neutral
entity_type: Individual
first_name: SANDRA
id: 49bcd93b-241b-4343-8bbf-bcf70d828c8e
last_name: JOHNSON
occupation: eligibility worker
state: CA
zip: 90640-0000
", - "Name: committee to elect charise anderson
classification: neutral
", - "Name: christopher mishler
donor_id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
recipient_id: 2f221dfb-d552-4234-83f8-cd05d10f1266
full_name: christopher mishler
recipient_name: citizens to support mi women and children
address: 3690 VORHIES ROAD
city: ANN ARBOR
classification: neutral
entity_type: Individual
first_name: CHRISTOPHER
id: 7b8ee884-4471-493d-bf17-386d57bf3f6d
last_name: MISHLER
state: MI
zip: 48105-0000
", - "Name: stacy leroy daniels
donor_id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: stacy leroy daniels
recipient_name: bill g schuette for state representative
address: 3901 ORCHARD DRIVE
city: MIDLAND
classification: neutral
entity_type: Individual
first_name: STACY LEROY
id: 5a40e7db-bb2a-47f4-ac92-5584988c8a5e
last_name: DANIELS
state: MI
zip: 48640-0000
", - "Name: bill g schuette for state representative
classification: neutral
", - "Name: suzanne r weinheimer
donor_id: 029a23eb-d90f-405b-995c-c8dc266e255f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: suzanne r weinheimer
recipient_name: reproductive freedom for all
address: 11045 8TH AVENUE NE APT 826
city: SEATTLE
classification: neutral
entity_type: Individual
first_name: SUZANNE R
id: 029a23eb-d90f-405b-995c-c8dc266e255f
last_name: WEINHEIMER
state: WA
zip: 98125-0000
", - "Name: dustin shaeffer mr.^
donor_id: fc041110-7c11-47af-b1bf-5daca974e4ee
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: dustin shaeffer mr.^
recipient_name: realtors political action committee of michigan
address: 60451 MOJAVE LANE
city: WASHINGTON
classification: neutral
entity_type: Individual
first_name: DUSTIN
id: fc041110-7c11-47af-b1bf-5daca974e4ee
last_name: SHAEFFER MR.^
state: MI
zip: 48094-0000
", - "Name: debra byl
donor_id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: debra byl
recipient_name: reproductive freedom for all
address: 987 BRADFORD GREENS
city: GRAND RAPIDS
classification: neutral
entity_type: Individual
first_name: DEBRA
id: b8e9c951-5c8c-42d3-91e1-d6457b28f2ae
last_name: BYL
state: MI
zip: 49525-0000
", - "Name: pamela wimp
donor_id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: pamela wimp
recipient_name: reproductive freedom for all
address: 8030 MERCER CT NE
city: LACEY
classification: neutral
entity_type: Individual
first_name: PAMELA
id: 88ccb4d4-c756-4039-bac2-77a610d69bb0
last_name: WIMP
state: WA
zip: 98516-6336
", - "Name: lori wortz
donor_id: 821a27dc-aa00-436e-80e2-655ce26bc830
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: lori wortz
recipient_name: bill g schuette for state representative
address: 4144 MERIDIAN RD
city: OKEMOS
classification: neutral
company: braenaru consulting
entity_type: Individual
first_name: LORI
id: 821a27dc-aa00-436e-80e2-655ce26bc830
last_name: WORTZ
occupation: consultant
state: MI
zip: 48864-0000
", - "Name: janet reid
donor_id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: janet reid
recipient_name: reproductive freedom for all
address: 2378 EATON GATE RD
city: LAKE ORION
classification: neutral
entity_type: Individual
first_name: JANET
id: 25f2cb86-6d01-4fc2-9aaf-d276ce634a47
last_name: REID
state: MI
zip: 48360-1869
", - "Name: gary henderson
donor_id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
recipient_id: 7f272fe4-d592-453c-9ca1-315ea3fdcff1
full_name: gary henderson
recipient_name: bill g schuette for state representative
address: 1601 KINGSWOOD DRIVE
city: LANSING
classification: neutral
company: aircraft precision prod. inc.
entity_type: Individual
first_name: GARY
id: 05a6c5c3-4a3f-41e0-a9d5-e54f33703d2d
last_name: HENDERSON
occupation: sales purchasing manager
state: MI
zip: 48912-0000
", - "Name: claudette levesque
donor_id: 26d5e377-57c4-4f33-95ce-4209bff4242b
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: claudette levesque
recipient_name: reproductive freedom for all
address: 41 CATERPILLAR HILL RD
city: SARGENTVILLE
classification: neutral
entity_type: Individual
first_name: CLAUDETTE
id: 26d5e377-57c4-4f33-95ce-4209bff4242b
last_name: LEVESQUE
state: ME
zip: 04673-2464
", - "Name: graham chapman
donor_id: 8045638c-db65-4a13-9016-05e73766b5b1
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: graham chapman
recipient_name: reproductive freedom for all
address: 1914 CLINTON ST
city: LOS ANGELES
classification: neutral
entity_type: Individual
first_name: GRAHAM
id: 8045638c-db65-4a13-9016-05e73766b5b1
last_name: CHAPMAN
state: CA
zip: 90026-4137
", - "Name: john olson
donor_id: 1ff268c7-fbff-4f94-8810-48f31bb53681
recipient_id: 00a76143-0f24-4683-9963-09f10803e957
full_name: john olson
recipient_name: friends of jerry neyer
address: 6025 VERDE TRL S APT K217
city: BOCA RATON
classification: neutral
entity_type: Individual
first_name: JOHN
id: 1ff268c7-fbff-4f94-8810-48f31bb53681
last_name: OLSON
state: FL
zip: 33433-4442
", - "Name: christina ridalls ms.^
donor_id: 9bea8116-83a3-486a-a457-50c0f80af060
recipient_id: f4360141-0f69-41dc-bb51-facbf40ae4a4
full_name: christina ridalls ms.^
recipient_name: realtors political action committee of michigan
address: 3083 BEATTIE RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: CHRISTINA
id: 9bea8116-83a3-486a-a457-50c0f80af060
last_name: RIDALLS MS.^
state: MI
zip: 48843-0000
", - "Name: dylynn mclean
donor_id: a1943974-4abe-4093-be0b-edcc56a97ffe
recipient_id: bbe89315-1939-46e3-a5c0-2d6e5b28bc95
full_name: dylynn mclean
recipient_name: 1st congressional dist rep comm
address: 1531 W 20 MILE RD
city: SAULT STE MARIE
classification: neutral
entity_type: Individual
first_name: DYLYNN
id: a1943974-4abe-4093-be0b-edcc56a97ffe
last_name: MCLEAN
state: MI
zip: 49783-0000
", - "Name: 1st congressional dist rep comm
classification: neutral
", - "Name: andrew morris
donor_id: 767c512a-9c5a-4230-90ab-3fd40d731f60
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: andrew morris
recipient_name: elect padma kuppa
address: 1118 MORNINGSIDE AVE
city: SCHENECTADY
classification: neutral
entity_type: Individual
first_name: ANDREW
id: 767c512a-9c5a-4230-90ab-3fd40d731f60
last_name: MORRIS
state: NY
zip: 12309-5630
", - "Name: elect padma kuppa
classification: neutral
", - "Name: martha scoppa
donor_id: 78fcc760-825f-404a-b058-a88a99992d98
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: martha scoppa
recipient_name: reproductive freedom for all
address: 32 COLD SPRING RD
city: LIBERTY
classification: neutral
entity_type: Individual
first_name: MARTHA
id: 78fcc760-825f-404a-b058-a88a99992d98
last_name: SCOPPA
state: NY
zip: 12754-0000
", - "Name: carol woodard
donor_id: d4ba0589-99d6-4455-a978-315395322208
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: carol woodard
recipient_name: reproductive freedom for all
address: 5143 SPRING MEADOWS
city: TROY
classification: neutral
entity_type: Individual
first_name: CAROL
id: d4ba0589-99d6-4455-a978-315395322208
last_name: WOODARD
state: MI
zip: 48098-0000
", - "Name: rochelle albright
donor_id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
recipient_id: e3294ecb-f6df-48a0-b3b4-7048a9c650a7
full_name: rochelle albright
recipient_name: michael detmer for state senate
address: 1840 GRAY RD
city: HOWELL
classification: neutral
entity_type: Individual
first_name: ROCHELLE
id: 87b3feed-01a5-4cc8-82cd-cf9c78977534
last_name: ALBRIGHT
state: MI
zip: 48843-0000
", - "Name: michael detmer for state senate
classification: neutral
", - "Name: richard mayfield
donor_id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: richard mayfield
recipient_name: veronica klinefelt for state senate
address: 3221 GRISCHY LN
city: CINCINNATI
classification: neutral
entity_type: Individual
first_name: RICHARD
id: 80ec6920-a933-4c3e-9487-74cbfe6716f7
last_name: MAYFIELD
state: OH
zip: 45208-3109
", - "Name: charles risch
donor_id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: charles risch
recipient_name: reproductive freedom for all
address: 300 S WACKER DR
city: CHICAGO
classification: neutral
entity_type: Individual
first_name: CHARLES
id: 6b4b51e8-f105-4cc1-96f7-cec2d931e58f
last_name: RISCH
state: IL
zip: 60606-6680
", - "Name: barbara miller
donor_id: 47043446-3b77-4a34-9d0d-a21786400d9b
recipient_id: 6b7da911-9835-4789-9635-f6ad2a71dd86
full_name: barbara miller
recipient_name: veronica klinefelt for state senate
address: 820 W END AVE APT 6A
city: NEW YORK
classification: neutral
entity_type: Individual
first_name: BARBARA
id: 47043446-3b77-4a34-9d0d-a21786400d9b
last_name: MILLER
state: NY
zip: 10025-5330
", - "Name: kevin korpi
donor_id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
recipient_id: 5f7c53e3-d1be-47a9-acc4-70828a8c7a69
full_name: kevin korpi
recipient_name: committee to elect ed mcbroom
address: 220 MAC AVE APT 418
city: EAST LANSING
classification: neutral
company: acuitas
entity_type: Individual
first_name: KEVIN
id: 10f51417-a0e9-4a2c-8bdb-e5d045fcab08
last_name: KORPI
occupation: lobbyist
state: MI
zip: 48823-0000
", - "Name: committee to elect ed mcbroom
classification: neutral
", - "Name: wayne miller
donor_id: 14208b99-1ecb-4b33-becf-c30882e9b302
recipient_id: f88fdd05-e3e4-4d51-8511-1ffd35965c8e
full_name: wayne miller
recipient_name: committee to elect jack richert
address: 27301 SCENIC HWY
city: FRANKLIN
classification: neutral
company: miller & tischler pc
entity_type: Individual
first_name: WAYNE
id: 14208b99-1ecb-4b33-becf-c30882e9b302
last_name: MILLER
occupation: attorney
state: MI
zip: 48025-0000
", - "Name: committee to elect jack richert
classification: neutral
", - "Name: mary soens
donor_id: 664b4540-8b50-44d3-8570-cb797a4859fe
recipient_id: 707305ca-e572-4109-8429-00600edf3fb8
full_name: mary soens
recipient_name: elect padma kuppa
address: 55 N HANCOCK ST
city: LEXINGTON
classification: neutral
entity_type: Individual
first_name: MARY
id: 664b4540-8b50-44d3-8570-cb797a4859fe
last_name: SOENS
state: MA
zip: 02420-0000
", - "Name: rebecca baskin
donor_id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rebecca baskin
recipient_name: reproductive freedom for all
address: 680 BERKSHIRE DR
city: SALINE
classification: neutral
entity_type: Individual
first_name: REBECCA
id: 9eb92629-9f8e-4bb5-8dc3-373b56a7db3a
last_name: BASKIN
state: MI
zip: 48176-1087
", - "Name: edward kaminski
donor_id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
recipient_id: 76a600c1-7ead-437a-85ad-0cca7573393b
full_name: edward kaminski
recipient_name: friends of brian hosticka
address: 8765 LEHMAN RD
city: MONTAGUE
classification: neutral
entity_type: Individual
first_name: EDWARD
id: 5b4130f6-d8dd-4739-aa68-2fe81dd4532b
last_name: KAMINSKI
state: MI
zip: 49437-9326
", - "Name: friends of brian hosticka
classification: neutral
", - "Name: robert brown
donor_id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
recipient_id: 2e8c9124-2258-45e3-a198-e8c1798c49f2
full_name: robert brown
recipient_name: monroe plumbers and pipe fitters local 671 pac fund
address: 1207 SANDHURST DR
city: TALLAHASSEE
classification: neutral
entity_type: Individual
first_name: ROBERT
id: 766a34f7-1c8b-4635-a69c-0bff1bf155be
last_name: BROWN
state: FL
zip: 32312-2527
", - "Name: monroe plumbers and pipe fitters local 671 pac fund
classification: neutral
", - "Name: sandra braddock
donor_id: e42e7230-02f0-4b28-ba39-7b68e796d510
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: sandra braddock
recipient_name: reproductive freedom for all
address: 20087 EDGEWATER DRIVE
city: CANYON COUNTRY
classification: neutral
entity_type: Individual
first_name: SANDRA
id: e42e7230-02f0-4b28-ba39-7b68e796d510
last_name: BRADDOCK
state: CA
zip: 91351-0000
", - "Name: dana fortier
donor_id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
recipient_id: 159692de-135a-45bd-8889-1ab1882ed54c
full_name: dana fortier
recipient_name: committee to elect vicki barnett to state senate
address: 23861 W LEBOST
city: NOVI
classification: neutral
entity_type: Individual
first_name: DANA
id: 74b93106-3c9f-4f36-b52e-36143e97e7ce
last_name: FORTIER
state: MI
zip: 48375-0000
", - "Name: committee to elect vicki barnett to state senate
classification: neutral
", - "Name: rachel geiersbach
donor_id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: rachel geiersbach
recipient_name: reproductive freedom for all
address: 3412 OLD KAWKAWLIN RD
city: BAY CITY
classification: neutral
entity_type: Individual
first_name: RACHEL
id: 40d2d39f-f21b-4130-8d7b-47ca810c9aa9
last_name: GEIERSBACH
state: MI
zip: 48706-0000
", - "Name: matthew burgess
donor_id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: matthew burgess
recipient_name: reproductive freedom for all
address: 8823 SPECTRUM CENTER BLVD 2313
city: SAN DIEGO
classification: neutral
entity_type: Individual
first_name: MATTHEW
id: de98dec5-b8d3-4701-a9dd-a254aca2c4cf
last_name: BURGESS
state: CA
zip: 92123-0000
", - "Name: teresa robertson
donor_id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
recipient_id: 4844870e-39f8-41d7-8a41-a824d5dd9998
full_name: teresa robertson
recipient_name: reproductive freedom for all
address: 7101 RIVER GLEN DR SE
city: CALEDONIA
classification: neutral
entity_type: Individual
first_name: TERESA
id: dcf2b3a5-ddf4-4027-8a75-4477893854ff
last_name: ROBERTSON
state: MI
zip: 49316-8136
" - ], - "type": "scatter", - "x": [], - "y": [] - } - ], - "layout": { - "hovermode": "closest", - "margin": { - "b": 20, - "l": 5, - "r": 5, - "t": 40 - }, - "showlegend": true, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 
0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - 
"#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 
0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - 
"lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "font": { - "size": 16 - }, - "text": "Network Graph Indicating Campaign Contributions from 2018-2022" - }, - "xaxis": { - "showgrid": true, - "showticklabels": 
false, - "zeroline": true - }, - "yaxis": { - "showgrid": true, - "showticklabels": false, - "zeroline": true - } - } - }, - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def create_network_nodes(df: pd.DataFrame) -> nx.MultiDiGraph:\n", - " G = nx.MultiDiGraph()\n", - " \n", - " # Define columns for edge attributes\n", - " edge_columns = ['amount', 'donor_office', 'office_sought', 'party', 'purpose', 'transaction_id', 'transaction_type', 'year']\n", - " # Define columns for node attributes\n", - " node_columns = ['donor_id', 'recipient_id', 'full_name', 'recipient_name', 'address', 'city', 'classification', 'company', 'donor_type', 'entity_type', 'first_name', 'id', 'last_name', 'occupation', 'recipient_type', 'state', 'zip']\n", - " \n", - " for _, row in df.iterrows(): \n", - " # Add nodes\n", - " G.add_node(row['full_name'], **row[node_columns].dropna().to_dict())\n", - " G.add_node(row['recipient_name'], classification='neutral') # Adding recipient nodes with default classification\n", - "\n", - " # Add edges\n", - " edge_attributes = row[edge_columns].dropna().to_dict()\n", - " G.add_edge(row['full_name'], row['recipient_name'], **edge_attributes)\n", - " \n", - " return G\n", - "\n", - "def plot_network_graph(G: nx.MultiDiGraph):\n", - " edge_trace = go.Scatter(x=[], y=[], line=dict(color='#888'), hoverinfo='text', mode='lines')\n", - " hovertext = []\n", - "\n", - " for edge in G.edges(data=True):\n", - " source = edge[0]\n", - " target = edge[1]\n", - " hovertext.append(f\"Amount: {edge[2]['amount']:.2f}\")\n", - "\n", - " edge_trace['hovertext'] = hovertext\n", - "\n", - " node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='YlGnBu', size=10))\n", - " node_trace['marker']['color'] = []\n", - "\n", - " for node in G.nodes():\n", - " node_info = f\"Name: {node}
\"\n", - " for key, value in G.nodes[node].items():\n", - " node_info += f\"{key}: {value}
\"\n", - " node_trace['text'] += tuple([node_info])\n", - " # Get the classification value for the node\n", - " classification = G.nodes[node].get('classification', 'neutral')\n", - " # Assign a color based on the classification value\n", - " if classification == 'c':\n", - " color = 'blue'\n", - " elif classification == 'f':\n", - " color = 'red'\n", - " else:\n", - " color = 'green' # Default color for unknown classification\n", - " node_trace['marker']['color'] += tuple([color])\n", - "\n", - " # Define layout settings\n", - " layout = go.Layout(\n", - " title='Network Graph Indicating Campaign Contributions from 2018-2022',\n", - " titlefont=dict(size=16),\n", - " showlegend=True,\n", - " hovermode='closest',\n", - " margin=dict(b=20, l=5, r=5, t=40),\n", - " xaxis=dict(showgrid=True, zeroline=True, showticklabels=False),\n", - " yaxis=dict(showgrid=True, zeroline=True, showticklabels=False)\n", - " )\n", - "\n", - " fig = go.Figure(data=[edge_trace, node_trace], layout=layout)\n", - "\n", - " # Log information about the figure\n", - "\n", - " fig.show()\n", - "\n", - "sample = grouped_sample.sample(50)\n", - "plot_network_graph(create_network_nodes(sample))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "hoverinfo": "none", - "line": { - "color": "#888", - "width": 0.5 - }, - "mode": "lines", - "type": "scatter", - "x": [ - 0.4182243125490408, - 0.3740122792611037, - null, - 0.4182243125490408, - 0.37848025459696877, - null, - 0.4182243125490408, - 0.3821391536049519, - null, - 0.4182243125490408, - 0.31305791514229697, - null, - 0.4182243125490408, - 0.3246624829381992, - null, - 0.4182243125490408, - 0.33203393677870674, - null, - 0.4182243125490408, - 0.4404718698088387, - null, - 0.4182243125490408, - 0.3393815448042514, - null, - 0.4182243125490408, - 0.32444561774289593, - 
null, - 0.4182243125490408, - 0.33721825060791266, - null, - 0.4182243125490408, - 0.5201251204037126, - null, - 0.12286879065958844, - 0.23992481624351925, - null, - 0.12286879065958844, - 0.09276814106220677, - null, - 0.12286879065958844, - 0.07426685281627932, - null, - 0.12286879065958844, - 0.09471702229050472, - null, - 0.12286879065958844, - 0.06879886671193436, - null, - 0.12286879065958844, - 0.1823584228427031, - null, - 0.12286879065958844, - 0.19852054651169693, - null, - 0.12286879065958844, - 0.13747604708068628, - null, - 0.12286879065958844, - 0.22007362873840486, - null, - 0.12286879065958844, - 0.13940667248499528, - null, - 0.12286879065958844, - 0.0201693226965588, - null, - 0.12286879065958844, - 0.16862303760247477, - null, - 0.12286879065958844, - 0.12355952994556385, - null, - 0.12286879065958844, - 0.04781523934390508, - null, - 0.6730431696885844, - 0.6013564651959642, - null, - 0.6730431696885844, - 0.662108954544855, - null, - 0.6730431696885844, - 0.7007214129943925, - null, - 0.6730431696885844, - 0.7188906153197968, - null, - 0.6730431696885844, - 0.7255980413609877, - null, - 0.6730431696885844, - 0.6802728591951641, - null, - 0.6730431696885844, - 0.7518492361353024, - null, - 0.38165116541180344, - 0.32578353530864457, - null, - 0.38165116541180344, - 0.413948124857326, - null, - 0.38165116541180344, - 0.44119458804978295, - null, - 0.38165116541180344, - 0.3328704753356456, - null, - 0.38165116541180344, - 0.3499260998923053, - null, - 0.38165116541180344, - 0.37301066653863624, - null, - 0.38165116541180344, - 0.4277213938753692, - null, - 0.38165116541180344, - 0.3247821296168134, - null, - 0.38165116541180344, - 0.3187675293980876, - null, - 0.38165116541180344, - 0.34114125407236195, - null, - 0.6084965344664286, - 0.5531504465254558, - null, - 0.6084965344664286, - 0.587704695878027, - null, - 0.6084965344664286, - 0.5593951498649633, - null, - 0.6084965344664286, - 0.5845953849421676, - null, - 0.6084965344664286, - 
0.6058132814274794, - null, - 0.6084965344664286, - 0.6322124026692795, - null, - 0.6084965344664286, - 0.5201251204037126, - null, - 0.18155558675901884, - 0.2742000416622462, - null, - 0.18155558675901884, - 0.15570283642495664, - null, - 0.18155558675901884, - 0.19921682827804632, - null, - 0.18155558675901884, - 0.2955343345493908, - null, - 0.18155558675901884, - 0.298647499376007, - null, - 0.18155558675901884, - 0.0914406510425998, - null, - 0.18155558675901884, - 0.0875467755337247, - null, - 0.18155558675901884, - 0.08997327822205015, - null, - 0.18155558675901884, - 0.25656414507004344, - null, - 0.18155558675901884, - 0.20133087739958255, - null, - 0.7722862313192606, - 0.7408684543182315, - null, - 0.7722862313192606, - 0.8385234321105272, - null, - 0.7722862313192606, - 0.7333209824474588, - null, - 0.5368181409256901, - 0.595945044435614, - null, - 0.5368181409256901, - 0.6327007577432437, - null, - 0.5368181409256901, - 0.526779936668903, - null, - 0.5368181409256901, - 0.5433115547736789, - null, - 0.5368181409256901, - 0.5274116361492907, - null, - 0.5368181409256901, - 0.555788147264811, - null, - 0.5368181409256901, - 0.5805679633404117, - null, - 0.5368181409256901, - 0.5989925957177575, - null, - 0.5368181409256901, - 0.48218022499136737, - null, - 0.5368181409256901, - 0.6058132814274794, - null, - 0.5368181409256901, - 0.47443124751760235, - null, - 0.5368181409256901, - 0.5291812256005789, - null, - 0.5368181409256901, - 0.5621062195646831, - null, - 0.5368181409256901, - 0.5465171974419871, - null, - 0.8304626469521129, - 0.8266354543284289, - null, - 0.8304626469521129, - 0.7247552078664479, - null, - 0.8304626469521129, - 0.7827775151390383, - null, - 0.8304626469521129, - 0.9082570345357789, - null, - 0.8304626469521129, - 0.916634041055854, - null, - 0.8304626469521129, - 0.8613129225222332, - null, - 0.8304626469521129, - 0.7703024251104211, - null, - 0.8304626469521129, - 0.9005048863870916, - null, - 0.8304626469521129, - 
0.9240127894624793, - null, - 0.7924139234898422, - 0.800297854626628, - null, - 0.7924139234898422, - 0.7364515013041172, - null, - 0.7924139234898422, - 0.8589937476561325, - null, - 0.7924139234898422, - 0.8247840830312709, - null, - 0.7924139234898422, - 0.7948577020793985, - null, - 0.7924139234898422, - 0.7059759544943667, - null, - 0.7924139234898422, - 0.8846357375826375, - null, - 0.7924139234898422, - 0.8323549266756429, - null, - 0.8266354543284289, - 0.7247552078664479, - null, - 0.8266354543284289, - 0.7827775151390383, - null, - 0.8266354543284289, - 0.9082570345357789, - null, - 0.8266354543284289, - 0.7042334738295596, - null, - 0.8266354543284289, - 0.8613129225222332, - null, - 0.8266354543284289, - 0.7703024251104211, - null, - 0.8266354543284289, - 0.9240127894624793, - null, - 0.8266354543284289, - 0.8680862155815134, - null, - 0.4023039585223629, - 0.4611021425875542, - null, - 0.4023039585223629, - 0.44175944307536974, - null, - 0.4023039585223629, - 0.3318561006769827, - null, - 0.4023039585223629, - 0.4349682989231034, - null, - 0.4023039585223629, - 0.29978148854693865, - null, - 0.4023039585223629, - 0.4442228752887084, - null, - 0.5084198498293618, - 0.5436816885151938, - null, - 0.5084198498293618, - 0.5229468203255856, - null, - 0.5084198498293618, - 0.4611021425875542, - null, - 0.5084198498293618, - 0.44175944307536974, - null, - 0.5084198498293618, - 0.6234379896430121, - null, - 0.5084198498293618, - 0.4442228752887084, - null, - 0.23992481624351925, - 0.27440213390552737, - null, - 0.23992481624351925, - 0.2728250610713022, - null, - 0.23992481624351925, - 0.1823584228427031, - null, - 0.23992481624351925, - 0.19852054651169693, - null, - 0.23992481624351925, - 0.22007362873840486, - null, - 0.23992481624351925, - 0.13940667248499528, - null, - 0.23992481624351925, - 0.16862303760247477, - null, - 0.23992481624351925, - 0.12355952994556385, - null, - 0.2742000416622462, - 0.15570283642495664, - null, - 0.2742000416622462, - 
0.32578353530864457, - null, - 0.2742000416622462, - 0.3740122792611037, - null, - 0.2742000416622462, - 0.2955343345493908, - null, - 0.2742000416622462, - 0.31305791514229697, - null, - 0.2742000416622462, - 0.298647499376007, - null, - 0.2742000416622462, - 0.3328704753356456, - null, - 0.2742000416622462, - 0.3499260998923053, - null, - 0.2742000416622462, - 0.3181124346701171, - null, - 0.2742000416622462, - 0.3247821296168134, - null, - 0.2742000416622462, - 0.25656414507004344, - null, - 0.2742000416622462, - 0.3187675293980876, - null, - 0.2742000416622462, - 0.20133087739958255, - null, - 0.2742000416622462, - 0.34114125407236195, - null, - 0.15570283642495664, - 0.07513674080757637, - null, - 0.15570283642495664, - 0.05512117222879742, - null, - 0.15570283642495664, - 0.05194805532761382, - null, - 0.15570283642495664, - 0.06202421257916635, - null, - 0.15570283642495664, - 0.09053866681881584, - null, - 0.15570283642495664, - 0.1573630170264504, - null, - 0.15570283642495664, - 0.0852382135963593, - null, - 0.15570283642495664, - 0.0875467755337247, - null, - 0.15570283642495664, - 0.08997327822205015, - null, - 0.15570283642495664, - 0.20133087739958255, - null, - 0.15570283642495664, - 0.038579501382332126, - null, - 0.07513674080757637, - 0.1130639188502468, - null, - 0.07513674080757637, - 0.05512117222879742, - null, - 0.07513674080757637, - 0.07163295816605642, - null, - 0.07513674080757637, - 0.06202421257916635, - null, - 0.07513674080757637, - 0.09053866681881584, - null, - 0.07513674080757637, - 0.1573630170264504, - null, - 0.07513674080757637, - 0.0023771443647881974, - null, - 0.07513674080757637, - 0.0852382135963593, - null, - 0.07513674080757637, - 0.17086936775877049, - null, - 0.07513674080757637, - 0.0875467755337247, - null, - 0.07513674080757637, - 0.08997327822205015, - null, - 0.07513674080757637, - 0.020212382594376965, - null, - 0.07513674080757637, - 0.0897773631019545, - null, - 0.07513674080757637, - 0.038579501382332126, - 
null, - 0.7247552078664479, - 0.6327007577432437, - null, - 0.7247552078664479, - 0.662108954544855, - null, - 0.7247552078664479, - 0.7827775151390383, - null, - 0.7247552078664479, - 0.7007214129943925, - null, - 0.7247552078664479, - 0.7188906153197968, - null, - 0.7247552078664479, - 0.7042334738295596, - null, - 0.7247552078664479, - 0.7255980413609877, - null, - 0.7247552078664479, - 0.7703024251104211, - null, - 0.2586357176925591, - 0.3019474379086241, - null, - 0.2586357176925591, - 0.2121217358781844, - null, - 0.595945044435614, - 0.6327007577432437, - null, - 0.595945044435614, - 0.526779936668903, - null, - 0.595945044435614, - 0.662108954544855, - null, - 0.595945044435614, - 0.5433115547736789, - null, - 0.595945044435614, - 0.5274116361492907, - null, - 0.595945044435614, - 0.7042334738295596, - null, - 0.595945044435614, - 0.555788147264811, - null, - 0.595945044435614, - 0.5805679633404117, - null, - 0.595945044435614, - 0.5989925957177575, - null, - 0.595945044435614, - 0.6058132814274794, - null, - 0.595945044435614, - 0.5291812256005789, - null, - 0.595945044435614, - 0.5621062195646831, - null, - 0.9428542201780316, - 0.8511753697833563, - null, - 0.9428542201780316, - 0.89080246263295, - null, - 0.9428542201780316, - 0.9521646983336837, - null, - 0.9428542201780316, - 0.9663892923019699, - null, - 0.9428542201780316, - 0.9425745666137786, - null, - 0.9428542201780316, - 0.9851894520572745, - null, - 0.9428542201780316, - 0.9573079778783831, - null, - 0.9428542201780316, - 0.9473667691929577, - null, - 0.9428542201780316, - 0.838803404513024, - null, - 0.03304679952258993, - 0.05596958524873419, - null, - 0.03304679952258993, - 0.014269300880037306, - null, - 0.6013564651959642, - 0.662108954544855, - null, - 0.6013564651959642, - 0.7007214129943925, - null, - 0.6013564651959642, - 0.7188906153197968, - null, - 0.6013564651959642, - 0.555788147264811, - null, - 0.6013564651959642, - 0.5293212253918783, - null, - 0.6013564651959642, - 
0.5291812256005789, - null, - 0.6013564651959642, - 0.5191285820034173, - null, - 0.6013564651959642, - 0.5465171974419871, - null, - 0.1130639188502468, - 0.07163295816605642, - null, - 0.1130639188502468, - 0.09053866681881584, - null, - 0.1130639188502468, - 0.1573630170264504, - null, - 0.1130639188502468, - 0.13747604708068628, - null, - 0.1130639188502468, - 0.2275256207367028, - null, - 0.1130639188502468, - 0.18507593174525072, - null, - 0.1130639188502468, - 0.17086936775877049, - null, - 0.1130639188502468, - 0.0897773631019545, - null, - 0.5531504465254558, - 0.47055154706870017, - null, - 0.5531504465254558, - 0.5274116361492907, - null, - 0.5531504465254558, - 0.587704695878027, - null, - 0.5531504465254558, - 0.5989925957177575, - null, - 0.5531504465254558, - 0.5845953849421676, - null, - 0.5531504465254558, - 0.6058132814274794, - null, - 0.5531504465254558, - 0.4564806171162211, - null, - 0.5531504465254558, - 0.5201251204037126, - null, - 0.1635981270944994, - 0.19921682827804632, - null, - 0.1635981270944994, - 0.10310287300704979, - null, - 0.1635981270944994, - 0.05973078995013337, - null, - 0.1635981270944994, - 0.0914406510425998, - null, - 0.1635981270944994, - 0.14711158829428328, - null, - 0.1635981270944994, - 0.21535391032155426, - null, - 0.05512117222879742, - 0.07163295816605642, - null, - 0.05512117222879742, - 0.05194805532761382, - null, - 0.05512117222879742, - 0.06202421257916635, - null, - 0.05512117222879742, - 0.09053866681881584, - null, - 0.05512117222879742, - 0.0023771443647881974, - null, - 0.05512117222879742, - 0.0852382135963593, - null, - 0.05512117222879742, - 0.0875467755337247, - null, - 0.05512117222879742, - 0.08997327822205015, - null, - 0.05512117222879742, - 0.020212382594376965, - null, - 0.05512117222879742, - 0.02312833765025224, - null, - 0.05512117222879742, - 0.04237200971819888, - null, - 0.05512117222879742, - 0.038579501382332126, - null, - 0.05512117222879742, - 0.01777064460825195, - null, - 
0.32578353530864457, - 0.413948124857326, - null, - 0.32578353530864457, - 0.3328704753356456, - null, - 0.32578353530864457, - 0.3499260998923053, - null, - 0.32578353530864457, - 0.37301066653863624, - null, - 0.32578353530864457, - 0.2619562675328274, - null, - 0.32578353530864457, - 0.4277213938753692, - null, - 0.32578353530864457, - 0.3247821296168134, - null, - 0.32578353530864457, - 0.3187675293980876, - null, - 0.32578353530864457, - 0.34114125407236195, - null, - 0.27440213390552737, - 0.2728250610713022, - null, - 0.27440213390552737, - 0.1823584228427031, - null, - 0.27440213390552737, - 0.19852054651169693, - null, - 0.27440213390552737, - 0.22007362873840486, - null, - 0.27440213390552737, - 0.37301066653863624, - null, - 0.27440213390552737, - 0.2275256207367028, - null, - 0.27440213390552737, - 0.2619562675328274, - null, - 0.27440213390552737, - 0.3414075728554137, - null, - 0.2728250610713022, - 0.1823584228427031, - null, - 0.2728250610713022, - 0.37549158943196925, - null, - 0.2728250610713022, - 0.22007362873840486, - null, - 0.2728250610713022, - 0.16862303760247477, - null, - 0.6346565064837861, - 0.7364515013041172, - null, - 0.6346565064837861, - 0.5436816885151938, - null, - 0.6346565064837861, - 0.5461279353327784, - null, - 0.6346565064837861, - 0.7059759544943667, - null, - 0.6346565064837861, - 0.6149491168624189, - null, - 0.6346565064837861, - 0.5593069337955722, - null, - 0.6327007577432437, - 0.662108954544855, - null, - 0.6327007577432437, - 0.5433115547736789, - null, - 0.6327007577432437, - 0.5274116361492907, - null, - 0.6327007577432437, - 0.7042334738295596, - null, - 0.6327007577432437, - 0.555788147264811, - null, - 0.6327007577432437, - 0.5805679633404117, - null, - 0.6327007577432437, - 0.5989925957177575, - null, - 0.6327007577432437, - 0.5845953849421676, - null, - 0.6327007577432437, - 0.6058132814274794, - null, - 0.6327007577432437, - 0.5621062195646831, - null, - 0.800297854626628, - 0.7364515013041172, - null, - 
0.800297854626628, - 0.6953901849658966, - null, - 0.800297854626628, - 0.8589937476561325, - null, - 0.800297854626628, - 0.7204214783753378, - null, - 0.800297854626628, - 0.8247840830312709, - null, - 0.800297854626628, - 0.7948577020793985, - null, - 0.800297854626628, - 0.7059759544943667, - null, - 0.800297854626628, - 0.8846357375826375, - null, - 0.800297854626628, - 0.8323549266756429, - null, - 0.800297854626628, - 0.8505181106970376, - null, - 0.800297854626628, - 0.7607451357487841, - null, - 0.800297854626628, - 0.9110645875753355, - null, - 0.526779936668903, - 0.5433115547736789, - null, - 0.526779936668903, - 0.5274116361492907, - null, - 0.526779936668903, - 0.555788147264811, - null, - 0.526779936668903, - 0.5805679633404117, - null, - 0.526779936668903, - 0.48218022499136737, - null, - 0.526779936668903, - 0.5293212253918783, - null, - 0.526779936668903, - 0.47443124751760235, - null, - 0.526779936668903, - 0.5291812256005789, - null, - 0.526779936668903, - 0.5621062195646831, - null, - 0.526779936668903, - 0.5465171974419871, - null, - 0.413948124857326, - 0.44119458804978295, - null, - 0.413948124857326, - 0.3328704753356456, - null, - 0.413948124857326, - 0.3499260998923053, - null, - 0.413948124857326, - 0.47055154706870017, - null, - 0.413948124857326, - 0.5274116361492907, - null, - 0.413948124857326, - 0.4277213938753692, - null, - 0.413948124857326, - 0.3247821296168134, - null, - 0.413948124857326, - 0.4564806171162211, - null, - 0.413948124857326, - 0.3187675293980876, - null, - 0.413948124857326, - 0.34114125407236195, - null, - 0.09276814106220677, - 0.03187584930858911, - null, - 0.09276814106220677, - 0.07426685281627932, - null, - 0.09276814106220677, - 0.03446402354654854, - null, - 0.09276814106220677, - 0.06879886671193436, - null, - 0.09276814106220677, - 0.1823584228427031, - null, - 0.09276814106220677, - 0.13940667248499528, - null, - 0.09276814106220677, - 0.0201693226965588, - null, - 0.09276814106220677, - 
0.16862303760247477, - null, - 0.09276814106220677, - 0.12355952994556385, - null, - 0.662108954544855, - 0.7827775151390383, - null, - 0.662108954544855, - 0.7007214129943925, - null, - 0.662108954544855, - 0.7188906153197968, - null, - 0.662108954544855, - 0.7042334738295596, - null, - 0.662108954544855, - 0.555788147264811, - null, - 0.662108954544855, - 0.5805679633404117, - null, - 0.662108954544855, - 0.7255980413609877, - null, - 0.662108954544855, - 0.7518492361353024, - null, - 0.07163295816605642, - 0.06202421257916635, - null, - 0.07163295816605642, - 0.09053866681881584, - null, - 0.07163295816605642, - 0.1573630170264504, - null, - 0.07163295816605642, - 0.0023771443647881974, - null, - 0.07163295816605642, - 0.0852382135963593, - null, - 0.07163295816605642, - 0.17086936775877049, - null, - 0.07163295816605642, - 0.0875467755337247, - null, - 0.07163295816605642, - 0.08997327822205015, - null, - 0.07163295816605642, - 0.020212382594376965, - null, - 0.07163295816605642, - 0.0897773631019545, - null, - 0.07163295816605642, - 0.038579501382332126, - null, - 0.44119458804978295, - 0.3740122792611037, - null, - 0.44119458804978295, - 0.3328704753356456, - null, - 0.44119458804978295, - 0.3499260998923053, - null, - 0.44119458804978295, - 0.47055154706870017, - null, - 0.44119458804978295, - 0.5274116361492907, - null, - 0.44119458804978295, - 0.4277213938753692, - null, - 0.44119458804978295, - 0.4564806171162211, - null, - 0.44119458804978295, - 0.34114125407236195, - null, - 0.7364515013041172, - 0.8247840830312709, - null, - 0.7364515013041172, - 0.7948577020793985, - null, - 0.7364515013041172, - 0.7059759544943667, - null, - 0.7364515013041172, - 0.8323549266756429, - null, - 0.7827775151390383, - 0.7007214129943925, - null, - 0.7827775151390383, - 0.7188906153197968, - null, - 0.7827775151390383, - 0.7042334738295596, - null, - 0.7827775151390383, - 0.8613129225222332, - null, - 0.7827775151390383, - 0.7255980413609877, - null, - 0.7827775151390383, 
- 0.7703024251104211, - null, - 0.7827775151390383, - 0.9005048863870916, - null, - 0.7827775151390383, - 0.7518492361353024, - null, - 0.9600359726880752, - 0.9998698320754983, - null, - 0.9600359726880752, - 0.9082570345357789, - null, - 0.9600359726880752, - 0.9503884723051484, - null, - 0.9600359726880752, - 0.916634041055854, - null, - 0.9600359726880752, - 0.8613129225222332, - null, - 0.9600359726880752, - 0.9005048863870916, - null, - 0.9600359726880752, - 0.9240127894624793, - null, - 0.9600359726880752, - 0.9636590456207981, - null, - 0.8511753697833563, - 0.89080246263295, - null, - 0.8511753697833563, - 0.9521646983336837, - null, - 0.8511753697833563, - 0.9663892923019699, - null, - 0.8511753697833563, - 0.9573079778783831, - null, - 0.8511753697833563, - 0.9473667691929577, - null, - 0.8511753697833563, - 0.838803404513024, - null, - 0.8511753697833563, - 0.7518492361353024, - null, - 0.05194805532761382, - 0.06202421257916635, - null, - 0.05194805532761382, - 0.0852382135963593, - null, - 0.05194805532761382, - 0.0914406510425998, - null, - 0.05194805532761382, - 0.0875467755337247, - null, - 0.05194805532761382, - 0.08997327822205015, - null, - 0.05194805532761382, - 0.020212382594376965, - null, - 0.05194805532761382, - 0.02312833765025224, - null, - 0.05194805532761382, - 0.04237200971819888, - null, - 0.05194805532761382, - 0.038579501382332126, - null, - 0.05194805532761382, - 0.01777064460825195, - null, - 0.03187584930858911, - 0.07426685281627932, - null, - 0.03187584930858911, - 0.03446402354654854, - null, - 0.03187584930858911, - 0.06879886671193436, - null, - 0.03187584930858911, - 0.13940667248499528, - null, - 0.03187584930858911, - 0.0201693226965588, - null, - 0.03187584930858911, - 0.12355952994556385, - null, - 0.07426685281627932, - 0.09471702229050472, - null, - 0.07426685281627932, - 0.06879886671193436, - null, - 0.07426685281627932, - 0.1823584228427031, - null, - 0.07426685281627932, - 0.13940667248499528, - null, - 
0.07426685281627932, - 0.0201693226965588, - null, - 0.07426685281627932, - 0.16862303760247477, - null, - 0.07426685281627932, - 0.12355952994556385, - null, - 0.07426685281627932, - 0.04781523934390508, - null, - 0.5257999712304688, - 0.5593951498649633, - null, - 0.5257999712304688, - 0.4404718698088387, - null, - 0.5257999712304688, - 0.5201251204037126, - null, - 0.9998698320754983, - 0.9082570345357789, - null, - 0.9998698320754983, - 0.9636084967560627, - null, - 0.9998698320754983, - 0.9503884723051484, - null, - 0.9998698320754983, - 0.9240127894624793, - null, - 0.9998698320754983, - 0.9636590456207981, - null, - 0.09471702229050472, - 0.06879886671193436, - null, - 0.09471702229050472, - 0.1823584228427031, - null, - 0.09471702229050472, - 0.19852054651169693, - null, - 0.09471702229050472, - 0.13747604708068628, - null, - 0.09471702229050472, - 0.13940667248499528, - null, - 0.09471702229050472, - 0.0201693226965588, - null, - 0.09471702229050472, - 0.12355952994556385, - null, - 0.09471702229050472, - 0.0897773631019545, - null, - 0.09471702229050472, - 0.04781523934390508, - null, - 0.6953901849658966, - 0.7204214783753378, - null, - 0.6953901849658966, - 0.7181048560087516, - null, - 0.6953901849658966, - 0.7948577020793985, - null, - 0.6953901849658966, - 0.7059759544943667, - null, - 0.6953901849658966, - 0.6370268640561303, - null, - 0.6953901849658966, - 0.6149491168624189, - null, - 0.6953901849658966, - 0.7607451357487841, - null, - 0.6953901849658966, - 0.6234379896430121, - null, - 0.03446402354654854, - 0.05596958524873419, - null, - 0.03446402354654854, - 0.014269300880037306, - null, - 0.9082570345357789, - 0.9503884723051484, - null, - 0.9082570345357789, - 0.916634041055854, - null, - 0.9082570345357789, - 0.8613129225222332, - null, - 0.9082570345357789, - 0.9005048863870916, - null, - 0.9082570345357789, - 0.9240127894624793, - null, - 0.9082570345357789, - 0.9636590456207981, - null, - 0.3740122792611037, - 0.37848025459696877, - 
null, - 0.3740122792611037, - 0.3821391536049519, - null, - 0.3740122792611037, - 0.2955343345493908, - null, - 0.3740122792611037, - 0.31305791514229697, - null, - 0.3740122792611037, - 0.298647499376007, - null, - 0.3740122792611037, - 0.3246624829381992, - null, - 0.3740122792611037, - 0.3328704753356456, - null, - 0.3740122792611037, - 0.33203393677870674, - null, - 0.3740122792611037, - 0.3499260998923053, - null, - 0.3740122792611037, - 0.3181124346701171, - null, - 0.3740122792611037, - 0.47055154706870017, - null, - 0.3740122792611037, - 0.4277213938753692, - null, - 0.3740122792611037, - 0.25656414507004344, - null, - 0.3740122792611037, - 0.4564806171162211, - null, - 0.977854801698089, - 0.9162463356603696, - null, - 0.5436816885151938, - 0.5461279353327784, - null, - 0.5436816885151938, - 0.5229468203255856, - null, - 0.5436816885151938, - 0.4611021425875542, - null, - 0.5436816885151938, - 0.6149491168624189, - null, - 0.5436816885151938, - 0.4349682989231034, - null, - 0.5436816885151938, - 0.6234379896430121, - null, - 0.5436816885151938, - 0.4442228752887084, - null, - 0.5436816885151938, - 0.5593069337955722, - null, - 0.06202421257916635, - 0.09053866681881584, - null, - 0.06202421257916635, - 0.1573630170264504, - null, - 0.06202421257916635, - 0.0023771443647881974, - null, - 0.06202421257916635, - 0.0852382135963593, - null, - 0.06202421257916635, - 0.0875467755337247, - null, - 0.06202421257916635, - 0.08997327822205015, - null, - 0.06202421257916635, - 0.020212382594376965, - null, - 0.06202421257916635, - 0.02312833765025224, - null, - 0.06202421257916635, - 0.04237200971819888, - null, - 0.06202421257916635, - 0.038579501382332126, - null, - 0.06202421257916635, - 0.01777064460825195, - null, - 0.8589937476561325, - 0.8247840830312709, - null, - 0.8589937476561325, - 0.7948577020793985, - null, - 0.8589937476561325, - 0.9210876029743161, - null, - 0.8589937476561325, - 0.9694266665187994, - null, - 0.8589937476561325, - 0.8846357375826375, 
- null, - 0.8589937476561325, - 0.8323549266756429, - null, - 0.8589937476561325, - 0.8505181106970376, - null, - 0.8589937476561325, - 0.9110645875753355, - null, - 0.06879886671193436, - 0.1823584228427031, - null, - 0.06879886671193436, - 0.13940667248499528, - null, - 0.06879886671193436, - 0.0201693226965588, - null, - 0.06879886671193436, - 0.12355952994556385, - null, - 0.06879886671193436, - 0.04781523934390508, - null, - 0.19921682827804632, - 0.10310287300704979, - null, - 0.19921682827804632, - 0.0914406510425998, - null, - 0.19921682827804632, - 0.14711158829428328, - null, - 0.19921682827804632, - 0.21535391032155426, - null, - 0.19921682827804632, - 0.25656414507004344, - null, - 0.1823584228427031, - 0.19852054651169693, - null, - 0.1823584228427031, - 0.22007362873840486, - null, - 0.1823584228427031, - 0.13940667248499528, - null, - 0.1823584228427031, - 0.16862303760247477, - null, - 0.1823584228427031, - 0.12355952994556385, - null, - 0.37549158943196925, - 0.41808707877840445, - null, - 0.37549158943196925, - 0.42926818011737133, - null, - 0.37549158943196925, - 0.4363707938884992, - null, - 0.37549158943196925, - 0.42077304608666055, - null, - 0.5433115547736789, - 0.5274116361492907, - null, - 0.5433115547736789, - 0.555788147264811, - null, - 0.5433115547736789, - 0.5805679633404117, - null, - 0.5433115547736789, - 0.5989925957177575, - null, - 0.5433115547736789, - 0.48218022499136737, - null, - 0.5433115547736789, - 0.47443124751760235, - null, - 0.5433115547736789, - 0.5291812256005789, - null, - 0.5433115547736789, - 0.5621062195646831, - null, - 0.5433115547736789, - 0.5465171974419871, - null, - 0.37848025459696877, - 0.3821391536049519, - null, - 0.37848025459696877, - 0.31305791514229697, - null, - 0.37848025459696877, - 0.3246624829381992, - null, - 0.37848025459696877, - 0.33203393677870674, - null, - 0.37848025459696877, - 0.4404718698088387, - null, - 0.37848025459696877, - 0.3393815448042514, - null, - 0.37848025459696877, - 
0.32444561774289593, - null, - 0.37848025459696877, - 0.33721825060791266, - null, - 0.3821391536049519, - 0.2955343345493908, - null, - 0.3821391536049519, - 0.31305791514229697, - null, - 0.3821391536049519, - 0.298647499376007, - null, - 0.3821391536049519, - 0.3246624829381992, - null, - 0.3821391536049519, - 0.33203393677870674, - null, - 0.3821391536049519, - 0.3499260998923053, - null, - 0.3821391536049519, - 0.3181124346701171, - null, - 0.3821391536049519, - 0.47055154706870017, - null, - 0.3821391536049519, - 0.4404718698088387, - null, - 0.3821391536049519, - 0.3393815448042514, - null, - 0.3821391536049519, - 0.32444561774289593, - null, - 0.3821391536049519, - 0.4564806171162211, - null, - 0.3821391536049519, - 0.33721825060791266, - null, - 0.7204214783753378, - 0.7181048560087516, - null, - 0.7204214783753378, - 0.7948577020793985, - null, - 0.7204214783753378, - 0.7059759544943667, - null, - 0.7204214783753378, - 0.6370268640561303, - null, - 0.7204214783753378, - 0.7607451357487841, - null, - 0.7204214783753378, - 0.6234379896430121, - null, - 0.2955343345493908, - 0.31305791514229697, - null, - 0.2955343345493908, - 0.298647499376007, - null, - 0.2955343345493908, - 0.3246624829381992, - null, - 0.2955343345493908, - 0.3328704753356456, - null, - 0.2955343345493908, - 0.33203393677870674, - null, - 0.2955343345493908, - 0.3499260998923053, - null, - 0.2955343345493908, - 0.3181124346701171, - null, - 0.2955343345493908, - 0.25656414507004344, - null, - 0.2955343345493908, - 0.32444561774289593, - null, - 0.09053866681881584, - 0.1573630170264504, - null, - 0.09053866681881584, - 0.0023771443647881974, - null, - 0.09053866681881584, - 0.0852382135963593, - null, - 0.09053866681881584, - 0.17086936775877049, - null, - 0.09053866681881584, - 0.0875467755337247, - null, - 0.09053866681881584, - 0.08997327822205015, - null, - 0.09053866681881584, - 0.020212382594376965, - null, - 0.09053866681881584, - 0.0897773631019545, - null, - 0.09053866681881584, 
- 0.02312833765025224, - null, - 0.09053866681881584, - 0.20133087739958255, - null, - 0.09053866681881584, - 0.038579501382332126, - null, - 0.09053866681881584, - 0.01777064460825195, - null, - 0.7181048560087516, - 0.6776948411821848, - null, - 0.7181048560087516, - 0.834199864808296, - null, - 0.7181048560087516, - 0.6370268640561303, - null, - 0.7181048560087516, - 0.6802728591951641, - null, - 0.7181048560087516, - 0.7607451357487841, - null, - 0.7181048560087516, - 0.6314926226168458, - null, - 0.10310287300704979, - 0.05973078995013337, - null, - 0.10310287300704979, - 0.0914406510425998, - null, - 0.10310287300704979, - 0.14711158829428328, - null, - 0.10310287300704979, - 0.21535391032155426, - null, - 0.10310287300704979, - 0.04237200971819888, - null, - 0.8247840830312709, - 0.7948577020793985, - null, - 0.8247840830312709, - 0.9210876029743161, - null, - 0.8247840830312709, - 0.7059759544943667, - null, - 0.8247840830312709, - 0.9186278106648778, - null, - 0.8247840830312709, - 0.8846357375826375, - null, - 0.8247840830312709, - 0.8323549266756429, - null, - 0.8247840830312709, - 0.8505181106970376, - null, - 0.8247840830312709, - 0.9110645875753355, - null, - 0.1573630170264504, - 0.2275256207367028, - null, - 0.1573630170264504, - 0.0852382135963593, - null, - 0.1573630170264504, - 0.18507593174525072, - null, - 0.1573630170264504, - 0.17086936775877049, - null, - 0.1573630170264504, - 0.0875467755337247, - null, - 0.1573630170264504, - 0.08997327822205015, - null, - 0.1573630170264504, - 0.0897773631019545, - null, - 0.1573630170264504, - 0.20133087739958255, - null, - 0.31305791514229697, - 0.298647499376007, - null, - 0.31305791514229697, - 0.3246624829381992, - null, - 0.31305791514229697, - 0.33203393677870674, - null, - 0.31305791514229697, - 0.3499260998923053, - null, - 0.31305791514229697, - 0.3181124346701171, - null, - 0.31305791514229697, - 0.3393815448042514, - null, - 0.31305791514229697, - 0.21535391032155426, - null, - 
0.31305791514229697, - 0.25656414507004344, - null, - 0.31305791514229697, - 0.32444561774289593, - null, - 0.31305791514229697, - 0.33721825060791266, - null, - 0.298647499376007, - 0.3328704753356456, - null, - 0.298647499376007, - 0.33203393677870674, - null, - 0.298647499376007, - 0.3499260998923053, - null, - 0.298647499376007, - 0.3181124346701171, - null, - 0.298647499376007, - 0.3247821296168134, - null, - 0.298647499376007, - 0.25656414507004344, - null, - 0.298647499376007, - 0.3187675293980876, - null, - 0.298647499376007, - 0.20133087739958255, - null, - 0.298647499376007, - 0.34114125407236195, - null, - 0.3246624829381992, - 0.33203393677870674, - null, - 0.3246624829381992, - 0.4404718698088387, - null, - 0.3246624829381992, - 0.3393815448042514, - null, - 0.3246624829381992, - 0.21535391032155426, - null, - 0.3246624829381992, - 0.25656414507004344, - null, - 0.3246624829381992, - 0.32444561774289593, - null, - 0.3246624829381992, - 0.33721825060791266, - null, - 0.19852054651169693, - 0.13747604708068628, - null, - 0.19852054651169693, - 0.22007362873840486, - null, - 0.19852054651169693, - 0.2275256207367028, - null, - 0.19852054651169693, - 0.2619562675328274, - null, - 0.19852054651169693, - 0.18507593174525072, - null, - 0.3328704753356456, - 0.3499260998923053, - null, - 0.3328704753356456, - 0.3181124346701171, - null, - 0.3328704753356456, - 0.4277213938753692, - null, - 0.3328704753356456, - 0.3247821296168134, - null, - 0.3328704753356456, - 0.3187675293980876, - null, - 0.3328704753356456, - 0.34114125407236195, - null, - 0.33203393677870674, - 0.3181124346701171, - null, - 0.33203393677870674, - 0.4404718698088387, - null, - 0.33203393677870674, - 0.3393815448042514, - null, - 0.33203393677870674, - 0.21535391032155426, - null, - 0.33203393677870674, - 0.25656414507004344, - null, - 0.33203393677870674, - 0.32444561774289593, - null, - 0.33203393677870674, - 0.33721825060791266, - null, - 0.5461279353327784, - 0.5229468203255856, - null, 
- 0.5461279353327784, - 0.4611021425875542, - null, - 0.5461279353327784, - 0.6149491168624189, - null, - 0.5461279353327784, - 0.4349682989231034, - null, - 0.5461279353327784, - 0.5593069337955722, - null, - 0.9636084967560627, - 0.9503884723051484, - null, - 0.9636084967560627, - 0.9162463356603696, - null, - 0.9636084967560627, - 0.8541827253649632, - null, - 0.9636084967560627, - 0.9636590456207981, - null, - 0.9636084967560627, - 0.8680862155815134, - null, - 0.9636084967560627, - 0.8668565351624634, - null, - 0.9503884723051484, - 0.9240127894624793, - null, - 0.9503884723051484, - 0.9636590456207981, - null, - 0.9503884723051484, - 0.8680862155815134, - null, - 0.13747604708068628, - 0.22007362873840486, - null, - 0.13747604708068628, - 0.2275256207367028, - null, - 0.13747604708068628, - 0.18507593174525072, - null, - 0.13747604708068628, - 0.17086936775877049, - null, - 0.13747604708068628, - 0.0897773631019545, - null, - 0.13747604708068628, - 0.04781523934390508, - null, - 0.3499260998923053, - 0.3181124346701171, - null, - 0.3499260998923053, - 0.4277213938753692, - null, - 0.3499260998923053, - 0.3247821296168134, - null, - 0.3499260998923053, - 0.4564806171162211, - null, - 0.3499260998923053, - 0.3187675293980876, - null, - 0.3499260998923053, - 0.34114125407236195, - null, - 0.3181124346701171, - 0.3247821296168134, - null, - 0.3181124346701171, - 0.25656414507004344, - null, - 0.3181124346701171, - 0.3187675293980876, - null, - 0.89080246263295, - 0.9521646983336837, - null, - 0.89080246263295, - 0.834199864808296, - null, - 0.89080246263295, - 0.9663892923019699, - null, - 0.89080246263295, - 0.9425745666137786, - null, - 0.89080246263295, - 0.9851894520572745, - null, - 0.89080246263295, - 0.9573079778783831, - null, - 0.89080246263295, - 0.9473667691929577, - null, - 0.89080246263295, - 0.838803404513024, - null, - 0.9521646983336837, - 0.916634041055854, - null, - 0.9521646983336837, - 0.9663892923019699, - null, - 0.9521646983336837, - 
0.9573079778783831, - null, - 0.9521646983336837, - 0.9473667691929577, - null, - 0.9521646983336837, - 0.9005048863870916, - null, - 0.6776948411821848, - 0.6802728591951641, - null, - 0.6776948411821848, - 0.6314926226168458, - null, - 0.0023771443647881974, - 0.0852382135963593, - null, - 0.0023771443647881974, - 0.0875467755337247, - null, - 0.0023771443647881974, - 0.020212382594376965, - null, - 0.0023771443647881974, - 0.0897773631019545, - null, - 0.0023771443647881974, - 0.038579501382332126, - null, - 0.7007214129943925, - 0.7188906153197968, - null, - 0.7007214129943925, - 0.7255980413609877, - null, - 0.7007214129943925, - 0.7518492361353024, - null, - 0.7188906153197968, - 0.7255980413609877, - null, - 0.7188906153197968, - 0.7518492361353024, - null, - 0.47055154706870017, - 0.5274116361492907, - null, - 0.47055154706870017, - 0.4277213938753692, - null, - 0.47055154706870017, - 0.5845953849421676, - null, - 0.47055154706870017, - 0.4564806171162211, - null, - 0.47055154706870017, - 0.5201251204037126, - null, - 0.19043749918150743, - 0.2121217358781844, - null, - 0.19043749918150743, - 0.29978148854693865, - null, - 0.5274116361492907, - 0.555788147264811, - null, - 0.5274116361492907, - 0.5805679633404117, - null, - 0.5274116361492907, - 0.5989925957177575, - null, - 0.5274116361492907, - 0.4277213938753692, - null, - 0.5274116361492907, - 0.5845953849421676, - null, - 0.5274116361492907, - 0.6058132814274794, - null, - 0.5274116361492907, - 0.5291812256005789, - null, - 0.5274116361492907, - 0.5621062195646831, - null, - 0.5274116361492907, - 0.4564806171162211, - null, - 0.9162463356603696, - 0.8385234321105272, - null, - 0.9162463356603696, - 0.8668565351624634, - null, - 0.7042334738295596, - 0.5989925957177575, - null, - 0.7042334738295596, - 0.7255980413609877, - null, - 0.7042334738295596, - 0.6058132814274794, - null, - 0.7042334738295596, - 0.7703024251104211, - null, - 0.555788147264811, - 0.5805679633404117, - null, - 0.555788147264811, - 
0.5989925957177575, - null, - 0.555788147264811, - 0.48218022499136737, - null, - 0.555788147264811, - 0.5293212253918783, - null, - 0.555788147264811, - 0.47443124751760235, - null, - 0.555788147264811, - 0.5291812256005789, - null, - 0.555788147264811, - 0.5621062195646831, - null, - 0.555788147264811, - 0.5465171974419871, - null, - 0.5805679633404117, - 0.5989925957177575, - null, - 0.5805679633404117, - 0.5845953849421676, - null, - 0.5805679633404117, - 0.6058132814274794, - null, - 0.5805679633404117, - 0.5291812256005789, - null, - 0.5805679633404117, - 0.5621062195646831, - null, - 0.587704695878027, - 0.5593951498649633, - null, - 0.587704695878027, - 0.5845953849421676, - null, - 0.587704695878027, - 0.6058132814274794, - null, - 0.587704695878027, - 0.6322124026692795, - null, - 0.587704695878027, - 0.5201251204037126, - null, - 0.916634041055854, - 0.8613129225222332, - null, - 0.916634041055854, - 0.9005048863870916, - null, - 0.916634041055854, - 0.9240127894624793, - null, - 0.7948577020793985, - 0.7059759544943667, - null, - 0.7948577020793985, - 0.8846357375826375, - null, - 0.7948577020793985, - 0.8323549266756429, - null, - 0.7948577020793985, - 0.8505181106970376, - null, - 0.7948577020793985, - 0.7607451357487841, - null, - 0.7948577020793985, - 0.9110645875753355, - null, - 0.9210876029743161, - 0.9694266665187994, - null, - 0.9210876029743161, - 0.9186278106648778, - null, - 0.9210876029743161, - 0.8846357375826375, - null, - 0.9210876029743161, - 0.8323549266756429, - null, - 0.9210876029743161, - 0.9110645875753355, - null, - 0.9210876029743161, - 0.992283435751248, - null, - 0.834199864808296, - 0.9425745666137786, - null, - 0.834199864808296, - 0.8505181106970376, - null, - 0.834199864808296, - 0.7607451357487841, - null, - 0.5989925957177575, - 0.5845953849421676, - null, - 0.5989925957177575, - 0.6058132814274794, - null, - 0.5989925957177575, - 0.5621062195646831, - null, - 0.05973078995013337, - 0.0914406510425998, - null, - 
0.05973078995013337, - 0.14711158829428328, - null, - 0.05973078995013337, - 0.04237200971819888, - null, - 0.5593951498649633, - 0.4404718698088387, - null, - 0.5593951498649633, - 0.6322124026692795, - null, - 0.5593951498649633, - 0.5201251204037126, - null, - 0.5229468203255856, - 0.4611021425875542, - null, - 0.5229468203255856, - 0.44175944307536974, - null, - 0.5229468203255856, - 0.6149491168624189, - null, - 0.5229468203255856, - 0.6234379896430121, - null, - 0.5229468203255856, - 0.4442228752887084, - null, - 0.5229468203255856, - 0.5593069337955722, - null, - 0.22007362873840486, - 0.2275256207367028, - null, - 0.22007362873840486, - 0.2619562675328274, - null, - 0.22007362873840486, - 0.18507593174525072, - null, - 0.22007362873840486, - 0.3414075728554137, - null, - 0.37301066653863624, - 0.2619562675328274, - null, - 0.37301066653863624, - 0.48218022499136737, - null, - 0.37301066653863624, - 0.47443124751760235, - null, - 0.37301066653863624, - 0.3414075728554137, - null, - 0.37301066653863624, - 0.42077304608666055, - null, - 0.37301066653863624, - 0.4039327719907384, - null, - 0.8613129225222332, - 0.7703024251104211, - null, - 0.8613129225222332, - 0.9005048863870916, - null, - 0.9663892923019699, - 0.9851894520572745, - null, - 0.9663892923019699, - 0.9573079778783831, - null, - 0.9663892923019699, - 0.9473667691929577, - null, - 0.2275256207367028, - 0.2619562675328274, - null, - 0.2275256207367028, - 0.18507593174525072, - null, - 0.2275256207367028, - 0.17086936775877049, - null, - 0.0852382135963593, - 0.0875467755337247, - null, - 0.0852382135963593, - 0.08997327822205015, - null, - 0.0852382135963593, - 0.020212382594376965, - null, - 0.0852382135963593, - 0.02312833765025224, - null, - 0.0852382135963593, - 0.20133087739958255, - null, - 0.0852382135963593, - 0.038579501382332126, - null, - 0.0852382135963593, - 0.01777064460825195, - null, - 0.0914406510425998, - 0.14711158829428328, - null, - 0.0914406510425998, - 0.02312833765025224, - 
null, - 0.0914406510425998, - 0.04237200971819888, - null, - 0.0914406510425998, - 0.01777064460825195, - null, - 0.9425745666137786, - 0.9851894520572745, - null, - 0.9425745666137786, - 0.9473667691929577, - null, - 0.3019474379086241, - 0.2121217358781844, - null, - 0.3019474379086241, - 0.3318561006769827, - null, - 0.3019474379086241, - 0.29978148854693865, - null, - 0.2619562675328274, - 0.3247821296168134, - null, - 0.2619562675328274, - 0.18507593174525072, - null, - 0.2619562675328274, - 0.17086936775877049, - null, - 0.2619562675328274, - 0.3414075728554137, - null, - 0.2619562675328274, - 0.3187675293980876, - null, - 0.2619562675328274, - 0.34114125407236195, - null, - 0.48218022499136737, - 0.5293212253918783, - null, - 0.48218022499136737, - 0.47443124751760235, - null, - 0.48218022499136737, - 0.5291812256005789, - null, - 0.48218022499136737, - 0.5465171974419871, - null, - 0.5293212253918783, - 0.47443124751760235, - null, - 0.5293212253918783, - 0.5291812256005789, - null, - 0.5293212253918783, - 0.5191285820034173, - null, - 0.5293212253918783, - 0.42077304608666055, - null, - 0.5293212253918783, - 0.5465171974419871, - null, - 0.41808707877840445, - 0.42926818011737133, - null, - 0.41808707877840445, - 0.44175944307536974, - null, - 0.41808707877840445, - 0.4363707938884992, - null, - 0.14711158829428328, - 0.21535391032155426, - null, - 0.42926818011737133, - 0.4363707938884992, - null, - 0.9694266665187994, - 0.9186278106648778, - null, - 0.9694266665187994, - 0.8846357375826375, - null, - 0.9694266665187994, - 0.9110645875753355, - null, - 0.9694266665187994, - 0.992283435751248, - null, - 0.4404718698088387, - 0.3393815448042514, - null, - 0.4404718698088387, - 0.32444561774289593, - null, - 0.4404718698088387, - 0.33721825060791266, - null, - 0.4404718698088387, - 0.5201251204037126, - null, - 0.4277213938753692, - 0.3247821296168134, - null, - 0.4277213938753692, - 0.4564806171162211, - null, - 0.4277213938753692, - 0.3187675293980876, - 
null, - 0.4277213938753692, - 0.34114125407236195, - null, - 0.7059759544943667, - 0.6149491168624189, - null, - 0.7059759544943667, - 0.6234379896430121, - null, - 0.4611021425875542, - 0.44175944307536974, - null, - 0.4611021425875542, - 0.4349682989231034, - null, - 0.4611021425875542, - 0.4442228752887084, - null, - 0.4611021425875542, - 0.5593069337955722, - null, - 0.13940667248499528, - 0.0201693226965588, - null, - 0.13940667248499528, - 0.16862303760247477, - null, - 0.13940667248499528, - 0.12355952994556385, - null, - 0.3393815448042514, - 0.21535391032155426, - null, - 0.3393815448042514, - 0.25656414507004344, - null, - 0.3393815448042514, - 0.32444561774289593, - null, - 0.3393815448042514, - 0.33721825060791266, - null, - 0.6370268640561303, - 0.7607451357487841, - null, - 0.6370268640561303, - 0.6234379896430121, - null, - 0.6370268640561303, - 0.6314926226168458, - null, - 0.9851894520572745, - 0.9573079778783831, - null, - 0.9851894520572745, - 0.9473667691929577, - null, - 0.3247821296168134, - 0.3187675293980876, - null, - 0.3247821296168134, - 0.34114125407236195, - null, - 0.9186278106648778, - 0.8846357375826375, - null, - 0.9186278106648778, - 0.8323549266756429, - null, - 0.9186278106648778, - 0.9110645875753355, - null, - 0.9186278106648778, - 0.992283435751248, - null, - 0.18507593174525072, - 0.17086936775877049, - null, - 0.18507593174525072, - 0.0897773631019545, - null, - 0.18507593174525072, - 0.20133087739958255, - null, - 0.5845953849421676, - 0.6058132814274794, - null, - 0.5845953849421676, - 0.5621062195646831, - null, - 0.44175944307536974, - 0.4442228752887084, - null, - 0.7255980413609877, - 0.7703024251104211, - null, - 0.7255980413609877, - 0.7518492361353024, - null, - 0.6058132814274794, - 0.5621062195646831, - null, - 0.47443124751760235, - 0.5291812256005789, - null, - 0.47443124751760235, - 0.5465171974419871, - null, - 0.9573079778783831, - 0.9473667691929577, - null, - 0.9573079778783831, - 0.838803404513024, - null, 
- 0.0201693226965588, - 0.12355952994556385, - null, - 0.0201693226965588, - 0.04781523934390508, - null, - 0.17086936775877049, - 0.0897773631019545, - null, - 0.17086936775877049, - 0.20133087739958255, - null, - 0.5291812256005789, - 0.5621062195646831, - null, - 0.5291812256005789, - 0.5465171974419871, - null, - 0.16862303760247477, - 0.12355952994556385, - null, - 0.8846357375826375, - 0.8323549266756429, - null, - 0.8846357375826375, - 0.8505181106970376, - null, - 0.8846357375826375, - 0.9110645875753355, - null, - 0.8846357375826375, - 0.992283435751248, - null, - 0.0875467755337247, - 0.08997327822205015, - null, - 0.0875467755337247, - 0.020212382594376965, - null, - 0.0875467755337247, - 0.02312833765025224, - null, - 0.0875467755337247, - 0.20133087739958255, - null, - 0.0875467755337247, - 0.038579501382332126, - null, - 0.0875467755337247, - 0.01777064460825195, - null, - 0.9473667691929577, - 0.838803404513024, - null, - 0.8541827253649632, - 0.8680862155815134, - null, - 0.8541827253649632, - 0.8668565351624634, - null, - 0.3414075728554137, - 0.42077304608666055, - null, - 0.3414075728554137, - 0.4039327719907384, - null, - 0.3318561006769827, - 0.4349682989231034, - null, - 0.3318561006769827, - 0.29978148854693865, - null, - 0.7408684543182315, - 0.8385234321105272, - null, - 0.7408684543182315, - 0.6322124026692795, - null, - 0.7408684543182315, - 0.7333209824474588, - null, - 0.6149491168624189, - 0.6234379896430121, - null, - 0.6149491168624189, - 0.5593069337955722, - null, - 0.12355952994556385, - 0.04781523934390508, - null, - 0.08997327822205015, - 0.020212382594376965, - null, - 0.08997327822205015, - 0.02312833765025224, - null, - 0.08997327822205015, - 0.20133087739958255, - null, - 0.08997327822205015, - 0.04237200971819888, - null, - 0.08997327822205015, - 0.038579501382332126, - null, - 0.08997327822205015, - 0.01777064460825195, - null, - 0.21535391032155426, - 0.25656414507004344, - null, - 0.21535391032155426, - 
0.32444561774289593, - null, - 0.21535391032155426, - 0.33721825060791266, - null, - 0.8323549266756429, - 0.9110645875753355, - null, - 0.8385234321105272, - 0.8668565351624634, - null, - 0.8385234321105272, - 0.7333209824474588, - null, - 0.9240127894624793, - 0.9636590456207981, - null, - 0.9240127894624793, - 0.8680862155815134, - null, - 0.6802728591951641, - 0.6314926226168458, - null, - 0.25656414507004344, - 0.32444561774289593, - null, - 0.25656414507004344, - 0.33721825060791266, - null, - 0.020212382594376965, - 0.02312833765025224, - null, - 0.020212382594376965, - 0.038579501382332126, - null, - 0.020212382594376965, - 0.01777064460825195, - null, - 0.32444561774289593, - 0.33721825060791266, - null, - 0.838803404513024, - 0.7518492361353024, - null, - 0.6322124026692795, - 0.5201251204037126, - null, - 0.6322124026692795, - 0.7333209824474588, - null, - 0.8505181106970376, - 0.7607451357487841, - null, - 0.8505181106970376, - 0.9110645875753355, - null, - 0.0897773631019545, - 0.04781523934390508, - null, - 0.02312833765025224, - 0.04237200971819888, - null, - 0.02312833765025224, - 0.038579501382332126, - null, - 0.02312833765025224, - 0.01777064460825195, - null, - 0.05596958524873419, - 0.014269300880037306, - null, - 0.3187675293980876, - 0.20133087739958255, - null, - 0.3187675293980876, - 0.34114125407236195, - null, - 0.5191285820034173, - 0.42077304608666055, - null, - 0.5191285820034173, - 0.5465171974419871, - null, - 0.4349682989231034, - 0.4442228752887084, - null, - 0.9636590456207981, - 0.8680862155815134, - null, - 0.8680862155815134, - 0.8668565351624634, - null, - 0.6234379896430121, - 0.5593069337955722, - null, - 0.04237200971819888, - 0.038579501382332126, - null, - 0.04237200971819888, - 0.01777064460825195, - null, - 0.038579501382332126, - 0.01777064460825195, - null, - 0.9110645875753355, - 0.992283435751248, - null, - 0.42077304608666055, - 0.4039327719907384, - null - ], - "y": [ - 0.09053726824382247, - 0.17542400609184483, 
- null, - 0.09053726824382247, - 0.055894273053114896, - null, - 0.09053726824382247, - 0.14933184162295132, - null, - 0.09053726824382247, - 0.1278305132468397, - null, - 0.09053726824382247, - 0.0731473655342364, - null, - 0.09053726824382247, - 0.09533319097359638, - null, - 0.09053726824382247, - 0.055897802218322856, - null, - 0.09053726824382247, - 0.04153202488293273, - null, - 0.09053726824382247, - 0.06013197669987258, - null, - 0.09053726824382247, - 0.040563128366188694, - null, - 0.09053726824382247, - 0.09959517902538939, - null, - 0.571085214777101, - 0.5944498275635773, - null, - 0.571085214777101, - 0.6773365837969099, - null, - 0.571085214777101, - 0.6160873747407943, - null, - 0.571085214777101, - 0.5186581897030644, - null, - 0.571085214777101, - 0.5634679987017406, - null, - 0.571085214777101, - 0.6012106694454529, - null, - 0.571085214777101, - 0.4898861106787329, - null, - 0.571085214777101, - 0.45431497833000367, - null, - 0.571085214777101, - 0.5204579980957379, - null, - 0.571085214777101, - 0.6352288779182178, - null, - 0.571085214777101, - 0.5981086798045652, - null, - 0.571085214777101, - 0.6648266103848882, - null, - 0.571085214777101, - 0.6072525121642058, - null, - 0.571085214777101, - 0.5260776190209286, - null, - 0.5199666766946885, - 0.5219101415039136, - null, - 0.5199666766946885, - 0.4307004647175262, - null, - 0.5199666766946885, - 0.4834545718278357, - null, - 0.5199666766946885, - 0.4847615611240751, - null, - 0.5199666766946885, - 0.4318165589087314, - null, - 0.5199666766946885, - 0.6217058876501556, - null, - 0.5199666766946885, - 0.5097617399826666, - null, - 0.33766327379542094, - 0.33811323660241943, - null, - 0.33766327379542094, - 0.31304614249644347, - null, - 0.33766327379542094, - 0.2697998035002954, - null, - 0.33766327379542094, - 0.2695720924906413, - null, - 0.33766327379542094, - 0.24454670425362057, - null, - 0.33766327379542094, - 0.4500538798110242, - null, - 0.33766327379542094, - 0.2880647319459674, - 
null, - 0.33766327379542094, - 0.3202314429055858, - null, - 0.33766327379542094, - 0.3169605131706372, - null, - 0.33766327379542094, - 0.32345881810688737, - null, - 0.17196466768963936, - 0.2009582712064717, - null, - 0.17196466768963936, - 0.15069304516745607, - null, - 0.17196466768963936, - 0.06016942899581168, - null, - 0.17196466768963936, - 0.24013807075121119, - null, - 0.17196466768963936, - 0.2693681584998491, - null, - 0.17196466768963936, - 0.10059463740220753, - null, - 0.17196466768963936, - 0.09959517902538939, - null, - 0.17708608014427518, - 0.2373268562908326, - null, - 0.17708608014427518, - 0.23741932367240448, - null, - 0.17708608014427518, - 0.05938145280899054, - null, - 0.17708608014427518, - 0.17619771419691865, - null, - 0.17708608014427518, - 0.21532966919867302, - null, - 0.17708608014427518, - 0.10782775946098799, - null, - 0.17708608014427518, - 0.249116699886752, - null, - 0.17708608014427518, - 0.23700988477155205, - null, - 0.17708608014427518, - 0.1341994714416056, - null, - 0.17708608014427518, - 0.29050814087118004, - null, - 0.04649454781195783, - 0.07011604000159166, - null, - 0.04649454781195783, - 0.019989772968585173, - null, - 0.04649454781195783, - 0.038844634468288675, - null, - 0.37080565676900146, - 0.3648985367210805, - null, - 0.37080565676900146, - 0.3343459796676115, - null, - 0.37080565676900146, - 0.4208812619135248, - null, - 0.37080565676900146, - 0.3900960314334032, - null, - 0.37080565676900146, - 0.3098874271134545, - null, - 0.37080565676900146, - 0.4295667428124167, - null, - 0.37080565676900146, - 0.35350564895305514, - null, - 0.37080565676900146, - 0.31541428705224306, - null, - 0.37080565676900146, - 0.4467311570808764, - null, - 0.37080565676900146, - 0.2693681584998491, - null, - 0.37080565676900146, - 0.4421375373865315, - null, - 0.37080565676900146, - 0.42641694849778966, - null, - 0.37080565676900146, - 0.3333136626479075, - null, - 0.37080565676900146, - 0.4868902788925622, - null, - 
0.3602866247185619, - 0.3061539627540061, - null, - 0.3602866247185619, - 0.3661437355856225, - null, - 0.3602866247185619, - 0.40557198035837094, - null, - 0.3602866247185619, - 0.3192831323823997, - null, - 0.3602866247185619, - 0.41535454584101794, - null, - 0.3602866247185619, - 0.40395348439090084, - null, - 0.3602866247185619, - 0.3340702546567942, - null, - 0.3602866247185619, - 0.4248880785102581, - null, - 0.3602866247185619, - 0.29119156039108685, - null, - 0.9483925173875926, - 0.8957623407464501, - null, - 0.9483925173875926, - 0.9727770125665405, - null, - 0.9483925173875926, - 0.8791466031622056, - null, - 0.9483925173875926, - 0.922341377568881, - null, - 0.9483925173875926, - 0.8821215709600496, - null, - 0.9483925173875926, - 0.9328536520894143, - null, - 0.9483925173875926, - 0.9344432405222354, - null, - 0.9483925173875926, - 0.9642772106357639, - null, - 0.3061539627540061, - 0.3661437355856225, - null, - 0.3061539627540061, - 0.40557198035837094, - null, - 0.3061539627540061, - 0.3192831323823997, - null, - 0.3061539627540061, - 0.32266487999330984, - null, - 0.3061539627540061, - 0.40395348439090084, - null, - 0.3061539627540061, - 0.3340702546567942, - null, - 0.3061539627540061, - 0.29119156039108685, - null, - 0.3061539627540061, - 0.19048093242734687, - null, - 0.9643804220706982, - 0.9298960866412943, - null, - 0.9643804220706982, - 0.8599268392047722, - null, - 0.9643804220706982, - 0.9435179236599912, - null, - 0.9643804220706982, - 0.9958360522915445, - null, - 0.9643804220706982, - 0.9756800437762957, - null, - 0.9643804220706982, - 0.8842114977564064, - null, - 0.8336885167043149, - 0.938767234846119, - null, - 0.8336885167043149, - 0.8867112408398291, - null, - 0.8336885167043149, - 0.9298960866412943, - null, - 0.8336885167043149, - 0.8599268392047722, - null, - 0.8336885167043149, - 0.8508124987550889, - null, - 0.8336885167043149, - 0.8842114977564064, - null, - 0.5944498275635773, - 0.5216765314868881, - null, - 
0.5944498275635773, - 0.6001026871900049, - null, - 0.5944498275635773, - 0.6012106694454529, - null, - 0.5944498275635773, - 0.4898861106787329, - null, - 0.5944498275635773, - 0.5204579980957379, - null, - 0.5944498275635773, - 0.6352288779182178, - null, - 0.5944498275635773, - 0.6648266103848882, - null, - 0.5944498275635773, - 0.6072525121642058, - null, - 0.2373268562908326, - 0.23741932367240448, - null, - 0.2373268562908326, - 0.33811323660241943, - null, - 0.2373268562908326, - 0.17542400609184483, - null, - 0.2373268562908326, - 0.17619771419691865, - null, - 0.2373268562908326, - 0.1278305132468397, - null, - 0.2373268562908326, - 0.21532966919867302, - null, - 0.2373268562908326, - 0.2695720924906413, - null, - 0.2373268562908326, - 0.24454670425362057, - null, - 0.2373268562908326, - 0.20002447568886628, - null, - 0.2373268562908326, - 0.3202314429055858, - null, - 0.2373268562908326, - 0.1341994714416056, - null, - 0.2373268562908326, - 0.3169605131706372, - null, - 0.2373268562908326, - 0.29050814087118004, - null, - 0.2373268562908326, - 0.32345881810688737, - null, - 0.23741932367240448, - 0.32127102230894566, - null, - 0.23741932367240448, - 0.2381682330796122, - null, - 0.23741932367240448, - 0.17296378957033465, - null, - 0.23741932367240448, - 0.24033413659841596, - null, - 0.23741932367240448, - 0.2981410655965283, - null, - 0.23741932367240448, - 0.3199684158322815, - null, - 0.23741932367240448, - 0.2660491488293679, - null, - 0.23741932367240448, - 0.249116699886752, - null, - 0.23741932367240448, - 0.23700988477155205, - null, - 0.23741932367240448, - 0.29050814087118004, - null, - 0.23741932367240448, - 0.2318219208408404, - null, - 0.32127102230894566, - 0.39453602200590676, - null, - 0.32127102230894566, - 0.2381682330796122, - null, - 0.32127102230894566, - 0.3309683982450944, - null, - 0.32127102230894566, - 0.24033413659841596, - null, - 0.32127102230894566, - 0.2981410655965283, - null, - 0.32127102230894566, - 0.3199684158322815, - 
null, - 0.32127102230894566, - 0.3355480553373167, - null, - 0.32127102230894566, - 0.2660491488293679, - null, - 0.32127102230894566, - 0.3635517670405215, - null, - 0.32127102230894566, - 0.249116699886752, - null, - 0.32127102230894566, - 0.23700988477155205, - null, - 0.32127102230894566, - 0.28871122138225125, - null, - 0.32127102230894566, - 0.42203254876563234, - null, - 0.32127102230894566, - 0.2318219208408404, - null, - 0.3661437355856225, - 0.3343459796676115, - null, - 0.3661437355856225, - 0.4307004647175262, - null, - 0.3661437355856225, - 0.40557198035837094, - null, - 0.3661437355856225, - 0.4834545718278357, - null, - 0.3661437355856225, - 0.4847615611240751, - null, - 0.3661437355856225, - 0.32266487999330984, - null, - 0.3661437355856225, - 0.4318165589087314, - null, - 0.3661437355856225, - 0.3340702546567942, - null, - 0.7791505090281524, - 0.8520196094107113, - null, - 0.7791505090281524, - 0.8848427298858184, - null, - 0.3648985367210805, - 0.3343459796676115, - null, - 0.3648985367210805, - 0.4208812619135248, - null, - 0.3648985367210805, - 0.4307004647175262, - null, - 0.3648985367210805, - 0.3900960314334032, - null, - 0.3648985367210805, - 0.3098874271134545, - null, - 0.3648985367210805, - 0.32266487999330984, - null, - 0.3648985367210805, - 0.4295667428124167, - null, - 0.3648985367210805, - 0.35350564895305514, - null, - 0.3648985367210805, - 0.31541428705224306, - null, - 0.3648985367210805, - 0.2693681584998491, - null, - 0.3648985367210805, - 0.42641694849778966, - null, - 0.3648985367210805, - 0.3333136626479075, - null, - 0.6244837238804738, - 0.5850986908522726, - null, - 0.6244837238804738, - 0.6267294109959968, - null, - 0.6244837238804738, - 0.5221172076712435, - null, - 0.6244837238804738, - 0.5717872069066212, - null, - 0.6244837238804738, - 0.7302384542961842, - null, - 0.6244837238804738, - 0.6710484758334021, - null, - 0.6244837238804738, - 0.5492873750243871, - null, - 0.6244837238804738, - 0.6201266549140614, - null, - 
0.6244837238804738, - 0.5752985482362863, - null, - 0.9012137046519791, - 0.8622415881936324, - null, - 0.9012137046519791, - 0.8350595230795331, - null, - 0.5219101415039136, - 0.4307004647175262, - null, - 0.5219101415039136, - 0.4834545718278357, - null, - 0.5219101415039136, - 0.4847615611240751, - null, - 0.5219101415039136, - 0.4295667428124167, - null, - 0.5219101415039136, - 0.5144551437666581, - null, - 0.5219101415039136, - 0.42641694849778966, - null, - 0.5219101415039136, - 0.6014235590484225, - null, - 0.5219101415039136, - 0.4868902788925622, - null, - 0.39453602200590676, - 0.3309683982450944, - null, - 0.39453602200590676, - 0.2981410655965283, - null, - 0.39453602200590676, - 0.3199684158322815, - null, - 0.39453602200590676, - 0.45431497833000367, - null, - 0.39453602200590676, - 0.42052616285893474, - null, - 0.39453602200590676, - 0.4107398412471005, - null, - 0.39453602200590676, - 0.3635517670405215, - null, - 0.39453602200590676, - 0.42203254876563234, - null, - 0.2009582712064717, - 0.20619722773579274, - null, - 0.2009582712064717, - 0.3098874271134545, - null, - 0.2009582712064717, - 0.15069304516745607, - null, - 0.2009582712064717, - 0.31541428705224306, - null, - 0.2009582712064717, - 0.24013807075121119, - null, - 0.2009582712064717, - 0.2693681584998491, - null, - 0.2009582712064717, - 0.22993075379681738, - null, - 0.2009582712064717, - 0.09959517902538939, - null, - 0.04224314617430658, - 0.05938145280899054, - null, - 0.04224314617430658, - 0.04283815208078323, - null, - 0.04224314617430658, - 0.010366221042083845, - null, - 0.04224314617430658, - 0.10782775946098799, - null, - 0.04224314617430658, - 0.03395115206665145, - null, - 0.04224314617430658, - 0.05477321631284726, - null, - 0.2381682330796122, - 0.3309683982450944, - null, - 0.2381682330796122, - 0.17296378957033465, - null, - 0.2381682330796122, - 0.24033413659841596, - null, - 0.2381682330796122, - 0.2981410655965283, - null, - 0.2381682330796122, - 0.3355480553373167, 
- null, - 0.2381682330796122, - 0.2660491488293679, - null, - 0.2381682330796122, - 0.249116699886752, - null, - 0.2381682330796122, - 0.23700988477155205, - null, - 0.2381682330796122, - 0.28871122138225125, - null, - 0.2381682330796122, - 0.2002886163837997, - null, - 0.2381682330796122, - 0.13201947050262697, - null, - 0.2381682330796122, - 0.2318219208408404, - null, - 0.2381682330796122, - 0.20307680326083377, - null, - 0.33811323660241943, - 0.31304614249644347, - null, - 0.33811323660241943, - 0.2695720924906413, - null, - 0.33811323660241943, - 0.24454670425362057, - null, - 0.33811323660241943, - 0.4500538798110242, - null, - 0.33811323660241943, - 0.4140065537970282, - null, - 0.33811323660241943, - 0.2880647319459674, - null, - 0.33811323660241943, - 0.3202314429055858, - null, - 0.33811323660241943, - 0.3169605131706372, - null, - 0.33811323660241943, - 0.32345881810688737, - null, - 0.5216765314868881, - 0.6001026871900049, - null, - 0.5216765314868881, - 0.6012106694454529, - null, - 0.5216765314868881, - 0.4898861106787329, - null, - 0.5216765314868881, - 0.5204579980957379, - null, - 0.5216765314868881, - 0.4500538798110242, - null, - 0.5216765314868881, - 0.42052616285893474, - null, - 0.5216765314868881, - 0.4140065537970282, - null, - 0.5216765314868881, - 0.4937592635708411, - null, - 0.6001026871900049, - 0.6012106694454529, - null, - 0.6001026871900049, - 0.6705222836834548, - null, - 0.6001026871900049, - 0.5204579980957379, - null, - 0.6001026871900049, - 0.6648266103848882, - null, - 0.991844460003468, - 0.9727770125665405, - null, - 0.991844460003468, - 0.938767234846119, - null, - 0.991844460003468, - 0.9874110419208606, - null, - 0.991844460003468, - 0.9328536520894143, - null, - 0.991844460003468, - 0.9078978130468089, - null, - 0.991844460003468, - 0.9513646744432486, - null, - 0.3343459796676115, - 0.4307004647175262, - null, - 0.3343459796676115, - 0.3900960314334032, - null, - 0.3343459796676115, - 0.3098874271134545, - null, - 
0.3343459796676115, - 0.32266487999330984, - null, - 0.3343459796676115, - 0.4295667428124167, - null, - 0.3343459796676115, - 0.35350564895305514, - null, - 0.3343459796676115, - 0.31541428705224306, - null, - 0.3343459796676115, - 0.24013807075121119, - null, - 0.3343459796676115, - 0.2693681584998491, - null, - 0.3343459796676115, - 0.3333136626479075, - null, - 0.8957623407464501, - 0.9727770125665405, - null, - 0.8957623407464501, - 0.8423383207045981, - null, - 0.8957623407464501, - 0.8791466031622056, - null, - 0.8957623407464501, - 0.8151159149468827, - null, - 0.8957623407464501, - 0.922341377568881, - null, - 0.8957623407464501, - 0.8821215709600496, - null, - 0.8957623407464501, - 0.9328536520894143, - null, - 0.8957623407464501, - 0.9344432405222354, - null, - 0.8957623407464501, - 0.9642772106357639, - null, - 0.8957623407464501, - 0.8157570218353161, - null, - 0.8957623407464501, - 0.7925454632595156, - null, - 0.8957623407464501, - 0.888980486534156, - null, - 0.4208812619135248, - 0.3900960314334032, - null, - 0.4208812619135248, - 0.3098874271134545, - null, - 0.4208812619135248, - 0.4295667428124167, - null, - 0.4208812619135248, - 0.35350564895305514, - null, - 0.4208812619135248, - 0.4467311570808764, - null, - 0.4208812619135248, - 0.5144551437666581, - null, - 0.4208812619135248, - 0.4421375373865315, - null, - 0.4208812619135248, - 0.42641694849778966, - null, - 0.4208812619135248, - 0.3333136626479075, - null, - 0.4208812619135248, - 0.4868902788925622, - null, - 0.31304614249644347, - 0.2697998035002954, - null, - 0.31304614249644347, - 0.2695720924906413, - null, - 0.31304614249644347, - 0.24454670425362057, - null, - 0.31304614249644347, - 0.20619722773579274, - null, - 0.31304614249644347, - 0.3098874271134545, - null, - 0.31304614249644347, - 0.2880647319459674, - null, - 0.31304614249644347, - 0.3202314429055858, - null, - 0.31304614249644347, - 0.22993075379681738, - null, - 0.31304614249644347, - 0.3169605131706372, - null, - 
0.31304614249644347, - 0.32345881810688737, - null, - 0.6773365837969099, - 0.6628083689885368, - null, - 0.6773365837969099, - 0.6160873747407943, - null, - 0.6773365837969099, - 0.7537809293531343, - null, - 0.6773365837969099, - 0.5634679987017406, - null, - 0.6773365837969099, - 0.6012106694454529, - null, - 0.6773365837969099, - 0.6352288779182178, - null, - 0.6773365837969099, - 0.5981086798045652, - null, - 0.6773365837969099, - 0.6648266103848882, - null, - 0.6773365837969099, - 0.6072525121642058, - null, - 0.4307004647175262, - 0.40557198035837094, - null, - 0.4307004647175262, - 0.4834545718278357, - null, - 0.4307004647175262, - 0.4847615611240751, - null, - 0.4307004647175262, - 0.32266487999330984, - null, - 0.4307004647175262, - 0.4295667428124167, - null, - 0.4307004647175262, - 0.35350564895305514, - null, - 0.4307004647175262, - 0.4318165589087314, - null, - 0.4307004647175262, - 0.5097617399826666, - null, - 0.3309683982450944, - 0.24033413659841596, - null, - 0.3309683982450944, - 0.2981410655965283, - null, - 0.3309683982450944, - 0.3199684158322815, - null, - 0.3309683982450944, - 0.3355480553373167, - null, - 0.3309683982450944, - 0.2660491488293679, - null, - 0.3309683982450944, - 0.3635517670405215, - null, - 0.3309683982450944, - 0.249116699886752, - null, - 0.3309683982450944, - 0.23700988477155205, - null, - 0.3309683982450944, - 0.28871122138225125, - null, - 0.3309683982450944, - 0.42203254876563234, - null, - 0.3309683982450944, - 0.2318219208408404, - null, - 0.2697998035002954, - 0.17542400609184483, - null, - 0.2697998035002954, - 0.2695720924906413, - null, - 0.2697998035002954, - 0.24454670425362057, - null, - 0.2697998035002954, - 0.20619722773579274, - null, - 0.2697998035002954, - 0.3098874271134545, - null, - 0.2697998035002954, - 0.2880647319459674, - null, - 0.2697998035002954, - 0.22993075379681738, - null, - 0.2697998035002954, - 0.32345881810688737, - null, - 0.9727770125665405, - 0.922341377568881, - null, - 
0.9727770125665405, - 0.8821215709600496, - null, - 0.9727770125665405, - 0.9328536520894143, - null, - 0.9727770125665405, - 0.9642772106357639, - null, - 0.40557198035837094, - 0.4834545718278357, - null, - 0.40557198035837094, - 0.4847615611240751, - null, - 0.40557198035837094, - 0.32266487999330984, - null, - 0.40557198035837094, - 0.40395348439090084, - null, - 0.40557198035837094, - 0.4318165589087314, - null, - 0.40557198035837094, - 0.3340702546567942, - null, - 0.40557198035837094, - 0.4248880785102581, - null, - 0.40557198035837094, - 0.5097617399826666, - null, - 0.35532572275494023, - 0.24028581536328997, - null, - 0.35532572275494023, - 0.3192831323823997, - null, - 0.35532572275494023, - 0.2529891644068947, - null, - 0.35532572275494023, - 0.41535454584101794, - null, - 0.35532572275494023, - 0.40395348439090084, - null, - 0.35532572275494023, - 0.4248880785102581, - null, - 0.35532572275494023, - 0.29119156039108685, - null, - 0.35532572275494023, - 0.24102842320743, - null, - 0.5850986908522726, - 0.6267294109959968, - null, - 0.5850986908522726, - 0.5221172076712435, - null, - 0.5850986908522726, - 0.5717872069066212, - null, - 0.5850986908522726, - 0.5492873750243871, - null, - 0.5850986908522726, - 0.6201266549140614, - null, - 0.5850986908522726, - 0.5752985482362863, - null, - 0.5850986908522726, - 0.5097617399826666, - null, - 0.17296378957033465, - 0.24033413659841596, - null, - 0.17296378957033465, - 0.2660491488293679, - null, - 0.17296378957033465, - 0.10782775946098799, - null, - 0.17296378957033465, - 0.249116699886752, - null, - 0.17296378957033465, - 0.23700988477155205, - null, - 0.17296378957033465, - 0.28871122138225125, - null, - 0.17296378957033465, - 0.2002886163837997, - null, - 0.17296378957033465, - 0.13201947050262697, - null, - 0.17296378957033465, - 0.2318219208408404, - null, - 0.17296378957033465, - 0.20307680326083377, - null, - 0.6628083689885368, - 0.6160873747407943, - null, - 0.6628083689885368, - 
0.7537809293531343, - null, - 0.6628083689885368, - 0.5634679987017406, - null, - 0.6628083689885368, - 0.6352288779182178, - null, - 0.6628083689885368, - 0.5981086798045652, - null, - 0.6628083689885368, - 0.6072525121642058, - null, - 0.6160873747407943, - 0.5186581897030644, - null, - 0.6160873747407943, - 0.5634679987017406, - null, - 0.6160873747407943, - 0.6012106694454529, - null, - 0.6160873747407943, - 0.6352288779182178, - null, - 0.6160873747407943, - 0.5981086798045652, - null, - 0.6160873747407943, - 0.6648266103848882, - null, - 0.6160873747407943, - 0.6072525121642058, - null, - 0.6160873747407943, - 0.5260776190209286, - null, - 0.025297953521542405, - 0.06016942899581168, - null, - 0.025297953521542405, - 0.055897802218322856, - null, - 0.025297953521542405, - 0.09959517902538939, - null, - 0.24028581536328997, - 0.3192831323823997, - null, - 0.24028581536328997, - 0.1333966979371528, - null, - 0.24028581536328997, - 0.2529891644068947, - null, - 0.24028581536328997, - 0.29119156039108685, - null, - 0.24028581536328997, - 0.24102842320743, - null, - 0.5186581897030644, - 0.5634679987017406, - null, - 0.5186581897030644, - 0.6012106694454529, - null, - 0.5186581897030644, - 0.4898861106787329, - null, - 0.5186581897030644, - 0.45431497833000367, - null, - 0.5186581897030644, - 0.6352288779182178, - null, - 0.5186581897030644, - 0.5981086798045652, - null, - 0.5186581897030644, - 0.6072525121642058, - null, - 0.5186581897030644, - 0.42203254876563234, - null, - 0.5186581897030644, - 0.5260776190209286, - null, - 0.8423383207045981, - 0.8151159149468827, - null, - 0.8423383207045981, - 0.7334929583472656, - null, - 0.8423383207045981, - 0.8821215709600496, - null, - 0.8423383207045981, - 0.9328536520894143, - null, - 0.8423383207045981, - 0.7834166246251234, - null, - 0.8423383207045981, - 0.9078978130468089, - null, - 0.8423383207045981, - 0.7925454632595156, - null, - 0.8423383207045981, - 0.8508124987550889, - null, - 0.7537809293531343, - 
0.8622415881936324, - null, - 0.7537809293531343, - 0.8350595230795331, - null, - 0.3192831323823997, - 0.2529891644068947, - null, - 0.3192831323823997, - 0.41535454584101794, - null, - 0.3192831323823997, - 0.40395348439090084, - null, - 0.3192831323823997, - 0.4248880785102581, - null, - 0.3192831323823997, - 0.29119156039108685, - null, - 0.3192831323823997, - 0.24102842320743, - null, - 0.17542400609184483, - 0.055894273053114896, - null, - 0.17542400609184483, - 0.14933184162295132, - null, - 0.17542400609184483, - 0.17619771419691865, - null, - 0.17542400609184483, - 0.1278305132468397, - null, - 0.17542400609184483, - 0.21532966919867302, - null, - 0.17542400609184483, - 0.0731473655342364, - null, - 0.17542400609184483, - 0.2695720924906413, - null, - 0.17542400609184483, - 0.09533319097359638, - null, - 0.17542400609184483, - 0.24454670425362057, - null, - 0.17542400609184483, - 0.20002447568886628, - null, - 0.17542400609184483, - 0.20619722773579274, - null, - 0.17542400609184483, - 0.2880647319459674, - null, - 0.17542400609184483, - 0.1341994714416056, - null, - 0.17542400609184483, - 0.22993075379681738, - null, - 0.008409380348177398, - 0.04149975738749545, - null, - 0.938767234846119, - 0.9874110419208606, - null, - 0.938767234846119, - 0.8867112408398291, - null, - 0.938767234846119, - 0.9298960866412943, - null, - 0.938767234846119, - 0.9078978130468089, - null, - 0.938767234846119, - 0.9958360522915445, - null, - 0.938767234846119, - 0.8508124987550889, - null, - 0.938767234846119, - 0.8842114977564064, - null, - 0.938767234846119, - 0.9513646744432486, - null, - 0.24033413659841596, - 0.2981410655965283, - null, - 0.24033413659841596, - 0.3199684158322815, - null, - 0.24033413659841596, - 0.3355480553373167, - null, - 0.24033413659841596, - 0.2660491488293679, - null, - 0.24033413659841596, - 0.249116699886752, - null, - 0.24033413659841596, - 0.23700988477155205, - null, - 0.24033413659841596, - 0.28871122138225125, - null, - 
0.24033413659841596, - 0.2002886163837997, - null, - 0.24033413659841596, - 0.13201947050262697, - null, - 0.24033413659841596, - 0.2318219208408404, - null, - 0.24033413659841596, - 0.20307680326083377, - null, - 0.8791466031622056, - 0.922341377568881, - null, - 0.8791466031622056, - 0.8821215709600496, - null, - 0.8791466031622056, - 0.9542382277667263, - null, - 0.8791466031622056, - 0.9024846524956353, - null, - 0.8791466031622056, - 0.9344432405222354, - null, - 0.8791466031622056, - 0.9642772106357639, - null, - 0.8791466031622056, - 0.8157570218353161, - null, - 0.8791466031622056, - 0.888980486534156, - null, - 0.5634679987017406, - 0.6012106694454529, - null, - 0.5634679987017406, - 0.6352288779182178, - null, - 0.5634679987017406, - 0.5981086798045652, - null, - 0.5634679987017406, - 0.6072525121642058, - null, - 0.5634679987017406, - 0.5260776190209286, - null, - 0.05938145280899054, - 0.04283815208078323, - null, - 0.05938145280899054, - 0.10782775946098799, - null, - 0.05938145280899054, - 0.03395115206665145, - null, - 0.05938145280899054, - 0.05477321631284726, - null, - 0.05938145280899054, - 0.1341994714416056, - null, - 0.6012106694454529, - 0.4898861106787329, - null, - 0.6012106694454529, - 0.5204579980957379, - null, - 0.6012106694454529, - 0.6352288779182178, - null, - 0.6012106694454529, - 0.6648266103848882, - null, - 0.6012106694454529, - 0.6072525121642058, - null, - 0.6705222836834548, - 0.7454337953380579, - null, - 0.6705222836834548, - 0.7077207700167599, - null, - 0.6705222836834548, - 0.7005910562446783, - null, - 0.6705222836834548, - 0.5603277981830703, - null, - 0.3900960314334032, - 0.3098874271134545, - null, - 0.3900960314334032, - 0.4295667428124167, - null, - 0.3900960314334032, - 0.35350564895305514, - null, - 0.3900960314334032, - 0.31541428705224306, - null, - 0.3900960314334032, - 0.4467311570808764, - null, - 0.3900960314334032, - 0.4421375373865315, - null, - 0.3900960314334032, - 0.42641694849778966, - null, - 
0.3900960314334032, - 0.3333136626479075, - null, - 0.3900960314334032, - 0.4868902788925622, - null, - 0.055894273053114896, - 0.14933184162295132, - null, - 0.055894273053114896, - 0.1278305132468397, - null, - 0.055894273053114896, - 0.0731473655342364, - null, - 0.055894273053114896, - 0.09533319097359638, - null, - 0.055894273053114896, - 0.055897802218322856, - null, - 0.055894273053114896, - 0.04153202488293273, - null, - 0.055894273053114896, - 0.06013197669987258, - null, - 0.055894273053114896, - 0.040563128366188694, - null, - 0.14933184162295132, - 0.17619771419691865, - null, - 0.14933184162295132, - 0.1278305132468397, - null, - 0.14933184162295132, - 0.21532966919867302, - null, - 0.14933184162295132, - 0.0731473655342364, - null, - 0.14933184162295132, - 0.09533319097359638, - null, - 0.14933184162295132, - 0.24454670425362057, - null, - 0.14933184162295132, - 0.20002447568886628, - null, - 0.14933184162295132, - 0.20619722773579274, - null, - 0.14933184162295132, - 0.055897802218322856, - null, - 0.14933184162295132, - 0.04153202488293273, - null, - 0.14933184162295132, - 0.06013197669987258, - null, - 0.14933184162295132, - 0.22993075379681738, - null, - 0.14933184162295132, - 0.040563128366188694, - null, - 0.8151159149468827, - 0.7334929583472656, - null, - 0.8151159149468827, - 0.8821215709600496, - null, - 0.8151159149468827, - 0.9328536520894143, - null, - 0.8151159149468827, - 0.7834166246251234, - null, - 0.8151159149468827, - 0.7925454632595156, - null, - 0.8151159149468827, - 0.8508124987550889, - null, - 0.17619771419691865, - 0.1278305132468397, - null, - 0.17619771419691865, - 0.21532966919867302, - null, - 0.17619771419691865, - 0.0731473655342364, - null, - 0.17619771419691865, - 0.2695720924906413, - null, - 0.17619771419691865, - 0.09533319097359638, - null, - 0.17619771419691865, - 0.24454670425362057, - null, - 0.17619771419691865, - 0.20002447568886628, - null, - 0.17619771419691865, - 0.1341994714416056, - null, - 
0.17619771419691865, - 0.06013197669987258, - null, - 0.2981410655965283, - 0.3199684158322815, - null, - 0.2981410655965283, - 0.3355480553373167, - null, - 0.2981410655965283, - 0.2660491488293679, - null, - 0.2981410655965283, - 0.3635517670405215, - null, - 0.2981410655965283, - 0.249116699886752, - null, - 0.2981410655965283, - 0.23700988477155205, - null, - 0.2981410655965283, - 0.28871122138225125, - null, - 0.2981410655965283, - 0.42203254876563234, - null, - 0.2981410655965283, - 0.2002886163837997, - null, - 0.2981410655965283, - 0.29050814087118004, - null, - 0.2981410655965283, - 0.2318219208408404, - null, - 0.2981410655965283, - 0.20307680326083377, - null, - 0.7334929583472656, - 0.6512622326935055, - null, - 0.7334929583472656, - 0.7205270186163313, - null, - 0.7334929583472656, - 0.7834166246251234, - null, - 0.7334929583472656, - 0.6217058876501556, - null, - 0.7334929583472656, - 0.7925454632595156, - null, - 0.7334929583472656, - 0.6714278208298593, - null, - 0.04283815208078323, - 0.010366221042083845, - null, - 0.04283815208078323, - 0.10782775946098799, - null, - 0.04283815208078323, - 0.03395115206665145, - null, - 0.04283815208078323, - 0.05477321631284726, - null, - 0.04283815208078323, - 0.13201947050262697, - null, - 0.922341377568881, - 0.8821215709600496, - null, - 0.922341377568881, - 0.9542382277667263, - null, - 0.922341377568881, - 0.9328536520894143, - null, - 0.922341377568881, - 0.9961038345306213, - null, - 0.922341377568881, - 0.9344432405222354, - null, - 0.922341377568881, - 0.9642772106357639, - null, - 0.922341377568881, - 0.8157570218353161, - null, - 0.922341377568881, - 0.888980486534156, - null, - 0.3199684158322815, - 0.42052616285893474, - null, - 0.3199684158322815, - 0.2660491488293679, - null, - 0.3199684158322815, - 0.4107398412471005, - null, - 0.3199684158322815, - 0.3635517670405215, - null, - 0.3199684158322815, - 0.249116699886752, - null, - 0.3199684158322815, - 0.23700988477155205, - null, - 
0.3199684158322815, - 0.42203254876563234, - null, - 0.3199684158322815, - 0.29050814087118004, - null, - 0.1278305132468397, - 0.21532966919867302, - null, - 0.1278305132468397, - 0.0731473655342364, - null, - 0.1278305132468397, - 0.09533319097359638, - null, - 0.1278305132468397, - 0.24454670425362057, - null, - 0.1278305132468397, - 0.20002447568886628, - null, - 0.1278305132468397, - 0.04153202488293273, - null, - 0.1278305132468397, - 0.05477321631284726, - null, - 0.1278305132468397, - 0.1341994714416056, - null, - 0.1278305132468397, - 0.06013197669987258, - null, - 0.1278305132468397, - 0.040563128366188694, - null, - 0.21532966919867302, - 0.2695720924906413, - null, - 0.21532966919867302, - 0.09533319097359638, - null, - 0.21532966919867302, - 0.24454670425362057, - null, - 0.21532966919867302, - 0.20002447568886628, - null, - 0.21532966919867302, - 0.3202314429055858, - null, - 0.21532966919867302, - 0.1341994714416056, - null, - 0.21532966919867302, - 0.3169605131706372, - null, - 0.21532966919867302, - 0.29050814087118004, - null, - 0.21532966919867302, - 0.32345881810688737, - null, - 0.0731473655342364, - 0.09533319097359638, - null, - 0.0731473655342364, - 0.055897802218322856, - null, - 0.0731473655342364, - 0.04153202488293273, - null, - 0.0731473655342364, - 0.05477321631284726, - null, - 0.0731473655342364, - 0.1341994714416056, - null, - 0.0731473655342364, - 0.06013197669987258, - null, - 0.0731473655342364, - 0.040563128366188694, - null, - 0.4898861106787329, - 0.45431497833000367, - null, - 0.4898861106787329, - 0.5204579980957379, - null, - 0.4898861106787329, - 0.42052616285893474, - null, - 0.4898861106787329, - 0.4140065537970282, - null, - 0.4898861106787329, - 0.4107398412471005, - null, - 0.2695720924906413, - 0.24454670425362057, - null, - 0.2695720924906413, - 0.20002447568886628, - null, - 0.2695720924906413, - 0.2880647319459674, - null, - 0.2695720924906413, - 0.3202314429055858, - null, - 0.2695720924906413, - 
0.3169605131706372, - null, - 0.2695720924906413, - 0.32345881810688737, - null, - 0.09533319097359638, - 0.20002447568886628, - null, - 0.09533319097359638, - 0.055897802218322856, - null, - 0.09533319097359638, - 0.04153202488293273, - null, - 0.09533319097359638, - 0.05477321631284726, - null, - 0.09533319097359638, - 0.1341994714416056, - null, - 0.09533319097359638, - 0.06013197669987258, - null, - 0.09533319097359638, - 0.040563128366188694, - null, - 0.9874110419208606, - 0.8867112408398291, - null, - 0.9874110419208606, - 0.9298960866412943, - null, - 0.9874110419208606, - 0.9078978130468089, - null, - 0.9874110419208606, - 0.9958360522915445, - null, - 0.9874110419208606, - 0.9513646744432486, - null, - 0.1333966979371528, - 0.2529891644068947, - null, - 0.1333966979371528, - 0.04149975738749545, - null, - 0.1333966979371528, - 0.16781555203357146, - null, - 0.1333966979371528, - 0.24102842320743, - null, - 0.1333966979371528, - 0.19048093242734687, - null, - 0.1333966979371528, - 0.1294716874165911, - null, - 0.2529891644068947, - 0.29119156039108685, - null, - 0.2529891644068947, - 0.24102842320743, - null, - 0.2529891644068947, - 0.19048093242734687, - null, - 0.45431497833000367, - 0.5204579980957379, - null, - 0.45431497833000367, - 0.42052616285893474, - null, - 0.45431497833000367, - 0.4107398412471005, - null, - 0.45431497833000367, - 0.3635517670405215, - null, - 0.45431497833000367, - 0.42203254876563234, - null, - 0.45431497833000367, - 0.5260776190209286, - null, - 0.24454670425362057, - 0.20002447568886628, - null, - 0.24454670425362057, - 0.2880647319459674, - null, - 0.24454670425362057, - 0.3202314429055858, - null, - 0.24454670425362057, - 0.22993075379681738, - null, - 0.24454670425362057, - 0.3169605131706372, - null, - 0.24454670425362057, - 0.32345881810688737, - null, - 0.20002447568886628, - 0.3202314429055858, - null, - 0.20002447568886628, - 0.1341994714416056, - null, - 0.20002447568886628, - 0.3169605131706372, - null, - 
0.6267294109959968, - 0.5221172076712435, - null, - 0.6267294109959968, - 0.7205270186163313, - null, - 0.6267294109959968, - 0.5717872069066212, - null, - 0.6267294109959968, - 0.7302384542961842, - null, - 0.6267294109959968, - 0.6710484758334021, - null, - 0.6267294109959968, - 0.5492873750243871, - null, - 0.6267294109959968, - 0.6201266549140614, - null, - 0.6267294109959968, - 0.5752985482362863, - null, - 0.5221172076712435, - 0.41535454584101794, - null, - 0.5221172076712435, - 0.5717872069066212, - null, - 0.5221172076712435, - 0.5492873750243871, - null, - 0.5221172076712435, - 0.6201266549140614, - null, - 0.5221172076712435, - 0.4248880785102581, - null, - 0.6512622326935055, - 0.6217058876501556, - null, - 0.6512622326935055, - 0.6714278208298593, - null, - 0.3355480553373167, - 0.2660491488293679, - null, - 0.3355480553373167, - 0.249116699886752, - null, - 0.3355480553373167, - 0.28871122138225125, - null, - 0.3355480553373167, - 0.42203254876563234, - null, - 0.3355480553373167, - 0.2318219208408404, - null, - 0.4834545718278357, - 0.4847615611240751, - null, - 0.4834545718278357, - 0.4318165589087314, - null, - 0.4834545718278357, - 0.5097617399826666, - null, - 0.4847615611240751, - 0.4318165589087314, - null, - 0.4847615611240751, - 0.5097617399826666, - null, - 0.20619722773579274, - 0.3098874271134545, - null, - 0.20619722773579274, - 0.2880647319459674, - null, - 0.20619722773579274, - 0.24013807075121119, - null, - 0.20619722773579274, - 0.22993075379681738, - null, - 0.20619722773579274, - 0.09959517902538939, - null, - 0.9419075807648644, - 0.8848427298858184, - null, - 0.9419075807648644, - 0.9756800437762957, - null, - 0.3098874271134545, - 0.4295667428124167, - null, - 0.3098874271134545, - 0.35350564895305514, - null, - 0.3098874271134545, - 0.31541428705224306, - null, - 0.3098874271134545, - 0.2880647319459674, - null, - 0.3098874271134545, - 0.24013807075121119, - null, - 0.3098874271134545, - 0.2693681584998491, - null, - 
0.3098874271134545, - 0.42641694849778966, - null, - 0.3098874271134545, - 0.3333136626479075, - null, - 0.3098874271134545, - 0.22993075379681738, - null, - 0.04149975738749545, - 0.019989772968585173, - null, - 0.04149975738749545, - 0.1294716874165911, - null, - 0.32266487999330984, - 0.31541428705224306, - null, - 0.32266487999330984, - 0.4318165589087314, - null, - 0.32266487999330984, - 0.2693681584998491, - null, - 0.32266487999330984, - 0.3340702546567942, - null, - 0.4295667428124167, - 0.35350564895305514, - null, - 0.4295667428124167, - 0.31541428705224306, - null, - 0.4295667428124167, - 0.4467311570808764, - null, - 0.4295667428124167, - 0.5144551437666581, - null, - 0.4295667428124167, - 0.4421375373865315, - null, - 0.4295667428124167, - 0.42641694849778966, - null, - 0.4295667428124167, - 0.3333136626479075, - null, - 0.4295667428124167, - 0.4868902788925622, - null, - 0.35350564895305514, - 0.31541428705224306, - null, - 0.35350564895305514, - 0.24013807075121119, - null, - 0.35350564895305514, - 0.2693681584998491, - null, - 0.35350564895305514, - 0.42641694849778966, - null, - 0.35350564895305514, - 0.3333136626479075, - null, - 0.15069304516745607, - 0.06016942899581168, - null, - 0.15069304516745607, - 0.24013807075121119, - null, - 0.15069304516745607, - 0.2693681584998491, - null, - 0.15069304516745607, - 0.10059463740220753, - null, - 0.15069304516745607, - 0.09959517902538939, - null, - 0.41535454584101794, - 0.40395348439090084, - null, - 0.41535454584101794, - 0.4248880785102581, - null, - 0.41535454584101794, - 0.29119156039108685, - null, - 0.8821215709600496, - 0.9328536520894143, - null, - 0.8821215709600496, - 0.9344432405222354, - null, - 0.8821215709600496, - 0.9642772106357639, - null, - 0.8821215709600496, - 0.8157570218353161, - null, - 0.8821215709600496, - 0.7925454632595156, - null, - 0.8821215709600496, - 0.888980486534156, - null, - 0.9542382277667263, - 0.9024846524956353, - null, - 0.9542382277667263, - 
0.9961038345306213, - null, - 0.9542382277667263, - 0.9344432405222354, - null, - 0.9542382277667263, - 0.9642772106357639, - null, - 0.9542382277667263, - 0.888980486534156, - null, - 0.9542382277667263, - 0.9810704436128125, - null, - 0.7205270186163313, - 0.7302384542961842, - null, - 0.7205270186163313, - 0.8157570218353161, - null, - 0.7205270186163313, - 0.7925454632595156, - null, - 0.31541428705224306, - 0.24013807075121119, - null, - 0.31541428705224306, - 0.2693681584998491, - null, - 0.31541428705224306, - 0.3333136626479075, - null, - 0.010366221042083845, - 0.10782775946098799, - null, - 0.010366221042083845, - 0.03395115206665145, - null, - 0.010366221042083845, - 0.13201947050262697, - null, - 0.06016942899581168, - 0.055897802218322856, - null, - 0.06016942899581168, - 0.10059463740220753, - null, - 0.06016942899581168, - 0.09959517902538939, - null, - 0.8867112408398291, - 0.9298960866412943, - null, - 0.8867112408398291, - 0.8599268392047722, - null, - 0.8867112408398291, - 0.9078978130468089, - null, - 0.8867112408398291, - 0.8508124987550889, - null, - 0.8867112408398291, - 0.8842114977564064, - null, - 0.8867112408398291, - 0.9513646744432486, - null, - 0.5204579980957379, - 0.42052616285893474, - null, - 0.5204579980957379, - 0.4140065537970282, - null, - 0.5204579980957379, - 0.4107398412471005, - null, - 0.5204579980957379, - 0.4937592635708411, - null, - 0.4500538798110242, - 0.4140065537970282, - null, - 0.4500538798110242, - 0.4467311570808764, - null, - 0.4500538798110242, - 0.4421375373865315, - null, - 0.4500538798110242, - 0.4937592635708411, - null, - 0.4500538798110242, - 0.5603277981830703, - null, - 0.4500538798110242, - 0.547451424618544, - null, - 0.40395348439090084, - 0.3340702546567942, - null, - 0.40395348439090084, - 0.4248880785102581, - null, - 0.5717872069066212, - 0.6710484758334021, - null, - 0.5717872069066212, - 0.5492873750243871, - null, - 0.5717872069066212, - 0.6201266549140614, - null, - 0.42052616285893474, - 
0.4140065537970282, - null, - 0.42052616285893474, - 0.4107398412471005, - null, - 0.42052616285893474, - 0.3635517670405215, - null, - 0.2660491488293679, - 0.249116699886752, - null, - 0.2660491488293679, - 0.23700988477155205, - null, - 0.2660491488293679, - 0.28871122138225125, - null, - 0.2660491488293679, - 0.2002886163837997, - null, - 0.2660491488293679, - 0.29050814087118004, - null, - 0.2660491488293679, - 0.2318219208408404, - null, - 0.2660491488293679, - 0.20307680326083377, - null, - 0.10782775946098799, - 0.03395115206665145, - null, - 0.10782775946098799, - 0.2002886163837997, - null, - 0.10782775946098799, - 0.13201947050262697, - null, - 0.10782775946098799, - 0.20307680326083377, - null, - 0.7302384542961842, - 0.6710484758334021, - null, - 0.7302384542961842, - 0.6201266549140614, - null, - 0.8520196094107113, - 0.8848427298858184, - null, - 0.8520196094107113, - 0.9435179236599912, - null, - 0.8520196094107113, - 0.9756800437762957, - null, - 0.4140065537970282, - 0.3202314429055858, - null, - 0.4140065537970282, - 0.4107398412471005, - null, - 0.4140065537970282, - 0.3635517670405215, - null, - 0.4140065537970282, - 0.4937592635708411, - null, - 0.4140065537970282, - 0.3169605131706372, - null, - 0.4140065537970282, - 0.32345881810688737, - null, - 0.4467311570808764, - 0.5144551437666581, - null, - 0.4467311570808764, - 0.4421375373865315, - null, - 0.4467311570808764, - 0.42641694849778966, - null, - 0.4467311570808764, - 0.4868902788925622, - null, - 0.5144551437666581, - 0.4421375373865315, - null, - 0.5144551437666581, - 0.42641694849778966, - null, - 0.5144551437666581, - 0.6014235590484225, - null, - 0.5144551437666581, - 0.5603277981830703, - null, - 0.5144551437666581, - 0.4868902788925622, - null, - 0.7454337953380579, - 0.7077207700167599, - null, - 0.7454337953380579, - 0.8599268392047722, - null, - 0.7454337953380579, - 0.7005910562446783, - null, - 0.03395115206665145, - 0.05477321631284726, - null, - 0.7077207700167599, - 
0.7005910562446783, - null, - 0.9024846524956353, - 0.9961038345306213, - null, - 0.9024846524956353, - 0.9344432405222354, - null, - 0.9024846524956353, - 0.888980486534156, - null, - 0.9024846524956353, - 0.9810704436128125, - null, - 0.055897802218322856, - 0.04153202488293273, - null, - 0.055897802218322856, - 0.06013197669987258, - null, - 0.055897802218322856, - 0.040563128366188694, - null, - 0.055897802218322856, - 0.09959517902538939, - null, - 0.2880647319459674, - 0.3202314429055858, - null, - 0.2880647319459674, - 0.22993075379681738, - null, - 0.2880647319459674, - 0.3169605131706372, - null, - 0.2880647319459674, - 0.32345881810688737, - null, - 0.9328536520894143, - 0.9078978130468089, - null, - 0.9328536520894143, - 0.8508124987550889, - null, - 0.9298960866412943, - 0.8599268392047722, - null, - 0.9298960866412943, - 0.9958360522915445, - null, - 0.9298960866412943, - 0.8842114977564064, - null, - 0.9298960866412943, - 0.9513646744432486, - null, - 0.6352288779182178, - 0.5981086798045652, - null, - 0.6352288779182178, - 0.6648266103848882, - null, - 0.6352288779182178, - 0.6072525121642058, - null, - 0.04153202488293273, - 0.05477321631284726, - null, - 0.04153202488293273, - 0.1341994714416056, - null, - 0.04153202488293273, - 0.06013197669987258, - null, - 0.04153202488293273, - 0.040563128366188694, - null, - 0.7834166246251234, - 0.7925454632595156, - null, - 0.7834166246251234, - 0.8508124987550889, - null, - 0.7834166246251234, - 0.6714278208298593, - null, - 0.6710484758334021, - 0.5492873750243871, - null, - 0.6710484758334021, - 0.6201266549140614, - null, - 0.3202314429055858, - 0.3169605131706372, - null, - 0.3202314429055858, - 0.32345881810688737, - null, - 0.9961038345306213, - 0.9344432405222354, - null, - 0.9961038345306213, - 0.9642772106357639, - null, - 0.9961038345306213, - 0.888980486534156, - null, - 0.9961038345306213, - 0.9810704436128125, - null, - 0.4107398412471005, - 0.3635517670405215, - null, - 0.4107398412471005, - 
0.42203254876563234, - null, - 0.4107398412471005, - 0.29050814087118004, - null, - 0.24013807075121119, - 0.2693681584998491, - null, - 0.24013807075121119, - 0.3333136626479075, - null, - 0.8599268392047722, - 0.8842114977564064, - null, - 0.4318165589087314, - 0.3340702546567942, - null, - 0.4318165589087314, - 0.5097617399826666, - null, - 0.2693681584998491, - 0.3333136626479075, - null, - 0.4421375373865315, - 0.42641694849778966, - null, - 0.4421375373865315, - 0.4868902788925622, - null, - 0.5492873750243871, - 0.6201266549140614, - null, - 0.5492873750243871, - 0.5752985482362863, - null, - 0.5981086798045652, - 0.6072525121642058, - null, - 0.5981086798045652, - 0.5260776190209286, - null, - 0.3635517670405215, - 0.42203254876563234, - null, - 0.3635517670405215, - 0.29050814087118004, - null, - 0.42641694849778966, - 0.3333136626479075, - null, - 0.42641694849778966, - 0.4868902788925622, - null, - 0.6648266103848882, - 0.6072525121642058, - null, - 0.9344432405222354, - 0.9642772106357639, - null, - 0.9344432405222354, - 0.8157570218353161, - null, - 0.9344432405222354, - 0.888980486534156, - null, - 0.9344432405222354, - 0.9810704436128125, - null, - 0.249116699886752, - 0.23700988477155205, - null, - 0.249116699886752, - 0.28871122138225125, - null, - 0.249116699886752, - 0.2002886163837997, - null, - 0.249116699886752, - 0.29050814087118004, - null, - 0.249116699886752, - 0.2318219208408404, - null, - 0.249116699886752, - 0.20307680326083377, - null, - 0.6201266549140614, - 0.5752985482362863, - null, - 0.16781555203357146, - 0.19048093242734687, - null, - 0.16781555203357146, - 0.1294716874165911, - null, - 0.4937592635708411, - 0.5603277981830703, - null, - 0.4937592635708411, - 0.547451424618544, - null, - 0.9435179236599912, - 0.9958360522915445, - null, - 0.9435179236599912, - 0.9756800437762957, - null, - 0.07011604000159166, - 0.019989772968585173, - null, - 0.07011604000159166, - 0.10059463740220753, - null, - 0.07011604000159166, - 
0.038844634468288675, - null, - 0.9078978130468089, - 0.8508124987550889, - null, - 0.9078978130468089, - 0.9513646744432486, - null, - 0.6072525121642058, - 0.5260776190209286, - null, - 0.23700988477155205, - 0.28871122138225125, - null, - 0.23700988477155205, - 0.2002886163837997, - null, - 0.23700988477155205, - 0.29050814087118004, - null, - 0.23700988477155205, - 0.13201947050262697, - null, - 0.23700988477155205, - 0.2318219208408404, - null, - 0.23700988477155205, - 0.20307680326083377, - null, - 0.05477321631284726, - 0.1341994714416056, - null, - 0.05477321631284726, - 0.06013197669987258, - null, - 0.05477321631284726, - 0.040563128366188694, - null, - 0.9642772106357639, - 0.888980486534156, - null, - 0.019989772968585173, - 0.1294716874165911, - null, - 0.019989772968585173, - 0.038844634468288675, - null, - 0.29119156039108685, - 0.24102842320743, - null, - 0.29119156039108685, - 0.19048093242734687, - null, - 0.6217058876501556, - 0.6714278208298593, - null, - 0.1341994714416056, - 0.06013197669987258, - null, - 0.1341994714416056, - 0.040563128366188694, - null, - 0.28871122138225125, - 0.2002886163837997, - null, - 0.28871122138225125, - 0.2318219208408404, - null, - 0.28871122138225125, - 0.20307680326083377, - null, - 0.06013197669987258, - 0.040563128366188694, - null, - 0.5752985482362863, - 0.5097617399826666, - null, - 0.10059463740220753, - 0.09959517902538939, - null, - 0.10059463740220753, - 0.038844634468288675, - null, - 0.8157570218353161, - 0.7925454632595156, - null, - 0.8157570218353161, - 0.888980486534156, - null, - 0.42203254876563234, - 0.5260776190209286, - null, - 0.2002886163837997, - 0.13201947050262697, - null, - 0.2002886163837997, - 0.2318219208408404, - null, - 0.2002886163837997, - 0.20307680326083377, - null, - 0.8622415881936324, - 0.8350595230795331, - null, - 0.3169605131706372, - 0.29050814087118004, - null, - 0.3169605131706372, - 0.32345881810688737, - null, - 0.6014235590484225, - 0.5603277981830703, - null, - 
0.6014235590484225, - 0.4868902788925622, - null, - 0.9958360522915445, - 0.8842114977564064, - null, - 0.24102842320743, - 0.19048093242734687, - null, - 0.19048093242734687, - 0.1294716874165911, - null, - 0.8508124987550889, - 0.9513646744432486, - null, - 0.13201947050262697, - 0.2318219208408404, - null, - 0.13201947050262697, - 0.20307680326083377, - null, - 0.2318219208408404, - 0.20307680326083377, - null, - 0.888980486534156, - 0.9810704436128125, - null, - 0.5603277981830703, - 0.547451424618544, - null - ] - }, - { - "hoverinfo": "text", - "marker": { - "color": [ - 11, - 14, - 7, - 10, - 7, - 10, - 3, - 14, - 9, - 8, - 9, - 6, - 6, - 9, - 15, - 13, - 15, - 10, - 2, - 13, - 9, - 2, - 9, - 9, - 9, - 6, - 15, - 11, - 9, - 6, - 6, - 13, - 13, - 12, - 12, - 10, - 13, - 14, - 10, - 7, - 12, - 8, - 8, - 12, - 7, - 11, - 3, - 6, - 11, - 9, - 4, - 10, - 17, - 1, - 10, - 16, - 10, - 10, - 7, - 13, - 5, - 13, - 10, - 16, - 8, - 13, - 18, - 8, - 7, - 12, - 14, - 16, - 15, - 13, - 10, - 14, - 15, - 7, - 7, - 7, - 10, - 17, - 12, - 10, - 8, - 3, - 10, - 8, - 8, - 10, - 2, - 18, - 4, - 10, - 16, - 13, - 7, - 7, - 13, - 8, - 5, - 12, - 5, - 6, - 9, - 11, - 9, - 8, - 7, - 9, - 16, - 10, - 5, - 4, - 12, - 9, - 9, - 4, - 6, - 3, - 6, - 11, - 13, - 10, - 9, - 11, - 11, - 6, - 6, - 12, - 7, - 10, - 10, - 6, - 9, - 12, - 7, - 9, - 8, - 9, - 11, - 13, - 12, - 3, - 8, - 12, - 17, - 9, - 3, - 6, - 7, - 4, - 4, - 8, - 12, - 17, - 11, - 10, - 5, - 9, - 4, - 14, - 13, - 12, - 9, - 6, - 6, - 8, - 11, - 8, - 12, - 3, - 13, - 4, - 6, - 8, - 3, - 7, - 7, - 3, - 12, - 9, - 4, - 4, - 11, - 8, - 7, - 10, - 9, - 15, - 11, - 7, - 5, - 6, - 10, - 4, - 3, - 12, - 12, - 5 - ], - "colorbar": { - "thickness": 15, - "title": { - "side": "right", - "text": "Node Connections" - }, - "xanchor": "left" - }, - "colorscale": [ - [ - 0, - "rgb(255,255,217)" - ], - [ - 0.125, - "rgb(237,248,177)" - ], - [ - 0.25, - "rgb(199,233,180)" - ], - [ - 0.375, - "rgb(127,205,187)" - ], - [ - 0.5, - 
"rgb(65,182,196)" - ], - [ - 0.625, - "rgb(29,145,192)" - ], - [ - 0.75, - "rgb(34,94,168)" - ], - [ - 0.875, - "rgb(37,52,148)" - ], - [ - 1, - "rgb(8,29,88)" - ] - ], - "line": { - "width": 2 - }, - "reversescale": true, - "showscale": true, - "size": 10 - }, - "mode": "markers", - "text": [ - "# of connections: 11", - "# of connections: 14", - "# of connections: 7", - "# of connections: 10", - "# of connections: 7", - "# of connections: 10", - "# of connections: 3", - "# of connections: 14", - "# of connections: 9", - "# of connections: 8", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 9", - "# of connections: 15", - "# of connections: 13", - "# of connections: 15", - "# of connections: 10", - "# of connections: 2", - "# of connections: 13", - "# of connections: 9", - "# of connections: 2", - "# of connections: 9", - "# of connections: 9", - "# of connections: 9", - "# of connections: 6", - "# of connections: 15", - "# of connections: 11", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 13", - "# of connections: 13", - "# of connections: 12", - "# of connections: 12", - "# of connections: 10", - "# of connections: 13", - "# of connections: 14", - "# of connections: 10", - "# of connections: 7", - "# of connections: 12", - "# of connections: 8", - "# of connections: 8", - "# of connections: 12", - "# of connections: 7", - "# of connections: 11", - "# of connections: 3", - "# of connections: 6", - "# of connections: 11", - "# of connections: 9", - "# of connections: 4", - "# of connections: 10", - "# of connections: 17", - "# of connections: 1", - "# of connections: 10", - "# of connections: 16", - "# of connections: 10", - "# of connections: 10", - "# of connections: 7", - "# of connections: 13", - "# of connections: 5", - "# of connections: 13", - "# of connections: 10", - "# of connections: 16", - "# of connections: 8", - "# of connections: 13", - "# of 
connections: 18", - "# of connections: 8", - "# of connections: 7", - "# of connections: 12", - "# of connections: 14", - "# of connections: 16", - "# of connections: 15", - "# of connections: 13", - "# of connections: 10", - "# of connections: 14", - "# of connections: 15", - "# of connections: 7", - "# of connections: 7", - "# of connections: 7", - "# of connections: 10", - "# of connections: 17", - "# of connections: 12", - "# of connections: 10", - "# of connections: 8", - "# of connections: 3", - "# of connections: 10", - "# of connections: 8", - "# of connections: 8", - "# of connections: 10", - "# of connections: 2", - "# of connections: 18", - "# of connections: 4", - "# of connections: 10", - "# of connections: 16", - "# of connections: 13", - "# of connections: 7", - "# of connections: 7", - "# of connections: 13", - "# of connections: 8", - "# of connections: 5", - "# of connections: 12", - "# of connections: 5", - "# of connections: 6", - "# of connections: 9", - "# of connections: 11", - "# of connections: 9", - "# of connections: 8", - "# of connections: 7", - "# of connections: 9", - "# of connections: 16", - "# of connections: 10", - "# of connections: 5", - "# of connections: 4", - "# of connections: 12", - "# of connections: 9", - "# of connections: 9", - "# of connections: 4", - "# of connections: 6", - "# of connections: 3", - "# of connections: 6", - "# of connections: 11", - "# of connections: 13", - "# of connections: 10", - "# of connections: 9", - "# of connections: 11", - "# of connections: 11", - "# of connections: 6", - "# of connections: 6", - "# of connections: 12", - "# of connections: 7", - "# of connections: 10", - "# of connections: 10", - "# of connections: 6", - "# of connections: 9", - "# of connections: 12", - "# of connections: 7", - "# of connections: 9", - "# of connections: 8", - "# of connections: 9", - "# of connections: 11", - "# of connections: 13", - "# of connections: 12", - "# of connections: 3", - "# of connections: 
8", - "# of connections: 12", - "# of connections: 17", - "# of connections: 9", - "# of connections: 3", - "# of connections: 6", - "# of connections: 7", - "# of connections: 4", - "# of connections: 4", - "# of connections: 8", - "# of connections: 12", - "# of connections: 17", - "# of connections: 11", - "# of connections: 10", - "# of connections: 5", - "# of connections: 9", - "# of connections: 4", - "# of connections: 14", - "# of connections: 13", - "# of connections: 12", - "# of connections: 9", - "# of connections: 6", - "# of connections: 6", - "# of connections: 8", - "# of connections: 11", - "# of connections: 8", - "# of connections: 12", - "# of connections: 3", - "# of connections: 13", - "# of connections: 4", - "# of connections: 6", - "# of connections: 8", - "# of connections: 3", - "# of connections: 7", - "# of connections: 7", - "# of connections: 3", - "# of connections: 12", - "# of connections: 9", - "# of connections: 4", - "# of connections: 4", - "# of connections: 11", - "# of connections: 8", - "# of connections: 7", - "# of connections: 10", - "# of connections: 9", - "# of connections: 15", - "# of connections: 11", - "# of connections: 7", - "# of connections: 5", - "# of connections: 6", - "# of connections: 10", - "# of connections: 4", - "# of connections: 3", - "# of connections: 12", - "# of connections: 12", - "# of connections: 5" - ], - "type": "scatter", - "x": [ - 0.4182243125490408, - 0.12286879065958844, - 0.6730431696885844, - 0.38165116541180344, - 0.6084965344664286, - 0.18155558675901884, - 0.7722862313192606, - 0.5368181409256901, - 0.8304626469521129, - 0.7924139234898422, - 0.8266354543284289, - 0.4023039585223629, - 0.5084198498293618, - 0.23992481624351925, - 0.2742000416622462, - 0.15570283642495664, - 0.07513674080757637, - 0.7247552078664479, - 0.2586357176925591, - 0.595945044435614, - 0.9428542201780316, - 0.03304679952258993, - 0.6013564651959642, - 0.1130639188502468, - 0.5531504465254558, - 
0.1635981270944994, - 0.05512117222879742, - 0.32578353530864457, - 0.27440213390552737, - 0.2728250610713022, - 0.6346565064837861, - 0.6327007577432437, - 0.800297854626628, - 0.526779936668903, - 0.413948124857326, - 0.09276814106220677, - 0.662108954544855, - 0.07163295816605642, - 0.44119458804978295, - 0.7364515013041172, - 0.7827775151390383, - 0.9600359726880752, - 0.8511753697833563, - 0.05194805532761382, - 0.03187584930858911, - 0.07426685281627932, - 0.5257999712304688, - 0.9998698320754983, - 0.09471702229050472, - 0.6953901849658966, - 0.03446402354654854, - 0.9082570345357789, - 0.3740122792611037, - 0.977854801698089, - 0.5436816885151938, - 0.06202421257916635, - 0.8589937476561325, - 0.06879886671193436, - 0.19921682827804632, - 0.1823584228427031, - 0.37549158943196925, - 0.5433115547736789, - 0.37848025459696877, - 0.3821391536049519, - 0.7204214783753378, - 0.2955343345493908, - 0.09053866681881584, - 0.7181048560087516, - 0.10310287300704979, - 0.8247840830312709, - 0.1573630170264504, - 0.31305791514229697, - 0.298647499376007, - 0.3246624829381992, - 0.19852054651169693, - 0.3328704753356456, - 0.33203393677870674, - 0.5461279353327784, - 0.9636084967560627, - 0.9503884723051484, - 0.13747604708068628, - 0.3499260998923053, - 0.3181124346701171, - 0.89080246263295, - 0.9521646983336837, - 0.6776948411821848, - 0.0023771443647881974, - 0.7007214129943925, - 0.7188906153197968, - 0.47055154706870017, - 0.19043749918150743, - 0.5274116361492907, - 0.9162463356603696, - 0.7042334738295596, - 0.555788147264811, - 0.5805679633404117, - 0.587704695878027, - 0.916634041055854, - 0.7948577020793985, - 0.9210876029743161, - 0.834199864808296, - 0.5989925957177575, - 0.05973078995013337, - 0.5593951498649633, - 0.5229468203255856, - 0.22007362873840486, - 0.37301066653863624, - 0.8613129225222332, - 0.9663892923019699, - 0.2275256207367028, - 0.0852382135963593, - 0.0914406510425998, - 0.9425745666137786, - 0.3019474379086241, - 0.2619562675328274, - 
0.48218022499136737, - 0.5293212253918783, - 0.41808707877840445, - 0.14711158829428328, - 0.42926818011737133, - 0.9694266665187994, - 0.4404718698088387, - 0.4277213938753692, - 0.7059759544943667, - 0.4611021425875542, - 0.13940667248499528, - 0.3393815448042514, - 0.6370268640561303, - 0.9851894520572745, - 0.3247821296168134, - 0.9186278106648778, - 0.18507593174525072, - 0.5845953849421676, - 0.44175944307536974, - 0.7255980413609877, - 0.6058132814274794, - 0.7703024251104211, - 0.47443124751760235, - 0.9573079778783831, - 0.0201693226965588, - 0.17086936775877049, - 0.5291812256005789, - 0.5621062195646831, - 0.2121217358781844, - 0.16862303760247477, - 0.8846357375826375, - 0.0875467755337247, - 0.9473667691929577, - 0.8541827253649632, - 0.3414075728554137, - 0.9005048863870916, - 0.3318561006769827, - 0.7408684543182315, - 0.6149491168624189, - 0.12355952994556385, - 0.08997327822205015, - 0.21535391032155426, - 0.8323549266756429, - 0.8385234321105272, - 0.9240127894624793, - 0.6802728591951641, - 0.25656414507004344, - 0.020212382594376965, - 0.32444561774289593, - 0.4564806171162211, - 0.838803404513024, - 0.6322124026692795, - 0.8505181106970376, - 0.0897773631019545, - 0.7607451357487841, - 0.02312833765025224, - 0.05596958524873419, - 0.3187675293980876, - 0.5191285820034173, - 0.4349682989231034, - 0.04781523934390508, - 0.014269300880037306, - 0.9636590456207981, - 0.8680862155815134, - 0.4363707938884992, - 0.20133087739958255, - 0.6234379896430121, - 0.6314926226168458, - 0.29978148854693865, - 0.33721825060791266, - 0.7518492361353024, - 0.4442228752887084, - 0.04237200971819888, - 0.5201251204037126, - 0.038579501382332126, - 0.9110645875753355, - 0.5593069337955722, - 0.8668565351624634, - 0.42077304608666055, - 0.5465171974419871, - 0.7333209824474588, - 0.4039327719907384, - 0.34114125407236195, - 0.01777064460825195, - 0.992283435751248 - ], - "y": [ - 0.09053726824382247, - 0.571085214777101, - 0.5199666766946885, - 0.33766327379542094, 
- 0.17196466768963936, - 0.17708608014427518, - 0.04649454781195783, - 0.37080565676900146, - 0.3602866247185619, - 0.9483925173875926, - 0.3061539627540061, - 0.9643804220706982, - 0.8336885167043149, - 0.5944498275635773, - 0.2373268562908326, - 0.23741932367240448, - 0.32127102230894566, - 0.3661437355856225, - 0.7791505090281524, - 0.3648985367210805, - 0.6244837238804738, - 0.9012137046519791, - 0.5219101415039136, - 0.39453602200590676, - 0.2009582712064717, - 0.04224314617430658, - 0.2381682330796122, - 0.33811323660241943, - 0.5216765314868881, - 0.6001026871900049, - 0.991844460003468, - 0.3343459796676115, - 0.8957623407464501, - 0.4208812619135248, - 0.31304614249644347, - 0.6773365837969099, - 0.4307004647175262, - 0.3309683982450944, - 0.2697998035002954, - 0.9727770125665405, - 0.40557198035837094, - 0.35532572275494023, - 0.5850986908522726, - 0.17296378957033465, - 0.6628083689885368, - 0.6160873747407943, - 0.025297953521542405, - 0.24028581536328997, - 0.5186581897030644, - 0.8423383207045981, - 0.7537809293531343, - 0.3192831323823997, - 0.17542400609184483, - 0.008409380348177398, - 0.938767234846119, - 0.24033413659841596, - 0.8791466031622056, - 0.5634679987017406, - 0.05938145280899054, - 0.6012106694454529, - 0.6705222836834548, - 0.3900960314334032, - 0.055894273053114896, - 0.14933184162295132, - 0.8151159149468827, - 0.17619771419691865, - 0.2981410655965283, - 0.7334929583472656, - 0.04283815208078323, - 0.922341377568881, - 0.3199684158322815, - 0.1278305132468397, - 0.21532966919867302, - 0.0731473655342364, - 0.4898861106787329, - 0.2695720924906413, - 0.09533319097359638, - 0.9874110419208606, - 0.1333966979371528, - 0.2529891644068947, - 0.45431497833000367, - 0.24454670425362057, - 0.20002447568886628, - 0.6267294109959968, - 0.5221172076712435, - 0.6512622326935055, - 0.3355480553373167, - 0.4834545718278357, - 0.4847615611240751, - 0.20619722773579274, - 0.9419075807648644, - 0.3098874271134545, - 0.04149975738749545, - 
0.32266487999330984, - 0.4295667428124167, - 0.35350564895305514, - 0.15069304516745607, - 0.41535454584101794, - 0.8821215709600496, - 0.9542382277667263, - 0.7205270186163313, - 0.31541428705224306, - 0.010366221042083845, - 0.06016942899581168, - 0.8867112408398291, - 0.5204579980957379, - 0.4500538798110242, - 0.40395348439090084, - 0.5717872069066212, - 0.42052616285893474, - 0.2660491488293679, - 0.10782775946098799, - 0.7302384542961842, - 0.8520196094107113, - 0.4140065537970282, - 0.4467311570808764, - 0.5144551437666581, - 0.7454337953380579, - 0.03395115206665145, - 0.7077207700167599, - 0.9024846524956353, - 0.055897802218322856, - 0.2880647319459674, - 0.9328536520894143, - 0.9298960866412943, - 0.6352288779182178, - 0.04153202488293273, - 0.7834166246251234, - 0.6710484758334021, - 0.3202314429055858, - 0.9961038345306213, - 0.4107398412471005, - 0.24013807075121119, - 0.8599268392047722, - 0.4318165589087314, - 0.2693681584998491, - 0.3340702546567942, - 0.4421375373865315, - 0.5492873750243871, - 0.5981086798045652, - 0.3635517670405215, - 0.42641694849778966, - 0.3333136626479075, - 0.8848427298858184, - 0.6648266103848882, - 0.9344432405222354, - 0.249116699886752, - 0.6201266549140614, - 0.16781555203357146, - 0.4937592635708411, - 0.4248880785102581, - 0.9435179236599912, - 0.07011604000159166, - 0.9078978130468089, - 0.6072525121642058, - 0.23700988477155205, - 0.05477321631284726, - 0.9642772106357639, - 0.019989772968585173, - 0.29119156039108685, - 0.6217058876501556, - 0.1341994714416056, - 0.28871122138225125, - 0.06013197669987258, - 0.22993075379681738, - 0.5752985482362863, - 0.10059463740220753, - 0.8157570218353161, - 0.42203254876563234, - 0.7925454632595156, - 0.2002886163837997, - 0.8622415881936324, - 0.3169605131706372, - 0.6014235590484225, - 0.9958360522915445, - 0.5260776190209286, - 0.8350595230795331, - 0.24102842320743, - 0.19048093242734687, - 0.7005910562446783, - 0.29050814087118004, - 0.8508124987550889, - 
0.6714278208298593, - 0.9756800437762957, - 0.040563128366188694, - 0.5097617399826666, - 0.8842114977564064, - 0.13201947050262697, - 0.09959517902538939, - 0.2318219208408404, - 0.888980486534156, - 0.9513646744432486, - 0.1294716874165911, - 0.5603277981830703, - 0.4868902788925622, - 0.038844634468288675, - 0.547451424618544, - 0.32345881810688737, - 0.20307680326083377, - 0.9810704436128125 - ] - } - ], - "layout": { - "annotations": [ - { - "showarrow": true, - "text": "graphs", - "x": 0.005, - "xref": "paper", - "y": -0.002, - "yref": "paper" - } - ], - "hovermode": "closest", - "margin": { - "b": 20, - "l": 5, - "r": 5, - "t": 40 - }, - "showlegend": false, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "#E5ECF6", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "white", - "linecolor": "white", - "minorgridcolor": "white", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - 
"#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - 
], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - 
"colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - "#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - 
], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "#E5ECF6", - "showlakes": true, - "showland": true, - "subunitcolor": "white" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "#E5ECF6", - "polar": { - "angularaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "radialaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "yaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - }, - "zaxis": { - "backgroundcolor": "#E5ECF6", - "gridcolor": "white", - "gridwidth": 2, - "linecolor": "white", - "showbackground": true, - "ticks": "", - "zerolinecolor": "white" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "baxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - }, - "bgcolor": "#E5ECF6", - "caxis": { - "gridcolor": "white", - "linecolor": "white", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - 
"ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "white", - "linecolor": "white", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "white", - "zerolinewidth": 2 - } - } - }, - "title": { - "font": { - "size": 16 - }, - "text": "Network graph made with Python" - }, - "xaxis": { - "showgrid": false, - "showticklabels": false, - "zeroline": false - }, - "yaxis": { - "showgrid": false, - "showticklabels": false, - "zeroline": false - } - } - }, - "text/html": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "G = nx.random_geometric_graph(200, 0.125)\n", - "edge_x = []\n", - "edge_y = []\n", - "for edge in G.edges():\n", - " x0, y0 = G.nodes[edge[0]]['pos']\n", - " x1, y1 = G.nodes[edge[1]]['pos']\n", - " edge_x.append(x0)\n", - " edge_x.append(x1)\n", - " edge_x.append(None)\n", - " edge_y.append(y0)\n", - " edge_y.append(y1)\n", - " edge_y.append(None)\n", - "\n", - "edge_trace = go.Scatter(\n", - " x=edge_x, y=edge_y,\n", - " line=dict(width=0.5, color='#888'),\n", - " hoverinfo='none',\n", - " mode='lines')\n", - "\n", - "node_x = []\n", - "node_y = []\n", - "for node in G.nodes():\n", - " x, y = G.nodes[node]['pos']\n", - " node_x.append(x)\n", - " node_y.append(y)\n", - "\n", - "node_trace = go.Scatter(\n", - " x=node_x, y=node_y,\n", - " mode='markers',\n", - " hoverinfo='text',\n", - " marker=dict(\n", - " showscale=True,\n", - " # colorscale options\n", - " #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |\n", - " #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |\n", - " #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |\n", - " colorscale='YlGnBu',\n", - " reversescale=True,\n", - " color=[],\n", - " size=10,\n", - " colorbar=dict(\n", - " thickness=15,\n", - " title='Node Connections',\n", - " xanchor='left',\n", - " titleside='right'\n", - " ),\n", - " line_width=2))\n", - "\n", - "node_adjacencies = []\n", - "node_text = []\n", - "for node, adjacencies in enumerate(G.adjacency()):\n", - " node_adjacencies.append(len(adjacencies[1]))\n", - " node_text.append('# of connections: '+str(len(adjacencies[1])))\n", - "\n", - "node_trace.marker.color = node_adjacencies\n", - "node_trace.text = node_text\n", - "\n", - "\n", - "fig = go.Figure(data=[edge_trace, node_trace],\n", - " layout=go.Layout(\n", - " title='Network graph made with Python',\n", - " titlefont_size=16,\n", - " showlegend=False,\n", - " hovermode='closest',\n", - " 
margin=dict(b=20,l=5,r=5,t=40),\n", - " annotations=[ dict(\n", - " text=\"graphs\",\n", - " showarrow=True,\n", - " xref=\"paper\", yref=\"paper\",\n", - " x=0.005, y=-0.002 ) ],\n", - " xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),\n", - " yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))\n", - " )\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G = nx.Graph()\n", - "G.add_node(0)\n", - "nx.set_node_attributes(G, \"red\", name=\"color\")\n", - "nx.set_node_attributes(G, 2, name=\"size\")\n", - "G.add_node(1)\n", - "nx.set_node_attributes(G, np.nan, name='color')\n", - "G.nodes[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "ename": "NetworkXError", - "evalue": "Invalid edge_attr argument: ['donations', 'received']", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/indexes/base.py:3653\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3652\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3653\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/_libs/index.pyx:147\u001b[0m, in 
\u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/_libs/index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'donations'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:455\u001b[0m, in \u001b[0;36mfrom_pandas_edgelist\u001b[0;34m(df, source, target, edge_attr, create_using, edge_key)\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 455\u001b[0m attribute_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[43m[\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcol\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mattr_col_headings\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:455\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 454\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 455\u001b[0m attribute_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;241m*\u001b[39m[\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m attr_col_headings])\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/frame.py:3761\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3760\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3761\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/pandas/core/indexes/base.py:3655\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3655\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will 
raise\u001b[39;00m\n\u001b[1;32m 3658\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n", - "\u001b[0;31mKeyError\u001b[0m: 'donations'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mNetworkXError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m G \u001b[38;5;241m=\u001b[39m \u001b[43mnx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas_edgelist\u001b[49m\u001b[43m(\u001b[49m\u001b[43msample_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mname\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mtarget\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdonations_to\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43medge_attr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdonations\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mreceived\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m G\u001b[38;5;241m.\u001b[39mnodes()\n\u001b[1;32m 3\u001b[0m pos\u001b[38;5;241m=\u001b[39mnx\u001b[38;5;241m.\u001b[39mspring_layout(G)\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/utils/backends.py:412\u001b[0m, in \u001b[0;36m_dispatch.__call__\u001b[0;34m(self, backend, *args, **kwargs)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m/\u001b[39m, \u001b[38;5;241m*\u001b[39margs, 
backend\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m backends:\n\u001b[1;32m 411\u001b[0m \u001b[38;5;66;03m# Fast path if no backends are installed\u001b[39;00m\n\u001b[0;32m--> 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43morig_func\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;66;03m# Use `backend_name` in this function instead of `backend`\u001b[39;00m\n\u001b[1;32m 415\u001b[0m backend_name \u001b[38;5;241m=\u001b[39m backend\n", - "File \u001b[0;32m~/miniconda3/envs/climate_cabinet/lib/python3.11/site-packages/networkx/convert_matrix.py:458\u001b[0m, in \u001b[0;36mfrom_pandas_edgelist\u001b[0;34m(df, source, target, edge_attr, create_using, edge_key)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mKeyError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 457\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid edge_attr argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00medge_attr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 458\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m nx\u001b[38;5;241m.\u001b[39mNetworkXError(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 460\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m g\u001b[38;5;241m.\u001b[39mis_multigraph():\n\u001b[1;32m 461\u001b[0m \u001b[38;5;66;03m# => append the edge keys 
from the df to the bundled data\u001b[39;00m\n\u001b[1;32m 462\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m edge_key \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "\u001b[0;31mNetworkXError\u001b[0m: Invalid edge_attr argument: ['donations', 'received']" - ] - } - ], - "source": [ - "G = nx.from_pandas_edgelist(sample_df,source='name',target='donations_to',edge_attr=['donations','received'])\n", - "G.nodes()\n", - "pos=nx.spring_layout(G)\n", - "weights = list(nx.get_edge_attributes(G,'donations').values())\n", - "weights = [i/5000 for i in weights]\n", - "node_color = [G.degree(v) for v in G] \n", - "#node_size = [0.0005 * nx.get_node_attributes(G, 'donations')[v] for v in G] \n", - "nx.draw_networkx_nodes(G, pos, node_color=node_color)#, node_size=node_size) \n", - "nx.draw_networkx_edges(G, pos, width=weights)\n", - "nx.draw_networkx_labels(G, pos)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# fixing the size of the figure \n", - "plt.figure(figsize =(10, 7)) \n", - "\n", - "node_color = [G.degree(v) for v in G] \n", - "# node colour is a list of degrees of nodes \n", - "\n", - "node_size = [0.0005 * nx.get_node_attributes(G, 'population')[v] for v in G] \n", - "# size of node is a list of population of cities \n", - "\n", - "edge_width = [0.0015 * G[u][v]['weight'] for u, v in G.edges()] \n", - "# width of edge is a list of weight of edges \n", - "\n", - "nx.draw_networkx(G, node_size = node_size, \n", - "\t\t\t\tnode_color = node_color, alpha = 0.7, \n", - "\t\t\t\twith_labels = True, width = edge_width, \n", - "\t\t\t\tedge_color ='.4', cmap = plt.cm.Blues) \n", - "\n", - "plt.axis('off') \n", - "plt.tight_layout(); " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G = nx.MultiDiGraph()\n", - "G.add_node(0)\n", - "nx.set_node_attributes(G, \"red\", 
name=\"color\")\n", - "nx.set_node_attributes(G, 4, name = 'size')\n", - "G.add_node(2)\n", - "nx.set_node_attributes(G, \"white\", name='color')\n", - "G.nodes[2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "G.add_node(2)\n", - "nx.set_node_attributes(G, 4, name='age')\n", - "G.nodes[2]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "climate_cabinet", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 0f7d07ef00da198d3aa9e28b0ddcbdb830270dba Mon Sep 17 00:00:00 2001 From: Avery Schoen <33437601+averyschoen@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:07:31 -0600 Subject: [PATCH 202/214] Update Makefile --- Makefile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 273bc9c0..07383c3c 100644 --- a/Makefile +++ b/Makefile @@ -19,11 +19,6 @@ project_dir := "$(current_abs_path)" build-only: docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) - # these are called directives - # run-pipeline: - # docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) - # docker run -e python pipeline.py - run-interactive: docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) docker run -it -v $(current_abs_path):/project -t $(project_image_name) /bin/bash @@ -39,4 +34,4 @@ run-notebooks: #still waiting on linkage_pipeline completion to get this into final shape output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv - python linkage_pipeline.py \ No newline 
at end of file + python linkage_pipeline.py From 0a3b4e78d72489494e15a2fc0bd0ebc98a9a60b7 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 18:53:22 -0600 Subject: [PATCH 203/214] slight modifications to linkage.py for cleaning purposes --- utils/linkage.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 32a44dfc..964ba31e 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -39,7 +39,6 @@ def get_address_line_1_from_full_address(address: str) -> str: ... ) '1415 PARKER STREET' """ - pass address_tuples = usaddress.parse( address @@ -137,7 +136,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane","","Doe, Jane, Elisabeth") 'Jane Elisabeth Doe' """ - # first, convert any Nans to empty strings '' + # first, convert any NaNs to empty strings '' first_name, last_name, full_name = [ "" if x is np.NAN else x for x in [first_name, last_name, full_name] ] @@ -151,8 +150,7 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: if first_name + " " + last_name == full_name: return full_name.title() - # some names have titles or professions associated with the name. We need to - # remove those from the name. 
+ # remove titles or professions from the name names = [first_name, last_name, full_name] for i in range(len(names)): @@ -278,7 +276,7 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: # first remove all duplicate entries: new_df = df.drop_duplicates() - # now find the duplicates along all columns but the ID + # find the duplicates along all columns but the id new_df = ( new_df.groupby(df.columns.difference(["id"]).tolist(), dropna=False)[ "id" @@ -289,7 +287,7 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: ) new_df.index = new_df["duplicated"].str[0].tolist() - # now convert the duplicated column into a dictionary that can will be + # convert the duplicated column into a dictionary that can will be # an output by only feeding the entries with duplicates new_df = new_df.reset_index().rename(columns={"index": "id"}) convert_duplicates_to_dict(new_df[["id", "duplicated"]]) @@ -299,8 +297,8 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: def cleaning_company_column(company_entry: str) -> str: """ - Given a string, check if it contains a variation of self employed, unemployed, - or retired and return the standardized version. + Given a string, check if it contains a variation of self employed, + unemployed, or retired and return the standardized version. 
Args: company: string of inputted company names @@ -376,7 +374,7 @@ def standardize_corp_names(company_name: str) -> str: """ - company_name_split = company_name.upper().split(" ") + company_name_split = company_name.title().split(" ") for i in range(len(company_name_split)): if company_name_split[i] in list(COMPANY_TYPES.keys()): @@ -419,7 +417,7 @@ def get_address_number_from_address_line_1(address_line_1: str) -> str: return address_line_1_components[i][0] elif address_line_1_components[i][1] == "USPSBoxID": return address_line_1_components[i][0] - raise ValueError("Can not find Address Number") + raise ValueError("Cannot find Address Number") def splink_dedupe( @@ -478,10 +476,10 @@ def splink_dedupe( on="cluster_id", how="left", ) - deduped_df.rename(columns={"cluster_id": "unique_id"}, inplace=True) + deduped_df = deduped_df.rename(columns={"cluster_id": "unique_id"}) convert_duplicates_to_dict(deduped_df) - deduped_df.drop(columns=["duplicated"]) + deduped_df = deduped_df.drop(columns=["duplicated"]) return deduped_df From 9f980ff31bcd3b5d97e6c8e3b32290349f5b6114 Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 18:58:12 -0600 Subject: [PATCH 204/214] slight modifications to linkage.py for cleaning purposes, now passing linter --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 964ba31e..5b63e7e3 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -297,7 +297,7 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: def cleaning_company_column(company_entry: str) -> str: """ - Given a string, check if it contains a variation of self employed, + Given a string, check if it contains a variation of self employed, unemployed, or retired and return the standardized version. 
Args: From 7ebe2a26e3f8c18ec3f9aebd8aeae9872cfa1050 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 01:38:37 +0000 Subject: [PATCH 205/214] slight changes --- utils/linkage_pipeline.py | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/utils/linkage_pipeline.py b/utils/linkage_pipeline.py index 499726e9..b9a87fe8 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_pipeline.py @@ -19,6 +19,7 @@ splink_dedupe, standardize_corp_names, ) +from utils.network import construct_network_graph def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: @@ -95,6 +96,18 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: axis=1, ) + individuals["sort_priority"] = ( + ~individuals["first_name"].isna() + & ~individuals["last_name"].isna() + & ~individuals["company"].isna() + ) * 2 + (~individuals["party"].isna()) + + individuals = individuals.sort_values( + by="sort_priority", ascending=False + ).drop(columns=["sort_priority"]) + + individuals["unique_id"] = individuals["id"] + return individuals @@ -112,6 +125,8 @@ def preprocess_organizations(organizations: pd.DataFrame) -> pd.DataFrame: .apply(standardize_corp_names) ) + organizations["unique_id"] = organizations["id"] + return organizations @@ -131,6 +146,11 @@ def preprocess_transactions(transactions: pd.DataFrame) -> pd.DataFrame: transactions["purpose"] = transactions["purpose"].str.upper() + deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") + transactions[["donor_id", "recipient_id"]] = transactions[ + ["donor_id", "recipient_id"] + ].replace(deduped) + return transactions @@ -149,17 +169,13 @@ def main(): individuals = preprocess_individuals(individuals) organizations = preprocess_organizations(organizations) - transactions = preprocess_transactions(transactions) individuals, organizations = classify_wrapper(individuals, organizations) individuals = deduplicate_perfect_matches(individuals) 
organizations = deduplicate_perfect_matches(organizations) - deduped = pd.read_csv(BASE_FILEPATH / "output" / "deduplicated_UUIDs.csv") - - individuals["unique_id"] = individuals["id"] - organizations["unique_id"] = organizations["id"] + transactions = preprocess_transactions(transactions) organizations = splink_dedupe( organizations, organizations_settings, organizations_blocking @@ -169,10 +185,6 @@ def main(): individuals, individuals_settings, individuals_blocking ) - transactions[["donor_id", "recipient_id"]] = transactions[ - ["donor_id", "recipient_id"] - ].replace(deduped) - cleaned_individuals_output_path = ( BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" ) @@ -189,6 +201,10 @@ def main(): organizations.to_csv(cleaned_organizations_output_path, index=False) transactions.to_csv(cleaned_transactions_output_path, index=False) + construct_network_graph( + 2018, 2024, [individuals, organizations, transactions] + ) + if __name__ == "__main__": main() From 51f82d436cd693744525d421fc6d3f038c41e7ad Mon Sep 17 00:00:00 2001 From: Alan Mburu Kagiri Date: Tue, 5 Mar 2024 19:40:36 -0600 Subject: [PATCH 206/214] revert changes to standardize_corp_names...the logic goes through many channels --- utils/linkage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/linkage.py b/utils/linkage.py index 5b63e7e3..c89b5818 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -374,7 +374,7 @@ def standardize_corp_names(company_name: str) -> str: """ - company_name_split = company_name.title().split(" ") + company_name_split = company_name.upper().split(" ") for i in range(len(company_name_split)): if company_name_split[i] in list(COMPANY_TYPES.keys()): From 9a0352151fb451ddf1846e89d2948921e3bee149 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 01:44:30 +0000 Subject: [PATCH 207/214] renaming file --- utils/{linkage_pipeline.py => linkage_and_network_pipeline.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename 
utils/{linkage_pipeline.py => linkage_and_network_pipeline.py} (99%) diff --git a/utils/linkage_pipeline.py b/utils/linkage_and_network_pipeline.py similarity index 99% rename from utils/linkage_pipeline.py rename to utils/linkage_and_network_pipeline.py index b9a87fe8..134d5f2d 100644 --- a/utils/linkage_pipeline.py +++ b/utils/linkage_and_network_pipeline.py @@ -202,7 +202,7 @@ def main(): transactions.to_csv(cleaned_transactions_output_path, index=False) construct_network_graph( - 2018, 2024, [individuals, organizations, transactions] + 2018, 2023, [individuals, organizations, transactions] ) From d4161f61db0df0e24bc3bd002999ceca6b0f0c70 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 02:07:32 +0000 Subject: [PATCH 208/214] updating functions to latest versions --- utils/linkage.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/utils/linkage.py b/utils/linkage.py index 28f12dd4..5791da59 100644 --- a/utils/linkage.py +++ b/utils/linkage.py @@ -137,7 +137,12 @@ def get_likely_name(first_name: str, last_name: str, full_name: str) -> str: >>> get_likely_name("Jane","","Doe, Jane, Elisabeth") 'Jane Elisabeth Doe' """ - # first ensure clean input by deleting spaces: + # first, convert any Nans to empty strings '' + first_name, last_name, full_name = [ + "" if x is np.NAN else x for x in [first_name, last_name, full_name] + ] + + # second, ensure clean input by deleting spaces: first_name, last_name, full_name = list( map(lambda x: x.lower().strip(), [first_name, last_name, full_name]) ) @@ -220,21 +225,23 @@ def get_street_from_address_line_1(address_line_1: str) -> str: def convert_duplicates_to_dict(df: pd.DataFrame) -> None: - """Saves to the "output" directory a file where each row represents a string - matching to another string + """For each uuid, maps it to all other uuids for which it has been deemed a + match. 
- Given a dataframe where each row contains one string in a column and a list - of strings in another column, the function maps each string in the list to - the single string. + Given a dataframe where the uuids of all rows deemed similar are stored in a + list and all but the first row of each paired uuid is dropped, this function + maps the matched uuids to a single uuid. Args: - A pandas dataframe + A pandas df containing a column called 'duplicated', where each row is a + list of all uuids deemed a match. In each list, all uuids but the first + have their rows already dropped. Returns None. However it outputs a file to the output directory, with 2 - columns. The first, which indicates the duplicated UUIDs, is labeled - 'duplicated_uuids', and the 2nd, which shows the uuids to which the - deduplicated entries match to, is labeled 'mapped_uuids'. + columns. The first lists all the uuids in df, and is labeled + 'original_uuids.' The 2nd shows the uuids to which each entry is mapped + to, and is labeled 'mapped_uuids'. 
""" deduped_dict = {} for i in range(len(df)): @@ -245,7 +252,7 @@ def convert_duplicates_to_dict(df: pd.DataFrame) -> None: # now convert dictionary into a csv file deduped_df = pd.DataFrame.from_dict(deduped_dict, "index") deduped_df = deduped_df.reset_index().rename( - columns={"index": "duplicated_uuids", 0: "mapped_uuids"} + columns={"index": "original_uuids", 0: "mapped_uuid"} ) deduped_df.to_csv( repo_root / "output" / "deduplicated_UUIDs.csv", @@ -273,7 +280,9 @@ def deduplicate_perfect_matches(df: pd.DataFrame) -> pd.DataFrame: # now find the duplicates along all columns but the ID new_df = ( - new_df.groupby(df.columns[1:].tolist(), dropna=False)["id"] + new_df.groupby(df.columns.difference(["id"]).tolist(), dropna=False)[ + "id" + ] .agg(list) .reset_index() .rename(columns={"id": "duplicated"}) From 45347e26a39b50a951a69e2c77c50d16b1fd0bfc Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 02:09:14 +0000 Subject: [PATCH 209/214] slight changes to match function changes in linkage.py --- utils/linkage_and_network_pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/linkage_and_network_pipeline.py b/utils/linkage_and_network_pipeline.py index 134d5f2d..bd6bcfbd 100644 --- a/utils/linkage_and_network_pipeline.py +++ b/utils/linkage_and_network_pipeline.py @@ -89,13 +89,13 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: individuals["full_name"] = individuals.apply( lambda row: get_likely_name( - row["first_name"] if pd.notnull(row["first_name"]) else "", - row["last_name"] if pd.notnull(row["last_name"]) else "", - row["full_name"] if pd.notnull(row["full_name"]) else "", + row["first_name"], row["last_name"], row["full_name"] ), axis=1, ) + # Ensure that columns with values are prioritized and appear first + # important for splink implementation individuals["sort_priority"] = ( ~individuals["first_name"].isna() & ~individuals["last_name"].isna() From 
ad2ed0f5e9a30bd5c246c01ab4e3d4550a8f3dc3 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 18:36:38 +0000 Subject: [PATCH 210/214] slight changes --- Makefile | 5 ++-- notebooks/Test.ipynb | 39 --------------------------- setup.py | 2 +- utils/linkage_and_network_pipeline.py | 34 +++++++++++++---------- 4 files changed, 24 insertions(+), 56 deletions(-) delete mode 100644 notebooks/Test.ipynb diff --git a/Makefile b/Makefile index 07383c3c..48879489 100644 --- a/Makefile +++ b/Makefile @@ -33,5 +33,6 @@ run-notebooks: #running the linkage pipeline and creating the network graph #still waiting on linkage_pipeline completion to get this into final shape -output network_graph: all_individuals.csv all_organizations.csv all_transactions.csv - python linkage_pipeline.py +run-linkage-and-network-pipeline: + docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) + docker run -v $(current_abs_path):/project -t $(project_image_name) python utils/linkage_pipeline.py \ No newline at end of file diff --git a/notebooks/Test.ipynb b/notebooks/Test.ipynb deleted file mode 100644 index 5df942e1..00000000 --- a/notebooks/Test.ipynb +++ /dev/null @@ -1,39 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Example Notebook file demonstrating how to use the file structure\n", - "from utils.preprocess_util_lib_example import save_random_dataframe\n", - "from pathlib import Path\n", - "\n", - "save_random_dataframe(Path(\"../output\"), Path(\"test.csv\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - 
"nbformat_minor": 2 -} diff --git a/setup.py b/setup.py index 63ef672a..07404acd 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages, setup setup( - name="2023-fall-clinic-climate-cabinet", + name="2024-winter-clinic-climate-cabinet", version="0.1.0", packages=find_packages( include=[ diff --git a/utils/linkage_and_network_pipeline.py b/utils/linkage_and_network_pipeline.py index bd6bcfbd..7e5f8cec 100644 --- a/utils/linkage_and_network_pipeline.py +++ b/utils/linkage_and_network_pipeline.py @@ -1,3 +1,4 @@ +import networkx as nx import pandas as pd from nameparser import HumanName @@ -19,7 +20,11 @@ splink_dedupe, standardize_corp_names, ) -from utils.network import construct_network_graph +from utils.network import ( + create_network_graph, + combine_datasets_for_network_graph, + construct_network_graph, +) def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: @@ -102,9 +107,9 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: & ~individuals["company"].isna() ) * 2 + (~individuals["party"].isna()) - individuals = individuals.sort_values( - by="sort_priority", ascending=False - ).drop(columns=["sort_priority"]) + individuals = individuals.sort_values(by="sort_priority", ascending=False).drop( + columns=["sort_priority"] + ) individuals["unique_id"] = individuals["id"] @@ -159,9 +164,7 @@ def main(): BASE_FILEPATH / "data" / "complete_organizations_table.csv" ) - individuals = pd.read_csv( - BASE_FILEPATH / "data" / "complete_individuals_table.csv" - ) + individuals = pd.read_csv(BASE_FILEPATH / "data" / "complete_individuals_table.csv") transactions = pd.read_csv( BASE_FILEPATH / "data" / "complete_transactions_table.csv" @@ -175,15 +178,13 @@ def main(): individuals = deduplicate_perfect_matches(individuals) organizations = deduplicate_perfect_matches(organizations) - transactions = preprocess_transactions(transactions) - organizations = splink_dedupe( organizations, 
organizations_settings, organizations_blocking ) - individuals = splink_dedupe( - individuals, individuals_settings, individuals_blocking - ) + individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) + + transactions = preprocess_transactions(transactions) cleaned_individuals_output_path = ( BASE_FILEPATH / "output" / "cleaned_individuals_table.csv" @@ -201,9 +202,14 @@ def main(): organizations.to_csv(cleaned_organizations_output_path, index=False) transactions.to_csv(cleaned_transactions_output_path, index=False) - construct_network_graph( - 2018, 2023, [individuals, organizations, transactions] + aggreg_df = combine_datasets_for_network_graph( + [individuals, organizations, transactions] ) + g = create_network_graph(aggreg_df) + g_output_path = BASE_FILEPATH / "output" / "g.gml" + nx.write_graphml(g, g_output_path) + + construct_network_graph(2018, 2023, [individuals, organizations, transactions]) if __name__ == "__main__": From 4b0de47ef040c04e6adc78409e272fb04e132129 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 18:48:41 +0000 Subject: [PATCH 211/214] readme changes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a610a4ec..da6bba26 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ If you prefer to develop inside a container with VS Code then do the following s ### Record Linkage and Network Pipeline 1. Save the standardized tables "complete_individuals_table.csv", "complete_organizations_table.csv", and "complete_transactions_table.csv" (collected from the above pipeline or data from the project's Google Drive) in the following format: repo_root / "output" / "file" -2. **UPDATE:** Run the pipeline by calling ```make run-linkage-pipeline```. 
This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, convert the standardized tables into a NetworkX Graph, and show an interactive network visual. +2. **UPDATE:** Run the pipeline by calling ```make run-linkage-and-network-pipeline```. This pipeline will perform conservative record linkage, attempt to classify entities as neutral, fossil fuels, or clean energy, convert the standardized tables into a NetworkX Graph, and show an interactive network visual. 3. The pipeline will output the deduplicated tables saved as "cleaned_individuals_table.csv", "cleaned_organizations_table.csv", and "cleaned_transactions_table.csv". A mapping file, "deduplicated_UUIDs" tracks the UUIDs designated as duplicates. The pipeline will also output "Network Graph Node Data", which is the NetworkX Graph object converted into an adjecency list. ## Repository Structure From 0c7902394bce4864e984eab13305ba99855a86dd Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 18:50:36 +0000 Subject: [PATCH 212/214] data/ readme changes --- data/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/data/README.md b/data/README.md index df9336b7..5326bff8 100644 --- a/data/README.md +++ b/data/README.md @@ -2,12 +2,6 @@ This directory contains information for use in this project. -## Makefile and Final Pipeline -- This folder is empty by default. In order to run the Makefile, download the complete_data.zip file from the Climate Cabinet Data Clinic Google Drive using this link: https://drive.google.com/file/d/1zbjt7iBU0NAWSBcUyEsjvuumn3VgI4z9/view?usp=sharing - - - After downloading this .zip, unzip it to find three files: complete_individuals.csv, complete_organizations.csv, and complete_transactions.csv. Upload these files into the data folder and ensure that their names are correct. Once they are in place, you may run the Makefile. 
- - ## Arizona Campaign Finance Data ### Summary From 48470c21eb14ca164516f241ab5d2646f008a318 Mon Sep 17 00:00:00 2001 From: Adil Kassim Date: Wed, 6 Mar 2024 18:51:15 +0000 Subject: [PATCH 213/214] pre-commit formatting changes --- utils/linkage_and_network_pipeline.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/utils/linkage_and_network_pipeline.py b/utils/linkage_and_network_pipeline.py index 7e5f8cec..86e0ab62 100644 --- a/utils/linkage_and_network_pipeline.py +++ b/utils/linkage_and_network_pipeline.py @@ -21,9 +21,9 @@ standardize_corp_names, ) from utils.network import ( - create_network_graph, combine_datasets_for_network_graph, construct_network_graph, + create_network_graph, ) @@ -107,9 +107,9 @@ def preprocess_individuals(individuals: pd.DataFrame) -> pd.DataFrame: & ~individuals["company"].isna() ) * 2 + (~individuals["party"].isna()) - individuals = individuals.sort_values(by="sort_priority", ascending=False).drop( - columns=["sort_priority"] - ) + individuals = individuals.sort_values( + by="sort_priority", ascending=False + ).drop(columns=["sort_priority"]) individuals["unique_id"] = individuals["id"] @@ -164,7 +164,9 @@ def main(): BASE_FILEPATH / "data" / "complete_organizations_table.csv" ) - individuals = pd.read_csv(BASE_FILEPATH / "data" / "complete_individuals_table.csv") + individuals = pd.read_csv( + BASE_FILEPATH / "data" / "complete_individuals_table.csv" + ) transactions = pd.read_csv( BASE_FILEPATH / "data" / "complete_transactions_table.csv" @@ -182,7 +184,9 @@ def main(): organizations, organizations_settings, organizations_blocking ) - individuals = splink_dedupe(individuals, individuals_settings, individuals_blocking) + individuals = splink_dedupe( + individuals, individuals_settings, individuals_blocking + ) transactions = preprocess_transactions(transactions) @@ -209,7 +213,9 @@ def main(): g_output_path = BASE_FILEPATH / "output" / "g.gml" nx.write_graphml(g, g_output_path) - 
construct_network_graph(2018, 2023, [individuals, organizations, transactions]) + construct_network_graph( + 2018, 2023, [individuals, organizations, transactions] + ) if __name__ == "__main__": From bee198affb27f50a4c58a968d7691ade72332fd0 Mon Sep 17 00:00:00 2001 From: Adil Kassim <76892521+adilkassim@users.noreply.github.com> Date: Wed, 6 Mar 2024 13:41:18 -0600 Subject: [PATCH 214/214] Update Makefile updating filename in makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 48879489..c2553e2d 100644 --- a/Makefile +++ b/Makefile @@ -35,4 +35,4 @@ run-notebooks: run-linkage-and-network-pipeline: docker build -t $(project_image_name) -f Dockerfile $(current_abs_path) - docker run -v $(current_abs_path):/project -t $(project_image_name) python utils/linkage_pipeline.py \ No newline at end of file + docker run -v $(current_abs_path):/project -t $(project_image_name) python utils/linkage_and_network_pipeline.py