Skip to content

Commit

Permalink
Merge pull request #1271 from cityofaustin/md-15550-complete-data-tra…
Browse files Browse the repository at this point in the history
…cker-sync

Data Tracker sync continued
  • Loading branch information
mddilley authored Feb 8, 2024
2 parents 76cace6 + eb9ab85 commit f502b5a
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 3 deletions.
1 change: 1 addition & 0 deletions moped-etl/data-tracker-sync/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
env_file
env_file_production
.git%
__pycache__
1 change: 1 addition & 0 deletions moped-etl/data-tracker-sync/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*env
env_file_production
10 changes: 10 additions & 0 deletions moped-etl/data-tracker-sync/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,13 @@ Data Tracker app and you should see that the title has been updated with your ed
This script can also be run from the local Airflow stack. The secrets from the `development` sections
of the **Knack AMD Data Tracker** and **Moped Hasura Admin** entries will be used automatically in the
DAG.

### Sync evaluation script

On first deployment of this ETL, duplicate records were created in the Knack `projects` table. The `sync_evaluation.py` script:

- Gathers all Knack project record IDs stored in the `moped_project` table rows in the `knack_project_id` column
- Gather all Knack record IDs of all rows in the Data Tracker `projects` table
- Evaluates the overlap (synced correctly) and difference (mark for deletion) of these two lists
- Evaluates the list of differences to make sure there are no connections between Knack records marked for deletion and `work_order_signals` records
- Deletes the records with no connections and logs records with connections that were not deleted
12 changes: 9 additions & 3 deletions moped-etl/data-tracker-sync/data_tracker_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ def find_unsynced_moped_projects(is_test=False):
data = make_hasura_request(query=GET_UNSYNCED_PROJECTS)
unsynced_projects = data["moped_project"]

logger.info(f"Found {len(unsynced_projects)} unsynced projects")
logger.info(
f"Found {len(unsynced_projects)} unsynced projects to create in Data Tracker"
)
logger.debug(f"Found unsynced projects: {unsynced_projects}")
return unsynced_projects

Expand Down Expand Up @@ -138,7 +140,9 @@ def find_synced_moped_projects(last_run_date, is_test=False):
)
synced_projects = data["moped_project"]

logger.info(f"Found {len(synced_projects)} synced projects")
logger.info(
f"Found {len(synced_projects)} synced projects to update in Data Tracker"
)
logger.debug(f"Found synced projects: {synced_projects}")
return synced_projects

Expand Down Expand Up @@ -179,6 +183,7 @@ def main(args):
unsynced_moped_projects = find_unsynced_moped_projects(is_test=args.test)

# Create a Knack project for each unsynced Moped project
logger.info(f"Creating Knack records...")
created_knack_records = []
for project in unsynced_moped_projects:
moped_project_id = project["project_id"]
Expand All @@ -194,7 +199,7 @@ def main(args):

# Update Moped project with Knack record ID of created record
logger.info(
f"Updating Knack ID {knack_record_id} from Moped project {moped_project_id}"
f"Updating Moped project {moped_project_id} with Knack ID {knack_record_id}"
)
update_moped_project_knack_id(moped_project_id, knack_record_id)

Expand All @@ -204,6 +209,7 @@ def main(args):
)

# Update synced Moped projects in Data Tracker and skip those just created
logger.info(f"Updating...")
updated_knack_records = []
updates_to_skip = [record["moped_project_id"] for record in created_knack_records]

Expand Down
132 changes: 132 additions & 0 deletions moped-etl/data-tracker-sync/sync_evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env python

import os
import logging

import knackpy

from process.request import make_hasura_request
from process.logging import get_logger

KNACK_DATA_TRACKER_APP_ID = os.getenv("KNACK_DATA_TRACKER_APP_ID")
KNACK_DATA_TRACKER_API_KEY = os.getenv("KNACK_DATA_TRACKER_API_KEY")

KNACK_DATA_TRACKER_PROJECT_OBJECT = "object_201"
KNACK_DATA_TRACKER_WORK_ORDER_SIGNALS_OBJECT = "object_31"

WORK_ORDER_SIGNALS_PROJECT_FIELD = "field_3965"

GET_MOPED_PROJECTS = """
query GetMopedProjects {
moped_project(where: { knack_project_id: { _is_null: false }}) {
project_id
knack_project_id
}
}
"""


def get_synced_moped_project_knack_ids():
data = make_hasura_request(query=GET_MOPED_PROJECTS)
projects = data["moped_project"]
knack_ids = [project["knack_project_id"] for project in projects]

logger.debug(f"Found Moped Project Knack IDs: {knack_ids}")
logger.info(f"Found {len(knack_ids)} Moped Project Knack IDs")
return knack_ids


def get_knack_project_record_ids():
knack_projects = knackpy.api.get(
app_id=KNACK_DATA_TRACKER_APP_ID,
api_key=KNACK_DATA_TRACKER_API_KEY,
obj=KNACK_DATA_TRACKER_PROJECT_OBJECT,
)
knack_ids = [project["id"] for project in knack_projects]

logger.debug(f"Found Knack projects: {knack_ids}")
logger.info(f"Found {len(knack_ids)} Knack projects")
return knack_ids


def check_for_work_order_signals_connection(knack_id):
work_order_signals = knackpy.api.get(
app_id=KNACK_DATA_TRACKER_APP_ID,
api_key=KNACK_DATA_TRACKER_API_KEY,
obj=KNACK_DATA_TRACKER_WORK_ORDER_SIGNALS_OBJECT,
filters=[
{
"field": WORK_ORDER_SIGNALS_PROJECT_FIELD,
"operator": "is",
"value": knack_id,
}
],
)

logger.debug(f"Found Knack work order signals record: {work_order_signals}")
return work_order_signals


def delete_knack_project_record(knack_id):
logger.info(f"Deleting Knack project record with ID: {knack_id}")
knackpy.api.record(
app_id=KNACK_DATA_TRACKER_APP_ID,
api_key=KNACK_DATA_TRACKER_API_KEY,
obj=KNACK_DATA_TRACKER_PROJECT_OBJECT,
method="delete",
data={"id": knack_id},
)


def main():
logger.info(f"Getting all Knack project IDs from Moped projects...")
knack_project_ids_in_moped = get_synced_moped_project_knack_ids()
logger.info(f"Getting all Knack project IDs from Knack...")
knack_project_ids_in_knack = get_knack_project_record_ids()

logger.info(f"Finding overlap and differences in those lists...")
ids_in_both_tables = list(
set(knack_project_ids_in_knack) & set(knack_project_ids_in_moped)
)
logger.info(
f"Found {len(ids_in_both_tables)} records in both tables that should be retained"
)

ids_not_in_both_tables = list(
set(knack_project_ids_in_knack) - set(knack_project_ids_in_moped)
)
logger.info(f"Records in Knack but not in Moped: {ids_not_in_both_tables}")
logger.info(
f"Found {len(ids_not_in_both_tables)} records in Knack but not in Moped that are ready to delete"
)

logger.info(f"Checking Knack project records for work order signals connections...")
deletes_to_skip = []
count = 1
for id in ids_not_in_both_tables:
logger.info(f"{count}/{len(ids_not_in_both_tables)}: Knack ID {id}")

work_order_signals = check_for_work_order_signals_connection(id)
count += 1

if len(work_order_signals) > 0:
logger.info(
f"Found work order signals connected to project ID {id}: {work_order_signals}"
)
deletes_to_skip.append(id)
else:
logger.info(f"No work order signals connection found deleting...")
delete_knack_project_record(id)

logger.info(
f"Record IDs of {len(deletes_to_skip)} deletes skipped: {deletes_to_skip}"
)
logger.info(f"Done.")


if __name__ == "__main__":
log_level = logging.INFO
logger = get_logger(name="sync_evaluation", level=log_level)
logger.info(f"Starting.")

main()

0 comments on commit f502b5a

Please sign in to comment.