From 6105f5c0c1ad51839489379fc2a4450c5f862de1 Mon Sep 17 00:00:00 2001 From: Sara Burns Date: Tue, 24 Sep 2024 10:17:19 -0400 Subject: [PATCH] feat: Delete unused assets owned by Aspects on imports --- tutoraspects/asset_command_helpers.py | 134 ++++++++++++++---- tutoraspects/commands_v1.py | 17 ++- .../pythonpath/aspects_asset_list.yaml | 8 ++ .../apps/superset/pythonpath/create_assets.py | 36 ++++- .../pythonpath/create_assets_utils.py | 27 ++++ 5 files changed, 189 insertions(+), 33 deletions(-) create mode 100644 tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml diff --git a/tutoraspects/asset_command_helpers.py b/tutoraspects/asset_command_helpers.py index 2cf1f8b9c..5edec1889 100644 --- a/tutoraspects/asset_command_helpers.py +++ b/tutoraspects/asset_command_helpers.py @@ -16,6 +16,15 @@ FILE_NAME_ATTRIBUTE = "_file_name" PLUGIN_PATH = os.path.dirname(os.path.abspath(__file__)) +ASSET_FILE_PATH = os.path.join( + PLUGIN_PATH, + "templates", + "aspects", + "apps", + "superset", + "pythonpath", + "aspects_asset_list.yaml", +) ASSETS_PATH = os.path.join( PLUGIN_PATH, "templates", @@ -474,19 +483,20 @@ def check_asset_names(echo): warn = 0 names = set() - for file_name, asset in _get_asset_files(): + for _, asset in _get_asset_files(): for k in ("slice_name", "dashboard_title", "database_name"): if k in asset: if asset[k] in names: warn += 1 echo( - f"WARNING: Duplicate name {asset[k]} in {file_name}, this " - f"could confuse users, consider changing it." + f"WARNING: Duplicate {k} {asset[k]} in {asset.get('_file_name')}" ) names.add(asset[k]) break - echo(f"{warn} duplicate names detected.") + echo( + f"{warn} duplicate names detected. This could confuse users, consider changing them." + ) def _get_all_chart_dataset_uuids(): @@ -497,11 +507,17 @@ def _get_all_chart_dataset_uuids(): all_chart_uuids = {} # First get all known uuid's - for _, asset in _get_asset_files(): + for file_path, asset in _get_asset_files(): if "slice_name" in asset: - all_chart_uuids[asset["uuid"]] = asset["slice_name"] + all_chart_uuids[asset["uuid"]] = { + "name": asset["_file_name"], + "file_path": file_path, + } elif "table_name" in asset: - all_dataset_uuids[asset["uuid"]] = asset["table_name"] + all_dataset_uuids[asset["uuid"]] = { + "name": asset["table_name"], + "file_path": file_path, + } return all_dataset_uuids, all_chart_uuids @@ -518,10 +534,9 @@ def _get_used_chart_dataset_uuids(): filters = asset["metadata"].get("native_filter_configuration", []) for filter_config in filters: - for filter_dataset in filter_config.get("target", {}).get( - "datasetUuid", [] - ): - used_dataset_uuids.add(filter_dataset) + for item in filter_config.get("targets", {}): + if item.get("datasetUuid"): + used_dataset_uuids.add(item.get("datasetUuid")) for pos in asset["position"]: if pos.startswith("CHART-"): @@ -537,42 +552,105 @@ def _get_used_chart_dataset_uuids(): return used_dataset_uuids, used_chart_uuids -def check_orphan_assets(echo): +def _find_unused_assets(echo): """ - Warn about any potentially unused assets. + Find potentially unused assets. + UUIDs listed as 'ignored' in aspects_asset_list.yaml are owned + by Aspects and will be removed from the list of potential unused assets. """ - echo("Looking for potentially orphaned assets...") + echo("Looking for potentially unused assets...") all_dataset_uuids, all_chart_uuids = _get_all_chart_dataset_uuids() used_dataset_uuids, used_chart_uuids = _get_used_chart_dataset_uuids() + # Remove uuids from all list that are in used list for k in used_dataset_uuids: try: all_dataset_uuids.pop(k) except KeyError: click.echo( - click.style(f"WARNING: Dataset {k} used nut not found!", fg="red") + click.style(f"WARNING: Dataset {k} used but not found!", fg="red") ) - # Remove the "Query performance" chart from the list, it's needed for - # the performance_metrics script, but not in any dashboard. - all_chart_uuids.pop("bb13bb31-c797-4ed3-a7f9-7825cc6dc482", None) - for k in used_chart_uuids: try: all_chart_uuids.pop(k) except KeyError: - click.echo(click.style(f"WARNING: Chart {k} used nut not found!", fg="red")) + click.echo(click.style(f"WARNING: Chart {k} used but not found!", fg="red")) - echo() + # Remove uuids from all list that are in ignored yaml + with open(ASSET_FILE_PATH, "r", encoding="utf-8") as file: + aspects_assets = yaml.safe_load_all(file) - if all_dataset_uuids: - echo(click.style("Potentially unused datasets detected:", fg="yellow")) - echo("\n".join(sorted(all_dataset_uuids.values()))) + for line in aspects_assets: + ignored_uuids = line.get("ignored_uuids") + if ignored_uuids and ignored_uuids.get("datasets"): + for k in ignored_uuids.get("datasets"): + all_dataset_uuids.pop(k, None) - if all_chart_uuids: - echo(click.style("Potentially unused charts detected:", fg="yellow")) - echo("\n".join(sorted(all_chart_uuids.values()))) + if ignored_uuids and ignored_uuids.get("charts"): + for k in ignored_uuids.get("charts"): + all_chart_uuids.pop(k, None) if not all_dataset_uuids and not all_chart_uuids: - echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} orphans detected.") + echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} unused assets detected.") + + return all_dataset_uuids, all_chart_uuids + + +def delete_aspects_unused_assets(): + """ + Warn about any potentially unused assets. AND + Delete any unused charts and datasets whose UUIDs are listed in + aspects_assets_list.yaml - these are owned by Aspects and can safely + be deleted. + """ + unused_dataset_uuids, unused_chart_uuids = _find_unused_assets(click.echo) + + if unused_dataset_uuids: + click.echo(click.style("Potentially unused datasets detected:", fg="yellow")) + click.echo( + click.style( + "Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green" + ) + ) + for x, y in unused_dataset_uuids.items(): + click.echo(f'{y.get("name")} (UUID: {x})') + + if unused_chart_uuids: + click.echo(click.style("Potentially unused charts detected:", fg="yellow")) + click.echo( + click.style( + "Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green" + ) + ) + for x, y in unused_chart_uuids.items(): + click.echo(f'{y.get("name")} (UUID: {x})') + + click.echo() + + with open(ASSET_FILE_PATH, "r", encoding="utf-8") as file: + aspects_assets = yaml.safe_load_all(file) + + uuids_to_drop = [] + for line in aspects_assets: + unused_uuids = line.get("unused_uuids") + for uuid, data in unused_dataset_uuids.items(): + if unused_uuids and unused_uuids.get("datasets"): + if uuid in unused_uuids.get("datasets"): + uuids_to_drop.append(uuid) + click.echo( + f"Deleting unused dataset yaml {data.get('name')} (UUID: {uuid})" + ) + os.remove(data.get("file_path")) + + for uuid, data in unused_chart_uuids.items(): + if unused_uuids and unused_uuids.get("charts"): + if uuid in unused_uuids.get("charts"): + uuids_to_drop.append(uuid) + click.echo( + f"Deleting unused chart yaml {data.get('name')} (UUID: {uuid})" + ) + os.remove(data.get("file_path")) + + click.echo(f"Deleted {len(uuids_to_drop)} assets") diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index dffdbb23e..ba5e5f2cf 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -12,9 +12,9 @@ ASSETS_PATH, SupersetCommandError, check_asset_names, - check_orphan_assets, deduplicate_superset_assets, import_superset_assets, + delete_aspects_unused_assets, ) @@ -362,6 +362,8 @@ def serialize_zip(file, base_assets_path): click.echo() check_asset_names(click.echo) + click.echo() + delete_aspects_unused_assets() click.echo() click.echo("Asset merge complete!") @@ -383,8 +385,9 @@ def check_superset_assets(): click.echo() check_asset_names(click.echo) + click.echo() - check_orphan_assets(click.echo) + delete_aspects_unused_assets() click.echo() click.echo( @@ -395,6 +398,16 @@ def check_superset_assets(): ) +@aspects.command("delete_unused_assets") +def delete_unused_assets(): + """ + Delete any unused charts and datasets whose UUIDs are listed in + aspects_assets_list.yaml - these are owned by Aspects and can safely + be deleted. + """ + delete_aspects_unused_assets() + + DO_COMMANDS = ( load_xapi_test_data, dbt, diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml b/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml new file mode 100644 index 000000000..96888742f --- /dev/null +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml @@ -0,0 +1,8 @@ +unused_uuids: + datasets: + charts: + +ignored_uuids: + datasets: + charts: + - bb13bb31-c797-4ed3-a7f9-7825cc6dc482 # "Query performance" needed for the performance_metrics script \ No newline at end of file diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py index f72727249..6c71fcd77 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py @@ -18,10 +18,11 @@ from superset.connectors.sqla.models import SqlaTable from superset.utils.database import get_or_create_db from superset.models.embedded_dashboard import EmbeddedDashboard +from pythonpath.create_assets_utils import load_configs_from_directory, delete_aspects_assets -from pythonpath.create_assets_utils import load_configs_from_directory from pythonpath.localization import get_translation from pythonpath.create_row_level_security import create_rls_filters +from sqlalchemy.exc import NoResultFound logger = logging.getLogger("create_assets") @@ -54,6 +55,11 @@ def create_assets(): """Create assets from a yaml file.""" roles = {} + with open( "/app/pythonpath/aspects_asset_list.yaml", "r", encoding="utf-8") as file: + aspects_assets = yaml.safe_load_all(file) + for line in aspects_assets: + unused_uuids = line.get('unused_uuids') + for root, dirs, files in os.walk(ASSETS_PATH): for file in files: if not file.endswith(".yaml"): @@ -62,7 +68,8 @@ def create_assets(): path = os.path.join(root, file) with open(path, "r") as file: asset = yaml.safe_load(file) - process_asset(asset, roles) + if asset['uuid'] not in unused_uuids: + process_asset(asset, roles) with open(ASSETS_FILE_PATH, "r") as file: extra_assets = yaml.safe_load_all(file) @@ -70,13 +77,15 @@ def create_assets(): if extra_assets: # For each asset, create a file in the right folder for asset in extra_assets: - process_asset(asset, roles) + if asset['uuid'] not in unused_uuids: + process_asset(asset, roles) import_assets() update_dashboard_roles(roles) update_embeddable_uuids() update_datasets() create_rls_filters() + delete_assets(unused_uuids) def process_asset(asset, roles): @@ -243,6 +252,27 @@ def import_assets(): o.owners = owners db.session.commit() +def delete_assets(unused_uuids): + """Delete unused assets""" + chart_id_list = [] + if unused_uuids.get('charts'): + for chart_uuid in unused_uuids.get('charts'): + try: + row = db.session.query(Slice).filter_by(uuid=chart_uuid).one() + chart_id_list.append(row.id) + except NoResultFound: + continue + + dataset_id_list = [] + if unused_uuids.get('datasets'): + for dataset_uuid in unused_uuids.get('datasets'): + try: + row = db.session.query(Tables).filter_by(uuid=dataset_uuid).one() + dataset_id_list.append(row.id) + except NoResultFound: + continue + + delete_aspects_assets(chart_id_list, dataset_id_list) def update_dashboard_roles(roles): """Update the roles of the dashboards""" diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets_utils.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets_utils.py index 73105dc00..c94a10632 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets_utils.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets_utils.py @@ -32,6 +32,8 @@ from superset.commands.importers.v1.assets import ImportAssetsCommand from superset.commands.importers.v1.utils import METADATA_FILE_NAME from superset.commands.database.importers.v1.utils import security_manager +from superset.commands.chart.delete import DeleteChartCommand +from superset.commands.dataset.delete import DeleteDatasetCommand logger = logging.getLogger(__name__) @@ -85,3 +87,28 @@ def load_configs_from_directory( command.run() except CommandInvalidError as ex: logger.error("An error occurred: %s", ex.normalized_messages()) + +def delete_aspects_assets(chart_id_list, dataset_id_list): + """ + Delete charts and databases that are no longer used by Aspects. + Defined in aspects_asset_list.yaml + """ + # Force our use to the admin user to prevent errors on delete + g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}") + + if len(chart_id_list) > 0 or len(dataset_id_list) > 0: + logger.warning('----Deleting unused Aspects assets----') + + if chart_id_list: + command_chart = DeleteChartCommand(chart_id_list) + try: + command_chart.run() + except CommandInvalidError as ex: + logger.error("An error occurred: %s", ex.normalized_messages()) + + if dataset_id_list: + command_dataset = DeleteDatasetCommand(dataset_id_list) + try: + command_dataset.run() + except CommandInvalidError as ex: + logger.error("An error occurred: %s", ex.normalized_messages())