diff --git a/README.rst b/README.rst index 7788d9583..1395f4ca0 100644 --- a/README.rst +++ b/README.rst @@ -112,6 +112,11 @@ asset by editing the asset in Superset and selecting "Save As" to save it to a n # Note: If you are using custom assets you will need to rebuild your aspects-superset # image on your local machine with `tutor images build aspects-superset --no-cache`. +Assets (charts/datasets) created for Aspects that are no longer used can be listed in +`aspects_asset_list.yaml`. These assets & any translated assets created from them, +are deleted from Superset during `init` (specifically `import-assets`). The corresponding +YAML files are deleted during `import_superset_zip` or and `check_superset_assets`. + Sharing Charts and Dashboards ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tutoraspects/asset_command_helpers.py b/tutoraspects/asset_command_helpers.py index 491e170e4..d6366dad6 100644 --- a/tutoraspects/asset_command_helpers.py +++ b/tutoraspects/asset_command_helpers.py @@ -15,11 +15,18 @@ FILE_NAME_ATTRIBUTE = "_file_name" -PLUGIN_PATH = os.path.dirname(os.path.abspath(__file__)) +PLUGIN_PATH = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "templates", "aspects" +) +ASPECT_ASSET_LIST = os.path.join( + PLUGIN_PATH, + "apps", + "superset", + "pythonpath", + "aspects_asset_list.yaml", +) ASSETS_PATH = os.path.join( PLUGIN_PATH, - "templates", - "aspects", "build", "aspects-superset", "openedx-assets", @@ -465,116 +472,126 @@ def deduplicate_superset_assets(echo): echo() echo(click.style(f"{err} errors found!", fg="red")) - echo("Deduplication complete.") - - -def check_asset_names(echo): - """ - Warn about any duplicate asset names. - """ - echo("Looking for duplicate names...") - warn = 0 - - names = set() - for file_name, asset in _get_asset_files(): - for k in ("slice_name", "dashboard_title", "database_name"): - if k in asset: - if asset[k] in names: - warn += 1 - echo( - f"WARNING: Duplicate name {asset[k]} in {file_name}, this " - f"could confuse users, consider changing it." - ) - names.add(asset[k]) - break - - echo(f"{warn} duplicate names detected.") + echo("De-duplication complete.") -def _get_all_chart_dataset_uuids(): +def _get_all_uuids(): """ - Return the UUIDs of all datasets and charts in our file assets. + Return the UUIDs of all assets. """ - all_dataset_uuids = {} - all_chart_uuids = {} + all_uuids = {"charts": {}, "datasets": {}} # First get all known uuid's - for _, asset in _get_asset_files(): + for file_path, asset in _get_asset_files(): if "slice_name" in asset: - all_chart_uuids[asset["uuid"]] = asset["slice_name"] + all_uuids["charts"][asset["uuid"]] = { + "name": asset["_file_name"], + "file_path": file_path, + } elif "table_name" in asset: - all_dataset_uuids[asset["uuid"]] = asset["table_name"] + all_uuids["datasets"][asset["uuid"]] = { + "name": asset["table_name"], + "file_path": file_path, + } - return all_dataset_uuids, all_chart_uuids + return all_uuids -def _get_used_chart_dataset_uuids(): +def _get_used_uuids(): """ Return the UUIDs of all datasets and charts actually used in our file assets. """ - used_dataset_uuids = set() - used_chart_uuids = set() + used_uuids = {"charts": set(), "datasets": set()} for _, asset in _get_asset_files(): if "dashboard_title" in asset: filters = asset["metadata"].get("native_filter_configuration", []) for filter_config in filters: - for filter_dataset in filter_config.get("target", {}).get( - "datasetUuid", [] - ): - used_dataset_uuids.add(filter_dataset) + for item in filter_config.get("targets", {}): + if item.get("datasetUuid"): + used_uuids["datasets"].add(item.get("datasetUuid")) for pos in asset["position"]: if pos.startswith("CHART-"): - slice_uuid = asset["position"][pos]["meta"].get("uuid") - - if slice_uuid: - used_chart_uuids.add(slice_uuid) + used_uuids["charts"].add(asset["position"][pos]["meta"].get("uuid")) if "slice_name" in asset: - dataset_uuid = asset["dataset_uuid"] - used_dataset_uuids.add(dataset_uuid) + used_uuids["datasets"].add(asset["dataset_uuid"]) - return used_dataset_uuids, used_chart_uuids + return used_uuids -def check_orphan_assets(echo): +def _find_unused_assets(): """ - Warn about any potentially unused assets. + Find potentially unused assets. + UUIDs listed as 'ignored' in aspects_asset_list.yaml are owned + by Aspects and will be removed from the list of potential unused assets. """ - echo("Looking for potentially orphaned assets...") + all_uuids = _get_all_uuids() + used_uuids = _get_used_uuids() - all_dataset_uuids, all_chart_uuids = _get_all_chart_dataset_uuids() - used_dataset_uuids, used_chart_uuids = _get_used_chart_dataset_uuids() + # Remove uuids from 'all' list that are in used list + for asset_type, uuids in used_uuids.items(): + for uuid in uuids or []: + try: + all_uuids[asset_type].pop(uuid) + except KeyError: + click.echo( + click.style( + f"WARNING: {asset_type} {uuid} used but not found!", fg="red" + ) + ) - for k in used_dataset_uuids: - try: - all_dataset_uuids.pop(k) - except KeyError: - click.echo( - click.style(f"WARNING: Dataset {k} used nut not found!", fg="red") - ) + # Remove uuids from 'all' list that are in ignored yaml + with open(ASPECT_ASSET_LIST, "r", encoding="utf-8") as file: + aspects_assets = yaml.safe_load(file) - # Remove the "Query performance" chart from the list, it's needed for - # the performance_metrics script, but not in any dashboard. - all_chart_uuids.pop("bb13bb31-c797-4ed3-a7f9-7825cc6dc482", None) + ignored_uuids = aspects_assets.get("ignored_uuids") + for asset_type in ignored_uuids: + for uuid in ignored_uuids[asset_type] or []: + all_uuids[asset_type].pop(uuid, None) - for k in used_chart_uuids: - try: - all_chart_uuids.pop(k) - except KeyError: - click.echo(click.style(f"WARNING: Chart {k} used nut not found!", fg="red")) + return all_uuids - echo() - if all_dataset_uuids: - echo(click.style("Potentially unused datasets detected:", fg="yellow")) - echo("\n".join(sorted(all_dataset_uuids.values()))) +def delete_aspects_unused_assets(echo): + """ + Warn about any potentially unused assets AND + delete any unused chart and dataset yamls whose UUIDs are listed in + aspects_assets_list.yaml - these are owned by Aspects and can safely + be deleted. + """ + unused_uuids = _find_unused_assets() + + count_unused_uuids = sum(len(uuids) for asset_type, uuids in unused_uuids.items()) + if count_unused_uuids: + with open(ASPECT_ASSET_LIST, "r", encoding="utf-8") as file: + aspects_assets = yaml.safe_load(file) + + unused_aspects_uuids = aspects_assets.get("unused_uuids") + for asset_type in unused_aspects_uuids: + for uuid in unused_aspects_uuids[asset_type] or []: + if uuid in unused_uuids[asset_type]: + data = unused_uuids[asset_type][uuid] + echo( + f"Deleting unused {asset_type} yaml {data.get('name')} (UUID: {uuid})" + ) + os.remove(data.get("file_path")) + unused_uuids[asset_type].pop(uuid) + + new_count_unused_uuids = sum( + len(uuids) for asset_type, uuids in unused_uuids.items() + ) - if all_chart_uuids: - echo(click.style("Potentially unused charts detected:", fg="yellow")) - echo("\n".join(sorted(all_chart_uuids.values()))) + if new_count_unused_uuids: + echo(click.style("Potentially unused assets detected:", fg="yellow")) + echo( + click.style( + "Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green" + ) + ) - if not all_dataset_uuids and not all_chart_uuids: - echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} orphans detected.") + for asset_type, uuids in unused_uuids.items(): + for uuid, data in uuids.items(): + echo(f'{asset_type} {data.get("name")} (UUID: {uuid})') diff --git a/tutoraspects/commands_v1.py b/tutoraspects/commands_v1.py index 03623b9ca..fdde965e2 100644 --- a/tutoraspects/commands_v1.py +++ b/tutoraspects/commands_v1.py @@ -11,10 +11,9 @@ from tutoraspects.asset_command_helpers import ( ASSETS_PATH, SupersetCommandError, - check_asset_names, - check_orphan_assets, deduplicate_superset_assets, import_superset_assets, + delete_aspects_unused_assets, ) @@ -366,9 +365,7 @@ def serialize_zip(file, base_assets_path): click.echo() deduplicate_superset_assets(click.echo) - - click.echo() - check_asset_names(click.echo) + delete_aspects_unused_assets(click.echo) click.echo() click.echo("Asset merge complete!") @@ -387,11 +384,7 @@ def check_superset_assets(): Deduplicate assets by UUID, and check for duplicate asset names. """ deduplicate_superset_assets(click.echo) - - click.echo() - check_asset_names(click.echo) - click.echo() - check_orphan_assets(click.echo) + delete_aspects_unused_assets(click.echo) click.echo() click.echo( diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml b/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml new file mode 100644 index 000000000..6d990972f --- /dev/null +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/aspects_asset_list.yaml @@ -0,0 +1,18 @@ +unused_uuids: + datasets: + charts: + - a433e3cc-8ed5-454a-8b17-5dd75cfc84e4 # Course Info + - e0098cfe-a312-4c49-8efd-7e74256b6ea4 # Course info + - d3ae0546-37a8-4841-a57b-8087a6c33049 # Evolution of engagement + - 2f2af8b0-94ae-4300-b71f-3bd7f9fc127c # Enrollment counts + - bd37be7f-6672-4dca-80ae-a54f69d169da # Enrollment counts + - dde44a03-649f-4d77-990b-a95be204e1ba # Learner performance + - 4e48b8f9-e757-4263-a9d7-d18018620a24 # Learner performance + - 4250c976-a9b7-43ff-b5ad-8dd00a5acef7 # Learner performance breakdown + - 9c3f7291-1bd9-4b2f-abc0-472aad3aff06 # Learner performance breakdown + - 8b0535a8-a43f-49bf-9d50-439a16bd3f74 # Video engagement + +ignored_uuids: + datasets: + charts: + - bb13bb31-c797-4ed3-a7f9-7825cc6dc482 # "Query performance" needed for the performance_metrics script diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py index 2ae24294f..7c6232874 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/create_assets.py @@ -12,21 +12,26 @@ from pathlib import Path from sqlfmt.api import format_string from sqlfmt.mode import Mode +from collections import defaultdict from superset import security_manager from superset.extensions import db from superset.models.dashboard import Dashboard from superset.models.slice import Slice from superset.connectors.sqla.models import SqlaTable -from superset.utils.database import get_or_create_db from superset.models.embedded_dashboard import EmbeddedDashboard - from pythonpath.create_assets_utils import load_configs_from_directory +from pythonpath.delete_assets import delete_assets + from pythonpath.localization import get_translation from pythonpath.create_row_level_security import create_rls_filters logger = logging.getLogger("create_assets") +# Supress output from black (sqlfmt) formatting +blib2to3_logger = logging.getLogger('blib2to3.pgen2.driver') +blib2to3_logger.setLevel(logging.WARN) + BASE_DIR = "/app/assets/superset" ASSET_FOLDER_MAPPING = { "dashboard_title": "dashboards", @@ -44,7 +49,7 @@ FILE_NAME_ATTRIBUTE = "_file_name" -ASSETS_FILE_PATH = "/app/pythonpath/assets.yaml" +PYTHONPATH = "/app/pythonpath" ASSETS_PATH = "/app/openedx-assets/assets" @@ -55,6 +60,7 @@ def main(): def create_assets(): """Create assets from a yaml file.""" roles = {} + translated_asset_uuids = defaultdict(set) for root, dirs, files in os.walk(ASSETS_PATH): for file in files: @@ -64,15 +70,16 @@ def create_assets(): path = os.path.join(root, file) with open(path, "r") as file: asset = yaml.safe_load(file) - process_asset(asset, roles) + process_asset(asset, roles, translated_asset_uuids) - with open(ASSETS_FILE_PATH, "r") as file: + # Get extra assets + with open(os.path.join(PYTHONPATH,"assets.yaml"), "r") as file: extra_assets = yaml.safe_load_all(file) if extra_assets: # For each asset, create a file in the right folder for asset in extra_assets: - process_asset(asset, roles) + process_asset(asset, roles, translated_asset_uuids) import_assets() update_dashboard_roles(roles) @@ -80,8 +87,13 @@ def create_assets(): update_datasets() create_rls_filters() + # Delete unused assets + with open(os.path.join(PYTHONPATH,"aspects_asset_list.yaml"), "r", encoding="utf-8") as file: + assets = yaml.safe_load(file) + unused_aspect_uuids = assets['unused_uuids'] + delete_assets(unused_aspect_uuids, translated_asset_uuids) -def process_asset(asset, roles): +def process_asset(asset, roles, translated_asset_uuids): if FILE_NAME_ATTRIBUTE not in asset: raise Exception(f"Asset {asset} has no {FILE_NAME_ATTRIBUTE}") file_name = asset.pop(FILE_NAME_ATTRIBUTE) @@ -89,7 +101,7 @@ def process_asset(asset, roles): # Find the right folder to create the asset in for asset_name, folder in ASSET_FOLDER_MAPPING.items(): if asset_name in asset: - write_asset_to_file(asset, asset_name, folder, file_name, roles) + write_asset_to_file(asset, asset_name, folder, file_name, roles, translated_asset_uuids) return @@ -101,9 +113,8 @@ def get_localized_uuid(base_uuid, language): base_namespace = uuid.uuid5(base_uuid, "superset") normalized_language = language.lower().replace("-", "_") return str(uuid.uuid5(base_namespace, normalized_language)) - - -def write_asset_to_file(asset, asset_name, folder, file_name, roles): + +def write_asset_to_file(asset, asset_name, folder, file_name, roles, translated_asset_uuids): """Write an asset to a file and generated translated assets""" if folder == "databases": # Update the sqlalchery_uri from the asset override pre-generated values @@ -114,7 +125,7 @@ def write_asset_to_file(asset, asset_name, folder, file_name, roles): asset["sql"] = format_string(asset["sql"], mode=Mode(dialect_name="clickhouse")) updated_asset = generate_translated_asset( - asset, asset_name, folder, locale, roles + asset, asset_name, folder, locale, roles, translated_asset_uuids ) # Clean up old localized dashboards @@ -147,11 +158,15 @@ def write_asset_to_file(asset, asset_name, folder, file_name, roles): db.session.commit() -def generate_translated_asset(asset, asset_name, folder, language, roles): +def generate_translated_asset(asset, asset_name, folder, language, roles, translated_asset_uuids): """Generate a translated asset with their elements updated""" copy = deepcopy(asset) + parent_uuid = copy['uuid'] copy["uuid"] = str(get_localized_uuid(copy["uuid"], language)) copy[asset_name] = get_translation(copy[asset_name], language) + + # Save parent & translated uuids in yaml file + translated_asset_uuids[parent_uuid].add(copy['uuid']) if folder == "dashboards": copy["slug"] = f"{copy['slug']}-{language}" copy["description"] = get_translation(copy["description"], language) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/delete_assets.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/delete_assets.py new file mode 100644 index 000000000..5363d64e0 --- /dev/null +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/delete_assets.py @@ -0,0 +1,61 @@ +"""Delete all unused Aspects assets from Superset tables""" +import logging +from flask import g + +from superset import security_manager +from superset.extensions import db +from superset.models.slice import Slice +from superset.connectors.sqla.models import SqlaTable +from superset.tags.models import TaggedObject, ObjectType +from superset.commands.chart.delete import DeleteChartCommand +from superset.commands.dataset.delete import DeleteDatasetCommand +from sqlalchemy.exc import NoResultFound +from superset.commands.exceptions import CommandInvalidError + +logger = logging.getLogger("delete_assets") +PYTHONPATH = "/app/pythonpath" + +ASSET_TABLES = {'charts': Slice, 'datasets': SqlaTable} +ASSET_NAME_COLUMN = {'charts': 'slice_name', 'datasets': 'table_name'} +ASSET_COMMANDS = {'charts': DeleteChartCommand, 'datasets': DeleteDatasetCommand} +OBJECT_TYPES = {'charts': ObjectType.chart, 'datasets': ObjectType.dataset} + +def delete_assets(unused_uuids, translated_asset_uuids): + """Delete unused assets and their translated versions""" + for type in unused_uuids: + id_list = [] + asset_list = set() + for uuid in unused_uuids[type] or []: + try: + row = db.session.query(ASSET_TABLES[type]).filter_by(uuid=uuid).one() + id_list.append(row.id) + asset_list.add(getattr(row,ASSET_NAME_COLUMN[type])) + + if uuid in translated_asset_uuids: + for child_uuid in translated_asset_uuids[uuid]: + row = db.session.query(ASSET_TABLES[type]).filter_by(uuid=child_uuid).one() + id_list.append(row.id) + asset_list.add(getattr(row,ASSET_NAME_COLUMN[type])) + except NoResultFound: + continue + + if len(id_list) > 0: + try: + logger.warning(f'Deleting the following {type}: ') + logger.warning(asset_list) + + # Force our use to the admin user to prevent errors on delete + g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}") + + # Delete tagged object rows first because the DeleteCommands are currently + # broken in Superset if there is more than 1 tag per asset + for id in id_list: + rows = db.session.query(TaggedObject).filter_by(object_id = id).all() + for row in rows: + db.session.delete(row) + + command = ASSET_COMMANDS[type](id_list) + command.run() + + except CommandInvalidError as ex: + logger.error("An error occurred: %s", ex.normalized_messages())