Skip to content

Commit

Permalink
feat: Delete unused assets owned by Aspects on imports
Browse files Browse the repository at this point in the history
  • Loading branch information
saraburns1 committed Sep 25, 2024
1 parent 8a08eae commit 6105f5c
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 33 deletions.
134 changes: 106 additions & 28 deletions tutoraspects/asset_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
FILE_NAME_ATTRIBUTE = "_file_name"

PLUGIN_PATH = os.path.dirname(os.path.abspath(__file__))
ASSET_FILE_PATH = os.path.join(
PLUGIN_PATH,
"templates",
"aspects",
"apps",
"superset",
"pythonpath",
"aspects_asset_list.yaml",
)
ASSETS_PATH = os.path.join(
PLUGIN_PATH,
"templates",
Expand Down Expand Up @@ -474,19 +483,20 @@ def check_asset_names(echo):
warn = 0

names = set()
for file_name, asset in _get_asset_files():
for _, asset in _get_asset_files():
for k in ("slice_name", "dashboard_title", "database_name"):
if k in asset:
if asset[k] in names:
warn += 1
echo(
f"WARNING: Duplicate name {asset[k]} in {file_name}, this "
f"could confuse users, consider changing it."
f"WARNING: Duplicate {k} {asset[k]} in {asset.get('_file_name')}"
)
names.add(asset[k])
break

echo(f"{warn} duplicate names detected.")
echo(
f"{warn} duplicate names detected. This could confuse users, consider changing them."
)


def _get_all_chart_dataset_uuids():
Expand All @@ -497,11 +507,17 @@ def _get_all_chart_dataset_uuids():
all_chart_uuids = {}

# First get all known uuid's
for _, asset in _get_asset_files():
for file_path, asset in _get_asset_files():
if "slice_name" in asset:
all_chart_uuids[asset["uuid"]] = asset["slice_name"]
all_chart_uuids[asset["uuid"]] = {
"name": asset["_file_name"],
"file_path": file_path,
}
elif "table_name" in asset:
all_dataset_uuids[asset["uuid"]] = asset["table_name"]
all_dataset_uuids[asset["uuid"]] = {
"name": asset["table_name"],
"file_path": file_path,
}

return all_dataset_uuids, all_chart_uuids

Expand All @@ -518,10 +534,9 @@ def _get_used_chart_dataset_uuids():
filters = asset["metadata"].get("native_filter_configuration", [])

for filter_config in filters:
for filter_dataset in filter_config.get("target", {}).get(
"datasetUuid", []
):
used_dataset_uuids.add(filter_dataset)
for item in filter_config.get("targets", {}):
if item.get("datasetUuid"):
used_dataset_uuids.add(item.get("datasetUuid"))

for pos in asset["position"]:
if pos.startswith("CHART-"):
Expand All @@ -537,42 +552,105 @@ def _get_used_chart_dataset_uuids():
return used_dataset_uuids, used_chart_uuids


def check_orphan_assets(echo):
def _find_unused_assets(echo):
"""
Warn about any potentially unused assets.
Find potentially unused assets.
UUIDs listed as 'ignored' in aspects_asset_list.yaml are owned
by Aspects and will be removed from the list of potential unused assets.
"""
echo("Looking for potentially orphaned assets...")
echo("Looking for potentially unused assets...")

all_dataset_uuids, all_chart_uuids = _get_all_chart_dataset_uuids()
used_dataset_uuids, used_chart_uuids = _get_used_chart_dataset_uuids()

# Remove uuids from all list that are in used list
for k in used_dataset_uuids:
try:
all_dataset_uuids.pop(k)
except KeyError:
click.echo(
click.style(f"WARNING: Dataset {k} used nut not found!", fg="red")
click.style(f"WARNING: Dataset {k} used but not found!", fg="red")
)

# Remove the "Query performance" chart from the list, it's needed for
# the performance_metrics script, but not in any dashboard.
all_chart_uuids.pop("bb13bb31-c797-4ed3-a7f9-7825cc6dc482", None)

for k in used_chart_uuids:
try:
all_chart_uuids.pop(k)
except KeyError:
click.echo(click.style(f"WARNING: Chart {k} used nut not found!", fg="red"))
click.echo(click.style(f"WARNING: Chart {k} used but not found!", fg="red"))

echo()
# Remove uuids from all list that are in ignored yaml
with open(ASSET_FILE_PATH, "r", encoding="utf-8") as file:
aspects_assets = yaml.safe_load_all(file)

if all_dataset_uuids:
echo(click.style("Potentially unused datasets detected:", fg="yellow"))
echo("\n".join(sorted(all_dataset_uuids.values())))
for line in aspects_assets:
ignored_uuids = line.get("ignored_uuids")
if ignored_uuids and ignored_uuids.get("datasets"):
for k in ignored_uuids.get("datasets"):
all_dataset_uuids.pop(k, None)

if all_chart_uuids:
echo(click.style("Potentially unused charts detected:", fg="yellow"))
echo("\n".join(sorted(all_chart_uuids.values())))
if ignored_uuids and ignored_uuids.get("charts"):
for k in ignored_uuids.get("charts"):
all_chart_uuids.pop(k, None)

if not all_dataset_uuids and not all_chart_uuids:
echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} orphans detected.")
echo(f"{len(all_chart_uuids) + len(all_dataset_uuids)} unused assets detected.")

return all_dataset_uuids, all_chart_uuids


def delete_aspects_unused_assets():
"""
Warn about any potentially unused assets. AND
Delete any unused charts and datasets whose UUIDs are listed in
aspects_assets_list.yaml - these are owned by Aspects and can safely
be deleted.
"""
unused_dataset_uuids, unused_chart_uuids = _find_unused_assets(click.echo)

if unused_dataset_uuids:
click.echo(click.style("Potentially unused datasets detected:", fg="yellow"))
click.echo(
click.style(
"Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green"
)
)
for x, y in unused_dataset_uuids.items():
click.echo(f'{y.get("name")} (UUID: {x})')

if unused_chart_uuids:
click.echo(click.style("Potentially unused charts detected:", fg="yellow"))
click.echo(
click.style(
"Add the UUIDs to aspects_asset_list.yaml to be deleted", fg="green"
)
)
for x, y in unused_chart_uuids.items():
click.echo(f'{y.get("name")} (UUID: {x})')

click.echo()

with open(ASSET_FILE_PATH, "r", encoding="utf-8") as file:
aspects_assets = yaml.safe_load_all(file)

uuids_to_drop = []
for line in aspects_assets:
unused_uuids = line.get("unused_uuids")
for uuid, data in unused_dataset_uuids.items():
if unused_uuids and unused_uuids.get("datasets"):
if uuid in unused_uuids.get("datasets"):
uuids_to_drop.append(uuid)
click.echo(
f"Deleting unused dataset yaml {data.get('name')} (UUID: {uuid})"
)
os.remove(data.get("file_path"))

for uuid, data in unused_chart_uuids.items():
if unused_uuids and unused_uuids.get("charts"):
if uuid in unused_uuids.get("charts"):
uuids_to_drop.append(uuid)
click.echo(
f"Deleting unused chart yaml {data.get('name')} (UUID: {uuid})"
)
os.remove(data.get("file_path"))

click.echo(f"Deleted {len(uuids_to_drop)} assets")
17 changes: 15 additions & 2 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
ASSETS_PATH,
SupersetCommandError,
check_asset_names,
check_orphan_assets,
deduplicate_superset_assets,
import_superset_assets,
delete_aspects_unused_assets,
)


Expand Down Expand Up @@ -362,6 +362,8 @@ def serialize_zip(file, base_assets_path):

click.echo()
check_asset_names(click.echo)
click.echo()
delete_aspects_unused_assets()

click.echo()
click.echo("Asset merge complete!")
Expand All @@ -383,8 +385,9 @@ def check_superset_assets():

click.echo()
check_asset_names(click.echo)

click.echo()
check_orphan_assets(click.echo)
delete_aspects_unused_assets()

click.echo()
click.echo(
Expand All @@ -395,6 +398,16 @@ def check_superset_assets():
)


@aspects.command("delete_unused_assets")
def delete_unused_assets():
"""
Delete any unused charts and datasets whose UUIDs are listed in
aspects_assets_list.yaml - these are owned by Aspects and can safely
be deleted.
"""
delete_aspects_unused_assets()


DO_COMMANDS = (
load_xapi_test_data,
dbt,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
unused_uuids:
datasets:
charts:

ignored_uuids:
datasets:
charts:
- bb13bb31-c797-4ed3-a7f9-7825cc6dc482 # "Query performance" needed for the performance_metrics script
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
from superset.connectors.sqla.models import SqlaTable
from superset.utils.database import get_or_create_db
from superset.models.embedded_dashboard import EmbeddedDashboard
from pythonpath.create_assets_utils import load_configs_from_directory, delete_aspects_assets

from pythonpath.create_assets_utils import load_configs_from_directory
from pythonpath.localization import get_translation
from pythonpath.create_row_level_security import create_rls_filters
from sqlalchemy.exc import NoResultFound


logger = logging.getLogger("create_assets")
Expand Down Expand Up @@ -54,6 +55,11 @@ def create_assets():
"""Create assets from a yaml file."""
roles = {}

with open( "/app/pythonpath/aspects_asset_list.yaml", "r", encoding="utf-8") as file:
aspects_assets = yaml.safe_load_all(file)
for line in aspects_assets:
unused_uuids = line.get('unused_uuids')

for root, dirs, files in os.walk(ASSETS_PATH):
for file in files:
if not file.endswith(".yaml"):
Expand All @@ -62,21 +68,24 @@ def create_assets():
path = os.path.join(root, file)
with open(path, "r") as file:
asset = yaml.safe_load(file)
process_asset(asset, roles)
if asset['uuid'] not in unused_uuids:
process_asset(asset, roles)

with open(ASSETS_FILE_PATH, "r") as file:
extra_assets = yaml.safe_load_all(file)

if extra_assets:
# For each asset, create a file in the right folder
for asset in extra_assets:
process_asset(asset, roles)
if asset['uuid'] not in unused_uuids:
process_asset(asset, roles)

import_assets()
update_dashboard_roles(roles)
update_embeddable_uuids()
update_datasets()
create_rls_filters()
delete_assets(unused_uuids)


def process_asset(asset, roles):
Expand Down Expand Up @@ -243,6 +252,27 @@ def import_assets():
o.owners = owners
db.session.commit()

def delete_assets(unused_uuids):
"""Delete unused assets"""
chart_id_list = []
if unused_uuids.get('charts'):
for chart_uuid in unused_uuids.get('charts'):
try:
row = db.session.query(Slice).filter_by(uuid=chart_uuid).one()
chart_id_list.append(row.id)
except NoResultFound:
continue

dataset_id_list = []
if unused_uuids.get('datasets'):
for dataset_uuid in unused_uuids.get('datasets'):
try:
row = db.session.query(Tables).filter_by(uuid=dataset_uuid).one()
dataset_id_list.append(row.id)
except NoResultFound:
continue

delete_aspects_assets(chart_id_list, dataset_id_list)

def update_dashboard_roles(roles):
"""Update the roles of the dashboards"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from superset.commands.importers.v1.assets import ImportAssetsCommand
from superset.commands.importers.v1.utils import METADATA_FILE_NAME
from superset.commands.database.importers.v1.utils import security_manager
from superset.commands.chart.delete import DeleteChartCommand
from superset.commands.dataset.delete import DeleteDatasetCommand

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -85,3 +87,28 @@ def load_configs_from_directory(
command.run()
except CommandInvalidError as ex:
logger.error("An error occurred: %s", ex.normalized_messages())

def delete_aspects_assets(chart_id_list, dataset_id_list):
"""
Delete charts and databases that are no longer used by Aspects.
Defined in aspects_asset_list.yaml
"""
# Force our use to the admin user to prevent errors on delete
g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}")

if len(chart_id_list) > 0 or len(dataset_id_list) > 0:
logger.warning('----Deleting unused Aspects assets----')

if chart_id_list:
command_chart = DeleteChartCommand(chart_id_list)
try:
command_chart.run()
except CommandInvalidError as ex:
logger.error("An error occurred: %s", ex.normalized_messages())

if dataset_id_list:
command_dataset = DeleteDatasetCommand(dataset_id_list)
try:
command_dataset.run()
except CommandInvalidError as ex:
logger.error("An error occurred: %s", ex.normalized_messages())

0 comments on commit 6105f5c

Please sign in to comment.