Skip to content

Commit

Permalink
Make script output more informative
Browse files Browse the repository at this point in the history
  • Loading branch information
aalexfvk committed Jan 26, 2024
1 parent b6ac017 commit ba20cf7
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 119 deletions.
68 changes: 52 additions & 16 deletions ch_tools/chadmin/cli/object_storage_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
# Prefix for a listing table name
LISTING_TABLE_PREFIX = "listing_objects_from_"
# Batch size for inserts in a listing table
INSERT_BATCH_SIZE = 1000
# Keep the batch small: the default value of the ClickHouse 'http_max_field_value_size' setting is 128 KB
INSERT_BATCH_SIZE = 100


@group("object-storage")
Expand Down Expand Up @@ -97,7 +98,7 @@ def object_storage_group(ctx: Context, disk_name: str) -> None:
help=("Do not delete collected paths of objects from object storage."),
)
@pass_context
def clean_object_storage(
def clean_command(
ctx: Context,
object_name_prefix: str,
from_time: Optional[timedelta],
Expand All @@ -117,23 +118,56 @@ def clean_object_storage(
)

disk_conf: S3DiskConfiguration = ctx.obj["disk_configuration"]
listing_table = LISTING_TABLE_PREFIX + disk_conf.name
# Create listing table for storing paths from object storage
try:
execute_query(
ctx,
f"CREATE TABLE IF NOT EXISTS {listing_table} (obj_path String) ENGINE MergeTree ORDER BY obj_path",
)
_clean_object_storage(
ctx,
object_name_prefix,
from_time,
to_time,
on_cluster,
cluster_name,
dry_run,
listing_table,
)
finally:
if not keep_paths:
execute_query(
ctx, f"TRUNCATE TABLE IF EXISTS {listing_table}", format_=None
)


def _clean_object_storage(
ctx: Context,
object_name_prefix: str,
from_time: Optional[timedelta],
to_time: timedelta,
on_cluster: bool,
cluster_name: str,
dry_run: bool,
listing_table: str,
) -> None:
"""
Delete orphaned objects from object storage.
"""
disk_conf: S3DiskConfiguration = ctx.obj["disk_configuration"]
click.echo(
f"Collecting objects for S3 disk '{disk_conf.name}' with endpoint '{disk_conf.endpoint_url}' "
f"in bucket [{disk_conf.bucket_name}] with prefix '{disk_conf.prefix}'"
)
_traverse_object_storage(ctx, listing_table, from_time, to_time, object_name_prefix)

remote_data_paths_table = "system.remote_data_paths"
if on_cluster:
remote_data_paths_table = (
f"clusterAllReplicas('{cluster_name}', {remote_data_paths_table})"
)

listing_table = LISTING_TABLE_PREFIX + disk_conf.name
# Create listing table for storing paths from object storage
execute_query(
ctx,
f"CREATE TABLE IF NOT EXISTS {listing_table} (obj_path String) ENGINE MergeTree ORDER BY obj_path",
)
_traverse_object_storage(
ctx, listing_table, from_time, to_time, object_name_prefix
)

antijoin_query = f"""
SELECT obj_path FROM {listing_table} AS object_storage
LEFT ANTI JOIN {remote_data_paths_table} AS object_table
Expand All @@ -142,6 +176,11 @@ def clean_object_storage(
"""
logging.info("Antijoin query: %s", antijoin_query)

if dry_run:
click.echo("Counting orphaned objects...")
else:
click.echo("Deleting orphaned objects...")

deleted = 0
with execute_query(
ctx, antijoin_query, stream=True, format_="TabSeparated"
Expand All @@ -154,11 +193,8 @@ def clean_object_storage(
else:
deleted = cleanup_s3_object_storage(disk_conf, paths_to_delete)

if not keep_paths:
execute_query(ctx, f"TRUNCATE {listing_table}", format_=None)

click.echo(
f"{'Would delete' if dry_run else 'Deleted'} {deleted} objects from bucket [{disk_conf.bucket_name}]"
f"{'Would delete' if dry_run else 'Deleted'} {deleted} objects from bucket [{disk_conf.bucket_name}] with prefix {disk_conf.prefix}"
)


Expand Down
3 changes: 0 additions & 3 deletions ch_tools/chadmin/internal/object_storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,3 @@
ObjectSummary,
s3_object_storage_iterator,
)
from ch_tools.chadmin.internal.object_storage.s3_local_metadata import (
S3ObjectLocalMetaData,
)
92 changes: 0 additions & 92 deletions ch_tools/chadmin/internal/object_storage/s3_local_metadata.py

This file was deleted.

16 changes: 8 additions & 8 deletions tests/features/object_storage.feature
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Feature: chadmin object-storage commands
"""
chadmin object-storage clean --dry-run
"""
Then we get response
Then we get response contains
"""
Would delete 0 objects from bucket [cloud-storage-test]
"""
Expand All @@ -43,15 +43,15 @@ Feature: chadmin object-storage commands
"""
chadmin object-storage clean --dry-run --on-cluster
"""
Then we get response
Then we get response contains
"""
Would delete 0 objects from bucket [cloud-storage-test]
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --to-time 0h --dry-run --on-cluster
"""
Then we get response
Then we get response contains
"""
Would delete 0 objects from bucket [cloud-storage-test]
"""
Expand All @@ -68,15 +68,15 @@ Feature: chadmin object-storage commands
"""
chadmin object-storage clean --dry-run --to-time 0h --on-cluster
"""
Then we get response
Then we get response contains
"""
Would delete 1 objects from bucket [cloud-storage-test]
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --to-time 0h --on-cluster
"""
Then we get response
Then we get response contains
"""
Deleted 1 objects from bucket [cloud-storage-test]
"""
Expand All @@ -98,23 +98,23 @@ Feature: chadmin object-storage commands
"""
chadmin object-storage clean --dry-run --to-time 0h --on-cluster
"""
Then we get response
Then we get response contains
"""
Would delete 100 objects from bucket [cloud-storage-test]
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --to-time 0h --on-cluster
"""
Then we get response
Then we get response contains
"""
Deleted 100 objects from bucket [cloud-storage-test]
"""
When we execute command on clickhouse01
"""
chadmin object-storage clean --to-time 0h --dry-run --on-cluster
"""
Then we get response
Then we get response contains
"""
Would delete 0 objects from bucket [cloud-storage-test]
"""

0 comments on commit ba20cf7

Please sign in to comment.