
Commit

[rev.1]
aalexfvk committed Feb 21, 2024
1 parent d44a9b4 commit 9ed7389
Showing 4 changed files with 32 additions and 29 deletions.
27 changes: 5 additions & 22 deletions ch_tools/chadmin/cli/object_storage_group.py
@@ -1,5 +1,4 @@
 import logging
-from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from tempfile import TemporaryFile
 from typing import List, Optional
@@ -10,6 +9,7 @@
 from ch_tools.chadmin.cli import get_clickhouse_config
 from ch_tools.chadmin.internal.object_storage import (
+    ObjListItem,
     cleanup_s3_object_storage,
     s3_object_storage_iterator,
 )
@@ -30,17 +30,6 @@
 STREAM_TIMEOUT = 10 * 60
 
 
-@dataclass
-class ObjListItem:
-    path: str
-    size: int
-
-    @classmethod
-    def from_tab_separated(cls, value: str) -> "ObjListItem":
-        path, size = value.split("\t")
-        return cls(path, int(size))
-
-
 @group("object-storage")
 @option(
     "-d",
@@ -229,16 +218,10 @@ def _clean_object_storage(
 
     keys_file.seek(0)  # rewind file pointer to the beginning
 
-    # Generator producing keys from temporary file with counting of statistics
-    def keys():
-        nonlocal deleted, total_size
-        for line in keys_file:
-            obj = ObjListItem.from_tab_separated(line.decode().strip())
-            yield obj.path
-            deleted += 1
-            total_size += obj.size
-
-    cleanup_s3_object_storage(disk_conf, keys(), dry_run)
+    keys = (
+        ObjListItem.from_tab_separated(line.decode().strip()) for line in keys_file
+    )
+    deleted, total_size = cleanup_s3_object_storage(disk_conf, keys, dry_run)
 
     logging.info(
         "%s %s objects with total size %s from bucket [%s] with prefix %s",
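For context, a minimal runnable sketch of the reworked call site above, assuming a temporary file of tab-separated "<path>\t<size>" lines as used by _clean_object_storage; stub_cleanup is a hypothetical stand-in for cleanup_s3_object_storage, included only to show how the generator of ObjListItem objects and the (deleted, total_size) return value fit together:

from dataclasses import dataclass
from tempfile import TemporaryFile
from typing import Iterator, Tuple


@dataclass
class ObjListItem:
    path: str
    size: int

    @classmethod
    def from_tab_separated(cls, value: str) -> "ObjListItem":
        path, size = value.split("\t")
        return cls(path, int(size))


def stub_cleanup(keys: Iterator[ObjListItem], dry_run: bool = False) -> Tuple[int, int]:
    # Count objects and total bytes while (not) deleting them, mirroring the new contract.
    deleted = 0
    total_size = 0
    for item in keys:
        deleted += 1
        total_size += item.size
    return deleted, total_size


with TemporaryFile() as keys_file:
    keys_file.write(b"data/store/abc\t1024\ndata/store/def\t2048\n")
    keys_file.seek(0)  # rewind file pointer to the beginning, as in the real command
    keys = (
        ObjListItem.from_tab_separated(line.decode().strip()) for line in keys_file
    )
    deleted, total_size = stub_cleanup(keys, dry_run=True)
    print(deleted, total_size)  # 2 3072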
1 change: 1 addition & 0 deletions ch_tools/chadmin/internal/object_storage/__init__.py
@@ -1,3 +1,4 @@
+from ch_tools.chadmin.internal.object_storage.obj_list_item import ObjListItem
 from ch_tools.chadmin.internal.object_storage.s3_cleanup import (
     cleanup_s3_object_storage,
 )
16 changes: 16 additions & 0 deletions ch_tools/chadmin/internal/object_storage/obj_list_item.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class ObjListItem:
+    """
+    Item of object storage listing.
+    """
+
+    path: str
+    size: int
+
+    @classmethod
+    def from_tab_separated(cls, value: str) -> "ObjListItem":
+        path, size = value.split("\t")
+        return cls(path, int(size))
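For reference, a quick usage sketch of the extracted helper (the sample path and size are invented for illustration; the package-level import relies on the re-export added to __init__.py above):

from ch_tools.chadmin.internal.object_storage import ObjListItem

item = ObjListItem.from_tab_separated("data/cluster1/shard1/abc\t4096")
assert item.path == "data/cluster1/shard1/abc"
assert item.size == 4096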
17 changes: 10 additions & 7 deletions ch_tools/chadmin/internal/object_storage/s3_cleanup.py
@@ -1,17 +1,18 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator, List, Tuple
 
 import boto3
-from botocore.client import Config  # type: ignore[import]
+from botocore.client import Config
 
+from ch_tools.chadmin.internal.object_storage import ObjListItem
 from ch_tools.chadmin.internal.utils import chunked
 from ch_tools.common.clickhouse.config.storage_configuration import S3DiskConfiguration
 
 BULK_DELETE_CHUNK_SIZE = 1000
 
 
 def cleanup_s3_object_storage(
-    disk: S3DiskConfiguration, keys: Iterator[str], dry_run: bool = False
-) -> int:
+    disk: S3DiskConfiguration, keys: Iterator[ObjListItem], dry_run: bool = False
+) -> Tuple[int, int]:
     s3 = boto3.resource(
         "s3",
         endpoint_url=disk.endpoint_url,
@@ -21,15 +22,17 @@ def cleanup_s3_object_storage(
     )
     bucket = s3.Bucket(disk.bucket_name)
     deleted = 0
+    total_size = 0
 
     for chunk in chunked(keys, BULK_DELETE_CHUNK_SIZE):
         if not dry_run:
             _bulk_delete(bucket, chunk)
         deleted += len(chunk)
+        total_size += sum(item.size for item in chunk)
 
-    return deleted
+    return deleted, total_size
 
 
-def _bulk_delete(bucket: Any, keys: List[str]) -> None:
-    objects = [{"Key": key} for key in keys]
+def _bulk_delete(bucket: Any, items: List[ObjListItem]) -> None:
+    objects = [{"Key": item.path} for item in items]
     bucket.delete_objects(Delete={"Objects": objects, "Quiet": False})
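The per-object counting that previously lived in the command's local keys() generator is now accumulated per chunk inside cleanup_s3_object_storage. Below is a minimal sketch of that accumulation: chunked_stub is an assumed equivalent of ch_tools.chadmin.internal.utils.chunked (its real signature is not shown in this diff), and the run is a dry run, so no boto3 client or network access is involved:

from dataclasses import dataclass
from itertools import islice
from typing import Iterable, Iterator, List, Tuple


@dataclass
class ObjListItem:
    path: str
    size: int


def chunked_stub(iterable: Iterable[ObjListItem], n: int) -> Iterator[List[ObjListItem]]:
    # Yield successive lists of at most n items; assumed equivalent of utils.chunked.
    it = iter(iterable)
    while chunk := list(islice(it, n)):
        yield chunk


def count_cleanup(keys: Iterable[ObjListItem], dry_run: bool = True) -> Tuple[int, int]:
    deleted = 0
    total_size = 0
    for chunk in chunked_stub(keys, 1000):
        if not dry_run:
            pass  # the real code calls _bulk_delete(bucket, chunk) here
        deleted += len(chunk)
        total_size += sum(item.size for item in chunk)
    return deleted, total_size


items = [ObjListItem("data/a", 100), ObjListItem("data/b", 250)]
print(count_cleanup(items))  # (2, 350)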
