Merge pull request #379 from kris-konina-reef/master
Refactor integration tests to use persistent buckets
kris-konina-reef authored Sep 30, 2024
2 parents 5b5ac44 + 4193185 commit cc9300f
Showing 11 changed files with 810 additions and 596 deletions.
1 change: 1 addition & 0 deletions changelog.d/+test_with_persistent_bucket.infrastructure.md
@@ -0,0 +1 @@
Improve internal testing infrastructure by updating integration tests to use persistent buckets.
57 changes: 7 additions & 50 deletions test/integration/base.py
@@ -10,41 +10,31 @@
from __future__ import annotations

from test.integration.bucket_cleaner import BucketCleaner
from test.integration.helpers import (
    BUCKET_CREATED_AT_MILLIS,
    random_bucket_name,
)
from test.integration.persistent_bucket import PersistentBucketAggregate

import pytest

from b2sdk.v2 import B2Api, current_time_millis
from b2sdk.v2.exception import DuplicateBucketName
from b2sdk.v2 import B2Api


@pytest.mark.usefixtures("cls_setup")
class IntegrationTestBase:
    b2_api: B2Api
    this_run_bucket_name_prefix: str
    bucket_cleaner: BucketCleaner
    persistent_bucket: PersistentBucketAggregate

    @pytest.fixture(autouse=True, scope="class")
    def cls_setup(self, request, b2_api, b2_auth_data, bucket_name_prefix, bucket_cleaner):
    def cls_setup(
        self, request, b2_api, b2_auth_data, bucket_name_prefix, bucket_cleaner, persistent_bucket
    ):
        cls = request.cls
        cls.b2_auth_data = b2_auth_data
        cls.this_run_bucket_name_prefix = bucket_name_prefix
        cls.bucket_cleaner = bucket_cleaner
        cls.b2_api = b2_api
        cls.info = b2_api.account_info

    @pytest.fixture(autouse=True)
    def setup_method(self):
        self.buckets_created = []
        yield
        for bucket in self.buckets_created:
            self.bucket_cleaner.cleanup_bucket(bucket)

    def generate_bucket_name(self):
        return random_bucket_name(self.this_run_bucket_name_prefix)
        cls.persistent_bucket = persistent_bucket

    def write_zeros(self, file, number):
        line = b'0' * 1000 + b'\n'
@@ -53,36 +43,3 @@ def write_zeros(self, file, number):
        while written <= number:
            file.write(line)
            written += line_len

    def create_bucket(self):
        bucket_name = self.generate_bucket_name()
        try:
            bucket = self.b2_api.create_bucket(
                bucket_name,
                'allPublic',
                bucket_info={BUCKET_CREATED_AT_MILLIS: str(current_time_millis())}
            )
        except DuplicateBucketName:
            self._duplicated_bucket_name_debug_info(bucket_name)
            raise
        self.buckets_created.append(bucket)
        return bucket

    def _duplicated_bucket_name_debug_info(self, bucket_name: str) -> None:
        # Trying to obtain as much information as possible about this bucket.
        print(' DUPLICATED BUCKET DEBUG START '.center(60, '='))
        bucket = self.b2_api.get_bucket_by_name(bucket_name)

        print('Bucket metadata:')
        bucket_dict = bucket.as_dict()
        for info_key, info in bucket_dict.items():
            print(f'\t{info_key}: "{info}"')

        print('All files (and their versions) inside the bucket:')
        ls_generator = bucket.ls(recursive=True, latest_only=False)
        for file_version, _directory in ls_generator:
            # as_dict() is bound to have more info than we can use,
            # but maybe some of it will cast some light on the issue.
            print(f'\t{file_version.file_name} ({file_version.as_dict()})')

        print(' DUPLICATED BUCKET DEBUG END '.center(60, '='))
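
With this refactor, test classes stop minting and cleaning up a bucket per test and instead write into a per-run subfolder of one shared, persistent bucket. A minimal sketch of what a consumer of IntegrationTestBase might look like now — the test class, file name, and payload below are hypothetical, not part of this diff:

class TestUploadSketch(IntegrationTestBase):
    def test_upload_to_persistent_bucket(self):
        # The class-scoped fixture exposes the shared bucket plus a
        # subfolder unique to this run, so parallel runs don't collide.
        bucket = self.persistent_bucket.bucket
        file_name = f"{self.persistent_bucket.subfolder}/hello.txt"
        bucket.upload_bytes(b"hello world", file_name)
        # Cleanup is handled by the bucket's lifecycle rules, not by the test.
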
2 changes: 1 addition & 1 deletion test/integration/cleanup_buckets.py
@@ -13,7 +13,7 @@

from . import get_b2_auth_data
from .bucket_cleaner import BucketCleaner
from .test_raw_api import cleanup_old_buckets
from .helpers import cleanup_old_buckets

if __name__ == '__main__':
    cleanup_old_buckets()
41 changes: 41 additions & 0 deletions test/integration/conftest.py
@@ -21,10 +21,18 @@
    get_bucket_name_prefix,
    random_bucket_name,
)
from test.integration.persistent_bucket import (
    PersistentBucketAggregate,
    get_or_create_persistent_bucket,
)
from typing import Callable

import pytest

from b2sdk._internal.b2http import B2Http
from b2sdk._internal.raw_api import REALM_URLS
from b2sdk._internal.utils import current_time_millis
from b2sdk.v2.raw_api import B2RawHTTPApi


def pytest_addoption(parser):
@@ -100,3 +108,36 @@ def bucket(b2_api, bucket_name_prefix, bucket_cleaner)
def b2_subfolder(bucket, request):
    subfolder_name = f"{request.node.name}_{secrets.token_urlsafe(4)}"
    return f"b2://{bucket.name}/{subfolder_name}"


@pytest.fixture(scope="class")
def raw_api():
    return B2RawHTTPApi(B2Http())


@pytest.fixture(scope="class")
def auth_info(b2_auth_data, raw_api):
    application_key_id, application_key = b2_auth_data
    realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
    realm_url = REALM_URLS.get(realm, realm)
    return raw_api.authorize_account(realm_url, application_key_id, application_key)


# -- Persistent bucket fixtures --
@pytest.fixture(scope="session")
def persistent_bucket_factory(b2_api) -> Callable[[], PersistentBucketAggregate]:
"""
Since all consumers of the `bucket_name` fixture expect a new bucket to be created,
we need to mirror this behavior by appending a unique subfolder to the persistent bucket name.
"""

def _persistent_bucket(**bucket_create_options):
persistent_bucket = get_or_create_persistent_bucket(b2_api, **bucket_create_options)
return PersistentBucketAggregate(persistent_bucket)

yield _persistent_bucket


@pytest.fixture(scope="class")
def persistent_bucket(persistent_bucket_factory):
    return persistent_bucket_factory()
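
persistent_bucket_factory is session-scoped while persistent_bucket is class-scoped, so every test class talks to the same underlying B2 bucket but receives a fresh PersistentBucketAggregate (and therefore a fresh subfolder). Because create options are hashed into the bucket name, a fixture that needs non-default options would get its own persistent bucket; a hedged sketch, where the option shown is illustrative rather than part of this diff:

import pytest

@pytest.fixture(scope="class")
def file_lock_persistent_bucket(persistent_bucket_factory):
    # Distinct create options hash to a distinct persistent bucket name,
    # which is likewise reused across test runs.
    return persistent_bucket_factory(is_file_lock_enabled=True)
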
93 changes: 93 additions & 0 deletions test/integration/helpers.py
@@ -10,8 +10,14 @@
from __future__ import annotations

import os
import re
import secrets
import sys
import time

from b2sdk._internal.b2http import B2Http
from b2sdk._internal.file_lock import NO_RETENTION_FILE_SETTING
from b2sdk._internal.raw_api import REALM_URLS, B2RawHTTPApi
from b2sdk.v2 import (
    BUCKET_NAME_CHARS_UNIQ,
    BUCKET_NAME_LENGTH_RANGE,
@@ -45,3 +51,90 @@ def authorize(b2_auth_data, api_config=DEFAULT_HTTP_API_CONFIG):
    realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
    b2_api.authorize_account(realm, *b2_auth_data)
    return b2_api, info


def authorize_raw_api(raw_api):
    application_key_id = os.environ.get('B2_TEST_APPLICATION_KEY_ID')
    if application_key_id is None:
        print('B2_TEST_APPLICATION_KEY_ID is not set.', file=sys.stderr)
        sys.exit(1)

    application_key = os.environ.get('B2_TEST_APPLICATION_KEY')
    if application_key is None:
        print('B2_TEST_APPLICATION_KEY is not set.', file=sys.stderr)
        sys.exit(1)

    realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
    realm_url = REALM_URLS.get(realm, realm)
    auth_dict = raw_api.authorize_account(realm_url, application_key_id, application_key)
    return auth_dict


def cleanup_old_buckets():
    raw_api = B2RawHTTPApi(B2Http())
    auth_dict = authorize_raw_api(raw_api)
    bucket_list_dict = raw_api.list_buckets(
        auth_dict['apiUrl'], auth_dict['authorizationToken'], auth_dict['accountId']
    )
    _cleanup_old_buckets(raw_api, auth_dict, bucket_list_dict)


def _cleanup_old_buckets(raw_api, auth_dict, bucket_list_dict):
    for bucket_dict in bucket_list_dict['buckets']:
        bucket_id = bucket_dict['bucketId']
        bucket_name = bucket_dict['bucketName']
        if _should_delete_bucket(bucket_name):
            print('cleaning up old bucket: ' + bucket_name)
            _clean_and_delete_bucket(
                raw_api,
                auth_dict['apiUrl'],
                auth_dict['authorizationToken'],
                auth_dict['accountId'],
                bucket_id,
            )


def _clean_and_delete_bucket(raw_api, api_url, account_auth_token, account_id, bucket_id):
"""
Clean up and delete a bucket, including all its contents.
List and delete all file versions, handle retention settings,
and remove both regular and large files before deleting the bucket.
"""
versions_dict = raw_api.list_file_versions(api_url, account_auth_token, bucket_id)
for version_dict in versions_dict['files']:
file_id = version_dict['fileId']
file_name = version_dict['fileName']
action = version_dict['action']
if action in ['hide', 'upload']:
print('b2_delete_file', file_name, action)
if action == 'upload' and version_dict[
'fileRetention'] and version_dict['fileRetention']['value']['mode'] is not None:
raw_api.update_file_retention(
api_url,
account_auth_token,
file_id,
file_name,
NO_RETENTION_FILE_SETTING,
bypass_governance=True
)
raw_api.delete_file_version(api_url, account_auth_token, file_id, file_name)
else:
print('b2_cancel_large_file', file_name)
raw_api.cancel_large_file(api_url, account_auth_token, file_id)

# Delete the bucket
print('b2_delete_bucket', bucket_id)
raw_api.delete_bucket(api_url, account_auth_token, account_id, bucket_id)


def _should_delete_bucket(bucket_name):
    # Bucket names for this test look like: test-raw-api-c7b22d0b0ad7-1460060364-5670.
    # Other buckets should not be deleted.
    match = re.match(r'^test-raw-api-[a-f0-9]+-([0-9]+)-([0-9]+)', bucket_name)
    if match is None:
        return False

    # Is it more than an hour old?
    bucket_time = int(match.group(1))
    now = time.time()
    return bucket_time + 3600 <= now
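
_should_delete_bucket keys the reaping decision off the timestamp embedded in the bucket name: names that don't match the test-raw-api pattern are never touched, and matching buckets are deleted once they are at least an hour old. A small illustration with made-up values (assuming _should_delete_bucket from this module is in scope):

import time

minted_at = int(time.time()) - 3601  # minted just over an hour ago
stale = f"test-raw-api-c7b22d0b0ad7-{minted_at}-5670"
assert _should_delete_bucket(stale)                # old enough: reap it
assert not _should_delete_bucket("users-bucket")   # foreign name: keep it
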
89 changes: 89 additions & 0 deletions test/integration/persistent_bucket.py
@@ -0,0 +1,89 @@
######################################################################
#
# File: test/integration/persistent_bucket.py
#
# Copyright 2024 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
import hashlib
import os
import uuid
from dataclasses import dataclass
from functools import cached_property
from test.integration.helpers import BUCKET_NAME_LENGTH

from b2sdk._internal.bucket import Bucket
from b2sdk.v2 import B2Api
from b2sdk.v2.exception import NonExistentBucket

PERSISTENT_BUCKET_NAME_PREFIX = "constst"


@dataclass
class PersistentBucketAggregate:
    bucket: Bucket

    def __post_init__(self):
        self.subfolder = self.new_subfolder()

    @property
    def bucket_name(self) -> str:
        return self.bucket.name

    def new_subfolder(self) -> str:
        return f"test-{uuid.uuid4().hex[:8]}"

    @property
    def bucket_id(self):
        return self.bucket.id_

    @cached_property
    def b2_uri(self):
        return f"b2://{self.bucket_name}/{self.subfolder}"


def hash_dict_sha256(d):
"""
Create a sha256 hash of the given dictionary.
"""
dict_repr = repr(sorted((k, repr(v)) for k, v in d.items()))
hash_obj = hashlib.sha256()
hash_obj.update(dict_repr.encode('utf-8'))
return hash_obj.hexdigest()


def get_persistent_bucket_name(b2_api: B2Api, create_options: dict) -> str:
"""
Create a hash of the `create_options` dictionary, include it in the bucket name
so that we can easily reuse buckets with the same options across (parallel) test runs.
"""
# Exclude sensitive options from the hash
unsafe_options = {"authorizationToken", "accountId", "default_server_side_encryption"}
create_options_hashable = {k: v for k, v in create_options.items() if k not in unsafe_options}
hashed_options = hash_dict_sha256(create_options_hashable)
bucket_owner = os.environ.get("GITHUB_REPOSITORY_ID", b2_api.get_account_id())
bucket_base = f"{bucket_owner}:{hashed_options}"
bucket_hash = hashlib.sha256(bucket_base.encode()).hexdigest()
return f"{PERSISTENT_BUCKET_NAME_PREFIX}-{bucket_hash}" [:BUCKET_NAME_LENGTH]


def get_or_create_persistent_bucket(b2_api: B2Api, **create_options) -> Bucket:
    bucket_name = get_persistent_bucket_name(b2_api, create_options.copy())
    try:
        bucket = b2_api.get_bucket_by_name(bucket_name)
    except NonExistentBucket:
        bucket = b2_api.create_bucket(
            bucket_name,
            bucket_type="allPublic",
            lifecycle_rules=[
                {
                    "daysFromHidingToDeleting": 1,
                    "daysFromUploadingToHiding": 1,
                    "fileNamePrefix": "",
                }
            ],
            **create_options,
        )
    return bucket
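
Taken together: the persistent bucket name is a deterministic function of the owner (GITHUB_REPOSITORY_ID, or the account id as a fallback) and the create options, so concurrent and repeated runs converge on the same bucket, while PersistentBucketAggregate isolates each run under a random subfolder. A hedged usage sketch, assuming b2_api is an already-authorized B2Api instance:

bucket = get_or_create_persistent_bucket(b2_api)  # found or created once, then reused
aggregate = PersistentBucketAggregate(bucket)
print(aggregate.b2_uri)  # e.g. b2://constst-<sha256 digest>/test-1a2b3c4d
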
5 more changed files not shown.
