Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor integration tests to use persistent buckets #379

Merged
merged 7 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/+test_with_persistent_bucket.infrastructure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve internal testing infrastructure by updating integration tests to use persistent buckets.
57 changes: 7 additions & 50 deletions test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,31 @@
from __future__ import annotations

from test.integration.bucket_cleaner import BucketCleaner
from test.integration.helpers import (
BUCKET_CREATED_AT_MILLIS,
random_bucket_name,
)
from test.integration.persistent_bucket import PersistentBucketAggregate

import pytest

from b2sdk.v2 import B2Api, current_time_millis
from b2sdk.v2.exception import DuplicateBucketName
from b2sdk.v2 import B2Api


@pytest.mark.usefixtures("cls_setup")
class IntegrationTestBase:
b2_api: B2Api
this_run_bucket_name_prefix: str
bucket_cleaner: BucketCleaner
persistent_bucket: PersistentBucketAggregate

@pytest.fixture(autouse=True, scope="class")
def cls_setup(self, request, b2_api, b2_auth_data, bucket_name_prefix, bucket_cleaner):
def cls_setup(
self, request, b2_api, b2_auth_data, bucket_name_prefix, bucket_cleaner, persistent_bucket
):
cls = request.cls
cls.b2_auth_data = b2_auth_data
cls.this_run_bucket_name_prefix = bucket_name_prefix
cls.bucket_cleaner = bucket_cleaner
cls.b2_api = b2_api
cls.info = b2_api.account_info

@pytest.fixture(autouse=True)
def setup_method(self):
self.buckets_created = []
yield
for bucket in self.buckets_created:
self.bucket_cleaner.cleanup_bucket(bucket)

def generate_bucket_name(self):
return random_bucket_name(self.this_run_bucket_name_prefix)
cls.persistent_bucket = persistent_bucket

def write_zeros(self, file, number):
line = b'0' * 1000 + b'\n'
Expand All @@ -53,36 +43,3 @@ def write_zeros(self, file, number):
while written <= number:
file.write(line)
written += line_len

def create_bucket(self):
    """
    Create a fresh public bucket for this test and register it for cleanup.

    The name comes from ``generate_bucket_name()``; the creation time is
    stored in the bucket info (under BUCKET_CREATED_AT_MILLIS) so the
    bucket cleaner can judge bucket age later.  On a DuplicateBucketName
    collision, debug info about the pre-existing bucket is printed before
    the exception is re-raised.
    """
    bucket_name = self.generate_bucket_name()
    try:
        bucket = self.b2_api.create_bucket(
            bucket_name,
            'allPublic',
            bucket_info={BUCKET_CREATED_AT_MILLIS: str(current_time_millis())}
        )
    except DuplicateBucketName:
        # Collisions should be practically impossible with random names;
        # dump everything we can learn about the existing bucket.
        self._duplicated_bucket_name_debug_info(bucket_name)
        raise
    # Registered buckets are torn down by the cleanup fixture after the test.
    self.buckets_created.append(bucket)
    return bucket

def _duplicated_bucket_name_debug_info(self, bucket_name: str) -> None:
    """Print diagnostics about the existing bucket that caused a name collision."""
    # Trying to obtain as much information as possible about this bucket.
    print(' DUPLICATED BUCKET DEBUG START '.center(60, '='))
    bucket = self.b2_api.get_bucket_by_name(bucket_name)

    print('Bucket metadata:')
    bucket_dict = bucket.as_dict()
    for info_key, info in bucket_dict.items():
        print(f'\t{info_key}: "{info}"')

    print('All files (and their versions) inside the bucket:')
    # latest_only=False so every historical version is listed, not just heads.
    ls_generator = bucket.ls(recursive=True, latest_only=False)
    for file_version, _directory in ls_generator:
        # as_dict() is bound to have more info than we can use,
        # but maybe some of it will cast some light on the issue.
        print(f'\t{file_version.file_name} ({file_version.as_dict()})')

    print(' DUPLICATED BUCKET DEBUG END '.center(60, '='))
2 changes: 1 addition & 1 deletion test/integration/cleanup_buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from . import get_b2_auth_data
from .bucket_cleaner import BucketCleaner
from .test_raw_api import cleanup_old_buckets
from .helpers import cleanup_old_buckets

if __name__ == '__main__':
cleanup_old_buckets()
Expand Down
41 changes: 41 additions & 0 deletions test/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,18 @@
get_bucket_name_prefix,
random_bucket_name,
)
from test.integration.persistent_bucket import (
PersistentBucketAggregate,
get_or_create_persistent_bucket,
)
from typing import Callable

import pytest

from b2sdk._internal.b2http import B2Http
from b2sdk._internal.raw_api import REALM_URLS
from b2sdk._internal.utils import current_time_millis
from b2sdk.v2.raw_api import B2RawHTTPApi


def pytest_addoption(parser):
Expand Down Expand Up @@ -100,3 +108,36 @@ def bucket(b2_api, bucket_name_prefix, bucket_cleaner):
def b2_subfolder(bucket, request):
subfolder_name = f"{request.node.name}_{secrets.token_urlsafe(4)}"
return f"b2://{bucket.name}/{subfolder_name}"


@pytest.fixture(scope="class")
def raw_api():
    """Class-scoped raw B2 HTTP API client."""
    http_client = B2Http()
    return B2RawHTTPApi(http_client)


@pytest.fixture(scope="class")
def auth_info(b2_auth_data, raw_api):
    """Authorize against the configured realm and return the raw auth dict."""
    key_id, key = b2_auth_data
    # B2_TEST_ENVIRONMENT may be a known realm name or a literal realm URL.
    realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
    url = REALM_URLS.get(realm, realm)
    return raw_api.authorize_account(url, key_id, key)


# -- Persistent bucket fixtures --
@pytest.fixture(scope="session")
def persistent_bucket_factory(b2_api) -> Callable[[], PersistentBucketAggregate]:
    """
    Session-scoped factory for persistent-bucket aggregates.

    Consumers of the `bucket_name` fixture expect a brand-new bucket, so the
    shared persistent bucket is handed out wrapped in an aggregate that
    carries a unique subfolder, mirroring that isolation.
    """

    def _make(**bucket_create_options):
        shared_bucket = get_or_create_persistent_bucket(b2_api, **bucket_create_options)
        return PersistentBucketAggregate(shared_bucket)

    yield _make


@pytest.fixture(scope="class")
def persistent_bucket(persistent_bucket_factory):
    """Class-scoped persistent-bucket aggregate built with default options."""
    aggregate = persistent_bucket_factory()
    return aggregate
93 changes: 93 additions & 0 deletions test/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@
from __future__ import annotations

import os
import re
import secrets
import sys
import time

from b2sdk._internal.b2http import B2Http
from b2sdk._internal.file_lock import NO_RETENTION_FILE_SETTING
from b2sdk._internal.raw_api import REALM_URLS, B2RawHTTPApi
from b2sdk.v2 import (
BUCKET_NAME_CHARS_UNIQ,
BUCKET_NAME_LENGTH_RANGE,
Expand Down Expand Up @@ -45,3 +51,90 @@ def authorize(b2_auth_data, api_config=DEFAULT_HTTP_API_CONFIG):
realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
b2_api.authorize_account(realm, *b2_auth_data)
return b2_api, info


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, or print an error and exit(1)."""
    value = os.environ.get(name)
    if value is None:
        print(f'{name} is not set.', file=sys.stderr)
        sys.exit(1)
    return value


def authorize_raw_api(raw_api):
    """
    Authorize *raw_api* using credentials taken from the environment.

    Reads B2_TEST_APPLICATION_KEY_ID and B2_TEST_APPLICATION_KEY, exiting
    with status 1 if either is missing.  The realm comes from
    B2_TEST_ENVIRONMENT (default 'production'); unknown realm names are
    treated as literal realm URLs.

    :return: the auth dict returned by ``raw_api.authorize_account``
    """
    application_key_id = _require_env('B2_TEST_APPLICATION_KEY_ID')
    application_key = _require_env('B2_TEST_APPLICATION_KEY')

    realm = os.environ.get('B2_TEST_ENVIRONMENT', 'production')
    realm_url = REALM_URLS.get(realm, realm)
    auth_dict = raw_api.authorize_account(realm_url, application_key_id, application_key)
    return auth_dict


def cleanup_old_buckets():
    """Authorize a raw B2 API client and delete stale test buckets."""
    api = B2RawHTTPApi(B2Http())
    auth = authorize_raw_api(api)
    # One unpaginated listing; stale-bucket filtering happens downstream.
    buckets = api.list_buckets(
        auth['apiUrl'], auth['authorizationToken'], auth['accountId']
    )
    _cleanup_old_buckets(api, auth, buckets)


def _cleanup_old_buckets(raw_api, auth_dict, bucket_list_dict):
    """Delete every listed bucket whose name marks it as a stale test bucket."""
    api_url = auth_dict['apiUrl']
    token = auth_dict['authorizationToken']
    account_id = auth_dict['accountId']
    for entry in bucket_list_dict['buckets']:
        name = entry['bucketName']
        if not _should_delete_bucket(name):
            continue
        print('cleaning up old bucket: ' + name)
        _clean_and_delete_bucket(raw_api, api_url, token, account_id, entry['bucketId'])


def _clean_and_delete_bucket(raw_api, api_url, account_auth_token, account_id, bucket_id):
    """
    Clean up and delete a bucket, including all its contents.
    List and delete all file versions, handle retention settings,
    and remove both regular and large files before deleting the bucket.
    """
    # NOTE(review): a single list_file_versions call — presumably test
    # buckets are small enough that pagination is unnecessary; confirm.
    versions_dict = raw_api.list_file_versions(api_url, account_auth_token, bucket_id)
    for version_dict in versions_dict['files']:
        file_id = version_dict['fileId']
        file_name = version_dict['fileName']
        action = version_dict['action']
        if action in ['hide', 'upload']:
            print('b2_delete_file', file_name, action)
            # Files under an active retention mode cannot be deleted directly:
            # lift the retention first (bypassing governance), then delete.
            if action == 'upload' and version_dict[
                'fileRetention'] and version_dict['fileRetention']['value']['mode'] is not None:
                raw_api.update_file_retention(
                    api_url,
                    account_auth_token,
                    file_id,
                    file_name,
                    NO_RETENTION_FILE_SETTING,
                    bypass_governance=True
                )
            raw_api.delete_file_version(api_url, account_auth_token, file_id, file_name)
        else:
            # Anything that is neither 'hide' nor 'upload' is treated as an
            # unfinished large file and cancelled instead of deleted.
            print('b2_cancel_large_file', file_name)
            raw_api.cancel_large_file(api_url, account_auth_token, file_id)

    # Delete the bucket only after all versions are gone.
    print('b2_delete_bucket', bucket_id)
    raw_api.delete_bucket(api_url, account_auth_token, account_id, bucket_id)


def _should_delete_bucket(bucket_name):
# Bucket names for this test look like: c7b22d0b0ad7-1460060364-5670
# Other buckets should not be deleted.
match = re.match(r'^test-raw-api-[a-f0-9]+-([0-9]+)-([0-9]+)', bucket_name)
if match is None:
return False

# Is it more than an hour old?
bucket_time = int(match.group(1))
now = time.time()
return bucket_time + 3600 <= now
89 changes: 89 additions & 0 deletions test/integration/persistent_bucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
######################################################################
#
# File: test/integration/persistent_bucket.py
#
# Copyright 2024 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
import hashlib
import os
import uuid
from dataclasses import dataclass
from functools import cached_property
from test.integration.helpers import BUCKET_NAME_LENGTH

from b2sdk._internal.bucket import Bucket
from b2sdk.v2 import B2Api
from b2sdk.v2.exception import NonExistentBucket

PERSISTENT_BUCKET_NAME_PREFIX = "constst"


@dataclass
class PersistentBucketAggregate:
    """
    Pair a shared persistent bucket with a unique per-instance subfolder,
    so concurrent test runs never collide inside the same bucket.
    """

    bucket: Bucket

    def __post_init__(self):
        # Each aggregate claims its own namespace within the shared bucket.
        self.subfolder = self.new_subfolder()

    def new_subfolder(self) -> str:
        """Return a fresh, randomly suffixed subfolder name."""
        suffix = uuid.uuid4().hex[:8]
        return f"test-{suffix}"

    @property
    def bucket_name(self) -> str:
        return self.bucket.name

    @property
    def bucket_id(self):
        return self.bucket.id_

    @cached_property
    def b2_uri(self):
        return f"b2://{self.bucket_name}/{self.subfolder}"


def hash_dict_sha256(d):
    """
    Return a deterministic sha256 hex digest of dictionary *d*.

    Entries are normalized to sorted ``(key, repr(value))`` pairs first,
    so the digest does not depend on insertion order.
    """
    normalized = sorted((key, repr(value)) for key, value in d.items())
    return hashlib.sha256(repr(normalized).encode('utf-8')).hexdigest()


def get_persistent_bucket_name(b2_api: B2Api, create_options: dict) -> str:
    """
    Derive a deterministic bucket name from *create_options*.

    A hash of the options is baked into the name so that (parallel) test
    runs using identical options transparently share the same bucket.
    """
    # Sensitive / run-specific options must not influence the name.
    unsafe_options = {"authorizationToken", "accountId", "default_server_side_encryption"}
    hashable_options = {k: v for k, v in create_options.items() if k not in unsafe_options}
    options_digest = hash_dict_sha256(hashable_options)
    # Scope the name to the repository in CI, falling back to the account id.
    bucket_owner = os.environ.get("GITHUB_REPOSITORY_ID", b2_api.get_account_id())
    name_digest = hashlib.sha256(f"{bucket_owner}:{options_digest}".encode()).hexdigest()
    full_name = f"{PERSISTENT_BUCKET_NAME_PREFIX}-{name_digest}"
    return full_name[:BUCKET_NAME_LENGTH]


def get_or_create_persistent_bucket(b2_api: B2Api, **create_options) -> Bucket:
    """
    Return the persistent bucket matching *create_options*, creating it
    on first use when it does not exist yet.
    """
    bucket_name = get_persistent_bucket_name(b2_api, create_options.copy())
    try:
        return b2_api.get_bucket_by_name(bucket_name)
    except NonExistentBucket:
        # Hide after a day, delete a day later: keeps the long-lived bucket
        # from accumulating stale test data between runs.
        lifecycle_rules = [
            {
                "daysFromHidingToDeleting": 1,
                "daysFromUploadingToHiding": 1,
                "fileNamePrefix": "",
            }
        ]
        return b2_api.create_bucket(
            bucket_name,
            bucket_type="allPublic",
            lifecycle_rules=lifecycle_rules,
            **create_options,
        )
Loading