Skip to content

Commit

Permalink
Merge pull request Backblaze#959 from reef-technologies/b2_uri_cmds
Browse files Browse the repository at this point in the history
B2 URI commands
  • Loading branch information
mjurbanski-reef authored Nov 23, 2023
2 parents 93ce517 + 853ba16 commit ae9138d
Show file tree
Hide file tree
Showing 26 changed files with 843 additions and 263 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ The `changelog.d` file name convention is:

These files can either be created manually, or using `towncrier` e.g.

towncrier create -c 'write your description here' 157.fixed.md
towncrier create -c 'Add proper changelog example to CONTRIBUTING guide' 157.added.md

`towncrier create` also takes care of duplicates automatically (if there is more than 1 news fragment of one type
for a given github issue).
Expand Down
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,11 @@ b2 create-key [-h] [--bucket BUCKET] [--namePrefix NAMEPREFIX] [--duration DURAT
b2 delete-bucket [-h] bucketName
b2 delete-file-version [-h] [--bypassGovernance] [fileName] fileId
b2 delete-key [-h] applicationKeyId
b2 download-file-by-id [-h] [--threads THREADS] [--noProgress] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] [--max-download-streams-per-file MAX_DOWNLOAD_STREAMS_PER_FILE] fileId localFileName
b2 download-file-by-name [-h] [--threads THREADS] [--noProgress] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] [--max-download-streams-per-file MAX_DOWNLOAD_STREAMS_PER_FILE] bucketName b2FileName localFileName
b2 cat [-h] [--noProgress] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] b2uri
b2 download-file [-h] [--threads THREADS] [--max-download-streams-per-file MAX_DOWNLOAD_STREAMS_PER_FILE] [--noProgress] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] B2_URI localFileName
b2 cat [-h] [--noProgress] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] B2_URI
b2 get-account-info [-h]
b2 get-bucket [-h] [--showSize] bucketName
b2 get-file-info [-h] fileId
b2 file-info [-h] B2_URI
b2 get-download-auth [-h] [--prefix PREFIX] [--duration DURATION] bucketName
b2 get-download-url-with-auth [-h] [--duration DURATION] bucketName fileName
b2 hide-file [-h] bucketName fileName
Expand All @@ -89,8 +88,7 @@ b2 list-parts [-h] largeFileId
b2 list-unfinished-large-files [-h] bucketName
b2 ls [-h] [--long] [--json] [--replication] [--versions] [-r] [--withWildcard] bucketName [folderName]
b2 rm [-h] [--dryRun] [--queueSize QUEUESIZE] [--noProgress] [--failFast] [--threads THREADS] [--versions] [-r] [--withWildcard] bucketName [folderName]
b2 make-url [-h] fileId
b2 make-friendly-url [-h] bucketName fileName
b2 get-url [-h] B2_URI
b2 sync [-h] [--noProgress] [--dryRun] [--allowEmptySource] [--excludeAllSymlinks] [--syncThreads SYNCTHREADS] [--downloadThreads DOWNLOADTHREADS] [--uploadThreads UPLOADTHREADS] [--compareVersions {none,modTime,size}] [--compareThreshold MILLIS] [--excludeRegex REGEX] [--includeRegex REGEX] [--excludeDirRegex REGEX] [--excludeIfModifiedAfter TIMESTAMP] [--threads THREADS] [--destinationServerSideEncryption {SSE-B2,SSE-C}] [--destinationServerSideEncryptionAlgorithm {AES256}] [--sourceServerSideEncryption {SSE-C}] [--sourceServerSideEncryptionAlgorithm {AES256}] [--write-buffer-size BYTES] [--skip-hash-verification] [--max-download-streams-per-file MAX_DOWNLOAD_STREAMS_PER_FILE] [--incrementalMode] [--skipNewer | --replaceNewer] [--delete | --keepDays DAYS] source destination
b2 update-bucket [-h] [--bucketInfo BUCKETINFO] [--corsRules CORSRULES] [--defaultRetentionMode {compliance,governance,none}] [--defaultRetentionPeriod period] [--replication REPLICATION] [--fileLockEnabled] [--defaultServerSideEncryption {SSE-B2,none}] [--defaultServerSideEncryptionAlgorithm {AES256}] [--lifecycleRule LIFECYCLERULES | --lifecycleRules LIFECYCLERULES] bucketName [{allPublic,allPrivate}]
b2 upload-file [-h] [--contentType CONTENTTYPE] [--sha1 SHA1] [--cache-control CACHE_CONTROL] [--info INFO] [--custom-upload-timestamp CUSTOM_UPLOAD_TIMESTAMP] [--minPartSize MINPARTSIZE] [--threads THREADS] [--noProgress] [--destinationServerSideEncryption {SSE-B2,SSE-C}] [--destinationServerSideEncryptionAlgorithm {AES256}] [--legalHold {on,off}] [--fileRetentionMode {compliance,governance}] [--retainUntil TIMESTAMP] [--incrementalMode] bucketName localFilePath b2FileName
Expand Down
36 changes: 36 additions & 0 deletions b2/_cli/argcompleters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from b2sdk.v2.api import B2Api

from b2._cli.b2api import _get_b2api_for_profile
from b2._utils.python_compat import removeprefix
from b2._utils.uri import parse_b2_uri


def _with_api(func):
Expand Down Expand Up @@ -50,3 +52,37 @@ def file_name_completer(api: B2Api, parsed_args, **kwargs):
folder_name or file_version.file_name
for file_version, folder_name in islice(file_versions, LIST_FILE_NAMES_MAX_LIMIT)
]


@_with_api
def b2uri_file_completer(api: B2Api, prefix: str, **kwargs):
    """
    Suggest completions for a B2 URI that should point to a file-like object.

    :param api: B2Api instance used to list buckets and files
    :param prefix: text typed so far for the argument being completed
    :return: list of candidate B2 URI strings
    """
    if prefix.startswith('b2id://'):
        # listing all files from all buckets is unreasonably expensive
        return ["b2id://"]

    if not prefix.startswith('b2://'):
        # no recognizable scheme yet, offer both supported ones
        return [
            "b2://",
            "b2id://",
        ]

    remainder = removeprefix(prefix, 'b2://')
    if '/' not in remainder:
        # bucket name is still being typed, so complete bucket names only
        return [f"b2://{bucket.name}/" for bucket in api.list_buckets(use_cache=True)]

    target = parse_b2_uri(prefix)
    bucket = api.get_bucket_by_name(target.bucket_name)
    listing = bucket.ls(
        f"{target.path}*",
        latest_only=True,
        recursive=True,
        fetch_count=LIST_FILE_NAMES_MAX_LIMIT,
        with_wildcard=True,
    )

    suggestions = []
    for file_version, _folder_name in islice(listing, LIST_FILE_NAMES_MAX_LIMIT):
        if file_version:
            suggestions.append(f"b2://{bucket.name}/{file_version.file_name}")
    return suggestions
44 changes: 44 additions & 0 deletions b2/_cli/b2args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
######################################################################
#
# File: b2/_cli/b2args.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
"""
Utility functions for adding b2-specific arguments to an argparse parser.
"""
import argparse

from b2._cli.argcompleters import b2uri_file_completer
from b2._utils.uri import B2URI, B2URIBase, parse_b2_uri
from b2.arg_parser import wrap_with_argument_type_error


def b2_file_uri(value: str) -> B2URIBase:
    """
    Parse ``value`` as a B2 URI and make sure it does not designate a directory.

    Any error raised by ``parse_b2_uri`` propagates unchanged.

    :param value: B2 URI string as given on the command line
    :return: the parsed URI object
    :raises ValueError: if the result is a ``B2URI`` whose ``is_dir()`` is truthy
    """
    parsed = parse_b2_uri(value)
    # only name-based URIs can look like a directory; other URI types pass through
    points_to_dir = isinstance(parsed, B2URI) and parsed.is_dir()
    if points_to_dir:
        raise ValueError(
            f"B2 URI pointing to a file-like object is required, but {value} was provided"
        )
    return parsed


# Converters built from the URI parsers above; B2_URI_FILE_ARG_TYPE is passed as argparse `type=` below.
# NOTE(review): wrap_with_argument_type_error lives in b2.arg_parser; presumably it re-raises the
# wrapped function's errors as argparse.ArgumentTypeError - confirm there.
B2_URI_ARG_TYPE = wrap_with_argument_type_error(parse_b2_uri)  # accepts any URI parse_b2_uri accepts
B2_URI_FILE_ARG_TYPE = wrap_with_argument_type_error(b2_file_uri)  # additionally rejects directory-like b2:// URIs


def add_b2_file_argument(parser: argparse.ArgumentParser, name="B2_URI"):
    """
    Register an argument on ``parser`` that takes a B2 URI pointing to a file.

    The value is converted with ``B2_URI_FILE_ARG_TYPE`` and the created action gets
    ``b2uri_file_completer`` assigned as its ``completer`` attribute.

    :param parser: parser to add the argument to
    :param name: name of the argument
    """
    help_text = "B2 URI pointing to a file, e.g. b2://yourBucket/file.txt or b2id://fileId"
    action = parser.add_argument(name, type=B2_URI_FILE_ARG_TYPE, help=help_text)
    action.completer = b2uri_file_completer
49 changes: 49 additions & 0 deletions b2/_utils/python_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
######################################################################
#
# File: b2/_utils/python_compat.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
"""
Utilities for compatibility with older Python versions.
"""
import functools
import sys

if sys.version_info >= (3, 9):
    # str.removeprefix exists since Python 3.9, use it directly
    removeprefix = str.removeprefix
else:

    def removeprefix(s: str, prefix: str) -> str:
        """Return ``s`` without the leading ``prefix``; return ``s`` unchanged if it does not start with it."""
        if s.startswith(prefix):
            return s[len(prefix):]
        return s

if sys.version_info >= (3, 8):
    # functools.singledispatchmethod exists since Python 3.8, use it directly
    singledispatchmethod = functools.singledispatchmethod
else:

    class singledispatchmethod:
        """
        Backport of functools.singledispatchmethod for Python 3.7.
        It is not guaranteed to be complete.
        """

        def __init__(self, method):
            self.dispatcher = functools.singledispatch(method)
            self.method = method

        def register(self, cls, method=None):
            """Register ``method`` as the implementation for ``cls`` on the underlying dispatcher."""
            return self.dispatcher.register(cls, func=method)

        def __get__(self, obj, cls):
            def method_wrapper(arg, *args, **kwargs):
                # dispatch on the class of the first argument after self
                implementation = self.dispatcher.dispatch(arg.__class__)
                # bind the chosen implementation to the same instance/owner before calling it
                bound = implementation.__get__(obj, cls)
                return bound(arg, *args, **kwargs)

            functools.update_wrapper(method_wrapper, self.method)
            method_wrapper.register = self.register
            return method_wrapper
111 changes: 104 additions & 7 deletions b2/_utils/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,65 @@
import urllib
from pathlib import Path

from b2sdk.v2 import (
B2Api,
DownloadVersion,
FileVersion,
)

from b2._utils.python_compat import removeprefix, singledispatchmethod


class B2URIBase:
    """Common base class of the B2 URI types defined in this module."""


@dataclasses.dataclass(frozen=True)
class B2URI(B2URIBase):
    """
    B2 URI designating a particular object by name & bucket, or a "subdirectory" in a bucket.

    Please note that both files and directories are symbolic concepts here, not real ones in B2, i.e.
    there is no such thing as a "directory" in B2, but it is possible to mimic one by using object names
    with non-trailing slashes.
    To make that possible, using trailing slashes in object names is highly discouraged.
    """

    bucket_name: str
    path: str = ""

    def __post_init__(self):
        # The path is stored without its leading slash; __str__ re-inserts the separator.
        # The dataclass is frozen, so plain attribute assignment is rejected;
        # object.__setattr__ is the documented way to set a field from __post_init__.
        object.__setattr__(self, "path", removeprefix(self.path, "/"))

    def __str__(self) -> str:
        return f"b2://{self.bucket_name}/{self.path}"

    def is_dir(self) -> bool | None:
        """
        Return whether the path is a directory.

        Please note this is symbolic.
        It is possible for a file to have a trailing slash, but it is HIGHLY discouraged,
        and not supported by B2 CLI.
        At the same time it is possible for a directory to not have a trailing slash,
        which is discouraged, but allowed by B2 CLI.
        This is done to mimic a unix-like Path.

        In practice, this means that `.is_dir() == True` will always be interpreted as
        "this is a directory", but the reverse is not necessarily true, and `not uri.is_dir()`
        should merely be interpreted as "this is a directory or a file".

        :return: True if the path is empty or ends with a slash, None if it is unknown
        """
        return not self.path or self.path.endswith("/") or None


@dataclasses.dataclass(frozen=True)
class B2FileIdURI(B2URIBase):
"""
B2 URI designating a particular file by its id.
"""

file_id: str

def __str__(self) -> str:
Expand Down Expand Up @@ -58,8 +101,62 @@ def _parse_b2_uri(uri, parsed: urllib.parse.ParseResult) -> B2URI | B2FileIdURI:
)

if parsed.scheme == "b2":
return B2URI(bucket=parsed.netloc, path=parsed.path[1:])
return B2URI(bucket_name=parsed.netloc, path=parsed.path)
elif parsed.scheme == "b2id":
return B2FileIdURI(file_id=parsed.netloc)
file_id = parsed.netloc
if not file_id:
raise ValueError(f"File id was not provided in B2 URI: {uri!r}")
return B2FileIdURI(file_id=file_id)
else:
raise ValueError(f"Unsupported URI scheme: {parsed.scheme!r}")


class B2URIAdapter:
    """
    Adapter for using B2URI with B2Api.

    Every attribute not defined here is looked up on the wrapped ``api`` object, so the adapter
    can be used in place of it. The ``*_by_uri`` methods dispatch on the type of the ``uri``
    argument (``B2URI`` or ``B2FileIdURI``) and call the matching name-based or id-based method.

    When this matures enough methods from here should be moved to b2sdk B2Api class.
    """

    def __init__(self, api: B2Api):
        self.api = api

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If "api" itself is missing
        # (instance created without __init__, e.g. while being copied or unpickled),
        # delegating through self.api would re-enter this method until RecursionError.
        if name == "api":
            raise AttributeError(name)
        return getattr(self.api, name)

    # The implementations below are all named `_` on purpose: each one is attached by the
    # bare `.register` decorator, which takes the dispatch type from the `uri` annotation.

    @singledispatchmethod
    def download_file_by_uri(self, uri, *args, **kwargs):
        """
        Download the file designated by ``uri``; extra arguments are passed through unchanged.

        :raises NotImplementedError: if no implementation is registered for the type of ``uri``
        """
        raise NotImplementedError(f"Unsupported URI type: {type(uri)}")

    @download_file_by_uri.register
    def _(self, uri: B2URI, *args, **kwargs):
        # name-based URI: resolve the bucket first, then download by file name
        bucket = self.get_bucket_by_name(uri.bucket_name)
        return bucket.download_file_by_name(uri.path, *args, **kwargs)

    @download_file_by_uri.register
    def _(self, uri: B2FileIdURI, *args, **kwargs):
        return self.download_file_by_id(uri.file_id, *args, **kwargs)

    @singledispatchmethod
    def get_file_info_by_uri(self, uri, *args, **kwargs):
        """
        Return information about the file designated by ``uri``.

        :raises NotImplementedError: if no implementation is registered for the type of ``uri``
        """
        raise NotImplementedError(f"Unsupported URI type: {type(uri)}")

    @get_file_info_by_uri.register
    def _(self, uri: B2URI, *args, **kwargs) -> DownloadVersion:
        return self.get_file_info_by_name(uri.bucket_name, uri.path, *args, **kwargs)

    @get_file_info_by_uri.register
    def _(self, uri: B2FileIdURI, *args, **kwargs) -> FileVersion:
        return self.get_file_info(uri.file_id, *args, **kwargs)

    @singledispatchmethod
    def get_download_url_by_uri(self, uri, *args, **kwargs):
        """
        Return the download URL of the file designated by ``uri``.

        :raises NotImplementedError: if no implementation is registered for the type of ``uri``
        """
        raise NotImplementedError(f"Unsupported URI type: {type(uri)}")

    @get_download_url_by_uri.register
    def _(self, uri: B2URI, *args, **kwargs) -> str:
        return self.get_download_url_for_file_name(uri.bucket_name, uri.path, *args, **kwargs)

    @get_download_url_by_uri.register
    def _(self, uri: B2FileIdURI, *args, **kwargs) -> str:
        return self.get_download_url_for_fileid(uri.file_id, *args, **kwargs)
Loading

0 comments on commit ae9138d

Please sign in to comment.