Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use B2URI in ls & rm #248

Merged
merged 4 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion b2/_internal/_cli/b2args.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,33 @@ def b2_file_uri(value: str) -> B2URIBase:
return b2_uri


B2_URI_ARG_TYPE = wrap_with_argument_type_error(parse_b2_uri)
def b2_uri(value: str) -> B2URI:
    """
    Parse ``value`` as a B2 URI and require the ``b2://bucket/path`` form.

    :param value: raw command-line string to parse
    :return: the parsed URI when ``parse_b2_uri`` yields a ``B2URI``
    :raises ValueError: when the parsed result is not a ``B2URI``
    """
    parsed = parse_b2_uri(value)
    if isinstance(parsed, B2URI):
        return parsed
    raise ValueError(
        f"B2 URI of the form b2://bucket/path/ is required, but {value} was provided"
    )


B2_URI_ARG_TYPE = wrap_with_argument_type_error(b2_uri)
B2_URI_FILE_ARG_TYPE = wrap_with_argument_type_error(b2_file_uri)
Comment on lines +41 to 42

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we have this distinction of URI vs URI_FILE, this naming is a bit misleading (at least it did mislead me a bit). That's because the real distinction here is not "file" vs "non-file" (i.e. a folder, a pattern), it's actually between file id vs any URI (b2:// vs b2id://). It might be worth renaming B2_URI_FILE_ARG_TYPE to something more explicit.

Same goes for add_b2_file_argument (-> add_b2_file_id_argument). I know these are not your names, it's just that seeing b2_uri etc. brings this to attention. :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, it is unclear if the names mean b2://, b2id:// or both. I'd leave them like that for now. I'll clean them up in the follow-up PR adding b2id:// support to rm.



def add_b2_uri_argument(parser: argparse.ArgumentParser, name="B2_URI"):
    """
    Add a B2 URI pointing to a bucket, optionally with a directory
    or a pattern as an argument to the parser.

    :param parser: parser that receives the new positional argument
    :param name: name under which the argument is registered
    """
    parser.add_argument(
        name,
        type=B2_URI_ARG_TYPE,
        # Only b2:// examples are listed: B2_URI_ARG_TYPE wraps b2_uri, which
        # raises for anything that is not a B2URI, so advertising
        # b2id://fileId here would promise input the validator refuses.
        help="B2 URI pointing to a bucket, directory or a pattern, "
        "e.g. b2://yourBucket, b2://yourBucket/file.txt, b2://yourBucket/folder/ "
        "or b2://yourBucket/*.txt",
    )


def add_b2_file_argument(parser: argparse.ArgumentParser, name="B2_URI"):
"""
Add a B2 URI pointing to a file as an argument to the parser.
Expand Down
16 changes: 7 additions & 9 deletions b2/_internal/_utils/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import dataclasses
import pathlib
import urllib
import urllib.parse
from pathlib import Path

from b2sdk.v2 import (
Expand Down Expand Up @@ -80,18 +80,18 @@ def __str__(self) -> str:


def parse_uri(uri: str) -> Path | B2URI | B2FileIdURI:
    """
    Parse ``uri`` into a local path or a B2 URI object.

    A string without a URI scheme is returned as a ``pathlib.Path`` built
    from the untouched input; anything else is handed to ``_parse_b2_uri``
    together with its split form.

    :param uri: local path or URI string
    :return: ``pathlib.Path`` for scheme-less input, otherwise whatever
             ``_parse_b2_uri`` returns
    """
    # Split exactly once; an earlier urlparse() result would only be
    # overwritten by this call.
    parsed = urllib.parse.urlsplit(uri)
    if parsed.scheme == "":
        return pathlib.Path(uri)
    return _parse_b2_uri(uri, parsed)


def parse_b2_uri(uri: str) -> B2URI | B2FileIdURI:
    """
    Parse ``uri`` as a B2 URI.

    Unlike ``parse_uri`` there is no local-path fallback: the split result
    is always passed to ``_parse_b2_uri``.

    :param uri: URI string to parse
    :return: whatever ``_parse_b2_uri`` returns for the split URI
    """
    # Split exactly once; an earlier urlparse() result would only be
    # overwritten by this call.
    parsed = urllib.parse.urlsplit(uri)
    return _parse_b2_uri(uri, parsed)


def _parse_b2_uri(uri, parsed: urllib.parse.ParseResult) -> B2URI | B2FileIdURI:
def _parse_b2_uri(uri, parsed: urllib.parse.SplitResult) -> B2URI | B2FileIdURI:
if parsed.scheme in ("b2", "b2id"):
if not parsed.netloc:
raise ValueError(f"Invalid B2 URI: {uri!r}")
Expand All @@ -101,12 +101,10 @@ def _parse_b2_uri(uri, parsed: urllib.parse.ParseResult) -> B2URI | B2FileIdURI:
)

if parsed.scheme == "b2":
return B2URI(bucket_name=parsed.netloc, path=parsed.path)
path = urllib.parse.urlunsplit(parsed._replace(scheme="", netloc=""))
return B2URI(bucket_name=parsed.netloc, path=path)
elif parsed.scheme == "b2id":
file_id = parsed.netloc
if not file_id:
raise ValueError(f"File id was not provided in B2 URI: {uri!r}")
return B2FileIdURI(file_id=file_id)
return B2FileIdURI(file_id=parsed.netloc)
else:
raise ValueError(f"Unsupported URI scheme: {parsed.scheme!r}")

Expand Down
41 changes: 41 additions & 0 deletions b2/_internal/b2v3/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,47 @@

# ruff: noqa: F405
from b2._internal._b2v4.registry import * # noqa
from .rm import Rm


class Ls(B2URIBucketNFolderNameArgMixin, BaseLs):
    """
    {BASELS}

    Examples

    .. note::

        Note the use of quotes, to ensure that special
        characters are not expanded by the shell.


    List csv and tsv files (in any directory, in the whole bucket):

    .. code-block::

        {NAME} ls --recursive --withWildcard bucketName "*.[ct]sv"


    List all info.txt files from directories bX, where X is any character:

    .. code-block::

        {NAME} ls --recursive --withWildcard bucketName "b?/info.txt"


    List all pdf files from directories b0 to b9 (including sub-directories):

    .. code-block::

        {NAME} ls --recursive --withWildcard bucketName "b[0-9]/*.pdf"


    Requires capability:

    - **listFiles**
    """


B2.register_subcommand(AuthorizeAccount)
B2.register_subcommand(CancelAllUnfinishedLargeFiles)
Expand Down
59 changes: 59 additions & 0 deletions b2/_internal/b2v3/rm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
######################################################################
#
# File: b2/_internal/b2v3/rm.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
from __future__ import annotations

from b2._internal._b2v4.registry import B2URIBucketNFolderNameArgMixin, BaseRm


# NOTE: We need to keep v3 Rm in separate file, because we need to import it in
# unit tests without registering any commands.
class Rm(B2URIBucketNFolderNameArgMixin, BaseRm):
    """
    {BASERM}

    Examples.

    .. note::

        Note the use of quotes, to ensure that special
        characters are not expanded by the shell.


    .. note::

        Use with caution. Running examples presented below can cause data-loss.


    Remove all csv and tsv files (in any directory, in the whole bucket):

    .. code-block::

        {NAME} rm --recursive --withWildcard bucketName "*.[ct]sv"


    Remove all info.txt files from directories bX, where X is any character:

    .. code-block::

        {NAME} rm --recursive --withWildcard bucketName "b?/info.txt"


    Remove all pdf files from directories b0 to b9 (including sub-directories):

    .. code-block::

        {NAME} rm --recursive --withWildcard bucketName "b[0-9]/*.pdf"


    Requires capability:

    - **listFiles**
    - **deleteFiles**
    """
Loading
Loading