Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix ls --json being corrupted by Using https://... msg #218

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
* Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, therefor prevent JSON output corruption

### Changed
* Stream `ls --json` JSON output instead of dumping it only after all objects have been fetched

## [3.12.0] - 2023-10-28

### Added
Expand Down
63 changes: 35 additions & 28 deletions b2/console_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,23 +685,28 @@ def _parse_file_infos(cls, args_info):
file_infos[parts[0]] = parts[1]
return file_infos

def _print_json(self, data):
self._print(
json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder), enforce_output=True
)
def _print_json(self, data, *, raw: bool = False):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes no sense to name it _print_json and not to do json dump. I'd say, _print_json should not accept raw data at all. Instead of _print_json(..., raw=True), use _print('[\n', print_endline=False). Actually, since _print(...) adds \n in the end by default, you don't need the print_endline at all, just call _print('['), and it will add \n automatically.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the reason was that I didn't want to expose enforce_output to anything else that method printing actual data - that would invite someone to break intended --quiet behavior.

I guess I will document it in docstring instead.

data = data if raw else json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder)
self._print(data, enforce_output=True, raw=raw)

def _print(self, *args, enforce_output=False):
self._print_standard_descriptor(self.stdout, 'stdout', *args, enforce_output=enforce_output)
def _print(self, *args, enforce_output=False, raw: bool = False):
self._print_standard_descriptor(
self.stdout, 'stdout', *args, enforce_output=enforce_output, raw=raw
)

def _print_stderr(self, *args, **kwargs):
self._print_standard_descriptor(self.stderr, 'stderr', *args, enforce_output=True)

def _print_standard_descriptor(self, descriptor, descriptor_name, *args, enforce_output=False):
def _print_standard_descriptor(
self, descriptor, descriptor_name, *args, enforce_output=False, raw: bool = False
):
if not self.quiet or enforce_output:
self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args)
self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args, raw=raw)

@classmethod
def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
def _print_helper(
cls, descriptor, descriptor_encoding, descriptor_name, *args, raw: bool = False

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why raw instead of print_endline or so?

):
try:
descriptor.write(' '.join(args))
except UnicodeEncodeError:
Expand All @@ -714,7 +719,8 @@ def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
args = [arg.encode('ascii', 'backslashreplace').decode() for arg in args]
sys.stderr.write("Trying to print: %s\n" % args)
descriptor.write(' '.join(args))
descriptor.write('\n')
if not raw:
descriptor.write('\n')

def __str__(self):
return f'{self.__class__.__module__}.{self.__class__.__name__}'
Expand Down Expand Up @@ -861,7 +867,7 @@ def authorize(self, application_key_id, application_key, realm):
:return: exit status
"""
url = REALM_URLS.get(realm, realm)
self._print('Using %s' % url)
self._print_stderr(f"Using {url}")
try:
self.api.authorize_account(realm, application_key_id, application_key)

Expand Down Expand Up @@ -1889,15 +1895,12 @@ def _setup_parser(cls, parser):
parser.add_argument('folderName', nargs='?').completer = file_name_completer
super()._setup_parser(parser)

def run(self, args):
super().run(args)
def _print_files(self, args):
Comment on lines -1893 to +1919

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's how it should've been from the beginning.

generator = self._get_ls_generator(args)

for file_version, folder_name in generator:
self._print_file_version(args, file_version, folder_name)

return 0

def _print_file_version(
self,
args,
Expand Down Expand Up @@ -1982,6 +1985,7 @@ class Ls(AbstractLsCommand):
# order is file_id, action, date, time, size(, replication), name
LS_ENTRY_TEMPLATE = '%83s %6s %10s %8s %9d %s'
LS_ENTRY_TEMPLATE_REPLICATION = LS_ENTRY_TEMPLATE + ' %s'
_first_row = True

@classmethod
def _setup_parser(cls, parser):
Expand All @@ -1991,30 +1995,33 @@ def _setup_parser(cls, parser):
super()._setup_parser(parser)

def run(self, args):
super().run(args)
self._print_files(args)
if args.json:
# TODO: Make this work for an infinite generation.
# Use `_print_file_version` to print a single `file_version` and manage the external JSON list
# e.g. manually. However, to do it right, some sort of state needs to be kept e.g. info whether
# at least one element was written to the stream, so we can add a `,` on the start of another.
# That would sadly lead to an ugly formatting, so `_print` needs to be expanded with an ability
# to not print end-line character(s).
self._print_json([file_version for file_version, _ in self._get_ls_generator(args)])
return 0

return super().run(args)
if self._first_row: # no files were printed
self._print_json('[', raw=True)
self._print_json(']\n', raw=True)
return 0

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get it. How is it supposed to work?
Why not something stupid like

if args.json:
    self._print('[')
    for i, (file_version, _) in enumerate(self._get_ls_generator(args)):
         if i:
            self._print(', ', print_endline=False)
         self._print_json(file_version)
    self._print(']')
else:
     self._print_files(args)


def _print_file_version(
self,
args,
file_version: FileVersion,
folder_name: Optional[str],
) -> None:
if not args.long:
if args.json:
if self._first_row:
self._print_json('[\n', raw=True)
else:
self._print_json(',', raw=True)
self._print_json(file_version)
elif not args.long:
super()._print_file_version(args, file_version, folder_name)
elif folder_name is not None:
self._print(self.format_folder_ls_entry(folder_name, args.replication))
else:
self._print(self.format_ls_entry(file_version, args.replication))
self._first_row = False

def format_folder_ls_entry(self, name, replication: bool):
if replication:
Expand Down Expand Up @@ -2216,8 +2223,8 @@ def _setup_parser(cls, parser):

def run(self, args):
if args.dryRun:
return super().run(args)
super().run(args)
self._print_files(args)
return 0
Comment on lines +2246 to +2247

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not calling super().run(args)? It won't set args.quiet now, and thus you won't be able to mute self.PROGRESS_REPORT_CLASS later as far as I see.

failed_on_any_file = False
messages_queue = queue.Queue()

Expand Down
1 change: 1 addition & 0 deletions test/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def should_equal(expected, actual):
class CommandLine:

EXPECTED_STDERR_PATTERNS = [
re.compile(r'^Using https?://[\w.]+.com$'), # account auth

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+\.com

re.compile(r'.*B/s]$', re.DOTALL), # progress bar
re.compile(r'^\r?$'), # empty line
re.compile(
Expand Down
67 changes: 32 additions & 35 deletions test/unit/test_console_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def _run_command_ignore_output(self, argv):
print('ACTUAL STDERR: ', repr(actual_stderr))
print(actual_stderr)

self.assertEqual('', actual_stderr, 'stderr')
assert re.match(
r'^(|Using https?://[\w.]+.com\n)$', actual_stderr

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer (xxx)? over (|xxx) :)
Also, +\.com.
Also, do you need \n if you use $ in the end?

Copy link
Author

@mjurbanski-reef mjurbanski-reef Nov 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(|xxx) catches empty line as well as xxx
as for \n - it indeed is not needed

), f"stderr: {actual_stderr!r}"
self.assertEqual(0, actual_status, 'exit status code')

def _trim_leading_spaces(self, s):
Expand Down Expand Up @@ -253,11 +255,9 @@ def _upload_multiple_files(cls, bucket):

class TestConsoleTool(BaseConsoleToolTest):
def test_authorize_with_bad_key(self):
expected_stdout = '''
Using http://production.example.com
'''

expected_stdout = ''
expected_stderr = '''
Using http://production.example.com
ERROR: unable to authorize account: Invalid authorization token. Server said: secret key is wrong (unauthorized)
'''

Expand All @@ -271,12 +271,12 @@ def test_authorize_with_good_key_using_hyphen(self):
assert self.account_info.get_account_auth_token() is None

# Authorize an account with a good api key.
expected_stdout = """
expected_stderr = """
Using http://production.example.com
"""

self._run_command(
['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0
)

# Auth token should be in account info now
Expand All @@ -287,12 +287,9 @@ def test_authorize_with_good_key_using_underscore(self):
assert self.account_info.get_account_auth_token() is None

# Authorize an account with a good api key.
expected_stdout = """
Using http://production.example.com
"""

self._run_command(
['authorize_account', self.account_id, self.master_key], expected_stdout, '', 0
['authorize_account', self.account_id, self.master_key], '',
"Using http://production.example.com\n", 0

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd make it consistent with other tests, i.e. add

expected_stderr = """
    ...
"""

 '', expected_sterr, 0

)

# Auth token should be in account info now
Expand All @@ -303,7 +300,7 @@ def test_authorize_using_env_variables(self):
assert self.account_info.get_account_auth_token() is None

# Authorize an account with a good api key.
expected_stdout = """
expected_stderr = """
Using http://production.example.com
"""

Expand All @@ -317,7 +314,7 @@ def test_authorize_using_env_variables(self):
assert B2_APPLICATION_KEY_ID_ENV_VAR in os.environ
assert B2_APPLICATION_KEY_ENV_VAR in os.environ

self._run_command(['authorize-account'], expected_stdout, '', 0)
self._run_command(['authorize-account'], '', expected_stderr, 0)

# Auth token should be in account info now
assert self.account_info.get_account_auth_token() is not None
Expand All @@ -327,7 +324,7 @@ def test_authorize_towards_custom_realm(self):
assert self.account_info.get_account_auth_token() is None

# Authorize an account with a good api key.
expected_stdout = """
expected_stderr = """
Using http://custom.example.com
"""

Expand All @@ -336,23 +333,21 @@ def test_authorize_towards_custom_realm(self):
[
'authorize-account', '--environment', 'http://custom.example.com', self.account_id,
self.master_key
], expected_stdout, '', 0
], '', expected_stderr, 0
)

# Auth token should be in account info now
assert self.account_info.get_account_auth_token() is not None

expected_stdout = """
Using http://custom2.example.com
"""
# realm provided with env var
with mock.patch.dict(
'os.environ', {
B2_ENVIRONMENT_ENV_VAR: 'http://custom2.example.com',
}
):
self._run_command(
['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
['authorize-account', self.account_id, self.master_key], '',
'Using http://custom2.example.com\n', 0
)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, make it consistent plz


def test_create_key_and_authorize_with_it(self):
Expand All @@ -370,8 +365,8 @@ def test_create_key_and_authorize_with_it(self):
# Authorize with the key
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'],
'Using http://production.example.com\n',
'',
'Using http://production.example.com\n',
0,
)

Expand All @@ -394,9 +389,8 @@ def test_create_key_with_authorization_from_env_vars(self):
# The first time we're running on this cache there will be output from the implicit "authorize-account" call
self._run_command(
['create-key', 'key1', 'listBuckets,listKeys'],
'Using http://production.example.com\n'
'appKeyId0 appKey0\n',
'',
'Using http://production.example.com\n',
0,
)

Expand All @@ -417,9 +411,8 @@ def test_create_key_with_authorization_from_env_vars(self):
# "authorize-account" is called when the key changes
self._run_command(
['create-key', 'key1', 'listBuckets,listKeys'],
'Using http://production.example.com\n'
'appKeyId2 appKey2\n',
'',
'Using http://production.example.com\n',
0,
)

Expand All @@ -431,9 +424,8 @@ def test_create_key_with_authorization_from_env_vars(self):
):
self._run_command(
['create-key', 'key1', 'listBuckets,listKeys'],
'Using http://custom.example.com\n'
'appKeyId3 appKey3\n',
'',
'Using http://custom.example.com\n',
0,
)

Expand All @@ -446,7 +438,8 @@ def test_authorize_key_without_list_buckets(self):
# Authorize with the key
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'],
'Using http://production.example.com\n',
'',
'Using http://production.example.com\n'
'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
1,
)
Expand Down Expand Up @@ -519,8 +512,8 @@ def test_create_bucket_key_and_authorize_with_it(self):
# Authorize with the key
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'],
'Using http://production.example.com\n',
'',
'Using http://production.example.com\n',
0,
)

Expand Down Expand Up @@ -772,8 +765,8 @@ def test_keys(self):

# authorize and make calls using application key with no restrictions
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'], 'Using http://production.example.com\n',
'', 0
['authorize-account', 'appKeyId0', 'appKey0'], '',
'Using http://production.example.com\n', 0
)
self._run_command(
['list-buckets'],
Expand All @@ -798,8 +791,8 @@ def test_keys(self):

# authorize and make calls using an application key with bucket restrictions
self._run_command(
['authorize-account', 'appKeyId1', 'appKey1'], 'Using http://production.example.com\n',
'', 0
['authorize-account', 'appKeyId1', 'appKey1'], '',
'Using http://production.example.com\n', 0
)

self._run_command(
Expand Down Expand Up @@ -2351,7 +2344,8 @@ def test_list_buckets_not_allowed_for_app_key(self):
# Authorize with the key, which should result in an error.
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'],
'Using http://production.example.com\n',
'',
'Using http://production.example.com\n'
'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
1,
)
Expand Down Expand Up @@ -2379,7 +2373,8 @@ def test_bucket_missing_for_bucket_key(self):
# to be able to look up the name of the bucket.
self._run_command(
['authorize-account', 'appKeyId0', 'appKey0'],
'Using http://production.example.com\n',
'',
"Using http://production.example.com\n"
"ERROR: unable to authorize account: Application key is restricted to a bucket that doesn't exist\n",
1,
)
Expand Down Expand Up @@ -2739,6 +2734,8 @@ def test_rm_skipping_over_errors(self):
'''
self._run_command(['ls', '--recursive', 'my-bucket'], expected_stdout)


class TestVersionConsoleTool(BaseConsoleToolTest):
def test_version(self):
self._run_command(['version', '--short'], expected_stdout=f'{VERSION}\n')
self._run_command(['version'], expected_stdout=f'b2 command line tool, version {VERSION}\n')