fix ls --json being corrupted by Using https://... msg

reef-technologies · Nov 9, 2023 · 0a78850 · 0a78850
1 parent e46ece6
commit 0a78850
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 63 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+* Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, therefor prevent JSON output corruption
+
+### Changed
+* Stream `ls --json` JSON output instead of dumping it only after all objects have been fetched
+
 ## [3.12.0] - 2023-10-28
 
 ### Added

diff --git a/b2/console_tool.py b/b2/console_tool.py
@@ -685,23 +685,28 @@ def _parse_file_infos(cls, args_info):
             file_infos[parts[0]] = parts[1]
         return file_infos
 
-    def _print_json(self, data):
-        self._print(
-            json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder), enforce_output=True
-        )
+    def _print_json(self, data, *, raw: bool = False):
+        data = data if raw else json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder)
+        self._print(data, enforce_output=True, raw=raw)
 
-    def _print(self, *args, enforce_output=False):
-        self._print_standard_descriptor(self.stdout, 'stdout', *args, enforce_output=enforce_output)
+    def _print(self, *args, enforce_output=False, raw: bool = False):
+        self._print_standard_descriptor(
+            self.stdout, 'stdout', *args, enforce_output=enforce_output, raw=raw
+        )
 
     def _print_stderr(self, *args, **kwargs):
         self._print_standard_descriptor(self.stderr, 'stderr', *args, enforce_output=True)
 
-    def _print_standard_descriptor(self, descriptor, descriptor_name, *args, enforce_output=False):
+    def _print_standard_descriptor(
+        self, descriptor, descriptor_name, *args, enforce_output=False, raw: bool = False
+    ):
         if not self.quiet or enforce_output:
-            self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args)
+            self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args, raw=raw)
 
     @classmethod
-    def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
+    def _print_helper(
+        cls, descriptor, descriptor_encoding, descriptor_name, *args, raw: bool = False
+    ):
         try:
             descriptor.write(' '.join(args))
         except UnicodeEncodeError:
@@ -714,7 +719,8 @@ def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args):
             args = [arg.encode('ascii', 'backslashreplace').decode() for arg in args]
             sys.stderr.write("Trying to print: %s\n" % args)
             descriptor.write(' '.join(args))
-        descriptor.write('\n')
+        if not raw:
+            descriptor.write('\n')
 
     def __str__(self):
         return f'{self.__class__.__module__}.{self.__class__.__name__}'
@@ -861,7 +867,7 @@ def authorize(self, application_key_id, application_key, realm):
         :return: exit status
         """
         url = REALM_URLS.get(realm, realm)
-        self._print('Using %s' % url)
+        self._print_stderr(f"Using {url}")
         try:
             self.api.authorize_account(realm, application_key_id, application_key)
 
@@ -1889,15 +1895,12 @@ def _setup_parser(cls, parser):
         parser.add_argument('folderName', nargs='?').completer = file_name_completer
         super()._setup_parser(parser)
 
-    def run(self, args):
-        super().run(args)
+    def _print_files(self, args):
         generator = self._get_ls_generator(args)
 
         for file_version, folder_name in generator:
             self._print_file_version(args, file_version, folder_name)
 
-        return 0
-
     def _print_file_version(
         self,
         args,
@@ -1982,6 +1985,7 @@ class Ls(AbstractLsCommand):
     # order is file_id, action, date, time, size(, replication), name
     LS_ENTRY_TEMPLATE = '%83s  %6s  %10s  %8s  %9d  %s'
     LS_ENTRY_TEMPLATE_REPLICATION = LS_ENTRY_TEMPLATE + '  %s'
+    _first_row = True
 
     @classmethod
     def _setup_parser(cls, parser):
@@ -1991,30 +1995,33 @@ def _setup_parser(cls, parser):
         super()._setup_parser(parser)
 
     def run(self, args):
+        super().run(args)
+        self._print_files(args)
         if args.json:
-            # TODO: Make this work for an infinite generation.
-            #   Use `_print_file_version` to print a single `file_version` and manage the external JSON list
-            #   e.g. manually. However, to do it right, some sort of state needs to be kept e.g. info whether
-            #   at least one element was written to the stream, so we can add a `,` on the start of another.
-            #   That would sadly lead to an ugly formatting, so `_print` needs to be expanded with an ability
-            #   to not print end-line character(s).
-            self._print_json([file_version for file_version, _ in self._get_ls_generator(args)])
-            return 0
-
-        return super().run(args)
+            if self._first_row:  # no files were printed
+                self._print_json('[', raw=True)
+            self._print_json(']\n', raw=True)
+        return 0
 
     def _print_file_version(
         self,
         args,
         file_version: FileVersion,
         folder_name: Optional[str],
     ) -> None:
-        if not args.long:
+        if args.json:
+            if self._first_row:
+                self._print_json('[\n', raw=True)
+            else:
+                self._print_json(',', raw=True)
+            self._print_json(file_version)
+        elif not args.long:
             super()._print_file_version(args, file_version, folder_name)
         elif folder_name is not None:
             self._print(self.format_folder_ls_entry(folder_name, args.replication))
         else:
             self._print(self.format_ls_entry(file_version, args.replication))
+        self._first_row = False
 
     def format_folder_ls_entry(self, name, replication: bool):
         if replication:
@@ -2216,8 +2223,8 @@ def _setup_parser(cls, parser):
 
     def run(self, args):
         if args.dryRun:
-            return super().run(args)
-        super().run(args)
+            self._print_files(args)
+            return 0
         failed_on_any_file = False
         messages_queue = queue.Queue()
 

diff --git a/test/unit/test_console_tool.py b/test/unit/test_console_tool.py
@@ -84,7 +84,9 @@ def _run_command_ignore_output(self, argv):
             print('ACTUAL STDERR:  ', repr(actual_stderr))
             print(actual_stderr)
 
-        self.assertEqual('', actual_stderr, 'stderr')
+        assert re.match(
+            r'^|Using http://\w+.example.com\n$', actual_stderr
+        ), f"stderr: {actual_stderr!r}"
         self.assertEqual(0, actual_status, 'exit status code')
 
     def _trim_leading_spaces(self, s):
@@ -253,11 +255,9 @@ def _upload_multiple_files(cls, bucket):
 
 class TestConsoleTool(BaseConsoleToolTest):
     def test_authorize_with_bad_key(self):
-        expected_stdout = '''
-        Using http://production.example.com
-        '''
-
+        expected_stdout = ''
         expected_stderr = '''
+        Using http://production.example.com
         ERROR: unable to authorize account: Invalid authorization token. Server said: secret key is wrong (unauthorized)
         '''
 
@@ -271,12 +271,12 @@ def test_authorize_with_good_key_using_hyphen(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://production.example.com
         """
 
         self._run_command(
-            ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
+            ['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0
         )
 
         # Auth token should be in account info now
@@ -287,12 +287,9 @@ def test_authorize_with_good_key_using_underscore(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
-        Using http://production.example.com
-        """
-
         self._run_command(
-            ['authorize_account', self.account_id, self.master_key], expected_stdout, '', 0
+            ['authorize_account', self.account_id, self.master_key], '',
+            "Using http://production.example.com\n", 0
         )
 
         # Auth token should be in account info now
@@ -303,7 +300,7 @@ def test_authorize_using_env_variables(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://production.example.com
         """
 
@@ -317,7 +314,7 @@ def test_authorize_using_env_variables(self):
             assert B2_APPLICATION_KEY_ID_ENV_VAR in os.environ
             assert B2_APPLICATION_KEY_ENV_VAR in os.environ
 
-            self._run_command(['authorize-account'], expected_stdout, '', 0)
+            self._run_command(['authorize-account'], '', expected_stderr, 0)
 
         # Auth token should be in account info now
         assert self.account_info.get_account_auth_token() is not None
@@ -327,7 +324,7 @@ def test_authorize_towards_custom_realm(self):
         assert self.account_info.get_account_auth_token() is None
 
         # Authorize an account with a good api key.
-        expected_stdout = """
+        expected_stderr = """
         Using http://custom.example.com
         """
 
@@ -336,23 +333,21 @@ def test_authorize_towards_custom_realm(self):
             [
                 'authorize-account', '--environment', 'http://custom.example.com', self.account_id,
                 self.master_key
-            ], expected_stdout, '', 0
+            ], '', expected_stderr, 0
         )
 
         # Auth token should be in account info now
         assert self.account_info.get_account_auth_token() is not None
 
-        expected_stdout = """
-        Using http://custom2.example.com
-        """
         # realm provided with env var
         with mock.patch.dict(
             'os.environ', {
                 B2_ENVIRONMENT_ENV_VAR: 'http://custom2.example.com',
             }
         ):
             self._run_command(
-                ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0
+                ['authorize-account', self.account_id, self.master_key], '',
+                'Using http://custom2.example.com\n', 0
             )
 
     def test_create_key_and_authorize_with_it(self):
@@ -370,8 +365,8 @@ def test_create_key_and_authorize_with_it(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
             '',
+            'Using http://production.example.com\n',
             0,
         )
 
@@ -394,9 +389,8 @@ def test_create_key_with_authorization_from_env_vars(self):
             # The first time we're running on this cache there will be output from the implicit "authorize-account" call
             self._run_command(
                 ['create-key', 'key1', 'listBuckets,listKeys'],
-                'Using http://production.example.com\n'
                 'appKeyId0 appKey0\n',
-                '',
+                'Using http://production.example.com\n',
                 0,
             )
 
@@ -417,9 +411,8 @@ def test_create_key_with_authorization_from_env_vars(self):
                 # "authorize-account" is called when the key changes
                 self._run_command(
                     ['create-key', 'key1', 'listBuckets,listKeys'],
-                    'Using http://production.example.com\n'
                     'appKeyId2 appKey2\n',
-                    '',
+                    'Using http://production.example.com\n',
                     0,
                 )
 
@@ -431,9 +424,8 @@ def test_create_key_with_authorization_from_env_vars(self):
                 ):
                     self._run_command(
                         ['create-key', 'key1', 'listBuckets,listKeys'],
-                        'Using http://custom.example.com\n'
                         'appKeyId3 appKey3\n',
-                        '',
+                        'Using http://custom.example.com\n',
                         0,
                     )
 
@@ -446,7 +438,8 @@ def test_authorize_key_without_list_buckets(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            'Using http://production.example.com\n'
             'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
             1,
         )
@@ -519,8 +512,8 @@ def test_create_bucket_key_and_authorize_with_it(self):
         # Authorize with the key
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
             '',
+            'Using http://production.example.com\n',
             0,
         )
 
@@ -772,8 +765,8 @@ def test_keys(self):
 
         # authorize and make calls using application key with no restrictions
         self._run_command(
-            ['authorize-account', 'appKeyId0', 'appKey0'], 'Using http://production.example.com\n',
-            '', 0
+            ['authorize-account', 'appKeyId0', 'appKey0'], '',
+            'Using http://production.example.com\n', 0
         )
         self._run_command(
             ['list-buckets'],
@@ -798,8 +791,8 @@ def test_keys(self):
 
         # authorize and make calls using an application key with bucket restrictions
         self._run_command(
-            ['authorize-account', 'appKeyId1', 'appKey1'], 'Using http://production.example.com\n',
-            '', 0
+            ['authorize-account', 'appKeyId1', 'appKey1'], '',
+            'Using http://production.example.com\n', 0
         )
 
         self._run_command(
@@ -2351,7 +2344,8 @@ def test_list_buckets_not_allowed_for_app_key(self):
         # Authorize with the key, which should result in an error.
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            'Using http://production.example.com\n'
             'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n',
             1,
         )
@@ -2379,7 +2373,8 @@ def test_bucket_missing_for_bucket_key(self):
         # to be able to look up the name of the bucket.
         self._run_command(
             ['authorize-account', 'appKeyId0', 'appKey0'],
-            'Using http://production.example.com\n',
+            '',
+            "Using http://production.example.com\n"
             "ERROR: unable to authorize account: Application key is restricted to a bucket that doesn't exist\n",
             1,
         )
@@ -2739,6 +2734,8 @@ def test_rm_skipping_over_errors(self):
         '''
         self._run_command(['ls', '--recursive', 'my-bucket'], expected_stdout)
 
+
+class TestVersionConsoleTool(BaseConsoleToolTest):
     def test_version(self):
         self._run_command(['version', '--short'], expected_stdout=f'{VERSION}\n')
         self._run_command(['version'], expected_stdout=f'b2 command line tool, version {VERSION}\n')