Skip to content

Commit

Permalink
Escape Control Characters (#480)
Browse files Browse the repository at this point in the history
* fix

* fixes

* tqdm

* add to v2
  • Loading branch information
andreea-popescu-reef authored Mar 15, 2024
1 parent 86f28dc commit 3cd3292
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 24 deletions.
1 change: 1 addition & 0 deletions b2sdk/_v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@
)
from b2sdk.session import B2Session
from b2sdk.utils.thread_pool import ThreadPoolMixin
from b2sdk.utils.escape import unprintable_to_hex, escape_control_chars, substitute_control_chars

# filter
from b2sdk.filter import FilterType, Filter
Expand Down
6 changes: 4 additions & 2 deletions b2sdk/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import time
from abc import ABCMeta, abstractmethod

from .utils.escape import escape_control_chars

try:
from tqdm import tqdm # displays a nice progress bar
except ImportError:
Expand Down Expand Up @@ -112,7 +114,7 @@ def __init__(self, *args, **kwargs):
def set_total_bytes(self, total_byte_count: int) -> None:
if self.tqdm is None:
self.tqdm = tqdm(
desc=self.description,
desc=escape_control_chars(self.description),
total=total_byte_count,
unit='B',
unit_scale=True,
Expand Down Expand Up @@ -159,7 +161,7 @@ def bytes_completed(self, byte_count: int) -> None:
elapsed = now - self.last_time
if 3 <= elapsed and self.total != 0:
if not self.any_printed:
print(self.description)
print(escape_control_chars(self.description))
print(' %d%%' % int(100.0 * byte_count / self.total))
self.last_time = now
self.any_printed = True
Expand Down
19 changes: 2 additions & 17 deletions b2sdk/raw_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
from __future__ import annotations

import base64
import re
from abc import ABCMeta, abstractmethod
from enum import Enum, unique
from logging import getLogger
from typing import Any

from .utils.escape import unprintable_to_hex
from .utils.typing import JSON

try:
Expand Down Expand Up @@ -873,21 +873,6 @@ def update_file_legal_hold(
except AccessDenied:
raise RetentionWriteError()

def unprintable_to_hex(self, string):
"""
Replace unprintable chars in string with a hex representation.
:param string: an arbitrary string, possibly with unprintable characters.
:return: the string, with unprintable characters changed to hex (e.g., "\x07")
"""
unprintables_pattern = re.compile(r'[\x00-\x1f]')

def hexify(match):
return fr'\x{ord(match.group()):02x}'

return unprintables_pattern.sub(hexify, string)

def check_b2_filename(self, filename):
"""
Raise an appropriate exception with details if the filename is unusable.
Expand All @@ -906,7 +891,7 @@ def check_b2_filename(self, filename):
lowest_unicode_value = ord(min(filename))
if lowest_unicode_value < 32:
message = "Filename \"{}\" contains code {} (hex {:02x}), less than 32.".format(
self.unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value
unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value
)
raise UnusableFileName(message)
# No DEL for you.
Expand Down
9 changes: 5 additions & 4 deletions b2sdk/sync/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ..sync.report import ProgressReport, SyncReport
from ..transfer.outbound.outbound_source import OutboundTransferSource
from ..transfer.outbound.upload_source import UploadSourceLocalFile
from ..utils.escape import escape_control_chars
from .encryption_provider import AbstractSyncEncryptionSettingsProvider
from .report import SyncFileReporter

Expand Down Expand Up @@ -179,7 +180,7 @@ def do_report(self, bucket: Bucket, reporter: ProgressReport) -> None:
:param bucket: a Bucket object
:param reporter: a place to report errors
"""
reporter.print_completion('upload ' + self.relative_name)
reporter.print_completion(f'upload {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'b2_upload({self.local_full_path}, {self.b2_file_name}, {self.mod_time_millis})'
Expand Down Expand Up @@ -255,7 +256,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('hide ' + self.relative_name)
reporter.print_completion(f'hide {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'b2_hide({self.b2_file_name})'
Expand Down Expand Up @@ -478,7 +479,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('delete ' + self.relative_name + ' ' + self.note)
reporter.print_completion(f"delete {escape_control_chars(self.relative_name)} {self.note}")

def __str__(self) -> str:
return f'b2_delete({self.b2_file_name}, {self.file_id}, {self.note})'
Expand Down Expand Up @@ -519,7 +520,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('delete ' + self.relative_name)
reporter.print_completion(f'delete {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'local_delete({self.full_path})'
58 changes: 58 additions & 0 deletions b2sdk/utils/escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
######################################################################
#
# File: b2sdk/utils/escape.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

import re
import shlex

# Characters treated as unprintable: the C0 controls except \x09-\x0d (tab, newline,
# vertical tab, form feed, carriage return), plus DEL (\x7f) and the C1 controls (\x80-\x9f).
UNPRINTABLE_PATTERN = re.compile(r'[\x00-\x08\x0e-\x1f\x7f-\x9f]')


def unprintable_to_hex(s):
    r"""
    Replace unprintable chars in string with a hex representation.

    Characters in the \x09-\x0d range (tab, newline, vertical tab, form feed,
    carriage return) are not matched by UNPRINTABLE_PATTERN and pass through unchanged.

    :param s: an arbitrary string, possibly with unprintable characters; may be None.
    :return: the string, with unprintable characters changed to hex (e.g., "\x07").
             An empty string is returned as an empty string; None (or any other
             falsy non-string value) yields None.
    """

    def hexify(match):
        return fr'\x{ord(match.group()):02x}'

    if not s:
        # An empty string has nothing to escape: hand back a string, not None,
        # so callers that format or concatenate the result keep working.
        return '' if isinstance(s, str) else None
    return UNPRINTABLE_PATTERN.sub(hexify, s)


def escape_control_chars(s):
    r"""
    Hex-escape the unprintable chars of a string, then shell-quote the result.

    :param s: an arbitrary string, possibly with unprintable characters.
    :return: the output of ``shlex.quote`` applied to the string with unprintable
             characters changed to hex (e.g., "\x07"); None when ``s`` is falsy
             (empty string or None).
    """
    if not s:
        return None
    hexed = unprintable_to_hex(s)
    return shlex.quote(hexed)


def substitute_control_chars(s):
    """
    Swap every unprintable char in a string for the � unicode char.

    :param s: an arbitrary string, possibly with unprintable characters.
    :return: tuple of (the string with � replacements made,
             boolean telling whether at least one char was replaced)
    """
    # subn reports how many substitutions it made, so a single pass gives both results.
    replaced, hit_count = UNPRINTABLE_PATTERN.subn('�', s)
    return (replaced, hit_count > 0)
3 changes: 2 additions & 1 deletion b2sdk/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from b2sdk._v3 import parse_folder as parse_sync_folder
from b2sdk._v3 import AbstractPath as AbstractSyncPath
from b2sdk._v3 import LocalPath as LocalSyncPath
from b2sdk.utils.escape import unprintable_to_hex, escape_control_chars, substitute_control_chars

from .account_info import AbstractAccountInfo
from .api import B2Api
Expand Down Expand Up @@ -45,4 +46,4 @@
# large_file

from .large_file import LargeFileServices
from .large_file import UnfinishedLargeFile
from .large_file import UnfinishedLargeFile
3 changes: 3 additions & 0 deletions b2sdk/v2/raw_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def get_upload_file_headers(
**kwargs,
)

def unprintable_to_hex(self, s):
    """
    Forward ``s`` to ``v3.unprintable_to_hex`` and return its result unchanged.

    :param s: the value passed through to ``v3.unprintable_to_hex``
    :return: whatever ``v3.unprintable_to_hex`` returns for ``s``
    """
    # NOTE(review): presumably kept as a method so existing v2 callers of
    # raw_api.unprintable_to_hex(...) keep working - confirm against the v2 API contract.
    return v3.unprintable_to_hex(s)

@_file_infos_rename
def upload_file(
self,
Expand Down
1 change: 1 addition & 0 deletions changelog.d/+escape_control_characters.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added control character escaping for bucket names and file names.
32 changes: 32 additions & 0 deletions test/unit/utils/test_escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
######################################################################
#
# File: test/unit/utils/test_escape.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

from b2sdk.utils.escape import escape_control_chars, substitute_control_chars, unprintable_to_hex


def test_unprintable_to_hex():
    """Check all three escaping helpers against one shared table of inputs."""
    # Columns: raw input, expected unprintable_to_hex output,
    # expected escape_control_chars output, expected substitute_control_chars output.
    table = (
        (' abc-z', ' abc-z', "' abc-z'", (' abc-z', False)),
        ('a\x7fb', 'a\\x7fb', "'a\\x7fb'", ('a�b', True)),
        ('a\x00b a\x9fb ', 'a\\x00b a\\x9fb ', "'a\\x00b a\\x9fb '", ('a�b a�b ', True)),
        ('a\x7fb\nc', 'a\\x7fb\nc', "'a\\x7fb\nc'", ('a�b\nc', True)),
        ('\x9bT\x9bEtest', '\\x9bT\\x9bEtest', "'\\x9bT\\x9bEtest'", ('�T�Etest', True)),
        (
            '\x1b[32mC\x1b[33mC\x1b[34mI',
            '\\x1b[32mC\\x1b[33mC\\x1b[34mI',
            "'\\x1b[32mC\\x1b[33mC\\x1b[34mI'",
            ('�[32mC�[33mC�[34mI', True),
        ),
    )
    for raw, hexed, quoted, substituted in table:
        assert unprintable_to_hex(raw) == hexed
        assert escape_control_chars(raw) == quoted
        assert substitute_control_chars(raw) == substituted

0 comments on commit 3cd3292

Please sign in to comment.