Skip to content

Commit

Permalink
Merge pull request #438 from reef-technologies/download_to_stdout
Browse files Browse the repository at this point in the history
Download to stdout
  • Loading branch information
mjurbanski-reef authored Nov 15, 2023
2 parents 97caa25 + 23e383f commit d6e7cdd
Show file tree
Hide file tree
Showing 22 changed files with 517 additions and 121 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
* Add `*_PART_SIZE` constants to public interface
* Add `*_PART_SIZE`, `BUCKET_NAME_*`, `STDOUT_FILEPATH` constants
* Add `points_to_fifo`, `points_to_stdout` functions

### Changed
* Mark `TempDir` as deprecated in favor of `tempfile.TemporaryDirectory`

### Fixed
* Fix downloading to a non-seekable file, such as /dev/stdout
* Fix ScanPoliciesManager support for compiled regexes

### Infrastructure
* Fix readthedocs build by updating to v2 configuration schema
* Fix spellcheck erroring out on LICENSE file
* Fix snyk reporting vulnerability due to tornado package use in docs generation
* Deduplicate test_base files in test suite
* Refactor integration tests for better pytest compatibility & eager bucket cleanup

## [1.24.1] - 2023-09-27

Expand Down
14 changes: 14 additions & 0 deletions b2sdk/_internal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
######################################################################
#
# File: b2sdk/_internal/__init__.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
"""
b2sdk._internal package contains internal modules, and should not be used directly.
Please use chosen apiver package instead, e.g. b2sdk.v2
"""
9 changes: 9 additions & 0 deletions b2sdk/_internal/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
######################################################################
#
# File: b2sdk/_internal/utils/__init__.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
36 changes: 36 additions & 0 deletions b2sdk/_internal/utils/filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
######################################################################
#
# File: b2sdk/_internal/utils/filesystem.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
import pathlib
import platform
import stat

# True when platform.system() reports the host operating system as Windows.
_IS_WINDOWS = platform.system() == "Windows"


def points_to_fifo(path: pathlib.Path) -> bool:
    """
    Check if the path points to a fifo.

    :param path: filesystem path to inspect; it is resolved before being stat-ed
    :return: True if the resolved path exists and is a FIFO (named pipe);
             False otherwise, including when the path cannot be resolved or stat-ed
    """
    try:
        # resolve() may fail on its own, so it is kept inside the try block
        # together with stat().
        mode = path.resolve().stat().st_mode
    except (OSError, RuntimeError):
        # RuntimeError is what Path.resolve() raises for a symlink loop on
        # Python versions older than 3.13.
        return False
    return stat.S_ISFIFO(mode)


# Path that stands for standard output: "CON" on Windows, "/dev/stdout" elsewhere.
_STDOUT_FILENAME = "CON" if _IS_WINDOWS else "/dev/stdout"
STDOUT_FILEPATH = pathlib.Path(_STDOUT_FILENAME)


def points_to_stdout(path: pathlib.Path) -> bool:
    """
    Check if the path points to stdout.

    :param path: filesystem path to inspect
    :return: True if the path, either as given or after being resolved, equals
             STDOUT_FILEPATH; False otherwise, including when it cannot be resolved
    """
    if path == STDOUT_FILEPATH:
        return True
    try:
        return path.resolve() == STDOUT_FILEPATH
    except (OSError, RuntimeError):
        # RuntimeError is what Path.resolve() raises for a symlink loop on
        # Python versions older than 3.13.
        return False
8 changes: 8 additions & 0 deletions b2sdk/_v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@
IncrementalHexDigester,
)

from b2sdk._internal.utils.filesystem import (
points_to_fifo,
points_to_stdout,
STDOUT_FILEPATH,
)
from b2sdk.utils import trace_call
from b2sdk.utils.docs import get_b2sdk_doc_urls

Expand Down Expand Up @@ -239,6 +244,9 @@
from b2sdk.cache import DummyCache
from b2sdk.cache import InMemoryCache
from b2sdk.http_constants import (
BUCKET_NAME_CHARS,
BUCKET_NAME_CHARS_UNIQ,
BUCKET_NAME_LENGTH_RANGE,
DEFAULT_MAX_PART_SIZE,
DEFAULT_MIN_PART_SIZE,
DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE,
Expand Down
6 changes: 5 additions & 1 deletion b2sdk/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,11 @@ class PotentialS3EndpointPassedAsRealm(InvalidJsonResponse):
pass


class DestinationDirectoryError(B2Error):
class DestinationError(B2Error):
pass


class DestinationDirectoryError(DestinationError):
pass


Expand Down
7 changes: 7 additions & 0 deletions b2sdk/http_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@
######################################################################
from __future__ import annotations

import string

# These constants are needed in different modules, so they are stored in this module, that
# imports nothing, thus avoiding circular imports

# https://www.backblaze.com/docs/cloud-storage-buckets#bucket-names
# Characters accepted in a bucket name: letters of either case, digits and '-'.
BUCKET_NAME_CHARS = string.ascii_letters + string.digits + '-'
# Lowercase-only variant of the set above.
# NOTE(review): presumably exists because bucket names are compared
# case-insensitively - confirm against the linked documentation.
BUCKET_NAME_CHARS_UNIQ = string.ascii_lowercase + string.digits + '-'
# (minimum, maximum) bucket name length, in characters
BUCKET_NAME_LENGTH_RANGE = (6, 63)

LIST_FILE_NAMES_MAX_LIMIT = 10000 # https://www.backblaze.com/b2/docs/b2_list_file_names.html

FILE_INFO_HEADER_PREFIX = 'X-Bz-Info-'
Expand Down
90 changes: 79 additions & 11 deletions b2sdk/transfer/inbound/downloaded_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,32 @@
######################################################################
from __future__ import annotations

import contextlib
import io
import logging
import pathlib
from typing import TYPE_CHECKING
import sys
from typing import TYPE_CHECKING, BinaryIO

from requests.models import Response

from b2sdk._internal.utils.filesystem import _IS_WINDOWS, points_to_fifo, points_to_stdout
from b2sdk.exception import (
ChecksumMismatch,
DestinationDirectoryDoesntAllowOperation,
DestinationDirectoryDoesntExist,
DestinationError,
DestinationIsADirectory,
DestinationParentIsNotADirectory,
TruncatedOutput,
)
from b2sdk.utils import set_file_mtime

try:
from typing_extensions import Literal
except ImportError:
from typing import Literal

from ...encryption.setting import EncryptionSetting
from ...file_version import DownloadVersion
from ...progress import AbstractProgressListener
Expand All @@ -40,6 +49,9 @@
class MtimeUpdatedFile(io.IOBase):
"""
Helper class that facilitates updating a file's mod_time after closing.
Over time this class has grown, and now it also adds better exception handling.
Usage:
.. code-block: python
Expand All @@ -50,13 +62,27 @@ class MtimeUpdatedFile(io.IOBase):
# 'some_local_path' has the mod_time set according to metadata in B2
"""

def __init__(self, path_, mod_time_millis: int, mode='wb+', buffering=None):
self.path_ = path_
def __init__(
self,
path_: str | pathlib.Path,
mod_time_millis: int,
mode: Literal['wb', 'wb+'] = 'wb+',
buffering: int | None = None,
):
self.path = pathlib.Path(path_) if isinstance(path_, str) else path_
self.mode = mode
self.buffering = buffering if buffering is not None else -1
self.mod_time_to_set = mod_time_millis
self.file = None

@property
def path_(self) -> str:
return str(self.path)

@path_.setter
def path_(self, value: str) -> None:
self.path = pathlib.Path(value)

def write(self, value):
"""
This method is overwritten (monkey-patched) in __enter__ for performance reasons
Expand All @@ -69,6 +95,9 @@ def read(self, *a):
"""
raise NotImplementedError

def seekable(self) -> bool:
return self.file.seekable()

def seek(self, offset, whence=0):
return self.file.seek(offset, whence)

Expand All @@ -77,7 +106,7 @@ def tell(self):

def __enter__(self):
try:
path = pathlib.Path(self.path_)
path = self.path
if not path.parent.exists():
raise DestinationDirectoryDoesntExist()

Expand All @@ -91,22 +120,26 @@ def __enter__(self):
except PermissionError as ex:
raise DestinationDirectoryDoesntAllowOperation() from ex

# All remaining problems should be with permissions.
try:
self.file = open(self.path_, self.mode, buffering=self.buffering)
self.file = open(
self.path,
self.mode,
buffering=self.buffering,
)
except PermissionError as ex:
raise DestinationDirectoryDoesntAllowOperation() from ex

self.write = self.file.write
self.read = self.file.read
self.mode = self.file.mode
return self

def __exit__(self, exc_type, exc_val, exc_tb):
self.file.close()
set_file_mtime(self.path_, self.mod_time_to_set)

def __str__(self):
return str(self.path_)
return str(self.path)


class DownloadedFile:
Expand Down Expand Up @@ -157,14 +190,20 @@ def _validate_download(self, bytes_read, actual_sha1):
if bytes_read != desired_length:
raise TruncatedOutput(bytes_read, desired_length)

def save(self, file, allow_seeking=True):
def save(self, file: BinaryIO, allow_seeking: bool | None = None) -> None:
"""
Read data from B2 cloud and write it to a file-like object
:param file: a file-like object
:param allow_seeking: if False, download strategies that rely on seeking to write data
(parallel strategies) will be discarded.
"""
if allow_seeking is None:
allow_seeking = file.seekable()
elif allow_seeking and not file.seekable():
logger.warning('File is not seekable, disabling strategies that require seeking')
allow_seeking = False

if self.progress_listener:
file = WritingStreamWithProgress(file, self.progress_listener)
if self.range_ is not None:
Expand All @@ -187,7 +226,12 @@ def save(self, file, allow_seeking=True):
)
self._validate_download(bytes_read, actual_sha1)

def save_to(self, path_, mode='wb+', allow_seeking=True):
def save_to(
self,
path_: str | pathlib.Path,
mode: Literal['wb', 'wb+'] | None = None,
allow_seeking: bool | None = None,
) -> None:
"""
Open a local file and write data from B2 cloud to it, also update the mod_time.
Expand All @@ -196,10 +240,34 @@ def save_to(self, path_, mode='wb+', allow_seeking=True):
:param allow_seeking: if False, download strategies that rely on seeking to write data
(parallel strategies) will be discarded.
"""
path_ = pathlib.Path(path_)
is_stdout = points_to_stdout(path_)
if is_stdout or points_to_fifo(path_):
if mode not in (None, 'wb'):
raise DestinationError(f'invalid mode requested {mode!r} for FIFO file {path_!r}')

if is_stdout and _IS_WINDOWS:
if self.write_buffer_size and self.write_buffer_size not in (
-1, io.DEFAULT_BUFFER_SIZE
):
logger.warning(
'Unable to set arbitrary write_buffer_size for stdout on Windows'
)
context = contextlib.nullcontext(sys.stdout.buffer)
else:
context = open(path_, 'wb', buffering=self.write_buffer_size or -1)

try:
with context as file:
return self.save(file, allow_seeking=allow_seeking)
finally:
if not is_stdout:
set_file_mtime(path_, self.download_version.mod_time_millis)

with MtimeUpdatedFile(
path_,
mod_time_millis=self.download_version.mod_time_millis,
mode=mode,
mode=mode or 'wb+',
buffering=self.write_buffer_size,
) as file:
self.save(file, allow_seeking=allow_seeking)
return self.save(file, allow_seeking=allow_seeking)
28 changes: 24 additions & 4 deletions b2sdk/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import base64
import hashlib
import logging
import os
import pathlib
import platform
import re
import time
Expand All @@ -23,6 +25,8 @@

from logfury.v1 import DefaultTraceAbstractMeta, DefaultTraceMeta, limit_trace_arguments, disable_trace, trace_call

logger = logging.getLogger(__name__)

Sha1HexDigest = NewType('Sha1HexDigest', str)
T = TypeVar('T')
# TODO: When we drop Python 3.7 support, this should be replaced
Expand Down Expand Up @@ -277,14 +281,26 @@ def get_file_mtime(local_path):
return int(mod_time)


def set_file_mtime(local_path, mod_time_millis):
def is_special_file(path: str | pathlib.Path) -> bool:
    """
    Tell whether the given path names a special file (e.g. /dev/null or stdout).

    :param path: a "file" path
    :return: True if the path is a special file
    """
    as_text = str(path)
    # The null device, exactly as the os module spells it.
    if path == os.devnull:
        return True
    # Anything living under /dev/ is treated as special.
    if as_text.startswith('/dev/'):
        return True
    # Reserved names are only honoured when running on Windows.
    on_windows = platform.system() == 'Windows'
    return on_windows and as_text.upper() in ('CON', 'NUL')


def set_file_mtime(local_path: str | pathlib.Path, mod_time_millis: int) -> None:
"""
Set modification time of a file in milliseconds.
:param local_path: a file path
:type local_path: str
:param mod_time_millis: time to be set
:type mod_time_millis: int
"""
mod_time = mod_time_millis / 1000.0

Expand All @@ -299,7 +315,11 @@ def set_file_mtime(local_path, mod_time_millis):
# See #617 for details.
mod_time = float(Decimal('%.3f5' % mod_time))

os.utime(local_path, (mod_time, mod_time))
try:
os.utime(local_path, (mod_time, mod_time))
except OSError:
if not is_special_file(local_path):
raise


def fix_windows_path_limit(path):
Expand Down
Loading

0 comments on commit d6e7cdd

Please sign in to comment.