Skip to content

Commit

Permalink
work in progress
Browse files (browse the repository at this point in the history)
  • Loading branch information
dannymeijer committed Dec 2, 2024
1 parent 55b24d3 commit 7660183
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 22 deletions.
14 changes: 8 additions & 6 deletions src/koheesio/steps/download_file.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# TODO: add module description

from __future__ import annotations

from typing import Any, Optional, Type
Expand Down Expand Up @@ -50,10 +52,10 @@ def from_string(cls, mode: str) -> Type[FileWriteMode[Any]]:
def write_mode(self) -> str:
"""Return the binary file mode string for this FileWriteMode member.

Returns "wb" for OVERWRITE, BACKUP, and EXCLUSIVE, and "ab" for APPEND.

NOTE(review): any other member falls through both checks, so the method
implicitly returns None despite the `-> str` annotation -- confirm that
callers handle this (or that no other member reaches this code).
"""
if self in {FileWriteMode.OVERWRITE, FileWriteMode.BACKUP, FileWriteMode.EXCLUSIVE}:
# Overwrite, Backup, and Exclusive modes set the file to be written from the beginning
# OVERWRITE, BACKUP, and EXCLUSIVE modes set the file to be written from the beginning
return "wb"
if self == FileWriteMode.APPEND:
# Append mode sets the file to be written from the end
# APPEND mode sets the file to be written from the end
return "ab"


Expand Down Expand Up @@ -126,7 +128,7 @@ def handle_file_write_modes(self, _filepath: Path, _filename: str) -> Optional[s

def execute(self) -> Output:
"""
Executes the file download process, handling different write modes and saving the file to the specified path.
Executes the file download process, handling different write modes, and saving the file to the specified path.
"""
_filename = Path(self.url).name
_filepath = self.download_path / _filename
Expand All @@ -135,14 +137,14 @@ def execute(self) -> Output:
if (mode := self.handle_file_write_modes(_filepath, _filename)) is None:
return self.output

# create the download path if it does not exist
# Create the download path if it does not exist
self.output.download_file_path = _filepath
self.output.download_file_path.touch(exist_ok=True)

# download the file content and write the downloaded content to the file
# Download the file content and write the downloaded content to the file
with self.request() as response, self.output.download_file_path.open(mode=mode) as f:
for chunk in response.iter_content(chunk_size=self.chunk_size):
self.log.debug(f"Downloading chunk of size {len(chunk)}")
self.log.debug(f"Writing to file {self.output.download_file_path}")
self.log.debug(f"Downloaded {f.tell()} bytes")
self.log.debug(f"Writing to file {self.output.download_file_path}")
f.write(chunk)
4 changes: 2 additions & 2 deletions src/koheesio/steps/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def set_outputs(self, response: requests.Response) -> None:
self.output.response_json = response.json()

except json.decoder.JSONDecodeError as e:
self.log.info(f"An error occurred while processing the JSON payload. Error message:\n{e.msg}")
self.log.error(f"An error occurred while processing the JSON payload. Error message:\n{e.msg}")

def get_options(self) -> dict:
"""options to be passed to requests.request()"""
Expand Down Expand Up @@ -298,7 +298,7 @@ def execute(self) -> None:
The last exception that was caught if `self.request()` fails after `self.max_retries` attempts.
"""
with self.request() as response:
self.log.info(f"HTTP request to {self.url} was successful with status code {response.status_code}")
self.log.info(f"HTTP request to {self.url}, status code {response.status_code}")


class HttpGetStep(HttpStep):
Expand Down
35 changes: 21 additions & 14 deletions tests/steps/test_download_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,32 @@ def test_download_file_step_ignore_mode(self, download_path, downloaded_file, ca
# Arrange
downloaded_file.write_bytes(b"foo")

with caplog.at_level("INFO"), Mocker() as mocker:
mocker.get(URL, content=b"bar")

# FIXME: logging is not working in the unit tests

# Act
# Act and Assert -- dry run
with caplog.at_level(logging.INFO):
step = DownloadFileStep(url=URL, download_path=download_path, mode="ignore")
step.log.setLevel("INFO")
step.execute()
print(f"2 {caplog.record_tuples = }")

# Assert
print(f"5 {caplog.text = }")
assert "Ignoring testfile.txt based on IGNORE mode." in caplog.text

print(f"3 {caplog.text = }")
assert downloaded_file.exists()
print(f"4 {caplog.text = }")
assert downloaded_file.read_bytes() == b"foo"
# with caplog.at_level("INFO"), Mocker() as mocker:
# mocker.get(URL, content=b"bar")
#
# # FIXME: logging is not working in the unit tests
#
# # Act
# step = DownloadFileStep(url=URL, download_path=download_path, mode="ignore")
# step.log.setLevel("INFO")
# step.execute()
# print(f"2 {caplog.record_tuples = }")
#
# # Assert
# print(f"5 {caplog.text = }")
# assert "Ignoring testfile.txt based on IGNORE mode." in caplog.text
#
# print(f"3 {caplog.text = }")
# assert downloaded_file.exists()
# print(f"4 {caplog.text = }")
# assert downloaded_file.read_bytes() == b"foo"

def test_download_file_step_exclusive_mode(self, download_path, downloaded_file):
"""In EXCLUSIVE mode, an error should be raised if the file exists"""
Expand Down

0 comments on commit 7660183

Please sign in to comment.