Skip to content

Commit

Permalink
fix: duplicate file check after HTTP connection started.
Browse files Browse the repository at this point in the history
Closes #88
  • Loading branch information
Ljzd-PRO committed Mar 31, 2024
1 parent cdbae0f commit 5c4562a
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
28 changes: 15 additions & 13 deletions ktoolbox/downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from ktoolbox._enum import RetCodeEnum
from ktoolbox.configuration import config
from ktoolbox.downloader import DownloaderRet, filename_from_headers
from ktoolbox.downloader import DownloaderRet, filename_from_headers, duplicate_file_check
from ktoolbox.utils import generate_msg

__all__ = ["Downloader"]
Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(
:param buffer_size: Number of bytes for file I/O buffer
:param chunk_size: Number of bytes for chunk of download stream
:param designated_filename: Manually specify the filename for saving
:param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` is ``True``, \
:param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` enabled, \
it will be used as the save path.
"""

Expand Down Expand Up @@ -151,23 +151,15 @@ async def run(
server_path_filename = unquote(Path(server_relpath_without_params).name)
# Priority order can be referenced from the constructor's documentation
save_filepath = self._path / (self._save_filename or server_path_filename)
duplicate_check_path = save_filepath

# Get bucket file path
bucket_file_path: Optional[Path] = None
if config.downloader.use_bucket:
bucket_file_path = config.downloader.bucket_path / server_relpath
duplicate_check_path = bucket_file_path

# Check if the file exists
if duplicate_check_path.is_file():
if config.downloader.use_bucket:
ret_msg = "Download file already exists in both bucket and local, skipping"
if not save_filepath.is_file():
ret_msg = "Download file already exists in bucket, linking to target path"
os.link(bucket_file_path, save_filepath)
else:
ret_msg = "Download file already exists, skipping"
file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
if file_existed:
return DownloaderRet(
code=RetCodeEnum.FileExisted,
message=generate_msg(
Expand Down Expand Up @@ -199,9 +191,19 @@ async def run(
# Priority order can be referenced from the constructor's documentation
self._save_filename = self._designated_filename or filename_from_headers(
res.headers) or server_path_filename
save_filepath = self._path / self._save_filename
file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
if file_existed:
return DownloaderRet(
code=RetCodeEnum.FileExisted,
message=generate_msg(
ret_msg,
path=save_filepath
)
)

# Download
temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}")
temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}")
total_size = int(length_str) if (length_str := res.headers.get("Content-Length")) else None
async with aiofiles.open(str(temp_filepath), "wb", self._buffer_size) as f:
chunk_iterator = res.aiter_bytes(self._chunk_size)
Expand Down
29 changes: 27 additions & 2 deletions ktoolbox/downloader/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import cgi
import os
import urllib.parse
from typing import Optional, Dict
from pathlib import Path
from typing import Optional, Dict, Tuple

from ktoolbox.configuration import config

__all__ = ["filename_from_headers"]
__all__ = ["filename_from_headers", "duplicate_file_check"]


def parse_header(line: str) -> Dict[str, Optional[str]]:
Expand Down Expand Up @@ -67,3 +69,26 @@ def filename_from_headers(headers: Dict[str, str]) -> Optional[str]:
if filename := options.get("filename"):
return urllib.parse.unquote(filename, config.downloader.encoding)
return None


def duplicate_file_check(local_file_path: Path, bucket_file_path: Optional[Path] = None) -> Tuple[bool, Optional[str]]:
    """
    Check whether the download target already exists, hard-linking it from the bucket when needed.

    The mode is selected by ``bucket_file_path`` alone, so the path that gets checked and \
    the action that gets taken can never disagree:

    * ``bucket_file_path`` is ``None``: only ``local_file_path`` is checked.
    * ``bucket_file_path`` is given (bucket mode, i.e. ``DownloaderConfiguration.use_bucket``): \
    the bucket file decides whether the download is a duplicate. If it exists but \
    ``local_file_path`` does not, a hard link to the bucket file is created at ``local_file_path``.

    :param local_file_path: Download target path
    :param bucket_file_path: The bucket filepath of the local download path, \
    ``None`` when the bucket is not used
    :return: ``(if file existed, message)``, the message is ``None`` when the file does not exist
    :raises OSError: If creating the hard link fails
    """
    # Plain mode: the local target is the only place a duplicate can live.
    if bucket_file_path is None:
        if local_file_path.is_file():
            return True, "Download file already exists, skipping"
        return False, None

    # Bucket mode: the bucket is the source of truth, a missing bucket file means "not downloaded yet".
    if not bucket_file_path.is_file():
        return False, None
    if local_file_path.is_file():
        return True, "Download file already exists in both bucket and local, skipping"
    # Present in the bucket only: expose it at the target path without downloading again.
    os.link(bucket_file_path, local_file_path)
    return True, "Download file already exists in bucket, linking to local path"

0 comments on commit 5c4562a

Please sign in to comment.