From 5c4562aea34113450caf217d0a0dbdbf966e8f92 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:35:58 +0800 Subject: [PATCH] fix: duplicate file check after HTTP connection started. Closes #88 --- ktoolbox/downloader/downloader.py | 28 +++++++++++++++------------- ktoolbox/downloader/utils.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index d7f66fa..0500713 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -17,7 +17,7 @@ from ktoolbox._enum import RetCodeEnum from ktoolbox.configuration import config -from ktoolbox.downloader import DownloaderRet, filename_from_headers +from ktoolbox.downloader import DownloaderRet, filename_from_headers, duplicate_file_check from ktoolbox.utils import generate_msg __all__ = ["Downloader"] @@ -52,7 +52,7 @@ def __init__( :param buffer_size: Number of bytes for file I/O buffer :param chunk_size: Number of bytes for chunk of download stream :param designated_filename: Manually specify the filename for saving - :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` is ``True``, \ + :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` enabled, \ it will be used as the save path. 
""" @@ -151,23 +151,15 @@ async def run( server_path_filename = unquote(Path(server_relpath_without_params).name) # Priority order can be referenced from the constructor's documentation save_filepath = self._path / (self._save_filename or server_path_filename) - duplicate_check_path = save_filepath # Get bucket file path bucket_file_path: Optional[Path] = None if config.downloader.use_bucket: bucket_file_path = config.downloader.bucket_path / server_relpath - duplicate_check_path = bucket_file_path # Check if the file exists - if duplicate_check_path.is_file(): - if config.downloader.use_bucket: - ret_msg = "Download file already exists in both bucket and local, skipping" - if not save_filepath.is_file(): - ret_msg = "Download file already exists in bucket, linking to target path" - os.link(bucket_file_path, save_filepath) - else: - ret_msg = "Download file already exists, skipping" + file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path) + if file_existed: return DownloaderRet( code=RetCodeEnum.FileExisted, message=generate_msg( @@ -199,9 +191,19 @@ async def run( # Priority order can be referenced from the constructor's documentation self._save_filename = self._designated_filename or filename_from_headers( res.headers) or server_path_filename + save_filepath = self._path / self._save_filename + file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path) + if file_existed: + return DownloaderRet( + code=RetCodeEnum.FileExisted, + message=generate_msg( + ret_msg, + path=save_filepath + ) + ) # Download - temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}") + temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}") total_size = int(length_str) if (length_str := res.headers.get("Content-Length")) else None async with aiofiles.open(str(temp_filepath), "wb", self._buffer_size) as f: chunk_iterator = res.aiter_bytes(self._chunk_size) diff --git 
a/ktoolbox/downloader/utils.py b/ktoolbox/downloader/utils.py
index c1525c5..bacb47d 100644
--- a/ktoolbox/downloader/utils.py
+++ b/ktoolbox/downloader/utils.py
@@ -1,10 +1,12 @@
 import cgi
+import os
 import urllib.parse
-from typing import Optional, Dict
+from pathlib import Path
+from typing import Optional, Dict, Tuple
 
 from ktoolbox.configuration import config
 
-__all__ = ["filename_from_headers"]
+__all__ = ["filename_from_headers", "duplicate_file_check"]
 
 
 def parse_header(line: str) -> Dict[str, Optional[str]]:
@@ -67,3 +69,26 @@ def filename_from_headers(headers: Dict[str, str]) -> Optional[str]:
     if filename := options.get("filename"):
         return urllib.parse.unquote(filename, config.downloader.encoding)
     return None
+
+
+def duplicate_file_check(local_file_path: Path, bucket_file_path: Optional[Path] = None) -> Tuple[bool, Optional[str]]:
+    """
+    Check whether the file already exists, hard-linking the bucket file to the local path \
+    when ``DownloaderConfiguration.use_bucket`` is enabled and only the bucket copy exists.
+
+    :param local_file_path: Download target path
+    :param bucket_file_path: Bucket path of the file; checked instead of the local path when given
+    :return: ``(whether the file exists, message)``, message is ``None`` if the file does not exist
+    """
+    duplicate_check_path = local_file_path if bucket_file_path is None else bucket_file_path
+    if not duplicate_check_path.is_file():
+        return False, None
+    if not config.downloader.use_bucket or bucket_file_path is None:
+        # Without a bucket path there is nothing to link from, so never report a bucket hit
+        return True, "Download file already exists, skipping"
+    if local_file_path.is_file():
+        return True, "Download file already exists in both bucket and local, skipping"
+    # ``os.link`` does not create missing directories, so make sure the target directory exists
+    local_file_path.parent.mkdir(parents=True, exist_ok=True)
+    os.link(bucket_file_path, local_file_path)
+    return True, "Download file already exists in bucket, linking to local path"