From fff18f9e486fcc2505f122801a005bbdd6613c29 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:06:43 +0800 Subject: [PATCH] refactor: improved variable naming and commentary explanations Closes #88 --- ktoolbox/downloader/downloader.py | 73 +++++++++++++++++-------------- ktoolbox/job/runner.py | 2 +- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index ee2b964..c5e5194 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -24,6 +24,10 @@ class Downloader: + """ + :ivar _save_filename: The actual filename for saving. + """ + def __init__( self, url: str, @@ -31,7 +35,7 @@ def __init__( *, buffer_size: int = None, chunk_size: int = None, - alt_filename: str = None, + designated_filename: str = None, server_path: str = None ): # noinspection GrazieInspection @@ -39,27 +43,26 @@ def __init__( Initialize a file downloader - About filename: - * If ``alt_filename`` parameter is set, use it. - * Else if ``Content-Disposition`` is set in headers, use filename from it. - * Else use filename from URL 'path' part. + 1. If ``designated_filename`` parameter is set, use it. + 2. Else if ``Content-Disposition`` is set in headers, use filename from it. + 3. Else use filename from 'file' part of ``server_path``. :param url: Download URL :param path: Directory path to save the file :param buffer_size: Number of bytes for file I/O buffer :param chunk_size: Number of bytes for chunk of download stream - :param alt_filename: Use this name if no filename given by the server - :param server_path: Server path of the file. if config.use_bucket is True, \ - it will be used as save the path to the file + :param designated_filename: Manually specify the filename for saving + :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` is ``True``, \ + it will be used as the save path. """ self._url = url self._path = path self._buffer_size = buffer_size or config.downloader.buffer_size self._chunk_size = chunk_size or config.downloader.chunk_size - # _alt_filename 是用于下载的文件名 - self._alt_filename = alt_filename # 用于下载的文件名 - self._server_path = server_path # 服务器文件路径 /hash[:1]/hash2[1:3]/hash - self._filename = alt_filename # 保留用做实际文件名 + self._designated_filename = designated_filename + self._server_path = server_path # /hash[:1]/hash2[1:3]/hash + self._save_filename = designated_filename # Prioritize the manually specified filename self._lock = asyncio.Lock() self._stop: bool = False @@ -87,7 +90,7 @@ def chunk_size(self) -> int: @property def filename(self) -> Optional[str]: """Actual filename of the download file""" - return self._filename + return self._save_filename @property def finished(self) -> bool: @@ -141,34 +144,35 @@ async def run( :return: ``DownloaderRet`` which contain the actual output filename :raise CancelledError """ - # Get filename to check if file exists + # Get filename to check if file exists (First-time duplicate file check) # Check it before request to make progress more efficiency server_relpath = self._server_path[1:] server_relpath_without_params = urlparse(server_relpath).path server_path_filename = unquote(Path(server_relpath_without_params).name) - art_file_path = self._path / (self._filename or server_path_filename) - check_path = art_file_path + # Priority order can be referenced from the constructor's documentation + save_filepath = self._path / (self._save_filename or server_path_filename) + duplicate_check_path = save_filepath # Get bucket file path - art_bucket_file_path: Optional[Path] = None + bucket_file_path: Optional[Path] = None if config.downloader.use_bucket: - art_bucket_file_path = config.downloader.bucket_path / server_relpath - check_path = art_bucket_file_path + bucket_file_path = config.downloader.bucket_path / server_relpath + duplicate_check_path = bucket_file_path # Check if the file exists - if check_path.is_file(): + if duplicate_check_path.is_file(): if config.downloader.use_bucket: ret_msg = "Download file already exists in both bucket and local, skipping" - if not art_file_path.is_file(): + if not save_filepath.is_file(): ret_msg = "Download file already exists in bucket, linking to target path" - os.link(art_bucket_file_path, art_file_path) + os.link(bucket_file_path, save_filepath) else: ret_msg = "Download file already exists, skipping" return DownloaderRet( code=RetCodeEnum.FileExisted, message=generate_msg( ret_msg, - path=art_file_path + path=save_filepath ) ) @@ -187,13 +191,14 @@ async def run( message=generate_msg( "Download failed", status_code=res.status_code, - filename=art_file_path + filename=save_filepath ) ) - # Get filename - filename = self._alt_filename or filename_from_headers(res.headers) or server_path_filename - self._filename = filename + # Get filename for saving and check if file exists (Second-time duplicate file check) + # Priority order can be referenced from the constructor's documentation + self._save_filename = self._designated_filename or filename_from_headers(res.headers) or \ + server_path_filename # Download temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}") @@ -201,7 +206,7 @@ async def run( async with aiofiles.open(str(temp_filepath), "wb", self._buffer_size) as f: chunk_iterator = res.aiter_bytes(self._chunk_size) t = tqdm_class( - desc=filename, + desc=self._save_filename, total=total_size, disable=not progress, unit="iB", @@ -216,21 +221,23 @@ async def run( # Download finished if config.downloader.use_bucket: - art_bucket_file_path.parent.mkdir(parents=True, exist_ok=True) - os.link(temp_filepath, art_bucket_file_path) + bucket_file_path.parent.mkdir(parents=True, exist_ok=True) + os.link(temp_filepath, bucket_file_path) + temp_filepath.rename(self._path / self._save_filename) - temp_filepath.rename(self._path / filename) + # Callbacks if sync_callable: sync_callable(self) if async_callable: await async_callable(self) + return DownloaderRet( - data=filename - ) if filename else DownloaderRet( + data=self._save_filename + ) if self._save_filename else DownloaderRet( code=RetCodeEnum.GeneralFailure, message=generate_msg( "Download failed", - filename=self._alt_filename + filename=self._designated_filename ) ) diff --git a/ktoolbox/job/runner.py b/ktoolbox/job/runner.py index 73be61f..8e75402 100644 --- a/ktoolbox/job/runner.py +++ b/ktoolbox/job/runner.py @@ -85,7 +85,7 @@ async def processor(self) -> int: downloader = Downloader( url=url, path=job.path, - alt_filename=job.alt_filename, + designated_filename=job.alt_filename, server_path=job.server_path )