From 3368c43b28cdd4acb1397a88faef92174f5c29f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 21:07:36 +0000 Subject: [PATCH 01/13] build(deps-dev): bump mkdocs-material from 9.5.14 to 9.5.15 Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.5.14 to 9.5.15. - [Release notes](https://github.com/squidfunk/mkdocs-material/releases) - [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG) - [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.5.14...9.5.15) --- updated-dependencies: - dependency-name: mkdocs-material dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- poetry.lock | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/poetry.lock b/poetry.lock index d44f897..7656a41 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiofiles" @@ -936,13 +936,13 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-material" -version = "9.5.14" +version = "9.5.15" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.14-py3-none-any.whl", hash = "sha256:a45244ac221fda46ecf8337f00ec0e5cb5348ab9ffb203ca2a0c313b0d4dbc27"}, - {file = "mkdocs_material-9.5.14.tar.gz", hash = "sha256:2a1f8e67cda2587ab93ecea9ba42d0ca61d1d7b5fad8cf690eeaeb39dcd4b9af"}, + {file = "mkdocs_material-9.5.15-py3-none-any.whl", hash = "sha256:e5c96dec3d19491de49ca643fc1dbb92b278e43cdb816c775bc47db77d9b62fb"}, + {file = "mkdocs_material-9.5.15.tar.gz", hash = "sha256:39f03cca45e82bf54eb7456b5a18bd252eabfdd67f237a229471484a0a4d4635"}, ] [package.dependencies] @@ -1585,6 +1585,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1906,24 +1907,24 @@ python-versions = ">=3.6" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, - {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d92f81886165cb14d7b067ef37e142256f1c6a90a65cd156b063a43da1708cfd"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fff3573c2db359f091e1589c3d7c5fc2f86f5bdb6f24252c2d8e539d4e45f412"}, + {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:aa2267c6a303eb483de8d02db2871afb5c5fc15618d894300b88958f729ad74f"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:840f0c7f194986a63d2c2465ca63af8ccbbc90ab1c6001b1978f05119b5e7334"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:024cfe1fc7c7f4e1aff4a81e718109e13409767e4f871443cbff3dba3578203d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win32.whl", hash = "sha256:c69212f63169ec1cfc9bb44723bf2917cbbd8f6191a00ef3410f5a7fe300722d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-win_amd64.whl", hash = "sha256:cabddb8d8ead485e255fe80429f833172b4cadf99274db39abc080e068cbcc31"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:bef08cd86169d9eafb3ccb0a39edb11d8e25f3dae2b28f5c52fd997521133069"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:b16420e621d26fdfa949a8b4b47ade8810c56002f5389970db4ddda51dbff248"}, - {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b5edda50e5e9e15e54a6a8a0070302b00c518a9d32accc2346ad6c984aacd279"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:25c515e350e5b739842fc3228d662413ef28f295791af5e5110b543cf0b57d9b"}, + {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:1707814f0d9791df063f8c19bb51b0d1278b8e9a2353abbb676c2f685dee6afe"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:46d378daaac94f454b3a0e3d8d78cafd78a026b1d71443f4966c696b48a6d899"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09b055c05697b38ecacb7ac50bdab2240bfca1a0c4872b0fd309bb07dc9aa3a9"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win32.whl", hash = "sha256:53a300ed9cea38cf5a2a9b069058137c2ca1ce658a874b79baceb8f892f915a7"}, {file = "ruamel.yaml.clib-0.2.8-cp311-cp311-win_amd64.whl", hash = "sha256:c2a72e9109ea74e511e29032f3b670835f8a59bbdc9ce692c5b4ed91ccf1eedb"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ebc06178e8821efc9692ea7544aa5644217358490145629914d8020042c24aa1"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:edaef1c1200c4b4cb914583150dcaa3bc30e592e907c01117c08b13a07255ec2"}, - {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:7048c338b6c86627afb27faecf418768acb6331fc24cfa56c93e8c9780f815fa"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d176b57452ab5b7028ac47e7b3cf644bcfdc8cacfecf7e71759f7f51a59e5c92"}, + {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:1dc67314e7e1086c9fdf2680b7b6c2be1c0d8e3a8279f2e993ca2a7545fecf62"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3213ece08ea033eb159ac52ae052a4899b56ecc124bb80020d9bbceeb50258e9"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aab7fd643f71d7946f2ee58cc88c9b7bfc97debd71dcc93e03e2d174628e7e2d"}, {file = "ruamel.yaml.clib-0.2.8-cp312-cp312-win32.whl", hash = "sha256:5c365d91c88390c8d0a8545df0b5857172824b1c604e867161e6b3d59a827eaa"}, @@ -1931,7 +1932,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a5aa27bad2bb83670b71683aae140a1f52b0857a2deff56ad3f6c13a017a26ed"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c58ecd827313af6864893e7af0a3bb85fd529f862b6adbefe14643947cfe2942"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f481f16baec5290e45aebdc2a5168ebc6d35189ae6fea7a58787613a25f6e875"}, - {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3fcc54cb0c8b811ff66082de1680b4b14cf8a81dce0d4fbf665c2265a81e07a1"}, + {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:77159f5d5b5c14f7c34073862a6b7d34944075d9f93e681638f6d753606c6ce6"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7f67a1ee819dc4562d444bbafb135832b0b909f81cc90f7aa00260968c9ca1b3"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4ecbf9c3e19f9562c7fdd462e8d18dd902a47ca046a2e64dba80699f0b6c09b7"}, {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:87ea5ff66d8064301a154b3933ae406b0863402a799b16e4a1d24d9fbbcbe0d3"}, @@ -1939,7 +1940,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp37-cp37m-win_amd64.whl", hash = "sha256:3f215c5daf6a9d7bbed4a0a4f760f3113b10e82ff4c5c44bec20a68c8014f675"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b617618914cb00bf5c34d4357c37aa15183fa229b24767259657746c9077615"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a6a9ffd280b71ad062eae53ac1659ad86a17f59a0fdc7699fd9be40525153337"}, - {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:665f58bfd29b167039f714c6998178d27ccd83984084c286110ef26b230f259f"}, + {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:305889baa4043a09e5b76f8e2a51d4ffba44259f6b4c72dec8ca56207d9c6fe1"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:700e4ebb569e59e16a976857c8798aee258dceac7c7d6b50cab63e080058df91"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e2b4c44b60eadec492926a7270abb100ef9f72798e18743939bdbf037aab8c28"}, {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e79e5db08739731b0ce4850bed599235d601701d5694c36570a99a0c5ca41a9d"}, @@ -1947,7 +1948,7 @@ files = [ {file = "ruamel.yaml.clib-0.2.8-cp38-cp38-win_amd64.whl", hash = "sha256:56f4252222c067b4ce51ae12cbac231bce32aee1d33fbfc9d17e5b8d6966c312"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:03d1162b6d1df1caa3a4bd27aa51ce17c9afc2046c31b0ad60a0a96ec22f8001"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:bba64af9fa9cebe325a62fa398760f5c7206b215201b0ec825005f1b18b9bccf"}, - {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9eb5dee2772b0f704ca2e45b1713e4e5198c18f515b52743576d196348f374d3"}, + {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:a1a45e0bb052edf6a1d3a93baef85319733a888363938e1fc9924cb00c8df24c"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:da09ad1c359a728e112d60116f626cc9f29730ff3e0e7db72b9a2dbc2e4beed5"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:184565012b60405d93838167f425713180b949e9d8dd0bbc7b49f074407c5a8b"}, {file = "ruamel.yaml.clib-0.2.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a75879bacf2c987c003368cf14bed0ffe99e8e85acfa6c0bfffc21a090f16880"}, From aca488af066a388f9a36f4ae02512e1eca1b6a6c Mon Sep 17 00:00:00 2001 From: Nacosia <161319046+Nacosia@users.noreply.github.com> Date: Sun, 31 Mar 2024 21:46:25 +0800 Subject: [PATCH 02/13] fix: Incorrect argument order when using `pathlib.Path.link_to`. --- ktoolbox/downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index f1d2bf5..ee2b964 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -161,7 +161,7 @@ async def run( ret_msg = "Download file already exists in both bucket and local, skipping" if not art_file_path.is_file(): ret_msg = "Download file already exists in bucket, linking to target path" - check_path.hardlink_to(art_file_path) + os.link(art_bucket_file_path, art_file_path) else: ret_msg = "Download file already exists, skipping" return DownloaderRet( From fff18f9e486fcc2505f122801a005bbdd6613c29 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:06:43 +0800 Subject: [PATCH 03/13] refactor: improved variable naming and commentary explanations Closes #88 --- ktoolbox/downloader/downloader.py | 73 +++++++++++++++++-------------- ktoolbox/job/runner.py | 2 +- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index ee2b964..c5e5194 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -24,6 +24,10 @@ class Downloader: + """ + :ivar _save_filename: The actual filename for saving. + """ + def __init__( self, url: str, @@ -31,7 +35,7 @@ def __init__( *, buffer_size: int = None, chunk_size: int = None, - alt_filename: str = None, + designated_filename: str = None, server_path: str = None ): # noinspection GrazieInspection @@ -39,27 +43,26 @@ def __init__( Initialize a file downloader - About filename: - * If ``alt_filename`` parameter is set, use it. - * Else if ``Content-Disposition`` is set in headers, use filename from it. - * Else use filename from URL 'path' part. + 1. If ``designated_filename`` parameter is set, use it. + 2. Else if ``Content-Disposition`` is set in headers, use filename from it. + 3. Else use filename from 'file' part of ``server_path``. :param url: Download URL :param path: Directory path to save the file :param buffer_size: Number of bytes for file I/O buffer :param chunk_size: Number of bytes for chunk of download stream - :param alt_filename: Use this name if no filename given by the server - :param server_path: Server path of the file. if config.use_bucket is True, \ - it will be used as save the path to the file + :param designated_filename: Manually specify the filename for saving + :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` is ``True``, \ + it will be used as the save path. """ self._url = url self._path = path self._buffer_size = buffer_size or config.downloader.buffer_size self._chunk_size = chunk_size or config.downloader.chunk_size - # _alt_filename 是用于下载的文件名 - self._alt_filename = alt_filename # 用于下载的文件名 - self._server_path = server_path # 服务器文件路径 /hash[:1]/hash2[1:3]/hash - self._filename = alt_filename # 保留用做实际文件名 + self._designated_filename = designated_filename + self._server_path = server_path # /hash[:1]/hash2[1:3]/hash + self._save_filename = designated_filename # Prioritize the manually specified filename self._lock = asyncio.Lock() self._stop: bool = False @@ -87,7 +90,7 @@ def chunk_size(self) -> int: @property def filename(self) -> Optional[str]: """Actual filename of the download file""" - return self._filename + return self._save_filename @property def finished(self) -> bool: @@ -141,34 +144,35 @@ async def run( :return: ``DownloaderRet`` which contain the actual output filename :raise CancelledError """ - # Get filename to check if file exists + # Get filename to check if file exists (First-time duplicate file check) # Check it before request to make progress more efficiency server_relpath = self._server_path[1:] server_relpath_without_params = urlparse(server_relpath).path server_path_filename = unquote(Path(server_relpath_without_params).name) - art_file_path = self._path / (self._filename or server_path_filename) - check_path = art_file_path + # Priority order can be referenced from the constructor's documentation + save_filepath = self._path / (self._save_filename or server_path_filename) + duplicate_check_path = save_filepath # Get bucket file path - art_bucket_file_path: Optional[Path] = None + bucket_file_path: Optional[Path] = None if config.downloader.use_bucket: - art_bucket_file_path = config.downloader.bucket_path / server_relpath - check_path = art_bucket_file_path + bucket_file_path = config.downloader.bucket_path / server_relpath + duplicate_check_path = bucket_file_path # Check if the file exists - if check_path.is_file(): + if duplicate_check_path.is_file(): if config.downloader.use_bucket: ret_msg = "Download file already exists in both bucket and local, skipping" - if not art_file_path.is_file(): + if not save_filepath.is_file(): ret_msg = "Download file already exists in bucket, linking to target path" - os.link(art_bucket_file_path, art_file_path) + os.link(bucket_file_path, save_filepath) else: ret_msg = "Download file already exists, skipping" return DownloaderRet( code=RetCodeEnum.FileExisted, message=generate_msg( ret_msg, - path=art_file_path + path=save_filepath ) ) @@ -187,13 +191,14 @@ async def run( message=generate_msg( "Download failed", status_code=res.status_code, - filename=art_file_path + filename=save_filepath ) ) - # Get filename - filename = self._alt_filename or filename_from_headers(res.headers) or server_path_filename - self._filename = filename + # Get filename for saving and check if file exists (Second-time duplicate file check) + # Priority order can be referenced from the constructor's documentation + self._save_filename = self._designated_filename or filename_from_headers(res.headers) or \ + server_path_filename # Download temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}") @@ -201,7 +206,7 @@ async def run( async with aiofiles.open(str(temp_filepath), "wb", self._buffer_size) as f: chunk_iterator = res.aiter_bytes(self._chunk_size) t = tqdm_class( - desc=filename, + desc=self._save_filename, total=total_size, disable=not progress, unit="iB", @@ -216,21 +221,23 @@ async def run( # Download finished if config.downloader.use_bucket: - art_bucket_file_path.parent.mkdir(parents=True, exist_ok=True) - os.link(temp_filepath, art_bucket_file_path) + bucket_file_path.parent.mkdir(parents=True, exist_ok=True) + os.link(temp_filepath, bucket_file_path) + temp_filepath.rename(self._path / self._save_filename) - temp_filepath.rename(self._path / filename) + # Callbacks if sync_callable: sync_callable(self) if async_callable: await async_callable(self) + return DownloaderRet( - data=filename - ) if filename else DownloaderRet( + data=self._save_filename + ) if self._save_filename else DownloaderRet( code=RetCodeEnum.GeneralFailure, message=generate_msg( "Download failed", - filename=self._alt_filename + filename=self._designated_filename ) ) diff --git a/ktoolbox/job/runner.py b/ktoolbox/job/runner.py index 73be61f..8e75402 100644 --- a/ktoolbox/job/runner.py +++ b/ktoolbox/job/runner.py @@ -85,7 +85,7 @@ async def processor(self) -> int: downloader = Downloader( url=url, path=job.path, - alt_filename=job.alt_filename, + designated_filename=job.alt_filename, server_path=job.server_path ) From 856846688d9ce7b56e57e43aa479d879c999bb1b Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:08:11 +0800 Subject: [PATCH 04/13] style: code formatted [skip ci] --- ktoolbox/downloader/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index c5e5194..843776e 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -197,8 +197,8 @@ async def run( # Get filename for saving and check if file exists (Second-time duplicate file check) # Priority order can be referenced from the constructor's documentation - self._save_filename = self._designated_filename or filename_from_headers(res.headers) or \ - server_path_filename + self._save_filename = self._designated_filename or filename_from_headers( + res.headers) or server_path_filename # Download temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}") From cdbae0f452b179ebb40f2057065ebdd23f88c35a Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:13:20 +0800 Subject: [PATCH 05/13] refactor: improved structure --- ktoolbox/downloader/__init__.py | 1 + ktoolbox/downloader/downloader.py | 4 +- ktoolbox/downloader/utils.py | 69 +++++++++++++++++++++++++++++++ ktoolbox/utils.py | 67 +----------------------------- 4 files changed, 73 insertions(+), 68 deletions(-) create mode 100644 ktoolbox/downloader/utils.py diff --git a/ktoolbox/downloader/__init__.py b/ktoolbox/downloader/__init__.py index 421d387..2c0f387 100644 --- a/ktoolbox/downloader/__init__.py +++ b/ktoolbox/downloader/__init__.py @@ -1,2 +1,3 @@ from .base import * from .downloader import * +from .utils import * diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index 843776e..d7f66fa 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -17,8 +17,8 @@ from ktoolbox._enum import RetCodeEnum from ktoolbox.configuration import config -from ktoolbox.downloader import DownloaderRet -from ktoolbox.utils import filename_from_headers, generate_msg +from ktoolbox.downloader import DownloaderRet, filename_from_headers +from ktoolbox.utils import generate_msg __all__ = ["Downloader"] diff --git a/ktoolbox/downloader/utils.py b/ktoolbox/downloader/utils.py new file mode 100644 index 0000000..c1525c5 --- /dev/null +++ b/ktoolbox/downloader/utils.py @@ -0,0 +1,69 @@ +import cgi +import urllib.parse +from typing import Optional, Dict + +from ktoolbox.configuration import config + +__all__ = ["filename_from_headers"] + + +def parse_header(line: str) -> Dict[str, Optional[str]]: + """ + Alternative resolution for parsing header line. + + Apply when ``cgi.parse_header`` is unable to use due to the deprecation of `cgi` module. + + https://peps.python.org/pep-0594/#cgi + + - Example: + ``` + parse_header("text/html; charset=utf-8") + ``` + + - Return: + ``` + {'text/html': None, 'charset': 'utf-8'} + ``` + + :param line: Header line + :return: Dict of header line + """ + dict_value: Dict[str, Optional[str]] = {} + for item in line.split(";"): + if len(pair := item.split("=")) == 1: + dict_value[pair[0]] = None + else: + dict_value.setdefault(*pair) + return dict_value + + +def filename_from_headers(headers: Dict[str, str]) -> Optional[str]: + """ + Get file name from headers. + + Parse from ``Content-Disposition``. + + - Example: + ``` + filename_from_headers('attachment;filename*=utf-8\\'\\'README%2Emd;filename="README.md"') + ``` + + - Return: + ``` + README.md + ``` + + :param headers: HTTP headers + :return: File name + """ + if not (disposition := headers.get("Content-Disposition")): + if not (disposition := headers.get("content-disposition")): + return None + _, options = cgi.parse_header(disposition) # alternative: `parse_header` in `utils.py` + if filename := options.get("filename*"): + if len(name_with_charset := filename.split("''")) == 2: + charset, name = name_with_charset + return urllib.parse.unquote(name, charset) + if filename := options.get("filename"): + return urllib.parse.unquote(filename, config.downloader.encoding) + return None diff --git a/ktoolbox/utils.py b/ktoolbox/utils.py index 475447e..77a737b 100644 --- a/ktoolbox/utils.py +++ b/ktoolbox/utils.py @@ -1,10 +1,8 @@ import asyncio -import cgi import logging import sys -import urllib.parse from pathlib import Path -from typing import Generic, TypeVar, Optional, Dict, List, Tuple +from typing import Generic, TypeVar, Optional, List, Tuple import aiofiles from loguru import logger @@ -16,7 +14,6 @@ __all__ = [ "BaseRet", - "filename_from_headers", "generate_msg", "logger_init", "dump_search", @@ -40,68 +37,6 @@ def __bool__(self): return self.code == RetCodeEnum.Success -def parse_header(line: str) -> Dict[str, Optional[str]]: - """ - Alternative resolution for parsing header line. - - Apply when ``cgi.parse_header`` is unable to use due to the deprecation of `cgi` module. - - https://peps.python.org/pep-0594/#cgi - - - Example: - ``` - parse_header("text/html; charset=utf-8") - ``` - - - Return: - ``` - {'text/html': None, 'charset': 'utf-8'} - ``` - - :param line: Header line - :return: Dict of header line - """ - dict_value: Dict[str, Optional[str]] = {} - for item in line.split(";"): - if len(pair := item.split("=")) == 1: - dict_value[pair[0]] = None - else: - dict_value.setdefault(*pair) - return dict_value - - -def filename_from_headers(headers: Dict[str, str]) -> Optional[str]: - """ - Get file name from headers. - - Parse from ``Content-Disposition``. - - - Example: - ``` - filename_from_headers('attachment;filename*=utf-8\\'\\'README%2Emd;filename="README.md"') - ``` - - - Return: - ``` - README.md - ``` - - :param headers: HTTP headers - :return: File name - """ - if not (disposition := headers.get("Content-Disposition")): - if not (disposition := headers.get("content-disposition")): - return None - _, options = cgi.parse_header(disposition) # alternative: `parse_header` in `utils.py` - if filename := options.get("filename*"): - if len(name_with_charset := filename.split("''")) == 2: - charset, name = name_with_charset - return urllib.parse.unquote(name, charset) - if filename := options.get("filename"): - return urllib.parse.unquote(filename, config.downloader.encoding) - return None - - def generate_msg(title: str = None, **kwargs): """ Generate message for ``BaseRet`` and logger From 5c4562aea34113450caf217d0a0dbdbf966e8f92 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:35:58 +0800 Subject: [PATCH 06/13] fix: duplicate file check after HTTP connection started. Closes #88 --- ktoolbox/downloader/downloader.py | 28 +++++++++++++++------------- ktoolbox/downloader/utils.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index d7f66fa..0500713 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -17,7 +17,7 @@ from ktoolbox._enum import RetCodeEnum from ktoolbox.configuration import config -from ktoolbox.downloader import DownloaderRet, filename_from_headers +from ktoolbox.downloader import DownloaderRet, filename_from_headers, duplicate_file_check from ktoolbox.utils import generate_msg __all__ = ["Downloader"] @@ -52,7 +52,7 @@ def __init__( :param buffer_size: Number of bytes for file I/O buffer :param chunk_size: Number of bytes for chunk of download stream :param designated_filename: Manually specify the filename for saving - :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` is ``True``, \ + :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` enabled, \ it will be used as the save path. """ @@ -151,23 +151,15 @@ async def run( server_path_filename = unquote(Path(server_relpath_without_params).name) # Priority order can be referenced from the constructor's documentation save_filepath = self._path / (self._save_filename or server_path_filename) - duplicate_check_path = save_filepath # Get bucket file path bucket_file_path: Optional[Path] = None if config.downloader.use_bucket: bucket_file_path = config.downloader.bucket_path / server_relpath - duplicate_check_path = bucket_file_path # Check if the file exists - if duplicate_check_path.is_file(): - if config.downloader.use_bucket: - ret_msg = "Download file already exists in both bucket and local, skipping" - if not save_filepath.is_file(): - ret_msg = "Download file already exists in bucket, linking to target path" - os.link(bucket_file_path, save_filepath) - else: - ret_msg = "Download file already exists, skipping" + file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path) + if file_existed: return DownloaderRet( code=RetCodeEnum.FileExisted, message=generate_msg( @@ -199,9 +191,19 @@ async def run( # Priority order can be referenced from the constructor's documentation self._save_filename = self._designated_filename or filename_from_headers( res.headers) or server_path_filename + save_filepath = self._path / self._save_filename + file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path) + if file_existed: + return DownloaderRet( + code=RetCodeEnum.FileExisted, + message=generate_msg( + ret_msg, + path=save_filepath + ) + ) # Download - temp_filepath = Path(f"{(self._path / server_path_filename)}.{config.downloader.temp_suffix}") + temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}") total_size = int(length_str) if (length_str := res.headers.get("Content-Length")) else None async with aiofiles.open(str(temp_filepath), "wb", self._buffer_size) as f: chunk_iterator = res.aiter_bytes(self._chunk_size) diff --git a/ktoolbox/downloader/utils.py b/ktoolbox/downloader/utils.py index c1525c5..bacb47d 100644 --- a/ktoolbox/downloader/utils.py +++ b/ktoolbox/downloader/utils.py @@ -1,10 +1,12 @@ import cgi +import os import urllib.parse -from typing import Optional, Dict +from pathlib import Path +from typing import Optional, Dict, Tuple from ktoolbox.configuration import config -__all__ = ["filename_from_headers"] +__all__ = ["filename_from_headers", "duplicate_file_check"] def parse_header(line: str) -> Dict[str, Optional[str]]: @@ -67,3 +69,26 @@ def filename_from_headers(headers: Dict[str, str]) -> Optional[str]: if filename := options.get("filename"): return urllib.parse.unquote(filename, config.downloader.encoding) return None + + +def duplicate_file_check(local_file_path: Path, bucket_file_path: Path = None) -> Tuple[bool, Optional[str]]: + """ + Check if the file existed, and link the bucket filepath to local filepath \ + if ``DownloaderConfiguration.use_bucket`` enabled. + + :param local_file_path: Download target path + :param bucket_file_path: The bucket filepath of the local download path + :return: ``(if file existed, message)`` + """ + duplicate_check_path = bucket_file_path or local_file_path + if duplicate_check_path.is_file(): + if config.downloader.use_bucket: + ret_msg = "Download file already exists in both bucket and local, skipping" + if not local_file_path.is_file(): + ret_msg = "Download file already exists in bucket, linking to local path" + os.link(bucket_file_path, local_file_path) + else: + ret_msg = "Download file already exists, skipping" + return True, ret_msg + else: + return False, None From 44ca384dcede26c59fcff8b37a6abbb15c147e15 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 1 Apr 2024 00:42:20 +0800 Subject: [PATCH 07/13] fix: fixed circular import Closes #88 --- ktoolbox/downloader/downloader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index 0500713..88a4c26 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -17,7 +17,8 @@ from ktoolbox._enum import RetCodeEnum from ktoolbox.configuration import config -from ktoolbox.downloader import DownloaderRet, filename_from_headers, duplicate_file_check +from ktoolbox.downloader.base import DownloaderRet +from ktoolbox.downloader.utils import filename_from_headers, duplicate_file_check from ktoolbox.utils import generate_msg __all__ = ["Downloader"] From c1b3c94219a4a1c9c491563a8a5da035f5928fe3 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Sun, 7 Apr 2024 01:20:08 +0800 Subject: [PATCH 08/13] docs: Add docs for `length`, `offset` option again (lost in merge) [skip ci] --- README.md | 6 ++++++ README_zh-CN.md | 6 ++++++ docs/en/commands/guide.md | 15 +++++++++++++++ docs/en/index.md | 6 ++++++ docs/zh/commands/guide.md | 15 +++++++++++++++ docs/zh/index.md | 6 ++++++ 6 files changed, 54 insertions(+) diff --git a/README.md b/README.md index 4d3a85a..d32d3a8 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,12 @@ the downloaded files will be **skipped**. # Download all posts of the creator/artist ktoolbox sync-creator https://kemono.su/fanbox/user/9016 +# Download latest 10 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# Download latest No.11-No.15 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 + # Download posts from the creator/artist from 2024-1-1 to 2024-3-1 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --start-time=2024-1-1 --end-time=2024-3-1 ``` diff --git a/README_zh-CN.md b/README_zh-CN.md index 40c56da..29de32a 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -102,6 +102,12 @@ ktoolbox download-post https://kemono.su/fanbox/user/49494721/post/6608808 # 下载作者/画师的所有作品 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 +# 下载作者/画师最新的 10 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# 下载作者/画师最新的第 11 至 15 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 + # 下载作者/画师从 2024-1-1 到 2024-3-1 的作品 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --start-time=2024-1-1 --end-time=2024-3-1 ``` diff --git a/docs/en/commands/guide.md b/docs/en/commands/guide.md index 3c85d47..cf0b936 100644 --- a/docs/en/commands/guide.md +++ b/docs/en/commands/guide.md @@ -44,6 +44,21 @@ ktoolbox sync-creator https://kemono.su/fanbox/user/9016 ??? tip "Update creator directory" You can rerun the command, files with the same filename will be skipped. +## Download a specified number of posts from the creator + +`sync-creator` + +- `--offset`: Posts result offset (or start offset) +- `--length`: The number of posts to fetch, defaults to fetching all posts + +```bash +# Download latest 10 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# Download latest No.11-No.15 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 +``` + ## Download posts that published within the specified time range `sync-creator` diff --git a/docs/en/index.md b/docs/en/index.md index ea787d6..6389bac 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -93,6 +93,12 @@ ktoolbox download-post https://kemono.su/fanbox/user/49494721/post/6608808 # Download all posts of the creator/artist ktoolbox sync-creator https://kemono.su/fanbox/user/9016 +# Download latest 10 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# Download latest No.11-No.15 posts of the creator/artist +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 + # Download posts from the creator/artist from 2024-1-1 to 2024-3-1 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --start-time=2024-1-1 --end-time=2024-3-1 ``` diff --git a/docs/zh/commands/guide.md b/docs/zh/commands/guide.md index b038311..0b2e6b0 100644 --- a/docs/zh/commands/guide.md +++ b/docs/zh/commands/guide.md @@ -42,6 +42,21 @@ ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 ??? tip "更新作者目录" 你可以再次运行命令,文件名相同的文件将会被跳过。 +## 下载指定数量的作品 + +`sync-creator` + +- `--offset`:作品结果偏移量(或起始偏移量) +- `--length`:要获取的作品数量,默认获取所有作品 + +```bash +# 下载作者/画师最新的 10 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# 下载作者/画师最新的第 11 至 15 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 +``` + ## 下载在指定时间范围内发布的作品 `sync-creator` diff --git a/docs/zh/index.md b/docs/zh/index.md index fbdebeb..0a67de6 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -92,6 +92,12 @@ ktoolbox download-post https://kemono.su/fanbox/user/49494721/post/6608808 # 下载作者/画师的所有作品 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 +# 下载作者/画师最新的 10 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --length=10 + +# 下载作者/画师最新的第 11 至 15 个作品 +ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --offset=10 --length=5 + # 下载作者/画师从 2024-1-1 到 2024-3-1 的作品 ktoolbox sync-creator https://kemono.su/fanbox/user/9016 --start-time=2024-1-1 --end-time=2024-3-1 ``` From 6b0d747a5072210daa8c19822b48483c00cdb3af Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:01:07 +0800 Subject: [PATCH 09/13] fix: fix FileNotFoundError Closes #94 --- ktoolbox/action/job.py | 4 ++-- ktoolbox/downloader/downloader.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ktoolbox/action/job.py b/ktoolbox/action/job.py index 0e191d2..0b87d15 100644 --- a/ktoolbox/action/job.py +++ b/ktoolbox/action/job.py @@ -29,7 +29,7 @@ async def create_job_from_post( Create a list of download job from a post data :param post: post data - :param post_path: Path of the post directory + :param post_path: Path of the post directory, which needs to be sanitized :param post_structure: post path structure, ``False`` -> disable, \ ``True`` & ``None`` -> ``config.job.post_structure`` :param dump_post_data: Whether to dump post data (post.json) in post directory @@ -109,7 +109,7 @@ async def create_job_from_creator( :param service: The service where the post is located :param creator_id: The ID of the creator - :param path: The path for posts to download + :param path: The path for downloading posts, which needs to be sanitized :param all_pages: Fetch all posts, ``offset`` and ``length`` will be ignored if enabled :param offset: Result offset (or start offset) :param length: The number of posts to fetch diff --git a/ktoolbox/downloader/downloader.py b/ktoolbox/downloader/downloader.py index 88a4c26..1836160 100644 --- a/ktoolbox/downloader/downloader.py +++ b/ktoolbox/downloader/downloader.py @@ -11,6 +11,7 @@ import tenacity import tqdm.asyncio from loguru import logger +from pathvalidate import sanitize_filename from tenacity import wait_fixed, retry_if_result, retry_if_exception from tenacity.stop import stop_after_attempt, stop_never from tqdm import tqdm as std_tqdm @@ -49,10 +50,10 @@ def __init__( 3. Else use filename from 'file' part of ``server_path``. :param url: Download URL - :param path: Directory path to save the file + :param path: Directory path to save the file, which needs to be sanitized :param buffer_size: Number of bytes for file I/O buffer :param chunk_size: Number of bytes for chunk of download stream - :param designated_filename: Manually specify the filename for saving + :param designated_filename: Manually specify the filename for saving, which needs to be sanitized :param server_path: Server path of the file. if ``DownloaderConfiguration.use_bucket`` enabled, \ it will be used as the save path. """ @@ -190,8 +191,9 @@ async def run( # Get filename for saving and check if file exists (Second-time duplicate file check) # Priority order can be referenced from the constructor's documentation - self._save_filename = self._designated_filename or filename_from_headers( - res.headers) or server_path_filename + self._save_filename = self._designated_filename or sanitize_filename( + filename_from_headers(res.headers) + ) or server_path_filename save_filepath = self._path / self._save_filename file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path) if file_existed: From d7c824949d8b83bb7496e33c3e91dd3252281633 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:13:13 +0800 Subject: [PATCH 10/13] fix: Fix TypeError: '<' not supported between instances of 'NoneType' and 'datetime.datetime' Closes #93 --- ktoolbox/action/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ktoolbox/action/utils.py b/ktoolbox/action/utils.py index 1159404..a38ab5b 100644 --- a/ktoolbox/action/utils.py +++ b/ktoolbox/action/utils.py @@ -47,9 +47,10 @@ def _match_post_time( :param end_time: End time of the time range :return: Whether if the post publish date match the time range """ - if start_time and post.published < start_time: + post_date = post.published or post.added + if start_time and post_date and post_date < start_time: return False - if end_time and post.published > end_time: + if end_time and post_date and post_date > end_time: return False return True From 84ebc30b8a8cf135cc013fe5f807f4429a5531a5 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:18:40 +0800 Subject: [PATCH 11/13] refactor: rename `time` to `date` --- ktoolbox/action/job.py | 4 ++-- ktoolbox/action/utils.py | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ktoolbox/action/job.py b/ktoolbox/action/job.py index 0b87d15..9981209 100644 --- a/ktoolbox/action/job.py +++ b/ktoolbox/action/job.py @@ -10,7 +10,7 @@ from ktoolbox._enum import PostFileTypeEnum, DataStorageNameEnum from ktoolbox.action import ActionRet, fetch_creator_posts, FetchInterruptError -from ktoolbox.action.utils import generate_post_path_name, filter_posts_by_time +from ktoolbox.action.utils import generate_post_path_name, filter_posts_by_date from ktoolbox.api.model import Post, Attachment from ktoolbox.configuration import config, PostStructureConfiguration from ktoolbox.job import Job, CreatorIndices @@ -147,7 +147,7 @@ async def create_job_from_creator( # Filter posts by publish time if start_time or end_time: - post_list = list(filter_posts_by_time(post_list, start_time, end_time)) + post_list = list(filter_posts_by_date(post_list, start_time, end_time)) logger.info(f"Get {len(post_list)} posts, start creating jobs") # Filter posts and generate ``CreatorIndices`` diff --git a/ktoolbox/action/utils.py b/ktoolbox/action/utils.py index a38ab5b..5c928b2 100644 --- a/ktoolbox/action/utils.py +++ b/ktoolbox/action/utils.py @@ -8,7 +8,7 @@ from ktoolbox.configuration import config from ktoolbox.job import CreatorIndices -__all__ = ["generate_post_path_name", "filter_posts_by_time", "filter_posts_by_indices"] +__all__ = ["generate_post_path_name", "filter_posts_by_date", "filter_posts_by_indices"] def generate_post_path_name(post: Post) -> str: @@ -34,40 +34,40 @@ def generate_post_path_name(post: Post) -> str: exit(1) -def _match_post_time( +def _match_post_date( post: Post, - start_time: Optional[datetime], - end_time: Optional[datetime] + start_date: Optional[datetime], + end_date: Optional[datetime] ) -> bool: """ - Check if the post publish date match the time range. + Check if the post date match the time range. :param post: Target post object - :param start_time: Start time of the time range - :param end_time: End time of the time range + :param start_date: Start time of the time range + :param end_date: End time of the time range :return: Whether if the post publish date match the time range """ post_date = post.published or post.added - if start_time and post_date and post_date < start_time: + if start_date and post_date and post_date < start_date: return False - if end_time and post_date and post_date > end_time: + if end_date and post_date and post_date > end_date: return False return True -def filter_posts_by_time( +def filter_posts_by_date( post_list: List[Post], - start_time: Optional[datetime], - end_time: Optional[datetime] + start_date: Optional[datetime], + end_date: Optional[datetime] ) -> Generator[Post, Any, Any]: """ - Filter posts by publish time range + Filter posts by publish date range :param post_list: List of posts - :param start_time: Start time of the time range - :param end_time: End time of the time range + :param start_date: Start time of the time range + :param end_date: End time of the time range """ - post_filter = filter(lambda x: _match_post_time(x, start_time, end_time), post_list) + post_filter = filter(lambda x: _match_post_date(x, start_date, end_date), post_list) yield from post_filter From d697847592b6f7b2dad3cdb507accac610254878 Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:20:09 +0800 Subject: [PATCH 12/13] build: bump version --- ktoolbox/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ktoolbox/__init__.py b/ktoolbox/__init__.py index 16fdc82..32ae349 100644 --- a/ktoolbox/__init__.py +++ b/ktoolbox/__init__.py @@ -1,4 +1,4 @@ __title__ = "KToolBox" # noinspection SpellCheckingInspection __description__ = "A useful CLI tool for downloading posts in Kemono.party / .su" -__version__ = "0.5.1" +__version__ = "0.5.2" diff --git a/pyproject.toml b/pyproject.toml index a03fa59..3a82041 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ktoolbox" -version = "0.5.1" +version = "0.5.2" description = "A useful CLI tool for downloading posts in Kemono.party / .su" authors = ["Ljzd-PRO "] readme = "README.md" From d88682a84b4e075a1f0aff666a15902ef7eafc5d Mon Sep 17 00:00:00 2001 From: Ljzd-PRO <63289359+Ljzd-PRO@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:40:29 +0800 Subject: [PATCH 13/13] docs: update CHANGELOG.md [skip ci] --- CHANGELOG.md | 54 ++++++++++++++++++---------------------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af8543a..2aa1e4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,41 +1,23 @@ ## Changes -### 💡 Feature - -- Added support for downloading works within a specified range of quantity. - - Added `--offset`, `--length` options in `sync-creator` command - - `--offset`: Posts result offset (or start offset) - - `--length`: The number of posts to fetch, defaults to fetching all posts - - ```bash - # Download latest 10 posts of the creator/artist - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --length=10 - - # Download latest No.11-No.15 posts of the creator/artist - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --offset=10 --length=5 - - # Download all posts of the creator/artist - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx - ``` +[//]: # (### 💡 Feature) + +### 🪲 Fix + +- Fix `FileNotFoundError` occurred when filename contains special characters (#94) +- Fix `TypeError` occurred when using `--start-time`, `--end-time` options and posts had no `published` property (#93) +- Fixed incorrect argument order when using bucket storage (#89 - @Nacosia) +- Duplicate file check after HTTP connection started (#88) - - - -### 💡 新特性 - -- 增加下载指定数量范围作品的支持 - - 在 `sync-creator` 命令中增加了 `--offset`, `--length` 选项 - - `--offset`:作品结果偏移量(或起始偏移量) - - `--length`:要获取的作品数量,默认获取所有作品 - - ```bash - # 下载作者/画师最新的 10 个作品 - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --length=10 - - # 下载作者/画师最新的第 11 至 15 个作品 - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --offset=10 --length=5 - - # 下载作者/画师的所有作品 - ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx - ``` - -**Full Changelog**: https://github.com/Ljzd-PRO/KToolBox/compare/v0.5.0...v0.5.1 \ No newline at end of file +[//]: # (### 💡 新特性) + +### 🪲 Fix + +- 修复当文件名包含特殊字符时会出现 `FileNotFoundError` 错误的问题 (#94) +- 修复当使用 `--start-time`, `--end-time` 参数且作品 `published` 属性不存在的情况下会出现 `TypeError` 错误的问题 (#93) +- 修复当使用桶储存时参数顺序不正确的问题 (#89 - @Nacosia) +- 在建立 HTTP 连接后进行重复文件检查 (#88) + +**Full Changelog**: https://github.com/Ljzd-PRO/KToolBox/compare/v0.5.1...v0.5.2 \ No newline at end of file