diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 03ca112..36ad13a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,8 +3,6 @@ name: GitHub Release on: workflow_dispatch: push: - tags: - - "*" permissions: contents: write @@ -43,8 +41,3 @@ jobs: body_path: CHANGELOG.md files: artifact/*.zip prerelease: contains(github.ref, 'beta') - - publish: - needs: - - release - uses: ./.github/workflows/python-publish.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index c1c0071..af8543a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,61 +1,41 @@ ## Changes -### 🐍 Fix - -- Fixed download failure when server returns an invalid filename (`Attachment.name`) (#73) - ### 💡 Feature -- Add support for local storage bucket mode (#74) (@Nacosia) - - Edit `KTOOLBOX_DOWNLOADER__USE_BUCKET`, `KTOOLBOX_DOWNLOADER_BUCKET_PATH` in `prod.env` or environment variables to set this option - - 📖More information: [Configuration-Reference-DownloaderConfiguration](https://ktoolbox.readthedocs.io/latest/configuration/reference/#ktoolbox.configuration.DownloaderConfiguration) - -- Add support for customizing the post directory name format (#45, #46) - - Edit `KTOOLBOX_JOB__POST_DIRNAME_FORMAT` in `prod.env` or environment variables to set this option - - 📖More information: [Configuration-Reference-JobConfiguration](https://ktoolbox.readthedocs.io/latest/configuration/reference/#ktoolbox.configuration.JobConfiguration) - ```dotenv - # It will create directories like `[2024-1-1]HelloWorld` - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="[{published}]{title}" - ``` - ```dotenv - # It will create directories like `2024-1-1_12345_112233` - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="{published}_{user}_{id}" - ``` - ```dotenv - # Default value. 
It will create directories like `HelloWorld` - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="{title}" - ``` - -- Marked `JobConfiguration.post_id_as_path` as deprecated, use `JobConfiguration.post_dirname_format` instead +- Added support for downloading works within a specified range of quantity. + - Added `--offset`, `--length` options in `sync-creator` command + - `--offset`: Posts result offset (or start offset) + - `--length`: The number of posts to fetch, defaults to fetching all posts + + ```bash + # Download latest 10 posts of the creator/artist + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --length=10 + + # Download latest No.11-No.15 posts of the creator/artist + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --offset=10 --length=5 + + # Download all posts of the creator/artist + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx + ``` - - - -### 🐍 修复 - -- 修复当服务器返回的文件名不合法时下载出错的问题 (`Attachment.name`) (#73) - ### 💡 新特性 -- 增加本地存储桶模式的存储支持 (#74) (@Nacosia) - - 在 `prod.env` 或环境变量中编辑 `KTOOLBOX_DOWNLOADER__USE_BUCKET`, `KTOOLBOX_DOWNLOADER_BUCKET_PATH` 以设置该选项 - - 📖更多信息: [Configuration-Reference-DownloaderConfiguration](https://ktoolbox.readthedocs.io/latest/configuration/reference/#ktoolbox.configuration.DownloaderConfiguration) - -- 增加支持自定义作品目录名格式 (#45, #46) - - 在 `prod.env` 或环境变量中编辑 `KTOOLBOX_JOB__POST_DIRNAME_FORMAT` 以设置该选项 - - 📖更多信息: [Configuration-Reference-JobConfiguration](https://ktoolbox.readthedocs.io/latest/configuration/reference/#ktoolbox.configuration.JobConfiguration) - ```dotenv - # 将会创建例如 `[2024-1-1]HelloWorld` 的目录名 - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="[{published}]{title}" - ``` - ```dotenv - # 将会创建例如 `2024-1-1_12345_112233` 的目录名 - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="{published}_{user}_{id}" - ``` - ```dotenv - # 默认值。 将会创建例如 `HelloWorld` 的目录名 - KTOOLBOX_JOB__POST_DIRNAME_FORMAT="{title}" - ``` - -- 将 `JobConfiguration.post_id_as_path` 标记为已弃用, 请用 `JobConfiguration.post_dirname_format` 取代 - -**Full Changelog**: 
https://github.com/Ljzd-PRO/KToolBox/compare/v0.4.0...v0.5.0 \ No newline at end of file +- 增加下载指定数量范围作品的支持 + - 在 `sync-creator` 命令中增加了 `--offset`, `--length` 选项 + - `--offset`:作品结果偏移量(或起始偏移量) + - `--length`:要获取的作品数量,默认获取所有作品 + + ```bash + # 下载作者/画师最新的 10 个作品 + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --length=10 + + # 下载作者/画师最新的第 11 至 15 个作品 + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx --offset=10 --length=5 + + # 下载作者/画师的所有作品 + ktoolbox sync-creator https://kemono.su/fanbox/user/xxxx + ``` + +**Full Changelog**: https://github.com/Ljzd-PRO/KToolBox/compare/v0.5.0...v0.5.1 \ No newline at end of file diff --git a/ktoolbox/__init__.py b/ktoolbox/__init__.py index 1837837..16fdc82 100644 --- a/ktoolbox/__init__.py +++ b/ktoolbox/__init__.py @@ -1,4 +1,4 @@ __title__ = "KToolBox" # noinspection SpellCheckingInspection __description__ = "A useful CLI tool for downloading posts in Kemono.party / .su" -__version__ = "0.5.0" +__version__ = "0.5.1" diff --git a/ktoolbox/action/fetch.py b/ktoolbox/action/fetch.py index c5ef3f6..b32f70d 100644 --- a/ktoolbox/action/fetch.py +++ b/ktoolbox/action/fetch.py @@ -5,7 +5,7 @@ from ktoolbox.api.utils import SEARCH_STEP from ktoolbox.utils import BaseRet -__all__ = ["FetchInterruptError", "fetch_all_creator_posts"] +__all__ = ["FetchInterruptError", "fetch_creator_posts"] class FetchInterruptError(Exception): @@ -16,16 +16,16 @@ def __init__(self, *args, ret: BaseRet = None): self.ret = ret -async def fetch_all_creator_posts(service: str, creator_id: str) -> AsyncGenerator[List[Post], Any]: +async def fetch_creator_posts(service: str, creator_id: str, o: int = 0) -> AsyncGenerator[List[Post], Any]: """ - Fetch all posts from a creator + Fetch posts from a creator :param service: The service where the post is located :param creator_id: The ID of the creator + :param o: Result offset, stepping of 50 is enforced :return: Async generator of several list of posts :raise FetchInterruptError """ - o = 0 while 
True: ret = await get_creator_post(service=service, creator_id=creator_id, o=o) if ret: diff --git a/ktoolbox/action/job.py b/ktoolbox/action/job.py index 58b67e9..0e191d2 100644 --- a/ktoolbox/action/job.py +++ b/ktoolbox/action/job.py @@ -1,4 +1,5 @@ from datetime import datetime +from itertools import count from pathlib import Path from typing import List, Union, Optional from urllib.parse import urlparse @@ -8,10 +9,9 @@ from pathvalidate import sanitize_filename, is_valid_filename from ktoolbox._enum import PostFileTypeEnum, DataStorageNameEnum -from ktoolbox.action import ActionRet, fetch_all_creator_posts, FetchInterruptError +from ktoolbox.action import ActionRet, fetch_creator_posts, FetchInterruptError from ktoolbox.action.utils import generate_post_path_name, filter_posts_by_time from ktoolbox.api.model import Post, Attachment -from ktoolbox.api.posts import get_creator_post from ktoolbox.configuration import config, PostStructureConfiguration from ktoolbox.job import Job, CreatorIndices @@ -97,7 +97,8 @@ async def create_job_from_creator( path: Path, *, all_pages: bool = False, - o: int = None, + offset: int = 0, + length: Optional[int] = 50, save_creator_indices: bool = True, mix_posts: bool = None, start_time: Optional[datetime], @@ -109,8 +110,9 @@ async def create_job_from_creator( :param service: The service where the post is located :param creator_id: The ID of the creator :param path: The path for posts to download - :param all_pages: Fetch all pages of posts, ``o`` will be ignored if enabled - :param o: Result offset, stepping of 50 is enforced + :param all_pages: Fetch all posts, ``offset`` and ``length`` will be ignored if enabled + :param offset: Result offset (or start offset) + :param length: The number of posts to fetch :param save_creator_indices: Record ``CreatorIndices`` data for update posts from current creator directory :param mix_posts: Save all files from different posts at same path, \ ``update_from``, ``save_creator_indices`` 
will be ignored if enabled @@ -121,19 +123,27 @@ async def create_job_from_creator( # Get posts logger.info(f"Start fetching posts from creator {creator_id}") + post_list: List[Post] = [] + start_offset = offset - offset % 50 if all_pages: - post_list: List[Post] = [] - try: - async for part in fetch_all_creator_posts(service=service, creator_id=creator_id): + page_counter = count() + else: + page_num = length // 50 + 1 + page_counter = iter(range(page_num)) + + try: + async for part in fetch_creator_posts(service=service, creator_id=creator_id, o=start_offset): + if next(page_counter, None) is not None: post_list += part - except FetchInterruptError as e: - return ActionRet(**e.ret.model_dump(mode="python")) + else: + break + except FetchInterruptError as e: + return ActionRet(**e.ret.model_dump(mode="python")) + + if not all_pages: + post_list = post_list[offset % 50:][:length] else: - ret = await get_creator_post(service=service, creator_id=creator_id, o=o) - if ret: - post_list = ret.data - else: - return ActionRet(**ret.model_dump(mode="python")) + post_list = post_list[offset % 50:] # Filter posts by publish time if start_time or end_time: diff --git a/ktoolbox/cli.py b/ktoolbox/cli.py index a7d0e41..e45d5a2 100644 --- a/ktoolbox/cli.py +++ b/ktoolbox/cli.py @@ -1,6 +1,6 @@ from datetime import datetime from pathlib import Path -from typing import Union, overload, Tuple +from typing import Union, overload import aiofiles from loguru import logger @@ -191,7 +191,6 @@ async def sync_creator( *, save_creator_indices: bool = True, mix_posts: bool = None, - time_range: Tuple[str, str] = None, start_time: str = None, end_time: str = None ): @@ -206,7 +205,6 @@ async def sync_creator( *, save_creator_indices: bool = True, mix_posts: bool = None, - time_range: Tuple[str, str] = None, start_time: str = None, end_time: str = None ): @@ -222,10 +220,12 @@ async def sync_creator( save_creator_indices: bool = True, mix_posts: bool = None, start_time: str = None, - 
end_time: str = None + end_time: str = None, + offset: int = 0, + length: int = None ): """ - Sync all posts from a creator + Sync posts from a creator You can update the directory anytime after download finished, \ such as to update after creator published new posts. @@ -238,7 +238,7 @@ async def sync_creator( :param creator_id: The ID of the creator :param path: Download path, default is current directory :param save_creator_indices: Record ``CreatorIndices`` data for update posts from current creator directory - :param mix_posts: Save all files from different posts at same path, \ + :param mix_posts: Save all files from different posts at same path, \ ``update_from``, ``save_creator_indices`` will be ignored if enabled :param start_time: Start time of the published time range for posts downloading. \ Set to ``0`` if ``None`` was given. \ @@ -246,6 +246,8 @@ async def sync_creator( :param end_time: End time of the published time range for posts downloading. \ Set to latest time (infinity) if ``None`` was given. \ Time format: ``%Y-%m-%d`` + :param offset: Result offset (or start offset) + :param length: The number of posts to fetch, defaults to fetching all posts after ``offset``. """ # Get service, creator_id if url: @@ -288,7 +290,9 @@ async def sync_creator( service=service, creator_id=creator_id, path=creator_path, - all_pages=True, + all_pages=not length, + offset=offset, + length=length, save_creator_indices=save_creator_indices, mix_posts=mix_posts, start_time=datetime.strptime(start_time, "%Y-%m-%d") if start_time else None, diff --git a/pyproject.toml b/pyproject.toml index 9a4de62..a03fa59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ktoolbox" -version = "0.5.0" +version = "0.5.1" description = "A useful CLI tool for downloading posts in Kemono.party / .su" authors = ["Ljzd-PRO "] readme = "README.md"