Skip to content

Commit

Permalink
feat: finished filter_posts_with_indices and add some other actions
Browse files Browse the repository at this point in the history
  • Loading branch information
Ljzd-PRO committed Nov 8, 2023
1 parent 4eeaf33 commit efb3341
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 23 deletions.
1 change: 1 addition & 0 deletions ktoolbox/action/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .base import *
from .fetch import *
from .job import *
from .search import *
38 changes: 38 additions & 0 deletions ktoolbox/action/fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import AsyncGenerator, List, Optional

from ktoolbox.api.model import Post
from ktoolbox.api.posts import get_creator_post
from ktoolbox.api.utils import SEARCH_STEP
from ktoolbox.utils import BaseRet

# Public names exported by this module on `from .fetch import *`
__all__ = ["FetchInterruptError", "fetch_all_creator_posts"]


class FetchInterruptError(Exception):
    """
    Exception for interrupt of data fetching

    Carries the result object of the request that interrupted the fetch, so the
    caller can inspect or forward it.
    """

    def __init__(self, *args, ret: Optional[BaseRet] = None):
        """
        :param args: Positional arguments passed through to `Exception`
        :param ret: The result object that caused the interruption, `None` if not provided
        """
        super().__init__(*args)
        self.ret = ret


async def fetch_all_creator_posts(service: str, creator_id: str) -> AsyncGenerator[List[Post], None]:
    """
    Fetch all posts from a creator, page by page

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :return: Async generator that yields one list of posts for each fetched page
    :raise FetchInterruptError: When a page request returns a falsy result, \
    the failed result is attached to the exception as `ret`
    """
    o = 0
    while True:
        ret = await get_creator_post(service=service, creator_id=creator_id, o=o)
        if not ret:
            raise FetchInterruptError(ret=ret)
        yield ret.data
        # A page shorter than `SEARCH_STEP` is treated as the last page
        if len(ret.data) < SEARCH_STEP:
            break
        o += SEARCH_STEP
78 changes: 59 additions & 19 deletions ktoolbox/action/job.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from pathlib import Path
from typing import List, Union
from typing import List, Union, Tuple

import aiofiles

from ktoolbox.action import ActionRet
from ktoolbox.action import ActionRet, fetch_all_creator_posts, FetchInterruptError
from ktoolbox.api.model import Post
from ktoolbox.api.posts import get_creator_post
from ktoolbox.api.utils import SEARCH_STEP
from ktoolbox.configuration import config, PostStructureConfiguration
from ktoolbox.enum import PostFileTypeEnum, DataStorageNameEnum
from ktoolbox.job import Job, CreatorIndices

__all__ = ["create_job_from_post"]
__all__ = ["create_job_from_post", "filter_posts_with_indices", "create_job_from_creator"]


async def create_job_from_post(
Expand Down Expand Up @@ -66,7 +65,27 @@ async def create_job_from_post(
return jobs


# TODO
def filter_posts_with_indices(posts: List[Post], indices: CreatorIndices) -> Tuple[List[Post], CreatorIndices]:
    """
    Compare and filter posts by `CreatorIndices` data

    Only keep posts that are not recorded in ``indices`` yet, or that were edited
    after the recorded version.

    :param posts: Posts to filter
    :param indices: `CreatorIndices` data to compare with, it is not modified
    :return: The filtered `List[Post]` and an updated **new** `CreatorIndices` instance
    """

    def _needs_update(post: Post) -> bool:
        recorded = indices.posts.get(post.id)
        if recorded is None:
            # Never recorded before: it is a new post, always keep it
            return True
        if post.edited is None:
            # No edit time to compare with, treat the recorded version as up to date
            # NOTE(review): assumes `edited` may be `None` on `Post` - confirm against the model
            return False
        return recorded.edited is None or post.edited > recorded.edited

    new_list = [post for post in posts if _needs_update(post)]
    # Deep copy so that the caller's `indices` stays untouched
    new_indices = indices.model_copy(deep=True)
    for post in new_list:
        new_indices.posts[post.id] = post
    return new_list, new_indices


async def create_job_from_creator(
service: str,
creator_id: str,
Expand All @@ -75,7 +94,7 @@ async def create_job_from_creator(
update_from: CreatorIndices = None,
all_pages: bool = False,
o: int = None,
save_creator_indices: bool = False,
save_creator_indices: bool = True,
mix_posts: bool = None
) -> ActionRet[List[Job]]:
"""
Expand All @@ -84,39 +103,60 @@ async def create_job_from_creator(
:param service: The service where the post is located
:param creator_id: The ID of the creator
:param path: The path for posts to download
:param update_from: `CreatorIndices` data for update posts from current creator directory
:param update_from: `CreatorIndices` data for update posts from current creator directory, \
`save_creator_indices` will be enabled if this provided
:param all_pages: Fetch all pages of posts, `o` will be ignored if enabled
:param o: Result offset, stepping of 50 is enforced
:param save_creator_indices: Record `CreatorIndices` data for update posts from current creator directory
:param mix_posts: Save all files from different posts at same path, \
`update_from`, `save_creator_indices` will be ignored if enabled
"""
mix_posts = config.job.mix_posts if mix_posts is None else mix_posts

# Get posts
if all_pages:
post_list: List[Post] = []
o = 0
while True:
ret = await get_creator_post(service=service, creator_id=creator_id, o=o)
if ret:
post_list += ret.data
if len(ret.data) < SEARCH_STEP:
break
else:
o += SEARCH_STEP
else:
return ActionRet(**ret.model_dump(mode="python"))
try:
async for part in fetch_all_creator_posts(service=service, creator_id=creator_id):
post_list += part
except FetchInterruptError as e:
return ActionRet(**e.ret.model_dump(mode="python"))
else:
ret = await get_creator_post(service=service, creator_id=creator_id, o=o)
if ret:
post_list = ret.data
else:
return ActionRet(**ret.model_dump(mode="python"))

# Filter posts and generate `CreatorIndices`
if not mix_posts:
indices = None
if update_from:
post_list, indices = filter_posts_with_indices(post_list, update_from)
elif save_creator_indices: # It's unnecessary to create indices again when `update_from` was provided
indices = CreatorIndices(
creator_id=creator_id,
service=service,
posts={path / post.title: post for post in post_list}
)
if indices:
async with aiofiles.open(path / DataStorageNameEnum.CreatorIndicesData.value) as f:
await f.write(indices.model_dump_json(indent=config.json_dump_indent))

job_list: List[Job] = []
for post in post_list:
# Get post path
default_post_path = path if mix_posts else path / post.title
if update_from:
if not (post_path := update_from.posts_path.get(post.id)):
post_path = default_post_path
else:
post_path = default_post_path

# Generate jobs
job_list += await create_job_from_post(
post=post,
post_path=path if mix_posts else path / post.title,
post_path=post_path,
post_structure=False if mix_posts else None,
dump_post_data=not mix_posts
)
Expand Down
12 changes: 8 additions & 4 deletions ktoolbox/job/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ class CreatorIndices(BaseKToolBoxData):
Record the path of each downloaded post.
"""
creator: Creator
"""Creator data"""
posts: Dict[Path, Post] = {}
"""Posts path and their `Post` data"""
creator_id: str
"""Creator ID"""
service: str
"""Creator service"""
posts: Dict[str, Post] = {}
"""All posts, `id` -> `Post`"""
posts_path: Dict[str, Path] = {}
"""Posts and their path, `id` -> `Path`"""

0 comments on commit efb3341

Please sign in to comment.