diff --git a/src/welearnbot/action_handlers.py b/src/welearnbot/action_handlers.py index 49840b7..e6d8d94 100644 --- a/src/welearnbot/action_handlers.py +++ b/src/welearnbot/action_handlers.py @@ -1,14 +1,21 @@ from argparse import Namespace from configparser import RawConfigParser +from datetime import datetime import sys -from typing import List +from typing import Any, List, Tuple + from bs4 import BeautifulSoup as bs -from datetime import datetime from moodlews.service import MoodleClient, ServerFunctions from welearnbot import resolvers from welearnbot.gcal import publish_gcal_event -from welearnbot.utils import get_resource, read_cache, write_cache, show_file_statuses +from welearnbot.utils import ( + get_resource, + get_resources, + read_cache, + show_file_statuses, + write_cache, +) def handle_whoami(moodle: MoodleClient) -> None: @@ -181,40 +188,34 @@ def handle_files( for courseid in course_ids: course_name = course_ids[courseid] page = moodle.server(ServerFunctions.COURSE_CONTENTS, courseid=courseid) + # For each course we populate resources and download all of them in + # parallel + # List[Tuple[resource, subpath]] + resources_data: List[Tuple[Any, str]] = [] + for item in page: modules = item.get("modules", []) for module in modules: modname = module.get("modname", "") if modname == "resource": for resource in module["contents"]: - file_statuses.append( - get_resource( - args, - moodle, - ignore_types, - resource, - prefix_path, - course_name, - link_cache, - token, - ) - ) + resources_data.append((resource, "")) elif modname == "folder": folder_name = module.get("name", "") for resource in module["contents"]: - file_statuses.append( - get_resource( - args, - moodle, - ignore_types, - resource, - prefix_path, - course_name, - link_cache, - token, - subfolder=folder_name, - ) - ) + resources_data.append((resource, folder_name)) + + # download all the resources for the course, and append their statuses + course_file_statuses = get_resources(args, + 
moodle, + ignore_types, + resources_data, + prefix_path, + course_name, + link_cache, + token + ) + file_statuses.extend(course_file_statuses) write_cache(link_cache_filepath, link_cache) show_file_statuses(file_statuses, verbose=args.verbose) diff --git a/src/welearnbot/utils.py b/src/welearnbot/utils.py index c3807fc..dfb524d 100644 --- a/src/welearnbot/utils.py +++ b/src/welearnbot/utils.py @@ -1,11 +1,11 @@ -from moodlews.service import MoodleClient - from argparse import Namespace -from typing import Any, Dict, List, Tuple - +from concurrent.futures import ThreadPoolExecutor import json -import os import mimetypes +import os +from typing import Any, Dict, List, Tuple + +from moodlews.service import MoodleClient def read_cache(filepath: str) -> dict: @@ -97,19 +97,59 @@ def get_resource( # Download the file and write to the folder print( " " * indent + "Downloading " + short_filepath, - end="", flush=True, ) response = moodle.response(fileurl, token=token) with open(filepath, "wb") as download: download.write(response.content) - print(" ... DONE") + print(" " * indent + short_filepath + " ... 
DONE", flush=True) # Add the file url to the cache cache[fileurl] = timemodified return "DOWNLOADED", short_filepath +def get_resources( + args: Namespace, + moodle: MoodleClient, + ignore_types: List[str], + resources_data: List[Tuple[Any, str]], + prefix: str, + course: str, + cache: dict, + token: str, +) -> List[Tuple[str, str]]: + """ + This is a wrapper over get_resource that parallelizes downloads + + resources_data is a list of resource_data + where resource_data is the data of what needs to be downloaded with + its folder location like this + Tuple[resource, subfolder] + """ + + def _get_resource(resource_data: Tuple[Any, str]) -> Tuple[str, str]: + resource, folder_name = resource_data + return get_resource(args, + moodle, + ignore_types, + resource, + prefix, + course, + cache, + token, + subfolder=folder_name) + + with ThreadPoolExecutor() as exe: + file_statuses = exe.map(_get_resource, resources_data) + + # https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor.map + # exception handling must be done while retrieving + # the items for the map's iterator + # i.e., exceptions will be raised here while converting the iterator into a list + return list(file_statuses) + + def show_file_statuses(file_statuses, verbose=False) -> None: """Helper function to print ignored, missing files""" ignored = []