Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Concurrent Downloads #21

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 29 additions & 28 deletions src/welearnbot/action_handlers.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from argparse import Namespace
from configparser import RawConfigParser
from datetime import datetime
import sys
from typing import List
from typing import Any, List, Tuple

from bs4 import BeautifulSoup as bs
from datetime import datetime

from moodlews.service import MoodleClient, ServerFunctions
from welearnbot import resolvers
from welearnbot.gcal import publish_gcal_event
from welearnbot.utils import get_resource, read_cache, write_cache, show_file_statuses
from welearnbot.utils import (
get_resource,
get_resources,
read_cache,
show_file_statuses,
write_cache,
)


def handle_whoami(moodle: MoodleClient) -> None:
Expand Down Expand Up @@ -181,40 +188,34 @@ def handle_files(
for courseid in course_ids:
course_name = course_ids[courseid]
page = moodle.server(ServerFunctions.COURSE_CONTENTS, courseid=courseid)
# For each course we populate resources and download all of them in
# parallel
# List[Tuple[resource, subpath]]
resources_data: List[Tuple[Any, str]] = []

for item in page:
modules = item.get("modules", [])
for module in modules:
modname = module.get("modname", "")
if modname == "resource":
for resource in module["contents"]:
file_statuses.append(
get_resource(
args,
moodle,
ignore_types,
resource,
prefix_path,
course_name,
link_cache,
token,
)
)
resources_data.append((resource, ""))
elif modname == "folder":
folder_name = module.get("name", "")
for resource in module["contents"]:
file_statuses.append(
get_resource(
args,
moodle,
ignore_types,
resource,
prefix_path,
course_name,
link_cache,
token,
subfolder=folder_name,
)
)
resources_data.append((resource, folder_name))

# download all the resources for the course, and append their statuses
course_file_statuses = get_resources(args,
moodle,
ignore_types,
resources_data,
prefix_path,
course_name,
link_cache,
token
)
file_statuses.extend(course_file_statuses)

write_cache(link_cache_filepath, link_cache)
show_file_statuses(file_statuses, verbose=args.verbose)
54 changes: 47 additions & 7 deletions src/welearnbot/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from moodlews.service import MoodleClient

from argparse import Namespace
from typing import Any, Dict, List, Tuple

from concurrent.futures import ThreadPoolExecutor
import json
import os
import mimetypes
import os
from typing import Any, Dict, List, Tuple

from moodlews.service import MoodleClient


def read_cache(filepath: str) -> dict:
Expand Down Expand Up @@ -97,19 +97,59 @@ def get_resource(
# Download the file and write to the folder
print(
" " * indent + "Downloading " + short_filepath,
end="",
flush=True,
)
response = moodle.response(fileurl, token=token)
with open(filepath, "wb") as download:
download.write(response.content)
print(" ... DONE")
print(" " * indent + short_filepath + " ... DONE", flush=True)

# Add the file url to the cache
cache[fileurl] = timemodified
return "DOWNLOADED", short_filepath


def get_resources(
    args: Namespace,
    moodle: MoodleClient,
    ignore_types: List[str],
    resources_data: List[Tuple[Any, str]],
    prefix: str,
    course: str,
    cache: dict,
    token: str,
) -> List[Tuple[str, str]]:
    """
    Fetch a batch of resources concurrently by fanning out to get_resource.

    resources_data is a list of (resource, subfolder) pairs: the resource
    entry to fetch and the subfolder it should be placed in. Every other
    argument is forwarded unchanged to each get_resource call, so the same
    cache dict is shared by all worker threads.

    Returns one get_resource result per entry, in the same order as
    resources_data.
    """

    def _fetch_one(entry: Tuple[Any, str]) -> Tuple[str, str]:
        # Unpacking happens inside the worker, so a malformed entry surfaces
        # as that task's exception rather than aborting the submission loop.
        item, target_folder = entry
        return get_resource(
            args,
            moodle,
            ignore_types,
            item,
            prefix,
            course,
            cache,
            token,
            subfolder=target_folder,
        )

    # Default-sized thread pool; leaving the with-block waits for every
    # submitted task to finish.
    with ThreadPoolExecutor() as pool:
        pending = [pool.submit(_fetch_one, entry) for entry in resources_data]

    # Results are collected only after the pool has shut down. An exception
    # raised inside a worker is re-raised here, by the first failing task in
    # submission order, when its result is retrieved.
    return [task.result() for task in pending]


def show_file_statuses(file_statuses, verbose=False) -> None:
"""Helper function to print ignored, missing files"""
ignored = []
Expand Down