diff --git a/src/scrape/model/page/courses_in_program.py b/src/scrape/model/page/courses_in_program.py
index 527f0e1..4692f4a 100644
--- a/src/scrape/model/page/courses_in_program.py
+++ b/src/scrape/model/page/courses_in_program.py
@@ -19,41 +19,30 @@ def first_field_matching(program):
     return next(fields_list)
 
 
-def get_program_page(program: str, field: str = None,
-                     level: str = None) -> str:
+def get_program_page(program: str, field: str, level: str) -> str:
     """
     For given program, return the content of the program's page in the course
     catalogue. Lookup is done on data returned by sibling methods
-    until a reasonable or exact match is found, or the search is exhausted.
-    If the field is provided, shortcut the lookup by trying to match
-    programs associated with the given field only.
+    until a match is found, or the search is exhausted.
 
-    :param level: Optional. Level of study (Undergraduate, Graduate,
-    Non-degree)
-    :param field: Optional. Field of study
     :param program: Program name
+    :param field: Field of study
+    :param level: Level of study (Undergraduate, Graduate, Non-degree)
     :return: HTML content from first found page
-    :raises KeyError: Program or field cannot be found
     """
-    if level is None:
-        # try to infer the level from the program name
-        level = infer_level(program)
-    if field is None:
-        # try to infer the field of study from the program name
-        field_str = infer_field(program)
-        if field_str is not None:
-            # confirm that field_str leads to page with matching program
-            try:
-                program_page_url = attempt_get_program_page_url(field_str,
-                                                                level)
-            except KeyError as e:
-                raise e
-
-    # must search everywhere in given level!
-    # find the program on the page for the field at the appropriate
-    # level
-    url = first_field_matching(program)
-    content = get_content(url)
+    field_at_level_url = get_programs_url(level, field)
+    programs_in_field_at_level_content = get_content(field_at_level_url)
+    programs_in_field_at_level_data = parse_programs(
+        programs_in_field_at_level_content, field_at_level_url)
+    program_page_url = next(
+        (
+            url
+            for title, url
+            in programs_in_field_at_level_data.items()
+            if program in title
+        )
+    )
+    content = get_content(program_page_url)
     return content
 
 
diff --git a/test/steps/test_courses_required_by_program.py b/test/steps/test_courses_required_by_program.py
index 68f8c4b..dabb3e5 100644
--- a/test/steps/test_courses_required_by_program.py
+++ b/test/steps/test_courses_required_by_program.py
@@ -1,3 +1,5 @@
+from itertools import chain
+
 from behave import *
 
 use_step_matcher("re")
@@ -20,11 +22,17 @@ def step_impl(context, program, field, level):
 
 @then("(?P<code>.+) is listed as a requirement")
 def step_impl(context, code):
-    assert code in context.lookup
+    data = context.program_data
+    course_codes = chain.from_iterable(data.values())
+    assert any(
+        (code in listed_code for listed_code in course_codes)
+    )
 
 
 @step("the list of requirements for the program")
 def step_impl(context):
-    content = context.page
+    content = context.content
     from scrape.parse import get_program_data
-    context.program_data = get_program_data(content)
+    data = get_program_data(content)
+    assert len(data)
+    context.program_data = data