Skip to content

Commit

Permalink
feat(scrape.model.page.courses_in_program, test): pass tests
Browse files: browse the repository at this point in the history
- simplify code implementation
- correct incomplete test implementations
- (confirmed that the tests can fail: after the changes, a deliberately
  false test case failed as expected)
  • Loading branch information
devvyn committed Nov 16, 2019
1 parent 541014d commit 4ec532a
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 31 deletions.
45 changes: 17 additions & 28 deletions src/scrape/model/page/courses_in_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,41 +19,30 @@ def first_field_matching(program):
return next(fields_list)


def get_program_page(program: str, field: str = None,
level: str = None) -> str:
def get_program_page(program: str, field: str, level: str) -> str:
"""
For given program, return the content of the program's page in the
course catalogue. Lookup is done on data returned by sibling methods
until a reasonable or exact match is found, or the search is exhausted.
If the field is provided, shortcut the lookup by trying to match
programs associated with the given field only.
until a match is found, or the search is exhausted.
:param level: Optional. Level of study (Undergraduate, Graduate,
Non-degree)
:param field: Optional. Field of study
:param program: Program name
:param field: Field of study
:param level: Level of study (Undergraduate, Graduate, Non-degree)
:return: HTML content from first found page
:raises KeyError: Program or field cannot be found
"""
if level is None:
# try to infer the level from the program name
level = infer_level(program)
if field is None:
# try to infer the field of study from the program name
field_str = infer_field(program)
if field_str is not None:
# confirm that field_str leads to page with matching program
try:
program_page_url = attempt_get_program_page_url(field_str,
level)
except KeyError as e:
raise e

# must search everywhere in given level!
# find the program on the page for the field at the appropriate
# level
url = first_field_matching(program)
content = get_content(url)
field_at_level_url = get_programs_url(level, field)
programs_in_field_at_level_content = get_content(field_at_level_url)
programs_in_field_at_level_data = parse_programs(
programs_in_field_at_level_content, field_at_level_url)
program_page_url = next(
(
url
for title, url
in programs_in_field_at_level_data.items()
if program in title
)
)
content = get_content(program_page_url)
return content


Expand Down
14 changes: 11 additions & 3 deletions test/steps/test_courses_required_by_program.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from itertools import chain

from behave import *

use_step_matcher("re")
Expand All @@ -20,11 +22,17 @@ def step_impl(context, program, field, level):

@then("(?P<code>.+) is listed as a requirement")
def step_impl(context, code):
assert code in context.lookup
data = context.program_data
course_codes = chain.from_iterable(data.values())
assert any(
(code in listed_code for listed_code in course_codes)
)


@step("the list of requirements for the program")
def step_impl(context):
content = context.page
content = context.content
from scrape.parse import get_program_data
context.program_data = get_program_data(content)
data = get_program_data(content)
assert len(data)
context.program_data = data

0 comments on commit 4ec532a

Please sign in to comment.