Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename RawClass property names #83

Merged
merged 1 commit into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion public/f22.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/f23.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/f24.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/s23.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion public/s24.json

Large diffs are not rendered by default.

36 changes: 13 additions & 23 deletions scrapers/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@

{
"6.3900": {
"on": "6.036",
"nx": true | false,
"rp": true | false,
"hf": false | 1 | 2,
"u": "https://introml.mit.edu",
"f": true | false,
"lm": true | false,
"nonext": true | false,
"repeat": true | false,
"url": "https://introml.mit.edu",
"final": true | false,
"half": false | 1 | 2,
"limited": true | false,
}
}
"""
Expand Down Expand Up @@ -116,24 +115,15 @@ def get_course_data(filtered_html):
Returns:
* dict[str, Union[bool, int, str]]: metadata about that particular class
"""
no_next = is_not_offered_next_year(filtered_html)
repeat = is_repeat_allowed(filtered_html)
url = get_url(filtered_html)
final = has_final(filtered_html)
half = get_half(filtered_html)
limited = is_limited(filtered_html)

course_data = {
"nx": no_next,
"rp": repeat,
"u": url,
"f": final,
"hf": half,
"lm": limited,
return {
"nonext": is_not_offered_next_year(filtered_html),
"repeat": is_repeat_allowed(filtered_html),
"url": get_url(filtered_html),
"final": has_final(filtered_html),
"half": get_half(filtered_html),
"limited": is_limited(filtered_html),
}

return course_data


def get_home_catalog_links():
"""
Expand Down
75 changes: 39 additions & 36 deletions scrapers/fireroad.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def parse_schedule(course):
result = {}

# Kinds of sections that exist.
result["s"] = []
result["sectionKinds"] = []
section_kinds = ("Lecture", "Recitation", "Lab", "Design")

for chunk in schedule.split(";"):
Expand All @@ -100,7 +100,7 @@ def parse_schedule(course):

# The key is lowercase
kind = name.lower()
result["s"].append(kind)
result["sectionKinds"].append(kind)

# Raw section times, e.g. T9.301-11 or TR1,F2.
result[kind + "RawSections"] = sections
Expand All @@ -115,7 +115,7 @@ def parse_schedule(course):
result[kindSectionsName].append(parse_section(info))

# True if at least one section is not scheduled yet.
result["tb"] = section_tba
result["tba"] = section_tba
return result


Expand All @@ -134,15 +134,15 @@ def parse_attributes(course):
gir_attr = course.get("gir_attribute", "")

return {
"hh": hass_code == "H",
"ha": hass_code == "A",
"hs": hass_code == "S",
"he": hass_code == "E",
"ci": comms_code == "CI-H",
"cw": comms_code == "CI-HW",
"re": gir_attr == "REST",
"la": gir_attr == "LAB",
"pl": gir_attr == "LAB2",
"hassH": hass_code == "H",
"hassA": hass_code == "A",
"hassS": hass_code == "S",
"hassE": hass_code == "E",
"cih": comms_code == "CI-H",
"cihw": comms_code == "CI-HW",
"rest": gir_attr == "REST",
"lab": gir_attr == "LAB",
"partLab": gir_attr == "LAB2",
}


Expand All @@ -154,7 +154,7 @@ def parse_terms(course):
* course (dict[str, Union[bool, float, int, list[str], str]]): The course object.

Returns:
* dict[str, list[str]]: The parsed terms, stored in the key "t".
* dict[str, list[str]]: The parsed terms, stored in the key "terms".
"""
terms = [
name
Expand All @@ -166,7 +166,7 @@ def parse_terms(course):
]
if course[attr]
]
return {"t": terms}
return {"terms": terms}


def parse_prereqs(course):
Expand All @@ -177,14 +177,14 @@ def parse_prereqs(course):
* course (dict[str, Union[bool, float, int, list[str], str]]): The course object.

Returns:
* dict[str, str]: The parsed prereqs, in the key "pr".
* dict[str, str]: The parsed prereqs, in the key "prereqs".
"""
prereqs = course.get("prerequisites", "")
for gir, gir_rw in utils.GIR_REWRITE.items():
prereqs = prereqs.replace(gir, gir_rw)
if not prereqs:
prereqs = "None"
return {"pr": prereqs}
return {"prereqs": prereqs}


def get_course_data(courses, course):
Expand All @@ -203,9 +203,9 @@ def get_course_data(courses, course):
course_code = course["subject_id"]
course_num, course_class = course_code.split(".")
raw_class = {
"no": course_code,
"co": course_num,
"cl": course_class,
"number": course_code,
"course": course_num,
"subject": course_class,
}

if "schedule" not in course:
Expand All @@ -224,43 +224,46 @@ def get_course_data(courses, course):
raw_class.update(parse_attributes(course))
raw_class.update(
{
"u1": course["lecture_units"],
"u2": course["lab_units"],
"u3": course["preparation_units"],
"le": course["level"],
"vu": course["is_variable_units"],
"sa": ", ".join(course.get("joint_subjects", [])),
"mw": ", ".join(course.get("meets_with_subjects", [])),
"lectureUnits": course["lecture_units"],
"labUnits": course["lab_units"],
"preparationUnits": course["preparation_units"],
"level": course["level"],
"isVariableUnits": course["is_variable_units"],
"same": ", ".join(course.get("joint_subjects", [])),
"meets": ", ".join(course.get("meets_with_subjects", [])),
}
)
# This should be the case with variable-units classes, but just to make sure.
if raw_class["vu"]:
raw_class["u1"] = raw_class["u2"] = raw_class["u3"] = 0
if raw_class["isVariableUnits"]:
assert raw_class["lectureUnits"] == 0
assert raw_class["labUnits"] == 0
assert raw_class["preparationUnits"] == 0

# terms, prereqs
raw_class.update(parse_terms(course))
raw_class.update(parse_prereqs(course))

raw_class.update(
{
"d": course.get("description", ""),
"n": course.get("title", ""),
"description": course.get("description", ""),
"name": course.get("title", ""),
# TODO: improve instructor parsing
"i": ",".join(course.get("instructors", [])),
"v": course.get("virtual_status", "") == "Virtual",
"inCharge": ",".join(course.get("instructors", [])),
"virtualStatus": course.get("virtual_status", "") == "Virtual",
}
)

# nonext, repeat, url, final, half, limited are from catalog.json, not here

if "old_id" in course:
raw_class["on"] = course["old_id"]
raw_class["oldNumber"] = course["old_id"]

raw_class.update(
{
"ra": course.get("rating", 0),
"h": course.get("in_class_hours", 0) + course.get("out_of_class_hours", 0),
"si": course.get("enrollment_number", 0),
"rating": course.get("rating", 0),
"hours": course.get("in_class_hours", 0)
+ course.get("out_of_class_hours", 0),
"size": course.get("enrollment_number", 0),
}
)

Expand Down
Loading