-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
129 lines (115 loc) · 3.8 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from selenium import webdriver
from bs4 import BeautifulSoup
def _get_page_by_helper(course, type):
    '''
    Run a course search on the portal and return the parsed results page.

    Starts a headless, incognito Chrome session, opens the portal, clicks
    through to the main view, types `course` into the search field whose
    name is `type`, submits the form, and parses the resulting frame.

    course: text typed into the search field.
    type:   name of the search field; must be 'Course' or 'Title'.

    Returns a BeautifulSoup object (lxml parser) built from the page source
    of the results frame.

    Any Selenium error propagates to the caller; the browser is always shut
    down first so a failed lookup does not leave a Chrome process behind.
    '''
    url = 'https://portal.claremontmckenna.edu/ics'
    option = webdriver.ChromeOptions()
    option.add_argument("-incognito")
    option.add_argument("--headless")
    browser = webdriver.Chrome('./chromedriver', options=option)
    try:
        browser.get(url)
        browser.find_element_by_xpath('//*[@id="pg0_V_lnkToMainView"]').click()

        # The search form lives inside a frame, so enter it before looking
        # up the input field.
        first_frame = browser.find_element_by_xpath('//*[@id="pg0_V_litFrame"]')
        browser.switch_to.frame(first_frame)
        by_course_field = browser.find_element_by_name(type)
        by_course_field.send_keys(course)
        form_submit = browser.find_element_by_xpath('//*[@id="secondary-container"]/form')
        form_submit.submit()

        # After submitting, go back to the top-level document and re-enter
        # the frame to read the results.
        browser.switch_to.default_content()
        second_frame = browser.find_element_by_xpath('//*[@id="pg0_V_litFrame"]')
        browser.switch_to.frame(second_frame)
        page_source = browser.page_source
    finally:
        # Release the driver on both the success and the failure path.
        browser.quit()
    return BeautifulSoup(page_source, 'lxml')
def get_page_by_code(course_code):
    '''
    Fetch the parsed search-results page for a course code.

    Delegates to _get_page_by_helper using the 'Course' search field.

    Returns whatever the helper returns, or None if the lookup raised an
    ordinary exception.
    '''
    try:
        return _get_page_by_helper(course_code, 'Course')
    except Exception:
        # Best-effort lookup: a failed search is reported as None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
def get_page_by_title(course_title):
    '''
    Fetch the parsed search-results page for a course title.

    Delegates to _get_page_by_helper using the 'Title' search field.

    Returns whatever the helper returns, or None if the lookup raised an
    ordinary exception.
    '''
    try:
        return _get_page_by_helper(course_title, 'Title')
    except Exception:
        # Best-effort lookup: a failed search is reported as None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
def _to_days(days_abv):
    '''
    Expand one-letter weekday codes into full weekday names.

    days_abv: container of codes (typically a string such as 'MWF'); the
    recognised codes are M, T, W, R (Thursday) and F.

    Returns the matching weekday names as a list, always ordered Monday
    through Friday regardless of the order of the codes in `days_abv`.
    '''
    code_to_name = (
        ('M', 'Monday'),
        ('T', 'Tuesday'),
        ('W', 'Wednesday'),
        ('R', 'Thursday'),
        ('F', 'Friday'),
    )
    return [name for code, name in code_to_name if code in days_abv]
def _get_data_helper(entry):
    '''
    Parse one results-table row into a dictionary of course details.

    entry: a row element whose find_all('td') yields the row's cells. The
    cells are read by position:
        0 -> "<course code> - <section code>"
        1 -> course title
        2 -> "<open spots>/<total spots>" as the first space-separated token
        4 -> "<day codes> <start>-<end>/ <room>"
        5 -> instructor, written "Last, First" when a ", " is present

    Returns a dict with the keys course_title, course_code, sec_code,
    prof_name, spots_aval, total_aval, days_loc, start_time, end_time and
    room_loc.

    Raises IndexError or ValueError when a cell does not have the expected
    shape; no attempt is made to recover here.
    '''
    cells = entry.find_all('td')

    # Availability: "<open>/<total>" is the first space-separated token.
    seat_counts = cells[2].get_text().split(" ")[0].split("/")
    spots_aval = int(seat_counts[0])
    total_aval = int(seat_counts[1])

    # Location: "<days> <time range>" before the "/ ", room after it.
    full_loc = cells[4].get_text().split("/ ")
    schedule = full_loc[0].strip().split()
    days_loc = _to_days(schedule[0])
    full_time = schedule[1]
    if 'M' in full_time:
        # An AM/PM marker is present, so this is a real "start-end" range.
        time_range = full_time.split("-")
        start_time = time_range[0]
        end_time = time_range[1]
    else:
        start_time = 'N/A'
        end_time = 'N/A'
    room_loc = full_loc[1]

    # Course code and section code.
    full_code = cells[0].get_text().split(" - ")
    # Collapse every run of whitespace (including non-breaking spaces) to a
    # single plain space; replacing " " with " " left the text unchanged.
    course_code = " ".join(full_code[0].split())
    sec_code = full_code[1]

    # Course title.
    course_title = cells[1].get_text().strip()

    # Professor: turn "Last, First" into "First Last"; otherwise keep the
    # cell text as-is.
    raw_prof = cells[5].get_text()
    name_parts = raw_prof.split(", ")
    if len(name_parts) >= 2:
        prof_name = name_parts[1] + ' ' + name_parts[0]
    else:
        prof_name = raw_prof

    return {
        "course_title": course_title,
        "course_code": course_code,
        "sec_code": sec_code,
        "prof_name": prof_name,
        "spots_aval": spots_aval,
        "total_aval": total_aval,
        "days_loc": days_loc,
        "start_time": start_time,
        "end_time": end_time,
        "room_loc": room_loc
    }
def get_data(soup):
    '''
    Extract every course listed in a parsed search-results page.

    soup: the parsed page as returned by get_page_by_code /
    get_page_by_title, or None when the lookup failed.

    Returns {"matches": [...]} with one entry per course row. A row that
    cannot be parsed contributes {"error": "String Parsing Error"} in its
    place. The list is empty when `soup` is None, when the page has no
    table body, or when the table has no course rows.
    '''
    if soup is None:
        return {"matches": []}

    table = soup.find("table")
    tbody = table.find("tbody") if table is not None else None
    if tbody is None:
        # No results table on the page: report no matches rather than
        # failing with an AttributeError on None.
        return {"matches": []}

    entries = tbody.find_all("tr")
    all_class_info = []
    # The last row holds the number of results for the page, not a course,
    # so it is skipped. Slicing is also safe when there are no rows at all.
    for entry in entries[:-1]:
        try:
            all_class_info.append(_get_data_helper(entry))
        except Exception:
            # Keep going: one malformed row must not discard the others.
            all_class_info.append({"error": "String Parsing Error"})
    return {"matches": all_class_info}