-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape6.py
54 lines (39 loc) · 1.37 KB
/
scrape6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from bs4 import BeautifulSoup
from urllib2 import urlopen
# Pages this script scrapes.
# University calendar; the query string carries d=2015-06-07, so the page is
# requested for that fixed date. Passed to get_university_events() below.
BASE_URL = "http://calendar.southernct.edu/?d=2015-06-07&m=1" #june 7th
# VirtualEMS "BrowseEvents" page. Not passed to any active call in this
# script -- presumably meant for the student-center scraper; TODO confirm.
BASE_URL2 = "http://vems.southernct.edu/VirtualEMS/BrowseEvents.aspx"
# Athletics calendar page. Passed to get_athletic_events() below.
BASE_URL3 = "http://www.southernctowls.com/calendar.aspx"
def make_soup(section_url):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        section_url: URL of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response body using the
        ``lxml`` parser.

    Raises:
        Whatever ``urlopen`` raises when the request fails; errors are not
        caught here.
    """
    response = urlopen(section_url)
    try:
        # try/finally rather than ``with``: the urllib2 response object is
        # not a context manager on Python 2, and the connection should be
        # released even if reading the body fails part-way through.
        html = response.read()
    finally:
        response.close()
    return BeautifulSoup(html, "lxml")
def get_university_events(section_url):
    """Return the first ``<li>`` of every ``<ul>`` in the events section.

    Args:
        section_url: URL of the calendar page to scrape.

    Returns:
        A list with one entry per ``<ul>`` found anywhere inside
        ``<section id="events">``: the first ``<li>`` tag within that list,
        or ``None`` for a ``<ul>`` that contains no ``<li>``. An empty list
        is returned when the page has no such section.
    """
    soup = make_soup(section_url)
    calendar = soup.find("section", id="events")
    if calendar is None:
        # find() returns None when nothing matches; without this guard the
        # lookup below fails with an opaque AttributeError on NoneType.
        return []
    return [ul.li for ul in calendar.findAll("ul")]
def get_stucenter_events(section_url):
    """Placeholder scraper for the ``td.todayCell`` cell of a calendar page.

    The page is fetched and the first ``<li>`` of each ``<ul>`` inside the
    cell is gathered, but nothing is returned from that work yet.

    Args:
        section_url: URL of the page to scrape.

    Returns:
        Always ``False``.
    """
    page = make_soup(section_url)
    today_cell = page.find("td", "todayCell")
    # Built and then dropped: the function is unfinished, and the lookup is
    # kept so it still fails the same way when the cell is absent.
    first_items = [listing.li for listing in today_cell.findAll("ul")]
    return False
def get_lyman_events(section_url):
    """Placeholder scraper: fetches and parses the page, extracts nothing.

    Args:
        section_url: URL of the page to download.

    Returns:
        Always ``False``.
    """
    # The parsed tree is not used yet; the download still takes place.
    make_soup(section_url)
    return False
def get_residence_events(section_url):
    """Placeholder scraper: fetches and parses the page, extracts nothing.

    Args:
        section_url: URL of the page to download.

    Returns:
        Always ``False``.
    """
    # The parsed tree is not used yet; the download still takes place.
    make_soup(section_url)
    return False
def get_athletic_events(section_url):
    """Return the event ``<div>`` tags listed in the calendar's "today" cell.

    Args:
        section_url: URL of the calendar page to scrape.

    Returns:
        A list of the ``<div class="composite_cal_item">`` tags found inside
        the ``<td class="composite_cal_today">`` cell. An empty list is
        returned when the page has no such cell.
    """
    soup = make_soup(section_url)
    # A plain string as the second argument is matched against the CSS class.
    today = soup.find("td", "composite_cal_today")
    if today is None:
        # find() returns None when nothing matches; treat a page without a
        # "today" cell as having no events instead of raising AttributeError.
        return []
    return list(today.findAll("div", "composite_cal_item"))
if __name__ == '__main__':
    # Run each working scraper in order and dump the raw tags it returns.
    sources = (
        ('University Events: ', get_university_events, BASE_URL),
        ('Athletic Events: ', get_athletic_events, BASE_URL3),
    )
    for label, scrape, url in sources:
        # A single parenthesised argument prints the same text as
        # ``print label, events``: the two values separated by one space.
        print('%s %s' % (label, scrape(url)))
    # TODO: also print '\nStudent Center Events: ' once
    # get_stucenter_events returns real data.