-
Notifications
You must be signed in to change notification settings - Fork 1
/
scrapeHalfLines.py
61 lines (50 loc) · 1.89 KB
/
scrapeHalfLines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
__author__ = 'tanyacashorali'
import urllib2
import time
import re
import random
import datetime
import os
import sqlite3
import pandas as pd
from urlparse import urlparse
from bs4 import BeautifulSoup as bs
from datetime import date, timedelta
# Open the SQLite database that receives the scraped second-half lines.
db = sqlite3.connect('/home/ec2-user/sports/sports.db')

# Sleep a random 1-20 seconds before requesting the page.
# NOTE(review): presumably to stagger scheduled runs -- confirm the intent.
time.sleep(random.randint(1, 20))

# Download the odds page and parse it.  The response is closed explicitly
# (urllib2 responses are not context managers) so the socket is released
# even if reading or parsing raises.
response = urllib2.urlopen('http://www.covers.com/odds/basketball/college-basketball-2nd-half-lines.aspx')
try:
    soup = bs(response.read(), ['fast', 'lxml'])
finally:
    response.close()

# Only the first <table> on the page is used; an IndexError here means the
# page contained no table at all.
tables = soup.findAll('table')
odds_table = tables[0]
away = odds_table.findAll('div', {'class': 'team_away'})
home = odds_table.findAll('div', {'class': 'team_home'})
covers = odds_table.findAll('td', {'class': 'covers_top'})

# Game date formatted MM/DD/YYYY, taken from the local clock minus two hours.
# NOTE(review): the two-hour shift presumably keeps games that run past
# midnight on the previous calendar day -- confirm.
today = (datetime.datetime.now() - timedelta(hours=2)).strftime("%m/%d/%Y")
# For every odds cell, keep the leftmost token that is either a number
# (digits with an optional decimal part) or, failing that, a run of word
# characters.  Spreads may additionally carry a leading sign.
# NOTE(review): the spread character class [-|+] also accepts a literal '|';
# it was probably meant to be [-+] -- left unchanged here.
line_pattern = re.compile(r'\d+\.*\d*|\w+')
spread_pattern = re.compile(r'[-|+]\d+\.*\d*|\w+')

lines = []
spreads = []
for cell in covers:
    # An AttributeError is raised if the div is missing or nothing matches.
    line_text = cell.find('div', {'class': 'line_top'}).text
    lines.append(line_pattern.search(line_text).group(0))

    spread_text = cell.find('div', {'class': 'covers_bottom'}).text
    spreads.append(spread_pattern.search(spread_text).group(0))
# Team names live in the <strong> child of each team div; divs without one
# are skipped.
away_teams = [tag.text for tag in (div.strong for div in away) if tag]

## remove @ symbol for home teams
home_teams = [
    tag.text.replace('@', '')
    for tag in (div.strong for div in home)
    if tag
]

# Timestamp of this scrape, stored alongside every row.
date_time = str(datetime.datetime.now())
# Pair up the scraped columns game by game.  zip() stops at the shortest
# list, so a missing line/spread/home team no longer aborts the run with an
# IndexError part-way through the inserts.
rows = list(zip(away_teams, home_teams, lines, spreads))
if len(rows) < len(away_teams):
    # Exactly the situation in which index-based access would have failed.
    print('Warning: only %d of %d games have complete data' % (len(rows), len(away_teams)))

try:
    for row in rows:
        try:
            # "with db" wraps each insert in its own transaction: it commits
            # on success and rolls back on error, so one rejected row does
            # not affect the others and no explicit commit is needed.
            with db:
                db.execute('''INSERT INTO NCAAHalfLines(away_team, home_team, line, spread, game_date, game_time) VALUES(?,?,?,?,?,?)''', row + (today, date_time))
        except sqlite3.IntegrityError:
            # The table's constraints rejected the row.
            print('Record Exists')
finally:
    # Always release the database handle, even on an unexpected error.
    db.close()