-
Notifications
You must be signed in to change notification settings - Fork 3
/
parse_beers.py
executable file
·79 lines (60 loc) · 2.11 KB
/
parse_beers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python2.7
import requests
import bs4
import urllib2
import pickle
import string
import sys
# class Beer:
# name = "none"
# strength = 0.0
# cost_price = 0.0
# desc = "none"
# def __init__(self, param_name, param_strength, param_cost_price, param_desc):
# self.name = param_name
# self.strength = float(param_strength)
# self.cost_price = float(param_cost_price)
# self.desc = param_desc
def get_beer_desc(beer_name):
    """Fetch a beer's description text from the Milton Brewery website.

    Requests ``http://www.miltonbrewery.co.uk/beers/<beer_name>.html`` and
    returns the ``.string`` of the fourth child node (index 3) of the first
    element matched by the ``#Content`` selector.

    Args:
        beer_name: Page name substituted into the URL.

    Returns:
        The ``.string`` of the description node, ``"NO SITE FOUND"`` when the
        server answers with HTTP 404, or ``"NO DESCRIPTION FOUND"`` when the
        page has no ``#Content`` element or that element has fewer than four
        children.
    """
    response = requests.get(
        'http://www.miltonbrewery.co.uk/beers/%s.html' % beer_name)
    if response.status_code == 404:
        return "NO SITE FOUND"

    # NOTE(review): no parser is named here, so bs4 chooses one itself. The
    # child index used below may depend on that choice -- confirm against the
    # live pages before pinning a specific parser.
    soup = bs4.BeautifulSoup(response.text)
    contents_tag = soup.select('#Content')
    if not contents_tag:
        # Page exists but lacks the expected container; report it instead of
        # aborting the whole run with an IndexError.
        return "NO DESCRIPTION FOUND"

    children = contents_tag[0].contents
    if len(children) <= 3:
        return "NO DESCRIPTION FOUND"
    return children[3].string
# Build beers.pkl from beers.txt: for every listed beer, look up its
# description on the brewery website, download its pump-clip image into fig/,
# and finally pickle a dict keyed by the stripped beer name.

# Each line of beers.txt is split on ',' into exactly four fields:
# name, strength, cost price, and a fourth field that is ignored.
with open("beers.txt") as beers_file:
    lines = beers_file.readlines()

beers = {}
for line in lines:
    if not line.strip():
        # A blank line (e.g. at the end of the file) carries no beer; skip it
        # rather than failing the four-way unpack below.
        continue

    b_name, b_strength, b_cost_price, _ = line.split(',')
    clean_name = b_name.strip()
    b_desc = get_beer_desc(clean_name.lower())
    print("processing %s" % b_name)

    beers[clean_name] = {
        "name": clean_name,
        "strength": float(b_strength),
        "cost_price": float(b_cost_price),
        # NOTE(review): under Python 2, str() on a unicode description that
        # contains non-ASCII characters raises UnicodeEncodeError -- confirm
        # the site's text is ASCII or decide on an explicit encoding.
        "desc": str(b_desc),
    }

    # Save the pump-clip image. The file name used on the server varies, so
    # try each candidate spelling in turn and keep the first one that exists.
    # Candidates are built from the stripped name so stray whitespace in
    # beers.txt never ends up inside the URL.
    possible_names = [
        clean_name.lower(),
        clean_name.capitalize(),
        clean_name.capitalize() + "_Web",
        clean_name.capitalize() + "_Website",
    ]
    found_beer = False
    for name in possible_names:
        try:
            resp = urllib2.urlopen(
                "http://www.miltonbrewery.co.uk/media/pumpclips/%s.png" % name)
        except urllib2.HTTPError as e:
            # Only HTTPError carries a status code; any other URLError (DNS
            # failure, refused connection, ...) propagates unchanged.
            if e.code == 404:
                continue
            raise

        try:
            image_data = resp.read()
        finally:
            resp.close()

        # PNG data is binary, so the file must be opened in 'wb' mode.
        with open('fig/%s.png' % clean_name.lower(), 'wb') as local_file:
            local_file.write(image_data)
        found_beer = True
        break  # first match wins; no need to request the other spellings

    if not found_beer:
        sys.stderr.write('Cannot find image for beer: %s\n' % b_name)

with open("beers.pkl", "wb") as pickle_file:
    pickle.dump(beers, pickle_file)