forked from geekcomputers/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmovie_details
50 lines (37 loc) · 1.51 KB
/
movie_details
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import mechanize
import urllib2
from bs4 import BeautifulSoup
# Create a Browser
browser = mechanize.Browser()
# Disable loading robots.txt
browser.set_handle_robots(False)
browser.addheaders = [('User-agent',
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)')]
movie_title = raw_input("Enter movie title: ")
movie_types = ('feature', 'tv_movie', 'tv_series', 'tv_episode', 'tv_special',
'mini_series', 'documentary', 'game', 'short', 'video')
# Navigate
browser.open('http://www.imdb.com/search/title')
# Choose a form
browser.select_form(nr=1)
browser['title'] = movie_title
# Check all the boxes of movie types
for type in movie_types:
browser.find_control(type='checkbox',nr=0).get(type).selected = True
# Submit
fd = browser.submit()
soup = BeautifulSoup(fd.read(),'html5lib')
# Updated from td tag to h3 tag
for div in soup.findAll('h3', {'class': 'lister-item-header'}, limit=1):
a = div.findAll('a')[0]
hht = 'http://www.imdb.com' + a.attrs['href']
print(hht)
page = urllib2.urlopen(hht)
soup2 = BeautifulSoup(page.read(),'html.parser')
find = soup2.find
print("Title: " + find(itemprop='name').get_text().strip())
print("Duration: " + find(itemprop='duration').get_text().strip())
print("Director: " + find(itemprop='director').get_text().strip())
print("Genre: " + find(itemprop='genre').get_text().strip())
print("IMDB rating: " + find(itemprop='ratingValue').get_text().strip())
print("Summary: " + find(itemprop='description').get_text().strip())