MissingKids.py
#
# Script for scraping missing children listings from missingkids.org
#
import traceback

import requests
from bs4 import BeautifulSoup

from utils import sleep_scrapper, get_request_headers


class MissingKids:

    def __init__(self):
        pass

    def run(self):
        try:
            url = 'https://api.missingkids.org/missingkids/servlet/PubCaseSearchServlet?' \
                  'act=usMapSearch&missState=VA&searchLang=en_US&casedata=latest'
            r = requests.get(url, headers=get_request_headers())
            if r.status_code != 200:
                print('[MissingKids] :: failed to get content of url: %s' % url)
                return
            html_doc = r.content
            soup = BeautifulSoup(html_doc, 'html.parser')
            # Each result row lives in a <td width="40%"> cell.
            for td in soup.find_all('td', width="40%"):
                self.scrap_result_row(td)
            # sleep_scrapper('MissingKids')
        except Exception as exp:
            print('[MissingKids] :: run() :: Got exception: %s' % exp)
            print(traceback.format_exc())

    def scrap_result_row(self, td):
        a = td.find('a')
        # The child's name is in the first <b> tag inside the link.
        name = a.find('b').text.strip()
        print('[MissingKids] :: Name: ', name)
        # The remaining <b> tags in the cell hold the listing details.
        details = td.find_all('b')
        print('[MissingKids] :: details', details)
        # Alert text is in a span with class "alerts".
        alert = td.find('span', class_='alerts').text.strip()
        print('[MissingKids] :: span: ', alert)


if __name__ == '__main__':
    scraper = MissingKids()
    scraper.run()
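
The script imports sleep_scrapper and get_request_headers from a utils module that is not included in this file. Below is a minimal sketch of what those helpers might look like, inferred only from how they are called above: get_request_headers() presumably returns browser-like HTTP headers, and sleep_scrapper() presumably pauses between scrape cycles. The exact behavior, header values, and delay range are assumptions, not the project's actual implementation.

# utils.py (hypothetical sketch; the real module is not shown in this file)
import time
import random


def get_request_headers():
    # Assumed: return headers that make the request look like a normal browser.
    return {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }


def sleep_scrapper(name):
    # Assumed: sleep for a random interval between scrape cycles to avoid
    # hammering the server; the 30-120 second range is illustrative only.
    delay = random.randint(30, 120)
    print('[%s] :: sleeping for %d seconds' % (name, delay))
    time.sleep(delay)

With helpers like these in place, the scraper can be run directly with python MissingKids.py, which fetches the latest Virginia (missState=VA) cases from the public search endpoint and prints one name, details, and alert line per result row.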