-
Notifications
You must be signed in to change notification settings - Fork 0
/
GuoKe.py
24 lines (24 loc) · 836 Bytes
/
GuoKe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from bs4 import BeautifulSoup
import requests
import json
import pymongo
# Presumably the landing page of the Guokr science section (inferred from the
# address). Nothing visible in this file reads this global: sava_data() takes
# its own `url` parameter and start() binds a local `url` in its loop.
url = 'http://www.guokr.com/scientific/'
def sava_data(url, timeout=10):
    """Download a JSON document from *url* and store its results in MongoDB.

    The response body is parsed as JSON and every element of its
    ``'result'`` entry is inserted as a separate document into the
    ``Guoke_Science`` collection of the ``GuoKe_Data`` database, using a
    client created for ``localhost:27017``.

    Args:
        url: Address of the JSON endpoint to download.
        timeout: Seconds to wait for the HTTP server before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the decoded payload has no ``'result'`` entry.
    """
    # Browser-like User-Agent; presumably the site rejects the default
    # ``requests`` one -- not verifiable from this file.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0'
    }

    # Download and validate first, so no database client is created when
    # the request itself fails.
    web_data = requests.get(url, headers=headers, timeout=timeout)
    web_data.raise_for_status()
    datas = json.loads(web_data.text)
    articles = datas['result']

    client = pymongo.MongoClient('localhost', 27017)
    try:
        collection = client['GuoKe_Data']['Guoke_Science']
        for data in articles:
            collection.insert_one(data)
    finally:
        # Every call creates its own client, so it must be released here;
        # otherwise each fetched page leaks a client and its connections.
        client.close()
def start():
    """Build the article-listing URLs and pass each one to ``sava_data``.

    The URL template carries ``limit=20``; it is filled with the offsets
    18, 38, 58 and 78, and the pages are processed in that order.
    """
    template = 'http://www.guokr.com/apis/minisite/article.json?retrieve_type=by_subject&limit=20&offset={}&_=1462252453410'
    for offset in range(18, 98, 20):
        sava_data(template.format(offset))
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    start()