-
Notifications
You must be signed in to change notification settings - Fork 1
/
Imagescraper.py
52 lines (46 loc) · 1.44 KB
/
Imagescraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import requests
import shutil
import os
from bs4 import BeautifulSoup as B
import time
# HTTP headers attached to every request made by this script; the User-Agent
# value is a desktop Firefox identification string.
heder = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) '
        'Gecko/20100101 Firefox/69.0'
    ),
}
# Directory (relative to the current working directory) that downloaded
# images are written into.
path = "ImageFolder"
# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, without the race of a separate exists() check.
os.makedirs(path, exist_ok=True)
def send_rq(url, timeout=30):
    """Fetch *url* with the module-level headers and return the raw body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up; without it a stalled connection would block forever.

    Returns:
        The response body as bytes (``Response.content``). The body is
        returned whatever the status code is; a non-200 status is only
        reported on stdout.
    """
    page = requests.get(url, headers=heder, timeout=timeout)
    if page.status_code != 200:
        # Same "<url>----<status>" report format that saving_img prints.
        print('{0}----{1}'.format(url, page.status_code))
    return page.content
def saving_img(title, link, timeout=30):
    """Download the image at *link* into the ``path`` folder.

    The file name is the first 30 characters of *title* followed by
    ``..png``. Characters that cannot appear in a file name on the current
    platform are replaced with ``_`` so that the file can be created.

    Args:
        title: Text used to build the output file name.
        link: Address of the image to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up.

    Returns:
        None. On a non-200 response the link and status code are printed
        and nothing is written.
    """
    # Using the response as a context manager releases the streamed
    # connection even when writing the file fails.
    with requests.get(link, headers=heder, stream=True, timeout=timeout) as r:
        if r.status_code != 200:
            print('{0}----{1}'.format(link, r.status_code))
            return

        # Let urllib3 undo gzip/deflate transfer encoding while copying.
        r.raw.decode_content = True

        # A path separator inside the title would point open() at a
        # directory that does not exist; Windows rejects a few more
        # characters on top of that.
        forbidden = os.sep + (os.altsep or '')
        if os.name == 'nt':
            forbidden += ':*?"<>|'
        file_title = ''.join(
            '_' if ch in forbidden else ch for ch in title[:30]
        )

        # NOTE: the doubled dot in '..png' is kept so file names stay the
        # same as those written by earlier runs.
        target = os.path.join(path, '{0}..png'.format(file_title))
        with open(target, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
        print('Done an image')
def parsing_page(data):
    """Parse one listing page and download the image of each post.

    Every ``<div class="single-post">`` except the first one is inspected.
    The stripped text of its ``<h2>`` is used as the title, and the second
    whitespace-separated token of its ``<img>`` ``style`` attribute is
    passed to ``saving_img`` as the image link. Posts that lack any of
    these pieces are skipped with a message instead of aborting the page.

    Args:
        data: HTML markup of the page (as returned by ``send_rq``).

    Returns:
        None. Progress and the number of collected links are printed.
    """
    soup = B(data, 'html.parser')
    img_list = []
    # The first matching div is deliberately left out by the [1:] slice.
    for post in soup.find_all('div', class_='single-post')[1:]:
        heading = post.find('h2')
        if heading is None:
            print('Skipping a post without a title')
            continue
        img_title = heading.text.strip()
        print(img_title)

        img_tag = post.find('img')
        style_value = img_tag.get('style') if img_tag is not None else None
        style_parts = (style_value or '').split()
        if len(style_parts) < 2:
            # No <img>, no style attribute, or a style with a single token.
            print('Skipping {0}: no image link found'.format(img_title))
            continue

        img = style_parts[1]
        img_list.append(img)
        saving_img(img_title, img)
    print('Total image link got ={0}'.format(len(img_list)))
# NOTE(review): looks like leftover debug output. Indentation was lost in this
# copy of the file, so it is unclear whether this line belongs to the function
# defined just before it or to module level -- confirm against the original
# before relying on when it prints.
print("Hello World")
def main(first_page=1, last_page=1, delay=5):
    """Download the images of every listing page in the given range.

    Args:
        first_page: Number of the first page to fetch.
        last_page: Number of the last page to fetch (inclusive). The
            defaults cover page 1 only.
        delay: Seconds to pause between two consecutive pages.

    Returns:
        None.
    """
    print('Work started')
    url_list = [
        'https://www.mlsbd.asia/?paged={0}'.format(page)
        for page in range(first_page, last_page + 1)
    ]
    for index, url in enumerate(url_list):
        if index:
            # Pause only between pages; waiting after the final page would
            # just postpone the end of the run.
            time.sleep(delay)
        data = send_rq(url)
        parsing_page(data)
    print('Done All the work')


if __name__ == "__main__":
    main()