-
Notifications
You must be signed in to change notification settings - Fork 1
/
tumblr.py
69 lines (59 loc) · 2.97 KB
/
tumblr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
# coding=utf-8
# an old driver RSS list https://pastebin.com/raw/zmKcfPqg
# from https://www.92ez.com/?action=show&id=23460
# edited by JrD 2018/8/29
import requests
import sys
import os
import re
# Proxy mapping passed as `proxies=` to the requests.get() calls in this file.
# NOTE(review): only the "http" scheme is mapped here, while the blog page URLs
# are built with https:// - confirm whether an "https" entry is also intended.
proxy={"http" : "127.0.0.1:1080"} # add a local proxy
def get_source(nickname, page_index, source_type, max_retries=5):
    """Download all images or videos of a Tumblr blog, page by page.

    Starting at ``page_index``, each blog page is fetched and scanned; the
    crawl stops after the first page that has no "Older Posts" link.

    Args:
        nickname: Blog name, used as the sub-domain of ``tumblr.com`` and as
            the name of the download directory next to the script.
        page_index: Number of the first page to fetch (1-based in the URL).
        source_type: ``"images"`` scans ``<img>`` tags; any other value scans
            ``<iframe>`` tags for embedded videos.
        max_retries: How many times a page is retried after a network,
            decoding or file-system error before the crawl is abandoned.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Trailing '' keeps the path separator that write_file() relies on when it
    # concatenates directory and file name.
    dir_path = os.path.join(sys.path[0], nickname, '')
    failed_attempts = 0
    # Iterate instead of recursing: one stack frame per page (and per retry)
    # would hit the recursion limit on long blogs or persistent failures.
    while True:
        aim_url = "https://%s.tumblr.com/page/%d" % (nickname, page_index)
        print('[o] Get source from blog %s ...' % aim_url)
        try:
            response_string = requests.get(
                url=aim_url, timeout=50, proxies=proxy
            ).content.decode('utf8')
            _download_page_sources(response_string, source_type, dir_path)
        except (requests.RequestException, OSError, UnicodeDecodeError) as e:
            print(e)
            failed_attempts += 1
            if failed_attempts > max_retries:
                print('[x] Giving up on %s after %d failed attempts.'
                      % (aim_url, failed_attempts))
                return
            # Retry the same page; files already saved are skipped by
            # write_file(), so only the missing ones are fetched again.
            continue
        failed_attempts = 0
        # "Older Posts" marks that another page follows. The former
        # "posts-no-posts content" marker stopped working after a Tumblr page
        # change. The page without the link is still scanned above, so the
        # posts on the final page are not lost.
        if "Older Posts" not in response_string:
            print('[!] Get source complete!')
            return
        page_index += 1


def _download_page_sources(response_string, source_type, dir_path):
    """Save every image or video referenced in one page of blog HTML."""
    if source_type == "images":
        source_elements = re.findall(r'<img(.+?)>', response_string)
    else:
        source_elements = re.findall(r'<iframe(.+?)>', response_string)
    if not source_elements:
        print('[x] Can not get any source.')
        return
    os.makedirs(dir_path, exist_ok=True)
    for this_source in source_elements:
        if source_type == "images":
            src_match = re.search(r'src="(.+?)"', this_source)
            if src_match is None:
                # Tag without a double-quoted src attribute: nothing to fetch.
                continue
            image_url = src_match.group(1)
            image_name = image_url.split('/')[-1]
            # Only files whose name carries the "tumblr_" marker are saved.
            if "tumblr_" in image_name:
                write_file(image_url, dir_path, image_name)
        elif "/video/" in this_source:
            frame_match = re.search(r"src='(.+?)'", this_source)
            if frame_match is None:
                continue
            # The iframe points at a player page that holds the real
            # <source> URL of the video file.
            video_response = requests.get(
                url=frame_match.group(1), timeout=50, proxies=proxy
            ).content.decode('utf8')
            source_match = re.search(r'<source src="(.+?)"', video_response)
            if source_match is None:
                continue
            video_source = source_match.group(1)
            if 'tumblr_' not in video_source:
                # The file name is derived from the "tumblr_<id>" URL part.
                continue
            video_name = video_source.split('tumblr_')[1].split('/')[0] + '.mp4'
            write_file(video_source, dir_path, video_name)
def write_file(source_url, dir_path, file_name):
    """Download ``source_url`` into ``dir_path`` unless the file already exists.

    Args:
        source_url: URL of the image or video to download.
        dir_path: Target directory; it is concatenated directly with
            ``file_name``, so it must end with a path separator.
        file_name: Name of the file to create inside ``dir_path``.

    Returns:
        None. Progress is reported on stdout.
    """
    target_path = dir_path + file_name
    if os.path.exists(target_path):
        print('[*] Source %s has been downloaded.' % file_name)
        return
    print('[*] Source %s is downloading...' % file_name)
    file_download = requests.get(url=source_url, timeout=50, proxies=proxy)
    if file_download.status_code != 200:
        # Report the failure instead of silently skipping the file.
        print('[x] Source %s download failed with HTTP status %d.'
              % (file_name, file_download.status_code))
        return
    # The context manager guarantees the handle is flushed and closed.
    with open(target_path, 'wb') as target_file:
        target_file.write(file_download.content)
if __name__ == '__main__':
    # Command line: <script> <source_type> <nickname>
    # source_type "images" downloads pictures; any other value downloads videos.
    if len(sys.argv) < 3:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit('Usage: %s <images|videos> <nickname>' % sys.argv[0])
    source_type = sys.argv[1]
    user_nickname = sys.argv[2]
    get_source(user_nickname, 1, source_type)