-
Notifications
You must be signed in to change notification settings - Fork 2
/
juniper-support-scraper.py
134 lines (108 loc) · 3.7 KB
/
juniper-support-scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
import argparse, pycurl, os, re
from urllib import urlencode
from StringIO import StringIO
from urlparse import urlparse
from os.path import basename, splitext
# Command-line interface definition
def argsInit():
    """Build the argument parser and return the parsed command-line arguments."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "url",
        help="URL to support.juniper.net file to download")
    arg_parser.add_argument(
        "username",
        help="Username to access support.juniper.net")
    arg_parser.add_argument(
        "password",
        help="Password to access support.juniper.net. "
             "Passwords with special characters must be "
             "enclosed in single-quotes")
    arg_parser.add_argument(
        "--output-name",
        required=False,
        help="Output filename")
    return arg_parser.parse_args()
# Remove the cookie jar ('cookie.txt') before and after a run
def clearCookies():
    """Delete the cookie jar file, silently ignoring the case where it is absent."""
    cookie_path = 'cookie.txt'
    try:
        os.remove(cookie_path)
    except OSError:
        # File did not exist (or was already removed) -- nothing to do.
        pass
# Executes curl on a given url, sends POST data (if set)
def executeCurl(input_url, params=None, write_function=lambda x: None):
    """Fetch input_url with pycurl, persisting cookies in 'cookie.txt'.

    params -- optional dict; when truthy it is urlencoded and sent as a POST body.
              (Default changed from a mutable {} to None -- behaviorally identical,
              since an empty dict was falsy and skipped the POST branch anyway.)
    write_function -- called with each chunk of the response body; default discards it.
    """
    c = pycurl.Curl()
    c.setopt(c.URL, input_url)
    c.setopt(c.FOLLOWLOCATION, True)
    c.setopt(c.HEADER, False)
    # NOTE(review): TLS certificate verification is disabled -- insecure; confirm intent.
    c.setopt(c.SSL_VERIFYPEER, False)
    c.setopt(c.COOKIEJAR, 'cookie.txt')
    c.setopt(c.COOKIEFILE, 'cookie.txt')
    if params:
        c.setopt(c.POSTFIELDS, urlencode(params))
    c.setopt(c.WRITEFUNCTION, write_function)
    try:
        c.perform()
    finally:
        # Ensure the curl handle is released even if perform() raises.
        c.close()
# Streams the resolved direct link into an open file object
def downloadFile(input_url, write_data):
    """Download input_url into the open file object write_data, reusing 'cookie.txt'."""
    curl = pycurl.Curl()
    # Same transfer options as executeCurl, but the body goes straight to a file.
    transfer_options = (
        (pycurl.URL, input_url),
        (pycurl.FOLLOWLOCATION, True),
        (pycurl.HEADER, False),
        (pycurl.SSL_VERIFYPEER, False),
        (pycurl.COOKIEJAR, 'cookie.txt'),
        (pycurl.COOKIEFILE, 'cookie.txt'),
        (pycurl.WRITEDATA, write_data),
    )
    for option, value in transfer_options:
        curl.setopt(option, value)
    curl.perform()
    curl.close()
# Derives the Juniper record ID from the user-supplied URL
def getRecordID(input_url):
    """Return the URL's final path component with its extension stripped."""
    url_path = urlparse(input_url).path
    record_id, _unused_ext = splitext(basename(url_path))
    return record_id
# Parses page output for actual link to file
def getDownloadLink(content):
    """Extract the direct download URL from the interstitial page HTML.

    Raises ValueError (instead of the original opaque AttributeError on a
    failed .group call) when the expected link markup is not present.
    """
    # Raw string with the literal dots escaped -- the original pattern left
    # '.' unescaped, so it matched any character at those positions.
    match = re.search(
        r'Your download should start in a few seconds\. If not '
        r'<a href="(.*)">Click to Download</a>',
        content
    )
    if match is None:
        raise ValueError("Could not find a download link in the page content")
    return match.group(1)
# Extracts the target filename from the direct download link
def getFilename(input_url):
    """Return the last path component of input_url (query string excluded)."""
    parsed = urlparse(input_url)
    return basename(parsed.path)
def scrape(url, username, password, outputName):
storage = StringIO()
clearCookies()
# 1. Get initial page so our cookies get set
executeCurl(url)
# 2. Send login credentials
executeCurl(
input_url="https://webdownload.juniper.net/access/oblix/apps/webgate/bin/webgate.so",
params={
'HiddenURI': '',
'LOCALE': 'en_us',
'AUTHMETHOD': 'UserPassword',
'username': username,
'password': password
}
)
# 3. Get to EULA page and submit
executeCurl(
input_url="https://webdownload.juniper.net/swdl/dl/download",
params={
'recordId': getRecordID(url),
'siteId': '1',
'eulaAccepted': 'Yes'
},
write_function=storage.write
)
# 4. Parse download URL from content
download_link = getDownloadLink(storage.getvalue())
# 5. Calculate filename
filename = getFilename(download_link)
if outputName is not None:
filename = outputName
print "Downloading..."
fp = open(filename, "wb")
#6 . Download the file
downloadFile(download_link, fp)
fp.close()
clearCookies()
print "Done"
# Script entry point
def main():
    """Parse the command line and run the scraper."""
    cli_args = argsInit()
    scrape(cli_args.url, cli_args.username, cli_args.password, cli_args.output_name)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()