-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathweb_mirror_record_quic.py
135 lines (109 loc) · 4.21 KB
/
web_mirror_record_quic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import shutil
import subprocess
import sys
# Record a local mirror of the resources listed in a "gets" file.
#
# Usage: web_mirror_record_quic.py <gets_file> <mirror_root> <working_dir>
#
# Every non-blank line of the gets file is "<http(s) URL> <IP>".  Each URL is
# fetched with wget (response headers kept) into a directory tree under
# MIRROR_BASE + <mirror_root>.  The distinct IPs are written to ips.txt and
# serverips.txt, /etc/hosts is rewritten to include them, and copies of the
# bookkeeping files are stored in ./<mirror_root>/.

# Outermost directory for mirrored files; the mirror root name is appended.
MIRROR_BASE = '/home/skype-alpha/July3rdGoogleMeeting/src/out/Release/'

# sed script that deletes the chunked-encoding header so quic_server can load
# the saved responses into its cache.
DROP_CHUNKED_HEADER = '/Transfer-Encoding: chunked/ d'

# Fixed entries written before / after the recorded hosts in /etc/hosts.
HOSTS_HEADER = '127.0.0.1 localhost\n128.30.76.203 copley.csail.mit.edu copley\n 127.0.1.1 skypealpha-ThinkCentre-M91p \n'
HOSTS_FOOTER = '\n\n# The following lines are desirable for IPv6 capable hosts\n::1 ip6-localhost ip6-loopback\nfe00::0 ip6-localnet\nff00::0 ip6-mcastprefix\nff02::1 ip6-allnodes\nff02::2 ip6-allrouters'


def parse_url(scheme_plus_url):
    """Split an http(s) URL into the pieces needed to mirror it.

    Returns None when the URL does not start with "http://" or "https://".
    Otherwise returns a tuple
    (domain_name, resource_folders, resource_name, fetch_url):

    * domain_name      -- host part of the URL.
    * resource_folders -- host followed by every '/'-separated path segment
                          (entry 0 is the host, the last entry is the file).
    * resource_name    -- last path segment; '' when the URL ends in '/' or
                          has no path at all.
    * fetch_url        -- the URL with its query string removed.
    """
    for scheme in ('https://', 'http://'):
        # Test the prefix, not mere containment: a query string may itself
        # embed another URL.
        if scheme_plus_url.startswith(scheme):
            break
    else:
        return None
    # Drop the query first so a '/' inside it is never taken for a path
    # separator.
    fetch_url = scheme_plus_url.split('?', 1)[0]
    resource_folders = fetch_url[len(scheme):].split('/')
    domain_name = resource_folders[0]
    resource_name = resource_folders[-1] if len(resource_folders) > 1 else ''
    return domain_name, resource_folders, resource_name, fetch_url


def remove_tree(path):
    """Delete path (file, symlink or directory tree); no-op if it is absent."""
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)


def strip_chunked_header(file_name):
    """Remove the "Transfer-Encoding: chunked" line from file_name in place."""
    # Argument list (no shell); '--' keeps odd file names from being read as
    # options.
    subprocess.call(['sed', '-i', DROP_CHUNKED_HEADER, '--', file_name])


def fetch_resource(fetch_url, resource_name):
    """Download fetch_url into the current directory and clean its headers."""
    subprocess.call(['wget', '--save-headers', fetch_url])
    # wget stores a URL without a file component as index.html.
    strip_chunked_header(resource_name or 'index.html')


def main(argv=None):
    """Run the recorder; returns the process exit status.

    argv defaults to sys.argv and must hold the gets file name, the mirror
    root name and the working directory at positions 1, 2 and 3.
    Returns 0 on success, 2 on missing arguments and 5 when the gets file
    contains a URL that is neither http nor https.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write('usage: web_mirror_record_quic.py '
                         '<gets_file> <mirror_root> <working_dir>\n')
        return 2
    getinfo, mirror_root, working_dir = argv[1:4]
    print(getinfo)

    # Folder where the fetched files are stored (outermost directory).
    mirror_path = MIRROR_BASE + str(mirror_root)
    # Remove old vestiges.
    remove_tree(mirror_path)

    # NOTE(review): only the first domain seen for an IP gets a hosts entry;
    # confirm that other domains sharing that IP do not need one.
    seen_ips = set()
    with open(getinfo) as gets, \
            open('ips.txt', 'w') as host_mapping, \
            open('serverips.txt', 'w') as mininet_cfg:
        for line in gets:
            records = line.split()
            if not records:
                continue
            if len(records) < 2:
                sys.stderr.write('Skipping malformed line: %r\n' % (line,))
                continue
            scheme_plus_url, ip = records[0], records[1]

            parsed = parse_url(scheme_plus_url)
            if parsed is None:
                print("Die here. Found non HTTP/HTTPS")
                return 5
            domain_name, resource_folders, resource_name, fetch_url = parsed
            print("Domain name %s" % (domain_name,))
            print("resource_folders %s" % (resource_folders,))

            if ip not in seen_ips:
                seen_ips.add(ip)
                # Host mapping for DNS, and the IP list for Mininet.
                host_mapping.write(ip + ' ' + domain_name + '\n')
                mininet_cfg.write(ip + '\n')

            # Entry 0 is the host name and the last entry is the file itself,
            # so only the entries in between become directories.
            resource_directory = mirror_path + ''.join(
                '/' + folder for folder in resource_folders[1:-1])
            print("resource_directory %s" % (resource_directory,))
            if not os.path.exists(resource_directory):
                os.makedirs(resource_directory)

            print("resource_name %s" % (resource_name,))
            print("fetch_url %s \n" % (fetch_url,))
            # wget saves into the current directory, so step into the target
            # directory and always step back afterwards.
            os.chdir(resource_directory)
            try:
                fetch_resource(fetch_url, resource_name)
            finally:
                os.chdir(working_dir)

    # Make sure the top-level index.html loses the chunked header as well.
    strip_chunked_header(mirror_path + '/index.html')

    # Rewrite /etc/hosts: fixed header, recorded hosts, fixed IPv6 footer.
    # ips.txt is read first so a read failure cannot leave /etc/hosts empty.
    with open('ips.txt') as recorded:
        host_entries = recorded.readlines()
    with open('/etc/hosts', 'w') as dns_mapping:
        dns_mapping.write(HOSTS_HEADER)
        dns_mapping.writelines(host_entries)
        dns_mapping.write(HOSTS_FOOTER)

    # Save the mirror setup for posterity in ./<mirror_root>/.
    remove_tree(mirror_root)
    os.makedirs(mirror_root)
    save_prefix = mirror_root + '/' + mirror_root
    shutil.copy('ips.txt', save_prefix + 'ips.txt')
    # NOTE(review): copies the fixed name 'tempgets.txt', not the gets file
    # given on the command line -- confirm this is intended.
    shutil.copy('tempgets.txt', save_prefix + 'gets.txt')
    shutil.copy('serverips.txt', save_prefix + 'serverips.txt')
    return 0


if __name__ == '__main__':
    sys.exit(main())