forked from mozilla-mobile/focus-android
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-disconnect.py
executable file
·131 lines (103 loc) · 4.93 KB
/
build-disconnect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import print_function
import json
import os.path
import urlparse
import shutil
def url_filter(resource):
    """Build the URL-filter regular expression for *resource*.

    The pattern is anchored at the start of the URL, accepts http or https,
    allows an optional run of non-slash characters ending in a dot (i.e.
    subdomain labels) and then expects *resource*. Only the dots in
    *resource* are escaped; every other character is copied verbatim.
    """
    scheme_and_subdomains = "^https?://([^/]+\\.)?"
    escaped_resource = "\\.".join(resource.split("."))
    return scheme_and_subdomains + escaped_resource
def unless_domain(properties):
    """Return the "unless-domain" values for *properties*.

    Every domain is prefixed with "*"; the input order is preserved and a
    new list is returned.
    """
    wildcarded = []
    for domain in properties:
        wildcarded.append("*" + domain)
    return wildcarded
def create_blocklist_entry(resource, properties):
    """Build a single blocklist rule for *resource*.

    The rule's trigger carries the URL filter derived from *resource*, is
    restricted to the "third-party" load type and lists the wildcarded
    *properties* under "unless-domain". Its action is always "block".
    """
    trigger = {
        "url-filter": url_filter(resource),
        "load-type": ["third-party"],
        "unless-domain": unless_domain(properties),
    }
    action = {"type": "block"}
    return {"trigger": trigger, "action": action}
def generate_entity_list(path="shavar-prod-lists/disconnect-entitylist.json"):
    """Convert the entity list at *path* into ``Lists/disconnect.json``.

    Every resource of every entity becomes one blocklist entry that carries
    the entity's properties as its "unless-domain" exceptions. The result is
    written as a single line of compact JSON.

    Raises whatever ``open``/``json.load`` raise for a missing or malformed
    input file, and ``KeyError`` when an entity that has resources lacks a
    "resources" or "properties" key.
    """
    with open(path) as fp:
        entitylist = json.load(fp)

    # Entity names are irrelevant here; only each entity's data is used.
    blocklist = [
        create_blocklist_entry(resource, entity['properties'])
        for entity in entitylist.values()
        for resource in entity['resources']
    ]

    # indent=0 puts every item on its own line; stripping the newlines then
    # leaves compact single-line JSON.
    # For human-readable output use json.dumps(blocklist, indent=2) instead.
    out = json.dumps(blocklist, indent=0,
                     separators=(',', ':')).replace('\n', '')

    # Serialise first, then open: the target is only truncated once there is
    # something to write, and the context manager guarantees the handle is
    # flushed and closed even if write() fails.
    with open('Lists/disconnect.json', 'w') as f:
        f.write(out)
def generate_blacklists(blacklist="shavar-prod-lists/disconnect-blacklist.json",
                        entitylist="shavar-prod-lists/disconnect-entitylist.json"):
    """Write one compact JSON blocklist per Disconnect category.

    Reads the categorised blacklist from *blacklist*, the entity list from
    *entitylist* and the Google overrides from
    ``shavar-prod-lists/google_mapping.json``. It then writes
    ``app/src/main/res/raw/disconnect_<category>.json`` for the Advertising,
    Analytics, Social and Content categories and prints the number of
    entries in each.

    Generating the categorical lists requires some manual tweaking to the
    data at the moment; the individual steps are commented below.

    Raises ``KeyError`` when an expected category is missing from the input,
    plus whatever ``open``/``json.load`` raise for unreadable files.
    """
    def find_entry(entry, list_):
        """Return the first dict in *list_* whose only key is *entry*, else None."""
        for d in list_:
            # list(d) instead of d.keys(): identical on Python 2, and it
            # also works where keys() returns a view that never equals a list.
            if list(d) == [entry]:
                return d
        return None

    # First, massage the existing categorical data slightly
    with open(blacklist) as fp:
        categories = json.load(fp)["categories"]

    # Remove what we know we don't care about
    del categories["Legacy Disconnect"]
    del categories["Legacy Content"]

    # Move the Twitter and Facebook entries into the Social category from
    # the Disconnect category. An entry that is absent is skipped; appending
    # the None returned by find_entry would crash the loops further down.
    disconnect = categories.pop("Disconnect")
    for moved_name in ("Facebook", "Twitter"):
        moved = find_entry(moved_name, disconnect)
        if moved is not None:
            categories["Social"].append(moved)

    # Load the entitylist to map the whitelist entries.
    with open(entitylist) as fp:
        entities = json.load(fp)

    # Change the Google entries for the respective categories
    with open("shavar-prod-lists/google_mapping.json") as fp:
        tweaks = json.load(fp)["categories"]

    for category in ("Advertising", "Analytics", "Social"):
        cat = categories[category]
        # NOTE(review): assumes each tweaks category is a list whose first
        # item is a {"Google": {property: [resources]}} entity, the same
        # shape the category lists use -- confirm against
        # google_mapping.json.
        google_tweak = tweaks[category][0]
        goog = find_entry("Google", cat)

        if goog is None:
            # No data exist for this category, just append
            cat.append(google_tweak)
        else:
            # Merge into the property map stored under "Google", not into
            # the single-key wrapper dict that find_entry returns.
            merged = goog["Google"]
            for prop, resources in google_tweak["Google"].items():
                if prop not in merged:
                    merged[prop] = resources
                    continue

                for resource in resources:
                    if resource not in merged[prop]:
                        merged[prop].append(resource)
                merged[prop].sort()

        # NOTE(review): sorting a list of dicts relies on Python 2 ordering
        # semantics; Python 3 would need an explicit key function here.
        cat.sort()

    for category in ("Advertising", "Analytics", "Social", "Content"):
        blocklist = []

        for entity in categories[category]:
            for name, domains in entity.items():
                for property_, resources in domains.items():
                    if name in entities:
                        props = entities[name]["properties"]
                    else:
                        # Unknown entity: fall back to the host of the
                        # property URL, without a leading "www" label.
                        host_labels = urlparse.urlparse(property_).netloc.split(".")
                        if host_labels[0] == "www":
                            host_labels.pop(0)
                        props = [".".join(host_labels)]

                    for res in resources:
                        blocklist.append(create_blocklist_entry(res, props))

        print("{cat} blacklist has {count} entries."
              .format(cat=category, count=len(blocklist)))

        with open("app/src/main/res/raw/disconnect_{0}.json".format(category.lower()),
                  "w") as fp:
            # indent=0 plus newline stripping yields compact one-line JSON.
            out = json.dumps(blocklist, indent=0,
                             separators=(',', ':')).replace('\n', '')
            fp.write(out)
if __name__ == "__main__":
    # generate_blacklists() is deliberately not called for now.
    # Dumb copy the lists for now, until we switch to the compacted version as per focus-ios
    rawpath = "app/src/webkit/res/raw"

    # (source file, name appended to rawpath) for every list that is shipped.
    copies = (
        ("shavar-prod-lists/disconnect-entitylist.json", "/entitylist.json"),
        ("shavar-prod-lists/disconnect-blacklist.json", "/blocklist.json"),
        ("shavar-prod-lists/google_mapping.json", "/google_mapping.json"),
    )

    # Create the destination directory on first run.
    if not os.path.exists(rawpath):
        os.makedirs(rawpath)

    for source, suffix in copies:
        shutil.copy(source, rawpath + suffix)