-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathwebsite-formatter.py
69 lines (60 loc) · 3.11 KB
/
website-formatter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import csv, sys, socket, json
from ip2geotools.databases.noncommercial import DbIpCity
from passivetotal.libs.attributes import AttributeRequest
# Enrich every not-yet-processed row of the input CSV (path given as the first
# command-line argument) with the domain's resolved IP address, geo-coordinates
# and the web-tracker IDs returned by the tracker attribute request below.
# The enriched rows are collected in `response_data` so they can be written
# out once the whole input has been read.
#
# Input columns read below: domain, lat, lng, locationVerified and,
# optionally, processed.
#
# NOTE(review): the indentation of this script was lost in the copy this was
# restored from. The nesting used here (one output row appended per
# unprocessed input row) is the only arrangement in which `csv_row` is always
# defined when it is appended -- confirm against the upstream file.

# Placeholder written to the "ip" column when the domain cannot be resolved.
UNRESOLVED_IP = "domain did not resolve"

# Tracker attribute types that are copied into same-named output columns.
TRACKER_TYPES = (
    "GoogleAnalyticsTrackingId",
    "AddThisPubID",
    "FacebookId",
    "TwitterId",
)

# Credentials for the tracker API; they are empty here and must be filled in.
username = ""
api_key = ""
# The client does not depend on the row, so build it once instead of per row.
tracker_client = AttributeRequest(username=username, api_key=api_key)

response_data = []
# newline='' is what the csv module asks for when handing it a file object.
with open(sys.argv[1], newline='') as csvfile:
    filereader = csv.DictReader(csvfile)
    for row in filereader:
        # Input files without a "processed" column are treated as unprocessed.
        row.setdefault("processed", "False")
        if row["processed"] != "False":
            continue

        # Get the IP address of the domain name.
        try:
            ip = socket.gethostbyname(row['domain'])
            resolved = True
        except (OSError, UnicodeError, TypeError):
            # OSError covers DNS failures (socket.gaierror), UnicodeError an
            # un-encodable host name, TypeError a missing (None) domain cell.
            # Unlike a bare `except`, Ctrl-C is no longer swallowed.
            ip = UNRESOLVED_IP
            resolved = False

        # Get geo-coordinates of the IP address, but only when the input row
        # has none of its own and there is a real address to look up.
        coordinates = None
        if resolved and not row['lat'] and not row['lng']:
            try:
                coordinates = DbIpCity.get(ip, api_key='free')
            except Exception as error:
                # Best effort: a failed lookup must not abort the batch, but
                # it should be visible rather than silently ignored.
                print(
                    "geolocation lookup failed for {}: {}".format(ip, error),
                    file=sys.stderr,
                )

        # Gather web trackers (Google Analytics Tracking ID, FacebookId, etc.)
        trackers = tracker_client.get_host_attribute_trackers(query=row['domain'])

        # Create dictionary that will be written at the end of processing the
        # entire file. The tracker call can return a varying set of fields, so
        # rows are collected first and written together afterwards.
        # "processed" starts as "False" and is only flipped on success, so a
        # row with an unrecognised response is never recorded with an empty
        # "processed" cell.
        csv_row = {
            "domain": row['domain'],
            "ip": ip,
            "locationVerified": row['locationVerified'],
            "processed": "False",
        }
        if coordinates:
            csv_row["latitude"] = str(coordinates.latitude)
            csv_row["longitude"] = str(coordinates.longitude)
        else:
            # Fall back to whatever the input row already carried.
            csv_row["latitude"] = row['lat']
            csv_row["longitude"] = row['lng']

        # A response carrying a "status" key is kept as not processed
        # (presumably an API error payload -- TODO confirm); "status" takes
        # precedence over "success", as in the original if/elif.
        if "status" not in trackers and "success" in trackers:
            for tracker in trackers["results"]:
                attribute_type = tracker["attributeType"]
                if attribute_type in TRACKER_TYPES:
                    csv_row[attribute_type] = tracker['attributeValue']
            # The original line here was `row["processed"] == "True"`, a
            # comparison with no effect; the intent is to mark the output row.
            csv_row["processed"] = "True"

        response_data.append(csv_row)
# Write every collected row to "processed.csv" in the current directory.
# TODO(review): the original carried a bare "#sys.argv[2]" note at this point,
# which suggests the output path was meant to come from the second
# command-line argument; it is still hard-coded.
#
# newline='' is required by the csv module for files it writes to; without it
# platforms that translate "\n" emit "\r\r\n" and every row is followed by a
# blank line.
with open("processed.csv", mode='w', newline='') as new_csv_file:
    # Fixed column order; tracker columns a row does not have are left empty
    # (restval=None) and keys not listed here are dropped (extrasaction).
    fieldnames = ['domain', 'ip', 'latitude', 'longitude', 'locationVerified', 'processed', 'GoogleAnalyticsTrackingId', 'AddThisPubID', 'FacebookId', 'TwitterId']
    writer = csv.DictWriter(new_csv_file, fieldnames=fieldnames, restval=None, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(response_data)