#!/usr/bin/python
# -*- coding: utf-8 -*-
__author__ = 'Andrey Glauzer'
__license__ = "MIT"
__version__ = "0.0.1"
__maintainer__ = "Andrey Glauzer"
__status__ = "Development"
import os.path
import logging
import argparse
import yaml
import sys
from utils.engines.crawler import TorConnect
from utils.engines.dbconnection import DataBase
from utils.engines.gist import GistAPI
from utils.engines.SecurityNews import CyberSecurityNews
from utils.engines.reddit import Reddit
from utils.engines.DiscoverDarkWeb import DiscoverDarkWebService
from utils.engines.torch import TORCH
from utils.engines.pastebin import Pastebin
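

# observer.py drives two workflows: (1) harvesting .onion URLs from surface-web
# sources (Gist, Reddit, CyberSecurityNews, Pastebin, TORCH, DiscoverDarkWebService)
# into a local database, and (2) crawling the stored URLs over a Tor proxy while
# scoring them against the categories and keywords defined in the configuration file.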
class VigilantOnion:
    def __init__(self):
        parser = argparse.ArgumentParser(
            description="VigilantOnion collects as many onion-network sites as possible "
                        "and adds them to a database. Once the sites are collected, a "
                        "crawler monitors them for keywords defined by you.",
            epilog="You can also develop new engines so that the database receives more onion URLs."
        )
        parser.add_argument(
            '--config',
            '-c',
            help="Configuration file in YAML format.",
            action='store',
            dest='config'
        )
        parser.add_argument(
            '--crawler',
            help="Starts the TOR network URL crawler process.",
            action='store_true',
            dest='crawler'
        )
        parser.add_argument(
            '--url',
            '-u',
            help="Specifies the URL to crawl.",
            action='store',
            dest='url'
        )
        parser.add_argument(
            '--search',
            help="Starts the URL search process on the surface web. "
                 "Select the engines to use with --engines.",
            action='store_true',
            dest='search'
        )
        parser.add_argument(
            '--engines',
            '-e',
            help="Comma-separated list of engines to use when searching for URLs.",
            action='store',
            dest='engines'
        )
        parser.add_argument(
            '--find',
            '-f',
            help="Comma-separated terms to search for on .onion search engines.",
            action='store',
            dest='find'
        )
        parser.add_argument(
            '--pastebin',
            help="Searches for .onion URLs in the given Pastebin paste IDs.",
            action='store',
            dest='pastebin'
        )
        args = parser.parse_args()
        if args.config and os.path.exists(args.config):
            if args.config.endswith(('.yml', '.yaml')):
                with open(args.config, 'r') as stream:
                    data = yaml.load(stream, Loader=yaml.FullLoader)

                self.debug = data.get('debug', '')
                self.dbname = data.get('dbname', '')
                self.dbpath = data.get('dbpath', '')
                self.server_proxy = data.get('server_proxy', '')
                self.port_proxy = data.get('port_proxy', '')
                self.type_proxy = data.get('type_proxy', '')
                self.timeout = data.get('timeout', '')
                self.score_categorie = data.get('score_categorie', '')
                self.score_keywords = data.get('score_keywords', '')
                self.count_categories = data.get('count_categories', '')
                self.sendlog = data.get('sendlog', '')
                self.logip = data.get('logip', '')
                self.logport = data.get('logport', '')

                logging.basicConfig(
                    level=logging.DEBUG if self.debug else logging.INFO,
                    format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                )
                self.logger = logging.getLogger('Start VigilantOnion')

                self.crawler = args.crawler
                self.search = args.search
                self.url = args.url
                self.engines = args.engines
                self.find = args.find
                self.pastebin = args.pastebin
            else:
                logging.error(
                    'The settings file must be a YAML (.yml) file.\n')
                sys.exit(1)
        else:
            logging.error(
                'The configuration file does not exist. Check the path and try again.\n')
            sys.exit(1)
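
    # A minimal example of the YAML configuration file expected above. The keys
    # mirror the data.get(...) calls in __init__; the values are only illustrative
    # and must be adjusted to your environment.
    #
    #   debug: false
    #   dbname: vigilantonion.db
    #   dbpath: /opt/VigilantOnion/
    #   server_proxy: 127.0.0.1
    #   port_proxy: 9050
    #   type_proxy: socks5h
    #   timeout: 120
    #   score_categorie: 50
    #   score_keywords: 50
    #   count_categories: 3
    #   sendlog: false
    #   logip: 127.0.0.1
    #   logport: 514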
    @property
    def start(self):
        self.logger.debug('Checking the database if everything is OK.')
        self.database = DataBase(
            dbname=self.dbname,
            dbpath=self.dbpath)

        if (self.search and self.engines is not None) \
                or self.pastebin is not None \
                or self.find is not None:
            # Collect URLs from the selected surface-web sources and store any
            # that are not yet in the database.
            if self.engines is not None:
                for engine in self.engines.split(','):
                    if engine.lower() == 'cybersecuritynews':
                        geturls = CyberSecurityNews().start
                        if geturls:
                            for url in geturls:
                                if len(self.database.compare(url=url)) == 0:
                                    self.database.save(
                                        url=url,
                                        source="CyberSecurityNews",
                                        type="Domain",
                                    )
                    elif engine.lower() == 'gist':
                        geturls = GistAPI().start
                        if geturls:
                            for url in geturls:
                                if len(self.database.compare(url=url)) == 0:
                                    self.database.save(
                                        url=url,
                                        source="Gist",
                                        type="Domain",
                                    )
                    elif engine.lower() == 'reddit':
                        geturls = Reddit().start
                        if geturls:
                            for url in geturls:
                                if len(self.database.compare(url=url)) == 0:
                                    self.database.save(
                                        url=url,
                                        source="Reddit",
                                        type="Domain",
                                    )
                    elif engine.lower() == 'discoverdarkwebservice':
                        geturls = DiscoverDarkWebService(
                            port_proxy=self.port_proxy,
                            type_proxy=self.type_proxy,
                            server_proxy=self.server_proxy).start
                        if geturls:
                            for url in geturls:
                                if len(self.database.compare(url=url)) == 0:
                                    self.database.save(
                                        url=url,
                                        source="DiscoverDarkWebService",
                                        type="Domain",
                                    )
            elif self.pastebin is not None:
                # Extract .onion URLs from the given Pastebin paste IDs.
                getPastebin = Pastebin(ids=self.pastebin).start
                for url in getPastebin:
                    if len(self.database.compare(url=url)) == 0:
                        self.database.save(
                            url=url,
                            source="Pastebin",
                            type="Domain",
                        )
            elif self.find is not None:
                # Search the TORCH onion search engine for the given terms.
                geturlsTORCH = TORCH(port_proxy=self.port_proxy,
                                     type_proxy=self.type_proxy,
                                     server_proxy=self.server_proxy,
                                     terms=self.find.split(','),
                                     timeout=self.timeout).start
                if geturlsTORCH is not None:
                    for url in geturlsTORCH:
                        if len(self.database.compare(url=url)) == 0:
                            self.database.save(
                                url=url,
                                source="TORCH",
                                type="Domain",
                            )
        elif self.crawler:
            self.logger.info('Starting the Crawler process.')
            if self.url is not None:
                if len(self.database.compare(url=self.url)) == 0:
                    self.database.save(
                        url=self.url,
                        source="Script",
                        type="Script"
                    )
                TorConnect(
                    urls=self.database.select_url(url=self.url),
                    port_proxy=self.port_proxy,
                    type_proxy=self.type_proxy,
                    server_proxy=self.server_proxy,
                    dbname=self.dbname,
                    dbpath=self.dbpath,
                    timeout=self.timeout,
                    count_categories=self.count_categories,
                    sendlog=self.sendlog,
                    logip=self.logip,
                    logport=self.logport,
                    score_categorie=self.score_categorie,
                    score_keywords=self.score_keywords
                ).start
            else:
                select_urls = self.database.select(
                    score_categorie=self.score_categorie,
                    score_keywords=self.score_keywords)
                TorConnect(
                    urls=select_urls,
                    port_proxy=self.port_proxy,
                    type_proxy=self.type_proxy,
                    server_proxy=self.server_proxy,
                    dbname=self.dbname,
                    dbpath=self.dbpath,
                    timeout=self.timeout,
                    count_categories=self.count_categories,
                    sendlog=self.sendlog,
                    logip=self.logip,
                    logport=self.logport,
                    score_categorie=self.score_categorie,
                    score_keywords=self.score_keywords
                ).start


if __name__ == '__main__':
    VigilantOnion().start
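
# Illustrative invocations (flag names come from the argparse definitions above;
# file names, engine choices, and search terms are only examples):
#
#   python observer.py --config config.yml --search --engines gist,reddit,cybersecuritynews
#   python observer.py --config config.yml --find "bitcoin,database leak"
#   python observer.py --config config.yml --pastebin <paste-id>
#   python observer.py --config config.yml --crawler
#   python observer.py --config config.yml --crawler --url example.onion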