forked from malicialab/avclass
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathavclass_generic_detect.py
executable file
·83 lines (66 loc) · 2.4 KB
/
avclass_generic_detect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
'''
AVClass Generic detect
'''
import sys
import argparse
import subprocess
import os
def main(args):
    '''
    Run avclass_labeler in generic-detection mode and print frequent tokens.

    The labeler is started as a child process (resolved through PATH as
    "python", script looked up in the current working directory) with its
    stdout discarded. Afterwards the file named after the input file
    (basename, extension replaced by ".gen") is read from the current
    directory -- presumably written by the labeler's -gendetect mode; that
    script is not visible from here.

    The first line of the .gen file is echoed to stdout as a header. Every
    following line must have the form "token<TAB>count" and is echoed only
    when count is strictly greater than args.tgen.

    args -- parsed command line; reads args.vt, args.lb (input file, one
            of the two is set), args.gt (ground truth file) and args.tgen
            (integer threshold).

    Raises IOError/OSError if the labeler cannot be started or the .gen
    file cannot be opened, ValueError on a malformed .gen line.
    '''
    # Set input switch
    itype = '-vt' if args.vt else '-lb'
    ifile = args.vt if args.vt else args.lb

    # Run avclass_labeler
    sys.stderr.write('[-] Running avclass_labeler on %s\n' % (ifile))
    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters reach the labeler unchanged.
    cmd = ['python', 'avclass_labeler.py', itype, ifile,
           '-alias', '/dev/null',
           '-gen', '/dev/null',
           '-gendetect',
           '-gt', args.gt]
    # The context manager guarantees the devnull handle is released.
    with open(os.devnull, 'w') as fnull:
        status = subprocess.call(cmd, stdout=fnull)
    if status:
        # Keep going (as before), but make a failed run visible: the .gen
        # file read below may be missing or left over from an earlier run.
        sys.stderr.write('[-] Warning: avclass_labeler exited with '
                         'status %d\n' % status)

    # Process generic tokens file
    sys.stderr.write('[-] Processing results.\n')
    gen_fname = os.path.basename(os.path.splitext(ifile)[0]) + '.gen'
    with open(gen_fname, 'r') as fr:
        for pos, line in enumerate(fr):
            cline = line.strip('\n')
            # Print headers
            if not pos:
                sys.stdout.write("%s\n" % cline)
                continue
            # Only the family count is needed; the token itself is echoed
            # as part of the whole line.
            _token, fam_num = cline.split('\t')
            if int(fam_num) > args.tgen:
                sys.stdout.write("%s\n" % cline)

    # Done
    sys.stderr.write('[-] Done.\n')
def build_argparser():
    '''
    Build the command line parser for the generic token detection script.

    Options: -vt / -lb (input file, exactly one is expected by the script
    entry point), -tgen (integer threshold, default 8) and -gt (ground
    truth file). Returns the configured argparse.ArgumentParser.
    '''
    argparser = argparse.ArgumentParser(
        prog='avclass_generic_detect',
        description='Given a collection of VT reports and the family '
                    'names of these samples (i.e., groundtruth) it '
                    'generates a list of generic tokens to be excluded '
                    'from labeling.')

    argparser.add_argument('-vt',
        help='file to parse with full VT reports '
             '(REQUIRED if -lb argument not present)')

    argparser.add_argument('-lb',
        help='file to parse with subset of VT reports '
             '{md5,sha1,sha256,scan_date,av_labels} '
             '(REQUIRED if -vt not present)')

    argparser.add_argument('-tgen',
        help='Minimum number of families that a token appears. '
             'Default: 8',
        type=int,
        default=8)

    argparser.add_argument('-gt',
        help='file with ground truth')

    return argparser


if __name__ == '__main__':
    args = build_argparser().parse_args()

    # Exactly one input source must be given.
    if not args.vt and not args.lb:
        sys.stderr.write('Argument -vt or -lb is required\n')
        sys.exit(1)

    if args.vt and args.lb:
        sys.stderr.write('Use either -vt or -lb argument, not both.\n')
        sys.exit(1)

    # Generic token detection compares tokens across known families, so
    # the ground truth file is mandatory.
    if not args.gt:
        sys.stderr.write('Generic token detection needs groundtruth (-gt)\n')
        sys.exit(1)

    main(args)