-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathham.py
executable file
·181 lines (165 loc) · 7.48 KB
/
ham.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
# Builds a ClamAV signature line from the byte sequence shared by a set of
# similar samples, using the BASS k-LCS helpers imported below.
try:
    # Prefer the re2 bindings when they are installed; otherwise use stdlib re.
    import re2 as re
except ImportError:
    import re
import binascii
import sys
from optparse import OptionParser
import glob
import os
try:
    from cisco.bass.algorithms import hamming_klcs
    from cisco.bass.algorithms import ndb_from_common_sequence
    HAVE_BASS = True
except ImportError:
    # Only a missing package means "BASS is not installed"; any other failure
    # is a real error and is allowed to surface with its traceback.
    HAVE_BASS = False
if not HAVE_BASS:
    print("You must be all about that b.a.s.s.... https://github.com/Cisco-Talos/BASS/tree/master/bass\nsudo yum -y install graphviz graphviz-devel\ngit clone https://github.com/Cisco-Talos/BASS.git\ncd BASS/bass/python\nsudo python setup.py install")
    # A missing hard dependency is a failure: exit non-zero like the other
    # error paths in this script do.
    sys.exit(-1)
# Text appended to the --target help. The literal "\n" escapes and the real
# line breaks are both part of the string and are kept exactly as written.
targetstring="""0 = any file\n
1 = Portable Executable, both 32- and 64-bit.;\n
2 = file inside OLE2 container (e.g. image, embedded executable, VBA;\n
script). The OLE2 format is primarily used by MS Office and MSI installa-tion files.;\n
3 = HTML (normalized: whitespace transformed to spaces, tags/tag at-tributes normalized, all lowercase), Javascript is normalized too: all strings
are normalized (hex encoding is decoded), numbers are parsed and normal-ized, local variables/function names are normalized to n001 format, argu-ment to eval() is parsed as JS again, unescape() is handled, some simple JS packers are handled, output is whitespace normalized.;\n
4 = Mail file;\n
5 = Graphics;\n
6 = ELF;\n
7 = ASCII text file (normalized);\n
8 = Unused;\n
9 = Mach-O files;\n
10 = PDF files;\n
11 = Flash files;\n
12 = Java class files;"""

# Command-line interface. Exactly one of -f (one string per line) or -d
# (one string per file) supplies the inputs; -s names the signature.
parser = OptionParser(usage="https://www.youtube.com/watch?v=nLB4dU3Yc6M\nexample:\n\n./ham.py -i --maxsplit=500 --normwhite --target=7 -P '.{0,500}(?i)ActiveXObject.{0,500}' -s LockyJS -d /home/coz/Downloads/LockyJS")
parser.add_option(
    "-s", dest="sname", type="string",
    help="the sig name dumb-dumb")
parser.add_option(
    "-d", dest="dir", type="string",
    help="dir of similar samples to H.A.M")
parser.add_option(
    "-f", dest="input_file", type="string",
    help="will read a LF seperated file and treat each line as a seperate string to HAM")
parser.add_option(
    "-P", dest="i_re", type="string",
    help="Must match this regex to be included as input string '(?i).{0,100}poopship.{0,500}")
parser.add_option(
    "--target", dest="target", type="int", default=2,
    help="it's a clamav target, it's a number. From clamav man:%s" % (targetstring))
parser.add_option(
    "--normwhite", dest="normwhite", action="store_true", default=False,
    help="Some targets clamav will normalize whitespace in the backgroup, so do the same")
parser.add_option(
    "--maxsplit", dest="maxsplit", type="int", default=0,
    help="remove wildcards from sig replace with max distance format like {0-500} where --maxsplit=500")
parser.add_option(
    "--seqrepl", dest="seqrepl", type="int", default=0,
    help="replace repeating byte sequences longer than argument with length of sequence minumum of 8.Good starting value might be 20")
parser.add_option(
    "-i", dest="nocase", action="store_true", default=False,
    help="make the resulting sig nocase")
# The next two options used to carry the copy-pasted "-i" help text; the
# descriptions below state what the flags actually toggle in this script.
parser.add_option(
    "-l", dest="lwsom", action="store_true", default=False,
    help="keep sig segments that consist only of whitespace bytes (they are dropped by default)")
parser.add_option(
    "--sizerange", dest="sizerange", action="store_true", default=False,
    help="add a FileSize:min-max condition built from the sizes of the files loaded with -d")
(options, args) = parser.parse_args()
# Shared state filled in while the inputs are loaded.
include_regex = None   # compiled -P filter, or None when inputs are used whole
target_strings = []    # unique strings later handed to hamming_klcs
matches = []           # input files that contributed a new regex hit
misses = []            # input files that did not
size_list = []         # byte sizes of the files read from -d
if options.i_re:
    try:
        # re.S lets '.' in the user's pattern span newlines inside a sample.
        include_regex = re.compile(options.i_re, re.S)
    except Exception as e:
        # The message previously had no placeholder, so formatting it raised
        # TypeError and hid the real compile error.
        print("Error Compiling Include Regex: %s" % (e,))
        sys.exit(-1)
def _normalize_sample(data):
    """Apply the --normwhite and -i transformations to one raw input string.

    Args:
        data: a line from the -f file or the full content of a -d file.

    Returns:
        The string with whitespace runs collapsed to a single space when
        --normwhite is set, and lower-cased when -i is set.
    """
    if options.normwhite:
        data = re.sub(r'[\r\n\s\t]+', "\x20", data)
    if options.nocase:
        data = data.lower()
    return data


if options.input_file and os.path.exists(options.input_file):
    # -f: every line of the file is one candidate string.
    with open(options.input_file) as handle:
        lines = handle.readlines()
    if not lines:
        print("empty input file exiting")
        sys.exit(-1)
    lines = sorted(lines, key=len)
    print("The number of lines in the list is: %s." % (len(lines)))
    print("The shortest line in the list is: %s." % (len(lines[0])))
    print("The longest line in the list is: %s." % (len(lines[-1])))
    for dapoop in lines:
        dapoop = _normalize_sample(dapoop)
        if include_regex:
            # With -P only the matched span is kept, and only once.
            m = include_regex.search(dapoop)
            if m and m.group(0) not in target_strings:
                target_strings.append(m.group(0))
        elif dapoop not in target_strings:
            target_strings.append(dapoop)
elif options.dir:
    # -d: every regular file directly inside the directory is one candidate.
    flist = []
    if os.path.isdir(options.dir):
        # glob also returns sub-directories, which cannot be opened as
        # samples, so only regular files are kept.
        flist = [path for path in glob.glob(options.dir + "/*") if os.path.isfile(path)]
    else:
        print("print you gotta provdie a dir dawg")
        sys.exit(-1)
    if len(flist) > 1:
        for entry in flist:
            size_list.append(int(os.path.getsize(entry)))
            print(entry)
            with open(entry) as handle:
                dapoop = handle.read()
            dapoop = _normalize_sample(dapoop)
            if include_regex:
                m = include_regex.search(dapoop)
                if m and m.group(0) not in target_strings:
                    target_strings.append(m.group(0))
                    matches.append(entry)
                else:
                    # Counts both "no hit" and "hit identical to an earlier one".
                    misses.append(entry)
            elif dapoop not in target_strings:
                target_strings.append(dapoop)
        # NOTE(review): the source lost its indentation; this message is
        # assumed to follow the loading loop - confirm the intended level.
        print("finished loading files")
# Build the signature prefix: "<name>;Engine:81-255,[FileSize:a-b,]Target:<n>;(0);".
if not options.sname:
    print("need a signame via -s")
    sys.exit(-1)
if options.sizerange and size_list:
    sig = options.sname + ";Engine:81-255,FileSize:%s-%s,Target:%s;(0);" % (min(size_list), max(size_list), options.target)
else:
    if options.sizerange:
        # size_list is only filled while reading files from -d; without any
        # sizes min()/max() would raise, so the condition is left out.
        sys.stderr.write("--sizerange ignored: no file sizes were collected\n")
    sig = options.sname + ";Engine:81-255,Target:%s;(0);" % (options.target)
if include_regex:
    # Report once which files produced a new regex hit and which did not
    # (this report used to be emitted twice by two identical blocks).
    for entry in matches:
        print("match: {0}".format(entry))
    for entry in misses:
        print("miss: {0}".format(entry))
    print("Matches:{0}, Misses:{1}\n".format(len(matches), len(misses)))
    # At least two distinct hits are required before a signature is built.
    # The check uses target_strings because `matches` is only filled when
    # reading files from -d, which made -f combined with -P always abort;
    # for -d input both lists have the same length when -P is set.
    if not target_strings:
        print("not going to generate sig no files matched the regex")
        sys.exit(-1)
    elif len(target_strings) == 1:
        only_source = matches[0] if matches else options.input_file
        print("not going to generate sig only one one file matched the regex {0}".format(only_source))
        sys.exit(-1)
# Compute the common sequence of all inputs and turn it into a hex body in
# which '*' separates the segments.
print("finding klcs")
common = hamming_klcs(target_strings)
print("steaming dem clams")
ndb = ndb_from_common_sequence(target_strings, common)
if not options.lwsom:
    # Drop segments made up solely of hex-encoded whitespace bytes
    # (09, 0a, 0b, 0c, 0d, 20). The pattern used to read "[0[9dabc]", a
    # character class that also discarded unrelated segments such as "abcd".
    whitespace_only = re.compile(r"^(?:0[9dabc]|20)+$")
    ndb = "*".join(
        segment for segment in ndb.split('*')
        if whitespace_only.match(segment) is None
    )
def replacenulls(nullmatch):
    """Collapse the middle of a repeated-byte run into a length placeholder.

    The first two and last two characters of the run are kept and everything
    between them is replaced by the text ``clamnullseqrep{N}``, where N is the
    number of characters removed. Despite the name, the run may consist of any
    repeated byte, not only nulls.

    Args:
        nullmatch: the matched run as a string.

    Returns:
        The shortened run, or ``nullmatch`` unchanged when it has four
        characters or fewer (there is no middle to remove, and the head and
        tail slices would overlap).
    """
    if len(nullmatch) <= 4:
        return nullmatch
    nullseqlen = len(nullmatch) - 4
    return "%sclamnullseqrep{%s}%s" % (nullmatch[0:2], nullseqlen, nullmatch[-2:])
def addmatch(match):
    """Turn a hex-encoded ``clamnullseqrep{N}`` placeholder into ``{N}``.

    Args:
        match: a hex string made of four hex digits, the hex encoding of
            ``clamnullseqrep{N}``, and four more hex digits.

    Returns:
        The same string with the encoded placeholder replaced by the plain
        text ``{N}``; the four leading and four trailing hex digits are kept.
    """
    placeholder = binascii.unhexlify(match[4:-4])
    if not isinstance(placeholder, str):
        # unhexlify returns bytes on Python 3; decode so it can be joined
        # with the surrounding hex text. On Python 2 it is already str.
        placeholder = placeholder.decode("ascii")
    return match[0:4] + placeholder.replace("clamnullseqrep", "") + match[-4:]
if options.seqrepl and options.seqrepl > 0:
    # Runs shorter than the minimum are never collapsed.
    if options.seqrepl < 8:
        options.seqrepl = 8
    # One byte followed by at least seqrepl repeats of itself. (?s) makes '.'
    # match the newline byte too, so runs of 0x0a are collapsed like any other.
    run_pattern = r'(?s)(.)\1{%s,}' % (options.seqrepl)
    # Two bytes, the hex of "clamnullseqrep{", anything up to the hex of "}",
    # two bytes - i.e. what replacenulls() leaves behind once hex-encoded.
    placeholder_pattern = r'.{4}636c616d6e756c6c7365717265707b.+?7d.{4}'
    reass = []
    for entry in ndb.split('*'):
        # Work on the raw bytes so that a "run" means repeated bytes rather
        # than repeated hex digits, then go back to hex.
        buff = binascii.unhexlify(entry)
        buff = re.sub(run_pattern, lambda m: replacenulls(m.group()), buff)
        buff = re.sub(placeholder_pattern, lambda m: addmatch(m.group()), binascii.hexlify(buff))
        reass.append(buff)
    ndb = "*".join(reass)
if options.nocase:
    ndb = ndb + "::i"
if options.maxsplit > 0:
    # Replace each '*' wildcard with a bounded gap of 0..maxsplit.
    ndb = re.sub(r'\*', "{0-%s}" % (options.maxsplit), ndb)
print("{0}{1}".format(sig, ndb))