forked from omriher/CapTipper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCTMagic.py
190 lines (163 loc) · 6.46 KB
/
CTMagic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#
# CapTipper is a malicious HTTP traffic explorer tool
# By Omri Herscovici <omriher AT gmail.com>
# http://omriher.com
# @omriher
#
#
# This file is part of CapTipper, and part of the Whatype library
# Whatype is an independent file type identification Python library
# https://github.com/omriher/whatype
#
# CapTipper is free software under the GPLv3 License
#
import os
class WhatypeErr(Exception):
    """Error raised by Whatype when loading magics or identifying content.

    Args:
        when: Short description of the operation that was in progress.
        error: Description of what went wrong.
    """

    def __init__(self, when, error):
        # Forward both values to Exception so that ``args`` is populated.
        # Without this the exception cannot be copied or pickled (the
        # reconstruction would call WhatypeErr() with no arguments) and
        # repr() shows no details.
        super(WhatypeErr, self).__init__(when, error)
        self.when = when    # operation during which the error occurred
        self.error = error  # description of the failure

    def __str__(self):
        # The message is deliberately wrapped in repr(), so str() yields a
        # quoted string; kept as-is for backward compatibility.
        return repr("Whatype Error on " + self.when + " : " + self.error)
class MagicNode(object):
    """A single node of the magic-bytes prefix tree.

    Every node carries one byte token (``byte``) and a list of child nodes.
    ``filetype``, ``ext`` and ``strings`` start out empty and are filled in
    by whoever builds the tree.
    """

    def __init__(self, byte):
        self.children = []
        self.strings = ""
        self.ext = ""
        self.filetype = ""
        self.byte = byte

    def add_child(self, obj):
        """Append a new child node holding ``obj`` and return that node."""
        node = MagicNode(obj)
        self.children.append(node)
        return node

    def has_child(self, data):
        """Return the first child whose byte equals ``data``, else None.

        The comparison ignores case.
        """
        matching = (kid for kid in self.children
                    if kid.byte.lower() == data.lower())
        return next(matching, None)

    def get_childrens_by_byte(self, data):
        """Return every child whose byte equals ``data`` (case-insensitive).

        The result is a list in insertion order; it is empty when nothing
        matches.
        """
        return [kid for kid in self.children
                if kid.byte.lower() == data.lower()]
class Whatype(object):
    """File type identifier backed by a prefix tree of magic-byte signatures.

    The signatures are read from a CSV file whose lines have four
    comma-separated fields: file type, magic bytes (space-separated hex
    tokens), file extension and a ``;``-separated list of strings that must
    also appear in the content.
    """

    WTver = "0.1"
    WTrev = "01"
    MAGICLIST_NAME = "magics.csv"

    def __init__(self,magic_file=""):
        """Load the magics list and build the prefix tree.

        Args:
            magic_file: Path of a magics CSV file. When empty, the file
                named ``MAGICLIST_NAME`` next to this module is used.

        Raises:
            WhatypeErr: If the requested (or the default) magics file does
                not exist.
        """
        if magic_file:
            if not os.path.isfile(magic_file):
                raise WhatypeErr("magics list load", "Couldn't find " + magic_file)
            self.magic_list_file = magic_file
        else:
            default_mgc = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                       Whatype.MAGICLIST_NAME)
            if not os.path.isfile(default_mgc):
                raise WhatypeErr("loading default magics list",
                                 "Couldn't find default magics list. "
                                 "Please provide a magics CSV file")
            self.magic_list_file = default_mgc

        # Create main prefix tree graph (Trie)
        self.Tree = MagicNode("all_magics")

        with open(self.magic_list_file, "r") as ins:
            for line in ins:
                # A blank line (e.g. a trailing newline at the end of the
                # file) carries no signature; skip it instead of failing
                # with an IndexError below.
                if not line.strip():
                    continue
                parts = line.split(",")
                # parts[0] = File Type
                # parts[1] = Magic bytes
                # parts[2] = File Ext
                # parts[3] = File Strings
                self.create_branch(0, self.Tree, parts[0], parts[1], parts[2], parts[3])

    def create_branch(self, node_level, father, filetype, magic, ext, strings):
        """Insert one signature into the tree, starting below ``father``.

        Args:
            node_level: Index into the space-separated ``magic`` tokens at
                which insertion starts.
            father: Node under which the token at ``node_level`` is placed.
            filetype: File type stored on the final node.
            magic: Space-separated magic byte tokens.
            ext: File extension stored on the final node.
            strings: Strings specification stored on the final node.
        """
        magic_bytes = magic.split(" ")
        byte = magic_bytes[node_level]
        node_level += 1

        if node_level < len(magic_bytes):
            # Intermediate token: reuse an existing child when there is one.
            son = father.has_child(byte)
            if son is None:
                son = father.add_child(byte)
            self.create_branch(node_level, son, filetype, magic, ext, strings)
        else:
            # Final token: always gets its own node, so several signatures
            # may end on sibling nodes holding the same byte.
            son = father.add_child(byte)
            son.filetype = filetype
            son.ext = ext
            son.strings = strings

    def print_tree(self,Node, index):
        """Print the subtree below ``Node``, indenting by depth ``index``."""
        for nd in Node.children:
            # Single-argument call form prints the same on Python 2 and 3.
            print("--" * index + nd.byte)
            if len(nd.children) > 0:
                self.print_tree(nd, index + 1)

    def strings_search(self,strings_list, content):
        """Check that every required string occurs in ``content``.

        Args:
            strings_list: ``;``-separated strings; trailing whitespace of
                each entry is ignored. An empty entry always matches.
            content: Data to search (text or bytes); matching ignores case.

        Returns:
            True when all entries are found, False otherwise.
        """
        haystack = content.lower()
        for needle in strings_list.split(";"):
            needle = needle.lower().rstrip()
            # Text needles cannot be searched in bytes content directly.
            if isinstance(haystack, bytes) and not isinstance(needle, bytes):
                needle = needle.encode("utf-8")
            if haystack.find(needle) == -1:
                return False
        return True

    def return_magic(self,cont,Name,Ext):
        """Return ``(Name, Ext)``, substituting a text/binary guess.

        When ``Name`` is empty the result becomes an "Inconclusive." label
        with extension ``TEXT`` or ``BINARY`` depending on ``istext(cont)``.
        """
        if not Name:
            Name = "Inconclusive. "
            if self.istext(cont):
                Name += "Probably text"
                Ext = "TEXT"
            else:
                Name += "Probably binary"
                Ext = "BINARY"
        return Name,Ext

    def istext(self,cont):
        """Guess whether ``cont`` is text.

        Based on http://code.activestate.com/recipes/173220/

        Returns:
            True for empty content; False when a null byte is present or
            when more than 30% of the characters fall outside printable
            ASCII (32-126) plus newline, carriage return, tab and backspace.
        """
        if not cont:
            # Empty files are considered text
            return True

        # Work on integer character codes so the same logic serves byte
        # strings and text strings alike.
        if isinstance(cont, bytes):
            codes = bytearray(cont)
        else:
            codes = [ord(ch) for ch in cont]

        if 0 in codes:
            # Files with null bytes are likely binary
            return False

        text_codes = set(range(32, 127))
        text_codes.update(ord(ch) for ch in "\n\r\t\b")
        non_text = sum(1 for code in codes if code not in text_codes)

        # If more than 30% non-text characters, then
        # this is considered a binary file
        return float(non_text) / float(len(codes)) <= 0.30

    @staticmethod
    def _hex_at(cont, index):
        """Return the two-digit hex text of the character at ``index``."""
        import binascii
        chunk = cont[index:index + 1]
        if not isinstance(chunk, bytes):
            chunk = chunk.encode("latin-1")
        return binascii.hexlify(chunk).decode("ascii")

    def find(self, cont, Node, index=0, magic_history=None):
        """Walk the tree along the leading bytes of ``cont``.

        Args:
            cont: Content to identify (bytes or text).
            Node: Tree node to continue from.
            index: Offset in ``cont`` of the next byte to match.
            magic_history: Nodes matched so far, shared by the recursive
                calls. A fresh list is created when omitted.

        Returns:
            A ``(file type, extension)`` tuple. ``("", "")`` for empty or
            None content. ``("", "Rollback")`` is an internal signal to the
            calling recursion level that the walk went too deep.
        """
        if cont is None or len(cont) == 0:
            return "",""
        if magic_history is None:
            # A shared default list would leak matched nodes from one
            # identification into the next.
            magic_history = []

        # Content that ends exactly where a signature ends has no further
        # byte to look up; treat that the same as "no matching child".
        if index < len(cont):
            candidates = Node.get_childrens_by_byte(self._hex_at(cont, index))
        else:
            candidates = []

        if candidates:
            magic_history.extend(candidates)
            Name, Ext = self.find(cont, candidates[0], index + 1, magic_history)
            if Ext != "Rollback":
                return Name, Ext

            # The deeper search failed: unwind every node seen so far, most
            # recent first, and accept the first complete signature whose
            # strings are present.
            while magic_history:
                visited = magic_history.pop()
                if visited.filetype != "" and self.strings_search(visited.strings, cont):
                    return visited.filetype, visited.ext
            return self.return_magic(cont,"","")

        # last hex node found
        if Node.filetype != "" and self.strings_search(Node.strings, cont):
            return Node.filetype, Node.ext

        if len(magic_history) == 0:
            return self.return_magic(cont,"","")

        return "", "Rollback" # Magic search went too far, rollbacking

    def identify_file(self,filepath):
        """Identify the file at ``filepath``.

        Returns:
            A ``(file type, extension)`` tuple.

        Raises:
            WhatypeErr: If reading or identifying the file fails.
        """
        try:
            # Binary mode keeps the bytes intact; the context manager makes
            # sure the handle is closed.
            with open(filepath, "rb") as handle:
                file_content = handle.read()
            return self.find(file_content, self.Tree, 0, [])
        except Exception as e:
            raise WhatypeErr("file identification", str(e))

    def identify_buffer(self,file_content):
        """Identify content that is already in memory.

        Returns:
            A ``(file type, extension)`` tuple.

        Raises:
            WhatypeErr: If identification fails.
        """
        try:
            return self.find(file_content, self.Tree,0,[])
        except Exception as e:
            raise WhatypeErr("buffer identification", str(e))