-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpcapmatric.py
204 lines (180 loc) · 6.63 KB
/
pcapmatric.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env python
"""
This script takes a pcap-format network capture file and extracts the network flow information it contains, such as data length, request frequency, request interval, etc.
"""
import getopt, sys, os
import dpkt
import socket, hashlib, getopt
def usage():
    """Print the command-line help text for this script to stdout.

    The program name shown in the synopsis is taken from ``sys.argv[0]``.
    """
    # A parenthesised single-argument print is valid on Python 2 and 3 alike.
    # The synopsis shows -s and -d as alternatives because main() refuses to
    # run when both are supplied.
    print("""
Usage:
------
python %s [-s <singlefile> | -d <directory>] [-m <maxnum>] [-n <minnum>]
Valid options are:
-h You are looking at this.
-s Single pcap file name.
-d Multiple pcap files, typically a directory path.
-m Maxnum packets to be processed, default is 16.
-n Minnum packets of a stream, default is 16.
""" % (sys.argv[0]))
def filemd5(f):
    """Return the hexadecimal MD5 digest of the file at path *f*.

    The file is opened in binary mode and read in chunks of
    ``128 * md5.block_size`` bytes, so it is never loaded into memory whole.

    Args:
        f: Path of the file to hash.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    block_size = 128 * md5.block_size
    # ``with`` closes the handle even when a read fails part-way through.
    with open(f, 'rb') as handle:
        # read() returns an empty bytes object at end of file, which stops iter().
        for data in iter(lambda: handle.read(block_size), b''):
            md5.update(data)
    return md5.hexdigest()
def mfile(directory, maxlen, minlen, tcpstreamnum, udpstreamnum, tcpmatric, udpmatric):
    """Process every distinct ``.pcap`` file located directly in *directory*.

    Files are de-duplicated by content (MD5 digest), so byte-identical
    captures stored under different names are only processed once. Each
    remaining file is handed to ``sfile`` and the running totals are threaded
    through the calls.

    Args:
        directory: Directory to scan (not recursive).
        maxlen: Maximum number of packets kept per stream (passed to ``sfile``).
        minlen: Minimum number of packets a stream needs to be reported
            (passed to ``sfile``).
        tcpstreamnum: Number of TCP streams counted so far.
        udpstreamnum: Number of UDP streams counted so far.
        tcpmatric: List collecting one packet-length list per TCP stream.
        udpmatric: List collecting one packet-length list per UDP stream.

    Returns:
        The tuple ``(tcpstreamnum, udpstreamnum, tcpmatric, udpmatric)`` after
        all files have been processed.
    """
    seen_digests = set()  # set gives O(1) duplicate checks
    tsnum = tcpstreamnum
    usnum = udpstreamnum
    tcplist = tcpmatric
    udplist = udpmatric
    # Sorted so that the processing order (and therefore which duplicate wins
    # and the row order of the output) does not depend on os.listdir().
    for entry in sorted(os.listdir(directory)):
        fname = os.path.join(directory, entry)
        # Skip anything that is not a regular *.pcap file; a directory that
        # happens to be called "x.pcap" cannot be opened for hashing.
        if not fname.endswith('.pcap') or not os.path.isfile(fname):
            continue
        digest = filemd5(fname)  # hash each file only once
        if digest in seen_digests:
            continue
        seen_digests.add(digest)
        tsnum, usnum, tcplist, udplist = sfile(fname, maxlen, minlen, tsnum, usnum, tcplist, udplist)
    return tsnum, usnum, tcplist, udplist
def sfile(fname, maxlen, minlen, tcpstreamnum, udpstreamnum, tcpmatric, udpmatric):
    """Extract per-stream payload-length sequences from one pcap file.

    Packets are grouped into bidirectional streams keyed by
    ``src_sport_dst_dport_proto`` (taken from the first packet seen for the
    stream). At most *maxlen* packets are recorded per stream. Streams with
    fewer than *minlen* recorded packets, and streams whose destination port
    is 1900, are dropped. The key of every reported stream is printed.

    Args:
        fname: Path of the pcap file.
        maxlen: Maximum number of packets recorded per stream.
        minlen: Minimum number of recorded packets for a stream to be reported.
        tcpstreamnum: Number of TCP streams counted so far.
        udpstreamnum: Number of UDP streams counted so far.
        tcpmatric: List extended in place with one length list per TCP stream.
        udpmatric: List extended in place with one length list per UDP stream.

    Returns:
        The tuple ``(tcpstreamnum, udpstreamnum, tcpmatric, udpmatric)`` with
        the counters increased by the number of streams added.

    Raises:
        SystemExit: With status 1 if the file cannot be opened or is not
            accepted by ``dpkt.pcap.Reader``.
    """
    try:
        f = open(fname, "rb")
    except (IOError, OSError):
        print("Open pcap error: maybe not pcap format file")
        sys.exit(1)
    try:
        try:
            pcap = dpkt.pcap.Reader(f)
        except Exception:
            # Boundary handler: whatever the reader rejects is reported as
            # "not a pcap file"; KeyboardInterrupt/SystemExit still propagate.
            print("Open pcap error: maybe not pcap format file")
            sys.exit(1)
        streamlist = _collect_streams(pcap, maxlen)
    finally:
        # The capture is fully consumed at this point; release the handle.
        f.close()

    for key, stream in streamlist.items():
        if len(stream) < minlen:
            continue
        fields = key.split('_')
        # fields[3] is the destination port *as a string*; comparing it with
        # the integer 1900 could never match, so the filter was dead code.
        if fields[3] == '1900':
            continue
        print(key)
        lenlist = [length for _direction, length in stream]
        if fields[4] == '6':  # IP protocol number of TCP
            tcpmatric.append(lenlist)
            tcpstreamnum = tcpstreamnum + 1
        elif fields[4] == '17':  # IP protocol number of UDP
            udpmatric.append(lenlist)
            udpstreamnum = udpstreamnum + 1
    return tcpstreamnum, udpstreamnum, tcpmatric, udpmatric


def _collect_streams(pcap, processnum):
    """Group packets from *pcap* by stream, keeping at most *processnum* each.

    Returns a dict mapping ``src_sport_dst_dport_proto`` to a list of
    ``(direction, payload_length)`` tuples, where direction is ``'s'`` for
    packets travelling the same way as the stream's first packet and ``'r'``
    for packets travelling the opposite way.
    """
    streamlist = {}
    for _ts, buf in pcap:
        fields = _packet_fields(buf)
        if fields is None:
            continue
        src, sport, dst, dport, proto, packetlength = fields
        reverse_key = '_'.join((dst, dport, src, sport, proto))
        forward_key = '_'.join((src, sport, dst, dport, proto))
        # The reverse key is checked first, as in the original lookup order.
        if reverse_key in streamlist:
            key, direction = reverse_key, 'r'
        elif forward_key in streamlist:
            key, direction = forward_key, 's'
        else:
            streamlist[forward_key] = [('s', packetlength)]
            continue
        if len(streamlist[key]) < processnum:
            streamlist[key].append((direction, packetlength))
    return streamlist


def _packet_fields(buf):
    """Decode one Ethernet frame into its flow fields.

    Returns ``(src, sport, dst, dport, proto, payload_length)`` with the
    addresses, ports and protocol as strings, or ``None`` when the frame is
    not a decodable IPv4 TCP/UDP packet.
    """
    try:
        eth = dpkt.ethernet.Ethernet(buf)
    except Exception:
        # Best effort: a frame that cannot be decoded is skipped, not fatal.
        return None
    if eth.type != dpkt.ethernet.ETH_TYPE_IP:
        print(eth.type)
        return None
    ip = eth.data
    if not isinstance(ip, dpkt.ip.IP):
        # The IP layer was not decoded (payload left as raw bytes).
        return None
    transport = ip.data
    # The protocol must be checked before touching sport/dport: other
    # payloads (e.g. ICMP, or undecoded raw bytes) have no port attributes.
    if ip.p == dpkt.ip.IP_PROTO_TCP:
        if not isinstance(transport, dpkt.tcp.TCP):
            return None
        transport_header = transport.off * 4
    elif ip.p == dpkt.ip.IP_PROTO_UDP:
        if not isinstance(transport, dpkt.udp.UDP):
            return None
        transport_header = 8
    else:
        print(ip.p)
        return None
    # Payload length = IP total length minus IP header minus transport header.
    packetlength = ip.len - ip.hl * 4 - transport_header
    return (socket.inet_ntoa(ip.src), str(transport.sport),
            socket.inet_ntoa(ip.dst), str(transport.dport),
            str(ip.p), packetlength)
def main():
    """Command-line entry point: parse options, process pcap(s), write CSVs.

    Exactly one of ``-s <file>`` or ``-d <directory>`` must be given. The
    resulting TCP and UDP stream matrices are written to ``tcp.csv`` and
    ``udp.csv`` in the current directory, one comma-separated row of payload
    lengths per stream.

    Raises:
        SystemExit: Status 0 after printing help; status 1 on invalid options
            or an invalid combination of modes.
    """
    if len(sys.argv) < 3:
        usage()
        sys.exit(0)
    try:
        # -o/-p (and --app) are still accepted for backward compatibility,
        # although nothing reads them.
        opts, _args = getopt.getopt(sys.argv[1:], "hs:d:o:p:m:n:", ["help", "single=", "directory=", "app=", "maxlen=", "minlen="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    fname = ''
    directory = ''
    maxlen = 16
    minlen = 16
    tcpstreamnum = 0
    udpstreamnum = 0
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-s", "--single"):
            fname = a
        elif o in ("-d", "--directory"):
            directory = a
        elif o in ("-m", "--maxlen", "-n", "--minlen"):
            try:
                value = int(a)
            except ValueError:
                # Report a bad number as a usage error instead of a traceback.
                print("Option %s expects an integer, got %r" % (o, a))
                usage()
                sys.exit(1)
            if o in ("-m", "--maxlen"):
                maxlen = value
            else:
                minlen = value

    # Validate the mode before touching any output file, so an invalid
    # invocation cannot truncate the results of an earlier run.
    if fname and directory:
        print("Syntax error: You can only use one mode, either provide a single file or a directory")
        sys.exit(1)
    if not fname and not directory:
        usage()
        sys.exit(1)

    tcpmatric = []
    udpmatric = []
    print("Starting to processing the pcap file(s), please wait...\n")
    if fname:
        tcpstreamnum, udpstreamnum, tcpmatric, udpmatric = sfile(fname, maxlen, minlen, tcpstreamnum, udpstreamnum, tcpmatric, udpmatric)
    else:
        tcpstreamnum, udpstreamnum, tcpmatric, udpmatric = mfile(directory, maxlen, minlen, tcpstreamnum, udpstreamnum, tcpmatric, udpmatric)

    # Both files are always (re)created, even when empty; ``with`` makes sure
    # they are flushed and closed if a write fails.
    with open('tcp.csv', 'w') as tcp_output:
        for tcp in tcpmatric:
            tcp_output.write(','.join(map(str, tcp)))
            tcp_output.write('\n')
    if tcpmatric:
        print("Tcp stream flow matrics saved to %s , got %s stream flows\n " % (tcp_output.name, tcpstreamnum))

    with open('udp.csv', 'w') as udp_output:
        for udp in udpmatric:
            udp_output.write(','.join(map(str, udp)))
            udp_output.write('\n')
    if udpmatric:
        print("Udp stream flow matrics saved to %s , got %s stream flows\n" % (udp_output.name, udpstreamnum))

    print("All done!")
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()