Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Single feature add #31

Open
wants to merge 55 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
07ed3c4
Initial commit
ymirsky Jul 1, 2018
abe5a94
Added foundation code
ymirsky Jul 3, 2018
d2e5e66
updates readme
ymirsky Jul 3, 2018
dc48693
AfterImage/FeatureExtractor.py: Set limit when using scapy
willnewton Oct 5, 2018
f6278a3
AfterImage/FeatureExtractor.py: Find tshark in PATH
willnewton Oct 5, 2018
7cca8a8
Merge pull request #3 from willnewton/load-pcap
ymirsky Oct 7, 2018
83a8f01
Fixed error in figure of PDF
ymirsky Nov 11, 2018
a49419a
Added test environment details
ymirsky Nov 22, 2018
8a844a4
Fixed import AfterImage error
ymirsky Feb 7, 2019
6c2cf39
Fixed "AfterImage.py" not found error
ymirsky Dec 30, 2019
92b29b3
Added NDSS'18 version of AfterImage without Extrapolation
ymirsky Dec 30, 2019
45a159c
Updated readme
ymirsky Dec 30, 2019
c8aa0d3
Fixed bug in vbias and hbias updates
ymirsky Apr 5, 2020
13cd6a3
Fixed get-next index bug in queue for extrapolation
ymirsky Apr 17, 2020
9d5b1af
Reverted back to NDSS paper
ymirsky May 15, 2020
fb0aac2
Updated plot for NDSS version
ymirsky Jun 17, 2020
cd468b8
Fixed Radius bug: now squaring the dimensions
ymirsky Aug 28, 2020
aad5870
Adapted Kitsune for DI
Aug 30, 2023
d856e2e
Remove pickles
Aug 30, 2023
c45e20d
Merge unrelated histories
Aug 30, 2023
7a67dc1
Solve merge conflicts
Aug 30, 2023
013a11b
Solve merge conflict
Aug 30, 2023
de59ad3
merge conflict
Aug 30, 2023
0540706
fix print
Aug 30, 2023
8b33986
Add hyperparameter optimization
Aug 31, 2023
976ddaf
Add EER calculation
Sep 5, 2023
01f456e
Fix hyperparam opt
Sep 5, 2023
b22d412
Fix conflict
Sep 5, 2023
fe71564
Clean up
Sep 6, 2023
db86c3a
Change hyperparam optimization
Sep 11, 2023
908c616
Intermediate commit
Sep 11, 2023
a55cf12
Fix merge conflict
Sep 11, 2023
f932695
Fix double-running
Sep 12, 2023
48be16c
Add conversation sampling
Sep 14, 2023
f04a7e8
Parameterised calling of Kitsune
Sep 14, 2023
7c6faad
Merge branch 'master' of https://github.com/GuyPuts/Kitsune-adaptation
Sep 14, 2023
d36c170
Change conversation sampling
Sep 15, 2023
5f2f043
Intermediate commit
Sep 15, 2023
ea4e8ec
intermediate commit
Sep 18, 2023
24b8689
Sampling features from complete file
Sep 19, 2023
1dfa670
Sampling on conversation basis
Sep 20, 2023
9723399
Hashes
Sep 20, 2023
e254877
hashes don't seem to work
Sep 20, 2023
04f01c9
Added label ID
Sep 21, 2023
1234e5f
Set up hyperparam optimization
Sep 22, 2023
f7495ac
SHAP value calculation by directly calling KitNET
Sep 26, 2023
d430052
Fix Excel Export
Sep 26, 2023
500e153
metadata bug
Sep 26, 2023
320d225
shap
Oct 4, 2023
4783099
hyperparam
Oct 4, 2023
438f341
conf
Oct 4, 2023
a2748f6
getting tired of the hyperparam opt
Nov 9, 2023
d9a73e4
hyperopt
Nov 9, 2023
aa5df81
getting tired of the hyperparam opt
Nov 9, 2023
3f9b18d
Most significant packets
Dec 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
input_data/*
output_data/*
pickles/*
__pycache__/*
.idea/*
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 32 additions & 10 deletions AfterImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


class incStat:
def __init__(self, Lambda, ID, init_time=0, isTypeDiff=False): # timestamp is creation time
def __init__(self, Lambda, ID, init_time=0, isTypeDiff=False, tcpFlags=False): # timestamp is creation time
self.ID = ID
self.CF1 = 0 # linear sum
self.CF2 = 0 # sum of squares
Expand All @@ -15,8 +15,28 @@ def __init__(self, Lambda, ID, init_time=0, isTypeDiff=False): # timestamp is c
self.cur_var = np.nan
self.cur_std = np.nan
self.covs = [] # a list of incStat_covs (references) with relate to this incStat
self.tcpPkts = 0
self.flag_counts = {
"FIN": 0,
"SYN": 0,
"RST": 0,
"PSH": 0,
"ACK": 0,
"URG": 0,
"ECE": 0,
"CWR": 0
}

def insert(self, v, t=0, tcpFlags=False):  # v is a scalar, t is v's arrival timestamp
    """Tally TCP flag occurrences for this stream.

    NOTE(review): this added overload only counts flags and returns True;
    it never folds v/t into the incremental statistics (CF1/CF2/w), and a
    second ``insert`` definition follows it in this class, which shadows
    this one under normal attribute lookup — confirm the two definitions
    were meant to be merged into one method.
    """
    if tcpFlags:
        # tcpFlags is expected to be a hex string (tshark's tcp.flags
        # field) — TODO confirm against FeatureExtractor's TSV parsing.
        self.tcpPkts += 1
        flag_int = int(tcpFlags, 16)  # Convert hex string to integer
        flags = ["FIN", "SYN", "RST", "PSH", "ACK", "URG", "ECE", "CWR"]
        for i, flag in enumerate(flags):
            if flag_int & (1 << i):  # Check if the flag bit is set
                self.flag_counts[flag] += 1
        return True

def insert(self, v, t=0): # v is a scalar, t is v's arrival the timestamp
if self.isTypeDiff:
dif = t - self.lastTimestamp
if dif > 0:
Expand Down Expand Up @@ -98,9 +118,13 @@ def magnitude(self, other_incStats): # the magnitude of a set of incStats
return math.sqrt(A)

#calculates and pulls all stats on this stream
def allstats_1D(self, tcpFlags=False):
    """Calculate and pull all 1D stats for this stream.

    Returns [weight, mean, variance] by default. When ``tcpFlags`` is
    truthy, instead returns the per-flag frequency (flag count divided by
    TCP packets seen) for FIN, SYN, RST, PSH, ACK, URG, ECE, CWR, in
    dict insertion order.
    """
    self.cur_mean = self.CF1 / self.w
    self.cur_var = abs(self.CF2 / self.w - math.pow(self.cur_mean, 2))
    if tcpFlags:
        # Guard: no TCP packets recorded yet -> all-zero frequencies
        # (previously this divided by zero).
        if self.tcpPkts == 0:
            return [0.0] * len(self.flag_counts)
        return [count / self.tcpPkts for count in self.flag_counts.values()]
    return [self.w, self.cur_mean, self.cur_var]

#calculates and pulls all stats on this stream, and stats shared with the indicated stream
Expand Down Expand Up @@ -264,7 +288,6 @@ def get_lambda(self,Lambda):
def register(self,ID,Lambda=1,init_time=0,isTypeDiff=False):
#Default Lambda?
Lambda = self.get_lambda(Lambda)

#Retrieve incStat
key = ID+"_"+str(Lambda)
incS = self.HT.get(key)
Expand Down Expand Up @@ -298,9 +321,9 @@ def register_cov(self,ID1,ID2,Lambda=1,init_time=0,isTypeDiff=False):
return inc_cov

# updates/registers stream
def update(self, ID, t, v, Lambda=1, isTypeDiff=False, tcpFlags=False):
    """Update/register stream ``ID``: insert value ``v`` at time ``t``.

    ``tcpFlags`` is forwarded to incStat.insert so TCP-flag counts can be
    tracked alongside the incremental statistics. Returns the (possibly
    newly registered) incStat. (The duplicated signature line left over
    from the merge has been removed.)
    """
    incS = self.register(ID, Lambda, t, isTypeDiff)
    incS.insert(v, t, tcpFlags=tcpFlags)
    return incS

# Pulls current stats from the given ID
Expand Down Expand Up @@ -369,9 +392,9 @@ def get_nD_Stats(self,IDs,Lambda=1): #radius, magnitude (IDs is a list)
return [np.sqrt(rad),np.sqrt(mag)]

# Updates and then pulls current 1D stats from the given ID. Automatically registers previously unknown stream IDs
def update_get_1D_Stats(self, ID, t, v, Lambda=1, isTypeDiff=False, tcpFlags=False):
    """Update stream ``ID`` with (v, t) and pull its current 1D stats.

    Returns [weight, mean, variance] by default, or per-flag frequencies
    when ``tcpFlags`` is supplied (see incStat.allstats_1D). Automatically
    registers previously unknown stream IDs. (The stale pre-merge
    signature lines have been removed.)
    """
    incS = self.update(ID, t, v, Lambda, isTypeDiff, tcpFlags=tcpFlags)
    return incS.allstats_1D(tcpFlags)


# Updates and then pulls current correlative stats between the given IDs. Automatically registers previously unknown stream IDs, and cov tracking
Expand Down Expand Up @@ -439,4 +462,3 @@ def cleanOutOldRecords(self,cutoffWeight,curTime):
elif W > cutoffWeight:
break
return n

45 changes: 37 additions & 8 deletions FeatureExtractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os.path
import platform
import subprocess
import csv


#Extracts Kitsune features from given pcap file one packet at a time using "get_next_vector()"
Expand Down Expand Up @@ -63,7 +64,6 @@ def __prep__(self):
##If file is TSV (pre-parsed by wireshark script)
if type == "tsv":
self.parse_type = "tsv"

##If file is pcap
elif type == "pcap" or type == 'pcapng':
# Try parsing via tshark dll of wireshark (faster)
Expand Down Expand Up @@ -106,7 +106,7 @@ def __prep__(self):
self.limit = len(self.scapyin)
print("Loaded " + str(len(self.scapyin)) + " Packets.")

def get_next_vector(self):
def get_next_vector(self, single=False):
if self.curPacketIndx == self.limit:
if self.parse_type == 'tsv':
self.tsvinf.close()
Expand All @@ -120,6 +120,10 @@ def get_next_vector(self):
framelen = row[1]
srcIP = ''
dstIP = ''
tcpFlags = ''
tcpFlags = row[19]
payload = ''
#payload = int(row[20])+int(row[21])
if row[4] != '': # IPv4
srcIP = row[4]
dstIP = row[5]
Expand All @@ -128,8 +132,7 @@ def get_next_vector(self):
srcIP = row[17]
dstIP = row[18]
IPtype = 1
srcproto = row[6] + row[
8] # UDP or TCP port: the concatenation of the two port strings will will results in an OR "[tcp|udp]"
srcproto = row[6] + row[8] # UDP or TCP port: the concatenation of the two port strings will will results in an OR "[tcp|udp]"
dstproto = row[7] + row[9] # UDP or TCP port
srcMAC = row[2]
dstMAC = row[3]
Expand All @@ -147,7 +150,6 @@ def get_next_vector(self):
elif srcIP + srcproto + dstIP + dstproto == '': # some other protocol
srcIP = row[2] # src MAC
dstIP = row[3] # dst MAC

elif self.parse_type == "scapy":
packet = self.scapyin[self.curPacketIndx]
IPtype = np.nan
Expand Down Expand Up @@ -195,24 +197,51 @@ def get_next_vector(self):
return []

self.curPacketIndx = self.curPacketIndx + 1

if not single:
tcpFlags = False

### Extract Features
try:
return self.nstat.updateGetStats(IPtype, srcMAC, dstMAC, srcIP, srcproto, dstIP, dstproto,
int(framelen),
float(timestamp))
float(timestamp), tcpFlags, payload)
except Exception as e:
print(e)
return []


def pcap2tsv_with_tshark(self):
    """Convert self.path (pcap/pcapng) to a TSV of selected fields via tshark.

    Output is written next to the input as "<path>.tsv". The field list
    includes the TCP flags / length columns consumed by get_next_vector.
    (The dead duplicate ``fields`` assignment left over from the merge has
    been removed.)
    """
    print('Parsing with tshark...')
    fields = "-e frame.time_epoch -e frame.len -e eth.src -e eth.dst -e ip.src -e ip.dst -e tcp.srcport -e tcp.dstport -e udp.srcport -e udp.dstport -e icmp.type -e icmp.code -e arp.opcode -e arp.src.hw_mac -e arp.src.proto_ipv4 -e arp.dst.hw_mac -e arp.dst.proto_ipv4 -e ipv6.src -e ipv6.dst -e tcp.flags -e tcp.len -e udp.length -e http.response.code"
    # NOTE(review): shell=True with a string-built command line; the shell
    # is required here for the output redirection (">"), so self.path must
    # be a trusted local file path — do not feed untrusted paths in.
    cmd = '"' + self._tshark + '" -r ' + self.path + ' -T fields ' + fields + ' -E header=y -E occurrence=f > ' + self.path + ".tsv"
    subprocess.call(cmd, shell=True)
    print("tshark parsing complete. File saved as: " + self.path + ".tsv")

def get_num_features(self):
    """Return the dimensionality of the feature vector produced per packet."""
    headers = self.nstat.getNetStatHeaders()
    return len(headers)

def get_all_vectors(self, csv_path=False, single=False):
    """Extract feature vectors for every remaining packet.

    csv_path: if truthy, stream each vector as a row to this CSV file and
              return the path; otherwise accumulate vectors in memory and
              return the list.
    single:   forwarded to get_next_vector() (enables the per-packet
              TCP-flag handling). Bug fix: the original dropped ``single``
              in the in-memory branch, silently disabling it.

    Resets self.curPacketIndx to 0 when the capture is exhausted.
    """
    if csv_path:
        with open(csv_path, mode='w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            while True:
                if self.curPacketIndx % 100000 == 0:
                    print(self.curPacketIndx)  # progress indicator
                vector = self.get_next_vector(single)
                if len(vector) == 0 or self.curPacketIndx > self.limit:
                    self.curPacketIndx = 0
                    return csv_path
                csv_writer.writerow(vector)
    vectorList = []
    while True:
        if self.curPacketIndx % 1000 == 0:
            print(self.curPacketIndx)  # progress indicator
        vector = self.get_next_vector(single)  # was get_next_vector() — `single` now honored
        if len(vector) == 0 or self.curPacketIndx > self.limit:
            self.curPacketIndx = 0
            return vectorList
        vectorList.append(vector)

15 changes: 13 additions & 2 deletions KitNET/KitNET.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class KitNET:
#feature_map: One may optionally provide a feature map instead of learning one. The map must be a list,
# where the i-th entry contains a list of the feature indices to be assingned to the i-th autoencoder in the ensemble.
# For example, [[2,5,3],[4,0,1],[6,7]]
def __init__(self,n,max_autoencoder_size=10,FM_grace_period=None,AD_grace_period=10000,learning_rate=0.1,hidden_ratio=0.75, feature_map = None):
def __init__(self,n,max_autoencoder_size=10,FM_grace_period=None,AD_grace_period=10000,learning_rate=0.1,hidden_ratio=0.75, feature_map=None):
# Parameters:
self.AD_grace_period = AD_grace_period
if FM_grace_period is None:
Expand Down Expand Up @@ -50,7 +50,8 @@ def __init__(self,n,max_autoencoder_size=10,FM_grace_period=None,AD_grace_period
#Note: KitNET automatically performs 0-1 normalization on all attributes.
def process(self, x):
    """Process one instance: learn while in a grace period, then score it.

    Returns 0.0 during the feature-mapping and anomaly-detector grace
    periods (the instance is used for training); afterwards returns the
    RMSE anomaly score from execute(). KitNET performs 0-1 normalization
    on all attributes internally.
    """
    # Still within the combined grace period -> train on x, no score yet.
    if self.n_trained <= self.FM_grace_period + self.AD_grace_period:
        self.train(x)
        return 0.0
    return self.execute(x)
Expand Down Expand Up @@ -104,6 +105,16 @@ def __createAD__(self):
params = AE.dA_params(len(self.v), n_hidden=0, lr=self.lr, corruption_level=0, gracePeriod=0, hiddenRatio=self.hr)
self.outputLayer = AE.dA(params)

def process_batch(self, data):
    """Run process() over every instance in data, returning the scores.

    Prints a progress line every 1000 instances; returns a numpy array of
    the per-instance results in input order.
    """
    scores = []
    for idx, instance in enumerate(data):
        if idx % 1000 == 0:
            print("processing packet ", idx, " / ", len(data))
        scores.append(self.process(instance))
    return np.array(scores)

# Copyright (c) 2017 Yisroel Mirsky
#
# MIT License
Expand Down
Binary file added KitNET/__pycache__/KitNET.cpython-39.pyc
Binary file not shown.
Binary file added KitNET/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file added KitNET/__pycache__/corClust.cpython-39.pyc
Binary file not shown.
Binary file added KitNET/__pycache__/dA.cpython-39.pyc
Binary file not shown.
Binary file added KitNET/__pycache__/utils.cpython-39.pyc
Binary file not shown.
Loading