-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
68 lines (54 loc) · 3.4 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from rl.agents.dqn import TrainDQN
from rl.data import get_train_test_val, load_csv, get_behaviour_data
from rl.utils import rounded_dict
from tensorflow.keras.layers import Dense, Dropout
import pickle
from keras.regularizers import l2,l1
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # CPU is faster than GPU on structured data
# ---------------------------------------------------------------------------
# Hyper-parameters for the DQN training run.
# ---------------------------------------------------------------------------
episodes = 60_000  # Total number of episodes
warmup_steps = 1_700  # Warmup steps used to collect data with a random policy
memory_length = warmup_steps  # Replay Memory capacity, tied to the warmup length
# NOTE(review): batch_size is larger than memory_length -- confirm this is intended.
batch_size = 10_000
collect_steps_per_episode = 100
collect_every = 1
target_update_period = 800  # How often the target Q-network is overwritten with the default Q-network
target_update_tau = 1  # Softening factor for the target model update
n_step_update = 1

# Q-network body: three (Dense 256 + ReLU, Dropout 0.2) pairs followed by a
# 5-unit output layer without activation.
# NOTE(review): the class mapping noted elsewhere in this script lists 9 labels,
# while the output layer has 5 units -- confirm which dataset this run targets.
layers = [
    Dense(256, activation="relu"),
    Dropout(0.2),
    Dense(256, activation="relu"),
    Dropout(0.2),
    Dense(256, activation="relu"),
    Dropout(0.2),
    Dense(5, activation=None),  # No activation, pure Q-values
]

learning_rate = 0.00025  # Learning rate
gamma = 0.1  # Discount factor
min_epsilon = 0.5  # Minimal and final chance of choosing a random action
decay_episodes = episodes // 10  # Number of episodes to decay from 1.0 to `min_epsilon`
# {'backdoor': 7842, 'banker': 4875, 'cryptominer': 990, 'deceptor': 4614, 'downloader': 9274, 'normal': 8961, 'pua': 7149, 'ransomware': 2236, 'spyware': 11585}
# {'backdoor': 0.13632096791016235, 'banker': 0.0847442895386434, 'cryptominer': 0.017209609567847582, 'deceptor': 0.08020721065257448, 'downloader': 0.1612140597295136, 'normal': 0.1557730417550325, 'pua': 0.12427424121266906, 'ransomware': 0.03886938080172444, 'spyware': 0.20138719883183256}
# {'backdoor': 0, 'banker': 1, 'cryptominer': 2, 'deceptor': 3, 'downloader': 4, 'normal': 5, 'pua': 6, 'ransomware': 7, 'spyware': 8}
# Read the train and test CSV files via load_csv with normalization disabled.
# "Label" and ["Flow ID", "Timestamp"] are presumably the target column and the
# columns to drop -- confirm against rl.data.load_csv. (Original author's note:
# categorical columns and columns with the same value in every row are dropped.)
X_train, y_train, X_test, y_test = load_csv(
    r"C:\Users\ATHARVA\Documents\Atharva\Malware Detection IP\Malware-IP\rl_v2\train.csv",
    r"C:\Users\ATHARVA\Documents\Atharva\Malware Detection IP\Malware-IP\rl_v2\test.csv",
    "Label",
    ["Flow ID", "Timestamp"],
    normalization=False,
)

# Re-derive the train/test splits and obtain an additional validation split
# (val_frac=0.3) from get_train_test_val.
X_train, y_train, X_test, y_test, X_val, y_val = get_train_test_val(
    X_train,
    y_train,
    X_test,
    y_test,
    val_frac=0.3,
)
# no_rows = 2
# import numpy as np
# X_train, y_train, X_test, y_test, X_val, y_val = get_behaviour_data(no_rows, X_train, y_train, X_test, y_test, X_val, y_val)
# Path handed to TrainDQN as `model_path`.
model_path = r"C:\Users\ATHARVA\Documents\Atharva\Malware Detection IP\Malware-IP\rl_v2\rl.pkl"

# Construct the DQN trainer from the hyper-parameters defined above.
model = TrainDQN(
    episodes,
    warmup_steps,
    learning_rate,
    gamma,
    min_epsilon,
    decay_episodes,
    target_update_period=target_update_period,
    target_update_tau=target_update_tau,
    batch_size=batch_size,
    collect_steps_per_episode=collect_steps_per_episode,
    memory_length=memory_length,
    collect_every=collect_every,
    n_step_update=n_step_update,
    model_path=model_path,
)
# loaded_model = pickle.load(open('finalized_model.sav', 'rb'))

# Build the network from the training split and the layer stack, then print
# the Q-network summary.
model.compile_model(X_train, y_train, layers=layers)
model.q_net.summary()

# Train, passing the validation split and the "Accuracy" selector; the return
# value is kept in `h` for the history dump that follows.
h = model.train(X_val, y_val, "Accuracy")
import json

# Write the items of `h`, each converted with str(), as a single JSON array.
# NOTE: despite the .csv extension, the content of this file is JSON.
with open('final.csv', 'w') as f:
    json.dump([str(entry) for entry in h], f)
# import matplotlib.pyplot as plt
# plt.plot(h)
# plt.xlabel("epochs")
# plt.ylabel("Loss")
# print(len(h))
# print(h[0], "<<<<<<<")
# plt.savefig("history.png")
# Evaluate the trained model; the test split is passed first, then the
# training split.
stats = model.evaluate(
    X_test,
    y_test,
    X_train,
    y_train,
)

# Print the metrics after passing them through rounded_dict.
rounded_stats = rounded_dict(stats)
print(rounded_stats)