# coding: utf-8
# len white data: 1282285
# len black data: 81903
from __future__ import division, print_function, absolute_import

import os
import pickle

import numpy as np
import tensorflow as tf
import tflearn
from tflearn.layers.core import dropout, fully_connected
from tflearn.layers.conv import conv_1d, max_pool_1d, global_max_pool
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
from tflearn.data_utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score, recall_score, precision_score, f1_score
from matplotlib import pyplot as plt


def report_evaluation_metrics(y_true, y_pred):
    """Print average precision, precision, recall and F1 for the black class (label 1)."""
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1))


def plot_confusion_matrix(y_true, y_pred):
    conf_matrix = confusion_matrix(y_true, y_pred)
    print("confusion matrix:", conf_matrix)
    # plt.figure(figsize=(12, 12))
    # sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d")
    # plt.title("Confusion matrix")
    # plt.ylabel('True class')
    # plt.xlabel('Predicted class')
    # plt.show()


FLOW_SIZE = 1024


def extract_flows(filename):
    """Load a pickled {flow_key: (out_payload, in_payload)} dict and return
    [key, flow] pairs, where each flow is the first FLOW_SIZE // 2 outbound
    bytes followed by the first FLOW_SIZE // 2 inbound bytes, each half
    zero-padded so every flow has exactly FLOW_SIZE values."""
    ans = []
    with open(filename, "rb") as tmp_file:
        pkl_data = pickle.load(tmp_file)
    for k, v in pkl_data.items():
        if v[0] != 0 and v[1] != 0:
            out_flow, in_flow = list(v[0]), list(v[1])
            half_size = FLOW_SIZE // 2
            padding_flow = (out_flow[:half_size] + [0] * (half_size - len(out_flow))
                            + in_flow[:half_size] + [0] * (half_size - len(in_flow)))
            assert len(padding_flow) == FLOW_SIZE
            ans.append([filename + ":" + k, padding_flow])
    return ans
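# --- Hedged usage sketch (not part of the original pipeline). It assumes each
# pickle maps a flow key to a pair of payload byte strings, which is the layout
# extract_flows above expects; the file name "demo.pkl" is made up:
#
#   with open("demo.pkl", "wb") as f:
#       pickle.dump({"10.0.0.1:443": (bytes(10), bytes(700))}, f)
#   flows = extract_flows("demo.pkl")
#   key, flow = flows[0]
#   assert len(flow) == FLOW_SIZE  # 512 outbound + 512 inbound values, zero-padded/truncated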
# def get_cnn_model(max_len=1024, volcab_size=256):
#     # Building convolutional network
#     network = tflearn.input_data(shape=[None, max_len], name='input')
#     network = tflearn.embedding(network, input_dim=volcab_size, output_dim=32)
#     network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
#     network = max_pool_1d(network, 2)
#     network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
#     network = max_pool_1d(network, 2)
#     network = batch_normalization(network)
#     network = fully_connected(network, 64, activation='relu')
#     network = dropout(network, 0.5)
#     network = fully_connected(network, 2, activation='softmax')
#     sgd = tflearn.SGD(learning_rate=0.1, lr_decay=0.96, decay_step=1000)
#     network = regression(network, optimizer=sgd, loss='categorical_crossentropy')
#     model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path='model.tfl.ckpt')
#     return model


def get_cnn_model(max_len=FLOW_SIZE, volcab_size=256):
    # Build the 1-D convolutional network: two conv/pool stages over the raw
    # payload bytes, then a fully connected head with a 2-way softmax.
    network = tflearn.input_data(shape=[None, max_len], name='input')
    # network = tflearn.embedding(network, input_dim=volcab_size, output_dim=32)
    # refer: https://github.com/echowei/DeepTraffic/blob/master/2.encrypted_traffic_classification/4.TrainAndTest/2d_cnn/encrypt_traffic_cnn_2d.py ==> 5*5 conv
    # refer: https://github.com/echowei/DeepTraffic/blob/master/2.encrypted_traffic_classification/4.TrainAndTest/1d_cnn_25%2B3/encrypt_traffic_cnn_1d.py ==> 25 conv
    # refer: https://github.com/echowei/DeepTraffic/blob/master/1.malware_traffic_classification/4.TrainAndTest/traffic_cnn.py
    # network = tflearn.input_data(shape=[None, 1, max_len], name='input')
    # network = tflearn.reshape(network, (-1, max_len, 1))
    network = tf.expand_dims(network, 2)  # [None, max_len] -> [None, max_len, 1]
    network = conv_1d(network, nb_filter=32, filter_size=25, strides=1, padding='same', activation='relu')
    network = max_pool_1d(network, kernel_size=3, strides=3)
    network = conv_1d(network, nb_filter=32, filter_size=25, strides=1, padding='same', activation='relu')
    network = max_pool_1d(network, kernel_size=3, strides=3)
    network = fully_connected(network, n_units=1024, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    sgd = tflearn.SGD(learning_rate=0.0001, lr_decay=0.96, decay_step=1000)
    network = regression(network, optimizer=sgd, loss='categorical_crossentropy')
    model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path='model.tfl.ckpt')
    return model


"""
# dns tunnel
# black detect rate is ZERO!!!!
def get_cnn_model(max_len, volcab_size):
    # Building convolutional network
    network = tflearn.input_data(shape=[None, max_len], name='input')
    network = tflearn.embedding(network, input_dim=volcab_size, output_dim=64)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 4, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
"""

WHITE_DIR = "/home/langjihai/resolve_pcap_for_NN/white/SSL_PAYLOAD_PER_DIR"
BLACK_DIR = "/home/langjihai/resolve_pcap_for_NN/black/SSL_PAYLOAD_PER_DIR"


def get_files(directory):
    """Yield the absolute path of every file under directory, recursively."""
    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))


def get_data(dirname):
    """Collect up to 2,000,000 flows from all payload pickles under dirname."""
    ans = []
    for file in get_files(dirname):
        flows = extract_flows(file)
        if len(ans) >= 2000000:
            break
        if flows:
            ans.extend(flows)
    print(len(ans), "flows in", dirname)
    return ans


def save_data(data):
    with open('data.pickle', 'wb') as handle:
        pickle.dump(data, handle)


def load_data():
    with open('data.pickle', 'rb') as handle:
        return pickle.load(handle)


# Cache the extracted flows in data.pickle so repeated runs skip the directory walk.
data_file = "data.pickle"
if os.path.exists(data_file):
    print("load data file data.pickle!!!")
    data = load_data()
    white_data, black_data = data['white_data'], data['black_data']
else:
    black_data = get_data(BLACK_DIR)
    white_data = get_data(WHITE_DIR)
    save_data({"white_data": white_data, "black_data": black_data})
    # np.savez(data_file, white_data=white_data, black_data=black_data)
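# --- Hedged note (an assumption, not in the original script): per the counts in
# the header (~1,282,285 white vs ~81,903 black flows), the classes are roughly
# 16:1 imbalanced. One possible mitigation is to downsample the white flows
# before splitting; the 5x ratio below is an arbitrary illustrative choice:
#
#   import random
#   random.seed(666)
#   cap = 5 * len(black_data)
#   if len(white_data) > cap:
#       white_data = random.sample(white_data, cap)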
print("len white data:", len(white_data))
print("len black data:", len(black_data))

dataX = []
dataY = []
for flow in white_data:
    dataX.append(flow[1])
    dataY.append(0)  # white (benign) traffic
for flow in black_data:
    dataX.append(flow[1])
    dataY.append(1)  # black (malicious) traffic

trainX, testX, trainY, testY = train_test_split(dataX, dataY, test_size=0.2, random_state=666)
# trainX = np.reshape(trainX, [-1, 1, FLOW_SIZE])
# testX = np.reshape(testX, [-1, 1, FLOW_SIZE])
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

model = get_cnn_model()

model_file = "ECA_CNN.model"
if os.path.exists(model_file + ".meta"):
    print("Load a model from local!!!")
    model.load(model_file)

# Train the model, with a checkpoint snapshot every epoch.
model.fit(trainX, trainY, n_epoch=10,
          validation_set=(testX, testY),
          show_metric=True,
          snapshot_epoch=True,  # Snapshot (save & evaluate) model every epoch.
          # snapshot_step=10000,  # Snapshot (save & evaluate) model every 10000 steps.
          batch_size=256,
          run_id='model_and_weights')
model.save(model_file)

# Predict over the full dataset in chunks of N flows to bound memory use;
# the first softmax output is the white-class probability.
Ypred = []
L = len(dataX)
i = 0
N = 10000
while i < L:
    p = model.predict(dataX[i:i + N])
    for p1, p2 in p:
        if p1 > 0.5:
            Ypred.append(0)
        else:
            Ypred.append(1)
    i += N

report_evaluation_metrics(dataY, Ypred)
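# --- Hedged equivalent of the chunked thresholding loop above (assumes
# model.predict returns [p_white, p_black] rows from the 2-way softmax, so
# p_white > 0.5 is the same as taking the argmax over the two outputs):
#
#   probs = np.vstack([model.predict(dataX[i:i + N]) for i in range(0, L, N)])
#   Ypred_alt = list(np.argmax(probs, axis=1))
#   assert Ypred_alt == Ypred  # modulo ties at exactly 0.5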