
CNN tmp

The script below is a TFLearn 1-D CNN that classifies SSL sessions as benign ("white", label 0) or malicious ("black", label 1) from fixed-length byte vectors built out of per-direction SSL payload data extracted from pcap summaries.

# coding: utf-8
# len white data: 1282285
# len black data: 81903
from __future__ import division, print_function, absolute_import

import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
from matplotlib import pyplot as plt
from sklearn.metrics import (average_precision_score, confusion_matrix,
                             f1_score, precision_score, recall_score)
from sklearn.model_selection import train_test_split
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.layers.conv import conv_1d, global_max_pool, max_pool_1d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.merge_ops import merge
from tflearn.layers.normalization import batch_normalization

# filename = "data/99666.pcap_svKcnF"
# with open(filename, "rb") as tmp_file:
#     ans = pickle.load(tmp_file)
#     # print(ans)
#     for k, v in ans.items():
#         print(k, type(v[0]), v)
#         if v[0] != 0 and v[1] != 0:
#             out_flow, in_flow = (list(v[0]), list(v[1]))
#             print(out_flow, in_flow)
#             print(len(out_flow), len(in_flow))


def report_evaluation_metrics(y_true, y_pred):
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1))
    # def plot_confusion_matrix(y_true, y_pred):
    conf_matrix = confusion_matrix(y_true, y_pred)
    print("confusion matrix:", conf_matrix)
    # plt.figure(figsize=(12, 12))
    # sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d")
    # plt.title("Confusion matrix")
    # plt.ylabel('True class')
    # plt.xlabel('Predicted class')
    # plt.show()


FLOW_SIZE = 1024


def extract_flows(filename):
    # Read one pickled pcap summary (a dict of session key -> (out_flow, in_flow))
    # and turn each bidirectional session into a fixed-length vector:
    # first half outbound, second half inbound, each side truncated or
    # zero-padded to FLOW_SIZE // 2.
    ans = []
    with open(filename, "rb") as tmp_file:
        pkl_data = pickle.load(tmp_file)
        for k, v in pkl_data.items():
            if v[0] != 0 and v[1] != 0:
                out_flow, in_flow = (list(v[0]), list(v[1]))
                half_size = FLOW_SIZE // 2
                padding_flow = (out_flow[:half_size] + [0] * (half_size - len(out_flow))
                                + in_flow[:half_size] + [0] * (half_size - len(in_flow)))
                assert len(padding_flow) == FLOW_SIZE
                ans.append([filename + ":" + k, padding_flow])
    return ans


# def get_cnn_model(max_len=1024, volcab_size=256):
#     # Building convolutional network
#     network = tflearn.input_data(shape=[None, max_len], name='input')
#     network = tflearn.embedding(network, input_dim=volcab_size, output_dim=32)
#     network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
#     network = max_pool_1d(network, 2)
#     network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
#     network = max_pool_1d(network, 2)
#     network = batch_normalization(network)
#     network = fully_connected(network, 64, activation='relu')
#     network = dropout(network, 0.5)
#     network = fully_connected(network, 2, activation='softmax')
#     sgd = tflearn.SGD(learning_rate=0.1, lr_decay=0.96, decay_step=1000)
#     network = regression(network, optimizer=sgd, loss='categorical_crossentropy')
#     model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path='model.tfl.ckpt')
#     return model


def get_cnn_model(max_len=FLOW_SIZE, volcab_size=256):
    # Building convolutional network
    network = tflearn.input_data(shape=[None, max_len], name='input')
    # network = tflearn.embedding(network, input_dim=volcab_size, output_dim=32)
    # refer: https://github.com/echowei/DeepTraffic/blob/master/2.encrypted_traffic_classification/4.TrainAndTest/2d_cnn/encrypt_traffic_cnn_2d.py ==> 5*5 conv
    # refer: https://github.com/echowei/DeepTraffic/blob/master/2.encrypted_traffic_classification/4.TrainAndTest/1d_cnn_25%2B3/encrypt_traffic_cnn_1d.py ==> 25 conv
    # refer: https://github.com/echowei/DeepTraffic/blob/master/1.malware_traffic_classification/4.TrainAndTest/traffic_cnn.py
    # network = tflearn.input_data(shape=[None, 1, max_len], name='input')
    # network = tflearn.reshape(network, (-1, max_len, 1))
    network = tf.expand_dims(network, 2)
    network = conv_1d(network, nb_filter=32, filter_size=25, strides=1, padding='same', activation='relu')
    network = max_pool_1d(network, kernel_size=3, strides=3)
    network = conv_1d(network, nb_filter=32, filter_size=25, strides=1, padding='same', activation='relu')
    network = max_pool_1d(network, kernel_size=3, strides=3)
    network = fully_connected(network, n_units=1024, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 2, activation='softmax')
    sgd = tflearn.SGD(learning_rate=0.0001, lr_decay=0.96, decay_step=1000)
    network = regression(network, optimizer=sgd, loss='categorical_crossentropy')
    model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path='model.tfl.ckpt')
    return model


"""
# dns tunnel
# black detect rate is ZERO!!!!
def get_cnn_model(max_len, volcab_size):
    # Building convolutional network
    network = tflearn.input_data(shape=[None, max_len], name='input')
    network = tflearn.embedding(network, input_dim=volcab_size, output_dim=64)
    branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
    branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
    branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
    network = merge([branch1, branch2, branch3], mode='concat', axis=1)
    network = tf.expand_dims(network, 2)
    network = global_max_pool(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 4, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy', name='target')
    model = tflearn.DNN(network, tensorboard_verbose=0)
    return model
"""

WHITE_DIR = "/home/langjihai/resolve_pcap_for_NN/white/SSL_PAYLOAD_PER_DIR"
BLACK_DIR = "/home/langjihai/resolve_pcap_for_NN/black/SSL_PAYLOAD_PER_DIR"


def get_files(directory):
    for dirpath, _, filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))


def get_data(dirname):
    ans = []
    for file in get_files(dirname):
        flows = extract_flows(file)
        if len(ans) >= 2000000:
            break
        if flows:
            ans.extend(flows)
        print(len(ans), " flows in", dirname)
    return ans


def save_data(data):
    with open('data.pickle', 'wb') as handle:
        pickle.dump(data, handle)


def load_data():
    with open('data.pickle', 'rb') as handle:
        return pickle.load(handle)


data_file = "data.pickle"
if os.path.exists(data_file):
    print("load data file data.pickle!!!")
    data = load_data()
    white_data, black_data = data['white_data'], data['black_data']
else:
    black_data = get_data(BLACK_DIR)
    white_data = get_data(WHITE_DIR)
    save_data({"white_data": white_data, "black_data": black_data})
    # np.savez(data_file, white_data=white_data, black_data=black_data)

print("len white data:", len(white_data))
print("len black data:", len(black_data))

dataX = []
dataY = []
for flow in white_data:
    dataX.append(flow[1])
    dataY.append(0)
for flow in black_data:
    dataX.append(flow[1])
    dataY.append(1)

trainX, testX, trainY, testY = train_test_split(dataX, dataY, test_size=0.2, random_state=666)
# trainX = np.reshape(trainX, [-1, 1, FLOW_SIZE])
# testX = np.reshape(testX, [-1, 1, FLOW_SIZE])
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

model = get_cnn_model()

# Train model, with model checkpoint every epoch and every 200 training steps.
# model.fit(trainX, trainY, n_epoch=10,
#           validation_set=(testX, testY),
#           show_metric=True,
#           snapshot_epoch=True,  # Snapshot (save & evaluate) model every epoch.
#           snapshot_step=10000,  # Snapshot (save & evaluate) model every 500 steps.
#           run_id='model_and_weights')
#
# model.save("ECA_CNN.model")
#
# model.load("ECA_CNN.model")
# test = np.linspace(1, 101, 100).reshape(1, 100)
# print("test result:", model.predict(test))

model_file = "ECA_CNN.model"
if os.path.exists(model_file + ".meta"):
    print("Load a model from local!!!")
    model.load(model_file)
# else:
#     pass
#     model.fit({'input_x': trainX}, {'target_out': trainX}, n_epoch=30,
#               validation_set=(testX, testX), batch_size=256, run_id="vae")

model.fit(trainX, trainY, n_epoch=10,
          validation_set=(testX, testY),
          show_metric=True,
          snapshot_epoch=True,  # Snapshot (save & evaluate) model every epoch.
          # snapshot_step=10000,  # Snapshot (save & evaluate) model every 500 steps.
          batch_size=256,
          run_id='model_and_weights')
model.save(model_file)

# Predict over the whole dataset in fixed-size chunks to keep memory bounded.
Ypred = []
L = len(dataX)
i = 0
N = 10000
while i < L:
    p = model.predict(dataX[i:i + N])
    for p1, p2 in p:
        if p1 > 0.5:
            Ypred.append(0)
        else:
            Ypred.append(1)
    i += N

report_evaluation_metrics(dataY, Ypred)
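
The fixed-length encoding built by extract_flows can be exercised on its own. The sketch below is a standalone copy of that padding logic (the pad_flow helper name is made up here): the first FLOW_SIZE // 2 slots hold the outbound sequence, the second half the inbound one, each side truncated or zero-padded to exactly half the vector.

# Standalone sketch of the padding scheme from extract_flows.
FLOW_SIZE = 1024

def pad_flow(out_flow, in_flow, flow_size=FLOW_SIZE):  # hypothetical helper name
    half = flow_size // 2
    # Truncate each direction to half the vector, then zero-pad to length.
    padded = (out_flow[:half] + [0] * (half - len(out_flow))
              + in_flow[:half] + [0] * (half - len(in_flow)))
    assert len(padded) == flow_size
    return padded

# A short session gets zero-padded in both halves.
vec = pad_flow([22, 3, 1], [22, 3, 4])
print(len(vec))        # 1024
print(vec[:5])         # [22, 3, 1, 0, 0]
print(vec[512:517])    # [22, 3, 4, 0, 0]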
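Because rebuilding the dataset from the pcap dumps is slow, the script caches everything in data.pickle. Assuming the {"white_data": ..., "black_data": ...} layout that save_data writes, where each entry is a [flow_id, byte_vector] pair from extract_flows, a quick inspection looks like this:

import pickle

# Inspect the cached dataset written by save_data().
with open('data.pickle', 'rb') as handle:
    data = pickle.load(handle)

for label in ('white_data', 'black_data'):
    flows = data[label]
    print(label, len(flows))
    if flows:
        flow_id, vector = flows[0]
        print('  first id:', flow_id, 'vector length:', len(vector))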
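The chunked prediction loop at the end can also be packaged as a small helper. This is only a refactoring sketch of that loop (the predict_labels name is invented here); it assumes model.predict returns (p_white, p_black) softmax pairs and applies the same 0.5 threshold on the white-class probability. Reporting per-class precision, recall, and F1 rather than plain accuracy matters here because the classes are heavily imbalanced (roughly 1.28M white flows to 82K black ones per the header counts).

def predict_labels(model, X, batch=10000, threshold=0.5):
    # Hypothetical helper: same logic as the while-loop above.
    labels = []
    for i in range(0, len(X), batch):
        for p_white, p_black in model.predict(X[i:i + batch]):
            labels.append(0 if p_white > threshold else 1)
    return labels

# Ypred = predict_labels(model, dataX)
# report_evaluation_metrics(dataY, Ypred)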