# lemonheart99/기계학습  (lemonheart99 / Machine Learning)
#
# 악성코드 이미지화 - ver.02  (Malware-to-image conversion - ver.02)
#
# 레몬하트99 2020. 10. 7. 23:29
import os,sys
from PIL import Image
import csv
from os.path import getsize




def get_binarydata(filename):
    """Read a file and return its contents as a list of byte values.

    Args:
        filename: Path of the file to read; it is opened in binary mode.

    Returns:
        A list of ints in the range 0-255, one per byte, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read the whole file in one call rather than one byte per read(); the
    # context manager closes the handle even if reading fails.
    with open(filename, "rb") as file:
        # bytearray iterates as ints, which is what the per-byte ord() produced.
        return list(bytearray(file.read()))


def _select_image_width(size):
    """Return the image width (in pixels) to use for ``size`` bytes of data.

    Each entry is (exclusive upper bound in bytes, width). The bounds are
    multiples of 10240 bytes and form contiguous ranges, so every size maps to
    exactly one width; sizes at or above the last bound use 1024.
    NOTE(review): the 32..1024 widths and the 10/30/60/100/200 cut-offs come
    from the original chain; the 500 and 1000 cut-offs are assumed from the
    file-size/width table commonly used for malware images - confirm.
    """
    unit = 10240
    upper_bounds = (
        (1 * unit, 32),
        (3 * unit, 64),
        (6 * unit, 128),
        (10 * unit, 256),
        (20 * unit, 384),
        (50 * unit, 512),
        (100 * unit, 768),
    )
    for upper, candidate in upper_bounds:
        if size < upper:
            return candidate
    return 1024


def create_greyscale_image_specific_with(dataset, file_name_short_list, width=0,
                                         output_root='E:/dataset/malware_images_02'):
    """Render byte values as an 8-bit greyscale PNG and save it by label.

    Args:
        dataset: Sequence of pixel values (one per byte of the source file).
        file_name_short_list: Indexable row where item 0 is the output file
            name (without extension) and item 1 is the label string.
        width: Image width in pixels. 0 (the default) picks a width from the
            length of ``dataset``.
        output_root: Directory that holds the ``False`` and ``True``
            sub-directories the PNG is written into.

    Returns:
        None. The image is written to disk and its path is printed. Rows
        whose label is neither '1' nor '0' are not saved.
    """
    # Computed unconditionally: the height below needs it even when the
    # caller supplies an explicit width.
    size = len(dataset)

    if width == 0:
        width = _select_image_width(size)

    # One row more than the number of full rows, so a partial last row always
    # fits; pixels not covered by ``dataset`` keep the image's initial value.
    height = size // width + 1

    image = Image.new('L', (width, height))
    image.putdata(dataset)

    # Label '1' is stored under "False" and label '0' under "True".
    # NOTE(review): this mapping is kept exactly as the original had it;
    # confirm the label semantics against the dataset description.
    label_dirs = {'1': 'False', '0': 'True'}
    subdir = label_dirs.get(file_name_short_list[1])
    if subdir is None:
        return

    imagename = output_root + '/' + subdir + '/' + file_name_short_list[0] + '.png'
    image.save(imagename)
    print(imagename + " Greyscale image created")




def get_max_file_size(label_csv='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set/label.csv',
                      sample_dir='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set'):
    """Print and return the size in bytes of the largest listed sample file.

    Args:
        label_csv: Path of the CSV listing; the first column of each row is a
            sample name.
        sample_dir: Directory containing one ``<name>.vir`` file per row.

    Returns:
        The largest file size in bytes, or 0 when the listing has no rows.

    Raises:
        OSError: If the CSV or any listed sample file cannot be accessed.
    """
    # The context manager closes the CSV even when getsize() raises.
    with open(label_csv, 'r') as f:
        sizes = [
            getsize('{}/{}.vir'.format(sample_dir, row[0]))
            for row in csv.reader(f)
            if row  # csv.reader yields an empty list for blank lines
        ]

    max_file_size = max(sizes) if sizes else 0
    print(max_file_size)
    return max_file_size


def start_file_imaging(label_csv='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set/label.csv',
                       sample_dir='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set'):
    """Convert every sample listed in the label CSV into a greyscale image.

    For each row, the file ``<sample_dir>/<first column>.vir`` is read with
    ``get_binarydata`` and the bytes, together with the whole CSV row, are
    passed to ``create_greyscale_image_specific_with``.

    Args:
        label_csv: Path of the CSV listing the samples.
        sample_dir: Directory containing the ``.vir`` sample files.

    Raises:
        OSError: If the CSV or a listed sample file cannot be opened.
    """
    # The context manager closes the CSV when the loop finishes or fails.
    with open(label_csv, 'r') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields an empty list for blank lines.
                continue
            file_full_path = '{}/{}.vir'.format(sample_dir, row[0])
            binary_data = get_binarydata(file_full_path)
            create_greyscale_image_specific_with(binary_data, row)



def main():
    """Run the script: print the largest sample size, then image every sample."""
    # The return value is not used here; get_max_file_size() prints the
    # maximum itself.
    get_max_file_size()
    start_file_imaging()


if __name__ == "__main__":
    main()