lemonheart99/기계학습
악성코드 이미지화 - ver.02
레몬하트99
2020. 10. 7. 23:29
import os,sys
from PIL import Image
import csv
from os.path import getsize
def get_binarydata(filename):
    """Read a file and return its content as a list of byte values.

    Args:
        filename: Path of the file to read. It is opened in binary mode.

    Returns:
        A list with one int (0-255) per byte, in file order. An empty
        file yields an empty list.
    """
    # A single bulk read replaces the former read(1) loop, and the context
    # manager guarantees the handle is closed even if reading fails.
    with open(filename, "rb") as file:
        # bytearray iterates as ints regardless of how the bytes were read.
        return list(bytearray(file.read()))
# (largest size in bytes, inclusive) -> image width. This is the usual
# file-size-to-width table for malware visualisation (Nataraj et al., 2011):
# <10 kB: 32, 10-30 kB: 64, 30-60 kB: 128, 60-100 kB: 256, 100-200 kB: 384,
# 200-500 kB: 512, 500-1000 kB: 768, larger: 1024.
_WIDTH_BY_MAX_SIZE = (
    (10240 - 1, 32),
    (10240 * 3, 64),
    (10240 * 6, 128),
    (10240 * 10, 256),
    (10240 * 20, 384),
    (10240 * 50, 512),
    (10240 * 100, 768),
)
_MAX_IMAGE_WIDTH = 1024


def _width_for_size(size):
    """Return the image width to use for a sample of ``size`` bytes."""
    for max_size, width in _WIDTH_BY_MAX_SIZE:
        if size <= max_size:
            return width
    return _MAX_IMAGE_WIDTH


def create_greyscale_image_specific_with(dataset, file_name_short_list, width=0):
    """Render byte values as an 8-bit greyscale PNG and save it by label.

    Args:
        dataset: Sequence of pixel values (one int per byte of the sample).
        file_name_short_list: Row from the label CSV; element 0 is the
            sample name used for the output file name and element 1 is
            the label string ('0' or '1').
        width: Image width in pixels. 0 (the default) selects a width from
            the size of ``dataset``.

    Returns:
        None. The image is written to disk and a message is printed. Rows
        whose label is neither '0' nor '1' produce no output.
    """
    label = file_name_short_list[1]
    # NOTE(review): label '1' is written to the False/ folder and '0' to the
    # True/ folder - confirm this mapping is what the training code expects.
    if label == '1':
        imagename = 'E:/dataset/malware_images_02/False/' + file_name_short_list[0] + '.png'
    elif label == '0':
        imagename = 'E:/dataset/malware_images_02/True/' + file_name_short_list[0] + '.png'
    else:
        # Unknown label (e.g. a header row): nothing is saved.
        return

    # The size is needed for the height even when the caller supplies a width.
    size = len(dataset)
    if width == 0:
        width = _width_for_size(size)

    # One extra row holds the trailing partial line; unused pixels stay black.
    height = size // width + 1
    image = Image.new('L', (width, height))
    image.putdata(dataset)

    image.save(imagename)
    print(imagename + " Greyscale image created")
def get_max_file_size(
        label_csv='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set/label.csv',
        data_dir='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set'):
    """Print and return the size in bytes of the largest listed sample.

    Args:
        label_csv: Path of the CSV file whose first column holds sample
            names. Defaults to the training-set label file.
        data_dir: Directory containing one ``<name>.vir`` file per sample.
            Defaults to the training-set directory.

    Returns:
        The largest file size, in bytes, among the listed samples.

    Raises:
        ValueError: If the CSV lists no samples.
        OSError: If a listed sample file cannot be stat'ed.
    """
    file_size_list = []
    # The context manager closes the CSV even when getsize() raises.
    with open(label_csv, 'r') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. a trailing newline) carry no sample name.
                continue
            file_path = '{}/{}.vir'.format(data_dir, row[0])
            file_size_list.append(getsize(file_path))

    max_file_size = max(file_size_list)
    print(max_file_size)
    return max_file_size
def start_file_imaging(
        label_csv='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set/label.csv',
        data_dir='E:/dataset/183a2e5c8992419ca6af43e56630ba83/train_set'):
    """Convert every sample listed in the label CSV into a greyscale image.

    For each CSV row, the sample ``<data_dir>/<row[0]>.vir`` is read with
    ``get_binarydata`` and passed, together with the row itself, to
    ``create_greyscale_image_specific_with``.

    Args:
        label_csv: Path of the CSV file; the first column holds sample
            names. Defaults to the training-set label file.
        data_dir: Directory containing one ``<name>.vir`` file per sample.
            Defaults to the training-set directory.
    """
    # The context manager closes the CSV even if a sample fails to convert.
    with open(label_csv, 'r') as f:
        for row in csv.reader(f):
            if not row:
                # Blank lines (e.g. a trailing newline) carry no sample name.
                continue
            file_full_path = '{}/{}.vir'.format(data_dir, row[0])
            binary_data = get_binarydata(file_full_path)
            create_greyscale_image_specific_with(binary_data, row)
if __name__ == "__main__":
    # Script entry point: report the largest sample size first, then
    # convert every listed sample into an image.
    get_max_file_size()
    start_file_imaging()