import hashlib,os,shutil
from hashlib import md5
from execel import *
import openpyxl,datetime
def write_xlsx_excel1(url, sheet_name, two_dimensional_data):
    """Write two-dimensional data into a new .xlsx workbook.

    A fresh workbook is created, its active sheet is titled ``sheet_name``,
    every value is stored as ``str(value)``, and the workbook is saved to
    ``url``. A confirmation message is printed after saving.

    Args:
        url: Destination path handed to ``Workbook.save``.
        sheet_name: Title assigned to the active worksheet.
        two_dimensional_data: Sequence of rows, each a sequence of values.
    """
    book = openpyxl.Workbook()
    worksheet = book.active
    worksheet.title = sheet_name
    # Worksheet rows and columns are addressed starting from 1.
    for row_no, row_values in enumerate(two_dimensional_data, start=1):
        for col_no, value in enumerate(row_values, start=1):
            worksheet.cell(row=row_no, column=col_no, value=str(value))
    book.save(url)
    print("写入成功")
def read_xlsx_excel1(url, sheet_name):
    """Read one worksheet of an .xlsx file into a list of rows.

    Args:
        url: Path of the workbook to load.
        sheet_name: Name of the worksheet to read.

    Returns:
        A list with one inner list per worksheet row, holding the value of
        each cell in that row.
    """
    book = openpyxl.load_workbook(url)
    worksheet = book[sheet_name]
    return [[cell.value for cell in row] for row in worksheet.rows]
# Module-level accumulator filled by traverse() and read by main2().
lst = []


def traverse(rootDir):
    """Append the path of every file below ``rootDir`` to the global ``lst``.

    ``os.walk`` already descends into every sub-directory, so no explicit
    recursion is needed. The previous extra ``traverse(dir)`` call passed a
    bare directory name, which is resolved against the current working
    directory and could therefore add duplicate or unrelated paths.

    ``lst`` is not cleared here; repeated calls keep accumulating entries.

    Args:
        rootDir: Directory whose file tree is scanned.
    """
    for root, _dirs, files in os.walk(rootDir):
        lst.extend(os.path.join(root, name) for name in files)
def createMD5tofile(file):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large capture files do not
    have to fit in memory, and the ``with`` block guarantees the handle is
    closed even when reading fails.

    Args:
        file: Path of the file to hash.

    Returns:
        The MD5 digest as a 32-character lowercase hex string.
    """
    digest = md5()
    with open(file, 'rb') as a_file:
        # iter() with a sentinel stops at the empty bytes returned on EOF.
        for chunk in iter(lambda: a_file.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()
def quc():
    """Remove duplicate pcap entries from the MD5 listing.

    Reads sheet "md5" of "md5.xlsx", keeps only the first row for each
    distinct value in the third column (index 2, where main2 stores the MD5
    digest), and writes the surviving rows to sheet "1" of "去重后样本.xlsx".
    """
    rows = read_xlsx_excel1("md5.xlsx", "md5")
    seen = set()  # set membership is O(1); the former list made this O(n^2)
    unique_rows = []
    for row in rows:
        digest = row[2]
        if digest in seen:
            continue
        seen.add(digest)
        unique_rows.append(row)
    write_xlsx_excel1("去重后样本.xlsx", "1", unique_rows)
def move_pcap(path):
    """Copy the de-duplicated samples into the directory ``path``.

    The source paths are taken from the first column of sheet "1" in
    "去重后样本.xlsx". Files are copied, not moved; the originals stay in
    place. Samples that share a file name end up overwriting each other in
    the destination because only the target directory is given to
    ``shutil.copy``.

    Args:
        path: Destination directory; it is created when missing.
    """
    # If `path` is not an existing directory, shutil.copy treats it as a
    # file name and every sample would overwrite the previous one.
    os.makedirs(path, exist_ok=True)
    for row in read_xlsx_excel1("去重后样本.xlsx", "1"):
        shutil.copy(row[0], path)
def main2(path):
    """Hash every pcap sample below ``path`` and record the results.

    Collects all file paths via ``traverse`` (into the global ``lst``),
    prints that list, and for each path containing "pcap" builds a row of
    ``[full path, file name, MD5 digest]``. The rows are written to sheet
    "md5" of a workbook named after today's date (``YYYYMMDD.xlsx``), after
    which ``quc`` is invoked.

    Args:
        path: Root directory to scan for samples.
    """
    traverse(path)
    print(lst)
    rows = []
    for file_path in lst:
        # Substring test against the whole path, so a directory name
        # containing "pcap" also qualifies its files.
        if "pcap" not in file_path:
            continue
        rows.append([
            file_path,
            # basename works with any path separator, unlike split('\\').
            os.path.basename(file_path),
            createMD5tofile(file_path),
        ])
    write_xlsx_excel1(datetime.datetime.now().strftime("%Y%m%d") + ".xlsx", "md5", rows)
    # NOTE(review): quc() reads "md5.xlsx", while the line above writes a
    # date-named workbook - confirm which file quc is expected to pick up.
    quc()
if __name__ == '__main__':
    # Step 1: compute the MD5 of every pcap sample below the given root.
    # Alternative roots left commented out by the author:
    #   r"D:\git\版本包\1\Sample_Package_Construction\基线样本"
    #   r"D:\git\版本包\packets\ips_packet"
    rootDir = r"D:\git\版本包\1\Sample_Package_Construction\第一步"
    main2(rootDir)

    # Step 2: drop the samples whose MD5 is duplicated.
    quc()

    # Step 3: place the de-duplicated samples under the target path.
    move_pcap(r"D:\git\版本包\1\Sample_Package_Construction\第二步")
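# --- Web-page residue from the original blog post (not part of the program) ---
# Tag: file md5
# You may also like
# Reposted from blog.csdn.net/qq_39306128/article/details/132172193
# Today's recommendations
# Weekly ranking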