Using OpenCV feature similarity to classify pictures

Article directory


foreword

Xiao Liu is an avid photographer. He has many photos (in different formats and resolutions); some he took himself, and some were taken with a friend's camera.
However, many of the pictures are near-duplicates (for example, the same scene shot from a slightly different angle), which bothers him. Write a program to help him pick out the similar pictures.
Tip: Use OpenCV to resize the pictures to a uniform size, then judge them by feature similarity. Several students can pool their photos to build a data set (>100 images).


1. Code

insert image description here

import os
import shutil
import random
import cv2
import numpy as np

# Side length, in pixels, that every image is resized to before hashing.
dim = 32
# Number of bits in one fingerprint: one bit per pixel of the dim x dim image.
dim_sqr = dim * dim

# Root folder of the source data set (one sub-folder per class).
path_data = './data'
# Output folders: every image copied flat, and one reference image per class.
deter_exam = './pro_data/Fruits-all-examples/'
deter_flag = './pro_data/Fruits-all-flag/'
# Folder that receives the shuffled copy of the examples. It carries a
# trailing slash like the other output folders, so that code which appends a
# file name directly to it still produces a path inside the folder.
deter_exam_2 = './pro_data/Fruits-all-examples_2/'


def data_procss(root_path, exam, flag):
    """Split the data set into a flat "exam" folder and a "flag" folder.

    Every first-level sub-folder of ``root_path`` is treated as one class.
    All files of every class are copied into ``exam``; in addition, one
    randomly chosen file per class is copied into ``flag`` to serve as that
    class's reference image.

    Args:
        root_path: Folder containing one sub-folder per class.
        exam: Destination folder for all images (created if missing).
        flag: Destination folder for the reference images (created if missing).

    NOTE(review): files keep their original names, so two classes containing
    a file with the same name overwrite each other in ``exam`` / ``flag``.
    """
    # shutil.copyfile does not create missing parent folders.
    os.makedirs(exam, exist_ok=True)
    os.makedirs(flag, exist_ok=True)

    for first in sorted(os.listdir(root_path)):
        class_dir = os.path.join(root_path, first)
        # Stray files next to the class folders are not classes.
        if not os.path.isdir(class_dir):
            continue

        # Only regular files can be copied; nested folders are ignored.
        imgs = [
            name for name in sorted(os.listdir(class_dir))
            if os.path.isfile(os.path.join(class_dir, name))
        ]
        # An empty class has nothing to sample from.
        if not imgs:
            continue

        # One random image per class becomes its reference ("flag") image.
        chosen = random.choice(imgs)
        shutil.copyfile(os.path.join(class_dir, chosen),
                        os.path.join(flag, chosen))

        for img in imgs:
            shutil.copyfile(os.path.join(class_dir, img),
                            os.path.join(exam, img))


def data_rename(path):
    """Rename every ``.jpg`` file in ``path`` to ``0.jpg``, ``1.jpg``, ...

    Files are numbered in sorted name order so the result is reproducible.
    Renaming happens in two passes (to temporary names first, then to the
    final names) because a direct rename could target a name that another,
    not-yet-renamed file still holds; on POSIX ``os.rename`` would then
    silently overwrite that file.

    Args:
        path: Folder whose ``.jpg`` files are renamed in place. Files with
            other extensions are left untouched.
    """
    folder = os.path.abspath(path)
    jpg_names = sorted(
        name for name in os.listdir(folder) if name.endswith('.jpg')
    )

    # Pass 1: move everything out of the way under collision-free names.
    staged = []
    for idx, name in enumerate(jpg_names):
        tmp_path = os.path.join(
            folder, '.rename_tmp_{}_{}.jpg'.format(os.getpid(), idx))
        try:
            os.rename(os.path.join(folder, name), tmp_path)
        except OSError as err:
            # Best effort, as before: skip this file but say why.
            print('data_rename: skipping {!r}: {}'.format(name, err))
            continue
        staged.append(tmp_path)

    # Pass 2: assign the final sequential names.
    next_index = 0
    for tmp_path in staged:
        target = os.path.join(folder, '{}.jpg'.format(next_index))
        # Only a file skipped in pass 1 can still hold a numeric name.
        while os.path.exists(target):
            next_index += 1
            target = os.path.join(folder, '{}.jpg'.format(next_index))
        os.rename(tmp_path, target)
        next_index += 1


def data_shulffe(image_dir, result_dir):
    """Copy the files of ``image_dir`` into ``result_dir`` under shuffled names.

    Each file with an extension is copied to ``<k><ext>``, where the ``k``
    values are a random permutation of ``0 .. count-1``. The source folder is
    left unchanged.

    Args:
        image_dir: Folder containing the files to shuffle.
        result_dir: Destination folder (created if missing).

    Both folders may be given with or without a trailing path separator.
    """
    os.makedirs(result_dir, exist_ok=True)

    # Decide what gets copied first, so the new indices have no gaps.
    candidates = []
    for name in sorted(os.listdir(image_dir)):
        ext = os.path.splitext(name)[1]
        # Entries without an extension (including dot-files) and sub-folders
        # are not copied.
        if ext and os.path.isfile(os.path.join(image_dir, name)):
            candidates.append((name, ext))

    new_indices = list(range(len(candidates)))
    random.shuffle(new_indices)

    for new_index, (name, ext) in zip(new_indices, candidates):
        shutil.copyfile(os.path.join(image_dir, name),
                        os.path.join(result_dir, str(new_index) + ext))

def aHash(image):
    """Compute the average-hash fingerprint of an image.

    The image is shrunk to ``dim`` x ``dim`` pixels, which discards fine
    detail and differences in size or aspect ratio, and converted to
    grayscale. Each pixel then contributes one bit: 1 if it is strictly
    brighter than the mean gray level, otherwise 0.

    Args:
        image: Image array as returned by ``cv2.imread``. It is converted
            with ``COLOR_BGR2GRAY``, so a 3-channel BGR image is assumed.

    Returns:
        A list of ``dim * dim`` ints (0 or 1) in row-major pixel order.

    Raises:
        ValueError: If ``image`` is None, which is what ``cv2.imread``
            returns for a missing or unreadable file.
    """
    if image is None:
        raise ValueError('aHash received no image (file missing or unreadable?)')

    small = cv2.resize(image, (dim, dim), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    average = np.mean(gray)
    # One vectorised comparison replaces the per-pixel loop; ravel() walks
    # the pixels row by row, matching the original i/j order.
    return (gray > average).astype(int).ravel().tolist()


def Hamming_distance(hash1, hash2):
    """Return the Hamming distance between two fingerprints.

    The distance is the number of positions at which the two sequences
    differ; the fewer differing bits, the more similar the images.

    Args:
        hash1: First fingerprint (sequence of bits).
        hash2: Second fingerprint, same length as ``hash1``.

    Returns:
        The number of differing positions, as an int.

    Raises:
        ValueError: If the fingerprints differ in length, since a
            position-by-position comparison is then meaningless.
    """
    if len(hash1) != len(hash2):
        raise ValueError(
            'fingerprints must have equal length, got {} and {}'.format(
                len(hash1), len(hash2)))
    return sum(bit1 != bit2 for bit1, bit2 in zip(hash1, hash2))


def data_classify(num_flags=4, num_exam=104, threshold=0.80):
    """Sort the shuffled example images into one folder per reference image.

    Every example ``<j>.jpg`` in ``deter_exam_2`` is compared with every
    reference ``<i>.jpg`` in ``deter_flag`` using the average hash. An example
    whose similarity to reference ``i`` exceeds ``threshold`` is written to
    ``./Fruits-<i>/<j>.jpg``. An example may match several references or none.

    Args:
        num_flags: Number of reference images (``0.jpg`` .. ``num_flags-1``).
        num_exam: Number of example images (``0.jpg`` .. ``num_exam-1``).
        threshold: Minimum similarity, exclusive, in the range 0..1.

    Images that cannot be read are reported and skipped.
    """
    def exam_path(index):
        return os.path.join(deter_exam_2, '{}.jpg'.format(index))

    # Hash each example once instead of once per reference image.
    exam_hashes = {}
    for j in range(num_exam):
        img = cv2.imread(exam_path(j))
        if img is None:  # cv2.imread signals failure by returning None
            print('data_classify: cannot read {}, skipping'.format(exam_path(j)))
            continue
        exam_hashes[j] = aHash(img)

    for i in range(num_flags):
        flag_path = os.path.join(deter_flag, '{}.jpg'.format(i))
        img1 = cv2.imread(flag_path)
        if img1 is None:
            print('data_classify: cannot read {}, skipping'.format(flag_path))
            continue
        hash1 = aHash(img1)

        data_path = 'Fruits-' + str(i)  # output folder for this class
        os.makedirs(data_path, exist_ok=True)

        for j, hash2 in exam_hashes.items():
            n = Hamming_distance(hash1, hash2)
            similarity = 1 - n * 1.0 / dim_sqr  # distance -> similarity
            print("flag中第{}张图片与exam中第{}张图片相似度为:{}".format(i,j,similarity))
            if similarity > threshold:
                # Only hashes are cached, so load the pixels again to write.
                cv2.imwrite(os.path.join(data_path, '{}.jpg'.format(j)),
                            cv2.imread(exam_path(j)))


if __name__ == '__main__':
    # Step 1: flatten the data set and pick one reference image per class.
    data_procss(path_data, deter_exam, deter_flag)
    # Step 2: give the examples, then the references, sequential names.
    for folder in (deter_exam, deter_flag):
        data_rename(folder)
    # Step 3: copy the examples under shuffled names, then classify them.
    data_shulffe(deter_exam, deter_exam_2)
    data_classify()

2. Results display

Generate folder example:

insert image description here
Classification results under Fruits-3:

insert image description here
Screenshot of the program running results:

insert image description here
Data and code link:

https://pan.baidu.com/s/1eweFKIRDu3ZLKv00Tg_SEA

Extraction code: 1414

Guess you like

Origin blog.csdn.net/qq_50492541/article/details/124626525