Computer Vision Experiment 1

Common feature extraction algorithms are mainly divided into the following three categories:

Based on color features: such as the color histogram, color set, color moments, color coherence vector, etc.;
Based on texture features: such as Tamura texture features, autoregressive texture model, Gabor transform, wavelet transform, MPEG7 edge histogram, etc.;
Based on shape features: such as Fourier shape descriptors, invariant moments, wavelet contour descriptors, etc.;

Table of contents

LBP Texture Feature Extraction Algorithm

1. LBP texture feature extraction algorithm

LBP (Local Binary Patterns) is a texture descriptor that is invariant to monotonic grayscale changes and, in its extended forms, to rotation. The basic operator works on a 3×3 window ( $2^{8}$ possible patterns). The improved circular LBP operator, written $LBP_{P}^{R}$ , samples P pixels on a circular neighborhood of radius R ( $2^{P}$ possible patterns).

The LBP operator also has a uniform pattern (sometimes translated as "equivalent mode"), defined as follows: when the circular binary number corresponding to an LBP code has at most two transitions from 0 to 1 or from 1 to 0, that code is called a uniform pattern (there are P(P-1)+2 such patterns); all other codes are grouped into a single mixed (non-uniform) class. This reduces the dimension of the feature vector and reduces the influence of high-frequency noise.

Extending the LBP operator further gives the rotation-invariant LBP operator: the circular neighborhood is rotated step by step to obtain a series of LBP values, and the minimum of these is taken as the LBP value of the neighborhood, written $LBP_{P,R}^{ri}$ . This reduces the number of distinct LBP patterns further and makes texture recognition easier, but the direction information is lost.

Rotation invariance can also be combined with the uniform pattern: rotating the uniform pattern classes yields rotation-invariant uniform patterns, which reduces the number of uniform classes to P+1 (one for each possible count of 1 bits, 0 to P), and all non-uniform patterns are assigned to one additional class labelled P+1.

The steps used by LBP for detection:

Firstly, the detection window is divided into small areas (cells) of 16×16;
For a pixel in each cell, compare the gray value of the adjacent 8 pixels with it, if the surrounding pixel value is greater than the center pixel value, the position of the pixel is marked as 1, otherwise it is 0. In this way, the 8 points in the 3*3 neighborhood can be compared to generate an 8-bit binary number, that is, the LBP value of the center pixel of the window is obtained;
Then calculate the histogram of each cell, that is, the frequency of each number (assumed to be a decimal number LBP value); then normalize the histogram.
Finally, connect the obtained statistical histograms of each cell into a feature vector, that is, the LBP texture feature vector of the entire image; then use SVM or other machine learning algorithms for classification.

Code implementation:

import numpy as np
import cv2
import matplotlib.pyplot as plt
import math

# Pre-processing: every image is converted to grayscale before LBP is applied.
# First define the helper that evaluates all rotations of a code, so that the
# final result can be made rotation invariant.
def value_rotation(num):
    """Return the smallest of the 8 circular bit rotations of ``num``.

    ``num`` is truncated with ``int()`` and handled as an 8-bit pattern.
    The result is a ``numpy.uint8`` scalar.
    """
    pattern = int(num)
    rotations = np.zeros(8, np.uint8)
    for slot in range(8):
        rotations[slot] = pattern
        # Rotate left by one bit: the top bit (pattern // 128) wraps to bit 0.
        pattern = ((pattern << 1) | (pattern // 128)) % 256
    return rotations.min()

# A uniform ("equivalent") LBP pattern is one whose circular bit string
# switches between 0 and 1 at most twice.
# This helper counts those switches.
def getHopcnt(num):
    """Count the 0/1 transitions in the circular 8-bit form of ``num``.

    :param num: integer pattern; values outside 0-255 are clamped to that range
    :return: how many of the 8 bit positions differ from the cyclically
        preceding bit
    """
    clamped = min(255, max(0, num))
    # Fixed-width, zero-padded binary string; bits[-1] precedes bits[0].
    bits = bin(clamped)[2:].zfill(8)
    return sum(1 for pos in range(8) if bits[pos] != bits[pos - 1])

# Min-max normalisation: linearly re-project the pixel values onto a new gray range.
# By default the target maximum is 255 and the target minimum is 0.
def img_max_min_normalization(src, min=0, max=255):
    """Linearly rescale ``src`` so that its values span ``[min, max]``.

    The global minimum of ``src`` (taken over all channels together) is
    mapped to ``min`` and the global maximum to ``max``.

    :param src: array of shape (height, width) or (height, width, channel)
    :param min: lower bound of the output range (shadows the builtin inside
        this function; the name is kept for backward compatibility)
    :param max: upper bound of the output range (same remark as ``min``)
    :return: ``float32`` array; 2-D when ``src`` has a single channel,
        otherwise (height, width, channel). When every value of ``src`` is
        identical the output is filled with ``min`` instead of dividing by
        zero.
    """
    if len(src.shape) > 2:
        channel = src.shape[2]
    else:
        channel = 1
    out_shape = src.shape[:2] if channel == 1 else src.shape[:3]

    # Convert to Python floats before subtracting so that narrow integer
    # dtypes cannot overflow when the span is computed.
    src_min = float(np.min(src))
    src_max = float(np.max(src))
    span = src_max - src_min

    if span == 0:
        # Constant image: there is no range to stretch.
        return np.full(out_shape, min, dtype=np.float32)

    # Same arithmetic as the per-pixel formula, evaluated for the whole array
    # in float64 and stored as float32.
    values = np.asarray(src, dtype=np.float64).reshape(out_shape)
    scaled = (values - src_min) / span * (max - min) + min
    return scaled.astype(np.float32)

# Rotation-invariant + uniform ("equivalent") pattern LBP
def rotation_invariant_uniform_LBP(src):
    """Compute a rotation-invariant uniform LBP map of a grayscale image.

    For every interior pixel the 8 neighbours of its 3x3 window are compared
    with the centre (bit = 1 when the neighbour is strictly greater) and
    packed into an 8-bit code in clockwise ring order starting at the
    top-left neighbour. The code is reduced to its minimum circular rotation
    with ``value_rotation`` and then labelled: uniform codes (at most two
    cyclic transitions according to ``getHopcnt``) receive the 1-based index
    of that canonical code among all uniform codes, non-uniform codes
    receive 0. The label map is finally stretched to 0-255 with
    ``img_max_min_normalization``.

    :param src: 2-D grayscale image array
    :return: ``float32`` array with the shape of ``src``. The one-pixel
        border, for which no full 3x3 window exists, is left at label 0.
    :raises ValueError: if ``src`` is not 2-D
    """
    if src.ndim != 2:
        raise ValueError('rotation_invariant_uniform_LBP expects a 2-D grayscale image')

    # Label the uniform codes 1, 2, ... in ascending code order; 0 = non-uniform.
    uniform_label = np.zeros(256, dtype=np.uint8)
    next_label = 1
    for code in range(256):
        if getHopcnt(code) <= 2:
            uniform_label[code] = next_label
            next_label += 1

    # Fold rotation invariance into the table: each code gets the label of
    # its minimum rotation. Rotation has to be applied to the LBP code
    # itself, not to the normalised gray value.
    table = np.zeros(256, dtype=np.uint8)
    for code in range(256):
        table[code] = uniform_label[value_rotation(code)]

    height, width = src.shape
    labels = np.zeros((height, width), dtype=np.uint8)

    if height >= 3 and width >= 3:
        center = src[1:-1, 1:-1]
        # (dy, dx) offsets walked clockwise around the centre. The bits must
        # follow the ring for hop counts and rotations to be meaningful.
        ring = ((-1, -1), (-1, 0), (-1, 1), (0, 1),
                (1, 1), (1, 0), (1, -1), (0, -1))
        codes = np.zeros(center.shape, dtype=np.uint8)
        for bit, (dy, dx) in enumerate(ring):
            neighbour = src[1 + dy:height - 1 + dy, 1 + dx:width - 1 + dx]
            codes |= (neighbour > center).astype(np.uint8) << np.uint8(bit)
        labels[1:-1, 1:-1] = table[codes]

    # A constant label map has no range to stretch; return zeros rather than
    # asking the normaliser to divide by a zero span.
    if labels.size == 0 or labels.max() == labels.min():
        return np.zeros((height, width), dtype=np.float32)
    return img_max_min_normalization(labels)

# Visualise the results
def disp_test_result(img, gray, dst, mode=0):
    """Show the source image, its grayscale version and the LBP result.

    :param img: source image; assumed to be in OpenCV's BGR channel order,
        which is what the ``mode == 0`` branch passes straight to
        ``cv2.imshow``
    :param gray: grayscale version of ``img``
    :param dst: LBP result image
    :param mode: 0 displays with OpenCV windows; any other value displays
        with matplotlib
    :return: None
    """
    if mode == 0:
        cv2.imshow('src', img)
        cv2.imshow('gray', gray)
        cv2.imshow('LBP', dst)
        cv2.waitKey()
        cv2.destroyAllWindows()
    else:
        # matplotlib interprets 3-channel data as RGB, so a BGR image must
        # be converted first or red and blue appear swapped.
        if getattr(img, 'ndim', 0) == 3 and img.shape[2] == 3:
            shown = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            shown = img

        plt.figure()
        plt.subplot(131)
        plt.imshow(shown)
        plt.title('src')

        plt.subplot(132)
        plt.imshow(gray, cmap='gray')
        plt.title('gray')

        plt.subplot(133)
        plt.imshow(dst, cmap='gray')
        plt.title('LBP')
        plt.show()


if __name__ == '__main__':
    img = cv2.imread('test_2.jpg')
    # cv2.imread signals an unreadable file by returning None, not by raising.
    if img is None:
        raise FileNotFoundError("could not read image 'test_2.jpg'")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert the picture to grayscale
    dst3 = rotation_invariant_uniform_LBP(gray)

    # The LBP map is float32 in the 0-255 range. cv2.imshow treats floating
    # point images as [0, 1], so convert to 8-bit before displaying.
    dst3 = np.clip(np.rint(dst3), 0, 255).astype(np.uint8)

    disp_test_result(img, gray, dst3, mode=0)

The result display:

2. Harris corner detection algorithm

A Harris corner is a salient point: shifting a small observation window around it in any direction produces a large change in the pixel intensities inside the window.

Mathematical model : After shifting (u, v), the image in the window changes, and the patch with large E (u, v) is taken:

$E(u,v)=\sum_{x,y}\omega (x,y)\left [ I(x+u,y+v)-I(x,y)\right ]^{2}$

Code implementation:

import cv2
import numpy as np

# Read the image
img = cv2.imread('test_2.jpg')
# cv2.imread signals an unreadable file by returning None, not by raising.
if img is None:
    raise FileNotFoundError("could not read image 'test_2.jpg'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Image derivatives in x and y
Ix = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
Iy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

# The three distinct components of the Harris matrix
Ix2 = Ix * Ix
Iy2 = Iy * Iy
Ixy = Ix * Iy

# Sum each component over a ksize x ksize window
ksize = 3
kernel = np.ones((ksize, ksize), np.float32)
Sx2 = cv2.filter2D(Ix2, -1, kernel)
Sy2 = cv2.filter2D(Iy2, -1, kernel)
Sxy = cv2.filter2D(Ixy, -1, kernel)

# Harris response R = det(M) - k * trace(M)^2
k = 0.04
R = (Sx2 * Sy2 - Sxy * Sxy) - k * ((Sx2 + Sy2) ** 2)

# Threshold the response and apply non-maximum suppression
thresh = 0.01 * np.max(R)
radius = 5
corners = []
rows, cols = R.shape
# Only pixels above the threshold can be corners, so visit just those
# (np.argwhere yields them in the same row-major order as a nested scan).
for y, x in np.argwhere(R > thresh):
    if y < radius or y >= rows - radius or x < radius or x >= cols - radius:
        continue
    window = R[y - radius:y + radius + 1, x - radius:x + radius + 1]
    if R[y, x] == np.max(window):
        corners.append((int(y), int(x)))

# Mark the corners on the image
for y, x in corners:
    # cv2.circle takes the centre as (x, y), i.e. (column, row).
    cv2.circle(img, (x, y), radius, (0, 0, 255), -1)

if __name__ == '__main__':
    # Show the image
    cv2.imshow('Harris Corners', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

The result display:

3. Fast corner detection algorithm

FAST corner point : If a pixel point is in a different area from enough pixels in its surrounding area, the pixel point may be a corner point. That is, some attributes are different. Consider a grayscale image, that is, if the grayscale value of the point is larger or smaller than the grayscale value of enough pixels in its surrounding area, the point may be a corner point.

Algorithm steps :

Select a pixel P from the picture, first set its brightness value to Ip;
Set an appropriate threshold t;
Consider a discretized Bresenham circle with a radius equal to 3 pixels centered on this pixel point, and there are 16 pixels on the boundary of this circle;
If there are n consecutive pixels among the 16 pixels on this circle whose values are all larger than Ip+t or all smaller than Ip−t, then P is a corner. The value of n can be set to 12 or 9.

Code implementation:

import cv2
import numpy as np

def non_max_suppression(img, nms_window_size):
    """Mark the pixels that are the maximum of their local window.

    :param img: 2-D array of pixel values
    :param nms_window_size: side length of the square window; the window
        extends ``nms_window_size // 2`` pixels on each side of the centre
    :return: array with the shape and dtype of ``img`` holding 255 where the
        pixel equals the maximum of its window and 0 elsewhere. Pixels closer
        than ``nms_window_size // 2`` to the image edge are never marked.
        Ties are kept: every pixel of a flat plateau that equals the window
        maximum is marked.
    """
    # All-zero output of the same size as the input
    nms_img = np.zeros_like(img)
    rows, cols = img.shape[:2]
    half_size = nms_window_size // 2

    # Images too small to contain one full window have no pixel to test.
    if rows <= 2 * half_size or cols <= 2 * half_size:
        return nms_img

    interior = img[half_size:rows - half_size, half_size:cols - half_size]

    # Running maximum over the window, built from shifted views of the image:
    # one array operation per window offset, not one per pixel.
    window_max = interior.copy()
    for dr in range(-half_size, half_size + 1):
        for dc in range(-half_size, half_size + 1):
            shifted = img[half_size + dr:rows - half_size + dr,
                          half_size + dc:cols - half_size + dc]
            np.maximum(window_max, shifted, out=window_max)

    # A pixel survives only if nothing in its window exceeds it.
    out_view = nms_img[half_size:rows - half_size, half_size:cols - half_size]
    out_view[interior == window_max] = 255
    return nms_img




if __name__ == '__main__':
    # Read the image
    img = cv2.imread("test_2.jpg")
    # cv2.imread signals an unreadable file by returning None, not by raising.
    if img is None:
        raise FileNotFoundError("could not read image 'test_2.jpg'")

    # FAST corner detection
    fast = cv2.FastFeatureDetector_create(threshold=40)
    kp = fast.detect(img, None)
    # Extract the corner coordinates as (x, y) pairs
    pts = cv2.KeyPoint_convert(kp)

    # Draw the detected keypoints on a copy of the source image
    img_kp = cv2.drawKeypoints(img, kp, None, color=(0, 255, 0))

    # Non-maximum suppression on the grayscale image
    nms_window_size = 5
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    nms_img = non_max_suppression(gray, nms_window_size)

    # Convert the suppressed map to a colour image so coloured circles can be drawn
    nms_img_color = cv2.cvtColor(nms_img, cv2.COLOR_GRAY2BGR)

    # Draw the corners on top of the suppressed image
    for pt in pts:
        x, y = pt
        cv2.circle(nms_img_color, (int(x), int(y)), 5, (0, 0, 255), 2)

    # Show the images
    cv2.imshow("Original Image with Keypoints", img_kp)
    cv2.imshow("Non-maximum Suppressed Image", nms_img_color)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

The result display: