1 Canny edge detection process
1) Use a Gaussian filter to smooth the image and filter out noise
2) Calculate the gradient strength and direction of each pixel in the image
3) Apply non-maximum suppression to eliminate spurious responses from edge detection
4) Apply Dual-threshold detection to determine real and potential edges
5) Finalize edge detection by suppressing isolated weak edges
1. Gaussian filter
2. Gradient and direction
3. Non-maximum suppression
4. Dual threshold detection
import cv2
import numpy as np
def cv_show(im, name):
    """Show ``im`` in a window titled ``name`` and block until a key is pressed.

    After the key press, every OpenCV window is destroyed.
    """
    cv2.imshow(name, im)
    cv2.waitKey(0)  # a delay of 0 waits indefinitely for a key press
    cv2.destroyAllWindows()
# Read the picture as grayscale and bring it to a fixed 400x400 size.
img = cv2.imread('data/test2.jpg', cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (400, 400))

# Run Canny twice: first with the higher threshold pair, then the lower one.
threshold_pairs = ((80, 150), (50, 100))
v1, v2 = (cv2.Canny(img, low, high) for low, high in threshold_pairs)

# Panels from left to right: input, edges at (80, 150), edges at (50, 100).
res = np.hstack((img, v1, v2))
cv_show(res, 'res')
Conclusion: The higher the two Canny thresholds, the fewer edges are detected; the lower the thresholds, the more edges are detected.
2 Image Pyramids
Gaussian Pyramid
Laplacian Pyramid
Gaussian Pyramid: Downsampling Method (Zoom Out)
Gaussian Pyramid: Upsampling Method (Zoom In)
import cv2
import numpy as np
def cv_show(im, name):
    """Display the image ``im`` under the window title ``name``.

    Blocks until any key is pressed, then closes all OpenCV windows.
    """
    cv2.imshow(name, im)
    cv2.waitKey(0)  # 0 = no timeout, wait for a key press
    cv2.destroyAllWindows()
# Load the picture and normalise it to 400x400.
img = cv2.resize(cv2.imread('data/dog.jpg'), (400, 400))

# One Gaussian-pyramid step in each direction, each shown in its own window.
up = cv2.pyrUp(img)
down = cv2.pyrDown(img)
for window_title, level in (('up', up), ('down', down)):
    cv_show(level, window_title)

# Downsample and then upsample again, and show the round-trip result next to
# the original so the two can be compared.
reset = cv2.pyrUp(down)
res = np.hstack((img, reset))
cv_show(res, 'res')
The result of upsampling
The result of downsampling
The original image and the image that was first downsampled and then upsampled
Laplacian pyramid
import cv2
import numpy as np
def cv_show(im, name):
    """Open a window called ``name`` showing ``im`` and wait for a key press.

    All OpenCV windows are closed once a key has been pressed.
    """
    cv2.imshow(name, im)
    cv2.waitKey(0)  # wait without timeout
    cv2.destroyAllWindows()
# Build one Laplacian-pyramid level: the detail that is lost when the image
# is downsampled and then upsampled back to its original size.
img = cv2.imread('data/dog.jpg')
img = cv2.resize(img, (400, 400))

down = cv2.pyrDown(img)
reset = cv2.pyrUp(down)

# Plain ``img - reset`` on 8-bit arrays wraps around modulo 256 wherever
# ``reset`` is brighter than ``img``, producing bright speckles.
# cv2.subtract saturates at 0 instead, which is what a displayable
# difference image needs.
laplacian = cv2.subtract(img, reset)

# Panels from left to right: original, round-trip image, difference.
res = np.hstack([img, reset, laplacian])
cv_show(res, 'res')
3 Contour detection method
cv2.findContours(img, mode, method)
mode: contour retrieval mode
- RETR_EXTERNAL: retrieve only the outermost contours
- RETR_LIST: Retrieve all contours and save them in a list
- RETR_CCOMP: Retrieves all contours and organizes them into two levels: the top level holds the outer boundaries of the components, and the second level holds the boundaries of the holes
- RETR_TREE: Retrieves all contours and reconstructs the full hierarchy of nested contours
method: contour approximation method
- CHAIN_APPROX_NONE: Contours are output as Freeman chain codes; all other methods output polygons (sequences of vertices).
- CHAIN_APPROX_SIMPLE: Compresses horizontal, vertical, and diagonal segments, that is, the function keeps only their end points.
4 template matching
Template matching works much like convolution: the template slides over the original image starting from the origin, and at each position a score is computed that measures how well the template matches the image region it covers. OpenCV provides six methods for computing this score. The scores are collected into a matrix, which is returned as the result. If the original image is of size AxB and the template is of size axb, the result matrix has size (A-a+1)x(B-b+1).
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Locate the template inside the image with each of the six matching methods
# and show, per method, the score map next to the detected rectangle.
img = cv2.imread('data/lena.png', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('data/face.png', cv2.IMREAD_GRAYSCALE)

# ndarray.shape is (rows, cols) = (height, width). The locations returned by
# minMaxLoc are (x, y), so the width must be added to x and the height to y.
h, w = template.shape[:2]

methods = ['cv2.TM_CCORR', 'cv2.TM_CCORR_NORMED', 'cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_SQDIFF',
           'cv2.TM_SQDIFF_NORMED']
for meth in methods:
    img2 = img.copy()  # draw on a copy so every method starts from a clean image
    # Resolve the constant by attribute lookup instead of eval().
    method = getattr(cv2, meth.split('.', 1)[1])
    print(method)

    res = cv2.matchTemplate(img, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

    # For squared-difference matching (TM_SQDIFF / TM_SQDIFF_NORMED) the best
    # match is the minimum; for every other method it is the maximum.
    if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)

    # Draw the rectangle around the best match.
    cv2.rectangle(img2, top_left, bottom_right, 255, 2)

    plt.subplot(121), plt.imshow(res, cmap='gray')
    plt.xticks([]), plt.yticks([])
    plt.subplot(122), plt.imshow(img2, cmap='gray')
    plt.xticks([]), plt.yticks([])
    plt.suptitle(meth)
    plt.show()
5 Image histogram
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Grayscale histogram, computed once with OpenCV and plotted with matplotlib.
img = cv2.imread('data/lena.png', cv2.IMREAD_GRAYSCALE)

# Channel 0 only, no mask, 256 bins over the value range [0, 256).
res = cv2.calcHist([img], [0], None, [256], [0, 256])
print(res.shape)  # (256, 1)

# matplotlib builds its own 256-bin histogram from the flattened pixels.
pixels = img.ravel()
plt.hist(pixels, 256)
plt.show()
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Per-channel histograms of a colour image, drawn as three curves.
img = cv2.imread('data/lena.png')

# Channel i is plotted in the colour named color[i].
color = ('b', 'g', 'r')
for channel_index, line_color in enumerate(color):
    channel_hist = cv2.calcHist(
        images=[img],
        channels=[channel_index],
        mask=None,
        histSize=[256],
        ranges=[0, 256],
    )
    plt.plot(channel_hist, color=line_color)
    plt.xlim([0, 256])
plt.show()
6 Fourier transform
The role of the Fourier transform
- High frequency: grayscale components that change drastically, such as boundaries
- Low frequency: slowly changing grayscale components, such as a sea
filtering
- Low-pass filter: only retains low frequencies, making the image blurry
- High-pass filter: Only high frequencies are retained to enhance image details
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Show a grayscale image next to the log-magnitude of its centred spectrum.
img = cv2.imread('data/lena.png', cv2.IMREAD_GRAYSCALE)
img_float32 = np.float32(img)

# The complex output is indexed below as [:, :, 0] and [:, :, 1], i.e. the
# two parts of the complex value live on the last axis.
dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)

# Shift only the two frequency axes. Without ``axes`` fftshift rolls every
# axis, including the length-2 last one, which would swap the two channels.
dft_shift = np.fft.fftshift(dft, axes=(0, 1))

# Convert to a form that can be displayed as a grayscale image. Adding 1
# keeps the logarithm finite where the magnitude is exactly 0.
magnitude = cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1])
magnitude_spectrum = 20 * np.log(magnitude + 1)

plt.subplot(121), plt.imshow(img, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(magnitude_spectrum, cmap='gray')
plt.title('magnitude spectrum'), plt.xticks([]), plt.yticks([])
plt.show()
low pass filter
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Low-pass filter in the frequency domain: keep only a square window around
# the centre of the shifted spectrum, then transform back.
img = cv2.imread('data/lena.png', cv2.IMREAD_GRAYSCALE)
img_float32 = np.float32(img)
dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)

# Shift only the two frequency axes. Without ``axes`` fftshift rolls every
# axis, including the length-2 last one, which would swap the two channels.
dft_shift = np.fft.fftshift(dft, axes=(0, 1))

rows, cols = img.shape
c_row, c_col = rows // 2, cols // 2  # centre position

# Low-pass mask: 1 inside the 60x60 centre window, 0 elsewhere. The same
# value is applied to both channels of the last axis.
mask = np.zeros((rows, cols, 2), np.uint8)
mask[c_row - 30:c_row + 30, c_col - 30:c_col + 30] = 1

# IDFT: apply the mask, undo the shift on the same two axes, and invert.
f_shift = dft_shift * mask
f_ishift = np.fft.ifftshift(f_shift, axes=(0, 1))
img_back = cv2.idft(f_ishift)
img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])

plt.subplot(121), plt.imshow(img, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(img_back, cmap='gray')
plt.title('Result'), plt.xticks([]), plt.yticks([])
plt.show()
high pass filter
import cv2
import numpy as np
import matplotlib.pyplot as plt
# High-pass filter in the frequency domain: zero out a square window around
# the centre of the shifted spectrum, then transform back.
img = cv2.imread('data/lena.png', cv2.IMREAD_GRAYSCALE)
img_float32 = np.float32(img)
dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)

# Shift only the two frequency axes. Without ``axes`` fftshift rolls every
# axis, including the length-2 last one, which would swap the two channels.
dft_shift = np.fft.fftshift(dft, axes=(0, 1))

rows, cols = img.shape
c_row, c_col = rows // 2, cols // 2  # centre position

# High-pass mask: 0 inside the 60x60 centre window, 1 elsewhere. The same
# value is applied to both channels of the last axis.
mask = np.ones((rows, cols, 2), np.uint8)
mask[c_row - 30:c_row + 30, c_col - 30:c_col + 30] = 0

# IDFT: apply the mask, undo the shift on the same two axes, and invert.
f_shift = dft_shift * mask
f_ishift = np.fft.ifftshift(f_shift, axes=(0, 1))
img_back = cv2.idft(f_ishift)
img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])

plt.subplot(121), plt.imshow(img, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(img_back, cmap='gray')
plt.title('Result'), plt.xticks([]), plt.yticks([])
plt.show()