OpenCV-Python in Practice
Record all the content of my learning OpenCV
Image Basic Operation
An image is composed of pixels (0-255).
Data read-image
- 1.cv2.IMREAD_COLOR: color image
- 2.cv2.IMREAD_GRAYSCALE: grayscale image
import cv2
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Load the picture from disk (no read flag is passed, so cv2.imread uses its default mode).
img = cv2.imread('cat.jpg')

# Display the image in a window named 'image'; several windows can be created this way.
cv2.imshow('image', img)
# The wait time is given in milliseconds; 0 means "wait until any key is pressed".
cv2.waitKey(0)
cv2.destroyAllWindows()
Press any key and the image window will close.
def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` and close it after a key press.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        img: Image passed to ``cv2.imshow``.
    """
    cv2.imshow(name, img)
    # 0 -> block until any key is pressed, then tear down every window.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
# Notebook echo: shape of the image currently bound to `img`.
img.shape

# Read the same file again, this time as a grayscale image.
img = cv2.imread('cat.jpg', cv2.IMREAD_GRAYSCALE)
img
img.shape

# Display the image; several windows can be created this way.
cv2.imshow('image', img)
# Wait time in milliseconds; 0 means "terminate on any key".
cv2.waitKey(0)
cv2.destroyAllWindows()

# Save the grayscale result to disk.
cv2.imwrite('mycat.png', img)
Data Reading - Video
- cv2.VideoCapture can capture from a camera, in which case the device is selected by a numeric index such as 0 or 1. If the source is a video file, just specify its path directly.
# Play 'test.mp4' frame by frame as grayscale until the stream ends or Esc is pressed.
vc = cv2.VideoCapture('test.mp4')

# Only enter the playback loop when the capture was opened correctly.
# (No frame is read ahead of the loop, so the first frame is displayed too,
# and the builtin `open` is no longer shadowed by a flag variable.)
while vc.isOpened():
    ret, frame = vc.read()
    # A failed read / missing frame means there is nothing more to show.
    if not ret or frame is None:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow('result', gray)
    # Show each frame for 10 ms; key code 27 (Esc) stops playback early.
    if cv2.waitKey(10) & 0xFF == 27:
        break

vc.release()
# Fixed: the attribute is `destroyAllWindows` (capital W); the previous
# spelling `destroyAllwindows` raised AttributeError.
cv2.destroyAllWindows()
ROI area
img = cv2.imread('cat.jpg')

# Region of interest: rows 0..199 and columns 0..199 of the image.
cat = img[0:200, 0:200]
# Keyword arguments keep the call independent of cv_show's parameter order.
cv_show(name='cat', img=cat)
border padding
Set the bounding box method : cv2.copyMakeBorder(src, top, bottom, left, right, borderType, value)
- src : input image
- top, bottom, left, right : the border width in the corresponding direction
- borderType : defines the method of adding a border
- value : If the borderType is cv2.BORDER_CONSTANT, the constant that needs to be filled
The borderType method is as follows:
- cv2.BORDER_REPLICATE : Copy method, copy the most edge pixels
- cv2.BORDER_REFLECT : Reflection method, perform reflection copy on both sides of the specified pixel in the image, such as: gfedcba|abcdefgh|hgfedcb
- cv2.BORDER_REFLECT_101 : Reflection method, with the most edge pixel as the axis, such as: dcba|abcd|dcba
- cv2.BORDER_WRAP : Outer packaging method, such as: cdefgh|abcdefgh|abcdefg
- cv2.BORDER_CONSTANT : constant value padding
# Pad `img` by 50 pixels on every side with each border mode and plot the results.
top_size, bottom_size, left_size, right_size = (50, 50, 50, 50)
pad = (top_size, bottom_size, left_size, right_size)

replicate = cv2.copyMakeBorder(img, *pad, borderType=cv2.BORDER_REPLICATE)
reflect = cv2.copyMakeBorder(img, *pad, borderType=cv2.BORDER_REFLECT)
reflect101 = cv2.copyMakeBorder(img, *pad, borderType=cv2.BORDER_REFLECT_101)
wrap = cv2.copyMakeBorder(img, *pad, borderType=cv2.BORDER_WRAP)
constant = cv2.copyMakeBorder(img, *pad, borderType=cv2.BORDER_CONSTANT, value=0)

import matplotlib.pyplot as plt

panels = [
    ('ORIGINAL', img),
    ('REPLICATE', replicate),
    ('REFLECT', reflect),
    ('REFLECT_101', reflect101),  # title typo 'REFECT_101' fixed
    ('WRAP', wrap),
    ('CONSTANT', constant),
]
# Subplot codes 231..236 lay the six pictures out on a 2x3 grid.
for slot, (title, picture) in enumerate(panels, start=231):
    plt.subplot(slot)
    # OpenCV stores colour images as BGR while matplotlib expects RGB;
    # convert for display only (assumes `img` is the 3-channel image read above).
    plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
    plt.title(title)
plt.show()
Numeral Calculations
To add two images directly, the two images must have the same size, and the corresponding pixels are added. Since each pixel is stored as an 8-bit value between 0 and 255, a sum greater than 255 wraps around: 256 is automatically subtracted from it (equivalent to % 256). If a constant is added to the image as a whole, that is, the same value is added to every element, the brightness increases.
img_cat = cv2.imread('cat.jpg')
img_dog = cv2.imread('dog.jpg')

# Add 10 to every pixel.
img_cat2 = img_cat + 10

# Notebook echoes: first five rows of channel 0 before and after the addition.
# (The second echo previously repeated `img_cat`, hiding the effect of the +10,
# and the assignment above was duplicated.)
img_cat[:5, :, 0]
img_cat2[:5, :, 0]

# Plain NumPy addition wraps around, i.e. it is equivalent to % 256.
(img_cat + img_cat2)[:5, :, 0]
cv2.add(src1, src2)
cv2.add requires the two images to have the same size and the same number of channels. The corresponding pixel values are added, and any result that exceeds 255 is clamped to 255.
# cv2.add, unlike the `+` operator used above; echo the first five rows of channel 0.
saturated = cv2.add(img_cat, img_cat2)
saturated[:5, :, 0]
image fusion
# Element-wise addition and cv2.addWeighted both need operands of identical
# shape, so resize the dog picture to the cat's (width, height) first.
img_dog = cv2.resize(img_dog, (img_cat.shape[1], img_cat.shape[0]))
img_cat + img_dog

# Blend: res = 0.4 * cat + 0.6 * dog + 0
res = cv2.addWeighted(img_cat, 0.4, img_dog, 0.6, 0)
# OpenCV images are BGR, matplotlib expects RGB; convert for display only.
plt.imshow(cv2.cvtColor(res, cv2.COLOR_BGR2RGB))
image threshold
- Grayscale
import cv2 #opencv读取的格式是BGR
import numpy as np
import matplotlib.pyplot as plt#Matplotlib是RGB
%matplotlib inline
img = cv2.imread('cat.jpg')

# Convert the BGR image to a single-channel grayscale image.
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_gray.shape

cv2.imshow("img_gray", img_gray)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
H - Hue (dominant wavelength)
S - Saturation (purity/shade of the color)
V - Value (intensity)
# Convert the BGR image to the HSV colour space and display the raw result.
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

cv2.imshow("hsv", hsv)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
ret, dst = cv2.threshold(src, thresh, maxval, type)
-
src : input image, only single-channel image can be input, usually a grayscale image
-
dst : output map
-
thresh : threshold
-
maxval : When the pixel value exceeds the threshold (or less than the threshold, depending on the type), the value assigned
-
type : The type of binarization operation, including the following 5 types: cv2.THRESH_BINARY; cv2.THRESH_BINARY_INV; cv2.THRESH_TRUNC; cv2.THRESH_TOZERO; cv2.THRESH_TOZERO_INV
-
cv2.THRESH_BINARY takes maxval (maximum value) for the part exceeding the threshold, otherwise takes 0
-
cv2.THRESH_BINARY_INV Inversion of THRESH_BINARY
-
cv2.THRESH_TRUNC greater than the threshold is set to the threshold, otherwise unchanged
-
The part of cv2.THRESH_TOZERO greater than the threshold does not change, otherwise it is set to 0
-
cv2.THRESH_TOZERO_INV Inversion of THRESH_TOZERO
# Apply every threshold type to the grayscale image with thresh=127, maxval=255.
modes = (
    cv2.THRESH_BINARY,
    cv2.THRESH_BINARY_INV,
    cv2.THRESH_TRUNC,
    cv2.THRESH_TOZERO,
    cv2.THRESH_TOZERO_INV,
)
outcomes = [cv2.threshold(img_gray, 127, 255, mode) for mode in modes]
# `ret` keeps the value returned by the last call, as in a sequence of assignments.
ret = outcomes[-1][0]
thresh1, thresh2, thresh3, thresh4, thresh5 = (outcome[1] for outcome in outcomes)

titles = ['Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV']
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]

# 2x3 grid, one panel per image, axis ticks hidden.
for i, (title, picture) in enumerate(zip(titles, images)):
    plt.subplot(2, 3, i + 1)
    plt.imshow(picture, 'gray')
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
plt.show()
image smoothing
While digital images are being processed and transmitted, they may be disturbed by various kinds of noise. Image noise degrades image quality, blurs the image, and can drown out its features. Enhancing the image by removing this noise is known as image smoothing.
-
1. Mean filtering
Assume that a digital image contaminated by noise contains N*N pixels. The value of each pixel can be replaced by the average of several pixels in its neighborhood (which can be understood as the area enclosed by a rectangle), and a new, smoothed image is obtained this way. This process is called mean filtering.
-
2. Box filtering
Mean filtering, described above, averages the pixels in the whole neighborhood and assigns the average to the target pixel. Box filtering lets you choose whether to normalize: without normalization, the plain sum of the neighborhood's pixel values is assigned to the pixel.
-
3. Gaussian filtering
In mean filtering, the values of all pixels in the box are added up and averaged before being assigned to the original pixel, so every pixel has the same weight. In Gaussian filtering, the weight of each pixel follows a Gaussian distribution of its distance from the center pixel; that is, the farther away a pixel is, the smaller its weight.
-
4. Median filtering
Median filtering sorts the gray value of all pixels in a sliding window, and replaces the gray value of the central pixel in the sliding window with the median value ; for example, the value of a pixel in a window is [90 ,89,76,89,94,93,23,54,87], then the median value of 89 will be used instead of the original midpoint of 94
# Load the input picture used by the smoothing examples and display it.
img = cv2.imread('lenaNoise.png')

cv2.imshow('img', img)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
mean filtering
# Mean filter:
# a simple averaging convolution over a 3x3 window.
blur = cv2.blur(img, (3, 3))

cv2.imshow('blur', blur)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
box filter
# Box filter:
# basically the same as the mean filter, with normalization being optional.
box = cv2.boxFilter(img, -1, (3, 3), normalize=True)

cv2.imshow('box', box)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()

# Box filter without normalization:
# same call with normalize=False; the results easily go out of range.
box = cv2.boxFilter(img, -1, (3, 3), normalize=False)

cv2.imshow('box', box)
cv2.waitKey(0)
cv2.destroyAllWindows()
Gaussian filter
# Gaussian filter:
# the values in the Gaussian kernel follow a Gaussian distribution, which
# amounts to giving more weight to the pixels near the centre.
# NOTE: the name `aussian` is kept because a later cell stacks it with the other results.
aussian = cv2.GaussianBlur(img, (5, 5), 1)

cv2.imshow('aussian', aussian)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
median filter
# Median filter:
# each pixel is replaced by the median of its window (aperture size 5).
median = cv2.medianBlur(img, 5)

cv2.imshow('median', median)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()

# Show all results side by side: mean | Gaussian | median.
res = np.hstack((blur, aussian, median))
# print(res)
cv2.imshow('median vs average', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
Image Morphological Operations
Morphological operations are simple operations performed on the shape of an image. In general, they operate on binarized or grayscale images. They require two inputs: one is the original image, and the other is called a structuring element or kernel, which decides the nature of the operation. The two basic morphological operations are erosion and dilation, and their combinations form the opening operation, the closing operation, the gradient, and so on. In short, morphological operations change the shape of an object: erosion makes it "thinner" and dilation makes it "fatter". Note that erosion and dilation act on the white part of the picture. The effect is shown in the following figure:
Erosion
img = cv2.imread('dige.png')

cv2.imshow('img', img)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()

# One erosion pass with a 3x3 all-ones kernel.
kernel = np.ones((3, 3), np.uint8)
erosion = cv2.erode(img, kernel, iterations=1)

cv2.imshow('erosion', erosion)
cv2.waitKey(0)
cv2.destroyAllWindows()

pie = cv2.imread('pie.png')

cv2.imshow('pie', pie)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Erode the same picture 1, 2 and 3 times with a 30x30 kernel and compare.
kernel = np.ones((30, 30), np.uint8)
erosion_1, erosion_2, erosion_3 = (
    cv2.erode(pie, kernel, iterations=passes) for passes in (1, 2, 3)
)
res = np.hstack((erosion_1, erosion_2, erosion_3))
cv2.imshow('res', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
Dilation operation
img = cv2.imread('dige.png')
cv2.imshow('img', img)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()

# Erode first with a 3x3 all-ones kernel ...
kernel = np.ones((3, 3), np.uint8)
dige_erosion = cv2.erode(img, kernel, iterations=1)

# Fixed: display the erosion computed just above. The window previously showed
# the stale `erosion` variable left over from an earlier cell.
cv2.imshow('erosion', dige_erosion)
cv2.waitKey(0)
cv2.destroyAllWindows()

# ... then dilate the eroded picture with the same kernel size.
kernel = np.ones((3, 3), np.uint8)
dige_dilate = cv2.dilate(dige_erosion, kernel, iterations=1)

cv2.imshow('dilate', dige_dilate)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Dilate 'pie.png' 1, 2 and 3 times with a 30x30 kernel and compare side by side.
pie = cv2.imread('pie.png')

kernel = np.ones((30, 30), np.uint8)
dilate_1 = cv2.dilate(pie, kernel, iterations=1)
dilate_2 = cv2.dilate(pie, kernel, iterations=2)
dilate_3 = cv2.dilate(pie, kernel, iterations=3)
res = np.hstack((dilate_1, dilate_2, dilate_3))
cv2.imshow('res', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
Open and close operations
1. Opening operation = erosion first, then dilation (it appears to separate two objects that are finely connected). The
effect of the opening operation is shown in the figure below:
- Opening operation summary:
(1) The opening operation can remove isolated small points, burrs and small bridges, while the overall position and shape remain unchanged.
(2) Open operation is a filter based on geometric operation.
(3) Different sizes of structural elements will lead to different filtering effects.
(4) The selection of different structural elements leads to different segmentations, that is, different features are extracted.
# Opening: erode first, then dilate.
img = cv2.imread('dige.png')

kernel = np.ones((5, 5), np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

cv2.imshow('opening', opening)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
2. Closing operation = dilation first, then erosion (it appears to close the gap between two finely separated blocks). The
effect of the closing operation is shown in the figure below:
- Closing operation summary:
(1) The closing operation can fill up small lakes (ie, small holes) and bridge small cracks, while the overall position and shape remain unchanged.
(2) The closed operation filters the image by filling the concave corners of the image.
(3) Different sizes of structural elements will result in different filtering effects.
(4) The selection of different structural elements leads to different segmentations.
# Closing: dilate first, then erode.
img = cv2.imread('dige.png')

kernel = np.ones((5, 5), np.uint8)
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

cv2.imshow('closing', closing)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
gradient operation
A picture is dilated and eroded separately, and the eroded picture is then subtracted from the dilated picture; the resulting picture is the morphological gradient of the original.
That is: gradient (img) = dilation (img) - erosion (img)
The outline of the foreground object can be obtained by gradient operation.
# Gradient = dilation - erosion
pie = cv2.imread('pie.png')
kernel = np.ones((7, 7), np.uint8)

# Show the two ingredients (5 passes each) side by side.
dilate = cv2.dilate(pie, kernel, iterations=5)
erosion = cv2.erode(pie, kernel, iterations=5)
res = np.hstack((dilate, erosion))

cv2.imshow('res', res)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()

# The built-in morphological gradient with the same kernel.
gradient = cv2.morphologyEx(pie, cv2.MORPH_GRADIENT, kernel)

cv2.imshow('gradient', gradient)
cv2.waitKey(0)
cv2.destroyAllWindows()
Top hats and black hats
- top hat = original input - open operation result
- Black hat = closed operation - original input
top hat
# Top hat
img = cv2.imread('dige.png')

# NOTE: `kernel` is not defined in this cell; whatever kernel is currently in
# scope from an earlier cell is reused.
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)

cv2.imshow('tophat', tophat)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
black hat
# Black hat
img = cv2.imread('dige.png')

# NOTE: `kernel` is not defined in this cell; whatever kernel is currently in
# scope from an earlier cell is reused.
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)

cv2.imshow('blackhat ', blackhat)
cv2.waitKey(0)  # 0 -> wait for any key
cv2.destroyAllWindows()
image gradient
Image gradient-Sobel operator
The gradient can be calculated according to the x direction or the y direction. In fact, it is to look at the difference and change of the pixel points. For example, the junction of black and white objects has a very large difference in pixel value change.
The function used for gradient calculation is called Sobel operator, which can be divided into horizontal gradient and vertical gradient.
To put it simply, the Sobel operator is a special convolution kernel that can be used for edge detection of images.
# Load the test picture as grayscale for the gradient examples.
img = cv2.imread('pie.png', cv2.IMREAD_GRAYSCALE)

cv2.imshow("img", img)
cv2.waitKey()  # called with no delay argument
cv2.destroyAllWindows()
dst = cv2.Sobel(src, ddepth, dx, dy, ksize)
- ddepth: the depth of the image
- dx and dy represent the horizontal and vertical directions, respectively
- ksize is the size of the Sobel operator
def cv_show(img, name):
    """Show ``img`` in a window titled ``name`` and close it after a key press.

    Note the parameter order: the image comes first, the window title second.

    Args:
        img: Image passed to ``cv2.imshow``.
        name: Title of the window passed to ``cv2.imshow``.
    """
    cv2.imshow(name, img)
    cv2.waitKey()
    cv2.destroyAllWindows()
# Horizontal Sobel derivative (dx=1, dy=0), 3x3 kernel, 64-bit float output.
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)

cv_show(img=sobelx, name='sobelx')
White to black is a positive number, black to white is a negative number, all negative numbers will be truncated to 0, so the absolute value should be taken
# Horizontal gradient, passed through convertScaleAbs before display.
sobelx = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3))
cv_show(img=sobelx, name='sobelx')

# Vertical gradient (dx=0, dy=1), same treatment.
sobely = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3))
cv_show(img=sobely, name='sobely')
Image gradient-Scharr operator
img = cv2.imread('pie.png', cv2.IMREAD_GRAYSCALE)

# Scharr derivatives in x and y, each passed through convertScaleAbs.
scharrx = cv2.convertScaleAbs(cv2.Scharr(img, cv2.CV_64F, 1, 0))
scharry = cv2.convertScaleAbs(cv2.Scharr(img, cv2.CV_64F, 0, 1))

# Combine both directions with equal weights.
scharrxy = cv2.addWeighted(scharrx, 0.5, scharry, 0.5, 0)
Image gradient-laplacian operator
- The difference between different operators
# Differences between the operators
img = cv2.imread('lena.jpg', cv2.IMREAD_GRAYSCALE)

# Sobel: x and y gradients blended 50/50.
sobelx = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3))
sobely = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3))
sobelxy = cv2.addWeighted(sobelx, 0.5, sobely, 0.5, 0)

# Scharr: x and y gradients blended 50/50.
scharrx = cv2.convertScaleAbs(cv2.Scharr(img, cv2.CV_64F, 1, 0))
scharry = cv2.convertScaleAbs(cv2.Scharr(img, cv2.CV_64F, 0, 1))
scharrxy = cv2.addWeighted(scharrx, 0.5, scharry, 0.5, 0)

# Laplacian.
laplacian = cv2.convertScaleAbs(cv2.Laplacian(img, cv2.CV_64F))

# Sobel | Scharr | Laplacian side by side.
res = np.hstack((sobelxy, scharrxy, laplacian))
cv_show(img=res, name='res')

img = cv2.imread('lena.jpg', cv2.IMREAD_GRAYSCALE)
cv_show(img=img, name='img')
Canny edge detection
-
1 Use a Gaussian filter to smooth the image and filter out noise.
-
2 Calculate the gradient strength and direction of each pixel in the image.
-
3 Apply Non-Maximum Suppression suppression to eliminate spurious responses from edge detection.
-
4 Apply Double-Threshold detection to identify real and potential edges.
-
5 Edge detection is finally done by suppressing isolated weak edges.
Gaussian filter
The essence is to blur the image. Those who have played PS should understand it well. Originally, there are a lot of noises on the image. Use the smudge tool to wipe it. The noise is gone, the picture becomes blurred, and the noise disappears. This stage is similar to convolution.
Multiply the image with a weighted filter of size 3×3 or 5×5.
Here, take a Gaussian kernel of size=3 as an example; normalization is applied so that the elements sum to 1.
The main purpose of filtering is to reduce noise, and image-processing algorithms generally perform noise reduction first. While Gaussian filtering mainly smooths (blurs) the image, it may also increase the width of the edges.
gradient and direction
For the smoothed image, first use the Sobel kernel to calculate the horizontal direction Gx and the vertical direction Gy in the horizontal and vertical directions
Then calculate the edge gradient and gradient direction for each pixel:
non-maximum suppression
After calculating the gradient value and gradient direction, scan the image comprehensively to remove irrelevant pixels that do not constitute edges.
For each pixel, check whether it is a local maximum among its adjacent pixels in the gradient direction. As shown in the figure:
point A is located at the edge of the image (vertical direction). The gradient direction (Gradient Direction) is perpendicular to the edge. Point B and point C is in the direction of the gradient.
Therefore, point A is compared with point B and point C to determine whether point A is a local maximum. If point A is a local maximum, it continues to the next stage; if point A is not a local maximum, it is suppressed (set to 0).
In simple terms, the result obtained by NMS is a binary image of thin edges. It can be understood that all edge points are marked as 1, and non-edge points are marked as 0.
Double-Threshold Detection
This stage mainly judges which of the above NMS are real edges and which are false edges.
This stage needs to set two thresholds, minVal and maxVal. Any edge strength greater than maxVal is determined as an edge, while less than minVal is determined as a non-edge , to be discarded.
Edges whose strength lies between minVal and maxVal are candidates, and they are judged by connectivity: a candidate that is connected to a sure edge is considered part of the real edge; otherwise it is discarded. As shown in the figure below, edge A is greater than maxVal, therefore it is a "sure edge".
Although edge C is less than maxVal, it is connected to edge A, so it is also considered a valid edge, which yields a complete edge curve.
However, although edge B is greater than minVal and lies in the same range as edge C, it is not connected to any "sure edge", so edge B is discarded.
As can be seen above, the selection of the minVal and maxVal values is very important for the result of edge detection.
In addition, the processing of this stage also removes small pixel noise, because the edge is assumed to be a long curve.
Finally, the effective edge of the picture can be obtained.
# Canny on lena.jpg with two threshold pairs, shown side by side.
img = cv2.imread("lena.jpg", cv2.IMREAD_GRAYSCALE)

v1 = cv2.Canny(img, 80, 150)
v2 = cv2.Canny(img, 50, 100)

res = np.hstack((v1, v2))
cv_show(img=res, name='res')

# Same comparison on car.png, with a different first threshold pair.
img = cv2.imread("car.png", cv2.IMREAD_GRAYSCALE)

v1 = cv2.Canny(img, 120, 250)
v2 = cv2.Canny(img, 50, 100)

res = np.hstack((v1, v2))
cv_show(img=res, name='res')
Image Pyramid and Contour Detection
Image Pyramid Definition
The image pyramid is a kind of multi-scale expression in the image. It is mainly used for the segmentation of the image. It is an effective but conceptually simple structure to explain the image at multiple resolutions. The image pyramid was originally used for machine vision and image compression. The pyramid of an image is a series of images arranged in a pyramid shape with gradually reduced resolution and derived from the same original image. It is obtained by down-sampling in steps, and the sampling is stopped until a certain termination condition is reached. The bottom of the pyramid is a high-resolution representation of the image to be processed, while the top is a low-resolution approximation. We compare layer-by-layer images to a pyramid. The higher the level, the smaller the image and the lower the resolution.
Common two types of image pyramids
- Gaussian pyramid: used for down-sampling; it is the main image pyramid
- Laplacian pyramid: used to reconstruct the un-down-sampled image of an upper level from the lower-level image of the pyramid. In digital image processing it is the prediction residual; it allows the image to be restored as closely as possible, and it is used together with the Gaussian pyramid.
Gaussian Pyramid
- Gaussian Pyramid: Downsampling method (shrinking)
- Gaussian Pyramid: Upsampling Method (Zoom In)
img = cv2.imread("AM.png")
cv_show(img=img, name='img')
print(img.shape)

# One level up the pyramid.
up = cv2.pyrUp(img)
cv_show(img=up, name='up')
print(up.shape)

# One level down the pyramid.
down = cv2.pyrDown(img)
cv_show(img=down, name='down')
print(down.shape)

# pyrUp applied a second time, on the already up-sampled image.
up2 = cv2.pyrUp(up)
cv_show(img=up2, name='up2')
print(up2.shape)

# Up-sample then down-sample, and compare with the original side by side.
up = cv2.pyrUp(img)
up_down = cv2.pyrDown(up)
cv_show(img=up_down, name='up_down')
cv_show(img=np.hstack((img, up_down)), name='up_down')

# Element-wise difference between the original and the round-tripped picture.
up = cv2.pyrUp(img)
up_down = cv2.pyrDown(up)
cv_show(img=img - up_down, name='img-up_down')
Laplace Pyramid
# One Laplacian-pyramid level: original minus (down-sampled then up-sampled).
down = cv2.pyrDown(img)
# By default pyrUp exactly doubles each dimension, which is one pixel too large
# when `img` has an odd width or height; request img's own (width, height) so
# that the subtraction below always operates on equally sized arrays.
down_up = cv2.pyrUp(down, dstsize=(img.shape[1], img.shape[0]))
l_1 = img - down_up
cv_show(img=l_1, name='l_1')
image outline
cv2.findContours(img,mode,method)
mode: contour retrieval mode
- RETR_EXTERNAL : retrieve only the outermost contours;
- RETR_LIST : Retrieve all contours and save them in a linked list;
- RETR_CCOMP : retrieves all contours and organizes them into two layers: the top layer is the outer boundaries of the parts, and the second layer is the boundaries of the cavities;
- RETR_TREE : retrieves all contours, and reconstructs the entire hierarchy of nested contours;
method: contour approximation method
- CHAIN_APPROX_NONE : stores absolutely all the contour points, without any compression.
- CHAIN_APPROX_SIMPLE : Compresses horizontal, vertical and oblique parts, that is, the function keeps only their end parts.
For higher accuracy, use binary images
img = cv2.imread('contours.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Binarize first: contour detection is run on the thresholded image.
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
cv_show(img=thresh, name='thresh')

# OpenCV 3.x returns (image, contours, hierarchy) while OpenCV 4.x returns
# (contours, hierarchy); taking the last two items works with both, whereas a
# three-name unpacking raises ValueError on 4.x.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
draw outline
# Arguments: image to draw on, contours, contour index, colour, line thickness.
# Note: draw on a copy, otherwise the original image is modified.
draw_img = img.copy()
res = cv2.drawContours(draw_img, contours, -1, (0, 0, 255), 2)
cv_show(img=res, name='res')

# Same call with contour index 0 instead of -1.
draw_img = img.copy()
res = cv2.drawContours(draw_img, contours, 0, (0, 0, 255), 2)
cv_show(img=res, name='res')
Contour features
cnt = contours[0]

# Area
cv2.contourArea(cnt)

# Perimeter; True means the contour is closed.
cv2.arcLength(cnt, True)
contour approximation
img = cv2.imread('contours2.png')

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Take the last two return values so the call works on both OpenCV 3.x
# (image, contours, hierarchy) and 4.x (contours, hierarchy).
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
cnt = contours[0]

# Draw the first contour on a copy of the picture.
draw_img = img.copy()
res = cv2.drawContours(draw_img, [cnt], -1, (0, 0, 255), 2)
cv_show(img=res, name='res')

# Approximate the contour with a polygon; the tolerance is 15% of the perimeter.
epsilon = 0.15 * cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, epsilon, True)

draw_img = img.copy()
res = cv2.drawContours(draw_img, [approx], -1, (0, 0, 255), 2)
cv_show(img=res, name='res')
bounding rectangle
img = cv2.imread('contours.png')

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Take the last two return values so the call works on both OpenCV 3.x
# (image, contours, hierarchy) and 4.x (contours, hierarchy).
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
cnt = contours[0]

# Upright bounding rectangle of the first contour, drawn in green.
x, y, w, h = cv2.boundingRect(cnt)
img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv_show(img=img, name='img')

# Ratio of the contour area to the area of its bounding rectangle.
area = cv2.contourArea(cnt)
x, y, w, h = cv2.boundingRect(cnt)
rect_area = w * h
extent = float(area) / rect_area
print('轮廓面积与边界矩形比', extent)
Contour area to bounding rectangle ratio 0.5154317244724715
Circumscribed circle
# Minimum enclosing circle of the contour; cast the centre and radius to int
# before passing them to cv2.circle, then draw the circle in green.
(x, y), radius = cv2.minEnclosingCircle(cnt)
center = (int(x), int(y))
radius = int(radius)

img = cv2.circle(img, center, radius, (0, 255, 0), 2)
cv_show(img=img, name='img')
Fourier transform
The role of the Fourier transform
-
High frequency: grayscale components that change drastically, such as boundaries
-
Low frequency: slowly changing grayscale components, such as a sea
filtering
-
Low-pass filter: only retains low frequencies, which will blur the image
-
High-pass filter: Only high frequencies are retained, which will enhance image details
-
Opencv is mainly cv2.dft() and cv2.idft(), and the input image needs to be converted to np.float32 format first.
-
The part with a frequency of 0 in the obtained result will be in the upper left corner, and usually needs to be converted to the center position, which can be achieved by shift transformation.
-
The result returned by cv2.dft() is dual-channel (real part, imaginary part), and usually needs to be converted into an image format to display (0,255).
Frequency domain transformation result
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread('lena.jpg', 0)

# cv2.dft is fed a float32 copy of the image.
img_float32 = np.float32(img)

dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)
dft_shift = np.fft.fftshift(dft)

# Turn the two-channel result into something a grayscale plot can represent.
magnitude = cv2.magnitude(dft_shift[:, :, 0], dft_shift[:, :, 1])
magnitude_spectrum = 20 * np.log(magnitude)

# Left panel: input image.
plt.subplot(121)
plt.imshow(img, cmap='gray')
plt.title('Input Image')
plt.xticks([])
plt.yticks([])

# Right panel: magnitude spectrum.
plt.subplot(122)
plt.imshow(magnitude_spectrum, cmap='gray')
plt.title('Magnitude Spectrum')
plt.xticks([])
plt.yticks([])

plt.show()
Low-Pass and High-Pass Filtering
img = cv2.imread('lena.jpg', 0)

img_float32 = np.float32(img)

dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)
dft_shift = np.fft.fftshift(dft)

rows, cols = img.shape
crow, ccol = int(rows / 2), int(cols / 2)  # centre position

# Low-pass filter: zero everything except a 60x60 window around the centre of
# the shifted spectrum. (This cell used to be an exact copy of the high-pass
# cell that follows, so the low-pass case was never demonstrated.)
mask = np.zeros((rows, cols, 2), np.uint8)
mask[crow - 30:crow + 30, ccol - 30:ccol + 30] = 1

# IDFT: apply the mask, undo the shift, transform back and take the magnitude.
fshift = dft_shift * mask
f_ishift = np.fft.ifftshift(fshift)
img_back = cv2.idft(f_ishift)
img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])

plt.subplot(121), plt.imshow(img, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(img_back, cmap='gray')
plt.title('Result'), plt.xticks([]), plt.yticks([])

plt.show()
img = cv2.imread('lena.jpg', 0)

img_float32 = np.float32(img)

dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)
dft_shift = np.fft.fftshift(dft)

rows, cols = img.shape
# Centre position
crow, ccol = rows // 2, cols // 2

# High-pass filter: all-ones mask with a 60x60 window around the centre zeroed.
mask = np.ones((rows, cols, 2), np.uint8)
mask[crow - 30:crow + 30, ccol - 30:ccol + 30] = 0

# IDFT: apply the mask, undo the shift, transform back and take the magnitude.
fshift = dft_shift * mask
f_ishift = np.fft.ifftshift(fshift)
img_back = cv2.idft(f_ishift)
img_back = cv2.magnitude(img_back[:, :, 0], img_back[:, :, 1])

# Left panel: input image.
plt.subplot(121)
plt.imshow(img, cmap='gray')
plt.title('Input Image')
plt.xticks([])
plt.yticks([])

# Right panel: filtered result.
plt.subplot(122)
plt.imshow(img_back, cmap='gray')
plt.title('Result')
plt.xticks([])
plt.yticks([])

plt.show()