Opencv basic knowledge compilation

1. Basic operations

Image manipulation

  • cv2.IMREAD_COLOR: Color image
  • cv2.IMREAD_GRAYSCALE: Grayscale image
import cv2
import numpy as np

path = "test.png" # test image
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # color is the default read mode; grayscale is requested here

# generate a test image with numpy (no code for this follows in the file)

# Image display helper
def cv_imshow(name, img, delay=0):
    """Show ``img`` in a window titled ``name``, wait for a key, then close.

    Args:
        name: Window title passed to ``cv2.imshow``. Several windows can be
            created by using different names.
        img: Image array to display.
        delay: Wait time in milliseconds passed to ``cv2.waitKey``. The
            default ``0`` waits until any key is pressed.

    Returns:
        None. All OpenCV windows are destroyed before returning.
    """
    cv2.imshow(name, img)
    cv2.waitKey(delay)
    cv2.destroyAllWindows()

# save the image
cv2.imwrite('mytest.png', img)   # returns a value when the save succeeds

# image type
type(img) # numpy.ndarray

# number of elements in the image array
img.size

# storage dtype of the image
img.dtype # dtype('uint8')

# crop the image -- plain index slicing is enough
img[0:200, 0:200]

# Split the image into its colour channels.
# NOTE: this needs a 3-channel BGR image; an image read with
# cv2.IMREAD_GRAYSCALE has a single channel and cannot be unpacked into b, g, r.
b, g, r = cv2.split(img)

# Merge the channels back into one image
img = cv2.merge((b, g, r))

# Keep a single channel (R) by zeroing the other two on a copy
cur_img = img.copy()
cur_img[:,:,0] = 0 # zero the B channel
cur_img[:,:,1] = 0 # zero the G channel

border padding

# border padding: pad 200 pixels on every side using each border mode
top_size, bottom_size, left_size, right_size = (200,200,200,200)
replicate = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_REPLICATE) # replicate: copy the outermost edge pixels
reflect = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_REFLECT) # reflect: mirror the image pixels on both sides of the border
reflect101 = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_REFLECT_101) # reflect about the outermost pixel as the axis: gfedcb|abcdefgh|gfedcba
warp = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_WRAP) # wrap-around: cdefgh|abcdefgh|abcdefg
constant = cv2.copyMakeBorder(img, top_size, bottom_size, left_size, right_size, borderType=cv2.BORDER_CONSTANT, value=0) # constant: fill with a fixed value

# Show the original image next to each border-padding result in a 2x3 grid.
# NOTE(review): `plt` is presumably matplotlib.pyplot; no import for it is
# visible in this file -- confirm it is imported before running.
panels = [
    (231, img, 'ORIGINAL'),
    (232, replicate, 'replicate'),
    (233, reflect, 'reflect'),
    (234, reflect101, 'reflect101'),
    (235, warp, 'warp'),
    (236, constant, 'constant'),
]
for position, picture, caption in panels:
    plt.subplot(position)
    plt.imshow(picture, 'gray')
    plt.title(caption)
# Without this call the figure is never displayed when run as a script.
plt.show()

Numeral Calculations

img+10 # add 10 to every element of the image
img+img # for uint8 data this wraps around, i.e. (img+img)%256; same-shape arrays are added element-wise
cv2.add(img, img) # NOT the same as img+img: cv2.add saturates, so sums above 255 are clipped to 255

image fusion

# Two images: img1 is (640, 640, 3), img2 is (320, 320, 3)
# cv2.resize returns a new image, so the result must be kept; a separate name
# is used so the original img1 stays untouched for other snippets.
img1_small = cv2.resize(img1, (320, 320))  # bring img1 to img2's size
# img1_small = cv2.resize(img1, (0, 0), fx=0.5, fy=0.5) # alternative: scale x and y to 0.5 of the original
cv2.addWeighted(img1_small, 0.4, img2, 0.5, 0) # 0.4*img1 + 0.5*img2 + 0; both inputs must have the same size

image threshold
ret, dst = cv2.threshold(src, thresh, maxval, type)

  • src: Input image, only single-channel images can be input, usually grayscale images
  • dst: output graph
  • thresh: threshold 127
  • maxval: The value assigned when the pixel value exceeds the threshold (or is less than the threshold, depending on the type)
  • type: The type of binary operation, including the following 5 types: cv2.THRESH_BINARY; cv2.THRESH_BINARY_INV; cv2.THRESH_TRUNC; cv2.THRESH_TOZERO; cv2.THRESH_TOZERO_INV
    • cv2.THRESH_BINARY takes maxval (maximum value) for the part exceeding the threshold, otherwise takes 0
    • cv2.THRESH_BINARY_INV THRESH_BINARY inversion
    • cv2.THRESH_TRUNC The part greater than the threshold is set to the threshold, otherwise it remains unchanged
    • cv2.THRESH_TOZERO The part greater than the threshold is not changed, otherwise it is set to 0
    • cv2.THRESH_TOZERO_INV The inverse of cv2.THRESH_TOZERO
# Run the five threshold modes on img1 (thresh=127, maxval=255).
ret, thresh1 = cv2.threshold(img1, 127, 255, cv2.THRESH_BINARY)
ret, thresh2 = cv2.threshold(img1, 127, 255, cv2.THRESH_BINARY_INV)
ret, thresh3 = cv2.threshold(img1, 127, 255, cv2.THRESH_TRUNC)
ret, thresh4 = cv2.threshold(img1, 127, 255, cv2.THRESH_TOZERO)
ret, thresh5 = cv2.threshold(img1, 127, 255, cv2.THRESH_TOZERO_INV)

titles = ['Original Image', 'BINARY', 'BINARY_INV', 'TRUNC', 'TOZERO', 'TOZERO_INV']
images = [img1, thresh1, thresh2, thresh3, thresh4, thresh5]

# Lay the six images out on a 2x3 grid, one titled panel each, axes hidden.
for slot, (picture, caption) in enumerate(zip(images, titles), start=1):
    plt.subplot(2, 3, slot)
    plt.imshow(picture, 'gray')
    plt.title(caption)
    plt.xticks([])
    plt.yticks([])
plt.show()

Image filtering

# Mean filter
# a plain averaging convolution
blur = cv2.blur(img, (3,3))

# Box filter
# essentially the same as the mean filter, with optional normalisation
box = cv2.boxFilter(img, -1, (3,3), normalize=True) # normalize=True divides the window sum by the number of pixels; normalize=False keeps the raw sum

# Gaussian filter
# pixels are weighted differently depending on their distance from the centre
gaussian = cv2.GaussianBlur(img, (5,5), 1)

# Median filter
# replaces each pixel with the median of its neighbourhood
median = cv2.medianBlur(img, 5)

# Every filter above is applied to the same source image, so the results share
# one shape and can be stacked side by side (np.hstack fails otherwise).
res = np.hstack((blur, gaussian, median))
cv_imshow('res', res)

Morphology-corrosion operations

# Erode img with a 5x5 all-ones kernel.
kernel = np.ones((5,5), np.uint8)
erosion = cv2.erode(img, kernel, iterations=1) # iterations: number of times erosion is applied

Morphology-expansion operation

# Dilate img with a 3x3 all-ones kernel, applied once.
kernel = np.ones((3, 3), np.uint8)
dige_dilate = cv2.dilate(img, kernel, iterations=1)

Opening and closing operations

# Opening: erode first, then dilate
# removes the burrs
kernel = np.ones((5,5), np.uint8)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

# Closing: dilate first, then erode
# the burrs are enlarged rather than removed
kernel = np.ones((5,5), np.uint8)
# Stored under its own name so the opening result above is not overwritten.
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

Gradient operation

# gradient = dilation - erosion
kernel = np.ones((7,7), np.uint8)
# NOTE(review): `pie` is not defined in this file -- presumably a test image loaded elsewhere; confirm.
gradient = cv2.morphologyEx(pie, cv2.MORPH_GRADIENT, kernel) # dilation minus erosion; iterations defaults to 1

Top hat and black hat

  • Top hat = original input - opening result = the burrs that opening removed
  • Black hat = closing result - original input = the burr-like parts newly generated by the closing operation
# Top hat
kernel = np.ones((5,5), np.uint8)
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
# Black hat -- kept in its own variable so the top-hat result is not overwritten
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)

Image gradient-Sobel operator

  • Image mutation position – image gradient
    $$G_x = \begin{bmatrix} -1 & 0 & +1 \\ -2 & 0 & +2 \\ -1 & 0 & +1 \end{bmatrix} * A$$
    $$G_y = \begin{bmatrix} -1 & -2 & -1 \\ 0 & 0 & 0 \\ +1 & +2 & +1 \end{bmatrix} * A$$

dst = cv2.Sobel(src, ddepth, dx, dy, ksize)

  • ddepth: depth of image
  • dx and dy: the horizontal and vertical directions, respectively
  • ksize: the size of the Sobel operator (kernel)
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3) # horizontal gradient
# White-to-black transitions are positive and black-to-white are negative;
# negatives would be truncated to 0, so take the absolute value:
sobelx = cv2.convertScaleAbs(sobelx)

# The vertical gradient gets its own name; the combination below needs both.
sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3) # vertical gradient
# Same sign issue as above, so take the absolute value here too:
sobely = cv2.convertScaleAbs(sobely)

# Combine the separately computed x and y gradients
sobelxy = cv2.addWeighted(sobelx, 0.5, sobely, 0.5, 0)

# Computing both directions in one call is possible but not recommended:
# the result is worse than combining the separately computed gradients.
sobelxy_direct = cv2.Sobel(img, cv2.CV_64F, 1, 1, ksize=3) # horizontal and vertical gradient at once

Image gradient - Scharr operator

  • Sobel improvement, more sensitive and detailed
    $$G_x = \begin{bmatrix} -3 & 0 & 3 \\ -10 & 0 & 10 \\ -3 & 0 & 3 \end{bmatrix} * A$$
    $$G_y = \begin{bmatrix} -3 & -10 & -3 \\ 0 & 0 & 0 \\ 3 & 10 & 3 \end{bmatrix} * A$$
# Scharr gradients in x and y, computed in float64 so negative slopes survive
scharrx = cv2.Scharr(img, cv2.CV_64F, 1, 0)
scharry = cv2.Scharr(img, cv2.CV_64F, 0, 1)
# Take absolute values and convert back to 8-bit (the function is convertScaleAbs)
scharrx = cv2.convertScaleAbs(scharrx)
scharry = cv2.convertScaleAbs(scharry)
# Blend the two directions with equal weight
scharrxy = cv2.addWeighted(scharrx, 0.5, scharry, 0.5, 0)

Image gradient-laplacian operator

  • Sensitive to noise points, generally used in combination with other methods
    $$G = \begin{bmatrix} 0 & 1 & 0 \\ 1 & -4 & 1 \\ 0 & 1 & 0 \end{bmatrix}$$
# Laplacian in float64, then absolute value converted back to 8-bit
laplacian = cv2.Laplacian(img, cv2.CV_64F)
laplacian = cv2.convertScaleAbs(laplacian)

Canny edge detection

    1. Use a Gaussian filter to smooth the image and filter out noise.
    2. Calculate the gradient strength and direction of each pixel in the image.
    3. Apply Non-Maximum Suppression to eliminate spurious responses caused by edge detection.
    4. Apply Double-Threshold detection to determine real and potential edges.
    5. Edge detection is finally accomplished by suppressing isolated weak edges.

1. Gaussian filter
$$H = \begin{bmatrix} 0.0924 & 0.1192 & 0.0924 \\ 0.1192 & 0.1538 & 0.1192 \\ 0.0924 & 0.1192 & 0.0924 \end{bmatrix} \quad \leftarrow \text{normalized}$$
$$e = H * A = \begin{bmatrix} h_{11} & h_{12} & h_{13} \\ h_{21} & h_{22} & h_{23} \\ h_{31} & h_{32} & h_{33} \end{bmatrix} * \begin{bmatrix} a & b & c \\ d & e & f \\ g & h & i \end{bmatrix} = \mathrm{sum}\left(\begin{bmatrix} a \times h_{11} & b \times h_{12} & c \times h_{13} \\ d \times h_{21} & e \times h_{22} & f \times h_{23} \\ g \times h_{31} & h \times h_{32} & i \times h_{33} \end{bmatrix}\right)$$

2. Gradient and direction (Sobel)
$$G = \sqrt{G_x^2 + G_y^2}$$
$$\theta = \arctan\left(\frac{G_y}{G_x}\right)$$
$$S_x = \begin{bmatrix} -1 & 0 & 1 \\ -2 & 0 & 2 \\ -1 & 0 & 1 \end{bmatrix} \qquad S_y = \begin{bmatrix} 1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix}$$
$$G_x = S_x * A = \begin{bmatrix} -1 & 0 & 1 \\ -2 & 0 & 2 \\ -1 & 0 & 1 \end{bmatrix} * \begin{bmatrix} a & b & c \\ d & e & f \\ g & h & i \end{bmatrix} = \mathrm{sum}\left(\begin{bmatrix} -a & 0 & c \\ -2d & 0 & 2f \\ -g & 0 & i \end{bmatrix}\right)$$
$$G_y = S_y * A = \begin{bmatrix} 1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix} * \begin{bmatrix} a & b & c \\ d & e & f \\ g & h & i \end{bmatrix} = \mathrm{sum}\left(\begin{bmatrix} a & 2b & c \\ 0 & 0 & 0 \\ -g & -2h & -i \end{bmatrix}\right)$$

3. Non-maximum suppression
Insert image description here

Insert image description here
4. Double threshold detection
Insert image description here

v = cv2.Canny(img, 80, 150) # minval = 80, maxval = 150; per the original note, larger values make the edge extraction stricter and filter out more points

image pyramid

  • Gaussian Pyramid
  • Laplacian Pyramid
    Insert image description here
    Gaussian Pyramid: Downsampling method (downsampling)
    $$H = \frac{1}{256} \begin{bmatrix} 1 & 4 & 6 & 4 & 1 \\ 4 & 16 & 24 & 16 & 4 \\ 6 & 24 & 36 & 24 & 6 \\ 4 & 16 & 24 & 16 & 4 \\ 1 & 4 & 6 & 4 & 1 \end{bmatrix}$$
  • Convolve $G_i$ with the Gaussian kernel
  • Remove all even rows and columns

Gaussian pyramid: upsampling method (amplification)
$$\begin{bmatrix} 1 & 4 \\ 4 & 16 \end{bmatrix} \longrightarrow \begin{bmatrix} 1 & 0 & 4 & 0 \\ 0 & 0 & 0 & 0 \\ 4 & 0 & 16 & 0 \\ 0 & 0 & 0 & 0 \end{bmatrix}$$

    1. Expand the image to twice its original size in each direction, filling new rows and columns with zeros
    2. Use the same kernel as before (multiplied by 4) to convolve with the enlarged image to obtain an approximation
up = cv2.pyrUp(img) # upsample (enlarge) img by one pyramid level

down = cv2.pyrDown(img) # downsample (shrink) img by one pyramid level

Laplace Pyramid
$$L_i = G_i - \mathrm{PyrUp}(\mathrm{PyrDown}(G_i))$$

img - cv2.pyrUp(cv2.pyrDown(img)) # Laplacian pyramid layer: the image minus its down-then-up-sampled version

image outline

  1. Convert original image to grayscale
  2. Perform binary threshold filtering
  3. Make a contour call

cv2.findContours(img, mode, method)
mode: contour retrieval mode

  • RETR_EXTERNAL: Retrieve only the outermost contour
  • RETR_LIST: Retrieve all contours and save them into a linked list;
  • RETR_CCOMP: Consolidate all contours and organize them into two layers; the top layer is the outer boundary of each part, and the second layer is the hole boundary;
  • RETR_TREE: Retrieve all contours and reconstruct the entire hierarchy of nested contours.

method: Contour approximation method

  • CHAIN_APPROX_NONE: Store every point on the contour, with no compression.
  • CHAIN_APPROX_SIMPLE: Compress the horizontal, vertical and oblique parts, that is, the function only retains their end points.
# Steps 1-3 from the list above: grayscale, binary threshold, then find contours.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

draw_img = img.copy() # draw on a copy; otherwise the contours would be left on img itself
res = cv2.drawContours(draw_img, contours, -1, (0, 0, 255), 2) # -1 draws all contours; 0, 1, 2, ... select an individual contour

Contour features

cnt = contours[0]
# area
cv2.contourArea(cnt)
# perimeter; True means the contour is closed
cv2.arcLength(cnt, True)

Contour approximation using
straight lines instead of curves
Insert image description here

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

cnt = contours[0]

# 1. Fit the contour to the shape with a polygon
# The threshold is a fraction of the perimeter: it decides when the curve
# between two points may be replaced by a straight line. The smaller the
# threshold, the shorter the segments and the less simplified the outline.
epsilon = 0.1*cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, epsilon, True)

draw_img = img.copy()
res = cv2.drawContours(draw_img, [approx], -1, (0,0,255), 2)
# The display helper defined in this file is cv_imshow (cv_show does not exist).
cv_imshow('res', res)

# 2. Fit the contour with its bounding rectangle
x, y, w, h = cv2.boundingRect(cnt)
rec_img = cv2.rectangle(draw_img, (x,y), (x+w, y+h), (0, 255, 0), 2)
cv_imshow('img', rec_img)

# Ratio of the contour area to the bounding-rectangle area
area = cv2.contourArea(cnt)
rect_area = w*h
extent = float(area) / rect_area
# Plain ASCII quotes: typographic quotes are a syntax error in Python.
print('轮廓面积与边界矩形比', extent)

# 3. Minimum enclosing circle
(x, y), radius = cv2.minEnclosingCircle(cnt)
center = (int(x), int(y))
radius = int(radius)
rad_img = cv2.circle(draw_img, center, radius, (0, 255, 0), 2)
cv_imshow('img', rad_img)

template matching

The principle of template matching is very similar to that of convolution. The template starts from the origin on the original image, and calculates the degree of difference between the template and (the place where the image is covered by the template). There are 6 ways to calculate the degree of difference in opencv, and then each time The calculated results are placed in a matrix and output as the result. Assuming that the original graphic is AxB size and the template is axb size, the matrix of the output result is (A-a+1)x(B-b+1)

  • TM_SQDIFF: Calculates square differences. The smaller the calculated value, the more relevant it is.
  • TM_CCORR: Calculate correlation. The larger the calculated value, the more relevant it is.
  • TM_CCOEFF: Calculate the correlation coefficient. The larger the calculated value, the more relevant it is.
  • TM_SQDIFF_NORMED: Calculate the normalized square difference. The closer the calculated value is to 0, the more relevant it is.
  • TM_CCORR_NORMED: Calculate the normalized correlation. The closer the calculated value is to 1, the more relevant it is.
  • TM_CCOEFF_NORMED: Calculate the normalized correlation coefficient. The closer the calculated value is to 1, the more relevant it is.
  • The results after normalization are stable
# Load the scene and the template as grayscale (flag 0).
# The empty paths are placeholders -- fill in real file names.
img = cv2.imread('', 0)
template = cv2.imread('', 0)
h, w = template.shape[:2]

methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED', 'cv2.TM_CCORR', 'cv2.TM_CCORR_NORMED', 'cv2.TM_SQDIFF', 'cv2.TM_SQDIFF_NORMED']
# matchTemplate takes (image, templ, method): the method is the third argument.
res = cv2.matchTemplate(img, template, cv2.TM_SQDIFF)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

for meth in methods:
    img2 = img.copy()
    # Resolve the method name to the actual cv2 constant without eval().
    method = getattr(cv2, meth.split('.', 1)[1])
    print(method)
    res = cv2.matchTemplate(img, template, method)
    # The extrema must be recomputed for every method; reusing the values from
    # before the loop would draw the same rectangle for all six methods.
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

    # For TM_SQDIFF and TM_SQDIFF_NORMED the best match is the minimum;
    # for every other method it is the maximum.
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)

    # Draw the rectangle around the match
    cv2.rectangle(img2, top_left, bottom_right, 255, 2)

    plt.subplot(121), plt.imshow(res, cmap='gray')
    plt.xticks([]), plt.yticks([]) # hide the axes
    plt.subplot(122), plt.imshow(img2, cmap='gray')
    plt.xticks([]), plt.yticks([]) # hide the axes
    plt.suptitle(meth)
    plt.show()

Match multiple targets

# Load the scene and the template as grayscale (flag 0).
# The empty paths are placeholders -- fill in real file names.
img = cv2.imread('', 0)
template = cv2.imread('', 0)
h, w = template.shape[:2]

# matchTemplate takes (image, templ, method): the method is the third argument.
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
threshold = 0.8
# Take every location whose match score is at least 80%
loc = np.where(res >= threshold)
# np.where yields (rows, cols); reversing gives (x, y) points for drawing.
for pt in zip(*loc[::-1]):
    bottom_right = (pt[0] + w, pt[1] + h)
    # NOTE(review): img is single-channel here, so presumably only the first
    # component of the colour tuple is applied -- confirm the intended colour.
    cv2.rectangle(img, pt, bottom_right, (0, 0, 255), 2)
# The display helper defined in this file is cv_imshow (cv_show does not exist).
cv_imshow('img_rgb', img)

Histogram

cv2.calcHist(images, channels, mask, histSize, ranges)

  • images: The original image image format is uint8 or float32. When passed in a function, apply [] e.g. [img]
  • channels: Also passed in square brackets; it selects which channel the histogram is computed for. If the input image is a grayscale image, its value is [0]; for a color image pass [0], [1] or [2], corresponding to B, G, R.
  • mask: mask image. The histogram of the entire image is None. If the histogram of a certain part of the image is calculated, create a mask image and use it.
  • histSize: number of BINs
  • ranges: The pixel value range, usually [0, 256]
# One 256-bin histogram per channel (indices 0, 1, 2)
hist_0 = cv2.calcHist([img], [0], None, [256], [0, 256])
hist_1 = cv2.calcHist([img], [1], None, [256], [0, 256])
hist_2 = cv2.calcHist([img], [2], None, [256], [0, 256])
# `hist` was never defined; inspect one of the histograms computed above.
hist_0.shape
# NOTE(review): `img_nly` is not defined in this file -- presumably the same
# image as `img`; confirm against the original notebook.
plt.hist(img_nly.ravel(), 256)
plt.show()

# Alternatively: plot one histogram curve per channel in its own colour
color = ('b', 'g', 'r')
for i, col in enumerate(color):
    histr = cv2.calcHist([img], [i], None, [256], [0, 256])
    plt.plot(histr, color=col)
    plt.xlim([0,256])
# Without this call the figure is never displayed when run as a script.
plt.show()

mask operation

# shape is (rows, cols[, channels]); slicing [:2] also works for grayscale input
h, w = img.shape[:2]
mask = np.zeros(img.shape[:2], np.uint8)
# White rectangle centred in the image. max(0, ...) stops a negative start
# index from wrapping around to the far edge on small images.
mask[max(0, h//2-500):h//2+500, max(0, w//2-300):w//2+300] = 255
cv_imshow('mask', mask)

# AND the image with itself under the mask: pixels outside the mask become 0.
# The same image is used for the mask shape, the masking and the histograms.
mask_img = cv2.bitwise_and(img, img, mask=mask)
cv_imshow("mask_img", mask_img)

hist_full = cv2.calcHist([img], [0], None, [256], [0,256])
hist_mask = cv2.calcHist([img], [0], mask, [256], [0,256])
# subplot() and imshow()/plot() are separate calls, and 'gray' is the colormap
# ('img' is not a valid colormap name).
plt.subplot(221), plt.imshow(img, 'gray')
plt.subplot(222), plt.imshow(mask, 'gray')
plt.subplot(223), plt.imshow(mask_img, 'gray')
plt.subplot(224), plt.plot(hist_full), plt.plot(hist_mask)
plt.xlim([0,256])
plt.show()

Histogram equalization:
equalize the entire image
Insert image description here

# look at the histogram of the original image
# NOTE(review): `img_nly` is not defined in this file -- presumably the same image as `img`; confirm.
plt.hist(img_nly.ravel(), 256)
plt.show()

# single-channel histogram equalization
equ = cv2.equalizeHist(img_nly[:,:,0])
plt.hist(equ.ravel(), 256)
plt.show()

# three-channel histogram equalization: equalize each channel, then merge
B, G, R = cv2.split(img)
output_B = cv2.equalizeHist(B)
output_G = cv2.equalizeHist(G)
output_R = cv2.equalizeHist(R)
equ = cv2.merge((output_B, output_G, output_R))
plt.hist(equ.ravel(), 256)
plt.show()

# compare the results side by side
res = np.hstack((img, equ))
cv_imshow('res', res)

Adaptive histogram equalization
Block equalization

# Build the CLAHE object (clip limit 2.0, 8x8 tile grid)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# Equalize each of the three channels with the same CLAHE object, in B, G, R order
B, G, R = cv2.split(img)
res_clahe_B, res_clahe_G, res_clahe_R = [clahe.apply(plane) for plane in (B, G, R)]
res_clahe = cv2.merge((res_clahe_B, res_clahe_G, res_clahe_R))
# Side-by-side comparison: source image, previous equalization result, CLAHE result
comparison = (img_nly, equ, res_clahe)
res = np.hstack(comparison)
cv_imshow('img', res)

Fourier transform

Conversion between time domain and frequency domain

https://zhuanlan.zhihu.com/p/19763358
Insert image description here

Fourier transform effect

  • High frequency: rapidly changing grayscale components, such as boundaries
  • Low frequency: slowly changing gray components, such as a sea

filter

  • Low-pass filter: only retains low frequencies, which will blur the image

  • High-pass filter: only retains high frequencies, which will enhance image details

  • Opencv mainly uses cv2.dft() and cv2.idft(). The input image needs to be converted to np.float32 format first.

  • The part with frequency 0 in the obtained result will be in the upper left corner, and usually needs to be converted to the center position, which can be achieved through shift transformation.

  • The result returned by cv2.dft() is dual-channel (real part, imaginary part), and usually needs to be converted into an image format to display (0, 255).

img_float32 = np.float32(img[:,:,0]) # first channel of img, converted to np.float32 as cv2.dft needs

dft = cv2.dft(img_float32, flags = cv2.DFT_COMPLEX_OUTPUT) # Fourier transform
dft_shift = np.fft.fftshift(dft) # shift the zero-frequency part from the corner to the centre

magnitude_spectrum = 20*np.log(cv2.magnitude(dft_shift[:,:,0], dft_shift[:,:,1])) # combine the real and imaginary channels into a magnitude, on a log scale for display

# NOTE(review): `img_nly` is not defined in this file -- presumably the same image as `img`; confirm.
plt.subplot(121), plt.imshow(img_nly, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(magnitude_spectrum, cmap='gray')
plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
plt.show()
img_float32 = np.float32(img[:,:,0]) # first channel of img as float32, as cv2.dft needs

dft = cv2.dft(img_float32, flags=cv2.DFT_COMPLEX_OUTPUT)
# Shift only the two spatial axes; shifting every axis would also swap the
# real/imaginary channel pair stored along the last axis.
dft_shift = np.fft.fftshift(dft, axes=(0, 1))

# Size the mask from the array that was actually transformed.
rows, cols = img_float32.shape
crow, ccol = rows // 2, cols // 2

# Low-pass filter: keep a 60x60 window around the centre (the low
# frequencies) in both the real and imaginary channels, zero everything else.
mask = np.zeros((rows, cols, 2), np.uint8)
mask[crow-30:crow+30, ccol-30:ccol+30] = 1

# IDFT: apply the mask, undo the shift, transform back, take the magnitude
fshift = dft_shift*mask
f_ishift = np.fft.ifftshift(fshift, axes=(0, 1))
img_back = cv2.idft(f_ishift)
img_back = cv2.magnitude(img_back[:,:,0], img_back[:,:,1])

# NOTE(review): `img_nly` is not defined in this file -- presumably the same image as `img`; confirm.
plt.subplot(121), plt.imshow(img_nly, cmap='gray')
plt.title('Input Image'), plt.xticks([]), plt.yticks([])
plt.subplot(122), plt.imshow(img_back, cmap='gray')
# This panel shows the filtered image, not a spectrum.
plt.title('Low-Pass Filtered Image'), plt.xticks([]), plt.yticks([])
plt.show()

Video operation

  • cv2.VideoCapture can capture the camera and use numbers to control different devices, such as 0,1

  • If it is a video file, just specify the path.

  • cv2.COLOR_BGR2RGB: Convert BGR format to RGB format
    cv2.COLOR_BGR2GRAY: Convert BGR format to grayscale image

  • cv2.COLOR_BGR2BGRA: Convert BGR format to BGRA format, cv2 displays normally

  • cv2.COLOR_BGR2HSV: Convert BGR format to HSV format

import cv2
import numpy as np

vc = cv2.VideoCapture('test.mp4') # open the video file
# vc = cv2.VideoCapture(0) # open camera 0

try:
    # Play the video. The loop only runs if the capture opened correctly, and
    # no frame is read ahead of the loop, so playback starts at the first frame.
    while vc.isOpened():
        ret, frame = vc.read()
        # Stop at the end of the stream or on a failed read.
        if not ret or frame is None:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imshow('result', gray)
        if cv2.waitKey(10) & 0xFF == 27:  # 27 is the Esc key
            break
finally:
    # Always free the capture and close the windows, even if a frame fails.
    vc.release()
    cv2.destroyAllWindows()

Guess you like

Origin blog.csdn.net/frighting_ing/article/details/129844228
Recommended