OpenCV quick start tutorial: Python computer vision basics

Quick start

OpenCV is an open source computer vision library originally developed by Intel®. It consists of a series of C functions and a small number of C++ classes that implement many common algorithms in image processing and computer vision. OpenCV has a cross-platform mid- and high-level API that includes more than 300 C functions. It has no dependencies on other external libraries, although some can be used. OpenCV is free for both non-commercial and commercial applications.

OpenCV installation

pip install opencv-python

After successful installation, print the version for testing.

import cv2
# Confirm the installation by printing the version string of the installed OpenCV build.
installed_version = cv2.__version__
print(installed_version)
4.8.0

Read pictures

import cv2
import numpy as np
# Load the image from disk as a NumPy array.
image_path = '349926.jpeg'
img = cv2.imread(image_path)

# cv2.imread does not raise when the file is missing or cannot be decoded;
# it returns None. Fail here with a clear message instead of an
# AttributeError on img.shape further down.
if img is None:
    raise FileNotFoundError(f"could not read image: {image_path}")

# np.shape(img) and img.shape report the same tuple: (height, width, channels).
print('image size', np.shape(img), img.shape)

# Show the image in a window titled 'Image'.
cv2.imshow('Image', img)

# Block until a key is pressed (0 = wait indefinitely) so the window stays open.
cv2.waitKey(0)
image size (1344, 960, 3) (1344, 960, 3)

Image scaling, cropping, color changes

Image zoom

OpenCV functions generally take sizes as (width, height) and points as (x, y). Note that this is the reverse of NumPy's dimension order, where an image's shape is (height, width, channels).

# The first two entries of the NumPy shape are the image height and width.
h, w = img.shape[:2]

# Enlarge the image to twice its original size.
# cv2.resize takes the target size as (width, height).
scale = 2
doubled_size = (int(w * scale), int(h * scale))
imgResize1 = cv2.resize(img, doubled_size)
# The enlarged image is not displayed; uncomment the next line to show it.
# cv2.imshow("resize1", imgResize1)
print("imgResize1 size", imgResize1.shape)

# Shrink the image to half its original size, then display it.
scale = 0.5
halved_size = (int(w * scale), int(h * scale))
imgResize2 = cv2.resize(img, halved_size)
cv2.imshow("resize2", imgResize2)
print("imgResize2 size", imgResize2.shape)

# Keep the window open until a key is pressed.
cv2.waitKey(0)

imgResize1 size (2688, 1920, 3)
imgResize2 size (672, 480, 3)

cv2.resize

Input parameters:
- src: The image to be scaled, type is UMat
- dsize: target size, type is a sequence containing two ints, such as (width, height)
- dst: optional output image, type UMat
- fx: optional horizontal scaling factor, type float
- fy: Optional vertical scaling factor, type float
- interpolation: optional interpolation method, type int

Image cropping, color changes

`cv2.waitKey(delay)` waits for a key press. A delay of 0 means waiting forever; any positive number means waiting that many milliseconds and then continuing.

# Crop the middle third of the image with NumPy slicing:
# rows (y) come first, columns (x) second.
top, bottom = int(h/3), int(2*h/3)
left, right = int(w/3), int(w*2/3)
imgCropped = img[top:bottom, left:right]
cv2.imshow("cropped", imgCropped)
print("imgcropped size", imgCropped.shape)

# Both converted previews are shown at half size, given as (width, height).
half_size = (int(w*0.5), int(h*0.5))

# Colour conversion BGR -> RGB. OpenCV stores channels as Blue, Green, Red,
# while most other image-processing and machine-learning code expects
# Red, Green, Blue, so images read by OpenCV are often converted first.
imgRGB = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), half_size)

# Colour conversion BGR -> grayscale; the result has no channel axis.
imgGray = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), half_size)
print("imgGray size", imgGray.shape)

# Display both results. cv2.imshow interprets arrays as BGR, so the RGB
# image appears with red and blue swapped.
cv2.imshow("RGB", imgRGB)
cv2.imshow("Gray", imgGray)

# Wait for a key press before continuing.
cv2.waitKey(0)

imgcropped size (448, 320, 3)
imgGray size (672, 480)

cv2.cvtColor

BGR: OpenCV default color space, blue, green and red channels
GRAY: Grayscale image, brightness as pixel value
HSV: Hue, Saturation, Value color space
YCrCb: Luma and chroma space, used for compression and encoding
HLS: Hue, Lightness, Saturation color space
Lab: CIE L*a*b* space; L is the lightness, a and b are the color-opponent dimensions
RGB: red, green, and blue additive color space, used for display

Read video, camera, video stream

import cv2

# Open a video file.
cap = cv2.VideoCapture('My 10 YEAR Indie Game Development Journey.mp4')

# Alternative source: a camera, selected by its device id.
# cap = cv2.VideoCapture(0)

# Alternative source: a network video stream, selected by its URL.
# video = "http://<user>:<password>@<host>:8081/u"
# cap = cv2.VideoCapture(video)

try:
    while True:
        # Read one frame: success tells whether a frame was obtained,
        # img is the frame itself.
        success, img = cap.read()

        # A failed read means the video has ended or the source could not be
        # opened or read. Stop here; otherwise the loop would spin forever
        # without ever reaching waitKey.
        if not success:
            break

        # Show the current frame.
        cv2.imshow('video', img)

        # After each frame, poll the keyboard for 1 ms and quit on 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera/file and close the preview window,
    # even if the loop exits through an exception.
    cap.release()
    cv2.destroyAllWindows()

Drawing

import cv2
import numpy as np

# Create an empty black 512x512 3-channel image to use as the drawing canvas.
img = np.zeros((512, 512, 3), np.uint8)

# --- Circles ---
# Centre point as (x, y).
center = (400, 50)
# Radius in pixels.
radius = 30
# Line colour (OpenCV colours are in BGR order).
color = (255, 255, 0)
# Line width in pixels.
thickness = 5
# Outlined circle.
cv2.circle(img, center, radius, color, thickness)

# Filled circle: cv2.FILLED as the thickness fills the shape.
cv2.circle(img, (450, 80), 30, (0, 255, 255), cv2.FILLED)

# --- Line ---
# Start point.
p1 = (0, 0)
# End point: (width, height) of the canvas, i.e. the bottom-right corner.
p2 = (img.shape[1], img.shape[0])
# Line colour.
color = (0, 255, 0)
# Line width.
thickness = 3
cv2.line(img, p1, p2, color, thickness)


# --- Rectangles ---
# Top-left corner.
p1 = (0, 0)
# Rectangle size as (width, height).
size = (250, 350)
# cv2.rectangle takes two opposite corners, not a corner and a size, so
# derive the bottom-right corner. Passing `size` directly is only correct
# while the top-left corner is (0, 0).
p2 = (p1[0] + size[0], p1[1] + size[1])
# Line colour.
color = (0, 0, 255)
# Line width.
thickness = 2
# Outlined rectangle.
cv2.rectangle(img, p1, p2, color, thickness)

# Filled rectangle between the corners (100, 100) and (200, 200).
cv2.rectangle(img, (100, 100), (200, 200), (255, 0, 0), cv2.FILLED)

# Show the result, wait for a key press, then close every window.
cv2.imshow('OpenCV', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

Painting text

import cv2
from PIL import ImageFont, ImageDraw, Image
import numpy as np


# Draw Chinese text with Pillow
def paint_chinese_opencv(img, chinese, position, color, font_size=20,
                         font_path='Noto Sans CJK Bold.otf'):
    """Draw text on an OpenCV image using a TrueType/OpenType font via Pillow.

    The image is converted from BGR to RGB, drawn on with Pillow, and
    converted back to BGR. The input array is not modified; a new array is
    returned.

    Args:
        img: Image in OpenCV's BGR channel order.
        chinese: The text to draw.
        position: (x, y) position passed to Pillow's ``ImageDraw.text``.
        color: Text colour. It is applied while the image is in RGB order,
            so it is interpreted as (R, G, B), not OpenCV's (B, G, R).
        font_size: Font size passed to ``ImageFont.truetype``.
        font_path: Path or name of the font file to load. Defaults to the
            font that was previously hard-coded.

    Returns:
        A new BGR image with the text drawn on it.
    """
    # OpenCV image (BGR) -> PIL image (RGB).
    img_PIL = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Load the font at the requested size.
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    # Draw the text onto the PIL image.
    draw = ImageDraw.Draw(img_PIL)
    draw.text(position, chinese, color, font)

    # PIL image (RGB) -> OpenCV image (BGR).
    img = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)

    return img


# Draw English text with OpenCV on a black 512x512 canvas.
img = np.zeros((512, 512, 3), np.uint8)

# Text content and its anchor point. For cv2.putText the anchor is the
# bottom-left corner of the text.
text = "OpenCV"
position = (300, 200)
# Built-in Hershey font and its scale factor.
font = cv2.FONT_HERSHEY_COMPLEX
font_size = 1
# Text colour (green) and stroke width.
color = (0, 150, 0)
thickness = 3

cv2.putText(
    img,
    text,
    org=position,
    fontFace=font,
    fontScale=font_size,
    color=color,
    thickness=thickness,
)

# Draw Chinese text through the Pillow-based helper.
text = "这是中文"
position = (300, 250)
color = (150, 0, 0)

img = paint_chinese_opencv(img, text, position, color)

# Show the result until a key is pressed.
cv2.imshow('Image', img)
cv2.waitKey(0)

Common problems

Problem: `cv2.imshow` fails with an error saying that the libgtk2.0-dev and pkg-config libraries cannot be found:

The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'

Solution: install the contrib build of OpenCV with pip:

pip install opencv-contrib-python