FFmpeg source code analysis: video filter deshake anti-shake

FFmpeg provides a variety of audio and video filters in its avfilter module. This article focuses on the deshake (anti-shake) filter, which corrects small changes in horizontal and/or vertical shift. It uses SAD (sum of absolute differences) block-matching motion compensation to remove the minor deviations caused by vertical or horizontal drift. The filter helps remove camera shake from footage shot with a hand-held camera or from a moving vehicle. For the motion estimation algorithm involved, please refer to: GPU_Motion_Estimation.

For a detailed introduction to video filters, see the official FFmpeg documentation: Video-Filters . As mentioned in the documentation, the parameter options supported by deshake are as follows:

  • x, y, w, h: specify the rectangular area to be searched, xy is the upper left corner coordinate, wh is the width and height
  • rx, ry: the maximum extent of movement, in pixels, in the x-axis and y-axis directions; the range is [0, 64], the default is 16
  • edge: specifies how pixels are generated to fill the blank areas at the edge of the video frame. The available modes are as follows:
  •     'blank, 0': fill blank positions with 0
  •     'original, 1': blank positions are filled with original image pixels
  •     'clamp, 2': stretch at blank positions
  •     'mirror, 3': Mirror blank position
  • blocksize: specifies the block size of the motion search, the range is [4, 128], the default is 8
  • contrast: specifies the contrast threshold of the search block, the range is [1, 255], the default is 125
  • search: search strategy; the default is exhaustive search. The available strategies are as follows:
  •     'exhaustive, 0': exhaustive search
  •     'less, 1': less exhaustive search

1. Anti-jitter overall process

The source code of deshake anti-jitter is located in libavfilter/vf_deshake.c. The overall process is: read the video frame data from the buffer, find the most similar global motion, generate the luminance transformation matrix, generate the chrominance transformation matrix, and transform the luminance and chrominance. The key code is as follows:

/*
 * Per-frame entry point of the deshake filter (abridged excerpt: local
 * declarations and the part marked "......" are omitted in this listing).
 *
 * Visible flow: obtain an output frame, select a SAD function, estimate the
 * motion between the reference frame and the input frame, build the luma and
 * chroma transform matrices, apply the transform, then keep the input frame
 * as the reference for the next call.
 *
 * Returns the result of ff_filter_frame() on success, or a negative error
 * code (AVERROR(ENOMEM), AVERROR(EINVAL), or the transform's error).
 */
static int filter_frame(AVFilterLink *link, AVFrame *in)
{
    // Obtain an output frame buffer sized to the output link
    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }
    av_frame_copy_props(out, in);

    // aligned is true only when both the luma data pointer and its linesize
    // are multiples of 16
    aligned = !((intptr_t)in->data[0] & 15 | in->linesize[0] & 15);
    deshake->sad = av_pixelutils_get_sad_fn(4, 4, aligned, deshake);
    // NOTE(review): this early return frees neither 'in' nor 'out' -- looks
    // like a leak; confirm against the full source.
    if (!deshake->sad)
        return AVERROR(EINVAL);

    if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
        // No valid search rectangle: find the most similar global motion over
        // the whole luma plane. When there is no reference frame yet, the
        // input frame is compared with itself.
        find_motion(deshake, (deshake->ref == NULL) ? 
		    in->data[0] : deshake->ref->data[0], 
		    in->data[0], link->w, link->h, in->linesize[0], &t);
    } else {
        uint8_t *src1 = (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0];
        uint8_t *src2 = in->data[0];
        // Keep the rectangle origin inside the frame
        deshake->cx = FFMIN(deshake->cx, link->w);
        deshake->cy = FFMIN(deshake->cy, link->h);

        // Shrink the rectangle so it does not extend past the frame
        if ((unsigned)deshake->cx + (unsigned)deshake->cw > link->w) 
			deshake->cw = link->w - deshake->cx;
        if ((unsigned)deshake->cy + (unsigned)deshake->ch > link->h) 
			deshake->ch = link->h - deshake->cy;

        // Round the search width down to a multiple of 16
        deshake->cw &= ~15;
        // Advance both plane pointers to the top-left corner of the rectangle
        src1 += deshake->cy * in->linesize[0] + deshake->cx;
        src2 += deshake->cy * in->linesize[0] + deshake->cx;
        // Find the most similar global motion inside the rectangle
        find_motion(deshake, src1, src2, deshake->cw, deshake->ch, in->linesize[0], &t);
    }
    ......
    // Build the luma transform matrix
    ff_get_matrix(t.vec.x, t.vec.y, t.angle, transform_zoom, transform_zoom, matrix_y);
    // Build the chroma transform matrix; the translation is divided by the
    // luma-to-chroma size ratio
    ff_get_matrix(t.vec.x / (link->w / chroma_width), t.vec.y / (link->h / chroma_height), 
	t.angle, transform_zoom, transform_zoom, matrix_uv);
    // Transform the luma and chroma planes
    ret = deshake->transform(link->dst, link->w, link->h, chroma_width, chroma_height,
                             matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);

    // The previous reference frame is released whether or not the transform
    // succeeded
    av_frame_free(&deshake->ref);
    if (ret < 0)
        goto fail;

    // The input frame becomes the reference for the next call
    deshake->ref = in;
    return ff_filter_frame(outlink, out);
fail:
    av_frame_free(&out);
    return ret;
}

2. Find the most similar global motion

Through a block-by-block motion search (diamond search), the filter finds the most similar global motion and finally calculates the offset value and offset angle. The relevant code is as follows:

/*
 * Estimate the global motion between two luma planes (abridged excerpt:
 * local declarations are omitted in this listing).
 *
 * Each sufficiently contrasted block votes for its own motion vector in
 * deshake->counts; the most voted vector becomes the translation, and the
 * per-block angles are averaged (via clean_mean()) into the rotation.
 * The result is written to *t.
 */
static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
                        int width, int height, int stride, Transform *t)
{
    // Reset the motion-vector vote counters
    for (x = 0; x < deshake->rx * 2 + 1; x++) {
        for (y = 0; y < deshake->ry * 2 + 1; y++) {
            deshake->counts[x][y] = 0;
        }
    }
    // 1. Block-by-block motion search; a margin of rx/ry is left at the borders
    for (y = deshake->ry; y < height - deshake->ry - (deshake->blocksize * 2); 
	    y += deshake->blocksize * 2) {
        // 2. Step by 16 in x to match the block width used by the SAD function
        for (x = deshake->rx; x < width - deshake->rx - 16; x += 16) {
            // 3. Compute the block contrast; low-contrast blocks are skipped
            contrast = block_contrast(src2, x, y, stride, deshake->blocksize);
            if (contrast > deshake->contrast) {
                // 4. Find the motion of this block
                find_block_motion(deshake, src1, src2, x, y, stride, &mv);
                // (-1, -1) is what find_block_motion() stores when it rejects a match
                if (mv.x != -1 && mv.y != -1) {
                    // Vote for this vector; indices are shifted by rx/ry to be non-negative
                    deshake->counts[mv.x + deshake->rx][mv.y + deshake->ry] += 1;
                    // 5. Compute the block angle relative to the origin (0, 0)
                    if (x > deshake->rx && y > deshake->ry)
                        deshake->angles[pos++] = block_angle(x, y, 0, 0, &mv);

                    center_x += mv.x;
                    center_y += mv.y;
                }
            }
        }
    }

    // NOTE(review): center_x/center_y are accumulated for every accepted block,
    // but divided by pos, which only counts blocks with x > rx && y > ry --
    // confirm this is intended.
    if (pos) {
         center_x /= pos;
         center_y /= pos;
         t->angle = clean_mean(deshake->angles, pos);
         // NOTE(review): this also zeroes every negative angle, not just tiny
         // ones -- presumably a magnitude test was intended; confirm.
         if (t->angle < 0.001)
              t->angle = 0;
    } else {
         t->angle = 0;
    }
    // 6. Pick the most voted motion vector for the current frame; the strict
    //    '>' keeps the first maximum met in this scan order
    for (y = deshake->ry * 2; y >= 0; y--) {
        for (x = 0; x < deshake->rx * 2 + 1; x++) {
            if (deshake->counts[x][y] > count_max_value) {
                t->vec.x = x - deshake->rx;
                t->vec.y = y - deshake->ry;
                count_max_value = deshake->counts[x][y];
            }
        }
    }

    // Adjust the translation using the rotation angle and the offset of the
    // averaged center from the middle of the searched area
    p_x = (center_x - width / 2.0);
    p_y = (center_y - height / 2.0);
    t->vec.x += (cos(t->angle)-1)*p_x  - sin(t->angle)*p_y;
    t->vec.y += sin(t->angle)*p_x  + (cos(t->angle)-1)*p_y;
    // 7. Clamp the final offset to +/- 2*rx, 2*ry and the angle to +/- 0.1
    t->vec.x = av_clipf(t->vec.x, -deshake->rx * 2, deshake->rx * 2);
    t->vec.y = av_clipf(t->vec.y, -deshake->ry * 2, deshake->ry * 2);
    t->angle = av_clipf(t->angle, -0.1, 0.1);
}

3. Calculate the block contrast

Calculate the contrast for the given block. If the contrast is large, the next step will be processed; if the contrast is small, the current block will be skipped directly. The block_contrast() function is as follows:

/**
 * Compute the contrast of a block, i.e. the difference between its
 * brightest and darkest pixel.
 *
 * The scanned area has its top-left corner at (x, y), is 16 pixels wide
 * (to match the block width used by the SAD function) and
 * (blocksize * 2 + 1) rows tall.
 *
 * @param src       plane of 8-bit pixels
 * @param x         column of the block's top-left corner
 * @param y         row of the block's top-left corner
 * @param stride    distance in bytes between two consecutive rows of src
 * @param blocksize block size; rows 0 .. blocksize * 2 (inclusive) are scanned
 * @return highest pixel value minus lowest pixel value in the scanned area
 */
static int block_contrast(uint8_t *src, int x, int y, int stride, int blocksize)
{
    int highest = 0;
    int lowest = 255;
    int i, j, pos;

    for (i = 0; i <= blocksize * 2; i++) {
        // 16 columns, matching the width used by the SAD function
        for (j = 0; j <= 15; j++) {
            pos = (y + i) * stride + (x + j);
            /*
             * The two tests must be independent: with an "else if", a pixel
             * that lowers the minimum is never considered for the maximum,
             * so a block whose values keep decreasing in scan order would
             * leave highest at 0 and yield a negative contrast.
             */
            if (src[pos] < lowest)
                lowest = src[pos];
            if (src[pos] > highest)
                highest = src[pos];
        }
    }

    return highest - lowest;
}

4. Find block motion

This step finds the most similar motion offset between two frames for a given macroblock. Candidate shifts within the search range are compared, and the shift with the smallest block difference (SAD) is selected as the most probable one. The find_block_motion() function is as follows:

/*
 * Find the shift of the block at (cx, cy) between two frames.
 *
 * Candidate shifts are scored with deshake->sad(); the candidate with the
 * smallest score wins (the first one met in scan order on ties) and is
 * stored in *mv. When the best score is above 512 the match is rejected
 * and *mv is set to (-1, -1).
 */
static void find_block_motion(DeshakeContext *deshake, uint8_t *src1,
                              uint8_t *src2, int cx, int cy, int stride,
                              IntMotionVector *mv)
{
    int dx, dy;
    int cost;
    int best_cost = INT_MAX;
    int coarse_x, coarse_y;

    #define CMP(i, j) deshake->sad(src1 + cy  * stride + cx,  stride,\
                                   src2 + (j) * stride + (i), stride)

    if (deshake->search == EXHAUSTIVE) {
        // Score every shift inside the [-rx, rx] x [-ry, ry] window
        for (dy = -deshake->ry; dy <= deshake->ry; dy++) {
            for (dx = -deshake->rx; dx <= deshake->rx; dx++) {
                cost = CMP(cx - dx, cy - dy);
                if (cost < best_cost) {
                    best_cost = cost;
                    mv->x = dx;
                    mv->y = dy;
                }
            }
        }
    } else if (deshake->search == SMART_EXHAUSTIVE) {
        // Coarse pass: score every second shift to locate the best region
        for (dy = -deshake->ry + 1; dy < deshake->ry; dy += 2) {
            for (dx = -deshake->rx + 1; dx < deshake->rx; dx += 2) {
                cost = CMP(cx - dx, cy - dy);
                if (cost < best_cost) {
                    best_cost = cost;
                    mv->x = dx;
                    mv->y = dy;
                }
            }
        }

        // Fine pass: score the eight neighbours of the coarse winner
        coarse_x = mv->x;
        coarse_y = mv->y;

        for (dy = coarse_y - 1; dy <= coarse_y + 1; dy++) {
            for (dx = coarse_x - 1; dx <= coarse_x + 1; dx++) {
                // The coarse winner itself has already been scored
                if (dx != coarse_x || dy != coarse_y) {
                    cost = CMP(cx - dx, cy - dy);
                    if (cost < best_cost) {
                        best_cost = cost;
                        mv->x = dx;
                        mv->y = dy;
                    }
                }
            }
        }
    }

    // Reject matches whose best score is still too large
    if (best_cost > 512)
        mv->x = mv->y = -1;
    emms_c();
}

5. Calculate the block angle

The block_angle() function calculates the offset angle for a given block:

/*
 * Angle, in radians, by which the block at (x, y) turns around the center
 * (cx, cy) when it is moved by *shift.
 *
 * The result is the difference between the direction of the shifted
 * position and the direction of the original position, folded back by
 * one full turn when it falls outside [-pi, pi].
 */
static double block_angle(int x, int y, int cx, int cy, IntMotionVector *shift)
{
    const double before = atan2(y - cy, x - cx);
    const double after  = atan2(y - cy + shift->y, x - cx + shift->x);
    const double delta  = after - before;

    if (delta > M_PI)
        return delta - 2 * M_PI;
    if (delta < -M_PI)
        return delta + 2 * M_PI;
    return delta;
}

6. Luminance and chroma transformation

deshake->transform is a function pointer, which points to deshake_transform_c() during initialization. The function is as follows:

/*
 * Apply the deshake transform to the three planes of a frame.
 *
 * Plane 0 uses matrix_y with the width x height dimensions; planes 1 and 2
 * both use matrix_uv with the cw x ch dimensions. Processing stops at the
 * first plane for which avfilter_transform() reports an error.
 *
 * Returns the value returned by the last avfilter_transform() call
 * (negative on error).
 */
static int deshake_transform_c(AVFilterContext *ctx,
                                    int width, int height, int cw, int ch,
                                    const float *matrix_y, const float *matrix_uv,
                                    enum InterpolateMethod interpolate,
                                    enum FillMethod fill, AVFrame *in, AVFrame *out)
{
    const float *plane_matrix[3] = { matrix_y, matrix_uv, matrix_uv };
    const int plane_width[3]     = { width,  cw, cw };
    const int plane_height[3]    = { height, ch, ch };
    int plane;
    int ret = 0;

    for (plane = 0; plane < 3; plane++) {
        // Transform one luma or chroma plane
        ret = avfilter_transform(in->data[plane], out->data[plane],
                                 in->linesize[plane], out->linesize[plane],
                                 plane_width[plane], plane_height[plane],
                                 plane_matrix[plane], interpolate, fill);
        if (ret < 0)
            break;
    }
    return ret;
}

The avfilter_transform() function is located in the transform.c source file and performs an affine transformation using the given interpolation method. It is implemented as follows:

/*
 * Apply the transform described by matrix to one 8-bit plane.
 *
 * For every destination pixel (x, y) the source position is computed from
 * the first six matrix coefficients, then sampled with the interpolation
 * function selected by interpolate. The fill mode decides which default
 * value is handed to the sampler: the untransformed source pixel
 * (FILL_ORIGINAL), the source pixel at the clamped position (FILL_CLAMP)
 * or at the mirrored position (FILL_MIRROR); for any other fill mode the
 * default value stays 0. In the clamp and mirror modes the adjusted
 * position is also the one passed to the sampler.
 *
 * Returns 0 on success, AVERROR(EINVAL) for an unknown interpolation method.
 */
int avfilter_transform(const uint8_t *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, const float *matrix,
                        enum InterpolateMethod interpolate,
                        enum FillMethod fill)
{
    int col, row;
    float sx, sy;
    uint8_t fallback = 0;
    uint8_t (*sample)(float, float, const uint8_t *, int, int, int, uint8_t) = NULL;

    // Select the sampling routine
    if (interpolate == INTERPOLATE_NEAREST)          // nearest neighbour
        sample = interpolate_nearest;
    else if (interpolate == INTERPOLATE_BILINEAR)    // bilinear
        sample = interpolate_bilinear;
    else if (interpolate == INTERPOLATE_BIQUADRATIC) // biquadratic
        sample = interpolate_biquadratic;
    else
        return AVERROR(EINVAL);

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            // Source position for this destination pixel
            sx = col * matrix[0] + row * matrix[1] + matrix[2];
            sy = col * matrix[3] + row * matrix[4] + matrix[5];

            if (fill == FILL_ORIGINAL) {
                // Default to the untransformed source pixel
                fallback = src[row * src_stride + col];
            } else if (fill == FILL_CLAMP) {
                // Clamp the source position into the plane
                sy = av_clipf(sy, 0, height - 1);
                sx = av_clipf(sx, 0, width - 1);
                fallback = src[(int)sy * src_stride + (int)sx];
            } else if (fill == FILL_MIRROR) {
                // Mirror the source position back into the plane
                sx = avpriv_mirror(sx,  width-1);
                sy = avpriv_mirror(sy, height-1);

                av_assert2(sx >= 0 && sy >= 0);
                av_assert2(sx < width && sy < height);
                fallback = src[(int)sy * src_stride + (int)sx];
            }

            dst[row * dst_stride + col] = sample(sx, sy, src, width, height, src_stride, fallback);
        }
    }
    return 0;
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324448922&siteId=291194637