CUDA图像处理 | 模板匹配

模板匹配

OpenCV中的模板匹配

该部分转载自【OpenCV3】模板匹配——cv::matchTemplate()详解

匹配方法

cv::TM_SQDIFF：该方法使用平方差进行匹配，因此最佳的匹配结果在结果为0处，值越大匹配结果越差。
cv::TM_CCORR：相关性匹配方法，该方法使用源图像与模板图像的卷积结果进行匹配，因此，最佳匹配位置在值最大处，值越小匹配结果越差。
cv::TM_CCORR_NORMED：归一化的相关性匹配方法，与相关性匹配方法类似，最佳匹配位置也是在值最大处。
cv::TM_CCOEFF：相关性系数匹配方法，该方法使用源图像与其均值的差、模板与其均值的差二者之间的相关性进行匹配，最佳匹配结果在值等于1处，最差匹配结果在值等于-1处，值等于0直接表示二者不相关。
cv::TM_CCOEFF_NORMED：归一化的相关性系数匹配方法，正值表示匹配的结果较好，负值则表示匹配的效果较差，也是值越大，匹配效果也好。

代码实现

int main() {
    Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);
    Mat image_template = imread("E:\\JZCHEN\\test\\template.jpg", IMREAD_GRAYSCALE);
    Mat image_matched;
    //模板匹配
    /*
        void cv::matchTemplate(
            cv::InputArray image, // 用于搜索的输入图像, 8U 或 32F, 大小 WxH
            cv::InputArray templ, // 用于匹配的模板，和image类型相同， 大小 wxh
            cv::OutputArray result, // 匹配结果图像, 类型 32F, 大小 (W-w+1)x(H-h+1)
            int method // 用于比较的方法
        );
    */
    cv::matchTemplate(image_source, image_template, image_matched, cv::TM_CCOEFF_NORMED);

    double minVal, maxVal;
    cv::Point minLoc, maxLoc;

    //寻找最佳匹配位置
    // 这里注意匹配结果图像与原图像之间的大小关系
    cv::minMaxLoc(image_matched, &minVal, &maxVal, &minLoc, &maxLoc);

    cv::Mat image_color;
    cv::cvtColor(image_source, image_color, CV_GRAY2BGR); // 将灰度图转成BGR格式
    cv::circle(image_color,
        cv::Point(maxLoc.x + image_template.cols / 2, maxLoc.y + image_template.rows / 2),
        20,
        cv::Scalar(0, 0, 255),
        2,
        8,
        0);

    cv::imshow("source image", image_source);
    cv::imshow("match result", image_matched);
    cv::imshow("target", image_color);
    cv::waitKey(0);

    return 0;
}

C++实现模板匹配

归一化的相关性系数匹配方法实现

double MatchTemplateCPU(BYTE *pSearchImage,
    BYTE *pTemplate,
    int nSearchImageWidth,
    int nSearchImageHeight,
    int nTemplateWidth,
    int nTemplateHeight,
    int &nMatchPointX,
    int &nMatchPointY
)
{
    float *m_pCorrMatrix = NULL;
    m_pCorrMatrix = new float[(nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)];
    memset(m_pCorrMatrix, 0, (nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)*sizeof(BYTE));

    // 模板匹配过程
    double dMaxR = 0.1;
    int i, j, m, n;
    int nVectX = 0;
    int nVectY = 0;
    float fSigmaT, fSigmaS, fSigmaST;
    for (i = 0; i < (nSearchImageHeight - nTemplateHeight); i++) {
        for (j = 0; j < (nSearchImageWidth - nTemplateWidth); j++) {
            fSigmaT = 0;
            fSigmaS = 0;
            fSigmaST = 0;
            for (m = 0; m < nTemplateHeight; m++) {
                for (n = 0; n < nTemplateWidth;n++) {
                    fSigmaT = fSigmaT + (*(pTemplate + m*nTemplateWidth + n))*(*(pTemplate + m*nTemplateWidth + n));
                    fSigmaS = fSigmaS + (*(pSearchImage + (i+m)*nSearchImageWidth + (j+n)))*(*(pSearchImage + (m+i)*nSearchImageWidth + (j + n)));
                    fSigmaST = fSigmaST + (*(pSearchImage + (i + m)*nSearchImageWidth + (j + n)))*(*(pTemplate + m*nTemplateWidth + n));
                }
            }
            float tmp = fSigmaST / sqrt(fSigmaS*fSigmaS + fSigmaT*fSigmaT);
            *(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) = tmp;
            if (*(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) > dMaxR) {
                dMaxR = *(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j);
                nVectX = i;
                nVectY = j;
            }
        }
    }
    nMatchPointX = nVectX;
    nMatchPointY = nVectY;

    if (m_pCorrMatrix != NULL)
        delete m_pCorrMatrix;
    return 0;
}

GPU实现模板匹配

extern "C"
static int iDivUp(int a, int b)
{
    return (a % b != 0) ? (a / b + 1) : (a / b);
}

__global__ void CudaMatrixMul_kernal(BYTE *pSrc,
                                float *pSelfMul,
                                int nHeight,
                                int nWidth) 
{

    const int row = blockDim.y*blockIdx.y + threadIdx.y;
    const int col = blockDim.x*blockIdx.x + threadIdx.x;

    const int point = row*nWidth + col;

    float tmp;
    if (row < nHeight&&col < nWidth) {
        tmp = pSrc[point];
        tmp *= tmp;
        pSelfMul[point] = tmp;
    }
}

void CudaMatrixSelfMul(
            BYTE *pSrc,
            float *pSelfMul,
            int nHeight,
            int nWidth
) {
    cudaMemset(pSelfMul, 0, nHeight*nWidth * sizeof(float));

    dim3 threads(16, 16);
    dim3 grid(iDivUp(nWidth, threads.x), iDivUp(nHeight, threads.y));

    CudaMatrixMul_kernal << <grid, threads >> > (pSrc, pSelfMul, nHeight, nWidth);

}


__global__ static void
CorrMatrix_kernel(
                BYTE *pTemplate,
                BYTE *pSearchImage,
                float *pMulTT,
                float *pMulSS,
                float *pMulST,
                float *pCorrMatrix,
                int nTemplateHeight,
                int nTemplateWidth,
                int nSearchImageHeight,
                int nSearchImageWidth
) {
    const int y = blockDim.y *blockIdx.y + threadIdx.y;
    const int x = blockDim.x *blockIdx.x + threadIdx.x;

    float fSigmaTT = 0;
    float fSigmaSS = 0;
    float fSigmaST = 0;
    float fSigmaS = 0;
    float fSigmaT = 0;

    // 
    if (y >= 0 && y <= (nSearchImageHeight - nTemplateHeight + 1) &&
        x >= 0 && x <= (nSearchImageWidth - nTemplateWidth + 1)) {

        for (int m = 0; m < nTemplateHeight; m++) {

            for (int n = 0; n < nTemplateWidth; n++) {

                fSigmaTT = fSigmaTT + pMulTT[m*nTemplateWidth + n];
                fSigmaSS = fSigmaSS + pMulSS[(y + m)*nSearchImageWidth + (x + n)];
                fSigmaS = pSearchImage[((y + m)*nSearchImageWidth + (x + n))];
                fSigmaT = pTemplate[m*nTemplateWidth + n];
                fSigmaST = fSigmaST + fSigmaS*fSigmaT;
            }
        }

        pCorrMatrix[y*(nSearchImageWidth - nTemplateWidth) + x] =
            fSigmaST / sqrt(fSigmaSS*fSigmaSS + fSigmaTT*fSigmaTT);
    }
}


void CorrMatrix(BYTE *pTemplate, 
    BYTE* pSearchImage,
    float *pMulTT, 
    float* pMulSS, 
    float *pMulST, 
    float *pCorrMatrix,
    int nTemplatHeigt, 
    int nTemplatWidth, 
    int nSearchImageHeight, 
    int nSearchImageWidth)
{

    // 计算相关性矩阵
    dim3 threads(16, 16);
    dim3 grid(iDivUp(nSearchImageWidth - nTemplatWidth, threads.x), iDivUp(nSearchImageHeight, threads.y));

    // 启动计算相关性矩阵的核函数
    CorrMatrix_kernel << <grid, threads >> > (pTemplate, pSearchImage,
        pMulTT, pMulSS, pMulST, pCorrMatrix, nTemplatHeigt, nTemplatWidth,
        nSearchImageHeight, nSearchImageWidth);
}




void FindMaxCorrMatrixPoint(
        float *pCorrMatrix,
        int nWidth,
        int nHeight,
        double dMaxR,
        int &nVectY,
        int &nVectX
) {

    // 寻找最新点
    for (int i = 0; i < nHeight; i++) {
        for (int j = 0; j < nWidth; j++) {
            if (*(pCorrMatrix + i*nWidth + j) > dMaxR) {
                dMaxR = *(pCorrMatrix + i*nWidth + j);
                nVectY = i;
                nVectX = j;
            }
        }
    }
}


extern "C"
double cudaMatchTemplate(
    BYTE *pSrc,BYTE *pTemplate,
    int nWidth,int nHeight,
    int nTemplateWidth,int nTemplateHeight,
    int &nMatchPointX,int nMatchPointY
) {
    // 图像空间初始化
    BYTE *d_pSearchImage = NULL;
    BYTE *d_pTemplate = NULL;

    float *d_pMulTT = NULL;
    float *d_pMulSS = NULL;
    float *d_pMulST = NULL;
    float *d_pCorrMatrix = NULL;
    float *m_CorrMatrix = NULL;

    cudaMalloc((void**)&d_pSearchImage, nWidth*nHeight*sizeof(BYTE));
    cudaMalloc((void**)&d_pTemplate, nTemplateHeight*nTemplateWidth*sizeof(BYTE));

    cudaMalloc((void**)&d_pMulTT, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMalloc((void**)&d_pMulSS, nWidth*nHeight * sizeof(float));
    cudaMalloc((void**)&d_pMulST, nTemplateWidth*nTemplateHeight * sizeof(float));

    cudaMalloc((void**)&d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)*sizeof(float));

    cudaMemset(d_pSearchImage, 0, nWidth*nHeight * sizeof(BYTE));
    cudaMemset(d_pTemplate, 0, nTemplateHeight*nTemplateWidth * sizeof(BYTE));
    cudaMemset(d_pMulTT, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMemset(d_pMulSS, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMemset(d_pMulST, 0, nTemplateWidth*nTemplateHeight * sizeof(float));
    cudaMemset(d_pCorrMatrix, 0, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight) * sizeof(float));

    cudaMemcpy(d_pSearchImage, pSrc, nWidth*nHeight * sizeof(BYTE), cudaMemcpyHostToDevice);
    cudaMemcpy(d_pTemplate, pTemplate, nTemplateWidth*nTemplateHeight * sizeof(BYTE), cudaMemcpyHostToDevice);



    // 模板匹配过程
    double dMaxR = 0.1;
    int i, j;
    int nVectX = 0;
    int nVectY = 0;
    // 模板元素自己点乘
    CudaMatrixSelfMul(d_pTemplate, d_pMulTT,nTemplateHeight,nTemplateWidth);

    // 搜素图元素自己点乘
    CudaMatrixSelfMul(d_pSearchImage, d_pMulSS, nHeight, nWidth);

    // 计算相关性矩阵
    CorrMatrix(d_pTemplate, d_pSearchImage, d_pMulTT, d_pMulSS, d_pMulST,
        d_pCorrMatrix,nTemplateHeight, nTemplateWidth, nHeight, nWidth);

    float *m_pCorrMatrix = new float[(nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)];
    cudaMemcpy(m_pCorrMatrix, d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight),cudaMemcpyDeviceToHost);

    //  寻找最佳匹配点
    FindMaxCorrMatrixPoint(m_pCorrMatrix, nWidth - nTemplateWidth, nHeight - nTemplateHeight,dMaxR, nVectY, nVectX);

    nMatchPointX = nVectX;
    nMatchPointY = nVectY;

    return 0;

}

CUDA图像处理 | 模板匹配

模板匹配

OpenCV中的模板匹配

匹配方法

代码实现

C++实现模板匹配

GPU实现模板匹配

猜你喜欢