计算RGB图像直方图(一)

这里的图像每个像素占4个字节:R、G、B分量各占一个字节,另有一个字节在计算直方图时不使用。每个分量有256种可能的取值,因此RGB图像的直方图是一个256 * 3项的数组,其中每一项(bin)是一个32位无符号整数。

软件算法:

// Computes the histograms of the R, G and B channels of an image.
//
// image_data is a pointer to an RGBA image with 8 bits per channel,
//            stored as 4 interleaved bytes per pixel (R, G, B, A);
//            the fourth byte of each pixel is ignored
// w          is the width of the image in pixels
// h          is the height of the image in pixels
//
// Returns a newly allocated array of 256 * 3 unsigned int bins:
// entries [0, 256) count R values, [256, 512) count G values and
// [512, 768) count B values. The caller owns the array and must
// release it with free(). Returns NULL if the allocation fails.
// If w or h is not positive, no pixel is read and an all-zero
// histogram is returned.
unsigned int *
histogram_rgba_unorm8(void *image_data, int w, int h)
{
    enum { NUM_BINS = 256, NUM_CHANNELS = 3, BYTES_PER_PIXEL = 4 };

    const unsigned char *px = (const unsigned char *)image_data;
    unsigned int *hist;
    unsigned int *hist_r;
    unsigned int *hist_g;
    unsigned int *hist_b;
    size_t num_pixels;
    size_t p;

    // calloc hands back zero-filled memory, so every bin starts at 0.
    hist = calloc((size_t)NUM_BINS * NUM_CHANNELS, sizeof *hist);
    if (hist == NULL)
    {
        return NULL;
    }

    if (w <= 0 || h <= 0)
    {
        return hist;
    }

    hist_r = hist;
    hist_g = hist + NUM_BINS;
    hist_b = hist + 2 * NUM_BINS;

    // Count pixels in size_t: the int expression w * h * 4 overflows
    // (undefined behavior) for large images.
    num_pixels = (size_t)w * (size_t)h;

    // Single pass over the image; each pixel updates one bin per channel.
    for (p = 0; p < num_pixels; p++)
    {
        hist_r[px[0]]++;
        hist_g[px[1]]++;
        hist_b[px[2]]++;
        px += BYTES_PER_PIXEL;
    }

    return hist;
}

OpenCL加速实现:

首先,每个work group计算一个局部直方图tmp_histogram[256*3],然后把它写入histogram[num_groups*256*3]数组中该work group对应的位置。

// Number of gsize[0]-wide groups needed to cover image_width
// (the "+ gsize[0] - 1" rounds the division up).
global_work_size[0] = ((image_width + gsize[0] - 1) / gsize[0]);
// Same rounding-up division for the image height.
global_work_size[1] = ((image_height + gsize[1] - 1) / gsize[1]);
// Total group count; the partial-histogram kernel writes 256 * 3
// entries per work-group, so this also sizes its output buffer.
num_groups = global_work_size[0] * global_work_size[1];
// Scale the group counts back up so each global size is a multiple of
// gsize[] that is at least as large as the image dimension.
// NOTE(review): presumably gsize[] is the local work-group size passed
// to the enqueue call - confirm against the host code.
global_work_size[0] *= gsize[0];
global_work_size[1] *= gsize[1];

// Computes a partial R/G/B histogram for the pixels handled by one
// work-group and stores it in global memory.
//
// img        2D image; each work-item reads the pixel at its global id
//            with read_imagef and scales the channels to 0..255.
// histogram  output buffer. Every work-group writes 256 * 3 consecutive
//            entries (256 R bins, then 256 G bins, then 256 B bins)
//            starting at (linear work-group index) * 256 * 3, so the
//            buffer must hold 256 * 3 entries per work-group.
kernel void
histogram_partial_image_rgba_unorm8(image2d_t img,
                                    global uint *histogram)
{
    int local_size = (int)get_local_size(0) * (int)get_local_size(1);
    int image_width = get_image_width(img);
    int image_height = get_image_height(img);
    // Offset of this work-group's 256 * 3 slice in the output buffer.
    int group_indx = (get_group_id(1) * get_num_groups(0) + get_group_id(0)) * 256 * 3;
    int x = get_global_id(0); // pixel column handled by this work-item
    int y = get_global_id(1); // pixel row handled by this work-item
    local uint tmp_histogram[256 * 3];
    // Linear index of this work-item inside its work-group.
    int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
    int i;

    // Clear the local buffer that will hold the partial histogram.
    // Work-items stride through the 256 * 3 bins, so the buffer is fully
    // cleared whether the work-group is smaller or larger than it.
    for (i = tid; i < 256 * 3; i += local_size)
    {
        tmp_histogram[i] = 0;
    }
    // Every bin must be zero before any work-item starts counting.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Work-items that fall outside the image (the global size is rounded
    // up to a multiple of the work-group size) contribute nothing.
    if ((x < image_width) && (y < image_height))
    {
        float4 clr = read_imagef(img,
            CLK_NORMALIZED_COORDS_FALSE |
            CLK_ADDRESS_CLAMP_TO_EDGE |
            CLK_FILTER_NEAREST,
            (float2)(x, y));
        uchar indx_x, indx_y, indx_z;
        // Round to nearest: the default float-to-integer conversion
        // truncates, which would put a value read back as k - epsilon
        // into bin k - 1.
        indx_x = convert_uchar_sat_rte(clr.x * 255.0f);
        indx_y = convert_uchar_sat_rte(clr.y * 255.0f);
        indx_z = convert_uchar_sat_rte(clr.z * 255.0f);
        // Several work-items may hit the same bin, hence the atomics.
        atomic_inc(&tmp_histogram[indx_x]);
        atomic_inc(&tmp_histogram[256+(uint)indx_y]);
        atomic_inc(&tmp_histogram[512+(uint)indx_z]);
    }
    // All increments must be complete before the bins are copied out.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Copy the partial histogram to the location in histogram given by
    // group_indx, using the same strided pattern as the clear above.
    for (i = tid; i < 256 * 3; i += local_size)
    {
        histogram[group_indx + i] = tmp_histogram[i];
    }
}

然后,对前面生成的histogram[num_groups*256*3]数组按work group逐项求和,得到最终的直方图数组。

// One work-item per bin of the final histogram (256 bins * 3 channels).
partial_global_work_size[0] = 256*3;
// Cap the local size at 256 work-items.
// NOTE(review): OpenCL implementations that require uniform work-groups
// need 256 * 3 to be divisible by this local size - confirm that
// workgroup_size satisfies this when it is below 256.
partial_local_work_size[0] =
(workgroup_size > 256) ? 256 : workgroup_size;

// Adds up the per-work-group partial histograms into the final one.
//
// partial_histogram  num_groups consecutive partial histograms of
//                    256 * 3 entries each.
// num_groups         number of partial histograms to add up.
// histogram          output buffer of 256 * 3 bins.
//
// Each work-item produces the output bin selected by its global id.
kernel void
histogram_sum_partial_results_unorm8(
    global uint *partial_histogram,
    int num_groups,
    global uint *histogram)
{
    int tid = (int)get_global_id(0);
    // The running total is used by this work-item only, so a private
    // variable is enough; no local-memory scratch array is needed.
    uint sum = 0;
    int g;

    // Ignore surplus work-items if the global size exceeds the bin count.
    if (tid >= 256 * 3)
    {
        return;
    }

    // Bin tid of partial histogram g lives at g * 256 * 3 + tid.
    for (g = 0; g < num_groups; g++)
    {
        sum += partial_histogram[g * 256 * 3 + tid];
    }

    histogram[tid] = sum;
}

猜你喜欢

转载自blog.csdn.net/A199222/article/details/82348482