这里的图像是RGBA格式,每个像素占4个字节:R、G、B分量各占一个字节,第4个字节是A分量,直方图计算中不使用。每个分量都有256种取值可能,因此RGB图像的直方图是一个256 * 3的数组,数组中的每一项(bin)是一个32位无符号整数,记录对应分量取值出现的次数。
软件算法:
// Computes the per-channel histograms for R, G, and B.
//
// image_data is a pointer to a tightly packed RGBA image with 8 bits per
//            channel (4 bytes per pixel); the fourth byte of each pixel
//            is skipped.
// w is the width of the image in pixels
// h is the height of the image in pixels
//
// Returns a heap buffer of 256 * 3 bins laid out as the 256 R bins,
// followed by the 256 G bins and then the 256 B bins. Each bin is a
// 32-bit unsigned integer count. The caller owns the buffer and must
// free() it.
//
// A non-positive w or h is treated as an empty image (all bins zero).
// Returns NULL if the allocation fails, if w * h * 4 does not fit in
// size_t, or if image_data is NULL for a non-empty image.
unsigned int *
histogram_rgba_unorm8(void *image_data, int w, int h)
{
    const unsigned char *px = (const unsigned char *)image_data;
    unsigned int *ref_histogram_results;
    size_t num_pixels = 0;
    size_t i;

    if (w > 0 && h > 0)
    {
        // Do the size arithmetic in size_t: w * h * 4 evaluated in int
        // overflows (undefined behavior) for large images.
        if ((size_t)h > (size_t)-1 / 4 / (size_t)w)
            return NULL;
        num_pixels = (size_t)w * (size_t)h;
    }

    if (num_pixels > 0 && px == NULL)
        return NULL;

    // calloc hands back the bins already cleared to zero.
    ref_histogram_results = calloc(256 * 3, sizeof *ref_histogram_results);
    if (ref_histogram_results == NULL)
        return NULL;

    // Single pass over the pixels: byte 0 is R, byte 1 is G, byte 2 is B.
    for (i = 0; i < num_pixels; i++, px += 4)
    {
        ref_histogram_results[px[0]]++;        // R bins: [0, 256)
        ref_histogram_results[256 + px[1]]++;  // G bins: [256, 512)
        ref_histogram_results[512 + px[2]]++;  // B bins: [512, 768)
    }

    return ref_histogram_results;
}
OpenCL加速实现:
首先,每个work group各自计算一个局部直方图数组tmp_histogram[256*3],最后把它写入全局数组histogram[num_groups*256*3]中属于该work group的那一段。
global_work_size[0] = ((image_width + gsize[0] - 1) / gsize[0]);
global_work_size[1] = ((image_height + gsize[1] - 1) / gsize[1]);
num_groups = global_work_size[0] * global_work_size[1];
global_work_size[0] *= gsize[0];
global_work_size[1] *= gsize[1];
// Computes one partial RGB histogram per work-group.
//
// img        2D image read with read_imagef; each of the x, y, z channel
//            values is scaled by 255 and rounded to an 8-bit bin index.
// histogram  output buffer receiving one block of 256 * 3 bins
//            (R bins, then G bins, then B bins) per work-group, at offset
//            (group_id(1) * num_groups(0) + group_id(0)) * 256 * 3.
//
// Each work-item handles the pixel at its global id. Work-items that fall
// outside the image contribute no counts but still help clear and copy
// the work-group's local histogram.
kernel void
histogram_partial_image_rgba_unorm8(image2d_t img,
                                    global uint *histogram)
{
    const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP_TO_EDGE |
                          CLK_FILTER_NEAREST;

    local uint tmp_histogram[256 * 3];

    int local_size = (int)get_local_size(0) * (int)get_local_size(1);
    int image_width = get_image_width(img);
    int image_height = get_image_height(img);

    // Start of this work-group's 256 * 3 block inside histogram.
    int group_indx = (int)((get_group_id(1) * get_num_groups(0)
                            + get_group_id(0)) * 256 * 3);

    int x = (int)get_global_id(0);  // pixel column
    int y = (int)get_global_id(1);  // pixel row

    // Linear index of this work-item within its work-group.
    int tid = (int)(get_local_id(1) * get_local_size(0) + get_local_id(0));
    int i;

    // Clear the local partial histogram. Work-items stride by local_size
    // so all 256 * 3 bins are covered whatever the work-group size.
    for (i = tid; i < 256 * 3; i += local_size)
        tmp_histogram[i] = 0;

    // Every bin must be zeroed before any work-item starts counting.
    barrier(CLK_LOCAL_MEM_FENCE);

    if ((x < image_width) && (y < image_height))
    {
        float4 clr = read_imagef(img, smp, (float2)(x, y));

        // Round to nearest: the default conversion truncates, so a value
        // such as 254.99998f would be counted one bin too low.
        uchar indx_x = convert_uchar_sat_rte(clr.x * 255.0f);
        uchar indx_y = convert_uchar_sat_rte(clr.y * 255.0f);
        uchar indx_z = convert_uchar_sat_rte(clr.z * 255.0f);

        // Several work-items may hit the same bin, hence the atomics.
        atomic_inc(&tmp_histogram[indx_x]);
        atomic_inc(&tmp_histogram[256 + (uint)indx_y]);
        atomic_inc(&tmp_histogram[512 + (uint)indx_z]);
    }

    // All increments must be complete before the bins are copied out.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Copy the partial histogram to this work-group's block in histogram,
    // using the same strided pattern as the clear loop. When local_size
    // is at least 256 * 3, each participating work-item writes one bin.
    for (i = tid; i < 256 * 3; i += local_size)
        histogram[group_indx + i] = tmp_histogram[i];
}
然后,对前面生成的histogram[num_groups*256*3]数组按work group逐项求和,得到最终的直方图数组。
partial_global_work_size[0] = 256*3;
partial_local_work_size[0] =
(workgroup_size > 256) ? 256 : workgroup_size;
// Sums the per-work-group partial histograms into the final histogram.
//
// partial_histogram  num_groups consecutive blocks of 256 * 3 bins.
// num_groups         number of blocks in partial_histogram.
// histogram          output buffer of 256 * 3 bins.
//
// Each work-item owns the bin at its global id and adds up that bin
// across all blocks. A private accumulator is enough because no value is
// shared between work-items. With num_groups <= 0 the bin is written as 0.
kernel void
histogram_sum_partial_results_unorm8(
    global uint *partial_histogram,
    int num_groups,
    global uint *histogram)
{
    int tid = (int)get_global_id(0);
    uint sum = 0;
    int n;

    // Ignore work-items beyond the last bin, in case the global size is
    // ever larger than 256 * 3.
    if (tid >= 256 * 3)
        return;

    for (n = 0; n < num_groups; n++)
        sum += partial_histogram[n * 256 * 3 + tid];

    histogram[tid] = sum;
}