版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sac761/article/details/75043981
一,不使用VEC-C版本:
#include "highgui.h"
#include "opencv2/opencv.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <string>
using namespace cv;
using namespace std;
void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);
int rmain(int argc, char *argv[])
{
const char* imagename = "E:\\test\\ce.jpeg";
//从文件中读入图像
IplImage* img=cvLoadImage(imagename,0);
uchar* src=(uchar*)img->imageData;
int width=img->width;
int height=img->height;
IplImage* dst=cvCreateImage(CvSize(width,height),img->depth,0);
uchar* dst_u=(uchar*)dst->imageData;
gaussian3x3_ref((uchar*)src, (uchar*)dst_u, width,height);
cvShowImage("src", img);
cvShowImage("dst", dst);
waitKey();
return 0;
}
/**
 * Scalar reference 3x3 Gaussian blur for an 8-bit single-channel image.
 *
 * Each output pixel is the sum of its 3x3 neighbourhood weighted by
 * {1,2,1, 2,4,2, 1,2,1}, shifted right by 4 (divide by 16) and saturated
 * to 255.
 *
 * Parameter naming is historical. The function has always used u32Rows as the
 * distance, in pixels, between vertically adjacent pixels (the length of one
 * image row) and processes u32Rows * u32Cols pixels in total. The caller in
 * this file passes (width, height), so:
 *
 * @param p_u8Src  source pixels, u32Rows * u32Cols bytes, stored row after row
 * @param p_u8Dst  destination pixels, same layout; must not alias p_u8Src
 * @param u32Rows  number of pixels in one image row (row pitch)
 * @param u32Cols  number of image rows
 *
 * Neighbours that fall outside the image are taken from the nearest edge
 * pixel (border replication), so no memory outside the buffers is touched.
 * Earlier revisions read outside the image at the borders and computed the
 * neighbour offsets in unsigned arithmetic, which wraps to a huge positive
 * index on 64-bit targets.
 */
void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
    static const uchar s8Kernel[9] = {1,2,1,2,4,2,1,2,1};
    const uint ps = 4;                  // kernel weights sum to 16 = 1 << 4
    const uint rowLen = u32Rows;        // pixels per image row
    const uint rowCount = u32Cols;      // number of image rows

    if (!p_u8Src || !p_u8Dst || rowLen == 0 || rowCount == 0)
        return;

    const uchar *p_mid = p_u8Src;
    uchar *p_out_u8 = p_u8Dst;

    for (uint y = 0; y < rowCount; y++)
    {
        // Clamp the rows above and below to the image (replicated border).
        const uchar *p_up = (y > 0) ? p_mid - rowLen : p_mid;
        const uchar *p_down = (y + 1 < rowCount) ? p_mid + rowLen : p_mid;

        for (uint x = 0; x < rowLen; x++)
        {
            // Clamp the left and right columns the same way.
            const uint xl = (x > 0) ? x - 1 : x;
            const uint xr = (x + 1 < rowLen) ? x + 1 : x;

            uint acc = 0;
            acc += p_up[xl]   * s8Kernel[0];
            acc += p_up[x]    * s8Kernel[1];
            acc += p_up[xr]   * s8Kernel[2];
            acc += p_mid[xl]  * s8Kernel[3];
            acc += p_mid[x]   * s8Kernel[4];
            acc += p_mid[xr]  * s8Kernel[5];
            acc += p_down[xl] * s8Kernel[6];
            acc += p_down[x]  * s8Kernel[7];
            acc += p_down[xr] * s8Kernel[8];

            const uint res = (acc >> ps) & 0xFFFF;
            p_out_u8[x] = (res > 255) ? 255 : (uchar)res;
        }

        p_mid += rowLen;
        p_out_u8 += rowLen;
    }
}
二,使用VEC-C版本:
#include <opencv2/opencv.hpp>
#include <vec-c.h>
using namespace std;
// File-scope scratch buffers. In the code visible here, hist and p_u16DstB0
// are referenced only by the commented-out initialisation loops in main();
// p_u16DstB1 is not referenced at all.
int hist[256];
ushort p_u16DstB0[256*16];
ushort p_u16DstB1[256*16];
// Forward declaration of the VEC-C 3x3 Gaussian filter defined after main().
void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);
int main()
{
{
short16 inN2,inN3;
short16 inN,inN1;
short inn[16]={3,0,1,0,1,0,1,0,0,0,0,0,0,0,0};
short in[16]={9,4369,0,3,4,5,6,7,8,9,10,11,12,13,14,15};
inN=*(short16*)inn;
//short16 v0=(short16)vpld(in,inN,inN1,inN2,inN3);
vpld(rel,in,inN,inN2,inN3);
//ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };
//short16 vtemp;
//ushort16 vec_weights0;
//vec_weights0 = vpld(rel, coeff,vtemp);
short p_out_u8[16];
vst(inN2,(short*)p_out_u8,(short)0xffff);
for(int i=0;i<16;i++)
{
cout<<p_out_u8[i]<<endl;
}
cout<<endl;
short p_out_u[16];
vst(inN3,(short*)p_out_u,(short)0xffff);
for(int i=0;i<16;i++)
{
cout<<p_out_u[i]<<endl;
}
getchar();
}
//for(int i=0;i<256;i++)
// hist[i]=0;
//for(int i=0;i<256*16;i++)
// p_u16DstB0[i]=0;
//const char* imagename = "E:\\test\\ce.jpeg";
////从文件中读入图像
//IplImage* img=cvLoadImage(imagename,0);
//uchar* src=(uchar*)img->imageData;
//for(int i=0;i<20;i++)
// src[i]=1;
//
//int width = img->width;//图片宽度
//int height = img->height;//图片高度
//IplImage* dst=cvCreateImage(CvSize(width,height),img->depth,0);
//uchar* dst_u=(uchar*)dst->imageData;
////src[15]=255;
////TODO vec-c progress
//gaussian3x3((uchar*)src,(uchar*)dst_u,width,height);
////显示图像
//cvShowImage("src", img);
//cvShowImage("dst", dst);
////cvSvSaveImage("E:\\test\\cev.jpeg",img);
//
//cv::waitKey();
return 0;
}
/**
 * 3x3 Gaussian blur written with VEC-C sliding-window intrinsics; each vector
 * step produces 16 output pixels.
 *
 * As the code uses them, u32Rows is the distance in pixels between vertically
 * adjacent pixels (the output pointer advances by u32Rows per produced row and
 * the image is tiled in 16-pixel strips along it), and u32Cols is the number
 * of rows produced per strip. The commented-out caller passes (width, height).
 *
 * @param p_u8Src  source pixels (8-bit, single channel)
 * @param p_u8Dst  destination pixels, same layout
 * @param u32Rows  pixels per image row (row pitch)
 * @param u32Cols  number of image rows
 *
 * NOTE(review): the loads start one row above and one pixel left of the image
 * and continue past the last row and the end of each row, so the buffers
 * presumably need padding around the image - confirm with the caller.
 *
 * Fixes relative to the earlier revision:
 *  - the right-edge store mask is derived from u32Rows, the dimension the
 *    16-pixel strips run along; it was derived from u32Cols, which is only
 *    correct for square images;
 *  - loop counters are uint so they cannot wrap before reaching uint bounds;
 *  - source offsets are built with step-wise pointer arithmetic instead of a
 *    mixed signed/unsigned expression that wraps on 64-bit hosts;
 *  - unused locals were removed.
 */
void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
    // Kernel rows at coefficient offsets 0, 4 and 8; each row is padded with a zero.
    ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };
    uchar32 v0,v1,v2;
    ushort16 v3,v_coeff;
    uint j,i;
    uint u32OutLoop = (u32Rows + 15)>> 4; // 16 filtered pixels are produced per step
    uint16 vacc0;
    v_coeff = *(ushort16*)coeff;
    ushort vprRightMask, vprMask;
    /* Two store masks */
    vprMask = 0xffff; // 0xffff selects all 16 lanes
    vprRightMask = 0xffff;
    // The last strip is partial when the row length is not a multiple of 16;
    // enable only the lanes that fall inside the row.
    if ((u32Rows & 15) != 0)
        vprRightMask = (1 << (u32Rows & 15)) - 1;
    for (i = 0; i < u32OutLoop; i++) // horizontal strips
    {
        // Rows above / at / below the first output row, shifted one pixel left.
        uchar* p_in_u8A = p_u8Src + i * 16 - u32Rows - 1;
        uchar* p_in_u8B = p_u8Src + i * 16 - 1;
        uchar* p_in_u8C = p_u8Src + i * 16 + u32Rows - 1;
        uchar* p_out_u8 = p_u8Dst + i * 16;
        if (i == u32OutLoop-1)
            vprMask = vprRightMask;
        v0 = *(uchar32*)p_in_u8A;
        v1 = *(uchar32*)p_in_u8B;
        v2 = *(uchar32*)p_in_u8C;
        // Three rows are already loaded; continue reading from the fourth.
        p_in_u8A += 3 * u32Rows;
        for (j = 0; j < u32Cols; ++j)
        {
            /*#define SW_CONFIG(init_psh,num_filter,src_offset,coeff_offset,step,pattern)*/
            /* Sliding window of length 8, moved by 1 each step; the result is the
             * multiply-sum of the shifted coeff with v0. */
            vacc0 = (uint16) vswmpy5(v0, v0, v_coeff, (uint)0); // slide over v0; the second v0 only pads the length
            /* Add the sliding-window result to vacc0.
             * d[20:16]=4 means a coeff offset of 4.
             * accumulate is equivalent to
             *   vacc1 = (uint16) vswmpy5(v1, v1, v_coeff, (uint)4<<16); vacc0=vintrasum(vacc1,vacc0); */
            vacc0 = vswmac5(accumulate, v1, v1, v_coeff, (uint)4<<16, vacc0);
            /* psl applies a logical shift to the result vector, i.e. every element
             * is divided by 2^n ignoring sign.
             * d[5:0]=4 means shift=4, so each element is divided by 2^4.
             * d[20:16]=8 means a coeff offset of 8. */
            v3 = (ushort16) vswmac5(psl, v2, v2, v_coeff, (uint)4|8<<16, vacc0);
            /* The three steps above compute the 3x3 Gaussian for 16 pixels at once. */
            vst(sat, v3, (uchar16*)p_out_u8, vprMask);
            p_out_u8+=u32Rows;
            // Slide the three-row window down by one row.
            v0 = v1;
            v1 = v2;
            v2 = *(uchar32*)p_in_u8A;
            p_in_u8A += u32Rows;
        }
    }
}