VEC-C之滑窗应用案例-3X3高斯滤波器

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sac761/article/details/75043981

一,不使用VEC-C版本:

#include "highgui.h"
#include "opencv2/opencv.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
#include <string>

using namespace cv;
using namespace std;
// Scalar (non-Vec-C) reference 3x3 Gaussian filter; defined after rmain().
void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);
int rmain(int argc, char *argv[])
{
	const char* imagename = "E:\\test\\ce.jpeg";
	//从文件中读入图像
	IplImage* img=cvLoadImage(imagename,0);
	uchar* src=(uchar*)img->imageData;
	int width=img->width;
	int height=img->height;
	IplImage* dst=cvCreateImage(CvSize(width,height),img->depth,0);
	uchar* dst_u=(uchar*)dst->imageData;
	
	gaussian3x3_ref((uchar*)src, (uchar*)dst_u, width,height);
	cvShowImage("src", img);
	cvShowImage("dst", dst);
	waitKey();
	return 0;
}
/*
 * Scalar reference 3x3 Gaussian blur.
 *
 * Kernel (row-major): 1 2 1 / 2 4 2 / 1 2 1. The weighted sum is shifted
 * right by 4 (divide by 16, truncating) and saturated to 255.
 *
 * p_u8Src  8-bit input pixels, u32Rows * u32Cols bytes.
 * p_u8Dst  8-bit output pixels, same size. Must not overlap p_u8Src,
 *          because neighbours are read after earlier outputs are written.
 * u32Rows  number of pixels per line, i.e. the distance in bytes between
 *          vertically adjacent pixels.
 * u32Cols  number of lines.
 *
 * NOTE: the parameter names are misleading. This implementation has always
 * used +/- u32Rows as the offset to the line above/below, and the call site
 * in this file passes (line length, line count) in that order. That meaning
 * is kept so existing callers continue to work.
 *
 * Border handling: neighbour coordinates are clamped to the image (border
 * replication). The previous version read before the start and past the end
 * of the buffer on the first/last line, wrapped into the adjacent line at the
 * left/right edge, and formed its negative offsets in unsigned arithmetic.
 * Results for interior pixels are unchanged.
 */
void gaussian3x3_ref(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
	static const uchar s_u8Kernel[3][3] = { {1,2,1}, {2,4,2}, {1,2,1} };
	const uint u32Shift = 4; // kernel weights sum to 16 == 1 << 4

	if (!p_u8Src || !p_u8Dst)
		return;

	const uint u32LineLen = u32Rows;
	const uint u32LineCnt = u32Cols;

	for (uint y = 0; y < u32LineCnt; y++)
	{
		// Clamp the line above/below to the first/last line of the image.
		const uint yAbove = (y == 0) ? y : y - 1;
		const uint yBelow = (y + 1 == u32LineCnt) ? y : y + 1;

		const uchar* p_lines[3];
		p_lines[0] = p_u8Src + (size_t)yAbove * u32LineLen;
		p_lines[1] = p_u8Src + (size_t)y      * u32LineLen;
		p_lines[2] = p_u8Src + (size_t)yBelow * u32LineLen;
		uchar* p_out_u8 = p_u8Dst + (size_t)y * u32LineLen;

		for (uint x = 0; x < u32LineLen; x++)
		{
			// Clamp the left/right neighbour to the current line.
			uint xs[3];
			xs[0] = (x == 0) ? x : x - 1;
			xs[1] = x;
			xs[2] = (x + 1 == u32LineLen) ? x : x + 1;

			uint acc = 0;
			for (uint r = 0; r < 3; r++)
				for (uint c = 0; c < 3; c++)
					acc += (uint)p_lines[r][xs[c]] * s_u8Kernel[r][c];

			// Largest possible value is (255 * 16) >> 4 == 255; the
			// saturation is kept as a guard should the kernel change.
			const uint res = acc >> u32Shift;
			p_out_u8[x] = (res > 255) ? (uchar)255 : (uchar)res;
		}
	}
}
二,使用VEC-C版本:

#include <opencv2/opencv.hpp>
#include <vec-c.h>
using namespace std;
// Scratch globals. In the code shown here, hist and p_u16DstB0 are referenced
// only from the commented-out section of main(); p_u16DstB1 is not referenced.
int hist[256];
ushort p_u16DstB0[256*16];
ushort p_u16DstB1[256*16];

// Vec-C 3x3 Gaussian filter; defined after main().
void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols);

int main()
{


	{
		short16 inN2,inN3;
		short16 inN,inN1;
		short inn[16]={3,0,1,0,1,0,1,0,0,0,0,0,0,0,0};
		short in[16]={9,4369,0,3,4,5,6,7,8,9,10,11,12,13,14,15};
		inN=*(short16*)inn;
		//short16 v0=(short16)vpld(in,inN,inN1,inN2,inN3);
		vpld(rel,in,inN,inN2,inN3);
		//ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };
		//short16 vtemp; 
		//ushort16 vec_weights0;
		//vec_weights0 = vpld(rel, coeff,vtemp);
		short p_out_u8[16];
		vst(inN2,(short*)p_out_u8,(short)0xffff);
		for(int i=0;i<16;i++)
		{
			cout<<p_out_u8[i]<<endl;
		}
		cout<<endl;
		short p_out_u[16];
		vst(inN3,(short*)p_out_u,(short)0xffff);
		for(int i=0;i<16;i++)
		{
			cout<<p_out_u[i]<<endl;
		}
		
		getchar();
	}

	//for(int i=0;i<256;i++)
	//	hist[i]=0;
	//for(int i=0;i<256*16;i++)
	//	p_u16DstB0[i]=0;
	//const char* imagename = "E:\\test\\ce.jpeg";
	////从文件中读入图像
	//IplImage* img=cvLoadImage(imagename,0);
	//uchar* src=(uchar*)img->imageData;
	//for(int i=0;i<20;i++)
	//	src[i]=1;
	//
	//int width = img->width;//图片宽度
	//int height = img->height;//图片高度

	//IplImage* dst=cvCreateImage(CvSize(width,height),img->depth,0);
	//uchar* dst_u=(uchar*)dst->imageData;
	////src[15]=255;
	////TODO vec-c progress
	//gaussian3x3((uchar*)src,(uchar*)dst_u,width,height);

	////显示图像
	//cvShowImage("src", img);
	//cvShowImage("dst", dst);
	////cvSvSaveImage("E:\\test\\cev.jpeg",img);
	//
	//cv::waitKey();
	
	return 0;
}
/*
 * 3x3 Gaussian blur written with Vec-C sliding-window intrinsics.
 *
 * Coefficients are 1 2 1 / 2 4 2 / 1 2 1, stored with one zero of padding
 * after each kernel row. The image is processed in vertical strips that are
 * 16 pixels wide; every inner iteration produces one line of a strip.
 *
 * p_u8Src  8-bit input pixels.
 * p_u8Dst  8-bit output pixels.
 * u32Rows  number of pixels per line. It is used as the pointer step between
 *          consecutive lines and determines the number of 16-pixel strips.
 * u32Cols  number of lines processed in each strip.
 *
 * NOTE: the parameter names are misleading; the meaning above is what the
 * pointer arithmetic in this function implements, and the (commented-out)
 * call in this file passes (width, height) in that order.
 *
 * Changes compared with the previous revision:
 *  - The right-edge store mask is now derived from u32Rows, the same quantity
 *    that determines the strip count. It used u32Cols, so a tail mask was
 *    applied (or omitted) incorrectly whenever the two differed modulo 16.
 *  - Loop counters are uint instead of ushort, so sizes above 65535 terminate.
 *  - The negative start offsets are computed in signed arithmetic. With
 *    unsigned operands they wrap to a large positive index on targets whose
 *    pointers are wider than uint.
 *  - Unused locals (vOff, vtemp, step) were removed.
 *
 * NOTE(review): loads start one line above and one pixel left of p_u8Src and
 * each load reads a whole uchar32, so the caller presumably has to provide
 * readable padding around the image - confirm for the target memory layout.
 */
void gaussian3x3(uchar *p_u8Src, uchar *p_u8Dst, uint u32Rows, uint u32Cols)
{
	ushort coeff[16] = { 1, 2, 1, 0, 2, 4, 2, 0, 1, 2, 1, 0, 0, 0, 0, 0 };

	uchar32 v0,v1,v2;
	ushort16 v3,v_coeff;
	uint j,i;
	uint u32OutLoop    = (u32Rows + 15)>> 4; // 16 filtered pixels per store
	uint16 vacc0;
	v_coeff = *(ushort16*)coeff;
	ushort vprRightMask, vprMask;

	/* Two store masks: all 16 lanes, and the partial mask for the last strip. */
	vprMask = 0xffff; // all lanes enabled
	vprRightMask = 0xffff;
	if (u32OutLoop != u32Rows >> 4)
		vprRightMask = (1 << (u32Rows & 15)) - 1;

	const int s32Stride = (int)u32Rows;

	for (i = 0; i <u32OutLoop; i++) // strips along the line
	{
		const int s32Strip = (int)(i * 16);
		// Lines y-1, y and y+1 for y == 0, each starting one pixel to the left.
		uchar* p_in_u8A = (uchar *)&p_u8Src[s32Strip - 1 - s32Stride];
		uchar* p_in_u8B = (uchar *)&p_u8Src[s32Strip - 1];
		uchar* p_in_u8C = (uchar *)&p_u8Src[s32Strip - 1 + s32Stride];
		uchar* p_out_u8 = (uchar *)&p_u8Dst[s32Strip];

		if (i == u32OutLoop-1)
			vprMask = vprRightMask;
		v0 = *(uchar32*)p_in_u8A;
		v1 = *(uchar32*)p_in_u8B;
		v2 = *(uchar32*)p_in_u8C;
		// From here on p_in_u8A walks the "next line to load", starting at line 2.
		p_in_u8A += 3 * u32Rows;

		for (j = 0; j < u32Cols; ++j)
		{
			/*#define SW_CONFIG(init_psh,num_filter,src_offset,coeff_offset,step,pattern)*/

			/* Author's note: sliding window of length 8, advancing one
			 * element per step; the result is the multiply-sum of the shifted
			 * coefficients with v0. The second v0 only pads the operand. */
			vacc0 = (uint16) vswmpy5(v0, v0, v_coeff, (uint)0);
			/* Author's note: slide over v1 and accumulate into vacc0;
			 * d[20:16]=4 selects a coefficient offset of 4 (second kernel row). */
			vacc0 = vswmac5(accumulate, v1, v1, v_coeff, (uint)4<<16, vacc0);
			/* Author's note: psl applies a logical shift to the result,
			 * d[5:0]=4 divides each element by 2^4, d[20:16]=8 selects a
			 * coefficient offset of 8 (third kernel row).
			 * The constant evaluates as 4 | (8 << 16). */
			v3   = (ushort16) vswmac5(psl, v2, v2, v_coeff, (uint)4|8<<16, vacc0);
			/* These three steps filter 16 pixels at once. */

			// NOTE(review): presumably a saturating store of the enabled lanes.
			vst(sat, v3, (uchar16*)p_out_u8, vprMask);

			// Advance one line: rotate the three-line window and load the next.
			p_out_u8+=u32Rows;
			v0 = v1;
			v1 = v2;
			v2 = *(uchar32*)p_in_u8A;
			p_in_u8A += u32Rows;
		}
	}
}



猜你喜欢

转载自blog.csdn.net/sac761/article/details/75043981