#define WIN
#include <Windows.h>
#include <intrin.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
using namespace std;
// Number of floats in each vector below. Keep it a multiple of 4: the SSE
// path processes four floats per instruction, so any other size needs extra
// handling to stay correct.
// The expansion is parenthesized so the macro behaves as a single value inside
// larger expressions such as `x / N` or `x % N`.
#define N (4 * 100000)
// 16-byte alignment is what the aligned SSE load/store intrinsics
// (_mm_load_ps / _mm_store_ps) require of the addresses they are given.
alignas(16) float op1[N];     // first input vector
alignas(16) float op2[N];     // second input vector
alignas(16) float result1[N]; // element-wise sums written by add1()
alignas(16) float result2[N]; // element-wise sums written by add2()
// Fills op1 and op2 with values rand() / RAND_MAX, i.e. floats in [0, 1].
// For each index the op1 value is drawn first, then the op2 value.
void init()
{
    const float denom = (float)RAND_MAX;
    for (int k = 0; k != N; ++k)
    {
        const float first = (float)rand() / denom;
        const float second = (float)rand() / denom;
        op1[k] = first;
        op2[k] = second;
    }
}
// Compares result1 against result2 element by element.
//   debug != 0: prints every pair of values and performs no comparison.
//   debug == 0: prints "Result is Same" when no pair differs by more than
//               0.000001, otherwise "Result is not same".
void checkResult(int debug)
{
    if (debug)
    {
        // Dump mode: only print the values, no verdict is reported.
        for (int idx = 0; idx < N; ++idx)
            cout << "result1:" << result1[idx] << "result2:" << result2[idx] << endl;
        return;
    }

    // Advance until the first pair whose difference exceeds the tolerance.
    int idx = 0;
    while (idx < N && !(fabs(result1[idx] - result2[idx]) > 0.000001))
        ++idx;

    // Reaching the end means no mismatch was found.
    if (idx == N)
        printf("Result is Same\n");
    else
        printf("Result is not same\n");
}
// Scalar reference implementation: result1[i] = op1[i] + op2[i] for all i.
void add1()
{
    const float *lhs = op1;
    const float *rhs = op2;
    float *out = result1;
    for (int remaining = N; remaining > 0; --remaining)
    {
        *out = *lhs + *rhs;
        ++out;
        ++lhs;
        ++rhs;
    }
}
// SSE implementation: result2[i] = op1[i] + op2[i] for all i.
// Four floats are added per iteration using aligned loads/stores, so op1, op2
// and result2 must start on 16-byte boundaries. Any elements left over when N
// is not a multiple of 4 are added one at a time, so the vector loop never
// reads or writes past the end of the arrays.
void add2()
{
    int i = 0;
    // Vector part: only run while a full group of four elements remains.
    for (; i + 4 <= N; i += 4)
    {
        const __m128 a = _mm_load_ps(op1 + i);  // load op1[i..i+3]
        const __m128 b = _mm_load_ps(op2 + i);  // load op2[i..i+3]
        const __m128 c = _mm_add_ps(a, b);      // c = a + b, lane by lane
        _mm_store_ps(result2 + i, c);           // store into result2[i..i+3]
    }
    // Scalar tail: at most three remaining elements.
    for (; i < N; ++i)
        result2[i] = op1[i] + op2[i];
}
int main()
{
init();
_LARGE_INTEGER time_start; /*开始时间*/
_LARGE_INTEGER time_over; /*结束时间*/
double dqFreq; /*计时器频率*/
LARGE_INTEGER f; /*计时器频率*/
QueryPerformanceFrequency(&f);
dqFreq = (double)f.QuadPart;
QueryPerformanceCounter(&time_start);
//---
printf("Add a vector:\n");
add1();
//
QueryPerformanceCounter(&time_over);
cout << ((time_over.QuadPart - time_start.QuadPart) / dqFreq) << endl;//单位为秒,精度为1000 000/(cpu主频)微秒
printf("\n");
_LARGE_INTEGER time_start1; /*开始时间*/
_LARGE_INTEGER time_over1; /*结束时间*/
double dqFreq1; /*计时器频率*/
LARGE_INTEGER f1; /*计时器频率*/
QueryPerformanceFrequency(&f1);
dqFreq1 = (double)f.QuadPart;
QueryPerformanceCounter(&time_start1);
printf("Add a vector with SSE instructions:\n");
add2();
QueryPerformanceCounter(&time_over1);
cout << ((time_over1.QuadPart - time_start1.QuadPart) / dqFreq1) << endl;
printf("\n");
checkResult(0);
return 0;
}
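// Application of SIMD acceleration instruction sets and timing
// You may also like
// Reposted from blog.csdn.net/u014413083/article/details/53558036
// Today's recommendations
// Weekly ranking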