// 加速指令集和计时的应用 (Using SIMD instruction sets and high-resolution timing)

#define WIN  
#include <Windows.h>
#include <intrin.h>  
#include <stdlib.h>  
#include <math.h>  
#include <iostream>
using namespace std;
// Number of float elements in each vector.
// A multiple of 4 lets the SSE loop cover the whole array in full 4-float steps.
// The expansion is parenthesized so N behaves as a single value inside larger
// expressions (without it, `x % N` would expand to `(x % 4) * 100000`).
#define N (4*100000)
// The arrays are 16-byte aligned because the SSE code accesses them with the
// aligned load/store intrinsics.
_MM_ALIGN16 float op1[N];      // first input operand
_MM_ALIGN16 float op2[N];      // second input operand
_MM_ALIGN16 float result1[N];  // sums written by the scalar loop (add1)
_MM_ALIGN16 float result2[N];  // sums written by the SSE loop (add2)

// Fills op1 and op2 with pseudo-random values in the range [0, 1].
// For each index the value for op1 is drawn first, then the value for op2,
// so the two arrays consume the rand() sequence in an interleaved order.
void init()
{
    const float scale = static_cast<float>(RAND_MAX);
    int idx = 0;
    while (idx < N)
    {
        const float first = static_cast<float>(rand()) / scale;
        const float second = static_cast<float>(rand()) / scale;
        op1[idx] = first;
        op2[idx] = second;
        ++idx;
    }
}

// Compares result1 against result2.
//   debug != 0 : dumps every pair of values, one pair per line, and performs
//                no comparison.
//   debug == 0 : prints whether all pairs agree within an absolute tolerance
//                of 0.000001, stopping the scan at the first mismatch.
void checkResult(int debug)
{
    if (debug)
    {
        for (int idx = 0; idx < N; ++idx)
            std::cout << "result1:" << result1[idx] << "result2:" << result2[idx] << std::endl;
        return;
    }

    // Advance while the pair at idx is within tolerance; idx reaches N only
    // when no pair differed by more than the tolerance.
    int idx = 0;
    while (idx < N && !(fabs(result1[idx] - result2[idx]) > 0.000001))
        ++idx;

    if (idx == N)
        printf("Result is Same\n");
    else
        printf("Result is not same\n");
}

// Scalar reference implementation: result1[i] = op1[i] + op2[i] for all i.
void add1()
{
    const float* lhs = op1;
    const float* rhs = op2;
    float* out = result1;
    float* const stop = result1 + N;
    while (out != stop)
    {
        *out = *lhs + *rhs;
        ++out;
        ++lhs;
        ++rhs;
    }
}

// SSE implementation: result2[i] = op1[i] + op2[i] for all i, processing four
// floats per iteration.
// _mm_load_ps/_mm_store_ps require 16-byte aligned addresses; the arrays are
// declared with _MM_ALIGN16 and the index advances by 4 floats (16 bytes), so
// every vector access stays aligned.
// If N is not a multiple of 4, the leftover elements are added one by one so
// the function never touches memory past the end of the arrays.
void add2()
{
    const int count = N;                        // evaluate the macro once
    const int vectorEnd = count - (count % 4);  // elements covered by full 4-float groups

    for (int i = 0; i < vectorEnd; i += 4)
    {
        const __m128 a = _mm_load_ps(op1 + i);
        const __m128 b = _mm_load_ps(op2 + i);
        _mm_store_ps(result2 + i, _mm_add_ps(a, b));  // result2[i..i+3] = a + b
    }

    // Scalar tail for the at most three elements that do not fill a vector.
    for (int i = vectorEnd; i < count; i++)
        result2[i] = op1[i] + op2[i];
}

int main()
{
    init();

    _LARGE_INTEGER time_start;    /*开始时间*/
    _LARGE_INTEGER time_over;        /*结束时间*/
    double dqFreq;                /*计时器频率*/
    LARGE_INTEGER f;            /*计时器频率*/
    QueryPerformanceFrequency(&f);
    dqFreq = (double)f.QuadPart;
    QueryPerformanceCounter(&time_start);
    //---
    printf("Add a vector:\n");
    add1();
    //
    QueryPerformanceCounter(&time_over);
    cout << ((time_over.QuadPart - time_start.QuadPart) / dqFreq) << endl;//单位为秒,精度为1000 000/(cpu主频)微秒

    printf("\n");
    _LARGE_INTEGER time_start1;    /*开始时间*/
    _LARGE_INTEGER time_over1;        /*结束时间*/
    double dqFreq1;                /*计时器频率*/
    LARGE_INTEGER f1;            /*计时器频率*/
    QueryPerformanceFrequency(&f1);
    dqFreq1 = (double)f.QuadPart;
    QueryPerformanceCounter(&time_start1);
    printf("Add a vector with SSE instructions:\n");
    add2();
    QueryPerformanceCounter(&time_over1);
    cout << ((time_over1.QuadPart - time_start1.QuadPart) / dqFreq1) << endl;

    printf("\n");
    checkResult(0);

    return 0;
}

// 猜你喜欢 (You may also like)

// 转载自 (Reposted from) blog.csdn.net/u014413083/article/details/53558036
// 今日推荐 (Today's recommendations)