Optlab, 利用csapp里面的优化技术提高程序效率

Optlab

实验目标

优化程序运行效率

大体分为:

  • 有条件:1个整数加法,2个浮点数乘法、加法
  • 无条件:1个整数加法,2个浮点数乘法、加法

设计思路

一、结合书上内容的尝试

代码移动

将循环中多次使用的数据用变量保存,尽可能使其存储在寄存器中

		// Read the row count from the table descriptor once, into a local, before the loop.
		int rows = lineorder_table_info.rows;

减少过程调用

将一些常用的指针,在循环外赋值给临时变量,方便从寄存器中使用

		// Copy each column pointer out of the table struct into a local before the loop,
		// so the loop body indexes the locals instead of going through `table->` each time.
		int * LO_QUANTITY = lineorder_table_info.table->lo_quantity;
    int * LO_EXTENDEDPRICE = lineorder_table_info.table -> lo_extendedprice;
    int * ORDER_DATE = lineorder_table_info.table -> lo_orderdate;
    double * LO_DISCOUNT = lineorder_table_info.table -> lo_discount;
    double * LO_TAX = lineorder_table_info.table -> lo_tax;

消除内存引用

原代码本身已经采用该策略,将求和结果保存在变量中而非采用指针不断引用内存

	// Running totals held in local variables: three over all rows, and three
	// ("_with_condition") over the rows that pass the order-date filter.
	unsigned int quantity_sum = 0;
	double discount_total_price = 0;
	double tax_discount_total_price = 0;
	unsigned int quantity_sum_with_condition = 0;
	double discount_total_price_with_condition = 0;
	double tax_discount_total_price_with_condition = 0;

以下为只使用了上述3种优化方法的代码

	// Hoist the row count and the five column pointers into locals before the loop.
	int rows = lineorder_table_info.rows;
	int *LO_QUANTITY = lineorder_table_info.table->lo_quantity;
	int *LO_EXTENDEDPRICE = lineorder_table_info.table->lo_extendedprice;
	int *ORDER_DATE = lineorder_table_info.table->lo_orderdate;
	double *LO_DISCOUNT = lineorder_table_info.table->lo_discount;
	double *LO_TAX = lineorder_table_info.table->lo_tax;

	for (int i = 0; i < rows; ++i) {
		// Totals over every row.
		quantity_sum += LO_QUANTITY[i];
		discount_total_price += LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
		tax_discount_total_price +=
			LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]) * (1 + LO_TAX[i]);

		// Totals over rows whose order date is not later than limit_orderdate.
		// The three tests are deliberately left separate in this version.
		if (ORDER_DATE[i] <= limit_orderdate) {
			quantity_sum_with_condition += LO_QUANTITY[i];
		}

		if (ORDER_DATE[i] <= limit_orderdate) {
			discount_total_price_with_condition +=
				LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
		}

		if (ORDER_DATE[i] <= limit_orderdate) {
			tax_discount_total_price_with_condition +=
				LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]) * (1 + LO_TAX[i]);
		}
	}

下图为该程序运行的结果

下图为原始query.cpp在服务器上的运行结果

​ 在学校的服务器上运行的结果,使用的测试数据是自己生成后上传到服务器的

循环展开

循环展开并不是展开次数越多效率越好,所以我分别尝试了2✖️1,3✖️1,4✖️1的循环展开

从最终结果来看,3✖️1的循环展开效果最好

  • 2✖️1
	// 2x1 loop unrolling: two rows per iteration, one accumulator per total.
	int rows = lineorder_table_info.rows;
	// The unrolled loop runs while both i and i+1 are valid row indices.
	int limit = rows - 1;
	int * LO_QUANTITY = lineorder_table_info.table->lo_quantity;
	int * LO_EXTENDEDPRICE = lineorder_table_info.table -> lo_extendedprice;
	int * ORDER_DATE = lineorder_table_info.table -> lo_orderdate;
	double * LO_DISCOUNT = lineorder_table_info.table -> lo_discount;
	double * LO_TAX = lineorder_table_info.table -> lo_tax;

	// Declared outside the loop so the tail loop can resume from it.
	int i;
	for (i = 0; i < limit; i = i + 2) {
		// Per-row terms, computed once and reused by the conditional totals.
		double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
		double d1 = LO_EXTENDEDPRICE[i+1] * (1 - LO_DISCOUNT[i+1]);
		double x0 = d0 * (1 + LO_TAX[i]);
		double x1 = d1 * (1 + LO_TAX[i+1]);

		// Rows are added strictly left to right, so the floating-point
		// results match the one-row-at-a-time loop.
		quantity_sum = quantity_sum + LO_QUANTITY[i] + LO_QUANTITY[i+1];
		discount_total_price = discount_total_price + d0 + d1;
		tax_discount_total_price = tax_discount_total_price + x0 + x1;

		if (ORDER_DATE[i] <= limit_orderdate) {
			quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
			discount_total_price_with_condition = discount_total_price_with_condition + d0;
			tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
		}

		if (ORDER_DATE[i+1] <= limit_orderdate) {
			quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+1];
			discount_total_price_with_condition = discount_total_price_with_condition + d1;
			tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x1;
		}
	}

	// Tail: resume from i, the first row the unrolled loop did not process.
	// Restarting from `limit` would re-add rows already consumed above (or
	// start at a negative index for very small tables).
	for (; i < rows; i++) {
		double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
		double x0 = d0 * (1 + LO_TAX[i]);

		quantity_sum = quantity_sum + LO_QUANTITY[i];
		discount_total_price = discount_total_price + d0;
		tax_discount_total_price = tax_discount_total_price + x0;

		if (ORDER_DATE[i] <= limit_orderdate) {
			quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
			discount_total_price_with_condition = discount_total_price_with_condition + d0;
			tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
		}
	}
    

2✖️1的运行结果

​ 可以看出比之前原代码和不使用循环展开的优化的程序效率更高

  • 3✖️1
// 3x1 loop unrolling: three rows per iteration, one accumulator per total.
int rows = lineorder_table_info.rows;
// The unrolled loop runs while i, i+1 and i+2 are all valid row indices.
int limit = rows - 2;
int * LO_QUANTITY = lineorder_table_info.table->lo_quantity;
int * LO_EXTENDEDPRICE = lineorder_table_info.table -> lo_extendedprice;
int * ORDER_DATE = lineorder_table_info.table -> lo_orderdate;
double * LO_DISCOUNT = lineorder_table_info.table -> lo_discount;
double * LO_TAX = lineorder_table_info.table -> lo_tax;

// Declared outside the loop so the tail loop can resume from it.
int i;
for (i = 0; i < limit; i = i + 3) {
	// Per-row terms, computed once and reused by the conditional totals.
	double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
	double d1 = LO_EXTENDEDPRICE[i+1] * (1 - LO_DISCOUNT[i+1]);
	double d2 = LO_EXTENDEDPRICE[i+2] * (1 - LO_DISCOUNT[i+2]);
	double x0 = d0 * (1 + LO_TAX[i]);
	double x1 = d1 * (1 + LO_TAX[i+1]);
	double x2 = d2 * (1 + LO_TAX[i+2]);

	// The integer sum is regrouped; the double sums are added strictly left
	// to right so their rounding matches the one-row-at-a-time loop.
	quantity_sum = quantity_sum + LO_QUANTITY[i] + ( LO_QUANTITY[i+1] + LO_QUANTITY[i+2] );
	discount_total_price = discount_total_price + d0 + d1 + d2;
	tax_discount_total_price = tax_discount_total_price + x0 + x1 + x2;

	if (ORDER_DATE[i] <= limit_orderdate) {
		quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
		discount_total_price_with_condition = discount_total_price_with_condition + d0;
		tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
	}

	if (ORDER_DATE[i+1] <= limit_orderdate) {
		quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+1];
		discount_total_price_with_condition = discount_total_price_with_condition + d1;
		tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x1;
	}

	if (ORDER_DATE[i+2] <= limit_orderdate) {
		quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+2];
		discount_total_price_with_condition = discount_total_price_with_condition + d2;
		tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x2;
	}
}

// Tail: resume from i, the first row the unrolled loop did not process.
// Restarting from `limit` is only correct when rows % 3 == 2; otherwise it
// re-adds rows already consumed above.
for (; i < rows; i++) {
	double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
	double x0 = d0 * (1 + LO_TAX[i]);

	quantity_sum = quantity_sum + LO_QUANTITY[i];
	discount_total_price = discount_total_price + d0;
	tax_discount_total_price = tax_discount_total_price + x0;

	if (ORDER_DATE[i] <= limit_orderdate) {
		quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
		discount_total_price_with_condition = discount_total_price_with_condition + d0;
		tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
	}
}

下图为3✖️1循环展开的结果

​ 可以看出3✖️1的循环展开是目前效果最好的优化

  • 4✖️1

    // 4x1 loop unrolling: four rows per iteration. The quantity and discount
    // totals use extra partial accumulators (t1..t3, te1..te3) that are merged
    // after the loops; the tax total keeps a single accumulator.
    int rows = lineorder_table_info.rows;
    // The unrolled loop runs while i .. i+3 are all valid row indices.
    int limit = rows - 3;
    int * LO_QUANTITY = lineorder_table_info.table->lo_quantity;
    int * LO_EXTENDEDPRICE = lineorder_table_info.table -> lo_extendedprice;
    int * ORDER_DATE = lineorder_table_info.table -> lo_orderdate;
    double * LO_DISCOUNT = lineorder_table_info.table -> lo_discount;
    double * LO_TAX = lineorder_table_info.table -> lo_tax;

    // Partial accumulators for rows i+1, i+2 and i+3 of each group.
    unsigned int t1 = 0, t2 = 0, t3 = 0;
    double te1 = 0, te2 = 0, te3 = 0;

    // Declared outside the loop so the tail loop can resume from it.
    int i;
    for (i = 0; i < limit; i = i + 4) {
        // Per-row terms, computed once and reused by the conditional totals.
        double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
        double d1 = LO_EXTENDEDPRICE[i+1] * (1 - LO_DISCOUNT[i+1]);
        double d2 = LO_EXTENDEDPRICE[i+2] * (1 - LO_DISCOUNT[i+2]);
        double d3 = LO_EXTENDEDPRICE[i+3] * (1 - LO_DISCOUNT[i+3]);
        double x0 = d0 * (1 + LO_TAX[i]);
        double x1 = d1 * (1 + LO_TAX[i+1]);
        double x2 = d2 * (1 + LO_TAX[i+2]);
        double x3 = d3 * (1 + LO_TAX[i+3]);

        quantity_sum = quantity_sum + LO_QUANTITY[i];
        t1 += LO_QUANTITY[i+1];
        t2 += LO_QUANTITY[i+2];
        t3 += LO_QUANTITY[i+3];

        // NOTE: splitting a double sum over several accumulators changes the
        // order of the additions and therefore the rounding of the result.
        discount_total_price = discount_total_price + d0;
        te1 += d1;
        te2 += d2;
        te3 += d3;

        tax_discount_total_price = tax_discount_total_price + x0 + x1 + x2 + x3;

        if (ORDER_DATE[i] <= limit_orderdate) {
            quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
            discount_total_price_with_condition = discount_total_price_with_condition + d0;
            tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
        }

        if (ORDER_DATE[i+1] <= limit_orderdate) {
            quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+1];
            discount_total_price_with_condition = discount_total_price_with_condition + d1;
            tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x1;
        }

        if (ORDER_DATE[i+2] <= limit_orderdate) {
            quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+2];
            discount_total_price_with_condition = discount_total_price_with_condition + d2;
            tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x2;
        }

        if (ORDER_DATE[i+3] <= limit_orderdate) {
            quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i+3];
            discount_total_price_with_condition = discount_total_price_with_condition + d3;
            tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x3;
        }
    }

    // Tail: resume from i, the first row the unrolled loop did not process.
    // Restarting from `limit` would re-add rows already consumed above.
    for (; i < rows; i++) {
        double d0 = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
        double x0 = d0 * (1 + LO_TAX[i]);

        quantity_sum = quantity_sum + LO_QUANTITY[i];
        discount_total_price = discount_total_price + d0;
        tax_discount_total_price = tax_discount_total_price + x0;

        if (ORDER_DATE[i] <= limit_orderdate) {
            quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
            discount_total_price_with_condition = discount_total_price_with_condition + d0;
            tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + x0;
        }
    }

    // Merge the partial accumulators into the final totals.
    quantity_sum = quantity_sum + t1 + t2 + t3;
    discount_total_price = discount_total_price + te1 + te2 + te3;
    
    

    可以看出4✖️1的优化效果不如3✖️1

多个累积变量

  • 对于double型数据来说,采用多个累积变量会改变浮点加法的结合顺序,舍入以后的结果和顺序计算不同,产生了会扣16分的严重后果!所以不能采用多个累积变量的方法

  • 在实际实验以后,多个累积变量也没有对运行时间有明显帮助

    这是将4✖️1变成4✖️4以后的结果

重新结合变换

  • 这个方法对于double类型数据来说也是一样,重新结合变换会导致答案错误
  • 应用到int类型的数据上,并没有对于结果有什么明显区别

总结

综上所述,在书上提到的优化方法综合以后,使用代码移动、减少过程调用、消除内存引用以后,再进行3✖️1的循环展开后,得到的结果是最优结果


二、结合SIMD的尝试

#include <cstdio>
#include "dataload.h"
#include <immintrin.h>
#include <emmintrin.h>
#define limit_orderdate 19950630
#define block 12

const char lineorder_name[] = "lineorder.tbl";

// Executes the x86 `rdtsc` instruction and returns its two output registers
// combined into a single 64-bit value.
static __inline__ uint64_t curtick() {
	unsigned long low_bits, high_bits;
	// "=a" receives the low half of the result, "=d" the high half.
	__asm__ __volatile__ ("rdtsc" : "=a"(low_bits), "=d"(high_bits));
	return ((uint64_t) high_bits << 32) | low_bits;
}

// Stores the current tick count in *t, marking the start of a measurement.
static __inline__ void startTimer(uint64_t *t) {
	const uint64_t now = curtick();
	*t = now;
}

// Replaces the start tick held in *t with the number of ticks elapsed since it
// was recorded.
static __inline__ void stopTimer(uint64_t *t) {
	const uint64_t started = *t;
	*t = curtick() - started;
}


// Adds the four 32-bit lanes of v, left to right.
static inline int sum_lanes_epi32(__m128i v) {
	int lanes[4];
	_mm_storeu_si128((__m128i *)lanes, v);
	return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}

// Adds the four double lanes of v, left to right.
static inline double sum_lanes_pd(__m256d v) {
	double lanes[4];
	_mm256_storeu_pd(lanes, v);
	return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}

// Loads the lineorder table, computes three totals over all rows and the same
// three totals over rows with lo_orderdate <= limit_orderdate, then prints the
// six results followed by the elapsed tick count of the computation.
//
// The unconditional totals are accumulated with SSE/AVX vectors, `block` rows
// (three 4-lane vectors) per iteration; the conditional totals are accumulated
// row by row. Returns 1 if the input file cannot be opened, 0 otherwise.
int main() {
	table_info lineorder_table_info;

	//load lineorder table from file
	FILE *lineorder_file = fopen(lineorder_name, "r");
	if (lineorder_file == NULL) {
		perror(lineorder_name);
		return 1;
	}
	// NOTE(review): the stream is not closed here because it is not visible
	// whether loadTable() takes ownership of it - confirm against dataload.h.
	loadTable(lineorder_file, &lineorder_table_info);

	unsigned int quantity_sum = 0;
	double discount_total_price = 0;
	double tax_discount_total_price = 0;
	unsigned int quantity_sum_with_condition = 0;
	double discount_total_price_with_condition = 0;
	double tax_discount_total_price_with_condition = 0;

	uint64_t beg;
	startTimer(&beg);

	//you should edit the following part to accelerate the calculation
	/*--------------------------------*/

	const int rows = lineorder_table_info.rows;
	const int *LO_QUANTITY = lineorder_table_info.table->lo_quantity;
	const int *LO_EXTENDEDPRICE = lineorder_table_info.table->lo_extendedprice;
	const int *ORDER_DATE = lineorder_table_info.table->lo_orderdate;
	const double *LO_DISCOUNT = lineorder_table_info.table->lo_discount;
	const double *LO_TAX = lineorder_table_info.table->lo_tax;

	// Rows [0, vector_rows) are processed `block` at a time; the remaining
	// tail_rows rows are handled by the scalar loop at the end.
	const int tail_rows = rows % block;
	const int vector_rows = rows - tail_rows;

	const __m256d ones = _mm256_set1_pd(1.0);
	__m128i quantity_sum_0 = _mm_setzero_si128();
	__m128i quantity_sum_1 = _mm_setzero_si128();
	__m128i quantity_sum_2 = _mm_setzero_si128();
	__m256d discount_sum_0 = _mm256_setzero_pd();
	__m256d discount_sum_1 = _mm256_setzero_pd();
	__m256d discount_sum_2 = _mm256_setzero_pd();
	__m256d tax_sum_0 = _mm256_setzero_pd();
	__m256d tax_sum_1 = _mm256_setzero_pd();
	__m256d tax_sum_2 = _mm256_setzero_pd();

	for (int i = 0; i < vector_rows; i += block) {
		// Conditional totals: scalar, in row order.
		for (int j = i; j < i + block; j++) {
			if (ORDER_DATE[j] <= limit_orderdate) {
				const double discounted = LO_EXTENDEDPRICE[j] * (1 - LO_DISCOUNT[j]);
				quantity_sum_with_condition += LO_QUANTITY[j];
				discount_total_price_with_condition += discounted;
				tax_discount_total_price_with_condition += discounted * (1 + LO_TAX[j]);
			}
		}

		// Unconditional quantity total. Unaligned loads are used because
		// nothing here establishes the alignment of the column arrays.
		quantity_sum_0 = _mm_add_epi32(quantity_sum_0,
			_mm_loadu_si128((const __m128i *)(LO_QUANTITY + i)));
		quantity_sum_1 = _mm_add_epi32(quantity_sum_1,
			_mm_loadu_si128((const __m128i *)(LO_QUANTITY + i + 4)));
		quantity_sum_2 = _mm_add_epi32(quantity_sum_2,
			_mm_loadu_si128((const __m128i *)(LO_QUANTITY + i + 8)));

		// price * (1 - discount), computed in registers so the table columns
		// are left unmodified.
		const __m256d discounted_0 = _mm256_mul_pd(
			_mm256_cvtepi32_pd(_mm_loadu_si128((const __m128i *)(LO_EXTENDEDPRICE + i))),
			_mm256_sub_pd(ones, _mm256_loadu_pd(LO_DISCOUNT + i)));
		const __m256d discounted_1 = _mm256_mul_pd(
			_mm256_cvtepi32_pd(_mm_loadu_si128((const __m128i *)(LO_EXTENDEDPRICE + i + 4))),
			_mm256_sub_pd(ones, _mm256_loadu_pd(LO_DISCOUNT + i + 4)));
		const __m256d discounted_2 = _mm256_mul_pd(
			_mm256_cvtepi32_pd(_mm_loadu_si128((const __m128i *)(LO_EXTENDEDPRICE + i + 8))),
			_mm256_sub_pd(ones, _mm256_loadu_pd(LO_DISCOUNT + i + 8)));
		discount_sum_0 = _mm256_add_pd(discount_sum_0, discounted_0);
		discount_sum_1 = _mm256_add_pd(discount_sum_1, discounted_1);
		discount_sum_2 = _mm256_add_pd(discount_sum_2, discounted_2);

		// price * (1 - discount) * (1 + tax)
		tax_sum_0 = _mm256_add_pd(tax_sum_0, _mm256_mul_pd(discounted_0,
			_mm256_add_pd(ones, _mm256_loadu_pd(LO_TAX + i))));
		tax_sum_1 = _mm256_add_pd(tax_sum_1, _mm256_mul_pd(discounted_1,
			_mm256_add_pd(ones, _mm256_loadu_pd(LO_TAX + i + 4))));
		tax_sum_2 = _mm256_add_pd(tax_sum_2, _mm256_mul_pd(discounted_2,
			_mm256_add_pd(ones, _mm256_loadu_pd(LO_TAX + i + 8))));
	}

	// Fold the vector accumulators into the scalar totals, one vector at a time.
	quantity_sum = quantity_sum + sum_lanes_epi32(quantity_sum_0);
	quantity_sum = quantity_sum + sum_lanes_epi32(quantity_sum_1);
	quantity_sum = quantity_sum + sum_lanes_epi32(quantity_sum_2);

	discount_total_price = discount_total_price + sum_lanes_pd(discount_sum_0);
	discount_total_price = discount_total_price + sum_lanes_pd(discount_sum_1);
	discount_total_price = discount_total_price + sum_lanes_pd(discount_sum_2);

	tax_discount_total_price = tax_discount_total_price + sum_lanes_pd(tax_sum_0);
	tax_discount_total_price = tax_discount_total_price + sum_lanes_pd(tax_sum_1);
	tax_discount_total_price = tax_discount_total_price + sum_lanes_pd(tax_sum_2);

	// Scalar tail for the rows that do not fill a whole block.
	for (int i = vector_rows; i < rows; i++) {
		const double discounted = LO_EXTENDEDPRICE[i] * (1 - LO_DISCOUNT[i]);
		const double taxed = discounted * (1 + LO_TAX[i]);

		quantity_sum = quantity_sum + LO_QUANTITY[i];
		discount_total_price = discount_total_price + discounted;
		tax_discount_total_price = tax_discount_total_price + taxed;

		if (ORDER_DATE[i] <= limit_orderdate) {
			quantity_sum_with_condition = quantity_sum_with_condition + LO_QUANTITY[i];
			discount_total_price_with_condition = discount_total_price_with_condition + discounted;
			tax_discount_total_price_with_condition = tax_discount_total_price_with_condition + taxed;
		}
	}
	/*--------------------------------*/


	stopTimer(&beg);

	//output
	printf("%u\n", quantity_sum);	// %u: the quantity totals are unsigned int
	printf("%0.6lf\n", discount_total_price);
	printf("%0.6lf\n", tax_discount_total_price);
	printf("%u\n", quantity_sum_with_condition);
	printf("%0.6lf\n", discount_total_price_with_condition);
	printf("%0.6lf\n", tax_discount_total_price_with_condition);
	printf("running time is %ld\n", (long)(beg));
	return 0;
}

用SIMD技术优化以后跑得更慢了…

发布了10 篇原创文章 · 获赞 2 · 访问量 130

猜你喜欢

转载自blog.csdn.net/weixin_44110392/article/details/104119393