Performance example source code from "Inside the C++ Object Model" (2)

编译环境:gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)
例子参照 4.4 节中“指向 Member Function 之指针”的效率(Pointer-to-Member Efficiency)一段编写,未包括多重继承和虚拟继承的情形。

// Global accumulators for the three cross-product components. The
// cross-product routines in this file add into them on every call.
float g_cx = 0.f;
float g_cy = 0.f;
float g_cz = 0.f;

// Simple 3-D point used as the subject of the function-call benchmarks.
// It is a polymorphic base class: cross_product() is virtual.
class pt3d
{
public:
	float x;
	float y;
	float z;

	// Builds a point from its three coordinates; each one defaults to zero.
	pt3d(float xx = 0.0f, float yy = 0.0f, float zz = 0.0f)
		: x(xx), y(yy), z(zz)
	{
	}

	// Objects of derived classes can be held through a pt3d pointer, so the
	// destructor is virtual to keep destruction through that pointer
	// well-defined.
	virtual ~pt3d() = default;

	// Adds the components of the cross product (pA x *this) to the global
	// accumulators g_cx, g_cy and g_cz.
	virtual inline void cross_product(const pt3d &pA)
	{
		g_cx += pA.y * z - pA.z * y;
		g_cy += pA.z * x - pA.x * z;
		g_cz += pA.x * y - pA.y * x;
	}
};

struct pt3d_derive : public pt3d
{
    
    
	pt3d_derive(float xx = 0.0, float yy = 0.0, float zz = 0.0)
		: pt3d(xx, yy, zz)
	{
    
    
	}
	virtual inline void cross_product(const pt3d &pA) override
	{
    
    
		g_cx += pA.y * z - pA.z * y;
		g_cy += pA.z * x - pA.x * z;
		g_cz += pA.x * y - pA.y * x;
	}
};

// Free function that is not declared inline: adds the components of the
// cross product (pA x pB) to the global accumulators g_cx, g_cy and g_cz.
void cross_product(const pt3d &pA, const pt3d &pB)
{
	g_cx = g_cx + (pA.y * pB.z - pA.z * pB.y);
	g_cy = g_cy + (pA.z * pB.x - pA.x * pB.z);
	g_cz = g_cz + (pA.x * pB.y - pA.y * pB.x);
}

inline void cross_product_inline(const pt3d &pA, const pt3d &pB)
{
    
    
	g_cx += pA.y * pB.z - pA.z * pB.y;
	g_cy += pA.z * pB.x - pA.x * pB.z;
	g_cz += pA.x * pB.y - pA.y * pB.x;
}

// Number of calls made by each benchmark loop: 2^30 = 1,073,741,824.
constexpr unsigned long long ITERATION_COUNT_FUN_PERFOM = 1ULL << 30;
// Benchmark for the free function cross_product(), which is not declared
// inline: resets the accumulators, calls it ITERATION_COUNT_FUN_PERFOM times
// between START_TIMING and END_TIMING, then prints the accumulated values.
void test_common_function()
{
	g_cx = g_cy = g_cz = 0.f;

	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);

	START_TIMING(0);
	for (unsigned long long remaining = ITERATION_COUNT_FUN_PERFOM; remaining != 0; --remaining)
	{
		cross_product(pA, pB);
	}
	END_TIMING(0);

	cout << "common:" << g_cx
	     << ", " << g_cy
	     << " ," << g_cz << endl;
}
// Benchmark for the inline free function cross_product_inline(): resets the
// accumulators, calls it ITERATION_COUNT_FUN_PERFOM times between
// START_TIMING and END_TIMING, then prints the accumulated values.
void test_inline_common_function()
{
	g_cx = g_cy = g_cz = 0.f;

	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);

	START_TIMING(0);
	for (unsigned long long remaining = ITERATION_COUNT_FUN_PERFOM; remaining != 0; --remaining)
	{
		cross_product_inline(pA, pB);
	}
	END_TIMING(0);

	cout << "inline common:" << g_cx
	     << ", " << g_cy
	     << " ," << g_cz << endl;
}
void test_virtual_member_function()
{
    
    
	pt3d *pA = new pt3d_derive(1.725f, 0.875f, 0.478f);
	pt3d *pB = new pt3d_derive(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		pB->cross_product(*pA);
	}
	END_TIMING(0);
	cout << "virtual:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
// Benchmark for pt3d::cross_product() called directly on a pt3d object (not
// through a pointer): resets the accumulators, performs
// ITERATION_COUNT_FUN_PERFOM calls between START_TIMING and END_TIMING, then
// prints the accumulated values.
void test_inline_member_function()
{
	g_cx = g_cy = g_cz = 0.f;

	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);

	START_TIMING(0);
	for (unsigned long long remaining = ITERATION_COUNT_FUN_PERFOM; remaining != 0; --remaining)
	{
		pB.cross_product(pA);
	}
	END_TIMING(0);

	cout << "inline member:" << g_cx
	     << ", " << g_cy
	     << " ," << g_cz << endl;
}

// Benchmark for pt3d_derive::cross_product() called directly on a pt3d_derive
// object (not through a pointer): resets the accumulators, performs
// ITERATION_COUNT_FUN_PERFOM calls between START_TIMING and END_TIMING, then
// prints the accumulated values.
void test_inline_member_function_derive()
{
	g_cx = g_cy = g_cz = 0.f;

	pt3d_derive pA(1.725f, 0.875f, 0.478f);
	pt3d_derive pB(0.315f, 0.317f, 0.838f);

	START_TIMING(0);
	for (unsigned long long remaining = ITERATION_COUNT_FUN_PERFOM; remaining != 0; --remaining)
	{
		pB.cross_product(pA);
	}
	END_TIMING(0);

	cout << "inline member derive:" << g_cx
	     << ", " << g_cy
	     << " ," << g_cz << endl;
}

// Runs every benchmark in a fixed order. The inline free-function benchmark
// is listed twice: the first run serves as a warm-up.
void test_function_call_performance()
{
	void (*const benchmarks[])() = {
		test_inline_common_function, // warm up
		test_inline_common_function,
		test_common_function,
		test_inline_member_function,
		test_inline_member_function_derive,
		test_virtual_member_function,
	};

	for (auto run : benchmarks)
	{
		run();
	}
}

-O2优化编译下的结果:

1.879560 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.784226 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.440695 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.791872 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.767321 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.453181 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06

-O0编译下的结果:

13.819541 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.965824 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.135956 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.531857 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.435564 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
15.631393 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06

结论:

  1. 在 -O2 优化下,普通(非 inline)函数与虚函数的性能基本一致,耗时均约为 inline 函数的三倍
  2. 在 -O0(关闭优化)下,除虚函数调用稍慢以外,其余各种函数调用的性能基本一致

猜你喜欢

转载自blog.csdn.net/alpha_007/article/details/115025960