Performance example source code from "Inside the C++ Object Model" (2)

Compilation environment: gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04).
The example follows section 4.4, "Pointer-to-Member Efficiency", of "Inside the C++ Object Model"; it does not cover multiple inheritance or virtual inheritance.

// Global accumulators for the three cross-product components. Every
// cross-product variant in this listing adds into them, and each test_*
// function resets them to zero before its timed loop and prints them afterwards.
// NOTE(review): these are single-precision. The sums printed in the published
// results (about 2^24, -2^25 and 2^23) look like float saturation rather than
// the true totals -- confirm before relying on the printed values.
float g_cx = 0.f, g_cy = 0.f, g_cz = 0.f;

/// 3D point with public float coordinates and a virtual cross-product
/// accumulator. Serves as the polymorphic base class of the benchmark.
class pt3d
{
public:
	float x;
	float y;
	float z;

	/// Builds a point from its coordinates; each coordinate defaults to zero.
	pt3d(float xx = 0.0, float yy = 0.0, float zz = 0.0)
		: x(xx), y(yy), z(zz)
	{
	}

	/// Virtual destructor: the class is used as a polymorphic base and derived
	/// objects are handled through pt3d pointers, so destroying one through a
	/// base pointer must be well-defined.
	virtual ~pt3d() = default;

	/// Adds the components of (pA x *this) to the global accumulators
	/// g_cx, g_cy and g_cz.
	/// @param pA left-hand operand of the cross product.
	virtual inline void cross_product(const pt3d &pA)
	{
		g_cx += pA.y * z - pA.z * y;
		g_cy += pA.z * x - pA.x * z;
		g_cz += pA.x * y - pA.y * x;
	}
};

struct pt3d_derive : public pt3d
{
    
    
	pt3d_derive(float xx = 0.0, float yy = 0.0, float zz = 0.0)
		: pt3d(xx, yy, zz)
	{
    
    
	}
	virtual inline void cross_product(const pt3d &pA) override
	{
    
    
		g_cx += pA.y * z - pA.z * y;
		g_cy += pA.z * x - pA.x * z;
		g_cz += pA.x * y - pA.y * x;
	}
};

/// Free-function variant: adds the components of (pA x pB) to the global
/// accumulators g_cx, g_cy and g_cz.
/// @param pA left-hand operand of the cross product.
/// @param pB right-hand operand of the cross product.
void cross_product(const pt3d &pA, const pt3d &pB)
{
	g_cx += (pA.y * pB.z) - (pA.z * pB.y);	// x component
	g_cy += (pA.z * pB.x) - (pA.x * pB.z);	// y component
	g_cz += (pA.x * pB.y) - (pA.y * pB.x);	// z component
}

/// Same arithmetic as cross_product(), but declared `inline`: adds the
/// components of (pA x pB) to the global accumulators g_cx, g_cy and g_cz.
/// @param pA left-hand operand of the cross product.
/// @param pB right-hand operand of the cross product.
inline void cross_product_inline(const pt3d &pA, const pt3d &pB)
{
	g_cx += (pA.y * pB.z) - (pA.z * pB.y);	// x component
	g_cy += (pA.z * pB.x) - (pA.x * pB.z);	// y component
	g_cz += (pA.x * pB.y) - (pA.y * pB.x);	// z component
}

// Number of calls made inside each timed loop: 2^30 (1,073,741,824).
constexpr unsigned long long ITERATION_COUNT_FUN_PERFOM = 1ULL << 30;
void test_common_function()
{
    
    
	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		cross_product(pA, pB);
	}
	END_TIMING(0);
	cout << "common:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
void test_inline_common_function()
{
    
    
	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		cross_product_inline(pA, pB);
	}
	END_TIMING(0);
	cout << "inline common:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
void test_virtual_member_function()
{
    
    
	pt3d *pA = new pt3d_derive(1.725f, 0.875f, 0.478f);
	pt3d *pB = new pt3d_derive(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		pB->cross_product(*pA);
	}
	END_TIMING(0);
	cout << "virtual:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}
void test_inline_member_function()
{
    
    
	pt3d pA(1.725f, 0.875f, 0.478f);
	pt3d pB(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		pB.cross_product(pA);
	}
	END_TIMING(0);
	cout << "inline member:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}

void test_inline_member_function_derive()
{
    
    
	pt3d_derive pA(1.725f, 0.875f, 0.478f);
	pt3d_derive pB(0.315f, 0.317f, 0.838f);
	g_cx = 0.f;
	g_cy = 0.f;
	g_cz = 0.f;
	START_TIMING(0);
	for (unsigned long long iters = 0; iters < ITERATION_COUNT_FUN_PERFOM; iters++)
	{
    
    
		pB.cross_product(pA);
	}
	END_TIMING(0);
	cout << "inline member derive:" << g_cx << ", " << g_cy << " ," << g_cz << endl;
}

/// Runs every call-cost benchmark once, in a fixed order, after one warm-up
/// pass of the inline free-function benchmark.
void test_function_call_performance()
{
	typedef void (*bench_fn)();

	// Executed front to back; the first entry is the warm-up run.
	const bench_fn benchmarks[] = {
		test_inline_common_function, // warm up
		test_inline_common_function,
		test_common_function,
		test_inline_member_function,
		test_inline_member_function_derive,
		test_virtual_member_function,
	};

	for (const bench_fn run : benchmarks)
	{
		run();
	}
}

Results when compiled with -O2 optimization:

1.879560 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.784226 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.440695 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.791872 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
1.767321 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
5.453181 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06

Results when compiled with -O0 (optimization disabled):

13.819541 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.965824 inline common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.135956 common:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.531857 inline member:1.67772e+07, -3.35544e+07 ,8.38861e+06
13.435564 inline member derive:1.67772e+07, -3.35544e+07 ,8.38861e+06
15.631393 virtual:1.67772e+07, -3.35544e+07 ,8.38861e+06

Conclusion:

  1. With -O2 optimization, the ordinary (non-inline) function and the virtual function perform the same, and both are about three times slower than the inline functions.
  2. With -O0 (optimization disabled), the virtual function call is the slowest; all other function calls perform about the same.

Guess you like

Origin blog.csdn.net/alpha_007/article/details/115025960