Benchmark: computing a matrix determinant by cofactor expansion (the sum of each element of a row multiplied by its corresponding algebraic cofactor), comparing serial run time against OpenMP parallel run time.
// gpustbench: matrix-determinant micro-benchmark comparing a serial recursive
// cofactor expansion against an OpenMP-parallel expansion along the first row.
// Each repetition writes "<serial seconds>\t<openmp seconds>\n" to /tmp/gpudata.
//
// Build-time parameters (all optional, clamped to sane minimums):
//   Matrix_Order  order of the random test matrix   (minimum/default 10)
//   n_threads     OpenMP thread count               (default 5, minimum 3)
//   NTIMES        number of benchmark repetitions   (default 5, minimum 3)
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <vector>
#include <time.h>
#include <omp.h>

/* Clamp user-supplied macros to their defaults. #undef first: redefining an
 * already-defined macro to a different value is a preprocessor error, which
 * the original `#ifdef X / #define X` pattern triggered. */
#if defined(Matrix_Order) && (Matrix_Order <= 10)
#undef Matrix_Order
#endif
#ifndef Matrix_Order
#define Matrix_Order 10
#endif

#if defined(n_threads) && (n_threads <= 2)
#undef n_threads
#endif
#ifndef n_threads
#define n_threads 5
#endif

#if defined(NTIMES) && (NTIMES <= 2)
#undef NTIMES
#endif
#ifndef NTIMES
#define NTIMES 5
#endif

/* Matrix under test: 100x100 backing store, Matrix_Order x Matrix_Order used. */
int p[100][100];

/* Fill the global matrix with pseudo-random values in [0, 14]. */
void create() {
    for (int i = 0; i < Matrix_Order; i++) {
        for (int j = 0; j < Matrix_Order; j++) {
            p[i][j] = rand() % 15;
        }
    }
}

/* Dump the global matrix to stdout, one row per line.
 * (The original body was an empty loop nest that printed nothing; the
 *  function is never called from main, so filling it in changes nothing.) */
void print() {
    for (int i = 0; i < Matrix_Order; i++) {
        for (int j = 0; j < Matrix_Order; j++) {
            printf("%d ", p[i][j]);
        }
        printf("\n");
    }
}

/* Determinant of the leading n-by-n sub-matrix of p by recursive cofactor
 * expansion along the first row: det = sum_i (-1)^i * p[0][i] * det(minor_i).
 * O(n!) time — acceptable only for the small orders this benchmark uses.
 * Note: the parameter p intentionally shadows the global matrix. */
long long mydet(int p[100][100], int n) {
    if (n == 1)
        return p[0][0];
    long long sum = 0;
    for (int i = 0; i < n; i++) {
        int pp[100][100];                 /* minor: drop row 0 and column i */
        for (int j = 1, j1 = 0; j < n; j++, j1++) {
            for (int k = 0, k1 = 0; k < n; k++) {
                if (k != i) {             /* was `if (k == i); else` */
                    pp[j1][k1] = p[j][k];
                    k1++;
                }
            }
        }
        long long term = p[0][i] * mydet(pp, n - 1);
        sum += (i % 2) ? -term : term;    /* sign (-1)^(0+i) */
    }
    return sum;
}

/* One signed cofactor term of the GLOBAL matrix, expanded along row 0.
 * Factored out of main's parallel loop, which previously duplicated the
 * minor-building code from mydet verbatim. Terms for distinct columns are
 * independent, so calls are safe to run concurrently. */
static long long cofactor_term(int i) {
    int pp[100][100];
    for (int j = 1, j1 = 0; j < Matrix_Order; j++, j1++) {
        for (int k = 0, k1 = 0; k < Matrix_Order; k++) {
            if (k != i) {
                pp[j1][k1] = p[j][k];
                k1++;
            }
        }
    }
    long long term = p[0][i] * mydet(pp, Matrix_Order - 1);
    return (i % 2) ? -term : term;
}

int main() {
    FILE *fp = fopen("/tmp/gpudata", "w+");
    if (fp == NULL) {                     /* original wrote through NULL on failure */
        perror("fopen /tmp/gpudata");
        return 1;
    }
    for (int k = 0; k < NTIMES; k++) {
        create();

        /* Serial run, timed with clock() (CPU time). */
        clock_t start_t = clock();
        mydet(p, Matrix_Order);
        clock_t end_t = clock();
        double serialruning_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
        fprintf(fp, "%.4lf\t", serialruning_t);

        /* Parallel run, timed with omp_get_wtime() (wall time): the columns
         * of the cofactor expansion are distributed across threads and the
         * partial sums combined via reduction. */
        double start1 = omp_get_wtime();
        long long sum = 0;
        omp_set_num_threads(n_threads);
#pragma omp parallel for reduction(+:sum)
        for (int i = 0; i < Matrix_Order; i++) {
            sum += cofactor_term(i);
        }
        double finish1 = omp_get_wtime();
        double openmpruning_t = finish1 - start1;
        fprintf(fp, "%.4lf\n", openmpruning_t);
    }
    fclose(fp);
    return 0;
}
1. OpenMP versus MPI
OpenMP is a library for multi-threaded programming on shared-memory parallel systems (multiple cores on a processor, all sharing one main memory). It supports parallel program development and design: through high-level compiler directives, a serial program can be turned into a parallel one.
MPI: process-level parallelism with distributed memory (multiple processors, possibly on different computers, communicating with remote processors through explicit message passing over communication channels); good scalability.
OpenMP and MPI are both widely used parallel programming libraries. The difference between them: OpenMP targets multi-core processors and uses a shared-memory parallel model, so it is thread-oriented; MPI targets multiple symmetric CPUs or cluster servers and uses a message-passing (distributed-memory) model, so it is process-oriented.
2. Compiling and running on Linux
Default compilation
g++ gpustbench.c -o gpustbench -fopenmp
Compilation with macro parameters (any subset of Matrix_Order, n_threads, and NTIMES may be supplied via -D flags)