MPI matrix-vector multiplication


Foreword

Implement the MPI matrix-vector multiplication algorithm, then compile, debug, run, and optimize it in a distributed environment.


1. Experimental principle and tips

To make it easy to build the matrix and verify the result, a custom function can generate an identity matrix and a vector whose elements are all 1 for the calculation. Pay special attention to how the matrix is passed to functions in C: a dynamically sized two-dimensional matrix is most conveniently passed as a one-dimensional array and then treated as two-dimensional inside the function, i.e. element (i, j) of a matrix with n columns is stored at index i*n + j.

2. Experimental source code

The code is shown below:

#include<stdio.h>
#include<mpi.h>
#include<stdlib.h>
/*
 * Reads the matrix dimensions on rank 0 and broadcasts them to every rank
 * of MPI_COMM_WORLD.
 *
 * my_rank  rank of the calling process; only rank 0 reads from stdin
 * m, n     out: the two integers entered by the user, identical on all
 *          ranks after the call
 *
 * If rank 0 cannot parse two integers, a message is written to stderr and
 * both dimensions are set to 0, so no rank ever sees an indeterminate value.
 * Must be called by all ranks because it performs a collective broadcast.
 */
void Get_input(int my_rank,int *m,int *n)
{
    int dims[2] = {0, 0};

    if (my_rank == 0) {
        printf("Please enter m,n:\n");
        fflush(stdout); /* make the prompt visible before blocking on input */
        if (scanf("%d %d", &dims[0], &dims[1]) != 2) {
            fprintf(stderr, "Get_input: could not read two integers; using m = n = 0\n");
            dims[0] = 0;
            dims[1] = 0;
        }
    }

    /* One broadcast of both values instead of two separate collectives. */
    MPI_Bcast(dims, 2, MPI_INT, 0, MPI_COMM_WORLD);
    *m = dims[0];
    *n = dims[1];
}
/*
 * Reads an m x n matrix (row-major) on rank 0 and scatters it across
 * MPI_COMM_WORLD so that each rank receives local_m * n consecutive values.
 *
 * n, m          number of columns and rows of the full matrix
 * local_matrix  out: receive buffer for at least local_m * n doubles
 * local_m       number of rows delivered to each rank
 * my_rank       rank of the calling process; only rank 0 reads from stdin
 *
 * Entries that cannot be parsed are reported on stderr and left at 0.0.
 * The temporary full matrix is owned by this function and released before
 * returning. Must be called by all ranks (collective scatter).
 */
void Get_matrix(int n, int m, double *local_matrix, int local_m, int my_rank)
{
    /* NULL on every rank except 0; MPI_Scatter only reads it on the root. */
    double *A = NULL;

    if (my_rank == 0) {
        size_t total = 0;

        if (m > 0 && n > 0) {
            total = (size_t)m * (size_t)n;
            /* calloc so that entries never read from stdin are a defined 0.0 */
            A = calloc(total, sizeof *A);
            if (A == NULL) {
                fprintf(stderr, "Get_matrix: cannot allocate %d x %d matrix\n", m, n);
                /* NOTE(review): exits without MPI_Finalize and relies on the
                 * launcher to stop the other ranks; consider MPI_Abort. */
                exit(EXIT_FAILURE);
            }
        }

        printf("Please enter the matrix:\n");
        /* Row-major storage: element (i, j) lives at index i * n + j. */
        for (size_t k = 0; k < total; ++k) {
            if (scanf("%lf", &A[k]) != 1) {
                fprintf(stderr, "Get_matrix: input ended after %zu of %zu entries; the rest are 0\n",
                        k, total);
                break;
            }
        }
    }

    MPI_Scatter(A, local_m * n, MPI_DOUBLE, local_matrix, local_m * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    free(A); /* no-op on non-root ranks */
}

/*
 * Gathers local_m * n doubles from every rank of comm onto rank 0 and
 * prints the result there as m rows of n values.
 *
 * Only rank 0 allocates a receive buffer and produces output; the other
 * ranks just contribute their block. Must be called by all ranks of comm.
 */
void Print_matrix(int my_rank,int n,int m,int local_m,double *local_matrix,MPI_Comm comm)
{
    double *gathered = NULL;
    int row;
    int col;

    if (my_rank == 0) {
        gathered = malloc(m*n*sizeof(double));
    }

    /* The receive buffer is only significant on the root, so one call serves all ranks. */
    MPI_Gather(local_matrix,local_m*n,MPI_DOUBLE,gathered,local_m*n,MPI_DOUBLE,0,comm);

    if (my_rank != 0) {
        return;
    }

    printf("The matrix is:\n");
    row = 0;
    while (row < m) {
        for (col = 0; col < n; ++col) {
            printf("%f ",gathered[row*n+col]);
        }
        printf("\n");
        ++row;
    }
    free(gathered);
}

/*
 * Reads an n-element vector on rank 0 and scatters it across comm so that
 * each rank receives local_n consecutive values.
 *
 * my_rank       rank of the calling process; only rank 0 reads from stdin
 * n             length of the full vector
 * local_n       number of elements delivered to each rank
 * local_vector  out: receive buffer for at least local_n doubles
 * comm          communicator used for the scatter
 *
 * Entries that cannot be parsed are reported on stderr and left at 0.0.
 * The temporary full vector is owned by this function and released before
 * returning. Must be called by all ranks of comm (collective scatter).
 */
void Get_vector(int my_rank,int n,int local_n,double *local_vector,MPI_Comm comm)
{
    /* NULL on every rank except 0; MPI_Scatter only reads it on the root. */
    double *vector = NULL;

    if (my_rank == 0) {
        if (n > 0) {
            /* calloc so that entries never read from stdin are a defined 0.0 */
            vector = calloc((size_t)n, sizeof *vector);
            if (vector == NULL) {
                fprintf(stderr, "Get_vector: cannot allocate %d doubles\n", n);
                /* NOTE(review): exits without MPI_Finalize and relies on the
                 * launcher to stop the other ranks; consider MPI_Abort. */
                exit(EXIT_FAILURE);
            }
        }

        printf("Please enter the vector:\n");
        for (int i = 0; i < n; i++) {
            if (scanf("%lf", &vector[i]) != 1) {
                fprintf(stderr, "Get_vector: input ended after %d of %d entries; the rest are 0\n",
                        i, n);
                break;
            }
        }
    }

    /* Executed by every rank, as in the original program. */
    printf("\n");

    MPI_Scatter(vector,local_n,MPI_DOUBLE,local_vector,local_n,MPI_DOUBLE,0,comm);

    free(vector); /* no-op on non-root ranks */
}

/*
 * Gathers local_n doubles from every rank of comm onto rank 0 and prints
 * the first n gathered values there on a single line.
 *
 * Only rank 0 allocates a receive buffer and produces output; the other
 * ranks just contribute their slice. Must be called by all ranks of comm.
 */
void Print_vector(int my_rank,int n,int local_n,double *local_vector,MPI_Comm comm)
{
    double *gathered = NULL;
    int idx;

    if (my_rank == 0) {
        gathered = malloc(n*sizeof(double));
    }

    /* The receive buffer is only significant on the root, so one call serves all ranks. */
    MPI_Gather(local_vector,local_n,MPI_DOUBLE,gathered,local_n,MPI_DOUBLE,0,comm);

    if (my_rank != 0) {
        return;
    }

    printf("The vector is:\n");
    idx = 0;
    while (idx < n) {
        printf("%f ",gathered[idx]);
        idx++;
    }
    printf("\n");
    free(gathered);
}


/*
 * Computes this rank's part of y = A * x.
 *
 * Every rank contributes local_n elements of x; MPI_Allgather assembles
 * them into a temporary n-element buffer on each rank. Then each of the
 * local_m rows held in local_matrix (row-major, n columns) is multiplied
 * with that buffer and the dot product is stored in local_y.
 *
 * The temporary buffer is allocated and released inside this function.
 * Must be called by all ranks of comm.
 */
void Mat_vect_mult(double *local_matrix,double *local_vector,double *local_y,int local_m,int n,int local_n,MPI_Comm comm)
{
    double *full_x = malloc(n*sizeof(double));
    int row;
    int col;

    MPI_Allgather(local_vector,local_n,MPI_DOUBLE,full_x,local_n,MPI_DOUBLE,comm);

    row = 0;
    while (row < local_m) {
        double *out = &local_y[row];

        /* Accumulate directly in the output slot, left to right over the row. */
        *out = 0.0;
        for (col = 0; col < n; col++) {
            *out += local_matrix[row*n+col]*full_x[col];
        }
        row++;
    }

    free(full_x);
}

/*
 * Gathers local_m doubles of the result vector from every rank of comm
 * onto rank 0 and prints the first m gathered values there on one line.
 *
 * Only rank 0 allocates a receive buffer and produces output; the other
 * ranks just contribute their slice. Must be called by all ranks of comm.
 */
void Print_y(int my_rank,double *local_y,int m,int local_m,MPI_Comm comm)
{
    double *result = NULL;
    int idx;

    if (my_rank == 0) {
        result = malloc(m*sizeof(double));
    }

    /* The receive buffer is only significant on the root, so one call serves all ranks. */
    MPI_Gather(local_y,local_m,MPI_DOUBLE,result,local_m,MPI_DOUBLE,0,comm);

    if (my_rank != 0) {
        return;
    }

    printf("The vector y is:\n");
    idx = 0;
    while (idx < m) {
        printf("%lf ",result[idx]);
        idx++;
    }
    printf("\n");
    free(result);
}

/*
 * Program entry point: reads an m x n matrix and an n-element vector on
 * rank 0, distributes them in equal blocks over all ranks, computes the
 * matrix-vector product in parallel and prints the inputs and the result
 * on rank 0.
 *
 * The block distribution uses integer division, so m and n must be
 * positive multiples of the number of processes; otherwise the program
 * reports the problem and exits with a failure status.
 *
 * Returns 0 on success, EXIT_FAILURE on invalid dimensions.
 */
int main(void)
{
    int comm_sz, my_rank;
    /* Initialised so the validation below sees defined values even if no
     * dimensions could be read. */
    int m = 0, n = 0;

    MPI_Init(NULL, NULL);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    Get_input(my_rank, &m, &n);

    /* Get_input broadcasts m and n, so every rank evaluates this test
     * identically and all of them leave together. */
    if (m <= 0 || n <= 0 || m % comm_sz != 0 || n % comm_sz != 0) {
        if (my_rank == 0) {
            fprintf(stderr,
                    "m and n must be positive multiples of the process count (%d); got m=%d, n=%d\n",
                    comm_sz, m, n);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    int local_m = m / comm_sz; /* rows of the matrix / entries of y per rank */
    int local_n = n / comm_sz; /* entries of the input vector per rank */

    double *local_matrix = malloc((size_t)local_m * (size_t)n * sizeof *local_matrix);
    double *local_vector = malloc((size_t)local_n * sizeof *local_vector);
    double *local_y = malloc((size_t)local_m * sizeof *local_y);
    if (local_matrix == NULL || local_vector == NULL || local_y == NULL) {
        fprintf(stderr, "rank %d: out of memory\n", my_rank);
        /* NOTE(review): exits without MPI_Finalize and relies on the
         * launcher to stop the other ranks; consider MPI_Abort. */
        exit(EXIT_FAILURE);
    }

    Get_matrix(n, m, local_matrix, local_m, my_rank);
    Print_matrix(my_rank, n, m, local_m, local_matrix, MPI_COMM_WORLD);
    Get_vector(my_rank, n, local_n, local_vector, MPI_COMM_WORLD);
    Print_vector(my_rank, n, local_n, local_vector, MPI_COMM_WORLD);
    Mat_vect_mult(local_matrix, local_vector, local_y, local_m, n, local_n, MPI_COMM_WORLD);
    Print_y(my_rank, local_y, m, local_m, MPI_COMM_WORLD);

    free(local_y);
    free(local_vector);
    free(local_matrix);

    MPI_Finalize();
    return 0;
}

3. Experimental results

1. The realization of 4*4 matrix-vector multiplication.
insert image description here

4. Problems encountered in the experiment and solutions

Problem 1: Memory for the matrix could not be allocated dynamically.
Solution: My grasp of basic C was still weak, so I studied the malloc function and used it to solve the problem.
Problem 2: The program only works when the matrix can be distributed evenly, i.e. when the dimensions are divisible by the number of processes; the non-divisible case requires further work on the program.
Solution: Modify the program further so that one process is responsible for the leftover part that cannot be divided evenly.

Summarize

I gained a lot from this experiment. First, the matrix is generated and its storage is allocated dynamically. Second, the multiplication of a matrix and a vector is implemented, which turned out not to be difficult. I learned a lot about parallel computing and came to understand the ideas behind it, which has been very beneficial.

Guess you like

Origin blog.csdn.net/weixin_51759592/article/details/127574571
Recommended