[并行与分布式程序设计] Jacobi迭代数据并行

#include <iostream>
#include <mpi.h>
#include <sys/time.h>
#include <vector>
using namespace std;

// Iterates the Jacobi stencil on the rows owned by `rank`, exchanging boundary
// rows with the neighbouring ranks; A holds the data, B is the second buffer.
void mpi_jacobi(vector<float>* A, vector<float>* B,
                int rank, int size, int n, int my_n, int steps,
                MPI_Status status);
// Resizes each of the n rows of A to n elements and fills them with rand() % 10.
void init_matrix(vector<float>* A, int n);
// Prints the n x n matrix A to stdout, one row per line.
void show_matrix(vector<float>* A, int n);
/**
 * Driver: rank 0 builds an n x n matrix, hands each other rank its block of
 * n / size consecutive rows, every rank runs mpi_jacobi on its block, and the
 * blocks are gathered back on rank 0, which prints the matrix before and after
 * together with the elapsed time in milliseconds.
 *
 * Each row of A is a separate std::vector, so rows are NOT contiguous with each
 * other in memory. All transfers therefore move exactly one row (n floats) per
 * message; sending n * my_n floats from the start of a row would run past the
 * end of that row's buffer whenever my_n > 1.
 *
 * @return 0 on success, 1 when n is not divisible by the number of processes.
 */
int main(int argc, char* argv[]) {
    // `const` makes n a constant expression, so A and B are ordinary fixed-size
    // arrays instead of a (non-standard) variable-length array of vectors.
    const int n = 4;
    const int steps = 2;
    vector<float> A[n];
    vector<float> B[n];
    init_matrix(A, n);
    // mpi_jacobi only ever writes interior cells (never row 0, row n-1,
    // column 0 or column n-1), so B must start with the same boundary values
    // as A. Copy A instead of relying on a second random fill matching it.
    for (int i = 0; i < n; i++) {
        B[i] = A[i];
    }

    MPI_Init(&argc, &argv);
    int rank = 0;
    int size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Status status{};

    // The row-block decomposition below requires an even split. Every rank
    // evaluates the same condition, so all of them leave together.
    if (n % size != 0) {
        if (rank == 0) {
            cerr << "matrix size " << n << " is not divisible by " << size
                 << " processes\n";
        }
        MPI_Finalize();
        return 1;
    }

    const int my_n = n / size;           // rows owned by each rank
    const int first_row = rank * my_n;   // first row owned by this rank
    const int end_row = first_row + my_n;

    if (rank == 0) {
        show_matrix(A, n);

        struct timeval t_start, t_end;
        gettimeofday(&t_start, nullptr);

        // Scatter: one message per row, tag 0. Messages between the same pair
        // of ranks with the same tag arrive in the order they were sent.
        for (int dest = 1; dest < size; dest++) {
            for (int r = dest * my_n; r < (dest + 1) * my_n; r++) {
                MPI_Send(A[r].data(), n, MPI_FLOAT, dest, 0, MPI_COMM_WORLD);
            }
        }

        mpi_jacobi(A, B, rank, size, n, my_n, steps, status);

        // Gather: one message per row, tag 1.
        for (int src = 1; src < size; src++) {
            for (int r = src * my_n; r < (src + 1) * my_n; r++) {
                MPI_Recv(A[r].data(), n, MPI_FLOAT, src, 1, MPI_COMM_WORLD, &status);
            }
        }

        gettimeofday(&t_end, nullptr);
        // Microsecond difference converted to milliseconds.
        const long total_time =
            (1000000 * (t_end.tv_sec - t_start.tv_sec) + t_end.tv_usec - t_start.tv_usec) / 1000;
        printf("total time: %ld\n", total_time);
        show_matrix(A, n);
    } else {
        // Each worker keeps its block at the same row indices it has in the
        // full matrix (rows first_row .. end_row - 1).
        for (int r = first_row; r < end_row; r++) {
            MPI_Recv(A[r].data(), n, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &status);
            // Keep B's boundary columns in step with the data just received.
            B[r] = A[r];
        }

        mpi_jacobi(A, B, rank, size, n, my_n, steps, status);

        for (int r = first_row; r < end_row; r++) {
            MPI_Send(A[r].data(), n, MPI_FLOAT, 0, 1, MPI_COMM_WORLD);
        }
    }

    MPI_Finalize();
    return 0;
}

// Message tags for the two directions of the halo exchange.
static const int kTagFirstRowUp = 10;   // a block's first row, sent to rank - 1
static const int kTagLastRowDown = 9;   // a block's last row, sent to rank + 1

// Exchanges the boundary rows of grid M with the neighbouring ranks: this
// rank's first/last owned rows go out, and the rows just outside its block
// (rank * my_n - 1 and (rank + 1) * my_n) are filled from the neighbours.
// The call order (recv-from-below, send-up, send-down, recv-from-above) is the
// one the blocking point-to-point chain relies on: the last rank has nothing
// to receive from below, so it sends first and the chain unwinds from there.
static void exchange_halo_rows(vector<float>* M, int rank, int size, int n,
                               int my_n, MPI_Status* status) {
    const int first = rank * my_n;
    const int last = (rank + 1) * my_n - 1;
    const bool has_lower = rank < size - 1;
    const bool has_upper = rank > 0;

    if (has_lower) {
        MPI_Recv(&M[last + 1][0], n, MPI_FLOAT, rank + 1, kTagFirstRowUp, MPI_COMM_WORLD, status);
    }
    if (has_upper) {
        MPI_Send(&M[first][0], n, MPI_FLOAT, rank - 1, kTagFirstRowUp, MPI_COMM_WORLD);
    }
    if (has_lower) {
        MPI_Send(&M[last][0], n, MPI_FLOAT, rank + 1, kTagLastRowDown, MPI_COMM_WORLD);
    }
    if (has_upper) {
        MPI_Recv(&M[first - 1][0], n, MPI_FLOAT, rank - 1, kTagLastRowDown, MPI_COMM_WORLD, status);
    }
}

// One Jacobi sweep: for every interior cell in rows [row_begin, row_end),
// dst = average of the four neighbours in src. Row 0, row n-1, column 0 and
// column n-1 are the fixed boundary and are never written.
static void jacobi_sweep(const vector<float>* src, vector<float>* dst,
                         int row_begin, int row_end, int n) {
    const int lo = row_begin < 1 ? 1 : row_begin;
    const int hi = row_end > n - 1 ? n - 1 : row_end;
    for (int i = lo; i < hi; i++) {
        for (int j = 1; j < n - 1; j++) {
            // Multiplying by 0.25 is an exact scaling, so the float literal
            // gives the same result as the double one without the promotion.
            dst[i][j] = 0.25f * (src[i - 1][j] + src[i + 1][j] + src[i][j + 1] + src[i][j - 1]);
        }
    }
}

/**
 * Runs `steps` Jacobi sweeps on the rows owned by this rank
 * (rank * my_n .. (rank + 1) * my_n - 1) and leaves the result in A.
 *
 * B is the second buffer: a sweep must read only values from the previous
 * sweep, so it cannot update a single grid in place. The two grids alternate
 * as source and destination.
 *
 * @param A      grid holding the input and, on return, the result; every row
 *               vector that is touched must already hold n elements
 * @param B      scratch grid with the same layout; its owned rows are
 *               overwritten with A's on entry so the fixed boundary cells agree
 * @param rank   this process's rank
 * @param size   number of processes
 * @param n      matrix dimension
 * @param my_n   number of rows owned by each rank
 * @param steps  number of sweeps to perform
 * @param status scratch MPI_Status used for the receives (passed by value, so
 *               nothing is reported back to the caller)
 */
void mpi_jacobi(vector<float>* A, vector<float>* B, int rank,
                int size, int n, int my_n, int steps, MPI_Status status) {
    if (my_n <= 0) {
        return;  // no rows owned: the row indices below would be out of range
    }
    const int row_begin = rank * my_n;
    const int row_end = row_begin + my_n;

    // The sweeps never write boundary cells, but the B -> A sweep reads them
    // from B. Make B's owned rows an exact copy of A's so the result does not
    // depend on how the caller happened to fill B.
    for (int i = row_begin; i < row_end; i++) {
        B[i] = A[i];
    }

    vector<float>* src = A;
    vector<float>* dst = B;
    for (int k = 0; k < steps; k++) {
        exchange_halo_rows(src, rank, size, n, my_n, &status);
        jacobi_sweep(src, dst, row_begin, row_end, n);
        vector<float>* previous = src;
        src = dst;
        dst = previous;
    }

    // After an odd number of sweeps the newest values sit in B; the contract is
    // that the result is in A, so copy the owned rows back.
    if (src != A) {
        for (int i = row_begin; i < row_end; i++) {
            A[i] = B[i];
        }
    }
}

// Fills the n x n matrix A with pseudo-random whole numbers in [0, 9].
// A points at n row vectors; each row is resized to n elements first.
// The generator is re-seeded from the current time on every call, so two
// calls made within the same second produce the same values.
void init_matrix(vector<float>* A, int n) {
    srand(unsigned(time(nullptr)));
    for (int row = 0; row < n; ++row) {
        vector<float>& cells = A[row];
        cells.resize(n);
        // After the resize the row has exactly n cells; fill them left to right.
        for (float& cell : cells) {
            cell = static_cast<float>(rand() % 10);
        }
    }
}

// Writes the n x n matrix A to stdout: each value in "%f" format followed by a
// space, one matrix row per line, and a blank line after the last row.
void show_matrix(vector<float>* A, int n) {
    for (int row = 0; row < n; ++row) {
        const vector<float>& cells = A[row];
        for (int col = 0; col < n; ++col) {
            printf("%f ", cells[col]);
        }
        printf("\n");
    }
    printf("\n");
}
每个进程都有矩阵A和B, 只是每个进程使用的部分不同: 各进程只使用0号进程分配给它的那部分, 其余部分都忽略.
每个进程把自己的那部分计算完后, 再统一集中到0号进程.
0号进程分配任务是主从模式
所有进程计算任务是对等模式
[并行与分布式程序设计] Jacobi迭代数据并行

猜你喜欢