代码的结构
本代码1中的主要文件有:Makefile、component.h、day.h、hour.h、libmpit.a、mpiname.h、mpiwrapper.c、mpiwrapper_f.c、profile.c、profile.h、tauutil.h、time.c、time.h等。其中,下面标红的文件可以重点了解一下:
- Makefile:Makefile文件用于编译,可直接用make或make clean指令进行编译生成libmpit.a的静态库。
- component.h:定义了6个区域,分别是"physics"、"dynamic"、"baroclinic"、"barotropic"、"ice"、"lnd"。
- day.h:定义了天数。
- hour.h:定义了小时。
- libmpit.a:该文件是make编译后生成的静态库文件。
- mpiname.h:用数组方式罗列了mpi的功能函数。
- mpiwrapper.c:对mpi功能函数进行同名覆盖2。
- mpiwrapper_f.c:Fortran调用C语言函数。
- profile.c:定义了profile功能。
- profile.h:profile.c的头文件,声明了profile相关的数据结构与函数接口。
- tauutil.h3:Tuning and Analysis Utilities (TAU) 性能分析头文件。
- time.c:定义了执行时间(注:在Makefile与#include中,该文件以timer.c/timer.h命名)。
- time.h:time.c的头文件。
结构图
代码之间的调用结构图
代码函数结构:
|- mpiwrapper.c
|- MPI_Init()
|- PROFILE_START(0)
|- PMPI_Init()
|- PMPI_Comm_rank()
|- PROFILE_INIT()
|- PROFILE_STOP(0, 0)
|- MPI_Init_thread()
|- PROFILE_START(1)
|- PMPI_Init_thread()
|- PROFILE_STOP(1, 0)
|- ***
|- MPI_Profile_on()
|- PROFILE_ON()
|- MPI_Profile_off()
|- PROFILE_OFF()
|- profile.c ------------------------------------
|- PROFILE_INIT()
|- PROFILE_PHYSICS_INIT()
|- _timer_physics_start(0)
|- profile_physics_init_()
|- PROFILE_PHYSICS_INIT()
|- ***
|- PROFILE_ON()
|- PROFILE_OFF()
|- PROFILE_START()
|- PROFILE_STOP()
|- PROFILE_PHYSICS_FINISH()
|- _timer_physics_stop(0)
|- _physics_timer_clear()
|- profile_physics_finish_()
|- PROFILE_PHYSICS_FINISH()
|- ***
|- PROFILE_FINISH()
|- _timer_clear()
|- sort_mpi()
|- time.c ---------------------------------------
|- current_time()
|- gettimeofday()
|- _wtime()
|- gettimeofday()
|- _elapsed_time()
|- _wtime()
|- ***
|- _time_clear()
|- ***
|- _time_start()
|- ***
|- _timer_stop()
|- ***
|- _timer_read()
|- ***
代码分析
下面分别对上述文件进行详细的分析。
- Makefile
# Compiler and optimization level
CC = cc
OPT = -O3
# CFLAGS: path of the MPI include directory
CFLAGS = (mpi的include文件路径)
# DEF: preprocessor switches that enable profiling and the model components
DEF= -DPERF_PROFILE -DATM -DLND -DICE -DOCN
# AR builds the static library libmpit.a; flags: r=insert, c=create, v=verbose
AR = ar
ARTAG = rcv

libmpit : profile.o timer.o mpiwrapper.o mpiwrapper_f.o
	$(AR) $(ARTAG) libmpit.a mpiwrapper.o mpiwrapper_f.o timer.o profile.o
	$(RM) *.o

profile.o : profile.c
	$(CC) $(CFLAGS) $(DEF) -c profile.c

mpiwrapper.o : mpiwrapper.c
	$(CC) $(CFLAGS) $(DEF) -c mpiwrapper.c

mpiwrapper_f.o : mpiwrapper_f.c
	$(CC) $(CFLAGS) $(DEF) -c mpiwrapper_f.c

timer.o : timer.c
	$(CC) $(CFLAGS) -c timer.c

clean :
	rm -f *.o libmpit.a
其中,AR的相关知识可参考4 。
2. mpiwrapper.c文件
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include "profile.h"
/******************************************************************
* *
* MPI Functions for Management *
* *
******************************************************************/
/*
 * Profiling wrapper for MPI_Init.
 *
 * Times the underlying PMPI_Init call in profile slot 0, then
 * initializes the profiling subsystem with this process's rank.
 * Returns the PMPI_Init return code unchanged.
 *
 * (Removed unused locals size/procname/procnamelength.)
 */
int MPI_Init( argc, argv )
int * argc;
char *** argv;
{
    int returnVal;
    int proc_id;   /* rank in MPI_COMM_WORLD, used to name the per-rank profile file */
#ifdef PERF_PROFILE
    PROFILE_START(0);   /* slot 0 is reserved for MPI_Init itself */
#endif
#ifdef DEBUG
    printf("Enter MPI_Init\n");
#endif
    returnVal = PMPI_Init( argc, argv );
#ifdef PERF_PROFILE
    PMPI_Comm_rank( MPI_COMM_WORLD, &proc_id );
    PROFILE_INIT(proc_id);
    PROFILE_STOP(0,0);
#endif
    return returnVal;
}
#ifdef PERF_MPI_THREADED
/*
 * Profiling wrapper for MPI_Init_thread: times the underlying
 * PMPI_Init_thread call in profile slot 1.  Only compiled when the
 * threaded MPI interface is enabled via PERF_MPI_THREADED.
 */
int MPI_Init_thread (argc, argv, required, provided )
int * argc;
char *** argv;
int required;
int *provided;
{
int returnVal;
#ifdef PERF_PROFILE
PROFILE_START(1);
#endif
#ifdef DEBUG
printf("Enter MPI_Init_thread\n");
#endif
returnVal = PMPI_Init_thread( argc, argv, required, provided );
#ifdef PERF_PROFILE
PROFILE_STOP(1,0);
#endif
return returnVal;
}
#endif /* PERF_MPI_THREADED */
/*
 * Profiling wrapper for MPI_Finalize.
 *
 * Flushes the accumulated profile report (PROFILE_FINISH) before
 * shutting MPI down, and returns the PMPI_Finalize return code
 * unchanged.
 *
 * (Removed unused locals size/procname/procnamelength/ymd/hour and a
 * dead commented-out duplicate of the PROFILE_FINISH call.)
 */
int MPI_Finalize( )
{
    int returnVal;
#ifdef PERF_PROFILE
    PROFILE_FINISH();
#endif
#ifdef DEBUG
    printf("Enter MPI_Finalize\n");
#endif
    returnVal = PMPI_Finalize();
    return returnVal;
}
/********************************************************
* User defined functions *
********************************************************/
/* User-callable switch: turn MPI profiling on from application code. */
void MPI_Profile_on()
{
PROFILE_ON();
}
/* User-callable switch: turn MPI profiling off from application code. */
void MPI_Profile_off()
{
PROFILE_OFF();
}
- profile.c文件
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "mpi.h"
#include "profile.h"
#include "timer.h"
#include "day.h"
#include "mpiname.h"
#include "component.h"
/* ---- profiling state shared across profile.c ---- */
int proc_id = 0;      /* MPI rank, set by PROFILE_INIT() */
int profile_on = 1;   /* global on/off switch, toggled by PROFILE_ON/PROFILE_OFF */
int threshold;        /* message-size threshold, set under MPI_TIME */
int date = 0;
double comm_time;     /* per-kernel communication-time accumulator (PROFILE_FINISH) */
static mpi_perf mpi_physics_profile[MPI_FUNCTIONS];   /* per-MPI-function stats for the physics region */
double used_physics_time = 0.0, comm_physics_time = 0.0, comp_physics_time = 0.0,elapsed_physics_time = 0.0;
//...(omitted)
FILE* profile_file;   /* per-rank report file "mpi_record.<rank>" */
static mpi_store mpi_record[KERNEL][MPI_FUNCTIONS];   /* accumulated per-kernel records */
void sort_mpi(int model, mpi_store *_mpi_sort[KERNEL][MPI_FUNCTIONS]);
/*
 * Initialize profiling for this rank.
 *
 * Zeroes every per-kernel/per-function record and opens the per-rank
 * report file "mpi_record.<rank>".  Called once from the MPI_Init
 * wrapper after the rank is known.
 *
 * Fixes: sprintf -> snprintf (bounded write), diagnostics to stderr,
 * exit(EXIT_FAILURE) instead of exit(0) on failure, removed unused
 * locals j and k.
 */
void PROFILE_INIT(int process_id){
    char profile_name[100];
    int m,i;
    for(m=0;m<KERNEL;m++){
        for (i=0; i<MPI_FUNCTIONS; i++){
            mpi_record[m][i].total_time = 0.0;
            mpi_record[m][i].comm_time = 0.0;
            mpi_record[m][i].comp_time = 0.0;
            mpi_record[m][i].comm_size = 0;
            mpi_record[m][i].count = 0;
            mpi_record[m][i].flag = 0;
        }
    }
    proc_id = process_id;
    snprintf(profile_name, sizeof(profile_name), "mpi_record.%d", proc_id);
    profile_file = fopen(profile_name, "w");
    if (profile_file == NULL){
        /* a failed open must not look like success to the caller's shell */
        fprintf(stderr, "Open profile file failed\n");
        exit(EXIT_FAILURE);
    }
#ifdef MPI_TIME
    mpi_profile[25].flag = 1;
    mpi_profile[26].flag = 1;
    threshold = 2000;
#endif
}
/*
 * Begin a physics profiling interval: start the physics wall-clock
 * timer (slot 0) and reset every per-function counter so the interval
 * accumulates from zero.
 */
void PROFILE_PHYSICS_INIT(){
    int idx;
    _timer_physics_start(0);
    for (idx = 0; idx < MPI_FUNCTIONS; idx++){
        mpi_physics_profile[idx].entry_time = 0.0;
        mpi_physics_profile[idx].total_time = 0.0;
        mpi_physics_profile[idx].count = 0;
        mpi_physics_profile[idx].flag = 0;
    }
}
/* Fortran binding: the trailing underscore matches the common
 * Fortran-compiler name-mangling scheme. */
void profile_physics_init_(){
PROFILE_PHYSICS_INIT();
}
//...(省略)
/* Enable profiling (this is also the initial state of profile_on). */
void PROFILE_ON(){
// Default is ON.
profile_on = 1;
}
/* Disable profiling; PROFILE_START/PROFILE_STOP become no-ops. */
void PROFILE_OFF(){
profile_on = 0;
}
/*
 * Record the entry timestamp of MPI function `mpi_id` in every
 * component's profile table.  No-op while profiling is switched off.
 *
 * Fix: take a single current_time() snapshot instead of six separate
 * clock reads, so all components record the same entry time for the
 * same MPI call.
 */
void PROFILE_START(int mpi_id){
    if (profile_on){
        double now = current_time();
        mpi_physics_profile[mpi_id].entry_time = now;
        mpi_dynamic_profile[mpi_id].entry_time = now;
        mpi_baroclinic_profile[mpi_id].entry_time = now;
        mpi_barotropic_profile[mpi_id].entry_time = now;
        mpi_lnd_profile[mpi_id].entry_time = now;
        mpi_ice_profile[mpi_id].entry_time = now;
    }
}
/*
 * Record the exit of MPI function `mpi_id`: accumulate elapsed time,
 * communicated size and call count into every component's profile
 * table.  `size` is the message size of the call (0 when not
 * applicable).  No-op while profiling is switched off.
 *
 * Fix: take a single current_time() snapshot (mirroring
 * PROFILE_START) instead of six separate clock reads.
 */
void PROFILE_STOP(int mpi_id,int size){
    if (profile_on){
        double now = current_time();

        used_physics_time = now - mpi_physics_profile[mpi_id].entry_time;
        mpi_physics_profile[mpi_id].total_time += used_physics_time;
        mpi_physics_profile[mpi_id].comm_size += size;
        (mpi_physics_profile[mpi_id].count)++;

        used_baroclinic_time = now - mpi_baroclinic_profile[mpi_id].entry_time;
        mpi_baroclinic_profile[mpi_id].total_time += used_baroclinic_time;
        mpi_baroclinic_profile[mpi_id].comm_size += size;
        (mpi_baroclinic_profile[mpi_id].count)++;

        used_barotropic_time = now - mpi_barotropic_profile[mpi_id].entry_time;
        mpi_barotropic_profile[mpi_id].total_time += used_barotropic_time;
        mpi_barotropic_profile[mpi_id].comm_size += size;
        (mpi_barotropic_profile[mpi_id].count)++;

        used_dynamic_time = now - mpi_dynamic_profile[mpi_id].entry_time;
        mpi_dynamic_profile[mpi_id].total_time += used_dynamic_time;
        mpi_dynamic_profile[mpi_id].comm_size += size;
        (mpi_dynamic_profile[mpi_id].count)++;

        used_lnd_time = now - mpi_lnd_profile[mpi_id].entry_time;
        mpi_lnd_profile[mpi_id].total_time += used_lnd_time;
        mpi_lnd_profile[mpi_id].comm_size += size;
        (mpi_lnd_profile[mpi_id].count)++;

        used_ice_time = now - mpi_ice_profile[mpi_id].entry_time;
        mpi_ice_profile[mpi_id].total_time += used_ice_time;
        mpi_ice_profile[mpi_id].comm_size += size;
        (mpi_ice_profile[mpi_id].count)++;
    }
}
/*
 * End a physics profiling interval: stop the physics wall-clock timer,
 * fold this interval's per-function totals into the persistent
 * mpi_record[0][...] row (index 0 is the physics kernel), add the
 * interval's elapsed wall time to the kernel total, then clear the
 * timer for the next interval.
 */
void PROFILE_PHYSICS_FINISH(){
int i,j;
_timer_physics_stop(0);
elapsed_physics_time = _timer_physics_read(0);
for(i=0; i<MPI_FUNCTIONS; i++)
{
mpi_record[0][i].comm_time += mpi_physics_profile[i].total_time;
mpi_record[0][i].count += mpi_physics_profile[i].count;
mpi_record[0][i].comm_size += mpi_physics_profile[i].comm_size;
}
/* slot [0][0].total_time doubles as the kernel's elapsed-time accumulator */
mpi_record[0][0].total_time += elapsed_physics_time;
_physics_timer_clear(0);
}
/* Fortran binding for PROFILE_PHYSICS_FINISH (trailing-underscore mangling). */
void profile_physics_finish_(){
PROFILE_PHYSICS_FINISH();
}
//...(省略)
/*
 * Write the final per-kernel report to the per-rank profile file and
 * release resources.  For each kernel, the MPI functions are printed
 * in descending order of accumulated communication time, followed by
 * a total/comm/comp summary line.
 *
 * (Removed unused locals i, j and id.)
 */
void PROFILE_FINISH(){
    int m,n,k;
    mpi_store *mpi_sort[KERNEL][MPI_FUNCTIONS];
    fprintf(profile_file, "Record communication time each model day\n");
    for (m=0; m<KERNEL; m++){
        fprintf(profile_file, "Model:%s\n",Kernel[m]);
        fprintf(profile_file, "Func_Name\t\tTime:%d\t\tCount:%d\t\tSize:\n", proc_id, proc_id);
        /* sort an array of pointers so the records themselves stay in place */
        for (k=0; k<MPI_FUNCTIONS; k++){
            mpi_sort[m][k] = &mpi_record[m][k];
        }
        sort_mpi(m,mpi_sort);
        comm_time = 0.0;
        for(k=0; k<MPI_FUNCTIONS; k++){
            if (mpi_sort[m][k]->count != 0){
                /* recover the original function index from the pointer offset */
                n = mpi_sort[m][k] - &mpi_record[m][0];
                fprintf(profile_file, "%s\t\t%.2f\t\t%ld\t\t%lld\n",MPI_Functions[n], mpi_sort[m][k]->comm_time, mpi_sort[m][k]->count,mpi_sort[m][k]->comm_size);
                comm_time += mpi_sort[m][k]->comm_time;
            }
        }
        fprintf(profile_file, "total time: %.2f, comm time: %.2f, comp time: %.2f\n", mpi_record[m][0].total_time,comm_time,mpi_record[m][0].total_time-comm_time);
        fprintf(profile_file, "===============================================================\n");
    }
    fclose(profile_file);
    _timer_clear(0);
}
/*
 * Sort one kernel's row of profile-record pointers in place, in
 * descending order of accumulated communication time.  A simple
 * exchange sort: MPI_FUNCTIONS is small, so O(n^2) is acceptable.
 *
 * (Removed unused locals d, h and id.)
 */
void sort_mpi(int model,mpi_store *_mpi_sort[KERNEL][MPI_FUNCTIONS]){
    int i, j;
    int m = model;
    mpi_store* temp;
    for(i=0; i<MPI_FUNCTIONS; i++){
        for(j=i; j<MPI_FUNCTIONS; j++){
            if (_mpi_sort[m][j]->comm_time > _mpi_sort[m][i]->comm_time){
                temp = _mpi_sort[m][j];
                _mpi_sort[m][j] = _mpi_sort[m][i];
                _mpi_sort[m][i] = temp;
            }
        }
    }
}
从代码中可以看出,有些函数的命名的末尾会多加一个_
的下划线符号,例如:profile_physics_finish_
函数,其主要原因是为了便于Fortran调用C语言5。
- time.c文件
/************************************************************
This file implements linux timer function with gettimeofday.
*************************************************************/
#include "timer.h"
#include <sys/time.h>
#include <stdlib.h>
/*
 * Wall-clock time in microseconds since the Unix epoch, as a double.
 * Backed by gettimeofday(); microsecond resolution.
 */
double current_time(void)
{
    struct timeval tv;
    gettimeofday(&tv, 0);
    return (double)tv.tv_sec * 1e6 + (double)tv.tv_usec;
}
/**************************************************************
void WTIME(double);
**************************************************************/
/*
 * Store into *t the microseconds elapsed since the first call to
 * _wtime() in this process (the first call returns only the usec part).
 * NOTE(review): `sec` is a plain int, so tv.tv_sec is truncated on
 * platforms with 64-bit time_t -- harmless for deltas today, but worth
 * confirming/widening before 2038.
 */
void _wtime(double *t)
{
static int sec = -1;   /* epoch second of the first call; -1 = not yet set */
struct timeval tv;
gettimeofday(&tv, 0);
if (sec < 0)
sec = tv.tv_sec;
*t = (tv.tv_sec - sec)*1.0e6 + tv.tv_usec;
}
/*****************************************************************/
/****** E L A P S E D _ T I M E ******/
/*****************************************************************/
/* Convenience wrapper: return the _wtime() value (microseconds since
 * the process's first _wtime() call). */
double _elapsed_time( void )
{
double t;
_wtime( &t );
return( t );
}
/* Physics-region variant of _elapsed_time(); identical implementation,
 * kept separate so each region has its own named entry point. */
double _elapsed_physics_time( void )
{
double t;
_wtime( &t );
return( t );
}
...(省略)
/* Per-slot timer state: start[] holds the start timestamps and
 * elapsed[] the measured intervals, one pair per region (64 slots each). */
double start[64], elapsed[64];
double physics_elapsed[64],lnd_elapsed[64],dynamic_elapsed[64],barotropic_elapsed[64],baroclinic_elapsed[64],ice_elapsed[64];
double physics_start[64],dynamic_start[64],lnd_start[64],ice_start[64],barotropic_start[64],baroclinic_start[64];
/*****************************************************************/
/****** T I M E R _ C L E A R ******/
/*****************************************************************/
/* Reset timer slot n to zero. */
void _timer_clear( int n )
{
elapsed[n] = 0.0;
}
/* Reset physics timer slot n to zero. */
void _physics_timer_clear( int n )
{
physics_elapsed[n] = 0.0;
}
...(省略)
/*****************************************************************/
/****** T I M E R _ S T A R T ******/
/*****************************************************************/
/* Start timer slot n: record the current elapsed-time reading. */
void _timer_start( int n )
{
start[n] = _elapsed_time();
}
/* Start dynamic-region timer slot n (uses the dynamic clock variant,
 * defined elsewhere in this file). */
void _timer_dynamic_start( int n )
{
dynamic_start[n] = _elapsed_dynamic_time();
}
...(省略)
/*****************************************************************/
/****** T I M E R _ S T O P ******/
/*****************************************************************/
/*
 * Stop timer slot n: record the interval since the matching
 * _timer_start(n).  Each stop overwrites elapsed[n] (no accumulation).
 */
void _timer_stop( int n )
{
    elapsed[n] = _elapsed_time() - start[n];
}
/* Stop physics timer slot n: store the interval since
 * _timer_physics_start(n) (overwrites, does not accumulate). */
void _timer_physics_stop( int n )
{
double t, now;
now = _elapsed_physics_time();
t = now - physics_start[n];
physics_elapsed[n] = t;
}
...(省略)
/*****************************************************************/
/****** T I M E R _ R E A D ******/
/*****************************************************************/
/* Read the last measured interval of timer slot n (microseconds). */
double _timer_read( int n )
{
return( elapsed[n] );
}
/* Read the last measured interval of dynamic-region timer slot n. */
double _timer_dynamic_read( int n )
{
return( dynamic_elapsed[n] );
}
...(省略)
- mpiwrapper_f.c
#include <mpi.h>
#include "tauutil.h"
#ifdef TAU_LAMPI
/*
 * LA-MPI-specific request-handle conversion: LA-MPI's MPI_Request_c2f
 * does not translate MPI_REQUEST_NULL correctly, so map it explicitly
 * to the Fortran null value (-1).
 */
MPI_Fint TAU_MPI_Request_c2f(MPI_Request c_request) {
MPI_Fint f_request;
f_request = MPI_Request_c2f(c_request);
/* LA-MPI doesn't seem to translate MPI_REQUEST_NULL properly
so we'll check for it and set it to the proper value for fortran */
if (c_request == MPI_REQUEST_NULL) {
f_request = -1;
}
return f_request;
}
#else
/* For all other implementations, just #define it to avoid a wrapper function call */
#define TAU_MPI_Request_c2f MPI_Request_c2f
#endif /* TAU_LAMPI */
#define TAU_MPI_CART_CREATE
/*
 * Canonical Fortran binding for MPI_Allgather (lowercase name with a
 * single trailing underscore -- the most common Fortran name-mangling
 * scheme).  Converts the Fortran integer handles to C handles with
 * MPI_Type_f2c/MPI_Comm_f2c and stores the C return code in *ierr.
 *
 * Modernized from an obsolescent K&R definition to an ANSI prototype
 * (same ABI, same callers).
 */
void mpi_allgather_( void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
                     void *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype,
                     MPI_Fint *comm, MPI_Fint *ierr )
{
    *ierr = MPI_Allgather( sendbuf, *sendcount, MPI_Type_f2c(*sendtype), recvbuf, *recvcount, MPI_Type_f2c(*recvtype), MPI_Comm_f2c(*comm) );
}
/* Fortran binding variant: lowercase with a double trailing underscore
 * (g77-style mangling).  Forwards to the canonical mpi_allgather_. */
void mpi_allgather__( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr)
void * sendbuf;
MPI_Fint *sendcount;
MPI_Fint *sendtype;
void * recvbuf;
MPI_Fint *recvcount;
MPI_Fint *recvtype;
MPI_Fint *comm;
MPI_Fint *ierr;
{
mpi_allgather_( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr);
}
/* Fortran binding variant: uppercase, no underscore (some compilers'
 * mangling).  Forwards to the canonical mpi_allgather_. */
void MPI_ALLGATHER( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr)
void * sendbuf;
MPI_Fint *sendcount;
MPI_Fint *sendtype;
void * recvbuf;
MPI_Fint *recvcount;
MPI_Fint *recvtype;
MPI_Fint *comm;
MPI_Fint *ierr;
{
mpi_allgather_( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr);
}
/* Fortran binding variant: uppercase with a trailing underscore.
 * Forwards to the canonical mpi_allgather_. */
void MPI_ALLGATHER_( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr)
void * sendbuf;
MPI_Fint *sendcount;
MPI_Fint *sendtype;
void * recvbuf;
MPI_Fint *recvcount;
MPI_Fint *recvtype;
MPI_Fint *comm;
MPI_Fint *ierr;
{
mpi_allgather_( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr);
}
/* Fortran binding variant: lowercase, no underscore.  Forwards to the
 * canonical mpi_allgather_. */
void mpi_allgather( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr)
void * sendbuf;
MPI_Fint *sendcount;
MPI_Fint *sendtype;
void * recvbuf;
MPI_Fint *recvcount;
MPI_Fint *recvtype;
MPI_Fint *comm;
MPI_Fint *ierr;
{
mpi_allgather_( sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm , ierr);
}
···(省略)
该文件定义了与mpiwrapper.c文件中相应的函数,便于Fortran语言进行调用。
运行案例
本案例只适用于本校的超算平台。
- HelloWorld代码(Fortran代码)6
! Minimal MPI hello-world used to exercise the libmpit profiling library.
program hello_world
use mpi
integer ierr, num_procs, my_id
call MPI_INIT ( ierr )
call PROFILE_DYNAMIC_INIT() ! start profiling the "dynamic" region
! find out my process ID, and how many processes were started.
call MPI_COMM_RANK (MPI_COMM_WORLD, my_id, ierr)
call MPI_COMM_SIZE (MPI_COMM_WORLD, num_procs, ierr)
print *, "Hello world! I'm process ", my_id, " out of ", num_procs, " processes."
call PROFILE_DYNAMIC_FINISH() ! stop profiling the "dynamic" region
call MPI_FINALIZE ( ierr )
stop
end program hello_world
- Makefile文件
# NOTE(review): CC actually holds the Fortran MPI wrapper (mpif90);
# conventionally this variable would be named FC.
CC=mpif90
# Path of the libmpit.a built in the previous section.
LibmpitPath=(替换为前面编译好的libmpit.a的路径)
RM=rm -f
all: main.o
$(CC) main.o $(LibmpitPath) -o a.out
$(RM) *.o
main.o: main.f90
$(CC) -c main.f90
clean:
$(RM) *.o mpi_record.* a.out
- 执行指令及运行结果
mpirun -np 进程数量 ./a.out
其中,进程数量可以自由地设置,程序会相应启动该数量的进程。在各个进程运行结束之后会对每个进程生成相应的文本文件,如mpi_record.*文件。在文件中记录了各个区域(CESM、DYNAMIC、PHYSICS、BAROTROPIC、BAROCLINIC、LND、ICE,共七项)的mpi操作、mpi的相应时间、通信次数以及信息量。
本案例运行了四个进程,从运行结果中提取了一个mpi_record.0文件的输出结果如下所示: