From 82eefceb1508e91b99508bf441ece8621aed6639 Mon Sep 17 00:00:00 2001 From: Jon Rood Date: Thu, 30 May 2024 13:27:22 -0600 Subject: [PATCH 1/3] Try a few optimizations. --- src/exchangeDonors.C | 4 +- src/parallelComm.C | 133 +++++++++++++++++++++++++++++++++++++++++++ src/parallelComm.h | 2 + src/tioga.C | 3 +- 4 files changed, 139 insertions(+), 3 deletions(-) diff --git a/src/exchangeDonors.C b/src/exchangeDonors.C index 4e50f66..309b3cd 100644 --- a/src/exchangeDonors.C +++ b/src/exchangeDonors.C @@ -35,7 +35,6 @@ void tioga::exchangeDonors(void) // and receiving // pc->getMap(&nsend,&nrecv,&sndMap,&rcvMap); - if (nsend == 0) return; // // create packets to send and receive // and initialize them to zero @@ -74,7 +73,8 @@ void tioga::exchangeDonors(void) // // communicate donors (comm1) // - pc->sendRecvPackets(sndPack,rcvPack); + pc->sendRecvPackets2(sndPack,rcvPack); + if (nsend == 0) return; // Initialize linked lists and populate donor data from rcvPack for (int ib=0;ib snd_int_displs(numprocs+1, 0); + std::vector rcv_int_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_int_displs[i] = snd_int_displs[i-1] + sint[i-1]; + rcv_int_displs[i] = rcv_int_displs[i-1] + rint[i-1]; + } + for (int i=0; i < nsend; i++) { + int displ = snd_int_displs[sndMap[i]]; + for(int j=0; j < sint[sndMap[i]]; j++){ + all_snd_intData[displ+j] = sndPack[i].intData[j]; + } + } + MPI_Request int_request; + MPI_Ialltoallv(all_snd_intData, + sint, + snd_int_displs.data(), + MPI_INT, + all_rcv_intData, + rint, + rcv_int_displs.data(), + MPI_INT, + scomm, + &int_request); + + int all_snd_nreals = std::accumulate(sreal, sreal + numprocs, 0); + int all_rcv_nreals = std::accumulate(rreal, rreal + numprocs, 0); + REAL *all_snd_realData, *all_rcv_realData; + all_snd_realData=(REAL *) malloc(sizeof(REAL)*all_snd_nreals); + all_rcv_realData=(REAL *) malloc(sizeof(REAL)*all_rcv_nreals); + for (int i = 0; i < all_snd_nreals; i++) { + all_snd_realData[i] = 0; + } + for (int i = 0; i < all_rcv_nreals; i++) { + all_rcv_realData[i] = 0; + } + std::vector snd_real_displs(numprocs+1, 0); + std::vector rcv_real_displs(numprocs+1, 0); + for (int i=1; i <= numprocs; i++) { + snd_real_displs[i] = snd_real_displs[i-1] + sreal[i-1]; + rcv_real_displs[i] = rcv_real_displs[i-1] + rreal[i-1]; + } + for (int i=0; i < nsend; i++) { + int displ = snd_real_displs[sndMap[i]]; + for(int j=0; j < sreal[sndMap[i]]; j++){ + all_snd_realData[displ+j] = sndPack[i].realData[j]; + } + } + MPI_Request real_request; + MPI_Ialltoallv(all_snd_realData, + sreal, + snd_real_displs.data(), + MPI_DOUBLE, + all_rcv_realData, + rreal, + rcv_real_displs.data(), + MPI_DOUBLE, + scomm, + &real_request); + + // FIXME: here and above I think I should move this a bit lower + MPI_Wait(&int_request, MPI_STATUS_IGNORE); + for(i=0;i 0) { + rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); + } + if (rcvPack[i].nreals > 0) { + rcvPack[i].realData=(REAL *) malloc(sizeof(REAL)*rcvPack[i].nreals); + } + } + + // FIXME: here and above I think I should move this a bit lower + MPI_Wait(&real_request, MPI_STATUS_IGNORE); + for (int i=0; i < nrecv; i++) { + int displ = rcv_int_displs[rcvMap[i]]; + for(int j=0; j < rint[rcvMap[i]]; j++){ + rcvPack[i].intData[j] = all_rcv_intData[displ+j]; + } + } + for (int i=0; i < nrecv; i++) { + int displ = rcv_real_displs[rcvMap[i]]; + for(int j=0; j < rreal[rcvMap[i]]; j++){ + rcvPack[i].realData[j] = all_rcv_realData[displ+j]; + } + } + + TIOGA_FREE(all_snd_intData); + TIOGA_FREE(all_rcv_intData); + TIOGA_FREE(all_snd_realData); + TIOGA_FREE(all_rcv_realData); + TIOGA_FREE(sint); + TIOGA_FREE(sreal); + TIOGA_FREE(rint); + TIOGA_FREE(rreal); +} + void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) { int i; diff --git a/src/parallelComm.h b/src/parallelComm.h index c941a1e..6796e7a 100644 --- a/src/parallelComm.h +++ b/src/parallelComm.h @@ -58,6 +58,8 @@ class parallelComm { void sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack); + void sendRecvPackets2(PACKET *sndPack, PACKET *rcvPack); + void sendRecvPackets(PACKET *sndPack, PACKET *rcvPack); void sendRecvPacketsCheck(PACKET *sndPack, PACKET *rcvPack); diff --git a/src/tioga.C b/src/tioga.C index 3405419..bf4bf37 100644 --- a/src/tioga.C +++ b/src/tioga.C @@ -492,7 +492,8 @@ void tioga::performConnectivity(void) this->myTimer("tioga::exchangeDonors",1); //this->reduce_fringes(); //outputStatistics(); - MPI_Allreduce(&ihigh,&ihighGlobal,1,MPI_INT,MPI_MAX,scomm); + //MPI_Allreduce(&ihigh,&ihighGlobal,1,MPI_INT,MPI_MAX,scomm); + ihighGlobal = 0; //if (ihighGlobal) { this->myTimer("tioga::getCellIblanks",0); for (int ib=0;ib Date: Fri, 31 May 2024 09:16:50 -0600 Subject: [PATCH 2/3] Add assert. --- src/tioga.C | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tioga.C b/src/tioga.C index bf4bf37..503249b 100644 --- a/src/tioga.C +++ b/src/tioga.C @@ -493,6 +493,7 @@ void tioga::performConnectivity(void) //this->reduce_fringes(); //outputStatistics(); //MPI_Allreduce(&ihigh,&ihighGlobal,1,MPI_INT,MPI_MAX,scomm); + assert(ihigh == 0); ihighGlobal = 0; //if (ihighGlobal) { this->myTimer("tioga::getCellIblanks",0); From 13d3eea44ce494d5798a40da480b564b7d3ff133 Mon Sep 17 00:00:00 2001 From: Jon Rood Date: Mon, 3 Jun 2024 13:08:32 -0600 Subject: [PATCH 3/3] Fix shadowed variables. --- src/parallelComm.C | 115 ++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 70 deletions(-) diff --git a/src/parallelComm.C b/src/parallelComm.C index c2c93cb..4f72132 100644 --- a/src/parallelComm.C +++ b/src/parallelComm.C @@ -26,15 +26,12 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) { - int i; - int *sint,*sreal,*rint,*rreal; + int *sint=(int *)malloc(sizeof(int)*numprocs); + int *sreal=(int *) malloc(sizeof(int)*numprocs); + int *rint=(int *)malloc(sizeof(int)*numprocs); + int *rreal=(int *) malloc(sizeof(int)*numprocs); // - sint=(int *)malloc(sizeof(int)*numprocs); - sreal=(int *) malloc(sizeof(int)*numprocs); - rint=(int *)malloc(sizeof(int)*numprocs); - rreal=(int *) malloc(sizeof(int)*numprocs); - // - for(i=0;i 0) { rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); } @@ -214,19 +211,16 @@ void parallelComm::sendRecvPacketsAll(PACKET *sndPack, PACKET *rcvPack) void parallelComm::sendRecvPackets2(PACKET *sndPack,PACKET *rcvPack) { - int i; - int *sint,*sreal,*rint,*rreal; - // - sint=(int *)malloc(sizeof(int)*numprocs); - sreal=(int *) malloc(sizeof(int)*numprocs); - rint=(int *)malloc(sizeof(int)*numprocs); - rreal=(int *) malloc(sizeof(int)*numprocs); + int *sint=(int *)malloc(sizeof(int)*numprocs); + int *sreal=(int *) malloc(sizeof(int)*numprocs); + int *rint=(int *)malloc(sizeof(int)*numprocs); + int *rreal=(int *) malloc(sizeof(int)*numprocs); // remove when using stl vectors and just init the vectors to 0 - for(i=0;i 0) { rcvPack[i].intData=(int *) malloc(sizeof(int)*rcvPack[i].nints); } @@ -347,40 +341,34 @@ void parallelComm::sendRecvPackets2(PACKET *sndPack,PACKET *rcvPack) void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) { - int i; - int *scount,*rcount; - int tag,irnum; - MPI_Request *request; - MPI_Status *status; - // - scount=(int *)malloc(2*sizeof(int)*nsend); - rcount=(int *) malloc(2*sizeof(int)*nrecv); - request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); - status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); + int *scount=(int *)malloc(2*sizeof(int)*nsend); + int *rcount=(int *) malloc(2*sizeof(int)*nrecv); + MPI_Request *request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); + MPI_Status *status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // - for(i=0;i 0) { tag=1; @@ -398,7 +386,7 @@ void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) } } // - for(i=0;i 0){ tag=1; @@ -423,34 +411,28 @@ void parallelComm::sendRecvPackets(PACKET *sndPack,PACKET *rcvPack) void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) { - int i; - int *scount,*rcount; - int tag,irnum; - MPI_Request *request; - MPI_Status *status; - // - scount=(int *)malloc(2*sizeof(int)*nsend); - rcount=(int *) malloc(2*sizeof(int)*nrecv); - request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); - status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); + int *scount=(int *)malloc(2*sizeof(int)*nsend); + int *rcount=(int *) malloc(2*sizeof(int)*nrecv); + MPI_Request *request=(MPI_Request *) malloc(sizeof(MPI_Request)*2*(nsend+nrecv)); + MPI_Status *status=(MPI_Status *) malloc(sizeof(MPI_Status)*2*(nsend+nrecv)); // - for(i=0;i 0) { tag=1; @@ -484,7 +466,7 @@ void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) } } // - for(i=0;i 0){ tag=1; @@ -509,8 +491,6 @@ void parallelComm::sendRecvPacketsCheck(PACKET *sndPack,PACKET *rcvPack) void parallelComm::setMap(int ns,int nr, int *snd,int *rcv) { - int i; - // if (sndMap) TIOGA_FREE(sndMap); sndMap=NULL; if (rcvMap) TIOGA_FREE(rcvMap); rcvMap=NULL; // @@ -519,8 +499,8 @@ void parallelComm::setMap(int ns,int nr, int *snd,int *rcv) sndMap=(int *) malloc(sizeof(int)*nsend); rcvMap=(int *) malloc(sizeof(int)*nrecv); // - for(i=0;i