diff --git a/bbb/bbb.v b/bbb/bbb.v index 843c96bd..9542e53d 100644 --- a/bbb/bbb.v +++ b/bbb/bbb.v @@ -3237,6 +3237,7 @@ ifexmain integer /0/ #scalar to indicate if subroutine allocate #=0 means it is not. exmain_aborted logical /.false./ # Set to .true. in Python version on control-C abort iallcall integer /0/ #flag to signal first call to allocate +comnfe integer /0/ # Number of NK iterations for time-step ***** RZ_cell_info: # RZ grid-cell center and face locations diff --git a/com/com.v b/com/com.v index a4221299..13bf3388 100755 --- a/com/com.v +++ b/com/com.v @@ -705,3 +705,4 @@ tanh_multi(i:integer,a:real,j:integer,b:real,fname:string,d:real) subroutine ***** Flags: # Common flags used by UEDGE iprint integer /1/ # Flag controlling whether to be verbose or not + diff --git a/ppp/omp_parallel.F90 b/ppp/omp_parallel.F90 index fd6965a5..64e33fb4 100644 --- a/ppp/omp_parallel.F90 +++ b/ppp/omp_parallel.F90 @@ -604,7 +604,8 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot) use Dim,only:ny use Selec, only:yinc,xrinc,xlinc Use Grid,only:ijactot - + Use Cdv, only: comnfe + integer yinc_bkp,xrinc_bkp,xlinc_bkp,iv,tid integer,intent(in)::neq real,intent(in)::yl(*) @@ -674,30 +675,28 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot) if (CheckPandf1.gt.0) then - Time2=omp_get_wtime() - call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave) - Time2=omp_get_wtime()-Time2 - OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1 + Time2=omp_get_wtime() + call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave) + Time2=omp_get_wtime()-Time2 + OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1 + if (OMPPandf1Verbose.gt.0) then write(*,*) "Timing Pandf1 serial:",OMPTimeSerialPandf1,"(",Time2,")/parallel:",OMPTimeParallelPandf1,'(',Time1,')' - write(*,*) 'ijactot ',ijactot - call Compare(yldot,yldotsave,neq) - write(*,*) "serial and parallel pandf are identical" endif + call Compare(yldot,yldotsave,neq) + write(*,'(a,i4)') " Serial and parallel pandf are identical for nfe = ", comnfe + endif else call pandf1 (-1,-1, 0.0, neq, time, yl, yldot) endif end subroutine OMPPandf1Rhs -module Bins -contains -subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftBin,iRightBin,inc) - integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax - integer ::N,SizeBin,i,Nchunk - integer,intent(out),allocatable:: iCenterbin(:),inc(:),iLeftBin(:),iRightBin(:) -! integer,intent(out):: iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk) +subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,ichunktot,Padding,iCenterBin,iLeftBin,iRightBin,inc) + implicit none + integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax, ichunktot + integer,intent(out):: iCenterbin(ichunktot),inc(ichunktot),iLeftBin(ichunktot),iRightBin(ichunktot) + integer ::N,SizeBin,Nchunk, i N=ieqmax-ieqmin+1 Nchunk=ichunkmax-ichunkmin+1 - allocate (iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk)) if (N>Nchunk) then SizeBin=int((N/Nchunk)) @@ -724,7 +723,6 @@ subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftB return end subroutine CreateBin -end module Bins subroutine MakeChunksPandf1() Use Indexes,only: igyl @@ -733,41 +731,43 @@ subroutine MakeChunksPandf1() Nivchunk,Nxchunks,Nychunks,iymaxchunk,ixmaxchunk,iyminchunk,ixminchunk use Lsode, only: neq use Dim, only:nx,ny - use Bins implicit none integer:: remakechunk,i,ii,ichunk,iv,ix,iy - integer,allocatable:: iyCenterBin(:),iyRightBin(:),iyLeftBin(:),incy(:) - integer,allocatable:: ixCenterBin(:),ixRightBin(:),ixLeftBin(:),incx(:) - - allocate (iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks)) - allocate (ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks)) + integer:: iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks) + integer::ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks) remakechunk=0 if ((Nxchunks.ne.Nxchunks_old).or.(Nychunks.ne.Nychunks_old)) then - if (Nychunks.gt.1) then if (Nychunks.eq.ny) then iyLeftBin(1)=0 iyRightBin(1)=1 iyCenterBin(1)=1 incy(1)=ypadding - call CreateBin(2,ny-1,2,Nychunks-1,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy) + call CreateBin( 2,ny-1,2,Nychunks-1, Nychunks, ypadding, & + iyCenterBin, iyLeftBin, iyRightBin,& + incy & + ) iyLeftBin(Nychunks)=ny iyRightBin(Nychunks)=ny+1 iyCenterBin(Nychunks)=ny incy(Nychunks)=ypadding - write(*,*) '----- Bins in y direction: ', Nychunks, ny+2 - do iy=1,Nychunks - write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy) - enddo + if (OMPPandf1Verbose.gt.1) then + write(*,*) '----- Bins in y direction: ', Nychunks, ny+2 + do iy=1,Nychunks + write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy) + enddo + endif else - call CreateBin(0,ny+1,1,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy) + call CreateBin(0,ny+1,1,Nychunks,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy) - write(*,*) '----- Bins in y direction: ', Nychunks, ny+2 - do iy=1,Nychunks + if (OMPPandf1Verbose.gt.1) then + write(*,*) '----- Bins in y direction: ', Nychunks, ny+2 + do iy=1,Nychunks write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy) - enddo + enddo + endif ! now we check the first and last bins to check that ypadding is 3 if iyCenterBin=2 if (iyCenterBin(1)==0) then @@ -787,7 +787,7 @@ subroutine MakeChunksPandf1() if (Nxchunks.gt.1) then - call CreateBin(0,nx+1,1,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx) + call CreateBin(0,nx+1,1,Nxchunks,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx) else ixCenterBin(1)=-1 ixLeftBin(1)=0 @@ -865,8 +865,7 @@ subroutine MakeChunksPandf1() enddo endif endif - - + return end subroutine MakeChunksPandf1 !subroutine PrintChunks diff --git a/ppp/parallel.F90 b/ppp/parallel.F90 index cb314859..92116810 100644 --- a/ppp/parallel.F90 +++ b/ppp/parallel.F90 @@ -32,15 +32,18 @@ subroutine InitParallel endif -! if (OMPParallelPandf1.gt.0) then -! if (OMPParallelJac==0) then -! call xerrab('Cannot run omp parallel evaluation of pandf1 without running jacobian omp evaluation.') -! endif -! call InitOMPPandf1() -! ParallelPandf1=1 -! else -! ParallelPandf1=0 -! endif + if (OMPParallelPandf1.gt.0) then + if (OMPParallelJac==0) then + write(*,*) "Parallel Pandf1 requires Parallel Jac: activating..." + OMPParallelJac=1 + call InitOMPJac + ParallelJac=1 + endif + call InitOMPPandf1() + ParallelPandf1=1 + else + ParallelPandf1=0 + endif end subroutine InitParallel diff --git a/ppp/ppp.v b/ppp/ppp.v index e1f175a6..c599b7f2 100644 --- a/ppp/ppp.v +++ b/ppp/ppp.v @@ -5,11 +5,11 @@ ppp ***** ParallelSettings: OMPParallelPandf1 integer /0/ # [0]: serial pandf1 rhs calc [1] omp parallel pandf1 rhs calc OMPParallelJac integer /0/ # [0]: serial jacobian calc [1] omp parallel jacobian calc -ParallelWarning integer /1/ # Warning for users who wish to use it +ParallelWarning integer /0/ # Warning for users who wish to use it CheckJac integer /0/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of Jacobian. # If differences between para and serial Jacobians, dump both Jacs in serialjac.dat and paralleljac.dat with routine jac_write in current working folder. See UEDGEToolBox docs for analysis tools. Nthreads integer /64/ # Number of threads to be used to calculate the Jacobian -CheckPandf1 integer /1/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1. +CheckPandf1 integer /0/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1. ***** ParallelDebug: OMPJacDebug integer /0/ #Print debug info for omp constructs diff --git a/svr/nksol.m b/svr/nksol.m index 7f3b3bdb..ddc1895d 100755 --- a/svr/nksol.m +++ b/svr/nksol.m @@ -1006,6 +1006,7 @@ call errgen(ierr,zero,zero,iret,ivar) c----------------------------------------------------------------------- call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 fnrm = vnormnk(n,savf,sf) f1nrm = fnrm*fnrm/two if (iprint .ge. 1) write(iunit,400) iter,fnrm,nfe @@ -1203,6 +1204,7 @@ c norm(f) asymptotes from above to a finite value c c----------------------------------------------------------------------- implicit none + Use(Cdv) integer n, iret, iter, itermx, ncscmx, iterm, locwmp, locimp integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje integer nni, nli, npe, nps, ncfl, nbcf, ipcur, nnipset @@ -1465,6 +1467,7 @@ subroutine model(n, wm, lenwm, iwm, leniwm, u, savf, x, f, jac, c c----------------------------------------------------------------------- implicit none + Use(Cdv) integer n, lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe integer nps, ncfl, nbcf, ipcur, nnipset, incpset, ier @@ -1535,6 +1538,7 @@ call solpk (n,wm,lenwm,iwm,leniwm,u,savf,x,su,sf,f,jac,psol) subroutine solpk (n, wm, lenwm, iwm, leniwm, u, savf, x, su, sf, * f, jac, psol) implicit none + Use(Cdv) integer lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe integer nps, ncfl, nbcf, iwk, npsl, mmaxp1, ihsv, iq, mgmr @@ -1910,6 +1914,7 @@ call psol (n, u, savf, su, sf, f, jac, wk, wmp, iwmp, x, ier) subroutine atv (n, u, savf, v, su, sf, ftem, f, jac, psol, z, * vtem, wmp, iwmp, ier, npsl) implicit none + Use(Cdv) integer iwmp, ier, npsl, locwmp, locimp, iersl, kmp, mmax integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe integer nps, ncfl, nbcf, i @@ -2043,6 +2048,7 @@ call psol (n, u, savf, su, sf, f, jac, ftem, wmp, iwmp, endif call f(n, u, ftem) nfe = nfe + 1 + comnfe = comnfe + 1 ccc do 281 i = 1, n # Begin 2nd order Jac ccc u(i) = z(i) - sigma*vtem(i) # change sign of perturbation ccc 281 continue @@ -2626,6 +2632,7 @@ c norm() denotes the euclidean norm. c c----------------------------------------------------------------------- implicit none + Use(Cdv) integer n, lenwm, iwm, leniwm, iret, locwmp, locimp, iersl integer kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje, nni integer nli, npe, nps, ncfl, nbcf, iprint, iunit, iermsg, np1 @@ -2723,6 +2730,7 @@ subroutine dogstp (m, mp1, mmaxp1, ygm, ycp, beta, hes, tau, ynew, * xnewl, wk, wmp, iwmp, u, su, sf, savf, f, jac, * psol) implicit none + Use(Cdv) integer m, mp1, mmaxp1, n, iwmp, locwmp, locimp, iersl, kmp integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli integer npe, nps, ncfl, nbcf, i, ier @@ -2905,6 +2913,7 @@ subroutine trgupd (m, mp1, mmaxp1, n, np1, u, savf, f1nrm, x, xl, * stptol, mxtkn, tau, uprev, fprev, f1prv, upls, * f1pls, wk, ivio, iret, f) implicit none + Use(Cdv) integer m, mp1, mmaxp1, n, np1, ivio, iret, locwmp, locimp integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje integer nni, nli, npe, nps, ncfl, nbcf, i @@ -3053,6 +3062,7 @@ call scopy (n, u, 1, upls, 1) 20 continue call f (n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, upls, 1) fnrmp = vnormnk(n, savf, sf) f1pls = pt5*fnrmp*fnrmp @@ -3200,6 +3210,7 @@ c norm() denotes the euclidean norm. c c----------------------------------------------------------------------- implicit none + Use(Cdv) integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio @@ -3284,6 +3295,7 @@ call scopy(n, u, 1, unew, 1) 120 continue call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, unew, 1) fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two @@ -3310,6 +3322,7 @@ call scopy(n, u, 1, unew, 1) 140 u(i) = unew(i) + rl*p(i) call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, unew, 1) fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two @@ -3339,6 +3352,7 @@ call scopy(n, u, 1, unew, 1) 160 u(i) = unew(i) + rl*p(i) call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, unew, 1) fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two @@ -3369,6 +3383,7 @@ call scopy(n, u, 1, unew, 1) 170 u(i) = unew(i) + rllo*p(i) call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, unew, 1) fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two @@ -3392,6 +3407,7 @@ call scopy(n, u, 1, unew, 1) c load savf and f1nrmp with current values. call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two return @@ -3848,6 +3864,7 @@ c norm() denotes the euclidean norm. c c----------------------------------------------------------------------- implicit none + Use(Cdv) integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio, ivar @@ -3919,6 +3936,7 @@ call scopy(n, u, 1, unew, 1) 20 continue call f(n, u, savf) nfe = nfe + 1 + comnfe = comnfe + 1 call sswap(n, u, 1, unew, 1) fnrmp = vnormnk(n,savf,sf) f1nrmp = fnrmp*fnrmp/two