Skip to content

Commit

Permalink
Merge pull request #93 from LLNL/v8_omp_pandf1
Browse files Browse the repository at this point in the history
Merges working pandf1 OMP paralellization into OMP branch
  • Loading branch information
holm10 authored Jan 14, 2025
2 parents 901d07f + 77f587f commit 99ebd69
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 47 deletions.
1 change: 1 addition & 0 deletions bbb/bbb.v
Original file line number Diff line number Diff line change
Expand Up @@ -3237,6 +3237,7 @@ ifexmain integer /0/ #scalar to indicate if subroutine allocate
#=0 means it is not.
exmain_aborted logical /.false./ # Set to .true. in Python version on control-C abort
iallcall integer /0/ #flag to signal first call to allocate
comnfe integer /0/ # Number of NK iterations for time-step

***** RZ_cell_info:
# RZ grid-cell center and face locations
Expand Down
1 change: 1 addition & 0 deletions com/com.v
Original file line number Diff line number Diff line change
Expand Up @@ -705,3 +705,4 @@ tanh_multi(i:integer,a:real,j:integer,b:real,fname:string,d:real) subroutine
***** Flags:
# Common flags used by UEDGE
iprint integer /1/ # Flag controlling whether to be verbose or not
71 changes: 35 additions & 36 deletions ppp/omp_parallel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,8 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot)
use Dim,only:ny
use Selec, only:yinc,xrinc,xlinc
Use Grid,only:ijactot

Use Cdv, only: comnfe

integer yinc_bkp,xrinc_bkp,xlinc_bkp,iv,tid
integer,intent(in)::neq
real,intent(in)::yl(*)
Expand Down Expand Up @@ -674,30 +675,28 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot)


if (CheckPandf1.gt.0) then
Time2=omp_get_wtime()
call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave)
Time2=omp_get_wtime()-Time2
OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1
Time2=omp_get_wtime()
call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave)
Time2=omp_get_wtime()-Time2
OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1
if (OMPPandf1Verbose.gt.0) then
write(*,*) "Timing Pandf1 serial:",OMPTimeSerialPandf1,"(",Time2,")/parallel:",OMPTimeParallelPandf1,'(',Time1,')'
write(*,*) 'ijactot ',ijactot
call Compare(yldot,yldotsave,neq)
write(*,*) "serial and parallel pandf are identical"
endif
call Compare(yldot,yldotsave,neq)
write(*,'(a,i4)') " Serial and parallel pandf are identical for nfe = ", comnfe
endif
else
call pandf1 (-1,-1, 0.0, neq, time, yl, yldot)
endif
end subroutine OMPPandf1Rhs

module Bins
contains
subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftBin,iRightBin,inc)
integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax
integer ::N,SizeBin,i,Nchunk
integer,intent(out),allocatable:: iCenterbin(:),inc(:),iLeftBin(:),iRightBin(:)
! integer,intent(out):: iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk)
subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,ichunktot,Padding,iCenterBin,iLeftBin,iRightBin,inc)
implicit none
integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax, ichunktot
integer,intent(out):: iCenterbin(ichunktot),inc(ichunktot),iLeftBin(ichunktot),iRightBin(ichunktot)
integer ::N,SizeBin,Nchunk, i
N=ieqmax-ieqmin+1
Nchunk=ichunkmax-ichunkmin+1
allocate (iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk))

if (N>Nchunk) then
SizeBin=int((N/Nchunk))
Expand All @@ -724,7 +723,6 @@ subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftB
return

end subroutine CreateBin
end module Bins

subroutine MakeChunksPandf1()
Use Indexes,only: igyl
Expand All @@ -733,41 +731,43 @@ subroutine MakeChunksPandf1()
Nivchunk,Nxchunks,Nychunks,iymaxchunk,ixmaxchunk,iyminchunk,ixminchunk
use Lsode, only: neq
use Dim, only:nx,ny
use Bins
implicit none
integer:: remakechunk,i,ii,ichunk,iv,ix,iy
integer,allocatable:: iyCenterBin(:),iyRightBin(:),iyLeftBin(:),incy(:)
integer,allocatable:: ixCenterBin(:),ixRightBin(:),ixLeftBin(:),incx(:)

allocate (iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks))
allocate (ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks))
integer:: iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks)
integer::ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks)
remakechunk=0
if ((Nxchunks.ne.Nxchunks_old).or.(Nychunks.ne.Nychunks_old)) then


if (Nychunks.gt.1) then
if (Nychunks.eq.ny) then
iyLeftBin(1)=0
iyRightBin(1)=1
iyCenterBin(1)=1
incy(1)=ypadding
call CreateBin(2,ny-1,2,Nychunks-1,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)
call CreateBin( 2,ny-1,2,Nychunks-1, Nychunks, ypadding, &
iyCenterBin, iyLeftBin, iyRightBin,&
incy &
)
iyLeftBin(Nychunks)=ny
iyRightBin(Nychunks)=ny+1
iyCenterBin(Nychunks)=ny
incy(Nychunks)=ypadding
write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
do iy=1,Nychunks
write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
enddo
if (OMPPandf1Verbose.gt.1) then
write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
do iy=1,Nychunks
write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
enddo
endif
else

call CreateBin(0,ny+1,1,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)
call CreateBin(0,ny+1,1,Nychunks,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)

write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
do iy=1,Nychunks
if (OMPPandf1Verbose.gt.1) then
write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
do iy=1,Nychunks
write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
enddo
enddo
endif

! now we check the first and last bins to check that ypadding is 3 if iyCenterBin=2
if (iyCenterBin(1)==0) then
Expand All @@ -787,7 +787,7 @@ subroutine MakeChunksPandf1()


if (Nxchunks.gt.1) then
call CreateBin(0,nx+1,1,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx)
call CreateBin(0,nx+1,1,Nxchunks,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx)
else
ixCenterBin(1)=-1
ixLeftBin(1)=0
Expand Down Expand Up @@ -865,8 +865,7 @@ subroutine MakeChunksPandf1()
enddo
endif
endif


return
end subroutine MakeChunksPandf1

!subroutine PrintChunks
Expand Down
21 changes: 12 additions & 9 deletions ppp/parallel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,18 @@ subroutine InitParallel

endif

! if (OMPParallelPandf1.gt.0) then
! if (OMPParallelJac==0) then
! call xerrab('Cannot run omp parallel evaluation of pandf1 without running jacobian omp evaluation.')
! endif
! call InitOMPPandf1()
! ParallelPandf1=1
! else
! ParallelPandf1=0
! endif
if (OMPParallelPandf1.gt.0) then
if (OMPParallelJac==0) then
write(*,*) "Parallel Pandf1 requires Parallel Jac: activating..."
OMPParallelJac=1
call InitOMPJac
ParallelJac=1
endif
call InitOMPPandf1()
ParallelPandf1=1
else
ParallelPandf1=0
endif


end subroutine InitParallel
Expand Down
4 changes: 2 additions & 2 deletions ppp/ppp.v
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ ppp
***** ParallelSettings:
OMPParallelPandf1 integer /0/ # [0]: serial pandf1 rhs calc [1] omp parallel pandf1 rhs calc
OMPParallelJac integer /0/ # [0]: serial jacobian calc [1] omp parallel jacobian calc
ParallelWarning integer /1/ # Warning for users who wish to use it
ParallelWarning integer /0/ # Warning for users who wish to use it
CheckJac integer /0/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of Jacobian.
# If differences between para and serial Jacobians, dump both Jacs in serialjac.dat and paralleljac.dat with routine jac_write in current working folder. See UEDGEToolBox docs for analysis tools.
Nthreads integer /64/ # Number of threads to be used to calculate the Jacobian
CheckPandf1 integer /1/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1.
CheckPandf1 integer /0/ # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1.

***** ParallelDebug:
OMPJacDebug integer /0/ #Print debug info for omp constructs
Expand Down
18 changes: 18 additions & 0 deletions svr/nksol.m
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,7 @@ call errgen(ierr,zero,zero,iret,ivar)
c-----------------------------------------------------------------------
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
fnrm = vnormnk(n,savf,sf)
f1nrm = fnrm*fnrm/two
if (iprint .ge. 1) write(iunit,400) iter,fnrm,nfe
Expand Down Expand Up @@ -1203,6 +1204,7 @@ c norm(f) asymptotes from above to a finite value
c
c-----------------------------------------------------------------------
implicit none
Use(Cdv)
integer n, iret, iter, itermx, ncscmx, iterm, locwmp, locimp
integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje
integer nni, nli, npe, nps, ncfl, nbcf, ipcur, nnipset
Expand Down Expand Up @@ -1465,6 +1467,7 @@ subroutine model(n, wm, lenwm, iwm, leniwm, u, savf, x, f, jac,
c
c-----------------------------------------------------------------------
implicit none
Use(Cdv)
integer n, lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax
integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
integer nps, ncfl, nbcf, ipcur, nnipset, incpset, ier
Expand Down Expand Up @@ -1535,6 +1538,7 @@ call solpk (n,wm,lenwm,iwm,leniwm,u,savf,x,su,sf,f,jac,psol)
subroutine solpk (n, wm, lenwm, iwm, leniwm, u, savf, x, su, sf,
* f, jac, psol)
implicit none
Use(Cdv)
integer lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax
integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
integer nps, ncfl, nbcf, iwk, npsl, mmaxp1, ihsv, iq, mgmr
Expand Down Expand Up @@ -1910,6 +1914,7 @@ call psol (n, u, savf, su, sf, f, jac, wk, wmp, iwmp, x, ier)
subroutine atv (n, u, savf, v, su, sf, ftem, f, jac, psol, z,
* vtem, wmp, iwmp, ier, npsl)
implicit none
Use(Cdv)
integer iwmp, ier, npsl, locwmp, locimp, iersl, kmp, mmax
integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
integer nps, ncfl, nbcf, i
Expand Down Expand Up @@ -2043,6 +2048,7 @@ call psol (n, u, savf, su, sf, f, jac, ftem, wmp, iwmp,
endif
call f(n, u, ftem)
nfe = nfe + 1
comnfe = comnfe + 1
ccc do 281 i = 1, n # Begin 2nd order Jac
ccc u(i) = z(i) - sigma*vtem(i) # change sign of perturbation
ccc 281 continue
Expand Down Expand Up @@ -2626,6 +2632,7 @@ c norm() denotes the euclidean norm.
c
c-----------------------------------------------------------------------
implicit none
Use(Cdv)
integer n, lenwm, iwm, leniwm, iret, locwmp, locimp, iersl
integer kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje, nni
integer nli, npe, nps, ncfl, nbcf, iprint, iunit, iermsg, np1
Expand Down Expand Up @@ -2723,6 +2730,7 @@ subroutine dogstp (m, mp1, mmaxp1, ygm, ycp, beta, hes, tau, ynew,
* xnewl, wk, wmp, iwmp, u, su, sf, savf, f, jac,
* psol)
implicit none
Use(Cdv)
integer m, mp1, mmaxp1, n, iwmp, locwmp, locimp, iersl, kmp
integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
integer npe, nps, ncfl, nbcf, i, ier
Expand Down Expand Up @@ -2905,6 +2913,7 @@ subroutine trgupd (m, mp1, mmaxp1, n, np1, u, savf, f1nrm, x, xl,
* stptol, mxtkn, tau, uprev, fprev, f1prv, upls,
* f1pls, wk, ivio, iret, f)
implicit none
Use(Cdv)
integer m, mp1, mmaxp1, n, np1, ivio, iret, locwmp, locimp
integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje
integer nni, nli, npe, nps, ncfl, nbcf, i
Expand Down Expand Up @@ -3053,6 +3062,7 @@ call scopy (n, u, 1, upls, 1)
20 continue
call f (n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, upls, 1)
fnrmp = vnormnk(n, savf, sf)
f1pls = pt5*fnrmp*fnrmp
Expand Down Expand Up @@ -3200,6 +3210,7 @@ c norm() denotes the euclidean norm.
c
c-----------------------------------------------------------------------
implicit none
Use(Cdv)
integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp
integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio
Expand Down Expand Up @@ -3284,6 +3295,7 @@ call scopy(n, u, 1, unew, 1)
120 continue
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, unew, 1)
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
Expand All @@ -3310,6 +3322,7 @@ call scopy(n, u, 1, unew, 1)
140 u(i) = unew(i) + rl*p(i)
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, unew, 1)
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
Expand Down Expand Up @@ -3339,6 +3352,7 @@ call scopy(n, u, 1, unew, 1)
160 u(i) = unew(i) + rl*p(i)
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, unew, 1)
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
Expand Down Expand Up @@ -3369,6 +3383,7 @@ call scopy(n, u, 1, unew, 1)
170 u(i) = unew(i) + rllo*p(i)
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, unew, 1)
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
Expand All @@ -3392,6 +3407,7 @@ call scopy(n, u, 1, unew, 1)
c load savf and f1nrmp with current values.
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
return
Expand Down Expand Up @@ -3848,6 +3864,7 @@ c norm() denotes the euclidean norm.
c
c-----------------------------------------------------------------------
implicit none
Use(Cdv)
integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp
integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio, ivar
Expand Down Expand Up @@ -3919,6 +3936,7 @@ call scopy(n, u, 1, unew, 1)
20 continue
call f(n, u, savf)
nfe = nfe + 1
comnfe = comnfe + 1
call sswap(n, u, 1, unew, 1)
fnrmp = vnormnk(n,savf,sf)
f1nrmp = fnrmp*fnrmp/two
Expand Down

0 comments on commit 99ebd69

Please sign in to comment.