Merge pull request #93 from LLNL/v8_omp_pandf1

Merges working pandf1 OMP paralellization into OMP branch
LLNL · Jan 14, 2025 · 99ebd69 · 99ebd69
2 parents 901d07f + 77f587f
commit 99ebd69
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 47 deletions.
diff --git a/bbb/bbb.v b/bbb/bbb.v
@@ -3237,6 +3237,7 @@ ifexmain           integer /0/  #scalar to indicate if subroutine allocate
                                 #=0 means it is not.
 exmain_aborted logical /.false./ # Set to .true. in Python version on control-C abort
 iallcall	   integer /0/  #flag to signal first call to allocate
+comnfe  integer     /0/     # Number of NK iterations for time-step
 
 ***** RZ_cell_info:
 # RZ grid-cell center and face locations

diff --git a/com/com.v b/com/com.v
@@ -705,3 +705,4 @@ tanh_multi(i:integer,a:real,j:integer,b:real,fname:string,d:real) subroutine
 ***** Flags:
 # Common flags used by UEDGE
 iprint  integer     /1/     # Flag controlling whether to be verbose or not
+
diff --git a/ppp/omp_parallel.F90 b/ppp/omp_parallel.F90
@@ -604,7 +604,8 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot)
     use Dim,only:ny
     use Selec, only:yinc,xrinc,xlinc
     Use Grid,only:ijactot
-
+    Use Cdv, only: comnfe
+
     integer yinc_bkp,xrinc_bkp,xlinc_bkp,iv,tid
     integer,intent(in)::neq
     real,intent(in)::yl(*)
@@ -674,30 +675,28 @@ subroutine OMPPandf1Rhs(neq,time,yl,yldot)
 
 
       if (CheckPandf1.gt.0) then
-          Time2=omp_get_wtime()
-         call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave)
-         Time2=omp_get_wtime()-Time2
-          OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1
+        Time2=omp_get_wtime()
+        call pandf1 (-1, -1, 0.0, neq, time, ylcopy, yldotsave)
+        Time2=omp_get_wtime()-Time2
+        OMPTimeSerialPandf1=Time2+OMPTimeSerialPandf1
+        if (OMPPandf1Verbose.gt.0) then
           write(*,*) "Timing Pandf1 serial:",OMPTimeSerialPandf1,"(",Time2,")/parallel:",OMPTimeParallelPandf1,'(',Time1,')'
-          write(*,*) 'ijactot ',ijactot
-          call Compare(yldot,yldotsave,neq)
-          write(*,*) "serial and parallel pandf are identical"
         endif
+        call Compare(yldot,yldotsave,neq)
+        write(*,'(a,i4)') "  Serial and parallel pandf are identical for nfe = ", comnfe
+      endif
   else
        call pandf1 (-1,-1, 0.0, neq, time, yl, yldot)
 endif
 end subroutine OMPPandf1Rhs
 
-module Bins
-contains
-subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftBin,iRightBin,inc)
-    integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax
-    integer ::N,SizeBin,i,Nchunk
-    integer,intent(out),allocatable:: iCenterbin(:),inc(:),iLeftBin(:),iRightBin(:)
-!    integer,intent(out):: iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk)
+subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,ichunktot,Padding,iCenterBin,iLeftBin,iRightBin,inc)
+    implicit none
+    integer,intent(in):: ieqmin, ieqmax,Padding,ichunkmin,ichunkmax, ichunktot
+    integer,intent(out):: iCenterbin(ichunktot),inc(ichunktot),iLeftBin(ichunktot),iRightBin(ichunktot)
+    integer ::N,SizeBin,Nchunk, i
     N=ieqmax-ieqmin+1
     Nchunk=ichunkmax-ichunkmin+1
-    allocate (iCenterbin(Nchunk),inc(Nchunk),iLeftBin(Nchunk),iRightBin(Nchunk))
 
     if (N>Nchunk) then
         SizeBin=int((N/Nchunk))
@@ -724,7 +723,6 @@ subroutine CreateBin(ieqmin,ieqmax,ichunkmin,ichunkmax,Padding,iCenterBin,iLeftB
     return
 
 end subroutine CreateBin
-end module Bins
 
 subroutine MakeChunksPandf1()
     Use Indexes,only: igyl
@@ -733,41 +731,43 @@ subroutine MakeChunksPandf1()
     Nivchunk,Nxchunks,Nychunks,iymaxchunk,ixmaxchunk,iyminchunk,ixminchunk
     use Lsode, only: neq
     use Dim, only:nx,ny
-    use Bins
     implicit none
     integer:: remakechunk,i,ii,ichunk,iv,ix,iy
-    integer,allocatable:: iyCenterBin(:),iyRightBin(:),iyLeftBin(:),incy(:)
-    integer,allocatable:: ixCenterBin(:),ixRightBin(:),ixLeftBin(:),incx(:)
-
-    allocate (iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks))
-    allocate (ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks))
+    integer:: iyCenterBin(Nychunks),iyRightBin(Nychunks),iyLeftBin(Nychunks),incy(Nychunks)
+    integer::ixCenterBin(Nxchunks),ixRightBin(Nxchunks),ixLeftBin(Nxchunks),incx(Nxchunks)
        remakechunk=0
        if ((Nxchunks.ne.Nxchunks_old).or.(Nychunks.ne.Nychunks_old)) then
 
-
        if (Nychunks.gt.1) then
            if (Nychunks.eq.ny) then
               iyLeftBin(1)=0
               iyRightBin(1)=1
               iyCenterBin(1)=1
               incy(1)=ypadding
-              call CreateBin(2,ny-1,2,Nychunks-1,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)
+              call CreateBin(   2,ny-1,2,Nychunks-1, Nychunks, ypadding, &
+                                iyCenterBin, iyLeftBin, iyRightBin,&
+                                incy &
+              )
               iyLeftBin(Nychunks)=ny
               iyRightBin(Nychunks)=ny+1
               iyCenterBin(Nychunks)=ny
               incy(Nychunks)=ypadding
-              write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
-              do iy=1,Nychunks
-                write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
-              enddo
+              if (OMPPandf1Verbose.gt.1) then
+                write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
+                do iy=1,Nychunks
+                    write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
+                enddo
+              endif
            else
 
-            call CreateBin(0,ny+1,1,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)
+            call CreateBin(0,ny+1,1,Nychunks,Nychunks,ypadding,iyCenterBin,iyLeftBin,iyRightBin,incy)
 
-            write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
-            do iy=1,Nychunks
+            if (OMPPandf1Verbose.gt.1) then
+                write(*,*) '----- Bins in y direction: ', Nychunks, ny+2
+                do iy=1,Nychunks
                     write(*,*) iyCenterBin(iy),iyLeftBin(iy),iyRightBin(iy),incy(iy)
-            enddo
+                enddo
+            endif
 
            ! now we check the first and last bins to check that ypadding is 3 if iyCenterBin=2
            if (iyCenterBin(1)==0) then
@@ -787,7 +787,7 @@ subroutine MakeChunksPandf1()
 
 
        if (Nxchunks.gt.1) then
-           call CreateBin(0,nx+1,1,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx)
+           call CreateBin(0,nx+1,1,Nxchunks,Nxchunks,xpadding,ixCenterBin,ixLeftBin,ixRightBin,incx)
        else
            ixCenterBin(1)=-1
            ixLeftBin(1)=0
@@ -865,8 +865,7 @@ subroutine MakeChunksPandf1()
     enddo
     endif
     endif
-
-
+    return
 end subroutine MakeChunksPandf1
 
 !subroutine PrintChunks

diff --git a/ppp/parallel.F90 b/ppp/parallel.F90
@@ -32,15 +32,18 @@ subroutine InitParallel
 
     endif
 
-!    if (OMPParallelPandf1.gt.0) then
-!        if (OMPParallelJac==0) then
-!            call xerrab('Cannot run omp parallel evaluation of pandf1 without running jacobian omp evaluation.')
-!        endif
-!        call InitOMPPandf1()
-!        ParallelPandf1=1
-!    else
-!        ParallelPandf1=0
-!    endif
+    if (OMPParallelPandf1.gt.0) then
+        if (OMPParallelJac==0) then
+            write(*,*) "Parallel Pandf1  requires Parallel Jac: activating..."
+            OMPParallelJac=1
+            call InitOMPJac
+            ParallelJac=1
+        endif
+        call InitOMPPandf1()
+        ParallelPandf1=1
+    else
+        ParallelPandf1=0
+    endif
 
 
 end subroutine InitParallel

diff --git a/ppp/ppp.v b/ppp/ppp.v
@@ -5,11 +5,11 @@ ppp
 ***** ParallelSettings:
 OMPParallelPandf1 integer /0/      # [0]: serial pandf1 rhs calc [1] omp parallel pandf1 rhs calc
 OMPParallelJac    integer /0/     # [0]: serial jacobian calc [1] omp parallel jacobian calc
-ParallelWarning   integer /1/     # Warning for users who wish to use it
+ParallelWarning   integer /0/     # Warning for users who wish to use it
 CheckJac          integer /0/      # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of Jacobian.
                                   # If differences between para and serial Jacobians, dump both Jacs in serialjac.dat and paralleljac.dat with routine jac_write in current working folder. See UEDGEToolBox docs for analysis tools.
 Nthreads          integer /64/ # Number of threads to be used to calculate the Jacobian
-CheckPandf1       integer /1/      # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1.
+CheckPandf1       integer /0/      # [0/1]: Turn on on-the-fly comparison of parallel vs serial evaluation of pandf1.
 
 ***** ParallelDebug:
 OMPJacDebug       integer /0/ #Print debug info for omp constructs

diff --git a/svr/nksol.m b/svr/nksol.m
@@ -1006,6 +1006,7 @@ call errgen(ierr,zero,zero,iret,ivar)
 c-----------------------------------------------------------------------
       call f(n, u, savf)
       nfe = nfe + 1
+      comnfe = comnfe + 1
       fnrm = vnormnk(n,savf,sf)
       f1nrm = fnrm*fnrm/two
       if (iprint .ge. 1) write(iunit,400) iter,fnrm,nfe
@@ -1203,6 +1204,7 @@ c                    norm(f) asymptotes from above to a finite value
 c                    
 c-----------------------------------------------------------------------
       implicit none
+       Use(Cdv)
       integer n, iret, iter, itermx, ncscmx, iterm, locwmp, locimp
       integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje
       integer nni, nli, npe, nps, ncfl, nbcf, ipcur, nnipset
@@ -1465,6 +1467,7 @@ subroutine model(n, wm, lenwm, iwm, leniwm, u, savf, x, f, jac,
 c
 c-----------------------------------------------------------------------
       implicit none
+       Use(Cdv)
       integer n, lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax
       integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
       integer nps, ncfl, nbcf, ipcur, nnipset, incpset, ier
@@ -1535,6 +1538,7 @@ call solpk (n,wm,lenwm,iwm,leniwm,u,savf,x,su,sf,f,jac,psol)
       subroutine solpk (n, wm, lenwm, iwm, leniwm, u, savf, x, su, sf,
      *                  f, jac, psol)
       implicit none
+       Use(Cdv)
       integer lenwm, leniwm, locwmp, locimp, iersl, kmp, mmax
       integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
       integer nps, ncfl, nbcf, iwk, npsl, mmaxp1, ihsv, iq, mgmr
@@ -1910,6 +1914,7 @@ call psol (n, u, savf, su, sf, f, jac, wk, wmp, iwmp, x, ier)
       subroutine atv (n, u, savf, v, su, sf, ftem, f, jac, psol, z,
      *                vtem, wmp, iwmp, ier, npsl)
       implicit none
+       Use(Cdv)
       integer iwmp, ier, npsl, locwmp, locimp, iersl, kmp, mmax
       integer methn, methk, ipflg, mfdif, nfe, nje, nni, nli, npe
       integer nps, ncfl, nbcf, i
@@ -2043,6 +2048,7 @@ call psol (n, u, savf, su, sf, f, jac, ftem, wmp, iwmp,
         endif
       call f(n, u, ftem)
       nfe = nfe + 1
+      comnfe = comnfe + 1
 ccc      do 281 i = 1, n                  # Begin 2nd order Jac
 ccc         u(i) = z(i) - sigma*vtem(i)   # change sign of perturbation
 ccc 281  continue
@@ -2626,6 +2632,7 @@ c            norm() denotes the euclidean norm.
 c
 c-----------------------------------------------------------------------
       implicit none
+       Use(Cdv)
       integer n, lenwm, iwm, leniwm, iret, locwmp, locimp, iersl
       integer kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje, nni
       integer nli, npe, nps, ncfl, nbcf, iprint, iunit, iermsg, np1
@@ -2723,6 +2730,7 @@ subroutine dogstp (m, mp1, mmaxp1, ygm, ycp, beta, hes, tau, ynew,
      *                  xnewl, wk, wmp, iwmp, u, su, sf, savf, f, jac,
      *                  psol)
       implicit none
+       Use(Cdv)
       integer m, mp1, mmaxp1, n, iwmp, locwmp, locimp, iersl, kmp
       integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
       integer npe, nps, ncfl, nbcf, i, ier
@@ -2905,6 +2913,7 @@ subroutine trgupd (m, mp1, mmaxp1, n, np1, u, savf, f1nrm, x, xl,
      *                   stptol, mxtkn, tau, uprev, fprev, f1prv, upls, 
      *                   f1pls, wk, ivio, iret, f)
       implicit none
+       Use(Cdv)
       integer m, mp1, mmaxp1, n, np1, ivio, iret, locwmp, locimp
       integer iersl, kmp, mmax, methn, methk, ipflg, mfdif, nfe, nje
       integer nni, nli, npe, nps, ncfl, nbcf, i
@@ -3053,6 +3062,7 @@ call scopy (n, u, 1, upls, 1)
  20     continue
       call f (n, u, savf)
       nfe = nfe + 1
+      comnfe = comnfe + 1
       call sswap(n, u, 1, upls, 1)
       fnrmp = vnormnk(n, savf, sf)
       f1pls = pt5*fnrmp*fnrmp
@@ -3200,6 +3210,7 @@ c            norm() denotes the euclidean norm.
 c
 c-----------------------------------------------------------------------
       implicit none
+       Use(Cdv)
       integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp
       integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
       integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio
@@ -3284,6 +3295,7 @@ call scopy(n, u, 1, unew, 1)
  120    continue
       call f(n, u, savf)
       nfe = nfe + 1
+      comnfe = comnfe + 1
       call sswap(n, u, 1, unew, 1)
       fnrmp = vnormnk(n,savf,sf)
       f1nrmp = fnrmp*fnrmp/two
@@ -3310,6 +3322,7 @@ call scopy(n, u, 1, unew, 1)
  140          u(i) = unew(i) + rl*p(i)
             call f(n, u, savf)
             nfe = nfe + 1
+            comnfe = comnfe + 1
             call sswap(n, u, 1, unew, 1)
             fnrmp = vnormnk(n,savf,sf)
             f1nrmp = fnrmp*fnrmp/two
@@ -3339,6 +3352,7 @@ call scopy(n, u, 1, unew, 1)
  160          u(i) = unew(i) + rl*p(i)
             call f(n, u, savf)
             nfe = nfe + 1
+            comnfe = comnfe + 1
             call sswap(n, u, 1, unew, 1)
             fnrmp = vnormnk(n,savf,sf)
             f1nrmp = fnrmp*fnrmp/two
@@ -3369,6 +3383,7 @@ call scopy(n, u, 1, unew, 1)
  170            u(i) = unew(i) + rllo*p(i)
               call f(n, u, savf)
               nfe = nfe + 1
+              comnfe = comnfe + 1
               call sswap(n, u, 1, unew, 1)
               fnrmp = vnormnk(n,savf,sf)
               f1nrmp = fnrmp*fnrmp/two
@@ -3392,6 +3407,7 @@ call scopy(n, u, 1, unew, 1)
 c load savf and f1nrmp with current values.
         call f(n, u, savf)
         nfe = nfe + 1
+        comnfe = comnfe + 1
         fnrmp = vnormnk(n,savf,sf)
         f1nrmp = fnrmp*fnrmp/two
         return
@@ -3848,6 +3864,7 @@ c            norm() denotes the euclidean norm.
 c
 c-----------------------------------------------------------------------
       implicit none
+       Use(Cdv)
       integer n, iret, icflag, icnstr, locwmp, locimp, iersl, kmp
       integer mmax, methn, methk, ipflg, mfdif, nfe, nje, nni, nli
       integer npe, nps, ncfl, nbcf, iprint, iunit, iermsg, i, ivio, ivar
@@ -3919,6 +3936,7 @@ call scopy(n, u, 1, unew, 1)
  20     continue
       call f(n, u, savf)
       nfe = nfe + 1
+      comnfe = comnfe + 1
       call sswap(n, u, 1, unew, 1)
       fnrmp = vnormnk(n,savf,sf)
       f1nrmp = fnrmp*fnrmp/two
Original file line number	Diff line number	Diff line change
Expand Up		@@ -705,3 +705,4 @@ tanh_multi(i:integer,a:real,j:integer,b:real,fname:string,d:real) subroutine
		***** Flags:
		# Common flags used by UEDGE
		iprint integer /1/ # Flag controlling whether to be verbose or not