Skip to content

Commit

Permalink
Merge branch 'master' into qlgyro4tgyro
Browse files: browse the repository at this point in the history
  • Loading branch information
bpatel2107 committed Nov 27, 2023
2 parents 01ed481 + 279c6e3 commit fe135f8
Show file tree
Hide file tree
Showing 105 changed files with 40,803 additions and 189,123 deletions.
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.AZURE_GPU
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.CLOUD_GPU
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.CRUSHER
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.CSD3_AMPERE_GPU
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.DAINT_PGI
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.FRONTIER
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.GASUMMIT_GPU
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.MARCONI_LEONARDO
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.MINT_PGI
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.NEURON
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.PERLMUTTER_GPU
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.PERLMUTTER_GPU_80G
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.SUMMIT
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
2 changes: 0 additions & 2 deletions cgyro/install/make.ext.TITAN_PGI
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cgyro_nl_fftw.o : cgyro_nl_fftw.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_nl_fftw.o -c cgyro_nl_fftw.gpu.F90
cgyro_rhs.o : cgyro_rhs.gpu.F90
$(FC) $(FMATH) $(FFLAGS) -o cgyro_rhs.o -c cgyro_rhs.gpu.F90
26 changes: 14 additions & 12 deletions cgyro/src/cgyro_check_memory.F90
Original file line number Diff line number Diff line change
Expand Up @@ -49,26 +49,26 @@ subroutine cgyro_check_memory(datafile)
write(io,*) 'Nonlinear'
write(io,*)
! nsplit * n_toroidal = nv_loc * n_theta
#ifndef _OPENACC
#if !(defined(OMPGPU) || defined(_OPENACC))
call cgyro_alloc_add_3d(io,(ny/2+1),nx,n_omp,16,'fx')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,n_omp,16,'gx')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,n_omp,16,'fy')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,n_omp,16,'gy')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'ux')
call cgyro_alloc_add_3d(io,ny,nx,n_omp,8,'vx')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'uy')
call cgyro_alloc_add_3d(io,ny,nx,n_omp,8,'vy')
call cgyro_alloc_add_3d(io,ny,nx,nsplitA,8,'ux')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'vx')
call cgyro_alloc_add_3d(io,ny,nx,nsplitA,8,'uy')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'vy')
call cgyro_alloc_add_3d(io,ny,nx,n_omp,8,'uv')
#else
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplit,16,'fx')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplitA,16,'fx')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplit,16,'gx')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplit,16,'fy')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplitA,16,'fy')
call cgyro_alloc_add_3d(io,(ny/2+1),nx,nsplit,16,'gy')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'ux')
call cgyro_alloc_add_3d(io,ny,nx,nsplitA,8,'ux')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'vx')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'uy')
call cgyro_alloc_add_3d(io,ny,nx,nsplitA,8,'uy')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'vy')
call cgyro_alloc_add_3d(io,ny,nx,nsplit,8,'uv')
call cgyro_alloc_add_3d(io,ny,nx,nsplitA,8,'uv')
#endif
endif

Expand Down Expand Up @@ -124,9 +124,11 @@ subroutine cgyro_check_memory(datafile)
write(io,*) 'Nonlinear bracket'
write(io,*)
! nsplit * n_toroidal = nv_loc * n_theta
call cgyro_alloc_add_4d(io,n_radial,nt_loc,nsplit,n_toroidal_procs,16,'f_nl')
call cgyro_alloc_add_4d(io,n_radial,nt_loc,nsplitA,n_toroidal_procs,16,'fA_nl')
call cgyro_alloc_add_4d(io,n_radial,nt_loc,nsplitB,n_toroidal_procs,16,'fB_nl')
call cgyro_alloc_add_4d(io,n_field,n_radial,n_jtheta,n_toroidal,16,'g_nl')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplit*n_toroidal_procs,16,'fpack')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitA*n_toroidal_procs,16,'fpackA')
call cgyro_alloc_add_3d(io,n_radial,nt_loc,nsplitB*n_toroidal_procs,16,'fpackB')
call cgyro_alloc_add_4d(io,n_field,n_radial,n_jtheta,n_toroidal,16,'gpack')
endif

Expand Down
24 changes: 16 additions & 8 deletions cgyro/src/cgyro_cleanup.F90
Original file line number Diff line number Diff line change
Expand Up @@ -246,17 +246,25 @@ subroutine cgyro_cleanup
ccl_del_device(upwind32_res)
deallocate(upwind32_res)
endif
if(allocated(f_nl)) then
ccl_del_device(f_nl)
deallocate(f_nl)
if(allocated(fA_nl)) then
ccl_del_device(fA_nl)
deallocate(fA_nl)
endif
if(allocated(fB_nl)) then
ccl_del_device(fB_nl)
deallocate(fB_nl)
endif
if(allocated(g_nl)) then
ccl_del_device(g_nl)
deallocate(g_nl)
endif
if(allocated(fpack)) then
ccl_del_device(fpack)
deallocate(fpack)
if(allocated(fpackA)) then
ccl_del_device(fpackA)
deallocate(fpackA)
endif
if(allocated(fpackB)) then
ccl_del_device(fpackB)
deallocate(fpackB)
endif
if(allocated(gpack)) then
ccl_del_device(gpack)
Expand Down Expand Up @@ -284,10 +292,10 @@ subroutine cgyro_cleanup
if(allocated(gx)) deallocate(gx)
if(allocated(fy)) deallocate(fy)
if(allocated(gy)) deallocate(gy)
if(allocated(vxmany)) deallocate(vxmany)
if(allocated(vymany)) deallocate(vymany)
if(allocated(uxmany)) deallocate(uxmany)
if(allocated(uymany)) deallocate(uymany)
if(allocated(vx)) deallocate(vx)
if(allocated(vy)) deallocate(vy)
if(allocated(uv)) deallocate(uv)
#endif

Expand Down
34 changes: 23 additions & 11 deletions cgyro/src/cgyro_field.F90
Original file line number Diff line number Diff line change
Expand Up @@ -229,14 +229,16 @@ end subroutine cgyro_field_v_notae_gpu
!-----------------------------------------------------------------
! Configuration (velocity-distributed) field solve
!-----------------------------------------------------------------
subroutine cgyro_field_c_cpu
subroutine cgyro_field_c_cpu(update_cap)

use parallel_lib
use timer_lib
use cgyro_globals

implicit none

logical, intent(in) :: update_cap

integer :: is,itor
complex :: my_psi

Expand Down Expand Up @@ -294,17 +296,19 @@ subroutine cgyro_field_c_cpu
endif
enddo

if (update_cap) then
!$omp parallel do collapse(2) private(iv_loc,is,ic,my_psi)
do itor=nt1,nt2
do iv=nv1,nv2
do itor=nt1,nt2
do iv=nv1,nv2
iv_loc = iv-nv1+1
is = is_v(iv)
do ic=1,nc
my_psi = sum( jvec_c(:,ic,iv_loc,itor)*field(:,ic,itor))
cap_h_c(ic,iv_loc,itor) = h_x(ic,iv_loc,itor)+my_psi*z(is)/temp(is)
enddo
enddo
enddo
enddo
endif

call timer_lib_out('field')

Expand Down Expand Up @@ -378,11 +382,14 @@ subroutine cgyro_field_c_ae_cpu
end subroutine cgyro_field_c_ae_cpu

#if defined(OMPGPU) || defined(_OPENACC)
subroutine cgyro_field_c_gpu
subroutine cgyro_field_c_gpu(update_cap)
use parallel_lib
use timer_lib
use cgyro_globals
implicit none

logical, intent(in) :: update_cap

integer :: is,i_f,itor
integer :: itor1,itor2
complex :: tmp,field_loc_l
Expand Down Expand Up @@ -502,23 +509,25 @@ subroutine cgyro_field_c_gpu
endif
endif

if (update_cap) then
#if defined(OMPGPU)
!$omp target teams distribute parallel do simd collapse(3) &
!$omp& private(iv_loc,is,my_psi)
#elif defined(_OPENACC)
!$acc parallel loop collapse(3) gang vector private(iv_loc,is,my_psi) &
!$acc& present(jvec_c,z,temp,is_v) present(nt1,nt2,nv1,nv2,nc) default(none)
#endif
do itor=nt1,nt2
do iv=nv1,nv2
do itor=nt1,nt2
do iv=nv1,nv2
do ic=1,nc
iv_loc = iv-nv1+1
is = is_v(iv)
my_psi = sum( jvec_c(:,ic,iv_loc,itor)*field(:,ic,itor))
cap_h_c(ic,iv_loc,itor) = h_x(ic,iv_loc,itor)+my_psi*z(is)/temp(is)
enddo
enddo
enddo
enddo
endif

#if (!defined(OMPGPU)) && defined(_OPENACC)
!$acc end data
Expand Down Expand Up @@ -634,12 +643,15 @@ end subroutine cgyro_field_c_ae_gpu
#endif


subroutine cgyro_field_c
subroutine cgyro_field_c(update_cap)
implicit none

logical, intent(in) :: update_cap

#if defined(OMPGPU) || defined(_OPENACC)
call cgyro_field_c_gpu
call cgyro_field_c_gpu(update_cap)
#else
call cgyro_field_c_cpu
call cgyro_field_c_cpu(update_cap)
#endif
end subroutine cgyro_field_c

Expand Down
21 changes: 11 additions & 10 deletions cgyro/src/cgyro_globals.F90
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,11 @@ module cgyro_globals
integer :: NEW_COMM_2
integer :: NEW_COMM_3
integer :: nv1,nv2,nc1,nc2
integer :: nsplit
integer :: nsplit,nsplitA,nsplitB
integer :: ns1,ns2
integer, dimension(:), allocatable :: recv_status
integer :: f_req, g_req
integer :: fA_req, fB_req, g_req
logical :: fA_req_valid, fB_req_valid, g_req_valid
! Thetas present in the process after NL AllToAll
integer :: n_jtheta
!
Expand Down Expand Up @@ -350,9 +351,9 @@ module cgyro_globals
complex, dimension(:,:,:), allocatable :: g_x
complex, dimension(:,:,:), allocatable :: h0_x
complex, dimension(:,:,:), allocatable :: h0_old
complex, dimension(:,:,:,:), allocatable :: f_nl
complex, dimension(:,:,:,:), allocatable :: fA_nl,fB_nl
complex, dimension(:,:,:,:), allocatable :: g_nl
complex, dimension(:,:,:), allocatable :: fpack
complex, dimension(:,:,:), allocatable :: fpackA,fpackB
complex, dimension(:,:,:,:), allocatable :: gpack
complex, dimension(:,:,:), allocatable :: omega_cap_h
complex, dimension(:,:,:), allocatable :: omega_h
Expand Down Expand Up @@ -403,11 +404,11 @@ module cgyro_globals
! GPU-FFTW plans

#ifdef HIPGPU
type(C_PTR) :: hip_plan_r2c_many
type(C_PTR) :: hip_plan_c2r_many
type(C_PTR) :: hip_plan_r2c_manyA,hip_plan_r2c_manyB
type(C_PTR) :: hip_plan_c2r_manyA,hip_plan_c2r_manyB,hip_plan_c2r_manyG
#else
integer(c_int) :: cu_plan_r2c_many
integer(c_int) :: cu_plan_c2r_many
integer(c_int) :: cu_plan_r2c_manyA,cu_plan_r2c_manyB
integer(c_int) :: cu_plan_c2r_manyA,cu_plan_c2r_manyB,cu_plan_c2r_manyG
#endif

complex, dimension(:,:,:),allocatable, target :: fxmany,fymany,gxmany,gymany
Expand All @@ -422,10 +423,10 @@ module cgyro_globals
!
! 2D FFT work arrays
#ifndef CGYRO_GPU_FFT
real, dimension(:,:,:), allocatable :: vxmany
real, dimension(:,:,:), allocatable :: vymany
real, dimension(:,:,:), allocatable :: uxmany
real, dimension(:,:,:), allocatable :: uymany
real, dimension(:,:,:), allocatable :: vx
real, dimension(:,:,:), allocatable :: vy
real, dimension(:,:,:), allocatable :: uv
complex, dimension(:,:,:),allocatable :: fx
complex, dimension(:,:,:),allocatable :: fy
Expand Down
32 changes: 29 additions & 3 deletions cgyro/src/cgyro_globals_math.f90
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,33 @@ subroutine cgyro_vel_fmaN(nr, left, r1, cN, rN, abssum)
complex, intent(in), dimension(nc,nv_loc,nt_loc,nr) :: rN
real, intent(inout), optional :: abssum
!-------------------------------------------------------
call cgyro_cmpl_fmaN(nc*nv_loc*nt_loc, nr, left,r1,cN,rN,abssum)
integer :: real_nr

! allow for tail-end zeros
real_nr = nr
do while ((real_nr>1) .AND. (cN(real_nr) == 0.d0 ))
real_nr = real_nr -1
enddo

select case (real_nr)
case(1)
call cgyro_vel_fma2(left,r1,cN(1),rN(:,:,:,1),abssum)
case(2)
call cgyro_vel_fma3(left,r1,cN(1),rN(:,:,:,1),cN(2),rN(:,:,:,2),abssum)
case(3)
call cgyro_vel_fma4(left,r1,cN(1),rN(:,:,:,1),cN(2),rN(:,:,:,2), &
cN(3),rN(:,:,:,3),abssum)
case(4)
call cgyro_vel_fma5(left,r1,cN(1),rN(:,:,:,1),cN(2),rN(:,:,:,2), &
cN(3),rN(:,:,:,3),cN(4),rN(:,:,:,4),abssum)
case(5)
call cgyro_vel_fma6(left,r1,cN(1),rN(:,:,:,1),cN(2),rN(:,:,:,2), &
cN(3),rN(:,:,:,3),cN(4),rN(:,:,:,4), &
cN(5),rN(:,:,:,5),abssum)
case default
! use the generic implementation
call cgyro_cmpl_fmaN(nc*nv_loc*nt_loc, real_nr, left,r1,cN,rN,abssum)
end select
end subroutine cgyro_vel_fmaN

!=========================================================
Expand All @@ -181,8 +207,8 @@ subroutine cgyro_vel_solution_werror(nr, left, r0, c1, &
complex, intent(in), dimension(nc,nv_loc,nt_loc,nr) :: rN
real, intent(in) :: ec1
real, intent(in), dimension(nr) :: ecN
real, intent(inout) :: abssum_left
real, intent(inout) :: abssum_m
real, intent(inout), optional :: abssum_left
real, intent(inout), optional :: abssum_m
!-------------------------------------------------------
call cgyro_cmpl_solution_werror(nc*nv_loc*nt_loc ,nr, &
left,r0,c1,m1,cN,rN,ec1,ecN,abssum_left,abssum_m)
Expand Down
2 changes: 1 addition & 1 deletion cgyro/src/cgyro_init_h.f90
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ subroutine cgyro_init_h
endif
end select

call cgyro_field_c_cpu
call cgyro_field_c_cpu(.TRUE.)

! Initialize time-history of fields (-3,-2,-1) to initial field.
field_old = field
Expand Down
Loading

0 comments on commit fe135f8

Please sign in to comment.