Skip to content

Commit

Permalink
Merge branch 'master' into dmd
Browse files Browse the repository at this point in the history
  • Loading branch information
jcandy committed Dec 5, 2024
2 parents 4f087f7 + fc6e565 commit daeeb37
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 106 deletions.
49 changes: 23 additions & 26 deletions cgyro/src/cgyro_init_collision_landau.F90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ module cgyro_init_collision_landau
subroutine cgyro_init_landau()
! populate cmat with Galerkin based gyrokinetic Landau operator.
! cmat1 is only for comparison purposes
use cgyro_globals, only : vth,temp,mass,dens,temp_ele,mass_ele,dens_ele,rho,z,&
use cgyro_globals, only : CGYRO_COMM_WORLD,vth,temp,mass,dens,temp_ele,mass_ele,dens_ele,rho,z,&
n_energy,e_max,n_xi,n_radial,n_theta,n_species,n_toroidal,nt1,nt2,nc_loc,nc1,nc2,nc,nv,&
nu_ee,&
xi,w_xi,& !needed for projleg calc
Expand Down Expand Up @@ -106,9 +106,6 @@ subroutine cgyro_init_landau()
if (i_proc==0) print 1,'WARNING: dens_rot not yet implemented!!'
if (i_proc==0) print 1,'WARNING: nu_global not yet implemented!!'
if (i_proc==0 .and. collision_test_mode==1) print 1,'collision_test_mode==1, comparing ...'
#ifdef __PGI
if (i_proc==0) print 1,'WARNING: precision loss in landau.F90 - can''t use quad precision in PGI!!'
#endif
if (i_proc==0 .and. maxval(abs(temp(1:n_species)-temp(1)))/=0) then
print 1,'Warning: Landau not yet working for different species temperatures!!'
end if
Expand All @@ -119,7 +116,7 @@ subroutine cgyro_init_landau()
gtvb=1
end if

!$ call MPI_Barrier(MPI_COMM_WORLD,ierror) ! may improve timing
!$ call MPI_Barrier(CGYRO_COMM_WORLD,ierror) ! may improve timing
call cpu_time(t1)
ns=ispec(n_species,n_species) !number of non-redundant species pairs
xmax=sqrt(e_max) !cut off at exp(-xmax^2)
Expand All @@ -146,7 +143,7 @@ subroutine cgyro_init_landau()
end do
! kperp_bmag_max is not completely global, there is still the n dependence.
! we need to maximize over the toroidal mode numbers:
call MPI_ALLREDUCE(MPI_IN_PLACE,kperp_bmag_max,1,MPI_REAL8,MPI_MAX,MPI_COMM_WORLD,ierror)
call MPI_ALLREDUCE(MPI_IN_PLACE,kperp_bmag_max,1,MPI_REAL8,MPI_MAX,CGYRO_COMM_WORLD,ierror)
rhomax=maxval(abs(rho_spec([(i,i=1,n_species)])))*xmax
kperprhomax=kperp_bmag_max*rhomax
if (verbose>0 .and. i_proc==0) print 6,'using kperprhomax=',kperprhomax
Expand Down Expand Up @@ -911,13 +908,13 @@ subroutine cgyro_init_landau()
print 7,'pre_scatter timing:'
do i=1,n_proc
if (i>1) then
call MPI_Recv(t,11,MPI_REAL8,i-1,i-1,MPI_COMM_WORLD,status,ierror)
call MPI_Recv(t,11,MPI_REAL8,i-1,i-1,CGYRO_COMM_WORLD,status,ierror)
end if
5 format("init_collision_landau: ",A,I0,A,7G24.16E3,A,I0,A,G24.16E3)
print 5,'i_proc=',i-1,' took',t(1:7),' load ',load(i),' rel',t(3)/load(i)
end do
else
call MPI_Send(t,11,MPI_REAL8,0,i_proc,MPI_COMM_WORLD,ierror)
call MPI_Send(t,11,MPI_REAL8,0,i_proc,CGYRO_COMM_WORLD,ierror)
end if
call cpu_time(t1)
! Now do the scatter
Expand All @@ -931,17 +928,17 @@ subroutine cgyro_init_landau()
ib=idx+1
if (proc(ik,ia,ib)/=0) then
!!$ do j=1,n_proc
!!$ call MPI_BARRIER(MPI_COMM_WORLD,ierror)
!!$ call MPI_BARRIER(CGYRO_COMM_WORLD,ierror)
!!$! if (i_proc==0 .and. verbose>100) then
!!$ if (i_proc==j-1) print *,'bcasting (ik,ia,ib,proc)=',ik,ia,ib,proc(ik,ia,ib)-1,'ip',i_proc
!!$ ! end if
!!$ call MPI_BARRIER(MPI_COMM_WORLD,ierror)
!!$ call MPI_BARRIER(CGYRO_COMM_WORLD,ierror)
!!$ end do
call MPI_Bcast(gyrocolmat(:,:,:,:,ia,ib,ik),n_xi**2*n_energy**2,&
MPI_REAL8,proc(ik,ia,ib)-1,MPI_COMM_WORLD,ierror)
MPI_REAL8,proc(ik,ia,ib)-1,CGYRO_COMM_WORLD,ierror)
if (ia>ib .and. temp(ia)==temp(ib)) then
call MPI_Bcast(gyrocolmat(:,:,:,:,ib,ia,ik),n_xi**2*n_energy**2,&
MPI_REAL8,proc(ik,ia,ib)-1,MPI_COMM_WORLD,ierror)
MPI_REAL8,proc(ik,ia,ib)-1,CGYRO_COMM_WORLD,ierror)
end if
end if
enddo
Expand Down Expand Up @@ -1003,12 +1000,12 @@ subroutine cgyro_init_landau()
if (i_proc==0) then
do i=1,n_proc
if (i>1) then
call MPI_Recv(t,11,MPI_REAL8,i-1,i-1,MPI_COMM_WORLD,status,ierror)
call MPI_Recv(t,11,MPI_REAL8,i-1,i-1,CGYRO_COMM_WORLD,status,ierror)
end if
print *,'i_proc=',i-1,'took',t(1:10),'load',load(i),'rel',t(3)/load(i)
end do
else
call MPI_Send(t,11,MPI_REAL8,0,i_proc,MPI_COMM_WORLD,ierror)
call MPI_Send(t,11,MPI_REAL8,0,i_proc,CGYRO_COMM_WORLD,ierror)
end if

coltestmode: if(collision_test_mode==1) then
Expand Down Expand Up @@ -1036,10 +1033,10 @@ subroutine cgyro_init_landau()
nt2_proc(i_proc+1)=nt2
proc_c=0 ! dummy value if no processor is responsible
do i=1,n_proc
call MPI_BCAST(nc1_proc(i),1,MPI_INTEGER,i-1,MPI_COMM_WORLD,ierror)
call MPI_BCAST(nc2_proc(i),1,MPI_INTEGER,i-1,MPI_COMM_WORLD,ierror)
call MPI_BCAST(nt1_proc(i),1,MPI_INTEGER,i-1,MPI_COMM_WORLD,ierror)
call MPI_BCAST(nt2_proc(i),1,MPI_INTEGER,i-1,MPI_COMM_WORLD,ierror)
call MPI_BCAST(nc1_proc(i),1,MPI_INTEGER,i-1,CGYRO_COMM_WORLD,ierror)
call MPI_BCAST(nc2_proc(i),1,MPI_INTEGER,i-1,CGYRO_COMM_WORLD,ierror)
call MPI_BCAST(nt1_proc(i),1,MPI_INTEGER,i-1,CGYRO_COMM_WORLD,ierror)
call MPI_BCAST(nt2_proc(i),1,MPI_INTEGER,i-1,CGYRO_COMM_WORLD,ierror)
! this assigns the processor with the highest number to the respective
! nc and nt range
proc_c(nc1_proc(i):nc2_proc(i),nt1_proc(i):nt2_proc(i))=i
Expand Down Expand Up @@ -1094,11 +1091,11 @@ subroutine cgyro_init_landau()
**2,L2xi,n_xi,0.,c(:,:,:,:,l),n_xi*n_energy**2)
end do
if (i_proc/=0) then
call MPI_SEND(c,size(c),MPI_REAL8,0,1234,MPI_COMM_WORLD,ierror)
call MPI_SEND(c,size(c),MPI_REAL8,0,1234,CGYRO_COMM_WORLD,ierror)
end if
else
if (i_proc==0) then
call MPI_RECV(c,size(c),MPI_REAL8,proc_c(ic,itor)-1,1234,MPI_COMM_WORLD,status,ierror)
call MPI_RECV(c,size(c),MPI_REAL8,proc_c(ic,itor)-1,1234,CGYRO_COMM_WORLD,status,ierror)
end if
end if
!!$ block
Expand Down Expand Up @@ -1227,11 +1224,11 @@ subroutine cgyro_init_landau()
**2,L2xi,n_xi,0.,c(:,:,:,:,l),n_xi*n_energy**2)
end do
if (i_proc/=0) then
call MPI_SEND(c,size(c),MPI_REAL8,0,1234,MPI_COMM_WORLD,ierror)
call MPI_SEND(c,size(c),MPI_REAL8,0,1234,CGYRO_COMM_WORLD,ierror)
end if
else
if (i_proc==0) then
call MPI_RECV(c,size(c),MPI_REAL8,proc_c(ic,itor)-1,1234,MPI_COMM_WORLD,status,ierror)
call MPI_RECV(c,size(c),MPI_REAL8,proc_c(ic,itor)-1,1234,CGYRO_COMM_WORLD,status,ierror)
end if
end if

Expand Down Expand Up @@ -1285,18 +1282,18 @@ subroutine cgyro_init_landau()
end do
enddo
end if
call MPI_reduce(md,d,1,MPI_REAL8,MPI_MAX,0,MPI_COMM_WORLD,ierror)
call MPI_reduce(md,d,1,MPI_REAL8,MPI_MAX,0,CGYRO_COMM_WORLD,ierror)
if (i_proc==0) print 11,'Max. deviation over all processors:',d
11 format ('cgyro_in._col.: ',A,G23.16)

call MPI_Barrier(MPI_COMM_WORLD,ierror)
call MPI_Barrier(CGYRO_COMM_WORLD,ierror)
call MPI_finalize(ierror)
stop
end if coltestmode

!!$ call MPI_Barrier(MPI_COMM_WORLD,ierror)
!!$ call MPI_Barrier(CGYRO_COMM_WORLD,ierror)
!!$ print *,'i_proc',i_proc,'done with init_landau'
!!$ call MPI_Barrier(MPI_COMM_WORLD,ierror)
!!$ call MPI_Barrier(CGYRO_COMM_WORLD,ierror)

contains
elemental real function sinc(target_k,halfperiod)
Expand Down
7 changes: 4 additions & 3 deletions platform/build/make.inc.TUMBLEWEED
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@ NUMAS_PER_NODE=1

# Compilers

FC = mpifort -DDIMATCOPY -std=gnu -fallow-argument-mismatch -fall-intrinsics -fimplicit-none -J $(GACODE_ROOT)/modules #-fPIC
FC = mpifort -DDIMATCOPY -fallow-argument-mismatch -fall-intrinsics -fimplicit-none -J $(GACODE_ROOT)/modules #-fPIC
#FC = mpifort -DDIMATCOPY -DLANDAU_PREC16 -fallow-argument-mismatch -fall-intrinsics -fimplicit-none -J $(GACODE_ROOT)/modules #-fPIC
#FC = mpifort -std=f2008 -fall-intrinsics -fimplicit-none -J $(GACODE_ROOT)/modules #-fPIC
F77 = mpifort -fimplicit-none -fallow-argument-mismatch
FOMP =-fopenmp
FMATH =-fdefault-real-8 -fdefault-double-8 -I$(FFTW_INC)
FOPT =-O3 -fexternal-blas -march=native -ffree-line-length-0
FOPT =-O3 -std=gnu -fexternal-blas -march=native -ffree-line-length-0
# -floop-nest-optimize -floop-parallelize-all -flto
FDEBUG =-Wall -W -fcheck=all -g -fbacktrace -ffpe-trap=invalid,zero,overflow -finit-real=snan # -std=f2003 -fall-intrinsics
FDEBUG =-Wall -W -fcheck=all -Wno-compare-reals -Wno-integer-division -Wno-unused-variable -g -fbacktrace -ffpe-trap=invalid,zero,overflow -finit-real=snan -std=f2008 -fall-intrinsics
F2PY = f2py

# System math libraries
Expand Down
Loading

0 comments on commit daeeb37

Please sign in to comment.