diff --git a/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh b/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh index 5b834ec53..8538e8595 100755 --- a/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh +++ b/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh @@ -5,6 +5,7 @@ module load intel/2020u4 intelmpi/2020u4 ## set number of processors NPROC=4 +GPU_MODE=false # set to true if using GPU currentdir=`pwd` @@ -95,10 +96,19 @@ mpirun -np $NPROC ./bin/xgenerate_databases # DATABASES_MPI/proc*_wavefield_discontinuity.bin files mpirun -np $NPROC ../fk_coupling/compute_fk_injection_field -echo -echo " launch solver..." -echo -mpirun -np $NPROC ./bin/xspecfem3D +if ${GPU_MODE}; then + sed -i "/^GPU_MODE/c\GPU_MODE = .true." DATA/Par_file + echo + echo " launch solver on GPU..." + echo + CUDA_VISIBLE_DEVICES=0 mpirun -np $NPROC ./bin/xspecfem3D +else + sed -i "/^GPU_MODE/c\GPU_MODE = .false." DATA/Par_file + echo + echo " launch solver on CPU ..." + echo + mpirun -np $NPROC ./bin/xspecfem3D +fi # compute reference seismograms using FK ../fk_coupling/compute_fk_receiver diff --git a/src/gpu/compute_forces_viscoelastic_cuda.cu b/src/gpu/compute_forces_viscoelastic_cuda.cu index 1cfb0a512..c93b5813a 100644 --- a/src/gpu/compute_forces_viscoelastic_cuda.cu +++ b/src/gpu/compute_forces_viscoelastic_cuda.cu @@ -990,6 +990,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, mp->d_hprimewgll_xx, mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz, d_kappav, d_muv, + mp->is_wavefield_discontinuity, + mp->d_displ_wd, + mp->d_ispec_to_elem_wd, + mp->d_ibool_wd, mp->pml_conditions, mp->d_is_CPML, FORWARD_OR_ADJOINT); @@ -1013,6 +1017,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, mp->d_hprimewgll_xx, mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz, d_kappav, d_muv, + mp->is_wavefield_discontinuity, + mp->d_displ_wd, + mp->d_ispec_to_elem_wd, + mp->d_ibool_wd, mp->pml_conditions, mp->d_is_CPML, FORWARD_OR_ADJOINT); @@ -1039,6 +1047,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, mp->d_hprimewgll_xx, mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz, d_kappav, d_muv, + mp->is_wavefield_discontinuity, + mp->d_displ_wd, + mp->d_ispec_to_elem_wd, + mp->d_ibool_wd, mp->pml_conditions, mp->d_is_CPML, 3); // 3 == backward @@ -1063,6 +1075,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, mp->d_hprimewgll_xx, mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz, d_kappav, d_muv, + mp->is_wavefield_discontinuity, + mp->d_displ_wd, + mp->d_ispec_to_elem_wd, + mp->d_ibool_wd, mp->pml_conditions, mp->d_is_CPML, 3); // 3 == backward diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu index 98c42b507..93e87d273 100644 --- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu +++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu @@ -336,6 +336,28 @@ __device__ __forceinline__ void load_shared_memory_displ_visco(const int* tx, c /* ----------------------------------------------------------------------------------------------- */ +// add displacement discontinuity in case of wavefield discontinuity + +__device__ __forceinline__ void add_displacement_discontinuity_element( + const int* tx, const int* working_element, + realw_const_p d_displ_wd, + const int* ispec_to_elem_wd, + const int* ibool_wd, + realw* sh_displx, + realw* sh_disply, + realw* sh_displz) { + int ispec_wd = ispec_to_elem_wd[(*working_element)]-1; + if (ispec_wd >= 0) { + int offset = ispec_wd * NGLL3_PADDED + (*tx); + int iglob_wd = ibool_wd[offset]-1; + sh_displx[(*tx)] = sh_displx[(*tx)] + d_displ_wd[iglob_wd*3]; + sh_disply[(*tx)] = sh_disply[(*tx)] + d_displ_wd[iglob_wd*3 + 1]; + sh_displz[(*tx)] = sh_displz[(*tx)] + d_displ_wd[iglob_wd*3 + 2]; + } +} + +/* ----------------------------------------------------------------------------------------------- */ + // loads hprime into shared memory for element __device__ __forceinline__ void load_shared_memory_hprime(const int* tx, @@ -738,6 +760,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute, realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, realw_const_p d_kappav, realw_const_p d_muv, + const int is_wavefield_discontinuity, + realw_const_p d_displ_wd, + const int* d_ispec_to_elem_wd, + const int* d_ibool_wd, const int pml_conditions, const int* d_is_CPML, const int FORWARD_OR_ADJOINT){ @@ -849,6 +875,9 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute, load_shared_memory_displ<3>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz); }else{ load_shared_memory_displ<1>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz); + if (is_wavefield_discontinuity) { + add_displacement_discontinuity_element(&tx,&working_element,d_displ_wd,d_ispec_to_elem_wd,d_ibool_wd,sh_tempx,sh_tempy,sh_tempz); + } } } diff --git a/src/gpu/kernels/kernel_cuda.mk b/src/gpu/kernels/kernel_cuda.mk index 2073bdb26..d1820f773 100644 --- a/src/gpu/kernels/kernel_cuda.mk +++ b/src/gpu/kernels/kernel_cuda.mk @@ -40,6 +40,7 @@ cuda_kernels_OBJS := \ $O/transfer_surface_to_host_kernel.cuda-kernel.o \ $O/UpdateDispVeloc_kernel.cuda-kernel.o \ $O/UpdatePotential_kernel.cuda-kernel.o \ + $O/wavefield_discontinuity_kernel.cuda-kernel.o \ $(EMPTY_MACRO) diff --git a/src/gpu/kernels/kernel_proto.cu.h b/src/gpu/kernels/kernel_proto.cu.h index 00db7749e..7902b3808 100644 --- a/src/gpu/kernels/kernel_proto.cu.h +++ b/src/gpu/kernels/kernel_proto.cu.h @@ -111,6 +111,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute, realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz, realw_const_p d_kappav, realw_const_p d_muv, + const int is_wavefield_discontinuity, + realw_const_p d_displ_wd, + const int* d_ispec_to_element_wd, + const int* d_ibool_wd, const int pml_conditions, const int* d_is_CPML, const int FORWARD_OR_ADJOINT); @@ -527,6 +531,26 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou int backward_simulation) ; +// +// src/gpu/kernels/wavefield_discontinuity_kernel.cu +// + +__global__ void add_acceleration_discontinuity_kernel( + realw_const_p accel_wd, + realw_const_p mass_in_wd, + const int* boundary_to_iglob_wd, + const int size, realw* accel + ); + +__global__ void add_traction_discontinuity_kernel( + realw_const_p traction_wd, + const int* face_ispec_wd, + const int* face_ijk_wd, + realw_const_p face_jacobian2Dw_wd, + const int* d_ibool, + const int size, realw* accel); + + // // src/gpu/kernels/compute_coupling_ocean_cuda_kernel.cu // diff --git a/src/gpu/kernels/wavefield_discontinuity_kernel.cpp b/src/gpu/kernels/wavefield_discontinuity_kernel.cpp new file mode 100644 index 000000000..59f0435b3 --- /dev/null +++ b/src/gpu/kernels/wavefield_discontinuity_kernel.cpp @@ -0,0 +1 @@ +#include "wavefield_discontinuity_kernel.cu" diff --git a/src/gpu/kernels/wavefield_discontinuity_kernel.cu b/src/gpu/kernels/wavefield_discontinuity_kernel.cu new file mode 100644 index 000000000..0c8dd7b80 --- /dev/null +++ b/src/gpu/kernels/wavefield_discontinuity_kernel.cu @@ -0,0 +1,41 @@ +__global__ void add_acceleration_discontinuity_kernel( + realw_const_p accel_wd, + realw_const_p mass_in_wd, + const int* boundary_to_iglob_wd, + const int size, realw* accel + ) { + int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x; + int iglob = boundary_to_iglob_wd[id] - 1; + realw mass_in = mass_in_wd[id]; + if (id < size) { + accel[iglob*3] = accel[iglob*3] - accel_wd[id*3] * mass_in; + accel[iglob*3 + 1] = accel[iglob*3 + 1] - accel_wd[id*3 + 1] * mass_in; + accel[iglob*3 + 2] = accel[iglob*3 + 2] - accel_wd[id*3 + 2] * mass_in; + } +} + +__global__ void add_traction_discontinuity_kernel( + realw_const_p traction_wd, + const int* face_ispec_wd, + const int* face_ijk_wd, + realw_const_p face_jacobian2Dw_wd, + const int* d_ibool, + const int size, realw* accel) { + int igll = threadIdx.x; + int iface_wd = blockIdx.x + gridDim.x*blockIdx.y; + int i, j, k, ispec, iglob; + realw jacobianw; + if (iface_wd < size) { + ispec = face_ispec_wd[iface_wd] - 1; + i = face_ijk_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)]-1; + j = face_ijk_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)]-1; + k = face_ijk_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)]-1; + + iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1; + + jacobianw = face_jacobian2Dw_wd[INDEX2(NGLL2,igll,iface_wd)]; + atomicAdd(&accel[iglob*3], traction_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)] * jacobianw); + atomicAdd(&accel[iglob*3+1], traction_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)] * jacobianw); + atomicAdd(&accel[iglob*3+2], traction_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)] * jacobianw); + } +} diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h index 495ff2528..8bf524623 100644 --- a/src/gpu/mesh_constants_gpu.h +++ b/src/gpu/mesh_constants_gpu.h @@ -789,6 +789,20 @@ typedef struct mesh_ { int use_Kelvin_Voigt_damping; realw* d_Kelvin_Voigt_eta; + // prescribed wavefield discontinuity on an interface + int is_wavefield_discontinuity; + int* d_ispec_to_elem_wd; + int* d_ibool_wd; + int* d_boundary_to_iglob_wd; + realw* d_mass_in_wd; + int* d_face_ijk_wd; + int* d_face_ispec_wd; + realw* d_face_normal_wd; + realw* d_face_jacobian2Dw_wd; + realw* d_displ_wd; + realw* d_accel_wd; + realw* d_traction_wd; + // for option NB_RUNS_FOR_ACOUSTIC_GPU int* run_number_of_the_source; diff --git a/src/gpu/prepare_mesh_constants_cuda.cu b/src/gpu/prepare_mesh_constants_cuda.cu index 78dcf5069..45a043dac 100644 --- a/src/gpu/prepare_mesh_constants_cuda.cu +++ b/src/gpu/prepare_mesh_constants_cuda.cu @@ -106,6 +106,7 @@ void FC_FUNC_(prepare_constants_device, int* SAVE_SEISMOGRAMS_ACCELERATION,int* SAVE_SEISMOGRAMS_PRESSURE, int* h_NB_RUNS_ACOUSTIC_GPU, int* FAULT_SIMULATION, + int* IS_WAVEFIELD_DISCONTINUITY, int* UNDO_ATTENUATION_AND_OR_PML, int* PML_CONDITIONS) { @@ -420,6 +421,9 @@ void FC_FUNC_(prepare_constants_device, // Kelvin_voigt initialization mp->use_Kelvin_Voigt_damping = 0; + // prescribed wavefield discontinuity + mp->is_wavefield_discontinuity = *IS_WAVEFIELD_DISCONTINUITY; + GPU_ERROR_CHECKING("prepare_constants_device"); } @@ -1542,6 +1546,51 @@ void FC_FUNC_(prepare_fault_device, } +/* ----------------------------------------------------------------------------------------------- */ + +// wavefield discontinuity + +/* ----------------------------------------------------------------------------------------------- */ + +extern EXTERN_LANG +void FC_FUNC_(prepare_wavefield_discontinuity_device, + PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)( + long* Mesh_pointer, + int* ispec_to_elem_wd, + int* nglob_wd, + int* nspec_wd, + int* ibool_wd, + int* boundary_to_iglob_wd, + realw* mass_in_wd, + int* nfaces_wd, + int* face_ijk_wd, + int* face_ispec_wd, + realw* face_normal_wd, + realw* face_jacobian2Dw_wd) { + TRACE("prepare_wavefield_discontinuity_device"); + Mesh* mp = (Mesh*)(*Mesh_pointer); + + // arrays + gpuCreateCopy_todevice_int((void**)&mp->d_ispec_to_elem_wd, ispec_to_elem_wd, mp->NSPEC_AB); + gpuCreateCopy_todevice_int((void**)&mp->d_boundary_to_iglob_wd, boundary_to_iglob_wd, (*nglob_wd)); + gpuCreateCopy_todevice_realw((void**)&mp->d_mass_in_wd, mass_in_wd, (*nglob_wd)); + gpuCreateCopy_todevice_int((void**)&mp->d_face_ispec_wd, face_ispec_wd, (*nfaces_wd)); + gpuCreateCopy_todevice_int((void**)&mp->d_face_ijk_wd, face_ijk_wd, 3*NGLL2*(*nfaces_wd)); + gpuCreateCopy_todevice_realw((void**)&mp->d_face_normal_wd, face_normal_wd, NDIM*NGLL2*(*nfaces_wd)); + gpuCreateCopy_todevice_realw((void**)&mp->d_face_jacobian2Dw_wd, face_jacobian2Dw_wd, NGLL2*(*nfaces_wd)); + // global indexing for wavefield discontinuity points (padded) + int size_padded = NGLL3_PADDED * (*nspec_wd); + gpuMalloc_int((void**) &mp->d_ibool_wd, size_padded); + gpuMemcpy2D_todevice_int(mp->d_ibool_wd, NGLL3_PADDED, ibool_wd, NGLL3, NGLL3, (*nspec_wd)); + + // allocate discontinuity field + int size = NDIM * (*nglob_wd); + gpuMalloc_realw((void**)&(mp->d_displ_wd),size); + gpuMalloc_realw((void**)&(mp->d_accel_wd),size); + size = NDIM * NGLL2 * (*nfaces_wd); + gpuMalloc_realw((void**)&(mp->d_traction_wd),size); +} + /* ----------------------------------------------------------------------------------------------- */ // cleanup diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk index 27e390595..922141d95 100644 --- a/src/gpu/rules.mk +++ b/src/gpu/rules.mk @@ -61,6 +61,7 @@ gpu_specfem3D_OBJECTS = \ $O/transfer_fields_cuda.o \ $O/update_displacement_cuda.o \ $O/write_seismograms_cuda.o \ + $O/wavefield_discontinuity_cuda.o \ $(EMPTY_MACRO) gpu_specfem3D_STUBS = \ diff --git a/src/gpu/specfem3D_gpu_cuda_method_stubs.c b/src/gpu/specfem3D_gpu_cuda_method_stubs.c index b3af872be..af7c37795 100644 --- a/src/gpu/specfem3D_gpu_cuda_method_stubs.c +++ b/src/gpu/specfem3D_gpu_cuda_method_stubs.c @@ -639,6 +639,21 @@ void FC_FUNC_(prepare_fault_device, int* KELVIN_VOIGT_DAMPING, realw* Kelvin_Voigt_eta) {} +void FC_FUNC_(prepare_wavefield_discontinuity_device, + PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)( + long* Mesh_pointer, + int* ispec_to_elem_wd, + int* nglob_wd, + int* nspec_wd, + int* ibool_wd, + int* boundary_to_iglob_wd, + realw* mass_in_wd, + int* nfaces_wd, + int* face_ijk_wd, + int* face_ispec_wd, + realw* face_normal_wd, + realw* face_jacobian2Dw_wd) {} + void FC_FUNC_(prepare_cleanup_device, PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer, int* ACOUSTIC_SIMULATION, @@ -675,7 +690,6 @@ void FC_FUNC_(get_smooth_gpu, GET_SMOOTH_gpu)(long * smooth_pointer, realw * data_smooth) {} - // // src/gpu/transfer_fields_cuda.cu // @@ -738,6 +752,12 @@ void FC_FUNC_(transfer_pml_displ_from_device, void FC_FUNC_(transfer_pml_displ_to_device, TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {} +void FC_FUNC_(transfer_wavefield_discontinuity_to_device, + TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)( + int* size_point, int* size_face, + realw* displ_wd, realw* accel_wd, + realw* traction_wd, long* Mesh_pointer) {} + void FC_FUNC_(transfer_b_rmemory_to_device, TRANSFER_B_RMEMORY_TO_DEVICE)(long* Mesh_pointer, realw* b_R_xx,realw* b_R_yy,realw* b_R_xy, @@ -943,3 +963,14 @@ void FC_FUNC_(compute_seismograms_cuda, int* ELASTIC_SIMULATION, int* USE_TRICK_FOR_BETTER_PRESSURE) {} +// +// src/gpu/wavefield_discontinuity_cuda.cu +// + +void FC_FUNC_(wavefield_discontinuity_add_traction_cuda, + WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points, + int* size_faces, + long* Mesh_pointer){} + + + diff --git a/src/gpu/transfer_fields_cuda.cu b/src/gpu/transfer_fields_cuda.cu index 450fa677e..6bcf4dccf 100644 --- a/src/gpu/transfer_fields_cuda.cu +++ b/src/gpu/transfer_fields_cuda.cu @@ -303,6 +303,29 @@ void FC_FUNC_(transfer_pml_displ_from_device, /* ----------------------------------------------------------------------------------------------- */ +// prescribed wavefield discontinuity. this needs to be called in every time step + +/* ----------------------------------------------------------------------------------------------- */ + +extern EXTERN_LANG +void FC_FUNC_(transfer_wavefield_discontinuity_to_device, + TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)( + int* size_point, int* size_face, + realw* displ_wd, realw* accel_wd, + realw* traction_wd, long* Mesh_pointer) { + + TRACE("transfer_wavefield_discontinuity_to_device"); + + Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container + + gpuMemcpy_todevice_realw(mp->d_displ_wd, displ_wd, (*size_point)); + gpuMemcpy_todevice_realw(mp->d_accel_wd, accel_wd, (*size_point)); + gpuMemcpy_todevice_realw(mp->d_traction_wd, traction_wd, (*size_face)); + +} + +/* ----------------------------------------------------------------------------------------------- */ + extern EXTERN_LANG void FC_FUNC_(transfer_pml_displ_to_device, TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) { diff --git a/src/gpu/wavefield_discontinuity_cuda.cu b/src/gpu/wavefield_discontinuity_cuda.cu new file mode 100644 index 000000000..17f07f89f --- /dev/null +++ b/src/gpu/wavefield_discontinuity_cuda.cu @@ -0,0 +1,71 @@ +#include "mesh_constants_gpu.h" + +extern EXTERN_LANG +void FC_FUNC_(wavefield_discontinuity_add_traction_cuda, + WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points, + int* size_faces, + long* Mesh_pointer){ + TRACE("wavefield_discontinuity_add_traction_cuda"); + Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container + if (mp->is_wavefield_discontinuity) { + int size = (*size_points); + int blocksize = BLOCKSIZE_KERNEL1; + int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize; + + int num_blocks_x, num_blocks_y; + get_blocks_xy(size_padded/blocksize,&num_blocks_x,&num_blocks_y); + + dim3 grid(num_blocks_x,num_blocks_y); + dim3 threads(blocksize,1,1); + +#ifdef USE_CUDA + if (run_cuda) { + add_acceleration_discontinuity_kernel + <<compute_stream>>>(mp->d_accel_wd, + mp->d_mass_in_wd, + mp->d_boundary_to_iglob_wd, + size, mp->d_accel); + } +#endif +#ifdef USE_HIP + if (run_hip) { + hipLaunchKernelGGL(add_acceleration_discontinuity_kernel, + dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_accel_wd, + mp->d_mass_in_wd, + mp->d_boundary_to_iglob_wd, + size, mp->d_accel); + } +#endif + + size = (*size_faces); + blocksize = NGLL2; + + get_blocks_xy(size,&num_blocks_x,&num_blocks_y); + + dim3 grid2(num_blocks_x,num_blocks_y); + dim3 threads2(blocksize,1,1); + +#ifdef USE_CUDA + if (run_cuda) { + add_traction_discontinuity_kernel + <<compute_stream>>>(mp->d_traction_wd, + mp->d_face_ispec_wd, + mp->d_face_ijk_wd, + mp->d_face_jacobian2Dw_wd, + mp->d_ibool, + size, mp->d_accel); + } +#endif +#ifdef USE_HIP + if (run_hip) { + hipLaunchKernelGGL(add_traction_discontinuity_kernel, + dim3(grid2), dim3(threads2), 0, mp->compute_stream, mp->d_traction_wd, + mp->d_face_ispec_wd, + mp->d_face_ijk_wd, + mp->d_face_jacobian2Dw_wd, + mp->d_ibool, + size, mp->d_accel); + } +#endif + } +} diff --git a/src/shared/read_parameter_file.F90 b/src/shared/read_parameter_file.F90 index ca5bef914..c215ebb6b 100644 --- a/src/shared/read_parameter_file.F90 +++ b/src/shared/read_parameter_file.F90 @@ -1076,8 +1076,6 @@ subroutine check_simulation_parameters() endif if (IS_WAVEFIELD_DISCONTINUITY) then - if (GPU_MODE) & - stop 'wavefield discontinuity problem cannot be solved on GPU yet' if (ATTENUATION) & stop 'wavefield discontinuity problem cannot be solved with attenuation' endif diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 index 54bac90a7..c9e7b92ba 100644 --- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 +++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 @@ -43,7 +43,9 @@ subroutine compute_forces_viscoelastic_calling() !! solving wavefield discontinuity problem with non-split-node scheme use wavefield_discontinuity_solver, only: & - add_traction_discontinuity, read_wavefield_discontinuity_file + add_traction_discontinuity, read_wavefield_discontinuity_file, & + transfer_wavefield_discontinuity_to_GPU, & + add_traction_discontinuity_GPU implicit none @@ -112,6 +114,9 @@ subroutine compute_forces_viscoelastic_calling() !! note that this is not called in case of adjoint simulation if (IS_WAVEFIELD_DISCONTINUITY .and. (SIMULATION_TYPE == 1)) then call read_wavefield_discontinuity_file() + if (GPU_MODE) then + call transfer_wavefield_discontinuity_to_GPU() + endif endif ! distinguishes two runs: for elements in contact with MPI interfaces, and elements within the partitions @@ -284,7 +289,11 @@ subroutine compute_forces_viscoelastic_calling() !! because these terms need to be used in MPI calls !! note that this is not called in case of adjoint simulation if (IS_WAVEFIELD_DISCONTINUITY .and. (SIMULATION_TYPE == 1)) then - call add_traction_discontinuity(accel, NGLOB_AB) + if (GPU_MODE) then + call add_traction_discontinuity_GPU() + else + call add_traction_discontinuity(accel, NGLOB_AB) + endif endif endif ! iphase diff --git a/src/specfem3D/finalize_simulation.f90 b/src/specfem3D/finalize_simulation.f90 index 5b32e7657..fb27c1c80 100644 --- a/src/specfem3D/finalize_simulation.f90 +++ b/src/specfem3D/finalize_simulation.f90 @@ -141,6 +141,10 @@ subroutine finalize_simulation_cleanup() use fault_solver_dynamic, only: SIMULATION_TYPE_DYN,BC_DYNFLT_free use fault_solver_kinematic, only: SIMULATION_TYPE_KIN,BC_KINFLT_free + !! solving wavefield discontinuity problem with non-split-node scheme + use wavefield_discontinuity_solver, only: & + finalize_wavefield_discontinuity + implicit none ! from here on, no gpu data is needed anymore @@ -150,6 +154,9 @@ subroutine finalize_simulation_cleanup() ! C-PML absorbing boundary conditions deallocates C_PML arrays if (PML_CONDITIONS) call pml_cleanup() + ! cleanup for wavefield discontinuity + if (IS_WAVEFIELD_DISCONTINUITY) call finalize_wavefield_discontinuity() + ! free arrays ! mass matrices if (ELASTIC_SIMULATION) then diff --git a/src/specfem3D/iterate_time.F90 b/src/specfem3D/iterate_time.F90 index 40ba19e27..ea29a7a38 100644 --- a/src/specfem3D/iterate_time.F90 +++ b/src/specfem3D/iterate_time.F90 @@ -40,12 +40,6 @@ subroutine iterate_time() ! hdf5 i/o server use io_server_hdf5, only: do_io_start_idle,pass_info_to_io - !! solving wavefield discontinuity problem with non-split-node scheme - use wavefield_discontinuity_solver, only: & - read_mesh_databases_wavefield_discontinuity, & - open_wavefield_discontinuity_file, & - finalize_wavefield_discontinuity - implicit none ! for EXACT_UNDOING_TO_DISK @@ -214,13 +208,6 @@ subroutine iterate_time() ! get MPI starting time_start = wtime() - !! solving wavefield discontinuity problem with - !! non-split-node scheme - if (IS_WAVEFIELD_DISCONTINUITY) then - call read_mesh_databases_wavefield_discontinuity() - call open_wavefield_discontinuity_file() - endif - ! LTS if (LTS_MODE) then ! LTS steps through its own time iterations - for now @@ -346,12 +333,6 @@ subroutine iterate_time() ! enddo ! end of main time loop - !! solving wavefield discontinuity problem with - !! non-split-node scheme - if (IS_WAVEFIELD_DISCONTINUITY) then - call finalize_wavefield_discontinuity() - endif - ! goto point for LTS to finish time loop 777 continue diff --git a/src/specfem3D/prepare_gpu.f90 b/src/specfem3D/prepare_gpu.f90 index b7136f58a..d2c39b4cd 100644 --- a/src/specfem3D/prepare_gpu.f90 +++ b/src/specfem3D/prepare_gpu.f90 @@ -43,6 +43,8 @@ subroutine prepare_GPU() pml_convolution_coef_alpha,pml_convolution_coef_beta, & pml_convolution_coef_abar,pml_convolution_coef_strain + use wavefield_discontinuity_solver, only: prepare_wavefield_discontinuity_GPU + implicit none ! local parameters @@ -97,6 +99,7 @@ subroutine prepare_GPU() SAVE_SEISMOGRAMS_ACCELERATION,SAVE_SEISMOGRAMS_PRESSURE, & NB_RUNS_ACOUSTIC_GPU, & FAULT_SIMULATION, & + IS_WAVEFIELD_DISCONTINUITY, & UNDO_ATTENUATION_AND_OR_PML, & PML_CONDITIONS) @@ -279,6 +282,16 @@ subroutine prepare_GPU() endif endif + ! prepares wavefield discontinuity + if (IS_WAVEFIELD_DISCONTINUITY) then + ! user output + if (myrank == 0) then + write(IMAIN,*) " loading wavefield discontinuity" + call flush_IMAIN() + endif + call prepare_wavefield_discontinuity_GPU() + endif + ! synchronizes processes call synchronize_all() diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90 index f9b5574e9..56ad2de13 100644 --- a/src/specfem3D/prepare_timerun.F90 +++ b/src/specfem3D/prepare_timerun.F90 @@ -34,6 +34,10 @@ subroutine prepare_timerun() use specfem_par_poroelastic use specfem_par_movie + !! solving wavefield discontinuity problem with non-split-node scheme + use wavefield_discontinuity_solver, only: & + prepare_timerun_wavefield_discontinuity + implicit none ! local parameters @@ -81,6 +85,9 @@ subroutine prepare_timerun() ! Stacey boundaries call prepare_timerun_stacey() + ! wavefield discontinuity + call prepare_timerun_wavefield_discontinuity() + ! prepares ADJOINT simulations call prepare_timerun_adjoint() diff --git a/src/specfem3D/wavefield_discontinuity_solver.f90 b/src/specfem3D/wavefield_discontinuity_solver.f90 index 2c4c94841..f04432136 100644 --- a/src/specfem3D/wavefield_discontinuity_solver.f90 +++ b/src/specfem3D/wavefield_discontinuity_solver.f90 @@ -94,6 +94,15 @@ module wavefield_discontinuity_solver contains + subroutine prepare_timerun_wavefield_discontinuity() + use specfem_par, only: IS_WAVEFIELD_DISCONTINUITY + implicit none + if (IS_WAVEFIELD_DISCONTINUITY) then + call read_mesh_databases_wavefield_discontinuity() + call open_wavefield_discontinuity_file() + endif + end subroutine prepare_timerun_wavefield_discontinuity + subroutine read_mesh_databases_wavefield_discontinuity() use constants, only: IFILE_WAVEFIELD_DISCONTINUITY, & FNAME_WAVEFIELD_DISCONTINUITY_DATABASE @@ -145,6 +154,27 @@ subroutine read_wavefield_discontinuity_file() read(IFILE_WAVEFIELD_DISCONTINUITY) traction_wd end subroutine read_wavefield_discontinuity_file + subroutine transfer_wavefield_discontinuity_to_GPU() + use specfem_par, only: Mesh_pointer, NDIM, NGLLSQUARE + implicit none + call transfer_wavefield_discontinuity_to_device(nglob_wd*NDIM, & + nfaces_wd*NDIM*NGLLSQUARE, & + displ_wd, accel_wd, & + traction_wd, Mesh_pointer) + end subroutine transfer_wavefield_discontinuity_to_GPU + + subroutine prepare_wavefield_discontinuity_GPU() + use specfem_par, only: Mesh_pointer + implicit none + call prepare_wavefield_discontinuity_device(Mesh_pointer, ispec_to_elem_wd,& + nglob_wd, nspec_wd, ibool_wd,& + boundary_to_iglob_wd,& + mass_in_wd,& + nfaces_wd, face_ijk_wd,& + face_ispec_wd, face_normal_wd,& + face_jacobian2dw_wd) + end subroutine prepare_wavefield_discontinuity_GPU + subroutine finalize_wavefield_discontinuity() use constants, only: IFILE_WAVEFIELD_DISCONTINUITY implicit none @@ -205,4 +235,11 @@ subroutine add_traction_discontinuity(accel, nglob) enddo enddo end subroutine add_traction_discontinuity + + subroutine add_traction_discontinuity_GPU() + use specfem_par, only: Mesh_pointer + implicit none + call wavefield_discontinuity_add_traction_cuda(nglob_wd, nfaces_wd, & + Mesh_pointer) + end subroutine add_traction_discontinuity_GPU end module wavefield_discontinuity_solver