Skip to content

Commit

Permalink
Merge pull request #1779 from tianshi-liu/devel
Browse files Browse the repository at this point in the history
GPU version of interface-discontinuity-based wavefield injection (pull request 1706).
  • Loading branch information
danielpeter authored Jan 6, 2025
2 parents 0fb684c + 0e8cbe3 commit a8182ff
Show file tree
Hide file tree
Showing 20 changed files with 391 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module load intel/2020u4 intelmpi/2020u4

## set number of processors
NPROC=4
GPU_MODE=false # set to true if using GPU

currentdir=`pwd`

Expand Down Expand Up @@ -95,10 +96,19 @@ mpirun -np $NPROC ./bin/xgenerate_databases
# DATABASES_MPI/proc*_wavefield_discontinuity.bin files
mpirun -np $NPROC ../fk_coupling/compute_fk_injection_field

echo
echo " launch solver..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
if ${GPU_MODE}; then
sed -i "/^GPU_MODE/c\GPU_MODE = .true." DATA/Par_file
echo
echo " launch solver on GPU..."
echo
CUDA_VISIBLE_DEVICES=0 mpirun -np $NPROC ./bin/xspecfem3D
else
sed -i "/^GPU_MODE/c\GPU_MODE = .false." DATA/Par_file
echo
echo " launch solver on CPU ..."
echo
mpirun -np $NPROC ./bin/xspecfem3D
fi

# compute reference seismograms using FK
../fk_coupling/compute_fk_receiver
Expand Down
16 changes: 16 additions & 0 deletions src/gpu/compute_forces_viscoelastic_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1013,6 +1017,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
FORWARD_OR_ADJOINT);
Expand All @@ -1039,6 +1047,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand All @@ -1063,6 +1075,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
mp->d_hprimewgll_xx,
mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
d_kappav, d_muv,
mp->is_wavefield_discontinuity,
mp->d_displ_wd,
mp->d_ispec_to_elem_wd,
mp->d_ibool_wd,
mp->pml_conditions,
mp->d_is_CPML,
3); // 3 == backward
Expand Down
29 changes: 29 additions & 0 deletions src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,28 @@ __device__ __forceinline__ void load_shared_memory_displ_visco(const int* tx, c

/* ----------------------------------------------------------------------------------------------- */

// add displacement discontinuity in case of wavefield discontinuity

__device__ __forceinline__ void add_displacement_discontinuity_element(
const int* tx, const int* working_element,
realw_const_p d_displ_wd,
const int* ispec_to_elem_wd,
const int* ibool_wd,
realw* sh_displx,
realw* sh_disply,
realw* sh_displz) {
int ispec_wd = ispec_to_elem_wd[(*working_element)]-1;
if (ispec_wd >= 0) {
int offset = ispec_wd * NGLL3_PADDED + (*tx);
int iglob_wd = ibool_wd[offset]-1;
sh_displx[(*tx)] = sh_displx[(*tx)] + d_displ_wd[iglob_wd*3];
sh_disply[(*tx)] = sh_disply[(*tx)] + d_displ_wd[iglob_wd*3 + 1];
sh_displz[(*tx)] = sh_displz[(*tx)] + d_displ_wd[iglob_wd*3 + 2];
}
}

/* ----------------------------------------------------------------------------------------------- */

// loads hprime into shared memory for element

__device__ __forceinline__ void load_shared_memory_hprime(const int* tx,
Expand Down Expand Up @@ -738,6 +760,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_elem_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT){
Expand Down Expand Up @@ -849,6 +875,9 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
load_shared_memory_displ<3>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
}else{
load_shared_memory_displ<1>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
if (is_wavefield_discontinuity) {
add_displacement_discontinuity_element(&tx,&working_element,d_displ_wd,d_ispec_to_elem_wd,d_ibool_wd,sh_tempx,sh_tempy,sh_tempz);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/kernel_cuda.mk
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cuda_kernels_OBJS := \
$O/transfer_surface_to_host_kernel.cuda-kernel.o \
$O/UpdateDispVeloc_kernel.cuda-kernel.o \
$O/UpdatePotential_kernel.cuda-kernel.o \
$O/wavefield_discontinuity_kernel.cuda-kernel.o \
$(EMPTY_MACRO)


24 changes: 24 additions & 0 deletions src/gpu/kernels/kernel_proto.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
realw_const_p d_kappav,
realw_const_p d_muv,
const int is_wavefield_discontinuity,
realw_const_p d_displ_wd,
const int* d_ispec_to_element_wd,
const int* d_ibool_wd,
const int pml_conditions,
const int* d_is_CPML,
const int FORWARD_OR_ADJOINT);
Expand Down Expand Up @@ -527,6 +531,26 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou
int backward_simulation) ;


//
// src/gpu/kernels/wavefield_discontinuity_kernel.cu
//

__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
);

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel);


//
// src/gpu/kernels/compute_coupling_ocean_cuda_kernel.cu
//
Expand Down
1 change: 1 addition & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "wavefield_discontinuity_kernel.cu"
41 changes: 41 additions & 0 deletions src/gpu/kernels/wavefield_discontinuity_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
__global__ void add_acceleration_discontinuity_kernel(
realw_const_p accel_wd,
realw_const_p mass_in_wd,
const int* boundary_to_iglob_wd,
const int size, realw* accel
) {
int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x;
int iglob = boundary_to_iglob_wd[id] - 1;
realw mass_in = mass_in_wd[id];
if (id < size) {
accel[iglob*3] = accel[iglob*3] - accel_wd[id*3] * mass_in;
accel[iglob*3 + 1] = accel[iglob*3 + 1] - accel_wd[id*3 + 1] * mass_in;
accel[iglob*3 + 2] = accel[iglob*3 + 2] - accel_wd[id*3 + 2] * mass_in;
}
}

__global__ void add_traction_discontinuity_kernel(
realw_const_p traction_wd,
const int* face_ispec_wd,
const int* face_ijk_wd,
realw_const_p face_jacobian2Dw_wd,
const int* d_ibool,
const int size, realw* accel) {
int igll = threadIdx.x;
int iface_wd = blockIdx.x + gridDim.x*blockIdx.y;
int i, j, k, ispec, iglob;
realw jacobianw;
if (iface_wd < size) {
ispec = face_ispec_wd[iface_wd] - 1;
i = face_ijk_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)]-1;
j = face_ijk_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)]-1;
k = face_ijk_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)]-1;

iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;

jacobianw = face_jacobian2Dw_wd[INDEX2(NGLL2,igll,iface_wd)];
atomicAdd(&accel[iglob*3], traction_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+1], traction_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)] * jacobianw);
atomicAdd(&accel[iglob*3+2], traction_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)] * jacobianw);
}
}
14 changes: 14 additions & 0 deletions src/gpu/mesh_constants_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,20 @@ typedef struct mesh_ {
int use_Kelvin_Voigt_damping;
realw* d_Kelvin_Voigt_eta;

// prescribed wavefield discontinuity on an interface
int is_wavefield_discontinuity;
int* d_ispec_to_elem_wd;
int* d_ibool_wd;
int* d_boundary_to_iglob_wd;
realw* d_mass_in_wd;
int* d_face_ijk_wd;
int* d_face_ispec_wd;
realw* d_face_normal_wd;
realw* d_face_jacobian2Dw_wd;
realw* d_displ_wd;
realw* d_accel_wd;
realw* d_traction_wd;

// for option NB_RUNS_FOR_ACOUSTIC_GPU
int* run_number_of_the_source;

Expand Down
49 changes: 49 additions & 0 deletions src/gpu/prepare_mesh_constants_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ void FC_FUNC_(prepare_constants_device,
int* SAVE_SEISMOGRAMS_ACCELERATION,int* SAVE_SEISMOGRAMS_PRESSURE,
int* h_NB_RUNS_ACOUSTIC_GPU,
int* FAULT_SIMULATION,
int* IS_WAVEFIELD_DISCONTINUITY,
int* UNDO_ATTENUATION_AND_OR_PML,
int* PML_CONDITIONS) {

Expand Down Expand Up @@ -420,6 +421,9 @@ void FC_FUNC_(prepare_constants_device,
// Kelvin_voigt initialization
mp->use_Kelvin_Voigt_damping = 0;

// prescribed wavefield discontinuity
mp->is_wavefield_discontinuity = *IS_WAVEFIELD_DISCONTINUITY;

GPU_ERROR_CHECKING("prepare_constants_device");
}

Expand Down Expand Up @@ -1542,6 +1546,51 @@ void FC_FUNC_(prepare_fault_device,
}


/* ----------------------------------------------------------------------------------------------- */

// wavefield discontinuity

/* ----------------------------------------------------------------------------------------------- */

extern EXTERN_LANG
void FC_FUNC_(prepare_wavefield_discontinuity_device,
PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
long* Mesh_pointer,
int* ispec_to_elem_wd,
int* nglob_wd,
int* nspec_wd,
int* ibool_wd,
int* boundary_to_iglob_wd,
realw* mass_in_wd,
int* nfaces_wd,
int* face_ijk_wd,
int* face_ispec_wd,
realw* face_normal_wd,
realw* face_jacobian2Dw_wd) {
TRACE("prepare_wavefield_discontinuity_device");
Mesh* mp = (Mesh*)(*Mesh_pointer);

// arrays
gpuCreateCopy_todevice_int((void**)&mp->d_ispec_to_elem_wd, ispec_to_elem_wd, mp->NSPEC_AB);
gpuCreateCopy_todevice_int((void**)&mp->d_boundary_to_iglob_wd, boundary_to_iglob_wd, (*nglob_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_mass_in_wd, mass_in_wd, (*nglob_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ispec_wd, face_ispec_wd, (*nfaces_wd));
gpuCreateCopy_todevice_int((void**)&mp->d_face_ijk_wd, face_ijk_wd, 3*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_normal_wd, face_normal_wd, NDIM*NGLL2*(*nfaces_wd));
gpuCreateCopy_todevice_realw((void**)&mp->d_face_jacobian2Dw_wd, face_jacobian2Dw_wd, NGLL2*(*nfaces_wd));
// global indexing for wavefield discontinuity points (padded)
int size_padded = NGLL3_PADDED * (*nspec_wd);
gpuMalloc_int((void**) &mp->d_ibool_wd, size_padded);
gpuMemcpy2D_todevice_int(mp->d_ibool_wd, NGLL3_PADDED, ibool_wd, NGLL3, NGLL3, (*nspec_wd));

// allocate discontinuity field
int size = NDIM * (*nglob_wd);
gpuMalloc_realw((void**)&(mp->d_displ_wd),size);
gpuMalloc_realw((void**)&(mp->d_accel_wd),size);
size = NDIM * NGLL2 * (*nfaces_wd);
gpuMalloc_realw((void**)&(mp->d_traction_wd),size);
}

/* ----------------------------------------------------------------------------------------------- */

// cleanup
Expand Down
1 change: 1 addition & 0 deletions src/gpu/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ gpu_specfem3D_OBJECTS = \
$O/transfer_fields_cuda.o \
$O/update_displacement_cuda.o \
$O/write_seismograms_cuda.o \
$O/wavefield_discontinuity_cuda.o \
$(EMPTY_MACRO)

gpu_specfem3D_STUBS = \
Expand Down
33 changes: 32 additions & 1 deletion src/gpu/specfem3D_gpu_cuda_method_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,21 @@ void FC_FUNC_(prepare_fault_device,
int* KELVIN_VOIGT_DAMPING,
realw* Kelvin_Voigt_eta) {}

void FC_FUNC_(prepare_wavefield_discontinuity_device,
PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
long* Mesh_pointer,
int* ispec_to_elem_wd,
int* nglob_wd,
int* nspec_wd,
int* ibool_wd,
int* boundary_to_iglob_wd,
realw* mass_in_wd,
int* nfaces_wd,
int* face_ijk_wd,
int* face_ispec_wd,
realw* face_normal_wd,
realw* face_jacobian2Dw_wd) {}

void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer,
int* ACOUSTIC_SIMULATION,
Expand Down Expand Up @@ -675,7 +690,6 @@ void FC_FUNC_(get_smooth_gpu,
GET_SMOOTH_gpu)(long * smooth_pointer,
realw * data_smooth) {}


//
// src/gpu/transfer_fields_cuda.cu
//
Expand Down Expand Up @@ -738,6 +752,12 @@ void FC_FUNC_(transfer_pml_displ_from_device,
void FC_FUNC_(transfer_pml_displ_to_device,
TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {}

void FC_FUNC_(transfer_wavefield_discontinuity_to_device,
TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)(
int* size_point, int* size_face,
realw* displ_wd, realw* accel_wd,
realw* traction_wd, long* Mesh_pointer) {}

void FC_FUNC_(transfer_b_rmemory_to_device,
TRANSFER_B_RMEMORY_TO_DEVICE)(long* Mesh_pointer,
realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,
Expand Down Expand Up @@ -943,3 +963,14 @@ void FC_FUNC_(compute_seismograms_cuda,
int* ELASTIC_SIMULATION,
int* USE_TRICK_FOR_BETTER_PRESSURE) {}

//
// src/gpu/wavefield_discontinuity_cuda.cu
//

void FC_FUNC_(wavefield_discontinuity_add_traction_cuda,
WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points,
int* size_faces,
long* Mesh_pointer){}



Loading

0 comments on commit a8182ff

Please sign in to comment.