diff --git a/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh b/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh
index 5b834ec53..8538e8595 100755
--- a/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh
+++ b/EXAMPLES/applications/wavefield_discontinuity/test_small_injection/run_this_example.sh
@@ -5,6 +5,7 @@ module load intel/2020u4 intelmpi/2020u4
 
 ## set number of processors
 NPROC=4
+GPU_MODE=false # set to true if using GPU
 
 currentdir=`pwd`
 
@@ -95,10 +96,19 @@ mpirun -np $NPROC ./bin/xgenerate_databases
 # DATABASES_MPI/proc*_wavefield_discontinuity.bin files
 mpirun -np $NPROC ../fk_coupling/compute_fk_injection_field
 
-echo
-echo "  launch solver..."
-echo
-mpirun -np $NPROC ./bin/xspecfem3D
+if ${GPU_MODE}; then
+  sed -i "/^GPU_MODE/c\GPU_MODE                           = .true." DATA/Par_file
+  echo
+  echo "  launch solver on GPU..."
+  echo
+  CUDA_VISIBLE_DEVICES=0 mpirun -np $NPROC ./bin/xspecfem3D
+else
+  sed -i "/^GPU_MODE/c\GPU_MODE                           = .false." DATA/Par_file
+  echo
+  echo "  launch solver on CPU ..."
+  echo
+  mpirun -np $NPROC ./bin/xspecfem3D
+fi
 
 # compute reference seismograms using FK
 ../fk_coupling/compute_fk_receiver
diff --git a/src/gpu/compute_forces_viscoelastic_cuda.cu b/src/gpu/compute_forces_viscoelastic_cuda.cu
index 1cfb0a512..c93b5813a 100644
--- a/src/gpu/compute_forces_viscoelastic_cuda.cu
+++ b/src/gpu/compute_forces_viscoelastic_cuda.cu
@@ -990,6 +990,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
                                                                                mp->d_hprimewgll_xx,
                                                                                mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
                                                                                d_kappav, d_muv,
+                                                                               mp->is_wavefield_discontinuity,
+                                                                               mp->d_displ_wd,
+                                                                               mp->d_ispec_to_elem_wd,
+                                                                               mp->d_ibool_wd,
                                                                                mp->pml_conditions,
                                                                                mp->d_is_CPML,
                                                                                FORWARD_OR_ADJOINT);
@@ -1013,6 +1017,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
                                                              mp->d_hprimewgll_xx,
                                                              mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
                                                              d_kappav, d_muv,
+                                                             mp->is_wavefield_discontinuity,
+                                                             mp->d_displ_wd,
+                                                             mp->d_ispec_to_elem_wd,
+                                                             mp->d_ibool_wd,
                                                              mp->pml_conditions,
                                                              mp->d_is_CPML,
                                                              FORWARD_OR_ADJOINT);
@@ -1039,6 +1047,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
                                                                                   mp->d_hprimewgll_xx,
                                                                                   mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
                                                                                   d_kappav, d_muv,
+                                                                                  mp->is_wavefield_discontinuity,
+                                                                                  mp->d_displ_wd,
+                                                                                  mp->d_ispec_to_elem_wd,
+                                                                                  mp->d_ibool_wd,
                                                                                   mp->pml_conditions,
                                                                                   mp->d_is_CPML,
                                                                                   3); // 3 == backward
@@ -1063,6 +1075,10 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
                                                               mp->d_hprimewgll_xx,
                                                               mp->d_wgllwgll_xy, mp->d_wgllwgll_xz, mp->d_wgllwgll_yz,
                                                               d_kappav, d_muv,
+                                                              mp->is_wavefield_discontinuity,
+                                                              mp->d_displ_wd,
+                                                              mp->d_ispec_to_elem_wd,
+                                                              mp->d_ibool_wd,
                                                               mp->pml_conditions,
                                                               mp->d_is_CPML,
                                                               3); // 3 == backward
diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
index 98c42b507..93e87d273 100644
--- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
+++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
@@ -336,6 +336,28 @@ __device__  __forceinline__ void load_shared_memory_displ_visco(const int* tx, c
 
 /* ----------------------------------------------------------------------------------------------- */
 
+// add displacement discontinuity in case of wavefield discontinuity
+
+__device__ __forceinline__ void add_displacement_discontinuity_element(
+                                  const int* tx, const int* working_element,
+                                  realw_const_p d_displ_wd,
+                                  const int* ispec_to_elem_wd,
+                                  const int* ibool_wd,
+                                  realw* sh_displx,
+                                  realw* sh_disply,
+                                  realw* sh_displz) {
+  int ispec_wd = ispec_to_elem_wd[(*working_element)]-1;
+  if (ispec_wd >= 0) {
+    int offset = ispec_wd * NGLL3_PADDED + (*tx);
+    int iglob_wd = ibool_wd[offset]-1;
+    sh_displx[(*tx)] = sh_displx[(*tx)] + d_displ_wd[iglob_wd*3];
+    sh_disply[(*tx)] = sh_disply[(*tx)] + d_displ_wd[iglob_wd*3 + 1];
+    sh_displz[(*tx)] = sh_displz[(*tx)] + d_displ_wd[iglob_wd*3 + 2];
+  }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
 // loads hprime into shared memory for element
 
 __device__  __forceinline__ void load_shared_memory_hprime(const int* tx,
@@ -738,6 +760,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
                         realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
                         realw_const_p d_kappav,
                         realw_const_p d_muv,
+                        const int is_wavefield_discontinuity,
+                        realw_const_p d_displ_wd,
+                        const int* d_ispec_to_elem_wd,
+                        const int* d_ibool_wd,
                         const int pml_conditions,
                         const int* d_is_CPML,
                         const int FORWARD_OR_ADJOINT){
@@ -849,6 +875,9 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
       load_shared_memory_displ<3>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
     }else{
       load_shared_memory_displ<1>(&tx,&iglob,d_displ,sh_tempx,sh_tempy,sh_tempz);
+      if (is_wavefield_discontinuity) {
+        add_displacement_discontinuity_element(&tx,&working_element,d_displ_wd,d_ispec_to_elem_wd,d_ibool_wd,sh_tempx,sh_tempy,sh_tempz);
+      }
     }
   }
 
diff --git a/src/gpu/kernels/kernel_cuda.mk b/src/gpu/kernels/kernel_cuda.mk
index 2073bdb26..d1820f773 100644
--- a/src/gpu/kernels/kernel_cuda.mk
+++ b/src/gpu/kernels/kernel_cuda.mk
@@ -40,6 +40,7 @@ cuda_kernels_OBJS := \
 	$O/transfer_surface_to_host_kernel.cuda-kernel.o \
 	$O/UpdateDispVeloc_kernel.cuda-kernel.o \
 	$O/UpdatePotential_kernel.cuda-kernel.o \
+	$O/wavefield_discontinuity_kernel.cuda-kernel.o \
 	$(EMPTY_MACRO)
 
 
diff --git a/src/gpu/kernels/kernel_proto.cu.h b/src/gpu/kernels/kernel_proto.cu.h
index 00db7749e..7902b3808 100644
--- a/src/gpu/kernels/kernel_proto.cu.h
+++ b/src/gpu/kernels/kernel_proto.cu.h
@@ -111,6 +111,10 @@ Kernel_2_noatt_iso_impl(const int nb_blocks_to_compute,
                         realw_const_p d_wgllwgll_xy,realw_const_p d_wgllwgll_xz,realw_const_p d_wgllwgll_yz,
                         realw_const_p d_kappav,
                         realw_const_p d_muv,
+                        const int is_wavefield_discontinuity,
+                        realw_const_p d_displ_wd,
+                        const int* d_ispec_to_element_wd,
+                        const int* d_ibool_wd,
                         const int pml_conditions,
                         const int* d_is_CPML,
                         const int FORWARD_OR_ADJOINT);
@@ -527,6 +531,26 @@ __global__ void compute_coupling_elastic_ac_kernel(field* potential_dot_dot_acou
                                                     int backward_simulation) ;
 
 
+//
+// src/gpu/kernels/wavefield_discontinuity_kernel.cu
+//
+
+__global__ void add_acceleration_discontinuity_kernel(
+                                  realw_const_p accel_wd,
+                                  realw_const_p mass_in_wd,
+                                  const int* boundary_to_iglob_wd,
+                                  const int size, realw* accel
+                                  );
+
+__global__ void add_traction_discontinuity_kernel(
+                                  realw_const_p traction_wd,
+                                  const int* face_ispec_wd,
+                                  const int* face_ijk_wd,
+                                  realw_const_p face_jacobian2Dw_wd,
+                                  const int* d_ibool,
+                                  const int size, realw* accel);
+
+
 //
 // src/gpu/kernels/compute_coupling_ocean_cuda_kernel.cu
 //
diff --git a/src/gpu/kernels/wavefield_discontinuity_kernel.cpp b/src/gpu/kernels/wavefield_discontinuity_kernel.cpp
new file mode 100644
index 000000000..59f0435b3
--- /dev/null
+++ b/src/gpu/kernels/wavefield_discontinuity_kernel.cpp
@@ -0,0 +1 @@
+#include "wavefield_discontinuity_kernel.cu"
diff --git a/src/gpu/kernels/wavefield_discontinuity_kernel.cu b/src/gpu/kernels/wavefield_discontinuity_kernel.cu
new file mode 100644
index 000000000..0c8dd7b80
--- /dev/null
+++ b/src/gpu/kernels/wavefield_discontinuity_kernel.cu
@@ -0,0 +1,41 @@
+__global__ void add_acceleration_discontinuity_kernel(
+                                  realw_const_p accel_wd, 
+                                  realw_const_p mass_in_wd,
+                                  const int* boundary_to_iglob_wd,
+                                  const int size, realw* accel
+                                  ) {
+  int id = threadIdx.x + (blockIdx.x + blockIdx.y*gridDim.x)*blockDim.x;
+  int iglob = boundary_to_iglob_wd[id] - 1;
+  realw mass_in = mass_in_wd[id];
+  if (id < size) {
+    accel[iglob*3] = accel[iglob*3] - accel_wd[id*3] * mass_in;
+    accel[iglob*3 + 1] = accel[iglob*3 + 1] - accel_wd[id*3 + 1] * mass_in;
+    accel[iglob*3 + 2] = accel[iglob*3 + 2] - accel_wd[id*3 + 2] * mass_in;
+  }
+}
+
+__global__ void add_traction_discontinuity_kernel(
+                                  realw_const_p traction_wd,
+                                  const int* face_ispec_wd,
+                                  const int* face_ijk_wd,
+                                  realw_const_p face_jacobian2Dw_wd,
+                                  const int* d_ibool,
+                                  const int size, realw* accel) {
+  int igll = threadIdx.x;
+  int iface_wd = blockIdx.x + gridDim.x*blockIdx.y;
+  int i, j, k, ispec, iglob;
+  realw jacobianw;
+  if (iface_wd < size) {
+    ispec = face_ispec_wd[iface_wd] - 1;
+    i = face_ijk_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)]-1;
+    j = face_ijk_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)]-1;
+    k = face_ijk_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)]-1;
+
+    iglob = d_ibool[INDEX4_PADDED(NGLLX,NGLLX,NGLLX,i,j,k,ispec)]-1;
+
+    jacobianw = face_jacobian2Dw_wd[INDEX2(NGLL2,igll,iface_wd)];
+    atomicAdd(&accel[iglob*3],  traction_wd[INDEX3(NDIM,NGLL2,0,igll,iface_wd)] * jacobianw);
+    atomicAdd(&accel[iglob*3+1],  traction_wd[INDEX3(NDIM,NGLL2,1,igll,iface_wd)] * jacobianw);
+    atomicAdd(&accel[iglob*3+2],  traction_wd[INDEX3(NDIM,NGLL2,2,igll,iface_wd)] * jacobianw);
+  }
+}
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index 495ff2528..8bf524623 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -789,6 +789,20 @@ typedef struct mesh_ {
   int use_Kelvin_Voigt_damping;
   realw* d_Kelvin_Voigt_eta;
 
+  // prescribed wavefield discontinuity on an interface
+  int is_wavefield_discontinuity;
+  int* d_ispec_to_elem_wd;
+  int* d_ibool_wd;
+  int* d_boundary_to_iglob_wd;
+  realw* d_mass_in_wd;
+  int* d_face_ijk_wd;
+  int* d_face_ispec_wd;
+  realw* d_face_normal_wd;
+  realw* d_face_jacobian2Dw_wd;
+  realw* d_displ_wd;
+  realw* d_accel_wd;
+  realw* d_traction_wd;
+
   // for option NB_RUNS_FOR_ACOUSTIC_GPU
   int* run_number_of_the_source;
 
diff --git a/src/gpu/prepare_mesh_constants_cuda.cu b/src/gpu/prepare_mesh_constants_cuda.cu
index 78dcf5069..45a043dac 100644
--- a/src/gpu/prepare_mesh_constants_cuda.cu
+++ b/src/gpu/prepare_mesh_constants_cuda.cu
@@ -106,6 +106,7 @@ void FC_FUNC_(prepare_constants_device,
                                         int* SAVE_SEISMOGRAMS_ACCELERATION,int* SAVE_SEISMOGRAMS_PRESSURE,
                                         int* h_NB_RUNS_ACOUSTIC_GPU,
                                         int* FAULT_SIMULATION,
+                                        int* IS_WAVEFIELD_DISCONTINUITY,
                                         int* UNDO_ATTENUATION_AND_OR_PML,
                                         int* PML_CONDITIONS) {
 
@@ -420,6 +421,9 @@ void FC_FUNC_(prepare_constants_device,
   // Kelvin_voigt initialization
   mp->use_Kelvin_Voigt_damping = 0;
 
+  // prescribed wavefield discontinuity
+  mp->is_wavefield_discontinuity = *IS_WAVEFIELD_DISCONTINUITY;
+
   GPU_ERROR_CHECKING("prepare_constants_device");
 }
 
@@ -1542,6 +1546,51 @@ void FC_FUNC_(prepare_fault_device,
 }
 
 
+/* ----------------------------------------------------------------------------------------------- */
+
+// wavefield discontinuity
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern EXTERN_LANG
+void FC_FUNC_(prepare_wavefield_discontinuity_device,
+              PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
+                             long* Mesh_pointer,
+                             int* ispec_to_elem_wd,
+                             int* nglob_wd,
+                             int* nspec_wd,
+                             int* ibool_wd,
+                             int* boundary_to_iglob_wd,
+                             realw* mass_in_wd,
+                             int* nfaces_wd,
+                             int* face_ijk_wd,
+                             int* face_ispec_wd,
+                             realw* face_normal_wd,
+                             realw* face_jacobian2Dw_wd) {
+  TRACE("prepare_wavefield_discontinuity_device");
+  Mesh* mp = (Mesh*)(*Mesh_pointer);
+
+  // arrays
+  gpuCreateCopy_todevice_int((void**)&mp->d_ispec_to_elem_wd, ispec_to_elem_wd, mp->NSPEC_AB);
+  gpuCreateCopy_todevice_int((void**)&mp->d_boundary_to_iglob_wd, boundary_to_iglob_wd, (*nglob_wd));
+  gpuCreateCopy_todevice_realw((void**)&mp->d_mass_in_wd, mass_in_wd, (*nglob_wd));
+  gpuCreateCopy_todevice_int((void**)&mp->d_face_ispec_wd, face_ispec_wd, (*nfaces_wd));
+  gpuCreateCopy_todevice_int((void**)&mp->d_face_ijk_wd, face_ijk_wd, 3*NGLL2*(*nfaces_wd));
+  gpuCreateCopy_todevice_realw((void**)&mp->d_face_normal_wd, face_normal_wd, NDIM*NGLL2*(*nfaces_wd));
+  gpuCreateCopy_todevice_realw((void**)&mp->d_face_jacobian2Dw_wd, face_jacobian2Dw_wd, NGLL2*(*nfaces_wd));
+  // global indexing for wavefield discontinuity points (padded)
+  int size_padded = NGLL3_PADDED * (*nspec_wd);
+  gpuMalloc_int((void**) &mp->d_ibool_wd, size_padded);
+  gpuMemcpy2D_todevice_int(mp->d_ibool_wd, NGLL3_PADDED, ibool_wd, NGLL3, NGLL3, (*nspec_wd));
+
+  // allocate discontinuity field
+  int size = NDIM * (*nglob_wd);
+  gpuMalloc_realw((void**)&(mp->d_displ_wd),size);
+  gpuMalloc_realw((void**)&(mp->d_accel_wd),size);
+  size = NDIM * NGLL2 * (*nfaces_wd);
+  gpuMalloc_realw((void**)&(mp->d_traction_wd),size);
+}
+
 /* ----------------------------------------------------------------------------------------------- */
 
 // cleanup
diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk
index 27e390595..922141d95 100644
--- a/src/gpu/rules.mk
+++ b/src/gpu/rules.mk
@@ -61,6 +61,7 @@ gpu_specfem3D_OBJECTS = \
 	$O/transfer_fields_cuda.o \
 	$O/update_displacement_cuda.o \
 	$O/write_seismograms_cuda.o \
+	$O/wavefield_discontinuity_cuda.o \
 	$(EMPTY_MACRO)
 
 gpu_specfem3D_STUBS = \
diff --git a/src/gpu/specfem3D_gpu_cuda_method_stubs.c b/src/gpu/specfem3D_gpu_cuda_method_stubs.c
index b3af872be..af7c37795 100644
--- a/src/gpu/specfem3D_gpu_cuda_method_stubs.c
+++ b/src/gpu/specfem3D_gpu_cuda_method_stubs.c
@@ -639,6 +639,21 @@ void FC_FUNC_(prepare_fault_device,
                                     int* KELVIN_VOIGT_DAMPING,
                                     realw* Kelvin_Voigt_eta) {}
 
+void FC_FUNC_(prepare_wavefield_discontinuity_device,
+              PREPARE_WAVEFIELD_DISCONTINUITY_DEVICE)(
+                             long* Mesh_pointer,
+                             int* ispec_to_elem_wd,
+                             int* nglob_wd,
+                             int* nspec_wd,
+                             int* ibool_wd,
+                             int* boundary_to_iglob_wd,
+                             realw* mass_in_wd,
+                             int* nfaces_wd,
+                             int* face_ijk_wd,
+                             int* face_ispec_wd,
+                             realw* face_normal_wd,
+                             realw* face_jacobian2Dw_wd) {}
+
 void FC_FUNC_(prepare_cleanup_device,
               PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer,
                                       int* ACOUSTIC_SIMULATION,
@@ -675,7 +690,6 @@ void FC_FUNC_(get_smooth_gpu,
               GET_SMOOTH_gpu)(long * smooth_pointer,
                               realw * data_smooth) {}
 
-
 //
 // src/gpu/transfer_fields_cuda.cu
 //
@@ -738,6 +752,12 @@ void FC_FUNC_(transfer_pml_displ_from_device,
 void FC_FUNC_(transfer_pml_displ_to_device,
               TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {}
 
+void FC_FUNC_(transfer_wavefield_discontinuity_to_device,
+              TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)(
+                         int* size_point, int* size_face,
+                         realw* displ_wd, realw* accel_wd,
+                         realw* traction_wd, long* Mesh_pointer) {}
+
 void FC_FUNC_(transfer_b_rmemory_to_device,
               TRANSFER_B_RMEMORY_TO_DEVICE)(long* Mesh_pointer,
                                             realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,
@@ -943,3 +963,14 @@ void FC_FUNC_(compute_seismograms_cuda,
                                         int* ELASTIC_SIMULATION,
                                         int* USE_TRICK_FOR_BETTER_PRESSURE) {}
 
+//
+// src/gpu/wavefield_discontinuity_cuda.cu
+//
+
+void FC_FUNC_(wavefield_discontinuity_add_traction_cuda,
+              WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points,
+                                                         int* size_faces,
+                                                         long* Mesh_pointer){}
+
+
+
diff --git a/src/gpu/transfer_fields_cuda.cu b/src/gpu/transfer_fields_cuda.cu
index 450fa677e..6bcf4dccf 100644
--- a/src/gpu/transfer_fields_cuda.cu
+++ b/src/gpu/transfer_fields_cuda.cu
@@ -303,6 +303,29 @@ void FC_FUNC_(transfer_pml_displ_from_device,
 
 /* ----------------------------------------------------------------------------------------------- */
 
+// prescribed wavefield discontinuity. this needs to be called in every time step
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern EXTERN_LANG
+void FC_FUNC_(transfer_wavefield_discontinuity_to_device,
+              TRANSFER_WAVEFIELD_DISCONTINUITY_TO_DEVICE)(
+                         int* size_point, int* size_face,
+                         realw* displ_wd, realw* accel_wd,
+                         realw* traction_wd, long* Mesh_pointer) {
+
+  TRACE("transfer_wavefield_discontinuity_to_device");
+
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+  gpuMemcpy_todevice_realw(mp->d_displ_wd, displ_wd, (*size_point));
+  gpuMemcpy_todevice_realw(mp->d_accel_wd, accel_wd, (*size_point));
+  gpuMemcpy_todevice_realw(mp->d_traction_wd, traction_wd, (*size_face));
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
 extern EXTERN_LANG
 void FC_FUNC_(transfer_pml_displ_to_device,
               TRANSFER_PML_DISPL_TO_DEVICE)(int* size, realw* PML_displ_old, realw* PML_displ_new, long* Mesh_pointer) {
diff --git a/src/gpu/wavefield_discontinuity_cuda.cu b/src/gpu/wavefield_discontinuity_cuda.cu
new file mode 100644
index 000000000..17f07f89f
--- /dev/null
+++ b/src/gpu/wavefield_discontinuity_cuda.cu
@@ -0,0 +1,71 @@
+#include "mesh_constants_gpu.h"
+
+extern EXTERN_LANG
+void FC_FUNC_(wavefield_discontinuity_add_traction_cuda,
+              WAVEFIELD_DISCONTINUITY_ADD_TRACTION_CUDA)(int* size_points, 
+                                                         int* size_faces,
+                                                         long* Mesh_pointer){
+  TRACE("wavefield_discontinuity_add_traction_cuda");
+  Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+  if (mp->is_wavefield_discontinuity) {
+    int size = (*size_points);
+    int blocksize = BLOCKSIZE_KERNEL1;
+    int size_padded = ((int)ceil(((double)size)/((double)blocksize)))*blocksize;
+
+    int num_blocks_x, num_blocks_y;
+    get_blocks_xy(size_padded/blocksize,&num_blocks_x,&num_blocks_y);
+
+    dim3 grid(num_blocks_x,num_blocks_y);
+    dim3 threads(blocksize,1,1);
+    
+#ifdef USE_CUDA
+    if (run_cuda) {
+      add_acceleration_discontinuity_kernel
+       <<<grid, threads, 0, mp->compute_stream>>>(mp->d_accel_wd,
+                                                  mp->d_mass_in_wd,
+                                                  mp->d_boundary_to_iglob_wd,
+                                                  size, mp->d_accel);
+    }
+#endif
+#ifdef USE_HIP
+    if (run_hip) {
+      hipLaunchKernelGGL(add_acceleration_discontinuity_kernel, 
+       dim3(grid), dim3(threads), 0, mp->compute_stream, mp->d_accel_wd,
+                                                  mp->d_mass_in_wd,
+                                                  mp->d_boundary_to_iglob_wd,           
+                                                  size, mp->d_accel);
+    }
+#endif
+    
+    size = (*size_faces);
+    blocksize = NGLL2;
+    
+    get_blocks_xy(size,&num_blocks_x,&num_blocks_y);
+
+    dim3 grid2(num_blocks_x,num_blocks_y);
+    dim3 threads2(blocksize,1,1);
+
+#ifdef USE_CUDA
+    if (run_cuda) {
+       add_traction_discontinuity_kernel
+        <<<grid2, threads2, 0, mp->compute_stream>>>(mp->d_traction_wd,
+                                                   mp->d_face_ispec_wd,
+                                                   mp->d_face_ijk_wd,
+                                                   mp->d_face_jacobian2Dw_wd,
+                                                   mp->d_ibool,
+                                                   size, mp->d_accel);
+    }
+#endif
+#ifdef USE_HIP
+    if (run_hip) {
+      hipLaunchKernelGGL(add_traction_discontinuity_kernel,
+        dim3(grid2), dim3(threads2), 0, mp->compute_stream, mp->d_traction_wd,
+                                                   mp->d_face_ispec_wd,
+                                                   mp->d_face_ijk_wd,
+                                                   mp->d_face_jacobian2Dw_wd,
+                                                   mp->d_ibool,
+                                                   size, mp->d_accel);
+    }
+#endif
+  }
+}
diff --git a/src/shared/read_parameter_file.F90 b/src/shared/read_parameter_file.F90
index ca5bef914..c215ebb6b 100644
--- a/src/shared/read_parameter_file.F90
+++ b/src/shared/read_parameter_file.F90
@@ -1076,8 +1076,6 @@ subroutine check_simulation_parameters()
   endif
 
   if (IS_WAVEFIELD_DISCONTINUITY) then
-    if (GPU_MODE) &
-      stop 'wavefield discontinuity problem cannot be solved on GPU yet'
     if (ATTENUATION) &
       stop 'wavefield discontinuity problem cannot be solved with attenuation'
   endif
diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
index 54bac90a7..c9e7b92ba 100644
--- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
+++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
@@ -43,7 +43,9 @@ subroutine compute_forces_viscoelastic_calling()
 
   !! solving wavefield discontinuity problem with non-split-node scheme
   use wavefield_discontinuity_solver, only: &
-                 add_traction_discontinuity, read_wavefield_discontinuity_file
+                 add_traction_discontinuity, read_wavefield_discontinuity_file, &
+                 transfer_wavefield_discontinuity_to_GPU, &
+                 add_traction_discontinuity_GPU
 
   implicit none
 
@@ -112,6 +114,9 @@ subroutine compute_forces_viscoelastic_calling()
   !! note that this is not called in case of adjoint simulation
   if (IS_WAVEFIELD_DISCONTINUITY .and. (SIMULATION_TYPE == 1)) then
     call read_wavefield_discontinuity_file()
+    if (GPU_MODE) then
+      call transfer_wavefield_discontinuity_to_GPU()
+    endif
   endif
 
 ! distinguishes two runs: for elements in contact with MPI interfaces, and elements within the partitions
@@ -284,7 +289,11 @@ subroutine compute_forces_viscoelastic_calling()
       !! because these terms need to be used in MPI calls
       !! note that this is not called in case of adjoint simulation
       if (IS_WAVEFIELD_DISCONTINUITY .and. (SIMULATION_TYPE == 1)) then
-        call add_traction_discontinuity(accel, NGLOB_AB)
+        if (GPU_MODE) then
+          call add_traction_discontinuity_GPU()
+        else
+          call add_traction_discontinuity(accel, NGLOB_AB)
+        endif
       endif
     endif ! iphase
 
diff --git a/src/specfem3D/finalize_simulation.f90 b/src/specfem3D/finalize_simulation.f90
index 5b32e7657..fb27c1c80 100644
--- a/src/specfem3D/finalize_simulation.f90
+++ b/src/specfem3D/finalize_simulation.f90
@@ -141,6 +141,10 @@ subroutine finalize_simulation_cleanup()
   use fault_solver_dynamic, only: SIMULATION_TYPE_DYN,BC_DYNFLT_free
   use fault_solver_kinematic, only: SIMULATION_TYPE_KIN,BC_KINFLT_free
 
+  !! solving wavefield discontinuity problem with non-split-node scheme
+  use wavefield_discontinuity_solver, only: &
+               finalize_wavefield_discontinuity
+
   implicit none
 
   ! from here on, no gpu data is needed anymore
@@ -150,6 +154,9 @@ subroutine finalize_simulation_cleanup()
   ! C-PML absorbing boundary conditions deallocates C_PML arrays
   if (PML_CONDITIONS) call pml_cleanup()
 
+  ! cleanup for wavefield discontinuity
+  if (IS_WAVEFIELD_DISCONTINUITY) call finalize_wavefield_discontinuity()
+
   ! free arrays
   ! mass matrices
   if (ELASTIC_SIMULATION) then
diff --git a/src/specfem3D/iterate_time.F90 b/src/specfem3D/iterate_time.F90
index 40ba19e27..ea29a7a38 100644
--- a/src/specfem3D/iterate_time.F90
+++ b/src/specfem3D/iterate_time.F90
@@ -40,12 +40,6 @@ subroutine iterate_time()
   ! hdf5 i/o server
   use io_server_hdf5, only: do_io_start_idle,pass_info_to_io
 
-  !! solving wavefield discontinuity problem with non-split-node scheme
-  use wavefield_discontinuity_solver, only: &
-               read_mesh_databases_wavefield_discontinuity, &
-               open_wavefield_discontinuity_file, &
-               finalize_wavefield_discontinuity
-
   implicit none
 
   ! for EXACT_UNDOING_TO_DISK
@@ -214,13 +208,6 @@ subroutine iterate_time()
   ! get MPI starting
   time_start = wtime()
 
-  !! solving wavefield discontinuity problem with
-  !! non-split-node scheme
-  if (IS_WAVEFIELD_DISCONTINUITY) then
-    call read_mesh_databases_wavefield_discontinuity()
-    call open_wavefield_discontinuity_file()
-  endif
-
   ! LTS
   if (LTS_MODE) then
     ! LTS steps through its own time iterations - for now
@@ -346,12 +333,6 @@ subroutine iterate_time()
   !
   enddo   ! end of main time loop
 
-  !! solving wavefield discontinuity problem with
-  !! non-split-node scheme
-  if (IS_WAVEFIELD_DISCONTINUITY) then
-    call finalize_wavefield_discontinuity()
-  endif
-
 ! goto point for LTS to finish time loop
 777 continue
 
diff --git a/src/specfem3D/prepare_gpu.f90 b/src/specfem3D/prepare_gpu.f90
index b7136f58a..d2c39b4cd 100644
--- a/src/specfem3D/prepare_gpu.f90
+++ b/src/specfem3D/prepare_gpu.f90
@@ -43,6 +43,8 @@ subroutine prepare_GPU()
     pml_convolution_coef_alpha,pml_convolution_coef_beta, &
     pml_convolution_coef_abar,pml_convolution_coef_strain
 
+  use wavefield_discontinuity_solver, only: prepare_wavefield_discontinuity_GPU
+
   implicit none
 
   ! local parameters
@@ -97,6 +99,7 @@ subroutine prepare_GPU()
                                 SAVE_SEISMOGRAMS_ACCELERATION,SAVE_SEISMOGRAMS_PRESSURE, &
                                 NB_RUNS_ACOUSTIC_GPU, &
                                 FAULT_SIMULATION, &
+                                IS_WAVEFIELD_DISCONTINUITY, &
                                 UNDO_ATTENUATION_AND_OR_PML, &
                                 PML_CONDITIONS)
 
@@ -279,6 +282,16 @@ subroutine prepare_GPU()
     endif
   endif
 
+  ! prepares wavefield discontinuity
+  if (IS_WAVEFIELD_DISCONTINUITY) then
+    ! user output
+    if (myrank == 0) then
+      write(IMAIN,*) "  loading wavefield discontinuity"
+      call flush_IMAIN()
+    endif
+    call prepare_wavefield_discontinuity_GPU()
+  endif
+
   ! synchronizes processes
   call synchronize_all()
 
diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90
index f9b5574e9..56ad2de13 100644
--- a/src/specfem3D/prepare_timerun.F90
+++ b/src/specfem3D/prepare_timerun.F90
@@ -34,6 +34,10 @@ subroutine prepare_timerun()
   use specfem_par_poroelastic
   use specfem_par_movie
 
+  !! solving wavefield discontinuity problem with non-split-node scheme
+  use wavefield_discontinuity_solver, only: &
+               prepare_timerun_wavefield_discontinuity
+
   implicit none
 
   ! local parameters
@@ -81,6 +85,9 @@ subroutine prepare_timerun()
   ! Stacey boundaries
   call prepare_timerun_stacey()
 
+  ! wavefield discontinuity
+  call prepare_timerun_wavefield_discontinuity()
+
   ! prepares ADJOINT simulations
   call prepare_timerun_adjoint()
 
diff --git a/src/specfem3D/wavefield_discontinuity_solver.f90 b/src/specfem3D/wavefield_discontinuity_solver.f90
index 2c4c94841..f04432136 100644
--- a/src/specfem3D/wavefield_discontinuity_solver.f90
+++ b/src/specfem3D/wavefield_discontinuity_solver.f90
@@ -94,6 +94,15 @@ module wavefield_discontinuity_solver
 
 contains
 
+  subroutine prepare_timerun_wavefield_discontinuity()
+  use specfem_par, only: IS_WAVEFIELD_DISCONTINUITY
+  implicit none
+  if (IS_WAVEFIELD_DISCONTINUITY) then
+    call read_mesh_databases_wavefield_discontinuity()
+    call open_wavefield_discontinuity_file()
+  endif
+  end subroutine prepare_timerun_wavefield_discontinuity
+
   subroutine read_mesh_databases_wavefield_discontinuity()
   use constants, only: IFILE_WAVEFIELD_DISCONTINUITY, &
                        FNAME_WAVEFIELD_DISCONTINUITY_DATABASE
@@ -145,6 +154,27 @@ subroutine read_wavefield_discontinuity_file()
   read(IFILE_WAVEFIELD_DISCONTINUITY) traction_wd
   end subroutine read_wavefield_discontinuity_file
 
+  subroutine transfer_wavefield_discontinuity_to_GPU()
+  use specfem_par, only: Mesh_pointer, NDIM, NGLLSQUARE
+  implicit none
+  call transfer_wavefield_discontinuity_to_device(nglob_wd*NDIM, &
+                                                  nfaces_wd*NDIM*NGLLSQUARE, &
+                                                  displ_wd, accel_wd, &
+                                                  traction_wd, Mesh_pointer)
+  end subroutine transfer_wavefield_discontinuity_to_GPU
+
+  subroutine prepare_wavefield_discontinuity_GPU()
+  use specfem_par, only: Mesh_pointer
+  implicit none
+  call prepare_wavefield_discontinuity_device(Mesh_pointer, ispec_to_elem_wd,&
+                                                nglob_wd, nspec_wd, ibool_wd,&
+                                                boundary_to_iglob_wd,&
+                                                mass_in_wd,&
+                                                nfaces_wd, face_ijk_wd,&
+                                                face_ispec_wd, face_normal_wd,&
+                                                face_jacobian2dw_wd)
+  end subroutine prepare_wavefield_discontinuity_GPU
+
   subroutine finalize_wavefield_discontinuity()
   use constants, only: IFILE_WAVEFIELD_DISCONTINUITY
   implicit none
@@ -205,4 +235,11 @@ subroutine add_traction_discontinuity(accel, nglob)
     enddo
   enddo
   end subroutine add_traction_discontinuity
+
+  subroutine add_traction_discontinuity_GPU()
+  use specfem_par, only: Mesh_pointer
+  implicit none
+  call wavefield_discontinuity_add_traction_cuda(nglob_wd, nfaces_wd, &
+                                                 Mesh_pointer)
+  end subroutine add_traction_discontinuity_GPU
 end module wavefield_discontinuity_solver