diff --git a/src/decompose_mesh/rules.mk b/src/decompose_mesh/rules.mk
index ed75854cc..18b3f39be 100644
--- a/src/decompose_mesh/rules.mk
+++ b/src/decompose_mesh/rules.mk
@@ -154,6 +154,7 @@ xdecompose_mesh_mpi_OBJECTS = \
 	$O/module_database.dec.o \
 	$O/module_mesh.dec.o \
 	$O/module_partition.dec.o \
+	$O/part_decompose_mesh.dec_module.o \
 	$O/program_decompose_mesh_mpi.mpidec.o \
 	$(EMPTY_MACRO)
 
diff --git a/src/generate_databases/save_arrays_solver_hdf5.F90 b/src/generate_databases/save_arrays_solver_hdf5.F90
index 644acee5c..2ceec3470 100644
--- a/src/generate_databases/save_arrays_solver_hdf5.F90
+++ b/src/generate_databases/save_arrays_solver_hdf5.F90
@@ -90,7 +90,7 @@ subroutine save_arrays_solver_mesh_hdf5()
 
   ! element node connectivity for movie output
   ! the node ids are stored after dividing one NGLL*-th order spectral element into NGLLX*NGLLY*NGLLZ elements
-  integer, dimension(9,nspec_ab*(NGLLX-1)*(NGLLY-1)*(NGLLZ-1)) :: spec_elm_conn_xdmf
+  integer, dimension(:,:),allocatable :: spec_elm_conn_xdmf
 
   ! dummy arrays
   integer, dimension(1,1), parameter                    :: i2d_dummy = reshape((/0/),(/1,1/))
@@ -153,9 +153,17 @@ subroutine save_arrays_solver_mesh_hdf5()
   allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
   if (ier /= 0) call exit_MPI_without_rank('error allocating array 650')
   if (ier /= 0) stop 'error allocating array'
+  ibool_interfaces_ext_mesh_dummy(:,:) = 0
   do i = 1, num_interfaces_ext_mesh
     ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,i)
   enddo
+
+  ! connectivity
+  allocate(spec_elm_conn_xdmf(9,NSPEC_AB*(NGLLX-1)*(NGLLY-1)*(NGLLZ-1)),stat=ier)
+  if (ier /= 0) call exit_MPI_without_rank('error allocating array 651')
+  if (ier /= 0) stop 'error allocating spec_elm_conn_xdm array'
+  spec_elm_conn_xdmf(:,:) = 0
+
   call synchronize_all()
 
   ! get MPI parameters
@@ -1433,10 +1441,8 @@ subroutine save_arrays_solver_mesh_hdf5()
   call synchronize_all()
 
   ! cleanup
-  if (allocated(ibool_interfaces_ext_mesh_dummy)) then
-    deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier)
-    if (ier /= 0) stop 'error deallocating array ibool_interfaces_ext_mesh_dummy'
-  endif
+  deallocate(ibool_interfaces_ext_mesh_dummy)
+  deallocate(spec_elm_conn_xdmf)
 
   ! user output
   if (myrank == 0) then
diff --git a/src/gpu/compute_forces_acoustic_cuda.cu b/src/gpu/compute_forces_acoustic_cuda.cu
index 97afb85ea..5d372a4cf 100644
--- a/src/gpu/compute_forces_acoustic_cuda.cu
+++ b/src/gpu/compute_forces_acoustic_cuda.cu
@@ -67,10 +67,9 @@ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
   //       to combine forward and backward wavefield in the same kernel call
 
   // kernel timing
-  gpu_event start, stop;
-  if (CUDA_TIMING ){
-    start_timing_gpu(&start,&stop);
-  }
+  gpu_event start,stop;
+  if (GPU_TIMING){ start_timing_gpu(&start,&stop); }
+
   int nb_field = mp->simulation_type == 3 ? 2 : 1 ;
 
   // sets gpu arrays
@@ -202,8 +201,8 @@ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase,
 
   }
 
-  // Cuda timing
-  if (CUDA_TIMING ){
+  // kernel timing
+  if (GPU_TIMING){
     realw flops,time;
     stop_timing_gpu(&start,&stop,"Kernel_2_acoustic_impl",&time);
     // time in seconds
diff --git a/src/gpu/compute_forces_viscoelastic_cuda.cu b/src/gpu/compute_forces_viscoelastic_cuda.cu
index a81cff535..dd2d90f2c 100644
--- a/src/gpu/compute_forces_viscoelastic_cuda.cu
+++ b/src/gpu/compute_forces_viscoelastic_cuda.cu
@@ -91,7 +91,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
 
   // kernel timing
   gpu_event start,stop;
-  if (CUDA_TIMING){ start_timing_gpu(&start,&stop); }
+  if (GPU_TIMING){ start_timing_gpu(&start,&stop); }
 
   // defines local parameters for forward/adjoint function calls
   realw *displ,*veloc,*accel;
@@ -1076,8 +1076,8 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat,
     } // ANISOTROPY
   } // ATTENUATION
 
-  // Cuda timing
-  if (CUDA_TIMING){
+  // kernel timing
+  if (GPU_TIMING){
     if (ATTENUATION){
       stop_timing_gpu(&start,&stop,"Kernel_2_att_impl");
     }else{
diff --git a/src/gpu/helper_functions.cu b/src/gpu/helper_functions.cu
index 911843638..dcb8315c8 100644
--- a/src/gpu/helper_functions.cu
+++ b/src/gpu/helper_functions.cu
@@ -227,7 +227,7 @@ void stop_timing_gpu(gpu_event* start,gpu_event* stop, const char* info_str){
 #endif
 
   // user output
-  printf("%s: Execution Time = %f ms\n",info_str,time);
+  printf("GPU_timing %s: Execution Time = %f ms\n",info_str,time);
 }
 
 /* ----------------------------------------------------------------------------------------------- */
@@ -253,7 +253,7 @@ void stop_timing_gpu(gpu_event* start,gpu_event* stop, const char* info_str, rea
 #endif
 
   // user output
-  printf("%s: Execution Time = %f ms\n",info_str,time);
+  printf("GPU_timing %s: Execution Time = %f ms\n",info_str,time);
 
   // returns time
   *t = time;
diff --git a/src/gpu/kernels/Kernel_2_acoustic_impl.cu b/src/gpu/kernels/Kernel_2_acoustic_impl.cu
index 5b098d84b..4101e12cb 100644
--- a/src/gpu/kernels/Kernel_2_acoustic_impl.cu
+++ b/src/gpu/kernels/Kernel_2_acoustic_impl.cu
@@ -462,7 +462,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute,
 //                              i.e.               11% x theoretical peak performance ~ 440 GFlop/s.
 //                                                 11% x "pratical"  peak performance ~ 320 GFlop/s.
 //
-//   CUDA_TIMING: we achieve about 224 GFlop/s (1 mpi process, 20736 elements)
+//   GPU_TIMING: we achieve about 224 GFlop/s (1 mpi process, 20736 elements)
 //                -> that is about 8% of the "practical" peak. (or 70% of the theoretical arithmetic intensity)
 //
 //                this might be due to the first compute code block (before first syncthreads), where
diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
index bc608278b..0a9eb3e6d 100644
--- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
+++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu
@@ -2412,8 +2412,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute,
 #endif
   }
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
   // synchronize all the threads (one thread for each of the NGLL grid points of the
   // current spectral element) because we need the whole element to be ready in order
   // to be able to compute the matrix products along cut planes of the 3D element below
@@ -2547,8 +2545,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute,
                             &rho_s_H1,&rho_s_H2,&rho_s_H3);
   }
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
   // form dot product with test vector, symmetric form
   // 1. cut-plane xi
   __syncthreads();
@@ -2583,8 +2579,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute,
     sum_terms3 += rho_s_H3;
   }
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
   // assembles acceleration array
   if (threadIdx.x < NGLL3) {
 
@@ -2662,8 +2656,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute,
     epsilondev_trace[tx + working_element*NGLL3] = epsilondev_trace_loc;
   } // threadIdx.x
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
 } // kernel_2_att_impl()
 
 
@@ -2814,8 +2806,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
     // copy displacement from global memory to shared memory
     load_shared_memory_displ<FORWARD_OR_ADJOINT>(&tx,&iglob,d_displ,s_dummyx_loc,s_dummyy_loc,s_dummyz_loc);
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
     // attenuation
     // use first order Taylor expansion of displacement for local storage of stresses
     // at this current time step, to fix attenuation in a consistent way
@@ -2875,8 +2865,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
       tempz3l += s_dummyz_loc[l*NGLL2+J*NGLLX+I]*fac3;
     }
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
     // attenuation
     // temporary variables used for fixing attenuation in a consistent way
     tempx1l_att = 0.f;
@@ -2963,8 +2951,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
             + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K]
             + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K];
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
     // attenuation
     // temporary variables used for fixing attenuation in a consistent way
     tempx1l_att = s_dummyx_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I]
@@ -3049,8 +3035,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
     duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l;
     duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l;
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
     // precompute some sums to save CPU time
     duxdxl_plus_duydyl = duxdxl + duydyl;
     duxdxl_plus_duzdzl = duxdxl + duzdzl;
@@ -3059,8 +3043,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
     duzdxl_plus_duxdzl = duzdxl + duxdzl;
     duzdyl_plus_duydzl = duzdyl + duydzl;
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
     // attenuation
     // temporary variables used for fixing attenuation in a consistent way
     duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att;
@@ -3207,8 +3189,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
   // to be able to compute the matrix products along cut planes of the 3D element below
   __syncthreads();
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
   if (active ){
 
 #ifndef MANUALLY_UNROLLED_LOOPS
@@ -3326,10 +3306,7 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
     d_accel[iglob*3 + 2] += sum_terms3;
 #endif // USE_TEXTURES_FIELDS
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
 #else // MESH_COLORING
-
     //mesh coloring
     if (use_mesh_coloring_gpu ){
 
@@ -3371,8 +3348,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute,
     epsilondev_yz[tx + working_element*NGLL3] = epsilondev_yz_loc;
   } // if (active)
 
-// JC JC here we will need to add GPU support for the new C-PML routines
-
 } // kernel_2_att_impl()
 
 */
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index 3a8d2a4d1..402edb266 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -112,8 +112,7 @@ typedef double realw;
 #endif
 
 // performance timers
-#define CUDA_TIMING 0
-#define CUDA_TIMING_UPDATE 0
+#define GPU_TIMING 0
 
 // error checking after cuda function calls
 // (note: this synchronizes many calls, thus e.g. no asynchronuous memcpy possible)
diff --git a/src/gpu/prepare_mesh_constants_cuda.cu b/src/gpu/prepare_mesh_constants_cuda.cu
index 357b4ce08..57e142571 100644
--- a/src/gpu/prepare_mesh_constants_cuda.cu
+++ b/src/gpu/prepare_mesh_constants_cuda.cu
@@ -420,8 +420,6 @@ void FC_FUNC_(prepare_constants_device,
   // Kelvin_voigt initialization
   mp->use_Kelvin_Voigt_damping = 0;
 
-  // JC JC here we will need to add GPU support for the new C-PML routines
-
   GPU_ERROR_CHECKING("prepare_constants_device");
 }
 
diff --git a/src/gpu/update_displacement_cuda.cu b/src/gpu/update_displacement_cuda.cu
index d3a37846b..8156f87d7 100644
--- a/src/gpu/update_displacement_cuda.cu
+++ b/src/gpu/update_displacement_cuda.cu
@@ -77,7 +77,7 @@ void FC_FUNC_(update_displacement_cuda,
 
   // kernel timing
   gpu_event start,stop;
-  if (CUDA_TIMING_UPDATE ) start_timing_gpu(&start,&stop);
+  if (GPU_TIMING) start_timing_gpu(&start,&stop);
 
   // debug
   //realw max_d,max_v,max_a;
@@ -166,8 +166,8 @@ void FC_FUNC_(update_displacement_cuda,
     }
   }
 
-  // Cuda timing
-  if (CUDA_TIMING_UPDATE ){
+  // kernel timing
+  if (GPU_TIMING){
     realw flops,time;
     stop_timing_gpu(&start,&stop,"UpdateDispVeloc_kernel",&time);
     // time in seconds
@@ -235,7 +235,7 @@ void FC_FUNC_(update_displacement_ac_cuda,
 
   // kernel timing
   gpu_event start,stop;
-  if (CUDA_TIMING_UPDATE ) start_timing_gpu(&start,&stop);
+  if (GPU_TIMING) start_timing_gpu(&start,&stop);
 
 #ifdef USE_CUDA
   if (run_cuda){
@@ -255,8 +255,8 @@ void FC_FUNC_(update_displacement_ac_cuda,
   }
 #endif
 
-  // Cuda timing
-  if (CUDA_TIMING_UPDATE ){
+  // kernel timing
+  if (GPU_TIMING){
     realw flops,time;
     stop_timing_gpu(&start,&stop,"UpdatePotential_kernel",&time);
     // time in seconds
diff --git a/src/specfem3D/compute_seismograms.f90 b/src/specfem3D/compute_seismograms.f90
index a6d75f51e..c6d2c203c 100644
--- a/src/specfem3D/compute_seismograms.f90
+++ b/src/specfem3D/compute_seismograms.f90
@@ -634,7 +634,7 @@ subroutine compute_seismograms_moment_adjoint()
                                       hprime_xx,hprime_yy,hprime_zz)
 
       ! source time function value
-      time_source_dble = dble(NSTEP-it)*DT - t0 - tshift_src(irec)
+      time_source_dble = dble(NSTEP-it) * DT - t0 - tshift_src(irec)
 
       ! determines source time function value
       stf = get_stf_viscoelastic(time_source_dble,irec,NSTEP-it+1)
diff --git a/src/specfem3D/setup_sources_receivers.f90 b/src/specfem3D/setup_sources_receivers.f90
index 6769ca8cb..9aa75a804 100644
--- a/src/specfem3D/setup_sources_receivers.f90
+++ b/src/specfem3D/setup_sources_receivers.f90
@@ -1265,13 +1265,27 @@ subroutine setup_receivers_precompute_intp()
   !         nadj_rec_local - determines the number of adjoint sources, i.e., number of station locations (STATIONS_ADJOINT), which
   !                          act as sources to drive the adjoint wavefield
 
+  ! check if we need to save seismos
+  if (SIMULATION_TYPE == 3 .and. (.not. SAVE_SEISMOGRAMS_IN_ADJOINT_RUN)) then
+    do_save_seismograms = .false.
+  else
+    do_save_seismograms = .true.
+  endif
+
+  ! sets local receivers to zero if no seismogram needs to be saved
+  if (.not. do_save_seismograms) nrec_local = 0
+
   ! user output
   if (myrank == 0) then
     write(IMAIN,*) 'seismograms:'
-    if (WRITE_SEISMOGRAMS_BY_MAIN) then
-      write(IMAIN,*) '  seismograms written by main process only'
+    if (do_save_seismograms) then
+      if (WRITE_SEISMOGRAMS_BY_MAIN) then
+        write(IMAIN,*) '  seismograms written by main process only'
+      else
+        write(IMAIN,*) '  seismograms written by all processes'
+      endif
     else
-      write(IMAIN,*) '  seismograms written by all processes'
+      write(IMAIN,*) '  seismograms will not be saved'
     endif
     write(IMAIN,*)
     write(IMAIN,*) '  Total number of simulation steps (NSTEP)                       = ',NSTEP
diff --git a/src/specfem3D/specfem3D_par.F90 b/src/specfem3D/specfem3D_par.F90
index c1ac386b7..c025dc695 100644
--- a/src/specfem3D/specfem3D_par.F90
+++ b/src/specfem3D/specfem3D_par.F90
@@ -193,6 +193,7 @@ module specfem_par
 
   integer :: nlength_seismogram
   integer :: seismo_offset,seismo_current
+  logical :: do_save_seismograms
 
   ! for ASDF/SAC headers time
   integer :: yr_PDE,jda_PDE,ho_PDE,mi_PDE
diff --git a/src/specfem3D/write_output_ASCII_or_binary.f90 b/src/specfem3D/write_output_ASCII_or_binary.f90
index 346709a25..be4b8dc6e 100644
--- a/src/specfem3D/write_output_ASCII_or_binary.f90
+++ b/src/specfem3D/write_output_ASCII_or_binary.f90
@@ -37,14 +37,17 @@ subroutine write_output_ASCII_or_binary(one_seismogram, &
 
   use constants
 
-  use specfem_par, only: myrank,USE_BINARY_FOR_SEISMOGRAMS,SAVE_ALL_SEISMOS_IN_ONE_FILE,NTSTEP_BETWEEN_OUTPUT_SEISMOS, &
-    seismo_offset,seismo_current,NTSTEP_BETWEEN_OUTPUT_SAMPLE
+  use specfem_par, only: myrank, &
+    USE_BINARY_FOR_SEISMOGRAMS,SAVE_ALL_SEISMOS_IN_ONE_FILE, &
+    nlength_seismogram, &
+    NTSTEP_BETWEEN_OUTPUT_SAMPLE, &
+    seismo_offset,seismo_current
 
   implicit none
 
   integer,intent(in) :: NSTEP,it,SIMULATION_TYPE
 
-  real(kind=CUSTOM_REAL), dimension(NDIM,NTSTEP_BETWEEN_OUTPUT_SEISMOS/NTSTEP_BETWEEN_OUTPUT_SAMPLE),intent(in) :: one_seismogram
+  real(kind=CUSTOM_REAL), dimension(NDIM,nlength_seismogram),intent(in) :: one_seismogram
 
   double precision,intent(in) :: t0,DT
 
@@ -56,7 +59,6 @@ subroutine write_output_ASCII_or_binary(one_seismogram, &
   integer :: isample,ier,it_current
   double precision :: time_t_db
   real(kind=CUSTOM_REAL) :: value,time_t
-
   real, dimension(1:seismo_current) :: tr
 
   ! opens seismogram file
@@ -112,11 +114,9 @@ subroutine write_output_ASCII_or_binary(one_seismogram, &
     ! time
     if (SIMULATION_TYPE == 1) then
       ! forward simulation
-      ! distinguish between single and double precision for reals
-      time_t_db = dble( (it_current-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0
+      time_t_db = dble((it_current-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0
     else if (SIMULATION_TYPE == 3) then
       ! adjoint simulation: backward/reconstructed wavefields
-      ! distinguish between single and double precision for reals
       ! note: compare time_t with time used for source term
       time_t_db = dble(NSTEP - it_current * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0
     endif
diff --git a/src/specfem3D/write_output_ASDF.f90 b/src/specfem3D/write_output_ASDF.f90
index 1571e4516..b05d7a6d8 100644
--- a/src/specfem3D/write_output_ASDF.f90
+++ b/src/specfem3D/write_output_ASDF.f90
@@ -156,8 +156,8 @@ end subroutine write_output_ASDF
 !! \param nrec_store The number of receivers on the local processor
   subroutine init_asdf_data(nrec_store)
 
-  use constants, only: NDIM
-  use specfem_par, only: myrank,NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS
+  use constants, only: NDIM,myrank
+  use specfem_par, only: NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS
 
   use asdf_data, only: asdf_container
 
@@ -221,7 +221,9 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation)
 
   use constants, only: CUSTOM_REAL,NDIM,myrank
 
-  use specfem_par, only: NSTEP,NTSTEP_BETWEEN_OUTPUT_SAMPLE,nlength_seismogram, &
+  use specfem_par, only: &
+    nlength_seismogram, &
+    seismo_current, &
     stlat,stlon,stele,stbur,station_name,network_name
 
   use asdf_data, only: asdf_container
@@ -236,10 +238,6 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation)
   ! local Variables
   integer :: length_station_name, length_network_name
   integer :: ier, i, index_increment
-  integer :: seismo_current_used
-
-  ! actual seismogram length
-  seismo_current_used = ceiling(real(NSTEP) / NTSTEP_BETWEEN_OUTPUT_SAMPLE)
 
   index_increment = iorientation
 
@@ -258,13 +256,13 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation)
   asdf_container%receiver_el(irec_local) = stele(irec)
   asdf_container%receiver_dpt(irec_local) = stbur(irec)
 
-  allocate(asdf_container%records(i)%record(seismo_current_used),stat=ier)
+  allocate(asdf_container%records(i)%record(seismo_current),stat=ier)
   if (ier /= 0) call exit_MPI_without_rank('error allocating array 2017')
   if (ier /= 0) call exit_MPI (myrank, 'Allocating ASDF container failed.')
   asdf_container%records(i)%record(:) = 0.0
 
   ! seismogram as real data
-  asdf_container%records(i)%record(1:seismo_current_used) = real(seismogram_tmp(iorientation, 1:seismo_current_used),kind=4)
+  asdf_container%records(i)%record(1:seismo_current) = real(seismogram_tmp(iorientation, 1:seismo_current),kind=4)
 
   end subroutine store_asdf_data
 
@@ -316,7 +314,10 @@ subroutine write_asdf()
   use iso_c_binding, only: C_NULL_CHAR !,c_ptr
 !  use iso_Fortran_env
 
-  use specfem_par, only: seismo_offset,DT,NSTEP,NTSTEP_BETWEEN_OUTPUT_SAMPLE,OUTPUT_FILES,WRITE_SEISMOGRAMS_BY_MAIN
+  use specfem_par, only: DT,NSTEP, &
+    seismo_offset,seismo_current, &
+    NTSTEP_BETWEEN_OUTPUT_SAMPLE, &
+    OUTPUT_FILES,WRITE_SEISMOGRAMS_BY_MAIN
 
   implicit none
 
@@ -327,7 +328,6 @@ subroutine write_asdf()
   integer :: num_stations
   integer :: stationxml_length
   integer :: nsamples  ! constant, as in SPECFEM
-  integer :: seismo_current_used
   double precision :: sampling_rate
   double precision :: startTime
   integer(kind=8) :: start_time
@@ -393,9 +393,6 @@ subroutine write_asdf()
   call world_duplicate(comm)
   call world_size(mysize)
 
-  ! actual seismogram length
-  seismo_current_used = ceiling(real(NSTEP) / NTSTEP_BETWEEN_OUTPUT_SAMPLE)
-
   num_stations = asdf_container%nrec_local
   sampling_rate = 1.0/(DT*NTSTEP_BETWEEN_OUTPUT_SAMPLE)
 
@@ -620,7 +617,7 @@ subroutine write_asdf()
   deallocate(displs)
   deallocate(rcounts)
 
-  allocate(one_seismogram(NDIM,seismo_current_used),stat=ier)
+  allocate(one_seismogram(NDIM,seismo_current),stat=ier)
   if (ier /= 0) call exit_MPI_without_rank('error allocating array 2032')
   if (ier /= 0) call exit_MPI(myrank,'error allocating one_seismogram array')
   one_seismogram(:,:) = 0.0
@@ -795,7 +792,7 @@ subroutine write_asdf()
               if (len_trim(component_names_gather(i+((j-1)*NDIM),k)) == 0) cycle
 
               !  write(*,*) j, l, l+i, size(asdf_container%records)
-              one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used)
+              one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current)
             enddo
           endif
 
@@ -810,14 +807,14 @@ subroutine write_asdf()
               !       we therefore skip components with an empty ("") entry.
               if (len_trim(component_names_gather(i+((j-1)*NDIM),k)) == 0) cycle
 
-              one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used)
+              one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current)
             enddo
             ! send (real) data
-            call send_r(one_seismogram,NDIM*seismo_current_used,receiver,itag)
+            call send_r(one_seismogram,NDIM*seismo_current,receiver,itag)
 
           else if (myrank == 0) then
             ! receive (real) data
-            call recv_r(one_seismogram,NDIM*seismo_current_used,sender,itag)
+            call recv_r(one_seismogram,NDIM*seismo_current,sender,itag)
 
           endif
         endif
@@ -852,7 +849,7 @@ subroutine write_asdf()
 
             ! writes (float) data
             call ASDF_write_partial_waveform_f(data_ids(i), &
-                                               one_seismogram(i,1:seismo_current_used), 0, seismo_current_used, ier)
+                                               one_seismogram(i,1:seismo_current), 0, seismo_current, ier)
             if (ier /= 0) call exit_MPI(myrank,'Error ASDF write partial waveform failed')
 
             call ASDF_close_dataset_f(data_ids(i), ier)
@@ -938,11 +935,11 @@ subroutine write_asdf()
           ! see asdf implementation of ASDF_write_partial_waveform_f():
           ! https://github.com/SeismicData/asdf-library/blob/f28233894ef0a065eb725b9d22e55d0d02a7aac1/src/ASDF_write.c#L354
           if (myrank == current_proc) then
-            one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used)
+            one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current)
 
             ! writes (float) data
             call ASDF_write_partial_waveform_f(data_ids(i), &
-                                               one_seismogram(i,1:seismo_current_used), 0, seismo_current_used, ier)
+                                               one_seismogram(i,1:seismo_current), 0, seismo_current, ier)
             if (ier /= 0) call exit_MPI(myrank,'Error ASDF parallel write partial waveform failed')
           endif
 
diff --git a/src/specfem3D/write_seismograms.f90 b/src/specfem3D/write_seismograms.f90
index c16dbd985..ecb231ccc 100644
--- a/src/specfem3D/write_seismograms.f90
+++ b/src/specfem3D/write_seismograms.f90
@@ -40,14 +40,9 @@ subroutine write_seismograms()
   ! timing
   double precision, external :: wtime
   double precision :: write_time_begin,write_time
-  logical :: do_save_seismograms
 
-  ! check if we need to save seismos
-  if (SIMULATION_TYPE == 3 .and. (.not. SAVE_SEISMOGRAMS_IN_ADJOINT_RUN)) then
-    do_save_seismograms = .false.
-  else
-    do_save_seismograms = .true.
-  endif
+  ! checks if anything to do
+  if (.not. do_save_seismograms) return
 
   ! checks subsampling recurrence
   if (mod(it-1,NTSTEP_BETWEEN_OUTPUT_SAMPLE) == 0) then
@@ -75,7 +70,7 @@ subroutine write_seismograms()
     ! - ispec_selected_source   -> element containing source position, which becomes an adjoint "receiver"
 
     ! gets seismogram values
-    if (do_save_seismograms .and. nrec_local > 0) then
+    if (nrec_local > 0) then
       ! seismograms displ/veloc/accel/pressure
       if (GPU_MODE) then
         ! on GPU
@@ -98,9 +93,23 @@ subroutine write_seismograms()
   endif ! NTSTEP_BETWEEN_OUTPUT_SAMPLE
 
   ! write the current or final seismograms
+  !
+  ! for example:
+  !   NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 5
+  !       -> 10 / 5 == 2 steps (nlength_seismogram)
+  !          we will store samples at it==1,it==6 and output at it==10
+  !   NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 1
+  !       -> 10 / 1 == 10 steps
+  !          we will store samples at it==1,2,3,..,10 and output at it==10
+  !
+  !   that is for NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 5,
+  !   the seismograms have samples for the wavefield at mod((it-1),NTSTEP_BETWEEN_OUTPUT_SAMPLE) == 0,
+  !   which is at it==1,it==6. for writing the seismograms however,
+  !   we would write out at mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0, which is at it==10
+  !
   if (mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == it_end) then
 
-    if (do_save_seismograms .and. .not. INVERSE_FWI_FULL_PROBLEM) then
+    if (.not. INVERSE_FWI_FULL_PROBLEM) then
 
       ! user output
       if (myrank == 0) then
@@ -157,7 +166,7 @@ subroutine write_seismograms()
         ! timing
         write_time = wtime() - write_time_begin
         ! output
-        write(IMAIN,*) 'Total number of time steps done: ', it-it_begin+1
+        write(IMAIN,*) 'Total number of time steps written: ', seismo_offset + seismo_current
         if (WRITE_SEISMOGRAMS_BY_MAIN) then
           write(IMAIN,*) 'Writing the seismograms by main proc alone took ',sngl(write_time),' seconds'
         else
@@ -167,7 +176,7 @@ subroutine write_seismograms()
         call flush_IMAIN()
       endif
 
-    endif ! do_save_seismograms
+    endif
 
     ! resets current seismogram position
     seismo_offset = seismo_offset + seismo_current
@@ -856,7 +865,7 @@ subroutine write_adj_seismograms_to_file(istore)
           it_tmp = seismo_offset + isample
 
           ! subtract half duration of the source to make sure travel time is correct
-          time_t = real(dble((it_tmp-1)*NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0,kind=CUSTOM_REAL)
+          time_t = real(dble((it_tmp-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0,kind=CUSTOM_REAL)
 
           ! distinguish between single and double precision for reals
           write(IOUT,*) time_t,all_seismograms(iorientation,isample,irec_local)