diff --git a/src/decompose_mesh/rules.mk b/src/decompose_mesh/rules.mk index ed75854cc..18b3f39be 100644 --- a/src/decompose_mesh/rules.mk +++ b/src/decompose_mesh/rules.mk @@ -154,6 +154,7 @@ xdecompose_mesh_mpi_OBJECTS = \ $O/module_database.dec.o \ $O/module_mesh.dec.o \ $O/module_partition.dec.o \ + $O/part_decompose_mesh.dec_module.o \ $O/program_decompose_mesh_mpi.mpidec.o \ $(EMPTY_MACRO) diff --git a/src/generate_databases/save_arrays_solver_hdf5.F90 b/src/generate_databases/save_arrays_solver_hdf5.F90 index 644acee5c..2ceec3470 100644 --- a/src/generate_databases/save_arrays_solver_hdf5.F90 +++ b/src/generate_databases/save_arrays_solver_hdf5.F90 @@ -90,7 +90,7 @@ subroutine save_arrays_solver_mesh_hdf5() ! element node connectivity for movie output ! the node ids are stored after dividing one NGLL*-th order spectral element into NGLLX*NGLLY*NGLLZ elements - integer, dimension(9,nspec_ab*(NGLLX-1)*(NGLLY-1)*(NGLLZ-1)) :: spec_elm_conn_xdmf + integer, dimension(:,:),allocatable :: spec_elm_conn_xdmf ! dummy arrays integer, dimension(1,1), parameter :: i2d_dummy = reshape((/0/),(/1,1/)) @@ -153,9 +153,17 @@ subroutine save_arrays_solver_mesh_hdf5() allocate(ibool_interfaces_ext_mesh_dummy(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier) if (ier /= 0) call exit_MPI_without_rank('error allocating array 650') if (ier /= 0) stop 'error allocating array' + ibool_interfaces_ext_mesh_dummy(:,:) = 0 do i = 1, num_interfaces_ext_mesh ibool_interfaces_ext_mesh_dummy(:,i) = ibool_interfaces_ext_mesh(1:max_nibool_interfaces_ext_mesh,i) enddo + + ! connectivity + allocate(spec_elm_conn_xdmf(9,NSPEC_AB*(NGLLX-1)*(NGLLY-1)*(NGLLZ-1)),stat=ier) + if (ier /= 0) call exit_MPI_without_rank('error allocating array 651') + if (ier /= 0) stop 'error allocating spec_elm_conn_xdm array' + spec_elm_conn_xdmf(:,:) = 0 + call synchronize_all() ! get MPI parameters @@ -1433,10 +1441,8 @@ subroutine save_arrays_solver_mesh_hdf5() call synchronize_all() ! cleanup - if (allocated(ibool_interfaces_ext_mesh_dummy)) then - deallocate(ibool_interfaces_ext_mesh_dummy,stat=ier) - if (ier /= 0) stop 'error deallocating array ibool_interfaces_ext_mesh_dummy' - endif + deallocate(ibool_interfaces_ext_mesh_dummy) + deallocate(spec_elm_conn_xdmf) ! user output if (myrank == 0) then diff --git a/src/gpu/compute_forces_acoustic_cuda.cu b/src/gpu/compute_forces_acoustic_cuda.cu index 97afb85ea..5d372a4cf 100644 --- a/src/gpu/compute_forces_acoustic_cuda.cu +++ b/src/gpu/compute_forces_acoustic_cuda.cu @@ -67,10 +67,9 @@ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, // to combine forward and backward wavefield in the same kernel call // kernel timing - gpu_event start, stop; - if (CUDA_TIMING ){ - start_timing_gpu(&start,&stop); - } + gpu_event start,stop; + if (GPU_TIMING){ start_timing_gpu(&start,&stop); } + int nb_field = mp->simulation_type == 3 ? 2 : 1 ; // sets gpu arrays @@ -202,8 +201,8 @@ void Kernel_2_acoustic(int nb_blocks_to_compute, Mesh* mp, int d_iphase, } - // Cuda timing - if (CUDA_TIMING ){ + // kernel timing + if (GPU_TIMING){ realw flops,time; stop_timing_gpu(&start,&stop,"Kernel_2_acoustic_impl",&time); // time in seconds diff --git a/src/gpu/compute_forces_viscoelastic_cuda.cu b/src/gpu/compute_forces_viscoelastic_cuda.cu index a81cff535..dd2d90f2c 100644 --- a/src/gpu/compute_forces_viscoelastic_cuda.cu +++ b/src/gpu/compute_forces_viscoelastic_cuda.cu @@ -91,7 +91,7 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, // kernel timing gpu_event start,stop; - if (CUDA_TIMING){ start_timing_gpu(&start,&stop); } + if (GPU_TIMING){ start_timing_gpu(&start,&stop); } // defines local parameters for forward/adjoint function calls realw *displ,*veloc,*accel; @@ -1076,8 +1076,8 @@ void Kernel_2(int nb_blocks_to_compute,Mesh* mp,int d_iphase,realw d_deltat, } // ANISOTROPY } // ATTENUATION - // Cuda timing - if (CUDA_TIMING){ + // kernel timing + if (GPU_TIMING){ if (ATTENUATION){ stop_timing_gpu(&start,&stop,"Kernel_2_att_impl"); }else{ diff --git a/src/gpu/helper_functions.cu b/src/gpu/helper_functions.cu index 911843638..dcb8315c8 100644 --- a/src/gpu/helper_functions.cu +++ b/src/gpu/helper_functions.cu @@ -227,7 +227,7 @@ void stop_timing_gpu(gpu_event* start,gpu_event* stop, const char* info_str){ #endif // user output - printf("%s: Execution Time = %f ms\n",info_str,time); + printf("GPU_timing %s: Execution Time = %f ms\n",info_str,time); } /* ----------------------------------------------------------------------------------------------- */ @@ -253,7 +253,7 @@ void stop_timing_gpu(gpu_event* start,gpu_event* stop, const char* info_str, rea #endif // user output - printf("%s: Execution Time = %f ms\n",info_str,time); + printf("GPU_timing %s: Execution Time = %f ms\n",info_str,time); // returns time *t = time; diff --git a/src/gpu/kernels/Kernel_2_acoustic_impl.cu b/src/gpu/kernels/Kernel_2_acoustic_impl.cu index 5b098d84b..4101e12cb 100644 --- a/src/gpu/kernels/Kernel_2_acoustic_impl.cu +++ b/src/gpu/kernels/Kernel_2_acoustic_impl.cu @@ -462,7 +462,7 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, // i.e. 11% x theoretical peak performance ~ 440 GFlop/s. // 11% x "pratical" peak performance ~ 320 GFlop/s. // -// CUDA_TIMING: we achieve about 224 GFlop/s (1 mpi process, 20736 elements) +// GPU_TIMING: we achieve about 224 GFlop/s (1 mpi process, 20736 elements) // -> that is about 8% of the "practical" peak. (or 70% of the theoretical arithmetic intensity) // // this might be due to the first compute code block (before first syncthreads), where diff --git a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu index bc608278b..0a9eb3e6d 100644 --- a/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu +++ b/src/gpu/kernels/Kernel_2_viscoelastic_impl.cu @@ -2412,8 +2412,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute, #endif } -// JC JC here we will need to add GPU support for the new C-PML routines - // synchronize all the threads (one thread for each of the NGLL grid points of the // current spectral element) because we need the whole element to be ready in order // to be able to compute the matrix products along cut planes of the 3D element below @@ -2547,8 +2545,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute, &rho_s_H1,&rho_s_H2,&rho_s_H3); } -// JC JC here we will need to add GPU support for the new C-PML routines - // form dot product with test vector, symmetric form // 1. cut-plane xi __syncthreads(); @@ -2583,8 +2579,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute, sum_terms3 += rho_s_H3; } -// JC JC here we will need to add GPU support for the new C-PML routines - // assembles acceleration array if (threadIdx.x < NGLL3) { @@ -2662,8 +2656,6 @@ Kernel_2_att_impl(int nb_blocks_to_compute, epsilondev_trace[tx + working_element*NGLL3] = epsilondev_trace_loc; } // threadIdx.x -// JC JC here we will need to add GPU support for the new C-PML routines - } // kernel_2_att_impl() @@ -2814,8 +2806,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, // copy displacement from global memory to shared memory load_shared_memory_displ(&tx,&iglob,d_displ,s_dummyx_loc,s_dummyy_loc,s_dummyz_loc); -// JC JC here we will need to add GPU support for the new C-PML routines - // attenuation // use first order Taylor expansion of displacement for local storage of stresses // at this current time step, to fix attenuation in a consistent way @@ -2875,8 +2865,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, tempz3l += s_dummyz_loc[l*NGLL2+J*NGLLX+I]*fac3; } -// JC JC here we will need to add GPU support for the new C-PML routines - // attenuation // temporary variables used for fixing attenuation in a consistent way tempx1l_att = 0.f; @@ -2963,8 +2951,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, + s_dummyz_loc[3*NGLL2+J*NGLLX+I]*d_hprime_xx[3*NGLLX+K] + s_dummyz_loc[4*NGLL2+J*NGLLX+I]*d_hprime_xx[4*NGLLX+K]; -// JC JC here we will need to add GPU support for the new C-PML routines - // attenuation // temporary variables used for fixing attenuation in a consistent way tempx1l_att = s_dummyx_loc_att[K*NGLL2+J*NGLLX]*d_hprime_xx[I] @@ -3049,8 +3035,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l; duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l; -// JC JC here we will need to add GPU support for the new C-PML routines - // precompute some sums to save CPU time duxdxl_plus_duydyl = duxdxl + duydyl; duxdxl_plus_duzdzl = duxdxl + duzdzl; @@ -3059,8 +3043,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, duzdxl_plus_duxdzl = duzdxl + duxdzl; duzdyl_plus_duydzl = duzdyl + duydzl; -// JC JC here we will need to add GPU support for the new C-PML routines - // attenuation // temporary variables used for fixing attenuation in a consistent way duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att; @@ -3207,8 +3189,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, // to be able to compute the matrix products along cut planes of the 3D element below __syncthreads(); -// JC JC here we will need to add GPU support for the new C-PML routines - if (active ){ #ifndef MANUALLY_UNROLLED_LOOPS @@ -3326,10 +3306,7 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, d_accel[iglob*3 + 2] += sum_terms3; #endif // USE_TEXTURES_FIELDS -// JC JC here we will need to add GPU support for the new C-PML routines - #else // MESH_COLORING - //mesh coloring if (use_mesh_coloring_gpu ){ @@ -3371,8 +3348,6 @@ Kernel_2_att_org_impl(int nb_blocks_to_compute, epsilondev_yz[tx + working_element*NGLL3] = epsilondev_yz_loc; } // if (active) -// JC JC here we will need to add GPU support for the new C-PML routines - } // kernel_2_att_impl() */ diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h index 3a8d2a4d1..402edb266 100644 --- a/src/gpu/mesh_constants_gpu.h +++ b/src/gpu/mesh_constants_gpu.h @@ -112,8 +112,7 @@ typedef double realw; #endif // performance timers -#define CUDA_TIMING 0 -#define CUDA_TIMING_UPDATE 0 +#define GPU_TIMING 0 // error checking after cuda function calls // (note: this synchronizes many calls, thus e.g. no asynchronuous memcpy possible) diff --git a/src/gpu/prepare_mesh_constants_cuda.cu b/src/gpu/prepare_mesh_constants_cuda.cu index 357b4ce08..57e142571 100644 --- a/src/gpu/prepare_mesh_constants_cuda.cu +++ b/src/gpu/prepare_mesh_constants_cuda.cu @@ -420,8 +420,6 @@ void FC_FUNC_(prepare_constants_device, // Kelvin_voigt initialization mp->use_Kelvin_Voigt_damping = 0; - // JC JC here we will need to add GPU support for the new C-PML routines - GPU_ERROR_CHECKING("prepare_constants_device"); } diff --git a/src/gpu/update_displacement_cuda.cu b/src/gpu/update_displacement_cuda.cu index d3a37846b..8156f87d7 100644 --- a/src/gpu/update_displacement_cuda.cu +++ b/src/gpu/update_displacement_cuda.cu @@ -77,7 +77,7 @@ void FC_FUNC_(update_displacement_cuda, // kernel timing gpu_event start,stop; - if (CUDA_TIMING_UPDATE ) start_timing_gpu(&start,&stop); + if (GPU_TIMING) start_timing_gpu(&start,&stop); // debug //realw max_d,max_v,max_a; @@ -166,8 +166,8 @@ void FC_FUNC_(update_displacement_cuda, } } - // Cuda timing - if (CUDA_TIMING_UPDATE ){ + // kernel timing + if (GPU_TIMING){ realw flops,time; stop_timing_gpu(&start,&stop,"UpdateDispVeloc_kernel",&time); // time in seconds @@ -235,7 +235,7 @@ void FC_FUNC_(update_displacement_ac_cuda, // kernel timing gpu_event start,stop; - if (CUDA_TIMING_UPDATE ) start_timing_gpu(&start,&stop); + if (GPU_TIMING) start_timing_gpu(&start,&stop); #ifdef USE_CUDA if (run_cuda){ @@ -255,8 +255,8 @@ void FC_FUNC_(update_displacement_ac_cuda, } #endif - // Cuda timing - if (CUDA_TIMING_UPDATE ){ + // kernel timing + if (GPU_TIMING){ realw flops,time; stop_timing_gpu(&start,&stop,"UpdatePotential_kernel",&time); // time in seconds diff --git a/src/specfem3D/compute_seismograms.f90 b/src/specfem3D/compute_seismograms.f90 index a6d75f51e..c6d2c203c 100644 --- a/src/specfem3D/compute_seismograms.f90 +++ b/src/specfem3D/compute_seismograms.f90 @@ -634,7 +634,7 @@ subroutine compute_seismograms_moment_adjoint() hprime_xx,hprime_yy,hprime_zz) ! source time function value - time_source_dble = dble(NSTEP-it)*DT - t0 - tshift_src(irec) + time_source_dble = dble(NSTEP-it) * DT - t0 - tshift_src(irec) ! determines source time function value stf = get_stf_viscoelastic(time_source_dble,irec,NSTEP-it+1) diff --git a/src/specfem3D/setup_sources_receivers.f90 b/src/specfem3D/setup_sources_receivers.f90 index 6769ca8cb..9aa75a804 100644 --- a/src/specfem3D/setup_sources_receivers.f90 +++ b/src/specfem3D/setup_sources_receivers.f90 @@ -1265,13 +1265,27 @@ subroutine setup_receivers_precompute_intp() ! nadj_rec_local - determines the number of adjoint sources, i.e., number of station locations (STATIONS_ADJOINT), which ! act as sources to drive the adjoint wavefield + ! check if we need to save seismos + if (SIMULATION_TYPE == 3 .and. (.not. SAVE_SEISMOGRAMS_IN_ADJOINT_RUN)) then + do_save_seismograms = .false. + else + do_save_seismograms = .true. + endif + + ! sets local receivers to zero if no seismogram needs to be saved + if (.not. do_save_seismograms) nrec_local = 0 + ! user output if (myrank == 0) then write(IMAIN,*) 'seismograms:' - if (WRITE_SEISMOGRAMS_BY_MAIN) then - write(IMAIN,*) ' seismograms written by main process only' + if (do_save_seismograms) then + if (WRITE_SEISMOGRAMS_BY_MAIN) then + write(IMAIN,*) ' seismograms written by main process only' + else + write(IMAIN,*) ' seismograms written by all processes' + endif else - write(IMAIN,*) ' seismograms written by all processes' + write(IMAIN,*) ' seismograms will not be saved' endif write(IMAIN,*) write(IMAIN,*) ' Total number of simulation steps (NSTEP) = ',NSTEP diff --git a/src/specfem3D/specfem3D_par.F90 b/src/specfem3D/specfem3D_par.F90 index c1ac386b7..c025dc695 100644 --- a/src/specfem3D/specfem3D_par.F90 +++ b/src/specfem3D/specfem3D_par.F90 @@ -193,6 +193,7 @@ module specfem_par integer :: nlength_seismogram integer :: seismo_offset,seismo_current + logical :: do_save_seismograms ! for ASDF/SAC headers time integer :: yr_PDE,jda_PDE,ho_PDE,mi_PDE diff --git a/src/specfem3D/write_output_ASCII_or_binary.f90 b/src/specfem3D/write_output_ASCII_or_binary.f90 index 346709a25..be4b8dc6e 100644 --- a/src/specfem3D/write_output_ASCII_or_binary.f90 +++ b/src/specfem3D/write_output_ASCII_or_binary.f90 @@ -37,14 +37,17 @@ subroutine write_output_ASCII_or_binary(one_seismogram, & use constants - use specfem_par, only: myrank,USE_BINARY_FOR_SEISMOGRAMS,SAVE_ALL_SEISMOS_IN_ONE_FILE,NTSTEP_BETWEEN_OUTPUT_SEISMOS, & - seismo_offset,seismo_current,NTSTEP_BETWEEN_OUTPUT_SAMPLE + use specfem_par, only: myrank, & + USE_BINARY_FOR_SEISMOGRAMS,SAVE_ALL_SEISMOS_IN_ONE_FILE, & + nlength_seismogram, & + NTSTEP_BETWEEN_OUTPUT_SAMPLE, & + seismo_offset,seismo_current implicit none integer,intent(in) :: NSTEP,it,SIMULATION_TYPE - real(kind=CUSTOM_REAL), dimension(NDIM,NTSTEP_BETWEEN_OUTPUT_SEISMOS/NTSTEP_BETWEEN_OUTPUT_SAMPLE),intent(in) :: one_seismogram + real(kind=CUSTOM_REAL), dimension(NDIM,nlength_seismogram),intent(in) :: one_seismogram double precision,intent(in) :: t0,DT @@ -56,7 +59,6 @@ subroutine write_output_ASCII_or_binary(one_seismogram, & integer :: isample,ier,it_current double precision :: time_t_db real(kind=CUSTOM_REAL) :: value,time_t - real, dimension(1:seismo_current) :: tr ! opens seismogram file @@ -112,11 +114,9 @@ subroutine write_output_ASCII_or_binary(one_seismogram, & ! time if (SIMULATION_TYPE == 1) then ! forward simulation - ! distinguish between single and double precision for reals - time_t_db = dble( (it_current-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0 + time_t_db = dble((it_current-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0 else if (SIMULATION_TYPE == 3) then ! adjoint simulation: backward/reconstructed wavefields - ! distinguish between single and double precision for reals ! note: compare time_t with time used for source term time_t_db = dble(NSTEP - it_current * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0 endif diff --git a/src/specfem3D/write_output_ASDF.f90 b/src/specfem3D/write_output_ASDF.f90 index 1571e4516..b05d7a6d8 100644 --- a/src/specfem3D/write_output_ASDF.f90 +++ b/src/specfem3D/write_output_ASDF.f90 @@ -156,8 +156,8 @@ end subroutine write_output_ASDF !! \param nrec_store The number of receivers on the local processor subroutine init_asdf_data(nrec_store) - use constants, only: NDIM - use specfem_par, only: myrank,NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS + use constants, only: NDIM,myrank + use specfem_par, only: NSTEP,NTSTEP_BETWEEN_OUTPUT_SEISMOS use asdf_data, only: asdf_container @@ -221,7 +221,9 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation) use constants, only: CUSTOM_REAL,NDIM,myrank - use specfem_par, only: NSTEP,NTSTEP_BETWEEN_OUTPUT_SAMPLE,nlength_seismogram, & + use specfem_par, only: & + nlength_seismogram, & + seismo_current, & stlat,stlon,stele,stbur,station_name,network_name use asdf_data, only: asdf_container @@ -236,10 +238,6 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation) ! local Variables integer :: length_station_name, length_network_name integer :: ier, i, index_increment - integer :: seismo_current_used - - ! actual seismogram length - seismo_current_used = ceiling(real(NSTEP) / NTSTEP_BETWEEN_OUTPUT_SAMPLE) index_increment = iorientation @@ -258,13 +256,13 @@ subroutine store_asdf_data(seismogram_tmp, irec_local, irec, chn, iorientation) asdf_container%receiver_el(irec_local) = stele(irec) asdf_container%receiver_dpt(irec_local) = stbur(irec) - allocate(asdf_container%records(i)%record(seismo_current_used),stat=ier) + allocate(asdf_container%records(i)%record(seismo_current),stat=ier) if (ier /= 0) call exit_MPI_without_rank('error allocating array 2017') if (ier /= 0) call exit_MPI (myrank, 'Allocating ASDF container failed.') asdf_container%records(i)%record(:) = 0.0 ! seismogram as real data - asdf_container%records(i)%record(1:seismo_current_used) = real(seismogram_tmp(iorientation, 1:seismo_current_used),kind=4) + asdf_container%records(i)%record(1:seismo_current) = real(seismogram_tmp(iorientation, 1:seismo_current),kind=4) end subroutine store_asdf_data @@ -316,7 +314,10 @@ subroutine write_asdf() use iso_c_binding, only: C_NULL_CHAR !,c_ptr ! use iso_Fortran_env - use specfem_par, only: seismo_offset,DT,NSTEP,NTSTEP_BETWEEN_OUTPUT_SAMPLE,OUTPUT_FILES,WRITE_SEISMOGRAMS_BY_MAIN + use specfem_par, only: DT,NSTEP, & + seismo_offset,seismo_current, & + NTSTEP_BETWEEN_OUTPUT_SAMPLE, & + OUTPUT_FILES,WRITE_SEISMOGRAMS_BY_MAIN implicit none @@ -327,7 +328,6 @@ subroutine write_asdf() integer :: num_stations integer :: stationxml_length integer :: nsamples ! constant, as in SPECFEM - integer :: seismo_current_used double precision :: sampling_rate double precision :: startTime integer(kind=8) :: start_time @@ -393,9 +393,6 @@ subroutine write_asdf() call world_duplicate(comm) call world_size(mysize) - ! actual seismogram length - seismo_current_used = ceiling(real(NSTEP) / NTSTEP_BETWEEN_OUTPUT_SAMPLE) - num_stations = asdf_container%nrec_local sampling_rate = 1.0/(DT*NTSTEP_BETWEEN_OUTPUT_SAMPLE) @@ -620,7 +617,7 @@ subroutine write_asdf() deallocate(displs) deallocate(rcounts) - allocate(one_seismogram(NDIM,seismo_current_used),stat=ier) + allocate(one_seismogram(NDIM,seismo_current),stat=ier) if (ier /= 0) call exit_MPI_without_rank('error allocating array 2032') if (ier /= 0) call exit_MPI(myrank,'error allocating one_seismogram array') one_seismogram(:,:) = 0.0 @@ -795,7 +792,7 @@ subroutine write_asdf() if (len_trim(component_names_gather(i+((j-1)*NDIM),k)) == 0) cycle ! write(*,*) j, l, l+i, size(asdf_container%records) - one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used) + one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current) enddo endif @@ -810,14 +807,14 @@ subroutine write_asdf() ! we therefore skip components with an empty ("") entry. if (len_trim(component_names_gather(i+((j-1)*NDIM),k)) == 0) cycle - one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used) + one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current) enddo ! send (real) data - call send_r(one_seismogram,NDIM*seismo_current_used,receiver,itag) + call send_r(one_seismogram,NDIM*seismo_current,receiver,itag) else if (myrank == 0) then ! receive (real) data - call recv_r(one_seismogram,NDIM*seismo_current_used,sender,itag) + call recv_r(one_seismogram,NDIM*seismo_current,sender,itag) endif endif @@ -852,7 +849,7 @@ subroutine write_asdf() ! writes (float) data call ASDF_write_partial_waveform_f(data_ids(i), & - one_seismogram(i,1:seismo_current_used), 0, seismo_current_used, ier) + one_seismogram(i,1:seismo_current), 0, seismo_current, ier) if (ier /= 0) call exit_MPI(myrank,'Error ASDF write partial waveform failed') call ASDF_close_dataset_f(data_ids(i), ier) @@ -938,11 +935,11 @@ subroutine write_asdf() ! see asdf implementation of ASDF_write_partial_waveform_f(): ! https://github.com/SeismicData/asdf-library/blob/f28233894ef0a065eb725b9d22e55d0d02a7aac1/src/ASDF_write.c#L354 if (myrank == current_proc) then - one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current_used) + one_seismogram(i,:) = asdf_container%records(l+i)%record(1:seismo_current) ! writes (float) data call ASDF_write_partial_waveform_f(data_ids(i), & - one_seismogram(i,1:seismo_current_used), 0, seismo_current_used, ier) + one_seismogram(i,1:seismo_current), 0, seismo_current, ier) if (ier /= 0) call exit_MPI(myrank,'Error ASDF parallel write partial waveform failed') endif diff --git a/src/specfem3D/write_seismograms.f90 b/src/specfem3D/write_seismograms.f90 index c16dbd985..ecb231ccc 100644 --- a/src/specfem3D/write_seismograms.f90 +++ b/src/specfem3D/write_seismograms.f90 @@ -40,14 +40,9 @@ subroutine write_seismograms() ! timing double precision, external :: wtime double precision :: write_time_begin,write_time - logical :: do_save_seismograms - ! check if we need to save seismos - if (SIMULATION_TYPE == 3 .and. (.not. SAVE_SEISMOGRAMS_IN_ADJOINT_RUN)) then - do_save_seismograms = .false. - else - do_save_seismograms = .true. - endif + ! checks if anything to do + if (.not. do_save_seismograms) return ! checks subsampling recurrence if (mod(it-1,NTSTEP_BETWEEN_OUTPUT_SAMPLE) == 0) then @@ -75,7 +70,7 @@ subroutine write_seismograms() ! - ispec_selected_source -> element containing source position, which becomes an adjoint "receiver" ! gets seismogram values - if (do_save_seismograms .and. nrec_local > 0) then + if (nrec_local > 0) then ! seismograms displ/veloc/accel/pressure if (GPU_MODE) then ! on GPU @@ -98,9 +93,23 @@ subroutine write_seismograms() endif ! NTSTEP_BETWEEN_OUTPUT_SAMPLE ! write the current or final seismograms + ! + ! for example: + ! NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 5 + ! -> 10 / 5 == 2 steps (nlength_seismogram) + ! we will store samples at it==1,it==6 and output at it==10 + ! NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 1 + ! -> 10 / 1 == 10 steps + ! we will store samples at it==1,2,3,..,10 and output at it==10 + ! + ! that is for NTSTEP_BETWEEN_OUTPUT_SEISMOS = 10 , NTSTEP_BETWEEN_OUTPUT_SAMPLE = 5, + ! the seismograms have samples for the wavefield at mod((it-1),NTSTEP_BETWEEN_OUTPUT_SAMPLE) == 0, + ! which is at it==1,it==6. for writing the seismograms however, + ! we would write out at mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0, which is at it==10 + ! if (mod(it,NTSTEP_BETWEEN_OUTPUT_SEISMOS) == 0 .or. it == it_end) then - if (do_save_seismograms .and. .not. INVERSE_FWI_FULL_PROBLEM) then + if (.not. INVERSE_FWI_FULL_PROBLEM) then ! user output if (myrank == 0) then @@ -157,7 +166,7 @@ subroutine write_seismograms() ! timing write_time = wtime() - write_time_begin ! output - write(IMAIN,*) 'Total number of time steps done: ', it-it_begin+1 + write(IMAIN,*) 'Total number of time steps written: ', seismo_offset + seismo_current if (WRITE_SEISMOGRAMS_BY_MAIN) then write(IMAIN,*) 'Writing the seismograms by main proc alone took ',sngl(write_time),' seconds' else @@ -167,7 +176,7 @@ subroutine write_seismograms() call flush_IMAIN() endif - endif ! do_save_seismograms + endif ! resets current seismogram position seismo_offset = seismo_offset + seismo_current @@ -856,7 +865,7 @@ subroutine write_adj_seismograms_to_file(istore) it_tmp = seismo_offset + isample ! subtract half duration of the source to make sure travel time is correct - time_t = real(dble((it_tmp-1)*NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0,kind=CUSTOM_REAL) + time_t = real(dble((it_tmp-1) * NTSTEP_BETWEEN_OUTPUT_SAMPLE) * DT - t0,kind=CUSTOM_REAL) ! distinguish between single and double precision for reals write(IOUT,*) time_t,all_seismograms(iorientation,isample,irec_local)