diff --git a/src/case.f90 b/src/case.f90 index 78fcf9f156a..40a448e04b0 100644 --- a/src/case.f90 +++ b/src/case.f90 @@ -206,6 +206,17 @@ subroutine case_init_common(C) call json_get(C%params, 'case.numerics.polynomial_order', lx) lx = lx + 1 ! add 1 to get poly order call C%fluid%init(C%msh, lx, C%params, C%usr, C%material_properties) + C%fluid%chkp%tlag => C%tlag + C%fluid%chkp%dtlag => C%dtlag + select type(f => C%fluid) + type is(fluid_pnpn_t) + f%chkp%abx1 => f%abx1 + f%chkp%abx2 => f%abx2 + f%chkp%aby1 => f%aby1 + f%chkp%aby2 => f%aby2 + f%chkp%abz1 => f%abz1 + f%chkp%abz2 => f%abz2 + end select ! @@ -232,6 +243,9 @@ subroutine case_init_common(C) call C%scalar%init(C%msh, C%fluid%c_Xh, C%fluid%gs_Xh, C%params, C%usr,& C%material_properties) call C%fluid%chkp%add_scalar(C%scalar%s) + C%fluid%chkp%abs1 => C%scalar%abx1 + C%fluid%chkp%abs2 => C%scalar%abx2 + C%fluid%chkp%slag => C%scalar%slag end if ! @@ -381,11 +395,11 @@ subroutine case_init_common(C) ! Save checkpoints (if nothing specified, default to saving at end of sim) ! call json_get_or_default(C%params, 'case.output_checkpoints',& - logical_val, .false.) + logical_val, .true.) if (logical_val) then C%f_chkp = chkp_output_t(C%fluid%chkp, path=output_directory) - call json_get(C%params, 'case.checkpoint_control', string_val) - call json_get(C%params, 'case.checkpoint_value', real_val) + call json_get_or_default(C%params, 'case.checkpoint_control', string_val,"simulationtime") + call json_get_or_default(C%params, 'case.checkpoint_value', real_val,1e10_rp) call C%s%add(C%f_chkp, real_val, string_val) end if diff --git a/src/common/checkpoint.f90 b/src/common/checkpoint.f90 index 2491d8c2a6c..cec1a8a4811 100644 --- a/src/common/checkpoint.f90 +++ b/src/common/checkpoint.f90 @@ -50,14 +50,30 @@ module checkpoint type(field_t), pointer :: w => null() type(field_t), pointer :: p => null() + ! ! Optional payload ! type(field_series_t), pointer :: ulag => null() type(field_series_t), pointer :: vlag => null() type(field_series_t), pointer :: wlag => null() + + real(kind=rp), pointer :: tlag(:) => null() + real(kind=rp), pointer :: dtlag(:) => null() + + !> for pnpn + type(field_t), pointer :: abx1 => null() + type(field_t), pointer :: abx2 => null() + type(field_t), pointer :: aby1 => null() + type(field_t), pointer :: aby2 => null() + type(field_t), pointer :: abz1 => null() + type(field_t), pointer :: abz2 => null() type(field_t), pointer :: s => null() + type(field_series_t), pointer :: slag => null() + + type(field_t), pointer :: abs1 => null() + type(field_t), pointer :: abs2 => null() real(kind=dp) :: t !< Restart time (valid after load) type(mesh_t) :: previous_mesh @@ -125,13 +141,15 @@ subroutine chkp_sync_host(this) if (NEKO_BCKND_DEVICE .eq. 1) then associate(u=>this%u, v=>this%v, w=>this%w, & - ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag) + ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag, & + p=>this%p) if (associated(this%u) .and. associated(this%v) .and. & - associated(this%w)) then + associated(this%w) .and. associated(this%p)) then call device_memcpy(u%x, u%x_d, u%dof%size(), DEVICE_TO_HOST) call device_memcpy(v%x, v%x_d, v%dof%size(), DEVICE_TO_HOST) call device_memcpy(w%x, w%x_d, w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(p%x, p%x_d, p%dof%size(), DEVICE_TO_HOST) end if if (associated(this%ulag) .and. associated(this%vlag) .and. & @@ -150,12 +168,33 @@ subroutine chkp_sync_host(this) w%dof%size(), DEVICE_TO_HOST) call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abx1%x, this%abx1%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abx2%x, this%abx2%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%aby1%x, this%aby1%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%aby2%x, this%aby2%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abz1%x, this%abz1%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abz2%x, this%abz2%x_d, & + w%dof%size(), DEVICE_TO_HOST) end if if (associated(this%s)) then call device_memcpy(this%s%x, this%s%x_d, & this%s%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, & + this%s%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, & + this%s%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abs1%x, this%abs1%x_d, & + w%dof%size(), DEVICE_TO_HOST) + call device_memcpy(this%abs2%x, this%abs2%x_d, & + w%dof%size(), DEVICE_TO_HOST) end if end associate + call device_sync() end if end subroutine chkp_sync_host @@ -166,13 +205,15 @@ subroutine chkp_sync_device(this) if (NEKO_BCKND_DEVICE .eq. 1) then associate(u=>this%u, v=>this%v, w=>this%w, & - ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag) + ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag,& + p=>this%p) if (associated(this%u) .and. associated(this%v) .and. & associated(this%w)) then call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE) call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE) call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE) end if if (associated(this%ulag) .and. associated(this%vlag) .and. & diff --git a/src/fluid/bcknd/device/pnpn_res_device.F90 b/src/fluid/bcknd/device/pnpn_res_device.F90 index 16e2696e7f2..8e676a2fa3d 100644 --- a/src/fluid/bcknd/device/pnpn_res_device.F90 +++ b/src/fluid/bcknd/device/pnpn_res_device.F90 @@ -255,6 +255,7 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh) call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh) + #ifdef HAVE_HIP call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, & wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, & @@ -270,6 +271,7 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & c_Xh%B_d, c_Xh%h1_d, mu, rho, n) #endif c_Xh%ifh2 = .false. + call device_cfill(c_Xh%h1_d,1.0_rp / rho,n) call gs_Xh%op(ta1, GS_OP_ADD) call gs_Xh%op(ta2, GS_OP_ADD) diff --git a/src/fluid/fluid_pnpn.f90 b/src/fluid/fluid_pnpn.f90 index 0d7274f5e79..35cb6b3e702 100644 --- a/src/fluid/fluid_pnpn.f90 +++ b/src/fluid/fluid_pnpn.f90 @@ -44,6 +44,7 @@ module fluid_pnpn use fluid_aux use time_scheme_controller use projection + use device use logger use advection use profiler @@ -107,6 +108,7 @@ module fluid_pnpn procedure, pass(this) :: init => fluid_pnpn_init procedure, pass(this) :: free => fluid_pnpn_free procedure, pass(this) :: step => fluid_pnpn_step + procedure, pass(this) :: restart => fluid_pnpn_restart end type fluid_pnpn_t contains @@ -158,10 +160,15 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) call this%abx1%init(dm_Xh, "abx1") call this%aby1%init(dm_Xh, "aby1") call this%abz1%init(dm_Xh, "abz1") - call this%abx2%init(dm_Xh, "abx2") call this%aby2%init(dm_Xh, "aby2") call this%abz2%init(dm_Xh, "abz2") + this%abx1 = 0.0_rp + this%aby1 = 0.0_rp + this%abz1 = 0.0_rp + this%abx2 = 0.0_rp + this%aby2 = 0.0_rp + this%abz2 = 0.0_rp call this%du%init(dm_Xh, 'du') call this%dv%init(dm_Xh, 'dv') @@ -271,6 +278,139 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) end subroutine fluid_pnpn_init + subroutine fluid_pnpn_restart(this,dtlag, tlag) + class(fluid_pnpn_t), target, intent(inout) :: this + real(kind=rp) :: dtlag(10), tlag(10) + type(field_t) :: u_temp, v_temp, w_temp + integer :: i, n + + n = this%u%dof%size() + ! Make sure that continuity is maintained (important for interpolation) + call col2(this%u%x,this%c_Xh%mult,this%u%dof%size()) + call col2(this%v%x,this%c_Xh%mult,this%u%dof%size()) + call col2(this%w%x,this%c_Xh%mult,this%u%dof%size()) + call col2(this%p%x,this%c_Xh%mult,this%u%dof%size()) + do i = 1, this%ulag%size() + call col2(this%ulag%lf(i)%x,this%c_Xh%mult,this%u%dof%size()) + call col2(this%vlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size()) + call col2(this%wlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size()) + end do + + call col2(this%abx1%x,this%c_Xh%mult,this%abx1%dof%size()) + call col2(this%abx2%x,this%c_Xh%mult,this%abx2%dof%size()) + call col2(this%aby1%x,this%c_Xh%mult,this%aby1%dof%size()) + call col2(this%aby2%x,this%c_Xh%mult,this%aby2%dof%size()) + call col2(this%abz1%x,this%c_Xh%mult,this%abz1%dof%size()) + call col2(this%abz2%x,this%c_Xh%mult,this%abz2%dof%size()) + + + if (NEKO_BCKND_DEVICE .eq. 1) then + associate(u=>this%u, v=>this%v, w=>this%w, & + ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag,& + p=>this%p) + call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE) + call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE) + call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE) + call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, & + u%dof%size(), HOST_TO_DEVICE) + call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, & + u%dof%size(), HOST_TO_DEVICE) + + call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, & + v%dof%size(), HOST_TO_DEVICE) + call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, & + v%dof%size(), HOST_TO_DEVICE) + + call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%abx1%x, this%abx1%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%abx2%x, this%abx2%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%aby1%x, this%aby1%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%aby2%x, this%aby2%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%abz1%x, this%abz1%x_d, & + w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%abz2%x, this%abz2%x_d, & + w%dof%size(), HOST_TO_DEVICE) + end associate + end if + + + + call this%gs_Xh%op(this%u,GS_OP_ADD) + call this%gs_Xh%op(this%v,GS_OP_ADD) + call this%gs_Xh%op(this%w,GS_OP_ADD) + call this%gs_Xh%op(this%p,GS_OP_ADD) + + do i = 1, this%ulag%size() + call this%gs_Xh%op(this%ulag%lf(i),GS_OP_ADD) + call this%gs_Xh%op(this%vlag%lf(i),GS_OP_ADD) + call this%gs_Xh%op(this%wlag%lf(i),GS_OP_ADD) + end do + call this%gs_Xh%op(this%abx1,GS_OP_ADD) + call this%gs_Xh%op(this%abx2,GS_OP_ADD) + call this%gs_Xh%op(this%aby1,GS_OP_ADD) + call this%gs_Xh%op(this%aby2,GS_OP_ADD) + call this%gs_Xh%op(this%abz1,GS_OP_ADD) + call this%gs_Xh%op(this%abz2,GS_OP_ADD) + + !! If we would decide to only restart from lagged fields instead of asving abx1, aby1 etc. + !! Observe that one also needs to recompute the focing at the old time steps + !u_temp = this%ulag%lf(2) + !v_temp = this%vlag%lf(2) + !w_temp = this%wlag%lf(2) + !! Compute the source terms + !call this%source_term%compute(tlag(2), -1) + ! + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !! Add the advection operators to the right-hand-side. + !call this%adv%compute(u_temp, v_temp, w_temp, & + ! this%f_x%x, this%f_y%x, this%f_z%x, & + ! this%Xh, this%c_Xh, this%dm_Xh%size()) + !this%abx2 = this%f_x + !this%aby2 = this%f_y + !this%abz2 = this%f_z + ! + !u_temp = this%ulag%lf(1) + !v_temp = this%vlag%lf(1) + !w_temp = this%wlag%lf(1) + !call this%source_term%compute(tlag(1), 0) + + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !call this%adv%compute(u_temp, v_temp, w_temp, & + ! this%f_x%x, this%f_y%x, this%f_z%x, & + ! this%Xh, this%c_Xh, this%dm_Xh%size()) + !this%abx1 = this%f_x + !this%aby1 = this%f_y + !this%abz1 = this%f_z + + end subroutine fluid_pnpn_restart + subroutine fluid_pnpn_free(this) class(fluid_pnpn_t), intent(inout) :: this @@ -384,7 +524,6 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) call this%scratch%request_field(u_e, temp_indices(1)) call this%scratch%request_field(v_e, temp_indices(2)) call this%scratch%request_field(w_e, temp_indices(3)) - call sumab%compute_fluid(u_e, v_e, w_e, u, v, w, & ulag, vlag, wlag, ext_bdf%advection_coeffs, ext_bdf%nadv) diff --git a/src/fluid/fluid_scheme.f90 b/src/fluid/fluid_scheme.f90 index 15360484ec5..9a098237b2a 100644 --- a/src/fluid/fluid_scheme.f90 +++ b/src/fluid/fluid_scheme.f90 @@ -137,6 +137,7 @@ module fluid_scheme procedure(fluid_scheme_init_intrf), pass(this), deferred :: init procedure(fluid_scheme_free_intrf), pass(this), deferred :: free procedure(fluid_scheme_step_intrf), pass(this), deferred :: step + procedure(fluid_scheme_restart_intrf), pass(this), deferred :: restart generic :: scheme_init => fluid_scheme_init_all, fluid_scheme_init_uvw end type fluid_scheme_t @@ -180,6 +181,18 @@ subroutine fluid_scheme_step_intrf(this, t, tstep, dt, ext_bdf) end subroutine fluid_scheme_step_intrf end interface + !> Abstract interface to restart a fluid scheme + abstract interface + subroutine fluid_scheme_restart_intrf(this, dtlag, tlag) + import fluid_scheme_t + import rp + class(fluid_scheme_t), target, intent(inout) :: this + real(kind=rp) :: dtlag(10), tlag(10) + + end subroutine fluid_scheme_restart_intrf + end interface + + contains !> Initialize common data for the current scheme diff --git a/src/io/chkp_file.f90 b/src/io/chkp_file.f90 index b84dba3d5e8..a45c535244b 100644 --- a/src/io/chkp_file.f90 +++ b/src/io/chkp_file.f90 @@ -39,6 +39,7 @@ module chkp_file use num_types use field use dofmap, only: dofmap_t + use device_math use utils use space use mesh @@ -75,15 +76,21 @@ subroutine chkp_file_write(this, data, t) character(len=1024) :: fname integer :: ierr, suffix_pos, optional_fields type(field_t), pointer :: u, v, w, p, s + type(field_t), pointer :: abx1,abx2 + type(field_t), pointer :: aby1,aby2 + type(field_t), pointer :: abz1,abz2 + type(field_t), pointer :: abs1,abs2 type(field_series_t), pointer :: ulag => null() type(field_series_t), pointer :: vlag => null() type(field_series_t), pointer :: wlag => null() + type(field_series_t), pointer :: slag => null() + real(kind=rp), pointer :: dtlag(:), tlag(:) type(mesh_t), pointer :: msh type(MPI_Status) :: status type(MPI_File) :: fh integer (kind=MPI_OFFSET_KIND) :: mpi_offset, byte_offset integer(kind=i8) :: n_glb_dofs, dof_offset - logical :: write_lag, write_scalar + logical :: write_lag, write_scalar, write_dtlag, write_scalarlag, write_abvel integer :: i if (present(t)) then @@ -127,6 +134,34 @@ subroutine chkp_file_write(this, data, t) else write_scalar = .false. end if + + if (associated(data%tlag)) then + tlag => data%tlag + dtlag => data%dtlag + write_dtlag = .true. + optional_fields = optional_fields + 4 + else + write_dtlag = .false. + end if + write_abvel = .false. + if (associated(data%abx1)) then + abx1 => data%abx1 + abx2 => data%abx2 + aby1 => data%aby1 + aby2 => data%aby2 + abz1 => data%abz1 + abz2 => data%abz2 + optional_fields = optional_fields + 8 + write_abvel = .true. + end if + write_scalarlag = .false. + if (associated(data%abs1)) then + slag => data%slag + abs1 => data%abs1 + abs2 => data%abs2 + optional_fields = optional_fields + 16 + write_scalarlag = .true. + end if class default call neko_error('Invalid data') end select @@ -153,11 +188,13 @@ subroutine chkp_file_write(this, data, t) ! Dump mandatory checkpoint data ! - byte_offset = 4 * MPI_INTEGER_SIZE + MPI_DOUBLE_PRECISION_SIZE + & + byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + byte_offset = byte_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) - call MPI_File_write_at_all(fh, byte_offset, u%x, u%dof%size(), & + call MPI_File_write_at_all(fh, byte_offset,u%x, u%dof%size(), & MPI_REAL_PRECISION, status, ierr) - mpi_offset = 4 * MPI_INTEGER_SIZE + MPI_DOUBLE_PRECISION_SIZE + & + mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + mpi_offset = mpi_offset +& n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) byte_offset = mpi_offset + & @@ -232,6 +269,72 @@ subroutine chkp_file_write(this, data, t) MPI_REAL_PRECISION, status, ierr) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if + + if (write_dtlag) then + call MPI_File_write_at_all(fh, mpi_offset, tlag, 10, MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, mpi_offset, dtlag, 10, MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) + end if + + if (write_abvel) then + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abx1%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abx2%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, aby1%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, aby2%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abz1%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abz2%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end if + + if (write_scalarlag) then + do i = 1, slag%size() + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + ! We should not need this extra associate block, ant it works + ! great without it for GNU, Intel, NEC and Cray, but throws an + ! ICE with NAG. + associate (x => slag%lf(i)%x) + call MPI_File_write_at_all(fh, byte_offset, x, & + slag%lf(i)%dof%size(), MPI_REAL_PRECISION, status, ierr) + end associate + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end do + + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abs1%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_write_at_all(fh, byte_offset, abs2%x, abx1%dof%size(), & + MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end if call MPI_File_close(fh, ierr) @@ -251,16 +354,23 @@ subroutine chkp_file_read(this, data) type(field_series_t), pointer :: ulag => null() type(field_series_t), pointer :: vlag => null() type(field_series_t), pointer :: wlag => null() + type(field_series_t), pointer :: slag => null() type(mesh_t), pointer :: msh type(MPI_Status) :: status type(MPI_File) :: fh + type(field_t), pointer :: abx1,abx2 + type(field_t), pointer :: aby1,aby2 + type(field_t), pointer :: abz1,abz2 + type(field_t), pointer :: abs1,abs2 real(kind=rp), allocatable :: x_coord(:,:,:,:) real(kind=rp), allocatable :: y_coord(:,:,:,:) real(kind=rp), allocatable :: z_coord(:,:,:,:) + real(kind=rp), pointer :: dtlag(:), tlag(:) integer (kind=MPI_OFFSET_KIND) :: mpi_offset, byte_offset integer(kind=i8) :: n_glb_dofs, dof_offset - integer :: glb_nelv, gdim, lx, have_lag, have_scalar, nel, optional_fields - logical :: read_lag, read_scalar + integer :: glb_nelv, gdim, lx, have_lag, have_scalar, nel, optional_fields, have_dtlag + integer :: have_abvel, have_scalarlag + logical :: read_lag, read_scalar, read_dtlag, read_abvel, read_scalarlag real(kind=rp) :: tol real(kind=rp) :: center_x, center_y, center_z integer :: i, e @@ -306,6 +416,30 @@ subroutine chkp_file_read(this, data) else read_scalar = .false. end if + if (associated(data%dtlag)) then + dtlag => data%dtlag + tlag => data%tlag + read_dtlag = .true. + else + read_dtlag = .false. + end if + read_abvel = .false. + if (associated(data%abx1)) then + abx1 => data%abx1 + abx2 => data%abx2 + aby1 => data%aby1 + aby2 => data%aby2 + abz1 => data%abz1 + abz2 => data%abz2 + read_abvel = .true. + end if + read_scalarlag = .false. + if (associated(data%abs1)) then + slag => data%slag + abs1 => data%abs1 + abs2 => data%abs2 + read_scalarlag = .true. + end if chkp => data @@ -325,6 +459,9 @@ subroutine chkp_file_read(this, data) have_lag = mod(optional_fields,2)/1 have_scalar = mod(optional_fields,4)/2 + have_dtlag = mod(optional_fields,8)/4 + have_abvel = mod(optional_fields,16)/8 + have_scalarlag = mod(optional_fields,32)/16 if ( ( glb_nelv .ne. msh%glb_nelv ) .or. & ( gdim .ne. msh%gdim) .or. & @@ -381,10 +518,12 @@ subroutine chkp_file_read(this, data) ! Read mandatory checkpoint data ! - byte_offset = 4 * MPI_INTEGER_SIZE + MPI_DOUBLE_PRECISION_SIZE + & + byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + byte_offset = byte_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call this%read_field(fh, byte_offset, u%x, nel) - mpi_offset = 4 * MPI_INTEGER_SIZE + MPI_DOUBLE_PRECISION_SIZE + & + mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + mpi_offset = mpi_offset +& n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) byte_offset = mpi_offset + & @@ -407,7 +546,6 @@ subroutine chkp_file_read(this, data) ! if (read_lag) then - do i = 1, ulag%size() byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) @@ -436,20 +574,69 @@ subroutine chkp_file_read(this, data) call this%read_field(fh, byte_offset, s%x, nel) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if - - call MPI_File_close(fh, ierr) - if (this%mesh2mesh) then - call this%global_interp%free() + + if (read_dtlag .and. have_dtlag .eq. 1) then + call MPI_File_read_at_all(fh, mpi_offset, tlag, 10, MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) + call MPI_File_read_at_all(fh, mpi_offset, dtlag, 10, MPI_REAL_PRECISION, status, ierr) + mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) end if + + if (read_abvel .and. have_abvel .eq. 1) then + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abx1%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abx2%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, aby1%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, aby2%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abz1%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abz2%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end if + if (read_scalarlag .and. have_scalarlag .eq. 1) then + do i = 1, slag%size() + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, slag%lf(i)%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end do + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abs1%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + byte_offset = mpi_offset + & + dof_offset * int(MPI_REAL_PREC_SIZE, i8) + call this%read_field(fh, byte_offset, abs2%x, nel) + mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) + end if + + call MPI_File_close(fh, ierr) + + call this%global_interp%free() call this%space_interp%free() - + end subroutine chkp_file_read subroutine chkp_read_field(this, fh, byte_offset, x, nel) class(chkp_file_t) :: this type(MPI_File) :: fh integer(kind=MPI_OFFSET_KIND) :: byte_offset - integer :: nel + integer, intent(in) :: nel real(kind=rp) :: x(this%sim_Xh%lxyz, nel) real(kind=rp), allocatable :: read_array(:) integer :: nel_stride, frac_space @@ -457,6 +644,8 @@ subroutine chkp_read_field(this, fh, byte_offset, x, nel) integer :: ierr allocate(read_array(this%chkp_Xh%lxyz*nel)) + + call rzero(read_array,this%chkp_xh%lxyz*nel) call MPI_File_read_at_all(fh, byte_offset, read_array, & nel*this%chkp_Xh%lxyz, MPI_REAL_PRECISION, status, ierr) if (this%mesh2mesh) then diff --git a/src/scalar/scalar_pnpn.f90 b/src/scalar/scalar_pnpn.f90 index 09ce3b08b12..44ca5b44948 100644 --- a/src/scalar/scalar_pnpn.f90 +++ b/src/scalar/scalar_pnpn.f90 @@ -41,7 +41,9 @@ module scalar_pnpn use bc, only : bc_list_t, bc_list_init, bc_list_free, bc_list_apply_scalar, & bc_list_add use mesh, only : mesh_t + use checkpoint, only : chkp_t use coefs, only : coef_t + use device use gather_scatter, only : gs_t, GS_OP_ADD use scalar_residual, only :scalar_residual_t use ax_product, only : ax_t @@ -101,6 +103,8 @@ module scalar_pnpn contains !> Constructor. procedure, pass(this) :: init => scalar_pnpn_init + !> To restart + procedure, pass(this) :: restart => scalar_pnpn_restart !> Destructor. procedure, pass(this) :: free => scalar_pnpn_free procedure, pass(this) :: step => scalar_pnpn_step @@ -205,6 +209,43 @@ subroutine scalar_pnpn_init(this, msh, coef, gs, params, user, & end subroutine scalar_pnpn_init + !> I envision the arguments to this func might need to be expanded + subroutine scalar_pnpn_restart(this, dtlag, tlag) + class(scalar_pnpn_t), target, intent(inout) :: this + real(kind=rp) :: dtlag(10), tlag(10) + integer :: n + + + n = this%s%dof%size() + + call col2(this%s%x, this%c_Xh%mult, n) + call col2(this%abx1%x, this%c_Xh%mult, n) + call col2(this%abx2%x, this%c_Xh%mult, n) + call col2(this%slag%lf(1)%x, this%c_Xh%mult, n) + call col2(this%slag%lf(2)%x, this%c_Xh%mult, n) + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(this%s%x, this%s%x_d, & + n, HOST_TO_DEVICE) + call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, & + n, HOST_TO_DEVICE) + call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, & + n, HOST_TO_DEVICE) + call device_memcpy(this%abx1%x, this%abx1%x_d, & + n, HOST_TO_DEVICE) + call device_memcpy(this%abx2%x, this%abx2%x_d, & + n, HOST_TO_DEVICE) + end if + + call this%gs_Xh%op(this%s,GS_OP_ADD) + call this%gs_Xh%op(this%slag%lf(1),GS_OP_ADD) + call this%gs_Xh%op(this%slag%lf(2),GS_OP_ADD) + call this%gs_Xh%op(this%abx1,GS_OP_ADD) + call this%gs_Xh%op(this%abx2,GS_OP_ADD) + + + + end subroutine scalar_pnpn_restart + subroutine scalar_pnpn_free(this) class(scalar_pnpn_t), intent(inout) :: this @@ -256,6 +297,7 @@ subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf) integer :: n ! Linear solver results monitor type(ksp_monitor_t) :: ksp_results(1) + character(len=LOG_SIZE) :: log_buf n = this%dm_Xh%size() @@ -274,6 +316,18 @@ subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf) msh => this%msh, res => this%res, & makeext => this%makeext, makebdf => this%makebdf) + if (neko_log%level_ .ge. NEKO_LOG_DEBUG) then + write(log_buf,'(A,A,E15.7,A,E15.7,A,E15.7)') 'Scalar debug',& + ' l2norm s', glsc2(this%s%x,this%s%x,n),& + ' slag1', glsc2(this%slag%lf(1)%x,this%slag%lf(1)%x,n),& + ' slag2', glsc2(this%slag%lf(2)%x,this%slag%lf(2)%x,n) + call neko_log%message(log_buf) + write(log_buf,'(A,A,E15.7,A,E15.7)') 'Scalar debug2',& + ' l2norm abx1', glsc2(this%abx1%x,this%abx1%x,n),& + ' abx2', glsc2(this%abx2%x,this%abx2%x,n) + call neko_log%message(log_buf) + end if + ! Evaluate the source term and scale with the mass matrix. call f_Xh%eval(t) diff --git a/src/scalar/scalar_scheme.f90 b/src/scalar/scalar_scheme.f90 index 58b8589ab0a..419f513790b 100644 --- a/src/scalar/scalar_scheme.f90 +++ b/src/scalar/scalar_scheme.f90 @@ -99,6 +99,7 @@ module scalar_scheme procedure(scalar_scheme_init_intrf), pass(this), deferred :: init procedure(scalar_scheme_free_intrf), pass(this), deferred :: free procedure(scalar_scheme_step_intrf), pass(this), deferred :: step + procedure(scalar_scheme_restart_intrf), pass(this), deferred :: restart end type scalar_scheme_t !> Abstract interface to initialize a scalar formulation @@ -122,6 +123,17 @@ subroutine scalar_scheme_init_intrf(this, msh, coef, gs, params, user,& end subroutine scalar_scheme_init_intrf end interface + !> Abstract interface to restart a scalar formulation + abstract interface + subroutine scalar_scheme_restart_intrf(this, dtlag, tlag) + import scalar_scheme_t + import chkp_t + import rp + class(scalar_scheme_t), target, intent(inout) :: this + real(kind=rp) :: dtlag(10), tlag(10) + end subroutine scalar_scheme_restart_intrf + end interface + !> Abstract interface to dealocate a scalar formulation abstract interface subroutine scalar_scheme_free_intrf(this) diff --git a/src/simulation.f90 b/src/simulation.f90 index b6788428c84..4d773027772 100644 --- a/src/simulation.f90 +++ b/src/simulation.f90 @@ -38,7 +38,10 @@ module simulation use file use math use logger + use device + use device_math use jobctrl + use field, only : field_t use profiler use math, only : col2 use simulation_component_global, only : neko_simcomps @@ -109,7 +112,6 @@ subroutine neko_solve(C) write(log_buf, '(A,E15.7,1x,A,E15.7)') 'CFL:', cfl, 'dt:', C%dt call neko_log%message(log_buf) - ! Fluid step call simulation_settime(t, C%dt, C%ext_bdf, C%tlag, C%dtlag, tstep) call neko_log%section('Fluid') @@ -191,7 +193,8 @@ subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step) end do dtlag(1) = dt - if (step .eq. 1) then + tlag(1) = t + if (ext_bdf%ndiff .eq. 0) then dtlag(2) = dt tlag(2) = t end if @@ -228,48 +231,24 @@ subroutine simulation_restart(C, t) found) if (found) C%fluid%chkp%mesh2mesh_tol = tol - - + + C%dtlag(:) = C%dt + C%tlag(:) = t + do i = 1, size(C%tlag) + C%tlag(i) = t - i*C%dtlag(i) + end do chkpf = file_t(trim(restart_file)) call chkpf%read(C%fluid%chkp) !Free the previous mesh, dont need it anymore call C%fluid%chkp%previous_mesh%free() - - ! Make sure that continuity is maintained (important for interpolation) - call col2(C%fluid%u%x,C%fluid%c_Xh%mult,C%fluid%u%dof%size()) - call col2(C%fluid%v%x,C%fluid%c_Xh%mult,C%fluid%u%dof%size()) - call col2(C%fluid%w%x,C%fluid%c_Xh%mult,C%fluid%u%dof%size()) - call col2(C%fluid%p%x,C%fluid%c_Xh%mult,C%fluid%u%dof%size()) - select type (fld => C%fluid) - type is(fluid_pnpn_t) - do i = 1, fld%ulag%size() - call col2(fld%ulag%lf(i)%x,fld%c_Xh%mult,fld%u%dof%size()) - call col2(fld%vlag%lf(i)%x,fld%c_Xh%mult,fld%u%dof%size()) - call col2(fld%wlag%lf(i)%x,fld%c_Xh%mult,fld%u%dof%size()) + do i = 1, size(C%dtlag) + call C%ext_bdf%set_coeffs(C%dtlag) end do - end select - if (allocated(C%scalar)) then - call col2(C%scalar%s%x,C%scalar%c_Xh%mult, C%scalar%s%dof%size()) - end if + + call C%fluid%restart(C%dtlag, C%tlag) + if (allocated(C%scalar)) call C%scalar%restart( C%dtlag, C%tlag) - call C%fluid%chkp%sync_device() - call C%fluid%gs_Xh%op(C%fluid%u,GS_OP_ADD) - call C%fluid%gs_Xh%op(C%fluid%v,GS_OP_ADD) - call C%fluid%gs_Xh%op(C%fluid%w,GS_OP_ADD) - call C%fluid%gs_Xh%op(C%fluid%p,GS_OP_ADD) - select type (fld => C%fluid) - type is(fluid_pnpn_t) - do i = 1, fld%ulag%size() - call fld%gs_Xh%op(fld%ulag%lf(i),GS_OP_ADD) - call fld%gs_Xh%op(fld%vlag%lf(i),GS_OP_ADD) - call fld%gs_Xh%op(fld%wlag%lf(i),GS_OP_ADD) - end do - end select - - if (allocated(C%scalar)) then - call C%scalar%gs_Xh%op(C%scalar%s,GS_OP_ADD) - end if t = C%fluid%chkp%restart_time() call neko_log%section('Restarting from checkpoint') write(log_buf,'(A,A)') 'File : ', & @@ -279,7 +258,6 @@ subroutine simulation_restart(C, t) call neko_log%message(log_buf) call neko_log%end_section() - call C%s%set_counter(t) end subroutine simulation_restart