Skip to content

Commit

Permalink
Use standard language constructs to guide vectorisation
Browse files Browse the repository at this point in the history
  • Loading branch information
njansson committed May 30, 2024
1 parent a60891b commit 4d03281
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
32 changes: 16 additions & 16 deletions src/common/bcknd/cpu/rhs_maker_cpu.f90
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ subroutine rhs_maker_sumab_cpu(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab

n = uu%dof%size()

do i = 1, n
do concurrent (i = 1:n)
u%x(i,1,1,1) = ab(1) * uu%x(i,1,1,1) + ab(2) * uulag%lf(1)%x(i,1,1,1)
v%x(i,1,1,1) = ab(1) * vv%x(i,1,1,1) + ab(2) * vvlag%lf(1)%x(i,1,1,1)
w%x(i,1,1,1) = ab(1) * ww%x(i,1,1,1) + ab(2) * wwlag%lf(1)%x(i,1,1,1)
end do

if (nab .eq. 3) then
do i = 1, n
do concurrent (i = 1:n)
u%x(i,1,1,1) = u%x(i,1,1,1) + ab(3) * uulag%lf(2)%x(i,1,1,1)
v%x(i,1,1,1) = v%x(i,1,1,1) + ab(3) * vvlag%lf(2)%x(i,1,1,1)
w%x(i,1,1,1) = w%x(i,1,1,1) + ab(3) * wwlag%lf(2)%x(i,1,1,1)
Expand All @@ -68,7 +68,7 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
call neko_scratch_registry%request_field(temp2, temp_indices(2))
call neko_scratch_registry%request_field(temp3, temp_indices(3))

do i = 1, n
do concurrent (i = 1:n)
temp1%x(i,1,1,1) = ext_coeffs(2) * fx_lag%x(i,1,1,1) + &
ext_coeffs(3) * fx_laglag%x(i,1,1,1)
temp2%x(i,1,1,1) = ext_coeffs(2) * fy_lag%x(i,1,1,1) + &
Expand All @@ -77,7 +77,7 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
ext_coeffs(3) * fz_laglag%x(i,1,1,1)
end do

do i = 1, n
do concurrent (i = 1:n)
fx_laglag%x(i,1,1,1) = fx_lag%x(i,1,1,1)
fy_laglag%x(i,1,1,1) = fy_lag%x(i,1,1,1)
fz_laglag%x(i,1,1,1) = fz_lag%x(i,1,1,1)
Expand All @@ -86,7 +86,7 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
fz_lag%x(i,1,1,1) = fz(i)
end do

do i = 1, n
do concurrent (i = 1:n)
fx(i) = (ext_coeffs(1) * fx(i) + temp1%x(i,1,1,1)) * rho
fy(i) = (ext_coeffs(1) * fy(i) + temp2%x(i,1,1,1)) * rho
fz(i) = (ext_coeffs(1) * fz(i) + temp3%x(i,1,1,1)) * rho
Expand All @@ -108,17 +108,17 @@ subroutine scalar_rhs_maker_ext_cpu(fs_lag, fs_laglag, fs, rho, ext_coeffs, n)

call neko_scratch_registry%request_field(temp1, temp_index)

do i = 1, n
do concurrent (i = 1:n)
temp1%x(i,1,1,1) = ext_coeffs(2) * fs_lag%x(i,1,1,1) + &
ext_coeffs(3) * fs_laglag%x(i,1,1,1)
end do

do i = 1, n
do concurrent (i = 1:n)
fs_laglag%x(i,1,1,1) = fs_lag%x(i,1,1,1)
fs_lag%x(i,1,1,1) = fs(i)
end do

do i = 1, n
do concurrent (i = 1:n)
fs(i) = (ext_coeffs(1) * fs(i) + temp1%x(i,1,1,1)) * rho
end do

Expand All @@ -145,27 +145,27 @@ subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, &
call neko_scratch_registry%request_field(tb2, temp_indices(5))
call neko_scratch_registry%request_field(tb3, temp_indices(6))

do i = 1, n
do concurrent (i = 1:n)
tb1%x(i,1,1,1) = u%x(i,1,1,1) * B(i) * bd(2)
tb2%x(i,1,1,1) = v%x(i,1,1,1) * B(i) * bd(2)
tb3%x(i,1,1,1) = w%x(i,1,1,1) * B(i) * bd(2)
end do

do ilag = 2, nbd
do i = 1, n
do concurrent (i = 1:n)
ta1%x(i,1,1,1) = ulag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
ta2%x(i,1,1,1) = vlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
ta3%x(i,1,1,1) = wlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
end do

do i = 1, n
do concurrent (i = 1:n)
tb1%x(i,1,1,1) = tb1%x(i,1,1,1) + ta1%x(i,1,1,1)
tb2%x(i,1,1,1) = tb2%x(i,1,1,1) + ta2%x(i,1,1,1)
tb3%x(i,1,1,1) = tb3%x(i,1,1,1) + ta3%x(i,1,1,1)
end do
end do

do i = 1, n
do concurrent (i = 1:n)
bfx(i) = bfx(i) + tb1%x(i,1,1,1) * (rho / dt)
bfy(i) = bfy(i) + tb2%x(i,1,1,1) * (rho / dt)
bfz(i) = bfz(i) + tb3%x(i,1,1,1) * (rho / dt)
Expand All @@ -189,21 +189,21 @@ subroutine scalar_rhs_maker_bdf_cpu(s_lag, fs, s, B, rho, dt, bd, nbd, n)
call neko_scratch_registry%request_field(temp1, temp_indices(1))
call neko_scratch_registry%request_field(temp2, temp_indices(2))

do i = 1, n
do concurrent (i = 1:n)
temp2%x(i,1,1,1) = s%x(i,1,1,1) * B(i) * bd(2)
end do

do ilag = 2, nbd
do i = 1, n
do concurrent (i = 1:n)
temp1%x(i,1,1,1) = s_lag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
end do

do i = 1, n
do concurrent (i = 1:n)
temp2%x(i,1,1,1) = temp2%x(i,1,1,1) + temp1%x(i,1,1,1)
end do
end do

do i = 1, n
do concurrent (i = 1:n)
fs(i) = fs(i) + temp2%x(i,1,1,1) * (rho / dt)
end do

Expand Down
12 changes: 6 additions & 6 deletions src/fluid/bcknd/cpu/pnpn_res_cpu.f90
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
type(gs_t), intent(inout) :: gs_Xh
type(facet_normal_t), intent(inout) :: bc_prs_surface
type(facet_normal_t), intent(inout) :: bc_sym_surface
class(Ax_t), intent(inout) :: Ax
class(ax_t), intent(inout) :: Ax
real(kind=rp), intent(inout) :: bd
real(kind=rp), intent(in) :: dt
real(kind=rp), intent(in) :: mu
Expand Down Expand Up @@ -67,7 +67,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh)
call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh)

do i = 1, n
do concurrent (i = 1:n)
ta1%x(i,1,1,1) = f_x%x(i,1,1,1) / rho &
- ((wa1%x(i,1,1,1) * (mu / rho)) * c_Xh%B(i,1,1,1))
ta2%x(i,1,1,1) = f_y%x(i,1,1,1) / rho &
Expand All @@ -80,7 +80,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
call gs_Xh%op(ta2, GS_OP_ADD)
call gs_Xh%op(ta3, GS_OP_ADD)

do i = 1, n
do concurrent (i = 1:n)
ta1%x(i,1,1,1) = ta1%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
ta2%x(i,1,1,1) = ta2%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
ta3%x(i,1,1,1) = ta3%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
Expand All @@ -100,7 +100,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
!
! Surface velocity terms
!
do i = 1, n
do concurrent (i = 1:n)
wa1%x(i,1,1,1) = 0.0_rp
wa2%x(i,1,1,1) = 0.0_rp
wa3%x(i,1,1,1) = 0.0_rp
Expand All @@ -109,15 +109,15 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
call bc_sym_surface%apply_surfvec(wa1%x,wa2%x,wa3%x,ta1%x, ta2%x, ta3%x, n)

dtbd = bd / dt
do i = 1, n
do concurrent (i = 1:n)
ta1%x(i,1,1,1) = 0.0_rp
ta2%x(i,1,1,1) = 0.0_rp
ta3%x(i,1,1,1) = 0.0_rp
end do

call bc_prs_surface%apply_surfvec(ta1%x, ta2%x, ta3%x, u%x, v%x, w%x, n)

do i = 1, n
do concurrent (i = 1:n)
p_res%x(i,1,1,1) = p_res%x(i,1,1,1) &
- (dtbd * (ta1%x(i,1,1,1) + ta2%x(i,1,1,1) + ta3%x(i,1,1,1)))&
- (wa1%x(i,1,1,1) + wa2%x(i,1,1,1) + wa3%x(i,1,1,1))
Expand Down

0 comments on commit 4d03281

Please sign in to comment.