From 4d45b71a2882c9b68be9ce9da2f2d29fa8be0e6d Mon Sep 17 00:00:00 2001 From: Ajay Panyala Date: Thu, 11 Jan 2024 19:25:01 -0800 Subject: [PATCH] [SCF] cleanup restart logic --- .github/workflows/c-cpp.yaml | 7 +++-- docs/user_guide/scf.rst | 2 +- exachem/scf/scf_iter.cpp | 58 +++++++++++++++++------------------- exachem/scf/scf_iter.hpp | 7 +++-- exachem/scf/scf_main.cpp | 39 +++++++++++++----------- 5 files changed, 59 insertions(+), 54 deletions(-) diff --git a/.github/workflows/c-cpp.yaml b/.github/workflows/c-cpp.yaml index 6584463..4ee5dac 100644 --- a/.github/workflows/c-cpp.yaml +++ b/.github/workflows/c-cpp.yaml @@ -214,6 +214,8 @@ jobs: if: ${{ matrix.backend == 'ga' }} id: build_exachem run: | + echo "CI_NRANKS=3" >> $GITHUB_ENV + # TAMM build git clone https://github.com/NWChemEx/TAMM $GITHUB_WORKSPACE/TAMM cd $GITHUB_WORKSPACE/TAMM @@ -241,6 +243,7 @@ jobs: export CPATH=$CPATH:/usr/lib/x86_64-linux-gnu/openmpi/include echo "UPCXX_SHARED_HEAP_SIZE=MAX" >> $GITHUB_ENV echo "CI_MPIEXEC=upcxx-run" >> $GITHUB_ENV + echo "CI_NRANKS=2" >> $GITHUB_ENV echo "UPCXX_MISSING_TESTS=1" >> $GITHUB_ENV # TAMM build @@ -255,7 +258,7 @@ jobs: # Chem build cd $GITHUB_WORKSPACE - UPCXX_CODEMODE=O3 CXX=upcxx cmake -H. -Bbuild -DGPU_ARCH=70 -DMODULES="CC" -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_PATH }} -DUSE_UPCXX=ON -DMPIRUN_EXECUTABLE="upcxx-run" + UPCXX_CODEMODE=O3 CXX=upcxx cmake -H. -Bbuild -DGPU_ARCH=70 -DMODULES="CC" -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_PATH }} -DUSE_UPCXX=ON -DMPIRUN_EXECUTABLE=${{ env.CI_MPIEXEC }} cd build UPCXX_NETWORK=smp UPCXX_CODEMODE=O3 make -j${{ env.EC_NPROC }} @@ -265,7 +268,7 @@ jobs: cd $GITHUB_WORKSPACE/build export PATH=$PATH:$GITHUB_WORKSPACE/install/bin # ctest -VV - $GITHUB_WORKSPACE/ci/scripts/run_ci $GITHUB_WORKSPACE $GITHUB_WORKSPACE/build/methods_stage/${{ env.INSTALL_PATH }}/methods/ExaChem 3 ${{ env.CI_MPIEXEC }} ${{ env.UPCXX_MISSING_TESTS }} + $GITHUB_WORKSPACE/ci/scripts/run_ci $GITHUB_WORKSPACE $GITHUB_WORKSPACE/build/methods_stage/${{ env.INSTALL_PATH }}/methods/ExaChem ${{ env.CI_NRANKS }} ${{ env.CI_MPIEXEC }} ${{ env.UPCXX_MISSING_TESTS }} # - name: gcovr # run: | # cd $GITHUB_WORKSPACE diff --git a/docs/user_guide/scf.rst b/docs/user_guide/scf.rst index 6570714..6f3ebbd 100644 --- a/docs/user_guide/scf.rst +++ b/docs/user_guide/scf.rst @@ -90,7 +90,7 @@ The values listed below are the defaults where few options are automatically adj :restart: ``[default=false]`` indicates the calculation be restarted. -:noscf: ``[default=false]`` typically used together with `restart` option. Computes only the SCF energy upon restart. +:noscf: ``[default=false]`` Computes only the SCF energy upon restart. :scf_type: ``[default=restricted]`` The following values are supported diff --git a/exachem/scf/scf_iter.cpp b/exachem/scf/scf_iter.cpp index c56f8f7..f2b597e 100644 --- a/exachem/scf/scf_iter.cpp +++ b/exachem/scf/scf_iter.cpp @@ -12,11 +12,12 @@ template std::tuple scf_iter_body(ExecutionContext& ec, ScalapackInfo& scalapack_info, const int& iter, const SystemData& sys_data, SCFVars& scf_vars, - TAMMTensors& ttensors, EigenTensors& etensors, + TAMMTensors& ttensors, EigenTensors& etensors #if defined(USE_GAUXC) - GauXC::XCIntegrator& gauxc_integrator, + , + GauXC::XCIntegrator& gauxc_integrator #endif - bool scf_restart) { +) { const bool is_uhf = sys_data.is_unrestricted; const bool is_rhf = sys_data.is_restricted; @@ -159,40 +160,36 @@ std::tuple scf_iter_body(ExecutionContext& ec, std::cout << std::fixed << std::setprecision(2) << "diis: " << do_time << "s, "; } - if(!scf_restart) { - if(lshift > 0) { - double lval = is_rhf ? 0.5 * lshift : lshift; + if(lshift > 0) { + double lval = is_rhf ? 0.5 * lshift : lshift; + // clang-format off + sch + (ehf_tmp(mu,ku) = S1(mu,nu) * D_last_alpha_tamm(nu,ku)) + (F_alpha(mu,ku) -= lval * ehf_tmp(mu,nu) * S1(nu,ku)) + .execute(); + // clang-format on + + if(is_uhf) { // clang-format off sch - (ehf_tmp(mu,ku) = S1(mu,nu) * D_last_alpha_tamm(nu,ku)) - (F_alpha(mu,ku) -= lval * ehf_tmp(mu,nu) * S1(nu,ku)) + (ehf_tmp(mu,ku) = S1(mu,nu) * D_last_beta_tamm(nu,ku)) + (F_beta(mu,ku) -= lval * ehf_tmp(mu,nu) * S1(nu,ku)) .execute(); // clang-format on - - if(is_uhf) { - // clang-format off - sch - (ehf_tmp(mu,ku) = S1(mu,nu) * D_last_beta_tamm(nu,ku)) - (F_beta(mu,ku) -= lval * ehf_tmp(mu,nu) * S1(nu,ku)) - .execute(); - // clang-format on - } } + } - auto do_t1 = std::chrono::high_resolution_clock::now(); + auto do_t1 = std::chrono::high_resolution_clock::now(); - scf_diagonalize(sch, sys_data, scf_vars, scalapack_info, ttensors, etensors); + scf_diagonalize(sch, sys_data, scf_vars, scalapack_info, ttensors, etensors); - auto do_t2 = std::chrono::high_resolution_clock::now(); - auto do_time = - std::chrono::duration_cast>((do_t2 - do_t1)).count(); - - if(rank == 0 && debug) - std::cout << std::fixed << std::setprecision(2) << "diagonalize: " << do_time << "s, "; + auto do_t2 = std::chrono::high_resolution_clock::now(); + auto do_time = std::chrono::duration_cast>((do_t2 - do_t1)).count(); - compute_density(ec, sys_data, scf_vars, scalapack_info, ttensors, etensors); + if(rank == 0 && debug) + std::cout << std::fixed << std::setprecision(2) << "diagonalize: " << do_time << "s, "; - } // end scf_restart + compute_density(ec, sys_data, scf_vars, scalapack_info, ttensors, etensors); double rmsd = 0.0; // clang-format off @@ -1112,11 +1109,12 @@ void scf_diis(ExecutionContext& ec, const TiledIndexSpace& tAO, Tensor scf_iter_body(ExecutionContext& ec, ScalapackInfo& scalapack_info, const int& iter, const SystemData& sys_data, SCFVars& scf_vars, TAMMTensors& ttensors, - EigenTensors& etensors, + EigenTensors& etensors #if defined(USE_GAUXC) - GauXC::XCIntegrator& gauxc_integrator, + , + GauXC::XCIntegrator& gauxc_integrator #endif - bool scf_restart); +); template std::tuple, std::vector, std::vector> compute_2bf_taskinfo(ExecutionContext& ec, const SystemData& sys_data, diff --git a/exachem/scf/scf_iter.hpp b/exachem/scf/scf_iter.hpp index 116fd3d..dd9d45c 100644 --- a/exachem/scf/scf_iter.hpp +++ b/exachem/scf/scf_iter.hpp @@ -24,11 +24,12 @@ template std::tuple scf_iter_body(ExecutionContext& ec, ScalapackInfo& scalapack_info, const int& iter, const SystemData& sys_data, SCFVars& scf_vars, - TAMMTensors& ttensors, EigenTensors& etensors, + TAMMTensors& ttensors, EigenTensors& etensors #if defined(USE_GAUXC) - GauXC::XCIntegrator& gauxc_integrator, + , + GauXC::XCIntegrator& gauxc_integrator #endif - bool scf_restart = false); +); template std::tuple, std::vector, std::vector> diff --git a/exachem/scf/scf_main.cpp b/exachem/scf/scf_main.cpp index 291c941..21c7d90 100644 --- a/exachem/scf/scf_main.cpp +++ b/exachem/scf/scf_main.cpp @@ -372,11 +372,11 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map EigenTensors etensors; TAMMTensors ttensors; - const bool scf_conv = restart && scf_options.noscf; + const bool no_scf = scf_options.noscf; bool is_conv = true; const bool load_bal = scf_vars.do_load_bal; - scf_restart_test(exc, sys_data, restart, files_prefix); + scf_restart_test(exc, sys_data, restart || no_scf, files_prefix); #if SCF_THROTTLE_RESOURCES if(rank < hf_nranks) { @@ -625,7 +625,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map auto max_nprim4 = max_nprim * max_nprim * max_nprim * max_nprim; auto shell2bf = obs.shell2bf(); - if(restart) { + if(restart || no_scf) { scf_restart(ec, scalapack_info, sys_data, ttensors, etensors, files_prefix); if(!do_density_fitting) { tamm_to_eigen_tensor(ttensors.D_alpha, etensors.D_alpha); @@ -704,7 +704,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map hf_t2 = std::chrono::high_resolution_clock::now(); hf_time = std::chrono::duration_cast>((hf_t2 - hf_t1)).count(); - if(rank == 0) + if(rank == 0 && !no_scf && !restart) std::cout << std::fixed << std::setprecision(2) << "Total Time to compute initial guess: " << hf_time << " secs" << endl; @@ -724,12 +724,11 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map } } - if(rank == 0) { + if(rank == 0 && !no_scf) { std::cout << std::endl << std::endl; std::cout << " SCF iterations" << endl; std::cout << std::string(65, '-') << endl; std::string sph = " Iter Energy E-Diff RMSD Time(s)"; - if(scf_conv) sph = " Iter Energy E-Diff Time(s)"; std::cout << sph << endl; std::cout << std::string(65, '-') << endl; } @@ -767,7 +766,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map ec.pg().broadcast(etensors.taskmap.data(), etensors.taskmap.size(), 0); } - if(restart || molden_exists) { + if(restart || no_scf || molden_exists) { sch(ttensors.F_alpha_tmp() = 0).execute(); if(is_uhf) { sch(ttensors.F_beta_tmp() = 0).execute(); } @@ -812,6 +811,8 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map // SCF main loop do { + if(no_scf) break; + const auto loop_start = std::chrono::high_resolution_clock::now(); ++iter; @@ -841,11 +842,12 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map do_density_fitting, xHF); std::tie(ehf, rmsd) = scf_iter_body(ec, scalapack_info, iter, sys_data, scf_vars, - ttensors, etensors, + ttensors, etensors #if defined(USE_GAUXC) - gauxc_integrator, + , + gauxc_integrator #endif - scf_conv); + ); ehf += enuc; // compute difference with last iteration @@ -866,7 +868,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map std::cout << std::scientific << std::setprecision(2); } std::cout << ' ' << std::scientific << std::setw(12) << ediff; - if(!scf_conv) std::cout << ' ' << std::setw(12) << rmsd << ' '; + std::cout << ' ' << std::setw(12) << rmsd << ' '; std::cout << ' ' << std::setw(10) << std::fixed << std::setprecision(1) << loop_time << ' ' << endl; @@ -879,7 +881,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map // if(rank==0) cout << "D at the end of iteration: " << endl << std::setprecision(6) << // etensors.D_alpha << endl; if(scf_options.writem % iter == 0 || scf_options.writem == 1) { - if(!scf_conv) rw_md_disk(ec, scalapack_info, sys_data, ttensors, etensors, files_prefix); + rw_md_disk(ec, scalapack_info, sys_data, ttensors, etensors, files_prefix); } if(iter >= maxiter) { @@ -887,8 +889,6 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map break; } - if(scf_conv) break; - if(debug) print_energies(ec, ttensors, etensors, sys_data, scf_vars, scalapack_info, debug); // Reset lshift to input option. @@ -910,10 +910,13 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map sch(ttensors.F_alpha_tmp() = 0).execute(); if(is_uhf) sch(ttensors.F_beta_tmp() = 0).execute(); + auto xHF_adjust = xHF; + // TODO: skip for non-CC methods + if(!sys_data.options_map.task_options.scf) xHF_adjust = 1.0; // build a new Fock matrix compute_2bf(ec, scalapack_info, sys_data, scf_vars, obs, do_schwarz_screen, shell2bf, SchwarzK, max_nprim4, ttensors, etensors, is_3c_init, - do_density_fitting, xHF); + do_density_fitting, xHF_adjust); } for(auto x: ttensors.ehf_tamm_hist) Tensor::deallocate(x); @@ -933,7 +936,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map << "Nuclear repulsion energy = " << std::setprecision(15) << enuc << endl; print_energies(ec, ttensors, etensors, sys_data, scf_vars, scalapack_info); - if(!scf_conv) { + if(!no_scf) { if(rank == 0) cout << "writing orbitals and density to disk ... "; rw_md_disk(ec, scalapack_info, sys_data, ttensors, etensors, files_prefix); if(rank == 0) cout << "done." << endl; @@ -1056,7 +1059,7 @@ hartree_fock(ExecutionContext& exc, const string filename, ECOptions options_map exc.pg().barrier(); return std::make_tuple(sys_data, ehf, shells, scf_vars.shell_tile_map, C_alpha_tamm, Fa_global, - C_beta_tamm, Fb_global, scf_vars.tAO, scf_vars.tAOt, scf_conv); + C_beta_tamm, Fb_global, scf_vars.tAO, scf_vars.tAOt, no_scf); } void scf(std::string filename, ECOptions options_map) { @@ -1069,7 +1072,7 @@ void scf(std::string filename, ECOptions options_map) { auto hf_t1 = std::chrono::high_resolution_clock::now(); auto [sys_data, hf_energy, shells, shell_tile_map, C_AO, F_AO, C_beta_AO, F_beta_AO, AO_opt, - AO_tis, scf_conv] = hartree_fock(ec, filename, options_map); + AO_tis, no_scf] = hartree_fock(ec, filename, options_map); Tensor::deallocate(C_AO, F_AO); if(sys_data.is_unrestricted) Tensor::deallocate(C_beta_AO, F_beta_AO);