ccao-data · jeancochrane · Nov 5, 2024 · Oct 31, 2024 · Nov 5, 2024 · dfsnow
@@ -12,8 +12,17 @@ tictoc::tic("Evaluate")
 # Load libraries, helpers, and recipes from files
 purrr::walk(list.files("R/", "\\.R$", full.names = TRUE), source)
 
-# Enable parallel backend for generating stats more quickly
-plan(multisession, workers = num_threads)
+# Enable parallel backend for generating stats faster.
+# We previously used the 'multisession' parallelization strategy, but it shows
+# diminishing returns (and eventually worse performance) past 5 workers on the
+# server, and it's not particularly fast either (~10 mins to complete this
+# stage). The 'multicore' strategy has a higher risk of hogging server
+# resources for the duration of execution, but it executes much faster (~80
+# seconds to complete this stage), so ultimately we think it's worth the risk;
+# plus, we only use half the available cores so that we don't block other
+# important tasks on the server. Note that 'multicore' relies on forking, so on
+# Windows (and by default in RStudio) 'future' falls back to sequential.
+plan(multicore, workers = ceiling(num_threads / 2))
 
 # Renaming dictionary for input columns. We want the actual value of the column
 # to become geography_id and the NAME of the column to become geography_name