From c87ad370143976ae5f9ea8a7fb7fd84ca563aedf Mon Sep 17 00:00:00 2001
From: Lindon Roberts <lindon.roberts@anu.edu.au>
Date: Mon, 5 Aug 2024 12:57:43 +1000
Subject: [PATCH] Small code cleanup

---
 dfols/controller.py   | 22 ++++++----------------
 dfols/solver.py       |  5 -----
 dfols/trust_region.py | 16 +++++++---------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/dfols/controller.py b/dfols/controller.py
index a4eb92e..b3ac934 100644
--- a/dfols/controller.py
+++ b/dfols/controller.py
@@ -442,7 +442,8 @@ def get_new_direction_for_growing(self, step_length):
         return dirn * (step_length / LA.norm(dirn))
 
     def evaluate_criticality_measure(self, params):
-        # TODO: add comment for calculation of criticality measure, need h != None
+        # Calculate criticality measure for regularized problems (h is not None)
+        
         # Build model for full least squares function
         gopt, H = self.model.build_full_model()
         
@@ -452,19 +453,15 @@ def evaluate_criticality_measure(self, params):
             # gnew = gopt.copy()
             # crvmin = -1
             return np.inf
-        ##print("gopt", gopt)
-        ##print("H", H)
+        
         # NOTE: smaller params here to get more iterations in S-FISTA
         func_tol = params("func_tol.criticality_measure") * self.delta
-        ##print("function tolerance (criticality measure)", func_tol)
         if self.model.projections:
             d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), self.model.projections, 1,
                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol, 
                                 max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
                                 scaling_changes=self.scaling_changes)
         else:
-            # NOTE: alternative way if using trsbox
-            # d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
             proj = lambda x: pbox(x, self.model.sl, self.model.su)
             d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), [proj], 1,
                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol, 
@@ -473,8 +470,6 @@ def evaluate_criticality_measure(self, params):
         
         # Calculate criticality measure
         criticality_measure = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, np.zeros(H.shape), d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
-        ##print("d (criticality measure): ", d)
-        ##print("model value (criticality measure): ", model_value(gopt, 2*H, d, self.model.xopt(abs_coordinates=True), self.h, self.argsh))
         return criticality_measure
 
     def trust_region_step(self, params, criticality_measure=1e-2):
@@ -484,7 +479,6 @@ def trust_region_step(self, params, criticality_measure=1e-2):
         # QUESTION: c1 = min{1, 1/delta_max^2}, but choose c1=1here; choose maxhessian = max(||H||_2,1)
         # QUESTION: when criticality_measure = 0? choose max(criticality_measure,1)
         func_tol = (1-params("func_tol.tr_step")) * 1 * max(criticality_measure,1) * min(self.delta, max(criticality_measure,1) / max(np.linalg.norm(H, 2),1))
-        ##print("function tolerance (trust region step)", func_tol)
 
         if self.h is None:
             if self.model.projections:
@@ -520,14 +514,12 @@ def trust_region_step(self, params, criticality_measure=1e-2):
                                       self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
                                       max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
                                       scaling_changes=self.scaling_changes)
+            
             # NOTE: check sufficient decrease. If increase in the model, set zero step
             pred_reduction = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, H, d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
-            ##("pred_reduction", pred_reduction)
             if pred_reduction < 0.0:
-                ##print("bad d", d)
                 d = np.zeros(d.shape)
-            ##print("d (trust region step): ", d)
-            ##print("new point (after trust region step): ", d + self.model.xopt(abs_coordinates=True))
+        
         return d, gopt, H, gnew, crvmin
 
     def geometry_step(self, knew, adelt, number_of_samples, params):
@@ -727,9 +719,7 @@ def calculate_ratio(self, x, current_iter, rvec_list, d, gopt, H):
             if len(self.model.projections) > 1: # if we are using multiple projections, only warn since likely due to constraint intersection
                 exit_info = ExitInformation(EXIT_TR_INCREASE_WARNING, "Either multiple constraints are active or trust region step gave model increase")
             else:
-                exit_info = ExitInformation(EXIT_TR_INCREASE_ERROR, "Either rust region step gave model increase")
-        ##print("actual reduction: ", actual_reduction)
-        ##print("pred reduction: ", pred_reduction)
+                exit_info = ExitInformation(EXIT_TR_INCREASE_ERROR, "Trust region step gave model increase")
         ratio = actual_reduction / pred_reduction
         return ratio, exit_info
 
diff --git a/dfols/solver.py b/dfols/solver.py
index ac8071f..b1db0a9 100644
--- a/dfols/solver.py
+++ b/dfols/solver.py
@@ -555,11 +555,7 @@ def solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxf
                 break  # quit
 
             # Estimate f in order to compute 'actual reduction'
-            ##print("d before calcualte_ratio: ", d)
-            ##print("xopt before calcualte_ratio: ", control.model.xopt(abs_coordinates=True))
-            # NOTE: be careful abour x in calculate_ratio
             ratio, exit_info = control.calculate_ratio(control.model.xopt(abs_coordinates=True), current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
-            ##print("ratio: ", ratio)
             if exit_info is not None:
                 if exit_info.able_to_do_restart() and params("restarts.use_restarts") and params(
                         "restarts.use_soft_restarts"):
@@ -607,7 +603,6 @@ def solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxf
                                         params("tr_radius.gamma_inc_overline") * dnorm), 1.0e10)
                 if params("logging.save_diagnostic_info"):
                     diagnostic_info.update_iter_type(ITER_VERY_SUCCESSFUL)
-            # QUESTION: previously: control.delta <= 1.5 * control.rho, why use 1.5 here?
             if control.delta <= 1.5 * control.rho:  # cap trust region radius at rho
                 control.delta = control.rho
 
diff --git a/dfols/trust_region.py b/dfols/trust_region.py
index 6c9902d..e18237c 100644
--- a/dfols/trust_region.py
+++ b/dfols/trust_region.py
@@ -100,7 +100,7 @@ def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), ar
     y = np.zeros(n)
     t = 1
     k_H = np.linalg.norm(H, 2) # SOLVED: use ||H||_2 <= ||H||_F, might be better than maxhessian
-    # QUESTION: difference expression for u from MAX_LOP_ITERS
+    # QUESTION: different expression for u from MAX_LOOP_ITERS
     #u = 2 * func_tol / (L_h * (L_h + sqrt(L_h * L_h + 2 * k_H * func_tol))) # smoothing parameter
     crvmin = -1.0
     # NOTE: iterataion depends on delta/func_tol
@@ -109,14 +109,11 @@ def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), ar
     except ValueError:
         MAX_LOOP_ITERS = max_iters
     u =  2 * delta / (MAX_LOOP_ITERS * L_h) # smoothing parameter
-    ##print("smoothing parameter", u)
-    ##print("number of iterations", MAX_LOOP_ITERS)
 
     def gradient_Fu(xopt, g, H, u, prox_uh, d):
     # Calculate gradient_Fu,
-    # where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of
-    # h.
-    # We assume that h is global Lipschitz continous with constant L_h,
+    # where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of h.
+    # We assume that h is globally Lipschitz continous with constant L_h,
     # then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.  
     # TODO: Add instruction to prox_uh
     # SOLVED: bug here, previous: g + H @ d + (d - prox_uh(xopt, u, d, *argsprox)) / u
@@ -151,9 +148,10 @@ def proj(d0):
         # SOLVED: (previously) make sfista decrease in each iteration (might have d = 0, criticality measure=0)
         # if model_value(g, H, d, xopt, h, *argsh) > model_value(g, H, prev_d, xopt, h, *argsh):
         #     d = prev_d
-        if model_value(g, H, d, xopt, h, *argsh, scaling_changes) < model_value_best:
-            d_best = d
-            model_value_best =  model_value(g, H, d, xopt, h, *argsh, scaling_changes)
+        new_model_value = model_value(g, H, d, xopt, h, *argsh, scaling_changes)
+        if new_model_value < model_value_best:
+            d_best = d.copy()
+            model_value_best = new_model_value
 
         # update true gradient
         # FIXME: gnew is the gradient of the smoothed version