Showing 3 changed files with 331 additions and 0 deletions.
@@ -0,0 +1,221 @@
""" Gaussian Random Block-Coordinate Gauss-Newton """
from __future__ import absolute_import, division, unicode_literals, print_function
from trs.trs_exact import trs
import numpy as np
import scipy.linalg as linalg
import math as ma
import warnings

def GRBCGN(r, J, x0, sampling_func, fxopt, it_max, ftol, p, fig, kappa, algorithm='tr'):
    n = x0.size

    # Adaptive BCGN step size
    STEP = 5

    # Full function and gradient
    def f(z): return 0.5 * np.dot(r(z), r(z))
    def gradf(z): return J(z).T.dot(r(z))

    # Plotting
    if fig is not None:
        plot_data = np.full((3,it_max+1),np.nan)
        plot_data[0,0] = f(x0)-fxopt
        plot_data[1,0] = linalg.norm(gradf(x0))

    # Metrics
    budget = 0
    tau_budget = np.full(4,np.nan)

    k = 0
    x = x0
    delta = None
    while (not fig and budget < it_max*n) or (fig and k < it_max and ma.fabs(f(x) - fxopt) > ftol):

        # Randomly select subspace
        if kappa == 1 and p == n: # GN
            Q = np.eye(n)
        else: # random projection
            Q = gram_schmidt_randn(n,p)

        # Assemble block-reduced matrices
        J_S = J(x).dot(Q)
        rx = r(x)
        gradf_S = J_S.T.dot(rx)

        # Set initial trust region radius
        if k == 0 and (algorithm.startswith('tr') or algorithm == 'reg'):
            delta = linalg.norm(gradf_S)/10
            if delta == 0:
                delta = 1

        # Debug output
        #monitor(k, r, x, f, delta, algorithm, gradf, gradf_S)

        # Solve subproblem
        if algorithm == 'tr':
            s_S = trs(J_S, gradf_S, delta)
        else:
            raise RuntimeError(algorithm + ' unimplemented!')

        # Loop tolerance
        Js_S = J_S.dot(s_S)
        Delta_m = -np.dot(gradf_S,s_S) - 0.5*np.dot(Js_S,Js_S)
        stopping_rule = -Delta_m + (1-kappa)/2*np.power(np.linalg.norm(rx),2) > 0
        #stopping_rule = -Delta_m + kappa*delta*delta > 0
        #stopping_rule = linalg.norm(gradf_S) > kappa*delta

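        # (Illustrative note, not part of the original commit.) The rule above grows
        # the random subspace until the reduced model decrease Delta_m reaches at
        # least a (1-kappa) fraction of 0.5*||r(x)||^2, the largest decrease the
        # full Gauss-Newton model could achieve, so kappa controls how much of the
        # full-space model decrease the sketched model must capture before stepping.
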
        # Iteratively refine block size
        p_in = p
        while kappa != 1 and p_in != n and stopping_rule:

            # Increase block size
            step = min(STEP,n-p_in)
            #print('Increasing block size to:', p_in+step)

            # Grow subspace
            Q = gram_schmidt_grow(Q,step)

            # Assemble block-reduced matrices
            J_S = J(x).dot(Q)
            gradf_S = J_S.T.dot(rx)

            p_in += step

            # Debug output
            #monitor(k, r, x, f, delta, algorithm, gradf, gradf_S)

            # Solve subproblem
            if algorithm == 'tr':
                s_S = trs(J_S, gradf_S, delta)
            else:
                raise RuntimeError(algorithm + ' unimplemented!')

            # Loop tolerance
            Js_S = J_S.dot(s_S)
            Delta_m = -np.dot(gradf_S,s_S) - 0.5*np.dot(Js_S,Js_S)
            stopping_rule = -Delta_m + (1-kappa)/2*np.power(np.linalg.norm(rx),2) > 0
            #stopping_rule = linalg.norm(gradf_S) > kappa*delta
            #Jx_S = J_S.dot(x.dot(U_S))
            #stopping_rule = -Delta_m + np.dot(Js_S,Jx_S) + (sigma/2)*np.power(linalg.norm(s_S),2) > 0

        budget += p_in
        #print('Iteration:', k, 'max block size:', p_in)

        # Update parameter and take step
        #Delta_m = -np.dot(gradf_S,s_S) - 0.5*np.dot(Js_S,Js_S)
        if algorithm.startswith('tr'):
            x, delta = tr_update(f, x, s_S, Q, Delta_m, delta)
        else:
            raise RuntimeError(algorithm + ' unimplemented!')
        k += 1

        # Function decrease metrics
        if fig is None:
            for itau, tau in enumerate([1e-1,1e-3,1e-5,1e-7]):
                if np.isnan(tau_budget[itau]) and np.linalg.norm(gradf(x)) <= tau*np.linalg.norm(gradf(x0)):
                    tau_budget[itau] = budget
            if np.all(np.isfinite(tau_budget)): # Stop if all function decrease metrics satisfied
                return tau_budget
        else: # plotting
            plot_data[0,k] = f(x)-fxopt
            plot_data[1,k] = linalg.norm(gradf(x))
            plot_data[2,k] = p_in

        # Debug output
        #monitor(k, r, x, f, delta, algorithm, gradf)

    # Return function decrease metrics (some unsatisfied)
    if fig is None:
        return tau_budget
    else: # plotting
        return plot_data

""" Generate random partial orthonormal basis """ | ||
def gram_schmidt_randn(n,k): | ||
U = np.zeros((n,k)) | ||
v = np.random.randn(n) | ||
U[:,0] = v/linalg.norm(v) | ||
for i in range(1,k): | ||
U[:,i] = np.random.randn(n) | ||
for j in range(i): | ||
U[:,i] = U[:,i] - np.dot(U[:,i],U[:,j]) / np.dot(U[:,j],U[:,j]) * U[:,j] | ||
U[:,i] = U[:,i] / np.linalg.norm(U[:,i]) | ||
return U | ||
|
||
""" Grow partial orthonormal basis """ | ||
def gram_schmidt_grow(U,l): | ||
n,k = U.shape | ||
U = np.hstack((U,np.zeros((n,l)))) | ||
for i in range(k,k+l): | ||
U[:,i] = np.random.randn(n) | ||
for j in range(i): | ||
U[:,i] = U[:,i] - (U[:,i].T.dot(U[:,j])) / (U[:,j].T.dot(U[:,j])) * U[:,j] | ||
U[:,i] = U[:,i] / np.linalg.norm(U[:,i]) | ||
return U | ||
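
# Quick sanity check (illustrative only, not in the original commit): the columns
# of the generated basis should be orthonormal, and growing the basis should keep
# the original leading columns intact:
#   Q = gram_schmidt_randn(10, 3)
#   assert np.allclose(Q.T.dot(Q), np.eye(3))
#   Q2 = gram_schmidt_grow(Q, 2)
#   assert np.allclose(Q2[:, :3], Q)
#   assert np.allclose(Q2.T.dot(Q2), np.eye(5))
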
""" Trust Region Update """
def tr_update(f, x, s_S, Q, Delta_m, delta):

    # Trust Region parameters
    ETA1 = 0.1
    ETA2 = 0.75
    GAMMA1 = 0.5
    GAMMA2 = 2.
    DELTA_MIN = 1e-150
    DELTA_MAX = 1e150

    # Evaluate sufficient decrease
    s = Q.dot(s_S)
    rho = (f(x) - f(x+s))/Delta_m

    # Accept trial point
    if rho >= ETA1:
        x = x + s

    # Update trust region radius
    if rho < ETA1:
        delta *= GAMMA1
        delta = max(delta,DELTA_MIN)
    elif rho >= ETA2:
        delta *= GAMMA2
        delta = min(delta,DELTA_MAX)

    return x, delta

""" Return roots of quadratic equation """
def quadeq(a, b, c):
    warnings.simplefilter("error", RuntimeWarning)
    try:
        x1 = (-b + ma.sqrt(b * b - 4 * a * c)) / (2 * a)
        x2 = (-b - ma.sqrt(b * b - 4 * a * c)) / (2 * a)
    except (RuntimeWarning, ValueError): # failed step: delta too large (math.sqrt raises ValueError on a negative discriminant)
        x1 = 0
        x2 = 0
    warnings.resetwarnings()
    return x1, x2

""" Output Monitoring Information """
def monitor(k, r, x, f, delta, algorithm, gradf, gradf_S=None):

    print('++++ Iteration', k, '++++')
    if algorithm.startswith('tr'):
        print('delta: %.2e' % delta)
    elif algorithm == 'reg':
        print('sigma: %.2e' % delta)
    elif delta is not None:
        print('alpha: %.2e' % delta)

    nr = linalg.norm(r(x))
    ng = linalg.norm(gradf(x))
    nJrr = ng / nr
    if gradf_S is not None:
        ng_S = linalg.norm(gradf_S)
        nJ_Srr = ng_S / nr

    print('x:', x, 'f(x):', f(x))
    print('||r(x)||: %.2e' % nr, '||gradf(x)||: %.2e' % ng, end=' ')
    if gradf_S is not None: print('||gradf_S(x)||: %.2e' % ng_S)
    print("||J'r||/||r||: %.2e" % nJrr, end=' ')
    if gradf_S is not None: print("||J_S'r||/||r||: %.2e" % nJ_Srr)

    if gradf_S is None: print()
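
A minimal usage sketch (not part of the commit) showing how GRBCGN might be driven with the rsparse_lin generator added later in this commit. The module names RBCGN and rsparse_lin are assumptions based on the file contents, and the trs.trs_exact.trs solver from this repository must be importable for the call to run.

    import numpy as np
    from RBCGN import GRBCGN              # assumed module name for the file above
    from rsparse_lin import rsparse_lin   # assumed module name for the generator below

    r, J, x0 = rsparse_lin(20, 100, seed=0)   # 20 residuals, 100 variables
    n = x0.size

    # With fig=None the solver returns, for each tolerance tau in {1e-1,1e-3,1e-5,1e-7},
    # the cumulative block budget needed to reduce ||gradf(x)|| to tau*||gradf(x0)||.
    # fxopt is only used by the plotting branch, so any value is fine here.
    tau_budget = GRBCGN(r, J, x0, None, fxopt=0.0, it_max=100, ftol=1e-10,
                        p=n // 10, fig=None, kappa=0.7, algorithm='tr')
    print(tau_budget)
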
@@ -0,0 +1,49 @@
""" Randomly Generated Linear Least-Squares Problems """ | ||
from __future__ import absolute_import, division, unicode_literals, print_function | ||
import scipy.sparse as sparse | ||
import numpy as np | ||
|
||
def rsparse_lin(m,n,seed,density=0.1): | ||
|
||
# Fix RNG seed | ||
np.random.seed(seed) | ||
|
||
# Generate random linear Jacobians | ||
A = sparse.random(m,n,density=density).toarray() | ||
|
||
# Generate random rhs | ||
b = np.random.random(m) | ||
|
||
# Residual | ||
def r(x): | ||
return A.dot(x)-b | ||
|
||
# Jacobian | ||
def J(x): | ||
return A | ||
|
||
# Initial guess | ||
x0 = np.ones(n) | ||
|
||
return r, J, x0 | ||
|
||
|
||
# Plot Hessian sparsity structure
def main():
    import matplotlib.pyplot as plt

    funcs = ['Random Sparse Linear ' + str(i) for i in range(1, 16)]
    for ifunc, func in enumerate(funcs):

        r, J, x0 = rsparse_lin(20,100,ifunc)

        # Plot sparsity
        plt.figure(ifunc)
        plt.spy(np.dot(J(x0).T,J(x0)))
        plt.ylabel('n')
        plt.xlabel('n')
        plt.title('Hessian Sparsity: '+func)
        plt.show()

if __name__ == "__main__":
    main()
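
A brief check (illustrative, not part of the commit): since rsparse_lin returns r(x) = Ax - b with a constant Jacobian, the minimizer of 0.5*||r(x)||^2 can be computed directly with numpy's least-squares solver, giving a reference value to compare a GRBCGN run against. The module name rsparse_lin is an assumption.

    import numpy as np
    from rsparse_lin import rsparse_lin   # assumed module name for the file above

    r, J, x0 = rsparse_lin(20, 100, seed=0)
    A = J(x0)                      # Jacobian is constant for a linear residual
    b = A.dot(x0) - r(x0)          # recover the right-hand side from r(x) = Ax - b

    # Reference minimizer of 0.5*||Ax - b||^2 via numpy's least-squares solver
    x_star, *_ = np.linalg.lstsq(A, b, rcond=None)
    print('optimal residual norm:', np.linalg.norm(r(x_star)))
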
@@ -0,0 +1,61 @@
""" Randomly Generated Sparse Quadratics """
from __future__ import absolute_import, division, unicode_literals, print_function
import scipy.sparse as sparse
import numpy as np

def rsparse_quad(m,n,seed,type='convex'):

    # Fix RNG seed
    np.random.seed(seed)

    # Generate random quadratic Hessians
    Qs = []
    for i in range(m):
        A = sparse.random(n,n,density=0.001)
        if type == 'convex':
            Qs.append((A.T.dot(A)).toarray())
        elif type == 'nonconvex':
            U = sparse.triu(A)
            Qs.append((U.T+U).toarray())
        else:
            raise RuntimeError('Incorrect type '+type)

    # Residual
    def r(x):
        res = np.zeros(m)
        for i in range(m):
            res[i] = 0.5*x.dot(Qs[i].dot(x))
        return res

    # Jacobian
    def J(x):
        jac = np.zeros((m,n))
        for i in range(m):
            jac[i,:] = Qs[i].dot(x)
        return jac

    # Initial guess
    x0 = np.ones(n)

    return r, J, x0

# Plot Hessian sparsity structure
def main():
    import matplotlib.pyplot as plt

    funcs = ['Random Sparse Quadratic ' + str(i) for i in range(1, 16)]
    for ifunc, func in enumerate(funcs):

        r, J, x0 = rsparse_quad(20,100,ifunc,type='nonconvex')

        # Plot sparsity
        plt.figure(ifunc)
        plt.spy(np.dot(J(x0).T,J(x0)))
        plt.ylabel('n')
        plt.xlabel('n')
        plt.title('Hessian Sparsity: '+func)
        plt.show()

if __name__ == "__main__":
    main()
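
A small verification sketch (illustrative, not part of the commit): each residual here is r_i(x) = 0.5*x^T Q_i x with symmetric Q_i, so its gradient is Q_i x, which is exactly the row that J(x) returns; a central finite-difference check confirms this numerically. The module name rsparse_quad is an assumption.

    import numpy as np
    from rsparse_quad import rsparse_quad   # assumed module name for the file above

    r, J, x0 = rsparse_quad(5, 50, seed=0, type='convex')

    # Central finite-difference approximation of the Jacobian at x0
    h = 1e-6
    J_fd = np.zeros((5, 50))
    for j in range(50):
        e = np.zeros(50)
        e[j] = h
        J_fd[:, j] = (r(x0 + e) - r(x0 - e)) / (2 * h)

    # Error should be tiny: central differences are exact for quadratics up to roundoff
    print('max Jacobian error:', np.abs(J_fd - J(x0)).max())
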