Skip to content

Commit

Permalink
Add Gaussian subspace GN code
Browse files Browse the repository at this point in the history
  • Loading branch information
jfowkes committed May 28, 2019
1 parent 5983413 commit 3ed130e
Show file tree
Hide file tree
Showing 3 changed files with 331 additions and 0 deletions.
221 changes: 221 additions & 0 deletions GRBCGN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
""" Gaussian Random Block-Coordinate Gauss-Newton """
from __future__ import absolute_import, division, unicode_literals, print_function
from trs.trs_exact import trs
import numpy as np
import scipy.linalg as linalg
import math as ma
import warnings

def GRBCGN(r, J, x0, sampling_func, fxopt, it_max, ftol, p, fig, kappa, algorithm='tr'):
n = x0.size

# Adaptive BCGN step size
STEP = 5

# Full function and gradient
def f(z): return 0.5 * np.dot(r(z), r(z))
def gradf(z): return J(z).T.dot(r(z))

# Plotting
if fig is not None:
plot_data = np.full((3,it_max+1),np.nan)
plot_data[0,0] = f(x0)-fxopt
plot_data[1,0] = linalg.norm(gradf(x0))

# Metrics
budget = 0
tau_budget = np.full(4,np.nan)

k = 0
x = x0
delta = None
while (not fig and budget < it_max*n) or (fig and k < it_max and ma.fabs(f(x) - fxopt) > ftol):

# Randomly select subspace
if kappa == 1 and p == n: # GN
Q = np.eye(n)
else: # random projection
Q = gram_schmidt_randn(n,p)

# Assemble block-reduced matrices
J_S = J(x).dot(Q)
rx = r(x)
gradf_S = J_S.T.dot(rx)

# Set initial trust region radius
if k == 0 and algorithm.startswith('tr') or algorithm == 'reg':
delta = linalg.norm(gradf_S)/10
if delta == 0:
delta = 1

# Debug output
#monitor(k, r, x, f, delta, algorithm, gradf, gradf_S)

# Solve subproblem
if algorithm == 'tr':
s_S = trs(J_S, gradf_S, delta)
else:
raise RuntimeError(algorithm + 'unimplemented!')

# Loop tolerance
Js_S = J_S.dot(s_S)
Delta_m = -np.dot(gradf_S,s_S) -0.5*np.dot(Js_S,Js_S)
stopping_rule = -Delta_m + (1-kappa)/2*np.power(np.linalg.norm(rx),2) > 0
#stopping_rule = -Delta_m + kappa*delta*delta > 0
#stopping_rule = linalg.norm(gradf_S) > kappa*delta

# Iteratively refine block size
p_in = p
while kappa != 1 and p_in != n and stopping_rule:

# Increase block size
step = min(STEP,n-p_in)
# print 'Increasing block size to:', p_in+step

# Grow subspace
Q = gram_schmidt_grow(Q,step)

# Assemble block-reduced matrices
J_S = J(x).dot(Q)
gradf_S = J_S.T.dot(rx)

p_in += step

# Debug output
#monitor(k, r, x, f, delta, algorithm, gradf, gradf_S)

# Solve subproblem
if algorithm == 'tr':
s_S = trs(J_S, gradf_S, delta)
else:
raise RuntimeError(algorithm + 'unimplemented!')

# Loop tolerance
Js_S = J_S.dot(s_S)
Delta_m = -np.dot(gradf_S,s_S) -0.5*np.dot(Js_S,Js_S)
stopping_rule = -Delta_m + (1-kappa)/2*np.power(np.linalg.norm(rx),2) > 0
#stopping_rule = linalg.norm(gradf_S) > kappa*delta
#Jx_S = J_S.dot(x.dot(U_S))
#stopping_rule = -Delta_m + np.dot(Js_S,Jx_S) + (sigma/2)*np.power(linalg.norm(s_S),2) > 0

budget += p_in
#print 'Iteration:', k, 'max block size:', p_in

# Update parameter and take step
#Delta_m = -np.dot(gradf_S,s_S) - 0.5*np.dot(Js_S,Js_S)
if algorithm.startswith('tr'):
x, delta = tr_update(f, x, s_S, Q, Delta_m, delta)
else:
raise RuntimeError(algorithm + 'unimplemented!')
k += 1

# function decrease metrics
if fig is None:
for itau, tau in enumerate([1e-1,1e-3,1e-5,1e-7]):
if np.isnan(tau_budget[itau]) and np.linalg.norm(gradf(x)) <= tau*np.linalg.norm(gradf(x0)):
tau_budget[itau] = budget
if np.all(np.isfinite(tau_budget)): # Stop if all function decrease metrics satisfied
return tau_budget
else: # plotting
plot_data[0,k] = f(x)-fxopt
plot_data[1,k] = linalg.norm(gradf(x))
plot_data[2,k] = p_in

# Debug output
#monitor(k, r, x, f, delta, algorithm, gradf)

# Return function decrease metrics (some unsatisfied)
if fig is None:
return tau_budget
else: # plotting
return plot_data

""" Generate random partial orthonormal basis """
def gram_schmidt_randn(n,k):
U = np.zeros((n,k))
v = np.random.randn(n)
U[:,0] = v/linalg.norm(v)
for i in range(1,k):
U[:,i] = np.random.randn(n)
for j in range(i):
U[:,i] = U[:,i] - np.dot(U[:,i],U[:,j]) / np.dot(U[:,j],U[:,j]) * U[:,j]
U[:,i] = U[:,i] / np.linalg.norm(U[:,i])
return U

""" Grow partial orthonormal basis """
def gram_schmidt_grow(U,l):
n,k = U.shape
U = np.hstack((U,np.zeros((n,l))))
for i in range(k,k+l):
U[:,i] = np.random.randn(n)
for j in range(i):
U[:,i] = U[:,i] - (U[:,i].T.dot(U[:,j])) / (U[:,j].T.dot(U[:,j])) * U[:,j]
U[:,i] = U[:,i] / np.linalg.norm(U[:,i])
return U

""" Trust Region Update """
def tr_update(f, x, s_S, Q, Delta_m, delta):

# Trust Region parameters
ETA1 = 0.1
ETA2 = 0.75
GAMMA1 = 0.5
GAMMA2 = 2.
DELTA_MIN = 1e-150
DELTA_MAX = 1e150

# Evaluate sufficient decrease
s = Q.dot(s_S)
rho = (f(x) - f(x+s))/Delta_m

# Accept trial point
if rho >= ETA1:
x = x + s

# Update trust region radius
if rho < ETA1:
delta *= GAMMA1
delta = max(delta,DELTA_MIN)
elif rho >= ETA2:
delta *= GAMMA2
delta = min(delta,DELTA_MAX)

return x, delta

""" Return roots of quadratic equation """
def quadeq(a, b, c):
warnings.simplefilter("error", RuntimeWarning)
try:
x1 = (-b + ma.sqrt(b * b - 4 * a * c)) / (2 * a)
x2 = (-b - ma.sqrt(b * b - 4 * a * c)) / (2 * a)
except RuntimeWarning: # failed step: delta too large
x1 = 0
x2 = 0
warnings.resetwarnings()
return x1, x2

""" Output Monitoring Information """
def monitor(k, r, x, f, delta, algorithm, gradf, gradf_S=None):

print('++++ Iteration', k, '++++')
if algorithm.startswith('tr'):
print('delta: %.2e' % delta)
elif algorithm == 'reg':
print('sigma: %.2e' % delta)
elif delta is not None:
print('alpha: %.2e' % delta)

nr = linalg.norm(r(x))
ng = linalg.norm(gradf(x))
nJrr = ng / nr
if gradf_S is not None:
ng_S = linalg.norm(gradf_S)
nJ_Srr = ng_S / nr

print('x:', x, 'f(x):', f(x))
print('||r(x)||: %.2e' % nr, '||gradf(x)||: %.2e' % ng,end='')
if gradf_S is not None: print('||gradf_S(x)||: %.2e' % ng_S)
print("||J'r||/||r||: %.2e" % nJrr,end='')
if gradf_S is not None: print("||J_S'r||/||r||: %.2e" % nJ_Srr)

if gradf_S is None: print()
49 changes: 49 additions & 0 deletions problems/rsparse_lin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
""" Randomly Generated Linear Least-Squares Problems """
from __future__ import absolute_import, division, unicode_literals, print_function
import scipy.sparse as sparse
import numpy as np

def rsparse_lin(m,n,seed,density=0.1):

# Fix RNG seed
np.random.seed(seed)

# Generate random linear Jacobians
A = sparse.random(m,n,density=density).toarray()

# Generate random rhs
b = np.random.random(m)

# Residual
def r(x):
return A.dot(x)-b

# Jacobian
def J(x):
return A

# Initial guess
x0 = np.ones(n)

return r, J, x0


# Plot Hessian sparsity structure
def main():
import matplotlib.pyplot as plt

funcs = ['Random Sparse Linear ' + str(i) for i in range(1, 16)]
for ifunc, func in enumerate(funcs):

r, J, x0 = rsparse_lin(20,100,ifunc)

# Plot sparsity
plt.figure(ifunc)
plt.spy(np.dot(J(x0).T,J(x0)))
plt.ylabel('m')
plt.xlabel('m')
plt.title('Hessian Sparsity: '+func)
plt.show()

if __name__ == "__main__":
main()
61 changes: 61 additions & 0 deletions problems/rsparse_quad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
""" Randomly Generated Sparse Quadratics """
from __future__ import absolute_import, division, unicode_literals, print_function
import scipy.sparse as sparse
import numpy as np

def rsparse_quad(m,n,seed,type='convex'):

# Fix RNG seed
np.random.seed(seed)

# Generate random quadratic Hessians
Qs = []
for i in range(m):
A = sparse.random(n,n,density=0.001)
if type == 'convex':
Qs.append((A.T.dot(A)).toarray())
elif type == 'nonconvex':
U = sparse.triu(A)
Qs.append((U.T+U).toarray())
else:
raise RuntimeError('Incorrect type '+type)

# Residual
def r(x):
res = np.zeros(m)
for i in range(m):
res[i] = 0.5*x.dot(Qs[i].dot(x))
return res

# Jacobian
def J(x):
jac = np.zeros((m,n))
for i in range(m):
jac[i,:] = Qs[i].dot(x)
return jac

# Initial guess
x0 = np.ones(n)

return r, J, x0


# Plot Hessian sparsity structure
def main():
import matplotlib.pyplot as plt

funcs = ['Random Sparse Quadratic ' + str(i) for i in range(1, 16)]
for ifunc, func in enumerate(funcs):

r, J, x0 = rsparse_quad(20,100,ifunc,type='nonconvex')

# Plot sparsity
plt.figure(ifunc)
plt.spy(np.dot(J(x0).T,J(x0)))
plt.ylabel('m')
plt.xlabel('m')
plt.title('Hessian Sparsity: '+func)
plt.show()

if __name__ == "__main__":
main()

0 comments on commit 3ed130e

Please sign in to comment.