From 14cf44d61b78d13911bd500a57d4d8e69e14e586 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Sat, 8 Jun 2024 19:40:20 +0900 Subject: [PATCH] Add some neural network optimizers --- js/neuralnetwork_builder.js | 16 +- js/view/ladder_network.js | 15 +- lib/model/mlp.js | 4 +- lib/model/neuralnetwork.js | 24 +-- lib/model/nns/optimizer.js | 26 ++- lib/model/nns/optimizer/adabelief.js | 44 ++++ lib/model/nns/optimizer/adabound.js | 48 +++++ lib/model/nns/optimizer/amsbound.js | 49 +++++ lib/model/nns/optimizer/amsgrad.js | 43 ++++ lib/model/nns/optimizer/santae.js | 75 +++++++ lib/model/nns/optimizer/santasss.js | 86 ++++++++ lib/model/ranknet.js | 4 +- .../lib/model/nns/optimizer/adabelief.test.js | 121 +++++++++++ .../lib/model/nns/optimizer/adabound.test.js | 129 ++++++++++++ tests/lib/model/nns/optimizer/adamax.test.js | 2 +- .../lib/model/nns/optimizer/amsbound.test.js | 136 +++++++++++++ tests/lib/model/nns/optimizer/amsgrad.test.js | 121 +++++++++++ tests/lib/model/nns/optimizer/nadam.test.js | 2 +- tests/lib/model/nns/optimizer/santae.test.js | 174 ++++++++++++++++ .../lib/model/nns/optimizer/santasss.test.js | 192 ++++++++++++++++++ 20 files changed, 1251 insertions(+), 60 deletions(-) create mode 100644 lib/model/nns/optimizer/adabelief.js create mode 100644 lib/model/nns/optimizer/adabound.js create mode 100644 lib/model/nns/optimizer/amsbound.js create mode 100644 lib/model/nns/optimizer/amsgrad.js create mode 100644 lib/model/nns/optimizer/santae.js create mode 100644 lib/model/nns/optimizer/santasss.js create mode 100644 tests/lib/model/nns/optimizer/adabelief.test.js create mode 100644 tests/lib/model/nns/optimizer/adabound.test.js create mode 100644 tests/lib/model/nns/optimizer/amsbound.test.js create mode 100644 tests/lib/model/nns/optimizer/amsgrad.test.js create mode 100644 tests/lib/model/nns/optimizer/santae.test.js create mode 100644 tests/lib/model/nns/optimizer/santasss.test.js diff --git a/js/neuralnetwork_builder.js b/js/neuralnetwork_builder.js index 01f0666d..37c2d98b 100644 --- a/js/neuralnetwork_builder.js +++ b/js/neuralnetwork_builder.js @@ -1,3 +1,5 @@ +import * as opt from '../../lib/model/nns/optimizer.js' + const layerTypes = { abs: {}, clip: { min: 0, max: 1 }, @@ -192,22 +194,12 @@ export default class NeuralNetworkBuilder { this._opt = r.append('select').attr('name', 'optimizer') this._opt .selectAll('option') - .data([ - 'sgd', - 'adam', - 'momentum', - 'adagrad', - 'rmsprop', - 'adadelta', - 'rmspropgraves', - 'smorms3', - 'adamax', - 'nadam', - ]) + .data(Object.keys(opt)) .enter() .append('option') .property('value', d => d) .text(d => d) + this._opt.property('value', 'adam') } } } diff --git a/js/view/ladder_network.js b/js/view/ladder_network.js index 1affa690..0ef4585e 100644 --- a/js/view/ladder_network.js +++ b/js/view/ladder_network.js @@ -1,5 +1,6 @@ import Controller from '../controller.js' import { BaseWorker } from '../utils.js' +import * as opt from '../../lib/model/nns/optimizer.js' class LadderNetworkWorker extends BaseWorker { constructor() { @@ -85,18 +86,8 @@ export default function (platform) { const optimizer = controller.select({ label: ' Optimizer ', - values: [ - 'sgd', - 'adam', - 'momentum', - 'adagrad', - 'rmsprop', - 'adadelta', - 'rmspropgraves', - 'smorms3', - 'adamax', - 'nadam', - ], + values: Object.keys(opt), + value: 'adam', }) const slbConf = controller.stepLoopButtons().init(done => { if (platform.datas.length === 0) { diff --git a/lib/model/mlp.js b/lib/model/mlp.js index 53954b3a..a4cdfc65 100644 ---
a/lib/model/mlp.js +++ b/lib/model/mlp.js @@ -1,5 +1,5 @@ import Matrix from '../util/matrix.js' -import { AdamOptimizer } from './nns/optimizer.js' +import { adam } from './nns/optimizer.js' /** * @ignore @@ -31,7 +31,7 @@ class MLP { this._w[i] = Matrix.randn(layer_sizes[i], layer_sizes[i + 1], 0, 0.1) this._b[i] = Matrix.zeros(1, layer_sizes[i + 1]) } - this._optimizer = new AdamOptimizer() + this._optimizer = new adam() this._optimizer_mng = this._optimizer.manager() } diff --git a/lib/model/neuralnetwork.js b/lib/model/neuralnetwork.js index f64b1b5c..0b81f478 100644 --- a/lib/model/neuralnetwork.js +++ b/lib/model/neuralnetwork.js @@ -84,31 +84,15 @@ export default class NeuralNetwork { /** * @param {ComputationalGraph} graph Graph of a network - * @param {'sgd' | 'adam' | 'momentum' | 'adagrad' | 'rmsprop' | 'adadelta' | 'rmspropgraves' | 'smorms3' | 'adamax' | 'nadam'} [optimizer] Optimizer of the network + * @param {'sgd' | 'adam' | 'momentum' | 'adagrad' | 'rmsprop' | 'adadelta' | 'rmspropgraves' | 'smorms3' | 'adamax' | 'nadam' | 'santae' | 'santasss' | 'amsgrad' | 'adabound' | 'amsbound' | 'adabelief'} [optimizer] Optimizer of the network */ constructor(graph, optimizer = 'sgd') { this._graph = graph this._optimizer = optimizer - if (optimizer === 'adam') { - this._opt = new opt.AdamOptimizer() - } else if (optimizer === 'momentum') { - this._opt = new opt.MomentumOptimizer() - } else if (optimizer === 'adagrad') { - this._opt = new opt.AdaGradOptimizer() - } else if (optimizer === 'rmsprop') { - this._opt = new opt.RMSPropOptimizer() - } else if (optimizer === 'adadelta') { - this._opt = new opt.AdaDeltaOptimizer() - } else if (optimizer === 'rmspropgraves') { - this._opt = new opt.RMSPropGravesOptimizer() - } else if (optimizer === 'smorms3') { - this._opt = new opt.SMORMS3Optimizer() - } else if (optimizer === 'adamax') { - this._opt = new opt.AdaMaxOptimizer() - } else if (optimizer === 'nadam') { - this._opt = new opt.NadamOptimizer() + if (Object.keys(opt).includes(optimizer)) { + this._opt = new opt[optimizer]() } else { - this._opt = new opt.SGDOptimizer() + this._opt = new opt.sgd() } this._opt_managers = [] for (let i = 0; i < this._graph.size; i++) { diff --git a/lib/model/nns/optimizer.js b/lib/model/nns/optimizer.js index 52685a00..5fd5c7d7 100644 --- a/lib/model/nns/optimizer.js +++ b/lib/model/nns/optimizer.js @@ -1,10 +1,16 @@ -export { SGDOptimizer } from './optimizer/sgd.js' -export { MomentumOptimizer } from './optimizer/momentum.js' -export { AdaGradOptimizer } from './optimizer/adagrad.js' -export { RMSPropOptimizer } from './optimizer/rmsprop.js' -export { AdaDeltaOptimizer } from './optimizer/adadelta.js' -export { AdamOptimizer } from './optimizer/adam.js' -export { RMSPropGravesOptimizer } from './optimizer/rmspropgraves.js' -export { SMORMS3Optimizer } from './optimizer/smorms3.js' -export { AdaMaxOptimizer } from './optimizer/adamax.js' -export { NadamOptimizer } from './optimizer/nadam.js' +export { SGDOptimizer as sgd } from './optimizer/sgd.js' +export { MomentumOptimizer as momentum } from './optimizer/momentum.js' +export { AdaGradOptimizer as adagrad } from './optimizer/adagrad.js' +export { RMSPropOptimizer as rmsprop } from './optimizer/rmsprop.js' +export { AdaDeltaOptimizer as adadelta } from './optimizer/adadelta.js' +export { AdamOptimizer as adam } from './optimizer/adam.js' +export { RMSPropGravesOptimizer as rmspropgraves } from './optimizer/rmspropgraves.js' +export { SMORMS3Optimizer as smorms3 } from './optimizer/smorms3.js' +export 
{ AdaMaxOptimizer as adamax } from './optimizer/adamax.js' +export { NadamOptimizer as nadam } from './optimizer/nadam.js' +export { SantaEOptimizer as santae } from './optimizer/santae.js' +export { SantaSSSOptimizer as santasss } from './optimizer/santasss.js' +export { AMSGradOptimizer as amsgrad } from './optimizer/amsgrad.js' +export { AdaBoundOptimizer as adabound } from './optimizer/adabound.js' +export { AMSBoundOptimizer as amsbound } from './optimizer/amsbound.js' +export { AdaBeliefOptimizer as adabelief } from './optimizer/adabelief.js' diff --git a/lib/model/nns/optimizer/adabelief.js b/lib/model/nns/optimizer/adabelief.js new file mode 100644 index 00000000..08b98fbc --- /dev/null +++ b/lib/model/nns/optimizer/adabelief.js @@ -0,0 +1,44 @@ +import Matrix from '../../../util/matrix.js' + +export class AdaBeliefOptimizer { + constructor(lr = 0.001, beta1 = 0.9, beta2 = 0.999) { + this._learningrate = lr + this._beta1 = beta1 + this._beta2 = beta2 + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + this.params[key] = { m: z.copy(), v: z, t: 1 } + } + this.params[key].m.broadcastOperate(value, (a, b) => a * this_._beta1 + b * (1 - this_._beta1)) + const mo = this.params[key].m.copy() + mo.broadcastOperate(value, (a, b) => b - a) + this.params[key].v.broadcastOperate(mo, (a, b) => a * this_._beta2 + (1 - this_._beta2) * b * b) + const nv = 1 - this_._beta1 ** this.params[key].t + const ns = 1 - this_._beta2 ** this.params[key].t + const ret = this.params[key].m.copy() + ret.broadcastOperate(this.params[key].v, (a, b) => (a / nv) * (this.lr / Math.sqrt(b / ns + 1.0e-12))) + this.params[key].t++ + return valueIsNumber ? 
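+ // scalar gradients were wrapped in a 1x1 Matrix on entry, so unwrap the result again: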
ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/nns/optimizer/adabound.js b/lib/model/nns/optimizer/adabound.js new file mode 100644 index 00000000..a2c8e94c --- /dev/null +++ b/lib/model/nns/optimizer/adabound.js @@ -0,0 +1,48 @@ +import Matrix from '../../../util/matrix.js' + +export class AdaBoundOptimizer { + constructor(lr = 0.001, alpha = 0.003, beta1 = 0.9, beta2 = 0.999) { + this._learningrate = lr + this._alpha = alpha + this._beta1 = beta1 + this._beta2 = beta2 + + this._eta_lbound = t => this._learningrate * (1 - 1 / ((1 - beta2) * t + 1)) + this._eta_ubound = t => this._learningrate * (1 + 1 / ((1 - beta2) * t + 1)) + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + this.params[key] = { m: z.copy(), v: z, t: 1 } + } + this.params[key].m.broadcastOperate(value, (a, b) => a * this_._beta1 + b * (1 - this_._beta1)) + this.params[key].v.broadcastOperate(value, (a, b) => a * this_._beta2 + (1 - this_._beta2) * b * b) + const eta_lb = this_._eta_lbound(this.params[key].t) + const eta_ub = this_._eta_ubound(this.params[key].t) + const eta = this.params[key].v.copy() + eta.map(v => Math.min(eta_ub, Math.max(eta_lb, this_._alpha / Math.sqrt(v)))) + const ret = this.params[key].m.copy() + ret.broadcastOperate(eta, (a, b) => (a * b) / Math.sqrt(this.params[key].t)) + this.params[key].t++ + return valueIsNumber ? ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/nns/optimizer/amsbound.js b/lib/model/nns/optimizer/amsbound.js new file mode 100644 index 00000000..20725b96 --- /dev/null +++ b/lib/model/nns/optimizer/amsbound.js @@ -0,0 +1,49 @@ +import Matrix from '../../../util/matrix.js' + +export class AMSBoundOptimizer { + constructor(lr = 0.001, alpha = 0.003, beta1 = 0.9, beta2 = 0.999) { + this._learningrate = lr + this._alpha = alpha + this._beta1 = beta1 + this._beta2 = beta2 + + this._eta_lbound = t => this._learningrate * (1 - 1 / ((1 - beta2) * t + 1)) + this._eta_ubound = t => this._learningrate * (1 + 1 / ((1 - beta2) * t + 1)) + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + this.params[key] = { m: z.copy(), v: z.copy(), vh: z, t: 1 } + } + this.params[key].m.broadcastOperate(value, (a, b) => a * this_._beta1 + b * (1 - this_._beta1)) + this.params[key].v.broadcastOperate(value, (a, b) => a * this_._beta2 + (1 - this_._beta2) * b * b) + this.params[key].vh.broadcastOperate(this.params[key].v, (a, b) => Math.max(a, b)) + const eta_lb = this_._eta_lbound(this.params[key].t) + const eta_ub = this_._eta_ubound(this.params[key].t) + const eta = this.params[key].vh.copy() + eta.map(v => Math.min(eta_ub, Math.max(eta_lb, this_._alpha / Math.sqrt(v)))) + const ret = this.params[key].m.copy() + ret.broadcastOperate(eta, (a, b) => (a * b) / Math.sqrt(this.params[key].t)) + this.params[key].t++ + return valueIsNumber ? 
ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/nns/optimizer/amsgrad.js b/lib/model/nns/optimizer/amsgrad.js new file mode 100644 index 00000000..7822820c --- /dev/null +++ b/lib/model/nns/optimizer/amsgrad.js @@ -0,0 +1,43 @@ +import Matrix from '../../../util/matrix.js' + +export class AMSGradOptimizer { + constructor(lr = 0.001, beta1 = 0.9, beta2 = 0.999) { + this._learningrate = lr + this._beta1 = beta1 + this._beta2 = beta2 + this._a = t => this._learningrate / Math.sqrt(t) + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + this.params[key] = { m: z.copy(), v: z.copy(), vh: z, t: 1 } + } + this.params[key].m.broadcastOperate(value, (a, b) => a * this_._beta1 + b * (1 - this_._beta1)) + this.params[key].v.broadcastOperate(value, (a, b) => a * this_._beta2 + b ** 2 * (1 - this_._beta2)) + this.params[key].vh.broadcastOperate(this.params[key].v, (a, b) => Math.max(a, b)) + const ret = this.params[key].m.copy() + const lr = this_._a(this.params[key].t) + ret.broadcastOperate(this.params[key].vh, (a, b) => (lr * a) / Math.sqrt(b + 1.0e-12)) + this.params[key].t++ + return valueIsNumber ? ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/nns/optimizer/santae.js b/lib/model/nns/optimizer/santae.js new file mode 100644 index 00000000..3f246f9e --- /dev/null +++ b/lib/model/nns/optimizer/santae.js @@ -0,0 +1,75 @@ +import Matrix from '../../../util/matrix.js' + +export class SantaEOptimizer { + constructor(lr = 0.01, sigma = 0.95, burnin = 100, c = 5, n = 16, lambda = 0.01) { + this._learningrate = lr + this._sigma = sigma + this._beta = t => t ** 0.5 + this._burnin = burnin + this._c = c + this._n = n + this._lambda = lambda + this._z = () => { + const x = Math.random() + const y = Math.random() + return Math.sqrt(-2 * Math.log(x)) * Math.cos(2 * Math.PI * y) + } + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + const a = value.copy() + a.fill(Math.sqrt(this.lr) * this_._c) + const u = value.copy() + u.map(() => Math.sqrt(this.lr) * this_._z()) + this.params[key] = { v: z.copy(), g: z, a, u, t: 1 } + } + this.params[key].v.broadcastOperate( + value, + (a, b) => a * this_._sigma + (b ** 2 * (1 - this_._sigma)) / this_._n ** 2 + ) + const gp = this.params[key].g.copy() + const up = this.params[key].u.copy() + this.params[key].g = this.params[key].v.copy() + this.params[key].g.map(v => 1 / Math.sqrt(this_._lambda + Math.sqrt(v))) + if (this.params[key].t < this_._burnin) { + const beta = this_._beta(this.params[key].t) + const lrbeta = this.lr / beta + this.params[key].a.broadcastOperate(this.params[key].u, (a, b) => a + (b ** 2 - lrbeta)) + const gg = gp.copy() + gg.broadcastOperate(this.params[key].g, (a, b) => a / b) + this.params[key].u.broadcastOperate(gg, (a, b) => (lrbeta * (1 - b)) / a) + this.params[key].u.broadcastOperate(gp, (a, b) => a + Math.sqrt(2 * lrbeta * b) * this_._z()) + } else { + this.params[key].u.fill(0) + } + 
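+ // common to both phases: fold in the previous momentum scaled by (1 - a), + // then take the preconditioned gradient step; the returned delta is -g * u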
up.broadcastOperate(this.params[key].a, (a, b) => a * (1 - b)) + this.params[key].u.broadcastOperate(up, (a, b) => a + b) + const gv = this.params[key].g.copy() + gv.broadcastOperate(value, (a, b) => a * b) + this.params[key].u.broadcastOperate(gv, (a, b) => a - this.lr * b) + const ret = this.params[key].u.copy() + ret.broadcastOperate(this.params[key].g, (a, b) => -a * b) + this.params[key].t++ + return valueIsNumber ? ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/nns/optimizer/santasss.js b/lib/model/nns/optimizer/santasss.js new file mode 100644 index 00000000..1d04a340 --- /dev/null +++ b/lib/model/nns/optimizer/santasss.js @@ -0,0 +1,86 @@ +import Matrix from '../../../util/matrix.js' + +export class SantaSSSOptimizer { + constructor(lr = 0.01, sigma = 0.95, burnin = 100, c = 5, n = 16, lambda = 0.01) { + this._learningrate = lr + this._sigma = sigma + this._beta = t => t ** 0.5 + this._burnin = burnin + this._c = c + this._n = n + this._lambda = lambda + this._z = () => { + const x = Math.random() + const y = Math.random() + return Math.sqrt(-2 * Math.log(x)) * Math.cos(2 * Math.PI * y) + } + } + + set learningRate(value) { + this._learningrate = value + } + + manager() { + const this_ = this + return { + get lr() { + return this_._learningrate + }, + params: {}, + delta(key, value) { + const valueIsNumber = typeof value === 'number' + if (valueIsNumber) { + value = new Matrix(1, 1, value) + } + if (!this.params[key]) { + const z = value.copy() + z.fill(0) + const a = value.copy() + a.fill(Math.sqrt(this.lr) * this_._c) + const u = value.copy() + u.map(() => Math.sqrt(this.lr) * this_._z()) + this.params[key] = { v: z.copy(), g: z, a, u, t: 1 } + } + this.params[key].v.broadcastOperate( + value, + (a, b) => a * this_._sigma + (b ** 2 * (1 - this_._sigma)) / this_._n ** 2 + ) + const gp = this.params[key].g.copy() + const up = this.params[key].u.copy() + this.params[key].g = this.params[key].v.copy() + this.params[key].g.map(v => 1 / Math.sqrt(this_._lambda + Math.sqrt(v))) + if (this.params[key].t < this_._burnin) { + const beta = this_._beta(this.params[key].t) + const lrbeta = this.lr / beta + this.params[key].a.broadcastOperate(this.params[key].u, (a, b) => a + (b ** 2 - lrbeta) / 2) + this.params[key].u.broadcastOperate(this.params[key].a, (a, b) => Math.exp(-b / 2) * a) + const gg = gp.copy() + gg.broadcastOperate(this.params[key].g, (a, b) => a / b) + gg.broadcastOperate(up, (a, b) => (lrbeta * (1 - a)) / b) + this.params[key].u.broadcastOperate(gg, (a, b) => a + b) + this.params[key].u.broadcastOperate(gp, (a, b) => a + Math.sqrt(2 * lrbeta * b) * this_._z()) + const gv = this.params[key].g.copy() + gv.broadcastOperate(value, (a, b) => a * b) + this.params[key].u.broadcastOperate(gv, (a, b) => a - this.lr * b) + this.params[key].u.broadcastOperate(this.params[key].a, (a, b) => Math.exp(-b / 2) * a) + this.params[key].a.broadcastOperate(this.params[key].u, (a, b) => a + (b ** 2 - lrbeta) / 2) + } else { + this.params[key].u.broadcastOperate(this.params[key].a, (a, b) => Math.exp(-b / 2) * a) + const gv = this.params[key].g.copy() + gv.broadcastOperate(value, (a, b) => a * b) + this.params[key].u.broadcastOperate(gv, (a, b) => a - this.lr * b) + this.params[key].u.broadcastOperate(this.params[key].a, (a, b) => Math.exp(-b / 2) * a) + } + const gup = this.params[key].g.copy() + gup.broadcastOperate(up, (a, b) => (a * b) / 2) + const gu = this.params[key].g.copy() + gu.broadcastOperate(this.params[key].u, (a, b) => (a * b) / 2) + + const ret = gup.copy() + 
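+ // symmetric splitting: the returned delta averages the momentum before and + // after this step, i.e. -g * (u_prev + u_new) / 2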
ret.broadcastOperate(gu, (a, b) => -a - b) + this.params[key].t++ + return valueIsNumber ? ret.toScaler() : ret + }, + } + } +} diff --git a/lib/model/ranknet.js b/lib/model/ranknet.js index ce40df13..daa25e27 100644 --- a/lib/model/ranknet.js +++ b/lib/model/ranknet.js @@ -1,5 +1,5 @@ import Matrix from '../util/matrix.js' -import { AdamOptimizer } from './nns/optimizer.js' +import { adam } from './nns/optimizer.js' const ActivationFunctions = { identity: { @@ -40,7 +40,7 @@ export default class RankNet { this._w = [] this._b = [] - this._optimizer = new AdamOptimizer(rate).manager() + this._optimizer = new adam(rate).manager() } _init(sizes) { diff --git a/tests/lib/model/nns/optimizer/adabelief.test.js b/tests/lib/model/nns/optimizer/adabelief.test.js new file mode 100644 index 00000000..36419db1 --- /dev/null +++ b/tests/lib/model/nns/optimizer/adabelief.test.js @@ -0,0 +1,121 @@ +import { jest } from '@jest/globals' +jest.retryTimes(3) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { AdaBeliefOptimizer } from '../../../../../lib/model/nns/optimizer/adabelief.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('adabelief', () => { + test('lr', () => { + const opt = new AdaBeliefOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new AdaBeliefOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + let r = 0 + let s = 0 + for (let i = 0; i < 10; i++) { + const v = Math.random() + const d = manager.delta('w', v) + expect(typeof d).toBe('number') + r = r * beta1 + v * (1 - beta1) + s = s * beta2 + (r - v) ** 2 * (1 - beta2) + expect(d).toBeCloseTo((0.1 * (r / (1 - beta1 ** (i + 1)))) / Math.sqrt(s / (1 - beta2 ** (i + 1)))) + } + }) + + test('matrix', () => { + const opt = new AdaBeliefOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + const r = Matrix.zeros(10, 3) + const s = Matrix.zeros(10, 3) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + r.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + s.map((v, i) => v * beta2 + (r.at(i) - mat.at(i)) ** 2 * (1 - beta2)) + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; j++) { + expect(d.at(i, j)).toBeCloseTo( + (0.1 * (r.at(i, j) / (1 - beta1 ** (t + 1)))) / + Math.sqrt(s.at(i, j) / (1 - beta2 ** (t + 1))) + ) + } + } + } + }) + + test('tensor', () => { + const opt = new AdaBeliefOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + const r = Tensor.zeros([7, 5, 3]) + const s = Tensor.zeros([7, 5, 3]) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + r.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + const mo = r.copy() + mo.broadcastOperate(mat, (a, b) => a - b) + s.broadcastOperate(mo, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + for (let i = 0; i < mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo( + (0.1 * (r.at(i, j, k) / (1 - beta1 ** (t + 1)))) / + Math.sqrt(s.at(i, j, k) / (1 - beta2 ** (t + 1))) + ) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( 
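+ // a small regression net (10 inputs -> 5 sigmoid units -> 3 outputs) fitted with the new optimizer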
+ [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'sigmoid' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'adabelief' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 3) + + const losslog = [] + for (let i = 0; i < 1000; i++) { + const loss = net.fit(x, t, 1000, 0.01) + losslog.push(loss[0]) + if (loss[0] < 1.0e-8) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +}) diff --git a/tests/lib/model/nns/optimizer/adabound.test.js b/tests/lib/model/nns/optimizer/adabound.test.js new file mode 100644 index 00000000..8cd7bbcf --- /dev/null +++ b/tests/lib/model/nns/optimizer/adabound.test.js @@ -0,0 +1,129 @@ +import { jest } from '@jest/globals' +jest.retryTimes(3) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { AdaBoundOptimizer } from '../../../../../lib/model/nns/optimizer/adabound.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('adabound', () => { + test('lr', () => { + const opt = new AdaBoundOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new AdaBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + let m = 0 + let v = 0 + for (let i = 0; i < 10; i++) { + const x = Math.random() + const d = manager.delta('w', x) + expect(typeof d).toBe('number') + m = m * beta1 + x * (1 - beta1) + v = v * beta2 + x ** 2 * (1 - beta2) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (i + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (i + 1) + 1)) + const eta = Math.max(Math.min(alpha / Math.sqrt(v), eta_ub), eta_lb) + expect(d).toBeCloseTo((m * eta) / Math.sqrt(i + 1)) + } + }) + + test('matrix', () => { + const opt = new AdaBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Matrix.zeros(10, 3) + const v = Matrix.zeros(10, 3) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (t + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (t + 1) + 1)) + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; j++) { + expect(d.at(i, j)).toBeCloseTo( + (Math.max(Math.min(alpha / Math.sqrt(v.at(i, j)), eta_ub), eta_lb) * m.at(i, j)) / + Math.sqrt(t + 1) + ) + } + } + } + }) + + test('tensor', () => { + const opt = new AdaBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Tensor.zeros([7, 5, 3]) + const v = Tensor.zeros([7, 5, 3]) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (t + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (t + 1) + 1)) + for (let i = 0; i < 
mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo( + (Math.max(Math.min(alpha / Math.sqrt(v.at(i, j, k)), eta_ub), eta_lb) * m.at(i, j, k)) / + Math.sqrt(t + 1) + ) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( + [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'sigmoid' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'adabound' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 3) + + const losslog = [] + for (let i = 0; i < 100; i++) { + const loss = net.fit(x, t, 1000, 0.1) + losslog.push(loss[0]) + if (loss[0] < 1.0e-5) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +}) diff --git a/tests/lib/model/nns/optimizer/adamax.test.js b/tests/lib/model/nns/optimizer/adamax.test.js index d8af6eb9..285ebd72 100644 --- a/tests/lib/model/nns/optimizer/adamax.test.js +++ b/tests/lib/model/nns/optimizer/adamax.test.js @@ -7,7 +7,7 @@ import Matrix from '../../../../../lib/util/matrix.js' import { AdaMaxOptimizer } from '../../../../../lib/model/nns/optimizer/adamax.js' import Tensor from '../../../../../lib/util/tensor.js' -describe('adam', () => { +describe('adamax', () => { test('lr', () => { const opt = new AdaMaxOptimizer(0.1) const manager = opt.manager() diff --git a/tests/lib/model/nns/optimizer/amsbound.test.js b/tests/lib/model/nns/optimizer/amsbound.test.js new file mode 100644 index 00000000..885e5c57 --- /dev/null +++ b/tests/lib/model/nns/optimizer/amsbound.test.js @@ -0,0 +1,136 @@ +import { jest } from '@jest/globals' +jest.retryTimes(3) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { AMSBoundOptimizer } from '../../../../../lib/model/nns/optimizer/amsbound.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('amsbound', () => { + test('lr', () => { + const opt = new AMSBoundOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new AMSBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + let m = 0 + let v = 0 + let vh = 0 + for (let i = 0; i < 10; i++) { + const x = Math.random() + const d = manager.delta('w', x) + expect(typeof d).toBe('number') + m = m * beta1 + x * (1 - beta1) + v = v * beta2 + x ** 2 * (1 - beta2) + vh = Math.max(vh, v) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (i + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (i + 1) + 1)) + const eta = Math.max(Math.min(alpha / Math.sqrt(vh), eta_ub), eta_lb) + expect(d).toBeCloseTo((m * eta) / Math.sqrt(i + 1)) + } + }) + + test('matrix', () => { + const opt = new AMSBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Matrix.zeros(10, 3) + const v = Matrix.zeros(10, 3) + const vh = Matrix.zeros(10, 3) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + 
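+ // vh keeps the element-wise running maximum of v (the AMSGrad correction on top of AdaBound), so alpha / sqrt(vh) can only shrink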
vh.broadcastOperate(v, (a, b) => Math.max(a, b)) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (t + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (t + 1) + 1)) + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; j++) { + expect(d.at(i, j)).toBeCloseTo( + (Math.max(Math.min(alpha / Math.sqrt(vh.at(i, j)), eta_ub), eta_lb) * m.at(i, j)) / + Math.sqrt(t + 1) + ) + } + } + } + }) + + test('tensor', () => { + const opt = new AMSBoundOptimizer(0.1) + const manager = opt.manager() + const alpha = 0.003 + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Tensor.zeros([7, 5, 3]) + const v = Tensor.zeros([7, 5, 3]) + const vh = Tensor.zeros([7, 5, 3]) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + vh.broadcastOperate(v, (a, b) => Math.max(a, b)) + const eta_lb = 0.1 * (1 - 1 / ((1 - beta2) * (t + 1) + 1)) + const eta_ub = 0.1 * (1 + 1 / ((1 - beta2) * (t + 1) + 1)) + for (let i = 0; i < mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo( + (Math.max(Math.min(alpha / Math.sqrt(vh.at(i, j, k)), eta_ub), eta_lb) * + m.at(i, j, k)) / + Math.sqrt(t + 1) + ) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( + [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'sigmoid' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'amsbound' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 3) + + const losslog = [] + for (let i = 0; i < 100; i++) { + const loss = net.fit(x, t, 1000, 0.1) + losslog.push(loss[0]) + if (loss[0] < 1.0e-5) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +}) diff --git a/tests/lib/model/nns/optimizer/amsgrad.test.js b/tests/lib/model/nns/optimizer/amsgrad.test.js new file mode 100644 index 00000000..55d5657c --- /dev/null +++ b/tests/lib/model/nns/optimizer/amsgrad.test.js @@ -0,0 +1,121 @@ +import { jest } from '@jest/globals' +jest.retryTimes(3) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { AMSGradOptimizer } from '../../../../../lib/model/nns/optimizer/amsgrad.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('amsgrad', () => { + test('lr', () => { + const opt = new AMSGradOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new AMSGradOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + let m = 0 + let v = 0 + let vh = 0 + for (let i = 0; i < 10; i++) { + const x = Math.random() + const d = manager.delta('w', x) + expect(typeof d).toBe('number') + m = m * beta1 + x * (1 - beta1) + v = v * beta2 + x ** 2 * (1 - beta2) + vh = Math.max(vh, v) + expect(d).toBeCloseTo(((0.1 / Math.sqrt(i + 1)) * m) / Math.sqrt(vh)) + } + }) + + test('matrix', () => { + const opt = new AMSGradOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Matrix.zeros(10, 3) + const v = 
Matrix.zeros(10, 3) + const vh = Matrix.zeros(10, 3) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + vh.broadcastOperate(v, (a, b) => Math.max(a, b)) + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; j++) { + expect(d.at(i, j)).toBeCloseTo(((0.1 / Math.sqrt(t + 1)) * m.at(i, j)) / Math.sqrt(vh.at(i, j))) + } + } + } + }) + + test('tensor', () => { + const opt = new AMSGradOptimizer(0.1) + const manager = opt.manager() + const beta1 = 0.9 + const beta2 = 0.999 + + const m = Tensor.zeros([7, 5, 3]) + const v = Tensor.zeros([7, 5, 3]) + const vh = Tensor.zeros([7, 5, 3]) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + m.broadcastOperate(mat, (a, b) => a * beta1 + b * (1 - beta1)) + v.broadcastOperate(mat, (a, b) => a * beta2 + b ** 2 * (1 - beta2)) + vh.broadcastOperate(v, (a, b) => Math.max(a, b)) + for (let i = 0; i < mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo( + ((0.1 / Math.sqrt(t + 1)) * m.at(i, j, k)) / Math.sqrt(vh.at(i, j, k)) + ) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( + [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'sigmoid' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'amsgrad' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 3) + + const losslog = [] + for (let i = 0; i < 100; i++) { + const loss = net.fit(x, t, 1000, 0.01) + losslog.push(loss[0]) + if (loss[0] < 1.0e-8) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +}) diff --git a/tests/lib/model/nns/optimizer/nadam.test.js b/tests/lib/model/nns/optimizer/nadam.test.js index 1a52d37f..1028e06a 100644 --- a/tests/lib/model/nns/optimizer/nadam.test.js +++ b/tests/lib/model/nns/optimizer/nadam.test.js @@ -7,7 +7,7 @@ import Matrix from '../../../../../lib/util/matrix.js' import { NadamOptimizer } from '../../../../../lib/model/nns/optimizer/nadam.js' import Tensor from '../../../../../lib/util/tensor.js' -describe('adam', () => { +describe('nadam', () => { test('lr', () => { const opt = new NadamOptimizer(0.1) const manager = opt.manager() diff --git a/tests/lib/model/nns/optimizer/santae.test.js b/tests/lib/model/nns/optimizer/santae.test.js new file mode 100644 index 00000000..26f44502 --- /dev/null +++ b/tests/lib/model/nns/optimizer/santae.test.js @@ -0,0 +1,174 @@ +import { jest } from '@jest/globals' +jest.retryTimes(20) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { SantaEOptimizer } from '../../../../../lib/model/nns/optimizer/santae.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('santae', () => { + test('lr', () => { + const opt = new SantaEOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new SantaEOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = 
opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + let v = 0 + let g = 0 + let a = Math.sqrt(lr) * c + let u = Math.sqrt(lr) * 0.02 + for (let t = 0; t < 10; t++) { + const x = Math.random() / (t + 1) + const d = manager.delta('w', x) + expect(typeof d).toBe('number') + v = v * sigma + ((1 - sigma) / n ** 2) * x ** 2 + const gp = g + const up = u + g = 1 / Math.sqrt(lambda + Math.sqrt(v)) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a += u ** 2 - lr / beta + u = ((lr / beta) * (1 - gp / g)) / u + Math.sqrt(((2 * lr) / beta) * gp) * 0.02 + } else { + u = 0 + } + u += (1 - a) * up - lr * g * x + expect(d).toBeCloseTo(-g * u) + } + }) + + test('matrix', () => { + const opt = new SantaEOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + const v = Matrix.zeros(10, 3) + const g = Matrix.zeros(10, 3) + const a = new Matrix(10, 3, Math.sqrt(lr) * c) + const u = new Matrix(10, 3, Math.sqrt(lr) * 0.02) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + mat.div((t + 10) ** 2) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + v.map((v, i) => v * sigma + ((1 - sigma) / n ** 2) * mat.at(i) ** 2) + const gp = g.copy() + const up = u.copy() + g.map((_, i) => 1 / Math.sqrt(lambda + Math.sqrt(v.at(i)))) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a.map((v, i) => v + u.at(i) ** 2 - lr / beta) + u.map( + (v, i) => + ((lr / beta) * (1 - gp.at(i) / g.at(i))) / v + + Math.sqrt(((2 * lr) / beta) * gp.at(i)) * 0.02 + ) + } else { + u.fill(0) + } + u.map((v, i) => v + (1 - a.at(i)) * up.at(i) - lr * g.at(i) * mat.at(i)) + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; j++) { + expect(d.at(i, j)).toBeCloseTo(-g.at(i, j) * u.at(i, j)) + } + } + } + }) + + test('tensor', () => { + const opt = new SantaEOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + const v = Tensor.zeros([7, 5, 3]) + const g = Tensor.zeros([7, 5, 3]) + const a = new Tensor([7, 5, 3], Math.sqrt(lr) * c) + const u = new Tensor([7, 5, 3], Math.sqrt(lr) * 0.02) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + mat.map(v => v / (t + 10) ** 2) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + v.map((v, i) => v * sigma + ((1 - sigma) / n ** 2) * mat.at(i) ** 2) + const gp = g.copy() + const up = u.copy() + g.map((_, i) => 1 / Math.sqrt(lambda + Math.sqrt(v.at(i)))) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a.map((v, i) => v + u.at(i) ** 2 - lr / beta) + u.map( + (v, i) => + ((lr / beta) * (1 - gp.at(i) / g.at(i))) / v + + Math.sqrt(((2 * lr) / beta) * gp.at(i)) * 0.02 + ) + } else { + u.fill(0) + } + u.map((v, i) => v + (1 - a.at(i)) * up.at(i) - lr * g.at(i) * mat.at(i)) + for (let i = 0; i < mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo(-g.at(i, j, k) * u.at(i, j, k)) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( + [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'tanh' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'santae' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 
3) + + const losslog = [] + for (let i = 0; i < 100; i++) { + const loss = net.fit(x, t, 1000, 0.0001) + losslog.push(loss[0]) + if (loss[0] < 1.0e-7) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +}) diff --git a/tests/lib/model/nns/optimizer/santasss.test.js b/tests/lib/model/nns/optimizer/santasss.test.js new file mode 100644 index 00000000..ca783775 --- /dev/null +++ b/tests/lib/model/nns/optimizer/santasss.test.js @@ -0,0 +1,192 @@ +import { jest } from '@jest/globals' +jest.retryTimes(10) + +import NeuralNetwork from '../../../../../lib/model/neuralnetwork.js' +import Matrix from '../../../../../lib/util/matrix.js' + +import { SantaSSSOptimizer } from '../../../../../lib/model/nns/optimizer/santasss.js' +import Tensor from '../../../../../lib/util/tensor.js' + +describe('santasss', () => { + test('lr', () => { + const opt = new SantaSSSOptimizer(0.1) + const manager = opt.manager() + expect(manager.lr).toBe(0.1) + }) + + describe('delta', () => { + test('scalar', () => { + const opt = new SantaSSSOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + let v = 0 + let g = 0 + let a = Math.sqrt(lr) * c + let u = Math.sqrt(lr) * 0.02 + for (let t = 0; t < 10; t++) { + const x = Math.random() / (t + 1) + const d = manager.delta('w', x) + expect(typeof d).toBe('number') + v = v * sigma + ((1 - sigma) / n ** 2) * x ** 2 + const gp = g + const up = u + g = 1 / Math.sqrt(lambda + Math.sqrt(v)) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a += (u ** 2 - lr / beta) / 2 + u = Math.exp(-a / 2) * u + u += -g * x * lr + Math.sqrt((2 * gp * lr) / beta) * 0.02 + ((lr / beta) * (1 - gp / g)) / up + u = Math.exp(-a / 2) * u + a += (u ** 2 - lr / beta) / 2 + } else { + u = Math.exp(-a / 2) * u + u -= g * x * lr + u = Math.exp(-a / 2) * u + } + expect(d).toBeCloseTo((-g * up) / 2 - (g * u) / 2) + } + }) + + test('matrix', () => { + const opt = new SantaSSSOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + const v = Matrix.zeros(10, 3) + const g = Matrix.zeros(10, 3) + const a = new Matrix(10, 3, Math.sqrt(lr) * c) + const u = new Matrix(10, 3, Math.sqrt(lr) * 0.02) + for (let t = 0; t < 10; t++) { + const mat = Matrix.randn(10, 3) + mat.div((t + 10) ** 2) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([10, 3]) + v.map((v, i) => v * sigma + ((1 - sigma) / n ** 2) * mat.at(i) ** 2) + const gp = g.copy() + const up = u.copy() + g.map((_, i) => 1 / Math.sqrt(lambda + Math.sqrt(v.at(i)))) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a.map((v, i) => v + (u.at(i) ** 2 - lr / beta) / 2) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + u.map( + (v, i) => + v - + g.at(i) * mat.at(i) * lr + + ((lr / beta) * (1 - gp.at(i) / g.at(i))) / up.at(i) + + Math.sqrt(((2 * lr) / beta) * gp.at(i)) * 0.02 + ) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + a.map((v, i) => v + (u.at(i) ** 2 - lr / beta) / 2) + } else { + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + u.map((v, i) => v - g.at(i) * mat.at(i) * lr) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + } + for (let i = 0; i < mat.rows; i++) { + for (let j = 0; j < mat.cols; 
j++) { + expect(d.at(i, j)).toBeCloseTo((-g.at(i, j) * u.at(i, j)) / 2 - (g.at(i, j) * up.at(i, j)) / 2) + } + } + } + }) + + test('tensor', () => { + const opt = new SantaSSSOptimizer(0.1, 0.95, 5) + opt._z = () => 0.02 + const manager = opt.manager() + const sigma = 0.95 + const c = 5 + const n = 16 + const lambda = 0.01 + const burnin = 5 + const lr = 0.1 + + const v = Tensor.zeros([7, 5, 3]) + const g = Tensor.zeros([7, 5, 3]) + const a = new Tensor([7, 5, 3], Math.sqrt(lr) * c) + const u = new Tensor([7, 5, 3], Math.sqrt(lr) * 0.02) + for (let t = 0; t < 10; t++) { + const mat = Tensor.randn([7, 5, 3]) + mat.map(v => v / (t + 10) ** 2) + const d = manager.delta('w', mat) + expect(d.sizes).toEqual([7, 5, 3]) + v.map((v, i) => v * sigma + ((1 - sigma) / n ** 2) * mat.at(i) ** 2) + const gp = g.copy() + const up = u.copy() + g.map((_, i) => 1 / Math.sqrt(lambda + Math.sqrt(v.at(i)))) + if (t < burnin - 1) { + const beta = (t + 1) ** 0.5 + a.map((v, i) => v + (u.at(i) ** 2 - lr / beta) / 2) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + u.map( + (v, i) => + v - + g.at(i) * mat.at(i) * lr + + ((lr / beta) * (1 - gp.at(i) / g.at(i))) / up.at(i) + + Math.sqrt(((2 * lr) / beta) * gp.at(i)) * 0.02 + ) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + a.map((v, i) => v + (u.at(i) ** 2 - lr / beta) / 2) + } else { + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + u.map((v, i) => v - g.at(i) * mat.at(i) * lr) + u.map((v, i) => Math.exp(-a.at(i) / 2) * v) + } + for (let i = 0; i < mat.sizes[0]; i++) { + for (let j = 0; j < mat.sizes[1]; j++) { + for (let k = 0; k < mat.sizes[2]; k++) { + expect(d.at(i, j, k)).toBeCloseTo( + (-g.at(i, j, k) * u.at(i, j, k)) / 2 - (g.at(i, j, k) * up.at(i, j, k)) / 2 + ) + } + } + } + } + }) + }) +}) + +test('nn', () => { + const net = NeuralNetwork.fromObject( + [ + { type: 'input', name: 'in' }, + { type: 'full', out_size: 5, activation: 'tanh' }, + { type: 'full', out_size: 3 }, + ], + 'mse', + 'santasss' + ) + const x = Matrix.randn(1, 10) + const t = Matrix.randn(1, 3) + + const losslog = [] + for (let i = 0; i < 1000; i++) { + const loss = net.fit(x, t, 1000, 0.001) + losslog.push(loss[0]) + if (loss[0] < 1.0e-7) { + break + } + if (losslog.length > 10 && (losslog.at(-10) - loss[0]) / loss[0] < 1.0e-5) { + throw new Error('Test failed.') + } + } + + const y = net.calc(x) + for (let i = 0; i < 3; i++) { + expect(y.at(0, i)).toBeCloseTo(t.at(0, i)) + } +})
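Note: with this patch every optimizer class is re-exported from lib/model/nns/optimizer.js under its lowercase name, so UIs can enumerate the choices with Object.keys() and NeuralNetwork can resolve an optimizer from a string (unknown names fall back to sgd). A minimal usage sketch, relying only on the entry points exercised by the diffs and tests above:

    import NeuralNetwork from './lib/model/neuralnetwork.js'
    import * as opt from './lib/model/nns/optimizer.js'

    // every exported key is a valid optimizer name, e.g. 'amsgrad' or 'santasss'
    console.log(Object.keys(opt))

    // pass a key as the third argument of fromObject ...
    const net = NeuralNetwork.fromObject(
        [{ type: 'input' }, { type: 'full', out_size: 3 }],
        'mse',
        'adabelief'
    )

    // ... or instantiate a class directly through its lowercase alias
    const optimizer = new opt.adabound(0.001)
    const manager = optimizer.manager()
    const delta = manager.delta('w', 0.5) // scalar in, scalar out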