From 6dee2682ffcb90976a6eb12988d5e8b9ce3fe41d Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Wed, 1 Feb 2023 13:57:21 -0500 Subject: [PATCH] results are mediocre --- scratch.js | 64 +++++++++++++++++++++++++++++++--- src/compress/index.js | 31 ---------------- src/compress/press.js | 24 ------------- src/compress/uncompress.js | 48 ------------------------- src/learn/01-findRules.js | 53 ++++++++++++---------------- src/pack/pack.js | 9 +++-- tmp/compare.js | 19 ++++++++++ filesize.js => tmp/filesize.js | 0 tmp/index.js | 20 +++++++++++ 9 files changed, 126 insertions(+), 142 deletions(-) delete mode 100644 src/compress/index.js delete mode 100644 src/compress/press.js delete mode 100644 src/compress/uncompress.js create mode 100644 tmp/compare.js rename filesize.js => tmp/filesize.js (100%) create mode 100644 tmp/index.js diff --git a/scratch.js b/scratch.js index 455b99e..d290ea6 100644 --- a/scratch.js +++ b/scratch.js @@ -1,15 +1,71 @@ import { learn, test, reverse, convert, compress } from './src/index.js' -// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js' -import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js' -// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js' +import summarize from './tmp/index.js' + + +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js' //0.3kb +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js' //1.6kb +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js' //4.5kb +import pairs from '/Users/spencer/mountain/compromise/data/pairs/Gerund.js'//5kb, 5s + + let opts = { threshold: 0.8, reverse: true } +// let pairs = [ +// ['agatiser', 'agatiserai'], +// ['agencer', 'agencerai'], +// ['agenouiller', 'agenouillerai'], +// ['agneler', 'agnellerai'], +// ['agonir', 'agonirai'], +// ['agoniser', 'agoniserai'], +// ['agrafer', 'agraferai'], +// ['agrandir', 'agrandirai'], +// ['amollir', 'amollirai'], +// ['amonceler', 'amoncellerai'], +// ['amorcer', 'amorcerai'], +// ['ankyloser', 'ankyloserai'], +// ['anneler', 'annellerai'], +// ['appauvrir', 'appauvrirai'], +// ['appeler', 'appellerai'], +// ['attaquer', 'attaquerai'], +// ['attarder', 'attarderai'], +// ['atteler', 'attellerai'], +// ['attenter', 'attenterai'], +// ['autocentrer', 'autocentrerai'], +// ['autodévelopper', 'autodévelopperai'], +// ['autodiscipliner', 'autodisciplinerai'], +// ['autoévaporiser', 'autoévaporiserai'], +// ['autofinancer', 'autofinancerai'], +// ['balancer', 'balancerai'], +// ['balayer', 'balayerai'], +// ] +// pairs = [ +// ['neighbouring', 'neighbour'], +// ['colouring', 'colour'], +// ['flavouring', 'flavour'], +// ['touring', 'tour'], +// ['scouring', 'scour'], +// ['honouring', 'honour'], +// ['favouring', 'favour'], +// ['labouring', 'labour'], +// ['devouring', 'devour'], +// ['harbouring', 'harbour'], +// ['clamouring', 'clamour'], +// ['pouring', 'pour'], +// ['autodévelopper', 'autodévelopperai'], +// ['autodiscipliner', 'autodisciplinerai'], +// ['autoévaporiser', 'autoévaporiserai'], +// ['autofinancer', 'autofinancerai'], +// ['balancer', 'balancerai'], +// ['balayer', 'balayerai'], +// ['contouring', 'contour'], +// ['endeavouring', 'endeavour'] +// ] let model = learn(pairs) -console.log(model) +console.log(summarize(model)) // console.log(reverse(model)) // console.log('----') // console.log(model) diff --git a/src/compress/index.js b/src/compress/index.js deleted file mode 100644 index b8dbd41..0000000 --- a/src/compress/index.js +++ /dev/null @@ -1,31 +0,0 @@ -import press from './press.js' -import { unIndex } from '../_lib.js' - -// remove shared data in key-val pairs -// uses an ad-hoc run-length encoding format -// {walk: walking} -> {walk: '.4ing'} -const pressPairs = function (pairs) { - pairs = pairs.map(a => { - return press(a[0], a[1]).join('|') - }) - return pairs.join(',') -} - -const compress = function (model = {}) { - model = Object.assign({}, model) - - // compress fwd rules - model.rules = unIndex(model.rules) - model.rules = pressPairs(model.rules) - // compress reverse rules - if (model.rev) { - model.rev = unIndex(model.rev) - model.rev = pressPairs(model.rev) - } - - // compress exceptions - model.exceptions = Object.entries(model.exceptions) - model.exceptions = pressPairs(model.exceptions) - return model -} -export default compress \ No newline at end of file diff --git a/src/compress/press.js b/src/compress/press.js deleted file mode 100644 index e0485e4..0000000 --- a/src/compress/press.js +++ /dev/null @@ -1,24 +0,0 @@ -// longest common prefix -const findOverlap = (from, to) => { - let all = [] - for (let i = 0; i < from.length; i += 1) { - if (from[i] === to[i]) { - all.push(from[i]) - } else { - break - } - } - return all.join('') -} - -let compress = function (key, val) { - let prefix = findOverlap(key, val) - if (prefix.length < 1) { - return [key, val] - } - let out = prefix.length + val.substr(prefix.length) - return [key, out] -} - -export default compress -// console.log(compress('fixture', 'fixturing')) \ No newline at end of file diff --git a/src/compress/uncompress.js b/src/compress/uncompress.js deleted file mode 100644 index f57864b..0000000 --- a/src/compress/uncompress.js +++ /dev/null @@ -1,48 +0,0 @@ -const prefix = /^([0-9]+)/ -import { indexRules } from '../_lib.js' - -const expand = function (key = '', val = '') { - val = String(val) - let m = val.match(prefix) - if (m === null) { - return [key, val] - } - let num = Number(m[1]) || 0 - let pre = key.substring(0, num) - let full = pre + val.replace(prefix, '') - return [key, full] -} - -const toArray = function (txt) { - const pipe = /\|/ - return txt.split(/,/).map(str => { - let a = str.split(pipe) - return expand(a[0], a[1]) - }) -} - -const uncompress = function (model = {}) { - model = Object.assign({}, model) - - // compress fwd rules - model.rules = toArray(model.rules) - model.rules = indexRules(model.rules) - - // compress reverse rules - if (model.rev) { - model.rev = toArray(model.rev) - model.rev = indexRules(model.rev) - } - - // compress exceptions - model.exceptions = toArray(model.exceptions) - model.exceptions = model.exceptions.reduce((h, a) => { - h[a[0]] = a[1] - return h - }, {}) - return model -} -export default uncompress - -// console.log(expand('fixture', '6ing')) -// console.log(toArray('heard|4')) \ No newline at end of file diff --git a/src/learn/01-findRules.js b/src/learn/01-findRules.js index 9ef0d50..26c9c12 100644 --- a/src/learn/01-findRules.js +++ b/src/learn/01-findRules.js @@ -1,12 +1,10 @@ import getSuffix from './lib/getSuffix.js' import goodEnough from './lib/goodEnough.js' import convert from './lib/convert.js' + const magenta = str => '\x1b[35m' + str + '\x1b[0m' const yellow = str => '\x1b[33m' + str + '\x1b[0m' -// memoize failed rules -let badRule = new Set() - const completePairs = function (remain, pairs) { let todo = new Set() remain.forEach(arr => todo.add(arr[0])) @@ -14,25 +12,13 @@ const completePairs = function (remain, pairs) { } const isPerfect = function (pairs, rule) { - let id = rule.from + '|' + rule.to - if (badRule.has(id)) { - return false - } - for (let i = 0; i < pairs.length; i += 1) { - let [a, b] = pairs[i] - if (a.endsWith(a) && convert(a, rule) !== b) { - badRule.add(id) - return false - } - } - return true + return pairs.every(pair => convert(pair[0], rule) !== pair[1]) } const findRules = function (remain, pairs, threshold) { let rules = {} let ex = {} - let done = completePairs(remain, pairs) // ensure pairs are prefix aligned, in the first-place remain = remain.filter(arr => { let [a, b] = arr @@ -47,19 +33,18 @@ const findRules = function (remain, pairs, threshold) { // console.log(`\n--- #${peek} ---`) for (let i = 0; i < remain.length; i += 1) { let rule = getSuffix(remain[i], peek) - // ensure the rule passes our accuracy threshold, and does not effect existing pairs - if (rule !== null && isPerfect(done, rule) && goodEnough(rule, remain, threshold)) { - // add it - rules[rule.from] = rules[rule.from] || rule.to - // what's left, now? - remain = remain.filter(pair => { - if (convert(pair[0], rule) !== pair[1]) { - return true - } - done.push(pair) - return false - }) - // console.log(`+${yellow((rule.from || "''").padStart(7))} → ${magenta(rule.to).padEnd(19)} ${done.length} good, ${remain.length} left`) + if (rule !== null && goodEnough(rule, pairs, threshold)) { + // ensure this rule does not break any existing pairs + let goodOnes = completePairs(remain, pairs) + if (isPerfect(goodOnes, rule)) { + // console.log(rule) + // console.log(goodOnes) + // add it + rules[rule.from] = rules[rule.from] || rule.to + // what's left, now? + remain = remain.filter(pair => convert(pair[0], rule) !== pair[1]) + // console.log(`+${yellow((rule.from || "''").padStart(7))} → ${magenta(rule.to).padEnd(19)} ${goodOnes.length} good, ${remain.length} left`) + } } } if (remain.length === 0) { @@ -70,7 +55,15 @@ const findRules = function (remain, pairs, threshold) { remain.forEach(p => { ex[p[0]] = p[1] }) - badRule.clear() return { fwd: rules, ex } } export default findRules + + +let rule = { from: 'ler', to: 'llerai' } +let pairs = [ + ['agatiser', 'agatiserai'], + ['agencer', 'agencerai'], + ['agenouiller', 'agenouillerai'], +] +console.log(isPerfect(pairs, rule)) \ No newline at end of file diff --git a/src/pack/pack.js b/src/pack/pack.js index 0684236..bdd2142 100644 --- a/src/pack/pack.js +++ b/src/pack/pack.js @@ -1,6 +1,6 @@ import keyVal from './key-val.js' -const packObj = function (obj) { +const packObj = function (obj = {}) { let r = [] Object.keys(obj).forEach(k => { let val = keyVal(k, obj[k])// compress any shared prefix @@ -13,10 +13,9 @@ const pack = function (model) { let out = { fwd: packObj(model.fwd), both: packObj(model.both), - bkwd: packObj(model.bkwd), - } - if (model.ex) { - out.ex = packObj(model.ex) + rev: packObj(model.rev), + ex: packObj(model.ex), + same: (model.same || []).join(',') } return JSON.stringify(out) } diff --git a/tmp/compare.js b/tmp/compare.js new file mode 100644 index 0000000..2fae363 --- /dev/null +++ b/tmp/compare.js @@ -0,0 +1,19 @@ +import { learn, compress } from 'suffix-thumb/builds/suffix-thumb.mjs' +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js' //0.3kb +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js' //4.5kb +// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js' //1.6kb +import pairs from '/Users/spencer/mountain/compromise/data/pairs/Gerund.js'//5kb, 5s + +// import pairList from '/Users/spencer/mountain/fr-compromise/data/models/adjective/index.js' //1.7kb, 7 seconds +// let pairs = Object.keys(pairList).map(k => [k, pairList[k][0]]) + +import filesize from './filesize.js' + +let begin = new Date() +let model = learn(pairs) +console.log(model) +let pkd = compress(model) +console.log(pkd) +let end = new Date() +console.log((end.getTime() - begin.getTime()) / 1000, 'seconds') +console.log(filesize(pkd)) \ No newline at end of file diff --git a/filesize.js b/tmp/filesize.js similarity index 100% rename from filesize.js rename to tmp/filesize.js diff --git a/tmp/index.js b/tmp/index.js new file mode 100644 index 0000000..7c7c1eb --- /dev/null +++ b/tmp/index.js @@ -0,0 +1,20 @@ +import filesize from './filesize.js' +import { compress } from '../src/index.js' + +const green = str => '\x1b[32m' + str + '\x1b[0m' +const red = str => '\x1b[31m' + str + '\x1b[0m' +const blue = str => '\x1b[34m' + str + '\x1b[0m' +const magenta = str => '\x1b[35m' + str + '\x1b[0m' +const cyan = str => '\x1b[36m' + str + '\x1b[0m' +const yellow = str => '\x1b[33m' + str + '\x1b[0m' +const black = str => '\x1b[30m' + str + '\x1b[0m' +const dim = str => '\x1b[2m' + str + '\x1b[0m' + +const inspect = function (model) { + console.log(green(Object.keys(model.fwd).length), 'fwd', magenta(Object.keys(model.both).length), 'both', magenta(Object.keys(model.rev).length), 'rev') + console.log(' ', cyan(Object.keys(model.ex).length), 'ex') + let pkd = compress(model) + console.log(blue(filesize(pkd) + ' total')) + console.log('\n\n') +} +export default inspect \ No newline at end of file