Skip to content

Commit

Permalink
results are mediocre
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Feb 1, 2023
1 parent 33a3259 commit 6dee268
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 142 deletions.
64 changes: 60 additions & 4 deletions scratch.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,71 @@
import { learn, test, reverse, convert, compress } from './src/index.js'
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js'
import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js'
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js'
import summarize from './tmp/index.js'


// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js' //0.3kb
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js' //1.6kb
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js' //4.5kb
import pairs from '/Users/spencer/mountain/compromise/data/pairs/Gerund.js'//5kb, 5s


// presumably the options intended for learn() (accuracy threshold + reverse flag).
// NOTE(review): the learn(pairs) call visible in this file does not pass `opts` — confirm whether that is intended.
let opts = {
threshold: 0.8,
reverse: true
}

// let pairs = [
// ['agatiser', 'agatiserai'],
// ['agencer', 'agencerai'],
// ['agenouiller', 'agenouillerai'],
// ['agneler', 'agnellerai'],
// ['agonir', 'agonirai'],
// ['agoniser', 'agoniserai'],
// ['agrafer', 'agraferai'],
// ['agrandir', 'agrandirai'],
// ['amollir', 'amollirai'],
// ['amonceler', 'amoncellerai'],
// ['amorcer', 'amorcerai'],
// ['ankyloser', 'ankyloserai'],
// ['anneler', 'annellerai'],
// ['appauvrir', 'appauvrirai'],
// ['appeler', 'appellerai'],
// ['attaquer', 'attaquerai'],
// ['attarder', 'attarderai'],
// ['atteler', 'attellerai'],
// ['attenter', 'attenterai'],
// ['autocentrer', 'autocentrerai'],
// ['autodévelopper', 'autodévelopperai'],
// ['autodiscipliner', 'autodisciplinerai'],
// ['autoévaporiser', 'autoévaporiserai'],
// ['autofinancer', 'autofinancerai'],
// ['balancer', 'balancerai'],
// ['balayer', 'balayerai'],
// ]
// pairs = [
// ['neighbouring', 'neighbour'],
// ['colouring', 'colour'],
// ['flavouring', 'flavour'],
// ['touring', 'tour'],
// ['scouring', 'scour'],
// ['honouring', 'honour'],
// ['favouring', 'favour'],
// ['labouring', 'labour'],
// ['devouring', 'devour'],
// ['harbouring', 'harbour'],
// ['clamouring', 'clamour'],
// ['pouring', 'pour'],
// ['autodévelopper', 'autodévelopperai'],
// ['autodiscipliner', 'autodisciplinerai'],
// ['autoévaporiser', 'autoévaporiserai'],
// ['autofinancer', 'autofinancerai'],
// ['balancer', 'balancerai'],
// ['balayer', 'balayerai'],
// ['contouring', 'contour'],
// ['endeavouring', 'endeavour']
// ]

// learn a model from the imported pairs, print it raw, then print whatever summarize() returns for it
let model = learn(pairs)
console.log(model)
console.log(summarize(model))
// console.log(reverse(model))
// console.log('----')
// console.log(model)
Expand Down
31 changes: 0 additions & 31 deletions src/compress/index.js

This file was deleted.

24 changes: 0 additions & 24 deletions src/compress/press.js

This file was deleted.

48 changes: 0 additions & 48 deletions src/compress/uncompress.js

This file was deleted.

53 changes: 23 additions & 30 deletions src/learn/01-findRules.js
Original file line number Diff line number Diff line change
@@ -1,38 +1,24 @@
import getSuffix from './lib/getSuffix.js'
import goodEnough from './lib/goodEnough.js'
import convert from './lib/convert.js'

// terminal color helpers: prefix the text with an ANSI color code and append the reset code
function magenta(str) {
  return '\x1b[35m' + str + '\x1b[0m'
}

function yellow(str) {
  return '\x1b[33m' + str + '\x1b[0m'
}

// memoize failed rules
let badRule = new Set()

/**
 * List the pairs that are no longer outstanding.
 * @param {Array} remain - pairs still waiting to be handled; only each pair's first element is read
 * @param {Array} pairs - the full list of pairs
 * @returns {Array} every entry of `pairs` whose first element does not appear as a first element in `remain`
 */
const completePairs = function (remain, pairs) {
  // collect the source words that are still outstanding
  const pending = remain.reduce((seen, pair) => seen.add(pair[0]), new Set())
  return pairs.filter(pair => pending.has(pair[0]) === false)
}

/**
 * Check that a candidate rule does not break any already-solved pair.
 *
 * A pair is only considered when its source word ends with the rule's `from`
 * suffix; for each of those, applying the rule must produce exactly the
 * expected target word.
 *
 * @param {Array} pairs - [source, target] pairs that are already handled correctly
 * @param {Object} rule - candidate suffix rule, shaped like { from, to }
 * @returns {boolean} true when no applicable pair is converted to the wrong word
 *   (an empty list is trivially perfect)
 */
const isPerfect = function (pairs, rule) {
  return pairs.every(([source, target]) => {
    // the suffix check must use the rule's suffix, not the word itself
    if (!source.endsWith(rule.from)) {
      return true
    }
    // the rule applies to this word, so it must reproduce the known answer
    return convert(source, rule) === target
  })
}


const findRules = function (remain, pairs, threshold) {
let rules = {}
let ex = {}
let done = completePairs(remain, pairs)
// ensure pairs are prefix aligned, in the first-place
remain = remain.filter(arr => {
let [a, b] = arr
Expand All @@ -47,19 +33,18 @@ const findRules = function (remain, pairs, threshold) {
// console.log(`\n--- #${peek} ---`)
for (let i = 0; i < remain.length; i += 1) {
let rule = getSuffix(remain[i], peek)
// ensure the rule passes our accuracy threshold, and does not affect existing pairs
if (rule !== null && isPerfect(done, rule) && goodEnough(rule, remain, threshold)) {
// add it
rules[rule.from] = rules[rule.from] || rule.to
// what's left, now?
remain = remain.filter(pair => {
if (convert(pair[0], rule) !== pair[1]) {
return true
}
done.push(pair)
return false
})
// console.log(`+${yellow((rule.from || "''").padStart(7))} → ${magenta(rule.to).padEnd(19)} ${done.length} good, ${remain.length} left`)
if (rule !== null && goodEnough(rule, pairs, threshold)) {
// ensure this rule does not break any existing pairs
let goodOnes = completePairs(remain, pairs)
if (isPerfect(goodOnes, rule)) {
// console.log(rule)
// console.log(goodOnes)
// add it
rules[rule.from] = rules[rule.from] || rule.to
// what's left, now?
remain = remain.filter(pair => convert(pair[0], rule) !== pair[1])
// console.log(`+${yellow((rule.from || "''").padStart(7))} → ${magenta(rule.to).padEnd(19)} ${goodOnes.length} good, ${remain.length} left`)
}
}
}
if (remain.length === 0) {
Expand All @@ -70,7 +55,15 @@ const findRules = function (remain, pairs, threshold) {
remain.forEach(p => {
ex[p[0]] = p[1]
})
badRule.clear()
return { fwd: rules, ex }
}
export default findRules


// NOTE(review): ad-hoc debugging check at module scope — it runs every time this file is imported
// and prints the isPerfect() result for one hand-picked rule against three sample pairs.
// Consider moving it to a scratch file or a test before release.
let rule = { from: 'ler', to: 'llerai' }
let pairs = [
['agatiser', 'agatiserai'],
['agencer', 'agencerai'],
['agenouiller', 'agenouillerai'],
]
console.log(isPerfect(pairs, rule))
9 changes: 4 additions & 5 deletions src/pack/pack.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import keyVal from './key-val.js'

const packObj = function (obj) {
const packObj = function (obj = {}) {
let r = []
Object.keys(obj).forEach(k => {
let val = keyVal(k, obj[k])// compress any shared prefix
Expand All @@ -13,10 +13,9 @@ const pack = function (model) {
let out = {
fwd: packObj(model.fwd),
both: packObj(model.both),
bkwd: packObj(model.bkwd),
}
if (model.ex) {
out.ex = packObj(model.ex)
rev: packObj(model.rev),
ex: packObj(model.ex),
same: (model.same || []).join(',')
}
return JSON.stringify(out)
}
Expand Down
19 changes: 19 additions & 0 deletions tmp/compare.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { learn, compress } from 'suffix-thumb/builds/suffix-thumb.mjs'
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-words.js' //0.3kb
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/fr-nous.js' //4.5kb
// import pairs from '/Users/spencer/mountain/suffix-thumb/test/data/future-simple.js' //1.6kb
import pairs from '/Users/spencer/mountain/compromise/data/pairs/Gerund.js'//5kb, 5s

// import pairList from '/Users/spencer/mountain/fr-compromise/data/models/adjective/index.js' //1.7kb, 7 seconds
// let pairs = Object.keys(pairList).map(k => [k, pairList[k][0]])

import filesize from './filesize.js'

// time a learn() + compress() run over the imported pairs, printing each result along the way
const startMs = Date.now()

const model = learn(pairs)
console.log(model)

const pkd = compress(model)
console.log(pkd)

// elapsed wall-clock time in seconds (the logging above is included in the measurement)
const elapsedSeconds = (Date.now() - startMs) / 1000
console.log(elapsedSeconds, 'seconds')

// finally, report what the filesize helper returns for the compressed output
console.log(filesize(pkd))
File renamed without changes.
20 changes: 20 additions & 0 deletions tmp/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import filesize from './filesize.js'
import { compress } from '../src/index.js'

// terminal styling helpers: each one prefixes the text with an ANSI code and appends the reset code
function green(str) {
  return '\x1b[32m' + str + '\x1b[0m'
}

function red(str) {
  return '\x1b[31m' + str + '\x1b[0m'
}

function blue(str) {
  return '\x1b[34m' + str + '\x1b[0m'
}

function magenta(str) {
  return '\x1b[35m' + str + '\x1b[0m'
}

function cyan(str) {
  return '\x1b[36m' + str + '\x1b[0m'
}

function yellow(str) {
  return '\x1b[33m' + str + '\x1b[0m'
}

function black(str) {
  return '\x1b[30m' + str + '\x1b[0m'
}

function dim(str) {
  return '\x1b[2m' + str + '\x1b[0m'
}

/**
 * Print a colored summary of a model to the console: the number of keys in its
 * fwd / both / rev sections, the number of keys in ex, and the filesize() of
 * its compressed form.
 * @param {Object} model - object with `fwd`, `both`, `rev` and `ex` objects
 */
const inspect = function (model) {
  // number of keys in one section of the model
  const size = section => Object.keys(section).length

  console.log(green(size(model.fwd)), 'fwd', magenta(size(model.both)), 'both', magenta(size(model.rev)), 'rev')
  console.log(' ', cyan(size(model.ex)), 'ex')

  const packed = compress(model)
  const total = filesize(packed) + ' total'
  console.log(blue(total))
  console.log('\n\n')
}
export default inspect

0 comments on commit 6dee268

Please sign in to comment.