-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6243bc9
commit f2bbcfe
Showing
6 changed files
with
348 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import { DOC, FastDOC } from '../../lib/model/doc.js' | ||
import Controller from '../controller.js' | ||
|
||
/**
 * Sets up the DOC / FastDOC clustering controls and wires the "Fit" button.
 *
 * Writes `platform.setting.ml.usage` and `platform.setting.ml.reference`, reads
 * `platform.trainInput` when fitting and stores the labels in `platform.trainResult`.
 *
 * @param {object} platform Platform object handed to `Controller`
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'C. M. Procopiuc, M. Jones, P. K. Agarwal, T. M. Murali',
		title: 'A monte carlo algorithm for fast projective clustering',
		year: 2002,
	}
	const controller = new Controller(platform)

	// Fits the variant chosen in the select box on the current training input.
	const fitModel = () => {
		let model = null
		if (type.value === 'DOC') {
			model = new DOC(alpha.value, beta.value, w.value)
		} else {
			// The FastDOC variant is the one that consumes `maxiter` and `d0`.
			model = new FastDOC(alpha.value, beta.value, w.value, maxiter.value, d0.value)
		}

		model.fit(platform.trainInput)
		// predict() yields -1 (outside the cluster) or 0 (member); shift to 0 / 1.
		// NOTE(review): presumably the platform expects non-negative categories - confirm.
		const pred = model.predict().map(v => v + 1)
		platform.trainResult = pred
	}

	// `felm` is declared further down; the handler only runs after setup has finished.
	const type = controller.select(['DOC', 'FastDOC']).on('change', () => {
		felm.element.style.display = type.value === 'DOC' ? 'none' : null
	})
	const alpha = controller.input.number({ label: ' alpha ', min: 0, max: 1, step: 0.01, value: 0.1 })
	const beta = controller.input.number({ label: ' beta ', min: 0, max: 0.5, step: 0.01, value: 0.25 })
	const w = controller.input.number({ label: ' width ', min: 0, max: 1000, step: 0.1, value: 0.1 })

	// FastDOC-only inputs live in their own span, hidden while "DOC" is selected.
	const felm = controller.span()
	felm.element.style.display = 'none'
	const maxiter = felm.input.number({ label: ' maxiter ', min: 1, max: 1000000, value: 100 })
	const d0 = felm.input.number({ label: ' d0 ', min: 1, max: 100, value: 2 })

	controller.input.button('Fit').on('click', fitModel)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
/**
 * Density-based Optimal projective Clustering
 */
export class DOC {
	// A monte carlo algorithm for fast projective clustering
	// https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f7a389eb1742d16cf09fc0d631cc0d1e97d49dda
	/**
	 * @param {number} alpha Dense scale
	 * @param {number} beta Balanced value
	 * @param {number} w Width of cluster
	 */
	constructor(alpha, beta, w) {
		this._alpha = alpha
		this._beta = beta
		this._w = w
		this._p = []
		this._d = []

		// Cluster quality: size `a` multiplied by (1 / beta) once per selected dimension `b`.
		this._mu = (a, b) => a * (1 / this._beta) ** b
	}

	/**
	 * Draws `k` indices from `0 .. n - 1` without repetition (meaningful for `k <= n`).
	 *
	 * @param {number} n Size of the index range
	 * @param {number} k Number of indices to draw
	 * @returns {number[]} Drawn indices
	 */
	_select(n, k) {
		// The i-th draw is a rank among the `n - i` indices that are still unused.
		const idx = []
		for (let i = 0; i < k; i++) {
			idx.push(Math.floor(Math.random() * (n - i)))
		}
		// Turn ranks into real indices: walking backwards, each later draw that is
		// not below an earlier one is shifted up by one to skip over it.
		for (let i = idx.length - 1; i >= 0; i--) {
			for (let j = idx.length - 1; j > i; j--) {
				if (idx[i] <= idx[j]) {
					idx[j]++
				}
			}
		}
		return idx
	}

	/**
	 * Fit model.
	 *
	 * Repeatedly picks a random pivot point and a random discriminating set, keeps the
	 * dimensions in which the whole set lies within `w` of the pivot, and collects the
	 * points inside the resulting box. The box with the best `_mu` score among those
	 * holding at least `alpha * n` points wins.
	 *
	 * @param {Array<Array<number>>} datas Sample data
	 * @throws {RangeError} If the number of trials derived from `alpha` / `beta` is not finite
	 */
	fit(datas) {
		const n = datas.length
		if (n === 0) {
			// Nothing to cluster; reading `datas[0]` below would fail.
			this._p = []
			this._d = []
			return
		}
		const d = datas[0].length
		// Size of the discriminating set drawn in every inner trial.
		const r = Math.min(n, Math.ceil(Math.log(2 * d) / Math.log(1 / (2 * this._beta))))
		// Inner trials per pivot, and number of pivots.
		const m = (2 / this._alpha) ** r * Math.log(4)
		const pivots = 2 / this._alpha
		if (pivots === Infinity || m === Infinity) {
			// e.g. alpha = 0, or (2 / alpha) ** r overflowing: the loops below would never end.
			throw new RangeError('DOC: iteration count is not finite; check alpha and beta')
		}

		const minSize = this._alpha * n
		let best_mu = 0
		let opt_cluster = []
		let opt_dim = []

		for (let i = 0; i < pivots; i++) {
			const p = datas[Math.floor(Math.random() * n)]
			for (let j = 0; j < m; j++) {
				const xi = this._select(n, r)

				// Box around the pivot: bounded by +-w in the kept dimensions, unbounded elsewhere.
				const l = Array(d).fill(-Infinity)
				const h = Array(d).fill(Infinity)
				const td = []
				for (let k = 0; k < d; k++) {
					if (xi.every(t => Math.abs(datas[t][k] - p[k]) <= this._w)) {
						td.push(k)
						l[k] = p[k] - this._w
						h[k] = p[k] + this._w
					}
				}

				const c = []
				for (let t = 0; t < n; t++) {
					if (datas[t].every((v, k) => l[k] <= v && v <= h[k])) {
						c.push(t)
					}
				}
				if (c.length < minSize) {
					// Too sparse to count as a cluster.
					continue
				}

				const mu = this._mu(c.length, td.length)
				if (best_mu < mu) {
					best_mu = mu
					opt_cluster = c
					opt_dim = td
				}
			}
		}

		// Label members of the best cluster 0 and everything else -1.
		const p = Array(n).fill(-1)
		for (let i = 0; i < opt_cluster.length; i++) {
			p[opt_cluster[i]] = 0
		}
		this._p = p
		this._d = opt_dim
	}

	/**
	 * Returns predicted categories.
	 * @returns {number[]} Predicted values (0 for cluster members, -1 otherwise)
	 */
	predict() {
		return this._p
	}
}
|
||
/**
 * Fast Density-based Optimal projective Clustering
 */
export class FastDOC {
	// A monte carlo algorithm for fast projective clustering
	// https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=f7a389eb1742d16cf09fc0d631cc0d1e97d49dda
	/**
	 * @param {number} alpha Dense scale
	 * @param {number} beta Balanced value
	 * @param {number} w Width of cluster
	 * @param {number} maxiter Maximum inner iteration
	 * @param {number} d0 Threshold of selected dimension count
	 */
	constructor(alpha, beta, w, maxiter, d0) {
		this._alpha = alpha
		this._beta = beta
		this._w = w
		this._maxiter = maxiter
		this._d0 = d0
		this._p = []
		this._d = []
	}

	/**
	 * Draws `k` indices from `0 .. n - 1` without repetition (meaningful for `k <= n`).
	 *
	 * @param {number} n Size of the index range
	 * @param {number} k Number of indices to draw
	 * @returns {number[]} Drawn indices
	 */
	_select(n, k) {
		// The i-th draw is a rank among the `n - i` indices that are still unused.
		const idx = []
		for (let i = 0; i < k; i++) {
			idx.push(Math.floor(Math.random() * (n - i)))
		}
		// Turn ranks into real indices: walking backwards, each later draw that is
		// not below an earlier one is shifted up by one to skip over it.
		for (let i = idx.length - 1; i >= 0; i--) {
			for (let j = idx.length - 1; j > i; j--) {
				if (idx[i] <= idx[j]) {
					idx[j]++
				}
			}
		}
		return idx
	}

	/**
	 * Fit model.
	 *
	 * Searches for the pivot / discriminating-set pair that keeps the most dimensions
	 * (stopping early once `d0` dimensions are kept), then labels every point inside the
	 * box of half-width `w` around that pivot in the kept dimensions.
	 *
	 * @param {Array<Array<number>>} datas Sample data
	 * @throws {RangeError} If the number of pivots derived from `alpha` is not finite
	 */
	fit(datas) {
		const n = datas.length
		if (n === 0) {
			// Nothing to cluster; reading `datas[0]` below would fail.
			this._p = []
			this._d = []
			return
		}
		const d = datas[0].length
		// Size of the discriminating set drawn in every inner trial.
		const r = Math.min(n, Math.ceil(Math.log(2 * d) / Math.log(1 / (2 * this._beta))))
		// Inner trials per pivot, capped by `maxiter`.
		const m = Math.min(this._maxiter, (2 / this._alpha) ** r * Math.log(4))
		const pivots = 2 / this._alpha
		if (pivots === Infinity) {
			// alpha = 0: the outer loop would only end if `d0` happened to be reached.
			throw new RangeError('FastDOC: iteration count is not finite; check alpha')
		}

		let opt_dim = []
		let opt_p = null
		let reached = false

		for (let i = 0; i < pivots && !reached; i++) {
			const p = datas[Math.floor(Math.random() * n)]
			for (let j = 0; j < m; j++) {
				const xi = this._select(n, r)

				// Dimensions in which the whole discriminating set lies within `w` of the pivot.
				const td = []
				for (let k = 0; k < d; k++) {
					if (xi.every(t => Math.abs(datas[t][k] - p[k]) <= this._w)) {
						td.push(k)
					}
				}
				// `>=` keeps the latest candidate on ties.
				if (td.length >= opt_dim.length) {
					opt_dim = td
					opt_p = p
				}
				if (opt_dim.length >= this._d0) {
					reached = true
					break
				}
			}
		}

		// Box around the chosen pivot: bounded by +-w in the kept dimensions, unbounded elsewhere.
		const l = Array.from({ length: d }, () => -Infinity)
		const h = Array.from({ length: d }, () => Infinity)
		for (const k of opt_dim) {
			l[k] = opt_p[k] - this._w
			h[k] = opt_p[k] + this._w
		}

		// Label points inside the box 0 and everything else -1.
		const p = Array(n).fill(-1)
		for (let t = 0; t < n; t++) {
			if (datas[t].every((v, k) => l[k] <= v && v <= h[k])) {
				p[t] = 0
			}
		}

		this._p = p
		this._d = opt_dim
	}

	/**
	 * Returns predicted categories.
	 * @returns {number[]} Predicted values (0 for cluster members, -1 otherwise)
	 */
	predict() {
		return this._p
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import { getPage } from '../helper/browser' | ||
|
||
describe('clustering', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page

	// Resolves the element that holds the controls of the selected method.
	const findButtons = async () => {
		const menu = await page.waitForSelector('#ml_selector #method_menu')
		return menu.waitForSelector('.buttons')
	}

	beforeEach(async () => {
		page = await getPage()
		// Choose the clustering task first, then the "doc" model.
		const taskSelect = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelect.selectOption('CT')
		const modelSelect = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelect.selectOption('doc')
	})

	afterEach(async () => {
		await page?.close()
	})

	test('initialize', async () => {
		const buttons = await findButtons()

		const typeSelect = await buttons.waitForSelector('select:nth-of-type(1)')
		const typeValue = await typeSelect.getProperty('value')
		await expect(typeValue.jsonValue()).resolves.toBe('DOC')

		// alpha, beta and width inputs, in document order, with their default values.
		const defaults = [
			['input:nth-of-type(1)', '0.1'],
			['input:nth-of-type(2)', '0.25'],
			['input:nth-of-type(3)', '0.1'],
		]
		for (const [selector, expected] of defaults) {
			const input = await buttons.waitForSelector(selector)
			await expect(input.getAttribute('value')).resolves.toBe(expected)
		}
	})

	test('learn', async () => {
		const buttons = await findButtons()

		const fit = await buttons.waitForSelector('input[value=Fit]')
		await fit.evaluate(el => el.click())

		const svg = await page.waitForSelector('#plot-area svg')
		await svg.waitForSelector('.datas circle')

		// Collect the distinct fill colours of the plotted points.
		const fills = new Set()
		for (const point of await svg.$$('.datas circle')) {
			fills.add(await point.evaluate(el => el.getAttribute('fill')))
		}
		expect(fills.size).toBe(2)
	})
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import { jest } from '@jest/globals' | ||
jest.retryTimes(3) | ||
|
||
import Matrix from '../../../lib/util/matrix.js' | ||
import { DOC, FastDOC } from '../../../lib/model/doc.js' | ||
|
||
import { randIndex } from '../../../lib/evaluate/clustering.js' | ||
|
||
describe('doc', () => {
	test('small alpha', () => {
		const size = 100
		const model = new DOC(0.1, 0.2, 1.0)
		// Two blobs of `size` points each that differ in the first and third coordinate.
		const x = Matrix.concat(
			Matrix.randn(size, 3, [0, 5, 0], 0.1),
			Matrix.randn(size, 3, [10, 5, 10], 0.1)
		).toArray()

		model.fit(x)
		const y = model.predict()
		expect(y).toHaveLength(x.length)

		// Ground truth: blob number of every row.
		const truth = Array.from({ length: x.length }, (_, i) => Math.floor(i / size))
		expect(randIndex(y, truth)).toBeGreaterThan(0.9)
	})

	test('big alpha', () => {
		const size = 50
		const model = new DOC(0.9, 0.2, 1.0)
		const x = Matrix.concat(Matrix.randn(size, 3, 0, 0.1), Matrix.randn(size, 3, 10, 0.1)).toArray()

		model.fit(x)
		// Only the shape of the prediction is checked here.
		expect(model.predict()).toHaveLength(x.length)
	})
})
|
||
describe('fastdoc', () => {
	test('small alpha', () => {
		const size = 100
		const model = new FastDOC(0.1, 0.2, 1.0, 100, 2)
		// Two blobs of `size` points each that differ in the first and third coordinate.
		const x = Matrix.concat(
			Matrix.randn(size, 3, [0, 5, 0], 0.1),
			Matrix.randn(size, 3, [10, 5, 10], 0.1)
		).toArray()

		model.fit(x)
		const y = model.predict()
		expect(y).toHaveLength(x.length)

		// Ground truth: blob number of every row.
		const truth = Array.from({ length: x.length }, (_, i) => Math.floor(i / size))
		expect(randIndex(y, truth)).toBeGreaterThan(0.9)
	})
})