diff --git a/README.md b/README.md
index e4b02cb9..f59a9ea4 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
 
 | task | model |
 | ---- | ----- |
-| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder |
+| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, NMF, Autoencoder |
 | classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN |
 | semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
 | regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
diff --git a/js/model_selector.js b/js/model_selector.js
index fa670e48..bdcfc2c6 100644
--- a/js/model_selector.js
+++ b/js/model_selector.js
@@ -129,6 +129,7 @@ const AIMethods = [
 				{ value: 'mean_shift', title: 'Mean Shift' },
 				{ value: 'dbscan', title: 'DBSCAN' },
 				{ value: 'optics', title: 'OPTICS' },
+				{ value: 'dtscan', title: 'DTSCAN' },
 				{ value: 'hdbscan', title: 'HDBSCAN' },
 				{ value: 'denclue', title: 'DENCLUE' },
 				{ value: 'dbclasd', title: 'DBCLASD' },
diff --git a/js/view/dtscan.js b/js/view/dtscan.js
new file mode 100644
index 00000000..2cb628b9
--- /dev/null
+++ b/js/view/dtscan.js
@@ -0,0 +1,33 @@
+import DTSCAN from '../../lib/model/dtscan.js'
+import Controller from '../controller.js'
+
+/**
+ * Sets up the DTSCAN view: usage text, reference, parameter inputs, "Fit" button and cluster count.
+ * @param {object} platform Platform whose `trainInput` is clustered and whose `trainResult` receives the labels
+ */
+export default function (platform) {
+	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
+	platform.setting.ml.reference = {
+		author: 'J. Kim, J. Cho',
+		title: 'Delaunay Triangulation-Based Spatial Clustering Technique for Enhanced Adjacent Boundary Detection and Segmentation of LiDAR 3D Point Clouds',
+		year: 2019,
+	}
+	const controller = new Controller(platform)
+
+	// Clusters the current training data and publishes the labels and the number of clusters.
+	const fitModel = () => {
+		const model = new DTSCAN(minpts.value, threshold.value)
+		const pred = model.predict(platform.trainInput)
+		// The model labels noise as -1; shifting by one makes every published label non-negative.
+		platform.trainResult = pred.map(v => v + 1)
+		// Noise is not a cluster, so it is left out of the count.
+		clusters.value = new Set(pred.filter(v => v >= 0)).size
+	}
+
+	const minpts = controller.input.number({ label: 'min pts', min: 2, max: 1000, value: 5 }).on('change', fitModel)
+	const threshold = controller.input
+		.number({ label: 'threshold', min: 0, max: 10, step: 0.1, value: 1.0 })
+		.on('change', fitModel)
+	controller.input.button('Fit').on('click', fitModel)
+	const clusters = controller.text({ label: ' Clusters: ' })
+}
diff --git a/lib/model/dtscan.js b/lib/model/dtscan.js
new file mode 100644
index 00000000..c6973371
--- /dev/null
+++ b/lib/model/dtscan.js
@@ -0,0 +1,353 @@
+/**
+ * 2D point that can carry a value (the triangulation stores the index of the datum in it).
+ */
+class Point {
+	/**
+	 * @param {number[]} p Coordinates; the first two elements are used as x and y
+	 * @param {*} [value] Value attached to the point
+	 */
+	constructor(p, value = null) {
+		this._p = p
+		this.value = value
+	}
+
+	get x() {
+		return this._p[0]
+	}
+
+	get y() {
+		return this._p[1]
+	}
+
+	/**
+	 * Returns the Euclidean distance to the point.
+	 * @param {Point} p Other point
+	 * @returns {number} Distance
+	 */
+	distance(p) {
+		return Math.sqrt((this.x - p.x) ** 2 + (this.y - p.y) ** 2)
+	}
+}
+
+/**
+ * Circle defined by its center and radius.
+ */
+class Circle {
+	/**
+	 * @param {Point} c Center
+	 * @param {number} r Radius
+	 */
+	constructor(c, r) {
+		this._c = c
+		this._r = r
+	}
+
+	/**
+	 * Returns whether the point is strictly inside the circle.
+	 * @param {Point} p Point to test
+	 * @returns {boolean} `true` if the point is inside
+	 */
+	contains(p) {
+		return (p.x - this._c.x) ** 2 + (p.y - this._c.y) ** 2 < this._r ** 2
+	}
+}
+
+/**
+ * Triangle that keeps a reference to the triangle adjoining each of its edges.
+ */
+class Triangle {
+	/**
+	 * @param {Point} p1 First vertex
+	 * @param {Point} p2 Second vertex
+	 * @param {Point} p3 Third vertex
+	 */
+	constructor(p1, p2, p3) {
+		this.p = [p1, p2, p3]
+		// adjoin[i] is the triangle across the edge opposite to p[i], or null if there is none.
+		this.adjoin = [null, null, null]
+
+		this._circumcircle = null
+	}
+
+	get p() {
+		return this._p
+	}
+
+	set p(points) {
+		this._p = points
+		// The cached circumcircle belongs to the previous vertices.
+		this._circumcircle = null
+	}
+
+	/**
+	 * Circumscribed circle, computed on first access and cached until the vertices are replaced.
+	 * @type {Circle}
+	 */
+	get circumcircle() {
+		if (this._circumcircle) {
+			return this._circumcircle
+		}
+		const [p1, p2, p3] = this.p
+
+		// The small constant avoids a division by exactly zero when the vertices are collinear.
+		const c = 2 * ((p2.x - p1.x) * (p3.y - p1.y) - (p2.y - p1.y) * (p3.x - p1.x)) + 1.0e-12
+		const c21 = p2.x ** 2 - p1.x ** 2 + p2.y ** 2 - p1.y ** 2
+		const c31 = p3.x ** 2 - p1.x ** 2 + p3.y ** 2 - p1.y ** 2
+		const cx = ((p3.y - p1.y) * c21 + (p1.y - p2.y) * c31) / c
+		const cy = ((p1.x - p3.x) * c21 + (p2.x - p1.x) * c31) / c
+
+		this._circumcircle = new Circle(new Point([cx, cy]), Math.sqrt((cx - p1.x) ** 2 + (cy - p1.y) ** 2))
+		return this._circumcircle
+	}
+
+	/**
+	 * Area of the triangle.
+	 * @type {number}
+	 */
+	get area() {
+		const [p1, p2, p3] = this.p
+		return Math.abs((p1.x - p3.x) * (p2.y - p3.y) - (p2.x - p3.x) * (p1.y - p3.y)) / 2
+	}
+
+	/**
+	 * Returns whether the point is inside the triangle. A point on an edge counts as inside.
+	 * @param {Point} p Point to test
+	 * @returns {boolean} `true` if the point is inside
+	 */
+	contains(p) {
+		const outer = (p1, p2, p3) => {
+			return (p1.x - p3.x) * (p2.y - p3.y) - (p2.x - p3.x) * (p1.y - p3.y)
+		}
+
+		// The signs of the non-zero cross products must all agree; a zero cross product is skipped.
+		const o = []
+		for (let i = 0; i < 3; i++) {
+			const oi = outer(p, this.p[i], this.p[(i + 1) % 3])
+			if (oi === 0) {
+				continue
+			}
+			if (o.length > 0 && o[o.length - 1] !== oi < 0) {
+				return false
+			}
+			o.push(oi < 0)
+		}
+
+		return true
+	}
+
+	/**
+	 * Returns whether the point is strictly inside the circumcircle.
+	 * @param {Point} p Point to test
+	 * @returns {boolean} `true` if the point is inside the circumcircle
+	 */
+	contains_circle(p) {
+		return this.circumcircle.contains(p)
+	}
+}
+
+/**
+ * Delaunay triangulation-based spatial clustering of applications with noise
+ */
+export default class DTSCAN {
+	// Delaunay Triangulation-Based Spatial Clustering Technique for Enhanced Adjacent Boundary Detection and Segmentation of LiDAR 3D Point Clouds
+	// https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6767241/
+	/**
+	 * @param {number} [minPts] Minimum size of neighbors
+	 * @param {number} [threshold] Remove threshold score
+	 */
+	constructor(minPts = 5, threshold = 1.0) {
+		this._minPts = minPts
+		this._area_threshold = threshold
+		this._length_threshold = threshold
+	}
+
+	/**
+	 * Returns predicted categories.
+	 * @param {Array<Array<number>>} x Training data
+	 * @returns {number[]} Predicted values. Clusters are numbered from 0 and noise is -1.
+	 */
+	predict(x) {
+		const n = x.length
+		if (n === 0) {
+			// Nothing to cluster, and `x[0]` below does not exist.
+			return []
+		}
+		if (x[0].length !== 2) {
+			throw new Error('Only 2d data can apply for current implementation.')
+		}
+		// Bounding box of the data, enlarged by one unit on every side.
+		const min = [Infinity, Infinity]
+		const max = [-Infinity, -Infinity]
+		for (let i = 0; i < n; i++) {
+			for (let d = 0; d < 2; d++) {
+				min[d] = Math.min(min[d], x[i][d])
+				max[d] = Math.max(max[d], x[i][d])
+			}
+		}
+		for (let d = 0; d < 2; d++) {
+			min[d] -= 1
+			max[d] += 1
+		}
+		// Vertices of the initial triangle, built around the enlarged bounding box.
+		const rootPoints = [
+			new Point([min[0] - (max[1] - min[1]), min[1]]),
+			new Point([max[0] + (max[1] - min[1]), min[1]]),
+			new Point([(min[0] + max[0]) / 2, max[1] + (max[0] - min[0]) / 2]),
+		]
+
+		const triangles = [new Triangle(...rootPoints)]
+
+		// Insert the points one at a time.
+		for (let i = 0; i < n; i++) {
+			const xi = new Point(x[i], i)
+			let k = 0
+			for (; k < triangles.length; k++) {
+				if (triangles[k].contains(xi)) {
+					break
+				}
+			}
+			const t = triangles.splice(k, 1)[0]
+
+			// Replace the containing triangle with three triangles that share the new point.
+			const nt1 = new Triangle(xi, t.p[1], t.p[2])
+			const nt2 = new Triangle(xi, t.p[2], t.p[0])
+			const nt3 = new Triangle(xi, t.p[0], t.p[1])
+
+			nt1.adjoin = [t.adjoin[0], nt2, nt3]
+			nt2.adjoin = [t.adjoin[1], nt3, nt1]
+			nt3.adjoin = [t.adjoin[2], nt1, nt2]
+
+			const nt = [nt1, nt2, nt3]
+			// The former neighbors of the removed triangle now adjoin the triangle that took over their edge.
+			for (let j = 0; j < t.adjoin.length; j++) {
+				if (!t.adjoin[j]) {
+					continue
+				}
+				const m = t.adjoin[j].adjoin.indexOf(t)
+				t.adjoin[j].adjoin[m] = nt[j]
+			}
+			triangles.push(...nt)
+
+			// Flip a shared edge whenever the opposite vertex of the adjoining triangle is inside the circumcircle.
+			const checkFlip = nt.map(t => [t, 0])
+			while (checkFlip.length > 0) {
+				const [cf, j] = checkFlip.pop()
+				const ad = cf.adjoin[j]
+				if (!ad) {
+					continue
+				}
+				const m = ad.adjoin.indexOf(cf)
+
+				if (!cf.contains_circle(ad.p[m])) {
+					continue
+				}
+
+				const j1 = (j + 1) % 3
+				const j2 = (j + 2) % 3
+				let m1 = (m + 1) % 3
+				let m2 = (m + 2) % 3
+				if (ad.p[m1].x !== cf.p[j1].x || ad.p[m1].y !== cf.p[j1].y) {
+					;[m1, m2] = [m2, m1]
+				}
+
+				const cf_p = cf.p
+				const cf_a = cf.adjoin
+				const ad_a = ad.adjoin
+
+				cf.p = [cf.p[j], cf.p[j1], ad.p[m]]
+				cf.adjoin = [ad_a[m2], ad, cf_a[j2]]
+				if (ad_a[m2]) {
+					ad_a[m2].adjoin[ad_a[m2].adjoin.indexOf(ad)] = cf
+				}
+
+				ad.p = [cf_p[j], cf_p[j2], ad.p[m]]
+				ad.adjoin = [ad_a[m1], cf, cf_a[j1]]
+				if (cf_a[j1]) {
+					cf_a[j1].adjoin[cf_a[j1].adjoin.indexOf(cf)] = ad
+				}
+
+				checkFlip.push([cf, 0])
+				checkFlip.push([ad, 0])
+			}
+		}
+
+		// Drop every triangle that uses a vertex of the initial triangle.
+		for (let i = triangles.length - 1; i >= 0; i--) {
+			if (triangles[i].p.some(p => rootPoints.some(rp => p.x === rp.x && p.y === rp.y))) {
+				triangles.splice(i, 1)
+			}
+		}
+
+		const areas = []
+		const lengths = []
+		for (const triangle of triangles) {
+			areas.push(triangle.area)
+			const [p1, p2, p3] = triangle.p
+			lengths.push(p1.distance(p2), p2.distance(p3), p3.distance(p1))
+		}
+
+		const areamean = areas.reduce((s, v) => s + v, 0) / areas.length
+		const areavar = areas.reduce((s, v) => s + (v - areamean) ** 2, 0) / areas.length
+		const areastd = Math.sqrt(areavar)
+		const lengthmean = lengths.reduce((s, v) => s + v, 0) / lengths.length
+		const lengthvar = lengths.reduce((s, v) => s + (v - lengthmean) ** 2, 0) / lengths.length
+		const lengthstd = Math.sqrt(lengthvar)
+		// With zero deviation every value equals the mean, so the score is 0 rather than 0 / 0 = NaN.
+		const zscore = (v, mean, std) => (std > 0 ? (v - mean) / std : 0)
+
+		// Connect the ends of every edge whose length score is below the threshold, skipping the triangles whose
+		// area score reaches the threshold.
+		const neighbors = Array.from(x, () => new Set())
+		for (const triangle of triangles) {
+			if (zscore(triangle.area, areamean, areastd) >= this._area_threshold) {
+				continue
+			}
+			const [p1, p2, p3] = triangle.p
+			for (const [a, b] of [
+				[p1, p2],
+				[p2, p3],
+				[p1, p3],
+			]) {
+				if (zscore(a.distance(b), lengthmean, lengthstd) < this._length_threshold) {
+					neighbors[a.value].add(b.value)
+					neighbors[b.value].add(a.value)
+				}
+			}
+		}
+
+		// Grow clusters depth-first through the points that have at least `minPts` neighbors; the others stay -1.
+		const p = Array(n).fill(-1)
+		const visited = Array(n).fill(false)
+		let c = -1
+		const stack = []
+		while (true) {
+			if (stack.length === 0) {
+				for (let i = 0; i < n; i++) {
+					if (!visited[i]) {
+						if (neighbors[i].size < this._minPts) {
+							visited[i] = true
+							continue
+						}
+						stack.push(i)
+						c++
+						break
+					}
+				}
+				if (stack.length === 0) {
+					break
+				}
+			}
+			const pi = stack.pop()
+			if (visited[pi]) {
+				continue
+			}
+			visited[pi] = true
+			if (neighbors[pi].size < this._minPts) {
+				continue
+			}
+			p[pi] = c
+			stack.push(...neighbors[pi])
+		}
+		return p
+	}
+}
diff --git a/tests/gui/view/dtscan.test.js b/tests/gui/view/dtscan.test.js
new file mode 100644
index 00000000..fa0a3dd9
--- /dev/null
+++ b/tests/gui/view/dtscan.test.js
@@ -0,0 +1,40 @@
+import { getPage } from '../helper/browser'
+
+describe('clustering', () => {
+	/** @type {Awaited<ReturnType<typeof getPage>>} */
+	let page
+	beforeEach(async () => {
+		page = await getPage()
+		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
+		await taskSelectBox.selectOption('CT')
+		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
+		await modelSelectBox.selectOption('dtscan')
+	})
+
+	afterEach(async () => {
+		await page?.close()
+	})
+
+	test('initialize', async () => {
+		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
+		const buttons = await methodMenu.waitForSelector('.buttons')
+
+		const minpts = await buttons.waitForSelector('input:nth-of-type(1)')
+		await expect(minpts.getAttribute('value')).resolves.toBe('5')
+		const threshold = await buttons.waitForSelector('input:nth-of-type(2)')
+		await expect(threshold.getAttribute('value')).resolves.toBe('1')
+	})
+
+	test('learn', async () => {
+		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
+		const buttons = await methodMenu.waitForSelector('.buttons')
+
+		const clusters = await buttons.waitForSelector('span:last-child', { state: 'attached' })
+		await expect(clusters.textContent()).resolves.toBe('')
+
+		const fitButton = await buttons.waitForSelector('input[value=Fit]')
+		await fitButton.evaluate(el => el.click())
+
+		await expect(clusters.textContent()).resolves.toMatch(/^[0-9]+$/)
+	})
+})
diff --git a/tests/lib/model/dtscan.test.js b/tests/lib/model/dtscan.test.js
new file mode 100644
index 00000000..9d44114a
--- /dev/null
+++ b/tests/lib/model/dtscan.test.js
@@ -0,0 +1,42 @@
+import Matrix from '../../../lib/util/matrix.js'
+import DTSCAN from '../../../lib/model/dtscan.js'
+
+import { randIndex } from '../../../lib/evaluate/clustering.js'
+
+describe('clustering', () => {
+	test('default', () => {
+		const model = new DTSCAN()
+		const n = 200
+		const x = Matrix.concat(
+			Matrix.concat(Matrix.randn(n, 2, 0, 0.1), Matrix.randn(n, 2, 5, 0.1)),
+			Matrix.randn(n, 2, [-5, 5], 0.1)
+		).toArray()
+		x[0] = [0, -10]
+
+		const y = model.predict(x)
+		expect(y).toHaveLength(x.length)
+
+		const t = []
+		for (let i = 0; i < x.length; i++) {
+			t[i] = Math.floor(i / n)
+		}
+		const ri = randIndex(y, t)
+		expect(ri).toBeGreaterThan(0.85)
+	})
+
+	test('empty data', () => {
+		const model = new DTSCAN()
+
+		expect(model.predict([])).toEqual([])
+	})
+
+	test('invalid dimension', () => {
+		const model = new DTSCAN()
+		const x = [
+			[1, 1, 1],
+			[0, 0, 0],
+		]
+
+		expect(() => model.predict(x)).toThrow('Only 2d data can apply for current implementation.')
+	})
+})