-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathkmeans.php
92 lines (79 loc) · 2.39 KB
/
kmeans.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
<?php
// Demo run: split nine sample values into three clusters and print the grouping as JSON.
echo json_encode(
    kmeans(
        array(25500, 17250, 20000, 28500, 28500, 20500, 20500, 27000, 22000),
        3
    )
);
/**
 * Cluster one-dimensional numeric data into $k groups with an iterative k-means loop.
 *
 * Centroids are seeded by assign_initial_positions(); assignment and centroid
 * recalculation then alternate until a pass changes no assignment.
 *
 * @param array $data Numeric values to cluster.
 * @param int   $k    Number of clusters to seed; must be at least 1.
 *
 * @return array Values of $data grouped by cluster key. Clusters that end up
 *               without members are absent from the result.
 *
 * @throws \InvalidArgumentException When $data is non-empty and $k is smaller than 1.
 */
function kmeans($data, $k)
{
    // Nothing to cluster: answer immediately instead of reaching min()/max() on an empty array.
    if (empty($data)) {
        return array();
    }

    // With no centroid, no point can be assigned to a cluster, so fail loudly up front.
    if ($k < 1) {
        throw new \InvalidArgumentException('kmeans() expects $k to be at least 1.');
    }

    $cPositions = assign_initial_positions($data, $k);
    $clusters = array();

    while (true) {
        $changes = kmeans_clustering($data, $cPositions, $clusters);

        // A pass without any reassignment means the clustering has settled.
        if (!$changes) {
            return kmeans_get_cluster_values($clusters, $data);
        }

        $cPositions = kmeans_recalculate_cpositions($cPositions, $data, $clusters);
    }
}
/**
 * Assign every data point to its nearest centroid and count the reassignments.
 *
 * @param array $data       Values to assign, indexed by data key.
 * @param array $cPositions Current centroid positions, indexed by cluster key.
 * @param array $clusters   Map of data key => cluster key; updated in place.
 *
 * @return int Number of data points whose cluster differs from the one recorded
 *             in $clusters on entry (points with no recorded entry count as changed).
 */
function kmeans_clustering($data, $cPositions, &$clusters)
{
    // Accept an uninitialised by-reference variable the same way an empty map is accepted.
    if (!is_array($clusters)) {
        $clusters = array();
    }

    $nChanges = 0;
    foreach ($data as $dataKey => $value) {
        $minDistance = null;
        $cluster = null;
        foreach ($cPositions as $k => $position) {
            $distance = distance($value, $position);
            // Strict ">" keeps the first centroid encountered when distances tie.
            if ($minDistance === null || $minDistance > $distance) {
                $minDistance = $distance;
                $cluster = $k;
            }
        }

        // array_key_exists() instead of isset(): a recorded null assignment (no centroid
        // available) must not be reported as a change on every single pass. The strict
        // comparison avoids loose-equality surprises between cluster keys.
        if (!array_key_exists($dataKey, $clusters) || $clusters[$dataKey] !== $cluster) {
            $nChanges++;
        }
        $clusters[$dataKey] = $cluster;
    }

    return $nChanges;
}
/**
 * Move each centroid to the mean of the values currently assigned to it.
 *
 * @param array $cPositions Current centroid positions, indexed by cluster key.
 * @param array $data       Values indexed by data key.
 * @param array $clusters   Map of data key => cluster key.
 *
 * @return array Updated centroid positions with the same keys as $cPositions.
 */
function kmeans_recalculate_cpositions($cPositions, $data, $clusters)
{
    $members = kmeans_get_cluster_values($clusters, $data);

    foreach ($cPositions as $k => $position) {
        // A cluster without members keeps its previous position rather than jumping
        // to an arbitrary value such as 0 that is unrelated to the data range.
        if (!empty($members[$k])) {
            $cPositions[$k] = kmeans_avg($members[$k]);
        }
    }

    return $cPositions;
}
/**
 * Group data values by the cluster their data key is assigned to.
 *
 * @param array $clusters Map of data key => cluster key.
 * @param array $data     Values indexed by the same data keys.
 *
 * @return array Map of cluster key => list of member values, built in the
 *               iteration order of $clusters.
 */
function kmeans_get_cluster_values($clusters, $data)
{
    $grouped = array();

    foreach ($clusters as $index => $label) {
        // Open a bucket the first time a cluster key shows up.
        if (!isset($grouped[$label])) {
            $grouped[$label] = array();
        }
        $grouped[$label][] = $data[$index];
    }

    return $grouped;
}
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param array $values Numbers to average.
 *
 * @return int|float The mean, or 0 when $values is empty.
 */
function kmeans_avg($values)
{
    $count = count($values);

    // Guard against dividing by zero for an empty list.
    if ($count == 0) {
        return 0;
    }

    return array_sum($values) / $count;
}
/**
 * Distance between two one-dimensional positions: the absolute value of their difference.
 *
 * @param int|float $v1 First position.
 * @param int|float $v2 Second position.
 *
 * @return int|float Non-negative difference between $v1 and $v2.
 */
function distance($v1, $v2)
{
    $delta = $v1 - $v2;

    return abs($delta);
}
/**
 * Seed $k centroid positions spread from the minimum of the data upwards.
 *
 * The step between neighbouring centroids is ceil((max - min) / $k), so
 * centroid $i starts at min + step * $i.
 *
 * @param array $data Numeric values that will be clustered.
 * @param int   $k    Number of centroids to create.
 *
 * @return array Map of cluster key => starting position. Keys are inserted from
 *               $k - 1 down to 0. The result is empty when $data is empty or
 *               $k is not positive.
 */
function assign_initial_positions($data, $k)
{
    // Always hand back an array, even when the loop below never runs.
    $cPositions = array();

    // min()/max() cannot work on an empty array, so there is nothing to seed.
    if (empty($data)) {
        return $cPositions;
    }

    $min = min($data);
    $max = max($data);
    $int = ceil(abs($max - $min) / $k);

    // Counting down keeps the descending key insertion order; nearest-centroid
    // tie-breaking depends on the order in which centroids are iterated.
    while ($k-- > 0) {
        $cPositions[$k] = $min + $int * $k;
    }

    return $cPositions;
}