-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.R
executable file
·119 lines (112 loc) · 3.96 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#############################################
library("Matrix")
library("geigen")
library("rARPACK")
library(maps)
library(WDI)
library(RColorBrewer)
library("maptools")
source("Preprocess.R")
source("SpectralClustering.R")
source("Postprocess.R")
######################################################
# speCluster()
speCluster <- function(data, conMatrix, cluster.number,
                       iter.max = 400, repetition = 400) {
  # Perform spectral clustering on a data matrix.
  #
  # Runs the whole pipeline: preprocessing (outlierDetector, prinComp),
  # the spectral embedding (similarity, produceU), the kmeans step
  # (kmeansU) and the sum-of-squares summary (sumSquares).
  #
  # Args:
  #   data: A numeric data frame or matrix.
  #   conMatrix: Contiguity matrix.
  #   cluster.number: The number of clusters. It is also used as the
  #     number of columns requested from produceU().
  #   iter.max: The maximum number of iterations allowed for the
  #     kmeans step.
  #   repetition: How many random sets should be chosen as the initial
  #     centers in the kmeans step.
  #
  # Returns:
  #   A list with two parts:
  #     clusters: A vector of integers (from 1:cluster.number)
  #       indicating the cluster to which each point is allocated.
  #     SS: A list with two values, SSW for Sum Squared Within and
  #       SSB for Sum Squared Between.
  ############################################
  # Preprocess
  outId <- outlierDetector(data)
  dataAfterPC <- prinComp(data = data, outId = outId)
  rm(data)  # the raw input is not needed past this point
  ############################################
  # Spectral clustering algorithm
  S <- similarity(data = dataAfterPC, neighbors = conMatrix)
  rm(outId, conMatrix)
  U <- produceU(similarity = S, ncol = cluster.number)
  rm(S)
  # Forward the caller's kmeans settings. Previously iter.max was
  # hard-coded to 500 here and repetition was silently dropped, so both
  # arguments of this function had no effect.
  clusters <- kmeansU(data = U, cluster.number = cluster.number,
                      iter.max = iter.max, repetition = repetition)
  ############################################
  # Postprocess
  SS <- sumSquares(data = dataAfterPC, clusters = clusters)
  ############################################
  list(clusters = clusters, SS = SS)
}
stepOne <- function(data, conMatrix, ncol) {
  # First half of the spectral clustering pipeline: preprocess the
  # data and build the matrix U that the kmeans step works on.
  #
  # Args:
  #   data: A numeric data frame or matrix.
  #   conMatrix: Contiguity matrix.
  #   ncol: Number of columns of the output matrix U.
  #
  # Returns:
  #   A list with two parts:
  #     dataAfterPC: The data after the principal component step
  #       (the result of prinComp()).
  #     U: The result of produceU(); per the original notes, an
  #       n by ncol numeric matrix holding the top ncol eigenvectors
  #       of the Laplacian matrix as columns.

  # --- Preprocess ---------------------------------------------------
  outlier_ids <- outlierDetector(data)
  pc_data <- prinComp(data = data, outId = outlier_ids)
  rm(data)

  # --- Spectral embedding -------------------------------------------
  sim <- similarity(data = pc_data, neighbors = conMatrix)
  rm(outlier_ids, conMatrix)
  embedding <- produceU(similarity = sim, ncol = ncol)

  list(dataAfterPC = pc_data, U = embedding)
}
stepTwo <- function(data, U, cluster.number,
                    iter.max = 400, repetition = 400) {
  # Perform the kmeans step of spectral clustering on the U matrix and
  # summarise the resulting partition.
  #
  # Args:
  #   data: A numeric data frame or matrix; passed to sumSquares().
  #   U: A numeric matrix; passed to kmeansU() as the data to cluster.
  #   cluster.number: The number of clusters. Required: the former
  #     default (cluster.number = cluster.number) referred to itself
  #     and could never be evaluated.
  #   iter.max: The maximum number of iterations allowed for the
  #     kmeans step.
  #   repetition: How many random sets should be chosen as the initial
  #     centers in the kmeans step.
  #
  # Returns:
  #   A list with two parts:
  #     clusters: A vector of integers (from 1:cluster.number)
  #       indicating the cluster to which each point is allocated.
  #     SS: A list with two values, SSW for Sum Squared Within and
  #       SSB for Sum Squared Between.
  ############################################
  # Error handling
  if (missing(cluster.number)) {
    stop("'cluster.number' must be supplied", call. = FALSE)
  }
  ############################################
  clusters <- kmeansU(data = U, cluster.number = cluster.number,
                      iter.max = iter.max, repetition = repetition)
  SS <- sumSquares(data = data, clusters = clusters)
  ############################################
  list(clusters = clusters, SS = SS)
}