-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathclusters.R
50 lines (44 loc) · 2.21 KB
/
clusters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(mclust)
# Load the results table (comma-separated, with a header row).
data <- read.table('data/results.csv', sep = ',', header = TRUE)

# Resolve the raw size columns ONCE, before any derived column is added.
# Re-evaluating grepl('size', colnames(data)) after each assignment would also
# match the derived min_size / max_size / median_size columns created below;
# that leaves min, max and median unchanged but skews mean_size, because the
# derived summaries would be averaged in with the raw values.
size_cols <- grep('size', colnames(data), value = TRUE)
sizes <- data[size_cols]

# Row-wise summaries over the raw size columns only.
data$min_size <- apply(sizes, 1, min)
data$max_size <- apply(sizes, 1, max)
data$max_over_min <- data$max_size / data$min_size
data$median_size <- apply(sizes, 1, median)
data$mean_size <- apply(sizes, 1, mean)
# Cluster on the two variables of interest only; the other columns of `data`
# are kept for colouring the plots further down.
cluster_features <- data[c('degree_of_convexity', 'accuracy')]

# Fit Gaussian mixture models with 1 to 20 components and keep the best one.
D <- Mclust(cluster_features, G = 1:20)
summary(D)

png('clusters.png', width = 12, height = 9, units = 'in', res = 300)
plot(D, what = "classification")
dev.off()

# BIC curves for the SAME features and the SAME component range as `D`.
# Calling mclustBIC() on the full data frame would score a different problem
# (every column, including the derived size summaries, with the default
# component range), so the figure would not describe the clustering above.
BIC <- mclustBIC(cluster_features, G = 1:20)

png('cluster_BIC.png', width = 12, height = 9, units = 'in', res = 300)
plot(BIC)
dev.off()
summary(BIC)
library(ggplot2)
library(viridis)
# Earlier exploratory plot, kept for reference:
# qplot(degree_of_convexity, accuracy, data=data, geom=c('point', 'smooth'), method="lm", formula=y~x)

# Attach the cluster assignment and turn the grouping variables into factors
# so they are drawn with discrete shapes / colours.
data$cluster <- as.factor(D$classification)
data$temp <- as.factor(data$temp)
data$conv <- as.factor(data$conv)

# Every figure below is the same scatter (degree_of_convexity vs accuracy,
# point shape = cluster); only the variable mapped to colour changes.
# Must be created after the factor conversions above.
scatter_base <- ggplot(data)

# Discrete colourings.
scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = conv))

scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = temp))

# Continuous colourings on the viridis scale.
scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = linear_accuracy)) +
  scale_colour_viridis()

scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = min_size)) +
  scale_colour_viridis()

scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = max_size)) +
  scale_colour_viridis()

scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = max_over_min)) +
  scale_colour_viridis()

# Titled version coloured by median size. ggsave() is called without a plot
# argument, so it writes the most recently built plot: keep it directly after
# this expression.
scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = median_size)) +
  scale_colour_viridis() +
  ggtitle('Optimal clustering of accuracy data') +
  theme(plot.title = element_text(hjust = 0.5))
ggsave('clusters_median.png', width = 12, height = 8, units = 'in')

scatter_base +
  geom_point(aes(x = degree_of_convexity, y = accuracy, shape = cluster, colour = mean_size)) +
  scale_colour_viridis()
# Linear fit of accuracy on degree_of_convexity, leaving out the rows assigned
# to cluster 3. Rows whose comparison evaluates to NA are dropped as well.
# NOTE(review): the cluster id 3 is hard-coded; confirm it still identifies
# the intended cluster whenever the mixture model is refit.
outside_cluster_3 <- data$cluster != 3
no_bad_cluster <- data[!is.na(outside_cluster_3) & outside_cluster_3, ]

regress <- lm(
  formula = accuracy ~ degree_of_convexity,
  data = no_bad_cluster
)
summary(regress)