-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreport.Rmd
139 lines (109 loc) · 4.33 KB
/
report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
---
title: "Workflow results report"
output:
  pdf_document:
    fig_crop: no
    fig_caption: no
params:
  input: NA
  original_data: NA
  classes: NA
  positives: NA
  negatives: NA
  port: NA
  original_pca_data: NA
  original_fisher_data: NA
  after_fisher_pca_data: NA
  after_fisher_data: NA
  after_relieff_pca_data: NA
  after_relieff_data: NA
  after_plsda_perf: NA
  after_splsda: NA
---
```{r, include=FALSE}
# Report-wide setup: load knitr, switch off the tinytex.verbose option,
# register a custom "asis" engine, and set the chunk defaults.
library(knitr)
options(tinytex.verbose = FALSE)

# Custom "asis" engine: when both the `echo` and `eval` options of a chunk
# are TRUE, the chunk body is handed to knit_child() as text; otherwise the
# engine function returns NULL.
knit_engines$set(
  asis = function(options) {
    if (options$echo && options$eval) {
      knit_child(text = options$code)
    }
  }
)

# Do not echo chunk source code unless a chunk overrides this.
opts_chunk$set(echo = FALSE)
```
## Initial state and Fisher filter
### Data
Number of variables: `r dim(params$original_data)[2]`\newline
Number of samples: `r dim(params$original_data)[1]`\newline
Classes: `r levels(factor(params$classes))` \newline
Number of samples of class "`r levels(factor(params$classes))[1]`": `r length(params$positives)`\newline
Number of samples of class "`r levels(factor(params$classes))[2]`": `r length(params$negatives)`\newline
Number of variables kept after Fisher: `r params$input$fisher_vars`\newline
### Plots
```{r}
# Plot the object supplied as `original_pca_data`
# (presumably a PCA result built from all variables -- confirm with caller).
plot(
  params$original_pca_data,
  main = "Principal components with all variables"
)
```
```{r}
# Fisher scores, with a red vertical line drawn at `fisher_vars`
# (the number of variables kept after Fisher, as reported in the Data section).
plot(
  params$original_fisher_data$to_plot,
  main = "Fisher scores",
  ylab = "Score"
)
abline(v = params$input$fisher_vars, lwd = 2, col = "red")
```
```{r}
# Plot the object supplied as `after_fisher_pca_data`
# (presumably the PCA recomputed on the Fisher-selected variables -- confirm).
plot(
  params$after_fisher_pca_data,
  main = "Principal components with the selected variables after Fisher scoring"
)
```
```{asis, echo=params$input$use_relieff, eval=params$input$use_relieff}
## ReliefF
### Data
Number of variables kept after ReliefF: `r params$input$relieff_vars`\newline
Number of nearest neighbors chosen: `r params$input$relieff_k`\newline
Number of iterations made: `r if (params$input$relieff_iters == 0) { params$input$fisher_vars } else if (params$input$relieff_iters == -1) { floor(log(params$input$fisher_vars)) } else { floor(sqrt(params$input$fisher_vars)) }`\newline
Algorithm chosen to calculate the scores: `r params$input$relieff_method`\newline
### Plots
```
```{r, eval=params$input$use_relieff}
# ReliefF scores, with a red vertical line drawn at `relieff_vars`
# (the number of variables kept after ReliefF, as reported in the Data section).
plot(
  params$after_relieff_data$sorted_attrs,
  main = "ReliefF scores",
  ylab = "Score"
)
abline(v = params$input$relieff_vars, lwd = 2, col = "red")
```
```{r, eval=(params$input$draw_relieff_heatmap && params$input$use_relieff)}
# Heatmap of the data kept after ReliefF; the title embeds `relieff_vars`.
heatmap(
  params$after_relieff_data$data,
  main = paste(
    "Heatmap of the",
    params$input$relieff_vars,
    "variables after applying ReliefF"
  )
)
```
```{r, eval=params$input$use_relieff}
# Plot the object supplied as `after_relieff_pca_data`
# (presumably the PCA recomputed on the ReliefF-selected variables -- confirm).
plot(
  params$after_relieff_pca_data,
  main = "Principal components with the selected variables after ReliefF"
)
```
## sPLS-DA
### Data
Number of components used for performance evaluation: `r params$input$components`\newline
Number of cross validation folds performed: `r params$input$cv_folds`\newline
Number of cross validation repetitions performed: `r params$input$cv_repeats`\newline
### Plots
```{r}
# Balanced Error Rate curves, one solid line per column of the BER matrix,
# with a legend naming the three distance measures. The same three-colour
# palette is used for the lines and for the legend.
dist_palette <- color.mixo(1:3)
matplot(
  params$after_plsda_perf$perf_plsda$error.rate$BER,
  type = "l",
  lty = 1,
  col = dist_palette,
  main = "Balanced Error Rate for amount of components",
  ylab = "Balanced Error Rate"
)
legend(
  "topright",
  c("max.dist", "centroids.dist", "mahalanobis.dist"),
  lty = 1,
  col = dist_palette
)
```
```{r}
# Plot the `tune_splsda` object, using one colour per component
# (`color.jet()` is called with the `components` input).
plot(
  params$after_plsda_perf$tune_splsda,
  col = color.jet(params$input$components),
  main = "Error rates for number of components"
)
```
```{r, eval=params$input$draw_indiv_plot}
# Sample plot of the final sPLS-DA model on components 1 and 2, with a legend
# and ellipses enabled and individual sample names hidden.
plotIndiv(
  params$after_splsda$splsda,
  comp = c(1, 2),
  ind.names = FALSE,
  legend = TRUE,
  ellipse = TRUE,
  title = "sPLS-DA, final result, components 1 and 2"
)
```
```{r, eval=params$input$draw_auroc}
# Call auroc() once per component of the final sPLS-DA model, from 1 up to
# `final_ncomp`.
# seq_len() is used instead of seq(): seq(0) evaluates to c(1, 0) and would run
# the loop for components 1 and 0, whereas seq_len(0) is an empty sequence.
ncomp <- params$after_plsda_perf$final_ncomp
for (i in seq_len(ncomp)) {
  auroc(params$after_splsda$splsda, roc.comp = i)
}
```
```{asis, echo=params$input$draw_cim, eval=params$input$draw_cim}
![](TFG_workflow/`r paste("cim", params$port, ".png", sep = "_")`)
```
```{r, eval=params$input$draw_loadings}
# Call plotLoadings() once per component of the final sPLS-DA model, from 1 up
# to `final_ncomp`, titling each plot with its component index.
# seq_len() is used instead of seq(): seq(0) evaluates to c(1, 0) and would run
# the loop for components 1 and 0, whereas seq_len(0) is an empty sequence.
ncomp <- params$after_plsda_perf$final_ncomp
for (i in seq_len(ncomp)) {
  plotLoadings(
    params$after_splsda$splsda,
    comp = i,
    contrib = "max",
    method = "mean",
    title = paste("Loadings of component", i)
  )
}
```
### Selected variables
```{r}
# Collect the names of the variables selected on each sPLS-DA component, in
# first-seen order, listing each name only once even if several components
# select it. `vars_list` is read by the inline expression that follows this
# chunk.
#
# The per-component names are gathered with lapply() and de-duplicated in one
# step instead of growing a vector inside a loop. seq_len() keeps the result
# empty (NULL) when `final_ncomp` is 0, where seq(0) would have iterated over
# c(1, 0).
ncomp <- params$after_plsda_perf$final_ncomp
vars_by_comp <- lapply(seq_len(ncomp), function(i) {
  selectVar(params$after_splsda$splsda, comp = i)$name
})
vars_list <- unique(unlist(vars_by_comp, use.names = FALSE))
```
`r paste0(vars_list, collapse = "\\newline ")`