-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.R
72 lines (61 loc) · 2.05 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
library(tercen)
library(tercenApi)
library(dplyr)
# Connect to the Tercen context this operator runs in.
ctx <- tercenCtx()

# The document to import is referenced through the `documentId` column factor.
if (!any(ctx$cnames == "documentId")) {
  stop("Column factor documentId is required.")
}

# extract files
df <- ctx$cselect()
docId <- df$documentId[1] # only the first document id is imported
doc <- ctx$client$fileService$get(docId)

# Download the document into a temporary file.
# No `on.exit()` cleanup is registered here: at the top level of a script
# `on.exit()` is not deferred until the script ends (it is either ignored or,
# when the script is run through `source()`, executed as soon as the statement
# finishes), so it could delete the download before it is read. Temporary
# files are created under the session's `tempdir()`, which R removes when the
# session ends.
filename <- tempfile()
writeBin(ctx$client$fileService$download(docId), filename)

# unzip if archive
# The extension is matched literally and anchored at the end of the name, so
# names that merely contain "zip" somewhere are not mistaken for archives.
is_zip <- isTRUE(grepl("\\.zip$", doc$name, ignore.case = TRUE))
if (is_zip) {
  tmpdir <- tempfile()
  unzip(filename, exdir = tmpdir)
  unlink(filename) # the archive itself is no longer needed once extracted
  # recursive = TRUE returns files only, so folders inside the archive are
  # descended into instead of being handed to the CSV reader as if they were
  # files.
  f.names <- list.files(tmpdir, full.names = TRUE, recursive = TRUE)
  if (length(f.names) == 0) {
    stop("No file could be extracted from archive ", doc$name, ".")
  }
} else {
  f.names <- filename
}
task <- ctx$task

# Operator properties: name, converter, default value.
headers <- ctx$op.value("Headers", as.logical, TRUE)
separator <- ctx$op.value("Separator", as.character, "Tab")
force_merge <- ctx$op.value("Force", as.logical, FALSE)

# Translate the separator option into the delimiter handed to read.csv().
# An unknown value fails here with an explicit message instead of silently
# turning into NA and breaking the reader later on.
separator <- switch(
  separator,
  Comma = ",",
  Tab = "\t",
  stop("Unsupported 'Separator' option: ", separator, call. = FALSE)
)

# import files in Tercen
# Files are visited by index so the progress counter is simply the position of
# the file; no counter has to be kept in the global environment.
csv_list <- lapply(seq_along(f.names), function(i) {
  path <- f.names[[i]]
  data <- read.csv(path, header = headers, sep = separator)

  if (!is.null(task)) {
    # task is null when run from RStudio
    evt <- TaskProgressEvent$new()
    evt$taskId <- task$id
    evt$total <- length(f.names)
    evt$actual <- as.numeric(i) # number of files read so far, as a double
    evt$message <- paste0("processing csv file ", path)
    ctx$client$eventService$sendChannel(task$channelId, evt)
  } else {
    ctx$log(paste0("Processing CSV file: ", path))
  }

  # Tag every row with the name of the file it came from. The value is built
  # outside of any data-masking verb so that a CSV which already has a
  # `filename` column cannot shadow the file path.
  data$filename <- rep_len(basename(path), nrow(data))
  data
})
# Every imported table must expose exactly the same column names, in the same
# order, as the first one -- unless the 'Force' option asks for the merge to
# happen anyway. vapply() guarantees a logical vector even for an empty list.
same_colnames <- all(vapply(
  csv_list,
  function(x) identical(colnames(x), colnames(csv_list[[1]])),
  logical(1)
))

# Scalar condition: use the short-circuit operator; a missing 'Force' value is
# treated as "not forced".
if (!same_colnames && !isTRUE(force_merge)) {
  stop("All files must have strictly identical column names or the 'Force' option should be set to true.")
}
# Stack all imported tables into one and normalise column types: logical
# columns become character and integer columns become double.
merged <- csv_list %>%
  bind_rows() %>%
  mutate_if(is.logical, as.character) %>%
  mutate_if(is.integer, as.double)

# Add the bookkeeping columns with plain assignment rather than mutate(), so
# that a CSV column which happens to be called `doc` cannot mask the Tercen
# document object. `.ci` is added after the integer -> double conversion above
# so that it stays an integer column.
n_rows <- nrow(merged)
merged[[".ci"]] <- rep_len(0L, n_rows)
merged[["filename_of_zip"]] <- rep_len(doc$name, n_rows)

# Prefix the column names with the operator namespace and send the table back
# to Tercen.
merged %>%
  ctx$addNamespace() %>%
  ctx$save()