Skip to content

Commit

Permalink
Merge pull request #1057 from wadpac/issue1056-ad-hoc-header
Browse files Browse the repository at this point in the history
fix handling of ad hoc csv file header
  • Loading branch information
vincentvanhees authored Feb 22, 2024
2 parents c6c7375 + bd129f9 commit 4f38ec0
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 47 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# CHANGES IN GGIR VERSION 3.0-6

- Part 1: Fix handling of ad hoc csv file header in g.inspectfile() #1057

- Part 1: Improve g.calibrate to better handle scenario when no non-movement periods are found in the entire recording #1032

Expand Down
2 changes: 1 addition & 1 deletion R/check_params.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ check_params = function(params_sleep = c(), params_metrics = c(),
numeric_params = c("chunksize", "spherecrit", "minloadcrit", "minimumFileSizeMB", "dynrange",
"rmc.col.acc", "interpolationType",
"rmc.firstrow.acc", "rmc.firstrow.header", "rmc.header.length",
"rmc.col.temp", "rmc.col.time", "rmc.bitrate", "rmc.dynamic_range",
"rmc.col.temp", "rmc.col.time",
"rmc.sf", "rmc.col.wear", "rmc.noise", "frequency_tol", "rmc.scalefactor.acc")
boolean_params = c("printsummary", "do.cal", "rmc.unsignedbit", "rmc.check4timegaps", "rmc.doresample",
"imputeTimegaps")
Expand Down
10 changes: 0 additions & 10 deletions R/g.getmeta.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,6 @@ g.getmeta = function(datafile, params_metrics = c(), params_rawdata = c(),
params_general = params$params_general
params_cleaning = params$params_cleaning
}
#get input variables
if (length(input) > 0) {
for (i in 1:length(names(input))) {
txt = paste0(names(input)[i], "=", input[i])
if (is(unlist(input[i]), "character")) {
txt = paste0(names(input)[i], "='", unlist(input[i]), "'")
}
eval(parse(text = txt))
}
}

metrics2do = data.frame(do.bfen = params_metrics[["do.bfen"]],
do.enmo = params_metrics[["do.enmo"]],
Expand Down
40 changes: 10 additions & 30 deletions R/g.inspectfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,6 @@ g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
rm(params)
}

#get input variables (relevant when read.myacc.csv is used
if (length(input) > 0) {
for (i in 1:length(names(input))) {
txt = paste0(names(input)[i], "=", input[i])
if (is(unlist(input[i]), "character")) {
txt = paste0(names(input)[i], "='", unlist(input[i]), "'")
}
eval(parse(text = txt))
}
}

# note that if the file is an RData file then this function will not be called
# the output of this function for the original datafile is stored inside the RData file in the form of object I
getbrand = function(filename = c(), datafile = c()) {
Expand Down Expand Up @@ -208,16 +197,15 @@ g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
rmc.scalefactor.acc = params_rawdata[["rmc.scalefactor.acc"]],
desiredtz = desiredtz,
configtz = configtz)
if (Pusercsvformat$header == "no header" || is.null(Pusercsvformat$header$sample_rate)) {

if (class(Pusercsvformat$header) == "character" && Pusercsvformat$header == "no header") {
sf = params_rawdata[["rmc.sf"]]
if (is.null(sf)) {
stop("\nFile header doesn't specify sample rate. Please provide rmc.sf value to process ", datafile)
} else if (sf == 0) {
stop("\nFile header doesn't specify sample rate. Please provide a non-zero rmc.sf value to process ", datafile)
}
} else {
sf = Pusercsvformat$header$sample_rate
sf = as.numeric(Pusercsvformat$header["sample_rate",1])
}
if (is.null(sf) || is.na(sf)) {
stop("\nFile header doesn't specify sample rate. Please provide rmc.sf value to process ", datafile)
} else if (sf == 0) {
stop("\nFile header doesn't specify sample rate. Please provide a non-zero rmc.sf value to process ", datafile)
}
}

Expand Down Expand Up @@ -264,15 +252,7 @@ g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
H = PP$header

} else if (dformat == FORMAT$AD_HOC_CSV) { # csv data in a user-specified format

H = header = Pusercsvformat$header
if (Pusercsvformat$header != "no header") {
H = data.frame(name = row.names(header), value = header, stringsAsFactors = TRUE)
}
sf = params_rawdata[["rmc.sf"]]
if (sf == 0) {
stop("\nPlease provide a non-zero rmc.sf value to process ", datafile)
}
header = Pusercsvformat$header
} else if (dformat == FORMAT$GT3X) { # gt3x
info = try(expr = {read.gt3x::parse_gt3x_info(datafile, tz = desiredtz)},silent = TRUE)
if (inherits(info, "try-error") == TRUE || is.null(info)) {
Expand Down Expand Up @@ -300,7 +280,7 @@ g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
stop(paste0("\nSample frequency not recognised in ", datafile), call. = FALSE)
}

if (is.null(sf) == FALSE) {
if (dformat != FORMAT$AD_HOC_CSV && is.null(sf) == FALSE) {
H = as.matrix(H)
if (ncol(H) == 3 && dformat == FORMAT$CSV && mon == MONITOR$ACTIGRAPH) {
if (length(which(is.na(H[,2]) == FALSE)) == 0) {
Expand Down Expand Up @@ -333,7 +313,7 @@ g.inspectfile = function(datafile, desiredtz = "", params_rawdata = c(),
if ((mon == MONITOR$GENEACTIV && dformat == FORMAT$BIN) || (mon == MONITOR$MOVISENS && length(H) > 0)) {
varname = rownames(as.matrix(H))
H = data.frame(varname = varname,varvalue = as.character(H), stringsAsFactors = TRUE)
} else {
} else if (dformat != FORMAT$AD_HOC_CSV) {
if (length(H) > 1 && class(H)[1] == "matrix") H = data.frame(varname = H[,1],varvalue = H[,2], stringsAsFactors = TRUE)
}
}
Expand Down
7 changes: 4 additions & 3 deletions R/read.myacc.csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ read.myacc.csv = function(rmc.file=c(), rmc.nrow=Inf, rmc.skip=c(), rmc.dec=".",
dec = rmc.dec, showProgress = FALSE, header = FALSE,
blank.lines.skip = TRUE,
data.table=FALSE, stringsAsFactors=FALSE)
validrows = which(is.na(header_tmp[,1]) == FALSE & header_tmp[,1] != "")
header_tmp = header_tmp[validrows,1:2]

options(warn = 0)
if (length(rmc.header.structure) != 0) { # header is stored in 1 column, with strings that need to be split
if (length(header_tmp) == 1) { # one header item
Expand Down Expand Up @@ -119,8 +122,6 @@ read.myacc.csv = function(rmc.file=c(), rmc.nrow=Inf, rmc.skip=c(), rmc.dec=".",
header = header_tmp2
} else { # column 1 is header name, column 2 is header value
colnames(header_tmp) = NULL
validrows = which(is.na(header_tmp[,1]) == FALSE & header_tmp[,1] != "")
header_tmp = header_tmp[validrows,1:2]
header_tmp2 = as.data.frame(header_tmp[,2], stringsAsFactors = FALSE)
row.names(header_tmp2) = header_tmp[,1]
colnames(header_tmp2) = NULL
Expand All @@ -142,7 +143,7 @@ read.myacc.csv = function(rmc.file=c(), rmc.nrow=Inf, rmc.skip=c(), rmc.dec=".",
# first see if maybe sf *is* in the header, just not under the rmc.headername.sf name
sf = as.numeric(header[which(row.names(header) == "sample_rate"),1])
# if sf isn't in the header under the default name either, then use the default value
if (is.na(sf)) {
if (is.na(sf) && !is.null(rmc.sf)) {
sf = rmc.sf
header = rbind(header, sf) # add it also to the header
row.names(header)[nrow(header)] = "sample_rate"
Expand Down
149 changes: 147 additions & 2 deletions tests/testthat/test_greadaccfile.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
library(GGIR)
context("g.readaccfile")
test_that("g.readaccfile and g.inspectfile can read movisens, gt3x, cwa, Axivity csv, and actigraph csv files correctly", {
test_that("g.readaccfile and g.inspectfile can read movisens, gt3x, cwa, Axivity csv, actigraph csv, and ad-hoc csv files correctly", {
skip_on_cran()

desiredtz = "Pacific/Auckland"
Expand Down Expand Up @@ -218,11 +218,25 @@ test_that("g.readaccfile and g.inspectfile can read movisens, gt3x, cwa, Axivity
timestamps = as.POSIXlt(x, origin="1970-1-1", tz = configtz)
mydata = data.frame(Xcol = rnorm(N), timecol = timestamps, Ycol = rnorm(N), Zcol = rnorm(N),
tempcol = rnorm(N) + 20)
testfile = "testcsv1.csv"
testfile = "testcsv.csv"
on.exit({if (file.exists(testfile)) file.remove(testfile)}, add = TRUE)

write.csv(mydata, file = testfile, row.names = FALSE)

# check that for files with no header, g.inspectfile() errors out if sampling rate is not specified as rmc.sf, or if rmc.sf == 0
expect_error(g.inspectfile(testfile,
rmc.dec=".", rmc.unit.time="POSIX",
rmc.firstrow.acc = 1, rmc.firstrow.header=c(),
rmc.col.acc = c(1,3,4), rmc.col.temp = 5, rmc.col.time=2,
rmc.unit.acc = "g", rmc.unit.temp = "C", rmc.origin = "1970-01-01"),
regexp = "File header doesn't specify sample rate. Please provide rmc.sf value to process")
expect_error(g.inspectfile(testfile,
rmc.dec=".", rmc.sf=0, rmc.unit.time="POSIX",
rmc.firstrow.acc = 1, rmc.firstrow.header=c(),
rmc.col.acc = c(1,3,4), rmc.col.temp = 5, rmc.col.time=2,
rmc.unit.acc = "g", rmc.unit.temp = "C", rmc.origin = "1970-01-01"),
regexp = "File header doesn't specify sample rate. Please provide a non-zero rmc.sf value to process")

AHcsv = g.inspectfile(testfile,
rmc.dec=".", rmc.sf=30, rmc.unit.time="POSIX",
rmc.firstrow.acc = 1, rmc.firstrow.header=c(),
Expand Down Expand Up @@ -289,6 +303,137 @@ test_that("g.readaccfile and g.inspectfile can read movisens, gt3x, cwa, Axivity
expect_equal(nrow(csv_read4$P$data), 3000)
expect_equal(sum(csv_read3$P$data[c("x","y","z")]), sum(csv_read4$P$data[c("x","y","z")]), tolerance = .01, scale = 1)

# Create test file: 2-column header, with time,
# but sample rate not specified in the header

N = 6000
sf = 30
x = Sys.time()+((0:(N-1))/sf)
timestamps = as.POSIXlt(x, origin="1970-1-1", tz = configtz)
mydata = data.frame(Xcol = rnorm(N), timecol = timestamps, Ycol = rnorm(N), Zcol = rnorm(N))
S1 = as.matrix(mydata)

hd_NR = 10
hd = matrix("", hd_NR + 1, ncol(S1))
hd[1, 1:2] = c("ID","12345")
hd[2, 1:2] = c("serial_number","30")
hd[3, 1:2] = c("bit","8")
hd[4, 1:2] = c("dynamic_range","6")

S1 = rbind(hd, S1)
S1[hd_NR + 1,] = colnames(S1)
colnames(S1) = NULL

testfile_two_col = "testcsv2col.csv"
on.exit({if (file.exists(testfile_two_col)) file.remove(testfile_two_col)}, add = TRUE)
write.table(S1, file = testfile_two_col, col.names = FALSE, row.names = FALSE)

# Create test file: 1-column header, with time,
# but sample rate not specified in the header
S1 = as.matrix(mydata)
hd = matrix("", hd_NR + 1, ncol(S1))
hd[1, 1:2] = c("ID: 12345", "")
hd[2, 1:2] = c("serial_number: 4321", "")
hd[3, 1:2] = c("bit: 8", "")
hd[4, 1:2] = c("dynamic_range: 6", "")

S1 = as.matrix(mydata)
S1 = rbind(hd, S1)
S1[hd_NR + 1,] = colnames(S1)
colnames(S1) = NULL

testfile_one_col = "testcsv1col.csv"
on.exit({if (file.exists(testfile_one_col)) file.remove(testfile_one_col)}, add = TRUE)
write.table(S1, file = testfile_one_col, col.names = FALSE, row.names = FALSE)

for (testfile in c(testfile_one_col, testfile_two_col)) {
# check that for a file whose header doesn't specify sampling rate,
# g.inspectfile() errors out if sampling rate is not specified as rmc.sf, or if rmc.sf==0
expect_error(g.inspectfile(testfile,
rmc.dec=".", rmc.unit.time="POSIX",
rmc.firstrow.acc = 11, rmc.firstrow.header = 1,
rmc.col.acc = c(1,3,4), rmc.col.time=2,
rmc.unit.acc = "g", rmc.origin = "1970-01-01"),
regexp = "File header doesn't specify sample rate. Please provide rmc.sf value to process")
expect_error(g.inspectfile(testfile,
rmc.dec=".", rmc.sf = 0, rmc.unit.time="POSIX",
rmc.firstrow.acc = 11, rmc.firstrow.header = 1,
rmc.col.acc = c(1,3,4), rmc.col.time=2,
rmc.unit.acc = "g", rmc.origin = "1970-01-01"),
regexp = "File header doesn't specify sample rate. Please provide a non-zero rmc.sf value to process")

# check that for a file whose header doesn't specify sampling rate,
# g.inspectfile() returns sf == rmc.sf if the latter was specified
I = g.inspectfile(testfile,
rmc.dec=".", rmc.sf = 80, rmc.unit.time="POSIX",
rmc.firstrow.acc = 11, rmc.firstrow.header = 1,
rmc.col.acc = c(1,3,4), rmc.col.time=2,
rmc.unit.acc = "g", rmc.origin = "1970-01-01")
expect_equal(I$sf, 80)
}

# Create test file: 2-column header, with time,
# and sample rate correctly specified in the header
S1 = as.matrix(mydata)
hd_NR = 10
hd = matrix("", hd_NR + 1, ncol(S1))
hd[1, 1:2] = c("ID","12345")
hd[2, 1:2] = c("sample_freq","40")
hd[3, 1:2] = c("serial_number","9876")
hd[4, 1:2] = c("bit","8")
hd[5, 1:2] = c("dynamic_range","6")
S1 = as.matrix(mydata)
S1 = rbind(hd, S1)
S1[hd_NR + 1,] = colnames(S1)
colnames(S1) = NULL

testfile_two_col = "testcsv2col.csv"
on.exit({if (file.exists(testfile_two_col)) file.remove(testfile_two_col)}, add = TRUE)
write.table(S1, file = testfile_two_col, col.names = FALSE, row.names = FALSE)

# Create test file: 1-column header, with time,
# and sample rate correctly specified in the header
S1 = as.matrix(mydata)
hd = matrix("", hd_NR + 1, ncol(S1))
hd[1, 1:2] = c("ID: 12345", "")
hd[2, 1:2] = c("sample_freq: 40", "")
hd[3, 1:2] = c("serial_number: 4321", "")
hd[4, 1:2] = c("bit: 8", "")
hd[5, 1:2] = c("dynamic_range: 6", "")
S1 = rbind(hd, S1)
S1[hd_NR + 1,] = colnames(S1)
colnames(S1) = NULL

testfile_one_col = "testcsv1col.csv"
on.exit({if (file.exists(testfile_one_col)) file.remove(testfile_one_col)}, add = TRUE)
write.table(S1, file = testfile_one_col, col.names = FALSE, row.names = FALSE)

for (csvData in list(list(testfile_one_col, ": "),
list(testfile_two_col, c()))) {
# check that g.inspectfile() returns sf value that was specified in the header, even if rmc.sf was also specified
I = g.inspectfile(csvData[[1]],
rmc.dec=".", rmc.sf = 80, rmc.headername.sf = "sample_freq",
rmc.unit.time="POSIX",
rmc.firstrow.acc = 11, rmc.firstrow.header=1,
rmc.col.acc = c(1,3,4), rmc.col.time=2,
rmc.unit.acc = "g", rmc.origin = "1970-01-01",
rmc.headername.sn = "serial_number",
rmc.headername.recordingid = "ID",
rmc.bitrate = "bit", rmc.dynamic_range = "dynamic_range",
rmc.header.structure = csvData[[2]])

expect_equal(I$sf, 40)

# check that g.inspectfile() correctly reads the sf value from the header
I = g.inspectfile(csvData[[1]],
rmc.dec=".", rmc.headername.sf = "sample_freq",
rmc.unit.time="POSIX",
rmc.firstrow.acc = 11, rmc.firstrow.header=1,
rmc.col.acc = c(1,3,4), rmc.col.time=2,
rmc.unit.acc = "g", rmc.origin = "1970-01-01",
rmc.header.structure = csvData[[2]])
expect_equal(I$sf, 40)
}
# test decimal separator recognition
decn = g.dotorcomma(Ax3CwaFile,dformat = FORMAT$CWA, mon = MONITOR$AXIVITY, desiredtz = desiredtz)
expect_equal(decn,".")
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test_read.myacc.csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -265,14 +265,15 @@ test_that("read.myacc.csv can handle header and bit-value acceleration", {
rmc.format.time = "%Y-%m-%d %H:%M:%OS",
rmc.origin = "1970-01-01",
desiredtz = "Europe/London",
rmc.sf = sf,
rmc.headername.sf = "sample_frequency",
rmc.headername.sn = "serial_number",
rmc.headername.recordingid = "ID")
expect_that(nrow(D1$data), equals(20))
expect_that(ncol(D1$data), equals(5))
expect_that(nrow(D1$header), equals(5))
expect_that(ncol(D1$header), equals(1))
expect_equal(as.numeric(D1$header["sample_rate",1]), 30)

# Test 2 - 2 column header, bit-valued acceleration
D2 = read.myacc.csv(rmc.file = testfile[2], rmc.nrow = 20, rmc.dec = ".",
rmc.firstrow.acc = 11, rmc.firstrow.header = 1,
Expand Down

0 comments on commit 4f38ec0

Please sign in to comment.