Skip to content

Commit

Permalink
v1.15.3 SMap remove warning. Allow whitespace in column names.
Browse files Browse the repository at this point in the history
  • Loading branch information
SoftwareLiteracy committed Dec 1, 2023
1 parent 3955b66 commit 38af381
Show file tree
Hide file tree
Showing 19 changed files with 224 additions and 97 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rEDM
Type: Package
Title: Empirical Dynamic Modeling ('EDM')
Version: 1.15.1
Date: 2023-10-27
Version: 1.15.3
Date: 2023-12-01
Authors@R: c( person("Joseph", "Park", role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-5411-1409")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
- `CCM()` `replacement` parameter removed.
- Legacy overload functions removed.
- Version 1.15.1 `ignoreNan` added in `PredictNonlinear()`. Replace unicode in plot labels with plotmath expressions. cppEDM initialize `nanFound` in DataFrame.h for UBSAN. Sync with cppEDM 1.15.1.
- Version 1.15.2 Allow `columns` names with spaces. If the `columns` argument is a string use the "," delimiter to separate names. Remove `SMap` warning for disjoint library.
- Version 1.15.3 Allow `columns` and `target` names with spaces in CCM.

##### Version 1.14
- cppEDM core added `generateLibrary` parameter to `Simplex()` and `SMap()`. If `TRUE` the state-space library has newly generated points added. Not available due to Rcpp 20 parameter limit.
Expand Down
53 changes: 44 additions & 9 deletions R/EDM.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ Embed = function( path = "./",
}

# If columns are vectors/list, convert to string for cppEDM
if ( is.vector( columns ) || is.list( columns ) ) {
columns = paste( columns, collapse = " " )
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns, "," )
}

# Mapped to Embed_rcpp() (Embed.cpp) in RcppEDMCommon.cpp
Expand Down Expand Up @@ -102,14 +103,18 @@ Simplex = function( pathIn = "./",
}

# If lib, pred, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
if ( ! is.character( pred ) || length( pred ) > 1 ) {
pred = FlattenToString( pred )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# NOTE: Rcpp has a 20 argument limit!
Expand Down Expand Up @@ -196,14 +201,18 @@ SMap = function( pathIn = "./",
}

# If lib, pred, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
if ( ! is.character( pred ) || length( pred ) > 1 ) {
pred = FlattenToString( pred )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# NOTE: Rcpp has a 20 argument limit!
Expand Down Expand Up @@ -287,14 +296,18 @@ Multiview = function( pathIn = "./",
}

# If lib, pred, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
if ( ! is.character( pred ) || length( pred ) > 1 ) {
pred = FlattenToString( pred )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# NOTE: Rcpp has a 20 argument limit!
Expand Down Expand Up @@ -390,11 +403,21 @@ CCM = function( pathIn = "./",
}

# If libSizes, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
# NOTE: CCM can have multiple target
if ( ! is.character( libSizes ) || length( libSizes ) > 1 ) {
libSizes = FlattenToString( libSizes )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
# target given as a vector/list of names (CCM can have multiple targets):
# join on ',' and store the result in target, leaving columns untouched.
if ( ! is.character( target ) || length( target ) > 1 ) {
  target = FlattenToString( target, "," )
}
else {
  # Single string: a name containing a space gets ',' appended for cppEDM
  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
    target = paste0( target, ',' ) # space in target: add , for cppEDM
  }
}

# NOTE: Rcpp has a 20 argument limit!
Expand Down Expand Up @@ -493,14 +516,18 @@ EmbedDimension = function ( pathIn = "./",
}

# If lib, pred, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
if ( ! is.character( pred ) || length( pred ) > 1 ) {
pred = FlattenToString( pred )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# Mapped to EmbedDimension_rcpp() (EmbedDim.cpp) in RcppEDMCommon.cpp
Expand Down Expand Up @@ -574,14 +601,18 @@ PredictInterval = function( pathIn = "./",
}

# If lib, pred, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
if ( ! is.character( pred ) || length( pred ) > 1 ) {
pred = FlattenToString( pred )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# Mapped to PredictInterval_rcpp() (PredictInterval.cpp) in RcppEDMCommon.cpp
Expand Down Expand Up @@ -658,6 +689,7 @@ PredictNonlinear = function( pathIn = "./",
}

# If lib, pred, theta, columns are vectors/list, convert to string for cppEDM
# NOTE: columns joined on ',' to enable names with whitespace in cppEDM
if ( ! is.character( lib ) || length( lib ) > 1 ) {
lib = FlattenToString( lib )
}
Expand All @@ -668,7 +700,10 @@ PredictNonlinear = function( pathIn = "./",
theta = FlattenToString( theta )
}
if ( ! is.character( columns ) || length( columns ) > 1 ) {
columns = FlattenToString( columns )
columns = FlattenToString( columns, "," )
}
if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
target = paste0( target, ',' ) # space in target: add , for cppEDM
}

# Mapped to PredictNonlinear_rcpp() (PredictNL.cpp) in RcppEDMCommon.cpp
Expand Down
82 changes: 57 additions & 25 deletions R/EDM_AuxFuncs.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,38 @@ ComputeError = function( obs, pred ) {
#------------------------------------------------------------------------
#
#------------------------------------------------------------------------
FlattenToString = function( x, delimiter = " " ) {
  # Collapse x into a single string with elements separated by delimiter.
  #
  # x         : data.frame, matrix, list, vector, or any other object
  # delimiter : separator placed between elements, default " "
  #
  # Returns one string for data.frame, matrix, list and vector input;
  # any other object is returned unchanged.
  #
  # The order of the tests matters: is.list( data.frame() ) is TRUE and
  # is.vector( list() ) is TRUE, so test for data.frame or matrix first,
  # then list, then vector.
  if ( is.data.frame( x ) || is.matrix( x ) ) {
    # Flatten row by row. seq_len() gives an empty sequence for zero rows,
    # where 1:nrow( x ) would iterate over c( 1, 0 ) and fail.
    rowStrings = vapply( seq_len( nrow( x ) ),
                         function( row ) {
                           paste( x[row,], collapse = delimiter )
                         },
                         character( 1 ) )
    # Rows are joined on delimiter as well, with no leading separator
    s = paste( rowStrings, collapse = delimiter )
  }
  else if ( is.list( x ) ) {
    s = paste( unlist( x ), collapse = delimiter )
  }
  else if ( is.vector( x ) ) {
    s = paste( x, collapse = delimiter )
  }
  else {
    s = x
  }

  return ( s )
}

#------------------------------------------------------------------------
# Validate dataFrame, or load dataFile and create dataFrame to validate
#------------------------------------------------------------------------
ValidateDataFrame = function( pathIn, dataFile, dataFrame,
columns, target, noTime ) {
columns, target, noTime,
verbose = FALSE ) {

if ( nchar( dataFile ) ) {
# Shame to read the data just for this... anti Big Data. R fails anyway.
Expand All @@ -61,10 +65,35 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
print( "Error: ValidateDataFrame(): dataFrame is not valid." )
return( FALSE )
}
if ( verbose ) {
print( "ValidateDataFrame(): dataFrame is valid." )
}

columnNames = names( df ) # Names from data.frame itself

# Names from API input columns and target
# Is there ',' in API string for name with whitespace?
# regex for multiple whitespace : "\\s+"
if ( length( columns ) > 1 ) {
columnVec = columns # Vector of strings passed in columns, use as-is
}
else {
if ( TRUE %in% grepl( ",", columns ) ) { regex_delimiters = ",+" }
else { regex_delimiters = "\\s+" }
columnVec = strsplit( trimws( columns ), regex_delimiters )[[1]]
}

columnNames = names( df )
columnVec = strsplit( trimws( columns ), "\\s+" )[[1]] # split on whitespace
targetVec = strsplit( trimws( target ), "\\s+" )[[1]] # split on whitespace
# target: a vector of names is used as-is; a single string containing ','
# is split on ',' (CCM can have multiple targets); a string without ','
# is taken as one name, which may contain whitespace.
if ( length( target ) > 1 ) {
  targetVec = target # Vector of strings passed in target, use as-is
}
else {
  if ( TRUE %in% grepl( ",", target ) ) {
    # strsplit() returns a list: take [[1]] for the vector of names
    targetVec = strsplit( trimws( target ), ",+" )[[1]]
  }
  else {
    targetVec = c( target ) # No ',' in target string, take as-is
  }
}

for ( target in targetVec ) {
if ( length( df[,target] ) == 0 ) {
Expand All @@ -79,7 +108,7 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
return( FALSE )
}
}

for ( column in columnVec ) {
if ( length( df[,column] ) == 0 ) {
print( paste("Error: ValidateDataFrame(): Column", column, "is empty."))
Expand All @@ -94,6 +123,9 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
}
}

if ( verbose ) {
print( "ValidateDataFrame(): dataFrame validated." )
}
return( TRUE )
}

Expand Down Expand Up @@ -164,18 +196,18 @@ PlotObsPred = function( df,
# stats: {'MAE': 0., 'RMSE': 0., 'rho': 0. }
stats = ComputeError( df $ Observations,
df $ Predictions )

title = paste( "\nE=", E, " Tp=", Tp,
" rho=", round( stats[['rho']], 2 ),
" RMSE=", round( stats[['RMSE']], 2 ) )

plot( time, df $ Observations, main = title,
xlab = names(df)[1], ylab = "",
type = "l", col = "blue", lwd = 3,
cex.axis = 1.3, cex.lab = 1.3 )

lines( time, df $ Predictions, col = "red", lwd = 3 )

legend( 'topright', c( "Predictions", "Observations" ),
fill = c('red', 'blue' ), bty = 'n', cex = 1.2 )
}
Expand Down Expand Up @@ -204,7 +236,7 @@ PlotSmap = function( SmapList,
print( "PlotSmap: expected at least 3 columns in predictions." )
return( 0 )
}

# Try to convert first column to Date or POSIXlt or numeric
time = NULL
if ( is.numeric( p[,1] ) ) {
Expand All @@ -225,18 +257,18 @@ PlotSmap = function( SmapList,
}

numCoeff = ncol( c ) - 1

old.par = par( no.readonly = TRUE )

par( mfrow = c( numCoeff + 1, 1 ), mar = c( 3.5, 4, 0.5, 1 ),
mgp = c( 1.5, 0.5, 0 ), cex.axis = 1.3, cex.lab = 1.3 )

# Observations & Predictions
plot( time, p $ Observations,
xlab = names(p)[1], ylab = "",
type = "l", col = "blue", lwd = 3,
cex.axis = 1.3, cex.lab = 1.3 )

lines( time, p $ Predictions, col = "red", lwd = 3 )
legend( 'topright', c( "Predictions", "Observations" ),
fill = c('red', 'blue' ), bty = 'n', cex = 1.5 )
Expand Down Expand Up @@ -273,7 +305,7 @@ SurrogateData = function(
method = c("random_shuffle", "ebisuzaki", "seasonal"),
num_surr = 100, T_period = 1, alpha = 0 )
{

method = match.arg(method)
if( method == "random_shuffle" ) {
return( sapply( 1:num_surr, function(i) {
Expand All @@ -284,16 +316,16 @@ SurrogateData = function(
if( any( ! is.finite(ts) ) ) {
stop("SurrogateData(): input time series contained invalid values")
}

n = length(ts)
n2 = floor(n/2)

mu = mean(ts)
sigma = sd(ts)
a = fft(ts)
amplitudes = abs(a)
amplitudes[1] = 0

return( sapply(1:num_surr, function(i) {
if(n %% 2 == 0) # even length
{
Expand All @@ -318,7 +350,7 @@ SurrogateData = function(
if( any(!is.finite(ts)) ) {
stop("SurrogateData(): input time series contained invalid values")
}

n = length(ts)
I_season = suppressWarnings( matrix( 1:T_period, nrow = n, ncol = 1 ) )

Expand Down
7 changes: 4 additions & 3 deletions man/CCM.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ time column rows.}
\item{exclusionRadius}{excludes vectors from the search space of nearest
neighbors if their relative time index is within exclusionRadius.}

\item{columns}{string of whitespace separated column name(s) in the
input data used to create the library.}
\item{columns}{string of whitespace separated column name(s), or vector
of column names used to create the library. If individual column names
contain whitespace, place the names in a vector or append ',' to the name.}

\item{target}{column name in the input data used for prediction.}
\item{target}{column name used for prediction.}

\item{libSizes}{string of 3 whitespace separated integer values
specifying the initial library size, the final library size,
Expand Down
5 changes: 3 additions & 2 deletions man/Embed.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ index or time values. The columns must be named. One of
\item{tau}{integer time delay embedding lag specified as number of
time column rows.}

\item{columns}{string of whitespace separated column name(s) in the
input data to be embedded.}
\item{columns}{string of whitespace separated column name(s), or vector
of column names, in the input data to be embedded. If individual column
names contain whitespace, place the names in a vector or append ',' to
the name.}

\item{verbose}{logical to produce additional console reporting.}
}
Expand Down
Loading

0 comments on commit 38af381

Please sign in to comment.