v1.15.3 SMap remove warning. Allow whitespace in column names.

SugiharaLab · Dec 1, 2023 · 38af381 · 38af381
1 parent 3955b66
commit 38af381
Show file tree

Hide file tree

Showing 19 changed files with 224 additions and 97 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: rEDM
 Type:    Package
 Title:   Empirical Dynamic Modeling ('EDM')
-Version: 1.15.1
-Date:    2023-10-27
+Version: 1.15.3
+Date:    2023-12-01
 Authors@R: c( person("Joseph", "Park", role = c("aut", "cre"),
                      email = "[email protected]",
                      comment = c(ORCID = "0000-0001-5411-1409")),

diff --git a/NEWS.md b/NEWS.md
@@ -19,6 +19,8 @@
 - `CCM()` `replacement` parameter removed.
 - Legacy overload functions removed.
 - Version 1.15.1 `ignoreNan` added in `PredictNonlinear()`. Replace unicode in pLot labels with mathplot expression. cppEDM initialize `nanFound` in DataFrame.h for UBSAN. Sync with cppEDM 1.15.1.
+- Version 1.15.2 Allow `columns` names with spaces. If the `columns` argument is a string use the "," delimiter to separate names. Remove `SMap` warning for disjoint library.
+- Version 1.15.3 Allow `columns` and `target` names with spaces in CCM.
 
 ##### Version 1.14
 - cppEDM core added `generateLibrary` parameter to `Simplex()` and `SMap()`.  If `TRUE` the state-space library has newly generated points added. Not available due to Rcpp 20 parameter limit. 

diff --git a/R/EDM.R b/R/EDM.R
@@ -41,8 +41,9 @@ Embed = function( path      = "./",
   }
 
   # If columns are vectors/list, convert to string for cppEDM
-  if ( is.vector( columns ) || is.list( columns ) ) {
-    columns = paste( columns, collapse = " " )
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
+  if ( ! is.character( columns ) || length( columns ) > 1 ) {
+    columns = FlattenToString( columns, "," )
   }
 
   # Mapped to Embed_rcpp() (Embed.cpp) in RcppEDMCommon.cpp
@@ -102,14 +103,18 @@ Simplex = function( pathIn          = "./",
   }
 
   # If lib, pred, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
   if ( ! is.character( pred ) || length( pred ) > 1 ) {
     pred = FlattenToString( pred )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # NOTE: Rcpp has a 20 argument limit!
@@ -196,14 +201,18 @@ SMap = function( pathIn          = "./",
   }
 
   # If lib, pred, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
   if ( ! is.character( pred ) || length( pred ) > 1 ) {
     pred = FlattenToString( pred )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # NOTE: Rcpp has a 20 argument limit!
@@ -287,14 +296,18 @@ Multiview = function( pathIn          = "./",
   }
 
   # If lib, pred, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
   if ( ! is.character( pred ) || length( pred ) > 1 ) {
     pred = FlattenToString( pred )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # NOTE: Rcpp has a 20 argument limit!
@@ -390,11 +403,21 @@ CCM = function( pathIn          = "./",
   }
 
   # If libSizes, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
+  # NOTE: CCM can have multiple target
   if ( ! is.character( libSizes ) || length( libSizes ) > 1 ) {
     libSizes = FlattenToString( libSizes )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( ! is.character( target ) || length( target ) > 1 ) {
+    target = FlattenToString( target, "," ) # multiple target: join on ','
+  }
+  else {
+    if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+      target = paste0( target, ',' ) # space in target: add , for cppEDM
+    }
   }
 
   # NOTE: Rcpp has a 20 argument limit!
@@ -493,14 +516,18 @@ EmbedDimension = function ( pathIn          = "./",
   }
 
   # If lib, pred, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
   if ( ! is.character( pred ) || length( pred ) > 1 ) {
     pred = FlattenToString( pred )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # Mapped to EmbedDimension_rcpp() (EmbedDim.cpp) in RcppEDMCommon.cpp
@@ -574,14 +601,18 @@ PredictInterval = function( pathIn          = "./",
   }
 
   # If lib, pred, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
   if ( ! is.character( pred ) || length( pred ) > 1 ) {
     pred = FlattenToString( pred )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # Mapped to PredictInterval_rcpp() (PredictInterval.cpp) in RcppEDMCommon.cpp
@@ -658,6 +689,7 @@ PredictNonlinear = function( pathIn          = "./",
   }
 
   # If lib, pred, theta, columns are vectors/list, convert to string for cppEDM
+  # NOTE: columns joined on ',' to enable names with whitespace in cppEDM
   if ( ! is.character( lib ) || length( lib ) > 1 ) {
     lib = FlattenToString( lib )
   }
@@ -668,7 +700,10 @@ PredictNonlinear = function( pathIn          = "./",
     theta = FlattenToString( theta )
   }
   if ( ! is.character( columns ) || length( columns ) > 1 ) {
-    columns = FlattenToString( columns )
+    columns = FlattenToString( columns, "," )
+  }
+  if ( length( strsplit( target, ' ' )[[1]] ) > 1 ) {
+    target = paste0( target, ',' ) # space in target: add , for cppEDM
   }
 
   # Mapped to PredictNonlinear_rcpp() (PredictNL.cpp) in RcppEDMCommon.cpp

diff --git a/R/EDM_AuxFuncs.R b/R/EDM_AuxFuncs.R
@@ -14,34 +14,38 @@ ComputeError = function( obs, pred ) {
 #------------------------------------------------------------------------
 # 
 #------------------------------------------------------------------------
-FlattenToString = function( x ) {
-  # R is wonderful... is.vector( list() ) is TRUE is.list( data.frame ) TRUE
+FlattenToString = function( x, delimiter = " " ) {
+  # R is Bizarre... does not have a consistent type system
+  #   is.vector( list() ) is TRUE; is.list( data.frame() ) is TRUE
+  #   length( 'xxx' ) is 1; length( c('xxx' ) ) is 1
+  #   nchar ( 'xxx' ) is 3; nchar ( c('xxx' ) ) is 3
   # Test for data.frame or matrix first, then list, then vector
   # or, use class string as selector
   if ( is.data.frame( x ) || is.matrix( x ) ) {
     s = ""
     for( row in 1:nrow( x ) ) {
-      s = paste( s, paste( x[row,], collapse = " " ), collapse = " " )
+      s = paste( s, paste( x[row,], collapse = delimiter ),
+                 collapse = delimiter )
     }
   }
   else if ( is.list( x ) ) {
-    s = paste( unlist( x ),  collapse = " " )
+    s = paste( unlist( x ), collapse = delimiter )
   }
   else if ( is.vector( x ) ) {
-    s = paste( x,  collapse = " " )
+    s = paste( x,  collapse = delimiter )
   }
   else {
     s = x
   }
-
   return ( s )
 }
 
 #------------------------------------------------------------------------
 # Validate dataFrame, or load dataFile and create dataFrame to validate
 #------------------------------------------------------------------------
 ValidateDataFrame = function( pathIn, dataFile, dataFrame,
-                              columns, target, noTime ) {
+                              columns, target, noTime,
+                              verbose = FALSE ) {
 
   if ( nchar( dataFile ) ) {
     # Shame to read the data just for this... anti Big Data. R fails anyway.
@@ -61,10 +65,35 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
     print( "Error: ValidateDataFrame(): dataFrame is not valid." )
     return( FALSE )
   }
+  if ( verbose ) {
+    print( "ValidateDataFrame(): dataFrame is valid." )
+  }
+
+  columnNames = names( df ) # Names from data.frame itself
+
+  # Names from API input columns and target
+  # Is there ',' in API string for name with whitespace?
+  # regex for multiple whitespace : "\\s+"
+  if ( length( columns ) > 1 ) {
+    columnVec = columns # Vector of strings passed in columns, use as-is
+  }
+  else {
+    if ( TRUE %in% grepl( ",", columns ) ) { regex_delimiters = ",+"   }
+    else                                   { regex_delimiters = "\\s+" }
+    columnVec = strsplit( trimws( columns ), regex_delimiters )[[1]]
+  }
 
-  columnNames = names( df )
-  columnVec   = strsplit( trimws( columns ), "\\s+" )[[1]] # split on whitespace
-  targetVec   = strsplit( trimws( target ),  "\\s+" )[[1]] # split on whitespace
+  if ( length( target ) > 1 ) {
+    targetVec = target # Vector of strings passed in target, use as-is
+  }
+  else {
+    if ( TRUE %in% grepl( ",", target ) ) {
+      targetVec = strsplit( trimws( target ), ",+" )[[1]] # CCM: multiple target
+    }
+    else {
+      targetVec = c( target ) # No ',' in target string, take as-is
+    }
+  }
 
   for ( target in targetVec ) {
     if ( length( df[,target] ) == 0 ) {
@@ -79,7 +108,7 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
       return( FALSE )
     }
   }
-  
+
   for ( column in columnVec ) {
     if ( length( df[,column] ) == 0 ) {
       print( paste("Error: ValidateDataFrame(): Column", column, "is empty."))
@@ -94,6 +123,9 @@ ValidateDataFrame = function( pathIn, dataFile, dataFrame,
     }
   }
 
+  if ( verbose ) {
+    print( "ValidateDataFrame(): dataFrame validated." )
+  }
   return( TRUE )
 }
 
@@ -164,18 +196,18 @@ PlotObsPred = function( df,
   # stats: {'MAE': 0., 'RMSE': 0., 'rho': 0. }
   stats = ComputeError( df $ Observations,
                         df $ Predictions )
-  
+
   title = paste( "\nE=", E, " Tp=", Tp,
                  " rho=",  round( stats[['rho']],  2 ),    
                  " RMSE=", round( stats[['RMSE']], 2 ) )
-  
+
   plot( time, df $ Observations, main = title,
         xlab = names(df)[1], ylab = "",
         type = "l", col = "blue", lwd = 3,
         cex.axis = 1.3, cex.lab = 1.3 )
-  
+
   lines( time, df $ Predictions, col = "red", lwd = 3 )
-  
+
   legend( 'topright', c( "Predictions", "Observations" ), 
           fill = c('red', 'blue' ), bty = 'n', cex = 1.2 )
 }
@@ -204,7 +236,7 @@ PlotSmap = function( SmapList,
     print( "PlotSmap: expected at least 3 columns in predictions." )
     return( 0 )
   }
-  
+
   # Try to convert first column to Date or POSIXlt or numeric
   time = NULL
   if ( is.numeric( p[,1] ) ) {
@@ -225,18 +257,18 @@ PlotSmap = function( SmapList,
   }
 
   numCoeff = ncol( c ) - 1 
-  
+
   old.par = par( no.readonly = TRUE )
-  
+
   par( mfrow = c( numCoeff + 1, 1 ), mar = c( 3.5, 4, 0.5, 1 ),
        mgp = c( 1.5, 0.5, 0 ), cex.axis = 1.3, cex.lab = 1.3 )
-  
+
   # Observations & Predictions
   plot( time, p $ Observations,
         xlab = names(p)[1], ylab = "",
         type = "l", col = "blue", lwd = 3,
         cex.axis = 1.3, cex.lab = 1.3 )
-  
+
   lines( time, p $ Predictions, col = "red", lwd = 3 )
   legend( 'topright', c( "Predictions", "Observations" ), 
           fill = c('red', 'blue' ), bty = 'n', cex = 1.5 )
@@ -273,7 +305,7 @@ SurrogateData = function(
   method = c("random_shuffle", "ebisuzaki", "seasonal"), 
   num_surr = 100, T_period = 1, alpha = 0 )
 {
-  
+
   method = match.arg(method)
   if( method == "random_shuffle" ) {
     return( sapply( 1:num_surr, function(i) {
@@ -284,16 +316,16 @@ SurrogateData = function(
     if( any( ! is.finite(ts) ) ) {
       stop("SurrogateData(): input time series contained invalid values")
     }
-    
+
     n  = length(ts)
     n2 = floor(n/2)
-    
+
     mu    = mean(ts)
     sigma = sd(ts)
     a     = fft(ts)
     amplitudes    = abs(a)
     amplitudes[1] = 0
-    
+
     return( sapply(1:num_surr, function(i) {
       if(n %% 2 == 0) # even length
       {
@@ -318,7 +350,7 @@ SurrogateData = function(
     if( any(!is.finite(ts)) ) {
       stop("SurrogateData(): input time series contained invalid values")
     }
-    
+
     n = length(ts)
     I_season = suppressWarnings( matrix( 1:T_period, nrow = n, ncol = 1 ) )
 

diff --git a/man/CCM.Rd b/man/CCM.Rd
@@ -30,10 +30,11 @@ time column rows.}
 \item{exclusionRadius}{excludes vectors from the search space of nearest 
 neighbors if their relative time index is within exclusionRadius.}
 
-\item{columns}{string of whitespace separated column name(s) in the
-input data used to create the library.}
+\item{columns}{string of whitespace separated column name(s), or vector
+of column names used to create the library. If individual column names
+contain whitespace, place the names in a vector, or append ',' to the name.}
 
-\item{target}{column name in the input data used for prediction.}
+\item{target}{column name used for prediction.}
 
 \item{libSizes}{string of 3 whitespace separated integer values
   specifying the intial library size, the final library size,

diff --git a/man/Embed.Rd b/man/Embed.Rd
@@ -23,8 +23,9 @@ index or time values. The columns must be named. One of
 \item{tau}{integer time delay embedding lag specified as number of
 time column rows.}
 
-\item{columns}{string of whitespace separated column name(s) in the
-input data to be embedded.}
+\item{columns}{string of whitespace separated column name(s), or vector
+of column names in the input data to be embedded. If individual column names
+contain whitespace, place the names in a vector, or append ',' to the name.}
 
 \item{verbose}{logical to produce additional console reporting.}
 }