From d240221b1296281478c2774a1ed94b9c964eaf29 Mon Sep 17 00:00:00 2001 From: Matthieu Lesnoff Date: Sun, 12 Jan 2025 19:27:29 +0100 Subject: [PATCH] spca --- Project.toml | 2 +- src/snipals_shen.jl | 2 +- src/spca.jl | 37 +++++++++++++------------------------ 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/Project.toml b/Project.toml index fe19fa44..64b368d0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Jchemo" uuid = "fbca9394-dd0a-4d1c-b066-ae75f6ef1ad5" authors = ["Matthieu Lesnoff "] -version = "0.8.0" +version = "0.8.1" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" diff --git a/src/snipals_shen.jl b/src/snipals_shen.jl index fb9ccdff..348976dc 100644 --- a/src/snipals_shen.jl +++ b/src/snipals_shen.jl @@ -11,7 +11,7 @@ function snipals_shen(X; kwargs...) res = nipals(X; kwargs...) u = res.u u0 = similar(u) - v = similar(X, p) #res.v * res.sv # = 'v_tild' in Shen et al. 2008 + v = similar(X, p) # = 'v_tild' in Shen et al. 2008 absv = similar(v) ind = list(Int64, p) sel = list(Int64, nvar) diff --git a/src/spca.jl b/src/spca.jl index 3205d4bc..28520b21 100644 --- a/src/spca.jl +++ b/src/spca.jl @@ -22,32 +22,25 @@ Keyword arguments: sPCA-rSVD algorithm (regularized low rank matrix approximation) of Shen & Huang 2008. -The algorithm computes each loadings vector iteratively, by an alternating -LS regression (Nipals) including a step of thresholding. Function `spca` provides -thresholding methods '1' and '2' (`:soft` and `:hard`) reported in Shen & Huang -2008 Lemma 2: +The algorithm computes each loadings vector iteratively, by alternating +least squares regressions (Nipals) including a step of thresholding. Function +`spca` provides thresholding methods '1' and '2' (`:soft` and `:hard`) reported +in Shen & Huang 2008 Lemma 2: * The tuning parameter used by Shen & Huang 2008 is the number of null elements - in the loadings vector, referred to as degree of sparsity. 
The present - function `spca` uses the number of non-zero elements (`nvar`), equal to - p - degree of sparsity. + in the loadings vector, referred to as degree of sparsity. Conversely, the + present function `spca` uses the number of non-zero elements (`nvar`), + equal to p - degree of sparsity. * See the code of function `snipals_shen` for details on how is computed - the cutoff 'lambda' (Shen & Huang 2008) used inside the thresholding - function, given a value for `nvar`. It follows the strategy given in - Shen & Huang 2008 section 2. Differences from other softwares may occur + the cutoff 'lambda' used inside the thresholding function (Shen & Huang 2008), + given a value for `nvar`. Differences from other software may occur when there are tied values in the loadings vector (depending on the choices - made when computing the quantiles). + of the method used to compute quantiles). Shen & Huang 2008 do not decsribe how they deflate matrix `X` after a given PC is computed. The present function `spca` does a regression of the `X`-columns -on the score vector `t`, as it is done in function `spca` of the R Package -`mixOmics` (note however that the `mixOmics` function uses a variant of the Nipals -proposed by Shen & Hunag, that gives different results). - -For the first PC (and when there are not tied values in the loadings vector), -the present function `spca` gives the same result as function `sPCA_rSVD` of -the R package `ltsspca`. However, the `ltsspca` fonction deflates matrix `X` by -regressing the `X`-rows on the loadings vector `v`. Therefore, results differ -from the second PC. +on the score vector `t`. When `meth = :soft`, the function gives the same result as +function `spca` of the R package `mixOmics` (except possibly when there are many tied +values in the loadings vectors). The computed sparse loadings vectors (`V`-columns) are in general non orthogonal. Therefore, there is no a unique decomposition of the variance of `X` such as in PCA. 
@@ -72,10 +65,6 @@ analysis via regularized low rank matrix approximation. Journal of Multivariate Analysis 99, 1015–1034. https://doi.org/10.1016/j.jmva.2007.06.007 -Wang Y. , Van Aelst S., Cevallos Valdiviezo H., Reynkens T. 2019. -ltsspca: Sparse Principal Component Based on Least Trimmed Squares. -Version 0.1.0. https://cran.r-project.org/web/packages/ltsspca - ## Examples ```julia using Jchemo, JchemoData, JLD2