From d240221b1296281478c2774a1ed94b9c964eaf29 Mon Sep 17 00:00:00 2001
From: Matthieu Lesnoff <matthieu.lesnoff@gmail.com>
Date: Sun, 12 Jan 2025 19:27:29 +0100
Subject: [PATCH] spca

---
 Project.toml        |  2 +-
 src/snipals_shen.jl |  2 +-
 src/spca.jl         | 37 +++++++++++++------------------------
 3 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/Project.toml b/Project.toml
index fe19fa44..64b368d0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Jchemo"
 uuid = "fbca9394-dd0a-4d1c-b066-ae75f6ef1ad5"
 authors = ["Matthieu Lesnoff <matthieu.lesnoff@gmail.com>"]
-version = "0.8.0"
+version = "0.8.1"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
diff --git a/src/snipals_shen.jl b/src/snipals_shen.jl
index fb9ccdff..348976dc 100644
--- a/src/snipals_shen.jl
+++ b/src/snipals_shen.jl
@@ -11,7 +11,7 @@ function snipals_shen(X; kwargs...)
     res = nipals(X; kwargs...)
     u = res.u
     u0 = similar(u)
-    v = similar(X, p) #res.v * res.sv  # = 'v_tild' in Shen et al. 2008
+    v = similar(X, p) # = 'v_tild' in Shen & Huang 2008
     absv = similar(v)
     ind = list(Int64, p)
     sel = list(Int64, nvar)
diff --git a/src/spca.jl b/src/spca.jl
index 3205d4bc..28520b21 100644
--- a/src/spca.jl
+++ b/src/spca.jl
@@ -22,32 +22,25 @@ Keyword arguments:
 sPCA-rSVD algorithm (regularized low rank matrix approximation) of 
 Shen & Huang 2008. 
 
-The algorithm computes each loadings vector iteratively, by an alternating 
-LS regression (Nipals) including a step of thresholding. Function `spca` provides 
-thresholding methods '1' and '2' (`:soft` and `:hard`) reported in Shen & Huang 
-2008 Lemma 2:
+The algorithm computes each loadings vector iteratively, by alternating 
+least squares regressions (Nipals) including a step of thresholding. Function 
+`spca` provides thresholding methods '1' and '2' (`:soft` and `:hard`) reported 
+in Shen & Huang 2008 Lemma 2:
 * The tuning parameter used by Shen & Huang 2008 is the number of null elements 
-    in the loadings vector, referred to as degree of sparsity. The present 
-    function `spca` uses the number of non-zero elements (`nvar`), equal to 
-    p - degree of sparsity.
+    in the loadings vector, referred to as degree of sparsity. Conversely, the 
+    present function `spca` uses the number of non-zero elements (`nvar`), 
+    equal to p - degree of sparsity.
 * See the code of function `snipals_shen` for details on how is computed 
-    the cutoff 'lambda'  (Shen & Huang 2008) used inside the thresholding
-    function, given a value for `nvar`. It follows the strategy given in 
-    Shen & Huang 2008 section 2. Differences from other softwares may occur 
+    the cutoff 'lambda' used inside the thresholding function (Shen & Huang 2008), 
+    given a value for `nvar`. Differences from other software may occur 
     when there are tied values in the loadings vector (depending on the choices 
-    made when computing the quantiles).
+    of method used to compute quantiles).
 
 Shen & Huang 2008 do not decsribe how they deflate matrix `X` after a given PC 
 is computed. The present function `spca` does a regression of the `X`-columns 
-on the score vector `t`, as it is done in function `spca` of the R Package 
-`mixOmics` (note however that the `mixOmics` function uses a variant of the Nipals
-proposed by Shen & Hunag, that gives different results).
-
-For the first PC (and when there are not tied values in the loadings vector), 
-the present function `spca` gives the same result as function `sPCA_rSVD` of 
-the R package `ltsspca`. However, the `ltsspca` fonction deflates matrix `X` by 
-regressing the `X`-rows on the loadings vector `v`. Therefore, results differ 
-from the second PC. 
+on the score vector `t`. When `meth = :soft`, the function gives the same result as 
+function `spca` of the R package `mixOmics` (except possibly when there are many tied 
+values in the loadings vectors). 
 
 The computed sparse loadings vectors (`V`-columns) are in general non orthogonal. 
 Therefore, there is no a unique decomposition of the variance of `X` such as in PCA. 
@@ -72,10 +65,6 @@ analysis via regularized low rank matrix approximation.
 Journal of Multivariate Analysis 99, 1015–1034. 
 https://doi.org/10.1016/j.jmva.2007.06.007
 
-Wang Y. , Van Aelst S., Cevallos Valdiviezo H., Reynkens T. 2019.
-ltsspca: Sparse Principal Component Based on Least Trimmed Squares.
-Version 0.1.0. https://cran.r-project.org/web/packages/ltsspca
-
 ## Examples
 ```julia
 using Jchemo, JchemoData, JLD2