From 21c453a4343a933e86da4f1c4a286fa02837732c Mon Sep 17 00:00:00 2001 From: aamijar Date: Sun, 19 May 2024 05:04:44 +0000 Subject: [PATCH 01/13] add pca init for tsne --- cpp/include/cuml/manifold/tsne.h | 6 +++-- cpp/src/tsne/barnes_hut_tsne.cuh | 9 +++---- cpp/src/tsne/exact_tsne.cuh | 3 --- cpp/src/tsne/fft_tsne.cuh | 4 --- cpp/src/tsne/tsne_runner.cuh | 46 ++++++++++++++++++++++++++++++++ python/cuml/manifold/t_sne.pyx | 24 ++++++++++------- python/cuml/tests/test_tsne.py | 3 ++- 7 files changed, 69 insertions(+), 26 deletions(-) diff --git a/cpp/include/cuml/manifold/tsne.h b/cpp/include/cuml/manifold/tsne.h index c8d12f2b8c..c2fd041e91 100644 --- a/cpp/include/cuml/manifold/tsne.h +++ b/cpp/include/cuml/manifold/tsne.h @@ -94,8 +94,10 @@ struct TSNEParams { // verbosity level for logging messages during execution int verbosity = CUML_LEVEL_INFO; - // Whether to overwrite the current Y vector with random noise. - bool initialize_embeddings = true; + // Embedding initializer algorithm + // 0 = random layout + // 1 = pca layout + int init = 1; // When this is set to true, the distances from the knn graph will // always be squared before computing conditional probabilities, even if diff --git a/cpp/src/tsne/barnes_hut_tsne.cuh b/cpp/src/tsne/barnes_hut_tsne.cuh index ef63473060..c907eb560d 100644 --- a/cpp/src/tsne/barnes_hut_tsne.cuh +++ b/cpp/src/tsne/barnes_hut_tsne.cuh @@ -126,12 +126,9 @@ value_t Barnes_Hut(value_t* VAL, RAFT_CUDA_TRY(cudaMemsetAsync(old_forces.data(), 0, sizeof(value_t) * n * 2, stream)); rmm::device_uvector YY((nnodes + 1) * 2, stream); - if (params.initialize_embeddings) { - random_vector(YY.data(), -0.0001f, 0.0001f, (nnodes + 1) * 2, stream, params.random_state); - } else { - raft::copy(YY.data(), Y, n, stream); - raft::copy(YY.data() + nnodes + 1, Y + n, n, stream); - } + + raft::copy(YY.data(), Y, n, stream); + raft::copy(YY.data() + nnodes + 1, Y + n, n, stream); rmm::device_uvector tmp(NNZ, stream); value_t* Qs = tmp.data(); diff --git a/cpp/src/tsne/exact_tsne.cuh b/cpp/src/tsne/exact_tsne.cuh index 2b236574b6..680c3200e8 100644 --- a/cpp/src/tsne/exact_tsne.cuh +++ b/cpp/src/tsne/exact_tsne.cuh @@ -54,9 +54,6 @@ value_t Exact_TSNE(value_t* VAL, value_t kl_div = 0; const value_idx dim = params.dim; - if (params.initialize_embeddings) - random_vector(Y, -0.0001f, 0.0001f, n * dim, stream, params.random_state); - // Allocate space //--------------------------------------------------- CUML_LOG_DEBUG("Now allocating memory for TSNE."); diff --git a/cpp/src/tsne/fft_tsne.cuh b/cpp/src/tsne/fft_tsne.cuh index bd9e9e73b8..cb5dedf932 100644 --- a/cpp/src/tsne/fft_tsne.cuh +++ b/cpp/src/tsne/fft_tsne.cuh @@ -340,10 +340,6 @@ value_t FFT_TSNE(value_t* VAL, value_t learning_rate = params.pre_learning_rate; value_t exaggeration = params.early_exaggeration; - if (params.initialize_embeddings) { - random_vector(Y, 0.0000f, 0.0001f, n * 2, stream, params.random_state); - } - value_t kl_div = 0; for (int iter = 0; iter < params.max_iter; iter++) { // Compute charges Q_ij diff --git a/cpp/src/tsne/tsne_runner.cuh b/cpp/src/tsne/tsne_runner.cuh index 8e9fdb0df5..7f7fedd721 100644 --- a/cpp/src/tsne/tsne_runner.cuh +++ b/cpp/src/tsne/tsne_runner.cuh @@ -33,8 +33,16 @@ #include +#include + namespace ML { +template class U> +inline constexpr bool is_instance_of = std::false_type{}; + +template