From cd28cff1888dc4b5e1e504426ed51987aaeb4233 Mon Sep 17 00:00:00 2001 From: Guillermo Oyarzun Date: Mon, 13 Jan 2025 16:26:52 +0100 Subject: [PATCH] feat(gpu): implement fhe rand on gpu --- .../cuda/include/linear_algebra.h | 5 + .../cuda/src/linearalgebra/addition.cu | 38 + .../cuda/src/linearalgebra/addition.cuh | 36 + backends/tfhe-cuda-backend/src/bindings.rs | 11 + tfhe/benches/integer/bench.rs | 83 +- tfhe/src/core_crypto/gpu/mod.rs | 25 + tfhe/src/high_level_api/booleans/oprf.rs | 34 +- tfhe/src/high_level_api/integers/oprf.rs | 69 +- tfhe/src/integer/gpu/server_key/radix/mod.rs | 28 +- tfhe/src/integer/gpu/server_key/radix/oprf.rs | 736 ++++++++++++++++++ .../server_key/radix/vector_comparisons.rs | 13 +- .../gpu/server_key/radix/vector_find.rs | 28 +- tfhe/src/shortint/engine/mod.rs | 13 +- tfhe/src/shortint/oprf.rs | 55 +- tfhe/src/shortint/server_key/mod.rs | 7 +- 15 files changed, 1108 insertions(+), 73 deletions(-) create mode 100644 tfhe/src/integer/gpu/server_key/radix/oprf.rs diff --git a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h index ddc9a2a508..69394e4af3 100644 --- a/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h +++ b/backends/tfhe-cuda-backend/cuda/include/linear_algebra.h @@ -44,6 +44,11 @@ void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64( void *stream, uint32_t gpu_index, void *lwe_array_out, void const *lwe_array_in, void const *cleartext_array_in, uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count); +void cuda_add_lwe_ciphertext_vector_plaintext_64( + void *stream, uint32_t gpu_index, void *lwe_array_out, + void const *lwe_array_in, const uint64_t plaintext_in, + const uint32_t input_lwe_dimension, + const uint32_t input_lwe_ciphertext_count); } #endif // CUDA_LINALG_H_ diff --git a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu index 03ded74b46..6e38bdebd8 100644 --- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu +++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cu @@ -114,3 +114,41 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_64( static_cast(plaintext_array_in), input_lwe_dimension, input_lwe_ciphertext_count); } + +/* + * Perform the addition of a u64 input LWE ciphertext vector with a u64 input + * plaintext scalar. + * - `stream` is a void pointer to the Cuda stream to be used in the kernel + * launch + * - `gpu_index` is the index of the GPU to be used in the kernel launch + * - `lwe_array_out` is an array of size + * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have + * been allocated on the GPU before calling this function, and that will hold + * the result of the computation. + * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have + * been allocated and initialized before calling this function. It has the same + * size as the output array. + * - `plaintext_in` is the plaintext used as input. + * - `input_lwe_dimension` is the number of mask elements in the input and + * output LWE ciphertext vectors + * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the + * input LWE ciphertext vector, as well as in the output. + * + * The same input plaintext is added to the body of the + * LWE ciphertexts in the LWE ciphertext vector. The result of the + * operation is stored in the output LWE ciphertext vector. The two input + * vectors are unchanged. This function is a wrapper to a device function that + * performs the operation on the GPU. + */ +void cuda_add_lwe_ciphertext_vector_plaintext_64( + void *stream, uint32_t gpu_index, void *lwe_array_out, + void const *lwe_array_in, const uint64_t plaintext_in, + const uint32_t input_lwe_dimension, + const uint32_t input_lwe_ciphertext_count) { + + host_addition_plaintext_scalar( + static_cast(stream), gpu_index, + static_cast(lwe_array_out), + static_cast(lwe_array_in), plaintext_in, + input_lwe_dimension, input_lwe_ciphertext_count); +} diff --git a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh index 3401cdadd2..57f1a0ee99 100644 --- a/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/linearalgebra/addition.cuh @@ -27,6 +27,21 @@ plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input, } } +template +__global__ void plaintext_addition_scalar(T *output, T const *lwe_input, + const T plaintext_input, + const uint32_t input_lwe_dimension, + const uint32_t num_entries) { + + int tid = threadIdx.x; + int lwe_index = blockIdx.x * blockDim.x + tid; + if (lwe_index < num_entries) { + int index = lwe_index * (input_lwe_dimension + 1) + input_lwe_dimension; + // Here we take advantage of the wrapping behaviour of uint + output[index] = lwe_input[index] + plaintext_input; + } +} + template __host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output, @@ -48,6 +63,27 @@ host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output, check_cuda_error(cudaGetLastError()); } +template +__host__ void host_addition_plaintext_scalar( + cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input, + const T plaintext_input, const uint32_t lwe_dimension, + const uint32_t lwe_ciphertext_count) { + + cudaSetDevice(gpu_index); + int num_blocks = 0, num_threads = 0; + int num_entries = lwe_ciphertext_count; + getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads); + dim3 grid(num_blocks, 1, 1); + dim3 thds(num_threads, 1, 1); + + cuda_memcpy_async_gpu_to_gpu( + output, lwe_input, (lwe_dimension + 1) * lwe_ciphertext_count * sizeof(T), + stream, gpu_index); + plaintext_addition_scalar<<>>( + output, lwe_input, plaintext_input, lwe_dimension, num_entries); + check_cuda_error(cudaGetLastError()); +} + template __global__ void addition(T *output, T const *input_1, T const *input_2, uint32_t num_entries) { diff --git a/backends/tfhe-cuda-backend/src/bindings.rs b/backends/tfhe-cuda-backend/src/bindings.rs index 983a409338..fa16da0136 100644 --- a/backends/tfhe-cuda-backend/src/bindings.rs +++ b/backends/tfhe-cuda-backend/src/bindings.rs @@ -1345,6 +1345,17 @@ extern "C" { input_lwe_ciphertext_count: u32, ); } +extern "C" { + pub fn cuda_add_lwe_ciphertext_vector_plaintext_64( + stream: *mut ffi::c_void, + gpu_index: u32, + lwe_array_out: *mut ffi::c_void, + lwe_array_in: *const ffi::c_void, + plaintext_in: u64, + input_lwe_dimension: u32, + input_lwe_ciphertext_count: u32, + ); +} extern "C" { pub fn cuda_fourier_polynomial_mul( stream: *mut ffi::c_void, diff --git a/tfhe/benches/integer/bench.rs b/tfhe/benches/integer/bench.rs index d6ead085b7..8fb1200035 100644 --- a/tfhe/benches/integer/bench.rs +++ b/tfhe/benches/integer/bench.rs @@ -1307,11 +1307,12 @@ define_server_key_bench_default_fn!( #[cfg(feature = "gpu")] mod cuda { use super::*; - use criterion::criterion_group; + use criterion::{black_box, criterion_group}; use tfhe::core_crypto::gpu::CudaStreams; use tfhe::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext; use tfhe::integer::gpu::server_key::CudaServerKey; + use tfhe_csprng::seeders::Seed; fn bench_cuda_server_key_unary_function_clean_inputs( c: &mut Criterion, @@ -1731,6 +1732,84 @@ mod cuda { bench_group.finish() } + pub fn cuda_unsigned_oprf(c: &mut Criterion) { + let bench_name = "integer::cuda::unsigned_oprf"; + + let mut bench_group = c.benchmark_group(bench_name); + bench_group + .sample_size(15) + .measurement_time(std::time::Duration::from_secs(30)); + + let streams = CudaStreams::new_multi_gpu(); + + for (param, num_block, bit_size) in ParamsAndNumBlocksIter::default() { + let param_name = param.name(); + + let bench_id; + + match BENCH_TYPE.get().unwrap() { + BenchmarkType::Latency => { + bench_id = format!("{bench_name}::{param_name}::{bit_size}_bits"); + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = + KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &streams); + + b.iter(|| { + _ = black_box( + gpu_sks + .par_generate_oblivious_pseudo_random_unsigned_integer_bounded( + Seed(0), + bit_size as u64, + num_block as u64, + &streams, + ), + ); + }) + }); + } + BenchmarkType::Throughput => { + bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits"); + let elements = throughput_num_threads(num_block); + bench_group.throughput(Throughput::Elements(elements)); + + bench_group.bench_function(&bench_id, |b| { + let (cks, _cpu_sks) = + KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix); + let gpu_sks = CudaServerKey::new(&cks, &streams); + + b.iter(|| { + (0..elements).into_par_iter().for_each(|i| { + let selected_gpu = + streams.gpu_indexes[i as usize % streams.gpu_indexes.len()]; + let stream = CudaStreams::new_single_gpu(selected_gpu); + gpu_sks + .par_generate_oblivious_pseudo_random_unsigned_integer_bounded( + Seed(0), + bit_size as u64, + num_block as u64, + &stream, + ); + }) + }) + }); + } + } + + write_to_json::( + &bench_id, + param, + param.name(), + "oprf", + &OperatorType::Atomic, + bit_size as u32, + vec![param.message_modulus().0.ilog2(); num_block], + ); + } + + bench_group.finish() + } + macro_rules! define_cuda_server_key_bench_clean_input_unary_fn ( (method_name: $server_key_method:ident, display_name:$name:ident) => { ::paste::paste!{ @@ -2376,6 +2455,7 @@ mod cuda { cuda_trailing_zeros, cuda_trailing_ones, cuda_ilog2, + cuda_unsigned_oprf, ); criterion_group!( @@ -2395,6 +2475,7 @@ mod cuda { cuda_scalar_mul, cuda_scalar_div, cuda_scalar_rem, + cuda_unsigned_oprf, ); criterion_group!( diff --git a/tfhe/src/core_crypto/gpu/mod.rs b/tfhe/src/core_crypto/gpu/mod.rs index e5c5271017..7ed226c3f3 100644 --- a/tfhe/src/core_crypto/gpu/mod.rs +++ b/tfhe/src/core_crypto/gpu/mod.rs @@ -481,6 +481,31 @@ pub unsafe fn add_lwe_ciphertext_vector_plaintext_vector_async( + streams: &CudaStreams, + lwe_array_out: &mut CudaVec, + lwe_array_in: &CudaVec, + plaintext_in: u64, + lwe_dimension: LweDimension, + num_samples: u32, +) { + cuda_add_lwe_ciphertext_vector_plaintext_64( + streams.ptr[0], + streams.gpu_indexes[0].0, + lwe_array_out.as_mut_c_ptr(0), + lwe_array_in.as_c_ptr(0), + plaintext_in, + lwe_dimension.0 as u32, + num_samples, + ); +} + /// Assigned addition of a vector of LWE ciphertexts with a vector of plaintexts /// /// # Safety diff --git a/tfhe/src/high_level_api/booleans/oprf.rs b/tfhe/src/high_level_api/booleans/oprf.rs index 4546cfda84..942022010e 100644 --- a/tfhe/src/high_level_api/booleans/oprf.rs +++ b/tfhe/src/high_level_api/booleans/oprf.rs @@ -1,6 +1,12 @@ -use super::FheBool; +use super::{FheBool, InnerBoolean}; use crate::high_level_api::global_state; +#[cfg(feature = "gpu")] +use crate::high_level_api::global_state::with_thread_local_cuda_streams; use crate::high_level_api::keys::InternalServerKey; +#[cfg(feature = "gpu")] +use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; +#[cfg(feature = "gpu")] +use crate::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext; use crate::integer::BooleanBlock; use tfhe_csprng::seeders::Seed; @@ -24,16 +30,28 @@ impl FheBool { /// let dec_result: bool = ct_res.decrypt(&client_key); /// ``` pub fn generate_oblivious_pseudo_random(seed: Seed) -> Self { - global_state::with_internal_keys(|key| match key { + let (ciphertext, tag) = global_state::with_internal_keys(|key| match key { InternalServerKey::Cpu(key) => { let ct = key.pbs_key().key.generate_oblivious_pseudo_random(seed, 1); - - Self::new(BooleanBlock(ct), key.tag.clone()) + ( + InnerBoolean::Cpu(BooleanBlock::new_unchecked(ct)), + key.tag.clone(), + ) } #[cfg(feature = "gpu")] - InternalServerKey::Cuda(_) => { - todo!("Cuda devices do not yet support oblivious pseudo random generation") - } - }) + InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| { + let d_ct: CudaUnsignedRadixCiphertext = cuda_key + .key + .key + .generate_oblivious_pseudo_random(seed, 1, streams); + ( + InnerBoolean::Cuda(CudaBooleanBlock::from_cuda_radix_ciphertext( + d_ct.ciphertext, + )), + cuda_key.tag.clone(), + ) + }), + }); + Self::new(ciphertext, tag) } } diff --git a/tfhe/src/high_level_api/integers/oprf.rs b/tfhe/src/high_level_api/integers/oprf.rs index 4ee3ec1049..09ee7f375a 100644 --- a/tfhe/src/high_level_api/integers/oprf.rs +++ b/tfhe/src/high_level_api/integers/oprf.rs @@ -1,8 +1,11 @@ -use super::{FheIntId, FheUintId}; +use super::{FheIntId, FheUint, FheUintId}; use crate::high_level_api::global_state; +#[cfg(feature = "gpu")] +use crate::high_level_api::global_state::with_thread_local_cuda_streams; use crate::high_level_api::keys::InternalServerKey; -use crate::{FheInt, FheUint, Seed}; - +#[cfg(feature = "gpu")] +use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext}; +use crate::{FheInt, Seed}; impl FheUint { /// Generates an encrypted unsigned integer /// taken uniformly in its full range using the given seed. @@ -35,9 +38,18 @@ impl FheUint { Self::new(ct, key.tag.clone()) } #[cfg(feature = "gpu")] - InternalServerKey::Cuda(_) => { - todo!("Cuda devices do not yet support oblivious pseudo random generation") - } + InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| { + let d_ct: CudaUnsignedRadixCiphertext = cuda_key + .key + .key + .par_generate_oblivious_pseudo_random_unsigned_integer( + seed, + Id::num_blocks(cuda_key.message_modulus()) as u64, + streams, + ); + + Self::new(d_ct, cuda_key.tag.clone()) + }), }) } /// Generates an encrypted `num_block` blocks unsigned integer @@ -75,9 +87,18 @@ impl FheUint { Self::new(ct, key.tag.clone()) } #[cfg(feature = "gpu")] - InternalServerKey::Cuda(_) => { - todo!("Cuda devices do not yet support oblivious pseudo random generation") - } + InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| { + let d_ct: CudaUnsignedRadixCiphertext = cuda_key + .key + .key + .par_generate_oblivious_pseudo_random_unsigned_integer_bounded( + seed, + random_bits_count, + Id::num_blocks(cuda_key.message_modulus()) as u64, + streams, + ); + Self::new(d_ct, cuda_key.tag.clone()) + }), }) } } @@ -115,9 +136,18 @@ impl FheInt { Self::new(ct, key.tag.clone()) } #[cfg(feature = "gpu")] - InternalServerKey::Cuda(_) => { - todo!("Cuda devices do not yet support oblivious pseudo random generation") - } + InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| { + let d_ct: CudaSignedRadixCiphertext = cuda_key + .key + .key + .par_generate_oblivious_pseudo_random_signed_integer( + seed, + Id::num_blocks(cuda_key.message_modulus()) as u64, + streams, + ); + + Self::new(d_ct, cuda_key.tag.clone()) + }), }) } @@ -157,9 +187,18 @@ impl FheInt { Self::new(ct, key.tag.clone()) } #[cfg(feature = "gpu")] - InternalServerKey::Cuda(_) => { - todo!("Cuda devices do not yet support oblivious pseudo random generation") - } + InternalServerKey::Cuda(cuda_key) => with_thread_local_cuda_streams(|streams| { + let d_ct: CudaSignedRadixCiphertext = cuda_key + .key + .key + .par_generate_oblivious_pseudo_random_signed_integer_bounded( + seed, + random_bits_count, + Id::num_blocks(cuda_key.message_modulus()) as u64, + streams, + ); + Self::new(d_ct, cuda_key.tag.clone()) + }), }) } } diff --git a/tfhe/src/integer/gpu/server_key/radix/mod.rs b/tfhe/src/integer/gpu/server_key/radix/mod.rs index ccb49e3dfc..6c8031da2f 100644 --- a/tfhe/src/integer/gpu/server_key/radix/mod.rs +++ b/tfhe/src/integer/gpu/server_key/radix/mod.rs @@ -20,7 +20,9 @@ use crate::integer::gpu::{ }; use crate::integer::server_key::radix_parallel::OutputFlag; use crate::shortint::ciphertext::{Degree, NoiseLevel}; -use crate::shortint::engine::{fill_accumulator, fill_many_lut_accumulator}; +use crate::shortint::engine::{ + fill_accumulator, fill_accumulator_no_encoding, fill_many_lut_accumulator, +}; use crate::shortint::server_key::{ BivariateLookupTableOwned, LookupTableOwned, ManyLookupTableOwned, }; @@ -36,6 +38,7 @@ mod even_odd; mod ilog2; mod mul; mod neg; +mod oprf; mod rotate; mod scalar_add; mod scalar_bitwise_op; @@ -805,6 +808,29 @@ impl CudaServerKey { degree: Degree::new(max_value), } } + pub(crate) fn generate_lookup_table_no_encode(&self, f: F) -> LookupTableOwned + where + F: Fn(u64) -> u64, + { + let (glwe_size, polynomial_size) = match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + (d_bsk.glwe_dimension.to_glwe_size(), d_bsk.polynomial_size) + } + CudaBootstrappingKey::MultiBit(d_bsk) => { + (d_bsk.glwe_dimension.to_glwe_size(), d_bsk.polynomial_size) + } + }; + let mut acc = GlweCiphertext::new(0, glwe_size, polynomial_size, self.ciphertext_modulus); + + fill_accumulator_no_encoding(&mut acc, polynomial_size, glwe_size, f); + + LookupTableOwned { + acc, + // We should not rely on the degree in this case + // The degree should be set manually on the outputs of PBS by this LUT + degree: Degree::new(self.message_modulus.0 * self.carry_modulus.0 * 2), + } + } pub fn generate_many_lookup_table( &self, diff --git a/tfhe/src/integer/gpu/server_key/radix/oprf.rs b/tfhe/src/integer/gpu/server_key/radix/oprf.rs new file mode 100644 index 0000000000..0ccc94cd99 --- /dev/null +++ b/tfhe/src/integer/gpu/server_key/radix/oprf.rs @@ -0,0 +1,736 @@ +use crate::core_crypto::gpu::CudaStreams; +use crate::integer::gpu::ciphertext::{ + CudaIntegerRadixCiphertext, CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext, +}; +use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; + +use crate::core_crypto::commons::generators::DeterministicSeeder; +use crate::core_crypto::prelude::DefaultRandomGenerator; +use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator}; + +use crate::shortint::oprf::create_random_from_seed_modulus_switched; +use crate::shortint::server_key::LookupTableOwned; + +pub use tfhe_csprng::seeders::{Seed, Seeder}; + +use crate::core_crypto::gpu::{ + cuda_multi_bit_programmable_bootstrap_lwe_ciphertext, + cuda_programmable_bootstrap_lwe_ciphertext, +}; + +use crate::core_crypto::commons::numeric::Numeric; +use crate::core_crypto::gpu::add_lwe_ciphertext_vector_plaintext_scalar_async; +use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList; +use crate::core_crypto::prelude::CastInto; +use crate::integer::gpu::server_key::radix::{CudaLweCiphertextList, LweCiphertextCount}; +use crate::integer::gpu::CudaVec; +use itertools::Itertools; +impl CudaServerKey { + /// Generates an encrypted `num_block` blocks unsigned integer + /// taken uniformly in its full range using the given seed. + /// The encryted value is oblivious to the server. + /// It can be useful to make server random generation deterministic. + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// use tfhe::Seed; + /// + /// let size = 4; + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let d_ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_integer(Seed(0), size as u64, &streams); + /// let ct_res = d_ct_res.to_radix_ciphertext(&streams); + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt_radix(&ct_res); + /// + /// assert!(dec_result < 1 << (2 * size)); + /// ``` + pub fn par_generate_oblivious_pseudo_random_unsigned_integer( + &self, + seed: Seed, + num_blocks: u64, + streams: &CudaStreams, + ) -> CudaUnsignedRadixCiphertext { + assert!(self.message_modulus.0.is_power_of_two()); + let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks; + + let random_bits_count = range_log_size; + + assert!(self.message_modulus.0.is_power_of_two()); + let mut streams_vector = Vec::::with_capacity(num_blocks as usize); + for _ in 0..num_blocks { + streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0])); + } + + let message_bits_count = self.message_modulus.0.ilog2() as u64; + + let mut deterministic_seeder = DeterministicSeeder::::new(seed); + + let seeds: Vec = (0..num_blocks) + .map(|_| deterministic_seeder.seed()) + .collect(); + + let blocks = seeds + .into_par_iter() + .enumerate() + .map(|(i, seed)| { + let stream_index = i; + let i = i as u64; + if i * message_bits_count < random_bits_count { + // if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2) block + // must have only one bit of random + if random_bits_count < (i + 1) * message_bits_count { + let top_message_bits_count = random_bits_count - i * message_bits_count; + + assert!(top_message_bits_count <= message_bits_count); + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + top_message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } else { + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } + } else { + let ct: CudaUnsignedRadixCiphertext = + self.create_trivial_zero_radix(1, &streams_vector[stream_index]); + ct.ciphertext + } + }) + .collect::>(); + self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams) + } + + /// Generates an encrypted `num_block` blocks unsigned integer + /// taken uniformly in `[0, 2^random_bits_count[` using the given seed. + /// The encryted value is oblivious to the server. + /// It can be useful to make server random generation deterministic. + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// use tfhe::Seed; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// let size = 4; + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let random_bits_count = 3; + /// + /// let d_ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_integer_bounded( + /// Seed(0), + /// random_bits_count, + /// size as u64, + /// &streams, + /// ); + /// let ct_res = d_ct_res.to_radix_ciphertext(&streams); + /// // Decrypt: + /// let dec_result: u64 = cks.decrypt(&ct_res); + /// assert!(dec_result < (1 << random_bits_count)); + /// ``` + pub fn par_generate_oblivious_pseudo_random_unsigned_integer_bounded( + &self, + seed: Seed, + random_bits_count: u64, + num_blocks: u64, + streams: &CudaStreams, + ) -> CudaUnsignedRadixCiphertext { + assert!(self.message_modulus.0.is_power_of_two()); + let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks; + + assert!( + random_bits_count <= range_log_size, + "The range asked for a random value (=[0, 2^{random_bits_count}[) does not fit in the available range [0, 2^{range_log_size}[", + ); + + assert!(self.message_modulus.0.is_power_of_two()); + let mut streams_vector = Vec::::with_capacity(num_blocks as usize); + for _ in 0..num_blocks { + streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0])); + } + let message_bits_count = self.message_modulus.0.ilog2() as u64; + + let mut deterministic_seeder = DeterministicSeeder::::new(seed); + + let seeds: Vec = (0..num_blocks) + .map(|_| deterministic_seeder.seed()) + .collect(); + + let blocks = seeds + .into_par_iter() + .enumerate() + .map(|(i, seed)| { + let stream_index = i; + let i = i as u64; + + if i * message_bits_count < random_bits_count { + // if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2) block + // must have only one bit of random + if random_bits_count < (i + 1) * message_bits_count { + let top_message_bits_count = random_bits_count - i * message_bits_count; + + assert!(top_message_bits_count <= message_bits_count); + + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + top_message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } else { + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } + } else { + let ct: CudaUnsignedRadixCiphertext = + self.create_trivial_zero_radix(1, &streams_vector[stream_index]); + ct.ciphertext + } + }) + .collect::>(); + self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams) + } + + /// Generates an encrypted `num_block` blocks signed integer + /// taken uniformly in its full range using the given seed. + /// The encryted value is oblivious to the server. + /// It can be useful to make server random generation deterministic. + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// use tfhe::Seed; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// let size = 4; + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let d_ct_res = sks.par_generate_oblivious_pseudo_random_signed_integer(Seed(0), size as u64, &streams); + /// let ct_res = d_ct_res.to_signed_radix_ciphertext(&streams); + /// + /// // Decrypt: + /// let dec_result: i64 = cks.decrypt_signed_radix(&ct_res); + /// assert!(dec_result < 1 << (2 * size - 1)); + /// assert!(dec_result >= -(1 << (2 * size - 1))); + /// ``` + pub fn par_generate_oblivious_pseudo_random_signed_integer( + &self, + seed: Seed, + num_blocks: u64, + streams: &CudaStreams, + ) -> CudaSignedRadixCiphertext { + assert!(self.message_modulus.0.is_power_of_two()); + let message_bits_count = self.message_modulus.0.ilog2() as u64; + let mut streams_vector = Vec::::with_capacity(num_blocks as usize); + for _ in 0..num_blocks { + streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0])); + } + let mut deterministic_seeder = DeterministicSeeder::::new(seed); + + let seeds: Vec = (0..num_blocks) + .map(|_| deterministic_seeder.seed()) + .collect(); + + let blocks = seeds + .into_par_iter() + .enumerate() + .map(|(i, seed)| { + let stream_index = i; + let ct: CudaSignedRadixCiphertext = self.generate_oblivious_pseudo_random( + seed, + message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + }) + .collect::>(); + self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams) + } + + /// Generates an encrypted `num_block` blocks signed integer + /// taken uniformly in `[0, 2^random_bits_count[` using the given seed. + /// The encryted value is oblivious to the server. + /// It can be useful to make server random generation deterministic. + /// + /// ```rust + /// use tfhe::core_crypto::gpu::CudaStreams; + /// use tfhe::core_crypto::gpu::vec::GpuIndex; + /// use tfhe::integer::gpu::gen_keys_gpu; + /// use tfhe::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + /// use tfhe::Seed; + /// + /// let gpu_index = 0; + /// let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + /// let size = 4; + /// + /// // Generate the client key and the server key: + /// let (cks, sks) = gen_keys_gpu(PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, &streams); + /// + /// let random_bits_count = 3; + /// + /// let d_ct_res = sks.par_generate_oblivious_pseudo_random_signed_integer_bounded( + /// Seed(0), + /// random_bits_count, + /// size as u64, + /// &streams, + /// ); + /// let ct_res = d_ct_res.to_signed_radix_ciphertext(&streams); + /// + /// // Decrypt: + /// let dec_result: i64 = cks.decrypt_signed_radix(&ct_res); + /// assert!(dec_result >= 0); + /// assert!(dec_result < (1 << random_bits_count)); + /// ``` + pub fn par_generate_oblivious_pseudo_random_signed_integer_bounded( + &self, + seed: Seed, + random_bits_count: u64, + num_blocks: u64, + streams: &CudaStreams, + ) -> CudaSignedRadixCiphertext { + assert!(self.message_modulus.0.is_power_of_two()); + let range_log_size = self.message_modulus.0.ilog2() as u64 * num_blocks; + + #[allow(clippy::int_plus_one)] + { + assert!( + random_bits_count + 1 <= range_log_size, + "The range asked for a random value (=[0, 2^{}[) does not fit in the available range [-2^{}, 2^{}[", + random_bits_count, range_log_size-1, range_log_size-1, + ); + } + + assert!(self.message_modulus.0.is_power_of_two()); + let mut streams_vector = Vec::::with_capacity(num_blocks as usize); + for _ in 0..num_blocks { + streams_vector.push(CudaStreams::new_single_gpu(streams.gpu_indexes[0])); + } + let message_bits_count = self.message_modulus.0.ilog2() as u64; + + let mut deterministic_seeder = DeterministicSeeder::::new(seed); + + let seeds = (0..num_blocks).map(|_| deterministic_seeder.seed()); + + let blocks = seeds + .into_iter() + .enumerate() + .map(|(i, seed)| { + let stream_index = i; + let i = i as u64; + if i * message_bits_count < random_bits_count { + // if we generate 5 bits of noise in n blocks of 2 bits, the third (i=2) + // block must have only one bit of random + if random_bits_count < (i + 1) * message_bits_count { + let top_message_bits_count = random_bits_count - i * message_bits_count; + + assert!(top_message_bits_count <= message_bits_count); + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + top_message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } else { + let ct: CudaUnsignedRadixCiphertext = self + .generate_oblivious_pseudo_random( + seed, + message_bits_count, + &streams_vector[stream_index], + ); + ct.ciphertext + } + } else { + let ct: CudaUnsignedRadixCiphertext = + self.create_trivial_zero_radix(1, &streams_vector[stream_index]); + ct.ciphertext + } + }) + .collect::>(); + + self.convert_radixes_vec_to_single_radix_ciphertext(&blocks, streams) + } + + /// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[` + /// `2^random_bits_count` must be smaller than the message modulus + /// The encryted value is oblivious to the server + pub fn generate_oblivious_pseudo_random( + &self, + seed: Seed, + random_bits_count: u64, + streams: &CudaStreams, + ) -> T + where + T: CudaIntegerRadixCiphertext, + { + assert!( + 1 << random_bits_count <= self.message_modulus.0, + "The range asked for a random value (=[0, 2^{}[) does not fit in the available range [0, {}[", + random_bits_count, self.message_modulus.0 + ); + self.generate_oblivious_pseudo_random_message_and_carry(seed, random_bits_count, streams) + } + + /// Uniformly generates a random value in `[0, 2^random_bits_count[` + /// The encryted value is oblivious to the server + pub(crate) fn generate_oblivious_pseudo_random_message_and_carry( + &self, + seed: Seed, + random_bits_count: u64, + streams: &CudaStreams, + ) -> T + where + T: CudaIntegerRadixCiphertext, + { + assert!( + self.message_modulus.0.is_power_of_two(), + "The message modulus(={}), must be a power of 2 to use the OPRF", + self.message_modulus.0 + ); + let message_bits_count = self.message_modulus.0.ilog2() as u64; + + assert!( + self.carry_modulus.0.is_power_of_two(), + "The carry modulus(={}), must be a power of 2 to use the OPRF", + self.carry_modulus.0 + ); + let carry_bits_count = self.carry_modulus.0.ilog2() as u64; + + assert!( + random_bits_count <= carry_bits_count + message_bits_count, + "The number of random bits asked for (={random_bits_count}) is bigger than carry_bits_count (={carry_bits_count}) + message_bits_count(={message_bits_count})", + ); + self.generate_oblivious_pseudo_random_custom_encoding( + seed, + random_bits_count, + 1 + carry_bits_count + message_bits_count, + streams, + ) + } + + /// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[` + /// The output in in the form 0000rrr000noise (rbc=3, fbc=7) + /// The encryted value is oblivious to the server + pub(crate) fn generate_oblivious_pseudo_random_custom_encoding( + &self, + seed: Seed, + random_bits_count: u64, + full_bits_count: u64, + streams: &CudaStreams, + ) -> T + where + T: CudaIntegerRadixCiphertext, + { + assert!( + random_bits_count <= full_bits_count, + "The number of random bits asked for (={random_bits_count}) is bigger than full_bits_count (={full_bits_count})" + ); + + let (in_lwe_size, out_lwe_dimension, polynomial_size) = match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => ( + d_bsk.input_lwe_dimension().to_lwe_size(), + d_bsk.output_lwe_dimension(), + d_bsk.polynomial_size(), + ), + CudaBootstrappingKey::MultiBit(d_bsk) => ( + d_bsk.input_lwe_dimension().to_lwe_size(), + d_bsk.output_lwe_dimension(), + d_bsk.polynomial_size(), + ), + }; + + let seeded = create_random_from_seed_modulus_switched( + seed, + in_lwe_size, + polynomial_size.to_blind_rotation_input_modulus_log(), + self.ciphertext_modulus, + ); + + let p = 1 << random_bits_count; + + let delta = 1_u64 << (64 - full_bits_count); + + let poly_delta = 2 * polynomial_size.0 as u64 / p; + + let lut_no_encode: LookupTableOwned = + self.generate_lookup_table_no_encode(|x| (2 * (x / poly_delta) + 1) * delta / 2); + + let num_ct_blocks = 1; + let ct_seeded = CudaLweCiphertextList::from_lwe_ciphertext(&seeded, streams); + + let mut ct_out: T = self.create_trivial_zero_radix(num_ct_blocks, streams); + + let number_of_messages = 1; + let d_accumulator = + CudaGlweCiphertextList::from_glwe_ciphertext(&lut_no_encode.acc, streams); + let mut lut_vector_indexes: Vec = vec![u64::ZERO; number_of_messages]; + for (i, ind) in lut_vector_indexes.iter_mut().enumerate() { + *ind = >::cast_into(i); + } + + let mut d_lut_vector_indexes = + unsafe { CudaVec::::new_async(number_of_messages, streams, 0) }; + unsafe { d_lut_vector_indexes.copy_from_cpu_async(&lut_vector_indexes, streams, 0) }; + let lwe_indexes_usize: Vec = (0..num_ct_blocks).collect_vec(); + let lwe_indexes = lwe_indexes_usize + .iter() + .map(|&x| >::cast_into(x)) + .collect_vec(); + let mut d_output_indexes = unsafe { CudaVec::::new_async(num_ct_blocks, streams, 0) }; + let mut d_input_indexes = unsafe { CudaVec::::new_async(num_ct_blocks, streams, 0) }; + unsafe { + d_input_indexes.copy_from_cpu_async(&lwe_indexes, streams, 0); + d_output_indexes.copy_from_cpu_async(&lwe_indexes, streams, 0); + } + + match &self.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => { + cuda_programmable_bootstrap_lwe_ciphertext( + &ct_seeded, + &mut ct_out.as_mut().d_blocks, + &d_accumulator, + &d_lut_vector_indexes, + &d_output_indexes, + &d_input_indexes, + LweCiphertextCount(num_ct_blocks), + d_bsk, + streams, + ); + } + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => { + cuda_multi_bit_programmable_bootstrap_lwe_ciphertext( + &ct_seeded, + &mut ct_out.as_mut().d_blocks, + &d_accumulator, + &d_lut_vector_indexes, + &d_output_indexes, + &d_input_indexes, + d_multibit_bsk, + streams, + ); + } + } + + let plaintext_to_add = (p - 1) * delta / 2; + let ct_cloned = ct_out.duplicate(streams); + unsafe { + add_lwe_ciphertext_vector_plaintext_scalar_async( + streams, + &mut ct_out.as_mut().d_blocks.0.d_vec, + &ct_cloned.as_ref().d_blocks.0.d_vec, + plaintext_to_add, + out_lwe_dimension, + num_ct_blocks as u32, + ); + } + streams.synchronize(); + ct_out + } +} + +#[cfg(test)] +pub(crate) mod test { + use crate::core_crypto::gpu::vec::GpuIndex; + use crate::core_crypto::gpu::CudaStreams; + use crate::core_crypto::prelude::decrypt_lwe_ciphertext; + use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext; + use crate::integer::gpu::server_key::CudaBootstrappingKey; + use crate::integer::gpu::{gen_keys_gpu, CudaServerKey}; + use crate::integer::{ClientKey, RadixCiphertext}; + use crate::shortint::oprf::create_random_from_seed_modulus_switched; + use crate::shortint::parameters::PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64; + use rayon::prelude::*; + use statrs::distribution::ContinuousCDF; + use std::collections::HashMap; + use tfhe_csprng::seeders::Seed; + + fn square(a: f64) -> f64 { + a * a + } + + #[test] + fn oprf_compare_plain_ci_run_filter() { + let gpu_index = 0; + let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + let (ck, gpu_sk) = gen_keys_gpu( + PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, + &streams, + ); + + for seed in 0..1000 { + oprf_compare_plain_from_seed(Seed(seed), &ck, &gpu_sk, &streams); + } + } + + fn oprf_compare_plain_from_seed( + seed: Seed, + ck: &ClientKey, + sk: &CudaServerKey, + streams: &CudaStreams, + ) { + let params = ck.parameters(); + + let random_bits_count = 2; + + let input_p = 2 * params.polynomial_size().0 as u64; + + let log_input_p = input_p.ilog2(); + + let p_prime = 1 << random_bits_count; + + let output_p = 2 * params.carry_modulus().0 * params.message_modulus().0; + + let poly_delta = 2 * params.polynomial_size().0 as u64 / p_prime; + + let d_img: CudaUnsignedRadixCiphertext = + sk.generate_oblivious_pseudo_random(seed, random_bits_count, streams); + let img: RadixCiphertext = d_img.to_radix_ciphertext(streams); + + let (lwe_size, polynomial_size) = match &sk.bootstrapping_key { + CudaBootstrappingKey::Classic(d_bsk) => ( + d_bsk.input_lwe_dimension().to_lwe_size(), + d_bsk.polynomial_size(), + ), + CudaBootstrappingKey::MultiBit(d_multibit_bsk) => ( + d_multibit_bsk.input_lwe_dimension().to_lwe_size(), + d_multibit_bsk.polynomial_size(), + ), + }; + + let ct = create_random_from_seed_modulus_switched( + seed, + lwe_size, + polynomial_size.to_blind_rotation_input_modulus_log(), + sk.ciphertext_modulus, + ); + + let sk = ck.key.small_lwe_secret_key(); + let plain_prf_input = decrypt_lwe_ciphertext(&sk, &ct) + .0 + .wrapping_add(1 << (64 - log_input_p - 1)) + >> (64 - log_input_p); + + let half_negacyclic_part = |x| 2 * (x / poly_delta) + 1; + + let negacyclic_part = |x| { + assert!(x < input_p); + if x < input_p / 2 { + half_negacyclic_part(x) + } else { + 2 * output_p - half_negacyclic_part(x - (input_p / 2)) + } + }; + + let prf = |x| { + let a = (negacyclic_part(x) + p_prime - 1) % (2 * output_p); + assert!(a % 2 == 0); + a / 2 + }; + + let expected_output = prf(plain_prf_input); + + let output = ck.key.decrypt_message_and_carry(&img.blocks[0]); + + assert!(output < p_prime); + assert_eq!(output, expected_output); + } + + #[test] + fn oprf_test_uniformity_ci_run_filter() { + let sample_count: usize = 100_000; + + let p_value_limit: f64 = 0.000_01; + let gpu_index = 0; + let streams = CudaStreams::new_single_gpu(GpuIndex(gpu_index)); + let (ck, gpu_sk) = gen_keys_gpu( + PARAM_GPU_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, + &streams, + ); + + let test_uniformity = |distinct_values: u64, f: &(dyn Fn(usize) -> u64 + Sync)| { + test_uniformity(sample_count, p_value_limit, distinct_values, f) + }; + + let random_bits_count = 2; + + test_uniformity(1 << random_bits_count, &|seed| { + let d_img: CudaUnsignedRadixCiphertext = gpu_sk.generate_oblivious_pseudo_random( + Seed(seed as u128), + random_bits_count, + &streams, + ); + let img: RadixCiphertext = d_img.to_radix_ciphertext(&streams); + ck.key.decrypt_message_and_carry(&img.blocks[0]) + }); + } + + pub fn test_uniformity(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F) + where + F: Sync + Fn(usize) -> u64, + { + let p_value = uniformity_p_value(f, sample_count, distinct_values); + + assert!( + p_value_limit < p_value, + "p_value (={p_value}) expected to be bigger than {p_value_limit}" + ); + } + + fn uniformity_p_value(f: F, sample_count: usize, distinct_values: u64) -> f64 + where + F: Sync + Fn(usize) -> u64, + { + let values: Vec<_> = (0..sample_count).into_par_iter().map(&f).collect(); + + let mut values_count = HashMap::new(); + + for i in &values { + assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values); + + *values_count.entry(i).or_insert(0) += 1; + } + + let single_expected_count = sample_count as f64 / distinct_values as f64; + + // https://en.wikipedia.org/wiki/Pearson's_chi-squared_test + let distance: f64 = (0..distinct_values) + .map(|value| *values_count.get(&value).unwrap_or(&0)) + .map(|count| square(count as f64 - single_expected_count) / single_expected_count) + .sum(); + + statrs::distribution::ChiSquared::new((distinct_values - 1) as f64) + .unwrap() + .sf(distance) + } +} diff --git a/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs b/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs index 3efb158321..a0433ebbe6 100644 --- a/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs +++ b/tfhe/src/integer/gpu/server_key/radix/vector_comparisons.rs @@ -3,7 +3,9 @@ use crate::core_crypto::gpu::CudaStreams; use crate::core_crypto::prelude::LweBskGroupingFactor; use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext}; -use crate::integer::gpu::server_key::radix::CudaRadixCiphertext; +use crate::integer::gpu::server_key::radix::{ + CudaBlockInfo, CudaRadixCiphertext, CudaRadixCiphertextInfo, +}; use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; use crate::integer::gpu::{apply_bivariate_lut_kb_async, PBSType}; @@ -23,9 +25,16 @@ impl CudaServerKey { .map(|ciphertext| &ciphertext.as_ref().d_blocks), streams, ); + let vec_block_info: Vec = radixes + .iter() + .flat_map(|ct| ct.as_ref().info.blocks.clone()) + .collect(); + let radix_info = CudaRadixCiphertextInfo { + blocks: vec_block_info, + }; CudaIntegerRadixCiphertext::from(CudaRadixCiphertext { d_blocks: packed_list, - info: radixes[0].as_ref().info.clone(), + info: radix_info, }) } diff --git a/tfhe/src/integer/gpu/server_key/radix/vector_find.rs b/tfhe/src/integer/gpu/server_key/radix/vector_find.rs index 89fe57bd50..56ff9642b3 100644 --- a/tfhe/src/integer/gpu/server_key/radix/vector_find.rs +++ b/tfhe/src/integer/gpu/server_key/radix/vector_find.rs @@ -3,6 +3,7 @@ use crate::core_crypto::gpu::CudaStreams; use crate::core_crypto::prelude::{LweBskGroupingFactor, UnsignedInteger}; use crate::integer::block_decomposition::{BlockDecomposer, Decomposable, DecomposableInto}; use crate::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock; +use crate::integer::gpu::ciphertext::info::{CudaBlockInfo, CudaRadixCiphertextInfo}; use crate::integer::gpu::ciphertext::{CudaIntegerRadixCiphertext, CudaUnsignedRadixCiphertext}; use crate::integer::gpu::server_key::radix::CudaRadixCiphertext; use crate::integer::gpu::server_key::{CudaBootstrappingKey, CudaServerKey}; @@ -31,14 +32,17 @@ impl CudaServerKey { .map(|ciphertext| &ciphertext.0.ciphertext.d_blocks), streams, ); - - let blocks_ct: CudaUnsignedRadixCiphertext = CudaUnsignedRadixCiphertext { - ciphertext: CudaRadixCiphertext { - d_blocks: packed_list, - info: selectors[0].0.ciphertext.info.clone(), - }, + let vec_block_info: Vec = selectors + .iter() + .flat_map(|ct| ct.0.ciphertext.info.blocks.clone()) + .collect(); + let radix_info = CudaRadixCiphertextInfo { + blocks: vec_block_info, }; - blocks_ct + CudaIntegerRadixCiphertext::from(CudaRadixCiphertext { + d_blocks: packed_list, + info: radix_info, + }) } #[allow(clippy::unused_self)] pub(crate) fn convert_radixes_vec_to_single_radix_ciphertext( @@ -56,10 +60,16 @@ impl CudaServerKey { radixes.iter().map(|ciphertext| &ciphertext.d_blocks), streams, ); - + let vec_block_info: Vec = radixes + .iter() + .flat_map(|ct| ct.info.blocks.clone()) + .collect(); + let radix_info = CudaRadixCiphertextInfo { + blocks: vec_block_info, + }; CudaIntegerRadixCiphertext::from(CudaRadixCiphertext { d_blocks: packed_list, - info: radixes[0].info.clone(), + info: radix_info, }) } diff --git a/tfhe/src/shortint/engine/mod.rs b/tfhe/src/shortint/engine/mod.rs index 8986d85bc8..9d6dbac699 100644 --- a/tfhe/src/shortint/engine/mod.rs +++ b/tfhe/src/shortint/engine/mod.rs @@ -160,20 +160,15 @@ where pub(crate) fn fill_accumulator_no_encoding( accumulator: &mut GlweCiphertext, - server_key: &ServerKey, + polynomial_size: PolynomialSize, + glwe_size: GlweSize, f: F, ) where C: ContainerMut, F: Fn(u64) -> u64, { - assert_eq!( - accumulator.polynomial_size(), - server_key.bootstrapping_key.polynomial_size() - ); - assert_eq!( - accumulator.glwe_size(), - server_key.bootstrapping_key.glwe_size() - ); + assert_eq!(accumulator.polynomial_size(), polynomial_size); + assert_eq!(accumulator.glwe_size(), glwe_size); let mut accumulator_view = accumulator.as_mut_view(); diff --git a/tfhe/src/shortint/oprf.rs b/tfhe/src/shortint/oprf.rs index a55229622e..8484bcaef2 100644 --- a/tfhe/src/shortint/oprf.rs +++ b/tfhe/src/shortint/oprf.rs @@ -1,8 +1,8 @@ use super::Ciphertext; use crate::core_crypto::fft_impl::common::modulus_switch; use crate::core_crypto::prelude::{ - keyswitch_lwe_ciphertext, lwe_ciphertext_plaintext_add_assign, CiphertextModulusLog, - LweCiphertext, LweSize, Plaintext, + keyswitch_lwe_ciphertext, lwe_ciphertext_plaintext_add_assign, CiphertextModulus, + CiphertextModulusLog, LweCiphertext, LweSize, Plaintext, }; use crate::shortint::ciphertext::Degree; use crate::shortint::engine::ShortintEngine; @@ -28,35 +28,33 @@ pub fn sha3_hash(values: &mut [u64], seed: Seed) { *value = u64::from_le_bytes(bytes); } } +pub fn create_random_from_seed( + seed: Seed, + lwe_size: LweSize, + ciphertext_modulus: CiphertextModulus, +) -> LweCiphertext> { + let mut ct = LweCiphertext::new(0, lwe_size, ciphertext_modulus); -impl ServerKey { - pub(crate) fn create_random_from_seed( - &self, - seed: Seed, - lwe_size: LweSize, - ) -> LweCiphertext> { - let mut ct = LweCiphertext::new(0, lwe_size, self.ciphertext_modulus); - - sha3_hash(ct.get_mut_mask().as_mut(), seed); - - ct - } + sha3_hash(ct.get_mut_mask().as_mut(), seed); - pub(crate) fn create_random_from_seed_modulus_switched( - &self, - seed: Seed, - lwe_size: LweSize, - log_modulus: CiphertextModulusLog, - ) -> LweCiphertext> { - let mut ct = self.create_random_from_seed(seed, lwe_size); + ct +} - for i in ct.as_mut() { - *i = modulus_switch(*i, log_modulus) << (64 - log_modulus.0); - } +pub fn create_random_from_seed_modulus_switched( + seed: Seed, + lwe_size: LweSize, + log_modulus: CiphertextModulusLog, + ciphertext_modulus: CiphertextModulus, +) -> LweCiphertext> { + let mut ct = create_random_from_seed(seed, lwe_size, ciphertext_modulus); - ct + for i in ct.as_mut() { + *i = modulus_switch(*i, log_modulus) << (64 - log_modulus.0); } + ct +} +impl ServerKey { /// Uniformly generates a random encrypted value in `[0, 2^random_bits_count[` /// `2^random_bits_count` must be smaller than the message modulus /// The encryted value is oblivious to the server @@ -123,12 +121,13 @@ impl ServerKey { let in_lwe_size = self.bootstrapping_key.input_lwe_dimension().to_lwe_size(); - let seeded = self.create_random_from_seed_modulus_switched( + let seeded = create_random_from_seed_modulus_switched( seed, in_lwe_size, self.bootstrapping_key .polynomial_size() .to_blind_rotation_input_modulus_log(), + self.ciphertext_modulus, ); let p = 1 << random_bits_count; @@ -183,6 +182,7 @@ impl ServerKey { #[cfg(test)] pub(crate) mod test { use crate::core_crypto::prelude::decrypt_lwe_ciphertext; + use crate::shortint::oprf::create_random_from_seed_modulus_switched; use crate::shortint::{ClientKey, ServerKey}; use rayon::prelude::*; use statrs::distribution::ContinuousCDF; @@ -223,12 +223,13 @@ pub(crate) mod test { let lwe_size = sk.bootstrapping_key.input_lwe_dimension().to_lwe_size(); - let ct = sk.create_random_from_seed_modulus_switched( + let ct = create_random_from_seed_modulus_switched( seed, lwe_size, sk.bootstrapping_key .polynomial_size() .to_blind_rotation_input_modulus_log(), + sk.ciphertext_modulus, ); let sk = ck.small_lwe_secret_key(); diff --git a/tfhe/src/shortint/server_key/mod.rs b/tfhe/src/shortint/server_key/mod.rs index 0fc59daeb2..ed82e2fce6 100644 --- a/tfhe/src/shortint/server_key/mod.rs +++ b/tfhe/src/shortint/server_key/mod.rs @@ -640,7 +640,12 @@ impl ServerKey { self.bootstrapping_key.polynomial_size(), self.ciphertext_modulus, ); - fill_accumulator_no_encoding(&mut acc, self, f); + fill_accumulator_no_encoding( + &mut acc, + self.bootstrapping_key.polynomial_size(), + self.bootstrapping_key.glwe_size(), + f, + ); LookupTableOwned { acc,