Skip to content

Commit

Permalink
chore(gpu): replace recommended lwe_chunk_size for NVIDIA Tesla H100 GPUs
Browse files Browse the repository at this point in the history
  • Loading branch information
pdroalves committed Mar 5, 2024
1 parent b7d7e68 commit eb703c3
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions backends/tfhe-cuda-backend/cuda/src/pbs/bootstrap_multibit.cu
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,12 @@ __host__ uint32_t get_lwe_chunk_size(uint32_t lwe_dimension,
return 9;
} else if (std::strstr(deviceProp.name, h100Name) != nullptr) {
// Tesla H100
return 45;
if (num_samples < 1024)
return 128;
else if (num_samples < 4096)
return 64;
else
return 32;
}

// Generic case
Expand Down Expand Up @@ -451,11 +456,11 @@ __host__ uint32_t get_average_lwe_chunk_size(uint32_t lwe_dimension,
return (ct_count > 10000) ? 30 : 45;
} else if (std::strstr(deviceProp.name, h100Name) != nullptr) {
// Tesla H100
return (ct_count > 10000) ? 30 : 45;
return 64;
}

// Generic case
return (ct_count > 10000) ? 2 : 10;
return (ct_count > 10000) ? 2 : 1;
}

// Returns the maximum buffer size required to execute batches up to
Expand Down

0 comments on commit eb703c3

Please sign in to comment.