Skip to content

Commit

Permalink
Cleanup.
Browse files — browse the repository at this point in the history
  • Loading branch information
Narsil committed Jan 5, 2024
1 parent f97fcd4 commit 6f85840
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 29 deletions.
25 changes: 1 addition & 24 deletions candle-core/examples/tensor-tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,7 @@ enum QuantizationMode {
}

impl QuantizationMode {
fn quantize(
&self,
name: &str,
tensor: QTensor,
dtype: GgmlDType,
// default: fn(&Tensor) -> Result<QTensor>,
) -> Result<QTensor> {
fn quantize(&self, name: &str, tensor: QTensor, dtype: GgmlDType) -> Result<QTensor> {
match self {
Self::Llama => {
// Same behavior as the llama.cpp quantization.
Expand All @@ -28,7 +22,6 @@ impl QuantizationMode {
QTensor::quantize(&tensor, GgmlDType::Q6K)
} else {
QTensor::quantize(&tensor, dtype)
// default(&tensor)
}
} else {
Ok(tensor)
Expand Down Expand Up @@ -261,22 +254,6 @@ fn run_quantize_safetensors(
println!("tensors: {}", tensors.len());

let dtype = q.dtype();
// let quantize_fn = match q {
// Quantization::Q4_0 => QTensor::quantize::<k_quants::BlockQ4_0>,
// Quantization::Q4_1 => QTensor::quantize::<k_quants::BlockQ4_1>,
// Quantization::Q5_0 => QTensor::quantize::<k_quants::BlockQ5_0>,
// Quantization::Q5_1 => QTensor::quantize::<k_quants::BlockQ5_1>,
// Quantization::Q8_0 => QTensor::quantize::<k_quants::BlockQ8_0>,
// Quantization::Q8_1 => QTensor::quantize::<k_quants::BlockQ8_1>,
// Quantization::Q2k => QTensor::quantize::<k_quants::BlockQ2K>,
// Quantization::Q3k => QTensor::quantize::<k_quants::BlockQ3K>,
// Quantization::Q4k => QTensor::quantize::<k_quants::BlockQ4K>,
// Quantization::Q5k => QTensor::quantize::<k_quants::BlockQ5K>,
// Quantization::Q6k => QTensor::quantize::<k_quants::BlockQ6K>,
// Quantization::Q8k => QTensor::quantize::<k_quants::BlockQ8K>,
// Quantization::F16 => QTensor::quantize::<half::f16>,
// Quantization::F32 => QTensor::quantize::<f32>,
// };
let block_size = dtype.block_size();

let qtensors = tensors
Expand Down
2 changes: 1 addition & 1 deletion candle-core/src/metal_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl MetalDevice {
&self.device
}

pub fn fence(&self) -> &metal::Fence {
pub(crate) fn fence(&self) -> &metal::Fence {
&self.fence
}

Expand Down
6 changes: 2 additions & 4 deletions candle-core/src/quantized/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,12 @@ impl Device {
fn qzeros(&self, elem_count: usize, dtype: GgmlDType) -> Result<QStorage> {
match self {
Device::Cpu => {
let storage = dtype.zeros(elem_count);
let storage = dtype.cpu_zeros(elem_count);
Ok(QStorage::Cpu(storage))
}
#[cfg(feature = "metal")]
Device::Metal(metal) => {
let size = elem_count * dtype.type_size() / dtype.block_size();
// let cpu_storage = dtype.zeros(elem_count);
// assert_eq!(size, cpu_storage.size());
let buffer = metal.allocate_zeros(size)?;
Ok(QStorage::Metal(metal::QMetalStorage::new(
buffer,
Expand Down Expand Up @@ -182,7 +180,7 @@ impl GgmlDType {
}

/// The block dtype
pub fn zeros(&self, elem_count: usize) -> Box<dyn QuantizedType> {
pub fn cpu_zeros(&self, elem_count: usize) -> Box<dyn QuantizedType> {
match self {
Self::F32 => Box::new(vec![f32::zeros(); elem_count]),
Self::F16 => Box::new(vec![f16::zeros(); elem_count]),
Expand Down

0 comments on commit 6f85840

Please sign in to comment.