From e1d7f1163005f487694d6c9fd9e6a91d83dda593 Mon Sep 17 00:00:00 2001 From: Chizkiyahu Raful <37312901+Chizkiyahu@users.noreply.github.com> Date: Thu, 5 Oct 2023 18:35:54 +0300 Subject: [PATCH] Fix issue in per-tensor quantization and missing input_rank (#65) --- .../weights_lut_pot_inferable_quantizer.py | 3 +++ .../weights_lut_symmetric_inferable_quantizer.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py b/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py index 0575b92..7d358f7 100644 --- a/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +++ b/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py @@ -155,8 +155,11 @@ def symbolic(g, # When None is passed as channel_axis, the op has no attribute of channel_axis, # which creates conflict with the onnxruntime function. For this reason, if we quantize # per-tensor and channel_axis is None, we set it to 0. + # Similarly, if we quantize per-tensor and input_rank is None, we set it to 4.
if not per_channel and channel_axis is None: channel_axis = 0 + if not per_channel and input_rank is None: + input_rank = 4 return g.op(f"{ONNX_CUSTOM_OP_DOMAIN}::WeightsLUTPOTQuantizer", input_tensor, g.op('Constant', value_t=torch.tensor(lut_values, dtype=torch.float32)), diff --git a/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py b/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py index c785c1c..37bc533 100644 --- a/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +++ b/mct_quantizers/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py @@ -177,8 +177,11 @@ def symbolic(g, # When None is passed as channel_axis, the op has no attribute of channel_axis, # which creates conflict with the onnxruntime function. For this reason, if we quantize # per-tensor and channel_axis is None, we set it to 0. + # Similarly, if we quantize per-tensor and input_rank is None, we set it to 4. if not per_channel and channel_axis is None: channel_axis = 0 + if not per_channel and input_rank is None: + input_rank = 4 return g.op(f"{ONNX_CUSTOM_OP_DOMAIN}::WeightsLUTSymmetricQuantizer", input_tensor, g.op('Constant', value_t=torch.tensor(lut_values, dtype=torch.float32)),