Skip to content

Commit

Permalink
Comments
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 29, 2024
1 parent 20147ab commit 38c82a4
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 8 deletions.
17 changes: 11 additions & 6 deletions nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def quantize_pt2e(
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
batchwise_statistics: Optional[bool] = None,
fold_quantize: bool = False,
do_copy: bool = False,
) -> torch.fx.GraphModule:
"""
Applies post-training quantization to the torch.fx.GraphModule provided model
Expand All @@ -72,25 +73,29 @@ def quantize_pt2e(
for each item of the batch or for the entire batch. Default is None, which means
it is set to True if batch_size > 1, otherwise False.
:param fold_quantize: Boolean flag for whether fold the quantize op or not.
:param do_copy: If True, a copy of the given model is made and quantized;
otherwise the model is quantized in place. Default value is False.
"""
nncf_logger.warning("This is an experimental feature and may change in the future without notice.")

if subset_size < 1:
raise nncf.ValidationError("Subset size must be positive.")

batch_size = calibration_dataset.get_batch_size()
batchwise_statistics = batchwise_statistics is None and batch_size is not None and batch_size > 1
if batchwise_statistics is None:
batchwise_statistics = batch_size is not None and batch_size > 1

original_graph_meta = model.meta

copied_model = deepcopy(model)
if do_copy:
model = deepcopy(model)

# To make it easier for bias correction algorithms,
# biases are being separated by the following calls.
fuse_conv_bn(copied_model)
fuse_conv_bn(model)
# Call ao quantizer transform_for_annotation
# before the NNCFGraph creation
quantizer.transform_for_annotation(copied_model)
quantizer.transform_for_annotation(model)

if not isinstance(quantizer, NNCFQuantizer):
quantizer = NNCFFXQuantizer(quantizer)
Expand All @@ -107,8 +112,8 @@ def quantize_pt2e(
batchwise_statistics=batchwise_statistics,
)

nncf_graph = NNCFGraphFactory.create(copied_model)
quantized_model = quantization_algorithm.apply(copied_model, nncf_graph, dataset=calibration_dataset)
nncf_graph = NNCFGraphFactory.create(model)
quantized_model = quantization_algorithm.apply(model, nncf_graph, dataset=calibration_dataset)

# Magic. Without this call the compiled model
# is not performant
Expand Down
4 changes: 2 additions & 2 deletions nncf/quantization/algorithms/min_max/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,7 @@ def _get_activation_quantization_target_point(

def find_quantization_setup(self, model: TModel, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
"""
Initializes a cache, finds quantization target points and them puts in the cache.
Initializes a cache, finds quantization target points and then puts them in the cache.
:param model: Backend-specific model, for which Quantization Target Points are being seek.
:param nncf_graph: NNCFGraph instance.
Expand Down Expand Up @@ -757,7 +757,7 @@ def fill_quantization_target_points(
self, quantizer_setup: SingleConfigQuantizerSetup, nncf_graph: NNCFGraph
) -> Tuple[OrderedDict[TargetPoint, QuantizerConfig], List[List[TargetPoint]]]:
"""
Initializes a cache, finds quantization target points and them puts in the cache.
Initializes a cache and puts the given quantization target points in the cache.
:param quantizer_setup: Quantization Target Points in format of SingleConfigQuantizerSetup.
:param nncf_graph: NNCFGraph instance.
Expand Down
1 change: 1 addition & 0 deletions tests/torch/fx/test_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def test_quantized_model(
calibration_dataset=calibration_dataset,
fast_bias_correction=None, # BC is disabled
fold_quantize=True,
do_copy=True,
**pt2e_params,
)

Expand Down

0 comments on commit 38c82a4

Please sign in to comment.