openvinotoolkit · alexsu52 · Sep 26, 2024 · Aug 14, 2024 · Aug 16, 2024 · Aug 16, 2024
@@ -78,7 +78,7 @@ def module_insertion_transformation(model: torch.fx.GraphModule):
             if target_point.target_type == TargetType.OPERATOR_POST_HOOK:
                 _set_new_node_meta(new_node, target_node, module_to_insert, model)
                 with graph.inserting_after(target_node):
-                    for user in list(target_node.users.keys()):
+                    for user in list(target_node.users):
                         if user is new_node:
                             continue
                         user.replace_input_with(target_node, new_node)

@@ -13,7 +13,6 @@
 
 import torch
 import torch.fx
-from torch.ao.quantization.fx.utils import create_getattr_from_value
 
 import nncf
 import nncf.errors
@@ -30,6 +29,7 @@
 from nncf.experimental.torch.fx.model_transformer import FXModelTransformer
 from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name
 from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node
+from nncf.experimental.torch.fx.transformations import constant_update_transformation_builder
 from nncf.experimental.torch.fx.transformations import module_insertion_transformation_builder
 from nncf.parameters import CompressWeightsMode
 from nncf.quantization.algorithms.weight_compression.backend import WeightCompressionAlgoBackend
@@ -180,23 +180,13 @@ def set_weight(
         graph: NNCFGraph,
         weight: Tensor,
     ) -> torch.fx.Node:
-        weight_node = graph.get_previous_nodes(node_with_weight)[weight_port_id]
-        graph_node = get_graph_node_by_name(model.graph, weight_node.node_name)
-        if len(graph_node.users) != 1:
-            raise nncf.InternalError(f"Weight Node has {len(graph_node.users)} users, 1 expected.")
-
-        node_with_weight_graph = next(iter(graph_node.users))
-        with model.graph.inserting_before(node_with_weight_graph):
-            new_weight_node = create_getattr_from_value(
-                model, model.graph, node_with_weight.node_name + "_compressed_weight", weight.data
-            )
 
-        args = list(node_with_weight_graph.args)
-        args[weight_port_id] = new_weight_node
-        node_with_weight_graph.args = tuple(args)
-        model.graph.eliminate_dead_code()
-
-        return new_weight_node
+        weight_update_command = FXApplyTransformationCommand(
+            constant_update_transformation_builder(node_with_weight, weight.data)
+        )
+        layout = TransformationLayout()
+        layout.register(weight_update_command)
+        model = FXModelTransformer(model).transform(layout)
 
     def transform_model(
         self,
@@ -240,12 +230,10 @@ def transform_model(
                 dtype = TensorDataType.uint8
             packed_tensor = compressed_weight.tensor.astype(dtype)
 
-            new_weight = self.set_weight(
-                wc_params.node_with_weight, wc_params.weight_port_id, model, graph, packed_tensor
-            )
+            self.set_weight(wc_params.node_with_weight, wc_params.weight_port_id, model, graph, packed_tensor)
 
             if len(consumer_nodes) > 1:
-                raise nncf.InternalError("Shared weights not supported in compression for Torch Fx models")
+                raise nncf.InternalError("Shared weights not supported in compression for TorchFX models")
 
             # creates weight decompressor
             if compression_config.mode == CompressWeightsMode.INT8_SYM:
@@ -261,15 +249,16 @@ def transform_model(
                 decompressor_type = "asymmetric"
 
             # register the weight decompression module in the model
-            compressed_weight_name = wc_params.node_with_weight.node_name
+            # TODO: Find a more efficient way to access the updated constant's name
+            compressed_weight_name = wc_params.node_with_weight.node_name + "_updated_constant0"
             decompressor_name = f"{decompressor_type}_weights_decompressor_{compressed_weight_name.replace('.', '_')}"
 
             # inserts the weight decompressor into the model as the post hook on the model weight
             transformation_layout.register(
                 FXApplyTransformationCommand(
                     module_insertion_transformation_builder(
                         decompressor,
-                        [PTTargetPoint(TargetType.OPERATOR_POST_HOOK, target_node_name=new_weight.name)],
+                        [PTTargetPoint(TargetType.OPERATOR_POST_HOOK, target_node_name=compressed_weight_name)],
                         decompressor_name,
                     )
                 )

@@ -474,13 +474,13 @@ def compress_weights(
 
         if mode not in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]:
             raise AttributeError(
-                "Torch backend supports only INT8_ASYM, INT8_SYM modes for weight compression, "
+                "TorchFX backend supports only INT8_ASYM, INT8_SYM modes for weight compression, "
                 f"but given {mode.value} mode."
             )
 
-        if True in [awq, scale_estimation, gptq]:
+        if any((awq, scale_estimation, gptq)):
             raise AttributeError(
-                "Torch backend doesn`t supports scale estimation and AWQ algorithm, "
+                "TorchFX backend doesn`t supports scale estimation and AWQ algorithm, "
                 "but awq=True or scale_estimation=True or gptq=True is specified."
             )
         compression_weights_impl = fx_compression_weights_impl

@@ -162,9 +162,9 @@ def test_get_dtype_attribute_of_parameter():
         dummy_input = torch.randint(0, 10, [3, 3])
         exported_model = capture_pre_autograd_graph(model, args=(dummy_input,))
         compressed_model = compress_weights(exported_model)
-    assert compressed_model.matmul_compressed_weight0.dtype == torch.uint8
+    assert compressed_model.matmul_updated_constant0.dtype == torch.uint8
     compressed_model(dummy_input)
-    assert compressed_model.matmul_compressed_weight0.dtype == torch.uint8
+    assert compressed_model.matmul_updated_constant0.dtype == torch.uint8
 
 
 @pytest.mark.parametrize("dtype", ("float16", "float32"))
@@ -183,6 +183,9 @@ def test_model_devices_and_precisions(use_cuda, dtype):
         compressed_model = compress_weights(exported_model)
     result = compressed_model(dummy_input)
     # Scale should always be in float16
-    assert compressed_model.state_dict()["asymmetric_weights_decompressor_matmul._scale"].dtype == torch.float16
+    assert (
+        compressed_model.state_dict()["asymmetric_weights_decompressor_matmul_updated_constant0._scale"].dtype
+        == torch.float16
+    )
     # Result should be in the precision of the model
     assert result.dtype == dtype