Skip to content

Commit

Permalink
[WC] Fix ratio_defining_params
Browse files Browse the repository at this point in the history
  • Loading branch information
l-bat committed Apr 24, 2024
1 parent 590bc6d commit bbc7f71
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
12 changes: 6 additions & 6 deletions nncf/quantization/algorithms/weight_compression/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,28 +165,28 @@ def _get_ratio_defining_params(
if self._mode in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM]:
return all_weight_params

if self._all_layers:
return list(filter(lambda wp: len(wp.reduction_axes) == 1, all_weight_params))

ratio_defining_params = list(
filter(
lambda wp: wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes,
all_weight_params,
)
)

# The last MatMul layer is quantized to 4-bits if all_layers=True or if the layer is shared
if not self._all_layers and not is_last_layer_shared:
ratio_defining_params = ratio_defining_params[:-1]

# Embedding layers are quantized to 4-bits only if all_layers=True.
if self._all_layers:
embedding_params = list(
filter(
lambda wp: wp.node_with_weight.metatype in self._backend_entity.embedding_metatypes,
lambda wp: wp.node_with_weight.metatype in self._backend_entity.embedding_metatypes
and len(wp.reduction_axes) == 1,
all_weight_params,
)
)
ratio_defining_params.extend(embedding_params)

if not self._all_layers and not is_last_layer_shared:
ratio_defining_params = ratio_defining_params[:-1]
return ratio_defining_params

def _set_weight_compression_config(
Expand Down
15 changes: 15 additions & 0 deletions tests/openvino/native/quantization/test_weights_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,21 @@ def test_shared_gather(mode):
assert op.get_element_type() == weight_name_vs_type[op_name]


@pytest.mark.parametrize("all_layers", (True, False))
def test_shared_gather_all_layers(all_layers):
    """Check weight constant types after INT4_ASYM compression of a shared-data model.

    The model comes from ``GatherAndMatmulShareData``. With ``all_layers=True``
    the ``gather_2_data`` and ``shared_data`` constants are expected to be ``u4``,
    otherwise ``u8``; ``matmul_1_data`` is expected to be ``u4`` in both cases.
    Only constants whose friendly name is listed below are checked.
    """
    # Both gather-related constants share the same expectation for a given flag value.
    gather_type = ov.Type.u4 if all_layers else ov.Type.u8
    expected_types = {
        "gather_2_data": gather_type,
        "shared_data": gather_type,
        "matmul_1_data": ov.Type.u4,
    }

    ov_model = GatherAndMatmulShareData().ov_model
    compressed = compress_weights(
        ov_model,
        CompressWeightsMode.INT4_ASYM,
        group_size=-1,
        all_layers=all_layers,
    )

    for node in compressed.get_ordered_ops():
        friendly_name = node.get_friendly_name()
        # Skip everything that is not one of the tracked weight constants.
        if node.get_type_name() != "Constant":
            continue
        if friendly_name not in expected_types:
            continue
        assert node.get_element_type() == expected_types[friendly_name]


@dataclass
class QuantErrorDesc:
weight: List[float]
Expand Down

0 comments on commit bbc7f71

Please sign in to comment.