-
Notifications
You must be signed in to change notification settings - Fork 239
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
### Changes Add simple QAT example for TF backend ### Reason for changes Ref: 158980 ### Related tickets Ref: 158980 ### Tests - tests/tensorflow/quantization/test_strip.py - tests/tensorflow/sparsity/test_strip.py - tests/tensorflow/pruning/test_strip.py
- Loading branch information
1 parent
76e3987
commit 883c787
Showing
10 changed files
with
452 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
examples/quantization_aware_training/tensorflow/mobilenet_v2/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Quantization-Aware Training of MobileNet v2 TensorFlow Model | ||
|
||
This example demonstrates how to use Post-Training Quantization API from Neural Network Compression Framework (NNCF) to quantize and train TensorFlow models on the example of [MobileNet v2](https://huggingface.co/alexsu52/mobilenet_v2_imagenette) quantization aware training, pretrained on [Imagenette](https://github.com/fastai/imagenette) dataset. | ||
|
||
The example includes the following steps: | ||
|
||
- Loading the [Imagenette](https://github.com/fastai/imagenette) dataset (~340 Mb) and the [MobileNet v2 TensorFlow model](https://huggingface.co/alexsu52/mobilenet_v2_imagenette) pretrained on this dataset. | ||
- Quantizing the model using NNCF Post-Training Quantization algorithm. | ||
- Fine tuning quantized model for two epoch to improve quantized model metrics. | ||
- Output of the following characteristics of the quantized model: | ||
- Accuracy drop of the quantized model (INT8) over the pre-trained model (FP32) | ||
- Compression rate of the quantized model file size relative to the pre-trained model file size | ||
- Performance speed up of the quantized model (INT8) | ||
|
||
## Install requirements | ||
|
||
At this point it is assumed that you have already installed NNCF. You can find information on installation NNCF [here](https://github.com/openvinotoolkit/nncf#user-content-installation). | ||
|
||
To work with the example you should install the corresponding Python package dependencies: | ||
|
||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
## Run Example | ||
|
||
It's pretty simple. The example does not require additional preparation. It will do the preparation itself, such as loading the dataset and model, etc. | ||
|
||
```bash | ||
python main.py | ||
``` |
197 changes: 197 additions & 0 deletions
197
examples/quantization_aware_training/tensorflow/mobilenet_v2/main.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
# Copyright (c) 2025 Intel Corporation | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import re | ||
import subprocess | ||
from pathlib import Path | ||
from typing import List | ||
|
||
import openvino as ov | ||
import tensorflow as tf | ||
import tensorflow_datasets as tfds | ||
from rich.progress import track | ||
|
||
import nncf | ||
|
||
ROOT = Path(__file__).parent.resolve() | ||
WEIGHTS_URL = "https://huggingface.co/alexsu52/mobilenet_v2_imagenette/resolve/main/tf_model.h5" | ||
DATASET_CLASSES = 10 | ||
|
||
|
||
def validate(model: ov.Model, val_loader: tf.data.Dataset) -> tf.Tensor: | ||
compiled_model = ov.compile_model(model, device_name="CPU") | ||
output = compiled_model.outputs[0] | ||
|
||
metric = tf.keras.metrics.CategoricalAccuracy(name="acc@1") | ||
for images, labels in track(val_loader): | ||
pred = compiled_model(images.numpy())[output] | ||
metric.update_state(labels, pred) | ||
|
||
return metric.result() | ||
|
||
|
||
def run_benchmark(model_path: Path, shape: List[int]) -> float: | ||
command = [ | ||
"benchmark_app", | ||
"-m", model_path.as_posix(), | ||
"-d", "CPU", | ||
"-api", "async", | ||
"-t", "15", | ||
"-shape", str(shape), | ||
] # fmt: skip | ||
cmd_output = subprocess.check_output(command, text=True) # nosec | ||
print(*cmd_output.splitlines()[-8:], sep="\n") | ||
match = re.search(r"Throughput\: (.+?) FPS", cmd_output) | ||
return float(match.group(1)) | ||
|
||
|
||
def get_model_size(ir_path: Path, m_type: str = "Mb") -> float: | ||
xml_size = ir_path.stat().st_size | ||
bin_size = ir_path.with_suffix(".bin").stat().st_size | ||
for t in ["bytes", "Kb", "Mb"]: | ||
if m_type == t: | ||
break | ||
xml_size /= 1024 | ||
bin_size /= 1024 | ||
model_size = xml_size + bin_size | ||
print(f"Model graph (xml): {xml_size:.3f} Mb") | ||
print(f"Model weights (bin): {bin_size:.3f} Mb") | ||
print(f"Model size: {model_size:.3f} Mb") | ||
return model_size | ||
|
||
|
||
############################################################################### | ||
# Create a Tensorflow model and dataset | ||
|
||
|
||
def center_crop(image: tf.Tensor, image_size: int, crop_padding: int) -> tf.Tensor: | ||
shape = tf.shape(image) | ||
image_height = shape[0] | ||
image_width = shape[1] | ||
|
||
padded_center_crop_size = tf.cast( | ||
((image_size / (image_size + crop_padding)) * tf.cast(tf.minimum(image_height, image_width), tf.float32)), | ||
tf.int32, | ||
) | ||
|
||
offset_height = ((image_height - padded_center_crop_size) + 1) // 2 | ||
offset_width = ((image_width - padded_center_crop_size) + 1) // 2 | ||
|
||
image = tf.image.crop_to_bounding_box( | ||
image, | ||
offset_height=offset_height, | ||
offset_width=offset_width, | ||
target_height=padded_center_crop_size, | ||
target_width=padded_center_crop_size, | ||
) | ||
image = tf.image.resize(image, [image_size, image_size], method=tf.image.ResizeMethod.BILINEAR) | ||
return image | ||
|
||
|
||
def preprocess_for_eval(image, label): | ||
image = center_crop(image, 224, 32) | ||
image = tf.keras.applications.mobilenet_v2.preprocess_input(image) | ||
image = tf.image.convert_image_dtype(image, tf.float32) | ||
label = tf.one_hot(label, DATASET_CLASSES) | ||
return image, label | ||
|
||
|
||
def preprocess_for_train(image, label): | ||
image = tf.image.resize_with_crop_or_pad(image, 256, 256) | ||
image = tf.image.random_crop(image, [224, 224, 3]) | ||
image = tf.image.convert_image_dtype(image, tf.float32) | ||
label = tf.one_hot(label, DATASET_CLASSES) | ||
return image, label | ||
|
||
|
||
train_dataset = tfds.load("imagenette/320px-v2", split="train", shuffle_files=True, as_supervised=True) | ||
train_dataset = train_dataset.map(preprocess_for_train).shuffle(1024).batch(128) | ||
|
||
val_dataset = tfds.load("imagenette/320px-v2", split="validation", shuffle_files=False, as_supervised=True) | ||
val_dataset = val_dataset.map(preprocess_for_eval).batch(128) | ||
|
||
weights_path = tf.keras.utils.get_file("mobilenet_v2_imagenette_weights.h5", WEIGHTS_URL, cache_subdir="models") | ||
tf_model = tf.keras.applications.MobileNetV2(weights=weights_path, classes=DATASET_CLASSES) | ||
|
||
############################################################################### | ||
# Quantize a Tensorflow model | ||
# | ||
# The transformation function transforms a data item into model input data. | ||
# | ||
# To validate the transform function use the following code: | ||
# >> for data_item in val_loader: | ||
# >> model(transform_fn(data_item)) | ||
|
||
|
||
def transform_fn(data_item): | ||
images, _ = data_item | ||
return images | ||
|
||
|
||
# The calibration dataset is a small, no label, representative dataset | ||
# (~100-500 samples) that is used to estimate the range, i.e. (min, max) of all | ||
# floating point activation tensors in the model, to initialize the quantization | ||
# parameters. | ||
# | ||
# The easiest way to define a calibration dataset is to use a training or | ||
# validation dataset and a transformation function to remove labels from the data | ||
# item and prepare model input data. The quantize method uses a small subset | ||
# (default: 300 samples) of the calibration dataset. | ||
|
||
calibration_dataset = nncf.Dataset(val_dataset, transform_fn) | ||
tf_quantized_model = nncf.quantize(tf_model, calibration_dataset) | ||
|
||
tf_quantized_model.compile( | ||
optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), | ||
loss=tf.keras.losses.CategoricalCrossentropy(), | ||
metrics=[tf.keras.metrics.CategoricalAccuracy()], | ||
) | ||
|
||
tf_quantized_model.fit(train_dataset, epochs=3, verbose=1) | ||
|
||
# Removes auxiliary layers and operations added during the quantization process, | ||
# resulting in a clean, fully quantized model ready for deployment. | ||
stripped_model = nncf.strip(tf_quantized_model) | ||
|
||
############################################################################### | ||
# Benchmark performance, calculate compression rate and validate accuracy | ||
|
||
ov_model = ov.convert_model(tf_model, share_weights=False) | ||
ov_quantized_model = ov.convert_model(stripped_model, share_weights=False) | ||
|
||
fp32_ir_path = ROOT / "mobilenet_v2_fp32.xml" | ||
ov.save_model(ov_model, fp32_ir_path, compress_to_fp16=False) | ||
print(f"[1/7] Save FP32 model: {fp32_ir_path}") | ||
fp32_model_size = get_model_size(fp32_ir_path) | ||
|
||
int8_ir_path = ROOT / "mobilenet_v2_int8.xml" | ||
ov.save_model(ov_quantized_model, int8_ir_path) | ||
print(f"[2/7] Save INT8 model: {int8_ir_path}") | ||
int8_model_size = get_model_size(int8_ir_path) | ||
|
||
print("[3/7] Benchmark FP32 model:") | ||
fp32_fps = run_benchmark(fp32_ir_path, shape=[1, 224, 224, 3]) | ||
print("[4/7] Benchmark INT8 model:") | ||
int8_fps = run_benchmark(int8_ir_path, shape=[1, 224, 224, 3]) | ||
|
||
print("[5/7] Validate OpenVINO FP32 model:") | ||
fp32_top1 = validate(ov_model, val_dataset) | ||
print(f"Accuracy @ top1: {fp32_top1:.3f}") | ||
|
||
print("[6/7] Validate OpenVINO INT8 model:") | ||
int8_top1 = validate(ov_quantized_model, val_dataset) | ||
print(f"Accuracy @ top1: {int8_top1:.3f}") | ||
|
||
print("[7/7] Report:") | ||
print(f"Accuracy drop: {fp32_top1 - int8_top1:.3f}") | ||
print(f"Model compression rate: {fp32_model_size / int8_model_size:.3f}") | ||
# https://docs.openvino.ai/latest/openvino_docs_optimization_guide_dldt_optimization_guide.html | ||
print(f"Performance speed up (throughput mode): {int8_fps / fp32_fps:.3f}") |
5 changes: 5 additions & 0 deletions
5
examples/quantization_aware_training/tensorflow/mobilenet_v2/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
tensorflow~=2.12.0; python_version < '3.9' | ||
tensorflow~=2.15.1; python_version >= '3.9' | ||
tensorflow-datasets | ||
tqdm | ||
openvino==2024.6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# Copyright (c) 2025 Intel Corporation | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from typing import Dict | ||
|
||
import tensorflow as tf | ||
|
||
from nncf.common.utils.backend import copy_model | ||
from nncf.tensorflow.graph.model_transformer import TFModelTransformer | ||
from nncf.tensorflow.graph.transformations.commands import TFOperationWithWeights | ||
from nncf.tensorflow.graph.transformations.commands import TFRemovalCommand | ||
from nncf.tensorflow.graph.transformations.layout import TFTransformationLayout | ||
from nncf.tensorflow.graph.utils import collect_wrapped_layers | ||
from nncf.tensorflow.layers.operation import NNCFOperation | ||
from nncf.tensorflow.quantization.quantizers import AsymmetricQuantizer | ||
from nncf.tensorflow.quantization.quantizers import SymmetricQuantizer | ||
from nncf.tensorflow.quantization.utils import apply_overflow_fix_to_layer | ||
from nncf.tensorflow.sparsity.magnitude.operation import BinaryMask | ||
from nncf.tensorflow.sparsity.rb.operation import RBSparsifyingWeight | ||
from nncf.tensorflow.sparsity.utils import apply_mask | ||
|
||
|
||
def strip(model: tf.keras.Model, do_copy: bool = True) -> tf.keras.Model: | ||
""" | ||
Implementation of the nncf.strip() function for the TF backend | ||
:param model: The compressed model. | ||
:param do_copy: If True (default), will return a copy of the currently associated model object. If False, | ||
will return the currently associated model object "stripped" in-place. | ||
:return: The stripped model. | ||
""" | ||
# Check to understand if the model is after NNCF or not. | ||
wrapped_layers = collect_wrapped_layers(model) | ||
if not wrapped_layers: | ||
return model | ||
|
||
if do_copy: | ||
model = copy_model(model) | ||
wrapped_layers = collect_wrapped_layers(model) | ||
|
||
op_to_priority: Dict[NNCFOperation, int] = { | ||
SymmetricQuantizer: 1, | ||
AsymmetricQuantizer: 1, | ||
BinaryMask: 2, | ||
RBSparsifyingWeight: 2, | ||
} | ||
|
||
key_fn = lambda op: op_to_priority.get(op, 0) | ||
|
||
transformation_layout = TFTransformationLayout() | ||
for wrapped_layer in wrapped_layers: | ||
for weight_attr, ops in wrapped_layer.weights_attr_ops.items(): | ||
for op in sorted(ops.values(), key=key_fn, reverse=True): | ||
# quantization | ||
if isinstance(op, (SymmetricQuantizer, AsymmetricQuantizer)) and op.half_range: | ||
apply_overflow_fix_to_layer(wrapped_layer, weight_attr, op) | ||
# sparsity, pruning | ||
if isinstance(op, (BinaryMask, RBSparsifyingWeight)): | ||
apply_mask(wrapped_layer, weight_attr, op) | ||
transformation_layout.register( | ||
TFRemovalCommand( | ||
target_point=TFOperationWithWeights( | ||
wrapped_layer.name, weights_attr_name=weight_attr, operation_name=op.name | ||
) | ||
) | ||
) | ||
if transformation_layout.transformations: | ||
model = TFModelTransformer(model).transform(transformation_layout) | ||
|
||
return model |
Oops, something went wrong.