Add accuracy tests #73

Merged
55 changes: 23 additions & 32 deletions openvino_xai/metrics/adcc.py
@@ -3,10 +3,8 @@
import numpy as np
from scipy.stats import pearsonr

from openvino_xai import Task
from openvino_xai.common.utils import scaling
from openvino_xai.explainer.explainer import Explainer, ExplainMode
from openvino_xai.explainer.explanation import Explanation
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -22,49 +20,39 @@ class ADCC(BaseMetric):
https://github.com/aimagelab/ADCC/
"""

def __init__(self, model, preprocess_fn, postprocess_fn, explainer=None, device_name="CPU"):
def __init__(self, model, preprocess_fn, postprocess_fn, explainer, device_name="AUTO", **kwargs: Any):
super().__init__(
model=model, preprocess_fn=preprocess_fn, postprocess_fn=postprocess_fn, device_name=device_name
)
if explainer is None:
self.explainer = Explainer(
model=model,
task=Task.CLASSIFICATION,
preprocess_fn=self.preprocess_fn,
explain_mode=ExplainMode.WHITEBOX,
)
else:
self.explainer = explainer

def average_drop(
self, saliency_map: np.ndarray, class_idx: int, image: np.ndarray, model_output: np.ndarray
) -> float:
self.explainer = explainer
self.black_box_kwargs = kwargs

def average_drop(self, masked_image: np.ndarray, class_idx: int, model_output: np.ndarray) -> float:
"""
Measures the average percentage drop in confidence for the target class when the model sees only the
explanation map (image masked with saliency map), instead of the full image.
The less the better.
"""
confidence_on_input = np.max(model_output)

masked_image = (image * saliency_map[:, :, None]).astype(np.uint8)
confidence_on_input = model_output[class_idx]
prediction_on_saliency_map = self.model_predict(masked_image)
confidence_on_saliency_map = prediction_on_saliency_map[class_idx]

return max(0.0, confidence_on_input - confidence_on_saliency_map) / confidence_on_input

def coherency(self, saliency_map: np.ndarray, class_idx: int, image: np.ndarray) -> float:
def coherency(self, saliency_map: np.ndarray, masked_image: np.ndarray, class_idx: int, image: np.ndarray) -> float:
"""
Measures the coherency of the saliency map. The explanation map (image masked with saliency map) should
contain all the relevant features that explain a prediction and should remove useless features in a coherent way.
The saliency map and the saliency map of the explanation map should be similar.
The more the better.
"""
saliency_map_masked_image = self.explainer(
masked_image, targets=class_idx, colormap=False, scaling=False, **self.black_box_kwargs
)
saliency_map_masked_image = list(saliency_map_masked_image.saliency_map.values())[0] # only one target
saliency_map_masked_image = scaling(saliency_map_masked_image, cast_to_uint8=False, max_value=1)

masked_image = image * saliency_map[:, :, None]
saliency_map_mapped_image = self.explainer(masked_image, targets=[class_idx], colormap=False, scaling=False)
saliency_map_mapped_image = saliency_map_mapped_image.saliency_map[class_idx]

A, B = saliency_map.flatten(), saliency_map_mapped_image.flatten()
A, B = saliency_map.flatten(), saliency_map_masked_image.flatten()
# Pearson correlation coefficient
y, _ = pearsonr(A, B)
y = (y + 1) / 2
@@ -78,7 +66,7 @@ def complexity(saliency_map: np.ndarray) -> float:
Defined as L1 norm of the saliency map.
The less the better.
"""
return abs(saliency_map).sum() / (saliency_map.shape[-1] * saliency_map.shape[-2])
return saliency_map.sum() / (saliency_map.shape[-1] * saliency_map.shape[-2])

def __call__(self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray) -> Dict[str, float]:
"""
@@ -102,9 +90,11 @@ def __call__(self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray) -> Dict[str, float]:
saliency_map = scaling(saliency_map, cast_to_uint8=False, max_value=1)

model_output = self.model_predict(input_image)
masked_image = input_image * saliency_map[:, :, None]
class_idx = np.argmax(model_output) if class_idx is None else class_idx

avgdrop = self.average_drop(saliency_map, class_idx, input_image, model_output)
coh = self.coherency(saliency_map, class_idx, input_image)
avgdrop = self.average_drop(masked_image, class_idx, model_output)
coh = self.coherency(saliency_map, masked_image, class_idx, input_image)
com = self.complexity(saliency_map)

adcc = 3 / (1 / coh + 1 / (1 - com) + 1 / (1 - avgdrop))
@@ -129,14 +119,15 @@ def evaluate(
results = []
for input_image, explanation in zip(input_images, explanations):
for class_idx, saliency_map in explanation.saliency_map.items():
metric_dict = self(saliency_map, int(class_idx), input_image)
target_idx = None if explanation.layout in ONE_MAP_LAYOUTS else int(class_idx)
metric_dict = self(saliency_map, target_idx, input_image)
results.append(
[
metric_dict["adcc"],
metric_dict["coherency"],
metric_dict["complexity"],
metric_dict["average_drop"],
]
)
adcc, coherency, complexity, average_drop = np.mean(np.array(results), axis=0)
coherency, complexity, average_drop = np.mean(np.array(results), axis=0)
adcc = 3 / (1 / coherency + 1 / (1 - complexity) + 1 / (1 - average_drop))
return {"adcc": adcc, "coherency": coherency, "complexity": complexity, "average_drop": average_drop}
2 changes: 1 addition & 1 deletion openvino_xai/metrics/base.py
@@ -16,7 +16,7 @@ def __init__(
model: ov.Model = None,
preprocess_fn: Callable[[np.ndarray], np.ndarray] = IdentityPreprocessFN(),
postprocess_fn: Callable[[np.ndarray], np.ndarray] = None,
device_name: str = "CPU",
device_name: str = "AUTO",
):
# Pass model_predict to class initialization directly?
self.model = model
10 changes: 4 additions & 6 deletions openvino_xai/metrics/insertion_deletion_auc.py
@@ -2,7 +2,7 @@

import numpy as np

from openvino_xai.explainer.explanation import Explanation, Layout
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -43,7 +43,7 @@ def step_image_insertion_deletion(
return image_insertion, image_deletion

def __call__(
self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray, steps: int = 100, **kwargs: Any
self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray, steps: int = 30, **kwargs: Any
) -> Dict[str, float]:
"""
Calculate the Insertion and Deletion AUC metrics for one saliency map for one class.
@@ -98,13 +98,11 @@ def evaluate(
:return: A Dict containing the mean insertion AUC, mean deletion AUC, and their difference (delta) as values.
:rtype: float
"""
for explanation in explanations:
assert explanation.layout in [Layout.MULTIPLE_MAPS_PER_IMAGE_GRAY, Layout.MULTIPLE_MAPS_PER_IMAGE_COLOR]

results = []
for input_image, explanation in zip(input_images, explanations):
for class_idx, saliency_map in explanation.saliency_map.items():
metric_dict = self(saliency_map, int(class_idx), input_image, steps)
target_idx = None if explanation.layout in ONE_MAP_LAYOUTS else int(class_idx)
metric_dict = self(saliency_map, target_idx, input_image, steps)
results.append([metric_dict["insertion"], metric_dict["deletion"]])

insertion, deletion = np.mean(np.array(results), axis=0)
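For context, the insertion curve is built by revealing pixels in decreasing-saliency order and re-scoring the model at each step, while the deletion curve erases them in the same order; the metric is the area under each confidence curve. A minimal sketch of the idea, assuming a `predict(image) -> class scores` callable and ignoring preprocessing and batching:

```python
import numpy as np

def insertion_deletion_curves(predict, saliency_map, image, class_idx, steps=30):
    """Sketch: reveal (insertion) or erase (deletion) pixels ranked by saliency,
    re-scoring the model after each step."""
    h, w = saliency_map.shape
    order = np.argsort(-saliency_map.ravel())          # most salient pixels first
    per_step = int(np.ceil(h * w / steps))

    inserted = np.zeros_like(image)
    deleted = image.copy()
    insertion_scores, deletion_scores = [], []
    for step in range(steps):
        idx = order[step * per_step:(step + 1) * per_step]
        ys, xs = np.unravel_index(idx, (h, w))
        inserted[ys, xs] = image[ys, xs]                # insertion: add the most salient pixels
        deleted[ys, xs] = 0                             # deletion: blank them out
        insertion_scores.append(float(predict(inserted)[class_idx]))
        deletion_scores.append(float(predict(deleted)[class_idx]))

    # Rectangle-rule area under the normalized confidence curves.
    return {"insertion": float(np.mean(insertion_scores)),
            "deletion": float(np.mean(deletion_scores))}
```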
31 changes: 18 additions & 13 deletions openvino_xai/metrics/pointing_game.py
@@ -6,7 +6,7 @@
import numpy as np

from openvino_xai.common.utils import logger
from openvino_xai.explainer.explanation import Explanation
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -86,20 +86,25 @@ def evaluate(
hits = 0.0
num_sal_maps = 0
for explanation, image_gt_bboxes in zip(explanations, gt_bboxes):
label_names = explanation.label_names
assert label_names is not None, "Label names are required for pointing game evaluation."

for class_idx, class_sal_map in explanation.saliency_map.items():
label_name = label_names[int(class_idx)]

if label_name not in image_gt_bboxes:
logger.info(
f"No ground-truth bbox for {label_name} saliency map. "
f"Skip pointing game evaluation for this saliency map."
)
continue
if explanation.layout in ONE_MAP_LAYOUTS:
# Activation map
class_gt_bboxes = [
gt_bbox for class_gt_bboxes in image_gt_bboxes.values() for gt_bbox in class_gt_bboxes
]
else:
label_names = explanation.label_names
assert label_names is not None, "Label names are required for pointing game evaluation."
label_name = label_names[int(class_idx)]

if label_name not in image_gt_bboxes:
logger.info(
f"No ground-truth bbox for {label_name} saliency map. "
f"Skip pointing game evaluation for this saliency map."
)
continue
class_gt_bboxes = image_gt_bboxes[label_name]

class_gt_bboxes = image_gt_bboxes[label_name]
hits += self(class_sal_map, class_gt_bboxes)["pointing_game"]
num_sal_maps += 1

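The per-map check behind this loop is the standard pointing game: a hit is scored when the most salient pixel lands inside any ground-truth box for the class. A minimal sketch, assuming boxes are (x_min, y_min, x_max, y_max) pixel coordinates:

```python
import numpy as np

def pointing_game_hit(saliency_map: np.ndarray, gt_bboxes) -> bool:
    """True if the argmax of the saliency map falls inside any ground-truth box."""
    y, x = np.unravel_index(np.argmax(saliency_map), saliency_map.shape)
    return any(x_min <= x <= x_max and y_min <= y <= y_max
               for x_min, y_min, x_max, y_max in gt_bboxes)

# e.g. pointing_game_hit(sal_map, [(274, 99, 434, 290)]) -> True or False
```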
1 change: 1 addition & 0 deletions pyproject.toml
@@ -49,6 +49,7 @@ dev = [
"py-cpuinfo",
"openpyxl",
"torchvision",
"pycocotools",
]
doc = [
"furo",
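pycocotools is added to the dev extras; a hedged sketch of the usual way COCO-format annotations are read with it (whether the dataset tests use exactly this pattern is an assumption, and the file path is illustrative):

```python
from pycocotools.coco import COCO

coco = COCO("annotations/instances_val.json")      # loads and indexes the annotation file
for image_id in coco.getImgIds()[:5]:
    image_info = coco.loadImgs(image_id)[0]        # {"file_name": ..., "height": ..., ...}
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
    labels = [coco.loadCats(ann["category_id"])[0]["name"] for ann in anns]
    bboxes = [ann["bbox"] for ann in anns]         # COCO boxes are [x, y, width, height]
```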
1 change: 0 additions & 1 deletion tests/assets/cheetah_coco/annotations/instances_val.json

This file was deleted.

@@ -0,0 +1,63 @@
<annotation>
<folder>cheetah</folder>
<filename>cheetah_person.jpg</filename>
<source>
<database>Unknown</database>
<annotation>Unknown</annotation>
<image>Unknown</image>
</source>
<size>
<width>500</width>
<height>354</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>person</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>274.0</xmin>
<ymin>99.0</ymin>
<xmax>434.0</xmax>
<ymax>290.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>17.0</xmin>
<ymin>160.0</ymin>
<xmax>306.0</xmax>
<ymax>289.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>165.0</xmin>
<ymin>129.0</ymin>
<xmax>274.0</xmax>
<ymax>283.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>316.0</xmin>
<ymin>111.0</ymin>
<xmax>469.0</xmax>
<ymax>283.0</ymax>
</bndbox>
</object>
</annotation>
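This Pascal-VOC-style XML replaces the deleted COCO JSON. A hedged sketch of turning such a file into the `{label: [bboxes]}` mapping the pointing game expects; the loader shown here is illustrative, not the PR's actual test code:

```python
import xml.etree.ElementTree as ET
from collections import defaultdict

def load_voc_bboxes(xml_path: str) -> dict:
    """Parse a Pascal-VOC annotation file into {label: [(xmin, ymin, xmax, ymax), ...]}."""
    boxes = defaultdict(list)
    for obj in ET.parse(xml_path).getroot().iter("object"):
        bb = obj.find("bndbox")
        boxes[obj.findtext("name")].append(
            tuple(float(bb.findtext(tag)) for tag in ("xmin", "ymin", "xmax", "ymax"))
        )
    return dict(boxes)

# For the file above this would yield, e.g.:
# {"person": [(274.0, 99.0, 434.0, 290.0)], "cheetah": [(17.0, 160.0, 306.0, 289.0), ...]}
```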
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -61,7 +61,7 @@ def fxt_output_root(


@pytest.fixture(scope="session")
def fxt_clear_cache(request: pytest.FixtureRequest) -> Path:
def fxt_clear_cache(request: pytest.FixtureRequest) -> bool:
"""Data root directory path."""
clear_cache = bool(request.config.getoption("--clear-cache"))
msg = f"{clear_cache = }"
2 changes: 1 addition & 1 deletion tests/intg/test_accuracy_metrics.py
@@ -113,7 +113,7 @@ def test_explainer_image_2_classes(self):
assert np.abs(delta_auc_score - 0.39) <= 0.01

adcc_score = self.adcc.evaluate([explanation], [self.image])["adcc"]
assert np.abs(adcc_score - 0.55) <= 0.01
assert np.abs(adcc_score - 0.77) <= 0.01

def test_explainer_images(self):
images = [self.image, self.image]
24 changes: 24 additions & 0 deletions tests/perf/conftest.py
@@ -32,6 +32,18 @@ def pytest_addoption(parser: pytest.Parser):
default=5000,
help="Number of masks for black box methods." "Defaults to 5000.",
)
parser.addoption(
"--dataset-data-root",
action="store",
default="",
help="Path to directory with dataset images.",
)
parser.addoption(
"--dataset-ann-path",
action="store",
default="",
help="Path to dataset annotation file",
)


@pytest.fixture(scope="session")
@@ -173,3 +185,15 @@ def fxt_perf_summary(
data.to_csv(fxt_output_root / "perf-summary.csv")
data.to_excel(fxt_output_root / "perf-summary.xlsx")
print(f" -> Saved to {fxt_output_root}")


@pytest.fixture(scope="session")
def fxt_dataset_parameters(request: pytest.FixtureRequest) -> tuple[Path | None, Path | None]:
"""Retrieve dataset parameters for tests."""
data_root = request.config.getoption("--dataset-data-root")
ann_path = request.config.getoption("--dataset-ann-path")

if data_root != "":
return (Path(data_root), Path(ann_path) if ann_path else None)
else:
return (None, None)
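A hedged sketch of how a perf test could consume the new fixture and command-line options; the test name, dataset layout, and invocation are assumptions:

```python
import pytest

# Assumed invocation:
#   pytest tests/perf --dataset-data-root /data/images --dataset-ann-path /data/ann.json

def test_metrics_on_external_dataset(fxt_dataset_parameters):
    data_root, ann_path = fxt_dataset_parameters
    if data_root is None:
        pytest.skip("Requires --dataset-data-root (and optionally --dataset-ann-path).")
    images = sorted(data_root.glob("*.jpg"))
    assert images, f"No images found under {data_root}"
    # ... run the explainer over `images`, then score the saliency maps with the
    # accuracy metrics using the annotations from `ann_path` ...
```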