Add accuracy tests #73

Merged
55 changes: 23 additions & 32 deletions openvino_xai/metrics/adcc.py
@@ -3,10 +3,8 @@
import numpy as np
from scipy.stats import pearsonr

from openvino_xai import Task
from openvino_xai.common.utils import scaling
from openvino_xai.explainer.explainer import Explainer, ExplainMode
from openvino_xai.explainer.explanation import Explanation
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -22,49 +20,39 @@ class ADCC(BaseMetric):
https://github.com/aimagelab/ADCC/
"""

def __init__(self, model, preprocess_fn, postprocess_fn, explainer=None, device_name="CPU"):
def __init__(self, model, preprocess_fn, postprocess_fn, explainer, device_name="AUTO", **kwargs: Any):
super().__init__(
model=model, preprocess_fn=preprocess_fn, postprocess_fn=postprocess_fn, device_name=device_name
)
if explainer is None:
self.explainer = Explainer(
model=model,
task=Task.CLASSIFICATION,
preprocess_fn=self.preprocess_fn,
explain_mode=ExplainMode.WHITEBOX,
)
else:
self.explainer = explainer

def average_drop(
self, saliency_map: np.ndarray, class_idx: int, image: np.ndarray, model_output: np.ndarray
) -> float:
self.explainer = explainer
self.black_box_kwargs = kwargs

def average_drop(self, masked_image: np.ndarray, class_idx: int, model_output: np.ndarray) -> float:
"""
Measures the average percentage drop in confidence for the target class when the model sees only the
explanation map (image masked with saliency map), instead of the full image.
The less the better.
"""
confidence_on_input = np.max(model_output)

masked_image = (image * saliency_map[:, :, None]).astype(np.uint8)
confidence_on_input = model_output[class_idx]
prediction_on_saliency_map = self.model_predict(masked_image)
confidence_on_saliency_map = prediction_on_saliency_map[class_idx]

return max(0.0, confidence_on_input - confidence_on_saliency_map) / confidence_on_input

def coherency(self, saliency_map: np.ndarray, class_idx: int, image: np.ndarray) -> float:
def coherency(self, saliency_map: np.ndarray, masked_image: np.ndarray, class_idx: int, image: np.ndarray) -> float:
"""
Measures the coherency of the saliency map. The explanation map (image masked with saliency map) should
contain all the relevant features that explain a prediction and should remove useless features in a coherent way.
The saliency map and the saliency map of the explanation map should be similar.
The more the better.
"""
saliency_map_masked_image = self.explainer(
masked_image, targets=class_idx, colormap=False, scaling=False, **self.black_box_kwargs
)
saliency_map_masked_image = list(saliency_map_masked_image.saliency_map.values())[0] # only one target
saliency_map_masked_image = scaling(saliency_map_masked_image, cast_to_uint8=False, max_value=1)

masked_image = image * saliency_map[:, :, None]
saliency_map_mapped_image = self.explainer(masked_image, targets=[class_idx], colormap=False, scaling=False)
saliency_map_mapped_image = saliency_map_mapped_image.saliency_map[class_idx]

A, B = saliency_map.flatten(), saliency_map_mapped_image.flatten()
A, B = saliency_map.flatten(), saliency_map_masked_image.flatten()
# Pearson correlation coefficient
y, _ = pearsonr(A, B)
y = (y + 1) / 2
@@ -78,7 +66,7 @@ def complexity(saliency_map: np.ndarray) -> float:
Defined as L1 norm of the saliency map.
The less the better.
"""
return abs(saliency_map).sum() / (saliency_map.shape[-1] * saliency_map.shape[-2])
return saliency_map.sum() / (saliency_map.shape[-1] * saliency_map.shape[-2])

def __call__(self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray) -> Dict[str, float]:
"""
@@ -102,9 +90,11 @@ def __call__(self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray) -> Dict[str, float]:
saliency_map = scaling(saliency_map, cast_to_uint8=False, max_value=1)

model_output = self.model_predict(input_image)
masked_image = input_image * saliency_map[:, :, None]
class_idx = np.argmax(model_output) if class_idx is None else class_idx

avgdrop = self.average_drop(saliency_map, class_idx, input_image, model_output)
coh = self.coherency(saliency_map, class_idx, input_image)
avgdrop = self.average_drop(masked_image, class_idx, model_output)
coh = self.coherency(saliency_map, masked_image, class_idx, input_image)
com = self.complexity(saliency_map)

adcc = 3 / (1 / coh + 1 / (1 - com) + 1 / (1 - avgdrop))
@@ -129,14 +119,15 @@ def evaluate(
results = []
for input_image, explanation in zip(input_images, explanations):
for class_idx, saliency_map in explanation.saliency_map.items():
metric_dict = self(saliency_map, int(class_idx), input_image)
target_idx = None if explanation.layout in ONE_MAP_LAYOUTS else int(class_idx)
metric_dict = self(saliency_map, target_idx, input_image)
results.append(
[
metric_dict["adcc"],
metric_dict["coherency"],
metric_dict["complexity"],
metric_dict["average_drop"],
]
)
adcc, coherency, complexity, average_drop = np.mean(np.array(results), axis=0)
coherency, complexity, average_drop = np.mean(np.array(results), axis=0)
adcc = 3 / (1 / coherency + 1 / (1 - complexity) + 1 / (1 - average_drop))
return {"adcc": adcc, "coherency": coherency, "complexity": complexity, "average_drop": average_drop}
2 changes: 1 addition & 1 deletion openvino_xai/metrics/base.py
@@ -16,7 +16,7 @@ def __init__(
model: ov.Model = None,
preprocess_fn: Callable[[np.ndarray], np.ndarray] = IdentityPreprocessFN(),
postprocess_fn: Callable[[np.ndarray], np.ndarray] = None,
device_name: str = "CPU",
device_name: str = "AUTO",
):
# Pass model_predict to class initialization directly?
self.model = model
10 changes: 4 additions & 6 deletions openvino_xai/metrics/insertion_deletion_auc.py
@@ -2,7 +2,7 @@

import numpy as np

from openvino_xai.explainer.explanation import Explanation, Layout
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -43,7 +43,7 @@ def step_image_insertion_deletion(
return image_insertion, image_deletion

def __call__(
self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray, steps: int = 100, **kwargs: Any
self, saliency_map: np.ndarray, class_idx: int, input_image: np.ndarray, steps: int = 30, **kwargs: Any
) -> Dict[str, float]:
"""
Calculate the Insertion and Deletion AUC metrics for one saliency map for one class.
@@ -98,13 +98,11 @@ def evaluate(
:return: A Dict containing the mean insertion AUC, mean deletion AUC, and their difference (delta) as values.
:rtype: float
"""
for explanation in explanations:
assert explanation.layout in [Layout.MULTIPLE_MAPS_PER_IMAGE_GRAY, Layout.MULTIPLE_MAPS_PER_IMAGE_COLOR]

results = []
for input_image, explanation in zip(input_images, explanations):
for class_idx, saliency_map in explanation.saliency_map.items():
metric_dict = self(saliency_map, int(class_idx), input_image, steps)
target_idx = None if explanation.layout in ONE_MAP_LAYOUTS else int(class_idx)
metric_dict = self(saliency_map, target_idx, input_image, steps)
results.append([metric_dict["insertion"], metric_dict["deletion"]])

insertion, deletion = np.mean(np.array(results), axis=0)
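For context, the insertion curve is built by revealing pixels in decreasing-saliency order and re-scoring the model at each step, while the deletion curve erases them in the same order; the metric is the area under each confidence curve. A minimal sketch of the idea, assuming a `predict(image) -> class scores` callable and ignoring preprocessing and batching:

```python
import numpy as np

def insertion_deletion_curves(predict, saliency_map, image, class_idx, steps=30):
    """Sketch: reveal (insertion) or erase (deletion) pixels ranked by saliency,
    re-scoring the model after each step."""
    h, w = saliency_map.shape
    order = np.argsort(-saliency_map.ravel())          # most salient pixels first
    per_step = int(np.ceil(h * w / steps))

    inserted = np.zeros_like(image)
    deleted = image.copy()
    insertion_scores, deletion_scores = [], []
    for step in range(steps):
        idx = order[step * per_step:(step + 1) * per_step]
        ys, xs = np.unravel_index(idx, (h, w))
        inserted[ys, xs] = image[ys, xs]                # insertion: add the most salient pixels
        deleted[ys, xs] = 0                             # deletion: blank them out
        insertion_scores.append(float(predict(inserted)[class_idx]))
        deletion_scores.append(float(predict(deleted)[class_idx]))

    # Rectangle-rule area under the normalized confidence curves.
    return {"insertion": float(np.mean(insertion_scores)),
            "deletion": float(np.mean(deletion_scores))}
```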
31 changes: 18 additions & 13 deletions openvino_xai/metrics/pointing_game.py
@@ -6,7 +6,7 @@
import numpy as np

from openvino_xai.common.utils import logger
from openvino_xai.explainer.explanation import Explanation
from openvino_xai.explainer.explanation import ONE_MAP_LAYOUTS, Explanation
from openvino_xai.metrics.base import BaseMetric


@@ -86,20 +86,25 @@ def evaluate(
hits = 0.0
num_sal_maps = 0
for explanation, image_gt_bboxes in zip(explanations, gt_bboxes):
label_names = explanation.label_names
assert label_names is not None, "Label names are required for pointing game evaluation."

for class_idx, class_sal_map in explanation.saliency_map.items():
label_name = label_names[int(class_idx)]

if label_name not in image_gt_bboxes:
logger.info(
f"No ground-truth bbox for {label_name} saliency map. "
f"Skip pointing game evaluation for this saliency map."
)
continue
if explanation.layout in ONE_MAP_LAYOUTS:
# Activation map
class_gt_bboxes = [
gt_bbox for class_gt_bboxes in image_gt_bboxes.values() for gt_bbox in class_gt_bboxes
]
else:
label_names = explanation.label_names
assert label_names is not None, "Label names are required for pointing game evaluation."
label_name = label_names[int(class_idx)]

if label_name not in image_gt_bboxes:
logger.info(
f"No ground-truth bbox for {label_name} saliency map. "
f"Skip pointing game evaluation for this saliency map."
)
continue
class_gt_bboxes = image_gt_bboxes[label_name]

class_gt_bboxes = image_gt_bboxes[label_name]
hits += self(class_sal_map, class_gt_bboxes)["pointing_game"]
num_sal_maps += 1

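The per-map check behind this loop is the standard pointing game: a hit is scored when the most salient pixel lands inside any ground-truth box for the class. A minimal sketch, assuming boxes are (x_min, y_min, x_max, y_max) pixel coordinates:

```python
import numpy as np

def pointing_game_hit(saliency_map: np.ndarray, gt_bboxes) -> bool:
    """True if the argmax of the saliency map falls inside any ground-truth box."""
    y, x = np.unravel_index(np.argmax(saliency_map), saliency_map.shape)
    return any(x_min <= x <= x_max and y_min <= y <= y_max
               for x_min, y_min, x_max, y_max in gt_bboxes)

# e.g. pointing_game_hit(sal_map, [(274, 99, 434, 290)]) -> True or False
```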
1 change: 1 addition & 0 deletions pyproject.toml
@@ -49,6 +49,7 @@ dev = [
"py-cpuinfo",
"openpyxl",
"torchvision",
"pycocotools",
]
doc = [
"furo",
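pycocotools is added to the dev extras; a hedged sketch of the usual way COCO-format annotations are read with it (whether the dataset tests use exactly this pattern is an assumption, and the file path is illustrative):

```python
from pycocotools.coco import COCO

coco = COCO("annotations/instances_val.json")      # loads and indexes the annotation file
for image_id in coco.getImgIds()[:5]:
    image_info = coco.loadImgs(image_id)[0]        # {"file_name": ..., "height": ..., ...}
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
    labels = [coco.loadCats(ann["category_id"])[0]["name"] for ann in anns]
    bboxes = [ann["bbox"] for ann in anns]         # COCO boxes are [x, y, width, height]
```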
1 change: 0 additions & 1 deletion tests/assets/cheetah_coco/annotations/instances_val.json

This file was deleted.

@@ -0,0 +1,63 @@
<annotation>
<folder>cheetah</folder>
<filename>cheetah_person.jpg</filename>
<source>
<database>Unknown</database>
<annotation>Unknown</annotation>
<image>Unknown</image>
</source>
<size>
<width>500</width>
<height>354</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>person</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>274.0</xmin>
<ymin>99.0</ymin>
<xmax>434.0</xmax>
<ymax>290.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>17.0</xmin>
<ymin>160.0</ymin>
<xmax>306.0</xmax>
<ymax>289.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>165.0</xmin>
<ymin>129.0</ymin>
<xmax>274.0</xmax>
<ymax>283.0</ymax>
</bndbox>
</object>
<object>
<name>cheetah</name>
<truncated>0</truncated>
<occluded>0</occluded>
<difficult>0</difficult>
<bndbox>
<xmin>316.0</xmin>
<ymin>111.0</ymin>
<xmax>469.0</xmax>
<ymax>283.0</ymax>
</bndbox>
</object>
</annotation>
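This Pascal-VOC-style XML replaces the deleted COCO JSON. A hedged sketch of turning such a file into the `{label: [bboxes]}` mapping the pointing game expects; the loader shown here is illustrative, not the PR's actual test code:

```python
import xml.etree.ElementTree as ET
from collections import defaultdict

def load_voc_bboxes(xml_path: str) -> dict:
    """Parse a Pascal-VOC annotation file into {label: [(xmin, ymin, xmax, ymax), ...]}."""
    boxes = defaultdict(list)
    for obj in ET.parse(xml_path).getroot().iter("object"):
        bb = obj.find("bndbox")
        boxes[obj.findtext("name")].append(
            tuple(float(bb.findtext(tag)) for tag in ("xmin", "ymin", "xmax", "ymax"))
        )
    return dict(boxes)

# For the file above this would yield, e.g.:
# {"person": [(274.0, 99.0, 434.0, 290.0)], "cheetah": [(17.0, 160.0, 306.0, 289.0), ...]}
```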
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -61,7 +61,7 @@ def fxt_output_root(


@pytest.fixture(scope="session")
def fxt_clear_cache(request: pytest.FixtureRequest) -> Path:
def fxt_clear_cache(request: pytest.FixtureRequest) -> bool:
"""Data root directory path."""
clear_cache = bool(request.config.getoption("--clear-cache"))
msg = f"{clear_cache = }"
2 changes: 1 addition & 1 deletion tests/intg/test_accuracy_metrics.py
@@ -113,7 +113,7 @@ def test_explainer_image_2_classes(self):
assert np.abs(delta_auc_score - 0.39) <= 0.01

adcc_score = self.adcc.evaluate([explanation], [self.image])["adcc"]
assert np.abs(adcc_score - 0.55) <= 0.01
assert np.abs(adcc_score - 0.77) <= 0.01

def test_explainer_images(self):
images = [self.image, self.image]
24 changes: 24 additions & 0 deletions tests/perf/conftest.py
@@ -32,6 +32,18 @@ def pytest_addoption(parser: pytest.Parser):
default=5000,
help="Number of masks for black box methods." "Defaults to 5000.",
)
parser.addoption(
"--dataset-data-root",
action="store",
default="",
help="Path to directory with dataset images.",
)
parser.addoption(
"--dataset-ann-path",
action="store",
default="",
help="Path to dataset annotation file",
)


@pytest.fixture(scope="session")
@@ -173,3 +185,15 @@ def fxt_perf_summary(
data.to_csv(fxt_output_root / "perf-summary.csv")
data.to_excel(fxt_output_root / "perf-summary.xlsx")
print(f" -> Saved to {fxt_output_root}")


@pytest.fixture(scope="session")
def fxt_dataset_parameters(request: pytest.FixtureRequest) -> tuple[Path | None, Path | None]:
"""Retrieve dataset parameters for tests."""
data_root = request.config.getoption("--dataset-data-root")
ann_path = request.config.getoption("--dataset-ann-path")

if data_root != "":
return (Path(data_root), Path(ann_path) if ann_path else None)
else:
return (None, None)
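A hedged sketch of how a perf test could consume the new fixture and command-line options; the test name, dataset layout, and invocation are assumptions:

```python
import pytest

# Assumed invocation:
#   pytest tests/perf --dataset-data-root /data/images --dataset-ann-path /data/ann.json

def test_metrics_on_external_dataset(fxt_dataset_parameters):
    data_root, ann_path = fxt_dataset_parameters
    if data_root is None:
        pytest.skip("Requires --dataset-data-root (and optionally --dataset-ann-path).")
    images = sorted(data_root.glob("*.jpg"))
    assert images, f"No images found under {data_root}"
    # ... run the explainer over `images`, then score the saliency maps with the
    # accuracy metrics using the annotations from `ann_path` ...
```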