Add report from matcher with correct inlier ratio computation #374

Open · wants to merge 1 commit into base: master
97 changes: 94 additions & 3 deletions gtsfm/frontend/matcher/matcher_base.py
@@ -3,15 +3,30 @@
Authors: Ayush Baid
"""
import abc
from typing import Tuple
from typing import NamedTuple, Optional, Tuple

import dask
import numpy as np
from dask.delayed import Delayed
from gtsam import Cal3Bundler, Pose3
from trimesh import Trimesh

import gtsfm.utils.metrics as metric_utils
from gtsfm.common.keypoints import Keypoints


class MatcherReport(NamedTuple):
"""Report containing information about the matcher result.

Args:
num_matches: number of putative matches.
inlier_ratio_gt_model: # correct matches / # putative matches, w.r.t. the ground truth model.
"""

num_matches: int
inlier_ratio_gt_model: Optional[float] = None


class MatcherBase(metaclass=abc.ABCMeta):
"""Base class for all matchers.

@@ -54,6 +69,56 @@ def match(
# TODO(ayush): should I define matcher on descriptors or the distance matrices.
# TODO(ayush): how to handle deep-matchers which might require the full image as input

def evaluate(
self,
keypoints_i1: Keypoints,
keypoints_i2: Keypoints,
corr_idxs: np.ndarray,
camera_intrinsics_i1: Cal3Bundler,
camera_intrinsics_i2: Cal3Bundler,
dist_threshold: float,
gt_wTi1: Optional[Pose3],
gt_wTi2: Optional[Pose3],
gt_scene_mesh: Optional[Trimesh],
) -> MatcherReport:
"""Analyze the matcher result.

We evaluate the quality of the matches against ground truth, if available.

Args:
keypoints_i1: keypoints for image #i1, of length N1.
keypoints_i2: keypoints for image #i2, of length N2.
corr_idxs: putative correspondence indices, as array of shape (K,2).
camera_intrinsics_i1: intrinsics for camera i1.
camera_intrinsics_i2: intrinsics for camera i2.
dist_threshold: inlier threshold, in pixels.
gt_wTi1: ground truth global pose for camera i1.
gt_wTi2: ground truth global pose for camera i2.
gt_scene_mesh: ground truth scene mesh.

Returns:
A MatcherReport with the number of putative matches and, if ground truth is available, the inlier ratio w.r.t. the ground truth model.
"""

# Note: ignoring reprojection error return argument.
corr_idxs_inlier_mask_gt, _ = metric_utils.compute_correspondence_metrics(
keypoints_i1,
keypoints_i2,
corr_idxs,
camera_intrinsics_i1,
camera_intrinsics_i2,
dist_threshold,
gt_wTi1,
gt_wTi2,
gt_scene_mesh,
)
if corr_idxs_inlier_mask_gt is None:
inlier_ratio_gt_model = None
else:
inlier_ratio_gt_model = np.mean(corr_idxs_inlier_mask_gt)
# Record the number of putative matches for this image pair.
return MatcherReport(
num_matches=corr_idxs.shape[0],
inlier_ratio_gt_model=inlier_ratio_gt_model,
)

def create_computation_graph(
self,
keypoints_i1_graph: Delayed,
@@ -62,6 +127,12 @@
descriptors_i2_graph: Delayed,
im_shape_i1_graph: Delayed,
im_shape_i2_graph: Delayed,
camera_intrinsics_i1_graph: Delayed,
camera_intrinsics_i2_graph: Delayed,
dist_threshold: Delayed,
gt_wTi1_graph: Delayed,
gt_wTi2_graph: Delayed,
gt_scene_mesh_graph: Delayed,
) -> Tuple[Delayed, Delayed]:
"""
Generates computation graph for matched features using description graphs.
@@ -73,15 +144,35 @@
descriptors_i2_graph: descriptors corr. to keypoints_i2.
im_shape_i1_graph: Delayed with the (H,W) shape of image #i1.
im_shape_i2_graph: Delayed with the (H,W) shape of image #i2.
camera_intrinsics_i1_graph: intrinsics for camera i1.
camera_intrinsics_i2_graph: intrinsics for camera i2.
dist_threshold: inlier threshold, in pixels.
gt_wTi1_graph: ground truth global pose for camera i1.
gt_wTi2_graph: ground truth global pose for camera i2.
gt_scene_mesh_graph: ground truth scene mesh.

Returns:
Delayed dask tasks for matching for input camera pairs.
Delayed dask task for putative correspondence indices for the input image pair.
Delayed dask task containing a MatcherReport for the input image pair.
"""
return dask.delayed(self.match)(
corr_idxs_graph = dask.delayed(self.match)(
keypoints_i1_graph,
keypoints_i2_graph,
descriptors_i1_graph,
descriptors_i2_graph,
im_shape_i1_graph,
im_shape_i2_graph,
)
matcher_report = dask.delayed(self.evaluate)(
keypoints_i1=keypoints_i1_graph,
keypoints_i2=keypoints_i2_graph,
corr_idxs=corr_idxs_graph,
camera_intrinsics_i1=camera_intrinsics_i1_graph,
camera_intrinsics_i2=camera_intrinsics_i2_graph,
dist_threshold=dist_threshold,
gt_wTi1=gt_wTi1_graph,
gt_wTi2=gt_wTi2_graph,
gt_scene_mesh=gt_scene_mesh_graph,
)

return corr_idxs_graph, matcher_report
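
For reference, a minimal standalone sketch of how the new MatcherReport can be built from an inlier mask, mirroring what evaluate() does above; the toy corr_idxs array and inlier mask below are invented for illustration:

import numpy as np

from gtsfm.frontend.matcher.matcher_base import MatcherReport

# Hypothetical matcher output: K=4 putative matches, each row an (index_in_i1, index_in_i2) pair.
corr_idxs = np.array([[0, 3], [1, 5], [2, 7], [4, 9]])

# Hypothetical ground-truth inlier mask over the K putative matches (e.g., from epipolar checks).
corr_idxs_inlier_mask_gt = np.array([True, True, False, True])

report = MatcherReport(
    num_matches=corr_idxs.shape[0],
    inlier_ratio_gt_model=float(np.mean(corr_idxs_inlier_mask_gt)),
)
# report.num_matches == 4, report.inlier_ratio_gt_model == 0.75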
4 changes: 4 additions & 0 deletions gtsfm/scene_optimizer.py
@@ -122,6 +122,7 @@ def create_computation_graph(
descriptors_graph_list += [delayed_descs]

# Estimate two-view geometry and get indices of verified correspondences.
matcher_reports_dict = {}
Collaborator:
Why are we creating and passing around an additional report that just holds the number of matches and the inlier ratio, both of which are already stored in the TwoViewEstimationReport?

Collaborator (Author):
The main idea behind this is the following direction from @dellaert (see the code sketch below the screenshot):
(1) Each blue box in the graph should have its own table.
(2) We need to be totally functional in our programming. (We cannot modify objects)
(3) Each table can only be returned by one blue box.
(4) No two blue boxes can create a single gray box.
(5) The graph must determine the code/data types exactly.

[Screenshot attached: Screen Shot 2021-11-05 at 8:56:37 AM]
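
For illustration only (not part of the PR): a minimal sketch of the pattern described above, in which each stage (a "blue box") returns its main output together with its own immutable report ("table"). The stage and report names below are hypothetical.

from typing import NamedTuple, Tuple

import dask
import numpy as np


class StageReport(NamedTuple):
    """Per-stage metrics table owned by exactly one stage (hypothetical)."""
    num_outputs: int


def run_stage(data: np.ndarray) -> Tuple[np.ndarray, StageReport]:
    """A 'blue box': purely functional, returns its result plus its own report."""
    result = data * 2
    return result, StageReport(num_outputs=result.shape[0])


# Wire the stage into a dask graph; downstream boxes consume only the pieces they need.
pair_graph = dask.delayed(run_stage)(np.arange(5))
result, report = pair_graph.compute()  # report is an immutable NamedTuple; nothing is mutated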

Collaborator:
As we talked about offline, I think all of this can be achieved without creating a redundant data type for each blue box. If we continue on this trajectory, we will be passing around 7-8 different reports, which will be extremely cumbersome.

I think we should continue brainstorming.

i2Ri1_graph_dict = {}
i2Ui1_graph_dict = {}
v_corr_idxs_graph_dict: Dict[Tuple[int, int], np.ndarray] = {}
@@ -142,6 +143,7 @@
v_corr_idxs,
two_view_report,
two_view_report_pp,
matcher_report,
) = self.two_view_estimator.create_computation_graph(
keypoints_graph_list[i1],
keypoints_graph_list[i2],
@@ -162,6 +164,7 @@
v_corr_idxs_graph_dict[(i1, i2)] = v_corr_idxs
two_view_reports_dict[(i1, i2)] = two_view_report
two_view_reports_pp_dict[(i1, i2)] = two_view_report_pp
matcher_reports_dict[(i1, i2)] = matcher_report

# Visualize verified two-view correspondences.
if self._save_two_view_correspondences_viz:
@@ -181,6 +184,7 @@
auxiliary_graph_list.append(
dask.delayed(save_full_frontend_metrics)(two_view_reports_dict, image_graph, filename="verifier_full.json")
)
metrics_graph_list.append(dask.delayed(two_view_estimator.aggregate_matcher_metrics)(matcher_reports_dict))
if gt_cameras_graph is not None:
metrics_graph_list.append(
dask.delayed(two_view_estimator.aggregate_frontend_metrics)(
40 changes: 37 additions & 3 deletions gtsfm/two_view_estimator.py
@@ -15,7 +15,7 @@
import gtsfm.utils.metrics as metric_utils
from gtsfm.common.two_view_estimation_report import TwoViewEstimationReport
from gtsfm.frontend.inlier_support_processor import InlierSupportProcessor
from gtsfm.frontend.matcher.matcher_base import MatcherBase
from gtsfm.frontend.matcher.matcher_base import MatcherBase, MatcherReport
from gtsfm.frontend.verifier.verifier_base import VerifierBase
from gtsfm.evaluation.metrics import GtsfmMetric, GtsfmMetricsGroup

@@ -96,13 +96,19 @@ def create_computation_graph(
"""

# graph for matching to obtain putative correspondences
corr_idxs_graph = self._matcher.create_computation_graph(
corr_idxs_graph, matcher_report = self._matcher.create_computation_graph(
keypoints_i1_graph,
keypoints_i2_graph,
descriptors_i1_graph,
descriptors_i2_graph,
im_shape_i1_graph,
im_shape_i2_graph,
camera_intrinsics_i1_graph,
camera_intrinsics_i2_graph,
self._corr_metric_dist_threshold,
gt_wTi1_graph,
gt_wTi2_graph,
gt_scene_mesh_graph,
)

# verification on putative correspondences to obtain relative pose
@@ -156,7 +162,14 @@
two_view_report_pp_graph,
) = self.processor.create_computation_graph(i2Ri1_graph, i2Ui1_graph, v_corr_idxs_graph, two_view_report_graph)
# We provide both, as we will create reports for both.
return (i2Ri1_pp_graph, i2Ui1_pp_graph, v_corr_idxs_pp_graph, two_view_report_graph, two_view_report_pp_graph)
return (
i2Ri1_pp_graph,
i2Ui1_pp_graph,
v_corr_idxs_pp_graph,
two_view_report_graph,
two_view_report_pp_graph,
matcher_report,
)


def generate_two_view_report(
@@ -223,6 +236,27 @@ def compute_relative_pose_metrics(
return (R_error_deg, U_error_deg)


def aggregate_matcher_metrics(matcher_reports_dict: Dict[Tuple[int, int], MatcherReport]) -> GtsfmMetricsGroup:
"""Aggregate the matcher metrics into a metrics group for the HTML report.

Args:
matcher_reports_dict: dictionary containing a matcher report for every image pair.

Returns:
A GtsfmMetricsGroup named "matcher_metrics", containing the per-pair match counts and inlier ratios w.r.t. the ground truth model.
"""
num_matches_all_pairs = []
inlier_ratio_gt_model_all_pairs = []
for report in matcher_reports_dict.values():
num_matches_all_pairs.append(report.num_matches)
# Skip pairs without ground truth, for which the inlier ratio is None.
if report.inlier_ratio_gt_model is not None:
inlier_ratio_gt_model_all_pairs.append(report.inlier_ratio_gt_model)

return GtsfmMetricsGroup(
"matcher_metrics",
metrics=[
GtsfmMetric("inlier_ratio_wrt_gt_model", inlier_ratio_gt_model_all_pairs),
GtsfmMetric("num_matches", num_matches_all_pairs),
],
)


def aggregate_frontend_metrics(
two_view_reports_dict: Dict[Tuple[int, int], Optional[TwoViewEstimationReport]],
angular_err_threshold_deg: float,
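
A rough usage sketch for the new aggregation helper above. The image-pair keys and report values are invented for illustration, and the import path assumes the two_view_estimator module lives directly under the gtsfm package, as scene_optimizer.py above suggests:

from gtsfm import two_view_estimator
from gtsfm.frontend.matcher.matcher_base import MatcherReport

# Hypothetical per-pair matcher reports, keyed by (i1, i2) image-pair indices.
matcher_reports_dict = {
    (0, 1): MatcherReport(num_matches=250, inlier_ratio_gt_model=0.84),
    (0, 2): MatcherReport(num_matches=120, inlier_ratio_gt_model=0.61),
    (1, 2): MatcherReport(num_matches=310, inlier_ratio_gt_model=0.92),
}

# Produces a GtsfmMetricsGroup named "matcher_metrics", which scene_optimizer.py
# appends to metrics_graph_list for inclusion in the HTML report.
metrics_group = two_view_estimator.aggregate_matcher_metrics(matcher_reports_dict)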