diff --git a/gtsfm/common/image.py b/gtsfm/common/image.py index fc8ca7f4a..11582ed2e 100644 --- a/gtsfm/common/image.py +++ b/gtsfm/common/image.py @@ -3,18 +3,13 @@ Authors: Ayush Baid """ -from typing import Any, Dict, NamedTuple, Optional +from typing import Any, Dict, NamedTuple, Optional, Tuple import numpy as np from gtsam import Cal3Bundler from gtsfm.common.sensor_width_database import SensorWidthDatabase -# A heuristic value that scales image width or height in pixel units. Matches the scaling used in COLMAP, see -# `ImageReaderOptions.default_focal_length_factor` in -# https://github.com/colmap/colmap/blob/dev/src/base/image_reader.h. -DEFAULT_FOCAL_LENGTH_FACTOR = 1.2 - # Tag Ref: https://www.awaresystems.be/imaging/tiff/tifftags/privateifd/exif/focalplaneresolutionunit.html INCHES_FOCAL_PLANE_RES_UNIT = 2 CENTIMETERS_FOCAL_PLANE_RES_UNIT = 3 @@ -40,6 +35,11 @@ def width(self) -> int: """The width of the image (i.e. number of pixels in the horizontal direction).""" return self.value_array.shape[1] + @property + def shape(self) -> Tuple[int, int, int]: + """The shape of the image (H, W, C).""" + return self.value_array.shape + def __compute_sensor_width_from_exif(self) -> float: """Compute sensor_width_mm from `ExifImageWidth` tag, @@ -73,7 +73,7 @@ def __compute_sensor_width_from_exif(self) -> float: return sensor_width_mm - def get_intrinsics_from_exif(self) -> Optional[Cal3Bundler]: + def get_intrinsics_from_exif(self, default_focal_length_factor: float = 1.2) -> Optional[Cal3Bundler]: """Constructs the camera intrinsics from exif tag. 
class MobilebrickLoader(LoaderBase):
    """Loader class that reads from the MobileBrick dataset.

    The dataset root is expected to contain an `image/` and a `pose/` sub-directory, plus an `intrinsic/`
    sub-directory when ground truth intrinsics are requested. Files are addressed by a zero-padded, 6-digit frame
    index starting at 0, i.e. `image/000000.jpg`, `pose/000000.txt`, `intrinsic/000000.txt`.
    """

    def __init__(
        self,
        data_dir: str,
        use_gt_intrinsics: bool = False,
        max_frame_lookahead: int = 5,
        max_resolution: int = 1024,
        input_worker: Optional[str] = None,
    ) -> None:
        """Indexes the images and eagerly reads the ground truth poses (and optionally intrinsics) from disk.

        Args:
            data_dir: Root directory of a MobileBrick sequence.
            use_gt_intrinsics: If True, per-frame intrinsics are read from `<data_dir>/intrinsic`. Otherwise the
                intrinsics are approximated on demand from the image (EXIF / focal length heuristic).
            max_frame_lookahead: Maximum difference between the indices of two images for them to be considered
                a valid pair.
            max_resolution: Forwarded unchanged to `LoaderBase`.
            input_worker: Forwarded unchanged to `LoaderBase`.
        """
        super().__init__(max_resolution=max_resolution, input_worker=input_worker)

        self._use_gt_intrinsics = use_gt_intrinsics
        self._max_frame_lookahead = max_frame_lookahead
        self._image_dir = os.path.join(data_dir, "image")

        # Count only the JPEG frames. Counting every directory entry would let a stray file (e.g. `.DS_Store`)
        # inflate the image count and produce paths to frames that do not exist.
        self._num_images = sum(1 for fname in os.listdir(self._image_dir) if fname.endswith(".jpg"))

        # Cache image paths.
        self._image_paths = [os.path.join(self._image_dir, f"{i:06d}.jpg") for i in range(self._num_images)]

        # Load GT intrinsics.
        # TODO(akshay): It should be possible to cache approx intrinsics here.
        self._intrinsics: Optional[List[Cal3Bundler]] = None
        if self._use_gt_intrinsics:
            intrinsics_dir = os.path.join(data_dir, "intrinsic")
            self._intrinsics = []
            for i in range(self._num_images):
                intrinsics_file = os.path.join(intrinsics_dir, f"{i:06d}.txt")
                K = np.loadtxt(intrinsics_file)
                # Cal3Bundler models a single focal length, so fx and fy are averaged.
                self._intrinsics.append(Cal3Bundler((K[0, 0] + K[1, 1]) / 2, 0, 0, K[0, 2], K[1, 2]))

        # Load GT poses. Each file stores a matrix whose top-left 3x3 block is the rotation and whose last column
        # is the translation.
        self._poses_dir = os.path.join(data_dir, "pose")
        self._wTi: List[Pose3] = []
        for i in range(self._num_images):
            pose_file = os.path.join(self._poses_dir, f"{i:06d}.txt")
            wTi_mat = np.loadtxt(pose_file)
            self._wTi.append(Pose3(Rot3(wTi_mat[:3, :3]), wTi_mat[:3, 3]))

    def image_filenames(self) -> List[Path]:
        """Return the file names corresponding to each image index.

        The names are derived from the cached image paths, so entry `i` always refers to the file that
        `get_image_full_res(i)` loads.
        """
        return [Path(os.path.basename(image_path)) for image_path in self._image_paths]

    def __len__(self) -> int:
        """The number of images in the dataset.

        Returns:
            The number of images.
        """
        return self._num_images

    def get_image_full_res(self, index: int) -> Image:
        """Get the image at the given index, at full resolution.

        Args:
            index: The index to fetch.

        Returns:
            The image at the query index.

        Raises:
            IndexError: If an out-of-bounds image index is requested.
        """
        if index < 0 or index >= len(self):
            raise IndexError(f"Image index {index} is invalid")

        # Read in image.
        img = io_utils.load_image(self._image_paths[index])
        return Image(value_array=img.value_array, exif_data=img.exif_data, file_name=img.file_name)

    def get_camera_intrinsics_full_res(self, index: int) -> Optional[Cal3Bundler]:
        """Get the camera intrinsics at the given index, valid for a full-resolution image.

        Args:
            index: The index to fetch.

        Returns:
            Ground truth intrinsics for the given camera if they were loaded, otherwise intrinsics approximated
            from the image itself.

        Raises:
            IndexError: If an out-of-bounds image index is requested.
        """
        if index < 0 or index >= len(self):
            raise IndexError(f"Image index {index} is invalid")

        if self._intrinsics is not None:
            return self._intrinsics[index]

        # 0.8 is better than the default factor of 1.2 for this dataset, but it has not been fully tuned.
        return io_utils.load_image(self._image_paths[index]).get_intrinsics_from_exif(
            default_focal_length_factor=0.8
        )

    def get_camera_pose(self, index: int) -> Optional[Pose3]:
        """Get the camera pose (in world coordinates) at the given index.

        Args:
            index: The index to fetch.

        Returns:
            Ground truth pose for the given camera.

        Raises:
            IndexError: If an out-of-bounds image index is requested.
        """
        if index < 0 or index >= len(self):
            raise IndexError(f"Image index {index} is invalid")

        return self._wTi[index]

    def is_valid_pair(self, idx1: int, idx2: int) -> bool:
        """Checks if (idx1, idx2) is a valid pair. idx1 < idx2 is required.

        On top of the base-class check, the two indices may be at most `max_frame_lookahead` apart.

        Args:
            idx1: First index of the pair.
            idx2: Second index of the pair.

        Returns:
            Validation result.
        """
        return super().is_valid_pair(idx1, idx2) and abs(idx1 - idx2) <= self._max_frame_lookahead
class GtsfmRunnerMobilebrickLoader(GtsfmRunnerBase):
    """Runner for the Mobilebrick dataset."""

    def __init__(self) -> None:
        super().__init__(tag="Run GTSFM on dataset from MobileBrick.")

    @staticmethod
    def _parse_bool(value: str) -> bool:
        """Convert a command-line string to a bool.

        `type=bool` must not be used with argparse: `bool("False")` is True because any non-empty string is
        truthy, so `--use_gt_intrinsics False` would silently enable the option.

        Args:
            value: Raw string from the command line.

        Returns:
            The parsed boolean.

        Raises:
            argparse.ArgumentTypeError: If the string is not a recognized boolean spelling.
        """
        normalized = value.strip().lower()
        if normalized in ("true", "t", "yes", "y", "1"):
            return True
        # The empty string is kept as False, matching what `bool("")` used to yield.
        if normalized in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")

    def construct_argparser(self) -> argparse.ArgumentParser:
        """Extend the base argument parser with the MobileBrick-specific options."""
        parser = super().construct_argparser()
        parser.add_argument("--data_dir", type=str, default="", help="Root directory of the MobileBrick sequence.")
        # `nargs="?"` with `const=True` accepts both the bare flag (`--use_gt_intrinsics`) and the explicit
        # `--use_gt_intrinsics True/False` form that existing invocations use.
        parser.add_argument(
            "--use_gt_intrinsics",
            type=self._parse_bool,
            nargs="?",
            const=True,
            default=False,
            help="Use the ground truth intrinsics shipped with the dataset instead of approximating them.",
        )
        return parser

    def construct_loader(self) -> LoaderBase:
        """Build the MobileBrick loader from the parsed command-line arguments."""
        return MobilebrickLoader(
            data_dir=self.parsed_args.data_dir,
            use_gt_intrinsics=self.parsed_args.use_gt_intrinsics,
            max_frame_lookahead=self.parsed_args.max_frame_lookahead,
            max_resolution=self.parsed_args.max_resolution,
        )
b/tests/common/test_image.py @@ -8,9 +8,11 @@ import numpy as np from gtsam import Cal3Bundler -from gtsfm.common.image import Image, DEFAULT_FOCAL_LENGTH_FACTOR +from gtsfm.common.image import Image from gtsfm.common.sensor_width_database import SensorWidthDatabase +DEFAULT_FOCAL_LENGTH_FACTOR = 1.2 + class TestImage(unittest.TestCase): """Unit tests for the image class.""" @@ -22,7 +24,7 @@ def test_get_intrinsics_from_exif_no_exif(self): im_w = 120 exif_data = None image = Image(np.random.randint(low=0, high=255, size=(im_h, im_w, 3)), exif_data) - computed_intrinsics = image.get_intrinsics_from_exif() + computed_intrinsics = image.get_intrinsics_from_exif(default_focal_length_factor=DEFAULT_FOCAL_LENGTH_FACTOR) expected_focal_length = DEFAULT_FOCAL_LENGTH_FACTOR * max(im_h, im_w) expected_intrinsics = Cal3Bundler(fx=expected_focal_length, k1=0.0, k2=0.0, u0=60.0, v0=50.0) @@ -39,7 +41,7 @@ def test_get_intrinsics_from_exif_no_tags(self): "DummyName": "DummyValue", } image = Image(np.random.randint(low=0, high=255, size=(im_h, im_w, 3)), exif_data) - computed_intrinsics = image.get_intrinsics_from_exif() + computed_intrinsics = image.get_intrinsics_from_exif(default_focal_length_factor=DEFAULT_FOCAL_LENGTH_FACTOR) expected_focal_length = DEFAULT_FOCAL_LENGTH_FACTOR * max(im_h, im_w) expected_intrinsics = Cal3Bundler(fx=expected_focal_length, k1=0.0, k2=0.0, u0=60.0, v0=50.0) @@ -66,7 +68,7 @@ def test_get_intrinsics_from_exif_focal_length_in_35mm_film(self): "FocalPlaneResolutionUnit": 2, } image = Image(np.random.randint(low=0, high=255, size=(im_h, im_w, 3)), exif_data) - computed_intrinsics = image.get_intrinsics_from_exif() + computed_intrinsics = image.get_intrinsics_from_exif(default_focal_length_factor=DEFAULT_FOCAL_LENGTH_FACTOR) expected_intrinsics = Cal3Bundler(fx=480, k1=0.0, k2=0.0, u0=60.0, v0=50.0) @@ -89,7 +91,7 @@ def test_get_intrinsics_from_exif_focal_length(self, mock_init, mock_lookup): "FocalPlaneResolutionUnit": 2, } image = 
Image(np.random.randint(low=0, high=255, size=(im_h, im_w, 3)), exif_data) - computed_intrinsics = image.get_intrinsics_from_exif() + computed_intrinsics = image.get_intrinsics_from_exif(default_focal_length_factor=DEFAULT_FOCAL_LENGTH_FACTOR) expected_intrinsics = Cal3Bundler(fx=600, k1=0.0, k2=0.0, u0=60.0, v0=50.0) @@ -108,7 +110,7 @@ def test_get_intrinsics_from_exif_exif_image_width(self): "FocalPlaneResolutionUnit": 2, } image = Image(np.random.randint(low=0, high=255, size=(im_h, im_w, 3)), exif_data) - computed_intrinsics = image.get_intrinsics_from_exif() + computed_intrinsics = image.get_intrinsics_from_exif(default_focal_length_factor=DEFAULT_FOCAL_LENGTH_FACTOR) expected_intrinsics = Cal3Bundler(fx=590.551, k1=0.0, k2=0.0, u0=60.0, v0=50.0) diff --git a/tests/data/mobilebrick/image/000000.jpg b/tests/data/mobilebrick/image/000000.jpg new file mode 100644 index 000000000..6ea62f3a9 Binary files /dev/null and b/tests/data/mobilebrick/image/000000.jpg differ diff --git a/tests/data/mobilebrick/image/000001.jpg b/tests/data/mobilebrick/image/000001.jpg new file mode 100644 index 000000000..09c3b8377 Binary files /dev/null and b/tests/data/mobilebrick/image/000001.jpg differ diff --git a/tests/data/mobilebrick/image/000002.jpg b/tests/data/mobilebrick/image/000002.jpg new file mode 100644 index 000000000..b941f2fc9 Binary files /dev/null and b/tests/data/mobilebrick/image/000002.jpg differ diff --git a/tests/data/mobilebrick/image/000003.jpg b/tests/data/mobilebrick/image/000003.jpg new file mode 100644 index 000000000..ccff3a209 Binary files /dev/null and b/tests/data/mobilebrick/image/000003.jpg differ diff --git a/tests/data/mobilebrick/image/000004.jpg b/tests/data/mobilebrick/image/000004.jpg new file mode 100644 index 000000000..3df6b1dd8 Binary files /dev/null and b/tests/data/mobilebrick/image/000004.jpg differ diff --git a/tests/data/mobilebrick/intrinsic/000000.txt b/tests/data/mobilebrick/intrinsic/000000.txt new file mode 100644 index 
000000000..4efd87764 --- /dev/null +++ b/tests/data/mobilebrick/intrinsic/000000.txt @@ -0,0 +1,3 @@ +1459.52795410 0.00000000 962.14691162 +0.00000000 1459.52795410 724.53588867 +0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/intrinsic/000001.txt b/tests/data/mobilebrick/intrinsic/000001.txt new file mode 100644 index 000000000..8fbb02416 --- /dev/null +++ b/tests/data/mobilebrick/intrinsic/000001.txt @@ -0,0 +1,3 @@ +1460.12475586 0.00000000 962.19854736 +0.00000000 1460.12475586 724.49261475 +0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/intrinsic/000002.txt b/tests/data/mobilebrick/intrinsic/000002.txt new file mode 100644 index 000000000..8d8b4d6bb --- /dev/null +++ b/tests/data/mobilebrick/intrinsic/000002.txt @@ -0,0 +1,3 @@ +1460.12475586 0.00000000 962.19372559 +0.00000000 1460.12475586 724.62994385 +0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/intrinsic/000003.txt b/tests/data/mobilebrick/intrinsic/000003.txt new file mode 100644 index 000000000..5a40bace5 --- /dev/null +++ b/tests/data/mobilebrick/intrinsic/000003.txt @@ -0,0 +1,3 @@ +1459.05395508 0.00000000 962.14215088 +0.00000000 1459.05395508 724.56298828 +0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/intrinsic/000004.txt b/tests/data/mobilebrick/intrinsic/000004.txt new file mode 100644 index 000000000..0dc0cadbc --- /dev/null +++ b/tests/data/mobilebrick/intrinsic/000004.txt @@ -0,0 +1,3 @@ +1458.52233887 0.00000000 962.09661865 +0.00000000 1458.52233887 724.60595703 +0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/pose/000000.txt b/tests/data/mobilebrick/pose/000000.txt new file mode 100644 index 000000000..f15495d5e --- /dev/null +++ b/tests/data/mobilebrick/pose/000000.txt @@ -0,0 +1,4 @@ +-0.46448371 -0.85866576 0.21667491 -0.22408816 +-0.54823434 0.08665860 -0.83182293 0.32383028 +0.69548112 -0.50515682 -0.51100159 0.34980530 +0.00000000 0.00000000 0.00000000 1.00000000 diff --git 
a/tests/data/mobilebrick/pose/000001.txt b/tests/data/mobilebrick/pose/000001.txt new file mode 100644 index 000000000..bd2263279 --- /dev/null +++ b/tests/data/mobilebrick/pose/000001.txt @@ -0,0 +1,4 @@ +-0.46954235 -0.85014164 0.23830472 -0.21554375 +-0.55491459 0.07422182 -0.82858974 0.32398531 +0.68673122 -0.52129674 -0.50660628 0.35298249 +0.00000000 0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/pose/000002.txt b/tests/data/mobilebrick/pose/000002.txt new file mode 100644 index 000000000..09c262a86 --- /dev/null +++ b/tests/data/mobilebrick/pose/000002.txt @@ -0,0 +1,4 @@ +-0.46406239 -0.85505915 0.23134370 -0.20851623 +-0.55035943 0.07367838 -0.83167058 0.33216473 +0.69408256 -0.51326925 -0.50478125 0.36323601 +0.00000000 0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/pose/000003.txt b/tests/data/mobilebrick/pose/000003.txt new file mode 100644 index 000000000..f1d5cd072 --- /dev/null +++ b/tests/data/mobilebrick/pose/000003.txt @@ -0,0 +1,4 @@ +-0.45643684 -0.86206996 0.22022893 -0.20537572 +-0.53969383 0.07145945 -0.83882308 0.34330064 +0.70738673 -0.50172597 -0.49787053 0.36881423 +0.00000000 0.00000000 0.00000000 1.00000000 diff --git a/tests/data/mobilebrick/pose/000004.txt b/tests/data/mobilebrick/pose/000004.txt new file mode 100644 index 000000000..7a87fbcf4 --- /dev/null +++ b/tests/data/mobilebrick/pose/000004.txt @@ -0,0 +1,4 @@ +-0.43685880 -0.87210071 0.22044215 -0.19587429 +-0.53288990 0.05347520 -0.84449321 0.34679961 +0.72469491 -0.48639569 -0.48809478 0.36172125 +0.00000000 0.00000000 0.00000000 1.00000000 diff --git a/tests/loader/test_mobilebrick_loader.py b/tests/loader/test_mobilebrick_loader.py new file mode 100644 index 000000000..826fb5b30 --- /dev/null +++ b/tests/loader/test_mobilebrick_loader.py @@ -0,0 +1,71 @@ +"""Unit tests for the MobileBrick loader. 
class TestMobileBrickLoader(unittest.TestCase):
    """Unit tests for the MobileBrick loader.

    The unit test data contains 5 images, their corresponding intrinsics and ground truth pose.
    """

    def setUp(self) -> None:
        """Set up the loader for the test."""
        super().setUp()

        self.loader = MobilebrickLoader(data_dir=str(DEFAULT_FOLDER))

    def test_length(self) -> None:
        """Test the number of all images in the loader."""

        # There are 5 images in total.
        self.assertEqual(5, len(self.loader))

    def test_image_filenames(self) -> None:
        """Test the image filenames."""
        image_filenames = self.loader.image_filenames()
        self.assertEqual(len(image_filenames), 5)
        self.assertEqual(str(image_filenames[0]), "000000.jpg")
        self.assertEqual(str(image_filenames[4]), "000004.jpg")

    def test_get_image_full_res(self) -> None:
        """Test the image at a given index."""
        image1 = self.loader.get_image_full_res(0)
        self.assertIsInstance(image1, Image)
        self.assertEqual(image1.shape, (1440, 1920, 3))

        with self.assertRaises(IndexError):
            self.loader.get_image_full_res(5)
        with self.assertRaises(IndexError):
            self.loader.get_image_full_res(-1)

    def test_get_camera_intrinsics_full_res(self) -> None:
        """Test the (approximated) camera intrinsics at a given index."""
        intrinsics1 = self.loader.get_camera_intrinsics_full_res(0)
        self.assertIsInstance(intrinsics1, Cal3Bundler)

        with self.assertRaises(IndexError):
            self.loader.get_camera_intrinsics_full_res(5)
        with self.assertRaises(IndexError):
            self.loader.get_camera_intrinsics_full_res(-1)

    def test_get_camera_intrinsics_full_res_gt(self) -> None:
        """Test that ground truth intrinsics are read from the `intrinsic` directory when requested."""
        gt_loader = MobilebrickLoader(data_dir=str(DEFAULT_FOLDER), use_gt_intrinsics=True)
        intrinsics = gt_loader.get_camera_intrinsics_full_res(0)
        self.assertIsInstance(intrinsics, Cal3Bundler)

        # Expected values are the entries of tests/data/mobilebrick/intrinsic/000000.txt (fx == fy there).
        self.assertAlmostEqual(intrinsics.fx(), 1459.52795410, places=5)
        self.assertAlmostEqual(intrinsics.px(), 962.14691162, places=5)
        self.assertAlmostEqual(intrinsics.py(), 724.53588867, places=5)

    def test_get_camera_pose(self) -> None:
        """Test the camera pose at a given index."""
        pose1 = self.loader.get_camera_pose(4)
        self.assertIsInstance(pose1, Pose3)

        with self.assertRaises(IndexError):
            self.loader.get_camera_pose(5)
        with self.assertRaises(IndexError):
            self.loader.get_camera_pose(-1)

    def test_is_valid_pair_respects_lookahead(self) -> None:
        """Test that pairs further apart than `max_frame_lookahead` are rejected."""
        loader = MobilebrickLoader(data_dir=str(DEFAULT_FOLDER), max_frame_lookahead=1)
        self.assertFalse(loader.is_valid_pair(0, 2))
unittest.main()