From d16022a037e0cc602a8284af51c149a5616a5597 Mon Sep 17 00:00:00 2001 From: roomrys <38435167+roomrys@users.noreply.github.com> Date: Wed, 27 Nov 2024 09:40:16 -0800 Subject: [PATCH 1/2] Implement CameraGroup.triangulate and provide default triangulate_dlt_vectorized function --- sleap_io/model/camera.py | 110 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/sleap_io/model/camera.py b/sleap_io/model/camera.py index 530939f3..cbc06ec8 100644 --- a/sleap_io/model/camera.py +++ b/sleap_io/model/camera.py @@ -2,6 +2,8 @@ from __future__ import annotations +from collections.abc import Callable + import attrs import cv2 import numpy as np @@ -10,6 +12,59 @@ from sleap_io.model.video import Video +def triangulate_dlt_vectorized( + points: np.ndarray, projection_matrices: np.ndarray +) -> np.ndarray: + """Triangulate 3D points from multiple camera views using Direct Linear Transform. + + Args: + points: Array of N 2D points from each camera view M of shape (M, N, 2). + projection_matrices: Array of (3, 4) projection matrices for each camera M of + shape (M, 3, 4). + + Returns: + Triangulated 3D points of shape (N, 3). + """ + n_cameras, n_points, _ = points.shape + + # Flatten points to shape needed for multiplication + points_flattened = points.reshape(n_cameras, 2 * n_points, 1, order="C") + + # Create row selector matrix to select correct rows from projection matrix + row_selector = np.zeros((n_cameras * n_points, 2, 2)) + row_selector[:, 0, 0] = -1 # Negate 1st row of projection matrix for x + row_selector[:, 1, 1] = -1 # Negate 2nd row of projection matrix for y + row_selector = row_selector.reshape(n_cameras, 2 * n_points, 2, order="C") + + # Concatenate row selector and points matrices to shape (M, 2N, 3) + left_matrix = np.concatenate((row_selector, points_flattened), axis=2) + + # Get A (stacked in a weird way) of shape (M, 2N, 4) + a_stacked = np.matmul(left_matrix, projection_matrices) + + # Reorganize A to shape (N, 2M, 4) s.t. each 3D point has A of shape 2M x 4 + a = ( + a_stacked.reshape(n_cameras, n_points, 2, 4) + .transpose(1, 0, 2, 3) + .reshape(n_points, 2 * n_cameras, 4) + ) + + # Remove rows with NaNs before SVD which may result in a ragged A (hence for loop) + points_3d = [] + for a_slice in a: + nan_mask = np.isnan(a_slice) + a_no_nan = a_slice[~nan_mask].reshape(-1, 4, order="C") + + _, _, vh = np.linalg.svd(a_no_nan) + + point_3d = vh[-1, :-1] / vh[-1, -1] + points_3d.append(point_3d) + + points_3d = np.array(points_3d) + + return points_3d + + @define class CameraGroup: """A group of cameras used to record a multi-view `RecordingSession`. @@ -20,6 +75,61 @@ class CameraGroup: cameras: list[Camera] = field(factory=list) + def triangulate( + self, + points: np.ndarray, + triangulation_func: Callable[ + [np.ndarray, np.ndarray], np.ndarray + ] = triangulate_dlt_vectorized, + ) -> np.ndarray: + """Triangulate N 2D points from multiple camera views M. + + Args: + points: Array of 2D points from each camera view of shape (M, N, 2). + triangulation_func: Function to use for triangulation. The + triangulation_func should take the following arguments: + - points: Array of undistorted 2D points from each camera view of shape + (M, N, 2). + - projection_matrices: Array of (3, 4) projection matrices for each of + the M cameras of shape (M, 3, 4) - note that points are undistorted. + and return the triangulated 3D points of shape (N, 3). + Default is vectorized DLT. + + Returns: + Triangulated 3D points of shape (N, 3). + """ + n_cameras, n_points, _ = points.shape + if n_cameras != len(self.cameras): + raise ValueError( + f"Expected points to have shape (M, N, 2) where M = {len(self.cameras)}" + f" is the number of cameras in the group, but received shape " + f"{points.shape}" + ) + + # Convert points to float64 to control precision + points = points.astype("float64") + + # Undistort points + for cam_idx, camera in enumerate(self.cameras): + cam_points = camera.undistort_points(points[cam_idx]) + points[cam_idx] = cam_points + + # Since points are undistorted, use extrinsic matrices as projection matrices + projection_matrices = np.array( + [camera.extrinsic_matrix[:3] for camera in self.cameras] + ) + + # Triangulate points using provided function + points_3d = triangulation_func(points, projection_matrices) + n_points_returned = points_3d.shape[0] + if n_points_returned != n_points: + raise ValueError( + f"Expected triangulation function to return {n_points} 3D points, but " + f"received {n_points_returned} 3D points." + ) + + return points_3d + @define(eq=False) # Set eq to false to make class hashable class RecordingSession: From df5c541d229fd083c3ca76660cb5c26b1f9497ce Mon Sep 17 00:00:00 2001 From: roomrys <38435167+roomrys@users.noreply.github.com> Date: Wed, 27 Nov 2024 09:41:01 -0800 Subject: [PATCH 2/2] Test triangulate and cameras list --- tests/model/test_camera.py | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/model/test_camera.py b/tests/model/test_camera.py index 243e672d..381acd58 100644 --- a/tests/model/test_camera.py +++ b/tests/model/test_camera.py @@ -252,6 +252,61 @@ def test_camera_get_video(): assert camera.get_video(session) is None +def test_camera_group_cameras(): + """Test camera group cameras method.""" + camera1 = Camera(name="camera1") + camera2 = Camera(name="camera2") + camera_group = CameraGroup(cameras=[camera1, camera2]) + + assert camera_group.cameras == [camera1, camera2] + + camera_group = CameraGroup() + assert camera_group.cameras == [] + + +def test_camera_group_triangulation(): + """Test camera group triangulation using 3-4-5 triangle on xy-plane.""" + + # Define special 3-4-5 triangle + a = 3 + b = 4 + c = 5 + + # Angles opposite to sides a, b, and c in radians + angle_a = np.arccos((b**2 + c**2 - a**2) / (2 * b * c)) # 36.87 degrees + + # Define camera origin and world point + camera1_origin = np.array([0, a, 0]) + camera2_origin = np.array([0, -a, 0]) + point_world = np.array([b, 0, 0]) + + # Define rotation and translation vectors + rvec_1 = np.array([0, 0, 1]) * angle_a # axis-angle representation + rvec_2 = -rvec_1 # Opposite rotation + rotation_matrix_1 = cv2.Rodrigues(rvec_1)[0] + rotation_matrix_2 = cv2.Rodrigues(rvec_2)[0] + tvec_1 = -rotation_matrix_1 @ camera1_origin # Rotated camera origin + tvec_2 = -rotation_matrix_2 @ camera2_origin # Rotated camera origin + + # Transform point from world to camera frame + point_cam1 = rotation_matrix_1 @ point_world + tvec_1 + point_cam2 = rotation_matrix_2 @ point_world + tvec_2 + np.testing.assert_array_almost_equal(point_cam1, np.array([c, 0, 0]), decimal=5) + np.testing.assert_array_almost_equal(point_cam2, np.array([c, 0, 0]), decimal=5) + + # Define camera group + camera_1 = Camera(rvec=rvec_1, tvec=tvec_1) + camera_2 = Camera(rvec=rvec_2, tvec=tvec_2) + camera_group = CameraGroup(cameras=[camera_1, camera_2]) + + # Triangulate point from two camera views + points = np.array([[[c, 0]], [[c, 0]]]) + points_3d = camera_group.triangulate(points=points) + np.testing.assert_array_almost_equal( + points_3d[:, :-1], np.array([[b, 0]]), decimal=5 + ) # z-coordinate is ambiguous since we only define 2D points on x-y plane + + # TODO: Remove when implement triangulation without aniposelib def test_camera_aliases(): """Test camera aliases for attributes."""