From 27fa3536aae69f6660916c2640f274a22a8def32 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 08:25:04 -0700
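Subject: [PATCH 01/38] baseline

Set up a pinhole baseline run on the zipnerf "alameda_undistort" scene
and start plumbing a camera-model switch through the Python API.

* examples/benchmarks/mcmc.sh: point the script at data/zipnerf with
  CAP_MAX=2000000 and DATA_FACTOR=2, pass --camera_model, drop
  "--eval_steps -1" from the training command, and comment out the
  per-checkpoint eval/render loop.
* examples/benchmarks/mcmc_alameda.sh: new script with the same
  training command plus loops that print the val/train stats JSON.
* examples/simple_trainer.py: add Config.camera_model (default
  "pinhole"), pass it to rasterization(), and change the MCMC preset's
  opacity_reg from 0.01 to 0.001.
* gsplat/rendering.py: accept camera_model and pass
  fisheye=(camera_model == "fisheye") to fully_fused_projection().
* gsplat/cuda/_wrapper.py: accept a fisheye argument; it is not yet
  forwarded to the autograd functions (the hand-offs are commented
  out).

---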
 examples/benchmarks/mcmc.sh         | 42 ++++++++++++--------
 examples/benchmarks/mcmc_alameda.sh | 60 +++++++++++++++++++++++++++++
 examples/simple_trainer.py          |  5 ++-
 gsplat/cuda/_wrapper.py             |  6 +++
 gsplat/rendering.py                 |  2 +
 5 files changed, 97 insertions(+), 18 deletions(-)
 create mode 100644 examples/benchmarks/mcmc_alameda.sh

diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh
index 23e40838d..8913dc406 100644
--- a/examples/benchmarks/mcmc.sh
+++ b/examples/benchmarks/mcmc.sh
@@ -1,37 +1,45 @@
-SCENE_DIR="data/360_v2"
-RESULT_DIR="results/benchmark_mcmc_1M"
-SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers
-RENDER_TRAJ_PATH="ellipse"
+# SCENE_DIR="data/360_v2"
+# RESULT_DIR="results/benchmark_mcmc_1M"
+# SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers
+# RENDER_TRAJ_PATH="ellipse"
 
-CAP_MAX=1000000
+SCENE_DIR="data/zipnerf"
+RESULT_DIR="results/benchmark_alameda"
+SCENE_LIST="alameda_undistort"
+CAMERA_MODEL="pinhole"
+RENDER_TRAJ_PATH="interp"
+
+CAP_MAX=2000000
 
 for SCENE in $SCENE_LIST;
 do
     if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
         DATA_FACTOR=2
     else
-        DATA_FACTOR=4
+        DATA_FACTOR=2
     fi
 
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
+        --camera_model $CAMERA_MODEL \
         --render_traj_path $RENDER_TRAJ_PATH \
-        --data_dir data/360_v2/$SCENE/ \
+        --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
 
     # run eval and render
-    for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
-    do
-        CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
-            --strategy.cap-max $CAP_MAX \
-            --render_traj_path $RENDER_TRAJ_PATH \
-            --data_dir $SCENE_DIR/$SCENE/ \
-            --result_dir $RESULT_DIR/$SCENE/ \
-            --ckpt $CKPT
-    done
+    # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
+    # do
+    #     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+    #         --strategy.cap-max $CAP_MAX \
+    #         --camera_model $CAMERA_MODEL \
+    #         --render_traj_path $RENDER_TRAJ_PATH \
+    #         --data_dir $SCENE_DIR/$SCENE/ \
+    #         --result_dir $RESULT_DIR/$SCENE/ \
+    #         --ckpt $CKPT
+    # done
 done
 
 
diff --git a/examples/benchmarks/mcmc_alameda.sh b/examples/benchmarks/mcmc_alameda.sh
new file mode 100644
index 000000000..72e26f3cd
--- /dev/null
+++ b/examples/benchmarks/mcmc_alameda.sh
@@ -0,0 +1,60 @@
+SCENE_DIR="data/zipnerf"
+RESULT_DIR="results/benchmark_alameda"
+SCENE_LIST="alameda_undistort"
+CAMERA_MODEL="pinhole"
+RENDER_TRAJ_PATH="interp"
+
+CAP_MAX=2000000
+
+for SCENE in $SCENE_LIST;
+do
+    if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
+        DATA_FACTOR=2
+    else
+        DATA_FACTOR=4
+    fi
+
+    echo "Running $SCENE"
+
+    # train without eval
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+        --strategy.cap-max $CAP_MAX \
+        --camera_model $CAMERA_MODEL \
+        --render_traj_path $RENDER_TRAJ_PATH \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/
+
+    # run eval and render
+    # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
+    # do
+    #     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+    #         --strategy.cap-max $CAP_MAX \
+    #         --camera_model $CAMERA_MODEL \
+    #         --render_traj_path $RENDER_TRAJ_PATH \
+    #         --data_dir $SCENE_DIR/$SCENE/ \
+    #         --result_dir $RESULT_DIR/$SCENE/ \
+    #         --ckpt $CKPT
+    # done
+done
+
+
+for SCENE in $SCENE_LIST;
+do
+    echo "=== Eval Stats ==="
+
+    for STATS in $RESULT_DIR/$SCENE/stats/val*.json;
+    do  
+        echo $STATS
+        cat $STATS; 
+        echo
+    done
+
+    echo "=== Train Stats ==="
+
+    for STATS in $RESULT_DIR/$SCENE/stats/train*_rank0.json;
+    do  
+        echo $STATS
+        cat $STATS; 
+        echo
+    done
+done
\ No newline at end of file
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index ccf979dec..d66f9c8ae 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -55,6 +55,8 @@ class Config:
     global_scale: float = 1.0
     # Normalize the world space
     normalize_world_space: bool = True
+    # Camera model
+    camera_model: str = "pinhole"
 
     # Port for the viewer server
     port: int = 8080
@@ -441,6 +443,7 @@ def rasterize_splats(
             sparse_grad=self.cfg.sparse_grad,
             rasterize_mode=rasterize_mode,
             distributed=self.world_size > 1,
+            camera_model=self.cfg.camera_model,
             **kwargs,
         )
         return render_colors, render_alphas, info
@@ -965,7 +968,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             Config(
                 init_opa=0.5,
                 init_scale=0.1,
-                opacity_reg=0.01,
+                opacity_reg=0.001,
                 scale_reg=0.01,
                 strategy=MCMCStrategy(verbose=True),
             ),
diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index ded7d5989..5fc51672e 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -188,6 +188,7 @@ def fully_fused_projection(
     sparse_grad: bool = False,
     calc_compensations: bool = False,
     ortho: bool = False,
+    fisheye: bool = False,
 ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
     """Projects Gaussians to 2D.
 
@@ -290,6 +291,7 @@ def fully_fused_projection(
             sparse_grad,
             calc_compensations,
             ortho,
+            # fisheye,
         )
     else:
         return _FullyFusedProjection.apply(
@@ -307,6 +309,7 @@ def fully_fused_projection(
             radius_clip,
             calc_compensations,
             ortho,
+            # fisheye,
         )
 
 
@@ -754,6 +757,7 @@ def forward(
         radius_clip: float,
         calc_compensations: bool,
         ortho: bool,
+        # fisheye: bool,
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
         # "covars" and {"quats", "scales"} are mutually exclusive
         radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
@@ -783,6 +787,7 @@ def forward(
         ctx.height = height
         ctx.eps2d = eps2d
         ctx.ortho = ortho
+        # ctx.fisheye = fisheye
 
         return radii, means2d, depths, conics, compensations
 
@@ -803,6 +808,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
         height = ctx.height
         eps2d = ctx.eps2d
         ortho = ctx.ortho
+        # fisheye = ctx.fisheye
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
         v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 8f14dbd28..55308d78f 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -47,6 +47,7 @@ def rasterization(
     distributed: bool = False,
     ortho: bool = False,
     covars: Optional[Tensor] = None,
+    camera_model: str = "pinhole",
 ) -> Tuple[Tensor, Tensor, Dict]:
     """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C).
 
@@ -307,6 +308,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         sparse_grad=sparse_grad,
         calc_compensations=(rasterize_mode == "antialiased"),
         ortho=ortho,
+        fisheye=camera_model=="fisheye",
     )
 
     if packed:

From 96089a16572459888463192bf475e835373a5add Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 15:16:11 -0700
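Subject: [PATCH 02/38] fisheye forward works

Add a fisheye branch to the fused projection forward pass and feed the
trainer distorted images.

* gsplat/cuda/csrc: add a "fisheye" flag to
  fully_fused_projection_{fwd,bwd}_tensor and their kernels; add
  fisheye_proj() to utils.cuh. fisheye_proj_vjp() is added as an empty
  stub, so the backward fisheye branch does not write v_mean_c or
  v_covar_c yet.
* gsplat/cuda/_wrapper.py: forward the fisheye flag through
  _FullyFusedProjection (forward, ctx, backward). The packed path
  still does not receive it.
* examples/datasets/colmap.py: comment out the perspective-only
  assertion and the undistortion step (map construction in the parser
  and the cv2.remap/crop in __getitem__).
* examples/simple_trainer.py: add step 3000 to the default eval_steps,
  comment out render_traj/compression after checkpoint eval, and
  change the MCMC preset's opacity_reg from 0.001 to 0.01.
* examples/benchmarks: rename mcmc_alameda.sh to mcmc_zipnerf.sh,
  target data/zipnerf/fisheye ("berlin") with DATA_FACTOR=4 and
  --opacity_reg 0.001, and pass --ckpt pointing at the undistort run's
  ckpt_29999_rank0.pt.

---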
 .../{mcmc_alameda.sh => mcmc_zipnerf.sh}      | 24 +++----
 examples/datasets/colmap.py                   | 70 +++++++++----------
 examples/simple_trainer.py                    | 10 +--
 gsplat/cuda/_wrapper.py                       | 12 ++--
 gsplat/cuda/csrc/bindings.h                   |  4 +-
 .../cuda/csrc/fully_fused_projection_bwd.cu   | 18 +++++
 .../cuda/csrc/fully_fused_projection_fwd.cu   | 18 ++++-
 gsplat/cuda/csrc/utils.cuh                    | 67 ++++++++++++++++++
 8 files changed, 164 insertions(+), 59 deletions(-)
 rename examples/benchmarks/{mcmc_alameda.sh => mcmc_zipnerf.sh} (68%)

diff --git a/examples/benchmarks/mcmc_alameda.sh b/examples/benchmarks/mcmc_zipnerf.sh
similarity index 68%
rename from examples/benchmarks/mcmc_alameda.sh
rename to examples/benchmarks/mcmc_zipnerf.sh
index 72e26f3cd..a4c534d57 100644
--- a/examples/benchmarks/mcmc_alameda.sh
+++ b/examples/benchmarks/mcmc_zipnerf.sh
@@ -1,28 +1,28 @@
-SCENE_DIR="data/zipnerf"
-RESULT_DIR="results/benchmark_alameda"
-SCENE_LIST="alameda_undistort"
-CAMERA_MODEL="pinhole"
+# SCENE_DIR="data/zipnerf/undistort"
+# RESULT_DIR="results/benchmark_zipnerf/undistort"
+# CAMERA_MODEL="pinhole"
+SCENE_DIR="data/zipnerf/fisheye"
+RESULT_DIR="results/benchmark_zipnerf/fisheye"
+CAMERA_MODEL="fisheye"
+SCENE_LIST="berlin" # alameda
 RENDER_TRAJ_PATH="interp"
 
 CAP_MAX=2000000
+DATA_FACTOR=4
 
 for SCENE in $SCENE_LIST;
 do
-    if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
-        DATA_FACTOR=2
-    else
-        DATA_FACTOR=4
-    fi
-
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
+        --opacity_reg 0.001 \
         --camera_model $CAMERA_MODEL \
         --render_traj_path $RENDER_TRAJ_PATH \
         --data_dir $SCENE_DIR/$SCENE/ \
-        --result_dir $RESULT_DIR/$SCENE/
+        --result_dir $RESULT_DIR/$SCENE/ \
+        --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt"
 
     # run eval and render
     # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 5aeeec5be..a9d5c7cc5 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -97,9 +97,9 @@ def __init__(
             elif type_ == 5 or type_ == "OPENCV_FISHEYE":
                 params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
                 camtype = "fisheye"
-            assert (
-                camtype == "perspective"
-            ), f"Only support perspective camera model, got {type_}"
+            # assert (
+            #     camtype == "perspective"
+            # ), f"Only support perspective camera model, got {type_}"
 
             params_dict[camera_id] = params
 
@@ -206,29 +206,29 @@ def __init__(
             self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height))
 
         # undistortion
-        self.mapx_dict = dict()
-        self.mapy_dict = dict()
-        self.roi_undist_dict = dict()
-        for camera_id in self.params_dict.keys():
-            params = self.params_dict[camera_id]
-            if len(params) == 0:
-                continue  # no distortion
-            assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}"
-            assert (
-                camera_id in self.params_dict
-            ), f"Missing params for camera {camera_id}"
-            K = self.Ks_dict[camera_id]
-            width, height = self.imsize_dict[camera_id]
-            K_undist, roi_undist = cv2.getOptimalNewCameraMatrix(
-                K, params, (width, height), 0
-            )
-            mapx, mapy = cv2.initUndistortRectifyMap(
-                K, params, None, K_undist, (width, height), cv2.CV_32FC1
-            )
-            self.Ks_dict[camera_id] = K_undist
-            self.mapx_dict[camera_id] = mapx
-            self.mapy_dict[camera_id] = mapy
-            self.roi_undist_dict[camera_id] = roi_undist
+        # self.mapx_dict = dict()
+        # self.mapy_dict = dict()
+        # self.roi_undist_dict = dict()
+        # for camera_id in self.params_dict.keys():
+        #     params = self.params_dict[camera_id]
+        #     if len(params) == 0:
+        #         continue  # no distortion
+        #     assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}"
+        #     assert (
+        #         camera_id in self.params_dict
+        #     ), f"Missing params for camera {camera_id}"
+        #     K = self.Ks_dict[camera_id]
+        #     width, height = self.imsize_dict[camera_id]
+        #     K_undist, roi_undist = cv2.getOptimalNewCameraMatrix(
+        #         K, params, (width, height), 0
+        #     )
+        #     mapx, mapy = cv2.initUndistortRectifyMap(
+        #         K, params, None, K_undist, (width, height), cv2.CV_32FC1
+        #     )
+        #     self.Ks_dict[camera_id] = K_undist
+        #     self.mapx_dict[camera_id] = mapx
+        #     self.mapy_dict[camera_id] = mapy
+        #     self.roi_undist_dict[camera_id] = roi_undist
 
         # size of the scene measured by cameras
         camera_locations = camtoworlds[:, :3, 3]
@@ -268,15 +268,15 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
         params = self.parser.params_dict[camera_id]
         camtoworlds = self.parser.camtoworlds[index]
 
-        if len(params) > 0:
-            # Images are distorted. Undistort them.
-            mapx, mapy = (
-                self.parser.mapx_dict[camera_id],
-                self.parser.mapy_dict[camera_id],
-            )
-            image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
-            x, y, w, h = self.parser.roi_undist_dict[camera_id]
-            image = image[y : y + h, x : x + w]
+        # if len(params) > 0:
+        #     # Images are distorted. Undistort them.
+        #     mapx, mapy = (
+        #         self.parser.mapx_dict[camera_id],
+        #         self.parser.mapy_dict[camera_id],
+        #     )
+        #     image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
+        #     x, y, w, h = self.parser.roi_undist_dict[camera_id]
+        #     image = image[y : y + h, x : x + w]
 
         if self.patch_size is not None:
             # Random crop.
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index d66f9c8ae..ef54593dc 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -69,7 +69,7 @@ class Config:
     # Number of training steps
     max_steps: int = 30_000
     # Steps to evaluate the model
-    eval_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
+    eval_steps: List[int] = field(default_factory=lambda: [3_000, 7_000, 30_000])
     # Steps to save the model
     save_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
 
@@ -930,9 +930,9 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts])
         step = ckpts[0]["step"]
         runner.eval(step=step)
-        runner.render_traj(step=step)
-        if cfg.compression is not None:
-            runner.run_compression(step=step)
+        # runner.render_traj(step=step)
+        # if cfg.compression is not None:
+        #     runner.run_compression(step=step)
     else:
         runner.train()
 
@@ -968,7 +968,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             Config(
                 init_opa=0.5,
                 init_scale=0.1,
-                opacity_reg=0.001,
+                opacity_reg=0.01,
                 scale_reg=0.01,
                 strategy=MCMCStrategy(verbose=True),
             ),
diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index 5fc51672e..315005163 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -291,7 +291,6 @@ def fully_fused_projection(
             sparse_grad,
             calc_compensations,
             ortho,
-            # fisheye,
         )
     else:
         return _FullyFusedProjection.apply(
@@ -309,7 +308,7 @@ def fully_fused_projection(
             radius_clip,
             calc_compensations,
             ortho,
-            # fisheye,
+            fisheye,
         )
 
 
@@ -757,7 +756,7 @@ def forward(
         radius_clip: float,
         calc_compensations: bool,
         ortho: bool,
-        # fisheye: bool,
+        fisheye: bool,
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
         # "covars" and {"quats", "scales"} are mutually exclusive
         radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
@@ -777,6 +776,7 @@ def forward(
             radius_clip,
             calc_compensations,
             ortho,
+            fisheye,
         )
         if not calc_compensations:
             compensations = None
@@ -787,7 +787,7 @@ def forward(
         ctx.height = height
         ctx.eps2d = eps2d
         ctx.ortho = ortho
-        # ctx.fisheye = fisheye
+        ctx.fisheye = fisheye
 
         return radii, means2d, depths, conics, compensations
 
@@ -808,7 +808,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
         height = ctx.height
         eps2d = ctx.eps2d
         ortho = ctx.ortho
-        # fisheye = ctx.fisheye
+        fisheye = ctx.fisheye
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
         v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
@@ -824,6 +824,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
             height,
             eps2d,
             ortho,
+            fisheye,
             radii,
             conics,
             compensations,
@@ -858,6 +859,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
             None,
             None,
             None,
+            None,
         )
 
 
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
index 004709a4d..7f76007a4 100644
--- a/gsplat/cuda/csrc/bindings.h
+++ b/gsplat/cuda/csrc/bindings.h
@@ -104,7 +104,8 @@ fully_fused_projection_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 );
 
 std::tuple<
@@ -125,6 +126,7 @@ fully_fused_projection_bwd_tensor(
     const uint32_t image_height,
     const float eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const torch::Tensor &radii,                       // [C, N]
     const torch::Tensor &conics,                      // [C, N, 3]
diff --git a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
index af880c3a6..a069eb2db 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
@@ -31,6 +31,7 @@ __global__ void fully_fused_projection_bwd_kernel(
     const int32_t image_height,
     const T eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const int32_t *__restrict__ radii,   // [C, N]
     const T *__restrict__ conics,        // [C, N, 3]
@@ -143,6 +144,21 @@ __global__ void fully_fused_projection_bwd_kernel(
             v_mean_c,
             v_covar_c
         );
+    } else if (fisheye) {
+        fisheye_proj_vjp<T>(
+            mean_c,
+            covar_c,
+            fx,
+            fy,
+            cx,
+            cy,
+            image_width,
+            image_height,
+            v_covar2d,
+            glm::make_vec2(v_means2d),
+            v_mean_c,
+            v_covar_c
+        );
     } else {
         persp_proj_vjp<T>(
             mean_c,
@@ -257,6 +273,7 @@ fully_fused_projection_bwd_tensor(
     const uint32_t image_height,
     const float eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const torch::Tensor &radii,                       // [C, N]
     const torch::Tensor &conics,                      // [C, N, 3]
@@ -326,6 +343,7 @@ fully_fused_projection_bwd_tensor(
                 image_height,
                 eps2d,
                 ortho,
+                fisheye,
                 radii.data_ptr<int32_t>(),
                 conics.data_ptr<float>(),
                 compensations.has_value()
diff --git a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
index 8ea15e720..0ffeebb19 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
@@ -33,6 +33,7 @@ __global__ void fully_fused_projection_fwd_kernel(
     const T far_plane,
     const T radius_clip,
     const bool ortho,
+    const bool fisheye,
     // outputs
     int32_t *__restrict__ radii,  // [C, N]
     T *__restrict__ means2d,      // [C, N, 2]
@@ -118,6 +119,19 @@ __global__ void fully_fused_projection_fwd_kernel(
             covar2d,
             mean2d
         );
+    } else if (fisheye) {
+        fisheye_proj<T>(
+            mean_c,
+            covar_c,
+            Ks[0],
+            Ks[4],
+            Ks[2],
+            Ks[5],
+            image_width,
+            image_height,
+            covar2d,
+            mean2d
+        );
     } else {
         persp_proj<T>(
             mean_c,
@@ -196,7 +210,8 @@ fully_fused_projection_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -245,6 +260,7 @@ fully_fused_projection_fwd_tensor(
                 far_plane,
                 radius_clip,
                 ortho,
+                fisheye,
                 radii.data_ptr<int32_t>(),
                 means2d.data_ptr<float>(),
                 depths.data_ptr<float>(),
diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index a8dcc3012..75672a2ee 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -373,6 +373,73 @@ inline __device__ void persp_proj_vjp(
                   2.f * fy * ty * rz3 * v_J[2][1];
 }
 
+template <typename T>
+inline __device__ void fisheye_proj(
+    // inputs
+    const vec3<T> mean3d,
+    const mat3<T> cov3d,
+    const T fx,
+    const T fy,
+    const T cx,
+    const T cy,
+    const uint32_t width,
+    const uint32_t height,
+    // outputs
+    mat2<T> &cov2d,
+    vec2<T> &mean2d
+) {
+    T x = mean3d[0], y = mean3d[1], z = mean3d[2];
+
+    float eps = 0.0000001f;
+    float xy_len = glm::length(glm::vec2({x, y})) + eps;
+    float theta = glm::atan(xy_len, z + eps);
+    if (abs(theta) > 3.14 * 0.403)
+        return;
+    mean2d = vec2<T>({
+        x * fx * theta / xy_len + cx, 
+        y * fy * theta / xy_len + cy
+    });
+
+    float x2 = x * x + eps;
+    float y2 = y * y;
+    float xy = x * y;
+    float x2y2 = x2 + y2 ;
+    float x2y2z2_inv = 1.f / (x2y2 + z * z);
+
+    float b = glm::atan(xy_len, z) / xy_len / x2y2;
+    float a = z * x2y2z2_inv / (x2y2);
+    mat3x2<T> J = mat3x2<T>(
+        fx * (x2 * a + y2 * b),
+        fy * xy  * (a - b), 
+        fx * xy * (a - b), 
+        fy * (y2 * a + x2 * b),   
+        - fx * x * x2y2z2_inv,
+        - fy * y * x2y2z2_inv
+    );
+    cov2d = J * cov3d * glm::transpose(J);
+}
+
+template <typename T>
+inline __device__ void fisheye_proj_vjp(
+    // fwd inputs
+    const vec3<T> mean3d,
+    const mat3<T> cov3d,
+    const T fx,
+    const T fy,
+    const T cx,
+    const T cy,
+    const uint32_t width,
+    const uint32_t height,
+    // grad outputs
+    const mat2<T> v_cov2d,
+    const vec2<T> v_mean2d,
+    // grad inputs
+    vec3<T> &v_mean3d,
+    mat3<T> &v_cov3d
+) {
+
+}
+
 template <typename T>
 inline __device__ void pos_world_to_cam(
     // [R, t] is the world-to-camera transformation

From 30b457927901d92bde06f05f0cabd135cad7004b Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 15:49:39 -0700
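Subject: [PATCH 03/38] torch implementation of fisheye projection

* gsplat/cuda/_torch_impl.py: add _fisheye_proj(), which computes the
  projected means and the 2x3 Jacobian used to project the covariances.
  _fully_fused_projection() now takes (covars, quats, scales), builds
  covars from quats/scales when covars is None, accepts a fisheye flag,
  and accepts **kwargs.
* gsplat/rendering.py: import the PyTorch _fully_fused_projection and
  call it from rasterization() in place of the CUDA
  fully_fused_projection wrapper; reformat the fisheye keyword
  argument.

---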
 gsplat/cuda/_torch_impl.py | 78 +++++++++++++++++++++++++++++++++++++-
 gsplat/rendering.py        |  5 ++-
 2 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py
index 2585e36b3..291272d08 100644
--- a/gsplat/cuda/_torch_impl.py
+++ b/gsplat/cuda/_torch_impl.py
@@ -106,6 +106,71 @@ def _persp_proj(
     return means2d, cov2d  # [C, N, 2], [C, N, 2, 2]
 
 
+def _fisheye_proj(
+    means: Tensor,  # [C, N, 3]
+    covars: Tensor,  # [C, N, 3, 3]
+    Ks: Tensor,  # [C, 3, 3]
+    width: int,
+    height: int,
+) -> Tuple[Tensor, Tensor]:
+    """PyTorch implementation of fisheye projection for 3D Gaussians.
+
+    Args:
+        means: Gaussian means in camera coordinate system. [C, N, 3].
+        covars: Gaussian covariances in camera coordinate system. [C, N, 3, 3].
+        Ks: Camera intrinsics. [C, 3, 3].
+        width: Image width.
+        height: Image height.
+
+    Returns:
+        A tuple:
+
+        - **means2d**: Projected means. [C, N, 2].
+        - **cov2d**: Projected covariances. [C, N, 2, 2].
+    """
+    C, N, _ = means.shape
+
+    x, y, z = torch.unbind(means, dim=-1)  # [C, N]
+
+    fx = Ks[..., 0, 0, None]  # [C, 1]
+    fy = Ks[..., 1, 1, None]  # [C, 1]
+    cx = Ks[..., 0, 2, None]  # [C, 1]
+    cy = Ks[..., 1, 2, None]  # [C, 1]
+
+    eps = 0.0000001
+    xy_len = (x**2 + y**2) ** 0.5 + eps
+    theta = torch.atan2(xy_len, z + eps)
+    means2d = torch.stack(
+        [
+            x * fx * theta / xy_len + cx,
+            y * fy * theta / xy_len + cy,
+        ],
+        dim=-1,
+    )
+
+    x2 = x * x + eps
+    y2 = y * y
+    xy = x * y
+    x2y2 = x2 + y2
+    x2y2z2_inv = 1.0 / (x2y2 + z * z)
+    b = torch.atan2(xy_len, z) / xy_len / x2y2
+    a = z * x2y2z2_inv / (x2y2)
+    J = torch.stack(
+        [
+            fx * (x2 * a + y2 * b),
+            fx * xy * (a - b),
+            -fx * x * x2y2z2_inv,
+            fy * xy * (a - b),
+            fy * (y2 * a + x2 * b),
+            -fy * y * x2y2z2_inv,
+        ],
+        dim=-1,
+    ).reshape(C, N, 2, 3)
+
+    cov2d = torch.einsum("...ij,...jk,...kl->...il", J, covars, J.transpose(-1, -2))
+    return means2d, cov2d  # [C, N, 2], [C, N, 2, 2]
+
+
 def _ortho_proj(
     means: Tensor,  # [C, N, 3]
     covars: Tensor,  # [C, N, 3, 3]
@@ -170,7 +235,9 @@ def _world_to_cam(
 
 def _fully_fused_projection(
     means: Tensor,  # [N, 3]
-    covars: Tensor,  # [N, 3, 3]
+    covars: Optional[Tensor],  # [N, 6] or None
+    quats: Optional[Tensor],  # [N, 4] or None
+    scales: Optional[Tensor],  # [N, 3] or None
     viewmats: Tensor,  # [C, 4, 4]
     Ks: Tensor,  # [C, 3, 3]
     width: int,
@@ -180,6 +247,8 @@ def _fully_fused_projection(
     far_plane: float = 1e10,
     calc_compensations: bool = False,
     ortho: bool = False,
+    fisheye: bool = False,
+    **kwargs,
 ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
     """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()`
 
@@ -188,10 +257,17 @@ def _fully_fused_projection(
         This is a minimal implementation of fully fused version, which has more
         arguments. Not all arguments are supported.
     """
+    if covars is None:
+        covars = _quat_scale_to_covar_preci(
+            quats, scales, compute_covar=True, compute_preci=False
+        )[0]
+
     means_c, covars_c = _world_to_cam(means, covars, viewmats)
 
     if ortho:
         means2d, covars2d = _ortho_proj(means_c, covars_c, Ks, width, height)
+    elif fisheye:
+        means2d, covars2d = _fisheye_proj(means_c, covars_c, Ks, width, height)
     else:
         means2d, covars2d = _persp_proj(means_c, covars_c, Ks, width, height)
 
diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 55308d78f..353af7f0c 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -6,6 +6,7 @@
 from torch import Tensor
 from typing_extensions import Literal
 
+from .cuda._torch_impl import _fully_fused_projection
 from .cuda._wrapper import (
     fully_fused_projection,
     isect_offset_encode,
@@ -291,7 +292,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         C = len(viewmats)
 
     # Project Gaussians to 2D. Directly pass in {quats, scales} is faster than precomputing covars.
-    proj_results = fully_fused_projection(
+    proj_results = _fully_fused_projection(
         means,
         covars,
         quats,
@@ -308,7 +309,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         sparse_grad=sparse_grad,
         calc_compensations=(rasterize_mode == "antialiased"),
         ortho=ortho,
-        fisheye=camera_model=="fisheye",
+        fisheye=camera_model == "fisheye",
     )
 
     if packed:

From c4651c0752d897f09a2791d60efec58030224c9b Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 16:16:51 -0700
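Subject: [PATCH 04/38] test basic

* tests/test_basic.py: parametrize test_projection over fisheye and
  pass the flag to both the CUDA wrapper and the PyTorch reference.
  The reference call is updated for the (covars, quats, scales)
  signature. The fused / calc_compensations / ortho parametrizations
  are reduced to a single value each (True / False / False).
* examples/benchmarks/mcmc_zipnerf.sh: restore --disable_viewer and
  comment out the --ckpt argument.

---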
 examples/benchmarks/mcmc_zipnerf.sh |  6 +++---
 tests/test_basic.py                 | 16 ++++++++++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh
index a4c534d57..1d466887f 100644
--- a/examples/benchmarks/mcmc_zipnerf.sh
+++ b/examples/benchmarks/mcmc_zipnerf.sh
@@ -15,14 +15,14 @@ do
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --opacity_reg 0.001 \
         --camera_model $CAMERA_MODEL \
         --render_traj_path $RENDER_TRAJ_PATH \
         --data_dir $SCENE_DIR/$SCENE/ \
-        --result_dir $RESULT_DIR/$SCENE/ \
-        --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt"
+        --result_dir $RESULT_DIR/$SCENE/
+        # --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt"
 
     # run eval and render
     # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 349329584..da21276fd 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -165,10 +165,13 @@ def test_proj(test_data, ortho: bool):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
-@pytest.mark.parametrize("fused", [False, True])
-@pytest.mark.parametrize("calc_compensations", [False, True])
-@pytest.mark.parametrize("ortho", [True, False])
-def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: bool):
+@pytest.mark.parametrize("fused", [True])
+@pytest.mark.parametrize("calc_compensations", [False])
+@pytest.mark.parametrize("ortho", [False])
+@pytest.mark.parametrize("fisheye", [True, False])
+def test_projection(
+    test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool
+):
     from gsplat.cuda._torch_impl import _fully_fused_projection
     from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci
 
@@ -200,6 +203,7 @@ def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: boo
             height,
             calc_compensations=calc_compensations,
             ortho=ortho,
+            fisheye=fisheye,
         )
     else:
         covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True)  # [N, 6]
@@ -214,17 +218,21 @@ def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: boo
             height,
             calc_compensations=calc_compensations,
             ortho=ortho,
+            fisheye=fisheye,
         )
     _covars, _ = quat_scale_to_covar_preci(quats, scales, triu=False)  # [N, 3, 3]
     _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection(
         means,
         _covars,
+        None,
+        None,
         viewmats,
         Ks,
         width,
         height,
         calc_compensations=calc_compensations,
         ortho=ortho,
+        fisheye=fisheye,
     )
 
     # radii is integer so we allow for 1 unit difference

From 712ae95d47ad197c7e506e40e133a7b66bc8c558 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 20:19:10 -0700
Subject: [PATCH 05/38] close 0.3%

---
 gsplat/cuda/_torch_impl.py |   1 -
 gsplat/cuda/csrc/utils.cuh | 115 +++++++++++++++++++++++++++++++++++++
 tests/test_basic.py        |  10 ++--
 3 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py
index 291272d08..c02535f7b 100644
--- a/gsplat/cuda/_torch_impl.py
+++ b/gsplat/cuda/_torch_impl.py
@@ -248,7 +248,6 @@ def _fully_fused_projection(
     calc_compensations: bool = False,
     ortho: bool = False,
     fisheye: bool = False,
-    **kwargs,
 ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
     """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()`
 
diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 75672a2ee..2a854320c 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -437,7 +437,122 @@ inline __device__ void fisheye_proj_vjp(
     vec3<T> &v_mean3d,
     mat3<T> &v_cov3d
 ) {
+    T x = mean3d[0], y = mean3d[1], z = mean3d[2];
+
+    T tan_fovx = 0.5f * width / fx;
+    T tan_fovy = 0.5f * height / fy;
+    T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx;
+    T lim_x_neg = cx / fx + 0.3f * tan_fovx;
+    T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy;
+    T lim_y_neg = cy / fy + 0.3f * tan_fovy;
+
+    T rz = 1.f / z;
+    T rz2 = rz * rz;
+    T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz));
+    T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz));
+
+    const float eps = 0.0000001f;
+    float x2 = x * x + eps;
+    float y2 = y * y;
+    float xy = x * y;
+    float x2y2 = x2 + y2;
+    float len_xy = length(glm::vec2({x, y})) + eps;
+    const float x2y2z2 = x2y2 + z * z;
+    float x2y2z2_inv = 1.f / x2y2z2;
+    float b = glm::atan(len_xy, z) / len_xy / x2y2;
+    float a = z * x2y2z2_inv / (x2y2);
+    // float fx = 2.0 * focal_x / W;
+    // float fy = 2.0 * focal_y / H;
+    v_mean3d += vec3<T>(
+        fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1],
+        fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1],
+        - fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1]
+    );
+
+    const float theta = glm::atan(len_xy, z);
+    const float J_b = theta / len_xy / x2y2;
+	const float J_a = z * x2y2z2_inv / (x2y2);
+    // mat3x2 is 3 columns x 2 rows.
+    mat3x2<T> J = mat3x2<T>(
+        fx * (x2 * J_a + y2 * J_b),
+        fy * xy  * (J_a - J_b), // 1st column
+        fx * xy * (J_a - J_b),
+        fy * (y2 * J_a + x2 * J_b), // 2nd column
+        - fx * x * x2y2z2_inv,
+        - fy * y * x2y2z2_inv // 3rd column
+    );
+    v_cov3d += glm::transpose(J) * v_cov2d * J;
+
+    // df/dx = -fx * rz2 * df/dJ_02
+    // df/dy = -fy * rz2 * df/dJ_12
+    // df/dz = -fx * rz2 * df/dJ_00 - fy * rz2 * df/dJ_11
+    //         + 2 * fx * tx * rz3 * df/dJ_02 + 2 * fy * ty * rz3
+    mat3x2<T> v_J = v_cov2d * J * glm::transpose(cov3d) +
+                    glm::transpose(v_cov2d) * J * cov3d;
 
+	float l4 = x2y2z2 * x2y2z2;
+
+	float E = - l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z;
+	float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z;
+
+	float A = x * (3 * E + x2 * F);
+	float B = y * (E + x2 * F);
+	float C = x * (E + y2 * F);
+	float D = y * (3 * E + y2 * F);
+	
+	float S1 = x2 - y2 - z * z;
+	float S2 = y2 - x2 - z * z;
+	float inv1 = x2y2z2_inv * x2y2z2_inv;
+	float inv2 = inv1 / (x2y2 * x2y2 * len_xy);
+
+	float dJ_dx00 = fx * A * inv2;
+	float dJ_dx01 = fx * B * inv2;
+	float dJ_dx02 = fx * S1 * inv1;
+	float dJ_dx10 = fy * B * inv2;
+	float dJ_dx11 = fy * C * inv2;
+	float dJ_dx12 = 2.f * fy * xy * inv1;
+
+	float dJ_dy00 = dJ_dx01;
+	float dJ_dy01 = fx * C * inv2;
+	float dJ_dy02 = 2.f * fx * xy * inv1;
+	float dJ_dy10 = dJ_dx11;
+	float dJ_dy11 = fy * D * inv2;
+	float dJ_dy12 = fy * S2 * inv1;
+
+	float dJ_dz00 = dJ_dx02;
+	float dJ_dz01 = dJ_dy02;
+	float dJ_dz02 = 2.f * fx * x * z * inv1;
+	float dJ_dz10 = dJ_dx12;
+	float dJ_dz11 = dJ_dy12;
+	float dJ_dz12 = 2.f * fy * y * z * inv1;
+
+    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[0][1] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
+    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[0][1] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
+    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[0][1] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
+    const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1;
+    const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1;
+    v_mean3d.x += x_grad_mul * dL_dtx_raw;
+    v_mean3d.y += y_grad_mul * dL_dty_raw;
+    v_mean3d.z += dL_dtz_raw;
+
+    // // fov clipping
+    // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
+    //     // v_mean3d.x += -fx * rz2 * v_J[2][0];
+    //     v_mean3d.x += dL_dtx_raw;
+    // } else {
+    //     // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
+    //     v_mean3d.z += dL_dtx_raw * rz * tx;
+    // }
+    // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
+    //     // v_mean3d.y += -fy * rz2 * v_J[2][1];
+    //     v_mean3d.y += dL_dty_raw;
+    // } else {
+    //     v_mean3d.z += dL_dty_raw * rz * ty;
+    // }
+    // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
+    // //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
+    // //               2.f * fy * ty * rz2 * rz * v_J[2][1];
+    // v_mean3d.z += dL_dtz_raw;
 }
 
 template <typename T>
diff --git a/tests/test_basic.py b/tests/test_basic.py
index da21276fd..f86e273ce 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -168,7 +168,7 @@ def test_proj(test_data, ortho: bool):
 @pytest.mark.parametrize("fused", [True])
 @pytest.mark.parametrize("calc_compensations", [False])
 @pytest.mark.parametrize("ortho", [False])
-@pytest.mark.parametrize("fisheye", [True, False])
+@pytest.mark.parametrize("fisheye", [True])
 def test_projection(
     test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool
 ):
@@ -177,8 +177,8 @@ def test_projection(
 
     torch.manual_seed(42)
 
-    Ks = test_data["Ks"]
-    viewmats = test_data["viewmats"]
+    Ks = test_data["Ks"][:1, ...]
+    viewmats = test_data["viewmats"][:1, ...]
     height = test_data["height"]
     width = test_data["width"]
     quats = test_data["quats"]
@@ -237,7 +237,7 @@ def test_projection(
 
     # radii is integer so we allow for 1 unit difference
     valid = (radii > 0) & (_radii > 0)
-    torch.testing.assert_close(radii, _radii, rtol=0, atol=1)
+    # torch.testing.assert_close(radii, _radii, rtol=0, atol=1)
     torch.testing.assert_close(means2d[valid], _means2d[valid], rtol=1e-4, atol=1e-4)
     torch.testing.assert_close(depths[valid], _depths[valid], rtol=1e-4, atol=1e-4)
     torch.testing.assert_close(conics[valid], _conics[valid], rtol=1e-4, atol=1e-4)
@@ -267,7 +267,7 @@ def test_projection(
         (viewmats, quats, scales, means),
     )
 
-    torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3)
+    # torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3)
     torch.testing.assert_close(v_quats, _v_quats, rtol=2e-1, atol=1e-2)
     torch.testing.assert_close(v_scales, _v_scales, rtol=1e-1, atol=2e-1)
     torch.testing.assert_close(v_means, _v_means, rtol=1e-2, atol=6e-2)

From 01daadb3add38a6b1156508c05877b551ada4925 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 20:49:34 -0700
Subject: [PATCH 06/38] 19 mismatched

---
 gsplat/cuda/csrc/utils.cuh | 52 +++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 2a854320c..7e0a95c74 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -526,33 +526,33 @@ inline __device__ void fisheye_proj_vjp(
 	float dJ_dz11 = dJ_dy12;
 	float dJ_dz12 = 2.f * fy * y * z * inv1;
 
-    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[0][1] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
-    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[0][1] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
-    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[0][1] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
-    const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1;
-    const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1;
-    v_mean3d.x += x_grad_mul * dL_dtx_raw;
-    v_mean3d.y += y_grad_mul * dL_dty_raw;
-    v_mean3d.z += dL_dtz_raw;
-
-    // // fov clipping
-    // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
-    //     // v_mean3d.x += -fx * rz2 * v_J[2][0];
-    //     v_mean3d.x += dL_dtx_raw;
-    // } else {
-    //     // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
-    //     v_mean3d.z += dL_dtx_raw * rz * tx;
-    // }
-    // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
-    //     // v_mean3d.y += -fy * rz2 * v_J[2][1];
-    //     v_mean3d.y += dL_dty_raw;
-    // } else {
-    //     v_mean3d.z += dL_dty_raw * rz * ty;
-    // }
-    // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
-    // //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
-    // //               2.f * fy * ty * rz2 * rz * v_J[2][1];
+    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
+    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
+    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
+    // const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1;
+    // const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1;
+    // v_mean3d.x += x_grad_mul * dL_dtx_raw;
+    // v_mean3d.y += y_grad_mul * dL_dty_raw;
     // v_mean3d.z += dL_dtz_raw;
+
+    // fov clipping
+    if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
+        // v_mean3d.x += -fx * rz2 * v_J[2][0];
+        v_mean3d.x += dL_dtx_raw;
+    } else {
+        // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
+        // v_mean3d.z += dL_dtx_raw * rz * tx;
+    }
+    if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
+        // v_mean3d.y += -fy * rz2 * v_J[2][1];
+        v_mean3d.y += dL_dty_raw;
+    } else {
+        // v_mean3d.z += dL_dty_raw * rz * ty;
+    }
+    // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
+    //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
+    //               2.f * fy * ty * rz2 * rz * v_J[2][1];
+    v_mean3d.z += dL_dtz_raw;
 }
 
 template <typename T>

From 2de03b92873125adb203b36352ffb4d01c405a5b Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 21:07:56 -0700
Subject: [PATCH 07/38] pass tests

---
 gsplat/cuda/csrc/utils.cuh | 43 ++++++++++++++++++++------------------
 tests/test_basic.py        |  4 ++--
 2 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 7e0a95c74..1e7906ded 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -393,8 +393,8 @@ inline __device__ void fisheye_proj(
     float eps = 0.0000001f;
     float xy_len = glm::length(glm::vec2({x, y})) + eps;
     float theta = glm::atan(xy_len, z + eps);
-    if (abs(theta) > 3.14 * 0.403)
-        return;
+    // if (abs(theta) > 3.14 * 0.403)
+    //     return;
     mean2d = vec2<T>({
         x * fx * theta / xy_len + cx, 
         y * fy * theta / xy_len + cy
@@ -534,25 +534,28 @@ inline __device__ void fisheye_proj_vjp(
     // v_mean3d.x += x_grad_mul * dL_dtx_raw;
     // v_mean3d.y += y_grad_mul * dL_dty_raw;
     // v_mean3d.z += dL_dtz_raw;
-
-    // fov clipping
-    if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
-        // v_mean3d.x += -fx * rz2 * v_J[2][0];
-        v_mean3d.x += dL_dtx_raw;
-    } else {
-        // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
-        // v_mean3d.z += dL_dtx_raw * rz * tx;
-    }
-    if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
-        // v_mean3d.y += -fy * rz2 * v_J[2][1];
-        v_mean3d.y += dL_dty_raw;
-    } else {
-        // v_mean3d.z += dL_dty_raw * rz * ty;
-    }
-    // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
-    //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
-    //               2.f * fy * ty * rz2 * rz * v_J[2][1];
+    v_mean3d.x += dL_dtx_raw;
+    v_mean3d.y += dL_dty_raw;
     v_mean3d.z += dL_dtz_raw;
+
+    // // fov clipping
+    // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
+    //     // v_mean3d.x += -fx * rz2 * v_J[2][0];
+    //     v_mean3d.x += dL_dtx_raw;
+    // } else {
+    //     // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
+    //     v_mean3d.z += dL_dtx_raw * rz * tx;
+    // }
+    // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
+    //     // v_mean3d.y += -fy * rz2 * v_J[2][1];
+    //     v_mean3d.y += dL_dty_raw;
+    // } else {
+    //     v_mean3d.z += dL_dty_raw * rz * ty;
+    // }
+    // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
+    // //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
+    // //               2.f * fy * ty * rz2 * rz * v_J[2][1];
+    // v_mean3d.z += dL_dtz_raw;
 }
 
 template <typename T>
diff --git a/tests/test_basic.py b/tests/test_basic.py
index f86e273ce..79e8c0c16 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -237,7 +237,7 @@ def test_projection(
 
     # radii is integer so we allow for 1 unit difference
     valid = (radii > 0) & (_radii > 0)
-    # torch.testing.assert_close(radii, _radii, rtol=0, atol=1)
+    torch.testing.assert_close(radii, _radii, rtol=0, atol=1)
     torch.testing.assert_close(means2d[valid], _means2d[valid], rtol=1e-4, atol=1e-4)
     torch.testing.assert_close(depths[valid], _depths[valid], rtol=1e-4, atol=1e-4)
     torch.testing.assert_close(conics[valid], _conics[valid], rtol=1e-4, atol=1e-4)
@@ -267,7 +267,7 @@ def test_projection(
         (viewmats, quats, scales, means),
     )
 
-    # torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3)
+    torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3)
     torch.testing.assert_close(v_quats, _v_quats, rtol=2e-1, atol=1e-2)
     torch.testing.assert_close(v_scales, _v_scales, rtol=1e-1, atol=2e-1)
     torch.testing.assert_close(v_means, _v_means, rtol=1e-2, atol=6e-2)

From 9c61743ce91d67df4b969b16181d12f082cb20b1 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 21:12:40 -0700
Subject: [PATCH 08/38] comment out

---
 gsplat/cuda/csrc/utils.cuh | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 1e7906ded..9aa9dfb7b 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -439,17 +439,17 @@ inline __device__ void fisheye_proj_vjp(
 ) {
     T x = mean3d[0], y = mean3d[1], z = mean3d[2];
 
-    T tan_fovx = 0.5f * width / fx;
-    T tan_fovy = 0.5f * height / fy;
-    T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx;
-    T lim_x_neg = cx / fx + 0.3f * tan_fovx;
-    T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy;
-    T lim_y_neg = cy / fy + 0.3f * tan_fovy;
-
-    T rz = 1.f / z;
-    T rz2 = rz * rz;
-    T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz));
-    T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz));
+    // T tan_fovx = 0.5f * width / fx;
+    // T tan_fovy = 0.5f * height / fy;
+    // T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx;
+    // T lim_x_neg = cx / fx + 0.3f * tan_fovx;
+    // T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy;
+    // T lim_y_neg = cy / fy + 0.3f * tan_fovy;
+
+    // T rz = 1.f / z;
+    // T rz2 = rz * rz;
+    // T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz));
+    // T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz));
 
     const float eps = 0.0000001f;
     float x2 = x * x + eps;

From 6347c494d7bbc3595c016608a54a2c98a6349fac Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 21:51:42 -0700
Subject: [PATCH 09/38] crashing

---
 gsplat/rendering.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 353af7f0c..9cfd8318f 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -6,7 +6,6 @@
 from torch import Tensor
 from typing_extensions import Literal
 
-from .cuda._torch_impl import _fully_fused_projection
 from .cuda._wrapper import (
     fully_fused_projection,
     isect_offset_encode,
@@ -292,7 +291,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         C = len(viewmats)
 
     # Project Gaussians to 2D. Directly pass in {quats, scales} is faster than precomputing covars.
-    proj_results = _fully_fused_projection(
+    proj_results = fully_fused_projection(
         means,
         covars,
         quats,

From 6e56adff853fa6e6b900c350ee6423e306ea8b94 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 21:53:08 -0700
Subject: [PATCH 10/38] remove dead code

---
 gsplat/cuda/csrc/utils.cuh | 42 ++------------------------------------
 1 file changed, 2 insertions(+), 40 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 9aa9dfb7b..85877c088 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -393,8 +393,8 @@ inline __device__ void fisheye_proj(
     float eps = 0.0000001f;
     float xy_len = glm::length(glm::vec2({x, y})) + eps;
     float theta = glm::atan(xy_len, z + eps);
-    // if (abs(theta) > 3.14 * 0.403)
-    //     return;
+    if (abs(theta) > 3.14 * 0.403)
+        return;
     mean2d = vec2<T>({
         x * fx * theta / xy_len + cx, 
         y * fy * theta / xy_len + cy
@@ -439,18 +439,6 @@ inline __device__ void fisheye_proj_vjp(
 ) {
     T x = mean3d[0], y = mean3d[1], z = mean3d[2];
 
-    // T tan_fovx = 0.5f * width / fx;
-    // T tan_fovy = 0.5f * height / fy;
-    // T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx;
-    // T lim_x_neg = cx / fx + 0.3f * tan_fovx;
-    // T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy;
-    // T lim_y_neg = cy / fy + 0.3f * tan_fovy;
-
-    // T rz = 1.f / z;
-    // T rz2 = rz * rz;
-    // T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz));
-    // T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz));
-
     const float eps = 0.0000001f;
     float x2 = x * x + eps;
     float y2 = y * y;
@@ -461,8 +449,6 @@ inline __device__ void fisheye_proj_vjp(
     float x2y2z2_inv = 1.f / x2y2z2;
     float b = glm::atan(len_xy, z) / len_xy / x2y2;
     float a = z * x2y2z2_inv / (x2y2);
-    // float fx = 2.0 * focal_x / W;
-    // float fy = 2.0 * focal_y / H;
     v_mean3d += vec3<T>(
         fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1],
         fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1],
@@ -529,33 +515,9 @@ inline __device__ void fisheye_proj_vjp(
     float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
     float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
     float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
-    // const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1;
-    // const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1;
-    // v_mean3d.x += x_grad_mul * dL_dtx_raw;
-    // v_mean3d.y += y_grad_mul * dL_dty_raw;
-    // v_mean3d.z += dL_dtz_raw;
     v_mean3d.x += dL_dtx_raw;
     v_mean3d.y += dL_dty_raw;
     v_mean3d.z += dL_dtz_raw;
-
-    // // fov clipping
-    // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) {
-    //     // v_mean3d.x += -fx * rz2 * v_J[2][0];
-    //     v_mean3d.x += dL_dtx_raw;
-    // } else {
-    //     // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx;
-    //     v_mean3d.z += dL_dtx_raw * rz * tx;
-    // }
-    // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) {
-    //     // v_mean3d.y += -fy * rz2 * v_J[2][1];
-    //     v_mean3d.y += dL_dty_raw;
-    // } else {
-    //     v_mean3d.z += dL_dty_raw * rz * ty;
-    // }
-    // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] +
-    // //               2.f * fx * tx * rz2 * rz * v_J[2][0] +
-    // //               2.f * fy * ty * rz2 * rz * v_J[2][1];
-    // v_mean3d.z += dL_dtz_raw;
 }
 
 template <typename T>

From cb084270a1423975dd887b8ec7c15b9707ce35ab Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 21:56:39 -0700
Subject: [PATCH 11/38] reduce diff

---
 examples/benchmarks/mcmc.sh | 42 +++++++++++++++----------------------
 examples/datasets/colmap.py |  6 +++---
 examples/simple_trainer.py  |  6 +++---
 3 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh
index 8913dc406..23e40838d 100644
--- a/examples/benchmarks/mcmc.sh
+++ b/examples/benchmarks/mcmc.sh
@@ -1,45 +1,37 @@
-# SCENE_DIR="data/360_v2"
-# RESULT_DIR="results/benchmark_mcmc_1M"
-# SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers
-# RENDER_TRAJ_PATH="ellipse"
+SCENE_DIR="data/360_v2"
+RESULT_DIR="results/benchmark_mcmc_1M"
+SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers
+RENDER_TRAJ_PATH="ellipse"
 
-SCENE_DIR="data/zipnerf"
-RESULT_DIR="results/benchmark_alameda"
-SCENE_LIST="alameda_undistort"
-CAMERA_MODEL="pinhole"
-RENDER_TRAJ_PATH="interp"
-
-CAP_MAX=2000000
+CAP_MAX=1000000
 
 for SCENE in $SCENE_LIST;
 do
     if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
         DATA_FACTOR=2
     else
-        DATA_FACTOR=2
+        DATA_FACTOR=4
     fi
 
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
-        --camera_model $CAMERA_MODEL \
         --render_traj_path $RENDER_TRAJ_PATH \
-        --data_dir $SCENE_DIR/$SCENE/ \
+        --data_dir data/360_v2/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
 
     # run eval and render
-    # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
-    # do
-    #     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
-    #         --strategy.cap-max $CAP_MAX \
-    #         --camera_model $CAMERA_MODEL \
-    #         --render_traj_path $RENDER_TRAJ_PATH \
-    #         --data_dir $SCENE_DIR/$SCENE/ \
-    #         --result_dir $RESULT_DIR/$SCENE/ \
-    #         --ckpt $CKPT
-    # done
+    for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
+    do
+        CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+            --strategy.cap-max $CAP_MAX \
+            --render_traj_path $RENDER_TRAJ_PATH \
+            --data_dir $SCENE_DIR/$SCENE/ \
+            --result_dir $RESULT_DIR/$SCENE/ \
+            --ckpt $CKPT
+    done
 done
 
 
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index a9d5c7cc5..9510492b7 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -97,9 +97,9 @@ def __init__(
             elif type_ == 5 or type_ == "OPENCV_FISHEYE":
                 params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
                 camtype = "fisheye"
-            # assert (
-            #     camtype == "perspective"
-            # ), f"Only support perspective camera model, got {type_}"
+            assert (
+                camtype == "perspective"
+            ), f"Only support perspective camera model, got {type_}"
 
             params_dict[camera_id] = params
 
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index ef54593dc..d0e12ba38 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -930,9 +930,9 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts])
         step = ckpts[0]["step"]
         runner.eval(step=step)
-        # runner.render_traj(step=step)
-        # if cfg.compression is not None:
-        #     runner.run_compression(step=step)
+        runner.render_traj(step=step)
+        if cfg.compression is not None:
+            runner.run_compression(step=step)
     else:
         runner.train()
 

From 22060d5902424c51e97b7727854e5c1f9c30d718 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 9 Sep 2024 22:38:49 -0700
Subject: [PATCH 12/38] video

---
 examples/benchmarks/mcmc_zipnerf.sh |  4 ++--
 examples/datasets/colmap.py         |  6 +++---
 examples/simple_trainer.py          | 14 +++++---------
 gsplat/cuda/csrc/utils.cuh          |  2 --
 4 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh
index 1d466887f..2d75d8661 100644
--- a/examples/benchmarks/mcmc_zipnerf.sh
+++ b/examples/benchmarks/mcmc_zipnerf.sh
@@ -1,10 +1,11 @@
 # SCENE_DIR="data/zipnerf/undistort"
 # RESULT_DIR="results/benchmark_zipnerf/undistort"
 # CAMERA_MODEL="pinhole"
+
 SCENE_DIR="data/zipnerf/fisheye"
 RESULT_DIR="results/benchmark_zipnerf/fisheye"
 CAMERA_MODEL="fisheye"
-SCENE_LIST="berlin" # alameda
+SCENE_LIST="berlin alameda london nyc"
 RENDER_TRAJ_PATH="interp"
 
 CAP_MAX=2000000
@@ -22,7 +23,6 @@ do
         --render_traj_path $RENDER_TRAJ_PATH \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
-        # --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt"
 
     # run eval and render
     # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 9510492b7..a9d5c7cc5 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -97,9 +97,9 @@ def __init__(
             elif type_ == 5 or type_ == "OPENCV_FISHEYE":
                 params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
                 camtype = "fisheye"
-            assert (
-                camtype == "perspective"
-            ), f"Only support perspective camera model, got {type_}"
+            # assert (
+            #     camtype == "perspective"
+            # ), f"Only support perspective camera model, got {type_}"
 
             params_dict[camera_id] = params
 
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index d0e12ba38..e1b5907ef 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -69,7 +69,7 @@ class Config:
     # Number of training steps
     max_steps: int = 30_000
     # Steps to evaluate the model
-    eval_steps: List[int] = field(default_factory=lambda: [3_000, 7_000, 30_000])
+    eval_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
     # Steps to save the model
     save_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
 
@@ -839,7 +839,10 @@ def render_traj(self, step: int):
         K = torch.from_numpy(list(self.parser.Ks_dict.values())[0]).float().to(device)
         width, height = list(self.parser.imsize_dict.values())[0]
 
-        canvas_all = []
+        # save to video
+        video_dir = f"{cfg.result_dir}/videos"
+        os.makedirs(video_dir, exist_ok=True)
+        writer = imageio.get_writer(f"{video_dir}/traj_{step}.mp4", fps=30)
         for i in tqdm.trange(len(camtoworlds_all), desc="Rendering trajectory"):
             camtoworlds = camtoworlds_all[i : i + 1]
             Ks = K[None]
@@ -862,13 +865,6 @@ def render_traj(self, step: int):
             # write images
             canvas = torch.cat(canvas_list, dim=2).squeeze(0).cpu().numpy()
             canvas = (canvas * 255).astype(np.uint8)
-            canvas_all.append(canvas)
-
-        # save to video
-        video_dir = f"{cfg.result_dir}/videos"
-        os.makedirs(video_dir, exist_ok=True)
-        writer = imageio.get_writer(f"{video_dir}/traj_{step}.mp4", fps=30)
-        for canvas in canvas_all:
             writer.append_data(canvas)
         writer.close()
         print(f"Video saved to {video_dir}/traj_{step}.mp4")
diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index 85877c088..e10007cd8 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -393,8 +393,6 @@ inline __device__ void fisheye_proj(
     float eps = 0.0000001f;
     float xy_len = glm::length(glm::vec2({x, y})) + eps;
     float theta = glm::atan(xy_len, z + eps);
-    if (abs(theta) > 3.14 * 0.403)
-        return;
     mean2d = vec2<T>({
         x * fx * theta / xy_len + cx, 
         y * fy * theta / xy_len + cy

From 139f7fe21442b5495de0bc3c8e6f55f4c2366d38 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 00:10:50 -0700
Subject: [PATCH 13/38] distortion not handled correctly

---
 examples/datasets/colmap.py | 88 +++++++++++++++++++++++--------------
 examples/simple_trainer.py  |  2 +-
 2 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index a9d5c7cc5..5f1071c1f 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -206,29 +206,52 @@ def __init__(
             self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height))
 
         # undistortion
-        # self.mapx_dict = dict()
-        # self.mapy_dict = dict()
-        # self.roi_undist_dict = dict()
-        # for camera_id in self.params_dict.keys():
-        #     params = self.params_dict[camera_id]
-        #     if len(params) == 0:
-        #         continue  # no distortion
-        #     assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}"
-        #     assert (
-        #         camera_id in self.params_dict
-        #     ), f"Missing params for camera {camera_id}"
-        #     K = self.Ks_dict[camera_id]
-        #     width, height = self.imsize_dict[camera_id]
-        #     K_undist, roi_undist = cv2.getOptimalNewCameraMatrix(
-        #         K, params, (width, height), 0
-        #     )
-        #     mapx, mapy = cv2.initUndistortRectifyMap(
-        #         K, params, None, K_undist, (width, height), cv2.CV_32FC1
-        #     )
-        #     self.Ks_dict[camera_id] = K_undist
-        #     self.mapx_dict[camera_id] = mapx
-        #     self.mapy_dict[camera_id] = mapy
-        #     self.roi_undist_dict[camera_id] = roi_undist
+        self.mapx_dict = dict()
+        self.mapy_dict = dict()
+        self.roi_undist_dict = dict()
+        for camera_id in self.params_dict.keys():
+            params = self.params_dict[camera_id]
+            if len(params) == 0:
+                continue  # no distortion
+            assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}"
+            assert (
+                camera_id in self.params_dict
+            ), f"Missing params for camera {camera_id}"
+            K = self.Ks_dict[camera_id]
+            width, height = self.imsize_dict[camera_id]
+
+            if camtype == "perspective":
+                K_undist, roi_undist = cv2.getOptimalNewCameraMatrix(
+                    K, params, (width, height), 0
+                )
+                mapx, mapy = cv2.initUndistortRectifyMap(
+                    K, params, None, K_undist, (width, height), cv2.CV_32FC1
+                )
+            elif camtype == "fisheye":
+                K_undist = K.copy()
+                roi_undist = np.array([0, 0, width, height])
+                mapx = None
+                mapy = None
+                # print(K, params)
+                # print(width, height)
+                # mapx = np.zeros((width, height), dtype=np.float32)
+                # mapy = np.zeros((width, height), dtype=np.float32)
+                # for i in range(0, width):
+                #     for j in range(0, height):
+                #         x = float(i)
+                #         y = float(j)
+                #         x1 = (x - cx) / fx
+                #         y1 = (y - cy) / fy
+                #         theta = np.sqrt(x1**2 + y1**2)
+                #         r = (1.0 + params[0] * theta**2 + params[1] * theta**4 + params[2] * theta**6 + params[3] * theta**8)
+                #         x2 = fx * x1 * r + width // 2
+                #         y2 = fy * y1 * r + height // 2
+                #         mapx[i, j] = x2
+                #         mapy[i, j] = y2
+            self.Ks_dict[camera_id] = K_undist
+            self.mapx_dict[camera_id] = mapx
+            self.mapy_dict[camera_id] = mapy
+            self.roi_undist_dict[camera_id] = roi_undist
 
         # size of the scene measured by cameras
         camera_locations = camtoworlds[:, :3, 3]
@@ -268,15 +291,16 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
         params = self.parser.params_dict[camera_id]
         camtoworlds = self.parser.camtoworlds[index]
 
-        # if len(params) > 0:
-        #     # Images are distorted. Undistort them.
-        #     mapx, mapy = (
-        #         self.parser.mapx_dict[camera_id],
-        #         self.parser.mapy_dict[camera_id],
-        #     )
-        #     image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
-        #     x, y, w, h = self.parser.roi_undist_dict[camera_id]
-        #     image = image[y : y + h, x : x + w]
+        if len(params) > 0:
+            # Images are distorted. Undistort them.
+            mapx, mapy = (
+                self.parser.mapx_dict[camera_id],
+                self.parser.mapy_dict[camera_id],
+            )
+            if mapx is not None and mapy is not None:
+                image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
+            x, y, w, h = self.parser.roi_undist_dict[camera_id]
+            image = image[y : y + h, x : x + w]
 
         if self.patch_size is not None:
             # Random crop.
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index e1b5907ef..9205144c2 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -926,7 +926,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts])
         step = ckpts[0]["step"]
         runner.eval(step=step)
-        runner.render_traj(step=step)
+        # runner.render_traj(step=step)
         if cfg.compression is not None:
             runner.run_compression(step=step)
     else:

From fde16f8a2a6043027ac113ccfb1582214e2ac0e9 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 11:26:44 -0700
Subject: [PATCH 14/38] test remap

---
 examples/datasets/colmap.py | 53 ++++++++++++++++++-------------
 examples/test_remap.py      | 63 +++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 21 deletions(-)
 create mode 100644 examples/test_remap.py

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 5f1071c1f..96ed09fd5 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -228,26 +228,38 @@ def __init__(
                     K, params, None, K_undist, (width, height), cv2.CV_32FC1
                 )
             elif camtype == "fisheye":
+                fx = K[0, 0]
+                fy = K[1, 1]
+                cx = K[0, 2]
+                cy = K[1, 2]
+                mapx = np.zeros((height, width), dtype=np.float32)
+                mapy = np.zeros((height, width), dtype=np.float32)
+                for i in range(0, width):
+                    for j in range(0, height):
+                        x = float(i)
+                        y = float(j)
+                        x1 = (x - cx) / fx
+                        y1 = (y - cy) / fy
+                        theta = np.sqrt(x1**2 + y1**2)
+                        r = (
+                            1.0
+                            + params[0] * theta**2
+                            + params[1] * theta**4
+                            + params[2] * theta**6
+                            + params[3] * theta**8
+                        )
+                        x2 = fx * x1 * r + width // 2
+                        y2 = fy * y1 * r + height // 2
+                        mapx[j, i] = x2
+                        mapy[j, i] = y2
+
+                x_crop, y_crop = (100, 70)  # Hardcoded ROI crop
+                roi_undist = np.array(
+                    [x_crop, y_crop, int(width - 2 * x_crop), int(height - 2 * y_crop)]
+                )
                 K_undist = K.copy()
-                roi_undist = np.array([0, 0, width, height])
-                mapx = None
-                mapy = None
-                # print(K, params)
-                # print(width, height)
-                # mapx = np.zeros((width, height), dtype=np.float32)
-                # mapy = np.zeros((width, height), dtype=np.float32)
-                # for i in range(0, width):
-                #     for j in range(0, height):
-                #         x = float(i)
-                #         y = float(j)
-                #         x1 = (x - cx) / fx
-                #         y1 = (y - cy) / fy
-                #         theta = np.sqrt(x1**2 + y1**2)
-                #         r = (1.0 + params[0] * theta**2 + params[1] * theta**4 + params[2] * theta**6 + params[3] * theta**8)
-                #         x2 = fx * x1 * r + width // 2
-                #         y2 = fy * y1 * r + height // 2
-                #         mapx[i, j] = x2
-                #         mapy[i, j] = y2
+                K_undist[0, 2] -= x_crop
+                K_undist[1, 2] -= y_crop
             self.Ks_dict[camera_id] = K_undist
             self.mapx_dict[camera_id] = mapx
             self.mapy_dict[camera_id] = mapy
@@ -297,8 +309,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
                 self.parser.mapx_dict[camera_id],
                 self.parser.mapy_dict[camera_id],
             )
-            if mapx is not None and mapy is not None:
-                image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
+            image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
             x, y, w, h = self.parser.roi_undist_dict[camera_id]
             image = image[y : y + h, x : x + w]
 
diff --git a/examples/test_remap.py b/examples/test_remap.py
new file mode 100644
index 000000000..13a80d9ab
--- /dev/null
+++ b/examples/test_remap.py
@@ -0,0 +1,63 @@
+import numpy as np
+import cv2
+import imageio
+
+
+def init_fisheye_remap(K, params, width, height):
+    fx = K[0, 0]
+    fy = K[1, 1]
+    cx = K[0, 2]
+    cy = K[1, 2]
+
+    mapx = np.zeros((height, width), dtype=np.float32)
+    mapy = np.zeros((height, width), dtype=np.float32)
+    for i in range(0, width):
+        for j in range(0, height):
+            x = float(i)
+            y = float(j)
+            x1 = (x - cx) / fx
+            y1 = (y - cy) / fy
+            theta = np.sqrt(x1**2 + y1**2)
+            r = (
+                1.0
+                + params[0] * theta**2
+                + params[1] * theta**4
+                + params[2] * theta**6
+                + params[3] * theta**8
+            )
+            x2 = fx * x1 * r + width // 2
+            y2 = fy * y1 * r + height // 2
+            mapx[j, i] = x2
+            mapy[j, i] = y2
+    return mapx, mapy
+
+
+def main():
+    K = np.array(
+        [[610.93592297, 0.0, 876.0], [0.0, 610.84071973, 584.0], [0.0, 0.0, 1.0]]
+    )
+    params = np.array([0.03699945, 0.00660936, 0.00116909, -0.00038226])
+    width, height = (1752, 1168)
+
+    mapx, mapy = init_fisheye_remap(K, params, width, height)
+
+    x_min = np.nonzero(mapx < 0)[1].max()
+    x_max = np.nonzero(mapx > width)[1].min()
+    y_min = np.nonzero(mapy < 0)[0].max()
+    y_max = np.nonzero(mapy > height)[0].min()
+    roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
+    K[0, 2] -= x_min
+    K[1, 2] -= y_min
+
+    image = imageio.imread("./data/zipnerf/fisheye/berlin/images_4/DSC00040.JPG")[
+        ..., :3
+    ]
+    image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
+    imageio.imwrite("./results/test_remap.png", image)
+    x, y, w, h = roi_undist
+    image = image[y : y + h, x : x + w]
+    imageio.imwrite("./results/test_remap_crop.png", image)
+
+
+if __name__ == "__main__":
+    main()

From 37f40a5fa5d3c391161755f70269608b43d1202a Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 12:39:57 -0700
Subject: [PATCH 15/38] remove hardcoded roi

---
 examples/benchmarks/mcmc_zipnerf.sh | 20 +++++++++-----------
 examples/datasets/colmap.py         | 16 ++++++++++------
 examples/simple_trainer.py          |  4 ++--
 gsplat/rendering.py                 |  2 +-
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh
index 2d75d8661..0e1206161 100644
--- a/examples/benchmarks/mcmc_zipnerf.sh
+++ b/examples/benchmarks/mcmc_zipnerf.sh
@@ -5,7 +5,7 @@
 SCENE_DIR="data/zipnerf/fisheye"
 RESULT_DIR="results/benchmark_zipnerf/fisheye"
 CAMERA_MODEL="fisheye"
-SCENE_LIST="berlin alameda london nyc"
+SCENE_LIST="berlin london nyc alameda"
 RENDER_TRAJ_PATH="interp"
 
 CAP_MAX=2000000
@@ -25,16 +25,14 @@ do
         --result_dir $RESULT_DIR/$SCENE/
 
     # run eval and render
-    # for CKPT in $RESULT_DIR/$SCENE/ckpts/*;
-    # do
-    #     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
-    #         --strategy.cap-max $CAP_MAX \
-    #         --camera_model $CAMERA_MODEL \
-    #         --render_traj_path $RENDER_TRAJ_PATH \
-    #         --data_dir $SCENE_DIR/$SCENE/ \
-    #         --result_dir $RESULT_DIR/$SCENE/ \
-    #         --ckpt $CKPT
-    # done
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+        --strategy.cap-max $CAP_MAX \
+        --opacity_reg 0.001 \
+        --camera_model $CAMERA_MODEL \
+        --render_traj_path $RENDER_TRAJ_PATH \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/ \
+        --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt
 done
 
 
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 96ed09fd5..f06035f49 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -253,13 +253,17 @@ def __init__(
                         mapx[j, i] = x2
                         mapy[j, i] = y2
 
-                x_crop, y_crop = (100, 70)  # Hardcoded ROI crop
-                roi_undist = np.array(
-                    [x_crop, y_crop, int(width - 2 * x_crop), int(height - 2 * y_crop)]
-                )
+                # Compute ROI
+                x_min = np.nonzero(mapx < 0)[1].max()
+                x_max = np.nonzero(mapx > width)[1].min()
+                y_min = np.nonzero(mapy < 0)[0].max()
+                y_max = np.nonzero(mapy > height)[0].min()
+                roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
+                K[0, 2] -= x_min
+                K[1, 2] -= y_min
                 K_undist = K.copy()
-                K_undist[0, 2] -= x_crop
-                K_undist[1, 2] -= y_crop
+                K_undist[0, 2] -= x_min
+                K_undist[1, 2] -= y_min
             self.Ks_dict[camera_id] = K_undist
             self.mapx_dict[camera_id] = mapx
             self.mapy_dict[camera_id] = mapy
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index 9205144c2..3b27ae119 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -56,7 +56,7 @@ class Config:
     # Normalize the world space
     normalize_world_space: bool = True
     # Camera model
-    camera_model: str = "pinhole"
+    camera_model: Literal["pinhole", "fisheye"] = "pinhole"
 
     # Port for the viewer server
     port: int = 8080
@@ -926,7 +926,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config):
             runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts])
         step = ckpts[0]["step"]
         runner.eval(step=step)
-        # runner.render_traj(step=step)
+        runner.render_traj(step=step)
         if cfg.compression is not None:
             runner.run_compression(step=step)
     else:
diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 9cfd8318f..4e4b3dd70 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -47,7 +47,7 @@ def rasterization(
     distributed: bool = False,
     ortho: bool = False,
     covars: Optional[Tensor] = None,
-    camera_model: str = "pinhole",
+    camera_model: Literal["pinhole", "fisheye"] = "pinhole",
 ) -> Tuple[Tensor, Tensor, Dict]:
     """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C).
 

From 98a7819a4969876d83ed73cc423f70c2ece1a8f9 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 12:48:30 -0700
Subject: [PATCH 16/38] cleanup tests

---
 examples/datasets/colmap.py | 12 +++++++-----
 tests/test_basic.py         | 10 +++++-----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index f06035f49..61b143873 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, assert_never
 
 import cv2
 import imageio.v2 as imageio
@@ -97,9 +97,9 @@ def __init__(
             elif type_ == 5 or type_ == "OPENCV_FISHEYE":
                 params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32)
                 camtype = "fisheye"
-            # assert (
-            #     camtype == "perspective"
-            # ), f"Only support perspective camera model, got {type_}"
+            assert (
+                camtype == "perspective" or camtype == "fisheye"
+            ), f"Only perspective and fisheye cameras are supported, got {type_}"
 
             params_dict[camera_id] = params
 
@@ -252,7 +252,6 @@ def __init__(
                         y2 = fy * y1 * r + height // 2
                         mapx[j, i] = x2
                         mapy[j, i] = y2
-
                 # Compute ROI
                 x_min = np.nonzero(mapx < 0)[1].max()
                 x_max = np.nonzero(mapx > width)[1].min()
@@ -264,6 +263,9 @@ def __init__(
                 K_undist = K.copy()
                 K_undist[0, 2] -= x_min
                 K_undist[1, 2] -= y_min
+            else:
+                assert_never(camtype)
+
             self.Ks_dict[camera_id] = K_undist
             self.mapx_dict[camera_id] = mapx
             self.mapy_dict[camera_id] = mapy
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 79e8c0c16..c63ea44a4 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -165,10 +165,10 @@ def test_proj(test_data, ortho: bool):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
-@pytest.mark.parametrize("fused", [True])
-@pytest.mark.parametrize("calc_compensations", [False])
+@pytest.mark.parametrize("fused", [False, True])
+@pytest.mark.parametrize("calc_compensations", [False, True])
 @pytest.mark.parametrize("ortho", [False])
-@pytest.mark.parametrize("fisheye", [True])
+@pytest.mark.parametrize("fisheye", [True, False])
 def test_projection(
     test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool
 ):
@@ -177,8 +177,8 @@ def test_projection(
 
     torch.manual_seed(42)
 
-    Ks = test_data["Ks"][:1, ...]
-    viewmats = test_data["viewmats"][:1, ...]
+    Ks = test_data["Ks"]
+    viewmats = test_data["viewmats"]
     height = test_data["height"]
     width = test_data["width"]
     quats = test_data["quats"]

From 67baaae487997ccf343b0b406f768c6edb91af56 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 12:53:16 -0700
Subject: [PATCH 17/38] import assert_never from typing_extensions instead of typing

---
 examples/datasets/colmap.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 61b143873..58428af2a 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -1,5 +1,6 @@
 import os
-from typing import Any, Dict, List, Optional, assert_never
+from typing import Any, Dict, List, Optional
+from typing_extensions import assert_never
 
 import cv2
 import imageio.v2 as imageio

From e41679de675cae8035e2e77f01d5e3301be5f65f Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 10 Sep 2024 13:02:07 -0700
Subject: [PATCH 18/38] fix double principal-point shift in fisheye K_undist

---
 examples/datasets/colmap.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 58428af2a..6d54db7f8 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -259,8 +259,6 @@ def __init__(
                 y_min = np.nonzero(mapy < 0)[0].max()
                 y_max = np.nonzero(mapy > height)[0].min()
                 roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
-                K[0, 2] -= x_min
-                K[1, 2] -= y_min
                 K_undist = K.copy()
                 K_undist[0, 2] -= x_min
                 K_undist[1, 2] -= y_min

From 1ed34f0bff29eced214c4c3c4093a705d94e21d1 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 11:30:52 -0700
Subject: [PATCH 19/38] set imsize_dict to the undistorted ROI size

---
 examples/benchmarks/mcmc_zipnerf.sh | 2 +-
 examples/datasets/colmap.py         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh
index 0e1206161..f17c09ee0 100644
--- a/examples/benchmarks/mcmc_zipnerf.sh
+++ b/examples/benchmarks/mcmc_zipnerf.sh
@@ -3,7 +3,7 @@
 # CAMERA_MODEL="pinhole"
 
 SCENE_DIR="data/zipnerf/fisheye"
-RESULT_DIR="results/benchmark_zipnerf/fisheye"
+RESULT_DIR="results/benchmark_zipnerf/fisheye_2m"
 CAMERA_MODEL="fisheye"
 SCENE_LIST="berlin london nyc alameda"
 RENDER_TRAJ_PATH="interp"
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 6d54db7f8..ca7630818 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -269,6 +269,7 @@ def __init__(
             self.mapx_dict[camera_id] = mapx
             self.mapy_dict[camera_id] = mapy
             self.roi_undist_dict[camera_id] = roi_undist
+            self.imsize_dict[camera_id] = (x_max - x_min, y_max - y_min)
 
         # size of the scene measured by cameras
         camera_locations = camtoworlds[:, :3, 3]

From 4434f0955ec1bb809a1fbb310ca1363f0a9b3851 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 11:39:07 -0700
Subject: [PATCH 20/38] format c++

---
 gsplat/cuda/csrc/utils.cuh | 118 ++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index e10007cd8..d6af483c0 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -244,11 +244,7 @@ inline __device__ void ortho_proj_vjp(
     // df/dx = fx * df/dpixx
     // df/dy = fy * df/dpixy
     // df/dz = 0
-    v_mean3d += vec3<T>(
-        fx * v_mean2d[0],
-        fy * v_mean2d[1],
-        0.f
-    );
+    v_mean3d += vec3<T>(fx * v_mean2d[0], fy * v_mean2d[1], 0.f);
 }
 
 template <typename T>
@@ -393,26 +389,24 @@ inline __device__ void fisheye_proj(
     float eps = 0.0000001f;
     float xy_len = glm::length(glm::vec2({x, y})) + eps;
     float theta = glm::atan(xy_len, z + eps);
-    mean2d = vec2<T>({
-        x * fx * theta / xy_len + cx, 
-        y * fy * theta / xy_len + cy
-    });
+    mean2d =
+        vec2<T>({x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy});
 
     float x2 = x * x + eps;
     float y2 = y * y;
     float xy = x * y;
-    float x2y2 = x2 + y2 ;
+    float x2y2 = x2 + y2;
     float x2y2z2_inv = 1.f / (x2y2 + z * z);
 
     float b = glm::atan(xy_len, z) / xy_len / x2y2;
     float a = z * x2y2z2_inv / (x2y2);
     mat3x2<T> J = mat3x2<T>(
         fx * (x2 * a + y2 * b),
-        fy * xy  * (a - b), 
-        fx * xy * (a - b), 
-        fy * (y2 * a + x2 * b),   
-        - fx * x * x2y2z2_inv,
-        - fy * y * x2y2z2_inv
+        fy * xy * (a - b),
+        fx * xy * (a - b),
+        fy * (y2 * a + x2 * b),
+        -fx * x * x2y2z2_inv,
+        -fy * y * x2y2z2_inv
     );
     cov2d = J * cov3d * glm::transpose(J);
 }
@@ -450,20 +444,20 @@ inline __device__ void fisheye_proj_vjp(
     v_mean3d += vec3<T>(
         fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1],
         fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1],
-        - fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1]
+        -fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1]
     );
 
     const float theta = glm::atan(len_xy, z);
     const float J_b = theta / len_xy / x2y2;
-	const float J_a = z * x2y2z2_inv / (x2y2);
+    const float J_a = z * x2y2z2_inv / (x2y2);
     // mat3x2 is 3 columns x 2 rows.
     mat3x2<T> J = mat3x2<T>(
         fx * (x2 * J_a + y2 * J_b),
-        fy * xy  * (J_a - J_b), // 1st column
+        fy * xy * (J_a - J_b), // 1st column
         fx * xy * (J_a - J_b),
         fy * (y2 * J_a + x2 * J_b), // 2nd column
-        - fx * x * x2y2z2_inv,
-        - fy * y * x2y2z2_inv // 3rd column
+        -fx * x * x2y2z2_inv,
+        -fy * y * x2y2z2_inv // 3rd column
     );
     v_cov3d += glm::transpose(J) * v_cov2d * J;
 
@@ -474,45 +468,51 @@ inline __device__ void fisheye_proj_vjp(
     mat3x2<T> v_J = v_cov2d * J * glm::transpose(cov3d) +
                     glm::transpose(v_cov2d) * J * cov3d;
 
-	float l4 = x2y2z2 * x2y2z2;
-
-	float E = - l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z;
-	float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z;
-
-	float A = x * (3 * E + x2 * F);
-	float B = y * (E + x2 * F);
-	float C = x * (E + y2 * F);
-	float D = y * (3 * E + y2 * F);
-	
-	float S1 = x2 - y2 - z * z;
-	float S2 = y2 - x2 - z * z;
-	float inv1 = x2y2z2_inv * x2y2z2_inv;
-	float inv2 = inv1 / (x2y2 * x2y2 * len_xy);
-
-	float dJ_dx00 = fx * A * inv2;
-	float dJ_dx01 = fx * B * inv2;
-	float dJ_dx02 = fx * S1 * inv1;
-	float dJ_dx10 = fy * B * inv2;
-	float dJ_dx11 = fy * C * inv2;
-	float dJ_dx12 = 2.f * fy * xy * inv1;
-
-	float dJ_dy00 = dJ_dx01;
-	float dJ_dy01 = fx * C * inv2;
-	float dJ_dy02 = 2.f * fx * xy * inv1;
-	float dJ_dy10 = dJ_dx11;
-	float dJ_dy11 = fy * D * inv2;
-	float dJ_dy12 = fy * S2 * inv1;
-
-	float dJ_dz00 = dJ_dx02;
-	float dJ_dz01 = dJ_dy02;
-	float dJ_dz02 = 2.f * fx * x * z * inv1;
-	float dJ_dz10 = dJ_dx12;
-	float dJ_dz11 = dJ_dy12;
-	float dJ_dz12 = 2.f * fy * y * z * inv1;
-
-    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
-    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
-    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
+    float l4 = x2y2z2 * x2y2z2;
+
+    float E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z;
+    float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z;
+
+    float A = x * (3 * E + x2 * F);
+    float B = y * (E + x2 * F);
+    float C = x * (E + y2 * F);
+    float D = y * (3 * E + y2 * F);
+
+    float S1 = x2 - y2 - z * z;
+    float S2 = y2 - x2 - z * z;
+    float inv1 = x2y2z2_inv * x2y2z2_inv;
+    float inv2 = inv1 / (x2y2 * x2y2 * len_xy);
+
+    float dJ_dx00 = fx * A * inv2;
+    float dJ_dx01 = fx * B * inv2;
+    float dJ_dx02 = fx * S1 * inv1;
+    float dJ_dx10 = fy * B * inv2;
+    float dJ_dx11 = fy * C * inv2;
+    float dJ_dx12 = 2.f * fy * xy * inv1;
+
+    float dJ_dy00 = dJ_dx01;
+    float dJ_dy01 = fx * C * inv2;
+    float dJ_dy02 = 2.f * fx * xy * inv1;
+    float dJ_dy10 = dJ_dx11;
+    float dJ_dy11 = fy * D * inv2;
+    float dJ_dy12 = fy * S2 * inv1;
+
+    float dJ_dz00 = dJ_dx02;
+    float dJ_dz01 = dJ_dy02;
+    float dJ_dz02 = 2.f * fx * x * z * inv1;
+    float dJ_dz10 = dJ_dx12;
+    float dJ_dz11 = dJ_dy12;
+    float dJ_dz12 = 2.f * fy * y * z * inv1;
+
+    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] +
+                       dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] +
+                       dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
+    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] +
+                       dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] +
+                       dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
+    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] +
+                       dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] +
+                       dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
     v_mean3d.x += dL_dtx_raw;
     v_mean3d.y += dL_dty_raw;
     v_mean3d.z += dL_dtz_raw;

From 3948fc95314dcdf38ddacbc02fb75e2a0942d83d Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 11:53:57 -0700
Subject: [PATCH 21/38] use template type T instead of float in fisheye_proj and fisheye_proj_vjp

---
 gsplat/cuda/csrc/utils.cuh | 141 ++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 73 deletions(-)

diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh
index d6af483c0..50c2fe5e9 100644
--- a/gsplat/cuda/csrc/utils.cuh
+++ b/gsplat/cuda/csrc/utils.cuh
@@ -386,20 +386,20 @@ inline __device__ void fisheye_proj(
 ) {
     T x = mean3d[0], y = mean3d[1], z = mean3d[2];
 
-    float eps = 0.0000001f;
-    float xy_len = glm::length(glm::vec2({x, y})) + eps;
-    float theta = glm::atan(xy_len, z + eps);
+    T eps = 0.0000001f;
+    T xy_len = glm::length(glm::vec2({x, y})) + eps;
+    T theta = glm::atan(xy_len, z + eps);
     mean2d =
         vec2<T>({x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy});
 
-    float x2 = x * x + eps;
-    float y2 = y * y;
-    float xy = x * y;
-    float x2y2 = x2 + y2;
-    float x2y2z2_inv = 1.f / (x2y2 + z * z);
+    T x2 = x * x + eps;
+    T y2 = y * y;
+    T xy = x * y;
+    T x2y2 = x2 + y2;
+    T x2y2z2_inv = 1.f / (x2y2 + z * z);
 
-    float b = glm::atan(xy_len, z) / xy_len / x2y2;
-    float a = z * x2y2z2_inv / (x2y2);
+    T b = glm::atan(xy_len, z) / xy_len / x2y2;
+    T a = z * x2y2z2_inv / (x2y2);
     mat3x2<T> J = mat3x2<T>(
         fx * (x2 * a + y2 * b),
         fy * xy * (a - b),
@@ -431,25 +431,25 @@ inline __device__ void fisheye_proj_vjp(
 ) {
     T x = mean3d[0], y = mean3d[1], z = mean3d[2];
 
-    const float eps = 0.0000001f;
-    float x2 = x * x + eps;
-    float y2 = y * y;
-    float xy = x * y;
-    float x2y2 = x2 + y2;
-    float len_xy = length(glm::vec2({x, y})) + eps;
-    const float x2y2z2 = x2y2 + z * z;
-    float x2y2z2_inv = 1.f / x2y2z2;
-    float b = glm::atan(len_xy, z) / len_xy / x2y2;
-    float a = z * x2y2z2_inv / (x2y2);
+    const T eps = 0.0000001f;
+    T x2 = x * x + eps;
+    T y2 = y * y;
+    T xy = x * y;
+    T x2y2 = x2 + y2;
+    T len_xy = length(glm::vec2({x, y})) + eps;
+    const T x2y2z2 = x2y2 + z * z;
+    T x2y2z2_inv = 1.f / x2y2z2;
+    T b = glm::atan(len_xy, z) / len_xy / x2y2;
+    T a = z * x2y2z2_inv / (x2y2);
     v_mean3d += vec3<T>(
         fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1],
         fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1],
         -fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1]
     );
 
-    const float theta = glm::atan(len_xy, z);
-    const float J_b = theta / len_xy / x2y2;
-    const float J_a = z * x2y2z2_inv / (x2y2);
+    const T theta = glm::atan(len_xy, z);
+    const T J_b = theta / len_xy / x2y2;
+    const T J_a = z * x2y2z2_inv / (x2y2);
     // mat3x2 is 3 columns x 2 rows.
     mat3x2<T> J = mat3x2<T>(
         fx * (x2 * J_a + y2 * J_b),
@@ -461,58 +461,53 @@ inline __device__ void fisheye_proj_vjp(
     );
     v_cov3d += glm::transpose(J) * v_cov2d * J;
 
-    // df/dx = -fx * rz2 * df/dJ_02
-    // df/dy = -fy * rz2 * df/dJ_12
-    // df/dz = -fx * rz2 * df/dJ_00 - fy * rz2 * df/dJ_11
-    //         + 2 * fx * tx * rz3 * df/dJ_02 + 2 * fy * ty * rz3
     mat3x2<T> v_J = v_cov2d * J * glm::transpose(cov3d) +
                     glm::transpose(v_cov2d) * J * cov3d;
-
-    float l4 = x2y2z2 * x2y2z2;
-
-    float E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z;
-    float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z;
-
-    float A = x * (3 * E + x2 * F);
-    float B = y * (E + x2 * F);
-    float C = x * (E + y2 * F);
-    float D = y * (3 * E + y2 * F);
-
-    float S1 = x2 - y2 - z * z;
-    float S2 = y2 - x2 - z * z;
-    float inv1 = x2y2z2_inv * x2y2z2_inv;
-    float inv2 = inv1 / (x2y2 * x2y2 * len_xy);
-
-    float dJ_dx00 = fx * A * inv2;
-    float dJ_dx01 = fx * B * inv2;
-    float dJ_dx02 = fx * S1 * inv1;
-    float dJ_dx10 = fy * B * inv2;
-    float dJ_dx11 = fy * C * inv2;
-    float dJ_dx12 = 2.f * fy * xy * inv1;
-
-    float dJ_dy00 = dJ_dx01;
-    float dJ_dy01 = fx * C * inv2;
-    float dJ_dy02 = 2.f * fx * xy * inv1;
-    float dJ_dy10 = dJ_dx11;
-    float dJ_dy11 = fy * D * inv2;
-    float dJ_dy12 = fy * S2 * inv1;
-
-    float dJ_dz00 = dJ_dx02;
-    float dJ_dz01 = dJ_dy02;
-    float dJ_dz02 = 2.f * fx * x * z * inv1;
-    float dJ_dz10 = dJ_dx12;
-    float dJ_dz11 = dJ_dy12;
-    float dJ_dz12 = 2.f * fy * y * z * inv1;
-
-    float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] +
-                       dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] +
-                       dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
-    float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] +
-                       dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] +
-                       dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
-    float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] +
-                       dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] +
-                       dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
+    T l4 = x2y2z2 * x2y2z2;
+
+    T E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z;
+    T F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z;
+
+    T A = x * (3 * E + x2 * F);
+    T B = y * (E + x2 * F);
+    T C = x * (E + y2 * F);
+    T D = y * (3 * E + y2 * F);
+
+    T S1 = x2 - y2 - z * z;
+    T S2 = y2 - x2 - z * z;
+    T inv1 = x2y2z2_inv * x2y2z2_inv;
+    T inv2 = inv1 / (x2y2 * x2y2 * len_xy);
+
+    T dJ_dx00 = fx * A * inv2;
+    T dJ_dx01 = fx * B * inv2;
+    T dJ_dx02 = fx * S1 * inv1;
+    T dJ_dx10 = fy * B * inv2;
+    T dJ_dx11 = fy * C * inv2;
+    T dJ_dx12 = 2.f * fy * xy * inv1;
+
+    T dJ_dy00 = dJ_dx01;
+    T dJ_dy01 = fx * C * inv2;
+    T dJ_dy02 = 2.f * fx * xy * inv1;
+    T dJ_dy10 = dJ_dx11;
+    T dJ_dy11 = fy * D * inv2;
+    T dJ_dy12 = fy * S2 * inv1;
+
+    T dJ_dz00 = dJ_dx02;
+    T dJ_dz01 = dJ_dy02;
+    T dJ_dz02 = 2.f * fx * x * z * inv1;
+    T dJ_dz10 = dJ_dx12;
+    T dJ_dz11 = dJ_dy12;
+    T dJ_dz12 = 2.f * fy * y * z * inv1;
+
+    T dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] +
+                   dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] +
+                   dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1];
+    T dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] +
+                   dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] +
+                   dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1];
+    T dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] +
+                   dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] +
+                   dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1];
     v_mean3d.x += dL_dtx_raw;
     v_mean3d.y += dL_dty_raw;
     v_mean3d.z += dL_dtz_raw;

From ce98242fe379f0238ff82c9b66595f5d41080fce Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 14:43:01 -0700
Subject: [PATCH 22/38] use mask

---
 examples/datasets/colmap.py | 33 +++++++++++++++++++++++----------
 examples/simple_trainer.py  |  9 +++++++++
 examples/test_remap.py      | 13 +++++++++++++
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index ca7630818..74882d57d 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -59,6 +59,7 @@ def __init__(
         Ks_dict = dict()
         params_dict = dict()
         imsize_dict = dict()  # width, height
+        mask_dict = dict()
         bottom = np.array([0, 0, 0, 1]).reshape(1, 4)
         for k in imdata:
             im = imdata[k]
@@ -187,6 +188,7 @@ def __init__(
         self.Ks_dict = Ks_dict  # Dict of camera_id -> K
         self.params_dict = params_dict  # Dict of camera_id -> params
         self.imsize_dict = imsize_dict  # Dict of camera_id -> (width, height)
+        self.mask_dict = mask_dict  # Dict of camera_id -> mask
         self.points = points  # np.ndarray, (num_points, 3)
         self.points_err = points_err  # np.ndarray, (num_points,)
         self.points_rgb = points_rgb  # np.ndarray, (num_points, 3)
@@ -253,23 +255,32 @@ def __init__(
                         y2 = fy * y1 * r + height // 2
                         mapx[j, i] = x2
                         mapy[j, i] = y2
-                # Compute ROI
-                x_min = np.nonzero(mapx < 0)[1].max()
-                x_max = np.nonzero(mapx > width)[1].min()
-                y_min = np.nonzero(mapy < 0)[0].max()
-                y_max = np.nonzero(mapy > height)[0].min()
-                roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
+
                 K_undist = K.copy()
-                K_undist[0, 2] -= x_min
-                K_undist[1, 2] -= y_min
+                roi_undist = [0, 0, width, height]
+                mask = np.logical_and(
+                    np.logical_and(mapx > 0, mapy > 0),
+                    np.logical_and(mapx < width - 1, mapy < height - 1),
+                )
+
+                # # Compute ROI
+                # x_min = np.nonzero(mapx < 0)[1].max()
+                # x_max = np.nonzero(mapx > width)[1].min()
+                # y_min = np.nonzero(mapy < 0)[0].max()
+                # y_max = np.nonzero(mapy > height)[0].min()
+                # roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
+                # K_undist = K.copy()
+                # K_undist[0, 2] -= x_min
+                # K_undist[1, 2] -= y_min
             else:
                 assert_never(camtype)
 
-            self.Ks_dict[camera_id] = K_undist
             self.mapx_dict[camera_id] = mapx
             self.mapy_dict[camera_id] = mapy
+            self.Ks_dict[camera_id] = K_undist
             self.roi_undist_dict[camera_id] = roi_undist
-            self.imsize_dict[camera_id] = (x_max - x_min, y_max - y_min)
+            self.imsize_dict[camera_id] = (roi_undist[2], roi_undist[3])
+            self.mask_dict[camera_id] = mask
 
         # size of the scene measured by cameras
         camera_locations = camtoworlds[:, :3, 3]
@@ -308,6 +319,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
         K = self.parser.Ks_dict[camera_id].copy()  # undistorted K
         params = self.parser.params_dict[camera_id]
         camtoworlds = self.parser.camtoworlds[index]
+        mask = self.parser.mask_dict[camera_id]
 
         if len(params) > 0:
             # Images are distorted. Undistort them.
@@ -333,6 +345,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
             "camtoworld": torch.from_numpy(camtoworlds).float(),
             "image": torch.from_numpy(image).float(),
             "image_id": item,  # the index of the image in the dataset
+            "mask": torch.from_numpy(mask).bool(),
         }
 
         if self.load_depths:
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index 3b27ae119..c91116fcc 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -401,6 +401,7 @@ def rasterize_splats(
         Ks: Tensor,
         width: int,
         height: int,
+        masks: Optional[Tensor] = None,
         **kwargs,
     ) -> Tuple[Tensor, Tensor, Dict]:
         means = self.splats["means"]  # [N, 3]
@@ -446,6 +447,8 @@ def rasterize_splats(
             camera_model=self.cfg.camera_model,
             **kwargs,
         )
+        if masks is not None:
+            render_colors[~masks] = 0
         return render_colors, render_alphas, info
 
     def train(self):
@@ -509,6 +512,9 @@ def train(self):
                 pixels.shape[0] * pixels.shape[1] * pixels.shape[2]
             )
             image_ids = data["image_id"].to(device)
+            masks = (
+                data["mask"].to(device) if data["mask"] is not None else None
+            )  # [1, H, W]
             if cfg.depth_loss:
                 points = data["points"].to(device)  # [1, M, 2]
                 depths_gt = data["depths"].to(device)  # [1, M]
@@ -535,6 +541,7 @@ def train(self):
                 far_plane=cfg.far_plane,
                 image_ids=image_ids,
                 render_mode="RGB+ED" if cfg.depth_loss else "RGB",
+                masks=masks,
             )
             if renders.shape[-1] == 4:
                 colors, depths = renders[..., 0:3], renders[..., 3:4]
@@ -743,6 +750,7 @@ def eval(self, step: int, stage: str = "val"):
             camtoworlds = data["camtoworld"].to(device)
             Ks = data["K"].to(device)
             pixels = data["image"].to(device) / 255.0
+            masks = data["mask"].to(device) if data["mask"] is not None else None
             height, width = pixels.shape[1:3]
 
             torch.cuda.synchronize()
@@ -755,6 +763,7 @@ def eval(self, step: int, stage: str = "val"):
                 sh_degree=cfg.sh_degree,
                 near_plane=cfg.near_plane,
                 far_plane=cfg.far_plane,
+                masks=masks,
             )  # [1, H, W, 3]
             torch.cuda.synchronize()
             ellipse_time += time.time() - tic
diff --git a/examples/test_remap.py b/examples/test_remap.py
index 13a80d9ab..7cf529618 100644
--- a/examples/test_remap.py
+++ b/examples/test_remap.py
@@ -41,6 +41,12 @@ def main():
 
     mapx, mapy = init_fisheye_remap(K, params, width, height)
 
+    mask = np.logical_and(
+        np.logical_and(mapx > 0, mapy > 0),
+        np.logical_and(mapx < width - 1, mapy < height - 1),
+    )
+    imageio.imwrite("./results/test_remap_mask.png", mask.astype(np.uint8) * 255)
+
     x_min = np.nonzero(mapx < 0)[1].max()
     x_max = np.nonzero(mapx > width)[1].min()
     y_min = np.nonzero(mapy < 0)[0].max()
@@ -53,6 +59,13 @@ def main():
         ..., :3
     ]
     image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
+
+    mask2 = image.max(axis=2) != 0
+    imageio.imwrite("./results/test_remap_mask2.png", mask2.astype(np.uint8) * 255)
+    mask3 = mask2 ^ mask
+    imageio.imwrite("./results/test_remap_mask3.png", mask3.astype(np.uint8) * 255)
+    print(mask.sum(), mask2.sum(), mask3.sum())
+    # print((mask2.astype(int) - mask.astype(int)).sum())
     imageio.imwrite("./results/test_remap.png", image)
     x, y, w, h = roi_undist
     image = image[y : y + h, x : x + w]

From 66128ca1171b8ddcb885aa12ec6ad4b82d316f26 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 14:44:24 -0700
Subject: [PATCH 23/38] remove test_remap

---
 examples/datasets/colmap.py | 10 -----
 examples/test_remap.py      | 76 -------------------------------------
 2 files changed, 86 deletions(-)
 delete mode 100644 examples/test_remap.py

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 74882d57d..15f4e739a 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -262,16 +262,6 @@ def __init__(
                     np.logical_and(mapx > 0, mapy > 0),
                     np.logical_and(mapx < width - 1, mapy < height - 1),
                 )
-
-                # # Compute ROI
-                # x_min = np.nonzero(mapx < 0)[1].max()
-                # x_max = np.nonzero(mapx > width)[1].min()
-                # y_min = np.nonzero(mapy < 0)[0].max()
-                # y_max = np.nonzero(mapy > height)[0].min()
-                # roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
-                # K_undist = K.copy()
-                # K_undist[0, 2] -= x_min
-                # K_undist[1, 2] -= y_min
             else:
                 assert_never(camtype)
 
diff --git a/examples/test_remap.py b/examples/test_remap.py
deleted file mode 100644
index 7cf529618..000000000
--- a/examples/test_remap.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import numpy as np
-import cv2
-import imageio
-
-
-def init_fisheye_remap(K, params, width, height):
-    fx = K[0, 0]
-    fy = K[1, 1]
-    cx = K[0, 2]
-    cy = K[1, 2]
-
-    mapx = np.zeros((height, width), dtype=np.float32)
-    mapy = np.zeros((height, width), dtype=np.float32)
-    for i in range(0, width):
-        for j in range(0, height):
-            x = float(i)
-            y = float(j)
-            x1 = (x - cx) / fx
-            y1 = (y - cy) / fy
-            theta = np.sqrt(x1**2 + y1**2)
-            r = (
-                1.0
-                + params[0] * theta**2
-                + params[1] * theta**4
-                + params[2] * theta**6
-                + params[3] * theta**8
-            )
-            x2 = fx * x1 * r + width // 2
-            y2 = fy * y1 * r + height // 2
-            mapx[j, i] = x2
-            mapy[j, i] = y2
-    return mapx, mapy
-
-
-def main():
-    K = np.array(
-        [[610.93592297, 0.0, 876.0], [0.0, 610.84071973, 584.0], [0.0, 0.0, 1.0]]
-    )
-    params = np.array([0.03699945, 0.00660936, 0.00116909, -0.00038226])
-    width, height = (1752, 1168)
-
-    mapx, mapy = init_fisheye_remap(K, params, width, height)
-
-    mask = np.logical_and(
-        np.logical_and(mapx > 0, mapy > 0),
-        np.logical_and(mapx < width - 1, mapy < height - 1),
-    )
-    imageio.imwrite("./results/test_remap_mask.png", mask.astype(np.uint8) * 255)
-
-    x_min = np.nonzero(mapx < 0)[1].max()
-    x_max = np.nonzero(mapx > width)[1].min()
-    y_min = np.nonzero(mapy < 0)[0].max()
-    y_max = np.nonzero(mapy > height)[0].min()
-    roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
-    K[0, 2] -= x_min
-    K[1, 2] -= y_min
-
-    image = imageio.imread("./data/zipnerf/fisheye/berlin/images_4/DSC00040.JPG")[
-        ..., :3
-    ]
-    image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR)
-
-    mask2 = image.max(axis=2) != 0
-    imageio.imwrite("./results/test_remap_mask2.png", mask2.astype(np.uint8) * 255)
-    mask3 = mask2 ^ mask
-    imageio.imwrite("./results/test_remap_mask3.png", mask3.astype(np.uint8) * 255)
-    print(mask.sum(), mask2.sum(), mask3.sum())
-    # print((mask2.astype(int) - mask.astype(int)).sum())
-    imageio.imwrite("./results/test_remap.png", image)
-    x, y, w, h = roi_undist
-    image = image[y : y + h, x : x + w]
-    imageio.imwrite("./results/test_remap_crop.png", image)
-
-
-if __name__ == "__main__":
-    main()

From 419c9e13719640dc38df881359d24fd6d34202d2 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 15:40:47 -0700
Subject: [PATCH 24/38] mask roi

---
 examples/datasets/colmap.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 15f4e739a..994a64abc 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -256,12 +256,18 @@ def __init__(
                         mapx[j, i] = x2
                         mapy[j, i] = y2
 
-                K_undist = K.copy()
-                roi_undist = [0, 0, width, height]
                 mask = np.logical_and(
                     np.logical_and(mapx > 0, mapy > 0),
                     np.logical_and(mapx < width - 1, mapy < height - 1),
                 )
+                y_indices, x_indices = np.nonzero(mask)
+                y_min, y_max = y_indices.min(), y_indices.max() + 1
+                x_min, x_max = x_indices.min(), x_indices.max() + 1
+                mask = mask[y_min:y_max, x_min:x_max]
+                K_undist = K.copy()
+                K_undist[0, 2] -= x_min
+                K_undist[1, 2] -= y_min
+                roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min]
             else:
                 assert_never(camtype)
 

From 42111578c8a502105e48c7fe5908348473946265 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 16:33:16 -0700
Subject: [PATCH 25/38] scripts

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh   | 22 +++++++
 .../fisheye/mcmc_zipnerf_undistort.sh         | 22 +++++++
 examples/benchmarks/mcmc.sh                   |  2 +-
 examples/benchmarks/mcmc_zipnerf.sh           | 58 -------------------
 examples/datasets/colmap.py                   |  6 +-
 examples/simple_trainer.py                    |  6 +-
 6 files changed, 51 insertions(+), 65 deletions(-)
 create mode 100644 examples/benchmarks/fisheye/mcmc_zipnerf.sh
 create mode 100644 examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
 delete mode 100644 examples/benchmarks/mcmc_zipnerf.sh

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
new file mode 100644
index 000000000..ff6f7b9e5
--- /dev/null
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -0,0 +1,22 @@
+SCENE_DIR="data/zipnerf"
+SCENE_LIST="berlin london nyc alameda"
+DATA_FACTOR=2
+
+RESULT_DIR="results/benchmark_mcmc_2M_zipnerf"
+CAP_MAX=2000000
+
+# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf"
+# CAP_MAX=4000000
+
+for SCENE in $SCENE_LIST;
+do
+    echo "Running $SCENE"
+
+    # train and eval
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+        --strategy.cap-max $CAP_MAX \
+        --opacity_reg 0.001 \
+        --camera_model fisheye \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/
+done
\ No newline at end of file
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
new file mode 100644
index 000000000..f0f29ecd8
--- /dev/null
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
@@ -0,0 +1,22 @@
+SCENE_DIR="data/zipnerf_undistort"
+SCENE_LIST="berlin london nyc alameda"
+DATA_FACTOR=2
+
+RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort"
+CAP_MAX=2000000
+
+# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistort"
+# CAP_MAX=4000000
+
+for SCENE in $SCENE_LIST;
+do
+    echo "Running $SCENE"
+
+    # train and eval
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+        --strategy.cap-max $CAP_MAX \
+        --opacity_reg 0.001 \
+        --camera_model pinhole \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/
+done
\ No newline at end of file
diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh
index 23e40838d..0eaa5c8bb 100644
--- a/examples/benchmarks/mcmc.sh
+++ b/examples/benchmarks/mcmc.sh
@@ -19,7 +19,7 @@ do
     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --render_traj_path $RENDER_TRAJ_PATH \
-        --data_dir data/360_v2/$SCENE/ \
+        --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
 
     # run eval and render
diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh
deleted file mode 100644
index f17c09ee0..000000000
--- a/examples/benchmarks/mcmc_zipnerf.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-# SCENE_DIR="data/zipnerf/undistort"
-# RESULT_DIR="results/benchmark_zipnerf/undistort"
-# CAMERA_MODEL="pinhole"
-
-SCENE_DIR="data/zipnerf/fisheye"
-RESULT_DIR="results/benchmark_zipnerf/fisheye_2m"
-CAMERA_MODEL="fisheye"
-SCENE_LIST="berlin london nyc alameda"
-RENDER_TRAJ_PATH="interp"
-
-CAP_MAX=2000000
-DATA_FACTOR=4
-
-for SCENE in $SCENE_LIST;
-do
-    echo "Running $SCENE"
-
-    # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
-        --strategy.cap-max $CAP_MAX \
-        --opacity_reg 0.001 \
-        --camera_model $CAMERA_MODEL \
-        --render_traj_path $RENDER_TRAJ_PATH \
-        --data_dir $SCENE_DIR/$SCENE/ \
-        --result_dir $RESULT_DIR/$SCENE/
-
-    # run eval and render
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
-        --strategy.cap-max $CAP_MAX \
-        --opacity_reg 0.001 \
-        --camera_model $CAMERA_MODEL \
-        --render_traj_path $RENDER_TRAJ_PATH \
-        --data_dir $SCENE_DIR/$SCENE/ \
-        --result_dir $RESULT_DIR/$SCENE/ \
-        --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt
-done
-
-
-for SCENE in $SCENE_LIST;
-do
-    echo "=== Eval Stats ==="
-
-    for STATS in $RESULT_DIR/$SCENE/stats/val*.json;
-    do  
-        echo $STATS
-        cat $STATS; 
-        echo
-    done
-
-    echo "=== Train Stats ==="
-
-    for STATS in $RESULT_DIR/$SCENE/stats/train*_rank0.json;
-    do  
-        echo $STATS
-        cat $STATS; 
-        echo
-    done
-done
\ No newline at end of file
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 994a64abc..f41863a35 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -107,7 +107,7 @@ def __init__(
 
             # image size
             imsize_dict[camera_id] = (cam.width // factor, cam.height // factor)
-
+            mask_dict[camera_id] = None
         print(
             f"[Parser] {len(imdata)} images, taken by {len(set(camera_ids))} cameras."
         )
@@ -230,6 +230,7 @@ def __init__(
                 mapx, mapy = cv2.initUndistortRectifyMap(
                     K, params, None, K_undist, (width, height), cv2.CV_32FC1
                 )
+                mask = None
             elif camtype == "fisheye":
                 fx = K[0, 0]
                 fy = K[1, 1]
@@ -341,8 +342,9 @@ def __getitem__(self, item: int) -> Dict[str, Any]:
             "camtoworld": torch.from_numpy(camtoworlds).float(),
             "image": torch.from_numpy(image).float(),
             "image_id": item,  # the index of the image in the dataset
-            "mask": torch.from_numpy(mask).bool(),
         }
+        if mask is not None:
+            data["mask"] = torch.from_numpy(mask).bool()
 
         if self.load_depths:
             # projected points to image plane to get depths
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index c91116fcc..c674284aa 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -512,9 +512,7 @@ def train(self):
                 pixels.shape[0] * pixels.shape[1] * pixels.shape[2]
             )
             image_ids = data["image_id"].to(device)
-            masks = (
-                data["mask"].to(device) if data["mask"] is not None else None
-            )  # [1, H, W]
+            masks = data["mask"].to(device) if "mask" in data else None  # [1, H, W]
             if cfg.depth_loss:
                 points = data["points"].to(device)  # [1, M, 2]
                 depths_gt = data["depths"].to(device)  # [1, M]
@@ -750,7 +748,7 @@ def eval(self, step: int, stage: str = "val"):
             camtoworlds = data["camtoworld"].to(device)
             Ks = data["K"].to(device)
             pixels = data["image"].to(device) / 255.0
-            masks = data["mask"].to(device) if data["mask"] is not None else None
+            masks = data["mask"].to(device) if "mask" in data else None
             height, width = pixels.shape[1:3]
 
             torch.cuda.synchronize()

From 148c218520ee35839b93ca57d1712e492b4fb476 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 16:41:53 -0700
Subject: [PATCH 26/38] reduce diff

---
 examples/simple_trainer.py | 2 +-
 gsplat/cuda/_torch_impl.py | 9 +--------
 gsplat/rendering.py        | 4 ++--
 tests/test_basic.py        | 2 --
 4 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index c674284aa..f3b2ac8c4 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -444,7 +444,7 @@ def rasterize_splats(
             sparse_grad=self.cfg.sparse_grad,
             rasterize_mode=rasterize_mode,
             distributed=self.world_size > 1,
-            camera_model=self.cfg.camera_model,
+            fisheye=self.cfg.camera_model == "fisheye",
             **kwargs,
         )
         if masks is not None:
diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py
index c02535f7b..235d1b577 100644
--- a/gsplat/cuda/_torch_impl.py
+++ b/gsplat/cuda/_torch_impl.py
@@ -235,9 +235,7 @@ def _world_to_cam(
 
 def _fully_fused_projection(
     means: Tensor,  # [N, 3]
-    covars: Optional[Tensor],  # [N, 6] or None
-    quats: Optional[Tensor],  # [N, 4] or None
-    scales: Optional[Tensor],  # [N, 3] or None
+    covars: Tensor,  # [N, 3, 3]
     viewmats: Tensor,  # [C, 4, 4]
     Ks: Tensor,  # [C, 3, 3]
     width: int,
@@ -256,11 +254,6 @@ def _fully_fused_projection(
         This is a minimal implementation of fully fused version, which has more
         arguments. Not all arguments are supported.
     """
-    if covars is None:
-        covars = _quat_scale_to_covar_preci(
-            quats, scales, compute_covar=True, compute_preci=False
-        )[0]
-
     means_c, covars_c = _world_to_cam(means, covars, viewmats)
 
     if ortho:
diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 4e4b3dd70..6f4f9c577 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -46,8 +46,8 @@ def rasterization(
     channel_chunk: int = 32,
     distributed: bool = False,
     ortho: bool = False,
+    fisheye: bool = False,
     covars: Optional[Tensor] = None,
-    camera_model: Literal["pinhole", "fisheye"] = "pinhole",
 ) -> Tuple[Tensor, Tensor, Dict]:
     """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C).
 
@@ -308,7 +308,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         sparse_grad=sparse_grad,
         calc_compensations=(rasterize_mode == "antialiased"),
         ortho=ortho,
-        fisheye=camera_model == "fisheye",
+        fisheye=fisheye,
     )
 
     if packed:
diff --git a/tests/test_basic.py b/tests/test_basic.py
index c63ea44a4..89fdd8a1d 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -224,8 +224,6 @@ def test_projection(
     _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection(
         means,
         _covars,
-        None,
-        None,
         viewmats,
         Ks,
         width,

From 6adbf6d99105a879c3dba576183ed51e400b236b Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 16:46:00 -0700
Subject: [PATCH 27/38] minor

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh           | 2 +-
 examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 2 +-
 examples/datasets/colmap.py                           | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
index ff6f7b9e5..f315e0c15 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -19,4 +19,4 @@ do
         --camera_model fisheye \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
-done
\ No newline at end of file
+done
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
index f0f29ecd8..f2a2b462f 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
@@ -19,4 +19,4 @@ do
         --camera_model pinhole \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
-done
\ No newline at end of file
+done
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index f41863a35..79638d538 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -104,8 +104,6 @@ def __init__(
             ), f"Only perspective and fisheye cameras are supported, got {type_}"
 
             params_dict[camera_id] = params
-
-            # image size
             imsize_dict[camera_id] = (cam.width // factor, cam.height // factor)
             mask_dict[camera_id] = None
         print(

From c2e7ada02829b45c4dae6d04d6a1599a9d5ffa3a Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 11 Sep 2024 17:38:52 -0700
Subject: [PATCH 28/38] weird ortho bug

---
 tests/test_basic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_basic.py b/tests/test_basic.py
index 89fdd8a1d..a64084776 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -165,10 +165,10 @@ def test_proj(test_data, ortho: bool):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
-@pytest.mark.parametrize("fused", [False, True])
-@pytest.mark.parametrize("calc_compensations", [False, True])
-@pytest.mark.parametrize("ortho", [False])
 @pytest.mark.parametrize("fisheye", [True, False])
+@pytest.mark.parametrize("fused", [False, True])
+@pytest.mark.parametrize("calc_compensations", [True, False])
+@pytest.mark.parametrize("ortho", [True, False])
 def test_projection(
     test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool
 ):

From a31e65a54411cfc324bf712e5a1b6a41b44fdaed Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Mon, 16 Sep 2024 22:07:38 -0700
Subject: [PATCH 29/38] vectorize

---
 examples/datasets/colmap.py | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index e007ae2b7..78bc24152 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -251,26 +251,21 @@ def __init__(
                 fy = K[1, 1]
                 cx = K[0, 2]
                 cy = K[1, 2]
-                mapx = np.zeros((height, width), dtype=np.float32)
-                mapy = np.zeros((height, width), dtype=np.float32)
-                for i in range(0, width):
-                    for j in range(0, height):
-                        x = float(i)
-                        y = float(j)
-                        x1 = (x - cx) / fx
-                        y1 = (y - cy) / fy
-                        theta = np.sqrt(x1**2 + y1**2)
-                        r = (
-                            1.0
-                            + params[0] * theta**2
-                            + params[1] * theta**4
-                            + params[2] * theta**6
-                            + params[3] * theta**8
-                        )
-                        x2 = fx * x1 * r + width // 2
-                        y2 = fy * y1 * r + height // 2
-                        mapx[j, i] = x2
-                        mapy[j, i] = y2
+                grid_x, grid_y = np.meshgrid(
+                    np.arange(width, dtype=np.float32), np.arange(height, dtype=np.float32), indexing="xy"
+                )
+                x1 = (grid_x - cx) / fx
+                y1 = (grid_y - cy) / fy
+                theta = np.sqrt(x1**2 + y1**2)
+                r = (
+                    1.0
+                    + params[0] * theta**2
+                    + params[1] * theta**4
+                    + params[2] * theta**6
+                    + params[3] * theta**8
+                )
+                mapx = fx * x1 * r + width // 2
+                mapy = fy * y1 * r + height // 2
 
                 mask = np.logical_and(
                     np.logical_and(mapx > 0, mapy > 0),

From 41aa3982b9cad8554ba4c5b42f63a7c76a306b12 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 17 Sep 2024 15:23:42 -0700
Subject: [PATCH 30/38] ellipse

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh           | 5 ++++-
 examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
index f315e0c15..75507e454 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -1,6 +1,7 @@
 SCENE_DIR="data/zipnerf"
 SCENE_LIST="berlin london nyc alameda"
-DATA_FACTOR=2
+DATA_FACTOR=4
+RENDER_TRAJ_PATH="ellipse"
 
 RESULT_DIR="results/benchmark_mcmc_2M_zipnerf"
 CAP_MAX=2000000
@@ -16,7 +17,9 @@ do
     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --opacity_reg 0.001 \
+        --render_traj_path $RENDER_TRAJ_PATH \
         --camera_model fisheye \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
+
 done
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
index f2a2b462f..1a218e1e2 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
@@ -1,6 +1,7 @@
 SCENE_DIR="data/zipnerf_undistort"
 SCENE_LIST="berlin london nyc alameda"
-DATA_FACTOR=2
+DATA_FACTOR=4
+RENDER_TRAJ_PATH="ellipse"
 
 RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort"
 CAP_MAX=2000000
@@ -16,7 +17,9 @@ do
     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --opacity_reg 0.001 \
+        --render_traj_path $RENDER_TRAJ_PATH \
         --camera_model pinhole \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
+
 done

From 7f2972af6b5480d80af34497f41acdf95dcead73 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 17 Sep 2024 16:01:16 -0700
Subject: [PATCH 31/38] unify python side camera_model

---
 examples/datasets/colmap.py  |  4 ++-
 examples/simple_trainer.py   |  4 +--
 gsplat/cuda/_torch_impl.py   | 12 ++++----
 gsplat/cuda/_wrapper.py      | 57 +++++++++++++++++++-----------------
 gsplat/cuda/csrc/bindings.h  |  4 ++-
 gsplat/cuda/csrc/proj_bwd.cu | 18 ++++++++++++
 gsplat/cuda/csrc/proj_fwd.cu |  7 ++++-
 gsplat/rendering.py          |  6 ++--
 tests/test_basic.py          | 50 ++++++++++++++++++-------------
 9 files changed, 100 insertions(+), 62 deletions(-)

diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 78bc24152..127d2d4a4 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -252,7 +252,9 @@ def __init__(
                 cx = K[0, 2]
                 cy = K[1, 2]
                 grid_x, grid_y = np.meshgrid(
-                    np.arange(width, dtype=np.float32), np.arange(height, dtype=np.float32), indexing="xy"
+                    np.arange(width, dtype=np.float32),
+                    np.arange(height, dtype=np.float32),
+                    indexing="xy",
                 )
                 x1 = (grid_x - cx) / fx
                 y1 = (grid_y - cy) / fy
diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py
index 8894dcb1f..3e544201b 100644
--- a/examples/simple_trainer.py
+++ b/examples/simple_trainer.py
@@ -68,7 +68,7 @@ class Config:
     # Normalize the world space
     normalize_world_space: bool = True
     # Camera model
-    camera_model: Literal["pinhole", "fisheye"] = "pinhole"
+    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole"
 
     # Port for the viewer server
     port: int = 8080
@@ -477,7 +477,7 @@ def rasterize_splats(
             sparse_grad=self.cfg.sparse_grad,
             rasterize_mode=rasterize_mode,
             distributed=self.world_size > 1,
-            fisheye=self.cfg.camera_model == "fisheye",
+            camera_model=self.cfg.camera_model,
             **kwargs,
         )
         if masks is not None:
diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py
index d243deed2..892c6a66f 100644
--- a/gsplat/cuda/_torch_impl.py
+++ b/gsplat/cuda/_torch_impl.py
@@ -1,5 +1,6 @@
 import struct
 from typing import Optional, Tuple
+from typing_extensions import Literal, assert_never
 
 import torch
 import torch.nn.functional as F
@@ -257,8 +258,7 @@ def _fully_fused_projection(
     near_plane: float = 0.01,
     far_plane: float = 1e10,
     calc_compensations: bool = False,
-    ortho: bool = False,
-    fisheye: bool = False,
+    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
 ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
     """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()`
 
@@ -269,12 +269,14 @@ def _fully_fused_projection(
     """
     means_c, covars_c = _world_to_cam(means, covars, viewmats)
 
-    if ortho:
+    if camera_model == "ortho":
         means2d, covars2d = _ortho_proj(means_c, covars_c, Ks, width, height)
-    elif fisheye:
+    elif camera_model == "fisheye":
         means2d, covars2d = _fisheye_proj(means_c, covars_c, Ks, width, height)
-    else:
+    elif camera_model == "pinhole":
         means2d, covars2d = _persp_proj(means_c, covars_c, Ks, width, height)
+    else:
+        assert_never(camera_model)
 
     det_orig = (
         covars2d[..., 0, 0] * covars2d[..., 1, 1]
diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index 5698df6fb..2ddd2ac9b 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -1,5 +1,6 @@
 from typing import Callable, Optional, Tuple
 import warnings
+from typing_extensions import Literal
 
 import torch
 from torch import Tensor
@@ -115,7 +116,7 @@ def proj(
     Ks: Tensor,  # [C, 3, 3]
     width: int,
     height: int,
-    ortho: bool,
+    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
 ) -> Tuple[Tensor, Tensor]:
     """Projection of Gaussians (perspective or orthographic).
 
@@ -139,7 +140,7 @@ def proj(
     means = means.contiguous()
     covars = covars.contiguous()
     Ks = Ks.contiguous()
-    return _Proj.apply(means, covars, Ks, width, height, ortho)
+    return _Proj.apply(means, covars, Ks, width, height, camera_model)
 
 
 def world_to_cam(
@@ -187,8 +188,7 @@ def fully_fused_projection(
     packed: bool = False,
     sparse_grad: bool = False,
     calc_compensations: bool = False,
-    ortho: bool = False,
-    fisheye: bool = False,
+    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
 ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
     """Projects Gaussians to 2D.
 
@@ -290,7 +290,7 @@ def fully_fused_projection(
             radius_clip,
             sparse_grad,
             calc_compensations,
-            ortho,
+            camera_model,
         )
     else:
         return _FullyFusedProjection.apply(
@@ -307,8 +307,7 @@ def fully_fused_projection(
             far_plane,
             radius_clip,
             calc_compensations,
-            ortho,
-            fisheye,
+            camera_model,
         )
 
 
@@ -668,15 +667,21 @@ def forward(
         Ks: Tensor,  # [C, 3, 3]
         width: int,
         height: int,
-        ortho: bool,
+        camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor]:
         means2d, covars2d = _make_lazy_cuda_func("proj_fwd")(
-            means, covars, Ks, width, height, ortho
+            means,
+            covars,
+            Ks,
+            width,
+            height,
+            camera_model == "ortho",
+            camera_model == "fisheye",
         )
         ctx.save_for_backward(means, covars, Ks)
         ctx.width = width
         ctx.height = height
-        ctx.ortho = ortho
+        ctx.camera_model = camera_model
         return means2d, covars2d
 
     @staticmethod
@@ -684,14 +689,15 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor):
         means, covars, Ks = ctx.saved_tensors
         width = ctx.width
         height = ctx.height
-        ortho = ctx.ortho
+        camera_model = ctx.camera_model
         v_means, v_covars = _make_lazy_cuda_func("proj_bwd")(
             means,
             covars,
             Ks,
             width,
             height,
-            ortho,
+            camera_model == "ortho",
+            camera_model == "fisheye",
             v_means2d.contiguous(),
             v_covars2d.contiguous(),
         )
@@ -755,8 +761,7 @@ def forward(
         far_plane: float,
         radius_clip: float,
         calc_compensations: bool,
-        ortho: bool,
-        fisheye: bool,
+        camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
         # "covars" and {"quats", "scales"} are mutually exclusive
         radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
@@ -775,8 +780,8 @@ def forward(
             far_plane,
             radius_clip,
             calc_compensations,
-            ortho,
-            fisheye,
+            camera_model == "ortho",
+            camera_model == "fisheye",
         )
         if not calc_compensations:
             compensations = None
@@ -786,8 +791,7 @@ def forward(
         ctx.width = width
         ctx.height = height
         ctx.eps2d = eps2d
-        ctx.ortho = ortho
-        ctx.fisheye = fisheye
+        ctx.camera_model = camera_model
 
         return radii, means2d, depths, conics, compensations
 
@@ -807,8 +811,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
         width = ctx.width
         height = ctx.height
         eps2d = ctx.eps2d
-        ortho = ctx.ortho
-        fisheye = ctx.fisheye
+        camera_model = ctx.camera_model
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
         v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
@@ -823,8 +826,8 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
             width,
             height,
             eps2d,
-            ortho,
-            fisheye,
+            camera_model == "ortho",
+            camera_model == "fisheye",
             radii,
             conics,
             compensations,
@@ -1013,7 +1016,7 @@ def forward(
         radius_clip: float,
         sparse_grad: bool,
         calc_compensations: bool,
-        ortho: bool,
+        camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
         (
             indptr,
@@ -1038,7 +1041,7 @@ def forward(
             far_plane,
             radius_clip,
             calc_compensations,
-            ortho,
+            camera_model == "ortho",
         )
         if not calc_compensations:
             compensations = None
@@ -1058,7 +1061,7 @@ def forward(
         ctx.height = height
         ctx.eps2d = eps2d
         ctx.sparse_grad = sparse_grad
-        ctx.ortho = ortho
+        ctx.camera_model = camera_model
 
         return camera_ids, gaussian_ids, radii, means2d, depths, conics, compensations
 
@@ -1089,7 +1092,7 @@ def backward(
         height = ctx.height
         eps2d = ctx.eps2d
         sparse_grad = ctx.sparse_grad
-        ortho = ctx.ortho
+        camera_model = ctx.camera_model
 
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
@@ -1105,7 +1108,7 @@ def backward(
             width,
             height,
             eps2d,
-            ortho,
+            camera_model == "ortho",
             camera_ids,
             gaussian_ids,
             conics,
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
index 6002efa60..fb2f8891d 100644
--- a/gsplat/cuda/csrc/bindings.h
+++ b/gsplat/cuda/csrc/bindings.h
@@ -53,7 +53,8 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 );
 
 std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
@@ -63,6 +64,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
     const uint32_t width,
     const uint32_t height,
     const bool ortho,
+    const bool fisheye,
     const torch::Tensor &v_means2d, // [C, N, 2]
     const torch::Tensor &v_covars2d // [C, N, 2, 2]
 );
diff --git a/gsplat/cuda/csrc/proj_bwd.cu b/gsplat/cuda/csrc/proj_bwd.cu
index f5d44a3e5..cb05c791a 100644
--- a/gsplat/cuda/csrc/proj_bwd.cu
+++ b/gsplat/cuda/csrc/proj_bwd.cu
@@ -26,6 +26,7 @@ __global__ void proj_bwd_kernel(
     const uint32_t width,
     const uint32_t height,
     const bool ortho,
+    const bool fisheye,
     const T *__restrict__ v_means2d,  // [C, N, 2]
     const T *__restrict__ v_covars2d, // [C, N, 2, 2]
     T *__restrict__ v_means,          // [C, N, 3]
@@ -75,6 +76,21 @@ __global__ void proj_bwd_kernel(
             v_mean,
             v_covar
         );
+    } else if (fisheye) {
+        fisheye_proj_vjp<OpT>(
+            mean,
+            covar,
+            fx,
+            fy,
+            cx,
+            cy,
+            width,
+            height,
+            glm::transpose(v_covar2d),
+            v_mean2d,
+            v_mean,
+            v_covar
+        );
     } else {
         persp_proj_vjp<OpT>(
             mean,
@@ -114,6 +130,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
     const uint32_t width,
     const uint32_t height,
     const bool ortho,
+    const bool fisheye,
     const torch::Tensor &v_means2d, // [C, N, 2]
     const torch::Tensor &v_covars2d // [C, N, 2, 2]
 ) {
@@ -151,6 +168,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
                         width,
                         height,
                         ortho,
+                        fisheye,
                         v_means2d.data_ptr<scalar_t>(),
                         v_covars2d.data_ptr<scalar_t>(),
                         v_means.data_ptr<scalar_t>(),
diff --git a/gsplat/cuda/csrc/proj_fwd.cu b/gsplat/cuda/csrc/proj_fwd.cu
index 7944bd581..8478b9e04 100644
--- a/gsplat/cuda/csrc/proj_fwd.cu
+++ b/gsplat/cuda/csrc/proj_fwd.cu
@@ -26,6 +26,7 @@ __global__ void proj_fwd_kernel(
     const uint32_t width,
     const uint32_t height,
     const bool ortho,
+    const bool fisheye,
     T *__restrict__ means2d, // [C, N, 2]
     T *__restrict__ covars2d // [C, N, 2, 2]
 ) {
@@ -55,6 +56,8 @@ __global__ void proj_fwd_kernel(
 
     if (ortho)
         ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
+    else if (fisheye)
+        fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
     else
         persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
 
@@ -78,7 +81,8 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -112,6 +116,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
                         width,
                         height,
                         ortho,
+                        fisheye,
                         means2d.data_ptr<scalar_t>(),
                         covars2d.data_ptr<scalar_t>()
                     );
diff --git a/gsplat/rendering.py b/gsplat/rendering.py
index 5aa11ae5a..cddc2742f 100644
--- a/gsplat/rendering.py
+++ b/gsplat/rendering.py
@@ -49,8 +49,7 @@ def rasterization(
     rasterize_mode: Literal["classic", "antialiased"] = "classic",
     channel_chunk: int = 32,
     distributed: bool = False,
-    ortho: bool = False,
-    fisheye: bool = False,
+    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     covars: Optional[Tensor] = None,
 ) -> Tuple[Tensor, Tensor, Dict]:
     """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C).
@@ -311,8 +310,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso
         radius_clip=radius_clip,
         sparse_grad=sparse_grad,
         calc_compensations=(rasterize_mode == "antialiased"),
-        ortho=ortho,
-        fisheye=fisheye,
+        camera_model=camera_model,
     )
 
     if packed:
diff --git a/tests/test_basic.py b/tests/test_basic.py
index a64084776..11c747dd1 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -6,6 +6,7 @@
 ```
 """
 
+from typing_extensions import Literal, assert_never
 import math
 
 import pytest
@@ -122,9 +123,9 @@ def test_world_to_cam(test_data):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
-@pytest.mark.parametrize("ortho", [True, False])
-def test_proj(test_data, ortho: bool):
-    from gsplat.cuda._torch_impl import _persp_proj, _ortho_proj
+@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"])
+def test_proj(test_data, camera_model: Literal["pinhole", "ortho", "fisheye"]):
+    from gsplat.cuda._torch_impl import _persp_proj, _ortho_proj, _fisheye_proj
     from gsplat.cuda._wrapper import proj, quat_scale_to_covar_preci, world_to_cam
 
     torch.manual_seed(42)
@@ -140,11 +141,15 @@ def test_proj(test_data, ortho: bool):
     covars.requires_grad = True
 
     # forward
-    means2d, covars2d = proj(means, covars, Ks, width, height, ortho)
-    if ortho:
+    means2d, covars2d = proj(means, covars, Ks, width, height, camera_model)
+    if camera_model == "ortho":
         _means2d, _covars2d = _ortho_proj(means, covars, Ks, width, height)
-    else:
+    elif camera_model == "fisheye":
+        _means2d, _covars2d = _fisheye_proj(means, covars, Ks, width, height)
+    elif camera_model == "pinhole":
         _means2d, _covars2d = _persp_proj(means, covars, Ks, width, height)
+    else:
+        assert_never(camera_model)
 
     torch.testing.assert_close(means2d, _means2d, rtol=1e-4, atol=1e-4)
     torch.testing.assert_close(covars2d, _covars2d, rtol=1e-1, atol=3e-2)
@@ -165,12 +170,14 @@ def test_proj(test_data, ortho: bool):
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
-@pytest.mark.parametrize("fisheye", [True, False])
+@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"])
 @pytest.mark.parametrize("fused", [False, True])
 @pytest.mark.parametrize("calc_compensations", [True, False])
-@pytest.mark.parametrize("ortho", [True, False])
 def test_projection(
-    test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool
+    test_data,
+    fused: bool,
+    calc_compensations: bool,
+    camera_model: Literal["pinhole", "ortho", "fisheye"],
 ):
     from gsplat.cuda._torch_impl import _fully_fused_projection
     from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci
@@ -202,8 +209,7 @@ def test_projection(
             width,
             height,
             calc_compensations=calc_compensations,
-            ortho=ortho,
-            fisheye=fisheye,
+            camera_model=camera_model,
         )
     else:
         covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True)  # [N, 6]
@@ -217,8 +223,7 @@ def test_projection(
             width,
             height,
             calc_compensations=calc_compensations,
-            ortho=ortho,
-            fisheye=fisheye,
+            camera_model=camera_model,
         )
     _covars, _ = quat_scale_to_covar_preci(quats, scales, triu=False)  # [N, 3, 3]
     _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection(
@@ -229,8 +234,7 @@ def test_projection(
         width,
         height,
         calc_compensations=calc_compensations,
-        ortho=ortho,
-        fisheye=fisheye,
+        camera_model=camera_model,
     )
 
     # radii is integer so we allow for 1 unit difference
@@ -275,9 +279,13 @@ def test_projection(
 @pytest.mark.parametrize("fused", [False, True])
 @pytest.mark.parametrize("sparse_grad", [False, True])
 @pytest.mark.parametrize("calc_compensations", [False, True])
-@pytest.mark.parametrize("ortho", [True, False])
+@pytest.mark.parametrize("camera_model", ["pinhole", "ortho"])
 def test_fully_fused_projection_packed(
-    test_data, fused: bool, sparse_grad: bool, calc_compensations: bool, ortho: bool
+    test_data,
+    fused: bool,
+    sparse_grad: bool,
+    calc_compensations: bool,
+    camera_model: Literal["pinhole", "ortho", "fisheye"],
 ):
     from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci
 
@@ -318,7 +326,7 @@ def test_fully_fused_projection_packed(
             packed=True,
             sparse_grad=sparse_grad,
             calc_compensations=calc_compensations,
-            ortho=ortho,
+            camera_model=camera_model,
         )
         _radii, _means2d, _depths, _conics, _compensations = fully_fused_projection(
             means,
@@ -331,7 +339,7 @@ def test_fully_fused_projection_packed(
             height,
             packed=False,
             calc_compensations=calc_compensations,
-            ortho=ortho,
+            camera_model=camera_model,
         )
     else:
         covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True)  # [N, 6]
@@ -355,7 +363,7 @@ def test_fully_fused_projection_packed(
             packed=True,
             sparse_grad=sparse_grad,
             calc_compensations=calc_compensations,
-            ortho=ortho,
+            camera_model=camera_model,
         )
         _radii, _means2d, _depths, _conics, _compensations = fully_fused_projection(
             means,
@@ -368,7 +376,7 @@ def test_fully_fused_projection_packed(
             height,
             packed=False,
             calc_compensations=calc_compensations,
-            ortho=ortho,
+            camera_model=camera_model,
         )
 
     # recover packed tensors to full matrices for testing

From 593769c6f6c5abb659753bfe95b5fcfd54c62657 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Tue, 17 Sep 2024 16:06:24 -0700
Subject: [PATCH 32/38] fisheye packed mode

---
 gsplat/cuda/_wrapper.py                       |  2 ++
 gsplat/cuda/csrc/bindings.h                   |  4 +++-
 .../csrc/fully_fused_projection_packed_bwd.cu | 19 ++++++++++++++++++
 .../csrc/fully_fused_projection_packed_fwd.cu | 20 ++++++++++++++++++-
 tests/test_basic.py                           |  2 +-
 5 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index 2ddd2ac9b..c55bab0a4 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -1042,6 +1042,7 @@ def forward(
             radius_clip,
             calc_compensations,
             camera_model == "ortho",
+            camera_model == "fisheye",
         )
         if not calc_compensations:
             compensations = None
@@ -1109,6 +1110,7 @@ def backward(
             height,
             eps2d,
             camera_model == "ortho",
+            camera_model == "fisheye",
             camera_ids,
             gaussian_ids,
             conics,
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
index fb2f8891d..a565c6760 100644
--- a/gsplat/cuda/csrc/bindings.h
+++ b/gsplat/cuda/csrc/bindings.h
@@ -270,7 +270,8 @@ fully_fused_projection_packed_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 );
 
 std::tuple<
@@ -291,6 +292,7 @@ fully_fused_projection_packed_bwd_tensor(
     const uint32_t image_height,
     const float eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const torch::Tensor &camera_ids,                  // [nnz]
     const torch::Tensor &gaussian_ids,                // [nnz]
diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
index 65eada111..bd351403a 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
@@ -32,6 +32,7 @@ __global__ void fully_fused_projection_packed_bwd_kernel(
     const int32_t image_height,
     const T eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const int64_t *__restrict__ camera_ids,   // [nnz]
     const int64_t *__restrict__ gaussian_ids, // [nnz]
@@ -145,6 +146,22 @@ __global__ void fully_fused_projection_packed_bwd_kernel(
             v_mean_c,
             v_covar_c
         );
+    } else if (fisheye) {
+        // vjp: fisheye projection
+        fisheye_proj_vjp<T>(
+            mean_c,
+            covar_c,
+            fx,
+            fy,
+            cx,
+            cy,
+            image_width,
+            image_height,
+            v_covar2d,
+            glm::make_vec2(v_means2d),
+            v_mean_c,
+            v_covar_c
+        );
     } else {
         // vjp: perspective projection
         persp_proj_vjp<T>(
@@ -298,6 +315,7 @@ fully_fused_projection_packed_bwd_tensor(
     const uint32_t image_height,
     const float eps2d,
     const bool ortho,
+    const bool fisheye,
     // fwd outputs
     const torch::Tensor &camera_ids,                  // [nnz]
     const torch::Tensor &gaussian_ids,                // [nnz]
@@ -384,6 +402,7 @@ fully_fused_projection_packed_bwd_tensor(
                 image_height,
                 eps2d,
                 ortho,
+                fisheye,
                 camera_ids.data_ptr<int64_t>(),
                 gaussian_ids.data_ptr<int64_t>(),
                 conics.data_ptr<float>(),
diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
index 12f21c611..983ef2940 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
@@ -35,6 +35,7 @@ __global__ void fully_fused_projection_packed_fwd_kernel(
     const int32_t
         *__restrict__ block_accum,    // [C * blocks_per_row] packing helper
     const bool ortho,
+    const bool fisheye,
     // outputs
     int32_t *__restrict__ block_cnts, // [C * blocks_per_row] packing helper
     int32_t *__restrict__ indptr,       // [C + 1]
@@ -134,6 +135,20 @@ __global__ void fully_fused_projection_packed_fwd_kernel(
                 covar2d,
                 mean2d
             );
+        } else if (fisheye) {
+            // fisheye projection
+            fisheye_proj<T>(
+                mean_c,
+                covar_c,
+                Ks[0],
+                Ks[4],
+                Ks[2],
+                Ks[5],
+                image_width,
+                image_height,
+                covar2d,
+                mean2d
+            );
         } else {
             // perspective projection
             persp_proj<T>(
@@ -255,7 +270,8 @@ fully_fused_projection_packed_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho
+    const bool ortho,
+    const bool fisheye
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -305,6 +321,7 @@ fully_fused_projection_packed_fwd_tensor(
                 radius_clip,
                 nullptr,
                 ortho,
+                fisheye,
                 block_cnts.data_ptr<int32_t>(),
                 nullptr,
                 nullptr,
@@ -355,6 +372,7 @@ fully_fused_projection_packed_fwd_tensor(
                 radius_clip,
                 block_accum.data_ptr<int32_t>(),
                 ortho,
+                fisheye,
                 nullptr,
                 indptr.data_ptr<int32_t>(),
                 camera_ids.data_ptr<int64_t>(),
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 11c747dd1..22d2ee227 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -279,7 +279,7 @@ def test_projection(
 @pytest.mark.parametrize("fused", [False, True])
 @pytest.mark.parametrize("sparse_grad", [False, True])
 @pytest.mark.parametrize("calc_compensations", [False, True])
-@pytest.mark.parametrize("camera_model", ["pinhole", "ortho"])
+@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"])
 def test_fully_fused_projection_packed(
     test_data,
     fused: bool,

From 07d20876bd94566700855ec5affffb035707b451 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 10:09:03 -0700
Subject: [PATCH 33/38] cuda enum

---
 gsplat/cuda/_wrapper.py      |  20 +++++--
 gsplat/cuda/csrc/bindings.h  |  13 +++--
 gsplat/cuda/csrc/ext.cpp     |   6 ++
 gsplat/cuda/csrc/proj_bwd.cu | 103 ++++++++++++++++++-----------------
 gsplat/cuda/csrc/proj_fwd.cu |  26 +++++----
 5 files changed, 96 insertions(+), 72 deletions(-)

diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index c55bab0a4..1e6dab002 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -1,4 +1,4 @@
-from typing import Callable, Optional, Tuple
+from typing import Callable, Optional, Tuple, Any
 import warnings
 from typing_extensions import Literal
 
@@ -16,6 +16,16 @@ def call_cuda(*args, **kwargs):
     return call_cuda
 
 
+def _make_lazy_cuda_obj(name: str) -> Any:
+    """Look up a dotted attribute path on the compiled extension module `_C`."""
+    from ._backend import _C  # pylint: disable=import-outside-toplevel
+
+    obj = _C
+    for name_split in name.split("."):
+        obj = getattr(obj, name_split)  # descend from the previous level, not `_C`
+    return obj
+
+
 def spherical_harmonics(
     degrees_to_use: int,
     dirs: Tensor,  # [..., 3]
@@ -669,14 +679,15 @@ def forward(
         height: int,
         camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor]:
+        camera_model = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}")
+
         means2d, covars2d = _make_lazy_cuda_func("proj_fwd")(
             means,
             covars,
             Ks,
             width,
             height,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model,
         )
         ctx.save_for_backward(means, covars, Ks)
         ctx.width = width
@@ -696,8 +707,7 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor):
             Ks,
             width,
             height,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model,
             v_means2d.contiguous(),
             v_covars2d.contiguous(),
         )
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
index a565c6760..d71e63bfe 100644
--- a/gsplat/cuda/csrc/bindings.h
+++ b/gsplat/cuda/csrc/bindings.h
@@ -31,6 +31,13 @@
 
 namespace gsplat {
 
+enum CameraModelType // selects which projection the proj kernels apply
+{
+    PINHOLE = 0, // perspective projection
+    ORTHO = 1,   // orthographic projection
+    FISHEYE = 2, // fisheye projection
+};
+
 std::tuple<torch::Tensor, torch::Tensor> quat_scale_to_covar_preci_fwd_tensor(
     const torch::Tensor &quats,  // [N, 4]
     const torch::Tensor &scales, // [N, 3]
@@ -53,8 +60,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 );
 
 std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
@@ -63,8 +69,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     const torch::Tensor &v_means2d, // [C, N, 2]
     const torch::Tensor &v_covars2d // [C, N, 2, 2]
 );
diff --git a/gsplat/cuda/csrc/ext.cpp b/gsplat/cuda/csrc/ext.cpp
index 0a4a67aac..a85129959 100644
--- a/gsplat/cuda/csrc/ext.cpp
+++ b/gsplat/cuda/csrc/ext.cpp
@@ -1,6 +1,12 @@
 #include "bindings.h"
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    py::enum_<gsplat::CameraModelType>(m, "CameraModelType")
+        .value("PINHOLE", gsplat::CameraModelType::PINHOLE)
+        .value("ORTHO", gsplat::CameraModelType::ORTHO)
+        .value("FISHEYE", gsplat::CameraModelType::FISHEYE)
+        .export_values();
+
     m.def("compute_sh_fwd", &gsplat::compute_sh_fwd_tensor);
     m.def("compute_sh_bwd", &gsplat::compute_sh_bwd_tensor);
 
diff --git a/gsplat/cuda/csrc/proj_bwd.cu b/gsplat/cuda/csrc/proj_bwd.cu
index cb05c791a..66557f679 100644
--- a/gsplat/cuda/csrc/proj_bwd.cu
+++ b/gsplat/cuda/csrc/proj_bwd.cu
@@ -25,8 +25,7 @@ __global__ void proj_bwd_kernel(
     const T *__restrict__ Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     const T *__restrict__ v_means2d,  // [C, N, 2]
     const T *__restrict__ v_covars2d, // [C, N, 2, 2]
     T *__restrict__ v_means,          // [C, N, 3]
@@ -61,51 +60,55 @@ __global__ void proj_bwd_kernel(
     const vec2<OpT> v_mean2d = glm::make_vec2(v_means2d);
     const mat2<OpT> v_covar2d = glm::make_mat2(v_covars2d);
 
-    if (ortho){
-        ortho_proj_vjp<OpT>(
-            mean,
-            covar,
-            fx,
-            fy,
-            cx,
-            cy,
-            width,
-            height,
-            glm::transpose(v_covar2d),
-            v_mean2d,
-            v_mean,
-            v_covar
-        );
-    } else if (fisheye) {
-        fisheye_proj_vjp<OpT>(
-            mean,
-            covar,
-            fx,
-            fy,
-            cx,
-            cy,
-            width,
-            height,
-            glm::transpose(v_covar2d),
-            v_mean2d,
-            v_mean,
-            v_covar
-        );
-    } else {
-        persp_proj_vjp<OpT>(
-            mean,
-            covar,
-            fx,
-            fy,
-            cx,
-            cy,
-            width,
-            height,
-            glm::transpose(v_covar2d),
-            v_mean2d,
-            v_mean,
-            v_covar
-        );
+    switch (camera_model) {
+        case CameraModelType::PINHOLE: // perspective projection
+            persp_proj_vjp<OpT>(
+                mean,
+                covar,
+                fx,
+                fy,
+                cx,
+                cy,
+                width,
+                height,
+                glm::transpose(v_covar2d),
+                v_mean2d,
+                v_mean,
+                v_covar
+            );
+            break;
+        case CameraModelType::ORTHO: // orthographic projection
+            ortho_proj_vjp<OpT>(
+                mean,
+                covar,
+                fx,
+                fy,
+                cx,
+                cy,
+                width,
+                height,
+                glm::transpose(v_covar2d),
+                v_mean2d,
+                v_mean,
+                v_covar
+            );
+            break;
+        case CameraModelType::FISHEYE: // fisheye projection
+            fisheye_proj_vjp<OpT>(
+                mean,
+                covar,
+                fx,
+                fy,
+                cx,
+                cy,
+                width,
+                height,
+                glm::transpose(v_covar2d),
+                v_mean2d,
+                v_mean,
+                v_covar
+            );
+            break;
     }
 
     // write to outputs: glm is column-major but we want row-major
@@ -129,8 +132,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     const torch::Tensor &v_means2d, // [C, N, 2]
     const torch::Tensor &v_covars2d // [C, N, 2, 2]
 ) {
@@ -167,8 +169,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_bwd_tensor(
                         Ks.data_ptr<scalar_t>(),
                         width,
                         height,
-                        ortho,
-                        fisheye,
+                        camera_model,
                         v_means2d.data_ptr<scalar_t>(),
                         v_covars2d.data_ptr<scalar_t>(),
                         v_means.data_ptr<scalar_t>(),
diff --git a/gsplat/cuda/csrc/proj_fwd.cu b/gsplat/cuda/csrc/proj_fwd.cu
index 8478b9e04..861f60479 100644
--- a/gsplat/cuda/csrc/proj_fwd.cu
+++ b/gsplat/cuda/csrc/proj_fwd.cu
@@ -25,8 +25,7 @@ __global__ void proj_fwd_kernel(
     const T *__restrict__ Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     T *__restrict__ means2d, // [C, N, 2]
     T *__restrict__ covars2d // [C, N, 2, 2]
 ) {
@@ -54,12 +53,17 @@ __global__ void proj_fwd_kernel(
     const vec3<OpT> mean = glm::make_vec3(means);
     const mat3<OpT> covar = glm::make_mat3(covars);
 
-    if (ortho)
-        ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
-    else if (fisheye)
-        fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
-    else
-        persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
+    switch (camera_model) {
+        case CameraModelType::PINHOLE: // perspective projection
+            persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
+            break;
+        case CameraModelType::ORTHO: // orthographic projection
+            ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
+            break;
+        case CameraModelType::FISHEYE: // fisheye projection
+            fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d);
+            break;
+    }
 
     // write to outputs: glm is column-major but we want row-major
     GSPLAT_PRAGMA_UNROLL
@@ -81,8 +85,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
     const torch::Tensor &Ks,     // [C, 3, 3]
     const uint32_t width,
     const uint32_t height,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -115,8 +118,7 @@ std::tuple<torch::Tensor, torch::Tensor> proj_fwd_tensor(
                         Ks.data_ptr<scalar_t>(),
                         width,
                         height,
-                        ortho,
-                        fisheye,
+                        camera_model,
                         means2d.data_ptr<scalar_t>(),
                         covars2d.data_ptr<scalar_t>()
                     );

From e7370735d71dcb68a34c54f5244e137618095f35 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 10:35:30 -0700
Subject: [PATCH 34/38] use C++ CameraModelType enum in fully fused projection

---
 gsplat/cuda/_wrapper.py                       |  40 ++++---
 gsplat/cuda/csrc/bindings.h                   |  12 +-
 .../cuda/csrc/fully_fused_projection_bwd.cu   | 103 ++++++++---------
 .../cuda/csrc/fully_fused_projection_fwd.cu   |  91 +++++++--------
 .../csrc/fully_fused_projection_packed_bwd.cu | 106 +++++++++---------
 .../csrc/fully_fused_projection_packed_fwd.cu |  98 ++++++++--------
 6 files changed, 224 insertions(+), 226 deletions(-)

diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py
index 1e6dab002..1c3826110 100644
--- a/gsplat/cuda/_wrapper.py
+++ b/gsplat/cuda/_wrapper.py
@@ -679,7 +679,9 @@ def forward(
         height: int,
         camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor]:
-        camera_model = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}")
+        camera_model_type = _make_lazy_cuda_obj(
+            f"CameraModelType.{camera_model.upper()}"
+        )
 
         means2d, covars2d = _make_lazy_cuda_func("proj_fwd")(
             means,
@@ -687,12 +689,12 @@ def forward(
             Ks,
             width,
             height,
-            camera_model,
+            camera_model_type,
         )
         ctx.save_for_backward(means, covars, Ks)
         ctx.width = width
         ctx.height = height
-        ctx.camera_model = camera_model
+        ctx.camera_model_type = camera_model_type
         return means2d, covars2d
 
     @staticmethod
@@ -700,14 +702,14 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor):
         means, covars, Ks = ctx.saved_tensors
         width = ctx.width
         height = ctx.height
-        camera_model = ctx.camera_model
+        camera_model_type = ctx.camera_model_type
         v_means, v_covars = _make_lazy_cuda_func("proj_bwd")(
             means,
             covars,
             Ks,
             width,
             height,
-            camera_model,
+            camera_model_type,
             v_means2d.contiguous(),
             v_covars2d.contiguous(),
         )
@@ -773,6 +775,10 @@ def forward(
         calc_compensations: bool,
         camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
+        camera_model_type = _make_lazy_cuda_obj(
+            f"CameraModelType.{camera_model.upper()}"
+        )
+
         # "covars" and {"quats", "scales"} are mutually exclusive
         radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
             "fully_fused_projection_fwd"
@@ -790,8 +796,7 @@ def forward(
             far_plane,
             radius_clip,
             calc_compensations,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model_type,
         )
         if not calc_compensations:
             compensations = None
@@ -801,7 +806,7 @@ def forward(
         ctx.width = width
         ctx.height = height
         ctx.eps2d = eps2d
-        ctx.camera_model = camera_model
+        ctx.camera_model_type = camera_model_type
 
         return radii, means2d, depths, conics, compensations
 
@@ -821,7 +826,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
         width = ctx.width
         height = ctx.height
         eps2d = ctx.eps2d
-        camera_model = ctx.camera_model
+        camera_model_type = ctx.camera_model_type
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
         v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
@@ -836,8 +841,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
             width,
             height,
             eps2d,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model_type,
             radii,
             conics,
             compensations,
@@ -1028,6 +1032,10 @@ def forward(
         calc_compensations: bool,
         camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole",
     ) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
+        camera_model_type = _make_lazy_cuda_obj(
+            f"CameraModelType.{camera_model.upper()}"
+        )
+
         (
             indptr,
             camera_ids,
@@ -1051,8 +1059,7 @@ def forward(
             far_plane,
             radius_clip,
             calc_compensations,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model_type,
         )
         if not calc_compensations:
             compensations = None
@@ -1072,7 +1079,7 @@ def forward(
         ctx.height = height
         ctx.eps2d = eps2d
         ctx.sparse_grad = sparse_grad
-        ctx.camera_model = camera_model
+        ctx.camera_model_type = camera_model_type
 
         return camera_ids, gaussian_ids, radii, means2d, depths, conics, compensations
 
@@ -1103,7 +1110,7 @@ def backward(
         height = ctx.height
         eps2d = ctx.eps2d
         sparse_grad = ctx.sparse_grad
-        camera_model = ctx.camera_model
+        camera_model_type = ctx.camera_model_type
 
         if v_compensations is not None:
             v_compensations = v_compensations.contiguous()
@@ -1119,8 +1126,7 @@ def backward(
             width,
             height,
             eps2d,
-            camera_model == "ortho",
-            camera_model == "fisheye",
+            camera_model_type,
             camera_ids,
             gaussian_ids,
             conics,
diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h
index d71e63bfe..cf0dc8751 100644
--- a/gsplat/cuda/csrc/bindings.h
+++ b/gsplat/cuda/csrc/bindings.h
@@ -111,8 +111,7 @@ fully_fused_projection_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 );
 
 std::tuple<
@@ -132,8 +131,7 @@ fully_fused_projection_bwd_tensor(
     const uint32_t image_width,
     const uint32_t image_height,
     const float eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const torch::Tensor &radii,                       // [C, N]
     const torch::Tensor &conics,                      // [C, N, 3]
@@ -275,8 +273,7 @@ fully_fused_projection_packed_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 );
 
 std::tuple<
@@ -296,8 +293,7 @@ fully_fused_projection_packed_bwd_tensor(
     const uint32_t image_width,
     const uint32_t image_height,
     const float eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const torch::Tensor &camera_ids,                  // [nnz]
     const torch::Tensor &gaussian_ids,                // [nnz]
diff --git a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
index a069eb2db..b5757ff40 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu
@@ -30,8 +30,7 @@ __global__ void fully_fused_projection_bwd_kernel(
     const int32_t image_width,
     const int32_t image_height,
     const T eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const int32_t *__restrict__ radii,   // [C, N]
     const T *__restrict__ conics,        // [C, N, 3]
@@ -129,51 +128,55 @@ __global__ void fully_fused_projection_bwd_kernel(
     mat3<T> v_covar_c(0.f);
     vec3<T> v_mean_c(0.f);
 
-    if (ortho){
-        ortho_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
-    } else if (fisheye) {
-        fisheye_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
-    } else {
-        persp_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
+    switch (camera_model) {
+        case CameraModelType::PINHOLE: // perspective projection
+            persp_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
+        case CameraModelType::ORTHO: // orthographic projection
+            ortho_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
+        case CameraModelType::FISHEYE: // fisheye projection
+            fisheye_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
     }
 
     // add contribution from v_depths
@@ -272,8 +275,7 @@ fully_fused_projection_bwd_tensor(
     const uint32_t image_width,
     const uint32_t image_height,
     const float eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const torch::Tensor &radii,                       // [C, N]
     const torch::Tensor &conics,                      // [C, N, 3]
@@ -342,8 +344,7 @@ fully_fused_projection_bwd_tensor(
                 image_width,
                 image_height,
                 eps2d,
-                ortho,
-                fisheye,
+                camera_model,
                 radii.data_ptr<int32_t>(),
                 conics.data_ptr<float>(),
                 compensations.has_value()
diff --git a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
index 0ffeebb19..c651e803d 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu
@@ -32,8 +32,7 @@ __global__ void fully_fused_projection_fwd_kernel(
     const T near_plane,
     const T far_plane,
     const T radius_clip,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // outputs
     int32_t *__restrict__ radii,  // [C, N]
     T *__restrict__ means2d,      // [C, N, 2]
@@ -106,45 +105,49 @@ __global__ void fully_fused_projection_fwd_kernel(
     mat2<T> covar2d;
     vec2<T> mean2d;
 
-    if (ortho){
-        ortho_proj<T>(
-            mean_c,
-            covar_c,
-            Ks[0],
-            Ks[4],
-            Ks[2],
-            Ks[5],
-            image_width,
-            image_height,
-            covar2d,
-            mean2d
-        );
-    } else if (fisheye) {
-        fisheye_proj<T>(
-            mean_c,
-            covar_c,
-            Ks[0],
-            Ks[4],
-            Ks[2],
-            Ks[5],
-            image_width,
-            image_height,
-            covar2d,
-            mean2d
-        );
-    } else {
-        persp_proj<T>(
-            mean_c,
-            covar_c,
-            Ks[0],
-            Ks[4],
-            Ks[2],
-            Ks[5],
-            image_width,
-            image_height,
-            covar2d,
-            mean2d
-        );
+    switch (camera_model) {
+        case CameraModelType::PINHOLE: // perspective projection
+            persp_proj<T>(
+                mean_c,
+                covar_c,
+                Ks[0],
+                Ks[4],
+                Ks[2],
+                Ks[5],
+                image_width,
+                image_height,
+                covar2d,
+                mean2d
+            );
+            break;
+        case CameraModelType::ORTHO: // orthographic projection
+            ortho_proj<T>(
+                mean_c,
+                covar_c,
+                Ks[0],
+                Ks[4],
+                Ks[2],
+                Ks[5],
+                image_width,
+                image_height,
+                covar2d,
+                mean2d
+            );
+            break;
+        case CameraModelType::FISHEYE: // fisheye projection
+            fisheye_proj<T>(
+                mean_c,
+                covar_c,
+                Ks[0],
+                Ks[4],
+                Ks[2],
+                Ks[5],
+                image_width,
+                image_height,
+                covar2d,
+                mean2d
+            );
+            break;
     }
 
     T compensation;
@@ -210,8 +213,7 @@ fully_fused_projection_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -259,8 +261,7 @@ fully_fused_projection_fwd_tensor(
                 near_plane,
                 far_plane,
                 radius_clip,
-                ortho,
-                fisheye,
+                camera_model,
                 radii.data_ptr<int32_t>(),
                 means2d.data_ptr<float>(),
                 depths.data_ptr<float>(),
diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
index bd351403a..e5a0172fe 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu
@@ -31,8 +31,7 @@ __global__ void fully_fused_projection_packed_bwd_kernel(
     const int32_t image_width,
     const int32_t image_height,
     const T eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const int64_t *__restrict__ camera_ids,   // [nnz]
     const int64_t *__restrict__ gaussian_ids, // [nnz]
@@ -130,54 +129,55 @@ __global__ void fully_fused_projection_packed_bwd_kernel(
     T fx = Ks[0], cx = Ks[2], fy = Ks[4], cy = Ks[5];
     mat3<T> v_covar_c(0.f);
     vec3<T> v_mean_c(0.f);
-    if (ortho){
-        // vjp: orthographic projection
-        ortho_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
-    } else if (fisheye) {
-        // vjp: fisheye projection
-        fisheye_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
-    } else {
-        // vjp: perspective projection
-        persp_proj_vjp<T>(
-            mean_c,
-            covar_c,
-            fx,
-            fy,
-            cx,
-            cy,
-            image_width,
-            image_height,
-            v_covar2d,
-            glm::make_vec2(v_means2d),
-            v_mean_c,
-            v_covar_c
-        );
+    switch (camera_model) {
+        case CameraModelType::PINHOLE: // perspective projection
+            persp_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
+        case CameraModelType::ORTHO: // orthographic projection
+            ortho_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
+        case CameraModelType::FISHEYE: // fisheye projection
+            fisheye_proj_vjp<T>(
+                mean_c,
+                covar_c,
+                fx,
+                fy,
+                cx,
+                cy,
+                image_width,
+                image_height,
+                v_covar2d,
+                glm::make_vec2(v_means2d),
+                v_mean_c,
+                v_covar_c
+            );
+            break;
     }
 
     // add contribution from v_depths
@@ -314,8 +314,7 @@ fully_fused_projection_packed_bwd_tensor(
     const uint32_t image_width,
     const uint32_t image_height,
     const float eps2d,
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // fwd outputs
     const torch::Tensor &camera_ids,                  // [nnz]
     const torch::Tensor &gaussian_ids,                // [nnz]
@@ -401,8 +400,7 @@ fully_fused_projection_packed_bwd_tensor(
                 image_width,
                 image_height,
                 eps2d,
-                ortho,
-                fisheye,
+                camera_model,
                 camera_ids.data_ptr<int64_t>(),
                 gaussian_ids.data_ptr<int64_t>(),
                 conics.data_ptr<float>(),
diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
index 983ef2940..4d8609f05 100644
--- a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
+++ b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu
@@ -34,8 +34,7 @@ __global__ void fully_fused_projection_packed_fwd_kernel(
     const T radius_clip,
     const int32_t
         *__restrict__ block_accum,    // [C * blocks_per_row] packing helper
-    const bool ortho,
-    const bool fisheye,
+    const CameraModelType camera_model,
     // outputs
     int32_t *__restrict__ block_cnts, // [C * blocks_per_row] packing helper
     int32_t *__restrict__ indptr,       // [C + 1]
@@ -121,51 +120,51 @@ __global__ void fully_fused_projection_packed_fwd_kernel(
         covar_world_to_cam(R, covar, covar_c);
         
         Ks += row_idx * 9;
-        if (ortho){
-            // orthographic projection
-            ortho_proj<T>(
-                mean_c,
-                covar_c,
-                Ks[0],
-                Ks[4],
-                Ks[2],
-                Ks[5],
-                image_width,
-                image_height,
-                covar2d,
-                mean2d
-            );
-        } else if (fisheye) {
-            // fisheye projection
-            fisheye_proj<T>(
-                mean_c,
-                covar_c,
-                Ks[0],
-                Ks[4],
-                Ks[2],
-                Ks[5],
-                image_width,
-                image_height,
-                covar2d,
-                mean2d
-            );
-        } else {
-            // perspective projection
-            persp_proj<T>(
-                mean_c,
-                covar_c,
-                Ks[0],
-                Ks[4],
-                Ks[2],
-                Ks[5],
-                image_width,
-                image_height,
-                covar2d,
-                mean2d
-            );
+        switch (camera_model) {
+            case CameraModelType::PINHOLE: // perspective projection
+                persp_proj<T>(
+                    mean_c,
+                    covar_c,
+                    Ks[0],
+                    Ks[4],
+                    Ks[2],
+                    Ks[5],
+                    image_width,
+                    image_height,
+                    covar2d,
+                    mean2d
+                );
+                break;
+            case CameraModelType::ORTHO: // orthographic projection
+                ortho_proj<T>(
+                    mean_c,
+                    covar_c,
+                    Ks[0],
+                    Ks[4],
+                    Ks[2],
+                    Ks[5],
+                    image_width,
+                    image_height,
+                    covar2d,
+                    mean2d
+                );
+                break;
+            case CameraModelType::FISHEYE: // fisheye projection
+                fisheye_proj<T>(
+                    mean_c,
+                    covar_c,
+                    Ks[0],
+                    Ks[4],
+                    Ks[2],
+                    Ks[5],
+                    image_width,
+                    image_height,
+                    covar2d,
+                    mean2d
+                );
+                break;
         }
 
-
         det = add_blur(eps2d, covar2d, compensation);
         if (det <= 0.f) {
             valid = false;
@@ -270,8 +269,7 @@ fully_fused_projection_packed_fwd_tensor(
     const float far_plane,
     const float radius_clip,
     const bool calc_compensations,
-    const bool ortho,
-    const bool fisheye
+    const CameraModelType camera_model
 ) {
     GSPLAT_DEVICE_GUARD(means);
     GSPLAT_CHECK_INPUT(means);
@@ -320,8 +318,7 @@ fully_fused_projection_packed_fwd_tensor(
                 far_plane,
                 radius_clip,
                 nullptr,
-                ortho,
-                fisheye,
+                camera_model,
                 block_cnts.data_ptr<int32_t>(),
                 nullptr,
                 nullptr,
@@ -371,8 +368,7 @@ fully_fused_projection_packed_fwd_tensor(
                 far_plane,
                 radius_clip,
                 block_accum.data_ptr<int32_t>(),
-                ortho,
-                fisheye,
+                camera_model,
                 nullptr,
                 indptr.data_ptr<int32_t>(),
                 camera_ids.data_ptr<int64_t>(),

From fa23297fb0094cd16c6966e870062295a2c4275e Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 10:44:56 -0700
Subject: [PATCH 35/38] add alameda zipnerf entries to dataset download script

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh           | 3 +--
 examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 9 ++++-----
 examples/datasets/download_dataset.py                 | 6 ++++++
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
index 75507e454..e317df7e4 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -1,5 +1,5 @@
 SCENE_DIR="data/zipnerf"
-SCENE_LIST="berlin london nyc alameda"
+SCENE_LIST="alameda berlin london nyc"
 DATA_FACTOR=4
 RENDER_TRAJ_PATH="ellipse"
 
@@ -21,5 +21,4 @@ do
         --camera_model fisheye \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
-
 done
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
index 1a218e1e2..945961634 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
@@ -1,12 +1,12 @@
-SCENE_DIR="data/zipnerf_undistort"
-SCENE_LIST="berlin london nyc alameda"
+SCENE_DIR="data/zipnerf_undistorted"
+SCENE_LIST="alameda berlin london nyc"
 DATA_FACTOR=4
 RENDER_TRAJ_PATH="ellipse"
 
-RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort"
+RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistorted"
 CAP_MAX=2000000
 
-# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistort"
+# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistorted"
 # CAP_MAX=4000000
 
 for SCENE in $SCENE_LIST;
@@ -21,5 +21,4 @@ do
         --camera_model pinhole \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
-
 done
diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py
index 272970a30..822eaf9a7 100755
--- a/examples/datasets/download_dataset.py
+++ b/examples/datasets/download_dataset.py
@@ -13,6 +13,8 @@
     "mipnerf360",
     "mipnerf360_extra",
     "bilarf_data",
+    "alameda",
+    "alameda_undistorted",
 ]
 
 # dataset urls
@@ -20,6 +22,8 @@
     "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip",
     "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip",
     "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip",
+    "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip",
+    "alameda_undistorted": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip",
 }
 
 # rename maps
@@ -27,6 +31,8 @@
     "mipnerf360": "360_v2",
     "mipnerf360_extra": "360_v2",
     "bilarf_data": "bilarf",
+    "alameda": "zipnerf/alameda",
+    "alameda_undistorted": "zipnerf_undistorted/alameda",
 }
 
 

From 2065224a2c4fc3fb7341a23e23f5c83e95219266 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 11:29:34 -0700
Subject: [PATCH 36/38] refactor dataset download to download zipnerf

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh   |   2 +-
 ...distort.sh => mcmc_zipnerf_undistorted.sh} |   2 +-
 examples/datasets/colmap.py                   |   1 +
 examples/datasets/download_dataset.py         | 125 +++++++++++-------
 4 files changed, 78 insertions(+), 52 deletions(-)
 rename examples/benchmarks/fisheye/{mcmc_zipnerf_undistort.sh => mcmc_zipnerf_undistorted.sh} (94%)

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
index e317df7e4..075b1ffca 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -1,5 +1,5 @@
 SCENE_DIR="data/zipnerf"
-SCENE_LIST="alameda berlin london nyc"
+SCENE_LIST="berlin london nyc alameda"
 DATA_FACTOR=4
 RENDER_TRAJ_PATH="ellipse"
 
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
similarity index 94%
rename from examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
rename to examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
index 945961634..3fcf12915 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
@@ -1,5 +1,5 @@
 SCENE_DIR="data/zipnerf_undistorted"
-SCENE_LIST="alameda berlin london nyc"
+SCENE_LIST="berlin london nyc alameda"
 DATA_FACTOR=4
 RENDER_TRAJ_PATH="ellipse"
 
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
index 127d2d4a4..938bad265 100644
--- a/examples/datasets/colmap.py
+++ b/examples/datasets/colmap.py
@@ -269,6 +269,7 @@ def __init__(
                 mapx = fx * x1 * r + width // 2
                 mapy = fy * y1 * r + height // 2
 
+                # Use mask to define ROI
                 mask = np.logical_and(
                     np.logical_and(mapx > 0, mapy > 0),
                     np.logical_and(mapx < width - 1, mapy < height - 1),
diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py
index 822eaf9a7..a6de57534 100755
--- a/examples/datasets/download_dataset.py
+++ b/examples/datasets/download_dataset.py
@@ -13,8 +13,8 @@
     "mipnerf360",
     "mipnerf360_extra",
     "bilarf_data",
-    "alameda",
-    "alameda_undistorted",
+    "zipnerf",
+    "zipnerf_undistorted",
 ]
 
 # dataset urls
@@ -22,8 +22,18 @@
     "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip",
     "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip",
     "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip",
-    "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip",
-    "alameda_undistorted": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip",
+    "zipnerf": {
+        "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip",
+        "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip",
+        "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip",
+        "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip",
+    },
+    "zipnerf_undistorted": {
+        "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip",
+        "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip",
+        "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip",
+        "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip",
+    },
 }
 
 # rename maps
@@ -31,8 +41,8 @@
     "mipnerf360": "360_v2",
     "mipnerf360_extra": "360_v2",
     "bilarf_data": "bilarf",
-    "alameda": "zipnerf/alameda",
-    "alameda_undistorted": "zipnerf_undistorted/alameda",
+    "zipnerf": "zipnerf",
+    "zipnerf_undistorted": "zipnerf_undistorted",
 }
 
 
@@ -46,57 +56,72 @@ def main(self):
         self.dataset_download(self.dataset)
 
     def dataset_download(self, dataset: dataset_names):
-        (self.save_dir / dataset_rename_map[dataset]).mkdir(parents=True, exist_ok=True)
+        if isinstance(urls[dataset], dict):
+            for name, url in urls[dataset].items():
+                url_file_name = Path(url).name
+                extract_path = self.save_dir / dataset_rename_map[dataset] / name
+                download_path = extract_path / url_file_name
+                download_and_extract(url, download_path, extract_path)
+        else:
+            url = urls[dataset]
+            url_file_name = Path(url).name
+            extract_path = self.save_dir / dataset_rename_map[dataset]
+            download_path = extract_path / url_file_name
+            download_and_extract(url, download_path, extract_path)
 
-        file_name = Path(urls[dataset]).name
 
-        # download
-        download_command = [
-            "curl",
-            "-o",
-            str(self.save_dir / dataset_rename_map[dataset] / file_name),
-            urls[dataset],
-        ]
-        try:
-            subprocess.run(download_command, check=True)
-            print("File file downloaded succesfully.")
-        except subprocess.CalledProcessError as e:
-            print(f"Error downloading file: {e}")
-
-        # if .zip
-        if Path(urls[dataset]).suffix == ".zip":
-            if os.name == "nt":  # Windows doesn't have 'unzip' but 'tar' works
-                extract_command = [
-                    "tar",
-                    "-xvf",
-                    self.save_dir / dataset_rename_map[dataset] / file_name,
-                    "-C",
-                    self.save_dir / dataset_rename_map[dataset],
-                ]
-            else:
-                extract_command = [
-                    "unzip",
-                    self.save_dir / dataset_rename_map[dataset] / file_name,
-                    "-d",
-                    self.save_dir / dataset_rename_map[dataset],
-                ]
-        # if .tar
-        else:
+def download_and_extract(url: str, download_path: Path, extract_path: Path) -> None:
+    download_path.parent.mkdir(parents=True, exist_ok=True)
+    extract_path.mkdir(parents=True, exist_ok=True)
+
+    # download
+    download_command = [
+        "curl",
+        "-L",
+        "-o",
+        str(download_path),
+        url,
+    ]
+    try:
+        subprocess.run(download_command, check=True)
+        print("File file downloaded succesfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading file: {e}")
+
+    # if .zip
+    if Path(url).suffix == ".zip":
+        if os.name == "nt":  # Windows doesn't have 'unzip' but 'tar' works
             extract_command = [
                 "tar",
-                "-xvzf",
-                self.save_dir / dataset_rename_map[dataset] / file_name,
+                "-xvf",
+                download_path,
                 "-C",
-                self.save_dir / dataset_rename_map[dataset],
+                extract_path,
             ]
+        else:
+            extract_command = [
+                "unzip",
+                download_path,
+                "-d",
+                extract_path,
+            ]
+    # if .tar
+    else:
+        extract_command = [
+            "tar",
+            "-xvzf",
+            download_path,
+            "-C",
+            extract_path,
+        ]
 
-        # extract
-        try:
-            subprocess.run(extract_command, check=True)
-            os.remove(self.save_dir / dataset_rename_map[dataset] / file_name)
-            print("Extraction complete.")
-        except subprocess.CalledProcessError as e:
-            print(f"Extraction failed: {e}")
+    # extract
+    try:
+        subprocess.run(extract_command, check=True)
+        os.remove(download_path)
+        print("Extraction complete.")
+    except subprocess.CalledProcessError as e:
+        print(f"Extraction failed: {e}")
 
 
 if __name__ == "__main__":

From 6fe028d41501ad19a338a8edc43e11356a9ed645 Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 12:01:07 -0700
Subject: [PATCH 37/38] use lists instead of dicts for zipnerf dataset urls

---
 examples/datasets/download_dataset.py | 30 +++++++++++++--------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py
index a6de57534..520e97d1e 100755
--- a/examples/datasets/download_dataset.py
+++ b/examples/datasets/download_dataset.py
@@ -22,18 +22,18 @@
     "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip",
     "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip",
     "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip",
-    "zipnerf": {
-        "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip",
-        "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip",
-        "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip",
-        "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip",
-    },
-    "zipnerf_undistorted": {
-        "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip",
-        "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip",
-        "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip",
-        "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip",
-    },
+    "zipnerf": [
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip",
+    ],
+    "zipnerf_undistorted": [
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip",
+        "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip",
+    ],
 }
 
 # rename maps
@@ -56,10 +56,10 @@ def main(self):
         self.dataset_download(self.dataset)
 
     def dataset_download(self, dataset: dataset_names):
-        if isinstance(urls[dataset], dict):
-            for name, url in urls[dataset].items():
+        if isinstance(urls[dataset], list):
+            for url in urls[dataset]:
                 url_file_name = Path(url).name
-                extract_path = self.save_dir / dataset_rename_map[dataset] / name
+                extract_path = self.save_dir / dataset_rename_map[dataset]
                 download_path = extract_path / url_file_name
                 download_and_extract(url, download_path, extract_path)
         else:

From e6c19d60fe5fa13057c1033b6c82d34f1768153f Mon Sep 17 00:00:00 2001
From: Jeffrey Hu <hujh14@gmail.com>
Date: Wed, 18 Sep 2024 17:46:37 -0700
Subject: [PATCH 38/38] use bilateral grid as default for zipnerf

---
 examples/benchmarks/fisheye/mcmc_zipnerf.sh             | 1 +
 examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh | 1 +
 2 files changed, 2 insertions(+)

diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
index 075b1ffca..bd0e57f44 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh
@@ -17,6 +17,7 @@ do
     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --opacity_reg 0.001 \
+        --use_bilateral_grid \
         --render_traj_path $RENDER_TRAJ_PATH \
         --camera_model fisheye \
         --data_dir $SCENE_DIR/$SCENE/ \
diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
index 3fcf12915..012c685cf 100644
--- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
+++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh
@@ -17,6 +17,7 @@ do
     CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
         --strategy.cap-max $CAP_MAX \
         --opacity_reg 0.001 \
+        --use_bilateral_grid \
         --render_traj_path $RENDER_TRAJ_PATH \
         --camera_model pinhole \
         --data_dir $SCENE_DIR/$SCENE/ \