From 27fa3536aae69f6660916c2640f274a22a8def32 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 08:25:04 -0700 Subject: [PATCH 01/38] baseline --- examples/benchmarks/mcmc.sh | 42 ++++++++++++-------- examples/benchmarks/mcmc_alameda.sh | 60 +++++++++++++++++++++++++++++ examples/simple_trainer.py | 5 ++- gsplat/cuda/_wrapper.py | 6 +++ gsplat/rendering.py | 2 + 5 files changed, 97 insertions(+), 18 deletions(-) create mode 100644 examples/benchmarks/mcmc_alameda.sh diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh index 23e40838d..8913dc406 100644 --- a/examples/benchmarks/mcmc.sh +++ b/examples/benchmarks/mcmc.sh @@ -1,37 +1,45 @@ -SCENE_DIR="data/360_v2" -RESULT_DIR="results/benchmark_mcmc_1M" -SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers -RENDER_TRAJ_PATH="ellipse" +# SCENE_DIR="data/360_v2" +# RESULT_DIR="results/benchmark_mcmc_1M" +# SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers +# RENDER_TRAJ_PATH="ellipse" -CAP_MAX=1000000 +SCENE_DIR="data/zipnerf" +RESULT_DIR="results/benchmark_alameda" +SCENE_LIST="alameda_undistort" +CAMERA_MODEL="pinhole" +RENDER_TRAJ_PATH="interp" + +CAP_MAX=2000000 for SCENE in $SCENE_LIST; do if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then DATA_FACTOR=2 else - DATA_FACTOR=4 + DATA_FACTOR=2 fi echo "Running $SCENE" # train without eval - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \ + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ + --camera_model $CAMERA_MODEL \ --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir data/360_v2/$SCENE/ \ + --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ # run eval and render - for CKPT in $RESULT_DIR/$SCENE/ckpts/*; - do - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ - --strategy.cap-max $CAP_MAX \ - --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir $SCENE_DIR/$SCENE/ \ - --result_dir $RESULT_DIR/$SCENE/ \ - --ckpt $CKPT - done + # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; + # do + # CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + # --strategy.cap-max $CAP_MAX \ + # --camera_model $CAMERA_MODEL \ + # --render_traj_path $RENDER_TRAJ_PATH \ + # --data_dir $SCENE_DIR/$SCENE/ \ + # --result_dir $RESULT_DIR/$SCENE/ \ + # --ckpt $CKPT + # done done diff --git a/examples/benchmarks/mcmc_alameda.sh b/examples/benchmarks/mcmc_alameda.sh new file mode 100644 index 000000000..72e26f3cd --- /dev/null +++ b/examples/benchmarks/mcmc_alameda.sh @@ -0,0 +1,60 @@ +SCENE_DIR="data/zipnerf" +RESULT_DIR="results/benchmark_alameda" +SCENE_LIST="alameda_undistort" +CAMERA_MODEL="pinhole" +RENDER_TRAJ_PATH="interp" + +CAP_MAX=2000000 + +for SCENE in $SCENE_LIST; +do + if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then + DATA_FACTOR=2 + else + DATA_FACTOR=4 + fi + + echo "Running $SCENE" + + # train without eval + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + --strategy.cap-max $CAP_MAX \ + --camera_model $CAMERA_MODEL \ + --render_traj_path $RENDER_TRAJ_PATH \ + --data_dir $SCENE_DIR/$SCENE/ \ + --result_dir $RESULT_DIR/$SCENE/ + + # run eval and render + # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; + # do + # CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + # --strategy.cap-max $CAP_MAX \ + # --camera_model $CAMERA_MODEL \ + # --render_traj_path $RENDER_TRAJ_PATH \ + # --data_dir $SCENE_DIR/$SCENE/ \ + # --result_dir $RESULT_DIR/$SCENE/ \ + # --ckpt $CKPT + # done +done + + +for SCENE in $SCENE_LIST; +do + echo "=== Eval Stats ===" + + for STATS in $RESULT_DIR/$SCENE/stats/val*.json; + do + echo $STATS + cat $STATS; + echo + done + + echo "=== Train Stats ===" + + for STATS in $RESULT_DIR/$SCENE/stats/train*_rank0.json; + do + echo $STATS + cat $STATS; + echo + done +done \ No newline at end of file diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index ccf979dec..d66f9c8ae 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -55,6 +55,8 @@ class Config: global_scale: float = 1.0 # Normalize the world space normalize_world_space: bool = True + # Camera model + camera_model: str = "pinhole" # Port for the viewer server port: int = 8080 @@ -441,6 +443,7 @@ def rasterize_splats( sparse_grad=self.cfg.sparse_grad, rasterize_mode=rasterize_mode, distributed=self.world_size > 1, + camera_model=self.cfg.camera_model, **kwargs, ) return render_colors, render_alphas, info @@ -965,7 +968,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): Config( init_opa=0.5, init_scale=0.1, - opacity_reg=0.01, + opacity_reg=0.001, scale_reg=0.01, strategy=MCMCStrategy(verbose=True), ), diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index ded7d5989..5fc51672e 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -188,6 +188,7 @@ def fully_fused_projection( sparse_grad: bool = False, calc_compensations: bool = False, ortho: bool = False, + fisheye: bool = False, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Projects Gaussians to 2D. @@ -290,6 +291,7 @@ def fully_fused_projection( sparse_grad, calc_compensations, ortho, + # fisheye, ) else: return _FullyFusedProjection.apply( @@ -307,6 +309,7 @@ def fully_fused_projection( radius_clip, calc_compensations, ortho, + # fisheye, ) @@ -754,6 +757,7 @@ def forward( radius_clip: float, calc_compensations: bool, ortho: bool, + # fisheye: bool, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: # "covars" and {"quats", "scales"} are mutually exclusive radii, means2d, depths, conics, compensations = _make_lazy_cuda_func( @@ -783,6 +787,7 @@ def forward( ctx.height = height ctx.eps2d = eps2d ctx.ortho = ortho + # ctx.fisheye = fisheye return radii, means2d, depths, conics, compensations @@ -803,6 +808,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): height = ctx.height eps2d = ctx.eps2d ortho = ctx.ortho + # fisheye = ctx.fisheye if v_compensations is not None: v_compensations = v_compensations.contiguous() v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func( diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 8f14dbd28..55308d78f 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -47,6 +47,7 @@ def rasterization( distributed: bool = False, ortho: bool = False, covars: Optional[Tensor] = None, + camera_model: str = "pinhole", ) -> Tuple[Tensor, Tensor, Dict]: """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C). @@ -307,6 +308,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso sparse_grad=sparse_grad, calc_compensations=(rasterize_mode == "antialiased"), ortho=ortho, + fisheye=camera_model=="fisheye", ) if packed: From 96089a16572459888463192bf475e835373a5add Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 15:16:11 -0700 Subject: [PATCH 02/38] fisheye forward works --- .../{mcmc_alameda.sh => mcmc_zipnerf.sh} | 24 +++---- examples/datasets/colmap.py | 70 +++++++++---------- examples/simple_trainer.py | 10 +-- gsplat/cuda/_wrapper.py | 12 ++-- gsplat/cuda/csrc/bindings.h | 4 +- .../cuda/csrc/fully_fused_projection_bwd.cu | 18 +++++ .../cuda/csrc/fully_fused_projection_fwd.cu | 18 ++++- gsplat/cuda/csrc/utils.cuh | 67 ++++++++++++++++++ 8 files changed, 164 insertions(+), 59 deletions(-) rename examples/benchmarks/{mcmc_alameda.sh => mcmc_zipnerf.sh} (68%) diff --git a/examples/benchmarks/mcmc_alameda.sh b/examples/benchmarks/mcmc_zipnerf.sh similarity index 68% rename from examples/benchmarks/mcmc_alameda.sh rename to examples/benchmarks/mcmc_zipnerf.sh index 72e26f3cd..a4c534d57 100644 --- a/examples/benchmarks/mcmc_alameda.sh +++ b/examples/benchmarks/mcmc_zipnerf.sh @@ -1,28 +1,28 @@ -SCENE_DIR="data/zipnerf" -RESULT_DIR="results/benchmark_alameda" -SCENE_LIST="alameda_undistort" -CAMERA_MODEL="pinhole" +# SCENE_DIR="data/zipnerf/undistort" +# RESULT_DIR="results/benchmark_zipnerf/undistort" +# CAMERA_MODEL="pinhole" +SCENE_DIR="data/zipnerf/fisheye" +RESULT_DIR="results/benchmark_zipnerf/fisheye" +CAMERA_MODEL="fisheye" +SCENE_LIST="berlin" # alameda RENDER_TRAJ_PATH="interp" CAP_MAX=2000000 +DATA_FACTOR=4 for SCENE in $SCENE_LIST; do - if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then - DATA_FACTOR=2 - else - DATA_FACTOR=4 - fi - echo "Running $SCENE" # train without eval - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ + --opacity_reg 0.001 \ --camera_model $CAMERA_MODEL \ --render_traj_path $RENDER_TRAJ_PATH \ --data_dir $SCENE_DIR/$SCENE/ \ - --result_dir $RESULT_DIR/$SCENE/ + --result_dir $RESULT_DIR/$SCENE/ \ + --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt" # run eval and render # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 5aeeec5be..a9d5c7cc5 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -97,9 +97,9 @@ def __init__( elif type_ == 5 or type_ == "OPENCV_FISHEYE": params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32) camtype = "fisheye" - assert ( - camtype == "perspective" - ), f"Only support perspective camera model, got {type_}" + # assert ( + # camtype == "perspective" + # ), f"Only support perspective camera model, got {type_}" params_dict[camera_id] = params @@ -206,29 +206,29 @@ def __init__( self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height)) # undistortion - self.mapx_dict = dict() - self.mapy_dict = dict() - self.roi_undist_dict = dict() - for camera_id in self.params_dict.keys(): - params = self.params_dict[camera_id] - if len(params) == 0: - continue # no distortion - assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}" - assert ( - camera_id in self.params_dict - ), f"Missing params for camera {camera_id}" - K = self.Ks_dict[camera_id] - width, height = self.imsize_dict[camera_id] - K_undist, roi_undist = cv2.getOptimalNewCameraMatrix( - K, params, (width, height), 0 - ) - mapx, mapy = cv2.initUndistortRectifyMap( - K, params, None, K_undist, (width, height), cv2.CV_32FC1 - ) - self.Ks_dict[camera_id] = K_undist - self.mapx_dict[camera_id] = mapx - self.mapy_dict[camera_id] = mapy - self.roi_undist_dict[camera_id] = roi_undist + # self.mapx_dict = dict() + # self.mapy_dict = dict() + # self.roi_undist_dict = dict() + # for camera_id in self.params_dict.keys(): + # params = self.params_dict[camera_id] + # if len(params) == 0: + # continue # no distortion + # assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}" + # assert ( + # camera_id in self.params_dict + # ), f"Missing params for camera {camera_id}" + # K = self.Ks_dict[camera_id] + # width, height = self.imsize_dict[camera_id] + # K_undist, roi_undist = cv2.getOptimalNewCameraMatrix( + # K, params, (width, height), 0 + # ) + # mapx, mapy = cv2.initUndistortRectifyMap( + # K, params, None, K_undist, (width, height), cv2.CV_32FC1 + # ) + # self.Ks_dict[camera_id] = K_undist + # self.mapx_dict[camera_id] = mapx + # self.mapy_dict[camera_id] = mapy + # self.roi_undist_dict[camera_id] = roi_undist # size of the scene measured by cameras camera_locations = camtoworlds[:, :3, 3] @@ -268,15 +268,15 @@ def __getitem__(self, item: int) -> Dict[str, Any]: params = self.parser.params_dict[camera_id] camtoworlds = self.parser.camtoworlds[index] - if len(params) > 0: - # Images are distorted. Undistort them. - mapx, mapy = ( - self.parser.mapx_dict[camera_id], - self.parser.mapy_dict[camera_id], - ) - image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) - x, y, w, h = self.parser.roi_undist_dict[camera_id] - image = image[y : y + h, x : x + w] + # if len(params) > 0: + # # Images are distorted. Undistort them. + # mapx, mapy = ( + # self.parser.mapx_dict[camera_id], + # self.parser.mapy_dict[camera_id], + # ) + # image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + # x, y, w, h = self.parser.roi_undist_dict[camera_id] + # image = image[y : y + h, x : x + w] if self.patch_size is not None: # Random crop. diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index d66f9c8ae..ef54593dc 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -69,7 +69,7 @@ class Config: # Number of training steps max_steps: int = 30_000 # Steps to evaluate the model - eval_steps: List[int] = field(default_factory=lambda: [7_000, 30_000]) + eval_steps: List[int] = field(default_factory=lambda: [3_000, 7_000, 30_000]) # Steps to save the model save_steps: List[int] = field(default_factory=lambda: [7_000, 30_000]) @@ -930,9 +930,9 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts]) step = ckpts[0]["step"] runner.eval(step=step) - runner.render_traj(step=step) - if cfg.compression is not None: - runner.run_compression(step=step) + # runner.render_traj(step=step) + # if cfg.compression is not None: + # runner.run_compression(step=step) else: runner.train() @@ -968,7 +968,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): Config( init_opa=0.5, init_scale=0.1, - opacity_reg=0.001, + opacity_reg=0.01, scale_reg=0.01, strategy=MCMCStrategy(verbose=True), ), diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index 5fc51672e..315005163 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -291,7 +291,6 @@ def fully_fused_projection( sparse_grad, calc_compensations, ortho, - # fisheye, ) else: return _FullyFusedProjection.apply( @@ -309,7 +308,7 @@ def fully_fused_projection( radius_clip, calc_compensations, ortho, - # fisheye, + fisheye, ) @@ -757,7 +756,7 @@ def forward( radius_clip: float, calc_compensations: bool, ortho: bool, - # fisheye: bool, + fisheye: bool, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: # "covars" and {"quats", "scales"} are mutually exclusive radii, means2d, depths, conics, compensations = _make_lazy_cuda_func( @@ -777,6 +776,7 @@ def forward( radius_clip, calc_compensations, ortho, + fisheye, ) if not calc_compensations: compensations = None @@ -787,7 +787,7 @@ def forward( ctx.height = height ctx.eps2d = eps2d ctx.ortho = ortho - # ctx.fisheye = fisheye + ctx.fisheye = fisheye return radii, means2d, depths, conics, compensations @@ -808,7 +808,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): height = ctx.height eps2d = ctx.eps2d ortho = ctx.ortho - # fisheye = ctx.fisheye + fisheye = ctx.fisheye if v_compensations is not None: v_compensations = v_compensations.contiguous() v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func( @@ -824,6 +824,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): height, eps2d, ortho, + fisheye, radii, conics, compensations, @@ -858,6 +859,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): None, None, None, + None, ) diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h index 004709a4d..7f76007a4 100644 --- a/gsplat/cuda/csrc/bindings.h +++ b/gsplat/cuda/csrc/bindings.h @@ -104,7 +104,8 @@ fully_fused_projection_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho + const bool ortho, + const bool fisheye ); std::tuple< @@ -125,6 +126,7 @@ fully_fused_projection_bwd_tensor( const uint32_t image_height, const float eps2d, const bool ortho, + const bool fisheye, // fwd outputs const torch::Tensor &radii, // [C, N] const torch::Tensor &conics, // [C, N, 3] diff --git a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu index af880c3a6..a069eb2db 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu @@ -31,6 +31,7 @@ __global__ void fully_fused_projection_bwd_kernel( const int32_t image_height, const T eps2d, const bool ortho, + const bool fisheye, // fwd outputs const int32_t *__restrict__ radii, // [C, N] const T *__restrict__ conics, // [C, N, 3] @@ -143,6 +144,21 @@ __global__ void fully_fused_projection_bwd_kernel( v_mean_c, v_covar_c ); + } else if (fisheye) { + fisheye_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); } else { persp_proj_vjp( mean_c, @@ -257,6 +273,7 @@ fully_fused_projection_bwd_tensor( const uint32_t image_height, const float eps2d, const bool ortho, + const bool fisheye, // fwd outputs const torch::Tensor &radii, // [C, N] const torch::Tensor &conics, // [C, N, 3] @@ -326,6 +343,7 @@ fully_fused_projection_bwd_tensor( image_height, eps2d, ortho, + fisheye, radii.data_ptr(), conics.data_ptr(), compensations.has_value() diff --git a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu index 8ea15e720..0ffeebb19 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu @@ -33,6 +33,7 @@ __global__ void fully_fused_projection_fwd_kernel( const T far_plane, const T radius_clip, const bool ortho, + const bool fisheye, // outputs int32_t *__restrict__ radii, // [C, N] T *__restrict__ means2d, // [C, N, 2] @@ -118,6 +119,19 @@ __global__ void fully_fused_projection_fwd_kernel( covar2d, mean2d ); + } else if (fisheye) { + fisheye_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); } else { persp_proj( mean_c, @@ -196,7 +210,8 @@ fully_fused_projection_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho + const bool ortho, + const bool fisheye ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -245,6 +260,7 @@ fully_fused_projection_fwd_tensor( far_plane, radius_clip, ortho, + fisheye, radii.data_ptr(), means2d.data_ptr(), depths.data_ptr(), diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index a8dcc3012..75672a2ee 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -373,6 +373,73 @@ inline __device__ void persp_proj_vjp( 2.f * fy * ty * rz3 * v_J[2][1]; } +template +inline __device__ void fisheye_proj( + // inputs + const vec3 mean3d, + const mat3 cov3d, + const T fx, + const T fy, + const T cx, + const T cy, + const uint32_t width, + const uint32_t height, + // outputs + mat2 &cov2d, + vec2 &mean2d +) { + T x = mean3d[0], y = mean3d[1], z = mean3d[2]; + + float eps = 0.0000001f; + float xy_len = glm::length(glm::vec2({x, y})) + eps; + float theta = glm::atan(xy_len, z + eps); + if (abs(theta) > 3.14 * 0.403) + return; + mean2d = vec2({ + x * fx * theta / xy_len + cx, + y * fy * theta / xy_len + cy + }); + + float x2 = x * x + eps; + float y2 = y * y; + float xy = x * y; + float x2y2 = x2 + y2 ; + float x2y2z2_inv = 1.f / (x2y2 + z * z); + + float b = glm::atan(xy_len, z) / xy_len / x2y2; + float a = z * x2y2z2_inv / (x2y2); + mat3x2 J = mat3x2( + fx * (x2 * a + y2 * b), + fy * xy * (a - b), + fx * xy * (a - b), + fy * (y2 * a + x2 * b), + - fx * x * x2y2z2_inv, + - fy * y * x2y2z2_inv + ); + cov2d = J * cov3d * glm::transpose(J); +} + +template +inline __device__ void fisheye_proj_vjp( + // fwd inputs + const vec3 mean3d, + const mat3 cov3d, + const T fx, + const T fy, + const T cx, + const T cy, + const uint32_t width, + const uint32_t height, + // grad outputs + const mat2 v_cov2d, + const vec2 v_mean2d, + // grad inputs + vec3 &v_mean3d, + mat3 &v_cov3d +) { + +} + template inline __device__ void pos_world_to_cam( // [R, t] is the world-to-camera transformation From 30b457927901d92bde06f05f0cabd135cad7004b Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 15:49:39 -0700 Subject: [PATCH 03/38] torch implementation of fisheye projection --- gsplat/cuda/_torch_impl.py | 78 +++++++++++++++++++++++++++++++++++++- gsplat/rendering.py | 5 ++- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py index 2585e36b3..291272d08 100644 --- a/gsplat/cuda/_torch_impl.py +++ b/gsplat/cuda/_torch_impl.py @@ -106,6 +106,71 @@ def _persp_proj( return means2d, cov2d # [C, N, 2], [C, N, 2, 2] +def _fisheye_proj( + means: Tensor, # [C, N, 3] + covars: Tensor, # [C, N, 3, 3] + Ks: Tensor, # [C, 3, 3] + width: int, + height: int, +) -> Tuple[Tensor, Tensor]: + """PyTorch implementation of fisheye projection for 3D Gaussians. + + Args: + means: Gaussian means in camera coordinate system. [C, N, 3]. + covars: Gaussian covariances in camera coordinate system. [C, N, 3, 3]. + Ks: Camera intrinsics. [C, 3, 3]. + width: Image width. + height: Image height. + + Returns: + A tuple: + + - **means2d**: Projected means. [C, N, 2]. + - **cov2d**: Projected covariances. [C, N, 2, 2]. + """ + C, N, _ = means.shape + + x, y, z = torch.unbind(means, dim=-1) # [C, N] + + fx = Ks[..., 0, 0, None] # [C, 1] + fy = Ks[..., 1, 1, None] # [C, 1] + cx = Ks[..., 0, 2, None] # [C, 1] + cy = Ks[..., 1, 2, None] # [C, 1] + + eps = 0.0000001 + xy_len = (x**2 + y**2) ** 0.5 + eps + theta = torch.atan2(xy_len, z + eps) + means2d = torch.stack( + [ + x * fx * theta / xy_len + cx, + y * fy * theta / xy_len + cy, + ], + dim=-1, + ) + + x2 = x * x + eps + y2 = y * y + xy = x * y + x2y2 = x2 + y2 + x2y2z2_inv = 1.0 / (x2y2 + z * z) + b = torch.atan2(xy_len, z) / xy_len / x2y2 + a = z * x2y2z2_inv / (x2y2) + J = torch.stack( + [ + fx * (x2 * a + y2 * b), + fx * xy * (a - b), + -fx * x * x2y2z2_inv, + fy * xy * (a - b), + fy * (y2 * a + x2 * b), + -fy * y * x2y2z2_inv, + ], + dim=-1, + ).reshape(C, N, 2, 3) + + cov2d = torch.einsum("...ij,...jk,...kl->...il", J, covars, J.transpose(-1, -2)) + return means2d, cov2d # [C, N, 2], [C, N, 2, 2] + + def _ortho_proj( means: Tensor, # [C, N, 3] covars: Tensor, # [C, N, 3, 3] @@ -170,7 +235,9 @@ def _world_to_cam( def _fully_fused_projection( means: Tensor, # [N, 3] - covars: Tensor, # [N, 3, 3] + covars: Optional[Tensor], # [N, 6] or None + quats: Optional[Tensor], # [N, 4] or None + scales: Optional[Tensor], # [N, 3] or None viewmats: Tensor, # [C, 4, 4] Ks: Tensor, # [C, 3, 3] width: int, @@ -180,6 +247,8 @@ def _fully_fused_projection( far_plane: float = 1e10, calc_compensations: bool = False, ortho: bool = False, + fisheye: bool = False, + **kwargs, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]: """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()` @@ -188,10 +257,17 @@ def _fully_fused_projection( This is a minimal implementation of fully fused version, which has more arguments. Not all arguments are supported. """ + if covars is None: + covars = _quat_scale_to_covar_preci( + quats, scales, compute_covar=True, compute_preci=False + )[0] + means_c, covars_c = _world_to_cam(means, covars, viewmats) if ortho: means2d, covars2d = _ortho_proj(means_c, covars_c, Ks, width, height) + elif fisheye: + means2d, covars2d = _fisheye_proj(means_c, covars_c, Ks, width, height) else: means2d, covars2d = _persp_proj(means_c, covars_c, Ks, width, height) diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 55308d78f..353af7f0c 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -6,6 +6,7 @@ from torch import Tensor from typing_extensions import Literal +from .cuda._torch_impl import _fully_fused_projection from .cuda._wrapper import ( fully_fused_projection, isect_offset_encode, @@ -291,7 +292,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso C = len(viewmats) # Project Gaussians to 2D. Directly pass in {quats, scales} is faster than precomputing covars. - proj_results = fully_fused_projection( + proj_results = _fully_fused_projection( means, covars, quats, @@ -308,7 +309,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso sparse_grad=sparse_grad, calc_compensations=(rasterize_mode == "antialiased"), ortho=ortho, - fisheye=camera_model=="fisheye", + fisheye=camera_model == "fisheye", ) if packed: From c4651c0752d897f09a2791d60efec58030224c9b Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 16:16:51 -0700 Subject: [PATCH 04/38] test basic --- examples/benchmarks/mcmc_zipnerf.sh | 6 +++--- tests/test_basic.py | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh index a4c534d57..1d466887f 100644 --- a/examples/benchmarks/mcmc_zipnerf.sh +++ b/examples/benchmarks/mcmc_zipnerf.sh @@ -15,14 +15,14 @@ do echo "Running $SCENE" # train without eval - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --data_factor $DATA_FACTOR \ + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --opacity_reg 0.001 \ --camera_model $CAMERA_MODEL \ --render_traj_path $RENDER_TRAJ_PATH \ --data_dir $SCENE_DIR/$SCENE/ \ - --result_dir $RESULT_DIR/$SCENE/ \ - --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt" + --result_dir $RESULT_DIR/$SCENE/ + # --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt" # run eval and render # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; diff --git a/tests/test_basic.py b/tests/test_basic.py index 349329584..da21276fd 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -165,10 +165,13 @@ def test_proj(test_data, ortho: bool): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") -@pytest.mark.parametrize("fused", [False, True]) -@pytest.mark.parametrize("calc_compensations", [False, True]) -@pytest.mark.parametrize("ortho", [True, False]) -def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: bool): +@pytest.mark.parametrize("fused", [True]) +@pytest.mark.parametrize("calc_compensations", [False]) +@pytest.mark.parametrize("ortho", [False]) +@pytest.mark.parametrize("fisheye", [True, False]) +def test_projection( + test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool +): from gsplat.cuda._torch_impl import _fully_fused_projection from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci @@ -200,6 +203,7 @@ def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: boo height, calc_compensations=calc_compensations, ortho=ortho, + fisheye=fisheye, ) else: covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True) # [N, 6] @@ -214,17 +218,21 @@ def test_projection(test_data, fused: bool, calc_compensations: bool, ortho: boo height, calc_compensations=calc_compensations, ortho=ortho, + fisheye=fisheye, ) _covars, _ = quat_scale_to_covar_preci(quats, scales, triu=False) # [N, 3, 3] _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection( means, _covars, + None, + None, viewmats, Ks, width, height, calc_compensations=calc_compensations, ortho=ortho, + fisheye=fisheye, ) # radii is integer so we allow for 1 unit difference From 712ae95d47ad197c7e506e40e133a7b66bc8c558 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 20:19:10 -0700 Subject: [PATCH 05/38] close 0.3% --- gsplat/cuda/_torch_impl.py | 1 - gsplat/cuda/csrc/utils.cuh | 115 +++++++++++++++++++++++++++++++++++++ tests/test_basic.py | 10 ++-- 3 files changed, 120 insertions(+), 6 deletions(-) diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py index 291272d08..c02535f7b 100644 --- a/gsplat/cuda/_torch_impl.py +++ b/gsplat/cuda/_torch_impl.py @@ -248,7 +248,6 @@ def _fully_fused_projection( calc_compensations: bool = False, ortho: bool = False, fisheye: bool = False, - **kwargs, ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]: """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()` diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 75672a2ee..2a854320c 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -437,7 +437,122 @@ inline __device__ void fisheye_proj_vjp( vec3 &v_mean3d, mat3 &v_cov3d ) { + T x = mean3d[0], y = mean3d[1], z = mean3d[2]; + + T tan_fovx = 0.5f * width / fx; + T tan_fovy = 0.5f * height / fy; + T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx; + T lim_x_neg = cx / fx + 0.3f * tan_fovx; + T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy; + T lim_y_neg = cy / fy + 0.3f * tan_fovy; + + T rz = 1.f / z; + T rz2 = rz * rz; + T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz)); + T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz)); + + const float eps = 0.0000001f; + float x2 = x * x + eps; + float y2 = y * y; + float xy = x * y; + float x2y2 = x2 + y2; + float len_xy = length(glm::vec2({x, y})) + eps; + const float x2y2z2 = x2y2 + z * z; + float x2y2z2_inv = 1.f / x2y2z2; + float b = glm::atan(len_xy, z) / len_xy / x2y2; + float a = z * x2y2z2_inv / (x2y2); + // float fx = 2.0 * focal_x / W; + // float fy = 2.0 * focal_y / H; + v_mean3d += vec3( + fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1], + fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1], + - fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1] + ); + + const float theta = glm::atan(len_xy, z); + const float J_b = theta / len_xy / x2y2; + const float J_a = z * x2y2z2_inv / (x2y2); + // mat3x2 is 3 columns x 2 rows. + mat3x2 J = mat3x2( + fx * (x2 * J_a + y2 * J_b), + fy * xy * (J_a - J_b), // 1st column + fx * xy * (J_a - J_b), + fy * (y2 * J_a + x2 * J_b), // 2nd column + - fx * x * x2y2z2_inv, + - fy * y * x2y2z2_inv // 3rd column + ); + v_cov3d += glm::transpose(J) * v_cov2d * J; + + // df/dx = -fx * rz2 * df/dJ_02 + // df/dy = -fy * rz2 * df/dJ_12 + // df/dz = -fx * rz2 * df/dJ_00 - fy * rz2 * df/dJ_11 + // + 2 * fx * tx * rz3 * df/dJ_02 + 2 * fy * ty * rz3 + mat3x2 v_J = v_cov2d * J * glm::transpose(cov3d) + + glm::transpose(v_cov2d) * J * cov3d; + float l4 = x2y2z2 * x2y2z2; + + float E = - l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z; + float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z; + + float A = x * (3 * E + x2 * F); + float B = y * (E + x2 * F); + float C = x * (E + y2 * F); + float D = y * (3 * E + y2 * F); + + float S1 = x2 - y2 - z * z; + float S2 = y2 - x2 - z * z; + float inv1 = x2y2z2_inv * x2y2z2_inv; + float inv2 = inv1 / (x2y2 * x2y2 * len_xy); + + float dJ_dx00 = fx * A * inv2; + float dJ_dx01 = fx * B * inv2; + float dJ_dx02 = fx * S1 * inv1; + float dJ_dx10 = fy * B * inv2; + float dJ_dx11 = fy * C * inv2; + float dJ_dx12 = 2.f * fy * xy * inv1; + + float dJ_dy00 = dJ_dx01; + float dJ_dy01 = fx * C * inv2; + float dJ_dy02 = 2.f * fx * xy * inv1; + float dJ_dy10 = dJ_dx11; + float dJ_dy11 = fy * D * inv2; + float dJ_dy12 = fy * S2 * inv1; + + float dJ_dz00 = dJ_dx02; + float dJ_dz01 = dJ_dy02; + float dJ_dz02 = 2.f * fx * x * z * inv1; + float dJ_dz10 = dJ_dx12; + float dJ_dz11 = dJ_dy12; + float dJ_dz12 = 2.f * fy * y * z * inv1; + + float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[0][1] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; + float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[0][1] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; + float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[0][1] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; + const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1; + const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1; + v_mean3d.x += x_grad_mul * dL_dtx_raw; + v_mean3d.y += y_grad_mul * dL_dty_raw; + v_mean3d.z += dL_dtz_raw; + + // // fov clipping + // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { + // // v_mean3d.x += -fx * rz2 * v_J[2][0]; + // v_mean3d.x += dL_dtx_raw; + // } else { + // // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; + // v_mean3d.z += dL_dtx_raw * rz * tx; + // } + // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { + // // v_mean3d.y += -fy * rz2 * v_J[2][1]; + // v_mean3d.y += dL_dty_raw; + // } else { + // v_mean3d.z += dL_dty_raw * rz * ty; + // } + // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + + // // 2.f * fx * tx * rz2 * rz * v_J[2][0] + + // // 2.f * fy * ty * rz2 * rz * v_J[2][1]; + // v_mean3d.z += dL_dtz_raw; } template diff --git a/tests/test_basic.py b/tests/test_basic.py index da21276fd..f86e273ce 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -168,7 +168,7 @@ def test_proj(test_data, ortho: bool): @pytest.mark.parametrize("fused", [True]) @pytest.mark.parametrize("calc_compensations", [False]) @pytest.mark.parametrize("ortho", [False]) -@pytest.mark.parametrize("fisheye", [True, False]) +@pytest.mark.parametrize("fisheye", [True]) def test_projection( test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool ): @@ -177,8 +177,8 @@ def test_projection( torch.manual_seed(42) - Ks = test_data["Ks"] - viewmats = test_data["viewmats"] + Ks = test_data["Ks"][:1, ...] + viewmats = test_data["viewmats"][:1, ...] height = test_data["height"] width = test_data["width"] quats = test_data["quats"] @@ -237,7 +237,7 @@ def test_projection( # radii is integer so we allow for 1 unit difference valid = (radii > 0) & (_radii > 0) - torch.testing.assert_close(radii, _radii, rtol=0, atol=1) + # torch.testing.assert_close(radii, _radii, rtol=0, atol=1) torch.testing.assert_close(means2d[valid], _means2d[valid], rtol=1e-4, atol=1e-4) torch.testing.assert_close(depths[valid], _depths[valid], rtol=1e-4, atol=1e-4) torch.testing.assert_close(conics[valid], _conics[valid], rtol=1e-4, atol=1e-4) @@ -267,7 +267,7 @@ def test_projection( (viewmats, quats, scales, means), ) - torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3) + # torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3) torch.testing.assert_close(v_quats, _v_quats, rtol=2e-1, atol=1e-2) torch.testing.assert_close(v_scales, _v_scales, rtol=1e-1, atol=2e-1) torch.testing.assert_close(v_means, _v_means, rtol=1e-2, atol=6e-2) From 01daadb3add38a6b1156508c05877b551ada4925 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 20:49:34 -0700 Subject: [PATCH 06/38] 19 mismatched --- gsplat/cuda/csrc/utils.cuh | 52 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 2a854320c..7e0a95c74 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -526,33 +526,33 @@ inline __device__ void fisheye_proj_vjp( float dJ_dz11 = dJ_dy12; float dJ_dz12 = 2.f * fy * y * z * inv1; - float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[0][1] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; - float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[0][1] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; - float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[0][1] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; - const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1; - const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1; - v_mean3d.x += x_grad_mul * dL_dtx_raw; - v_mean3d.y += y_grad_mul * dL_dty_raw; - v_mean3d.z += dL_dtz_raw; - - // // fov clipping - // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { - // // v_mean3d.x += -fx * rz2 * v_J[2][0]; - // v_mean3d.x += dL_dtx_raw; - // } else { - // // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; - // v_mean3d.z += dL_dtx_raw * rz * tx; - // } - // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { - // // v_mean3d.y += -fy * rz2 * v_J[2][1]; - // v_mean3d.y += dL_dty_raw; - // } else { - // v_mean3d.z += dL_dty_raw * rz * ty; - // } - // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + - // // 2.f * fx * tx * rz2 * rz * v_J[2][0] + - // // 2.f * fy * ty * rz2 * rz * v_J[2][1]; + float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; + float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; + float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; + // const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1; + // const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1; + // v_mean3d.x += x_grad_mul * dL_dtx_raw; + // v_mean3d.y += y_grad_mul * dL_dty_raw; // v_mean3d.z += dL_dtz_raw; + + // fov clipping + if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { + // v_mean3d.x += -fx * rz2 * v_J[2][0]; + v_mean3d.x += dL_dtx_raw; + } else { + // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; + // v_mean3d.z += dL_dtx_raw * rz * tx; + } + if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { + // v_mean3d.y += -fy * rz2 * v_J[2][1]; + v_mean3d.y += dL_dty_raw; + } else { + // v_mean3d.z += dL_dty_raw * rz * ty; + } + // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + + // 2.f * fx * tx * rz2 * rz * v_J[2][0] + + // 2.f * fy * ty * rz2 * rz * v_J[2][1]; + v_mean3d.z += dL_dtz_raw; } template From 2de03b92873125adb203b36352ffb4d01c405a5b Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 21:07:56 -0700 Subject: [PATCH 07/38] pass tests --- gsplat/cuda/csrc/utils.cuh | 43 ++++++++++++++++++++------------------ tests/test_basic.py | 4 ++-- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 7e0a95c74..1e7906ded 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -393,8 +393,8 @@ inline __device__ void fisheye_proj( float eps = 0.0000001f; float xy_len = glm::length(glm::vec2({x, y})) + eps; float theta = glm::atan(xy_len, z + eps); - if (abs(theta) > 3.14 * 0.403) - return; + // if (abs(theta) > 3.14 * 0.403) + // return; mean2d = vec2({ x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy @@ -534,25 +534,28 @@ inline __device__ void fisheye_proj_vjp( // v_mean3d.x += x_grad_mul * dL_dtx_raw; // v_mean3d.y += y_grad_mul * dL_dty_raw; // v_mean3d.z += dL_dtz_raw; - - // fov clipping - if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { - // v_mean3d.x += -fx * rz2 * v_J[2][0]; - v_mean3d.x += dL_dtx_raw; - } else { - // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; - // v_mean3d.z += dL_dtx_raw * rz * tx; - } - if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { - // v_mean3d.y += -fy * rz2 * v_J[2][1]; - v_mean3d.y += dL_dty_raw; - } else { - // v_mean3d.z += dL_dty_raw * rz * ty; - } - // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + - // 2.f * fx * tx * rz2 * rz * v_J[2][0] + - // 2.f * fy * ty * rz2 * rz * v_J[2][1]; + v_mean3d.x += dL_dtx_raw; + v_mean3d.y += dL_dty_raw; v_mean3d.z += dL_dtz_raw; + + // // fov clipping + // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { + // // v_mean3d.x += -fx * rz2 * v_J[2][0]; + // v_mean3d.x += dL_dtx_raw; + // } else { + // // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; + // v_mean3d.z += dL_dtx_raw * rz * tx; + // } + // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { + // // v_mean3d.y += -fy * rz2 * v_J[2][1]; + // v_mean3d.y += dL_dty_raw; + // } else { + // v_mean3d.z += dL_dty_raw * rz * ty; + // } + // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + + // // 2.f * fx * tx * rz2 * rz * v_J[2][0] + + // // 2.f * fy * ty * rz2 * rz * v_J[2][1]; + // v_mean3d.z += dL_dtz_raw; } template diff --git a/tests/test_basic.py b/tests/test_basic.py index f86e273ce..79e8c0c16 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -237,7 +237,7 @@ def test_projection( # radii is integer so we allow for 1 unit difference valid = (radii > 0) & (_radii > 0) - # torch.testing.assert_close(radii, _radii, rtol=0, atol=1) + torch.testing.assert_close(radii, _radii, rtol=0, atol=1) torch.testing.assert_close(means2d[valid], _means2d[valid], rtol=1e-4, atol=1e-4) torch.testing.assert_close(depths[valid], _depths[valid], rtol=1e-4, atol=1e-4) torch.testing.assert_close(conics[valid], _conics[valid], rtol=1e-4, atol=1e-4) @@ -267,7 +267,7 @@ def test_projection( (viewmats, quats, scales, means), ) - # torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3) + torch.testing.assert_close(v_viewmats, _v_viewmats, rtol=1e-3, atol=1e-3) torch.testing.assert_close(v_quats, _v_quats, rtol=2e-1, atol=1e-2) torch.testing.assert_close(v_scales, _v_scales, rtol=1e-1, atol=2e-1) torch.testing.assert_close(v_means, _v_means, rtol=1e-2, atol=6e-2) From 9c61743ce91d67df4b969b16181d12f082cb20b1 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 21:12:40 -0700 Subject: [PATCH 08/38] comment out --- gsplat/cuda/csrc/utils.cuh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 1e7906ded..9aa9dfb7b 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -439,17 +439,17 @@ inline __device__ void fisheye_proj_vjp( ) { T x = mean3d[0], y = mean3d[1], z = mean3d[2]; - T tan_fovx = 0.5f * width / fx; - T tan_fovy = 0.5f * height / fy; - T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx; - T lim_x_neg = cx / fx + 0.3f * tan_fovx; - T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy; - T lim_y_neg = cy / fy + 0.3f * tan_fovy; - - T rz = 1.f / z; - T rz2 = rz * rz; - T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz)); - T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz)); + // T tan_fovx = 0.5f * width / fx; + // T tan_fovy = 0.5f * height / fy; + // T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx; + // T lim_x_neg = cx / fx + 0.3f * tan_fovx; + // T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy; + // T lim_y_neg = cy / fy + 0.3f * tan_fovy; + + // T rz = 1.f / z; + // T rz2 = rz * rz; + // T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz)); + // T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz)); const float eps = 0.0000001f; float x2 = x * x + eps; From 6347c494d7bbc3595c016608a54a2c98a6349fac Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 21:51:42 -0700 Subject: [PATCH 09/38] crashing --- gsplat/rendering.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 353af7f0c..9cfd8318f 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -6,7 +6,6 @@ from torch import Tensor from typing_extensions import Literal -from .cuda._torch_impl import _fully_fused_projection from .cuda._wrapper import ( fully_fused_projection, isect_offset_encode, @@ -292,7 +291,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso C = len(viewmats) # Project Gaussians to 2D. Directly pass in {quats, scales} is faster than precomputing covars. - proj_results = _fully_fused_projection( + proj_results = fully_fused_projection( means, covars, quats, From 6e56adff853fa6e6b900c350ee6423e306ea8b94 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 21:53:08 -0700 Subject: [PATCH 10/38] remove dead code --- gsplat/cuda/csrc/utils.cuh | 42 ++------------------------------------ 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 9aa9dfb7b..85877c088 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -393,8 +393,8 @@ inline __device__ void fisheye_proj( float eps = 0.0000001f; float xy_len = glm::length(glm::vec2({x, y})) + eps; float theta = glm::atan(xy_len, z + eps); - // if (abs(theta) > 3.14 * 0.403) - // return; + if (abs(theta) > 3.14 * 0.403) + return; mean2d = vec2({ x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy @@ -439,18 +439,6 @@ inline __device__ void fisheye_proj_vjp( ) { T x = mean3d[0], y = mean3d[1], z = mean3d[2]; - // T tan_fovx = 0.5f * width / fx; - // T tan_fovy = 0.5f * height / fy; - // T lim_x_pos = (width - cx) / fx + 0.3f * tan_fovx; - // T lim_x_neg = cx / fx + 0.3f * tan_fovx; - // T lim_y_pos = (height - cy) / fy + 0.3f * tan_fovy; - // T lim_y_neg = cy / fy + 0.3f * tan_fovy; - - // T rz = 1.f / z; - // T rz2 = rz * rz; - // T tx = z * min(lim_x_pos, max(-lim_x_neg, x * rz)); - // T ty = z * min(lim_y_pos, max(-lim_y_neg, y * rz)); - const float eps = 0.0000001f; float x2 = x * x + eps; float y2 = y * y; @@ -461,8 +449,6 @@ inline __device__ void fisheye_proj_vjp( float x2y2z2_inv = 1.f / x2y2z2; float b = glm::atan(len_xy, z) / len_xy / x2y2; float a = z * x2y2z2_inv / (x2y2); - // float fx = 2.0 * focal_x / W; - // float fy = 2.0 * focal_y / H; v_mean3d += vec3( fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1], fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1], @@ -529,33 +515,9 @@ inline __device__ void fisheye_proj_vjp( float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; - // const float x_grad_mul = x * rz < -lim_x_neg || x * rz > lim_x_pos ? 0 : 1; - // const float y_grad_mul = y * rz < -lim_y_neg || y * rz > lim_y_pos ? 0 : 1; - // v_mean3d.x += x_grad_mul * dL_dtx_raw; - // v_mean3d.y += y_grad_mul * dL_dty_raw; - // v_mean3d.z += dL_dtz_raw; v_mean3d.x += dL_dtx_raw; v_mean3d.y += dL_dty_raw; v_mean3d.z += dL_dtz_raw; - - // // fov clipping - // if (x * rz <= lim_x_pos && x * rz >= -lim_x_neg) { - // // v_mean3d.x += -fx * rz2 * v_J[2][0]; - // v_mean3d.x += dL_dtx_raw; - // } else { - // // v_mean3d.z += -fx * rz2 * v_J[2][0] * rz * tx; - // v_mean3d.z += dL_dtx_raw * rz * tx; - // } - // if (y * rz <= lim_y_pos && y * rz >= -lim_y_neg) { - // // v_mean3d.y += -fy * rz2 * v_J[2][1]; - // v_mean3d.y += dL_dty_raw; - // } else { - // v_mean3d.z += dL_dty_raw * rz * ty; - // } - // // v_mean3d.z += -fx * rz2 * v_J[0][0] - fy * rz2 * v_J[1][1] + - // // 2.f * fx * tx * rz2 * rz * v_J[2][0] + - // // 2.f * fy * ty * rz2 * rz * v_J[2][1]; - // v_mean3d.z += dL_dtz_raw; } template From cb084270a1423975dd887b8ec7c15b9707ce35ab Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 21:56:39 -0700 Subject: [PATCH 11/38] reduce diff --- examples/benchmarks/mcmc.sh | 42 +++++++++++++++---------------------- examples/datasets/colmap.py | 6 +++--- examples/simple_trainer.py | 6 +++--- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh index 8913dc406..23e40838d 100644 --- a/examples/benchmarks/mcmc.sh +++ b/examples/benchmarks/mcmc.sh @@ -1,45 +1,37 @@ -# SCENE_DIR="data/360_v2" -# RESULT_DIR="results/benchmark_mcmc_1M" -# SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers -# RENDER_TRAJ_PATH="ellipse" +SCENE_DIR="data/360_v2" +RESULT_DIR="results/benchmark_mcmc_1M" +SCENE_LIST="garden bicycle stump bonsai counter kitchen room" # treehill flowers +RENDER_TRAJ_PATH="ellipse" -SCENE_DIR="data/zipnerf" -RESULT_DIR="results/benchmark_alameda" -SCENE_LIST="alameda_undistort" -CAMERA_MODEL="pinhole" -RENDER_TRAJ_PATH="interp" - -CAP_MAX=2000000 +CAP_MAX=1000000 for SCENE in $SCENE_LIST; do if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then DATA_FACTOR=2 else - DATA_FACTOR=2 + DATA_FACTOR=4 fi echo "Running $SCENE" # train without eval - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ - --camera_model $CAMERA_MODEL \ --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir $SCENE_DIR/$SCENE/ \ + --data_dir data/360_v2/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ # run eval and render - # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; - # do - # CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ - # --strategy.cap-max $CAP_MAX \ - # --camera_model $CAMERA_MODEL \ - # --render_traj_path $RENDER_TRAJ_PATH \ - # --data_dir $SCENE_DIR/$SCENE/ \ - # --result_dir $RESULT_DIR/$SCENE/ \ - # --ckpt $CKPT - # done + for CKPT in $RESULT_DIR/$SCENE/ckpts/*; + do + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + --strategy.cap-max $CAP_MAX \ + --render_traj_path $RENDER_TRAJ_PATH \ + --data_dir $SCENE_DIR/$SCENE/ \ + --result_dir $RESULT_DIR/$SCENE/ \ + --ckpt $CKPT + done done diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index a9d5c7cc5..9510492b7 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -97,9 +97,9 @@ def __init__( elif type_ == 5 or type_ == "OPENCV_FISHEYE": params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32) camtype = "fisheye" - # assert ( - # camtype == "perspective" - # ), f"Only support perspective camera model, got {type_}" + assert ( + camtype == "perspective" + ), f"Only support perspective camera model, got {type_}" params_dict[camera_id] = params diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index ef54593dc..d0e12ba38 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -930,9 +930,9 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts]) step = ckpts[0]["step"] runner.eval(step=step) - # runner.render_traj(step=step) - # if cfg.compression is not None: - # runner.run_compression(step=step) + runner.render_traj(step=step) + if cfg.compression is not None: + runner.run_compression(step=step) else: runner.train() From 22060d5902424c51e97b7727854e5c1f9c30d718 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 9 Sep 2024 22:38:49 -0700 Subject: [PATCH 12/38] video --- examples/benchmarks/mcmc_zipnerf.sh | 4 ++-- examples/datasets/colmap.py | 6 +++--- examples/simple_trainer.py | 14 +++++--------- gsplat/cuda/csrc/utils.cuh | 2 -- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh index 1d466887f..2d75d8661 100644 --- a/examples/benchmarks/mcmc_zipnerf.sh +++ b/examples/benchmarks/mcmc_zipnerf.sh @@ -1,10 +1,11 @@ # SCENE_DIR="data/zipnerf/undistort" # RESULT_DIR="results/benchmark_zipnerf/undistort" # CAMERA_MODEL="pinhole" + SCENE_DIR="data/zipnerf/fisheye" RESULT_DIR="results/benchmark_zipnerf/fisheye" CAMERA_MODEL="fisheye" -SCENE_LIST="berlin" # alameda +SCENE_LIST="berlin alameda london nyc" RENDER_TRAJ_PATH="interp" CAP_MAX=2000000 @@ -22,7 +23,6 @@ do --render_traj_path $RENDER_TRAJ_PATH \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ - # --ckpt "results/benchmark_zipnerf/undistort/$SCENE/ckpts/ckpt_29999_rank0.pt" # run eval and render # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 9510492b7..a9d5c7cc5 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -97,9 +97,9 @@ def __init__( elif type_ == 5 or type_ == "OPENCV_FISHEYE": params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32) camtype = "fisheye" - assert ( - camtype == "perspective" - ), f"Only support perspective camera model, got {type_}" + # assert ( + # camtype == "perspective" + # ), f"Only support perspective camera model, got {type_}" params_dict[camera_id] = params diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index d0e12ba38..e1b5907ef 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -69,7 +69,7 @@ class Config: # Number of training steps max_steps: int = 30_000 # Steps to evaluate the model - eval_steps: List[int] = field(default_factory=lambda: [3_000, 7_000, 30_000]) + eval_steps: List[int] = field(default_factory=lambda: [7_000, 30_000]) # Steps to save the model save_steps: List[int] = field(default_factory=lambda: [7_000, 30_000]) @@ -839,7 +839,10 @@ def render_traj(self, step: int): K = torch.from_numpy(list(self.parser.Ks_dict.values())[0]).float().to(device) width, height = list(self.parser.imsize_dict.values())[0] - canvas_all = [] + # save to video + video_dir = f"{cfg.result_dir}/videos" + os.makedirs(video_dir, exist_ok=True) + writer = imageio.get_writer(f"{video_dir}/traj_{step}.mp4", fps=30) for i in tqdm.trange(len(camtoworlds_all), desc="Rendering trajectory"): camtoworlds = camtoworlds_all[i : i + 1] Ks = K[None] @@ -862,13 +865,6 @@ def render_traj(self, step: int): # write images canvas = torch.cat(canvas_list, dim=2).squeeze(0).cpu().numpy() canvas = (canvas * 255).astype(np.uint8) - canvas_all.append(canvas) - - # save to video - video_dir = f"{cfg.result_dir}/videos" - os.makedirs(video_dir, exist_ok=True) - writer = imageio.get_writer(f"{video_dir}/traj_{step}.mp4", fps=30) - for canvas in canvas_all: writer.append_data(canvas) writer.close() print(f"Video saved to {video_dir}/traj_{step}.mp4") diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index 85877c088..e10007cd8 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -393,8 +393,6 @@ inline __device__ void fisheye_proj( float eps = 0.0000001f; float xy_len = glm::length(glm::vec2({x, y})) + eps; float theta = glm::atan(xy_len, z + eps); - if (abs(theta) > 3.14 * 0.403) - return; mean2d = vec2({ x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy From 139f7fe21442b5495de0bc3c8e6f55f4c2366d38 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 00:10:50 -0700 Subject: [PATCH 13/38] distortion not handled correctly --- examples/datasets/colmap.py | 88 +++++++++++++++++++++++-------------- examples/simple_trainer.py | 2 +- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index a9d5c7cc5..5f1071c1f 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -206,29 +206,52 @@ def __init__( self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height)) # undistortion - # self.mapx_dict = dict() - # self.mapy_dict = dict() - # self.roi_undist_dict = dict() - # for camera_id in self.params_dict.keys(): - # params = self.params_dict[camera_id] - # if len(params) == 0: - # continue # no distortion - # assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}" - # assert ( - # camera_id in self.params_dict - # ), f"Missing params for camera {camera_id}" - # K = self.Ks_dict[camera_id] - # width, height = self.imsize_dict[camera_id] - # K_undist, roi_undist = cv2.getOptimalNewCameraMatrix( - # K, params, (width, height), 0 - # ) - # mapx, mapy = cv2.initUndistortRectifyMap( - # K, params, None, K_undist, (width, height), cv2.CV_32FC1 - # ) - # self.Ks_dict[camera_id] = K_undist - # self.mapx_dict[camera_id] = mapx - # self.mapy_dict[camera_id] = mapy - # self.roi_undist_dict[camera_id] = roi_undist + self.mapx_dict = dict() + self.mapy_dict = dict() + self.roi_undist_dict = dict() + for camera_id in self.params_dict.keys(): + params = self.params_dict[camera_id] + if len(params) == 0: + continue # no distortion + assert camera_id in self.Ks_dict, f"Missing K for camera {camera_id}" + assert ( + camera_id in self.params_dict + ), f"Missing params for camera {camera_id}" + K = self.Ks_dict[camera_id] + width, height = self.imsize_dict[camera_id] + + if camtype == "perspective": + K_undist, roi_undist = cv2.getOptimalNewCameraMatrix( + K, params, (width, height), 0 + ) + mapx, mapy = cv2.initUndistortRectifyMap( + K, params, None, K_undist, (width, height), cv2.CV_32FC1 + ) + elif camtype == "fisheye": + K_undist = K.copy() + roi_undist = np.array([0, 0, width, height]) + mapx = None + mapy = None + # print(K, params) + # print(width, height) + # mapx = np.zeros((width, height), dtype=np.float32) + # mapy = np.zeros((width, height), dtype=np.float32) + # for i in range(0, width): + # for j in range(0, height): + # x = float(i) + # y = float(j) + # x1 = (x - cx) / fx + # y1 = (y - cy) / fy + # theta = np.sqrt(x1**2 + y1**2) + # r = (1.0 + params[0] * theta**2 + params[1] * theta**4 + params[2] * theta**6 + params[3] * theta**8) + # x2 = fx * x1 * r + width // 2 + # y2 = fy * y1 * r + height // 2 + # mapx[i, j] = x2 + # mapy[i, j] = y2 + self.Ks_dict[camera_id] = K_undist + self.mapx_dict[camera_id] = mapx + self.mapy_dict[camera_id] = mapy + self.roi_undist_dict[camera_id] = roi_undist # size of the scene measured by cameras camera_locations = camtoworlds[:, :3, 3] @@ -268,15 +291,16 @@ def __getitem__(self, item: int) -> Dict[str, Any]: params = self.parser.params_dict[camera_id] camtoworlds = self.parser.camtoworlds[index] - # if len(params) > 0: - # # Images are distorted. Undistort them. - # mapx, mapy = ( - # self.parser.mapx_dict[camera_id], - # self.parser.mapy_dict[camera_id], - # ) - # image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) - # x, y, w, h = self.parser.roi_undist_dict[camera_id] - # image = image[y : y + h, x : x + w] + if len(params) > 0: + # Images are distorted. Undistort them. + mapx, mapy = ( + self.parser.mapx_dict[camera_id], + self.parser.mapy_dict[camera_id], + ) + if mapx is not None and mapy is not None: + image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + x, y, w, h = self.parser.roi_undist_dict[camera_id] + image = image[y : y + h, x : x + w] if self.patch_size is not None: # Random crop. diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index e1b5907ef..9205144c2 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -926,7 +926,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts]) step = ckpts[0]["step"] runner.eval(step=step) - runner.render_traj(step=step) + # runner.render_traj(step=step) if cfg.compression is not None: runner.run_compression(step=step) else: From fde16f8a2a6043027ac113ccfb1582214e2ac0e9 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 11:26:44 -0700 Subject: [PATCH 14/38] test remap --- examples/datasets/colmap.py | 53 ++++++++++++++++++------------- examples/test_remap.py | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 21 deletions(-) create mode 100644 examples/test_remap.py diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 5f1071c1f..96ed09fd5 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -228,26 +228,38 @@ def __init__( K, params, None, K_undist, (width, height), cv2.CV_32FC1 ) elif camtype == "fisheye": + fx = K[0, 0] + fy = K[1, 1] + cx = K[0, 2] + cy = K[1, 2] + mapx = np.zeros((height, width), dtype=np.float32) + mapy = np.zeros((height, width), dtype=np.float32) + for i in range(0, width): + for j in range(0, height): + x = float(i) + y = float(j) + x1 = (x - cx) / fx + y1 = (y - cy) / fy + theta = np.sqrt(x1**2 + y1**2) + r = ( + 1.0 + + params[0] * theta**2 + + params[1] * theta**4 + + params[2] * theta**6 + + params[3] * theta**8 + ) + x2 = fx * x1 * r + width // 2 + y2 = fy * y1 * r + height // 2 + mapx[j, i] = x2 + mapy[j, i] = y2 + + x_crop, y_crop = (100, 70) # Hardcoded ROI crop + roi_undist = np.array( + [x_crop, y_crop, int(width - 2 * x_crop), int(height - 2 * y_crop)] + ) K_undist = K.copy() - roi_undist = np.array([0, 0, width, height]) - mapx = None - mapy = None - # print(K, params) - # print(width, height) - # mapx = np.zeros((width, height), dtype=np.float32) - # mapy = np.zeros((width, height), dtype=np.float32) - # for i in range(0, width): - # for j in range(0, height): - # x = float(i) - # y = float(j) - # x1 = (x - cx) / fx - # y1 = (y - cy) / fy - # theta = np.sqrt(x1**2 + y1**2) - # r = (1.0 + params[0] * theta**2 + params[1] * theta**4 + params[2] * theta**6 + params[3] * theta**8) - # x2 = fx * x1 * r + width // 2 - # y2 = fy * y1 * r + height // 2 - # mapx[i, j] = x2 - # mapy[i, j] = y2 + K_undist[0, 2] -= x_crop + K_undist[1, 2] -= y_crop self.Ks_dict[camera_id] = K_undist self.mapx_dict[camera_id] = mapx self.mapy_dict[camera_id] = mapy @@ -297,8 +309,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]: self.parser.mapx_dict[camera_id], self.parser.mapy_dict[camera_id], ) - if mapx is not None and mapy is not None: - image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) x, y, w, h = self.parser.roi_undist_dict[camera_id] image = image[y : y + h, x : x + w] diff --git a/examples/test_remap.py b/examples/test_remap.py new file mode 100644 index 000000000..13a80d9ab --- /dev/null +++ b/examples/test_remap.py @@ -0,0 +1,63 @@ +import numpy as np +import cv2 +import imageio + + +def init_fisheye_remap(K, params, width, height): + fx = K[0, 0] + fy = K[1, 1] + cx = K[0, 2] + cy = K[1, 2] + + mapx = np.zeros((height, width), dtype=np.float32) + mapy = np.zeros((height, width), dtype=np.float32) + for i in range(0, width): + for j in range(0, height): + x = float(i) + y = float(j) + x1 = (x - cx) / fx + y1 = (y - cy) / fy + theta = np.sqrt(x1**2 + y1**2) + r = ( + 1.0 + + params[0] * theta**2 + + params[1] * theta**4 + + params[2] * theta**6 + + params[3] * theta**8 + ) + x2 = fx * x1 * r + width // 2 + y2 = fy * y1 * r + height // 2 + mapx[j, i] = x2 + mapy[j, i] = y2 + return mapx, mapy + + +def main(): + K = np.array( + [[610.93592297, 0.0, 876.0], [0.0, 610.84071973, 584.0], [0.0, 0.0, 1.0]] + ) + params = np.array([0.03699945, 0.00660936, 0.00116909, -0.00038226]) + width, height = (1752, 1168) + + mapx, mapy = init_fisheye_remap(K, params, width, height) + + x_min = np.nonzero(mapx < 0)[1].max() + x_max = np.nonzero(mapx > width)[1].min() + y_min = np.nonzero(mapy < 0)[0].max() + y_max = np.nonzero(mapy > height)[0].min() + roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] + K[0, 2] -= x_min + K[1, 2] -= y_min + + image = imageio.imread("./data/zipnerf/fisheye/berlin/images_4/DSC00040.JPG")[ + ..., :3 + ] + image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + imageio.imwrite("./results/test_remap.png", image) + x, y, w, h = roi_undist + image = image[y : y + h, x : x + w] + imageio.imwrite("./results/test_remap_crop.png", image) + + +if __name__ == "__main__": + main() From 37f40a5fa5d3c391161755f70269608b43d1202a Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 12:39:57 -0700 Subject: [PATCH 15/38] remove hardcoded roi --- examples/benchmarks/mcmc_zipnerf.sh | 20 +++++++++----------- examples/datasets/colmap.py | 16 ++++++++++------ examples/simple_trainer.py | 4 ++-- gsplat/rendering.py | 2 +- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh index 2d75d8661..0e1206161 100644 --- a/examples/benchmarks/mcmc_zipnerf.sh +++ b/examples/benchmarks/mcmc_zipnerf.sh @@ -5,7 +5,7 @@ SCENE_DIR="data/zipnerf/fisheye" RESULT_DIR="results/benchmark_zipnerf/fisheye" CAMERA_MODEL="fisheye" -SCENE_LIST="berlin alameda london nyc" +SCENE_LIST="berlin london nyc alameda" RENDER_TRAJ_PATH="interp" CAP_MAX=2000000 @@ -25,16 +25,14 @@ do --result_dir $RESULT_DIR/$SCENE/ # run eval and render - # for CKPT in $RESULT_DIR/$SCENE/ckpts/*; - # do - # CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ - # --strategy.cap-max $CAP_MAX \ - # --camera_model $CAMERA_MODEL \ - # --render_traj_path $RENDER_TRAJ_PATH \ - # --data_dir $SCENE_DIR/$SCENE/ \ - # --result_dir $RESULT_DIR/$SCENE/ \ - # --ckpt $CKPT - # done + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + --strategy.cap-max $CAP_MAX \ + --opacity_reg 0.001 \ + --camera_model $CAMERA_MODEL \ + --render_traj_path $RENDER_TRAJ_PATH \ + --data_dir $SCENE_DIR/$SCENE/ \ + --result_dir $RESULT_DIR/$SCENE/ \ + --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt done diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 96ed09fd5..f06035f49 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -253,13 +253,17 @@ def __init__( mapx[j, i] = x2 mapy[j, i] = y2 - x_crop, y_crop = (100, 70) # Hardcoded ROI crop - roi_undist = np.array( - [x_crop, y_crop, int(width - 2 * x_crop), int(height - 2 * y_crop)] - ) + # Compute ROI + x_min = np.nonzero(mapx < 0)[1].max() + x_max = np.nonzero(mapx > width)[1].min() + y_min = np.nonzero(mapy < 0)[0].max() + y_max = np.nonzero(mapy > height)[0].min() + roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] + K[0, 2] -= x_min + K[1, 2] -= y_min K_undist = K.copy() - K_undist[0, 2] -= x_crop - K_undist[1, 2] -= y_crop + K_undist[0, 2] -= x_min + K_undist[1, 2] -= y_min self.Ks_dict[camera_id] = K_undist self.mapx_dict[camera_id] = mapx self.mapy_dict[camera_id] = mapy diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index 9205144c2..3b27ae119 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -56,7 +56,7 @@ class Config: # Normalize the world space normalize_world_space: bool = True # Camera model - camera_model: str = "pinhole" + camera_model: Literal["pinhole", "fisheye"] = "pinhole" # Port for the viewer server port: int = 8080 @@ -926,7 +926,7 @@ def main(local_rank: int, world_rank, world_size: int, cfg: Config): runner.splats[k].data = torch.cat([ckpt["splats"][k] for ckpt in ckpts]) step = ckpts[0]["step"] runner.eval(step=step) - # runner.render_traj(step=step) + runner.render_traj(step=step) if cfg.compression is not None: runner.run_compression(step=step) else: diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 9cfd8318f..4e4b3dd70 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -47,7 +47,7 @@ def rasterization( distributed: bool = False, ortho: bool = False, covars: Optional[Tensor] = None, - camera_model: str = "pinhole", + camera_model: Literal["pinhole", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Dict]: """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C). From 98a7819a4969876d83ed73cc423f70c2ece1a8f9 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 12:48:30 -0700 Subject: [PATCH 16/38] cleanup tests --- examples/datasets/colmap.py | 12 +++++++----- tests/test_basic.py | 10 +++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index f06035f49..61b143873 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -1,5 +1,5 @@ import os -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, assert_never import cv2 import imageio.v2 as imageio @@ -97,9 +97,9 @@ def __init__( elif type_ == 5 or type_ == "OPENCV_FISHEYE": params = np.array([cam.k1, cam.k2, cam.k3, cam.k4], dtype=np.float32) camtype = "fisheye" - # assert ( - # camtype == "perspective" - # ), f"Only support perspective camera model, got {type_}" + assert ( + camtype == "perspective" or camtype == "fisheye" + ), f"Only perspective and fisheye cameras are supported, got {type_}" params_dict[camera_id] = params @@ -252,7 +252,6 @@ def __init__( y2 = fy * y1 * r + height // 2 mapx[j, i] = x2 mapy[j, i] = y2 - # Compute ROI x_min = np.nonzero(mapx < 0)[1].max() x_max = np.nonzero(mapx > width)[1].min() @@ -264,6 +263,9 @@ def __init__( K_undist = K.copy() K_undist[0, 2] -= x_min K_undist[1, 2] -= y_min + else: + assert_never(camtype) + self.Ks_dict[camera_id] = K_undist self.mapx_dict[camera_id] = mapx self.mapy_dict[camera_id] = mapy diff --git a/tests/test_basic.py b/tests/test_basic.py index 79e8c0c16..c63ea44a4 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -165,10 +165,10 @@ def test_proj(test_data, ortho: bool): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") -@pytest.mark.parametrize("fused", [True]) -@pytest.mark.parametrize("calc_compensations", [False]) +@pytest.mark.parametrize("fused", [False, True]) +@pytest.mark.parametrize("calc_compensations", [False, True]) @pytest.mark.parametrize("ortho", [False]) -@pytest.mark.parametrize("fisheye", [True]) +@pytest.mark.parametrize("fisheye", [True, False]) def test_projection( test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool ): @@ -177,8 +177,8 @@ def test_projection( torch.manual_seed(42) - Ks = test_data["Ks"][:1, ...] - viewmats = test_data["viewmats"][:1, ...] + Ks = test_data["Ks"] + viewmats = test_data["viewmats"] height = test_data["height"] width = test_data["width"] quats = test_data["quats"] From 67baaae487997ccf343b0b406f768c6edb91af56 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 12:53:16 -0700 Subject: [PATCH 17/38] fix bug --- examples/datasets/colmap.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 61b143873..58428af2a 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -1,5 +1,6 @@ import os -from typing import Any, Dict, List, Optional, assert_never +from typing import Any, Dict, List, Optional +from typing_extensions import assert_never import cv2 import imageio.v2 as imageio From e41679de675cae8035e2e77f01d5e3301be5f65f Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 10 Sep 2024 13:02:07 -0700 Subject: [PATCH 18/38] bug --- examples/datasets/colmap.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 58428af2a..6d54db7f8 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -259,8 +259,6 @@ def __init__( y_min = np.nonzero(mapy < 0)[0].max() y_max = np.nonzero(mapy > height)[0].min() roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] - K[0, 2] -= x_min - K[1, 2] -= y_min K_undist = K.copy() K_undist[0, 2] -= x_min K_undist[1, 2] -= y_min From 1ed34f0bff29eced214c4c3c4093a705d94e21d1 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 11:30:52 -0700 Subject: [PATCH 19/38] edit imsize_dict --- examples/benchmarks/mcmc_zipnerf.sh | 2 +- examples/datasets/colmap.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh index 0e1206161..f17c09ee0 100644 --- a/examples/benchmarks/mcmc_zipnerf.sh +++ b/examples/benchmarks/mcmc_zipnerf.sh @@ -3,7 +3,7 @@ # CAMERA_MODEL="pinhole" SCENE_DIR="data/zipnerf/fisheye" -RESULT_DIR="results/benchmark_zipnerf/fisheye" +RESULT_DIR="results/benchmark_zipnerf/fisheye_2m" CAMERA_MODEL="fisheye" SCENE_LIST="berlin london nyc alameda" RENDER_TRAJ_PATH="interp" diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 6d54db7f8..ca7630818 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -269,6 +269,7 @@ def __init__( self.mapx_dict[camera_id] = mapx self.mapy_dict[camera_id] = mapy self.roi_undist_dict[camera_id] = roi_undist + self.imsize_dict[camera_id] = (x_max - x_min, y_max - y_min) # size of the scene measured by cameras camera_locations = camtoworlds[:, :3, 3] From 4434f0955ec1bb809a1fbb310ca1363f0a9b3851 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 11:39:07 -0700 Subject: [PATCH 20/38] format c++ --- gsplat/cuda/csrc/utils.cuh | 118 ++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index e10007cd8..d6af483c0 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -244,11 +244,7 @@ inline __device__ void ortho_proj_vjp( // df/dx = fx * df/dpixx // df/dy = fy * df/dpixy // df/dz = 0 - v_mean3d += vec3( - fx * v_mean2d[0], - fy * v_mean2d[1], - 0.f - ); + v_mean3d += vec3(fx * v_mean2d[0], fy * v_mean2d[1], 0.f); } template @@ -393,26 +389,24 @@ inline __device__ void fisheye_proj( float eps = 0.0000001f; float xy_len = glm::length(glm::vec2({x, y})) + eps; float theta = glm::atan(xy_len, z + eps); - mean2d = vec2({ - x * fx * theta / xy_len + cx, - y * fy * theta / xy_len + cy - }); + mean2d = + vec2({x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy}); float x2 = x * x + eps; float y2 = y * y; float xy = x * y; - float x2y2 = x2 + y2 ; + float x2y2 = x2 + y2; float x2y2z2_inv = 1.f / (x2y2 + z * z); float b = glm::atan(xy_len, z) / xy_len / x2y2; float a = z * x2y2z2_inv / (x2y2); mat3x2 J = mat3x2( fx * (x2 * a + y2 * b), - fy * xy * (a - b), - fx * xy * (a - b), - fy * (y2 * a + x2 * b), - - fx * x * x2y2z2_inv, - - fy * y * x2y2z2_inv + fy * xy * (a - b), + fx * xy * (a - b), + fy * (y2 * a + x2 * b), + -fx * x * x2y2z2_inv, + -fy * y * x2y2z2_inv ); cov2d = J * cov3d * glm::transpose(J); } @@ -450,20 +444,20 @@ inline __device__ void fisheye_proj_vjp( v_mean3d += vec3( fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1], fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1], - - fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1] + -fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1] ); const float theta = glm::atan(len_xy, z); const float J_b = theta / len_xy / x2y2; - const float J_a = z * x2y2z2_inv / (x2y2); + const float J_a = z * x2y2z2_inv / (x2y2); // mat3x2 is 3 columns x 2 rows. mat3x2 J = mat3x2( fx * (x2 * J_a + y2 * J_b), - fy * xy * (J_a - J_b), // 1st column + fy * xy * (J_a - J_b), // 1st column fx * xy * (J_a - J_b), fy * (y2 * J_a + x2 * J_b), // 2nd column - - fx * x * x2y2z2_inv, - - fy * y * x2y2z2_inv // 3rd column + -fx * x * x2y2z2_inv, + -fy * y * x2y2z2_inv // 3rd column ); v_cov3d += glm::transpose(J) * v_cov2d * J; @@ -474,45 +468,51 @@ inline __device__ void fisheye_proj_vjp( mat3x2 v_J = v_cov2d * J * glm::transpose(cov3d) + glm::transpose(v_cov2d) * J * cov3d; - float l4 = x2y2z2 * x2y2z2; - - float E = - l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z; - float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z; - - float A = x * (3 * E + x2 * F); - float B = y * (E + x2 * F); - float C = x * (E + y2 * F); - float D = y * (3 * E + y2 * F); - - float S1 = x2 - y2 - z * z; - float S2 = y2 - x2 - z * z; - float inv1 = x2y2z2_inv * x2y2z2_inv; - float inv2 = inv1 / (x2y2 * x2y2 * len_xy); - - float dJ_dx00 = fx * A * inv2; - float dJ_dx01 = fx * B * inv2; - float dJ_dx02 = fx * S1 * inv1; - float dJ_dx10 = fy * B * inv2; - float dJ_dx11 = fy * C * inv2; - float dJ_dx12 = 2.f * fy * xy * inv1; - - float dJ_dy00 = dJ_dx01; - float dJ_dy01 = fx * C * inv2; - float dJ_dy02 = 2.f * fx * xy * inv1; - float dJ_dy10 = dJ_dx11; - float dJ_dy11 = fy * D * inv2; - float dJ_dy12 = fy * S2 * inv1; - - float dJ_dz00 = dJ_dx02; - float dJ_dz01 = dJ_dy02; - float dJ_dz02 = 2.f * fx * x * z * inv1; - float dJ_dz10 = dJ_dx12; - float dJ_dz11 = dJ_dy12; - float dJ_dz12 = 2.f * fy * y * z * inv1; - - float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; - float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; - float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; + float l4 = x2y2z2 * x2y2z2; + + float E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z; + float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z; + + float A = x * (3 * E + x2 * F); + float B = y * (E + x2 * F); + float C = x * (E + y2 * F); + float D = y * (3 * E + y2 * F); + + float S1 = x2 - y2 - z * z; + float S2 = y2 - x2 - z * z; + float inv1 = x2y2z2_inv * x2y2z2_inv; + float inv2 = inv1 / (x2y2 * x2y2 * len_xy); + + float dJ_dx00 = fx * A * inv2; + float dJ_dx01 = fx * B * inv2; + float dJ_dx02 = fx * S1 * inv1; + float dJ_dx10 = fy * B * inv2; + float dJ_dx11 = fy * C * inv2; + float dJ_dx12 = 2.f * fy * xy * inv1; + + float dJ_dy00 = dJ_dx01; + float dJ_dy01 = fx * C * inv2; + float dJ_dy02 = 2.f * fx * xy * inv1; + float dJ_dy10 = dJ_dx11; + float dJ_dy11 = fy * D * inv2; + float dJ_dy12 = fy * S2 * inv1; + + float dJ_dz00 = dJ_dx02; + float dJ_dz01 = dJ_dy02; + float dJ_dz02 = 2.f * fx * x * z * inv1; + float dJ_dz10 = dJ_dx12; + float dJ_dz11 = dJ_dy12; + float dJ_dz12 = 2.f * fy * y * z * inv1; + + float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; + float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; + float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; v_mean3d.x += dL_dtx_raw; v_mean3d.y += dL_dty_raw; v_mean3d.z += dL_dtz_raw; From 3948fc95314dcdf38ddacbc02fb75e2a0942d83d Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 11:53:57 -0700 Subject: [PATCH 21/38] T --- gsplat/cuda/csrc/utils.cuh | 141 ++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 73 deletions(-) diff --git a/gsplat/cuda/csrc/utils.cuh b/gsplat/cuda/csrc/utils.cuh index d6af483c0..50c2fe5e9 100644 --- a/gsplat/cuda/csrc/utils.cuh +++ b/gsplat/cuda/csrc/utils.cuh @@ -386,20 +386,20 @@ inline __device__ void fisheye_proj( ) { T x = mean3d[0], y = mean3d[1], z = mean3d[2]; - float eps = 0.0000001f; - float xy_len = glm::length(glm::vec2({x, y})) + eps; - float theta = glm::atan(xy_len, z + eps); + T eps = 0.0000001f; + T xy_len = glm::length(glm::vec2({x, y})) + eps; + T theta = glm::atan(xy_len, z + eps); mean2d = vec2({x * fx * theta / xy_len + cx, y * fy * theta / xy_len + cy}); - float x2 = x * x + eps; - float y2 = y * y; - float xy = x * y; - float x2y2 = x2 + y2; - float x2y2z2_inv = 1.f / (x2y2 + z * z); + T x2 = x * x + eps; + T y2 = y * y; + T xy = x * y; + T x2y2 = x2 + y2; + T x2y2z2_inv = 1.f / (x2y2 + z * z); - float b = glm::atan(xy_len, z) / xy_len / x2y2; - float a = z * x2y2z2_inv / (x2y2); + T b = glm::atan(xy_len, z) / xy_len / x2y2; + T a = z * x2y2z2_inv / (x2y2); mat3x2 J = mat3x2( fx * (x2 * a + y2 * b), fy * xy * (a - b), @@ -431,25 +431,25 @@ inline __device__ void fisheye_proj_vjp( ) { T x = mean3d[0], y = mean3d[1], z = mean3d[2]; - const float eps = 0.0000001f; - float x2 = x * x + eps; - float y2 = y * y; - float xy = x * y; - float x2y2 = x2 + y2; - float len_xy = length(glm::vec2({x, y})) + eps; - const float x2y2z2 = x2y2 + z * z; - float x2y2z2_inv = 1.f / x2y2z2; - float b = glm::atan(len_xy, z) / len_xy / x2y2; - float a = z * x2y2z2_inv / (x2y2); + const T eps = 0.0000001f; + T x2 = x * x + eps; + T y2 = y * y; + T xy = x * y; + T x2y2 = x2 + y2; + T len_xy = length(glm::vec2({x, y})) + eps; + const T x2y2z2 = x2y2 + z * z; + T x2y2z2_inv = 1.f / x2y2z2; + T b = glm::atan(len_xy, z) / len_xy / x2y2; + T a = z * x2y2z2_inv / (x2y2); v_mean3d += vec3( fx * (x2 * a + y2 * b) * v_mean2d[0] + fy * xy * (a - b) * v_mean2d[1], fx * xy * (a - b) * v_mean2d[0] + fy * (y2 * a + x2 * b) * v_mean2d[1], -fx * x * x2y2z2_inv * v_mean2d[0] - fy * y * x2y2z2_inv * v_mean2d[1] ); - const float theta = glm::atan(len_xy, z); - const float J_b = theta / len_xy / x2y2; - const float J_a = z * x2y2z2_inv / (x2y2); + const T theta = glm::atan(len_xy, z); + const T J_b = theta / len_xy / x2y2; + const T J_a = z * x2y2z2_inv / (x2y2); // mat3x2 is 3 columns x 2 rows. mat3x2 J = mat3x2( fx * (x2 * J_a + y2 * J_b), @@ -461,58 +461,53 @@ inline __device__ void fisheye_proj_vjp( ); v_cov3d += glm::transpose(J) * v_cov2d * J; - // df/dx = -fx * rz2 * df/dJ_02 - // df/dy = -fy * rz2 * df/dJ_12 - // df/dz = -fx * rz2 * df/dJ_00 - fy * rz2 * df/dJ_11 - // + 2 * fx * tx * rz3 * df/dJ_02 + 2 * fy * ty * rz3 mat3x2 v_J = v_cov2d * J * glm::transpose(cov3d) + glm::transpose(v_cov2d) * J * cov3d; - - float l4 = x2y2z2 * x2y2z2; - - float E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z; - float F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z; - - float A = x * (3 * E + x2 * F); - float B = y * (E + x2 * F); - float C = x * (E + y2 * F); - float D = y * (3 * E + y2 * F); - - float S1 = x2 - y2 - z * z; - float S2 = y2 - x2 - z * z; - float inv1 = x2y2z2_inv * x2y2z2_inv; - float inv2 = inv1 / (x2y2 * x2y2 * len_xy); - - float dJ_dx00 = fx * A * inv2; - float dJ_dx01 = fx * B * inv2; - float dJ_dx02 = fx * S1 * inv1; - float dJ_dx10 = fy * B * inv2; - float dJ_dx11 = fy * C * inv2; - float dJ_dx12 = 2.f * fy * xy * inv1; - - float dJ_dy00 = dJ_dx01; - float dJ_dy01 = fx * C * inv2; - float dJ_dy02 = 2.f * fx * xy * inv1; - float dJ_dy10 = dJ_dx11; - float dJ_dy11 = fy * D * inv2; - float dJ_dy12 = fy * S2 * inv1; - - float dJ_dz00 = dJ_dx02; - float dJ_dz01 = dJ_dy02; - float dJ_dz02 = 2.f * fx * x * z * inv1; - float dJ_dz10 = dJ_dx12; - float dJ_dz11 = dJ_dy12; - float dJ_dz12 = 2.f * fy * y * z * inv1; - - float dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + - dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + - dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; - float dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + - dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + - dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; - float dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + - dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + - dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; + T l4 = x2y2z2 * x2y2z2; + + T E = -l4 * x2y2 * theta + x2y2z2 * x2y2 * len_xy * z; + T F = 3 * l4 * theta - 3 * x2y2z2 * len_xy * z - 2 * x2y2 * len_xy * z; + + T A = x * (3 * E + x2 * F); + T B = y * (E + x2 * F); + T C = x * (E + y2 * F); + T D = y * (3 * E + y2 * F); + + T S1 = x2 - y2 - z * z; + T S2 = y2 - x2 - z * z; + T inv1 = x2y2z2_inv * x2y2z2_inv; + T inv2 = inv1 / (x2y2 * x2y2 * len_xy); + + T dJ_dx00 = fx * A * inv2; + T dJ_dx01 = fx * B * inv2; + T dJ_dx02 = fx * S1 * inv1; + T dJ_dx10 = fy * B * inv2; + T dJ_dx11 = fy * C * inv2; + T dJ_dx12 = 2.f * fy * xy * inv1; + + T dJ_dy00 = dJ_dx01; + T dJ_dy01 = fx * C * inv2; + T dJ_dy02 = 2.f * fx * xy * inv1; + T dJ_dy10 = dJ_dx11; + T dJ_dy11 = fy * D * inv2; + T dJ_dy12 = fy * S2 * inv1; + + T dJ_dz00 = dJ_dx02; + T dJ_dz01 = dJ_dy02; + T dJ_dz02 = 2.f * fx * x * z * inv1; + T dJ_dz10 = dJ_dx12; + T dJ_dz11 = dJ_dy12; + T dJ_dz12 = 2.f * fy * y * z * inv1; + + T dL_dtx_raw = dJ_dx00 * v_J[0][0] + dJ_dx01 * v_J[1][0] + + dJ_dx02 * v_J[2][0] + dJ_dx10 * v_J[0][1] + + dJ_dx11 * v_J[1][1] + dJ_dx12 * v_J[2][1]; + T dL_dty_raw = dJ_dy00 * v_J[0][0] + dJ_dy01 * v_J[1][0] + + dJ_dy02 * v_J[2][0] + dJ_dy10 * v_J[0][1] + + dJ_dy11 * v_J[1][1] + dJ_dy12 * v_J[2][1]; + T dL_dtz_raw = dJ_dz00 * v_J[0][0] + dJ_dz01 * v_J[1][0] + + dJ_dz02 * v_J[2][0] + dJ_dz10 * v_J[0][1] + + dJ_dz11 * v_J[1][1] + dJ_dz12 * v_J[2][1]; v_mean3d.x += dL_dtx_raw; v_mean3d.y += dL_dty_raw; v_mean3d.z += dL_dtz_raw; From ce98242fe379f0238ff82c9b66595f5d41080fce Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 14:43:01 -0700 Subject: [PATCH 22/38] use mask --- examples/datasets/colmap.py | 33 +++++++++++++++++++++++---------- examples/simple_trainer.py | 9 +++++++++ examples/test_remap.py | 13 +++++++++++++ 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index ca7630818..74882d57d 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -59,6 +59,7 @@ def __init__( Ks_dict = dict() params_dict = dict() imsize_dict = dict() # width, height + mask_dict = dict() bottom = np.array([0, 0, 0, 1]).reshape(1, 4) for k in imdata: im = imdata[k] @@ -187,6 +188,7 @@ def __init__( self.Ks_dict = Ks_dict # Dict of camera_id -> K self.params_dict = params_dict # Dict of camera_id -> params self.imsize_dict = imsize_dict # Dict of camera_id -> (width, height) + self.mask_dict = mask_dict # Dict of camera_id -> mask self.points = points # np.ndarray, (num_points, 3) self.points_err = points_err # np.ndarray, (num_points,) self.points_rgb = points_rgb # np.ndarray, (num_points, 3) @@ -253,23 +255,32 @@ def __init__( y2 = fy * y1 * r + height // 2 mapx[j, i] = x2 mapy[j, i] = y2 - # Compute ROI - x_min = np.nonzero(mapx < 0)[1].max() - x_max = np.nonzero(mapx > width)[1].min() - y_min = np.nonzero(mapy < 0)[0].max() - y_max = np.nonzero(mapy > height)[0].min() - roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] + K_undist = K.copy() - K_undist[0, 2] -= x_min - K_undist[1, 2] -= y_min + roi_undist = [0, 0, width, height] + mask = np.logical_and( + np.logical_and(mapx > 0, mapy > 0), + np.logical_and(mapx < width - 1, mapy < height - 1), + ) + + # # Compute ROI + # x_min = np.nonzero(mapx < 0)[1].max() + # x_max = np.nonzero(mapx > width)[1].min() + # y_min = np.nonzero(mapy < 0)[0].max() + # y_max = np.nonzero(mapy > height)[0].min() + # roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] + # K_undist = K.copy() + # K_undist[0, 2] -= x_min + # K_undist[1, 2] -= y_min else: assert_never(camtype) - self.Ks_dict[camera_id] = K_undist self.mapx_dict[camera_id] = mapx self.mapy_dict[camera_id] = mapy + self.Ks_dict[camera_id] = K_undist self.roi_undist_dict[camera_id] = roi_undist - self.imsize_dict[camera_id] = (x_max - x_min, y_max - y_min) + self.imsize_dict[camera_id] = (roi_undist[2], roi_undist[3]) + self.mask_dict[camera_id] = mask # size of the scene measured by cameras camera_locations = camtoworlds[:, :3, 3] @@ -308,6 +319,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]: K = self.parser.Ks_dict[camera_id].copy() # undistorted K params = self.parser.params_dict[camera_id] camtoworlds = self.parser.camtoworlds[index] + mask = self.parser.mask_dict[camera_id] if len(params) > 0: # Images are distorted. Undistort them. @@ -333,6 +345,7 @@ def __getitem__(self, item: int) -> Dict[str, Any]: "camtoworld": torch.from_numpy(camtoworlds).float(), "image": torch.from_numpy(image).float(), "image_id": item, # the index of the image in the dataset + "mask": torch.from_numpy(mask).bool(), } if self.load_depths: diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index 3b27ae119..c91116fcc 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -401,6 +401,7 @@ def rasterize_splats( Ks: Tensor, width: int, height: int, + masks: Optional[Tensor] = None, **kwargs, ) -> Tuple[Tensor, Tensor, Dict]: means = self.splats["means"] # [N, 3] @@ -446,6 +447,8 @@ def rasterize_splats( camera_model=self.cfg.camera_model, **kwargs, ) + if masks is not None: + render_colors[~masks] = 0 return render_colors, render_alphas, info def train(self): @@ -509,6 +512,9 @@ def train(self): pixels.shape[0] * pixels.shape[1] * pixels.shape[2] ) image_ids = data["image_id"].to(device) + masks = ( + data["mask"].to(device) if data["mask"] is not None else None + ) # [1, H, W] if cfg.depth_loss: points = data["points"].to(device) # [1, M, 2] depths_gt = data["depths"].to(device) # [1, M] @@ -535,6 +541,7 @@ def train(self): far_plane=cfg.far_plane, image_ids=image_ids, render_mode="RGB+ED" if cfg.depth_loss else "RGB", + masks=masks, ) if renders.shape[-1] == 4: colors, depths = renders[..., 0:3], renders[..., 3:4] @@ -743,6 +750,7 @@ def eval(self, step: int, stage: str = "val"): camtoworlds = data["camtoworld"].to(device) Ks = data["K"].to(device) pixels = data["image"].to(device) / 255.0 + masks = data["mask"].to(device) if data["mask"] is not None else None height, width = pixels.shape[1:3] torch.cuda.synchronize() @@ -755,6 +763,7 @@ def eval(self, step: int, stage: str = "val"): sh_degree=cfg.sh_degree, near_plane=cfg.near_plane, far_plane=cfg.far_plane, + masks=masks, ) # [1, H, W, 3] torch.cuda.synchronize() ellipse_time += time.time() - tic diff --git a/examples/test_remap.py b/examples/test_remap.py index 13a80d9ab..7cf529618 100644 --- a/examples/test_remap.py +++ b/examples/test_remap.py @@ -41,6 +41,12 @@ def main(): mapx, mapy = init_fisheye_remap(K, params, width, height) + mask = np.logical_and( + np.logical_and(mapx > 0, mapy > 0), + np.logical_and(mapx < width - 1, mapy < height - 1), + ) + imageio.imwrite("./results/test_remap_mask.png", mask.astype(np.uint8) * 255) + x_min = np.nonzero(mapx < 0)[1].max() x_max = np.nonzero(mapx > width)[1].min() y_min = np.nonzero(mapy < 0)[0].max() @@ -53,6 +59,13 @@ def main(): ..., :3 ] image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) + + mask2 = image.max(axis=2) != 0 + imageio.imwrite("./results/test_remap_mask2.png", mask2.astype(np.uint8) * 255) + mask3 = mask2 ^ mask + imageio.imwrite("./results/test_remap_mask3.png", mask3.astype(np.uint8) * 255) + print(mask.sum(), mask2.sum(), mask3.sum()) + # print((mask2.astype(int) - mask.astype(int)).sum()) imageio.imwrite("./results/test_remap.png", image) x, y, w, h = roi_undist image = image[y : y + h, x : x + w] From 66128ca1171b8ddcb885aa12ec6ad4b82d316f26 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 14:44:24 -0700 Subject: [PATCH 23/38] remove test_remap --- examples/datasets/colmap.py | 10 ----- examples/test_remap.py | 76 ------------------------------------- 2 files changed, 86 deletions(-) delete mode 100644 examples/test_remap.py diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 74882d57d..15f4e739a 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -262,16 +262,6 @@ def __init__( np.logical_and(mapx > 0, mapy > 0), np.logical_and(mapx < width - 1, mapy < height - 1), ) - - # # Compute ROI - # x_min = np.nonzero(mapx < 0)[1].max() - # x_max = np.nonzero(mapx > width)[1].min() - # y_min = np.nonzero(mapy < 0)[0].max() - # y_max = np.nonzero(mapy > height)[0].min() - # roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] - # K_undist = K.copy() - # K_undist[0, 2] -= x_min - # K_undist[1, 2] -= y_min else: assert_never(camtype) diff --git a/examples/test_remap.py b/examples/test_remap.py deleted file mode 100644 index 7cf529618..000000000 --- a/examples/test_remap.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np -import cv2 -import imageio - - -def init_fisheye_remap(K, params, width, height): - fx = K[0, 0] - fy = K[1, 1] - cx = K[0, 2] - cy = K[1, 2] - - mapx = np.zeros((height, width), dtype=np.float32) - mapy = np.zeros((height, width), dtype=np.float32) - for i in range(0, width): - for j in range(0, height): - x = float(i) - y = float(j) - x1 = (x - cx) / fx - y1 = (y - cy) / fy - theta = np.sqrt(x1**2 + y1**2) - r = ( - 1.0 - + params[0] * theta**2 - + params[1] * theta**4 - + params[2] * theta**6 - + params[3] * theta**8 - ) - x2 = fx * x1 * r + width // 2 - y2 = fy * y1 * r + height // 2 - mapx[j, i] = x2 - mapy[j, i] = y2 - return mapx, mapy - - -def main(): - K = np.array( - [[610.93592297, 0.0, 876.0], [0.0, 610.84071973, 584.0], [0.0, 0.0, 1.0]] - ) - params = np.array([0.03699945, 0.00660936, 0.00116909, -0.00038226]) - width, height = (1752, 1168) - - mapx, mapy = init_fisheye_remap(K, params, width, height) - - mask = np.logical_and( - np.logical_and(mapx > 0, mapy > 0), - np.logical_and(mapx < width - 1, mapy < height - 1), - ) - imageio.imwrite("./results/test_remap_mask.png", mask.astype(np.uint8) * 255) - - x_min = np.nonzero(mapx < 0)[1].max() - x_max = np.nonzero(mapx > width)[1].min() - y_min = np.nonzero(mapy < 0)[0].max() - y_max = np.nonzero(mapy > height)[0].min() - roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] - K[0, 2] -= x_min - K[1, 2] -= y_min - - image = imageio.imread("./data/zipnerf/fisheye/berlin/images_4/DSC00040.JPG")[ - ..., :3 - ] - image = cv2.remap(image, mapx, mapy, cv2.INTER_LINEAR) - - mask2 = image.max(axis=2) != 0 - imageio.imwrite("./results/test_remap_mask2.png", mask2.astype(np.uint8) * 255) - mask3 = mask2 ^ mask - imageio.imwrite("./results/test_remap_mask3.png", mask3.astype(np.uint8) * 255) - print(mask.sum(), mask2.sum(), mask3.sum()) - # print((mask2.astype(int) - mask.astype(int)).sum()) - imageio.imwrite("./results/test_remap.png", image) - x, y, w, h = roi_undist - image = image[y : y + h, x : x + w] - imageio.imwrite("./results/test_remap_crop.png", image) - - -if __name__ == "__main__": - main() From 419c9e13719640dc38df881359d24fd6d34202d2 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 15:40:47 -0700 Subject: [PATCH 24/38] mask roi --- examples/datasets/colmap.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 15f4e739a..994a64abc 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -256,12 +256,18 @@ def __init__( mapx[j, i] = x2 mapy[j, i] = y2 - K_undist = K.copy() - roi_undist = [0, 0, width, height] mask = np.logical_and( np.logical_and(mapx > 0, mapy > 0), np.logical_and(mapx < width - 1, mapy < height - 1), ) + y_indices, x_indices = np.nonzero(mask) + y_min, y_max = y_indices.min(), y_indices.max() + 1 + x_min, x_max = x_indices.min(), x_indices.max() + 1 + mask = mask[y_min:y_max, x_min:x_max] + K_undist = K.copy() + K_undist[0, 2] -= x_min + K_undist[1, 2] -= y_min + roi_undist = [x_min, y_min, x_max - x_min, y_max - y_min] else: assert_never(camtype) From 42111578c8a502105e48c7fe5908348473946265 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 16:33:16 -0700 Subject: [PATCH 25/38] scripts --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 22 +++++++ .../fisheye/mcmc_zipnerf_undistort.sh | 22 +++++++ examples/benchmarks/mcmc.sh | 2 +- examples/benchmarks/mcmc_zipnerf.sh | 58 ------------------- examples/datasets/colmap.py | 6 +- examples/simple_trainer.py | 6 +- 6 files changed, 51 insertions(+), 65 deletions(-) create mode 100644 examples/benchmarks/fisheye/mcmc_zipnerf.sh create mode 100644 examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh delete mode 100644 examples/benchmarks/mcmc_zipnerf.sh diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh new file mode 100644 index 000000000..ff6f7b9e5 --- /dev/null +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -0,0 +1,22 @@ +SCENE_DIR="data/zipnerf" +SCENE_LIST="berlin london nyc alameda" +DATA_FACTOR=2 + +RESULT_DIR="results/benchmark_mcmc_2M_zipnerf" +CAP_MAX=2000000 + +# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf" +# CAP_MAX=4000000 + +for SCENE in $SCENE_LIST; +do + echo "Running $SCENE" + + # train and eval + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + --strategy.cap-max $CAP_MAX \ + --opacity_reg 0.001 \ + --camera_model fisheye \ + --data_dir $SCENE_DIR/$SCENE/ \ + --result_dir $RESULT_DIR/$SCENE/ +done \ No newline at end of file diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh new file mode 100644 index 000000000..f0f29ecd8 --- /dev/null +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh @@ -0,0 +1,22 @@ +SCENE_DIR="data/zipnerf_undistort" +SCENE_LIST="berlin london nyc alameda" +DATA_FACTOR=2 + +RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort" +CAP_MAX=2000000 + +# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistort" +# CAP_MAX=4000000 + +for SCENE in $SCENE_LIST; +do + echo "Running $SCENE" + + # train and eval + CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ + --strategy.cap-max $CAP_MAX \ + --opacity_reg 0.001 \ + --camera_model pinhole \ + --data_dir $SCENE_DIR/$SCENE/ \ + --result_dir $RESULT_DIR/$SCENE/ +done \ No newline at end of file diff --git a/examples/benchmarks/mcmc.sh b/examples/benchmarks/mcmc.sh index 23e40838d..0eaa5c8bb 100644 --- a/examples/benchmarks/mcmc.sh +++ b/examples/benchmarks/mcmc.sh @@ -19,7 +19,7 @@ do CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir data/360_v2/$SCENE/ \ + --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ # run eval and render diff --git a/examples/benchmarks/mcmc_zipnerf.sh b/examples/benchmarks/mcmc_zipnerf.sh deleted file mode 100644 index f17c09ee0..000000000 --- a/examples/benchmarks/mcmc_zipnerf.sh +++ /dev/null @@ -1,58 +0,0 @@ -# SCENE_DIR="data/zipnerf/undistort" -# RESULT_DIR="results/benchmark_zipnerf/undistort" -# CAMERA_MODEL="pinhole" - -SCENE_DIR="data/zipnerf/fisheye" -RESULT_DIR="results/benchmark_zipnerf/fisheye_2m" -CAMERA_MODEL="fisheye" -SCENE_LIST="berlin london nyc alameda" -RENDER_TRAJ_PATH="interp" - -CAP_MAX=2000000 -DATA_FACTOR=4 - -for SCENE in $SCENE_LIST; -do - echo "Running $SCENE" - - # train without eval - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ - --strategy.cap-max $CAP_MAX \ - --opacity_reg 0.001 \ - --camera_model $CAMERA_MODEL \ - --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir $SCENE_DIR/$SCENE/ \ - --result_dir $RESULT_DIR/$SCENE/ - - # run eval and render - CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ - --strategy.cap-max $CAP_MAX \ - --opacity_reg 0.001 \ - --camera_model $CAMERA_MODEL \ - --render_traj_path $RENDER_TRAJ_PATH \ - --data_dir $SCENE_DIR/$SCENE/ \ - --result_dir $RESULT_DIR/$SCENE/ \ - --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt -done - - -for SCENE in $SCENE_LIST; -do - echo "=== Eval Stats ===" - - for STATS in $RESULT_DIR/$SCENE/stats/val*.json; - do - echo $STATS - cat $STATS; - echo - done - - echo "=== Train Stats ===" - - for STATS in $RESULT_DIR/$SCENE/stats/train*_rank0.json; - do - echo $STATS - cat $STATS; - echo - done -done \ No newline at end of file diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 994a64abc..f41863a35 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -107,7 +107,7 @@ def __init__( # image size imsize_dict[camera_id] = (cam.width // factor, cam.height // factor) - + mask_dict[camera_id] = None print( f"[Parser] {len(imdata)} images, taken by {len(set(camera_ids))} cameras." ) @@ -230,6 +230,7 @@ def __init__( mapx, mapy = cv2.initUndistortRectifyMap( K, params, None, K_undist, (width, height), cv2.CV_32FC1 ) + mask = None elif camtype == "fisheye": fx = K[0, 0] fy = K[1, 1] @@ -341,8 +342,9 @@ def __getitem__(self, item: int) -> Dict[str, Any]: "camtoworld": torch.from_numpy(camtoworlds).float(), "image": torch.from_numpy(image).float(), "image_id": item, # the index of the image in the dataset - "mask": torch.from_numpy(mask).bool(), } + if mask is not None: + data["mask"] = torch.from_numpy(mask).bool() if self.load_depths: # projected points to image plane to get depths diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index c91116fcc..c674284aa 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -512,9 +512,7 @@ def train(self): pixels.shape[0] * pixels.shape[1] * pixels.shape[2] ) image_ids = data["image_id"].to(device) - masks = ( - data["mask"].to(device) if data["mask"] is not None else None - ) # [1, H, W] + masks = data["mask"].to(device) if "mask" in data else None # [1, H, W] if cfg.depth_loss: points = data["points"].to(device) # [1, M, 2] depths_gt = data["depths"].to(device) # [1, M] @@ -750,7 +748,7 @@ def eval(self, step: int, stage: str = "val"): camtoworlds = data["camtoworld"].to(device) Ks = data["K"].to(device) pixels = data["image"].to(device) / 255.0 - masks = data["mask"].to(device) if data["mask"] is not None else None + masks = data["mask"].to(device) if "mask" in data else None height, width = pixels.shape[1:3] torch.cuda.synchronize() From 148c218520ee35839b93ca57d1712e492b4fb476 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 16:41:53 -0700 Subject: [PATCH 26/38] reduce diff --- examples/simple_trainer.py | 2 +- gsplat/cuda/_torch_impl.py | 9 +-------- gsplat/rendering.py | 4 ++-- tests/test_basic.py | 2 -- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index c674284aa..f3b2ac8c4 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -444,7 +444,7 @@ def rasterize_splats( sparse_grad=self.cfg.sparse_grad, rasterize_mode=rasterize_mode, distributed=self.world_size > 1, - camera_model=self.cfg.camera_model, + fisheye=self.cfg.camera_model == "fisheye", **kwargs, ) if masks is not None: diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py index c02535f7b..235d1b577 100644 --- a/gsplat/cuda/_torch_impl.py +++ b/gsplat/cuda/_torch_impl.py @@ -235,9 +235,7 @@ def _world_to_cam( def _fully_fused_projection( means: Tensor, # [N, 3] - covars: Optional[Tensor], # [N, 6] or None - quats: Optional[Tensor], # [N, 4] or None - scales: Optional[Tensor], # [N, 3] or None + covars: Tensor, # [N, 3, 3] viewmats: Tensor, # [C, 4, 4] Ks: Tensor, # [C, 3, 3] width: int, @@ -256,11 +254,6 @@ def _fully_fused_projection( This is a minimal implementation of fully fused version, which has more arguments. Not all arguments are supported. """ - if covars is None: - covars = _quat_scale_to_covar_preci( - quats, scales, compute_covar=True, compute_preci=False - )[0] - means_c, covars_c = _world_to_cam(means, covars, viewmats) if ortho: diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 4e4b3dd70..6f4f9c577 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -46,8 +46,8 @@ def rasterization( channel_chunk: int = 32, distributed: bool = False, ortho: bool = False, + fisheye: bool = False, covars: Optional[Tensor] = None, - camera_model: Literal["pinhole", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Dict]: """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C). @@ -308,7 +308,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso sparse_grad=sparse_grad, calc_compensations=(rasterize_mode == "antialiased"), ortho=ortho, - fisheye=camera_model == "fisheye", + fisheye=fisheye, ) if packed: diff --git a/tests/test_basic.py b/tests/test_basic.py index c63ea44a4..89fdd8a1d 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -224,8 +224,6 @@ def test_projection( _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection( means, _covars, - None, - None, viewmats, Ks, width, From 6adbf6d99105a879c3dba576183ed51e400b236b Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 16:46:00 -0700 Subject: [PATCH 27/38] minor --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 2 +- examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 2 +- examples/datasets/colmap.py | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh index ff6f7b9e5..f315e0c15 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -19,4 +19,4 @@ do --camera_model fisheye \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ -done \ No newline at end of file +done diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh index f0f29ecd8..f2a2b462f 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh @@ -19,4 +19,4 @@ do --camera_model pinhole \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ -done \ No newline at end of file +done diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index f41863a35..79638d538 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -104,8 +104,6 @@ def __init__( ), f"Only perspective and fisheye cameras are supported, got {type_}" params_dict[camera_id] = params - - # image size imsize_dict[camera_id] = (cam.width // factor, cam.height // factor) mask_dict[camera_id] = None print( From c2e7ada02829b45c4dae6d04d6a1599a9d5ffa3a Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 11 Sep 2024 17:38:52 -0700 Subject: [PATCH 28/38] weird ortho bug --- tests/test_basic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index 89fdd8a1d..a64084776 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -165,10 +165,10 @@ def test_proj(test_data, ortho: bool): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") -@pytest.mark.parametrize("fused", [False, True]) -@pytest.mark.parametrize("calc_compensations", [False, True]) -@pytest.mark.parametrize("ortho", [False]) @pytest.mark.parametrize("fisheye", [True, False]) +@pytest.mark.parametrize("fused", [False, True]) +@pytest.mark.parametrize("calc_compensations", [True, False]) +@pytest.mark.parametrize("ortho", [True, False]) def test_projection( test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool ): From a31e65a54411cfc324bf712e5a1b6a41b44fdaed Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Mon, 16 Sep 2024 22:07:38 -0700 Subject: [PATCH 29/38] vectorize --- examples/datasets/colmap.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index e007ae2b7..78bc24152 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -251,26 +251,21 @@ def __init__( fy = K[1, 1] cx = K[0, 2] cy = K[1, 2] - mapx = np.zeros((height, width), dtype=np.float32) - mapy = np.zeros((height, width), dtype=np.float32) - for i in range(0, width): - for j in range(0, height): - x = float(i) - y = float(j) - x1 = (x - cx) / fx - y1 = (y - cy) / fy - theta = np.sqrt(x1**2 + y1**2) - r = ( - 1.0 - + params[0] * theta**2 - + params[1] * theta**4 - + params[2] * theta**6 - + params[3] * theta**8 - ) - x2 = fx * x1 * r + width // 2 - y2 = fy * y1 * r + height // 2 - mapx[j, i] = x2 - mapy[j, i] = y2 + grid_x, grid_y = np.meshgrid( + np.arange(width, dtype=np.float32), np.arange(height, dtype=np.float32), indexing="xy" + ) + x1 = (grid_x - cx) / fx + y1 = (grid_y - cy) / fy + theta = np.sqrt(x1**2 + y1**2) + r = ( + 1.0 + + params[0] * theta**2 + + params[1] * theta**4 + + params[2] * theta**6 + + params[3] * theta**8 + ) + mapx = fx * x1 * r + width // 2 + mapy = fy * y1 * r + height // 2 mask = np.logical_and( np.logical_and(mapx > 0, mapy > 0), From 41aa3982b9cad8554ba4c5b42f63a7c76a306b12 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 17 Sep 2024 15:23:42 -0700 Subject: [PATCH 30/38] ellipse --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 5 ++++- examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh index f315e0c15..75507e454 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -1,6 +1,7 @@ SCENE_DIR="data/zipnerf" SCENE_LIST="berlin london nyc alameda" -DATA_FACTOR=2 +DATA_FACTOR=4 +RENDER_TRAJ_PATH="ellipse" RESULT_DIR="results/benchmark_mcmc_2M_zipnerf" CAP_MAX=2000000 @@ -16,7 +17,9 @@ do CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --opacity_reg 0.001 \ + --render_traj_path $RENDER_TRAJ_PATH \ --camera_model fisheye \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ + done diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh index f2a2b462f..1a218e1e2 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh @@ -1,6 +1,7 @@ SCENE_DIR="data/zipnerf_undistort" SCENE_LIST="berlin london nyc alameda" -DATA_FACTOR=2 +DATA_FACTOR=4 +RENDER_TRAJ_PATH="ellipse" RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort" CAP_MAX=2000000 @@ -16,7 +17,9 @@ do CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --opacity_reg 0.001 \ + --render_traj_path $RENDER_TRAJ_PATH \ --camera_model pinhole \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ + done From 7f2972af6b5480d80af34497f41acdf95dcead73 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 17 Sep 2024 16:01:16 -0700 Subject: [PATCH 31/38] unify python side camera_model --- examples/datasets/colmap.py | 4 ++- examples/simple_trainer.py | 4 +-- gsplat/cuda/_torch_impl.py | 12 ++++---- gsplat/cuda/_wrapper.py | 57 +++++++++++++++++++----------------- gsplat/cuda/csrc/bindings.h | 4 ++- gsplat/cuda/csrc/proj_bwd.cu | 18 ++++++++++++ gsplat/cuda/csrc/proj_fwd.cu | 7 ++++- gsplat/rendering.py | 6 ++-- tests/test_basic.py | 50 ++++++++++++++++++------------- 9 files changed, 100 insertions(+), 62 deletions(-) diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 78bc24152..127d2d4a4 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -252,7 +252,9 @@ def __init__( cx = K[0, 2] cy = K[1, 2] grid_x, grid_y = np.meshgrid( - np.arange(width, dtype=np.float32), np.arange(height, dtype=np.float32), indexing="xy" + np.arange(width, dtype=np.float32), + np.arange(height, dtype=np.float32), + indexing="xy", ) x1 = (grid_x - cx) / fx y1 = (grid_y - cy) / fy diff --git a/examples/simple_trainer.py b/examples/simple_trainer.py index 8894dcb1f..3e544201b 100644 --- a/examples/simple_trainer.py +++ b/examples/simple_trainer.py @@ -68,7 +68,7 @@ class Config: # Normalize the world space normalize_world_space: bool = True # Camera model - camera_model: Literal["pinhole", "fisheye"] = "pinhole" + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole" # Port for the viewer server port: int = 8080 @@ -477,7 +477,7 @@ def rasterize_splats( sparse_grad=self.cfg.sparse_grad, rasterize_mode=rasterize_mode, distributed=self.world_size > 1, - fisheye=self.cfg.camera_model == "fisheye", + camera_model=self.cfg.camera_model, **kwargs, ) if masks is not None: diff --git a/gsplat/cuda/_torch_impl.py b/gsplat/cuda/_torch_impl.py index d243deed2..892c6a66f 100644 --- a/gsplat/cuda/_torch_impl.py +++ b/gsplat/cuda/_torch_impl.py @@ -1,5 +1,6 @@ import struct from typing import Optional, Tuple +from typing_extensions import Literal, assert_never import torch import torch.nn.functional as F @@ -257,8 +258,7 @@ def _fully_fused_projection( near_plane: float = 0.01, far_plane: float = 1e10, calc_compensations: bool = False, - ortho: bool = False, - fisheye: bool = False, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]: """PyTorch implementation of `gsplat.cuda._wrapper.fully_fused_projection()` @@ -269,12 +269,14 @@ def _fully_fused_projection( """ means_c, covars_c = _world_to_cam(means, covars, viewmats) - if ortho: + if camera_model == "ortho": means2d, covars2d = _ortho_proj(means_c, covars_c, Ks, width, height) - elif fisheye: + elif camera_model == "fisheye": means2d, covars2d = _fisheye_proj(means_c, covars_c, Ks, width, height) - else: + elif camera_model == "pinhole": means2d, covars2d = _persp_proj(means_c, covars_c, Ks, width, height) + else: + assert_never(camera_model) det_orig = ( covars2d[..., 0, 0] * covars2d[..., 1, 1] diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index 5698df6fb..2ddd2ac9b 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -1,5 +1,6 @@ from typing import Callable, Optional, Tuple import warnings +from typing_extensions import Literal import torch from torch import Tensor @@ -115,7 +116,7 @@ def proj( Ks: Tensor, # [C, 3, 3] width: int, height: int, - ortho: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor]: """Projection of Gaussians (perspective or orthographic). @@ -139,7 +140,7 @@ def proj( means = means.contiguous() covars = covars.contiguous() Ks = Ks.contiguous() - return _Proj.apply(means, covars, Ks, width, height, ortho) + return _Proj.apply(means, covars, Ks, width, height, camera_model) def world_to_cam( @@ -187,8 +188,7 @@ def fully_fused_projection( packed: bool = False, sparse_grad: bool = False, calc_compensations: bool = False, - ortho: bool = False, - fisheye: bool = False, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: """Projects Gaussians to 2D. @@ -290,7 +290,7 @@ def fully_fused_projection( radius_clip, sparse_grad, calc_compensations, - ortho, + camera_model, ) else: return _FullyFusedProjection.apply( @@ -307,8 +307,7 @@ def fully_fused_projection( far_plane, radius_clip, calc_compensations, - ortho, - fisheye, + camera_model, ) @@ -668,15 +667,21 @@ def forward( Ks: Tensor, # [C, 3, 3] width: int, height: int, - ortho: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor]: means2d, covars2d = _make_lazy_cuda_func("proj_fwd")( - means, covars, Ks, width, height, ortho + means, + covars, + Ks, + width, + height, + camera_model == "ortho", + camera_model == "fisheye", ) ctx.save_for_backward(means, covars, Ks) ctx.width = width ctx.height = height - ctx.ortho = ortho + ctx.camera_model = camera_model return means2d, covars2d @staticmethod @@ -684,14 +689,15 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor): means, covars, Ks = ctx.saved_tensors width = ctx.width height = ctx.height - ortho = ctx.ortho + camera_model = ctx.camera_model v_means, v_covars = _make_lazy_cuda_func("proj_bwd")( means, covars, Ks, width, height, - ortho, + camera_model == "ortho", + camera_model == "fisheye", v_means2d.contiguous(), v_covars2d.contiguous(), ) @@ -755,8 +761,7 @@ def forward( far_plane: float, radius_clip: float, calc_compensations: bool, - ortho: bool, - fisheye: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: # "covars" and {"quats", "scales"} are mutually exclusive radii, means2d, depths, conics, compensations = _make_lazy_cuda_func( @@ -775,8 +780,8 @@ def forward( far_plane, radius_clip, calc_compensations, - ortho, - fisheye, + camera_model == "ortho", + camera_model == "fisheye", ) if not calc_compensations: compensations = None @@ -786,8 +791,7 @@ def forward( ctx.width = width ctx.height = height ctx.eps2d = eps2d - ctx.ortho = ortho - ctx.fisheye = fisheye + ctx.camera_model = camera_model return radii, means2d, depths, conics, compensations @@ -807,8 +811,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): width = ctx.width height = ctx.height eps2d = ctx.eps2d - ortho = ctx.ortho - fisheye = ctx.fisheye + camera_model = ctx.camera_model if v_compensations is not None: v_compensations = v_compensations.contiguous() v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func( @@ -823,8 +826,8 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): width, height, eps2d, - ortho, - fisheye, + camera_model == "ortho", + camera_model == "fisheye", radii, conics, compensations, @@ -1013,7 +1016,7 @@ def forward( radius_clip: float, sparse_grad: bool, calc_compensations: bool, - ortho: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ( indptr, @@ -1038,7 +1041,7 @@ def forward( far_plane, radius_clip, calc_compensations, - ortho, + camera_model == "ortho", ) if not calc_compensations: compensations = None @@ -1058,7 +1061,7 @@ def forward( ctx.height = height ctx.eps2d = eps2d ctx.sparse_grad = sparse_grad - ctx.ortho = ortho + ctx.camera_model = camera_model return camera_ids, gaussian_ids, radii, means2d, depths, conics, compensations @@ -1089,7 +1092,7 @@ def backward( height = ctx.height eps2d = ctx.eps2d sparse_grad = ctx.sparse_grad - ortho = ctx.ortho + camera_model = ctx.camera_model if v_compensations is not None: v_compensations = v_compensations.contiguous() @@ -1105,7 +1108,7 @@ def backward( width, height, eps2d, - ortho, + camera_model == "ortho", camera_ids, gaussian_ids, conics, diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h index 6002efa60..fb2f8891d 100644 --- a/gsplat/cuda/csrc/bindings.h +++ b/gsplat/cuda/csrc/bindings.h @@ -53,7 +53,8 @@ std::tuple proj_fwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho + const bool ortho, + const bool fisheye ); std::tuple proj_bwd_tensor( @@ -63,6 +64,7 @@ std::tuple proj_bwd_tensor( const uint32_t width, const uint32_t height, const bool ortho, + const bool fisheye, const torch::Tensor &v_means2d, // [C, N, 2] const torch::Tensor &v_covars2d // [C, N, 2, 2] ); diff --git a/gsplat/cuda/csrc/proj_bwd.cu b/gsplat/cuda/csrc/proj_bwd.cu index f5d44a3e5..cb05c791a 100644 --- a/gsplat/cuda/csrc/proj_bwd.cu +++ b/gsplat/cuda/csrc/proj_bwd.cu @@ -26,6 +26,7 @@ __global__ void proj_bwd_kernel( const uint32_t width, const uint32_t height, const bool ortho, + const bool fisheye, const T *__restrict__ v_means2d, // [C, N, 2] const T *__restrict__ v_covars2d, // [C, N, 2, 2] T *__restrict__ v_means, // [C, N, 3] @@ -75,6 +76,21 @@ __global__ void proj_bwd_kernel( v_mean, v_covar ); + } else if (fisheye) { + fisheye_proj_vjp( + mean, + covar, + fx, + fy, + cx, + cy, + width, + height, + glm::transpose(v_covar2d), + v_mean2d, + v_mean, + v_covar + ); } else { persp_proj_vjp( mean, @@ -114,6 +130,7 @@ std::tuple proj_bwd_tensor( const uint32_t width, const uint32_t height, const bool ortho, + const bool fisheye, const torch::Tensor &v_means2d, // [C, N, 2] const torch::Tensor &v_covars2d // [C, N, 2, 2] ) { @@ -151,6 +168,7 @@ std::tuple proj_bwd_tensor( width, height, ortho, + fisheye, v_means2d.data_ptr(), v_covars2d.data_ptr(), v_means.data_ptr(), diff --git a/gsplat/cuda/csrc/proj_fwd.cu b/gsplat/cuda/csrc/proj_fwd.cu index 7944bd581..8478b9e04 100644 --- a/gsplat/cuda/csrc/proj_fwd.cu +++ b/gsplat/cuda/csrc/proj_fwd.cu @@ -26,6 +26,7 @@ __global__ void proj_fwd_kernel( const uint32_t width, const uint32_t height, const bool ortho, + const bool fisheye, T *__restrict__ means2d, // [C, N, 2] T *__restrict__ covars2d // [C, N, 2, 2] ) { @@ -55,6 +56,8 @@ __global__ void proj_fwd_kernel( if (ortho) ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); + else if (fisheye) + fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); else persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); @@ -78,7 +81,8 @@ std::tuple proj_fwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho + const bool ortho, + const bool fisheye ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -112,6 +116,7 @@ std::tuple proj_fwd_tensor( width, height, ortho, + fisheye, means2d.data_ptr(), covars2d.data_ptr() ); diff --git a/gsplat/rendering.py b/gsplat/rendering.py index 5aa11ae5a..cddc2742f 100644 --- a/gsplat/rendering.py +++ b/gsplat/rendering.py @@ -49,8 +49,7 @@ def rasterization( rasterize_mode: Literal["classic", "antialiased"] = "classic", channel_chunk: int = 32, distributed: bool = False, - ortho: bool = False, - fisheye: bool = False, + camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", covars: Optional[Tensor] = None, ) -> Tuple[Tensor, Tensor, Dict]: """Rasterize a set of 3D Gaussians (N) to a batch of image planes (C). @@ -311,8 +310,7 @@ def reshape_view(C: int, world_view: torch.Tensor, N_world: list) -> torch.Tenso radius_clip=radius_clip, sparse_grad=sparse_grad, calc_compensations=(rasterize_mode == "antialiased"), - ortho=ortho, - fisheye=fisheye, + camera_model=camera_model, ) if packed: diff --git a/tests/test_basic.py b/tests/test_basic.py index a64084776..11c747dd1 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -6,6 +6,7 @@ ``` """ +from typing_extensions import Literal, assert_never import math import pytest @@ -122,9 +123,9 @@ def test_world_to_cam(test_data): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") -@pytest.mark.parametrize("ortho", [True, False]) -def test_proj(test_data, ortho: bool): - from gsplat.cuda._torch_impl import _persp_proj, _ortho_proj +@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"]) +def test_proj(test_data, camera_model: Literal["pinhole", "ortho", "fisheye"]): + from gsplat.cuda._torch_impl import _persp_proj, _ortho_proj, _fisheye_proj from gsplat.cuda._wrapper import proj, quat_scale_to_covar_preci, world_to_cam torch.manual_seed(42) @@ -140,11 +141,15 @@ def test_proj(test_data, ortho: bool): covars.requires_grad = True # forward - means2d, covars2d = proj(means, covars, Ks, width, height, ortho) - if ortho: + means2d, covars2d = proj(means, covars, Ks, width, height, camera_model) + if camera_model == "ortho": _means2d, _covars2d = _ortho_proj(means, covars, Ks, width, height) - else: + elif camera_model == "fisheye": + _means2d, _covars2d = _fisheye_proj(means, covars, Ks, width, height) + elif camera_model == "pinhole": _means2d, _covars2d = _persp_proj(means, covars, Ks, width, height) + else: + assert_never(camera_model) torch.testing.assert_close(means2d, _means2d, rtol=1e-4, atol=1e-4) torch.testing.assert_close(covars2d, _covars2d, rtol=1e-1, atol=3e-2) @@ -165,12 +170,14 @@ def test_proj(test_data, ortho: bool): @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device") -@pytest.mark.parametrize("fisheye", [True, False]) +@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"]) @pytest.mark.parametrize("fused", [False, True]) @pytest.mark.parametrize("calc_compensations", [True, False]) -@pytest.mark.parametrize("ortho", [True, False]) def test_projection( - test_data, fused: bool, calc_compensations: bool, ortho: bool, fisheye: bool + test_data, + fused: bool, + calc_compensations: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"], ): from gsplat.cuda._torch_impl import _fully_fused_projection from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci @@ -202,8 +209,7 @@ def test_projection( width, height, calc_compensations=calc_compensations, - ortho=ortho, - fisheye=fisheye, + camera_model=camera_model, ) else: covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True) # [N, 6] @@ -217,8 +223,7 @@ def test_projection( width, height, calc_compensations=calc_compensations, - ortho=ortho, - fisheye=fisheye, + camera_model=camera_model, ) _covars, _ = quat_scale_to_covar_preci(quats, scales, triu=False) # [N, 3, 3] _radii, _means2d, _depths, _conics, _compensations = _fully_fused_projection( @@ -229,8 +234,7 @@ def test_projection( width, height, calc_compensations=calc_compensations, - ortho=ortho, - fisheye=fisheye, + camera_model=camera_model, ) # radii is integer so we allow for 1 unit difference @@ -275,9 +279,13 @@ def test_projection( @pytest.mark.parametrize("fused", [False, True]) @pytest.mark.parametrize("sparse_grad", [False, True]) @pytest.mark.parametrize("calc_compensations", [False, True]) -@pytest.mark.parametrize("ortho", [True, False]) +@pytest.mark.parametrize("camera_model", ["pinhole", "ortho"]) def test_fully_fused_projection_packed( - test_data, fused: bool, sparse_grad: bool, calc_compensations: bool, ortho: bool + test_data, + fused: bool, + sparse_grad: bool, + calc_compensations: bool, + camera_model: Literal["pinhole", "ortho", "fisheye"], ): from gsplat.cuda._wrapper import fully_fused_projection, quat_scale_to_covar_preci @@ -318,7 +326,7 @@ def test_fully_fused_projection_packed( packed=True, sparse_grad=sparse_grad, calc_compensations=calc_compensations, - ortho=ortho, + camera_model=camera_model, ) _radii, _means2d, _depths, _conics, _compensations = fully_fused_projection( means, @@ -331,7 +339,7 @@ def test_fully_fused_projection_packed( height, packed=False, calc_compensations=calc_compensations, - ortho=ortho, + camera_model=camera_model, ) else: covars, _ = quat_scale_to_covar_preci(quats, scales, triu=True) # [N, 6] @@ -355,7 +363,7 @@ def test_fully_fused_projection_packed( packed=True, sparse_grad=sparse_grad, calc_compensations=calc_compensations, - ortho=ortho, + camera_model=camera_model, ) _radii, _means2d, _depths, _conics, _compensations = fully_fused_projection( means, @@ -368,7 +376,7 @@ def test_fully_fused_projection_packed( height, packed=False, calc_compensations=calc_compensations, - ortho=ortho, + camera_model=camera_model, ) # recover packed tensors to full matrices for testing From 593769c6f6c5abb659753bfe95b5fcfd54c62657 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Tue, 17 Sep 2024 16:06:24 -0700 Subject: [PATCH 32/38] fisheye packed mode --- gsplat/cuda/_wrapper.py | 2 ++ gsplat/cuda/csrc/bindings.h | 4 +++- .../csrc/fully_fused_projection_packed_bwd.cu | 19 ++++++++++++++++++ .../csrc/fully_fused_projection_packed_fwd.cu | 20 ++++++++++++++++++- tests/test_basic.py | 2 +- 5 files changed, 44 insertions(+), 3 deletions(-) diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index 2ddd2ac9b..c55bab0a4 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -1042,6 +1042,7 @@ def forward( radius_clip, calc_compensations, camera_model == "ortho", + camera_model == "fisheye", ) if not calc_compensations: compensations = None @@ -1109,6 +1110,7 @@ def backward( height, eps2d, camera_model == "ortho", + camera_model == "fisheye", camera_ids, gaussian_ids, conics, diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h index fb2f8891d..a565c6760 100644 --- a/gsplat/cuda/csrc/bindings.h +++ b/gsplat/cuda/csrc/bindings.h @@ -270,7 +270,8 @@ fully_fused_projection_packed_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho + const bool ortho, + const bool fisheye ); std::tuple< @@ -291,6 +292,7 @@ fully_fused_projection_packed_bwd_tensor( const uint32_t image_height, const float eps2d, const bool ortho, + const bool fisheye, // fwd outputs const torch::Tensor &camera_ids, // [nnz] const torch::Tensor &gaussian_ids, // [nnz] diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu index 65eada111..bd351403a 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu @@ -32,6 +32,7 @@ __global__ void fully_fused_projection_packed_bwd_kernel( const int32_t image_height, const T eps2d, const bool ortho, + const bool fisheye, // fwd outputs const int64_t *__restrict__ camera_ids, // [nnz] const int64_t *__restrict__ gaussian_ids, // [nnz] @@ -145,6 +146,22 @@ __global__ void fully_fused_projection_packed_bwd_kernel( v_mean_c, v_covar_c ); + } else if (fisheye) { + // vjp: fisheye projection + fisheye_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); } else { // vjp: perspective projection persp_proj_vjp( @@ -298,6 +315,7 @@ fully_fused_projection_packed_bwd_tensor( const uint32_t image_height, const float eps2d, const bool ortho, + const bool fisheye, // fwd outputs const torch::Tensor &camera_ids, // [nnz] const torch::Tensor &gaussian_ids, // [nnz] @@ -384,6 +402,7 @@ fully_fused_projection_packed_bwd_tensor( image_height, eps2d, ortho, + fisheye, camera_ids.data_ptr(), gaussian_ids.data_ptr(), conics.data_ptr(), diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu index 12f21c611..983ef2940 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu @@ -35,6 +35,7 @@ __global__ void fully_fused_projection_packed_fwd_kernel( const int32_t *__restrict__ block_accum, // [C * blocks_per_row] packing helper const bool ortho, + const bool fisheye, // outputs int32_t *__restrict__ block_cnts, // [C * blocks_per_row] packing helper int32_t *__restrict__ indptr, // [C + 1] @@ -134,6 +135,20 @@ __global__ void fully_fused_projection_packed_fwd_kernel( covar2d, mean2d ); + } else if (fisheye) { + // fisheye projection + fisheye_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); } else { // perspective projection persp_proj( @@ -255,7 +270,8 @@ fully_fused_projection_packed_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho + const bool ortho, + const bool fisheye ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -305,6 +321,7 @@ fully_fused_projection_packed_fwd_tensor( radius_clip, nullptr, ortho, + fisheye, block_cnts.data_ptr(), nullptr, nullptr, @@ -355,6 +372,7 @@ fully_fused_projection_packed_fwd_tensor( radius_clip, block_accum.data_ptr(), ortho, + fisheye, nullptr, indptr.data_ptr(), camera_ids.data_ptr(), diff --git a/tests/test_basic.py b/tests/test_basic.py index 11c747dd1..22d2ee227 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -279,7 +279,7 @@ def test_projection( @pytest.mark.parametrize("fused", [False, True]) @pytest.mark.parametrize("sparse_grad", [False, True]) @pytest.mark.parametrize("calc_compensations", [False, True]) -@pytest.mark.parametrize("camera_model", ["pinhole", "ortho"]) +@pytest.mark.parametrize("camera_model", ["pinhole", "ortho", "fisheye"]) def test_fully_fused_projection_packed( test_data, fused: bool, From 07d20876bd94566700855ec5affffb035707b451 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 10:09:03 -0700 Subject: [PATCH 33/38] cuda enum --- gsplat/cuda/_wrapper.py | 20 +++++-- gsplat/cuda/csrc/bindings.h | 13 +++-- gsplat/cuda/csrc/ext.cpp | 6 ++ gsplat/cuda/csrc/proj_bwd.cu | 103 ++++++++++++++++++----------------- gsplat/cuda/csrc/proj_fwd.cu | 26 +++++---- 5 files changed, 96 insertions(+), 72 deletions(-) diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index c55bab0a4..1e6dab002 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -1,4 +1,4 @@ -from typing import Callable, Optional, Tuple +from typing import Callable, Optional, Tuple, Any import warnings from typing_extensions import Literal @@ -16,6 +16,16 @@ def call_cuda(*args, **kwargs): return call_cuda +def _make_lazy_cuda_obj(name: str) -> Any: + # pylint: disable=import-outside-toplevel + from ._backend import _C + + obj = _C + for name_split in name.split("."): + obj = getattr(_C, name_split) + return obj + + def spherical_harmonics( degrees_to_use: int, dirs: Tensor, # [..., 3] @@ -669,14 +679,15 @@ def forward( height: int, camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor]: + camera_model = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}") + means2d, covars2d = _make_lazy_cuda_func("proj_fwd")( means, covars, Ks, width, height, - camera_model == "ortho", - camera_model == "fisheye", + camera_model, ) ctx.save_for_backward(means, covars, Ks) ctx.width = width @@ -696,8 +707,7 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor): Ks, width, height, - camera_model == "ortho", - camera_model == "fisheye", + camera_model, v_means2d.contiguous(), v_covars2d.contiguous(), ) diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h index a565c6760..d71e63bfe 100644 --- a/gsplat/cuda/csrc/bindings.h +++ b/gsplat/cuda/csrc/bindings.h @@ -31,6 +31,13 @@ namespace gsplat { +enum CameraModelType +{ + PINHOLE = 0, + ORTHO = 1, + FISHEYE = 2, +}; + std::tuple quat_scale_to_covar_preci_fwd_tensor( const torch::Tensor &quats, // [N, 4] const torch::Tensor &scales, // [N, 3] @@ -53,8 +60,7 @@ std::tuple proj_fwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ); std::tuple proj_bwd_tensor( @@ -63,8 +69,7 @@ std::tuple proj_bwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, const torch::Tensor &v_means2d, // [C, N, 2] const torch::Tensor &v_covars2d // [C, N, 2, 2] ); diff --git a/gsplat/cuda/csrc/ext.cpp b/gsplat/cuda/csrc/ext.cpp index 0a4a67aac..a85129959 100644 --- a/gsplat/cuda/csrc/ext.cpp +++ b/gsplat/cuda/csrc/ext.cpp @@ -1,6 +1,12 @@ #include "bindings.h" PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + py::enum_(m, "CameraModelType") + .value("PINHOLE", gsplat::CameraModelType::PINHOLE) + .value("ORTHO", gsplat::CameraModelType::ORTHO) + .value("FISHEYE", gsplat::CameraModelType::FISHEYE) + .export_values(); + m.def("compute_sh_fwd", &gsplat::compute_sh_fwd_tensor); m.def("compute_sh_bwd", &gsplat::compute_sh_bwd_tensor); diff --git a/gsplat/cuda/csrc/proj_bwd.cu b/gsplat/cuda/csrc/proj_bwd.cu index cb05c791a..66557f679 100644 --- a/gsplat/cuda/csrc/proj_bwd.cu +++ b/gsplat/cuda/csrc/proj_bwd.cu @@ -25,8 +25,7 @@ __global__ void proj_bwd_kernel( const T *__restrict__ Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, const T *__restrict__ v_means2d, // [C, N, 2] const T *__restrict__ v_covars2d, // [C, N, 2, 2] T *__restrict__ v_means, // [C, N, 3] @@ -61,51 +60,55 @@ __global__ void proj_bwd_kernel( const vec2 v_mean2d = glm::make_vec2(v_means2d); const mat2 v_covar2d = glm::make_mat2(v_covars2d); - if (ortho){ - ortho_proj_vjp( - mean, - covar, - fx, - fy, - cx, - cy, - width, - height, - glm::transpose(v_covar2d), - v_mean2d, - v_mean, - v_covar - ); - } else if (fisheye) { - fisheye_proj_vjp( - mean, - covar, - fx, - fy, - cx, - cy, - width, - height, - glm::transpose(v_covar2d), - v_mean2d, - v_mean, - v_covar - ); - } else { - persp_proj_vjp( - mean, - covar, - fx, - fy, - cx, - cy, - width, - height, - glm::transpose(v_covar2d), - v_mean2d, - v_mean, - v_covar - ); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj_vjp( + mean, + covar, + fx, + fy, + cx, + cy, + width, + height, + glm::transpose(v_covar2d), + v_mean2d, + v_mean, + v_covar + ); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj_vjp( + mean, + covar, + fx, + fy, + cx, + cy, + width, + height, + glm::transpose(v_covar2d), + v_mean2d, + v_mean, + v_covar + ); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj_vjp( + mean, + covar, + fx, + fy, + cx, + cy, + width, + height, + glm::transpose(v_covar2d), + v_mean2d, + v_mean, + v_covar + ); + break; } // write to outputs: glm is column-major but we want row-major @@ -129,8 +132,7 @@ std::tuple proj_bwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, const torch::Tensor &v_means2d, // [C, N, 2] const torch::Tensor &v_covars2d // [C, N, 2, 2] ) { @@ -167,8 +169,7 @@ std::tuple proj_bwd_tensor( Ks.data_ptr(), width, height, - ortho, - fisheye, + camera_model, v_means2d.data_ptr(), v_covars2d.data_ptr(), v_means.data_ptr(), diff --git a/gsplat/cuda/csrc/proj_fwd.cu b/gsplat/cuda/csrc/proj_fwd.cu index 8478b9e04..861f60479 100644 --- a/gsplat/cuda/csrc/proj_fwd.cu +++ b/gsplat/cuda/csrc/proj_fwd.cu @@ -25,8 +25,7 @@ __global__ void proj_fwd_kernel( const T *__restrict__ Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, T *__restrict__ means2d, // [C, N, 2] T *__restrict__ covars2d // [C, N, 2, 2] ) { @@ -54,12 +53,17 @@ __global__ void proj_fwd_kernel( const vec3 mean = glm::make_vec3(means); const mat3 covar = glm::make_mat3(covars); - if (ortho) - ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); - else if (fisheye) - fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); - else - persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj(mean, covar, fx, fy, cx, cy, width, height, covar2d, mean2d); + break; + } // write to outputs: glm is column-major but we want row-major GSPLAT_PRAGMA_UNROLL @@ -81,8 +85,7 @@ std::tuple proj_fwd_tensor( const torch::Tensor &Ks, // [C, 3, 3] const uint32_t width, const uint32_t height, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -115,8 +118,7 @@ std::tuple proj_fwd_tensor( Ks.data_ptr(), width, height, - ortho, - fisheye, + camera_model, means2d.data_ptr(), covars2d.data_ptr() ); From e7370735d71dcb68a34c54f5244e137618095f35 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 10:35:30 -0700 Subject: [PATCH 34/38] use c++ enum --- gsplat/cuda/_wrapper.py | 40 ++++--- gsplat/cuda/csrc/bindings.h | 12 +- .../cuda/csrc/fully_fused_projection_bwd.cu | 103 ++++++++--------- .../cuda/csrc/fully_fused_projection_fwd.cu | 91 +++++++-------- .../csrc/fully_fused_projection_packed_bwd.cu | 106 +++++++++--------- .../csrc/fully_fused_projection_packed_fwd.cu | 98 ++++++++-------- 6 files changed, 224 insertions(+), 226 deletions(-) diff --git a/gsplat/cuda/_wrapper.py b/gsplat/cuda/_wrapper.py index 1e6dab002..1c3826110 100644 --- a/gsplat/cuda/_wrapper.py +++ b/gsplat/cuda/_wrapper.py @@ -679,7 +679,9 @@ def forward( height: int, camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor]: - camera_model = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}") + camera_model_type = _make_lazy_cuda_obj( + f"CameraModelType.{camera_model.upper()}" + ) means2d, covars2d = _make_lazy_cuda_func("proj_fwd")( means, @@ -687,12 +689,12 @@ def forward( Ks, width, height, - camera_model, + camera_model_type, ) ctx.save_for_backward(means, covars, Ks) ctx.width = width ctx.height = height - ctx.camera_model = camera_model + ctx.camera_model_type = camera_model_type return means2d, covars2d @staticmethod @@ -700,14 +702,14 @@ def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor): means, covars, Ks = ctx.saved_tensors width = ctx.width height = ctx.height - camera_model = ctx.camera_model + camera_model_type = ctx.camera_model_type v_means, v_covars = _make_lazy_cuda_func("proj_bwd")( means, covars, Ks, width, height, - camera_model, + camera_model_type, v_means2d.contiguous(), v_covars2d.contiguous(), ) @@ -773,6 +775,10 @@ def forward( calc_compensations: bool, camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + camera_model_type = _make_lazy_cuda_obj( + f"CameraModelType.{camera_model.upper()}" + ) + # "covars" and {"quats", "scales"} are mutually exclusive radii, means2d, depths, conics, compensations = _make_lazy_cuda_func( "fully_fused_projection_fwd" @@ -790,8 +796,7 @@ def forward( far_plane, radius_clip, calc_compensations, - camera_model == "ortho", - camera_model == "fisheye", + camera_model_type, ) if not calc_compensations: compensations = None @@ -801,7 +806,7 @@ def forward( ctx.width = width ctx.height = height ctx.eps2d = eps2d - ctx.camera_model = camera_model + ctx.camera_model_type = camera_model_type return radii, means2d, depths, conics, compensations @@ -821,7 +826,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): width = ctx.width height = ctx.height eps2d = ctx.eps2d - camera_model = ctx.camera_model + camera_model_type = ctx.camera_model_type if v_compensations is not None: v_compensations = v_compensations.contiguous() v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func( @@ -836,8 +841,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations): width, height, eps2d, - camera_model == "ortho", - camera_model == "fisheye", + camera_model_type, radii, conics, compensations, @@ -1028,6 +1032,10 @@ def forward( calc_compensations: bool, camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole", ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + camera_model_type = _make_lazy_cuda_obj( + f"CameraModelType.{camera_model.upper()}" + ) + ( indptr, camera_ids, @@ -1051,8 +1059,7 @@ def forward( far_plane, radius_clip, calc_compensations, - camera_model == "ortho", - camera_model == "fisheye", + camera_model_type, ) if not calc_compensations: compensations = None @@ -1072,7 +1079,7 @@ def forward( ctx.height = height ctx.eps2d = eps2d ctx.sparse_grad = sparse_grad - ctx.camera_model = camera_model + ctx.camera_model_type = camera_model_type return camera_ids, gaussian_ids, radii, means2d, depths, conics, compensations @@ -1103,7 +1110,7 @@ def backward( height = ctx.height eps2d = ctx.eps2d sparse_grad = ctx.sparse_grad - camera_model = ctx.camera_model + camera_model_type = ctx.camera_model_type if v_compensations is not None: v_compensations = v_compensations.contiguous() @@ -1119,8 +1126,7 @@ def backward( width, height, eps2d, - camera_model == "ortho", - camera_model == "fisheye", + camera_model_type, camera_ids, gaussian_ids, conics, diff --git a/gsplat/cuda/csrc/bindings.h b/gsplat/cuda/csrc/bindings.h index d71e63bfe..cf0dc8751 100644 --- a/gsplat/cuda/csrc/bindings.h +++ b/gsplat/cuda/csrc/bindings.h @@ -111,8 +111,7 @@ fully_fused_projection_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ); std::tuple< @@ -132,8 +131,7 @@ fully_fused_projection_bwd_tensor( const uint32_t image_width, const uint32_t image_height, const float eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const torch::Tensor &radii, // [C, N] const torch::Tensor &conics, // [C, N, 3] @@ -275,8 +273,7 @@ fully_fused_projection_packed_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ); std::tuple< @@ -296,8 +293,7 @@ fully_fused_projection_packed_bwd_tensor( const uint32_t image_width, const uint32_t image_height, const float eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const torch::Tensor &camera_ids, // [nnz] const torch::Tensor &gaussian_ids, // [nnz] diff --git a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu index a069eb2db..b5757ff40 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_bwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_bwd.cu @@ -30,8 +30,7 @@ __global__ void fully_fused_projection_bwd_kernel( const int32_t image_width, const int32_t image_height, const T eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const int32_t *__restrict__ radii, // [C, N] const T *__restrict__ conics, // [C, N, 3] @@ -129,51 +128,55 @@ __global__ void fully_fused_projection_bwd_kernel( mat3 v_covar_c(0.f); vec3 v_mean_c(0.f); - if (ortho){ - ortho_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); - } else if (fisheye) { - fisheye_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); - } else { - persp_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; } // add contribution from v_depths @@ -272,8 +275,7 @@ fully_fused_projection_bwd_tensor( const uint32_t image_width, const uint32_t image_height, const float eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const torch::Tensor &radii, // [C, N] const torch::Tensor &conics, // [C, N, 3] @@ -342,8 +344,7 @@ fully_fused_projection_bwd_tensor( image_width, image_height, eps2d, - ortho, - fisheye, + camera_model, radii.data_ptr(), conics.data_ptr(), compensations.has_value() diff --git a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu index 0ffeebb19..c651e803d 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_fwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_fwd.cu @@ -32,8 +32,7 @@ __global__ void fully_fused_projection_fwd_kernel( const T near_plane, const T far_plane, const T radius_clip, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // outputs int32_t *__restrict__ radii, // [C, N] T *__restrict__ means2d, // [C, N, 2] @@ -106,45 +105,49 @@ __global__ void fully_fused_projection_fwd_kernel( mat2 covar2d; vec2 mean2d; - if (ortho){ - ortho_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); - } else if (fisheye) { - fisheye_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); - } else { - persp_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; } T compensation; @@ -210,8 +213,7 @@ fully_fused_projection_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -259,8 +261,7 @@ fully_fused_projection_fwd_tensor( near_plane, far_plane, radius_clip, - ortho, - fisheye, + camera_model, radii.data_ptr(), means2d.data_ptr(), depths.data_ptr(), diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu index bd351403a..e5a0172fe 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_packed_bwd.cu @@ -31,8 +31,7 @@ __global__ void fully_fused_projection_packed_bwd_kernel( const int32_t image_width, const int32_t image_height, const T eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const int64_t *__restrict__ camera_ids, // [nnz] const int64_t *__restrict__ gaussian_ids, // [nnz] @@ -130,54 +129,55 @@ __global__ void fully_fused_projection_packed_bwd_kernel( T fx = Ks[0], cx = Ks[2], fy = Ks[4], cy = Ks[5]; mat3 v_covar_c(0.f); vec3 v_mean_c(0.f); - if (ortho){ - // vjp: orthographic projection - ortho_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); - } else if (fisheye) { - // vjp: fisheye projection - fisheye_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); - } else { - // vjp: perspective projection - persp_proj_vjp( - mean_c, - covar_c, - fx, - fy, - cx, - cy, - image_width, - image_height, - v_covar2d, - glm::make_vec2(v_means2d), - v_mean_c, - v_covar_c - ); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj_vjp( + mean_c, + covar_c, + fx, + fy, + cx, + cy, + image_width, + image_height, + v_covar2d, + glm::make_vec2(v_means2d), + v_mean_c, + v_covar_c + ); + break; } // add contribution from v_depths @@ -314,8 +314,7 @@ fully_fused_projection_packed_bwd_tensor( const uint32_t image_width, const uint32_t image_height, const float eps2d, - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // fwd outputs const torch::Tensor &camera_ids, // [nnz] const torch::Tensor &gaussian_ids, // [nnz] @@ -401,8 +400,7 @@ fully_fused_projection_packed_bwd_tensor( image_width, image_height, eps2d, - ortho, - fisheye, + camera_model, camera_ids.data_ptr(), gaussian_ids.data_ptr(), conics.data_ptr(), diff --git a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu index 983ef2940..4d8609f05 100644 --- a/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu +++ b/gsplat/cuda/csrc/fully_fused_projection_packed_fwd.cu @@ -34,8 +34,7 @@ __global__ void fully_fused_projection_packed_fwd_kernel( const T radius_clip, const int32_t *__restrict__ block_accum, // [C * blocks_per_row] packing helper - const bool ortho, - const bool fisheye, + const CameraModelType camera_model, // outputs int32_t *__restrict__ block_cnts, // [C * blocks_per_row] packing helper int32_t *__restrict__ indptr, // [C + 1] @@ -121,51 +120,51 @@ __global__ void fully_fused_projection_packed_fwd_kernel( covar_world_to_cam(R, covar, covar_c); Ks += row_idx * 9; - if (ortho){ - // orthographic projection - ortho_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); - } else if (fisheye) { - // fisheye projection - fisheye_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); - } else { - // perspective projection - persp_proj( - mean_c, - covar_c, - Ks[0], - Ks[4], - Ks[2], - Ks[5], - image_width, - image_height, - covar2d, - mean2d - ); + switch (camera_model) { + case CameraModelType::PINHOLE: // perspective projection + persp_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; + case CameraModelType::ORTHO: // orthographic projection + ortho_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; + case CameraModelType::FISHEYE: // fisheye projection + fisheye_proj( + mean_c, + covar_c, + Ks[0], + Ks[4], + Ks[2], + Ks[5], + image_width, + image_height, + covar2d, + mean2d + ); + break; } - det = add_blur(eps2d, covar2d, compensation); if (det <= 0.f) { valid = false; @@ -270,8 +269,7 @@ fully_fused_projection_packed_fwd_tensor( const float far_plane, const float radius_clip, const bool calc_compensations, - const bool ortho, - const bool fisheye + const CameraModelType camera_model ) { GSPLAT_DEVICE_GUARD(means); GSPLAT_CHECK_INPUT(means); @@ -320,8 +318,7 @@ fully_fused_projection_packed_fwd_tensor( far_plane, radius_clip, nullptr, - ortho, - fisheye, + camera_model, block_cnts.data_ptr(), nullptr, nullptr, @@ -371,8 +368,7 @@ fully_fused_projection_packed_fwd_tensor( far_plane, radius_clip, block_accum.data_ptr(), - ortho, - fisheye, + camera_model, nullptr, indptr.data_ptr(), camera_ids.data_ptr(), From fa23297fb0094cd16c6966e870062295a2c4275e Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 10:44:56 -0700 Subject: [PATCH 35/38] download dataset --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 3 +-- examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh | 9 ++++----- examples/datasets/download_dataset.py | 6 ++++++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh index 75507e454..e317df7e4 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -1,5 +1,5 @@ SCENE_DIR="data/zipnerf" -SCENE_LIST="berlin london nyc alameda" +SCENE_LIST="alameda berlin london nyc" DATA_FACTOR=4 RENDER_TRAJ_PATH="ellipse" @@ -21,5 +21,4 @@ do --camera_model fisheye \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ - done diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh index 1a218e1e2..945961634 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh @@ -1,12 +1,12 @@ -SCENE_DIR="data/zipnerf_undistort" -SCENE_LIST="berlin london nyc alameda" +SCENE_DIR="data/zipnerf_undistorted" +SCENE_LIST="alameda berlin london nyc" DATA_FACTOR=4 RENDER_TRAJ_PATH="ellipse" -RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistort" +RESULT_DIR="results/benchmark_mcmc_2M_zipnerf_undistorted" CAP_MAX=2000000 -# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistort" +# RESULT_DIR="results/benchmark_mcmc_4M_zipnerf_undistorted" # CAP_MAX=4000000 for SCENE in $SCENE_LIST; @@ -21,5 +21,4 @@ do --camera_model pinhole \ --data_dir $SCENE_DIR/$SCENE/ \ --result_dir $RESULT_DIR/$SCENE/ - done diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py index 272970a30..822eaf9a7 100755 --- a/examples/datasets/download_dataset.py +++ b/examples/datasets/download_dataset.py @@ -13,6 +13,8 @@ "mipnerf360", "mipnerf360_extra", "bilarf_data", + "alameda", + "alameda_undistorted", ] # dataset urls @@ -20,6 +22,8 @@ "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip", "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip", "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip", + "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip", + "alameda_undistorted": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip", } # rename maps @@ -27,6 +31,8 @@ "mipnerf360": "360_v2", "mipnerf360_extra": "360_v2", "bilarf_data": "bilarf", + "alameda": "zipnerf/alameda", + "alameda_undistorted": "zipnerf_undistorted/alameda", } From 2065224a2c4fc3fb7341a23e23f5c83e95219266 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 11:29:34 -0700 Subject: [PATCH 36/38] refactor dataset download to download zipnerf --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 2 +- ...distort.sh => mcmc_zipnerf_undistorted.sh} | 2 +- examples/datasets/colmap.py | 1 + examples/datasets/download_dataset.py | 125 +++++++++++------- 4 files changed, 78 insertions(+), 52 deletions(-) rename examples/benchmarks/fisheye/{mcmc_zipnerf_undistort.sh => mcmc_zipnerf_undistorted.sh} (94%) diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh index e317df7e4..075b1ffca 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -1,5 +1,5 @@ SCENE_DIR="data/zipnerf" -SCENE_LIST="alameda berlin london nyc" +SCENE_LIST="berlin london nyc alameda" DATA_FACTOR=4 RENDER_TRAJ_PATH="ellipse" diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh similarity index 94% rename from examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh rename to examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh index 945961634..3fcf12915 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistort.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh @@ -1,5 +1,5 @@ SCENE_DIR="data/zipnerf_undistorted" -SCENE_LIST="alameda berlin london nyc" +SCENE_LIST="berlin london nyc alameda" DATA_FACTOR=4 RENDER_TRAJ_PATH="ellipse" diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py index 127d2d4a4..938bad265 100644 --- a/examples/datasets/colmap.py +++ b/examples/datasets/colmap.py @@ -269,6 +269,7 @@ def __init__( mapx = fx * x1 * r + width // 2 mapy = fy * y1 * r + height // 2 + # Use mask to define ROI mask = np.logical_and( np.logical_and(mapx > 0, mapy > 0), np.logical_and(mapx < width - 1, mapy < height - 1), diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py index 822eaf9a7..a6de57534 100755 --- a/examples/datasets/download_dataset.py +++ b/examples/datasets/download_dataset.py @@ -13,8 +13,8 @@ "mipnerf360", "mipnerf360_extra", "bilarf_data", - "alameda", - "alameda_undistorted", + "zipnerf", + "zipnerf_undistorted", ] # dataset urls @@ -22,8 +22,18 @@ "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip", "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip", "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip", - "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip", - "alameda_undistorted": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip", + "zipnerf": { + "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip", + "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip", + "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip", + "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip", + }, + "zipnerf_undistorted": { + "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip", + "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip", + "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip", + "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip", + }, } # rename maps @@ -31,8 +41,8 @@ "mipnerf360": "360_v2", "mipnerf360_extra": "360_v2", "bilarf_data": "bilarf", - "alameda": "zipnerf/alameda", - "alameda_undistorted": "zipnerf_undistorted/alameda", + "zipnerf": "zipnerf", + "zipnerf_undistorted": "zipnerf_undistorted", } @@ -46,57 +56,72 @@ def main(self): self.dataset_download(self.dataset) def dataset_download(self, dataset: dataset_names): - (self.save_dir / dataset_rename_map[dataset]).mkdir(parents=True, exist_ok=True) + if isinstance(urls[dataset], dict): + for name, url in urls[dataset].items(): + url_file_name = Path(url).name + extract_path = self.save_dir / dataset_rename_map[dataset] / name + download_path = extract_path / url_file_name + download_and_extract(url, download_path, extract_path) + else: + url = urls[dataset] + url_file_name = Path(url).name + extract_path = self.save_dir / dataset_rename_map[dataset] + download_path = extract_path / url_file_name + download_and_extract(url, download_path, extract_path) - file_name = Path(urls[dataset]).name - # download - download_command = [ - "curl", - "-o", - str(self.save_dir / dataset_rename_map[dataset] / file_name), - urls[dataset], - ] - try: - subprocess.run(download_command, check=True) - print("File file downloaded succesfully.") - except subprocess.CalledProcessError as e: - print(f"Error downloading file: {e}") - - # if .zip - if Path(urls[dataset]).suffix == ".zip": - if os.name == "nt": # Windows doesn't have 'unzip' but 'tar' works - extract_command = [ - "tar", - "-xvf", - self.save_dir / dataset_rename_map[dataset] / file_name, - "-C", - self.save_dir / dataset_rename_map[dataset], - ] - else: - extract_command = [ - "unzip", - self.save_dir / dataset_rename_map[dataset] / file_name, - "-d", - self.save_dir / dataset_rename_map[dataset], - ] - # if .tar - else: +def download_and_extract(url: str, download_path: Path, extract_path: Path) -> None: + download_path.parent.mkdir(parents=True, exist_ok=True) + extract_path.mkdir(parents=True, exist_ok=True) + + # download + download_command = [ + "curl", + "-L", + "-o", + str(download_path), + url, + ] + try: + subprocess.run(download_command, check=True) + print("File file downloaded succesfully.") + except subprocess.CalledProcessError as e: + print(f"Error downloading file: {e}") + + # if .zip + if Path(url).suffix == ".zip": + if os.name == "nt": # Windows doesn't have 'unzip' but 'tar' works extract_command = [ "tar", - "-xvzf", - self.save_dir / dataset_rename_map[dataset] / file_name, + "-xvf", + download_path, "-C", - self.save_dir / dataset_rename_map[dataset], + extract_path, ] + else: + extract_command = [ + "unzip", + download_path, + "-d", + extract_path, + ] + # if .tar + else: + extract_command = [ + "tar", + "-xvzf", + download_path, + "-C", + extract_path, + ] - # extract - try: - subprocess.run(extract_command, check=True) - os.remove(self.save_dir / dataset_rename_map[dataset] / file_name) - print("Extraction complete.") - except subprocess.CalledProcessError as e: - print(f"Extraction failed: {e}") + # extract + try: + subprocess.run(extract_command, check=True) + os.remove(download_path) + print("Extraction complete.") + except subprocess.CalledProcessError as e: + print(f"Extraction failed: {e}") if __name__ == "__main__": From 6fe028d41501ad19a338a8edc43e11356a9ed645 Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 12:01:07 -0700 Subject: [PATCH 37/38] use lists --- examples/datasets/download_dataset.py | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/datasets/download_dataset.py b/examples/datasets/download_dataset.py index a6de57534..520e97d1e 100755 --- a/examples/datasets/download_dataset.py +++ b/examples/datasets/download_dataset.py @@ -22,18 +22,18 @@ "mipnerf360": "http://storage.googleapis.com/gresearch/refraw360/360_v2.zip", "mipnerf360_extra": "https://storage.googleapis.com/gresearch/refraw360/360_extra_scenes.zip", "bilarf_data": "https://huggingface.co/datasets/Yuehao/bilarf_data/resolve/main/bilarf_data.zip", - "zipnerf": { - "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip", - "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip", - "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip", - "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip", - }, - "zipnerf_undistorted": { - "berlin": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip", - "london": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip", - "nyc": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip", - "alameda": "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip", - }, + "zipnerf": [ + "https://storage.googleapis.com/gresearch/refraw360/zipnerf/berlin.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf/london.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf/nyc.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf/alameda.zip", + ], + "zipnerf_undistorted": [ + "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/berlin.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/london.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/nyc.zip", + "https://storage.googleapis.com/gresearch/refraw360/zipnerf-undistorted/alameda.zip", + ], } # rename maps @@ -56,10 +56,10 @@ def main(self): self.dataset_download(self.dataset) def dataset_download(self, dataset: dataset_names): - if isinstance(urls[dataset], dict): - for name, url in urls[dataset].items(): + if isinstance(urls[dataset], list): + for url in urls[dataset]: url_file_name = Path(url).name - extract_path = self.save_dir / dataset_rename_map[dataset] / name + extract_path = self.save_dir / dataset_rename_map[dataset] download_path = extract_path / url_file_name download_and_extract(url, download_path, extract_path) else: From e6c19d60fe5fa13057c1033b6c82d34f1768153f Mon Sep 17 00:00:00 2001 From: Jeffrey Hu Date: Wed, 18 Sep 2024 17:46:37 -0700 Subject: [PATCH 38/38] use bilateral grid as default for zipnerf --- examples/benchmarks/fisheye/mcmc_zipnerf.sh | 1 + examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf.sh b/examples/benchmarks/fisheye/mcmc_zipnerf.sh index 075b1ffca..bd0e57f44 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf.sh @@ -17,6 +17,7 @@ do CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --opacity_reg 0.001 \ + --use_bilateral_grid \ --render_traj_path $RENDER_TRAJ_PATH \ --camera_model fisheye \ --data_dir $SCENE_DIR/$SCENE/ \ diff --git a/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh index 3fcf12915..012c685cf 100644 --- a/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh +++ b/examples/benchmarks/fisheye/mcmc_zipnerf_undistorted.sh @@ -17,6 +17,7 @@ do CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \ --strategy.cap-max $CAP_MAX \ --opacity_reg 0.001 \ + --use_bilateral_grid \ --render_traj_path $RENDER_TRAJ_PATH \ --camera_model pinhole \ --data_dir $SCENE_DIR/$SCENE/ \