Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement normal consistency loss. #273

Open
wants to merge 77 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
d31aa2b
use inria cuda to train
jefequien Jul 2, 2024
6a58dc3
canvas list
jefequien Jul 2, 2024
1b6e094
2dgs
jefequien Jul 3, 2024
55b39bb
lambda_dist
jefequien Jul 3, 2024
f4ea0e0
clean up
jefequien Jul 3, 2024
87748f4
format
jefequien Jul 3, 2024
7ee6c20
3m garden first
jefequien Jul 3, 2024
143af51
2dgs_mcmc_sfm
jefequien Jul 4, 2024
6670314
train 3dgs without normal loss
jefequien Jul 4, 2024
286867b
Merge branch 'main' into jeff/inria
jefequien Jul 4, 2024
a8fd7a9
3dgs normal working
jefequien Jul 4, 2024
5870b25
baseline no gradient
jefequien Jul 5, 2024
a8adfe5
normal backprob
jefequien Jul 5, 2024
08637cb
cleanup
jefequien Jul 5, 2024
ea6ce1a
cleanup
jefequien Jul 5, 2024
f7f0b9c
cleanup
jefequien Jul 5, 2024
468c451
cmocean dense
jefequien Jul 5, 2024
8cfa0fc
cmo ice
jefequien Jul 5, 2024
20cd888
voltage
jefequien Jul 5, 2024
503ac93
edit bash
jefequien Jul 5, 2024
23f9984
clean up benchmark script
jefequien Jul 5, 2024
02e00fb
remove dist_loss
jefequien Jul 5, 2024
96e8d23
depth must be last
jefequien Jul 5, 2024
f336b95
colors normal depth
jefequien Jul 5, 2024
54f137e
reduce diff
jefequien Jul 8, 2024
50749de
cleanup
jefequien Jul 8, 2024
bcac804
remove distloss
jefequien Jul 8, 2024
5752261
benchmark script
jefequien Jul 8, 2024
dadaf75
refactor
jefequien Jul 8, 2024
6800602
compile bug
jefequien Jul 8, 2024
98573e6
Merge branch 'main' into jeff/normal_consistency
jefequien Jul 9, 2024
6d156d2
remove benchmark script
jefequien Jul 9, 2024
9c06a24
support packed and sparse
jefequien Jul 15, 2024
7294be4
refactor
jefequien Jul 15, 2024
0a9ce04
bugfix
jefequien Jul 15, 2024
1bd9e91
utils/
jefequien Jul 15, 2024
d7a2916
rasterization backend
jefequien Jul 15, 2024
0274d1b
v_rotmat
jefequien Jul 15, 2024
6840727
render traj
jefequien Jul 15, 2024
5fdc934
point inward during ellipse
jefequien Jul 15, 2024
e3dc5e5
add tests for fwd and bwd pass
jefequien Jul 16, 2024
467ffe6
all but one test passing
jefequien Jul 16, 2024
8e158a2
weird bug
jefequien Jul 16, 2024
e5c9b64
merge
jefequien Jul 16, 2024
08b4631
test passes but test suite does not pass
jefequien Jul 16, 2024
c7e2fd4
count radii
jefequien Jul 16, 2024
ee40bb2
change sel to pass tests
jefequien Jul 16, 2024
6f21cb7
__sel
jefequien Jul 16, 2024
d2f89ab
cleanup
jefequien Jul 16, 2024
12723b1
simplify utils
jefequien Jul 17, 2024
433b313
merge
jefequien Jul 19, 2024
fadb7b4
util
jefequien Jul 19, 2024
f40b879
test rasterization
jefequien Jul 19, 2024
d766117
benchmark script
jefequien Jul 19, 2024
8e8d681
__init__
jefequien Jul 19, 2024
90c62af
merge
jefequien Aug 29, 2024
24eb1b5
fix normal consistency
jefequien Aug 31, 2024
a86ef37
ellipse
jefequien Aug 31, 2024
f0b93f0
cleanup
jefequien Aug 31, 2024
a014c89
uncomment
jefequien Aug 31, 2024
35b21c4
canvas list
jefequien Aug 31, 2024
9aaf257
Merge branch 'main' into jeff/normal_consistency
jefequien Aug 31, 2024
1afd41a
merge with traj
jefequien Aug 31, 2024
f072f17
cleanup
jefequien Aug 31, 2024
1fa189c
fix tests
jefequien Aug 31, 2024
b9ef876
remove 2dgs inria
jefequien Aug 31, 2024
c08a23e
script
jefequien Sep 1, 2024
1535db1
merge
jefequien Sep 3, 2024
6346132
merge
jefequien Sep 13, 2024
bfd78bc
fix merge
jefequien Sep 13, 2024
0a1dc09
fix utils
jefequien Sep 13, 2024
5b5a7c3
reduce diff test_basic
jefequien Sep 13, 2024
9c4186a
tests not passing
jefequien Sep 13, 2024
0c5e3ed
all tests passed
jefequien Sep 13, 2024
541990a
merge
jefequien Sep 22, 2024
38f2532
summarize stats
jefequien Sep 24, 2024
72d38d6
merge
jefequien Sep 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/benchmarks/compression/mcmc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ done
if command -v zip &> /dev/null
then
echo "Zipping results"
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR
python benchmarks/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST --stage compress
else
echo "zip command not found, skipping zipping"
fi
2 changes: 1 addition & 1 deletion examples/benchmarks/compression/mcmc_tt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ done
if command -v zip &> /dev/null
then
echo "Zipping results"
python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
python benchmarks/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST --stage compress
else
echo "zip command not found, skipping zipping"
fi
17 changes: 17 additions & 0 deletions examples/benchmarks/normal/2dgs_dtu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Benchmark 2DGS on the 15 DTU evaluation scans, then summarize val-stage stats.
# Expects DTU data under data/DTU/<scan>/; writes per-scene results and a
# val_summary.json under $RESULT_DIR.

SCENE_DIR="data/DTU"
SCENE_LIST="scan24 scan37 scan40 scan55 scan63 scan65 scan69 scan83 scan97 scan105 scan106 scan110 scan114 scan118 scan122"

RESULT_DIR="results/benchmark_dtu_2dgs"

# $SCENE_LIST is intentionally unquoted: word splitting yields one iteration per scan.
for SCENE in $SCENE_LIST;
do
    echo "Running $SCENE"

    # train and eval (DTU images are used at full resolution: --data_factor 1)
    CUDA_VISIBLE_DEVICES=0 python simple_trainer_2dgs.py --disable_viewer --data_factor 1 \
        --data_dir "$SCENE_DIR/$SCENE/" \
        --result_dir "$RESULT_DIR/$SCENE/"
done

echo "Summarizing results"
# $SCENE_LIST stays unquoted so each scan name is passed as a separate --scenes argument.
python benchmarks/summarize_stats.py --results_dir "$RESULT_DIR" --scenes $SCENE_LIST --stage val
28 changes: 28 additions & 0 deletions examples/benchmarks/normal/mcmc_dtu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Benchmark MCMC Gaussian splatting with the normal consistency loss on the
# 15 DTU evaluation scans, then summarize val-stage stats.
# Pick a Gaussian budget by switching which RESULT_DIR/CAP_MAX pair is active.

SCENE_DIR="data/DTU"
SCENE_LIST="scan24 scan37 scan40 scan55 scan63 scan65 scan69 scan83 scan97 scan105 scan106 scan110 scan114 scan118 scan122"
RENDER_TRAJ_PATH="ellipse"

RESULT_DIR="results/benchmark_dtu_mcmc_0.25M_normal"
CAP_MAX=250000

# RESULT_DIR="results/benchmark_dtu_mcmc_0.5M_normal"
# CAP_MAX=500000

# RESULT_DIR="results/benchmark_dtu_mcmc_1M_normal"
# CAP_MAX=1000000

# $SCENE_LIST is intentionally unquoted: word splitting yields one iteration per scan.
for SCENE in $SCENE_LIST;
do
    echo "Running $SCENE"

    # train and eval (DTU images are used at full resolution: --data_factor 1)
    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor 1 \
        --strategy.cap-max "$CAP_MAX" \
        --normal_consistency_loss \
        --render_traj_path "$RENDER_TRAJ_PATH" \
        --data_dir "$SCENE_DIR/$SCENE/" \
        --result_dir "$RESULT_DIR/$SCENE/"
done

echo "Summarizing results"
# $SCENE_LIST stays unquoted so each scan name is passed as a separate --scenes argument.
python benchmarks/summarize_stats.py --results_dir "$RESULT_DIR" --scenes $SCENE_LIST --stage val
25 changes: 25 additions & 0 deletions examples/benchmarks/normal/mcmc_normal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Benchmark MCMC Gaussian splatting with the normal consistency loss on the
# Mip-NeRF 360 scenes, then summarize val-stage stats.

SCENE_DIR="data/360_v2"
SCENE_LIST="garden bicycle stump bonsai counter kitchen room treehill flowers"

RESULT_DIR="results/benchmark_normal"
RENDER_TRAJ_PATH="ellipse"

# $SCENE_LIST is intentionally unquoted: word splitting yields one iteration per scene.
for SCENE in $SCENE_LIST;
do
    # Indoor scenes are trained at a 2x downsample; outdoor scenes at 4x
    # (standard Mip-NeRF 360 evaluation protocol).
    case "$SCENE" in
        bonsai|counter|kitchen|room) DATA_FACTOR=2 ;;
        *) DATA_FACTOR=4 ;;
    esac

    echo "Running $SCENE"

    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor "$DATA_FACTOR" \
        --normal_consistency_loss \
        --render_traj_path "$RENDER_TRAJ_PATH" \
        --data_dir "$SCENE_DIR/$SCENE/" \
        --result_dir "$RESULT_DIR/$SCENE/"
done

echo "Summarizing results"
# $SCENE_LIST stays unquoted so each scene name is passed as a separate --scenes argument.
python benchmarks/summarize_stats.py --results_dir "$RESULT_DIR" --scenes $SCENE_LIST --stage val
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,8 @@
import tyro


def main(results_dir: str, scenes: List[str]):
print("scenes:", scenes)
stage = "compress"

summary = defaultdict(list)
def main(results_dir: str, scenes: List[str], stage: str = "val"):
stats_all = defaultdict(list)
for scene in scenes:
scene_dir = os.path.join(results_dir, scene)

Expand All @@ -25,15 +22,20 @@ def main(results_dir: str, scenes: List[str]):
f"stat -c%s {zip_path}", shell=True, capture_output=True
)
size = int(out.stdout)
summary["size"].append(size)
stats_all["size"].append(size)

with open(os.path.join(scene_dir, f"stats/{stage}_step29999.json"), "r") as f:
stats = json.load(f)
for k, v in stats.items():
summary[k].append(v)
stats_all[k].append(v)

summary = {"scenes": scenes}
for k, v in stats_all.items():
summary[k] = np.mean(v)
print(summary)

for k, v in summary.items():
print(k, np.mean(v))
with open(os.path.join(results_dir, f"{stage}_summary.json"), "w") as f:
json.dump(summary, f, indent=2)


if __name__ == "__main__":
Expand Down
69 changes: 56 additions & 13 deletions examples/simple_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ class Config:
# Weight for depth loss
depth_lambda: float = 1e-2

# Enable normal consistency loss. (experimental)
normal_consistency_loss: bool = False
# Weight for normal consistency loss
normal_consistency_lambda: float = 0.05
# Start applying normal consistency loss after this iteration
normal_consistency_start_iter: int = 7000

# Dump information to tensorboard every this steps
tb_every: int = 100
# Save training images to tensorboard
Expand Down Expand Up @@ -273,6 +280,12 @@ def __init__(
self.world_size = world_size
self.device = f"cuda:{local_rank}"

self.render_mode = "RGB"
if cfg.depth_loss:
self.render_mode = "RGB+ED"
if cfg.normal_consistency_loss:
self.render_mode = "RGB+ED+N"

# Where to dump results.
os.makedirs(cfg.result_dir, exist_ok=True)

Expand Down Expand Up @@ -587,13 +600,10 @@ def train(self):
near_plane=cfg.near_plane,
far_plane=cfg.far_plane,
image_ids=image_ids,
render_mode="RGB+ED" if cfg.depth_loss else "RGB",
render_mode=self.render_mode,
masks=masks,
)
if renders.shape[-1] == 4:
colors, depths = renders[..., 0:3], renders[..., 3:4]
else:
colors, depths = renders, None
colors = renders[..., :3]

if cfg.use_bilateral_grid:
grid_y, grid_x = torch.meshgrid(
Expand Down Expand Up @@ -623,6 +633,7 @@ def train(self):
)
loss = l1loss * (1.0 - cfg.ssim_lambda) + ssimloss * cfg.ssim_lambda
if cfg.depth_loss:
depths = renders[..., -1:]
# query depths from depth map
points = torch.stack(
[
Expand All @@ -641,6 +652,14 @@ def train(self):
disp_gt = 1.0 / depths_gt # [1, M]
depthloss = F.l1_loss(disp, disp_gt) * self.scene_scale
loss += depthloss * cfg.depth_lambda
if cfg.normal_consistency_loss:
normals_rend = info["normals_rend"]
normals_surf = info["normals_surf"]
normalconsistencyloss = (
1 - (normals_rend * normals_surf).sum(dim=-1)
).mean()
if step > cfg.normal_consistency_start_iter:
loss += normalconsistencyloss * cfg.normal_consistency_lambda
if cfg.use_bilateral_grid:
tvloss = 10 * total_variation_loss(self.bil_grids.grids)
loss += tvloss
Expand Down Expand Up @@ -687,6 +706,12 @@ def train(self):
self.writer.add_scalar("train/mem", mem, step)
if cfg.depth_loss:
self.writer.add_scalar("train/depthloss", depthloss.item(), step)
if cfg.normal_consistency_loss:
self.writer.add_scalar(
"train/normalconsistencyloss",
normalconsistencyloss.item(),
step,
)
if cfg.use_bilateral_grid:
self.writer.add_scalar("train/tvloss", tvloss.item(), step)
if cfg.tb_save_image:
Expand Down Expand Up @@ -819,21 +844,31 @@ def eval(self, step: int, stage: str = "val"):

torch.cuda.synchronize()
tic = time.time()
colors, _, _ = self.rasterize_splats(
renders, alphas, info = self.rasterize_splats(
camtoworlds=camtoworlds,
Ks=Ks,
width=width,
height=height,
sh_degree=cfg.sh_degree,
near_plane=cfg.near_plane,
far_plane=cfg.far_plane,
render_mode=self.render_mode,
masks=masks,
) # [1, H, W, 3]
torch.cuda.synchronize()
ellipse_time += time.time() - tic

colors = torch.clamp(colors, 0.0, 1.0)
colors = torch.clamp(renders[..., 0:3], 0.0, 1.0)
canvas_list = [pixels, colors]
if cfg.depth_loss:
depths = renders[..., -1:]
depths = (depths - depths.min()) / (depths.max() - depths.min())
canvas_list.append(depths)
if cfg.normal_consistency_loss:
normals_rend = info["normals_rend"]
normals_surf = info["normals_surf"]
canvas_list.extend([normals_rend * 0.5 + 0.5])
canvas_list.extend([normals_surf * 0.5 + 0.5])

if world_rank == 0:
# write images
Expand Down Expand Up @@ -927,20 +962,28 @@ def render_traj(self, step: int):
camtoworlds = camtoworlds_all[i : i + 1]
Ks = K[None]

renders, _, _ = self.rasterize_splats(
renders, alphas, info = self.rasterize_splats(
camtoworlds=camtoworlds,
Ks=Ks,
width=width,
height=height,
sh_degree=cfg.sh_degree,
near_plane=cfg.near_plane,
far_plane=cfg.far_plane,
render_mode="RGB+ED",
render_mode=self.render_mode,
) # [1, H, W, 4]
colors = torch.clamp(renders[..., 0:3], 0.0, 1.0) # [1, H, W, 3]
depths = renders[..., 3:4] # [1, H, W, 1]
depths = (depths - depths.min()) / (depths.max() - depths.min())
canvas_list = [colors, depths.repeat(1, 1, 1, 3)]

colors = torch.clamp(renders[..., 0:3], 0.0, 1.0)
canvas_list = [colors]
if cfg.depth_loss:
depths = renders[..., -1:]
depths = (depths - depths.min()) / (depths.max() - depths.min())
canvas_list.append(depths)
if cfg.normal_consistency_loss:
normals_rend = info["normals_rend"]
normals_surf = info["normals_surf"]
canvas_list.extend([normals_rend * 0.5 + 0.5])
canvas_list.extend([normals_surf * 0.5 + 0.5])

# write images
canvas = torch.cat(canvas_list, dim=2).squeeze(0).cpu().numpy()
Expand Down
9 changes: 7 additions & 2 deletions gsplat/cuda/_torch_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ def _world_to_cam(

def _fully_fused_projection(
means: Tensor, # [N, 3]
covars: Tensor, # [N, 3, 3]
quats: Tensor,
scales: Tensor,
viewmats: Tensor, # [C, 4, 4]
Ks: Tensor, # [C, 3, 3]
width: int,
Expand All @@ -267,6 +268,10 @@ def _fully_fused_projection(
This is a minimal implementation of fully fused version, which has more
arguments. Not all arguments are supported.
"""
covars, _ = _quat_scale_to_covar_preci(quats, scales, triu=False) # [N, 3, 3]
normals = _quat_to_rotmat(quats)[..., 2] # [N, 3]
normals = normals.repeat(viewmats.shape[0], 1, 1) # [C, N, 3]

means_c, covars_c = _world_to_cam(means, covars, viewmats)

if camera_model == "ortho":
Expand Down Expand Up @@ -324,7 +329,7 @@ def _fully_fused_projection(
radius[~inside] = 0.0

radii = radius.int()
return radii, means2d, depths, conics, compensations
return radii, means2d, depths, normals, conics, compensations


@torch.no_grad()
Expand Down
23 changes: 19 additions & 4 deletions gsplat/cuda/_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ def forward(
)

# "covars" and {"quats", "scales"} are mutually exclusive
radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
radii, means2d, depths, normals, conics, compensations = _make_lazy_cuda_func(
"fully_fused_projection_fwd"
)(
means,
Expand Down Expand Up @@ -808,10 +808,12 @@ def forward(
ctx.eps2d = eps2d
ctx.camera_model_type = camera_model_type

return radii, means2d, depths, conics, compensations
return radii, means2d, depths, normals, conics, compensations

@staticmethod
def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
def backward(
ctx, v_radii, v_means2d, v_depths, v_normals, v_conics, v_compensations
):
(
means,
covars,
Expand Down Expand Up @@ -847,6 +849,7 @@ def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
compensations,
v_means2d.contiguous(),
v_depths.contiguous(),
v_normals.contiguous(),
v_conics.contiguous(),
v_compensations,
ctx.needs_input_grad[4], # viewmats_requires_grad
Expand Down Expand Up @@ -1043,6 +1046,7 @@ def forward(
radii,
means2d,
depths,
normals,
conics,
compensations,
) = _make_lazy_cuda_func("fully_fused_projection_packed_fwd")(
Expand Down Expand Up @@ -1081,7 +1085,16 @@ def forward(
ctx.sparse_grad = sparse_grad
ctx.camera_model_type = camera_model_type

return camera_ids, gaussian_ids, radii, means2d, depths, conics, compensations
return (
camera_ids,
gaussian_ids,
radii,
means2d,
depths,
normals,
conics,
compensations,
)

@staticmethod
def backward(
Expand All @@ -1091,6 +1104,7 @@ def backward(
v_radii,
v_means2d,
v_depths,
v_normals,
v_conics,
v_compensations,
):
Expand Down Expand Up @@ -1133,6 +1147,7 @@ def backward(
compensations,
v_means2d.contiguous(),
v_depths.contiguous(),
v_normals.contiguous(),
v_conics.contiguous(),
v_compensations,
ctx.needs_input_grad[4], # viewmats_requires_grad
Expand Down
Loading
Loading