# utils.py

import functools

import kornia
import numpy as np
import torch
import torch.jit as jit
import torch.nn.functional as F
import torchvision.utils as vutils
from torch import Tensor

# dump every attribute of an argument namespace to stdout
def print_args(args):
    """Print each attribute of *args* as a name / value / type row.

    Reads the attributes from ``args.__dict__`` and prints one
    tab-separated, left-aligned row per attribute between two banner lines.
    """
    row_template = "{0: <10}\t{1: <30}\t{2: <20}"
    print("################################  args  ################################")
    for name, value in vars(args).items():
        print(row_template.format(name, str(value), str(type(value))))
    print("########################################################################")


# torch.no_grad wrapper for functions
def make_nograd_func(func):
    """Return a version of *func* that always runs under ``torch.no_grad()``.

    Positional and keyword arguments are forwarded unchanged and the return
    value of *func* is passed through. ``functools.wraps`` keeps the wrapped
    function's ``__name__`` and ``__doc__`` so decorated functions stay
    identifiable in logs and tracebacks.
    """
    @functools.wraps(func)
    def wrapper(*f_args, **f_kwargs):
        with torch.no_grad():
            return func(*f_args, **f_kwargs)

    return wrapper


# convert a function into recursive style to handle nested dict/list/tuple variables
def make_recursive_func(func):
    """Lift *func* so it is applied to every leaf of a nested container.

    The returned function walks lists, tuples and dicts recursively:
    lists come back as lists, tuples as plain tuples, and dicts as plain
    dicts with the same keys. Any other value is treated as a leaf and
    passed to *func*. ``functools.wraps`` keeps the metadata of *func* on
    the returned function.
    """
    @functools.wraps(func)
    def wrapper(vars):
        if isinstance(vars, list):
            return [wrapper(x) for x in vars]
        if isinstance(vars, tuple):
            return tuple(wrapper(x) for x in vars)
        if isinstance(vars, dict):
            return {k: wrapper(v) for k, v in vars.items()}
        return func(vars)

    return wrapper


@make_recursive_func
def tensor2float(vars):
    """Convert a scalar tensor or a real number to a Python ``float``.

    Applied recursively to nested lists/tuples/dicts by the decorator.
    Tensors are read with ``.item()``; the result is always cast to
    ``float`` so that integer tensors and integer scalars also produce the
    ``float`` values that ``DictAverageMeter.update`` requires.

    Raises:
        NotImplementedError: if the leaf is neither a tensor nor a real
            Python/NumPy scalar.
    """
    if isinstance(vars, torch.Tensor):
        return float(vars.item())
    if isinstance(vars, (int, float, np.integer, np.floating)):
        return float(vars)
    raise NotImplementedError("invalid input type {} for tensor2float".format(type(vars)))


@make_recursive_func
def tensor2numpy(vars):
    """Convert a tensor leaf to a NumPy array; pass NumPy arrays through.

    Tensors are detached, moved to the CPU, converted and then copied.
    Existing ``np.ndarray`` leaves are returned as the same object.

    Raises:
        NotImplementedError: for any other leaf type.
    """
    if isinstance(vars, torch.Tensor):
        host_tensor = vars.detach().cpu()
        return host_tensor.numpy().copy()
    if isinstance(vars, np.ndarray):
        return vars
    raise NotImplementedError("invalid input type {} for tensor2numpy".format(type(vars)))


@make_recursive_func
def tocuda(vars):
    """Move a tensor leaf to CUDA via ``.cuda()``; leave strings untouched.

    Raises:
        NotImplementedError: for any leaf that is neither a tensor nor a str.
    """
    if isinstance(vars, str):
        return vars
    if isinstance(vars, torch.Tensor):
        return vars.cuda()
    raise NotImplementedError("invalid input type {} for tocuda".format(type(vars)))


def save_scalars(logger, mode, scalar_dict, global_step):
    """Write every scalar in *scalar_dict* to *logger* via ``add_scalar``.

    Values are first converted with ``tensor2float``. A plain value is
    logged under ``"<mode>/<key>"``; each element of a list/tuple value is
    logged under ``"<mode>/<key>_<index>"``.
    """
    for key, value in tensor2float(scalar_dict).items():
        if isinstance(value, (list, tuple)):
            for idx, item in enumerate(value):
                logger.add_scalar('{}/{}_{}'.format(mode, key, idx), item, global_step)
        else:
            logger.add_scalar('{}/{}'.format(mode, key), value, global_step)


def save_images(logger, mode, images_dict, global_step):
    """Write every image in *images_dict* to *logger* via ``add_image``.

    Values are first converted with ``tensor2numpy``. A plain value is
    logged under ``"<mode>/<key>"``; each element of a list/tuple value is
    logged under ``"<mode>/<key>_<index>"``.

    Raises:
        NotImplementedError: if an image array is not 3- or 4-dimensional.
    """
    def preprocess(name, img):
        # Only rank-3 and rank-4 arrays are accepted.
        rank = len(img.shape)
        if rank not in (3, 4):
            raise NotImplementedError("invalid img shape {}:{} in save_images".format(name, img.shape))
        if rank == 3:
            # Insert a length-1 axis at position 1
            # (presumably the channel axis -- confirm against callers).
            img = img[:, np.newaxis, :, :]
        # Keep only the first entry along axis 0 before building the grid.
        first_only = torch.from_numpy(img[:1])
        return vutils.make_grid(first_only, padding=0, nrow=1, normalize=True, scale_each=True)

    for key, value in tensor2numpy(images_dict).items():
        if isinstance(value, (list, tuple)):
            for idx, item in enumerate(value):
                name = '{}/{}_{}'.format(mode, key, idx)
                logger.add_image(name, preprocess(name, item), global_step)
        else:
            name = '{}/{}'.format(mode, key)
            logger.add_image(name, preprocess(name, value), global_step)


class DictAverageMeter(object):
    """Accumulates dicts of float values and reports their per-key mean."""

    def __init__(self):
        # Running per-key sums of every accepted update.
        self.data = {}
        # Number of accepted updates.
        self.count = 0

    def update(self, new_input):
        """Add one dict of float values to the running sums.

        The whole input is validated before any state changes, so a
        rejected update leaves ``count`` and ``data`` untouched and later
        means are not skewed.

        Raises:
            NotImplementedError: if any value is not a ``float``.
            KeyError: if sums already exist and *new_input* contains a key
                that was not part of them.
        """
        first_update = not self.data
        for key, value in new_input.items():
            if not isinstance(value, float):
                raise NotImplementedError("invalid data {}: {}".format(key, type(value)))
            if not first_update and key not in self.data:
                raise KeyError(key)

        # Validation passed: commit the update.
        self.count += 1
        if first_update:
            self.data.update(new_input)
        else:
            for key, value in new_input.items():
                self.data[key] += value

    def mean(self):
        """Return a dict mapping each key to its sum divided by ``count``."""
        return {k: v / self.count for k, v in self.data.items()}


# a wrapper to compute metrics for each image individually
def compute_metrics_for_each_image(metric_func):
    """Turn a per-image metric into a batch metric that averages over images.

    The returned function indexes ``depth_est``, ``depth_gt`` and ``mask``
    along their first axis, calls *metric_func* once per index (the number
    of iterations is ``depth_gt.shape[0]``), stacks the results and returns
    their mean. Extra positional and keyword arguments are forwarded to
    *metric_func* unchanged on every call.
    """
    @functools.wraps(metric_func)
    def wrapper(depth_est, depth_gt, mask, *args, **kwargs):
        batch_size = depth_gt.shape[0]
        # compute result one by one
        results = [
            metric_func(depth_est[idx], depth_gt[idx], mask[idx], *args, **kwargs)
            for idx in range(batch_size)
        ]
        return torch.stack(results).mean()

    return wrapper


@torch.jit.script
def to_homogeneous(input_tensor: Tensor, dim: int = 0) -> Tensor:
    """
    Appends a slice of ones along `dim`, growing that dimension by one.
    The ones take their shape, dtype and device from a single slice of
    the input along `dim`.
    """
    # Index 0 along `dim`, restored as a length-1 axis, is the template.
    template = input_tensor.select(dim, 0).unsqueeze(dim)
    ones_slice = torch.ones_like(template)
    return torch.cat([input_tensor, ones_slice], dim=dim)


class BackprojectDepth(jit.ScriptModule):
    """
    Layer that lifts 2D pixel locations to 3D points using a depth map and
    an inverse intrinsics matrix. The 3D points are returned in homogeneous
    coordinates.
    """

    def __init__(self, height: int, width: int):
        """
        Precomputes the homogeneous pixel grid for images of size
        height x width and stores it as the buffer `pix_coords_13N`.
        """
        super().__init__()

        self.height = height
        self.width = width

        xx, yy = torch.meshgrid(
            torch.arange(self.width),
            torch.arange(self.height),
            indexing='xy',
        )
        # Offset the integer indices by half a pixel (presumably so the
        # coordinates refer to pixel centres).
        pix_coords_2hw = torch.stack((xx, yy), dim=0) + 0.5

        pix_coords_13N = to_homogeneous(
            pix_coords_2hw,
            dim=0,
        ).flatten(1).unsqueeze(0)

        # Historically the grid was moved to the GPU unconditionally, which
        # crashes on CPU-only hosts. Keep the GPU default only when CUDA is
        # actually available.
        if torch.cuda.is_available():
            pix_coords_13N = pix_coords_13N.cuda()

        # make this tensor into a buffer so it follows the module when the
        # module is moved between devices
        self.register_buffer("pix_coords_13N", pix_coords_13N)

    # NOTE: the jit.script_method decorator is intentionally not applied;
    # forward runs as a regular Python method.
    def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
        """
        Backprojects the pixel grid through the upper-left 3x3 block of
        invK_b44 and scales each ray by the depths in depth_b1hw.

        Only the inverse intrinsics are applied here, so the returned
        homogeneous points (b, 4, N) are in the camera frame, not in world
        space.
        """
        # No-op when the buffer already lives on the input's device.
        pix_coords_13N = self.pix_coords_13N.to(invK_b44.device)
        cam_points_b3N = torch.matmul(invK_b44[:, :3, :3], pix_coords_13N)
        depth_b1N = depth_b1hw.flatten(start_dim=2)
        cam_points_b3N = depth_b1N * cam_points_b3N
        cam_points_b4N = to_homogeneous(cam_points_b3N, dim=1)
        return cam_points_b4N

class NormalGenerator(jit.ScriptModule):
    """
    Estimates normals from depth maps.
    """

    def __init__(self, height: int, width: int,
                smoothing_kernel_size: int=5, smoothing_kernel_std: float=2.0):
        """
        Stores the image size and the gaussian smoothing settings, and
        builds the BackprojectDepth layer for that image size.
        """
        super().__init__()
        self.height = height
        self.width = width

        self.backproject = BackprojectDepth(self.height, self.width)

        self.kernel_size = smoothing_kernel_size
        self.std = smoothing_kernel_std

    # NOTE: the jit.script_method decorator is intentionally not applied;
    # forward runs as a regular Python method.
    def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
        """
        Smooths the incoming depth maps with a gaussian blur, backprojects
        them to 3D points (see BackprojectDepth), takes the spatial
        gradient of those points, and returns the normalized cross product
        of the two gradient components as the normal at each location.
        """
        blur_kernel = (self.kernel_size, self.kernel_size)
        blur_sigma = (self.std, self.std)
        smoothed_b1hw = kornia.filters.gaussian_blur2d(
            depth_b1hw,
            blur_kernel,
            blur_sigma,
        )

        # Drop the homogeneous row and restore the image layout.
        points_b4N = self.backproject(smoothed_b1hw, invK_b44)
        points_b3hw = points_b4N[:, :3].view(-1, 3, self.height, self.width)

        grads_b32hw = kornia.filters.spatial_gradient(points_b3hw)
        first_grad_b3hw = grads_b32hw[:, :, 0]
        second_grad_b3hw = grads_b32hw[:, :, 1]

        normals_b3hw = torch.cross(first_grad_b3hw, second_grad_b3hw, dim=1)
        return F.normalize(normals_b3hw, dim=1)

@make_nograd_func
@compute_metrics_for_each_image
def Thres_metrics(depth_est, depth_gt, mask, thres):
    """Fraction of masked pixels whose absolute depth error exceeds *thres*.

    *thres* must be an int or a float (checked with an assert).
    """
    assert isinstance(thres, (int, float))
    est_valid = depth_est[mask]
    gt_valid = depth_gt[mask]
    abs_err = (est_valid - gt_valid).abs()
    exceeds = abs_err > thres
    return exceeds.float().mean()


# NOTE: please do not use this to build up training loss
@make_nograd_func
@compute_metrics_for_each_image
def AbsDepthError_metrics(depth_est, depth_gt, mask):
    """Mean absolute difference between estimate and ground truth over the masked pixels."""
    est_valid = depth_est[mask]
    gt_valid = depth_gt[mask]
    abs_err = torch.abs(est_valid - gt_valid)
    return abs_err.mean()