# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import unittest
from typing import Optional, Tuple

import numpy as np
import torch
from pytorch3d.ops import knn_points
from pytorch3d.renderer import (
AbsorptionOnlyRaymarcher,
AlphaCompositor,
EmissionAbsorptionRaymarcher,
MonteCarloRaysampler,
MultinomialRaysampler,
NDCMultinomialRaysampler,
PerspectiveCameras,
PointsRasterizationSettings,
PointsRasterizer,
PointsRenderer,
RayBundle,
VolumeRenderer,
VolumeSampler,
)
from pytorch3d.renderer.implicit.utils import _validate_ray_bundle_variables
from pytorch3d.structures import Pointclouds, Volumes

from .common_testing import TestCaseMixin
from .test_points_to_volumes import init_uniform_y_rotations
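
# When DEBUG is enabled, the tests below additionally export their renders
# (png frames and gif animations) to the system temp directory for visual
# inspection.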
DEBUG = False
if DEBUG:
    import os
    import tempfile

    from PIL import Image


ZERO_TRANSLATION = torch.zeros(1, 3)


def init_boundary_volume(
batch_size: int,
volume_size: Tuple[int, int, int],
border_offset: int = 2,
shape: str = "cube",
volume_translation: torch.Tensor = ZERO_TRANSLATION,
):
"""
Generate a volume with sides colored with distinct colors.
"""
device = torch.device("cuda")
# first center the volume for the purpose of generating the canonical shape
volume_translation_tmp = (0.0, 0.0, 0.0)
# set the voxel size to 1 / (volume_size-1)
volume_voxel_size = 1 / (volume_size[0] - 1.0)
# colors of the sides of the cube
clr_sides = torch.tensor(
[
[1.0, 1.0, 1.0],
[1.0, 0.0, 0.0],
[1.0, 0.0, 1.0],
[1.0, 1.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 1.0, 1.0],
],
dtype=torch.float32,
device=device,
)
# get the coord grid of the volume
coord_grid = Volumes(
densities=torch.zeros(1, 1, *volume_size, device=device),
voxel_size=volume_voxel_size,
volume_translation=volume_translation_tmp,
).get_coord_grid()[0]
    # extract the boundary points of the cube together with their colors
if shape == "cube":
boundary_points, boundary_colors = [], []
for side, clr_side in enumerate(clr_sides):
first = side % 2
dim = side // 2
slices = [slice(border_offset, -border_offset, 1)] * 3
slices[dim] = int(border_offset * (2 * first - 1))
slices.append(slice(0, 3, 1))
boundary_points_ = coord_grid[slices].reshape(-1, 3)
boundary_points.append(boundary_points_)
boundary_colors.append(clr_side[None].expand_as(boundary_points_))
# set the internal part of the volume to be completely opaque
volume_densities = torch.zeros(*volume_size, device=device)
volume_densities[[slice(border_offset, -border_offset, 1)] * 3] = 1.0
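        # (the list-of-slices index above selects the interior voxels, i.e.
        # all locations at least `border_offset` voxels away from each face)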
boundary_points, boundary_colors = [
torch.cat(p, dim=0) for p in [boundary_points, boundary_colors]
]
# color the volume voxels with the nearest boundary points' color
_, idx, _ = knn_points(
coord_grid.view(1, -1, 3), boundary_points.view(1, -1, 3)
)
volume_colors = (
boundary_colors[idx.view(-1)].view(*volume_size, 3).permute(3, 0, 1, 2)
)
elif shape == "sphere":
# set all voxels within a certain distance from the origin to be opaque
volume_densities = (
coord_grid.norm(dim=-1)
<= 0.5 * volume_voxel_size * (volume_size[0] - border_offset)
).float()
        # color each voxel with the standard spherical colormap
volume_colors = (
(torch.nn.functional.normalize(coord_grid, dim=-1) + 1.0) * 0.5
).permute(3, 0, 1, 2)
else:
raise ValueError(shape)
volume_voxel_size = torch.ones((batch_size, 1), device=device) * volume_voxel_size
volume_translation = volume_translation.expand(batch_size, 3)
volumes = Volumes(
densities=volume_densities[None, None].expand(batch_size, 1, *volume_size),
features=volume_colors[None].expand(batch_size, 3, *volume_size),
voxel_size=volume_voxel_size,
volume_translation=volume_translation,
)
return volumes, volume_voxel_size, volume_translation
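

# Usage sketch for `init_boundary_volume` (illustrative only; assumes a CUDA
# device is available, since the helper allocates its tensors on "cuda"):
#
#   volumes, voxel_size, translation = init_boundary_volume(
#       batch_size=2, volume_size=(17, 17, 17), shape="sphere"
#   )
#   assert volumes.densities().shape == (2, 1, 17, 17, 17)  # (N, 1, D, H, W)
#   assert volumes.features().shape == (2, 3, 17, 17, 17)  # (N, 3, D, H, W)

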
def init_cameras(
batch_size: int = 10,
image_size: Optional[Tuple[int, int]] = (50, 50),
ndc: bool = False,
):
"""
Initialize a batch of cameras whose extrinsics rotate the cameras around
the world's y axis.
Depending on whether we want an NDC-space (`ndc==True`) or a screen-space camera,
the camera's focal length and principal point are initialized accordingly:
For `ndc==False`, p0=focal_length=image_size/2.
For `ndc==True`, focal_length=1.0, p0 = 0.0.
The the z-coordinate of the translation vector of each camera is fixed to 1.5.
"""
device = torch.device("cuda:0")
    # uniformly spaced rotations around the world's y axis
R = init_uniform_y_rotations(batch_size=batch_size, device=device)
# move camera 1.5 m away from the scene center
T = torch.zeros((batch_size, 3), device=device)
T[:, 2] = 1.5
if ndc:
p0 = torch.zeros(batch_size, 2, device=device)
focal = torch.ones(batch_size, device=device)
else:
p0 = torch.ones(batch_size, 2, device=device)
p0[:, 0] *= image_size[1] * 0.5
p0[:, 1] *= image_size[0] * 0.5
focal = max(*image_size) * torch.ones(batch_size, device=device)
# convert to a Camera object
cameras = PerspectiveCameras(focal, p0, R=R, T=T, device=device)
return cameras
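

# The two intrinsics conventions side by side (an illustrative sketch):
#
#   cams_screen = init_cameras(batch_size=2, image_size=(50, 50), ndc=False)
#   # -> principal point at the image center, focal_length = max(H, W)
#   cams_ndc = init_cameras(batch_size=2, image_size=None, ndc=True)
#   # -> principal point at the origin, focal_length = 1.0

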
class TestRenderVolumes(TestCaseMixin, unittest.TestCase):
def setUp(self) -> None:
super().setUp()
torch.manual_seed(42)
np.random.seed(42)

    @staticmethod
def renderer(
volume_size=(25, 25, 25),
batch_size=10,
shape="sphere",
raymarcher_type=EmissionAbsorptionRaymarcher,
n_rays_per_image=10,
n_pts_per_ray=10,
):
# get the volumes
volumes = init_boundary_volume(
volume_size=volume_size, batch_size=batch_size, shape=shape
)[0]
# init the mc raysampler
raysampler = MonteCarloRaysampler(
min_x=-1.0,
max_x=1.0,
min_y=-1.0,
max_y=1.0,
n_rays_per_image=n_rays_per_image,
n_pts_per_ray=n_pts_per_ray,
min_depth=0.1,
max_depth=2.0,
).to(volumes.device)
# get the raymarcher
raymarcher = raymarcher_type()
renderer = VolumeRenderer(
raysampler=raysampler, raymarcher=raymarcher, sample_mode="bilinear"
)
# generate NDC camera extrinsics and intrinsics
cameras = init_cameras(batch_size, image_size=None, ndc=True)
def run_renderer():
renderer(cameras=cameras, volumes=volumes)
return run_renderer
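
    # Note: `renderer` deliberately returns a parameterless closure instead of
    # executing the forward pass itself, so that the configured render can be
    # invoked repeatedly, e.g. from a benchmark harness (hypothetical usage):
    #
    #   run = TestRenderVolumes.renderer(batch_size=2, n_rays_per_image=16)
    #   run()  # one full raysample + raymarch forward pass
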
def test_input_types(self, batch_size: int = 10):
"""
Check that ValueErrors are thrown where expected.
"""
# check the constructor
for bad_raysampler in (None, 5, []):
for bad_raymarcher in (None, 5, []):
with self.assertRaises(ValueError):
VolumeRenderer(raysampler=bad_raysampler, raymarcher=bad_raymarcher)
raysampler = NDCMultinomialRaysampler(
image_width=100,
image_height=100,
n_pts_per_ray=10,
min_depth=0.1,
max_depth=1.0,
)
# init a trivial renderer
renderer = VolumeRenderer(
raysampler=raysampler, raymarcher=EmissionAbsorptionRaymarcher()
)
# get cameras
cameras = init_cameras(batch_size=batch_size)
# get volumes
volumes = init_boundary_volume(volume_size=(10, 10, 10), batch_size=batch_size)[
0
]
# different batch sizes for cameras / volumes
with self.assertRaises(ValueError):
renderer(cameras=cameras, volumes=volumes[:-1])
# ray checks for VolumeSampler
volume_sampler = VolumeSampler(volumes=volumes)
n_rays = 100
        for bad_ray_bundle in (
            # number of rays differs between origins and directions
            (
                torch.rand(batch_size, n_rays, 3),
                torch.rand(batch_size, n_rays + 1, 3),
                torch.rand(batch_size, n_rays, 10),
            ),
            # batch size of origins differs from the other variables
            (
                torch.rand(batch_size + 1, n_rays, 3),
                torch.rand(batch_size, n_rays, 3),
                torch.rand(batch_size, n_rays, 10),
            ),
            # directions are not 3-dimensional vectors
            (
                torch.rand(batch_size, n_rays, 3),
                torch.rand(batch_size, n_rays, 2),
                torch.rand(batch_size, n_rays, 10),
            ),
            # lengths are missing the points-per-ray dimension
            (
                torch.rand(batch_size, n_rays, 3),
                torch.rand(batch_size, n_rays, 3),
                torch.rand(batch_size, n_rays),
            ),
        ):
ray_bundle = RayBundle(
**dict(
zip(
("origins", "directions", "lengths"),
[r.to(cameras.device) for r in bad_ray_bundle],
)
),
xys=None,
)
with self.assertRaises(ValueError):
volume_sampler(ray_bundle)
            # also explicitly check the ray-bundle validation function
with self.assertRaises(ValueError):
_validate_ray_bundle_variables(*bad_ray_bundle)

    def test_compare_with_pointclouds_renderer(
self, batch_size=11, volume_size=(30, 30, 30), image_size=(200, 250)
):
"""
Generate a volume and its corresponding point cloud and check whether
PointsRenderer returns the same images as the corresponding VolumeRenderer.
"""
# generate NDC camera extrinsics and intrinsics
cameras = init_cameras(batch_size, image_size=image_size, ndc=True)
# init the boundary volume
for shape in ("sphere", "cube"):
if not DEBUG and shape == "cube":
# do not run numeric checks for the cube as the
# differences in rendering equations make the renders incomparable
continue
# get rand offset of the volume
volume_translation = torch.randn(batch_size, 3) * 0.1
volumes = init_boundary_volume(
volume_size=volume_size,
batch_size=batch_size,
shape=shape,
volume_translation=volume_translation,
)[0]
# convert the volumes to a pointcloud
points = []
points_features = []
for densities_one, features_one, grid_one in zip(
volumes.densities(),
volumes.features(),
volumes.get_coord_grid(world_coordinates=True),
):
opaque = densities_one.view(-1) > 1e-4
points.append(grid_one.view(-1, 3)[opaque])
points_features.append(features_one.reshape(3, -1).t()[opaque])
pointclouds = Pointclouds(points, features=points_features)
# init the grid raysampler with the ndc grid
coord_range = 1.0
half_pix_size = coord_range / max(*image_size)
raysampler = NDCMultinomialRaysampler(
image_width=image_size[1],
image_height=image_size[0],
n_pts_per_ray=256,
min_depth=0.1,
max_depth=2.0,
)
# get the EA raymarcher
raymarcher = EmissionAbsorptionRaymarcher()
# jitter the camera intrinsics a bit for each render
cameras_randomized = cameras.clone()
cameras_randomized.principal_point = (
torch.randn_like(cameras.principal_point) * 0.3
)
cameras_randomized.focal_length = (
cameras.focal_length + torch.randn_like(cameras.focal_length) * 0.2
)
# get the volumetric render
images = VolumeRenderer(
raysampler=raysampler, raymarcher=raymarcher, sample_mode="bilinear"
)(cameras=cameras_randomized, volumes=volumes)[0][..., :3]
# instantiate the points renderer
point_radius = 6 * half_pix_size
points_renderer = PointsRenderer(
rasterizer=PointsRasterizer(
cameras=cameras_randomized,
raster_settings=PointsRasterizationSettings(
image_size=image_size, radius=point_radius, points_per_pixel=10
),
),
compositor=AlphaCompositor(),
)
# get the point render
images_pts = points_renderer(pointclouds)
if shape == "sphere":
diff = (images - images_pts).abs().mean(dim=-1)
mu_diff = diff.mean(dim=(1, 2))
std_diff = diff.std(dim=(1, 2))
self.assertClose(mu_diff, torch.zeros_like(mu_diff), atol=3e-2)
self.assertClose(std_diff, torch.zeros_like(std_diff), atol=6e-2)
if DEBUG:
outdir = tempfile.gettempdir() + "/test_volume_vs_pts_renderer"
os.makedirs(outdir, exist_ok=True)
frames = []
for image, image_pts in zip(images, images_pts):
diff_image = (
((image - image_pts) * 0.5 + 0.5)
.mean(dim=2, keepdim=True)
.repeat(1, 1, 3)
)
image_pil = Image.fromarray(
(
torch.cat((image, image_pts, diff_image), dim=1)
.detach()
.cpu()
.numpy()
* 255.0
).astype(np.uint8)
)
frames.append(image_pil)
# export gif
outfile = os.path.join(outdir, f"volume_vs_pts_render_{shape}.gif")
frames[0].save(
outfile,
save_all=True,
append_images=frames[1:],
duration=batch_size // 15,
loop=0,
)
print(f"exported {outfile}")
# export concatenated frames
outfile_cat = os.path.join(outdir, f"volume_vs_pts_render_{shape}.png")
Image.fromarray(
np.concatenate([np.array(f) for f in frames], axis=0)
).save(outfile_cat)
print(f"exported {outfile_cat}")

    def test_monte_carlo_rendering(
self, n_frames=20, volume_size=(30, 30, 30), image_size=(40, 50)
):
"""
Tests that rendering with the MonteCarloRaysampler matches the
rendering with MultinomialRaysampler sampled at the corresponding
MonteCarlo locations.
"""
volumes = init_boundary_volume(
volume_size=volume_size, batch_size=n_frames, shape="sphere"
)[0]
# generate camera extrinsics and intrinsics
cameras = init_cameras(n_frames, image_size=image_size)
# init the grid raysampler
raysampler_multinomial = MultinomialRaysampler(
min_x=0.5,
max_x=image_size[1] - 0.5,
min_y=0.5,
max_y=image_size[0] - 0.5,
image_width=image_size[1],
image_height=image_size[0],
n_pts_per_ray=256,
min_depth=0.5,
max_depth=2.0,
)
# init the mc raysampler
raysampler_mc = MonteCarloRaysampler(
min_x=0.5,
max_x=image_size[1] - 0.5,
min_y=0.5,
max_y=image_size[0] - 0.5,
n_rays_per_image=3000,
n_pts_per_ray=256,
min_depth=0.5,
max_depth=2.0,
)
# get the EA raymarcher
raymarcher = EmissionAbsorptionRaymarcher()
# get both mc and grid renders
(
(images_opacities_mc, ray_bundle_mc),
(images_opacities_grid, ray_bundle_grid),
) = [
VolumeRenderer(
                raysampler=raysampler,
raymarcher=raymarcher,
sample_mode="bilinear",
)(cameras=cameras, volumes=volumes)
for raysampler in (raysampler_mc, raysampler_multinomial)
]
# convert the mc sampling locations to [-1, 1]
sample_loc = ray_bundle_mc.xys.clone()
sample_loc[..., 0] = 2 * (sample_loc[..., 0] / image_size[1]) - 1
sample_loc[..., 1] = 2 * (sample_loc[..., 1] / image_size[0]) - 1
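        # note: MonteCarloRaysampler draws pixel-space xys from
        # [0.5, W - 0.5] x [0.5, H - 0.5] (pixel centers at i + 0.5), and
        # grid_sample with align_corners=False places pixel centers at
        # (2 * (i + 0.5) / W) - 1, so the mapping above samples the grid
        # render exactly at the Monte Carlo ray locations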
# sample the grid render at the mc locations
images_opacities_mc_ = torch.nn.functional.grid_sample(
images_opacities_grid.permute(0, 3, 1, 2), sample_loc, align_corners=False
)
# check that the samples are the same
self.assertClose(
images_opacities_mc.permute(0, 3, 1, 2), images_opacities_mc_, atol=1e-4
)

    def test_rotating_gif(self):
self._rotating_gif(image_size=(200, 100))
self._rotating_gif(image_size=(100, 200))

    def _rotating_gif(
self, image_size, n_frames=50, fps=15, volume_size=(100, 100, 100)
):
"""
Render a gif animation of a rotating cube/sphere (runs only if `DEBUG==True`).
"""
if not DEBUG:
# do not run this if debug is False
return
for shape in ("sphere", "cube"):
for sample_mode in ("bilinear", "nearest"):
volumes = init_boundary_volume(
volume_size=volume_size, batch_size=n_frames, shape=shape
)[0]
# generate camera extrinsics and intrinsics
cameras = init_cameras(n_frames, image_size=image_size)
# init the grid raysampler
raysampler = MultinomialRaysampler(
min_x=0.5,
max_x=image_size[1] - 0.5,
min_y=0.5,
max_y=image_size[0] - 0.5,
image_width=image_size[1],
image_height=image_size[0],
n_pts_per_ray=256,
min_depth=0.5,
max_depth=2.0,
)
# get the EA raymarcher
raymarcher = EmissionAbsorptionRaymarcher()
# initialize the renderer
renderer = VolumeRenderer(
raysampler=raysampler,
raymarcher=raymarcher,
sample_mode=sample_mode,
)
# run the renderer
images_opacities = renderer(cameras=cameras, volumes=volumes)[0]
                # split the output into rendered images and opacities (alpha)
images, opacities = images_opacities[..., :3], images_opacities[..., 3]
# export the gif
outdir = tempfile.gettempdir() + "/test_volume_renderer_gifs"
os.makedirs(outdir, exist_ok=True)
frames = []
for image, opacity in zip(images, opacities):
image_pil = Image.fromarray(
(
torch.cat(
(image, opacity[..., None].repeat(1, 1, 3)), dim=1
)
.detach()
.cpu()
.numpy()
* 255.0
).astype(np.uint8)
)
frames.append(image_pil)
outfile = os.path.join(outdir, f"{shape}_{sample_mode}.gif")
frames[0].save(
outfile,
save_all=True,
append_images=frames[1:],
duration=n_frames // fps,
loop=0,
)
print(f"exported {outfile}")

    def test_rotating_cube_volume_render(self):
"""
Generates 4 renders of 4 sides of a volume representing a 3D cube.
Since each side of the cube is homogeneously colored with
a different color, this should result in 4 images of homogeneous color
with the depth of each pixel equal to a constant.
"""
# batch_size = 4 sides of the cube
batch_size = 4
image_size = (50, 40)
for volume_size in ([25, 25, 25],):
for sample_mode in ("bilinear", "nearest"):
volume_translation = torch.zeros(4, 3)
volume_translation.requires_grad = True
volumes, volume_voxel_size, _ = init_boundary_volume(
volume_size=volume_size,
batch_size=batch_size,
shape="cube",
volume_translation=volume_translation,
)
# generate camera extrinsics and intrinsics
cameras = init_cameras(batch_size, image_size=image_size)
                # enable gradients on the differentiable camera variables
cam_vars = ("R", "T", "focal_length", "principal_point")
for cam_var in cam_vars:
getattr(cameras, cam_var).requires_grad = True
# enable the grad for volume vars as well
volumes.features().requires_grad = True
volumes.densities().requires_grad = True
raysampler = MultinomialRaysampler(
min_x=0.5,
max_x=image_size[1] - 0.5,
min_y=0.5,
max_y=image_size[0] - 0.5,
image_width=image_size[1],
image_height=image_size[0],
n_pts_per_ray=128,
min_depth=0.01,
max_depth=3.0,
)
raymarcher = EmissionAbsorptionRaymarcher()
renderer = VolumeRenderer(
raysampler=raysampler,
raymarcher=raymarcher,
sample_mode=sample_mode,
)
images_opacities = renderer(cameras=cameras, volumes=volumes)[0]
images, opacities = images_opacities[..., :3], images_opacities[..., 3]
# check that the renderer does not erase gradients
loss = images_opacities.sum()
loss.backward()
for check_var in (
*[getattr(cameras, cam_var) for cam_var in cam_vars],
volumes.features(),
volumes.densities(),
volume_translation,
):
self.assertIsNotNone(check_var.grad)
                # the AbsorptionOnly (AO) opacities should exactly match the
                # EmissionAbsorption ones; we obtain the AO opacities by
                # rendering a feature-less version of the same volumes
raymarcher_ao = AbsorptionOnlyRaymarcher()
renderer_ao = VolumeRenderer(
raysampler=raysampler,
raymarcher=raymarcher_ao,
sample_mode=sample_mode,
)
volumes_featureless = Volumes(
densities=volumes.densities(),
volume_translation=volume_translation,
voxel_size=volume_voxel_size,
)
opacities_ao = renderer_ao(
cameras=cameras, volumes=volumes_featureless
)[0][..., 0]
self.assertClose(opacities, opacities_ao)
# colors of the sides of the cube
gt_clr_sides = torch.tensor(
[
[1.0, 0.0, 0.0],
[0.0, 1.0, 1.0],
[1.0, 1.0, 1.0],
[0.0, 1.0, 0.0],
],
dtype=torch.float32,
device=images.device,
)
if DEBUG:
outdir = tempfile.gettempdir() + "/test_volume_renderer"
os.makedirs(outdir, exist_ok=True)
for imidx, (image, opacity) in enumerate(zip(images, opacities)):
for image_ in (image, opacity):
image_pil = Image.fromarray(
(image_.detach().cpu().numpy() * 255.0).astype(np.uint8)
)
outfile = (
outdir
+ f"/rgb_{sample_mode}"
+ f"_{str(volume_size).replace(' ', '')}"
+ f"_{imidx:003d}"
)
if image_ is image:
outfile += "_rgb.png"
else:
outfile += "_opacity.png"
image_pil.save(outfile)
print(f"exported {outfile}")
border = 10
for image, opacity, gt_color in zip(images, opacities, gt_clr_sides):
image_crop = image[border:-border, border:-border]
opacity_crop = opacity[border:-border, border:-border]
# check mean and std difference from gt
err = (
(image_crop - gt_color[None, None].expand_as(image_crop))
.abs()
.mean(dim=-1)
)
zero = err.new_zeros(1)[0]
self.assertClose(err.mean(), zero, atol=1e-2)
self.assertClose(err.std(), zero, atol=1e-2)
err_opacity = (opacity_crop - 1.0).abs()
self.assertClose(err_opacity.mean(), zero, atol=1e-2)
self.assertClose(err_opacity.std(), zero, atol=1e-2)