# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import unittest

import torch
from common_testing import TestCaseMixin
from pytorch3d.renderer import (
    MeshRasterizer,
    NDCGridRaysampler,
    PerspectiveCameras,
    PointsRasterizationSettings,
    PointsRasterizer,
    PulsarPointsRenderer,
    RasterizationSettings,
)
from pytorch3d.structures import Meshes, Pointclouds


"""
PyTorch3D renderers operate in an align_corners=False manner.
This file demonstrates the pixel-perfect calculation through very simple
examples.
"""


class _CommonData:
    """
    Contains data for all these tests.

    - Firstly, a non-square camera at the origin, specified in NDC space
    and in screen space. The principal point is in the center of the image
    and the focal length is 1.0 in world space.
    This camera has the identity as its world-to-view transformation, so
    it is facing down the positive z axis with y being up and x being left.
    A point on the z=1.0 focal plane has its x, y world coordinates equal
    to its NDC coordinates.

    - Secondly, batched together with that, a camera with the same
    focal length facing in the same direction but located so that it faces
    the corner of the corner pixel of the first image, with its principal
    point located at that corner, so that it maps the z=1 plane to NDC just
    like the first.

    - A single point self.point in world space which lies on the plane 1.0
    in front of the camera, exactly in the center of a known pixel
    (self.x, self.y), specifically with negative x and slightly positive y,
    so it is in the top right quadrant of the image.

    - A second batch of cameras defined in screen space which exactly match
    the first ones.

    So that this data can be copied for making demos, it is easiest to leave
    it as a freestanding class.
    """

    def __init__(self):
        self.H, self.W = 249, 125
        self.image_size = (self.H, self.W)
        self.camera_ndc = PerspectiveCameras(
            focal_length=1.0,
            image_size=(self.image_size,),
            in_ndc=True,
            T=torch.tensor([[0.0, 0.0, 0.0], [-1.0, self.H / self.W, 0.0]]),
            principal_point=((-0.0, -0.0), (1.0, -self.H / self.W)),
        )
        # Note how the principal point is specified: in NDC units for the
        # NDC camera above, but in pixel units for this screen-space camera.
        self.camera_screen = PerspectiveCameras(
            focal_length=self.W / 2.0,
            principal_point=((self.W / 2.0, self.H / 2.0), (0.0, self.H)),
            image_size=(self.image_size,),
            T=torch.tensor([[0.0, 0.0, 0.0], [-1.0, self.H / self.W, 0.0]]),
            in_ndc=False,
        )

        # 81 is more than half of 125; 113 is a bit less than half of 249.
        self.x, self.y = 81, 113
        self.point = [-0.304, 0.176, 1]
        # The point is in the center of pixel (81, 113),
        # where pixel (0, 0) is the top left.
        # 81 is 38/2 pixels beyond the midpoint (125-1)/2=62,
        # and 38/125=0.304.
        # 113 is 22/2 pixels before the midpoint (249-1)/2=124,
        # and 22/125=0.176.
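

# A minimal sanity-check sketch, not a PyTorch3D API: an illustrative helper
# reproducing the arithmetic in the comments above under the
# align_corners=False convention. It assumes W <= H, so that the x axis is
# the short side spanning [-1, 1] in NDC, with +x left, +y up and pixel
# (0, 0) at the top left.
def _pixel_center_to_ndc(x, y, W, H):
    ndc_x = 1.0 - 2.0 * (x + 0.5) / W
    ndc_y = (H / W) * (1.0 - 2.0 * (y + 0.5) / H)
    return ndc_x, ndc_y


# For example, _pixel_center_to_ndc(81, 113, 125, 249) gives approximately
# (-0.304, 0.176), matching _CommonData().point[:2].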


class TestPixels(TestCaseMixin, unittest.TestCase):
    def test_mesh(self):
        data = _CommonData()
        # Three points on the plane at distance 1.0 from the camera in
        # world space, whose mean is the known point.
        verts = torch.tensor(
            [[-0.288, 0.192, 1], [-0.32, 0.192, 1], [-0.304, 0.144, 1]]
        )
        self.assertClose(verts.mean(0), torch.tensor(data.point))
        faces = torch.LongTensor([[0, 1, 2]])
        # A mesh of one triangular face whose centroid is the known point,
        # duplicated so it can be rendered from two cameras.
        meshes = Meshes(verts=[verts], faces=[faces]).extend(2)
        faces_per_pixel = 2
        for camera in (data.camera_ndc, data.camera_screen):
            rasterizer = MeshRasterizer(
                cameras=camera,
                raster_settings=RasterizationSettings(
                    image_size=data.image_size, faces_per_pixel=faces_per_pixel
                ),
            )
            barycentric_coords_found = rasterizer(meshes).bary_coords
            self.assertTupleEqual(
                barycentric_coords_found.shape,
                (2,) + data.image_size + (faces_per_pixel, 3),
            )
            # The barycentric coordinates at the expected pixel are
            # (1/3, 1/3, 1/3), indicating that this pixel hits the
            # centroid of the triangle.
            self.assertClose(
                barycentric_coords_found[:, data.y, data.x, 0],
                torch.full((2, 3), 1 / 3.0),
                atol=1e-5,
            )

    def test_pointcloud(self):
        data = _CommonData()
        clouds = Pointclouds(points=torch.tensor([[data.point]])).extend(2)
        colorful_cloud = Pointclouds(
            points=torch.tensor([[data.point]]), features=torch.ones(1, 1, 3)
        ).extend(2)
        points_per_pixel = 2
        for camera in (data.camera_ndc, data.camera_screen):
            rasterizer = PointsRasterizer(
                cameras=camera,
                raster_settings=PointsRasterizationSettings(
                    image_size=data.image_size,
                    radius=0.0001,
                    points_per_pixel=points_per_pixel,
                ),
            )
            # When rasterizing we expect only one pixel to be occupied.
            rasterizer_output = rasterizer(clouds).idx
            self.assertTupleEqual(
                rasterizer_output.shape, (2,) + data.image_size + (points_per_pixel,)
            )
            found = torch.nonzero(rasterizer_output != -1)
            self.assertTupleEqual(found.shape, (2, 4))
            self.assertListEqual(found[0].tolist(), [0, data.y, data.x, 0])
            self.assertListEqual(found[1].tolist(), [1, data.y, data.x, 0])
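
            # The Pulsar renderer below consumes gamma, znear and zfar per
            # batch element, hence the pairs passed for this batch of two
            # clouds.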
            if camera.in_ndc():
                # Pulsar is not currently working in screen space.
                pulsar_renderer = PulsarPointsRenderer(rasterizer=rasterizer)
                pulsar_output = pulsar_renderer(
                    colorful_cloud, gamma=(0.1, 0.1), znear=(0.1, 0.1), zfar=(70, 70)
                )
                self.assertTupleEqual(
                    pulsar_output.shape, (2,) + data.image_size + (3,)
                )
                # Look for points rendered in the red channel only, expecting
                # only our single point.
                # Check the first batch element only.
                # TODO: Something is odd with the second.
                found = torch.nonzero(pulsar_output[0, :, :, 0])
                self.assertTupleEqual(found.shape, (1, 2))
                self.assertListEqual(found[0].tolist(), [data.y, data.x])
                # Should be:
                # found = torch.nonzero(pulsar_output[:, :, :, 0])
                # self.assertTupleEqual(found.shape, (2, 3))
                # self.assertListEqual(found[0].tolist(), [0, data.y, data.x])
                # self.assertListEqual(found[1].tolist(), [1, data.y, data.x])

    def test_raysampler(self):
        data = _CommonData()
        gridsampler = NDCGridRaysampler(
            image_width=data.W,
            image_height=data.H,
            n_pts_per_ray=2,
            min_depth=1.0,
            max_depth=2.0,
        )
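
        # NDCGridRaysampler emits one ray through the center of each pixel,
        # so the ray at our pixel should have xy coordinates equal to the
        # point's NDC coordinates, and (for the first camera, which sits at
        # the origin with focal length 1.0) an unnormalized direction equal
        # to the world point itself.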
        for camera in (data.camera_ndc, data.camera_screen):
            bundle = gridsampler(camera)
            self.assertTupleEqual(bundle.xys.shape, (2,) + data.image_size + (2,))
            self.assertTupleEqual(
                bundle.directions.shape, (2,) + data.image_size + (3,)
            )
            self.assertClose(
                bundle.xys[:, data.y, data.x],
                torch.tensor(data.point[:2]).expand(2, -1),
            )
            # We check only the first batch element.
            # The second varies because of the camera location.
            self.assertClose(
                bundle.directions[0, data.y, data.x],
                torch.tensor(data.point),
            )

    def test_camera(self):
        data = _CommonData()
        # Our point, plus the image center and a corner of the image,
        # all located at the focal-length distance away.
        points = torch.tensor([data.point, [0, 0, 1], [1, data.H / data.W, 1]])
        for cameras in (data.camera_ndc, data.camera_screen):
            ndc_points = cameras.transform_points_ndc(points)
            screen_points = cameras.transform_points_screen(points)
            camera_points = cameras.transform_points(points)
            for batch_idx in range(2):
                # NDC space agrees with the original.
                self.assertClose(ndc_points[batch_idx], points, atol=1e-5)
                # The first point in screen space is the center of our
                # expected pixel.
                self.assertClose(
                    screen_points[batch_idx][0],
                    torch.tensor([data.x + 0.5, data.y + 0.5, 1.0]),
                    atol=1e-5,
                )
                # The second point in screen space is the center of the
                # screen.
                self.assertClose(
                    screen_points[batch_idx][1],
                    torch.tensor([data.W / 2.0, data.H / 2.0, 1.0]),
                    atol=1e-5,
                )
                # The third point in screen space is the corner of the
                # screen (the corner of the corner pixel).
                self.assertClose(
                    screen_points[batch_idx][2],
                    torch.tensor([0.0, 0.0, 1.0]),
                    atol=1e-5,
                )

                if cameras.in_ndc():
                    self.assertClose(camera_points[batch_idx], ndc_points[batch_idx])
                else:
                    # For screen-space cameras, transform_points applies the
                    # projection directly in pixel units while keeping
                    # PyTorch3D's +x-left, +y-up axis directions, so the
                    # expected values are flipped relative to
                    # transform_points_screen, with offsets depending on the
                    # principal point.
                    if batch_idx == 0:
                        wanted = torch.stack(
                            [
                                data.W - screen_points[batch_idx, :, 0],
                                data.H - screen_points[batch_idx, :, 1],
                                torch.ones(3),
                            ],
                            dim=1,
                        )
                    else:
                        wanted = torch.stack(
                            [
                                -screen_points[batch_idx, :, 0],
                                2 * data.H - screen_points[batch_idx, :, 1],
                                torch.ones(3),
                            ],
                            dim=1,
                        )
                    self.assertClose(camera_points[batch_idx], wanted)
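

if __name__ == "__main__":
    # Allow running these demonstrations directly with python as well as
    # through a test runner.
    unittest.main()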