diff --git a/scene/cameras.py b/scene/cameras.py index 4a5d84a8..db68dd63 100644 --- a/scene/cameras.py +++ b/scene/cameras.py @@ -3,7 +3,7 @@ # GRAPHDECO research group, https://team.inria.fr/graphdeco # All rights reserved. # -# This software is free for non-commercial, research and evaluation use +# This software is free for non-commercial, research and evaluation use # under the terms of the LICENSE.md file. # # For inquiries contact george.drettakis@inria.fr @@ -12,11 +12,11 @@ import torch from torch import nn import numpy as np -from utils.graphics_utils import getWorld2View2, getProjectionMatrix +from utils.graphics_utils import getWorld2View2, getProjectionMatrixShift class Camera(nn.Module): def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, - image_name, uid, + image_name, uid, principal_point_ndc, trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda" ): super(Camera, self).__init__() @@ -46,7 +46,7 @@ def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, else: self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device) self.gt_alpha_mask = None - + self.zfar = 100.0 self.znear = 0.01 @@ -54,14 +54,14 @@ def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, self.scale = scale self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda() - self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda() + self.projection_matrix = getProjectionMatrixShift(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy, width=self.image_width, height=self.image_height, principal_point_ndc=principal_point_ndc).transpose(0,1).cuda() self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0) self.camera_center = self.world_view_transform.inverse()[3, :3] class MiniCam: def __init__(self, width, 
height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform): self.image_width = width - self.image_height = height + self.image_height = height self.FoVy = fovy self.FoVx = fovx self.znear = znear diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py index 2a6f904a..517801fc 100644 --- a/scene/dataset_readers.py +++ b/scene/dataset_readers.py @@ -3,7 +3,7 @@ # GRAPHDECO research group, https://team.inria.fr/graphdeco # All rights reserved. # -# This software is free for non-commercial, research and evaluation use +# This software is free for non-commercial, research and evaluation use # under the terms of the LICENSE.md file. # # For inquiries contact george.drettakis@inria.fr
@@ -34,6 +34,7 @@ class CameraInfo(NamedTuple):
     image_name: str
     width: int
     height: int
+    principal_point_ndc: np.ndarray = np.array([0.5, 0.5])  # (cx / width, cy / height); image center when a reader does not supply it

 class SceneInfo(NamedTuple):
     point_cloud: BasicPointCloud
@@ -84,22 +85,29 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder): if intr.model=="SIMPLE_PINHOLE": focal_length_x = intr.params[0] + cx = intr.params[1] + cy = intr.params[2] FovY = focal2fov(focal_length_x, height) FovX = focal2fov(focal_length_x, width) elif intr.model=="PINHOLE": focal_length_x = intr.params[0] focal_length_y = intr.params[1] + cx = intr.params[2] + cy = intr.params[3] FovY = focal2fov(focal_length_y, height) FovX = focal2fov(focal_length_x, width) else: assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
+ principal_point_ndc = np.array([cx / width, cy / height]) + image_path = os.path.join(images_folder, os.path.basename(extr.name)) image_name = os.path.basename(image_path).split(".")[0] image = Image.open(image_path) cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image, - image_path=image_path, image_name=image_name, width=width, height=height) + image_path=image_path, image_name=image_name, width=width, height=height, + principal_point_ndc=principal_point_ndc) cam_infos.append(cam_info) sys.stdout.write('\n') return cam_infos @@ -117,7 +125,7 @@ def storePly(path, xyz, rgb): dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'), ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] - + normals = np.zeros_like(xyz) elements = np.empty(xyz.shape[0], dtype=dtype) @@ -210,12 +218,12 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension= image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB") fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) - FovY = fovy + FovY = fovy FovX = fovx cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1])) - + return cam_infos def readNerfSyntheticInfo(path, white_background, eval, extension=".png"): @@ -223,7 +231,7 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"): train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", white_background, extension) print("Reading Test Transforms") test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", white_background, extension) - + if not eval: train_cam_infos.extend(test_cam_infos) test_cam_infos = [] @@ -235,7 +243,7 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"): # Since this data set has no colmap data, we start with random points num_pts = 100_000 print(f"Generating random point cloud 
({num_pts})...") - + # We create random points inside the bounds of the synthetic Blender scenes xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3 shs = np.random.random((num_pts, 3)) / 255.0 diff --git a/utils/camera_utils.py b/utils/camera_utils.py index 0af952a5..a2a978ce 100644 --- a/utils/camera_utils.py +++ b/utils/camera_utils.py @@ -3,7 +3,7 @@ # GRAPHDECO research group, https://team.inria.fr/graphdeco # All rights reserved. # -# This software is free for non-commercial, research and evaluation use +# This software is free for non-commercial, research and evaluation use # under the terms of the LICENSE.md file. # # For inquiries contact george.drettakis@inria.fr @@ -48,10 +48,12 @@ def loadCam(args, id, cam_info, resolution_scale): loaded_mask = None gt_image = resized_image_rgb - return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, - FoVx=cam_info.FovX, FoVy=cam_info.FovY, + return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T, + FoVx=cam_info.FovX, FoVy=cam_info.FovY, image=gt_image, gt_alpha_mask=loaded_mask, - image_name=cam_info.image_name, uid=id, data_device=args.data_device) + image_name=cam_info.image_name, uid=id, + principal_point_ndc=cam_info.principal_point_ndc, + data_device=args.data_device) def cameraList_from_camInfos(cam_infos, resolution_scale, args): camera_list = [] diff --git a/utils/graphics_utils.py b/utils/graphics_utils.py index b4627d83..0f3851ef 100644 --- a/utils/graphics_utils.py +++ b/utils/graphics_utils.py @@ -3,7 +3,7 @@ # GRAPHDECO research group, https://team.inria.fr/graphdeco # All rights reserved. # -# This software is free for non-commercial, research and evaluation use +# This software is free for non-commercial, research and evaluation use # under the terms of the LICENSE.md file. 
# # For inquiries contact george.drettakis@inria.fr
@@ -70,6 +70,57 @@ def getProjectionMatrix(znear, zfar, fovX, fovY):
     P[2, 3] = -(zfar * znear) / (zfar - znear)
     return P

+def getProjectionMatrixShift(znear, zfar, fovX, fovY, width, height, principal_point_ndc):
+    """Build a perspective projection matrix for an off-center principal point.
+
+    The near-plane window starts out centered on the image plane and is then
+    translated by the principal point's offset from the image center.
+
+    Args:
+        znear: distance to the near clipping plane.
+        zfar: distance to the far clipping plane.
+        fovX: horizontal field of view, in radians.
+        fovY: vertical field of view, in radians.
+        width: image width in pixels.
+        height: image height in pixels.
+        principal_point_ndc: indexable pair (cx / width, cy / height), the
+            principal point as a fraction of the image size; (0.5, 0.5) is
+            the image center and leaves the window unshifted.
+
+    Returns:
+        A 4x4 torch tensor P with the projection coefficients.
+    """
+    half_h = math.tan(fovY / 2) * znear
+    half_w = math.tan(fovX / 2) * znear
+
+    # Principal point offset from the image center, in pixels.
+    shift_x_px = width * principal_point_ndc[0] - (width / 2)
+    shift_y_px = height * principal_point_ndc[1] - (height / 2)
+
+    # Pixels -> near-plane units: divide by the focal length (in pixels)
+    # and scale by the near-plane distance.
+    shift_x = (shift_x_px / fov2focal(fovX, width)) * znear
+    shift_y = (shift_y_px / fov2focal(fovY, height)) * znear
+
+    # Symmetric window around the image-plane origin, moved by the
+    # principal point shift.
+    top = half_h + shift_y
+    bottom = -half_h + shift_y
+    right = half_w + shift_x
+    left = -half_w + shift_x
+
+    z_sign = 1.0
+
+    P = torch.zeros(4, 4)
+    P[0, 0] = 2.0 * znear / (right - left)
+    P[1, 1] = 2.0 * znear / (top - bottom)
+    P[0, 2] = (right + left) / (right - left)
+    P[1, 2] = (top + bottom) / (top - bottom)
+    P[3, 2] = z_sign
+    P[2, 2] = z_sign * zfar / (zfar - znear)
+    P[2, 3] = -(zfar * znear) / (zfar - znear)
+    return P
+
 def fov2focal(fov, pixels):
     return pixels / (2 * math.tan(fov / 2))
