Source code for kiui.cam

import numpy as np
from scipy.spatial.transform import Rotation

from kiui.op import safe_normalize
from kiui.typing import *


[docs] def to_homo(mat: np.ndarray, eye: bool = True) -> np.ndarray: """Append a homogeneous row to a matrix. - If ``mat`` is [3, 4], append ``[0, 0, 0, 1]`` to get a [4, 4] matrix when ``eye=True``, or ``[0, 0, 0, 0]`` when ``eye=False``. - If ``mat`` is [2, 3], append ``[0, 0, 1]`` to get a [3, 3] matrix. Args: mat (np.ndarray): Input matrix of shape [3, 4] or [2, 3]. eye (bool, optional): Whether to append a homogeneous "eye" row. Defaults to True. Returns: np.ndarray: Homogeneous matrix of shape [4, 4] or [3, 3]. """ mat = np.asarray(mat, dtype=np.float32) if mat.shape == (3, 4): if eye: bottom = np.array([[0.0, 0.0, 0.0, 1.0]], dtype=mat.dtype) else: bottom = np.array([[0.0, 0.0, 0.0, 0.0]], dtype=mat.dtype) elif mat.shape == (2, 3): if eye: bottom = np.array([[0.0, 0.0, 1.0]], dtype=mat.dtype) else: bottom = np.array([[0.0, 0.0, 0.0]], dtype=mat.dtype) else: raise ValueError(f"to_homo expects shape (3, 4) or (2, 3), got {mat.shape}") return np.concatenate([mat, bottom], axis=0)
[docs] def intr_to_K(intr): """convert intrinsics [fx, fy, cx, cy] to calibration matrix K [3, 3]. Args: intr (np.ndarray): camera intrinsics, float [4] as [fx, fy, cx, cy] Returns: np.ndarray: calibration matrix K, float [3, 3] """ intr = np.asarray(intr, dtype=np.float32) fx, fy, cx, cy = intr K = np.array( [ [fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0], ], dtype=np.float32, ) return K
[docs] def K_to_intr(K): """convert calibration matrix K [3, 3] to intrinsics [fx, fy, cx, cy]. Args: K (np.ndarray): calibration matrix, float [3, 3] Returns: np.ndarray: intrinsics, float [4] as [fx, fy, cx, cy] """ K = np.asarray(K, dtype=np.float32) fx = K[0, 0] fy = K[1, 1] cx = K[0, 2] cy = K[1, 2] return np.array([fx, fy, cx, cy], dtype=np.float32)
[docs] def fov_to_intr(fov, width, height, is_degree=True): """convert field of view [fovx, fovy] to intrinsics [fx, fy, cx, cy]. Args: fov (np.ndarray): field of view, float [2] as [fovx, fovy] width (int): image width height (int): image height is_degree (bool, optional): whether fov is in degree. Defaults to True. Returns: np.ndarray: intrinsics, float [4] as [fx, fy, cx, cy] """ fov = np.asarray(fov, dtype=np.float32) fovx, fovy = fov if is_degree: fovx = np.deg2rad(fovx) fovy = np.deg2rad(fovy) fx = width / (2.0 * np.tan(fovx / 2.0)) fy = height / (2.0 * np.tan(fovy / 2.0)) cx = width * 0.5 cy = height * 0.5 return np.array([fx, fy, cx, cy], dtype=np.float32)
[docs] def intr_to_fov(intr, width, height, is_degree=True): """convert intrinsics [fx, fy, cx, cy] to field of view [fovx, fovy]. Args: intr (np.ndarray): intrinsics, float [4] as [fx, fy, cx, cy] width (int): image width height (int): image height is_degree (bool, optional): whether to return fov in degree. Defaults to True. Returns: np.ndarray: field of view, float [2] as [fovx, fovy] """ intr = np.asarray(intr, dtype=np.float32) fx, fy, _, _ = intr fovx = 2.0 * np.arctan(width / (2.0 * fx)) fovy = 2.0 * np.arctan(height / (2.0 * fy)) if is_degree: fovx = np.rad2deg(fovx) fovy = np.rad2deg(fovy) return np.array([fovx, fovy], dtype=np.float32)
[docs] def fov_to_K(fov, width, height, is_degree=True): """convert field of view [fovx, fovy] to calibration matrix K [3, 3].""" intr = fov_to_intr(fov, width, height, is_degree=is_degree) return intr_to_K(intr)
[docs] def K_to_fov(K, width, height, is_degree=True): """convert calibration matrix K [3, 3] to field of view [fovx, fovy].""" intr = K_to_intr(K) return intr_to_fov(intr, width, height, is_degree=is_degree)
[docs] def perspective_to_intr(proj, width, height): """convert a perspective projection matrix to intrinsics [fx, fy, cx, cy]. Args: proj (np.ndarray): perspective projection matrix, float [4, 4] width (int): image width height (int): image height Returns: np.ndarray: intrinsics, float [4] as [fx, fy, cx, cy] """ proj = np.asarray(proj, dtype=np.float32) fx = proj[0, 0] * (width / 2.0) fy = proj[1, 1] * (height / 2.0) cx = (1.0 - proj[0, 2]) * (width / 2.0) cy = (1.0 - proj[1, 2]) * (height / 2.0) return np.array([fx, fy, cx, cy], dtype=np.float32)
[docs] def get_rays(pose, h, w, fovy, opengl=True, normalize_dir=True): """ construct rays origin and direction from a camera pose. Args: pose (np.ndarray): camera pose, float [4, 4] h (int): image height w (int): image width fovy (float): field of view in degree along y-axis. opengl (bool, optional): whether to use the OpenGL camera convention. Defaults to True. normalize_dir (bool, optional): whether to normalize the ray directions. Defaults to True. Returns: Tuple[np.ndarray]: rays_o and rays_d, both are float [h, w, 3] """ # pose: [4, 4] # fov: in degree # opengl: camera front view convention x, y = np.meshgrid(np.arange(w), np.arange(h), indexing="xy") x = x.reshape(-1) y = y.reshape(-1) cx = w * 0.5 cy = h * 0.5 # objaverse rendering has fixed focal of 560 at resolution 512 --> fov = 49.1 degree focal = h * 0.5 / np.tan(0.5 * np.deg2rad(fovy)) camera_dirs = np.stack([ (x - cx + 0.5) / focal, (y - cy + 0.5) / focal * (-1.0 if opengl else 1.0), np.ones_like(x) * (-1.0 if opengl else 1.0), ], axis=-1) # [hw, 3] rays_d = camera_dirs @ pose[:3, :3].transpose(0, 1) # [hw, 3] rays_o = np.expand_dims(pose[:3, 3], 0).repeat(rays_d.shape[0], 0) # [hw, 3] if normalize_dir: rays_d = safe_normalize(rays_d) rays_o = rays_o.reshape(h, w, 3) rays_d = rays_d.reshape(h, w, 3) return rays_o, rays_d
[docs] def pose_to_extr(pose): """convert camera pose (cam2world) to extrinsics (world2cam). Args: pose (np.ndarray): camera pose matrix (cam2world), float [4, 4] Returns: np.ndarray: camera extrinsics matrix (world2cam), float [4, 4] """ pose = np.asarray(pose, dtype=np.float32) return np.linalg.inv(pose)
[docs] def extr_to_pose(extr): """convert camera extrinsics (world2cam) to pose (cam2world). Args: extr (np.ndarray): camera extrinsics matrix (world2cam), float [4, 4] Returns: np.ndarray: camera pose matrix (cam2world), float [4, 4] """ extr = np.asarray(extr, dtype=np.float32) return np.linalg.inv(extr)
[docs] def se3_to_matrix(se3): """convert [tx, ty, tz, qx, qy, qz, qw] to a [4, 4] R|t camera matrix (it can be c2w or w2c!). Args: se3 (np.ndarray): camera parameters, float [7] as [tx, ty, tz, qx, qy, qz, qw] Returns: np.ndarray: c2w/w2c matrix, float [4, 4] """ se3 = np.asarray(se3, dtype=np.float32) assert se3.shape[-1] == 7, "se3 must be of shape: [7] for [tx, ty, tz, qx, qy, qz, qw]" t = se3[:3] qx, qy, qz, qw = se3[3:] # SciPy Rotation expects quaternion in (x, y, z, w) order. r = Rotation.from_quat([qx, qy, qz, qw]) R = r.as_matrix().astype(np.float32) pose = np.eye(4, dtype=np.float32) pose[:3, :3] = R pose[:3, 3] = t return pose
[docs] def matrix_to_se3(matrix): """convert a [4, 4] R|t camera matrix to [tx, ty, tz, qx, qy, qz, qw]. Args: matrix (np.ndarray): c2w/w2c matrix, float [4, 4] Returns: np.ndarray: camera parameters, float [7] as [tx, ty, tz, qx, qy, qz, qw] """ matrix = np.asarray(matrix, dtype=np.float32) assert matrix.shape == (4, 4), "matrix must be of shape: [4, 4]" t = matrix[:3, 3] R = matrix[:3, :3] q = Rotation.from_matrix(R).as_quat() return np.concatenate([t, q], axis=-1)
[docs] def look_at(campos, target, opengl=True): """construct pose rotation matrix by look-at. Args: campos (np.ndarray): camera position, float [3] target (np.ndarray): look at target, float [3] opengl (bool, optional): whether use opengl camera convention (forward direction is target --> camera). Defaults to True. Returns: np.ndarray: the camera pose rotation matrix, float [3, 3], normalized. """ if not opengl: # forward is camera --> target forward_vector = safe_normalize(target - campos) up_vector = np.array([0, 1, 0], dtype=np.float32) right_vector = safe_normalize(np.cross(forward_vector, up_vector)) up_vector = safe_normalize(np.cross(right_vector, forward_vector)) else: # forward is target --> camera forward_vector = safe_normalize(campos - target) up_vector = np.array([0, 1, 0], dtype=np.float32) right_vector = safe_normalize(np.cross(up_vector, forward_vector)) up_vector = safe_normalize(np.cross(forward_vector, right_vector)) R = np.stack([right_vector, up_vector, forward_vector], axis=1) return R
[docs] def orbit_camera(elevation, azimuth, radius=1, is_degree=True, target=None, opengl=True): """construct a camera pose matrix orbiting a target with elevation & azimuth angle. Args: elevation (float): elevation in (-90, 90), from +y to -y is (-90, 90) azimuth (float): azimuth in (-180, 180), from +z to +x is (0, 90) radius (float, optional): camera radius. Defaults to 1. is_degree (bool, optional): if the angles are in degree. Defaults to True. target (np.ndarray, optional): look at target position. Defaults to None. opengl (bool, optional): whether to use OpenGL camera convention. Defaults to True. Returns: np.ndarray: the camera pose matrix, float [4, 4] """ if is_degree: elevation = np.deg2rad(elevation) azimuth = np.deg2rad(azimuth) x = radius * np.cos(elevation) * np.sin(azimuth) y = - radius * np.sin(elevation) z = radius * np.cos(elevation) * np.cos(azimuth) if target is None: target = np.zeros([3], dtype=np.float32) campos = np.array([x, y, z]) + target # [3] T = np.eye(4, dtype=np.float32) T[:3, :3] = look_at(campos, target, opengl) T[:3, 3] = campos return T
[docs] def undo_orbit_camera(T, is_degree=True): """ undo an orbital camera pose matrix to elevation & azimuth Args: T (np.ndarray): camera pose matrix, float [4, 4], must be an orbital camera targeting at (0, 0, 0)! is_degree (bool, optional): whether to return angles in degree. Defaults to True. Returns: Tuple[float]: elevation, azimuth, and radius. """ campos = T[:3, 3] radius = np.linalg.norm(campos) elevation = np.arcsin(-campos[1] / radius) azimuth = np.arctan2(campos[0], campos[2]) if is_degree: elevation = np.rad2deg(elevation) azimuth = np.rad2deg(azimuth) return elevation, azimuth, radius
# perspective matrix
[docs] def get_perspective(fovy, aspect=1, near=0.01, far=1000): """construct a perspective matrix from fovy. Args: fovy (float): field of view in degree along y-axis. aspect (int, optional): aspect ratio. Defaults to 1. near (float, optional): near clip plane. Defaults to 0.01. far (int, optional): far clip plane. Defaults to 1000. Returns: np.ndarray: perspective matrix, float [4, 4] """ # fovy: field of view in degree. y = np.tan(np.deg2rad(fovy) / 2) return np.array( [ [1 / (y * aspect), 0, 0, 0], [0, -1 / y, 0, 0], [ 0, 0, -(far + near) / (far - near), -(2 * far * near) / (far - near), ], [0, 0, -1, 0], ], dtype=np.float32, )
[docs] class OrbitCamera: """ An orbital camera class. """
[docs] def __init__(self, W, H, r=2, fovy=60, near=0.01, far=100): """init function Args: W (int): image width H (int): image height r (float, optional): camera radius. Defaults to 2. fovy (float, optional): camera field of view in degree along y-axis. Defaults to 60. near (float, optional): near clip plane. Defaults to 0.01. far (float, optional): far clip plane. Defaults to 100. """ self.W = W self.H = H self.radius = r # camera distance from center self.fovy = np.deg2rad(fovy) # deg 2 rad self.near = near self.far = far self.center = np.array([0, 0, 0], dtype=np.float32) # look at this point self.rot = Rotation.from_matrix(np.eye(3)) self.up = np.array([0, 1, 0], dtype=np.float32) # need to be normalized!
@property def fovx(self): """get the field of view in radians along x-axis Returns: float: field of view in radians along x-axis """ return 2 * np.arctan(np.tan(self.fovy / 2) * self.W / self.H) @property def campos(self): """get the camera position Returns: np.ndarray: camera position, float [3] """ return self.pose[:3, 3] @property def pose(self): """get the camera pose matrix (cam2world) Returns: np.ndarray: camera pose, float [4, 4] """ # first move camera to radius res = np.eye(4, dtype=np.float32) res[2, 3] = self.radius # opengl convention... # rotate rot = np.eye(4, dtype=np.float32) rot[:3, :3] = self.rot.as_matrix() res = rot @ res # translate res[:3, 3] -= self.center return res @property def view(self): """get the camera view matrix (world2cam, inverse of cam2world) Returns: np.ndarray: camera view, float [4, 4] """ return np.linalg.inv(self.pose) @property def perspective(self): """get the perspective matrix Returns: np.ndarray: camera perspective, float [4, 4] """ y = np.tan(self.fovy / 2) aspect = self.W / self.H return np.array( [ [1 / (y * aspect), 0, 0, 0], [0, -1 / y, 0, 0], [ 0, 0, -(self.far + self.near) / (self.far - self.near), -(2 * self.far * self.near) / (self.far - self.near), ], [0, 0, -1, 0], ], dtype=np.float32, ) # intrinsics @property def intrinsics(self): """get the camera intrinsics Returns: np.ndarray: intrinsics (fx, fy, cx, cy), float [4] """ focal = self.H / (2 * np.tan(self.fovy / 2)) return np.array([focal, focal, self.W // 2, self.H // 2], dtype=np.float32) @property def mvp(self): """get the MVP (model-view-perspective) matrix. Returns: np.ndarray: camera MVP, float [4, 4] """ return self.perspective @ np.linalg.inv(self.pose) # [4, 4]
[docs] def orbit(self, dx, dy): """ rotate along camera up/side axis! Args: dx (float): delta step along x (up). dy (float): delta step along y (side). """ side = self.rot.as_matrix()[:3, 0] rotvec_x = self.up * np.radians(-0.05 * dx) rotvec_y = side * np.radians(-0.05 * dy) self.rot = Rotation.from_rotvec(rotvec_x) * Rotation.from_rotvec(rotvec_y) * self.rot
[docs] def scale(self, delta): """scale the camera. Args: delta (float): delta step. """ self.radius *= 1.1 ** (-delta)
[docs] def pan(self, dx, dy, dz=0): """pan the camera. Args: dx (float): delta step along x. dy (float): delta step along y. dz (float, optional): delta step along x. Defaults to 0. """ # pan in camera coordinate system (careful on the sensitivity!) self.center += 0.0005 * self.rot.as_matrix()[:3, :3] @ np.array([dx, -dy, dz])
[docs] def from_angle(self, elevation, azimuth, is_degree=True): """set the camera pose from elevation & azimuth angle. Args: elevation (float): elevation in (-90, 90), from +y to -y is (-90, 90) azimuth (float): azimuth in (-180, 180), from +z to +x is (0, 90) is_degree (bool, optional): whether the angles are in degree. Defaults to True. """ if is_degree: elevation = np.deg2rad(elevation) azimuth = np.deg2rad(azimuth) x = self.radius * np.cos(elevation) * np.sin(azimuth) y = - self.radius * np.sin(elevation) z = self.radius * np.cos(elevation) * np.cos(azimuth) campos = np.array([x, y, z]) # [N, 3] rot_mat = look_at(campos, np.zeros([3], dtype=np.float32)) self.rot = Rotation.from_matrix(rot_mat)