# This script post-processes COLMAP camera calibration results to find the
# ground plane and the scene center.
# Methods:
# 1. use a chessboard
# 2. estimate the ground plane from the point cloud
import os
from os.path import join
from easymocap.annotator.file_utils import save_json
from easymocap.mytools.debug_utils import myerror, run_cmd, mywarn, log
from easymocap.mytools.camera_utils import read_cameras, write_camera
from easymocap.mytools import read_json
from easymocap.mytools import batch_triangulate, projectN3, Undistort
import numpy as np
import cv2
from apps.calibration.calib_extri import solvePnP

def guess_ground(pcdname):
    # placeholder for method 2 (estimating the ground plane from the point
    # cloud): currently it only loads the point cloud
    import open3d as o3d
    pcd = o3d.io.read_point_cloud(pcdname)

def compute_rel(R_src, T_src, R_tgt, T_tgt):
    # relative transform from the source camera frame to the target camera frame
    R_rel = R_src.T @ R_tgt
    T_rel = R_src.T @ (T_tgt - T_src)
    return R_rel, T_rel

def triangulate(cameras, areas):
    # triangulate the chessboard corners from all views that detected them
    Ps, k2ds = [], []
    for cam, _, k2d, k3d in areas:
        k2d = Undistort.points(k2d, cameras[cam]['K'], cameras[cam]['dist'])
        P = cameras[cam]['K'] @ np.hstack([cameras[cam]['R'], cameras[cam]['T']])
        Ps.append(P)
        k2ds.append(k2d)
    Ps = np.stack(Ps)
    k2ds = np.stack(k2ds)
    k3d = batch_triangulate(k2ds, Ps)
    return k3d

def best_fit_transform(A, B):
    '''
    Calculates the least-squares best-fit rigid transform that maps
    corresponding points A to B in m spatial dimensions
    Input:
      A: Nxm numpy array of corresponding points
      B: Nxm numpy array of corresponding points
    Returns:
      R: mxm rotation matrix
      t: m-dimensional translation vector
    '''
    assert A.shape == B.shape
    # get number of dimensions
    m = A.shape[1]
    # translate points to their centroids
    centroid_A = np.mean(A, axis=0)
    centroid_B = np.mean(B, axis=0)
    AA = A - centroid_A
    BB = B - centroid_B
    # rotation matrix
    H = np.dot(AA.T, BB)
    U, S, Vt = np.linalg.svd(H)
    R = np.dot(Vt.T, U.T)
    # special reflection case
    if np.linalg.det(R) < 0:
        Vt[m-1, :] *= -1
        R = np.dot(Vt.T, U.T)
    # translation
    t = centroid_B.T - np.dot(R, centroid_A.T)
    return R, t
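# A minimal sanity check of best_fit_transform (illustrative: the helper below
# is not called anywhere in this script and its name is not part of EasyMocap).
# It builds a random rigid transform, applies it to synthetic points, and
# verifies that the recovered (R, t) satisfies B == A @ R.T + t, which is how
# the function is used further down (chessboard frame -> COLMAP frame).
def _check_best_fit_transform(num_points=88, seed=0):
    rng = np.random.RandomState(seed)
    # random proper rotation: orthonormalize a Gaussian matrix, force det = +1
    Q, _ = np.linalg.qr(rng.randn(3, 3))
    if np.linalg.det(Q) < 0:
        Q[:, 0] *= -1
    t_gt = rng.randn(3)
    A = rng.randn(num_points, 3)
    B = A @ Q.T + t_gt
    R, t = best_fit_transform(A, B)
    assert np.allclose(R, Q, atol=1e-6) and np.allclose(t, t_gt, atol=1e-6)
    return R, t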
def align_by_chessboard(cameras, path):
    # NOTE: this function reads the global command-line `args` parsed in __main__
    camnames = sorted(os.listdir(join(path, 'chessboard')))
    areas = []
    for ic, cam in enumerate(camnames):
        imagename = join(path, 'images', cam, '000000.jpg')
        chessname = join(path, 'chessboard', cam, '000000.json')
        data = read_json(chessname)
        k3d = np.array(data['keypoints3d'], dtype=np.float32)
        k2d = np.array(data['keypoints2d'], dtype=np.float32)
        # TODO
        # pattern = (11, 8)
        if 'pattern' in data.keys():
            pattern = data['pattern']
        else:
            pattern = None
        img = cv2.imread(imagename)
        if args.scale2d is not None:
            k2d[:, :2] *= args.scale2d
            img = cv2.resize(img, None, fx=args.scale2d, fy=args.scale2d)
        if args.origin is not None:
            cameras[args.prefix+cam] = cameras.pop(args.origin+cam.replace('VID_', '0000'))
        cam = args.prefix + cam
        if cam not in cameras.keys():
            myerror('camera {} not found in {}'.format(cam, cameras.keys()))
            continue
        cameras[cam]['shape'] = img.shape[:2]
        if k2d[:, -1].sum() < 1:
            # no valid chessboard detection in this view
            continue
        # calculate the area of the chessboard
        mask = np.zeros_like(img[:, :, 0])
        k2d_int = np.round(k2d[:, :2]).astype(int)
        if pattern is not None:
            cv2.fillPoly(mask, [k2d_int[[0, pattern[0]-1, -1, -pattern[0]]]], 1)
        else:
            cv2.fillPoly(mask, [k2d_int[[0, 1, 2, 3, 0]]], 1)
        area = mask.sum()
        print(cam, area)
        areas.append([cam, area, k2d, k3d])
    # use the view with the largest chessboard area as the reference
    areas.sort(key=lambda x: -x[1])
    best_cam, area, k2d, k3d = areas[0]
    # resolve the scale first: compare the ground-truth chessboard diagonal
    # with the triangulated one
    ref_point_id = np.linalg.norm(k3d - k3d[:1], axis=-1).argmax()
    k3d_pre = triangulate(cameras, areas)
    length_gt = np.linalg.norm(k3d[0, :3] - k3d[ref_point_id, :3])
    length = np.linalg.norm(k3d_pre[0, :3] - k3d_pre[ref_point_id, :3])
    log('gt diag={:.3f}, est diag={:.3f}, scale={:.3f}'.format(length_gt, length, length_gt/length))
    scale_colmap = length_gt / length
    for cam, camera in cameras.items():
        camera['T'] *= scale_colmap
    k3d_pre = triangulate(cameras, areas)
    length = np.linalg.norm(k3d_pre[0, :3] - k3d_pre[ref_point_id, :3])
    log('gt diag={:.3f}, est diag={:.3f}, scale={:.3f}'.format(length_gt, length, length_gt/length))
    # compute the camera R, T relative to the chessboard
    if False:
        # disabled: PnP from a single view; the relative transforms computed
        # from different views should be consistent
        for cam, _, k2d, k3d in areas:
            K, dist = cameras[cam]['K'], cameras[cam]['dist']
            R, T = cameras[cam]['R'], cameras[cam]['T']
            err, rvec, tvec, kpts_repro = solvePnP(k3d, k2d, K, dist, flag=cv2.SOLVEPNP_ITERATIVE)
            R_tgt = cv2.Rodrigues(rvec)[0]
            T_tgt = tvec.reshape(3, 1)
            R_rel, T_rel = compute_rel(R, T, R_tgt, T_tgt)
            break
    else:
        # fit a rigid transform between the ground-truth chessboard coordinates
        # and the triangulated ones;
        # (R_rel, T_rel) maps from the chessboard frame to the COLMAP frame
        X = k3d_pre[:, :3]
        X_gt = k3d[:, :3]
        R_rel, T_rel = best_fit_transform(X_gt, X)
        T_rel = T_rel.reshape(3, 1)
    centers = []
    for cam, camera in cameras.items():
        camera.pop('Rvec')
        R_old, T_old = camera['R'], camera['T']
        R_new = R_old @ R_rel
        T_new = T_old + R_old @ T_rel
        camera['R'] = R_new
        camera['T'] = T_new
        center = - camera['R'].T @ camera['T']
        centers.append(center)
        print('{}: ({:6.3f}, {:.3f}, {:.3f})'.format(cam, *np.round(center.T[0], 3)))
    # check the scale again with the chessboard
    k3d_pre = triangulate(cameras, areas)
    length = np.linalg.norm(k3d_pre[0, :3] - k3d_pre[ref_point_id, :3])
    log('{} {} {}'.format(length_gt, length, length_gt/length))
    log(k3d_pre)
    transform = np.eye(4)
    transform[:3, :3] = R_rel
    transform[:3, 3:] = T_rel
    # the inverse maps points from the COLMAP frame into the chessboard frame
    return cameras, scale_colmap, np.linalg.inv(transform)

# For a 3D point X:
#   in the original (COLMAP) world: \Pi(R X + T)   = x
#   in the new (chessboard) world:  \Pi(R' Y + T') = x
# where X = R_p Y + T_p, hence R' = R R_p and T' = T + R T_p.
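# A minimal numeric check of the update above (illustrative; the helper name is
# ours and nothing in this script calls it): with X = R_p @ Y + T_p, a camera
# that projects X as R @ X + T projects Y as (R @ R_p) @ Y + (T + R @ T_p),
# which is exactly how align_by_chessboard rewrites each camera's R and T.
def _check_extrinsic_update(seed=0):
    rng = np.random.RandomState(seed)
    R = cv2.Rodrigues(rng.randn(3, 1))[0]    # original world-to-camera rotation
    T = rng.randn(3, 1)
    R_p = cv2.Rodrigues(rng.randn(3, 1))[0]  # chessboard-to-COLMAP-world rotation
    T_p = rng.randn(3, 1)
    Y = rng.randn(3, 1)                      # a point in the chessboard frame
    X = R_p @ Y + T_p                        # the same point in the COLMAP frame
    assert np.allclose(R @ X + T, (R @ R_p) @ Y + (T + R @ T_p))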
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    parser.add_argument('out', type=str)
    parser.add_argument('--plane_by_chessboard', type=str, default=None)
    parser.add_argument('--plane_by_point', type=str, default=None)
    parser.add_argument('--num', type=int, default=1)
    parser.add_argument('--scale2d', type=float, default=None)
    parser.add_argument('--prefix', type=str, default='')
    parser.add_argument('--origin', type=str, default=None)
    parser.add_argument('--guess_plane', action='store_true')
    parser.add_argument('--noshow', action='store_true')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    # convert the COLMAP binary model to EasyMocap camera files if needed
    if not os.path.exists(join(args.path, 'intri.yml')):
        assert os.path.exists(join(args.path, 'cameras.bin')), os.listdir(args.path)
        cmd = f'python3 apps/calibration/read_colmap.py {args.path} .bin'
        run_cmd(cmd)
    cameras = read_cameras(args.path)
    if args.plane_by_point is not None:
        # read the point cloud (not implemented yet)
        import ipdb; ipdb.set_trace()
    if args.plane_by_chessboard is not None:
        cameras, scale, transform = align_by_chessboard(cameras, args.plane_by_chessboard)
        if not args.noshow:
            import open3d as o3d
            sparse_name = join(args.path, 'sparse.ply')
            dense_name = join(args.path, '..', '..', 'dense', 'fused.ply')
            if os.path.exists(dense_name):
                pcd = o3d.io.read_point_cloud(dense_name)
            else:
                pcd = o3d.io.read_point_cloud(sparse_name)
            save_json(join(args.out, 'transform.json'), {'scale': scale, 'transform': transform.tolist()})
            points = np.asarray(pcd.points)
            # TODO: read correspondence of points3D and points2D
            # rescale the point cloud and map it into the chessboard frame
            points_new = (scale*points) @ transform[:3, :3].T + transform[:3, 3:].T
            pcd.points = o3d.utility.Vector3dVector(points_new)
            o3d.io.write_point_cloud(join(args.out, 'sparse_aligned.ply'), pcd)
            # draw a coordinate frame at the origin and at every camera center
            grids = []
            center = o3d.geometry.TriangleMesh.create_coordinate_frame(
                size=1, origin=[0, 0, 0])
            grids.append(center)
            for cam, camera in cameras.items():
                center = - camera['R'].T @ camera['T']
                center = o3d.geometry.TriangleMesh.create_coordinate_frame(
                    size=0.5, origin=[center[0, 0], center[1, 0], center[2, 0]])
                if cam.startswith(args.prefix):
                    center.paint_uniform_color([1, 0, 1])
                center.rotate(camera['R'].T)
                grids.append(center)
            o3d.visualization.draw_geometries([pcd] + grids)
    write_camera(cameras, args.out)
    if args.prefix:
        # also export the prefixed cameras with the prefix stripped
        cameras_ = {}
        for key, camera in cameras.items():
            if args.prefix not in key:
                continue
            cameras_[key.replace(args.prefix, '')] = camera
        os.makedirs(join(args.out, args.prefix), exist_ok=True)
        write_camera(cameras_, join(args.out, args.prefix))
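# Example invocation (illustrative; the script path and data directories are
# placeholders, only the flags come from the argparse definition above):
#   python3 apps/calibration/<this_script>.py <colmap_model_dir> <output_dir> \
#       --plane_by_chessboard <dataset_root_with_images_and_chessboard> \
#       --prefix <camera_prefix>
# <colmap_model_dir> should contain either intri.yml/extri.yml or cameras.bin;
# the aligned cameras are written to <output_dir>, and transform.json /
# sparse_aligned.ply are saved there as well unless --noshow is given.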