From 5bc4b113ba4e1bd1cde45f12e0080dc765d882cd Mon Sep 17 00:00:00 2001 From: Qing Shuai Date: Sun, 21 Aug 2022 16:04:51 +0800 Subject: [PATCH] :construction: update tools 1. update camera reader 2. update debug and visualize tools --- easymocap/mytools/camera_utils.py | 163 +++++- easymocap/mytools/colmap_structure.py | 439 +++++++++++++++ easymocap/mytools/colmap_wrapper.py | 468 ++++++++++++++++ easymocap/mytools/debug_utils.py | 86 +++ easymocap/mytools/file_utils.py | 63 ++- easymocap/mytools/reader.py | 29 +- easymocap/mytools/triangulator.py | 735 ++++++++++++++++++++++++++ easymocap/mytools/vis_base.py | 186 +++++-- scripts/dataset/download_youtube.py | 104 ++++ 9 files changed, 2184 insertions(+), 89 deletions(-) create mode 100644 easymocap/mytools/colmap_structure.py create mode 100644 easymocap/mytools/colmap_wrapper.py create mode 100644 easymocap/mytools/debug_utils.py create mode 100644 easymocap/mytools/triangulator.py create mode 100644 scripts/dataset/download_youtube.py diff --git a/easymocap/mytools/camera_utils.py b/easymocap/mytools/camera_utils.py index 0c3d8c9..be14143 100644 --- a/easymocap/mytools/camera_utils.py +++ b/easymocap/mytools/camera_utils.py @@ -1,7 +1,7 @@ import cv2 import numpy as np import os - +from os.path import join class FileStorage(object): def __init__(self, filename, isWrite=False): version = cv2.__version__ @@ -10,27 +10,34 @@ class FileStorage(object): if isWrite: os.makedirs(os.path.dirname(filename), exist_ok=True) - self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_WRITE) + self.fs = open(filename, 'w') + self.fs.write('%YAML:1.0\r\n') + self.fs.write('---\r\n') else: + assert os.path.exists(filename), filename self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ) + self.isWrite = isWrite def __del__(self): - cv2.FileStorage.release(self.fs) + if self.isWrite: + self.fs.close() + else: + cv2.FileStorage.release(self.fs) + + def _write(self, out): + self.fs.write(out+'\r\n') def write(self, key, value, dt='mat'): if dt == 'mat': - cv2.FileStorage.write(self.fs, key, value) + self._write('{}: !!opencv-matrix'.format(key)) + self._write(' rows: {}'.format(value.shape[0])) + self._write(' cols: {}'.format(value.shape[1])) + self._write(' dt: d') + self._write(' data: [{}]'.format(', '.join(['{:.3f}'.format(i) for i in value.reshape(-1)]))) elif dt == 'list': - if self.major_version == 4: # 4.4 - self.fs.startWriteStruct(key, cv2.FileNode_SEQ) - for elem in value: - self.fs.write('', elem) - self.fs.endWriteStruct() - else: # 3.4 - self.fs.write(key, '[') - for elem in value: - self.fs.write('none', elem) - self.fs.write('none', ']') + self._write('{}:'.format(key)) + for elem in value: + self._write(' - "{}"'.format(elem)) def read(self, key, dt='mat'): if dt == 'mat': @@ -66,6 +73,8 @@ def read_intri(intri_name): return cameras def write_intri(intri_name, cameras): + if not os.path.exists(os.path.dirname(intri_name)): + os.makedirs(os.path.dirname(intri_name)) intri = FileStorage(intri_name, True) results = {} camnames = list(cameras.keys()) @@ -74,11 +83,13 @@ def write_intri(intri_name, cameras): key = key_.split('.')[0] K, dist = val['K'], val['dist'] assert K.shape == (3, 3), K.shape - assert dist.shape == (1, 5) or dist.shape == (5, 1), dist.shape + assert dist.shape == (1, 5) or dist.shape == (5, 1) or dist.shape == (1, 4) or dist.shape == (4, 1), dist.shape intri.write('K_{}'.format(key), K) - intri.write('dist_{}'.format(key), dist.reshape(1, 5)) + intri.write('dist_{}'.format(key), dist.flatten()[None]) def 
write_extri(extri_name, cameras):
+    if not os.path.exists(os.path.dirname(extri_name)):
+        os.makedirs(os.path.dirname(extri_name))
     extri = FileStorage(extri_name, True)
     results = {}
     camnames = list(cameras.keys())
@@ -105,12 +116,15 @@ def read_camera(intri_name, extri_name, cam_names=[]):
         cams[cam]['invK'] = np.linalg.inv(cams[cam]['K'])
         Rvec = extri.read('R_{}'.format(cam))
         Tvec = extri.read('T_{}'.format(cam))
+        assert Rvec is not None, cam
         R = cv2.Rodrigues(Rvec)[0]
         RT = np.hstack((R, Tvec))
 
         cams[cam]['RT'] = RT
         cams[cam]['R'] = R
+        cams[cam]['Rvec'] = Rvec
         cams[cam]['T'] = Tvec
+        # the camera center satisfies R @ center + T = 0, i.e. center = -R.T @ T
+        cams[cam]['center'] = - R.T @ Tvec
         P[cam] = cams[cam]['K'] @ cams[cam]['RT']
         cams[cam]['P'] = P[cam]
@@ -118,6 +132,13 @@ def read_camera(intri_name, extri_name, cam_names=[]):
     cams['basenames'] = cam_names
     return cams
 
+def read_cameras(path, intri='intri.yml', extri='extri.yml', subs=[]):
+    cameras = read_camera(join(path, intri), join(path, extri))
+    cameras.pop('basenames')
+    if len(subs) > 0:
+        cameras = {key: cameras[key] for key in subs}
+    return cameras
+
 def write_camera(camera, path):
     from os.path import join
     intri_name = join(path, 'intri.yml')
@@ -146,12 +167,24 @@ def camera_from_img(img):
     focal = 1.2*min(height, width) # as colmap
     K = np.array([focal, 0., width/2, 0., focal, height/2, 0. ,0., 1.]).reshape(3, 3)
     camera = {'K':K ,'R': np.eye(3), 'T': np.zeros((3, 1)), 'dist': np.zeros((1, 5))}
+    camera['invK'] = np.linalg.inv(camera['K'])
+    camera['P'] = camera['K'] @ np.hstack((camera['R'], camera['T']))
     return camera
 
 class Undistort:
-    @staticmethod
-    def image(frame, K, dist):
-        return cv2.undistort(frame, K, dist, None)
+    distortMap = {}
+    @classmethod
+    def image(cls, frame, K, dist, sub=None):
+        if sub is None:
+            return cv2.undistort(frame, K, dist, None)
+        else:
+            # cache the remap grids per view: cv2.remap with precomputed maps
+            # is much faster than cv2.undistort for repeated frames
+            if sub not in cls.distortMap.keys():
+                h, w = frame.shape[:2]
+                mapx, mapy = cv2.initUndistortRectifyMap(K, dist, None, K, (w,h), 5)
+                cls.distortMap[sub] = (mapx, mapy)
+            mapx, mapy = cls.distortMap[sub]
+            img = cv2.remap(frame, mapx, mapy, cv2.INTER_NEAREST)
+            return img
 
     @staticmethod
     def points(keypoints, K, dist):
@@ -170,10 +203,38 @@ class Undistort:
         bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
         return bbox
 
-def undistort(camera, frame=None, keypoints=None, output=None, bbox=None):
-    # bbox: 1, 7
-    print('This function is deprecated')
-    raise NotImplementedError
+def unproj(kpts, invK):
+    homo = np.hstack([kpts[:, :2], np.ones_like(kpts[:, :1])])
+    homo = homo @ invK.T
+    return np.hstack([homo[:, :2], kpts[:, 2:]])
 
+class UndistortFisheye:
+    @staticmethod
+    def image(frame, K, dist):
+        Knew = K.copy()
+        frame = cv2.fisheye.undistortImage(frame, K, dist, Knew=Knew)
+        return frame, Knew
+
+    @staticmethod
+    def points(keypoints, K, dist, Knew):
+        # keypoints: (N, 3)
+        assert len(keypoints.shape) == 2, keypoints.shape
+        kpts = keypoints[:, None, :2]
+        kpts = np.ascontiguousarray(kpts)
+        kpts = cv2.fisheye.undistortPoints(kpts, K, dist, P=Knew)
+        keypoints[:, :2] = kpts[:, 0]
+        return keypoints
+
+    @staticmethod
+    def bbox(bbox, K, dist, Knew):
+        keypoints = np.array([[bbox[0], bbox[1], 1], [bbox[2], bbox[3], 1]])
+        kpts = UndistortFisheye.points(keypoints, K, dist, Knew)
+        bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
+        return bbox
+
+
+def get_Pall(cameras, camnames):
+    Pall = np.stack([cameras[cam]['K'] @ np.hstack((cameras[cam]['R'], cameras[cam]['T'])) for cam in camnames])
+    return Pall
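+
+# A minimal usage sketch for the helpers above (illustrative only; the dataset
+# path and the frame variable are hypothetical, not part of this patch):
+#   cameras = read_cameras('./data/mydataset')  # {sub: {'K','R','T','dist','P',...}}
+#   subs = sorted(cameras.keys())
+#   Pall = get_Pall(cameras, subs)              # (nViews, 3, 4) projection matrices
+#   # passing sub=<view name> caches the undistortion remap grids per view:
+#   # img = Undistort.image(frame, cameras[subs[0]]['K'], cameras[subs[0]]['dist'], sub=subs[0])
+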
 def get_fundamental_matrix(cameras, basenames):
     skew_op = lambda x: np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
@@ -189,3 +250,59 @@ def get_fundamental_matrix(cameras, basenames):
         if F[(icam, jcam)].sum() == 0:
             F[(icam, jcam)] += 1e-12 # to avoid nan
     return F
+
+def interp_cameras(cameras, keys, step=20, loop=True, allstep=-1, **kwargs):
+    from scipy.spatial.transform import Rotation as R
+    from scipy.spatial.transform import Slerp
+    if allstep != -1:
+        tall = np.linspace(0., 1., allstep+1)[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and loop:
+        tall = np.linspace(0., 1., 1+step*len(keys))[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and not loop:
+        tall = np.linspace(0., 1., 1+step*(len(keys)-1))[:-1].reshape(-1, 1, 1)
+    cameras_new = {}
+    for ik in range(len(keys)):
+        if ik == len(keys) -1 and not loop:
+            break
+        if loop:
+            start, end = (ik * tall.shape[0])//len(keys), int((ik+1)*tall.shape[0])//len(keys)
+            print(ik, start, end, tall.shape)
+        else:
+            start, end = (ik * tall.shape[0])//(len(keys)-1), int((ik+1)*tall.shape[0])//(len(keys)-1)
+        t = tall[start:end].copy()
+        t = (t-t.min())/(t.max()-t.min())
+        left, right = keys[ik], keys[0 if ik == len(keys)-1 else ik + 1]
+        camera_left = cameras[left]
+        camera_right = cameras[right]
+        # interpolate the camera centers: center = - R.T @ T
+        center_l = - camera_left['R'].T @ camera_left['T']
+        center_r = - camera_right['R'].T @ camera_right['T']
+        center_l, center_r = center_l[None], center_r[None]
+        if False:
+            centers = center_l * (1-t) + center_r * t
+        else:
+            # spherical interpolation of the centers
+            norm_l, norm_r = np.linalg.norm(center_l), np.linalg.norm(center_r)
+            center_l, center_r = center_l/norm_l, center_r/norm_r
+            costheta = (center_l*center_r).sum()
+            sintheta = np.sqrt(1. - costheta**2)
+            theta = np.arctan2(sintheta, costheta)
+            centers = (np.sin(theta*(1-t)) * center_l + np.sin(theta * t) * center_r)/sintheta
+            norm = norm_l * (1-t) + norm_r * t
+            centers = centers * norm
+        key_rots = R.from_matrix(np.stack([camera_left['R'], camera_right['R']]))
+        key_times = [0, 1]
+        slerp = Slerp(key_times, key_rots)
+        interp_rots = slerp(t.squeeze()).as_matrix()
+        # recover the camera T from R @ X + T = 0  =>  T = - R @ X
+        T = - np.einsum('bmn,bno->bmo', interp_rots, centers)
+        K = camera_left['K'] * (1-t) + camera_right['K'] * t
+        for i in range(T.shape[0]):
+            cameras_new['{}-{}-{}'.format(left, right, i)] = \
+                {
+                    'K': K[i],
+                    'dist': np.zeros((1, 5)),
+                    'R': interp_rots[i],
+                    'T': T[i]
+                }
+    return cameras_new
\ No newline at end of file
diff --git a/easymocap/mytools/colmap_structure.py b/easymocap/mytools/colmap_structure.py
new file mode 100644
index 0000000..b2c7632
--- /dev/null
+++ b/easymocap/mytools/colmap_structure.py
@@ -0,0 +1,439 @@
+# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+#     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
+#       its contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de) + +import os +import sys +import collections +import numpy as np +import struct +import cv2 + +CameraModel = collections.namedtuple( + "CameraModel", ["model_id", "model_name", "num_params"]) +Camera = collections.namedtuple( + "Camera", ["id", "model", "width", "height", "params"]) +BaseImage = collections.namedtuple( + "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) +Point3D = collections.namedtuple( + "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) + +class Image(BaseImage): + def qvec2rotmat(self): + return qvec2rotmat(self.qvec) + + +CAMERA_MODELS = { + CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), + CameraModel(model_id=1, model_name="PINHOLE", num_params=4), + CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), + CameraModel(model_id=3, model_name="RADIAL", num_params=5), + CameraModel(model_id=4, model_name="OPENCV", num_params=8), + CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), + CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), + CameraModel(model_id=7, model_name="FOV", num_params=5), + CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), + CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), + CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) +} +CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \ + for camera_model in CAMERA_MODELS]) +CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model) + for camera_model in CAMERA_MODELS]) + +def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): + """Read and unpack the next bytes from a binary file. + :param fid: + :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. + :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. + :param endian_character: Any of {@, =, <, >, !} + :return: Tuple of read and unpacked values. 
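+    A minimal usage sketch (the file handle `fid` is assumed to be an open
+    binary file, as in the readers below):
+        num_cameras = read_next_bytes(fid, 8, "Q")[0]
+        camera_properties = read_next_bytes(fid, num_bytes=24, format_char_sequence="iiQQ")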
+ """ + data = fid.read(num_bytes) + return struct.unpack(endian_character + format_char_sequence, data) + + +def read_cameras_text(path): + """ + see: src/base/reconstruction.cc + void Reconstruction::WriteCamerasText(const std::string& path) + void Reconstruction::ReadCamerasText(const std::string& path) + """ + cameras = {} + with open(path, "r") as fid: + while True: + line = fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + camera_id = int(elems[0]) + model = elems[1] + width = int(elems[2]) + height = int(elems[3]) + params = np.array(tuple(map(float, elems[4:]))) + cameras[camera_id] = Camera(id=camera_id, model=model, + width=width, height=height, + params=params) + return cameras + + +def read_cameras_binary(path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::WriteCamerasBinary(const std::string& path) + void Reconstruction::ReadCamerasBinary(const std::string& path) + """ + cameras = {} + with open(path_to_model_file, "rb") as fid: + num_cameras = read_next_bytes(fid, 8, "Q")[0] + for camera_line_index in range(num_cameras): + camera_properties = read_next_bytes( + fid, num_bytes=24, format_char_sequence="iiQQ") + camera_id = camera_properties[0] + model_id = camera_properties[1] + model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name + width = camera_properties[2] + height = camera_properties[3] + num_params = CAMERA_MODEL_IDS[model_id].num_params + params = read_next_bytes(fid, num_bytes=8*num_params, + format_char_sequence="d"*num_params) + cameras[camera_id] = Camera(id=camera_id, + model=model_name, + width=width, + height=height, + params=np.array(params)) + assert len(cameras) == num_cameras + return cameras + + +def read_images_text(path): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadImagesText(const std::string& path) + void Reconstruction::WriteImagesText(const std::string& path) + """ + images = {} + with open(path, "r") as fid: + while True: + line = fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + image_id = int(elems[0]) + qvec = np.array(tuple(map(float, elems[1:5]))) + tvec = np.array(tuple(map(float, elems[5:8]))) + camera_id = int(elems[8]) + image_name = elems[9] + elems = fid.readline().split() + xys = np.column_stack([tuple(map(float, elems[0::3])), + tuple(map(float, elems[1::3]))]) + point3D_ids = np.array(tuple(map(int, elems[2::3]))) + images[image_id] = Image( + id=image_id, qvec=qvec, tvec=tvec, + camera_id=camera_id, name=image_name, + xys=xys, point3D_ids=point3D_ids) + return images + + +def read_images_binary(path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadImagesBinary(const std::string& path) + void Reconstruction::WriteImagesBinary(const std::string& path) + """ + images = {} + with open(path_to_model_file, "rb") as fid: + num_reg_images = read_next_bytes(fid, 8, "Q")[0] + for image_index in range(num_reg_images): + binary_image_properties = read_next_bytes( + fid, num_bytes=64, format_char_sequence="idddddddi") + image_id = binary_image_properties[0] + qvec = np.array(binary_image_properties[1:5]) + tvec = np.array(binary_image_properties[5:8]) + camera_id = binary_image_properties[8] + image_name = "" + current_char = read_next_bytes(fid, 1, "c")[0] + while current_char != b"\x00": # look for the ASCII 0 entry + image_name += current_char.decode("utf-8") + current_char = read_next_bytes(fid, 1, 
"c")[0] + num_points2D = read_next_bytes(fid, num_bytes=8, + format_char_sequence="Q")[0] + x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, + format_char_sequence="ddq"*num_points2D) + xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), + tuple(map(float, x_y_id_s[1::3]))]) + point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) + images[image_id] = Image( + id=image_id, qvec=qvec, tvec=tvec, + camera_id=camera_id, name=image_name, + xys=xys, point3D_ids=point3D_ids) + return images + + +def read_points3D_text(path): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadPoints3DText(const std::string& path) + void Reconstruction::WritePoints3DText(const std::string& path) + """ + points3D = {} + with open(path, "r") as fid: + while True: + line = fid.readline() + if not line: + break + line = line.strip() + if len(line) > 0 and line[0] != "#": + elems = line.split() + point3D_id = int(elems[0]) + xyz = np.array(tuple(map(float, elems[1:4]))) + rgb = np.array(tuple(map(int, elems[4:7]))) + error = float(elems[7]) + image_ids = np.array(tuple(map(int, elems[8::2]))) + point2D_idxs = np.array(tuple(map(int, elems[9::2]))) + points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, + error=error, image_ids=image_ids, + point2D_idxs=point2D_idxs) + return points3D + + +def read_points3d_binary(path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadPoints3DBinary(const std::string& path) + void Reconstruction::WritePoints3DBinary(const std::string& path) + """ + points3D = {} + with open(path_to_model_file, "rb") as fid: + num_points = read_next_bytes(fid, 8, "Q")[0] + for point_line_index in range(num_points): + binary_point_line_properties = read_next_bytes( + fid, num_bytes=43, format_char_sequence="QdddBBBd") + point3D_id = binary_point_line_properties[0] + xyz = np.array(binary_point_line_properties[1:4]) + rgb = np.array(binary_point_line_properties[4:7]) + error = np.array(binary_point_line_properties[7]) + track_length = read_next_bytes( + fid, num_bytes=8, format_char_sequence="Q")[0] + track_elems = read_next_bytes( + fid, num_bytes=8*track_length, + format_char_sequence="ii"*track_length) + image_ids = np.array(tuple(map(int, track_elems[0::2]))) + point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) + points3D[point3D_id] = Point3D( + id=point3D_id, xyz=xyz, rgb=rgb, + error=error, image_ids=image_ids, + point2D_idxs=point2D_idxs) + return points3D + + +def read_model(path, ext): + if ext == ".txt": + cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) + images = read_images_text(os.path.join(path, "images" + ext)) + points3D = read_points3D_text(os.path.join(path, "points3D") + ext) + else: + cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) + images = read_images_binary(os.path.join(path, "images" + ext)) + points3D = read_points3d_binary(os.path.join(path, "points3D") + ext) + return cameras, images, points3D + + +def qvec2rotmat(qvec): + return np.array([ + [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, + 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], + 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], + [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], + 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, + 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], + [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], + 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], + 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) + + +def rotmat2qvec(R): + Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat + K = np.array([ + [Rxx - Ryy 
- Rzz, 0, 0, 0], + [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], + [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], + [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 + eigvals, eigvecs = np.linalg.eigh(K) + qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] + if qvec[0] < 0: + qvec *= -1 + return qvec + + +def write_cameras_text(cameras, path): + """ + see: src/base/reconstruction.cc + void Reconstruction::WriteCamerasText(const std::string& path) + void Reconstruction::ReadCamerasText(const std::string& path) + """ + HEADER = '# Camera list with one line of data per camera:\n' + '# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n' + '# Number of cameras: {}\n'.format(len(cameras)) + with open(path, "w") as fid: + fid.write(HEADER) + for _, cam in cameras.items(): + to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params] + line = " ".join([str(elem) for elem in to_write]) + fid.write(line + "\n") + +def write_next_bytes(fid, data, format_char_sequence, endian_character="<"): + """pack and write to a binary file. + :param fid: + :param data: data to send, if multiple elements are sent at the same time, + they should be encapsuled either in a list or a tuple + :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. + should be the same length as the data list or tuple + :param endian_character: Any of {@, =, <, >, !} + """ + if isinstance(data, (list, tuple)): + bytes = struct.pack(endian_character + format_char_sequence, *data) + else: + bytes = struct.pack(endian_character + format_char_sequence, data) + fid.write(bytes) + +def write_cameras_binary(cameras, path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::WriteCamerasBinary(const std::string& path) + void Reconstruction::ReadCamerasBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(cameras), "Q") + for _, cam in cameras.items(): + model_id = CAMERA_MODEL_NAMES[cam.model].model_id + camera_properties = [cam.id, + model_id, + cam.width, + cam.height] + write_next_bytes(fid, camera_properties, "iiQQ") + for p in cam.params: + write_next_bytes(fid, float(p), "d") + return cameras + + +def write_images_binary(images, path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadImagesBinary(const std::string& path) + void Reconstruction::WriteImagesBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(images), "Q") + for _, img in images.items(): + write_next_bytes(fid, img.id, "i") + write_next_bytes(fid, img.qvec.tolist(), "dddd") + write_next_bytes(fid, img.tvec.tolist(), "ddd") + write_next_bytes(fid, img.camera_id, "i") + for char in img.name: + write_next_bytes(fid, char.encode("utf-8"), "c") + write_next_bytes(fid, b"\x00", "c") + write_next_bytes(fid, len(img.point3D_ids), "Q") + for xy, p3d_id in zip(img.xys, img.point3D_ids): + write_next_bytes(fid, [*xy, p3d_id], "ddq") + +def write_images_text(images, path): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadImagesText(const std::string& path) + void Reconstruction::WriteImagesText(const std::string& path) + """ + if len(images) == 0: + mean_observations = 0 + else: + mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images) + HEADER = '# Image list with two lines of data per image:\n' + '# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n' + '# POINTS2D[] as (X, Y, POINT3D_ID)\n' + '# Number of images: {}, mean observations per image: 
{}\n'.format(len(images), mean_observations) + + with open(path, "w") as fid: + fid.write(HEADER) + for _, img in images.items(): + image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] + first_line = " ".join(map(str, image_header)) + fid.write(first_line + "\n") + + points_strings = [] + for xy, point3D_id in zip(img.xys, img.point3D_ids): + points_strings.append(" ".join(map(str, [*xy, point3D_id]))) + fid.write(" ".join(points_strings) + "\n") + +def write_points3D_text(points3D, path): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadPoints3DText(const std::string& path) + void Reconstruction::WritePoints3DText(const std::string& path) + """ + if len(points3D) == 0: + mean_track_length = 0 + else: + mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D) + HEADER = '# 3D point list with one line of data per point:\n' + '# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n' + '# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length) + + with open(path, "w") as fid: + fid.write(HEADER) + for _, pt in points3D.items(): + point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error] + fid.write(" ".join(map(str, point_header)) + " ") + track_strings = [] + for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs): + track_strings.append(" ".join(map(str, [image_id, point2D]))) + fid.write(" ".join(track_strings) + "\n") + + +def write_points3d_binary(points3D, path_to_model_file): + """ + see: src/base/reconstruction.cc + void Reconstruction::ReadPoints3DBinary(const std::string& path) + void Reconstruction::WritePoints3DBinary(const std::string& path) + """ + with open(path_to_model_file, "wb") as fid: + write_next_bytes(fid, len(points3D), "Q") + for _, pt in points3D.items(): + write_next_bytes(fid, pt.id, "Q") + write_next_bytes(fid, pt.xyz.tolist(), "ddd") + write_next_bytes(fid, pt.rgb.tolist(), "BBB") + write_next_bytes(fid, pt.error, "d") + track_length = pt.image_ids.shape[0] + write_next_bytes(fid, track_length, "Q") + for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs): + write_next_bytes(fid, [image_id, point2D_id], "ii") diff --git a/easymocap/mytools/colmap_wrapper.py b/easymocap/mytools/colmap_wrapper.py new file mode 100644 index 0000000..9fd0b62 --- /dev/null +++ b/easymocap/mytools/colmap_wrapper.py @@ -0,0 +1,468 @@ +''' + @ Date: 2022-06-20 15:03:50 + @ Author: Qing Shuai + @ Mail: s_q@zju.edu.cn + @ LastEditors: Qing Shuai + @ LastEditTime: 2022-08-16 20:24:07 + @ FilePath: /EasyMocapPublic/easymocap/mytools/colmap_wrapper.py +''' + +import shutil +import sys +import os +import sqlite3 +import numpy as np +from os.path import join +import cv2 +from .debug_utils import mkdir, run_cmd, log, mywarn +from .colmap_structure import Camera, Image, CAMERA_MODEL_NAMES +from .colmap_structure import rotmat2qvec +from .colmap_structure import read_points3d_binary + +IS_PYTHON3 = sys.version_info[0] >= 3 + +MAX_IMAGE_ID = 2**31 - 1 + +CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras ( + camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + model INTEGER NOT NULL, + width INTEGER NOT NULL, + height INTEGER NOT NULL, + params BLOB, + prior_focal_length INTEGER NOT NULL)""" + +CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors ( + image_id INTEGER PRIMARY KEY NOT NULL, + rows INTEGER NOT NULL, + cols INTEGER NOT NULL, + data BLOB, + FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)""" + +CREATE_IMAGES_TABLE = 
"""CREATE TABLE IF NOT EXISTS images ( + image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL UNIQUE, + camera_id INTEGER NOT NULL, + prior_qw REAL, + prior_qx REAL, + prior_qy REAL, + prior_qz REAL, + prior_tx REAL, + prior_ty REAL, + prior_tz REAL, + CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}), + FOREIGN KEY(camera_id) REFERENCES cameras(camera_id)) +""".format(MAX_IMAGE_ID) + +CREATE_TWO_VIEW_GEOMETRIES_TABLE = """ +CREATE TABLE IF NOT EXISTS two_view_geometries ( + pair_id INTEGER PRIMARY KEY NOT NULL, + rows INTEGER NOT NULL, + cols INTEGER NOT NULL, + data BLOB, + config INTEGER NOT NULL, + F BLOB, + E BLOB, + H BLOB) +""" + +CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints ( + image_id INTEGER PRIMARY KEY NOT NULL, + rows INTEGER NOT NULL, + cols INTEGER NOT NULL, + data BLOB, + FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE) +""" + +CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches ( + pair_id INTEGER PRIMARY KEY NOT NULL, + rows INTEGER NOT NULL, + cols INTEGER NOT NULL, + data BLOB)""" + +CREATE_NAME_INDEX = \ + "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" + +CREATE_ALL = "; ".join([ + CREATE_CAMERAS_TABLE, + CREATE_IMAGES_TABLE, + CREATE_KEYPOINTS_TABLE, + CREATE_DESCRIPTORS_TABLE, + CREATE_MATCHES_TABLE, + CREATE_TWO_VIEW_GEOMETRIES_TABLE, + CREATE_NAME_INDEX +]) + +def image_ids_to_pair_id(image_id1, image_id2): + if image_id1 > image_id2: + image_id1, image_id2 = image_id2, image_id1 + return image_id1 * MAX_IMAGE_ID + image_id2 + + +def pair_id_to_image_ids(pair_id): + image_id2 = pair_id % MAX_IMAGE_ID + image_id1 = (pair_id - image_id2) // MAX_IMAGE_ID + return image_id1, image_id2 + +def array_to_blob(array): + if IS_PYTHON3: + return array.tobytes() + else: + return np.getbuffer(array) + +def blob_to_array(blob, dtype, shape=(-1,)): + if blob is None: + return np.empty((0, 2), dtype=dtype) + if IS_PYTHON3: + return np.frombuffer(blob, dtype=dtype).reshape(*shape) + else: + return np.frombuffer(blob, dtype=dtype).reshape(*shape) + + +class COLMAPDatabase(sqlite3.Connection): + + @staticmethod + def connect(database_path): + return sqlite3.connect(database_path, factory=COLMAPDatabase) + + + def __init__(self, *args, **kwargs): + super(COLMAPDatabase, self).__init__(*args, **kwargs) + + self.create_tables = lambda: self.executescript(CREATE_ALL) + self.create_cameras_table = \ + lambda: self.executescript(CREATE_CAMERAS_TABLE) + self.create_descriptors_table = \ + lambda: self.executescript(CREATE_DESCRIPTORS_TABLE) + self.create_images_table = \ + lambda: self.executescript(CREATE_IMAGES_TABLE) + self.create_two_view_geometries_table = \ + lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE) + self.create_keypoints_table = \ + lambda: self.executescript(CREATE_KEYPOINTS_TABLE) + self.create_matches_table = \ + lambda: self.executescript(CREATE_MATCHES_TABLE) + self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) + + def add_camera(self, model, width, height, params, + prior_focal_length=False, camera_id=None): + params = np.asarray(params, np.float64) + cursor = self.execute( + "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)", + (camera_id, model, width, height, array_to_blob(params), + prior_focal_length)) + return cursor.lastrowid + + def add_image(self, name, camera_id, + prior_q=np.full(4, np.NaN), prior_t=np.full(3, np.NaN), image_id=None): + cursor = self.execute( + "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + 
(image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2], + prior_q[3], prior_t[0], prior_t[1], prior_t[2])) + return cursor.lastrowid + + def add_keypoints(self, image_id, keypoints): + assert(len(keypoints.shape) == 2) + assert(keypoints.shape[1] in [2, 4, 6]) + + keypoints = np.asarray(keypoints, np.float32) + self.execute( + "INSERT INTO keypoints VALUES (?, ?, ?, ?)", + (image_id,) + keypoints.shape + (array_to_blob(keypoints),)) + + def add_descriptors(self, image_id, descriptors): + descriptors = np.ascontiguousarray(descriptors, np.uint8) + self.execute( + "INSERT INTO descriptors VALUES (?, ?, ?, ?)", + (image_id,) + descriptors.shape + (array_to_blob(descriptors),)) + + def add_matches(self, image_id1, image_id2, matches): + assert(len(matches.shape) == 2) + assert(matches.shape[1] == 2) + + if image_id1 > image_id2: + matches = matches[:,::-1] + + pair_id = image_ids_to_pair_id(image_id1, image_id2) + matches = np.asarray(matches, np.uint32) + self.execute( + "INSERT INTO matches VALUES (?, ?, ?, ?)", + (pair_id,) + matches.shape + (array_to_blob(matches),)) + + def add_two_view_geometry(self, image_id1, image_id2, matches, + F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2): + assert(len(matches.shape) == 2) + assert(matches.shape[1] == 2) + + if image_id1 > image_id2: + matches = matches[:,::-1] + + pair_id = image_ids_to_pair_id(image_id1, image_id2) + matches = np.asarray(matches, np.uint32) + F = np.asarray(F, dtype=np.float64) + E = np.asarray(E, dtype=np.float64) + H = np.asarray(H, dtype=np.float64) + self.execute( + "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + (pair_id,) + matches.shape + (array_to_blob(matches), config, + array_to_blob(F), array_to_blob(E), array_to_blob(H))) + + def read_images(self): + image_id_to_name, name_to_image_id = {}, {} + image_results = self.execute("SELECT * FROM images") + for result in image_results: + image_id, name, camera_id, q0, q1, q2, q3, t0, t1, t2 = result + image_id_to_name[image_id] = name + name_to_image_id[name] = image_id + return image_id_to_name, name_to_image_id + + def read_keypoints(self, mapping=None): + image_id_to_keypoints = {} + keypoints_results = self.execute("SELECT * FROM keypoints") + for keypoints_result in keypoints_results: + image_id, rows, cols, keypoints = keypoints_result + keypoints = blob_to_array(keypoints, np.float32, (rows, cols)) + if mapping is None: + image_id_to_keypoints[image_id] = keypoints + else: + image_id_to_keypoints[mapping[image_id]] = keypoints + return image_id_to_keypoints + + def read_matches(self, mapping=None): + matches_results = self.execute("SELECT * FROM matches") + matches = {} + for matches_result in matches_results: + pair_id, rows, cols, match = matches_result + image_id0, image_id1 = pair_id_to_image_ids(pair_id) + if rows == 0: + continue + match = blob_to_array(match, dtype=np.uint32, shape=(rows, cols)) + if mapping is not None: + image_id0 = mapping[image_id0] + image_id1 = mapping[image_id1] + matches[(image_id0, image_id1)] = match + return matches + + def read_two_view_geometry(self, mapping=None): + geometry = self.execute("SELECT * FROM two_view_geometries") + geometries = {} + for pair_id, rows, cols, data, config, F, E, H in geometry: + F = blob_to_array(F, dtype=np.float64) + E = blob_to_array(E, dtype=np.float64) + H = blob_to_array(H, dtype=np.float64) + image_id0, image_id1 = pair_id_to_image_ids(pair_id) + match = blob_to_array(data, dtype=np.uint32, shape=(rows, cols)) + if rows == 0:continue + if mapping is not None: + 
image_id0 = mapping[image_id0] + image_id1 = mapping[image_id1] + geometries[(image_id0, image_id1)] = {'matches': match, 'F':F, 'E':E, 'H':H, 'config': config} + return geometries + +def create_empty_db(database_path): + if os.path.exists(database_path): + mywarn('Removing old database: {}'.format(database_path)) + os.remove(database_path) + print('Creating an empty database...') + db = COLMAPDatabase.connect(database_path) + db.create_tables() + db.commit() + db.close() + +def create_cameras(db, cameras, subs, width, height, share_intri=True): + model = 'OPENCV' + if share_intri: + cam_id = 1 + K = cameras[subs[0]]['K'] + D = cameras[subs[0]]['dist'].reshape(1, 5) + fx, fy, cx, cy, k1, k2, p1, p2, k3, k4, k5, k6 = K[0, 0], K[1, 1], K[0, 2], K[1, 2], D[0, 0], D[0, 1], D[0, 2], D[0, 3], D[0, 4], 0, 0, 0 + + params = [fx, fy, cx, cy, k1, k2, p1, p2] + # params = [fx, fy, cx, cy, 0, 0, 0, 0] + camera = Camera( + id=cam_id, + model=model, + width=width, + height=height, + params=params + ) + cameras_colmap = {cam_id: camera} + cameras_map = {sub:cam_id for sub in subs} + # + db.add_camera(CAMERA_MODEL_NAMES[model].model_id, width, height, params, + prior_focal_length=False, camera_id=cam_id) + else: + raise NotImplementedError + return cameras_colmap, cameras_map + +def create_images(db, cameras, cameras_map, image_names): + subs = sorted(list(image_names.keys())) + images = {} + for sub, image_name in image_names.items(): + img_id = subs.index(sub) + 1 + R = cameras[sub]['R'] + T = cameras[sub]['T'] + qvec = rotmat2qvec(R) + tvec = T.T[0] + image = Image( + id=img_id, + qvec=qvec, + tvec=tvec, + camera_id=cameras_map[sub], + name=os.path.basename(image_name), + xys=[], + point3D_ids=[] + ) + images[img_id] = image + db.add_image(image.name, camera_id=image.camera_id, + prior_q=image.qvec, prior_t=image.tvec, image_id=img_id) + return images + +def copy_images(data, out, nf=0, copy_func=shutil.copyfile, mask='mask', add_mask=True): + subs = sorted(os.listdir(join(data, 'images'))) + image_names = {} + for sub in subs: + srcname = join(data, 'images', sub, '{:06d}.jpg'.format(nf)) + if not os.path.exists(srcname): + mywarn('{} not exists, skip'.format(srcname)) + return False + dstname = join(out, 'images', '{}.jpg'.format(sub)) + image_names[sub] = dstname + if os.path.exists(dstname): + continue + os.makedirs(os.path.dirname(dstname), exist_ok=True) + copy_func(srcname, dstname) + mskname = join(data, mask, sub, '{:06d}.png'.format(nf)) + dstname = join(out, 'mask', '{}.jpg.png'.format(sub)) + if os.path.exists(mskname) and add_mask: + os.makedirs(os.path.dirname(dstname), exist_ok=True) + copy_func(mskname, dstname) + return True, image_names + +def colmap_feature_extract(colmap, path, share_camera, add_mask): + ''' +struct SiftMatchingOptions { + // Number of threads for feature matching and geometric verification. + int num_threads = -1; + + // Whether to use the GPU for feature matching. + bool use_gpu = true; + + // Index of the GPU used for feature matching. For multi-GPU matching, + // you should separate multiple GPU indices by comma, e.g., "0,1,2,3". + std::string gpu_index = "-1"; + + // Maximum distance ratio between first and second best match. + double max_ratio = 0.8; + + // Maximum distance to best match. + double max_distance = 0.7; + + // Whether to enable cross checking in matching. + bool cross_check = true; + + // Maximum number of matches. + int max_num_matches = 32768; + + // Maximum epipolar error in pixels for geometric verification. 
+ double max_error = 4.0; + + // Confidence threshold for geometric verification. + double confidence = 0.999; + + // Minimum/maximum number of RANSAC iterations. Note that this option + // overrules the min_inlier_ratio option. + int min_num_trials = 100; + int max_num_trials = 10000; + + // A priori assumed minimum inlier ratio, which determines the maximum + // number of iterations. + double min_inlier_ratio = 0.25; + + // Minimum number of inliers for an image pair to be considered as + // geometrically verified. + int min_num_inliers = 15; + + // Whether to attempt to estimate multiple geometric models per image pair. + bool multiple_models = false; + + // Whether to perform guided matching, if geometric verification succeeds. + bool guided_matching = false; + + bool Check() const; +}; +''' + cmd = f'{colmap} feature_extractor --database_path {path}/database.db \ +--image_path {path}/images \ +--SiftExtraction.peak_threshold 0.006 \ +--ImageReader.camera_model OPENCV \ +' + if share_camera: + cmd += ' --ImageReader.single_camera 1' + + if add_mask: + cmd += f' --ImageReader.mask_path {path}/mask' + cmd += f' >> {path}/log.txt' + run_cmd(cmd) + +def colmap_feature_match(colmap, path): + cmd = f'{colmap} exhaustive_matcher --database_path {path}/database.db \ +--SiftMatching.guided_matching 0 \ +--SiftMatching.max_ratio 0.8 \ +--SiftMatching.max_distance 0.5 \ +--SiftMatching.cross_check 1 \ +--SiftMatching.max_error 4 \ +--SiftMatching.max_num_matches 32768 \ +--SiftMatching.confidence 0.9999 \ +--SiftMatching.max_num_trials 10000 \ +--SiftMatching.min_inlier_ratio 0.25 \ +--SiftMatching.min_num_inliers 30 \ +>> {path}/log.txt' + run_cmd(cmd) + +def colmap_ba(colmap, path, with_init=False): + if with_init: + cmd = f'{colmap} point_triangulator --database_path {path}/database.db \ +--image_path {path}/images \ +--input_path {path}/sparse/0 \ +--output_path {path}/sparse/0 \ +--Mapper.tri_merge_max_reproj_error 3 \ +--Mapper.ignore_watermarks 1 \ +--Mapper.filter_max_reproj_error 2 \ +>> {path}/log.txt' + run_cmd(cmd) + cmd = f'{colmap} bundle_adjuster \ +--input_path {path}/sparse/0 \ +--output_path {path}/sparse/0 \ +>> {path}/log.txt' + run_cmd(cmd) + points3d = read_points3d_binary(join(path, 'sparse', '0', 'points3D.bin')) + pids = list(points3d.keys()) + mean_error = np.mean([points3d[p].error for p in pids]) + log('Triangulate {} points, mean error: {:.2f} pixel'.format(len(pids), mean_error)) + else: + mkdir(join(path, 'sparse')) + cmd = f'{colmap} mapper --database_path {path}/database.db --image_path {path}/images --output_path {path}/sparse \ + --Mapper.ba_refine_principal_point 1 \ + --Mapper.ba_global_max_num_iterations 1000 \ + >> {path}/log.txt' + run_cmd(cmd) + + +def colmap_dense(colmap, path): + mkdir(join(path, 'dense')) + cmd = f'{colmap} image_undistorter --image_path {path}/images --input_path {path}/sparse/0 --output_path {path}/dense --output_type COLMAP --max_image_size 2000' + run_cmd(cmd) + cmd = f'{colmap} patch_match_stereo \ +--workspace_path {path}/dense \ +--workspace_format COLMAP \ +--PatchMatchStereo.geom_consistency true \ +>> {path}/log.txt' + + run_cmd(cmd) + cmd = f'{colmap} stereo_fusion \ +--workspace_path {path}/dense \ +--workspace_format COLMAP \ +--input_type geometric \ +--output_path {path}/dense/fused.ply \ +>> {path}/log.txt' + run_cmd(cmd) diff --git a/easymocap/mytools/debug_utils.py b/easymocap/mytools/debug_utils.py new file mode 100644 index 0000000..09cd664 --- /dev/null +++ b/easymocap/mytools/debug_utils.py @@ -0,0 +1,86 @@ +''' + 
@ Date: 2022-02-14 14:54:50 + @ Author: Qing Shuai + @ Mail: s_q@zju.edu.cn + @ LastEditors: Qing Shuai + @ LastEditTime: 2022-06-14 18:07:19 + @ FilePath: /EasyMocapPublic/easymocap/mytools/debug_utils.py +''' +from termcolor import colored +import os +from os.path import join +import shutil +import subprocess +import time +import datetime + +def toc(): + return time.time() * 1000 + +def myprint(cmd, level): + color = {'run': 'blue', 'info': 'green', 'warn': 'yellow', 'error': 'red'}[level] + print(colored(cmd, color)) + +def log(text): + myprint(text, 'info') + +def log_time(text): + strf = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + print(colored(strf, 'yellow'), colored(text, 'green')) + +def mywarn(text): + myprint(text, 'warn') + +warning_infos = set() + +def oncewarn(text): + if text in warning_infos: + return + warning_infos.add(text) + myprint(text, 'warn') + + +def myerror(text): + myprint(text, 'error') + +def run_cmd(cmd, verbo=True, bg=False): + if verbo: myprint('[run] ' + cmd, 'run') + if bg: + args = cmd.split() + print(args) + p = subprocess.Popen(args) + return [p] + else: + os.system(cmd) + return [] + +def mkdir(path): + if os.path.exists(path): + return 0 + log('mkdir {}'.format(path)) + os.makedirs(path, exist_ok=True) + +def cp(srcname, dstname): + mkdir(join(os.path.dirname(dstname))) + shutil.copyfile(srcname, dstname) + +def print_table(header, contents): + from tabulate import tabulate + length = len(contents[0]) + tables = [[] for _ in range(length)] + mean = ['Mean'] + for icnt, content in enumerate(contents): + for i in range(length): + if isinstance(content[i], float): + tables[i].append('{:6.2f}'.format(content[i])) + else: + tables[i].append('{}'.format(content[i])) + if icnt > 0: + mean.append('{:6.2f}'.format(sum(content)/length)) + tables.append(mean) + print(tabulate(tables, header, tablefmt='fancy_grid')) + +def check_exists(path): + flag1 = os.path.isfile(path) and os.path.exists(path) + flag2 = os.path.isdir(path) and len(os.listdir(path)) >= 10 + return flag1 or flag2 \ No newline at end of file diff --git a/easymocap/mytools/file_utils.py b/easymocap/mytools/file_utils.py index b3cd0d6..3720aa5 100644 --- a/easymocap/mytools/file_utils.py +++ b/easymocap/mytools/file_utils.py @@ -2,8 +2,8 @@ @ Date: 2021-03-15 12:23:12 @ Author: Qing Shuai @ LastEditors: Qing Shuai - @ LastEditTime: 2021-06-14 22:25:58 - @ FilePath: /EasyMocapRelease/easymocap/mytools/file_utils.py + @ LastEditTime: 2022-07-21 15:55:02 + @ FilePath: /EasyMocapPublic/easymocap/mytools/file_utils.py ''' import os import json @@ -11,12 +11,18 @@ import numpy as np from os.path import join mkdir = lambda x:os.makedirs(x, exist_ok=True) -mkout = lambda x:mkdir(os.path.dirname(x)) - +# mkout = lambda x:mkdir(os.path.dirname(x)) if x is not None +def mkout(x): + if x is not None: + mkdir(os.path.dirname(x)) def read_json(path): assert os.path.exists(path), path with open(path) as f: - data = json.load(f) + try: + data = json.load(f) + except: + print('Reading error {}'.format(path)) + data = [] return data def save_json(file, data): @@ -25,6 +31,17 @@ def save_json(file, data): with open(file, 'w') as f: json.dump(data, f, indent=4) +def append_json(file, data): + if not os.path.exists(os.path.dirname(file)): + os.makedirs(os.path.dirname(file)) + if os.path.exists(file): + res = read_json(file) + assert isinstance(res, list) + res.append(data) + data = res + with open(file, 'w') as f: + json.dump(data, f, indent=4) + save_annot = save_json def getFileList(root, 
ext='.jpg'):
@@ -51,19 +68,24 @@ def read_annot(annotname, mode='body25'):
             data[i]['id'] = data[i].pop('personID')
         if 'keypoints2d' in data[i].keys() and 'keypoints' not in data[i].keys():
             data[i]['keypoints'] = data[i].pop('keypoints2d')
-        for key in ['bbox', 'keypoints', 'handl2d', 'handr2d', 'face2d']:
+        for key in ['bbox', 'keypoints',
+            'bbox_handl2d', 'handl2d',
+            'bbox_handr2d', 'handr2d',
+            'bbox_face2d', 'face2d']:
             if key not in data[i].keys():continue
             data[i][key] = np.array(data[i][key])
             if key == 'face2d':
                 # TODO: Make parameters, 17 is the offset for the eye brows,
                 # etc. 51 is the total number of FLAME compatible landmarks
                 data[i][key] = data[i][key][17:17+51, :]
-        data[i]['bbox'] = data[i]['bbox'][:5]
-        if data[i]['bbox'][-1] < 0.001:
-            # print('{}/{} bbox conf = 0, may be error'.format(annotname, i))
-            data[i]['bbox'][-1] = 1
+        if 'bbox' in data[i].keys():
+            data[i]['bbox'] = data[i]['bbox'][:5]
+            if data[i]['bbox'][-1] < 0.001:
+                print('{}/{} bbox conf = 0, may be an error'.format(annotname, i))
+                data[i]['bbox'][-1] = 0
+        # combine the basic results
         if mode == 'body25':
-            data[i]['keypoints'] = data[i]['keypoints']
+            data[i]['keypoints'] = data[i].get('keypoints', np.zeros((25, 3)))
         elif mode == 'body15':
             data[i]['keypoints'] = data[i]['keypoints'][:15, :]
         elif mode in ['handl', 'handr']:
@@ -91,7 +113,7 @@ def array2raw(array, separator=' ', fmt='%.3f'):
         res.append(separator.join([fmt%(d) for d in data]))
 
-def myarray2string(array, separator=', ', fmt='%.3f', indent=8):
+def myarray2string(array, separator=', ', fmt='%7.7f', indent=8):
     assert len(array.shape) == 2, 'Only support MxN matrix, {}'.format(array.shape)
     blank = ' ' * indent
     res = ['[']
@@ -110,14 +132,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
         out_text.append('    {\n')
         output = {}
         output['id'] = data['id']
-        for key in keys:
-            if key not in data.keys():continue
+        for k in ['type']:
+            if k in data.keys():output[k] = '\"{}\"'.format(data[k])
+        keys_current = [k for k in keys if k in data.keys()]
+        for key in keys_current:
             # BUG: this function will fail if data[key] has too many rows
             # output[key] = np.array2string(data[key], max_line_width=1000, separator=', ', formatter=format_out)
             output[key] = myarray2string(data[key], separator=', ', fmt=fmt)
         for key in output.keys():
             out_text.append('        \"{}\": {}'.format(key, output[key]))
-            if key != keys[-1]:
+            if key != keys_current[-1]:
                 out_text.append(',\n')
             else:
                 out_text.append('\n')
@@ -134,17 +158,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
     else:
         return ''.join(out_text)
 
-def write_keypoints3d(dumpname, results):
+def write_keypoints3d(dumpname, results, keys = ['keypoints3d']):
     # TODO:rewrite it
-    keys = ['keypoints3d']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.7f')
 
 def write_vertices(dumpname, results):
     keys = ['vertices']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.5f')
 
 def write_smpl(dumpname, results):
-    keys = ['Rh', 'Th', 'poses', 'expression', 'shapes']
+    keys = ['Rh', 'Th', 'poses', 'handl', 'handr', 'expression', 'shapes']
     write_common_results(dumpname, results, keys)
 
 def batch_bbox_from_pose(keypoints2d, height, width, rate=0.1):
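
A small usage sketch of the writer above together with the reader that follows
(the output path and the data are made up for illustration):

    import numpy as np
    from easymocap.mytools.file_utils import write_keypoints3d
    from easymocap.mytools.reader import read_keypoints3d
    frame = [{'id': 0, 'keypoints3d': np.zeros((25, 4))}]  # (nJoints, 4): xyz + confidence
    write_keypoints3d('output/keypoints3d/000000.json', frame)
    results = read_keypoints3d('output/keypoints3d/000000.json')  # adds 'type': 'body25'
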
diff --git a/easymocap/mytools/reader.py b/easymocap/mytools/reader.py
index b200881..22c522c 100644
--- a/easymocap/mytools/reader.py
+++ b/easymocap/mytools/reader.py
@@ -2,8 +2,8 @@
     @ Date: 2021-04-21 15:19:21
     @ Author: Qing Shuai
     @ LastEditors: Qing Shuai
-    @ LastEditTime: 2021-07-29 16:12:37
-    @ FilePath: /EasyMocap/easymocap/mytools/reader.py
+    @ LastEditTime: 2022-07-22 23:23:26
+    @ FilePath: /EasyMocapPublic/easymocap/mytools/reader.py
 '''
 # function to read data
 """
@@ -27,17 +27,14 @@ def read_keypoints3d(filename):
     res_ = []
     for d in data:
         pid = d['id'] if 'id' in d.keys() else d['personID']
-        pose3d = np.array(d['keypoints3d'], dtype=np.float32)
-        if pose3d.shape[0] > 25:
-            # if hands are present, assign the hand root joints from the body25 keypoints
-            pose3d[25, :] = pose3d[7, :]
-            pose3d[46, :] = pose3d[4, :]
-        if pose3d.shape[1] == 3:
-            pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
-        res_.append({
-            'id': pid,
-            'keypoints3d': pose3d
-        })
+        ret = {'id': pid, 'type': 'body25'}
+        for key in ['keypoints3d', 'handl3d', 'handr3d', 'face3d']:
+            if key not in d.keys():continue
+            pose3d = np.array(d[key], dtype=np.float32)
+            if pose3d.shape[1] == 3:
+                pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
+            ret[key] = pose3d
+        res_.append(ret)
     return res_
 
 def read_keypoints3d_dict(filename):
@@ -54,11 +51,13 @@ def read_keypoints3d_dict(filename):
         }
     return res_
 
-def read_smpl(filename):
+def read_smpl(filename):
     datas = read_json(filename)
+    if isinstance(datas, dict):
+        datas = datas['annots']
     outputs = []
     for data in datas:
-        for key in ['Rh', 'Th', 'poses', 'shapes', 'expression']:
+        for key in ['Rh', 'Th', 'poses', 'handl', 'handr', 'shapes', 'expression', 'keypoints3d']:
             if key in data.keys():
                 data[key] = np.array(data[key], dtype=np.float32)
     # for smplx results
diff --git a/easymocap/mytools/triangulator.py b/easymocap/mytools/triangulator.py
new file mode 100644
index 0000000..cba0748
--- /dev/null
+++ b/easymocap/mytools/triangulator.py
@@ -0,0 +1,735 @@
+import numpy as np
+import cv2
+from easymocap.datasets.base import crop_image
+
+from easymocap.estimator.wrapper_base import bbox_from_keypoints
+from easymocap.mytools.vis_base import merge, plot_keypoints_auto
+from .debug_utils import log, mywarn, myerror
+
+def batch_triangulate(keypoints_, Pall, min_view=2):
+    # keypoints: (nViews, nJoints, 3)
+    # Pall: (nViews, 3, 4)
+    # A: (nJoints, nViewsx2, 4), x: (nJoints, 4, 1); b: (nJoints, nViewsx2, 1)
+    v = (keypoints_[:, :, -1]>0).sum(axis=0)
+    valid_joint = np.where(v >= min_view)[0]
+    keypoints = keypoints_[:, valid_joint]
+    conf3d = keypoints[:, :, -1].sum(axis=0)/v[valid_joint]
+    # P2: the last row of each P matrix: (1, nViews, 1, 4)
+    P0 = Pall[None, :, 0, :]
+    P1 = Pall[None, :, 1, :]
+    P2 = Pall[None, :, 2, :]
+    # uP2: the u coordinate times P2: (nJoints, nViews, 1, 4)
+    uP2 = keypoints[:, :, 0].T[:, :, None] * P2
+    vP2 = keypoints[:, :, 1].T[:, :, None] * P2
+    conf = keypoints[:, :, 2].T[:, :, None]
+    Au = conf * (uP2 - P0)
+    Av = conf * (vP2 - P1)
+    A = np.hstack([Au, Av])
+    u, s, v = np.linalg.svd(A)
+    X = v[:, -1, :]
+    X = X / X[:, 3:]
+    # out: (nJoints, 4)
+    result = np.zeros((keypoints_.shape[1], 4))
+    result[valid_joint, :3] = X[:, :3]
+    result[valid_joint, 3] = conf3d #* (conf[..., 0].sum(axis=-1)>min_view)
+    return result
+
+def project_points(keypoints, RT, einsum='vab,kb->vka'):
+    homo = np.concatenate([keypoints[..., :3], np.ones_like(keypoints[..., :1])], axis=-1)
+    kpts2d = np.einsum(einsum, RT, homo)
+    kpts2d[..., :2] /= kpts2d[..., 2:]
+    return kpts2d
+
+def make_Cnk(n, k):
+    import itertools
+    res = {}
+    for n_ in range(3, n+1):
+        n_0 = [i for i in range(n_)]
+        for k_ in range(2, k+1):
+            res[(n_, k_)] = list(map(list, itertools.combinations(n_0, k_)))
+    return res
+
+MAX_VIEWS = 30
+Cnk = make_Cnk(MAX_VIEWS, 3)
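+
+# A minimal sanity check for batch_triangulate (illustrative only, not part of
+# the original patch): two hypothetical cameras observe one 3D point; the DLT
+# solution should recover it with confidence 1.
+#   P0 = np.hstack([np.eye(3), np.zeros((3, 1))])      # camera at the origin
+#   P1 = np.hstack([np.eye(3), np.array([[-1.], [0.], [0.]])])
+#   X = np.array([[0., 0., 3., 1.]])                   # (nJoints=1, 4)
+#   x0 = (P0 @ X.T).T; x0 /= x0[:, 2:]                 # projection, conf=1
+#   x1 = (P1 @ X.T).T; x1 /= x1[:, 2:]
+#   kpts = np.stack([x0, x1])                          # (nViews=2, nJoints=1, 3)
+#   batch_triangulate(kpts, np.stack([P0, P1]))        # -> [[0., 0., 3., 1.]]
+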
+
+def robust_triangulate_point(kpts2d, Pall, dist_max, min_v = 3):
+    nV = kpts2d.shape[0]
+    if len(kpts2d) < min_v: # too few views, the reconstruction fails
+        return [], None
+    # min_v = max(2, nV//2)
+    # 1. choose the combination of min_v
+    index_ = Cnk[(len(kpts2d), min(min_v, len(kpts2d)))]
+    # 2. proposals: store the reconstruction points of each proposal
+    proposals = np.zeros((len(index_), 4))
+    weight_self = np.zeros((nV, len(index_)))
+    for i, index in enumerate(index_):
+        weight_self[index, i] = 100.
+        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
+        proposals[i] = point
+    # 3. project the proposals to each view
+    #    and calculate the reprojection error
+    # (nViews, nProposals, 4)
+    kpts_repro = project_points(proposals, Pall)
+    conf = (proposals[None, :, -1] > 0) * (kpts2d[..., -1] > 0)
+    # err: (nViews, nProposals)
+    err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[..., :2], axis=-1) * conf
+    valid = 1. - err/dist_max
+    valid[valid<0] = 0
+    # consider the weight of different view
+    # TODO:naive weight:
+    conf = kpts2d[..., -1]
+    weight = conf
+    # the (valid > 0)*weight_self term forces the views used to build a proposal
+    # to agree with it, and rewards proposals that more views are consistent with
+    weight_sum = (weight * valid).sum(axis=0) + ((valid > 0)*weight_self).sum(axis=0) - min_v * 100
+    if weight_sum.max() < 0: # the reconstruction fails
+        return [], None
+    best = weight_sum.argmax()
+    if (err[index_[best], best] > dist_max).any():
+        return [], None
+    # for the selected proposal, collect the other views with positive validity
+    point = proposals[best]
+    best_add = np.where(valid[:, best])[0].tolist()
+    index = list(index_[best])
+    best_add.sort(key=lambda x:-weight[x])
+    for add in best_add:
+        if add in index:
+            continue
+        index.append(add)
+        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
+        kpts_repro = project_points(point, Pall[index])
+        err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[index, ..., :2], axis=-1)
+        if (err > dist_max).any():
+            index.remove(add)
+            break
+    return index, point
+
+def remove_outview(kpts2d, out_view, debug):
+    if len(out_view) == 0:
+        return False
+    outv = out_view[0]
+    if debug:
+        mywarn('[triangulate] remove outview: {} from {}'.format(outv, out_view))
+    kpts2d[outv] = 0.
+    return True
+
+def remove_outjoint(kpts2d, Pall, out_joint, dist_max, min_view=3, debug=False):
+    if len(out_joint) == 0:
+        return False
+    if debug:
+        mywarn('[triangulate] remove outjoint: {}'.format(out_joint))
+    for nj in out_joint:
+        valid = np.where(kpts2d[:, nj, -1] > 0)[0]
+        if len(valid) < min_view:
+            # if less than 3 visible views, set these invisible
+            kpts2d[:, nj, -1] = 0
+            continue
+        if len(valid) > MAX_VIEWS:
+            # only select the most confident points
+            conf = -kpts2d[:, nj, -1]
+            valid = conf.argsort()[:MAX_VIEWS]
+        index_j, point = robust_triangulate_point(kpts2d[valid, nj:nj+1], Pall[valid], dist_max=dist_max, min_v=3)
+        index_j = valid[index_j]
+        # print('select {} for joint {}'.format(index_j, nj))
+        set0 = np.zeros(kpts2d.shape[0])
+        set0[index_j] = 1.
+        kpts2d[:, nj, -1] *= set0
+    return True
+
+def project_and_distance(kpts3d, RT, kpts2d):
+    kpts_proj = project_points(kpts3d, RT)
+    # 1. distance between input and projection
+    conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
+    dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
+    return dist, conf
+
+def iterative_triangulate(kpts2d, RT, previous=None,
+    min_conf=0.1, min_view=3, min_joints=3, dist_max=0.05, dist_vel=0.05,
+    thres_outlier_view=0.4, thres_outlier_joint=0.4, debug=False):
+    kpts2d = kpts2d.copy()
+    conf = kpts2d[..., -1]
+    kpts2d[conf < min_conf] = 0.
+    if previous is not None:
+        # drop the detections that moved too far from the projection of the
+        # previous frame
+        kpts_proj = project_points(previous, RT)
+        nottrack = (np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) > dist_vel) & (conf > 0)
+        if nottrack.sum() > 0:
+            kpts2d[nottrack] = 0.
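+        # The loop below alternates triangulation and outlier rejection:
+        # triangulate every joint, reproject it into all views, then drop
+        # whole views or whole joints whose outlier ratio exceeds the
+        # thresholds (or single detections otherwise), until no reprojection
+        # error exceeds dist_max.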
+ if debug: + log('[triangulate] Remove with track {}'.format(np.where(nottrack))) + while True: + # 0. triangulate and project + kpts3d = batch_triangulate(kpts2d, RT, min_view=min_view) + dist, conf = project_and_distance(kpts3d, RT, kpts2d) + # 2. find the outlier + vv, jj = np.where(dist > dist_max) + if vv.shape[0] < 1: + if debug: + log('[triangulate] Not found outlier, break') + break + ratio_outlier_view = (dist>dist_max).sum(axis=1)/(1e-5 + conf.sum(axis=1)) + ratio_outlier_joint = (dist>dist_max).sum(axis=0)/(1e-5 + conf.sum(axis=0)) + # 3. find the totally wrong detections + out_view = np.where(ratio_outlier_view > thres_outlier_view)[0] + out_joint = np.where(ratio_outlier_joint > thres_outlier_joint)[0] + if len(out_view) > 1: + dist_view = dist.sum(axis=1)/(1e-5 + conf.sum(axis=1)) + out_view = out_view.tolist() + out_view.sort(key=lambda x:-dist_view[x]) + if debug: mywarn('[triangulate] Remove outlier view: {}'.format(ratio_outlier_view)) + if remove_outview(kpts2d, out_view, debug): continue + if remove_outjoint(kpts2d, RT, out_joint, dist_max, debug=debug): continue + if debug: + log('[triangulate] Directly remove {}, {}'.format(vv, jj)) + kpts2d[vv, jj, -1] = 0. + if debug: + log('[triangulate] finally {} valid points'.format((kpts3d[..., -1]>0).sum())) + if (kpts3d[..., -1]>0).sum() < min_joints: + kpts3d[..., -1] = 0. + kpts2d[..., -1] = 0. + return kpts3d, kpts2d + return kpts3d, kpts2d + +class BaseTriangulator: + def __init__(self, config, debug, keys) -> None: + self.config = config + self.debug = debug + self.keys = keys + + def project_and_check(self, kpts3d, kpts2d, RT): + kpts_proj = project_points(kpts3d, RT) + conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0) + dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf + return conf, dist + + def triangulate_with_results(self, pid, data, results): + new = {'id': pid} + for key in self.keys: + key3d = key.replace('2d', '3d') + if len(results) == 0: + kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'], + debug=self.debug, **self.config[key]) + else: + if len(results) == 1: + previous = results[-1][key3d] # TODO: mean previous frame + elif len(results) >= 2: + # TODO: mean previous velocity + previous0 = results[-2][key3d] # TODO: mean previous frame + previous1 = results[-1][key3d] # TODO: mean previous frame + vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous0[:, -1:]>0)) + previous = previous1.copy() + previous[:, :3] += vel + kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'], + debug=self.debug, previous=previous, **self.config[key]) + vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1) + new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1) + new[key3d] = kpts3d + return new + +class SimpleTriangulator(BaseTriangulator): + def __init__(self, keys, debug, config, + pid=0) -> None: + super().__init__(config, debug, keys) + self.results = [] + self.infos = [] + self.dim_name = ['_joints', '_views'] + self.pid = pid + + def __call__(self, data, results=None): + info = {} + if results is None: + results = self.results + new = {'id': self.pid} + for key in self.keys: + if key not in data.keys(): continue + key3d = key.replace('2d', '3d') + if self.debug: + log('[triangulate] {}'.format(key)) + if len(results) == 0: + kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'], + debug=self.debug, **self.config[key]) + else: + if len(results) == 1: + previous = 
+class BaseTriangulator:
+    def __init__(self, config, debug, keys) -> None:
+        self.config = config
+        self.debug = debug
+        self.keys = keys
+
+    def project_and_check(self, kpts3d, kpts2d, RT):
+        kpts_proj = project_points(kpts3d, RT)
+        conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
+        dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
+        return conf, dist
+
+    def triangulate_with_results(self, pid, data, results):
+        new = {'id': pid}
+        for key in self.keys:
+            key3d = key.replace('2d', '3d')
+            if len(results) == 0:
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, **self.config[key])
+            else:
+                if len(results) == 1:
+                    previous = results[-1][key3d] # TODO: mean previous frame
+                elif len(results) >= 2:
+                    # TODO: mean previous velocity
+                    previous0 = results[-2][key3d]
+                    previous1 = results[-1][key3d]
+                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous1[:, -1:]>0))
+                    previous = previous1.copy()
+                    previous[:, :3] += vel
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, previous=previous, **self.config[key])
+                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
+            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
+            new[key3d] = kpts3d
+        return new
+
+class SimpleTriangulator(BaseTriangulator):
+    def __init__(self, keys, debug, config,
+        pid=0) -> None:
+        super().__init__(config, debug, keys)
+        self.results = []
+        self.infos = []
+        self.dim_name = ['_joints', '_views']
+        self.pid = pid
+
+    def __call__(self, data, results=None):
+        info = {}
+        if results is None:
+            results = self.results
+        new = {'id': self.pid}
+        for key in self.keys:
+            if key not in data.keys(): continue
+            key3d = key.replace('2d', '3d')
+            if self.debug:
+                log('[triangulate] {}'.format(key))
+            if len(results) == 0:
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, **self.config[key])
+            else:
+                if len(results) == 1:
+                    previous = results[-1][key3d] # TODO: mean previous frame
+                elif len(results) >= 2:
+                    # TODO: mean previous velocity
+                    previous0 = results[-2][key3d]
+                    previous1 = results[-1][key3d]
+                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous1[:, -1:]>0))
+                    previous = previous1.copy()
+                    previous[:, :3] += vel
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, previous=previous, **self.config[key])
+                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
+            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
+            new[key.replace('2d', '3d')] = kpts3d
+            if self.debug:
+                conf, dist = self.project_and_check(kpts3d, kpts2d, data['RT'])
+                for dim in [0, 1]:
+                    info_dim = {
+                        'valid': conf.sum(axis=dim),
+                        'dist': 10000*dist.sum(axis=dim)/(1e-5 + conf.sum(axis=dim)),
+                    }
+                    info[key+self.dim_name[dim]] = info_dim
+                info[key+'_joints']['valid3d'] = kpts3d[:, -1] > 0
+        results.append(new)
+        self.infos.append(info)
+        return [new]
+
+    def report(self):
+        if not self.debug:
+            return 0
+        from .debug_utils import print_table
+        for key in self.infos[0].keys():
+            metrics = list(self.infos[0][key].keys())
+            values = [np.mean(np.stack([info[key][metric] for info in self.infos]), axis=0) for metric in metrics]
+            metrics = [key] + metrics
+            values = [[i for i in range(values[0].shape[0])]] + values
+            print_table(metrics, values)
+
+class SimpleTriangulatorMulti(SimpleTriangulator):
+    def __init__(self, pids, **cfg) -> None:
+        super().__init__(**cfg)
+        self.results = {}
+
+    def __call__(self, data, results=None):
+        res_now = []
+        for ipid, pid in enumerate(data['pid']):
+            if pid not in self.results.keys():
+                self.results[pid] = []
+            data_ = {'RT': data['RT']}
+            for key in self.keys:
+                data_[key+'_distort'] = data[key+'_distort'][:, ipid]
+                data_[key+'_unproj'] = data[key+'_unproj'][:, ipid]
+                data_[key] = data[key][:, ipid]
+            res = self.triangulate_with_results(pid, data_, self.results[pid])
+            self.results[pid].append(res)
+            res_now.append(res)
+        return res_now
+
+def skew_op(x):
+    # x: (3, 1) column vector -> 3x3 skew-symmetric matrix
+    res = np.zeros((3, 3), dtype=x.dtype)
+    # 0, -z, y
+    res[0, 1] = -x[2, 0]
+    res[0, 2] = x[1, 0]
+    # z, 0, -x
+    res[1, 0] = x[2, 0]
+    res[1, 2] = -x[0, 0]
+    # -y, x, 0
+    res[2, 0] = -x[1, 0]
+    res[2, 1] = x[0, 0]
+    return res
+
+def fundamental_op(K0, K1, R_0, T_0, R_1, T_1):
+    invK0 = np.linalg.inv(K0)
+    return invK0.T @ (R_0 @ R_1.T) @ K1.T @ skew_op(K1 @ R_1 @ R_0.T @ (T_0 - R_0 @ R_1.T @ T_1))
+
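fundamental_op composes the fundamental matrix of two calibrated views from their intrinsics and extrinsics. As a quick sanity check with hypothetical intrinsics and poses, two projections of the same world point should satisfy the epipolar constraint x0^T F x1 ~ 0 under this construction:

    import cv2
    import numpy as np

    K = np.array([[1000., 0., 960.], [0., 1000., 540.], [0., 0., 1.]])
    R0, T0 = np.eye(3), np.zeros((3, 1))
    R1 = cv2.Rodrigues(np.array([0., 0.3, 0.]))[0]
    T1 = np.array([[-0.5], [0.], [0.]])
    X = np.array([[0.2], [0.1], [3.0]])                 # a world point
    x0 = K @ (R0 @ X + T0); x0 = (x0 / x0[2]).ravel()   # pixel in view 0
    x1 = K @ (R1 @ X + T1); x1 = (x1 / x1[2]).ravel()   # pixel in view 1
    F = fundamental_op(K, K, R0, T0, R1, T1)
    print(x0 @ F @ x1)                                  # ~0 up to numerics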
+def drawlines(img1, img2, lines, pts1, pts2):
+    ''' img1 - image on which we draw the epilines for the points in img2
+        lines - corresponding epilines '''
+    r, c = img1.shape[:2]
+    for r, pt1, pt2 in zip(lines, pts1, pts2):
+        pt1 = list(map(lambda x:int(x+0.5), pt1[:2].tolist()))
+        pt2 = list(map(lambda x:int(x+0.5), pt2[:2].tolist()))
+        if pt1[0] < 0 or pt1[1] < 0:
+            continue
+        color = tuple(np.random.randint(0, 255, 3).tolist())
+        x0, y0 = map(int, [0, -r[2]/r[1]])
+        x1, y1 = map(int, [c, -(r[2]+r[0]*c)/r[1]])
+        img1 = cv2.line(img1, (x0, y0), (x1, y1), color, 1)
+        img1 = cv2.circle(img1, tuple(pt1), 5, color, -1)
+        img2 = cv2.circle(img2, tuple(pt2), 5, color, -1)
+    return img1, img2
+
+def SimpleConstrain(dimGroups):
+    constrain = np.ones((dimGroups[-1], dimGroups[-1]))
+    for i in range(len(dimGroups)-1):
+        start, end = dimGroups[i], dimGroups[i+1]
+        constrain[start:end, start:end] = 0
+    N = constrain.shape[0]
+    constrain[range(N), range(N)] = 1
+    return constrain
+
+def check_cluster(affinity, row, views, dimGroups, indices, p2dAssigned, visited):
+    affinity_row = affinity[row].copy()
+    # given the affinity and a row, enumerate all possible assignment combinations
+    cluster = np.where((affinity[row]>0)&(p2dAssigned==-1)&(visited==0))[0].tolist()
+    cluster.sort(key=lambda x:-affinity[row, x])
+    views_ = views[cluster]
+    view_count = np.bincount(views[cluster])
+    indices_all = [indices]
+    for col in cluster:
+        v = views[col]
+        nOld = len(indices_all)
+        if indices[v] != -1: # already assigned, copy and make a new proposal
+            for i in range(nOld):
+                ind = indices_all[i].copy()
+                ind[v] = col
+                indices_all.append(ind)
+        else: # not assigned yet, assign in place
+            for i in range(nOld):
+                indices_all[i][v] = col
+    return indices_all
+
+def views_from_dimGroups(dimGroups):
+    views = np.zeros(dimGroups[-1], dtype=np.int)
+    for nv in range(len(dimGroups) - 1):
+        views[dimGroups[nv]:dimGroups[nv+1]] = nv
+    return views
+
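dimGroups stores the running offsets of the per-view detections inside the stacked affinity matrix, and views_from_dimGroups maps each row back to its view. A tiny example with hypothetical detection counts:

    import numpy as np

    dims = [2, 0, 3]                    # detections per view
    dimGroups = np.cumsum([0] + dims)   # [0, 2, 2, 5]
    views = views_from_dimGroups(dimGroups)
    print(views)                        # [0 0 2 2 2]: rows 0-1 belong to view 0, rows 2-4 to view 2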
+class SimpleMatchAndTriangulator(SimpleTriangulator):
+    def __init__(self, num_joints, min_views, min_joints, cfg_svt, cfg_track, **cfg) -> None:
+        super().__init__(**cfg)
+        self.nJoints = num_joints
+        self.cfg_svt = cfg_svt
+        self.cfg_track = cfg_track
+        self.min_views = min_views
+        self.min_joints = min_joints
+        self.time = -1
+        self.max_id = 0
+        self.tracks = {}
+        self.loglevel_dict = {
+            'info': 0,
+            'warn': 1,
+            'error': 2,
+        }
+        self.loglevel = self.loglevel_dict['info'] # ['info', 'warn', 'error']
+        self.debug = False
+        self.data = None
+        self.people = None
+
+    def log(self, text):
+        if self.loglevel > 0:
+            return 0
+        log(text)
+
+    def warn(self, text):
+        if self.loglevel > 1:
+            return 0
+        mywarn(text)
+
+    @staticmethod
+    def distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1):
+        F = fundamental_op(K0, K1, R0, T0, R1, T1)
+        # epipolar lines induced by pts0, evaluated at pts1 below
+        lines0 = cv2.computeCorrespondEpilines(pts0[..., :2].reshape(-1, 1, 2), 2, F)
+        # epipolar lines induced by pts1, evaluated at pts0 below
+        lines1 = cv2.computeCorrespondEpilines(pts1[..., :2].reshape(-1, 1, 2), 1, F)
+        if False: # debug: visualize the epipolar lines
+            H, W = 1080, 1920
+            img0 = np.zeros((H, W, 3), dtype=np.uint8) + 255
+            img4, img3 = drawlines(img0.copy(), img0.copy(), lines0.reshape(-1, 3), pts1.reshape(-1, 3), pts0.reshape(-1, 3))
+            img5, img6 = drawlines(img0.copy(), img0.copy(), lines1.reshape(-1, 3), pts0.reshape(-1, 3), pts1.reshape(-1, 3))
+            import matplotlib.pyplot as plt
+            plt.subplot(121)
+            plt.imshow(img5)
+            plt.subplot(122)
+            plt.imshow(img4)
+            plt.show()
+        lines0 = lines0.reshape(pts0.shape)
+        lines1 = lines1.reshape(pts1.shape)
+        # dist: (D_v0, D_v1, nJoints)
+        dist01 = np.abs(np.sum(lines0[:, None, :, :2] * pts1[None, :, :, :2], axis=-1) + lines0[:, None, :, 2])
+        conf = pts0[:, None, :, 2] * pts1[None, :, :, 2]
+        dist10 = np.abs(np.sum(lines1[:, None, :, :2] * pts0[None, :, :, :2], axis=-1) + lines1[:, None, :, 2])
+
+        dist = np.sum(dist01 * conf + dist10.transpose(1, 0, 2) * conf, axis=-1)/(conf.sum(axis=-1) + 1e-5)/2
+        return dist
+
+    def _simple_associate2d_triangulate(self, data, affinity, dimGroups, prev_id):
+        # sum1 = affinity.sum(axis=1)
+        # note: instead of summing everything, pick the largest affinity in each view
+        sum1 = np.zeros((affinity.shape[0]))
+        for i in range(len(dimGroups)-1):
+            start, end = dimGroups[i], dimGroups[i+1]
+            if end == start: continue
+            sum1 += affinity[:, start:end].max(axis=-1)
+        n2d = affinity.shape[0]
+        nViews = len(dimGroups) - 1
+        idx_zero = np.zeros(nViews, dtype=np.int) - 1
+        views = views_from_dimGroups(dimGroups)
+        # the assigned results of each person
+        p2dAssigned = np.zeros(n2d, dtype=np.int) - 1
+        visited = np.zeros(n2d, dtype=np.int)
+        sortidx = np.argsort(-sum1)
+        pid = 0
+        k3dresults = []
+        for idx in sortidx:
+            if p2dAssigned[idx] != -1:
+                continue
+            if prev_id[idx] != -1:
+                results = [self.people[prev_id[idx]]]
+            else:
+                results = []
+            proposals = check_cluster(affinity, row=idx, views=views,
+                dimGroups=dimGroups, indices=idx_zero.copy(), p2dAssigned=p2dAssigned, visited=visited)
+            for indices in proposals:
+                if (indices > -1).sum() < self.min_views - (len(results)):
+                    continue
+                # set keypoints2d
+                info = {'RT': data['RT']}
+                for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
+                    info[name] = np.zeros((nViews, self.nJoints, 3), dtype=np.float32)
+                for nv in range(nViews):
+                    if indices[nv] == -1: continue
+                    for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
+                        info[name][nv] = data[name][nv][indices[nv]-dimGroups[nv]]
+
+                res = super().__call__(info, results=results)[0]
+
+                k2d = res['keypoints2d']
+                valid_view = (k2d[..., 2] > 0).sum(axis=-1) > self.min_joints
+                # if valid_view.sum() < self.min_views - len(results): # with a previous frame, len(results) would be 2; it is unclear why this condition was used before
+                if valid_view.sum() < self.min_views:
+                    self.log('[associate] Skip proposal {}->{} with not enough valid view {}'.format(idx, indices, (k2d[..., 2] > 0).sum(axis=-1)))
+                    continue
+                valid_joint = res['keypoints3d'][:, -1] > 0.1
+                if valid_joint.sum() < self.min_joints:
+                    self.log('[associate] Skip proposal {}->{} as not enough joints'.format(idx, indices))
+                    continue
+                indices[~valid_view] = -1
+                if (indices < 0).all():
+                    import ipdb; ipdb.set_trace()
+                self.log('[associate] Add indices {}, valid {}'.format(indices, (k2d[..., 2] > 0).sum(axis=-1)))
+                res['id'] = pid
+                res['indices'] = indices
+                res['valid_view'] = valid_view
+                res['valid_joints'] = res['keypoints3d'][:, -1] > 0.1
+                k3dresults.append(res)
+                for nv in range(nViews):
+                    if valid_view[nv] and indices[nv] != -1:
+                        p2dAssigned[indices[nv]] = pid
+                        visited[indices[nv]] = 1
+                pid += 1
+                break
+            visited[idx] = 1
+        self.log('[associate] {} points not visited, {} not assigned'.format(visited.shape[0] - visited.sum(), (p2dAssigned==-1).sum()))
+        k3dresults.sort(key=lambda x: -x['keypoints2d'][..., -1].sum())
+        return k3dresults
+
+    def _calculate_affinity_MxM(self, dims, dimGroups, data, key):
+        M = dimGroups[-1]
+        distance = np.zeros((M, M), dtype=np.float32)
+        nViews = len(dims)
+        for v0 in range(nViews-1):
+            for v1 in range(1, nViews):
+                # calculate the distance between (v0, v1)
+                if v0 >= v1:
+                    continue
+                if dims[v0] == 0 or dims[v1] == 0:
+                    continue
+                if True:
+                    pts0 = data[key][v0]
+                    pts1 = data[key][v1]
+                    K0, K1 = data['K'][v0], data['K'][v1]
+                    R0, T0 = data['Rc'][v0], data['Tc'][v0]
+                    R1, T1 = data['Rc'][v1], data['Tc'][v1]
+                    dist = self.distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1)
+                    dist /= (K0[0, 0] + K1[0, 0])/2
+                else:
+                    dist = self.distance_by_ray(pts0, pts1, R0, T0, R1, T1)
+                distance[dimGroups[v0]:dimGroups[v0+1], dimGroups[v1]:dimGroups[v1+1]] = dist
+                distance[dimGroups[v1]:dimGroups[v1+1], dimGroups[v0]:dimGroups[v0+1]] = dist.T
+        DIST_MAX = self.cfg_track.track_dist_max
+        for nv in range(nViews):
+            distance[dimGroups[nv]:dimGroups[nv+1], dimGroups[nv]:dimGroups[nv+1]] = DIST_MAX
+        distance -= np.eye(M) * DIST_MAX
+        aff = (DIST_MAX - distance)/DIST_MAX
+        aff = np.clip(aff, 0, 1)
+        return aff
+
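Both affinity functions map a distance matrix into [0, 1] with the same linear falloff, clamping everything beyond DIST_MAX to zero. For example, with a hypothetical threshold:

    import numpy as np

    DIST_MAX = 0.05                         # e.g. cfg_track.track_dist_max
    dist = np.array([0., 0.02, 0.05, 0.2])
    aff = np.clip((DIST_MAX - dist) / DIST_MAX, 0, 1)
    print(aff)                              # [1.  0.6 0.  0. ]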
+    def _calculate_affinity_MxN(self, dims, dimGroups, data, key, results):
+        M = dimGroups[-1]
+        N = len(results)
+        distance = np.zeros((M, N), dtype=np.float32)
+        nViews = len(dims)
+        k3d = np.stack([r['keypoints3d'] for r in results])
+        kpts_proj = project_points(k3d, data['KRT'], einsum='vab,pkb->vpka')
+        depth = kpts_proj[..., -1]
+        kpts_proj[depth<0] = -10000
+        for v in range(nViews):
+            if dims[v] == 0:
+                continue
+            focal = data['K'][v][0, 0]
+            pts2d = data[key][v][:, None]
+            pts_repro = kpts_proj[v][None]
+            conf = np.sqrt(pts2d[..., -1]*k3d[None, ..., -1])
+            diff = np.linalg.norm(pts2d[..., :2] - pts_repro[..., :2], axis=-1)
+            diff = np.sum(diff*conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
+            dist = diff / focal
+            distance[dimGroups[v]:dimGroups[v+1], :] = dist
+        DIST_MAX = self.cfg_track.track_repro_max
+        aff = (DIST_MAX - distance)/DIST_MAX
+        aff = np.clip(aff, 0, 1)
+        return aff
+
+    def _svt_optimize_affinity(self, affinity, dimGroups):
+        # match with SVT
+        import pymatchlr
+        observe = np.ones_like(affinity)
+        aff_svt = pymatchlr.matchSVT(affinity, dimGroups, SimpleConstrain(dimGroups), observe, self.cfg_svt)
+        aff_svt[aff_svt < self.cfg_svt.aff_min] = 0.
+        return aff_svt
+
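matchSVT lives in the external pymatchlr package, so only its call site is shown here; the constraint matrix it receives comes from SimpleConstrain, which forbids matching two detections of the same view while keeping the diagonal:

    import numpy as np

    dimGroups = [0, 2, 4]      # two views with two detections each
    print(SimpleConstrain(dimGroups))
    # [[1. 0. 1. 1.]
    #  [0. 1. 1. 1.]
    #  [1. 1. 1. 0.]
    #  [1. 1. 0. 1.]]   zero blocks are the forbidden within-view pairs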
+    def _track_add(self, res):
+        pid = self.max_id
+        res['id'] = pid
+        self.max_id += 1
+        self.log('[{:06d}] Add track {}, valid joints={}'.format(self.time, pid, (res['keypoints3d'][:, -1]>0.01).sum()))
+        self.tracks[pid] = {
+            'start_time': self.time,
+            'end_time': self.time+1,
+            'missing_frame': [],
+            'infos': [res]
+        }
+
+    def _track_update(self, res, pid):
+        res['id'] = pid
+        info = self.tracks[pid]
+        self.log('[{:06d}] Update track {} [{}->{}], valid joints={}'.format(self.time, pid, info['start_time'], info['end_time'], (res['keypoints3d'][:, -1]>0.1).sum()))
+        self.tracks[pid]['end_time'] = self.time + 1
+        self.tracks[pid]['infos'].append(res)
+
+    def _track_merge(self, res, pid):
+        res['id'] = -1
+        # TODO: merge
+
+    def _track_and_update(self, data, results):
+        cfg = self.cfg_track
+        self.time += 1
+        if self.time == 0:
+            # initialize the tracks
+            for res in results:
+                self._track_add(res)
+            return results
+        # remove tracks that have been missing for too long
+        for pid in list(self.tracks.keys()):
+            if self.time - self.tracks[pid]['end_time'] > cfg.max_missing_frame:
+                self.warn('[{:06d}] Remove track {}'.format(self.time, pid))
+                self.tracks.pop(pid)
+        # track the results with greedy matching
+        for idx_match, res in enumerate(results):
+            res['id'] = -1
+            # compute the distance
+            k3d = res['keypoints3d'][None]
+            pids_free = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] != self.time+1]
+            pids_used = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] == self.time+1]
+            def check_dist(k3d_check):
+                dist = np.linalg.norm(k3d[..., :3] - k3d_check[..., :3], axis=-1)
+                conf = np.sqrt(k3d[..., 3] * k3d_check[..., 3])
+                dist_mean = ((conf>0.1).sum(axis=-1) < self.min_joints)*cfg.track_dist_max + np.sum(dist * conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
+                argmin = dist_mean.argmin()
+                dist_min = dist_mean[argmin]
+                return dist_mean, argmin, dist_min
+            # check the tracks that are still free
+            NOT_VISITED = -2
+            NOT_FOUND = -1
+            flag_tracked, flag_current = NOT_VISITED, NOT_VISITED
+            if len(pids_free) > 0:
+                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_free])
+                dist_track, best, best_dist_track = check_dist(k3d_check)
+                if best_dist_track < cfg.track_dist_max:
+                    flag_tracked = best
+                else:
+                    flag_tracked = NOT_FOUND
+            # check the tracks already updated in this frame
+            if len(pids_used) > 0:
+                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_used])
+                dist_cur, best, best_dist_curr = check_dist(k3d_check)
+                if best_dist_curr < cfg.track_dist_max:
+                    flag_current = best
+                else:
+                    flag_current = NOT_FOUND
+            if flag_tracked >= 0 and (flag_current == NOT_VISITED or flag_current == NOT_FOUND):
+                self._track_update(res, pids_free[flag_tracked])
+            elif (flag_tracked == NOT_FOUND or flag_tracked == NOT_VISITED) and flag_current >= 0:
+                # not tracked, but close to a person that already has a 3D estimate in this frame: merge them
+                self.log('[{:06d}] Merge track {} to {}'.format(self.time, idx_match, pids_used[flag_current]))
+                self._track_merge(res, pids_used[flag_current])
+            elif flag_tracked == NOT_FOUND and flag_current == NOT_FOUND:
+                # create a new track
+                self._track_add(res)
+            else:
+                # discard this result
+                self.log('[{:06d}] Remove track {}. No close points'.format(self.time, idx_match))
+
+        for pid in list(self.tracks.keys()):
+            if self.tracks[pid]['end_time'] != self.time + 1:
+                self.warn('[{:06d}] Tracking {} missing'.format(self.time, pid))
+        results = [r for r in results if r['id'] != -1]
+        return results
+
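The greedy tracker compares a new skeleton against the last frame of each track with a confidence-weighted mean joint distance (the core of check_dist above). A simplified standalone version of that metric, omitting the extra penalty for skeletons with too few confident joints:

    import numpy as np

    def mean_joint_dist(k3d_a, k3d_b):
        # k3d_*: (nJoints, 4) arrays of (x, y, z, conf)
        dist = np.linalg.norm(k3d_a[:, :3] - k3d_b[:, :3], axis=-1)
        conf = np.sqrt(k3d_a[:, 3] * k3d_b[:, 3])
        return np.sum(dist * conf) / (1e-5 + np.sum(conf))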
+    def __call__(self, data):
+        # match the data
+        self.data = data
+        key = 'keypoints2d'
+        dims = [d.shape[0] for d in data[key]]
+        dimGroups = np.cumsum([0] + dims)
+        # 1. compute affinity
+        affinity = self._calculate_affinity_MxM(dims, dimGroups, data, key)
+        N2D = affinity.shape[0]
+        if self.people is not None and len(self.people) > 0:
+            # add 3d affinity
+            _affinity = affinity
+            affinity_3d = self._calculate_affinity_MxN(dims, dimGroups, data, key, self.people)
+            affinity = np.concatenate([affinity, affinity_3d], axis=1)
+            eye3d = np.eye(affinity_3d.shape[1])
+            affinity = np.concatenate([affinity, np.hstack((affinity_3d.T, eye3d))], axis=0)
+            dimGroups = dimGroups.tolist()
+            dimGroups.append(dimGroups[-1]+affinity_3d.shape[1])
+            affinity = self._svt_optimize_affinity(affinity, dimGroups)
+            # affinity = self._svt_optimize_affinity(_affinity, dimGroups[:-1])
+            # recover
+            affinity_3d = np.hstack([np.ones((N2D, 1))*0.5, affinity[:N2D, N2D:]])
+            prev_id = affinity_3d.argmax(axis=-1) - 1
+            affinity = affinity[:N2D, :N2D]
+            dimGroups = np.array(dimGroups[:-1])
+        else:
+            affinity = self._svt_optimize_affinity(affinity, dimGroups)
+            prev_id = np.zeros(N2D) - 1
+        # 2. associate and triangulate
+        results = self._simple_associate2d_triangulate(data, affinity, dimGroups, prev_id)
+        # 3. track, filter and return
+        results = self._track_and_update(data, results)
+        results.sort(key=lambda x:x['id'])
+        self.people = results
+        return results
\ No newline at end of file
diff --git a/easymocap/mytools/vis_base.py b/easymocap/mytools/vis_base.py
index 325548c..490c63a 100644
--- a/easymocap/mytools/vis_base.py
+++ b/easymocap/mytools/vis_base.py
@@ -2,27 +2,38 @@
     @ Date: 2020-11-28 17:23:04
     @ Author: Qing Shuai
     @ LastEditors: Qing Shuai
-    @ LastEditTime: 2021-08-22 16:11:25
-    @ FilePath: /EasyMocap/easymocap/mytools/vis_base.py
+    @ LastEditTime: 2022-08-12 21:50:56
+    @ FilePath: /EasyMocapPublic/easymocap/mytools/vis_base.py
 '''
 import cv2
 import numpy as np
 import json
 
-def generate_colorbar(N = 20, cmap = 'jet'):
+def generate_colorbar(N = 20, cmap = 'jet', rand=True):
     bar = ((np.arange(N)/(N-1))*255).astype(np.uint8).reshape(-1, 1)
     colorbar = cv2.applyColorMap(bar, cv2.COLORMAP_JET).squeeze()
     if False:
         colorbar = np.clip(colorbar + 64, 0, 255)
-    import random
-    random.seed(666)
-    index = [i for i in range(N)]
-    random.shuffle(index)
-    rgb = colorbar[index, :]
+    if rand:
+        import random
+        random.seed(666)
+        index = [i for i in range(N)]
+        random.shuffle(index)
+        rgb = colorbar[index, :]
+    else:
+        rgb = colorbar
     rgb = rgb.tolist()
     return rgb
 
-colors_bar_rgb = generate_colorbar(cmap='hsv')
+# colors_bar_rgb = generate_colorbar(cmap='hsv')
+colors_bar_rgb = [
+    (94, 124, 226), # cyan
+    (255, 200, 87), # yellow
+    (74, 189, 172), # green
+    (8, 76, 97), # blue
+    (219, 58, 52), # red
+    (77, 40, 49), # brown
+]
 
 colors_table = {
     'b': [0.65098039, 0.74117647, 0.85882353],
@@ -34,15 +45,19 @@ colors_table = {
     'r': [ 251/255., 128/255., 114/255.],
     '_orange': [ 253/255., 174/255., 97/255.],
     'y': [ 250/255., 230/255., 154/255.],
-    '_r':[255/255,0,0], 'g':[0,255/255,0],
-    '_b':[0,0,255/255], 'k':[0,0,0],
+    '_r':[255/255,0,0],
+    '_g':[0,255/255,0],
+    '_b':[0,0,255/255],
+    '_k':[0,0,0],
     '_y':[255/255,255/255,0],
     'purple':[128/255,0,128/255],
     'smap_b':[51/255,153/255,255/255],
     'smap_r':[255/255,51/255,153/255],
-    'smap_b':[51/255,255/255,153/255],
+    'person': [255/255,255/255,255/255],
+    'handl': [255/255,51/255,153/255],
+    'handr': [51/255,255/255,153/255],
 }
 
 def get_rgb(index):
@@ -51,7 +66,9 @@ def get_rgb(index):
             return (255, 255, 255)
         if index < -1:
             return (0, 0, 0)
-        col = colors_bar_rgb[index%len(colors_bar_rgb)]
+        # elif index == 0:
+        #     return (245, 150, 150)
+        col = list(colors_bar_rgb[index%len(colors_bar_rgb)])[::-1]
     else:
         col = colors_table.get(index, (1, 0, 0))
         col = tuple([int(c*255) for c in col[::-1]])
@@ -80,13 +97,14 @@ def plot_cross(img, x, y, col, width=-1, lw=-1):
     cv2.line(img, (int(x-width), int(y)), (int(x+width), int(y)), col, lw)
     cv2.line(img, (int(x), int(y-width)), (int(x), int(y+width)), col, lw)
 
-def plot_bbox(img, bbox, pid, vis_id=True):
+def plot_bbox(img, bbox, pid, scale=1, vis_id=True):
     # draw the bbox: (l, t, r, b)
-    x1, y1, x2, y2 = bbox[:4]
-    x1 = int(round(x1))
-    x2 = int(round(x2))
-    y1 = int(round(y1))
-    y2 = int(round(y2))
+    x1, y1, x2, y2, c = bbox
+    if c < 0.01: return img
+    x1 = int(round(x1*scale))
+    x2 = int(round(x2*scale))
+    y1 = int(round(y1*scale))
+    y2 = int(round(y2*scale))
     color = get_rgb(pid)
     lw = max(img.shape[0]//300, 2)
     cv2.rectangle(img, (x1, y1), (x2, y2), color, lw)
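Note that colors_bar_rgb stores RGB tuples and the updated get_rgb reverses integer-indexed entries, so the returned triple is BGR, ready for OpenCV drawing calls:

    col = get_rgb(0)   # -> [226, 124, 94], the BGR of (94, 124, 226)
    # pass it straight to cv2, e.g.
    # cv2.rectangle(img, (0, 0), (100, 100), col, 2)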
@@ -94,11 +112,20 @@ def plot_bbox(img, bbox, pid, vis_id=True):
     if vis_id:
         font_scale = img.shape[0]/1000
         cv2.putText(img, '{}'.format(pid), (x1, y1+int(25*font_scale)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)
 
-def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2):
+def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2, fliplr=False):
+    lw = max(lw, 2)
+    H, W = img.shape[:2]
     for ii, (i, j) in enumerate(config['kintree']):
         if i >= len(points) or j >= len(points):
             continue
+        if (i > 25 or j > 25) and config['nJoints'] != 42:
+            _lw = max(int(lw/4), 1)
+        else:
+            _lw = lw
         pt1, pt2 = points[i], points[j]
+        if fliplr:
+            pt1 = (W-pt1[0], pt1[1])
+            pt2 = (W-pt2[0], pt2[1])
         if use_limb_color:
             col = get_rgb(config['colors'][ii])
         else:
@@ -106,32 +133,111 @@ def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True
         if pt1[-1] > 0.01 and pt2[-1] > 0.01:
             image = cv2.line(
                 img, (int(pt1[0]+0.5), int(pt1[1]+0.5)), (int(pt2[0]+0.5), int(pt2[1]+0.5)),
-                col, lw)
-    for i in range(len(points)):
+                col, _lw)
+    for i in range(min(len(points), config['nJoints'])):
         x, y = points[i][0], points[i][1]
+        if fliplr:
+            x = W - x
+        c = points[i][-1]
+        if c > 0.01:
+            text_size = img.shape[0]/1000
+            col = get_rgb(pid)
+            radius = int(lw/1.5)
+            if i > 25 and config['nJoints'] != 42:
+                radius = max(int(radius/4), 1)
+            cv2.circle(img, (int(x+0.5), int(y+0.5)), radius, col, -1)
+            if vis_conf:
+                cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)),
+                    cv2.FONT_HERSHEY_SIMPLEX, text_size, col, 2)
+
+def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, scale=1, lw=-1):
+    from ..dataset.config import CONFIG
+    config_name = {25: 'body25', 21: 'hand', 42: 'handlr', 17: 'coco', 1: 'points', 67: 'bodyhand', 137: 'total', 79: 'up'}[len(points)]
+    config = CONFIG[config_name]
+    if lw == -1:
+        lw = img.shape[0]//200
+        if config_name == 'hand':
+            lw = img.shape[0]//1000
+        lw = max(lw, 1)
+    for ii, (i, j) in enumerate(config['kintree']):
+        if i >= len(points) or j >= len(points):
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
+        pt1, pt2 = points[i], points[j]
+        if use_limb_color:
+            col = get_rgb(config['colors'][ii])
+        else:
+            col = get_rgb(pid)
+        if pt1[0] < 0 or pt1[1] < 0 or pt1[0] > 10000 or pt1[1] > 10000:
+            continue
+        if pt2[0] < 0 or pt2[1] < 0 or pt2[0] > 10000 or pt2[1] > 10000:
+            continue
+        if pt1[-1] > 0.01 and pt2[-1] > 0.01:
+            image = cv2.line(
+                img, (int(pt1[0]*scale+0.5), int(pt1[1]*scale+0.5)), (int(pt2[0]*scale+0.5), int(pt2[1]*scale+0.5)),
+                col, lw)
+    lw = img.shape[0]//200
+    if config_name == 'hand':
+        lw = img.shape[0]//500
+    lw = max(lw, 1)
+    for i in range(len(points)):
+        x, y = points[i][0]*scale, points[i][1]*scale
+        if x < 0 or y < 0 or x > 10000 or y > 10000:
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
         c = points[i][-1]
         if c > 0.01:
             col = get_rgb(pid)
-            cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
+            if len(points) == 1:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*10, col, lw*2)
+                plot_cross(img, int(x+0.5), int(y+0.5), width=lw*5, col=col, lw=lw*2)
+            else:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
         if vis_conf:
             cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)),
                 cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)
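plot_keypoints_auto dispatches the skeleton configuration purely on the number of keypoints (25 -> 'body25', 17 -> 'coco', ...), so any other joint count raises a KeyError. A minimal call with hypothetical detections:

    import numpy as np

    canvas = np.full((1080, 1920, 3), 255, np.uint8)
    points = np.zeros((25, 3))     # interpreted as 'body25'
    points[:, :2] = [960, 540]
    points[:, 2] = 0.9             # confidence
    plot_keypoints_auto(canvas, points, pid=0, use_limb_color=False)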
-def plot_points2d(img, points2d, lines, lw=4, col=(0, 255, 0), putText=True):
+def plot_keypoints_total(img, annots, scale, pid_offset=0):
+    _lw = img.shape[0] // 150
+    for annot in annots:
+        pid = annot['personID'] + pid_offset
+        for key in ['keypoints', 'handl2d', 'handr2d']:
+            if key not in annot.keys(): continue
+            if key in ['handl2d', 'handr2d', 'face2d']:
+                lw = _lw // 2
+            else:
+                lw = _lw
+            lw = max(lw, 1)
+            plot_keypoints_auto(img, annot[key], pid, vis_conf=False, use_limb_color=False, scale=scale, lw=lw)
+        if 'bbox' not in annot.keys() or (annot['bbox'][0] < 0 or annot['bbox'][0] > 10000):
+            continue
+        plot_bbox(img, annot['bbox'], pid, scale=scale, vis_id=True)
+    return img
+
+def plot_points2d(img, points2d, lines, lw=-1, col=(0, 255, 0), putText=True, style='+'):
     # draw the 2D points
     if points2d.shape[1] == 2:
         points2d = np.hstack([points2d, np.ones((points2d.shape[0], 1))])
+    if lw == -1:
+        lw = img.shape[0]//200
     for i, (x, y, v) in enumerate(points2d):
         if v < 0.01:
             continue
         c = col
-        plot_cross(img, x, y, width=10, col=c, lw=lw)
+        if '+' in style:
+            plot_cross(img, x, y, width=10, col=c, lw=lw*2)
+        if 'o' in style:
+            cv2.circle(img, (int(x), int(y)), 10, c, lw*2)
+            cv2.circle(img, (int(x), int(y)), lw, c, lw)
         if putText:
-            font_scale = img.shape[0]/2000
+            c = col[::-1]
+            font_scale = img.shape[0]/1000
             cv2.putText(img, '{}'.format(i), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, c, 2)
     for i, j in lines:
         if points2d[i][2] < 0.01 or points2d[j][2] < 0.01:
             continue
-        plot_line(img, points2d[i], points2d[j], 2, col)
+        plot_line(img, points2d[i], points2d[j], max(1, lw//2), col)
 
 row_col_ = {
     2: (2, 1),
@@ -140,7 +246,18 @@ row_col_ = {
     9: (3, 3),
     26: (4, 7)
 }
-def get_row_col(l):
+
+row_col_square = {
+    2: (2, 1),
+    7: (3, 3),
+    8: (3, 3),
+    9: (3, 3),
+    26: (5, 5)
+}
+
+def get_row_col(l, square):
+    if square and l in row_col_square.keys():
+        return row_col_square[l]
     if l in row_col_.keys():
         return row_col_[l]
     else:
@@ -153,12 +270,19 @@ def get_row_col(l):
         row, col = col, row
     return row, col
 
-def merge(images, row=-1, col=-1, resize=False, ret_range=False, **kwargs):
+def merge(images, row=-1, col=-1, resize=False, ret_range=False, square=False, **kwargs):
     if row == -1 and col == -1:
-        row, col = get_row_col(len(images))
+        row, col = get_row_col(len(images), square)
     height = images[0].shape[0]
     width = images[0].shape[1]
-    ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    # special case
+    if height > width:
+        if len(images) == 3:
+            row, col = 1, 3
+    if len(images[0].shape) > 2:
+        ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    else:
+        ret_img = np.zeros((height * row, width * col), dtype=np.uint8) + 255
     ranges = []
     for i in range(row):
         for j in range(col):
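merge tiles the per-view images into one grid, with the layout taken from get_row_col (or row_col_square when square=True). Assuming the rest of the function fills the grid without resizing, nine 480x640 views should tile into a 3x3 canvas:

    import numpy as np

    images = [np.zeros((480, 640, 3), np.uint8) for _ in range(9)]
    grid = merge(images)       # row_col_[9] == (3, 3)
    print(grid.shape)          # expected: (1440, 1920, 3)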
diff --git a/scripts/dataset/download_youtube.py b/scripts/dataset/download_youtube.py
new file mode 100644
index 0000000..bcf30aa
--- /dev/null
+++ b/scripts/dataset/download_youtube.py
@@ -0,0 +1,104 @@
+'''
+    @ Date: 2022-03-29 13:55:42
+    @ Author: Qing Shuai
+    @ Mail: s_q@zju.edu.cn
+    @ LastEditors: Qing Shuai
+    @ LastEditTime: 2022-05-06 16:45:47
+    @ FilePath: /EasyMocapPublic/scripts/dataset/download_youtube.py
+'''
+from glob import glob
+from os.path import join
+from urllib.error import URLError
+from pytube import YouTube
+import os
+from easymocap.mytools.debug_utils import log, mkdir, myerror
+
+extensions = ['.mp4', '.webm']
+
+def download_youtube(vid, outdir):
+    outname = join(outdir, vid)
+    url = 'https://www.youtube.com/watch?v={}'.format(vid)
+    for ext in extensions:
+        if os.path.exists(outname+ext) and not args.restart:
+            log('[Info]: skip video {}'.format(outname+ext))
+            return 0
+    log('[Info]: start to download video {}'.format(outname))
+    log('[Info]: {}'.format(url))
+    yt = YouTube(url)
+    try:
+        streams = yt.streams
+    except KeyError:
+        myerror('[Error]: not found streams: {}'.format(url))
+        return 1
+    except URLError:
+        myerror('[Error]: Url error: {}'.format(url))
+        return 1
+    find = False
+    streams_valid = []
+    res_range = ['2160p', '1440p', '1080p', '720p'] if not args.only4k else ['2160p']
+    if args.no720:
+        res_range.remove('720p')
+    for res in res_range:
+        for fps in [60, 50, 30, 25, 24]:
+            for ext in ['webm', 'mp4']:
+                for stream in streams:
+                    if stream.resolution == res and \
+                        stream.fps == fps and \
+                        stream.mime_type == 'video/{}'.format(ext):
+                        streams_valid.append(stream)
+    if len(streams_valid) == 0:
+        for stream in streams:
+            print(stream)
+        myerror('[BUG ] Not found valid stream, please check the streams')
+        return 0
+    # best_stream = yt.streams.order_by('filesize')[-1]
+    title = streams_valid[0].title
+    log('[Info]: {}'.format(title))
+    for stream in streams_valid:
+        res = stream.resolution
+        log('[Info]: The resolution is {}, ext={}'.format(res, stream.mime_type))
+        filename = '{}.{}'.format(vid, stream.mime_type.split('/')[-1])
+        try:
+            stream.download(output_path=outdir, filename=filename, max_retries=0)
+            log('[Info]: Succeed')
+        except:
+            myerror('[BUG ]: Failed')
+            continue
+        break
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('vid', type=str)
+    parser.add_argument('--database', type=str, default='data/youtube')
+    parser.add_argument('--num', type=int, default=1)
+    parser.add_argument('--only4k', action='store_true')
+    parser.add_argument('--no720', action='store_true')
+    parser.add_argument('--restart', action='store_true')
+    parser.add_argument('--debug', action='store_true')
+    args = parser.parse_args()
+
+    vid = args.vid
+    # check the database
+    database = join(args.database, 'videos')
+    mkdir(database)
+    videonames = sorted(os.listdir(database))
+    log('[download] video database in {}'.format(database))
+    log('[download] already has {} videos'.format(len(videonames)))
+
+    if vid.startswith('https'):
+        vid = vid.replace('https://www.youtube.com/watch?v=', '')
+        vid = vid.split('&')[0]
+        print(vid)
+        urls = [vid]
+    elif os.path.exists(vid):
+        with open(vid, 'r') as f:
+            urls = f.readlines()
+        urls = list(filter(lambda x: not x.startswith('#') and len(x) > 0, map(lambda x: x.strip().replace('https://www.youtube.com/watch?v=', '').split('&')[0], urls)))
+        log('[download] download {} videos from {}'.format(len(urls), vid))
+    else:
+        urls = [vid]
+
+    for url in urls:
+        download_youtube(url, database)
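With the argparse interface above, typical invocations look like the following (both the video id and the list file name are placeholders); a text-file argument is read as one video URL or id per line, with '#'-prefixed lines skipped:

    python scripts/dataset/download_youtube.py <video-id-or-url> --database data/youtube
    python scripts/dataset/download_youtube.py video_list.txt --no720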