🚧 update tools

1. update camera reader
2. update debug and visualize tools

parent: d534ba41fc
commit: 5bc4b113ba
@@ -1,7 +1,7 @@
 import cv2
 import numpy as np
 import os
-
+from os.path import join
 class FileStorage(object):
     def __init__(self, filename, isWrite=False):
         version = cv2.__version__
@@ -10,27 +10,34 @@ class FileStorage(object):
 
         if isWrite:
             os.makedirs(os.path.dirname(filename), exist_ok=True)
-            self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_WRITE)
+            self.fs = open(filename, 'w')
+            self.fs.write('%YAML:1.0\r\n')
+            self.fs.write('---\r\n')
         else:
+            assert os.path.exists(filename), filename
             self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ)
+        self.isWrite = isWrite
 
     def __del__(self):
-        cv2.FileStorage.release(self.fs)
+        if self.isWrite:
+            self.fs.close()
+        else:
+            cv2.FileStorage.release(self.fs)
+
+    def _write(self, out):
+        self.fs.write(out+'\r\n')
 
     def write(self, key, value, dt='mat'):
         if dt == 'mat':
-            cv2.FileStorage.write(self.fs, key, value)
+            self._write('{}: !!opencv-matrix'.format(key))
+            self._write('  rows: {}'.format(value.shape[0]))
+            self._write('  cols: {}'.format(value.shape[1]))
+            self._write('  dt: d')
+            self._write('  data: [{}]'.format(', '.join(['{:.3f}'.format(i) for i in value.reshape(-1)])))
         elif dt == 'list':
-            if self.major_version == 4: # 4.4
-                self.fs.startWriteStruct(key, cv2.FileNode_SEQ)
-                for elem in value:
-                    self.fs.write('', elem)
-                self.fs.endWriteStruct()
-            else: # 3.4
-                self.fs.write(key, '[')
-                for elem in value:
-                    self.fs.write('none', elem)
-                self.fs.write('none', ']')
+            self._write('{}:'.format(key))
+            for elem in value:
+                self._write('  - "{}"'.format(elem))
 
     def read(self, key, dt='mat'):
         if dt == 'mat':
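The rewritten writer emits OpenCV-compatible YAML by hand instead of going through cv2.FileStorage. A minimal sketch (not part of the commit) of the format it produces; the key 'K_01' and the output path are made up for illustration:

import numpy as np

# Build the same '!!opencv-matrix' layout that FileStorage._write produces.
K = np.eye(3)
lines = ['%YAML:1.0', '---']
lines.append('K_01: !!opencv-matrix')
lines.append('  rows: {}'.format(K.shape[0]))
lines.append('  cols: {}'.format(K.shape[1]))
lines.append('  dt: d')
lines.append('  data: [{}]'.format(', '.join(['{:.3f}'.format(i) for i in K.reshape(-1)])))
with open('example_intri.yml', 'w') as f:
    f.write('\r\n'.join(lines) + '\r\n')
# cv2.FileStorage('example_intri.yml', cv2.FILE_STORAGE_READ) should read 'K_01' back as a 3x3 matrix.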
@@ -66,6 +73,8 @@ def read_intri(intri_name):
     return cameras
 
 def write_intri(intri_name, cameras):
+    if not os.path.exists(os.path.dirname(intri_name)):
+        os.makedirs(os.path.dirname(intri_name))
     intri = FileStorage(intri_name, True)
     results = {}
     camnames = list(cameras.keys())
@@ -74,11 +83,13 @@ def write_intri(intri_name, cameras):
         key = key_.split('.')[0]
         K, dist = val['K'], val['dist']
         assert K.shape == (3, 3), K.shape
-        assert dist.shape == (1, 5) or dist.shape == (5, 1), dist.shape
+        assert dist.shape == (1, 5) or dist.shape == (5, 1) or dist.shape == (1, 4) or dist.shape == (4, 1), dist.shape
         intri.write('K_{}'.format(key), K)
-        intri.write('dist_{}'.format(key), dist.reshape(1, 5))
+        intri.write('dist_{}'.format(key), dist.flatten()[None])
 
 def write_extri(extri_name, cameras):
+    if not os.path.exists(os.path.dirname(extri_name)):
+        os.makedirs(os.path.dirname(extri_name))
     extri = FileStorage(extri_name, True)
     results = {}
     camnames = list(cameras.keys())
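The distortion is now written with dist.flatten()[None] and the assertion accepts 4-parameter models, so row or column vectors of length 4 or 5 all end up as a single row. A quick illustration of that reshaping (plain NumPy, independent of the repo):

import numpy as np

for shape in [(1, 5), (5, 1), (1, 4), (4, 1)]:
    dist = np.zeros(shape)
    print(shape, '->', dist.flatten()[None].shape)
# (1, 5) -> (1, 5)
# (5, 1) -> (1, 5)
# (1, 4) -> (1, 4)
# (4, 1) -> (1, 4)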
@@ -105,12 +116,15 @@ def read_camera(intri_name, extri_name, cam_names=[]):
         cams[cam]['invK'] = np.linalg.inv(cams[cam]['K'])
         Rvec = extri.read('R_{}'.format(cam))
         Tvec = extri.read('T_{}'.format(cam))
+        assert Rvec is not None, cam
         R = cv2.Rodrigues(Rvec)[0]
         RT = np.hstack((R, Tvec))
 
         cams[cam]['RT'] = RT
         cams[cam]['R'] = R
+        cams[cam]['Rvec'] = Rvec
         cams[cam]['T'] = Tvec
+        cams[cam]['center'] = - Rvec.T @ Tvec
         P[cam] = cams[cam]['K'] @ cams[cam]['RT']
         cams[cam]['P'] = P[cam]
 
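The new 'center' entry caches the camera position in world coordinates. For a rotation R and translation T of a world-to-camera transform, the standard relation is center = -R.T @ T; a standalone check with synthetic values (not repo code):

import numpy as np
import cv2

Rvec = np.array([[0.1], [0.2], [0.3]])
Tvec = np.array([[0.5], [0.1], [2.0]])
R = cv2.Rodrigues(Rvec)[0]
center = - R.T @ Tvec        # camera center in world coordinates
print(R @ center + Tvec)     # ~[[0.], [0.], [0.]]: the center maps to the camera origin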
@@ -118,6 +132,13 @@ def read_camera(intri_name, extri_name, cam_names=[]):
     cams['basenames'] = cam_names
     return cams
 
+def read_cameras(path, intri='intri.yml', extri='extri.yml', subs=[]):
+    cameras = read_camera(join(path, intri), join(path, extri))
+    cameras.pop('basenames')
+    if len(subs) > 0:
+        cameras = {key:cameras[key].astype(np.float32) for key in subs}
+    return cameras
+
 def write_camera(camera, path):
     from os.path import join
     intri_name = join(path, 'intri.yml')
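read_cameras is a convenience wrapper around read_camera that drops the 'basenames' entry. A usage sketch, assuming the module is importable as easymocap.mytools.camera_utils and that <path>/intri.yml and <path>/extri.yml already exist:

from easymocap.mytools.camera_utils import read_cameras   # module path assumed

cameras = read_cameras('data/examplescene')
for name, cam in cameras.items():
    print(name, cam['K'].shape, cam['RT'].shape, cam['center'].T)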
@@ -146,12 +167,24 @@ def camera_from_img(img):
     focal = 1.2*min(height, width) # as colmap
     K = np.array([focal, 0., width/2, 0., focal, height/2, 0. ,0., 1.]).reshape(3, 3)
     camera = {'K':K ,'R': np.eye(3), 'T': np.zeros((3, 1)), 'dist': np.zeros((1, 5))}
+    camera['invK'] = np.linalg.inv(camera['K'])
+    camera['P'] = camera['K'] @ np.hstack((camera['R'], camera['T']))
     return camera
 
 class Undistort:
-    @staticmethod
-    def image(frame, K, dist):
-        return cv2.undistort(frame, K, dist, None)
+    distortMap = {}
+    @classmethod
+    def image(cls, frame, K, dist, sub=None):
+        if sub is None:
+            return cv2.undistort(frame, K, dist, None)
+        else:
+            if sub not in cls.distortMap.keys():
+                h, w = frame.shape[:2]
+                mapx, mapy = cv2.initUndistortRectifyMap(K, dist, None, K, (w,h), 5)
+                cls.distortMap[sub] = (mapx, mapy)
+            mapx, mapy = cls.distortMap[sub]
+            img = cv2.remap(frame, mapx, mapy, cv2.INTER_NEAREST)
+            return img
 
     @staticmethod
     def points(keypoints, K, dist):
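Undistort.image is now a classmethod with a per-view cache: the first call for a given sub builds the remap grids with cv2.initUndistortRectifyMap, later calls only run cv2.remap. A sketch of the intended call pattern with synthetic inputs (module path assumed):

import numpy as np
from easymocap.mytools.camera_utils import Undistort   # module path assumed

K = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
dist = np.array([[0.1, -0.05, 0., 0., 0.]])
frame = np.zeros((480, 640, 3), dtype=np.uint8)
out0 = Undistort.image(frame, K, dist)                # one-off path, plain cv2.undistort
out1 = Undistort.image(frame, K, dist, sub='cam01')   # builds and caches the maps for 'cam01'
out2 = Undistort.image(frame, K, dist, sub='cam01')   # reuses the cached maps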
@@ -170,10 +203,38 @@ class Undistort:
         bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
         return bbox
 
-def undistort(camera, frame=None, keypoints=None, output=None, bbox=None):
-    # bbox: 1, 7
-    print('This function is deprecated')
-    raise NotImplementedError
+def unproj(kpts, invK):
+    homo = np.hstack([kpts[:, :2], np.ones_like(kpts[:, :1])])
+    homo = homo @ invK.T
+    return np.hstack([homo[:, :2], kpts[:, 2:]])
+class UndistortFisheye:
+    @staticmethod
+    def image(frame, K, dist):
+        Knew = K.copy()
+        frame = cv2.fisheye.undistortImage(frame, K, dist, Knew=Knew)
+        return frame, Knew
+
+    @staticmethod
+    def points(keypoints, K, dist, Knew):
+        # keypoints: (N, 3)
+        assert len(keypoints.shape) == 2, keypoints.shape
+        kpts = keypoints[:, None, :2]
+        kpts = np.ascontiguousarray(kpts)
+        kpts = cv2.fisheye.undistortPoints(kpts, K, dist, P=Knew)
+        keypoints[:, :2] = kpts[:, 0]
+        return keypoints
+
+    @staticmethod
+    def bbox(bbox, K, dist, Knew):
+        keypoints = np.array([[bbox[0], bbox[1], 1], [bbox[2], bbox[3], 1]])
+        kpts = UndistortFisheye.points(keypoints, K, dist, Knew)
+        bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
+        return bbox
+
+
+def get_Pall(cameras, camnames):
+    Pall = np.stack([cameras[cam]['K'] @ np.hstack((cameras[cam]['R'], cameras[cam]['T'])) for cam in camnames])
+    return Pall
 
 def get_fundamental_matrix(cameras, basenames):
     skew_op = lambda x: np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
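get_Pall simply stacks the per-view projection matrices P = K [R|T]. A self-contained check with a synthetic two-camera rig (the names '01' and '02' are arbitrary):

import numpy as np

cameras = {
    '01': {'K': np.eye(3), 'R': np.eye(3), 'T': np.zeros((3, 1))},
    '02': {'K': np.eye(3), 'R': np.eye(3), 'T': np.array([[1.], [0.], [0.]])},
}
camnames = ['01', '02']
Pall = np.stack([cameras[cam]['K'] @ np.hstack((cameras[cam]['R'], cameras[cam]['T'])) for cam in camnames])
print(Pall.shape)   # (2, 3, 4)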
@@ -189,3 +250,59 @@ def get_fundamental_matrix(cameras, basenames):
             if F[(icam, jcam)].sum() == 0:
                 F[(icam, jcam)] += 1e-12 # to avoid nan
     return F
+
+def interp_cameras(cameras, keys, step=20, loop=True, allstep=-1, **kwargs):
+    from scipy.spatial.transform import Rotation as R
+    from scipy.spatial.transform import Slerp
+    if allstep != -1:
+        tall = np.linspace(0., 1., allstep+1)[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and loop:
+        tall = np.linspace(0., 1., 1+step*len(keys))[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and not loop:
+        tall = np.linspace(0., 1., 1+step*(len(keys)-1))[:-1].reshape(-1, 1, 1)
+    cameras_new = {}
+    for ik in range(len(keys)):
+        if ik == len(keys) -1 and not loop:
+            break
+        if loop:
+            start, end = (ik * tall.shape[0])//len(keys), int((ik+1)*tall.shape[0])//len(keys)
+            print(ik, start, end, tall.shape)
+        else:
+            start, end = (ik * tall.shape[0])//(len(keys)-1), int((ik+1)*tall.shape[0])//(len(keys)-1)
+        t = tall[start:end].copy()
+        t = (t-t.min())/(t.max()-t.min())
+        left, right = keys[ik], keys[0 if ik == len(keys)-1 else ik + 1]
+        camera_left = cameras[left]
+        camera_right = cameras[right]
+        # interpolate the camera center: center = - R.T @ T
+        center_l = - camera_left['R'].T @ camera_left['T']
+        center_r = - camera_right['R'].T @ camera_right['T']
+        center_l, center_r = center_l[None], center_r[None]
+        if False:
+            centers = center_l * (1-t) + center_r * t
+        else:
+            # spherical interpolation of the camera center
+            norm_l, norm_r = np.linalg.norm(center_l), np.linalg.norm(center_r)
+            center_l, center_r = center_l/norm_l, center_r/norm_r
+            costheta = (center_l*center_r).sum()
+            sintheta = np.sqrt(1. - costheta**2)
+            theta = np.arctan2(sintheta, costheta)
+            centers = (np.sin(theta*(1-t)) * center_l + np.sin(theta * t) * center_r)/sintheta
+            norm = norm_l * (1-t) + norm_r * t
+            centers = centers * norm
+        key_rots = R.from_matrix(np.stack([camera_left['R'], camera_right['R']]))
+        key_times = [0, 1]
+        slerp = Slerp(key_times, key_rots)
+        interp_rots = slerp(t.squeeze()).as_matrix()
+        # compute the camera T from R X + T = 0  =>  T = - R @ X
+        T = - np.einsum('bmn,bno->bmo', interp_rots, centers)
+        K = camera_left['K'] * (1-t) + camera_right['K'] * t
+        for i in range(T.shape[0]):
+            cameras_new['{}-{}-{}'.format(left, right, i)] = \
+                {
+                    'K': K[i],
+                    'dist': np.zeros((1, 5)),
+                    'R': interp_rots[i],
+                    'T': T[i]
+                }
+    return cameras_new
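interp_cameras combines a Slerp over the key rotations with an interpolation of the camera centers, then rebuilds T from the interpolated pose. The core idea, reduced to a standalone sketch with two made-up key poses (linear center interpolation here; the committed code also offers the spherical variant):

import numpy as np
from scipy.spatial.transform import Rotation as R, Slerp

R0 = R.from_euler('y', 0, degrees=True).as_matrix()
R1 = R.from_euler('y', 90, degrees=True).as_matrix()
center0 = np.array([[0.], [0.], [3.]])
center1 = np.array([[3.], [0.], [0.]])

slerp = Slerp([0, 1], R.from_matrix(np.stack([R0, R1])))
for t in np.linspace(0., 1., 5):
    Rt = slerp(t).as_matrix()
    ct = (1 - t) * center0 + t * center1   # interpolated camera center
    Tt = - Rt @ ct                         # T = -R @ center
    print(round(float(t), 2), np.round(Tt.T, 3))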
easymocap/mytools/colmap_structure.py (new file, +439 lines)
|
|||||||
|
# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
|
||||||
|
# its contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import collections
|
||||||
|
import numpy as np
|
||||||
|
import struct
|
||||||
|
import cv2
|
||||||
|
|
||||||
|
CameraModel = collections.namedtuple(
|
||||||
|
"CameraModel", ["model_id", "model_name", "num_params"])
|
||||||
|
Camera = collections.namedtuple(
|
||||||
|
"Camera", ["id", "model", "width", "height", "params"])
|
||||||
|
BaseImage = collections.namedtuple(
|
||||||
|
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
|
||||||
|
Point3D = collections.namedtuple(
|
||||||
|
"Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
|
||||||
|
|
||||||
|
class Image(BaseImage):
|
||||||
|
def qvec2rotmat(self):
|
||||||
|
return qvec2rotmat(self.qvec)
|
||||||
|
|
||||||
|
|
||||||
|
CAMERA_MODELS = {
|
||||||
|
CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
|
||||||
|
CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
|
||||||
|
CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
|
||||||
|
CameraModel(model_id=3, model_name="RADIAL", num_params=5),
|
||||||
|
CameraModel(model_id=4, model_name="OPENCV", num_params=8),
|
||||||
|
CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
|
||||||
|
CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
|
||||||
|
CameraModel(model_id=7, model_name="FOV", num_params=5),
|
||||||
|
CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
|
||||||
|
CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
|
||||||
|
CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
|
||||||
|
}
|
||||||
|
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \
|
||||||
|
for camera_model in CAMERA_MODELS])
|
||||||
|
CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
|
||||||
|
for camera_model in CAMERA_MODELS])
|
||||||
|
|
||||||
|
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
|
||||||
|
"""Read and unpack the next bytes from a binary file.
|
||||||
|
:param fid:
|
||||||
|
:param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
|
||||||
|
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
|
||||||
|
:param endian_character: Any of {@, =, <, >, !}
|
||||||
|
:return: Tuple of read and unpacked values.
|
||||||
|
"""
|
||||||
|
data = fid.read(num_bytes)
|
||||||
|
return struct.unpack(endian_character + format_char_sequence, data)
|
||||||
|
|
||||||
|
|
||||||
|
def read_cameras_text(path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::WriteCamerasText(const std::string& path)
|
||||||
|
void Reconstruction::ReadCamerasText(const std::string& path)
|
||||||
|
"""
|
||||||
|
cameras = {}
|
||||||
|
with open(path, "r") as fid:
|
||||||
|
while True:
|
||||||
|
line = fid.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
line = line.strip()
|
||||||
|
if len(line) > 0 and line[0] != "#":
|
||||||
|
elems = line.split()
|
||||||
|
camera_id = int(elems[0])
|
||||||
|
model = elems[1]
|
||||||
|
width = int(elems[2])
|
||||||
|
height = int(elems[3])
|
||||||
|
params = np.array(tuple(map(float, elems[4:])))
|
||||||
|
cameras[camera_id] = Camera(id=camera_id, model=model,
|
||||||
|
width=width, height=height,
|
||||||
|
params=params)
|
||||||
|
return cameras
|
||||||
|
|
||||||
|
|
||||||
|
def read_cameras_binary(path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::WriteCamerasBinary(const std::string& path)
|
||||||
|
void Reconstruction::ReadCamerasBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
cameras = {}
|
||||||
|
with open(path_to_model_file, "rb") as fid:
|
||||||
|
num_cameras = read_next_bytes(fid, 8, "Q")[0]
|
||||||
|
for camera_line_index in range(num_cameras):
|
||||||
|
camera_properties = read_next_bytes(
|
||||||
|
fid, num_bytes=24, format_char_sequence="iiQQ")
|
||||||
|
camera_id = camera_properties[0]
|
||||||
|
model_id = camera_properties[1]
|
||||||
|
model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
|
||||||
|
width = camera_properties[2]
|
||||||
|
height = camera_properties[3]
|
||||||
|
num_params = CAMERA_MODEL_IDS[model_id].num_params
|
||||||
|
params = read_next_bytes(fid, num_bytes=8*num_params,
|
||||||
|
format_char_sequence="d"*num_params)
|
||||||
|
cameras[camera_id] = Camera(id=camera_id,
|
||||||
|
model=model_name,
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
|
params=np.array(params))
|
||||||
|
assert len(cameras) == num_cameras
|
||||||
|
return cameras
|
||||||
|
|
||||||
|
|
||||||
|
def read_images_text(path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadImagesText(const std::string& path)
|
||||||
|
void Reconstruction::WriteImagesText(const std::string& path)
|
||||||
|
"""
|
||||||
|
images = {}
|
||||||
|
with open(path, "r") as fid:
|
||||||
|
while True:
|
||||||
|
line = fid.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
line = line.strip()
|
||||||
|
if len(line) > 0 and line[0] != "#":
|
||||||
|
elems = line.split()
|
||||||
|
image_id = int(elems[0])
|
||||||
|
qvec = np.array(tuple(map(float, elems[1:5])))
|
||||||
|
tvec = np.array(tuple(map(float, elems[5:8])))
|
||||||
|
camera_id = int(elems[8])
|
||||||
|
image_name = elems[9]
|
||||||
|
elems = fid.readline().split()
|
||||||
|
xys = np.column_stack([tuple(map(float, elems[0::3])),
|
||||||
|
tuple(map(float, elems[1::3]))])
|
||||||
|
point3D_ids = np.array(tuple(map(int, elems[2::3])))
|
||||||
|
images[image_id] = Image(
|
||||||
|
id=image_id, qvec=qvec, tvec=tvec,
|
||||||
|
camera_id=camera_id, name=image_name,
|
||||||
|
xys=xys, point3D_ids=point3D_ids)
|
||||||
|
return images
|
||||||
|
|
||||||
|
|
||||||
|
def read_images_binary(path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadImagesBinary(const std::string& path)
|
||||||
|
void Reconstruction::WriteImagesBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
images = {}
|
||||||
|
with open(path_to_model_file, "rb") as fid:
|
||||||
|
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
|
||||||
|
for image_index in range(num_reg_images):
|
||||||
|
binary_image_properties = read_next_bytes(
|
||||||
|
fid, num_bytes=64, format_char_sequence="idddddddi")
|
||||||
|
image_id = binary_image_properties[0]
|
||||||
|
qvec = np.array(binary_image_properties[1:5])
|
||||||
|
tvec = np.array(binary_image_properties[5:8])
|
||||||
|
camera_id = binary_image_properties[8]
|
||||||
|
image_name = ""
|
||||||
|
current_char = read_next_bytes(fid, 1, "c")[0]
|
||||||
|
while current_char != b"\x00": # look for the ASCII 0 entry
|
||||||
|
image_name += current_char.decode("utf-8")
|
||||||
|
current_char = read_next_bytes(fid, 1, "c")[0]
|
||||||
|
num_points2D = read_next_bytes(fid, num_bytes=8,
|
||||||
|
format_char_sequence="Q")[0]
|
||||||
|
x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
|
||||||
|
format_char_sequence="ddq"*num_points2D)
|
||||||
|
xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
|
||||||
|
tuple(map(float, x_y_id_s[1::3]))])
|
||||||
|
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
|
||||||
|
images[image_id] = Image(
|
||||||
|
id=image_id, qvec=qvec, tvec=tvec,
|
||||||
|
camera_id=camera_id, name=image_name,
|
||||||
|
xys=xys, point3D_ids=point3D_ids)
|
||||||
|
return images
|
||||||
|
|
||||||
|
|
||||||
|
def read_points3D_text(path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadPoints3DText(const std::string& path)
|
||||||
|
void Reconstruction::WritePoints3DText(const std::string& path)
|
||||||
|
"""
|
||||||
|
points3D = {}
|
||||||
|
with open(path, "r") as fid:
|
||||||
|
while True:
|
||||||
|
line = fid.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
line = line.strip()
|
||||||
|
if len(line) > 0 and line[0] != "#":
|
||||||
|
elems = line.split()
|
||||||
|
point3D_id = int(elems[0])
|
||||||
|
xyz = np.array(tuple(map(float, elems[1:4])))
|
||||||
|
rgb = np.array(tuple(map(int, elems[4:7])))
|
||||||
|
error = float(elems[7])
|
||||||
|
image_ids = np.array(tuple(map(int, elems[8::2])))
|
||||||
|
point2D_idxs = np.array(tuple(map(int, elems[9::2])))
|
||||||
|
points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
|
||||||
|
error=error, image_ids=image_ids,
|
||||||
|
point2D_idxs=point2D_idxs)
|
||||||
|
return points3D
|
||||||
|
|
||||||
|
|
||||||
|
def read_points3d_binary(path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
||||||
|
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
points3D = {}
|
||||||
|
with open(path_to_model_file, "rb") as fid:
|
||||||
|
num_points = read_next_bytes(fid, 8, "Q")[0]
|
||||||
|
for point_line_index in range(num_points):
|
||||||
|
binary_point_line_properties = read_next_bytes(
|
||||||
|
fid, num_bytes=43, format_char_sequence="QdddBBBd")
|
||||||
|
point3D_id = binary_point_line_properties[0]
|
||||||
|
xyz = np.array(binary_point_line_properties[1:4])
|
||||||
|
rgb = np.array(binary_point_line_properties[4:7])
|
||||||
|
error = np.array(binary_point_line_properties[7])
|
||||||
|
track_length = read_next_bytes(
|
||||||
|
fid, num_bytes=8, format_char_sequence="Q")[0]
|
||||||
|
track_elems = read_next_bytes(
|
||||||
|
fid, num_bytes=8*track_length,
|
||||||
|
format_char_sequence="ii"*track_length)
|
||||||
|
image_ids = np.array(tuple(map(int, track_elems[0::2])))
|
||||||
|
point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
|
||||||
|
points3D[point3D_id] = Point3D(
|
||||||
|
id=point3D_id, xyz=xyz, rgb=rgb,
|
||||||
|
error=error, image_ids=image_ids,
|
||||||
|
point2D_idxs=point2D_idxs)
|
||||||
|
return points3D
|
||||||
|
|
||||||
|
|
||||||
|
def read_model(path, ext):
|
||||||
|
if ext == ".txt":
|
||||||
|
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
|
||||||
|
images = read_images_text(os.path.join(path, "images" + ext))
|
||||||
|
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
|
||||||
|
else:
|
||||||
|
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
|
||||||
|
images = read_images_binary(os.path.join(path, "images" + ext))
|
||||||
|
points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
|
||||||
|
return cameras, images, points3D
|
||||||
|
|
||||||
|
|
||||||
|
def qvec2rotmat(qvec):
|
||||||
|
return np.array([
|
||||||
|
[1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
|
||||||
|
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
|
||||||
|
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
|
||||||
|
[2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
|
||||||
|
1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
|
||||||
|
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
|
||||||
|
[2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
|
||||||
|
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
|
||||||
|
1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
|
||||||
|
|
||||||
|
|
||||||
|
def rotmat2qvec(R):
|
||||||
|
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
|
||||||
|
K = np.array([
|
||||||
|
[Rxx - Ryy - Rzz, 0, 0, 0],
|
||||||
|
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
|
||||||
|
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
|
||||||
|
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
|
||||||
|
eigvals, eigvecs = np.linalg.eigh(K)
|
||||||
|
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
|
||||||
|
if qvec[0] < 0:
|
||||||
|
qvec *= -1
|
||||||
|
return qvec
|
||||||
|
|
||||||
|
|
||||||
|
def write_cameras_text(cameras, path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::WriteCamerasText(const std::string& path)
|
||||||
|
void Reconstruction::ReadCamerasText(const std::string& path)
|
||||||
|
"""
|
||||||
|
HEADER = '# Camera list with one line of data per camera:\n'
|
||||||
|
'# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n'
|
||||||
|
'# Number of cameras: {}\n'.format(len(cameras))
|
||||||
|
with open(path, "w") as fid:
|
||||||
|
fid.write(HEADER)
|
||||||
|
for _, cam in cameras.items():
|
||||||
|
to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
|
||||||
|
line = " ".join([str(elem) for elem in to_write])
|
||||||
|
fid.write(line + "\n")
|
||||||
|
|
||||||
|
def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
|
||||||
|
"""pack and write to a binary file.
|
||||||
|
:param fid:
|
||||||
|
:param data: data to send, if multiple elements are sent at the same time,
|
||||||
|
they should be encapsuled either in a list or a tuple
|
||||||
|
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
|
||||||
|
should be the same length as the data list or tuple
|
||||||
|
:param endian_character: Any of {@, =, <, >, !}
|
||||||
|
"""
|
||||||
|
if isinstance(data, (list, tuple)):
|
||||||
|
bytes = struct.pack(endian_character + format_char_sequence, *data)
|
||||||
|
else:
|
||||||
|
bytes = struct.pack(endian_character + format_char_sequence, data)
|
||||||
|
fid.write(bytes)
|
||||||
|
|
||||||
|
def write_cameras_binary(cameras, path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::WriteCamerasBinary(const std::string& path)
|
||||||
|
void Reconstruction::ReadCamerasBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
with open(path_to_model_file, "wb") as fid:
|
||||||
|
write_next_bytes(fid, len(cameras), "Q")
|
||||||
|
for _, cam in cameras.items():
|
||||||
|
model_id = CAMERA_MODEL_NAMES[cam.model].model_id
|
||||||
|
camera_properties = [cam.id,
|
||||||
|
model_id,
|
||||||
|
cam.width,
|
||||||
|
cam.height]
|
||||||
|
write_next_bytes(fid, camera_properties, "iiQQ")
|
||||||
|
for p in cam.params:
|
||||||
|
write_next_bytes(fid, float(p), "d")
|
||||||
|
return cameras
|
||||||
|
|
||||||
|
|
||||||
|
def write_images_binary(images, path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadImagesBinary(const std::string& path)
|
||||||
|
void Reconstruction::WriteImagesBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
with open(path_to_model_file, "wb") as fid:
|
||||||
|
write_next_bytes(fid, len(images), "Q")
|
||||||
|
for _, img in images.items():
|
||||||
|
write_next_bytes(fid, img.id, "i")
|
||||||
|
write_next_bytes(fid, img.qvec.tolist(), "dddd")
|
||||||
|
write_next_bytes(fid, img.tvec.tolist(), "ddd")
|
||||||
|
write_next_bytes(fid, img.camera_id, "i")
|
||||||
|
for char in img.name:
|
||||||
|
write_next_bytes(fid, char.encode("utf-8"), "c")
|
||||||
|
write_next_bytes(fid, b"\x00", "c")
|
||||||
|
write_next_bytes(fid, len(img.point3D_ids), "Q")
|
||||||
|
for xy, p3d_id in zip(img.xys, img.point3D_ids):
|
||||||
|
write_next_bytes(fid, [*xy, p3d_id], "ddq")
|
||||||
|
|
||||||
|
def write_images_text(images, path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadImagesText(const std::string& path)
|
||||||
|
void Reconstruction::WriteImagesText(const std::string& path)
|
||||||
|
"""
|
||||||
|
if len(images) == 0:
|
||||||
|
mean_observations = 0
|
||||||
|
else:
|
||||||
|
mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
|
||||||
|
HEADER = '# Image list with two lines of data per image:\n'
|
||||||
|
'# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'
|
||||||
|
'# POINTS2D[] as (X, Y, POINT3D_ID)\n'
|
||||||
|
'# Number of images: {}, mean observations per image: {}\n'.format(len(images), mean_observations)
|
||||||
|
|
||||||
|
with open(path, "w") as fid:
|
||||||
|
fid.write(HEADER)
|
||||||
|
for _, img in images.items():
|
||||||
|
image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
|
||||||
|
first_line = " ".join(map(str, image_header))
|
||||||
|
fid.write(first_line + "\n")
|
||||||
|
|
||||||
|
points_strings = []
|
||||||
|
for xy, point3D_id in zip(img.xys, img.point3D_ids):
|
||||||
|
points_strings.append(" ".join(map(str, [*xy, point3D_id])))
|
||||||
|
fid.write(" ".join(points_strings) + "\n")
|
||||||
|
|
||||||
|
def write_points3D_text(points3D, path):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadPoints3DText(const std::string& path)
|
||||||
|
void Reconstruction::WritePoints3DText(const std::string& path)
|
||||||
|
"""
|
||||||
|
if len(points3D) == 0:
|
||||||
|
mean_track_length = 0
|
||||||
|
else:
|
||||||
|
mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
|
||||||
|
HEADER = '# 3D point list with one line of data per point:\n'
|
||||||
|
'# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n'
|
||||||
|
'# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length)
|
||||||
|
|
||||||
|
with open(path, "w") as fid:
|
||||||
|
fid.write(HEADER)
|
||||||
|
for _, pt in points3D.items():
|
||||||
|
point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
|
||||||
|
fid.write(" ".join(map(str, point_header)) + " ")
|
||||||
|
track_strings = []
|
||||||
|
for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
|
||||||
|
track_strings.append(" ".join(map(str, [image_id, point2D])))
|
||||||
|
fid.write(" ".join(track_strings) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def write_points3d_binary(points3D, path_to_model_file):
|
||||||
|
"""
|
||||||
|
see: src/base/reconstruction.cc
|
||||||
|
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
||||||
|
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
||||||
|
"""
|
||||||
|
with open(path_to_model_file, "wb") as fid:
|
||||||
|
write_next_bytes(fid, len(points3D), "Q")
|
||||||
|
for _, pt in points3D.items():
|
||||||
|
write_next_bytes(fid, pt.id, "Q")
|
||||||
|
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
|
||||||
|
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
|
||||||
|
write_next_bytes(fid, pt.error, "d")
|
||||||
|
track_length = pt.image_ids.shape[0]
|
||||||
|
write_next_bytes(fid, track_length, "Q")
|
||||||
|
for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
|
||||||
|
write_next_bytes(fid, [image_id, point2D_id], "ii")
|
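A usage sketch for the reader side of colmap_structure.py; it assumes a COLMAP sparse model already exists under sparse/0 (cameras.bin, images.bin, points3D.bin):

from easymocap.mytools.colmap_structure import read_model, qvec2rotmat

cameras, images, points3D = read_model('sparse/0', ext='.bin')
print(len(cameras), 'cameras,', len(images), 'images,', len(points3D), 'points')
for image_id, image in images.items():
    R = qvec2rotmat(image.qvec)    # 3x3 world-to-camera rotation
    print(image.name, R.shape, image.tvec)
    break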
easymocap/mytools/colmap_wrapper.py (new file, +468 lines)
|
|||||||
|
'''
|
||||||
|
@ Date: 2022-06-20 15:03:50
|
||||||
|
@ Author: Qing Shuai
|
||||||
|
@ Mail: s_q@zju.edu.cn
|
||||||
|
@ LastEditors: Qing Shuai
|
||||||
|
@ LastEditTime: 2022-08-16 20:24:07
|
||||||
|
@ FilePath: /EasyMocapPublic/easymocap/mytools/colmap_wrapper.py
|
||||||
|
'''
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import numpy as np
|
||||||
|
from os.path import join
|
||||||
|
import cv2
|
||||||
|
from .debug_utils import mkdir, run_cmd, log, mywarn
|
||||||
|
from .colmap_structure import Camera, Image, CAMERA_MODEL_NAMES
|
||||||
|
from .colmap_structure import rotmat2qvec
|
||||||
|
from .colmap_structure import read_points3d_binary
|
||||||
|
|
||||||
|
IS_PYTHON3 = sys.version_info[0] >= 3
|
||||||
|
|
||||||
|
MAX_IMAGE_ID = 2**31 - 1
|
||||||
|
|
||||||
|
CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
|
||||||
|
camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||||
|
model INTEGER NOT NULL,
|
||||||
|
width INTEGER NOT NULL,
|
||||||
|
height INTEGER NOT NULL,
|
||||||
|
params BLOB,
|
||||||
|
prior_focal_length INTEGER NOT NULL)"""
|
||||||
|
|
||||||
|
CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
|
||||||
|
image_id INTEGER PRIMARY KEY NOT NULL,
|
||||||
|
rows INTEGER NOT NULL,
|
||||||
|
cols INTEGER NOT NULL,
|
||||||
|
data BLOB,
|
||||||
|
FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""
|
||||||
|
|
||||||
|
CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
|
||||||
|
image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
camera_id INTEGER NOT NULL,
|
||||||
|
prior_qw REAL,
|
||||||
|
prior_qx REAL,
|
||||||
|
prior_qy REAL,
|
||||||
|
prior_qz REAL,
|
||||||
|
prior_tx REAL,
|
||||||
|
prior_ty REAL,
|
||||||
|
prior_tz REAL,
|
||||||
|
CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}),
|
||||||
|
FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))
|
||||||
|
""".format(MAX_IMAGE_ID)
|
||||||
|
|
||||||
|
CREATE_TWO_VIEW_GEOMETRIES_TABLE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS two_view_geometries (
|
||||||
|
pair_id INTEGER PRIMARY KEY NOT NULL,
|
||||||
|
rows INTEGER NOT NULL,
|
||||||
|
cols INTEGER NOT NULL,
|
||||||
|
data BLOB,
|
||||||
|
config INTEGER NOT NULL,
|
||||||
|
F BLOB,
|
||||||
|
E BLOB,
|
||||||
|
H BLOB)
|
||||||
|
"""
|
||||||
|
|
||||||
|
CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
|
||||||
|
image_id INTEGER PRIMARY KEY NOT NULL,
|
||||||
|
rows INTEGER NOT NULL,
|
||||||
|
cols INTEGER NOT NULL,
|
||||||
|
data BLOB,
|
||||||
|
FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)
|
||||||
|
"""
|
||||||
|
|
||||||
|
CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
|
||||||
|
pair_id INTEGER PRIMARY KEY NOT NULL,
|
||||||
|
rows INTEGER NOT NULL,
|
||||||
|
cols INTEGER NOT NULL,
|
||||||
|
data BLOB)"""
|
||||||
|
|
||||||
|
CREATE_NAME_INDEX = \
|
||||||
|
"CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
|
||||||
|
|
||||||
|
CREATE_ALL = "; ".join([
|
||||||
|
CREATE_CAMERAS_TABLE,
|
||||||
|
CREATE_IMAGES_TABLE,
|
||||||
|
CREATE_KEYPOINTS_TABLE,
|
||||||
|
CREATE_DESCRIPTORS_TABLE,
|
||||||
|
CREATE_MATCHES_TABLE,
|
||||||
|
CREATE_TWO_VIEW_GEOMETRIES_TABLE,
|
||||||
|
CREATE_NAME_INDEX
|
||||||
|
])
|
||||||
|
|
||||||
|
def image_ids_to_pair_id(image_id1, image_id2):
|
||||||
|
if image_id1 > image_id2:
|
||||||
|
image_id1, image_id2 = image_id2, image_id1
|
||||||
|
return image_id1 * MAX_IMAGE_ID + image_id2
|
||||||
|
|
||||||
|
|
||||||
|
def pair_id_to_image_ids(pair_id):
|
||||||
|
image_id2 = pair_id % MAX_IMAGE_ID
|
||||||
|
image_id1 = (pair_id - image_id2) // MAX_IMAGE_ID
|
||||||
|
return image_id1, image_id2
|
||||||
|
|
||||||
|
def array_to_blob(array):
|
||||||
|
if IS_PYTHON3:
|
||||||
|
return array.tobytes()
|
||||||
|
else:
|
||||||
|
return np.getbuffer(array)
|
||||||
|
|
||||||
|
def blob_to_array(blob, dtype, shape=(-1,)):
|
||||||
|
if blob is None:
|
||||||
|
return np.empty((0, 2), dtype=dtype)
|
||||||
|
if IS_PYTHON3:
|
||||||
|
return np.frombuffer(blob, dtype=dtype).reshape(*shape)
|
||||||
|
else:
|
||||||
|
return np.frombuffer(blob, dtype=dtype).reshape(*shape)
|
||||||
|
|
||||||
|
|
||||||
|
class COLMAPDatabase(sqlite3.Connection):
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def connect(database_path):
|
||||||
|
return sqlite3.connect(database_path, factory=COLMAPDatabase)
|
||||||
|
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(COLMAPDatabase, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
self.create_tables = lambda: self.executescript(CREATE_ALL)
|
||||||
|
self.create_cameras_table = \
|
||||||
|
lambda: self.executescript(CREATE_CAMERAS_TABLE)
|
||||||
|
self.create_descriptors_table = \
|
||||||
|
lambda: self.executescript(CREATE_DESCRIPTORS_TABLE)
|
||||||
|
self.create_images_table = \
|
||||||
|
lambda: self.executescript(CREATE_IMAGES_TABLE)
|
||||||
|
self.create_two_view_geometries_table = \
|
||||||
|
lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE)
|
||||||
|
self.create_keypoints_table = \
|
||||||
|
lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
|
||||||
|
self.create_matches_table = \
|
||||||
|
lambda: self.executescript(CREATE_MATCHES_TABLE)
|
||||||
|
self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)
|
||||||
|
|
||||||
|
def add_camera(self, model, width, height, params,
|
||||||
|
prior_focal_length=False, camera_id=None):
|
||||||
|
params = np.asarray(params, np.float64)
|
||||||
|
cursor = self.execute(
|
||||||
|
"INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)",
|
||||||
|
(camera_id, model, width, height, array_to_blob(params),
|
||||||
|
prior_focal_length))
|
||||||
|
return cursor.lastrowid
|
||||||
|
|
||||||
|
def add_image(self, name, camera_id,
|
||||||
|
prior_q=np.full(4, np.NaN), prior_t=np.full(3, np.NaN), image_id=None):
|
||||||
|
cursor = self.execute(
|
||||||
|
"INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
|
(image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2],
|
||||||
|
prior_q[3], prior_t[0], prior_t[1], prior_t[2]))
|
||||||
|
return cursor.lastrowid
|
||||||
|
|
||||||
|
def add_keypoints(self, image_id, keypoints):
|
||||||
|
assert(len(keypoints.shape) == 2)
|
||||||
|
assert(keypoints.shape[1] in [2, 4, 6])
|
||||||
|
|
||||||
|
keypoints = np.asarray(keypoints, np.float32)
|
||||||
|
self.execute(
|
||||||
|
"INSERT INTO keypoints VALUES (?, ?, ?, ?)",
|
||||||
|
(image_id,) + keypoints.shape + (array_to_blob(keypoints),))
|
||||||
|
|
||||||
|
def add_descriptors(self, image_id, descriptors):
|
||||||
|
descriptors = np.ascontiguousarray(descriptors, np.uint8)
|
||||||
|
self.execute(
|
||||||
|
"INSERT INTO descriptors VALUES (?, ?, ?, ?)",
|
||||||
|
(image_id,) + descriptors.shape + (array_to_blob(descriptors),))
|
||||||
|
|
||||||
|
def add_matches(self, image_id1, image_id2, matches):
|
||||||
|
assert(len(matches.shape) == 2)
|
||||||
|
assert(matches.shape[1] == 2)
|
||||||
|
|
||||||
|
if image_id1 > image_id2:
|
||||||
|
matches = matches[:,::-1]
|
||||||
|
|
||||||
|
pair_id = image_ids_to_pair_id(image_id1, image_id2)
|
||||||
|
matches = np.asarray(matches, np.uint32)
|
||||||
|
self.execute(
|
||||||
|
"INSERT INTO matches VALUES (?, ?, ?, ?)",
|
||||||
|
(pair_id,) + matches.shape + (array_to_blob(matches),))
|
||||||
|
|
||||||
|
def add_two_view_geometry(self, image_id1, image_id2, matches,
|
||||||
|
F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2):
|
||||||
|
assert(len(matches.shape) == 2)
|
||||||
|
assert(matches.shape[1] == 2)
|
||||||
|
|
||||||
|
if image_id1 > image_id2:
|
||||||
|
matches = matches[:,::-1]
|
||||||
|
|
||||||
|
pair_id = image_ids_to_pair_id(image_id1, image_id2)
|
||||||
|
matches = np.asarray(matches, np.uint32)
|
||||||
|
F = np.asarray(F, dtype=np.float64)
|
||||||
|
E = np.asarray(E, dtype=np.float64)
|
||||||
|
H = np.asarray(H, dtype=np.float64)
|
||||||
|
self.execute(
|
||||||
|
"INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
|
(pair_id,) + matches.shape + (array_to_blob(matches), config,
|
||||||
|
array_to_blob(F), array_to_blob(E), array_to_blob(H)))
|
||||||
|
|
||||||
|
def read_images(self):
|
||||||
|
image_id_to_name, name_to_image_id = {}, {}
|
||||||
|
image_results = self.execute("SELECT * FROM images")
|
||||||
|
for result in image_results:
|
||||||
|
image_id, name, camera_id, q0, q1, q2, q3, t0, t1, t2 = result
|
||||||
|
image_id_to_name[image_id] = name
|
||||||
|
name_to_image_id[name] = image_id
|
||||||
|
return image_id_to_name, name_to_image_id
|
||||||
|
|
||||||
|
def read_keypoints(self, mapping=None):
|
||||||
|
image_id_to_keypoints = {}
|
||||||
|
keypoints_results = self.execute("SELECT * FROM keypoints")
|
||||||
|
for keypoints_result in keypoints_results:
|
||||||
|
image_id, rows, cols, keypoints = keypoints_result
|
||||||
|
keypoints = blob_to_array(keypoints, np.float32, (rows, cols))
|
||||||
|
if mapping is None:
|
||||||
|
image_id_to_keypoints[image_id] = keypoints
|
||||||
|
else:
|
||||||
|
image_id_to_keypoints[mapping[image_id]] = keypoints
|
||||||
|
return image_id_to_keypoints
|
||||||
|
|
||||||
|
def read_matches(self, mapping=None):
|
||||||
|
matches_results = self.execute("SELECT * FROM matches")
|
||||||
|
matches = {}
|
||||||
|
for matches_result in matches_results:
|
||||||
|
pair_id, rows, cols, match = matches_result
|
||||||
|
image_id0, image_id1 = pair_id_to_image_ids(pair_id)
|
||||||
|
if rows == 0:
|
||||||
|
continue
|
||||||
|
match = blob_to_array(match, dtype=np.uint32, shape=(rows, cols))
|
||||||
|
if mapping is not None:
|
||||||
|
image_id0 = mapping[image_id0]
|
||||||
|
image_id1 = mapping[image_id1]
|
||||||
|
matches[(image_id0, image_id1)] = match
|
||||||
|
return matches
|
||||||
|
|
||||||
|
def read_two_view_geometry(self, mapping=None):
|
||||||
|
geometry = self.execute("SELECT * FROM two_view_geometries")
|
||||||
|
geometries = {}
|
||||||
|
for pair_id, rows, cols, data, config, F, E, H in geometry:
|
||||||
|
F = blob_to_array(F, dtype=np.float64)
|
||||||
|
E = blob_to_array(E, dtype=np.float64)
|
||||||
|
H = blob_to_array(H, dtype=np.float64)
|
||||||
|
image_id0, image_id1 = pair_id_to_image_ids(pair_id)
|
||||||
|
match = blob_to_array(data, dtype=np.uint32, shape=(rows, cols))
|
||||||
|
if rows == 0:continue
|
||||||
|
if mapping is not None:
|
||||||
|
image_id0 = mapping[image_id0]
|
||||||
|
image_id1 = mapping[image_id1]
|
||||||
|
geometries[(image_id0, image_id1)] = {'matches': match, 'F':F, 'E':E, 'H':H, 'config': config}
|
||||||
|
return geometries
|
||||||
|
|
||||||
|
def create_empty_db(database_path):
|
||||||
|
if os.path.exists(database_path):
|
||||||
|
mywarn('Removing old database: {}'.format(database_path))
|
||||||
|
os.remove(database_path)
|
||||||
|
print('Creating an empty database...')
|
||||||
|
db = COLMAPDatabase.connect(database_path)
|
||||||
|
db.create_tables()
|
||||||
|
db.commit()
|
||||||
|
db.close()
|
||||||
|
|
||||||
|
def create_cameras(db, cameras, subs, width, height, share_intri=True):
|
||||||
|
model = 'OPENCV'
|
||||||
|
if share_intri:
|
||||||
|
cam_id = 1
|
||||||
|
K = cameras[subs[0]]['K']
|
||||||
|
D = cameras[subs[0]]['dist'].reshape(1, 5)
|
||||||
|
fx, fy, cx, cy, k1, k2, p1, p2, k3, k4, k5, k6 = K[0, 0], K[1, 1], K[0, 2], K[1, 2], D[0, 0], D[0, 1], D[0, 2], D[0, 3], D[0, 4], 0, 0, 0
|
||||||
|
|
||||||
|
params = [fx, fy, cx, cy, k1, k2, p1, p2]
|
||||||
|
# params = [fx, fy, cx, cy, 0, 0, 0, 0]
|
||||||
|
camera = Camera(
|
||||||
|
id=cam_id,
|
||||||
|
model=model,
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
|
params=params
|
||||||
|
)
|
||||||
|
cameras_colmap = {cam_id: camera}
|
||||||
|
cameras_map = {sub:cam_id for sub in subs}
|
||||||
|
#
|
||||||
|
db.add_camera(CAMERA_MODEL_NAMES[model].model_id, width, height, params,
|
||||||
|
prior_focal_length=False, camera_id=cam_id)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
return cameras_colmap, cameras_map
|
||||||
|
|
||||||
|
def create_images(db, cameras, cameras_map, image_names):
|
||||||
|
subs = sorted(list(image_names.keys()))
|
||||||
|
images = {}
|
||||||
|
for sub, image_name in image_names.items():
|
||||||
|
img_id = subs.index(sub) + 1
|
||||||
|
R = cameras[sub]['R']
|
||||||
|
T = cameras[sub]['T']
|
||||||
|
qvec = rotmat2qvec(R)
|
||||||
|
tvec = T.T[0]
|
||||||
|
image = Image(
|
||||||
|
id=img_id,
|
||||||
|
qvec=qvec,
|
||||||
|
tvec=tvec,
|
||||||
|
camera_id=cameras_map[sub],
|
||||||
|
name=os.path.basename(image_name),
|
||||||
|
xys=[],
|
||||||
|
point3D_ids=[]
|
||||||
|
)
|
||||||
|
images[img_id] = image
|
||||||
|
db.add_image(image.name, camera_id=image.camera_id,
|
||||||
|
prior_q=image.qvec, prior_t=image.tvec, image_id=img_id)
|
||||||
|
return images
|
||||||
|
|
||||||
|
def copy_images(data, out, nf=0, copy_func=shutil.copyfile, mask='mask', add_mask=True):
|
||||||
|
subs = sorted(os.listdir(join(data, 'images')))
|
||||||
|
image_names = {}
|
||||||
|
for sub in subs:
|
||||||
|
srcname = join(data, 'images', sub, '{:06d}.jpg'.format(nf))
|
||||||
|
if not os.path.exists(srcname):
|
||||||
|
mywarn('{} not exists, skip'.format(srcname))
|
||||||
|
return False
|
||||||
|
dstname = join(out, 'images', '{}.jpg'.format(sub))
|
||||||
|
image_names[sub] = dstname
|
||||||
|
if os.path.exists(dstname):
|
||||||
|
continue
|
||||||
|
os.makedirs(os.path.dirname(dstname), exist_ok=True)
|
||||||
|
copy_func(srcname, dstname)
|
||||||
|
mskname = join(data, mask, sub, '{:06d}.png'.format(nf))
|
||||||
|
dstname = join(out, 'mask', '{}.jpg.png'.format(sub))
|
||||||
|
if os.path.exists(mskname) and add_mask:
|
||||||
|
os.makedirs(os.path.dirname(dstname), exist_ok=True)
|
||||||
|
copy_func(mskname, dstname)
|
||||||
|
return True, image_names
|
||||||
|
|
||||||
|
def colmap_feature_extract(colmap, path, share_camera, add_mask):
|
||||||
|
'''
|
||||||
|
struct SiftMatchingOptions {
|
||||||
|
// Number of threads for feature matching and geometric verification.
|
||||||
|
int num_threads = -1;
|
||||||
|
|
||||||
|
// Whether to use the GPU for feature matching.
|
||||||
|
bool use_gpu = true;
|
||||||
|
|
||||||
|
// Index of the GPU used for feature matching. For multi-GPU matching,
|
||||||
|
// you should separate multiple GPU indices by comma, e.g., "0,1,2,3".
|
||||||
|
std::string gpu_index = "-1";
|
||||||
|
|
||||||
|
// Maximum distance ratio between first and second best match.
|
||||||
|
double max_ratio = 0.8;
|
||||||
|
|
||||||
|
// Maximum distance to best match.
|
||||||
|
double max_distance = 0.7;
|
||||||
|
|
||||||
|
// Whether to enable cross checking in matching.
|
||||||
|
bool cross_check = true;
|
||||||
|
|
||||||
|
// Maximum number of matches.
|
||||||
|
int max_num_matches = 32768;
|
||||||
|
|
||||||
|
// Maximum epipolar error in pixels for geometric verification.
|
||||||
|
double max_error = 4.0;
|
||||||
|
|
||||||
|
// Confidence threshold for geometric verification.
|
||||||
|
double confidence = 0.999;
|
||||||
|
|
||||||
|
// Minimum/maximum number of RANSAC iterations. Note that this option
|
||||||
|
// overrules the min_inlier_ratio option.
|
||||||
|
int min_num_trials = 100;
|
||||||
|
int max_num_trials = 10000;
|
||||||
|
|
||||||
|
// A priori assumed minimum inlier ratio, which determines the maximum
|
||||||
|
// number of iterations.
|
||||||
|
double min_inlier_ratio = 0.25;
|
||||||
|
|
||||||
|
// Minimum number of inliers for an image pair to be considered as
|
||||||
|
// geometrically verified.
|
||||||
|
int min_num_inliers = 15;
|
||||||
|
|
||||||
|
// Whether to attempt to estimate multiple geometric models per image pair.
|
||||||
|
bool multiple_models = false;
|
||||||
|
|
||||||
|
// Whether to perform guided matching, if geometric verification succeeds.
|
||||||
|
bool guided_matching = false;
|
||||||
|
|
||||||
|
bool Check() const;
|
||||||
|
};
|
||||||
|
'''
|
||||||
|
cmd = f'{colmap} feature_extractor --database_path {path}/database.db \
|
||||||
|
--image_path {path}/images \
|
||||||
|
--SiftExtraction.peak_threshold 0.006 \
|
||||||
|
--ImageReader.camera_model OPENCV \
|
||||||
|
'
|
||||||
|
if share_camera:
|
||||||
|
cmd += ' --ImageReader.single_camera 1'
|
||||||
|
|
||||||
|
if add_mask:
|
||||||
|
cmd += f' --ImageReader.mask_path {path}/mask'
|
||||||
|
cmd += f' >> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
||||||
|
|
||||||
|
def colmap_feature_match(colmap, path):
|
||||||
|
cmd = f'{colmap} exhaustive_matcher --database_path {path}/database.db \
|
||||||
|
--SiftMatching.guided_matching 0 \
|
||||||
|
--SiftMatching.max_ratio 0.8 \
|
||||||
|
--SiftMatching.max_distance 0.5 \
|
||||||
|
--SiftMatching.cross_check 1 \
|
||||||
|
--SiftMatching.max_error 4 \
|
||||||
|
--SiftMatching.max_num_matches 32768 \
|
||||||
|
--SiftMatching.confidence 0.9999 \
|
||||||
|
--SiftMatching.max_num_trials 10000 \
|
||||||
|
--SiftMatching.min_inlier_ratio 0.25 \
|
||||||
|
--SiftMatching.min_num_inliers 30 \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
||||||
|
|
||||||
|
def colmap_ba(colmap, path, with_init=False):
|
||||||
|
if with_init:
|
||||||
|
cmd = f'{colmap} point_triangulator --database_path {path}/database.db \
|
||||||
|
--image_path {path}/images \
|
||||||
|
--input_path {path}/sparse/0 \
|
||||||
|
--output_path {path}/sparse/0 \
|
||||||
|
--Mapper.tri_merge_max_reproj_error 3 \
|
||||||
|
--Mapper.ignore_watermarks 1 \
|
||||||
|
--Mapper.filter_max_reproj_error 2 \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
||||||
|
cmd = f'{colmap} bundle_adjuster \
|
||||||
|
--input_path {path}/sparse/0 \
|
||||||
|
--output_path {path}/sparse/0 \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
||||||
|
points3d = read_points3d_binary(join(path, 'sparse', '0', 'points3D.bin'))
|
||||||
|
pids = list(points3d.keys())
|
||||||
|
mean_error = np.mean([points3d[p].error for p in pids])
|
||||||
|
log('Triangulate {} points, mean error: {:.2f} pixel'.format(len(pids), mean_error))
|
||||||
|
else:
|
||||||
|
mkdir(join(path, 'sparse'))
|
||||||
|
cmd = f'{colmap} mapper --database_path {path}/database.db --image_path {path}/images --output_path {path}/sparse \
|
||||||
|
--Mapper.ba_refine_principal_point 1 \
|
||||||
|
--Mapper.ba_global_max_num_iterations 1000 \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
||||||
|
|
||||||
|
|
||||||
|
def colmap_dense(colmap, path):
|
||||||
|
mkdir(join(path, 'dense'))
|
||||||
|
cmd = f'{colmap} image_undistorter --image_path {path}/images --input_path {path}/sparse/0 --output_path {path}/dense --output_type COLMAP --max_image_size 2000'
|
||||||
|
run_cmd(cmd)
|
||||||
|
cmd = f'{colmap} patch_match_stereo \
|
||||||
|
--workspace_path {path}/dense \
|
||||||
|
--workspace_format COLMAP \
|
||||||
|
--PatchMatchStereo.geom_consistency true \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
|
||||||
|
run_cmd(cmd)
|
||||||
|
cmd = f'{colmap} stereo_fusion \
|
||||||
|
--workspace_path {path}/dense \
|
||||||
|
--workspace_format COLMAP \
|
||||||
|
--input_type geometric \
|
||||||
|
--output_path {path}/dense/fused.ply \
|
||||||
|
>> {path}/log.txt'
|
||||||
|
run_cmd(cmd)
|
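A sketch of how the wrapper functions above chain together for one scene; it assumes the colmap binary is on PATH and that <path>/images has already been populated (for example by copy_images). All paths are illustrative:

from easymocap.mytools.colmap_wrapper import (create_empty_db,
    colmap_feature_extract, colmap_feature_match, colmap_ba)

path = 'output/colmap_example'
create_empty_db(path + '/database.db')
colmap_feature_extract('colmap', path, share_camera=True, add_mask=False)
colmap_feature_match('colmap', path)
colmap_ba('colmap', path, with_init=False)   # runs the mapper; with_init=True triangulates from known poses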
easymocap/mytools/debug_utils.py (new file, +86 lines)

'''
  @ Date: 2022-02-14 14:54:50
  @ Author: Qing Shuai
  @ Mail: s_q@zju.edu.cn
  @ LastEditors: Qing Shuai
  @ LastEditTime: 2022-06-14 18:07:19
  @ FilePath: /EasyMocapPublic/easymocap/mytools/debug_utils.py
'''
from termcolor import colored
import os
from os.path import join
import shutil
import subprocess
import time
import datetime

def toc():
    return time.time() * 1000

def myprint(cmd, level):
    color = {'run': 'blue', 'info': 'green', 'warn': 'yellow', 'error': 'red'}[level]
    print(colored(cmd, color))

def log(text):
    myprint(text, 'info')

def log_time(text):
    strf = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    print(colored(strf, 'yellow'), colored(text, 'green'))

def mywarn(text):
    myprint(text, 'warn')

warning_infos = set()

def oncewarn(text):
    if text in warning_infos:
        return
    warning_infos.add(text)
    myprint(text, 'warn')

def myerror(text):
    myprint(text, 'error')

def run_cmd(cmd, verbo=True, bg=False):
    if verbo: myprint('[run] ' + cmd, 'run')
    if bg:
        args = cmd.split()
        print(args)
        p = subprocess.Popen(args)
        return [p]
    else:
        os.system(cmd)
        return []

def mkdir(path):
    if os.path.exists(path):
        return 0
    log('mkdir {}'.format(path))
    os.makedirs(path, exist_ok=True)

def cp(srcname, dstname):
    mkdir(join(os.path.dirname(dstname)))
    shutil.copyfile(srcname, dstname)

def print_table(header, contents):
    from tabulate import tabulate
    length = len(contents[0])
    tables = [[] for _ in range(length)]
    mean = ['Mean']
    for icnt, content in enumerate(contents):
        for i in range(length):
            if isinstance(content[i], float):
                tables[i].append('{:6.2f}'.format(content[i]))
            else:
                tables[i].append('{}'.format(content[i]))
        if icnt > 0:
            mean.append('{:6.2f}'.format(sum(content)/length))
    tables.append(mean)
    print(tabulate(tables, header, tablefmt='fancy_grid'))

def check_exists(path):
    flag1 = os.path.isfile(path) and os.path.exists(path)
    flag2 = os.path.isdir(path) and len(os.listdir(path)) >= 10
    return flag1 or flag2
easymocap/mytools/file_utils.py

@@ -2,8 +2,8 @@
   @ Date: 2021-03-15 12:23:12
   @ Author: Qing Shuai
   @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-06-14 22:25:58
-  @ FilePath: /EasyMocapRelease/easymocap/mytools/file_utils.py
+  @ LastEditTime: 2022-07-21 15:55:02
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/file_utils.py
 '''
 import os
 import json
@@ -11,12 +11,18 @@ import numpy as np
 from os.path import join
 
 mkdir = lambda x:os.makedirs(x, exist_ok=True)
-mkout = lambda x:mkdir(os.path.dirname(x))
-
+# mkout = lambda x:mkdir(os.path.dirname(x)) if x is not None
+def mkout(x):
+    if x is not None:
+        mkdir(os.path.dirname(x))
 def read_json(path):
     assert os.path.exists(path), path
     with open(path) as f:
-        data = json.load(f)
+        try:
+            data = json.load(f)
+        except:
+            print('Reading error {}'.format(path))
+            data = []
     return data
 
 def save_json(file, data):
@ -25,6 +31,17 @@ def save_json(file, data):
|
|||||||
with open(file, 'w') as f:
|
with open(file, 'w') as f:
|
||||||
json.dump(data, f, indent=4)
|
json.dump(data, f, indent=4)
|
||||||
|
|
||||||
|
def append_json(file, data):
|
||||||
|
if not os.path.exists(os.path.dirname(file)):
|
||||||
|
os.makedirs(os.path.dirname(file))
|
||||||
|
if os.path.exists(file):
|
||||||
|
res = read_json(file)
|
||||||
|
assert isinstance(res, list)
|
||||||
|
res.append(data)
|
||||||
|
data = res
|
||||||
|
with open(file, 'w') as f:
|
||||||
|
json.dump(data, f, indent=4)
|
||||||
|
|
||||||
save_annot = save_json
|
save_annot = save_json
|
||||||
|
|
||||||
def getFileList(root, ext='.jpg'):
|
def getFileList(root, ext='.jpg'):
|
||||||
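# --- Illustrative usage sketch (not part of the original commit): append_json
# keeps a growing list on disk; it assumes the file already stores a JSON list. ---
def _append_json_example():
    import tempfile
    with tempfile.TemporaryDirectory() as tmp:
        logname = join(tmp, 'records', 'log.json')
        os.makedirs(os.path.dirname(logname))
        save_json(logname, [])                            # start from an empty list
        append_json(logname, {'frame': 0, 'ids': [0, 1]})
        append_json(logname, {'frame': 1, 'ids': [0]})
        assert len(read_json(logname)) == 2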
@ -51,19 +68,24 @@ def read_annot(annotname, mode='body25'):
         data[i]['id'] = data[i].pop('personID')
         if 'keypoints2d' in data[i].keys() and 'keypoints' not in data[i].keys():
             data[i]['keypoints'] = data[i].pop('keypoints2d')
-        for key in ['bbox', 'keypoints', 'handl2d', 'handr2d', 'face2d']:
+        for key in ['bbox', 'keypoints',
+            'bbox_handl2d', 'handl2d',
+            'bbox_handr2d', 'handr2d',
+            'bbox_face2d', 'face2d']:
             if key not in data[i].keys():continue
             data[i][key] = np.array(data[i][key])
             if key == 'face2d':
                 # TODO: Make parameters, 17 is the offset for the eye brows,
                 # etc. 51 is the total number of FLAME compatible landmarks
                 data[i][key] = data[i][key][17:17+51, :]
-        data[i]['bbox'] = data[i]['bbox'][:5]
-        if data[i]['bbox'][-1] < 0.001:
-            # print('{}/{} bbox conf = 0, may be error'.format(annotname, i))
-            data[i]['bbox'][-1] = 1
+        if 'bbox' in data[i].keys():
+            data[i]['bbox'] = data[i]['bbox'][:5]
+            if data[i]['bbox'][-1] < 0.001:
+                print('{}/{} bbox conf = 0, may be error'.format(annotname, i))
+                data[i]['bbox'][-1] = 0
+        # combine the basic results
         if mode == 'body25':
-            data[i]['keypoints'] = data[i]['keypoints']
+            data[i]['keypoints'] = data[i].get('keypoints', np.zeros((25, 3)))
         elif mode == 'body15':
             data[i]['keypoints'] = data[i]['keypoints'][:15, :]
         elif mode in ['handl', 'handr']:
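# --- Illustrative sketch (not part of the original commit): read_annot consumes
# per-frame JSON annotations shaped roughly as below; the enclosing 'annots' key,
# the filename field and the exact values are assumptions. ---
# {
#     "filename": "images/0/000000.jpg",
#     "annots": [
#         {"personID": 0,
#          "bbox": [l, t, r, b, conf],
#          "keypoints": [[x, y, conf], ...],          # 25 joints for mode='body25'
#          "handl2d": [...], "handr2d": [...], "face2d": [...]}
#     ]
# }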
@ -91,7 +113,7 @@ def array2raw(array, separator=' ', fmt='%.3f'):
         res.append(separator.join([fmt%(d) for d in data]))
 
-def myarray2string(array, separator=', ', fmt='%.3f', indent=8):
+def myarray2string(array, separator=', ', fmt='%7.7f', indent=8):
     assert len(array.shape) == 2, 'Only support MxN matrix, {}'.format(array.shape)
     blank = ' ' * indent
     res = ['[']
@ -110,14 +132,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
         out_text.append(' {\n')
         output = {}
         output['id'] = data['id']
-        for key in keys:
-            if key not in data.keys():continue
+        for k in ['type']:
+            if k in data.keys():output[k] = '\"{}\"'.format(data[k])
+        keys_current = [k for k in keys if k in data.keys()]
+        for key in keys_current:
             # BUG: This function will failed if the rows of the data[key] is too large
             # output[key] = np.array2string(data[key], max_line_width=1000, separator=', ', formatter=format_out)
             output[key] = myarray2string(data[key], separator=', ', fmt=fmt)
         for key in output.keys():
             out_text.append(' \"{}\": {}'.format(key, output[key]))
-            if key != keys[-1]:
+            if key != keys_current[-1]:
                 out_text.append(',\n')
             else:
                 out_text.append('\n')
@ -134,17 +158,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
     else:
         return ''.join(out_text)
 
-def write_keypoints3d(dumpname, results):
+def write_keypoints3d(dumpname, results, keys = ['keypoints3d']):
     # TODO:rewrite it
-    keys = ['keypoints3d']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.7f')
 
 def write_vertices(dumpname, results):
     keys = ['vertices']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.5f')
 
 def write_smpl(dumpname, results):
-    keys = ['Rh', 'Th', 'poses', 'expression', 'shapes']
+    keys = ['Rh', 'Th', 'poses', 'handl', 'handr', 'expression', 'shapes']
     write_common_results(dumpname, results, keys)
 
 def batch_bbox_from_pose(keypoints2d, height, width, rate=0.1):
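# --- Illustrative sketch (not part of the original commit): the JSON written
# by write_keypoints3d / write_smpl holds one entry per person; the values
# below are made up. ---
# [
#     {
#         "id": 0,
#         "keypoints3d": [[0.012, -0.034, 1.234, 0.98], ...]    # x, y, z, confidence
#     }
# ]
# write_smpl uses the same layout with the keys
# ['Rh', 'Th', 'poses', 'handl', 'handr', 'expression', 'shapes'] instead.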
@ -2,8 +2,8 @@
 @ Date: 2021-04-21 15:19:21
 @ Author: Qing Shuai
 @ LastEditors: Qing Shuai
-@ LastEditTime: 2021-07-29 16:12:37
-@ FilePath: /EasyMocap/easymocap/mytools/reader.py
+@ LastEditTime: 2022-07-22 23:23:26
+@ FilePath: /EasyMocapPublic/easymocap/mytools/reader.py
 '''
 # function to read data
 """
@ -27,17 +27,14 @@ def read_keypoints3d(filename):
     res_ = []
     for d in data:
         pid = d['id'] if 'id' in d.keys() else d['personID']
-        pose3d = np.array(d['keypoints3d'], dtype=np.float32)
-        if pose3d.shape[0] > 25:
-            # when hands are present, copy the body25 wrist joints into the hand root joints
-            pose3d[25, :] = pose3d[7, :]
-            pose3d[46, :] = pose3d[4, :]
-        if pose3d.shape[1] == 3:
-            pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
-        res_.append({
-            'id': pid,
-            'keypoints3d': pose3d
-        })
+        ret = {'id': pid, 'type': 'body25'}
+        for key in ['keypoints3d', 'handl3d', 'handr3d', 'face3d']:
+            if key not in d.keys():continue
+            pose3d = np.array(d[key], dtype=np.float32)
+            if pose3d.shape[1] == 3:
+                pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
+            ret[key] = pose3d
+        res_.append(ret)
     return res_
 
 def read_keypoints3d_dict(filename):
@ -56,9 +53,11 @@ def read_keypoints3d_dict(filename):
 
 def read_smpl(filename):
     datas = read_json(filename)
+    if isinstance(datas, dict):
+        datas = datas['annots']
     outputs = []
     for data in datas:
-        for key in ['Rh', 'Th', 'poses', 'shapes', 'expression']:
+        for key in ['Rh', 'Th', 'poses', 'handl', 'handr', 'shapes', 'expression', 'keypoints3d']:
             if key in data.keys():
                 data[key] = np.array(data[key], dtype=np.float32)
         # for smplx results
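# --- Illustrative usage sketch (not part of the original commit), reading back
# a file produced by write_keypoints3d; the path is hypothetical. ---
# results = read_keypoints3d('output/keypoints3d/000000.json')
# for person in results:
#     pid = person['id']
#     k3d = person['keypoints3d']    # (nJoints, 4) array: x, y, z, confidence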
easymocap/mytools/triangulator.py (new file, 735 lines)
@ -0,0 +1,735 @@
import numpy as np
import cv2
from easymocap.datasets.base import crop_image

from easymocap.estimator.wrapper_base import bbox_from_keypoints
from easymocap.mytools.vis_base import merge, plot_keypoints_auto
from .debug_utils import log, mywarn, myerror

def batch_triangulate(keypoints_, Pall, min_view=2):
    # keypoints: (nViews, nJoints, 3)
    # Pall: (nViews, 3, 4)
    # A: (nJoints, nViewsx2, 4), x: (nJoints, 4, 1); b: (nJoints, nViewsx2, 1)
    v = (keypoints_[:, :, -1]>0).sum(axis=0)
    valid_joint = np.where(v >= min_view)[0]
    keypoints = keypoints_[:, valid_joint]
    conf3d = keypoints[:, :, -1].sum(axis=0)/v[valid_joint]
    # P2: the last row of each projection matrix: (1, nViews, 1, 4)
    P0 = Pall[None, :, 0, :]
    P1 = Pall[None, :, 1, :]
    P2 = Pall[None, :, 2, :]
    # uP2: the x coordinate multiplied by P2: (nJoints, nViews, 1, 4)
    uP2 = keypoints[:, :, 0].T[:, :, None] * P2
    vP2 = keypoints[:, :, 1].T[:, :, None] * P2
    conf = keypoints[:, :, 2].T[:, :, None]
    Au = conf * (uP2 - P0)
    Av = conf * (vP2 - P1)
    A = np.hstack([Au, Av])
    u, s, v = np.linalg.svd(A)
    X = v[:, -1, :]
    X = X / X[:, 3:]
    # out: (nJoints, 4)
    result = np.zeros((keypoints_.shape[1], 4))
    result[valid_joint, :3] = X[:, :3]
    result[valid_joint, 3] = conf3d #* (conf[..., 0].sum(axis=-1)>min_view)
    return result

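# --- Illustrative sanity check (not part of the original commit): project a
# known 3D point into two synthetic cameras and recover it with the DLT
# triangulation above; the cameras and the point are made up. ---
def _check_batch_triangulate_example():
    X_gt = np.array([0.2, -0.1, 3.0, 1.0])
    P0 = np.hstack([np.eye(3), np.zeros((3, 1))])                   # reference view
    P1 = np.hstack([np.eye(3), np.array([[-0.5], [0.], [0.]])])     # shifted along x
    Pall = np.stack([P0, P1])
    kpts = np.zeros((2, 1, 3))                                      # (nViews, nJoints, 3)
    for nv in range(2):
        uvw = Pall[nv] @ X_gt
        kpts[nv, 0] = [uvw[0]/uvw[2], uvw[1]/uvw[2], 1.]            # (u, v, confidence)
    X_est = batch_triangulate(kpts, Pall)
    assert np.allclose(X_est[0, :3], X_gt[:3], atol=1e-6)
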
def project_points(keypoints, RT, einsum='vab,kb->vka'):
    homo = np.concatenate([keypoints[..., :3], np.ones_like(keypoints[..., :1])], axis=-1)
    kpts2d = np.einsum(einsum, RT, homo)
    kpts2d[..., :2] /= kpts2d[..., 2:]
    return kpts2d

def make_Cnk(n, k):
    import itertools
    res = {}
    for n_ in range(3, n+1):
        n_0 = [i for i in range(n_)]
        for k_ in range(2, k+1):
            res[(n_, k_)] = list(map(list, itertools.combinations(n_0, k_)))
    return res

MAX_VIEWS = 30
Cnk = make_Cnk(MAX_VIEWS, 3)

def robust_triangulate_point(kpts2d, Pall, dist_max, min_v = 3):
    nV = kpts2d.shape[0]
    if len(kpts2d) < min_v: # reconstruction failed
        return [], None
    # min_v = max(2, nV//2)
    # 1. choose the combination of min_v
    index_ = Cnk[(len(kpts2d), min(min_v, len(kpts2d)))]
    # 2. proposals: store the reconstruction points of each proposal
    proposals = np.zeros((len(index_), 4))
    weight_self = np.zeros((nV, len(index_)))
    for i, index in enumerate(index_):
        weight_self[index, i] = 100.
        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
        proposals[i] = point
    # 3. project the proposals to each view
    # and calculate the reprojection error
    # (nViews, nProposals, 4)
    kpts_repro = project_points(proposals, Pall)
    conf = (proposals[None, :, -1] > 0) * (kpts2d[..., -1] > 0)
    # err: (nViews, nProposals)
    err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[..., :2], axis=-1) * conf
    valid = 1. - err/dist_max
    valid[valid<0] = 0
    # consider the weight of different view
    # TODO:naive weight:
    conf = kpts2d[..., -1]
    weight = conf
    # the (valid > 0)*weight_self term forces the views used in the proposal to stay consistent
    # plus a bonus for the number of views that are used
    weight_sum = (weight * valid).sum(axis=0) + ((valid > 0)*weight_self).sum(axis=0) - min_v * 100
    if weight_sum.max() < 0: # reconstruction failed
        return [], None
    best = weight_sum.argmax()
    if (err[index_[best], best] > dist_max).any():
        return [], None
    # for the selected proposal, look for the other views with positive weight
    point = proposals[best]
    best_add = np.where(valid[:, best])[0].tolist()
    index = list(index_[best])
    best_add.sort(key=lambda x:-weight[x])
    for add in best_add:
        if add in index:
            continue
        index.append(add)
        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
        kpts_repro = project_points(point, Pall[index])
        err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[index, ..., :2], axis=-1)
        if (err > dist_max).any():
            index.remove(add)
            break
    return index, point

def remove_outview(kpts2d, out_view, debug):
    if len(out_view) == 0:
        return False
    outv = out_view[0]
    if debug:
        mywarn('[triangulate] remove outview: {} from {}'.format(outv, out_view))
    kpts2d[outv] = 0.
    return True

def remove_outjoint(kpts2d, Pall, out_joint, dist_max, min_view=3, debug=False):
    if len(out_joint) == 0:
        return False
    if debug:
        mywarn('[triangulate] remove outjoint: {}'.format(out_joint))
    for nj in out_joint:
        valid = np.where(kpts2d[:, nj, -1] > 0)[0]
        if len(valid) < min_view:
            # if less than 3 visible view, set these unvisible
            kpts2d[:, nj, -1] = 0
            continue
        if len(valid) > MAX_VIEWS:
            # only select max points
            conf = -kpts2d[:, nj, -1]
            valid = conf.argsort()[:MAX_VIEWS]
        index_j, point = robust_triangulate_point(kpts2d[valid, nj:nj+1], Pall[valid], dist_max=dist_max, min_v=3)
        index_j = valid[index_j]
        # print('select {} for joint {}'.format(index_j, nj))
        set0 = np.zeros(kpts2d.shape[0])
        set0[index_j] = 1.
        kpts2d[:, nj, -1] *= set0
    return True

def project_and_distance(kpts3d, RT, kpts2d):
    kpts_proj = project_points(kpts3d, RT)
    # 1. distance between input and projection
    conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
    dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
    return dist, conf

def iterative_triangulate(kpts2d, RT, previous=None,
    min_conf=0.1, min_view=3, min_joints=3, dist_max=0.05, dist_vel=0.05,
    thres_outlier_view=0.4, thres_outlier_joint=0.4, debug=False):
    kpts2d = kpts2d.copy()
    conf = kpts2d[..., -1]
    kpts2d[conf<min_conf] = 0.
    if debug:
        log('[triangulate] kpts2d: {}'.format(kpts2d.shape))
    # TODO: consider large motion
    if previous is not None:
        dist, conf = project_and_distance(previous, RT, kpts2d)
        nottrack = (dist > dist_vel) & conf
        if nottrack.sum() > 0:
            kpts2d[nottrack] = 0.
            if debug:
                log('[triangulate] Remove with track {}'.format(np.where(nottrack)))
    while True:
        # 0. triangulate and project
        kpts3d = batch_triangulate(kpts2d, RT, min_view=min_view)
        dist, conf = project_and_distance(kpts3d, RT, kpts2d)
        # 2. find the outlier
        vv, jj = np.where(dist > dist_max)
        if vv.shape[0] < 1:
            if debug:
                log('[triangulate] Not found outlier, break')
            break
        ratio_outlier_view = (dist>dist_max).sum(axis=1)/(1e-5 + conf.sum(axis=1))
        ratio_outlier_joint = (dist>dist_max).sum(axis=0)/(1e-5 + conf.sum(axis=0))
        # 3. find the totally wrong detections
        out_view = np.where(ratio_outlier_view > thres_outlier_view)[0]
        out_joint = np.where(ratio_outlier_joint > thres_outlier_joint)[0]
        if len(out_view) > 1:
            dist_view = dist.sum(axis=1)/(1e-5 + conf.sum(axis=1))
            out_view = out_view.tolist()
            out_view.sort(key=lambda x:-dist_view[x])
            if debug: mywarn('[triangulate] Remove outlier view: {}'.format(ratio_outlier_view))
        if remove_outview(kpts2d, out_view, debug): continue
        if remove_outjoint(kpts2d, RT, out_joint, dist_max, debug=debug): continue
        if debug:
            log('[triangulate] Directly remove {}, {}'.format(vv, jj))
        kpts2d[vv, jj, -1] = 0.
    if debug:
        log('[triangulate] finally {} valid points'.format((kpts3d[..., -1]>0).sum()))
    if (kpts3d[..., -1]>0).sum() < min_joints:
        kpts3d[..., -1] = 0.
        kpts2d[..., -1] = 0.
        return kpts3d, kpts2d
    return kpts3d, kpts2d
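# --- Illustrative note (not part of the original commit): iterative_triangulate
# is typically driven by a per-key config like the one below; the values are an
# assumption, only the parameter names come from the signature above. ---
# cfg_keypoints2d = dict(min_conf=0.1, min_view=3, min_joints=3,
#                        dist_max=0.05, dist_vel=0.05,
#                        thres_outlier_view=0.4, thres_outlier_joint=0.4)
# kpts3d, kpts2d = iterative_triangulate(kpts2d, Pall, previous=None, **cfg_keypoints2d)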

class BaseTriangulator:
    def __init__(self, config, debug, keys) -> None:
        self.config = config
        self.debug = debug
        self.keys = keys

    def project_and_check(self, kpts3d, kpts2d, RT):
        kpts_proj = project_points(kpts3d, RT)
        conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
        dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
        return conf, dist

    def triangulate_with_results(self, pid, data, results):
        new = {'id': pid}
        for key in self.keys:
            key3d = key.replace('2d', '3d')
            if len(results) == 0:
                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
                    debug=self.debug, **self.config[key])
            else:
                if len(results) == 1:
                    previous = results[-1][key3d] # TODO: mean previous frame
                elif len(results) >= 2:
                    # TODO: mean previous velocity
                    previous0 = results[-2][key3d] # TODO: mean previous frame
                    previous1 = results[-1][key3d] # TODO: mean previous frame
                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous0[:, -1:]>0))
                    previous = previous1.copy()
                    previous[:, :3] += vel
                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
                    debug=self.debug, previous=previous, **self.config[key])
                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
            new[key3d] = kpts3d
        return new

class SimpleTriangulator(BaseTriangulator):
    def __init__(self, keys, debug, config,
        pid=0) -> None:
        super().__init__(config, debug, keys)
        self.results = []
        self.infos = []
        self.dim_name = ['_joints', '_views']
        self.pid = pid

    def __call__(self, data, results=None):
        info = {}
        if results is None:
            results = self.results
        new = {'id': self.pid}
        for key in self.keys:
            if key not in data.keys(): continue
            key3d = key.replace('2d', '3d')
            if self.debug:
                log('[triangulate] {}'.format(key))
            if len(results) == 0:
                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
                    debug=self.debug, **self.config[key])
            else:
                if len(results) == 1:
                    previous = results[-1][key3d] # TODO: mean previous frame
                elif len(results) >= 2:
                    # TODO: mean previous velocity
                    previous0 = results[-2][key3d] # TODO: mean previous frame
                    previous1 = results[-1][key3d] # TODO: mean previous frame
                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous0[:, -1:]>0))
                    previous = previous1.copy()
                    previous[:, :3] += vel
                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
                    debug=self.debug, previous=previous, **self.config[key])
                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
            new[key.replace('2d', '3d')] = kpts3d
            if self.debug:
                conf, dist = self.project_and_check(kpts3d, kpts2d, data['RT'])
                for dim in [0, 1]:
                    info_dim = {
                        'valid': conf.sum(axis=dim),
                        'dist': 10000*dist.sum(axis=dim)/(1e-5 + conf.sum(axis=dim)),
                    }
                    info[key+self.dim_name[dim]] = info_dim
                info[key+'_joints']['valid3d'] = kpts3d[:, -1] >0
        results.append(new)
        self.infos.append(info)
        return [new]

    def report(self):
        if not self.debug:
            return 0
        from .debug_utils import print_table
        for key in self.infos[0].keys():
            metrics = list(self.infos[0][key].keys())
            values = [np.mean(np.stack([info[key][metric] for info in self.infos]), axis=0) for metric in metrics]
            metrics = [key] + metrics
            values = [[i for i in range(values[0].shape[0])]] + values
            print_table(metrics, values)
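# --- Illustrative usage sketch (not part of the original commit; the data
# layout is assumed from the keys accessed above). ---
# triangulator = SimpleTriangulator(keys=['keypoints2d'], debug=False,
#     config={'keypoints2d': dict(min_conf=0.1, min_view=3, min_joints=3,
#                                 dist_max=0.05, dist_vel=0.05,
#                                 thres_outlier_view=0.4, thres_outlier_joint=0.4)})
# for frame_data in frames:   # each frame provides keypoints2d{,_unproj,_distort} and RT
#     res = triangulator(frame_data)[0]
#     kpts3d = res['keypoints3d']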

class SimpleTriangulatorMulti(SimpleTriangulator):
    def __init__(self, pids, **cfg) -> None:
        super().__init__(**cfg)
        self.results = {}

    def __call__(self, data, results=None):
        res_now = []
        for ipid, pid in enumerate(data['pid']):
            if pid not in self.results.keys():
                self.results[pid] = []
            data_ = {'RT': data['RT']}
            for key in self.keys:
                data_[key+'_distort'] = data[key+'_distort'][:, ipid]
                data_[key+'_unproj'] = data[key+'_unproj'][:, ipid]
                data_[key] = data[key][:, ipid]
            res = self.triangulate_with_results(pid, data_, self.results[pid])
            self.results[pid].append(res)
            res_now.append(res)
        return res_now

def skew_op(x):
    skew_op = lambda x: np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
    res = np.zeros((3, 3), dtype=x.dtype)
    # 0, -z, y
    res[0, 1] = -x[2, 0]
    res[0, 2] = x[1, 0]
    # z, 0, -x
    res[1, 0] = x[2, 0]
    res[1, 2] = -x[0, 0]
    # -y, x, 0
    res[2, 0] = -x[1, 0]
    res[2, 1] = x[0, 0]
    return res

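# --- Illustrative check (not part of the original commit): for a column vector
# a, skew_op(a) @ b equals the cross product a x b. ---
def _check_skew_op_example():
    a = np.array([[1.], [2.], [3.]])
    b = np.array([4., 5., 6.])
    assert np.allclose(skew_op(a) @ b, np.cross(a[:, 0], b))
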
def fundamental_op(K0, K1, R_0, T_0, R_1, T_1):
    invK0 = np.linalg.inv(K0)
    return invK0.T @ (R_0 @ R_1.T) @ K1.T @ skew_op(K1 @ R_1 @ R_0.T @ (T_0 - R_0 @ R_1.T @ T_1))

def drawlines(img1,img2,lines,pts1,pts2):
    ''' img1 - image on which we draw the epilines for the points in img2
        lines - corresponding epilines '''
    r,c = img1.shape[:2]
    for r,pt1,pt2 in zip(lines,pts1,pts2):
        pt1 = list(map(lambda x:int(x+0.5), pt1[:2].tolist()))
        pt2 = list(map(lambda x:int(x+0.5), pt2[:2].tolist()))
        if pt1[0] < 0 or pt1[1] < 0:
            continue
        color = tuple(np.random.randint(0,255,3).tolist())
        x0,y0 = map(int, [0, -r[2]/r[1] ])
        x1,y1 = map(int, [c, -(r[2]+r[0]*c)/r[1] ])
        img1 = cv2.line(img1, (x0,y0), (x1,y1), color,1)
        img1 = cv2.circle(img1,tuple(pt1),5,color,-1)
        img2 = cv2.circle(img2,tuple(pt2),5,color,-1)
    return img1,img2

def SimpleConstrain(dimGroups):
    constrain = np.ones((dimGroups[-1], dimGroups[-1]))
    for i in range(len(dimGroups)-1):
        start, end = dimGroups[i], dimGroups[i+1]
        constrain[start:end, start:end] = 0
    N = constrain.shape[0]
    constrain[range(N), range(N)] = 1
    return constrain

def check_cluster(affinity, row, views, dimGroups, indices, p2dAssigned, visited):
    affinity_row = affinity[row].copy()
    # given affinity and row, select the combine of all possible set
    cluster = np.where((affinity[row]>0)&(p2dAssigned==-1)&(visited==0))[0].tolist()
    cluster.sort(key=lambda x:-affinity[row, x])
    views_ = views[cluster]
    view_count = np.bincount(views[cluster])
    indices_all = [indices]
    for col in cluster:
        v = views[col]
        nOld = len(indices_all)
        if indices[v] != -1: # already assigned, copy and make new
            for i in range(nOld):
                ind = indices_all[i].copy()
                ind[v] = col
                indices_all.append(ind)
        else: # not assigned, assign
            for i in range(nOld):
                indices_all[i][v] = col
    return indices_all

def views_from_dimGroups(dimGroups):
    views = np.zeros(dimGroups[-1], dtype=np.int)
    for nv in range(len(dimGroups) - 1):
        views[dimGroups[nv]:dimGroups[nv+1]] = nv
    return views
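# --- Illustrative note (not part of the original commit): dimGroups holds the
# cumulative detection counts per view, e.g. 2, 3 and 1 detections in three
# views give dimGroups = [0, 2, 5, 6]; SimpleConstrain zeroes the within-view
# blocks (detections from the same view must not match each other) and keeps
# the diagonal. ---
def _check_simple_constrain_example():
    C = SimpleConstrain([0, 2, 5, 6])
    assert C.shape == (6, 6)
    assert C[0, 1] == 0 and C[0, 0] == 1 and C[0, 2] == 1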

class SimpleMatchAndTriangulator(SimpleTriangulator):
    def __init__(self, num_joints, min_views, min_joints, cfg_svt, cfg_track, **cfg) -> None:
        super().__init__(**cfg)
        self.nJoints = num_joints
        self.cfg_svt = cfg_svt
        self.cfg_track = cfg_track
        self.min_views = min_views
        self.min_joints = min_joints
        self.time = -1
        self.max_id = 0
        self.tracks = {}
        self.loglevel_dict = {
            'info': 0,
            'warn': 1,
            'error': 2,
        }
        self.loglevel = self.loglevel_dict['info'] # ['info', 'warn', 'error']
        self.debug = False
        self.data = None
        self.people = None

    def log(self, text):
        if self.loglevel > 0:
            return 0
        log(text)

    def warn(self, text):
        if self.loglevel > 1:
            return 0
        mywarn(text)

    @staticmethod
    def distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1):
        F = fundamental_op(K0, K1, R0, T0, R1, T1)
        # Find epilines corresponding to points in left image (first image) and
        # drawing its lines on right image
        lines0 = cv2.computeCorrespondEpilines(pts0[..., :2].reshape(-1,1,2), 2, F)
        # Find epilines corresponding to points in right image (second image) and
        # drawing its lines on left image
        lines1 = cv2.computeCorrespondEpilines(pts1[..., :2].reshape(-1,1,2), 1, F)
        if False:
            H, W = 1080, 1920
            img0 = np.zeros((H, W, 3), dtype=np.uint8) +255
            img4, img3 = drawlines(img0.copy(), img0.copy(), lines0.reshape(-1, 3), pts1.reshape(-1, 3), pts0.reshape(-1,3))
            img5,img6 = drawlines(img0.copy(), img0.copy(), lines1.reshape(-1, 3), pts0.reshape(-1,3), pts1.reshape(-1,3))
            import matplotlib.pyplot as plt
            plt.subplot(121)
            plt.imshow(img5)
            plt.subplot(122)
            plt.imshow(img4)
            plt.show()
        lines0 = lines0.reshape(pts0.shape)
        lines1 = lines1.reshape(pts1.shape)
        # dist: (D_v0, D_v1, nJoints)
        dist01 = np.abs(np.sum(lines0[:, None, :, :2] * pts1[None, :, :, :2], axis=-1) + lines0[:, None, :, 2])
        conf = pts0[:, None, :, 2] * pts1[None, :, :, 2]
        dist10 = np.abs(np.sum(lines1[:, None, :, :2] * pts0[None, :, :, :2], axis=-1) + lines1[:, None, :, 2])

        dist = np.sum(dist01 * conf + dist10.transpose(1, 0, 2) * conf, axis=-1)/(conf.sum(axis=-1) + 1e-5)/2
        return dist

    def _simple_associate2d_triangulate(self, data, affinity, dimGroups, prev_id):
        # sum1 = affinity.sum(axis=1)
        # note: for each view, the ranking should pick the single maximum affinity
        sum1 = np.zeros((affinity.shape[0]))
        for i in range(len(dimGroups)-1):
            start, end = dimGroups[i], dimGroups[i+1]
            if end == start:continue
            sum1 += affinity[:, start:end].max(axis=-1)
        n2d = affinity.shape[0]
        nViews = len(dimGroups) - 1
        idx_zero = np.zeros(nViews, dtype=np.int) - 1
        views = views_from_dimGroups(dimGroups)
        # the assigned results of each person
        p2dAssigned = np.zeros(n2d, dtype=np.int) - 1
        visited = np.zeros(n2d, dtype=np.int)
        sortidx = np.argsort(-sum1)
        pid = 0
        k3dresults = []
        for idx in sortidx:
            if p2dAssigned[idx] != -1:
                continue
            if prev_id[idx] != -1:
                results = [self.people[prev_id[idx]]]
            else:
                results = []
            proposals = check_cluster(affinity, row=idx, views=views,
                dimGroups=dimGroups, indices=idx_zero.copy(), p2dAssigned=p2dAssigned, visited=visited)
            for indices in proposals:
                if (indices > -1).sum() < self.min_views - (len(results)):
                    continue
                # set keypoints2d
                info = {'RT': data['RT']}
                for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
                    info[name] = np.zeros((nViews, self.nJoints, 3), dtype=np.float32)
                for nv in range(nViews):
                    if indices[nv] == -1: continue
                    for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
                        info[name][nv] = data[name][nv][indices[nv]-dimGroups[nv]]

                res = super().__call__(info, results=results)[0]

                k2d = res['keypoints2d']
                valid_view = (k2d[..., 2] > 0).sum(axis=-1) > self.min_joints
                # if valid_view.sum() < self.min_views - len(results): # with a previous frame, len(results) would be 2; unclear why this condition was used before
                if valid_view.sum() < self.min_views:
                    self.log('[associate] Skip proposal {}->{} with not enough valid view {}'.format(idx, indices, (k2d[..., 2] > 0).sum(axis=-1)))
                    continue
                valid_joint = res['keypoints3d'][:, -1] > 0.1
                if valid_joint.sum() < self.min_joints:
                    self.log('[associate] Skip proposal {}->{} as not enough joints'.format(idx, indices))
                    continue
                indices[~valid_view] = -1
                if (indices < 0).all():
                    import ipdb; ipdb.set_trace()
                self.log('[associate] Add indices {}, valid {}'.format(indices, (k2d[..., 2] > 0).sum(axis=-1)))
                res['id'] = pid
                res['indices'] = indices
                res['valid_view'] = valid_view
                res['valid_joints'] = res['keypoints3d'][:, -1] > 0.1
                k3dresults.append(res)
                for nv in range(nViews):
                    if valid_view[nv] and indices[nv] != -1:
                        p2dAssigned[indices[nv]] = pid
                        visited[indices[nv]] = 1
                pid += 1
                break
            visited[idx] = 1
        self.log('[associate] {} points not visited, {} not assigned'.format(visited.shape[0] - visited.sum(), (p2dAssigned==-1).sum()))
        k3dresults.sort(key=lambda x: -x['keypoints2d'][..., -1].sum())
        return k3dresults

    def _calculate_affinity_MxM(self, dims, dimGroups, data, key):
        M = dimGroups[-1]
        distance = np.zeros((M, M), dtype=np.float32)
        nViews = len(dims)
        for v0 in range(nViews-1):
            for v1 in range(1, nViews):
                # calculate distance between (v0, v1)
                if v0 >= v1:
                    continue
                if dims[v0] == 0 or dims[v1] == 0:
                    continue
                if True:
                    pts0 = data[key][v0]
                    pts1 = data[key][v1]
                    K0, K1 = data['K'][v0], data['K'][v1]
                    R0, T0 = data['Rc'][v0], data['Tc'][v0]
                    R1, T1 = data['Rc'][v1], data['Tc'][v1]
                    dist = self.distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1)
                    dist /= (K0[0, 0] + K1[0, 0])/2
                else:
                    dist = self.distance_by_ray(pts0, pts1, R0, T0, R1, T1)
                distance[dimGroups[v0]:dimGroups[v0+1], dimGroups[v1]:dimGroups[v1+1]] = dist
                distance[dimGroups[v1]:dimGroups[v1+1], dimGroups[v0]:dimGroups[v0+1]] = dist.T
        DIST_MAX = self.cfg_track.track_dist_max
        for nv in range(nViews):
            distance[dimGroups[nv]:dimGroups[nv+1], dimGroups[nv]:dimGroups[nv+1]] = DIST_MAX
        distance -= np.eye(M) * DIST_MAX
        aff = (DIST_MAX - distance)/DIST_MAX
        aff = np.clip(aff, 0, 1)
        return aff

    def _calculate_affinity_MxN(self, dims, dimGroups, data, key, results):
        M = dimGroups[-1]
        N = len(results)
        distance = np.zeros((M, N), dtype=np.float32)
        nViews = len(dims)
        k3d = np.stack([r['keypoints3d'] for r in results])
        kpts_proj = project_points(k3d, data['KRT'], einsum='vab,pkb->vpka')
        depth = kpts_proj[..., -1]
        kpts_proj[depth<0] = -10000
        for v in range(nViews):
            if dims[v] == 0:
                continue
            focal = data['K'][v][0, 0]
            pts2d = data[key][v][:, None]
            pts_repro = kpts_proj[v][None]
            conf = np.sqrt(pts2d[..., -1]*k3d[None, ..., -1])
            diff = np.linalg.norm(pts2d[..., :2] - pts_repro[..., :2], axis=-1)
            diff = np.sum(diff*conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
            dist = diff / focal
            distance[dimGroups[v]:dimGroups[v+1], :] = dist
        DIST_MAX = self.cfg_track.track_repro_max
        aff = (DIST_MAX - distance)/DIST_MAX
        aff = np.clip(aff, 0, 1)
        return aff

    def _svt_optimize_affinity(self, affinity, dimGroups):
        # match SVT
        import pymatchlr
        observe = np.ones_like(affinity)
        aff_svt = pymatchlr.matchSVT(affinity, dimGroups, SimpleConstrain(dimGroups), observe, self.cfg_svt)
        aff_svt[aff_svt<self.cfg_svt.aff_min] = 0.
        if False:
            import matplotlib.pyplot as plt
            M = affinity.shape[0]
            plt.subplot(121)
            plt.imshow(affinity)
            plt.hlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
            plt.vlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
            plt.subplot(122)
            sum_row = aff_svt.sum(axis=1, keepdims=True)/(len(dimGroups) - 1)
            plt.imshow(np.hstack([aff_svt, sum_row]))
            plt.hlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
            plt.vlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
            plt.ioff()
            plt.show()
        return aff_svt

    def _track_add(self, res):
        pid = res['id']
        if pid == -1:
            pid = self.max_id
            res['id'] = pid
            self.max_id += 1
            self.log('[{:06d}] Create track {} <- {}'.format(self.time, pid, res['indices']))
            if False:
                crops = []
                data = self.data
                kpts = np.vstack(data['keypoints2d'])
                for nv in range(len(data['imgname'])):
                    img = cv2.imread(data['imgname'][nv])
                    if res['indices'][nv] == -1: continue
                    _kpts = kpts[res['indices'][nv]]
                    bbox = bbox_from_keypoints(_kpts)
                    plot_keypoints_auto(img, _kpts, pid)
                    crop = crop_image(img, bbox, crop_square=True)
                    crops.append(crop)
                debug = merge(crops)
                cv2.imwrite('debug/{:06d}.jpg'.format(pid), debug)
        else:
            self.max_id = max(self.max_id, pid+1)
            self.log('[{:06d}] Initialize track {}, valid joints={}'.format(self.time, pid, (res['keypoints3d'][:, -1]>0.01).sum()))
        self.tracks[pid] = {
            'start_time': self.time,
            'end_time': self.time+1,
            'missing_frame': [],
            'infos': [res]
        }

    def _track_update(self, res, pid):
        res['id'] = pid
        info = self.tracks[pid]
        self.log('[{:06d}] Update track {} [{}->{}], valid joints={}'.format(self.time, pid, info['start_time'], info['end_time'], (res['keypoints3d'][:, -1]>0.1).sum()))
        self.tracks[pid]['end_time'] = self.time + 1
        self.tracks[pid]['infos'].append(res)

    def _track_merge(self, res, pid):
        res['id'] = -1
        # TODO: merge

    def _track_and_update(self, data, results):
        cfg = self.cfg_track
        self.time += 1
        if self.time == 0:
            # initialize the tracks
            for res in results:
                self._track_add(res)
            return results
        # filter the missing frames
        for pid in list(self.tracks.keys()):
            if self.time - self.tracks[pid]['end_time'] > cfg.max_missing_frame:
                self.warn('[{:06d}] Remove track {}'.format(self.time, pid))
                self.tracks.pop(pid)
        # track the results with greedy matching
        for idx_match, res in enumerate(results):
            res['id'] = -1
            # compute the distance
            k3d = res['keypoints3d'][None]
            pids_free = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] != self.time+1]
            pids_used = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] == self.time+1]
            def check_dist(k3d_check):
                dist = np.linalg.norm(k3d[..., :3] - k3d_check[..., :3], axis=-1)
                conf = np.sqrt(k3d[..., 3] * k3d_check[..., 3])
                dist_mean = ((conf>0.1).sum(axis=-1) < self.min_joints)*cfg.track_dist_max + np.sum(dist * conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
                argmin = dist_mean.argmin()
                dist_min = dist_mean[argmin]
                return dist_mean, argmin, dist_min
            # check free
            NOT_VISITED = -2
            NOT_FOUND = -1
            flag_tracked, flag_current = NOT_VISITED, NOT_VISITED
            if len(pids_free) > 0:
                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_free])
                dist_track, best, best_dist_track = check_dist(k3d_check)
                if best_dist_track < cfg.track_dist_max:
                    flag_tracked = best
                else:
                    flag_tracked = NOT_FOUND
            # check used
            if len(pids_used) > 0:
                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_used])
                dist_cur, best, best_dist_curr = check_dist(k3d_check)
                if best_dist_curr < cfg.track_dist_max:
                    flag_current = best
                else:
                    flag_current = NOT_FOUND
            if flag_tracked >= 0 and (flag_current == NOT_VISITED or flag_current == NOT_FOUND):
                self._track_update(res, pids_free[flag_tracked])
            elif (flag_tracked == NOT_FOUND or flag_tracked==NOT_VISITED) and flag_current >= 0:
                # not tracked, but a 3D person already exists in the current frame: merge
                self.log('[{:06d}] Merge track {} to {}'.format(self.time, idx_match, pids_used[flag_current]))
                self._track_merge(res, pids_used[flag_current])
            elif flag_tracked == NOT_FOUND and flag_current == NOT_FOUND:
                # create a new track
                self._track_add(res)
            else:
                # discard
                self.log('[{:06d}] Remove track {}. No close points'.format(self.time, idx_match))

        for pid in list(self.tracks.keys()):
            if self.tracks[pid]['end_time'] != self.time + 1:
                self.warn('[{:06d}] Tracking {} missing'.format(self.time, pid))
        results = [r for r in results if r['id']!=-1]
        return results

    def __call__(self, data):
        # match the data
        self.data = data
        key = 'keypoints2d'
        dims = [d.shape[0] for d in data[key]]
        dimGroups = np.cumsum([0] + dims)
        # 1. compute affinity
        affinity = self._calculate_affinity_MxM(dims, dimGroups, data, key)
        N2D = affinity.shape[0]
        if self.people is not None and len(self.people) > 0:
            # add 3d affinity
            _affinity = affinity
            affinity_3d = self._calculate_affinity_MxN(dims, dimGroups, data, key, self.people)
            affinity = np.concatenate([affinity, affinity_3d], axis=1)
            eye3d = np.eye(affinity_3d.shape[1])
            affinity = np.concatenate([affinity, np.hstack((affinity_3d.T, eye3d))], axis=0)
            dimGroups = dimGroups.tolist()
            dimGroups.append(dimGroups[-1]+affinity_3d.shape[1])
            affinity = self._svt_optimize_affinity(affinity, dimGroups)
            # affinity = self._svt_optimize_affinity(_affinity, dimGroups[:-1])
            # recover
            affinity_3d = np.hstack([np.ones((N2D, 1))*0.5, affinity[:N2D, N2D:]])
            prev_id = affinity_3d.argmax(axis=-1) - 1
            affinity = affinity[:N2D, :N2D]
            dimGroups = np.array(dimGroups[:-1])
        else:
            affinity = self._svt_optimize_affinity(affinity, dimGroups)
            prev_id = np.zeros(N2D) - 1
        # 2. associate and triangulate
        results = self._simple_associate2d_triangulate(data, affinity, dimGroups, prev_id)
        # 3. track, filter and return
        results = self._track_and_update(data, results)
        results.sort(key=lambda x:x['id'])
        self.people = results
        return results
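# --- Illustrative sketch (not part of the original commit; the field layout is
# inferred from the attribute accesses above). Per frame, the matcher expects a
# dict with per-view entries:
#   data['keypoints2d'][v]          (nDet_v, nJoints, 3) detections of view v
#   data['keypoints2d_unproj'][v]   undistorted / normalized counterparts
#   data['keypoints2d_distort'][v]  original (distorted) pixel coordinates
#   data['RT'], data['K'], data['Rc'], data['Tc'], data['KRT']   camera parameters
# and returns the tracked people, each with 'id', 'keypoints2d', 'keypoints3d'
# and 'indices'. ---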
@ -2,27 +2,38 @@
 @ Date: 2020-11-28 17:23:04
 @ Author: Qing Shuai
 @ LastEditors: Qing Shuai
-@ LastEditTime: 2021-08-22 16:11:25
-@ FilePath: /EasyMocap/easymocap/mytools/vis_base.py
+@ LastEditTime: 2022-08-12 21:50:56
+@ FilePath: /EasyMocapPublic/easymocap/mytools/vis_base.py
 '''
 import cv2
 import numpy as np
 import json
 
-def generate_colorbar(N = 20, cmap = 'jet'):
+def generate_colorbar(N = 20, cmap = 'jet', rand=True):
     bar = ((np.arange(N)/(N-1))*255).astype(np.uint8).reshape(-1, 1)
     colorbar = cv2.applyColorMap(bar, cv2.COLORMAP_JET).squeeze()
     if False:
         colorbar = np.clip(colorbar + 64, 0, 255)
-    import random
-    random.seed(666)
-    index = [i for i in range(N)]
-    random.shuffle(index)
-    rgb = colorbar[index, :]
+    if rand:
+        import random
+        random.seed(666)
+        index = [i for i in range(N)]
+        random.shuffle(index)
+        rgb = colorbar[index, :]
+    else:
+        rgb = colorbar
     rgb = rgb.tolist()
     return rgb
 
-colors_bar_rgb = generate_colorbar(cmap='hsv')
+# colors_bar_rgb = generate_colorbar(cmap='hsv')
+colors_bar_rgb = [
+    (94, 124, 226), # cyan
+    (255, 200, 87), # yellow
+    (74, 189, 172), # green
+    (8, 76, 97), # blue
+    (219, 58, 52), # red
+    (77, 40, 49), # brown
+]
 
 colors_table = {
     'b': [0.65098039, 0.74117647, 0.85882353],
@ -34,15 +45,19 @@ colors_table = {
     'r': [ 251/255., 128/255., 114/255.],
     '_orange': [ 253/255., 174/255., 97/255.],
     'y': [ 250/255., 230/255., 154/255.],
-    '_r':[255/255,0,0],
     'g':[0,255/255,0],
-    '_b':[0,0,255/255],
     'k':[0,0,0],
+    '_r':[255/255,0,0],
+    '_g':[0,255/255,0],
+    '_b':[0,0,255/255],
+    '_k':[0,0,0],
     '_y':[255/255,255/255,0],
     'purple':[128/255,0,128/255],
     'smap_b':[51/255,153/255,255/255],
     'smap_r':[255/255,51/255,153/255],
-    'smap_b':[51/255,255/255,153/255],
+    'person': [255/255,255/255,255/255],
+    'handl': [255/255,51/255,153/255],
+    'handr': [51/255,255/255,153/255],
 }
 
 def get_rgb(index):
@ -51,7 +66,9 @@ def get_rgb(index):
         return (255, 255, 255)
     if index < -1:
         return (0, 0, 0)
-        col = colors_bar_rgb[index%len(colors_bar_rgb)]
+        # elif index == 0:
+        #     return (245, 150, 150)
+        col = list(colors_bar_rgb[index%len(colors_bar_rgb)])[::-1]
     else:
         col = colors_table.get(index, (1, 0, 0))
     col = tuple([int(c*255) for c in col[::-1]])
@ -80,13 +97,14 @@ def plot_cross(img, x, y, col, width=-1, lw=-1):
     cv2.line(img, (int(x-width), int(y)), (int(x+width), int(y)), col, lw)
     cv2.line(img, (int(x), int(y-width)), (int(x), int(y+width)), col, lw)
 
-def plot_bbox(img, bbox, pid, vis_id=True):
+def plot_bbox(img, bbox, pid, scale=1, vis_id=True):
     # draw the bbox: (l, t, r, b)
-    x1, y1, x2, y2 = bbox[:4]
-    x1 = int(round(x1))
-    x2 = int(round(x2))
-    y1 = int(round(y1))
-    y2 = int(round(y2))
+    x1, y1, x2, y2, c = bbox
+    if c < 0.01:return img
+    x1 = int(round(x1*scale))
+    x2 = int(round(x2*scale))
+    y1 = int(round(y1*scale))
+    y2 = int(round(y2*scale))
     color = get_rgb(pid)
     lw = max(img.shape[0]//300, 2)
     cv2.rectangle(img, (x1, y1), (x2, y2), color, lw)
@ -94,11 +112,20 @@ def plot_bbox(img, bbox, pid, vis_id=True):
         font_scale = img.shape[0]/1000
         cv2.putText(img, '{}'.format(pid), (x1, y1+int(25*font_scale)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)
 
-def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2):
+def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2, fliplr=False):
+    lw = max(lw, 2)
+    H, W = img.shape[:2]
     for ii, (i, j) in enumerate(config['kintree']):
         if i >= len(points) or j >= len(points):
             continue
+        if (i >25 or j > 25) and config['nJoints'] != 42:
+            _lw = max(int(lw/4), 1)
+        else:
+            _lw = lw
         pt1, pt2 = points[i], points[j]
+        if fliplr:
+            pt1 = (W-pt1[0], pt1[1])
+            pt2 = (W-pt2[0], pt2[1])
         if use_limb_color:
             col = get_rgb(config['colors'][ii])
         else:
@ -106,32 +133,111 @@ def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True
         if pt1[-1] > 0.01 and pt2[-1] > 0.01:
             image = cv2.line(
                 img, (int(pt1[0]+0.5), int(pt1[1]+0.5)), (int(pt2[0]+0.5), int(pt2[1]+0.5)),
-                col, lw)
-    for i in range(len(points)):
+                col, _lw)
+    for i in range(min(len(points), config['nJoints'])):
         x, y = points[i][0], points[i][1]
+        if fliplr:
+            x = W - x
+        c = points[i][-1]
+        if c > 0.01:
+            text_size = img.shape[0]/1000
+            col = get_rgb(pid)
+            radius = int(lw/1.5)
+            if i > 25 and config['nJoints'] != 42:
+                radius = max(int(radius/4), 1)
+            cv2.circle(img, (int(x+0.5), int(y+0.5)), radius, col, -1)
+            if vis_conf:
+                cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)),
+                    cv2.FONT_HERSHEY_SIMPLEX, text_size, col, 2)
+
+def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, scale=1, lw=-1):
+    from ..dataset.config import CONFIG
+    config_name = {25: 'body25', 21: 'hand', 42:'handlr', 17: 'coco', 1:'points', 67:'bodyhand', 137: 'total', 79:'up'}[len(points)]
+    config = CONFIG[config_name]
+    if lw == -1:
+        lw = img.shape[0]//200
+        if config_name == 'hand':
+            lw = img.shape[0]//1000
+        lw = max(lw, 1)
+    for ii, (i, j) in enumerate(config['kintree']):
+        if i >= len(points) or j >= len(points):
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
+        pt1, pt2 = points[i], points[j]
+        if use_limb_color:
+            col = get_rgb(config['colors'][ii])
+        else:
+            col = get_rgb(pid)
+        if pt1[0] < 0 or pt1[1] < 0 or pt1[0] > 10000 or pt1[1] > 10000:
+            continue
+        if pt2[0] < 0 or pt2[1] < 0 or pt2[0] > 10000 or pt2[1] > 10000:
+            continue
+        if pt1[-1] > 0.01 and pt2[-1] > 0.01:
+            image = cv2.line(
+                img, (int(pt1[0]*scale+0.5), int(pt1[1]*scale+0.5)), (int(pt2[0]*scale+0.5), int(pt2[1]*scale+0.5)),
+                col, lw)
+    lw = img.shape[0]//200
+    if config_name == 'hand':
+        lw = img.shape[0]//500
+    lw = max(lw, 1)
+    for i in range(len(points)):
+        x, y = points[i][0]*scale, points[i][1]*scale
+        if x < 0 or y < 0 or x >10000 or y >10000:
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
         c = points[i][-1]
         if c > 0.01:
             col = get_rgb(pid)
-            cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
+            if len(points) == 1:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*10, col, lw*2)
+                plot_cross(img, int(x+0.5), int(y+0.5), width=lw*5, col=col, lw=lw*2)
+            else:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
             if vis_conf:
                 cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)
 
-def plot_points2d(img, points2d, lines, lw=4, col=(0, 255, 0), putText=True):
+def plot_keypoints_total(img, annots, scale, pid_offset=0):
+    _lw = img.shape[0] // 150
+    for annot in annots:
+        pid = annot['personID'] + pid_offset
+        for key in ['keypoints', 'handl2d', 'handr2d']:
+            if key not in annot.keys():continue
+            if key in ['handl2d', 'handr2d', 'face2d']:
+                lw = _lw // 2
+            else:
+                lw = _lw
+            lw = max(lw, 1)
+            plot_keypoints_auto(img, annot[key], pid, vis_conf=False, use_limb_color=False, scale=scale, lw=lw)
+        if 'bbox' not in annot.keys() or (annot['bbox'][0] < 0 or annot['bbox'][0] >10000):
+            continue
+        plot_bbox(img, annot['bbox'], pid, scale=scale, vis_id=True)
+    return img
+
+def plot_points2d(img, points2d, lines, lw=-1, col=(0, 255, 0), putText=True, style='+'):
     # draw the 2d points
     if points2d.shape[1] == 2:
         points2d = np.hstack([points2d, np.ones((points2d.shape[0], 1))])
+    if lw == -1:
+        lw = img.shape[0]//200
     for i, (x, y, v) in enumerate(points2d):
         if v < 0.01:
             continue
         c = col
-        plot_cross(img, x, y, width=10, col=c, lw=lw)
+        if '+' in style:
+            plot_cross(img, x, y, width=10, col=c, lw=lw*2)
+        if 'o' in style:
+            cv2.circle(img, (int(x), int(y)), 10, c, lw*2)
+            cv2.circle(img, (int(x), int(y)), lw, c, lw)
         if putText:
-            font_scale = img.shape[0]/2000
+            c = col[::-1]
+            font_scale = img.shape[0]/1000
             cv2.putText(img, '{}'.format(i), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, c, 2)
     for i, j in lines:
         if points2d[i][2] < 0.01 or points2d[j][2] < 0.01:
             continue
-        plot_line(img, points2d[i], points2d[j], 2, col)
+        plot_line(img, points2d[i], points2d[j], max(1, lw//2), col)
 
 row_col_ = {
     2: (2, 1),
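# --- Illustrative usage sketch (not part of the original commit): drawing a
# body25 skeleton on a blank canvas; the keypoint values are made up. ---
# canvas = np.zeros((1080, 1920, 3), dtype=np.uint8)
# kpts = np.zeros((25, 3)); kpts[:, 0] = 960; kpts[:, 1] = 540; kpts[:, 2] = 1.
# plot_keypoints_auto(canvas, kpts, pid=0, use_limb_color=False)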
@ -140,7 +246,18 @@ row_col_ = {
     9: (3, 3),
     26: (4, 7)
 }
-def get_row_col(l):
+
+row_col_square = {
+    2: (2, 1),
+    7: (3, 3),
+    8: (3, 3),
+    9: (3, 3),
+    26: (5, 5)
+}
+
+def get_row_col(l, square):
+    if square and l in row_col_square.keys():
+        return row_col_square[l]
     if l in row_col_.keys():
         return row_col_[l]
     else:
@ -153,12 +270,19 @@ def get_row_col(l):
         row, col = col, row
     return row, col
 
-def merge(images, row=-1, col=-1, resize=False, ret_range=False, **kwargs):
+def merge(images, row=-1, col=-1, resize=False, ret_range=False, square=False, **kwargs):
     if row == -1 and col == -1:
-        row, col = get_row_col(len(images))
+        row, col = get_row_col(len(images), square)
     height = images[0].shape[0]
     width = images[0].shape[1]
-    ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    # special case
+    if height > width:
+        if len(images) == 3:
+            row, col = 1, 3
+    if len(images[0].shape) > 2:
+        ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    else:
+        ret_img = np.zeros((height * row, width * col), dtype=np.uint8) + 255
     ranges = []
     for i in range(row):
         for j in range(col):
|
104
scripts/dataset/download_youtube.py
Normal file
104
scripts/dataset/download_youtube.py
Normal file
@ -0,0 +1,104 @@
'''
  @ Date: 2022-03-29 13:55:42
  @ Author: Qing Shuai
  @ Mail: s_q@zju.edu.cn
  @ LastEditors: Qing Shuai
  @ LastEditTime: 2022-05-06 16:45:47
  @ FilePath: /EasyMocapPublic/scripts/dataset/download_youtube.py
'''
from glob import glob
from os.path import join
from urllib.error import URLError
from pytube import YouTube
import os
from easymocap.mytools.debug_utils import log, mkdir, myerror

extensions = ['.mp4', '.webm']

def download_youtube(vid, outdir):
    outname = join(outdir, vid)
    url = 'https://www.youtube.com/watch?v={}'.format(vid)
    for ext in extensions:
        if os.path.exists(outname+ext) and not args.restart:
            log('[Info]: skip video {}'.format(outname+ext))
            return 0
    log('[Info]: start to download video {}'.format(outname))
    log('[Info]: {}'.format(url))
    yt = YouTube(url)
    try:
        streams = yt.streams
    except KeyError:
        myerror('[Error]: not found streams: {}'.format(url))
        return 1
    except URLError:
        myerror('[Error]: Url error: {}'.format(url))
        return 1
    find = False
    streams_valid = []
    res_range = ['2160p', '1440p', '1080p', '720p'] if not args.only4k else ['2160p']
    if args.no720:
        res_range.remove('720p')
    for res in res_range:
        for fps in [60, 50, 30, 25, 24]:
            for ext in ['webm', 'mp4']:
                for stream in streams:
                    if stream.resolution == res and \
                        stream.fps == fps and \
                        stream.mime_type == 'video/{}'.format(ext):
                        streams_valid.append(stream)
    if len(streams_valid) == 0:
        for stream in streams:
            print(stream)
        myerror('[BUG ] Not found valid stream, please check the streams')
        return 0
    # best_stream = yt.streams.order_by('filesize')[-1]
    title = streams_valid[0].title
    log('[Info]: {}'.format(title))
    for stream in streams_valid:
        res = stream.resolution
        log('[Info]: The resolution is {}, ext={}'.format(res, stream.mime_type))
        filename = '{}.{}'.format(vid, stream.mime_type.split('/')[-1])
        try:
            stream.download(output_path=outdir, filename=filename, max_retries=0)
            log('[Info]: Succeed')
        except:
            myerror('[BUG ]: Failed')
            continue
        break


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('vid', type=str)
    parser.add_argument('--database', type=str, default='data/youtube')
    parser.add_argument('--num', type=int, default=1)
    parser.add_argument('--only4k', action='store_true')
    parser.add_argument('--no720', action='store_true')
    parser.add_argument('--restart', action='store_true')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    vid = args.vid
    # check database
    database = join(args.database, 'videos')
    mkdir(database)
    videonames = sorted(os.listdir(database))
    log('[download] video database in {}'.format(database))
    log('[download] already has {} videos'.format(len(videonames)))

    if vid.startswith('https'):
        vid = vid.replace('https://www.youtube.com/watch?v=', '')
        vid = vid.split('&')[0]
        print(vid)
        urls = [vid]
    elif os.path.exists(vid):
        with open(vid, 'r') as f:
            urls = f.readlines()
        urls = list(filter(lambda x:not x.startswith('#') and len(x) > 0, map(lambda x: x.strip().replace('https://www.youtube.com/watch?v=', '').split('&')[0], urls)))
        log('[download] download {} videos from {}'.format(len(urls), vid))
    else:
        urls = [vid]

    for url in urls:
        download_youtube(url, database)
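Usage note (not part of the commit): a couple of illustrative invocations, plus a short sketch of how the script normalises its vid argument. The id abc123XYZ and the list file vids.txt are made-up examples.

# Illustrative invocations (ids/paths are hypothetical):
#   python3 scripts/dataset/download_youtube.py abc123XYZ --database data/youtube
#   python3 scripts/dataset/download_youtube.py vids.txt --no720

# The same normalisation the script applies to each entry of a list file:
def normalise(entry):
    # strip whitespace, drop the watch?v= prefix and any extra query parameters
    return entry.strip().replace('https://www.youtube.com/watch?v=', '').split('&')[0]

lines = [
    'https://www.youtube.com/watch?v=abc123XYZ&t=42s',
    '# commented-out entries are skipped',
    'abc123XYZ',
]
urls = list(filter(lambda x: not x.startswith('#') and len(x) > 0,
                   map(normalise, lines)))
print(urls)  # ['abc123XYZ', 'abc123XYZ'] -- both forms collapse to the bare id

The nested preference loop over resolution, fps and container exists because pytube only exposes the stream variants YouTube actually serves for a given video; the script therefore walks the preferred combinations in order, keeps every match, and downloads the first one that succeeds.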