🚧 update tools

1. update camera reader 2. update debug and visualize tools
2022-08-21 16:04:51 +08:00 · 2022-08-21 16:04:51 +08:00 · 5bc4b113ba
commit 5bc4b113ba
parent d534ba41fc
9 changed files with 2184 additions and 89 deletions
--- a/easymocap/mytools/camera_utils.py
+++ b/easymocap/mytools/camera_utils.py
@ -1,7 +1,7 @@
 import cv2
 import numpy as np
 import os
-
+from os.path import join
 class FileStorage(object):
    def __init__(self, filename, isWrite=False):
        version = cv2.__version__
@ -10,27 +10,34 @@ class FileStorage(object):

        if isWrite:
            os.makedirs(os.path.dirname(filename), exist_ok=True)
-            self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_WRITE)
+            self.fs = open(filename, 'w')
+            self.fs.write('%YAML:1.0\r\n')
+            self.fs.write('---\r\n')
        else:
+            assert os.path.exists(filename), filename
            self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ)
+        self.isWrite = isWrite

    def __del__(self):
-        cv2.FileStorage.release(self.fs)
+        if self.isWrite:
+            self.fs.close()
+        else:
+            cv2.FileStorage.release(self.fs)
+
+    def _write(self, out):
+        self.fs.write(out+'\r\n')

    def write(self, key, value, dt='mat'):
        if dt == 'mat':
-            cv2.FileStorage.write(self.fs, key, value)
+            self._write('{}: !!opencv-matrix'.format(key))
+            self._write('  rows: {}'.format(value.shape[0]))
+            self._write('  cols: {}'.format(value.shape[1]))
+            self._write('  dt: d')
+            self._write('  data: [{}]'.format(', '.join(['{:.3f}'.format(i) for i in value.reshape(-1)])))
        elif dt == 'list':
-            if self.major_version == 4: # 4.4
-                self.fs.startWriteStruct(key, cv2.FileNode_SEQ)
-                for elem in value:
-                    self.fs.write('', elem)
-                self.fs.endWriteStruct()
-            else: # 3.4
-                self.fs.write(key, '[')
-                for elem in value:
-                    self.fs.write('none', elem)
-                self.fs.write('none', ']')
+            self._write('{}:'.format(key))
+            for elem in value:
+                self._write('  - "{}"'.format(elem))

    def read(self, key, dt='mat'):
        if dt == 'mat':
@ -66,6 +73,8 @@ def read_intri(intri_name):
    return cameras

 def write_intri(intri_name, cameras):
+    if not os.path.exists(os.path.dirname(intri_name)):
+        os.makedirs(os.path.dirname(intri_name))
    intri = FileStorage(intri_name, True)
    results = {}
    camnames = list(cameras.keys())
@ -74,11 +83,13 @@ def write_intri(intri_name, cameras):
        key = key_.split('.')[0]
        K, dist = val['K'], val['dist']
        assert K.shape == (3, 3), K.shape
-        assert dist.shape == (1, 5) or dist.shape == (5, 1), dist.shape
+        assert dist.shape == (1, 5) or dist.shape == (5, 1) or dist.shape == (1, 4) or dist.shape == (4, 1), dist.shape
        intri.write('K_{}'.format(key), K)
-        intri.write('dist_{}'.format(key), dist.reshape(1, 5))
+        intri.write('dist_{}'.format(key), dist.flatten()[None])

 def write_extri(extri_name, cameras):
+    if not os.path.exists(os.path.dirname(extri_name)):
+        os.makedirs(os.path.dirname(extri_name))
    extri = FileStorage(extri_name, True)
    results = {}
    camnames = list(cameras.keys())
@ -105,12 +116,15 @@ def read_camera(intri_name, extri_name, cam_names=[]):
        cams[cam]['invK'] = np.linalg.inv(cams[cam]['K'])
        Rvec = extri.read('R_{}'.format(cam))
        Tvec = extri.read('T_{}'.format(cam))
+        assert Rvec is not None, cam
        R = cv2.Rodrigues(Rvec)[0]
        RT = np.hstack((R, Tvec))

        cams[cam]['RT'] = RT
        cams[cam]['R'] = R
+        cams[cam]['Rvec'] = Rvec
        cams[cam]['T'] = Tvec
+        cams[cam]['center'] = - Rvec.T @ Tvec
        P[cam] = cams[cam]['K'] @ cams[cam]['RT']
        cams[cam]['P'] = P[cam]

@ -118,6 +132,13 @@ def read_camera(intri_name, extri_name, cam_names=[]):
    cams['basenames'] = cam_names
    return cams

+def read_cameras(path, intri='intri.yml', extri='extri.yml', subs=[]):
+    cameras = read_camera(join(path, intri), join(path, extri))
+    cameras.pop('basenames')
+    if len(subs) > 0:
+        cameras = {key:cameras[key].astype(np.float32) for key in subs}
+    return cameras
+
 def write_camera(camera, path):
    from os.path import join
    intri_name = join(path, 'intri.yml')
@ -146,12 +167,24 @@ def camera_from_img(img):
    focal = 1.2*min(height, width) # as colmap
    K = np.array([focal, 0., width/2, 0., focal, height/2, 0. ,0., 1.]).reshape(3, 3)
    camera = {'K':K ,'R': np.eye(3), 'T': np.zeros((3, 1)), 'dist': np.zeros((1, 5))}
+    camera['invK'] = np.linalg.inv(camera['K'])
+    camera['P'] = camera['K'] @ np.hstack((camera['R'], camera['T']))
    return camera

 class Undistort:
-    @staticmethod
-    def image(frame, K, dist):
-        return cv2.undistort(frame, K, dist, None)
+    distortMap = {}
+    @classmethod
+    def image(cls, frame, K, dist, sub=None):
+        if sub is None:
+            return cv2.undistort(frame, K, dist, None)
+        else:
+            if sub not in cls.distortMap.keys():
+                h,  w = frame.shape[:2]
+                mapx, mapy = cv2.initUndistortRectifyMap(K, dist, None, K, (w,h), 5)
+                cls.distortMap[sub] = (mapx, mapy)
+            mapx, mapy = cls.distortMap[sub]
+            img = cv2.remap(frame, mapx, mapy, cv2.INTER_NEAREST)
+            return img

    @staticmethod
    def points(keypoints, K, dist):
@ -170,10 +203,38 @@ class Undistort:
        bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
        return bbox

-def undistort(camera, frame=None, keypoints=None, output=None, bbox=None):
-    # bbox: 1, 7
-    print('This function is deprecated')
-    raise NotImplementedError
+def unproj(kpts, invK):
+    homo = np.hstack([kpts[:, :2], np.ones_like(kpts[:, :1])])
+    homo = homo @ invK.T
+    return np.hstack([homo[:, :2], kpts[:, 2:]])
+class UndistortFisheye:
+    @staticmethod
+    def image(frame, K, dist):
+        Knew = K.copy()
+        frame = cv2.fisheye.undistortImage(frame, K, dist, Knew=Knew)
+        return frame, Knew
+
+    @staticmethod
+    def points(keypoints, K, dist, Knew):
+        # keypoints: (N, 3)
+        assert len(keypoints.shape) == 2, keypoints.shape
+        kpts = keypoints[:, None, :2]
+        kpts = np.ascontiguousarray(kpts)
+        kpts = cv2.fisheye.undistortPoints(kpts, K, dist, P=Knew)
+        keypoints[:, :2] = kpts[:, 0]
+        return keypoints
+    
+    @staticmethod
+    def bbox(bbox, K, dist, Knew):
+        keypoints = np.array([[bbox[0], bbox[1], 1], [bbox[2], bbox[3], 1]])
+        kpts = UndistortFisheye.points(keypoints, K, dist, Knew)
+        bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
+        return bbox
+
+
+def get_Pall(cameras, camnames):
+    Pall = np.stack([cameras[cam]['K'] @ np.hstack((cameras[cam]['R'], cameras[cam]['T'])) for cam in camnames])
+    return Pall

 def get_fundamental_matrix(cameras, basenames):
    skew_op = lambda x: np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
@ -189,3 +250,59 @@ def get_fundamental_matrix(cameras, basenames):
            if F[(icam, jcam)].sum() == 0:
                F[(icam, jcam)] += 1e-12  # to avoid nan
    return F
+
+def interp_cameras(cameras, keys, step=20, loop=True, allstep=-1, **kwargs):
+    from scipy.spatial.transform import Rotation as R
+    from scipy.spatial.transform import Slerp
+    if allstep != -1:
+        tall = np.linspace(0., 1., allstep+1)[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and loop:
+        tall = np.linspace(0., 1., 1+step*len(keys))[:-1].reshape(-1, 1, 1)
+    elif allstep == -1 and not loop:
+        tall = np.linspace(0., 1., 1+step*(len(keys)-1))[:-1].reshape(-1, 1, 1)
+    cameras_new = {}
+    for ik in range(len(keys)):
+        if ik == len(keys) -1 and not loop:
+            break
+        if loop:
+            start, end = (ik * tall.shape[0])//len(keys),     int((ik+1)*tall.shape[0])//len(keys)
+            print(ik, start, end, tall.shape)
+        else:
+            start, end = (ik * tall.shape[0])//(len(keys)-1), int((ik+1)*tall.shape[0])//(len(keys)-1)
+        t = tall[start:end].copy()
+        t = (t-t.min())/(t.max()-t.min())
+        left, right = keys[ik], keys[0 if ik == len(keys)-1 else ik + 1]
+        camera_left = cameras[left]
+        camera_right = cameras[right]
+        # 插值相机中心: center = - R.T @ T
+        center_l = - camera_left['R'].T @ camera_left['T']
+        center_r = - camera_right['R'].T @ camera_right['T']
+        center_l, center_r = center_l[None], center_r[None]
+        if False:
+            centers = center_l * (1-t) + center_r * t
+        else:
+            # 球面插值
+            norm_l, norm_r = np.linalg.norm(center_l), np.linalg.norm(center_r)
+            center_l, center_r = center_l/norm_l, center_r/norm_r
+            costheta = (center_l*center_r).sum()
+            sintheta = np.sqrt(1. - costheta**2)
+            theta = np.arctan2(sintheta, costheta)
+            centers = (np.sin(theta*(1-t)) * center_l + np.sin(theta * t) * center_r)/sintheta
+            norm = norm_l * (1-t) + norm_r * t
+            centers = centers * norm
+        key_rots = R.from_matrix(np.stack([camera_left['R'], camera_right['R']]))
+        key_times = [0, 1]
+        slerp = Slerp(key_times, key_rots)
+        interp_rots = slerp(t.squeeze()).as_matrix()
+        # 计算相机T RX + T = 0 => T = - R @ X
+        T = - np.einsum('bmn,bno->bmo', interp_rots, centers)
+        K = camera_left['K'] * (1-t) + camera_right['K'] * t
+        for i in range(T.shape[0]):
+            cameras_new['{}-{}-{}'.format(left, right, i)] = \
+                {
+                    'K': K[i],
+                    'dist': np.zeros((1, 5)),
+                    'R': interp_rots[i],
+                    'T': T[i]
+                }
+    return cameras_new
--- a/easymocap/mytools/colmap_structure.py
+++ b/easymocap/mytools/colmap_structure.py
@ -0,0 +1,439 @@
+# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+#     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
+#       its contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
+
+import os
+import sys
+import collections
+import numpy as np
+import struct
+import cv2
+
+CameraModel = collections.namedtuple(
+    "CameraModel", ["model_id", "model_name", "num_params"])
+Camera = collections.namedtuple(
+    "Camera", ["id", "model", "width", "height", "params"])
+BaseImage = collections.namedtuple(
+    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
+Point3D = collections.namedtuple(
+    "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
+
+class Image(BaseImage):
+    def qvec2rotmat(self):
+        return qvec2rotmat(self.qvec)
+
+
+CAMERA_MODELS = {
+    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
+    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
+    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
+    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
+    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
+    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
+    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
+    CameraModel(model_id=7, model_name="FOV", num_params=5),
+    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
+    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
+    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
+}
+CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \
+                         for camera_model in CAMERA_MODELS])
+CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
+                           for camera_model in CAMERA_MODELS])
+
+def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
+    """Read and unpack the next bytes from a binary file.
+    :param fid:
+    :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
+    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
+    :param endian_character: Any of {@, =, <, >, !}
+    :return: Tuple of read and unpacked values.
+    """
+    data = fid.read(num_bytes)
+    return struct.unpack(endian_character + format_char_sequence, data)
+
+
+def read_cameras_text(path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::WriteCamerasText(const std::string& path)
+        void Reconstruction::ReadCamerasText(const std::string& path)
+    """
+    cameras = {}
+    with open(path, "r") as fid:
+        while True:
+            line = fid.readline()
+            if not line:
+                break
+            line = line.strip()
+            if len(line) > 0 and line[0] != "#":
+                elems = line.split()
+                camera_id = int(elems[0])
+                model = elems[1]
+                width = int(elems[2])
+                height = int(elems[3])
+                params = np.array(tuple(map(float, elems[4:])))
+                cameras[camera_id] = Camera(id=camera_id, model=model,
+                                            width=width, height=height,
+                                            params=params)
+    return cameras
+
+
+def read_cameras_binary(path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::WriteCamerasBinary(const std::string& path)
+        void Reconstruction::ReadCamerasBinary(const std::string& path)
+    """
+    cameras = {}
+    with open(path_to_model_file, "rb") as fid:
+        num_cameras = read_next_bytes(fid, 8, "Q")[0]
+        for camera_line_index in range(num_cameras):
+            camera_properties = read_next_bytes(
+                fid, num_bytes=24, format_char_sequence="iiQQ")
+            camera_id = camera_properties[0]
+            model_id = camera_properties[1]
+            model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
+            width = camera_properties[2]
+            height = camera_properties[3]
+            num_params = CAMERA_MODEL_IDS[model_id].num_params
+            params = read_next_bytes(fid, num_bytes=8*num_params,
+                                     format_char_sequence="d"*num_params)
+            cameras[camera_id] = Camera(id=camera_id,
+                                        model=model_name,
+                                        width=width,
+                                        height=height,
+                                        params=np.array(params))
+        assert len(cameras) == num_cameras
+    return cameras
+
+
+def read_images_text(path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadImagesText(const std::string& path)
+        void Reconstruction::WriteImagesText(const std::string& path)
+    """
+    images = {}
+    with open(path, "r") as fid:
+        while True:
+            line = fid.readline()
+            if not line:
+                break
+            line = line.strip()
+            if len(line) > 0 and line[0] != "#":
+                elems = line.split()
+                image_id = int(elems[0])
+                qvec = np.array(tuple(map(float, elems[1:5])))
+                tvec = np.array(tuple(map(float, elems[5:8])))
+                camera_id = int(elems[8])
+                image_name = elems[9]
+                elems = fid.readline().split()
+                xys = np.column_stack([tuple(map(float, elems[0::3])),
+                                       tuple(map(float, elems[1::3]))])
+                point3D_ids = np.array(tuple(map(int, elems[2::3])))
+                images[image_id] = Image(
+                    id=image_id, qvec=qvec, tvec=tvec,
+                    camera_id=camera_id, name=image_name,
+                    xys=xys, point3D_ids=point3D_ids)
+    return images
+
+
+def read_images_binary(path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadImagesBinary(const std::string& path)
+        void Reconstruction::WriteImagesBinary(const std::string& path)
+    """
+    images = {}
+    with open(path_to_model_file, "rb") as fid:
+        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
+        for image_index in range(num_reg_images):
+            binary_image_properties = read_next_bytes(
+                fid, num_bytes=64, format_char_sequence="idddddddi")
+            image_id = binary_image_properties[0]
+            qvec = np.array(binary_image_properties[1:5])
+            tvec = np.array(binary_image_properties[5:8])
+            camera_id = binary_image_properties[8]
+            image_name = ""
+            current_char = read_next_bytes(fid, 1, "c")[0]
+            while current_char != b"\x00":   # look for the ASCII 0 entry
+                image_name += current_char.decode("utf-8")
+                current_char = read_next_bytes(fid, 1, "c")[0]
+            num_points2D = read_next_bytes(fid, num_bytes=8,
+                                           format_char_sequence="Q")[0]
+            x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
+                                       format_char_sequence="ddq"*num_points2D)
+            xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
+                                   tuple(map(float, x_y_id_s[1::3]))])
+            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
+            images[image_id] = Image(
+                id=image_id, qvec=qvec, tvec=tvec,
+                camera_id=camera_id, name=image_name,
+                xys=xys, point3D_ids=point3D_ids)
+    return images
+
+
+def read_points3D_text(path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadPoints3DText(const std::string& path)
+        void Reconstruction::WritePoints3DText(const std::string& path)
+    """
+    points3D = {}
+    with open(path, "r") as fid:
+        while True:
+            line = fid.readline()
+            if not line:
+                break
+            line = line.strip()
+            if len(line) > 0 and line[0] != "#":
+                elems = line.split()
+                point3D_id = int(elems[0])
+                xyz = np.array(tuple(map(float, elems[1:4])))
+                rgb = np.array(tuple(map(int, elems[4:7])))
+                error = float(elems[7])
+                image_ids = np.array(tuple(map(int, elems[8::2])))
+                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
+                points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
+                                               error=error, image_ids=image_ids,
+                                               point2D_idxs=point2D_idxs)
+    return points3D
+
+
+def read_points3d_binary(path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadPoints3DBinary(const std::string& path)
+        void Reconstruction::WritePoints3DBinary(const std::string& path)
+    """
+    points3D = {}
+    with open(path_to_model_file, "rb") as fid:
+        num_points = read_next_bytes(fid, 8, "Q")[0]
+        for point_line_index in range(num_points):
+            binary_point_line_properties = read_next_bytes(
+                fid, num_bytes=43, format_char_sequence="QdddBBBd")
+            point3D_id = binary_point_line_properties[0]
+            xyz = np.array(binary_point_line_properties[1:4])
+            rgb = np.array(binary_point_line_properties[4:7])
+            error = np.array(binary_point_line_properties[7])
+            track_length = read_next_bytes(
+                fid, num_bytes=8, format_char_sequence="Q")[0]
+            track_elems = read_next_bytes(
+                fid, num_bytes=8*track_length,
+                format_char_sequence="ii"*track_length)
+            image_ids = np.array(tuple(map(int, track_elems[0::2])))
+            point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
+            points3D[point3D_id] = Point3D(
+                id=point3D_id, xyz=xyz, rgb=rgb,
+                error=error, image_ids=image_ids,
+                point2D_idxs=point2D_idxs)
+    return points3D
+
+
+def read_model(path, ext):
+    if ext == ".txt":
+        cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
+        images = read_images_text(os.path.join(path, "images" + ext))
+        points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
+    else:
+        cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
+        images = read_images_binary(os.path.join(path, "images" + ext))
+        points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
+    return cameras, images, points3D
+
+
+def qvec2rotmat(qvec):
+    return np.array([
+        [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
+         2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
+         2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
+        [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
+         1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
+         2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
+        [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
+         2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
+         1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
+
+
+def rotmat2qvec(R):
+    Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
+    K = np.array([
+        [Rxx - Ryy - Rzz, 0, 0, 0],
+        [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
+        [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
+        [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
+    eigvals, eigvecs = np.linalg.eigh(K)
+    qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
+    if qvec[0] < 0:
+        qvec *= -1
+    return qvec
+
+
+def write_cameras_text(cameras, path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::WriteCamerasText(const std::string& path)
+        void Reconstruction::ReadCamerasText(const std::string& path)
+    """
+    HEADER = '# Camera list with one line of data per camera:\n'
+    '#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n'
+    '# Number of cameras: {}\n'.format(len(cameras))
+    with open(path, "w") as fid:
+        fid.write(HEADER)
+        for _, cam in cameras.items():
+            to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
+            line = " ".join([str(elem) for elem in to_write])
+            fid.write(line + "\n")
+
+def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
+    """pack and write to a binary file.
+    :param fid:
+    :param data: data to send, if multiple elements are sent at the same time,
+    they should be encapsuled either in a list or a tuple
+    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
+    should be the same length as the data list or tuple
+    :param endian_character: Any of {@, =, <, >, !}
+    """
+    if isinstance(data, (list, tuple)):
+        bytes = struct.pack(endian_character + format_char_sequence, *data)
+    else:
+        bytes = struct.pack(endian_character + format_char_sequence, data)
+    fid.write(bytes)
+
+def write_cameras_binary(cameras, path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::WriteCamerasBinary(const std::string& path)
+        void Reconstruction::ReadCamerasBinary(const std::string& path)
+    """
+    with open(path_to_model_file, "wb") as fid:
+        write_next_bytes(fid, len(cameras), "Q")
+        for _, cam in cameras.items():
+            model_id = CAMERA_MODEL_NAMES[cam.model].model_id
+            camera_properties = [cam.id,
+                                 model_id,
+                                 cam.width,
+                                 cam.height]
+            write_next_bytes(fid, camera_properties, "iiQQ")
+            for p in cam.params:
+                write_next_bytes(fid, float(p), "d")
+    return cameras
+
+
+def write_images_binary(images, path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadImagesBinary(const std::string& path)
+        void Reconstruction::WriteImagesBinary(const std::string& path)
+    """
+    with open(path_to_model_file, "wb") as fid:
+        write_next_bytes(fid, len(images), "Q")
+        for _, img in images.items():
+            write_next_bytes(fid, img.id, "i")
+            write_next_bytes(fid, img.qvec.tolist(), "dddd")
+            write_next_bytes(fid, img.tvec.tolist(), "ddd")
+            write_next_bytes(fid, img.camera_id, "i")
+            for char in img.name:
+                write_next_bytes(fid, char.encode("utf-8"), "c")
+            write_next_bytes(fid, b"\x00", "c")
+            write_next_bytes(fid, len(img.point3D_ids), "Q")
+            for xy, p3d_id in zip(img.xys, img.point3D_ids):
+                write_next_bytes(fid, [*xy, p3d_id], "ddq")
+
+def write_images_text(images, path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadImagesText(const std::string& path)
+        void Reconstruction::WriteImagesText(const std::string& path)
+    """
+    if len(images) == 0:
+        mean_observations = 0
+    else:
+        mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
+    HEADER = '# Image list with two lines of data per image:\n'
+    '#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n'
+    '#   POINTS2D[] as (X, Y, POINT3D_ID)\n'
+    '# Number of images: {}, mean observations per image: {}\n'.format(len(images), mean_observations)
+
+    with open(path, "w") as fid:
+        fid.write(HEADER)
+        for _, img in images.items():
+            image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
+            first_line = " ".join(map(str, image_header))
+            fid.write(first_line + "\n")
+
+            points_strings = []
+            for xy, point3D_id in zip(img.xys, img.point3D_ids):
+                points_strings.append(" ".join(map(str, [*xy, point3D_id])))
+            fid.write(" ".join(points_strings) + "\n")
+
+def write_points3D_text(points3D, path):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadPoints3DText(const std::string& path)
+        void Reconstruction::WritePoints3DText(const std::string& path)
+    """
+    if len(points3D) == 0:
+        mean_track_length = 0
+    else:
+        mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
+    HEADER = '# 3D point list with one line of data per point:\n'
+    '#   POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n'
+    '# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length)
+
+    with open(path, "w") as fid:
+        fid.write(HEADER)
+        for _, pt in points3D.items():
+            point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
+            fid.write(" ".join(map(str, point_header)) + " ")
+            track_strings = []
+            for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
+                track_strings.append(" ".join(map(str, [image_id, point2D])))
+            fid.write(" ".join(track_strings) + "\n")
+
+
+def write_points3d_binary(points3D, path_to_model_file):
+    """
+    see: src/base/reconstruction.cc
+        void Reconstruction::ReadPoints3DBinary(const std::string& path)
+        void Reconstruction::WritePoints3DBinary(const std::string& path)
+    """
+    with open(path_to_model_file, "wb") as fid:
+        write_next_bytes(fid, len(points3D), "Q")
+        for _, pt in points3D.items():
+            write_next_bytes(fid, pt.id, "Q")
+            write_next_bytes(fid, pt.xyz.tolist(), "ddd")
+            write_next_bytes(fid, pt.rgb.tolist(), "BBB")
+            write_next_bytes(fid, pt.error, "d")
+            track_length = pt.image_ids.shape[0]
+            write_next_bytes(fid, track_length, "Q")
+            for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
+                write_next_bytes(fid, [image_id, point2D_id], "ii")
--- a/easymocap/mytools/colmap_wrapper.py
+++ b/easymocap/mytools/colmap_wrapper.py
@ -0,0 +1,468 @@
+'''
+  @ Date: 2022-06-20 15:03:50
+  @ Author: Qing Shuai
+  @ Mail: s_q@zju.edu.cn
+  @ LastEditors: Qing Shuai
+  @ LastEditTime: 2022-08-16 20:24:07
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/colmap_wrapper.py
+'''
+
+import shutil
+import sys
+import os
+import sqlite3
+import numpy as np
+from os.path import join
+import cv2
+from .debug_utils import mkdir, run_cmd, log, mywarn
+from .colmap_structure import Camera, Image, CAMERA_MODEL_NAMES
+from .colmap_structure import rotmat2qvec
+from .colmap_structure import read_points3d_binary
+
+IS_PYTHON3 = sys.version_info[0] >= 3
+
+MAX_IMAGE_ID = 2**31 - 1
+
+CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
+    camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    model INTEGER NOT NULL,
+    width INTEGER NOT NULL,
+    height INTEGER NOT NULL,
+    params BLOB,
+    prior_focal_length INTEGER NOT NULL)"""
+
+CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
+    image_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""
+
+CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
+    image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL UNIQUE,
+    camera_id INTEGER NOT NULL,
+    prior_qw REAL,
+    prior_qx REAL,
+    prior_qy REAL,
+    prior_qz REAL,
+    prior_tx REAL,
+    prior_ty REAL,
+    prior_tz REAL,
+    CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}),
+    FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))
+""".format(MAX_IMAGE_ID)
+
+CREATE_TWO_VIEW_GEOMETRIES_TABLE = """
+CREATE TABLE IF NOT EXISTS two_view_geometries (
+    pair_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    config INTEGER NOT NULL,
+    F BLOB,
+    E BLOB,
+    H BLOB)
+"""
+
+CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
+    image_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB,
+    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)
+"""
+
+CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
+    pair_id INTEGER PRIMARY KEY NOT NULL,
+    rows INTEGER NOT NULL,
+    cols INTEGER NOT NULL,
+    data BLOB)"""
+
+CREATE_NAME_INDEX = \
+    "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
+
+CREATE_ALL = "; ".join([
+    CREATE_CAMERAS_TABLE,
+    CREATE_IMAGES_TABLE,
+    CREATE_KEYPOINTS_TABLE,
+    CREATE_DESCRIPTORS_TABLE,
+    CREATE_MATCHES_TABLE,
+    CREATE_TWO_VIEW_GEOMETRIES_TABLE,
+    CREATE_NAME_INDEX
+])
+
+def image_ids_to_pair_id(image_id1, image_id2):
+    if image_id1 > image_id2:
+        image_id1, image_id2 = image_id2, image_id1
+    return image_id1 * MAX_IMAGE_ID + image_id2
+
+
+def pair_id_to_image_ids(pair_id):
+    image_id2 = pair_id % MAX_IMAGE_ID
+    image_id1 = (pair_id - image_id2) // MAX_IMAGE_ID
+    return image_id1, image_id2
+
+def array_to_blob(array):
+    if IS_PYTHON3:
+        return array.tobytes()
+    else:
+        return np.getbuffer(array)
+
+def blob_to_array(blob, dtype, shape=(-1,)):
+    if blob is None:
+        return np.empty((0, 2), dtype=dtype)
+    if IS_PYTHON3:
+        return np.frombuffer(blob, dtype=dtype).reshape(*shape)
+    else:
+        return np.frombuffer(blob, dtype=dtype).reshape(*shape)
+
+
+class COLMAPDatabase(sqlite3.Connection):
+
+    @staticmethod
+    def connect(database_path):
+        return sqlite3.connect(database_path, factory=COLMAPDatabase)
+
+
+    def __init__(self, *args, **kwargs):
+        super(COLMAPDatabase, self).__init__(*args, **kwargs)
+
+        self.create_tables = lambda: self.executescript(CREATE_ALL)
+        self.create_cameras_table = \
+            lambda: self.executescript(CREATE_CAMERAS_TABLE)
+        self.create_descriptors_table = \
+            lambda: self.executescript(CREATE_DESCRIPTORS_TABLE)
+        self.create_images_table = \
+            lambda: self.executescript(CREATE_IMAGES_TABLE)
+        self.create_two_view_geometries_table = \
+            lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE)
+        self.create_keypoints_table = \
+            lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
+        self.create_matches_table = \
+            lambda: self.executescript(CREATE_MATCHES_TABLE)
+        self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)
+
+    def add_camera(self, model, width, height, params,
+                   prior_focal_length=False, camera_id=None):
+        params = np.asarray(params, np.float64)
+        cursor = self.execute(
+            "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)",
+            (camera_id, model, width, height, array_to_blob(params),
+             prior_focal_length))
+        return cursor.lastrowid
+
+    def add_image(self, name, camera_id,
+                  prior_q=np.full(4, np.NaN), prior_t=np.full(3, np.NaN), image_id=None):
+        cursor = self.execute(
+            "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            (image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2],
+             prior_q[3], prior_t[0], prior_t[1], prior_t[2]))
+        return cursor.lastrowid
+
+    def add_keypoints(self, image_id, keypoints):
+        assert(len(keypoints.shape) == 2)
+        assert(keypoints.shape[1] in [2, 4, 6])
+
+        keypoints = np.asarray(keypoints, np.float32)
+        self.execute(
+            "INSERT INTO keypoints VALUES (?, ?, ?, ?)",
+            (image_id,) + keypoints.shape + (array_to_blob(keypoints),))
+
+    def add_descriptors(self, image_id, descriptors):
+        descriptors = np.ascontiguousarray(descriptors, np.uint8)
+        self.execute(
+            "INSERT INTO descriptors VALUES (?, ?, ?, ?)",
+            (image_id,) + descriptors.shape + (array_to_blob(descriptors),))
+
+    def add_matches(self, image_id1, image_id2, matches):
+        assert(len(matches.shape) == 2)
+        assert(matches.shape[1] == 2)
+
+        if image_id1 > image_id2:
+            matches = matches[:,::-1]
+
+        pair_id = image_ids_to_pair_id(image_id1, image_id2)
+        matches = np.asarray(matches, np.uint32)
+        self.execute(
+            "INSERT INTO matches VALUES (?, ?, ?, ?)",
+            (pair_id,) + matches.shape + (array_to_blob(matches),))
+
+    def add_two_view_geometry(self, image_id1, image_id2, matches,
+                              F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2):
+        assert(len(matches.shape) == 2)
+        assert(matches.shape[1] == 2)
+
+        if image_id1 > image_id2:
+            matches = matches[:,::-1]
+
+        pair_id = image_ids_to_pair_id(image_id1, image_id2)
+        matches = np.asarray(matches, np.uint32)
+        F = np.asarray(F, dtype=np.float64)
+        E = np.asarray(E, dtype=np.float64)
+        H = np.asarray(H, dtype=np.float64)
+        self.execute(
+            "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+            (pair_id,) + matches.shape + (array_to_blob(matches), config,
+             array_to_blob(F), array_to_blob(E), array_to_blob(H)))
+    
+    def read_images(self):
+        image_id_to_name, name_to_image_id = {}, {}
+        image_results = self.execute("SELECT * FROM images")
+        for result in image_results:
+            image_id, name, camera_id, q0, q1, q2, q3, t0, t1, t2 = result
+            image_id_to_name[image_id] = name
+            name_to_image_id[name] = image_id
+        return image_id_to_name, name_to_image_id
+
+    def read_keypoints(self, mapping=None):
+        image_id_to_keypoints = {}
+        keypoints_results = self.execute("SELECT * FROM keypoints")
+        for keypoints_result in keypoints_results:
+            image_id, rows, cols, keypoints = keypoints_result
+            keypoints = blob_to_array(keypoints, np.float32, (rows, cols))
+            if mapping is None:
+                image_id_to_keypoints[image_id] = keypoints
+            else:
+                image_id_to_keypoints[mapping[image_id]] = keypoints
+        return image_id_to_keypoints
+
+    def read_matches(self, mapping=None):
+        matches_results = self.execute("SELECT * FROM matches")
+        matches = {}
+        for matches_result in matches_results:
+            pair_id, rows, cols, match = matches_result
+            image_id0, image_id1 = pair_id_to_image_ids(pair_id)
+            if rows == 0:
+                continue
+            match = blob_to_array(match, dtype=np.uint32, shape=(rows, cols))
+            if mapping is not None:
+                image_id0 = mapping[image_id0]
+                image_id1 = mapping[image_id1]
+            matches[(image_id0, image_id1)] = match
+        return matches
+    
+    def read_two_view_geometry(self, mapping=None):
+        geometry = self.execute("SELECT * FROM two_view_geometries")
+        geometries = {}
+        for pair_id, rows, cols, data, config, F, E, H in geometry:
+            F = blob_to_array(F, dtype=np.float64)
+            E = blob_to_array(E, dtype=np.float64)
+            H = blob_to_array(H, dtype=np.float64)
+            image_id0, image_id1 = pair_id_to_image_ids(pair_id)
+            match = blob_to_array(data, dtype=np.uint32, shape=(rows, cols))
+            if rows == 0:continue
+            if mapping is not None:
+                image_id0 = mapping[image_id0]
+                image_id1 = mapping[image_id1]
+            geometries[(image_id0, image_id1)] = {'matches': match, 'F':F, 'E':E, 'H':H, 'config': config}
+        return geometries
+
+def create_empty_db(database_path):
+    if os.path.exists(database_path):
+        mywarn('Removing old database: {}'.format(database_path))
+        os.remove(database_path)
+    print('Creating an empty database...')
+    db = COLMAPDatabase.connect(database_path)
+    db.create_tables()
+    db.commit()
+    db.close()
+
+def create_cameras(db, cameras, subs, width, height, share_intri=True):
+    model = 'OPENCV'
+    if share_intri:
+        cam_id = 1
+        K = cameras[subs[0]]['K']
+        D = cameras[subs[0]]['dist'].reshape(1, 5)
+        fx, fy, cx, cy, k1, k2, p1, p2, k3, k4, k5, k6 = K[0, 0], K[1, 1], K[0, 2], K[1, 2], D[0, 0], D[0, 1], D[0, 2], D[0, 3], D[0, 4], 0, 0, 0
+        
+        params = [fx, fy, cx, cy, k1, k2, p1, p2]
+        # params = [fx, fy, cx, cy, 0, 0, 0, 0]
+        camera = Camera(
+            id=cam_id,
+            model=model,
+            width=width,
+            height=height,
+            params=params
+        )
+        cameras_colmap = {cam_id: camera}
+        cameras_map = {sub:cam_id for sub in subs}
+        # 
+        db.add_camera(CAMERA_MODEL_NAMES[model].model_id, width, height, params,
+                   prior_focal_length=False, camera_id=cam_id)
+    else:
+        raise NotImplementedError
+    return cameras_colmap, cameras_map
+
+def create_images(db, cameras, cameras_map, image_names):
+    subs = sorted(list(image_names.keys()))
+    images = {}
+    for sub, image_name in image_names.items():
+        img_id = subs.index(sub) + 1
+        R = cameras[sub]['R']
+        T = cameras[sub]['T']
+        qvec = rotmat2qvec(R)
+        tvec = T.T[0]
+        image = Image(
+            id=img_id,
+            qvec=qvec,
+            tvec=tvec,
+            camera_id=cameras_map[sub],
+            name=os.path.basename(image_name),
+            xys=[],
+            point3D_ids=[]
+        )
+        images[img_id] = image
+        db.add_image(image.name, camera_id=image.camera_id,
+            prior_q=image.qvec, prior_t=image.tvec, image_id=img_id)
+    return images
+
+def copy_images(data, out, nf=0, copy_func=shutil.copyfile, mask='mask', add_mask=True):
+    subs = sorted(os.listdir(join(data, 'images')))
+    image_names = {}
+    for sub in subs:
+        srcname = join(data, 'images', sub, '{:06d}.jpg'.format(nf))
+        if not os.path.exists(srcname):
+            mywarn('{} not exists, skip'.format(srcname))
+            return False
+        dstname = join(out, 'images', '{}.jpg'.format(sub))
+        image_names[sub] = dstname
+        if os.path.exists(dstname):
+            continue
+        os.makedirs(os.path.dirname(dstname), exist_ok=True)
+        copy_func(srcname, dstname)
+        mskname = join(data, mask, sub, '{:06d}.png'.format(nf))
+        dstname = join(out, 'mask', '{}.jpg.png'.format(sub))
+        if os.path.exists(mskname) and add_mask:
+            os.makedirs(os.path.dirname(dstname), exist_ok=True)
+            copy_func(mskname, dstname)
+    return True, image_names
+
+def colmap_feature_extract(colmap, path, share_camera, add_mask):
+    '''
+struct SiftMatchingOptions {
+  // Number of threads for feature matching and geometric verification.
+  int num_threads = -1;
+
+  // Whether to use the GPU for feature matching.
+  bool use_gpu = true;
+
+  // Index of the GPU used for feature matching. For multi-GPU matching,
+  // you should separate multiple GPU indices by comma, e.g., "0,1,2,3".
+  std::string gpu_index = "-1";
+
+  // Maximum distance ratio between first and second best match.
+  double max_ratio = 0.8;
+
+  // Maximum distance to best match.
+  double max_distance = 0.7;
+
+  // Whether to enable cross checking in matching.
+  bool cross_check = true;
+
+  // Maximum number of matches.
+  int max_num_matches = 32768;
+
+  // Maximum epipolar error in pixels for geometric verification.
+  double max_error = 4.0;
+
+  // Confidence threshold for geometric verification.
+  double confidence = 0.999;
+
+  // Minimum/maximum number of RANSAC iterations. Note that this option
+  // overrules the min_inlier_ratio option.
+  int min_num_trials = 100;
+  int max_num_trials = 10000;
+
+  // A priori assumed minimum inlier ratio, which determines the maximum
+  // number of iterations.
+  double min_inlier_ratio = 0.25;
+
+  // Minimum number of inliers for an image pair to be considered as
+  // geometrically verified.
+  int min_num_inliers = 15;
+
+  // Whether to attempt to estimate multiple geometric models per image pair.
+  bool multiple_models = false;
+
+  // Whether to perform guided matching, if geometric verification succeeds.
+  bool guided_matching = false;
+
+  bool Check() const;
+};
+'''
+    cmd = f'{colmap} feature_extractor --database_path {path}/database.db \
+--image_path {path}/images \
+--SiftExtraction.peak_threshold 0.006 \
+--ImageReader.camera_model OPENCV \
+'
+    if share_camera:
+        cmd += ' --ImageReader.single_camera 1'
+
+    if add_mask:
+        cmd += f' --ImageReader.mask_path {path}/mask'
+    cmd += f' >> {path}/log.txt'
+    run_cmd(cmd)
+
+def colmap_feature_match(colmap, path):
+    cmd = f'{colmap} exhaustive_matcher --database_path {path}/database.db \
+--SiftMatching.guided_matching 0 \
+--SiftMatching.max_ratio 0.8 \
+--SiftMatching.max_distance 0.5 \
+--SiftMatching.cross_check 1 \
+--SiftMatching.max_error 4 \
+--SiftMatching.max_num_matches 32768 \
+--SiftMatching.confidence 0.9999 \
+--SiftMatching.max_num_trials 10000 \
+--SiftMatching.min_inlier_ratio 0.25 \
+--SiftMatching.min_num_inliers 30 \
+>> {path}/log.txt'
+    run_cmd(cmd)
+
+def colmap_ba(colmap, path, with_init=False):
+    if with_init:
+        cmd = f'{colmap} point_triangulator --database_path {path}/database.db \
+--image_path {path}/images \
+--input_path {path}/sparse/0 \
+--output_path {path}/sparse/0 \
+--Mapper.tri_merge_max_reproj_error 3 \
+--Mapper.ignore_watermarks 1 \
+--Mapper.filter_max_reproj_error 2 \
+>> {path}/log.txt'
+        run_cmd(cmd)
+        cmd = f'{colmap} bundle_adjuster \
+--input_path {path}/sparse/0 \
+--output_path {path}/sparse/0 \
+>> {path}/log.txt'
+        run_cmd(cmd)
+        points3d = read_points3d_binary(join(path, 'sparse', '0', 'points3D.bin'))
+        pids = list(points3d.keys())
+        mean_error = np.mean([points3d[p].error for p in pids])
+        log('Triangulate {} points, mean error: {:.2f} pixel'.format(len(pids), mean_error))
+    else:
+        mkdir(join(path, 'sparse'))
+        cmd = f'{colmap} mapper --database_path {path}/database.db --image_path {path}/images --output_path {path}/sparse \
+    --Mapper.ba_refine_principal_point 1 \
+    --Mapper.ba_global_max_num_iterations 1000 \
+    >> {path}/log.txt'
+        run_cmd(cmd)
+    
+
+def colmap_dense(colmap, path):
+    mkdir(join(path, 'dense'))
+    cmd = f'{colmap} image_undistorter --image_path {path}/images --input_path {path}/sparse/0 --output_path {path}/dense --output_type COLMAP --max_image_size 2000'
+    run_cmd(cmd)
+    cmd = f'{colmap} patch_match_stereo \
+--workspace_path {path}/dense \
+--workspace_format COLMAP \
+--PatchMatchStereo.geom_consistency true \
+>> {path}/log.txt'
+
+    run_cmd(cmd)        
+    cmd = f'{colmap} stereo_fusion \
+--workspace_path {path}/dense \
+--workspace_format COLMAP \
+--input_type geometric \
+--output_path {path}/dense/fused.ply \
+>> {path}/log.txt'
+    run_cmd(cmd)        
--- a/easymocap/mytools/debug_utils.py
+++ b/easymocap/mytools/debug_utils.py
@ -0,0 +1,86 @@
+'''
+  @ Date: 2022-02-14 14:54:50
+  @ Author: Qing Shuai
+  @ Mail: s_q@zju.edu.cn
+  @ LastEditors: Qing Shuai
+  @ LastEditTime: 2022-06-14 18:07:19
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/debug_utils.py
+'''
+from termcolor import colored
+import os
+from os.path import join
+import shutil
+import subprocess
+import time
+import datetime
+
+def toc():
+    return time.time() * 1000
+
+def myprint(cmd, level):
+    color = {'run': 'blue', 'info': 'green', 'warn': 'yellow', 'error': 'red'}[level]
+    print(colored(cmd, color))
+
+def log(text):
+    myprint(text, 'info')
+
+def log_time(text):
+    strf = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
+    print(colored(strf, 'yellow'), colored(text, 'green'))
+
+def mywarn(text):
+    myprint(text, 'warn')
+
+warning_infos = set()
+
+def oncewarn(text):
+    if text in warning_infos:
+        return
+    warning_infos.add(text)
+    myprint(text, 'warn')
+
+
+def myerror(text):
+    myprint(text, 'error')
+
+def run_cmd(cmd, verbo=True, bg=False):
+    if verbo: myprint('[run] ' + cmd, 'run')
+    if bg:
+        args = cmd.split()
+        print(args)
+        p = subprocess.Popen(args)
+        return [p]
+    else:
+        os.system(cmd)
+        return []
+
+def mkdir(path):
+    if os.path.exists(path):
+        return 0
+    log('mkdir {}'.format(path))
+    os.makedirs(path, exist_ok=True)
+
+def cp(srcname, dstname):
+    mkdir(join(os.path.dirname(dstname)))
+    shutil.copyfile(srcname, dstname)
+
+def print_table(header, contents):
+    from tabulate import tabulate
+    length = len(contents[0])
+    tables = [[] for _ in range(length)]
+    mean = ['Mean']
+    for icnt, content in enumerate(contents):
+        for i in range(length):
+            if isinstance(content[i], float):
+                tables[i].append('{:6.2f}'.format(content[i]))
+            else:
+                tables[i].append('{}'.format(content[i]))
+        if icnt > 0:
+            mean.append('{:6.2f}'.format(sum(content)/length))
+    tables.append(mean)
+    print(tabulate(tables, header, tablefmt='fancy_grid'))
+
+def check_exists(path):
+    flag1 = os.path.isfile(path) and os.path.exists(path)
+    flag2 = os.path.isdir(path) and len(os.listdir(path)) >= 10
+    return flag1 or flag2
--- a/easymocap/mytools/file_utils.py
+++ b/easymocap/mytools/file_utils.py
@ -2,8 +2,8 @@
  @ Date: 2021-03-15 12:23:12
  @ Author: Qing Shuai
  @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-06-14 22:25:58
-  @ FilePath: /EasyMocapRelease/easymocap/mytools/file_utils.py
+  @ LastEditTime: 2022-07-21 15:55:02
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/file_utils.py
 '''
 import os
 import json
@ -11,12 +11,18 @@ import numpy as np
 from os.path import join

 mkdir = lambda x:os.makedirs(x, exist_ok=True)
-mkout = lambda x:mkdir(os.path.dirname(x))
-
+# mkout = lambda x:mkdir(os.path.dirname(x)) if x is not None
+def mkout(x):
+    if x is not None:
+        mkdir(os.path.dirname(x))
 def read_json(path):
    assert os.path.exists(path), path
    with open(path) as f:
-        data = json.load(f)
+        try:
+            data = json.load(f)
+        except:
+            print('Reading error {}'.format(path))
+            data = []
    return data

 def save_json(file, data):
@ -25,6 +31,17 @@ def save_json(file, data):
    with open(file, 'w') as f:
        json.dump(data, f, indent=4)

+def append_json(file, data):
+    if not os.path.exists(os.path.dirname(file)):
+        os.makedirs(os.path.dirname(file))
+    if os.path.exists(file):
+        res = read_json(file)
+        assert isinstance(res, list)
+        res.append(data)
+        data = res
+    with open(file, 'w') as f:
+        json.dump(data, f, indent=4)
+
 save_annot = save_json

 def getFileList(root, ext='.jpg'):
@ -51,19 +68,24 @@ def read_annot(annotname, mode='body25'):
            data[i]['id'] = data[i].pop('personID')
        if 'keypoints2d' in data[i].keys() and 'keypoints' not in data[i].keys():
            data[i]['keypoints'] = data[i].pop('keypoints2d')
-        for key in ['bbox', 'keypoints', 'handl2d', 'handr2d', 'face2d']:
+        for key in ['bbox', 'keypoints', 
+            'bbox_handl2d', 'handl2d', 
+            'bbox_handr2d', 'handr2d', 
+            'bbox_face2d', 'face2d']:
            if key not in data[i].keys():continue
            data[i][key] = np.array(data[i][key])
            if key == 'face2d':
                # TODO: Make parameters, 17 is the offset for the eye brows,
                # etc. 51 is the total number of FLAME compatible landmarks
                data[i][key] = data[i][key][17:17+51, :]
-        data[i]['bbox'] = data[i]['bbox'][:5]
-        if data[i]['bbox'][-1] < 0.001:
-            # print('{}/{} bbox conf = 0, may be error'.format(annotname, i))
-            data[i]['bbox'][-1] = 1
+        if 'bbox' in data[i].keys():
+            data[i]['bbox'] = data[i]['bbox'][:5]
+            if data[i]['bbox'][-1] < 0.001:
+                print('{}/{} bbox conf = 0, may be error'.format(annotname, i))
+                data[i]['bbox'][-1] = 0
+        # combine the basic results
        if mode == 'body25':
-            data[i]['keypoints'] = data[i]['keypoints']
+            data[i]['keypoints'] = data[i].get('keypoints', np.zeros((25, 3)))
        elif mode == 'body15':
            data[i]['keypoints'] = data[i]['keypoints'][:15, :]
        elif mode in ['handl', 'handr']:
@ -91,7 +113,7 @@ def array2raw(array, separator=' ', fmt='%.3f'):
        res.append(separator.join([fmt%(d) for d in data]))
    
    
-def myarray2string(array, separator=', ', fmt='%.3f', indent=8):
+def myarray2string(array, separator=', ', fmt='%7.7f', indent=8):
    assert len(array.shape) == 2, 'Only support MxN matrix, {}'.format(array.shape)
    blank = ' ' * indent
    res = ['[']
@ -110,14 +132,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
        out_text.append('    {\n')
        output = {}
        output['id'] = data['id']
-        for key in keys:
-            if key not in data.keys():continue
+        for k in ['type']:
+            if k in data.keys():output[k] = '\"{}\"'.format(data[k])
+        keys_current = [k for k in keys if k in data.keys()]
+        for key in keys_current:
            # BUG: This function will failed if the rows of the data[key] is too large
            # output[key] = np.array2string(data[key], max_line_width=1000, separator=', ', formatter=format_out)
            output[key] = myarray2string(data[key], separator=', ', fmt=fmt)
        for key in output.keys():
            out_text.append('        \"{}\": {}'.format(key, output[key]))
-            if key != keys[-1]:
+            if key != keys_current[-1]:
                out_text.append(',\n')
            else:
                out_text.append('\n')
@ -134,17 +158,16 @@ def write_common_results(dumpname=None, results=[], keys=[], fmt='%2.3f'):
    else:
        return ''.join(out_text)

-def write_keypoints3d(dumpname, results):
+def write_keypoints3d(dumpname, results, keys = ['keypoints3d']):
    # TODO:rewrite it
-    keys = ['keypoints3d']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.7f')

 def write_vertices(dumpname, results):
    keys = ['vertices']
-    write_common_results(dumpname, results, keys, fmt='%6.3f')
+    write_common_results(dumpname, results, keys, fmt='%6.5f')

 def write_smpl(dumpname, results):
-    keys = ['Rh', 'Th', 'poses', 'expression', 'shapes']
+    keys = ['Rh', 'Th', 'poses', 'handl', 'handr', 'expression', 'shapes']
    write_common_results(dumpname, results, keys)

 def batch_bbox_from_pose(keypoints2d, height, width, rate=0.1):
--- a/easymocap/mytools/reader.py
+++ b/easymocap/mytools/reader.py
@ -2,8 +2,8 @@
  @ Date: 2021-04-21 15:19:21
  @ Author: Qing Shuai
  @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-07-29 16:12:37
-  @ FilePath: /EasyMocap/easymocap/mytools/reader.py
+  @ LastEditTime: 2022-07-22 23:23:26
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/reader.py
 '''
 # function to read data
 """
@ -27,17 +27,14 @@ def read_keypoints3d(filename):
    res_ = []
    for d in data:
        pid = d['id'] if 'id' in d.keys() else d['personID']
-        pose3d = np.array(d['keypoints3d'], dtype=np.float32)
-        if pose3d.shape[0] > 25:
-            # 对于有手的情况，把手的根节点赋值成body25上的点
-            pose3d[25, :] = pose3d[7, :]
-            pose3d[46, :] = pose3d[4, :]
-        if pose3d.shape[1] == 3:
-            pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
-        res_.append({
-            'id': pid,
-            'keypoints3d': pose3d
-        })
+        ret = {'id': pid, 'type': 'body25'}
+        for key in ['keypoints3d', 'handl3d', 'handr3d', 'face3d']:
+            if key not in d.keys():continue
+            pose3d = np.array(d[key], dtype=np.float32)
+            if pose3d.shape[1] == 3:
+                pose3d = np.hstack([pose3d, np.ones((pose3d.shape[0], 1))])
+            ret[key] = pose3d
+        res_.append(ret)
    return res_

 def read_keypoints3d_dict(filename):
@ -56,9 +53,11 @@ def read_keypoints3d_dict(filename):

 def read_smpl(filename):    
    datas = read_json(filename)
+    if isinstance(datas, dict):
+        datas = datas['annots']
    outputs = []
    for data in datas:
-        for key in ['Rh', 'Th', 'poses', 'shapes', 'expression']:
+        for key in ['Rh', 'Th', 'poses', 'handl', 'handr', 'shapes', 'expression', 'keypoints3d']:
            if key in data.keys():
                data[key] = np.array(data[key], dtype=np.float32)
        # for smplx results
--- a/easymocap/mytools/triangulator.py
+++ b/easymocap/mytools/triangulator.py
@ -0,0 +1,735 @@
+import numpy as np
+import cv2
+from easymocap.datasets.base import crop_image
+
+from easymocap.estimator.wrapper_base import bbox_from_keypoints
+from easymocap.mytools.vis_base import merge, plot_keypoints_auto
+from .debug_utils import log, mywarn, myerror
+
+def batch_triangulate(keypoints_, Pall, min_view=2):
+    # keypoints: (nViews, nJoints, 3)
+    # Pall: (nViews, 3, 4)
+    # A: (nJoints, nViewsx2, 4), x: (nJoints, 4, 1); b: (nJoints, nViewsx2, 1)
+    v = (keypoints_[:, :, -1]>0).sum(axis=0)
+    valid_joint = np.where(v >= min_view)[0]
+    keypoints = keypoints_[:, valid_joint]
+    conf3d = keypoints[:, :, -1].sum(axis=0)/v[valid_joint]
+    # P2: P矩阵的最后一行：(1, nViews, 1, 4)
+    P0 = Pall[None, :, 0, :]
+    P1 = Pall[None, :, 1, :]
+    P2 = Pall[None, :, 2, :]
+    # uP2: x坐标乘上P2: (nJoints, nViews, 1, 4)
+    uP2 = keypoints[:, :, 0].T[:, :, None] * P2
+    vP2 = keypoints[:, :, 1].T[:, :, None] * P2
+    conf = keypoints[:, :, 2].T[:, :, None]
+    Au = conf * (uP2 - P0)
+    Av = conf * (vP2 - P1)
+    A = np.hstack([Au, Av])
+    u, s, v = np.linalg.svd(A)
+    X = v[:, -1, :]
+    X = X / X[:, 3:]
+    # out: (nJoints, 4)
+    result = np.zeros((keypoints_.shape[1], 4))
+    result[valid_joint, :3] = X[:, :3]
+    result[valid_joint, 3] = conf3d #* (conf[..., 0].sum(axis=-1)>min_view)
+    return result
+
+def project_points(keypoints, RT, einsum='vab,kb->vka'):
+    homo = np.concatenate([keypoints[..., :3], np.ones_like(keypoints[..., :1])], axis=-1)
+    kpts2d = np.einsum(einsum, RT, homo)
+    kpts2d[..., :2] /= kpts2d[..., 2:]
+    return kpts2d
+
+def make_Cnk(n, k):
+    import itertools
+    res = {}
+    for n_ in range(3, n+1):
+        n_0 = [i for i in range(n_)]
+        for k_ in range(2, k+1):
+            res[(n_, k_)] = list(map(list, itertools.combinations(n_0, k_)))
+    return res
+
+MAX_VIEWS = 30
+Cnk = make_Cnk(MAX_VIEWS, 3)
+
+def robust_triangulate_point(kpts2d, Pall, dist_max, min_v = 3):
+    nV = kpts2d.shape[0]
+    if len(kpts2d) < min_v:# 重建失败
+        return [], None
+    # min_v = max(2, nV//2)
+    # 1. choose the combination of min_v
+    index_ = Cnk[(len(kpts2d), min(min_v, len(kpts2d)))]
+    # 2. proposals: store the reconstruction points of each proposal
+    proposals = np.zeros((len(index_), 4))
+    weight_self = np.zeros((nV, len(index_)))
+    for i, index in enumerate(index_):
+        weight_self[index, i] = 100.
+        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
+        proposals[i] = point
+    # 3. project the proposals to each view
+    #    and calculate the reprojection error
+    # (nViews, nProposals, 4)
+    kpts_repro = project_points(proposals, Pall)
+    conf = (proposals[None, :, -1] > 0) * (kpts2d[..., -1] > 0)
+    # err: (nViews, nProposals)
+    err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[..., :2], axis=-1) * conf
+    valid = 1. - err/dist_max
+    valid[valid<0] = 0
+    # consider the weight of different view
+    # TODO:naive weight:
+    conf = kpts2d[..., -1]
+    weight = conf
+    # (valid > 0)*weight_self 一项用于强制要求使用到的两个视角都需要被用到
+    # 增加一项使用的视角数的加成
+    weight_sum = (weight * valid).sum(axis=0) + ((valid > 0)*weight_self).sum(axis=0) - min_v * 100
+    if weight_sum.max() < 0:# 重建失败
+        return [], None
+    best = weight_sum.argmax()
+    if (err[index_[best], best] > dist_max).any():
+        return [], None
+    # 对于选出来的proposal，寻找其大于0的其他视角
+    point = proposals[best]
+    best_add = np.where(valid[:, best])[0].tolist()
+    index = list(index_[best])
+    best_add.sort(key=lambda x:-weight[x])
+    for add in best_add:
+        if add in index:
+            continue
+        index.append(add)
+        point = batch_triangulate(kpts2d[index, :], Pall[index], min_view=min_v)
+        kpts_repro = project_points(point, Pall[index])
+        err = np.linalg.norm(kpts_repro[..., :2] - kpts2d[index, ..., :2], axis=-1)
+        if (err > dist_max).any():
+            index.remove(add)
+            break
+    return index, point
+
+def remove_outview(kpts2d, out_view, debug):
+    if len(out_view) == 0:
+        return False
+    outv = out_view[0]
+    if debug:
+        mywarn('[triangulate] remove outview: {} from {}'.format(outv, out_view))
+    kpts2d[outv] = 0.
+    return True
+
+def remove_outjoint(kpts2d, Pall, out_joint, dist_max, min_view=3, debug=False):
+    if len(out_joint) == 0:
+        return False
+    if debug:
+        mywarn('[triangulate] remove outjoint: {}'.format(out_joint))
+    for nj in out_joint:
+        valid = np.where(kpts2d[:, nj, -1] > 0)[0]
+        if len(valid) < min_view:
+            # if less than 3 visible view, set these unvisible
+            kpts2d[:, nj, -1] = 0
+            continue
+        if len(valid) > MAX_VIEWS:
+            # only select max points
+            conf = -kpts2d[:, nj, -1]
+            valid = conf.argsort()[:MAX_VIEWS]
+        index_j, point = robust_triangulate_point(kpts2d[valid, nj:nj+1], Pall[valid], dist_max=dist_max, min_v=3)
+        index_j = valid[index_j]
+        # print('select {} for joint {}'.format(index_j, nj))
+        set0 = np.zeros(kpts2d.shape[0])
+        set0[index_j] = 1.
+        kpts2d[:, nj, -1] *= set0
+    return True
+
+def project_and_distance(kpts3d, RT, kpts2d):
+    kpts_proj = project_points(kpts3d, RT)
+    # 1. distance between input and projection
+    conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
+    dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
+    return dist, conf
+
+def iterative_triangulate(kpts2d, RT, previous=None,
+    min_conf=0.1, min_view=3, min_joints=3, dist_max=0.05, dist_vel=0.05,
+    thres_outlier_view=0.4, thres_outlier_joint=0.4, debug=False):
+    kpts2d = kpts2d.copy()
+    conf = kpts2d[..., -1]
+    kpts2d[conf<min_conf] = 0.
+    if debug:
+        log('[triangulate] kpts2d: {}'.format(kpts2d.shape))
+    # TODO: consider large motion
+    if previous is not None:
+        dist, conf = project_and_distance(previous, RT, kpts2d)
+        nottrack = (dist > dist_vel) & conf
+        if nottrack.sum() > 0:
+            kpts2d[nottrack] = 0.
+            if debug:
+                log('[triangulate] Remove with track {}'.format(np.where(nottrack)))
+    while True:
+        # 0. triangulate and project
+        kpts3d = batch_triangulate(kpts2d, RT, min_view=min_view)
+        dist, conf = project_and_distance(kpts3d, RT, kpts2d)
+        # 2. find the outlier
+        vv, jj = np.where(dist > dist_max)
+        if vv.shape[0] < 1:
+            if debug:
+                log('[triangulate] Not found outlier, break')
+            break
+        ratio_outlier_view = (dist>dist_max).sum(axis=1)/(1e-5 + conf.sum(axis=1))
+        ratio_outlier_joint = (dist>dist_max).sum(axis=0)/(1e-5 + conf.sum(axis=0))
+        # 3. find the totally wrong detections
+        out_view = np.where(ratio_outlier_view > thres_outlier_view)[0]
+        out_joint = np.where(ratio_outlier_joint > thres_outlier_joint)[0]
+        if len(out_view) > 1:
+            dist_view = dist.sum(axis=1)/(1e-5 + conf.sum(axis=1))
+            out_view = out_view.tolist()
+            out_view.sort(key=lambda x:-dist_view[x])
+            if debug: mywarn('[triangulate] Remove outlier view: {}'.format(ratio_outlier_view))
+        if remove_outview(kpts2d, out_view, debug): continue
+        if remove_outjoint(kpts2d, RT, out_joint, dist_max, debug=debug): continue
+        if debug:
+            log('[triangulate] Directly remove {}, {}'.format(vv, jj))
+        kpts2d[vv, jj, -1] = 0.
+    if debug:
+        log('[triangulate] finally {} valid points'.format((kpts3d[..., -1]>0).sum()))
+    if (kpts3d[..., -1]>0).sum() < min_joints:
+        kpts3d[..., -1] = 0.
+        kpts2d[..., -1] = 0.
+        return kpts3d, kpts2d
+    return kpts3d, kpts2d
+
+class BaseTriangulator:
+    def __init__(self, config, debug, keys) -> None:
+        self.config = config
+        self.debug = debug
+        self.keys = keys
+
+    def project_and_check(self, kpts3d, kpts2d, RT):
+        kpts_proj = project_points(kpts3d, RT)
+        conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
+        dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
+        return conf, dist
+    
+    def triangulate_with_results(self, pid, data, results):
+        new = {'id': pid}
+        for key in self.keys:
+            key3d = key.replace('2d', '3d')
+            if len(results) == 0:
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, **self.config[key])
+            else:
+                if len(results) == 1:
+                    previous = results[-1][key3d] # TODO: mean previous frame
+                elif len(results) >= 2:
+                    # TODO: mean previous velocity
+                    previous0 = results[-2][key3d] # TODO: mean previous frame
+                    previous1 = results[-1][key3d] # TODO: mean previous frame
+                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous0[:, -1:]>0))
+                    previous = previous1.copy()
+                    previous[:, :3] += vel
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, previous=previous, **self.config[key])
+                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
+            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
+            new[key3d] = kpts3d
+        return new
+
+class SimpleTriangulator(BaseTriangulator):
+    def __init__(self, keys, debug, config,
+        pid=0) -> None:
+        super().__init__(config, debug, keys)
+        self.results = []
+        self.infos = []
+        self.dim_name = ['_joints', '_views']
+        self.pid = pid
+
+    def __call__(self, data, results=None):
+        info = {}
+        if results is None:
+            results = self.results
+        new = {'id': self.pid}
+        for key in self.keys:
+            if key not in data.keys(): continue
+            key3d = key.replace('2d', '3d')
+            if self.debug:
+                log('[triangulate] {}'.format(key))
+            if len(results) == 0:
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, **self.config[key])
+            else:
+                if len(results) == 1:
+                    previous = results[-1][key3d] # TODO: mean previous frame
+                elif len(results) >= 2:
+                    # TODO: mean previous velocity
+                    previous0 = results[-2][key3d] # TODO: mean previous frame
+                    previous1 = results[-1][key3d] # TODO: mean previous frame
+                    vel = (previous1[:, :3] - previous0[:, :3])*((previous0[:, -1:]>0)&(previous0[:, -1:]>0))
+                    previous = previous1.copy()
+                    previous[:, :3] += vel
+                kpts3d, kpts2d = iterative_triangulate(data[key + '_unproj'], data['RT'],
+                    debug=self.debug, previous=previous, **self.config[key])
+                vel = np.linalg.norm(kpts3d[:, :3] - previous[:, :3], axis=-1)
+            new[key] = np.concatenate([data[key+'_distort'][..., :-1], kpts2d[..., -1:]], axis=-1)
+            new[key.replace('2d', '3d')] = kpts3d
+            if self.debug:
+                conf, dist = self.project_and_check(kpts3d, kpts2d, data['RT'])
+                for dim in [0, 1]:
+                    info_dim = {
+                        'valid': conf.sum(axis=dim),
+                        'dist': 10000*dist.sum(axis=dim)/(1e-5 + conf.sum(axis=dim)),
+                    }
+                    info[key+self.dim_name[dim]] = info_dim
+                info[key+'_joints']['valid3d'] = kpts3d[:, -1] >0
+        results.append(new)
+        self.infos.append(info)
+        return [new]
+
+    def report(self):
+        if not self.debug:
+            return 0
+        from .debug_utils import print_table
+        for key in self.infos[0].keys():
+            metrics = list(self.infos[0][key].keys())
+            values = [np.mean(np.stack([info[key][metric] for info in self.infos]), axis=0) for metric in metrics]
+            metrics = [key] + metrics
+            values = [[i for i in range(values[0].shape[0])]] + values
+            print_table(metrics, values)
+
+class SimpleTriangulatorMulti(SimpleTriangulator):
+    def __init__(self, pids, **cfg) -> None:
+        super().__init__(**cfg)
+        self.results = {}
+    
+    def __call__(self, data, results=None):
+        res_now = []
+        for ipid, pid in enumerate(data['pid']):
+            if pid not in self.results.keys():
+                self.results[pid] = []
+            data_ = {'RT': data['RT']}
+            for key in self.keys:
+                data_[key+'_distort'] = data[key+'_distort'][:, ipid]
+                data_[key+'_unproj'] = data[key+'_unproj'][:, ipid]
+                data_[key] = data[key][:, ipid]
+            res = self.triangulate_with_results(pid, data_, self.results[pid])
+            self.results[pid].append(res)
+            res_now.append(res)
+        return res_now
+
+def skew_op(x):
+    skew_op = lambda x: np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
+    res = np.zeros((3, 3), dtype=x.dtype)
+    # 0, -z, y
+    res[0, 1] = -x[2, 0]
+    res[0, 2] =  x[1, 0]
+    # z, 0, -x
+    res[1, 0] =  x[2, 0]
+    res[1, 2] = -x[0, 0]
+    # -y, x, 0
+    res[2, 0] = -x[1, 0]
+    res[2, 1] =  x[0, 0]
+    return res
+
+def fundamental_op(K0, K1, R_0, T_0, R_1, T_1):
+    invK0 = np.linalg.inv(K0)
+    return invK0.T @ (R_0 @ R_1.T) @ K1.T @ skew_op(K1 @ R_1 @ R_0.T @ (T_0 - R_0 @ R_1.T @ T_1))
+
+def drawlines(img1,img2,lines,pts1,pts2):
+    ''' img1 - image on which we draw the epilines for the points in img2
+        lines - corresponding epilines '''
+    r,c = img1.shape[:2]
+    for r,pt1,pt2 in zip(lines,pts1,pts2):
+        pt1 = list(map(lambda x:int(x+0.5), pt1[:2].tolist()))
+        pt2 = list(map(lambda x:int(x+0.5), pt2[:2].tolist()))
+        if pt1[0] < 0 or pt1[1] < 0:
+            continue
+        color = tuple(np.random.randint(0,255,3).tolist())
+        x0,y0 = map(int, [0, -r[2]/r[1] ])
+        x1,y1 = map(int, [c, -(r[2]+r[0]*c)/r[1] ])
+        img1 = cv2.line(img1, (x0,y0), (x1,y1), color,1)
+        img1 = cv2.circle(img1,tuple(pt1),5,color,-1)
+        img2 = cv2.circle(img2,tuple(pt2),5,color,-1)
+    return img1,img2
+
+def SimpleConstrain(dimGroups):
+    constrain = np.ones((dimGroups[-1], dimGroups[-1]))
+    for i in range(len(dimGroups)-1):
+        start, end = dimGroups[i], dimGroups[i+1]
+        constrain[start:end, start:end] = 0
+    N = constrain.shape[0]
+    constrain[range(N), range(N)] = 1
+    return constrain
+
+def check_cluster(affinity, row, views, dimGroups, indices, p2dAssigned, visited):
+    affinity_row = affinity[row].copy()
+    # given affinity and row, select the combine of all possible set
+    cluster = np.where((affinity[row]>0)&(p2dAssigned==-1)&(visited==0))[0].tolist()
+    cluster.sort(key=lambda x:-affinity[row, x])
+    views_ = views[cluster]
+    view_count = np.bincount(views[cluster])
+    indices_all = [indices]
+    for col in cluster:
+        v = views[col]
+        nOld = len(indices_all)
+        if indices[v] != -1: # already assigned, copy and make new 
+            for i in range(nOld):
+                ind = indices_all[i].copy()
+                ind[v] = col
+                indices_all.append(ind)
+        else: # not assigned, assign
+            for i in range(nOld):
+                indices_all[i][v] = col
+    return indices_all
+
+def views_from_dimGroups(dimGroups):
+    views = np.zeros(dimGroups[-1], dtype=np.int)
+    for nv in range(len(dimGroups) - 1):
+        views[dimGroups[nv]:dimGroups[nv+1]] = nv
+    return views
+
+class SimpleMatchAndTriangulator(SimpleTriangulator):
+    def __init__(self, num_joints, min_views, min_joints, cfg_svt, cfg_track, **cfg) -> None:
+        super().__init__(**cfg)
+        self.nJoints = num_joints
+        self.cfg_svt = cfg_svt
+        self.cfg_track = cfg_track
+        self.min_views = min_views
+        self.min_joints = min_joints
+        self.time = -1
+        self.max_id = 0
+        self.tracks = {}
+        self.loglevel_dict = {
+            'info': 0,
+            'warn': 1,
+            'error': 2,
+        }
+        self.loglevel = self.loglevel_dict['info'] # ['info', 'warn', 'error']
+        self.debug = False
+        self.data = None
+        self.people = None
+    
+    def log(self, text):
+        if self.loglevel > 0:
+            return 0
+        log(text)
+    
+    def warn(self, text):
+        if self.loglevel > 1:
+            return 0
+        mywarn(text)
+    
+
+    @staticmethod
+    def distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1):
+        F = fundamental_op(K0, K1, R0, T0, R1, T1)
+        # Find epilines corresponding to points in left image (first image) and
+        # drawing its lines on right image
+        lines0 = cv2.computeCorrespondEpilines(pts0[..., :2].reshape (-1,1,2), 2, F)
+        # Find epilines corresponding to points in right image (second image) and
+        # drawing its lines on left image
+        lines1 = cv2.computeCorrespondEpilines(pts1[..., :2].reshape(-1,1,2), 1, F)
+        if False:
+            H, W = 1080, 1920
+            img0 = np.zeros((H, W, 3), dtype=np.uint8) +255
+            img4, img3 = drawlines(img0.copy(), img0.copy(), lines0.reshape(-1, 3), pts1.reshape(-1, 3), pts0.reshape(-1,3))
+            img5,img6 = drawlines(img0.copy(), img0.copy(), lines1.reshape(-1, 3), pts0.reshape(-1,3), pts1.reshape(-1,3))
+            import matplotlib.pyplot as plt
+            plt.subplot(121)
+            plt.imshow(img5)
+            plt.subplot(122)
+            plt.imshow(img4)
+            plt.show()        
+        lines0 = lines0.reshape(pts0.shape)
+        lines1 = lines1.reshape(pts1.shape)
+        # dist: (D_v0, D_v1, nJoints)
+        dist01 = np.abs(np.sum(lines0[:, None, :, :2] * pts1[None, :, :, :2], axis=-1) + lines0[:, None, :, 2])
+        conf = pts0[:, None, :, 2] * pts1[None, :, :, 2]
+        dist10 = np.abs(np.sum(lines1[:, None, :, :2] * pts0[None, :, :, :2], axis=-1) + lines1[:, None, :, 2])
+
+        dist = np.sum(dist01 * conf + dist10.transpose(1, 0, 2) * conf, axis=-1)/(conf.sum(axis=-1) + 1e-5)/2
+        return dist
+
+    def _simple_associate2d_triangulate(self, data, affinity, dimGroups, prev_id):
+        # sum1 = affinity.sum(axis=1)
+        # 注意：这里的排序应该是对每个视角，挑选最大的一个
+        sum1 = np.zeros((affinity.shape[0]))
+        for i in range(len(dimGroups)-1):
+            start, end = dimGroups[i], dimGroups[i+1]
+            if end == start:continue
+            sum1 += affinity[:, start:end].max(axis=-1)
+        n2d = affinity.shape[0]
+        nViews = len(dimGroups) - 1
+        idx_zero = np.zeros(nViews, dtype=np.int) - 1
+        views = views_from_dimGroups(dimGroups)
+        # the assigned results of each person
+        p2dAssigned = np.zeros(n2d, dtype=np.int) - 1
+        visited = np.zeros(n2d, dtype=np.int)
+        sortidx = np.argsort(-sum1)
+        pid = 0
+        k3dresults = []
+        for idx in sortidx:
+            if p2dAssigned[idx] != -1:
+                continue
+            if prev_id[idx] != -1:
+                results = [self.people[prev_id[idx]]]
+            else:
+                results = []
+            proposals = check_cluster(affinity, row=idx, views=views, 
+                dimGroups=dimGroups, indices=idx_zero.copy(), p2dAssigned=p2dAssigned, visited=visited)
+            for indices in proposals:
+                if (indices > -1).sum() < self.min_views - (len(results)):
+                    continue
+                # set keypoints2d
+                info = {'RT': data['RT']}
+                for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
+                    info[name] = np.zeros((nViews, self.nJoints, 3), dtype=np.float32)
+                for nv in range(nViews):
+                    if indices[nv] == -1: continue
+                    for name in ['keypoints2d', 'keypoints2d_unproj', 'keypoints2d_distort']:
+                        info[name][nv] = data[name][nv][indices[nv]-dimGroups[nv]]
+
+                res = super().__call__(info, results=results)[0]
+
+                k2d = res['keypoints2d']
+                valid_view = (k2d[..., 2] > 0).sum(axis=-1) > self.min_joints
+                # if valid_view.sum() < self.min_views - len(results): # 这里如果是有前一帧的话，len(results)会是2；不知道之前为啥有这个条件使用
+                if valid_view.sum() < self.min_views:
+                    self.log('[associate] Skip proposal {}->{} with not enough valid view {}'.format(idx, indices, (k2d[..., 2] > 0).sum(axis=-1)))
+                    continue
+                valid_joint = res['keypoints3d'][:, -1] > 0.1
+                if valid_joint.sum() < self.min_joints:
+                    self.log('[associate] Skip proposal {}->{} as not enough joints'.format(idx, indices))
+                    continue
+                indices[~valid_view] = -1
+                if (indices < 0).all():
+                    import ipdb; ipdb.set_trace()
+                self.log('[associate] Add indices {}, valid {}'.format(indices, (k2d[..., 2] > 0).sum(axis=-1)))
+                res['id'] = pid
+                res['indices'] = indices
+                res['valid_view'] = valid_view
+                res['valid_joints'] = res['keypoints3d'][:, -1] > 0.1
+                k3dresults.append(res)
+                for nv in range(nViews):
+                    if valid_view[nv] and indices[nv] != -1:
+                        p2dAssigned[indices[nv]] = pid
+                        visited[indices[nv]] = 1
+                pid += 1
+                break
+            visited[idx] = 1
+        self.log('[associate] {} points not visited, {} not assigned'.format(visited.shape[0] - visited.sum(), (p2dAssigned==-1).sum()))
+        k3dresults.sort(key=lambda x: -x['keypoints2d'][..., -1].sum())
+        return k3dresults
+
+    def _calculate_affinity_MxM(self, dims, dimGroups, data, key):
+        M = dimGroups[-1]
+        distance = np.zeros((M, M), dtype=np.float32)
+        nViews = len(dims)
+        for v0 in range(nViews-1):
+            for v1 in range(1, nViews):
+                # calculate distance between (v0, v1)
+                if v0 >= v1:
+                    continue
+                if dims[v0] == 0 or dims[v1] == 0:
+                    continue
+                if True:
+                    pts0 = data[key][v0]
+                    pts1 = data[key][v1]
+                    K0, K1 = data['K'][v0], data['K'][v1]
+                    R0, T0 = data['Rc'][v0], data['Tc'][v0]
+                    R1, T1 = data['Rc'][v1], data['Tc'][v1]
+                    dist = self.distance_by_epipolar(pts0, pts1, K0, K1, R0, T0, R1, T1)
+                    dist /= (K0[0, 0] + K1[0, 0])/2
+                else:
+                    dist = self.distance_by_ray(pts0, pts1, R0, T0, R1, T1)
+                distance[dimGroups[v0]:dimGroups[v0+1], dimGroups[v1]:dimGroups[v1+1]] = dist
+                distance[dimGroups[v1]:dimGroups[v1+1], dimGroups[v0]:dimGroups[v0+1]] = dist.T
+        DIST_MAX = self.cfg_track.track_dist_max
+        for nv in range(nViews):
+            distance[dimGroups[nv]:dimGroups[nv+1], dimGroups[nv]:dimGroups[nv+1]] = DIST_MAX
+        distance -= np.eye(M) * DIST_MAX
+        aff = (DIST_MAX - distance)/DIST_MAX
+        aff = np.clip(aff, 0, 1)
+        return aff
+
+    def _calculate_affinity_MxN(self, dims, dimGroups, data, key, results):
+        M = dimGroups[-1]
+        N = len(results)
+        distance = np.zeros((M, N), dtype=np.float32)
+        nViews = len(dims)
+        k3d = np.stack([r['keypoints3d'] for r in results])
+        kpts_proj = project_points(k3d, data['KRT'], einsum='vab,pkb->vpka')
+        depth = kpts_proj[..., -1]
+        kpts_proj[depth<0] = -10000
+        for v in range(nViews):
+            if dims[v] == 0:
+                continue
+            focal = data['K'][v][0, 0]
+            pts2d = data[key][v][:, None]
+            pts_repro = kpts_proj[v][None]
+            conf = np.sqrt(pts2d[..., -1]*k3d[None, ..., -1])
+            diff = np.linalg.norm(pts2d[..., :2] - pts_repro[..., :2], axis=-1)
+            diff = np.sum(diff*conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
+            dist = diff / focal
+            distance[dimGroups[v]:dimGroups[v+1], :] = dist
+        DIST_MAX = self.cfg_track.track_repro_max
+        aff = (DIST_MAX - distance)/DIST_MAX
+        aff = np.clip(aff, 0, 1)
+        return aff
+
+    def _svt_optimize_affinity(self, affinity, dimGroups):
+        # match SVT
+        import pymatchlr
+        observe = np.ones_like(affinity)
+        aff_svt = pymatchlr.matchSVT(affinity, dimGroups, SimpleConstrain(dimGroups), observe, self.cfg_svt)
+        aff_svt[aff_svt<self.cfg_svt.aff_min] = 0.
+        if False:
+            import matplotlib.pyplot as plt
+            M = affinity.shape[0]
+            plt.subplot(121)
+            plt.imshow(affinity)
+            plt.hlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
+            plt.vlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
+            plt.subplot(122)
+            sum_row = aff_svt.sum(axis=1, keepdims=True)/(len(dimGroups) - 1)
+            plt.imshow(np.hstack([aff_svt, sum_row]))
+            plt.hlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
+            plt.vlines([i-0.5 for i in dimGroups[1:]], -0.5, M-0.5, 'w')
+            plt.ioff()
+            plt.show()
+        return aff_svt
+
+    def _track_add(self, res):
+        pid = res['id']
+        if pid == -1:
+            pid = self.max_id
+            res['id'] = pid
+            self.max_id += 1
+            self.log('[{:06d}] Create track {} <- {}'.format(self.time, pid, res['indices']))
+            if False:
+                crops = []
+                data = self.data
+                kpts = np.vstack(data['keypoints2d'])
+                for nv in range(len(data['imgname'])):
+                    img = cv2.imread(data['imgname'][nv])
+                    if res['indices'][nv] == -1: continue
+                    _kpts = kpts[res['indices'][nv]]
+                    bbox = bbox_from_keypoints(_kpts)
+                    plot_keypoints_auto(img, _kpts, pid)
+                    crop = crop_image(img, bbox, crop_square=True)
+                    crops.append(crop)
+                debug = merge(crops)
+                cv2.imwrite('debug/{:06d}.jpg'.format(pid), debug)
+        else:
+            self.max_id = max(self.max_id, pid+1)
+            self.log('[{:06d}] Initialize track {}, valid joints={}'.format(self.time, pid, (res['keypoints3d'][:, -1]>0.01).sum()))
+        self.tracks[pid] = {
+            'start_time': self.time,
+            'end_time': self.time+1,
+            'missing_frame': [],
+            'infos': [res]
+        }
+    
+    def _track_update(self, res, pid):
+        res['id'] = pid
+        info = self.tracks[pid]
+        self.log('[{:06d}] Update track {} [{}->{}], valid joints={}'.format(self.time, pid, info['start_time'], info['end_time'], (res['keypoints3d'][:, -1]>0.1).sum()))
+        self.tracks[pid]['end_time'] = self.time + 1
+        self.tracks[pid]['infos'].append(res)
+    
+    def _track_merge(self, res, pid):
+        res['id'] = -1
+        # TODO: merge
+
+    def _track_and_update(self, data, results):
+        cfg = self.cfg_track
+        self.time += 1
+        if self.time == 0:
+            # initialize the tracks
+            for res in results:
+                self._track_add(res)
+            return results
+        # filter the missing frames
+        for pid in list(self.tracks.keys()):
+            if self.time - self.tracks[pid]['end_time'] > cfg.max_missing_frame:
+                self.warn('[{:06d}] Remove track {}'.format(self.time, pid))
+                self.tracks.pop(pid)
+        # track the results with greedy matching
+        for idx_match, res in enumerate(results):
+            res['id'] = -1
+            # compute the distance
+            k3d = res['keypoints3d'][None]
+            pids_free = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] != self.time+1]
+            pids_used = [pid for pid in self.tracks.keys() if self.tracks[pid]['end_time'] == self.time+1]
+            def check_dist(k3d_check):
+                dist = np.linalg.norm(k3d[..., :3] - k3d_check[..., :3], axis=-1)
+                conf = np.sqrt(k3d[..., 3] * k3d_check[..., 3])
+                dist_mean = ((conf>0.1).sum(axis=-1) < self.min_joints)*cfg.track_dist_max + np.sum(dist * conf, axis=-1)/(1e-5 + np.sum(conf, axis=-1))
+                argmin = dist_mean.argmin()
+                dist_min = dist_mean[argmin]
+                return dist_mean, argmin, dist_min
+            # check free
+            NOT_VISITED = -2
+            NOT_FOUND = -1
+            flag_tracked, flag_current = NOT_VISITED, NOT_VISITED
+            if len(pids_free) > 0:
+                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_free])
+                dist_track, best, best_dist_track = check_dist(k3d_check)
+                if best_dist_track < cfg.track_dist_max:
+                    flag_tracked = best
+                else:
+                    flag_tracked = NOT_FOUND
+            # check used
+            if len(pids_used) > 0:
+                k3d_check = np.stack([self.tracks[pid]['infos'][-1]['keypoints3d'] for pid in pids_used])
+                dist_cur, best, best_dist_curr = check_dist(k3d_check)
+                if best_dist_curr < cfg.track_dist_max:
+                    flag_current = best
+                else:
+                    flag_current = NOT_FOUND
+            if flag_tracked >= 0 and (flag_current == NOT_VISITED or flag_current == NOT_FOUND):
+                self._track_update(res, pids_free[flag_tracked])
+            elif (flag_tracked == NOT_FOUND or flag_tracked==NOT_VISITED) and flag_current >= 0:
+                # 没有跟踪到，但是有当前帧的3D的，合并
+                self.log('[{:06d}] Merge track {} to {}'.format(self.time, idx_match, pids_used[flag_current]))
+                self._track_merge(res, pids_used[flag_current])
+            elif flag_tracked == NOT_FOUND and flag_current == NOT_FOUND:
+                # create a new track
+                self._track_add(res)
+            else:
+                # 丢弃
+                self.log('[{:06d}] Remove track {}. No close points'.format(self.time, idx_match))
+
+        for pid in list(self.tracks.keys()):
+            if self.tracks[pid]['end_time'] != self.time + 1:
+                self.warn('[{:06d}] Tracking {} missing'.format(self.time, pid))
+        results = [r for r in results if r['id']!=-1]
+        return results
+
+    def __call__(self, data):
+        # match the data
+        self.data = data
+        key = 'keypoints2d'
+        dims = [d.shape[0] for d in data[key]]
+        dimGroups = np.cumsum([0] + dims)
+        # 1. compute affinity
+        affinity = self._calculate_affinity_MxM(dims, dimGroups, data, key)
+        N2D = affinity.shape[0]
+        if self.people is not None and len(self.people) > 0:
+            # add 3d affinity
+            _affinity = affinity
+            affinity_3d = self._calculate_affinity_MxN(dims, dimGroups, data, key, self.people)
+            affinity = np.concatenate([affinity, affinity_3d], axis=1)
+            eye3d = np.eye(affinity_3d.shape[1])
+            affinity = np.concatenate([affinity, np.hstack((affinity_3d.T, eye3d))], axis=0)
+            dimGroups = dimGroups.tolist()
+            dimGroups.append(dimGroups[-1]+affinity_3d.shape[1])
+            affinity = self._svt_optimize_affinity(affinity, dimGroups)
+            # affinity = self._svt_optimize_affinity(_affinity, dimGroups[:-1])
+            # recover
+            affinity_3d = np.hstack([np.ones((N2D, 1))*0.5, affinity[:N2D, N2D:]])
+            prev_id = affinity_3d.argmax(axis=-1) - 1
+            affinity = affinity[:N2D, :N2D]
+            dimGroups = np.array(dimGroups[:-1])
+        else:
+            affinity = self._svt_optimize_affinity(affinity, dimGroups)
+            prev_id = np.zeros(N2D) - 1
+        # 2. associate and triangulate
+        results = self._simple_associate2d_triangulate(data, affinity, dimGroups, prev_id)
+        # 3. track, filter and return
+        results = self._track_and_update(data, results)
+        results.sort(key=lambda x:x['id'])
+        self.people = results
+        return results
--- a/easymocap/mytools/vis_base.py
+++ b/easymocap/mytools/vis_base.py
@ -2,27 +2,38 @@
  @ Date: 2020-11-28 17:23:04
  @ Author: Qing Shuai
  @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-08-22 16:11:25
-  @ FilePath: /EasyMocap/easymocap/mytools/vis_base.py
+  @ LastEditTime: 2022-08-12 21:50:56
+  @ FilePath: /EasyMocapPublic/easymocap/mytools/vis_base.py
 '''
 import cv2
 import numpy as np
 import json

-def generate_colorbar(N = 20, cmap = 'jet'):
+def generate_colorbar(N = 20, cmap = 'jet', rand=True):
    bar = ((np.arange(N)/(N-1))*255).astype(np.uint8).reshape(-1, 1)
    colorbar = cv2.applyColorMap(bar, cv2.COLORMAP_JET).squeeze()
    if False:
        colorbar = np.clip(colorbar + 64, 0, 255)
-    import random
-    random.seed(666)
-    index = [i for i in range(N)]
-    random.shuffle(index)
-    rgb = colorbar[index, :]
+    if rand:
+        import random
+        random.seed(666)
+        index = [i for i in range(N)]
+        random.shuffle(index)
+        rgb = colorbar[index, :]
+    else:
+        rgb = colorbar
    rgb = rgb.tolist()
    return rgb

-colors_bar_rgb = generate_colorbar(cmap='hsv')
+# colors_bar_rgb = generate_colorbar(cmap='hsv')
+colors_bar_rgb = [
+    (94, 124, 226), # 青色
+    (255, 200, 87), # yellow
+    (74,  189,  172), # green
+    (8, 76, 97), # blue
+    (219, 58, 52), # red
+    (77, 40, 49), # brown
+]

 colors_table = {
    'b': [0.65098039, 0.74117647, 0.85882353],
@ -34,15 +45,19 @@ colors_table = {
    'r': [ 251/255.,  128/255.,  114/255.],
    '_orange': [ 253/255.,  174/255.,  97/255.],
    'y': [ 250/255.,  230/255.,  154/255.],
-    '_r':[255/255,0,0],
    'g':[0,255/255,0],
-    '_b':[0,0,255/255],
    'k':[0,0,0],
+    '_r':[255/255,0,0],
+    '_g':[0,255/255,0],
+    '_b':[0,0,255/255],
+    '_k':[0,0,0],
    '_y':[255/255,255/255,0],
    'purple':[128/255,0,128/255],
    'smap_b':[51/255,153/255,255/255],
    'smap_r':[255/255,51/255,153/255],
-    'smap_b':[51/255,255/255,153/255],
+    'person': [255/255,255/255,255/255],
+    'handl': [255/255,51/255,153/255],
+    'handr': [51/255,255/255,153/255],
 }

 def get_rgb(index):
@ -51,7 +66,9 @@ def get_rgb(index):
            return (255, 255, 255)
        if index < -1:
            return (0, 0, 0)
-        col = colors_bar_rgb[index%len(colors_bar_rgb)]
+        # elif index == 0:
+        #     return (245, 150, 150)
+        col = list(colors_bar_rgb[index%len(colors_bar_rgb)])[::-1]
    else:
        col = colors_table.get(index, (1, 0, 0))
        col = tuple([int(c*255) for c in col[::-1]])
@ -80,13 +97,14 @@ def plot_cross(img, x, y, col, width=-1, lw=-1):
    cv2.line(img, (int(x-width), int(y)), (int(x+width), int(y)), col, lw)
    cv2.line(img, (int(x), int(y-width)), (int(x), int(y+width)), col, lw)
    
-def plot_bbox(img, bbox, pid, vis_id=True):
+def plot_bbox(img, bbox, pid, scale=1, vis_id=True):
    # 画bbox: (l, t, r, b)
-    x1, y1, x2, y2 = bbox[:4]
-    x1 = int(round(x1))
-    x2 = int(round(x2))
-    y1 = int(round(y1))
-    y2 = int(round(y2))
+    x1, y1, x2, y2, c = bbox
+    if c < 0.01:return img
+    x1 = int(round(x1*scale))
+    x2 = int(round(x2*scale))
+    y1 = int(round(y1*scale))
+    y2 = int(round(y2*scale))
    color = get_rgb(pid)
    lw = max(img.shape[0]//300, 2)
    cv2.rectangle(img, (x1, y1), (x2, y2), color, lw)
@ -94,11 +112,20 @@ def plot_bbox(img, bbox, pid, vis_id=True):
        font_scale = img.shape[0]/1000
        cv2.putText(img, '{}'.format(pid), (x1, y1+int(25*font_scale)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, 2)

-def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2):
+def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2, fliplr=False):
+    lw = max(lw, 2)
+    H, W = img.shape[:2]
    for ii, (i, j) in enumerate(config['kintree']):
        if i >= len(points) or j >= len(points):
            continue
+        if (i >25 or j > 25) and config['nJoints'] != 42:
+            _lw = max(int(lw/4), 1)
+        else:
+            _lw = lw
        pt1, pt2 = points[i], points[j]
+        if fliplr:
+            pt1 = (W-pt1[0], pt1[1])
+            pt2 = (W-pt2[0], pt2[1])
        if use_limb_color:
            col = get_rgb(config['colors'][ii])
        else:
@ -106,32 +133,111 @@ def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True
        if pt1[-1] > 0.01 and pt2[-1] > 0.01:
            image = cv2.line(
                img, (int(pt1[0]+0.5), int(pt1[1]+0.5)), (int(pt2[0]+0.5), int(pt2[1]+0.5)),
-                col, lw)
-    for i in range(len(points)):
+                col, _lw)
+    for i in range(min(len(points), config['nJoints'])):
        x, y = points[i][0], points[i][1]
+        if fliplr:
+            x = W - x
+        c = points[i][-1]
+        if c > 0.01:
+            text_size = img.shape[0]/1000
+            col = get_rgb(pid)
+            radius = int(lw/1.5)
+            if i > 25 and config['nJoints'] != 42:
+                radius = max(int(radius/4), 1)
+            cv2.circle(img, (int(x+0.5), int(y+0.5)), radius, col, -1)
+            if vis_conf:
+                cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), 
+                cv2.FONT_HERSHEY_SIMPLEX, text_size, col, 2)
+
+def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, scale=1, lw=-1):
+    from ..dataset.config import CONFIG
+    config_name = {25: 'body25', 21: 'hand', 42:'handlr', 17: 'coco', 1:'points', 67:'bodyhand', 137: 'total', 79:'up'}[len(points)]
+    config = CONFIG[config_name]
+    if lw == -1:
+        lw = img.shape[0]//200
+    if config_name == 'hand':
+        lw = img.shape[0]//1000
+    lw = max(lw, 1)
+    for ii, (i, j) in enumerate(config['kintree']):
+        if i >= len(points) or j >= len(points):
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
+        pt1, pt2 = points[i], points[j]
+        if use_limb_color:
+            col = get_rgb(config['colors'][ii])
+        else:
+            col = get_rgb(pid)
+        if pt1[0] < 0 or pt1[1] < 0 or pt1[0] > 10000 or pt1[1] > 10000:
+            continue
+        if pt2[0] < 0 or pt2[1] < 0 or pt2[0] > 10000 or pt2[1] > 10000:
+            continue
+        if pt1[-1] > 0.01 and pt2[-1] > 0.01:
+            image = cv2.line(
+                img, (int(pt1[0]*scale+0.5), int(pt1[1]*scale+0.5)), (int(pt2[0]*scale+0.5), int(pt2[1]*scale+0.5)),
+                col, lw)
+    lw = img.shape[0]//200
+    if config_name == 'hand':
+        lw = img.shape[0]//500
+    lw = max(lw, 1)
+    for i in range(len(points)):
+        x, y = points[i][0]*scale, points[i][1]*scale
+        if x < 0 or y < 0 or x >10000 or y >10000:
+            continue
+        if i >= 25 and config_name in ['bodyhand', 'total']:
+            lw = max(img.shape[0]//400, 1)
        c = points[i][-1]
        if c > 0.01:
            col = get_rgb(pid)
-            cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
+            if len(points) == 1:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*10, col, lw*2)
+                plot_cross(img, int(x+0.5), int(y+0.5), width=lw*5, col=col, lw=lw*2)
+            else:
+                cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
            if vis_conf:
                cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)

-def plot_points2d(img, points2d, lines, lw=4, col=(0, 255, 0), putText=True):
+def plot_keypoints_total(img, annots, scale, pid_offset=0):
+    _lw = img.shape[0] // 150
+    for annot in annots:
+        pid = annot['personID'] + pid_offset
+        for key in ['keypoints', 'handl2d', 'handr2d']:
+            if key not in annot.keys():continue
+            if key in ['handl2d', 'handr2d', 'face2d']:
+                lw = _lw // 2
+            else:
+                lw = _lw
+            lw = max(lw, 1)
+            plot_keypoints_auto(img, annot[key], pid, vis_conf=False, use_limb_color=False, scale=scale, lw=lw)
+            if 'bbox' not in annot.keys() or (annot['bbox'][0] < 0 or annot['bbox'][0] >10000):
+                continue
+            plot_bbox(img, annot['bbox'], pid, scale=scale, vis_id=True)
+    return img
+
+def plot_points2d(img, points2d, lines, lw=-1, col=(0, 255, 0), putText=True, style='+'):
    # 将2d点画上去
    if points2d.shape[1] == 2:
        points2d = np.hstack([points2d, np.ones((points2d.shape[0], 1))])
+    if lw == -1:
+        lw = img.shape[0]//200
    for i, (x, y, v) in enumerate(points2d):
        if v < 0.01:
            continue
        c = col
-        plot_cross(img, x, y, width=10, col=c, lw=lw)
+        if '+' in style:
+            plot_cross(img, x, y, width=10, col=c, lw=lw*2)
+        if 'o' in style:
+            cv2.circle(img, (int(x), int(y)), 10, c, lw*2)
+        cv2.circle(img, (int(x), int(y)), lw, c, lw)
        if putText:
-            font_scale = img.shape[0]/2000
+            c = col[::-1]
+            font_scale = img.shape[0]/1000
            cv2.putText(img, '{}'.format(i), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, font_scale, c, 2)
    for i, j in lines:
        if points2d[i][2] < 0.01 or points2d[j][2] < 0.01:
            continue
-        plot_line(img, points2d[i], points2d[j], 2, col)
+        plot_line(img, points2d[i], points2d[j], max(1, lw//2), col)

 row_col_ = {
    2: (2, 1),
@ -140,7 +246,18 @@ row_col_ = {
    9: (3, 3),
    26: (4, 7)
 }
-def get_row_col(l):
+
+row_col_square = {
+    2: (2, 1),
+    7: (3, 3),
+    8: (3, 3),
+    9: (3, 3),
+    26: (5, 5)
+}
+
+def get_row_col(l, square):
+    if square and l in row_col_square.keys():
+        return row_col_square[l]
    if l in row_col_.keys():
        return row_col_[l]
    else:
@ -153,12 +270,19 @@ def get_row_col(l):
            row, col = col, row
        return row, col

-def merge(images, row=-1, col=-1, resize=False, ret_range=False, **kwargs):
+def merge(images, row=-1, col=-1, resize=False, ret_range=False, square=False, **kwargs):
    if row == -1 and col == -1:
-        row, col = get_row_col(len(images))
+        row, col = get_row_col(len(images), square)
    height = images[0].shape[0]
    width = images[0].shape[1]
-    ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    # special case
+    if height > width:
+        if len(images) == 3:
+            row, col = 1, 3
+    if len(images[0].shape) > 2:
+        ret_img = np.zeros((height * row, width * col, images[0].shape[2]), dtype=np.uint8) + 255
+    else:
+        ret_img = np.zeros((height * row, width * col), dtype=np.uint8) + 255
    ranges = []
    for i in range(row):
        for j in range(col):
--- a/scripts/dataset/download_youtube.py
+++ b/scripts/dataset/download_youtube.py
@ -0,0 +1,104 @@
+'''
+  @ Date: 2022-03-29 13:55:42
+  @ Author: Qing Shuai
+  @ Mail: s_q@zju.edu.cn
+  @ LastEditors: Qing Shuai
+  @ LastEditTime: 2022-05-06 16:45:47
+  @ FilePath: /EasyMocapPublic/scripts/dataset/download_youtube.py
+'''
+from glob import glob
+from os.path import join
+from urllib.error import URLError
+from pytube import YouTube
+import os
+from easymocap.mytools.debug_utils import log, mkdir, myerror
+
+extensions = ['.mp4', '.webm']
+
+def download_youtube(vid, outdir):
+    outname = join(outdir, vid)
+    url = 'https://www.youtube.com/watch?v={}'.format(vid)
+    for ext in extensions:
+        if os.path.exists(outname+ext) and not args.restart:
+            log('[Info]: skip video {}'.format(outname+ext))
+            return 0
+    log('[Info]: start to download video {}'.format(outname))
+    log('[Info]: {}'.format(url))
+    yt = YouTube(url)
+    try:
+        streams = yt.streams
+    except KeyError:
+        myerror('[Error]: not found streams: {}'.format(url))
+        return 1
+    except URLError:
+        myerror('[Error]: Url error: {}'.format(url))
+        return 1
+    find = False
+    streams_valid = []
+    res_range = ['2160p', '1440p', '1080p', '720p'] if not args.only4k else ['2160p']
+    if args.no720:
+        res_range.remove('720p')
+    for res in res_range:
+        for fps in [60, 50, 30, 25, 24]:
+            for ext in ['webm', 'mp4']:
+                for stream in streams:
+                    if stream.resolution == res and \
+                       stream.fps == fps and \
+                       stream.mime_type == 'video/{}'.format(ext):
+                       streams_valid.append(stream)
+    if len(streams_valid) == 0:
+        for stream in streams:
+            print(stream)
+        myerror('[BUG ] Not found valid stream, please check the streams')
+        return 0
+    # best_stream = yt.streams.order_by('filesize')[-1]
+    title = streams_valid[0].title
+    log('[Info]: {}'.format(title))
+    for stream in streams_valid:
+        res = stream.resolution
+        log('[Info]: The resolution is {}, ext={}'.format(res, stream.mime_type))
+        filename = '{}.{}'.format(vid, stream.mime_type.split('/')[-1])
+        try:
+            stream.download(output_path=outdir, filename=filename, max_retries=0)
+            log('[Info]: Succeed')
+        except:
+            myerror('[BUG ]: Failed')
+            continue
+        break
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('vid', type=str)
+    parser.add_argument('--database', type=str, default='data/youtube')
+    parser.add_argument('--num', type=int, default=1)
+    parser.add_argument('--only4k', action='store_true')
+    parser.add_argument('--no720', action='store_true')
+    parser.add_argument('--restart', action='store_true')
+    parser.add_argument('--debug', action='store_true')
+    args = parser.parse_args()
+
+    vid = args.vid
+    # check database
+    database = join(args.database, 'videos')
+    mkdir(database)
+    videonames = sorted(os.listdir(database))
+    log('[download] video database in {}'.format(database))
+    log('[download] already has {} videos'.format(len(videonames)))
+
+    if vid.startswith('https'):
+        vid = vid.replace('https://www.youtube.com/watch?v=', '')
+        vid = vid.split('&')[0]
+        print(vid)
+        urls = [vid]
+    elif os.path.exists(vid):
+        with open(vid, 'r') as f:
+            urls = f.readlines()
+        urls = list(filter(lambda x:not x.startswith('#') and len(x) > 0, map(lambda x: x.strip().replace('https://www.youtube.com/watch?v=', '').split('&')[0], urls)))
+        log('[download] download {} videos from {}'.format(len(urls), vid))
+    else:
+        urls = [vid]
+    
+    for url in urls:
+        download_youtube(url, database)