[mvmp] update the config

2023-07-11 22:34:44 +08:00 · 2023-07-11 22:34:44 +08:00 · 0815e2e53d
commit 0815e2e53d
parent 88ee221c10
8 changed files with 87 additions and 30 deletions
--- a/config/mvmp/body/ballet_2person.yml
+++ b/config/mvmp/body/ballet_2person.yml
@@ -39,4 +39,8 @@ exp_opts:
            args:
              loss:
                smooth:
-                  weight: 5.
+                  weight: 5.
+    vis_render:
+      args:
+        view_list: [6]
+        scale: 1.
--- a/config/mvmp/body/boxing.yml
+++ b/config/mvmp/body/boxing.yml
@@ -5,7 +5,7 @@ data_opts:
  # subs: ['01', '03', '05', '07', '09', '11', '13', '15', '17', '19', '21', '23']
  subs: ['01', '04', '07', '10', '13', '16', '19', '22'] # Use 8 cameras
  subs_vis: ['01', '07', '13', '19'] # Visualize 4 cameras
-  ranges: [0, 200, 1]
+  ranges: [0, 300, 1]
 exp_opts:
  output: output/boxing
  at_step:
@@ -39,5 +39,5 @@ exp_opts:
                  weight: 1.
    vis_render:
      args:
-        view_list: [3]
+        view_list: [2]
        scale: 1.
--- a/config/mvmp/body/soccer1_6.yml
+++ b/config/mvmp/body/soccer1_6.yml
@@ -23,6 +23,7 @@ exp_opts:
            dist_max: 25 # pixel
            dist_track: 100 # 100mm；
        cfg_track:
+          max_person: 6
          final_ranges: [[-5, -5, 0.], [5, 5, 5.]] # 最终的输出的range，仅用于输出的时候的筛选
          final_max_person: 6
  at_final:
@@ -36,8 +37,8 @@ exp_opts:
            args:
              loss:
                smooth:
-                  weight: 1.
+                  weight: 5.
    vis_render:
      args:
-        view_list: [0, 2, 4, 6]
-        scale: 0.5
+        view_list: [2]
+        scale: 1.
--- a/config/mvmp/detect_match_triangulate_fitSMPL.yml
+++ b/config/mvmp/detect_match_triangulate_fitSMPL.yml
@@ -60,14 +60,13 @@ args:
            min_view_body: 3 # min visible view of the body
            min_conf_3d: 0.1
            dist_max: 50 # pixel
-            dist_track_pixel: 100 # pixel
            dist_track: 100 # mm
        cfg_track:
          max_person: 100
-          max_missing: 3 # 最多丢失5帧就要删除
+          max_missing: 3 # 最多丢失3帧就要删除
          final_ranges: [[-10000, -10000, -10000], [10000, 10000, 10000]] # 最终的输出的range，仅用于输出的时候的筛选
          final_max_person: 100
-          kintree: [[ 1,  0], [ 2,  1], [ 3,  2], [ 4,  3], [ 5,  1], [ 6,  5], [ 7,  6], [ 8,  1], [ 9,  8], [10,  9], [11, 10], [12,  8], [13, 12], [14, 13], [15,  0], [16,  0], [17, 15], [18, 16], [19, 14], [20, 19], [21, 14], [22, 11], [23, 22], [24, 11]]
+          kintree: [[2, 3], [5, 6], [3, 4], [6, 7], [11, 22], [22, 23], [11, 24], [14, 19], [19, 20], [14, 21]]
    vis_kpts3d:
      module: myeasymocap.io.vis.Vis3D
      key_from_data: [images, cameras]
--- a/myeasymocap/backbone/yolo/yolo.py
+++ b/myeasymocap/backbone/yolo/yolo.py
@@ -32,6 +32,17 @@ class BaseYOLOv5:
        self.multiview = multiview
        self.name = name
    
+    def dump(self, cachename, output):
+        os.makedirs(os.path.dirname(cachename), exist_ok=True)
+        with open(cachename, 'wb') as f:
+            pickle.dump(output, f)
+        return output
+    
+    def load(self, cachename):
+        with open(cachename, 'rb') as f:
+            output = pickle.load(f)
+        return output
+
    def check_cache(self, imgname):
        basename = os.path.basename(imgname)
        imgext = '.' + basename.split('.')[-1]
@@ -39,7 +50,7 @@ class BaseYOLOv5:
        cachename = join(self.output, self.name, nv, basename.replace(imgext, '.npy'))
        os.makedirs(os.path.dirname(cachename), exist_ok=True)
        if os.path.exists(cachename):
-            output = np.load(cachename, allow_pickle=True)
+            output = self.load(cachename)
            return True, output, cachename
        else:
            return False, None, cachename
@@ -60,19 +71,24 @@ class BaseYOLOv5:
        image = self.check_image(imgname)
        results = self.model(image) #RGB images[:,:,::-1]
        arrays = np.array(results.pandas().xyxy[0])
-        np.save(cachename, arrays)
-        return arrays
+        res = {
+            'results': arrays,
+            'image_shape': image.shape,
+        }
+        self.dump(cachename, res)
+        return res
    
    @staticmethod
    def select_class(results, name):
        select = []
-        for i, res in enumerate(results):
+        for i, res in enumerate(results['results']):
            classname = res[6]
            if classname != name:
                continue
            box = res[:5]
            select.append(box)
-        return select
+        select = np.stack(select)
+        return select, results

    def select_bbox(self, select, imgname):
        if select.shape[0] == 0:
@@ -90,13 +106,13 @@ class BaseYOLOv5:
        detects = {'bbox': [[] for _ in range(len(images))]}
        for nv in range(len(images)):
            res = self.detect(images[nv], imgnames[nv])            
-            select = self.select_class(res, self.name)
+            select, res = self.select_class(res, self.name)
            if len(select) == 0:
                select = np.zeros((0,5), dtype=np.float32)
            else:
                select = np.stack(select).astype(np.float32)
            # TODO: add track here
-            select = self.select_bbox(select, imgnames[nv])
+            select = self.select_bbox(select, res, imgnames[nv])
            detects['bbox'][nv] = select
        if squeeze:
            detects['bbox'] = detects['bbox'][0]
@@ -124,14 +140,14 @@ class YoloWithTrack(BaseYOLOv5):
        over = (w*h)/(area_pre+area_now-w*h)
        return over

-    def select_bbox(self, select, imgname):
+    def select_bbox(self, select, results, imgname):
        if select.shape[0] == 0:
            return select
        sub = os.path.basename(os.path.dirname(imgname))
        frame = int(os.path.basename(imgname).split('.')[0])
        if sub not in self.track_cache:
            # select the best
-            select = super().select_bbox(select, imgname)
+            select = super().select_bbox(select, results, imgname)
            self.track_cache[sub] = {
                'frame': [frame],
                'bbox': [select]
@@ -152,12 +168,15 @@ class MultiPerson(BaseYOLOv5):
        self.max_length = max_length
        print('[{}] Only keep the bbox in [{}, {}]'.format(self.__class__.__name__, min_length, max_length))

-    def select_bbox(self, select, imgname):
+    def select_bbox(self, select, results, imgname):
        if select.shape[0] == 0:
            return select
        # 判断一下面积
        area = np.sqrt((select[:, 2] - select[:, 0])*(select[:, 3]-select[:, 1]))
        valid = (area > self.min_length) & (area < self.max_length)
+        height, width, _ = results['image_shape']
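+        # discard boxes hugging the left/right image border: x-max must exceed 1.5 * min_length and x-min must lie below width - 1.5 * min_length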
+        valid = valid & (select[:, 2] > self.min_length * 1.5) & (select[:, 0] < width - self.min_length * 1.5)
        return select[valid]

 class DetectToPelvis:
--- a/myeasymocap/io/vis3d.py
+++ b/myeasymocap/io/vis3d.py
@@ -6,6 +6,7 @@ from os.path import join
 import numpy as np
 from easymocap.datasets.base import add_logo
 from easymocap.mytools.vis_base import merge, plot_bbox
+from easymocap.mytools.camera_utils import Undistort
 from .vis import VisBase

 class Render(VisBase):
@@ -54,6 +55,9 @@ class Render_multiview(VisBase):
            basename = os.path.basename(imgname[nv])
            assert os.path.exists(imgname[nv]), imgname[nv]
            vis = cv2.imread(imgname[nv])
+            # undistort the images
+            if cameras['dist'] is not None:
+                vis = Undistort.image(vis, cameras['K'][nv], cameras['dist'][nv], sub=os.path.basename(os.path.dirname(imgname[nv])))
            vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
            meshes = {}
            if vert.ndim == 2:
@@ -96,7 +100,6 @@ class Render_multiview(VisBase):
                ret = plot_meshes(vis, meshes, K, R, T, mode='rgb')
            else:
                ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
-            ret = add_logo(ret)
            mv_ret.append(ret)
        self.merge_and_write(mv_ret)

--- a/myeasymocap/operations/match_base.py
+++ b/myeasymocap/operations/match_base.py
@@ -3,7 +3,7 @@ import cv2
 from easymocap.mytools.camera_utils import Undistort
 from easymocap.mytools.debug_utils import log, mywarn, myerror
 from .iterative_triangulate import iterative_triangulate
-from easymocap.mytools.triangulator import project_points
+from easymocap.mytools.triangulator import project_points, batch_triangulate
 from easymocap.mytools.timer import Timer

 class DistanceBase:
@@ -265,8 +265,13 @@ class MatchBase:
                pass
        return indices, proposals

-    def _check_indices(self, indices):
-        return (indices > -1).sum() >= self.cfg.triangulate.min_view_body
+    def _check_indices(self, indices, keypoints3d=None):
+        flag_ind = (indices > -1).sum() >= self.cfg.triangulate.min_view_body
+        if keypoints3d is not None:
+            conf = keypoints3d[:, 3]
+            flag_3d = (conf > self.cfg.triangulate.min_conf_3d).sum() > self.cfg.min_joints
+            flag_ind = flag_ind & flag_3d
+        return flag_ind

    def _simple_associate2d_triangulate(self, affinity, keypoints, cameras, assigned=None):
        # sum1 = affinity.sum(axis=1)
@@ -292,7 +297,7 @@ class MatchBase:
            log('[Tri] First try to triangulate of {}'.format(indices))
            indices_origin = indices.copy()
            result, indices = self.try_to_triangulate(keypoints, cameras, indices)
-            if not self._check_indices(indices):
+            if not self._check_indices(indices, result['keypoints3d']):
                # if the proposals is valid
                if len(proposals) > 0:
                    proposals.sort(key=lambda x:-x[2])
@@ -301,7 +306,7 @@ class MatchBase:
                        indices[nviews] = select_id
                        log('[Tri] Max fail, then try to triangulate of {}'.format(indices))
                        result, indices = self.try_to_triangulate(keypoints, cameras, indices)
-                        if self._check_indices(indices):
+                        if self._check_indices(indices, result['keypoints3d']):
                            break
                    else:
                        # overall proposals, not find any valid
@@ -346,21 +351,23 @@ class MatchBase:
            indices_origin = indices.copy()
            result, indices = self.try_to_triangulate(keypoints, cameras, indices, previous=keypoints3d[idx3d])
            
-            if not (self._check_indices(indices) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
+            if not (self._check_indices(indices, result['keypoints3d']) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
                # if the proposals is valid
                previous = keypoints3d[idx3d]
                # select the best keypoints of each view
                previous_proj = project_points(previous, cameras['P'])
                dist_all = np.zeros((previous_proj.shape[0],)) + 999.
                indices_all = np.zeros((previous_proj.shape[0],), dtype=int)
+                keypoints_all = np.zeros_like(previous_proj)
                for nv in range(previous_proj.shape[0]):
                    dist = np.linalg.norm(previous_proj[nv, :, :2][None] - keypoints[nv][:, :, :2], axis=-1)
                    conf = (previous[..., -1] > 0.1)[None] & (keypoints[nv][:, :, -1] > 0.1)
                    dist_mean = (dist * conf).sum(axis=-1) / (1e-5 + conf.sum(axis=-1))
                    dist_all[nv] = dist_mean.min()
                    indices_all[nv] = dist_mean.argmin()
+                    keypoints_all[nv] = keypoints[nv][indices_all[nv]]
                want_view = dist_all.argsort()[:self.cfg.triangulate.min_view_body]
-                # TODO: add proposal
+                # TODO: add more proposals instead of only the top-K views
                proposal = (want_view, indices_all[want_view], -dist_all[want_view])
                proposals = [proposal]
                if len(proposals) > 0:
@@ -370,12 +377,33 @@ class MatchBase:
                        indices[nv] = select_id
                        log('[Tri] Max fail, then try to triangulate of {}'.format(indices))
                        result, indices = self.try_to_triangulate(keypoints, cameras, indices, previous=keypoints3d[idx3d])
-                        if (self._check_indices(indices) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
+                        if (self._check_indices(indices, result['keypoints3d']) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
+                            # Check passed: work out which of all the views agree with the result, then triangulate them together
+                            k2d_repro = project_points(result['keypoints3d'], cameras['P'])
+                            dist = np.linalg.norm(k2d_repro[..., :2] - keypoints_all[..., :2], axis=-1)
+                            conf = (result['keypoints3d'][:, -1][None] > 0.1) & (keypoints_all[..., 2] > 0.1)
+                            dist[~conf] = 0.
+                            valid_2d = dist < self.cfg.triangulate.dist_max
+                            valid_ratio_view = valid_2d.mean(axis=-1)
+                            valid_view = np.where(valid_ratio_view > 0.4)[0]
+                            indices_new = np.zeros_like(indices_origin) - 1
+                            indices_new[valid_view] = indices_all[valid_view]
+                            keypoints_all[~valid_2d] = 0.
+                            k3d_new = batch_triangulate(keypoints_all, cameras['P'], min_view=3)
+                            result = {
+                                'keypoints3d': k3d_new,
+                                'indices': indices_new,
+                                'keypoints2d': keypoints_all
+                            }
+                            log('[Tri] Max success, Refine the indices to {}'.format(indices))
+                            # result, indices = self.try_to_triangulate(keypoints, cameras, indices_new, previous=result['keypoints3d'])
                            break
+                        else:
+                            log('[Tri] triangulation failed')
+                            self._check_speed(keypoints3d[idx3d], result['keypoints3d'], verbo=True)
                    else:
                        # overall proposals, not find any valid
-                        mywarn('[Tri] {} Track fail after {} proposal'.format(idx3d, len(proposals)))
-                        import ipdb; ipdb.set_trace()
+                        mywarn('[Tri] {} Track fail after {} proposal'.format(self.prev_ids[idx3d], len(proposals)))
                        continue
                else:
                    mywarn('[Tri] Track fail {}'.format(indices))
@@ -483,6 +511,7 @@ class TrackBase:
    def add_track(self, res):
        # add a new track
        pid = self.max_id
+        mywarn('[Track] add new person {}'.format(pid))
        res['id'] = pid
        self.record[pid] = {
            'frames': [self.current_frame],
--- a/myeasymocap/stages/basestage.py
+++ b/myeasymocap/stages/basestage.py
@@ -107,6 +107,8 @@ class MultiStage:
        log('Keep keys: {}'.format(list(data.keys())))
        ret = {}
        for key, model in self.model_finals.items():
+            if self._at_final[key].get('skip', False):
+                continue
            for iter_ in range(self._at_final[key].get('repeat', 1)):
                inputs = {}
                model.iter = iter_