From 88ee221c104e724461eddeb4ce92be52eb13fe5c Mon Sep 17 00:00:00 2001
From: shuaiqing
Date: Tue, 11 Jul 2023 10:58:28 +0800
Subject: [PATCH] update the config

---
 config/mvmp/body/ballet_2person.yml           |  42 ++++++
 config/mvmp/body/boxing.yml                   |  43 +++++++
 config/mvmp/body/soccer1_6.yml                |  43 +++++++
 config/mvmp/detect_match_triangulate.yml      |  87 +++++++++++++
 .../mvmp/detect_match_triangulate_fitSMPL.yml | 121 ++++++++++++++++++
 config/mvmp/meta_fit_SMPL.yml                 | 107 ++++++++++++++++
 myeasymocap/stages/basestage.py               |   1 +
 7 files changed, 444 insertions(+)
 create mode 100644 config/mvmp/body/ballet_2person.yml
 create mode 100644 config/mvmp/body/boxing.yml
 create mode 100644 config/mvmp/body/soccer1_6.yml
 create mode 100644 config/mvmp/detect_match_triangulate.yml
 create mode 100644 config/mvmp/detect_match_triangulate_fitSMPL.yml
 create mode 100644 config/mvmp/meta_fit_SMPL.yml

diff --git a/config/mvmp/body/ballet_2person.yml b/config/mvmp/body/ballet_2person.yml
new file mode 100644
index 0000000..202a524
--- /dev/null
+++ b/config/mvmp/body/ballet_2person.yml
@@ -0,0 +1,42 @@
+data: config/datasets/mvimage.yml
+exp: config/mvmp/detect_match_triangulate_fitSMPL.yml
+data_opts:
+  root: TO_BE_FILLED
+  # subs: ['01', '03', '05', '07', '09', '11', '13', '15', '17', '19', '21', '23']
+  subs: ['01', '04', '07', '10', '13', '16', '19', '22'] # Use 8 cameras
+  subs_vis: ['01', '07', '13', '19']
+  # subs_vis: [_all_]
+  ranges: [0, 1100, 1]
+exp_opts:
+  output: output/ballet_2person
+  at_step:
+    detect:
+      args:
+        min_length: 200
+        max_length: 1000
+    match:
+      args:
+        cfg_match:
+          distance:
+            threshold_track: 0.05
+        triangulate:
+          min_view: 3 # at least 3 views to triangulate a point
+          min_view_body: 5 # at least 5 visible views for a body
+          dist_max: 25 # pixel
+          dist_track: 100 # 100mm
+        cfg_track:
+          max_person: 2
+          final_ranges: [[-3, -3, 0.], [3, 3, 3.]] # final output range; only used to filter the results when writing them out
+          final_max_person: 2
+  at_final:
+    collect:
+      args:
+        min_frame: 100
+    fitting_each_person:
+      args:
+        stages:
+          refine_poses:
+            args:
+              loss:
+                smooth:
+                  weight: 5.
\ No newline at end of file
diff --git a/config/mvmp/body/boxing.yml b/config/mvmp/body/boxing.yml
new file mode 100644
index 0000000..f79d8a7
--- /dev/null
+++ b/config/mvmp/body/boxing.yml
@@ -0,0 +1,43 @@
+data: config/datasets/mvimage.yml
+exp: config/mvmp/detect_match_triangulate_fitSMPL.yml
+data_opts:
+  root: TO_BE_FILLED
+  # subs: ['01', '03', '05', '07', '09', '11', '13', '15', '17', '19', '21', '23']
+  subs: ['01', '04', '07', '10', '13', '16', '19', '22'] # Use 8 cameras
+  subs_vis: ['01', '07', '13', '19'] # Visualize 4 cameras
+  ranges: [0, 200, 1]
+exp_opts:
+  output: output/boxing
+  at_step:
+    detect:
+      args:
+        min_length: 200
+        max_length: 1000
+    match:
+      args:
+        cfg_match:
+          distance:
+            threshold_track: 0.05
+        triangulate:
+          min_view: 3 # at least 3 views to triangulate a point
+          min_view_body: 4 # at least 4 visible views for a body
+          dist_max: 25 # pixel
+          dist_track: 50 # 50mm
+        cfg_track:
+          final_ranges: [[-3, -3, 0.], [3, 3, 3.]] # final output range; only used to filter the results when writing them out
+  at_final:
+    collect:
+      args:
+        min_frame: 100
+    fitting_each_person:
+      args:
+        stages:
+          refine_poses:
+            args:
+              loss:
+                smooth:
+                  weight: 1.
+    vis_render:
+      args:
+        view_list: [3]
+        scale: 1.
\ No newline at end of file
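Note on how these small scene files relate to the pipeline file they point to via `exp`: each one only overrides a handful of fields, and everything else comes from the shared config. A minimal sketch of this kind of layered loading, assuming PyYAML; `merge_opts` and the exact way `exp_opts` maps onto the pipeline's `args` are illustrative, not the actual EasyMocap loader:

```python
# Hypothetical sketch of layered config loading: a scene file such as
# config/mvmp/body/boxing.yml pulls in the pipeline file named by its
# `exp` key, and `exp_opts` recursively overrides the pipeline's `args`.
# merge_opts is an illustrative helper, not part of EasyMocap.
import yaml

def merge_opts(base, override):
    """Recursively merge `override` into `base`, returning a new dict."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_opts(merged[key], value)
        else:
            merged[key] = value
    return merged

with open('config/mvmp/body/boxing.yml') as f:
    scene = yaml.safe_load(f)
with open(scene['exp']) as f:
    pipeline = yaml.safe_load(f)

args = merge_opts(pipeline['args'], scene['exp_opts'])
# e.g. at_step.match.args.triangulate.min_view_body is now 4 for boxing
```

This is why each scene file stays around 40 lines: only scene-specific values such as `min_view_body`, `dist_track`, and `final_ranges` need to be spelled out.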
diff --git a/config/mvmp/body/soccer1_6.yml b/config/mvmp/body/soccer1_6.yml
new file mode 100644
index 0000000..6855933
--- /dev/null
+++ b/config/mvmp/body/soccer1_6.yml
@@ -0,0 +1,43 @@
+data: config/datasets/mvimage.yml
+exp: config/mvmp/detect_match_triangulate_fitSMPL.yml
+data_opts:
+  root: TO_BE_FILLED
+  subs: []
+  subs_vis: ['1', '3', '5', '7']
+  ranges: [0, 600, 1]
+exp_opts:
+  output: output/soccer1_6
+  at_step:
+    detect:
+      args:
+        min_length: 100
+        max_length: 1000
+    match:
+      args:
+        cfg_match:
+          distance:
+            threshold_track: 0.05
+        triangulate:
+          min_view: 3 # at least 3 views to triangulate a point
+          min_view_body: 3 # at least 3 visible views for a body
+          dist_max: 25 # pixel
+          dist_track: 100 # 100mm
+        cfg_track:
+          final_ranges: [[-5, -5, 0.], [5, 5, 5.]] # final output range; only used to filter the results when writing them out
+          final_max_person: 6
+  at_final:
+    collect:
+      args:
+        min_frame: 100
+    fitting_each_person:
+      args:
+        stages:
+          refine_poses:
+            args:
+              loss:
+                smooth:
+                  weight: 1.
+    vis_render:
+      args:
+        view_list: [0, 2, 4, 6]
+        scale: 0.5
\ No newline at end of file
diff --git a/config/mvmp/detect_match_triangulate.yml b/config/mvmp/detect_match_triangulate.yml
new file mode 100644
index 0000000..bb28766
--- /dev/null
+++ b/config/mvmp/detect_match_triangulate.yml
@@ -0,0 +1,87 @@
+module: myeasymocap.stages.basestage.MultiStage
+args:
+  output: output/detect_match_triangulate
+  keys_keep: [cameras, imgnames]
+  at_step:
+    detect:
+      module: myeasymocap.backbone.yolo.yolo.MultiPerson # Use YOLO to detect multiple people
+      key_from_data: [images, imgnames]
+      args:
+        model: yolov5m
+        name: person
+        min_length: 150 # these two thresholds control which bboxes are kept
+        max_length: 1000
+    # keypoints2d:
+    #   module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
+    #   key_from_data: [images, imgnames]
+    #   key_from_previous: [bbox]
+    #   key_keep: []
+    #   args:
+    #     ckpt: data/models/pose_hrnet_w48_384x288.pth
+    #     single_person: False # This flag controls whether keypoints are detected for all people
+    keypoints2d:
+      module: myeasymocap.backbone.vitpose.vit_moe.MyViT
+      key_from_data: [images, imgnames]
+      key_from_previous: [bbox]
+      key_keep: []
+      args:
+        ckpt: data/models/vitpose+_base.pth
+        single_person: False # This flag controls whether keypoints are detected for all people
+    vis_2d:
+      module: myeasymocap.io.vis.Vis2D
+      skip: False
+      key_from_data: [images]
+      key_from_previous: [keypoints, bbox]
+      args:
+        name: vis_keypoints2d
+        scale: 0.5
+    match:
+      module: myeasymocap.operations.match_base.MatchAndTrack
+      key_from_data: [cameras, meta]
+      key_from_previous: [keypoints]
+      args:
+        cfg_match:
+          min_conf: 0.3
+          min_joints: 9
+          distance:
+            mode: epipolar
+            threshold: 0.05 # inlier threshold for matching
+            threshold_track: 0.05 # inlier threshold when matching against tracks
+          min_common_joints: 9
+        cfg_svt:
+          debug: 0
+          maxIter: 10
+          w_sparse: 0.1
+          w_rank: 50
+          tol: 0.0001
+          aff_min: 0.3
+        triangulate:
+          min_view: 3 # min views when triangulating each point
+          min_view_body: 3 # min visible views of the whole body
+          min_conf_3d: 0.1
+          dist_max: 50 # pixel
+          dist_track: 100 # mm
+        cfg_track:
+          max_person: 100
+          max_missing: 3 # remove a track once it has been missing for 3 frames
+          final_ranges: [[-10000, -10000, -10000], [10000, 10000, 10000]] # final output range; only used to filter the results when writing them out
+          final_max_person: 100
+          kintree: [[2, 3], [5, 6], [3, 4], [6, 7], [11, 22], [22, 23], [11, 24], [14, 19], [19, 20], [14, 21]]
+    vis_kpts3d:
+      module: myeasymocap.io.vis.Vis3D
+      key_from_data: [images, cameras]
+      key_from_previous: [results] # used for the final joint optimization
+      args:
+        scale: 0.5
+        lw_factor: 10
+  at_final:
+    write_raw: # write the raw 3D keypoints
+      module: myeasymocap.io.write.WriteAll
+      key_from_data: [results, meta]
+      args:
+        name: keypoints3d_raw
+    make_video:
+      module: myeasymocap.io.video.MakeVideo
+      args:
+        fps: 60
+        keep_image: False
\ No newline at end of file
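For orientation, `cfg_match.distance` with `mode: epipolar` scores candidate pairs of detections across views by how far each detection lies from the other's epipolar lines. A minimal sketch of such a distance, assuming a known fundamental matrix `F` between the two views and keypoints stored as `(x, y, conf)` rows; this illustrates the idea only and is not the `MatchAndTrack` implementation:

```python
# Sketch of a symmetric epipolar distance between two detections.
# The 0.05 threshold in the config suggests the distance is computed in
# normalized image coordinates; that normalization is omitted here.
import numpy as np

def epipolar_distance(kpts0, kpts1, F, min_conf=0.3, min_common_joints=9):
    """kpts0, kpts1: (J, 3) arrays of (x, y, conf) in two views.
    F: fundamental matrix mapping view-0 points to view-1 lines."""
    valid = (kpts0[:, 2] > min_conf) & (kpts1[:, 2] > min_conf)
    if valid.sum() < min_common_joints:
        return np.inf  # not enough jointly visible keypoints to compare
    ones = np.ones((int(valid.sum()), 1))
    p0 = np.hstack([kpts0[valid, :2], ones])  # homogeneous points, view 0
    p1 = np.hstack([kpts1[valid, :2], ones])  # homogeneous points, view 1
    l1 = p0 @ F.T  # epipolar lines of view-0 points in view 1
    l0 = p1 @ F    # epipolar lines of view-1 points in view 0
    d1 = np.abs(np.sum(p1 * l1, axis=1)) / np.linalg.norm(l1[:, :2], axis=1)
    d0 = np.abs(np.sum(p0 * l0, axis=1)) / np.linalg.norm(l0[:, :2], axis=1)
    return float(0.5 * (d0.mean() + d1.mean()))
```

Pairs whose distance stays below `threshold` become candidate matches; the SVT settings under `cfg_svt` then refine the resulting cross-view affinity matrix with a low-rank constraint, which is what makes the per-pair matches consistent across all views.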
diff --git a/config/mvmp/detect_match_triangulate_fitSMPL.yml b/config/mvmp/detect_match_triangulate_fitSMPL.yml
new file mode 100644
index 0000000..b5ba622
--- /dev/null
+++ b/config/mvmp/detect_match_triangulate_fitSMPL.yml
@@ -0,0 +1,121 @@
+module: myeasymocap.stages.basestage.MultiStage
+args:
+  output: output/detect_match_triangulate
+  keys_keep: [cameras, imgnames]
+  at_step:
+    detect:
+      module: myeasymocap.backbone.yolo.yolo.MultiPerson # Use YOLO to detect multiple people
+      key_from_data: [images, imgnames]
+      args:
+        model: yolov5m
+        name: person
+        min_length: 150 # these two thresholds control which bboxes are kept
+        max_length: 1000
+    # keypoints2d:
+    #   module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
+    #   key_from_data: [images, imgnames]
+    #   key_from_previous: [bbox]
+    #   key_keep: []
+    #   args:
+    #     ckpt: data/models/pose_hrnet_w48_384x288.pth
+    #     single_person: False # This flag controls whether keypoints are detected for all people
+    keypoints2d:
+      module: myeasymocap.backbone.vitpose.vit_moe.MyViT
+      key_from_data: [images, imgnames]
+      key_from_previous: [bbox]
+      key_keep: []
+      args:
+        ckpt: data/models/vitpose+_base.pth
+        single_person: False # This flag controls whether keypoints are detected for all people
+    vis_2d:
+      module: myeasymocap.io.vis.Vis2D
+      skip: False
+      key_from_data: [images]
+      key_from_previous: [keypoints, bbox]
+      args:
+        name: vis_keypoints2d
+        scale: 0.5
+    match:
+      module: myeasymocap.operations.match_base.MatchAndTrack
+      key_from_data: [cameras, meta]
+      key_from_previous: [keypoints]
+      args:
+        cfg_match:
+          min_conf: 0.3
+          min_joints: 9
+          distance:
+            mode: epipolar
+            threshold: 0.05 # inlier threshold for matching
+            threshold_track: 0.05 # inlier threshold when matching against tracks
+          min_common_joints: 9
+        cfg_svt:
+          debug: 0
+          maxIter: 10
+          w_sparse: 0.1
+          w_rank: 50
+          tol: 0.0001
+          aff_min: 0.3
+        triangulate:
+          min_view: 3 # min views when triangulating each point
+          min_view_body: 3 # min visible views of the whole body
+          min_conf_3d: 0.1
+          dist_max: 50 # pixel
+          dist_track_pixel: 100 # pixel
+          dist_track: 100 # mm
+        cfg_track:
+          max_person: 100
+          max_missing: 3 # remove a track once it has been missing for 3 frames
+          final_ranges: [[-10000, -10000, -10000], [10000, 10000, 10000]] # final output range; only used to filter the results when writing them out
+          final_max_person: 100
+          kintree: [[ 1, 0], [ 2, 1], [ 3, 2], [ 4, 3], [ 5, 1], [ 6, 5], [ 7, 6], [ 8, 1], [ 9, 8], [10, 9], [11, 10], [12, 8], [13, 12], [14, 13], [15, 0], [16, 0], [17, 15], [18, 16], [19, 14], [20, 19], [21, 14], [22, 11], [23, 22], [24, 11]]
+    vis_kpts3d:
+      module: myeasymocap.io.vis.Vis3D
+      key_from_data: [images, cameras]
+      key_from_previous: [results] # used for the final joint optimization
+      args:
+        scale: 0.5
+        lw_factor: 10
+  at_final:
+    write_raw: # write the raw 3D keypoints
+      module: myeasymocap.io.write.WriteAll
+      key_from_data: [results, meta]
+      args:
+        name: keypoints3d_raw
+    collect: # split the per-frame results into per-person sequences
+      module: myeasymocap.stages.collect.CollectMultiPersonMultiFrame
+      key_from_data: [keypoints3d, pids]
+      args:
+        key: keypoints3d
+        min_frame: 20
+    load_body_model: # load the body model
+      module: myeasymocap.io.model.SMPLLoader
+      args:
+        model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl # the SMPL model shipped with PARE
+        regressor_path: models/J_regressor_body25.npy
+    # this module returns two things: body_model and model; body_model is used for visualization
+    fitting_each_person:
+      module: myeasymocap.stages.basestage.StageForFittingEach
+      key_from_previous: [model, results]
+      key_from_data: []
+      args:
+        stages: _file_/config/mvmp/meta_fit_SMPL.yml
+        keys_keep: [params]
+    write:
+      module: myeasymocap.io.write.WriteSMPL
+      key_from_data: [meta]
+      key_from_previous: [results, model]
+      args:
+        name: smpl
+    vis_render:
+      module: myeasymocap.io.vis3d.RenderAll_multiview
+      key_from_data: [meta, cameras, imgnames]
+      key_from_previous: [results, body_model]
+      args:
+        backend: pyrender
+        view_list: [0]
+        scale: 0.5
+    make_video:
+      module: myeasymocap.io.video.MakeVideo
+      args:
+        fps: 60
+        keep_image: False
\ No newline at end of file
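The `triangulate` block in both pipeline files is easier to read with the underlying operation in mind: each joint is triangulated from the views that see it with enough confidence, and rejected when too few views agree or the reprojection error is too large. A minimal sketch, assuming confidence-weighted DLT; the names and the exact set of checks are illustrative, not the EasyMocap implementation:

```python
# Sketch of confidence-weighted DLT triangulation with the view-count
# and reprojection checks that min_view / dist_max in the config suggest.
import numpy as np

def triangulate_joint(obs, Ps, min_view=3, min_conf=0.3, dist_max=50):
    """obs: (V, 3) per-view (x, y, conf) for one joint; Ps: list of 3x4
    projection matrices. Returns the 3D point or None if rejected."""
    visible = [v for v in range(len(Ps)) if obs[v, 2] > min_conf]
    if len(visible) < min_view:
        return None  # too few confident views (min_view in the config)
    rows = []
    for v in visible:
        x, y, conf = obs[v]
        # confidence-weighted DLT constraints contributed by this view
        rows.append(conf * (x * Ps[v][2] - Ps[v][0]))
        rows.append(conf * (y * Ps[v][2] - Ps[v][1]))
    _, _, Vt = np.linalg.svd(np.asarray(rows))
    X = Vt[-1]
    X = X / X[3]  # homogeneous 3D point
    # reject the joint if any reprojection error exceeds dist_max pixels
    for v in visible:
        uv = Ps[v] @ X
        if np.linalg.norm(uv[:2] / uv[2] - obs[v, :2]) > dist_max:
            return None
    return X[:3]
```

The scene configs tighten these knobs per capture: boxing requires 4 visible views per body with a 50mm tracking gate, while soccer tolerates 3 views at 100mm.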
diff --git a/config/mvmp/meta_fit_SMPL.yml b/config/mvmp/meta_fit_SMPL.yml
new file mode 100644
index 0000000..5614a4c
--- /dev/null
+++ b/config/mvmp/meta_fit_SMPL.yml
@@ -0,0 +1,107 @@
+init_params: # initialize the pose and shape parameters
+  module: myeasymocap.operations.init.InitParams
+  key_from_data: [keypoints3d]
+  args:
+    num_poses: 69
+    num_shapes: 10
+fitShape: # this step optimizes the SMPL shape parameters from the limb lengths
+  module: myeasymocap.operations.optimizer.Optimizer
+  key_from_data: [keypoints3d]
+  key_from_previous: [model, params] # the model used in this step is a callable that only maps params to outputs; it is not the same as the SMPL model
+  args:
+    optimizer_args: {optim_type: lbfgs}
+    optimize_keys: [shapes]
+    loss:
+      k3d:
+        weight: 1000.
+        module: myeasymocap.operations.loss.LimbLength
+        key_from_output: [keypoints]
+        key_from_infos: [keypoints3d]
+        args:
+          kintree: [[8, 1], [2, 5], [2, 3], [5, 6], [3, 4], [6, 7], [1, 0], [9, 12], [9, 10], [10, 11], [12, 13], [13, 14]]
+      regshape:
+        weight: 0.1
+        module: myeasymocap.operations.loss.RegLoss
+        key_from_output: [shapes]
+        key_from_infos: [] # TODO: compute the smooth weight from the 2D confidence
+        args:
+          key: shapes
+          norm: l2
+init_RT: # in this step, the shape parameters are first averaged over the whole sequence, then the Rh/Th parameters are re-optimized
+  module: myeasymocap.operations.optimizer.Optimizer
+  key_from_data: [keypoints3d]
+  key_from_previous: [model, params] # the model used in this step is a callable that only maps params to outputs; it is not the same as the SMPL model
+  # this design allows models that are more than plain SMPL to be wrapped behind the same interface
+  # model is a pure function, used for visualization
+  args:
+    optimizer_args: {optim_type: lbfgs}
+    optimize_keys: [Th, Rh]
+    loss:
+      k3d:
+        weight: 100.
+        module: myeasymocap.operations.loss.Keypoints3D
+        key_from_output: [keypoints]
+        key_from_infos: [keypoints3d]
+        args:
+          norm: l2
+          index_est: [2, 5, 9, 12]
+          index_gt: [2, 5, 9, 12]
+      smooth:
+        weight: 1.
+        module: myeasymocap.operations.loss.Smooth
+        key_from_output: [Th, keypoints]
+        key_from_infos: [] # TODO: compute the smooth weight from the 2D confidence
+        args:
+          keys: [keypoints, Th]
+          smooth_type: [Linear, Linear] # the depth seems to need the camera parameters for conversion
+          norm: [l2, l2]
+          order: [2, 2]
+          weights: [10., 100.]
+          window_weight: [0.5, 0.3, 0.1, 0.1]
+refine_poses:
+  repeat: 2
+  module: myeasymocap.operations.optimizer.Optimizer
+  key_from_data: [keypoints3d]
+  key_from_previous: [model, params]
+  args:
+    optimizer_args: {optim_type: lbfgs}
+    optimize_keys: [[poses, Rh, Th], [poses, shapes, Rh, Th]]
+    loss:
+      k3d:
+        weight: 1000.
+        module: myeasymocap.operations.loss.Keypoints3D
+        key_from_output: [keypoints]
+        key_from_infos: [keypoints3d]
+        args:
+          norm: l2
+          norm_info: 0.02
+          ranges_est: [0, 25]
+          ranges_gt: [0, 25]
+      smooth:
+        weight: 1.
+        module: myeasymocap.operations.loss.Smooth
+        key_from_output: [poses, Th, keypoints]
+        key_from_infos: [] # TODO: compute the smooth weight from the 2D confidence
+        args:
+          keys: [Th, poses, keypoints]
+          smooth_type: [Linear, Linear, Linear] # the depth seems to need the camera parameters for conversion
+          norm: [l2, l2, l2]
+          order: [2, 2, 2]
+          weights: [10., 10., 10.]
+          window_weight: [0.5, 0.3, 0.1, 0.1]
+      prior:
+        weight: 0.1
+        module: easymocap.multistage.gmm.GMMPrior
+        key_from_output: [poses]
+        key_from_infos: []
+        args:
+          start: 0
+          end: 69
+      regshape:
+        weight: 0.1
+        module: myeasymocap.operations.loss.RegLoss
+        key_from_output: [shapes]
+        key_from_infos: [] # TODO: compute the smooth weight from the 2D confidence
+        args:
+          key: shapes
+          norm: l2
\ No newline at end of file
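In `fitShape`, the only free parameters are `shapes`, so the `LimbLength` term effectively fits body scale: the bone lengths of the model-regressed keypoints should match those of the triangulated skeleton. A minimal sketch of such a loss, assuming PyTorch and `(x, y, z, conf)` triangulated joints; illustrative only, not the `myeasymocap.operations.loss.LimbLength` module:

```python
# Sketch of a confidence-weighted limb-length objective over a sequence.
import torch

def limb_length_loss(kpts_est, kpts3d, kintree):
    """kpts_est: (T, J, 3) keypoints regressed from the current SMPL
    params; kpts3d: (T, J, 4) triangulated keypoints with confidence;
    kintree: list of (i, j) joint index pairs defining the limbs."""
    loss = kpts_est.new_zeros(())
    for i, j in kintree:
        length_est = (kpts_est[:, i] - kpts_est[:, j]).norm(dim=-1)
        length_gt = (kpts3d[:, i, :3] - kpts3d[:, j, :3]).norm(dim=-1)
        # weight each limb by the lower of its two joint confidences
        conf = torch.minimum(kpts3d[:, i, 3], kpts3d[:, j, 3])
        loss = loss + (conf * (length_est - length_gt) ** 2).mean()
    return loss / len(kintree)
```

Because limb lengths are invariant to the global rotation and translation that `init_RT` has not yet solved, this term can be optimized before the pose is trustworthy, which is why shape fitting comes first in this meta-config.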
diff --git a/myeasymocap/stages/basestage.py b/myeasymocap/stages/basestage.py
index 8c1cc4d..d4a6240 100644
--- a/myeasymocap/stages/basestage.py
+++ b/myeasymocap/stages/basestage.py
@@ -145,6 +145,7 @@ class StageForFittingEach:
         for key, stage in self.stages.items():
             for iter_ in range(self.stages_args[key].get('repeat', 1)):
                 inputs = {}
+                stage.iter = iter_  # let each stage know which repeat it is running
                 for k in self.stages_args[key].get('key_from_data', []):
                     inputs[k] = result[k]
                 for k in self.stages_args[key].get('key_from_previous', []):
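The one-line change to `StageForFittingEach` hands the repeat index to the stage before it runs. This is what allows `refine_poses` (with `repeat: 2`) to use a different `optimize_keys` list on each pass, freeing `shapes` only in the second round. A hypothetical sketch of how a stage might consume `self.iter`; the real `Optimizer` class may differ:

```python
# Hypothetical stage showing the intended use of the injected `iter`:
# StageForFittingEach sets `stage.iter` before each repeat, so a
# list-of-lists optimize_keys can select a different key set per pass.
class Optimizer:
    def __init__(self, optimize_keys, optimizer_args=None, **kwargs):
        self.optimize_keys = optimize_keys
        self.iter = 0  # overwritten by StageForFittingEach on each repeat

    def __call__(self, params, **inputs):
        keys = self.optimize_keys
        if keys and isinstance(keys[0], (list, tuple)):
            # e.g. [poses, Rh, Th] on the first pass,
            # then [poses, shapes, Rh, Th] on the second
            keys = keys[min(self.iter, len(keys) - 1)]
        # ... run LBFGS over `keys`, keeping all other params fixed ...
        return params
```

Without the injected index, every repeat of a stage would see identical settings; with it, schedules like "pose first, then pose plus shape" can be expressed purely in the YAML.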