diff --git a/config/mvmp/body/ballet_2person.yml b/config/mvmp/body/ballet_2person.yml new file mode 100644 index 0000000..202a524 --- /dev/null +++ b/config/mvmp/body/ballet_2person.yml @@ -0,0 +1,42 @@ +data: config/datasets/mvimage.yml +exp: config/mvmp/detect_match_triangulate_fitSMPL.yml +data_opts: + root: TO_BE_FILLED + # subs: ['01', '03', '05', '07', '09', '11', '13', '15', '17', '19', '21', '23'] + subs: ['01', '04', '07', '10', '13', '16', '19', '22'] # Use 8 cameras + subs_vis: ['01', '07', '13', '19'] + # subs_vis: [_all_] + ranges: [0, 1100, 1] +exp_opts: + output: output/ballet_2person + at_step: + detect: + args: + min_length: 200 + max_length: 1000 + match: + args: + cfg_match: + distance: + threshold_track: 0.05 + triangulate: + min_view: 3 # at least 3 views to triangulate + min_view_body: 5 # at least 5 views to triangulate + dist_max: 25 # pixel + dist_track: 100 # 100mm; + cfg_track: + max_person: 2 + final_ranges: [[-3, -3, 0.], [3, 3, 3.]] # 最终的输出的range,仅用于输出的时候的筛选 + final_max_person: 2 + at_final: + collect: + args: + min_frame: 100 + fitting_each_person: + args: + stages: + refine_poses: + args: + loss: + smooth: + weight: 5. 
\ No newline at end of file diff --git a/config/mvmp/body/boxing.yml b/config/mvmp/body/boxing.yml new file mode 100644 index 0000000..f79d8a7 --- /dev/null +++ b/config/mvmp/body/boxing.yml @@ -0,0 +1,43 @@ +data: config/datasets/mvimage.yml +exp: config/mvmp/detect_match_triangulate_fitSMPL.yml +data_opts: + root: TO_BE_FILLED + # subs: ['01', '03', '05', '07', '09', '11', '13', '15', '17', '19', '21', '23'] + subs: ['01', '04', '07', '10', '13', '16', '19', '22'] # Use 8 cameras + subs_vis: ['01', '07', '13', '19'] # Visualize 4 cameras + ranges: [0, 200, 1] +exp_opts: + output: output/boxing + at_step: + detect: + args: + min_length: 200 + max_length: 1000 + match: + args: + cfg_match: + distance: + threshold_track: 0.05 + triangulate: + min_view: 3 # at least 3 views to triangulate + min_view_body: 4 # at least 4 views to triangulate + dist_max: 25 # pixel + dist_track: 50 # 50mm; + cfg_track: + final_ranges: [[-3, -3, 0.], [3, 3, 3.]] # range of the final output; only used to filter results at output time + at_final: + collect: + args: + min_frame: 100 + fitting_each_person: + args: + stages: + refine_poses: + args: + loss: + smooth: + weight: 1. + vis_render: + args: + view_list: [3] + scale: 1. 
\ No newline at end of file diff --git a/config/mvmp/body/soccer1_6.yml b/config/mvmp/body/soccer1_6.yml new file mode 100644 index 0000000..6855933 --- /dev/null +++ b/config/mvmp/body/soccer1_6.yml @@ -0,0 +1,43 @@ +data: config/datasets/mvimage.yml +exp: config/mvmp/detect_match_triangulate_fitSMPL.yml +data_opts: + root: TO_BE_FILLED + subs: [] + subs_vis: ['1', '3', '5', '7'] + ranges: [0, 600, 1] +exp_opts: + output: output/soccer1_6 + at_step: + detect: + args: + min_length: 100 + max_length: 1000 + match: + args: + cfg_match: + distance: + threshold_track: 0.05 + triangulate: + min_view: 3 # at least 3 views to triangulate + min_view_body: 3 # at least 3 views to triangulate + dist_max: 25 # pixel + dist_track: 100 # 100mm; + cfg_track: + final_ranges: [[-5, -5, 0.], [5, 5, 5.]] # range of the final output; only used to filter results at output time + final_max_person: 6 + at_final: + collect: + args: + min_frame: 100 + fitting_each_person: + args: + stages: + refine_poses: + args: + loss: + smooth: + weight: 1. + vis_render: + args: + view_list: [0, 2, 4, 6] + scale: 0.5 \ No newline at end of file diff --git a/config/mvmp/detect_match_triangulate.yml b/config/mvmp/detect_match_triangulate.yml new file mode 100644 index 0000000..bb28766 --- /dev/null +++ b/config/mvmp/detect_match_triangulate.yml @@ -0,0 +1,87 @@ +module: myeasymocap.stages.basestage.MultiStage +args: + output: output/detect_match_triangulate + keys_keep: [cameras, imgnames] + at_step: + detect: + module: myeasymocap.backbone.yolo.yolo.MultiPerson # Use YOLO to detect multi-person + key_from_data: [images, imgnames] + args: + model: yolov5m + name: person + min_length: 150 # these two thresholds control the wanted bboxes + max_length: 1000 + # keypoints2d: + # module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet + # key_from_data: [images, imgnames] + # key_from_previous: [bbox] + # key_keep: [] + # args: + # ckpt: data/models/pose_hrnet_w48_384x288.pth + # single_person: False # This flag controls the function to detect all keypoints + 
keypoints2d: + module: myeasymocap.backbone.vitpose.vit_moe.MyViT + key_from_data: [images, imgnames] + key_from_previous: [bbox] + key_keep: [] + args: + ckpt: data/models/vitpose+_base.pth + single_person: False # This flag controls the function to detect all keypoints + vis_2d: + module: myeasymocap.io.vis.Vis2D + skip: False + key_from_data: [images] + key_from_previous: [keypoints, bbox] + args: + name: vis_keypoints2d + scale: 0.5 + match: + module: myeasymocap.operations.match_base.MatchAndTrack + key_from_data: [cameras, meta] + key_from_previous: [keypoints] + args: + cfg_match: + min_conf: 0.3 + min_joints: 9 + distance: + mode: epipolar + threshold: 0.05 # 用于控制匹配的内点阈值 + threshold_track: 0.05 # track的匹配的内点阈值 + min_common_joints: 9 + cfg_svt: + debug: 0 + maxIter: 10 + w_sparse: 0.1 + w_rank: 50 + tol: 0.0001 + aff_min: 0.3 + triangulate: + min_view: 3 # min view when triangulate each points + min_view_body: 3 # min visible view of the body + min_conf_3d: 0.1 + dist_max: 50 # pixel + dist_track: 100 # mm + cfg_track: + max_person: 100 + max_missing: 3 # 最多丢失3帧就要删除 + final_ranges: [[-10000, -10000, -10000], [10000, 10000, 10000]] # 最终的输出的range,仅用于输出的时候的筛选 + final_max_person: 100 + kintree: [[2, 3], [5, 6], [3, 4], [6, 7], [11, 22], [22, 23], [11, 24], [14, 19], [19, 20], [14, 21]] + vis_kpts3d: + module: myeasymocap.io.vis.Vis3D + key_from_data: [images, cameras] + key_from_previous: [results] # 用于最后的一起优化 + args: + scale: 0.5 + lw_factor: 10 + at_final: + write_raw: # write the raw 3d keypoints + module: myeasymocap.io.write.WriteAll + key_from_data: [results, meta] + args: + name: keypoints3d_raw + make_video: + module: myeasymocap.io.video.MakeVideo + args: + fps: 60 + keep_image: False \ No newline at end of file diff --git a/config/mvmp/detect_match_triangulate_fitSMPL.yml b/config/mvmp/detect_match_triangulate_fitSMPL.yml new file mode 100644 index 0000000..b5ba622 --- /dev/null +++ b/config/mvmp/detect_match_triangulate_fitSMPL.yml @@ -0,0 +1,121 @@ 
+module: myeasymocap.stages.basestage.MultiStage +args: + output: output/detect_match_triangulate + keys_keep: [cameras, imgnames] + at_step: + detect: + module: myeasymocap.backbone.yolo.yolo.MultiPerson # Use YOLO to detect multi-person + key_from_data: [images, imgnames] + args: + model: yolov5m + name: person + min_length: 150 # these two thresholds control the wanted bboxes + max_length: 1000 + # keypoints2d: + # module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet + # key_from_data: [images, imgnames] + # key_from_previous: [bbox] + # key_keep: [] + # args: + # ckpt: data/models/pose_hrnet_w48_384x288.pth + # single_person: False # This flag controls the function to detect all keypoints + keypoints2d: + module: myeasymocap.backbone.vitpose.vit_moe.MyViT + key_from_data: [images, imgnames] + key_from_previous: [bbox] + key_keep: [] + args: + ckpt: data/models/vitpose+_base.pth + single_person: False # This flag controls the function to detect all keypoints + vis_2d: + module: myeasymocap.io.vis.Vis2D + skip: False + key_from_data: [images] + key_from_previous: [keypoints, bbox] + args: + name: vis_keypoints2d + scale: 0.5 + match: + module: myeasymocap.operations.match_base.MatchAndTrack + key_from_data: [cameras, meta] + key_from_previous: [keypoints] + args: + cfg_match: + min_conf: 0.3 + min_joints: 9 + distance: + mode: epipolar + threshold: 0.05 # inlier threshold used for matching + threshold_track: 0.05 # inlier threshold used for track matching + min_common_joints: 9 + cfg_svt: + debug: 0 + maxIter: 10 + w_sparse: 0.1 + w_rank: 50 + tol: 0.0001 + aff_min: 0.3 + triangulate: + min_view: 3 # min number of views needed to triangulate each point + min_view_body: 3 # min number of views in which the body is visible + min_conf_3d: 0.1 + dist_max: 50 # pixel + dist_track_pixel: 100 # pixel + dist_track: 100 # mm + cfg_track: + max_person: 100 + max_missing: 3 # drop a track after at most 3 missing frames + final_ranges: [[-10000, -10000, -10000], [10000, 10000, 10000]] # range of the final output; only used to filter results at output time + final_max_person: 100 + kintree: [[ 1, 0], [ 2, 1], [ 3, 2], [ 4, 3], [ 5, 1], [ 6, 
5], [ 7, 6], [ 8, 1], [ 9, 8], [10, 9], [11, 10], [12, 8], [13, 12], [14, 13], [15, 0], [16, 0], [17, 15], [18, 16], [19, 14], [20, 19], [21, 14], [22, 11], [23, 22], [24, 11]] + vis_kpts3d: + module: myeasymocap.io.vis.Vis3D + key_from_data: [images, cameras] + key_from_previous: [results] # 用于最后的一起优化 + args: + scale: 0.5 + lw_factor: 10 + at_final: + write_raw: # write the raw 3d keypoints + module: myeasymocap.io.write.WriteAll + key_from_data: [results, meta] + args: + name: keypoints3d_raw + collect: # split the results of each frame to each person + module: myeasymocap.stages.collect.CollectMultiPersonMultiFrame + key_from_data: [keypoints3d, pids] + args: + key: keypoints3d + min_frame: 20 + load_body_model: # 载入身体模型 + module: myeasymocap.io.model.SMPLLoader + args: + model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl # load PARE model + regressor_path: models/J_regressor_body25.npy + # # 这个模块返回两个内容:body_model, model; 其中的body_model是用来进行可视化的 + fitting_each_person: + module: myeasymocap.stages.basestage.StageForFittingEach + key_from_previous: [model, results] + key_from_data: [] + args: + stages: _file_/config/mvmp/meta_fit_SMPL.yml + keys_keep: [params] + write: + module: myeasymocap.io.write.WriteSMPL + key_from_data: [meta] + key_from_previous: [results, model] + args: + name: smpl + vis_render: + module: myeasymocap.io.vis3d.RenderAll_multiview + key_from_data: [meta, cameras, imgnames] + key_from_previous: [results, body_model] + args: + backend: pyrender + view_list: [0] + scale: 0.5 + make_video: + module: myeasymocap.io.video.MakeVideo + args: + fps: 60 + keep_image: False \ No newline at end of file diff --git a/config/mvmp/meta_fit_SMPL.yml b/config/mvmp/meta_fit_SMPL.yml new file mode 100644 index 0000000..5614a4c --- /dev/null +++ b/config/mvmp/meta_fit_SMPL.yml @@ -0,0 +1,107 @@ +init_params: # 初始化姿态 + module: myeasymocap.operations.init.InitParams + key_from_data: [keypoints3d] + args: + num_poses: 69 + num_shapes: 10 +fitShape: # 
这一步需要根据骨长优化一下SMPL的shape参数 + module: myeasymocap.operations.optimizer.Optimizer + key_from_data: [keypoints3d] + key_from_previous: [model, params] # 这一步优化所使用的model,是一个可调用的函数,负责把params的输入变成输出,而不用考虑其他,与SMPL model是不一样的 + args: + optimizer_args: {optim_type: lbfgs} + optimize_keys: [shapes] + loss: + k3d: + weight: 1000. + module: myeasymocap.operations.loss.LimbLength + key_from_output: [keypoints] + key_from_infos: [keypoints3d] + args: + kintree: [[8, 1], [2, 5], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [1, 0], [9, 12], [9, 10], [10, 11], [12, 13],[13, 14]] + regshape: + weight: 0.1 + module: myeasymocap.operations.loss.RegLoss + key_from_output: [shapes] + key_from_infos: [] # TODO: 根据2D的置信度来计算smooth权重 + args: + key: shapes + norm: l2 +init_RT: # 这一步中,首先将SMPL参数的shape参数进行整段平均。重新优化更新RT参数 + module: myeasymocap.operations.optimizer.Optimizer + key_from_data: [keypoints3d] + key_from_previous: [model, params] # 这一步优化所使用的model,是一个可调用的函数,负责把params的输入变成输出,而不用考虑其他,与SMPL model是不一样的 + # 这样设计的目的是对于一些不只是SMPL本身的模型,可以在外面套一层接口 + # model是一个纯函数,用来进行可视化 + args: + optimizer_args: {optim_type: lbfgs} + optimize_keys: [Th, Rh] + loss: + k3d: + weight: 100. + module: myeasymocap.operations.loss.Keypoints3D + key_from_output: [keypoints] + key_from_infos: [keypoints3d] + args: + norm: l2 + index_est: [2, 5, 9, 12] + index_gt: [2, 5, 9, 12] + smooth: + weight: 1. + module: myeasymocap.operations.loss.Smooth + key_from_output: [Th, keypoints] + key_from_infos: [] # TODO: 根据2D的置信度来计算smooth权重 + args: + keys: [keypoints, Th] + smooth_type: [Linear, Linear] # 这个depth似乎需要相机参数进行转换 + norm: [l2, l2] + order: [2, 2] + weights: [10., 100.] 
+ window_weight: [0.5, 0.3, 0.1, 0.1] +refine_poses: + repeat: 2 + module: myeasymocap.operations.optimizer.Optimizer + key_from_data: [keypoints3d] + key_from_previous: [model, params] + args: + optimizer_args: {optim_type: lbfgs} + optimize_keys: [[poses, Rh, Th], [poses, shapes, Rh, Th]] + loss: + k3d: + weight: 1000. + module: myeasymocap.operations.loss.Keypoints3D + key_from_output: [keypoints] + key_from_infos: [keypoints3d] + args: + norm: l2 + norm_info: 0.02 + ranges_est: [0, 25] + ranges_gt: [0, 25] + smooth: + weight: 1. + module: myeasymocap.operations.loss.Smooth + key_from_output: [poses, Th, keypoints] + key_from_infos: [] # TODO: 根据2D的置信度来计算smooth权重 + args: + keys: [Th, poses, keypoints] + smooth_type: [Linear, Linear, Linear] # 这个depth似乎需要相机参数进行转换 + norm: [l2, l2, l2] + order: [2, 2, 2] + weights: [10., 10., 10.,] + window_weight: [0.5, 0.3, 0.1, 0.1] + prior: + weight: 0.1 + module: easymocap.multistage.gmm.GMMPrior + key_from_output: [poses] + key_from_infos: [] + args: + start: 0 + end: 69 + regshape: + weight: 0.1 + module: myeasymocap.operations.loss.RegLoss + key_from_output: [shapes] + key_from_infos: [] # TODO: 根据2D的置信度来计算smooth权重 + args: + key: shapes + norm: l2 \ No newline at end of file diff --git a/myeasymocap/stages/basestage.py b/myeasymocap/stages/basestage.py index 8c1cc4d..d4a6240 100644 --- a/myeasymocap/stages/basestage.py +++ b/myeasymocap/stages/basestage.py @@ -145,6 +145,7 @@ class StageForFittingEach: for key, stage in self.stages.items(): for iter_ in range(self.stages_args[key].get('repeat', 1)): inputs = {} + stage.iter = iter_ for k in self.stages_args[key].get('key_from_data', []): inputs[k] = result[k] for k in self.stages_args[key].get('key_from_previous', []):