🚀 update to v0.3
This commit is contained in:
parent b44fa3c90b
commit e7800a1356
6 .gitignore vendored
@@ -112,5 +112,7 @@ data/**
 .DS*
 code_deprecate
 code
-neuralbody
-lightning_logs
+# neuralbody
+lightning_logs
+models
+yolov5m.pt
96 apps/mocap/run.py Normal file
@@ -0,0 +1,96 @@
# This script provides the basic entry point for running mocap.
import os
from easymocap.config import Config, load_object
from tqdm import tqdm

def process(dataset, model):
    ret_all = []
    print('[Run] dataset has {} samples'.format(len(dataset)))
    for i in tqdm(range(len(dataset)), desc='[Run]'):
        data = dataset[i]
        ret = model.at_step(data, i)
        ret_all.append(ret)
    ret_all = model.at_final(ret_all)

def update_data_by_args(cfg_data, args):
    if args.root is not None:
        cfg_data.args.root = args.root
    if args.subs is not None:
        cfg_data.args.subs = args.subs
    if args.subs_vis is not None:
        cfg_data.args.subs_vis = args.subs_vis
    if args.ranges is not None:
        cfg_data.args.ranges = args.ranges
    if args.cameras is not None:
        cfg_data.args.reader.cameras.root = args.cameras
    if args.skip_vis:
        cfg_data.args.subs_vis = []
    return cfg_data

def update_exp_by_args(cfg_exp, args):
    opts_alias = []
    if 'alias' in cfg_exp.keys():
        for i in range(len(args.opt_exp)//2):
            if args.opt_exp[i*2] in cfg_exp.alias.keys():
                opts_alias.append(cfg_exp.alias[args.opt_exp[i*2]])
                opts_alias.append(args.opt_exp[i*2+1])
    cfg_exp.merge_from_list(opts_alias)
    if args.skip_vis:
        for key, val in cfg_exp.args.at_step.items():
            if key.startswith('vis'):
                val.skip = True

def load_cfg_from_file(cfg, args):
    cfg = Config.load(cfg)
    cfg_data = Config.load(cfg.data)
    cfg_data.args.merge_from_other_cfg(cfg.data_opts)
    cfg_data = update_data_by_args(cfg_data, args)
    cfg_exp = Config.load(cfg.exp)
    cfg_exp.args.merge_from_other_cfg(cfg.exp_opts)
    update_exp_by_args(cfg_exp, args)
    return cfg_data, cfg_exp

def load_cfg_from_cmd(args):
    cfg_data = Config.load(args.data, args.opt_data)
    cfg_data = update_data_by_args(cfg_data, args)
    cfg_exp = Config.load(args.exp, args.opt_exp)
    update_exp_by_args(cfg_exp, args)
    return cfg_data, cfg_exp

def main_entrypoint():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', type=str, default=None)
    for name in ['data', 'exp']:
        parser.add_argument('--{}'.format(name), type=str, required=False)
        parser.add_argument('--opt_{}'.format(name), type=str, nargs='+', default=[])
    parser.add_argument('--root', type=str, default=None)
    parser.add_argument('--subs', type=str, default=None, nargs='+')
    parser.add_argument('--subs_vis', type=str, default=None, nargs='+')
    parser.add_argument('--ranges', type=int, default=None, nargs=3)
    parser.add_argument('--cameras', type=str, default=None, help='Camera file path')
    parser.add_argument('--out', type=str, default=None)
    parser.add_argument('--skip_vis', action='store_true')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    if args.cfg is not None:
        cfg_data, cfg_exp = load_cfg_from_file(args.cfg, args)
    else:
        cfg_data, cfg_exp = load_cfg_from_cmd(args)

    if args.out is not None:
        cfg_exp.args.output = args.out
    out = cfg_exp.args.output
    os.makedirs(out, exist_ok=True)
    print(cfg_data, file=open(os.path.join(out, 'cfg_data.yml'), 'w'))
    print(cfg_exp, file=open(os.path.join(out, 'cfg_exp.yml'), 'w'))

    dataset = load_object(cfg_data.module, cfg_data.args)
    print(dataset)

    model = load_object(cfg_exp.module, cfg_exp.args)
    process(dataset, model)

if __name__ == '__main__':
    main_entrypoint()
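Note: `process` assumes only that the dataset is indexable (`__len__`/`__getitem__`) and that the model exposes `at_step`/`at_final`; `load_object` builds both from the `module`/`args` pairs in the configs below. A minimal sketch of that contract, with illustrative toy classes that are not part of this commit:

    class ToyDataset:
        def __len__(self):
            return 2
        def __getitem__(self, index):
            # In EasyMocap this would return images, camera parameters, etc.
            return {'index': index}

    class ToyModel:
        def at_step(self, data, index):
            # called once per frame; returns the per-frame result
            return data
        def at_final(self, ret_all):
            # called once with all per-frame results, for global steps
            # such as smoothing, fitting and writing output
            return ret_all

    process(ToyDataset(), ToyModel())  # runs the same loop as above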
139 config/1v1p/fixhand.yml Normal file
@@ -0,0 +1,139 @@
smooth: &smooth_keypoints
  weight: 50.
  module: myeasymocap.operations.loss.Smooth
  key_from_output: [keypoints, Th]
  key_from_infos: [] # TODO: weight the smooth term by the 2D confidence
  args:
    keys: [Th, keypoints]
    smooth_type: [Linear, Linear] # the Depth type seems to need the camera parameters for conversion
    norm: [l2, l2]
    order: [2, 2]
    weights: [1000., 1000.]
    window_weight: [0.5, 0.3, 0.1, 0.1]

module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/sv1p_hand_fix
  at_step:
    detect_by_mediapipe:
      module: myeasymocap.backbone.mediapipe.hand.MediaPipe
      key_from_data: [images, imgnames]
      args:
        ckpt: models/mediapipe/hand_landmarker.task
    hand2d:
      module: myeasymocap.backbone.hand2d.hand2d.MyHand2D
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      args:
        # ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
        ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
        mode: resnet
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints, bbox]
      args:
        name: vis_keypoints2d
        scale: 0.5
    infer_mano:
      module: myeasymocap.backbone.hmr.hmr.MyHMR
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      key_keep: [meta, cameras, imgnames] # keep these keys through to the final output
      args:
        ckpt: models/manol_pca45_noflat.ckpt
    # TODO: add visualize for Init MANO
  at_final:
    load_hand_model: # load the hand (MANO) model
      module: myeasymocap.io.model.MANOLoader
      args:
        cfg_path: config/model/mano.yml
        model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
        regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
        num_pca_comps: 45
        use_pca: True
        use_flat_mean: False
      # this module returns two things: body_model and model; body_model is used for visualization
    mean_param: # initialize the pose; average both poses and shapes here
      module: myeasymocap.operations.init.MeanShapes
      key_from_data: [params]
      args:
        keys: ['poses', 'shapes']
    init_T: # initialize the per-frame translation
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras, params]
      key_from_previous: [model]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Th]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l2
          smooth: *smooth_keypoints
    init_R: # initialize the per-frame rotation
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Rh]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l2
          smooth: *smooth_keypoints
    refine_poses: # refine the poses
      repeat: 2
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [poses, shapes, Rh, Th]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l1
          reg:
            weight: 0.001
            module: myeasymocap.operations.loss.RegLoss
            key_from_output: [poses]
            key_from_infos: []
            args:
              key: poses
              norm: l2
          smooth: *smooth_keypoints
    write:
      module: myeasymocap.io.write.WriteSMPL
      key_from_data: [meta]
      key_from_previous: [params, model]
      args:
        name: smpl
    render:
      module: myeasymocap.io.vis3d.Render_multiview
      key_from_data: [cameras, imgnames]
      key_from_previous: [hand_model, params]
      args:
        model_name: hand_model
        backend: pyrender
        view_list: [0]
        scale: 0.5
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 50
        keep_image: False
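Note: the `&smooth_keypoints` anchor at the top of this file is re-used through `*smooth_keypoints` in the `init_T`, `init_R` and `refine_poses` losses. A quick sketch with PyYAML (used here only for illustration) shows that an alias resolves to the very same mapping as the anchored block:

    import yaml

    text = '''
    smooth: &smooth_keypoints
      weight: 50.
    loss:
      smooth: *smooth_keypoints
    '''
    cfg = yaml.safe_load(text)
    assert cfg['loss']['smooth'] is cfg['smooth']  # alias shares the anchored dict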
139 config/1v1p/hand_detect_finetune.yml Normal file
@@ -0,0 +1,139 @@
smooth: &smooth_keypoints
  weight: 50.
  module: myeasymocap.operations.loss.Smooth
  key_from_output: [keypoints, Th]
  key_from_infos: [] # TODO: weight the smooth term by the 2D confidence
  args:
    keys: [Th, keypoints]
    smooth_type: [Linear, Linear] # the Depth type seems to need the camera parameters for conversion
    norm: [l2, l2]
    order: [2, 2]
    weights: [1000., 1000.]
    window_weight: [0.5, 0.3, 0.1, 0.1]

module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/sv1p_hand
  at_step:
    detect_by_mediapipe:
      module: myeasymocap.backbone.mediapipe.hand.MediaPipe
      key_from_data: [images, imgnames]
      args:
        ckpt: models/mediapipe/hand_landmarker.task
    hand2d:
      module: myeasymocap.backbone.hand2d.hand2d.MyHand2D
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      args:
        # ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
        ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
        mode: resnet
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints, bbox]
      args:
        name: vis_keypoints2d
        scale: 0.5
    infer_mano:
      module: myeasymocap.backbone.hmr.hmr.MyHMR
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      key_keep: [meta, cameras, imgnames] # keep these keys through to the final output
      args:
        ckpt: models/manol_pca45_noflat.ckpt
    # TODO: add visualize for Init MANO
  at_final:
    load_hand_model: # load the hand (MANO) model
      module: myeasymocap.io.model.MANOLoader
      args:
        cfg_path: config/model/mano.yml
        model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
        regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
        num_pca_comps: 45
        use_pca: True
        use_flat_mean: False
      # this module returns two things: body_model and model; body_model is used for visualization
    mean_param: # initialization: average the shape parameters
      module: myeasymocap.operations.init.MeanShapes
      key_from_data: [params]
      args:
        keys: ['shapes']
    init_T: # initialize the per-frame translation
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras, params]
      key_from_previous: [model]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Th]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l2
          smooth: *smooth_keypoints
    init_R: # initialize the per-frame rotation
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Rh]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l2
          smooth: *smooth_keypoints
    refine_poses: # refine the poses
      repeat: 2
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [poses, shapes, Rh, Th]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l1
          reg:
            weight: 0.001
            module: myeasymocap.operations.loss.RegLoss
            key_from_output: [poses]
            key_from_infos: []
            args:
              key: poses
              norm: l2
          smooth: *smooth_keypoints
    write:
      module: myeasymocap.io.write.WriteSMPL
      key_from_data: [meta]
      key_from_previous: [params, model]
      args:
        name: smpl
    render:
      module: myeasymocap.io.vis3d.Render_multiview
      key_from_data: [cameras, imgnames]
      key_from_previous: [hand_model, params]
      args:
        model_name: hand_model
        backend: pyrender
        view_list: [0]
        scale: 0.5
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 50
        keep_image: False
147 config/1v1p/hrnet_pare_finetune.yml Normal file
@@ -0,0 +1,147 @@
module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/sv1p # output path
  at_step:
    detect:
      module: myeasymocap.backbone.yolo.yolo.YoloWithTrack
      key_from_data: [images, imgnames]
      args:
        model: yolov5m
        name: person
    keypoints2d:
      module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      key_keep: []
      args:
        ckpt: /nas/home/shuaiqing/Code/EasyMocapPublic/data/models/pose_hrnet_w48_384x288.pth
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints, bbox]
      args:
        name: vis_keypoints2d
        scale: 0.5
    infer: # given the images and detected boxes, this module directly returns the body pose in the crop coordinate system
      module: myeasymocap.backbone.pare.pare.MyPARE
      key_from_data: [images, imgnames] # bbox, images and image names from the dataset; the names are used to save results
      key_from_previous: [bbox]
      key_keep: [cameras, imgnames] # keep these keys through to the final output
      args:
        ckpt: 3dpw # use the model pretrained on 3DPW
  at_final:
    load_body_model: # load the SMPL model
      module: myeasymocap.io.model.SMPLLoader
      args:
        model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl
        regressor_path: models/J_regressor_body25.npy
    init_translation: # given the crop-space pose, 2D keypoints and camera parameters, returns the body pose in world coordinates
      module: myeasymocap.operations.init.InitTranslation
      key_from_data: [keypoints, cameras, params] # read the keypoints, camera parameters and SMPL parameters
      key_from_previous: [body_model] # use the SMPL model to compute keypoints
      args:
        solve_T: True
        solve_R: False
    smooth: # smooth the initialization
      module: myeasymocap.operations.smooth.SmoothPoses
      key_from_data: [params]
      args:
        window_size: 2
    mean_param: # Mean shapes
      module: myeasymocap.operations.init.MeanShapes
      key_from_data: [params]
      args:
        keys: ['shapes']
    init_RT:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Th, Rh]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: l2
              index_est: [2, 5, 9, 12]
              index_gt: [2, 5, 9, 12]
          smooth:
            weight: 1.
            module: myeasymocap.operations.loss.Smooth
            key_from_output: [Rh, Th]
            key_from_infos: [cameras] # TODO: weight the smooth term by the 2D confidence
            args:
              keys: [Th, Th]
              smooth_type: [Linear, Depth] # the Depth type seems to need the camera parameters for conversion
              norm: [l2, l2]
              order: [2, 2]
              weights: [100., 1000.]
              window_weight: [0.5, 0.3, 0.1, 0.1]
    refine_poses:
      repeat: 2
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, cameras]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [poses, Rh, Th]
        loss:
          repro:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints2D
            key_from_output: [keypoints]
            key_from_infos: [keypoints, cameras]
            args:
              norm: gm
              norm_info: 0.02
          smooth:
            weight: 1.
            module: myeasymocap.operations.loss.Smooth
            key_from_output: [poses, Rh, Th, keypoints]
            key_from_infos: [cameras] # TODO: weight the smooth term by the 2D confidence
            args:
              keys: [Th, Th, poses, keypoints]
              smooth_type: [Linear, Depth, Linear, Linear] # the Depth type seems to need the camera parameters for conversion
              norm: [l2, l2, l2, l2]
              order: [2, 2, 2, 2]
              weights: [100., 1000., 50., 100.]
              window_weight: [0.5, 0.3, 0.1, 0.1]
          init:
            weight: 1.
            module: myeasymocap.operations.loss.Init
            key_from_output: [poses]
            key_from_infos: [init_poses]
            args:
              keys: [poses]
              norm: l2
              weights: [1.]
          prior:
            weight: 0.1
            module: easymocap.multistage.gmm.GMMPrior
            key_from_output: [poses]
            key_from_infos: []
            args:
              start: 0
              end: 69
    write:
      module: myeasymocap.io.write.WriteSMPL
      key_from_data: [meta]
      key_from_previous: [params, model]
      args:
        name: smpl
    render:
      module: myeasymocap.io.vis3d.Render
      key_from_data: [cameras, imgnames]
      key_from_previous: [params, body_model]
      args:
        backend: pyrender
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 30
        keep_image: False
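Note: every `Optimizer` stage above requests `optim_type: lbfgs`. The `Optimizer` implementation itself is not part of this commit; as a hedged sketch, an L-BFGS fitting step in PyTorch generally looks like this:

    import torch

    def fit_lbfgs(opt_params, compute_loss, iters=10):
        # opt_params: list of tensors to optimize (e.g. Th, Rh)
        for p in opt_params:
            p.requires_grad_(True)
        optimizer = torch.optim.LBFGS(opt_params, line_search_fn='strong_wolfe')
        for _ in range(iters):
            def closure():
                optimizer.zero_grad()
                loss = compute_loss()  # weighted sum of the configured loss terms
                loss.backward()
                return loss
            optimizer.step(closure)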
16 config/datasets/mvimage.yml Normal file
@@ -0,0 +1,16 @@
module: myeasymocap.datasets.mv1p.MVDataset
args:
  root: TO_BE_FILLED
  subs: [] # used views, default all views
  subs_vis: ['01'] # visualized views
  ranges: [0, 10000, 1]
  read_image: True
  reader:
    images:
      root: images
      ext: .jpg
    image_shape:
      root: images
      ext: .jpg
    cameras:
      root: ''
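Note: `ranges` reads like Python's `range(start, stop, step)`; assuming that convention holds (the dataset class itself is not in this commit), frame selection amounts to:

    ranges = [0, 10000, 1]
    frames = list(range(*ranges))  # every frame from 0 to 9999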
13 config/datasets/svimage.yml Normal file
@@ -0,0 +1,13 @@
module: myeasymocap.datasets.sv1p.SVDataset
args:
  root: TO_BE_FILLED
  subs: ['video'] # one of the folders under the given root
  ranges: [0, 10000, 1] # the range of frames to use
  read_image: True # a CNN will later estimate SMPL parameters, so the images must be loaded
  reader:
    images:
      root: images
      ext: .jpg
    image_shape:
      root: images
      ext: .jpg
50 config/mv1p/detect_hand_triangulate.yml Normal file
@@ -0,0 +1,50 @@
module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/detect_hand_triangulate
  at_step:
    detect:
      module: myeasymocap.backbone.mediapipe.hand.MediaPipe
      key_from_data: [images, imgnames]
      args:
        ckpt: models/mediapipe/hand_landmarker.task
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints]
      args:
        name: vis_keypoints2d
        scale: 0.5
    triangulate:
      module: myeasymocap.operations.triangulate.SimpleTriangulate
      key_from_data: [cameras]
      key_from_previous: [keypoints]
      key_keep: [cameras] # used in the final joint optimization
      args:
        mode: iterative # [naive, iterative]
    visualize:
      module: myeasymocap.io.vis.Vis3D
      key_from_data: [images, cameras]
      key_from_previous: [keypoints3d] # used in the final joint optimization
      args:
        scale: 1.
        mode: crop
        mode_args:
          - [0, 720, 100, 820]
          - [0, 720, 100, 820]
          - [0, 720, 400, 1120]
  at_final:
    smooth:
      module: myeasymocap.operations.smooth.Smooth
      key_from_data: [keypoints3d]
      args:
        window_size: 5
    write:
      module: myeasymocap.io.write.Write
      key_from_data: [keypoints3d]
      args: {}
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 60
        keep_image: False
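Note: `SimpleTriangulate` is not included in this commit. For reference, its `naive` mode conventionally corresponds to a confidence-weighted direct linear transform (DLT) per joint, roughly as sketched below; an `iterative` mode typically repeats this while down-weighting views with large reprojection error:

    import numpy as np

    def triangulate_point(obs, Ps):
        # obs: (nViews, 3) detections as (x, y, conf); Ps: (nViews, 3, 4) projections
        A = []
        for (x, y, conf), P in zip(obs, Ps):
            if conf <= 0:
                continue  # skip missing detections
            A.append(conf * (x * P[2] - P[0]))
            A.append(conf * (y * P[2] - P[1]))
        X = np.linalg.svd(np.asarray(A))[2][-1]  # null vector of A
        return X[:3] / X[3]                      # de-homogenize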
166 config/mv1p/detect_hand_triangulate_fitMANO.yml Normal file
@@ -0,0 +1,166 @@
smooth: &smooth_keypoints
  weight: 1.
  module: myeasymocap.operations.loss.Smooth
  key_from_output: [keypoints, poses]
  key_from_infos: [] # TODO: weight the smooth term by the 2D confidence
  args:
    keys: [poses, keypoints]
    smooth_type: [Linear, Linear] # the Depth type seems to need the camera parameters for conversion
    norm: [l2, l2]
    order: [2, 2]
    weights: [10., 1000.]
    window_weight: [0.5, 0.3, 0.1, 0.1]

k3dtorso: &k3dtorso
  weight: 100.
  module: myeasymocap.operations.loss.Keypoints3D
  key_from_output: [keypoints]
  key_from_infos: [keypoints3d]
  args:
    norm: l2
    index_est: [0, 5, 9, 13, 17]
    index_gt: [0, 5, 9, 13, 17]

module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/detect_hand_triangulate_fitMANO
  at_step:
    detect:
      module: myeasymocap.backbone.mediapipe.hand.MediaPipe
      key_from_data: [images, imgnames]
      key_keep: [imgnames]
      args:
        ckpt: models/mediapipe/hand_landmarker.task
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints]
      args:
        name: vis_keypoints2d
        scale: 0.5
    triangulate:
      module: myeasymocap.operations.triangulate.SimpleTriangulate
      key_from_data: [cameras]
      key_from_previous: [keypoints]
      key_keep: [cameras] # used in the final joint optimization
      args:
        mode: iterative # [naive, iterative]
    visualize:
      module: myeasymocap.io.vis.Vis3D
      key_from_data: [images, cameras]
      key_from_previous: [keypoints3d] # used in the final joint optimization
      args:
        scale: 0.5
        mode: center
  at_final:
    load_hand_model: # load the hand (MANO) model
      module: myeasymocap.io.model.MANOLoader
      args:
        cfg_path: config/model/manol.yml
        model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
        regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
        num_pca_comps: 45
        use_pca: True
        use_flat_mean: False
    init_params:
      module: myeasymocap.operations.init.InitParams
      key_from_data: [keypoints3d]
      args:
        num_poses: 45
        num_shapes: 10
    fitShape:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [shapes]
        loss:
          k3d:
            weight: 10000.
            module: myeasymocap.operations.loss.LimbLength
            key_from_output: [keypoints]
            key_from_infos: [keypoints3d]
            args:
              kintree: [[ 1, 0], [ 2, 1], [ 3, 2], [ 4, 3], [ 5, 0], [ 6, 5], [ 7, 6], [ 8, 7], [ 9, 0], [10, 9], [11, 10], [12, 11], [13, 0], [14, 13], [15, 14], [16, 15], [17, 0], [18, 17], [19, 18], [20, 19]]
          regshape:
            weight: 0.1
            module: myeasymocap.operations.loss.RegLoss
            key_from_output: [shapes]
            key_from_infos: []
            args:
              key: shapes
              norm: l2
    init_T:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Th]
        loss:
          k3d: *k3dtorso
          smooth: *smooth_keypoints
    init_R:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Rh]
        loss:
          k3d: *k3dtorso
          smooth: *smooth_keypoints
    refine_poses:
      repeat: 2
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [poses, Rh, Th]
        loss:
          k3d:
            weight: 1000000.
            module: myeasymocap.operations.loss.Keypoints3D
            key_from_output: [keypoints]
            key_from_infos: [keypoints3d]
            args:
              norm: l2
              norm_info: 0.02
          smooth: *smooth_keypoints
          regpose:
            weight: 0.1
            module: myeasymocap.operations.loss.RegLoss
            key_from_output: [poses]
            key_from_infos: []
            args:
              key: poses
              norm: l2
    write:
      module: myeasymocap.io.write.WriteSMPL
      key_from_data: [meta]
      key_from_previous: [params, model]
      args:
        name: smpl
    render:
      module: myeasymocap.io.vis3d.Render_multiview
      key_from_data: [cameras, imgnames]
      key_from_previous: [params, hand_model]
      args:
        model_name: hand_model
        backend: pyrender
        view_list: [1, 0, 2]
        scale: 1.
        render_mode: image
        mode: crop
        mode_args:
          - [0, 720, 100, 820]
          - [0, 720, 100, 820]
          - [0, 720, 400, 1120]
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 60
        keep_image: False
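Note: the `LimbLength` term above compares bone lengths along `kintree` (child/parent index pairs over the 21 hand keypoints), which constrains `shapes` independently of global pose. A sketch of the idea, not the actual `myeasymocap.operations.loss.LimbLength` code:

    import numpy as np

    def limb_length_error(kpts_est, kpts_gt, kintree):
        # kpts_est, kpts_gt: (J, 3); kintree: list of (child, parent) pairs
        k = np.asarray(kintree)
        len_est = np.linalg.norm(kpts_est[k[:, 0]] - kpts_est[k[:, 1]], axis=-1)
        len_gt = np.linalg.norm(kpts_gt[k[:, 0]] - kpts_gt[k[:, 1]], axis=-1)
        return ((len_est - len_gt) ** 2).sum()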
54 config/mv1p/detect_triangulate.yml Normal file
@@ -0,0 +1,54 @@
module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/detect_triangulate
  at_step:
    detect:
      module: myeasymocap.backbone.yolo.yolo.BaseYOLOv5
      key_from_data: [images, imgnames]
      args:
        model: yolov5m
        name: person
    keypoints2d:
      module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      key_keep: []
      args:
        ckpt: data/models/pose_hrnet_w48_384x288.pth
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints, bbox]
      args:
        name: vis_keypoints2d
        scale: 0.5
    triangulate:
      module: myeasymocap.operations.triangulate.SimpleTriangulate
      key_from_data: [cameras]
      key_from_previous: [keypoints]
      key_keep: [cameras, imgnames]
      args:
        mode: iterative # [naive, iterative]
    visualize:
      module: myeasymocap.io.vis.Vis3D
      key_from_data: [images, cameras]
      key_from_previous: [keypoints3d] # used in the final joint optimization
      args:
        scale: 0.5
        mode: center
  at_final:
    smooth:
      module: myeasymocap.operations.smooth.Smooth
      key_from_data: [keypoints3d]
      args:
        window_size: 5
    write:
      module: myeasymocap.io.write.Write
      key_from_data: [keypoints3d]
      args: {}
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 50
        keep_image: False
169 config/mv1p/detect_triangulate_fitSMPL.yml Normal file
@@ -0,0 +1,169 @@
module: myeasymocap.stages.basestage.MultiStage
args:
  output: output/detect_triangulate_fitSMPL
  at_step:
    detect:
      module: myeasymocap.backbone.yolo.yolo.BaseYOLOv5
      key_from_data: [images, imgnames]
      args:
        model: yolov5m
        name: person
    keypoints2d:
      module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
      key_from_data: [images, imgnames]
      key_from_previous: [bbox]
      key_keep: []
      args:
        ckpt: data/models/pose_hrnet_w48_384x288.pth
    vis2d:
      module: myeasymocap.io.vis.Vis2D
      skip: False
      key_from_data: [images]
      key_from_previous: [keypoints, bbox]
      args:
        name: vis_keypoints2d
        scale: 0.5
    triangulate:
      module: myeasymocap.operations.triangulate.SimpleTriangulate
      key_from_data: [cameras]
      key_from_previous: [keypoints]
      key_keep: [cameras, imgnames] # used in the final joint optimization
      args:
        mode: iterative # [naive, iterative]
    visualize:
      module: myeasymocap.io.vis.Vis3D
      skip: False
      key_from_data: [images, cameras]
      key_from_previous: [keypoints3d] # used in the final joint optimization
      args:
        scale: 0.5
        mode: center
  at_final:
    load_body_model:
      module: myeasymocap.io.model.SMPLLoader
      args:
        model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl
        regressor_path: models/J_regressor_body25.npy
    init_params:
      module: myeasymocap.operations.init.InitParams
      key_from_data: [keypoints3d]
      args:
        num_poses: 69
        num_shapes: 10
    fitShape:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [shapes]
        loss:
          k3d:
            weight: 100.
            module: myeasymocap.operations.loss.LimbLength
            key_from_output: [keypoints]
            key_from_infos: [keypoints3d]
            args:
              kintree: [[8, 1], [2, 5], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [1, 0], [9, 12], [9, 10], [10, 11], [12, 13], [13, 14]]
          regshape:
            weight: 0.1
            module: myeasymocap.operations.loss.RegLoss
            key_from_output: [shapes]
            key_from_infos: []
            args:
              key: shapes
              norm: l2
    init_RT:
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [Th, Rh]
        loss:
          k3d:
            weight: 100.
            module: myeasymocap.operations.loss.Keypoints3D
            key_from_output: [keypoints]
            key_from_infos: [keypoints3d]
            args:
              norm: l2
              index_est: [2, 5, 9, 12]
              index_gt: [2, 5, 9, 12]
          smooth:
            weight: 1.
            module: myeasymocap.operations.loss.Smooth
            key_from_output: [Th, keypoints]
            key_from_infos: [] # TODO: weight the smooth term by the 2D confidence
            args:
              keys: [keypoints, Th]
              smooth_type: [Linear, Linear] # the Depth type seems to need the camera parameters for conversion
              norm: [l2, l2]
              order: [2, 2]
              weights: [10., 100.]
              window_weight: [0.5, 0.3, 0.1, 0.1]
    refine_poses:
      repeat: 2
      module: myeasymocap.operations.optimizer.Optimizer
      key_from_data: [keypoints, keypoints3d]
      key_from_previous: [model, params]
      args:
        optimizer_args: {optim_type: lbfgs}
        optimize_keys: [poses, Rh, Th]
        loss:
          k3d:
            weight: 1000.
            module: myeasymocap.operations.loss.Keypoints3D
            key_from_output: [keypoints]
            key_from_infos: [keypoints3d]
            args:
              norm: l2
              norm_info: 0.02
          smooth:
            weight: 1.
            module: myeasymocap.operations.loss.Smooth
            key_from_output: [poses, Th, keypoints]
            key_from_infos: []
            args:
              keys: [Th, poses, keypoints]
              smooth_type: [Linear, Linear, Linear]
              norm: [l2, l2, l2]
              order: [2, 2, 2]
              weights: [100., 10., 10.]
              window_weight: [0.5, 0.3, 0.1, 0.1]
          prior:
            weight: 0.1
            module: easymocap.multistage.gmm.GMMPrior
            key_from_output: [poses]
            key_from_infos: []
            args:
              start: 0
              end: 69
    write:
      module: myeasymocap.io.write.WriteSMPL
      key_from_data: [meta]
      key_from_previous: [params, model]
      args:
        name: smpl
    # render:
    #   module: myeasymocap.io.vis3d.Render_multiview
    #   key_from_data: [cameras, imgnames]
    #   key_from_previous: [params, body_model]
    #   args:
    #     backend: pyrender
    #     view_list: [0]
    render_ground:
      module: myeasymocap.io.vis3d.Render_multiview
      key_from_data: [cameras, imgnames]
      key_from_previous: [params, body_model]
      args:
        backend: pyrender
        view_list: [3]
        mode: ground
        scale: 1.
        shape: [1024, 1024]
    make_video:
      module: myeasymocap.io.video.MakeVideo
      args:
        fps: 50
        keep_image: False
155 easymocap/multistage/gmm.py Normal file
@@ -0,0 +1,155 @@
import pickle
import os
from os.path import join
import numpy as np
import torch
from .lossbase import LossBase

def create_prior_from_cmu(n_gaussians, epsilon=1e-15):
    """Load the gmm from the CMU motion database."""
    from os.path import dirname
    np_dtype = np.float32
    with open(join(dirname(__file__), 'gmm_%02d.pkl'%(n_gaussians)), 'rb') as f:
        gmm = pickle.load(f, encoding='latin1')
    if True:
        means = gmm['means'].astype(np_dtype)
        covs = gmm['covars'].astype(np_dtype)
        weights = gmm['weights'].astype(np_dtype)
        precisions = [np.linalg.inv(cov) for cov in covs]
        precisions = np.stack(precisions).astype(np_dtype)

        sqrdets = np.array([(np.sqrt(np.linalg.det(c)))
                            for c in gmm['covars']])
        const = (2 * np.pi)**(69 / 2.)

        nll_weights = np.asarray(gmm['weights'] / (const * (sqrdets / sqrdets.min())))
        cov_dets = [np.log(np.linalg.det(cov.astype(np_dtype)) + epsilon)
                    for cov in covs]
    return {
        'means': means,
        'covs': covs,
        'precisions': precisions,
        'nll_weights': -np.log(nll_weights[None]),
        'weights': weights,
        'pi_term': np.log(2*np.pi),
        'cov_dets': cov_dets
    }

class MaxMixturePrior(LossBase):
    def __init__(self, num_gaussians=8, epsilon=1e-16, use_merged=True,
                 start=3, end=72):
        super(MaxMixturePrior, self).__init__()
        np_dtype = np.float32

        self.num_gaussians = num_gaussians
        self.epsilon = epsilon
        self.use_merged = use_merged
        data = create_prior_from_cmu(num_gaussians)
        self.start = start
        self.end = end
        for key, val in data.items():
            self.register_buffer(key, torch.tensor(val, dtype=torch.float32))

    def get_mean(self):
        ''' Returns the mean of the mixture '''
        mean_pose = torch.matmul(self.weights, self.means)
        return mean_pose

    def merged_log_likelihood(self, poses):
        poses = poses[..., self.start:self.end]
        diff_from_mean = poses.unsqueeze(dim=1) - self.means[None, :, :self.end-self.start]

        prec_diff_prod = torch.einsum('mij,bmj->bmi',
                                      [self.precisions, diff_from_mean])
        diff_prec_quadratic = (prec_diff_prod * diff_from_mean).sum(dim=-1)

        curr_loglikelihood = 0.5 * diff_prec_quadratic + self.nll_weights
        min_likelihood, _ = torch.min(curr_loglikelihood, dim=1)
        return min_likelihood

    def log_likelihood(self, pose, betas, *args, **kwargs):
        ''' Create graph operation for negative log-likelihood calculation
        '''
        likelihoods = []

        for idx in range(self.num_gaussians):
            mean = self.means[idx]
            prec = self.precisions[idx]
            cov = self.covs[idx]
            diff_from_mean = pose - mean

            curr_loglikelihood = torch.einsum('bj,ji->bi',
                                              [diff_from_mean, prec])
            curr_loglikelihood = torch.einsum('bi,bi->b',
                                              [curr_loglikelihood,
                                               diff_from_mean])
            cov_term = torch.log(torch.det(cov) + self.epsilon)
            curr_loglikelihood += 0.5 * (cov_term +
                                         self.random_var_dim *
                                         self.pi_term)
            likelihoods.append(curr_loglikelihood)

        log_likelihoods = torch.stack(likelihoods, dim=1)
        min_idx = torch.argmin(log_likelihoods, dim=1)
        weight_component = self.nll_weights[:, min_idx]

        return weight_component + log_likelihoods[:, min_idx]

    def forward(self, poses, **kwargs):
        if self.use_merged:
            return self.merged_log_likelihood(poses).mean()
        else:
            return self.log_likelihood(poses).mean()

class MaxMixtureCompletePrior(object):
    """Prior density estimation."""
    prior = None
    mean_pose = None
    def __init__(self, n_gaussians=8, start=3, end=72):
        self.n_gaussians = n_gaussians
        self.start = start
        self.end = end
        if self.prior is None:
            self.prior = self.create_prior_from_cmu()

    def create_prior_from_cmu(self):
        """Load the gmm from the CMU motion database."""
        from os.path import dirname
        np_dtype = np.float32
        with open(join(dirname(__file__), 'gmm_%02d.pkl'%(self.n_gaussians)), 'rb') as f:
            gmm = pickle.load(f, encoding='latin1')
        if True:
            means = gmm['means'].astype(np_dtype)
            covs = gmm['covars'].astype(np_dtype)
            weights = gmm['weights'].astype(np_dtype)
            precisions = [np.linalg.inv(cov) for cov in covs]
            precisions = np.stack(precisions).astype(np_dtype)

            sqrdets = np.array([(np.sqrt(np.linalg.det(c)))
                                for c in gmm['covars']])
            const = (2 * np.pi)**(69 / 2.)

            nll_weights = np.asarray(gmm['weights'] / (const *
                                                       (sqrdets / sqrdets.min())))
        self.means = means
        self.weights = weights
        self.mean_pose = weights.dot(means)

    def __call__(self, body_model, body_params, info):
        poses = body_params['poses']
        for nf in range(poses.shape[0]):
            poses[nf][self.start:self.end] = self.mean_pose[:self.end-self.start]
        return body_params

    def get_gmm_prior(self):
        """Getter implementation."""
        return self.prior

class GMMPrior(MaxMixturePrior):
    def __call__(self, pred, target):
        poses = pred['poses']
        poses = poses.reshape(-1, poses.shape[-1])
        if self.use_merged:
            return self.merged_log_likelihood(poses).mean()
        else:
            return self.log_likelihood(poses).mean()
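Note: as wired into the configs above (`weight: 0.1`, `start: 0`, `end: 69`), `GMMPrior` is called with the predicted parameters and returns a scalar penalty. A minimal usage sketch, assuming the bundled `gmm_08.pkl` loads correctly alongside the module:

    import torch
    from easymocap.multistage.gmm import GMMPrior

    prior = GMMPrior(num_gaussians=8, start=0, end=69)
    pred = {'poses': torch.zeros(4, 69)}  # 4 frames of axis-angle body pose
    penalty = prior(pred, target=None)    # scalar negative log-likelihood term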
56 easymocap/multistage/gmm_08.pkl Normal file
File diff suppressed because one or more lines are too long
241 myeasymocap/backbone/basetopdown.py Normal file
@@ -0,0 +1,241 @@
import os
from os.path import join
import numpy as np
import cv2
import torch
import torch.nn as nn
import pickle
import math

def rotate_2d(pt_2d, rot_rad):
    x = pt_2d[0]
    y = pt_2d[1]
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    xx = x * cs - y * sn
    yy = x * sn + y * cs
    return np.array([xx, yy], dtype=np.float32)


def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False):
    # augment size with scale
    src_w = src_width * scale
    src_h = src_height * scale
    src_center = np.zeros(2)
    src_center[0] = c_x
    src_center[1] = c_y  # np.array([c_x, c_y], dtype=np.float32)
    # augment rotation
    rot_rad = np.pi * rot / 180
    src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
    src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)

    dst_w = dst_width
    dst_h = dst_height
    dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
    dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32)
    dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = src_center
    src[1, :] = src_center + src_downdir
    src[2, :] = src_center + src_rightdir

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = dst_center
    dst[1, :] = dst_center + dst_downdir
    dst[2, :] = dst_center + dst_rightdir

    inv_trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans, inv_trans

def generate_patch_image_cv(cvimg, c_x, c_y, bb_width, bb_height, patch_width, patch_height, do_flip, scale, rot):

    trans, inv_trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot, inv=False)

    img_patch = cv2.warpAffine(cvimg, trans, (int(patch_width), int(patch_height)),
                               flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

    return img_patch, trans, inv_trans

def get_single_image_crop_demo(image, bbox, scale=1.2, crop_size=224,
                               mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], fliplr=False):

    crop_image, trans, inv_trans = generate_patch_image_cv(
        cvimg=image.copy(),
        c_x=bbox[0],
        c_y=bbox[1],
        bb_width=bbox[2],
        bb_height=bbox[3],
        patch_width=crop_size[0],
        patch_height=crop_size[1],
        do_flip=False,
        scale=scale,
        rot=0,
    )
    if fliplr:
        crop_image = cv2.flip(crop_image, 1)
    # cv2.imwrite('debug_crop.jpg', crop_image)
    # import ipdb; ipdb.set_trace()
    crop_image = crop_image.transpose(2,0,1)
    mean1 = np.array(mean, dtype=np.float32).reshape(3,1,1)
    std1 = np.array(std, dtype=np.float32).reshape(3,1,1)
    crop_image = (crop_image.astype(np.float32))/255.
    # _max = np.max(abs(crop_image))
    # crop_image = np.divide(crop_image, _max)
    crop_image = (crop_image - mean1)/std1

    return crop_image, inv_trans

def xyxy2ccwh(bbox):
    w = bbox[:, 2] - bbox[:, 0]
    h = bbox[:, 3] - bbox[:, 1]
    cx = (bbox[:, 2] + bbox[:, 0])/2
    cy = (bbox[:, 3] + bbox[:, 1])/2
    return np.stack([cx, cy, w, h], axis=1)

class BaseTopDownModel(nn.Module):
    def __init__(self, bbox_scale, res_input,
                 mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
        super().__init__()
        self.bbox_scale = bbox_scale
        if not isinstance(res_input, list):
            res_input = [res_input, res_input]
        self.crop_size = res_input
        self.mean = mean
        self.std = std

    def load_checkpoint(self, model, state_dict, prefix, strict):
        state_dict_new = {}
        for key, val in state_dict.items():
            if key.startswith(prefix):
                key_new = key.replace(prefix, '')
                state_dict_new[key_new] = val
        model.load_state_dict(state_dict_new, strict=strict)

    def infer(self, image, bbox, to_numpy=False, flips=None):
        if isinstance(image, str):
            image = cv2.imread(image)
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        squeeze = False
        if len(bbox.shape) == 1:
            bbox = bbox[None]
            squeeze = True
        # TODO: support batches of multiple images
        bbox = xyxy2ccwh(bbox)
        inputs = []
        inv_trans_ = []
        for i in range(bbox.shape[0]):
            if flips is None:
                fliplr = False
            else:
                fliplr = flips[i]
            norm_img, inv_trans = get_single_image_crop_demo(
                img,
                bbox[i],
                scale=self.bbox_scale,
                crop_size=self.crop_size,
                mean=self.mean,
                std=self.std,
                fliplr=fliplr
            )
            inputs.append(norm_img)
            inv_trans_.append(inv_trans)
        inputs = np.stack(inputs)
        inv_trans_ = np.stack(inv_trans_)
        inputs = torch.FloatTensor(inputs).to(self.device)
        with torch.no_grad():
            output = self.model(inputs)
        if squeeze:
            for key, val in output.items():
                output[key] = val[0]
        if to_numpy:
            for key, val in output.items():
                if torch.is_tensor(val):
                    output[key] = val.detach().cpu().numpy()
        output['inv_trans'] = inv_trans_
        return output

    @staticmethod
    def batch_affine_transform(points, trans):
        # points: (Bn, J, 2), trans: (Bn, 2, 3)
        points = np.dstack((points[..., :2], np.ones((*points.shape[:-1], 1))))
        out = np.matmul(points, trans.swapaxes(-1, -2))
        return out

class BaseTopDownModelCache(BaseTopDownModel):
    def __init__(self, name, **kwargs):
        super().__init__(**kwargs)
        self.name = name

    def __call__(self, bbox, images, imgname, flips=None):
        basename = os.sep.join(imgname.split(os.sep)[-2:])
        cachename = join(self.output, self.name, basename.replace('.jpg', '.pkl'))
        os.makedirs(os.path.dirname(cachename), exist_ok=True)
        if os.path.exists(cachename):
            with open(cachename, 'rb') as f:
                output = pickle.load(f)
        else:
            output = self.infer(images, bbox, to_numpy=True, flips=flips)
            with open(cachename, 'wb') as f:
                pickle.dump(output, f)
        ret = {
            'params': output
        }
        return ret

# post processing
def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals

def get_preds_from_heatmaps(batch_heatmaps):
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    if True:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
                    diff = np.array(
                        [
                            hm[py][px+1] - hm[py][px-1],
                            hm[py+1][px]-hm[py-1][px]
                        ]
                    )
                    coords[n][p] += np.sign(diff) * .25
    coords = coords.astype(np.float32) * 4
    pred = np.dstack((coords, maxvals))
    return pred
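Note: `get_preds_from_heatmaps` returns keypoints in crop-pixel coordinates (the `* 4` undoes the network's 4x heatmap downsampling), and the `inv_trans` returned by `infer` maps crop pixels back to the source image. A short sketch of that last step, using only helpers defined in this file:

    import numpy as np

    def crop_to_image(pred, inv_trans):
        # pred: (B, J, 3) as (x, y, conf) in crop pixels; inv_trans: (B, 2, 3)
        xy = BaseTopDownModel.batch_affine_transform(pred[..., :2], inv_trans)
        return np.concatenate([xy, pred[..., 2:3]], axis=-1)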
0 myeasymocap/backbone/hrnet/__init__.py Normal file
218
myeasymocap/backbone/hrnet/hrnet.py
Normal file
218
myeasymocap/backbone/hrnet/hrnet.py
Normal file
@ -0,0 +1,218 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from .modules import BasicBlock, Bottleneck
|
||||
|
||||
|
||||
class StageModule(nn.Module):
|
||||
def __init__(self, stage, output_branches, c, bn_momentum):
|
||||
super(StageModule, self).__init__()
|
||||
self.stage = stage
|
||||
self.output_branches = output_branches
|
||||
|
||||
self.branches = nn.ModuleList()
|
||||
for i in range(self.stage):
|
||||
w = c * (2 ** i)
|
||||
branch = nn.Sequential(
|
||||
BasicBlock(w, w, bn_momentum=bn_momentum),
|
||||
BasicBlock(w, w, bn_momentum=bn_momentum),
|
||||
BasicBlock(w, w, bn_momentum=bn_momentum),
|
||||
BasicBlock(w, w, bn_momentum=bn_momentum),
|
||||
)
|
||||
self.branches.append(branch)
|
||||
|
||||
self.fuse_layers = nn.ModuleList()
|
||||
# for each output_branches (i.e. each branch in all cases but the very last one)
|
||||
for i in range(self.output_branches):
|
||||
self.fuse_layers.append(nn.ModuleList())
|
||||
for j in range(self.stage): # for each branch
|
||||
if i == j:
|
||||
self.fuse_layers[-1].append(nn.Sequential()) # Used in place of "None" because it is callable
|
||||
elif i < j:
|
||||
self.fuse_layers[-1].append(nn.Sequential(
|
||||
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(1, 1), stride=(1, 1), bias=False),
|
||||
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
|
||||
nn.Upsample(scale_factor=(2.0 ** (j - i)), mode='nearest'),
|
||||
))
|
||||
elif i > j:
|
||||
ops = []
|
||||
for k in range(i - j - 1):
|
||||
ops.append(nn.Sequential(
|
||||
nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
|
||||
bias=False),
|
||||
nn.BatchNorm2d(c * (2 ** j), eps=1e-05, momentum=0.1, affine=True,
|
||||
track_running_stats=True),
|
||||
nn.ReLU(inplace=True),
|
||||
))
|
||||
ops.append(nn.Sequential(
|
||||
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
|
||||
bias=False),
|
||||
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
|
||||
))
|
||||
self.fuse_layers[-1].append(nn.Sequential(*ops))
|
||||
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
def forward(self, x):
|
||||
assert len(self.branches) == len(x)
|
||||
|
||||
x = [branch(b) for branch, b in zip(self.branches, x)]
|
||||
|
||||
x_fused = []
|
||||
for i in range(len(self.fuse_layers)):
|
||||
for j in range(0, len(self.branches)):
|
||||
if j == 0:
|
||||
x_fused.append(self.fuse_layers[i][0](x[0]))
|
||||
else:
|
||||
x_fused[i] = x_fused[i] + self.fuse_layers[i][j](x[j])
|
||||
|
||||
for i in range(len(x_fused)):
|
||||
x_fused[i] = self.relu(x_fused[i])
|
||||
|
||||
return x_fused
|
||||
|
||||
|
||||
class HRNet(nn.Module):
|
||||
def __init__(self, c=48, nof_joints=17, bn_momentum=0.1):
|
||||
super(HRNet, self).__init__()
|
||||
|
||||
# Input (stem net)
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
|
||||
self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
|
||||
# Stage 1 (layer1) - First group of bottleneck (resnet) modules
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
|
||||
nn.BatchNorm2d(256, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
|
||||
)
|
||||
self.layer1 = nn.Sequential(
|
||||
Bottleneck(64, 64, downsample=downsample),
|
||||
Bottleneck(256, 64),
|
||||
Bottleneck(256, 64),
|
||||
Bottleneck(256, 64),
|
||||
)
|
||||
|
||||
# Fusion layer 1 (transition1) - Creation of the first two branches (one full and one half resolution)
|
||||
self.transition1 = nn.ModuleList([
|
||||
nn.Sequential(
|
||||
nn.Conv2d(256, c, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
|
||||
nn.BatchNorm2d(c, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
|
||||
nn.ReLU(inplace=True),
|
||||
),
|
||||
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
|
||||
nn.Conv2d(256, c * (2 ** 1), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
|
||||
nn.BatchNorm2d(c * (2 ** 1), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
|
||||
nn.ReLU(inplace=True),
|
||||
)),
|
||||
])
|
||||
|
||||
# Stage 2 (stage2) - Second module with 1 group of bottleneck (resnet) modules. This has 2 branches
|
||||
self.stage2 = nn.Sequential(
|
||||
StageModule(stage=2, output_branches=2, c=c, bn_momentum=bn_momentum),
|
||||
)
|
||||
|
||||
# Fusion layer 2 (transition2) - Creation of the third branch (1/4 resolution)
|
||||
self.transition2 = nn.ModuleList([
|
||||
nn.Sequential(), # None, - Used in place of "None" because it is callable
|
||||
nn.Sequential(), # None, - Used in place of "None" because it is callable
|
||||
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
|
||||
nn.Conv2d(c * (2 ** 1), c * (2 ** 2), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c * (2 ** 2), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive from the "upper" branch only?
        ])

        # Stage 3 (stage3) - Third module with 4 groups of bottleneck (resnet) modules. This has 3 branches
        self.stage3 = nn.Sequential(
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
        )

        # Fusion layer 3 (transition3) - Creation of the fourth branch (1/8 resolution)
        self.transition3 = nn.ModuleList([
            nn.Sequential(),  # None, - Used in place of "None" because it is callable
            nn.Sequential(),  # None, - Used in place of "None" because it is callable
            nn.Sequential(),  # None, - Used in place of "None" because it is callable
            nn.Sequential(nn.Sequential(  # Double Sequential to fit with official pretrained weights
                nn.Conv2d(c * (2 ** 2), c * (2 ** 3), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c * (2 ** 3), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive from the "upper" branch only?
        ])

        # Stage 4 (stage4) - Fourth module with 3 groups of bottleneck (resnet) modules. This has 4 branches
        self.stage4 = nn.Sequential(
            StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
            StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
            StageModule(stage=4, output_branches=1, c=c, bn_momentum=bn_momentum),
        )

        # Final layer (final_layer)
        self.final_layer = nn.Conv2d(c, nof_joints, kernel_size=(1, 1), stride=(1, 1))

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = [trans(x) for trans in self.transition1]  # From now on, x is a list (# == nof branches)

        x = self.stage2(x)
        # x = [trans(x[-1]) for trans in self.transition2]  # New branch derives from the "upper" branch only
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # New branch derives from the "upper" branch only

        x = self.stage3(x)
        # x = [trans(x) for trans in self.transition3]  # New branch derives from the "upper" branch only
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1])
        ]  # New branch derives from the "upper" branch only

        x = self.stage4(x)

        x = self.final_layer(x[0])

        return {
            'output': x
        }


if __name__ == '__main__':
    # model = HRNet(48, 17, 0.1)
    model = HRNet(32, 17, 0.1)

    # print(model)

    model.load_state_dict(
        # torch.load('./weights/pose_hrnet_w48_384x288.pth')
        torch.load('./weights/pose_hrnet_w32_256x192.pth')
    )
    print('ok!!')

    if torch.cuda.is_available() and False:
        torch.backends.cudnn.deterministic = True
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    print(device)

    model = model.to(device)

    y = model(torch.ones(1, 3, 384, 288).to(device))['output']  # forward returns a dict; take the heatmap tensor
    print(y.shape)
    print(torch.min(y).item(), torch.mean(y).item(), torch.max(y).item())
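As a quick sanity check on the architecture above (illustrative only, not part of the commit; it builds an untrained w32 model, so no weight file is needed), the highest-resolution branch stays at 1/4 of the input resolution:

# Minimal shape check: 17 joint heatmaps at 1/4 of the input size
import torch
net = HRNet(32, 17, 0.1)
with torch.no_grad():
    heatmaps = net(torch.randn(1, 3, 384, 288))['output']
assert heatmaps.shape == (1, 17, 384 // 4, 288 // 4)  # (1, 17, 96, 72)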
72
myeasymocap/backbone/hrnet/modules.py
Normal file
72
myeasymocap/backbone/hrnet/modules.py
Normal file
@ -0,0 +1,72 @@
import torch
from torch import nn


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_momentum)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.relu = nn.ReLU(inplace=True)
        # conv2 maps planes -> planes (the original had `inplanes` here, which only
        # worked because HRNet always instantiates BasicBlock with inplanes == planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
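A brief usage sketch (illustrative, not part of the commit): a Bottleneck widens its output by `expansion`, so the skip path needs a 1x1 projection whenever shapes change:

# Bottleneck(64, 64) outputs 64 * 4 = 256 channels
import torch
from torch import nn
block = Bottleneck(64, 64, downsample=nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, bias=False),
    nn.BatchNorm2d(256),
))
print(block(torch.randn(1, 64, 96, 72)).shape)  # torch.Size([1, 256, 96, 72])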
130
myeasymocap/backbone/hrnet/myhrnet.py
Normal file
130
myeasymocap/backbone/hrnet/myhrnet.py
Normal file
@ -0,0 +1,130 @@
import os
import numpy as np
import math
import cv2
import torch
from ..basetopdown import BaseTopDownModelCache
from .hrnet import HRNet


def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim: {}'.format(batch_heatmaps.shape)

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals
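The flat argmax index is decoded into (x, y) via modulo and integer division by the heatmap width. A tiny check (illustrative only):

# A single peak at (x=5, y=2) on a 1x1x8x8 heatmap
hm = np.zeros((1, 1, 8, 8), dtype=np.float32)
hm[0, 0, 2, 5] = 1.0
preds, maxvals = get_max_preds(hm)
print(preds[0, 0], maxvals[0, 0])  # [5. 2.] [1.]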

COCO17_IN_BODY25 = [0,16,15,18,17,5,2,6,3,7,4,12,9,13,10,14,11]
pairs = [[1, 8], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [1, 0], [0,15], [15,17], [0,16], [16,18], [14,19], [19,20], [14,21], [11,22], [22,23], [11,24]]

def coco17tobody25(points2d):
    kpts = np.zeros((points2d.shape[0], 25, 3))
    kpts[:, COCO17_IN_BODY25, :2] = points2d[:, :, :2]
    kpts[:, COCO17_IN_BODY25, 2:3] = points2d[:, :, 2:3]
    kpts[:, 8, :2] = kpts[:, [9, 12], :2].mean(axis=1)
    kpts[:, 8, 2] = kpts[:, [9, 12], 2].min(axis=1)
    kpts[:, 1, :2] = kpts[:, [2, 5], :2].mean(axis=1)
    kpts[:, 1, 2] = kpts[:, [2, 5], 2].min(axis=1)
    # the (x, y) order may need to be swapped for some consumers:
    # kpts = kpts[:, :, [1,0,2]]
    return kpts
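BODY25 joints without a COCO-17 counterpart stay zero, except the synthesized neck (1) and mid-hip (8), whose confidence is the min of their two parent joints. An illustrative check:

fake = np.concatenate([np.random.rand(1, 17, 2), np.ones((1, 17, 1))], axis=-1)
b25 = coco17tobody25(fake)
assert b25.shape == (1, 25, 3)
assert b25[0, 1, 2] == 1 and b25[0, 8, 2] == 1  # neck and mid-hip keep full confidence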

class MyHRNet(BaseTopDownModelCache):
    def __init__(self, ckpt):
        super().__init__(name='hand2d', bbox_scale=1.25, res_input=[288, 384])
        model = HRNet(48, 17, 0.1)
        if not os.path.exists(ckpt) and ckpt.endswith('pose_hrnet_w48_384x288.pth'):
            url = "11ezQ6a_MxIRtj26WqhH3V3-xPI3XqYAw"
            text = '''Download `models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth` from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ),
and place it into {}'''.format(os.path.dirname(ckpt))
            print(text)
            os.makedirs(os.path.dirname(ckpt), exist_ok=True)
            cmd = 'gdown "{}" -O {}'.format(url, ckpt)
            print('\n', cmd, '\n')
            os.system(cmd)
        assert os.path.exists(ckpt), f'{ckpt} not exists'
        checkpoint = torch.load(ckpt, map_location='cpu')
        model.load_state_dict(checkpoint)
        model.eval()
        self.model = model
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(self.device)

    @staticmethod
    def get_max_preds(batch_heatmaps):
        coords, maxvals = get_max_preds(batch_heatmaps)

        heatmap_height = batch_heatmaps.shape[2]
        heatmap_width = batch_heatmaps.shape[3]

        # post-processing: shift each peak a quarter pixel toward its higher neighbor
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array(
                        [
                            hm[py][px+1] - hm[py][px-1],
                            hm[py+1][px] - hm[py-1][px]
                        ]
                    )
                    coords[n][p] += np.sign(diff) * .25
        coords = coords.astype(np.float32) * 4  # heatmaps are at 1/4 of the network input resolution
        pred = np.dstack((coords, maxvals))
        return pred

    def __call__(self, bbox, images, imgnames):
        squeeze = False
        if not isinstance(images, list):
            images = [images]
            imgnames = [imgnames]
            bbox = [bbox]
            squeeze = True
        nViews = len(images)
        kpts_all = []
        for nv in range(nViews):
            _bbox = bbox[nv]
            if _bbox.shape[0] == 0:
                kpts_all.append(np.zeros((25, 3)))  # BODY25, to match coco17tobody25 output (was (17, 3), which breaks np.stack)
                continue
            img = images[nv]
            # TODO: add flip test
            out = super().__call__(_bbox, img, imgnames[nv])
            output = out['params']['output']
            kpts = self.get_max_preds(output)
            kpts_ori = self.batch_affine_transform(kpts, out['params']['inv_trans'])
            kpts = np.concatenate([kpts_ori, kpts[..., -1:]], axis=-1)
            kpts = coco17tobody25(kpts)
            if len(kpts.shape) == 3:
                kpts = kpts[0]
            kpts_all.append(kpts)
        kpts_all = np.stack(kpts_all)
        if squeeze:
            kpts_all = kpts_all[0]
        return {
            'keypoints': kpts_all
        }
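A hedged usage sketch (the paths and the bbox below are assumptions; how the image argument is consumed depends on BaseTopDownModelCache, which is outside this diff):

import cv2
import numpy as np
est = MyHRNet('models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth')
bbox = np.array([[100., 50., 400., 650., 1.0]])  # x1, y1, x2, y2, score
img = cv2.imread('data/images/0/000000.jpg')
out = est(bbox, img, 'data/images/0/000000.jpg')
print(out['keypoints'].shape)  # (25, 3): x, y, confidence in image coordinates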
292
myeasymocap/backbone/yolo/yolo.py
Normal file
292
myeasymocap/backbone/yolo/yolo.py
Normal file
@ -0,0 +1,292 @@
import torch
import numpy as np
import os
import cv2
from os.path import join
import pickle


def check_modelpath(paths):
    if isinstance(paths, str):
        assert os.path.exists(paths), paths
        return paths
    elif isinstance(paths, list):
        for path in paths:
            if os.path.exists(path):
                print(f'Found model in {path}')
                break
        else:
            print(f'No model found in {paths}!')
            raise FileNotFoundError  # was FileExistsError, which means the opposite
        return path
    else:
        raise NotImplementedError


class BaseYOLOv5:
    def __init__(self, ckpt=None, model='yolov5m', name='object2d', multiview=True) -> None:
        if ckpt is not None:
            ckpt = check_modelpath(ckpt)
            self.model = torch.hub.load('ultralytics/yolov5', 'custom', ckpt)
        else:
            print('[{}] Not given ckpt, use default yolov5'.format(self.__class__.__name__))
            self.model = torch.hub.load('ultralytics/yolov5', model)
        self.multiview = multiview
        self.name = name
        # note: self.output (the cache root) is expected to be set by the pipeline before use

    def check_cache(self, imgname):
        basename = os.path.basename(imgname)
        imgext = '.' + basename.split('.')[-1]
        nv = imgname.split(os.sep)[-2]
        cachename = join(self.output, self.name, nv, basename.replace(imgext, '.npy'))
        os.makedirs(os.path.dirname(cachename), exist_ok=True)
        if os.path.exists(cachename):
            output = np.load(cachename, allow_pickle=True)
            return True, output, cachename
        else:
            return False, None, cachename

    def check_image(self, img_or_name):
        if isinstance(img_or_name, str):
            images = cv2.imread(img_or_name)
        else:
            images = img_or_name
        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)
        return images

    @torch.no_grad()
    def detect(self, image, imgname):
        flag, cache, cachename = self.check_cache(imgname)
        if flag:
            return cache
        image = self.check_image(imgname)
        results = self.model(image)  # expects RGB, i.e. images[:,:,::-1]
        arrays = np.array(results.pandas().xyxy[0])
        np.save(cachename, arrays)
        return arrays

    @staticmethod
    def select_class(results, name):
        select = []
        for i, res in enumerate(results):
            classname = res[6]
            if classname != name:
                continue
            box = res[:5]
            select.append(box)
        return select

    def select_bbox(self, select, imgname):
        if select.shape[0] == 0:
            return select
        # Naive: select the best
        idx = np.argsort(select[:, -1])[::-1]
        return select[idx[0:1]]

    def __call__(self, images, imgnames):  # this assumes multi-view input by default; a single-view subclass should override it
        squeeze = False
        if not isinstance(images, list):
            images = [images]
            imgnames = [imgnames]
            squeeze = True
        detects = {'bbox': [[] for _ in range(len(images))]}
        for nv in range(len(images)):
            res = self.detect(images[nv], imgnames[nv])
            select = self.select_class(res, self.name)
            if len(select) == 0:
                select = np.zeros((0, 5), dtype=np.float32)
            else:
                select = np.stack(select).astype(np.float32)
            # TODO: add track here
            select = self.select_bbox(select, imgnames[nv])
            detects['bbox'][nv] = select
        if squeeze:
            detects['bbox'] = detects['bbox'][0]
        return detects
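A hedged usage sketch (the paths are assumptions; in the real pipeline `output` is injected by the runner before the detector is called):

detector = BaseYOLOv5(name='person')
detector.output = 'output'  # cache directory for the .npy detections
dets = detector(['data/images/0/000000.jpg'], ['data/images/0/000000.jpg'])
print(dets['bbox'][0].shape)  # (1, 5): the single best person box per view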

class YoloWithTrack(BaseYOLOv5):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.track_cache = {}

    @staticmethod
    def calculate_iou(bbox_pre, bbox_now):
        area_now = (bbox_now[:, 2] - bbox_now[:, 0]) * (bbox_now[:, 3] - bbox_now[:, 1])
        area_pre = (bbox_pre[:, 2] - bbox_pre[:, 0]) * (bbox_pre[:, 3] - bbox_pre[:, 1])
        # compute IoU
        # max of the left/top corners
        xx1 = np.maximum(bbox_now[:, 0], bbox_pre[:, 0])
        yy1 = np.maximum(bbox_now[:, 1], bbox_pre[:, 1])
        # min of the right/bottom corners
        xx2 = np.minimum(bbox_now[:, 0+2], bbox_pre[:, 0+2])
        yy2 = np.minimum(bbox_now[:, 1+2], bbox_pre[:, 1+2])
        # intersection width and height
        w = np.maximum(0, xx2 - xx1)
        h = np.maximum(0, yy2 - yy1)
        over = (w * h) / (area_pre + area_now - w * h)
        return over

    def select_bbox(self, select, imgname):
        if select.shape[0] == 0:
            return select
        sub = os.path.basename(os.path.dirname(imgname))
        frame = int(os.path.basename(imgname).split('.')[0])
        if sub not in self.track_cache:
            # select the best
            select = super().select_bbox(select, imgname)
            self.track_cache[sub] = {
                'frame': [frame],
                'bbox': [select]
            }
            return select
        bbox_pre = self.track_cache[sub]['bbox'][-1]
        iou = self.calculate_iou(bbox_pre, select)
        idx = iou.argmax()
        select = select[idx:idx+1]
        self.track_cache[sub]['frame'].append(frame)
        self.track_cache[sub]['bbox'].append(select)
        return select
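A small worked example of the IoU used for the greedy track association (illustrative only):

# Two unit squares overlapping by half: intersection 0.5, union 1.5 -> IoU = 1/3
a = np.array([[0., 0., 1., 1., 1.0]])
b = np.array([[0.5, 0., 1.5, 1., 1.0]])
print(YoloWithTrack.calculate_iou(a, b))  # [0.33333333]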

class DetectToPelvis:
    def __init__(self, key) -> None:
        self.key = key
        self.multiview = True

    def __call__(self, **kwargs):
        key = self.key
        val = kwargs[key]
        ret = {'pelvis': []}
        for nv in range(len(val)):
            bbox = val[nv]
            # use the bbox center (with its confidence) as a pelvis proxy
            center = np.stack([(bbox[:, 0] + bbox[:, 2])/2, (bbox[:, 1] + bbox[:, 3])/2, bbox[:, -1]], axis=-1)
            ret['pelvis'].append(center)
        return ret

class Yolo_model:
    def __init__(self, mode, yolo_ckpt, multiview, repo_or_dir='ultralytics/yolov5', source='github') -> None:
        yolo_ckpt = check_modelpath(yolo_ckpt)
        self.model = torch.hub.load(repo_or_dir, 'custom', yolo_ckpt, source=source)
        self.min_detect_thres = 0.3
        self.mode = mode  # 'fullimg' or 'bboxcrop'
        self.output = 'output'
        self.name = 'yolo'
        self.multiview = multiview

    @torch.no_grad()
    def det_step(self, img_or_name, imgname, bbox=[]):
        basename = os.path.basename(imgname)
        if self.multiview:
            nv = imgname.split('/')[-2]
            cachename = join(self.output, self.name, nv, basename.replace('.jpg', '.pkl'))
        else:
            cachename = join(self.output, self.name, basename.replace('.jpg', '.pkl'))
        os.makedirs(os.path.dirname(cachename), exist_ok=True)
        if os.path.exists(cachename):
            with open(cachename, 'rb') as f:
                output = pickle.load(f)
            return output

        if isinstance(img_or_name, str):
            images = cv2.imread(img_or_name)
        else:
            images = img_or_name

        if self.mode == 'bboxcrop':
            bbox[0] = max(0, bbox[0])
            bbox[1] = max(0, bbox[1])
            crop = images[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2]), ::-1]
        else:
            crop = images[:, :, ::-1]
        # print("[yolo img shape] ", crop.shape)
        results = self.model(crop)  # expects RGB, i.e. images[:,:,::-1]
        # breakpoint()
        arrays = np.array(results.pandas().xyxy[0])
        bboxes = {
            'bbox': [],
            'bbox_handl': [],
            'bbox_handr': [],
            'pelvis': [],
            'pelvis_l': [],
            'pelvis_r': []
        }

        for i, res in enumerate(arrays):
            classid = res[5]
            box = res[:5]
            if self.mode == 'bboxcrop':
                # map the crop-local box back to full-image coordinates
                box[0] += bbox[0]
                box[2] += bbox[0]
                box[1] += bbox[1]
                box[3] += bbox[1]
            if False:  # debug visualization, disabled
                vis = images.copy()
                cpimg = crop.copy()
                from easymocap.mytools.vis_base import plot_bbox
                plot_bbox(vis, box, 0)
                plot_bbox(cpimg, res[:5], 0)
                cv2.imshow('vis', vis)
                # cv2.waitKey(0)
                cv2.imshow('crop', cpimg)
                cv2.waitKey(0)
                breakpoint()
            if box[4] < self.min_detect_thres:
                continue
            if classid == 0:
                bboxes['bbox'].append(box)
            elif classid == 1:
                bboxes['bbox_handl'].append(box)
                bboxes['pelvis_l'].append([(box[0]+box[2])/2, (box[1]+box[3])/2, box[-1]])
            elif classid == 2:
                bboxes['bbox_handr'].append(box)
                bboxes['pelvis_r'].append([(box[0]+box[2])/2, (box[1]+box[3])/2, box[-1]])
        if len(bboxes['bbox_handl']) == 0:
            # bboxes['bbox_handl'].append(np.zeros((0, 5)))
            # bboxes['pelvis_l'].append(np.zeros((0, 3)))
            bboxes['bbox_handl'].append(np.zeros((5)))
            bboxes['pelvis_l'].append(np.zeros((3)))

        if len(bboxes['bbox_handr']) == 0:
            # bboxes['bbox_handr'].append(np.zeros((0, 5)))
            # bboxes['pelvis_r'].append(np.zeros((0, 3)))
            bboxes['bbox_handr'].append(np.zeros((5)))
            bboxes['pelvis_r'].append(np.zeros((3)))
        if len(bboxes['bbox']) == 0:
            bboxes['bbox'].append(np.zeros((5)))
        bboxes['bbox'] = np.array(bboxes['bbox'])
        if isinstance(imgname, str):
            with open(cachename, 'wb') as f:
                pickle.dump(bboxes, f)
        return bboxes

    def __call__(self, images, imgname, bbox=[]):
        return self.det_step(images, imgname, bbox)


class Yolo_model_hand_mvmp(Yolo_model):
    @torch.no_grad()
    def __call__(self, bbox, images, imgnames):
        ret = {
            'pelvis_l': [],
            'pelvis_r': [],
            # 'pelvis': [],
            'bbox_handl': [],
            'bbox_handr': [],
        }
        for nv in range(len(images)):
            img = images[nv]
            imgname = imgnames[nv]
            if self.mode == 'bboxcrop':
                bboxes = {
                    'bbox': [],
                    'bbox_handl': [],
                    'bbox_handr': [],
                    'pelvis_l': [],
                    'pelvis_r': []
                }
                for pid in range(len(bbox[nv])):
                    bboxes_ = self.det_step(img, imgname, bbox[nv][pid])
                    for key in bboxes.keys():
                        bboxes[key].append(bboxes_[key])
            else:
                bboxes = self.det_step(img, imgname)
            for k in ret.keys():
                ret[k].append(np.array(bboxes[k]))

        return ret
106
myeasymocap/datasets/basedata.py
Normal file
106
myeasymocap/datasets/basedata.py
Normal file
@ -0,0 +1,106 @@
import os
from os.path import join
import numpy as np
import cv2
from easymocap.mytools.debug_utils import log, myerror, mywarn


class ImageDataBase:
    def __init__(self, root, subs, ranges, read_image) -> None:
        assert root != 'TO_BE_FILLED', 'You must set the root of dataset'
        assert os.path.exists(root), f'root {root} not exists'
        self.root = root
        self.subs = subs
        self.ranges = ranges
        self.flag_read_image = read_image
        self.infos = {}
        self.meta = {}
        self.distortMap = {}  # per-view undistortion maps, filled lazily in read_image (was missing)

    def check_frames_length(self):
        if len(self.ranges) == 0:
            self.ranges = [0, self.length, 1]
        if self.ranges[1] > self.length:
            self.ranges[1] = self.length
        self.frames = list(range(*self.ranges))
        self.length = len(self.frames)

    def try_to_extract_images(self, root, value):
        if not os.path.exists(os.path.join(root, value['root'])) and os.path.exists(os.path.join(root, 'videos')):
            print('[{}] Cannot find the images but find the videos, try to extract it'.format(self.__class__.__name__))
            for videoname in os.listdir(os.path.join(root, 'videos')):
                videoext = '.' + videoname.split('.')[-1]
                outdir = join(root, value['root'], videoname.replace(videoext, ''))
                os.makedirs(outdir, exist_ok=True)
                cmd = 'ffmpeg -i {videoname} -q:v 1 -start_number 0 {outdir}/%06d.jpg'.format(
                    videoname=join(root, 'videos', videoname),
                    outdir=outdir
                )
                os.system(cmd)

    def __str__(self) -> str:
        return f''' [Dataset] {self.__class__.__name__}
    root  : {self.root}
    subs  : {self.subs}
    ranges: {self.ranges}
'''

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        return self.length

    def read_image(self, imgname, cameras=None):
        assert os.path.exists(imgname), "image {} not exists".format(imgname)
        sub = os.path.basename(os.path.dirname(imgname))
        img = cv2.imread(imgname)
        if cameras is None:
            return img
        # `cameras` only acts as a flag here; the intrinsics come from self.cameras set by subclasses
        K, D = self.cameras[sub]['K'], self.cameras[sub]['dist']
        if np.linalg.norm(D) < 1e-3:
            return img
        if sub not in self.distortMap.keys():
            h, w = img.shape[:2]
            mapx, mapy = cv2.initUndistortRectifyMap(K, D, None, K, (w, h), 5)
            self.distortMap[sub] = (mapx, mapy)
        mapx, mapy = self.distortMap[sub]
        img = cv2.remap(img, mapx, mapy, cv2.INTER_NEAREST)
        return img

def read_mv_images(root, root_images, ext, subs):
    assert os.path.exists(os.path.join(root, root_images)), f'root {root}/{root_images} not exists'
    if len(subs) == 0:
        subs = sorted(os.listdir(os.path.join(root, root_images)))
        if subs[0].isdigit():
            subs = sorted(subs, key=lambda x: int(x))
    imagelists = []
    log(f'Found {len(subs)} subjects in {root}/{root_images}')
    for sub in subs:
        images = sorted(os.listdir(os.path.join(root, root_images, sub)))
        images = [os.path.join(root, root_images, sub, image) for image in images if image.endswith(ext)]
        log(f'  -> Found {len(images)} {root_images} in {sub}.')
        imagelists.append(images)
    min_length = min([len(image) for image in imagelists])
    log(f'  -> Min length: {min_length}')
    imagenames = [[image[i] for image in imagelists] for i in range(min_length)]
    return imagenames, {'subs': subs}


def FloatArray(x):
    return np.array(x, dtype=np.float32)


def find_best_people(annots):
    if len(annots) == 0:
        return {}
    # TODO: find the best
    annot = annots[0]
    bbox = FloatArray(annot['bbox'])
    if 'keypoints' not in annot.keys():
        return {}
    keypoints = FloatArray(annot['keypoints'])
    return {'bbox': bbox, 'keypoints': keypoints}


def find_all_people(annots):
    if len(annots) == 0:
        return {}
    bbox = FloatArray([annot['bbox'] for annot in annots])
    keypoints = FloatArray([annot['keypoints'] for annot in annots])
    return {'bbox': bbox, 'keypoints': keypoints}
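For reference, read_mv_images expects the usual EasyMocap multi-view layout, one folder per camera with synchronized, zero-padded frame names (the root below is an assumption):

# <root>/images/01/000000.jpg, 000001.jpg, ...
# <root>/images/02/000000.jpg, 000001.jpg, ...
imagenames, meta = read_mv_images('/path/to/root', 'images', '.jpg', [])
# imagenames[i] holds the per-view paths of frame i, truncated to the shortest view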
332
myeasymocap/datasets/mv1p.py
Normal file
332
myeasymocap/datasets/mv1p.py
Normal file
@ -0,0 +1,332 @@
from easymocap.mytools.camera_utils import read_cameras
from easymocap.mytools.debug_utils import log, myerror, mywarn
from easymocap.mytools.file_utils import read_json
from .basedata import ImageDataBase, read_mv_images, find_best_people, find_all_people
import os
from os.path import join
import numpy as np
import cv2
from collections import defaultdict

panoptic15_in_body15 = [1,0,8,5,6,7,12,13,14,2,3,4,9,10,11]

def convert_body15_panoptic15(keypoints):
    k3d_panoptic15 = keypoints[..., panoptic15_in_body15, :]
    return k3d_panoptic15

def convert_panoptic15_body15(keypoints):
    keypoints_b15 = np.zeros_like(keypoints)
    keypoints_b15[..., panoptic15_in_body15, :] = keypoints
    return keypoints_b15

def padding_and_stack(datas):
    shapes = {}
    for data in datas:
        if len(data) == 0:
            continue
        for key, value in data.items():
            if key not in shapes.keys():
                shapes[key] = value.shape
    collect = {key: np.zeros((len(datas), *shapes[key])) for key in shapes.keys()}
    for i, data in enumerate(datas):
        for key, value in data.items():
            collect[key][i] = value
    return collect
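The two conversions above are inverse gather/scatter operations over the same index list; a quick roundtrip check (illustrative only):

k3d = np.random.rand(15, 4).astype(np.float32)
assert np.allclose(convert_body15_panoptic15(convert_panoptic15_body15(k3d)), k3d)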

def padding_empty(datas):
    shapes = {}
    for data in datas:
        if len(data) == 0:
            continue
        for key, value in data.items():
            if key not in shapes.keys():
                shapes[key] = value.shape[1:]
    collect = {key: [None for data in datas] for key in shapes.keys()}
    for i, data in enumerate(datas):
        for key, shape in shapes.items():
            if key not in data.keys():
                print('[Dataset] padding empty view {} of {}'.format(i, key))
                collect[key][i] = np.zeros((0, *shape), dtype=np.float32)
            else:
                collect[key][i] = data[key]
    return collect


def parse_frames(pafs_frame, H, W):
    # parse a single frame
    res = {
        'joints': [],
        'pafs': {}
    }
    joints = pafs_frame[1:1+3*25]
    for i in range(25):
        value = np.fromstring(joints[3*i+2], sep=' ').reshape(3, -1).T
        value[:, 0] = value[:, 0] * W
        value[:, 1] = value[:, 1] * H
        res['joints'].append(value.astype(np.float32))
    # parse pafs
    pafs = pafs_frame[1+3*25+1:]
    for npart in range(26):
        label = pafs[3*npart+0].split(' ')[2:]
        label = (int(label[0]), int(label[1]))
        shape = pafs[3*npart+1].split(' ')[2:]
        w, h = int(shape[0]), int(shape[1])
        value = np.fromstring(pafs[3*npart+2], sep=' ').reshape(w, h).astype(np.float32)
        res['pafs'][label] = value
    return res


def read_4dassociation(pafs, H, W):
    outputs = []
    # parse the paf file
    with open(pafs, 'r') as f:
        pafs = f.readlines()
    indices = []
    for i, line in enumerate(pafs):
        if line.startswith('# newframes:'):
            indices.append([i])
        elif line.startswith('# end frames:'):
            indices[-1].append(i)
    print('[Read OpenPose] Totally {} frames'.format(len(indices)))
    for (start, end) in indices:
        pafs_frame = pafs[start+1:end]
        pafs_frame = list(map(lambda x: x.strip(), pafs_frame))
        frames = parse_frames(pafs_frame, H, W)
        outputs.append(frames)
    return outputs

class MVDataset(ImageDataBase):
    def __init__(self, root, subs, subs_vis, ranges, read_image=False, reader={}, filter={}) -> None:
        super().__init__(root, subs, ranges, read_image)
        self.subs_vis = subs_vis
        self.length = 0
        for key, value in reader.items():
            if key == 'images':
                self.try_to_extract_images(root, value)
                data, meta = read_mv_images(root, value['root'], value['ext'], subs)
                self.length = len(data)
            elif key == 'image_shape':
                imgnames = self.infos['images'][0]
                shapes = []
                for imgname in imgnames:
                    img = cv2.imread(imgname)
                    height, width, _ = img.shape
                    log('[{}] sub {} shape {}'.format(self.__class__.__name__, imgname, img.shape))
                    shapes.append([height, width])
                data = [shapes]
                meta = {}
            elif key == 'annots':
                data, meta = read_mv_images(root, value['root'], value['ext'], subs)
                if self.length > 0:
                    if self.length != len(data):
                        myerror('annots length {} not equal to images length {}.'.format(len(data), self.length))
                        data = data[:self.length]
                else:
                    self.length = len(data)
            elif key == 'openpose':
                # read the OpenPose results
                if len(subs) == 0:
                    pafs = sorted(os.listdir(join(root, value['root'])))
                else:
                    pafs = [f'{sub}.txt' for sub in subs]
                results = []
                for nv, paf in enumerate(pafs):
                    pafname = join(root, value['root'], paf)
                    infos = read_4dassociation(pafname, H=self.infos['image_shape'][0][nv][0], W=self.infos['image_shape'][0][nv][1])
                    results.append(infos)
                data = [[d[i] for d in results] for i in range(self.length)]
                meta = {}
            elif key == 'cameras':
                if 'with_sub' in value.keys():
                    raise NotImplementedError
                else:
                    cameras = read_cameras(os.path.join(root, value['root']))
                    if 'remove_k3' in value.keys():
                        for cam, camera in cameras.items():
                            camera['dist'][:, 4] = 0.
                    data = [cameras]
                    meta = {}
            elif key in ['pelvis']:
                continue
            elif key == 'keypoints3d':
                k3droot = value['root']
                filenames = sorted(os.listdir(k3droot))[:self.length]
                res_key = value.get('key', 'pred')
                data = []
                for filename in filenames:
                    results = read_json(join(k3droot, filename))
                    if 'pids' not in results.keys():
                        # fill in the person ids ourselves
                        results['pids'] = list(range(len(results[res_key])))
                    data.append({
                        'pids': results['pids'],
                        'keypoints3d': np.array(results[res_key], dtype=np.float32)
                    })
                    if data[-1]['keypoints3d'].shape[-1] == 3:
                        mywarn('The input keypoints dont have confidence')
                        data[-1]['keypoints3d'] = np.concatenate([data[-1]['keypoints3d'], np.ones_like(data[-1]['keypoints3d'][..., :1])], axis=-1)
                    if 'conversion' in value.keys():
                        if value['conversion'] == 'panoptic15_to_body15':
                            data[-1]['keypoints3d'] = convert_panoptic15_body15(data[-1]['keypoints3d'])
            else:
                raise ValueError(f'Unknown reader: {key}')
            self.infos[key] = data
            self.meta.update(meta)
        self.reader = reader
        self.filter = filter
        if len(self.subs) == 0:
            self.subs = self.meta['subs']
        self.check_frames_length()

    @staticmethod
    def read_annots(annotnames):
        val = []
        for annname in annotnames:
            annots = read_json(annname)['annots']
            # select the best people
            annots = find_best_people(annots)
            val.append(annots)
        val = padding_and_stack(val)
        return val

    def filter_openpose(self, candidates, pafs):
        for nv, candview in enumerate(candidates):
            H = self.infos['image_shape'][0][nv][0]
            W = self.infos['image_shape'][0][nv][1]
            for cand in candview:
                if 'border' in self.filter.keys():
                    border = self.filter['border'] * max(H, W)
                    flag = (cand[:, 0] > border) & (cand[:, 0] < W - border) & (cand[:, 1] > border) & (cand[:, 1] < H - border)
                    cand[~flag] = 0
        return candidates, pafs

    def __getitem__(self, index):
        frame = self.frames[index]
        ret = {}
        for key, value in self.infos.items():
            if len(value) == 1:
                ret[key] = value[0]
            elif frame >= len(value):
                myerror(f'[{self.__class__.__name__}] {key}: index {frame} out of range {len(value)}')
            else:
                ret[key] = value[frame]
        ret_list = defaultdict(list)
        for key, val in ret.items():
            if key == 'annots':
                ret_list[key] = self.read_annots(val)
            elif key == 'cameras':
                for sub in self.subs:
                    select = {k: val[sub][k] for k in ['K', 'R', 'T', 'dist', 'P']}
                    ret_list[key].append(select)
                ret_list[key] = padding_and_stack(ret_list[key])
            elif key == 'images':
                if self.flag_read_image:
                    for i, sub in enumerate(self.subs):
                        imgname = val[i]
                        if sub in self.subs_vis or self.subs_vis == 'all':
                            img = self.read_image(imgname)
                        else:
                            img = imgname
                        ret_list[key].append(img)
                        ret_list['imgnames'].append(imgname)
                else:
                    ret_list[key] = val
                    ret_list['imgnames'] = val
            elif key == 'openpose':
                ret_list[key] = [v['joints'] for v in val]
                # also return the PAFs
                ret_list[key+'_paf'] = [v['pafs'] for v in val]
                # normalize the PAF orientation: store the (1, 8) field transposed as (8, 1)
                for nv in range(len(ret_list[key])):
                    ret_list[key+'_paf'][nv][(8, 1)] = ret_list[key+'_paf'][nv].pop((1, 8)).T
                ret_list[key], ret_list[key+'_paf'] = self.filter_openpose(ret_list[key], ret_list[key+'_paf'])
            elif key == 'keypoints3d':
                ret_list['keypoints3d'] = val['keypoints3d']
                if 'pids' in val.keys():
                    ret_list['pids'] = val['pids']
                else:
                    ret_list['pids'] = list(range(len(val['keypoints3d'])))
            elif key in ['image_shape']:
                pass
            else:
                print('[Dataset] Unknown key: {}'.format(key))
        ret_list.update(ret_list.pop('annots', {}))
        for key, val in self.reader.items():
            if key == 'pelvis' and 'annots' in self.reader.keys():  # load pelvis from annots.keypoints
                ret_list[key] = [d[:, val.root_id] for d in ret_list['keypoints']]
            elif key == 'pelvis' and 'openpose' in self.reader.keys():
                ret_list[key] = [d[val.root_id] for d in ret_list['openpose']]
        ret_list['meta'] = {
            'subs': self.subs,
            'index': index,
            'frame': frame,
            'image_shape': ret['image_shape'],
            'imgnames': ret_list['imgnames'],
        }
        return ret_list

    def check(self, index):
        raise NotImplementedError

class MVMP(MVDataset):
    def read_annots(self, annotnames):
        val = []
        for annname in annotnames:
            annots = read_json(annname)['annots']
            # filter here to drop unwanted 2D detections
            annots_valid = []
            for annot in annots:
                flag = True
                if 'bbox_size' in self.filter.keys():
                    bbox_size = self.filter['bbox_size']
                    bbox = annot['bbox']
                    area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                    if area < bbox_size:
                        flag = False
                if flag:
                    annots_valid.append(annot)
            annots = annots_valid
            # keep all remaining people
            annots = find_all_people(annots)
            val.append(annots)
        val = padding_empty(val)
        return val

    def check(self, index):
        data = self.__getitem__(index)
        from easymocap.mytools.vis_base import plot_bbox, merge, plot_keypoints_auto
        # check the subs vis
        vis = []
        for nv, sub in enumerate(self.subs):
            if sub not in self.subs_vis:
                continue
            img = data['images'][nv].copy()
            bbox = data['bbox'][nv]
            kpts = data['keypoints'][nv]
            for i in range(bbox.shape[0]):
                plot_bbox(img, bbox[i], pid=i)
                plot_keypoints_auto(img, kpts[i], pid=i, use_limb_color=False)
            vis.append(img)
        vis = merge(vis)
        cv2.imwrite('debug/{}_{:06d}.jpg'.format(self.__class__.__name__, index), vis)

if __name__ == '__main__':
    config = '''
    args:
      root: /nas/ZJUMoCap/Part0/313
      subs: []
      subs_vis: ['01', '07', '13', '19']
      ranges: [0, 100, 1]
      read_image: False
      reader:
        images:
          root: images
          ext: .jpg
        annots:
          root: annots
          ext: .json
        cameras: # also covers the case where camera parameters differ per frame
          root: ''
    '''
    import yaml
    config = yaml.load(config, Loader=yaml.FullLoader)
    dataset = MVDataset(**config['args'])
    for i in range(len(dataset)):
        data = dataset[i]
136
myeasymocap/datasets/sv1p.py
Normal file
136
myeasymocap/datasets/sv1p.py
Normal file
@ -0,0 +1,136 @@
from .basedata import ImageDataBase, read_mv_images, find_best_people
from easymocap.mytools.debug_utils import log, myerror, mywarn
from easymocap.mytools.camera_utils import read_cameras
from easymocap.mytools.file_utils import read_json
import os
import numpy as np
import cv2


class SVDataset(ImageDataBase):
    '''
    This dataset only returns a single video sequence; it is not meant for multiple sequences.
    '''
    def __init__(self, root, subs, ranges, read_image=False, reader={}) -> None:
        super().__init__(root, subs, ranges, read_image)
        assert len(subs) == 1, 'SVDataset only support one subject'
        for key, value in reader.items():
            if key == 'images':
                self.try_to_extract_images(root, value)
                data, meta = read_mv_images(root, value['root'], value['ext'], subs)
                data = [d[0] for d in data]
                self.length = len(data)
            elif key == 'image_shape':
                imgname = self.infos['images'][0]
                shapes = []
                assert os.path.exists(imgname), "image {} not exists".format(imgname)
                img = cv2.imread(imgname)
                assert img is not None, "image {} read failed".format(imgname)
                height, width, _ = img.shape
                log('[{}] sub {} shape {}'.format(self.__class__.__name__, imgname, img.shape))
                shapes.append([height, width])
                data = shapes
            elif key == 'annots':
                data, meta = read_mv_images(root, value['root'], value['ext'], subs)
                data = [d[0] for d in data]
                if self.length > 0:
                    assert self.length == len(data), \
                        myerror('annots length {} not equal to images length {}.'.format(len(data), self.length))
                else:
                    self.length = len(data)
            elif key == 'cameras':
                myerror('Camera parameters are not implemented yet')
                raise NotImplementedError
            else:
                raise ValueError(f'Unknown reader: {key}')
            self.infos[key] = data
            self.meta.update(meta)
        # check cameras:
        if 'cameras' not in self.infos:
            mywarn('[{}] No camera info, use default camera'.format(self.__class__.__name__))
            imgname0 = self.infos['images'][0]
            img = self.read_image(imgname0)
            height, width = img.shape[:2]
            log('[{}] Read shape {} from image {}'.format(self.__class__.__name__, img.shape, imgname0))
            focal = 1.2 * min(height, width)  # as colmap
            log('[{}] Set a fix focal length {}'.format(self.__class__.__name__, focal))
            K = np.array([focal, 0., width/2, 0., focal, height/2, 0., 0., 1.]).reshape(3, 3)
            camera = {'K': K, 'R': np.eye(3), 'T': np.zeros((3, 1)), 'dist': np.zeros((1, 5))}
            for key, val in camera.items():
                camera[key] = val.astype(np.float32)
            self.infos['cameras'] = [camera]
        self.check_frames_length()
        self.find_best_people = find_best_people

    def __getitem__(self, index):
        frame = self.frames[index]
        ret = {}
        for key, value in self.infos.items():
            if len(value) == 1:
                ret[key] = value[0]
            elif frame >= len(value):  # was `index`, but the lookup below uses `frame`
                myerror(f'[{self.__class__.__name__}] {key}: index {frame} out of range {len(value)}')
            else:
                ret[key] = value[frame]
        ret_new = {}
        for key, val in ret.items():
            if key == 'annots':
                annots = read_json(val)['annots']
                # select the best people
                annots = self.find_best_people(annots)
                ret_new.update(annots)
            elif key == 'cameras':
                ret_new[key] = val
            elif key == 'images':
                ret_new['imgnames'] = val
                if self.flag_read_image:
                    img = self.read_image(val)
                    ret_new[key] = img
                else:
                    ret_new[key] = val
            elif key == 'image_shape':
                ret_new['image_shape'] = val
        ret_new['meta'] = {
            'subs': self.subs,
            'index': index,
            'frame': self.frames[index],
            'image_shape': ret_new['image_shape'],
            'imgnames': ret_new['imgnames'],
        }
        return ret_new


class SVHandL(SVDataset):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.find_best_people = self._find_best_hand

    def _find_best_hand(self, annots):
        assert len(annots) == 1, 'SVHandL only support one person'
        annot = annots[0]
        ret = {
            'bbox': np.array(annot['bbox_handl2d'], dtype=np.float32),
            'keypoints': np.array(annot['handl2d'], dtype=np.float32),
        }
        return ret


if __name__ == '__main__':
    cfg = '''
    module: myeasymocap.datasets.1v1p.MonoDataset
    args:
      root: /nas/home/shuaiqing/EasyMocapDoc/demo/1v1p
      subs: ['0+000553+000965']
      ranges: [0, 99999, 1]
      read_image: True
      reader:
        images:
          root: images
          ext: .jpg
        annots:
          root: annots
          ext: .json
    '''
    import yaml
    cfg = yaml.load(cfg, Loader=yaml.FullLoader)
    dataset = SVDataset(**cfg['args'])
    print(dataset)
    for i in range(len(dataset)):
        data = dataset[i]
123
myeasymocap/io/model.py
Normal file
123
myeasymocap/io/model.py
Normal file
@ -0,0 +1,123 @@
import os
import torch
import numpy as np
from easymocap.bodymodel.smpl import SMPLModel

from easymocap.mytools.debug_utils import log


def try_to_download_SMPL(model_dir):
    cmd = 'wget https://www.dropbox.com/s/aeulffqzb3zmh8x/pare-github-data.zip'
    os.system(cmd)
    os.makedirs(model_dir, exist_ok=True)
    cmd = 'unzip pare-github-data.zip -d {}'.format(model_dir)
    print('[RUN] {}'.format(cmd))
    os.system(cmd)


class SMPLLoader:
    def __init__(self, model_path, regressor_path, return_keypoints=True):
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        if not os.path.exists(model_path):
            log('[SMPL] Model not found in `{}`'.format(model_path))
            log('[SMPL] Downloading model to `{}`'.format(model_path))
            try_to_download_SMPL('models/pare')
            assert os.path.exists(model_path), f'{model_path} not exists'
        if not os.path.exists(regressor_path):
            if regressor_path.endswith('J_regressor_body25.npy'):
                url = 'https://github.com/zju3dv/EasyMocap/raw/master/data/smplx/J_regressor_body25.npy'
                os.makedirs(os.path.dirname(regressor_path), exist_ok=True)
                cmd = 'wget {} -O {}'.format(url, regressor_path)
                os.system(cmd)
            assert os.path.exists(regressor_path), f'{regressor_path} not exists'
        log('[SMPL] Loading model in `{}`'.format(model_path))
        log('[SMPL] Using keypoints regressor `{}`'.format(regressor_path))
        smplmodel = SMPLModel(model_path=model_path,
                              model_type='smpl', device=device,
                              regressor_path=regressor_path,
                              NUM_SHAPES=10,
                              )
        self.smplmodel = smplmodel
        self.return_keypoints = return_keypoints

    def __call__(self):
        return {
            'body_model': self.smplmodel,
            'model': self.forward}

    def forward(self, params):
        keypoints = self.smplmodel.keypoints(params, return_tensor=True)
        ret = {
            'keypoints': keypoints
        }
        ret.update(params)
        return ret
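A hedged usage sketch (the paths are assumptions): the loader returns both the body model object and a forward function that regresses keypoints from SMPL parameters, which downstream fitting stages consume:

loader = SMPLLoader('models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl',
                    'models/J_regressor_body25.npy')
handles = loader()
body_model, model = handles['body_model'], handles['model']
# model(params) echoes the params and adds the regressed 'keypoints'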

class MANOLoader:
    def __init__(self, cfg_path, model_path, regressor_path, num_pca_comps=45, use_pca=False, use_flat_mean=False):
        log('[MANO] Loading model in `{}`'.format(model_path))
        log('[MANO] Using keypoints regressor `{}`'.format(regressor_path))
        assert os.path.exists(model_path), f'{model_path} not exists, Please download it from `mano.is.tue.mpg.de`'
        if not os.path.exists(regressor_path) and regressor_path.endswith('J_regressor_mano_LEFT.txt'):
            url = 'https://raw.githubusercontent.com/zju3dv/EasyMocap/master/data/smplx/J_regressor_mano_LEFT.txt'
            os.makedirs(os.path.dirname(regressor_path), exist_ok=True)
            cmd = 'wget {} -O {}'.format(url, regressor_path)
            os.system(cmd)
        assert os.path.exists(regressor_path), f'{regressor_path} not exists'
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        from easymocap.config import Config, load_object
        cfg_data = Config.load(cfg_path)
        cfg_data['args']['model_path'] = model_path
        cfg_data['args']['regressor_path'] = regressor_path
        cfg_data['args']['cfg_hand']['num_pca_comps'] = num_pca_comps
        cfg_data['args']['cfg_hand']['use_pca'] = use_pca
        cfg_data['args']['cfg_hand']['use_flat_mean'] = use_flat_mean
        model = load_object(cfg_data.module, cfg_data.args)
        self.manomodel = model

    def __call__(self):
        return {
            'hand_model': self.manomodel,
            'model': self.forward}

    def forward(self, params):
        keypoints = self.manomodel.keypoints(params, return_tensor=True)
        ret = {
            'keypoints': keypoints
        }
        ret.update(params)
        return ret


class MANOLoader_lr:
    def __init__(self, cfg_path, model_path, regressor_path, num_pca_comps=45, use_pca=False):
        self.Model_l = MANOLoader(cfg_path, model_path, regressor_path, num_pca_comps, use_pca)
        self.Model_r = MANOLoader(cfg_path, model_path.replace('LEFT', 'RIGHT'), regressor_path.replace('LEFT', 'RIGHT'), num_pca_comps, use_pca)

    def __call__(self):
        ret = {}
        out1 = self.Model_l()
        for key in out1.keys():
            ret[key + '_l'] = out1[key]
        out2 = self.Model_r()
        for key in out2.keys():  # was `out1.keys()`; correct since both share keys, but iterate the right dict
            ret[key + '_r'] = out2[key]
        return ret


class SMPLHLoader:
    def __init__(self, path):
        from easymocap.config import Config, load_object
        cfg_data = Config.load(path)
        self.model = load_object(cfg_data.module, cfg_data.args)

    def __call__(self):
        return {
            'smplh_model': self.model,
            'model': self.forward}

    def forward(self, params):
        keypoints = self.model(**params, return_verts=False, return_tensor=True)
        ret = {
            'keypoints': keypoints.clone(),
            'keypoints_body': keypoints[..., :25, :].clone(),
            'keypoints_handlr': keypoints[..., 25:, :].clone()
        }
        ret.update(params)
        return ret
42
myeasymocap/io/video.py
Normal file
42
myeasymocap/io/video.py
Normal file
@ -0,0 +1,42 @@
import os
import shutil
from os.path import join
from glob import glob
from easymocap.mytools.debug_utils import log, mywarn, myerror, run_cmd


class MakeVideo:
    def __init__(self, fps, keep_image, output='tmp') -> None:
        self.output = output
        self.fps = fps
        self.debug = False
        self.keep_image = keep_image

    def __call__(self):
        restart = ' -y '
        fps_in = fps_out = self.fps
        fps_in = ' -r {}'.format(fps_in)
        path = self.output
        ext = '.jpg'
        cmd = ' -pix_fmt yuv420p -vcodec libx264'
        cmd += ' -r {}'.format(fps_out)
        if ext == '.png':
            cmd += ' -profile:v main'
        pathlist = sorted(os.listdir(path))
        pathlist = [join(path, p) for p in pathlist if os.path.isdir(join(path, p))]
        for path in pathlist:
            imgnames = glob(join(path, '*{}'.format(ext)))
            if len(imgnames) == 0:
                continue
            shell = f'ffmpeg{restart}{fps_in} -i "{path}/%06d{ext}" -vf scale="2*ceil(iw/2):2*ceil(ih/2)"{cmd} "{path}.mp4"'
            if not self.debug:
                shell += ' -loglevel quiet'
            print(shell)
            os.system(shell)
            # verify that the video was actually generated; if not, rerun with full logs
            if not os.path.exists(path + '.mp4'):
                mywarn('Video {} is not generated'.format(path + '.mp4'))
                shell = shell.replace(' -loglevel quiet', '')
                run_cmd(shell)
            else:
                if not self.keep_image:
                    shutil.rmtree(path)
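For reference, the assembled command looks roughly like this (the fps and path are illustrative); the scale filter pads odd dimensions to even ones, which libx264 with yuv420p requires:

ffmpeg -y  -r 25 -i "tmp/render/%06d.jpg" -vf scale="2*ceil(iw/2):2*ceil(ih/2)" -pix_fmt yuv420p -vcodec libx264 -r 25 "tmp/render.mp4" -loglevel quiet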
260
myeasymocap/io/vis.py
Normal file
260
myeasymocap/io/vis.py
Normal file
@ -0,0 +1,260 @@
import os
from typing import Any
import numpy as np
import cv2
from os.path import join
from easymocap.mytools.vis_base import plot_keypoints_auto, merge, plot_bbox, get_rgb, plot_cross
from easymocap.datasets.base import add_logo
from easymocap.mytools.camera_utils import Undistort


def projectPoints(k3d, camera):
    k3d0 = np.ascontiguousarray(k3d[:, :3])
    k3d_rt = np.dot(k3d0, camera['R'].T) + camera['T'].T
    depth = k3d_rt[:, -1:]
    k2d, _ = cv2.projectPoints(k3d0, camera['R'], camera['T'], camera['K'], camera['dist'])
    k2d = np.hstack([k2d[:, 0], k3d[:, -1:]])  # keep the input confidence as the third column
    return k2d, depth
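projectPoints applies the pinhole model x = K(RX + T), with cv2.projectPoints handling the lens distortion; the confidence column of the 3D input is passed through unchanged. A minimal check with an identity camera (illustrative only):

cam = {'R': np.eye(3), 'T': np.zeros((3, 1)), 'K': np.eye(3), 'dist': np.zeros((1, 5))}
k3d = np.array([[0.5, -0.5, 2.0, 1.0]])  # x, y, z, confidence
k2d, depth = projectPoints(k3d, cam)
print(k2d)    # [[ 0.25 -0.25  1.  ]]  (x/z, y/z, conf)
print(depth)  # [[2.]]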


class VisBase:
    def __init__(self, scale=1, lw_factor=1, name='vis', mode='none', mode_args={}):
        self.scale = scale
        self.output = '/tmp'
        self.name = name
        self.lw = lw_factor
        self.count = 0
        self.mode = mode
        self.mode_args = mode_args

    def merge_and_write(self, vis):
        vis = [v for v in vis if not isinstance(v, str)]
        if self.mode == 'center':
            for i, v in enumerate(vis):
                # crop the center region
                left = int(v.shape[1] - v.shape[0]) // 2
                v = v[:, left:left+v.shape[0], :]
                vis[i] = v
        elif self.mode == 'crop':
            for i, v in enumerate(vis):
                t, b, l, r = self.mode_args[i]
                v = v[t:b, l:r]
                vis[i] = v
        if len(vis) == 0:
            return 0
        if len(vis) == 3:  # merge strategy for exactly 3 views: keep the first, stack the other two at half size on the right
            vis_0 = vis[0]
            vis_1 = cv2.resize(vis[1], None, fx=0.5, fy=0.5)
            vis_2 = cv2.resize(vis[2], None, fx=0.5, fy=0.5)
            vis_12 = np.vstack([vis_1, vis_2])
            vis = np.hstack([vis_0, vis_12])
        else:
            vis = merge(vis)
        vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
        vis = add_logo(vis)
        # TODO: read the image name from the input meta
        outname = join(self.output, self.name, '{:06d}.jpg'.format(self.count))
        os.makedirs(os.path.dirname(outname), exist_ok=True)
        cv2.imwrite(outname, vis)
        self.count += 1


class Vis3D(VisBase):
    def __init__(self, scale, lw_factor=1, name='repro', **kwargs) -> None:
        super().__init__(scale, lw_factor, name, **kwargs)

    def __call__(self, images, cameras, keypoints3d=None, results=None):
        # keypoints3d: (nJoints, 4)
        undist = False
        cameras['dist'] = np.zeros_like(cameras['dist'])
        vis_all = []
        for nv in range(len(images)):
            if isinstance(images[nv], str):
                continue
            camera = {key: cameras[key][nv] for key in ['R', 'T', 'K', 'dist']}
            if undist:
                vis = Undistort.image(images[nv], cameras['K'][nv], cameras['dist'][nv])
                camera['dist'] = np.zeros_like(camera['dist'])
            else:
                vis = images[nv].copy()

            if results is None:
                if len(keypoints3d.shape) == 2:
                    keypoints_repro, depth = projectPoints(keypoints3d, camera)
                    plot_keypoints_auto(vis, keypoints_repro, pid=0, use_limb_color=False)
                else:
                    for pid in range(keypoints3d.shape[0]):
                        keypoints_repro, depth = projectPoints(keypoints3d[pid], camera)
                        plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=False)
            else:
                for res in results:
                    k3d = res['keypoints3d']
                    keypoints_repro, depth = projectPoints(k3d, camera)
                    if k3d.shape[0] == 1:
                        x, y = keypoints_repro[0, 0], keypoints_repro[0, 1]
                        # if res['id'] == 6:
                        plot_cross(vis, x, y, col=get_rgb(res['id']), lw=self.lw, width=self.lw * 5)
                    elif k3d.shape[0] == 2:  # limb
                        x1, y1 = keypoints_repro[0, 0], keypoints_repro[0, 1]
                        x2, y2 = keypoints_repro[1, 0], keypoints_repro[1, 1]
                        cv2.line(vis, (int(x1), int(y1)), (int(x2), int(y2)), get_rgb(res['id']), self.lw)
                    else:
                        plot_keypoints_auto(vis, keypoints_repro, pid=res['id'], use_limb_color=False, lw_factor=self.lw)
                        cv2.putText(vis, '{}'.format(res['id']), (int(keypoints_repro[0, 0]), int(keypoints_repro[0, 1])),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, get_rgb(res['id']), self.lw)
            vis_all.append(vis)
        self.merge_and_write(vis_all)


class VisRoot(VisBase):
    def __call__(self, images, pelvis):
        vis = []
        for nv in range(len(images)):
            if isinstance(images[nv], str):
                continue
            v = images[nv].copy()
            for i in range(pelvis[nv].shape[0]):
                color = get_rgb(i)
                x, y = pelvis[nv][i][0], pelvis[nv][i][1]
                x, y = int(x), int(y)
                plot_cross(v, x, y, col=color, lw=self.lw, width=self.lw * 10)
                cv2.putText(v, '{}'.format(i), (int(x), int(y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, color, self.lw)
            vis.append(v)
        self.merge_and_write(vis)


class VisPAF(VisBase):
    def __call__(self, images, openpose, openpose_paf):
        # openpose [nViews, nJoints, 3]
        # openpose_paf [nViews, dict, MxN]
        vis_limb = [(8, 1)]
        vis = []
        nViews = len(images)
        for nv in range(nViews):
            if isinstance(images[nv], str):
                continue
            v = images[nv].copy()
            k2d = openpose[nv]
            paf = openpose_paf[nv]
            for (src, dst) in vis_limb:
                # (M, N)
                paf_ = paf[(src, dst)]
                for i in range(paf_.shape[0]):
                    for j in range(paf_.shape[1]):
                        if paf_[i, j] < 0.1:
                            continue
                        x1, y1 = k2d[src][i, :2]
                        x2, y2 = k2d[dst][j, :2]
                        lw = int(paf_[i, j] * 10)
                        cv2.line(v, (int(x1), int(y1)), (int(x2), int(y2)), get_rgb(src), lw)
            vis.append(v)
        self.merge_and_write(vis)


class VisBirdEye(VisBase):
    def __init__(self, xranges, yranges, resolution=1024, name='bird', **kwargs):
        super().__init__(name=name, **kwargs)
        self.xranges = xranges
        self.yranges = yranges
        self.resolution = resolution
        self.blank = np.zeros((resolution, resolution, 3), dtype=np.uint8) + 255
        x0, y0 = self.map_x_y(0, 0)
        cv2.line(self.blank, (x0, 0), (x0, resolution), (0, 0, 0), 1)
        cv2.line(self.blank, (0, y0), (resolution, y0), (0, 0, 0), 1)

    def map_x_y(self, x, y):
        x = (x - self.xranges[0]) / (self.xranges[1] - self.xranges[0]) * self.resolution
        y = (y - self.yranges[0]) / (self.yranges[1] - self.yranges[0]) * self.resolution
        y = self.resolution - y
        x, y = int(x), int(y)
        return x, y

    def __call__(self, results, cameras):
        vis = self.blank.copy()
        R = cameras['R']
        T = cameras['T']
        # this has to support moving cameras in the future, so it cannot be pre-rendered
        center = - np.einsum('bmn,bnj->bmj', R.swapaxes(1, 2), T)
        for nv in range(center.shape[0]):
            x, y = center[nv, 0], center[nv, 1]
            x, y = self.map_x_y(x, y)
            plot_cross(vis, x, y, col=(0, 0, 255), lw=self.lw, width=20)
            cv2.putText(vis, 'cam{}'.format(nv), (int(x), int(y)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), self.lw//4)
        for res in results:
            pid = res['id']
            color = get_rgb(pid)
            x, y, z = res['pelvis'][0, 0], res['pelvis'][0, 1], res['pelvis'][0, 2]
            length = 0.5 * (np.clip(z - 1., 0, 1) + 1)
            length = int(length/(self.xranges[1] - self.xranges[0]) * self.resolution)
            x, y = self.map_x_y(x, y)
            plot_cross(vis, x, y, col=color, lw=self.lw, width=self.lw * 5)
            cv2.rectangle(vis, (x - length, y - length), (x + length, y + length), color, self.lw)
            cv2.putText(vis, '{}'.format(pid), (int(x), int(y)),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, color, self.lw)
        self.merge_and_write([vis])


class VisMatch(VisBase):
    def __call__(self, images, pelvis, results):
        vis = []
        for nv in range(len(images)):
            if isinstance(images[nv], str):
                vis.append(images[nv])
                continue
            else:
                vis.append(images[nv].copy())
        for res in results:
            pid = res['id']
            for nv, ind in zip(res['views'], res['indices']):
                v = vis[nv]
                if isinstance(v, str):
                    continue
                x, y = pelvis[nv][ind][0], pelvis[nv][ind][1]
                plot_cross(v, pelvis[nv][ind][0], pelvis[nv][ind][1], col=get_rgb(pid), lw=self.lw, width=self.lw * 5)
                cv2.putText(v, '{}'.format(pid), (int(x), int(y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, get_rgb(pid), self.lw)
        self.merge_and_write(vis)


class Vis_det(VisBase):
    def __call__(self, images, **kwargs):
        vis = []
        for nv in range(len(images)):
            if isinstance(images[nv], str):
                vis.append(images[nv])
                continue
            else:
                v = images[nv].copy()
                for key, bbox in kwargs.items():
                    _bbox = bbox[nv]
                    for idet in range(_bbox.shape[0]):
                        plot_bbox(v, _bbox[idet], idet)
                vis.append(v)
        self.merge_and_write(vis)


class Vis2D(VisBase):
    def __call__(self, images, **kwargs):
        if 'keypoints' in kwargs:
            keypoints = kwargs['keypoints']
        else:
            if len(kwargs.keys()) == 1:
                keypoints = list(kwargs.values())[0]
            else:
                raise NotImplementedError
        if 'bbox' in kwargs:
            bbox = kwargs['bbox']
        else:
            bbox = None
        if not isinstance(images, list):
            images = [images]
            keypoints = [keypoints]
            bbox = [bbox]
        vis = []
        for nv in range(len(images)):
            if isinstance(images[nv], str):
                continue
            k2d = keypoints[nv]
            vis_ = images[nv].copy()
            if len(k2d.shape) == 2:
                plot_keypoints_auto(vis_, k2d, pid=0, use_limb_color=False)
                if bbox is not None:
                    if len(bbox[nv].shape) == 2:
                        plot_bbox(vis_, bbox[nv][0], 0)
                    else:
                        plot_bbox(vis_, bbox[nv], 0)
            else:
                for pid in range(k2d.shape[0]):
                    plot_keypoints_auto(vis_, k2d[pid], pid=pid, use_limb_color=False)
            vis.append(vis_)
        self.merge_and_write(vis)
389
myeasymocap/io/vis3d.py
Normal file
389
myeasymocap/io/vis3d.py
Normal file
@ -0,0 +1,389 @@
from tqdm import tqdm
import cv2
import os
from easymocap.visualize.pyrender_wrapper import plot_meshes
from os.path import join
import numpy as np
from easymocap.datasets.base import add_logo
from easymocap.mytools.vis_base import merge, plot_bbox
from .vis import VisBase

class Render(VisBase):
    def __init__(self, name='render', scale=0.5, backend='pyrender', **kwargs) -> None:
        super().__init__(name=name, scale=1., **kwargs)
        self.scale3d = scale

    def __call__(self, body_model, params, cameras, imgnames):
        vertices = body_model.vertices(params, return_tensor=False)
        faces = body_model.faces
        for nf, img in enumerate(tqdm(imgnames, desc=self.name)):
            basename = os.path.basename(img)
            # re-read the image from disk
            assert os.path.exists(img), img
            vis = cv2.imread(img)
            vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
            vert = vertices[nf]
            meshes = {}
            meshes[0] = {
                'vertices': vert,
                'faces': faces,
                'id': 0,
                'name': 'human_{}'.format(0)
            }
            K = cameras['K'][nf].copy()
            K[:2, :] *= self.scale3d
            R = cameras['R'][nf]
            T = cameras['T'][nf]
            ret = plot_meshes(vis, meshes, K, R, T, mode='image')
            self.merge_and_write([ret])
class Render_multiview(VisBase):
    def __init__(self, view_list=[], name='render', model_name='body_model', render_mode='image', backend='pyrender', shape=[-1,-1], scale=1., **kwargs):
        self.scale3d = scale
        super().__init__(name=name, scale=1., **kwargs)
        self.view_list = view_list
        self.render_mode = render_mode
        self.model_name = model_name
        self.shape = shape

    def render_(self, vertices, faces, cameras, imgnames):
        for nf, img in enumerate(tqdm(imgnames, desc=self.name)):
            mv_ret = []
            if not isinstance(img, list):
                img = [img]
            for nv in self.view_list:
                basename = os.path.basename(img[nv])
                assert os.path.exists(img[nv]), img[nv]
                vis = cv2.imread(img[nv])
                vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
                vert = vertices[nf]
                meshes = {}
                if vert.ndim == 2:
                    meshes[0] = {
                        'vertices': vert,
                        'faces': faces,
                        'id': 0,
                        'name': 'human_{}'.format(0)
                    }
                elif vert.ndim == 3:
                    for pid in range(vert.shape[0]):
                        meshes[pid] = {
                            'vertices': vert[pid],
                            'faces': faces,
                            'id': pid,
                            'name': 'human_{}'.format(pid)
                        }
                if cameras['K'].ndim == 4:
                    K = cameras['K'][nf][nv].copy()
                    # the image above was resized by scale3d, so the intrinsics
                    # must be scaled by the same factor (was self.scale)
                    K[:2, :] *= self.scale3d
                    R = cameras['R'][nf][nv]
                    T = cameras['T'][nf][nv]
                else:
                    K = cameras['K'][nv].copy()
                    K[:2, :] *= self.scale3d
                    R = cameras['R'][nv]
                    T = cameras['T'][nv]
                # add ground
                if self.render_mode == 'ground':
                    from easymocap.visualize.geometry import create_ground
                    ground = create_ground(
                        center=[0, 0, -0.05], xdir=[1, 0, 0], ydir=[0, 1, 0], # position
                        step=1, xrange=10, yrange=10, # size
                        white=[1., 1., 1.], black=[0.5,0.5,0.5], # colors
                        two_sides=True
                    )
                    meshes[1001] = ground
                    vis = np.zeros((self.shape[0], self.shape[1], 3), dtype=np.uint8) + 255
                    focal = min(self.shape) * 1.2
                    # principal point at the image center: cx = width/2, cy = height/2
                    K = np.array([
                        [focal, 0, vis.shape[1]/2],
                        [0, focal, vis.shape[0]/2],
                        [0, 0, 1]])
                    ret = plot_meshes(vis, meshes, K, R, T, mode='rgb')
                else:
                    ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
                ret = add_logo(ret)
                mv_ret.append(ret)
            self.merge_and_write(mv_ret)

    def __call__(self, params, cameras, imgnames, **kwargs):
        body_model = kwargs[self.model_name]
        vertices = body_model.vertices(params, return_tensor=False)
        faces = body_model.faces
        self.render_(vertices, faces, cameras, imgnames)
class Render_nocam:
    def __init__(self, scale=0.5, backend='pyrender', view_list=[0]) -> None:
        self.name = 'render'
        self.scale = scale
        self.view_list = view_list

    def __call__(self, hand_model, params, images):
        vertices = hand_model(**params, return_verts=True, return_tensor=False)
        faces = hand_model.faces
        for nf, img in enumerate(tqdm(images, desc=self.name)):
            for nv in self.view_list:
                if isinstance(img, np.ndarray):
                    vis = img.copy()
                    basename = '{:06}.jpg'.format(nf)
                else:
                    basename = os.path.basename(img[nv])
                    # re-read the image from disk
                    assert os.path.exists(img[nv]), img[nv]
                    vis = cv2.imread(img[nv])

                vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
                vert = vertices[nf]
                meshes = {}
                meshes[0] = {
                    'vertices': vert,
                    'faces': faces,
                    'id': 0,
                    'name': 'human_{}'.format(0)
                }
                K = np.array([[vis.shape[0],0,vis.shape[0]/2],[0,vis.shape[1],vis.shape[1]/2],[0,0,1]])
                K[:2, :] *= self.scale
                R = np.eye(3)
                T = np.array([0,0,0.3])
                ret = plot_meshes(vis, meshes, K, R, T, mode='image')
                outname = join(self.output, self.name, basename)
                os.makedirs(os.path.dirname(outname), exist_ok=True)
                cv2.imwrite(outname, ret)

class Render_multiview_hand(Render_multiview):
    def __call__(self, hand_model_l, params_l, cameras, imgnames):
        vertices = hand_model_l(**params_l, return_verts=True, return_tensor=False)
        faces = hand_model_l.faces
        self.render_(vertices, faces, cameras, imgnames)
class Render_smplh(Render_multiview):
    def __init__(self, path, at_step, scale=0.5, mode='image', backend='pyrender', view_list=[0]) -> None:
        # NOTE: the parent takes keyword arguments; passing these positionally
        # would scramble them, so they are forwarded by name here
        super().__init__(view_list=view_list, render_mode=mode, backend=backend, scale=scale)
        from easymocap.config import Config, load_object
        cfg_data = Config.load(path)
        self.model = load_object(cfg_data.module, cfg_data.args)
        self.at_step = at_step

    def __call__(self, params_smplh, cameras, imgnames):
        vertices = self.model(return_verts=True, return_tensor=False, **params_smplh)
        faces = self.model.faces
        if self.at_step:
            self.render_([vertices], faces, cameras, [imgnames])
        else:
            self.render_(vertices, faces, cameras, imgnames)

class Render_smplh2(Render_smplh):
    def __call__(self, params, cameras, imgnames):
        super().__call__(params, cameras, imgnames)

def projectPoints(X, K, R, t, Kd):
    x = R @ X + t
    x[0:2,:] = x[0:2,:]/x[2,:]  # project to the normalized image plane
    r = x[0,:]*x[0,:] + x[1,:]*x[1,:]

    x[0,:] = x[0,:]*(1 + Kd[0]*r + Kd[1]*r*r + Kd[4]*r*r*r) + 2*Kd[2]*x[0,:]*x[1,:] + Kd[3]*(r + 2*x[0,:]*x[0,:])
    x[1,:] = x[1,:]*(1 + Kd[0]*r + Kd[1]*r*r + Kd[4]*r*r*r) + 2*Kd[3]*x[0,:]*x[1,:] + Kd[2]*(r + 2*x[1,:]*x[1,:])
    x[0,:] = K[0,0]*x[0,:] + K[0,1]*x[1,:] + K[0,2]
    x[1,:] = K[1,0]*x[0,:] + K[1,1]*x[1,:] + K[1,2]
    return x
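projectPoints above is the standard pinhole projection with OpenCV's 5-parameter distortion model (Kd = [k1, k2, p1, p2, k3]). A minimal sketch of exercising it with a synthetic, distortion-free camera; all values below are illustrative, not from this repository:

import numpy as np

K = np.array([[1000., 0., 960.], [0., 1000., 540.], [0., 0., 1.]])  # hypothetical intrinsics
R = np.eye(3)
t = np.zeros((3, 1))
Kd = np.zeros(5)  # no distortion
X = np.array([[0.0, 0.1], [0.0, 0.1], [2.0, 2.0]])  # (3, N) points, 2m in front of the camera
x = projectPoints(X, K, R, t, Kd)
print(x[:2].T)  # pixel coordinates; the on-axis point lands at (960, 540)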
class Render_multiview_handbyk3d(Render_multiview):
    def __call__(self, hand_model_l, params_l, hand_model_r, params_r, cameras, imgnames, keypoints3d):
        joint_regressor_r = np.load('models/handmesh/data/joint_regressor_r.npy')  # right hand
        joint_regressor_l = np.load('models/handmesh/data/joint_regressor_l.npy')  # left hand
        facesl = hand_model_l.faces
        facesr = hand_model_r.faces

        # suppress the results of person 0
        keypoints3d[0] = 0

        img = imgnames
        k3d = keypoints3d

        vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)
        vertices_r = hand_model_r(**params_r, return_verts=True, return_tensor=False)

        joint_l = np.repeat(joint_regressor_l[None, :, :], vertices_l.shape[0], 0) @ vertices_l
        joint_r = np.repeat(joint_regressor_r[None, :, :], vertices_r.shape[0], 0) @ vertices_r
        # wrist joint index: 7 for the left hand, 4 for the right hand
        params_l['Th'] += k3d[:,7,:3] - joint_l[:,0,:]
        params_r['Th'] += k3d[:,4,:3] - joint_r[:,0,:]
        vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)
        vertices_r = hand_model_r(**params_r, return_verts=True, return_tensor=False)

        faces = []
        vert = []
        pids = []
        for i in range(k3d.shape[0]):
            if k3d[i,7,-1] == 0:
                continue
            vv = vertices_l[i].copy()
            vert.append(vv)
            faces.append(facesl)
            pids.append(i)

        for i in range(k3d.shape[0]):
            if k3d[i,4,-1] == 0:
                continue
            vv = vertices_r[i].copy()
            vert.append(vv)
            faces.append(facesr)
            pids.append(i)

        faces = np.stack(faces)
        vert = np.stack(vert)

        for nv in self.view_list:
            basename = os.path.basename(img[nv])
            # re-read the image from disk
            assert os.path.exists(img[nv]), img[nv]
            vis = cv2.imread(img[nv])
            vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)

            meshes = {}
            if vert.ndim == 2:
                meshes[0] = {
                    'vertices': vert,
                    'faces': faces,
                    'id': 0,
                    'name': 'human_{}'.format(0)
                }
            elif vert.ndim == 3:
                for pid in range(vert.shape[0]):
                    meshes[pid] = {
                        'vertices': vert[pid],
                        'faces': faces[pid],
                        'vid': pids[pid],
                        'name': 'human_{}'.format(pid)
                    }
            K = cameras['K'][nv].copy()
            K[:2, :] *= self.scale
            R = cameras['R'][nv]
            T = cameras['T'][nv]
            from easymocap.mytools.vis_base import plot_keypoints_auto
            for pid in range(keypoints3d.shape[0]):
                keypoints_repro = projectPoints(keypoints3d[pid].T[:3,:], K, R, T, cameras['dist'][nv].reshape(5)).T
                keypoints_repro[:,-1] = keypoints3d[pid,:,-1]
                plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=False)

            ret = plot_meshes(vis, meshes, K, R, T, mode=self.mode)
            outname = join(self.output, self.name, basename)
            os.makedirs(os.path.dirname(outname), exist_ok=True)
            cv2.imwrite(outname, ret)
class Render_selectview:
    def __init__(self, scale=0.5, backend='pyrender', output='output', mode='image') -> None:
        self.name = 'render_debug'
        self.scale = scale
        self.view_list = [5]
        self.output = output
        self.mode = mode

    def __call__(self, hand_model_l, posel, match3d_l, cameras, imgnames, keypoints3d, bbox_handl, joint_regressor, wristid):
        img = imgnames
        k3d = keypoints3d
        joint_regressor_l = joint_regressor
        facesl = hand_model_l.faces
        hand_list = []
        for pid in range(len(match3d_l)):
            dt = match3d_l[pid]
            if isinstance(dt, int):
                # TODO: handle -1, i.e. no suitable matched hand was found
                hand_list.append(np.zeros((1,48)))
                break
            out_img = []
            for cid in range(len(dt['views'])):
                nv = dt['views'][cid]
                poseid = dt['indices'][cid]
                pose = posel[nv][poseid].copy()

                # rotate the per-view global orientation into world coordinates
                Rh = pose[:,:3].copy()
                invR = np.linalg.inv(cameras['R'][nv])
                Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
                Rh_m_new = invR @ Rh_m_old
                Rh = cv2.Rodrigues(Rh_m_new)[0]

                pose_ = np.hstack((Rh.reshape(3), pose[:,3:].reshape(-1))).reshape(1,-1)

                Rh = pose_[:,:3].copy()
                pose_[:,:3] = 0
                params_l = {
                    'Rh': Rh,
                    'Th': np.zeros_like(Rh),
                    'poses': pose_,
                    'shapes': np.zeros((Rh.shape[0],10)),
                }
                vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)
                joint_l = np.repeat(joint_regressor_l[None, :, :], vertices_l.shape[0], 0) @ vertices_l
                params_l['Th'] += k3d[pid,wristid,:3] - joint_l[0,0,:]
                vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)

                vert = vertices_l[0]
                faces = facesl

                basename = os.path.basename(img[nv])
                # re-read the image from disk
                assert os.path.exists(img[nv]), img[nv]
                vis = cv2.imread(img[nv])

                plot_bbox(vis, bbox_handl[nv][poseid], 0)
                vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)

                meshes = {}
                if vert.ndim == 2:
                    meshes[0] = {
                        'vertices': vert,
                        'faces': faces,
                        'id': 0,
                        'name': 'human_{}'.format(0)
                    }
                elif vert.ndim == 3:
                    for pid in range(vert.shape[0]):
                        meshes[pid] = {
                            'vertices': vert[pid],
                            'faces': faces[pid],
                            'id': pid,
                            'name': 'human_{}'.format(pid)
                        }
                K = cameras['K'][nv].copy()
                K[:2, :] *= self.scale
                R = cameras['R'][nv]
                T = cameras['T'][nv]
                ret = plot_meshes(vis, meshes, K, R, T, mode=self.mode)
                out_img.append(ret)

            out_img = merge(out_img)
            outname = join(self.output, self.name, '{}-{:02d}.jpg'.format(basename.split('.jpg')[0], pid))
            os.makedirs(os.path.dirname(outname), exist_ok=True)
            cv2.imwrite(outname, out_img)

class Render_selectview_lr:
    def __init__(self, scale=0.5, backend='pyrender', output='output', mode='image') -> None:
        self.output = output
        self.model_l = Render_selectview(scale=0.5, backend='pyrender', output=self.output, mode=mode)
        self.model_r = Render_selectview(scale=0.5, backend='pyrender', output=self.output, mode=mode)
        self.model_l.name += '_l'
        self.model_r.name += '_r'

    def __call__(self, hand_model_l, posel, poser, match3d_l, match3d_r, hand_model_r, cameras, imgnames, keypoints3d, bbox_handl, bbox_handr):
        joint_regressor_r = np.load('models/handmesh/data/joint_regressor_r.npy')  # right hand
        joint_regressor_l = np.load('models/handmesh/data/joint_regressor_l.npy')  # left hand

        self.model_l(hand_model_l, posel, match3d_l, cameras, imgnames, keypoints3d, bbox_handl, joint_regressor_l, 7)
        self.model_r(hand_model_r, poser, match3d_r, cameras, imgnames, keypoints3d, bbox_handr, joint_regressor_r, 4)

class Render_mv(Render):
    def __call__(self, body_model, params, cameras, imgnames):
        super().__call__(body_model, params, cameras, [imgnames[0], imgnames[1]])
97
myeasymocap/io/write.py
Normal file
@ -0,0 +1,97 @@
import os
from easymocap.mytools.file_utils import write_keypoints3d, write_smpl
from easymocap.annotator.file_utils import save_annot
from os.path import join
from tqdm import tqdm

class Write:
    def __init__(self, output='/tmp', name='keypoints3d') -> None:
        self.output = output
        self.name = name

    def __call__(self, keypoints3d):
        for nf in tqdm(range(keypoints3d.shape[0]), desc='writing to {}/{}'.format(self.output, self.name)):
            res = [{
                'id': 0,
                'keypoints3d': keypoints3d[nf]
            }]
            dumpname = join(self.output, self.name, '{:06d}.json'.format(nf))
            write_keypoints3d(dumpname, res)
        return {}
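Write dumps one JSON file per frame via write_keypoints3d. A minimal usage sketch, assuming keypoints3d is an (nFrames, nJoints, 4) array of (x, y, z, confidence); the shapes and output path below are illustrative only:

import numpy as np

keypoints3d = np.zeros((10, 25, 4), dtype=np.float32)  # hypothetical input
writer = Write(output='output/demo', name='keypoints3d')
writer(keypoints3d)  # writes output/demo/keypoints3d/000000.json ... 000009.json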
class WriteAll:
    def __init__(self, name, output='/tmp') -> None:
        self.output = output
        self.name = name

    def __call__(self, results, meta):
        for nf in tqdm(range(len(results)), desc='writing to {}/{}'.format(self.output, self.name)):
            res = [{'id': r['id'], 'keypoints3d': r['keypoints3d']} for r in results[nf]]
            res.sort(key=lambda x: x['id'])
            imgnames = meta['imgnames'][nf]
            if len(imgnames) > 0:
                name = os.path.basename(imgnames[0])
                name = name.replace('.jpg', '')
            else:
                name = '{:06d}'.format(nf)  # was '{:06f}': nf is an integer frame index
            dumpname = join(self.output, self.name, '{}.json'.format(name))
            write_keypoints3d(dumpname, res)

class Write2D:
    def __init__(self, name, output='/tmp') -> None:
        self.output = output
        self.name = name

    def __call__(self, results, meta):
        for nf in tqdm(range(len(results)), desc='writing to {}/{}'.format(self.output, self.name)):
            subs = meta['subs'][nf]
            result = results[nf]
            annots_all = {sub: [] for sub in subs}
            for res in result:
                for nv, v in enumerate(res['views']):
                    annots_all[subs[v]].append({
                        'personID': res['id'],
                        'bbox': res['bbox'][nv],
                        'keypoints': res['keypoints2d'][nv],
                    })
            for nv, sub in enumerate(subs):
                annots = {
                    'filename': f'{sub}/{nf:06d}.jpg',
                    'height': meta['image_shape'][nf][nv][0],
                    'width': meta['image_shape'][nf][nv][1],
                    'annots': annots_all[sub],
                    'isKeyframe': False
                }
                dumpname = join(self.output, self.name, sub, '{:06d}.json'.format(nf))
                save_annot(dumpname, annots)

class WriteSMPL:
    def __init__(self, name='smpl') -> None:
        # NOTE: self.output is expected to be injected by the pipeline,
        # as it is for the other writers above
        self.name = name

    def __call__(self, params=None, results=None, meta=None, model=None):
        results_all = []
        if results is None and params is not None:
            # copy params to results
            results = {0: {'params': params, 'keypoints3d': None, 'frames': list(range(len(params['Rh'])))}}
        for index in tqdm(meta['index'], desc=self.name):
            results_frame = []
            for pid, result in results.items():
                if index >= result['frames'][0] and index <= result['frames'][-1]:
                    frame_rel = result['frames'].index(index)
                    results_frame.append({
                        'id': pid,
                    })
                    for key in ['Rh', 'Th', 'poses', 'shapes']:
                        if result['params'][key].shape[0] == 1:
                            results_frame[-1][key] = result['params'][key]
                        else:
                            results_frame[-1][key] = result['params'][key][frame_rel:frame_rel+1]
                    param = results_frame[-1]
                    pred = model(param)['keypoints'][0]
                    results_frame[-1]['keypoints3d'] = pred
            write_smpl(join(self.output, self.name, '{:06d}.json'.format(meta['frame'][index])), results_frame)
            write_keypoints3d(join(self.output, 'keypoints3d', '{:06d}.json'.format(meta['frame'][index])), results_frame)
            results_all.append(results_frame)
        return {'results_perframe': results_all}
101
myeasymocap/operations/init.py
Normal file
@ -0,0 +1,101 @@
from typing import Any
import numpy as np
from easymocap.mytools.debug_utils import mywarn, log

def solve_translation(X, x, K):
    A = np.zeros((2*X.shape[0], 3))
    b = np.zeros((2*X.shape[0], 1))
    fx, fy = K[0, 0], K[1, 1]
    cx, cy = K[0, 2], K[1, 2]
    for nj in range(X.shape[0]):
        A[2*nj, 0] = 1
        A[2*nj + 1, 1] = 1
        A[2*nj, 2] = -(x[nj, 0] - cx)/fx
        A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
        b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
        b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
        A[2*nj:2*nj+2, :] *= x[nj, 2]
        b[2*nj:2*nj+2, :] *= x[nj, 2]
    trans = np.linalg.inv(A.T @ A) @ A.T @ b
    return trans.T[0]
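solve_translation recovers the camera-space translation T by stacking, per joint, the two linear constraints that (X + T) projects onto the observed pixel (u, v): T_x - ((u-cx)/fx) T_z = X_z (u-cx)/fx - X_x, plus the analogous row for y. Each row is weighted by the 2D confidence x[:, 2] and the normal equations T = (AᵀA)⁻¹Aᵀb are solved. A small self-check under a synthetic, noise-free translation (values illustrative):

import numpy as np

K = np.array([[1000., 0., 500.], [0., 1000., 500.], [0., 0., 1.]])
X = np.random.rand(15, 3)            # joints in camera coordinates
T_true = np.array([0.2, -0.1, 3.0])  # translation to recover
P = X + T_true
x2d = np.ones((15, 3))               # columns: u, v, confidence
x2d[:, 0] = K[0, 0] * P[:, 0] / P[:, 2] + K[0, 2]
x2d[:, 1] = K[1, 1] * P[:, 1] / P[:, 2] + K[1, 2]
print(solve_translation(X, x2d, K))  # ≈ [ 0.2 -0.1  3.0]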
class MeanShapes:
    def __init__(self, keys, dim=0) -> None:
        self.keys = keys
        self.dim = dim

    def __call__(self, params):
        for key in self.keys:
            log('[{}] Mean {}: {}'.format(self.__class__.__name__, key, params[key].shape))
            params[key] = params[key].mean(axis=self.dim, keepdims=True)
            log('[{}] Mean {}: {}'.format(self.__class__.__name__, key, params[key].shape))

class InitTranslation:
    def __init__(self, solve_T=True, solve_R=False) -> None:
        self.solve_T = solve_T
        self.solve_R = solve_R

    def __call__(self, body_model, params, cameras, keypoints):
        nJoints = 15  # use only the 15 main joints
        params['Th'] = np.zeros_like(params['Th'])
        kpts1 = body_model.keypoints(params, return_tensor=False)
        for i in range(kpts1.shape[0]):
            k2d = keypoints[i, :nJoints]
            if k2d[:, -1].sum() < nJoints / 2:
                mywarn('[{}] No valid keypoints in frame {}'.format(self.__class__.__name__, i))
                params['Th'][i] = params['Th'][i-1]
                continue
            trans = solve_translation(kpts1[i, :nJoints], k2d, cameras['K'][i])
            params['Th'][i] += trans
        return {'params': params}

class InitParams:
    def __init__(self, num_poses=69, num_shapes=10, rootid=8, share_shape=True, init_trans=0.) -> None:
        self.num_poses = num_poses
        self.num_shapes = num_shapes
        self.rootid = rootid
        self.share_shape = share_shape
        self.init_trans = init_trans

    def __call__(self, **kwargs):
        """
        keypoints3d: (nFrames, nJoints, 4) or (nFrames, nPerson, nJoints, 4)
        """
        key = list(kwargs.keys())[0]
        keypoints3d = kwargs[key]
        if keypoints3d.ndim == 4:
            shape = (keypoints3d.shape[:2])
        elif keypoints3d.ndim == 3:
            shape = (keypoints3d.shape[0],)
        else:
            raise ValueError('keypoints3d must be 3 or 4 dim')
        params = {
            'Rh': np.zeros((*shape, 3), dtype=np.float32),
            'Th': np.zeros((*shape, 3), dtype=np.float32),
            'poses': np.zeros((*shape, self.num_poses), dtype=np.float32),
            'shapes': np.zeros((*shape, self.num_shapes), dtype=np.float32)
        }
        # TODO: check the root confidence and interpolate
        # initialization
        if key == 'keypoints3d':
            params['Th'] = keypoints3d[..., self.rootid, :3]
        else:
            mywarn('[{}] Not used keypoints3d, set to {}'.format(self.__class__.__name__, self.init_trans))
            params['Th'][:, 2] = self.init_trans
        if self.share_shape:
            params['shapes'] = params['shapes'].mean(0, keepdims=True)
        return {'params': params}

class Init_params_and_target_poses(InitParams):
    def __call__(self, params_smplh, model):
        out = model(params_smplh)
        keypoints3d = out['keypoints'].cpu().detach().numpy()
        # the parent only accepts keyword arguments
        ret = super().__call__(keypoints3d=keypoints3d)
        for key in params_smplh.keys():
            ret['params'][key] = params_smplh[key]
            ret['target_'+key] = params_smplh[key]
        return ret
246
myeasymocap/operations/loss.py
Normal file
@ -0,0 +1,246 @@
import torch
import torch.nn as nn
import numpy as np

class GMoF(nn.Module):
    def __init__(self, rho=1):
        super(GMoF, self).__init__()
        self.rho = rho  # kept so extra_repr can report it
        self.rho2 = rho * rho

    def extra_repr(self):
        return 'rho = {}'.format(self.rho)

    def forward(self, est, gt=None, conf=None):
        if gt is not None:
            square_diff = torch.sum((est - gt)**2, dim=-1)
        else:
            square_diff = torch.sum(est**2, dim=-1)
        diff = torch.div(square_diff, square_diff + self.rho2)
        if conf is not None:
            res = torch.sum(diff * conf)/(1e-5 + conf.sum())
        else:
            res = diff.sum()/diff.numel()
        return res
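The Geman-McClure penalty d²/(d²+ρ²) behaves like a squared error for small residuals but saturates at 1, so a single bad detection cannot dominate the fit. A quick comparison on toy values:

import torch

gmof = GMoF(rho=1)
est = torch.tensor([[0.1, 0.0], [10.0, 0.0]])  # one inlier, one outlier
gt = torch.zeros_like(est)
sq = torch.sum((est - gt)**2, dim=-1)
print(sq)               # tensor([  0.0100, 100.0000]): plain L2 is dominated by the outlier
print(sq / (sq + 1.0))  # tensor([0.0099, 0.9901]): GMoF caps the outlier near 1
print(gmof(est, gt))    # mean of the robust residuals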
class BaseLoss(nn.Module):
    def __init__(self, norm='l2', norm_info={}, reduce='sum') -> None:
        super().__init__()
        self.loss = self.make_loss(norm, norm_info, reduce)

    def make_loss(self, norm='l2', norm_info={}, reduce='sum'):
        reduce = torch.sum if reduce == 'sum' else torch.mean
        if norm == 'l2':
            def loss(est, gt=None, conf=None):
                if gt is not None:
                    square_diff = reduce((est - gt)**2, dim=-1)
                else:
                    square_diff = reduce(est**2, dim=-1)
                if conf is not None:
                    res = torch.sum(square_diff * conf)/(1e-5 + conf.sum())
                else:
                    res = square_diff.sum()/square_diff.numel()
                return res
        elif norm == 'l1':
            def loss(est, gt=None, conf=None):
                if gt is not None:
                    square_diff = reduce(torch.abs(est - gt), dim=-1)
                else:
                    square_diff = reduce(torch.abs(est), dim=-1)
                if conf is not None:
                    res = torch.sum(square_diff * conf)/(1e-5 + conf.sum())
                else:
                    res = square_diff.sum()/square_diff.numel()
                return res
        elif norm == 'gm':
            # norm_info holds the GMoF keyword arguments, e.g. {'rho': ...};
            # passing the dict positionally would set rho to a dict
            loss = GMoF(**norm_info)
        else:
            loss = None
        return loss

    def forward(self, pred, target):
        pass

class BaseKeypoints(BaseLoss):
    @staticmethod
    def select(keypoints, index, ranges):
        if len(index) > 0:
            keypoints = keypoints[..., index, :]
        elif len(ranges) > 0:
            if ranges[1] == -1:
                keypoints = keypoints[..., ranges[0]:, :]
            else:
                keypoints = keypoints[..., ranges[0]:ranges[1], :]
        return keypoints

    def __init__(self, index_est=[], index_gt=[],
                 ranges_est=[], ranges_gt=[], **kwargs):
        super().__init__(**kwargs)
        self.index_est = index_est
        self.index_gt = index_gt
        self.ranges_est = ranges_est
        self.ranges_gt = ranges_gt

    def forward(self, pred, target):
        return super().forward(pred, target)

    def loss_keypoints(self, pred, target, conf):
        # pred: (..., dim), target: (..., dim), conf: (..., 1)
        dist = torch.sum((pred - target)**2, dim=-1, keepdim=True)
        loss = torch.sum(dist * conf) / torch.sum(conf)
        return loss
class Keypoints2D(BaseKeypoints):
    def forward(self, pred, target):
        # (nFrames, nJoints, 3)
        pred_kpts3d = self.select(pred['keypoints'], self.index_est, self.ranges_est)
        target_kpts2d = self.select(target['keypoints'], self.index_gt, self.ranges_gt)
        cameras = target['cameras']
        P = torch.cat([cameras['R'], cameras['T']], dim=-1)
        invKtrans = torch.inverse(cameras['K']).transpose(-1, -2)
        homo = torch.cat([target_kpts2d[..., :2], torch.ones_like(target_kpts2d[..., 2:])], dim=-1)
        target_points = torch.matmul(homo, invKtrans)[..., :2]
        pred_homo = torch.cat([pred_kpts3d, torch.ones_like(pred_kpts3d[..., :1])], dim=-1)
        self.einsum = 'fab,fjb->fja'
        point_cam = torch.einsum(self.einsum, P, pred_homo)
        img_points = point_cam[..., :2]/point_cam[..., 2:]
        loss = self.loss(est=img_points, gt=target_points, conf=target_kpts2d[..., -1])
        return loss

class Keypoints3D(BaseKeypoints):
    def forward(self, pred, target):
        # (nFrames, nJoints, 3)
        pred_kpts3d = self.select(pred['keypoints'], self.index_est, self.ranges_est)
        target_kpts3d = self.select(target['keypoints3d'], self.index_gt, self.ranges_gt)
        assert target_kpts3d.shape[-1] == 4, 'Target keypoints {} must have confidence'.format(target_kpts3d.shape)
        loss = self.loss(est=pred_kpts3d, gt=target_kpts3d[..., :3], conf=target_kpts3d[..., -1])
        return loss

class LimbLength(BaseKeypoints):
    def __init__(self, kintree, key='keypoints3d', **kwargs):
        self.kintree = np.array(kintree)
        super().__init__(**kwargs)

    def __str__(self):
        return "Limb of: {}".format(','.join(['[{},{}]'.format(i, j) for (i, j) in self.kintree]))

    def forward(self, pred, target):
        pred_kpts3d = pred['keypoints']
        target_kpts3d = target['keypoints3d']
        # select limbs via the kinematic tree; use xyz only for the target,
        # since its 4th channel is the confidence
        pred = torch.norm(pred_kpts3d[..., self.kintree[:, 1], :] - pred_kpts3d[..., self.kintree[:, 0], :], dim=-1, keepdim=True)
        target = torch.norm(target_kpts3d[..., self.kintree[:, 1], :3] - target_kpts3d[..., self.kintree[:, 0], :3], dim=-1, keepdim=True)
        target_conf = torch.minimum(target_kpts3d[..., self.kintree[:, 1], -1], target_kpts3d[..., self.kintree[:, 0], -1])
        loss = self.loss(est=pred, gt=target, conf=target_conf)
        return loss
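LimbLength penalizes the difference in bone lengths between the predicted and target skeletons, weighting each limb by the smaller of its two endpoint confidences. A toy sketch with two limbs laid along the x-axis; all values are illustrative:

import torch

limb = LimbLength(kintree=[[0, 1], [1, 2]], norm='l2')
pred = {'keypoints': torch.zeros(2, 3, 3)}   # 2 frames, 3 joints
pred['keypoints'][:, 1, 0] = 0.3             # limb (0,1): length 0.3
pred['keypoints'][:, 2, 0] = 0.8             # limb (1,2): length 0.5
target = {'keypoints3d': torch.zeros(2, 3, 4)}
target['keypoints3d'][:, 1, 0] = 0.25        # target lengths 0.25 and 0.5
target['keypoints3d'][:, 2, 0] = 0.75
target['keypoints3d'][..., -1] = 1.          # full confidence
print(limb(pred, target))                    # confidence-weighted squared length error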
class Smooth(BaseLoss):
    def __init__(self, keys, smooth_type, order, norm, weights, window_weight) -> None:
        super().__init__(norm)
        self.loss = {}
        for i in range(len(keys)):
            new_key = keys[i] + '_' + smooth_type[i]
            self.loss[new_key] = {
                'func': self.make_loss(norm='l2', norm_info={}, reduce='sum'),
                'key': keys[i],
                'weight': weights[i],
                'norm': norm[i],
                'order': order[i],
                'type': smooth_type[i],
            }
        self.window_weight = window_weight

    def convert_Rh_to_R(self, Rh):
        from ..bodymodels.geometry import batch_rodrigues
        # Rh: (..., nRot x 3)
        nRot = Rh.shape[-1] // 3
        Rh_flat = Rh.reshape(-1, nRot, 3)
        Rh_flat = Rh_flat.reshape(-1, 3)
        Rot = batch_rodrigues(Rh_flat)
        Rot_0 = Rot.reshape(-1, nRot, 3, 3)
        Rot = Rot_0.reshape(*Rh.shape[:-1], 3, 3)
        Rot = Rot.reshape(*Rh.shape[:-1], 9)
        return Rot

    def forward(self, pred, target):
        ret = {}
        for key, cfg in self.loss.items():
            value = pred[cfg['key']]
            loss = 0
            for width, weight in enumerate(self.window_weight, start=1):
                if cfg['type'] == 'Linear':
                    vel = value[width:] - value[:-width]
                elif cfg['type'] == 'Rot':
                    _value = self.convert_Rh_to_R(value)
                    vel = _value[width:] - _value[:-width]
                elif cfg['type'] == 'Depth':
                    # TODO: account for the camera R, T
                    if 'cameras' in target.keys():
                        R = target['cameras']['R']
                        _value = torch.bmm(value[..., None, :], R.transpose(-1, -2))
                        _value = _value[..., 0, :]
                        _value = _value[..., [2]]  # keep only the depth component
                    vel = _value[width:] - _value[:-width]
                if cfg['order'] == 2:
                    vel = vel[1:] - vel[:-1]
                loss += weight * cfg['func'](est=vel)
            ret[key] = loss * cfg['weight']
        return ret

class AnySmooth(BaseLoss):
    def __init__(self, key, weight, norm, norm_info={}, dim=-1, order=1):
        super().__init__()
        self.dim = dim
        self.weight = weight
        self.loss = self.make_loss(norm, norm_info)
        self.norm_name = norm
        self.key = key
        self.order = order

    def forward(self, pred, target):
        loss = 0
        value = pred[self.key]
        if value.shape[0] <= len(self.weight):
            return torch.FloatTensor([0.]).to(value.device)
        for width, weight in enumerate(self.weight, start=1):
            vel = value[width:] - value[:-width]
            if self.order == 2:
                vel = vel[1:] - vel[:-1]
            loss += weight * self.loss(vel)
        return loss

class Init(BaseLoss):
    def __init__(self, keys, weights, norm) -> None:
        super().__init__(norm)
        self.keys = keys
        self.weights = weights

    def forward(self, pred, target):
        ret = {}
        for key in self.keys:
            ret[key] = torch.mean((pred[key] - target['init_'+key])**2)
        return ret

from easymocap.multistage.lossbase import AnyReg

class RegLoss(AnyReg):
    def __init__(self, key, norm) -> None:
        super().__init__(key, norm)

    def __call__(self, pred, target):
        return self.forward(**{self.key: pred[self.key]})

class Init_pose(Init):
    def __init__(self, keys, weights, norm) -> None:
        super().__init__(keys, weights, norm)
        self.norm = norm

    def forward(self, pred, target):
        ret = {}
        for key in self.keys:
            if self.norm == 'l2':
                ret[key] = torch.sum((pred[key] - target['target_'+key])**2)
            elif self.norm == 'l1':
                ret[key] = torch.sum(torch.abs(pred[key] - target['target_'+key]))
        return ret
1074
myeasymocap/operations/match.py
Normal file
File diff suppressed because it is too large
193
myeasymocap/operations/merge.py
Normal file
@ -0,0 +1,193 @@
import numpy as np
import cv2
import scipy
import torch

class MultilView_Merge:
    def __init__(self) -> None:
        pass

    def forward(self, data, ax=0):
        '''
        data - dict
        data[key] (nv, ...)
        '''
        results = {}
        for key in data.keys():
            results[key] = data[key].mean(axis=ax)
        return results

class Merge_hand(MultilView_Merge):
    def __init__(self, camtoworld) -> None:
        self.camtoworld = camtoworld

    def __call__(self, posel, cameras, match3d_l):
        hand_list = []
        for pid in range(len(match3d_l)):
            dt = match3d_l[pid]
            if isinstance(dt, int):
                # TODO: handle -1, i.e. no suitable matched hand was found
                hand_list.append(np.zeros((1,48)))
                break
            Merge_list = []
            for cid in range(len(dt['views'])):
                nv = dt['views'][cid]
                poseid = dt['indices'][cid]
                pose = posel[nv][poseid].copy()

                if self.camtoworld:
                    # rotate the per-view global orientation into world coordinates
                    Rh = pose[:,:3].copy()
                    invR = np.linalg.inv(cameras['R'][nv])
                    Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
                    Rh_m_new = invR @ Rh_m_old
                    Rh = cv2.Rodrigues(Rh_m_new)[0]
                    Merge_list.append(np.hstack((Rh.reshape(3), pose[:,3:].reshape(-1))))
                else:
                    Merge_list.append(pose)
            out = self.forward({'pose': np.stack(Merge_list)}, 0)

            hand_list.append(out['pose'])
        pose_ = np.stack(hand_list)
        Rh = pose_[:,:3].copy()
        pose_[:,:3] = 0
        params = {
            'Rh': Rh,
            'Th': np.zeros_like(Rh),
            'poses': pose_,
            'shapes': np.zeros((Rh.shape[0],10)),
        }
        return {'params': params}

class Merge_handlr(Merge_hand):
    def __call__(self, posel, poser, cameras, match3d_l, match3d_r):
        params_l = super().__call__(posel, cameras, match3d_l)
        params_r = super().__call__(poser, cameras, match3d_r)
        return {'params_l': params_l['params'], 'params_r': params_r['params']}
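Merge_hand reduces to a plain per-element average across views once each view's global rotation has been rotated into a common world frame; the shared forward is just that mean. A toy call, values illustrative:

import numpy as np

merger = MultilView_Merge()
data = {'pose': np.stack([np.full(48, 0.1), np.full(48, 0.3)])}  # two views of a 48-dim hand pose
print(merger.forward(data, ax=0)['pose'][:3])  # [0.2 0.2 0.2]

Averaging axis-angle vectors element-wise is only a rough approximation; select.py further below averages rotation matrices instead before converting back, which behaves better for large rotations.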
class Merge_bodyandhand:
    def __init__(self, tmp) -> None:
        pass

    def get_R(self, poses, cfg, st):
        res = st.copy()
        for i in cfg:
            res = res @ cv2.Rodrigues(poses[i,:])[0]
        return res

    def process_poses_mano(self, poses, hand_Rh, flag):
        if sum(flag) == 0:
            return poses

        poses = poses.reshape((-1,3))
        cfg = {'rt': [0,3,6,9],
               'r': [14,17,19],
               'l': [13,16,18]
        }
        RA = self.get_R(poses, cfg['rt'], np.eye(3))

        if flag[0]:
            # left hand: express the hand root rotation relative to the wrist chain
            RL = self.get_R(poses, cfg['l'], RA)
            tmppose = np.matrix(RL).I @ cv2.Rodrigues(np.array(hand_Rh[0]))[0]
            tmppose = cv2.Rodrigues(tmppose)[0]
            poses[20,:] = tmppose.reshape(3)

            # split the twist around the forearm: half to the elbow (18), half to the wrist (20)
            e20 = scipy.spatial.transform.Rotation.from_rotvec(torch.from_numpy(poses[20,:]).reshape(-1,3))
            e20 = e20.as_euler('ZYX', degrees=True)

            dt = scipy.spatial.transform.Rotation.from_euler('ZYX', np.array([0,0,e20[0,2]/2]), degrees=True)
            rot_dt = dt.as_matrix()
            rot18 = cv2.Rodrigues(poses[18,:])[0]
            rot18 = rot18 @ rot_dt
            vec18 = cv2.Rodrigues(rot18)[0].reshape((1,3))
            rot20 = cv2.Rodrigues(poses[20,:])[0]
            rot20 = np.linalg.inv(rot_dt) @ rot20
            vec20 = cv2.Rodrigues(rot20)[0].reshape((1,3))
            poses[20,:] = vec20
            poses[18,:] = vec18

        if flag[1]:
            # right hand: the same procedure with joints 19/21
            RR = self.get_R(poses, cfg['r'], RA)
            tmppose = np.matrix(RR).I @ cv2.Rodrigues(np.array(hand_Rh[1]))[0]
            tmppose = cv2.Rodrigues(tmppose)[0]
            poses[21,:] = tmppose.reshape(3)

            e21 = scipy.spatial.transform.Rotation.from_rotvec(torch.from_numpy(poses[21,:]).reshape(-1,3))
            e21 = e21.as_euler('ZYX', degrees=True)

            dt = scipy.spatial.transform.Rotation.from_euler('ZYX', np.array([0,0,e21[0,2]/2]), degrees=True)
            rot_dt = dt.as_matrix()
            rot19 = cv2.Rodrigues(poses[19,:])[0]
            rot19 = rot19 @ rot_dt
            vec19 = cv2.Rodrigues(rot19)[0].reshape((1,3))
            rot21 = cv2.Rodrigues(poses[21,:])[0]
            rot21 = np.linalg.inv(rot_dt) @ rot21
            vec21 = cv2.Rodrigues(rot21)[0].reshape((1,3))
            poses[21,:] = vec21
            poses[19,:] = vec19

        return poses.reshape((1,-1))

    def merge_pose(self, bodypose, handlpose, handrpose):
        flag = [True, True]
        if abs(handlpose).sum() == 0:
            flag[0] = False
        if abs(handrpose).sum() == 0:
            flag[1] = False

        pose = np.hstack((bodypose, handlpose[:,3:], handrpose[:,3:]))  # (1,156)
        # if a hand was not found its pose stays all zero, so its flag is False here
        out_pose = self.process_poses_mano(pose, [handlpose[0,:3], handrpose[0,:3]], flag)
        return out_pose

    def __call__(self, params_l, params_r, params):
        bz = params['Rh'].shape[0]
        ret = {
            'Rh': np.zeros((bz,3), dtype=np.float32),
            'Th': params['Th'],
            'poses': np.zeros((bz,156), dtype=np.float32),
            'shapes': np.zeros((bz,16), dtype=np.float32)
        }
        ret['shapes'][:,:10] = params['shapes']
        # TODO: extend to (nFrames, nPerson)
        for i in range(bz):
            inpose = np.zeros((1,66))
            inpose[:,3:] = params['poses'][i][:63].copy()
            inpose[:,:3] = params['Rh'][i].copy()  # poses[0:3] carries the global rotation (Rh); they may need merging

            handlpose = params_l['poses'][i].reshape((1,-1)).copy()
            handrpose = params_r['poses'][i].reshape((1,-1)).copy()
            handlpose[:,:3] = params_l['Rh'][i]
            handrpose[:,:3] = params_r['Rh'][i]

            out = self.merge_pose(inpose.reshape((1,-1)), handlpose, handrpose)
            ret['Rh'][i] = out[:,:3]
            ret['poses'][i,3:] = out[:,3:]
        return {'params_smplh': ret}
167
myeasymocap/operations/optimizer.py
Normal file
@ -0,0 +1,167 @@
import torch
import torch.nn as nn
from easymocap.config import Config, load_object
from easymocap.mytools.debug_utils import log

def dict_of_numpy_to_tensor(body_params, device):
    params_ = {}
    for key, val in body_params.items():
        if isinstance(val, dict):
            params_[key] = dict_of_numpy_to_tensor(val, device)
        else:
            params_[key] = torch.Tensor(val).to(device)
    return params_

def dict_of_tensor_to_numpy(body_params):
    params_ = {}
    for key, val in body_params.items():
        if isinstance(val, dict):
            params_[key] = dict_of_tensor_to_numpy(val)
        else:
            params_[key] = val.cpu().numpy()
    return params_
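These two helpers recursively mirror a (possibly nested) parameter dict between numpy and torch, so whole parameter trees can be moved onto a device and back in one call. A quick round trip; shapes are illustrative:

import numpy as np
import torch

params = {'Rh': np.zeros((5, 3)), 'extra': {'poses': np.ones((5, 69))}}
tensors = dict_of_numpy_to_tensor(params, torch.device('cpu'))
back = dict_of_tensor_to_numpy(tensors)
assert isinstance(tensors['extra']['poses'], torch.Tensor)
assert np.allclose(back['Rh'], params['Rh'])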
def make_optimizer(opt_params, optim_type='lbfgs', max_iter=20,
                   lr=1e-3, betas=(0.9, 0.999), weight_decay=0.0, **kwargs):
    if isinstance(opt_params, dict):
        # LBFGS does not accept a dict of parameters
        opt_params = list(opt_params.values())
    if optim_type == 'lbfgs':
        # torch.optim.LBFGS was replaced by easymocap's own LBFGS implementation
        from easymocap.pyfitting.lbfgs import LBFGS
        optimizer = LBFGS(opt_params, line_search_fn='strong_wolfe', max_iter=max_iter,
                          tolerance_grad=0.0000001,  # float32 carries ~7 significant digits
                          tolerance_change=0.0000001,
                          **kwargs)
    elif optim_type == 'adam':
        optimizer = torch.optim.Adam(opt_params, lr=lr, betas=betas, weight_decay=weight_decay)
    else:
        raise NotImplementedError
    return optimizer
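A minimal sketch of the 'adam' branch fitting a toy parameter; the LBFGS branch is driven the same way but expects a closure in step(). Values are illustrative:

import torch

x = torch.zeros(3, requires_grad=True)
optimizer = make_optimizer({'x': x}, optim_type='adam', lr=0.1)
target = torch.tensor([1., 2., 3.])
for _ in range(200):
    optimizer.zero_grad()
    loss = ((x - target)**2).sum()
    loss.backward()
    optimizer.step()
print(x.detach())  # ≈ [1., 2., 3.]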
def grad_require(params, flag=False):
    if isinstance(params, list):
        for par in params:
            par.requires_grad = flag
    elif isinstance(params, dict):
        for key, par in params.items():
            par.requires_grad = flag

def make_closure(optimizer, model, params, infos, loss, device):
    loss_func = {}
    for key, val in loss.items():
        loss_func[key] = load_object(val['module'], val['args'])
        if isinstance(loss_func[key], nn.Module):
            loss_func[key].to(device)

    def closure(debug=False):
        optimizer.zero_grad()
        new_params = params.copy()
        output = model(new_params)
        loss_dict = {}
        loss_weight = {key: loss[key].weight for key in loss_func.keys()}
        for key, func in loss_func.items():
            output_ = {k: output[k] for k in loss[key].key_from_output}
            infos_ = {k: infos[k] for k in loss[key].key_from_infos}
            loss_now = func(output_, infos_)
            if isinstance(loss_now, dict):
                for k, _loss in loss_now.items():
                    loss_dict[key+'_'+k] = _loss
                    loss_weight[key+'_'+k] = loss_weight[key]
                loss_weight.pop(key)
            else:
                loss_dict[key] = loss_now
        loss_sum = sum([loss_dict[key]*loss_weight[key]
                        for key in loss_dict.keys()])
        if debug:
            return loss_dict, loss_weight
        loss_sum.backward()
        return loss_sum
    return closure

def rel_change(prev_val, curr_val):
    return (prev_val - curr_val) / max([1e-5, abs(prev_val), abs(curr_val)])

class Optimizer:
    def __init__(self, optimize_keys, optimizer_args, loss) -> None:
        self.optimize_keys = optimize_keys
        self.optimizer_args = optimizer_args
        self.loss = loss
        self.used_infos = []
        for key, val in loss.items():
            self.used_infos.extend(val.key_from_infos)
        self.used_infos = list(set(self.used_infos))

    def log_loss(self, iter_, closure, print_loss=False):
        if iter_ % 10 == 0 or print_loss:
            with torch.no_grad():
                loss_dict, loss_weight = closure(debug=True)
            print('{:-6d}: '.format(iter_) + ' '.join([key + ' %7.4f'%(loss_dict[key].item()*loss_weight[key]) for key in loss_dict.keys()]))

    def optimizer_step(self, optimizer, closure):
        prev_loss = None
        self.log_loss(0, closure, True)
        for iter_ in range(1, 1000):
            loss = optimizer.step(closure)
            # check the loss
            if torch.isnan(loss).sum() > 0:
                print('[optimize] NaN loss value, stopping!')
                break
            if torch.isinf(loss).sum() > 0:
                print('[optimize] Infinite loss value, stopping!')
                break
            # check the relative change
            if iter_ > 0 and prev_loss is not None:
                loss_rel_change = rel_change(prev_loss, loss.item())
                if loss_rel_change <= 0.0000001:
                    break
            self.log_loss(iter_, closure)
            prev_loss = loss.item()
        self.log_loss(iter_, closure, True)
        return True

    def __call__(self, params, model, **infos):
        """
        Every variable to be optimized must live in params, but not everything
        in params gets optimized; variables in infos serve as inputs only.
        """
        # TODO: the device should come from the model, but the model may be a
        # plain function, so it is determined on the spot for now
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        params = dict_of_numpy_to_tensor(params, device=device)
        infos_used = {key: infos[key] for key in self.used_infos if key in infos.keys()}
        infos_used = dict_of_numpy_to_tensor(infos_used, device=device)

        log('[{}] Optimize {}'.format(self.__class__.__name__, self.optimize_keys))
        log('[{}] Loading {}'.format(self.__class__.__name__, self.used_infos))
        opt_params = {}
        for key in self.optimize_keys:
            if key in infos.keys():  # parameters to optimize coming from infos
                opt_params[key] = infos_used[key]
            elif key in params.keys():
                opt_params[key] = params[key]
            else:
                raise ValueError('{} is not in infos or body_params'.format(key))
        for key, val in opt_params.items():
            infos_used['init_'+key] = val.clone()
        optimizer = make_optimizer(opt_params, **self.optimizer_args)
        closure = make_closure(optimizer, model, params, infos_used, self.loss, device)
        # run the optimization
        grad_require(opt_params, True)
        self.optimizer_step(optimizer, closure)
        grad_require(opt_params, False)
        # collect the results
        ret = {
            'params': params
        }
        for key in self.optimize_keys:
            if key in infos.keys():
                ret[key] = opt_params[key]
        ret = dict_of_tensor_to_numpy(ret)
        return ret
295
myeasymocap/operations/select.py
Normal file
@ -0,0 +1,295 @@
from typing import Any
import numpy as np
import cv2

LOG_FILE = 'log_hand_select.txt'
LOG_LEVEL = 2  # 0: silent, 1: LOG only, 2: full log
FULL_LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 1 else (lambda x: None)
LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 0 else (lambda x: None)

def views_from_dimGroups(dimGroups):
    # np.int was removed in recent numpy; the builtin int behaves the same here
    views = np.zeros(dimGroups[-1], dtype=int)
    for nv in range(len(dimGroups) - 1):
        views[dimGroups[nv]:dimGroups[nv+1]] = nv
    return views
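dimGroups stores the cumulative offsets of per-view detections, and views_from_dimGroups expands them back into a per-detection view index:

print(views_from_dimGroups([0, 2, 3, 6]))  # 3 views with 2/1/3 detections -> [0 0 1 2 2 2]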
class Select_Views:
    def __init__(self, camtoworld, handtype) -> None:
        self.camtoworld = camtoworld
        self.results = []
        self.DIST_MAX = 50
        self.threshold = 2
        self.handtype = handtype

        self.threshold2 = 0.3

        self.count = 0
        self.mode = 0  # 0: sum of squared differences, 1: max & sum

    def cvt_Rh_Rot(self, Rh):
        import cv2
        RotList = []
        for i in range(Rh.shape[0]):
            RotList.append(cv2.Rodrigues(Rh[i])[0])
        return np.stack(RotList)

    def get_dis_Rh(self, Rh1, Rh2):
        rh_dis = (self.cvt_Rh_Rot(Rh1) - self.cvt_Rh_Rot(Rh2))**2
        return rh_dis.sum(axis=(1,2))

    def match_with_lastframe(self, lastpose, new_poses):
        if self.mode == 0:
            rh_dis = self.get_dis_Rh(np.array(new_poses)[:,:3], lastpose[None][:,:3])
            dis = ((np.array(new_poses)[:,3:] - lastpose[None][:,3:])**2).sum(axis=1)
            dis += rh_dis
            minid = np.argmin(dis)
            return new_poses[minid], dis[minid], minid, dis
        else:
            dis1 = ((np.array(new_poses) - lastpose[None])**2).sum(axis=1)
            dis2 = ((np.array(new_poses) - lastpose[None])**2).max(axis=1)
            dis = np.stack([dis2, dis1]).T
            val_idx = dis[:,0] < self.threshold2
            dis = dis[val_idx,:]
            if len(dis) == 0:
                dis = ((np.array(new_poses) - lastpose[None])**2).sum(axis=1)
                minid = np.argmin(dis1)
                mindis = dis[minid]
                return new_poses[minid], mindis, minid, dis
            else:
                minid = np.argmin(dis[:,1])
                mindis = dis[minid,1]
                return np.array(new_poses)[val_idx,:][minid], mindis, minid, dis

    def calculate_aff(self, poseslist, DIST_MAX):
        # TODO: the Rh distance should not be taken in axis-angle space;
        # converting to rotation matrices first would be more accurate
        M = len(poseslist)
        distance = np.zeros((M, M), dtype=np.float32)
        for id0 in range(M):
            for id1 in range(id0+1, M):
                p0 = poseslist[id0]
                p1 = poseslist[id1]
                dis = ((p0-p1)**2).sum()
                distance[id0,id1] = dis
                distance[id1,id0] = dis
        DIST_MAX = max(DIST_MAX, distance.max())
        for nv in range(M):
            distance[nv,nv] = DIST_MAX
        distance -= np.eye(M) * DIST_MAX
        aff = (DIST_MAX - distance)/DIST_MAX
        aff = np.clip(aff, 0, 1)
        return aff

    def Hierarchical_Cluster(self, data, threshold=2):
        if len(data) == 1:
            return [[0]]
        import scipy.cluster.hierarchy as sch

        out = sch.linkage(data, method='ward')
        ret = []
        vis = []
        for i in range(len(data)):
            ret.append([i])
            vis.append(0)

        # merge clusters bottom-up until the linkage distance exceeds the threshold
        for i in range(out.shape[0]):
            if out[i][2] > threshold:
                break
            id1 = int(out[i][0])
            id2 = int(out[i][1])
            vis[id1] = 1
            vis[id2] = 1
            vis.append(0)
            ret.append(ret[id1]+ret[id2])

        groups = []
        for i in range(len(ret)):
            if vis[i] == 1:
                continue
            groups.append(ret[i])

        return groups
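Hierarchical_Cluster runs Ward-linkage agglomerative clustering (scipy.cluster.hierarchy.linkage) and cuts the dendrogram at the given distance, returning groups of row indices. A sketch on well-separated 1-D toy data; values are illustrative:

import numpy as np

selector = Select_Views(camtoworld=False, handtype='handl')
data = np.array([[0.0], [0.1], [5.0], [5.2]])
print(selector.Hierarchical_Cluster(data, threshold=2))  # [[0, 1], [2, 3]]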
    def aff_to_groups(self, data, affinity, dimGroups, prev_id):
        sum1 = np.zeros((affinity.shape[0]))
        for i in range(len(dimGroups)-1):
            start, end = dimGroups[i], dimGroups[i+1]
            if end == start: continue
            sum1 += affinity[:, start:end].max(axis=-1)
        n2d = affinity.shape[0]
        nViews = len(dimGroups) - 1
        idx_zero = np.zeros(nViews, dtype=int) - 1
        views = views_from_dimGroups(dimGroups)
        # the assigned results of each person
        p2dAssigned = np.zeros(n2d, dtype=int) - 1
        visited = np.zeros(n2d, dtype=int)
        sortidx = np.argsort(-sum1)
        pid = 0
        k3dresults = []
        # TODO: the greedy assignment is unfinished (a debugging breakpoint used
        # to live here); the call site below is commented out, so this function
        # is currently unused
        return k3dresults

    def __call__(self, posel, cameras, match3d_l):
        hand_list = []
        for pid in range(len(match3d_l)):
            dt = match3d_l[pid]

            Merge_list = []
            Merge_list_rot = []

            if isinstance(dt, int):
                # TODO: handle -1, i.e. no suitable matched hand was found
                Merge_list_rot.append(np.zeros((54,)))
            else:
                for cid in range(len(dt['views'])):
                    nv = dt['views'][cid]
                    poseid = dt['indices'][cid]
                    pose = posel[nv][poseid].copy()

                    if self.camtoworld:
                        Rh = pose[:,:3].copy()
                        invR = np.linalg.inv(cameras['R'][nv])
                        Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
                        Rh_m_new = invR @ Rh_m_old
                        Rh = cv2.Rodrigues(Rh_m_new)[0]
                        Merge_list.append(np.hstack((Rh.reshape(3), pose[:,3:].reshape(-1))))
                        Merge_list_rot.append(np.hstack((np.array(Rh_m_new).reshape(-1), pose[:,3:].reshape(-1))))
                    else:
                        Merge_list.append(pose.reshape(-1))

                        Rh = pose[:,:3].copy()
                        Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
                        Merge_list_rot.append(np.hstack((np.array(Rh_m_old).reshape(-1), pose[:,3:].reshape(-1))))

            # the coordinate-transformed per-view poses are now collected;
            # select views by hierarchical clustering on the 9-D rotation + pose vector
            groups = self.Hierarchical_Cluster(Merge_list_rot, self.threshold)

            # alternative: build an affinity matrix of pairwise pose distances and
            # group by affinity (see calculate_aff / aff_to_groups above, unused)

            FULL_LOG('[select views] frame:{}, pid:{}, handtype:{}'.format(self.count, pid, self.handtype))
            FULL_LOG('[groups] groups:{}'.format(str(groups)))

            # merge each group by averaging rotation matrices, then convert back to axis-angle
            new_poses = []
            for gp in groups:
                merge_pose = np.array(Merge_list_rot)[gp].mean(axis=0)

                Rot = merge_pose[:9].reshape((3,3))
                Rh = cv2.Rodrigues(Rot)[0]
                merge_pose = np.hstack((Rh.reshape(3), merge_pose[9:].reshape(-1)))

                new_poses.append(merge_pose)
            # with several groups: measure each group's distance to the previous
            # frame's result (matching the hand tracked last frame to this frame)
            # and pick the closer group
            if len(self.results) > pid:
                # TODO: if the distance to the previous frame is too large, try to
                # restart the tracking, i.e. pick the group seen by the most views
                pose_, dis, minid, dis_ = self.match_with_lastframe(self.results[pid], new_poses)
                FULL_LOG('[select 0 ] minid:{}'.format(minid))
                FULL_LOG('[select 0 ] dis:{}'.format(str(dis_.tolist())))
                if isinstance(dt, int) or dis_.min() > 10:
                    # no view detected the hand, or every view is far from the previous frame
                    FULL_LOG('[select 0 ] last pose')
                    pose_ = self.results[pid].copy()
                else:
                    threshold_ = 0.3
                    if self.mode == 1:
                        threshold_ = 1
                    if dis > threshold_:
                        # beyond the threshold: assume the previous frame was
                        # unreliable and re-select in this frame, favouring the
                        # largest group closest to the previous result
                        array_len = np.array([len(gp) for gp in groups])
                        a_max = array_len.max()
                        d_max = 500
                        idx = 0
                        for gid in range(array_len.shape[0]):
                            if array_len[gid] == a_max and dis_[gid] < d_max:
                                d_max = dis_[gid]
                                idx = gid
                        pose_ = new_poses[idx].copy()

                        FULL_LOG('[select 0 ] max len(groups):{}\n'.format(idx))

                self.results[pid] = pose_.copy()
            else:
                # TODO: without supervision from a previous frame, one could fuse
                # all groups; here the largest group is taken
                # TODO: ties between equally large groups need extra handling,
                # e.g. the largest affinity sum
                idx = np.argmax([len(gp) for gp in groups])
                pose_ = new_poses[idx].copy()
                self.results.append(pose_.copy())

                FULL_LOG('[select 1 ] max len(groups):{}\n'.format(idx))
            # return one left/right-hand pose per body id, in world coordinates;
            # returning params instead (list vs dict) is still undecided

            hand_list.append(pose_)
        poses_ = np.stack(hand_list)
        Rh = poses_[:,:3].copy()
        poses_[:,:3] = 0
        params = {
            'Rh': Rh,
            'Th': np.zeros_like(Rh),
            'poses': poses_,
            'shapes': np.zeros((Rh.shape[0],10)),
        }

        self.count += 1

        return {'params': params}

class Select_Views_handlr:
    def __init__(self, camtoworld) -> None:
        self.camtoworld = camtoworld
        self.model_l = Select_Views(camtoworld, 'handl')
        self.model_r = Select_Views(camtoworld, 'handr')

    def __call__(self, posel, poser, match3d_l, match3d_r, cameras) -> Any:
        params_l = self.model_l(posel, cameras, match3d_l)
        params_r = self.model_r(poser, cameras, match3d_r)
        return {'params_l': params_l['params'], 'params_r': params_r['params']}
138
myeasymocap/operations/smooth.py
Normal file
@ -0,0 +1,138 @@
|
||||
from typing import Any
import numpy as np


class SmoothAny:
    def __init__(self, window_size) -> None:
        self.w = window_size

    def __call__(self, value, with_conf=True):
        wsize = self.w
        value = value.copy()
        if with_conf:
            pos_sum = np.zeros_like(value[:-wsize, ..., :-1])
            conf_sum = np.zeros_like(value[:-wsize, ..., -1:])
        else:
            pos_sum = np.zeros_like(value[:-wsize])
        for w in range(wsize):
            if with_conf:
                # confidence-weighted sum over the sliding window
                pos_sum += value[w:w-wsize, ..., :-1] * value[w:w-wsize, ..., -1:]
                conf_sum += value[w:w-wsize, ..., -1:]
            else:
                pos_sum += value[w:w-wsize]
        if with_conf:
            pos_smooth = pos_sum / (1e-5 + conf_sum)
            # note: the confidence channel is written back as the window sum,
            # not the mean
            value[wsize//2:-wsize//2] = np.dstack([pos_smooth, conf_sum])
        else:
            pos_smooth = pos_sum / wsize
            value[wsize//2:-wsize//2] = pos_smooth
        return value


class Smooth(SmoothAny):
    def __call__(self, keypoints3d):
        return {'keypoints3d': super().__call__(keypoints3d, with_conf=True)}


class SmoothPoses:
    def __init__(self, window_size) -> None:
        self.W = window_size

    def __call__(self, params):
        # TODO: this variant pads the sequence at both ends before averaging
        poses = params['poses']
        padding_before = poses[:1].copy().repeat(self.W, 0)
        padding_after = poses[-1:].copy().repeat(self.W, 0)
        mean = poses.copy()
        nFrames = mean.shape[0]
        poses_full = np.vstack([padding_before, poses, padding_after])
        for w in range(1, self.W+1):
            mean += poses_full[self.W-w:self.W-w+nFrames]
            mean += poses_full[self.W+w:self.W+w+nFrames]
        mean /= 2*self.W + 1
        params['poses'] = mean
        return {'params': params}


class SmoothRealtime:
    def __init__(self, opt_name, win_sizes) -> None:
        self.size = {}
        self.opt_name = opt_name
        self.smdata = {}
        for idx, name in enumerate(opt_name):
            self.smdata[name] = []
            self.size[name] = win_sizes[idx]

    def cvt_Rh_Rot(self, Rh):
        # axis-angle vectors -> rotation matrices
        import cv2
        RotList = []
        Rh = Rh.reshape((-1, 3))
        for i in range(Rh.shape[0]):
            RotList.append(cv2.Rodrigues(Rh[i])[0])
        return np.stack(RotList)

    def cvt_Rot_Rh(self, Rot):
        # rotation matrices -> axis-angle vectors
        import cv2
        RhList = []
        for i in range(Rot.shape[0]):
            RhList.append(cv2.Rodrigues(Rot[i])[0].reshape(3))
        return np.stack(RhList).reshape((1, -1))

    def now_smplh(self):
        data = {}
        for name in self.opt_name:
            if name in ['Rh', 'poses']:
                # rotations are averaged in matrix space, then converted back
                out = sum(self.smdata[name]) / len(self.smdata[name])
                data[name] = self.cvt_Rot_Rh(out)
            else:
                data[name] = sum(self.smdata[name]) / len(self.smdata[name])
        return data

    def __call__(self, data):
        for name in self.opt_name:
            if name in ['Rh', 'poses']:
                self.smdata[name].append(self.cvt_Rh_Rot(data[name].copy()))
                if len(self.smdata[name]) > self.size[name]:
                    self.smdata[name].pop(0)
                out = sum(self.smdata[name]) / len(self.smdata[name])
                data[name] = self.cvt_Rot_Rh(out)
            else:
                self.smdata[name].append(data[name].copy())
                if len(self.smdata[name]) > self.size[name]:
                    self.smdata[name].pop(0)
                data[name] = sum(self.smdata[name]) / len(self.smdata[name])
        return data

class SmoothHandlr:
    def __init__(self, opt_name, win_sizes):
        self.smooth_handl = SmoothRealtime(opt_name, win_sizes)
        self.smooth_handr = SmoothRealtime(opt_name, win_sizes)

    def __call__(self, params_l, params_r) -> Any:
        params_l = self.smooth_handl(params_l)
        params_r = self.smooth_handr(params_r)
        return {'params_l': params_l, 'params_r': params_r}


class SmoothSmplh(SmoothRealtime):
    def __init__(self, opt_name, win_sizes):
        self.opt_name = opt_name
        self.win_sizes = win_sizes
        self.smooth_lists = []

    def __call__(self, params_smplh):
        # TODO: buffers should be keyed by person id; ids that stay absent for a
        # long time should be removed or cleared. Take the ids as input and
        # replace smooth_lists with a map.
        bz = params_smplh['Rh'].shape[0]
        while len(self.smooth_lists) < bz:
            self.smooth_lists.append(SmoothRealtime(self.opt_name, self.win_sizes))
        for i in range(bz):
            param = {}
            for key in params_smplh.keys():
                param[key] = params_smplh[key][i].reshape(1, -1)
            out = self.smooth_lists[i](param)
            for key in params_smplh.keys():
                params_smplh[key][i] = out[key]
        return {'params_smplh': params_smplh}


class Smoothkeypoints3d(SmoothRealtime):
    def __init__(self, opt_name, win_sizes):
        self.smooth_smplh = SmoothRealtime(opt_name, win_sizes)

    def __call__(self, keypoints3d):
        ret = self.smooth_smplh({'keypoints3d': keypoints3d})
        return ret
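
A minimal usage sketch of the two smoothing modes in this file (the array shapes are assumptions inferred from the slicing above; the import path is the file added in this commit):

import numpy as np
from myeasymocap.operations.smooth import Smooth, SmoothRealtime

# Offline: smooth a (nFrames, nJoints, 4) sequence of [x, y, z, conf] keypoints.
keypoints3d = np.random.rand(30, 25, 4).astype(np.float32)
smoothed = Smooth(window_size=5)(keypoints3d)['keypoints3d']
assert smoothed.shape == keypoints3d.shape

# Online: stream frames through SmoothRealtime; 'Rh' is averaged in
# rotation-matrix space via cv2.Rodrigues, 'Th' is averaged directly.
sm = SmoothRealtime(opt_name=['Rh', 'Th'], win_sizes=[5, 5])
for _ in range(10):
    frame = {'Rh': np.random.randn(1, 3) * 0.01, 'Th': np.random.randn(1, 3)}
    frame = sm(frame)
print(frame['Rh'].shape)  # (1, 3)
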
151
myeasymocap/operations/triangulate.py
Normal file
@ -0,0 +1,151 @@
import numpy as np
from itertools import combinations
from easymocap.mytools.camera_utils import Undistort
from easymocap.mytools.triangulator import iterative_triangulate


def batch_triangulate(keypoints_, Pall, min_view=2):
    """Triangulate the keypoints of the whole body.

    Args:
        keypoints_ (nViews, nJoints, 3): 2D detections
        Pall (nViews, 3, 4): projection matrix of each view
        min_view (int, optional): min views for visible points. Defaults to 2.

    Returns:
        keypoints3d: (nJoints, 4)
    """
    # keypoints: (nViews, nJoints, 3)
    # Pall: (nViews, 3, 4)
    # A: (nJoints, nViewsx2, 4), x: (nJoints, 4, 1); b: (nJoints, nViewsx2, 1)
    v = (keypoints_[:, :, -1] > 0).sum(axis=0)
    valid_joint = np.where(v >= min_view)[0]
    keypoints = keypoints_[:, valid_joint]
    conf3d = keypoints[:, :, -1].sum(axis=0) / v[valid_joint]
    # P2: last row of each P matrix: (1, nViews, 4)
    P0 = Pall[None, :, 0, :]
    P1 = Pall[None, :, 1, :]
    P2 = Pall[None, :, 2, :]
    # uP2: x coordinate times P2: (nJoints, nViews, 4)
    uP2 = keypoints[:, :, 0].T[:, :, None] * P2
    vP2 = keypoints[:, :, 1].T[:, :, None] * P2
    conf = keypoints[:, :, 2].T[:, :, None]
    Au = conf * (uP2 - P0)
    Av = conf * (vP2 - P1)
    A = np.hstack([Au, Av])
    u, s, v = np.linalg.svd(A)
    X = v[:, -1, :]
    X = X / X[:, 3:]
    # out: (nJoints, 4)
    result = np.zeros((keypoints_.shape[1], 4))
    result[valid_joint, :3] = X[:, :3]
    result[valid_joint, 3] = conf3d
    return result


def project_wo_dist(keypoints, RT, einsum='vab,kb->vka'):
    homo = np.concatenate([keypoints[..., :3], np.ones_like(keypoints[..., :1])], axis=-1)
    kpts2d = np.einsum(einsum, RT, homo)
    depth = kpts2d[..., 2]
    kpts2d[..., :2] /= kpts2d[..., 2:]
    return kpts2d, depth


class SimpleTriangulate:
    def __init__(self, mode):
        self.mode = mode

    @staticmethod
    def undistort(points, cameras):
        nViews = len(points)
        pelvis_undis = []
        for nv in range(nViews):
            camera = {key: cameras[key][nv] for key in ['R', 'T', 'K', 'dist']}
            if points[nv].shape[0] > 0:
                pelvis = Undistort.points(points[nv], camera['K'], camera['dist'])
            else:
                pelvis = points[nv].copy()
            pelvis_undis.append(pelvis)
        return pelvis_undis

    def __call__(self, keypoints, cameras):
        '''
        keypoints: [nViews, nJoints, 3]

        output:
            keypoints3d: (nJoints, 4)
        '''
        keypoints = self.undistort(keypoints, cameras)
        keypoints = np.stack(keypoints)
        if self.mode == 'naive':
            keypoints3d = batch_triangulate(keypoints, cameras['P'])
        else:
            keypoints3d, k2d = iterative_triangulate(keypoints, cameras['P'], dist_max=25)
        return {'keypoints3d': keypoints3d}


class RobustTriangulate(SimpleTriangulate):
    def __init__(self, mode, cfg):
        super().__init__(mode)
        self.cache_view = {}
        self.cfg = cfg

    def try_to_triangulate_and_project(self, index, keypoints, cameras):
        # triangulate from the selected views and measure reprojection error
        P = cameras['P'][index]
        kpts = keypoints[index][:, None]
        k3d = batch_triangulate(kpts, P)
        k2d, depth = project_wo_dist(k3d, P)
        dist_repro = np.linalg.norm(k2d[..., :2] - kpts[..., :2], axis=-1).mean(axis=-1)
        return k3d, dist_repro

    def robust_triangulate(self, keypoints, cameras):
        # pick the best initial views (e.g. the best 3)
        # TODO: remove implausible views
        nViews = keypoints.shape[0]
        if nViews not in self.cache_view:
            views = list(range(nViews))
            combs = list(combinations(views, self.cfg.triangulate.init_views))
            combs = np.array(combs)
            self.cache_view[nViews] = combs
        combs = self.cache_view[nViews]
        keypoints_comb = keypoints[combs]
        conf_sum = keypoints_comb[..., 2].mean(axis=1) * (keypoints_comb[..., 2] > 0.05).all(axis=1)
        comb_sort_id = (-conf_sum).argsort()
        flag_find_init = False
        for comb_id in comb_sort_id:
            if conf_sum[comb_id] < 0.1:
                break
            comb = combs[comb_id]
            k3d, dist_repro = self.try_to_triangulate_and_project(comb, keypoints, cameras)
            if (dist_repro < self.cfg.triangulate.repro_init).all():
                flag_find_init = True
                init = comb.tolist()
                break
        if not flag_find_init:
            print('Cannot find good initialize pair')
            import ipdb; ipdb.set_trace()
        # greedily add the remaining views, ordered by confidence
        view_idxs = (-keypoints[:, -1]).argsort()
        for view_idx in view_idxs:
            if view_idx in init:
                continue
            if keypoints[view_idx, 2] < 0.1:
                continue
            k3d, dist_repro = self.try_to_triangulate_and_project(init + [view_idx], keypoints, cameras)
            if (dist_repro < self.cfg.triangulate.repro_2d).all():
                init.append(view_idx)
        return k3d, init

    def __call__(self, keypoints, cameras):
        """
        keypoints: (nViews, nJoints, 3)
        cameras: (nViews, 3, 4)
        """
        nViews, nJoints, _ = keypoints.shape
        keypoints_undis = np.stack(self.undistort(keypoints, cameras))
        # for each point, find a good initial set of views
        points_all = np.zeros((nJoints, 4))
        keypoints_copy = keypoints.copy()
        for nj in range(nJoints):
            point, select_views = self.robust_triangulate(keypoints_undis[:, nj], cameras)
            points_all[nj:nj+1] = point
            # mark the selected views: boost them by 10, then clip back so only
            # the selected detections keep a nonzero confidence
            keypoints_copy[select_views, nj, 2] += 10
            keypoints_copy[:, nj, 2] = np.clip(keypoints_copy[:, nj, 2] - 10, 0, 1)
        return {'keypoints3d': points_all, 'keypoints_select': keypoints_copy}
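
As a sanity check on the DLT solve in batch_triangulate, here is a hedged, self-contained two-view example (synthetic cameras with identity intrinsics; nothing here comes from the repository's data):

import numpy as np
from myeasymocap.operations.triangulate import batch_triangulate

X = np.array([0.5, -0.2, 3.0, 1.0])                         # ground-truth point
P0 = np.hstack([np.eye(3), np.zeros((3, 1))])               # camera at origin
P1 = np.hstack([np.eye(3), np.array([[-1.], [0.], [0.]])])  # 1m baseline on x
Pall = np.stack([P0, P1])
kpts = []
for P in Pall:
    u, v, w = P @ X
    kpts.append([u / w, v / w, 1.0])                        # [u, v, conf]
keypoints = np.array(kpts)[:, None]                         # (nViews, 1, 3)
print(batch_triangulate(keypoints, Pall))                   # ~[0.5, -0.2, 3.0, 1.0]

RobustTriangulate builds on the same solve: it seeds from the most confident cfg.triangulate.init_views-sized view combination whose mean reprojection error stays below cfg.triangulate.repro_init, then greedily adds views that keep the error under cfg.triangulate.repro_2d.
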
149
myeasymocap/stages/basestage.py
Normal file
@ -0,0 +1,149 @@
from typing import Any
from easymocap.config import Config, load_object
from easymocap.mytools.debug_utils import mywarn, log
import numpy as np
import time
from tabulate import tabulate


class Timer:
    def __init__(self, record, verbose) -> None:
        self.keys = list(record.keys())
        self.header = self.keys
        self.verbose = verbose

    def update(self, timer):
        if not self.verbose:
            return
        contents = []
        for key in self.keys:
            if key not in timer:
                contents.append('skip')
            else:
                contents.append('{:.3f}s'.format(timer[key]))
        print(tabulate(headers=self.header, tabular_data=[contents], tablefmt='fancy_grid'))


class MultiStage:
    def load_final(self):
        at_finals = {}
        for key, val in self._at_final.items():
            if val['module'] == 'skip':
                mywarn('Stage {} is not used'.format(key))
                continue
            log('[{}] loading {}'.format(self.__class__.__name__, key))
            model = load_object(val['module'], val['args'])
            model.output = self.output
            at_finals[key] = model
        self.model_finals = at_finals

    def __init__(self, output, at_step, at_final) -> None:
        log('[{}] writing the results to {}'.format(self.__class__.__name__, output))
        at_steps = {}
        for key, val in at_step.items():
            if val['module'] == 'skip':
                mywarn('Stage {} is not used'.format(key))
                continue
            log('[{}] loading module {}'.format(self.__class__.__name__, key))
            model = load_object(val['module'], val['args'])
            model.output = output
            at_steps[key] = model
        self.output = output
        self.model_steps = at_steps
        self._at_step = at_step
        self._at_final = at_final
        self.timer = Timer(at_steps, verbose=False)

    def at_step(self, data, index):
        ret = {}
        if 'meta' in data:
            ret['meta'] = data['meta']
        timer = {}
        for key, model in self.model_steps.items():
            for k in self._at_step[key].get('key_keep', []):
                ret[k] = data[k]
            if self._at_step[key].get('skip', False):
                continue
            inputs = {}
            for k in self._at_step[key].get('key_from_data', []):
                inputs[k] = data[k]
            for k in self._at_step[key].get('key_from_previous', []):
                inputs[k] = ret[k]
            start = time.time()
            try:
                output = model(**inputs)
            except Exception:
                print('[{}] Error in {}'.format('Stages', key))
                raise
            timer[key] = time.time() - start
            if output is not None:
                ret.update(output)

        self.timer.update(timer)
        return ret

    @staticmethod
    def merge_data(infos_all):
        info0 = infos_all[0]
        data = {}
        for key, val in info0.items():
            data[key] = [info[key] for info in infos_all]
            if isinstance(val, np.ndarray):
                try:
                    data[key] = np.stack(data[key])
                except ValueError:
                    print('[{}] Skip merge {}'.format('Stages', key))
            elif isinstance(val, dict):
                data[key] = MultiStage.merge_data(data[key])
        return data

    def at_final(self, infos_all):
        self.load_final()
        data = self.merge_data(infos_all)
        log('Keep keys: {}'.format(list(data.keys())))
        ret = {}
        for key, model in self.model_finals.items():
            for iter_ in range(self._at_final[key].get('repeat', 1)):
                inputs = {}
                for k in self._at_final[key].get('key_from_data', []):
                    inputs[k] = data[k]
                for k in self._at_final[key].get('key_from_previous', []):
                    inputs[k] = ret[k]
                try:
                    output = model(**inputs)
                except Exception:
                    print('[{}] Error in {}'.format('Stages', key))
                    raise
                if output is not None:
                    ret.update(output)
        return ret


class StageForFittingEach:
    def __init__(self, stages, keys_keep) -> None:
        stages_ = {}
        for key, val in stages.items():
            if val['module'] == 'skip':
                mywarn('Stage {} is not used'.format(key))
                continue
            model = load_object(val['module'], val['args'])
            stages_[key] = model
        self.stages = stages_
        self.stages_args = stages
        self.keys_keep = keys_keep

    def __call__(self, results, **ret):
        for pid, result in results.items():
            ret0 = {}
            ret0.update(ret)
            for key, stage in self.stages.items():
                for iter_ in range(self.stages_args[key].get('repeat', 1)):
                    inputs = {}
                    for k in self.stages_args[key].get('key_from_data', []):
                        inputs[k] = result[k]
                    for k in self.stages_args[key].get('key_from_previous', []):
                        inputs[k] = ret0[k]
                    output = stage(**inputs)
                    if output is not None:
                        ret0.update(output)
            for key in self.keys_keep:
                result[key] = ret0[key]
        return {'results': results}
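
MultiStage is driven entirely by the at_step/at_final dictionaries. A hedged sketch of the wiring it expects (the module paths exist in this commit, but the stage names, key routing, and values here are illustrative, not taken from the repository's configs):

from myeasymocap.stages.basestage import MultiStage

at_step = {
    'triangulate': {
        'module': 'myeasymocap.operations.triangulate.SimpleTriangulate',
        'args': {'mode': 'naive'},
        'key_from_data': ['keypoints', 'cameras'],  # read from the dataset
    },
}
at_final = {
    'smooth': {
        'module': 'myeasymocap.operations.smooth.Smooth',
        'args': {'window_size': 5},
        # per-frame 'keypoints3d' outputs are stacked by merge_data
        'key_from_data': ['keypoints3d'],
    },
}
stages = MultiStage(output='output/demo', at_step=at_step, at_final=at_final)
# per frame: ret = stages.at_step(data, index); afterwards: stages.at_final(rets)
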
46
myeasymocap/stages/collect.py
Normal file
@ -0,0 +1,46 @@
import numpy as np
from tqdm import tqdm


class CheckFramePerson:
    def __init__(self, key) -> None:
        self.key = key
        self.pids = []
        self.frames = 0

    def __call__(self, keypoints3d, pids):
        # keep only the persons registered in the first frame; ids that appear
        # later are skipped
        k3d_, pid_ = [], []
        for i, pid in enumerate(pids):
            if pid not in self.pids:
                if self.frames == 0:
                    print('[{}]/{:06d} Add person {}'.format(self.__class__.__name__, self.frames, pid))
                    self.pids.append(pid)
                else:
                    continue
            k3d_.append(keypoints3d[i])
            pid_.append(pid)
        self.frames += 1
        k3d_ = np.stack(k3d_)
        return {
            'keypoints3d': k3d_,
            'pids': pid_
        }


class CollectMultiPersonMultiFrame:
    def __init__(self, key) -> None:
        self.key = key

    def __call__(self, keypoints3d, pids):
        # group the per-frame detections by person id
        records = {}
        for frame in tqdm(range(len(pids)), desc='Reading'):
            pid_frame = pids[frame]
            for i, pid in enumerate(pid_frame):
                if pid not in records:
                    records[pid] = {
                        'frames': [],
                        'keypoints3d': []
                    }
                records[pid]['frames'].append(frame)
                records[pid]['keypoints3d'].append(keypoints3d[frame][i])
        for pid, record in records.items():
            record['keypoints3d'] = np.stack(record['keypoints3d']).astype(np.float32)
        return {'results': records}
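
A minimal sketch of what CollectMultiPersonMultiFrame produces (the shapes are assumptions; person 0 appears in both frames, person 1 only in the second):

import numpy as np
from myeasymocap.stages.collect import CollectMultiPersonMultiFrame

keypoints3d = [np.zeros((1, 25, 4)), np.zeros((2, 25, 4))]  # per-frame stacks
pids = [[0], [0, 1]]
out = CollectMultiPersonMultiFrame(key='keypoints3d')(keypoints3d, pids)
print(out['results'][0]['frames'])             # [0, 1]
print(out['results'][1]['keypoints3d'].shape)  # (1, 25, 4)
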
@ -6,5 +6,7 @@ yacs
tabulate
termcolor
chumpy
mediapipe
func_timeout
mediapipe==0.10.0
func_timeout
ultralytics
gdown
13
setup.py
@ -21,8 +21,19 @@ setup(
        'easymocap.pyfitting',
        'easymocap.mytools',
        'easymocap.annotator',
        'easymocap.estimator'
        'easymocap.estimator',
        'myeasymocap'
    ],
    entry_points={
        'console_scripts': [
            'emc=apps.mocap.run:main_entrypoint',
            # 'easymocap_calib=easymocap.mytools.entry:calib',
            # 'easymocap_tools=easymocap.mytools.entry:main',
            # 'extract_keypoints=easymocap.mytools.cmdtools.extract_keypoints:main'
        ],
    },
    install_requires=[],
    data_files = []
)

emc = "apps.mocap.run:main_entrypoint"