🚀 update to v0.3

This commit is contained in:
shuaiqing 2023-06-19 16:39:27 +08:00
parent b44fa3c90b
commit e7800a1356
39 changed files with 6218 additions and 5 deletions

6
.gitignore vendored
View File

@@ -112,5 +112,7 @@ data/**
 .DS*
 code_deprecate
 code
-neuralbody
-lightning_logs
+# neuralbody
+lightning_logs
+models
+yolov5m.pt

96
apps/mocap/run.py Normal file
View File

@@ -0,0 +1,96 @@
# This script provides the basic entry point for running mocap
import os
from easymocap.config import Config, load_object
from tqdm import tqdm
def process(dataset, model):
ret_all = []
print('[Run] dataset has {} samples'.format(len(dataset)))
for i in tqdm(range(len(dataset)), desc='[Run]'):
data = dataset[i]
ret = model.at_step(data, i)
ret_all.append(ret)
ret_all = model.at_final(ret_all)
def update_data_by_args(cfg_data, args):
if args.root is not None:
cfg_data.args.root = args.root
if args.subs is not None:
cfg_data.args.subs = args.subs
if args.subs_vis is not None:
cfg_data.args.subs_vis = args.subs_vis
if args.ranges is not None:
cfg_data.args.ranges = args.ranges
if args.cameras is not None:
cfg_data.args.reader.cameras.root = args.cameras
if args.skip_vis:
cfg_data.args.subs_vis = []
return cfg_data
def update_exp_by_args(cfg_exp, args):
opts_alias = []
if 'alias' in cfg_exp.keys():
for i in range(len(args.opt_exp)//2):
if args.opt_exp[i*2] in cfg_exp.alias.keys():
opts_alias.append(cfg_exp.alias[args.opt_exp[i*2]])
opts_alias.append(args.opt_exp[i*2+1])
cfg_exp.merge_from_list(opts_alias)
if args.skip_vis:
for key, val in cfg_exp.args.at_step.items():
if key.startswith('vis'):
val.skip = True
def load_cfg_from_file(cfg, args):
cfg = Config.load(cfg)
cfg_data = Config.load(cfg.data)
cfg_data.args.merge_from_other_cfg(cfg.data_opts)
cfg_data = update_data_by_args(cfg_data, args)
cfg_exp = Config.load(cfg.exp)
cfg_exp.args.merge_from_other_cfg(cfg.exp_opts)
update_exp_by_args(cfg_exp, args)
return cfg_data, cfg_exp
def load_cfg_from_cmd(args):
cfg_data = Config.load(args.data, args.opt_data)
cfg_data = update_data_by_args(cfg_data, args)
cfg_exp = Config.load(args.exp, args.opt_exp)
update_exp_by_args(cfg_exp, args)
return cfg_data, cfg_exp
def main_entrypoint():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default=None)
for name in ['data', 'exp']:
parser.add_argument('--{}'.format(name), type=str, required=False)
parser.add_argument('--opt_{}'.format(name), type=str, nargs='+', default=[])
parser.add_argument('--root', type=str, default=None)
parser.add_argument('--subs', type=str, default=None, nargs='+')
parser.add_argument('--subs_vis', type=str, default=None, nargs='+')
parser.add_argument('--ranges', type=int, default=None, nargs=3)
parser.add_argument('--cameras', type=str, default=None, help='Camera file path')
parser.add_argument('--out', type=str, default=None)
parser.add_argument('--skip_vis', action='store_true')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
if args.cfg is not None:
cfg_data, cfg_exp = load_cfg_from_file(args.cfg, args)
else:
cfg_data, cfg_exp = load_cfg_from_cmd(args)
if args.out is not None:
cfg_exp.args.output = args.out
out = cfg_exp.args.output
os.makedirs(out, exist_ok=True)
print(cfg_data, file=open(os.path.join(out, 'cfg_data.yml'), 'w'))
print(cfg_exp, file=open(os.path.join(out, 'cfg_exp.yml'), 'w'))
dataset = load_object(cfg_data.module, cfg_data.args)
print(dataset)
model = load_object(cfg_exp.module, cfg_exp.args)
process(dataset, model)
if __name__ == '__main__':
main_entrypoint()
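
The runner above hinges on load_object, which builds a dataset or pipeline object from a `module` path plus YAML `args`. A minimal sketch of the assumed behavior (the helper name load_object_sketch is illustrative, not part of the repo):

import importlib

def load_object_sketch(module_path, args):
    # split 'myeasymocap.stages.basestage.MultiStage' -> package path + class name
    pkg_name, cls_name = module_path.rsplit('.', 1)
    cls = getattr(importlib.import_module(pkg_name), cls_name)
    return cls(**args)  # construct with the config-provided arguments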

139
config/1v1p/fixhand.yml Normal file
View File

@@ -0,0 +1,139 @@
smooth: &smooth_keypoints
weight: 50.
module: myeasymocap.operations.loss.Smooth
key_from_output: [keypoints, Th]
key_from_infos: [] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [Th, keypoints]
smooth_type: [Linear, Linear] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2]
order: [2, 2]
weights: [1000., 1000.]
window_weight: [0.5, 0.3, 0.1, 0.1]
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/sv1p_hand_fix
at_step:
detect_by_mediapipe:
module: myeasymocap.backbone.mediapipe.hand.MediaPipe
key_from_data: [images, imgnames]
args:
ckpt: models/mediapipe/hand_landmarker.task
hand2d:
module: myeasymocap.backbone.hand2d.hand2d.MyHand2D
key_from_data: [images, imgnames]
key_from_previous: [bbox]
args:
# ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
mode: resnet
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints, bbox]
args:
name: vis_keypoints2d
scale: 0.5
infer_mano: #
module: myeasymocap.backbone.hmr.hmr.MyHMR
key_from_data: [images, imgnames]
key_from_previous: [bbox]
key_keep: [meta, cameras, imgnames] # carry these through to the final output
args:
ckpt: models/manol_pca45_noflat.ckpt
# TODO: add visualize for Init MANO
at_final:
load_hand_model: # load the hand model
module: myeasymocap.io.model.MANOLoader
args:
cfg_path: config/model/mano.yml
model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
num_pca_comps: 45
use_pca: True
use_flat_mean: False
# this module returns two things, body_model and model; body_model is used for visualization
mean_param: # initialize the pose: average both poses and shapes over all frames
module: myeasymocap.operations.init.MeanShapes
key_from_data: [params]
args:
keys: ['poses', 'shapes']
init_T: # initialize the per-frame translation
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras, params]
key_from_previous: [model]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Th]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l2
smooth: *smooth_keypoints
init_R: # initialize the per-frame rotation
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Rh]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l2
smooth: *smooth_keypoints
refine_poses: # optimize the poses
repeat: 2
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [poses, shapes, Rh, Th]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l1
reg:
weight: 0.001
module: myeasymocap.operations.loss.RegLoss
key_from_output: [poses]
key_from_infos: []
args:
key: poses
norm: l2
smooth: *smooth_keypoints
write:
module: myeasymocap.io.write.WriteSMPL
key_from_data: [meta]
key_from_previous: [params, model]
args:
name: smpl
render:
module: myeasymocap.io.vis3d.Render_multiview
key_from_data: [cameras, imgnames]
key_from_previous: [hand_model, params]
args:
model_name: hand_model
backend: pyrender
view_list: [0]
scale: 0.5
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 50
keep_image: False
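
Every named block under at_step and at_final follows the same contract: inputs come from the current dataset sample (key_from_data) or from earlier stages' outputs (key_from_previous), and the dict a stage returns is merged into the running state. A hedged sketch of the at_step dispatch this config implies, treating each configured block as an object exposing those key lists (all names illustrative):

def run_at_step_sketch(stages, sample):
    state = {}
    for name, stage in stages.items():
        inputs = {k: sample[k] for k in stage.key_from_data}
        inputs.update({k: state[k] for k in stage.key_from_previous})
        out = stage(**inputs) or {}
        state.update(out)  # later stages can consume these outputs
    return state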

View File

@@ -0,0 +1,139 @@
smooth: &smooth_keypoints
weight: 50.
module: myeasymocap.operations.loss.Smooth
key_from_output: [keypoints, Th]
key_from_infos: [] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [Th, keypoints]
smooth_type: [Linear, Linear] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2]
order: [2, 2]
weights: [1000., 1000.]
window_weight: [0.5, 0.3, 0.1, 0.1]
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/sv1p_hand
at_step:
detect_by_mediapipe:
module: myeasymocap.backbone.mediapipe.hand.MediaPipe
key_from_data: [images, imgnames]
args:
ckpt: models/mediapipe/hand_landmarker.task
hand2d:
module: myeasymocap.backbone.hand2d.hand2d.MyHand2D
key_from_data: [images, imgnames]
key_from_previous: [bbox]
args:
# ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
mode: resnet
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints, bbox]
args:
name: vis_keypoints2d
scale: 0.5
infer_mano: #
module: myeasymocap.backbone.hmr.hmr.MyHMR
key_from_data: [images, imgnames]
key_from_previous: [bbox]
key_keep: [meta, cameras, imgnames] # carry these through to the final output
args:
ckpt: models/manol_pca45_noflat.ckpt
# TODO: add visualize for Init MANO
at_final:
load_hand_model: # load the hand model
module: myeasymocap.io.model.MANOLoader
args:
cfg_path: config/model/mano.yml
model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
num_pca_comps: 45
use_pca: True
use_flat_mean: False
# this module returns two things, body_model and model; body_model is used for visualization
mean_param: # initialization: average the shapes over all frames
module: myeasymocap.operations.init.MeanShapes
key_from_data: [params]
args:
keys: ['shapes']
init_T: # initialize the per-frame translation
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras, params]
key_from_previous: [model]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Th]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l2
smooth: *smooth_keypoints
init_R: # initialize the per-frame rotation
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Rh]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l2
smooth: *smooth_keypoints
refine_poses: # optimize the poses
repeat: 2
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [poses, shapes, Rh, Th]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l1
reg:
weight: 0.001
module: myeasymocap.operations.loss.RegLoss
key_from_output: [poses]
key_from_infos: []
args:
key: poses
norm: l2
smooth: *smooth_keypoints
write:
module: myeasymocap.io.write.WriteSMPL
key_from_data: [meta]
key_from_previous: [params, model]
args:
name: smpl
render:
module: myeasymocap.io.vis3d.Render_multiview
key_from_data: [cameras, imgnames]
key_from_previous: [hand_model, params]
args:
model_name: hand_model
backend: pyrender
view_list: [0]
scale: 0.5
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 50
keep_image: False

View File

@@ -0,0 +1,147 @@
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/sv1p # output directory
at_step:
detect:
module: myeasymocap.backbone.yolo.yolo.YoloWithTrack
key_from_data: [images, imgnames]
args:
model: yolov5m
name: person
keypoints2d:
module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
key_from_data: [images, imgnames]
key_from_previous: [bbox]
key_keep: []
args:
ckpt: /nas/home/shuaiqing/Code/EasyMocapPublic/data/models/pose_hrnet_w48_384x288.pth
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints, bbox]
args:
name: vis_keypoints2d
scale: 0.5
infer: # given the image and the detected bbox, this module directly returns the body pose in the crop coordinate frame
module: myeasymocap.backbone.pare.pare.MyPARE
key_from_data: [images, imgnames] # bbox, images and image names read from the dataset; the image names are used for saving results
key_from_previous: [bbox]
key_keep: [cameras, imgnames] # carry these through to the final output
args:
ckpt: 3dpw # use the checkpoint pretrained on 3DPW
at_final:
load_body_model: # load the SMPL model
module: myeasymocap.io.model.SMPLLoader
args:
model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl
regressor_path: models/J_regressor_body25.npy
init_translation: # given the crop-frame pose, 2D keypoints and camera parameters, return the body pose in world coordinates
module: myeasymocap.operations.init.InitTranslation
key_from_data: [keypoints, cameras, params] # read the keypoints, camera parameters and SMPL parameters
key_from_previous: [body_model] # the SMPL model is used to compute keypoints
args:
solve_T: True
solve_R: False
smooth: # smooth the initialization results
module: myeasymocap.operations.smooth.SmoothPoses
key_from_data: [params]
args:
window_size: 2
mean_param: # Mean shapes
module: myeasymocap.operations.init.MeanShapes
key_from_data: [params]
args:
keys: ['shapes']
init_RT:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Th, Rh]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: l2
index_est: [2, 5, 9, 12]
index_gt: [2, 5, 9, 12]
smooth:
weight: 1.
module: myeasymocap.operations.loss.Smooth
key_from_output: [Rh, Th]
key_from_infos: [cameras] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [Th, Th]
smooth_type: [Linear, Depth] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2]
order: [2, 2]
weights: [100., 1000.]
window_weight: [0.5, 0.3, 0.1, 0.1]
refine_poses:
repeat: 2
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, cameras]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [poses, Rh, Th]
loss:
repro:
weight: 100.
module: myeasymocap.operations.loss.Keypoints2D
key_from_output: [keypoints]
key_from_infos: [keypoints, cameras]
args:
norm: gm
norm_info: 0.02
smooth:
weight: 1.
module: myeasymocap.operations.loss.Smooth
key_from_output: [poses, Rh, Th, keypoints]
key_from_infos: [cameras] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [Th, Th, poses, keypoints]
smooth_type: [Linear, Depth, Linear, Linear] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2, l2, l2]
order: [2, 2, 2, 2]
weights: [100., 1000., 50., 100.]
window_weight: [0.5, 0.3, 0.1, 0.1]
init:
weight: 1.
module: myeasymocap.operations.loss.Init
key_from_output: [poses]
key_from_infos: [init_poses]
args:
keys: [poses]
norm: l2
weights: [1.]
prior:
weight: 0.1
module: easymocap.multistage.gmm.GMMPrior
key_from_output: [poses]
key_from_infos: []
args:
start: 0
end: 69
write:
module: myeasymocap.io.write.WriteSMPL
key_from_data: [meta]
key_from_previous: [params, model]
args:
name: smpl
render:
module: myeasymocap.io.vis3d.Render
key_from_data: [cameras, imgnames]
key_from_previous: [params, body_model]
args:
backend: pyrender
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 30
keep_image: False
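
init_RT and refine_poses both drive myeasymocap.operations.optimizer.Optimizer with optim_type: lbfgs, minimizing a weighted sum of the configured losses over the listed optimize_keys. A minimal sketch of that loop, assuming params holds torch tensors and each loss entry is a weight plus a callable (all names illustrative, not the repo's API):

import torch

def lbfgs_stage_sketch(params, optimize_keys, losses, max_iter=20):
    leaves = [params[k].requires_grad_(True) for k in optimize_keys]
    optim = torch.optim.LBFGS(leaves, max_iter=max_iter)
    def closure():
        optim.zero_grad()
        total = sum(cfg['weight'] * cfg['fn'](params) for cfg in losses.values())
        total.backward()
        return total
    optim.step(closure)
    return params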

View File

@@ -0,0 +1,16 @@
module: myeasymocap.datasets.mv1p.MVDataset
args:
root: TO_BE_FILLED
subs: [] # views to use; empty means all views
subs_vis: ['01'] # visualized views
ranges: [0, 10000, 1]
read_image: True
reader:
images:
root: images
ext: .jpg
image_shape:
root: images
ext: .jpg
cameras:
root: ''

View File

@@ -0,0 +1,13 @@
module: myeasymocap.datasets.sv1p.SVDataset
args:
root: TO_BE_FILLED
subs: ['video'] # one of the folders under the root path
ranges: [0, 10000, 1] # range of frames to use
read_image: True # images are read because a CNN is used later for SMPL parameter estimation
reader:
images:
root: images
ext: .jpg
image_shape:
root: images
ext: .jpg
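
In both dataset configs, ranges follows Python range semantics, [start, stop, step]; the dataset base class (ImageDataBase.check_frames_length, later in this commit) clips the stop value to the real sequence length before expanding it:

ranges = [0, 10000, 1]         # [start, stop, step] from the config
frames = list(range(*ranges))  # after stop is clipped to the actual frame count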

View File

@@ -0,0 +1,50 @@
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/detect_hand_triangulate
at_step:
detect:
module: myeasymocap.backbone.mediapipe.hand.MediaPipe
key_from_data: [images, imgnames]
args:
ckpt: models/mediapipe/hand_landmarker.task
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints]
args:
name: vis_keypoints2d
scale: 0.5
triangulate:
module: myeasymocap.operations.triangulate.SimpleTriangulate
key_from_data: [cameras]
key_from_previous: [keypoints]
key_keep: [cameras] # used for the final joint optimization
args:
mode: iterative # [naive, iterative]
visualize:
module: myeasymocap.io.vis.Vis3D
key_from_data: [images, cameras]
key_from_previous: [keypoints3d] # used for the final joint optimization
args:
scale: 1.
mode: crop
mode_args:
- [0, 720, 100, 820]
- [0, 720, 100, 820]
- [0, 720, 400, 1120]
at_final:
smooth:
module: myeasymocap.operations.smooth.Smooth
key_from_data: [keypoints3d]
args:
window_size: 5
write:
module: myeasymocap.io.write.Write
key_from_data: [keypoints3d]
args: {}
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 60
keep_image: False
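
SimpleTriangulate's naive mode is presumably a confidence-weighted DLT per joint, with the iterative mode re-weighting views by reprojection error. A minimal DLT sketch under that assumption (not the repo's actual implementation):

import numpy as np

def triangulate_point_sketch(Ps, points2d):
    # Ps: list of (3, 4) projection matrices; points2d: (nViews, 3) rows of (u, v, conf)
    A = []
    for P, (u, v, conf) in zip(Ps, points2d):
        A.append(conf * (u * P[2] - P[0]))
        A.append(conf * (v * P[2] - P[1]))
    X = np.linalg.svd(np.asarray(A))[2][-1]  # right singular vector of the smallest singular value
    return X[:3] / X[3]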

View File

@@ -0,0 +1,166 @@
smooth: &smooth_keypoints
weight: 1.
module: myeasymocap.operations.loss.Smooth
key_from_output: [keypoints, poses]
key_from_infos: [] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [poses, keypoints]
smooth_type: [Linear, Linear] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2]
order: [2, 2]
weights: [10., 1000.]
window_weight: [0.5, 0.3, 0.1, 0.1]
k3dtorso: &k3dtorso
weight: 100.
module: myeasymocap.operations.loss.Keypoints3D
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
norm: l2
index_est: [0, 5, 9, 13, 17]
index_gt: [0, 5, 9, 13, 17]
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/detect_hand_triangulate_fitMANO
at_step:
detect:
module: myeasymocap.backbone.mediapipe.hand.MediaPipe
key_from_data: [images, imgnames]
key_keep: [imgnames]
args:
ckpt: models/mediapipe/hand_landmarker.task
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints]
args:
name: vis_keypoints2d
scale: 0.5
triangulate:
module: myeasymocap.operations.triangulate.SimpleTriangulate
key_from_data: [cameras]
key_from_previous: [keypoints]
key_keep: [cameras] # used for the final joint optimization
args:
mode: iterative # [naive, iterative]
visualize:
module: myeasymocap.io.vis.Vis3D
key_from_data: [images, cameras]
key_from_previous: [keypoints3d] # used for the final joint optimization
args:
scale: 0.5
mode: center
at_final:
load_hand_model: # load the hand model
module: myeasymocap.io.model.MANOLoader
args:
cfg_path: config/model/manol.yml
model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
num_pca_comps: 45
use_pca: True
use_flat_mean: False
init_params:
module: myeasymocap.operations.init.InitParams
key_from_data: [keypoints3d]
args:
num_poses: 45
num_shapes: 10
fitShape:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [shapes]
loss:
k3d:
weight: 10000.
module: myeasymocap.operations.loss.LimbLength
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
kintree: [[ 1, 0], [ 2, 1], [ 3, 2], [ 4, 3], [ 5, 0], [ 6, 5], [ 7, 6], [ 8, 7], [ 9, 0], [10, 9], [11, 10], [12, 11], [13, 0], [14, 13], [15, 14], [16, 15], [17, 0], [18, 17], [19, 18], [20, 19]]
regshape:
weight: 0.1
module: myeasymocap.operations.loss.RegLoss
key_from_output: [shapes]
key_from_infos: []
args:
key: shapes
norm: l2
init_T:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Th]
loss:
k3d: *k3dtorso
smooth: *smooth_keypoints
init_R:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Rh]
loss:
k3d: *k3dtorso
smooth: *smooth_keypoints
refine_poses:
repeat: 2
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [poses, Rh, Th]
loss:
k3d:
weight: 1000000.
module: myeasymocap.operations.loss.Keypoints3D
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
norm: l2
norm_info: 0.02
smooth: *smooth_keypoints
regpose:
weight: 0.1
module: myeasymocap.operations.loss.RegLoss
key_from_output: [poses]
key_from_infos: []
args:
key: poses
norm: l2
write:
module: myeasymocap.io.write.WriteSMPL
key_from_data: [meta]
key_from_previous: [params, model]
args:
name: smpl
render:
module: myeasymocap.io.vis3d.Render_multiview
key_from_data: [cameras, imgnames]
key_from_previous: [params, hand_model]
args:
model_name: hand_model
backend: pyrender
view_list: [1, 0, 2]
scale: 1.
render_mode: image
mode: crop
mode_args:
- [0, 720, 100, 820]
- [0, 720, 100, 820]
- [0, 720, 400, 1120]
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 60
keep_image: False
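
The fitShape stage matches bone lengths rather than joint positions, so it can use noisy triangulated joints while staying invariant to global rotation and translation. A sketch of the LimbLength idea over the kintree pairs above (assumed form, illustrative names):

import numpy as np

def limb_length_sketch(kpts_est, kpts_gt, kintree):
    edges = np.asarray(kintree)
    len_est = np.linalg.norm(kpts_est[edges[:, 0], :3] - kpts_est[edges[:, 1], :3], axis=-1)
    len_gt = np.linalg.norm(kpts_gt[edges[:, 0], :3] - kpts_gt[edges[:, 1], :3], axis=-1)
    return float(((len_est - len_gt) ** 2).mean())  # squared bone-length residual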

View File

@@ -0,0 +1,54 @@
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/detect_triangulate
at_step:
detect:
module: myeasymocap.backbone.yolo.yolo.BaseYOLOv5
key_from_data: [images, imgnames]
args:
model: yolov5m
name: person
keypoints2d:
module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
key_from_data: [images, imgnames]
key_from_previous: [bbox]
key_keep: []
args:
ckpt: data/models/pose_hrnet_w48_384x288.pth
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints, bbox]
args:
name: vis_keypoints2d
scale: 0.5
triangulate:
module: myeasymocap.operations.triangulate.SimpleTriangulate
key_from_data: [cameras]
key_from_previous: [keypoints]
key_keep: [cameras, imgnames]
args:
mode: iterative # [naive, iterative]
visualize:
module: myeasymocap.io.vis.Vis3D
key_from_data: [images, cameras]
key_from_previous: [keypoints3d] # used for the final joint optimization
args:
scale: 0.5
mode: center
at_final:
smooth:
module: myeasymocap.operations.smooth.Smooth
key_from_data: [keypoints3d]
args:
window_size: 5
write:
module: myeasymocap.io.write.Write
key_from_data: [keypoints3d]
args: {}
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 50
keep_image: False

View File

@@ -0,0 +1,169 @@
module: myeasymocap.stages.basestage.MultiStage
args:
output: output/detect_triangulate_fitSMPL
at_step:
detect:
module: myeasymocap.backbone.yolo.yolo.BaseYOLOv5
key_from_data: [images, imgnames]
args:
model: yolov5m
name: person
keypoints2d:
module: myeasymocap.backbone.hrnet.myhrnet.MyHRNet
key_from_data: [images, imgnames]
key_from_previous: [bbox]
key_keep: []
args:
ckpt: data/models/pose_hrnet_w48_384x288.pth
vis2d:
module: myeasymocap.io.vis.Vis2D
skip: False
key_from_data: [images]
key_from_previous: [keypoints, bbox]
args:
name: vis_keypoints2d
scale: 0.5
triangulate:
module: myeasymocap.operations.triangulate.SimpleTriangulate
key_from_data: [cameras]
key_from_previous: [keypoints]
key_keep: [cameras, imgnames] # used for the final joint optimization
args:
mode: iterative # [naive, iterative]
visualize:
module: myeasymocap.io.vis.Vis3D
skip: False
key_from_data: [images, cameras]
key_from_previous: [keypoints3d] # used for the final joint optimization
args:
scale: 0.5
mode: center
at_final:
load_body_model:
module: myeasymocap.io.model.SMPLLoader
args:
model_path: models/pare/data/body_models/smpl/SMPL_NEUTRAL.pkl #
regressor_path: models/J_regressor_body25.npy
init_params:
module: myeasymocap.operations.init.InitParams
key_from_data: [keypoints3d]
args:
num_poses: 69
num_shapes: 10
fitShape:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [shapes]
loss:
k3d:
weight: 100.
module: myeasymocap.operations.loss.LimbLength
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
kintree: [[8, 1], [2, 5], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [2, 3], [5, 6], [3, 4], [6, 7], [1, 0], [9, 12], [9, 10], [10, 11], [12, 13],[13, 14]]
regshape:
weight: 0.1
module: myeasymocap.operations.loss.RegLoss
key_from_output: [shapes]
key_from_infos: []
args:
key: shapes
norm: l2
init_RT:
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [Th, Rh]
loss:
k3d:
weight: 100.
module: myeasymocap.operations.loss.Keypoints3D
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
norm: l2
index_est: [2, 5, 9, 12]
index_gt: [2, 5, 9, 12]
smooth:
weight: 1.
module: myeasymocap.operations.loss.Smooth
key_from_output: [Th, keypoints]
key_from_infos: [] # TODO: compute the smoothing weights from the 2D confidences
args:
keys: [keypoints, Th]
smooth_type: [Linear, Linear] # the Depth type seems to need camera parameters for the conversion
norm: [l2, l2]
order: [2, 2]
weights: [10., 100.]
window_weight: [0.5, 0.3, 0.1, 0.1]
refine_poses:
repeat: 2
module: myeasymocap.operations.optimizer.Optimizer
key_from_data: [keypoints, keypoints3d]
key_from_previous: [model, params]
args:
optimizer_args: {optim_type: lbfgs}
optimize_keys: [poses, Rh, Th]
loss:
k3d:
weight: 1000.
module: myeasymocap.operations.loss.Keypoints3D
key_from_output: [keypoints]
key_from_infos: [keypoints3d]
args:
norm: l2
norm_info: 0.02
smooth:
weight: 1.
module: myeasymocap.operations.loss.Smooth
key_from_output: [poses, Th, keypoints]
key_from_infos: []
args:
keys: [Th, poses, keypoints]
smooth_type: [Linear, Linear, Linear]
norm: [l2, l2, l2]
order: [2, 2, 2]
weights: [100., 10., 10.,]
window_weight: [0.5, 0.3, 0.1, 0.1]
prior:
weight: 0.1
module: easymocap.multistage.gmm.GMMPrior
key_from_output: [poses]
key_from_infos: []
args:
start: 0
end: 69
write:
module: myeasymocap.io.write.WriteSMPL
key_from_data: [meta]
key_from_previous: [params, model]
args:
name: smpl
# render:
# module: myeasymocap.io.vis3d.Render_multiview
# key_from_data: [cameras, imgnames]
# key_from_previous: [params, body_model]
# args:
# backend: pyrender
# view_list: [0]
render_ground:
module: myeasymocap.io.vis3d.Render_multiview
key_from_data: [cameras, imgnames]
key_from_previous: [params, body_model]
args:
backend: pyrender
view_list: [3]
mode: ground
scale: 1.
shape: [1024, 1024]
make_video:
module: myeasymocap.io.video.MakeVideo
args:
fps: 50
keep_image: False

155
easymocap/multistage/gmm.py Normal file
View File

@@ -0,0 +1,155 @@
import pickle
import os
from os.path import join
import numpy as np
import torch
from .lossbase import LossBase
def create_prior_from_cmu(n_gaussians, epsilon=1e-15):
"""Load the gmm from the CMU motion database."""
from os.path import dirname
np_dtype = np.float32
with open(join(dirname(__file__), 'gmm_%02d.pkl'%(n_gaussians)), 'rb') as f:
gmm = pickle.load(f, encoding='latin1')
if True:
means = gmm['means'].astype(np_dtype)
covs = gmm['covars'].astype(np_dtype)
weights = gmm['weights'].astype(np_dtype)
precisions = [np.linalg.inv(cov) for cov in covs]
precisions = np.stack(precisions).astype(np_dtype)
sqrdets = np.array([(np.sqrt(np.linalg.det(c)))
for c in gmm['covars']])
const = (2 * np.pi)**(69 / 2.)
nll_weights = np.asarray(gmm['weights'] / (const * (sqrdets / sqrdets.min())))
cov_dets = [np.log(np.linalg.det(cov.astype(np_dtype)) + epsilon)
for cov in covs]
return {
'means': means,
'covs': covs,
'precisions': precisions,
'nll_weights': -np.log(nll_weights[None]),
'weights': weights,
'pi_term': np.log(2*np.pi),
'cov_dets': cov_dets
}
class MaxMixturePrior(LossBase):
def __init__(self, num_gaussians=8, epsilon=1e-16, use_merged=True,
start=3, end=72):
super(MaxMixturePrior, self).__init__()
np_dtype = np.float32
self.num_gaussians = num_gaussians
self.epsilon = epsilon
self.use_merged = use_merged
data = create_prior_from_cmu(num_gaussians)
self.start = start
self.end = end
for key, val in data.items():
self.register_buffer(key, torch.tensor(val, dtype=torch.float32))
self.random_var_dim = self.means.shape[1] # pose dimensionality; used by log_likelihood below
def get_mean(self):
''' Returns the mean of the mixture '''
mean_pose = torch.matmul(self.weights, self.means)
return mean_pose
def merged_log_likelihood(self, poses):
poses = poses[..., self.start:self.end]
diff_from_mean = poses.unsqueeze(dim=1) - self.means[None, :, :self.end-self.start]
prec_diff_prod = torch.einsum('mij,bmj->bmi',
[self.precisions, diff_from_mean])
diff_prec_quadratic = (prec_diff_prod * diff_from_mean).sum(dim=-1)
curr_loglikelihood = 0.5 * diff_prec_quadratic + self.nll_weights
min_likelihood, _ = torch.min(curr_loglikelihood, dim=1)
return min_likelihood
def log_likelihood(self, pose, betas, *args, **kwargs):
''' Create graph operation for negative log-likelihood calculation
'''
likelihoods = []
for idx in range(self.num_gaussians):
mean = self.means[idx]
prec = self.precisions[idx]
cov = self.covs[idx]
diff_from_mean = pose - mean
curr_loglikelihood = torch.einsum('bj,ji->bi',
[diff_from_mean, prec])
curr_loglikelihood = torch.einsum('bi,bi->b',
[curr_loglikelihood,
diff_from_mean])
cov_term = torch.log(torch.det(cov) + self.epsilon)
curr_loglikelihood += 0.5 * (cov_term +
self.random_var_dim *
self.pi_term)
likelihoods.append(curr_loglikelihood)
log_likelihoods = torch.stack(likelihoods, dim=1)
min_idx = torch.argmin(log_likelihoods, dim=1)
weight_component = self.nll_weights[:, min_idx]
return weight_component + log_likelihoods[:, min_idx]
def forward(self, poses, **kwargs):
if self.use_merged:
return self.merged_log_likelihood(poses).mean()
else:
return self.log_likelihood(poses).mean()
class MaxMixtureCompletePrior(object):
"""Prior density estimation."""
prior = None
mean_pose = None
def __init__(self, n_gaussians=8, start=3, end=72):
self.n_gaussians = n_gaussians
self.start = start
self.end = end
if self.prior is None:
self.prior = self.create_prior_from_cmu()
def create_prior_from_cmu(self):
"""Load the gmm from the CMU motion database."""
from os.path import dirname
np_dtype = np.float32
with open(join(dirname(__file__), 'gmm_%02d.pkl'%(self.n_gaussians)), 'rb') as f:
gmm = pickle.load(f, encoding='latin1')
if True:
means = gmm['means'].astype(np_dtype)
covs = gmm['covars'].astype(np_dtype)
weights = gmm['weights'].astype(np_dtype)
precisions = [np.linalg.inv(cov) for cov in covs]
precisions = np.stack(precisions).astype(np_dtype)
sqrdets = np.array([(np.sqrt(np.linalg.det(c)))
for c in gmm['covars']])
const = (2 * np.pi)**(69 / 2.)
nll_weights = np.asarray(gmm['weights'] / (const *
(sqrdets / sqrdets.min())))
self.means = means
self.weights = weights
self.mean_pose = weights.dot(means)
def __call__(self, body_model, body_params, info):
poses = body_params['poses']
for nf in range(poses.shape[0]):
poses[nf][self.start:self.end] = self.mean_pose[:self.end-self.start]
return body_params
def get_gmm_prior(self):
"""Getter implementation."""
return self.prior
class GMMPrior(MaxMixturePrior):
def __call__(self, pred, target):
poses = pred['poses']
poses = poses.reshape(-1, poses.shape[-1])
if self.use_merged:
return self.merged_log_likelihood(poses).mean()
else:
return self.log_likelihood(poses).mean()
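
GMMPrior is what the configs above reference as easymocap.multistage.gmm.GMMPrior; it scores only pred['poses'] and ignores the target. Hypothetical usage, assuming gmm_08.pkl is present next to the module as create_prior_from_cmu expects:

import torch  # already imported at the top of this file

prior = GMMPrior(num_gaussians=8, start=0, end=69)  # matches start/end in the configs
poses = torch.zeros(16, 69)  # (nFrames, 69) axis-angle body pose, illustrative
nll = prior({'poses': poses}, target=None)  # scalar negative log-likelihood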

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,241 @@
import os
from os.path import join
import numpy as np
import cv2
import torch
import torch.nn as nn
import pickle
import math
def rotate_2d(pt_2d, rot_rad):
x = pt_2d[0]
y = pt_2d[1]
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
xx = x * cs - y * sn
yy = x * sn + y * cs
return np.array([xx, yy], dtype=np.float32)
def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False):
# augment size with scale
src_w = src_width * scale
src_h = src_height * scale
src_center = np.zeros(2)
src_center[0] = c_x
src_center[1] = c_y # np.array([c_x, c_y], dtype=np.float32)
# augment rotation
rot_rad = np.pi * rot / 180
src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad)
src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad)
dst_w = dst_width
dst_h = dst_height
dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32)
dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32)
dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32)
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = src_center
src[1, :] = src_center + src_downdir
src[2, :] = src_center + src_rightdir
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = dst_center
dst[1, :] = dst_center + dst_downdir
dst[2, :] = dst_center + dst_rightdir
inv_trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans, inv_trans
def generate_patch_image_cv(cvimg, c_x, c_y, bb_width, bb_height, patch_width, patch_height, do_flip, scale, rot):
trans, inv_trans = gen_trans_from_patch_cv(c_x, c_y, bb_width, bb_height, patch_width, patch_height, scale, rot, inv=False)
img_patch = cv2.warpAffine(cvimg, trans, (int(patch_width), int(patch_height)),
flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
return img_patch, trans, inv_trans
def get_single_image_crop_demo(image, bbox, scale=1.2, crop_size=224,
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], fliplr=False):
crop_image, trans, inv_trans = generate_patch_image_cv(
cvimg=image.copy(),
c_x=bbox[0],
c_y=bbox[1],
bb_width=bbox[2],
bb_height=bbox[3],
patch_width=crop_size[0],
patch_height=crop_size[1],
do_flip=False,
scale=scale,
rot=0,
)
if fliplr:
crop_image = cv2.flip(crop_image, 1)
# cv2.imwrite('debug_crop.jpg', crop_image)
# import ipdb; ipdb.set_trace()
crop_image = crop_image.transpose(2,0,1)
mean1=np.array(mean, dtype=np.float32).reshape(3,1,1)
std1= np.array(std, dtype=np.float32).reshape(3,1,1)
crop_image = (crop_image.astype(np.float32))/255.
# _max = np.max(abs(crop_image))
# crop_image = np.divide(crop_image, _max)
crop_image = (crop_image - mean1)/std1
return crop_image, inv_trans
def xyxy2ccwh(bbox):
w = bbox[:, 2] - bbox[:, 0]
h = bbox[:, 3] - bbox[:, 1]
cx = (bbox[:, 2] + bbox[:, 0])/2
cy = (bbox[:, 3] + bbox[:, 1])/2
return np.stack([cx, cy, w, h], axis=1)
class BaseTopDownModel(nn.Module):
def __init__(self, bbox_scale, res_input,
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
super().__init__()
self.bbox_scale = bbox_scale
if not isinstance(res_input, list):
res_input = [res_input, res_input]
self.crop_size = res_input
self.mean = mean
self.std = std
def load_checkpoint(self, model, state_dict, prefix, strict):
state_dict_new = {}
for key, val in state_dict.items():
if key.startswith(prefix):
key_new = key.replace(prefix, '')
state_dict_new[key_new] = val
model.load_state_dict(state_dict_new, strict=strict)
def infer(self, image, bbox, to_numpy=False, flips=None):
if isinstance(image, str):
image = cv2.imread(image)
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
squeeze = False
if len(bbox.shape) == 1:
bbox = bbox[None]
squeeze = True
# TODO: support batches of multiple images
bbox = xyxy2ccwh(bbox)
inputs = []
inv_trans_ = []
for i in range(bbox.shape[0]):
if flips is None:
fliplr=False
else:
fliplr=flips[i]
norm_img, inv_trans = get_single_image_crop_demo(
img,
bbox[i],
scale=self.bbox_scale,
crop_size=self.crop_size,
mean=self.mean,
std=self.std,
fliplr=fliplr
)
inputs.append(norm_img)
inv_trans_.append(inv_trans)
inputs = np.stack(inputs)
inv_trans_ = np.stack(inv_trans_)
inputs = torch.FloatTensor(inputs).to(self.device)
with torch.no_grad():
output = self.model(inputs)
if squeeze:
for key, val in output.items():
output[key] = val[0]
if to_numpy:
for key, val in output.items():
if torch.is_tensor(val):
output[key] = val.detach().cpu().numpy()
output['inv_trans'] = inv_trans_
return output
@staticmethod
def batch_affine_transform(points, trans):
# points: (Bn, J, 2), trans: (Bn, 2, 3)
points = np.dstack((points[..., :2], np.ones((*points.shape[:-1], 1))))
out = np.matmul(points, trans.swapaxes(-1, -2))
return out
class BaseTopDownModelCache(BaseTopDownModel):
def __init__(self, name, **kwargs):
super().__init__(**kwargs)
self.name = name
def __call__(self, bbox, images, imgname, flips=None):
basename = os.sep.join(imgname.split(os.sep)[-2:])
cachename = join(self.output, self.name, basename.replace('.jpg', '.pkl'))
os.makedirs(os.path.dirname(cachename), exist_ok=True)
if os.path.exists(cachename):
with open(cachename, 'rb') as f:
output = pickle.load(f)
else:
output = self.infer(images, bbox, to_numpy=True, flips=flips)
with open(cachename, 'wb') as f:
pickle.dump(output, f)
ret = {
'params': output
}
return ret
# post processing
def get_max_preds(batch_heatmaps):
'''
get predictions from score maps
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
'''
assert isinstance(batch_heatmaps, np.ndarray), \
'batch_heatmaps should be numpy.ndarray'
assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
batch_size = batch_heatmaps.shape[0]
num_joints = batch_heatmaps.shape[1]
width = batch_heatmaps.shape[3]
heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
idx = np.argmax(heatmaps_reshaped, 2)
maxvals = np.amax(heatmaps_reshaped, 2)
maxvals = maxvals.reshape((batch_size, num_joints, 1))
idx = idx.reshape((batch_size, num_joints, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = (preds[:, :, 0]) % width
preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
pred_mask = pred_mask.astype(np.float32)
preds *= pred_mask
return preds, maxvals
def get_preds_from_heatmaps(batch_heatmaps):
coords, maxvals = get_max_preds(batch_heatmaps)
heatmap_height = batch_heatmaps.shape[2]
heatmap_width = batch_heatmaps.shape[3]
# post-processing
if True:
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
hm = batch_heatmaps[n][p]
px = int(math.floor(coords[n][p][0] + 0.5))
py = int(math.floor(coords[n][p][1] + 0.5))
if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
diff = np.array(
[
hm[py][px+1] - hm[py][px-1],
hm[py+1][px]-hm[py-1][px]
]
)
coords[n][p] += np.sign(diff) * .25
coords = coords.astype(np.float32) * 4
pred = np.dstack((coords, maxvals))
return pred
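
get_preds_from_heatmaps takes the per-joint argmax, refines it by a quarter-pixel shift toward the local gradient, then scales by 4 (the assumed heatmap-to-input stride). Example with dummy shapes:

import numpy as np  # as at the top of this file

heatmaps = np.random.rand(1, 17, 96, 72).astype(np.float32)  # (batch, joints, H, W)
pred = get_preds_from_heatmaps(heatmaps)
print(pred.shape)  # (1, 17, 3): x, y in input-resolution pixels, plus confidence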

View File

View File

@@ -0,0 +1,218 @@
import torch
from torch import nn
from .modules import BasicBlock, Bottleneck
class StageModule(nn.Module):
def __init__(self, stage, output_branches, c, bn_momentum):
super(StageModule, self).__init__()
self.stage = stage
self.output_branches = output_branches
self.branches = nn.ModuleList()
for i in range(self.stage):
w = c * (2 ** i)
branch = nn.Sequential(
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
)
self.branches.append(branch)
self.fuse_layers = nn.ModuleList()
# for each output_branches (i.e. each branch in all cases but the very last one)
for i in range(self.output_branches):
self.fuse_layers.append(nn.ModuleList())
for j in range(self.stage): # for each branch
if i == j:
self.fuse_layers[-1].append(nn.Sequential()) # Used in place of "None" because it is callable
elif i < j:
self.fuse_layers[-1].append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.Upsample(scale_factor=(2.0 ** (j - i)), mode='nearest'),
))
elif i > j:
ops = []
for k in range(i - j - 1):
ops.append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
bias=False),
nn.BatchNorm2d(c * (2 ** j), eps=1e-05, momentum=0.1, affine=True,
track_running_stats=True),
nn.ReLU(inplace=True),
))
ops.append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
bias=False),
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
))
self.fuse_layers[-1].append(nn.Sequential(*ops))
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
assert len(self.branches) == len(x)
x = [branch(b) for branch, b in zip(self.branches, x)]
x_fused = []
for i in range(len(self.fuse_layers)):
for j in range(0, len(self.branches)):
if j == 0:
x_fused.append(self.fuse_layers[i][0](x[0]))
else:
x_fused[i] = x_fused[i] + self.fuse_layers[i][j](x[j])
for i in range(len(x_fused)):
x_fused[i] = self.relu(x_fused[i])
return x_fused
class HRNet(nn.Module):
def __init__(self, c=48, nof_joints=17, bn_momentum=0.1):
super(HRNet, self).__init__()
# Input (stem net)
self.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
self.bn1 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
self.bn2 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
self.relu = nn.ReLU(inplace=True)
# Stage 1 (layer1) - First group of bottleneck (resnet) modules
downsample = nn.Sequential(
nn.Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(256, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
)
self.layer1 = nn.Sequential(
Bottleneck(64, 64, downsample=downsample),
Bottleneck(256, 64),
Bottleneck(256, 64),
Bottleneck(256, 64),
)
# Fusion layer 1 (transition1) - Creation of the first two branches (one full and one half resolution)
self.transition1 = nn.ModuleList([
nn.Sequential(
nn.Conv2d(256, c, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
nn.BatchNorm2d(c, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
),
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(256, c * (2 ** 1), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 1), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
)),
])
# Stage 2 (stage2) - Second module with 1 group of bottleneck (resnet) modules. This has 2 branches
self.stage2 = nn.Sequential(
StageModule(stage=2, output_branches=2, c=c, bn_momentum=bn_momentum),
)
# Fusion layer 2 (transition2) - Creation of the third branch (1/4 resolution)
self.transition2 = nn.ModuleList([
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(c * (2 ** 1), c * (2 ** 2), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 2), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
)), # ToDo Why the new branch derives from the "upper" branch only?
])
# Stage 3 (stage3) - Third module with 4 groups of bottleneck (resnet) modules. This has 3 branches
self.stage3 = nn.Sequential(
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
)
# Fusion layer 3 (transition3) - Creation of the fourth branch (1/8 resolution)
self.transition3 = nn.ModuleList([
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(c * (2 ** 2), c * (2 ** 3), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 3), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
)), # ToDo Why the new branch derives from the "upper" branch only?
])
# Stage 4 (stage4) - Fourth module with 3 groups of bottleneck (resnet) modules. This has 4 branches
self.stage4 = nn.Sequential(
StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
StageModule(stage=4, output_branches=1, c=c, bn_momentum=bn_momentum),
)
# Final layer (final_layer)
self.final_layer = nn.Conv2d(c, nof_joints, kernel_size=(1, 1), stride=(1, 1))
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.layer1(x)
x = [trans(x) for trans in self.transition1] # Since now, x is a list (# == nof branches)
x = self.stage2(x)
# x = [trans(x[-1]) for trans in self.transition2] # New branch derives from the "upper" branch only
x = [
self.transition2[0](x[0]),
self.transition2[1](x[1]),
self.transition2[2](x[-1])
] # New branch derives from the "upper" branch only
x = self.stage3(x)
# x = [trans(x) for trans in self.transition3] # New branch derives from the "upper" branch only
x = [
self.transition3[0](x[0]),
self.transition3[1](x[1]),
self.transition3[2](x[2]),
self.transition3[3](x[-1])
] # New branch derives from the "upper" branch only
x = self.stage4(x)
x = self.final_layer(x[0])
return {
'output': x
}
if __name__ == '__main__':
# model = HRNet(48, 17, 0.1)
model = HRNet(32, 17, 0.1)
# print(model)
model.load_state_dict(
# torch.load('./weights/pose_hrnet_w48_384x288.pth')
torch.load('./weights/pose_hrnet_w32_256x192.pth')
)
print('ok!!')
if torch.cuda.is_available() and False:
torch.backends.cudnn.deterministic = True
device = torch.device('cuda:0')
else:
device = torch.device('cpu')
print(device)
model = model.to(device)
y = model(torch.ones(1, 3, 384, 288).to(device))['output'] # forward returns a dict
print(y.shape)
print(torch.min(y).item(), torch.mean(y).item(), torch.max(y).item())

View File

@@ -0,0 +1,72 @@
import torch
from torch import nn
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_momentum)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
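
Both blocks are standard residual units: with stride 1 and matching channel counts the input and output shapes agree, which is what the StageModule branches in hrnet.py above rely on. A quick shape check (illustrative):

import torch  # as at the top of this file

block = BasicBlock(48, 48)  # inplanes == planes, stride 1, no downsample
x = torch.randn(1, 48, 96, 72)
assert block(x).shape == x.shape  # the residual add needs identical shapes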

View File

@@ -0,0 +1,130 @@
import os
import numpy as np
import math
import cv2
import torch
from ..basetopdown import BaseTopDownModelCache
from .hrnet import HRNet
def get_max_preds(batch_heatmaps):
'''
get predictions from score maps
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
'''
assert isinstance(batch_heatmaps, np.ndarray), \
'batch_heatmaps should be numpy.ndarray'
assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim: {}'.format(batch_heatmaps.shape)
batch_size = batch_heatmaps.shape[0]
num_joints = batch_heatmaps.shape[1]
width = batch_heatmaps.shape[3]
heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
idx = np.argmax(heatmaps_reshaped, 2)
maxvals = np.amax(heatmaps_reshaped, 2)
maxvals = maxvals.reshape((batch_size, num_joints, 1))
idx = idx.reshape((batch_size, num_joints, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = (preds[:, :, 0]) % width
preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
pred_mask = pred_mask.astype(np.float32)
preds *= pred_mask
return preds, maxvals
COCO17_IN_BODY25 = [0,16,15,18,17,5,2,6,3,7,4,12,9,13,10,14,11]
pairs = [[1, 8], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [1, 0], [0,15], [15,17], [0,16], [16,18], [14,19], [19,20], [14,21], [11,22], [22,23], [11,24]]
def coco17tobody25(points2d):
kpts = np.zeros((points2d.shape[0], 25, 3))
kpts[:, COCO17_IN_BODY25, :2] = points2d[:, :, :2]
kpts[:, COCO17_IN_BODY25, 2:3] = points2d[:, :, 2:3]
kpts[:, 8, :2] = kpts[:, [9, 12], :2].mean(axis=1)
kpts[:, 8, 2] = kpts[:, [9, 12], 2].min(axis=1)
kpts[:, 1, :2] = kpts[:, [2, 5], :2].mean(axis=1)
kpts[:, 1, 2] = kpts[:, [2, 5], 2].min(axis=1)
# swap the coordinate order here if needed:
# kpts = kpts[:, :, [1,0,2]]
return kpts
class MyHRNet(BaseTopDownModelCache):
def __init__(self, ckpt):
super().__init__(name='hand2d', bbox_scale=1.25, res_input=[288, 384])
model = HRNet(48, 17, 0.1)
if not os.path.exists(ckpt) and ckpt.endswith('pose_hrnet_w48_384x288.pth'):
url = "11ezQ6a_MxIRtj26WqhH3V3-xPI3XqYAw"
text = '''Download `models/pytorch/pose_coco/pose_hrnet_w48_384x288.pth` from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blW231MH2krnmLq5kkQ),
and place it into {}'''.format(os.path.dirname(ckpt))
print(text)
os.makedirs(os.path.dirname(ckpt), exist_ok=True)
cmd = 'gdown "{}" -O {}'.format(url, ckpt)
print('\n', cmd, '\n')
os.system(cmd)
assert os.path.exists(ckpt), f'{ckpt} not exists'
checkpoint = torch.load(ckpt, map_location='cpu')
model.load_state_dict(checkpoint)
model.eval()
self.model = model
self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
self.model.to(self.device)
@staticmethod
def get_max_preds(batch_heatmaps):
coords, maxvals = get_max_preds(batch_heatmaps)
heatmap_height = batch_heatmaps.shape[2]
heatmap_width = batch_heatmaps.shape[3]
# post-processing
if True:
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
hm = batch_heatmaps[n][p]
px = int(math.floor(coords[n][p][0] + 0.5))
py = int(math.floor(coords[n][p][1] + 0.5))
if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
diff = np.array(
[
hm[py][px+1] - hm[py][px-1],
hm[py+1][px]-hm[py-1][px]
]
)
coords[n][p] += np.sign(diff) * .25
coords = coords.astype(np.float32) * 4
pred = np.dstack((coords, maxvals))
return pred
def __call__(self, bbox, images, imgnames):
squeeze = False
if not isinstance(images, list):
images = [images]
imgnames = [imgnames]
bbox = [bbox]
squeeze = True
nViews = len(images)
kpts_all = []
for nv in range(nViews):
_bbox = bbox[nv]
if _bbox.shape[0] == 0:
kpts_all.append(np.zeros((25, 3))) # body25 layout, matching the coco17tobody25 output below
continue
img = images[nv]
# TODO: add flip test
out = super().__call__(_bbox, img, imgnames[nv])
output = out['params']['output']
kpts = self.get_max_preds(output)
kpts_ori = self.batch_affine_transform(kpts, out['params']['inv_trans'])
kpts = np.concatenate([kpts_ori, kpts[..., -1:]], axis=-1)
kpts = coco17tobody25(kpts)
if len(kpts.shape) == 3:
kpts = kpts[0]
kpts_all.append(kpts)
kpts_all = np.stack(kpts_all)
if squeeze:
kpts_all = kpts_all[0]
return {
'keypoints': kpts_all
}
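
Hypothetical single-view call, assuming the checkpoint exists locally and noting that the cache directory attribute self.output is expected to be injected by the pipeline before use:

import numpy as np  # as at the top of this file

est = MyHRNet(ckpt='data/models/pose_hrnet_w48_384x288.pth')
est.output = 'output'  # cache root; assumed to be set by the calling pipeline
bbox = np.array([100, 100, 400, 500, 0.99], dtype=np.float32)[None]  # (1, 5) xyxy + score
out = est(bbox, 'images/00/000000.jpg', 'images/00/000000.jpg')
print(out['keypoints'].shape)  # (25, 3) body25 keypoints with confidence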

View File

@@ -0,0 +1,292 @@
import torch
import numpy as np
import os
import cv2
from os.path import join
import pickle
def check_modelpath(paths):
if isinstance(paths, str):
assert os.path.exists(paths), paths
return paths
elif isinstance(paths, list):
for path in paths:
if os.path.exists(path):
print(f'Found model in {path}')
break
else:
print(f'No model found in {paths}!')
raise FileNotFoundError(paths)
return path
else:
raise NotImplementedError
class BaseYOLOv5:
def __init__(self, ckpt=None, model='yolov5m', name='object2d', multiview=True) -> None:
if ckpt is not None:
ckpt = check_modelpath(ckpt)
self.model = torch.hub.load('ultralytics/yolov5', 'custom', ckpt)
else:
print('[{}] Not given ckpt, use default yolov5'.format(self.__class__.__name__))
self.model = torch.hub.load('ultralytics/yolov5', model)
self.multiview = multiview
self.name = name
def check_cache(self, imgname):
basename = os.path.basename(imgname)
imgext = '.' + basename.split('.')[-1]
nv = imgname.split(os.sep)[-2]
cachename = join(self.output, self.name, nv, basename.replace(imgext, '.npy'))
os.makedirs(os.path.dirname(cachename), exist_ok=True)
if os.path.exists(cachename):
output = np.load(cachename, allow_pickle=True)
return True, output, cachename
else:
return False, None, cachename
def check_image(self, img_or_name):
if isinstance(img_or_name, str):
images = cv2.imread(img_or_name)
else:
images = img_or_name
images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)
return images
@torch.no_grad()
def detect(self, image, imgname):
flag, cache, cachename = self.check_cache(imgname)
if flag:
return cache
image = self.check_image(imgname)
results = self.model(image) #RGB images[:,:,::-1]
arrays = np.array(results.pandas().xyxy[0])
np.save(cachename, arrays)
return arrays
@staticmethod
def select_class(results, name):
select = []
for i, res in enumerate(results):
classname = res[6]
if classname != name:
continue
box = res[:5]
select.append(box)
return select
def select_bbox(self, select, imgname):
if select.shape[0] == 0:
return select
# Naive: select the best
idx = np.argsort(select[:, -1])[::-1]
return select[idx[0:1]]
def __call__(self, images, imgnames): # multi-view seems to be the default here; a single-view variant should subclass this
squeeze = False
if not isinstance(images, list):
images = [images]
imgnames = [imgnames]
squeeze = True
detects = {'bbox': [[] for _ in range(len(images))]}
for nv in range(len(images)):
res = self.detect(images[nv], imgnames[nv])
select = self.select_class(res, self.name)
if len(select) == 0:
select = np.zeros((0,5), dtype=np.float32)
else:
select = np.stack(select).astype(np.float32)
# TODO: add track here
select = self.select_bbox(select, imgnames[nv])
detects['bbox'][nv] = select
if squeeze:
detects['bbox'] = detects['bbox'][0]
return detects
class YoloWithTrack(BaseYOLOv5):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.track_cache = {}
@staticmethod
def calculate_iou(bbox_pre, bbox_now):
area_now = (bbox_now[:, 2] - bbox_now[:, 0])*(bbox_now[:, 3]-bbox_now[:, 1])
area_pre = (bbox_pre[:, 2] - bbox_pre[:, 0])*(bbox_pre[:, 3]-bbox_pre[:, 1])
# compute IOU
# max of left
xx1 = np.maximum(bbox_now[:, 0], bbox_pre[:, 0])
yy1 = np.maximum(bbox_now[:, 1], bbox_pre[:, 1])
# min of right
xx2 = np.minimum(bbox_now[:, 0+2], bbox_pre[:, 0+2])
yy2 = np.minimum(bbox_now[:, 1+2], bbox_pre[:, 1+2])
# w h
w = np.maximum(0, xx2 - xx1)
h = np.maximum(0, yy2 - yy1)
over = (w*h)/(area_pre+area_now-w*h)
return over
def select_bbox(self, select, imgname):
if select.shape[0] == 0:
return select
sub = os.path.basename(os.path.dirname(imgname))
frame = int(os.path.basename(imgname).split('.')[0])
if sub not in self.track_cache:
# select the best
select = super().select_bbox(select, imgname)
self.track_cache[sub] = {
'frame': [frame],
'bbox': [select]
}
return select
bbox_pre = self.track_cache[sub]['bbox'][-1]
iou = self.calculate_iou(bbox_pre, select)
idx = iou.argmax()
select = select[idx:idx+1]
self.track_cache[sub]['frame'].append(frame)
self.track_cache[sub]['bbox'].append(select)
return select
class DetectToPelvis:
def __init__(self, key) -> None:
self.key = key
self.multiview = True
def __call__(self, **kwargs):
key = self.key
val = kwargs[key]
ret = {'pelvis': []}
for nv in range(len(val)):
bbox = val[nv]
center = np.stack([(bbox[:, 0] + bbox[:, 2])/2, (bbox[:, 1] + bbox[:, 3])/2, bbox[:, -1]], axis=-1)
ret['pelvis'].append(center)
return ret
class Yolo_model:
def __init__(self, mode, yolo_ckpt, multiview, repo_or_dir = 'ultralytics/yolov5', source='github') -> None:
yolo_ckpt = check_modelpath(yolo_ckpt)
self.model = torch.hub.load(repo_or_dir, 'custom', yolo_ckpt, source=source)
self.min_detect_thres = 0.3
self.mode = mode # 'fullimg' or 'bboxcrop'
self.output = 'output'
self.name = 'yolo'
self.multiview = multiview
@torch.no_grad()
def det_step(self, img_or_name, imgname, bbox=[]):
basename = os.path.basename(imgname)
if self.multiview:
nv = imgname.split('/')[-2]
cachename = join(self.output, self.name, nv, basename.replace('.jpg', '.pkl'))
else:
cachename = join(self.output, self.name, basename.replace('.jpg', '.pkl'))
os.makedirs(os.path.dirname(cachename), exist_ok=True)
if os.path.exists(cachename):
with open(cachename, 'rb') as f:
output = pickle.load(f)
return output
if isinstance(img_or_name,str):
images = cv2.imread(img_or_name)
else:
images = img_or_name
if self.mode == 'bboxcrop':
bbox[0] = max(0,bbox[0])
bbox[1] = max(0,bbox[1])
crop = images[int(bbox[1]):int(bbox[3]),int(bbox[0]):int(bbox[2]),::-1]
else:
crop = images[:,:,::-1]
# print("[yolo img shape] ",crop.shape)
results = self.model(crop) #RGB images[:,:,::-1]
# breakpoint()
arrays = np.array(results.pandas().xyxy[0])
bboxes = {
'bbox':[],
'bbox_handl':[],
'bbox_handr':[],
'pelvis':[],
'pelvis_l':[],
'pelvis_r':[]
}
for i, res in enumerate(arrays):
classid = res[5]
box = res[:5]
if self.mode == 'bboxcrop':
box[0]+=bbox[0]
box[2]+=bbox[0]
box[1]+=bbox[1]
box[3]+=bbox[1]
if box[4] < self.min_detect_thres:
continue
if classid==0:
bboxes['bbox'].append(box)
elif classid==1:
bboxes['bbox_handl'].append(box)
bboxes['pelvis_l'].append([(box[0]+box[2])/2,(box[1]+box[3])/2,box[-1]])
elif classid==2:
bboxes['bbox_handr'].append(box)
bboxes['pelvis_r'].append([(box[0]+box[2])/2,(box[1]+box[3])/2,box[-1]])
# pad empty detections with zeros so downstream code can always index them
if len(bboxes['bbox_handl']) == 0:
bboxes['bbox_handl'].append(np.zeros(5))
bboxes['pelvis_l'].append(np.zeros(3))
if len(bboxes['bbox_handr']) == 0:
bboxes['bbox_handr'].append(np.zeros(5))
bboxes['pelvis_r'].append(np.zeros(3))
if len(bboxes['bbox']) == 0:
bboxes['bbox'].append(np.zeros(5))
bboxes['bbox'] = np.array(bboxes['bbox'])
if isinstance(imgname,str):
with open(cachename, 'wb') as f:
pickle.dump(bboxes, f)
return bboxes
def __call__(self, images, imgname, bbox=[]):
return self.det_step(images, imgname, bbox)
class Yolo_model_hand_mvmp(Yolo_model):
@torch.no_grad()
def __call__(self, bbox, images, imgnames):
ret = {
'pelvis_l':[],
'pelvis_r':[],
# 'pelvis':[],
'bbox_handl':[],
'bbox_handr':[],
}
for nv in range(len(images)):
img = images[nv]
imgname = imgnames[nv]
if self.mode == 'bboxcrop':
bboxes = {
'bbox':[],
'bbox_handl':[],
'bbox_handr':[],
'pelvis_l':[],
'pelvis_r':[]
}
for pid in range(len(bbox[nv])):
bboxes_ = self.det_step(img, imgname, bbox[nv][pid])
for key in bboxes.keys():
bboxes[key].append(bboxes_[key])
else:
bboxes = self.det_step(img, imgname)
for k in ret.keys():
ret[k].append(np.array(bboxes[k]))
return ret


@ -0,0 +1,106 @@
import os
from os.path import join
import numpy as np
import cv2
from easymocap.mytools.debug_utils import log, myerror, mywarn
class ImageDataBase:
def __init__(self, root, subs, ranges, read_image) -> None:
assert root != 'TO_BE_FILLED', 'You must set the root of dataset'
assert os.path.exists(root), f'root {root} not exists'
self.root = root
self.subs = subs
self.ranges = ranges
self.flag_read_image = read_image
self.infos = {}
self.meta = {}
self.distortMap = {} # per-camera undistortion maps, filled lazily in read_image
def check_frames_length(self):
if len(self.ranges) == 0:
self.ranges = [0, self.length, 1]
if self.ranges[1] > self.length:
self.ranges[1] = self.length
self.frames = list(range(*self.ranges))
self.length = len(self.frames)
def try_to_extract_images(self, root, value):
if not os.path.exists(os.path.join(root, value['root'])) and os.path.exists(os.path.join(root, 'videos')):
print('[{}] Cannot find the images but find the videos, try to extract it'.format(self.__class__.__name__))
for videoname in os.listdir(os.path.join(root, 'videos')):
videoext = '.' + videoname.split('.')[-1]
outdir = join(root, value['root'], videoname.replace(videoext, ''))
os.makedirs(outdir, exist_ok=True)
cmd = 'ffmpeg -i {videoname} -q:v 1 -start_number 0 {outdir}/%06d.jpg'.format(
videoname=join(root, 'videos', videoname),
outdir=outdir
)
os.system(cmd)
def __str__(self) -> str:
return f''' [Dataset] {self.__class__.__name__}
root : {self.root}
subs : {self.subs}
ranges: {self.ranges}
'''
def __getitem__(self, index):
raise NotImplementedError
def __len__(self):
return self.length
def read_image(self, imgname, cameras=None):
assert os.path.exists(imgname), "image {} not exists".format(imgname)
sub = os.path.basename(os.path.dirname(imgname))
img = cv2.imread(imgname)
if cameras is None:
return img
K, D = self.cameras[sub]['K'], self.cameras[sub]['dist']
if np.linalg.norm(D) < 1e-3:
return img
if sub not in self.distortMap.keys():
h, w = img.shape[:2]
mapx, mapy = cv2.initUndistortRectifyMap(K, D, None, K, (w,h), 5)
self.distortMap[sub] = (mapx, mapy)
mapx, mapy = self.distortMap[sub]
img = cv2.remap(img, mapx, mapy, cv2.INTER_NEAREST)
return img
def read_mv_images(root, root_images, ext, subs):
assert os.path.exists(os.path.join(root, root_images)), f'root {root}/{root_images} not exists'
if len(subs) == 0:
subs = sorted(os.listdir(os.path.join(root, root_images)))
if subs[0].isdigit():
subs = sorted(subs, key=lambda x: int(x))
imagelists = []
log(f'Found {len(subs)} subjects in {root}/{root_images}')
for sub in subs:
images = sorted(os.listdir(os.path.join(root, root_images, sub)))
images = [os.path.join(root, root_images, sub, image) for image in images if image.endswith(ext)]
log(f' -> Found {len(images)} {root_images} in {sub}.')
imagelists.append(images)
min_length = min([len(image) for image in imagelists])
log(f' -> Min length: {min_length}')
imagenames = [[image[i] for image in imagelists] for i in range(min_length)]
return imagenames, {'subs': subs}
def FloatArray(x):
return np.array(x, dtype=np.float32)
def find_best_people(annots):
if len(annots) == 0:
return {}
# TODO: find the best
annot = annots[0]
bbox = FloatArray(annot['bbox'])
if 'keypoints' not in annot.keys():
return {}
keypoints = FloatArray(annot['keypoints'])
return {'bbox': bbox, 'keypoints': keypoints}
def find_all_people(annots):
if len(annots) == 0:
return {}
bbox = FloatArray([annot['bbox'] for annot in annots])
keypoints = FloatArray([annot['keypoints'] for annot in annots])
return {'bbox': bbox, 'keypoints': keypoints}
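# For reference (reconstructed from read_mv_images above; subject names are
# hypothetical): the expected layout is <root>/<root_images>/<sub>/<frame><ext>,
# e.g. data/images/01/000000.jpg. The function returns imagenames[i][nv], the
# path of frame i in view nv, plus {'subs': [...]}, truncating every view to
# the shortest sequence.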


@ -0,0 +1,332 @@
from easymocap.mytools.camera_utils import read_cameras
from easymocap.mytools.debug_utils import log, myerror, mywarn
from easymocap.mytools.file_utils import read_json
from .basedata import ImageDataBase, read_mv_images, find_best_people, find_all_people
import os
from os.path import join
import numpy as np
import cv2
from collections import defaultdict
panoptic15_in_body15 = [1,0,8,5,6,7,12,13,14,2,3,4,9,10,11]
def convert_body15_panoptic15(keypoints):
k3d_panoptic15 = keypoints[..., panoptic15_in_body15, :]
return k3d_panoptic15
def convert_panoptic15_body15(keypoints):
keypoints_b15 = np.zeros_like(keypoints)
keypoints_b15[..., panoptic15_in_body15, :] = keypoints
return keypoints_b15
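# Note: panoptic15_in_body15 is a permutation of 0..14, so the two conversions
# are exact inverses: convert_panoptic15_body15(convert_body15_panoptic15(x)) == x.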
def padding_and_stack(datas):
shapes = {}
for data in datas:
if len(data) == 0:
continue
for key, value in data.items():
if key not in shapes.keys():
shapes[key] = value.shape
collect = {key: np.zeros((len(datas), *shapes[key])) for key in shapes.keys()}
for i, data in enumerate(datas):
for key, value in data.items():
collect[key][i] = value
return collect
def padding_empty(datas):
shapes = {}
for data in datas:
if len(data) == 0:
continue
for key, value in data.items():
if key not in shapes.keys():
shapes[key] = value.shape[1:]
collect = {key: [None for data in datas] for key in shapes.keys()}
for i, data in enumerate(datas):
for key, shape in shapes.items():
if key not in data.keys():
print('[Dataset] padding empty view {} of {}'.format(i, key))
collect[key][i] = np.zeros((0, *shape), dtype=np.float32)
else:
collect[key][i] = data[key]
return collect
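# Illustrative sketch of the two padding helpers (hypothetical shapes):
# padding_and_stack zero-fills views without detections into one dense array,
# while padding_empty keeps a per-view list with (0, ...) placeholders.
def _demo_padding():
    datas = [{'bbox': np.zeros((2, 5), dtype=np.float32)}, {}]
    dense = padding_and_stack(datas)  # dense['bbox'].shape == (2, 2, 5)
    ragged = padding_empty(datas)     # shapes: (2, 5) and (0, 5)
    return dense, ragged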
def parse_frames(pafs_frame, H, W):
# parse a single frame
res = {
'joints': [],
'pafs': {}
}
joints = pafs_frame[1:1+3*25]
for i in range(25):
value = np.fromstring(joints[3*i+2], sep=' ').reshape(3, -1).T
value[:, 0] = value[:, 0] * W
value[:, 1] = value[:, 1] * H
res['joints'].append(value.astype(np.float32))
# parse pafs
pafs = pafs_frame[1+3*25+1:]
for npart in range(26):
label = pafs[3*npart+0].split(' ')[2:]
label = (int(label[0]), int(label[1]))
shape = pafs[3*npart+1].split(' ')[2:]
w, h = int(shape[0]), int(shape[1])
value = np.fromstring(pafs[3*npart+2], sep=' ').reshape(w, h).astype(np.float32)
res['pafs'][label] = value
return res
def read_4dassociation(pafs, H, W):
outputs = []
# parse the PAF file
with open(pafs, 'r') as f:
pafs = f.readlines()
indices = []
for i, line in enumerate(pafs):
if line.startswith('# newframes:'):
indices.append([i])
elif line.startswith('# end frames:'):
indices[-1].append(i)
print('[Read OpenPose] Totally {} frames'.format(len(indices)))
for (start, end) in indices:
pafs_frame = pafs[start+1:end]
pafs_frame = list(map(lambda x:x.strip(), pafs_frame))
frames = parse_frames(pafs_frame, H, W)
outputs.append(frames)
return outputs
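# Layout of one frame block, as implied by the parser above (reconstructed
# from the code, not from a format spec):
#   line 0: per-frame header
#   lines 1..75: 25 joint blocks of 3 lines; the 3rd line of each block holds
#     flattened (x, y, conf) rows with normalized x, y, rescaled by W, H
#   one separator line, then 26 PAF blocks of 3 lines each:
#     '<..> <..> src dst', '<..> <..> M N', and M*N scores -> an (M, N) matrix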
class MVDataset(ImageDataBase):
def __init__(self, root, subs, subs_vis, ranges, read_image=False, reader={}, filter={}) -> None:
super().__init__(root, subs, ranges, read_image)
self.subs_vis = subs_vis
self.length = 0
for key, value in reader.items():
if key == 'images':
self.try_to_extract_images(root, value)
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
self.length = len(data)
elif key == 'image_shape':
imgnames = self.infos['images'][0]
shapes = []
for imgname in imgnames:
img = cv2.imread(imgname)
height, width, _ = img.shape
log('[{}] sub {} shape {}'.format(self.__class__.__name__, imgname, img.shape))
shapes.append([height, width])
data = [shapes]
meta = {}
elif key == 'annots':
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
if self.length > 0:
if self.length != len(data):
myerror('annots length {} not equal to images length {}.'.format(len(data), self.length))
data = data[:self.length]
else:
self.length = len(data)
elif key == 'openpose':
# read the OpenPose detections
if len(subs) == 0:
pafs = sorted(os.listdir(join(root, value['root'])))
else:
pafs = [f'{sub}.txt' for sub in subs]
results = []
for nv, paf in enumerate(pafs):
pafname = join(root, value['root'], paf)
infos = read_4dassociation(pafname, H=self.infos['image_shape'][0][nv][0], W=self.infos['image_shape'][0][nv][1])
results.append(infos)
data = [[d[i] for d in results] for i in range(self.length)]
meta = {}
elif key == 'cameras':
if 'with_sub' in value.keys():
raise NotImplementedError
else:
cameras = read_cameras(os.path.join(root, value['root']))
if 'remove_k3' in value.keys():
for cam, camera in cameras.items():
camera['dist'][:, 4] = 0.
data = [cameras]
meta = {}
elif key in ['pelvis']:
continue
elif key == 'keypoints3d':
k3droot = value['root']
filenames = sorted(os.listdir(k3droot))[:self.length]
res_key = value.get('key', 'pred')
data = []
for filename in filenames:
results = read_json(join(k3droot, filename))
if 'pids' not in results.keys():
# fill in pids when the file does not provide them
results['pids'] = list(range(len(results[res_key])))
data.append({
'pids': results['pids'],
'keypoints3d': np.array(results[res_key], dtype=np.float32)
})
if data[-1]['keypoints3d'].shape[-1] == 3:
mywarn("The input keypoints don't have confidence; padding with ones")
data[-1]['keypoints3d'] = np.concatenate([data[-1]['keypoints3d'], np.ones_like(data[-1]['keypoints3d'][..., :1])], axis=-1)
if 'conversion' in value.keys():
if value['conversion'] == 'panoptic15_to_body15':
data[-1]['keypoints3d'] = convert_panoptic15_body15(data[-1]['keypoints3d'])
else:
raise ValueError(f'Unknown reader: {key}')
self.infos[key] = data
self.meta.update(meta)
self.reader = reader
self.filter = filter
if len(self.subs) == 0:
self.subs = self.meta['subs']
self.check_frames_length()
@staticmethod
def read_annots(annotnames):
val = []
for annname in annotnames:
annots = read_json(annname)['annots']
# select the best people
annots = find_best_people(annots)
val.append(annots)
val = padding_and_stack(val)
return val
def filter_openpose(self, candidates, pafs):
for nv, candview in enumerate(candidates):
H=self.infos['image_shape'][0][nv][0]
W=self.infos['image_shape'][0][nv][1]
for cand in candview:
if 'border' in self.filter.keys():
border = self.filter['border'] * max(H, W)
flag = (cand[:, 0] > border) & (cand[:, 0] < W - border) & (cand[:, 1] > border) & (cand[:, 1] < H - border)
cand[~flag] = 0
return candidates, pafs
def __getitem__(self, index):
frame = self.frames[index]
ret = {}
for key, value in self.infos.items():
if len(value) == 1:
ret[key] = value[0]
elif frame >= len(value):
myerror(f'[{self.__class__.__name__}] {key}: index {frame} out of range {len(value)}')
else:
ret[key] = value[frame]
ret_list = defaultdict(list)
for key, val in ret.items():
if key == 'annots':
ret_list[key] = self.read_annots(val)
elif key == 'cameras':
for sub in self.subs:
select = {k: val[sub][k] for k in ['K', 'R', 'T', 'dist', 'P']}
ret_list[key].append(select)
ret_list[key] = padding_and_stack(ret_list[key])
elif key == 'images':
if self.flag_read_image:
for i, sub in enumerate(self.subs):
imgname = val[i]
if sub in self.subs_vis or self.subs_vis == 'all':
img = self.read_image(imgname)
else:
img = imgname
ret_list[key].append(img)
ret_list['imgnames'].append(imgname)
else:
ret_list[key] = val
ret_list['imgnames'] = val
elif key == 'openpose':
ret_list[key] = [v['joints'] for v in val]
# also return the PAFs
ret_list[key+'_paf'] = [v['pafs'] for v in val]
# fix the PAF key orientation: the stored (1, 8) table becomes (8, 1) transposed
for nv in range(len(ret_list[key])):
ret_list[key+'_paf'][nv][(8, 1)] = ret_list[key+'_paf'][nv].pop((1, 8)).T
ret_list[key], ret_list[key+'_paf'] = self.filter_openpose(ret_list[key], ret_list[key+'_paf'])
elif key == 'keypoints3d':
ret_list['keypoints3d'] = val['keypoints3d']
if 'pids' in val.keys():
ret_list['pids'] = val['pids']
else:
ret_list['pids'] = list(range(len(val['keypoints3d'])))
elif key in ['image_shape']:
pass
else:
print('[Dataset] Unknown key: {}'.format(key))
ret_list.update(ret_list.pop('annots', {}))
for key, val in self.reader.items():
if key == 'pelvis' and 'annots' in self.reader.keys(): # load pelvis from annots.keypoints
ret_list[key] = [d[:, val.root_id] for d in ret_list['keypoints']]
elif key == 'pelvis' and 'openpose' in self.reader.keys():
ret_list[key] = [d[val.root_id] for d in ret_list['openpose']]
ret_list['meta'] = {
'subs': self.subs,
'index': index,
'frame': frame,
'image_shape': ret['image_shape'],
'imgnames': ret_list['imgnames'],
}
return ret_list
def check(self, index):
raise NotImplementedError
class MVMP(MVDataset):
def read_annots(self, annotnames):
val = []
for annname in annotnames:
annots = read_json(annname)['annots']
# filter out unwanted 2D detections here
annots_valid = []
for annot in annots:
flag = True
if 'bbox_size' in self.filter.keys():
bbox_size = self.filter['bbox_size']
bbox = annot['bbox']
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
if area < bbox_size:
flag = False
if flag:
annots_valid.append(annot)
annots = annots_valid
# select the best people
annots = find_all_people(annots)
val.append(annots)
val = padding_empty(val)
return val
def check(self, index):
data = self.__getitem__(index)
from easymocap.mytools.vis_base import plot_bbox, merge, plot_keypoints_auto
# check the subs vis
vis = []
for nv, sub in enumerate(self.subs):
if sub not in self.subs_vis: continue
img = data['images'][nv].copy()
bbox = data['bbox'][nv]
kpts = data['keypoints'][nv]
for i in range(bbox.shape[0]):
plot_bbox(img, bbox[i], pid=i)
plot_keypoints_auto(img, kpts[i], pid=i, use_limb_color=False)
vis.append(img)
vis = merge(vis)
cv2.imwrite('debug/{}_{:06d}.jpg'.format(self.__class__.__name__, index), vis)
if __name__ == '__main__':
config = '''
args:
root: /nas/ZJUMoCap/Part0/313
subs: []
subs_vis: ['01', '07', '13', '19']
ranges: [0, 100, 1]
read_image: False
reader:
images:
root: images
ext: .jpg
annots:
root: annots
ext: .json
cameras: # also supports per-frame camera parameters
root: ''
'''
import yaml
config = yaml.load(config, Loader=yaml.FullLoader)
dataset = MVDataset(**config['args'])
for i in range(len(dataset)):
data = dataset[i]


@ -0,0 +1,136 @@
from .basedata import ImageDataBase, read_mv_images, find_best_people
from easymocap.mytools.debug_utils import log, myerror, mywarn
from easymocap.mytools.camera_utils import read_cameras
from easymocap.mytools.file_utils import read_json
import os
import numpy as np
import cv2
class SVDataset(ImageDataBase):
'''
This dataset returns a single video sequence only; it is not used for multi-sequence data.
'''
def __init__(self, root, subs, ranges, read_image=False, reader={}) -> None:
super().__init__(root, subs, ranges, read_image)
assert len(subs) == 1, 'SVDataset only support one subject'
for key, value in reader.items():
if key == 'images':
self.try_to_extract_images(root, value)
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
data = [d[0] for d in data]
self.length = len(data)
elif key == 'image_shape':
imgname = self.infos['images'][0]
shapes = []
assert os.path.exists(imgname), "image {} not exists".format(imgname)
img = cv2.imread(imgname)
assert img is not None, "image {} read failed".format(imgname)
height, width, _ = img.shape
log('[{}] sub {} shape {}'.format(self.__class__.__name__, imgname, img.shape))
shapes.append([height, width])
data = shapes
elif key == 'annots':
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
data = [d[0] for d in data]
if self.length > 0:
assert self.length == len(data), \
myerror('annots length {} not equal to images length {}.'.format(len(data), self.length))
else:
self.length = len(data)
elif key == 'cameras':
myerror('Camera parameters are not implemented yet')
raise NotImplementedError
else:
raise ValueError(f'Unknown reader: {key}')
self.infos[key] = data
self.meta.update(meta)
# check cameras:
if 'cameras' not in self.infos:
mywarn('[{}] No camera info, use default camera'.format(self.__class__.__name__))
imgname0 = self.infos['images'][0]
img = self.read_image(imgname0)
height, width = img.shape[:2]
log('[{}] Read shape {} from image {}'.format(self.__class__.__name__, img.shape, imgname0))
focal = 1.2*min(height, width) # the same default focal heuristic as COLMAP
log('[{}] Set a fix focal length {}'.format(self.__class__.__name__, focal))
K = np.array([focal, 0., width/2, 0., focal, height/2, 0. ,0., 1.]).reshape(3, 3)
camera = {'K':K ,'R': np.eye(3), 'T': np.zeros((3, 1)), 'dist': np.zeros((1, 5))}
for key, val in camera.items():
camera[key] = val.astype(np.float32)
self.infos['cameras'] = [camera]
self.check_frames_length()
self.find_best_people = find_best_people
def __getitem__(self, index):
frame = self.frames[index]
ret = {}
for key, value in self.infos.items():
if len(value) == 1:
ret[key] = value[0]
elif index >= len(value):
myerror(f'[{self.__class__.__name__}] {key}: index {frame} out of range {len(value)}')
else:
ret[key] = value[frame]
ret_new = {}
for key, val in ret.items():
if key == 'annots':
annots = read_json(val)['annots']
# select the best people
annots = self.find_best_people(annots)
ret_new.update(annots)
elif key == 'cameras':
ret_new[key] = val
elif key == 'images':
ret_new['imgnames'] = val
if self.flag_read_image:
img = self.read_image(val)
ret_new[key] = img
else:
ret_new[key] = val
elif key == 'image_shape':
ret_new['image_shape'] = val
ret_new['meta'] = {
'subs': self.subs,
'index': index,
'frame': self.frames[index],
'image_shape': ret_new['image_shape'],
'imgnames': ret_new['imgnames'],
}
return ret_new
class SVHandL(SVDataset):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.find_best_people = self._find_best_hand
def _find_best_hand(self, annots):
assert len(annots) == 1, 'SVHandL only support one person'
annot = annots[0]
ret = {
'bbox': np.array(annot['bbox_handl2d'], dtype=np.float32),
'keypoints': np.array(annot['handl2d'], dtype=np.float32),
}
return ret
if __name__ == '__main__':
cfg = '''
module: myeasymocap.datasets.1v1p.MonoDataset
args:
root: /nas/home/shuaiqing/EasyMocapDoc/demo/1v1p
subs: ['0+000553+000965']
ranges: [0, 99999, 1]
read_image: True
reader:
images:
root: images
ext: .jpg
annots:
root: annots
ext: .json
'''
import yaml
cfg = yaml.load(cfg, Loader=yaml.FullLoader)
dataset = SVDataset(**cfg['args'])
print(dataset)
for i in range(len(dataset)):
data = dataset[i]

123
myeasymocap/io/model.py Normal file

@ -0,0 +1,123 @@
import os
import torch
import numpy as np
from easymocap.bodymodel.smpl import SMPLModel
from easymocap.mytools.debug_utils import log
def try_to_download_SMPL(model_dir):
cmd = 'wget https://www.dropbox.com/s/aeulffqzb3zmh8x/pare-github-data.zip'
os.system(cmd)
os.makedirs(model_dir, exist_ok=True)
cmd = 'unzip pare-github-data.zip -d {}'.format(model_dir)
print('[RUN] {}'.format(cmd))
os.system(cmd)
class SMPLLoader:
def __init__(self, model_path, regressor_path, return_keypoints=True):
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
if not os.path.exists(model_path):
log('[SMPL] Model not found in `{}`'.format(model_path))
log('[SMPL] Downloading model to `{}`'.format(model_path))
try_to_download_SMPL('models/pare')
assert os.path.exists(model_path), f'{model_path} not exists'
if not os.path.exists(regressor_path):
if regressor_path.endswith('J_regressor_body25.npy'):
url = 'https://github.com/zju3dv/EasyMocap/raw/master/data/smplx/J_regressor_body25.npy'
os.makedirs(os.path.dirname(regressor_path), exist_ok=True)
cmd = 'wget {} -O {}'.format(url, regressor_path)
os.system(cmd)
assert os.path.exists(regressor_path), f'{regressor_path} not exists'
log('[SMPL] Loading model in `{}`'.format(model_path))
log('[SMPL] Using keypoints regressor `{}`'.format(regressor_path))
smplmodel = SMPLModel(model_path=model_path,
model_type='smpl', device=device,
regressor_path=regressor_path,
NUM_SHAPES=10,
)
self.smplmodel = smplmodel
self.return_keypoints = return_keypoints
def __call__(self,):
return {
'body_model': self.smplmodel,
'model': self.forward}
def forward(self, params):
keypoints = self.smplmodel.keypoints(params, return_tensor=True)
ret = {
'keypoints': keypoints
}
ret.update(params)
return ret
class MANOLoader:
def __init__(self, cfg_path, model_path, regressor_path, num_pca_comps=45, use_pca=False, use_flat_mean=False):
log('[MANO] Loading model in `{}`'.format(model_path))
log('[MANO] Using keypoints regressor `{}`'.format(regressor_path))
assert os.path.exists(model_path), f'{model_path} not exists, Please download it from `mano.is.tue.mpg.de`'
if not os.path.exists(regressor_path) and regressor_path.endswith('J_regressor_mano_LEFT.txt'):
url = 'https://raw.githubusercontent.com/zju3dv/EasyMocap/master/data/smplx/J_regressor_mano_LEFT.txt'
os.makedirs(os.path.dirname(regressor_path), exist_ok=True)
cmd = 'wget {} -O {}'.format(url, regressor_path)
os.system(cmd)
assert os.path.exists(regressor_path), f'{regressor_path} not exists'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
from easymocap.config import Config, load_object
cfg_data = Config.load(cfg_path)
cfg_data['args']['model_path'] = model_path
cfg_data['args']['regressor_path'] = regressor_path
cfg_data['args']['cfg_hand']['num_pca_comps'] = num_pca_comps
cfg_data['args']['cfg_hand']['use_pca'] = use_pca
cfg_data['args']['cfg_hand']['use_flat_mean'] = use_flat_mean
model = load_object(cfg_data.module, cfg_data.args)
self.manomodel = model
def __call__(self,):
return {
'hand_model': self.manomodel,
'model': self.forward}
def forward(self, params):
keypoints = self.manomodel.keypoints(params, return_tensor=True)
ret = {
'keypoints': keypoints
}
ret.update(params)
return ret
class MANOLoader_lr:
def __init__(self, cfg_path, model_path, regressor_path, num_pca_comps=45, use_pca=False):
self.Model_l = MANOLoader(cfg_path, model_path, regressor_path, num_pca_comps, use_pca)
self.Model_r = MANOLoader(cfg_path, model_path.replace('LEFT','RIGHT'), regressor_path.replace('LEFT','RIGHT'), num_pca_comps, use_pca)
def __call__(self,):
ret={}
out1 = self.Model_l()
for key in out1.keys():
ret[key+'_l'] = out1[key]
out2 = self.Model_r()
for key in out2.keys():
ret[key+'_r'] = out2[key]
return ret
class SMPLHLoader:
def __init__(self, path):
from easymocap.config import Config, load_object
cfg_data = Config.load(path)
self.model = load_object(cfg_data.module, cfg_data.args)
def __call__(self,):
return {
'smplh_model': self.model,
'model': self.forward}
def forward(self, params):
keypoints = self.model(**params, return_verts=False, return_tensor=True)
ret = {
'keypoints': keypoints.clone(),
'keypoints_body': keypoints[...,:25,:].clone(),
'keypoints_handlr': keypoints[...,25:,:].clone()
}
ret.update(params)
return ret
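For orientation, the loaders in this file act as factories: calling a loader returns the wrapped model together with a forward function. A minimal usage sketch (paths and the params dict are placeholders, not verified defaults of this repository):

loader = SMPLLoader(model_path='models/pare/smpl/SMPL_NEUTRAL.pkl',
                    regressor_path='data/smplx/J_regressor_body25.npy')
out = loader()              # {'body_model': SMPLModel, 'model': loader.forward}
# params: dict with 'Rh', 'Th', 'poses', 'shapes' arrays
ret = out['model'](params)  # -> {'keypoints': regressed joints, **params}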

42
myeasymocap/io/video.py Normal file

@ -0,0 +1,42 @@
import os
import shutil
from os.path import join
from glob import glob
from easymocap.mytools.debug_utils import log, mywarn, myerror, run_cmd
class MakeVideo:
def __init__(self, fps, keep_image, output='tmp') -> None:
self.output = output
self.fps = fps
self.debug = False
self.keep_image = keep_image
def __call__(self):
restart = ' -y '
fps_in = fps_out = self.fps
fps_in = ' -r {}'.format(fps_in)
path = self.output
ext = '.jpg'
cmd = ' -pix_fmt yuv420p -vcodec libx264'
cmd += ' -r {}'.format(fps_out)
if ext == '.png':
cmd += ' -profile:v main'
pathlist = sorted(os.listdir(path))
pathlist = [join(path, p) for p in pathlist if os.path.isdir(join(path, p))]
for path in pathlist:
imgnames = glob(join(path, '*{}'.format(ext)))
if len(imgnames) == 0:
continue
shell = f'ffmpeg{restart}{fps_in} -i "{path}/%06d{ext}" -vf scale="2*ceil(iw/2):2*ceil(ih/2)"{cmd} "{path}.mp4"'
if not self.debug:
shell += ' -loglevel quiet'
print(shell)
os.system(shell)
# make sure the video file was actually generated
if not os.path.exists(path+'.mp4'):
mywarn('Video {} is not generated'.format(path+'.mp4'))
shell = shell.replace(' -loglevel quiet', '')
run_cmd(shell)
else:
if not self.keep_image:
shutil.rmtree(path)

260
myeasymocap/io/vis.py Normal file

@ -0,0 +1,260 @@
import os
from typing import Any
import numpy as np
import cv2
from os.path import join
from easymocap.mytools.vis_base import plot_keypoints_auto, merge, plot_bbox, get_rgb, plot_cross
from easymocap.datasets.base import add_logo
from easymocap.mytools.camera_utils import Undistort
def projectPoints(k3d, camera):
k3d0 = np.ascontiguousarray(k3d[:, :3])
k3d_rt = np.dot(k3d0, camera['R'].T) + camera['T'].T
depth = k3d_rt[:, -1:]
k2d, _ = cv2.projectPoints(k3d0, camera['R'], camera['T'], camera['K'], camera['dist'])
k2d = np.hstack([k2d[:, 0], k3d[:, -1:]])
return k2d, depth
class VisBase:
def __init__(self, scale=1, lw_factor=1, name='vis', mode='none', mode_args={}):
self.scale = scale
self.output = '/tmp'
self.name = name
self.lw = lw_factor
self.count = 0
self.mode = mode
self.mode_args = mode_args
def merge_and_write(self, vis):
vis = [v for v in vis if not isinstance(v, str)]
if self.mode == 'center':
for i, v in enumerate(vis):
# crop the center region
left = int(v.shape[1] - v.shape[0]) // 2
v = v[:, left:left+v.shape[0], :]
vis[i] = v
elif self.mode == 'crop':
for i, v in enumerate(vis):
t, b, l, r = self.mode_args[i]
v = v[t:b, l:r]
vis[i] = v
if len(vis) == 0:
return 0
if len(vis) == 3: # with exactly 3 views, keep the first full size and stack the other two at half size on the right
vis_0 = vis[0]
vis_1 = cv2.resize(vis[1], None, fx=0.5, fy=0.5)
vis_2 = cv2.resize(vis[2], None, fx=0.5, fy=0.5)
vis_12 = np.vstack([vis_1, vis_2])
vis = np.hstack([vis_0, vis_12])
else:
vis = merge(vis)
vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
vis = add_logo(vis)
# TODO: read the image name from the input meta
outname = join(self.output, self.name, '{:06d}.jpg'.format(self.count))
os.makedirs(os.path.dirname(outname), exist_ok=True)
cv2.imwrite(outname, vis)
self.count += 1
class Vis3D(VisBase):
def __init__(self, scale, lw_factor=1, name='repro', **kwargs) -> None:
super().__init__(scale, lw_factor, name, **kwargs)
def __call__(self, images, cameras, keypoints3d=None, results=None):
# keypoints3d: (nJoints, 4)
undist = False
cameras['dist'] = np.zeros_like(cameras['dist'])
vis_all = []
for nv in range(len(images)):
if isinstance(images[nv], str): continue
camera = {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']}
if undist:
vis = Undistort.image(images[nv], cameras['K'][nv], cameras['dist'][nv])
camera['dist'] = np.zeros_like(camera['dist'])
else:
vis = images[nv].copy()
if results is None:
if len(keypoints3d.shape) == 2:
keypoints_repro, depth = projectPoints(keypoints3d, {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']})
plot_keypoints_auto(vis, keypoints_repro, pid=0, use_limb_color=False)
else:
for pid in range(keypoints3d.shape[0]):
keypoints_repro, depth = projectPoints(keypoints3d[pid], {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']})
plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=False)
else:
for res in results:
k3d = res['keypoints3d']
keypoints_repro, depth = projectPoints(k3d, camera)
if k3d.shape[0] == 1:
x, y = keypoints_repro[0,0], keypoints_repro[0,1]
plot_cross(vis, x, y, col=get_rgb(res['id']), lw=self.lw, width=self.lw * 5)
elif k3d.shape[0] == 2: # limb
x1, y1 = keypoints_repro[0,0], keypoints_repro[0,1]
x2, y2 = keypoints_repro[1,0], keypoints_repro[1,1]
cv2.line(vis, (int(x1), int(y1)), (int(x2), int(y2)), get_rgb(res['id']), self.lw)
else:
plot_keypoints_auto(vis, keypoints_repro, pid=res['id'], use_limb_color=False, lw_factor=self.lw)
cv2.putText(vis, '{}'.format(res['id']), (int(keypoints_repro[0,0]), int(keypoints_repro[0,1])),
cv2.FONT_HERSHEY_SIMPLEX, 2, get_rgb(res['id']), self.lw)
vis_all.append(vis)
self.merge_and_write(vis_all)
class VisRoot(VisBase):
def __call__(self, images, pelvis):
vis = []
for nv in range(len(images)):
if isinstance(images[nv], str): continue
v = images[nv].copy()
for i in range(pelvis[nv].shape[0]):
color = get_rgb(i)
x, y = pelvis[nv][i][0], pelvis[nv][i][1]
x, y = int(x), int(y)
plot_cross(v, x, y , col=color, lw=self.lw, width=self.lw * 10)
cv2.putText(v, '{}'.format(i), (int(x), int(y)),
cv2.FONT_HERSHEY_SIMPLEX, 2, color, self.lw)
vis.append(v)
self.merge_and_write(vis)
class VisPAF(VisBase):
def __call__(self, images, openpose, openpose_paf):
# openpose [nViews, nJoints, 3]
# openpose_paf [nViews, dict, MxN]
vis_limb = [(8, 1)]
vis = []
nViews = len(images)
for nv in range(nViews):
if isinstance(images[nv], str): continue
v = images[nv].copy()
k2d = openpose[nv]
paf = openpose_paf[nv]
for (src, dst) in vis_limb:
# (M, N)
paf_ = paf[(src, dst)]
for i in range(paf_.shape[0]):
for j in range(paf_.shape[1]):
if paf_[i, j] < 0.1:
continue
x1, y1 = k2d[src][i, :2]
x2, y2 = k2d[dst][j, :2]
lw = int(paf_[i, j] * 10)
cv2.line(v, (int(x1), int(y1)), (int(x2), int(y2)), get_rgb(src), lw)
vis.append(v)
self.merge_and_write(vis)
class VisBirdEye(VisBase):
def __init__(self, xranges, yranges, resolution=1024, name='bird', **kwargs):
super().__init__(name=name, **kwargs)
self.xranges = xranges
self.yranges = yranges
self.resolution = resolution
self.blank = np.zeros((resolution, resolution, 3), dtype=np.uint8) + 255
x0, y0 = self.map_x_y(0, 0)
cv2.line(self.blank, (x0, 0), (x0, resolution), (0, 0, 0), 1)
cv2.line(self.blank, (0, y0), (resolution, y0), (0, 0, 0), 1)
def map_x_y(self, x, y):
x = (x - self.xranges[0]) / (self.xranges[1] - self.xranges[0]) * self.resolution
y = (y - self.yranges[0]) / (self.yranges[1] - self.yranges[0]) * self.resolution
y = self.resolution - y
x, y = int(x), int(y)
return x, y
def __call__(self, results, cameras):
vis = self.blank.copy()
R = cameras['R']
T = cameras['T']
# the cameras may move in future captures, so the camera markers cannot be drawn in advance
center = - np.einsum('bmn,bnj->bmj', R.swapaxes(1, 2), T)
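# (from x_cam = R @ x_world + T, the camera center solves x_cam = 0,
# i.e. center = -R^T @ T, computed batched with the einsum above)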
for nv in range(center.shape[0]):
x, y = center[nv, 0], center[nv, 1]
x, y = self.map_x_y(x, y)
plot_cross(vis, x, y, col=(0,0,255), lw=self.lw, width=20)
cv2.putText(vis, 'cam{}'.format(nv), (int(x), int(y)),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), self.lw//4)
for res in results:
pid = res['id']
color = get_rgb(pid)
x, y, z = res['pelvis'][0, 0], res['pelvis'][0, 1], res['pelvis'][0, 2]
length = 0.5 * (np.clip(z - 1., 0, 1) + 1)
length = int(length/(self.xranges[1] - self.xranges[0]) * self.resolution)
x, y = self.map_x_y(x, y)
plot_cross(vis, x, y, col=color, lw=self.lw, width=self.lw * 5)
cv2.rectangle(vis, (x - length, y - length), (x + length, y + length), color, self.lw)
cv2.putText(vis, '{}'.format(pid), (int(x), int(y)),
cv2.FONT_HERSHEY_SIMPLEX, 2, color, self.lw)
self.merge_and_write([vis])
class VisMatch(VisBase):
def __call__(self, images, pelvis, results):
vis = []
for nv in range(len(images)):
if isinstance(images[nv], str):
vis.append(images[nv])
continue
else:
vis.append(images[nv].copy())
for res in results:
pid = res['id']
for nv, ind in zip(res['views'], res['indices']):
v = vis[nv]
if isinstance(v, str): continue
x, y = pelvis[nv][ind][0], pelvis[nv][ind][1]
plot_cross(v, pelvis[nv][ind][0], pelvis[nv][ind][1], col=get_rgb(pid), lw=self.lw, width=self.lw * 5)
cv2.putText(v, '{}'.format(pid), (int(x), int(y)),
cv2.FONT_HERSHEY_SIMPLEX, 2, get_rgb(pid), self.lw)
self.merge_and_write(vis)
class Vis_det(VisBase):
def __call__(self, images, **kwargs):
vis = []
for nv in range(len(images)):
if isinstance(images[nv], str):
vis.append(images[nv])
continue
else:
v = images[nv].copy()
for key, bbox in kwargs.items():
_bbox = bbox[nv]
for idet in range(_bbox.shape[0]):
plot_bbox(v, _bbox[idet], idet)
vis.append(v)
self.merge_and_write(vis)
class Vis2D(VisBase):
def __call__(self, images, **kwargs):
if 'keypoints' in kwargs:
keypoints = kwargs['keypoints']
else:
if len(kwargs.keys()) == 1:
keypoints = list(kwargs.values())[0]
else:
raise NotImplementedError
if 'bbox' in kwargs:
bbox = kwargs['bbox']
else:
bbox = None
if not isinstance(images, list):
images = [images]
keypoints = [keypoints]
bbox = [bbox]
vis = []
for nv in range(len(images)):
if isinstance(images[nv], str): continue
k2d = keypoints[nv]
vis_ = images[nv].copy()
if len(k2d.shape) == 2:
plot_keypoints_auto(vis_, k2d, pid=0, use_limb_color=False)
if bbox is not None:
if len(bbox[nv].shape) == 2:
plot_bbox(vis_, bbox[nv][0], 0)
else:
plot_bbox(vis_, bbox[nv], 0)
else:
for pid in range(k2d.shape[0]):
plot_keypoints_auto(vis_, k2d[pid], pid=pid, use_limb_color=False)
vis.append(vis_)
self.merge_and_write(vis)

389
myeasymocap/io/vis3d.py Normal file

@ -0,0 +1,389 @@
from tqdm import tqdm
import cv2
import os
from easymocap.visualize.pyrender_wrapper import plot_meshes
from os.path import join
import numpy as np
from easymocap.datasets.base import add_logo
from easymocap.mytools.vis_base import merge, plot_bbox
from .vis import VisBase
class Render(VisBase):
def __init__(self, name='render', scale=0.5, backend='pyrender', **kwargs) -> None:
super().__init__(name=name, scale=1., **kwargs)
self.scale3d = scale
def __call__(self, body_model, params, cameras, imgnames):
vertices = body_model.vertices(params, return_tensor=False)
faces = body_model.faces
for nf, img in enumerate(tqdm(imgnames, desc=self.name)):
basename = os.path.basename(img)
# re-read the image from disk
assert os.path.exists(img), img
vis = cv2.imread(img)
vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
vert = vertices[nf]
meshes = {}
meshes[0] = {
'vertices': vert,
'faces': faces,
'id': 0,
'name': 'human_{}'.format(0)
}
K = cameras['K'][nf].copy()
K[:2, :] *= self.scale3d
R = cameras['R'][nf]
T = cameras['T'][nf]
ret = plot_meshes(vis, meshes, K, R, T, mode='image')
self.merge_and_write([ret])
class Render_multiview(VisBase):
def __init__(self, view_list=[], name='render', model_name='body_model', render_mode='image', backend='pyrender', shape=[-1,-1], scale=1., **kwargs):
self.scale3d = scale
super().__init__(name=name, scale=1., **kwargs)
self.view_list = view_list
self.render_mode = render_mode
self.model_name = model_name
self.shape = shape
def render_(self, vertices, faces, cameras, imgnames):
for nf, img in enumerate(tqdm(imgnames, desc=self.name)):
mv_ret = []
if not isinstance(img, list):
img = [img]
for nv in self.view_list:
basename = os.path.basename(img[nv])
assert os.path.exists(img[nv]), img[nv]
vis = cv2.imread(img[nv])
vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
vert = vertices[nf]
meshes = {}
if vert.ndim == 2:
meshes[0] = {
'vertices': vert,
'faces': faces,
'id': 0,
'name': 'human_{}'.format(0)
}
elif vert.ndim == 3:
for pid in range(vert.shape[0]):
meshes[pid] = {
'vertices': vert[pid],
'faces': faces,
'id': pid,
'name': 'human_{}'.format(pid)
}
if cameras['K'].ndim == 4:
K = cameras['K'][nf][nv].copy()
K[:2, :] *= self.scale3d # the image above was resized by scale3d
R = cameras['R'][nf][nv]
T = cameras['T'][nf][nv]
else:
K = cameras['K'][nv].copy()
K[:2, :] *= self.scale3d
R = cameras['R'][nv]
T = cameras['T'][nv]
# add ground
if self.render_mode == 'ground':
from easymocap.visualize.geometry import create_ground
ground = create_ground(
center=[0, 0, -0.05], xdir=[1, 0, 0], ydir=[0, 1, 0], # placement
step=1, xrange=10, yrange=10, # extent
white=[1., 1., 1.], black=[0.5,0.5,0.5], # checker colors
two_sides=True
)
meshes[1001] = ground
vis = np.zeros((self.shape[0], self.shape[1], 3), dtype=np.uint8) + 255
focal = min(self.shape) * 1.2
K = np.array([
[focal,0,vis.shape[1]/2],
[0,focal,vis.shape[0]/2],
[0,0,1]]) # principal point at (width/2, height/2)
ret = plot_meshes(vis, meshes, K, R, T, mode='rgb')
else:
ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
ret = add_logo(ret)
mv_ret.append(ret)
self.merge_and_write(mv_ret)
def __call__(self, params, cameras, imgnames, **kwargs):
body_model = kwargs[self.model_name]
vertices = body_model.vertices(params, return_tensor=False)
faces = body_model.faces
self.render_(vertices, faces, cameras, imgnames)
class Render_nocam:
def __init__(self, scale=0.5, backend='pyrender',view_list=[0]) -> None:
self.name = 'render'
self.scale = scale
self.view_list = view_list
def __call__(self, hand_model, params, images):
vertices = hand_model(**params, return_verts=True, return_tensor=False)
faces = hand_model.faces
for nf, img in enumerate(tqdm(images, desc=self.name)):
for nv in self.view_list:
if isinstance(img, np.ndarray):
vis = img.copy()
basename = '{:06}.jpg'.format(nf)
else:
basename = os.path.basename(img[nv])
# re-read the image from disk
assert os.path.exists(img[nv]), img[nv]
vis = cv2.imread(img[nv])
vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
vert = vertices[nf]
meshes = {}
meshes[0] = {
'vertices': vert,
'faces': faces,
'id': 0,
'name': 'human_{}'.format(0)
}
K = np.array([[vis.shape[0],0,vis.shape[1]/2],[0,vis.shape[1],vis.shape[0]/2],[0,0,1]]) # principal point at (width/2, height/2)
K[:2, :] *= self.scale
R = np.eye(3)
T = np.array([0,0,0.3])
ret = plot_meshes(vis, meshes, K, R, T, mode='image')
outname = join(self.output, self.name, basename)
os.makedirs(os.path.dirname(outname), exist_ok=True)
cv2.imwrite(outname, ret)
class Render_multiview_hand(Render_multiview):
def __call__(self, hand_model_l, params_l, cameras, imgnames):
vertices = hand_model_l(**params_l, return_verts=True, return_tensor=False)
faces = hand_model_l.faces
self.render_(vertices, faces, cameras, imgnames)
class Render_smplh(Render_multiview):
def __init__(self, path, at_step, scale=0.5, mode='image', backend='pyrender', view_list=[0]) -> None:
super().__init__(view_list=view_list, render_mode=mode, backend=backend, scale=scale) # map onto Render_multiview's keyword signature
from easymocap.config import Config, load_object
cfg_data = Config.load(path)
self.model = load_object(cfg_data.module, cfg_data.args)
self.at_step = at_step
def __call__(self, params_smplh, cameras, imgnames):
vertices = self.model(return_verts=True, return_tensor=False, **params_smplh)
faces = self.model.faces
if self.at_step:
self.render_([vertices], faces, cameras, [imgnames])
else:
self.render_(vertices, faces, cameras, imgnames)
class Render_smplh2(Render_smplh):
def __call__(self, params, cameras, imgnames):
super().__call__(params, cameras, imgnames)
def projectPoints(X, K, R, t, Kd):
x = R @ X + t
x[0:2,:] = x[0:2,:]/x[2,:] # normalize onto the image plane
r = x[0,:]*x[0,:] + x[1,:]*x[1,:]
radial = 1 + Kd[0]*r + Kd[1]*r*r + Kd[4]*r*r*r
# use temporaries so that the distorted x does not leak into the y update
x0 = x[0,:]*radial + 2*Kd[2]*x[0,:]*x[1,:] + Kd[3]*(r + 2*x[0,:]*x[0,:])
x1 = x[1,:]*radial + 2*Kd[3]*x[0,:]*x[1,:] + Kd[2]*(r + 2*x[1,:]*x[1,:])
x[0,:] = K[0,0]*x0 + K[0,1]*x1 + K[0,2]
x[1,:] = K[1,0]*x0 + K[1,1]*x1 + K[1,2]
return x
class Render_multiview_handbyk3d(Render_multiview):
def __call__(self, hand_model_l, params_l, hand_model_r, params_r, cameras, imgnames, keypoints3d):
joint_regressor_r = np.load('models/handmesh/data/joint_regressor_r.npy') # right hand
joint_regressor_l = np.load('models/handmesh/data/joint_regressor_l.npy') # left hand
facesl = hand_model_l.faces
facesr = hand_model_r.faces
# do not show the results for person 0
keypoints3d[0] = 0
img = imgnames
k3d = keypoints3d
vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False) #[nf]
vertices_r = hand_model_r(**params_r, return_verts=True, return_tensor=False) #[nf]
joint_l = np.repeat(joint_regressor_l[None, :, :],vertices_l.shape[0],0) @ vertices_l
joint_r = np.repeat(joint_regressor_r[None, :, :],vertices_r.shape[0],0) @ vertices_r
params_l['Th'] += k3d[:,7,:3] - joint_l[:,0,:] # wrist joint 7 = left hand #[nf]
params_r['Th'] += k3d[:,4,:3] - joint_r[:,0,:] # wrist joint 4 = right hand #[nf]
vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False) #[nf]
vertices_r = hand_model_r(**params_r, return_verts=True, return_tensor=False) #[nf]
faces = []
vert = []
pids = []
for i in range(k3d.shape[0]):
if k3d[i,7,-1]==0:
continue
vv = vertices_l[i].copy()
vert.append(vv)
faces.append(facesl)
pids.append(i)
for i in range(k3d.shape[0]):
if k3d[i,4,-1]==0:
continue
vv = vertices_r[i].copy()
vert.append(vv)
faces.append(facesr)
pids.append(i)
faces = np.stack(faces)
vert = np.stack(vert)
for nv in self.view_list:
basename = os.path.basename(img[nv])
# re-read the image from disk
assert os.path.exists(img[nv]), img[nv]
vis = cv2.imread(img[nv])
vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
# vert = vertices
meshes = {}
if vert.ndim == 2:
meshes[0] = {
'vertices': vert,
'faces': faces,
'id': 0,
'name': 'human_{}'.format(0)
}
elif vert.ndim == 3:
for pid in range(vert.shape[0]):
meshes[pid] = {
'vertices': vert[pid],
'faces': faces[pid],
'vid': pids[pid],
'name': 'human_{}'.format(pid)
}
K = cameras['K'][nv].copy()
K[:2, :] *= self.scale
R = cameras['R'][nv]
T = cameras['T'][nv]
from easymocap.mytools.vis_base import plot_keypoints_auto
for pid in range(keypoints3d.shape[0]):
keypoints_repro = projectPoints(keypoints3d[pid].T[:3,:], K, R, T, cameras['dist'][nv].reshape(5)).T
keypoints_repro[:,-1] = keypoints3d[pid,:,-1]
plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=False)
ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
outname = join(self.output, self.name, basename)
os.makedirs(os.path.dirname(outname), exist_ok=True)
cv2.imwrite(outname, ret)
class Render_selectview:
def __init__(self, scale=0.5, backend='pyrender', output='output',mode = 'image') -> None:
self.name = 'render_debug'
self.scale = scale
self.view_list = [5]
self.output = output
self.mode = mode
def __call__(self, hand_model_l, posel, match3d_l, cameras, imgnames, keypoints3d,bbox_handl, joint_regressor, wristid):
img = imgnames
k3d = keypoints3d
joint_regressor_l = joint_regressor
facesl = hand_model_l.faces
hand_list=[]
for pid in range(len(match3d_l)):
dt = match3d_l[pid]
if isinstance(dt, int):
# TODO: handle -1, i.e. no suitable matched hand was found
hand_list.append(np.zeros((1, 48)))
continue
out_img = []
for cid in range(len(dt['views'])):
nv = dt['views'][cid]
poseid = dt['indices'][cid]
pose = posel[nv][poseid].copy()
Rh = pose[:,:3].copy()
invR = np.linalg.inv(cameras['R'][nv])
Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
Rh_m_new = invR @ Rh_m_old
Rh = cv2.Rodrigues(Rh_m_new)[0]
pose_ = np.hstack((Rh.reshape(3),pose[:,3:].reshape(-1))).reshape(1,-1)
Rh = pose_[:,:3].copy()
pose_[:,:3] = 0
params_l={
'Rh':Rh,
'Th':np.zeros_like(Rh),
'poses':pose_,
'shapes':np.zeros((Rh.shape[0],10)),
}
vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)
joint_l = np.repeat(joint_regressor_l[None, :, :],vertices_l.shape[0],0) @ vertices_l
params_l['Th']+=k3d[pid,wristid,:3] - joint_l[0,0,:]
vertices_l = hand_model_l(**params_l, return_verts=True, return_tensor=False)
vert = vertices_l[0]
faces = facesl
basename = os.path.basename(img[nv])
# re-read the image from disk
assert os.path.exists(img[nv]), img[nv]
vis = cv2.imread(img[nv])
plot_bbox(vis,bbox_handl[nv][poseid],0)
vis = cv2.resize(vis, None, fx=self.scale, fy=self.scale)
meshes = {}
if vert.ndim == 2:
meshes[0] = {
'vertices': vert,
'faces': faces,
'id': 0,
'name': 'human_{}'.format(0)
}
elif vert.ndim == 3:
for pid in range(vert.shape[0]):
meshes[pid] = {
'vertices': vert[pid],
'faces': faces[pid],
'id': pid,
'name': 'human_{}'.format(pid)
}
K = cameras['K'][nv].copy()
K[:2, :] *= self.scale
R = cameras['R'][nv]
T = cameras['T'][nv]
ret = plot_meshes(vis, meshes, K, R, T, mode=self.mode)
out_img.append(ret)
out_img = merge(out_img)
outname = join(self.output, self.name, '{}-{:02d}.jpg'.format(basename.split('.jpg')[0],pid))
os.makedirs(os.path.dirname(outname), exist_ok=True)
cv2.imwrite(outname, out_img)
class Render_selectview_lr:
def __init__(self, scale=0.5, backend='pyrender', output='output',mode = 'image') -> None:
self.output = output
self.model_l = Render_selectview(scale=scale, backend=backend, output=self.output, mode=mode)
self.model_r = Render_selectview(scale=scale, backend=backend, output=self.output, mode=mode)
self.model_l.name+='_l'
self.model_r.name+='_r'
def __call__(self, hand_model_l, posel, poser, match3d_l, match3d_r, hand_model_r, cameras, imgnames, keypoints3d,bbox_handl,bbox_handr):
joint_regressor_r = np.load('models/handmesh/data/joint_regressor_r.npy') # right hand
joint_regressor_l = np.load('models/handmesh/data/joint_regressor_l.npy') # left hand
self.model_l(hand_model_l, posel, match3d_l, cameras, imgnames, keypoints3d,bbox_handl, joint_regressor_l, 7)
self.model_r(hand_model_r, poser, match3d_r, cameras, imgnames, keypoints3d,bbox_handr, joint_regressor_r, 4)
class Render_mv(Render):
def __call__(self, body_model, params, cameras, imgnames):
super().__call__(body_model, params, cameras, [imgnames[0],imgnames[1]])

97
myeasymocap/io/write.py Normal file

@ -0,0 +1,97 @@
import os
from easymocap.mytools.file_utils import write_keypoints3d, write_smpl
from easymocap.annotator.file_utils import save_annot
from os.path import join
from tqdm import tqdm
class Write:
def __init__(self, output='/tmp', name='keypoints3d') -> None:
self.output = output
self.name = name
def __call__(self, keypoints3d):
for nf in tqdm(range(keypoints3d.shape[0]), desc='writing to {}/{}'.format(self.output, self.name)):
res = [{
'id': 0,
'keypoints3d': keypoints3d[nf]
}]
dumpname = join(self.output, self.name, '{:06d}.json'.format(nf))
write_keypoints3d(dumpname, res)
return {}
class WriteAll:
def __init__(self, name, output='/tmp') -> None:
self.output = output
self.name = name
def __call__(self, results, meta):
for nf in tqdm(range(len(results)), desc='writing to {}/{}'.format(self.output, self.name)):
res = [{'id': r['id'], 'keypoints3d': r['keypoints3d']} for r in results[nf]]
res.sort(key=lambda x: x['id'])
imgnames = meta['imgnames'][nf]
if len(imgnames) > 0:
name = os.path.basename(imgnames[0])
name = name.replace('.jpg', '')
else:
name = '{:06d}'.format(nf)
dumpname = join(self.output, self.name, '{}.json'.format(name))
write_keypoints3d(dumpname, res)
class Write2D:
def __init__(self, name, output='/tmp') -> None:
self.output = output
self.name = name
def __call__(self, results, meta):
for nf in tqdm(range(len(results)), desc='writing to {}/{}'.format(self.output, self.name)):
subs = meta['subs'][nf]
result = results[nf]
annots_all = {sub: [] for sub in subs}
for res in result:
for nv, v in enumerate(res['views']):
annots_all[subs[v]].append({
'personID': res['id'],
'bbox': res['bbox'][nv],
'keypoints': res['keypoints2d'][nv],
})
for nv, sub in enumerate(subs):
annots = {
'filename': f'{sub}/{nf:06d}.jpg',
'height': meta['image_shape'][nf][nv][0],
'width': meta['image_shape'][nf][nv][1],
'annots': annots_all[sub],
'isKeyframe': False
}
dumpname = join(self.output, self.name, sub, '{:06d}.json'.format(nf))
save_annot(dumpname, annots)
class WriteSMPL:
def __init__(self, name='smpl') -> None:
self.name = name
def __call__(self, params=None, results=None, meta=None, model=None):
results_all = []
if results is None and params is not None:
# copy params to results
results = {0: {'params': params, 'keypoints3d': None, 'frames': list(range(len(params['Rh'])))}}
for index in tqdm(meta['index'], desc=self.name):
results_frame = []
for pid, result in results.items():
if index >= result['frames'][0] and index <= result['frames'][-1]:
frame_rel = result['frames'].index(index)
results_frame.append({
'id': pid,
# 'keypoints3d': result['keypoints3d'][frame_rel]
})
for key in ['Rh', 'Th', 'poses', 'shapes']:
if result['params'][key].shape[0] == 1:
results_frame[-1][key] = result['params'][key]
else:
results_frame[-1][key] = result['params'][key][frame_rel:frame_rel+1]
param = results_frame[-1]
pred = model(param)['keypoints'][0]
results_frame[-1]['keypoints3d'] = pred
write_smpl(join(self.output, self.name, '{:06d}.json'.format(meta['frame'][index])), results_frame)
write_keypoints3d(join(self.output, 'keypoints3d', '{:06d}.json'.format(meta['frame'][index])), results_frame)
results_all.append(results_frame)
return {'results_perframe': results_all}


@ -0,0 +1,101 @@
from typing import Any
import numpy as np
from easymocap.mytools.debug_utils import mywarn, log
def solve_translation(X, x, K):
A = np.zeros((2*X.shape[0], 3))
b = np.zeros((2*X.shape[0], 1))
fx, fy = K[0, 0], K[1, 1]
cx, cy = K[0, 2], K[1, 2]
for nj in range(X.shape[0]):
A[2*nj, 0] = 1
A[2*nj + 1, 1] = 1
A[2*nj, 2] = -(x[nj, 0] - cx)/fx
A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
A[2*nj:2*nj+2, :] *= x[nj, 2]
b[2*nj:2*nj+2, :] *= x[nj, 2]
trans = np.linalg.inv(A.T @ A) @ A.T @ b
return trans.T[0]
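# Derivation note: each observed joint contributes two linear constraints on
# the translation t = (tx, ty, tz). With u = (x - cx)/fx and v = (y - cy)/fy,
# requiring that X + t projects to (u, v) gives
#   tx - u*tz = u*Xz - Xx
#   ty - v*tz = v*Xz - Xy
# The rows are weighted by the keypoint confidence x[:, 2] and solved in
# closed form via the normal equations t = (A^T A)^{-1} A^T b.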
class MeanShapes:
def __init__(self, keys, dim=0) -> None:
self.keys = keys
self.dim = dim
def __call__(self, params):
for key in self.keys:
log('[{}] Mean {}: {}'.format(self.__class__.__name__, key, params[key].shape))
params[key] = params[key].mean(axis=self.dim, keepdims=True)
log('[{}] Mean {}: {}'.format(self.__class__.__name__, key, params[key].shape))
class InitTranslation:
def __init__(self, solve_T=True, solve_R=False) -> None:
self.solve_T = solve_T
self.solve_R = solve_R
def __call__(self, body_model, params, cameras, keypoints):
nJoints = 15 # only use the 15 main body joints
params['Th'] = np.zeros_like(params['Th'])
kpts1 = body_model.keypoints(params, return_tensor=False)
for i in range(kpts1.shape[0]):
k2d = keypoints[i, :nJoints]
if k2d[:, -1].sum() < nJoints / 2:
mywarn('[{}] No valid keypoints in frame {}'.format(self.__class__.__name__, i))
params['Th'][i] = params['Th'][i-1]
continue
trans = solve_translation(kpts1[i, :nJoints], k2d, cameras['K'][i])
params['Th'][i] += trans
# params['shapes'] = params['shapes'].mean(0, keepdims=True)
return {'params': params}
class InitParams:
def __init__(self, num_poses=69, num_shapes=10, rootid=8, share_shape=True, init_trans=0.) -> None:
self.num_poses = num_poses
self.num_shapes = num_shapes
self.rootid = rootid
self.share_shape = share_shape
self.init_trans = init_trans
def __call__(self, **kwargs):
"""
keypoints3d: (nFrames, nJoints, 4) or (nFrames, nPerson, nJoints, 4)
"""
key = list(kwargs.keys())[0]
keypoints3d = kwargs[key]
if keypoints3d.ndim == 4:
shape = (keypoints3d.shape[:2])
elif keypoints3d.ndim == 3:
shape = (keypoints3d.shape[0],)
else:
raise ValueError('keypoints3d must be 3 or 4 dim')
params={
'Rh': np.zeros((*shape, 3),dtype=np.float32),
'Th': np.zeros((*shape, 3),dtype=np.float32),
'poses': np.zeros((*shape, self.num_poses),dtype=np.float32),
'shapes': np.zeros((*shape, self.num_shapes),dtype=np.float32)
}
# TODO: check the root confidence and interpolate
# initialization
if key == 'keypoints3d':
params['Th'] = keypoints3d[..., self.rootid, :3]
else:
mywarn('[{}] keypoints3d not used; initial depth set to {}'.format(self.__class__.__name__, self.init_trans))
params['Th'][:, 2] = self.init_trans
if self.share_shape:
params['shapes'] = params['shapes'].mean(0, keepdims=True)
return {'params': params}
class Init_params_and_target_poses(InitParams):
def __call__(self, params_smplh, model):
"""
keypoints3d: (nFrames, nJoints, 4) or (nFrames, nPerson, nJoints, 4)
"""
out = model(params_smplh)
keypoints3d = out['keypoints'].cpu().detach().numpy()
ret = super().__call__(keypoints3d=keypoints3d) # InitParams.__call__ accepts keyword arguments only
for key in params_smplh.keys():
ret['params'][key] = params_smplh[key]
ret['target_'+key] = params_smplh[key]
return ret


@ -0,0 +1,246 @@
import torch
import torch.nn as nn
import numpy as np
class GMoF(nn.Module):
def __init__(self, rho=1):
super(GMoF, self).__init__()
self.rho = rho # kept so that extra_repr below works
self.rho2 = rho * rho
def extra_repr(self):
return 'rho = {}'.format(self.rho)
def forward(self, est, gt=None, conf=None):
if gt is not None:
square_diff = torch.sum((est - gt)**2, dim=-1)
else:
square_diff = torch.sum(est**2, dim=-1)
diff = torch.div(square_diff, square_diff + self.rho2)
if conf is not None:
res = torch.sum(diff * conf)/(1e-5 + conf.sum())
else:
res = diff.sum()/diff.numel()
return res
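# GMoF is the Geman-McClure robust penalty rho(e) = e^2 / (e^2 + rho^2): close
# to a scaled squared error for small residuals, saturating at 1 for outliers,
# so bad detections cannot dominate the objective.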
class BaseLoss(nn.Module):
def __init__(self, norm='l2', norm_info={}, reduce='sum') -> None:
super().__init__()
self.loss = self.make_loss(norm, norm_info, reduce)
def make_loss(self, norm='l2', norm_info={}, reduce='sum'):
reduce = torch.sum if reduce=='sum' else torch.mean
if norm == 'l2':
def loss(est, gt=None, conf=None):
if gt is not None:
square_diff = reduce((est - gt)**2, dim=-1)
else:
square_diff = reduce(est**2, dim=-1)
if conf is not None:
res = torch.sum(square_diff * conf)/(1e-5 + conf.sum())
else:
res = square_diff.sum()/square_diff.numel()
return res
elif norm == 'l1':
def loss(est, gt=None, conf=None):
if gt is not None:
square_diff = reduce(torch.abs(est - gt), dim=-1)
else:
square_diff = reduce(torch.abs(est), dim=-1)
if conf is not None:
res = torch.sum(square_diff * conf)/(1e-5 + conf.sum())
else:
res = square_diff.sum()/square_diff.numel()
return res
elif norm == 'gm':
loss = GMoF(**norm_info) if isinstance(norm_info, dict) else GMoF(norm_info) # accept either kwargs or a bare rho
else:
loss = None
return loss
def forward(self, pred, target):
pass
class BaseKeypoints(BaseLoss):
@staticmethod
def select(keypoints, index, ranges):
if len(index) > 0:
keypoints = keypoints[..., index, :]
elif len(ranges) > 0:
if ranges[1] == -1:
keypoints = keypoints[..., ranges[0]:, :]
else:
keypoints = keypoints[..., ranges[0]:ranges[1], :]
return keypoints
def __init__(self, index_est=[], index_gt=[],
ranges_est=[], ranges_gt=[], **kwargs):
super().__init__(**kwargs)
self.index_est = index_est
self.index_gt = index_gt
self.ranges_est = ranges_est
self.ranges_gt = ranges_gt
def forward(self, pred, target):
return super().forward(pred, target)
def loss_keypoints(self, pred, target, conf):
# pred: (..., dim)
# target: (..., dim)
# conf: (..., 1)
dist = torch.sum((pred - target)**2, dim=-1, keepdim=True)
loss = torch.sum(dist * conf) / torch.sum(conf)
return loss
class Keypoints2D(BaseKeypoints):
def forward(self, pred, target):
# (nFrames, nJoints, 3)
pred_kpts3d = self.select(pred['keypoints'] , self.index_est, self.ranges_est)
target_kpts2d = self.select(target['keypoints'], self.index_gt, self.ranges_gt)
cameras = target['cameras']
P = torch.cat([cameras['R'], cameras['T']], dim=-1)
invKtrans = torch.inverse(cameras['K']).transpose(-1, -2)
homo = torch.cat([target_kpts2d[..., :2], torch.ones_like(target_kpts2d[..., 2:])], dim=-1)
target_points = torch.matmul(homo, invKtrans)[..., :2]
pred_homo = torch.cat([pred_kpts3d, torch.ones_like(pred_kpts3d[..., :1])], dim=-1)
self.einsum = 'fab,fjb->fja'
point_cam = torch.einsum(self.einsum, P, pred_homo)
img_points = point_cam[..., :2]/point_cam[..., 2:]
loss = self.loss(est=img_points, gt=target_points, conf=target_kpts2d[..., -1])
return loss
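# Note: instead of projecting predictions with K, the target 2D points are
# mapped to normalized camera coordinates with K^{-1}; the residual is then
# measured on the normalized image plane, so its scale does not depend on the
# focal length.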
class Keypoints3D(BaseKeypoints):
def forward(self, pred, target):
# (nFrames, nJoints, 3)
# breakpoint()
pred_kpts3d = self.select(pred['keypoints'] , self.index_est, self.ranges_est)
target_kpts3d = self.select(target['keypoints3d'], self.index_gt, self.ranges_gt)
assert target_kpts3d.shape[-1] == 4, 'Target keypoints {} must have confidence '.format(target_kpts3d.shape)
loss = self.loss(est=pred_kpts3d, gt=target_kpts3d[...,:3], conf=target_kpts3d[..., -1])
return loss
class LimbLength(BaseKeypoints):
def __init__(self, kintree, key='keypoints3d', **kwargs):
self.kintree = np.array(kintree)
super().__init__(**kwargs)
def __str__(self):
return "Limb of: {}".format(','.join(['[{},{}]'.format(i,j) for (i,j) in self.kintree]))
def forward(self, pred, target):
pred_kpts3d = pred['keypoints']
target_kpts3d = target['keypoints3d']
# select limb endpoints via the kinematic tree
pred = torch.norm(pred_kpts3d[..., self.kintree[:, 1], :] - pred_kpts3d[..., self.kintree[:, 0], :], dim=-1, keepdim=True)
target = torch.norm(target_kpts3d[..., self.kintree[:, 1], :] - target_kpts3d[..., self.kintree[:, 0], :], dim=-1, keepdim=True)
target_conf = torch.minimum(target_kpts3d[..., self.kintree[:, 1], -1], target_kpts3d[..., self.kintree[:, 0], -1])
loss = self.loss(est=pred, gt=target, conf=target_conf)
return loss
class Smooth(BaseLoss):
def __init__(self, keys, smooth_type, order, norm, weights, window_weight) -> None:
super().__init__(norm)
self.loss = {}
for i in range(len(keys)):
new_key = keys[i] + '_' + smooth_type[i]
self.loss[new_key] = {
'func': self.make_loss(norm='l2', norm_info={}, reduce='sum'),
'key': keys[i],
'weight': weights[i],
'norm': norm[i],
'order': order[i],
'type': smooth_type[i],
}
self.window_weight = window_weight
    def convert_Rh_to_R(self, Rh):
        from ..bodymodels.geometry import batch_rodrigues
        # Rh: (..., nRot x 3) axis-angle vectors
        nRot = Rh.shape[-1] // 3
        Rh_flat = Rh.reshape(-1, 3)
        Rot = batch_rodrigues(Rh_flat)
        # flatten each 3x3 rotation and keep all nRot rotations per sample
        Rot = Rot.reshape(*Rh.shape[:-1], nRot * 9)
        return Rot
def forward(self, pred, target):
ret = {}
for key, cfg in self.loss.items():
value = pred[cfg['key']]
loss = 0
for width, weight in enumerate(self.window_weight, start=1):
if cfg['type'] == 'Linear':
vel = value[width:] - value[:-width]
elif cfg['type'] == 'Rot':
_value = self.convert_Rh_to_R(value)
vel = _value[width:] - _value[:-width]
elif cfg['type'] == 'Depth':
                # TODO: take the camera R and T into account
if 'cameras' in target.keys():
R = target['cameras']['R']
_value = torch.bmm(value[..., None, :], R.transpose(-1, -2))
_value = _value[..., 0, :]
                    _value = _value[..., [2]] # keep only the depth component
vel = _value[width:] - _value[:-width]
if cfg['order'] == 2:
vel = vel[1:] - vel[:-1]
loss += weight * cfg['func'](est=vel)
ret[key] = loss * cfg['weight']
return ret
class AnySmooth(BaseLoss):
def __init__(self, key, weight, norm, norm_info={}, dim=-1, order=1):
super().__init__()
self.dim = dim
self.weight = weight
self.loss = self.make_loss(norm, norm_info)
self.norm_name = norm
self.key = key
self.order = order
def forward(self, pred, target):
loss = 0
value = pred[self.key]
if value.shape[0] <= len(self.weight):
return torch.FloatTensor([0.]).to(value.device)
for width, weight in enumerate(self.weight, start=1):
vel = value[width:] - value[:-width]
if self.order == 2:
vel = vel[1:] - vel[:-1]
loss += weight * self.loss(vel)
return loss
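
# --- Illustrative sketch (not part of the module): the windowed velocity
# penalty behind AnySmooth. Differences are taken at widths 1..len(weights)
# and accumulated with per-width weights; the trajectory is toy data.
import torch

example_traj = torch.cumsum(torch.rand(10, 3), dim=0)   # fake (nFrames, dim) trajectory
example_weights = [0.5, 0.3, 0.2]
example_loss = 0.
for width, weight in enumerate(example_weights, start=1):
    vel = example_traj[width:] - example_traj[:-width]   # finite difference at this width
    example_loss = example_loss + weight * (vel**2).sum()
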
class Init(BaseLoss):
def __init__(self, keys, weights, norm) -> None:
super().__init__(norm)
self.keys = keys
self.weights = weights
def forward(self, pred, target):
ret = {}
for key in self.keys:
ret[key] = torch.mean((pred[key] - target['init_'+key])**2)
return ret
from easymocap.multistage.lossbase import AnyReg
class RegLoss(AnyReg):
def __init__(self, key, norm) -> None:
super().__init__(key, norm)
def __call__(self, pred, target):
return self.forward(**{self.key: pred[self.key]})
class Init_pose(Init):
def __init__(self, keys, weights, norm) -> None:
super().__init__(keys, weights, norm)
self.norm = norm
def forward(self, pred, target):
ret = {}
for key in self.keys:
if self.norm == 'l2':
ret[key] = torch.sum((pred[key] - target['target_'+key])**2)
elif self.norm == 'l1':
ret[key] = torch.sum(torch.abs(pred[key] - target['target_'+key]))
return ret

File diff suppressed because it is too large

View File

@ -0,0 +1,193 @@
import numpy as np
import cv2
import scipy
import torch
class MultilView_Merge:
    def __init__(self) -> None:
        pass
    def forward(self, data, ax=0):
        '''
        data - dict
            data[key]: (nViews, ...); each entry is averaged over axis `ax`
        '''
        results = {}
        for key in data.keys():
            results[key] = data[key].mean(axis=ax)
        return results
class Merge_hand(MultilView_Merge):
    def __init__(self, camtoworld) -> None:
        self.camtoworld = camtoworld
    def __call__(self, posel, cameras, match3d_l):
        hand_list = []
        for pid in range(len(match3d_l)):
            dt = match3d_l[pid]
            if isinstance(dt, int):
                # TODO: handle -1, i.e. no suitable matched hand was found
                # NOTE: a (48,) zero pose keeps shapes consistent with the merged
                # poses below; the break leaves any later pids unprocessed
                hand_list.append(np.zeros(48))
                break
Merge_list=[]
for cid in range(len(dt['views'])):
nv = dt['views'][cid]
poseid = dt['indices'][cid]
pose = posel[nv][poseid].copy()
if self.camtoworld:
Rh = pose[:,:3].copy()
invR = np.linalg.inv(cameras['R'][nv])
Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
Rh_m_new = invR @ Rh_m_old
Rh = cv2.Rodrigues(Rh_m_new)[0]
Merge_list.append(np.hstack((Rh.reshape(3),pose[:,3:].reshape(-1))))
                else:
                    # flatten to (48,) so both branches append the same shape
                    Merge_list.append(pose.reshape(-1))
out = self.forward({'pose':np.stack(Merge_list)},0)
hand_list.append(out['pose'])
pose_ = np.stack(hand_list)
Rh = pose_[:,:3].copy()
pose_[:,:3] = 0
params={
'Rh':Rh,
'Th':np.zeros_like(Rh),
'poses':pose_,
'shapes':np.zeros((Rh.shape[0],10)),
}
return {'params': params}
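
# --- Illustrative sketch (not part of the module): the camera-to-world
# rotation applied per view above. The axis-angle Rh is converted to a matrix,
# left-multiplied by R^{-1}, and converted back; R_cam and Rh_cam are invented.
import cv2
import numpy as np

R_cam = cv2.Rodrigues(np.array([0., 0.3, 0.]))[0]   # assumed world-to-camera rotation
Rh_cam = np.array([0.1, 0.2, 0.3])                  # hand orientation in the camera frame
R_world = np.linalg.inv(R_cam) @ cv2.Rodrigues(Rh_cam)[0]
Rh_world = cv2.Rodrigues(R_world)[0].reshape(3)
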
class Merge_handlr(Merge_hand):
def __call__(self, posel, poser, cameras, match3d_l, match3d_r):
params_l = super().__call__(posel, cameras, match3d_l)
params_r = super().__call__(poser, cameras, match3d_r)
return {'params_l':params_l['params'], 'params_r':params_r['params']}
class Merge_bodyandhand:
def __init__(self, tmp) -> None:
pass
def get_R(self, poses, cfg, st):
res = st.copy()
for i in cfg:
res = res @ cv2.Rodrigues(poses[i,:])[0]
return res
def process_poses_mano(self, poses, hand_Rh, flag):
if sum(flag) == 0:
return poses
poses = poses.reshape((-1,3))
cfg={'rt': [0,3,6,9],
'r': [14,17,19],
'l': [13,16,18]
}
RA = self.get_R(poses, cfg['rt'],np.eye(3))
if flag[0] :
RL = self.get_R(poses, cfg['l'],RA)
tmppose = np.matrix(RL).I @ cv2.Rodrigues(np.array(hand_Rh[0]))[0]
tmppose = cv2.Rodrigues(tmppose)[0]
poses[20,:] = tmppose.reshape(3)
e20 = scipy.spatial.transform.Rotation.from_rotvec(torch.from_numpy(poses[20,:]).reshape(-1,3))
e20 = e20.as_euler('ZYX', degrees=True)
dt = scipy.spatial.transform.Rotation.from_euler('ZYX', np.array([0,0,e20[0,2]/2]), degrees=True)
rot_dt = dt.as_matrix()
rot18 = cv2.Rodrigues(poses[18,:])[0]
rot18 = rot18@rot_dt
vec18 = cv2.Rodrigues(rot18)[0].reshape((1,3))
rot20 = cv2.Rodrigues(poses[20,:])[0]
rot20 = np.linalg.inv(rot_dt) @ rot20
vec20 = cv2.Rodrigues(rot20)[0].reshape((1,3))
poses[20,:] = vec20
poses[18,:] = vec18
        if flag[1]:
RR = self.get_R(poses, cfg['r'],RA)
tmppose = np.matrix(RR).I @ cv2.Rodrigues(np.array(hand_Rh[1]))[0]
tmppose = cv2.Rodrigues(tmppose)[0]
poses[21,:] = tmppose.reshape(3)
e21 = scipy.spatial.transform.Rotation.from_rotvec(torch.from_numpy(poses[21,:]).reshape(-1,3))
e21 = e21.as_euler('ZYX', degrees=True)
dt = scipy.spatial.transform.Rotation.from_euler('ZYX', np.array([0,0,e21[0,2]/2]), degrees=True)
rot_dt = dt.as_matrix()
rot19 = cv2.Rodrigues(poses[19,:])[0]
rot19 = rot19@rot_dt
vec19 = cv2.Rodrigues(rot19)[0].reshape((1,3))
rot21 = cv2.Rodrigues(poses[21,:])[0]
rot21 = np.linalg.inv(rot_dt) @ rot21
vec21 = cv2.Rodrigues(rot21)[0].reshape((1,3))
poses[21,:] = vec21
poses[19,:] = vec19
return poses.reshape((1,-1))
    def merge_pose(self, bodypose, handlpose, handrpose):
        flag = [True, True]
        if abs(handlpose).sum() == 0:
            flag[0] = False
        if abs(handrpose).sum() == 0:
            flag[1] = False
        pose = np.hstack((bodypose, handlpose[:, 3:], handrpose[:, 3:])) # (1, 156)
        # if a hand was not found its pose is all zeros, so its flag is set to False
        out_pose = self.process_poses_mano(pose, [handlpose[0, :3], handrpose[0, :3]], flag)
        return out_pose
def __call__(self, params_l, params_r, params):
bz = params['Rh'].shape[0]
ret = {
'Rh': np.zeros((bz,3),dtype=np.float32),
'Th': params['Th'],
'poses': np.zeros((bz,156),dtype=np.float32),
'shapes':np.zeros((bz,16),dtype=np.float32)
}
ret['shapes'][:,:10] = params['shapes']
        # TODO: support multiple frames and persons
for i in range(bz):
inpose = np.zeros((1,66))
inpose[:,3:] = params['poses'][i][:63].copy()
            inpose[:,:3] = params['Rh'][i].copy() # poses[0:3] carries the global rotation; Rh may need to be merged in
handlpose = params_l['poses'][i].reshape((1,-1)).copy()
handrpose = params_r['poses'][i].reshape((1,-1)).copy()
handlpose[:,:3] = params_l['Rh'][i]
handrpose[:,:3] = params_r['Rh'][i]
out = self.merge_pose(inpose.reshape((1,-1)), handlpose, handrpose)
ret['Rh'][i] = out[:,:3]
ret['poses'][i,3:] = out[:,3:]
return {'params_smplh': ret}
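
# --- Illustrative sketch (not part of the module): the SMPL-H packing used
# above: 66 body values (3 global + 63 articulated) followed by 45 values per
# hand, with the wrist orientations merged in separately. Values are dummies.
import numpy as np

example_body = np.zeros((1, 66))             # body[:, :3] holds the global Rh
example_handl = np.random.rand(1, 48)        # 3 wrist Rh + 45 finger parameters
example_handr = np.random.rand(1, 48)
example_pose = np.hstack((example_body, example_handl[:, 3:], example_handr[:, 3:]))
assert example_pose.shape == (1, 156)
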

View File

@ -0,0 +1,167 @@
import torch
import torch.nn as nn
from easymocap.config import Config, load_object
from easymocap.mytools.debug_utils import log
def dict_of_numpy_to_tensor(body_params, device):
params_ = {}
for key, val in body_params.items():
if isinstance(val, dict):
params_[key] = dict_of_numpy_to_tensor(val, device)
else:
params_[key] = torch.Tensor(val).to(device)
return params_
def dict_of_tensor_to_numpy(body_params):
params_ = {}
for key, val in body_params.items():
if isinstance(val, dict):
params_[key] = dict_of_tensor_to_numpy(val)
else:
params_[key] = val.cpu().numpy()
return params_
def make_optimizer(opt_params, optim_type='lbfgs', max_iter=20,
lr=1e-3, betas=(0.9, 0.999), weight_decay=0.0, **kwargs):
if isinstance(opt_params, dict):
        # LBFGS does not accept a dict of parameters
opt_params = list(opt_params.values())
if optim_type == 'lbfgs':
from easymocap.pyfitting.lbfgs import LBFGS
        optimizer = LBFGS(opt_params, line_search_fn='strong_wolfe', max_iter=max_iter,
            tolerance_grad=1e-7, # float32 has about 7 significant decimal digits
            tolerance_change=1e-7,
            **kwargs)
elif optim_type == 'adam':
optimizer = torch.optim.Adam(opt_params, lr=lr, betas=betas, weight_decay=weight_decay)
else:
raise NotImplementedError
return optimizer
def grad_require(params, flag=False):
if isinstance(params, list):
for par in params:
par.requires_grad = flag
elif isinstance(params, dict):
for key, par in params.items():
par.requires_grad = flag
def make_closure(optimizer, model, params, infos, loss, device):
loss_func = {}
for key, val in loss.items():
loss_func[key] = load_object(val['module'], val['args'])
if isinstance(loss_func[key], nn.Module):
loss_func[key].to(device)
def closure(debug=False):
optimizer.zero_grad()
new_params = params.copy()
output = model(new_params)
loss_dict = {}
loss_weight = {key:loss[key].weight for key in loss_func.keys()}
for key, func in loss_func.items():
output_ = {k: output[k] for k in loss[key].key_from_output}
infos_ = {k: infos[k] for k in loss[key].key_from_infos}
loss_now = func(output_, infos_)
if isinstance(loss_now, dict):
for k, _loss in loss_now.items():
loss_dict[key+'_'+k] = _loss
loss_weight[key+'_'+k] = loss_weight[key]
loss_weight.pop(key)
else:
loss_dict[key] = loss_now
loss_sum = sum([loss_dict[key]*loss_weight[key]
for key in loss_dict.keys()])
if debug:
return loss_dict, loss_weight
loss_sum.backward()
return loss_sum
return closure
def rel_change(prev_val, curr_val):
return (prev_val - curr_val) / max([1e-5, abs(prev_val), abs(curr_val)])
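
# --- Illustrative example (not part of the module): rel_change as the
# stopping test used in Optimizer.optimizer_step; the loss values are invented.
assert rel_change(0.50000001, 0.5) <= 0.0000001   # converged: stop iterating
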
class Optimizer:
def __init__(self, optimize_keys, optimizer_args, loss) -> None:
self.optimize_keys = optimize_keys
self.optimizer_args = optimizer_args
self.loss = loss
self.used_infos = []
for key, val in loss.items():
self.used_infos.extend(val.key_from_infos)
self.used_infos = list(set(self.used_infos))
def log_loss(self, iter_, closure, print_loss=False):
if iter_ % 10 == 0 or print_loss:
with torch.no_grad():
loss_dict, loss_weight = closure(debug=True)
print('{:-6d}: '.format(iter_) + ' '.join([key + ' %7.4f'%(loss_dict[key].item()*loss_weight[key]) for key in loss_dict.keys()]))
def optimizer_step(self, optimizer, closure):
prev_loss = None
self.log_loss(0, closure, True)
for iter_ in range(1, 1000):
loss = optimizer.step(closure)
# check the loss
if torch.isnan(loss).sum() > 0:
print('[optimize] NaN loss value, stopping!')
break
if torch.isinf(loss).sum() > 0:
print('[optimize] Infinite loss value, stopping!')
break
# check the delta
if iter_ > 0 and prev_loss is not None:
loss_rel_change = rel_change(prev_loss, loss.item())
if loss_rel_change <= 0.0000001:
break
self.log_loss(iter_, closure)
prev_loss = loss.item()
self.log_loss(iter_, closure, True)
return True
def __call__(self, params, model, **infos):
"""
待优化变量一定要在params中但params中不一定会被优化
infos中的变量不一定会被优化
"""
        # TODO: use the model's device; since the model may be a plain function, determine the device here for now
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
params = dict_of_numpy_to_tensor(params, device=device)
infos_used = {key: infos[key] for key in self.used_infos if key in infos.keys()}
infos_used = dict_of_numpy_to_tensor(infos_used, device=device)
log('[{}] Optimize {}'.format(self.__class__.__name__, self.optimize_keys))
log('[{}] Loading {}'.format(self.__class__.__name__, self.used_infos))
opt_params = {}
for key in self.optimize_keys:
            if key in infos.keys(): # the parameters to optimize
opt_params[key] = infos_used[key]
elif key in params.keys():
opt_params[key] = params[key]
else:
raise ValueError('{} is not in infos or body_params'.format(key))
for key, val in opt_params.items():
infos_used['init_'+key] = val.clone()
optimizer = make_optimizer(opt_params, **self.optimizer_args)
closure = make_closure(optimizer, model, params, infos_used, self.loss, device)
        # ready to start the optimization
grad_require(opt_params, True)
self.optimizer_step(optimizer, closure)
grad_require(opt_params, False)
        # return the results directly
ret = {
'params': params
}
for key in self.optimize_keys:
if key in infos.keys():
ret[key] = opt_params[key]
ret = dict_of_tensor_to_numpy(ret)
return ret
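
# --- Illustrative sketch (not part of the module): the helpers above on a toy
# quadratic instead of a body model; the 'adam' path avoids the line search.
import torch

toy_params = {'Th': torch.zeros(1, 3)}
grad_require(toy_params, True)
toy_opt = make_optimizer(toy_params, optim_type='adam', lr=1e-1)
toy_target = torch.tensor([[1., 2., 3.]])
for _ in range(200):
    toy_opt.zero_grad()
    toy_loss = ((toy_params['Th'] - toy_target)**2).sum()
    toy_loss.backward()
    toy_opt.step()
grad_require(toy_params, False)   # toy_params['Th'] is now close to toy_target
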

View File

@ -0,0 +1,295 @@
from typing import Any
import numpy as np
import cv2
LOG_FILE = 'log_hand_select.txt'
LOG_LEVEL = 2 # one of 0, 1, 2
FULL_LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 1 else (lambda x: None)
LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 0 else (lambda x: None)
def views_from_dimGroups(dimGroups):
    views = np.zeros(dimGroups[-1], dtype=int)
for nv in range(len(dimGroups) - 1):
views[dimGroups[nv]:dimGroups[nv+1]] = nv
return views
class Select_Views:
def __init__(self, camtoworld, handtype) -> None:
self.camtoworld = camtoworld
self.results = []
self.DIST_MAX = 50
self.threshold = 2
self.handtype = handtype
self.threshold2 = 0.3
self.count = 0
        self.mode = 0 # 0: sum of squared distances; 1: max & sum
    def cvt_Rh_Rot(self, Rh):
        RotList = []
        for i in range(Rh.shape[0]):
            RotList.append(cv2.Rodrigues(Rh[i])[0])
        return np.stack(RotList)
def get_dis_Rh(self, Rh1, Rh2):
rh_dis = (self.cvt_Rh_Rot(Rh1) - self.cvt_Rh_Rot(Rh2))**2
return rh_dis.sum(axis=(1,2))
def match_with_lastframe(self, lastpose, new_poses):
if self.mode==0:
rh_dis = self.get_dis_Rh(np.array(new_poses)[:,:3], lastpose[None][:,:3])
dis = ((np.array(new_poses)[:,3:] - lastpose[None][:,3:])**2).sum(axis=1)
dis+=rh_dis
minid = np.argmin(dis)
return new_poses[minid], dis[minid], minid, dis
        else:
            dis1 = ((np.array(new_poses) - lastpose[None])**2).sum(axis=1)
            dis2 = ((np.array(new_poses) - lastpose[None])**2).max(axis=1)
            dis = np.stack([dis2, dis1]).T
            val_idx = dis[:, 0] < self.threshold2
            dis = dis[val_idx, :]
            if len(dis) == 0:
                # no candidate passes the max-distance gate: fall back to the sum distance
                dis = ((np.array(new_poses) - lastpose[None])**2).sum(axis=1)
                minid = np.argmin(dis1)
                mindis = dis[minid]
                return new_poses[minid], mindis, minid, dis
            else:
                minid = np.argmin(dis[:, 1])
                mindis = dis[minid, 1]
                return np.array(new_poses)[val_idx, :][minid], mindis, minid, dis
def calculate_aff(self, poseslist, DIST_MAX):
        # TODO: Rh distances should not be computed this way; better to convert to rotation matrices first and compare those
M = len(poseslist)
distance = np.zeros((M, M), dtype=np.float32)
for id0 in range(M):
for id1 in range(id0+1,M):
p0 = poseslist[id0]
p1 = poseslist[id1]
dis = ((p0-p1)**2).sum()
distance[id0,id1]=dis
distance[id1,id0]=dis
DIST_MAX = max(DIST_MAX, distance.max())
for nv in range(M):
distance[nv,nv]=DIST_MAX
distance -= np.eye(M) * DIST_MAX
aff = (DIST_MAX - distance)/DIST_MAX
aff = np.clip(aff, 0, 1)
return aff
    def Hierarchical_Cluster(self, data, threshold=2):
        if len(data) == 1:
            return [[0]]
        import scipy.cluster.hierarchy as sch
        out = sch.linkage(data, method='ward')
ret=[]
vis=[]
for i in range(len(data)):
ret.append([i])
vis.append(0)
for i in range(out.shape[0]):
            if out[i][2] > threshold:
break
id1 = int(out[i][0])
id2 = int(out[i][1])
vis[id1]=1
vis[id2]=1
vis.append(0)
ret.append(ret[id1]+ret[id2])
groups = []
for i in range(len(ret)):
if vis[i]==1:
continue
groups.append(ret[i])
return groups
    def aff_to_groups(self, data, affinity, dimGroups, prev_id):
        # NOTE: this greedy affinity-based assignment was left unfinished;
        # the caller uses Hierarchical_Cluster instead
        sum1 = np.zeros((affinity.shape[0]))
        for i in range(len(dimGroups)-1):
            start, end = dimGroups[i], dimGroups[i+1]
            if end == start:
                continue
            sum1 += affinity[:, start:end].max(axis=-1)
        n2d = affinity.shape[0]
        nViews = len(dimGroups) - 1
        idx_zero = np.zeros(nViews, dtype=int) - 1
        views = views_from_dimGroups(dimGroups)
        # the assigned results of each person
        p2dAssigned = np.zeros(n2d, dtype=int) - 1
        visited = np.zeros(n2d, dtype=int)
        sortidx = np.argsort(-sum1)
        pid = 0
        k3dresults = []
        return k3dresults
    def __call__(self, posel, cameras, match3d_l):
        hand_list = []
        for pid in range(len(match3d_l)):
            dt = match3d_l[pid]
            Merge_list = []
            Merge_list_rot = []
            if isinstance(dt, int):
                # TODO: handle -1, i.e. no suitable matched hand was found
                Merge_list_rot.append(np.zeros((54,)))
else:
for cid in range(len(dt['views'])):
nv = dt['views'][cid]
poseid = dt['indices'][cid]
pose = posel[nv][poseid].copy()
if self.camtoworld:
Rh = pose[:,:3].copy()
invR = np.linalg.inv(cameras['R'][nv])
Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
Rh_m_new = invR @ Rh_m_old
Rh = cv2.Rodrigues(Rh_m_new)[0]
Merge_list.append(np.hstack((Rh.reshape(3),pose[:,3:].reshape(-1))))
Merge_list_rot.append(np.hstack((np.array(Rh_m_new).reshape(-1),pose[:,3:].reshape(-1))))
else:
Merge_list.append(pose.reshape(-1))
Rh = pose[:,:3].copy()
Rh_m_old = np.matrix(cv2.Rodrigues(Rh)[0])
Merge_list_rot.append(np.hstack((np.array(Rh_m_old).reshape(-1),pose[:,3:].reshape(-1))))
            # gather the coordinate-transformed, view-selected poses into a new set,
            # then select views via hierarchical clustering on the rotation-matrix form
            groups = self.Hierarchical_Cluster(Merge_list_rot, self.threshold)
            # Alternative: build an affinity matrix (the pairwise distance between
            # any two poses) and group by affinity, merging the grouped results:
            # affinity = self.calculate_aff(Merge_list, self.DIST_MAX)
            # N2D = affinity.shape[0]
            # prev_id = np.zeros(N2D) - 1
            # dimGroups = np.cumsum([0] + [1] * N2D)
            # groups = self.aff_to_groups(Merge_list, affinity, dimGroups, prev_id)
FULL_LOG('[select views] frame:{}, pid:{}, handtype:{}'.format(self.count, pid, self.handtype))
FULL_LOG('[groups] groups:{}'.format(str(groups)))
            # merge the poses within each group
new_poses = []
            for gp in groups:
                merge_pose = np.array(Merge_list_rot)[gp].mean(axis=0)
                Rot = merge_pose[:9].reshape((3,3))
                Rh = cv2.Rodrigues(Rot)[0]
                merge_pose = np.hstack((Rh.reshape(3), merge_pose[9:].reshape(-1)))
                new_poses.append(merge_pose)
            # With multiple groups, compute the distance between each group and the
            # last frame's result (matching last frame's hand to this frame's),
            # then pick the group with the smaller distance.
            if len(self.results) > pid:
                # TODO: if the distance to the previous frame is too large, try to restart tracking, i.e. pick the group seen by the most views
pose_, dis, minid, dis_ = self.match_with_lastframe(self.results[pid],new_poses)
FULL_LOG('[select 0 ] minid:{}'.format(minid))
FULL_LOG('[select 0 ] dis:{}'.format(str(dis_.tolist())))
                if isinstance(dt, int) or dis_.min() > 10: # no view detected the hand, or all views differ too much from the last frame
                    FULL_LOG('[select 0 ] last pose')
pose_ = self.results[pid].copy()
else:
threshold_=0.3
if self.mode==1:
threshold_=1
                    if dis > threshold_: # above this threshold, assume the last frame was unreliable and reselect for this frame
array_len = np.array([len(gp) for gp in groups])
a_max = array_len.max()
d_max = 500
idx=0
                        for gid in range(array_len.shape[0]):
                            if array_len[gid] == a_max and dis_[gid] < d_max:
                                d_max = dis_[gid]
                                idx = gid
                        pose_ = new_poses[idx].copy()
FULL_LOG('[select 0 ] max len(groups):{}\n'.format(idx))
self.results[pid] = pose_.copy()
            else:
                # TODO: without a previous frame to supervise the choice, either
                # merge all groups' results or use the group with the most members
                # TODO: if several groups tie in size, break the tie further,
                # e.g. by the largest affinity sum
idx=np.argmax([len(gp) for gp in groups])
pose_ = new_poses[idx].copy()
self.results.append(pose_.copy())
FULL_LOG('[select 1 ] max len(groups):{}\n'.format(idx))
            # Collect and return the results: left/right hand poses in world
            # coordinates aligned with the body ids; could also return params
            # (decide whether params should be a list or a dict)
hand_list.append(pose_)
poses_ = np.stack(hand_list)
Rh = poses_[:,:3].copy()
poses_[:,:3] = 0
params={
'Rh':Rh,
'Th':np.zeros_like(Rh),
'poses':poses_,
'shapes':np.zeros((Rh.shape[0],10)),
}
self.count+=1
return {'params': params}
class Select_Views_handlr:
def __init__(self, camtoworld) -> None:
self.camtoworld = camtoworld
self.model_l = Select_Views(camtoworld, 'handl')
self.model_r = Select_Views(camtoworld, 'handr')
def __call__(self, posel, poser, match3d_l, match3d_r, cameras) -> Any:
params_l = self.model_l(posel, cameras, match3d_l)
params_r = self.model_r(poser, cameras, match3d_r)
return {'params_l':params_l['params'], 'params_r':params_r['params']}
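
# --- Illustrative sketch (not part of the module): the ward-linkage grouping
# behind Select_Views.Hierarchical_Cluster, on toy 2D points; scipy's fcluster
# gives an equivalent threshold-based grouping.
import numpy as np
import scipy.cluster.hierarchy as sch

pts = np.array([[0., 0.], [0.1, 0.], [5., 5.], [5.1, 5.]])
Z = sch.linkage(pts, method='ward')                    # rows: [id1, id2, dist, count]
labels = sch.fcluster(Z, t=2.0, criterion='distance')  # -> two groups: [1 1 2 2]
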

View File

@ -0,0 +1,138 @@
from typing import Any
import numpy as np
class SmoothAny:
def __init__(self, window_size) -> None:
self.w = window_size
def __call__(self, value, with_conf=True):
wsize = self.w
value = value.copy()
if with_conf:
pos_sum = np.zeros_like(value[:-wsize, ..., :-1])
conf_sum = np.zeros_like(value[:-wsize, ..., -1:])
else:
pos_sum = np.zeros_like(value[:-wsize])
for w in range(wsize):
if with_conf:
pos_sum += value[w:w-wsize, ..., :-1] * value[w:w-wsize, ..., -1:]
conf_sum += value[w:w-wsize, ..., -1:]
else:
pos_sum += value[w:w-wsize]
if with_conf:
pos_smooth = pos_sum / (1e-5 + conf_sum)
value[wsize//2:-wsize//2] = np.dstack([pos_smooth, conf_sum])
else:
pos_smooth = pos_sum / (wsize)
value[wsize//2:-wsize//2] = pos_smooth
return value
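
# --- Illustrative usage (not part of the module): the confidence-weighted
# moving average above on a toy (nFrames, nJoints, xyz + conf) track; only the
# middle frames are replaced by the windowed average.
import numpy as np

example_track = np.random.rand(8, 1, 4).astype(np.float32)
example_out = SmoothAny(window_size=4)(example_track, with_conf=True)   # shape stays (8, 1, 4)
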
class Smooth(SmoothAny):
def __call__(self, keypoints3d):
return {'keypoints3d': super().__call__(keypoints3d, with_conf=True)}
class SmoothPoses:
def __init__(self, window_size) -> None:
self.W = window_size
def __call__(self, params):
        # NOTE: this version pads the sequence at both ends
poses = params['poses']
padding_before = poses[:1].copy().repeat(self.W, 0)
padding_after = poses[-1:].copy().repeat(self.W, 0)
mean = poses.copy()
nFrames = mean.shape[0]
poses_full = np.vstack([padding_before, poses, padding_after])
for w in range(1, self.W+1):
mean += poses_full[self.W-w:self.W-w+nFrames]
mean += poses_full[self.W+w:self.W+w+nFrames]
mean /= 2*self.W + 1
params['poses'] = mean
return {'params': params}
class SmoothRealtime:
def __init__(self, opt_name, win_sizes) -> None:
self.size = {}
self.opt_name = opt_name
self.smdata={}
for idx, name in enumerate(opt_name):
self.smdata[name] = []
self.size[name] = win_sizes[idx]
def cvt_Rh_Rot(self, Rh):
import cv2
RotList = []
Rh = Rh.reshape((-1,3))
for i in range(Rh.shape[0]):
RotList.append(cv2.Rodrigues(Rh[i])[0])
return np.stack(RotList)
def cvt_Rot_Rh(self, Rot):
import cv2
RhList = []
for i in range(Rot.shape[0]):
RhList.append(cv2.Rodrigues(Rot[i])[0].reshape(3))
return np.stack(RhList).reshape((1,-1))
def now_smplh(self):
data={}
for name in self.opt_name:
if name in ['Rh','poses']:
out = (sum(self.smdata[name])/len(self.smdata[name]))
data[name] = self.cvt_Rot_Rh(out)
else:
data[name] = (sum(self.smdata[name])/len(self.smdata[name]))
return data
def __call__(self, data):
for name in self.opt_name:
if name in ['Rh','poses']:
self.smdata[name].append(self.cvt_Rh_Rot(data[name].copy()))
if len(self.smdata[name])>self.size[name]:
self.smdata[name].pop(0)
out = (sum(self.smdata[name])/len(self.smdata[name]))
                data[name] = self.cvt_Rot_Rh(out)
else:
self.smdata[name].append(data[name].copy())
if len(self.smdata[name])>self.size[name]:
self.smdata[name].pop(0)
                data[name] = (sum(self.smdata[name])/len(self.smdata[name]))
return data
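
# --- Illustrative usage (not part of the module): the streaming smoother
# above. Axis-angle keys are averaged as rotation matrices over a rolling
# window, then converted back; the inputs are synthetic.
import numpy as np

sm = SmoothRealtime(opt_name=['Rh'], win_sizes=[3])
for t in range(5):
    frame = sm({'Rh': np.array([[0.0, 0.0, 0.1 * t]])})
# frame['Rh'] now lags the raw value because of the window average
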
class SmoothHandlr:
def __init__(self, opt_name, win_sizes):
self.smooth_handl = SmoothRealtime(opt_name, win_sizes)
self.smooth_handr = SmoothRealtime(opt_name, win_sizes)
def __call__(self, params_l, params_r) -> Any:
params_l = self.smooth_handl(params_l)
params_r = self.smooth_handr(params_r)
return {'params_l': params_l, 'params_r': params_r}
class SmoothSmplh(SmoothRealtime):
def __init__(self, opt_name, win_sizes):
self.opt_name = opt_name
self.win_sizes = win_sizes
self.smooth_lists=[]
    def __call__(self, params_smplh):
        # TODO: route each person by id into its own smoother and clear entries
        # absent for too long; take the ids as input and turn smooth_lists into a map
bz = params_smplh['Rh'].shape[0]
while (len(self.smooth_lists)<bz):
self.smooth_lists.append(SmoothRealtime(self.opt_name, self.win_sizes))
for i in range(bz):
param={}
for key in params_smplh.keys():
param[key] = params_smplh[key][i].reshape(1,-1)
out = self.smooth_lists[i](param)
for key in params_smplh.keys():
params_smplh[key][i] = out[key]
return {'params_smplh': params_smplh}
class Smoothkeypoints3d(SmoothRealtime):
def __init__(self, opt_name, win_sizes):
self.smooth_smplh = SmoothRealtime(opt_name, win_sizes)
def __call__(self, keypoints3d):
ret = self.smooth_smplh({'keypoints3d':keypoints3d})
return ret

View File

@ -0,0 +1,151 @@
import numpy as np
from itertools import combinations
from easymocap.mytools.camera_utils import Undistort
from easymocap.mytools.triangulator import iterative_triangulate
def batch_triangulate(keypoints_, Pall, min_view=2):
""" triangulate the keypoints of whole body
Args:
keypoints_ (nViews, nJoints, 3): 2D detections
Pall (nViews, 3, 4): projection matrix of each view
min_view (int, optional): min view for visible points. Defaults to 2.
Returns:
keypoints3d: (nJoints, 4)
"""
# keypoints: (nViews, nJoints, 3)
# Pall: (nViews, 3, 4)
# A: (nJoints, nViewsx2, 4), x: (nJoints, 4, 1); b: (nJoints, nViewsx2, 1)
v = (keypoints_[:, :, -1]>0).sum(axis=0)
valid_joint = np.where(v >= min_view)[0]
keypoints = keypoints_[:, valid_joint]
conf3d = keypoints[:, :, -1].sum(axis=0)/v[valid_joint]
    # P0/P1/P2: the three rows of each projection matrix, each (1, nViews, 4)
P0 = Pall[None, :, 0, :]
P1 = Pall[None, :, 1, :]
P2 = Pall[None, :, 2, :]
    # uP2: the x coordinate times P2: (nJoints, nViews, 4)
uP2 = keypoints[:, :, 0].T[:, :, None] * P2
vP2 = keypoints[:, :, 1].T[:, :, None] * P2
conf = keypoints[:, :, 2].T[:, :, None]
Au = conf * (uP2 - P0)
Av = conf * (vP2 - P1)
A = np.hstack([Au, Av])
u, s, v = np.linalg.svd(A)
X = v[:, -1, :]
X = X / X[:, 3:]
# out: (nJoints, 4)
result = np.zeros((keypoints_.shape[1], 4))
result[valid_joint, :3] = X[:, :3]
    result[valid_joint, 3] = conf3d
return result
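
# --- Illustrative check (not part of the module): batch_triangulate on two
# synthetic pinhole views; a point at (0, 0, 3) should be recovered. K and the
# baseline are invented for the example.
import numpy as np

example_K = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
example_P0 = example_K @ np.hstack([np.eye(3), np.zeros((3, 1))])                 # reference camera
example_P1 = example_K @ np.hstack([np.eye(3), np.array([[-0.2], [0.], [0.]])])   # shifted baseline
example_X = np.array([0., 0., 3., 1.])
example_kpts = np.zeros((2, 1, 3))
for i, P in enumerate([example_P0, example_P1]):
    x = P @ example_X
    example_kpts[i, 0] = [x[0] / x[2], x[1] / x[2], 1.0]                          # (u, v, conf)
example_k3d = batch_triangulate(example_kpts, np.stack([example_P0, example_P1]))
# example_k3d[0, :3] is approximately (0, 0, 3)
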
def project_wo_dist(keypoints, RT, einsum='vab,kb->vka'):
homo = np.concatenate([keypoints[..., :3], np.ones_like(keypoints[..., :1])], axis=-1)
kpts2d = np.einsum(einsum, RT, homo)
depth = kpts2d[..., 2]
kpts2d[..., :2] /= kpts2d[..., 2:]
return kpts2d, depth
class SimpleTriangulate:
def __init__(self, mode):
self.mode = mode
@staticmethod
def undistort(points, cameras):
nViews = len(points)
pelvis_undis = []
for nv in range(nViews):
camera = {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']}
if points[nv].shape[0] > 0:
pelvis = Undistort.points(points[nv], camera['K'], camera['dist'])
else:
pelvis = points[nv].copy()
pelvis_undis.append(pelvis)
return pelvis_undis
def __call__(self, keypoints, cameras):
'''
keypoints: [nViews, nJoints, 3]
output:
keypoints3d: (nJoints, 4)
'''
keypoints = self.undistort(keypoints, cameras)
keypoints = np.stack(keypoints)
if self.mode == 'naive':
keypoints3d = batch_triangulate(keypoints, cameras['P'])
else:
keypoints3d, k2d = iterative_triangulate(keypoints, cameras['P'], dist_max=25)
return {'keypoints3d': keypoints3d}
class RobustTriangulate(SimpleTriangulate):
def __init__(self, mode, cfg):
super().__init__(mode)
self.cache_view = {}
self.cfg = cfg
def try_to_triangulate_and_project(self, index, keypoints, cameras):
        # triangulate the selected views and compute the mean reprojection error
P = cameras['P'][index]
kpts = keypoints[index][:, None]
k3d = batch_triangulate(kpts, P)
k2d, depth = project_wo_dist(k3d, P)
dist_repro = np.linalg.norm(k2d[..., :2] - kpts[..., :2], axis=-1).mean(axis=-1)
return k3d, dist_repro
def robust_triangulate(self, keypoints, cameras):
        # pick the best initial set of views
        # TODO: remove implausible views
nViews = keypoints.shape[0]
if nViews not in self.cache_view:
views = list(range(nViews))
combs = list(combinations(views, self.cfg.triangulate.init_views))
combs = np.array(combs)
self.cache_view[nViews] = combs
combs = self.cache_view[nViews]
keypoints_comb = keypoints[combs]
conf_sum = keypoints_comb[..., 2].mean(axis=1) * (keypoints_comb[..., 2]>0.05).all(axis=1)
comb_sort_id = (-conf_sum).argsort()
flag_find_init = False
for comb_id in comb_sort_id:
if conf_sum[comb_id] < 0.1:
break
comb = combs[comb_id]
k3d, dist_repro = self.try_to_triangulate_and_project(comb, keypoints, cameras)
if (dist_repro < self.cfg.triangulate.repro_init).all():
flag_find_init = True
init = comb.tolist()
break
        if not flag_find_init:
            raise RuntimeError('Cannot find a good initial view pair')
view_idxs = (-keypoints[:, -1]).argsort()
for view_idx in view_idxs:
if view_idx in init:
continue
if keypoints[view_idx, 2] < 0.1:
continue
k3d, dist_repro = self.try_to_triangulate_and_project(init+[view_idx], keypoints, cameras)
if (dist_repro < self.cfg.triangulate.repro_2d).all():
init.append(view_idx)
return k3d, init
def __call__(self, keypoints, cameras):
"""
keypoints: (nViews, nJoints, 3)
cameras: (nViews, 3, 4)
"""
nViews, nJoints, _ = keypoints.shape
keypoints_undis = np.stack(self.undistort(keypoints, cameras))
# for each points, find good initial pairs
points_all = np.zeros((nJoints, 4))
keypoints_copy = keypoints.copy()
for nj in range(nJoints):
point, select_views = self.robust_triangulate(keypoints_undis[:, nj], cameras)
points_all[nj:nj+1] = point
keypoints_copy[select_views, nj, 2] += 10
keypoints_copy[:, nj, 2] = np.clip(keypoints_copy[:, nj, 2]-10, 0, 1)
return {'keypoints3d': points_all, 'keypoints_select': keypoints_copy}

View File

@ -0,0 +1,149 @@
from typing import Any
from easymocap.config import Config, load_object
from easymocap.mytools.debug_utils import mywarn, log
import numpy as np
import time
from tabulate import tabulate
class Timer:
def __init__(self, record, verbose) -> None:
self.keys = list(record.keys())
self.header = self.keys
self.verbose = verbose
def update(self, timer):
if not self.verbose:
return
contents = []
for key in self.keys:
if key not in timer:
contents.append('skip')
else:
contents.append('{:.3f}s'.format(timer[key]))
print(tabulate(headers=self.header, tabular_data=[contents], tablefmt='fancy_grid'))
class MultiStage:
def load_final(self):
at_finals = {}
for key, val in self._at_final.items():
if val['module'] == 'skip':
mywarn('Stage {} is not used'.format(key))
continue
log('[{}] loading {}'.format(self.__class__.__name__, key))
model = load_object(val['module'], val['args'])
model.output = self.output
at_finals[key] = model
self.model_finals = at_finals
def __init__(self, output, at_step, at_final) -> None:
log('[{}] writing the results to {}'.format(self.__class__.__name__, output))
at_steps = {}
for key, val in at_step.items():
if val['module'] == 'skip':
mywarn('Stage {} is not used'.format(key))
continue
log('[{}] loading module {}'.format(self.__class__.__name__, key))
model = load_object(val['module'], val['args'])
model.output = output
at_steps[key] = model
self.output = output
self.model_steps = at_steps
self._at_step = at_step
self._at_final = at_final
self.timer = Timer(at_steps, verbose=False)
def at_step(self, data, index):
ret = {}
if 'meta' in data:
ret['meta'] = data['meta']
timer = {}
for key, model in self.model_steps.items():
for k in self._at_step[key].get('key_keep', []):
ret[k] = data[k]
if self._at_step[key].get('skip', False):
continue
inputs = {}
for k in self._at_step[key].get('key_from_data', []):
inputs[k] = data[k]
for k in self._at_step[key].get('key_from_previous', []):
inputs[k] = ret[k]
            start = time.time()
            try:
                output = model(**inputs)
            except Exception:
                print('[{}] Error in {}'.format('Stages', key))
                raise
timer[key] = time.time() - start
if output is not None:
ret.update(output)
self.timer.update(timer)
return ret
@staticmethod
def merge_data(infos_all):
info0 = infos_all[0]
data = {}
for key, val in info0.items():
data[key] = [info[key] for info in infos_all]
if isinstance(val, np.ndarray):
try:
data[key] = np.stack(data[key])
except ValueError:
                    print('[{}] Skip merge {}'.format('Stages', key))
elif isinstance(val, dict):
data[key] = MultiStage.merge_data(data[key])
return data
def at_final(self, infos_all):
self.load_final()
data = self.merge_data(infos_all)
log('Keep keys: {}'.format(list(data.keys())))
ret = {}
for key, model in self.model_finals.items():
for iter_ in range(self._at_final[key].get('repeat', 1)):
inputs = {}
for k in self._at_final[key].get('key_from_data', []):
inputs[k] = data[k]
for k in self._at_final[key].get('key_from_previous', []):
inputs[k] = ret[k]
                try:
                    output = model(**inputs)
                except Exception:
                    print('[{}] Error in {}'.format('Stages', key))
                    raise
if output is not None:
ret.update(output)
return ret
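
# --- Illustrative sketch (not part of the module): the shape of an `at_step`
# config consumed above. Each stage names a module to load plus the keys it
# reads from the dataset item or from earlier stages; the module path and key
# names here are hypothetical.
example_at_step = {
    'triangulate': {
        'module': 'myeasymocap.operations.triangulate.SimpleTriangulate',  # assumed path
        'args': {'mode': 'naive'},
        'key_from_data': ['keypoints', 'cameras'],  # pulled from the dataset item
        'key_from_previous': [],                    # outputs of earlier stages
    },
}
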
class StageForFittingEach:
def __init__(self, stages, keys_keep) -> None:
stages_ = {}
for key, val in stages.items():
if val['module'] == 'skip':
mywarn('Stage {} is not used'.format(key))
continue
model = load_object(val['module'], val['args'])
stages_[key] = model
self.stages = stages_
self.stages_args = stages
self.keys_keep = keys_keep
def __call__(self, results, **ret):
for pid, result in results.items():
ret0 = {}
ret0.update(ret)
for key, stage in self.stages.items():
for iter_ in range(self.stages_args[key].get('repeat', 1)):
inputs = {}
for k in self.stages_args[key].get('key_from_data', []):
inputs[k] = result[k]
for k in self.stages_args[key].get('key_from_previous', []):
inputs[k] = ret0[k]
output = stage(**inputs)
if output is not None:
ret0.update(output)
for key in self.keys_keep:
result[key] = ret0[key]
return {'results': results}

View File

@ -0,0 +1,46 @@
import numpy as np
from tqdm import tqdm
class CheckFramePerson:
def __init__(self, key) -> None:
self.key = key
self.pids = []
self.frames = 0
def __call__(self, keypoints3d, pids):
k3d_, pid_ = [], []
for i, pid in enumerate(pids):
if pid not in self.pids:
if self.frames == 0:
print('[{}]/{:06d} Add person {}'.format(self.__class__.__name__, self.frames, pid))
self.pids.append(pid)
else:
continue
k3d_.append(keypoints3d[i])
pid_.append(pid)
self.frames += 1
k3d_ = np.stack(k3d_)
return {
'keypoints3d': k3d_,
'pids': pid_
}
class CollectMultiPersonMultiFrame:
def __init__(self, key) -> None:
self.key = key
def __call__(self, keypoints3d, pids):
records = {}
for frame in tqdm(range(len(pids)), desc='Reading'):
pid_frame = pids[frame]
for i, pid in enumerate(pid_frame):
if pid not in records:
records[pid] = {
'frames': [],
'keypoints3d': []
}
records[pid]['frames'].append(frame)
records[pid]['keypoints3d'].append(keypoints3d[frame][i])
for pid, record in records.items():
record['keypoints3d'] = np.stack(record['keypoints3d']).astype(np.float32)
return {'results': records}
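
# --- Illustrative usage (not part of the module): two frames with person ids
# [0] and [0, 1] are regrouped into per-person tracks with frame indices.
import numpy as np

collect = CollectMultiPersonMultiFrame(key='keypoints3d')
example_k3d = [np.random.rand(1, 25, 4), np.random.rand(2, 25, 4)]
example_out = collect(example_k3d, pids=[[0], [0, 1]])['results']
# example_out[0]['frames'] == [0, 1]; example_out[0]['keypoints3d'].shape == (2, 25, 4)
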

View File

@ -6,5 +6,7 @@ yacs
tabulate
termcolor
chumpy
mediapipe
func_timeout
mediapipe==0.10.0
func_timeout
ultralytics
gdown

View File

@ -21,8 +21,19 @@ setup(
'easymocap.pyfitting',
'easymocap.mytools',
'easymocap.annotator',
'easymocap.estimator'
'easymocap.estimator',
'myeasymocap'
],
entry_points={
'console_scripts': [
'emc=apps.mocap.run:main_entrypoint',
# 'easymocap_calib=easymocap.mytools.entry:calib',
# 'easymocap_tools=easymocap.mytools.entry:main',
# 'extract_keypoints=easymocap.mytools.cmdtools.extract_keypoints:main'
],
},
install_requires=[],
data_files = []
)