init the code

This commit is contained in:
shuaiqing 2021-01-14 21:32:09 +08:00
parent 9c7832820e
commit c730628c62
21 changed files with 3268 additions and 2 deletions


@ -2,7 +2,7 @@
* @Date: 2021-01-13 20:32:12
* @Author: Qing Shuai
* @LastEditors: Qing Shuai
* @LastEditTime: 2021-01-14 21:25:44
* @LastEditTime: 2021-01-14 21:31:39
* @FilePath: /EasyMocapRelease/Readme.md
-->
# EasyMocap
@ -65,15 +65,29 @@ Here `intri.yml` and `extri.yml` store the camera intrinsic and extrinsic parameters
### 1. Run [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)
```bash
data=path/to/data
out=path/to/output
python3 scripts/preprocess/extract_video.py ${data} --openpose <openpose_path>
```
### 2. Run the code
```bash
# 1. example for skeleton reconstruction
python3 code/demo_mv1pmf_skel.py ${data} --out ${out} --vis_det --vis_repro --undis --sub_vis 1 7 13 19
# 2. example for SMPL reconstruction
python3 code/demo_mv1pmf_smpl.py ${data} --out ${out} --end 300 --vis_smpl --undis --sub_vis 1 7 13 19
```
- `--vis_det`: visualize the 2D detections
- `--vis_repro`: visualize the reprojection of the triangulated keypoints
- `--undis`: undistort the images before processing
- `--sub_vis`: specify the views to visualize; if not set, all views are used
- `--vis_smpl`: render the SMPL mesh onto the images
- `--start, --end`: set the first and last frame to process
### 3. Output
The results are saved in `json` format.
```bash
<output_root>
├── keypoints3d
│   ├── 000000.json
│   └── xxxxxx.json
```
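Each frame's `keypoints3d` file stores a list of person records. A minimal loading sketch (the path below is a hypothetical example under `<output_root>`):
```python
import json

# hypothetical example path; substitute your own <output_root>
with open('output/keypoints3d/000000.json') as f:
    people = json.load(f)
# each record: {'id': int, 'keypoints3d': [[x, y, z, conf], ...]}
print(people[0]['id'], len(people[0]['keypoints3d']))
```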

216
code/dataset/base.py Normal file

@ -0,0 +1,216 @@
'''
@ Date: 2021-01-13 16:53:55
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 19:55:58
@ FilePath: /EasyMocapRelease/code/dataset/base.py
'''
import os
import sys
import json
from os.path import join
import cv2
import numpy as np
from torch.utils.data.dataset import Dataset
code_path = join(os.path.dirname(__file__), '..')
sys.path.append(code_path)
from mytools.camera_utils import read_camera, undistort, write_camera
from mytools.vis_base import merge, plot_bbox, plot_keypoints
def read_json(path):
with open(path) as f:
data = json.load(f)
return data
def save_json(file, data):
if not os.path.exists(os.path.dirname(file)):
os.makedirs(os.path.dirname(file))
with open(file, 'w') as f:
json.dump(data, f, indent=4)
def read_annot(annotname, add_hand_face=False):
data = read_json(annotname)['annots']
for i in range(len(data)):
data[i]['id'] = data[i].pop('personID')
for key in ['bbox', 'keypoints', 'handl2d', 'handr2d', 'face2d']:
if key not in data[i].keys():continue
data[i][key] = np.array(data[i][key])
return data
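# A minimal usage sketch of read_annot (hypothetical path): each returned record
# carries an 'id' plus numpy arrays for the listed keys.
# >>> annots = read_annot('annots/01/000000.json')
# >>> annots[0]['keypoints'].shape   # e.g. (25, 3)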
def get_bbox_from_pose(pose_2d, img, rate = 0.1):
# this function returns bounding box from the 2D pose
validIdx = pose_2d[:, 2] > 0
if validIdx.sum() == 0:
return [0, 0, 100, 100, 0]
y_min = int(min(pose_2d[validIdx, 1]))
y_max = int(max(pose_2d[validIdx, 1]))
x_min = int(min(pose_2d[validIdx, 0]))
x_max = int(max(pose_2d[validIdx, 0]))
dx = (x_max - x_min)*rate
dy = (y_max - y_min)*rate
    # TODO: append the class label here later
bbox = [x_min-dx, y_min-dy, x_max+dx, y_max+dy, 1]
correct_bbox(img, bbox)
return bbox
def correct_bbox(img, bbox):
    # this function invalidates a bbox that lies entirely outside the image
    height, width = img.shape[0], img.shape[1]
    if bbox[2] <= 0 or bbox[0] >= width or bbox[1] >= height or bbox[3] <= 0:
        bbox[4] = 0
    return bbox
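# A minimal usage sketch (hypothetical values), showing the intended call order:
# >>> pose = np.zeros((25, 3)); pose[0] = [50., 60., 0.9]; pose[8] = [80., 120., 0.8]
# >>> img = np.zeros((256, 256, 3), dtype=np.uint8)
# >>> get_bbox_from_pose(pose, img)   # -> [x_min-dx, y_min-dy, x_max+dx, y_max+dy, 1]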
class FileWriter:
def __init__(self, output_path, config=None, basenames=[], cfg=None) -> None:
self.out = output_path
keys = ['keypoints3d', 'smpl', 'repro', 'keypoints']
output_dict = {key:join(self.out, key) for key in keys}
for key, p in output_dict.items():
os.makedirs(p, exist_ok=True)
self.output_dict = output_dict
self.basenames = basenames
        if cfg is not None:
            with open(join(output_path, 'exp.yml'), 'w') as f:
                print(cfg, file=f)
self.save_origin = False
self.config = config
def write_keypoints3d(self, results, nf):
savename = join(self.output_dict['keypoints3d'], '{:06d}.json'.format(nf))
save_json(savename, results)
def vis_detections(self, images, lDetections, nf, key='keypoints', to_img=True, vis_id=True):
images_vis = []
for nv, image in enumerate(images):
img = image.copy()
for det in lDetections[nv]:
keypoints = det[key]
bbox = det.pop('bbox', get_bbox_from_pose(keypoints, img))
# bbox = det['bbox']
plot_bbox(img, bbox, pid=det['id'], vis_id=vis_id)
plot_keypoints(img, keypoints, pid=det['id'], config=self.config, use_limb_color=False, lw=2)
images_vis.append(img)
image_vis = merge(images_vis, resize=not self.save_origin)
if to_img:
savename = join(self.output_dict[key], '{:06d}.jpg'.format(nf))
cv2.imwrite(savename, image_vis)
return image_vis
    def write_smpl(self, results, nf):
        # write the SMPL parameters as valid JSON (no trailing commas)
        format_out = {'float_kind':lambda x: "%.3f" % x}
        filename = join(self.output_dict['smpl'], '{:06d}.json'.format(nf))
        with open(filename, 'w') as f:
            f.write('[\n')
            for n, data in enumerate(results):
                f.write('    {\n')
                output = {'id': data['id']}
                for key in ['Rh', 'Th', 'poses', 'shapes']:
                    output[key] = np.array2string(data[key], max_line_width=1000, separator=', ', formatter=format_out)
                keys = ['id', 'Rh', 'Th', 'poses', 'shapes']
                for nk, key in enumerate(keys):
                    sep = ',' if nk < len(keys) - 1 else ''
                    f.write('        "{}": {}{}\n'.format(key, output[key], sep))
                f.write('    }' + (',' if n < len(results) - 1 else '') + '\n')
            f.write(']\n')
def vis_smpl(self, render_data, nf, images, cameras):
from visualize.renderer import Renderer
render = Renderer(height=1024, width=1024, faces=None)
render_results = render.render(render_data, cameras, images)
image_vis = merge(render_results, resize=not self.save_origin)
savename = join(self.output_dict['smpl'], '{:06d}.jpg'.format(nf))
cv2.imwrite(savename, image_vis)
class MVBase(Dataset):
""" Dataset for multiview data
"""
def __init__(self, root, cams=[], out=None, config={},
image_root='images', annot_root='annots',
add_hand_face=True,
undis=True, no_img=False) -> None:
self.root = root
self.image_root = join(root, image_root)
self.annot_root = join(root, annot_root)
self.add_hand_face = add_hand_face
self.undis = undis
self.no_img = no_img
self.config = config
if out is None:
out = join(root, 'output')
self.out = out
self.writer = FileWriter(self.out, config=config)
if len(cams) == 0:
cams = sorted([i for i in os.listdir(self.image_root) if os.path.isdir(join(self.image_root, i))])
self.cams = cams
self.imagelist = {}
self.annotlist = {}
        for cam in cams: # TODO: add start/end support
imgnames = sorted(os.listdir(join(self.image_root, cam)))
self.imagelist[cam] = imgnames
self.annotlist[cam] = sorted(os.listdir(join(self.annot_root, cam)))
nFrames = min([len(val) for key, val in self.imagelist.items()])
self.nFrames = nFrames
self.nViews = len(cams)
self.read_camera()
def read_camera(self):
path = self.root
        # read the camera parameters
intri_name = join(path, 'intri.yml')
extri_name = join(path, 'extri.yml')
if os.path.exists(intri_name) and os.path.exists(extri_name):
self.cameras = read_camera(intri_name, extri_name, self.cams)
self.cameras.pop('basenames')
self.cameras_for_affinity = [[cam['invK'], cam['R'], cam['T']] for cam in [self.cameras[name] for name in self.cams]]
self.Pall = [self.cameras[cam]['P'] for cam in self.cams]
else:
            print('!!! Cannot find camera parameters, maybe a bug:', intri_name, extri_name)
self.cameras = None
def undistort(self, images):
if self.cameras is not None and len(images) > 0:
images_ = []
for nv in range(self.nViews):
mtx = self.cameras[self.cams[nv]]['K']
dist = self.cameras[self.cams[nv]]['dist']
frame = cv2.undistort(images[nv], mtx, dist, None)
images_.append(frame)
else:
images_ = images
return images_
def undis_det(self, lDetections):
for nv in range(len(lDetections)):
camera = self.cameras[self.cams[nv]]
for det in lDetections[nv]:
det['bbox'] = undistort(camera, bbox=det['bbox'])
keypoints = det['keypoints']
det['keypoints'] = undistort(camera, keypoints=keypoints[None, :, :])[1][0]
return lDetections
def __getitem__(self, index: int):
images, annots = [], []
for cam in self.cams:
imgname = join(self.image_root, cam, self.imagelist[cam][index])
annname = join(self.annot_root, cam, self.annotlist[cam][index])
assert os.path.exists(imgname), imgname
assert os.path.exists(annname), annname
assert self.imagelist[cam][index].split('.')[0] == self.annotlist[cam][index].split('.')[0]
if not self.no_img:
img = cv2.imread(imgname)
images.append(img)
            # TODO: index 0 is taken directly here
annot = read_annot(annname, self.add_hand_face)
annots.append(annot)
if self.undis:
images = self.undistort(images)
annots = self.undis_det(annots)
return images, annots
def __len__(self) -> int:
return self.nFrames

197
code/dataset/config.py Normal file

@ -0,0 +1,197 @@
'''
* @ Date: 2020-09-26 16:52:55
* @ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-13 14:04:46
@ FilePath: /EasyMocap/code/dataset/config.py
'''
import numpy as np
CONFIG = {}
CONFIG['body25'] = {'kintree':
[[ 1, 0],
[ 2, 1],
[ 3, 2],
[ 4, 3],
[ 5, 1],
[ 6, 5],
[ 7, 6],
[ 8, 1],
[ 9, 8],
[10, 9],
[11, 10],
[12, 8],
[13, 12],
[14, 13],
[15, 0],
[16, 0],
[17, 15],
[18, 16],
[19, 14],
[20, 19],
[21, 14],
[22, 11],
[23, 22],
[24, 11]]}
CONFIG['body15'] = {'kintree':
[[ 1, 0],
[ 2, 1],
[ 3, 2],
[ 4, 3],
[ 5, 1],
[ 6, 5],
[ 7, 6],
[ 8, 1],
[ 9, 8],
[10, 9],
[11, 10],
[12, 8],
[13, 12],
[14, 13],]}
CONFIG['hand'] = {'kintree':
[[ 1, 0],
[ 2, 1],
[ 3, 2],
[ 4, 3],
[ 5, 0],
[ 6, 5],
[ 7, 6],
[ 8, 7],
[ 9, 0],
[10, 9],
[11, 10],
[12, 11],
[13, 0],
[14, 13],
[15, 14],
[16, 15],
[17, 0],
[18, 17],
[19, 18],
[20, 19]]
}
CONFIG['bodyhand'] = {'kintree':
[[ 1, 0],
[ 2, 1],
[ 3, 2],
[ 4, 3],
[ 5, 1],
[ 6, 5],
[ 7, 6],
[ 8, 1],
[ 9, 8],
[10, 9],
[11, 10],
[12, 8],
[13, 12],
[14, 13],
[15, 0],
[16, 0],
[17, 15],
[18, 16],
[19, 14],
[20, 19],
[21, 14],
[22, 11],
[23, 22],
[24, 11],
[26, 25], # handl
[27, 26],
[28, 27],
[29, 28],
[30, 25],
[31, 30],
[32, 31],
[33, 32],
[34, 25],
[35, 34],
[36, 35],
[37, 36],
[38, 25],
[39, 38],
[40, 39],
[41, 40],
[42, 25],
[43, 42],
[44, 43],
[45, 44],
[47, 46], # handr
[48, 47],
[49, 48],
[50, 49],
[51, 46],
[52, 51],
[53, 52],
[54, 53],
[55, 46],
[56, 55],
[57, 56],
[58, 57],
[59, 46],
[60, 59],
[61, 60],
[62, 61],
[63, 46],
[64, 63],
[65, 64],
[66, 65]
]
}
CONFIG['face'] = {'kintree':[ [0,1],[1,2],[2,3],[3,4],[4,5],[5,6],[6,7],[7,8],[8,9],[9,10],[10,11],[11,12],[12,13],[13,14],[14,15],[15,16], #outline (ignored)
[17,18],[18,19],[19,20],[20,21], #right eyebrow
[22,23],[23,24],[24,25],[25,26], #left eyebrow
[27,28],[28,29],[29,30], #nose upper part
[31,32],[32,33],[33,34],[34,35], #nose lower part
[36,37],[37,38],[38,39],[39,40],[40,41],[41,36], #right eye
[42,43],[43,44],[44,45],[45,46],[46,47],[47,42], #left eye
[48,49],[49,50],[50,51],[51,52],[52,53],[53,54],[54,55],[55,56],[56,57],[57,58],[58,59],[59,48], #Lip outline
[60,61],[61,62],[62,63],[63,64],[64,65],[65,66],[66,67],[67,60] #Lip inner line
]}
NJOINTS_BODY = 25
NJOINTS_HAND = 21
NJOINTS_FACE = 70
NLIMBS_BODY = len(CONFIG['body25']['kintree'])
NLIMBS_HAND = len(CONFIG['hand']['kintree'])
NLIMBS_FACE = len(CONFIG['face']['kintree'])
def getKintree(name='total'):
if name == 'total':
# order: body25, face, rhand, lhand
kintree = CONFIG['body25']['kintree'] + CONFIG['hand']['kintree'] + CONFIG['hand']['kintree'] + CONFIG['face']['kintree']
kintree = np.array(kintree)
kintree[NLIMBS_BODY:NLIMBS_BODY + NLIMBS_HAND] += NJOINTS_BODY
kintree[NLIMBS_BODY + NLIMBS_HAND:NLIMBS_BODY + 2*NLIMBS_HAND] += NJOINTS_BODY + NJOINTS_HAND
kintree[NLIMBS_BODY + 2*NLIMBS_HAND:] += NJOINTS_BODY + 2*NJOINTS_HAND
elif name == 'smplh':
# order: body25, lhand, rhand
kintree = CONFIG['body25']['kintree'] + CONFIG['hand']['kintree'] + CONFIG['hand']['kintree']
kintree = np.array(kintree)
kintree[NLIMBS_BODY:NLIMBS_BODY + NLIMBS_HAND] += NJOINTS_BODY
kintree[NLIMBS_BODY + NLIMBS_HAND:NLIMBS_BODY + 2*NLIMBS_HAND] += NJOINTS_BODY + NJOINTS_HAND
return kintree
CONFIG['total'] = {}
CONFIG['total']['kintree'] = getKintree('total')
COCO17_IN_BODY25 = [0,16,15,18,17,5,2,6,3,7,4,12,9,13,10,14,11]
def coco17tobody25(points2d):
dim = 3
if len(points2d.shape) == 2:
points2d = points2d[None, :, :]
dim = 2
kpts = np.zeros((points2d.shape[0], 25, 3))
kpts[:, COCO17_IN_BODY25, :2] = points2d[:, :, :2]
kpts[:, COCO17_IN_BODY25, 2:3] = points2d[:, :, 2:3]
kpts[:, 8, :2] = kpts[:, [9, 12], :2].mean(axis=1)
kpts[:, 8, 2] = kpts[:, [9, 12], 2].min(axis=1)
kpts[:, 1, :2] = kpts[:, [2, 5], :2].mean(axis=1)
kpts[:, 1, 2] = kpts[:, [2, 5], 2].min(axis=1)
if dim == 2:
kpts = kpts[0]
return kpts
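# A minimal usage sketch (hypothetical values): lift COCO-17 detections to BODY25.
# >>> coco = np.random.rand(17, 3)      # (x, y, conf) per joint
# >>> body25 = coco17tobody25(coco)     # -> (25, 3)
# joints 1 (neck) and 8 (mid-hip) are synthesized from shoulder/hip midpoints,
# taking the minimum confidence of the two parents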

102
code/dataset/mv1pmf.py Normal file

@ -0,0 +1,102 @@
'''
@ Date: 2021-01-12 17:12:50
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 17:14:34
@ FilePath: /EasyMocap/code/dataset/mv1pmf.py
'''
import os
from os.path import join
import numpy as np
import cv2
from .base import MVBase
class MV1PMF(MVBase):
def __init__(self, root, cams=[], pid=0, out=None, config={},
image_root='images', annot_root='annots', add_hand_face=True,
undis=True, no_img=False) -> None:
super().__init__(root, cams, out, config, image_root, annot_root,
add_hand_face, undis, no_img)
self.pid = pid
def write_keypoints3d(self, keypoints3d, nf):
results = [{'id': 0, 'keypoints3d': keypoints3d.tolist()}]
self.writer.write_keypoints3d(results, nf)
def write_smpl(self, params, nf, images=[], to_img=False):
result = {'id': 0}
result.update(params)
self.writer.write_smpl([result], nf)
def vis_smpl(self, vertices, faces, images, nf, sub_vis):
render_data = {}
if len(vertices.shape) == 3:
vertices = vertices[0]
pid = self.pid
render_data[pid] = {'vertices': vertices, 'faces': faces,
'vid': pid, 'name': '{}_{}'.format(nf, pid)}
cameras = {'K': [], 'R':[], 'T':[]}
if len(sub_vis) == 0:
sub_vis = self.cams
for key in cameras.keys():
cameras[key] = [self.cameras[cam][key] for cam in sub_vis]
images = [images[self.cams.index(cam)] for cam in sub_vis]
self.writer.vis_smpl(render_data, nf, images, cameras)
def vis_detections(self, images, annots, nf, to_img=True, sub_vis=[]):
lDetections = []
for nv in range(len(images)):
det = {
'id': self.pid,
'bbox': annots['bbox'][nv],
'keypoints': annots['keypoints'][nv]
}
lDetections.append([det])
if len(sub_vis) != 0:
valid_idx = [self.cams.index(i) for i in sub_vis]
images = [images[i] for i in valid_idx]
lDetections = [lDetections[i] for i in valid_idx]
return self.writer.vis_detections(images, lDetections, nf,
key='keypoints', to_img=to_img, vis_id=False)
def vis_repro(self, images, annots, kpts_repro, nf, to_img=True, sub_vis=[]):
lDetections = []
for nv in range(len(images)):
det = {
'id': -1,
'repro': kpts_repro[nv]
}
lDetections.append([det])
if len(sub_vis) != 0:
valid_idx = [self.cams.index(i) for i in sub_vis]
images = [images[i] for i in valid_idx]
lDetections = [lDetections[i] for i in valid_idx]
return self.writer.vis_detections(images, lDetections, nf, key='repro',
to_img=to_img, vis_id=False)
def __getitem__(self, index: int):
images, annots_all = super().__getitem__(index)
annots = {'bbox': [], 'keypoints': []}
for nv, cam in enumerate(self.cams):
data = [d for d in annots_all[nv] if d['id'] == self.pid]
if len(data) == 1:
data = data[0]
bbox = data['bbox']
keypoints = data['keypoints']
else:
                print('pid {} not found in frame {}, view {}'.format(self.pid, index, nv))
keypoints = np.zeros((25, 3))
bbox = np.array([0, 0, 100., 100., 0.])
annots['bbox'].append(bbox)
annots['keypoints'].append(keypoints)
for key in ['bbox', 'keypoints']:
annots[key] = np.stack(annots[key])
return images, annots
if __name__ == "__main__":
root = '/home/qian/zjurv2/mnt/data/ftp/Human/vis/lightstage/CoreView_302_sync/'
dataset = MV1PMF(root)
images, annots = dataset[0]

82
code/demo_mv1pmf_skel.py Normal file

@ -0,0 +1,82 @@
'''
@ Date: 2021-01-12 17:08:25
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 17:08:05
@ FilePath: /EasyMocap/code/demo_mv1pmf_skel.py
'''
# show skeleton and reprojection
from dataset.mv1pmf import MV1PMF
from dataset.config import CONFIG
from mytools.reconstruction import simple_recon_person, projectN3
from tqdm import tqdm
import numpy as np
def smooth_skeleton(skeleton):
# nFrames, nJoints, 4: [[(x, y, z, c)]]
nFrames = skeleton.shape[0]
span = 2
# results = np.zeros((nFrames-2*span, skeleton.shape[1], skeleton.shape[2]))
origin = skeleton[span:nFrames-span, :, :].copy()
conf = origin[:, :, 3:4].copy()
skel = origin[:, :, :3] * conf
base_start = span
for i in range(-span, span+1):
sample = skeleton[base_start+i:base_start+i+skel.shape[0], :, :]
skel += sample[:, :, :3] * sample[:, :, 3:]
conf += sample[:, :, 3:]
not_f, not_j, _ = np.where(conf<0.1)
skel[not_f, not_j, :] = 0.
conf[not_f, not_j, :] = 1.
skel = skel/conf
skeleton[span:nFrames-span, :, :3] = skel
return skeleton
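# A minimal sanity sketch (hypothetical input): the confidence-weighted moving
# average leaves a constant, fully confident sequence unchanged.
# >>> skel = np.ones((10, 25, 4))
# >>> np.allclose(smooth_skeleton(skel.copy()), skel)   # True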
def mv1pmf_skel(path, sub, out, mode, args):
MIN_CONF_THRES = 0.5
no_img = not (args.vis_det or args.vis_repro)
dataset = MV1PMF(path, cams=sub, config=CONFIG[mode], add_hand_face=args.add_hand_face,
undis=args.undis, no_img=no_img, out=out)
kp3ds = []
start, end = args.start, min(args.end, len(dataset))
for nf in tqdm(range(start, end), desc='triangulation'):
images, annots = dataset[nf]
conf = annots['keypoints'][..., -1]
conf[conf < MIN_CONF_THRES] = 0
keypoints3d, _, kpts_repro = simple_recon_person(annots['keypoints'], dataset.Pall, ret_repro=True)
kp3ds.append(keypoints3d)
if args.vis_det:
dataset.vis_detections(images, annots, nf, sub_vis=args.sub_vis)
if args.vis_repro:
dataset.vis_repro(images, annots, kpts_repro, nf, sub_vis=args.sub_vis)
# smooth the skeleton
kp3ds = np.stack(kp3ds)
if args.smooth:
kp3ds = smooth_skeleton(kp3ds)
for nf in tqdm(range(kp3ds.shape[0]), desc='dump'):
dataset.write_keypoints3d(kp3ds[nf], nf + start)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser('multi_view one_person multi_frame skel')
parser.add_argument('path', type=str)
parser.add_argument('--out', type=str, default=None)
parser.add_argument('--sub', type=str, nargs='+', default=[],
help='the sub folder lists when in video mode')
parser.add_argument('--start', type=int, default=0,
help='frame start')
parser.add_argument('--end', type=int, default=10000,
help='frame end')
parser.add_argument('--step', type=int, default=1,
help='frame step')
parser.add_argument('--body', type=str, default='body25', choices=['body15', 'body25', 'total'])
parser.add_argument('--undis', action='store_true')
parser.add_argument('--add_hand_face', action='store_true')
parser.add_argument('--smooth', action='store_true')
parser.add_argument('--vis_det', action='store_true')
parser.add_argument('--vis_repro', action='store_true')
parser.add_argument('--sub_vis', type=str, nargs='+', default=[],
help='the sub folder lists for visualization')
args = parser.parse_args()
mv1pmf_skel(args.path, args.sub, args.out, args.body, args)

107
code/demo_mv1pmf_smpl.py Normal file

@ -0,0 +1,107 @@
'''
@ Date: 2021-01-12 17:08:25
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 20:49:25
@ FilePath: /EasyMocap/code/demo_mv1pmf_smpl.py
'''
# show skeleton and reprojection
import pyrender # pyrender must be imported first
from pyfitting.optimize_simple import optimizeShape, optimizePose
from dataset.mv1pmf import MV1PMF
from dataset.config import CONFIG
from mytools.reconstruction import simple_recon_person, projectN3
from smplmodel import select_nf, init_params, Config
from tqdm import tqdm
import numpy as np
def load_model(use_cuda=True):
# prepare SMPL model
import torch
if use_cuda:
device = torch.device('cuda')
else:
device = torch.device('cpu')
from smplmodel import SMPLlayer
body_model = SMPLlayer('data/smplx/smpl', gender='neutral', device=device,
regressor_path='data/smplx/J_regressor_body25.npy')
body_model.to(device)
return body_model
def load_weight_shape():
weight = {'s3d': 1., 'reg_shape': 5e-3}
return weight
def load_weight_pose():
weight = {
'k3d': 1., 'reg_poses_zero': 1e-2,
'smooth_Rh': 1e-2, 'smooth_Th': 1e-2, 'smooth_poses': 1e-2
}
return weight
def mv1pmf_smpl(path, sub, out, mode, args):
config = CONFIG[mode]
MIN_CONF_THRES = 0.5
no_img = False
dataset = MV1PMF(path, cams=sub, config=CONFIG[mode], add_hand_face=False,
undis=args.undis, no_img=no_img, out=out)
kp3ds = []
start, end = args.start, min(args.end, len(dataset))
dataset.no_img = True
annots_all = []
for nf in tqdm(range(start, end), desc='triangulation'):
images, annots = dataset[nf]
conf = annots['keypoints'][..., -1]
conf[conf < MIN_CONF_THRES] = 0
keypoints3d, _, kpts_repro = simple_recon_person(annots['keypoints'], dataset.Pall, ret_repro=True)
kp3ds.append(keypoints3d)
annots_all.append(annots)
# smooth the skeleton
kp3ds = np.stack(kp3ds)
# optimize the human shape
body_model = load_model()
params_init = init_params(nFrames=1)
weight = load_weight_shape()
params_shape = optimizeShape(body_model, params_init, kp3ds, weight_loss=weight, kintree=config['kintree'])
# optimize 3D pose
cfg = Config()
params = init_params(nFrames=kp3ds.shape[0])
params['shapes'] = params_shape['shapes'].copy()
weight = load_weight_pose()
cfg.OPT_R = True
cfg.OPT_T = True
params = optimizePose(body_model, params, kp3ds, weight_loss=weight, kintree=config['kintree'], cfg=cfg)
cfg.OPT_POSE = True
params = optimizePose(body_model, params, kp3ds, weight_loss=weight, kintree=config['kintree'], cfg=cfg)
# optimize 2D pose
# render the mesh
dataset.no_img = not args.vis_smpl
for nf in tqdm(range(start, end), desc='render'):
images, annots = dataset[nf]
dataset.write_smpl(select_nf(params, nf-start), nf)
if args.vis_smpl:
vertices = body_model(return_verts=True, return_tensor=False, **select_nf(params, nf-start))
dataset.vis_smpl(vertices=vertices, faces=body_model.faces, images=images, nf=nf, sub_vis=args.sub_vis)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser('multi_view one_person multi_frame skel')
parser.add_argument('path', type=str)
parser.add_argument('--out', type=str, default=None)
parser.add_argument('--sub', type=str, nargs='+', default=[],
help='the sub folder lists when in video mode')
parser.add_argument('--start', type=int, default=0,
help='frame start')
parser.add_argument('--end', type=int, default=10000,
help='frame end')
parser.add_argument('--step', type=int, default=1,
help='frame step')
parser.add_argument('--body', type=str, default='body15', choices=['body15', 'body25', 'total'])
parser.add_argument('--undis', action='store_true')
parser.add_argument('--add_hand_face', action='store_true')
parser.add_argument('--vis_smpl', action='store_true')
parser.add_argument('--sub_vis', type=str, nargs='+', default=[],
help='the sub folder lists for visualization')
args = parser.parse_args()
mv1pmf_smpl(args.path, args.sub, args.out, args.body, args)

230
code/mytools/camera_utils.py Normal file

@ -0,0 +1,230 @@
import cv2
import numpy as np
from tqdm import tqdm
import os
class FileStorage(object):
def __init__(self, filename, isWrite=False):
version = cv2.__version__
self.version = int(version.split('.')[0])
if isWrite:
self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_WRITE)
else:
self.fs = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ)
def __del__(self):
cv2.FileStorage.release(self.fs)
def write(self, key, value, dt='mat'):
if dt == 'mat':
cv2.FileStorage.write(self.fs, key, value)
elif dt == 'list':
            if self.version == 4: # opencv 4.x
                self.fs.startWriteStruct(key, cv2.FileNode_SEQ)
for elem in value:
self.fs.write('', elem)
self.fs.endWriteStruct()
else: # 3.4
self.fs.write(key, '[')
for elem in value:
self.fs.write('none', elem)
self.fs.write('none', ']')
def read(self, key, dt='mat'):
if dt == 'mat':
output = self.fs.getNode(key).mat()
elif dt == 'list':
results = []
n = self.fs.getNode(key)
for i in range(n.size()):
val = n.at(i).string()
if val == '':
val = str(int(n.at(i).real()))
if val != 'none':
results.append(val)
output = results
else:
raise NotImplementedError
return output
def close(self):
        self.fs.release()
def _FindChessboardCorners(img, patternSize = (9, 6)):
if len(img.shape) == 3:
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
retval, corners = cv2.findChessboardCorners(img, patternSize,
flags=cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_NORMALIZE_IMAGE)
if not retval:
return False, False
corners = cv2.cornerSubPix(img, corners, (11, 11), (-1, -1), criteria)
return True, corners
def FindChessboardCorners(image_names, patternSize=(9, 6), gridSize=0.1, debug=False):
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
object_points = np.zeros((patternSize[1]*patternSize[0], 3), np.float32)
object_points[:,:2] = np.mgrid[0:patternSize[0], 0:patternSize[1]].T.reshape(-1,2)
object_points = object_points * gridSize
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
images = []
for fname in tqdm(image_names):
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = _FindChessboardCorners(gray, patternSize)
# If found, add object points, image points (after refining them)
if ret:
objpoints.append(object_points)
imgpoints.append(corners)
# Draw and display the corners
images.append(img)
if debug:
img = cv2.drawChessboardCorners(img, patternSize, corners, ret)
cv2.imshow('img',img)
cv2.waitKey(10)
else:
os.remove(fname)
return imgpoints, objpoints, images
def safe_mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
def read_camera(intri_name, extri_name, cam_names=[0,1,2,3]):
assert os.path.exists(intri_name), intri_name
assert os.path.exists(extri_name), extri_name
intri = FileStorage(intri_name)
extri = FileStorage(extri_name)
cams, P = {}, {}
for cam in cam_names:
        # intrinsics: only read those of the sub-stream
cams[cam] = {}
cams[cam]['K'] = intri.read('K_{}'.format( cam))
cams[cam]['invK'] = np.linalg.inv(cams[cam]['K'])
Rvec = extri.read('R_{}'.format(cam))
Tvec = extri.read('T_{}'.format(cam))
R = cv2.Rodrigues(Rvec)[0]
RT = np.hstack((R, Tvec))
cams[cam]['RT'] = RT
cams[cam]['R'] = R
cams[cam]['T'] = Tvec
P[cam] = cams[cam]['K'] @ cams[cam]['RT']
cams[cam]['P'] = P[cam]
cams[cam]['dist'] = intri.read('dist_{}'.format(cam))
cams['basenames'] = cam_names
return cams
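# A minimal usage sketch (hypothetical paths/names):
# >>> cams = read_camera('intri.yml', 'extri.yml', cam_names=['01', '02'])
# >>> P = cams['01']['P']   # 3x4 projection matrix, K @ [R|T]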
def write_camera(camera, path):
from os.path import join
intri_name = join(path, 'intri.yml')
extri_name = join(path, 'extri.yml')
intri = FileStorage(intri_name, True)
extri = FileStorage(extri_name, True)
results = {}
camnames = [key_.split('.')[0] for key_ in camera.keys()]
intri.write('names', camnames, 'list')
extri.write('names', camnames, 'list')
for key_, val in camera.items():
if key_ == 'basenames':
continue
key = key_.split('.')[0]
intri.write('K_{}'.format(key), val['K'])
intri.write('dist_{}'.format(key), val['dist'])
if 'Rvec' not in val.keys():
val['Rvec'] = cv2.Rodrigues(val['R'])[0]
extri.write('R_{}'.format(key), val['Rvec'])
extri.write('Rot_{}'.format(key), val['R'])
extri.write('T_{}'.format(key), val['T'])
def undistort(camera, frame=None, keypoints=None, output=None, bbox=None):
# bbox: 1, 7
mtx = camera['K']
dist = camera['dist']
if frame is not None:
frame = cv2.undistort(frame, mtx, dist, None)
if output is not None:
output = cv2.undistort(output, mtx, dist, None)
if keypoints is not None:
for nP in range(keypoints.shape[0]):
kpts = keypoints[nP][:, None, :2]
kpts = np.ascontiguousarray(kpts)
kpts = cv2.undistortPoints(kpts, mtx, dist, P=mtx)
keypoints[nP, :, :2] = kpts[:, 0]
if bbox is not None:
kpts = np.zeros((2, 1, 2))
kpts[0, 0, 0] = bbox[0]
kpts[0, 0, 1] = bbox[1]
kpts[1, 0, 0] = bbox[2]
kpts[1, 0, 1] = bbox[3]
kpts = cv2.undistortPoints(kpts, mtx, dist, P=mtx)
bbox[0] = kpts[0, 0, 0]
bbox[1] = kpts[0, 0, 1]
bbox[2] = kpts[1, 0, 0]
bbox[3] = kpts[1, 0, 1]
return bbox
return frame, keypoints, output
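# A minimal usage sketch (hypothetical shapes): keypoints are undistorted in place,
# while passing a bbox triggers an early return of the corrected bbox only.
# >>> camera = {'K': np.eye(3), 'dist': np.zeros((1, 5))}
# >>> frame, kpts, _ = undistort(camera, frame=np.zeros((10, 10, 3), np.uint8),
# ...                            keypoints=np.zeros((1, 25, 3)))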
def get_bbox(points_set, H, W, thres=0.1, scale=1.2):
bboxes = np.zeros((points_set.shape[0], 6))
for iv in range(points_set.shape[0]):
pose = points_set[iv, :, :]
use_idx = pose[:,2] > thres
if np.sum(use_idx) < 1:
continue
ll, rr = np.min(pose[use_idx, 0]), np.max(pose[use_idx, 0])
bb, tt = np.min(pose[use_idx, 1]), np.max(pose[use_idx, 1])
center = (int((ll + rr) / 2), int((bb + tt) / 2))
length = [int(scale*(rr-ll)/2), int(scale*(tt-bb)/2)]
l = max(0, center[0] - length[0])
r = min(W, center[0] + length[0]) # img.shape[1]
b = max(0, center[1] - length[1])
t = min(H, center[1] + length[1]) # img.shape[0]
conf = pose[:, 2].mean()
cls_conf = pose[use_idx, 2].mean()
bboxes[iv, 0] = l
bboxes[iv, 1] = r
bboxes[iv, 2] = b
bboxes[iv, 3] = t
bboxes[iv, 4] = conf
bboxes[iv, 5] = cls_conf
return bboxes
def filterKeypoints(keypoints, thres = 0.1, min_width=40, \
min_height=40, min_area= 50000, min_count=6):
add_list = []
    # TODO: parallelize
for ik in range(keypoints.shape[0]):
pose = keypoints[ik]
vis_count = np.sum(pose[:15, 2] > thres) #TODO:
if vis_count < min_count:
continue
ll, rr = np.min(pose[pose[:,2]>thres,0]), np.max(pose[pose[:,2]>thres,0])
bb, tt = np.min(pose[pose[:,2]>thres,1]), np.max(pose[pose[:,2]>thres,1])
center = (int((ll+rr)/2), int((bb+tt)/2))
length = [int(1.2*(rr-ll)/2), int(1.2*(tt-bb)/2)]
l = center[0] - length[0]
r = center[0] + length[0]
b = center[1] - length[1]
t = center[1] + length[1]
if (r - l) < min_width:
continue
if (t - b) < min_height:
continue
if (r - l)*(t - b) < min_area:
continue
add_list.append(ik)
keypoints = keypoints[add_list, :, :]
return keypoints, add_list

100
code/mytools/reconstruction.py Normal file

@ -0,0 +1,100 @@
'''
* @ Date: 2020-09-14 11:01:52
* @ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-13 11:30:38
@ FilePath: /EasyMocap/code/mytools/reconstruction.py
'''
import numpy as np
def solveZ(A):
    # homogeneous least squares: the minimizer of ||AX|| s.t. ||X|| = 1 is the
    # right singular vector of A associated with the smallest singular value
    u, s, v = np.linalg.svd(A)
    X = v[-1, :]
    X = X / X[3]  # dehomogenize
    return X[:3]
def projectN3(kpts3d, Pall):
# kpts3d: (N, 3)
nViews = len(Pall)
kp3d = np.hstack((kpts3d[:, :3], np.ones((kpts3d.shape[0], 1))))
kp2ds = []
for nv in range(nViews):
kp2d = Pall[nv] @ kp3d.T
kp2d[:2, :] /= kp2d[2:, :]
kp2ds.append(kp2d.T[None, :, :])
kp2ds = np.vstack(kp2ds)
return kp2ds
def simple_reprojection_error(kpts1, kpts1_proj):
# (N, 3)
error = np.mean((kpts1[:, :2] - kpts1_proj[:, :2])**2)
return error
def simple_triangulate(kpts, Pall):
# kpts: (nViews, 3)
# Pall: (nViews, 3, 4)
# return: kpts3d(3,), conf: float
nViews = len(kpts)
    A = np.zeros((nViews*2, 4), dtype=float)
result = np.zeros(4)
result[3] = kpts[:, 2].sum()/(kpts[:, 2]>0).sum()
for i in range(nViews):
P = Pall[i]
A[i*2, :] = kpts[i, 2]*(kpts[i, 0]*P[2:3,:] - P[0:1,:])
A[i*2 + 1, :] = kpts[i, 2]*(kpts[i, 1]*P[2:3,:] - P[1:2,:])
result[:3] = solveZ(A)
return result
def simple_recon_person(keypoints_use, Puse, ret_repro=False, max_error=100):
nJoints = keypoints_use[0].shape[0]
if isinstance(keypoints_use, list):
keypoints_use = np.stack(keypoints_use)
out = np.zeros((nJoints, 4))
for nj in range(nJoints):
keypoints = keypoints_use[:, nj]
if (keypoints[:, 2] > 0.01).sum() < 2:
continue
out[nj] = simple_triangulate(keypoints, Puse)
    # compute the reprojection error
kpts_repro = projectN3(out, Puse)
square_diff = (keypoints_use[:, :, :2] - kpts_repro[:, :, :2])**2
conf = (out[None, :, -1] > 0.01) * (keypoints_use[:, :, 2] > 0.01)
    if conf.sum() < 3: # require at least 3 valid joints
repro_error = 1e3
else:
repro_error_joint = np.sqrt(square_diff.sum(axis=2))*conf
num_valid_view = conf.sum(axis=0)
        # joints visible in too few views are forced to invisible
repro_error_joint[:, num_valid_view==0] = max_error * 2
num_valid_view[num_valid_view==0] = 1
repro_error_joint_ = repro_error_joint.sum(axis=0)/num_valid_view
not_valid = np.where(repro_error_joint_>max_error)[0]
out[not_valid, -1] = 0
repro_error = repro_error_joint.sum()/conf.sum()
if ret_repro:
return out, repro_error, kpts_repro
return out, repro_error
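# A minimal usage sketch (hypothetical inputs): triangulate one person from
# detections keypoints (nViews, nJoints, 3) and projection matrices Pall (nViews, 3, 4).
# >>> out, err, repro = simple_recon_person(keypoints, Pall, ret_repro=True)
# >>> out.shape   # (nJoints, 4): x, y, z, confidence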
def check_limb(keypoints3d, limb_means, thres=0.5):
# keypoints3d: (nJ, 4)
valid = True
cnt = 0
for (src, dst), val in limb_means.items():
if not (keypoints3d[src, 3] > 0 and keypoints3d[dst, 3] > 0):
continue
cnt += 1
        # compute the limb length
l_est = np.linalg.norm(keypoints3d[src, :3] - keypoints3d[dst, :3])
if abs(l_est - val['mean'])/val['mean']/val['std'] > thres:
valid = False
break
    # require that more than two limbs are usable
valid = valid and cnt > 2
return valid

118
code/mytools/vis_base.py Normal file

@ -0,0 +1,118 @@
'''
@ Date: 2020-11-28 17:23:04
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 17:11:51
@ FilePath: /EasyMocap/code/mytools/vis_base.py
'''
import cv2
import numpy as np
import json
def read_json(json_path):
with open(json_path, 'r') as f:
return json.load(f)
def generate_colorbar(N = 1000, cmap = 'gist_rainbow'):
    from matplotlib import cm
    import random
    random.seed(666)
    x = np.linspace(0.0, 1.0, N)
    index = [i for i in range(N)]
    random.shuffle(index)
    rgb = cm.get_cmap(cmap)(x)[:, :3]
    rgb = rgb[index, :]
    rgb = (rgb*255).astype(np.uint8).tolist()
    return rgb
colors_bar_rgb = generate_colorbar(cmap='hsv')
# colors_bar_rgb = read_json('config/colors.json')
def get_rgb(index):
index = int(index)
if index == -1:
return (255, 255, 255)
if index < -1:
return (0, 0, 0)
col = colors_bar_rgb[index%len(colors_bar_rgb)]
# color = tuple([int(c*255) for c in col])
return col
def plot_point(img, x, y, r, col, pid=-1):
cv2.circle(img, (int(x+0.5), int(y+0.5)), r, col, -1)
if pid != -1:
cv2.putText(img, '{}'.format(pid), (int(x+0.5), int(y+0.5)), cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)
def plot_line(img, pt1, pt2, lw, col):
cv2.line(img, (int(pt1[0]+0.5), int(pt1[1]+0.5)), (int(pt2[0]+0.5), int(pt2[1]+0.5)),
col, lw)
def plot_bbox(img, bbox, pid, vis_id=True):
    # draw the bbox: (l, t, r, b)
x1, y1, x2, y2 = bbox[:4]
x1 = int(round(x1))
x2 = int(round(x2))
y1 = int(round(y1))
y2 = int(round(y2))
color = get_rgb(pid)
lw = max(img.shape[0]//300, 2)
cv2.rectangle(img, (x1, y1), (x2, y2), color, lw)
if vis_id:
cv2.putText(img, '{}'.format(pid), (x1, y1+20), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True, lw=2):
for ii, (i, j) in enumerate(config['kintree']):
if i >= points.shape[0] or j >= points.shape[0]:
continue
pt1, pt2 = points[i], points[j]
if use_limb_color:
col = get_rgb(config['colors'][ii])
else:
col = get_rgb(pid)
if pt1[2] > 0.01 and pt2[2] > 0.01:
            cv2.line(
                img, (int(pt1[0]+0.5), int(pt1[1]+0.5)), (int(pt2[0]+0.5), int(pt2[1]+0.5)),
                col, lw)
for i in range(len(points)):
x, y, c = points[i]
if c > 0.01:
col = get_rgb(pid)
cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
if vis_conf:
cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)
def merge(images, row=-1, col=-1, resize=False, ret_range=False):
if row == -1 and col == -1:
from math import sqrt
row = int(sqrt(len(images)) + 0.5)
col = int(len(images)/ row + 0.5)
if row > col:
row, col = col, row
if len(images) == 8:
        # basketball scene
row, col = 2, 4
images = [images[i] for i in [0, 1, 2, 3, 7, 6, 5, 4]]
if len(images) == 7:
row, col = 3, 3
height = images[0].shape[0]
width = images[0].shape[1]
ret_img = np.zeros((height * row, width * col, 3), dtype=np.uint8) + 255
ranges = []
for i in range(row):
for j in range(col):
if i*col + j >= len(images):
break
img = images[i * col + j]
ret_img[height * i: height * (i+1), width * j: width * (j+1)] = img
ranges.append((width*j, height*i, width*(j+1), height*(i+1)))
if resize:
scale = min(1000/ret_img.shape[0], 1800/ret_img.shape[1])
while ret_img.shape[0] > 2000:
ret_img = cv2.resize(ret_img, None, fx=scale, fy=scale)
if ret_range:
return ret_img, ranges
return ret_img
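# A minimal usage sketch (hypothetical): tile four equally-sized views into a 2x2 grid.
# >>> views = [np.zeros((480, 640, 3), np.uint8) for _ in range(4)]
# >>> grid = merge(views)   # -> (960, 1280, 3)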

471
code/pyfitting/lbfgs.py Normal file

@ -0,0 +1,471 @@
import torch
from functools import reduce
from torch.optim.optimizer import Optimizer
def _cubic_interpolate(x1, f1, g1, x2, f2, g2, bounds=None):
# ported from https://github.com/torch/optim/blob/master/polyinterp.lua
# Compute bounds of interpolation area
if bounds is not None:
xmin_bound, xmax_bound = bounds
else:
xmin_bound, xmax_bound = (x1, x2) if x1 <= x2 else (x2, x1)
# Code for most common case: cubic interpolation of 2 points
# w/ function and derivative values for both
# Solution in this case (where x2 is the farthest point):
# d1 = g1 + g2 - 3*(f1-f2)/(x1-x2);
# d2 = sqrt(d1^2 - g1*g2);
# min_pos = x2 - (x2 - x1)*((g2 + d2 - d1)/(g2 - g1 + 2*d2));
# t_new = min(max(min_pos,xmin_bound),xmax_bound);
d1 = g1 + g2 - 3 * (f1 - f2) / (x1 - x2)
d2_square = d1**2 - g1 * g2
if d2_square >= 0:
d2 = d2_square.sqrt()
if x1 <= x2:
min_pos = x2 - (x2 - x1) * ((g2 + d2 - d1) / (g2 - g1 + 2 * d2))
else:
min_pos = x1 - (x1 - x2) * ((g1 + d2 - d1) / (g1 - g2 + 2 * d2))
return min(max(min_pos, xmin_bound), xmax_bound)
else:
return (xmin_bound + xmax_bound) / 2.
def _strong_wolfe(obj_func,
x,
t,
d,
f,
g,
gtd,
c1=1e-4,
c2=0.9,
tolerance_change=1e-9,
max_ls=25):
# ported from https://github.com/torch/optim/blob/master/lswolfe.lua
d_norm = d.abs().max()
g = g.clone()
# evaluate objective and gradient using initial step
f_new, g_new = obj_func(x, t, d)
ls_func_evals = 1
gtd_new = g_new.dot(d)
# bracket an interval containing a point satisfying the Wolfe criteria
t_prev, f_prev, g_prev, gtd_prev = 0, f, g, gtd
done = False
ls_iter = 0
while ls_iter < max_ls:
# check conditions
if f_new > (f + c1 * t * gtd) or (ls_iter > 1 and f_new >= f_prev):
bracket = [t_prev, t]
bracket_f = [f_prev, f_new]
bracket_g = [g_prev, g_new.clone()]
bracket_gtd = [gtd_prev, gtd_new]
break
if abs(gtd_new) <= -c2 * gtd:
bracket = [t]
bracket_f = [f_new]
bracket_g = [g_new]
done = True
break
if gtd_new >= 0:
bracket = [t_prev, t]
bracket_f = [f_prev, f_new]
bracket_g = [g_prev, g_new.clone()]
bracket_gtd = [gtd_prev, gtd_new]
break
# interpolate
min_step = t + 0.01 * (t - t_prev)
max_step = t * 10
tmp = t
t = _cubic_interpolate(
t_prev,
f_prev,
gtd_prev,
t,
f_new,
gtd_new,
bounds=(min_step, max_step))
# next step
t_prev = tmp
f_prev = f_new
g_prev = g_new.clone()
gtd_prev = gtd_new
f_new, g_new = obj_func(x, t, d)
ls_func_evals += 1
gtd_new = g_new.dot(d)
ls_iter += 1
# reached max number of iterations?
if ls_iter == max_ls:
bracket = [0, t]
bracket_f = [f, f_new]
bracket_g = [g, g_new]
# zoom phase: we now have a point satisfying the criteria, or
# a bracket around it. We refine the bracket until we find the
# exact point satisfying the criteria
insuf_progress = False
# find high and low points in bracket
low_pos, high_pos = (0, 1) if bracket_f[0] <= bracket_f[-1] else (1, 0)
while not done and ls_iter < max_ls:
# compute new trial value
t = _cubic_interpolate(bracket[0], bracket_f[0], bracket_gtd[0],
bracket[1], bracket_f[1], bracket_gtd[1])
# test that we are making sufficient progress:
# in case `t` is so close to boundary, we mark that we are making
# insufficient progress, and if
# + we have made insufficient progress in the last step, or
# + `t` is at one of the boundary,
# we will move `t` to a position which is `0.1 * len(bracket)`
# away from the nearest boundary point.
eps = 0.1 * (max(bracket) - min(bracket))
if min(max(bracket) - t, t - min(bracket)) < eps:
# interpolation close to boundary
if insuf_progress or t >= max(bracket) or t <= min(bracket):
# evaluate at 0.1 away from boundary
if abs(t - max(bracket)) < abs(t - min(bracket)):
t = max(bracket) - eps
else:
t = min(bracket) + eps
insuf_progress = False
else:
insuf_progress = True
else:
insuf_progress = False
# Evaluate new point
f_new, g_new = obj_func(x, t, d)
ls_func_evals += 1
gtd_new = g_new.dot(d)
ls_iter += 1
if f_new > (f + c1 * t * gtd) or f_new >= bracket_f[low_pos]:
# Armijo condition not satisfied or not lower than lowest point
bracket[high_pos] = t
bracket_f[high_pos] = f_new
bracket_g[high_pos] = g_new.clone()
bracket_gtd[high_pos] = gtd_new
low_pos, high_pos = (0, 1) if bracket_f[0] <= bracket_f[1] else (1, 0)
else:
if abs(gtd_new) <= -c2 * gtd:
# Wolfe conditions satisfied
done = True
elif gtd_new * (bracket[high_pos] - bracket[low_pos]) >= 0:
# old high becomes new low
bracket[high_pos] = bracket[low_pos]
bracket_f[high_pos] = bracket_f[low_pos]
bracket_g[high_pos] = bracket_g[low_pos]
bracket_gtd[high_pos] = bracket_gtd[low_pos]
# new point becomes new low
bracket[low_pos] = t
bracket_f[low_pos] = f_new
bracket_g[low_pos] = g_new.clone()
bracket_gtd[low_pos] = gtd_new
# line-search bracket is so small
if abs(bracket[1] - bracket[0]) * d_norm < tolerance_change:
break
# return stuff
t = bracket[low_pos]
f_new = bracket_f[low_pos]
g_new = bracket_g[low_pos]
return f_new, g_new, t, ls_func_evals
class LBFGS(Optimizer):
"""Implements L-BFGS algorithm, heavily inspired by `minFunc
<https://www.cs.ubc.ca/~schmidtm/Software/minFunc.html>`.
.. warning::
This optimizer doesn't support per-parameter options and parameter
groups (there can be only one).
.. warning::
Right now all parameters have to be on a single device. This will be
improved in the future.
.. note::
This is a very memory intensive optimizer (it requires additional
``param_bytes * (history_size + 1)`` bytes). If it doesn't fit in memory
try reducing the history size, or use a different algorithm.
Arguments:
lr (float): learning rate (default: 1)
max_iter (int): maximal number of iterations per optimization step
(default: 20)
max_eval (int): maximal number of function evaluations per optimization
step (default: max_iter * 1.25).
tolerance_grad (float): termination tolerance on first order optimality
(default: 1e-5).
tolerance_change (float): termination tolerance on function
value/parameter changes (default: 1e-9).
history_size (int): update history size (default: 100).
line_search_fn (str): either 'strong_wolfe' or None (default: None).
"""
def __init__(self,
params,
lr=1,
max_iter=20,
max_eval=None,
tolerance_grad=1e-5,
tolerance_change=1e-9,
history_size=100,
line_search_fn=None):
if max_eval is None:
max_eval = max_iter * 5 // 4
defaults = dict(
lr=lr,
max_iter=max_iter,
max_eval=max_eval,
tolerance_grad=tolerance_grad,
tolerance_change=tolerance_change,
history_size=history_size,
line_search_fn=line_search_fn)
super(LBFGS, self).__init__(params, defaults)
if len(self.param_groups) != 1:
raise ValueError("LBFGS doesn't support per-parameter options "
"(parameter groups)")
self._params = self.param_groups[0]['params']
self._numel_cache = None
def _numel(self):
if self._numel_cache is None:
self._numel_cache = reduce(lambda total, p: total + p.numel(), self._params, 0)
return self._numel_cache
def _gather_flat_grad(self):
views = []
for p in self._params:
if p.grad is None:
view = p.new(p.numel()).zero_()
elif p.grad.is_sparse:
view = p.grad.to_dense().view(-1)
else:
view = p.grad.view(-1)
views.append(view)
return torch.cat(views, 0)
def _add_grad(self, step_size, update):
offset = 0
for p in self._params:
numel = p.numel()
# view as to avoid deprecated pointwise semantics
p.data.add_(step_size, update[offset:offset + numel].view_as(p.data))
offset += numel
assert offset == self._numel()
def _clone_param(self):
return [p.clone() for p in self._params]
def _set_param(self, params_data):
for p, pdata in zip(self._params, params_data):
p.data.copy_(pdata)
def _directional_evaluate(self, closure, x, t, d):
self._add_grad(t, d)
loss = float(closure())
flat_grad = self._gather_flat_grad()
self._set_param(x)
return loss, flat_grad
def step(self, closure):
"""Performs a single optimization step.
Arguments:
closure (callable): A closure that reevaluates the model
and returns the loss.
"""
assert len(self.param_groups) == 1
group = self.param_groups[0]
lr = group['lr']
max_iter = group['max_iter']
max_eval = group['max_eval']
tolerance_grad = group['tolerance_grad']
tolerance_change = group['tolerance_change']
line_search_fn = group['line_search_fn']
history_size = group['history_size']
# NOTE: LBFGS has only global state, but we register it as state for
# the first param, because this helps with casting in load_state_dict
state = self.state[self._params[0]]
state.setdefault('func_evals', 0)
state.setdefault('n_iter', 0)
# evaluate initial f(x) and df/dx
orig_loss = closure()
loss = float(orig_loss)
current_evals = 1
state['func_evals'] += 1
flat_grad = self._gather_flat_grad()
opt_cond = flat_grad.abs().max() <= tolerance_grad
# optimal condition
if opt_cond:
return orig_loss
# tensors cached in state (for tracing)
d = state.get('d')
t = state.get('t')
old_dirs = state.get('old_dirs')
old_stps = state.get('old_stps')
ro = state.get('ro')
H_diag = state.get('H_diag')
prev_flat_grad = state.get('prev_flat_grad')
prev_loss = state.get('prev_loss')
n_iter = 0
# optimize for a max of max_iter iterations
while n_iter < max_iter:
# keep track of nb of iterations
n_iter += 1
state['n_iter'] += 1
############################################################
# compute gradient descent direction
############################################################
if state['n_iter'] == 1:
d = flat_grad.neg()
old_dirs = []
old_stps = []
ro = []
H_diag = 1
else:
# do lbfgs update (update memory)
y = flat_grad.sub(prev_flat_grad)
s = d.mul(t)
ys = y.dot(s) # y*s
if ys > 1e-10:
# updating memory
if len(old_dirs) == history_size:
# shift history by one (limited-memory)
old_dirs.pop(0)
old_stps.pop(0)
ro.pop(0)
# store new direction/step
old_dirs.append(y)
old_stps.append(s)
ro.append(1. / ys)
# update scale of initial Hessian approximation
H_diag = ys / y.dot(y) # (y*y)
# compute the approximate (L-BFGS) inverse Hessian
# multiplied by the gradient
num_old = len(old_dirs)
if 'al' not in state:
state['al'] = [None] * history_size
al = state['al']
# iteration in L-BFGS loop collapsed to use just one buffer
q = flat_grad.neg()
for i in range(num_old - 1, -1, -1):
al[i] = old_stps[i].dot(q) * ro[i]
q.add_(-al[i], old_dirs[i])
# multiply by initial Hessian
# r/d is the final direction
d = r = torch.mul(q, H_diag)
for i in range(num_old):
be_i = old_dirs[i].dot(r) * ro[i]
r.add_(al[i] - be_i, old_stps[i])
if prev_flat_grad is None:
prev_flat_grad = flat_grad.clone()
else:
prev_flat_grad.copy_(flat_grad)
prev_loss = loss
############################################################
# compute step length
############################################################
# reset initial guess for step size
if state['n_iter'] == 1:
t = min(1., 1. / flat_grad.abs().sum()) * lr
else:
t = lr
# directional derivative
gtd = flat_grad.dot(d) # g * d
# directional derivative is below tolerance
if gtd > -tolerance_change:
break
# optional line search: user function
ls_func_evals = 0
if line_search_fn is not None:
# perform line search, using user function
if line_search_fn != "strong_wolfe":
raise RuntimeError("only 'strong_wolfe' is supported")
else:
x_init = self._clone_param()
def obj_func(x, t, d):
return self._directional_evaluate(closure, x, t, d)
loss, flat_grad, t, ls_func_evals = _strong_wolfe(
obj_func, x_init, t, d, loss, flat_grad, gtd)
self._add_grad(t, d)
opt_cond = flat_grad.abs().max() <= tolerance_grad
else:
# no line search, simply move with fixed-step
self._add_grad(t, d)
if n_iter != max_iter:
# re-evaluate function only if not in last iteration
# the reason we do this: in a stochastic setting,
# no use to re-evaluate that function here
loss = float(closure())
flat_grad = self._gather_flat_grad()
opt_cond = flat_grad.abs().max() <= tolerance_grad
ls_func_evals = 1
# update func eval
current_evals += ls_func_evals
state['func_evals'] += ls_func_evals
############################################################
# check conditions
############################################################
if n_iter == max_iter:
break
if current_evals >= max_eval:
break
# optimal condition
if opt_cond:
break
# lack of progress
if d.mul(t).abs().max() <= tolerance_change:
break
if abs(loss - prev_loss) < tolerance_change:
break
state['d'] = d
state['t'] = t
state['old_dirs'] = old_dirs
state['old_stps'] = old_stps
state['ro'] = ro
state['H_diag'] = H_diag
state['prev_flat_grad'] = prev_flat_grad
state['prev_loss'] = prev_loss
return orig_loss

64
code/pyfitting/lossfactory.py Normal file

@ -0,0 +1,64 @@
'''
@ Date: 2020-11-19 17:46:04
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 15:02:39
@ FilePath: /EasyMocap/code/pyfitting/lossfactory.py
'''
import torch
from .operation import projection, batch_rodrigues
def ReprojectionLoss(keypoints3d, keypoints2d, K, Rc, Tc, inv_bbox_sizes):
img_points = projection(keypoints3d, K, Rc, Tc)
residual = (img_points - keypoints2d[:, :, :2]) * keypoints2d[:, :, 2:3]
squared_res = (residual ** 2) * inv_bbox_sizes
return torch.sum(squared_res)
class SMPLAngleLoss:
def __init__(self, keypoints):
use_feet = keypoints[:, [19, 20, 21, 22, 23, 24], -1].sum() > 0.1
use_head = keypoints[:, [15, 16, 17, 18], -1].sum() > 0.1
SMPL_JOINT_ZERO_IDX = [3, 6, 9, 13, 14, 20, 21, 22, 23]
if not use_feet:
SMPL_JOINT_ZERO_IDX.extend([7, 8])
if not use_head:
SMPL_JOINT_ZERO_IDX.extend([12, 15])
SMPL_POSES_ZERO_IDX = [[j for j in range(3*i, 3*i+3)] for i in SMPL_JOINT_ZERO_IDX]
SMPL_POSES_ZERO_IDX = sum(SMPL_POSES_ZERO_IDX, [])
self.idx = SMPL_POSES_ZERO_IDX
def loss(self, poses):
return torch.sum(torch.abs(poses[:, self.idx]))
def SmoothLoss(body_params, keys, weight_loss, span=4):
spans = [i for i in range(1, span)]
span_weights = {i:1/i for i in range(1, span)}
span_weights = {key: i/sum(span_weights) for key, i in span_weights.items()}
loss_dict = {}
nFrames = body_params['poses'].shape[0]
for key in ['poses', 'Th']:
k = 'smooth_' + key
if k in weight_loss.keys() and weight_loss[k] > 0.:
loss_dict[k] = 0.
for span in spans:
val = torch.sum((body_params[key][span:, :] - body_params[key][:nFrames-span, :])**2)
loss_dict[k] += span_weights[span] * val
# smooth rotation
rot = batch_rodrigues(body_params['Rh'])
key, k = 'Rh', 'smooth_Rh'
if k in weight_loss.keys() and weight_loss[k] > 0.:
loss_dict[k] = 0.
for span in spans:
val = torch.sum((rot[span:, :] - rot[:nFrames-span, :])**2)
loss_dict[k] += span_weights[span] * val
return loss_dict
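# A minimal sanity sketch (hypothetical): identical parameters across frames
# give zero smoothness loss for every active term.
# >>> bp = {'poses': torch.zeros(5, 72), 'Th': torch.zeros(5, 3), 'Rh': torch.zeros(5, 3)}
# >>> SmoothLoss(bp, ['poses', 'Th'], {'smooth_poses': 1., 'smooth_Th': 1., 'smooth_Rh': 1.})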
def RegularizationLoss(body_params, body_params_init, weight_loss):
loss_dict = {}
for key in ['poses', 'shapes', 'Th']:
if 'init_'+key in weight_loss.keys() and weight_loss['init_'+key] > 0.:
loss_dict['init_'+key] = torch.sum((body_params[key] - body_params_init[key])**2)
for key in ['poses', 'shapes']:
if 'reg_'+key in weight_loss.keys() and weight_loss['reg_'+key] > 0.:
loss_dict['reg_'+key] = torch.sum((body_params[key])**2)
return loss_dict

68
code/pyfitting/operation.py Normal file

@ -0,0 +1,68 @@
'''
@ Date: 2020-11-19 11:39:45
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-11-19 11:50:20
@ FilePath: /EasyMocap/code/pyfitting/operation.py
'''
import torch
def batch_rodrigues(rot_vecs, epsilon=1e-8, dtype=torch.float32):
''' Calculates the rotation matrices for a batch of rotation vectors
Parameters
----------
rot_vecs: torch.tensor Nx3
array of N axis-angle vectors
Returns
-------
R: torch.tensor Nx3x3
The rotation matrices for the given axis-angle parameters
'''
batch_size = rot_vecs.shape[0]
device = rot_vecs.device
angle = torch.norm(rot_vecs + 1e-8, dim=1, keepdim=True)
rot_dir = rot_vecs / angle
cos = torch.unsqueeze(torch.cos(angle), dim=1)
sin = torch.unsqueeze(torch.sin(angle), dim=1)
# Bx1 arrays
rx, ry, rz = torch.split(rot_dir, 1, dim=1)
K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device)
zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device)
K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \
.view((batch_size, 3, 3))
ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0)
rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K)
return rot_mat
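# A minimal check sketch (hypothetical values): a pi/2 rotation about z maps x to y.
# >>> import math
# >>> R = batch_rodrigues(torch.tensor([[0., 0., math.pi/2]]))
# >>> torch.allclose(R[0] @ torch.tensor([1., 0., 0.]), torch.tensor([0., 1., 0.]), atol=1e-6)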
def projection(points3d, camera_intri, R=None, T=None, distance=None):
""" project the 3d points to camera coordinate
Arguments:
points3d {Tensor} -- (bn, N, 3)
camera_intri {Tensor} -- (bn, 3, 3)
distance {Tensor} -- (bn, 1, 1)
Returns:
points2d -- (bn, N, 2)
"""
if R is not None:
Rt = torch.transpose(R, 1, 2)
points3d = torch.matmul(points3d, Rt) + T
if distance is None:
img_points = torch.div(points3d[:, :, :2],
points3d[:, :, 2:3])
else:
img_points = torch.div(points3d[:, :, :2],
distance)
camera_mat = camera_intri[:, :2, :2]
center = torch.transpose(camera_intri[:, :2, 2:3], 1, 2)
img_points = torch.matmul(img_points, camera_mat.transpose(1, 2)) + center
# img_points = torch.einsum('bki,bji->bjk', [camera_mat, img_points]) \
# + center
return img_points
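# A minimal usage sketch (hypothetical shapes): project (1, N, 3) points with a
# batched intrinsic matrix; R/T default to None (points already in camera frame).
# >>> pts = torch.rand(1, 25, 3) + torch.tensor([0., 0., 2.])
# >>> uv = projection(pts, torch.eye(3)[None])   # -> (1, 25, 2)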

131
code/pyfitting/optimize.py Normal file

@ -0,0 +1,131 @@
'''
@ Date: 2020-06-26 12:06:25
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-06-26 12:08:37
@ Author: Qing Shuai
@ Mail: s_q@zju.edu.cn
'''
import numpy as np
import os
from os.path import join
from tqdm import tqdm
import torch
import json
def rel_change(prev_val, curr_val):
return (prev_val - curr_val) / max([np.abs(prev_val), np.abs(curr_val), 1])
class FittingMonitor:
def __init__(self, ftol=1e-5, gtol=1e-6, maxiters=100, visualize=False, verbose=False, **kwargs):
self.maxiters = maxiters
self.ftol = ftol
self.gtol = gtol
self.visualize = visualize
self.verbose = verbose
if self.visualize:
from utils.mesh_viewer import MeshViewer
self.mv = MeshViewer(width=1024, height=1024, bg_color=[1.0, 1.0, 1.0, 1.0],
body_color=[0.65098039, 0.74117647, 0.85882353, 1.0],
offscreen=False)
def run_fitting(self, optimizer, closure, params, smpl_render=None, **kwargs):
prev_loss = None
grad_require(params, True)
if self.verbose:
trange = tqdm(range(self.maxiters), desc='Fitting')
else:
trange = range(self.maxiters)
for iter in trange:
loss = optimizer.step(closure)
if torch.isnan(loss).sum() > 0:
print('NaN loss value, stopping!')
break
if torch.isinf(loss).sum() > 0:
print('Infinite loss value, stopping!')
break
# if all([torch.abs(var.grad.view(-1).max()).item() < self.gtol
# for var in params if var.grad is not None]):
# print('Small grad, stopping!')
# break
if iter > 0 and prev_loss is not None and self.ftol > 0:
loss_rel_change = rel_change(prev_loss, loss.item())
if loss_rel_change <= self.ftol:
break
if self.visualize:
vertices = smpl_render.GetVertices(**kwargs)
self.mv.update_mesh(vertices[::10], smpl_render.faces)
prev_loss = loss.item()
grad_require(params, False)
return prev_loss
def close(self):
if self.visualize:
self.mv.close_viewer()
class FittingLog:
if False:
from tensorboardX import SummaryWriter
swriter = SummaryWriter()
def __init__(self, log_name, useVisdom=False):
if not os.path.exists(log_name):
log_file = open(log_name, 'w')
self.index = {log_name:0}
else:
log_file = open(log_name, 'r')
log_pre = log_file.readlines()
log_file.close()
self.index = {log_name:len(log_pre)}
log_file = open(log_name, 'a')
self.log_file = log_file
self.useVisdom = useVisdom
if useVisdom:
import visdom
self.vis = visdom.Visdom(env=os.path.realpath(
join(os.path.dirname(log_name), '..')).replace(os.sep, '_'))
elif False:
self.writer = FittingLog.swriter
self.log_name = log_name
def step(self, loss_dict, weight_loss):
print(' '.join([key + ' %f'%(loss_dict[key].item()*weight_loss[key])
for key in loss_dict.keys() if weight_loss[key]>0]), file=self.log_file)
loss = {key:loss_dict[key].item()*weight_loss[key]
for key in loss_dict.keys() if weight_loss[key]>0}
if self.useVisdom:
name = list(loss.keys())
val = list(loss.values())
x = self.index.get(self.log_name, 0)
if len(val) == 1:
y = np.array(val)
else:
y = np.array(val).reshape(-1, len(val))
self.vis.line(Y=y,X=np.ones(y.shape)*x,
win=str(self.log_name),#unicode
opts=dict(legend=name,
title=self.log_name),
update=None if x == 0 else 'append'
)
elif False:
self.writer.add_scalars('data/{}'.format(self.log_name), loss, self.index[self.log_name])
self.index[self.log_name] += 1
def log_loss(self, weight_loss):
loss = json.dumps(weight_loss, indent=4)
self.log_file.writelines(loss)
self.log_file.write('\n')
def close(self):
self.log_file.close()
def grad_require(paras, flag=False):
if isinstance(paras, list):
for par in paras:
par.requires_grad = flag
elif isinstance(paras, dict):
for key, par in paras.items():
par.requires_grad = flag

353
code/pyfitting/optimize_simple.py Normal file

@ -0,0 +1,353 @@
'''
@ Date: 2020-11-19 10:49:26
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 20:19:34
@ FilePath: /EasyMocap/code/pyfitting/optimize_simple.py
'''
import numpy as np
import torch
from .lbfgs import LBFGS
from .optimize import FittingMonitor, grad_require, FittingLog
from .lossfactory import SMPLAngleLoss, SmoothLoss, RegularizationLoss, ReprojectionLoss
def create_closure(optimizer, body_model, body_params, body_params_init, cam_params,
keypoints2d, bboxes, weight_loss_, debug=False, verbose=False):
K, Rc, Tc = cam_params['K'], cam_params['Rc'], cam_params['Tc']
bbox_sizes = np.maximum(bboxes[:, 2] - bboxes[:, 0], bboxes[:, 3] - bboxes[:, 1])/100
inv_bbox_sizes = torch.Tensor(1./bbox_sizes[:, None, None]).to(body_model.device)**2
nFrames = keypoints2d.shape[0]
nPoses = body_params['poses'].shape[0]
if nFrames == nPoses:
weight_loss = {key:val/nFrames for key, val in weight_loss_.items()} # normalize by frames
else:
# the multiple views case
weight_loss = weight_loss_.copy()
weight_loss['repro'] /= nFrames
angle_prior = SMPLAngleLoss(keypoints2d)
if len(K.shape) == 2:
K, Rc, Tc = K[None, :, :], Rc[None, :, :], Tc[None, :, :]
def closure(debug=False):
optimizer.zero_grad()
keypoints3d = body_model(return_verts=False, return_tensor=True, **body_params)
loss_dict = {}
loss_dict['repro'] = ReprojectionLoss(keypoints3d, keypoints2d, K, Rc, Tc, inv_bbox_sizes)
# smooth
loss_dict.update(SmoothLoss(body_params, ['poses', 'Th'], weight_loss))
# regularize
loss_dict.update(RegularizationLoss(body_params, body_params_init, weight_loss))
loss_dict['reg_poses_zero'] = angle_prior.loss(body_params['poses'])
# fittingLog.step(loss_dict, weight_loss)
if verbose:
print(' '.join([key + ' %f'%(loss_dict[key].item()*weight_loss[key])
for key in loss_dict.keys() if weight_loss[key]>0]))
loss = sum([loss_dict[key]*weight_loss[key]
for key in loss_dict.keys()])
if not debug:
loss.backward()
return loss
else:
return loss_dict
return closure
def findNeighborIdx(nf, nF, nspan):
idx = [i for i in range(nf-nspan, nf+nspan+1) if i >=0 and i<nF and i!= nf]
return idx
def viewSelection(body_params, span=5):
    """ Temporally smooth the body parameters and suppress outlier frames
    Args:
        body_params (DictParams)
        span (int): window size, must be odd
    """
assert span % 2 == 1, 'span = {} must be an odd number'.format(span)
nspan = (span - 1)//2
# for linear data
nF = body_params['poses'].shape[0]
min_thres = {'Th': 0.2, 'poses': 0.2}
for key in ['poses', 'Th']:
weight = np.ones((nF))
for nf in range(nF):
# first find neighbor
neighbor_idx = findNeighborIdx(nf, nF, nspan)
dist = np.linalg.norm(body_params[key][neighbor_idx, :] - body_params[key][nf:nf+1, :], axis=1)
conf = dist.min()/np.linalg.norm(body_params[key][nf])
weight[nf] = min_thres[key] - conf
if conf > min_thres[key]:
weight[nf] = 0
for nf in range(nF):
neighbor_idx = findNeighborIdx(nf, nF, nspan) + [nf]
weight_cur = weight[neighbor_idx]
weight_sum = weight_cur.sum()
            if weight_sum > 0:
                val = body_params[key][neighbor_idx, :]*weight_cur[:, None]
                val = val.sum(axis=0)/weight_sum
                # replace this frame with the weighted average of its window
                body_params[key][nf] = val
    # TODO: rotation parameters (Rh) are not smoothed here
return body_params
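# A quick sketch of the intended effect (assumes an 11-frame track around a
# constant pose with one injected outlier at frame 5):
#   rng = np.random.RandomState(0)
#   body_params = {'poses': 1 + 0.01*rng.randn(11, 72),
#                  'Th': 1 + 0.01*rng.randn(11, 3)}
#   body_params['Th'][5] += 5.                 # outlier frame
#   body_params = viewSelection(body_params, span=5)
#   # body_params['Th'][5] is pulled back towards its neighbors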
def optimizeVideo(body_model, params_init, cam_params,
keypoints, bbox,
weight, cfg=None):
""" simple function for optimizing video/image
Args:
body_model (SMPL model)
params_init (DictParam): poses(F, 72), shapes(1/F, 10), Rh(F, 3), Th(F, 3)
cam_params (DictParam): K, R, T
keypoints (F, J, 3): 2D keypoints
bbox (F, 7): bounding box
weight (Dict): string:float
cfg (Config): Config Node controling running mode
"""
device = body_model.device
cam_params = {key:torch.Tensor(val).to(device) for key, val in cam_params.items()}
keypoints = torch.Tensor(keypoints).to(device)
body_params = {key:torch.Tensor(val).to(device) for key, val in params_init.items()}
body_params_init = {key:val.clone() for key, val in body_params.items()}
if cfg is None:
opt_params = [body_params['Rh'], body_params['Th'], body_params['poses']]
else:
opt_params = []
if cfg.OPT_R:
opt_params.append(body_params['Rh'])
if cfg.OPT_T:
opt_params.append(body_params['Th'])
if cfg.OPT_POSE:
opt_params.append(body_params['poses'])
grad_require(opt_params, True)
optimizer = LBFGS(
opt_params, line_search_fn='strong_wolfe')
    closure = create_closure(optimizer, body_model, body_params,
                             body_params_init, cam_params,
                             keypoints, bbox, weight,
                             verbose=cfg.VERBOSE if cfg is not None else False)
fitting = FittingMonitor(ftol=1e-4)
final_loss = fitting.run_fitting(optimizer, closure, opt_params)
fitting.close()
grad_require(opt_params, False)
body_params = {key:val.detach().cpu().numpy() for key, val in body_params.items()}
return body_params
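# Typical call pattern (a sketch; assumes the SMPL model, the cameras and the
# 2D detections of an F-frame monocular sequence are loaded, and that
# Config/init_params come from code/smplmodel):
#   cfg = Config()
#   cfg.OPT_R = cfg.OPT_T = cfg.OPT_POSE = True
#   params = init_params(nFrames=F)
#   params = optimizeVideo(body_model, params, {'K': K, 'Rc': Rc, 'Tc': Tc},
#                          keypoints2d, bboxes, weight, cfg=cfg)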
def optimizeMultiImage(body_model, params_init, cam_params,
keypoints, bbox,
weight, cfg):
""" simple function for optimizing multiple images
Args:
body_model (SMPL model)
params_init (DictParam): poses(1, 72), shapes(1, 10), Rh(1, 3), Th(1, 3)
cam_params (DictParam): K(nV, 3, 3), R(nV, 3, 3), T(nV, 3, 1)
keypoints (nV, J, 3): 2D keypoints
bbox (nV, 7): bounding box
weight (Dict): string:float
cfg (Config): Config Node controling running mode
"""
device = body_model.device
cam_params = {key:torch.Tensor(val).to(device) for key, val in cam_params.items()}
keypoints = torch.Tensor(keypoints).to(device)
body_params = {key:torch.Tensor(val).to(device) for key, val in params_init.items()}
body_params_init = {key:val.clone() for key, val in body_params.items()}
if cfg is None:
opt_params = [body_params['Rh'], body_params['Th'], body_params['poses']]
else:
opt_params = []
if cfg.OPT_R:
opt_params.append(body_params['Rh'])
if cfg.OPT_T:
opt_params.append(body_params['Th'])
if cfg.OPT_POSE:
opt_params.append(body_params['poses'])
if cfg.OPT_SHAPE:
opt_params.append(body_params['shapes'])
grad_require(opt_params, True)
optimizer = LBFGS(
opt_params, line_search_fn='strong_wolfe')
closure = create_closure(optimizer, body_model, body_params,
body_params_init, cam_params,
keypoints, bbox, weight, verbose=cfg.VERBOSE)
fitting = FittingMonitor(ftol=1e-4)
final_loss = fitting.run_fitting(optimizer, closure, opt_params)
fitting.close()
grad_require(opt_params, False)
loss_dict = closure(debug=True)
for key in loss_dict.keys():
loss_dict[key] = loss_dict[key].item()
body_params = {key:val.detach().cpu().numpy() for key, val in body_params.items()}
return body_params, loss_dict
def optimizeShape(body_model, body_params, keypoints3d,
weight_loss, kintree, cfg=None):
""" simple function for optimizing model shape given 3d keypoints
Args:
body_model (SMPL model)
params_init (DictParam): poses(1, 72), shapes(1, 10), Rh(1, 3), Th(1, 3)
keypoints (nFrames, nJoints, 3): 3D keypoints
weight (Dict): string:float
kintree ([[src, dst]]): list of list:int
cfg (Config): Config Node controling running mode
"""
device = body_model.device
    # compute the length of each limb
    kintree = np.array(kintree, dtype=int)
# limb_length: nFrames, nLimbs, 1
limb_length = np.linalg.norm(keypoints3d[:, kintree[:, 1], :3] - keypoints3d[:, kintree[:, 0], :3], axis=2, keepdims=True)
# conf: nFrames, nLimbs, 1
limb_conf = np.minimum(keypoints3d[:, kintree[:, 1], 3:], keypoints3d[:, kintree[:, 0], 3:])
limb_length = torch.Tensor(limb_length).to(device)
limb_conf = torch.Tensor(limb_conf).to(device)
body_params = {key:torch.Tensor(val).to(device) for key, val in body_params.items()}
opt_params = [body_params['shapes']]
grad_require(opt_params, True)
optimizer = LBFGS(
opt_params, line_search_fn='strong_wolfe', max_iter=10)
nFrames = keypoints3d.shape[0]
verbose = False
    def closure(debug=False):
        optimizer.zero_grad()
        # joints regressed from the current shape; renamed so that it does
        # not shadow the observed keypoints3d
        kpts_est = body_model(return_verts=False, return_tensor=True, only_shape=True, **body_params)
        src = kpts_est[:, kintree[:, 0], :3]
        dst = kpts_est[:, kintree[:, 1], :3]
direct_est = (dst - src).detach()
direct_norm = torch.norm(direct_est, dim=2, keepdim=True)
direct_normalized = direct_est/direct_norm
err = dst - src - direct_normalized * limb_length
loss_dict = {
's3d': torch.sum(err**2*limb_conf)/nFrames,
'reg_shape': torch.sum(body_params['shapes']**2)}
# fittingLog.step(loss_dict, weight_loss)
if verbose:
print(' '.join([key + ' %f'%(loss_dict[key].item()*weight_loss[key])
for key in loss_dict.keys() if weight_loss[key]>0]))
loss = sum([loss_dict[key]*weight_loss[key]
for key in loss_dict.keys()])
if not debug:
loss.backward()
return loss
else:
return loss_dict
fitting = FittingMonitor(ftol=1e-4)
final_loss = fitting.run_fitting(optimizer, closure, opt_params)
fitting.close()
grad_require(opt_params, False)
body_params = {key:val.detach().cpu().numpy() for key, val in body_params.items()}
return body_params
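# The shape term above matches the estimated limb vector (dst - src) to the
# observed limb length along the estimated direction. A worked number (a
# sketch): an estimated limb of length 0.6 with an observed length of 0.5
# leaves a residual of norm 0.1:
#   src = np.zeros((1, 3)); dst = np.array([[0.6, 0., 0.]])
#   direct = (dst - src)/np.linalg.norm(dst - src)
#   np.linalg.norm(dst - src - direct*0.5)  # -> 0.1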
def optimizePose(body_model, body_params, keypoints3d,
weight_loss, kintree, cfg=None):
""" simple function for optimizing model pose given 3d keypoints
Args:
body_model (SMPL model)
params_init (DictParam): poses(1, 72), shapes(1, 10), Rh(1, 3), Th(1, 3)
keypoints (nFrames, nJoints, 3): 3D keypoints
weight (Dict): string:float
kintree ([[src, dst]]): list of list:int
cfg (Config): Config Node controling running mode
"""
device = body_model.device
    # compute the length of each limb
    kintree = np.array(kintree, dtype=int)
nFrames = keypoints3d.shape[0]
# limb_length: nFrames, nLimbs, 1
limb = keypoints3d[:, kintree[:, 1], :3] - keypoints3d[:, kintree[:, 0], :3]
limb_length = np.linalg.norm(limb, axis=2, keepdims=True)
# conf: nFrames, nLimbs, 1
limb_conf = np.minimum(keypoints3d[:, kintree[:, 1], 3:], keypoints3d[:, kintree[:, 0], 3:])
limb_dir = limb/limb_length
keypoints3d = torch.Tensor(keypoints3d).to(device)
limb_dir = torch.Tensor(limb_dir).to(device).unsqueeze(2)
limb_conf = torch.Tensor(limb_conf).to(device)
angle_prior = SMPLAngleLoss(keypoints3d)
body_params = {key:torch.Tensor(val).to(device) for key, val in body_params.items()}
if cfg is None:
opt_params = [body_params['Rh'], body_params['Th'], body_params['poses']]
verbose = False
else:
opt_params = []
if cfg.OPT_R:
opt_params.append(body_params['Rh'])
if cfg.OPT_T:
opt_params.append(body_params['Th'])
if cfg.OPT_POSE:
opt_params.append(body_params['poses'])
if cfg.OPT_SHAPE:
opt_params.append(body_params['shapes'])
verbose = cfg.VERBOSE
grad_require(opt_params, True)
optimizer = LBFGS(
opt_params, line_search_fn='strong_wolfe')
    # the global orientation is handled by Rh, so the first 3 pose entries
    # are replaced by zeros inside the closure
    zero_pose = torch.zeros((nFrames, 3), device=device)
def closure(debug=False):
optimizer.zero_grad()
new_params = body_params.copy()
new_params['poses'] = torch.cat([zero_pose, body_params['poses'][:, 3:]], dim=1)
kpts_est = body_model(return_verts=False, return_tensor=True, **new_params)
diff_square = (kpts_est - keypoints3d[..., :3])**2
        conf = keypoints3d[..., 3:]
        loss_3d = torch.sum(conf * diff_square)
loss_dict = {
'k3d': loss_3d,
'reg_poses_zero': angle_prior.loss(body_params['poses'])
}
# smooth
loss_dict.update(SmoothLoss(body_params, ['poses', 'Th'], weight_loss))
for key in loss_dict.keys():
loss_dict[key] = loss_dict[key]/nFrames
# fittingLog.step(loss_dict, weight_loss)
if verbose:
print(' '.join([key + ' %f'%(loss_dict[key].item()*weight_loss[key])
for key in loss_dict.keys() if weight_loss[key]>0]))
loss = sum([loss_dict[key]*weight_loss[key]
for key in loss_dict.keys()])
if not debug:
loss.backward()
return loss
else:
return loss_dict
fitting = FittingMonitor(ftol=1e-4)
final_loss = fitting.run_fitting(optimizer, closure, opt_params)
fitting.close()
grad_require(opt_params, False)
body_params = {key:val.detach().cpu().numpy() for key, val in body_params.items()}
return body_params
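# Typical call pattern (a sketch; keypoints3d is an (nFrames, nJoints, 4)
# array of triangulated joints with confidence, and Config comes from
# code/smplmodel):
#   cfg = Config()
#   cfg.OPT_POSE = True
#   body_params = optimizePose(body_model, body_params, keypoints3d,
#                              weight_loss, kintree, cfg=cfg)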

9
code/smplmodel/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
'''
@ Date: 2020-11-18 14:33:20
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 20:12:26
@ FilePath: /EasyMocap/code/smplmodel/__init__.py
'''
from .body_model import SMPLlayer
from .body_param import merge_params, select_nf, init_params, Config

120
code/smplmodel/body_model.py Normal file
View File

@@ -0,0 +1,120 @@
'''
@ Date: 2020-11-18 14:04:10
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 20:14:34
@ FilePath: /EasyMocap/code/smplmodel/body_model.py
'''
import torch
import torch.nn as nn
from .lbs import lbs, batch_rodrigues
import os.path as osp
import pickle
import numpy as np
def to_tensor(array, dtype=torch.float32, device=torch.device('cpu')):
    if not torch.is_tensor(array):
return torch.tensor(array, dtype=dtype).to(device)
else:
return array.to(device)
def to_np(array, dtype=np.float32):
if 'scipy.sparse' in str(type(array)):
array = array.todense()
return np.array(array, dtype=dtype)
class SMPLlayer(nn.Module):
def __init__(self, model_path, gender='neutral', device=None,
regressor_path=None) -> None:
super(SMPLlayer, self).__init__()
dtype = torch.float32
self.dtype = dtype
self.device = device
# create the SMPL model
if osp.isdir(model_path):
model_fn = 'SMPL_{}.{ext}'.format(gender.upper(), ext='pkl')
smpl_path = osp.join(model_path, model_fn)
else:
smpl_path = model_path
assert osp.exists(smpl_path), 'Path {} does not exist!'.format(
smpl_path)
with open(smpl_path, 'rb') as smpl_file:
data = pickle.load(smpl_file, encoding='latin1')
self.faces = data['f']
self.register_buffer('faces_tensor',
to_tensor(to_np(self.faces, dtype=np.int64),
dtype=torch.long))
        # Pose blend shape basis: 6890 x 3 x 207
        num_pose_basis = data['posedirs'].shape[-1]
        # keep the raw (6890, 3, 207) basis for the joint regressor below
        posedirs = data['posedirs']
        # reshape to (6890*3, 207), then transpose to (207, 20670)
        data['posedirs'] = np.reshape(data['posedirs'], [-1, num_pose_basis]).T
for key in ['J_regressor', 'v_template', 'weights', 'posedirs', 'shapedirs']:
val = to_tensor(to_np(data[key]), dtype=dtype)
self.register_buffer(key, val)
# indices of parents for each joints
parents = to_tensor(to_np(data['kintree_table'][0])).long()
parents[0] = -1
self.register_buffer('parents', parents)
# joints regressor
if regressor_path is not None:
X_regressor = to_tensor(np.load(regressor_path))
X_regressor = torch.cat((self.J_regressor, X_regressor), dim=0)
j_J_regressor = torch.zeros(24, X_regressor.shape[0], device=device)
for i in range(24):
j_J_regressor[i, i] = 1
j_v_template = X_regressor @ self.v_template
#
j_shapedirs = torch.einsum('vij,kv->kij', [self.shapedirs, X_regressor])
# (25, 24)
j_weights = X_regressor @ self.weights
j_posedirs = torch.einsum('ab, bde->ade', [X_regressor, torch.Tensor(posedirs)]).numpy()
j_posedirs = np.reshape(j_posedirs, [-1, num_pose_basis]).T
j_posedirs = to_tensor(j_posedirs)
self.register_buffer('j_posedirs', j_posedirs)
self.register_buffer('j_shapedirs', j_shapedirs)
self.register_buffer('j_weights', j_weights)
self.register_buffer('j_v_template', j_v_template)
self.register_buffer('j_J_regressor', j_J_regressor)
    def forward(self, poses, shapes, Rh=None, Th=None, return_verts=True, return_tensor=True, only_shape=False, **kwargs):
        """ Forward pass for SMPL model
        Args:
            poses (n, 72)
            shapes (n, 10)
            Rh (n, 3): global orientation
            Th (n, 3): global translation
            return_verts (bool, optional): if True return the vertices (n, 6890, 3),
                else return the regressed keypoints. Defaults to True.
        """
        if not torch.is_tensor(poses):
            dtype, device = self.dtype, self.device
            poses = to_tensor(poses, dtype, device)
            shapes = to_tensor(shapes, dtype, device)
            # Rh and Th may be None; only convert them when they are given
            if Rh is not None:
                Rh = to_tensor(Rh, dtype, device)
            if Th is not None:
                Th = to_tensor(Th, dtype, device)
        bn = poses.shape[0]
        if Rh is None:
            Rh = torch.zeros(bn, 3, device=poses.device)
        if Th is None:
            Th = torch.zeros(bn, 3, device=poses.device)
        rot = batch_rodrigues(Rh)
        transl = Th.unsqueeze(dim=1)
if shapes.shape[0] < bn:
shapes = shapes.expand(bn, -1)
if return_verts:
vertices, joints = lbs(shapes, poses, self.v_template,
self.shapedirs, self.posedirs,
self.J_regressor, self.parents,
self.weights, pose2rot=True, dtype=self.dtype)
else:
vertices, joints = lbs(shapes, poses, self.j_v_template,
self.j_shapedirs, self.j_posedirs,
self.j_J_regressor, self.parents,
self.j_weights, pose2rot=True, dtype=self.dtype, only_shape=only_shape)
vertices = vertices[:, 24:, :]
vertices = torch.matmul(vertices, rot.transpose(1, 2)) + transl
if not return_tensor:
vertices = vertices.detach().cpu().numpy()
return vertices
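# Example call pattern (a sketch; assumes the neutral SMPL model file has
# been placed at data/smpl/SMPL_NEUTRAL.pkl):
#   body_model = SMPLlayer('data/smpl', gender='neutral',
#                          device=torch.device('cpu'))
#   params = {'poses': np.zeros((1, 72)), 'shapes': np.zeros((1, 10)),
#             'Rh': np.zeros((1, 3)), 'Th': np.zeros((1, 3))}
#   verts = body_model(return_verts=True, return_tensor=False, **params)
#   print(verts.shape)  # (1, 6890, 3)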

42
code/smplmodel/body_param.py Normal file
View File

@@ -0,0 +1,42 @@
'''
@ Date: 2020-11-20 13:34:54
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 20:09:40
@ FilePath: /EasyMocap/code/smplmodel/body_param.py
'''
import numpy as np
def merge_params(param_list, share_shape=True):
output = {}
for key in ['poses', 'shapes', 'Rh', 'Th']:
output[key] = np.vstack([v[key] for v in param_list])
if share_shape:
output['shapes'] = output['shapes'].mean(axis=0, keepdims=True)
return output
def select_nf(params_all, nf):
output = {}
for key in ['poses', 'Rh', 'Th']:
output[key] = params_all[key][nf:nf+1, :]
if params_all['shapes'].shape[0] == 1:
output['shapes'] = params_all['shapes']
else:
output['shapes'] = params_all['shapes'][nf:nf+1, :]
return output
def init_params(nFrames=1):
params = {
'poses': np.zeros((nFrames, 72)),
'shapes': np.zeros((1, 10)),
'Rh': np.zeros((nFrames, 3)),
'Th': np.zeros((nFrames, 3)),
}
return params
class Config:
OPT_R = False
OPT_T = False
OPT_POSE = False
OPT_SHAPE = False
VERBOSE = False
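if __name__ == '__main__':
    # Round-trip sketch: build per-frame parameters, merge them with a
    # shared shape, then select a single frame again.
    p0, p1 = init_params(), init_params()
    merged = merge_params([p0, p1], share_shape=True)
    print(merged['poses'].shape, merged['shapes'].shape)  # (2, 72) (1, 10)
    frame = select_nf(merged, 1)
    print(frame['poses'].shape, frame['shapes'].shape)    # (1, 72) (1, 10)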

378
code/smplmodel/lbs.py Normal file
View File

@@ -0,0 +1,378 @@
# -*- coding: utf-8 -*-
# Max-Planck-Gesellschaft zur Förderung der Wissenschaften e.V. (MPG) is
# holder of all proprietary rights on this computer program.
# You can only use this computer program if you have closed
# a license agreement with MPG or you get the right to use the computer
# program from someone who is authorized to grant you that right.
# Any use of the computer program without a valid license is prohibited and
# liable to prosecution.
#
# Copyright©2019 Max-Planck-Gesellschaft zur Förderung
# der Wissenschaften e.V. (MPG). acting on behalf of its Max Planck Institute
# for Intelligent Systems. All rights reserved.
#
# Contact: ps-license@tuebingen.mpg.de
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import numpy as np
import torch
import torch.nn.functional as F
def rot_mat_to_euler(rot_mats):
    # Converts a rotation matrix to Euler angles
    # Careful for extreme cases of Euler angles like [0.0, pi, 0.0]
sy = torch.sqrt(rot_mats[:, 0, 0] * rot_mats[:, 0, 0] +
rot_mats[:, 1, 0] * rot_mats[:, 1, 0])
return torch.atan2(-rot_mats[:, 2, 0], sy)
def find_dynamic_lmk_idx_and_bcoords(vertices, pose, dynamic_lmk_faces_idx,
dynamic_lmk_b_coords,
neck_kin_chain, dtype=torch.float32):
''' Compute the faces, barycentric coordinates for the dynamic landmarks
To do so, we first compute the rotation of the neck around the y-axis
and then use a pre-computed look-up table to find the faces and the
barycentric coordinates that will be used.
Special thanks to Soubhik Sanyal (soubhik.sanyal@tuebingen.mpg.de)
for providing the original TensorFlow implementation and for the LUT.
Parameters
----------
vertices: torch.tensor BxVx3, dtype = torch.float32
The tensor of input vertices
pose: torch.tensor Bx(Jx3), dtype = torch.float32
The current pose of the body model
dynamic_lmk_faces_idx: torch.tensor L, dtype = torch.long
The look-up table from neck rotation to faces
dynamic_lmk_b_coords: torch.tensor Lx3, dtype = torch.float32
The look-up table from neck rotation to barycentric coordinates
neck_kin_chain: list
A python list that contains the indices of the joints that form the
kinematic chain of the neck.
dtype: torch.dtype, optional
Returns
-------
dyn_lmk_faces_idx: torch.tensor, dtype = torch.long
A tensor of size BxL that contains the indices of the faces that
will be used to compute the current dynamic landmarks.
        dyn_lmk_b_coords: torch.tensor, dtype = torch.float32
            A tensor of size BxLx3 that contains the barycentric coordinates
            that will be used to compute the current dynamic landmarks.
'''
batch_size = vertices.shape[0]
aa_pose = torch.index_select(pose.view(batch_size, -1, 3), 1,
neck_kin_chain)
rot_mats = batch_rodrigues(
aa_pose.view(-1, 3), dtype=dtype).view(batch_size, -1, 3, 3)
rel_rot_mat = torch.eye(3, device=vertices.device,
dtype=dtype).unsqueeze_(dim=0)
for idx in range(len(neck_kin_chain)):
rel_rot_mat = torch.bmm(rot_mats[:, idx], rel_rot_mat)
y_rot_angle = torch.round(
torch.clamp(-rot_mat_to_euler(rel_rot_mat) * 180.0 / np.pi,
max=39)).to(dtype=torch.long)
neg_mask = y_rot_angle.lt(0).to(dtype=torch.long)
mask = y_rot_angle.lt(-39).to(dtype=torch.long)
neg_vals = mask * 78 + (1 - mask) * (39 - y_rot_angle)
y_rot_angle = (neg_mask * neg_vals +
(1 - neg_mask) * y_rot_angle)
dyn_lmk_faces_idx = torch.index_select(dynamic_lmk_faces_idx,
0, y_rot_angle)
dyn_lmk_b_coords = torch.index_select(dynamic_lmk_b_coords,
0, y_rot_angle)
return dyn_lmk_faces_idx, dyn_lmk_b_coords
def vertices2landmarks(vertices, faces, lmk_faces_idx, lmk_bary_coords):
''' Calculates landmarks by barycentric interpolation
Parameters
----------
vertices: torch.tensor BxVx3, dtype = torch.float32
The tensor of input vertices
faces: torch.tensor Fx3, dtype = torch.long
The faces of the mesh
lmk_faces_idx: torch.tensor L, dtype = torch.long
The tensor with the indices of the faces used to calculate the
landmarks.
lmk_bary_coords: torch.tensor Lx3, dtype = torch.float32
The tensor of barycentric coordinates that are used to interpolate
the landmarks
Returns
-------
landmarks: torch.tensor BxLx3, dtype = torch.float32
The coordinates of the landmarks for each mesh in the batch
'''
# Extract the indices of the vertices for each face
# BxLx3
batch_size, num_verts = vertices.shape[:2]
device = vertices.device
lmk_faces = torch.index_select(faces, 0, lmk_faces_idx.view(-1)).view(
batch_size, -1, 3)
lmk_faces += torch.arange(
batch_size, dtype=torch.long, device=device).view(-1, 1, 1) * num_verts
lmk_vertices = vertices.view(-1, 3)[lmk_faces].view(
batch_size, -1, 3, 3)
landmarks = torch.einsum('blfi,blf->bli', [lmk_vertices, lmk_bary_coords])
return landmarks
def lbs(betas, pose, v_template, shapedirs, posedirs, J_regressor, parents,
lbs_weights, pose2rot=True, dtype=torch.float32, only_shape=False):
''' Performs Linear Blend Skinning with the given shape and pose parameters
Parameters
----------
betas : torch.tensor BxNB
The tensor of shape parameters
pose : torch.tensor Bx(J + 1) * 3
The pose parameters in axis-angle format
v_template torch.tensor BxVx3
The template mesh that will be deformed
shapedirs : torch.tensor 1xNB
The tensor of PCA shape displacements
posedirs : torch.tensor Px(V * 3)
The pose PCA coefficients
J_regressor : torch.tensor JxV
The regressor array that is used to calculate the joints from
the position of the vertices
parents: torch.tensor J
The array that describes the kinematic tree for the model
lbs_weights: torch.tensor N x V x (J + 1)
The linear blend skinning weights that represent how much the
rotation matrix of each part affects each vertex
pose2rot: bool, optional
Flag on whether to convert the input pose tensor to rotation
matrices. The default value is True. If False, then the pose tensor
should already contain rotation matrices and have a size of
Bx(J + 1)x9
dtype: torch.dtype, optional
Returns
-------
verts: torch.tensor BxVx3
The vertices of the mesh after applying the shape and pose
displacements.
joints: torch.tensor BxJx3
The joints of the model
'''
batch_size = max(betas.shape[0], pose.shape[0])
device = betas.device
    # 1. Add shape contribution
    v_shaped = v_template + blend_shapes(betas, shapedirs)
    # 2. Get the joints
# NxJx3 array
J = vertices2joints(J_regressor, v_shaped)
if only_shape:
return v_shaped, J
# 3. Add pose blend shapes
# N x J x 3 x 3
ident = torch.eye(3, dtype=dtype, device=device)
if pose2rot:
rot_mats = batch_rodrigues(
pose.view(-1, 3), dtype=dtype).view([batch_size, -1, 3, 3])
pose_feature = (rot_mats[:, 1:, :, :] - ident).view([batch_size, -1])
# (N x P) x (P, V * 3) -> N x V x 3
pose_offsets = torch.matmul(pose_feature, posedirs) \
.view(batch_size, -1, 3)
else:
pose_feature = pose[:, 1:].view(batch_size, -1, 3, 3) - ident
rot_mats = pose.view(batch_size, -1, 3, 3)
pose_offsets = torch.matmul(pose_feature.view(batch_size, -1),
posedirs).view(batch_size, -1, 3)
v_posed = pose_offsets + v_shaped
# 4. Get the global joint location
J_transformed, A = batch_rigid_transform(rot_mats, J, parents, dtype=dtype)
# 5. Do skinning:
# W is N x V x (J + 1)
W = lbs_weights.unsqueeze(dim=0).expand([batch_size, -1, -1])
# (N x V x (J + 1)) x (N x (J + 1) x 16)
num_joints = J_regressor.shape[0]
T = torch.matmul(W, A.view(batch_size, num_joints, 16)) \
.view(batch_size, -1, 4, 4)
homogen_coord = torch.ones([batch_size, v_posed.shape[1], 1],
dtype=dtype, device=device)
v_posed_homo = torch.cat([v_posed, homogen_coord], dim=2)
v_homo = torch.matmul(T, torch.unsqueeze(v_posed_homo, dim=-1))
verts = v_homo[:, :, :3, 0]
return verts, J_transformed
def vertices2joints(J_regressor, vertices):
''' Calculates the 3D joint locations from the vertices
Parameters
----------
J_regressor : torch.tensor JxV
The regressor array that is used to calculate the joints from the
position of the vertices
vertices : torch.tensor BxVx3
The tensor of mesh vertices
Returns
-------
torch.tensor BxJx3
The location of the joints
'''
return torch.einsum('bik,ji->bjk', [vertices, J_regressor])
def blend_shapes(betas, shape_disps):
''' Calculates the per vertex displacement due to the blend shapes
Parameters
----------
betas : torch.tensor Bx(num_betas)
Blend shape coefficients
shape_disps: torch.tensor Vx3x(num_betas)
Blend shapes
Returns
-------
torch.tensor BxVx3
The per-vertex displacement due to shape deformation
'''
# Displacement[b, m, k] = sum_{l} betas[b, l] * shape_disps[m, k, l]
# i.e. Multiply each shape displacement by its corresponding beta and
# then sum them.
blend_shape = torch.einsum('bl,mkl->bmk', [betas, shape_disps])
return blend_shape
def batch_rodrigues(rot_vecs, epsilon=1e-8, dtype=torch.float32):
''' Calculates the rotation matrices for a batch of rotation vectors
Parameters
----------
rot_vecs: torch.tensor Nx3
array of N axis-angle vectors
Returns
-------
R: torch.tensor Nx3x3
The rotation matrices for the given axis-angle parameters
'''
batch_size = rot_vecs.shape[0]
device = rot_vecs.device
    angle = torch.norm(rot_vecs + epsilon, dim=1, keepdim=True)
rot_dir = rot_vecs / angle
cos = torch.unsqueeze(torch.cos(angle), dim=1)
sin = torch.unsqueeze(torch.sin(angle), dim=1)
# Bx1 arrays
rx, ry, rz = torch.split(rot_dir, 1, dim=1)
K = torch.zeros((batch_size, 3, 3), dtype=dtype, device=device)
zeros = torch.zeros((batch_size, 1), dtype=dtype, device=device)
K = torch.cat([zeros, -rz, ry, rz, zeros, -rx, -ry, rx, zeros], dim=1) \
.view((batch_size, 3, 3))
ident = torch.eye(3, dtype=dtype, device=device).unsqueeze(dim=0)
rot_mat = ident + sin * K + (1 - cos) * torch.bmm(K, K)
return rot_mat
def transform_mat(R, t):
''' Creates a batch of transformation matrices
Args:
- R: Bx3x3 array of a batch of rotation matrices
- t: Bx3x1 array of a batch of translation vectors
Returns:
- T: Bx4x4 Transformation matrix
'''
# No padding left or right, only add an extra row
return torch.cat([F.pad(R, [0, 0, 0, 1]),
F.pad(t, [0, 0, 0, 1], value=1)], dim=2)
def batch_rigid_transform(rot_mats, joints, parents, dtype=torch.float32):
"""
Applies a batch of rigid transformations to the joints
Parameters
----------
rot_mats : torch.tensor BxNx3x3
Tensor of rotation matrices
joints : torch.tensor BxNx3
Locations of joints
parents : torch.tensor BxN
The kinematic tree of each object
dtype : torch.dtype, optional:
The data type of the created tensors, the default is torch.float32
Returns
-------
posed_joints : torch.tensor BxNx3
The locations of the joints after applying the pose rotations
rel_transforms : torch.tensor BxNx4x4
The relative (with respect to the root joint) rigid transformations
for all the joints
"""
joints = torch.unsqueeze(joints, dim=-1)
rel_joints = joints.clone()
rel_joints[:, 1:] -= joints[:, parents[1:]]
transforms_mat = transform_mat(
rot_mats.view(-1, 3, 3),
rel_joints.contiguous().view(-1, 3, 1)).view(-1, joints.shape[1], 4, 4)
transform_chain = [transforms_mat[:, 0]]
for i in range(1, parents.shape[0]):
# Subtract the joint location at the rest pose
# No need for rotation, since it's identity when at rest
curr_res = torch.matmul(transform_chain[parents[i]],
transforms_mat[:, i])
transform_chain.append(curr_res)
transforms = torch.stack(transform_chain, dim=1)
# The last column of the transformations contains the posed joints
posed_joints = transforms[:, :, :3, 3]
joints_homogen = F.pad(joints, [0, 0, 0, 1])
rel_transforms = transforms - F.pad(
torch.matmul(transforms, joints_homogen), [3, 0, 0, 0, 0, 0, 0, 0])
return posed_joints, rel_transforms
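if __name__ == '__main__':
    # Sanity checks (a sketch): a rotation of pi/2 about the z-axis should
    # map the x-axis onto the y-axis, and transform_mat should embed (R, t)
    # into a batch of 4x4 homogeneous matrices.
    R = batch_rodrigues(torch.tensor([[0., 0., np.pi/2]]))
    print(R[0] @ torch.tensor([1., 0., 0.]))  # ~[0, 1, 0]
    T = transform_mat(R, torch.zeros(1, 3, 1))
    print(T.shape)  # torch.Size([1, 4, 4])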

294
code/visualize/renderer.py Normal file
View File

@@ -0,0 +1,294 @@
import os
import numpy as np
import cv2
import pyrender
import trimesh
import copy
# The color order here is BGR: the renderer itself works in RGB, but the
# result is composited with the OpenCV (BGR) image, so BGR is used here
colors = [
    # (0.5, 0.2, 0.2, 1.), # Default BGR
(.5, .5, .7, 1.), # Pink BGR
(.44, .50, .98, 1.), # Red
(.7, .7, .6, 1.), # Neutral
(.5, .5, .7, 1.), # Blue
(.5, .55, .3, 1.), # capsule
(.3, .5, .55, 1.), # Yellow
# (.6, .6, .6, 1.), # gray
(.9, 1., 1., 1.),
(0.95, 0.74, 0.65, 1.),
(.9, .7, .7, 1.)
]
colors_table = {
# colorblind/print/copy safe:
'_blue': [0.65098039, 0.74117647, 0.85882353],
'_pink': [.9, .7, .7],
'_mint': [ 166/255., 229/255., 204/255.],
'_mint2': [ 202/255., 229/255., 223/255.],
'_green': [ 153/255., 216/255., 201/255.],
'_green2': [ 171/255., 221/255., 164/255.],
'_red': [ 251/255., 128/255., 114/255.],
'_orange': [ 253/255., 174/255., 97/255.],
'_yellow': [ 250/255., 230/255., 154/255.],
'r':[255/255,0,0],
'g':[0,255/255,0],
'b':[0,0,255/255],
'k':[0,0,0],
'y':[255/255,255/255,0],
'purple':[128/255,0,128/255]
}
def get_colors(pid):
if isinstance(pid, int):
return colors[pid % len(colors)]
elif isinstance(pid, str):
return colors_table[pid]
from pyrender import RenderFlags
render_flags = {
'flip_wireframe': False,
'all_wireframe': False,
'all_solid': True,
'shadows': True,
'vertex_normals': False,
'face_normals': False,
'cull_faces': False, # set to False
'point_size': 1.0,
'rgba':True
}
flags = RenderFlags.NONE
if render_flags['flip_wireframe']:
flags |= RenderFlags.FLIP_WIREFRAME
elif render_flags['all_wireframe']:
flags |= RenderFlags.ALL_WIREFRAME
elif render_flags['all_solid']:
flags |= RenderFlags.ALL_SOLID
if render_flags['shadows']:
flags |= RenderFlags.SHADOWS_DIRECTIONAL | RenderFlags.SHADOWS_SPOT
if render_flags['vertex_normals']:
flags |= RenderFlags.VERTEX_NORMALS
if render_flags['face_normals']:
flags |= RenderFlags.FACE_NORMALS
if not render_flags['cull_faces']:
flags |= RenderFlags.SKIP_CULL_FACES
if render_flags['rgba']:
flags |= RenderFlags.RGBA
class Renderer(object):
    def __init__(self, focal_length=1000, height=512, width=512, faces=None,
                 bg_color=[0.0, 0.0, 0.0, 0.0] # renderer configuration
                 ):
        # pyrender.OffscreenRenderer takes (viewport_width, viewport_height);
        # the viewport is reset to each image's size before rendering anyway
        self.renderer = pyrender.OffscreenRenderer(width, height)
self.faces = faces
self.focal_length = focal_length
self.bg_color = bg_color
self.ambient_light = (0.3, 0.3, 0.3)
def add_light(self, scene):
trans = [0, 0, 0]
# Use 3 directional lights
# Create light source
light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1)
light_pose = np.eye(4)
light_pose[:3, 3] = np.array([0, -1, 1]) + trans
scene.add(light, pose=light_pose)
light_pose[:3, 3] = np.array([0, 1, 1]) + trans
scene.add(light, pose=light_pose)
light_pose[:3, 3] = np.array([1, 1, 2]) + trans
scene.add(light, pose=light_pose)
def render(self, render_data, cameras, images,
use_white=False,
ret_depth=False, ret_color=False):
# Need to flip x-axis
rot = trimesh.transformations.rotation_matrix(
np.radians(180), [1, 0, 0])
output_images, output_colors, output_depths = [], [], []
for nv, img_ in enumerate(images):
if use_white:
img = np.zeros_like(img_, dtype=np.uint8) + 255
else:
img = img_.copy()
K, R, T = cameras['K'][nv], cameras['R'][nv], cameras['T'][nv]
self.renderer.viewport_height = img.shape[0]
self.renderer.viewport_width = img.shape[1]
scene = pyrender.Scene(bg_color=self.bg_color,
ambient_light=self.ambient_light)
for trackId, data in render_data.items():
vert = data['vertices'].copy()
faces = data['faces']
                # if the 'vid' key is present, use it to choose the visualization color
col = get_colors(data.get('vid', trackId))
vert = vert @ R.T + T.T
mesh = trimesh.Trimesh(vert, faces)
mesh.apply_transform(rot)
material = pyrender.MetallicRoughnessMaterial(
metallicFactor=0.0,
alphaMode='OPAQUE',
baseColorFactor=col)
mesh = pyrender.Mesh.from_trimesh(
mesh,
material=material)
scene.add(mesh, 'mesh')
camera_pose = np.eye(4)
camera = pyrender.camera.IntrinsicsCamera(fx=K[0, 0], fy=K[1, 1], cx=K[0, 2], cy=K[1, 2])
scene.add(camera, pose=camera_pose)
self.add_light(scene)
            # The alpha channel was not working previously; needs checking again.
            # Until this is fixed, use a hack with the depth image to get the opacity
            rend_rgba, rend_depth = self.renderer.render(scene, flags=flags)
            if rend_rgba.shape[2] == 3: # failed to generate an alpha channel
valid_mask = (rend_depth > 0)[:, :, None]
rend_rgba = np.dstack((rend_rgba, (valid_mask*255).astype(np.uint8)))
rend_cat = cv2.addWeighted(cv2.bitwise_and(img, 255 - rend_rgba[:, :, 3:4].repeat(3, 2)), 1, rend_rgba[:, :, :3], 1, 0)
output_colors.append(rend_rgba)
output_depths.append(rend_depth)
output_images.append(rend_cat)
if ret_depth:
return output_images, output_depths
elif ret_color:
return output_colors
else:
return output_images
def render_multiview(self, vertices, K, R, T, imglist, trackId=0, return_depth=False, return_color=False,
bg_color=[0.0, 0.0, 0.0, 0.0], camera=None):
# List to store rendered scenes
output_images, output_colors, output_depths = [], [], []
# Need to flip x-axis
rot = trimesh.transformations.rotation_matrix(
np.radians(180), [1, 0, 0])
nViews = len(imglist)
for nv in range(nViews):
img = imglist[nv]
self.renderer.viewport_height = img.shape[0]
self.renderer.viewport_width = img.shape[1]
# Create a scene for each image and render all meshes
scene = pyrender.Scene(bg_color=bg_color,
ambient_light=(0.3, 0.3, 0.3))
camera_pose = np.eye(4)
# for every person in the scene
if isinstance(vertices, dict):
for trackId, data in vertices.items():
vert = data['vertices'].copy()
faces = data['faces']
col = data.get('col', trackId)
vert = vert @ R[nv].T + T[nv]
mesh = trimesh.Trimesh(vert, faces)
mesh.apply_transform(rot)
trans = [0, 0, 0]
material = pyrender.MetallicRoughnessMaterial(
metallicFactor=0.0,
alphaMode='OPAQUE',
baseColorFactor=colors[col % len(colors)])
mesh = pyrender.Mesh.from_trimesh(
mesh,
material=material)
scene.add(mesh, 'mesh')
else:
verts = vertices @ R[nv].T + T[nv]
mesh = trimesh.Trimesh(verts, self.faces)
mesh.apply_transform(rot)
trans = [0, 0, 0]
material = pyrender.MetallicRoughnessMaterial(
metallicFactor=0.0,
alphaMode='OPAQUE',
baseColorFactor=colors[trackId % len(colors)])
mesh = pyrender.Mesh.from_trimesh(
mesh,
material=material)
scene.add(mesh, 'mesh')
if camera is not None:
light = pyrender.PointLight(color=[1.0, 1.0, 1.0], intensity=70)
light_pose = np.eye(4)
light_pose[:3, 3] = [0, 0, 4.5]
scene.add(light, pose=light_pose)
light_pose[:3, 3] = [0, 1, 4]
scene.add(light, pose=light_pose)
light_pose[:3, 3] = [0, -1, 4]
scene.add(light, pose=light_pose)
else:
trans = [0, 0, 0]
# Use 3 directional lights
# Create light source
light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1)
light_pose = np.eye(4)
light_pose[:3, 3] = np.array([0, -1, 1]) + trans
scene.add(light, pose=light_pose)
light_pose[:3, 3] = np.array([0, 1, 1]) + trans
scene.add(light, pose=light_pose)
light_pose[:3, 3] = np.array([1, 1, 2]) + trans
scene.add(light, pose=light_pose)
if camera is None:
if K is None:
camera_center = np.array([img.shape[1] / 2., img.shape[0] / 2.])
camera = pyrender.camera.IntrinsicsCamera(fx=self.focal_length, fy=self.focal_length, cx=camera_center[0], cy=camera_center[1])
else:
camera = pyrender.camera.IntrinsicsCamera(fx=K[nv][0, 0], fy=K[nv][1, 1], cx=K[nv][0, 2], cy=K[nv][1, 2])
scene.add(camera, pose=camera_pose)
            # The alpha channel was not working previously; needs checking again.
            # Until this is fixed, use a hack with the depth image to get the opacity
color, rend_depth = self.renderer.render(scene, flags=flags)
# color = color[::-1,::-1]
# rend_depth = rend_depth[::-1,::-1]
output_depths.append(rend_depth)
color = color.astype(np.uint8)
valid_mask = (rend_depth > 0)[:, :, None]
            if color.shape[2] == 3: # the alpha channel failed (e.g. on a headless server)
color = np.dstack((color, (valid_mask*255).astype(np.uint8)))
output_colors.append(color)
output_img = (color[:, :, :3] * valid_mask +
(1 - valid_mask) * img)
output_img = output_img.astype(np.uint8)
output_images.append(output_img)
if return_depth:
return output_images, output_depths
elif return_color:
return output_colors
else:
return output_images
def render_results(img, render_data, cam_params, outname=None, rotate=False, degree=90, axis=[1.,0.,0],
fix_center=None):
render_data = copy.deepcopy(render_data)
render = Renderer(height=1024, width=1024, faces=None)
Ks, Rs, Ts = [cam_params['K']], [cam_params['Rc']], [cam_params['Tc']]
imgsrender = render.render_multiview(render_data, Ks, Rs, Ts, [img], return_color=True)[0]
render0 = cv2.addWeighted(cv2.bitwise_and(img, 255 - imgsrender[:, :, 3:4].repeat(3, 2)), 1, imgsrender[:, :, :3], 1, 0.0)
if rotate:
# simple rotate the vertices
if fix_center is None:
center = np.mean(np.vstack([v['vertices'] for i, v in render_data.items()]), axis=0, keepdims=True)
new_center = center.copy()
new_center[:, 0:2] = 0
else:
center = fix_center.copy()
new_center = fix_center.copy()
new_center[:, 2] *= 1.5
direc = np.array(axis)
rot, _ = cv2.Rodrigues(direc*degree/90*np.pi/2)
for key in render_data.keys():
vertices = render_data[key]['vertices']
vert = (vertices - center) @ rot.T + new_center
render_data[key]['vertices'] = vert
        blank = np.zeros((img.shape[0], img.shape[1], 3), dtype=img.dtype) + 255
imgsrender = render.render_multiview(render_data, Ks, Rs, Ts, [blank], return_color=True)[0]
render1 = cv2.addWeighted(cv2.bitwise_and(blank, 255- imgsrender[:, :, 3:4].repeat(3, 2)), 1, imgsrender[:, :, :3], 1, 0.0)
render0 = np.vstack([render0, render1])
if outname is not None:
os.makedirs(os.path.dirname(outname), exist_ok=True)
cv2.imwrite(outname, render0)
return render0
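# Example call pattern (a sketch; img is a BGR image read with cv2, cameras
# is a dict with 'K', 'Rc', 'Tc' as loaded from intri.yml/extri.yml, and
# vertices/faces come from the SMPL model):
#   render_data = {0: {'vertices': vertices, 'faces': faces}}
#   render_results(img, render_data, cameras, outname='debug/render.jpg')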

7
scripts/postprocess/export_bvh.py Normal file
View File

@@ -0,0 +1,7 @@
'''
@ Date: 2021-01-14 21:13:05
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 21:13:06
@ FilePath: /EasyMocapRelease/scripts/postprocess/export_bvh.py
'''

163
scripts/preprocess/extract_video.py Normal file
View File

@@ -0,0 +1,163 @@
'''
@ Date: 2021-01-13 20:38:33
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-01-14 16:59:06
@ FilePath: /EasyMocapRelease/scripts/preprocess/extract_video.py
'''
import os
import cv2
from os.path import join
from tqdm import tqdm
from glob import glob
import numpy as np
mkdir = lambda x: os.makedirs(x, exist_ok=True)
def extract_video(videoname, path, start=0, end=10000, step=1):
base = os.path.basename(videoname).replace('.mp4', '')
if not os.path.exists(videoname):
return base
video = cv2.VideoCapture(videoname)
outpath = join(path, 'images', base)
if os.path.exists(outpath) and len(os.listdir(outpath)) > 0:
return base
else:
os.makedirs(outpath)
totalFrames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
for cnt in tqdm(range(totalFrames)):
ret, frame = video.read()
if cnt < start:continue
if cnt > end:break
if not ret:break
cv2.imwrite(join(outpath, '{:06d}.jpg'.format(cnt)), frame)
video.release()
return base
def extract_2d(openpose, image, keypoints, render):
if not os.path.exists(keypoints):
cmd = './build/examples/openpose/openpose.bin --image_dir {} --write_json {} --display 0'.format(image, keypoints)
if args.handface:
cmd = cmd + ' --hand --face'
if args.render:
cmd = cmd + ' --write_images {}'.format(render)
else:
cmd = cmd + ' --render_pose 0'
os.chdir(openpose)
os.system(cmd)
import json
def read_json(path):
with open(path) as f:
data = json.load(f)
return data
def save_json(file, data):
if not os.path.exists(os.path.dirname(file)):
os.makedirs(os.path.dirname(file))
with open(file, 'w') as f:
json.dump(data, f, indent=4)
def create_annot_file(annotname, imgname):
assert os.path.exists(imgname), imgname
img = cv2.imread(imgname)
height, width = img.shape[0], img.shape[1]
imgnamesep = imgname.split(os.sep)
filename = os.sep.join(imgnamesep[imgnamesep.index('images'):])
annot = {
'filename':filename,
'height':height,
'width':width,
'annots': [],
'isKeyframe': False
}
save_json(annotname, annot)
return annot
def bbox_from_openpose(keypoints, rescale=1.2, detection_thresh=0.01):
"""Get center and scale for bounding box from openpose detections."""
valid = keypoints[:,-1] > detection_thresh
valid_keypoints = keypoints[valid][:,:-1]
center = valid_keypoints.mean(axis=0)
bbox_size = valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)
# adjust bounding box tightness
bbox_size = bbox_size * rescale
bbox = [
center[0] - bbox_size[0]/2,
center[1] - bbox_size[1]/2,
center[0] + bbox_size[0]/2,
center[1] + bbox_size[1]/2,
        # bbox score: the mean confidence of the valid keypoints
        keypoints[valid, 2].mean()
]
return bbox
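# A quick numeric check (a sketch): two valid keypoints at (100, 50) and
# (200, 150) with confidence 1 give center (150, 100) and size (100, 100);
# after the 1.2x rescale the returned box is [90, 40, 210, 160, 1.0]:
#   kpts = np.array([[100., 50., 1.], [200., 150., 1.]])
#   bbox_from_openpose(kpts)  # -> [90.0, 40.0, 210.0, 160.0, 1.0]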
def load_openpose(opname):
mapname = {'face_keypoints_2d':'face2d', 'hand_left_keypoints_2d':'handl2d', 'hand_right_keypoints_2d':'handr2d'}
assert os.path.exists(opname), opname
data = read_json(opname)
out = []
pid = 0
for i, d in enumerate(data['people']):
keypoints = d['pose_keypoints_2d']
keypoints = np.array(keypoints).reshape(-1, 3)
annot = {
'bbox': bbox_from_openpose(keypoints),
'personID': pid + i,
'keypoints': keypoints.tolist(),
'isKeyframe': False
}
for key in ['face_keypoints_2d', 'hand_left_keypoints_2d', 'hand_right_keypoints_2d']:
if len(d[key]) == 0:
continue
kpts = np.array(d[key]).reshape(-1, 3)
annot[mapname[key]] = kpts.tolist()
out.append(annot)
return out
def convert_from_openpose(src, dst):
# convert the 2d pose from openpose
inputlist = sorted(os.listdir(src))
for inp in tqdm(inputlist):
annots = load_openpose(join(src, inp))
base = inp.replace('_keypoints.json', '')
annotname = join(dst, base+'.json')
imgname = annotname.replace('annots', 'images').replace('.json', '.jpg')
if not os.path.exists(imgname):
os.remove(join(src, inp))
continue
annot = create_annot_file(annotname, imgname)
annot['annots'] = annots
save_json(annotname, annot)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('path', type=str, default=None)
parser.add_argument('--handface', action='store_true')
parser.add_argument('--openpose', type=str,
default='/media/qing/Project/openpose')
parser.add_argument('--render', action='store_true', help='use to render the openpose 2d')
parser.add_argument('--no2d', action='store_true')
parser.add_argument('--debug', action='store_true')
args = parser.parse_args()
if os.path.isdir(args.path):
videos = sorted(glob(join(args.path, 'videos', '*.mp4')))
subs = []
for video in videos:
basename = extract_video(video, args.path)
subs.append(basename)
if not args.no2d:
os.makedirs(join(args.path, 'openpose'), exist_ok=True)
for sub in subs:
annot_root = join(args.path, 'annots', sub)
if os.path.exists(annot_root):
continue
extract_2d(args.openpose, join(args.path, 'images', sub),
join(args.path, 'openpose', sub),
join(args.path, 'openpose_render', sub))
convert_from_openpose(
src=join(args.path, 'openpose', sub),
dst=annot_root
)
else:
        print(args.path, 'does not exist')