diff --git a/myeasymocap/datasets/basedata.py b/myeasymocap/datasets/basedata.py
index b471109..d28ac38 100644
--- a/myeasymocap/datasets/basedata.py
+++ b/myeasymocap/datasets/basedata.py
@@ -26,7 +26,7 @@ class ImageDataBase:
     def try_to_extract_images(self, root, value):
         if not os.path.exists(os.path.join(root, value['root'])) and os.path.exists(os.path.join(root, 'videos')):
             print('[{}] Cannot find the images but find the videos, try to extract it'.format(self.__class__.__name__))
-            for videoname in os.listdir(os.path.join(root, 'videos')):
+            for videoname in sorted(os.listdir(os.path.join(root, 'videos'))):
                 videoext = '.' + videoname.split('.')[-1]
                 outdir = join(root, value['root'], videoname.replace(videoext, ''))
                 os.makedirs(outdir, exist_ok=True)
diff --git a/myeasymocap/datasets/mv1p.py b/myeasymocap/datasets/mv1p.py
index c8291f7..e1f20c7 100644
--- a/myeasymocap/datasets/mv1p.py
+++ b/myeasymocap/datasets/mv1p.py
@@ -174,6 +174,11 @@ class MVDataset(ImageDataBase):
         self.filter = filter
         if len(self.subs) == 0:
             self.subs = self.meta['subs']
+        if len(self.subs_vis) == 1:
+            if self.subs_vis[0] == '_all_':
+                self.subs_vis = self.subs
+            elif self.subs_vis[0] == '_sample_4_':
+                self.subs_vis = [self.subs[0], self.subs[len(self.subs)//3], self.subs[(len(self.subs)*2//3)], self.subs[-1]]
         self.check_frames_length()

     @staticmethod
@@ -265,6 +270,11 @@ class MVDataset(ImageDataBase):

     def check(self, index):
         raise NotImplementedError
+
+    def __str__(self) -> str:
+        pre = super().__str__()
+        pre += ''' subs_vis: {}'''.format(self.subs_vis)
+        return pre

 class MVMP(MVDataset):
     def read_annots(self, annotnames):
diff --git a/myeasymocap/io/model.py b/myeasymocap/io/model.py
index ce9477a..9e71753 100644
--- a/myeasymocap/io/model.py
+++ b/myeasymocap/io/model.py
@@ -43,8 +43,11 @@ class SMPLLoader:
             'body_model': self.smplmodel,
             'model': self.forward}

-    def forward(self, params):
-        keypoints = self.smplmodel.keypoints(params, return_tensor=True)
+    def forward(self, params, ret_vertices=False):
+        if ret_vertices:
+            keypoints = self.smplmodel.vertices(params, return_tensor=True)
+        else:
+            keypoints = self.smplmodel.keypoints(params, return_tensor=True)
         ret = {
             'keypoints': keypoints
         }
diff --git a/myeasymocap/io/vis.py b/myeasymocap/io/vis.py
index 619952e..924f740 100644
--- a/myeasymocap/io/vis.py
+++ b/myeasymocap/io/vis.py
@@ -57,13 +57,12 @@ class VisBase:
         self.count += 1

 class Vis3D(VisBase):
-    def __init__(self, scale, lw_factor=1, name='repro', **kwargs) -> None:
+    def __init__(self, scale, lw_factor=1, name='vis_repro', **kwargs) -> None:
         super().__init__(scale, lw_factor, name, **kwargs)

     def __call__(self, images, cameras, keypoints3d=None, results=None):
         # keypoints3d: (nJoints, 4)
         undist = False
-        cameras['dist'] = np.zeros_like(cameras['dist'])
         vis_all = []
         for nv in range(len(images)):
             if isinstance(images[nv], str): continue
@@ -77,25 +76,28 @@ class Vis3D(VisBase):
             if results is None:
                 if len(keypoints3d.shape) == 2:
                     keypoints_repro, depth = projectPoints(keypoints3d, {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']})
-                    plot_keypoints_auto(vis, keypoints_repro, pid=0, use_limb_color=False)
+                    plot_keypoints_auto(vis, keypoints_repro, pid=0, use_limb_color=True)
                 else:
                     for pid in range(keypoints3d.shape[0]):
                         keypoints_repro, depth = projectPoints(keypoints3d[pid], {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']})
-                        plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=False)
+                        if (depth < 0.5).all():
+                            continue
+                        plot_keypoints_auto(vis, keypoints_repro, pid=pid, use_limb_color=True)
             else:
                 for res in results:
                     k3d = res['keypoints3d']
+                    k3d_rt = np.dot(k3d[:, :3], camera['R'].T) + camera['T'].T
                     keypoints_repro, depth = projectPoints(k3d, camera)
+                    depth = k3d_rt[..., -1]
                     if k3d.shape[0] == 1:
                         x, y = keypoints_repro[0,0], keypoints_repro[0,1]
-                        # if res['id'] == 6:
                         plot_cross(vis, x, y, col=get_rgb(res['id']), lw=self.lw, width=self.lw * 5)
                     elif k3d.shape[0] == 2: # limb
                         x1, y1 = keypoints_repro[0,0], keypoints_repro[0,1]
                         x2, y2 = keypoints_repro[1,0], keypoints_repro[1,1]
                         cv2.line(vis, (int(x1), int(y1)), (int(x2), int(y2)), get_rgb(res['id']), self.lw)
                     else:
-                        plot_keypoints_auto(vis, keypoints_repro, pid=res['id'], use_limb_color=False, lw_factor=self.lw)
+                        plot_keypoints_auto(vis, keypoints_repro, pid=res['id'], use_limb_color=True, lw_factor=self.lw)
                     cv2.putText(vis, '{}'.format(res['id']), (int(keypoints_repro[0,0]), int(keypoints_repro[0,1])), cv2.FONT_HERSHEY_SIMPLEX, 2, get_rgb(res['id']), self.lw)
             vis_all.append(vis)
@@ -255,6 +257,8 @@ class Vis2D(VisBase):
                     plot_bbox(vis_, bbox[nv], 0)
                 else:
                     for pid in range(k2d.shape[0]):
-                        plot_keypoints_auto(vis_, k2d[pid], pid=pid, use_limb_color=False)
+                        plot_keypoints_auto(vis_, k2d[pid], pid=pid, use_limb_color=True)
+                        if bbox is not None:
+                            plot_bbox(vis_, bbox[nv][pid], pid=pid)
             vis.append(vis_)
         self.merge_and_write(vis)
\ No newline at end of file
diff --git a/myeasymocap/io/vis3d.py b/myeasymocap/io/vis3d.py
index f671351..5f48083 100644
--- a/myeasymocap/io/vis3d.py
+++ b/myeasymocap/io/vis3d.py
@@ -38,7 +38,7 @@ class Render(VisBase):
         self.merge_and_write([ret])

 class Render_multiview(VisBase):
-    def __init__(self, view_list=[], name='render', model_name='body_model', render_mode='image', backend='pyrender', shape=[-1,-1], scale=1., **kwargs):
+    def __init__(self, view_list=[], name='vis_render', model_name='body_model', render_mode='image', backend='pyrender', shape=[-1,-1], scale=1., **kwargs):
         self.scale3d = scale
         super().__init__(name=name, scale=1., **kwargs)
         self.view_list = view_list
@@ -46,65 +46,65 @@ class Render_multiview(VisBase):
         self.model_name = model_name
         self.shape = shape

-    def render_(self, vertices, faces, cameras, imgnames):
-        for nf, img in enumerate(tqdm(imgnames, desc=self.name)):
-            mv_ret = []
-            if not isinstance(img, list):
-                img = [img]
-            for nv in self.view_list:
-                basename = os.path.basename(img[nv])
-                assert os.path.exists(img[nv]), img[nv]
-                vis = cv2.imread(img[nv])
-                vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
-                vert = vertices[nf]
-                meshes = {}
-                if vert.ndim == 2:
-                    meshes[0] = {
-                        'vertices': vert,
+    def render_frame(self, imgname, vert, faces, cameras, pids=[]):
+        mv_ret = []
+        if not isinstance(imgname, list):
+            imgname = [imgname]
+        for nv in self.view_list:
+            basename = os.path.basename(imgname[nv])
+            assert os.path.exists(imgname[nv]), imgname[nv]
+            vis = cv2.imread(imgname[nv])
+            vis = cv2.resize(vis, None, fx=self.scale3d, fy=self.scale3d)
+            meshes = {}
+            if vert.ndim == 2:
+                meshes[0] = {
+                    'vertices': vert,
+                    'faces': faces,
+                    'id': 0,
+                    'name': 'human_{}'.format(0)
+                }
+            elif vert.ndim == 3:
+                if len(pids) == 0:
+                    pids = list(range(vert.shape[0]))
+                for ipid, pid in enumerate(pids):
+                    meshes[pid] = {
+                        'vertices': vert[ipid],
                         'faces': faces,
-                        'id': 0,
-                        'name': 'human_{}'.format(0)
+                        'id': pid,
+                        'name': 'human_{}'.format(pid)
                     }
-                elif vert.ndim == 3:
-                    for pid in range(vert.shape[0]):
-                        meshes[pid] = {
-                            'vertices': vert[pid],
-                            'faces': faces,
-                            'id': pid,
-                            'name': 'human_{}'.format(pid)
-                        }
-                if cameras['K'].ndim == 4:
-                    K = cameras['K'][nf][nv].copy()
-                    K[:2, :] *= self.scale
-                    R = cameras['R'][nf][nv]
-                    T = cameras['T'][nf][nv]
-                else:
-                    K = cameras['K'][nv].copy()
-                    K[:2, :] *= self.scale3d
-                    R = cameras['R'][nv]
-                    T = cameras['T'][nv]
-                # add ground
-                if self.render_mode == 'ground':
-                    from easymocap.visualize.geometry import create_ground
-                    ground = create_ground(
-                        center=[0, 0, -0.05], xdir=[1, 0, 0], ydir=[0, 1, 0], # position
-                        step=1, xrange=10, yrange=10, # size
-                        white=[1., 1., 1.], black=[0.5,0.5,0.5], # color
-                        two_sides=True
-                    )
-                    meshes[1001] = ground
-                    vis = np.zeros((self.shape[0], self.shape[1], 3), dtype=np.uint8) + 255
-                    focal = min(self.shape) * 1.2
-                    K = np.array([
-                        [focal,0,vis.shape[0]/2],
-                        [0,focal,vis.shape[1]/2],
-                        [0,0,1]])
-                    ret = plot_meshes(vis, meshes, K, R, T, mode='rgb')
-                else:
-                    ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
-                ret = add_logo(ret)
-                mv_ret.append(ret)
-            self.merge_and_write(mv_ret)
+            K = cameras['K'][nv].copy()
+            K[:2, :] *= self.scale3d
+            R = cameras['R'][nv]
+            T = cameras['T'][nv]
+            # add ground
+            if self.render_mode == 'ground':
+                from easymocap.visualize.geometry import create_ground
+                ground = create_ground(
+                    center=[0, 0, -0.05], xdir=[1, 0, 0], ydir=[0, 1, 0], # position
+                    step=1, xrange=10, yrange=10, # size
+                    white=[1., 1., 1.], black=[0.5,0.5,0.5], # color
+                    two_sides=True
+                )
+                meshes[1001] = ground
+                vis = np.zeros((self.shape[0], self.shape[1], 3), dtype=np.uint8) + 255
+                focal = min(self.shape) * 1.2
+                K = np.array([
+                    [focal,0,vis.shape[0]/2],
+                    [0,focal,vis.shape[1]/2],
+                    [0,0,1]])
+                ret = plot_meshes(vis, meshes, K, R, T, mode='rgb')
+            else:
+                ret = plot_meshes(vis, meshes, K, R, T, mode=self.render_mode)
+            ret = add_logo(ret)
+            mv_ret.append(ret)
+        self.merge_and_write(mv_ret)
+
+    def render_(self, vertices, faces, cameras, imgnames, pids=[]):
+        for nf, imgname in enumerate(tqdm(imgnames, desc=self.name)):
+            vert = vertices[nf]
+            camera_ = {cam: val[nf] for cam, val in cameras.items()}
+            self.render_frame(imgname, vert, faces, camera_, pids=pids)

     def __call__(self, params, cameras, imgnames, **kwargs):
         body_model = kwargs[self.model_name]
@@ -112,6 +112,33 @@ class Render_multiview(VisBase):
         faces = body_model.faces
         self.render_(vertices, faces, cameras, imgnames)

+class RenderAll_multiview(Render_multiview):
+    def __call__(self, results, cameras, imgnames, meta, **kwargs):
+        body_model = kwargs[self.model_name]
+        for index in tqdm(meta['index'], desc=self.name):
+            results_frame = []
+            for pid, result in results.items():
+                if index >= result['frames'][0] and index <= result['frames'][-1]:
+                    frame_rel = result['frames'].index(index)
+                    results_frame.append({
+                        'id': pid,
+                    })
+                    for key in ['Rh', 'Th', 'poses', 'shapes']:
+                        if result['params'][key].shape[0] == 1:
+                            results_frame[-1][key] = result['params'][key]
+                        else:
+                            results_frame[-1][key] = result['params'][key][frame_rel:frame_rel+1]
+            params = {}
+            for key in results_frame[0].keys():
+                if key != 'id':
+                    params[key] = np.concatenate([res[key] for res in results_frame], axis=0)
+            pids = [res['id'] for res in results_frame]
+            vertices = body_model.vertices(params, return_tensor=False)
+            camera_ = {cam: val[index] for cam, val in cameras.items()}
+            self.render_frame(imgnames[index], vertices, body_model.faces, camera_, pids=pids)
+
 class Render_nocam:
     def __init__(self, scale=0.5, backend='pyrender',view_list=[0]) -> None:
         self.name = 'render'
diff --git a/myeasymocap/io/write.py b/myeasymocap/io/write.py
index 247cd4c..6a53683 100644
--- a/myeasymocap/io/write.py
+++ b/myeasymocap/io/write.py
@@ -1,5 +1,5 @@
 import os
-from easymocap.mytools.file_utils import write_keypoints3d, write_smpl
+from easymocap.mytools.file_utils import write_keypoints3d, write_smpl, write_vertices
 from easymocap.annotator.file_utils import save_annot
 from os.path import join
 from tqdm import tqdm
@@ -66,8 +66,10 @@ class Write2D:
             save_annot(dumpname, annots)

 class WriteSMPL:
-    def __init__(self, name='smpl') -> None:
+    def __init__(self, name='smpl', write_vertices=False) -> None:
         self.name = name
+        # TODO: make available
+        self.write_vertices = write_vertices

     def __call__(self, params=None, results=None, meta=None, model=None):
         results_all = []
@@ -91,7 +93,14 @@ class WriteSMPL:
                 param = results_frame[-1]
                 pred = model(param)['keypoints'][0]
                 results_frame[-1]['keypoints3d'] = pred
+                if self.write_vertices:
+                    vert = model(param, ret_vertices=True)['keypoints'][0]
+                    results_frame[-1]['vertices'] = vert
             write_smpl(join(self.output, self.name, '{:06d}.json'.format(meta['frame'][index])), results_frame)
             write_keypoints3d(join(self.output, 'keypoints3d', '{:06d}.json'.format(meta['frame'][index])), results_frame)
+            if self.write_vertices:
+                write_vertices(join(self.output, 'vertices', '{:06d}.json'.format(meta['frame'][index])), results_frame)
+                for res in results_frame:
+                    res.pop('vertices')
             results_all.append(results_frame)
         return {'results_perframe': results_all}
\ No newline at end of file
diff --git a/myeasymocap/operations/iterative_triangulate.py b/myeasymocap/operations/iterative_triangulate.py
new file mode 100644
index 0000000..d79376f
--- /dev/null
+++ b/myeasymocap/operations/iterative_triangulate.py
@@ -0,0 +1,137 @@
+import numpy as np
+import itertools
+from easymocap.mytools.triangulator import batch_triangulate, project_points
+from easymocap.mytools.debug_utils import log, mywarn, myerror
+
+def project_and_distance(kpts3d, RT, kpts2d):
+    kpts_proj = project_points(kpts3d, RT)
+    # 1. distance between input and projection
+    conf = (kpts3d[None, :, -1] > 0) * (kpts2d[:, :, -1] > 0)
+    dist = np.linalg.norm(kpts_proj[..., :2] - kpts2d[..., :2], axis=-1) * conf
+    return kpts_proj[..., -1], dist, conf
+
+def remove_outview(kpts2d, out_view, debug):
+    if len(out_view) == 0:
+        return False
+    elif len(out_view) == 1:
+        # directly remove the outlier view
+        outv = out_view[0]
+        if debug:
+            log('[triangulate] remove outview: {}'.format(outv))
+    else:
+        # only remove the first outlier view
+        outv = out_view[0]
+        if debug:
+            mywarn('[triangulate] remove first outview: {} from {}'.format(outv, out_view))
+    kpts2d[outv] = 0.
+    return True
+
+def remove_outjoint(kpts2d, Pall, out_joint, dist_max, dist_track, min_view=3, previous=None, debug=False):
+    MIN_CONF_3D = 0.1
+    if len(out_joint) == 0:
+        return False
+    if debug:
+        mywarn('[triangulate] remove outjoint: {}'.format(out_joint))
+    nviews = np.arange(kpts2d.shape[0])
+    for nj in out_joint:
+        valid = np.where(kpts2d[:, nj, -1] > 0)[0]
+        if len(valid) < min_view:
+            # if less than 3 visible view, set these unvisible
+            kpts2d[:, nj, -1] = 0
+            continue
+        kpts_nj = kpts2d[valid, nj]
+        Pall_nj = Pall[valid]
+        view_index = nviews[valid]
+        view_local = np.arange(valid.shape[0])
+        comb_views = np.array(list(itertools.combinations(view_local.tolist(), min_view))).T
+        comb_kpts = kpts_nj[comb_views]
+        comb_Pall = Pall_nj[comb_views]
+        comb_k3d = batch_triangulate(comb_kpts, comb_Pall)
+        depth, dist, conf = project_and_distance(comb_k3d, comb_Pall, comb_kpts)
+        # check the candidate combinations in descending order of confidence
+        sort_by_conf = (-comb_kpts[..., -1].sum(axis=0)).argsort()
+        flag = (dist[:, sort_by_conf] < dist_max).all(axis=0)
+        if not flag.any():
+            kpts2d[:, nj, -1] = 0
+            continue
+        select = sort_by_conf[flag][0]
+        k3d = comb_k3d[select:select+1]
+        valid_view = view_index[comb_views[:, select]]
+        if previous is not None and previous[nj, -1] > MIN_CONF_3D:
+            _dist3d = np.linalg.norm(previous[[nj], :3] - k3d[:, :3], axis=-1) * 1000
+            log('[triangulate] {} distance 3d mm (max {}): {}'.format(nj, dist_track, _dist3d))
+            if _dist3d > dist_track:
+                import ipdb; ipdb.set_trace()
+        set0 = np.zeros(kpts2d.shape[0])
+        set0[valid_view] = 1.
+        kpts2d[:, nj, -1] *= set0
+    return True
+
+def iterative_triangulate(kpts2d, RT,
+    min_conf=0.1, min_view=3, min_joints=3, dist_max=0.05, dist_track=50,
+    thres_outlier_view=0.4, thres_outlier_joint=0.4, debug=True,
+    previous=None,
+    **kwargs):
+    kpts2d = kpts2d.copy()
+    conf = kpts2d[..., -1]
+    kpts2d[conf < min_conf] = 0.
+    while True:
+        # 1. triangulate and reproject
+        kpts3d = batch_triangulate(kpts2d, RT, min_view=min_view)
+        depth, dist, conf = project_and_distance(kpts3d, RT, kpts2d)
+        # 2. find the outlier (view, joint) pairs
+        vv, jj = np.where(dist > dist_max)
+        if vv.shape[0] < 1:
+            if debug:
+                log('[triangulate] Not found outlier, break')
+            break
+        ratio_outlier_view = (dist>dist_max).sum(axis=1)/(1e-5 + (conf > 0.).sum(axis=1))
+        ratio_outlier_joint = (dist>dist_max).sum(axis=0)/(1e-5 + (conf > 0.).sum(axis=0))
+        # 3. find the totally wrong detections
+        out_view = np.where(ratio_outlier_view > thres_outlier_view)[0]
+        error_joint = dist.sum(axis=0)/(1e-5 + (conf > 0.).sum(axis=0))
+        # for joint, we calculate the mean distance of this joint
+        out_joint = np.where((ratio_outlier_joint > thres_outlier_joint) & (error_joint > dist_max))[0]
+        if len(out_view) > 1:
+            # TODO: if every ratio is below zero, views are effectively dropped at random; the view confidence should be increased instead.
+            # TODO: generate multiple proposals and search them recursively rather than discarding views directly.
+            # TODO: when a previous frame is available, use it as the criterion.
+            # cfg = dict(min_conf=min_conf, min_view=min_view, min_joints=min_joints, dist_max=dist_max, dist_track=dist_track,
+            #     thres_outlier_view=thres_outlier_view, thres_outlier_joint=0.4, debug=True, previous=None)
+            if debug: mywarn('[triangulate] More than one outlier view: {}, stop triangulation.'.format(ratio_outlier_view))
+            return kpts3d, np.zeros_like(kpts2d)
+        if debug: mywarn('[triangulate] Remove outlier view give outlier ratio: {}'.format(ratio_outlier_view))
+        dist_view = dist.sum(axis=1)/(1e-5 + (conf > 0.).sum(axis=1))
+        out_view = out_view.tolist()
+        out_view.sort(key=lambda x:-dist_view[x])
+        if remove_outview(kpts2d, out_view, debug): continue
+        if len(out_joint) > 0:
+            if debug:
+                print(dist[:, out_joint])
+                mywarn('[triangulate] Remove outlier joint {} given outlier ratio: {}'.format(out_joint, ratio_outlier_joint[out_joint]))
+            remove_outjoint(kpts2d, RT, out_joint, dist_max, dist_track, previous=previous, debug=debug)
+            continue
+        if debug:
+            log('[triangulate] Directly remove {}, {}'.format(vv, jj))
+        kpts2d[vv, jj, -1] = 0.
+    if debug:
+        log('[triangulate] finally {} valid points, {} not valid'.format((kpts3d[..., -1]>0).sum(), np.where(kpts3d[..., -1]<=0)[0]))
+    if (kpts3d[..., -1]>0).sum() < min_joints:
+        kpts3d[..., -1] = 0.
+        kpts2d[..., -1] = 0.
+        return kpts3d, kpts2d
+    return kpts3d, kpts2d
\ No newline at end of file
diff --git a/myeasymocap/operations/match_base.py b/myeasymocap/operations/match_base.py
new file mode 100644
index 0000000..9714f60
--- /dev/null
+++ b/myeasymocap/operations/match_base.py
@@ -0,0 +1,558 @@
+import numpy as np
+import cv2
+from easymocap.mytools.camera_utils import Undistort
+from easymocap.mytools.debug_utils import log, mywarn, myerror
+from .iterative_triangulate import iterative_triangulate
+from easymocap.mytools.triangulator import project_points
+from easymocap.mytools.timer import Timer
+
+class DistanceBase:
+    # This class computes the affinity.
+    # It is mainly based on keypoints; other cues could be supported in the future.
+    def __init__(self, cfg) -> None:
+        self.cfg = cfg
+
+    def calculate_affinity_MxM(self, keypoints, cameras):
+        raise NotImplementedError
+
+    @staticmethod
+    def SimpleConstrain(dimGroups):
+        constrain = np.ones((dimGroups[-1], dimGroups[-1]))
+        for i in range(len(dimGroups)-1):
+            start, end = dimGroups[i], dimGroups[i+1]
+            constrain[start:end, start:end] = 0
+        N = constrain.shape[0]
+        constrain[range(N), range(N)] = 1
+        return constrain
+
+def skew_op(x):
+    res = np.zeros((3, 3), dtype=x.dtype)
+    # 0, -z, y
+    res[0, 1] = -x[2, 0]
+    res[0, 2] = x[1, 0]
+    # z, 0, -x
+    res[1, 0] = x[2, 0]
+    res[1, 2] = -x[0, 0]
+    # -y, x, 0
+    res[2, 0] = -x[1, 0]
+    res[2, 1] = x[0, 0]
+    return res
+
+def fundamental_op(K0, K1, R_0, T_0, R_1, T_1):
+    invK0 = np.linalg.inv(K0)
+    return invK0.T @ (R_0 @ R_1.T) @ K1.T @ skew_op(K1 @ R_1 @ R_0.T @ (T_0 - R_0 @ R_1.T @ T_1))
+
+class EpipolarDistance(DistanceBase):
+    @staticmethod
+    def distance2d2d(pts0, pts1, K0, K1, R0, T0, R1, T1):
+        F = fundamental_op(K0, K1, R0, T0, R1, T1)
+        # Find epilines corresponding to points in left image (first image) and
+        # drawing its lines on right image
+        lines0 = cv2.computeCorrespondEpilines(pts0[..., :2].reshape(-1,1,2), 2, F)
+        # Find epilines corresponding to points in right image (second image) and
+        # drawing its lines on left image
+        lines1 = cv2.computeCorrespondEpilines(pts1[..., :2].reshape(-1,1,2), 1, F)
+        lines0 = lines0.reshape(pts0.shape)
+        lines1 = lines1.reshape(pts1.shape)
+        # dist: (D_v0, D_v1, nJoints)
+        # TODO: / sqrt(A^2 + B^2)
+        dist01 = np.abs(np.sum(lines0[:, None, :, :2] * pts1[None, :, :, :2], axis=-1) + lines0[:, None, :, 2])
+        conf = (pts0[:, None, :, 2] * pts1[None, :, :, 2]) > 0
+        dist10 = np.abs(np.sum(lines1[:, None, :, :2] * pts0[None, :, :, :2], axis=-1) + lines1[:, None, :, 2])
+
+        dist = np.sum(dist01 * conf + dist10.transpose(1, 0, 2) * conf, axis=-1)/(conf.sum(axis=-1) + 1e-5)/2
+        return dist
+
+    def vis_affinity(self, aff, dimGroups, scale=10):
+        aff = cv2.resize(aff, (aff.shape[1]*scale, aff.shape[0]*scale), interpolation=cv2.INTER_NEAREST)
+        aff_float = aff.copy()
+        aff = (aff * 255).astype(np.uint8)
+        aff = cv2.applyColorMap(aff, cv2.COLORMAP_JET)
+        transp = (aff_float * 255).astype(np.float32)
+        for dim in dimGroups[1:-1]:
+            cv2.line(aff, (0, dim*scale), (aff.shape[0], dim*scale), (255, 255, 255), thickness=1)
+            cv2.line(aff, (dim*scale, 0), (dim*scale, aff.shape[0]), (255, 255, 255), thickness=1)
+            cv2.line(transp, (0, dim*scale), (aff.shape[0], dim*scale), (255,), thickness=1)
+            cv2.line(transp, (dim*scale, 0), (dim*scale, aff.shape[0]), (255,), thickness=1)
+        # last line
+        cv2.rectangle(aff, (0, 0), (aff.shape[0]-1, aff.shape[0]-1), (0, 0, 255), thickness=1)
+        cv2.rectangle(transp, (0, 0), (aff.shape[0]-1, aff.shape[0]-1), (255,), thickness=1)
+        aff = np.dstack([aff, transp])
+        return aff
+
+    def calculate_affinity_MxM(self, keypoints, cameras):
+        # compute the total number of detections over all views
+        dims = [d.shape[0] for d in keypoints]
+        dimGroups = np.cumsum([0] + dims)
+        M = dimGroups[-1]
+        distance = np.eye((M), dtype=np.float32)
+        nViews = len(keypoints)
+        for v0 in range(nViews-1):
+            # set the diag block
+            for v1 in range(1, nViews):
+                # calculate distance between (v0, v1)
+                if v0 >= v1:
+                    continue
+                pts0 = keypoints[v0]
+                pts1 = keypoints[v1]
+                if pts0.shape[0] == 0 or pts1.shape[0] == 0:
+                    continue
+                K0, K1 = cameras['K'][v0], cameras['K'][v1] # K0, K1: (3, 3)
+                R0, T0 = cameras['R'][v0], cameras['T'][v0]
+                R1, T1 = cameras['R'][v1], cameras['T'][v1]
+                dist = self.distance2d2d(pts0, pts1, K0, K1, R0, T0, R1, T1)
+                conf0 = pts0[..., -1]
+                conf1 = pts1[..., -1]
+                common_count = ((conf0[:, None] > 0) & (conf1[None] > 0)).sum(axis=-1)
+                common_affinity = np.sqrt(conf0[:, None] * conf1[None])
+                dist /= (K0[0, 0] + K1[0, 0])/2
+                dist[common_count < self.cfg.min_common_joints] = self.cfg.threshold * 10
+                aff_geo = (self.cfg.threshold - dist)/self.cfg.threshold
+                aff_conf = common_affinity.mean(axis=-1)
+                aff_compose = aff_geo * aff_conf
+                distance[dimGroups[v0]:dimGroups[v0+1], dimGroups[v1]:dimGroups[v1+1]] = aff_compose
+                distance[dimGroups[v1]:dimGroups[v1+1], dimGroups[v0]:dimGroups[v0+1]] = aff_compose.T
+
+        affinity = np.clip(distance, 0, 1)
+        return affinity, dimGroups
+
+    def _calculate_affinity_MxN(self, keypoints3d, keypoints, cameras):
+        DEPTH_NEAR = 0.5
+        dims = [d.shape[0] for d in keypoints]
+        dimGroups = np.cumsum([0] + dims)
+        M = dimGroups[-1]
+        N = keypoints3d.shape[0]
+        distance = np.zeros((M, N), dtype=np.float32)
+        nViews = len(keypoints)
+        kpts_proj = project_points(keypoints3d, cameras['P'], einsum='vab,pkb->vpka')
+        depth = kpts_proj[..., -1]
+        kpts_proj[depth < DEPTH_NEAR] = -10000.
+        for v in range(nViews):
+            if keypoints[v].shape[0] == 0:
+                continue
+            pts = keypoints[v]
+            dist = np.linalg.norm(pts[:, None, :, :2] - kpts_proj[v][None, :, :, :2], axis=-1)
+            conf = np.sqrt(pts[:, None, :, -1] * keypoints3d[None, :, :, -1])
+            dist = (dist * conf).sum(axis=-1)/(1e-5 + conf.sum(axis=-1))
+            dist /= cameras['K'][v][0, 0]
+            distance[dimGroups[v]:dimGroups[v+1], :] = (self.cfg.threshold - dist)/self.cfg.threshold
+        affinity = np.clip(distance, 0, 1)
+        return affinity
+
+class MatchBase:
+    def __init__(self, cfg) -> None:
+        self.cfg = cfg
+        if cfg.distance.mode == 'epipolar':
+            self.distance = EpipolarDistance(cfg.distance)
+        else:
+            raise NotImplementedError
+
+    def set_previous(self, previous):
+        prev_ids = [p['id'] for p in previous]
+        prev_keypoints = [p['keypoints3d'] for p in previous]
+        self.prev_ids = prev_ids
+        self.prev_keypoints = prev_keypoints
+        if len(prev_ids) > 0:
+            self.prev_keypoints = np.stack(prev_keypoints)
+
+    @staticmethod
+    def undistort(points, cameras):
+        nViews = len(points)
+        pelvis_undis = []
+        for nv in range(nViews):
+            K = cameras['K'][nv]
+            dist = cameras['dist'][nv]
+            points_nv = points[nv]
+            points_nv_flat = points_nv.reshape(-1, 3)
+            if points_nv_flat.shape[0] > 0:
+                points_nv_flat = Undistort.points(points_nv_flat, K, dist)
+            pelvis_undis.append(points_nv_flat.reshape(*points_nv.shape))
+        return pelvis_undis
+
+    def _prepare_associate(self, affinity, keypoints):
+        dimGroups = [0]
+        views = []
+        nViews = len(keypoints)
+        affinity_sum = np.zeros((affinity.shape[0],))
+        for nv in range(nViews):
+            dimGroups.append(dimGroups[-1] + keypoints[nv].shape[0])
+            views.extend([nv] * keypoints[nv].shape[0])
+            start, end = dimGroups[nv], dimGroups[nv+1]
+            if end > start:
+                affinity_sum += affinity[:, start:end].max(axis=-1)
+        return affinity_sum, dimGroups, views
+
+    def try_to_triangulate(self, keypoints, cameras, indices, previous=None):
+        Pall, keypoints2d = [], []
+        for nv in range(indices.shape[0]):
+            if indices[nv] == -1:
+                Pall.append(cameras['P'][nv])
+                keypoints2d.append(np.zeros((25, 3), dtype=np.float32))
+                # keypoints2d.append(keypoints[nv][indices[nv]])
+            else:
+                Pall.append(cameras['P'][nv])
+                keypoints2d.append(keypoints[nv][indices[nv]])
+        Pall = np.stack(Pall)
+        keypoints2d = np.stack(keypoints2d)
+        if previous is not None:
+            kpts_proj = project_points(previous, cameras['P'], einsum='vab,kb->vka')
+            # Note: the depth must be taken into account here, because it is known.
+            # The closer the point, the larger this threshold should be; the farther, the smaller.
+            # radius / depth * focal
+            depth = kpts_proj[..., -1]
+            # Joints beyond this tracking threshold are dropped directly, which guarantees
+            # that the triangulated result stays within the threshold.
+            # If this threshold causes problems, increase it.
+            radius = self.cfg.triangulate.dist_track * cameras['K'][:, 0, 0][:, None] / depth
+            dist = np.linalg.norm(kpts_proj[..., :2] - keypoints2d[..., :2], axis=-1)
+            conf = np.sqrt(kpts_proj[..., -1] * keypoints2d[..., -1]) > 0
+            not_track = (dist > radius) & conf
+            if not_track.sum() > 0:
+                log('[Tri] {} 2d joints not tracked'.format(not_track.sum()))
+                keypoints2d[not_track] = 0.
+        keypoints3d, k2d = iterative_triangulate(keypoints2d, Pall, previous=previous, **self.cfg.triangulate)
+        not_valid_view = np.where((k2d[..., -1] < 0.1).all(axis=1))[0]
+        indices[not_valid_view] = -1
+        result = {
+            'keypoints3d': keypoints3d,
+            'indices': indices,
+            'keypoints2d': k2d
+        }
+        return result, indices
+
+    @staticmethod
+    def _indices_from_affinity(dimGroups, affinit_row, assigned, visited, nViews):
+        proposals = []
+        indices = np.zeros((nViews), dtype=np.int) - 1
+        for nv in range(nViews):
+            start, end = dimGroups[nv], dimGroups[nv+1]
+            block = affinit_row[start:end]
+            to_select = np.where((block>0.1) & \
+                (~assigned[start:end]) & \
+                (~visited[start:end]))[0]
+            if to_select.shape[0] == 1:
+                # only a single candidate
+                indices[nv] = to_select[0]
+            elif to_select.shape[0] > 1:
+                to_select_sort = sorted(to_select, key=lambda x:-block[x])
+                indices[nv] = to_select_sort[0]
+                for select_id in to_select_sort[1:]:
+                    proposals.append((nv, select_id, block[select_id]))
+            elif to_select.shape[0] == 0:
+                # empty
+                pass
+        return indices, proposals
+
+    def _check_indices(self, indices):
+        return (indices > -1).sum() >= self.cfg.triangulate.min_view_body
+
+    def _simple_associate2d_triangulate(self, affinity, keypoints, cameras, assigned=None):
+        # sum1 = affinity.sum(axis=1)
+        # Note: for each view, pick the candidate with the largest affinity.
+        affinity_sum, dimGroups, views = self._prepare_associate(affinity, keypoints)
+        nViews = len(keypoints)
+        n2d = affinity.shape[0]
+        # the assigned results of each person
+        if assigned is None:
+            assigned = np.zeros(n2d, dtype=np.bool)
+        visited = np.zeros(n2d, dtype=np.bool)
+        sortidx = np.argsort(-affinity_sum)
+        k3dresults = []
+        for idx in sortidx:
+            if assigned[idx]:continue
+            log('[Tri] Visited view{}: {}'.format(views[idx], idx-dimGroups[views[idx]]))
+            affinit_row = affinity[idx]
+            indices, proposals = self._indices_from_affinity(dimGroups, affinit_row, assigned, visited, nViews)
+            # Note: mark visited only after all the proposals have been generated.
+            visited[idx] = True
+            if not self._check_indices(indices):continue
+            # only consider candidates here; removing a view is not considered
+            log('[Tri] First try to triangulate of {}'.format(indices))
+            indices_origin = indices.copy()
+            result, indices = self.try_to_triangulate(keypoints, cameras, indices)
+            if not self._check_indices(indices):
+                # if the proposals is valid
+                if len(proposals) > 0:
+                    proposals.sort(key=lambda x:-x[2])
+                    for (nviews, select_id, conf) in proposals:
+                        indices = indices_origin.copy()
+                        indices[nviews] = select_id
+                        log('[Tri] Max fail, then try to triangulate of {}'.format(indices))
+                        result, indices = self.try_to_triangulate(keypoints, cameras, indices)
+                        if self._check_indices(indices):
+                            break
+                    else:
+                        # overall proposals, not find any valid
+                        continue
+                else:
+                    continue
+            for nv in range(nViews):
+                if indices[nv] == -1:
+                    continue
+                assigned[indices[nv]+dimGroups[nv]] = True
+            result['id'] = -1
+            k3dresults.append(result)
+        return k3dresults
+
+    def _check_speed(self, previous, current, verbo=False):
+        conf = np.sqrt(previous[:, -1] * current[:, -1])
+        conf[conf < self.cfg.triangulate.min_conf_3d] = 0.
+        dist = np.linalg.norm(previous[:, :3] - current[:, :3], axis=-1)
+        conf_mean = (conf * dist).sum()/(1e-5 + conf.sum()) * 1000
+        if verbo:
+            log('Track distance of each joints:')
+            print(dist)
+            print(conf_mean)
+        return conf_mean < self.cfg.triangulate.dist_track
+
+    def _simple_associate2d3d_triangulate(self, keypoints3d, affinity, keypoints, dimGroups, cameras):
+        nViews = len(keypoints)
+        n2d = affinity.shape[0]
+        # the assigned results of each person
+        assigned = np.zeros(n2d, dtype=np.bool)
+        visited = np.zeros(n2d, dtype=np.bool)
+        affinity_sum = affinity.sum(axis=0)
+        sortidx = np.argsort(-affinity_sum)
+        k3dresults = []
+        for idx3d in sortidx:
+            log('[Tri] Visited 3D {}'.format(self.prev_ids[idx3d]))
+            affinit_row = affinity[:, idx3d]
+            indices, proposals = self._indices_from_affinity(dimGroups, affinit_row, assigned, visited, nViews)
+            if not self._check_indices(indices):continue
+            # only consider candidates here; removing a view is not considered
+            log('[Tri] First try to triangulate of {}'.format(indices))
+            indices_origin = indices.copy()
+            result, indices = self.try_to_triangulate(keypoints, cameras, indices, previous=keypoints3d[idx3d])
+
+            if not (self._check_indices(indices) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
+                # if the proposals is valid
+                previous = keypoints3d[idx3d]
+                # select the best keypoints of each view
+                previous_proj = project_points(previous, cameras['P'])
+                dist_all = np.zeros((previous_proj.shape[0],)) + 999.
+                indices_all = np.zeros((previous_proj.shape[0],), dtype=int)
+                for nv in range(previous_proj.shape[0]):
+                    dist = np.linalg.norm(previous_proj[nv, :, :2][None] - keypoints[nv][:, :, :2], axis=-1)
+                    conf = (previous[..., -1] > 0.1)[None] & (keypoints[nv][:, :, -1] > 0.1)
+                    dist_mean = (dist * conf).sum(axis=-1) / (1e-5 + conf.sum(axis=-1))
+                    dist_all[nv] = dist_mean.min()
+                    indices_all[nv] = dist_mean.argmin()
+                want_view = dist_all.argsort()[:self.cfg.triangulate.min_view_body]
+                # TODO: add proposal
+                proposal = (want_view, indices_all[want_view], -dist_all[want_view])
+                proposals = [proposal]
+                if len(proposals) > 0:
+                    proposals.sort(key=lambda x:-x[2])
+                    for (nv, select_id, conf) in proposals:
+                        indices = np.zeros_like(indices_origin) - 1
+                        indices[nv] = select_id
+                        log('[Tri] Max fail, then try to triangulate of {}'.format(indices))
+                        result, indices = self.try_to_triangulate(keypoints, cameras, indices, previous=keypoints3d[idx3d])
+                        if (self._check_indices(indices) and self._check_speed(keypoints3d[idx3d], result['keypoints3d'])):
+                            break
+                    else:
+                        # overall proposals, not find any valid
+                        mywarn('[Tri] {} Track fail after {} proposal'.format(idx3d, len(proposals)))
+                        import ipdb; ipdb.set_trace()
+                        continue
+                else:
+                    mywarn('[Tri] Track fail {}'.format(indices))
+                    self._check_speed(keypoints3d[idx3d], result['keypoints3d'], verbo=True)
+                    continue
+            log('[Tri] finally used indices: {}'.format(indices))
+            for nv in range(nViews):
+                if indices[nv] == -1:
+                    continue
+                assigned[indices[nv]+dimGroups[nv]] = True
+            result['id'] = self.prev_ids[idx3d]
+            k3dresults.append(result)
+        return k3dresults, assigned
+
+    def associate(self, cameras, keypoints):
+        keypoints = self.undistort(keypoints, cameras)
+        for kpts in keypoints:
+            conf = kpts[..., -1]
+            conf[conf < self.cfg.min_conf] = 0.
+        if len(self.prev_ids) > 0:
+            # naive track
+            with Timer('affinity 2d'):
+                affinity2d2d, dimGroups = self.distance.calculate_affinity_MxM(keypoints, cameras)
+            with Timer('affinity 3d'):
+                affinity2d3d = self.distance._calculate_affinity_MxN(self.prev_keypoints, keypoints, cameras)
+            affinity_comp = np.vstack([
+                np.hstack([affinity2d2d, affinity2d3d]),
+                np.hstack([affinity2d3d.T, np.eye(len(self.prev_ids))])
+            ])
+            with Timer('svt'):
+                affinity2d2d_2d3d = self.distance.low_rank_optimization(
+                    affinity_comp,
+                    dimGroups.tolist() + [dimGroups[-1] + len(self.prev_ids)],
+                    vis=False)
+            # first associate 2D detections with 3D tracks
+            affinity2d3d = affinity2d2d_2d3d[:affinity2d2d.shape[0], affinity2d2d.shape[1]:]
+            with Timer('associate 3d'):
+                k3dresults, assigned = self._simple_associate2d3d_triangulate(self.prev_keypoints, affinity2d3d, keypoints, dimGroups, cameras)
+            # then associate the remaining 2D detections with each other
+            with Timer('associate 2d'):
+                affinity2d2d = affinity2d2d_2d3d[:affinity2d2d.shape[0], :affinity2d2d.shape[1]]
+                match_results = self._simple_associate2d_triangulate(affinity2d2d, keypoints, cameras, assigned=assigned)
+            match_results = k3dresults + match_results
+        else:
+            affinity2d2d, dimGroups = self.distance.calculate_affinity_MxM(keypoints, cameras)
+            affinity2d2d = self.distance.low_rank_optimization(affinity2d2d, dimGroups)
+            # directly associate in 2D
+            match_results = self._simple_associate2d_triangulate(affinity2d2d, keypoints, cameras)
+        return match_results
+
+class TrackBase:
+    # This class maintains the generic tracking operations.
+    # The main interfaces are:
+    #   1. add
+    #   2. remove
+    #   3. smooth
+    #   4. naive fit
+    def __init__(self, cfg) -> None:
+        self.cfg = cfg
+        self.kintree = np.array(cfg.kintree)
+        self.max_id = 0
+        self.current_frame = -1
+        self.record = {}
+
+    def update_frame(self, frame):
+        # remove the results that are not in the frame
+        self.current_frame = frame
+        remove_id = []
+        for pid, record in self.record.items():
+            if frame - record['frames'][-1] > self.cfg.max_missing:
+                mywarn('[Track] remove track {} with frames {}'.format(pid, record['frames']))
+                remove_id.append(pid)
+        for pid in remove_id:
+            self.record.pop(pid)
+        return True
+
+    def query_current(self, ret_final=False):
+        # return the results that are in the frame
+        prevs = []
+        for pid, record in self.record.items():
+            k3d = record['records'][-1]
+            valid = k3d[:, -1] > 0.1
+            if ret_final:
+                # check the valid range
+                k3d_valid = k3d[valid]
+                flag = (k3d_valid[:, 0] > self.cfg.final_ranges[0][0]) & \
+                    (k3d_valid[:, 0] < self.cfg.final_ranges[1][0]) & \
+                    (k3d_valid[:, 1] > self.cfg.final_ranges[0][1]) & \
+                    (k3d_valid[:, 1] < self.cfg.final_ranges[1][1]) & \
+                    (k3d_valid[:, 2] > self.cfg.final_ranges[0][2]) & \
+                    (k3d_valid[:, 2] < self.cfg.final_ranges[1][2])
+                if flag.sum() < 5:
+                    continue
+            prevs.append({
+                'id': pid,
+                'keypoints3d': record['records'][-1],
+                'ages': len(record['frames'])
+            })
+        if ret_final:
+            prevs.sort(key=lambda x:-x['ages'])
+            prevs = prevs[:self.cfg.final_max_person]
+            prevs.sort(key=lambda x:x['id'])
+        return prevs
+
+    def add_track(self, res):
+        # add a new track
+        pid = self.max_id
+        res['id'] = pid
+        self.record[pid] = {
+            'frames': [self.current_frame],
+            'records': [res['keypoints3d']]
+        }
+        self.max_id += 1
+
+    def update_track(self, res):
+        pid = res['id']
+        N_UPDATE_LENGTH = 10
+        if len(self.record[pid]['frames']) >= N_UPDATE_LENGTH and len(self.record[pid]['frames']) % N_UPDATE_LENGTH == 0:
+            # update the limb lengths
+            # (nFrames, nJoints, 4)
+            history = np.stack(self.record[pid]['records'])
+            left = history[:, self.kintree[:, 0]]
+            right = history[:, self.kintree[:, 1]]
+            conf = np.minimum(left[..., -1], right[..., -1])
+            conf[conf < 0.1] = 0.
+            limb_length = np.linalg.norm(left[..., :3] - right[..., :3], axis=-1)
+            limb_mean = (conf * limb_length).sum(axis=0)/(1e-5 + conf.sum(axis=0))
+            conf_mean = conf.sum(axis=0)
+            log('[Track] Update limb length of {} to \n {}'.format(pid, limb_mean))
+            self.record[pid]['limb_length'] = (limb_mean, conf_mean)
+        k3d = res['keypoints3d']
+        if 'limb_length' in self.record[pid].keys():
+            left = k3d[self.kintree[:, 0]]
+            right = k3d[self.kintree[:, 1]]
+            limb_now = np.linalg.norm(left[:, :3] - right[:, :3], axis=-1)
+            limb_mean, conf_mean = self.record[pid]['limb_length']
+            not_valid = ((limb_now > limb_mean * 1.5) | (limb_now < limb_mean * 0.5)) & (conf_mean > 0.1)
+            if not_valid.sum() > 0:
+                leaf = self.kintree[not_valid, 1]
+                res['keypoints3d'][leaf] = 0.
+                mywarn('[Track] {} remove {} joints'.format(pid, leaf))
+                mywarn('[Track] mean: {}'.format(limb_mean[not_valid]))
+                mywarn('[Track] current: {}'.format(limb_now[not_valid]))
+
+        self.record[pid]['frames'].append(self.current_frame)
+        self.record[pid]['records'].append(res['keypoints3d'])
+
+    def track(self, match_results):
+        wo_id_results = [r for r in match_results if r['id'] == -1]
+        w_id_results = [r for r in match_results if r['id'] != -1]
+        wo_id_results.sort(key=lambda x:-(x['indices']!=-1).sum())
+        for res in wo_id_results:
+            self.add_track(res)
+        for res in w_id_results:
+            self.update_track(res)
+        return w_id_results + wo_id_results
+
+class MatchAndTrack():
+    def __init__(self, cfg_match, cfg_track) -> None:
+        self.matcher = MatchBase(cfg_match)
+        self.tracker = TrackBase(cfg_track)
+
+    def __call__(self, cameras, keypoints, meta):
+        frame = meta['frame']
+        # 1. query the previous frame
+        self.tracker.update_frame(frame)
+        previous = self.tracker.query_current()
+        # 2. associate the current frame
+        self.matcher.set_previous(previous)
+        match_results = self.matcher.associate(cameras, keypoints)
+        # 3. update the tracker
+        self.tracker.track(match_results)
+        results = self.tracker.query_current(ret_final=True)
+        pids = [p['id'] for p in results]
+        if len(pids) > 0:
+            keypoints3d = np.stack([p['keypoints3d'] for p in results])
+        else:
+            keypoints3d = []
+        log('[Match&Triangulate] Current ID: {}'.format(pids))
+        return {'results': results, 'keypoints3d': keypoints3d, 'pids': pids}
\ No newline at end of file
diff --git a/myeasymocap/operations/optimizer.py b/myeasymocap/operations/optimizer.py
index 98c1283..97b180b 100644
--- a/myeasymocap/operations/optimizer.py
+++ b/myeasymocap/operations/optimizer.py
@@ -98,6 +98,7 @@ class Optimizer:
         for key, val in loss.items():
             self.used_infos.extend(val.key_from_infos)
         self.used_infos = list(set(self.used_infos))
+        self.iter = 0

     def log_loss(self, iter_, closure, print_loss=False):
         if iter_ % 10 == 0 or print_loss:
@@ -138,10 +139,13 @@ class Optimizer:
         infos_used = {key: infos[key] for key in self.used_infos if key in infos.keys()}
         infos_used = dict_of_numpy_to_tensor(infos_used, device=device)

-        log('[{}] Optimize {}'.format(self.__class__.__name__, self.optimize_keys))
+        optimize_keys = self.optimize_keys
+        if isinstance(optimize_keys[0], list):
+            optimize_keys = optimize_keys[self.iter]
+        log('[{}] Optimize {}'.format(self.__class__.__name__, optimize_keys))
         log('[{}] Loading {}'.format(self.__class__.__name__, self.used_infos))
         opt_params = {}
-        for key in self.optimize_keys:
+        for key in optimize_keys:
             if key in infos.keys(): # parameters to be optimized
                 opt_params[key] = infos_used[key]
             elif key in params.keys():
@@ -160,7 +164,7 @@ class Optimizer:
         ret = {
             'params': params
         }
-        for key in self.optimize_keys:
+        for key in optimize_keys:
             if key in infos.keys():
                 ret[key] = opt_params[key]
         ret = dict_of_tensor_to_numpy(ret)
diff --git a/myeasymocap/stages/basestage.py b/myeasymocap/stages/basestage.py
index d8f5e95..8c1cc4d 100644
--- a/myeasymocap/stages/basestage.py
+++ b/myeasymocap/stages/basestage.py
@@ -26,6 +26,8 @@ class MultiStage:
     def load_final(self):
         at_finals = {}
         for key, val in self._at_final.items():
+            if 'module' not in val.keys():
+                continue
            if val['module'] == 'skip':
                mywarn('Stage {} is not used'.format(key))
                continue
@@ -35,7 +37,7 @@ class MultiStage:
            at_finals[key] = model
        self.model_finals = at_finals

-    def __init__(self, output, at_step, at_final) -> None:
+    def __init__(self, output, at_step, at_final, keys_keep=[], timer=True) -> None:
         log('[{}] writing the results to {}'.format(self.__class__.__name__, output))
         at_steps = {}
         for key, val in at_step.items():
@@ -50,12 +52,15 @@ class MultiStage:
         self.model_steps = at_steps
         self._at_step = at_step
         self._at_final = at_final
-        self.timer = Timer(at_steps, verbose=False)
+        self.keys_keep = keys_keep
+        self.timer = Timer(at_steps, verbose=timer)

     def at_step(self, data, index):
         ret = {}
         if 'meta' in data:
             ret['meta'] = data['meta']
+        for key in self.keys_keep:
+            ret[key] = data[key]
         timer = {}
         for key, model in self.model_steps.items():
             for k in self._at_step[key].get('key_keep', []):
@@ -104,6 +109,7 @@ class MultiStage:
         for key, model in self.model_finals.items():
             for iter_ in range(self._at_final[key].get('repeat', 1)):
                 inputs = {}
+                model.iter = iter_
                 for k in self._at_final[key].get('key_from_data', []):
                     inputs[k] = data[k]
                 for k in self._at_final[key].get('key_from_previous', []):
@@ -117,6 +123,7 @@ class MultiStage:
                 ret.update(output)
         return ret

+
 class StageForFittingEach:
     def __init__(self, stages, keys_keep) -> None:
         stages_ = {}
@@ -132,6 +139,7 @@ class StageForFittingEach:

     def __call__(self, results, **ret):
         for pid, result in results.items():
+            print('[{}] Optimize person {} with {} frames'.format(self.__class__.__name__, pid, len(result['frames'])))
             ret0 = {}
             ret0.update(ret)
             for key, stage in self.stages.items():
diff --git a/myeasymocap/stages/collect.py b/myeasymocap/stages/collect.py
index 83079ce..3d8bc16 100644
--- a/myeasymocap/stages/collect.py
+++ b/myeasymocap/stages/collect.py
@@ -26,8 +26,9 @@ class CheckFramePerson:
         }

 class CollectMultiPersonMultiFrame:
-    def __init__(self, key) -> None:
+    def __init__(self, key, min_frame=10) -> None:
         self.key = key
+        self.min_frame = min_frame

     def __call__(self, keypoints3d, pids):
         records = {}
@@ -41,6 +42,12 @@ class CollectMultiPersonMultiFrame:
                 }
                 records[pid]['frames'].append(frame)
                 records[pid]['keypoints3d'].append(keypoints3d[frame][i])
+        remove_id = []
         for pid, record in records.items():
+            print('[{}] Collect person {} with {} frames'.format(self.__class__.__name__, pid, len(record['frames'])))
             record['keypoints3d'] = np.stack(record['keypoints3d']).astype(np.float32)
+            if len(record['frames']) < self.min_frame:
+                remove_id.append(pid)
+        for pid in remove_id:
+            records.pop(pid)
         return {'results': records}
\ No newline at end of file
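
The sketch below (not part of the patch) shows one way the new iterative_triangulate could be exercised on synthetic data. The camera layout, the 15-joint toy skeleton, and the dist_max value of 20 pixels are illustrative assumptions; only the function signature and the (nViews, nJoints, 3) keypoint / (nViews, 3, 4) projection-matrix conventions come from the code above.

import numpy as np
from myeasymocap.operations.iterative_triangulate import iterative_triangulate

# Four synthetic cameras: identity rotation, shifted along x, looking down +z.
f, c = 1000., 500.
K = np.array([[f, 0, c], [0, f, c], [0, 0, 1.]])
Pall = []
for tx in (-1., -0.3, 0.3, 1.):
    RT = np.hstack([np.eye(3), np.array([[tx], [0.], [0.]])])
    Pall.append(K @ RT)
Pall = np.stack(Pall)                      # (nViews, 3, 4)

# A toy "skeleton": 15 random 3D points roughly 4-6 m in front of the cameras.
gt = np.random.rand(15, 3) * np.array([1., 1., 2.]) + np.array([-0.5, -0.5, 4.])
gt_h = np.hstack([gt, np.ones((15, 1))])   # homogeneous coordinates

# Project into every view and append a confidence channel.
kpts2d = []
for P in Pall:
    uvd = gt_h @ P.T                       # last channel is the depth
    uv = uvd[:, :2] / uvd[:, 2:]
    kpts2d.append(np.hstack([uv, np.ones((15, 1))]))
kpts2d = np.stack(kpts2d)                  # (nViews, nJoints, 3)

kpts2d[0, 3, :2] += 80.                    # corrupt one joint in the first view

# dist_max is assumed to be a reprojection threshold in pixels here.
k3d, k2d = iterative_triangulate(kpts2d, Pall, min_conf=0.1, min_view=3,
                                 dist_max=20., debug=False)
print(np.abs(k3d[:, :3] - gt).max())       # small once the outlier is rejected

The corrupted joint should either be dropped directly or cleaned up by remove_outjoint, depending on how many views its reprojection error exceeds dist_max.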