1075 lines
49 KiB
Python
1075 lines
49 KiB
Python
|
import numpy as np
|
|||
|
import cv2
|
|||
|
from easymocap.mytools.camera_utils import Undistort
|
|||
|
from easymocap.mytools.debug_utils import mywarn
|
|||
|
from .triangulate import batch_triangulate, project_wo_dist
|
|||
|
from collections import defaultdict
|
|||
|
# Path of the text file that receives log messages (opened in append mode).
LOG_FILE = 'log.txt'
# Verbosity: 0 disables logging, >0 enables LOG, >1 additionally enables FULL_LOG.
LOG_LEVEL = 0 #2


def _append_to_log(x):
    """Append ``x`` as one printed line to LOG_FILE and close the file again."""
    # The context manager guarantees the handle is released after every
    # message instead of leaking one open file object per call.
    with open(LOG_FILE, 'a') as stream:
        print(x, file=stream)


def _discard_log(x):
    """Ignore ``x``; used when the corresponding log level is disabled."""
    return None


# Both loggers take a single positional message and return None.
FULL_LOG = _append_to_log if LOG_LEVEL > 1 else _discard_log
LOG = _append_to_log if LOG_LEVEL > 0 else _discard_log
|
|||
|
|
|||
|
def LOG_ARRAY(array2d, format='{:>8.2f} '):
    """Render a 2D array as text, one line per row.

    Args:
        array2d: object indexable as ``array2d[i, j]`` that exposes ``shape``.
        format: ``str.format`` template applied to every single element.

    Returns:
        The formatted elements of each row concatenated and terminated by a
        newline; an empty string when there are no rows.
    """
    # shape[1] is only looked up once a row exists, so zero-row inputs never
    # touch the second dimension.
    return ''.join(
        ''.join(format.format(array2d[row, col]) for col in range(array2d.shape[1])) + '\n'
        for row in range(array2d.shape[0])
    )
|
|||
|
|
|||
|
class MatchBase:
    """Geometric helpers for associating 2D detections across camera views.

    The constructor only stores ``mode``, ``cfg`` and the ``max_id`` counter.
    ``assign_by_3D`` additionally reads ``self.results`` and ``self.frames``,
    and ``assign_by_2D`` / ``assign_by_2D_3pair`` read ``self.frames``; those
    attributes are not created here and must be provided by the subclass.

    Configuration attributes read from ``cfg`` by the methods of this class:
    ``valid_angle``, ``valid_ranges``, ``track_pixel``, ``max_pixel``,
    ``max_movement``, ``min_views`` and ``min_views_init``.
    """

    def __init__(self, mode, cfg) -> None:
        """Store the mode and configuration, print them and reset ``max_id``."""
        self.mode = mode
        self.cfg = cfg
        print('[{}]'.format(self.__class__.__name__))
        print(self.cfg)
        # Next identity handed out to a newly created 3D result.
        self.max_id = 0

    def make_grids(self, grids, grids_step):
        """Enumerate the points of a regular 3D lattice.

        Args:
            grids: pair of corners; ``grids[0]`` is the start and ``grids[1]``
                the (exclusive, as in ``np.arange``) stop along x, y and z.
            grids_step: spacing used along all three axes.

        Returns:
            Array of shape ``(N, 3)`` holding every lattice point.
        """
        grid_x = np.arange(grids[0][0], grids[1][0], grids_step)
        grid_y = np.arange(grids[0][1], grids[1][1], grids_step)
        grid_z = np.arange(grids[0][2], grids[1][2], grids_step)
        grid_xyz = np.stack(np.meshgrid(grid_x, grid_y, grid_z), axis=-1)
        grids = grid_xyz.reshape(-1, 3)
        print('[{}] Generate {} => {} grids'.format(self.__class__.__name__, grid_xyz.shape, grids.shape[0]))
        return grids

    @staticmethod
    def stack_array(arrays):
        """Concatenate per-view arrays and remember where each view starts.

        Args:
            arrays: one array per view, concatenated along axis 0.

        Returns:
            Tuple ``(results, views_all, dimGroups)``: the concatenated array,
            the integer view index of every row, and the cumulative row
            offsets (``dimGroups[v]:dimGroups[v + 1]`` are the rows of view
            ``v``).
        """
        dimGroups = [0]
        results = []
        views_all = []
        for nv, array in enumerate(arrays):
            dimGroups.append(dimGroups[-1] + array.shape[0])
            views_all.extend([nv] * array.shape[0])
            results.append(array)
        results = np.concatenate(results, axis=0)
        # Force an integer dtype: with no detections at all the list is empty
        # and np.array([]) would be float64, which cannot be used as an index.
        return results, np.array(views_all, dtype=int), np.array(dimGroups)

    @staticmethod
    def undistort(points, cameras):
        """Undistort the detections of every view.

        Args:
            points: one array of detections per view.
            cameras: mapping whose ``'K'`` and ``'dist'`` entries are indexable
                by view.

        Returns:
            List with one array per view; views without detections get a copy
            of their (empty) input.
        """
        pelvis_undis = []
        for nv in range(len(points)):
            if points[nv].shape[0] > 0:
                pelvis = Undistort.points(points[nv], cameras['K'][nv], cameras['dist'][nv])
            else:
                pelvis = points[nv].copy()
            pelvis_undis.append(pelvis)
        return pelvis_undis

    @staticmethod
    def distance_by_triangulate(p_src, p_dst, camera_src, camera_dst, ranges):
        """Score every (source, destination) detection pair of two views.

        Each pair is triangulated from the two projection matrices and
        re-projected; the score is the re-projection error averaged over the
        two views. Pairs whose triangulated point lies outside ``ranges`` are
        set to ``1e5``.

        Args:
            p_src: detections of the source view (first axis: detection).
            p_dst: detections of the destination view.
            camera_src: mapping with the source projection matrix under ``'P'``.
            camera_dst: mapping with the destination projection matrix under ``'P'``.
            ranges: ``(min_xyz, max_xyz)`` bounds (exclusive) for valid 3D points.

        Returns:
            ``float32`` array of shape ``(len(p_src), len(p_dst))``.
        """
        dist = np.zeros((p_src.shape[0], p_dst.shape[0]), dtype=np.float32)
        # Enumerate all (src, dst) index combinations.
        idx_src = np.arange(p_src.shape[0])
        idx_dst = np.arange(p_dst.shape[0])
        idx_src, idx_dst = np.meshgrid(idx_src, idx_dst)
        idx_src = idx_src.reshape(-1)
        idx_dst = idx_dst.reshape(-1)
        p_src = p_src[idx_src]
        p_dst = p_dst[idx_dst]
        keypoints = np.stack([p_src, p_dst], axis=0)
        keypoints_flat = keypoints.reshape(keypoints.shape[0], -1, keypoints.shape[-1])
        P = np.stack([camera_src['P'], camera_dst['P']], axis=0)
        k3d = batch_triangulate(keypoints_flat, P, min_view=2)
        repro, depth = project_wo_dist(k3d, P)
        dist_repro = np.linalg.norm(repro[..., :2] - keypoints_flat[..., :2], axis=-1).mean(axis=0)
        valid = (k3d[:, 0] > ranges[0][0]) & (k3d[:, 0] < ranges[1][0]) & \
                (k3d[:, 1] > ranges[0][1]) & (k3d[:, 1] < ranges[1][1]) & \
                (k3d[:, 2] > ranges[0][2]) & (k3d[:, 2] < ranges[1][2])
        dist_repro[~valid] = 1e5
        dist[idx_src, idx_dst] = dist_repro
        return dist

    def calculate_distance(self, pelvis_undis, cameras, dimGroups):
        """Build the symmetric 2D-2D distance matrix over all detections.

        Pairs from different views are scored with
        ``distance_by_triangulate``; every other entry (same view, or a view
        without detections) keeps the default of ``10000``.

        Side effect: stores the boolean matrix of view pairs whose angle lies
        strictly inside ``cfg.valid_angle`` (degrees) in
        ``cameras['valid_theta']``.

        Args:
            pelvis_undis: per-view undistorted detections.
            cameras: mapping with per-view ``'R'``, ``'T'``, ``'K'``,
                ``'dist'`` and ``'P'``.
            dimGroups: cumulative per-view offsets from ``stack_array``.

        Returns:
            ``float32`` array of shape ``(dimGroups[-1], dimGroups[-1])``.
        """
        DIST_MAX = 10000.
        distance = np.zeros((dimGroups[-1], dimGroups[-1]), dtype=np.float32) + DIST_MAX
        nViews = len(dimGroups) - 1
        # Angle between the directions R^T @ [0, 0, 1] of every pair of cameras
        # (presumably the optical axes expressed in world coordinates).
        ray0 = np.array([0, 0, 1], dtype=np.float32).reshape(1, 3, 1)
        ray_cam = cameras['R'].transpose(0, 2, 1) @ ray0
        ray_cam = ray_cam[..., 0]
        cos_theta = np.sum(ray_cam[:, None] * ray_cam[None], axis=-1)
        theta = np.rad2deg(np.arccos(np.clip(cos_theta, -1., 1.)))
        valid_theta = np.logical_and(theta > self.cfg.valid_angle[0], theta < self.cfg.valid_angle[1])
        for src in range(nViews - 1):
            for dst in range(src + 1, nViews):
                # NOTE: view pairs with an invalid angle are deliberately still
                # scored here; valid_theta is only applied later, when an
                # initial pair is chosen in assign_by_2D.
                p_src = pelvis_undis[src][:, None]
                p_dst = pelvis_undis[dst][:, None]
                if p_src.shape[0] == 0 or p_dst.shape[0] == 0:
                    continue
                camera_src = {key: cameras[key][src] for key in ['R', 'T', 'K', 'dist', 'P']}
                camera_dst = {key: cameras[key][dst] for key in ['R', 'T', 'K', 'dist', 'P']}
                dist = self.distance_by_triangulate(p_src, p_dst, camera_src, camera_dst, self.cfg.valid_ranges)
                distance[dimGroups[src]:dimGroups[src+1], dimGroups[dst]:dimGroups[dst+1]] = dist
                distance[dimGroups[dst]:dimGroups[dst+1], dimGroups[src]:dimGroups[src+1]] = dist.T
        cameras['valid_theta'] = valid_theta
        return distance

    def calculate_repro(self, results, pelvis_undis, cameras, views_all):
        """Distance between every 2D detection and every existing 3D result.

        Each 3D result is projected into all views; a detection is compared
        with the projection in its own view.

        Args:
            results: list of dicts providing the 3D point under ``'pelvis'``.
            pelvis_undis: stacked 2D detections (first axis: detection).
            cameras: mapping with the per-view projection matrices under ``'P'``.
            views_all: integer view index of every detection.

        Returns:
            Array with one row per detection and one column per 3D result; an
            all-zero ``float32`` array with zero columns when ``results`` is
            empty.
        """
        nViews = len(cameras['P'])
        n3D = len(results)
        if n3D == 0:
            return np.zeros((pelvis_undis.shape[0], n3D), dtype=np.float32)
        keypoints3d = np.stack([d['pelvis'] for d in results], axis=0)
        Pall = np.stack([cameras['P'][nv] for nv in range(nViews)])
        # k2d is indexed (view, person, point, channel) per the einsum spec.
        k2d, depth = project_wo_dist(keypoints3d, Pall, einsum='vab,pkb->vpka')
        repro_select = k2d[views_all]
        dist = np.linalg.norm(repro_select[..., :2] - pelvis_undis[:, None, None, :2], axis=-1).mean(axis=2)
        return dist

    def triangulate_and_repro(self, cameras, views, proposals):
        """Triangulate one point from several views and re-project it.

        Args:
            cameras: mapping with the per-view projection matrices under ``'P'``.
            views: view index of every proposal.
            proposals: one 2D detection per entry of ``views``.

        Returns:
            Tuple ``(k3d, dist_repro, depth)``: the triangulated point, the
            re-projection error per view and the depth returned by
            ``project_wo_dist``.
        """
        Pall = np.stack([cameras['P'][v] for v in views])
        kpts = np.stack(proposals)
        kpts = kpts[:, None]
        k3d = batch_triangulate(kpts, Pall)
        k2d, depth = project_wo_dist(k3d, Pall)
        dist_repro = np.linalg.norm(k2d[..., :2] - kpts[..., :2], axis=-1).mean(axis=-1)
        return k3d, dist_repro, depth

    @staticmethod
    def check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
        """Tell whether ``idx3d`` is the closest not-yet-visited 3D result of a detection.

        Args:
            distance: matrix indexed ``[idx2d, idx3d]``.
            idx3d: index of the 3D result under consideration.
            idx2d: index of the 2D detection.
            visited3d: indices of 3D results that were already processed and
                therefore no longer compete for the detection.

        Returns:
            ``False`` if some unvisited 3D result other than ``idx3d`` is
            ranked before ``idx3d`` for this detection, ``True`` otherwise.
        """
        for i3d in distance[idx2d].argsort():
            if i3d == idx3d:
                return True
            if i3d not in visited3d:
                return False
        return True

    @staticmethod
    def sort_with_affinity(distance, dimGroups, INLIER_REPRO):
        """Order detections by their number of plausible partners, most first.

        A partner is plausible when its distance is below ``2 * INLIER_REPRO``.

        Args:
            distance: square 2D-2D distance matrix.
            dimGroups: unused; kept for interface compatibility.
            INLIER_REPRO: inlier threshold in pixels.

        Returns:
            Integer array of detection indices.
        """
        valid_count = (distance < INLIER_REPRO * 2).sum(axis=0)
        return (-valid_count).argsort()

    def assign_by_3D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
        """Track the results of the previous frame into the current detections.

        For every entry of ``self.results`` (most views first) two detections
        closer than ``cfg.track_pixel`` are picked as initialisation, then
        further detections are added while the joint triangulation stays below
        ``cfg.max_pixel`` with all depths above 0.5.

        Side effects: sets ``'tracked'`` on every entry of ``self.results`` and
        writes the person id into ``used_index`` for every consumed detection
        (including detections consumed by an attempt that is later dropped).

        Args:
            used_index: per-detection owner id, ``-1`` meaning free; modified in place.
            distance: matrix indexed ``[detection, 3D result]``.
            pelvis_all: stacked 2D detections.
            views_all: view index of every detection.
            dimGroups: cumulative per-view offsets.
            cameras: camera mapping forwarded to ``triangulate_and_repro``.

        Returns:
            List of result dicts for the successfully tracked persons.
        """
        INLIER_TRACK = self.cfg.track_pixel
        INLIER_REPRO = self.cfg.max_pixel
        # Process previous results in order of their previous visibility.
        index_3d = list(range(len(self.results)))
        index_3d.sort(key=lambda x: -len(self.results[x]['views']))
        results = []
        visited3d = set()
        for idx3d in index_3d:
            visited3d.add(idx3d)
            self.results[idx3d]['tracked'] = False
            pid = self.results[idx3d]['id']
            dist = distance[:, idx3d]
            FULL_LOG('[Assign 3D] Check 3D {}'.format(pid))
            FULL_LOG('[Assign 3D] Distance {}'.format(LOG_ARRAY(dist[None])))
            current = []
            views = []
            proposal = dist.argsort()
            # Initialisation: find two detections from different views.
            for idx2d in proposal:
                # Proposals are sorted, so everything after this is too far.
                if dist[idx2d] > INLIER_TRACK:
                    break
                if used_index[idx2d] > -1:
                    continue
                if views_all[idx2d] in views:
                    continue
                if not self.check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
                    continue
                if len(current) == 1:
                    # One detection is already chosen: the pair must triangulate
                    # close to the previous 3D position.
                    k3d, dist_repro, depth = self.triangulate_and_repro(
                        cameras, views + [views_all[idx2d]], current + [pelvis_all[idx2d]])
                    _dist = np.linalg.norm(k3d[:, :3] - self.results[idx3d]['pelvis'][:, :3], axis=-1).mean()
                    if _dist > self.cfg.max_movement:
                        continue
                # Found a reasonable detection for the initial pair.
                current.append(pelvis_all[idx2d])
                views.append(views_all[idx2d])
                used_index[idx2d] = pid
                FULL_LOG(f'[Assign 3D] First track 3D {pid} with {idx2d}, view ({views_all[idx2d]})')
                if len(current) == 2:
                    break
            if len(current) < 2:
                # No good initialisation was found.
                continue
            for idx2d in proposal:
                # Skip views that are already covered and detections already used.
                if views_all[idx2d] in views:
                    continue
                if used_index[idx2d] > -1:
                    continue
                if not self.check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
                    continue
                # Try to add this detection.
                FULL_LOG('[Assign 3D] 3D {} add {}, distance={:.2f}'.format(pid, idx2d, dist[idx2d]))
                new = current + [pelvis_all[idx2d]]
                views_new = views + [views_all[idx2d]]
                k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
                # NOTE(review): cfg.max_movement is not enforced in this
                # extension step, only re-projection error and depth are.
                flag_depth = (depth > 0.5).all()
                flag_repro = dist_repro.mean() < INLIER_REPRO
                flag = flag_repro & flag_depth
                FULL_LOG('[Assign 3D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
                if flag:
                    current = new
                    views = views_new
                    used_index[idx2d] = pid
                    FULL_LOG('[Assign 3D] {} => {}'.format(idx2d, np.where(used_index == pid)[0]))
                else:
                    FULL_LOG('[Assign 3D] Failed')
            # Not enough views to keep this person.
            if len(views) < self.cfg.min_views:
                continue
            k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
            select = np.where(used_index == pid)[0]
            results.append({
                'id': pid,
                'pelvis': k3d,
                # Stored twice so the pelvis survives if 'keypoints3d' is
                # overwritten later on.
                'keypoints3d': k3d,
                'views': views_all[select],
                'select': select,
                'indices': select - dimGroups[views_all[select]],
                'frames': self.results[idx3d]['frames'] + [self.frames]
            })
            self.results[idx3d]['tracked'] = True
        for res in results:
            views_text = ' '.join('{:2d}'.format(v) for v in res['views'])
            select_text = ' '.join('{:2d}'.format(s) for s in res['select'])
            text = (f" - Track {res['id']} with {len(res['views'])} views\n"
                    f"views: {views_text}\n"
                    f"id : {select_text}")
            LOG(text)
            print(text)
        for res in self.results:
            if not res['tracked']:
                mywarn('- 3D {} not tracked'.format(res['id']))
                # TODO: for an untracked person, check whether two views are
                # very close; if they are and were taken by somebody else, hand
                # that detection to this person too (in extreme cases a person
                # is exactly occluded by another one in some view).
                if len(res['frames']) < 3:
                    mywarn('- 3D {} not tracked, but only {} frames'.format(res['id'], len(res['frames'])))
        return results

    def find_initial_3_pair(self, distance, pelvis_all, views_all, dimGroups):
        """Find detection triples that are mutually consistent.

        All index triples ``i < j < k`` are scored with the mean of their three
        pairwise distances; triples below ``cfg.max_pixel`` are kept.

        Args:
            distance: square 2D-2D distance matrix.
            pelvis_all: stacked detections (only the count is used).
            views_all: unused.
            dimGroups: unused.

        Returns:
            Tuple ``(triples, scores)`` sorted by ascending score; ``triples``
            has shape ``(M, 3)``.
        """
        # Generate every candidate triple.
        index_0 = np.arange(pelvis_all.shape[0])
        index_0 = np.stack(np.meshgrid(index_0, index_0, index_0), axis=-1).reshape(-1, 3)
        flag_order = (index_0[:, 0] < index_0[:, 1]) & (index_0[:, 1] < index_0[:, 2])
        # NOTE: triples are not filtered for distinct views here.
        valid_index = index_0[flag_order]
        distance_circle = distance[valid_index[:, 0], valid_index[:, 1]] + \
                          distance[valid_index[:, 1], valid_index[:, 2]] + \
                          distance[valid_index[:, 2], valid_index[:, 0]]
        distance_circle = distance_circle / 3
        valid_dist = distance_circle < self.cfg.max_pixel
        valid_ = valid_index[valid_dist]
        dist_sum = distance_circle[valid_dist]
        arg_idx = dist_sum.argsort()
        FULL_LOG('[Assign 2D] find {} 3 pair: '.format(len(arg_idx)))
        return valid_[arg_idx], dist_sum[arg_idx]

    def try_to_add_index(self, dist_row, cameras, pelvis_all, views_all, dimGroups,
                         used_index, views, current, pid):
        """Greedily extend a group of detections with further views.

        Candidates are visited by ascending ``dist_row`` until the distance
        exceeds ``cfg.max_pixel``; a candidate is accepted when triangulating
        it together with the group keeps the mean re-projection error below
        ``cfg.max_pixel`` and all depths above 0.5.

        Side effects: accepted detections are appended to ``views`` and
        ``current`` in place. ``used_index`` is only read.

        Args:
            dist_row: distance of every detection to the group.
            cameras: camera mapping forwarded to ``triangulate_and_repro``.
            pelvis_all: stacked detections.
            views_all: view index of every detection.
            dimGroups: unused.
            used_index: per-detection owner id, ``-1`` meaning free.
            views: views already in the group; extended in place.
            current: detections already in the group; extended in place.
            pid: unused.

        Returns:
            List of the detection indices that were added.
        """
        INLIER_REPRO = self.cfg.max_pixel
        proposal = dist_row.argsort()
        indices = []
        for idx2d in proposal:
            if dist_row[idx2d] > INLIER_REPRO:
                break
            # Skip views that are already covered and detections already used.
            if views_all[idx2d] in views:
                continue
            if used_index[idx2d] > -1:
                continue
            FULL_LOG('[Assign 2D] Try to add {}, distance={:.2f}'.format(idx2d, dist_row[idx2d]))
            # Triangulate with the candidate and check the re-projection.
            new = current + [pelvis_all[idx2d]]
            views_new = views + [views_all[idx2d]]
            k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
            flag_depth = (depth > 0.5).all()
            flag_repro = dist_repro.mean() < INLIER_REPRO
            flag = flag_repro & flag_depth
            FULL_LOG('[Assign 2D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
            if flag:
                current.append(pelvis_all[idx2d])
                views.append(views_all[idx2d])
                indices.append(idx2d)
                FULL_LOG('[Assign 2D] Add {}'.format(idx2d ))
            else:
                FULL_LOG('[Assign 2D] Failed')
        return indices

    def assign_by_2D_3pair(self, results, distance, dimGroups, used_index, valid_3pairs, views_all, pelvis_all, cameras):
        """Create new persons from consistent detection triples.

        Side effects: appends to ``results``, writes the new ids into
        ``used_index`` and advances ``self.max_id``.

        Args:
            results: list that receives the new result dicts.
            distance: square 2D-2D distance matrix.
            dimGroups: cumulative per-view offsets.
            used_index: per-detection owner id, ``-1`` meaning free.
            valid_3pairs: candidate triples from ``find_initial_3_pair``.
            views_all: view index of every detection.
            pelvis_all: stacked detections.
            cameras: camera mapping forwarded to ``triangulate_and_repro``.

        Returns:
            The same ``results`` list.
        """
        INLIER_REPRO = self.cfg.max_pixel
        for valid_3pair in valid_3pairs:
            # Skip triples that contain an already used detection.
            if (used_index[valid_3pair] > -1).any():
                continue
            # Check that the triple triangulates consistently.
            FULL_LOG('[Assign 2D] Check 3 pair {}'.format(valid_3pair))
            k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_all[valid_3pair], pelvis_all[valid_3pair])
            flag_depth = (depth > 0.5).all()
            flag_repro = dist_repro.mean() < INLIER_REPRO
            # TODO: also check the valid 3D range.
            flag = flag_repro & flag_depth
            if not flag:
                continue
            # Add the remaining views.
            pid = self.max_id
            self.max_id += 1
            dist_pair = distance[valid_3pair].mean(axis=0)
            views = views_all[valid_3pair].tolist()
            current = [pelvis_all[i] for i in valid_3pair]
            indices = self.try_to_add_index(dist_pair, cameras, pelvis_all, views_all, dimGroups,
                                            used_index, views, current, pid)
            select = np.array(valid_3pair.tolist() + indices)
            k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
            used_index[select] = pid
            results.append({
                'id': pid,
                'pelvis': k3d,
                # Stored twice so the pelvis survives if 'keypoints3d' is
                # overwritten later on.
                'keypoints3d': k3d,
                'views': views_all[select],
                'select': select,
                'indices': select - dimGroups[views_all[select]],
                'frames': [self.frames],
            })
        return results

    def assign_by_2D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
        """Create new persons from the detections that are still free.

        First consistent triples are turned into persons
        (``assign_by_2D_3pair``); afterwards every remaining detection, ordered
        by ``sort_with_affinity``, is used as a seed that is paired with one
        detection from a view at a valid angle and then extended greedily.

        Side effects: writes into ``used_index`` (temporary ids starting at
        10000 while a group is built, the final id once it is accepted;
        detections of rejected groups keep their temporary id) and advances
        ``self.max_id``. Requires ``cameras['valid_theta']``, which
        ``calculate_distance`` stores.

        Args:
            used_index: per-detection owner id, ``-1`` meaning free.
            distance: square 2D-2D distance matrix.
            pelvis_all: stacked detections.
            views_all: view index of every detection.
            dimGroups: cumulative per-view offsets.
            cameras: camera mapping.

        Returns:
            List of result dicts for the newly created persons.
        """
        def log_index_2d(index2d):
            return '({}|{}-{})'.format(index2d, views_all[index2d], index2d-dimGroups[views_all[index2d]])

        def log_indexes_2d(index2d_):
            return ', '.join(log_index_2d(index2d) for index2d in index2d_)

        INLIER_REPRO = self.cfg.max_pixel
        # Temporary ids live in their own range while a group is being built.
        new_id_start = 10000
        new_max_id = new_id_start
        valid_3pairs, dist_3pair = self.find_initial_3_pair(distance, pelvis_all, views_all, dimGroups=dimGroups)
        results = []
        if len(valid_3pairs) > 0:
            results = self.assign_by_2D_3pair(results, distance, dimGroups, used_index, valid_3pairs, views_all, pelvis_all, cameras)
        valid_theta = cameras['valid_theta']
        # Seeds are visited by their number of plausible 2D partners.
        index = self.sort_with_affinity(distance, dimGroups, INLIER_REPRO)

        for row in index:
            if used_index[row] > -1:
                continue
            FULL_LOG('[Assign 2D] Check 2D {}'.format(log_index_2d(row)))
            pid = new_max_id
            new_max_id += 1
            dist_row = distance[row]
            proposal = dist_row.argsort()
            current = [pelvis_all[row]]
            views = [views_all[row]]
            used_index[row] = pid
            # Initialisation: pick the closest admissible partner.
            for idx2d in proposal:
                # The two views do not satisfy the angle constraint.
                if not valid_theta[views_all[row], views_all[idx2d]]:
                    continue
                # Proposals are sorted, so everything after this is too far.
                if dist_row[idx2d] > INLIER_REPRO:
                    break
                if used_index[idx2d] > -1:
                    continue
                if views_all[idx2d] in views:
                    continue
                # NOTE: unlike in assign_by_3D, "best match" is not required
                # here because the detection may still belong to other views.
                current.append(pelvis_all[idx2d])
                views.append(views_all[idx2d])
                used_index[idx2d] = pid
                FULL_LOG(f'[Assign 2D] Init with {log_index_2d(idx2d)}')
                break
            if len(current) < 2:
                # No good initialisation was found.
                continue
            for idx2d in proposal:
                if dist_row[idx2d] > INLIER_REPRO:
                    break
                # Skip views that are already covered and detections already used.
                if views_all[idx2d] in views:
                    continue
                if used_index[idx2d] > -1:
                    continue
                # Triangulate with the candidate and check the re-projection.
                new = current + [pelvis_all[idx2d]]
                views_new = views + [views_all[idx2d]]
                k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
                flag_depth = (depth > 0.5).all()
                flag_repro = dist_repro.mean() < INLIER_REPRO
                flag = flag_repro & flag_depth
                FULL_LOG('[Assign 2D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
                if flag:
                    current = new
                    views = views_new
                    used_index[idx2d] = pid
                    _current_id = np.where(used_index == pid)[0]
                    FULL_LOG('[Assign 2D] {} => {}'.format(idx2d, log_indexes_2d(_current_id)))
                else:
                    FULL_LOG('[Assign 2D] Failed')
            # Not enough views to create a person.
            if len(views) < self.cfg.min_views_init:
                continue
            k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
            select = np.where(used_index == pid)[0]
            final_id = self.max_id
            self.max_id += 1
            used_index[select] = final_id
            results.append({
                'id': final_id,
                'pelvis': k3d,
                # Stored twice so the pelvis survives if 'keypoints3d' is
                # overwritten later on.
                'keypoints3d': k3d,
                'views': views_all[select],
                'select': select,
                'indices': select - dimGroups[views_all[select]],
                'frames': [self.frames],
            })
        for res in results:
            text = (f" - Init {res['id']} with {len(res['views'])} views\n"
                    f"views: {res['views']}\n"
                    f"id : {res['select']}")
            LOG(text)
            print(text)
        return results
|
|||
|
|
|||
|
class MatchRoot(MatchBase):
    """Match and track a single root joint across views, frame by frame."""

    def __init__(self, mode, cfg):
        """Initialise the base class and start with no results and frame -1."""
        super().__init__(mode, cfg)
        self.results = []
        self.frames = -1

    def __call__(self, pelvis, cameras, self_results=None):
        """Process the detections of one frame.

        Args:
            pelvis: one array of 2D detections per view.
            cameras: mapping with the keys K, R, T, dist and P.
            self_results: 3D results to re-project against; ``self.results``
                is used when this is ``None``.

        Returns:
            Dict with ``'keypoints3d'`` (the stacked ``'keypoints3d'`` entries
            of the results, or an empty ``(0, 25, 3)`` array) and ``'results'``
            (the result dicts, sorted by id).
        """
        self.frames += 1
        LOG('>>> Current frames: {}'.format(self.frames))
        previous = self.results if self_results is None else self_results

        # Raw detections, their undistorted counterparts and the bookkeeping
        # needed to map a stacked row back to its view.
        raw_all, views_all, dimGroups = self.stack_array(pelvis)
        undistorted = self.undistort(pelvis, cameras)
        undistorted_all, _, _ = self.stack_array(undistorted)

        # 3D -> 2D re-projection distances and 2D <-> 2D triangulation distances.
        dist_3d_to_2d = self.calculate_repro(previous, raw_all, cameras, views_all)
        dist_2d_to_2d = self.calculate_distance(undistorted, cameras, dimGroups)

        # -1 marks a detection that no person owns yet.
        used_index = np.full(dimGroups[-1], -1, dtype=int)

        # First continue existing tracks, then start new ones from what is left.
        tracked = previous
        if len(tracked) > 0:
            tracked = self.assign_by_3D(used_index, dist_3d_to_2d, undistorted_all, views_all, dimGroups, cameras)
        created = self.assign_by_2D(used_index, dist_2d_to_2d, undistorted_all, views_all, dimGroups, cameras)

        # Keep the persons seen in the most views.
        results = tracked + created
        results.sort(key=lambda item: -len(item['views']))
        results = results[:self.cfg.max_person]

        if self.mode == 'track':
            self.results = results
        results.sort(key=lambda item: item['id'])
        # TODO: add an NMS check on the results and merge duplicates.
        if results:
            keypoints3d = np.stack([item['keypoints3d'] for item in results])
        else:
            keypoints3d = np.zeros((0, 25, 3))
        return {'keypoints3d': keypoints3d, 'results': results}
|
|||
|
|
|||
|
class MatchTwoRoot(MatchRoot):
    """Track pelvis and neck independently and pair them into torso limbs.

    The inherited matcher is run twice per call, once per joint. ``max_id``
    and ``results`` are properties that redirect to the state of the joint
    named by ``self.current`` so that the two runs do not share ids or tracks.
    """

    def __init__(self, mode, cfg):
        """Create the per-joint state before the base initialiser assigns to it."""
        keys = ['pelvis', 'neck']
        # The per-joint containers must exist before super().__init__ because
        # the base initialisers assign to the max_id / results properties.
        self._max_id_add = -1
        self._max_id = {key: 0 for key in keys}
        self.current = 'pelvis'
        self._results = {key: [] for key in keys}
        super().__init__(mode, cfg)
        # Limbs created so far; each one remembers its pelvis and neck ids.
        self.results_limb = []
        # Per joint: joint track id -> limb id.
        self.mapping = {key: {} for key in keys}

    @property
    def max_id_add(self):
        """Return a fresh limb id. NOTE: every read increments the counter."""
        self._max_id_add += 1
        return self._max_id_add

    @property
    def max_id(self):
        """Id counter of the joint selected by ``self.current``."""
        return self._max_id[self.current]

    @max_id.setter
    def max_id(self, index):
        self._max_id[self.current] = index

    @property
    def results(self):
        """Tracked results of the joint selected by ``self.current``."""
        return self._results[self.current]

    @results.setter
    def results(self, val):
        self._results[self.current] = val

    @staticmethod
    def check_tracked(key, record_pelvis, current_3d, mapping):
        """Link the records of one joint to the limbs they already belong to.

        Sets ``'limb_id'`` on every record (``-1`` when its id has no limb in
        ``mapping[key]``) and stores the record's list position in
        ``current_3d[limb_id][key]``.

        Args:
            key: joint name, used to index ``mapping`` and ``current_3d``.
            record_pelvis: result dicts of that joint for the current frame.
            current_3d: limb id -> {joint name: record position}; updated in place.
            mapping: joint name -> {joint track id: limb id}.
        """
        for ires, res in enumerate(record_pelvis):
            pid = res['id']
            res['limb_id'] = -1
            if pid in mapping[key]:
                p3d = mapping[key][pid]
                res['limb_id'] = p3d
                current_3d[p3d][key] = ires

    def __call__(self, cameras, openpose):
        """Track pelvis and neck for one frame and update the limbs.

        Args:
            cameras: camera mapping forwarded to ``MatchRoot.__call__``.
            openpose: per-view container; entry 8 holds the pelvis detections
                and entry 1 the neck detections of that view.

        Returns:
            Dict with ``'results'``: one dict per limb with its ``'id'`` and
            ``'keypoints3d'`` (pelvis stacked on top of neck).

        Raises:
            NotImplementedError: if an existing limb lost both its pelvis and
                its neck in this frame.
        """
        pelvis_id = 8
        neck_id = 1
        pelvis = [openpose[v][pelvis_id] for v in range(len(openpose))]
        neck = [openpose[v][neck_id] for v in range(len(openpose))]
        self.current = 'pelvis'
        record_pelvis = super().__call__(pelvis, cameras)['results']
        self.current = 'neck'
        record_neck = super().__call__(neck, cameras)['results']
        current_3d = {p['id']: {'pelvis': -1, 'neck': -1} for p in self.results_limb}
        # First find out which records belong to an already known limb.
        self.check_tracked('pelvis', record_pelvis, current_3d, self.mapping)
        self.check_tracked('neck', record_neck, current_3d, self.mapping)
        # Update every known limb; if one joint was lost in this frame it is
        # reconstructed from the other joint.
        for p in self.results_limb:
            current_a, current_b = current_3d[p['id']]['pelvis'], current_3d[p['id']]['neck']
            if current_a != -1 and current_b != -1:
                assert current_a < len(record_pelvis) and current_b < len(record_neck), 'Index Error {}/{}, {}/{}'.format(current_a, current_b, len(record_pelvis), len(record_neck))
                p['pelvis'] = record_pelvis[current_a]['pelvis']
                p['neck'] = record_neck[current_b]['pelvis']
            elif current_a == -1 and current_b != -1:
                # The pelvis is missing but the neck was found: keep the
                # previous pelvis-neck offset. The offset has to be taken
                # before the neck is replaced, otherwise the pelvis would be
                # written back unchanged.
                mywarn('Missing Pelvis')
                pre_direc = p['pelvis'][:, :3] - p['neck'][:, :3]
                p['neck'] = record_neck[current_b]['pelvis']
                p['pelvis'][:, :3] = p['neck'][:, :3] + pre_direc
                # Put the completed joint back so it can be tracked next frame.
                self._results['pelvis'].append({
                    'id': p['pelvis_id'],
                    'pelvis': p['pelvis'],
                    'views': [],
                    'frames': [],
                    'indices': [],
                    'limb_id': p['id'],
                })
            elif current_a != -1 and current_b == -1:
                mywarn('Missing Neck')
                pre_direc = p['neck'][:, :3] - p['pelvis'][:, :3]
                p['pelvis'] = record_pelvis[current_a]['pelvis']
                p['neck'][:, :3] = p['pelvis'][:, :3] + pre_direc
                # Put the completed joint back so it can be tracked next frame.
                self._results['neck'].append({
                    'id': p['neck_id'],
                    'pelvis': p['neck'],
                    'views': [],
                    'frames': [],
                    'indices': [],
                    'limb_id': p['id'],
                })
            else:
                raise NotImplementedError(
                    'limb {} lost both pelvis and neck'.format(p['id']))
        # Pair up the joints that do not belong to any limb yet.
        n_pelvis = len(record_pelvis)
        n_neck = len(record_neck)
        dist = np.zeros((n_pelvis, n_neck))
        # TODO: use the 2D PAF for this association.
        for i in range(n_pelvis):
            if record_pelvis[i]['limb_id'] > -1:
                continue
            for j in range(n_neck):
                if record_neck[j]['limb_id'] > -1:
                    continue
                pa = record_pelvis[i]['pelvis']
                pb = record_neck[j]['pelvis']
                dist[i, j] = np.linalg.norm(pa[:, :3] - pb[:, :3])
        # Gaussian score of the pelvis-neck distance around the expected length.
        LIMB_MEAN = 0.489
        LIMB_ACCEPT = 0.8
        dist_to_mean = np.exp(-(dist - LIMB_MEAN)**2/(2*(LIMB_MEAN/3)**2))
        for i in range(n_pelvis):
            if record_pelvis[i]['limb_id'] > -1:
                continue
            # Each pelvis takes its best-scoring free neck, so no joint can end
            # up in more than one limb.
            best_j, best_score = -1, LIMB_ACCEPT
            for j in range(n_neck):
                if record_neck[j]['limb_id'] > -1:
                    continue
                if dist_to_mean[i, j] > best_score:
                    best_j, best_score = j, dist_to_mean[i, j]
            if best_j < 0:
                continue
            limb = {
                'id': self.max_id_add,
                'pelvis_id': record_pelvis[i]['id'],
                'neck_id': record_neck[best_j]['id'],
                'pelvis': record_pelvis[i]['pelvis'],
                'neck': record_neck[best_j]['pelvis'],
                'frames': [self.frames],
            }
            self.mapping['pelvis'][limb['pelvis_id']] = limb['id']
            self.mapping['neck'][limb['neck_id']] = limb['id']
            self.results_limb.append(limb)
            # Mark both joints as consumed for the rest of this frame.
            record_pelvis[i]['limb_id'] = limb['id']
            record_neck[best_j]['limb_id'] = limb['id']
        # TODO: drop limbs that are no longer tracked.
        results = []
        for limb in self.results_limb:
            k3d = np.vstack([limb['pelvis'], limb['neck']])
            results.append({
                'id': limb['id'],
                'keypoints3d': k3d,
            })
        return {'results': results}
|
|||
|
|
|||
|
|
|||
|
class MatchTorso(MatchBase):
|
|||
|
def __init__(self, mode, cfg):
    """Initialise the base matcher, then start with no results and frame -1."""
    super().__init__(mode, cfg)
    self.results, self.frames = [], -1
|
|||
|
|
|||
|
@staticmethod
|
|||
|
def stack_pafs(pafs):
    """Concatenate the per-view limb candidates and remember the view layout.

    Args:
        pafs: one mapping per view with the arrays ``'src'``, ``'dst'`` and
            ``'value'``; the number of candidates of a view is
            ``src.shape[0]``.

    Returns:
        Tuple ``(results, views_all, dimGroups)``: a dict with the
        concatenated ``'src'``, ``'dst'`` and ``'value'`` arrays (empty dict
        when ``pafs`` is empty), the integer view index of every candidate and
        the cumulative per-view offsets.
    """
    dimGroups = [0]
    results = defaultdict(list)
    views_all = []
    for nv, paf in enumerate(pafs):
        src = paf['src']
        dimGroups.append(dimGroups[-1] + src.shape[0])
        views_all.extend([nv] * src.shape[0])
        results['src'].append(src)
        results['dst'].append(paf['dst'])
        results['value'].append(paf['value'])
    results = {key: np.concatenate(val, axis=0) for key, val in results.items()}
    # Force an integer dtype: with no candidates at all the list is empty and
    # np.array([]) would be float64, which cannot be used as an index.
    return results, np.array(views_all, dtype=int), np.array(dimGroups)
|
|||
|
|
|||
|
def check_used_index(self, info_limb, index, info_joints):
    """Tell whether either end joint of a limb candidate is already taken.

    Args:
        info_limb: mapping whose ``'src'`` / ``'dst'`` entries give, per limb
            candidate, the index of its end joint.
        index: index of the limb candidate.
        info_joints: mapping whose ``'src'`` / ``'dst'`` entries each carry a
            ``'used_index'`` sequence (values above -1 mean "taken").

    Returns:
        ``True`` if the source or the destination joint is taken, else ``False``.
    """
    # Resolve both joints first, then test source before destination.
    joints = {end: info_limb[end][index] for end in ('src', 'dst')}
    return any(info_joints[end]['used_index'][joint] > -1
               for end, joint in joints.items())
|
|||
|
|
|||
|
def set_used_index(self, info_limb, index, info_joints, pid):
    """Mark both end joints of a limb candidate as owned by ``pid``.

    Args:
        info_limb: mapping whose ``'src'`` / ``'dst'`` entries give, per limb
            candidate, the index of its end joint.
        index: index of the limb candidate.
        info_joints: mapping whose ``'src'`` / ``'dst'`` entries each carry a
            mutable ``'used_index'`` sequence; updated in place.
        pid: owner id to store.

    Returns:
        Always ``True``.
    """
    src_joint, dst_joint = info_limb['src'][index], info_limb['dst'][index]
    for end, joint in (('src', src_joint), ('dst', dst_joint)):
        info_joints[end]['used_index'][joint] = pid
    return True
|
|||
|
|
|||
|
def triangulate_limb(self, info_limb, info_joints, index, views, cameras):
    """Triangulate both ends of a limb and check its 3D length.

    Args:
        info_limb: mapping whose ``'src'`` / ``'dst'`` entries give, per limb
            candidate, the index of its end joint.
        info_joints: mapping whose ``'src'`` / ``'dst'`` entries each carry the
            detections under ``'detect_undis'``.
        index: limb candidates (one per view) that form the limb.
        views: view index of every entry of ``index``.
        cameras: camera mapping forwarded to ``triangulate_and_repro``.

    Returns:
        Tuple ``(flag, k3d_all, dist_all)``: ``flag`` is ``False`` when the
        distance between the two triangulated ends is below 0.3 or above 0.7;
        ``k3d_all`` stacks the source end on top of the destination end;
        ``dist_all`` is the element-wise maximum of the two re-projection
        errors.
    """
    points3d = []
    errors = []
    for end in ('src', 'dst'):
        # Gather this end's detection for each limb candidate.
        proposals = [info_joints[end]['detect_undis'][info_limb[end][idx]]
                     for idx in index]
        k3d, dist_repro, _ = self.triangulate_and_repro(cameras, views, proposals)
        errors.append(dist_repro)
        points3d.append(k3d)
    k3d_all = np.vstack(points3d)
    limb_length = np.linalg.norm(k3d_all[1, ..., :3] - k3d_all[0, ..., :3])
    flag = not (limb_length < 0.3 or limb_length > 0.7)
    dist_all = np.stack(errors).max(axis=0)
    return flag, k3d_all, dist_all
|
|||
|
|
|||
|
# def assign_limb_by_2D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
|
|||
|
def assign_limb_by_2D(self, info_limb, info_joints, distance, views_all, dimGroups, cameras):
    """Greedily group 2D limb candidates from different views into 3D limbs.

    Candidates are visited in the order returned by
    ``self.sort_with_affinity``. For every candidate whose endpoints are not
    marked as used, a partner from another view is searched whose two-view
    triangulation passes ``self.triangulate_limb``. Further views are then
    added one at a time while the limb check passes and the mean
    reprojection value stays below ``self.cfg.max_pixel``. Groups covering
    at least ``self.cfg.min_views`` views are kept.

    Args:
        info_limb: mapping with 'src' and 'dst' joint indices per limb candidate.
        info_joints: per-endpoint joint info ('dimGroups', 'used_index', ...);
            the 'used_index' arrays are modified through ``set_used_index``.
        distance: pairwise matrix between limb candidates; row ``i`` is
            arg-sorted to rank the partners of candidate ``i``.
        views_all: view index of every limb candidate.
        dimGroups: forwarded to ``self.sort_with_affinity``.
        cameras: camera dict; 'valid_theta' is read here and the dict is
            forwarded to ``self.triangulate_limb``.

    Returns:
        List of dicts with keys 'id', 'torso', 'keypoints3d', 'views',
        'select' and 'frames', one per accepted group. ``self.max_id`` is
        incremented for each of them and a summary is logged and printed.
    """
    def log_index_2d(index2d):
        # Format a limb candidate as "(global index|view-(local src, local dst))".
        src = info_limb['src'][index2d]
        dst = info_limb['dst'][index2d]
        src = src - info_joints['src']['dimGroups'][views_all[index2d]]
        dst = dst - info_joints['dst']['dimGroups'][views_all[index2d]]
        return '({}|{}-({},{}))'.format(index2d, views_all[index2d], src, dst)
    # def log_indexes_2d(index2d_):
    # return ', '.join(['({}|{}-{})'.format(index2d, views_all[index2d], index2d-dimGroups[views_all[index2d]]) for index2d in index2d_])

    INLIER_REPRO = self.cfg.max_pixel
    valid_theta = cameras['valid_theta']
    index = self.sort_with_affinity(distance, dimGroups, INLIER_REPRO)
    # NOTE(review): visited2d is filled below but never read in this method.
    visited2d = set()
    results = []
    # Temporary ids written into 'used_index'; the ids in the returned
    # results come from self.max_id instead.
    new_id_start = 10000
    new_max_id = new_id_start
    for row in index:
        visited2d.add(row)
        if self.check_used_index(info_limb, row, info_joints):
            continue
        pid = new_max_id
        new_max_id += 1
        FULL_LOG('[Assign 2D] Check 2D {}'.format(log_index_2d(row)))
        dist_row = distance[row]
        proposal = dist_row.argsort()
        # Try to initialise the group with a second view.
        views = [views_all[row]]
        current = [row]
        for idx2d in proposal:
            # The view pair does not satisfy the viewing-angle constraint.
            if not valid_theta[views_all[row], views_all[idx2d]]:
                continue
            # Distance constraint not met; proposals are sorted, so stop here.
            if dist_row[idx2d] > INLIER_REPRO:
                break
            if self.check_used_index(info_limb, idx2d, info_joints):
                continue
            if views_all[idx2d] in views:
                continue
            # Check the bone length of the two-view triangulation.
            flag, k3d, repro_error = self.triangulate_limb(info_limb, info_joints, [row, idx2d], [views_all[row], views_all[idx2d]], cameras)
            length = np.linalg.norm(k3d[1, ..., :3] - k3d[0, ..., :3])
            if flag:
                views.append(views_all[idx2d])
                current.append(idx2d)
                FULL_LOG(f'[Assign 2D] Init with {log_index_2d(idx2d)}, length={length:.4f}')
                break
            else:
                FULL_LOG(f'[Assign 2D] Init failed with {log_index_2d(idx2d)}, length = {length:.4f}')
        if len(current) < 2:
            # No good initialisation was found.
            FULL_LOG(f'[Assign 2D] Cannot find a good initialization pair {log_index_2d(row)}')
            continue
        # NOTE(review): set_used_index is only called for candidates added in
        # the loop below; `row` and the init partner are not marked as used
        # here, and marks are not reverted when the group is later rejected
        # for having too few views - confirm this is intended.
        for idx2d in proposal:
            if dist_row[idx2d] > INLIER_REPRO:break
            # This view is already in the group | this 2D detection is already used.
            if views_all[idx2d] in views:
                continue
            if self.check_used_index(info_limb, idx2d, info_joints):
                continue
            FULL_LOG('[Assign 2D] Try to add 2D {} => {}'.format(idx2d, log_index_2d(idx2d)))
            # Try triangulating with the extra view and check the reprojection.
            new = current + [idx2d]
            views_new = views + [views_all[idx2d]]
            flag_limb, k3d, dist_repro = self.triangulate_limb(info_limb, info_joints, new, views_new, cameras)
            # flag_depth = (depth > 0.5).all()
            flag_depth = True
            flag_repro = dist_repro.mean() < INLIER_REPRO
            flag = flag_repro & flag_depth & flag_limb
            FULL_LOG('[Assign 2D] repro: \n{}'.format(LOG_ARRAY(dist_repro[None])))
            if flag:
                # Accept the new view.
                current = new
                views = views_new
                self.set_used_index(info_limb, idx2d, info_joints, pid)
                FULL_LOG('[Assign 2D] {} => {}'.format(idx2d, current))
            else:
                FULL_LOG('[Assign 2D] Failed')
                new = None
                views_new = None
        if len(views) < self.cfg.min_views: # not enough views to accept the group
            continue
        # Final triangulation with every accepted view.
        flag_limb, k3d, dist_repro = self.triangulate_limb(info_limb, info_joints, current, views, cameras)
        final_id = self.max_id
        self.max_id += 1
        results.append({
            'id': final_id,
            'torso': k3d,
            'keypoints3d': k3d, # stored twice so the torso can still be retrieved even if keypoints3d is overwritten later
            'views': views,
            'select': current,
            # 'indices': select - dimGroups[views_all[select]],
            'frames': [self.frames],
        })
    for res in results:
        text = f''' - Init {res['id']} with {len(res['views'])} views
views: {res['views']}
id : {res['select']}'''
        LOG(text)
        print(text)
    return results
|
|||
|
|
|||
|
def calculte_distance_src_dst(self, src, dst, cameras):
    """Collect stacked detections and pairwise distances for both limb ends.

    Args:
        src: per-view detections of the limb's source joint.
        dst: per-view detections of the limb's destination joint.
        cameras: camera parameters, forwarded to ``self.undistort`` and
            ``self.calculate_distance``.

    Returns:
        Dict with keys 'src' and 'dst'. Each value holds 'detect_all',
        'views_all', 'dimGroups', 'distance2d_2d', 'detect_undis' and a
        'used_index' array initialised to -1 (one slot per detection).
    """
    def describe(detect):
        # Stack the raw detections to get the view bookkeeping.
        stacked, view_ids, groups = self.stack_array(detect)
        # Undistort per view, then stack the undistorted detections as well.
        undistorted = self.undistort(detect, cameras)
        undistorted_all = self.stack_array(undistorted)[0]
        # Pairwise distance between detections (triangulate and project).
        pairwise = self.calculate_distance(undistorted, cameras, groups)
        return {
            'detect_all': stacked,
            'views_all': view_ids,
            'dimGroups': groups,
            'distance2d_2d': pairwise,
            'detect_undis': undistorted_all,
            'used_index': np.full(groups[-1], -1, dtype=int),
        }

    info = {}
    info['src'] = describe(src)
    info['dst'] = describe(dst)
    return info
|
|||
|
|
|||
|
def get_valid_limbs(self, pafs, info_joint):
    """Select confident limb candidates in every view and stack them.

    Args:
        pafs: per-view 2D score matrices; entry ``[s, d]`` scores the limb
            from source detection ``s`` to destination detection ``d``.
        info_joint: mapping with 'src' and 'dst' entries whose 'dimGroups'
            give the per-view offset added to the local detection indices.

    Returns:
        The ``(results, views_all, dimGroups)`` triple produced by
        ``self.stack_pafs`` for the per-view candidate lists.
    """
    def limbs_of_view(nv):
        scores = pafs[nv]
        # Keep only the pairs whose score exceeds 0.3.
        rows, cols = np.where(scores > 0.3)
        kept = scores[rows, cols]
        return {
            'src': rows + info_joint['src']['dimGroups'][nv],
            'dst': cols + info_joint['dst']['dimGroups'][nv],
            'value': kept,
            'view': nv,
        }

    valid_paf = [limbs_of_view(nv) for nv in range(len(pafs))]
    results, views_all, dimGroups = self.stack_pafs(valid_paf)
    return results, views_all, dimGroups
|
|||
|
|
|||
|
def calculate_distance_limb(self, results, cameras, dimGroups, distance_src, distance_dst):
    """Build the limb-to-limb distance from the two joint-to-joint distances.

    Args:
        results: mapping with 'src' and 'dst' arrays of joint indices, one
            entry per limb candidate.
        cameras: unused here.
        dimGroups: unused here.
        distance_src: pairwise distance matrix between source joints.
        distance_dst: pairwise distance matrix between destination joints.

    Returns:
        Square matrix with one row/column per limb candidate; each entry is
        the larger of the corresponding source-joint and destination-joint
        distances.
    """
    def gather(distance, joint_ids):
        ids = np.asarray(joint_ids)
        # Entry [i, j] reads distance[ids[j], ids[i]].
        return distance[ids[None, :], ids[:, None]]

    dist_src_src = gather(distance_src, results['src'])
    dist_dst_dst = gather(distance_dst, results['dst'])
    # TODO: take the per-view limb confidence and joint confidence into account
    return np.maximum(dist_src_src, dist_dst_dst)
|
|||
|
|
|||
|
def __call__(self, cameras, openpose, openpose_paf):
    """Match the pelvis-neck limb across views for one frame.

    Args:
        cameras: {K, R, T, dist, P}
        openpose: per-view containers indexed by joint id; joints 8
            (pelvis) and 1 (neck) are read.
        openpose_paf: per-view mappings keyed by ``(src_id, dst_id)``; the
            ``(8, 1)`` entry is read.

    Returns:
        Dict with 'keypoints3d' (stacked per-limb points, or an empty
        ``(0, 2, 3)`` array when nothing was matched) and 'results' (at
        most ``self.cfg.max_person`` entries, ordered by id).
    """
    self.frames += 1
    pelvis_id, neck_id = 8, 1
    LOG('>>> Current frames: {}'.format(self.frames))

    pose_views = range(len(openpose))
    pelvis = [openpose[v][pelvis_id] for v in pose_views]
    neck = [openpose[v][neck_id] for v in pose_views]
    info_joint = self.calculte_distance_src_dst(pelvis, neck, cameras)

    limb_key = (pelvis_id, neck_id)
    pafs = [openpose_paf[v][limb_key] for v in range(len(openpose_paf))]
    info_limb, views_all, dimGroups = self.get_valid_limbs(pafs, info_joint)
    distance2d_2d = self.calculate_distance_limb(
        info_limb, cameras, dimGroups,
        info_joint['src']['distance2d_2d'], info_joint['dst']['distance2d_2d'])
    results = self.assign_limb_by_2D(info_limb, info_joint, distance2d_2d, views_all, dimGroups, cameras)

    # Keep the candidates seen in the most views, then restore id order.
    by_view_count = sorted(results, key=lambda item: len(item['views']), reverse=True)
    results = sorted(by_view_count[:self.cfg.max_person], key=lambda item: item['id'])

    # TODO: add an NMS check and merging of the results
    if results:
        keypoints3d = np.stack([item['keypoints3d'] for item in results])
    else:
        keypoints3d = np.zeros((0, 2, 3))
    return {'keypoints3d': keypoints3d, 'results': results}
|
|||
|
|
|||
|
class TriangulateAll:
    """Triangulate the full 2D keypoints of every matched person.

    ``mode`` selects the solver: 'naive' calls ``batch_triangulate`` and
    'robust' calls ``iterative_triangulate`` with ``dist_max=25``.
    """

    _MODES = ('naive', 'robust')

    def __init__(self, mode) -> None:
        self.mode = mode

    def __call__(self, bbox, keypoints, cameras, results):
        """Fill each result with its 3D keypoints, 2D keypoints and boxes.

        Args:
            bbox: per-view boxes, indexed as ``bbox[view][index]``.
            keypoints: per-view 2D keypoints, indexed the same way.
            cameras: camera dict; the projection matrices ``cameras['P']``
                are read.
            results: list of dicts with 'views' and 'indices' (one
                detection index per view); each dict is updated in place
                with 'keypoints3d', 'keypoints2d' and 'bbox'.

        Returns:
            Dict with the stacked 'keypoints3d' of all results and the
            updated 'results' list.

        Raises:
            ValueError: if ``self.mode`` is not 'naive' or 'robust', or
                (from ``np.stack``) if ``results`` is empty.
        """
        if self.mode not in self._MODES:
            # Previously an unknown mode left ``k3d`` unbound and surfaced as
            # a NameError inside the loop.
            raise ValueError(
                'Unknown triangulation mode {!r}, expected one of {}'.format(
                    self.mode, self._MODES))
        for res in results:
            bbox_, k2d, Pall = [], [], []
            for i, v in enumerate(res['views']):
                idx = res['indices'][i]
                bbox_.append(bbox[v][idx])
                k2d.append(keypoints[v][idx])
                Pall.append(cameras['P'][v])
            k2d = np.stack(k2d)
            Pall = np.stack(Pall)
            bbox_ = np.stack(bbox_)
            if self.mode == 'naive':
                k3d = batch_triangulate(k2d, Pall)
            else:
                # Imported lazily so the 'naive' mode does not need it.
                from easymocap.mytools.triangulator import iterative_triangulate
                # The robust solver also returns the 2D keypoints it produced.
                k3d, k2d = iterative_triangulate(k2d, Pall, dist_max=25)
            res['keypoints3d'] = k3d
            res['keypoints2d'] = k2d
            res['bbox'] = bbox_
        return {'keypoints3d': np.stack([d['keypoints3d'] for d in results]), 'results': results}
|
|||
|
|
|||
|
class MatchHandLR:
    """Run two independent ``MatchRoot`` matchers, one per hand side."""

    def __init__(self, mode, cfg):
        self.model_l = MatchRoot(mode,cfg)
        self.model_r = MatchRoot(mode,cfg)

    def __call__(self, pelvis_l, pelvis_r, cameras):
        """Match left and right inputs and merge the two outputs.

        Args:
            pelvis_l: input forwarded to the left matcher.
            pelvis_r: input forwarded to the right matcher.
            cameras: camera parameters forwarded to both matchers.

        Returns:
            Dict holding every key of the left output suffixed with '_l'
            followed by every key of the right output suffixed with '_r'.
        """
        left = self.model_l(pelvis_l, cameras)
        right = self.model_r(pelvis_r, cameras)
        merged = {key + '_l': left[key] for key in left.keys()}
        merged.update({key + '_r': right[key] for key in right.keys()})
        return merged
|
|||
|
|
|||
|
class MatchBodyHand:
    """Attach hand matching results to 3D bodies through their wrists.

    Hands are first matched in 3D (``match3d_step``); bodies that are still
    unmatched then fall back to a single-view 2D match against hand boxes
    (``match2d_step``).
    """

    def __init__(self, mode) -> None:
        # ``mode`` is accepted but not used by this class.
        pass

    def projectPoints(self, X, K, R, t, Kd):
        """Project 3D points into an image with lens distortion.

        Args:
            X: (3, N) array of 3D points.
            K: (3, 3) intrinsic matrix.
            R: (3, 3) rotation matrix.
            t: translation, broadcast against ``R @ X`` (e.g. (3, 1)).
            Kd: five distortion coefficients, used in OpenCV's
                (k1, k2, p1, p2, k3) order.

        Returns:
            (3, N) array; rows 0 and 1 are the pixel coordinates, row 2 is
            the depth in the camera frame.
        """
        cam = R @ X + t
        # Move to the normalised image plane.
        x = cam[0, :] / cam[2, :]
        y = cam[1, :] / cam[2, :]
        r = x * x + y * y
        radial = 1 + Kd[0] * r + Kd[1] * r * r + Kd[4] * r * r * r
        # Both distorted coordinates are computed from the *undistorted*
        # (x, y). The previous in-place update reused the already-distorted x
        # for the y tangential term and the already-projected u for v.
        x_d = x * radial + 2 * Kd[2] * x * y + Kd[3] * (r + 2 * x * x)
        y_d = y * radial + 2 * Kd[3] * x * y + Kd[2] * (r + 2 * y * y)
        cam[0, :] = K[0, 0] * x_d + K[0, 1] * y_d + K[0, 2]
        cam[1, :] = K[1, 0] * x_d + K[1, 1] * y_d + K[1, 2]
        return cam

    def match3d_step(self, results, keypoints3d, wristid):
        """Greedily match 3D hands to bodies by wrist distance.

        Args:
            results: list of hand results; ``results[j]['pelvis'][0, :3]`` is
                used as the 3D position of hand ``j``.
            keypoints3d: per-body 3D keypoints, indexed
                ``keypoints3d[body][wristid][:3]``.
            wristid: index of the wrist joint inside a body.

        Returns:
            List with one entry per body: a shallow copy of the matched hand
            result, or -1 when no hand lies within a squared distance of 0.5.
        """
        n_body = len(keypoints3d)
        # ``np.int`` was removed in NumPy 1.24; plain Python ints are used.
        match_results = [-1] * n_body
        # vis[body] is the hand index assigned to that body, -1 if none.
        vis = [-1] * n_body
        dis = [
            [bid, hid,
             ((keypoints3d[bid][wristid][:3]
               - results[hid]['pelvis'][0, :3].reshape(-1)) ** 2).sum()]
            for bid in range(n_body)
            for hid in range(len(results))
        ]
        if len(dis) > 0:
            dis = np.array(dis)
            # Closest (body, hand) pairs first.
            dis = dis[np.argsort(dis[:, -1])]
            for row in dis:
                bid = int(row[0])
                hid = int(row[1])
                # Each body gets one hand and each hand one body.
                if vis[bid] >= 0 or hid in vis:
                    continue
                if row[2] > 0.5:
                    continue
                match_results[bid] = results[hid].copy()
                vis[bid] = hid
        return match_results

    def match2d_step(self, bbox_hand, keypoints3d, wristid, results_match_l, cameras):
        """Match still-unmatched bodies to single-view hand boxes in 2D.

        The wrists of the unmatched bodies are projected into every view and
        compared with the centres of the hand boxes that are not already
        used by a matched body.

        Args:
            bbox_hand: per-view lists of hand boxes; the first four values
                are read as two corners and the last value as a score
                (boxes with a score of 0 are skipped).
            keypoints3d: array of per-body 3D keypoints.
            wristid: index of the wrist joint inside a body.
            results_match_l: per-body match list as produced by
                ``match3d_step``; it is updated in place.
            cameras: camera dict providing 'K', 'dist', 'R' and 'T' per view.

        Returns:
            ``results_match_l``, where newly matched bodies hold a dict with
            single-element 'views' and 'indices' arrays.
        """
        lack_body_id = []
        # Hand-box indices already used by a matched body, per view.
        mv_use_hand = [[] for _ in range(cameras['R'].shape[0])]
        for bid, matched in enumerate(results_match_l):
            if isinstance(matched, int) and matched == -1:
                lack_body_id.append(bid)
            else:
                for view, box_idx in zip(matched['views'], matched['indices']):
                    mv_use_hand[view].append(box_idx)

        # (n_lack, 3): wrist position of every body that lacks a hand.
        wrist3dkpts = keypoints3d[lack_body_id, wristid, :3]
        dis = []
        for nv in range(len(bbox_hand)):
            # The projection only depends on the view: computed on first use.
            wristkpts2d = None
            for hid in range(len(bbox_hand[nv])):
                if hid in mv_use_hand[nv]:
                    continue
                bx_ = bbox_hand[nv][hid]
                if bx_[-1] == 0:
                    continue
                k2d = np.array([(bx_[0] + bx_[2]) / 2, (bx_[1] + bx_[3]) / 2, bx_[-1]])
                if wristkpts2d is None:
                    wristkpts2d = self.projectPoints(
                        wrist3dkpts.T[0:3, :],
                        cameras['K'][nv],
                        cameras['R'][nv],
                        cameras['T'][nv],
                        cameras['dist'][nv].reshape(5),
                    ).T
                for k, body_id in enumerate(lack_body_id):
                    D = ((wristkpts2d[k][:2] - k2d[:2]) ** 2).sum()
                    # error, 3D body id, view index, hand-box id in that view
                    dis.append([D, body_id, nv, hid])

        if len(dis) > 0:
            vis = [-1] * len(keypoints3d)
            dis = np.array(dis)
            dis = dis[np.argsort(dis[:, 0])]
            # TODO: drop the entries whose distance is too large up front
            for row in dis:
                bid = int(row[1])
                nv = int(row[2])
                hid = int(row[3])
                # NOTE(review): ``hid in vis`` compares box indices without
                # the view they belong to - confirm this is intended.
                if vis[bid] >= 0 or hid in vis or results_match_l[bid] != -1:
                    continue
                if row[0] > 50:  # squared 2D distance between wrist and box centre
                    continue
                results_match_l[bid] = {
                    'views': np.array([nv]),
                    'indices': np.array([hid]),  # index of the box inside that view
                }
                vis[bid] = hid
        return results_match_l

    def __call__(self, results_l, results_r, keypoints3d, cameras, bbox_handl, bbox_handr):
        '''
        Match left and right hands to the bodies.

        results_l / results_r: list nhand
        keypoints3d: (nperson,25,3)
        bbox_handl / bbox_handr: per-view hand boxes for the 2D fallback

        Joint 7 is used as the wrist for the left hand and joint 4 for the
        right hand. Returns a dict with the per-body match lists under
        'match3d_l' and 'match3d_r'.
        '''
        results_match_l = self.match3d_step(results_l, keypoints3d, 7)
        results_match_r = self.match3d_step(results_r, keypoints3d, 4)

        if -1 in results_match_l:
            # Some bodies have no 3D hand: project their wrists into every
            # view and match them with the hand boxes that are still free.
            # TODO: an empty distance list means no body or no detected hand
            # TODO: bodies may still lack a hand afterwards; complete later
            results_match_l = self.match2d_step(bbox_handl, keypoints3d, 7, results_match_l, cameras)
        if -1 in results_match_r:
            results_match_r = self.match2d_step(bbox_handr, keypoints3d, 4, results_match_r, cameras)
        return {'match3d_l': results_match_l, 'match3d_r': results_match_r}
|