EasyMocap/myeasymocap/operations/match.py

import numpy as np
import cv2
from easymocap.mytools.camera_utils import Undistort
from easymocap.mytools.debug_utils import mywarn
from .triangulate import batch_triangulate, project_wo_dist
from collections import defaultdict
LOG_FILE = 'log.txt'
LOG_LEVEL = 0 #2
FULL_LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 1 else (lambda x: None)
LOG = (lambda x: print(x, file=open(LOG_FILE, 'a'))) if LOG_LEVEL > 0 else (lambda x: None)
def LOG_ARRAY(array2d, format='{:>8.2f} '):
res = ''
for i in range(array2d.shape[0]):
for j in range(array2d.shape[1]):
res += format.format(array2d[i, j])
res += '\n'
return res
class MatchBase:
def __init__(self, mode, cfg) -> None:
self.mode = mode
self.cfg = cfg
print('[{}]'.format(self.__class__.__name__))
print(self.cfg)
self.max_id = 0
def make_grids(self, grids, grids_step):
grid_x = np.arange(grids[0][0], grids[1][0], grids_step)
grid_y = np.arange(grids[0][1], grids[1][1], grids_step)
grid_z = np.arange(grids[0][2], grids[1][2], grids_step)
grid_xyz = np.meshgrid(grid_x, grid_y, grid_z)
grid_xyz = np.stack(grid_xyz, axis=-1)
grids = grid_xyz.reshape(-1, 3)
print('[{}] Generate {} => {} grids'.format(self.__class__.__name__, grid_xyz.shape, grids.shape[0]))
return grids
@staticmethod
def stack_array(arrays):
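        """Concatenate per-view detections into one flat array.

        arrays: list with one (n_v, ...) array per view.
        Returns (stacked, views_all, dimGroups): stacked has shape (sum_v n_v, ...),
        views_all[i] is the view index of row i, and dimGroups[v]:dimGroups[v+1]
        is the slice of rows belonging to view v.
        """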
dimGroups = [0]
results = []
views_all = []
for nv, array in enumerate(arrays):
dimGroups.append(dimGroups[-1] + array.shape[0])
views_all.extend([nv for _ in range(array.shape[0])])
results.append(array)
results = np.concatenate(results, axis=0)
return results, np.array(views_all), np.array(dimGroups)
@staticmethod
def undistort(points, cameras):
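        """Undistort the 2D detections of every view with its own intrinsics.

        points: list of (n_v, >=2) arrays; cameras holds 'K' and 'dist' stacked
        with the view index on the first axis. Empty views are passed through.
        """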
nViews = len(points)
pelvis_undis = []
for nv in range(nViews):
camera = {key:cameras[key][nv] for key in ['R', 'T', 'K', 'dist']}
if points[nv].shape[0] > 0:
pelvis = Undistort.points(points[nv], camera['K'], camera['dist'])
else:
pelvis = points[nv].copy()
pelvis_undis.append(pelvis)
return pelvis_undis
@staticmethod
def distance_by_triangulate(p_src, p_dst, camera_src, camera_dst, ranges):
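        """Pairwise matching cost between the detections of two views.

        Every (i, j) pair is triangulated from the two projection matrices and the
        cost is the mean 2D reprojection error; pairs whose 3D point falls outside
        `ranges` are set to 1e5. Returns an (m, n) matrix.
        """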
dist = np.zeros((p_src.shape[0], p_dst.shape[0]), dtype=np.float32)
# generate (m, n) points and distance
idx_src = np.arange(p_src.shape[0])
idx_dst = np.arange(p_dst.shape[0])
idx_src, idx_dst = np.meshgrid(idx_src, idx_dst)
idx_src = idx_src.reshape(-1)
idx_dst = idx_dst.reshape(-1)
p_src = p_src[idx_src]
p_dst = p_dst[idx_dst]
keypoints = np.stack([p_src, p_dst], axis=0)
keypoints_flat = keypoints.reshape(keypoints.shape[0], -1, keypoints.shape[-1])
P = np.stack([camera_src['P'], camera_dst['P']], axis=0)
k3d = batch_triangulate(keypoints_flat, P, min_view=2)
repro, depth = project_wo_dist(k3d, P)
dist_repro = np.linalg.norm(repro[..., :2] - keypoints_flat[..., :2], axis=-1).mean(axis=0)
valid = (k3d[:, 0] > ranges[0][0]) & (k3d[:, 0] < ranges[1][0]) & \
(k3d[:, 1] > ranges[0][1]) & (k3d[:, 1] < ranges[1][1]) & \
(k3d[:, 2] > ranges[0][2]) & (k3d[:, 2] < ranges[1][2])
dist_repro[~valid] = 1e5
dist[idx_src, idx_dst] = dist_repro
return dist
def calculate_distance(self, pelvis_undis, cameras, dimGroups):
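        """Build the symmetric (N, N) matrix of 2D-2D matching costs, N = dimGroups[-1].

        Off-diagonal view blocks are filled by distance_by_triangulate; entries within
        the same view keep the default value DIST_MAX. The pairwise angle between the
        cameras' viewing rays is also computed and stored as cameras['valid_theta'].
        """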
DIST_MAX = 10000.
distance = np.zeros((dimGroups[-1], dimGroups[-1]), dtype=np.float32) + DIST_MAX
nViews = len(dimGroups) - 1
ray0 = np.array([0, 0, 1], dtype=np.float32).reshape(1, 3, 1)
ray_cam = cameras['R'].transpose(0, 2, 1) @ ray0
ray_cam = ray_cam[..., 0]
cos_theta = np.sum(ray_cam[:, None] * ray_cam[None], axis=-1)
theta = np.rad2deg(np.arccos(np.clip(cos_theta, -1., 1.)))
valid_theta = np.logical_and(theta > self.cfg.valid_angle[0], theta < self.cfg.valid_angle[1])
for src in range(nViews - 1):
for dst in range(src + 1, nViews):
                # TODO: compute the angle between the two viewing rays
                # Here we could directly skip the distance computation for views that are
                # not adjacent (or are on opposite sides), but then the initialization below
                # would no longer be able to pick two views that are close to each other.
# if not valid_theta[src, dst]:
# continue
p_src = pelvis_undis[src][:, None] #(m, 2)
p_dst = pelvis_undis[dst][:, None] #(n, 2)
if p_src.shape[0] == 0 or p_dst.shape[0] == 0:
continue
camera_src = {key:cameras[key][src] for key in ['R', 'T', 'K', 'dist', 'P']}
camera_dst = {key:cameras[key][dst] for key in ['R', 'T', 'K', 'dist', 'P']}
dist = self.distance_by_triangulate(p_src, p_dst, camera_src, camera_dst, self.cfg.valid_ranges)
distance[dimGroups[src]:dimGroups[src+1], dimGroups[dst]:dimGroups[dst+1]] = dist
distance[dimGroups[dst]:dimGroups[dst+1], dimGroups[src]:dimGroups[src+1]] = dist.T
cameras['valid_theta'] = valid_theta
return distance
def calculate_repro(self, results, pelvis_undis, cameras, views_all):
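        """Project every tracked 3D root into all views and return the
        (nDetections, n3D) matrix of pixel distances between each 2D detection and
        each 3D result reprojected into that detection's view."""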
nViews = len(cameras['P'])
n3D = len(results)
distance = np.zeros((pelvis_undis.shape[0], n3D), dtype=np.float32)
if n3D == 0:
return distance
keypoints3d = np.stack([d['pelvis'] for d in results], axis=0)
Pall = np.stack([cameras['P'][nv] for nv in range(nViews)])
# k2d: (nViews, nPerson, nPoints, 3)
k2d, depth = project_wo_dist(keypoints3d, Pall, einsum='vab,pkb->vpka')
repro_select = k2d[views_all]
# dist: (nPoints, n3D)
dist = np.linalg.norm(repro_select[..., :2] - pelvis_undis[:, None, None, :2], axis=-1).mean(axis=2)
# for nv in range(nViews):
return dist
def triangulate_and_repro(self, cameras, views, proposals):
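        """Triangulate one root from a single 2D proposal per view.

        Returns the homogeneous 3D point (1, 4), the per-view reprojection error
        and the per-view depth.
        """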
Pall = np.stack([cameras['P'][v] for v in views])
kpts = np.stack(proposals)
kpts = kpts[:, None]
k3d = batch_triangulate(kpts, Pall)
k2d, depth = project_wo_dist(k3d, Pall)
dist_repro = np.linalg.norm(k2d[..., :2] - kpts[..., :2], axis=-1).mean(axis=-1)
return k3d, dist_repro, depth
@staticmethod
def check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
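        """Return True if idx3d is the closest not-yet-visited 3D track to the 2D
        detection idx2d (already visited tracks other than idx3d are ignored)."""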
isbest3d = True
distance_2d = distance[idx2d]
for i3d in distance_2d.argsort():
if i3d != idx3d and i3d not in visited3d:
isbest3d = False
break
elif i3d == idx3d:
break
return isbest3d
@staticmethod
def sort_with_affinity(distance, dimGroups, INLIER_REPRO):
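        """Return an order in which to process the 2D detections.

        Per-view inlier counts are accumulated, but the final order simply sorts the
        detections by the number of candidates closer than 2 * INLIER_REPRO, descending.
        """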
nViews = len(dimGroups) - 1
        # sort: compute an affinity for each detection
count_rows = np.zeros((dimGroups[-1]), dtype=int)
distance_rows = np.zeros((dimGroups[-1]))
for nv in range(nViews):
if dimGroups[nv] == dimGroups[nv+1]:continue
valid_view = np.clip((distance[:, dimGroups[nv]:dimGroups[nv+1]] < INLIER_REPRO).sum(axis=-1), 0, 1)
            count_rows += valid_view  # accumulate at most one per view
distance_rows += valid_view * (distance[:, dimGroups[nv]:dimGroups[nv+1]].min(axis=-1))
index = list(range(dimGroups[-1]))
# index.sort(key=lambda x: (-count_rows[x], distance_rows[x]))
# sort with 2D confidence
# index.sort(key=lambda x: -pelvis_all[x, 2])
# sort with valid matches
        # the criterion for picking 2D detections is changed to the number of valid 2D reprojection distances
valid_count = (distance < INLIER_REPRO * 2).sum(axis=0)
index = (-valid_count).argsort()
return index
def assign_by_3D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
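        """Track the 3D people of the previous frame into the current 2D detections.

        Previous results are processed in descending number of supporting views. For
        each one, the 2D candidates are scanned by increasing 3D->2D distance: two
        views initialize the track (their triangulation must stay within
        cfg.max_movement of the previous position), further views are added when the
        joint triangulation keeps the mean reprojection error below cfg.max_pixel with
        all depths above 0.5, and the track is kept only with at least cfg.min_views
        views. used_index is filled in place with the assigned person ids.
        """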
INLIER_TRACK = self.cfg.track_pixel
INLIER_REPRO = self.cfg.max_pixel
        # sort by the visibility (number of views) in the previous frame
index_3d = list(range(len(self.results)))
index_3d.sort(key=lambda x:-len(self.results[x]['views']))
results = []
visited3d = set()
for idx3d in index_3d:
visited3d.add(idx3d)
self.results[idx3d]['tracked'] = False
pid = self.results[idx3d]['id']
dist = distance[:, idx3d]
FULL_LOG('[Assign 3D] Check 3D {}'.format(pid))
FULL_LOG('[Assign 3D] Distance {}'.format(LOG_ARRAY(dist[None])))
current = []
views = []
proposal = dist.argsort()
            # initialization:
for idx2d in proposal:
                # the view-angle constraint is not satisfied
# if not valid_theta[views_all[row], views_all[idx2d]]:
# continue
                # the distance constraint is not satisfied
if dist[idx2d] > INLIER_TRACK:
break
if used_index[idx2d] > -1:
continue
if views_all[idx2d] in views:
continue
if not self.check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
continue
                if len(current) == 1:  # already have one view; before adding another, check the distance of the triangulated point to the previous 3D position
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views + [views_all[idx2d]], current + [pelvis_all[idx2d]])
_dist = np.linalg.norm(k3d[:, :3] - self.results[idx3d]['pelvis'][:, :3], axis=-1).mean()
if _dist > self.cfg.max_movement:
continue
                # found a reasonable pair as a good initialization
current.append(pelvis_all[idx2d])
views.append(views_all[idx2d])
used_index[idx2d] = pid
FULL_LOG(f'[Assign 3D] First track 3D {pid} with {idx2d}, view ({views_all[idx2d]})')
if len(current) == 2:
break
if len(current) < 2:
                # no good initialization was found
continue
for idx2d in proposal:
                # skip if this view is already covered or this 2D detection has already been used
if views_all[idx2d] in views:
continue
if used_index[idx2d] > -1:
continue
if not self.check_is_best_3d_of_2d(distance, idx3d, idx2d, visited3d):
continue
                # try to add it
FULL_LOG('[Assign 3D] 3D {} add {}, distance={:.2f}'.format(pid, idx2d, dist[idx2d]))
new = current + [pelvis_all[idx2d]]
views_new = views + [views_all[idx2d]]
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
_dist = np.linalg.norm(k3d[:, :3] - self.results[idx3d]['pelvis'][:, :3], axis=-1).mean()
flag_movement = _dist < self.cfg.max_movement
flag_depth = (depth > 0.5).all()
flag_repro = dist_repro.mean() < INLIER_REPRO
flag = flag_repro & flag_depth
FULL_LOG('[Assign 3D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
if flag:
                    # accept and add it
current = new
views = views_new
used_index[idx2d] = pid
FULL_LOG('[Assign 3D] {} => {}'.format(idx2d, np.where(used_index == pid)[0]))
else:
FULL_LOG('[Assign 3D] Failed')
# check the results
            if len(views) < self.cfg.min_views:  # not enough views to keep this track
continue
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
select = np.where(used_index == pid)[0]
results.append({
'id': pid,
'pelvis': k3d,
                'keypoints3d': k3d, # store both so the pelvis can still be recovered even if keypoints3d is overwritten later
'views': views_all[select],
'select': select,
'indices': select - dimGroups[views_all[select]],
'frames': self.results[idx3d]['frames'] + [self.frames]
})
self.results[idx3d]['tracked'] = True
for res in results:
text = f''' - Track {res['id']} with {len(res['views'])} views
views: {' '.join(list(map(lambda x:'{:2d}'.format(x), res['views'])))}
id : {' '.join(list(map(lambda x:'{:2d}'.format(x), res['select'])))}'''
LOG(text)
print(text)
for res in self.results:
if not res['tracked']:
mywarn('- 3D {} not tracked'.format(res['id']))
                # For people that were not tracked: check whether there are two views with a
                # very small distance. If there are, but they are occupied by someone else,
                # give that 2D to this person as well; in extreme cases a person may be
                # exactly occluded by another person in some views.
print(res)
if len(res['frames']) < 3:
mywarn('- 3D {} not tracked, but only {} frames'.format(res['id'], len(res['frames'])))
else:
pass
# import ipdb; ipdb.set_trace()
return results
def find_initial_3_pair(self, distance, pelvis_all, views_all, dimGroups):
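        """Enumerate all triplets (i < j < k) of 2D detections, score each by the mean
        of its three pairwise costs, keep those below cfg.max_pixel and return the
        triplets and scores sorted by ascending score."""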
        # generate all possible candidate triplets (3-pairs)
index_0 = np.arange(pelvis_all.shape[0])
index_0 = np.stack(np.meshgrid(index_0, index_0, index_0), axis=-1).reshape(-1, 3)
flag_order = (index_0[:, 0] < index_0[:, 1]) & (index_0[:, 1] < index_0[:, 2])
# flag_views = (views_all[index_0[:, 0]] != views_all[index_0[:, 1]]) & \
# (views_all[index_0[:, 1]] != views_all[index_0[:, 2]]) & \
# (views_all[index_0[:, 0]] != views_all[index_0[:, 2]])
valid_index = index_0[flag_order]
distance_circle = distance[valid_index[:, 0], valid_index[:, 1]] + \
distance[valid_index[:, 1], valid_index[:, 2]] + \
distance[valid_index[:, 2], valid_index[:, 0]]
distance_circle = distance_circle / 3
valid_dist = distance_circle < self.cfg.max_pixel
valid_ = valid_index[valid_dist]
dist_sum = distance_circle[valid_dist]
arg_idx = dist_sum.argsort()
FULL_LOG('[Assign 2D] find {} 3 pair: '.format(len(arg_idx)))
return valid_[arg_idx], dist_sum[arg_idx]
def try_to_add_index(self, dist_row, cameras, pelvis_all, views_all, dimGroups,
used_index, views, current, pid):
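        """Greedily extend an initialized person with more views.

        Candidates are scanned by increasing averaged cost and appended (in place, to
        `current` and `views`) when their view is new, they are unused, and the extended
        triangulation keeps the mean reprojection error below cfg.max_pixel with all
        depths above 0.5. Returns the list of 2D indices that were added.
        """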
INLIER_REPRO = self.cfg.max_pixel
proposal = dist_row.argsort()
indices = []
for idx2d in proposal:
if dist_row[idx2d] > INLIER_REPRO:
break
            # skip if this view is already covered or this 2D detection has already been used
if views_all[idx2d] in views:
continue
if used_index[idx2d] > -1:
continue
FULL_LOG('[Assign 2D] Try to add {}, distance={:.2f}'.format(idx2d, dist_row[idx2d]))
            # try triangulation and reprojection
new = current + [pelvis_all[idx2d]]
views_new = views + [views_all[idx2d]]
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
flag_depth = (depth > 0.5).all()
flag_repro = dist_repro.mean() < INLIER_REPRO
flag = flag_repro & flag_depth
FULL_LOG('[Assign 2D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
if flag:
                # accept and add it
current.append(pelvis_all[idx2d])
views.append(views_all[idx2d])
indices.append(idx2d)
FULL_LOG('[Assign 2D] Add {}'.format(idx2d ))
else:
FULL_LOG('[Assign 2D] Failed')
return indices
def assign_by_2D_3pair(self, results, distance, dimGroups, used_index, valid_3pairs, views_all, pelvis_all, cameras):
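        """Initialize new people from the candidate triplets.

        Every triplet whose members are all unused is verified by triangulation
        (mean reprojection error below cfg.max_pixel, depths above 0.5), extended with
        try_to_add_index, given a new person id and appended to results; used_index is
        updated with that id.
        """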
INLIER_REPRO = self.cfg.max_pixel
for ipair, valid_3pair in enumerate(valid_3pairs):
            # first check whether any member of the triplet has already been used
if (used_index[valid_3pair] > -1).any():
continue
            # then check whether the triplet is geometrically reasonable
FULL_LOG('[Assign 2D] Check 3 pair {}'.format(valid_3pair))
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_all[valid_3pair], pelvis_all[valid_3pair])
flag_depth = (depth > 0.5).all()
flag_repro = dist_repro.mean() < INLIER_REPRO
# TODO: flag range
flag = flag_repro & flag_depth
if not flag: continue
            # add the remaining detections
pid = self.max_id
self.max_id += 1
dist_pair = distance[valid_3pair].mean(axis=0)
views = views_all[valid_3pair].tolist()
current = [pelvis_all[i] for i in valid_3pair]
indices = self.try_to_add_index(dist_pair, cameras, pelvis_all, views_all, dimGroups,
used_index, views, current, pid)
select = np.array(valid_3pair.tolist() + indices)
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
used_index[select] = pid
results.append({
'id': pid,
'pelvis': k3d,
                'keypoints3d': k3d, # store both so the pelvis can still be recovered even if keypoints3d is overwritten later
'views': views_all[select],
'select': select,
'indices': select - dimGroups[views_all[select]],
'frames': [self.frames],
})
return results
def assign_by_2D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
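        """Create new people from the detections that are still unassigned.

        First from 3-view triplets (assign_by_2D_3pair), then from single seeds sorted
        by their number of candidate matches: each seed is paired with the nearest
        unused detection from a view that passes the valid_theta angle test, extended
        view by view under the reprojection and depth checks, and kept only with at
        least cfg.min_views_init supporting views.
        """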
def log_index_2d(index2d):
return '({}|{}-{})'.format(index2d, views_all[index2d], index2d-dimGroups[views_all[index2d]])
def log_indexes_2d(index2d_):
return ', '.join(['({}|{}-{})'.format(index2d, views_all[index2d], index2d-dimGroups[views_all[index2d]]) for index2d in index2d_])
INLIER_REPRO = self.cfg.max_pixel
new_id_start = 10000
new_max_id = new_id_start
valid_3pairs, dist_3pair = self.find_initial_3_pair(distance, pelvis_all, views_all, dimGroups=dimGroups)
results = []
        if len(valid_3pairs) > 0:
results = self.assign_by_2D_3pair(results, distance, dimGroups, used_index, valid_3pairs, views_all, pelvis_all, cameras)
valid_theta = cameras['valid_theta']
nViews = len(dimGroups)-1
        # sort: compute an affinity for each detection
count_rows = np.zeros((dimGroups[-1]), dtype=int)
distance_rows = np.zeros((dimGroups[-1]))
for nv in range(nViews):
if dimGroups[nv] == dimGroups[nv+1]:continue
valid_view = np.clip((distance[:, dimGroups[nv]:dimGroups[nv+1]] < INLIER_REPRO).sum(axis=-1), 0, 1)
            count_rows += valid_view  # accumulate at most one per view
distance_rows += valid_view * (distance[:, dimGroups[nv]:dimGroups[nv+1]].min(axis=-1))
index = list(range(dimGroups[-1]))
# index.sort(key=lambda x: (-count_rows[x], distance_rows[x]))
# sort with 2D confidence
# index.sort(key=lambda x: -pelvis_all[x, 2])
# sort with valid matches
        # the criterion for picking 2D detections is changed to the number of valid 2D reprojection distances
valid_count = (distance < INLIER_REPRO * 2).sum(axis=0)
index = (-valid_count).argsort()
visited2d = set()
for row in index:
visited2d.add(row)
if used_index[row] > -1:continue
FULL_LOG('[Assign 2D] Check 2D {}'.format(log_index_2d(row)))
pid = new_max_id
new_max_id += 1
dist_row = distance[row]
proposal = dist_row.argsort()
current = [pelvis_all[row]]
views = [views_all[row]]
used_index[row] = pid
            # initialization:
for idx2d in proposal:
                # the view-angle constraint is not satisfied
if not valid_theta[views_all[row], views_all[idx2d]]:
continue
                # the distance constraint is not satisfied
if dist_row[idx2d] > INLIER_REPRO:
break
if used_index[idx2d] > -1:
continue
if views_all[idx2d] in views:
continue
# self.triangulate_and_repro(cameras, [views_all[18], views_all[34]], [pelvis_all[18], pelvis_all[34]])
                # In the 2D stage we cannot require this to be the globally best match,
                # because detections from other views may still remain; at most we could
                # check that it is the best one for this particular view.
# if not self.check_is_best_3d_of_2d(distance, row, idx2d, visited2d):
# continue
                # found a reasonable pair as a good initialization
current.append(pelvis_all[idx2d])
views.append(views_all[idx2d])
used_index[idx2d] = pid
FULL_LOG(f'[Assign 2D] Init with {log_index_2d(idx2d)}')
break
if len(current) < 2:
                # no good initialization was found
continue
for idx2d in proposal:
if dist_row[idx2d] > INLIER_REPRO:
break
                # skip if this view is already covered or this 2D detection has already been used
if views_all[idx2d] in views:
continue
if used_index[idx2d] > -1:
continue
# if not self.check_is_best_3d_of_2d(distance, row, idx2d, visited2d):
# continue
                # try triangulation and reprojection
new = current + [pelvis_all[idx2d]]
views_new = views + [views_all[idx2d]]
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views_new, new)
flag_depth = (depth > 0.5).all()
flag_repro = dist_repro.mean() < INLIER_REPRO
flag = flag_repro & flag_depth
FULL_LOG('[Assign 2D] repro: \n{}, \ndepth: \n{}'.format(LOG_ARRAY(dist_repro[None]), LOG_ARRAY(depth.T)))
if flag:
                    # accept and add it
current = new
views = views_new
used_index[idx2d] = pid
_current_id = np.where(used_index == pid)[0]
FULL_LOG('[Assign 2D] {} => {}'.format(idx2d, log_indexes_2d(_current_id)))
else:
FULL_LOG('[Assign 2D] Failed')
            if len(views) < self.cfg.min_views_init:  # not enough views to create a new person
continue
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, current)
select = np.where(used_index == pid)[0]
final_id = self.max_id
self.max_id += 1
used_index[select] = final_id
results.append({
'id': final_id,
'pelvis': k3d,
                'keypoints3d': k3d, # store both so the pelvis can still be recovered even if keypoints3d is overwritten later
'views': views_all[select],
'select': select,
'indices': select - dimGroups[views_all[select]],
'frames': [self.frames],
})
for res in results:
text = f''' - Init {res['id']} with {len(res['views'])} views
views: {res['views']}
id : {res['select']}'''
LOG(text)
print(text)
return results
class MatchRoot(MatchBase):
def __init__(self, mode, cfg):
super().__init__(mode, cfg)
self.results = []
self.frames = -1
def __call__(self, pelvis, cameras, self_results=None):
"""
cameras: {K, R, T, dist, P}
"""
self.frames += 1
LOG('>>> Current frames: {}'.format(self.frames))
if self_results is None:
self_results = self.results
nViews = len(pelvis)
pelvis_all, views_all, dimGroups = self.stack_array(pelvis)
# Undistort
pelvis_undis = self.undistort(pelvis, cameras)
pelvis_undis_all, _, _ = self.stack_array(pelvis_undis)
# distance3D => 2D
distance3d_2d = self.calculate_repro(self_results, pelvis_all, cameras, views_all)
# FULL_LOG('distance3d_2d: {}'.format(LOG_ARRAY(distance3d_2d)))
# distance: triangulate and project
distance2d_2d = self.calculate_distance(pelvis_undis, cameras, dimGroups)
# FULL_LOG('distance2d_2d: {}'.format(LOG_ARRAY(distance2d_2d)))
# set assign index
used_index = np.zeros((dimGroups[-1]), dtype=int) - 1
results = []
# assign by 3D => 2D
results3d = self_results
if len(results3d) > 0:
results3d = self.assign_by_3D(used_index, distance3d_2d, pelvis_undis_all, views_all, dimGroups, cameras)
# assign by 2D + 2D
results2d = self.assign_by_2D(used_index, distance2d_2d, pelvis_undis_all, views_all, dimGroups, cameras)
results = results3d + results2d
# distance = np.linalg.norm(keypoints3d[:, None, ..., :3] - keypoints3d[None, ..., :3], axis=-1).mean(axis=-1)
# print(LOG_ARRAY(distance, format='{:6.2f}'))
results.sort(key=lambda x: -len(x['views']))
results = results[:self.cfg.max_person]
if self.mode == 'track':
self.results = results
results.sort(key=lambda x:x['id'])
        # TODO: add an NMS check and merge overlapping results
if len(results) == 0:
keypoints3d = np.zeros((0, 25, 3))
else:
keypoints3d = np.stack([d['keypoints3d'] for d in results])
return {'keypoints3d': keypoints3d, 'results': results}
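# A minimal usage sketch of MatchRoot (illustration only: the cfg fields below are the ones
# read in this file, but their values and the way the inputs are produced upstream are
# assumptions of this sketch, not something this module defines):
#
#   cfg: a dict-like config with attribute access providing
#        track_pixel, max_pixel, valid_angle, valid_ranges, max_movement,
#        min_views, min_views_init, max_person
#   matcher = MatchRoot(mode='track', cfg=cfg)
#   # per frame: one (n_v, 3) array of root detections (x, y, conf) per view and the
#   # stacked cameras {'K', 'R', 'T', 'dist', 'P'} with the view index on axis 0
#   out = matcher(pelvis, cameras)
#   out['keypoints3d']   # (n_person, 1, 4) triangulated roots
#   out['results']       # per-person dicts with 'id', 'views', 'select', 'indices'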
class MatchTwoRoot(MatchRoot):
def __init__(self, mode, cfg):
keys = ['pelvis', 'neck']
self._max_id_add = -1
self._max_id = {key: 0 for key in keys}
self.current = 'pelvis'
self._results = {key: [] for key in keys}
super().__init__(mode, cfg)
self.results_limb = []
self.mapping = {key: {} for key in keys}
@property
def max_id_add(self):
self._max_id_add += 1
return self._max_id_add
@property
def max_id(self):
return self._max_id[self.current]
@max_id.setter
def max_id(self, index):
self._max_id[self.current] = index
@property
def results(self):
return self._results[self.current]
@results.setter
def results(self, val):
self._results[self.current] = val
@staticmethod
def check_tracked(key, record_pelvis, current_3d, mapping):
for ires, res in enumerate(record_pelvis):
pid = res['id']
res['limb_id'] = -1
if pid in mapping[key]:
p3d = mapping[key][pid]
res['limb_id'] = p3d
current_3d[p3d][key] = ires
def __call__(self, cameras, openpose):
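        """Match pelvis and neck (OpenPose BODY_25 joints 8 and 1) separately with
        MatchRoot, then associate the two sets into limbs: previously tracked limbs
        reuse the stored id mapping and missing joints are completed from the last
        relative offset; new limbs are accepted when the pelvis-neck distance scores
        above 0.8 under a Gaussian around the mean limb length of 0.489 m.
        Returns one 2-joint keypoints3d per limb.
        """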
pelvis_id = 8
neck_id = 1
pelvis = [openpose[v][pelvis_id] for v in range(len(openpose))]
neck = [openpose[v][neck_id] for v in range(len(openpose))]
self.current = 'pelvis'
record_pelvis = super().__call__(pelvis, cameras)['results']
self.current = 'neck'
record_neck = super().__call__(neck, cameras)['results']
current_3d = {p['id']: {'pelvis': -1, 'neck': -1} for p in self.results_limb}
        # first check whether each root has already been tracked
self.check_tracked('pelvis', record_pelvis, current_3d, self.mapping)
self.check_tracked('neck', record_neck, current_3d, self.mapping)
        # record the ids as a whole first; then, if something is lost in the current frame, update it
for p in self.results_limb:
            # check the current frame
current_a, current_b = current_3d[p['id']]['pelvis'], current_3d[p['id']]['neck']
if current_a != -1 and current_b != -1:
assert current_a < len(record_pelvis) and current_b < len(record_neck), 'Index Error {}/{}, {}/{}'.format(current_a, current_b, len(record_pelvis), len(record_neck))
p['pelvis'] = record_pelvis[current_a]['pelvis']
p['neck'] = record_neck[current_b]['pelvis']
elif current_a == -1 and current_b != -1:
                # pelvis (a) was not detected but neck (b) was
                # keep the previous relative offset
mywarn('Missing Pelvis')
p['neck'] = record_neck[current_b]['pelvis']
pre_direc = p['pelvis'][:, :3] - p['neck'][:, :3]
p['pelvis'][:, :3] = p['neck'][:, :3] + pre_direc
                # write the completed point back
self._results['pelvis'].append({
'id': p['pelvis_id'],
'pelvis': p['pelvis'],
'views': [],
'frames': [],
'indices': [],
'limb_id': p['id'],
})
elif current_a != -1 and current_b == -1:
mywarn('Missing Neck')
pre_direc = p['neck'][:, :3] - p['pelvis'][:, :3]
p['pelvis'] = record_pelvis[current_a]['pelvis']
p['neck'][:, :3] = p['pelvis'][:, :3] + pre_direc
                # write the completed point back
self._results['neck'].append({
'id': p['neck_id'],
'pelvis': p['neck'],
'views': [],
'frames': [],
'indices': [],
'limb_id': p['id'],
})
else:
import ipdb; ipdb.set_trace()
raise NotImplementedError
        # iterate over all combinations that have not been tracked
n_pelvis = len(record_pelvis)
n_neck = len(record_neck)
dist = np.zeros((n_pelvis, n_neck))
        # TODO: use the 2D PAFs for the association
for i in range(n_pelvis):
if record_pelvis[i]['limb_id'] > -1:
continue
for j in range(n_neck):
if record_neck[j]['limb_id'] > -1:
continue
pa = record_pelvis[i]['pelvis']
pb = record_neck[j]['pelvis']
length = np.linalg.norm(pa[:, :3] - pb[:, :3])
dist[i, j] = length
LIMB_MEAN = 0.489
dist_to_mean = np.exp(-(dist - LIMB_MEAN)**2/(2*(LIMB_MEAN/3)**2))
for i in range(n_pelvis):
if record_pelvis[i]['limb_id'] > -1:
continue
for j in range(n_neck):
if record_neck[j]['limb_id'] > -1:
continue
pa = record_pelvis[i]['pelvis']
pb = record_neck[j]['pelvis']
if dist_to_mean[i, j] > 0.8:
                    # acceptable limb length
limb = {
'id': self.max_id_add,
'pelvis_id': record_pelvis[i]['id'],
'neck_id': record_neck[j]['id'],
'pelvis': pa,
'neck': pb,
'frames': [self.frames],
}
self.mapping['pelvis'][limb['pelvis_id']] = limb['id']
self.mapping['neck'][limb['neck_id']] = limb['id']
self.results_limb.append(limb)
        # drop the ones that were not tracked
results = []
for limb in self.results_limb:
k3d = np.vstack([limb['pelvis'], limb['neck']])
results.append({
'id': limb['id'],
'keypoints3d': k3d,
})
return {'results': results}
class MatchTorso(MatchBase):
def __init__(self, mode, cfg):
super().__init__(mode, cfg)
self.results = []
self.frames = -1
@staticmethod
def stack_pafs(pafs):
dimGroups = [0]
results = defaultdict(list)
views_all = []
for nv, paf in enumerate(pafs):
src = paf['src']
dimGroups.append(dimGroups[-1] + src.shape[0])
views_all.extend([nv for _ in range(src.shape[0])])
results['src'].append(src)
results['dst'].append(paf['dst'])
results['value'].append(paf['value'])
results = {key: np.concatenate(val, axis=0) for key, val in results.items()}
return results, np.array(views_all), np.array(dimGroups)
def check_used_index(self, info_limb, index, info_joints):
idx_src = info_limb['src'][index]
idx_dst = info_limb['dst'][index]
if info_joints['src']['used_index'][idx_src] > -1:
return True
if info_joints['dst']['used_index'][idx_dst] > -1:
return True
return False
def set_used_index(self, info_limb, index, info_joints, pid):
idx_src = info_limb['src'][index]
idx_dst = info_limb['dst'][index]
info_joints['src']['used_index'][idx_src] = pid
info_joints['dst']['used_index'][idx_dst] = pid
return True
def triangulate_limb(self, info_limb, info_joints, index, views, cameras):
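        """Triangulate the src and dst joints of a limb hypothesis from the selected
        limb indices and views. Returns (flag, k3d, dist): flag is False when the 3D
        limb length falls outside [0.3, 0.7] m, k3d stacks the two joints and dist is
        the per-view maximum of the two joints' reprojection errors."""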
flag = True
k3d_all = []
dist_all = []
for key in ['src', 'dst']:
proposals = []
for idx in index:
idx_ = info_limb[key][idx]
proposals.append(info_joints[key]['detect_undis'][idx_])
k3d, dist_repro, depth = self.triangulate_and_repro(cameras, views, proposals)
dist_all.append(dist_repro)
k3d_all.append(k3d)
k3d_all = np.vstack(k3d_all)
limb_length = np.linalg.norm(k3d_all[1, ..., :3] - k3d_all[0, ..., :3])
if limb_length < 0.3 or limb_length > 0.7:
flag = False
dist_all = np.stack(dist_all)
dist_all = np.max(dist_all, axis=0)
return flag, k3d_all, dist_all
# def assign_limb_by_2D(self, used_index, distance, pelvis_all, views_all, dimGroups, cameras):
def assign_limb_by_2D(self, info_limb, info_joints, distance, views_all, dimGroups, cameras):
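        """Greedy multi-view association of limb (src-dst) detections.

        Limbs are processed in the order given by sort_with_affinity, initialized from
        a pair of views that passes the angle, reprojection and limb-length checks,
        then extended view by view; a limb is kept only with at least cfg.min_views
        views. check_used_index / set_used_index keep track of which joints have been
        consumed.
        """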
def log_index_2d(index2d):
src = info_limb['src'][index2d]
dst = info_limb['dst'][index2d]
src = src - info_joints['src']['dimGroups'][views_all[index2d]]
dst = dst - info_joints['dst']['dimGroups'][views_all[index2d]]
return '({}|{}-({},{}))'.format(index2d, views_all[index2d], src, dst)
# def log_indexes_2d(index2d_):
# return ', '.join(['({}|{}-{})'.format(index2d, views_all[index2d], index2d-dimGroups[views_all[index2d]]) for index2d in index2d_])
INLIER_REPRO = self.cfg.max_pixel
valid_theta = cameras['valid_theta']
index = self.sort_with_affinity(distance, dimGroups, INLIER_REPRO)
visited2d = set()
results = []
new_id_start = 10000
new_max_id = new_id_start
for row in index:
visited2d.add(row)
if self.check_used_index(info_limb, row, info_joints):
continue
pid = new_max_id
new_max_id += 1
FULL_LOG('[Assign 2D] Check 2D {}'.format(log_index_2d(row)))
dist_row = distance[row]
proposal = dist_row.argsort()
            # try to initialize
views = [views_all[row]]
current = [row]
for idx2d in proposal:
                # the view-angle constraint is not satisfied
if not valid_theta[views_all[row], views_all[idx2d]]:
continue
                # the distance constraint is not satisfied
if dist_row[idx2d] > INLIER_REPRO:
break
if self.check_used_index(info_limb, idx2d, info_joints):
continue
if views_all[idx2d] in views:
continue
                # check the limb (bone) length
flag, k3d, repro_error = self.triangulate_limb(info_limb, info_joints, [row, idx2d], [views_all[row], views_all[idx2d]], cameras)
length = np.linalg.norm(k3d[1, ..., :3] - k3d[0, ..., :3])
if flag:
views.append(views_all[idx2d])
current.append(idx2d)
FULL_LOG(f'[Assign 2D] Init with {log_index_2d(idx2d)}, length={length:.4f}')
break
else:
FULL_LOG(f'[Assign 2D] Init failed with {log_index_2d(idx2d)}, length = {length:.4f}')
if len(current) < 2:
                # no good initialization was found
FULL_LOG(f'[Assign 2D] Cannot find a good initialization pair {log_index_2d(row)}')
continue
for idx2d in proposal:
if dist_row[idx2d] > INLIER_REPRO:break
                # skip if this view is already covered or this 2D detection has already been used
if views_all[idx2d] in views:
continue
if self.check_used_index(info_limb, idx2d, info_joints):
continue
FULL_LOG('[Assign 2D] Try to add 2D {} => {}'.format(idx2d, log_index_2d(idx2d)))
                # try triangulation and reprojection
new = current + [idx2d]
views_new = views + [views_all[idx2d]]
flag_limb, k3d, dist_repro = self.triangulate_limb(info_limb, info_joints, new, views_new, cameras)
# flag_depth = (depth > 0.5).all()
flag_depth = True
flag_repro = dist_repro.mean() < INLIER_REPRO
flag = flag_repro & flag_depth & flag_limb
FULL_LOG('[Assign 2D] repro: \n{}'.format(LOG_ARRAY(dist_repro[None])))
if flag:
                    # accept and add it
current = new
views = views_new
self.set_used_index(info_limb, idx2d, info_joints, pid)
FULL_LOG('[Assign 2D] {} => {}'.format(idx2d, current))
else:
FULL_LOG('[Assign 2D] Failed')
new = None
views_new = None
            if len(views) < self.cfg.min_views:  # not enough views to keep this limb
continue
flag_limb, k3d, dist_repro = self.triangulate_limb(info_limb, info_joints, current, views, cameras)
final_id = self.max_id
self.max_id += 1
results.append({
'id': final_id,
'torso': k3d,
                'keypoints3d': k3d, # store both so the torso can still be recovered even if keypoints3d is overwritten later
'views': views,
'select': current,
# 'indices': select - dimGroups[views_all[select]],
'frames': [self.frames],
})
for res in results:
text = f''' - Init {res['id']} with {len(res['views'])} views
views: {res['views']}
id : {res['select']}'''
LOG(text)
print(text)
return results
def calculte_distance_src_dst(self, src, dst, cameras):
info = {}
for name, detect in zip(['src', 'dst'], [src, dst]):
detect_all, views_all, dimGroups = self.stack_array(detect)
# Undistort
detect_undis = self.undistort(detect, cameras)
detect_undis_all, _, _ = self.stack_array(detect_undis)
# # distance3D => 2D
# distance3d_2d = self.calculate_repro(self.results, pelvis_all, cameras, views_all)
# FULL_LOG('distance3d_2d: {}'.format(LOG_ARRAY(distance3d_2d)))
# distance: triangulate and project
distance2d_2d = self.calculate_distance(detect_undis, cameras, dimGroups)
info[name] = {
'detect_all': detect_all,
'views_all': views_all,
'dimGroups': dimGroups,
'distance2d_2d': distance2d_2d,
'detect_undis': detect_undis_all,
'used_index': np.zeros((dimGroups[-1]), dtype=int) - 1
}
return info
def get_valid_limbs(self, pafs, info_joint):
nViews = len(pafs)
valid_paf = []
for nv in range(nViews):
paf = pafs[nv]
src, dst = np.where(paf > 0.3)
value = paf[src, dst]
valid_paf.append({
'src': src + info_joint['src']['dimGroups'][nv],
'dst': dst + info_joint['dst']['dimGroups'][nv],
'value': value,
'view': nv,
})
results, views_all, dimGroups = self.stack_pafs(valid_paf)
return results, views_all, dimGroups
def calculate_distance_limb(self, results, cameras, dimGroups, distance_src, distance_dst):
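        """Combine the joint-level cost matrices into a limb-level one: for two limb
        hypotheses, the cost is the maximum of the src-src and dst-dst joint costs."""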
src_idx, dst_idx = results['src'], results['dst']
src_idx0, src_idx1 = np.meshgrid(src_idx, src_idx)
dist_src_src = distance_src[src_idx0, src_idx1]
dst_idx0, dst_idx1 = np.meshgrid(dst_idx, dst_idx)
dist_dst_dst = distance_dst[dst_idx0, dst_idx1]
        # TODO: take the per-view limb confidence and joint confidence into account
dist_spatial = np.maximum(dist_src_src, dist_dst_dst)
return dist_spatial
def __call__(self, cameras, openpose, openpose_paf):
"""
cameras: {K, R, T, dist, P}
"""
self.frames += 1
pelvis_id = 8
neck_id = 1
nViews = len(openpose)
LOG('>>> Current frames: {}'.format(self.frames))
pelvis = [openpose[v][pelvis_id] for v in range(len(openpose))]
neck = [openpose[v][neck_id] for v in range(len(openpose))]
info_joint = self.calculte_distance_src_dst(pelvis, neck, cameras)
pafs = [openpose_paf[v][(pelvis_id, neck_id)] for v in range(len(openpose_paf))]
info_limb, views_all, dimGroups = self.get_valid_limbs(pafs, info_joint)
distance2d_2d = self.calculate_distance_limb(info_limb, cameras, dimGroups,
info_joint['src']['distance2d_2d'], info_joint['dst']['distance2d_2d'])
results = self.assign_limb_by_2D(info_limb, info_joint, distance2d_2d, views_all, dimGroups, cameras)
results.sort(key=lambda x: -len(x['views']))
results = results[:self.cfg.max_person]
# if self.mode == 'track':
# self.results = results
results.sort(key=lambda x:x['id'])
        # TODO: add an NMS check and merge overlapping results
if len(results) == 0:
keypoints3d = np.zeros((0, 2, 3))
else:
keypoints3d = np.stack([d['keypoints3d'] for d in results])
return {'keypoints3d': keypoints3d, 'results': results}
class TriangulateAll:
def __init__(self, mode) -> None:
self.mode = mode
def __call__(self, bbox, keypoints, cameras, results):
for res in results:
bbox_, k2d, Pall = [], [], []
for i in range(len(res['views'])):
v = res['views'][i]
bbox_.append(bbox[v][res['indices'][i]])
k2d.append(keypoints[v][res['indices'][i]])
Pall.append(cameras['P'][v])
k2d = np.stack(k2d)
Pall = np.stack(Pall)
bbox_ = np.stack(bbox_)
if self.mode == 'naive':
k3d = batch_triangulate(k2d, Pall)
elif self.mode == 'robust':
from easymocap.mytools.triangulator import iterative_triangulate
k3d, k2d = iterative_triangulate(k2d, Pall,
dist_max=25)
res['keypoints3d'] = k3d
res['keypoints2d'] = k2d
res['bbox'] = bbox_
return {'keypoints3d': np.stack([d['keypoints3d'] for d in results]), 'results': results}
class MatchHandLR:
def __init__(self, mode, cfg):
self.model_l = MatchRoot(mode,cfg)
self.model_r = MatchRoot(mode,cfg)
def __call__(self, pelvis_l, pelvis_r, cameras):
ret = {}
outl = self.model_l(pelvis_l, cameras)
outr = self.model_r(pelvis_r, cameras)
for k in outl.keys():
ret[k+'_l'] = outl[k]
for k in outr.keys():
ret[k+'_r'] = outr[k]
return ret
class MatchBodyHand:
def __init__(self, mode) -> None:
pass
def projectPoints(self, X, K, R, t, Kd):
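        """Project 3D points X (3, N) into one view: extrinsics (R, t), radial and
        tangential distortion with Kd ordered as [k1, k2, p1, p2, k3] (OpenCV order),
        then the intrinsic matrix K. Returns a (3, N) array whose first two rows are
        pixel coordinates; the third row keeps the camera-space depth."""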
x = R @ X + t
        x[0:2, :] = x[0:2, :] / x[2, :]  # project onto the normalized image plane
        r = x[0, :] * x[0, :] + x[1, :] * x[1, :]
        # apply radial + tangential distortion; compute both coordinates before overwriting x
        radial = 1 + Kd[0] * r + Kd[1] * r * r + Kd[4] * r * r * r
        x0 = x[0, :] * radial + 2 * Kd[2] * x[0, :] * x[1, :] + Kd[3] * (r + 2 * x[0, :] * x[0, :])
        x1 = x[1, :] * radial + 2 * Kd[3] * x[0, :] * x[1, :] + Kd[2] * (r + 2 * x[1, :] * x[1, :])
        # apply the intrinsics
        x[0, :] = K[0, 0] * x0 + K[0, 1] * x1 + K[0, 2]
        x[1, :] = K[1, 0] * x0 + K[1, 1] * x1 + K[1, 2]
return x
def match3d_step(self, results, keypoints3d,wristid):
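        """Associate bodies with triangulated hand roots in 3D.

        For every (body, hand) pair the squared distance between the body joint
        `wristid` and the hand root ('pelvis' of the hand result) is computed; pairs
        are processed by increasing distance and accepted greedily when neither side
        is taken and the squared distance is below 0.5. Returns one entry per body:
        the matched hand result dict, or -1 if none was assigned.
        """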
        match_results = (np.zeros((len(keypoints3d)), dtype=int) - 1).tolist()
vis = (np.zeros((len(keypoints3d)))-1).tolist()
dis = []
for i in range(len(keypoints3d)):
for j in range(len(results)):
dis.append([i,j,((keypoints3d[i][wristid][:3]-results[j]['pelvis'][0,:3].reshape(-1))**2).sum()])
if(len(dis)>0):
dis = np.array(dis)
dis = dis[np.argsort(dis[:,-1])]
for i in range(len(dis)):
bid =int(dis[i][0])
hid =int(dis[i][1])
if vis[bid]>=0 or hid in vis:
continue
if dis[i][2]>0.5:
continue
tmp_results = results[hid].copy()
# tmp_results['dis_bh'] = dis[i][2]
match_results[bid]=tmp_results
vis[bid]=hid
return match_results
def match2d_step(self, bbox_hand, keypoints3d, wristid, results_match_l, cameras):
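        """Fallback 2D association for bodies that got no hand match in 3D.

        The wrists of the unmatched bodies are projected into every view; hand boxes
        not already used by the 3D matches are ranked by squared pixel distance to the
        projected wrist and assigned greedily (threshold 50). Matched entries are
        written back into results_match_l as {'views': [view], 'indices': [box index]}.
        """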
lack_body_id=[]
mv_use_hand=[]
for i in range(cameras['R'].shape[0]):
mv_use_hand.append([])
for i in range(len(results_match_l)):
if isinstance(results_match_l[i],int) and results_match_l[i]==-1:
lack_body_id.append(i)
else:
mv = results_match_l[i]['views']#[cid]
indices = results_match_l[i]['indices']#[cid]
for j in range(len(mv)):
mv_use_hand[mv[j]].append(indices[j])
        wrist3dkpts = keypoints3d[lack_body_id, wristid, :3]  # (n, 3) wrist keypoint of each unmatched person
dis = []
for nv in range(len(bbox_hand)):
for hid in range(len(bbox_hand[nv])):
if hid in mv_use_hand[nv]:
continue
if bbox_hand[nv][hid][-1]==0:
continue
bx_ = bbox_hand[nv][hid]
k2d = np.array([(bx_[0]+bx_[2])/2,(bx_[1]+bx_[3])/2,bx_[-1]])
K = cameras['K'][nv]
Kd = cameras['dist'][nv].reshape(5)
R = cameras['R'][nv]
t = cameras['T'][nv]
wristkpts2d = self.projectPoints(wrist3dkpts.T[0:3,:], K, R, t, Kd).T
for bid in range(len(lack_body_id)):
D = ((wristkpts2d[bid][:2]-k2d[:2])**2).sum()
                    dis.append([D, lack_body_id[bid], nv, hid])  # [squared 2D error, 3D body id, view index, hand-box id in that view]
if(len(dis)>0):
vis = (np.zeros((len(keypoints3d)))-1).tolist()
dis = np.array(dis)
dis = dis[np.argsort(dis[:,0])]
            # TODO: check the magnitude of dis and discard entries that are too large
for i in range(len(dis)):
bid = int(dis[i][1])
nv = int(dis[i][2])
hid = int(dis[i][3])
if vis[bid]>=0 or hid in vis or results_match_l[bid]!=-1:
continue
                if dis[i][0] > 50:  # squared 2D distance between the projected wrist and the hand box
continue
results_match_l[bid]={
'views': np.array([nv]),
                    'indices': np.array([hid]), # indices is the index of the box within the corresponding view
# 'dis_bh': dis[i][0]
}
vis[bid]=hid
return results_match_l
def __call__(self, results_l, results_r, keypoints3d, cameras, bbox_handl, bbox_handr):
        '''
        results_l / results_r: lists of matched left/right hand results (length nhand)
        keypoints3d: (nperson, 25, 3) body keypoints
        bbox_handl / bbox_handr: per-view lists of hand bounding boxes
        '''
results_match_l = self.match3d_step(results_l, keypoints3d, 7)
results_match_r = self.match3d_step(results_r, keypoints3d, 4)
if(-1 in results_match_l):
            # TODO: if dis is empty, either there is no body or no view detected a hand; try to start single-view detection
            # TODO: even if dis is not empty, some bodies may still lack a hand match; try single-view detection, or complete them later.
            # single-view matching: find the -1 entries in the match list, project their wrists into every view,
            # find the hand boxes that have not been selected there, then match and record the result
results_match_l = self.match2d_step(bbox_handl, keypoints3d, 7, results_match_l, cameras)
if(-1 in results_match_r):
results_match_r = self.match2d_step(bbox_handr, keypoints3d, 4, results_match_r, cameras)
return {'match3d_l':results_match_l ,'match3d_r':results_match_r}