332 lines
13 KiB
Python
332 lines
13 KiB
Python
from easymocap.mytools.camera_utils import read_cameras
|
||
from easymocap.mytools.debug_utils import log, myerror, mywarn
|
||
from easymocap.mytools.file_utils import read_json
|
||
from .basedata import ImageDataBase, read_mv_images, find_best_people, find_all_people
|
||
import os
|
||
from os.path import join
|
||
import numpy as np
|
||
import cv2
|
||
from collections import defaultdict
|
||
|
||
panoptic15_in_body15 = [1,0,8,5,6,7,12,13,14,2,3,4,9,10,11]
|
||
|
||
def convert_body15_panoptic15(keypoints):
|
||
k3d_panoptic15 = keypoints[..., panoptic15_in_body15,: ]
|
||
return k3d_panoptic15
|
||
|
||
def convert_panoptic15_body15(keypoints):
|
||
keypoints_b15 = np.zeros_like(keypoints)
|
||
keypoints_b15[..., panoptic15_in_body15, :] = keypoints
|
||
return keypoints_b15
|
||
|
||
def padding_and_stack(datas):
|
||
shapes = {}
|
||
for data in datas:
|
||
if len(data) == 0:
|
||
continue
|
||
for key, value in data.items():
|
||
if key not in shapes.keys():
|
||
shapes[key] = value.shape
|
||
collect = {key: np.zeros((len(datas), *shapes[key])) for key in shapes.keys()}
|
||
for i, data in enumerate(datas):
|
||
for key, value in data.items():
|
||
collect[key][i] = value
|
||
return collect
|
||
|
||
def padding_empty(datas):
|
||
shapes = {}
|
||
for data in datas:
|
||
if len(data) == 0:
|
||
continue
|
||
for key, value in data.items():
|
||
if key not in shapes.keys():
|
||
shapes[key] = value.shape[1:]
|
||
collect = {key: [None for data in datas] for key in shapes.keys()}
|
||
for i, data in enumerate(datas):
|
||
for key, shape in shapes.items():
|
||
if key not in data.keys():
|
||
print('[Dataset] padding empty view {} of {}'.format(i, key))
|
||
collect[key][i] = np.zeros((0, *shape), dtype=np.float32)
|
||
else:
|
||
collect[key][i] = data[key]
|
||
return collect
|
||
|
||
def parse_frames(pafs_frame, H, W):
|
||
# 解析单帧的
|
||
res = {
|
||
'joints': [],
|
||
'pafs': {}
|
||
}
|
||
joints = pafs_frame[1:1+3*25]
|
||
for i in range(25):
|
||
value = np.fromstring(joints[3*i+2], sep=' ').reshape(3, -1).T
|
||
value[:, 0] = value[:, 0] * W
|
||
value[:, 1] = value[:, 1] * H
|
||
res['joints'].append(value.astype(np.float32))
|
||
# parse pafs
|
||
pafs = pafs_frame[1+3*25+1:]
|
||
for npart in range(26):
|
||
label = pafs[3*npart+0].split(' ')[2:]
|
||
label = (int(label[0]), int(label[1]))
|
||
shape = pafs[3*npart+1].split(' ')[2:]
|
||
w, h = int(shape[0]), int(shape[1])
|
||
value = np.fromstring(pafs[3*npart+2], sep=' ').reshape(w, h).astype(np.float32)
|
||
res['pafs'][label] = value
|
||
return res
|
||
|
||
def read_4dassociation(pafs, H, W):
|
||
outputs = []
|
||
# 解析paf文件
|
||
with open(pafs, 'r') as f:
|
||
pafs = f.readlines()
|
||
indices = []
|
||
for i, line in enumerate(pafs):
|
||
if line.startswith('# newframes:'):
|
||
indices.append([i])
|
||
elif line.startswith('# end frames:'):
|
||
indices[-1].append(i)
|
||
print('[Read OpenPose] Totally {} frames'.format(len(indices)))
|
||
for (start, end) in indices:
|
||
pafs_frame = pafs[start+1:end]
|
||
pafs_frame = list(map(lambda x:x.strip(), pafs_frame))
|
||
frames = parse_frames(pafs_frame, H, W)
|
||
outputs.append(frames)
|
||
return outputs
|
||
|
||
class MVDataset(ImageDataBase):
|
||
def __init__(self, root, subs, subs_vis, ranges, read_image=False, reader={}, filter={}) -> None:
|
||
super().__init__(root, subs, ranges, read_image)
|
||
self.subs_vis = subs_vis
|
||
self.length = 0
|
||
for key, value in reader.items():
|
||
if key == 'images':
|
||
self.try_to_extract_images(root, value)
|
||
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
|
||
self.length = len(data)
|
||
elif key == 'image_shape':
|
||
imgnames = self.infos['images'][0]
|
||
shapes = []
|
||
for imgname in imgnames:
|
||
img = cv2.imread(imgname)
|
||
height, width, _ = img.shape
|
||
log('[{}] sub {} shape {}'.format(self.__class__.__name__, imgname, img.shape))
|
||
shapes.append([height, width])
|
||
data = [shapes]
|
||
meta = {}
|
||
elif key == 'annots':
|
||
data, meta = read_mv_images(root, value['root'], value['ext'], subs)
|
||
if self.length > 0:
|
||
if self.length != len(data):
|
||
myerror('annots length {} not equal to images length {}.'.format(len(data), self.length))
|
||
data = data[:self.length]
|
||
else:
|
||
self.length = len(data)
|
||
elif key == 'openpose':
|
||
# 读取open pose
|
||
if len(subs) == 0:
|
||
pafs = sorted(os.listdir(join(root, value['root'])))
|
||
else:
|
||
pafs = [f'{sub}.txt' for sub in subs]
|
||
results = []
|
||
for nv, paf in enumerate(pafs):
|
||
pafname = join(root, value['root'], paf)
|
||
infos = read_4dassociation(pafname, H=self.infos['image_shape'][0][nv][0], W=self.infos['image_shape'][0][nv][1])
|
||
results.append(infos)
|
||
data = [[d[i] for d in results] for i in range(self.length)]
|
||
meta = {}
|
||
elif key == 'cameras':
|
||
if 'with_sub' in value.keys():
|
||
raise NotImplementedError
|
||
else:
|
||
cameras = read_cameras(os.path.join(root, value['root']))
|
||
if 'remove_k3' in value.keys():
|
||
for cam, camera in cameras.items():
|
||
camera['dist'][:, 4] = 0.
|
||
data = [cameras]
|
||
meta = {}
|
||
elif key in ['pelvis']:
|
||
continue
|
||
elif key == 'keypoints3d':
|
||
k3droot = value['root']
|
||
filenames = sorted(os.listdir(k3droot))[:self.length]
|
||
res_key = value.get('key', 'pred')
|
||
data = []
|
||
for filename in filenames:
|
||
results = read_json(join(k3droot, filename))
|
||
if 'pids' not in results.keys():
|
||
# 擅自补全
|
||
results['pids'] = list(range(len(results[res_key])))
|
||
data.append({
|
||
'pids': results['pids'],
|
||
'keypoints3d': np.array(results[res_key], dtype=np.float32)
|
||
})
|
||
if data[-1]['keypoints3d'].shape[-1] == 3:
|
||
mywarn('The input keypoints dont have confidence')
|
||
data[-1]['keypoints3d'] = np.concatenate([data[-1]['keypoints3d'], np.ones_like(data[-1]['keypoints3d'][..., :1])], axis=-1)
|
||
if 'conversion' in value.keys():
|
||
if value['conversion'] == 'panoptic15_to_body15':
|
||
data[-1]['keypoints3d'] = convert_panoptic15_body15(data[-1]['keypoints3d'])
|
||
else:
|
||
raise ValueError(f'Unknown reader: {key}')
|
||
self.infos[key] = data
|
||
self.meta.update(meta)
|
||
self.reader = reader
|
||
self.filter = filter
|
||
if len(self.subs) == 0:
|
||
self.subs = self.meta['subs']
|
||
self.check_frames_length()
|
||
|
||
@staticmethod
|
||
def read_annots(annotnames):
|
||
val = []
|
||
for annname in annotnames:
|
||
annots = read_json(annname)['annots']
|
||
# select the best people
|
||
annots = find_best_people(annots)
|
||
val.append(annots)
|
||
val = padding_and_stack(val)
|
||
return val
|
||
|
||
def filter_openpose(self, candidates, pafs):
|
||
for nv, candview in enumerate(candidates):
|
||
H=self.infos['image_shape'][0][nv][0]
|
||
W=self.infos['image_shape'][0][nv][1]
|
||
for cand in candview:
|
||
if 'border' in self.filter.keys():
|
||
border = self.filter['border'] * max(H, W)
|
||
flag = (cand[:, 0] > border) & (cand[:, 0] < W - border) & (cand[:, 1] > border) & (cand[:, 1] < H - border)
|
||
cand[~flag] = 0
|
||
return candidates, pafs
|
||
|
||
def __getitem__(self, index):
|
||
frame = self.frames[index]
|
||
ret = {}
|
||
for key, value in self.infos.items():
|
||
if len(value) == 1:
|
||
ret[key] = value[0]
|
||
elif frame >= len(value):
|
||
myerror(f'[{self.__class__.__name__}] {key}: index {frame} out of range {len(value)}')
|
||
else:
|
||
ret[key] = value[frame]
|
||
ret_list = defaultdict(list)
|
||
for key, val in ret.items():
|
||
if key == 'annots':
|
||
ret_list[key] = self.read_annots(val)
|
||
elif key == 'cameras':
|
||
for sub in self.subs:
|
||
select = {k: val[sub][k] for k in ['K', 'R', 'T', 'dist', 'P']}
|
||
ret_list[key].append(select)
|
||
ret_list[key] = padding_and_stack(ret_list[key])
|
||
elif key == 'images':
|
||
if self.flag_read_image:
|
||
for i, sub in enumerate(self.subs):
|
||
imgname = val[i]
|
||
if sub in self.subs_vis or self.subs_vis == 'all':
|
||
img = self.read_image(imgname)
|
||
else:
|
||
img = imgname
|
||
ret_list[key].append(img)
|
||
ret_list['imgnames'].append(imgname)
|
||
else:
|
||
ret_list[key] = val
|
||
ret_list['imgnames'] = val
|
||
elif key == 'openpose':
|
||
ret_list[key] = [v['joints'] for v in val]
|
||
# 同时返回PAF
|
||
ret_list[key+'_paf'] = [v['pafs'] for v in val]
|
||
# check一下PAF:
|
||
for nv in range(len(ret_list[key])):
|
||
ret_list[key+'_paf'][nv][(8, 1)] = ret_list[key+'_paf'][nv].pop((1, 8)).T
|
||
ret_list[key], ret_list[key+'_paf'] = self.filter_openpose(ret_list[key], ret_list[key+'_paf'])
|
||
elif key == 'keypoints3d':
|
||
ret_list['keypoints3d'] = val['keypoints3d']
|
||
if 'pids' in val.keys():
|
||
ret_list['pids'] = val['pids']
|
||
else:
|
||
ret_list['pids'] = list(range(len(val['keypoints3d'])))
|
||
elif key in ['image_shape']:
|
||
pass
|
||
else:
|
||
print('[Dataset] Unknown key: {}'.format(key))
|
||
ret_list.update(ret_list.pop('annots', {}))
|
||
for key, val in self.reader.items():
|
||
if key == 'pelvis' and 'annots' in self.reader.keys(): # load pelvis from annots.keypoints
|
||
ret_list[key] = [d[:, val.root_id] for d in ret_list['keypoints']]
|
||
elif key == 'pelvis' and 'openpose' in self.reader.keys():
|
||
ret_list[key] = [d[val.root_id] for d in ret_list['openpose']]
|
||
ret_list['meta'] = {
|
||
'subs': self.subs,
|
||
'index': index,
|
||
'frame': frame,
|
||
'image_shape': ret['image_shape'],
|
||
'imgnames': ret_list['imgnames'],
|
||
}
|
||
return ret_list
|
||
|
||
def check(self, index):
|
||
raise NotImplementedError
|
||
|
||
class MVMP(MVDataset):
|
||
def read_annots(self, annotnames):
|
||
val = []
|
||
for annname in annotnames:
|
||
annots = read_json(annname)['annots']
|
||
# 在这里进行filter,去掉不需要的2D
|
||
annots_valid = []
|
||
for annot in annots:
|
||
flag = True
|
||
if 'bbox_size' in self.filter.keys():
|
||
bbox_size = self.filter['bbox_size']
|
||
bbox = annot['bbox']
|
||
area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
|
||
if area < bbox_size:
|
||
flag = False
|
||
if flag:
|
||
annots_valid.append(annot)
|
||
annots = annots_valid
|
||
# select the best people
|
||
annots = find_all_people(annots)
|
||
val.append(annots)
|
||
val = padding_empty(val)
|
||
return val
|
||
|
||
def check(self, index):
|
||
data = self.__getitem__(index)
|
||
from easymocap.mytools.vis_base import plot_bbox, merge, plot_keypoints_auto
|
||
# check the subs vis
|
||
vis = []
|
||
for nv, sub in enumerate(self.subs):
|
||
if sub not in self.subs_vis:continue
|
||
img = data['images'][nv].copy()
|
||
bbox = data['bbox'][nv]
|
||
kpts = data['keypoints'][nv]
|
||
for i in range(bbox.shape[0]):
|
||
plot_bbox(img, bbox[i], pid=i)
|
||
plot_keypoints_auto(img, kpts[i], pid=i, use_limb_color=False)
|
||
vis.append(img)
|
||
vis = merge(vis)
|
||
cv2.imwrite('debug/{}_{:06d}.jpg'.format(self.__class__.__name__, index), vis)
|
||
|
||
if __name__ == '__main__':
|
||
config = '''
|
||
args:
|
||
root: /nas/ZJUMoCap/Part0/313
|
||
subs: []
|
||
subs_vis: ['01', '07', '13', '19']
|
||
ranges: [0, 100, 1]
|
||
read_image: False
|
||
reader:
|
||
images:
|
||
root: images
|
||
ext: .jpg
|
||
annots:
|
||
root: annots
|
||
ext: .json
|
||
cameras: # 兼容所有帧的相机参数不同的情况
|
||
root: ''
|
||
'''
|
||
import yaml
|
||
config = yaml.load(config, Loader=yaml.FullLoader)
|
||
dataset = MVDataset(**config['args'])
|
||
for i in range(len(dataset)):
|
||
data = dataset[i] |