support mediapipe
parent e8084971d1
commit 05b666a267
@@ -142,6 +142,7 @@ With our proposed method, we release two large dataset of human motion: LightSta
 - [Exporting of multiple data formats (bvh, asf/amc, ...)](./doc/02_output.md)

 ## Updates
+- 12/25/2021: Support the MediaPipe keypoint detector.
 - 08/09/2021: Add a colab demo [here](https://colab.research.google.com/drive/1Cyvu_lPFUajr2RKt6yJIfS3HQIIYl6QU?usp=sharing).
 - 06/28/2021: The **Multi-view Multi-person** part is released!
 - 06/10/2021: The **real-time 3D visualization** part is released!

@@ -160,6 +161,7 @@ Here are the great works this project is built upon:
 - Some functions are borrowed from [SPIN](https://github.com/nkolot/SPIN), [VIBE](https://github.com/mkocabas/VIBE), [SMPLify-X](https://github.com/vchoutas/smplify-x)
 - The method for fitting the 3D skeleton and SMPL model is similar to [TotalCapture](http://www.cs.cmu.edu/~hanbyulj/totalcapture/), without using point clouds.
 - We integrate some easy-to-use functions for previous great work (see the sketch below):
+  - `easymocap/estimator/mediapipe_wrapper.py`: [MediaPipe](https://github.com/google/mediapipe)
   - `easymocap/estimator/SPIN`: an SMPL estimator [5]
   - `easymocap/estimator/YOLOv4`: an object detector [6] (coming soon)
   - `easymocap/estimator/HRNet`: a 2D human pose estimator [7] (coming soon)
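As referenced in the list above, here is a minimal sketch of driving the new MediaPipe wrapper directly. The dataset root `<data>` and view `0` are illustrative, and the config keys mirror the `mp-holistic` defaults in `apps/preprocess/extract_keypoints.py` added by this commit.

```python
# A minimal sketch, assuming mediapipe and easymocap are importable;
# <data>/images/0 is an illustrative folder of extracted frames.
from easymocap.estimator.mediapipe_wrapper import extract_2d

config = {
    'model_complexity': 2,
    'min_detection_confidence': 0.5,
    'min_tracking_confidence': 0.5,
    'ext': '.jpg',    # consumed (popped) by extract_2d
    'force': False,   # set True to re-run even if annotations already exist
}
extract_2d('<data>/images/0', '<data>/annots/0', config, mode='holistic')
```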
apps/preprocess/extract_image.py (new file, 50 lines)
@@ -0,0 +1,50 @@
'''
@ Date: 2021-08-19 22:06:13
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-10-23 16:02:43
@ FilePath: /EasyMocap/apps/preprocess/extract_image.py
'''
# extract images from videos
import os
from os.path import join
from glob import glob

extensions = ['.mp4', '.webm', '.flv', '.MP4', '.MOV', '.mov', '.avi']

def run(cmd):
    print(cmd)
    os.system(cmd)

def extract_images(path, ffmpeg, image):
    # NOTE: also reads the global `args` parsed below
    videos = sorted(sum([
        glob(join(path, 'videos', '*' + ext)) for ext in extensions
    ], []))
    for videoname in videos:
        sub = '.'.join(os.path.basename(videoname).split('.')[:-1])
        sub = sub.replace(args.strip, '')
        outpath = join(path, image, sub)
        os.makedirs(outpath, exist_ok=True)
        other_cmd = ''
        if args.num != -1:
            # trailing spaces keep the flags from running together
            other_cmd += '-vframes {} '.format(args.num)
        if args.transpose != -1:
            other_cmd += '-vf transpose={} '.format(args.transpose)
        cmd = '{} -i {} {} -q:v 1 -start_number 0 {}/%06d.jpg'.format(
            ffmpeg, videoname, other_cmd, outpath)
        run(cmd)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    parser.add_argument('--strip', type=str, default='')
    parser.add_argument('--image', type=str, default='images')
    parser.add_argument('--num', type=int, default=-1)
    parser.add_argument('--transpose', type=int, default=-1)
    parser.add_argument('--ffmpeg', type=str, default='ffmpeg')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    extract_images(args.path, args.ffmpeg, args.image)
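For concreteness, here is the command the script assembles for one hypothetical run; all file names are illustrative.

```python
# Hypothetical example: given a video data/videos/cam0.mp4, the invocation
#   python apps/preprocess/extract_image.py data --num 100 --transpose 1
# assembles and runs roughly:
#   ffmpeg -i data/videos/cam0.mp4 -vframes 100 -vf transpose=1 \
#          -q:v 1 -start_number 0 data/images/cam0/%06d.jpg
```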
apps/preprocess/extract_keypoints.py (new file, 156 lines)
@@ -0,0 +1,156 @@
'''
@ Date: 2021-08-19 22:06:22
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-12-02 21:19:41
@ FilePath: /EasyMocap/apps/preprocess/extract_keypoints.py
'''
import os
from os.path import join
from tqdm import tqdm
import numpy as np

def load_subs(path, subs):
    if len(subs) == 0:
        subs = sorted(os.listdir(join(path, 'images')))
    subs = [sub for sub in subs if os.path.isdir(join(path, 'images', sub))]
    if len(subs) == 0:
        subs = ['']
    return subs

config = {
    'openpose': {
        'root': '',
        'res': 1,
        'hand': False,
        'face': False,
        'vis': False,
        'ext': '.jpg'
    },
    'feet': {
        'root': '',
        'res': 1,
        'hand': False,
        'face': False,
        'vis': False,
        'ext': '.jpg'
    },
    'feetcrop': {
        'root': '',
        'res': 1,
        'hand': False,
        'face': False,
        'vis': False,
        'ext': '.jpg'
    },
    'yolo': {
        'ckpt_path': 'data/models/yolov4.weights',
        'conf_thres': 0.3,
        'box_nms_thres': 0.5,  # keep the bboxes whose pairwise IoU < 0.5
        'ext': '.jpg',
        'isWild': False,
    },
    'hrnet': {
        'nof_joints': 17,
        'c': 48,
        'checkpoint_path': 'data/models/pose_hrnet_w48_384x288.pth'
    },
    'yolo-hrnet': {},
    'mp-pose': {
        'model_complexity': 2,
        'min_detection_confidence': 0.5,
        'min_tracking_confidence': 0.5
    },
    'mp-holistic': {
        'model_complexity': 2,
        # 'refine_face_landmarks': True,
        'min_detection_confidence': 0.5,
        'min_tracking_confidence': 0.5
    },
    'mp-handl': {
        'model_complexity': 1,
        'min_detection_confidence': 0.3,
        'min_tracking_confidence': 0.1,
        'static_image_mode': False,
    },
    'mp-handr': {
        'model_complexity': 1,
        'min_detection_confidence': 0.3,
        'min_tracking_confidence': 0.1,
        'static_image_mode': False,
    },
    # 'mp-face' is accepted by --mode below but had no entry here; an entry
    # mirroring the other MediaPipe modes is assumed so config[mode] resolves.
    'mp-face': {
        'model_complexity': 2,
        'min_detection_confidence': 0.5,
        'min_tracking_confidence': 0.5
    },
}

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str, default=None, help="the path of data")
    parser.add_argument('--subs', type=str, nargs='+', default=[],
        help="the sub-folder names (views) under <path>/images to process")
    # Output Control
    parser.add_argument('--annot', type=str, default='annots',
        help="sub directory name to store the generated annotation files, default to be annots")
    # Detection Control
    parser.add_argument('--mode', type=str, default='openpose', choices=[
        'openpose', 'feet', 'feetcrop', 'yolo-hrnet', 'yolo', 'hrnet',
        'mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face'],
        help="model to extract joints from image")
    # Openpose
    parser.add_argument('--openpose', type=str,
        default='/media/qing/Project/openpose')
    parser.add_argument('--openpose_res', type=float, default=1)
    parser.add_argument('--ext', type=str, default='.jpg')
    parser.add_argument('--hand', action='store_true')
    parser.add_argument('--face', action='store_true')
    parser.add_argument('--wild', action='store_true',
        help='remove the crowd class of yolo')
    parser.add_argument('--force', action='store_true')
    args = parser.parse_args()
    config['yolo']['isWild'] = args.wild
    mode = args.mode
    subs = load_subs(args.path, args.subs)
    global_tasks = []
    for sub in subs:
        config[mode]['force'] = args.force
        image_root = join(args.path, 'images', sub)
        annot_root = join(args.path, args.annot, sub)
        tmp_root = join(args.path, mode, sub)
        if os.path.exists(annot_root) and not args.force:
            # skip when the numbers of annotations and images already match
            if len(os.listdir(image_root)) == len(os.listdir(annot_root)):
                print('[Skip] detection {}'.format(annot_root))
                continue
        if mode == 'openpose':
            from easymocap.estimator.openpose_wrapper import extract_2d
            config[mode]['root'] = args.openpose
            config[mode]['hand'] = args.hand
            config[mode]['face'] = args.face
            config[mode]['res'] = args.openpose_res
            config[mode]['ext'] = args.ext
            global_tasks = extract_2d(image_root, annot_root, tmp_root, config[mode])
        elif mode == 'feet':
            from easymocap.estimator.openpose_wrapper import FeetEstimator
            config[mode]['openpose'] = args.openpose
            estimator = FeetEstimator(openpose=args.openpose)
            estimator.detect_foot(image_root, annot_root, args.ext)
        elif mode == 'yolo':
            from easymocap.estimator.yolohrnet_wrapper import extract_bbox
            config[mode]['ext'] = args.ext
            extract_bbox(image_root, annot_root, **config[mode])
        elif mode == 'hrnet':
            from easymocap.estimator.yolohrnet_wrapper import extract_hrnet
            config[mode]['ext'] = args.ext
            extract_hrnet(image_root, annot_root, **config[mode])
        elif mode == 'yolo-hrnet':
            from easymocap.estimator.yolohrnet_wrapper import extract_yolo_hrnet
            extract_yolo_hrnet(image_root, annot_root, args.ext, config['yolo'], config['hrnet'])
        elif mode in ['mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face']:
            from easymocap.estimator.mediapipe_wrapper import extract_2d
            config[mode]['ext'] = args.ext
            extract_2d(image_root, annot_root, config[mode], mode=mode.replace('mp-', ''))
        if mode == 'feetcrop':
            from easymocap.estimator.openpose_wrapper import FeetEstimatorByCrop
            config[mode]['openpose'] = args.openpose
            estimator = FeetEstimatorByCrop(openpose=args.openpose)
            estimator.detect_foot(image_root, annot_root, args.ext)
    for task in global_tasks:
        task.join()
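A hedged sketch of how this script is typically invoked; the dataset root `<data>` and view names are illustrative.

```python
# Illustrative invocations (dataset root <data> contains images/<sub>/):
#   python apps/preprocess/extract_keypoints.py <data> --mode mp-holistic
#   python apps/preprocess/extract_keypoints.py <data> --mode mp-handl --subs 0 1
# Each run writes one JSON file per frame into <data>/annots/<sub>/.
```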
@ -1,41 +1,131 @@
|
||||
'''
|
||||
@ Date: 2021-06-09 10:16:46
|
||||
@ Author: Qing Shuai
|
||||
@ LastEditors: Qing Shuai
|
||||
@ LastEditTime: 2021-08-31 16:19:48
|
||||
@ FilePath: /EasyMocap/easymocap/annotator/file_utils.py
|
||||
'''
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
from os.path import join
|
||||
import shutil
|
||||
from ..mytools.file_utils import myarray2string
|
||||
|
||||
def read_json(path):
|
||||
with open(path) as f:
|
||||
with open(path, 'r') as f:
|
||||
data = json.load(f)
|
||||
return data
|
||||
|
||||
def save_json(file, data):
|
||||
if file is None:
|
||||
return 0
|
||||
if not os.path.exists(os.path.dirname(file)):
|
||||
os.makedirs(os.path.dirname(file))
|
||||
with open(file, 'w') as f:
|
||||
json.dump(data, f, indent=4)
|
||||
|
||||
save_annot = save_json
|
||||
tobool = lambda x: 'true' if x else 'false'
|
||||
|
||||
def getFileList(root, ext='.jpg'):
|
||||
def annot2string(data):
|
||||
out_text = []
|
||||
out_text.append('{\n')
|
||||
keysbase = ['filename', 'height', 'width', 'annots', 'isKeyframe']
|
||||
keys_other = [key for key in data.keys() if key not in keysbase]
|
||||
for key in keysbase[:-1] + keys_other + ['isKeyframe']:
|
||||
value = data[key]
|
||||
indent = 4
|
||||
if key != 'annots':
|
||||
if isinstance(value, str):
|
||||
res = '"{}": "{}",'.format(key, value)
|
||||
elif isinstance(value, bool):
|
||||
res = '"{}": {}'.format(key, tobool(value))
|
||||
elif isinstance(value, int):
|
||||
res = '"{}": {},'.format(key, value)
|
||||
elif isinstance(value, np.ndarray):
|
||||
#TODO: pretty array
|
||||
res = '"{}": {},'.format(key, myarray2string(value, indent=0))
|
||||
else:
|
||||
res = '"{}": {},'.format(key, value)
|
||||
out_text.append(indent * ' ' + res+'\n')
|
||||
else:
|
||||
out_text.append(indent * ' ' + '"annots": [\n')
|
||||
for n, annot in enumerate(value):
|
||||
head = (indent + 4) * " " + "{\n"
|
||||
ind = (indent + 8) * " "
|
||||
pid = ind + '"personID": {},\n'.format(annot['personID'])
|
||||
out_text.append(head)
|
||||
out_text.append(pid)
|
||||
for bkey in ['bbox', 'bbox_handl2d', 'bbox_handr2d', 'bbox_face2d']:
|
||||
if bkey not in annot.keys():
|
||||
continue
|
||||
bbox = ind + '"{}": [{:.2f}, {:.2f}, {:.2f}, {:.2f}, {:.2f}],\n'.format(bkey, *annot[bkey][:5])
|
||||
out_text.append(bbox)
|
||||
for bkey in ['keypoints', 'handl2d', 'handr2d', 'face2d']:
|
||||
if bkey not in annot.keys():
|
||||
continue
|
||||
val = np.array(annot[bkey])
|
||||
conf = val[:, -1]
|
||||
conf[conf<0] = 0
|
||||
ret = myarray2string(val, fmt='%7.2f', indent=12)
|
||||
kpts = ind + '"{}": '.format(bkey) + ret + ',\n'
|
||||
out_text.append(kpts)
|
||||
for rkey in ['isKeyframe']:
|
||||
val = annot.get(rkey, False)
|
||||
bkey = ind + '"{}": {}\n'.format(rkey, tobool(val))
|
||||
tail = (indent + 4) * " " + "}"
|
||||
if n == len(value) - 1:
|
||||
tail += '\n'
|
||||
else:
|
||||
tail += ',\n'
|
||||
out_text.extend([bkey, tail])
|
||||
out_text.append(indent * ' ' + '],\n')
|
||||
out_text.append('}\n')
|
||||
out_text = ''.join(out_text)
|
||||
return out_text
|
||||
|
||||
def save_annot(file, data):
|
||||
if file is None:
|
||||
return 0
|
||||
if not os.path.exists(os.path.dirname(file)):
|
||||
os.makedirs(os.path.dirname(file))
|
||||
if 'filename' not in data.keys():
|
||||
if data.get('isList', False):
|
||||
data = data['annots']
|
||||
save_json(file, data)
|
||||
return 0
|
||||
out_text = annot2string(data)
|
||||
print(out_text, file=open(file, 'w'))
|
||||
|
||||
def getFileList(root, ext='.jpg', max=-1, ret_full=False):
|
||||
files = []
|
||||
dirs = os.listdir(root)
|
||||
dirs = sorted(os.listdir(root))
|
||||
while len(dirs) > 0:
|
||||
path = dirs.pop()
|
||||
fullname = join(root, path)
|
||||
if os.path.isfile(fullname) and fullname.endswith(ext):
|
||||
files.append(path)
|
||||
if ret_full:
|
||||
files.append(fullname)
|
||||
else:
|
||||
files.append(path)
|
||||
elif os.path.isdir(fullname):
|
||||
for s in os.listdir(fullname):
|
||||
names = sorted(os.listdir(fullname))
|
||||
if max != -1 and os.path.isfile(join(fullname, names[0])):
|
||||
names = names[:max]
|
||||
for s in names:
|
||||
newDir = join(path, s)
|
||||
dirs.append(newDir)
|
||||
files = sorted(files)
|
||||
return files
|
||||
|
||||
def load_annot_to_tmp(annotname):
|
||||
if annotname is None:
|
||||
return {}
|
||||
if not os.path.exists(annotname):
|
||||
dirname = os.path.dirname(annotname)
|
||||
os.makedirs(dirname, exist_ok=True)
|
||||
shutil.copy(annotname.replace('_tmp', ''), annotname)
|
||||
annot = read_json(annotname)
|
||||
if isinstance(annot, list):
|
||||
annot = {'annots': annot, 'isKeyframe': False, 'isList': True}
|
||||
return annot
|
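To make the new serializer concrete, here is a small round-trip sketch, assuming easymocap is importable; the output path and all values are made up.

```python
# A minimal round-trip sketch through the new pretty-printing save_annot;
# /tmp/000000.json and all values are illustrative.
import numpy as np
from easymocap.annotator.file_utils import save_annot, read_json

annot = {
    'filename': 'images/0/000000.jpg',
    'height': 1080, 'width': 1920,
    'annots': [{
        'personID': 0,
        'bbox': [100., 200., 300., 400., 0.99],
        'keypoints': np.zeros((25, 3)),  # (x, y, confidence) per joint
        'isKeyframe': False,
    }],
    'isKeyframe': False,
}
save_annot('/tmp/000000.json', annot)  # custom, human-readable JSON layout
print(read_json('/tmp/000000.json')['annots'][0]['bbox'])
```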
easymocap/estimator/__init__.py
@@ -4,5 +4,4 @@
 @ LastEditors: Qing Shuai
 @ LastEditTime: 2021-04-14 16:25:49
 @ FilePath: /EasyMocapRelease/easymocap/estimator/__init__.py
-'''
-from .SPIN import SPIN, init_with_spin
+'''

(The net effect is that `from .SPIN import SPIN, init_with_spin` is no longer executed at package import time, presumably so `easymocap.estimator` can be imported without SPIN's dependencies when only the MediaPipe wrapper is needed.)
easymocap/estimator/mediapipe_wrapper.py (new file, 274 lines)
@@ -0,0 +1,274 @@
import numpy as np
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic
from ..mytools import Timer

def bbox_from_keypoints(keypoints, rescale=1.2, detection_thresh=0.05, MIN_PIXEL=5):
    """Get a bounding box [x0, y0, x1, y1, conf] from keypoint detections."""
    valid = keypoints[:, -1] > detection_thresh
    if valid.sum() < 3:
        return [0, 0, 100, 100, 0]
    valid_keypoints = keypoints[valid][:, :-1]
    center = (valid_keypoints.max(axis=0) + valid_keypoints.min(axis=0)) / 2
    bbox_size = valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)
    # reject degenerate boxes
    if bbox_size[0] < MIN_PIXEL or bbox_size[1] < MIN_PIXEL:
        return [0, 0, 100, 100, 0]
    # adjust bounding box tightness
    bbox_size = bbox_size * rescale
    bbox = [
        center[0] - bbox_size[0] / 2,
        center[1] - bbox_size[1] / 2,
        center[0] + bbox_size[0] / 2,
        center[1] + bbox_size[1] / 2,
        keypoints[valid, 2].mean()
    ]
    return bbox

class Detector:
    NUM_BODY = 33
    NUM_HAND = 21
    NUM_FACE = 468

    def __init__(self, nViews, to_openpose, model_type, show=False, **cfg) -> None:
        self.nViews = nViews
        self.to_openpose = to_openpose
        self.model_type = model_type
        self.show = show
        if self.to_openpose:
            self.NUM_BODY = 25
            # mapping from the 33 MediaPipe landmarks to the BODY_25 layout
            self.openpose25_in_33 = [0, 0, 12, 14, 16, 11, 13, 15, 0, 24, 26, 28, 23, 25, 27, 5, 2, 8, 7, 31, 31, 29, 32, 32, 30]
        if model_type == 'holistic':
            model_name = mp_holistic.Holistic
        elif model_type == 'pose':
            model_name = mp.solutions.pose.Pose
        elif model_type == 'face':
            model_name = mp.solutions.face_mesh.FaceMesh
            cfg.pop('model_complexity')  # FaceMesh takes no model_complexity option
            cfg['max_num_faces'] = 1
        elif model_type in ['hand', 'handl', 'handr']:
            model_name = mp.solutions.hands.Hands
        else:
            raise NotImplementedError
        # one model instance per view keeps per-view tracking state separate
        self.models = [model_name(**cfg) for _ in range(nViews)]

    @staticmethod
    def to_array(pose, W, H, start=0):
        N = len(pose.landmark) - start
        res = np.zeros((N, 3))
        for i in range(start, len(pose.landmark)):
            res[i - start, 0] = pose.landmark[i].x * W
            res[i - start, 1] = pose.landmark[i].y * H
            res[i - start, 2] = pose.landmark[i].visibility
        return res

    def get_body(self, pose, W, H):
        if pose is None:
            bodies = np.zeros((self.NUM_BODY, 3))
            return bodies, [0, 0, 100, 100, 0]
        poses = self.to_array(pose, W, H)
        if self.to_openpose:
            poses = poses[self.openpose25_in_33]
            # mid-hip (8) and neck (1) are synthesized from their neighbours
            poses[8, :2] = poses[[9, 12], :2].mean(axis=0)
            poses[8, 2] = poses[[9, 12], 2].min(axis=0)
            poses[1, :2] = poses[[2, 5], :2].mean(axis=0)
            poses[1, 2] = poses[[2, 5], 2].min(axis=0)
        return poses, bbox_from_keypoints(poses)

    def get_hand(self, pose, W, H):
        if pose is None:
            bodies = np.zeros((self.NUM_HAND, 3))
            return bodies, [0, 0, 100, 100, 0.]
        poses = self.to_array(pose, W, H)
        poses[:, 2] = 1.  # hand landmarks carry no visibility score
        return poses, bbox_from_keypoints(poses)

    def get_face(self, pose, W, H):
        if pose is None:
            bodies = np.zeros((self.NUM_FACE, 3))
            return bodies, [0, 0, 100, 100, 0]
        poses = self.to_array(pose, W, H)
        poses[:, 2] = 1.
        return poses, bbox_from_keypoints(poses)

    def vis(self, image, annots, nv=0):
        from easymocap.mytools.vis_base import plot_keypoints
        from easymocap.dataset.config import CONFIG
        annots = annots['annots'][0]
        if 'keypoints' in annots.keys():
            kpts = annots['keypoints']
            if self.to_openpose:
                config = CONFIG['body25']
            else:
                config = CONFIG['mpbody']
            plot_keypoints(image, kpts, 0, config)
        if 'face2d' in annots.keys():
            kpts = annots['face2d']
            plot_keypoints(image, kpts, 0, CONFIG['mpface'], use_limb_color=False)
            if len(kpts) > 468:
                # landmarks beyond 468 are the iris points from refine_face_landmarks
                plot_keypoints(image, kpts[468:], 0, {'kintree': [[4, 1], [1, 2], [2, 3], [3, 4], [9, 6], [6, 7], [7, 8], [8, 9]]}, use_limb_color=False)
        if 'handl2d' in annots.keys():
            kpts = annots['handl2d']
            plot_keypoints(image, kpts, 1, CONFIG['hand'], use_limb_color=True)
        if 'handr2d' in annots.keys():
            kpts = annots['handr2d']
            plot_keypoints(image, kpts, 1, CONFIG['hand'], use_limb_color=True)
        cv2.imshow('vis{}'.format(nv), image)
        cv2.waitKey(5)

    def process_body(self, data, results, image_width, image_height):
        if self.model_type in ['pose', 'holistic']:
            keypoints, bbox = self.get_body(results.pose_landmarks, image_width, image_height)
            data['keypoints'] = keypoints
            data['bbox'] = bbox

    def process_hand(self, data, results, image_width, image_height):
        lm = {'Left': None, 'Right': None}
        if self.model_type in ['hand', 'handl', 'handr']:
            if results.multi_hand_landmarks:
                for i in range(len(results.multi_hand_landmarks)):
                    label = results.multi_handedness[i].classification[0].label
                    if lm[label] is not None:
                        continue
                    lm[label] = results.multi_hand_landmarks[i]
            if self.model_type == 'handl':
                lm['Right'] = None
            elif self.model_type == 'handr':
                lm['Left'] = None
        elif self.model_type == 'holistic':
            lm = {'Left': results.left_hand_landmarks, 'Right': results.right_hand_landmarks}
        if self.model_type in ['holistic', 'hand', 'handl', 'handr']:
            handl, bbox_handl = self.get_hand(lm['Left'], image_width, image_height)
            handr, bbox_handr = self.get_hand(lm['Right'], image_width, image_height)

            # the input was mirrored for the Hands model, so flip back
            if self.model_type != 'holistic':
                handl[:, 0] = image_width - handl[:, 0] - 1
                handr[:, 0] = image_width - handr[:, 0] - 1
                bbox_handl[0] = image_width - bbox_handl[0] - 1
                bbox_handl[2] = image_width - bbox_handl[2] - 1
                bbox_handr[0] = image_width - bbox_handr[0] - 1
                bbox_handr[2] = image_width - bbox_handr[2] - 1
                # restore x_min < x_max after mirroring
                bbox_handl[0], bbox_handl[2] = bbox_handl[2], bbox_handl[0]
                bbox_handr[0], bbox_handr[2] = bbox_handr[2], bbox_handr[0]
            if self.model_type in ['hand', 'handl', 'holistic']:
                data['handl2d'] = handl.tolist()
                data['bbox_handl2d'] = bbox_handl
            if self.model_type in ['hand', 'handr', 'holistic']:
                data['handr2d'] = handr.tolist()
                data['bbox_handr2d'] = bbox_handr

    def process_face(self, data, results, image_width, image_height, image=None):
        if self.model_type == 'holistic':
            face2d, bbox_face2d = self.get_face(results.face_landmarks, image_width, image_height)
            data['face2d'] = face2d
            data['bbox_face2d'] = bbox_face2d
        elif self.model_type == 'face':
            if results.multi_face_landmarks:
                # only keep the first face
                face_landmarks = results.multi_face_landmarks[0]
            else:
                face_landmarks = None
            face2d, bbox_face2d = self.get_face(face_landmarks, image_width, image_height)
            data['face2d'] = face2d
            data['bbox_face2d'] = bbox_face2d

    def __call__(self, images):
        annots_all = []
        for nv, image_ in enumerate(images):
            image_height, image_width, _ = image_.shape
            image = cv2.cvtColor(image_, cv2.COLOR_BGR2RGB)
            if self.model_type in ['hand', 'handl', 'handr']:
                # the Hands model expects a mirrored (selfie-view) image
                image = cv2.flip(image, 1)
            # mark the image as not writeable to pass by reference
            image.flags.writeable = False
            with Timer('- detect', True):
                results = self.models[nv].process(image)
            data = {
                'personID': 0,
            }
            self.process_body(data, results, image_width, image_height)
            self.process_hand(data, results, image_width, image_height)
            with Timer('- face', True):
                self.process_face(data, results, image_width, image_height, image=image)
            annots = {
                'filename': '{}/run.jpg'.format(nv),
                'height': image_height,
                'width': image_width,
                'annots': [
                    data
                ],
                'isKeyframe': False
            }
            if self.show:
                self.vis(image_, annots, nv)
            annots_all.append(annots)
        return annots_all

def extract_2d(image_root, annot_root, config, mode='holistic'):
    import os
    from glob import glob
    from os.path import join
    from tqdm import tqdm
    from .wrapper_base import check_result, save_annot
    force = config.pop('force')
    if check_result(image_root, annot_root) and not force:
        return 0
    ext = config.pop('ext')
    # only the body models are converted to the BODY_25 (openpose) layout
    to_openpose = mode in ['holistic', 'pose']
    detector = Detector(nViews=1, to_openpose=to_openpose, model_type=mode, show=False, **config)
    imgnames = sorted(glob(join(image_root, '*' + ext)))
    for imgname in tqdm(imgnames, desc='{:10s}'.format(os.path.basename(annot_root))):
        base = os.path.basename(imgname).replace(ext, '')
        annotname = join(annot_root, base + '.json')
        image = cv2.imread(imgname)
        annots = detector([image])[0]
        annots['filename'] = os.sep.join(imgname.split(os.sep)[-2:])
        save_annot(annotname, annots)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    parser.add_argument('--num', type=int, default=1)
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    path = args.path
    mp_hands = mp.solutions.hands
    from glob import glob
    from os.path import join
    imgnames = sorted(glob(join(path, '*.jpg')))
    with mp_hands.Hands(
            model_complexity=1,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as hands:
        for imgname in imgnames:
            image = cv2.imread(imgname)
            # To improve performance, optionally mark the image as not
            # writeable to pass by reference.
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image)

            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style())
            # (the upstream MediaPipe demo flips the image here for a selfie view)
            cv2.imshow('MediaPipe Hands', image)
            if cv2.waitKey(5) & 0xFF == 27:
                break
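A minimal sketch of calling the `Detector` class directly on a single frame, assuming mediapipe is installed; the image path is illustrative and the keyword arguments mirror the `mp-holistic` defaults above.

```python
# A minimal sketch, assuming mediapipe and easymocap are importable;
# the image path is illustrative.
import cv2
from easymocap.estimator.mediapipe_wrapper import Detector

det = Detector(nViews=1, to_openpose=True, model_type='holistic', show=False,
               model_complexity=2, min_detection_confidence=0.5,
               min_tracking_confidence=0.5)
annots = det([cv2.imread('images/0/000000.jpg')])[0]
body = annots['annots'][0]
print(body['bbox'])          # [x0, y0, x1, y1, conf] of the BODY_25 keypoints
print(len(body['handl2d']))  # 21 left-hand keypoints from the holistic model
```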
easymocap/estimator/wrapper_base.py (new file, 52 lines)
@@ -0,0 +1,52 @@
import os
import cv2
import numpy as np
from ..annotator.file_utils import save_annot

def check_result(image_root, annot_root):
    if os.path.exists(annot_root):
        # compare the numbers of images and annotation files
        nimg = len(os.listdir(image_root))
        nann = len(os.listdir(annot_root))
        print('Check {} == {}'.format(nimg, nann))
        if nimg == nann:
            return True
    return False

def create_annot_file(annotname, imgname):
    assert os.path.exists(imgname), imgname
    img = cv2.imread(imgname)
    height, width = img.shape[0], img.shape[1]
    imgnamesep = imgname.split(os.sep)
    filename = os.sep.join(imgnamesep[imgnamesep.index('images'):])
    annot = {
        'filename': filename,
        'height': height,
        'width': width,
        'annots': [],
        'isKeyframe': False
    }
    save_annot(annotname, annot)
    return annot

def bbox_from_keypoints(keypoints, rescale=1.2, detection_thresh=0.05, MIN_PIXEL=5):
    """Get a bounding box [x0, y0, x1, y1, conf] from keypoint detections."""
    valid = keypoints[:, -1] > detection_thresh
    if valid.sum() < 3:
        return [0, 0, 100, 100, 0]
    valid_keypoints = keypoints[valid][:, :-1]
    center = (valid_keypoints.max(axis=0) + valid_keypoints.min(axis=0)) / 2
    bbox_size = valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)
    # reject degenerate boxes
    if bbox_size[0] < MIN_PIXEL or bbox_size[1] < MIN_PIXEL:
        return [0, 0, 100, 100, 0]
    # loosen the box by the rescale factor
    bbox_size = bbox_size * rescale
    bbox = [
        center[0] - bbox_size[0] / 2,
        center[1] - bbox_size[1] / 2,
        center[0] + bbox_size[0] / 2,
        center[1] + bbox_size[1] / 2,
        keypoints[valid, 2].mean()
    ]
    bbox = np.array(bbox).tolist()
    return bbox
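A tiny, self-contained sketch of `bbox_from_keypoints` on synthetic input, to show the box construction; the keypoint values are made up.

```python
# Three confident keypoints spanning a 100x200 region (synthetic values).
import numpy as np
from easymocap.estimator.wrapper_base import bbox_from_keypoints

kpts = np.array([
    [100., 100., 0.9],
    [200., 300., 0.8],
    [150., 200., 0.7],
    [  0.,   0., 0.0],  # below detection_thresh: ignored
])
print(bbox_from_keypoints(kpts))
# box is centered on the valid points and enlarged by rescale=1.2:
# approximately [90.0, 80.0, 210.0, 320.0, 0.8]
```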