🚧 support multi-gpu for keypoints

Qing Shuai 2022-08-21 16:01:18 +08:00
parent 95f15b14d8
commit cd3f184f04
2 changed files with 187 additions and 25 deletions

apps/preprocess/extract_keypoints.py

@@ -2,8 +2,8 @@
   @ Date: 2021-08-19 22:06:22
   @ Author: Qing Shuai
   @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-12-02 21:19:41
-  @ FilePath: /EasyMocap/apps/preprocess/extract_keypoints.py
+  @ LastEditTime: 2022-05-23 22:58:57
+  @ FilePath: /EasyMocapPublic/apps/preprocess/extract_keypoints.py
 '''
 import os
 from os.path import join
@@ -27,6 +27,7 @@ config = {
         'vis': False,
         'ext': '.jpg'
     },
+    'openposecrop': {},
     'feet':{
         'root': '',
         'res': 1,
@@ -78,7 +79,7 @@ config = {
         'min_detection_confidence':0.3,
         'min_tracking_confidence': 0.1,
         'static_image_mode': False,
-    }
+    },
 }

 if __name__ == "__main__":
@@ -91,24 +92,58 @@ if __name__ == "__main__":
         help="sub directory name to store the generated annotation files, default to be annots")
     # Detection Control
     parser.add_argument('--mode', type=str, default='openpose', choices=[
-        'openpose', 'feet', 'feetcrop', 'yolo-hrnet', 'yolo', 'hrnet',
+        'openpose', 'feet', 'feetcrop', 'openposecrop',
+        'yolo-hrnet', 'yolo', 'hrnet',
         'mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face'],
         help="model to extract joints from image")
     # Openpose
     parser.add_argument('--openpose', type=str,
-        default='/media/qing/Project/openpose')
+        default=os.environ.get('openpose', 'openpose'))
     parser.add_argument('--openpose_res', type=float, default=1)
+    parser.add_argument('--gpus', type=int, default=[], nargs='+')
     parser.add_argument('--ext', type=str, default='.jpg')
+    parser.add_argument('--tmp', type=str, default=os.path.abspath('tmp'))
     parser.add_argument('--hand', action='store_true')
     parser.add_argument('--face', action='store_true')
     parser.add_argument('--wild', action='store_true',
         help='remove crowd class of yolo')
+    parser.add_argument('--reverse', action='store_true')
     parser.add_argument('--force', action='store_true')
     args = parser.parse_args()
     config['yolo']['isWild'] = args.wild
     mode = args.mode
+    if not os.path.exists(join(args.path, 'images')) and os.path.exists(join(args.path, 'videos')):
+        # images are not extracted yet: extract them from the videos first
+        cmd = f'''python3 apps/preprocess/extract_image.py {args.path}'''
+        os.system(cmd)
     subs = load_subs(args.path, args.subs)
+    if len(args.gpus) != 0:
+        # distribute the subdirectories over the GPUs and run one worker per GPU
+        from easymocap.mytools.debug_utils import run_cmd
+        nproc = len(args.gpus)
+        plist = []
+        for i in range(nproc):
+            if len(subs[i::nproc]) == 0:
+                continue
+            cmd = f'export CUDA_VISIBLE_DEVICES={args.gpus[i]} && python3 apps/preprocess/extract_keypoints.py {args.path} --mode {args.mode} --subs {" ".join(subs[i::nproc])}'
+            cmd += f' --annot {args.annot} --ext {args.ext}'
+            if args.hand:
+                cmd += ' --hand'
+            if args.face:
+                cmd += ' --face'
+            if args.force:
+                cmd += ' --force'
+            cmd += f' --tmp {args.tmp}'
+            cmd += ' &'
+            print(cmd)
+            p = run_cmd(cmd, bg=False)
+            plist.extend(p)
+        for p in plist:
+            p.join()
+        exit()
     global_tasks = []
+    if len(subs) == 0:
+        subs = ['']
     for sub in subs:
         config[mode]['force'] = args.force
         image_root = join(args.path, 'images', sub)
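The scheduling above is plain round-robin: GPU i takes every len(gpus)-th subdirectory via the stride slice subs[i::nproc], re-invokes this script on that shard with CUDA_VISIBLE_DEVICES pinned to a single device, and the parent joins all workers before exiting. A self-contained sketch of the same pattern, with subprocess.Popen standing in for EasyMocap's run_cmd helper (whose exact return type is not shown in this diff):

import subprocess

def shard_over_gpus(subs, gpus, script='apps/preprocess/extract_keypoints.py', path='data'):
    # GPU i processes subs[i], subs[i+n], subs[i+2n], ... with n = len(gpus)
    procs = []
    for i, gpu in enumerate(gpus):
        shard = subs[i::len(gpus)]
        if not shard:
            continue
        cmd = (f'CUDA_VISIBLE_DEVICES={gpu} python3 {script} {path} '
               f'--subs {" ".join(shard)}')
        procs.append(subprocess.Popen(cmd, shell=True))
    for p in procs:
        p.wait()  # block until every shard has finished

# e.g. five views on two GPUs: GPU 0 -> cam0, cam2, cam4; GPU 1 -> cam1, cam3
shard_over_gpus(['cam0', 'cam1', 'cam2', 'cam3', 'cam4'], gpus=[0, 1])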
@@ -116,7 +151,7 @@ if __name__ == "__main__":
         tmp_root = join(args.path, mode, sub)
         if os.path.exists(annot_root) and not args.force:
             # check that the number of annotations matches the number of images
-            if len(os.listdir(image_root)) == len(os.listdir(annot_root)):
+            if len(os.listdir(image_root)) == len(os.listdir(annot_root)) and mode != 'feetcrop':
                 print('[Skip] detection {}'.format(annot_root))
                 continue
         if mode == 'openpose':
@@ -132,10 +167,11 @@ if __name__ == "__main__":
             config[mode]['openpose'] = args.openpose
             estimator = FeetEstimator(openpose=args.openpose)
             estimator.detect_foot(image_root, annot_root, args.ext)
-        elif mode == 'yolo':
+        elif mode == 'yolo' or mode == 'openposecrop':
             from easymocap.estimator.yolohrnet_wrapper import extract_bbox
-            config[mode]['ext'] = args.ext
-            extract_bbox(image_root, annot_root, **config[mode])
+            config['yolo']['force'] = False  # do not re-detect existing boxes
+            config['yolo']['ext'] = args.ext
+            extract_bbox(image_root, annot_root, **config['yolo'])
         elif mode == 'hrnet':
             from easymocap.estimator.yolohrnet_wrapper import extract_hrnet
             config[mode]['ext'] = args.ext
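Design note: 'openposecrop' is a two-stage mode. The branch above first reuses the YOLO detector to obtain person boxes (with 'force' left False, so existing detections are not recomputed); the separate if-branch in the next hunk then runs OpenPose only on the per-person crops.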
@@ -147,10 +183,15 @@ if __name__ == "__main__":
             from easymocap.estimator.mediapipe_wrapper import extract_2d
             config[mode]['ext'] = args.ext
             extract_2d(image_root, annot_root, config[mode], mode=mode.replace('mp-', ''))
-        if mode == 'feetcrop':
+        if mode == 'feetcrop' or mode == 'openposecrop':
             from easymocap.estimator.openpose_wrapper import FeetEstimatorByCrop
             config[mode]['openpose'] = args.openpose
-            estimator = FeetEstimatorByCrop(openpose=args.openpose)
+            estimator = FeetEstimatorByCrop(openpose=args.openpose,
+                tmpdir=join(args.tmp, '{}_{}'.format(os.path.basename(args.path), sub)),
+                fullbody=mode == 'openposecrop',
+                hand=(mode == 'openposecrop') or args.hand,
+                face=args.face)
             estimator.detect_foot(image_root, annot_root, args.ext)
     for task in global_tasks:
         task.join()
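With these changes, a multi-view dataset can be processed across several GPUs in one invocation, e.g. python3 apps/preprocess/extract_keypoints.py <data> --mode openposecrop --gpus 0 1 (a hypothetical invocation composed from the flags added above; <data> stands for a dataset root containing images/<sub> folders or videos). Each worker writes its crops to a per-sequence directory derived from --tmp.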

easymocap/estimator/SPIN/spin_api.py

@@ -2,8 +2,8 @@
   @ Date: 2020-10-23 20:07:49
   @ Author: Qing Shuai
   @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-03-05 13:43:01
-  @ FilePath: /EasyMocap/code/estimator/SPIN/spin_api.py
+  @ LastEditTime: 2022-07-14 12:44:30
+  @ FilePath: /EasyMocapPublic/easymocap/estimator/SPIN/spin_api.py
 '''
 """
 Demo code
@@ -32,7 +32,6 @@ Running the previous command will save the results in ```examples/im1010_{shape,
 """
 import torch
-from torchvision.transforms import Normalize
 import numpy as np
 import cv2
@@ -46,6 +45,81 @@ class constants:
     IMG_NORM_MEAN = [0.485, 0.456, 0.406]
     IMG_NORM_STD = [0.229, 0.224, 0.225]

+def normalize(tensor, mean, std, inplace: bool = False):
+    """Normalize a tensor image with mean and standard deviation.
+
+    .. note::
+        This transform acts out of place by default, i.e., it does not mutate the input tensor.
+
+    See :class:`~torchvision.transforms.Normalize` for more details.
+
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool,optional): Bool to make this operation inplace.
+
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if not isinstance(tensor, torch.Tensor):
+        raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))
+    if tensor.ndim < 3:
+        raise ValueError('Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = '
+                         '{}.'.format(tensor.size()))
+    if not inplace:
+        tensor = tensor.clone()
+    dtype = tensor.dtype
+    mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
+    std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
+    if (std == 0).any():
+        raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
+    if mean.ndim == 1:
+        mean = mean.view(-1, 1, 1)
+    if std.ndim == 1:
+        std = std.view(-1, 1, 1)
+    tensor.sub_(mean).div_(std)
+    return tensor
+
+class Normalize(torch.nn.Module):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
+    channels, this transform will normalize each channel of the input
+    ``torch.*Tensor`` i.e.,
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+    .. note::
+        This transform acts out of place, i.e., it does not mutate the input tensor.
+
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool,optional): Bool to make this operation in-place.
+    """
+    def __init__(self, mean, std, inplace=False):
+        super().__init__()
+        self.mean = mean
+        self.std = std
+        self.inplace = inplace
+
+    def forward(self, tensor):
+        """
+        Args:
+            tensor (Tensor): Tensor image to be normalized.
+
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return normalize(tensor, self.mean, self.std, self.inplace)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
+
 def get_transform(center, scale, res, rot=0):
     """Generate transformation matrix."""
@@ -116,7 +190,7 @@ def crop(img, center, scale, res, rot=0, bias=0):
         new_img = new_img[pad:-pad, pad:-pad]
     new_img = cv2.resize(new_img, (res[0], res[1]))
     return new_img

 def process_image(img, bbox, input_res=224):
     """Read image, do preprocessing and possibly crop it according to the bounding box.
     If there are bounding box annotations, use them to crop the image.
@@ -134,6 +208,23 @@ def process_image(img, bbox, input_res=224):
     norm_img = normalize_img(img.clone())[None]
     return img, norm_img

+def solve_translation(X, x, K):
+    A = np.zeros((2*X.shape[0], 3))
+    b = np.zeros((2*X.shape[0], 1))
+    fx, fy = K[0, 0], K[1, 1]
+    cx, cy = K[0, 2], K[1, 2]
+    for nj in range(X.shape[0]):
+        A[2*nj, 0] = 1
+        A[2*nj + 1, 1] = 1
+        A[2*nj, 2] = -(x[nj, 0] - cx)/fx
+        A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
+        b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
+        b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
+        A[2*nj:2*nj+2, :] *= x[nj, 2]
+        b[2*nj:2*nj+2, :] *= x[nj, 2]
+    trans = np.linalg.inv(A.T @ A) @ A.T @ b
+    return trans.T[0]
+
 def estimate_translation_np(S, joints_2d, joints_conf, K):
     """Find camera translation that brings the 3D joints S closest to the corresponding 2D joints_2d.
     Input:
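The system that solve_translation assembles is the pinhole projection constraint, linearized in the unknown translation t = (t_x, t_y, t_z). For joint j with 3D position (X_j, Y_j, Z_j), detected pixel (u_j, v_j) and confidence c_j, requiring f_x (X_j + t_x) / (Z_j + t_z) + c_x = u_j and rearranging yields rows that are linear in t:

t_x - \frac{u_j - c_x}{f_x}\, t_z = \frac{u_j - c_x}{f_x}\, Z_j - X_j,
\qquad
t_y - \frac{v_j - c_y}{f_y}\, t_z = \frac{v_j - c_y}{f_y}\, Z_j - Y_j

Both rows of joint j are scaled by its confidence c_j = x[nj, 2], and the stacked weighted system A t = b is solved in closed form via the normal equations, t = (A^\top A)^{-1} A^\top b, which is exactly the final np.linalg.inv(A.T @ A) @ A.T @ b line.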
@@ -197,16 +288,46 @@ class SPIN:
             p, _ = cv2.Rodrigues(rotmat[i])
             poses[0, 3*i:3*i+3] = p[:, 0]
         results['poses'] = poses
-        if use_rh_th:
-            body_params = {
-                'poses': results['poses'],
-                'shapes': results['shapes'],
-                'Rh': results['poses'][:, :3].copy(),
-                'Th': np.zeros((1, 3)),
-            }
-            body_params['Th'][0, 2] = 5
-            body_params['poses'][:, :3] = 0
-            results = body_params
+        body_params = {
+            'Rh': poses[:, :3],
+            'poses': poses[:, 3:],
+            'shapes': results['shapes'],
+        }
+        results = body_params
         return results

+    def __call__(self, body_model, img, bbox, kpts, camera, ret_vertices=True):
+        body_params = self.forward(img.copy(), bbox)
+        # TODO: bug: this encode introduces errors in the keypoints
+        kpts1 = body_model.keypoints(body_params, return_tensor=False)[0]
+        body_params = body_model.encode(body_params)
+        # use only the body joints to estimate the translation
+        nJoints = 15
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        kpts_diff = np.linalg.norm(kpts1 - keypoints3d, axis=-1).max()
+        # print('Encode and decode error: {}'.format(kpts_diff))
+        trans = solve_translation(keypoints3d[:nJoints], kpts[:nJoints], camera['K'])
+        body_params['Th'] += trans[None, :]
+        if body_params['Th'][0, 2] < 0:
+            print(' [WARN in SPIN] solved a negative position of human {}'.format(body_params['Th'][0]))
+            body_params['Th'] = -body_params['Th']
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            rotx = cv2.Rodrigues(np.pi*np.array([1., 0, 0]))[0]
+            Rhold = rotx @ Rhold
+            body_params['Rh'] = cv2.Rodrigues(Rhold)[0].reshape(1, 3)
+        # convert to world coordinate
+        if False:
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            Thold = body_params['Th']
+            Rh = camera['R'].T @ Rhold
+            Th = (camera['R'].T @ (Thold.T - camera['T'])).T
+            body_params['Th'] = Th
+            body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        results = {'body_params': body_params, 'keypoints3d': keypoints3d}
+        if ret_vertices:
+            vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
+            results['vertices'] = vertices
+        return results

 def init_with_spin(body_model, spin_model, img, bbox, kpts, camera):
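As a closing check, the translation solver is easy to verify in isolation on synthetic data. A minimal sketch (not part of this commit; assumes the repository root is on PYTHONPATH so the module path matches the FilePath header above):

import numpy as np
from easymocap.estimator.SPIN.spin_api import solve_translation

np.random.seed(0)
K = np.array([[1000., 0., 320.],
              [0., 1000., 240.],
              [0., 0., 1.]])
X = np.random.randn(15, 3)               # body joints in camera coordinates
t_gt = np.array([0.1, -0.2, 5.0])        # ground-truth translation, in front of the camera
Xc = X + t_gt
x = (K @ (Xc / Xc[:, 2:3]).T).T          # pinhole projection to pixel coordinates
x[:, 2] = 1.0                            # third column doubles as the confidence weight
t = solve_translation(X, x, K)
assert np.allclose(t, t_gt)              # noise-free input -> exact recovery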