🚧 support multi-gpu for keypoints

Qing Shuai 2022-08-21 16:01:18 +08:00
parent 95f15b14d8
commit cd3f184f04
2 changed files with 187 additions and 25 deletions

apps/preprocess/extract_keypoints.py

@@ -2,8 +2,8 @@
@ Date: 2021-08-19 22:06:22
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-12-02 21:19:41
@ FilePath: /EasyMocap/apps/preprocess/extract_keypoints.py
@ LastEditTime: 2022-05-23 22:58:57
@ FilePath: /EasyMocapPublic/apps/preprocess/extract_keypoints.py
'''
import os
from os.path import join
@@ -27,6 +27,7 @@ config = {
'vis': False,
'ext': '.jpg'
},
'openposecrop': {},
'feet':{
'root': '',
'res': 1,
@@ -78,7 +79,7 @@ config = {
'min_detection_confidence':0.3,
'min_tracking_confidence': 0.1,
'static_image_mode': False,
}
},
}
if __name__ == "__main__":
@@ -91,24 +92,58 @@ if __name__ == "__main__":
help="sub directory name to store the generated annotation files, default to be annots")
# Detection Control
parser.add_argument('--mode', type=str, default='openpose', choices=[
'openpose', 'feet', 'feetcrop', 'yolo-hrnet', 'yolo', 'hrnet',
'openpose', 'feet', 'feetcrop', 'openposecrop',
'yolo-hrnet', 'yolo', 'hrnet',
'mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face'],
help="model to extract joints from image")
# Openpose
parser.add_argument('--openpose', type=str,
default='/media/qing/Project/openpose')
default=os.environ.get('openpose', 'openpose'))
parser.add_argument('--openpose_res', type=float, default=1)
parser.add_argument('--gpus', type=int, default=[], nargs='+')
parser.add_argument('--ext', type=str, default='.jpg')
parser.add_argument('--tmp', type=str, default=os.path.abspath('tmp'))
parser.add_argument('--hand', action='store_true')
parser.add_argument('--face', action='store_true')
parser.add_argument('--wild', action='store_true',
help='remove the crowd class from yolo detections')
parser.add_argument('--reverse', action='store_true')
parser.add_argument('--force', action='store_true')
args = parser.parse_args()
config['yolo']['isWild'] = args.wild
mode = args.mode
if not os.path.exists(join(args.path, 'images')) and os.path.exists(join(args.path, 'videos')):
# extract images from the videos by default
cmd = f'''python3 apps/preprocess/extract_image.py {args.path}'''
os.system(cmd)
subs = load_subs(args.path, args.subs)
if len(args.gpus) != 0:
# spawn one worker process per GPU via run_cmd
from easymocap.mytools.debug_utils import run_cmd
nproc = len(args.gpus)
plist = []
for i in range(nproc):
if len(subs[i::nproc]) == 0:
continue
cmd = f'export CUDA_VISIBLE_DEVICES={args.gpus[i]} && python3 apps/preprocess/extract_keypoints.py {args.path} --mode {args.mode} --subs {" ".join(subs[i::nproc])}'
cmd += f' --annot {args.annot} --ext {args.ext}'
if args.hand:
cmd += ' --hand'
if args.face:
cmd += ' --face'
if args.force:
cmd += ' --force'
cmd += f' --tmp {args.tmp}'
cmd += ' &'
print(cmd)
p = run_cmd(cmd, bg=False)
plist.extend(p)
for p in plist:
p.join()
exit()
global_tasks = []
if len(subs) == 0:
subs = ['']
for sub in subs:
config[mode]['force'] = args.force
image_root = join(args.path, 'images', sub)
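A note on the new multi-GPU path above: the subdirectories are sharded round-robin across the listed GPUs via subs[i::nproc], and the script re-invokes itself once per GPU with CUDA_VISIBLE_DEVICES pinned to that device. A minimal sketch of the sharding (camera names are illustrative):

    subs = ['cam0', 'cam1', 'cam2', 'cam3', 'cam4']
    gpus = [0, 1]
    nproc = len(gpus)
    for i in range(nproc):
        shard = subs[i::nproc]  # every nproc-th entry, starting at offset i
        if len(shard) == 0:
            continue
        # each spawned worker only sees its assigned GPU
        print('CUDA_VISIBLE_DEVICES={} -> {}'.format(gpus[i], shard))
    # GPU 0 processes cam0, cam2, cam4; GPU 1 processes cam1, cam3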
@@ -116,7 +151,7 @@ if __name__ == "__main__":
tmp_root = join(args.path, mode, sub)
if os.path.exists(annot_root) and not args.force:
# check the number of annots and images
if len(os.listdir(image_root)) == len(os.listdir(annot_root)):
if len(os.listdir(image_root)) == len(os.listdir(annot_root)) and mode != 'feetcrop':
print('[Skip] detection {}'.format(annot_root))
continue
if mode == 'openpose':
@@ -132,10 +167,11 @@ if __name__ == "__main__":
config[mode]['openpose'] = args.openpose
estimator = FeetEstimator(openpose=args.openpose)
estimator.detect_foot(image_root, annot_root, args.ext)
elif mode == 'yolo':
elif mode == 'yolo' or mode == 'openposecrop':
from easymocap.estimator.yolohrnet_wrapper import extract_bbox
config[mode]['ext'] = args.ext
extract_bbox(image_root, annot_root, **config[mode])
config['yolo']['force'] = False # do not redetect
config['yolo']['ext'] = args.ext
extract_bbox(image_root, annot_root, **config['yolo'])
elif mode == 'hrnet':
from easymocap.estimator.yolohrnet_wrapper import extract_hrnet
config[mode]['ext'] = args.ext
@@ -147,10 +183,15 @@ if __name__ == "__main__":
from easymocap.estimator.mediapipe_wrapper import extract_2d
config[mode]['ext'] = args.ext
extract_2d(image_root, annot_root, config[mode], mode=mode.replace('mp-', ''))
if mode == 'feetcrop':
if mode == 'feetcrop' or mode == 'openposecrop':
from easymocap.estimator.openpose_wrapper import FeetEstimatorByCrop
config[mode]['openpose'] = args.openpose
estimator = FeetEstimatorByCrop(openpose=args.openpose)
estimator = FeetEstimatorByCrop(openpose=args.openpose,
tmpdir=join(args.tmp, '{}_{}'.format(os.path.basename(args.path), sub)),
fullbody=mode=='openposecrop',
hand=(mode=='openposecrop')or args.hand,
face=args.face)
estimator.detect_foot(image_root, annot_root, args.ext)
for task in global_tasks:
task.join()
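For reference, the wiring of the two crop modes above: openposecrop first runs the YOLO detector to get person boxes (with config['yolo']['force'] = False, so existing detections are reused) and then runs OpenPose on the crops with full body and hands enabled, while feetcrop only refines the feet. The flags passed to FeetEstimatorByCrop, restated from the call above:

    fullbody = (mode == 'openposecrop')
    hand = (mode == 'openposecrop') or args.hand
    face = args.face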

easymocap/estimator/SPIN/spin_api.py

@@ -2,8 +2,8 @@
@ Date: 2020-10-23 20:07:49
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2021-03-05 13:43:01
@ FilePath: /EasyMocap/code/estimator/SPIN/spin_api.py
@ LastEditTime: 2022-07-14 12:44:30
@ FilePath: /EasyMocapPublic/easymocap/estimator/SPIN/spin_api.py
'''
"""
Demo code
@@ -32,7 +32,6 @@ Running the previous command will save the results in ```examples/im1010_{shape,
"""
import torch
from torchvision.transforms import Normalize
import numpy as np
import cv2
@@ -46,6 +45,81 @@ class constants:
IMG_NORM_MEAN = [0.485, 0.456, 0.406]
IMG_NORM_STD = [0.229, 0.224, 0.225]
def normalize(tensor, mean, std, inplace: bool = False):
"""Normalize a tensor image with mean and standard deviation.
.. note::
This transform acts out of place by default, i.e., it does not mutate the input tensor.
See :class:`~torchvision.transforms.Normalize` for more details.
Args:
tensor (Tensor): Tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
inplace(bool,optional): Bool to make this operation inplace.
Returns:
Tensor: Normalized Tensor image.
"""
if not isinstance(tensor, torch.Tensor):
raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))
if tensor.ndim < 3:
raise ValueError('Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = '
'{}.'.format(tensor.size()))
if not inplace:
tensor = tensor.clone()
dtype = tensor.dtype
mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
if (std == 0).any():
raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
if mean.ndim == 1:
mean = mean.view(-1, 1, 1)
if std.ndim == 1:
std = std.view(-1, 1, 1)
tensor.sub_(mean).div_(std)
return tensor
class Normalize(torch.nn.Module):
"""Normalize a tensor image with mean and standard deviation.
Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],...,std[n])`` for ``n``
channels, this transform will normalize each channel of the input
``torch.*Tensor`` i.e.,
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
.. note::
This transform acts out of place, i.e., it does not mutate the input tensor.
Args:
mean (sequence): Sequence of means for each channel.
std (sequence): Sequence of standard deviations for each channel.
inplace(bool,optional): Bool to make this operation in-place.
"""
def __init__(self, mean, std, inplace=False):
super().__init__()
self.mean = mean
self.std = std
self.inplace = inplace
def forward(self, tensor):
"""
Args:
tensor (Tensor): Tensor image to be normalized.
Returns:
Tensor: Normalized Tensor image.
"""
return normalize(tensor, self.mean, self.std, self.inplace)
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
def get_transform(center, scale, res, rot=0):
"""Generate transformation matrix."""
@@ -134,6 +208,23 @@ def process_image(img, bbox, input_res=224):
norm_img = normalize_img(img.clone())[None]
return img, norm_img
def solve_translation(X, x, K):
A = np.zeros((2*X.shape[0], 3))
b = np.zeros((2*X.shape[0], 1))
fx, fy = K[0, 0], K[1, 1]
cx, cy = K[0, 2], K[1, 2]
for nj in range(X.shape[0]):
A[2*nj, 0] = 1
A[2*nj + 1, 1] = 1
A[2*nj, 2] = -(x[nj, 0] - cx)/fx
A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
A[2*nj:2*nj+2, :] *= x[nj, 2]
b[2*nj:2*nj+2, :] *= x[nj, 2]
trans = np.linalg.inv(A.T @ A) @ A.T @ b
return trans.T[0]
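solve_translation sets up a confidence-weighted linear system from the pinhole model: for each joint, (X_x + t_x)/(X_z + t_z) = (x - c_x)/f_x (and likewise for y) rearranges to the row t_x - ((x - c_x)/f_x) t_z = X_z (x - c_x)/f_x - X_x, with each row pair scaled by the keypoint confidence x[nj, 2]; the normal equations then give t in closed form. A synthetic round-trip check (illustrative only, assuming solve_translation above is in scope):

    import numpy as np
    K = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
    X = np.random.rand(15, 3) + np.array([0., 0., 2.])  # joints in front of the camera
    t_true = np.array([0.1, -0.2, 3.0])
    proj = (K @ (X + t_true).T).T
    x = np.hstack([proj[:, :2] / proj[:, 2:3], np.ones((15, 1))])  # pixels + conf = 1
    print(solve_translation(X, x, K))  # ~ [0.1, -0.2, 3.0] up to numerical error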
def estimate_translation_np(S, joints_2d, joints_conf, K):
"""Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
Input:
@@ -197,16 +288,46 @@ class SPIN:
p, _ = cv2.Rodrigues(rotmat[i])
poses[0, 3*i:3*i+3] = p[:, 0]
results['poses'] = poses
if use_rh_th:
body_params = {
'poses': results['poses'],
'shapes': results['shapes'],
'Rh': results['poses'][:, :3].copy(),
'Th': np.zeros((1, 3)),
}
body_params['Th'][0, 2] = 5
body_params['poses'][:, :3] = 0
results = body_params
body_params = {
'Rh': poses[:, :3],
'poses': poses[:, 3:],
'shapes': results['shapes'],
}
results = body_params
return results
def __call__(self, body_model, img, bbox, kpts, camera, ret_vertices=True):
body_params = self.forward(img.copy(), bbox)
# TODO: bug: this encode/decode round trip introduces errors in the keypoints
kpts1 = body_model.keypoints(body_params, return_tensor=False)[0]
body_params = body_model.encode(body_params)
# only use the body joints to estimate the translation
nJoints = 15
keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
kpts_diff = np.linalg.norm(kpts1 - keypoints3d, axis=-1).max()
# print('Encode and decode error: {}'.format(kpts_diff))
trans = solve_translation(keypoints3d[:nJoints], kpts[:nJoints], camera['K'])
body_params['Th'] += trans[None, :]
if body_params['Th'][0, 2] < 0:
print(' [WARN in SPIN] solved a negative position of human {}'.format(body_params['Th'][0]))
body_params['Th'] = -body_params['Th']
Rhold = cv2.Rodrigues(body_params['Rh'])[0]
rotx = cv2.Rodrigues(np.pi*np.array([1., 0, 0]))[0]
Rhold = rotx @ Rhold
body_params['Rh'] = cv2.Rodrigues(Rhold)[0].reshape(1, 3)
# convert to world coordinates (currently disabled)
if False:
Rhold = cv2.Rodrigues(body_params['Rh'])[0]
Thold = body_params['Th']
Rh = camera['R'].T @ Rhold
Th = (camera['R'].T @ (Thold.T - camera['T'])).T
body_params['Th'] = Th
body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
results = {'body_params': body_params, 'keypoints3d': keypoints3d}
if ret_vertices:
vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
results['vertices'] = vertices
return results
def init_with_spin(body_model, spin_model, img, bbox, kpts, camera):
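For orientation, the new SPIN.__call__ above runs the network, re-encodes the result into EasyMocap's (Rh, Th, poses, shapes) layout, solves the weighted translation against the detected 2D keypoints, and, if the solved depth comes out negative, negates Th and rotates the body 180 degrees about the x-axis to compensate; the world-coordinate conversion is left disabled, so outputs stay in camera coordinates. A hedged usage sketch (the SPIN constructor arguments are not shown in this hunk):

    # assuming spin_model is an initialized SPIN instance
    results = spin_model(body_model, img, bbox, kpts, camera, ret_vertices=False)
    body_params = results['body_params']  # dict with Rh, Th, poses, shapes
    keypoints3d = results['keypoints3d']  # 3D joints in camera coordinates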