🚧 support multi-gpu for keypoints
commit cd3f184f04 (parent 95f15b14d8)
apps/preprocess/extract_keypoints.py
@@ -2,8 +2,8 @@
 @ Date: 2021-08-19 22:06:22
 @ Author: Qing Shuai
 @ LastEditors: Qing Shuai
-@ LastEditTime: 2021-12-02 21:19:41
-@ FilePath: /EasyMocap/apps/preprocess/extract_keypoints.py
+@ LastEditTime: 2022-05-23 22:58:57
+@ FilePath: /EasyMocapPublic/apps/preprocess/extract_keypoints.py
 '''
 import os
 from os.path import join
@@ -27,6 +27,7 @@ config = {
         'vis': False,
         'ext': '.jpg'
     },
+    'openposecrop': {},
     'feet':{
         'root': '',
         'res': 1,
@@ -78,7 +79,7 @@ config = {
         'min_detection_confidence':0.3,
         'min_tracking_confidence': 0.1,
         'static_image_mode': False,
         }
     },
 }

 if __name__ == "__main__":
@@ -91,24 +92,58 @@ if __name__ == "__main__":
         help="sub directory name to store the generated annotation files, default to be annots")
     # Detection Control
     parser.add_argument('--mode', type=str, default='openpose', choices=[
-        'openpose', 'feet', 'feetcrop', 'yolo-hrnet', 'yolo', 'hrnet',
+        'openpose', 'feet', 'feetcrop', 'openposecrop',
+        'yolo-hrnet', 'yolo', 'hrnet',
         'mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face'],
         help="model to extract joints from image")
     # Openpose
     parser.add_argument('--openpose', type=str,
-        default='/media/qing/Project/openpose')
+        default=os.environ.get('openpose', 'openpose'))
     parser.add_argument('--openpose_res', type=float, default=1)
+    parser.add_argument('--gpus', type=int, default=[], nargs='+')
     parser.add_argument('--ext', type=str, default='.jpg')
+    parser.add_argument('--tmp', type=str, default=os.path.abspath('tmp'))
     parser.add_argument('--hand', action='store_true')
     parser.add_argument('--face', action='store_true')
     parser.add_argument('--wild', action='store_true',
         help='remove crowd class of yolo')
     parser.add_argument('--reverse', action='store_true')
     parser.add_argument('--force', action='store_true')
     args = parser.parse_args()
     config['yolo']['isWild'] = args.wild
     mode = args.mode
     if not os.path.exists(join(args.path, 'images')) and os.path.exists(join(args.path, 'videos')):
         # default extract image
         cmd = f'''python3 apps/preprocess/extract_image.py {args.path}'''
         os.system(cmd)
     subs = load_subs(args.path, args.subs)
+    if len(args.gpus) != 0:
+        # perform multiprocess by runcmd
+        from easymocap.mytools.debug_utils import run_cmd
+        nproc = len(args.gpus)
+        plist = []
+        for i in range(nproc):
+            if len(subs[i::nproc]) == 0:
+                continue
+            cmd = f'export CUDA_VISIBLE_DEVICES={args.gpus[i]} && python3 apps/preprocess/extract_keypoints.py {args.path} --mode {args.mode} --subs {" ".join(subs[i::nproc])}'
+            cmd += f' --annot {args.annot} --ext {args.ext}'
+            if args.hand:
+                cmd += ' --hand'
+            if args.face:
+                cmd += ' --face'
+            if args.force:
+                cmd += ' --force'
+            cmd += f' --tmp {args.tmp}'
+            cmd += ' &'
+            print(cmd)
+            p = run_cmd(cmd, bg=False)
+            plist.extend(p)
+        for p in plist:
+            p.join()
+        exit()
     global_tasks = []
     if len(subs) == 0:
         subs = ['']
     for sub in subs:
         config[mode]['force'] = args.force
         image_root = join(args.path, 'images', sub)
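Editor's note: the block added above shards args.subs round-robin (subs[i::nproc]) across the listed GPUs and pins each child process to one device via CUDA_VISIBLE_DEVICES. A minimal standalone sketch of the same dispatch pattern, using only the standard library's subprocess instead of easymocap's run_cmd (the data path and GPU ids are placeholders):

    import subprocess

    def dispatch(path, subs, gpus, mode='openpose'):
        # One worker per GPU; worker i handles the shard subs[i::len(gpus)].
        procs = []
        for i, gpu in enumerate(gpus):
            shard = subs[i::len(gpus)]
            if len(shard) == 0:
                continue
            cmd = (f'export CUDA_VISIBLE_DEVICES={gpu} && '
                   f'python3 apps/preprocess/extract_keypoints.py {path} '
                   f'--mode {mode} --subs {" ".join(shard)}')
            procs.append(subprocess.Popen(cmd, shell=True))
        for p in procs:
            p.wait()  # block until every worker has finished

    dispatch('/path/to/data', subs=['01', '02', '03', '04'], gpus=[0, 1])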
@@ -116,7 +151,7 @@ if __name__ == "__main__":
         tmp_root = join(args.path, mode, sub)
         if os.path.exists(annot_root) and not args.force:
             # check the number of annots and images
-            if len(os.listdir(image_root)) == len(os.listdir(annot_root)):
+            if len(os.listdir(image_root)) == len(os.listdir(annot_root)) and mode != 'feetcrop':
                 print('[Skip] detection {}'.format(annot_root))
                 continue
         if mode == 'openpose':
@@ -132,10 +167,11 @@ if __name__ == "__main__":
             config[mode]['openpose'] = args.openpose
             estimator = FeetEstimator(openpose=args.openpose)
             estimator.detect_foot(image_root, annot_root, args.ext)
-        elif mode == 'yolo':
+        elif mode == 'yolo' or mode == 'openposecrop':
             from easymocap.estimator.yolohrnet_wrapper import extract_bbox
-            config[mode]['ext'] = args.ext
-            extract_bbox(image_root, annot_root, **config[mode])
+            config['yolo']['force'] = False # donot redetect
+            config['yolo']['ext'] = args.ext
+            extract_bbox(image_root, annot_root, **config['yolo'])
         elif mode == 'hrnet':
             from easymocap.estimator.yolohrnet_wrapper import extract_hrnet
             config[mode]['ext'] = args.ext
@@ -147,10 +183,15 @@ if __name__ == "__main__":
             from easymocap.estimator.mediapipe_wrapper import extract_2d
             config[mode]['ext'] = args.ext
             extract_2d(image_root, annot_root, config[mode], mode=mode.replace('mp-', ''))
-        if mode == 'feetcrop':
+        if mode == 'feetcrop' or mode == 'openposecrop':
             from easymocap.estimator.openpose_wrapper import FeetEstimatorByCrop
             config[mode]['openpose'] = args.openpose
-            estimator = FeetEstimatorByCrop(openpose=args.openpose)
+            estimator = FeetEstimatorByCrop(openpose=args.openpose,
+                tmpdir=join(args.tmp, '{}_{}'.format(os.path.basename(args.path), sub)),
+                fullbody=mode=='openposecrop',
+                hand=(mode=='openposecrop') or args.hand,
+                face=args.face)
             estimator.detect_foot(image_root, annot_root, args.ext)

     for task in global_tasks:
         task.join()
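Taken together, the changes above let keypoint extraction run on several GPUs at once, e.g. (the data path and GPU ids are placeholders):

    python3 apps/preprocess/extract_keypoints.py /path/to/data --mode openposecrop --gpus 0 1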
easymocap/estimator/SPIN/spin_api.py
@@ -2,8 +2,8 @@
 @ Date: 2020-10-23 20:07:49
 @ Author: Qing Shuai
 @ LastEditors: Qing Shuai
-@ LastEditTime: 2021-03-05 13:43:01
-@ FilePath: /EasyMocap/code/estimator/SPIN/spin_api.py
+@ LastEditTime: 2022-07-14 12:44:30
+@ FilePath: /EasyMocapPublic/easymocap/estimator/SPIN/spin_api.py
 '''
 """
 Demo code
@@ -32,7 +32,6 @@ Running the previous command will save the results in ```examples/im1010_{shape,
 """

 import torch
-from torchvision.transforms import Normalize
 import numpy as np
 import cv2

@@ -46,6 +45,81 @@ class constants:
     IMG_NORM_MEAN = [0.485, 0.456, 0.406]
     IMG_NORM_STD = [0.229, 0.224, 0.225]

+def normalize(tensor, mean, std, inplace: bool = False):
+    """Normalize a tensor image with mean and standard deviation.
+
+    .. note::
+        This transform acts out of place by default, i.e., it does not mutate the input tensor.
+
+    See :class:`~torchvision.transforms.Normalize` for more details.
+
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace (bool, optional): Bool to make this operation inplace.
+
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if not isinstance(tensor, torch.Tensor):
+        raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))
+
+    if tensor.ndim < 3:
+        raise ValueError('Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = '
+                         '{}.'.format(tensor.size()))
+
+    if not inplace:
+        tensor = tensor.clone()
+
+    dtype = tensor.dtype
+    mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
+    std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
+    if (std == 0).any():
+        raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
+    if mean.ndim == 1:
+        mean = mean.view(-1, 1, 1)
+    if std.ndim == 1:
+        std = std.view(-1, 1, 1)
+    tensor.sub_(mean).div_(std)
+    return tensor
+
+class Normalize(torch.nn.Module):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
+    channels, this transform will normalize each channel of the input
+    ``torch.*Tensor`` i.e.,
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+    .. note::
+        This transform acts out of place, i.e., it does not mutate the input tensor.
+
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace (bool, optional): Bool to make this operation in-place.
+
+    """
+
+    def __init__(self, mean, std, inplace=False):
+        super().__init__()
+        self.mean = mean
+        self.std = std
+        self.inplace = inplace
+
+    def forward(self, tensor):
+        """
+        Args:
+            tensor (Tensor): Tensor image to be normalized.
+
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return normalize(tensor, self.mean, self.std, self.inplace)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
+
+
 def get_transform(center, scale, res, rot=0):
     """Generate transformation matrix."""
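Editor's note: vendoring normalize/Normalize above removes the runtime dependency on torchvision's Normalize, which the earlier hunk deletes from the imports. A minimal usage sketch with the ImageNet statistics from the constants class (the random image is a placeholder):

    import torch

    norm = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    img = torch.rand(3, 224, 224)  # dummy CHW image in [0, 1]
    out = norm(img)                # per-channel (img - mean) / std
    assert out.shape == img.shape and not torch.equal(out, img)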
@@ -134,6 +208,23 @@ def process_image(img, bbox, input_res=224):
     norm_img = normalize_img(img.clone())[None]
     return img, norm_img

+def solve_translation(X, x, K):
+    A = np.zeros((2*X.shape[0], 3))
+    b = np.zeros((2*X.shape[0], 1))
+    fx, fy = K[0, 0], K[1, 1]
+    cx, cy = K[0, 2], K[1, 2]
+    for nj in range(X.shape[0]):
+        A[2*nj, 0] = 1
+        A[2*nj + 1, 1] = 1
+        A[2*nj, 2] = -(x[nj, 0] - cx)/fx
+        A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
+        b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
+        b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
+        A[2*nj:2*nj+2, :] *= x[nj, 2]
+        b[2*nj:2*nj+2, :] *= x[nj, 2]
+    trans = np.linalg.inv(A.T @ A) @ A.T @ b
+    return trans.T[0]
+
 def estimate_translation_np(S, joints_2d, joints_conf, K):
     """Find camera translation that brings 3D joints S closest to the corresponding 2D joints joints_2d.
     Input:
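Editor's note: solve_translation above stacks, for each joint, the pinhole constraints fx*(X_x + t_x)/(X_z + t_z) + cx = x_x and fy*(X_y + t_y)/(X_z + t_z) + cy = x_y, weights each pair of rows by the keypoint confidence x[:, 2], and solves the normal equations (A^T A) t = A^T b. A quick synthetic self-check, assuming solve_translation as defined above (the intrinsics and joints are made up):

    import numpy as np

    K = np.array([[1000., 0., 500.],
                  [0., 1000., 500.],
                  [0., 0., 1.]])
    t_gt = np.array([0.1, -0.2, 5.0])         # ground-truth translation
    X = np.random.uniform(-1, 1, (15, 3))     # body-frame 3D joints
    P = X + t_gt                              # camera-frame points, P_z > 0
    uv = P[:, :2] / P[:, 2:3] * 1000. + 500.  # pinhole projection (fx = fy)
    x = np.hstack([uv, np.ones((15, 1))])     # append confidence = 1
    assert np.allclose(solve_translation(X, x, K), t_gt)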
@@ -197,18 +288,48 @@ class SPIN:
             p, _ = cv2.Rodrigues(rotmat[i])
             poses[0, 3*i:3*i+3] = p[:, 0]
         results['poses'] = poses
         if use_rh_th:
             body_params = {
-                'poses': results['poses'],
+                'Rh': poses[:, :3],
+                'poses': poses[:, 3:],
                 'shapes': results['shapes'],
-                'Rh': results['poses'][:, :3].copy(),
                 'Th': np.zeros((1, 3)),
             }
             body_params['Th'][0, 2] = 5
-            body_params['poses'][:, :3] = 0
             results = body_params
         return results

+    def __call__(self, body_model, img, bbox, kpts, camera, ret_vertices=True):
+        body_params = self.forward(img.copy(), bbox)
+        # TODO: bug: this encode will introduce errors in the keypoints
+        kpts1 = body_model.keypoints(body_params, return_tensor=False)[0]
+        body_params = body_model.encode(body_params)
+        # only use body joints to estimate translation
+        nJoints = 15
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        kpts_diff = np.linalg.norm(kpts1 - keypoints3d, axis=-1).max()
+        # print('Encode and decode error: {}'.format(kpts_diff))
+        trans = solve_translation(keypoints3d[:nJoints], kpts[:nJoints], camera['K'])
+        body_params['Th'] += trans[None, :]
+        if body_params['Th'][0, 2] < 0:
+            print(' [WARN in SPIN] solved a negative position of human {}'.format(body_params['Th'][0]))
+            body_params['Th'] = -body_params['Th']
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            rotx = cv2.Rodrigues(np.pi*np.array([1., 0, 0]))[0]
+            Rhold = rotx @ Rhold
+            body_params['Rh'] = cv2.Rodrigues(Rhold)[0].reshape(1, 3)
+        # convert to world coordinate
+        if False:
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            Thold = body_params['Th']
+            Rh = camera['R'].T @ Rhold
+            Th = (camera['R'].T @ (Thold.T - camera['T'])).T
+            body_params['Th'] = Th
+            body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        results = {'body_params': body_params, 'keypoints3d': keypoints3d}
+        if ret_vertices:
+            vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
+            results['vertices'] = vertices
+        return results
+
 def init_with_spin(body_model, spin_model, img, bbox, kpts, camera):
     body_params = spin_model.forward(img.copy(), bbox)
     body_params = body_model.check_params(body_params)
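A hedged usage sketch of the new SPIN.__call__ path (the spin_model and body_model constructors, checkpoint paths, and input data are assumptions not shown in this diff):

    # img is the input image, bbox a person box, kpts the detected 2D keypoints,
    # and camera a dict holding the intrinsics 'K' (plus 'R', 'T' for the
    # optional world-coordinate conversion).
    results = spin_model(body_model, img, bbox, kpts, camera, ret_vertices=False)
    body_params = results['body_params']  # Rh/Th/poses/shapes with solved translation
    keypoints3d = results['keypoints3d']  # 3D joints consistent with kpts under K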