🚧 support multi-gpu for keypoints
This commit is contained in:
parent 95f15b14d8
commit cd3f184f04
diff --git a/apps/preprocess/extract_keypoints.py b/apps/preprocess/extract_keypoints.py
@@ -2,8 +2,8 @@
   @ Date: 2021-08-19 22:06:22
   @ Author: Qing Shuai
   @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-12-02 21:19:41
-  @ FilePath: /EasyMocap/apps/preprocess/extract_keypoints.py
+  @ LastEditTime: 2022-05-23 22:58:57
+  @ FilePath: /EasyMocapPublic/apps/preprocess/extract_keypoints.py
 '''
 import os
 from os.path import join
@@ -27,6 +27,7 @@ config = {
         'vis': False,
         'ext': '.jpg'
     },
+    'openposecrop': {},
     'feet':{
         'root': '',
         'res': 1,
@@ -78,7 +79,7 @@ config = {
         'min_detection_confidence':0.3,
         'min_tracking_confidence': 0.1,
         'static_image_mode': False,
-    }
+    },
 }

 if __name__ == "__main__":
@@ -91,24 +92,58 @@ if __name__ == "__main__":
         help="sub directory name to store the generated annotation files, default to be annots")
     # Detection Control
     parser.add_argument('--mode', type=str, default='openpose', choices=[
-        'openpose', 'feet', 'feetcrop', 'yolo-hrnet', 'yolo', 'hrnet',
+        'openpose', 'feet', 'feetcrop', 'openposecrop',
+        'yolo-hrnet', 'yolo', 'hrnet',
         'mp-pose', 'mp-holistic', 'mp-handl', 'mp-handr', 'mp-face'],
         help="model to extract joints from image")
     # Openpose
     parser.add_argument('--openpose', type=str,
-        default='/media/qing/Project/openpose')
+        default=os.environ.get('openpose', 'openpose'))
     parser.add_argument('--openpose_res', type=float, default=1)
+    parser.add_argument('--gpus', type=int, default=[], nargs='+')
     parser.add_argument('--ext', type=str, default='.jpg')
+    parser.add_argument('--tmp', type=str, default=os.path.abspath('tmp'))
     parser.add_argument('--hand', action='store_true')
     parser.add_argument('--face', action='store_true')
     parser.add_argument('--wild', action='store_true',
         help='remove crowd class of yolo')
+    parser.add_argument('--reverse', action='store_true')
     parser.add_argument('--force', action='store_true')
     args = parser.parse_args()
     config['yolo']['isWild'] = args.wild
     mode = args.mode
+    if not os.path.exists(join(args.path, 'images')) and os.path.exists(join(args.path, 'videos')):
+        # default extract image
+        cmd = f'''python3 apps/preprocess/extract_image.py {args.path}'''
+        os.system(cmd)
     subs = load_subs(args.path, args.subs)
+    if len(args.gpus) != 0:
+        # perform multiprocess by run_cmd
+        from easymocap.mytools.debug_utils import run_cmd
+        nproc = len(args.gpus)
+        plist = []
+        for i in range(nproc):
+            if len(subs[i::nproc]) == 0:
+                continue
+            cmd = f'export CUDA_VISIBLE_DEVICES={args.gpus[i]} && python3 apps/preprocess/extract_keypoints.py {args.path} --mode {args.mode} --subs {" ".join(subs[i::nproc])}'
+            cmd += f' --annot {args.annot} --ext {args.ext}'
+            if args.hand:
+                cmd += ' --hand'
+            if args.face:
+                cmd += ' --face'
+            if args.force:
+                cmd += ' --force'
+            cmd += f' --tmp {args.tmp}'
+            cmd += ' &'
+            print(cmd)
+            p = run_cmd(cmd, bg=False)
+            plist.extend(p)
+        for p in plist:
+            p.join()
+        exit()
     global_tasks = []
+    if len(subs) == 0:
+        subs = ['']
     for sub in subs:
         config[mode]['force'] = args.force
         image_root = join(args.path, 'images', sub)
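Note: the new `--gpus` path above shards the camera subdirectories round-robin via `subs[i::nproc]`, launches one child process per GPU pinned with `CUDA_VISIBLE_DEVICES`, and joins all children before exiting. A minimal sketch of the sharding, with hypothetical camera names:

    # Round-robin sharding as used by the --gpus path (hypothetical names).
    subs = ['cam0', 'cam1', 'cam2', 'cam3', 'cam4']
    gpus = [0, 1]
    nproc = len(gpus)
    for i in range(nproc):
        shard = subs[i::nproc]  # every nproc-th entry, starting at offset i
        print('CUDA_VISIBLE_DEVICES={} -> --subs {}'.format(gpus[i], ' '.join(shard)))
    # CUDA_VISIBLE_DEVICES=0 -> --subs cam0 cam2 cam4
    # CUDA_VISIBLE_DEVICES=1 -> --subs cam1 cam3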
@@ -116,7 +151,7 @@ if __name__ == "__main__":
         tmp_root = join(args.path, mode, sub)
         if os.path.exists(annot_root) and not args.force:
             # check the number of annots and images
-            if len(os.listdir(image_root)) == len(os.listdir(annot_root)):
+            if len(os.listdir(image_root)) == len(os.listdir(annot_root)) and mode != 'feetcrop':
                 print('[Skip] detection {}'.format(annot_root))
                 continue
         if mode == 'openpose':
@@ -132,10 +167,11 @@ if __name__ == "__main__":
             config[mode]['openpose'] = args.openpose
             estimator = FeetEstimator(openpose=args.openpose)
             estimator.detect_foot(image_root, annot_root, args.ext)
-        elif mode == 'yolo':
+        elif mode == 'yolo' or mode == 'openposecrop':
             from easymocap.estimator.yolohrnet_wrapper import extract_bbox
-            config[mode]['ext'] = args.ext
-            extract_bbox(image_root, annot_root, **config[mode])
+            config['yolo']['force'] = False  # do not redetect
+            config['yolo']['ext'] = args.ext
+            extract_bbox(image_root, annot_root, **config['yolo'])
         elif mode == 'hrnet':
             from easymocap.estimator.yolohrnet_wrapper import extract_hrnet
             config[mode]['ext'] = args.ext
@@ -147,10 +183,15 @@ if __name__ == "__main__":
             from easymocap.estimator.mediapipe_wrapper import extract_2d
             config[mode]['ext'] = args.ext
             extract_2d(image_root, annot_root, config[mode], mode=mode.replace('mp-', ''))
-        if mode == 'feetcrop':
+        if mode == 'feetcrop' or mode == 'openposecrop':
             from easymocap.estimator.openpose_wrapper import FeetEstimatorByCrop
             config[mode]['openpose'] = args.openpose
-            estimator = FeetEstimatorByCrop(openpose=args.openpose)
+            estimator = FeetEstimatorByCrop(openpose=args.openpose,
+                tmpdir=join(args.tmp, '{}_{}'.format(os.path.basename(args.path), sub)),
+                fullbody=mode == 'openposecrop',
+                hand=(mode == 'openposecrop') or args.hand,
+                face=args.face)
             estimator.detect_foot(image_root, annot_root, args.ext)

     for task in global_tasks:
         task.join()
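With these changes the extractor can fan out across GPUs, e.g. `python3 apps/preprocess/extract_keypoints.py <data> --mode openposecrop --gpus 0 1` (hypothetical data path). Each child re-runs this script on its shard of `--subs`, which is why `--annot`, `--ext`, `--hand`, `--face`, `--force` and `--tmp` are forwarded explicitly in the constructed command.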
diff --git a/easymocap/estimator/SPIN/spin_api.py b/easymocap/estimator/SPIN/spin_api.py
@@ -2,8 +2,8 @@
   @ Date: 2020-10-23 20:07:49
   @ Author: Qing Shuai
   @ LastEditors: Qing Shuai
-  @ LastEditTime: 2021-03-05 13:43:01
-  @ FilePath: /EasyMocap/code/estimator/SPIN/spin_api.py
+  @ LastEditTime: 2022-07-14 12:44:30
+  @ FilePath: /EasyMocapPublic/easymocap/estimator/SPIN/spin_api.py
 '''
 """
 Demo code
@@ -32,7 +32,6 @@ Running the previous command will save the results in ```examples/im1010_{shape,
 """

 import torch
-from torchvision.transforms import Normalize
 import numpy as np
 import cv2

@@ -46,6 +45,81 @@ class constants:
     IMG_NORM_MEAN = [0.485, 0.456, 0.406]
     IMG_NORM_STD = [0.229, 0.224, 0.225]

+def normalize(tensor, mean, std, inplace: bool = False):
+    """Normalize a tensor image with mean and standard deviation.
+
+    .. note::
+        This transform acts out of place by default, i.e., it does not mutate the input tensor.
+
+    See :class:`~torchvision.transforms.Normalize` for more details.
+
+    Args:
+        tensor (Tensor): Tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool, optional): Bool to make this operation inplace.
+
+    Returns:
+        Tensor: Normalized Tensor image.
+    """
+    if not isinstance(tensor, torch.Tensor):
+        raise TypeError('Input tensor should be a torch tensor. Got {}.'.format(type(tensor)))
+
+    if tensor.ndim < 3:
+        raise ValueError('Expected tensor to be a tensor image of size (..., C, H, W). Got tensor.size() = '
+                         '{}.'.format(tensor.size()))
+
+    if not inplace:
+        tensor = tensor.clone()
+
+    dtype = tensor.dtype
+    mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device)
+    std = torch.as_tensor(std, dtype=dtype, device=tensor.device)
+    if (std == 0).any():
+        raise ValueError('std evaluated to zero after conversion to {}, leading to division by zero.'.format(dtype))
+    if mean.ndim == 1:
+        mean = mean.view(-1, 1, 1)
+    if std.ndim == 1:
+        std = std.view(-1, 1, 1)
+    tensor.sub_(mean).div_(std)
+    return tensor
+
+class Normalize(torch.nn.Module):
+    """Normalize a tensor image with mean and standard deviation.
+    Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
+    channels, this transform will normalize each channel of the input
+    ``torch.*Tensor`` i.e.,
+    ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
+
+    .. note::
+        This transform acts out of place, i.e., it does not mutate the input tensor.
+
+    Args:
+        mean (sequence): Sequence of means for each channel.
+        std (sequence): Sequence of standard deviations for each channel.
+        inplace(bool, optional): Bool to make this operation in-place.
+
+    """
+
+    def __init__(self, mean, std, inplace=False):
+        super().__init__()
+        self.mean = mean
+        self.std = std
+        self.inplace = inplace
+
+    def forward(self, tensor):
+        """
+        Args:
+            tensor (Tensor): Tensor image to be normalized.
+
+        Returns:
+            Tensor: Normalized Tensor image.
+        """
+        return normalize(tensor, self.mean, self.std, self.inplace)
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
+
+
 def get_transform(center, scale, res, rot=0):
     """Generate transformation matrix."""
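The `normalize`/`Normalize` pair above is vendored from torchvision, so the module no longer needs the `from torchvision.transforms import Normalize` import removed in the previous hunk. A quick sanity sketch (assumes the definitions and `constants` above are in scope):

    # Per-channel (img - mean) / std, acting out of place by default.
    import torch
    img = torch.rand(3, 224, 224)  # dummy CHW image in [0, 1]
    norm = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)
    out = norm(img)
    print(out.shape)               # torch.Size([3, 224, 224]); img is unchanged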
@@ -134,6 +208,23 @@ def process_image(img, bbox, input_res=224):
     norm_img = normalize_img(img.clone())[None]
     return img, norm_img

+def solve_translation(X, x, K):
+    A = np.zeros((2*X.shape[0], 3))
+    b = np.zeros((2*X.shape[0], 1))
+    fx, fy = K[0, 0], K[1, 1]
+    cx, cy = K[0, 2], K[1, 2]
+    for nj in range(X.shape[0]):
+        A[2*nj, 0] = 1
+        A[2*nj + 1, 1] = 1
+        A[2*nj, 2] = -(x[nj, 0] - cx)/fx
+        A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
+        b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
+        b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
+        A[2*nj:2*nj+2, :] *= x[nj, 2]
+        b[2*nj:2*nj+2, :] *= x[nj, 2]
+    trans = np.linalg.inv(A.T @ A) @ A.T @ b
+    return trans.T[0]
+
 def estimate_translation_np(S, joints_2d, joints_conf, K):
     """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d.
     Input:
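`solve_translation` builds a confidence-weighted linear least-squares system: for each joint, the pinhole equations fx*(X0+T0)/(X2+T2) + cx = u and fy*(X1+T1)/(X2+T2) + cy = v are rearranged into two rows that are linear in the translation T, each row is scaled by the keypoint confidence, and the normal equations are solved in closed form. A synthetic round-trip check (hypothetical intrinsics; assumes `solve_translation` is in scope):

    # Project joints under a known translation, then recover it.
    import numpy as np
    rng = np.random.default_rng(0)
    K = np.array([[1000., 0., 500.], [0., 1000., 500.], [0., 0., 1.]])
    X = rng.uniform(-0.5, 0.5, (15, 3))    # joints in the body frame
    T = np.array([0.1, -0.2, 5.0])         # ground-truth translation
    P = X + T                              # camera-frame points
    uv = P[:, :2] / P[:, 2:3] * [K[0, 0], K[1, 1]] + [K[0, 2], K[1, 2]]
    x = np.hstack([uv, np.ones((15, 1))])  # pixel coordinates + confidence 1
    print(solve_translation(X, x, K))      # ~ [0.1, -0.2, 5.0]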
@@ -197,16 +288,46 @@ class SPIN:
             p, _ = cv2.Rodrigues(rotmat[i])
             poses[0, 3*i:3*i+3] = p[:, 0]
         results['poses'] = poses
-        if use_rh_th:
-            body_params = {
-                'poses': results['poses'],
-                'shapes': results['shapes'],
-                'Rh': results['poses'][:, :3].copy(),
-                'Th': np.zeros((1, 3)),
-            }
-            body_params['Th'][0, 2] = 5
-            body_params['poses'][:, :3] = 0
-            results = body_params
+        body_params = {
+            'Rh': poses[:, :3],
+            'poses': poses[:, 3:],
+            'shapes': results['shapes'],
+        }
+        results = body_params
+        return results
+
+    def __call__(self, body_model, img, bbox, kpts, camera, ret_vertices=True):
+        body_params = self.forward(img.copy(), bbox)
+        # TODO: bug: this encode introduces errors in the keypoints
+        kpts1 = body_model.keypoints(body_params, return_tensor=False)[0]
+        body_params = body_model.encode(body_params)
+        # only use body joints to estimate the translation
+        nJoints = 15
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        kpts_diff = np.linalg.norm(kpts1 - keypoints3d, axis=-1).max()
+        # print('Encode and decode error: {}'.format(kpts_diff))
+        trans = solve_translation(keypoints3d[:nJoints], kpts[:nJoints], camera['K'])
+        body_params['Th'] += trans[None, :]
+        if body_params['Th'][0, 2] < 0:
+            print(' [WARN in SPIN] solved a negative position of human {}'.format(body_params['Th'][0]))
+            body_params['Th'] = -body_params['Th']
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            rotx = cv2.Rodrigues(np.pi*np.array([1., 0, 0]))[0]
+            Rhold = rotx @ Rhold
+            body_params['Rh'] = cv2.Rodrigues(Rhold)[0].reshape(1, 3)
+        # convert to world coordinate
+        if False:
+            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
+            Thold = body_params['Th']
+            Rh = camera['R'].T @ Rhold
+            Th = (camera['R'].T @ (Thold.T - camera['T'])).T
+            body_params['Th'] = Th
+            body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
+        keypoints3d = body_model.keypoints(body_params, return_tensor=False)[0]
+        results = {'body_params': body_params, 'keypoints3d': keypoints3d}
+        if ret_vertices:
+            vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
+            results['vertices'] = vertices
         return results

 def init_with_spin(body_model, spin_model, img, bbox, kpts, camera):
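In the new `__call__`, the solved translation is added to the `Th` produced by `body_model.encode`; if the result lands behind the camera (`Th[0, 2] < 0`), a heuristic mirror fix negates `Th` and pre-rotates the global orientation by pi about the camera x-axis. The disabled `if False:` branch sketches the usual camera-to-world conversion, `Rh_world = R.T @ Rh_cam` and `Th_world = (R.T @ (Th_cam.T - T)).T`.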