🚀 add mediapipe
parent 11f13d6953
commit e9d5f061a5
118 myeasymocap/backbone/mediapipe/hand.py Normal file
@@ -0,0 +1,118 @@
# 2023.06.15
# https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/hand_landmarker/python/hand_landmarker.ipynb#scrollTo=OMjuVQiDYJKF&uniqifier=1
# pip install -q mediapipe==0.10.0
import os
import numpy as np
import cv2

# !wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task
try:
    import mediapipe as mp
    from mediapipe.tasks import python
    from mediapipe.tasks.python import vision
except ImportError:
    print('Please install mediapipe with\npip install -q mediapipe==0.10.0')
    raise

VisionRunningMode = mp.tasks.vision.RunningMode

def bbox_from_keypoints(keypoints, rescale=1.2, detection_thresh=0.05, MIN_PIXEL=5):
    """Compute a bounding box [x_min, y_min, x_max, y_max, conf] from keypoint detections (x, y, confidence)."""
    valid = keypoints[:, -1] > detection_thresh
    if valid.sum() < 3:
        return [0, 0, 100, 100, 0]
    valid_keypoints = keypoints[valid][:, :-1]
    center = (valid_keypoints.max(axis=0) + valid_keypoints.min(axis=0)) / 2
    bbox_size = valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)
    # discard degenerate boxes smaller than MIN_PIXEL in either dimension
    if bbox_size[0] < MIN_PIXEL or bbox_size[1] < MIN_PIXEL:
        return [0, 0, 100, 100, 0]
    # enlarge the tight box by `rescale` to adjust bounding-box tightness
    bbox_size = bbox_size * rescale
    bbox = [
        center[0] - bbox_size[0] / 2,
        center[1] - bbox_size[1] / 2,
        center[0] + bbox_size[0] / 2,
        center[1] + bbox_size[1] / 2,
        keypoints[valid, 2].mean(),
    ]
    return bbox

class MediaPipe:
    NUM_HAND = 21  # number of hand landmarks returned by MediaPipe

    def create_detector(self):
        base_options = python.BaseOptions(model_asset_path=self.ckpt)
        options = vision.HandLandmarkerOptions(base_options=base_options,
                                               num_hands=2,
                                               running_mode=VisionRunningMode.VIDEO)
        detector = vision.HandLandmarker.create_from_options(options)
        return detector

    def __init__(self, ckpt) -> None:
        if not os.path.exists(ckpt):
            cmd = 'wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task'
            print('Cannot find {}, trying to download it'.format(ckpt))
            print(cmd)
            os.system(cmd)
            os.makedirs(os.path.dirname(ckpt), exist_ok=True)
            cmd = 'mv hand_landmarker.task {}'.format(os.path.dirname(ckpt))
            os.system(cmd)
        self.ckpt = ckpt
        # one detector per video (sub-directory), since VIDEO mode is stateful
        self.detector = {}
        self.timestamp = 0

    @staticmethod
    def to_array(pose, W, H):
        N = len(pose)
        if N == 0:
            return np.zeros((1, 21, 3))
        res = np.zeros((N, 21, 3))
        for nper in range(N):
            for i in range(len(pose[nper])):
                res[nper, i, 0] = pose[nper][i].x * W
                res[nper, i, 1] = pose[nper][i].y * H
                res[nper, i, 2] = pose[nper][i].visibility
        # the input image is flipped horizontally before detection,
        # so flip the x coordinates back to the original image frame
        res[..., 0] = W - res[..., 0] - 1
        return res

    def get_hand(self, pose, W, H):
        if pose is None:
            bodies = np.zeros((1, self.NUM_HAND, 3))
            return bodies
        poses = self.to_array(pose, W, H)
        # the landmarks carry no usable per-point confidence, so set it to 1
        poses[..., 2] = 1.
        return poses

    def __call__(self, imgnames, images):
        squeeze = False
        if not isinstance(imgnames, list):
            imgnames = [imgnames]
            images = [images]
            squeeze = True
        # load the input images
        nViews = len(images)
        keypoints = []
        bboxes = []
        for nv in range(nViews):
            if isinstance(images[nv], str):
                images[nv] = cv2.imread(images[nv])
            # use one detector per sub-directory, i.e. per video stream
            sub = os.path.basename(os.path.dirname(imgnames[nv]))
            if sub not in self.detector.keys():
                self.detector[sub] = self.create_detector()
            image_ = cv2.cvtColor(images[nv], cv2.COLOR_BGR2RGB)
            image_height, image_width, _ = image_.shape
            image_ = cv2.flip(image_, 1)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image_)
            detection_result = self.detector[sub].detect_for_video(mp_image, self.timestamp)
            handl2d = self.get_hand(detection_result.hand_landmarks, image_width, image_height)
            # keep only the first detected hand
            keypoints.append(handl2d[:1])
            bboxes.append(bbox_from_keypoints(handl2d[0]))

        keypoints = np.vstack(keypoints)
        bboxes = np.stack(bboxes)
        if squeeze:
            keypoints = keypoints[0]
            bboxes = bboxes[0]
        self.timestamp += 33  # assume 30 fps; detect_for_video expects a millisecond timestamp
        return {
            'keypoints': keypoints,
            'bbox': bboxes,
        }
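
For reference, a minimal usage sketch of the new backbone (not part of this commit; the checkpoint and frame paths below are hypothetical placeholders):

from myeasymocap.backbone.mediapipe.hand import MediaPipe

model = MediaPipe(ckpt='models/hand_landmarker.task')  # hypothetical path; downloaded if missing
# an image may be passed as a path (read with cv2.imread) or as a BGR array;
# passing a single image instead of a list returns squeezed outputs
out = model('data/seq0/000000.jpg', 'data/seq0/000000.jpg')
print(out['keypoints'].shape)  # (21, 3): x, y, confidence for the first hand
print(out['bbox'])             # [x_min, y_min, x_max, y_max, conf]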