🚀 add model checkpoint

shuaiqing 2023-06-19 17:23:44 +08:00
parent e7800a1356
commit 11f13d6953
12 changed files with 793 additions and 22 deletions

View File

@@ -26,7 +26,8 @@ args:
     key_from_previous: [bbox]
     args:
       # ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
-      ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
+      ckpt: models/hand_resnet_kp2d_clean.pt
+      url: 1LTK7e9oAS6B3drmQyXwTZild6k87fEZa
       mode: resnet
     vis2d:
       module: myeasymocap.io.vis.Vis2D
@@ -42,13 +43,14 @@ args:
     key_from_previous: [bbox]
     key_keep: [meta, cameras, imgnames] # keep these keys through to the final output
     args:
-      ckpt: models/manol_pca45_noflat.ckpt
+      ckpt: models/hand_manol_pca45_noflat.ckpt
+      url: '1KTi_oJ_udLRK3WZ3xyHzBUd6vKAApfT8'
   # TODO: add visualize for Init MANO
 at_final:
   load_hand_model: # load the hand model
     module: myeasymocap.io.model.MANOLoader
     args:
-      cfg_path: config/model/mano.yml
+      cfg_path: config/model/manol.yml
       model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
       regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
       num_pca_comps: 45
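The `url` values added here are Google Drive file ids, paired with relative `ckpt` paths so a missing checkpoint can be fetched on first use. A minimal sketch of that behavior, mirroring the `gdown_models` helper added later in this commit (`ensure_checkpoint` is a hypothetical name):

import os

def ensure_checkpoint(ckpt, url=None):
    # Fetch the checkpoint from Google Drive (by file id) when it is missing.
    if not os.path.exists(ckpt) and url is not None:
        os.makedirs(os.path.dirname(ckpt), exist_ok=True)
        os.system('gdown "{}" -O {}'.format(url, ckpt))
    assert os.path.exists(ckpt), f'{ckpt} not exists'
    return ckpt

ensure_checkpoint('models/hand_resnet_kp2d_clean.pt', url='1LTK7e9oAS6B3drmQyXwTZild6k87fEZa')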

View File

@@ -26,7 +26,9 @@ args:
     key_from_previous: [bbox]
     args:
       # ckpt: /nas/public/EasyMocapModels/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
-      ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
+      # ckpt: /nas/public/EasyMocapModels/hand/resnet_kp2d_clean.pt
+      ckpt: models/hand_resnet_kp2d_clean.pt
+      url: 1LTK7e9oAS6B3drmQyXwTZild6k87fEZa
       mode: resnet
     vis2d:
       module: myeasymocap.io.vis.Vis2D
@@ -42,13 +44,14 @@ args:
     key_from_previous: [bbox]
     key_keep: [meta, cameras, imgnames] # keep these keys through to the final output
     args:
-      ckpt: models/manol_pca45_noflat.ckpt
+      ckpt: models/hand_manol_pca45_noflat.ckpt
+      url: '1KTi_oJ_udLRK3WZ3xyHzBUd6vKAApfT8'
   # TODO: add visualize for Init MANO
 at_final:
   load_hand_model: # load the hand model
     module: myeasymocap.io.model.MANOLoader
     args:
-      cfg_path: config/model/mano.yml
+      cfg_path: config/model/manol.yml
       model_path: models/manov1.2/MANO_LEFT.pkl #models/handmesh/data/MANO_RIGHT.pkl # load mano model
       regressor_path: models/manov1.2/J_regressor_mano_LEFT.txt #models/handmesh/data/J_regressor_mano_RIGHT.txt
       num_pca_comps: 45

View File

@@ -33,11 +33,13 @@ class FileStorage(object):
             self._write(' rows: {}'.format(value.shape[0]))
             self._write(' cols: {}'.format(value.shape[1]))
             self._write(' dt: d')
-            self._write(' data: [{}]'.format(', '.join(['{:.3f}'.format(i) for i in value.reshape(-1)])))
+            self._write(' data: [{}]'.format(', '.join(['{:.6f}'.format(i) for i in value.reshape(-1)])))
         elif dt == 'list':
             self._write('{}:'.format(key))
             for elem in value:
                 self._write(' - "{}"'.format(elem))
+        elif dt == 'int':
+            self._write('{}: {}'.format(key, value))

     def read(self, key, dt='mat'):
         if dt == 'mat':
@@ -52,6 +54,8 @@ class FileStorage(object):
                 if val != 'none':
                     results.append(val)
             output = results
+        elif dt == 'int':
+            output = int(self.fs.getNode(key).real())
         else:
             raise NotImplementedError
         return output
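With the new `dt='int'` branches, scalar image sizes round-trip through the same YAML files as matrices. A small sketch, assuming the `FileStorage(filename, isWrite=...)` constructor used elsewhere in EasyMocap:

from easymocap.mytools.camera_utils import FileStorage

intri = FileStorage('intri.yml', isWrite=True)
intri.write('H_01', 1080, dt='int')   # written as the plain line "H_01: 1080"
intri.write('W_01', 1920, dt='int')
del intri                             # close the writer so the file is flushed

intri = FileStorage('intri.yml')
H = intri.read('H_01', dt='int')      # parsed back via fs.getNode(key).real() -> 1080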
@@ -114,6 +118,13 @@ def read_camera(intri_name, extri_name, cam_names=[]):
         cams[cam] = {}
         cams[cam]['K'] = intri.read('K_{}'.format(cam))
         cams[cam]['invK'] = np.linalg.inv(cams[cam]['K'])
+        H = intri.read('H_{}'.format(cam), dt='int')
+        W = intri.read('W_{}'.format(cam), dt='int')
+        if H is None or W is None:
+            print('[camera] no H or W for {}'.format(cam))
+            H, W = -1, -1
+        cams[cam]['H'] = H
+        cams[cam]['W'] = W
         Rvec = extri.read('R_{}'.format(cam))
         Tvec = extri.read('T_{}'.format(cam))
         assert Rvec is not None, cam
@@ -129,6 +140,10 @@ def read_camera(intri_name, extri_name, cam_names=[]):
         cams[cam]['P'] = P[cam]

         cams[cam]['dist'] = intri.read('dist_{}'.format(cam))
+        if cams[cam]['dist'] is None:
+            cams[cam]['dist'] = intri.read('D_{}'.format(cam))
+            if cams[cam]['dist'] is None:
+                print('[camera] no dist for {}'.format(cam))
     cams['basenames'] = cam_names
     return cams
@@ -155,6 +170,9 @@ def write_camera(camera, path):
         key = key_.split('.')[0]
         intri.write('K_{}'.format(key), val['K'])
         intri.write('dist_{}'.format(key), val['dist'])
+        if 'H' in val.keys() and 'W' in val.keys():
+            intri.write('H_{}'.format(key), val['H'], dt='int')
+            intri.write('W_{}'.format(key), val['W'], dt='int')
         if 'Rvec' not in val.keys():
             val['Rvec'] = cv2.Rodrigues(val['R'])[0]
         extri.write('R_{}'.format(key), val['Rvec'])
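Taken together, the read/write changes let the image resolution travel with the calibration. A round-trip sketch, assuming `write_camera` saves `intri.yml`/`extri.yml` under the given directory as elsewhere in EasyMocap:

import numpy as np
from easymocap.mytools.camera_utils import write_camera, read_camera

cameras = {
    '01': {
        'K': np.eye(3), 'dist': np.zeros((1, 5)),
        'R': np.eye(3), 'T': np.zeros((3, 1)),
        'H': 1080, 'W': 1920,  # new: stored as H_01 / W_01 with dt='int'
    }
}
write_camera(cameras, './calib')
cams = read_camera('./calib/intri.yml', './calib/extri.yml', ['01'])
print(cams['01']['H'], cams['01']['W'])  # 1080 1920; -1 -1 for files without H/W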
@@ -174,7 +192,7 @@ def camera_from_img(img):
 class Undistort:
     distortMap = {}
     @classmethod
-    def image(cls, frame, K, dist, sub=None):
+    def image(cls, frame, K, dist, sub=None, interp=cv2.INTER_NEAREST):
         if sub is None:
             return cv2.undistort(frame, K, dist, None)
         else:
@@ -183,7 +201,7 @@ class Undistort:
             mapx, mapy = cv2.initUndistortRectifyMap(K, dist, None, K, (w,h), 5)
             cls.distortMap[sub] = (mapx, mapy)
         mapx, mapy = cls.distortMap[sub]
-        img = cv2.remap(frame, mapx, mapy, cv2.INTER_NEAREST)
+        img = cv2.remap(frame, mapx, mapy, interp)
         return img

     @staticmethod
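The new `interp` argument only affects the cached-map path (`sub` is the per-view cache key), so color frames can now be resampled smoothly while the default stays nearest-neighbor. A usage sketch with placeholder calibration values:

import cv2
import numpy as np
from easymocap.mytools.camera_utils import Undistort

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)  # placeholder image
K = np.array([[1000., 0., 960.], [0., 1000., 540.], [0., 0., 1.]])
dist = np.array([[-0.2, 0.1, 0., 0., 0.]])
out0 = Undistort.image(frame, K, dist, sub='01')                           # INTER_NEAREST (default)
out1 = Undistort.image(frame, K, dist, sub='01', interp=cv2.INTER_LINEAR)  # smoother, reuses the cached maps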
@@ -203,6 +221,21 @@ class Undistort:
         bbox = np.array([kpts[0, 0], kpts[0, 1], kpts[1, 0], kpts[1, 1], bbox[4]])
         return bbox

+class Distort:
+    @staticmethod
+    def points(keypoints, K, dist):
+        pass
+
+    @staticmethod
+    def bbox(bbox, K, dist):
+        keypoints = np.array([[bbox[0], bbox[1]], [bbox[2], bbox[3]]], dtype=np.float32)
+        k3d = cv2.convertPointsToHomogeneous(keypoints)
+        k3d = (np.linalg.inv(K) @ k3d[:, 0].T).T[:, None]
+        k2d, _ = cv2.projectPoints(k3d, np.zeros((3,)), np.zeros((3,)), K, dist)
+        k2d = k2d[:, 0]
+        bbox = np.array([k2d[0,0], k2d[0,1], k2d[1, 0], k2d[1, 1], bbox[-1]])
+        return bbox
+
 def unproj(kpts, invK):
     homo = np.hstack([kpts[:, :2], np.ones_like(kpts[:, :1])])
     homo = homo @ invK.T
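`Distort.bbox` is the inverse of undistortion for boxes: the two corners are lifted to normalized rays with K^-1 and reprojected through the distortion model by `cv2.projectPoints` (zero rotation and translation), so a box found on an undistorted image can be looked up in the raw frame. A usage sketch, reusing the placeholder `K`/`dist` from the snippet above:

import numpy as np
from easymocap.mytools.camera_utils import Distort

bbox_undist = np.array([100., 200., 400., 600., 0.98])  # l, t, r, b, conf in undistorted pixels
bbox_raw = Distort.bbox(bbox_undist, K, dist)           # same box in the original (distorted) image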

View File

@@ -2,14 +2,15 @@
     @ Date: 2020-11-28 17:23:04
     @ Author: Qing Shuai
     @ LastEditors: Qing Shuai
-    @ LastEditTime: 2022-08-12 21:50:56
+    @ LastEditTime: 2022-10-27 15:13:56
     @ FilePath: /EasyMocapPublic/easymocap/mytools/vis_base.py
 '''
 import cv2
 import numpy as np
 import json

-def generate_colorbar(N = 20, cmap = 'jet', rand=True):
+def generate_colorbar(N = 20, cmap = 'jet', rand=True,
+    ret_float=False, ret_array=False, ret_rgb=False):
     bar = ((np.arange(N)/(N-1))*255).astype(np.uint8).reshape(-1, 1)
     colorbar = cv2.applyColorMap(bar, cv2.COLORMAP_JET).squeeze()
     if False:
@@ -22,6 +23,11 @@ def generate_colorbar(N = 20, cmap = 'jet', rand=True):
         rgb = colorbar[index, :]
     else:
         rgb = colorbar
+    if ret_rgb:
+        rgb = rgb[:, ::-1]
+    if ret_float:
+        rgb = rgb/255.
+    if not ret_array:
         rgb = rgb.tolist()
     return rgb

@@ -69,9 +75,11 @@ def get_rgb(index):
         # elif index == 0:
         #     return (245, 150, 150)
         col = list(colors_bar_rgb[index%len(colors_bar_rgb)])[::-1]
-    else:
+    elif isinstance(index, str):
         col = colors_table.get(index, (1, 0, 0))
         col = tuple([int(c*255) for c in col[::-1]])
+    else:
+        raise TypeError('index should be int or str')
     return col

 def get_rgb_01(index):
@@ -150,14 +158,16 @@ def plot_keypoints(img, points, pid, config, vis_conf=False, use_limb_color=True
             cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)),
                 cv2.FONT_HERSHEY_SIMPLEX, text_size, col, 2)

-def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, scale=1, lw=-1):
+def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, scale=1, lw=-1, config_name=None, lw_factor=1):
     from ..dataset.config import CONFIG
-    config_name = {25: 'body25', 21: 'hand', 42:'handlr', 17: 'coco', 1:'points', 67:'bodyhand', 137: 'total', 79:'up'}[len(points)]
+    if config_name is None:
+        config_name = {25: 'body25', 15: 'body15', 21: 'hand', 42:'handlr', 17: 'coco', 1:'points', 67:'bodyhand', 137: 'total', 79:'up',
+            19:'ochuman'}[len(points)]
     config = CONFIG[config_name]
     if lw == -1:
         lw = img.shape[0]//200
         if config_name == 'hand':
-            lw = img.shape[0]//1000
+            lw = img.shape[0]//100
         lw = max(lw, 1)
     for ii, (i, j) in enumerate(config['kintree']):
         if i >= len(points) or j >= len(points):
@@ -169,9 +179,9 @@ def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, s
             col = get_rgb(config['colors'][ii])
         else:
             col = get_rgb(pid)
-        if pt1[0] < 0 or pt1[1] < 0 or pt1[0] > 10000 or pt1[1] > 10000:
+        if pt1[0] < -10000 or pt1[1] < -10000 or pt1[0] > 10000 or pt1[1] > 10000:
             continue
-        if pt2[0] < 0 or pt2[1] < 0 or pt2[0] > 10000 or pt2[1] > 10000:
+        if pt2[0] < -10000 or pt2[1] < -10000 or pt2[0] > 10000 or pt2[1] > 10000:
             continue
         if pt1[-1] > 0.01 and pt2[-1] > 0.01:
             image = cv2.line(
@@ -191,12 +201,13 @@ def plot_keypoints_auto(img, points, pid, vis_conf=False, use_limb_color=True, s
             if c > 0.01:
                 col = get_rgb(pid)
                 if len(points) == 1:
-                    cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*10, col, lw*2)
-                    plot_cross(img, int(x+0.5), int(y+0.5), width=lw*5, col=col, lw=lw*2)
+                    _lw = max(0, int(lw * lw_factor))
+                    cv2.circle(img, (int(x+0.5), int(y+0.5)), _lw*2, col, lw*2)
+                    plot_cross(img, int(x+0.5), int(y+0.5), width=_lw, col=col, lw=lw*2)
                 else:
                     cv2.circle(img, (int(x+0.5), int(y+0.5)), lw*2, col, -1)
                 if vis_conf:
-                    cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, col, 2)
+                    cv2.putText(img, '{:.1f}'.format(c), (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, col, 2)

 def plot_keypoints_total(img, annots, scale, pid_offset=0):
     _lw = img.shape[0] // 150

View File

@@ -239,3 +239,10 @@ def get_preds_from_heatmaps(batch_heatmaps):
     coords = coords.astype(np.float32) * 4
     pred = np.dstack((coords, maxvals))
     return pred
+
+def gdown_models(ckpt, url):
+    print('Try to download model from {} to {}'.format(url, ckpt))
+    os.makedirs(os.path.dirname(ckpt), exist_ok=True)
+    cmd = 'gdown "{}" -O {}'.format(url, ckpt)
+    print('\n', cmd, '\n')
+    os.system(cmd)
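`gdown_models` shells out to the `gdown` command-line tool with a Drive file id, so `pip install gdown` is a prerequisite. A hedged equivalent through the Python API, assuming gdown >= 4.x where `gdown.download` accepts the id directly:

import os
import gdown

def gdown_models_py(ckpt, url):
    # Same effect as the os.system call above, without spawning a shell.
    os.makedirs(os.path.dirname(ckpt), exist_ok=True)
    gdown.download(id=url, output=ckpt, quiet=False)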

View File

View File

@@ -0,0 +1,89 @@
import os
import cv2
import torch
import torch.nn as nn
import numpy as np
import math

# https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth
# https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth
from .hrnet import PoseHighResolutionNet
from ..basetopdown import BaseTopDownModelCache, get_preds_from_heatmaps, gdown_models

class TopDownAsMMPose(nn.Module):
    def __init__(self, backbone, head):
        super().__init__()
        self.backbone = backbone
        self.head = head

    def forward(self, x):
        feat_list = self.backbone(x)
        size = feat_list[0].shape[-2:]
        resized_inputs = [
            nn.functional.interpolate(feat, size, mode='bilinear', align_corners=False) \
            for feat in feat_list
        ]
        resized_inputs = torch.cat(resized_inputs, 1)
        out = self.head(resized_inputs)
        pred = get_preds_from_heatmaps(out.detach().cpu().numpy())
        return {'keypoints': pred}

class MyHand2D(BaseTopDownModelCache):
    def __init__(self, ckpt, url=None, mode='hrnet'):
        if mode == 'hrnet':
            super().__init__(name='hand2d', bbox_scale=1.1, res_input=256)
            backbone = PoseHighResolutionNet(inp_ch=3, out_ch=21, W=18, multi_scale_final=True, add_final_layer=False)
            checkpoint = torch.load(ckpt, map_location='cpu')['state_dict']
            self.load_checkpoint(backbone, checkpoint, prefix='backbone.', strict=True)
            head = nn.Sequential(
                nn.Conv2d(270, 270, kernel_size=1),
                nn.BatchNorm2d(270),
                nn.ReLU(inplace=True),
                nn.Conv2d(270, 21, kernel_size=1)
            )
            self.load_checkpoint(head, checkpoint, prefix='keypoint_head.final_layer.', strict=True)
            # self.model = nn.Sequential(backbone, head)
            self.model = TopDownAsMMPose(backbone, head)
        elif mode == 'resnet':
            super().__init__(name='hand2d', bbox_scale=1.1, res_input=256, mean=[0., 0., 0.], std=[1., 1., 1.])
            from .resnet import ResNet_Deconv
            if not os.path.exists(ckpt) and url is not None:
                gdown_models(ckpt, url)
            assert os.path.exists(ckpt), f'{ckpt} not exists'
            checkpoint = torch.load(ckpt, map_location='cpu')['state_dict']
            model = ResNet_Deconv()
            self.load_checkpoint(model, checkpoint, prefix='model.', strict=True)
            self.model = model
        self.model.eval()
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(self.device)

    def __call__(self, bbox, images, imgnames):
        squeeze = False
        if not isinstance(images, list):
            images = [images]
            imgnames = [imgnames]
            bbox = [bbox]
            squeeze = True
        nViews = len(images)
        kpts_all = []
        for nv in range(nViews):
            if bbox[nv].shape[0] == 0:
                kpts_all.append(np.zeros((21, 3)))
                continue
            _bbox = bbox[nv]
            if len(_bbox.shape) == 1:
                _bbox = _bbox[None]
            output = super().__call__(_bbox, images[nv], imgnames[nv])
            kpts = output['params']['keypoints']
            conf = kpts[..., -1:]
            kpts = self.batch_affine_transform(kpts, output['params']['inv_trans'])
            kpts = np.concatenate([kpts, conf], axis=-1)
            if len(kpts.shape) == 3:
                kpts = kpts[0]
            kpts_all.append(kpts)
        kpts_all = np.stack(kpts_all)
        if squeeze:
            kpts_all = kpts_all[0]
        return {
            'keypoints': kpts_all
        }
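A usage sketch for the `resnet` branch of `MyHand2D`, following the ckpt/url pair from the configs above; the module path and image paths are assumptions:

import cv2
import numpy as np
from myeasymocap.backbone.hand2d.hand2d import MyHand2D  # assumed module path

model = MyHand2D(ckpt='models/hand_resnet_kp2d_clean.pt',
                 url='1LTK7e9oAS6B3drmQyXwTZild6k87fEZa', mode='resnet')
img = cv2.imread('images/01/000000.jpg')
bbox = np.array([100., 100., 400., 400., 1.0])  # l, t, r, b, conf
out = model(bbox, img, 'images/01/000000.jpg')
print(out['keypoints'].shape)  # (21, 3): x, y, confidence in original image coordinates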

View File

@@ -0,0 +1,161 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import os
import os.path as osp
import torch
import torch.nn as nn
from torchvision.models.resnet import BasicBlock, Bottleneck
from torchvision.models.resnet import model_urls

from ..basetopdown import get_preds_from_heatmaps

def make_conv_layers(feat_dims, kernel=3, stride=1, padding=1, bnrelu_final=True):
    layers = []
    for i in range(len(feat_dims)-1):
        layers.append(
            nn.Conv2d(
                in_channels=feat_dims[i],
                out_channels=feat_dims[i+1],
                kernel_size=kernel,
                stride=stride,
                padding=padding
            ))
        # Do not use BN and ReLU for final estimation
        if i < len(feat_dims)-2 or (i == len(feat_dims)-2 and bnrelu_final):
            layers.append(nn.BatchNorm2d(feat_dims[i+1]))
            layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)

def make_deconv_layers(feat_dims, bnrelu_final=True):
    layers = []
    for i in range(len(feat_dims)-1):
        layers.append(
            nn.ConvTranspose2d(
                in_channels=feat_dims[i],
                out_channels=feat_dims[i+1],
                kernel_size=4,
                stride=2,
                padding=1,
                output_padding=0,
                bias=False))
        # Do not use BN and ReLU for final estimation
        if i < len(feat_dims)-2 or (i == len(feat_dims)-2 and bnrelu_final):
            layers.append(nn.BatchNorm2d(feat_dims[i+1]))
            layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers)

class ResNetBackbone(nn.Module):
    def __init__(self, resnet_type):
        resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'),
                       34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'),
                       50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'),
                       101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'),
                       152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')}
        block, layers, channels, name = resnet_spec[resnet_type]
        self.name = name
        self.inplanes = 64
        super(ResNetBackbone, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)  # RGB
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.normal_(m.weight, mean=0, std=0.001)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def init_weights(self):
        org_resnet = torch.utils.model_zoo.load_url(model_urls[self.name])
        # drop the original resnet fc layer; pass 'None' in case there is no fc layer, which would otherwise raise an error
        org_resnet.pop('fc.weight', None)
        org_resnet.pop('fc.bias', None)
        self.load_state_dict(org_resnet)
        print("Initialize resnet from model zoo")

class ResNet_Deconv(nn.Module):
    def __init__(self):
        super().__init__()
        self.hm2d_size = 64
        self.resnet = ResNetBackbone(50)
        self.deconv = make_deconv_layers([2048, 256, 256, 256])
        self.conv_hm2d = make_conv_layers([256, 21], kernel=1, stride=1, padding=0, bnrelu_final=False)
        self.resnet.init_weights()
        self.deconv.apply(self.init_weights)
        self.conv_hm2d.apply(self.init_weights)

    @staticmethod
    def init_weights(m):
        if type(m) == nn.ConvTranspose2d:
            nn.init.normal_(m.weight, std=0.001)
        elif type(m) == nn.Conv2d:
            nn.init.normal_(m.weight, std=0.001)
            nn.init.constant_(m.bias, 0)
        elif type(m) == nn.BatchNorm2d:
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
        elif type(m) == nn.Linear:
            nn.init.normal_(m.weight, std=0.01)
            nn.init.constant_(m.bias, 0)

    def forward(self, img):
        x_feat = self.resnet(img)
        x_feat = self.deconv(x_feat)
        x_hm2d = self.conv_hm2d(x_feat)
        pred = get_preds_from_heatmaps(x_hm2d.detach().cpu().numpy())
        return {
            'keypoints': pred
        }
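A quick shape check for `ResNet_Deconv`: a 256x256 crop leaves ResNet-50 at 8x8 (stride 32), the three stride-2 deconvolutions bring it to the 64x64 `hm2d_size`, and `get_preds_from_heatmaps` scales the argmax by 4, i.e. back to crop coordinates. Note the constructor pulls ImageNet ResNet-50 weights through `model_zoo`, and `model_urls` only exists in older torchvision releases:

import torch

model = ResNet_Deconv().eval()  # downloads ImageNet weights on first construction
with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 256))
print(out['keypoints'].shape)   # (1, 21, 3)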

View File

View File

@@ -0,0 +1,35 @@
import os
import numpy as np
import torch
from ..basetopdown import BaseTopDownModelCache, gdown_models
import pickle
from .models import hmr

class MyHMR(BaseTopDownModelCache):
    def __init__(self, ckpt, url=None):
        super().__init__('handhmr', bbox_scale=1., res_input=224)
        self.model = hmr()
        self.model.eval()
        if not os.path.exists(ckpt) and url is not None:
            gdown_models(ckpt, url)
        assert os.path.exists(ckpt), f'{ckpt} not exists'
        checkpoint = torch.load(ckpt)
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        state_dict = checkpoint['state_dict']
        prefix = 'model.'
        self.load_checkpoint(self.model, state_dict, prefix, strict=True)
        self.model.to(self.device)

    def __call__(self, bbox, images, imgnames):
        output = super().__call__(bbox, images, imgnames)
        Rh = output['params']['poses'][:3].copy()
        poses = output['params']['poses'][3:]
        Th = np.zeros_like(Rh)
        Th[2] = 1.
        output['params'] = {
            'Rh': Rh,
            'Th': Th,
            'poses': poses,
            'shapes': output['params']['shapes'],
        }
        return output

View File

@@ -0,0 +1,256 @@
'''
    Date: 2021-10-25 11:51:37 am
    Author: dihuangdh
    Descriptions:
    -----
    LastEditTime: 2021-10-25 1:50:40 pm
    LastEditors: dihuangdh
'''
import torch
from torchvision.transforms import Normalize
import numpy as np
import cv2

from .models import hmr

class constants:
    FOCAL_LENGTH = 5000.
    IMG_RES = 224
    # Mean and standard deviation for normalizing input image
    IMG_NORM_MEAN = [0.485, 0.456, 0.406]
    IMG_NORM_STD = [0.229, 0.224, 0.225]

def get_transform(center, scale, res, rot=0):
    """Generate transformation matrix."""
    h = 200 * scale
    t = np.zeros((3, 3))
    t[0, 0] = float(res[1]) / h
    t[1, 1] = float(res[0]) / h
    t[0, 2] = res[1] * (-float(center[0]) / h + .5)
    t[1, 2] = res[0] * (-float(center[1]) / h + .5)
    t[2, 2] = 1
    if not rot == 0:
        rot = -rot  # To match direction of rotation from cropping
        rot_mat = np.zeros((3, 3))
        rot_rad = rot * np.pi / 180
        sn, cs = np.sin(rot_rad), np.cos(rot_rad)
        rot_mat[0, :2] = [cs, -sn]
        rot_mat[1, :2] = [sn, cs]
        rot_mat[2, 2] = 1
        # Need to rotate around center
        t_mat = np.eye(3)
        t_mat[0, 2] = -res[1]/2
        t_mat[1, 2] = -res[0]/2
        t_inv = t_mat.copy()
        t_inv[:2, 2] *= -1
        t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
    return t

def transform(pt, center, scale, res, invert=0, rot=0):
    """Transform pixel location to different reference."""
    t = get_transform(center, scale, res, rot=rot)
    if invert:
        t = np.linalg.inv(t)
    new_pt = np.array([pt[0]-1, pt[1]-1, 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2].astype(int)+1

def crop(img, center, scale, res, rot=0, bias=0):
    """Crop image according to the supplied bounding box."""
    # Upper left point
    ul = np.array(transform([1, 1], center, scale, res, invert=1))-1
    # Bottom right point
    br = np.array(transform([res[0]+1,
                             res[1]+1], center, scale, res, invert=1))-1

    # Padding so that when rotated proper amount of context is included
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if not rot == 0:
        ul -= pad
        br += pad

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape) + bias

    # Range to fill new array
    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
    # Range to sample from original image
    old_x = max(0, ul[0]), min(len(img[0]), br[0])
    old_y = max(0, ul[1]), min(len(img), br[1])
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
                                                        old_x[0]:old_x[1]]

    if not rot == 0:
        # Remove padding; imrotate requires scipy < 1.2
        import scipy.misc
        new_img = scipy.misc.imrotate(new_img, rot)
        new_img = new_img[pad:-pad, pad:-pad]

    new_img = cv2.resize(new_img, (res[0], res[1]))
    return new_img

def process_image(img, bbox, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """
    img = img[:, :, ::-1].copy()
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)
    l, t, r, b = bbox[:4]
    center = [(l+r)/2, (t+b)/2]
    width = max(r-l, b-t)
    scale = width/200.0
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img

def solve_translation(X, x, K):
    A = np.zeros((2*X.shape[0], 3))
    b = np.zeros((2*X.shape[0], 1))
    fx, fy = K[0, 0], K[1, 1]
    cx, cy = K[0, 2], K[1, 2]
    for nj in range(X.shape[0]):
        A[2*nj, 0] = 1
        A[2*nj + 1, 1] = 1
        A[2*nj, 2] = -(x[nj, 0] - cx)/fx
        A[2*nj+1, 2] = -(x[nj, 1] - cy)/fy
        b[2*nj, 0] = X[nj, 2]*(x[nj, 0] - cx)/fx - X[nj, 0]
        b[2*nj+1, 0] = X[nj, 2]*(x[nj, 1] - cy)/fy - X[nj, 1]
        A[2*nj:2*nj+2, :] *= x[nj, 2]
        b[2*nj:2*nj+2, :] *= x[nj, 2]
    trans = np.linalg.inv(A.T @ A) @ A.T @ b
    return trans.T[0]

def estimate_translation_np(S, joints_2d, joints_conf, K):
    """Find the camera translation that brings 3D joints S closest to the corresponding 2D joints_2d.
    Input:
        S: (25, 3) 3D joint locations
        joints: (25, 3) 2D joint locations and confidence
    Returns:
        (3,) camera translation vector
    """
    num_joints = S.shape[0]
    # focal length
    f = np.array([K[0, 0], K[1, 1]])
    # optical center
    center = np.array([K[0, 2], K[1, 2]])
    # transformations
    Z = np.reshape(np.tile(S[:, 2], (2, 1)).T, -1)
    XY = np.reshape(S[:, 0:2], -1)
    O = np.tile(center, num_joints)
    F = np.tile(f, num_joints)
    weight2 = np.reshape(np.tile(np.sqrt(joints_conf), (2, 1)).T, -1)

    # least squares
    Q = np.array([F*np.tile(np.array([1, 0]), num_joints), F*np.tile(np.array([0, 1]), num_joints), O-np.reshape(joints_2d, -1)]).T
    c = (np.reshape(joints_2d, -1)-O)*Z - F*XY

    # weighted least squares
    W = np.diagflat(weight2)
    Q = np.dot(W, Q)
    c = np.dot(W, c)

    # square matrix
    A = np.dot(Q.T, Q)
    b = np.dot(Q.T, c)

    # solution
    trans = np.linalg.solve(A, b)
    return trans

class HMR:
    def __init__(self, checkpoint, device) -> None:
        model = hmr().to(device)
        checkpoint = torch.load(checkpoint)
        state_dict = checkpoint['state_dict']
        # update state_dict: strip the 'model.' prefix from every key
        for key in list(state_dict.keys()):
            state_dict[key[6:]] = state_dict.pop(key)
        model.load_state_dict(state_dict, strict=False)
        # Load SMPL model
        model.eval()
        self.model = model
        self.device = device

    def forward(self, img, bbox, use_rh_th=True):
        # Preprocess input image and generate predictions
        img, norm_img = process_image(img, bbox, input_res=constants.IMG_RES)
        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = self.model(norm_img.to(self.device))
        results = {
            'shapes': pred_betas.detach().cpu().numpy()
        }
        results['poses'] = pred_rotmat.detach().cpu().numpy()
        if use_rh_th:
            body_params = {
                'poses': results['poses'],
                'shapes': results['shapes'],
                'Rh': results['poses'][:, :3].copy(),
                'Th': np.zeros((1, 3)),
            }
            body_params['Th'][0, 2] = 5
            body_params['poses'][:, :3] = 0
            results = body_params
        return results

    def __call__(self, body_model, img, bbox, kpts, camera, ret_vertices=True):
        body_params = self.forward(img.copy(), bbox)
        body_params = body_model.check_params(body_params)
        # only use body joints to estimate the translation
        nJoints = 21
        keypoints3d = body_model(return_verts=False, return_tensor=False, **body_params)[0]
        trans = solve_translation(keypoints3d[:nJoints], kpts[:nJoints], camera['K'])
        body_params['Th'] += trans[None, :]
        if body_params['Th'][0, 2] < 0:
            body_params['Th'] = -body_params['Th']
            Rhold = cv2.Rodrigues(body_params['Rh'])[0]
            rotx = cv2.Rodrigues(np.pi*np.array([1., 0, 0]))[0]
            Rhold = rotx @ Rhold
            body_params['Rh'] = cv2.Rodrigues(Rhold)[0].reshape(1, 3)
        # convert to world coordinate
        Rhold = cv2.Rodrigues(body_params['Rh'])[0]
        Thold = body_params['Th']
        Rh = camera['R'].T @ Rhold
        Th = (camera['R'].T @ (Thold.T - camera['T'])).T
        body_params['Th'] = Th
        body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
        keypoints3d = body_model(return_verts=False, return_tensor=False, **body_params)[0]
        results = {'body_params': body_params, 'keypoints3d': keypoints3d}
        if ret_vertices:
            vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
            results['vertices'] = vertices
        return results

def init_with_hmr(body_model, spin_model, img, bbox, kpts, camera):
    body_params = spin_model.forward(img.copy(), bbox)
    body_params = body_model.check_params(body_params)
    # only use body joints to estimate the translation
    nJoints = 15
    keypoints3d = body_model(return_verts=False, return_tensor=False, **body_params)[0]
    trans = estimate_translation_np(keypoints3d[:nJoints], kpts[:nJoints, :2], kpts[:nJoints, 2], camera['K'])
    body_params['Th'] += trans[None, :]
    # convert to world coordinate
    Rhold = cv2.Rodrigues(body_params['Rh'])[0]
    Thold = body_params['Th']
    Rh = camera['R'].T @ Rhold
    Th = (camera['R'].T @ (Thold.T - camera['T'])).T
    body_params['Th'] = Th
    body_params['Rh'] = cv2.Rodrigues(Rh)[0].reshape(1, 3)
    vertices = body_model(return_verts=True, return_tensor=False, **body_params)[0]
    keypoints3d = body_model(return_verts=False, return_tensor=False, **body_params)[0]
    results = {'body_params': body_params, 'vertices': vertices, 'keypoints3d': keypoints3d}
    return results

if __name__ == '__main__':
    pass
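`solve_translation` above rearranges the pinhole projection u = fx*(X+t)_x/(X+t)_z + cx into two confidence-weighted linear equations in t per joint, so for consistent inputs the normal equations recover the translation exactly. A sanity-check sketch:

import numpy as np

def project(X, K):
    x = (K @ X.T).T
    return x[:, :2] / x[:, 2:3]

K = np.array([[1000., 0., 500.], [0., 1000., 500.], [0., 0., 1.]])
X = 0.1 * np.random.randn(21, 3)                      # canonical 3D joints near the origin
t = np.array([0.1, -0.2, 3.0])                        # ground-truth translation (positive depth)
x = np.hstack([project(X + t, K), np.ones((21, 1))])  # 2D joints plus a confidence column
print(np.allclose(solve_translation(X, x, K), t))     # True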

View File

@@ -0,0 +1,174 @@
'''
    Date: 2021-10-25 11:51:29 am
    Author: dihuangdh
    Descriptions:
    -----
    LastEditTime: 2021-10-25 11:51:58 am
    LastEditors: dihuangdh
'''
import torch
import torch.nn as nn
import torchvision.models.resnet as resnet
import numpy as np
import math

class Bottleneck(nn.Module):
    """ Redefinition of Bottleneck residual block
        Adapted from the official PyTorch implementation
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class HMR(nn.Module):
    """ SMPL Iterative Regressor with ResNet50 backbone
    """
    def __init__(self, block, layers):
        self.inplanes = 64
        super(HMR, self).__init__()
        npose = 3 + 45
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc1 = nn.Linear(512 * block.expansion + npose + 13, 1024)
        self.drop1 = nn.Dropout()
        self.fc2 = nn.Linear(1024, 1024)
        self.drop2 = nn.Dropout()
        self.decpose = nn.Linear(1024, npose)
        self.decshape = nn.Linear(1024, 10)
        self.deccam = nn.Linear(1024, 3)
        nn.init.xavier_uniform_(self.decpose.weight, gain=0.01)
        nn.init.xavier_uniform_(self.decshape.weight, gain=0.01)
        nn.init.xavier_uniform_(self.deccam.weight, gain=0.01)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        init_pose = torch.zeros(npose).unsqueeze(0)
        init_shape = torch.zeros(10).unsqueeze(0)
        init_cam = torch.zeros(3).unsqueeze(0)
        self.register_buffer('init_pose', init_pose)
        self.register_buffer('init_shape', init_shape)
        self.register_buffer('init_cam', init_cam)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3):
        batch_size = x.shape[0]

        if init_pose is None:
            init_pose = self.init_pose.expand(batch_size, -1)
        if init_shape is None:
            init_shape = self.init_shape.expand(batch_size, -1)
        if init_cam is None:
            init_cam = self.init_cam.expand(batch_size, -1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        xf = self.avgpool(x4)
        xf = xf.view(xf.size(0), -1)

        pred_pose = init_pose
        pred_shape = init_shape
        pred_cam = init_cam
        for i in range(n_iter):
            xc = torch.cat([xf, pred_pose, pred_shape, pred_cam], 1)
            xc = self.fc1(xc)
            xc = self.drop1(xc)
            xc = self.fc2(xc)
            xc = self.drop2(xc)
            pred_pose = self.decpose(xc) + pred_pose
            pred_shape = self.decshape(xc) + pred_shape
            pred_cam = self.deccam(xc) + pred_cam

        # pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3)
        return {
            'poses': pred_pose,
            'shapes': pred_shape,
            'cam': pred_cam
        }

def hmr(pretrained=True, **kwargs):
    """ Constructs an HMR model with ResNet50 backbone.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = HMR(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        resnet_imagenet = resnet.resnet50(pretrained=True)
        model.load_state_dict(resnet_imagenet.state_dict(), strict=False)
    return model
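Usage sketch for the regressor: each of the three refinement iterations feeds the 2048-d pooled feature plus the current estimates (48 + 10 + 3 = 61 extra inputs to fc1) back through the MLP. The 48 pose parameters are 3 for global rotation plus 45 hand pose coefficients, matching `num_pca_comps: 45` in the configs above:

import torch

model = hmr(pretrained=False).eval()  # pretrained=True would pull ImageNet ResNet-50
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out['poses'].shape, out['shapes'].shape, out['cam'].shape)
# torch.Size([1, 48]) torch.Size([1, 10]) torch.Size([1, 3])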