support YOLOv4 + HRNet
This commit is contained in:
parent af452c6949
commit 0175f07290

8 easymocap/estimator/HRNet/__init__.py Normal file
@@ -0,0 +1,8 @@
'''
@ Date: 2020-06-04 12:48:29
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-11-17 15:52:23
@ Author: Qing Shuai
@ Mail: s_q@zju.edu.cn
'''
from .hrnet_api import SimpleHRNet
216 easymocap/estimator/HRNet/hrnet.py Normal file
@@ -0,0 +1,216 @@
import torch
from torch import nn
from .modules import BasicBlock, Bottleneck


class StageModule(nn.Module):
    def __init__(self, stage, output_branches, c, bn_momentum):
        super(StageModule, self).__init__()
        self.stage = stage
        self.output_branches = output_branches

        self.branches = nn.ModuleList()
        for i in range(self.stage):
            w = c * (2 ** i)
            branch = nn.Sequential(
                BasicBlock(w, w, bn_momentum=bn_momentum),
                BasicBlock(w, w, bn_momentum=bn_momentum),
                BasicBlock(w, w, bn_momentum=bn_momentum),
                BasicBlock(w, w, bn_momentum=bn_momentum),
            )
            self.branches.append(branch)

        self.fuse_layers = nn.ModuleList()
        # for each output branch (i.e. each branch, in all cases but the very last module)
        for i in range(self.output_branches):
            self.fuse_layers.append(nn.ModuleList())
            for j in range(self.stage):  # for each input branch
                if i == j:
                    self.fuse_layers[-1].append(nn.Sequential())  # Used in place of "None" because it is callable
                elif i < j:
                    self.fuse_layers[-1].append(nn.Sequential(
                        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(1, 1), stride=(1, 1), bias=False),
                        nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                        nn.Upsample(scale_factor=(2.0 ** (j - i)), mode='nearest'),
                    ))
                elif i > j:
                    ops = []
                    for k in range(i - j - 1):
                        ops.append(nn.Sequential(
                            nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
                                      bias=False),
                            nn.BatchNorm2d(c * (2 ** j), eps=1e-05, momentum=0.1, affine=True,
                                           track_running_stats=True),
                            nn.ReLU(inplace=True),
                        ))
                    ops.append(nn.Sequential(
                        nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
                                  bias=False),
                        nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                    ))
                    self.fuse_layers[-1].append(nn.Sequential(*ops))

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        assert len(self.branches) == len(x)

        x = [branch(b) for branch, b in zip(self.branches, x)]

        x_fused = []
        for i in range(len(self.fuse_layers)):
            for j in range(0, len(self.branches)):
                if j == 0:
                    x_fused.append(self.fuse_layers[i][0](x[0]))
                else:
                    x_fused[i] = x_fused[i] + self.fuse_layers[i][j](x[j])

        for i in range(len(x_fused)):
            x_fused[i] = self.relu(x_fused[i])

        return x_fused

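# --- Illustrative note (not part of the original commit) ---
# StageModule keeps one branch per resolution and fuses them at the end.
# A minimal sketch, assuming stage=3 and c=32 (per-branch shapes are preserved):
#   m = StageModule(stage=3, output_branches=3, c=32, bn_momentum=0.1)
#   feats = [torch.zeros(1, 32, 96, 72),
#            torch.zeros(1, 64, 48, 36),
#            torch.zeros(1, 128, 24, 18)]
#   outs = m(feats)  # three tensors with the same shapes as the inputs
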
class HRNet(nn.Module):
    def __init__(self, c=48, nof_joints=17, bn_momentum=0.1):
        super(HRNet, self).__init__()

        # Input (stem net)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
        self.relu = nn.ReLU(inplace=True)

        # Stage 1 (layer1) - First group of bottleneck (resnet) modules
        downsample = nn.Sequential(
            nn.Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
            nn.BatchNorm2d(256, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
        )
        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, downsample=downsample),
            Bottleneck(256, 64),
            Bottleneck(256, 64),
            Bottleneck(256, 64),
        )

        # Fusion layer 1 (transition1) - Creation of the first two branches (one full and one half resolution)
        self.transition1 = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(256, c, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            ),
            nn.Sequential(nn.Sequential(  # Double Sequential to fit with official pretrained weights
                nn.Conv2d(256, c * (2 ** 1), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c * (2 ** 1), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            )),
        ])

        # Stage 2 (stage2) - Second module with 1 group of bottleneck (resnet) modules. This has 2 branches
        self.stage2 = nn.Sequential(
            StageModule(stage=2, output_branches=2, c=c, bn_momentum=bn_momentum),
        )

        # Fusion layer 2 (transition2) - Creation of the third branch (1/4 resolution)
        self.transition2 = nn.ModuleList([
            nn.Sequential(),  # None,   - Used in place of "None" because it is callable
            nn.Sequential(),  # None,   - Used in place of "None" because it is callable
            nn.Sequential(nn.Sequential(  # Double Sequential to fit with official pretrained weights
                nn.Conv2d(c * (2 ** 1), c * (2 ** 2), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c * (2 ** 2), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive from the "upper" branch only?
        ])

        # Stage 3 (stage3) - Third module with 4 groups of bottleneck (resnet) modules. This has 3 branches
        self.stage3 = nn.Sequential(
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
            StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
        )

        # Fusion layer 3 (transition3) - Creation of the fourth branch (1/8 resolution)
        self.transition3 = nn.ModuleList([
            nn.Sequential(),  # None,   - Used in place of "None" because it is callable
            nn.Sequential(),  # None,   - Used in place of "None" because it is callable
            nn.Sequential(),  # None,   - Used in place of "None" because it is callable
            nn.Sequential(nn.Sequential(  # Double Sequential to fit with official pretrained weights
                nn.Conv2d(c * (2 ** 2), c * (2 ** 3), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                nn.BatchNorm2d(c * (2 ** 3), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive from the "upper" branch only?
        ])

        # Stage 4 (stage4) - Fourth module with 3 groups of bottleneck (resnet) modules. This has 4 branches
        self.stage4 = nn.Sequential(
            StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
            StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
            StageModule(stage=4, output_branches=1, c=c, bn_momentum=bn_momentum),
        )

        # Final layer (final_layer)
        self.final_layer = nn.Conv2d(c, nof_joints, kernel_size=(1, 1), stride=(1, 1))

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = [trans(x) for trans in self.transition1]  # From here on, x is a list (length == number of branches)

        x = self.stage2(x)
        # x = [trans(x[-1]) for trans in self.transition2]    # New branch derives from the "upper" branch only
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # New branch derives from the "upper" branch only

        x = self.stage3(x)
        # x = [trans(x) for trans in self.transition3]    # New branch derives from the "upper" branch only
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1])
        ]  # New branch derives from the "upper" branch only

        x = self.stage4(x)

        x = self.final_layer(x[0])

        return x

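# --- Illustrative note (not part of the original commit) ---
# A minimal sketch of the end-to-end shape contract, assuming HRNet-W32:
#   model = HRNet(c=32, nof_joints=17)
#   y = model(torch.zeros(1, 3, 384, 288))
#   print(y.shape)  # torch.Size([1, 17, 96, 72]): heatmaps at 1/4 input resolution
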
if __name__ == '__main__':
    # model = HRNet(48, 17, 0.1)
    model = HRNet(32, 17, 0.1)

    # print(model)

    model.load_state_dict(
        # torch.load('./weights/pose_hrnet_w48_384x288.pth')
        torch.load('./weights/pose_hrnet_w32_256x192.pth')
    )
    print('ok!!')

    # CUDA branch intentionally disabled with `and False`
    if torch.cuda.is_available() and False:
        torch.backends.cudnn.deterministic = True
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    print(device)

    model = model.to(device)

    y = model(torch.ones(1, 3, 384, 288).to(device))
    print(y.shape)
    print(torch.min(y).item(), torch.mean(y).item(), torch.max(y).item())
527 easymocap/estimator/HRNet/hrnet_api.py Normal file
@@ -0,0 +1,527 @@
'''
@ Date: 2020-06-04 12:47:04
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-04-19 17:02:57
@ Author: Qing Shuai
@ Mail: s_q@zju.edu.cn
'''
from os.path import join
import cv2
import numpy as np
import torch
from torchvision.transforms import transforms

from .hrnet import HRNet

COCO17_IN_BODY25 = [0,16,15,18,17,5,2,6,3,7,4,12,9,13,10,14,11]
pairs = [[1, 8], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [1, 0], [0,15], [15,17], [0,16], [16,18], [14,19], [19,20], [14,21], [11,22], [22,23], [11,24]]

def coco17tobody25(points2d):
    kpts = np.zeros((points2d.shape[0], 25, 3))
    kpts[:, COCO17_IN_BODY25, :2] = points2d[:, :, :2]
    kpts[:, COCO17_IN_BODY25, 2:3] = points2d[:, :, 2:3]
    # mid-hip (8) and neck (1) are interpolated from the hips and shoulders
    kpts[:, 8, :2] = kpts[:, [9, 12], :2].mean(axis=1)
    kpts[:, 8, 2] = kpts[:, [9, 12], 2].min(axis=1)
    kpts[:, 1, :2] = kpts[:, [2, 5], :2].mean(axis=1)
    kpts[:, 1, 2] = kpts[:, [2, 5], 2].min(axis=1)
    # need to swap (x, y) here in some cases
    # kpts = kpts[:, :, [1,0,2]]
    return kpts

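# --- Illustrative usage (not part of the original commit) ---
# coco17tobody25 maps COCO-17 keypoints into the BODY_25 layout used by OpenPose;
# a minimal sketch, assuming two detected people:
#   pts17 = np.random.rand(2, 17, 3)   # (nPersons, 17, [x, y, conf])
#   pts25 = coco17tobody25(pts17)      # -> (2, 25, 3); joints absent from COCO
#                                      #    (e.g. the feet) stay zero
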
# generate the Gaussian kernel
def generate_gauss(sigma):
    tmp_size = sigma * 3
    size = 2 * tmp_size + 1
    x = np.arange(0, size, 1, np.float32)
    y = x[:, np.newaxis]
    x0 = y0 = size // 2
    # The gaussian is not normalized, we want the center value to equal 1
    g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
    return g, tmp_size

gauss = {}
for SIGMA in range(1, 5):
    gauss_kernel, gauss_radius = generate_gauss(SIGMA)
    gauss[SIGMA] = {
        'kernel': gauss_kernel,
        'radius': gauss_radius
    }

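# --- Illustrative note (not part of the original commit) ---
# Each kernel is a square of side 6*sigma + 1 with value 1 at its center, e.g.:
#   g, radius = generate_gauss(2)
#   g.shape            # (13, 13), radius == 6
#   g[radius, radius]  # 1.0
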
def box_to_center_scale(box, model_image_width, model_image_height, scale_factor=1.25):
    """convert a box to the center/scale information required for the pose transformation

    Parameters
    ----------
    box : array-like of 4 floats
        (x1, y1, x2, y2), the top-left and bottom-right corners of the box
    model_image_width : int
    model_image_height : int

    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)

    top_left_corner = (box[0], box[1])
    bottom_right_corner = (box[2], box[3])
    box_width = bottom_right_corner[0] - top_left_corner[0]
    box_height = bottom_right_corner[1] - top_left_corner[1]
    center[0] = top_left_corner[0] + box_width * 0.5
    center[1] = top_left_corner[1] + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    # pad the box to the network aspect ratio before computing the scale
    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    scale = scale * scale_factor
    return center, scale

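# --- Worked example (not part of the original commit) ---
# For a 100x300 box and a 288x384 network input:
#   center, scale = box_to_center_scale([100, 100, 200, 400], 288, 384)
#   center  # array([150., 250.], dtype=float32)
#   # the box is padded to the 0.75 aspect ratio (width -> 225), divided by
#   # pixel_std=200 and enlarged by scale_factor=1.25: scale == [1.40625, 1.875]
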
def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result

def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_affine_transform(
        center, scale, rot, output_size,
        shift=np.array([0, 0], dtype=np.float32), inv=0
):
    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
        print(scale)
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans

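# --- Illustrative usage (not part of the original commit) ---
# The 2x3 matrix maps the (possibly rotated) person box onto the network input;
# inv=1 returns the reverse mapping used by transform_preds below:
#   trans = get_affine_transform(center, scale, rot=0, output_size=(288, 384))
#   crop = cv2.warpAffine(image, trans, (288, 384), flags=cv2.INTER_LINEAR)
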
def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_heatmaps should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)

    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals

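# --- Illustrative note (not part of the original commit) ---
# The argmax over the flattened heatmap is decoded back into (x, y):
#   hm = np.zeros((1, 1, 4, 5)); hm[0, 0, 2, 3] = 1.0
#   preds, maxvals = get_max_preds(hm)
#   preds[0, 0]  # array([3., 2.]): x = idx % width, y = idx // width
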
def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]

def batch_affine_transform(points, trans):
    # append the homogeneous coordinate, then apply the 2x3 affine matrix
    points = np.hstack((points[:, :2], np.ones((points.shape[0], 1))))
    out = points @ trans.T
    return out

def transform_preds(coords, center, scale, rot, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, rot, output_size, inv=1)
    target_coords[:, :2] = batch_affine_transform(coords, trans)
    return target_coords

config_ = {'kintree': [[1, 0], [2, 0], [3, 1], [4, 2], [5, 0], [6, 0], [7, 5], [8, 6], [9, 7], [10, 8], [11, 5], [12, 6], [13, 11], [
    14, 12], [15, 13], [16, 14], [6, 5], [12, 11]], 'color': ['g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'k', 'k']}
colors_table = {
    # colorblind/print/copy safe:
    '_blue': [0.65098039, 0.74117647, 0.85882353],
    '_pink': [.9, .7, .7],
    '_mint': [166/255., 229/255., 204/255.],
    '_mint2': [202/255., 229/255., 223/255.],
    '_green': [153/255., 216/255., 201/255.],
    '_green2': [171/255., 221/255., 164/255.],
    '_red': [251/255., 128/255., 114/255.],
    '_orange': [253/255., 174/255., 97/255.],
    '_yellow': [250/255., 230/255., 154/255.],
    'r': [255/255, 0, 0],
    'g': [0, 255/255, 0],
    'b': [0, 0, 255/255],
    'k': [0, 0, 0],
    'y': [255/255, 255/255, 0],
    'purple': [128/255, 0, 128/255]
}
# convert RGB in [0, 1] to BGR tuples in [0, 255] for OpenCV
for key, val in colors_table.items():
    colors_table[key] = tuple([int(val[2]*255), int(val[1]*255), int(val[0]*255)])

def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True):
    '''
    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    '''
    if normalize:
        batch_image = batch_image.clone()
        min = float(batch_image.min())
        max = float(batch_image.max())

        batch_image.add_(-min).div_(max - min + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)

    grid_image = np.zeros((batch_size*heatmap_height,
                           (num_joints+2)*heatmap_width,
                           3),
                          dtype=np.uint8)

    preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for i in range(batch_size):
        image = batch_image[i].mul(255)\
                              .clamp(0, 255)\
                              .byte()\
                              .permute(1, 2, 0)\
                              .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
                                    .clamp(0, 255)\
                                    .byte()\
                                    .cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))
        resized_image_copy = resized_image.copy()
        height_begin = heatmap_height * i
        height_end = heatmap_height * (i + 1)
        for ip in range(len(config_['kintree'])):
            src, dst = config_['kintree'][ip]
            c = config_['color'][ip]
            if maxvals[i][src] < 0.1 or maxvals[i][dst] < 0.1:
                continue
            # fixed: the original called an undefined plot_line helper; cv2.line is used instead
            cv2.line(resized_image_copy,
                     (int(preds[i][src][0]), int(preds[i][src][1])),
                     (int(preds[i][dst][0]), int(preds[i][dst][1])),
                     colors_table[c], 1)
        for j in range(num_joints):
            cv2.circle(resized_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)
            heatmap = heatmaps[j, :, :]
            mask = (heatmap > 0.1)[:, :, None]
            colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            masked_image = (colored_heatmap*0.7 + resized_image*0.3)*mask + resized_image*(1-mask)
            cv2.circle(masked_image,
                       (int(preds[i][j][0]), int(preds[i][j][1])),
                       1, [0, 0, 255], 1)

            width_begin = heatmap_width * (j+2)
            width_end = heatmap_width * (j+2+1)
            grid_image[height_begin:height_end, width_begin:width_end, :] = \
                masked_image
            # grid_image[height_begin:height_end, width_begin:width_end, :] = \
            #     colored_heatmap*0.7 + resized_image*0.3

        grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image
        grid_image[height_begin:height_end, heatmap_width:heatmap_width+heatmap_width, :] = resized_image_copy
    cv2.imwrite(file_name, grid_image)

import math

def get_final_preds(batch_heatmaps, center, scale, rot=None, flip=None):
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing: shift each maximum by 0.25 pixel towards the higher neighbour
    if True:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
                    diff = np.array(
                        [
                            hm[py][px+1] - hm[py][px-1],
                            hm[py+1][px] - hm[py-1][px]
                        ]
                    )
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back to the original image coordinates
    for i in range(coords.shape[0]):
        if flip is not None:
            if flip[i]:
                coords[i, :, 0] = heatmap_width - 1 - coords[i, :, 0]
        if rot is None:
            _rot = 0
        else:
            _rot = rot[i]
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], _rot, [heatmap_width, heatmap_height]
        )
    return preds, maxvals

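# --- Illustrative note (not part of the original commit) ---
# get_final_preds returns joints in the ORIGINAL image frame: the quarter-pixel
# refinement runs on the 1/4-resolution heatmaps, then transform_preds applies
# the inverse of the crop transform built from each person's (center, scale).
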
def get_gaussian_maps(net_out, keypoints, sigma):
    radius, kernel = gauss[sigma]['radius'], gauss[sigma]['kernel']
    weights = np.ones(net_out.shape, dtype=np.float32)
    for i in range(weights.shape[0]):
        for nj in range(weights.shape[1]):
            if keypoints[i][nj][2] < 0:
                weights[i][nj] = 0
                continue
            elif keypoints[i][nj][2] < 0.01:
                weights[i][nj] = 0
                continue
            weights[i][nj] = 0
            mu_x, mu_y = keypoints[i][nj][:2]
            mu_x, mu_y = int(mu_x + 0.5), int(mu_y + 0.5)
            # Usable gaussian range
            ul = [mu_x - radius, mu_y - radius]
            br = [mu_x + radius + 1, mu_y + radius + 1]
            g_x = max(0, -ul[0]), min(br[0], weights.shape[3]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], weights.shape[2]) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], weights.shape[3])
            img_y = max(0, ul[1]), min(br[1], weights.shape[2])
            weights[i][nj][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                kernel[g_y[0]:g_y[1], g_x[0]:g_x[1]]
    return weights


humanId = 0

class SimpleHRNet:
    def __init__(self, c, nof_joints, checkpoint_path, device, resolution=(288, 384)):
        self.device = device
        self.c = c
        self.nof_joints = nof_joints
        self.checkpoint_path = checkpoint_path
        self.max_batch_size = 64
        self.resolution = resolution  # (width, height); note: the original implementation stores (height, width)
        self.transform = transforms.Compose([
            # transforms.ToPILImage(),
            # transforms.Resize((self.resolution[0], self.resolution[1])),  # (height, width)
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        self.model = HRNet(c=c, nof_joints=nof_joints).to(device)
        self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))
        self.model.eval()

    def __call__(self, image, bboxes, rot=0, net_out=False):
        # image: full frame; bboxes: one (x1, y1, x2, y2) box per person
        images = torch.zeros((len(bboxes), 3, self.resolution[1], self.resolution[0]), device=self.device)  # (height, width)
        if len(bboxes) > 0:
            # pose estimation : for multiple people
            centers, scales, trans_all = [], [], []
            for box in bboxes:
                center, scale = box_to_center_scale(box, self.resolution[0], self.resolution[1])
                centers.append(center)
                scales.append(scale)
                trans = get_affine_transform(center, scale, rot=rot, output_size=self.resolution)
                trans_all.append(trans)
            for i, trans in enumerate(trans_all):
                # Crop a smaller image around each person
                model_input = cv2.warpAffine(
                    image, trans,
                    (int(self.resolution[0]), int(self.resolution[1])),
                    flags=cv2.INTER_LINEAR)
                # cv2.imshow('input', model_input)
                # cv2.waitKey(0)
                # hwc -> 1chw
                model_input = self.transform(model_input)  # .unsqueeze(0)
                images[i] = model_input
            images = images.to(self.device)
            with torch.no_grad():
                out = self.model(images)
            out = out.cpu().detach().numpy()
            if net_out:
                return out, trans_all, centers, scales, rot
            coords, max_val = get_final_preds(
                out,
                np.asarray(centers),
                np.asarray(scales),
                [rot for _ in range(out.shape[0])])
            pts = np.concatenate((coords, max_val), axis=2)
            return coco17tobody25(pts)
        else:
            return np.empty((0, 25, 3))  # fixed: the shape must be passed as a tuple

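    # --- Illustrative usage (not part of the original commit) ---
    # A minimal sketch, assuming an HRNet-W48 checkpoint; the weight file path
    # below is hypothetical:
    #   est = SimpleHRNet(48, 17, 'data/models/pose_hrnet_w48_384x288.pth', torch.device('cpu'))
    #   frame = cv2.imread('frame.jpg')
    #   bboxes = [[100, 100, 300, 500]]   # one person, (x1, y1, x2, y2)
    #   kpts = est(frame, bboxes)         # -> (1, 25, 3) BODY_25 keypoints
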
    def predict_with_previous(self, image, bboxes, keypoints, sigma):
        # net_out: (batch, nJoints, height, width)
        net_out, trans_all, centers, scales, rot = self.__call__(image, bboxes, net_out=True)
        keypoints = keypoints[:, COCO17_IN_BODY25]
        keypoints_rescale = keypoints.copy()
        for i in range(keypoints.shape[0]):
            # fixed: write into row i instead of overwriting the whole array each iteration
            keypoints_rescale[i, :, :2] = batch_affine_transform(keypoints[i], trans_all[i])/4
        weights = get_gaussian_maps(net_out, keypoints_rescale, sigma)
        out = net_out * weights
        coords, max_val = get_final_preds(
            out,
            np.asarray(centers),
            np.asarray(scales),
            # fixed: get_final_preds indexes rot per person, so expand the scalar
            [rot for _ in range(out.shape[0])])
        pts = np.concatenate((coords, max_val), axis=2)
        return coco17tobody25(pts)

    def predict(self, image, detections, keypoints=None, ret_crop=False):
        if keypoints is not None:
            keypoints = keypoints[:, COCO17_IN_BODY25]
            kpts_rescale = [None for _ in range(len(keypoints))]
        boxes = []
        rotation = 0
        image_pose = image
        # image_pose = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if detections is None:
            # fixed: `images` below would otherwise be undefined
            return np.empty((0, 25, 3))
        images = torch.zeros((len(detections), 3, self.resolution[1], self.resolution[0]), device=self.device)  # (height, width)
        # pose estimation : for multiple people
        centers = []
        scales = []
        for box in detections:
            center, scale = box_to_center_scale(box, self.resolution[0], self.resolution[1])
            centers.append(center)
            scales.append(scale)
        model_inputs = []
        for i, (center, scale) in enumerate(zip(centers, scales)):
            trans = get_affine_transform(center, scale, rotation, self.resolution)
            # Crop a smaller image around each person
            model_input = cv2.warpAffine(
                image_pose,
                trans,
                (int(self.resolution[0]), int(self.resolution[1])),
                flags=cv2.INTER_LINEAR)
            if keypoints is not None:
                kpts_homo = keypoints[i].copy()
                kpts_homo[:, 2] = 1
                kpts_rescale[i] = (kpts_homo @ trans.T)/4
            # global humanId
            # cv2.imwrite('../output/debughrnet/person_{}.jpg'.format(humanId), model_input[:,:,[2,1,0]])
            # humanId += 1
            # hwc -> 1chw
            model_input = self.transform(model_input)  # .unsqueeze(0)
            images[i] = model_input
        # torch.cuda.synchronize(self.device)

        # print(' - spending {:.2f}ms in preprocess.'.format(1000*(time.time() - start)))
        if images.shape[0] == 0:
            return np.empty((0, 25, 3))
        else:
            # start = time.time()
            images = images.to(self.device)
            # torch.cuda.synchronize(self.device)

            # print(' - spending {:.2f}ms in copy to cuda.'.format(1000*(time.time() - start)))
            # start = time.time()
            with torch.no_grad():
                if len(images) <= self.max_batch_size:
                    out = self.model(images)
                else:
                    out = torch.empty(
                        (images.shape[0], self.nof_joints, self.resolution[1] // 4, self.resolution[0] // 4)
                    ).to(self.device)
                    for i in range(0, len(images), self.max_batch_size):
                        out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])
            # torch.cuda.synchronize(self.device)
            global humanId
            if keypoints is not None:
                filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
                humanId += 1
                # save_batch_heatmaps(images, out, filename)
                # build the Gaussian weight maps (the default weight is 1)
                weights = np.ones(out.shape, dtype=np.float32)
                for i in range(weights.shape[0]):
                    for nj in range(weights.shape[1]):
                        if keypoints[i][nj][2] < 0:
                            weights[i][nj] = 0
                            continue
                        elif keypoints[i][nj][2] < 0.01:
                            continue
                        weights[i][nj] = 0
                        mu_x, mu_y = kpts_rescale[i][nj]
                        mu_x, mu_y = int(mu_x + 0.5), int(mu_y + 0.5)
                        # Usable gaussian range
                        ul = [mu_x - gauss_radius, mu_y - gauss_radius]
                        br = [mu_x + gauss_radius + 1, mu_y + gauss_radius + 1]
                        g_x = max(0, -ul[0]), min(br[0], weights.shape[3]) - ul[0]
                        g_y = max(0, -ul[1]), min(br[1], weights.shape[2]) - ul[1]
                        # Image range
                        img_x = max(0, ul[0]), min(br[0], weights.shape[3])
                        img_y = max(0, ul[1]), min(br[1], weights.shape[2])
                        weights[i][nj][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                            gauss_kernel[g_y[0]:g_y[1], g_x[0]:g_x[1]]
                filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
                humanId += 1
                # save_batch_heatmaps(images, torch.Tensor(weights), filename)
                out = out.cpu().detach().numpy()
                out = out * weights
                filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
                humanId += 1
                # save_batch_heatmaps(images, torch.Tensor(out), filename)
            else:
                out = out.cpu().detach().numpy()
            coords, max_val = get_final_preds(
                out,
                np.asarray(centers),
                np.asarray(scales))
            pts = np.concatenate((coords, max_val), axis=2)
            # torch.cuda.synchronize(self.device)
            # print(' - spending {:.2f}ms in postprocess.'.format(1000*(time.time() - start)))
            # print('')
            if ret_crop:
                return coco17tobody25(pts), images
            else:
                return coco17tobody25(pts)
72 easymocap/estimator/HRNet/modules.py Normal file
@@ -0,0 +1,72 @@
import torch
from torch import nn


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_momentum)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.relu = nn.ReLU(inplace=True)
        # fixed: conv2 consumes the output of conv1, so its input has `planes` channels
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
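# --- Illustrative note (not part of the original commit) ---
# Both blocks preserve spatial size when stride=1; Bottleneck expands channels 4x:
#   blk = Bottleneck(64, 64, downsample=nn.Sequential(
#       nn.Conv2d(64, 256, kernel_size=1, bias=False), nn.BatchNorm2d(256)))
#   blk(torch.zeros(1, 64, 32, 32)).shape  # torch.Size([1, 256, 32, 32])
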
8 easymocap/estimator/YOLOv4/__init__.py Normal file
@@ -0,0 +1,8 @@
'''
@ Date: 2020-12-10 16:37:04
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-12-10 16:52:06
@ FilePath: /mvpose/code/estimator/YOLOv4/__init__.py
'''
from .yolo import YOLOv4
80 easymocap/estimator/YOLOv4/coco.names Normal file
@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
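# --- Illustrative note (not part of the original commit) ---
# The detector reports class indices into this list; "person" is index 0, which
# is typically the only class a mocap pipeline keeps. A minimal loader sketch:
#   with open('easymocap/estimator/YOLOv4/coco.names') as f:
#       class_names = [line.strip() for line in f if line.strip()]
#   class_names[0]  # 'person'
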
257 easymocap/estimator/YOLOv4/config.py Normal file
@@ -0,0 +1,257 @@
import sys  # used by the route-error message in print_cfg below
import torch
from .torch_utils import convert2cpu


def parse_cfg(cfgfile):
    blocks = []
    fp = open(cfgfile, 'r')
    block = None
    line = fp.readline()
    while line != '':
        line = line.rstrip()
        if line == '' or line[0] == '#':
            line = fp.readline()
            continue
        elif line[0] == '[':
            if block:
                blocks.append(block)
            block = dict()
            block['type'] = line.lstrip('[').rstrip(']')
            # set default value
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
        else:
            key, value = line.split('=')
            key = key.strip()
            if key == 'type':
                key = '_type'
            value = value.strip()
            block[key] = value
        line = fp.readline()

    if block:
        blocks.append(block)
    fp.close()
    return blocks

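# --- Illustrative usage (not part of the original commit) ---
# parse_cfg turns a darknet .cfg into a list of dicts, one per section; e.g. for
#   [net]
#   width=416
#   height=416
#   [convolutional]
#   filters=32
# it returns:
#   [{'type': 'net', 'width': '416', 'height': '416'},
#    {'type': 'convolutional', 'batch_normalize': 0, 'filters': '32'}]
# (values are kept as strings; callers int() them as needed)
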
def print_cfg(blocks):
    print('layer     filters    size              input                output')
    prev_width = 416
    prev_height = 416
    prev_filters = 3
    out_filters = []
    out_widths = []
    out_heights = []
    ind = -2
    for block in blocks:
        ind = ind + 1
        if block['type'] == 'net':
            prev_width = int(block['width'])
            prev_height = int(block['height'])
            continue
        elif block['type'] == 'convolutional':
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            is_pad = int(block['pad'])
            pad = (kernel_size - 1) // 2 if is_pad else 0
            width = (prev_width + 2 * pad - kernel_size) // stride + 1
            height = (prev_height + 2 * pad - kernel_size) // stride + 1
            print('%5d %-6s %4d  %d x %d / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (
                ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width,
                height, filters))
            prev_width = width
            prev_height = height
            prev_filters = filters
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            width = prev_width // stride
            height = prev_height // stride
            # fixed: pooling keeps the channel count, so print prev_filters
            # (the original referenced an undefined `filters` here)
            print('%5d %-6s       %d x %d / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (
                ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height,
                prev_filters))
            prev_width = width
            prev_height = height
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'avgpool':
            width = 1
            height = 1
            print('%5d %-6s                   %3d x %3d x%4d   ->  %3d' % (
                ind, 'avg', prev_width, prev_height, prev_filters, prev_filters))
            prev_width = width
            prev_height = height
            # channel count is unchanged by global average pooling
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'softmax':
            print('%5d %-6s                                    ->  %3d' % (ind, 'softmax', prev_filters))
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'cost':
            print('%5d %-6s                                    ->  %3d' % (ind, 'cost', prev_filters))
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'reorg':
            stride = int(block['stride'])
            filters = stride * stride * prev_filters
            width = prev_width // stride
            height = prev_height // stride
            print('%5d %-6s             / %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (
                ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters))
            prev_width = width
            prev_height = height
            prev_filters = filters
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'upsample':
            stride = int(block['stride'])
            filters = prev_filters
            width = prev_width * stride
            height = prev_height * stride
            print('%5d %-6s             * %d   %3d x %3d x%4d   ->   %3d x %3d x%4d' % (
                ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters))
            prev_width = width
            prev_height = height
            prev_filters = filters
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'route':
            layers = block['layers'].split(',')
            layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
            if len(layers) == 1:
                print('%5d %-6s %d' % (ind, 'route', layers[0]))
                prev_width = out_widths[layers[0]]
                prev_height = out_heights[layers[0]]
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1]))
                prev_width = out_widths[layers[0]]
                prev_height = out_heights[layers[0]]
                assert (prev_width == out_widths[layers[1]])
                assert (prev_height == out_heights[layers[1]])
                prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
            elif len(layers) == 4:
                print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3]))
                prev_width = out_widths[layers[0]]
                prev_height = out_heights[layers[0]]
                assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]])
                assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]])
                prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[
                    layers[3]]
            else:
                print("route error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
                                                        sys._getframe().f_code.co_name, sys._getframe().f_lineno))

            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] in ['region', 'yolo']:
            print('%5d %-6s' % (ind, 'detection'))
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'shortcut':
            from_id = int(block['from'])
            from_id = from_id if from_id > 0 else from_id + ind
            print('%5d %-6s %d' % (ind, 'shortcut', from_id))
            prev_width = out_widths[from_id]
            prev_height = out_heights[from_id]
            prev_filters = out_filters[from_id]
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'connected':
            filters = int(block['output'])
            print('%5d %-6s                            %d  ->  %3d' % (ind, 'connected', prev_filters, filters))
            prev_filters = filters
            out_widths.append(1)
            out_heights.append(1)
            out_filters.append(prev_filters)
        else:
            print('unknown type %s' % (block['type']))

def load_conv(buf, start, conv_model):
    num_w = conv_model.weight.numel()
    num_b = conv_model.bias.numel()
    conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape))
    start = start + num_w
    return start


def save_conv(fp, conv_model):
    if conv_model.bias.is_cuda:
        convert2cpu(conv_model.bias.data).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
    else:
        conv_model.bias.data.numpy().tofile(fp)
        conv_model.weight.data.numpy().tofile(fp)


def load_conv_bn(buf, start, conv_model, bn_model):
    num_w = conv_model.weight.numel()
    num_b = bn_model.bias.numel()
    bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape))
    start = start + num_w
    return start
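# --- Illustrative note (not part of the original commit) ---
# Darknet .weights files are flat float32 buffers; for a conv followed by batch
# norm the layout is: bn bias, bn weight, bn running_mean, bn running_var, then
# the conv weights, which is exactly the order load_conv_bn consumes:
#   buf = np.fromfile('yolov4.weights', dtype=np.float32)  # after skipping the header ints
#   start = load_conv_bn(buf, start, conv, bn)             # returns the new offset
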

def save_conv_bn(fp, conv_model, bn_model):
    if bn_model.bias.is_cuda:
        convert2cpu(bn_model.bias.data).numpy().tofile(fp)
        convert2cpu(bn_model.weight.data).numpy().tofile(fp)
        convert2cpu(bn_model.running_mean).numpy().tofile(fp)
        convert2cpu(bn_model.running_var).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
    else:
        bn_model.bias.data.numpy().tofile(fp)
        bn_model.weight.data.numpy().tofile(fp)
        bn_model.running_mean.numpy().tofile(fp)
        bn_model.running_var.numpy().tofile(fp)
        conv_model.weight.data.numpy().tofile(fp)


def load_fc(buf, start, fc_model):
    num_w = fc_model.weight.numel()
    num_b = fc_model.bias.numel()
    fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]))
    start = start + num_w
    return start


def save_fc(fp, fc_model):
    fc_model.bias.data.numpy().tofile(fp)
    fc_model.weight.data.numpy().tofile(fp)

if __name__ == '__main__':
    # sys is imported at module level above
    blocks = parse_cfg('cfg/yolo.cfg')
    if len(sys.argv) == 2:
        blocks = parse_cfg(sys.argv[1])
    print_cfg(blocks)
515 easymocap/estimator/YOLOv4/darknet2pytorch.py Normal file
@@ -0,0 +1,515 @@
import torch  # fixed: used directly below (torch.tanh, torch.cat, ...)
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .region_loss import RegionLoss
from .yolo_layer import YoloLayer
from .config import *
from .torch_utils import *


class Mish(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x * (torch.tanh(torch.nn.functional.softplus(x)))
        return x

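# --- Illustrative note (not part of the original commit) ---
# Mish is the smooth activation mish(x) = x * tanh(softplus(x)); e.g.
#   Mish()(torch.tensor([0.0, 1.0]))  # tensor([0.0000, 0.8651])
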
class MaxPoolDark(nn.Module):
    def __init__(self, size=2, stride=1):
        super(MaxPoolDark, self).__init__()
        self.size = size
        self.stride = stride

    def forward(self, x):
        '''
        darknet output_size = (input_size + p - k) / s + 1
        p : padding = k - 1
        k : size
        s : stride
        torch output_size = (input_size + 2*p - k) / s + 1
        p : padding = k//2
        '''
        p = self.size // 2
        if ((x.shape[2] - 1) // self.stride) != ((x.shape[2] + 2 * p - self.size) // self.stride):
            padding1 = (self.size - 1) // 2
            padding2 = padding1 + 1
        else:
            padding1 = (self.size - 1) // 2
            padding2 = padding1
        if ((x.shape[3] - 1) // self.stride) != ((x.shape[3] + 2 * p - self.size) // self.stride):
            padding3 = (self.size - 1) // 2
            padding4 = padding3 + 1
        else:
            padding3 = (self.size - 1) // 2
            padding4 = padding3
        x = F.max_pool2d(F.pad(x, (padding3, padding4, padding1, padding2), mode='replicate'),
                         self.size, stride=self.stride)
        return x


class Upsample_expand(nn.Module):
    def __init__(self, stride=2):
        super(Upsample_expand, self).__init__()
        self.stride = stride

    def forward(self, x):
        assert (x.data.dim() == 4)

        x = x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1).\
            expand(x.size(0), x.size(1), x.size(2), self.stride, x.size(3), self.stride).contiguous().\
            view(x.size(0), x.size(1), x.size(2) * self.stride, x.size(3) * self.stride)

        return x


class Upsample_interpolate(nn.Module):
    def __init__(self, stride):
        super(Upsample_interpolate, self).__init__()
        self.stride = stride

    def forward(self, x):
        assert (x.data.dim() == 4)

        out = F.interpolate(x, size=(x.size(2) * self.stride, x.size(3) * self.stride), mode='nearest')
        return out

class Reorg(nn.Module):
    def __init__(self, stride=2):
        super(Reorg, self).__init__()
        self.stride = stride

    def forward(self, x):
        stride = self.stride
        assert (x.data.dim() == 4)
        B = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        assert (H % stride == 0)
        assert (W % stride == 0)
        ws = stride
        hs = stride
        # fixed: use integer division; float sizes raise a TypeError in view()
        x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(3, 4).contiguous()
        x = x.view(B, C, H // hs * W // ws, hs * ws).transpose(2, 3).contiguous()
        x = x.view(B, C, hs * ws, H // hs, W // ws).transpose(1, 2).contiguous()
        x = x.view(B, hs * ws * C, H // hs, W // ws)
        return x

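# --- Illustrative note (not part of the original commit) ---
# Reorg trades resolution for channels, e.g. with the default stride of 2:
#   Reorg()(torch.zeros(1, 64, 26, 26)).shape  # torch.Size([1, 256, 13, 13])
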
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        N = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        x = F.avg_pool2d(x, (H, W))
        x = x.view(N, C)
        return x


# for route and shortcut
class EmptyModule(nn.Module):
    def __init__(self):
        super(EmptyModule, self).__init__()

    def forward(self, x):
        return x

# supports route, shortcut and reorg
class Darknet(nn.Module):
    def __init__(self, cfgfile, inference=False):
        super(Darknet, self).__init__()
        self.inference = inference
        self.training = not self.inference

        self.blocks = parse_cfg(cfgfile)
        self.width = int(self.blocks[0]['width'])
        self.height = int(self.blocks[0]['height'])

        self.models = self.create_network(self.blocks)  # merge conv, bn, leaky
        self.loss = self.models[len(self.models) - 1]

        if self.blocks[(len(self.blocks) - 1)]['type'] == 'region':
            self.anchors = self.loss.anchors
            self.num_anchors = self.loss.num_anchors
            self.anchor_step = self.loss.anchor_step
            self.num_classes = self.loss.num_classes

        self.header = torch.IntTensor([0, 0, 0, 0])
        self.seen = 0

    def forward(self, x):
        ind = -2
        self.loss = None
        outputs = dict()
        out_boxes = []
        for block in self.blocks:
            ind = ind + 1
            # if ind > 0:
            #    return x

            if block['type'] == 'net':
                continue
            elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']:
                x = self.models[ind](x)
                outputs[ind] = x
            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
                if len(layers) == 1:
                    if 'groups' not in block.keys() or int(block['groups']) == 1:
                        x = outputs[layers[0]]
                        outputs[ind] = x
                    else:
                        groups = int(block['groups'])
                        group_id = int(block['group_id'])
                        _, b, _, _ = outputs[layers[0]].shape
                        x = outputs[layers[0]][:, b // groups * group_id:b // groups * (group_id + 1)]
                        outputs[ind] = x
                elif len(layers) == 2:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x = torch.cat((x1, x2), 1)
                    outputs[ind] = x
                elif len(layers) == 4:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x3 = outputs[layers[2]]
                    x4 = outputs[layers[3]]
                    x = torch.cat((x1, x2, x3, x4), 1)
                    outputs[ind] = x
                else:
                    print("route number > 2, is {}".format(len(layers)))

            elif block['type'] == 'shortcut':
                from_layer = int(block['from'])
                activation = block['activation']
                from_layer = from_layer if from_layer > 0 else from_layer + ind
                x1 = outputs[from_layer]
                x2 = outputs[ind - 1]
                x = x1 + x2
                if activation == 'leaky':
                    x = F.leaky_relu(x, 0.1, inplace=True)
                elif activation == 'relu':
                    x = F.relu(x, inplace=True)
                outputs[ind] = x
            elif block['type'] == 'region':
                continue
                # NOTE: unreachable after `continue`; kept from the original region-loss path
                if self.loss:
                    self.loss = self.loss + self.models[ind](x)
                else:
                    self.loss = self.models[ind](x)
                outputs[ind] = None
            elif block['type'] == 'yolo':
                # if self.training:
                #     pass
                # else:
                #     boxes = self.models[ind](x)
                #     out_boxes.append(boxes)
                boxes = self.models[ind](x)
                out_boxes.append(boxes)
            elif block['type'] == 'cost':
                continue
            else:
                print('unknown type %s' % (block['type']))

        if self.training:
            return out_boxes
        else:
            return get_region_boxes(out_boxes)

    def print_network(self):
        print_cfg(self.blocks)

def create_network(self, blocks):
|
||||||
|
models = nn.ModuleList()
|
||||||
|
|
||||||
|
prev_filters = 3
|
||||||
|
out_filters = []
|
||||||
|
prev_stride = 1
|
||||||
|
out_strides = []
|
||||||
|
conv_id = 0
|
||||||
|
for block in blocks:
|
||||||
|
if block['type'] == 'net':
|
||||||
|
prev_filters = int(block['channels'])
|
||||||
|
continue
|
||||||
|
elif block['type'] == 'convolutional':
|
||||||
|
conv_id = conv_id + 1
|
||||||
|
batch_normalize = int(block['batch_normalize'])
|
||||||
|
filters = int(block['filters'])
|
||||||
|
kernel_size = int(block['size'])
|
||||||
|
stride = int(block['stride'])
|
||||||
|
is_pad = int(block['pad'])
|
||||||
|
pad = (kernel_size - 1) // 2 if is_pad else 0
|
||||||
|
activation = block['activation']
|
||||||
|
model = nn.Sequential()
|
||||||
|
if batch_normalize:
|
||||||
|
model.add_module('conv{0}'.format(conv_id),
|
||||||
|
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
|
||||||
|
model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
|
||||||
|
# model.add_module('bn{0}'.format(conv_id), BN2d(filters))
|
||||||
|
else:
|
||||||
|
model.add_module('conv{0}'.format(conv_id),
|
||||||
|
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
|
||||||
|
if activation == 'leaky':
|
||||||
|
model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
|
||||||
|
elif activation == 'relu':
|
||||||
|
model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
|
||||||
|
elif activation == 'mish':
|
||||||
|
model.add_module('mish{0}'.format(conv_id), Mish())
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
# print("convalution havn't activate {}".format(activation))
|
||||||
|
|
||||||
|
prev_filters = filters
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
prev_stride = stride * prev_stride
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'maxpool':
|
||||||
|
pool_size = int(block['size'])
|
||||||
|
stride = int(block['stride'])
|
||||||
|
if stride == 1 and pool_size % 2:
|
||||||
|
# You can use Maxpooldark instead, here is convenient to convert onnx.
|
||||||
|
# Example: [maxpool] size=3 stride=1
|
||||||
|
model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=pool_size // 2)
|
||||||
|
elif stride == pool_size:
|
||||||
|
# You can use Maxpooldark instead, here is convenient to convert onnx.
|
||||||
|
# Example: [maxpool] size=2 stride=2
|
||||||
|
model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=0)
|
||||||
|
else:
|
||||||
|
model = MaxPoolDark(pool_size, stride)
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
prev_stride = stride * prev_stride
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'avgpool':
|
||||||
|
model = GlobalAvgPool2d()
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'softmax':
|
||||||
|
model = nn.Softmax()
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'cost':
|
||||||
|
if block['_type'] == 'sse':
|
||||||
|
model = nn.MSELoss(reduction='mean')
|
||||||
|
elif block['_type'] == 'L1':
|
||||||
|
model = nn.L1Loss(reduction='mean')
|
||||||
|
elif block['_type'] == 'smooth':
|
||||||
|
model = nn.SmoothL1Loss(reduction='mean')
|
||||||
|
out_filters.append(1)
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'reorg':
|
||||||
|
stride = int(block['stride'])
|
||||||
|
prev_filters = stride * stride * prev_filters
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
prev_stride = prev_stride * stride
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(Reorg(stride))
|
||||||
|
elif block['type'] == 'upsample':
|
||||||
|
stride = int(block['stride'])
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
prev_stride = prev_stride // stride
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
|
||||||
|
models.append(Upsample_expand(stride))
|
||||||
|
# models.append(Upsample_interpolate(stride))
|
||||||
|
|
||||||
|
elif block['type'] == 'route':
|
||||||
|
layers = block['layers'].split(',')
|
||||||
|
ind = len(models)
|
||||||
|
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
|
||||||
|
if len(layers) == 1:
|
||||||
|
if 'groups' not in block.keys() or int(block['groups']) == 1:
|
||||||
|
prev_filters = out_filters[layers[0]]
|
||||||
|
prev_stride = out_strides[layers[0]]
|
||||||
|
else:
|
||||||
|
prev_filters = out_filters[layers[0]] // int(block['groups'])
|
||||||
|
prev_stride = out_strides[layers[0]] // int(block['groups'])
|
||||||
|
elif len(layers) == 2:
|
||||||
|
assert (layers[0] == ind - 1 or layers[1] == ind - 1)
|
||||||
|
prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
|
||||||
|
prev_stride = out_strides[layers[0]]
|
||||||
|
elif len(layers) == 4:
|
||||||
|
assert (layers[0] == ind - 1)
|
||||||
|
prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + \
|
||||||
|
out_filters[layers[3]]
|
||||||
|
prev_stride = out_strides[layers[0]]
|
||||||
|
else:
|
||||||
|
print("route error!!!")
|
||||||
|
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(EmptyModule())
|
||||||
|
elif block['type'] == 'shortcut':
|
||||||
|
ind = len(models)
|
||||||
|
prev_filters = out_filters[ind - 1]
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
prev_stride = out_strides[ind - 1]
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(EmptyModule())
|
||||||
|
elif block['type'] == 'connected':
|
||||||
|
filters = int(block['output'])
|
||||||
|
if block['activation'] == 'linear':
|
||||||
|
model = nn.Linear(prev_filters, filters)
|
||||||
|
elif block['activation'] == 'leaky':
|
||||||
|
model = nn.Sequential(
|
||||||
|
nn.Linear(prev_filters, filters),
|
||||||
|
nn.LeakyReLU(0.1, inplace=True))
|
||||||
|
elif block['activation'] == 'relu':
|
||||||
|
model = nn.Sequential(
|
||||||
|
nn.Linear(prev_filters, filters),
|
||||||
|
nn.ReLU(inplace=True))
|
||||||
|
prev_filters = filters
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(model)
|
||||||
|
elif block['type'] == 'region':
|
||||||
|
loss = RegionLoss()
|
||||||
|
anchors = block['anchors'].split(',')
|
||||||
|
loss.anchors = [float(i) for i in anchors]
|
||||||
|
loss.num_classes = int(block['classes'])
|
||||||
|
loss.num_anchors = int(block['num'])
|
||||||
|
loss.anchor_step = len(loss.anchors) // loss.num_anchors
|
||||||
|
loss.object_scale = float(block['object_scale'])
|
||||||
|
loss.noobject_scale = float(block['noobject_scale'])
|
||||||
|
loss.class_scale = float(block['class_scale'])
|
||||||
|
loss.coord_scale = float(block['coord_scale'])
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(loss)
|
||||||
|
elif block['type'] == 'yolo':
|
||||||
|
yolo_layer = YoloLayer()
|
||||||
|
anchors = block['anchors'].split(',')
|
||||||
|
anchor_mask = block['mask'].split(',')
|
||||||
|
yolo_layer.anchor_mask = [int(i) for i in anchor_mask]
|
||||||
|
yolo_layer.anchors = [float(i) for i in anchors]
|
||||||
|
yolo_layer.num_classes = int(block['classes'])
|
||||||
|
self.num_classes = yolo_layer.num_classes
|
||||||
|
yolo_layer.num_anchors = int(block['num'])
|
||||||
|
yolo_layer.anchor_step = len(yolo_layer.anchors) // yolo_layer.num_anchors
|
||||||
|
yolo_layer.stride = prev_stride
|
||||||
|
yolo_layer.scale_x_y = float(block['scale_x_y'])
|
||||||
|
# yolo_layer.object_scale = float(block['object_scale'])
|
||||||
|
# yolo_layer.noobject_scale = float(block['noobject_scale'])
|
||||||
|
# yolo_layer.class_scale = float(block['class_scale'])
|
||||||
|
# yolo_layer.coord_scale = float(block['coord_scale'])
|
||||||
|
out_filters.append(prev_filters)
|
||||||
|
out_strides.append(prev_stride)
|
||||||
|
models.append(yolo_layer)
|
||||||
|
else:
|
||||||
|
print('unknown type %s' % (block['type']))
|
||||||
|
|
||||||
|
return models
|
||||||
|
|
||||||
|
def load_weights(self, weightfile):
|
||||||
|
fp = open(weightfile, 'rb')
|
||||||
|
header = np.fromfile(fp, count=5, dtype=np.int32)
|
||||||
|
self.header = torch.from_numpy(header)
|
||||||
|
self.seen = self.header[3]
|
||||||
|
buf = np.fromfile(fp, dtype=np.float32)
|
||||||
|
fp.close()
|
||||||
|
|
||||||
|
start = 0
|
||||||
|
ind = -2
|
||||||
|
for block in self.blocks:
|
||||||
|
if start >= buf.size:
|
||||||
|
break
|
||||||
|
ind = ind + 1
|
||||||
|
if block['type'] == 'net':
|
||||||
|
continue
|
||||||
|
elif block['type'] == 'convolutional':
|
||||||
|
model = self.models[ind]
|
||||||
|
batch_normalize = int(block['batch_normalize'])
|
||||||
|
if batch_normalize:
|
||||||
|
start = load_conv_bn(buf, start, model[0], model[1])
|
||||||
|
else:
|
||||||
|
start = load_conv(buf, start, model[0])
|
||||||
|
elif block['type'] == 'connected':
|
||||||
|
model = self.models[ind]
|
||||||
|
if block['activation'] != 'linear':
|
||||||
|
start = load_fc(buf, start, model[0])
|
||||||
|
else:
|
||||||
|
start = load_fc(buf, start, model)
|
||||||
|
elif block['type'] == 'maxpool':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'reorg':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'upsample':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'route':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'shortcut':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'region':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'yolo':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'avgpool':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'softmax':
|
||||||
|
pass
|
||||||
|
elif block['type'] == 'cost':
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
print('unknown type %s' % (block['type']))
|
||||||
|
|
||||||
|
# def save_weights(self, outfile, cutoff=0):
|
||||||
|
# if cutoff <= 0:
|
||||||
|
# cutoff = len(self.blocks) - 1
|
||||||
|
#
|
||||||
|
# fp = open(outfile, 'wb')
|
||||||
|
# self.header[3] = self.seen
|
||||||
|
# header = self.header
|
||||||
|
# header.numpy().tofile(fp)
|
||||||
|
#
|
||||||
|
# ind = -1
|
||||||
|
# for blockId in range(1, cutoff + 1):
|
||||||
|
# ind = ind + 1
|
||||||
|
# block = self.blocks[blockId]
|
||||||
|
# if block['type'] == 'convolutional':
|
||||||
|
# model = self.models[ind]
|
||||||
|
# batch_normalize = int(block['batch_normalize'])
|
||||||
|
# if batch_normalize:
|
||||||
|
# save_conv_bn(fp, model[0], model[1])
|
||||||
|
# else:
|
||||||
|
# save_conv(fp, model[0])
|
||||||
|
# elif block['type'] == 'connected':
|
||||||
|
# model = self.models[ind]
|
||||||
|
# if block['activation'] != 'linear':
|
||||||
|
# save_fc(fc, model)
|
||||||
|
# else:
|
||||||
|
# save_fc(fc, model[0])
|
||||||
|
# elif block['type'] == 'maxpool':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'reorg':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'upsample':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'route':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'shortcut':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'region':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'yolo':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'avgpool':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'softmax':
|
||||||
|
# pass
|
||||||
|
# elif block['type'] == 'cost':
|
||||||
|
# pass
|
||||||
|
# else:
|
||||||
|
# print('unknown type %s' % (block['type']))
|
||||||
|
# fp.close()
|
195
easymocap/estimator/YOLOv4/region_loss.py
Normal file
@ -0,0 +1,195 @@
import torch.nn as nn
import torch.nn.functional as F
from .torch_utils import *


def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
                  sil_thresh, seen):
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    anchor_step = len(anchors) // num_anchors  # integer division: anchor_step is used as an index below
    conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
    coord_mask = torch.zeros(nB, nA, nH, nW)
    cls_mask = torch.zeros(nB, nA, nH, nW)
    tx = torch.zeros(nB, nA, nH, nW)
    ty = torch.zeros(nB, nA, nH, nW)
    tw = torch.zeros(nB, nA, nH, nW)
    th = torch.zeros(nB, nA, nH, nW)
    tconf = torch.zeros(nB, nA, nH, nW)
    tcls = torch.zeros(nB, nA, nH, nW)

    nAnchors = nA * nH * nW
    nPixels = nH * nW
    for b in range(nB):
        cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
        cur_ious = torch.zeros(nAnchors)
        for t in range(50):
            if target[b][t * 5 + 1] == 0:
                break
            gx = target[b][t * 5 + 1] * nW
            gy = target[b][t * 5 + 2] * nH
            gw = target[b][t * 5 + 3] * nW
            gh = target[b][t * 5 + 4] * nH
            cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
            cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
        conf_mask[b][cur_ious > sil_thresh] = 0
    if seen < 12800:
        if anchor_step == 4:
            tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(
                1, nA, 1, 1).repeat(nB, 1, nH, nW)
            ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
                1, nA, 1, 1).repeat(nB, 1, nH, nW)
        else:
            tx.fill_(0.5)
            ty.fill_(0.5)
        tw.zero_()
        th.zero_()
        coord_mask.fill_(1)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(50):
            if target[b][t * 5 + 1] == 0:
                break
            nGT = nGT + 1
            best_iou = 0.0
            best_n = -1
            min_dist = 10000
            gx = target[b][t * 5 + 1] * nW
            gy = target[b][t * 5 + 2] * nH
            gi = int(gx)
            gj = int(gy)
            gw = target[b][t * 5 + 3] * nW
            gh = target[b][t * 5 + 4] * nH
            gt_box = [0, 0, gw, gh]
            for n in range(nA):
                aw = anchors[anchor_step * n]
                ah = anchors[anchor_step * n + 1]
                anchor_box = [0, 0, aw, ah]
                iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
                if anchor_step == 4:
                    ax = anchors[anchor_step * n + 2]
                    ay = anchors[anchor_step * n + 3]
                    dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
                if iou > best_iou:
                    best_iou = iou
                    best_n = n
                elif anchor_step == 4 and iou == best_iou and dist < min_dist:
                    best_iou = iou
                    best_n = n
                    min_dist = dist

            gt_box = [gx, gy, gw, gh]
            pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]

            coord_mask[b][best_n][gj][gi] = 1
            cls_mask[b][best_n][gj][gi] = 1
            conf_mask[b][best_n][gj][gi] = object_scale
            tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
            ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
            tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
            th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)  # best_iou
            tconf[b][best_n][gj][gi] = iou
            tcls[b][best_n][gj][gi] = target[b][t * 5]
            if iou > 0.5:
                nCorrect = nCorrect + 1

    return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls


class RegionLoss(nn.Module):
    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        # output : BxAs*(4+1+num_classes)*H*W
        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes, target.data, self.anchors, nA, nC,
            nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().item())

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)

        t3 = time.time()

        loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
            loss_conf.item(), loss_cls.item(), loss.item()))
        return loss
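The target layout that build_targets indexes above (an inference from the code, not documented in the commit): each image contributes up to 50 ground-truth slots of 5 values, [class_id, cx, cy, w, h], with coordinates normalized to [0, 1]; an empty slot reads 0 and terminates the per-image loop. A minimal sketch:

import torch

target = torch.zeros(1, 50 * 5)  # batch of one image, 50 box slots
# one ground-truth person: class 0, centered, 0.2 x 0.4 of the image
target[0, 0:5] = torch.tensor([0.0, 0.5, 0.5, 0.2, 0.4])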
98
easymocap/estimator/YOLOv4/torch_utils.py
Normal file
@ -0,0 +1,98 @@
import sys
import os
import time
import math
import torch
import numpy as np
from torch.autograd import Variable
from . import utils  # provides post_processing (see utils.py in this package)


def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    if x1y1x2y2:
        mx = torch.min(boxes1[0], boxes2[0])
        Mx = torch.max(boxes1[2], boxes2[2])
        my = torch.min(boxes1[1], boxes2[1])
        My = torch.max(boxes1[3], boxes2[3])
        w1 = boxes1[2] - boxes1[0]
        h1 = boxes1[3] - boxes1[1]
        w2 = boxes2[2] - boxes2[0]
        h2 = boxes2[3] - boxes2[1]
    else:
        mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
        Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
        my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
        My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
        w1 = boxes1[2]
        h1 = boxes1[3]
        w2 = boxes2[2]
        h2 = boxes2[3]
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    mask = ((cw <= 0) + (ch <= 0) > 0)
    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    carea[mask] = 0
    uarea = area1 + area2 - carea
    return carea / uarea


def get_region_boxes(boxes_and_confs):
    # print('Getting boxes from boxes and confs ...')
    boxes_list = []
    confs_list = []

    for item in boxes_and_confs:
        boxes_list.append(item[0])
        confs_list.append(item[1])

    # boxes: [batch, num1 + num2 + num3, 1, 4]
    # confs: [batch, num1 + num2 + num3, num_classes]
    boxes = torch.cat(boxes_list, dim=1)
    confs = torch.cat(confs_list, dim=1)

    return [boxes, confs]


def convert2cpu(gpu_matrix):
    return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)


def convert2cpu_long(gpu_matrix):
    return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)


def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
    model.eval()
    t0 = time.time()

    if type(img) == np.ndarray and len(img.shape) == 3:  # cv2 image
        img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
    elif type(img) == np.ndarray and len(img.shape) == 4:
        img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
    else:
        print("unknown image type")
        exit(-1)

    if use_cuda:
        img = img.cuda()
    img = torch.autograd.Variable(img)

    t1 = time.time()

    output = model(img)

    t2 = time.time()

    print('-----------------------------------')
    print('           Preprocess : %f' % (t1 - t0))
    print('      Model Inference : %f' % (t2 - t1))
    print('-----------------------------------')

    return utils.post_processing(img, conf_thresh, nms_thresh, output)
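A small sanity check for bbox_ious (not in the commit): it operates column-wise on 4xN tensors, here in (cx, cy, w, h) form. The first column pairs two identical boxes, the second two disjoint ones.

import torch

boxes1 = torch.tensor([[0.5, 0.5], [0.5, 0.5], [0.2, 0.2], [0.2, 0.2]])  # rows: cx, cy, w, h
boxes2 = torch.tensor([[0.5, 0.9], [0.5, 0.9], [0.2, 0.2], [0.2, 0.2]])
print(bbox_ious(boxes1, boxes2, x1y1x2y2=False))  # tensor([1., 0.])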
221
easymocap/estimator/YOLOv4/utils.py
Normal file
@ -0,0 +1,221 @@
import sys
import os
import time
import math
import numpy as np

import itertools
import struct  # get_image_size
import imghdr  # get_image_size


def sigmoid(x):
    return 1.0 / (np.exp(-x) + 1.)


def softmax(x):
    x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
    x = x / np.expand_dims(x.sum(axis=1), axis=1)
    return x


def bbox_iou(box1, box2, x1y1x2y2=True):
    # print('iou box1:', box1)
    # print('iou box2:', box2)

    if x1y1x2y2:
        mx = min(box1[0], box2[0])
        Mx = max(box1[2], box2[2])
        my = min(box1[1], box2[1])
        My = max(box1[3], box2[3])
        w1 = box1[2] - box1[0]
        h1 = box1[3] - box1[1]
        w2 = box2[2] - box2[0]
        h2 = box2[3] - box2[1]
    else:
        w1 = box1[2]
        h1 = box1[3]
        w2 = box2[2]
        h2 = box2[3]

        mx = min(box1[0], box2[0])
        Mx = max(box1[0] + w1, box2[0] + w2)
        my = min(box1[1], box2[1])
        My = max(box1[1] + h1, box2[1] + h2)
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    carea = 0
    if cw <= 0 or ch <= 0:
        return 0.0

    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    uarea = area1 + area2 - carea
    return carea / uarea


def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
    # print(boxes.shape)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1) * (y2 - y1)
    order = confs.argsort()[::-1]

    keep = []
    while order.size > 0:
        idx_self = order[0]
        idx_other = order[1:]

        keep.append(idx_self)

        xx1 = np.maximum(x1[idx_self], x1[idx_other])
        yy1 = np.maximum(y1[idx_self], y1[idx_other])
        xx2 = np.minimum(x2[idx_self], x2[idx_other])
        yy2 = np.minimum(y2[idx_self], y2[idx_other])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h

        if min_mode:
            over = inter / np.minimum(areas[order[0]], areas[order[1:]])
        else:
            over = inter / (areas[order[0]] + areas[order[1:]] - inter)

        inds = np.where(over <= nms_thresh)[0]
        order = order[inds + 1]

    return np.array(keep)


def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
    import cv2
    img = np.copy(img)
    colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)

    def get_color(c, x, max_val):
        ratio = float(x) / max_val * 5
        i = int(math.floor(ratio))
        j = int(math.ceil(ratio))
        ratio = ratio - i
        r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
        return int(r * 255)

    width = img.shape[1]
    height = img.shape[0]
    for i in range(len(boxes)):
        box = boxes[i]
        x1 = int(box[0] * width)
        y1 = int(box[1] * height)
        x2 = int(box[2] * width)
        y2 = int(box[3] * height)

        if color:
            rgb = color
        else:
            rgb = (255, 0, 0)
        if len(box) >= 7 and class_names:
            cls_conf = box[5]
            cls_id = box[6]
            print('%s: %f' % (class_names[cls_id], cls_conf))
            classes = len(class_names)
            offset = cls_id * 123457 % classes
            red = get_color(2, offset, classes)
            green = get_color(1, offset, classes)
            blue = get_color(0, offset, classes)
            if color is None:
                rgb = (red, green, blue)
            img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
        img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
    if savename:
        print("save plot results to %s" % savename)
        cv2.imwrite(savename, img)
    return img


def read_truths(lab_path):
    if not os.path.exists(lab_path):
        return np.array([])
    if os.path.getsize(lab_path):
        truths = np.loadtxt(lab_path)
        truths = truths.reshape(truths.size // 5, 5)  # to avoid single truth problem
        return truths
    else:
        return np.array([])


def post_processing(img, conf_thresh, nms_thresh, output):
    # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
    # num_anchors = 9
    # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    # strides = [8, 16, 32]
    # anchor_step = len(anchors) // num_anchors

    # [batch, num, 1, 4]
    box_array = output[0]
    # [batch, num, num_classes]
    confs = output[1]

    t1 = time.time()

    if type(box_array).__name__ != 'ndarray':
        box_array = box_array.cpu().detach().numpy()
        confs = confs.cpu().detach().numpy()

    num_classes = confs.shape[2]

    # [batch, num, 4]
    box_array = box_array[:, :, 0]

    # [batch, num, num_classes] --> [batch, num]
    max_conf = np.max(confs, axis=2)
    max_id = np.argmax(confs, axis=2)

    t2 = time.time()

    bboxes_batch = []
    for i in range(box_array.shape[0]):

        argwhere = max_conf[i] > conf_thresh
        l_box_array = box_array[i, argwhere, :]
        l_max_conf = max_conf[i, argwhere]
        l_max_id = max_id[i, argwhere]

        bboxes = []
        # nms for each class
        for j in range(num_classes):

            cls_argwhere = l_max_id == j
            ll_box_array = l_box_array[cls_argwhere, :]
            ll_max_conf = l_max_conf[cls_argwhere]
            ll_max_id = l_max_id[cls_argwhere]

            keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

            if (keep.size > 0):
                ll_box_array = ll_box_array[keep, :]
                ll_max_conf = ll_max_conf[keep]
                ll_max_id = ll_max_id[keep]

                for k in range(ll_box_array.shape[0]):
                    bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3],
                                   ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])

        bboxes_batch.append(bboxes)

    t3 = time.time()

    print('-----------------------------------')
    print('       max and argmax : %f' % (t2 - t1))
    print('                  nms : %f' % (t3 - t2))
    print('Post processing total : %f' % (t3 - t1))
    print('-----------------------------------')

    return bboxes_batch
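A toy check for nms_cpu (not in the commit): two heavily overlapping boxes plus one separate box; the lower-confidence member of the overlapping pair is suppressed.

import numpy as np

boxes = np.array([[0.10, 0.1, 0.40, 0.4],
                  [0.12, 0.1, 0.42, 0.4],
                  [0.60, 0.6, 0.90, 0.9]])
confs = np.array([0.9, 0.8, 0.7])
print(nms_cpu(boxes, confs, nms_thresh=0.5))  # array([0, 2])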
161
easymocap/estimator/YOLOv4/yolo.py
Normal file
@ -0,0 +1,161 @@
'''
@ Date: 2020-12-10 16:39:51
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-04-21 23:53:40
@ FilePath: /EasyMocapPublic/easymocap/estimator/YOLOv4/yolo.py
'''
from .darknet2pytorch import Darknet
import cv2
import torch
from os.path import join
import os
import numpy as np


def load_class_names(namesfile):
    class_names = []
    with open(namesfile, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.rstrip()
        class_names.append(line)
    return class_names


def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
    # print(boxes.shape)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1) * (y2 - y1)
    order = confs.argsort()[::-1]

    keep = []
    while order.size > 0:
        idx_self = order[0]
        idx_other = order[1:]

        keep.append(idx_self)

        xx1 = np.maximum(x1[idx_self], x1[idx_other])
        yy1 = np.maximum(y1[idx_self], y1[idx_other])
        xx2 = np.minimum(x2[idx_self], x2[idx_other])
        yy2 = np.minimum(y2[idx_self], y2[idx_other])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h

        if min_mode:
            over = inter / np.minimum(areas[order[0]], areas[order[1:]])
        else:
            over = inter / (areas[order[0]] + areas[order[1:]] - inter)

        inds = np.where(over <= nms_thresh)[0]
        order = order[inds + 1]
    return np.array(keep)


def post_processing(conf_thresh, nms_thresh, output):
    # [batch, num, 1, 4]
    box_array = output[0]
    # [batch, num, num_classes]
    confs = output[1]

    if type(box_array).__name__ != 'ndarray':
        box_array = box_array.cpu().detach().numpy()
        confs = confs.cpu().detach().numpy()

    num_classes = confs.shape[2]

    # [batch, num, 4]
    box_array = box_array[:, :, 0]

    # [batch, num, num_classes] --> [batch, num]
    max_conf = np.max(confs, axis=2)
    max_id = np.argmax(confs, axis=2)

    bboxes_batch = []
    for i in range(box_array.shape[0]):
        argwhere = max_conf[i] > conf_thresh
        l_box_array = box_array[i, argwhere, :]
        l_max_conf = max_conf[i, argwhere]
        l_max_id = max_id[i, argwhere]

        bboxes = []
        # nms for class person
        j = 0
        cls_argwhere = l_max_id == j
        ll_box_array = l_box_array[cls_argwhere, :]
        ll_max_conf = l_max_conf[cls_argwhere]
        ll_max_id = l_max_id[cls_argwhere]

        keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

        if (keep.size > 0):
            ll_box_array = ll_box_array[keep, :]
            ll_max_conf = ll_max_conf[keep]
            ll_max_id = ll_max_id[keep]
            bboxes = np.hstack([ll_box_array, ll_max_conf[:, None]])

        bboxes_batch.append(bboxes)

    return bboxes_batch


class YOLOv4:
    def __init__(self, device, ckpt_path, box_nms_thres, conf_thres,
                 isWild=False) -> None:
        dirname = os.path.dirname(__file__)
        cfgfile = join(dirname, 'yolov4.cfg')
        namesfile = join(dirname, 'coco.names')
        self.model = Darknet(cfgfile)
        self.model.load_weights(ckpt_path)
        self.model.to(device)
        self.model.eval()
        class_names = load_class_names(namesfile)
        self.device = device
        self.box_nms_thres = box_nms_thres
        self.conf_thres = conf_thres
        self.isWild = isWild

    def predict_single(self, image):
        width = image.shape[1]
        height = image.shape[0]
        tgt_width = self.model.width
        # shrink first, then pad to a square input
        if width > height:
            tgt_shape = (tgt_width, int(height/width*tgt_width))
            resize = cv2.resize(image, tgt_shape)
            sized = np.zeros((tgt_width, tgt_width, 3), dtype=np.uint8)
            start = (sized.shape[0] - resize.shape[0])//2
            sized[start:start+resize.shape[0], :, :] = resize
        # pad_to_square
        elif width == height:
            sized = cv2.resize(image, (tgt_width, tgt_width))
            start = 0
        else:
            tgt_shape = (int(width/height*tgt_width), tgt_width)
            resize = cv2.resize(image, tgt_shape)
            sized = np.zeros((tgt_width, tgt_width, 3), dtype=np.uint8)
            start = (sized.shape[1] - resize.shape[1]) // 2
            sized[:, start:start+resize.shape[1], :] = resize
        img = torch.from_numpy(sized.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
        img = img.to(self.device)
        with torch.no_grad():
            output = self.model(img)
        bboxes = post_processing(self.conf_thres, self.box_nms_thres, output)[0]
        if len(bboxes) == 0:
            return bboxes
        if self.isWild:
            flag = ((bboxes[:, 2] - bboxes[:, 0]) < 0.8) & (((bboxes[:, 2] - bboxes[:, 0]) > 0.1) | ((bboxes[:, 3] - bboxes[:, 1]) > 0.1))
            bboxes = bboxes[flag]
        if width >= height:
            bboxes[:, :4] *= width
            bboxes[:, 1] -= start*width/tgt_width
            bboxes[:, 3] -= start*width/tgt_width
        else:
            bboxes[:, :4] *= height
            bboxes[:, 0] -= start*height/tgt_width
            bboxes[:, 2] -= start*height/tgt_width
        # return bounding box
        return bboxes
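A usage sketch for the YOLOv4 wrapper above (not part of the commit); ckpt_path and the threshold values are assumptions. predict_single expects an RGB image and returns an (N, 5) array of pixel-space x1, y1, x2, y2 plus confidence.

import cv2
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector = YOLOv4(device=device, ckpt_path='data/models/yolov4.weights',  # assumed weights path
                  box_nms_thres=0.45, conf_thres=0.3)                     # example thresholds
image = cv2.cvtColor(cv2.imread('demo.jpg'), cv2.COLOR_BGR2RGB)           # the wrapper expects RGB
bboxes = detector.predict_single(image)  # (N, 5): x1, y1, x2, y2, conf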
322
easymocap/estimator/YOLOv4/yolo_layer.py
Normal file
@ -0,0 +1,322 @@
import torch.nn as nn
import torch.nn.functional as F
from .torch_utils import *


def yolo_forward(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
                 validation=False):
    # Output would be invalid if it does not satisfy this assert
    # assert (output.size(1) == (5 + num_classes) * num_anchors)

    # print(output.size())

    # Slice the second dimension (channel) of output into:
    # [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
    # And then into
    # bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
    batch = output.size(0)
    H = output.size(2)
    W = output.size(3)

    bxy_list = []
    bwh_list = []
    det_confs_list = []
    cls_confs_list = []

    for i in range(num_anchors):
        begin = i * (5 + num_classes)
        end = (i + 1) * (5 + num_classes)

        bxy_list.append(output[:, begin : begin + 2])
        bwh_list.append(output[:, begin + 2 : begin + 4])
        det_confs_list.append(output[:, begin + 4 : begin + 5])
        cls_confs_list.append(output[:, begin + 5 : end])

    # Shape: [batch, num_anchors * 2, H, W]
    bxy = torch.cat(bxy_list, dim=1)
    # Shape: [batch, num_anchors * 2, H, W]
    bwh = torch.cat(bwh_list, dim=1)

    # Shape: [batch, num_anchors, H, W]
    det_confs = torch.cat(det_confs_list, dim=1)
    # Shape: [batch, num_anchors * H * W]
    det_confs = det_confs.view(batch, num_anchors * H * W)

    # Shape: [batch, num_anchors * num_classes, H, W]
    cls_confs = torch.cat(cls_confs_list, dim=1)
    # Shape: [batch, num_anchors, num_classes, H * W]
    cls_confs = cls_confs.view(batch, num_anchors, num_classes, H * W)
    # Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
    cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(batch, num_anchors * H * W, num_classes)

    # Apply sigmoid(), exp() and softmax() to slices
    bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
    bwh = torch.exp(bwh)
    det_confs = torch.sigmoid(det_confs)
    cls_confs = torch.sigmoid(cls_confs)

    # Prepare C-x, C-y, P-w, P-h (None of them are torch related)
    grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, W - 1, W), axis=0).repeat(H, 0), axis=0), axis=0)
    grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, H - 1, H), axis=1).repeat(W, 1), axis=0), axis=0)
    # grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
    # grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)

    anchor_w = []
    anchor_h = []
    for i in range(num_anchors):
        anchor_w.append(anchors[i * 2])
        anchor_h.append(anchors[i * 2 + 1])

    device = None
    cuda_check = output.is_cuda
    if cuda_check:
        device = output.get_device()

    bx_list = []
    by_list = []
    bw_list = []
    bh_list = []

    # Apply C-x, C-y, P-w, P-h
    for i in range(num_anchors):
        ii = i * 2
        # Shape: [batch, 1, H, W]
        bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32)  # grid_x.to(device=device, dtype=torch.float32)
        # Shape: [batch, 1, H, W]
        by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32)  # grid_y.to(device=device, dtype=torch.float32)
        # Shape: [batch, 1, H, W]
        bw = bwh[:, ii : ii + 1] * anchor_w[i]
        # Shape: [batch, 1, H, W]
        bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]

        bx_list.append(bx)
        by_list.append(by)
        bw_list.append(bw)
        bh_list.append(bh)

    ########################################
    #   Figure out bboxes from slices     #
    ########################################

    # Shape: [batch, num_anchors, H, W]
    bx = torch.cat(bx_list, dim=1)
    by = torch.cat(by_list, dim=1)
    bw = torch.cat(bw_list, dim=1)
    bh = torch.cat(bh_list, dim=1)

    # Shape: [batch, 2 * num_anchors, H, W]
    bx_bw = torch.cat((bx, bw), dim=1)
    by_bh = torch.cat((by, bh), dim=1)

    # normalize coordinates to [0, 1]
    bx_bw /= W
    by_bh /= H

    # Shape: [batch, num_anchors * H * W, 1]
    bx = bx_bw[:, :num_anchors].view(batch, num_anchors * H * W, 1)
    by = by_bh[:, :num_anchors].view(batch, num_anchors * H * W, 1)
    bw = bx_bw[:, num_anchors:].view(batch, num_anchors * H * W, 1)
    bh = by_bh[:, num_anchors:].view(batch, num_anchors * H * W, 1)

    bx1 = bx - bw * 0.5
    by1 = by - bh * 0.5
    bx2 = bx1 + bw
    by2 = by1 + bh

    # Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
    boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(batch, num_anchors * H * W, 1, 4)
    # boxes = boxes.repeat(1, 1, num_classes, 1)

    # boxes:     [batch, num_anchors * H * W, 1, 4]
    # cls_confs: [batch, num_anchors * H * W, num_classes]
    # det_confs: [batch, num_anchors * H * W]

    det_confs = det_confs.view(batch, num_anchors * H * W, 1)
    confs = cls_confs * det_confs

    # boxes: [batch, num_anchors * H * W, 1, 4]
    # confs: [batch, num_anchors * H * W, num_classes]

    return boxes, confs


def yolo_forward_dynamic(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
                         validation=False):
    # Output would be invalid if it does not satisfy this assert
    # assert (output.size(1) == (5 + num_classes) * num_anchors)

    # print(output.size())

    # Slice the second dimension (channel) of output into:
    # [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
    # And then into
    # bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
    # batch = output.size(0)
    # H = output.size(2)
    # W = output.size(3)

    bxy_list = []
    bwh_list = []
    det_confs_list = []
    cls_confs_list = []

    for i in range(num_anchors):
        begin = i * (5 + num_classes)
        end = (i + 1) * (5 + num_classes)

        bxy_list.append(output[:, begin : begin + 2])
        bwh_list.append(output[:, begin + 2 : begin + 4])
        det_confs_list.append(output[:, begin + 4 : begin + 5])
        cls_confs_list.append(output[:, begin + 5 : end])

    # Shape: [batch, num_anchors * 2, H, W]
    bxy = torch.cat(bxy_list, dim=1)
    # Shape: [batch, num_anchors * 2, H, W]
    bwh = torch.cat(bwh_list, dim=1)

    # Shape: [batch, num_anchors, H, W]
    det_confs = torch.cat(det_confs_list, dim=1)
    # Shape: [batch, num_anchors * H * W]
    det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3))

    # Shape: [batch, num_anchors * num_classes, H, W]
    cls_confs = torch.cat(cls_confs_list, dim=1)
    # Shape: [batch, num_anchors, num_classes, H * W]
    cls_confs = cls_confs.view(output.size(0), num_anchors, num_classes, output.size(2) * output.size(3))
    # Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
    cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(output.size(0), num_anchors * output.size(2) * output.size(3), num_classes)

    # Apply sigmoid(), exp() and softmax() to slices
    bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
    bwh = torch.exp(bwh)
    det_confs = torch.sigmoid(det_confs)
    cls_confs = torch.sigmoid(cls_confs)

    # Prepare C-x, C-y, P-w, P-h (None of them are torch related)
    grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(3) - 1, output.size(3)), axis=0).repeat(output.size(2), 0), axis=0), axis=0)
    grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(2) - 1, output.size(2)), axis=1).repeat(output.size(3), 1), axis=0), axis=0)
    # grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
    # grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)

    anchor_w = []
    anchor_h = []
    for i in range(num_anchors):
        anchor_w.append(anchors[i * 2])
        anchor_h.append(anchors[i * 2 + 1])

    device = None
    cuda_check = output.is_cuda
    if cuda_check:
        device = output.get_device()

    bx_list = []
    by_list = []
    bw_list = []
    bh_list = []

    # Apply C-x, C-y, P-w, P-h
    for i in range(num_anchors):
        ii = i * 2
        # Shape: [batch, 1, H, W]
        bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32)  # grid_x.to(device=device, dtype=torch.float32)
        # Shape: [batch, 1, H, W]
        by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32)  # grid_y.to(device=device, dtype=torch.float32)
        # Shape: [batch, 1, H, W]
        bw = bwh[:, ii : ii + 1] * anchor_w[i]
        # Shape: [batch, 1, H, W]
        bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]

        bx_list.append(bx)
        by_list.append(by)
        bw_list.append(bw)
        bh_list.append(bh)

    ########################################
    #   Figure out bboxes from slices     #
    ########################################

    # Shape: [batch, num_anchors, H, W]
    bx = torch.cat(bx_list, dim=1)
    by = torch.cat(by_list, dim=1)
    bw = torch.cat(bw_list, dim=1)
    bh = torch.cat(bh_list, dim=1)

    # Shape: [batch, 2 * num_anchors, H, W]
    bx_bw = torch.cat((bx, bw), dim=1)
    by_bh = torch.cat((by, bh), dim=1)

    # normalize coordinates to [0, 1]
    bx_bw /= output.size(3)
    by_bh /= output.size(2)

    # Shape: [batch, num_anchors * H * W, 1]
    bx = bx_bw[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
    by = by_bh[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
    bw = bx_bw[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
    bh = by_bh[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)

    bx1 = bx - bw * 0.5
    by1 = by - bh * 0.5
    bx2 = bx1 + bw
    by2 = by1 + bh

    # Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
    boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(output.size(0), num_anchors * output.size(2) * output.size(3), 1, 4)
    # boxes = boxes.repeat(1, 1, num_classes, 1)

    # boxes:     [batch, num_anchors * H * W, 1, 4]
    # cls_confs: [batch, num_anchors * H * W, num_classes]
    # det_confs: [batch, num_anchors * H * W]

    det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
    confs = cls_confs * det_confs

    # boxes: [batch, num_anchors * H * W, 1, 4]
    # confs: [batch, num_anchors * H * W, num_classes]

    return boxes, confs


class YoloLayer(nn.Module):
    ''' Yolo layer
    model_out: during inference, whether post-processing happens inside or outside the model
        true: outside
    '''
    def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, stride=32, model_out=False):
        super(YoloLayer, self).__init__()
        self.anchor_mask = anchor_mask
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.stride = stride
        self.seen = 0
        self.scale_x_y = 1

        self.model_out = model_out

    def forward(self, output, target=None):
        if self.training:
            return output
        masked_anchors = []
        for m in self.anchor_mask:
            masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step]
        masked_anchors = [anchor / self.stride for anchor in masked_anchors]

        return yolo_forward_dynamic(output, self.thresh, self.num_classes, masked_anchors, len(self.anchor_mask), scale_x_y=self.scale_x_y)
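A worked example of the masked-anchor scaling in YoloLayer.forward, using the first three COCO anchors listed in the comment of utils.post_processing: dividing by the stride expresses anchor sizes in feature-grid cells, matching the grid-relative bw/bh decoded above.

anchors = [12, 16, 19, 36, 40, 28]  # pixel-space (w, h) pairs
anchor_mask = [0, 1, 2]
anchor_step = 2
stride = 8
masked_anchors = []
for m in anchor_mask:
    masked_anchors += anchors[m * anchor_step:(m + 1) * anchor_step]
masked_anchors = [a / stride for a in masked_anchors]
print(masked_anchors)  # [1.5, 2.0, 2.375, 4.5, 5.0, 3.5]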
1157
easymocap/estimator/YOLOv4/yolov4.cfg
Normal file
File diff suppressed because it is too large
122
easymocap/estimator/yolohrnet_wrapper.py
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
from ..annotator.file_utils import read_json
|
||||||
|
from .wrapper_base import check_result, create_annot_file, save_annot
|
||||||
|
from glob import glob
|
||||||
|
from os.path import join
|
||||||
|
from tqdm import tqdm
|
||||||
|
import os
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def detect_frame(detector, img, pid=0, only_bbox=False):
|
||||||
|
lDetections = detector.detect([img], only_bbox=only_bbox)[0]
|
||||||
|
annots = []
|
||||||
|
for i in range(len(lDetections)):
|
||||||
|
annot = {
|
||||||
|
'bbox': [float(d) for d in lDetections[i]['bbox']],
|
||||||
|
'personID': pid + i,
|
||||||
|
'isKeyframe': False
|
||||||
|
}
|
||||||
|
if not only_bbox:
|
||||||
|
annot['keypoints'] = lDetections[i]['keypoints'].tolist()
|
||||||
|
annots.append(annot)
|
||||||
|
return annots
|
||||||
|
|
||||||
|
def extract_bbox(image_root, annot_root, ext, **config):
|
||||||
|
force = config.pop('force')
|
||||||
|
if check_result(image_root, annot_root) and not force:
|
||||||
|
return 0
|
||||||
|
import torch
|
||||||
|
from .YOLOv4 import YOLOv4
|
||||||
|
device = torch.device('cuda') \
|
||||||
|
if torch.cuda.is_available() else torch.device('cpu')
|
||||||
|
detector = YOLOv4(device=device, **config)
|
||||||
|
imgnames = sorted(glob(join(image_root, '*'+ext)))
|
||||||
|
if len(imgnames) == 0:
|
||||||
|
ext = '.png'
|
||||||
|
imgnames = sorted(glob(join(image_root, '*'+ext)))
|
||||||
|
# run_yolo(image_root, )
|
||||||
|
for imgname in tqdm(imgnames, desc='{:10s}'.format(os.path.basename(annot_root))):
|
||||||
|
base = os.path.basename(imgname).replace(ext, '')
|
||||||
|
annotname = join(annot_root, base+'.json')
|
||||||
|
annot = create_annot_file(annotname, imgname)
|
||||||
|
image = cv2.imread(imgname)
|
||||||
|
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||||
|
detections = detector.predict_single(image_rgb)
|
||||||
|
annots = []
|
||||||
|
pid = 0
|
||||||
|
for i in range(len(detections)):
|
||||||
|
annot_ = {
|
||||||
|
'bbox': [float(d) for d in detections[i]],
|
||||||
|
'isKeyframe': False
|
||||||
|
}
|
||||||
|
annot_['area'] = max(annot_['bbox'][2] - annot_['bbox'][0], annot_['bbox'][3] - annot_['bbox'][1])**2
|
||||||
|
annots.append(annot_)
|
||||||
|
annots.sort(key=lambda x:-x['area'])
|
||||||
|
# re-assign the person ID
|
||||||
|
for i in range(len(annots)):
|
||||||
|
annots[i]['personID'] = i + pid
|
||||||
|
annot['annots'] = annots
|
||||||
|
save_annot(annotname, annot)
|
||||||
|
|
||||||
|
def extract_hrnet(image_root, annot_root, ext, **config):
    config.pop('force')
    import torch
    imgnames = sorted(glob(join(image_root, '*'+ext)))
    device = torch.device('cuda') \
        if torch.cuda.is_available() else torch.device('cpu')
    from .HRNet import SimpleHRNet
    estimator = SimpleHRNet(device=device, **config)

    for imgname in tqdm(imgnames, desc='{:10s}'.format(os.path.basename(annot_root))):
        base = os.path.basename(imgname).replace(ext, '')
        annotname = join(annot_root, base+'.json')
        # keypoints are estimated inside the bboxes stored by extract_bbox
        annots = read_json(annotname)
        detections = np.array([data['bbox'] for data in annots['annots']])
        image = cv2.imread(imgname)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        points2d = estimator.predict(image_rgb, detections)
        for i in range(detections.shape[0]):
            annot_ = annots['annots'][i]
            annot_['keypoints'] = points2d[i]
        save_annot(annotname, annots)

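# Shape contract assumed by the loop above: detections is an (N, >=4) array of
# person boxes, and estimator.predict returns one keypoint set per box, so
# points2d[i] (one [x, y, confidence] triplet per joint) is attached back onto
# the i-th stored bbox.
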
def extract_yolo_hrnet(image_root, annot_root, ext, config_yolo, config_hrnet):
    config_yolo.pop('ext', None)
    imgnames = sorted(glob(join(image_root, '*{}'.format(ext))))
    import torch
    device = torch.device('cuda') \
        if torch.cuda.is_available() else torch.device('cpu')
    from .YOLOv4 import YOLOv4
    from .HRNet import SimpleHRNet
    detector = YOLOv4(device=device, **config_yolo)
    estimator = SimpleHRNet(device=device, **config_hrnet)

    for nf, imgname in enumerate(tqdm(imgnames, desc=os.path.basename(image_root))):
        base = os.path.basename(imgname).replace(ext, '')
        annotname = join(annot_root, base+'.json')
        annot = create_annot_file(annotname, imgname)
        image = cv2.imread(imgname)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # stage 1: detect person bboxes
        detections = detector.predict_single(image_rgb)
        # stage 2: estimate keypoints inside each bbox
        points2d = estimator.predict(image_rgb, detections)
        annots = []
        pid = 0
        for i in range(len(detections)):
            annot_ = {
                'bbox': [float(d) for d in detections[i]],
                'keypoints': points2d[i],
                'isKeyframe': False
            }
            annot_['area'] = max(annot_['bbox'][2] - annot_['bbox'][0],
                                 annot_['bbox'][3] - annot_['bbox'][1])**2
            annots.append(annot_)
        annots.sort(key=lambda x: -x['area'])
        # re-assign the person ID
        for i in range(len(annots)):
            annots[i]['personID'] = i + pid
        annot['annots'] = annots
        save_annot(annotname, annot)
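
# A minimal end-to-end sketch (config keys are assumptions; the real ones come
# from the calling script's configuration):
#   config_yolo = {'ckpt_path': 'data/models/yolov4.weights'}
#   config_hrnet = {'ckpt_path': 'data/models/pose_hrnet_w48.pth'}
#   extract_yolo_hrnet('data/examples/images/0', 'data/examples/annots/0',
#                      '.jpg', config_yolo, config_hrnet)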
184
scripts/preprocess/copy_dataset.py
Normal file
@ -0,0 +1,184 @@
'''
@ Date: 2021-06-14 15:39:26
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-08-02 21:50:40
@ FilePath: /EasyMocapPublic/scripts/preprocess/copy_dataset.py
'''
import os
import json
import shutil
from os.path import join
from glob import glob
from tqdm import tqdm
import cv2

from easymocap.mytools.debug_utils import myerror, mywarn

mkdir = lambda x: os.makedirs(x, exist_ok=True)

def save_json(file, data):
    dirname = os.path.dirname(file)
    # guard against plain filenames whose dirname is ''
    if dirname != '' and not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(file, 'w') as f:
        json.dump(data, f, indent=4)

def read_json(path):
    with open(path) as f:
        data = json.load(f)
    return data

def copy_dataset(inp, out, start, end, step, keys, args):
    # per-camera data and the extension each kind is stored with
    copy_keys = {
        'images': args.ext,
        'annots': '.json',
        'mask-schp': '.png',
    }
    # data shared by all cameras, copied without a per-camera sub-folder
    copy_share_keys = {
        'output-keypoints3d/keypoints3d': '.json'
    }
    mkdir(out)
    if os.path.exists(join(inp, 'intri.yml')):
        shutil.copyfile(join(inp, 'intri.yml'), join(out, 'intri.yml'))
        shutil.copyfile(join(inp, 'extri.yml'), join(out, 'extri.yml'))
    if os.path.exists(join(inp, 'match_name.json')):
        names = read_json(join(inp, 'match_name.json'))
        names = names[start:end:step]
        save_json(join(out, 'match_name.json'), names)
    if os.path.exists(join(inp, 'sync_time.txt')):
        import numpy as np
        times = np.loadtxt(join(inp, 'sync_time.txt'))
        times = times.reshape(times.shape[0], -1)
        times = times[:, start:end:step]
        np.savetxt(join(out, 'sync_time.txt'), times, fmt='%10d')
    # record the selected range in an empty marker file
    os.system('touch ' + join(out, '{}-{}-{}'.format(start, end, step)))
    for copy, ext in copy_share_keys.items():
        if not os.path.exists(join(inp, copy)):
            continue
        if len(args.frames) == 0:
            ranges = [i for i in range(start, end, step)]
        else:
            ranges = args.frames
        outdir = join(out, copy)
        if os.path.exists(outdir) and len(os.listdir(outdir)) == len(ranges):
            # already copied; skip (mirrors the per-camera loop below)
            continue
        os.makedirs(outdir, exist_ok=True)
        for nnf, nf in enumerate(tqdm(ranges, desc='{}'.format(copy))):
            oldname = join(inp, copy, '{:06d}{}'.format(nf, ext))
            if not os.path.exists(oldname):
                mywarn('{} does not exist'.format(oldname))
                continue
            newname = join(outdir, '{:06d}{}'.format(nnf, ext))
            shutil.copyfile(oldname, newname)

    for copy in keys:
        ext = copy_keys.get(copy, '.json')
        if not os.path.exists(join(inp, copy)):
            continue
        if len(args.subs) == 0:
            subs = sorted(os.listdir(join(inp, copy)))
            subs = [s for s in subs if os.path.isdir(join(inp, copy, s))]
        else:
            subs = args.subs
        for sub in subs:
            outdir = join(out, copy, sub.replace(args.strip, ''))
            os.makedirs(outdir, exist_ok=True)
            if args.end == -1:
                # use all available frames of this camera
                oldnames = sorted(glob(join(inp, copy, sub, '*{}'.format(ext))))
                end = len(oldnames)
                print('{} has {} frames'.format(sub, end))
            if args.sample == -1:
                if len(args.frames) == 0:
                    ranges = [i for i in range(start, end, step)]
                else:
                    ranges = args.frames
            else:
                # sample a fixed number of frames, skipping strip_frame at both ends
                ranges = [(i/args.sample)*(end-start-2*args.strip_frame)+start+args.strip_frame for i in range(args.sample)]
                ranges = [int(i+0.5) for i in ranges]
            if os.path.exists(outdir) and len(os.listdir(outdir)) == len(ranges):
                mywarn('[copy] Skip {}'.format(outdir))
                continue
            for nnf, nf in enumerate(tqdm(ranges, desc='{}:{}'.format(sub, copy))):
                oldname = join(inp, copy, sub, '{:06d}{}'.format(nf, ext))
                if not os.path.exists(oldname):
                    # some data is stored as multiple files per frame: 000000_*.ext
                    oldnames = sorted(glob(join(inp, copy, sub, '{:06d}_*{}'.format(nf, ext))))
                    if len(oldnames) == 0:
                        myerror('{} does not exist'.format(oldname))
                        import ipdb; ipdb.set_trace()
                    else:
                        for oldname in oldnames:
                            newname = join(outdir, os.path.basename(oldname).replace('{:06d}'.format(nf), '{:06d}'.format(nnf)))
                            shutil.copyfile(oldname, newname)
                else:
                    newname = join(outdir, '{:06d}{}'.format(nnf, ext))
                    if copy == 'images' and args.scale != 1:
                        img = cv2.imread(oldname)
                        img = cv2.resize(img, None, fx=args.scale, fy=args.scale)
                        cv2.imwrite(newname, img)
                    else:
                        shutil.copyfile(oldname, newname)
        # make videos
        if copy == 'images' and args.make_video:
            os.makedirs(join(out, 'videos'), exist_ok=True)
            for sub in subs:
                shell = '{} -y -i {}/images/{}/%06d{} -vcodec libx264 {}/videos/{}.mp4 -loglevel quiet'.format(
                    args.ffmpeg, out, sub, ext, out, sub
                )
                print(shell)
                os.system(shell)

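# To make the --sample arithmetic above concrete: with start=0, end=1000,
# strip_frame=50 and sample=4, the usable span is 1000 - 0 - 2*50 = 900 frames:
#   ranges = [(i/4)*900 + 0 + 50 for i in range(4)]  # [50.0, 275.0, 500.0, 725.0]
#   ranges = [int(i+0.5) for i in ranges]            # [50, 275, 500, 725]
# i.e. evenly spaced frames, keeping 50 frames clear of each end.
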
def export(root, out, keys):
    mkdir(out)
    for key in keys:
        src = join(root, key)
        dst = join(out, key)
        if key == 'videos':
            if os.path.exists(src):
                shutil.copytree(src, dst)
            else:
                # no video folder yet: encode one video per camera from the images
                mkdir(dst)
                subs = sorted(os.listdir(join(root, 'images')))
                for sub in subs:
                    # note: relies on the module-level `args` set in __main__
                    cmd = '{ffmpeg} -r {fps} -i {inp}/%06d.jpg -vcodec libx264 {out}'.format(
                        ffmpeg=args.ffmpeg, fps=50, inp=join(root, 'images', sub),
                        out=join(dst, sub+'.mp4')
                    )
                    os.system(cmd)
            continue  # videos are handled above; don't copy the tree twice
        if not os.path.exists(src):
            print(src)
            continue
        shutil.copytree(src, dst)
    for name in ['intri.yml', 'extri.yml']:
        if os.path.exists(join(root, name)):
            shutil.copyfile(join(root, name), join(out, name))

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str)
    parser.add_argument('out', type=str)
    parser.add_argument('--strip', type=str, default='')
    parser.add_argument('--keys', type=str, nargs='+', default=['images', 'annots', 'chessboard'])
    parser.add_argument('--subs', type=str, nargs='+', default=[])
    parser.add_argument('--start', type=int, default=0)
    parser.add_argument('--step', type=int, default=1)
    parser.add_argument('--end', type=int, default=-1)
    parser.add_argument('--scale', type=float, default=1)
    parser.add_argument('--strip_frame', type=int, default=0,
        help='remove the start frames and end frames')
    parser.add_argument('--ffmpeg', type=str, default='ffmpeg')
    parser.add_argument('--ext', type=str, default='.jpg')
    parser.add_argument('--sample', type=int, default=-1,
        help='use this flag to sample a fixed number of frames')
    parser.add_argument('--frames', type=int, default=[], nargs='+')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--make_video', action='store_true')
    parser.add_argument('--export', action='store_true')
    args = parser.parse_args()
    if args.export:
        export(args.path, args.out, args.keys)
    else:
        copy_dataset(args.path, args.out, start=args.start, end=args.end, step=args.step, keys=args.keys, args=args)
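
Typical invocations of this script (paths are placeholders): copy every 5th frame of the first 1000, downscaling the images by half,

    python3 scripts/preprocess/copy_dataset.py /path/to/src /path/to/dst --start 0 --end 1000 --step 5 --scale 0.5 --keys images annots

or export a finished dataset, copying (or re-encoding) the per-camera videos:

    python3 scripts/preprocess/copy_dataset.py /path/to/src /path/to/dst --export --keys videos annots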