support YOLOv4 + HRNet

This commit is contained in:
Qing Shuai 2022-08-22 00:07:46 +08:00
parent af452c6949
commit 0175f07290
16 changed files with 4143 additions and 0 deletions

View File

@ -0,0 +1,8 @@
'''
@ Date: 2020-06-04 12:48:29
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-11-17 15:52:23
@ Author: Qing Shuai
@ Mail: s_q@zju.edu.cn
'''
from .hrnet_api import SimpleHRNet
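
# --- Hedged usage sketch (illustration only, not part of this commit) ---
# Assumes a w32 COCO checkpoint on disk; the constructor/predict signatures
# below are the ones defined in hrnet_api.py in this commit, while the file
# names are placeholders.
#
#   import cv2, torch
#   device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#   estimator = SimpleHRNet(32, 17, './weights/pose_hrnet_w32_256x192.pth', device)
#   image = cv2.imread('frame.jpg')
#   bboxes = [[100., 50., 300., 450.]]            # (x_min, y_min, x_max, y_max)
#   keypoints = estimator.predict(image, bboxes)  # -> (N, 25, 3) body25 keypoints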

View File

@ -0,0 +1,216 @@
import torch
from torch import nn
from .modules import BasicBlock, Bottleneck
class StageModule(nn.Module):
def __init__(self, stage, output_branches, c, bn_momentum):
super(StageModule, self).__init__()
self.stage = stage
self.output_branches = output_branches
self.branches = nn.ModuleList()
for i in range(self.stage):
w = c * (2 ** i)
branch = nn.Sequential(
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
BasicBlock(w, w, bn_momentum=bn_momentum),
)
self.branches.append(branch)
self.fuse_layers = nn.ModuleList()
        # one fuse layer per output branch (every branch, except in the very last module which keeps only one)
for i in range(self.output_branches):
self.fuse_layers.append(nn.ModuleList())
for j in range(self.stage): # for each branch
if i == j:
self.fuse_layers[-1].append(nn.Sequential()) # Used in place of "None" because it is callable
elif i < j:
self.fuse_layers[-1].append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
nn.Upsample(scale_factor=(2.0 ** (j - i)), mode='nearest'),
))
elif i > j:
ops = []
for k in range(i - j - 1):
ops.append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** j), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
bias=False),
nn.BatchNorm2d(c * (2 ** j), eps=1e-05, momentum=0.1, affine=True,
track_running_stats=True),
nn.ReLU(inplace=True),
))
ops.append(nn.Sequential(
nn.Conv2d(c * (2 ** j), c * (2 ** i), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1),
bias=False),
nn.BatchNorm2d(c * (2 ** i), eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
))
self.fuse_layers[-1].append(nn.Sequential(*ops))
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
assert len(self.branches) == len(x)
x = [branch(b) for branch, b in zip(self.branches, x)]
x_fused = []
for i in range(len(self.fuse_layers)):
for j in range(0, len(self.branches)):
if j == 0:
x_fused.append(self.fuse_layers[i][0](x[0]))
else:
x_fused[i] = x_fused[i] + self.fuse_layers[i][j](x[j])
for i in range(len(x_fused)):
x_fused[i] = self.relu(x_fused[i])
return x_fused
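# Shape-check sketch for StageModule (illustration only): with stage=3 and
# output_branches=3, the module takes three feature maps at 1x, 1/2x, 1/4x
# resolution with c, 2c, 4c channels and returns fused maps of the same shapes.
#
#   m = StageModule(stage=3, output_branches=3, c=32, bn_momentum=0.1)
#   xs = [torch.randn(1, 32, 64, 48),
#         torch.randn(1, 64, 32, 24),
#         torch.randn(1, 128, 16, 12)]
#   ys = m(xs)
#   # [tuple(y.shape) for y in ys] == [(1, 32, 64, 48), (1, 64, 32, 24), (1, 128, 16, 12)]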
class HRNet(nn.Module):
def __init__(self, c=48, nof_joints=17, bn_momentum=0.1):
super(HRNet, self).__init__()
# Input (stem net)
self.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
self.bn1 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
self.bn2 = nn.BatchNorm2d(64, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True)
self.relu = nn.ReLU(inplace=True)
# Stage 1 (layer1) - First group of bottleneck (resnet) modules
downsample = nn.Sequential(
nn.Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
nn.BatchNorm2d(256, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
)
self.layer1 = nn.Sequential(
Bottleneck(64, 64, downsample=downsample),
Bottleneck(256, 64),
Bottleneck(256, 64),
Bottleneck(256, 64),
)
# Fusion layer 1 (transition1) - Creation of the first two branches (one full and one half resolution)
self.transition1 = nn.ModuleList([
nn.Sequential(
nn.Conv2d(256, c, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
nn.BatchNorm2d(c, eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
),
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(256, c * (2 ** 1), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 1), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
)),
])
        # Stage 2 (stage2) - Second module with 1 group of basic (resnet) blocks. This has 2 branches
self.stage2 = nn.Sequential(
StageModule(stage=2, output_branches=2, c=c, bn_momentum=bn_momentum),
)
# Fusion layer 2 (transition2) - Creation of the third branch (1/4 resolution)
self.transition2 = nn.ModuleList([
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(c * (2 ** 1), c * (2 ** 2), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 2), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive only from the lowest-resolution branch?
])
        # Stage 3 (stage3) - Third module with 4 groups of basic (resnet) blocks. This has 3 branches
self.stage3 = nn.Sequential(
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
StageModule(stage=3, output_branches=3, c=c, bn_momentum=bn_momentum),
)
# Fusion layer 3 (transition3) - Creation of the fourth branch (1/8 resolution)
self.transition3 = nn.ModuleList([
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(), # None, - Used in place of "None" because it is callable
nn.Sequential(nn.Sequential( # Double Sequential to fit with official pretrained weights
nn.Conv2d(c * (2 ** 2), c * (2 ** 3), kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
nn.BatchNorm2d(c * (2 ** 3), eps=1e-05, momentum=bn_momentum, affine=True, track_running_stats=True),
nn.ReLU(inplace=True),
            )),  # ToDo: why does the new branch derive only from the lowest-resolution branch?
])
        # Stage 4 (stage4) - Fourth module with 3 groups of basic (resnet) blocks. This has 4 branches; the last module fuses everything into the single full-resolution branch (output_branches=1)
self.stage4 = nn.Sequential(
StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
StageModule(stage=4, output_branches=4, c=c, bn_momentum=bn_momentum),
StageModule(stage=4, output_branches=1, c=c, bn_momentum=bn_momentum),
)
# Final layer (final_layer)
self.final_layer = nn.Conv2d(c, nof_joints, kernel_size=(1, 1), stride=(1, 1))
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.layer1(x)
        x = [trans(x) for trans in self.transition1]  # from here on, x is a list (length == number of branches)
x = self.stage2(x)
        # x = [trans(x[-1]) for trans in self.transition2]  # new branch derives from the lowest-resolution branch only
        x = [
            self.transition2[0](x[0]),
            self.transition2[1](x[1]),
            self.transition2[2](x[-1])
        ]  # new branch derives from the lowest-resolution branch only
x = self.stage3(x)
        # x = [trans(x) for trans in self.transition3]  # new branch derives from the lowest-resolution branch only
        x = [
            self.transition3[0](x[0]),
            self.transition3[1](x[1]),
            self.transition3[2](x[2]),
            self.transition3[3](x[-1])
        ]  # new branch derives from the lowest-resolution branch only
x = self.stage4(x)
x = self.final_layer(x[0])
return x
if __name__ == '__main__':
# model = HRNet(48, 17, 0.1)
model = HRNet(32, 17, 0.1)
# print(model)
model.load_state_dict(
# torch.load('./weights/pose_hrnet_w48_384x288.pth')
torch.load('./weights/pose_hrnet_w32_256x192.pth')
)
print('ok!!')
    if torch.cuda.is_available() and False:  # CUDA intentionally disabled for this smoke test
torch.backends.cudnn.deterministic = True
device = torch.device('cuda:0')
else:
device = torch.device('cpu')
print(device)
model = model.to(device)
y = model(torch.ones(1, 3, 384, 288).to(device))
print(y.shape)
print(torch.min(y).item(), torch.mean(y).item(), torch.max(y).item())

View File

@ -0,0 +1,527 @@
'''
@ Date: 2020-06-04 12:47:04
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-04-19 17:02:57
@ Author: Qing Shuai
@ Mail: s_q@zju.edu.cn
'''
from os.path import join
import cv2
import numpy as np
import torch
from torchvision.transforms import transforms
from .hrnet import HRNet
COCO17_IN_BODY25 = [0,16,15,18,17,5,2,6,3,7,4,12,9,13,10,14,11]
pairs = [[1, 8], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [1, 0], [0,15], [15,17], [0,16], [16,18], [14,19], [19,20], [14,21], [11,22], [22,23], [11,24]]
def coco17tobody25(points2d):
kpts = np.zeros((points2d.shape[0], 25, 3))
kpts[:, COCO17_IN_BODY25, :2] = points2d[:, :, :2]
kpts[:, COCO17_IN_BODY25, 2:3] = points2d[:, :, 2:3]
kpts[:, 8, :2] = kpts[:, [9, 12], :2].mean(axis=1)
kpts[:, 8, 2] = kpts[:, [9, 12], 2].min(axis=1)
kpts[:, 1, :2] = kpts[:, [2, 5], :2].mean(axis=1)
kpts[:, 1, 2] = kpts[:, [2, 5], 2].min(axis=1)
    # swap the x/y order if needed
    # kpts = kpts[:, :, [1,0,2]]
return kpts
# generate a Gaussian kernel
def generate_gauss(sigma):
tmp_size = sigma * 3
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
# The gaussian is not normalized, we want the center value to equal 1
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
return g, tmp_size
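# Quick check (sketch): for a given sigma the kernel is square with side
# 6 * sigma + 1 and its center value is exactly 1.
#
#   g, r = generate_gauss(2)
#   # g.shape == (13, 13), r == 6, g[r, r] == 1.0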
gauss = {}
for SIGMA in range(1, 5):
gauss_kernel, gauss_radius = generate_gauss(SIGMA)
gauss[SIGMA] = {
'kernel': gauss_kernel,
'radius': gauss_radius
}
def box_to_center_scale(box, model_image_width, model_image_height, scale_factor=1.25):
"""convert a box to center,scale information required for pose transformation
Parameters
----------
box : list of tuple
list of length 2 with two tuples of floats representing
bottom left and top right corner of a box
model_image_width : int
model_image_height : int
Returns
-------
(numpy array, numpy array)
Two numpy arrays, coordinates for the center of the box and the scale of the box
"""
center = np.zeros((2), dtype=np.float32)
bottom_left_corner = (box[0], box[1])
top_right_corner = (box[2], box[3])
box_width = top_right_corner[0]-bottom_left_corner[0]
box_height = top_right_corner[1]-bottom_left_corner[1]
bottom_left_x = bottom_left_corner[0]
bottom_left_y = bottom_left_corner[1]
center[0] = bottom_left_x + box_width * 0.5
center[1] = bottom_left_y + box_height * 0.5
aspect_ratio = model_image_width * 1.0 / model_image_height
pixel_std = 200
if box_width > aspect_ratio * box_height:
box_height = box_width * 1.0 / aspect_ratio
elif box_width < aspect_ratio * box_height:
box_width = box_height * aspect_ratio
scale = np.array(
[box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
dtype=np.float32)
scale = scale * scale_factor
return center, scale
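# Worked example (sketch): box (100, 100, 200, 300) with a 288x384 model input
# (aspect ratio 0.75). The 100x200 box is narrower than 0.75 * 200 = 150, so
# the width is padded to 150; with pixel_std = 200 and scale_factor = 1.25:
#
#   center, scale = box_to_center_scale([100, 100, 200, 300], 288, 384)
#   # center == [150., 200.]
#   # scale  == [150/200 * 1.25, 200/200 * 1.25] == [0.9375, 1.25]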
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_affine_transform(
center, scale, rot, output_size,
shift=np.array([0, 0], dtype=np.float32), inv=0
):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
print(scale)
scale = np.array([scale, scale])
scale_tmp = scale * 200.0
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def get_max_preds(batch_heatmaps):
'''
get predictions from score maps
heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
'''
assert isinstance(batch_heatmaps, np.ndarray), \
'batch_heatmaps should be numpy.ndarray'
assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'
batch_size = batch_heatmaps.shape[0]
num_joints = batch_heatmaps.shape[1]
width = batch_heatmaps.shape[3]
heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
idx = np.argmax(heatmaps_reshaped, 2)
maxvals = np.amax(heatmaps_reshaped, 2)
maxvals = maxvals.reshape((batch_size, num_joints, 1))
idx = idx.reshape((batch_size, num_joints, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = (preds[:, :, 0]) % width
preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
pred_mask = pred_mask.astype(np.float32)
preds *= pred_mask
return preds, maxvals
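# Sanity-check sketch: a heatmap with a single hot pixel decodes to that
# pixel's (x, y) location and its value.
#
#   hm = np.zeros((1, 1, 64, 48), dtype=np.float32)
#   hm[0, 0, 10, 20] = 0.9                 # row (y) = 10, col (x) = 20
#   preds, maxvals = get_max_preds(hm)
#   # preds[0, 0] == [20., 10.], maxvals[0, 0] == [0.9]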
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.]).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def batch_affine_transform(points, trans):
points = np.hstack((points[:, :2], np.ones((points.shape[0], 1))))
out = points @ trans.T
return out
def transform_preds(coords, center, scale, rot, output_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, rot, output_size, inv=1)
target_coords[:, :2] = batch_affine_transform(coords, trans)
return target_coords
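# Round-trip sketch: mapping points into the crop with the forward affine and
# back with transform_preds (which uses inv=1) recovers the originals.
#
#   center, scale = box_to_center_scale([100, 100, 200, 300], 288, 384)
#   trans = get_affine_transform(center, scale, 0, [288, 384])
#   pts = np.array([[150., 200.], [120., 250.]])
#   crop_pts = batch_affine_transform(pts, trans)
#   back = transform_preds(crop_pts, center, scale, 0, [288, 384])
#   # np.allclose(back[:, :2], pts) -> True (up to float error)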
config_ = {
    'kintree': [[1, 0], [2, 0], [3, 1], [4, 2], [5, 0], [6, 0], [7, 5], [8, 6], [9, 7], [10, 8], [11, 5], [12, 6],
                [13, 11], [14, 12], [15, 13], [16, 14], [6, 5], [12, 11]],
    'color': ['g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'g', 'r', 'k', 'k'],
}
colors_table = {
# colorblind/print/copy safe:
'_blue': [0.65098039, 0.74117647, 0.85882353],
'_pink': [.9, .7, .7],
'_mint': [ 166/255., 229/255., 204/255.],
'_mint2': [ 202/255., 229/255., 223/255.],
'_green': [ 153/255., 216/255., 201/255.],
'_green2': [ 171/255., 221/255., 164/255.],
'_red': [ 251/255., 128/255., 114/255.],
'_orange': [ 253/255., 174/255., 97/255.],
'_yellow': [ 250/255., 230/255., 154/255.],
'r':[255/255,0,0],
'g':[0,255/255,0],
'b':[0,0,255/255],
'k':[0,0,0],
'y':[255/255,255/255,0],
'purple':[128/255,0,128/255]
}
for key, val in colors_table.items():
colors_table[key] = tuple([int(val[2]*255), int(val[1]*255), int(val[0]*255)])
def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
normalize=True):
    '''
    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    '''
if normalize:
batch_image = batch_image.clone()
min = float(batch_image.min())
max = float(batch_image.max())
batch_image.add_(-min).div_(max - min + 1e-5)
batch_size = batch_heatmaps.size(0)
num_joints = batch_heatmaps.size(1)
heatmap_height = batch_heatmaps.size(2)
heatmap_width = batch_heatmaps.size(3)
grid_image = np.zeros((batch_size*heatmap_height,
(num_joints+2)*heatmap_width,
3),
dtype=np.uint8)
preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())
for i in range(batch_size):
image = batch_image[i].mul(255)\
.clamp(0, 255)\
.byte()\
.permute(1, 2, 0)\
.cpu().numpy()
heatmaps = batch_heatmaps[i].mul(255)\
.clamp(0, 255)\
.byte()\
.cpu().numpy()
resized_image = cv2.resize(image,
(int(heatmap_width), int(heatmap_height)))
resized_image_copy = resized_image.copy()
height_begin = heatmap_height * i
height_end = heatmap_height * (i + 1)
for ip in range(len(config_['kintree'])):
src, dst = config_['kintree'][ip]
c = config_['color'][ip]
if maxvals[i][src] < 0.1 or maxvals[i][dst] < 0.1:
continue
            # NOTE: plot_line was undefined in this file; cv2.line is the obvious equivalent
            cv2.line(resized_image_copy,
                     (int(preds[i][src][0]), int(preds[i][src][1])),
                     (int(preds[i][dst][0]), int(preds[i][dst][1])),
                     colors_table[c], 1)
for j in range(num_joints):
cv2.circle(resized_image,
(int(preds[i][j][0]), int(preds[i][j][1])),
1, [0, 0, 255], 1)
heatmap = heatmaps[j, :, :]
mask = (heatmap > 0.1)[:,:,None]
colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
masked_image = (colored_heatmap*0.7 + resized_image*0.3)*mask + resized_image*(1-mask)
cv2.circle(masked_image,
(int(preds[i][j][0]), int(preds[i][j][1])),
1, [0, 0, 255], 1)
width_begin = heatmap_width * (j+2)
width_end = heatmap_width * (j+2+1)
grid_image[height_begin:height_end, width_begin:width_end, :] = \
masked_image
# grid_image[height_begin:height_end, width_begin:width_end, :] = \
# colored_heatmap*0.7 + resized_image*0.3
grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image
grid_image[height_begin:height_end, heatmap_width:heatmap_width+heatmap_width, :] = resized_image_copy
cv2.imwrite(file_name, grid_image)
import math
def get_final_preds(batch_heatmaps, center, scale, rot=None, flip=None):
coords, maxvals = get_max_preds(batch_heatmaps)
heatmap_height = batch_heatmaps.shape[2]
heatmap_width = batch_heatmaps.shape[3]
    # post-processing: quarter-pixel shift toward the larger neighboring bin
    if True:
for n in range(coords.shape[0]):
for p in range(coords.shape[1]):
hm = batch_heatmaps[n][p]
px = int(math.floor(coords[n][p][0] + 0.5))
py = int(math.floor(coords[n][p][1] + 0.5))
if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1:
diff = np.array(
[
hm[py][px+1] - hm[py][px-1],
hm[py+1][px]-hm[py-1][px]
]
)
coords[n][p] += np.sign(diff) * .25
preds = coords.copy()
# Transform back
for i in range(coords.shape[0]):
if flip is not None:
if flip[i]:
coords[i, :, 0] = heatmap_width - 1 - coords[i, :, 0]
if rot is None:
_rot = 0
else:
_rot = rot[i]
preds[i] = transform_preds(
coords[i], center[i], scale[i], _rot, [heatmap_width, heatmap_height]
)
return preds, maxvals
def get_gaussian_maps(net_out, keypoints, sigma):
radius, kernel = gauss[sigma]['radius'], gauss[sigma]['kernel']
weights = np.ones(net_out.shape, dtype=np.float32)
for i in range(weights.shape[0]):
for nj in range(weights.shape[1]):
if keypoints[i][nj][2] < 0:
weights[i][nj] = 0
continue
elif keypoints[i][nj][2] < 0.01:
weights[i][nj] = 0
continue
weights[i][nj] = 0
mu_x, mu_y = keypoints[i][nj][:2]
mu_x, mu_y = int(mu_x + 0.5), int(mu_y + 0.5)
# Usable gaussian range
ul = [mu_x - radius, mu_y - radius]
br = [mu_x + radius + 1, mu_y + radius + 1]
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], weights.shape[3]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], weights.shape[2]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], weights.shape[3])
img_y = max(0, ul[1]), min(br[1], weights.shape[2])
weights[i][nj][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
kernel[g_y[0]:g_y[1], g_x[0]:g_x[1]]
return weights
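# Usage sketch: bias the heatmaps toward the previous frame's keypoints
# (already mapped into heatmap coordinates, i.e. crop coordinates / 4), so the
# argmax favors temporally consistent locations. sigma must be one of the
# precomputed keys (1..4).
#
#   weights = get_gaussian_maps(net_out, keypoints_rescale, sigma=2)
#   out = net_out * weights   # same shape as net_out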
humanId = 0
class SimpleHRNet:
def __init__(self, c, nof_joints, checkpoint_path, device, resolution=(288, 384),):
self.device = device
self.c = c
self.nof_joints = nof_joints
self.checkpoint_path = checkpoint_path
self.max_batch_size = 64
self.resolution = resolution # in the form (height, width) as in the original implementation
self.transform = transforms.Compose([
# transforms.ToPILImage(),
# transforms.Resize((self.resolution[0], self.resolution[1])), # (height, width)
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
self.model = HRNet(c=c, nof_joints=nof_joints).to(device)
self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))
self.model.eval()
def __call__(self, image, bboxes, rot=0, net_out=False):
        # image: full frame as an (H, W, 3) numpy array; bboxes: list of (x_min, y_min, x_max, y_max)
images = torch.zeros((len(bboxes), 3, self.resolution[1], self.resolution[0]), device=self.device) # (height, width)
if len(bboxes) > 0:
# pose estimation : for multiple people
centers, scales, trans_all = [], [], []
for box in bboxes:
center, scale = box_to_center_scale(box, self.resolution[0], self.resolution[1])
centers.append(center)
scales.append(scale)
trans = get_affine_transform(center, scale, rot=rot, output_size=self.resolution)
trans_all.append(trans)
for i, trans in enumerate(trans_all):
# Crop smaller image of people
model_input = cv2.warpAffine(
image, trans,
(int(self.resolution[0]), int(self.resolution[1])),
flags=cv2.INTER_LINEAR)
# cv2.imshow('input', model_input)
# cv2.waitKey(0)
# hwc -> 1chw
model_input = self.transform(model_input)#.unsqueeze(0)
images[i] = model_input
images = images.to(self.device)
with torch.no_grad():
out = self.model(images)
out = out.cpu().detach().numpy()
if net_out:
return out, trans_all, centers, scales, rot
coords, max_val = get_final_preds(
out,
np.asarray(centers),
np.asarray(scales),
[rot for _ in range(out.shape[0])])
pts = np.concatenate((coords, max_val), axis=2)
return coco17tobody25(pts)
else:
            return np.empty((0, 25, 3))
def predict_with_previous(self, image, bboxes, keypoints, sigma):
# (batch, nJoints, height, width)
net_out, trans_all, centers, scales, rot = self.__call__(image, bboxes, net_out=True)
keypoints = keypoints[:, COCO17_IN_BODY25]
keypoints_rescale = keypoints.copy()
        for i in range(keypoints.shape[0]):
            # rescale each person with its own crop transform (the original assigned to all rows)
            keypoints_rescale[i, :, :2] = batch_affine_transform(keypoints[i], trans_all[i]) / 4
weights = get_gaussian_maps(net_out, keypoints_rescale, sigma)
out = net_out * weights
coords, max_val = get_final_preds(
out,
np.asarray(centers),
np.asarray(scales),
rot)
pts = np.concatenate((coords, max_val), axis=2)
return coco17tobody25(pts)
def predict(self, image, detections, keypoints=None, ret_crop=False):
if keypoints is not None:
keypoints = keypoints[:, COCO17_IN_BODY25]
kpts_rescale = [None for _ in range(len(keypoints))]
boxes = []
rotation = 0
image_pose = image
# image_pose = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
if detections is not None:
images = torch.zeros((len(detections), 3, self.resolution[1], self.resolution[0]), device=self.device) # (height, width)
# pose estimation : for multiple people
centers = []
scales = []
for box in detections:
center, scale = box_to_center_scale(box, self.resolution[0], self.resolution[1])
centers.append(center)
scales.append(scale)
model_inputs = []
for i, (center, scale) in enumerate(zip(centers, scales)):
trans = get_affine_transform(center, scale, rotation, self.resolution)
# Crop smaller image of people
model_input = cv2.warpAffine(
image_pose,
trans,
(int(self.resolution[0]), int(self.resolution[1])),
flags=cv2.INTER_LINEAR)
if keypoints is not None:
kpts_homo = keypoints[i].copy()
kpts_homo[:, 2] = 1
kpts_rescale[i] = (kpts_homo @ trans.T)/4
# global humanId
# cv2.imwrite('../output/debughrnet/person_{}.jpg'.format(humanId), model_input[:,:,[2,1,0]])
# humanId += 1
# hwc -> 1chw
model_input = self.transform(model_input)#.unsqueeze(0)
images[i] = model_input
# torch.cuda.synchronize(self.device)
# print(' - spending {:.2f}ms in preprocess.'.format(1000*(time.time() - start)))
if images.shape[0] == 0:
return np.empty((0, 25, 3))
else:
# start = time.time()
images = images.to(self.device)
# torch.cuda.synchronize(self.device)
# print(' - spending {:.2f}ms in copy to cuda.'.format(1000*(time.time() - start)))
# start = time.time()
with torch.no_grad():
if len(images) <= self.max_batch_size:
out = self.model(images)
else:
out = torch.empty(
(images.shape[0], self.nof_joints, self.resolution[1] // 4, self.resolution[0] // 4)
).to(self.device)
for i in range(0, len(images), self.max_batch_size):
out[i:i + self.max_batch_size] = self.model(images[i:i + self.max_batch_size])
# torch.cuda.synchronize(self.device)
global humanId
if keypoints is not None:
filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
humanId += 1
# save_batch_heatmaps(images, out, filename)
                    # build the Gaussian weight maps; the default weight is 1 everywhere
weights = np.ones(out.shape, dtype=np.float32)
for i in range(weights.shape[0]):
for nj in range(weights.shape[1]):
if keypoints[i][nj][2] < 0:
weights[i][nj] = 0
continue
elif keypoints[i][nj][2] < 0.01:
continue
weights[i][nj] = 0
mu_x, mu_y = kpts_rescale[i][nj]
mu_x, mu_y = int(mu_x + 0.5), int(mu_y + 0.5)
# Usable gaussian range
ul = [mu_x - gauss_radius, mu_y - gauss_radius]
br = [mu_x + gauss_radius + 1, mu_y + gauss_radius + 1]
# Usable gaussian range
g_x = max(0, -ul[0]), min(br[0], weights.shape[3]) - ul[0]
g_y = max(0, -ul[1]), min(br[1], weights.shape[2]) - ul[1]
# Image range
img_x = max(0, ul[0]), min(br[0], weights.shape[3])
img_y = max(0, ul[1]), min(br[1], weights.shape[2])
weights[i][nj][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
gauss_kernel[g_y[0]:g_y[1], g_x[0]:g_x[1]]
filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
humanId += 1
# save_batch_heatmaps(images, torch.Tensor(weights), filename)
out = out.cpu().detach().numpy()
out = out * weights
filename = join('../output/debughrnet', '{:06d}.jpg'.format(humanId))
humanId += 1
# save_batch_heatmaps(images, torch.Tensor(out), filename)
else:
out = out.cpu().detach().numpy()
coords, max_val = get_final_preds(
out,
np.asarray(centers),
np.asarray(scales))
pts = np.concatenate((coords, max_val), axis=2)
# torch.cuda.synchronize(self.device)
# print(' - spending {:.2f}ms in postprocess.'.format(1000*(time.time() - start)))
# print('')
if ret_crop:
return coco17tobody25(pts), images
else:
return coco17tobody25(pts)

View File

@ -0,0 +1,72 @@
import torch
from torch import nn
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=bn_momentum)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, bn_momentum=0.1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)  # standard BasicBlock: conv2 maps planes -> planes
self.bn2 = nn.BatchNorm2d(planes, momentum=bn_momentum)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
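
# Shape-check sketch for the two residual blocks (illustration only):
#
#   x = torch.randn(1, 64, 32, 32)
#   BasicBlock(64, 64)(x).shape                            # (1, 64, 32, 32)
#   down = nn.Sequential(nn.Conv2d(64, 256, 1, bias=False), nn.BatchNorm2d(256))
#   Bottleneck(64, 64, downsample=down)(x).shape           # (1, 256, 32, 32), expansion = 4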

View File

@ -0,0 +1,8 @@
'''
@ Date: 2020-12-10 16:37:04
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2020-12-10 16:52:06
@ FilePath: /mvpose/code/estimator/YOLOv4/__init__.py
'''
from .yolo import YOLOv4

View File

@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

View File

@ -0,0 +1,257 @@
import sys

import torch
from .torch_utils import convert2cpu
def parse_cfg(cfgfile):
blocks = []
fp = open(cfgfile, 'r')
block = None
line = fp.readline()
while line != '':
line = line.rstrip()
if line == '' or line[0] == '#':
line = fp.readline()
continue
elif line[0] == '[':
if block:
blocks.append(block)
block = dict()
block['type'] = line.lstrip('[').rstrip(']')
# set default value
if block['type'] == 'convolutional':
block['batch_normalize'] = 0
else:
key, value = line.split('=')
key = key.strip()
if key == 'type':
key = '_type'
value = value.strip()
block[key] = value
line = fp.readline()
if block:
blocks.append(block)
fp.close()
return blocks
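# Minimal sketch of the cfg format parse_cfg understands (illustration only;
# the temp path is a placeholder):
#
#   cfg_text = ("[net]\nwidth=416\nheight=416\nchannels=3\n\n"
#               "[convolutional]\nfilters=32\nsize=3\nstride=1\npad=1\nactivation=leaky\n")
#   with open('/tmp/mini.cfg', 'w') as f:
#       f.write(cfg_text)
#   blocks = parse_cfg('/tmp/mini.cfg')
#   # blocks[0]['type'] == 'net'; blocks[1]['type'] == 'convolutional'
#   # blocks[1]['batch_normalize'] == 0   (default injected for conv blocks)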
def print_cfg(blocks):
    print('layer filters size input output')
prev_width = 416
prev_height = 416
prev_filters = 3
out_filters = []
out_widths = []
out_heights = []
ind = -2
for block in blocks:
ind = ind + 1
if block['type'] == 'net':
prev_width = int(block['width'])
prev_height = int(block['height'])
continue
elif block['type'] == 'convolutional':
filters = int(block['filters'])
kernel_size = int(block['size'])
stride = int(block['stride'])
is_pad = int(block['pad'])
pad = (kernel_size - 1) // 2 if is_pad else 0
width = (prev_width + 2 * pad - kernel_size) // stride + 1
height = (prev_height + 2 * pad - kernel_size) // stride + 1
print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width,
height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
        elif block['type'] == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            width = prev_width // stride
            height = prev_height // stride
            print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
                ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height,
                prev_filters))  # maxpool keeps the channel count ("filters" was undefined here)
            prev_width = width
            prev_height = height
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
        elif block['type'] == 'avgpool':
            width = 1
            height = 1
            print('%5d %-6s %3d x %3d x%4d -> %3d' % (
                ind, 'avg', prev_width, prev_height, prev_filters, prev_filters))
            prev_width = width
            prev_height = height
            # channel count is unchanged by global average pooling ("filters" was undefined here)
            out_widths.append(prev_width)
            out_heights.append(prev_height)
            out_filters.append(prev_filters)
elif block['type'] == 'softmax':
print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'cost':
print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'reorg':
stride = int(block['stride'])
filters = stride * stride * prev_filters
width = prev_width // stride
height = prev_height // stride
print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'upsample':
stride = int(block['stride'])
filters = prev_filters
width = prev_width * stride
height = prev_height * stride
print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % (
ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters))
prev_width = width
prev_height = height
prev_filters = filters
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'route':
layers = block['layers'].split(',')
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
print('%5d %-6s %d' % (ind, 'route', layers[0]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
prev_filters = out_filters[layers[0]]
elif len(layers) == 2:
print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
assert (prev_width == out_widths[layers[1]])
assert (prev_height == out_heights[layers[1]])
prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
elif len(layers) == 4:
print('%5d %-6s %d %d %d %d' % (ind, 'route', layers[0], layers[1], layers[2], layers[3]))
prev_width = out_widths[layers[0]]
prev_height = out_heights[layers[0]]
assert (prev_width == out_widths[layers[1]] == out_widths[layers[2]] == out_widths[layers[3]])
assert (prev_height == out_heights[layers[1]] == out_heights[layers[2]] == out_heights[layers[3]])
prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + out_filters[
layers[3]]
else:
print("route error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
sys._getframe().f_code.co_name, sys._getframe().f_lineno))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] in ['region', 'yolo']:
print('%5d %-6s' % (ind, 'detection'))
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'shortcut':
from_id = int(block['from'])
from_id = from_id if from_id > 0 else from_id + ind
print('%5d %-6s %d' % (ind, 'shortcut', from_id))
prev_width = out_widths[from_id]
prev_height = out_heights[from_id]
prev_filters = out_filters[from_id]
out_widths.append(prev_width)
out_heights.append(prev_height)
out_filters.append(prev_filters)
elif block['type'] == 'connected':
filters = int(block['output'])
print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters))
prev_filters = filters
out_widths.append(1)
out_heights.append(1)
out_filters.append(prev_filters)
else:
print('unknown type %s' % (block['type']))
def load_conv(buf, start, conv_model):
num_w = conv_model.weight.numel()
num_b = conv_model.bias.numel()
    conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape))
    start = start + num_w
return start
def save_conv(fp, conv_model):
if conv_model.bias.is_cuda:
convert2cpu(conv_model.bias.data).numpy().tofile(fp)
convert2cpu(conv_model.weight.data).numpy().tofile(fp)
else:
conv_model.bias.data.numpy().tofile(fp)
conv_model.weight.data.numpy().tofile(fp)
def load_conv_bn(buf, start, conv_model, bn_model):
num_w = conv_model.weight.numel()
num_b = bn_model.bias.numel()
    bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).reshape(conv_model.weight.data.shape))
    start = start + num_w
return start
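# Darknet weight layout consumed above, per convolutional block:
#   with batch norm:    bn.bias, bn.weight, bn.running_mean, bn.running_var
#                       (num_b floats each), then conv.weight (num_w floats)
#   without batch norm: conv.bias (num_b), then conv.weight (num_w)
# load_weights() in the Darknet model walks the cfg blocks in order and
# advances `start` through the flat float32 buffer accordingly.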
def save_conv_bn(fp, conv_model, bn_model):
if bn_model.bias.is_cuda:
convert2cpu(bn_model.bias.data).numpy().tofile(fp)
convert2cpu(bn_model.weight.data).numpy().tofile(fp)
convert2cpu(bn_model.running_mean).numpy().tofile(fp)
convert2cpu(bn_model.running_var).numpy().tofile(fp)
convert2cpu(conv_model.weight.data).numpy().tofile(fp)
else:
bn_model.bias.data.numpy().tofile(fp)
bn_model.weight.data.numpy().tofile(fp)
bn_model.running_mean.numpy().tofile(fp)
bn_model.running_var.numpy().tofile(fp)
conv_model.weight.data.numpy().tofile(fp)
def load_fc(buf, start, fc_model):
num_w = fc_model.weight.numel()
num_b = fc_model.bias.numel()
    fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]))
    start = start + num_b
    fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]))
    start = start + num_w
return start
def save_fc(fp, fc_model):
fc_model.bias.data.numpy().tofile(fp)
fc_model.weight.data.numpy().tofile(fp)
if __name__ == '__main__':
import sys
blocks = parse_cfg('cfg/yolo.cfg')
if len(sys.argv) == 2:
blocks = parse_cfg(sys.argv[1])
print_cfg(blocks)

View File

@ -0,0 +1,515 @@
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from .region_loss import RegionLoss
from .yolo_layer import YoloLayer
from .config import *
from .torch_utils import *
class Mish(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, x):
x = x * (torch.tanh(torch.nn.functional.softplus(x)))
return x
class MaxPoolDark(nn.Module):
def __init__(self, size=2, stride=1):
super(MaxPoolDark, self).__init__()
self.size = size
self.stride = stride
def forward(self, x):
'''
darknet output_size = (input_size + p - k) / s +1
p : padding = k - 1
k : size
s : stride
torch output_size = (input_size + 2*p -k) / s +1
p : padding = k//2
'''
p = self.size // 2
if ((x.shape[2] - 1) // self.stride) != ((x.shape[2] + 2 * p - self.size) // self.stride):
padding1 = (self.size - 1) // 2
padding2 = padding1 + 1
else:
padding1 = (self.size - 1) // 2
padding2 = padding1
if ((x.shape[3] - 1) // self.stride) != ((x.shape[3] + 2 * p - self.size) // self.stride):
padding3 = (self.size - 1) // 2
padding4 = padding3 + 1
else:
padding3 = (self.size - 1) // 2
padding4 = padding3
x = F.max_pool2d(F.pad(x, (padding3, padding4, padding1, padding2), mode='replicate'),
self.size, stride=self.stride)
return x
class Upsample_expand(nn.Module):
def __init__(self, stride=2):
super(Upsample_expand, self).__init__()
self.stride = stride
def forward(self, x):
assert (x.data.dim() == 4)
x = x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1).\
expand(x.size(0), x.size(1), x.size(2), self.stride, x.size(3), self.stride).contiguous().\
view(x.size(0), x.size(1), x.size(2) * self.stride, x.size(3) * self.stride)
return x
class Upsample_interpolate(nn.Module):
def __init__(self, stride):
super(Upsample_interpolate, self).__init__()
self.stride = stride
def forward(self, x):
assert (x.data.dim() == 4)
out = F.interpolate(x, size=(x.size(2) * self.stride, x.size(3) * self.stride), mode='nearest')
return out
class Reorg(nn.Module):
def __init__(self, stride=2):
super(Reorg, self).__init__()
self.stride = stride
def forward(self, x):
stride = self.stride
assert (x.data.dim() == 4)
B = x.data.size(0)
C = x.data.size(1)
H = x.data.size(2)
W = x.data.size(3)
assert (H % stride == 0)
assert (W % stride == 0)
ws = stride
hs = stride
        # integer division: "/" yields floats in Python 3, which view() rejects
        x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(3, 4).contiguous()
        x = x.view(B, C, (H // hs) * (W // ws), hs * ws).transpose(2, 3).contiguous()
        x = x.view(B, C, hs * ws, H // hs, W // ws).transpose(1, 2).contiguous()
        x = x.view(B, hs * ws * C, H // hs, W // ws)
return x
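# Reorg shape sketch: space-to-depth with the given stride; stride=2 maps
# (B, C, H, W) -> (B, 4C, H/2, W/2) by folding each 2x2 spatial block into
# channels.
#
#   r = Reorg(stride=2)
#   # r(torch.randn(1, 64, 26, 26)).shape == torch.Size([1, 256, 13, 13])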
class GlobalAvgPool2d(nn.Module):
def __init__(self):
super(GlobalAvgPool2d, self).__init__()
def forward(self, x):
N = x.data.size(0)
C = x.data.size(1)
H = x.data.size(2)
W = x.data.size(3)
x = F.avg_pool2d(x, (H, W))
x = x.view(N, C)
return x
# for route and shortcut
class EmptyModule(nn.Module):
def __init__(self):
super(EmptyModule, self).__init__()
def forward(self, x):
return x
# support route shortcut and reorg
class Darknet(nn.Module):
def __init__(self, cfgfile, inference=False):
super(Darknet, self).__init__()
self.inference = inference
self.training = not self.inference
self.blocks = parse_cfg(cfgfile)
self.width = int(self.blocks[0]['width'])
self.height = int(self.blocks[0]['height'])
self.models = self.create_network(self.blocks) # merge conv, bn,leaky
self.loss = self.models[len(self.models) - 1]
if self.blocks[(len(self.blocks) - 1)]['type'] == 'region':
self.anchors = self.loss.anchors
self.num_anchors = self.loss.num_anchors
self.anchor_step = self.loss.anchor_step
self.num_classes = self.loss.num_classes
self.header = torch.IntTensor([0, 0, 0, 0])
self.seen = 0
def forward(self, x):
ind = -2
self.loss = None
outputs = dict()
out_boxes = []
for block in self.blocks:
ind = ind + 1
# if ind > 0:
# return x
if block['type'] == 'net':
continue
elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']:
x = self.models[ind](x)
outputs[ind] = x
elif block['type'] == 'route':
layers = block['layers'].split(',')
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
if 'groups' not in block.keys() or int(block['groups']) == 1:
x = outputs[layers[0]]
outputs[ind] = x
else:
groups = int(block['groups'])
group_id = int(block['group_id'])
_, b, _, _ = outputs[layers[0]].shape
x = outputs[layers[0]][:, b // groups * group_id:b // groups * (group_id + 1)]
outputs[ind] = x
elif len(layers) == 2:
x1 = outputs[layers[0]]
x2 = outputs[layers[1]]
x = torch.cat((x1, x2), 1)
outputs[ind] = x
elif len(layers) == 4:
x1 = outputs[layers[0]]
x2 = outputs[layers[1]]
x3 = outputs[layers[2]]
x4 = outputs[layers[3]]
x = torch.cat((x1, x2, x3, x4), 1)
outputs[ind] = x
else:
print("rounte number > 2 ,is {}".format(len(layers)))
elif block['type'] == 'shortcut':
from_layer = int(block['from'])
activation = block['activation']
from_layer = from_layer if from_layer > 0 else from_layer + ind
x1 = outputs[from_layer]
x2 = outputs[ind - 1]
x = x1 + x2
if activation == 'leaky':
x = F.leaky_relu(x, 0.1, inplace=True)
elif activation == 'relu':
x = F.relu(x, inplace=True)
outputs[ind] = x
            elif block['type'] == 'region':
                continue
                # NOTE: unreachable (kept from upstream); region-loss accumulation
                # if self.loss:
                #     self.loss = self.loss + self.models[ind](x)
                # else:
                #     self.loss = self.models[ind](x)
                # outputs[ind] = None
elif block['type'] == 'yolo':
# if self.training:
# pass
# else:
# boxes = self.models[ind](x)
# out_boxes.append(boxes)
boxes = self.models[ind](x)
out_boxes.append(boxes)
elif block['type'] == 'cost':
continue
else:
print('unknown type %s' % (block['type']))
if self.training:
return out_boxes
else:
return get_region_boxes(out_boxes)
def print_network(self):
print_cfg(self.blocks)
def create_network(self, blocks):
models = nn.ModuleList()
prev_filters = 3
out_filters = []
prev_stride = 1
out_strides = []
conv_id = 0
for block in blocks:
if block['type'] == 'net':
prev_filters = int(block['channels'])
continue
elif block['type'] == 'convolutional':
conv_id = conv_id + 1
batch_normalize = int(block['batch_normalize'])
filters = int(block['filters'])
kernel_size = int(block['size'])
stride = int(block['stride'])
is_pad = int(block['pad'])
pad = (kernel_size - 1) // 2 if is_pad else 0
activation = block['activation']
model = nn.Sequential()
if batch_normalize:
model.add_module('conv{0}'.format(conv_id),
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters))
# model.add_module('bn{0}'.format(conv_id), BN2d(filters))
else:
model.add_module('conv{0}'.format(conv_id),
nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
if activation == 'leaky':
model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
elif activation == 'relu':
model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
elif activation == 'mish':
model.add_module('mish{0}'.format(conv_id), Mish())
                else:
                    pass
                    # print("convolutional block without activation: {}".format(activation))
prev_filters = filters
out_filters.append(prev_filters)
prev_stride = stride * prev_stride
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'maxpool':
pool_size = int(block['size'])
stride = int(block['stride'])
                if stride == 1 and pool_size % 2:
                    # MaxPoolDark would also work; plain MaxPool2d is easier to export to ONNX.
                    # Example: [maxpool] size=3 stride=1
                    model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=pool_size // 2)
                elif stride == pool_size:
                    # MaxPoolDark would also work; plain MaxPool2d is easier to export to ONNX.
                    # Example: [maxpool] size=2 stride=2
                    model = nn.MaxPool2d(kernel_size=pool_size, stride=stride, padding=0)
                else:
                    model = MaxPoolDark(pool_size, stride)
out_filters.append(prev_filters)
prev_stride = stride * prev_stride
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'avgpool':
model = GlobalAvgPool2d()
out_filters.append(prev_filters)
models.append(model)
elif block['type'] == 'softmax':
model = nn.Softmax()
out_strides.append(prev_stride)
out_filters.append(prev_filters)
models.append(model)
elif block['type'] == 'cost':
if block['_type'] == 'sse':
model = nn.MSELoss(reduction='mean')
elif block['_type'] == 'L1':
model = nn.L1Loss(reduction='mean')
elif block['_type'] == 'smooth':
model = nn.SmoothL1Loss(reduction='mean')
out_filters.append(1)
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'reorg':
stride = int(block['stride'])
prev_filters = stride * stride * prev_filters
out_filters.append(prev_filters)
prev_stride = prev_stride * stride
out_strides.append(prev_stride)
models.append(Reorg(stride))
elif block['type'] == 'upsample':
stride = int(block['stride'])
out_filters.append(prev_filters)
prev_stride = prev_stride // stride
out_strides.append(prev_stride)
models.append(Upsample_expand(stride))
# models.append(Upsample_interpolate(stride))
elif block['type'] == 'route':
layers = block['layers'].split(',')
ind = len(models)
layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
if len(layers) == 1:
if 'groups' not in block.keys() or int(block['groups']) == 1:
prev_filters = out_filters[layers[0]]
prev_stride = out_strides[layers[0]]
else:
prev_filters = out_filters[layers[0]] // int(block['groups'])
prev_stride = out_strides[layers[0]] // int(block['groups'])
elif len(layers) == 2:
assert (layers[0] == ind - 1 or layers[1] == ind - 1)
prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
prev_stride = out_strides[layers[0]]
elif len(layers) == 4:
assert (layers[0] == ind - 1)
prev_filters = out_filters[layers[0]] + out_filters[layers[1]] + out_filters[layers[2]] + \
out_filters[layers[3]]
prev_stride = out_strides[layers[0]]
else:
print("route error!!!")
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(EmptyModule())
elif block['type'] == 'shortcut':
ind = len(models)
prev_filters = out_filters[ind - 1]
out_filters.append(prev_filters)
prev_stride = out_strides[ind - 1]
out_strides.append(prev_stride)
models.append(EmptyModule())
elif block['type'] == 'connected':
filters = int(block['output'])
if block['activation'] == 'linear':
model = nn.Linear(prev_filters, filters)
elif block['activation'] == 'leaky':
model = nn.Sequential(
nn.Linear(prev_filters, filters),
nn.LeakyReLU(0.1, inplace=True))
elif block['activation'] == 'relu':
model = nn.Sequential(
nn.Linear(prev_filters, filters),
nn.ReLU(inplace=True))
prev_filters = filters
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(model)
elif block['type'] == 'region':
loss = RegionLoss()
anchors = block['anchors'].split(',')
loss.anchors = [float(i) for i in anchors]
loss.num_classes = int(block['classes'])
loss.num_anchors = int(block['num'])
loss.anchor_step = len(loss.anchors) // loss.num_anchors
loss.object_scale = float(block['object_scale'])
loss.noobject_scale = float(block['noobject_scale'])
loss.class_scale = float(block['class_scale'])
loss.coord_scale = float(block['coord_scale'])
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(loss)
elif block['type'] == 'yolo':
yolo_layer = YoloLayer()
anchors = block['anchors'].split(',')
anchor_mask = block['mask'].split(',')
yolo_layer.anchor_mask = [int(i) for i in anchor_mask]
yolo_layer.anchors = [float(i) for i in anchors]
yolo_layer.num_classes = int(block['classes'])
self.num_classes = yolo_layer.num_classes
yolo_layer.num_anchors = int(block['num'])
yolo_layer.anchor_step = len(yolo_layer.anchors) // yolo_layer.num_anchors
yolo_layer.stride = prev_stride
yolo_layer.scale_x_y = float(block['scale_x_y'])
# yolo_layer.object_scale = float(block['object_scale'])
# yolo_layer.noobject_scale = float(block['noobject_scale'])
# yolo_layer.class_scale = float(block['class_scale'])
# yolo_layer.coord_scale = float(block['coord_scale'])
out_filters.append(prev_filters)
out_strides.append(prev_stride)
models.append(yolo_layer)
else:
print('unknown type %s' % (block['type']))
return models
def load_weights(self, weightfile):
fp = open(weightfile, 'rb')
header = np.fromfile(fp, count=5, dtype=np.int32)
self.header = torch.from_numpy(header)
self.seen = self.header[3]
buf = np.fromfile(fp, dtype=np.float32)
fp.close()
start = 0
ind = -2
for block in self.blocks:
if start >= buf.size:
break
ind = ind + 1
if block['type'] == 'net':
continue
elif block['type'] == 'convolutional':
model = self.models[ind]
batch_normalize = int(block['batch_normalize'])
if batch_normalize:
start = load_conv_bn(buf, start, model[0], model[1])
else:
start = load_conv(buf, start, model[0])
elif block['type'] == 'connected':
model = self.models[ind]
if block['activation'] != 'linear':
start = load_fc(buf, start, model[0])
else:
start = load_fc(buf, start, model)
elif block['type'] == 'maxpool':
pass
elif block['type'] == 'reorg':
pass
elif block['type'] == 'upsample':
pass
elif block['type'] == 'route':
pass
elif block['type'] == 'shortcut':
pass
elif block['type'] == 'region':
pass
elif block['type'] == 'yolo':
pass
elif block['type'] == 'avgpool':
pass
elif block['type'] == 'softmax':
pass
elif block['type'] == 'cost':
pass
else:
print('unknown type %s' % (block['type']))
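
    # Hedged usage sketch (paths are placeholders, not part of this commit):
    #
    #   model = Darknet('./data/yolov4.cfg', inference=True)
    #   model.load_weights('./data/yolov4.weights')
    #   model.eval()
    #   boxes, confs = model(torch.randn(1, 3, model.height, model.width))
    #   # boxes: (B, total_anchors, 1, 4), confs: (B, total_anchors, num_classes)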
# def save_weights(self, outfile, cutoff=0):
# if cutoff <= 0:
# cutoff = len(self.blocks) - 1
#
# fp = open(outfile, 'wb')
# self.header[3] = self.seen
# header = self.header
# header.numpy().tofile(fp)
#
# ind = -1
# for blockId in range(1, cutoff + 1):
# ind = ind + 1
# block = self.blocks[blockId]
# if block['type'] == 'convolutional':
# model = self.models[ind]
# batch_normalize = int(block['batch_normalize'])
# if batch_normalize:
# save_conv_bn(fp, model[0], model[1])
# else:
# save_conv(fp, model[0])
# elif block['type'] == 'connected':
# model = self.models[ind]
# if block['activation'] != 'linear':
# save_fc(fc, model)
# else:
# save_fc(fc, model[0])
# elif block['type'] == 'maxpool':
# pass
# elif block['type'] == 'reorg':
# pass
# elif block['type'] == 'upsample':
# pass
# elif block['type'] == 'route':
# pass
# elif block['type'] == 'shortcut':
# pass
# elif block['type'] == 'region':
# pass
# elif block['type'] == 'yolo':
# pass
# elif block['type'] == 'avgpool':
# pass
# elif block['type'] == 'softmax':
# pass
# elif block['type'] == 'cost':
# pass
# else:
# print('unknown type %s' % (block['type']))
# fp.close()

View File

@ -0,0 +1,195 @@
import torch.nn as nn
import torch.nn.functional as F
from .torch_utils import *
# NOTE: build_targets below calls bbox_iou (single-box IoU); assuming it comes
# from the sibling utils module added in this commit.
from .utils import bbox_iou
def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
sil_thresh, seen):
nB = target.size(0)
nA = num_anchors
nC = num_classes
    anchor_step = len(anchors) // num_anchors  # integer division ("/" yields a float in Python 3)
conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
coord_mask = torch.zeros(nB, nA, nH, nW)
cls_mask = torch.zeros(nB, nA, nH, nW)
tx = torch.zeros(nB, nA, nH, nW)
ty = torch.zeros(nB, nA, nH, nW)
tw = torch.zeros(nB, nA, nH, nW)
th = torch.zeros(nB, nA, nH, nW)
tconf = torch.zeros(nB, nA, nH, nW)
tcls = torch.zeros(nB, nA, nH, nW)
nAnchors = nA * nH * nW
nPixels = nH * nW
for b in range(nB):
cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
cur_ious = torch.zeros(nAnchors)
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
conf_mask[b][cur_ious > sil_thresh] = 0
if seen < 12800:
if anchor_step == 4:
tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1,
1).repeat(
nB, 1, nH, nW)
ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
1, nA, 1, 1).repeat(nB, 1, nH, nW)
else:
tx.fill_(0.5)
ty.fill_(0.5)
tw.zero_()
th.zero_()
coord_mask.fill_(1)
nGT = 0
nCorrect = 0
for b in range(nB):
for t in range(50):
if target[b][t * 5 + 1] == 0:
break
nGT = nGT + 1
best_iou = 0.0
best_n = -1
min_dist = 10000
gx = target[b][t * 5 + 1] * nW
gy = target[b][t * 5 + 2] * nH
gi = int(gx)
gj = int(gy)
gw = target[b][t * 5 + 3] * nW
gh = target[b][t * 5 + 4] * nH
gt_box = [0, 0, gw, gh]
for n in range(nA):
aw = anchors[anchor_step * n]
ah = anchors[anchor_step * n + 1]
anchor_box = [0, 0, aw, ah]
iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
if anchor_step == 4:
ax = anchors[anchor_step * n + 2]
ay = anchors[anchor_step * n + 3]
dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
if iou > best_iou:
best_iou = iou
best_n = n
elif anchor_step == 4 and iou == best_iou and dist < min_dist:
best_iou = iou
best_n = n
min_dist = dist
gt_box = [gx, gy, gw, gh]
pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
coord_mask[b][best_n][gj][gi] = 1
cls_mask[b][best_n][gj][gi] = 1
conf_mask[b][best_n][gj][gi] = object_scale
tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
tconf[b][best_n][gj][gi] = iou
tcls[b][best_n][gj][gi] = target[b][t * 5]
if iou > 0.5:
nCorrect = nCorrect + 1
return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=1):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
        self.anchor_step = len(anchors) // num_anchors  # integer division ("/" yields a float in Python 3)
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0
def forward(self, output, target):
# output : BxAs*(4+1+num_classes)*H*W
t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
output = output.view(nB, nA, (5 + nC), nH, nW)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
t1 = time.time()
pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
pred_boxes[0] = x.data + grid_x
pred_boxes[1] = y.data + grid_y
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
t2 = time.time()
nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
target.data,
self.anchors, nA,
nC, \
nH, nW,
self.noobject_scale,
self.object_scale,
self.thresh,
self.seen)
cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().item())
tx = Variable(tx.cuda())
ty = Variable(ty.cuda())
tw = Variable(tw.cuda())
th = Variable(th.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
cls = cls[cls_mask].view(-1, nC)
t3 = time.time()
loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
loss_conf.item(), loss_cls.item(), loss.item()))
return loss
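A minimal smoke-test sketch for this loss, assuming a YOLOv2-style head on a 13x13 grid with 5 anchors; the anchor values, tensor shapes, and the 50-slot (cls, x, y, w, h) target layout are assumptions, not taken from this commit, and a CUDA device is required because the forward pass allocates torch.cuda tensors internally:
anchors = [1.32, 1.73, 3.19, 4.00, 5.05, 8.09, 9.47, 4.84, 11.23, 10.01]  # 5 assumed (w, h) pairs
criterion = RegionLoss(num_classes=20, anchors=anchors, num_anchors=5)
output = torch.randn(2, 5 * (5 + 20), 13, 13).cuda()  # B x A*(5+C) x H x W
target = torch.zeros(2, 250)  # assumed layout: 50 ground-truth slots of (cls, x, y, w, h) per image
loss = criterion(output, target)
print(loss.item())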

View File

@ -0,0 +1,98 @@
import sys
import os
import time
import math
import torch
import numpy as np
from torch.autograd import Variable
from . import utils  # assumed relative import: the utils module (next file) provides post_processing for do_detect
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
if x1y1x2y2:
mx = torch.min(boxes1[0], boxes2[0])
Mx = torch.max(boxes1[2], boxes2[2])
my = torch.min(boxes1[1], boxes2[1])
My = torch.max(boxes1[3], boxes2[3])
w1 = boxes1[2] - boxes1[0]
h1 = boxes1[3] - boxes1[1]
w2 = boxes2[2] - boxes2[0]
h2 = boxes2[3] - boxes2[1]
else:
mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
w1 = boxes1[2]
h1 = boxes1[3]
w2 = boxes2[2]
h2 = boxes2[3]
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
mask = ((cw <= 0) | (ch <= 0))
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
carea[mask] = 0
uarea = area1 + area2 - carea
return carea / uarea
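As a quick sanity check of bbox_ious (boxes are stored column-wise, one box per column; the values below are made up):
a = torch.tensor([[0.0], [0.0], [2.0], [2.0]])  # one box per column: x1, y1, x2, y2
b = torch.tensor([[1.0], [1.0], [3.0], [3.0]])
print(bbox_ious(a, b, x1y1x2y2=True))  # overlap 1, union 7 -> tensor([0.1429])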
def get_region_boxes(boxes_and_confs):
# print('Getting boxes from boxes and confs ...')
boxes_list = []
confs_list = []
for item in boxes_and_confs:
boxes_list.append(item[0])
confs_list.append(item[1])
# boxes: [batch, num1 + num2 + num3, 1, 4]
# confs: [batch, num1 + num2 + num3, num_classes]
boxes = torch.cat(boxes_list, dim=1)
confs = torch.cat(confs_list, dim=1)
return [boxes, confs]
def convert2cpu(gpu_matrix):
return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
def convert2cpu_long(gpu_matrix):
return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
model.eval()
t0 = time.time()
if isinstance(img, np.ndarray) and len(img.shape) == 3: # cv2 image
img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
elif isinstance(img, np.ndarray) and len(img.shape) == 4:
img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
else:
print("unknown image type")
exit(-1)
if use_cuda:
img = img.cuda()
img = torch.autograd.Variable(img)
t1 = time.time()
output = model(img)
t2 = time.time()
print('-----------------------------------')
print(' Preprocess : %f' % (t1 - t0))
print(' Model Inference : %f' % (t2 - t1))
print('-----------------------------------')
return utils.post_processing(img, conf_thresh, nms_thresh, output)
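Hypothetical usage of do_detect; the weight/config/image paths are placeholders and the image is assumed to be pre-resized to the network input resolution:
import cv2
from .darknet2pytorch import Darknet  # assumed module layout, mirroring yolo.py in this commit
model = Darknet('yolov4.cfg')
model.load_weights('yolov4.weights')  # placeholder path
model.cuda()
img = cv2.resize(cv2.imread('person.jpg'), (model.width, model.height))
boxes = do_detect(model, img, conf_thresh=0.4, nms_thresh=0.6, use_cuda=1)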

View File

@ -0,0 +1,221 @@
import sys
import os
import time
import math
import numpy as np
import itertools
import struct # get_image_size
import imghdr # get_image_size
def sigmoid(x):
return 1.0 / (np.exp(-x) + 1.)
def softmax(x):
x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
x = x / np.expand_dims(x.sum(axis=1), axis=1)
return x
def bbox_iou(box1, box2, x1y1x2y2=True):
# print('iou box1:', box1)
# print('iou box2:', box2)
if x1y1x2y2:
mx = min(box1[0], box2[0])
Mx = max(box1[2], box2[2])
my = min(box1[1], box2[1])
My = max(box1[3], box2[3])
w1 = box1[2] - box1[0]
h1 = box1[3] - box1[1]
w2 = box2[2] - box2[0]
h2 = box2[3] - box2[1]
else:
w1 = box1[2]
h1 = box1[3]
w2 = box2[2]
h2 = box2[3]
mx = min(box1[0], box2[0])
Mx = max(box1[0] + w1, box2[0] + w2)
my = min(box1[1], box2[1])
My = max(box1[1] + h1, box2[1] + h2)
uw = Mx - mx
uh = My - my
cw = w1 + w2 - uw
ch = h1 + h2 - uh
carea = 0
if cw <= 0 or ch <= 0:
return 0.0
area1 = w1 * h1
area2 = w2 * h2
carea = cw * ch
uarea = area1 + area2 - carea
return carea / uarea
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
# print(boxes.shape)
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = confs.argsort()[::-1]
keep = []
while order.size > 0:
idx_self = order[0]
idx_other = order[1:]
keep.append(idx_self)
xx1 = np.maximum(x1[idx_self], x1[idx_other])
yy1 = np.maximum(y1[idx_self], y1[idx_other])
xx2 = np.minimum(x2[idx_self], x2[idx_other])
yy2 = np.minimum(y2[idx_self], y2[idx_other])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
if min_mode:
over = inter / np.minimum(areas[order[0]], areas[order[1:]])
else:
over = inter / (areas[order[0]] + areas[order[1:]] - inter)
inds = np.where(over <= nms_thresh)[0]
order = order[inds + 1]
return np.array(keep)
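A small numeric check of nms_cpu (made-up boxes; two overlap heavily, one is disjoint):
boxes = np.array([[0.0, 0.0, 1.0, 1.0], [0.05, 0.05, 1.0, 1.0], [2.0, 2.0, 3.0, 3.0]])
confs = np.array([0.9, 0.8, 0.7])
print(nms_cpu(boxes, confs, nms_thresh=0.5))  # [0 2]: the 0.8 box is suppressed by the 0.9 box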
def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
import cv2
img = np.copy(img)
colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
def get_color(c, x, max_val):
ratio = float(x) / max_val * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio = ratio - i
r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
return int(r * 255)
width = img.shape[1]
height = img.shape[0]
for i in range(len(boxes)):
box = boxes[i]
x1 = int(box[0] * width)
y1 = int(box[1] * height)
x2 = int(box[2] * width)
y2 = int(box[3] * height)
if color:
rgb = color
else:
rgb = (255, 0, 0)
if len(box) >= 7 and class_names:
cls_conf = box[5]
cls_id = box[6]
print('%s: %f' % (class_names[cls_id], cls_conf))
classes = len(class_names)
offset = cls_id * 123457 % classes
red = get_color(2, offset, classes)
green = get_color(1, offset, classes)
blue = get_color(0, offset, classes)
if color is None:
rgb = (red, green, blue)
img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
if savename:
print("save plot results to %s" % savename)
cv2.imwrite(savename, img)
return img
def read_truths(lab_path):
if not os.path.exists(lab_path):
return np.array([])
if os.path.getsize(lab_path):
truths = np.loadtxt(lab_path)
truths = truths.reshape(truths.size // 5, 5)  # integer division; avoids the single-truth shape problem
return truths
else:
return np.array([])
def post_processing(img, conf_thresh, nms_thresh, output):
# anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
# num_anchors = 9
# anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
# strides = [8, 16, 32]
# anchor_step = len(anchors) // num_anchors
# [batch, num, 1, 4]
box_array = output[0]
# [batch, num, num_classes]
confs = output[1]
t1 = time.time()
if not isinstance(box_array, np.ndarray):
box_array = box_array.cpu().detach().numpy()
confs = confs.cpu().detach().numpy()
num_classes = confs.shape[2]
# [batch, num, 4]
box_array = box_array[:, :, 0]
# [batch, num, num_classes] --> [batch, num]
max_conf = np.max(confs, axis=2)
max_id = np.argmax(confs, axis=2)
t2 = time.time()
bboxes_batch = []
for i in range(box_array.shape[0]):
argwhere = max_conf[i] > conf_thresh
l_box_array = box_array[i, argwhere, :]
l_max_conf = max_conf[i, argwhere]
l_max_id = max_id[i, argwhere]
bboxes = []
# nms for each class
for j in range(num_classes):
cls_argwhere = l_max_id == j
ll_box_array = l_box_array[cls_argwhere, :]
ll_max_conf = l_max_conf[cls_argwhere]
ll_max_id = l_max_id[cls_argwhere]
keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
if (keep.size > 0):
ll_box_array = ll_box_array[keep, :]
ll_max_conf = ll_max_conf[keep]
ll_max_id = ll_max_id[keep]
for k in range(ll_box_array.shape[0]):
bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
bboxes_batch.append(bboxes)
t3 = time.time()
print('-----------------------------------')
print(' max and argmax : %f' % (t2 - t1))
print(' nms : %f' % (t3 - t2))
print('Post processing total : %f' % (t3 - t1))
print('-----------------------------------')
return bboxes_batch
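For reference, a sketch of the expected input/output layout of post_processing, under the same assumptions as the detector above:
# output[0]: [batch, num, 1, 4] boxes in normalized (x1, y1, x2, y2)
# output[1]: [batch, num, num_classes] class confidences
# bboxes_batch = post_processing(img, conf_thresh=0.4, nms_thresh=0.6, output=model(img))
# each kept row is [x1, y1, x2, y2, det_conf, cls_conf, cls_id]; note det_conf is duplicated as cls_conf above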

View File

@ -0,0 +1,161 @@
'''
@ Date: 2020-12-10 16:39:51
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-04-21 23:53:40
@ FilePath: /EasyMocapPublic/easymocap/estimator/YOLOv4/yolo.py
'''
from .darknet2pytorch import Darknet
import cv2
import torch
from os.path import join
import os
import numpy as np
def load_class_names(namesfile):
class_names = []
with open(namesfile, 'r') as fp:
lines = fp.readlines()
for line in lines:
line = line.rstrip()
class_names.append(line)
return class_names
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
# print(boxes.shape)
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
order = confs.argsort()[::-1]
keep = []
while order.size > 0:
idx_self = order[0]
idx_other = order[1:]
keep.append(idx_self)
xx1 = np.maximum(x1[idx_self], x1[idx_other])
yy1 = np.maximum(y1[idx_self], y1[idx_other])
xx2 = np.minimum(x2[idx_self], x2[idx_other])
yy2 = np.minimum(y2[idx_self], y2[idx_other])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
if min_mode:
over = inter / np.minimum(areas[order[0]], areas[order[1:]])
else:
over = inter / (areas[order[0]] + areas[order[1:]] - inter)
inds = np.where(over <= nms_thresh)[0]
order = order[inds + 1]
return np.array(keep)
def post_processing(conf_thresh, nms_thresh, output):
# [batch, num, 1, 4]
box_array = output[0]
# [batch, num, num_classes]
confs = output[1]
if not isinstance(box_array, np.ndarray):
box_array = box_array.cpu().detach().numpy()
confs = confs.cpu().detach().numpy()
num_classes = confs.shape[2]
# [batch, num, 4]
box_array = box_array[:, :, 0]
# [batch, num, num_classes] --> [batch, num]
max_conf = np.max(confs, axis=2)
max_id = np.argmax(confs, axis=2)
bboxes_batch = []
for i in range(box_array.shape[0]):
argwhere = max_conf[i] > conf_thresh
l_box_array = box_array[i, argwhere, :]
l_max_conf = max_conf[i, argwhere]
l_max_id = max_id[i, argwhere]
bboxes = []
# nms for class person
j = 0
cls_argwhere = l_max_id == j
ll_box_array = l_box_array[cls_argwhere, :]
ll_max_conf = l_max_conf[cls_argwhere]
ll_max_id = l_max_id[cls_argwhere]
keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
if (keep.size > 0):
ll_box_array = ll_box_array[keep, :]
ll_max_conf = ll_max_conf[keep]
ll_max_id = ll_max_id[keep]
bboxes = np.hstack([ll_box_array, ll_max_conf[:, None]])
bboxes_batch.append(bboxes)
return bboxes_batch
class YOLOv4:
def __init__(self, device, ckpt_path, box_nms_thres, conf_thres,
isWild=False) -> None:
dirname = os.path.dirname(__file__)
cfgfile = join(dirname, 'yolov4.cfg')
namesfile = join(dirname, 'coco.names')
self.model = Darknet(cfgfile)
self.model.load_weights(ckpt_path)
self.model.to(device)
self.model.eval()
self.class_names = load_class_names(namesfile)
self.device = device
self.box_nms_thres = box_nms_thres
self.conf_thres = conf_thres
self.isWild = isWild
def predict_single(self, image):
width = image.shape[1]
height = image.shape[0]
tgt_width = self.model.width
# resize down first, then pad to a square
if width > height:
tgt_shape = (tgt_width, int(height/width*tgt_width))
resize = cv2.resize(image, tgt_shape)
sized = np.zeros((tgt_width, tgt_width, 3), dtype=np.uint8)
start = (sized.shape[0] - resize.shape[0])//2
sized[start:start+resize.shape[0], :, :] = resize
# pad_to_square
elif width == height:
sized = cv2.resize(image, (tgt_width, tgt_width))
start = 0
else:
tgt_shape = (int(width/height*tgt_width), tgt_width)
resize = cv2.resize(image, tgt_shape)
sized = np.zeros((tgt_width, tgt_width, 3), dtype=np.uint8)
start = (sized.shape[1] - resize.shape[1]) // 2
sized[:, start:start+resize.shape[1], :] = resize
img = torch.from_numpy(sized.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
img = img.to(self.device)
with torch.no_grad():
output = self.model(img)
bboxes = post_processing(self.conf_thres, self.box_nms_thres, output)[0]
if len(bboxes) == 0:
return bboxes
if self.isWild:
flag = ((bboxes[:, 2] - bboxes[:, 0]) < 0.8) & (((bboxes[:, 2] - bboxes[:, 0]) > 0.1) | ((bboxes[:, 3] - bboxes[:, 1]) > 0.1))
bboxes = bboxes[flag]
if width >= height:
bboxes[:, :4] *= width
bboxes[:, 1] -= start*width/tgt_width
bboxes[:, 3] -= start*width/tgt_width
else:
bboxes[:, :4] *= height
bboxes[:, 0] -= start*height/tgt_width
bboxes[:, 2] -= start*height/tgt_width
# return bounding box
return bboxes
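Hedged usage sketch for this wrapper; the checkpoint path, image path, and thresholds are placeholders:
import cv2
import torch
detector = YOLOv4(device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
ckpt_path='data/models/yolov4.weights', box_nms_thres=0.45, conf_thres=0.3)
image = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)
bboxes = detector.predict_single(image)  # N x 5 array: x1, y1, x2, y2, conf in input-image pixels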

View File

@ -0,0 +1,322 @@
import torch.nn as nn
import torch.nn.functional as F
from .torch_utils import *
def yolo_forward(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
validation=False):
# Output would be invalid if it does not satisfy this assert
# assert (output.size(1) == (5 + num_classes) * num_anchors)
# print(output.size())
# Slice the second dimension (channel) of output into:
# [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
# And then into
# bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
batch = output.size(0)
H = output.size(2)
W = output.size(3)
bxy_list = []
bwh_list = []
det_confs_list = []
cls_confs_list = []
for i in range(num_anchors):
begin = i * (5 + num_classes)
end = (i + 1) * (5 + num_classes)
bxy_list.append(output[:, begin : begin + 2])
bwh_list.append(output[:, begin + 2 : begin + 4])
det_confs_list.append(output[:, begin + 4 : begin + 5])
cls_confs_list.append(output[:, begin + 5 : end])
# Shape: [batch, num_anchors * 2, H, W]
bxy = torch.cat(bxy_list, dim=1)
# Shape: [batch, num_anchors * 2, H, W]
bwh = torch.cat(bwh_list, dim=1)
# Shape: [batch, num_anchors, H, W]
det_confs = torch.cat(det_confs_list, dim=1)
# Shape: [batch, num_anchors * H * W]
det_confs = det_confs.view(batch, num_anchors * H * W)
# Shape: [batch, num_anchors * num_classes, H, W]
cls_confs = torch.cat(cls_confs_list, dim=1)
# Shape: [batch, num_anchors, num_classes, H * W]
cls_confs = cls_confs.view(batch, num_anchors, num_classes, H * W)
# Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(batch, num_anchors * H * W, num_classes)
# Apply sigmoid(), exp() and softmax() to slices
#
bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
bwh = torch.exp(bwh)
det_confs = torch.sigmoid(det_confs)
cls_confs = torch.sigmoid(cls_confs)
# Prepare C-x, C-y, P-w, P-h (None of them are torch related)
grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, W - 1, W), axis=0).repeat(H, 0), axis=0), axis=0)
grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, H - 1, H), axis=1).repeat(W, 1), axis=0), axis=0)
# grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
# grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)
anchor_w = []
anchor_h = []
for i in range(num_anchors):
anchor_w.append(anchors[i * 2])
anchor_h.append(anchors[i * 2 + 1])
device = None
cuda_check = output.is_cuda
if cuda_check:
device = output.get_device()
bx_list = []
by_list = []
bw_list = []
bh_list = []
# Apply C-x, C-y, P-w, P-h
for i in range(num_anchors):
ii = i * 2
# Shape: [batch, 1, H, W]
bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
bw = bwh[:, ii : ii + 1] * anchor_w[i]
# Shape: [batch, 1, H, W]
bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]
bx_list.append(bx)
by_list.append(by)
bw_list.append(bw)
bh_list.append(bh)
########################################
# Figure out bboxes from slices #
########################################
# Shape: [batch, num_anchors, H, W]
bx = torch.cat(bx_list, dim=1)
# Shape: [batch, num_anchors, H, W]
by = torch.cat(by_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bw = torch.cat(bw_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bh = torch.cat(bh_list, dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
bx_bw = torch.cat((bx, bw), dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
by_bh = torch.cat((by, bh), dim=1)
# normalize coordinates to [0, 1]
bx_bw /= W
by_bh /= H
# Shape: [batch, num_anchors * H * W, 1]
bx = bx_bw[:, :num_anchors].view(batch, num_anchors * H * W, 1)
by = by_bh[:, :num_anchors].view(batch, num_anchors * H * W, 1)
bw = bx_bw[:, num_anchors:].view(batch, num_anchors * H * W, 1)
bh = by_bh[:, num_anchors:].view(batch, num_anchors * H * W, 1)
bx1 = bx - bw * 0.5
by1 = by - bh * 0.5
bx2 = bx1 + bw
by2 = by1 + bh
# Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(batch, num_anchors * H * W, 1, 4)
# boxes = boxes.repeat(1, 1, num_classes, 1)
# boxes: [batch, num_anchors * H * W, 1, 4]
# cls_confs: [batch, num_anchors * H * W, num_classes]
# det_confs: [batch, num_anchors * H * W]
det_confs = det_confs.view(batch, num_anchors * H * W, 1)
confs = cls_confs * det_confs
# boxes: [batch, num_anchors * H * W, 1, 4]
# confs: [batch, num_anchors * H * W, num_classes]
return boxes, confs
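A shape-level sketch of this fixed-size path, with assumed anchors (in stride units) and 80 classes:
out = torch.randn(1, 3 * (5 + 80), 19, 19)
boxes, confs = yolo_forward(out, conf_thresh=0.4, num_classes=80,
anchors=[1.5, 2.0, 2.4, 4.5, 5.0, 3.5], num_anchors=3, scale_x_y=1.05)
print(boxes.shape, confs.shape)  # [1, 3*19*19, 1, 4] and [1, 3*19*19, 80]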
def yolo_forward_dynamic(output, conf_thresh, num_classes, anchors, num_anchors, scale_x_y, only_objectness=1,
validation=False):
# Output would be invalid if it does not satisfy this assert
# assert (output.size(1) == (5 + num_classes) * num_anchors)
# print(output.size())
# Slice the second dimension (channel) of output into:
# [ 2, 2, 1, num_classes, 2, 2, 1, num_classes, 2, 2, 1, num_classes ]
# And then into
# bxy = [ 6 ] bwh = [ 6 ] det_conf = [ 3 ] cls_conf = [ num_classes * 3 ]
# batch = output.size(0)
# H = output.size(2)
# W = output.size(3)
bxy_list = []
bwh_list = []
det_confs_list = []
cls_confs_list = []
for i in range(num_anchors):
begin = i * (5 + num_classes)
end = (i + 1) * (5 + num_classes)
bxy_list.append(output[:, begin : begin + 2])
bwh_list.append(output[:, begin + 2 : begin + 4])
det_confs_list.append(output[:, begin + 4 : begin + 5])
cls_confs_list.append(output[:, begin + 5 : end])
# Shape: [batch, num_anchors * 2, H, W]
bxy = torch.cat(bxy_list, dim=1)
# Shape: [batch, num_anchors * 2, H, W]
bwh = torch.cat(bwh_list, dim=1)
# Shape: [batch, num_anchors, H, W]
det_confs = torch.cat(det_confs_list, dim=1)
# Shape: [batch, num_anchors * H * W]
det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3))
# Shape: [batch, num_anchors * num_classes, H, W]
cls_confs = torch.cat(cls_confs_list, dim=1)
# Shape: [batch, num_anchors, num_classes, H * W]
cls_confs = cls_confs.view(output.size(0), num_anchors, num_classes, output.size(2) * output.size(3))
# Shape: [batch, num_anchors, num_classes, H * W] --> [batch, num_anchors * H * W, num_classes]
cls_confs = cls_confs.permute(0, 1, 3, 2).reshape(output.size(0), num_anchors * output.size(2) * output.size(3), num_classes)
# Apply sigmoid(), exp() and softmax() to slices
#
bxy = torch.sigmoid(bxy) * scale_x_y - 0.5 * (scale_x_y - 1)
bwh = torch.exp(bwh)
det_confs = torch.sigmoid(det_confs)
cls_confs = torch.sigmoid(cls_confs)
# Prepare C-x, C-y, P-w, P-h (None of them are torch related)
grid_x = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(3) - 1, output.size(3)), axis=0).repeat(output.size(2), 0), axis=0), axis=0)
grid_y = np.expand_dims(np.expand_dims(np.expand_dims(np.linspace(0, output.size(2) - 1, output.size(2)), axis=1).repeat(output.size(3), 1), axis=0), axis=0)
# grid_x = torch.linspace(0, W - 1, W).reshape(1, 1, 1, W).repeat(1, 1, H, 1)
# grid_y = torch.linspace(0, H - 1, H).reshape(1, 1, H, 1).repeat(1, 1, 1, W)
anchor_w = []
anchor_h = []
for i in range(num_anchors):
anchor_w.append(anchors[i * 2])
anchor_h.append(anchors[i * 2 + 1])
device = None
cuda_check = output.is_cuda
if cuda_check:
device = output.get_device()
bx_list = []
by_list = []
bw_list = []
bh_list = []
# Apply C-x, C-y, P-w, P-h
for i in range(num_anchors):
ii = i * 2
# Shape: [batch, 1, H, W]
bx = bxy[:, ii : ii + 1] + torch.tensor(grid_x, device=device, dtype=torch.float32) # grid_x.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
by = bxy[:, ii + 1 : ii + 2] + torch.tensor(grid_y, device=device, dtype=torch.float32) # grid_y.to(device=device, dtype=torch.float32)
# Shape: [batch, 1, H, W]
bw = bwh[:, ii : ii + 1] * anchor_w[i]
# Shape: [batch, 1, H, W]
bh = bwh[:, ii + 1 : ii + 2] * anchor_h[i]
bx_list.append(bx)
by_list.append(by)
bw_list.append(bw)
bh_list.append(bh)
########################################
# Figure out bboxes from slices #
########################################
# Shape: [batch, num_anchors, H, W]
bx = torch.cat(bx_list, dim=1)
# Shape: [batch, num_anchors, H, W]
by = torch.cat(by_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bw = torch.cat(bw_list, dim=1)
# Shape: [batch, num_anchors, H, W]
bh = torch.cat(bh_list, dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
bx_bw = torch.cat((bx, bw), dim=1)
# Shape: [batch, 2 * num_anchors, H, W]
by_bh = torch.cat((by, bh), dim=1)
# normalize coordinates to [0, 1]
bx_bw /= output.size(3)
by_bh /= output.size(2)
# Shape: [batch, num_anchors * H * W, 1]
bx = bx_bw[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
by = by_bh[:, :num_anchors].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bw = bx_bw[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bh = by_bh[:, num_anchors:].view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
bx1 = bx - bw * 0.5
by1 = by - bh * 0.5
bx2 = bx1 + bw
by2 = by1 + bh
# Shape: [batch, num_anchors * h * w, 4] -> [batch, num_anchors * h * w, 1, 4]
boxes = torch.cat((bx1, by1, bx2, by2), dim=2).view(output.size(0), num_anchors * output.size(2) * output.size(3), 1, 4)
# boxes = boxes.repeat(1, 1, num_classes, 1)
# boxes: [batch, num_anchors * H * W, 1, 4]
# cls_confs: [batch, num_anchors * H * W, num_classes]
# det_confs: [batch, num_anchors * H * W]
det_confs = det_confs.view(output.size(0), num_anchors * output.size(2) * output.size(3), 1)
confs = cls_confs * det_confs
# boxes: [batch, num_anchors * H * W, 1, 4]
# confs: [batch, num_anchors * H * W, num_classes]
return boxes, confs
class YoloLayer(nn.Module):
''' Yolo layer
model_out: during inference, whether post-processing runs inside or outside the model
True: outside
'''
def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, stride=32, model_out=False):
super(YoloLayer, self).__init__()
self.anchor_mask = anchor_mask
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.anchor_step = len(anchors) // num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.stride = stride
self.seen = 0
self.scale_x_y = 1
self.model_out = model_out
def forward(self, output, target=None):
if self.training:
return output
masked_anchors = []
for m in self.anchor_mask:
masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step]
masked_anchors = [anchor / self.stride for anchor in masked_anchors]
return yolo_forward_dynamic(output, self.thresh, self.num_classes, masked_anchors, len(self.anchor_mask), scale_x_y=self.scale_x_y)
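Hedged wiring example for one detection head; the anchor list mirrors the usual yolov4.cfg values but is an assumption here, not read from this commit:
anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
head = YoloLayer(anchor_mask=[0, 1, 2], num_classes=80, anchors=anchors, num_anchors=9, stride=8)
head.eval()  # in training mode the layer returns the raw feature map unchanged
boxes, confs = head(torch.randn(1, 3 * (5 + 80), 76, 76))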

File diff suppressed because it is too large

View File

@ -0,0 +1,122 @@
from ..annotator.file_utils import read_json
from .wrapper_base import check_result, create_annot_file, save_annot
from glob import glob
from os.path import join
from tqdm import tqdm
import os
import cv2
import numpy as np
def detect_frame(detector, img, pid=0, only_bbox=False):
lDetections = detector.detect([img], only_bbox=only_bbox)[0]
annots = []
for i in range(len(lDetections)):
annot = {
'bbox': [float(d) for d in lDetections[i]['bbox']],
'personID': pid + i,
'isKeyframe': False
}
if not only_bbox:
annot['keypoints'] = lDetections[i]['keypoints'].tolist()
annots.append(annot)
return annots
def extract_bbox(image_root, annot_root, ext, **config):
force = config.pop('force')
if check_result(image_root, annot_root) and not force:
return 0
import torch
from .YOLOv4 import YOLOv4
device = torch.device('cuda') \
if torch.cuda.is_available() else torch.device('cpu')
detector = YOLOv4(device=device, **config)
imgnames = sorted(glob(join(image_root, '*'+ext)))
if len(imgnames) == 0:
ext = '.png'
imgnames = sorted(glob(join(image_root, '*'+ext)))
# run_yolo(image_root, )
for imgname in tqdm(imgnames, desc='{:10s}'.format(os.path.basename(annot_root))):
base = os.path.basename(imgname).replace(ext, '')
annotname = join(annot_root, base+'.json')
annot = create_annot_file(annotname, imgname)
image = cv2.imread(imgname)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
detections = detector.predict_single(image_rgb)
annots = []
pid = 0
for i in range(len(detections)):
annot_ = {
'bbox': [float(d) for d in detections[i]],
'isKeyframe': False
}
annot_['area'] = max(annot_['bbox'][2] - annot_['bbox'][0], annot_['bbox'][3] - annot_['bbox'][1])**2
annots.append(annot_)
annots.sort(key=lambda x:-x['area'])
# re-assign the person ID
for i in range(len(annots)):
annots[i]['personID'] = i + pid
annot['annots'] = annots
save_annot(annotname, annot)
def extract_hrnet(image_root, annot_root, ext, **config):
config.pop('force')
import torch
imgnames = sorted(glob(join(image_root, '*'+ext)))
device = torch.device('cuda') \
if torch.cuda.is_available() else torch.device('cpu')
from .HRNet import SimpleHRNet
estimator = SimpleHRNet(device=device, **config)
for imgname in tqdm(imgnames, desc='{:10s}'.format(os.path.basename(annot_root))):
base = os.path.basename(imgname).replace(ext, '')
annotname = join(annot_root, base+'.json')
annots = read_json(annotname)
detections = np.array([data['bbox'] for data in annots['annots']])
image = cv2.imread(imgname)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
points2d = estimator.predict(image_rgb, detections)
for i in range(detections.shape[0]):
annot_ = annots['annots'][i]
annot_['keypoints'] = points2d[i]
save_annot(annotname, annots)
def extract_yolo_hrnet(image_root, annot_root, ext, config_yolo, config_hrnet):
config_yolo.pop('ext', None)
imgnames = sorted(glob(join(image_root, '*{}'.format(ext))))
import torch
from .YOLOv4 import YOLOv4
device = torch.device('cuda') \
if torch.cuda.is_available() else torch.device('cpu')
detector = YOLOv4(device=device, **config_yolo)
from .HRNet import SimpleHRNet
estimator = SimpleHRNet(device=device, **config_hrnet)
for nf, imgname in enumerate(tqdm(imgnames, desc=os.path.basename(image_root))):
base = os.path.basename(imgname).replace(ext, '')
annotname = join(annot_root, base+'.json')
annot = create_annot_file(annotname, imgname)
image = cv2.imread(imgname)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
detections = detector.predict_single(image_rgb)
# forward_hrnet
points2d = estimator.predict(image_rgb, detections)
annots = []
pid = 0
for i in range(len(detections)):
annot_ = {
'bbox': [float(d) for d in detections[i]],
'keypoints': points2d[i],
'isKeyframe': False
}
annot_['area'] = max(annot_['bbox'][2] - annot_['bbox'][0], annot_['bbox'][3] - annot_['bbox'][1])**2
annots.append(annot_)
annots.sort(key=lambda x:-x['area'])
# re-assign the person ID
for i in range(len(annots)):
annots[i]['personID'] = i + pid
annot['annots'] = annots
save_annot(annotname, annot)
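Hypothetical driver call; the YOLO keywords simply mirror the YOLOv4 constructor above, while the SimpleHRNet keyword and all paths are assumptions, not read from this commit:
config_yolo = dict(ckpt_path='data/models/yolov4.weights', box_nms_thres=0.45, conf_thres=0.3, isWild=False)
config_hrnet = dict(ckpt_path='data/models/pose_hrnet_w48_384x288.pth')  # assumed keyword and path
extract_yolo_hrnet('data/demo/images/0', 'data/demo/annots/0', '.jpg', config_yolo, config_hrnet)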

View File

@ -0,0 +1,184 @@
'''
@ Date: 2021-06-14 15:39:26
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-08-02 21:50:40
@ FilePath: /EasyMocapPublic/scripts/preprocess/copy_dataset.py
'''
import os
from os.path import join
import shutil
from tqdm import tqdm
from glob import glob
import cv2
from easymocap.mytools.debug_utils import myerror, mywarn
mkdir = lambda x:os.makedirs(x, exist_ok=True)
import json
def save_json(file, data):
if not os.path.exists(os.path.dirname(file)):
os.makedirs(os.path.dirname(file))
with open(file, 'w') as f:
json.dump(data, f, indent=4)
def read_json(path):
with open(path) as f:
data = json.load(f)
return data
def copy_dataset(inp, out, start, end, step, keys, args):
copy_keys = {
'images': args.ext,
'annots': '.json',
'mask-schp': '.png',
}
copy_share_keys = {
'output-keypoints3d/keypoints3d': '.json'
}
mkdir(out)
if os.path.exists(join(inp, 'intri.yml')):
shutil.copyfile(join(inp, 'intri.yml'), join(out, 'intri.yml'))
shutil.copyfile(join(inp, 'extri.yml'), join(out, 'extri.yml'))
if os.path.exists(join(inp, 'match_name.json')):
names = read_json(join(inp, 'match_name.json'))
names = names[start:end:step]
save_json(join(out, 'match_name.json'), names)
if os.path.exists(join(inp, 'sync_time.txt')):
import numpy as np
times = np.loadtxt(join(inp, 'sync_time.txt'))
times = times.reshape(times.shape[0], -1)
times = times[:, start:end:step]
np.savetxt(join(out, 'sync_time.txt'), times, fmt='%10d')
os.system('touch ' + join(out, '{}-{}-{}'.format(start, end, step)))
for copy, ext in copy_share_keys.items():
if not os.path.exists(join(inp, copy)):
continue
if len(args.frames) == 0:
ranges = [i for i in range(start, end, step)]
else:
ranges = args.frames
outdir = join(out, copy)
if os.path.exists(outdir) and len(os.listdir(outdir)) == len(ranges):
mywarn('[copy] Skip {}'.format(outdir))
continue
os.makedirs(outdir, exist_ok=True)
for nnf, nf in enumerate(tqdm(ranges, desc='{}'.format(copy))):
oldname = join(inp, copy, '{:06d}{}'.format(nf, ext))
if not os.path.exists(oldname):
mywarn('{} not exists'.format(oldname))
continue
newname = join(outdir, '{:06d}{}'.format(nnf, ext))
shutil.copyfile(oldname, newname)
for copy in keys:
ext = copy_keys.get(copy, '.json')
if not os.path.exists(join(inp, copy)):
continue
if len(args.subs) == 0:
subs = sorted(os.listdir(join(inp, copy)))
subs = [s for s in subs if os.path.isdir(join(inp, copy, s))]
else:
subs = args.subs
for sub in subs:
if not os.path.exists(join(inp, copy)):
continue
outdir = join(out, copy, sub.replace(args.strip, ''))
os.makedirs(outdir, exist_ok=True)
if args.end == -1:
oldnames = sorted(glob(join(inp, copy, sub, '*{}'.format(ext))))
end = len(oldnames)
print('{} has {} frames'.format(sub, end))
if args.sample == -1:
if len(args.frames) == 0:
ranges = [i for i in range(start, end, step)]
else:
ranges = args.frames
else:
ranges = [(i/args.sample)*(end-start-2*args.strip_frame)+start+args.strip_frame for i in range(args.sample)]
ranges = [int(i+0.5) for i in ranges]
if os.path.exists(outdir) and len(os.listdir(outdir)) == len(ranges):
mywarn('[copy] Skip {}'.format(outdir))
continue
for nnf, nf in enumerate(tqdm(ranges, desc='{}:{}'.format(sub, copy))):
oldname = join(inp, copy, sub, '{:06d}{}'.format(nf, ext))
if not os.path.exists(oldname):
oldnames = sorted(glob(join(inp, copy, sub, '{:06d}_*{}'.format(nf, ext))))
if len(oldnames) == 0:
myerror('{} not exists'.format(oldname))
import ipdb; ipdb.set_trace()
else:
for oldname in oldnames:
newname = join(outdir, os.path.basename(oldname).replace('{:06d}'.format(nf), '{:06d}'.format(nnf)))
shutil.copyfile(oldname, newname)
else:
newname = join(outdir, '{:06d}{}'.format(nnf, ext))
if copy == 'images' and args.scale != 1:
img = cv2.imread(oldname)
img = cv2.resize(img, None, fx=args.scale, fy=args.scale)
cv2.imwrite(newname, img)
else:
shutil.copyfile(oldname, newname)
# make videos
if copy == 'images' and args.make_video:
os.makedirs(join(out, 'videos'), exist_ok=True)
for sub in subs:
shell = '{} -y -i {}/images/{}/%06d{} -vcodec libx264 {}/videos/{}.mp4 -loglevel quiet'.format(
args.ffmpeg, out, sub, ext, out, sub
)
print(shell)
os.system(shell)
def export(root, out, keys):
mkdir(out)
for key in keys:
src = join(root, key)
dst = join(out, key)
if key == 'videos':
if os.path.exists(src):
shutil.copytree(src, dst)
else:
mkdir(dst)
subs = sorted(os.listdir(join(root, 'images')))
for sub in subs:
cmd = '{ffmpeg} -r {fps} -i {inp}/%06d.jpg -vcodec libx264 {out}'.format(
ffmpeg=args.ffmpeg, fps=50, inp=join(root, 'images', sub),
out=join(dst, sub+'.mp4')
)
os.system(cmd)
if not os.path.exists(src):
print(src)
continue
shutil.copytree(src, dst)
for name in ['intri.yml', 'extri.yml']:
if os.path.exists(join(root, name)):
shutil.copyfile(join(root, name), join(out, name))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('path', type=str)
parser.add_argument('out', type=str)
parser.add_argument('--strip', type=str, default='')
parser.add_argument('--keys', type=str, nargs='+', default=['images', 'annots', 'chessboard'])
parser.add_argument('--subs', type=str, nargs='+', default=[])
parser.add_argument('--start', type=int, default=0)
parser.add_argument('--step', type=int, default=1)
parser.add_argument('--end', type=int, default=-1)
parser.add_argument('--scale', type=float, default=1)
parser.add_argument('--strip_frame', type=int, default=0,
help='remove the start frames and end frames')
parser.add_argument('--ffmpeg', type=str, default='ffmpeg')
parser.add_argument('--ext', type=str, default='.jpg')
parser.add_argument('--sample', type=int, default=-1,
help='use this flag to sample a fixed number of frames')
parser.add_argument('--frames', type=int, default=[], nargs='+')
parser.add_argument('--debug', action='store_true')
parser.add_argument('--make_video', action='store_true')
parser.add_argument('--export', action='store_true')
args = parser.parse_args()
if args.export:
export(args.path, args.out, args.keys)
else:
copy_dataset(args.path, args.out, start=args.start, end=args.end, step=args.step, keys=args.keys, args=args)
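Example invocations of this script (all paths are placeholders):
# python3 scripts/preprocess/copy_dataset.py /data/raw /data/clip --start 0 --end 300 --step 3
# python3 scripts/preprocess/copy_dataset.py /data/raw /data/clip --sample 100 --keys images annots
# python3 scripts/preprocess/copy_dataset.py /data/clip /data/share --export --keys images videos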