EasyMocap/easymocap/estimator/YOLOv4/yolo.py

161 lines
5.2 KiB
Python
Raw Normal View History

2022-08-22 00:07:46 +08:00
'''
@ Date: 2020-12-10 16:39:51
@ Author: Qing Shuai
@ LastEditors: Qing Shuai
@ LastEditTime: 2022-04-21 23:53:40
@ FilePath: /EasyMocapPublic/easymocap/estimator/YOLOv4/yolo.py
'''
from .darknet2pytorch import Darknet
import cv2
import torch
from os.path import join
import os
import numpy as np
def load_class_names(namesfile):
    """Read a class-name list from a text file, one name per line.

    Args:
        namesfile: path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order, with
        trailing whitespace (including the newline) removed. Blank lines
        are kept as empty strings.
    """
    with open(namesfile, 'r') as handle:
        return [raw.rstrip() for raw in handle]
def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
    """Greedy non-maximum suppression over axis-aligned boxes (NumPy only).

    Boxes are visited in descending score order. Each visited box is kept,
    and every remaining box whose overlap with it exceeds ``nms_thresh`` is
    discarded.

    Args:
        boxes: 2-D array whose columns 0..3 are read as x1, y1, x2, y2.
        confs: 1-D array with one score per row of ``boxes``.
        nms_thresh: a remaining box survives a round only if its overlap
            with the kept box is ``<= nms_thresh``.
        min_mode: if true, overlap is ``intersection / min(area_a, area_b)``;
            otherwise it is intersection-over-union.

    Returns:
        1-D integer array of row indices into ``boxes``, ordered by
        descending score. It has an integer dtype even when empty, so it
        can always be used directly as an index array.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    # Indices sorted by score, highest first.
    order = confs.argsort()[::-1]
    keep = []
    while order.size > 0:
        idx_self = order[0]
        idx_other = order[1:]
        keep.append(idx_self)
        # Intersection rectangle between the kept box and every other one.
        xx1 = np.maximum(x1[idx_self], x1[idx_other])
        yy1 = np.maximum(y1[idx_self], y1[idx_other])
        xx2 = np.minimum(x2[idx_self], x2[idx_other])
        yy2 = np.minimum(y2[idx_self], y2[idx_other])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        if min_mode:
            denom = np.minimum(areas[idx_self], areas[idx_other])
        else:
            denom = areas[idx_self] + areas[idx_other] - inter
        # NOTE: a zero denominator (degenerate zero-area boxes) yields
        # nan/inf, which fails the `<=` test below and drops that box.
        over = inter / denom
        survivors = np.where(over <= nms_thresh)[0]
        # `survivors` indexes into idx_other, i.e. order[1:], hence the +1.
        order = order[survivors + 1]
    # An explicit integer dtype keeps the empty result usable as an index
    # array (np.array([]) would default to float64).
    return np.array(keep, dtype=np.intp)
def post_processing(conf_thresh, nms_thresh, output, class_id=0):
    """Turn raw detector output into per-image boxes for a single class.

    For every image in the batch: keep candidates whose best class score
    exceeds ``conf_thresh`` and whose best class is ``class_id``, then run
    ``nms_cpu`` on them.

    Args:
        conf_thresh: minimum best-class score for a candidate to be kept.
        nms_thresh: overlap threshold forwarded to ``nms_cpu``.
        output: indexable pair; ``output[0]`` holds boxes laid out as
            [batch, num, 1, 4] and ``output[1]`` holds class scores laid out
            as [batch, num, num_classes]. Each may be a NumPy array or an
            object exposing ``.cpu().detach().numpy()`` (e.g. a torch
            tensor).
        class_id: index of the class to keep. The default 0 is the class
            this pipeline treats as "person".

    Returns:
        A list with one entry per batch element. An entry is an array with
        five columns (the four box values followed by the score) ordered by
        descending score, or an empty list ``[]`` when nothing survives.
    """
    box_array = output[0]
    confs = output[1]
    # Convert each input on its own so a mixed ndarray/tensor pair works too.
    if not isinstance(box_array, np.ndarray):
        box_array = box_array.cpu().detach().numpy()
    if not isinstance(confs, np.ndarray):
        confs = confs.cpu().detach().numpy()

    # [batch, num, 1, 4] --> [batch, num, 4]
    box_array = box_array[:, :, 0]
    # [batch, num, num_classes] --> [batch, num]
    max_conf = np.max(confs, axis=2)
    max_id = np.argmax(confs, axis=2)

    bboxes_batch = []
    for img_boxes, img_conf, img_cls in zip(box_array, max_conf, max_id):
        selected = (img_conf > conf_thresh) & (img_cls == class_id)
        cls_boxes = img_boxes[selected, :]
        cls_confs = img_conf[selected]

        # Stays an empty list when no candidate of the class survives; callers
        # test it with len().
        bboxes = []
        keep = nms_cpu(cls_boxes, cls_confs, nms_thresh)
        if keep.size > 0:
            bboxes = np.hstack([cls_boxes[keep, :], cls_confs[keep][:, None]])
        bboxes_batch.append(bboxes)
    return bboxes_batch
class YOLOv4:
    """Single-image detector built on a Darknet YOLOv4 model.

    The network definition (``yolov4.cfg``) and the class-name list
    (``coco.names``) are read from the directory containing this file; the
    weights come from ``ckpt_path``.
    """

    def __init__(self, device, ckpt_path, box_nms_thres, conf_thres,
                 isWild=False) -> None:
        """Build the network, load its weights and store the thresholds.

        Args:
            device: device the model and the input tensors are moved to.
            ckpt_path: weights file passed to ``Darknet.load_weights``.
            box_nms_thres: NMS overlap threshold given to ``post_processing``.
            conf_thres: score threshold given to ``post_processing``.
            isWild: if true, ``predict_single`` additionally filters boxes
                by their normalized size.
        """
        dirname = os.path.dirname(__file__)
        cfgfile = join(dirname, 'yolov4.cfg')
        namesfile = join(dirname, 'coco.names')
        self.model = Darknet(cfgfile)
        self.model.load_weights(ckpt_path)
        self.model.to(device)
        self.model.eval()
        # Keep the parsed names on the instance instead of discarding them.
        self.class_names = load_class_names(namesfile)
        self.device = device
        self.box_nms_thres = box_nms_thres
        self.conf_thres = conf_thres
        self.isWild = isWild

    def _letterbox(self, image):
        """Resize ``image`` to fit the square network input and zero-pad it.

        The longer image side is scaled to ``self.model.width``; the shorter
        side is scaled by the same factor and centred on a black canvas.

        Returns:
            ``(sized, start)``: the square image and the padding offset, in
            network-input pixels, before the resized content along the
            padded axis (0 for square images).
        """
        height, width = image.shape[:2]
        # assumes the network input is square (width == height) — TODO confirm
        side = self.model.width
        if width == height:
            return cv2.resize(image, (side, side)), 0

        # Shrink first, then pad to a square.
        canvas = np.zeros((side, side, 3), dtype=np.uint8)
        if width > height:
            # max(1, ...) keeps extreme aspect ratios from rounding to 0 px,
            # which cv2.resize rejects.
            new_h = max(1, int(height / width * side))
            resized = cv2.resize(image, (side, new_h))
            start = (side - new_h) // 2
            canvas[start:start + new_h, :, :] = resized
        else:
            new_w = max(1, int(width / height * side))
            resized = cv2.resize(image, (new_w, side))
            start = (side - new_w) // 2
            canvas[:, start:start + new_w, :] = resized
        return canvas, start

    def predict_single(self, image):
        """Detect boxes in one image and return them in image pixels.

        Args:
            image: array indexed as (height, width, channel) with three
                channels.
                NOTE(review): the channels are fed to the network in the
                order they arrive; no BGR/RGB conversion happens here —
                confirm what the weights expect.

        Returns:
            The boxes from ``post_processing`` (five columns: four box
            values and a score) with the box values mapped back to the
            original image, or the empty result of ``post_processing``
            unchanged when nothing was detected.
        """
        height, width = image.shape[:2]
        side = self.model.width
        sized, start = self._letterbox(image)

        img = torch.from_numpy(sized.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
        img = img.to(self.device)
        with torch.no_grad():
            output = self.model(img)
        bboxes = post_processing(self.conf_thres, self.box_nms_thres, output)[0]
        if len(bboxes) == 0:
            return bboxes

        if self.isWild:
            # Boxes are presumably still normalized to the padded square here.
            # Keep those narrower than 0.8 that exceed 0.1 in width or height.
            box_w = bboxes[:, 2] - bboxes[:, 0]
            box_h = bboxes[:, 3] - bboxes[:, 1]
            flag = (box_w < 0.8) & ((box_w > 0.1) | (box_h > 0.1))
            bboxes = bboxes[flag]

        # The padded square spans the longer image side, so scale by it and
        # then remove the padding offset (converted to image pixels) from the
        # axis that was padded.
        if width >= height:
            offset = start * width / side
            bboxes[:, :4] *= width
            bboxes[:, 1] -= offset
            bboxes[:, 3] -= offset
        else:
            offset = start * height / side
            bboxes[:, :4] *= height
            bboxes[:, 0] -= offset
            bboxes[:, 2] -= offset
        return bboxes