# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import os
import os.path as osp

import torch
import torch.nn as nn
from torchvision.models.resnet import BasicBlock, Bottleneck
from torchvision.models.resnet import model_urls

from ..basetopdown import get_preds_from_heatmaps


def make_conv_layers(feat_dims, kernel=3, stride=1, padding=1, bnrelu_final=True):
    """Build a stack of Conv2d blocks, each followed by BatchNorm2d + ReLU.

    Args:
        feat_dims: channel sizes; one conv is created per consecutive pair.
        kernel, stride, padding: shared Conv2d hyper-parameters.
        bnrelu_final: if False, the last conv is emitted without BN/ReLU
            (used when the stack is a final estimation head).

    Returns:
        nn.Sequential containing the layers.
    """
    last = len(feat_dims) - 2
    modules = []
    for idx in range(len(feat_dims) - 1):
        modules.append(
            nn.Conv2d(
                in_channels=feat_dims[idx],
                out_channels=feat_dims[idx + 1],
                kernel_size=kernel,
                stride=stride,
                padding=padding))
        # Skip BN/ReLU on the final conv when it is an estimation layer.
        if idx < last or bnrelu_final:
            modules.append(nn.BatchNorm2d(feat_dims[idx + 1]))
            modules.append(nn.ReLU(inplace=True))
    return nn.Sequential(*modules)


def make_deconv_layers(feat_dims, bnrelu_final=True):
    """Build a stack of 2x-upsampling ConvTranspose2d blocks with BN + ReLU.

    Each deconv uses kernel 4 / stride 2 / padding 1 (doubles spatial size).
    BN/ReLU is omitted after the last deconv when ``bnrelu_final`` is False.

    Returns:
        nn.Sequential containing the layers.
    """
    last = len(feat_dims) - 2
    modules = []
    for idx in range(len(feat_dims) - 1):
        modules.append(
            nn.ConvTranspose2d(
                in_channels=feat_dims[idx],
                out_channels=feat_dims[idx + 1],
                kernel_size=4,
                stride=2,
                padding=1,
                output_padding=0,
                bias=False))
        # Skip BN/ReLU on the final deconv when it is an estimation layer.
        if idx < last or bnrelu_final:
            modules.append(nn.BatchNorm2d(feat_dims[idx + 1]))
            modules.append(nn.ReLU(inplace=True))
    return nn.Sequential(*modules)


class ResNetBackbone(nn.Module):
    """Standard ResNet feature extractor (stem + 4 stages, no pooling/fc).

    ``resnet_type`` selects the depth (18/34/50/101/152); ``init_weights``
    loads the matching ImageNet-pretrained weights from the model zoo.
    """

    def __init__(self, resnet_type):
        # (block class, blocks per stage, stage channel widths, zoo name)
        resnet_spec = {
            18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'),
            34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'),
            50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'),
            101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'),
            152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152'),
        }
        block, stage_blocks, channels, name = resnet_spec[resnet_type]
        self.name = name
        self.inplanes = 64
        super(ResNetBackbone, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)  # RGB
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, stage_blocks[0])
        self.layer2 = self._make_layer(block, 128, stage_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, stage_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, stage_blocks[3], stride=2)

        # Small-std normal init (not kaiming) for convs; unit/zero for BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, mean=0, std=0.001)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one ResNet stage of ``blocks`` residual blocks.

        A 1x1-conv downsample shortcut is added for the first block when
        the stride or channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the stage-4 feature map (stride 32 w.r.t. the input)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def init_weights(self):
        """Load ImageNet-pretrained weights from the torchvision model zoo."""
        org_resnet = torch.utils.model_zoo.load_url(model_urls[self.name])
        # Drop the original fc layer; default 'None' avoids KeyError when
        # the key is already absent.
        org_resnet.pop('fc.weight', None)
        org_resnet.pop('fc.bias', None)
        self.load_state_dict(org_resnet)
        print("Initialize resnet from model zoo")


class ResNet_Deconv(nn.Module):
    """ResNet-50 + deconv head producing 21-channel 2D keypoint heatmaps."""

    def __init__(self):
        super().__init__()
        self.hm2d_size = 64  # target heatmap resolution
        self.resnet = ResNetBackbone(50)
        # Three 2x deconvs: 2048 -> 256 -> 256 -> 256 channels.
        self.deconv = make_deconv_layers([2048, 256, 256, 256])
        # 1x1 conv estimation head, no BN/ReLU on the output.
        self.conv_hm2d = make_conv_layers(
            [256, 21], kernel=1, stride=1, padding=0, bnrelu_final=False)
        self.resnet.init_weights()
        self.deconv.apply(self.init_weights)
        self.conv_hm2d.apply(self.init_weights)

    @staticmethod
    def init_weights(m):
        """Per-module init: small-std normal weights, zero biases.

        Exact-type dispatch (deliberately not isinstance) so subclasses
        are left untouched.
        """
        t = type(m)
        if t == nn.ConvTranspose2d:
            nn.init.normal_(m.weight, std=0.001)
        elif t == nn.Conv2d:
            nn.init.normal_(m.weight, std=0.001)
            nn.init.constant_(m.bias, 0)
        elif t == nn.BatchNorm2d:
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
        elif t == nn.Linear:
            nn.init.normal_(m.weight, std=0.01)
            nn.init.constant_(m.bias, 0)

    def forward(self, img):
        """Predict 2D keypoints from an RGB image batch.

        Returns a dict with 'keypoints': argmax coordinates decoded from
        the predicted heatmaps (computed on detached CPU numpy arrays).
        """
        x_feat = self.resnet(img)
        x_feat = self.deconv(x_feat)
        x_hm2d = self.conv_hm2d(x_feat)
        pred = get_preds_from_heatmaps(x_hm2d.detach().cpu().numpy())
        return {
            'keypoints': pred
        }