Paddle复现RetinaFace详细解析
RetinaFace前向推理
分析主要分以下部分:
1,网络主干结构
2,网络的后处理
3, 网络前向推理
1,网络的主干结构复现
网络结构图如下:
这里的复现做了精简:5层FPN删减为3层,主干网络为MobileNetV1(mobilenet0.25)
In [10]
# 主干网络所用的模块
# View dataset directory.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
def conv_bn(inp, oup, stride=1, leaky=0):
    """Build a 3x3 convolution -> batch norm -> LeakyReLU stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 3x3 convolution (padding is fixed to 1).
        leaky: negative slope passed to ``nn.LeakyReLU``.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    stack = [
        nn.Conv2D(inp, oup, kernel_size=3, stride=stride, padding=1, bias_attr=False),
        nn.BatchNorm2D(oup),
        nn.LeakyReLU(negative_slope=leaky),
    ]
    return nn.Sequential(*stack)
def conv_bn_no_relu(inp, oup, stride):
    """Build a 3x3 convolution -> batch norm stack with no activation.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 3x3 convolution (padding is fixed to 1).

    Returns:
        An ``nn.Sequential`` of the convolution followed by the batch norm.
    """
    conv = nn.Conv2D(inp, oup, kernel_size=3, stride=stride, padding=1, bias_attr=False)
    norm = nn.BatchNorm2D(oup)
    return nn.Sequential(conv, norm)
def conv_bn1X1(inp, oup, stride, leaky=0):
    """Build a 1x1 convolution -> batch norm -> LeakyReLU stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 1x1 convolution (no padding).
        leaky: negative slope passed to ``nn.LeakyReLU``.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    stack = [
        nn.Conv2D(inp, oup, kernel_size=1, stride=stride, padding=0, bias_attr=False),
        nn.BatchNorm2D(oup),
        nn.LeakyReLU(negative_slope=leaky),
    ]
    return nn.Sequential(*stack)
def conv_dw(inp, oup, stride, leaky=0.1):
    """Build a depthwise-separable convolution unit.

    The unit is a 3x3 depthwise convolution (``groups=inp``) followed by a
    1x1 pointwise convolution; each is followed by batch norm and LeakyReLU.

    Args:
        inp: number of input channels (also the depthwise group count).
        oup: number of output channels of the pointwise convolution.
        stride: stride of the depthwise convolution.
        leaky: negative slope used by both LeakyReLU layers.

    Returns:
        An ``nn.Sequential`` with the six layers in order.
    """
    depthwise = [
        nn.Conv2D(inp, inp, kernel_size=3, stride=stride, padding=1, groups=inp, bias_attr=False),
        nn.BatchNorm2D(inp),
        nn.LeakyReLU(negative_slope=leaky),
    ]
    pointwise = [
        nn.Conv2D(inp, oup, kernel_size=1, stride=1, padding=0, bias_attr=False),
        nn.BatchNorm2D(oup),
        nn.LeakyReLU(negative_slope=leaky),
    ]
    return nn.Sequential(*depthwise, *pointwise)
class SSH(nn.Layer):
    """Context module that concatenates three parallel convolution branches.

    The branches are built from stacked 3x3 convolutions: one conv
    (``out_channel // 2`` channels), two convs and three convs
    (``out_channel // 4`` channels each). The 5x5 and 7x7 branches share
    their first convolution.
    """

    def __init__(self, in_channel, out_channel):
        """Create the branch layers.

        Args:
            in_channel: channels of the incoming feature map.
            out_channel: channels of the concatenated output; must be
                divisible by 4.
        """
        super().__init__()
        assert out_channel % 4 == 0
        # Narrow modules use a leaky slope of 0.1, wider ones a plain ReLU.
        leaky = 0.1 if out_channel <= 64 else 0
        half = out_channel // 2
        quarter = out_channel // 4

        self.conv3X3 = conv_bn_no_relu(in_channel, half, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, quarter, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(quarter, quarter, stride=1)

        self.conv7X7_2 = conv_bn(quarter, quarter, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(quarter, quarter, stride=1)

    def forward(self, input):
        """Run the three branches, concatenate on the channel axis, apply ReLU."""
        branch3 = self.conv3X3(input)

        shared = self.conv5X5_1(input)
        branch5 = self.conv5X5_2(shared)

        branch7 = self.conv7x7_3(self.conv7X7_2(shared))

        merged = paddle.concat([branch3, branch5, branch7], axis=1)
        return F.relu(merged)
class FPN(nn.Layer):
    """Three-level feature pyramid with top-down nearest-neighbour fusion."""

    def __init__(self, in_channels_list, out_channels):
        """Create the lateral 1x1 convolutions and the two merge convolutions.

        Args:
            in_channels_list: channel counts of the three input feature maps
                (indices 0, 1 and 2 are used).
            out_channels: channel count of every pyramid output.
        """
        super().__init__()
        # Narrow pyramids use a leaky slope of 0.1, wider ones a plain ReLU.
        leaky = 0.1 if out_channels <= 64 else 0

        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        """Fuse the three feature maps from the coarsest level downwards.

        Args:
            input: iterable whose first three items are the feature maps.

        Returns:
            ``[output1, output2, output3]``, the fused pyramid levels.
        """
        levels = list(input)

        lateral1 = self.output1(levels[0])
        lateral2 = self.output2(levels[1])
        lateral3 = self.output3(levels[2])

        # Upsample level 3 to level 2's spatial size, add, then smooth.
        upsampled3 = F.interpolate(lateral3, size=lateral2.shape[2:4], mode="nearest")
        fused2 = self.merge2(lateral2 + upsampled3)

        # Repeat one level down using the already fused level 2.
        upsampled2 = F.interpolate(fused2, size=lateral1.shape[2:4], mode="nearest")
        fused1 = self.merge1(lateral1 + upsampled2)

        return [fused1, fused2, lateral3]
class MobileNetV1(nn.Layer):
    """Slim MobileNetV1 backbone that returns the outputs of its three stages.

    ``avg`` and ``fc`` are created but not used by ``forward``; they are kept
    as sublayers so the layer set is unchanged.
    """

    def __init__(self):
        super().__init__()

        # (in, out, stride) of the depthwise units that follow the stem conv.
        # Trailing numbers in the original source were receptive-field notes
        # (3, 7, 11, 19, 27, 43 for stage 1) - not re-verified here.
        stage1_units = ((8, 16, 1), (16, 32, 2), (32, 32, 1), (32, 64, 2), (64, 64, 1))
        stage1_layers = [conv_bn(3, 8, 2, leaky=0.1)]
        stage1_layers.extend(conv_dw(i, o, s) for i, o, s in stage1_units)
        self.stage1 = nn.Sequential(*stage1_layers)

        # One downsampling unit followed by five 128-channel units.
        stage2_layers = [conv_dw(64, 128, 2)]
        stage2_layers.extend(conv_dw(128, 128, 1) for _ in range(5))
        self.stage2 = nn.Sequential(*stage2_layers)

        stage3_layers = [conv_dw(128, 256, 2), conv_dw(256, 256, 1)]
        self.stage3 = nn.Sequential(*stage3_layers)

        self.avg = nn.AdaptiveAvgPool2D((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        """Return ``[stage1_out, stage2_out, stage3_out]`` for input ``x``."""
        features = []
        for stage in (self.stage1, self.stage2, self.stage3):
            x = stage(x)
            features.append(x)
        return features
以上完成了FPN、SSH以及修改后的MobileNetV1等结构的代码。前向传播经过这些模块后将得到三个输出,每个输出再分别连接三个1x1卷积,得到分类、定位(边界框)和关键点的预测,实现如下:
In [11]
# 网络主干部分,前向推理
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class ClassHead(nn.Layer):
    """1x1 convolution head producing two class scores per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        """
        Args:
            inchannels: channels of the incoming feature map.
            num_anchors: anchors per spatial location.
        """
        super().__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2D(
            inchannels,
            self.num_anchors * 2,
            kernel_size=(1, 1),
            stride=1,
            padding=0,
        )

    def forward(self, x):
        """Return scores reshaped to ``[batch, -1, 2]``."""
        scores = self.conv1x1(x)
        # Move channels last so each anchor's values are contiguous.
        scores = scores.transpose([0, 2, 3, 1])
        batch = scores.shape[0]
        return scores.reshape([batch, -1, 2])
class BboxHead(nn.Layer):
    """1x1 convolution head producing four box offsets per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        """
        Args:
            inchannels: channels of the incoming feature map.
            num_anchors: anchors per spatial location.
        """
        super().__init__()
        self.conv1x1 = nn.Conv2D(
            inchannels,
            num_anchors * 4,
            kernel_size=(1, 1),
            stride=1,
            padding=0,
        )

    def forward(self, x):
        """Return offsets reshaped to ``[batch, -1, 4]``."""
        offsets = self.conv1x1(x)
        # Move channels last so each anchor's values are contiguous.
        offsets = offsets.transpose([0, 2, 3, 1])
        batch = offsets.shape[0]
        return offsets.reshape([batch, -1, 4])
class LandmarkHead(nn.Layer):
    """1x1 convolution head producing ten landmark values per anchor."""

    def __init__(self, inchannels=512, num_anchors=3):
        """
        Args:
            inchannels: channels of the incoming feature map.
            num_anchors: anchors per spatial location.
        """
        super().__init__()
        self.conv1x1 = nn.Conv2D(
            inchannels,
            num_anchors * 10,
            kernel_size=(1, 1),
            stride=1,
            padding=0,
        )

    def forward(self, x):
        """Return landmark values reshaped to ``[batch, -1, 10]``."""
        points = self.conv1x1(x)
        # Move channels last so each anchor's values are contiguous.
        points = points.transpose([0, 2, 3, 1])
        batch = points.shape[0]
        return points.reshape([batch, -1, 10])
class RetinaFace(nn.Layer):
    """Face detector: backbone -> FPN -> SSH -> box / class / landmark heads."""

    def __init__(self, cfg=None, phase='train'):
        """
        :param cfg: Network related settings. The keys read here are
            'name', 'pretrain', 'in_channel' and 'out_channel'.
        :param phase: train or test. Any value other than 'train' makes
            ``forward`` apply softmax to the class scores.
        """
        super().__init__()
        self.phase = phase

        # NOTE(review): cfg defaults to None but is subscripted immediately,
        # so a config dict is effectively required.
        backbone = None
        backbone_name = cfg['name']
        if backbone_name == 'mobilenet0.25':
            backbone = MobileNetV1()
            if cfg['pretrain']:
                checkpoint = paddle.load("./weights/mobilenetV1X0.25_pretrain.pdparams")
                backbone.set_state_dict(checkpoint)
        elif backbone_name == 'Resnet50':
            # NOTE(review): the stock resnet50 is used as-is; confirm that its
            # output matches the three feature maps FPN indexes.
            import paddle.vision.models as models
            backbone = models.resnet50(pretrained=cfg['pretrain'])
        self.body = backbone

        base_channels = cfg['in_channel']
        pyramid_inputs = [base_channels * factor for factor in (2, 4, 8)]
        out_channels = cfg['out_channel']

        self.fpn = FPN(pyramid_inputs, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

    def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Return a LayerList with one ClassHead per pyramid level."""
        return nn.LayerList([ClassHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Return a LayerList with one BboxHead per pyramid level."""
        return nn.LayerList([BboxHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
        """Return a LayerList with one LandmarkHead per pyramid level."""
        return nn.LayerList([LandmarkHead(inchannels, anchor_num) for _ in range(fpn_num)])

    def forward(self, inputs):
        """Run the detector.

        Returns:
            ``(bbox_regressions, classifications, ldm_regressions)``. Each is
            the per-level head output concatenated along axis 1. Outside the
            'train' phase the classifications are passed through softmax over
            the last axis.
        """
        backbone_maps = self.body(inputs)

        # FPN
        pyramid = self.fpn(backbone_maps)

        # SSH
        context_modules = (self.ssh1, self.ssh2, self.ssh3)
        features = [module(level) for module, level in zip(context_modules, pyramid)]

        box_parts = [self.BboxHead[i](feature) for i, feature in enumerate(features)]
        class_parts = [self.ClassHead[i](feature) for i, feature in enumerate(features)]
        landmark_parts = [self.LandmarkHead[i](feature) for i, feature in enumerate(features)]

        bbox_regressions = paddle.concat(box_parts, axis=1)
        classifications = paddle.concat(class_parts, axis=1)
        ldm_regressions = paddle.concat(landmark_parts, axis=1)

        if self.phase != 'train':
            classifications = F.softmax(classifications, axis=-1)
        return (bbox_regressions, classifications, ldm_regressions)
# Configuration for the MobileNet-0.25 variant of RetinaFace.
cfg_mnet = dict(
    # Backbone selector read by RetinaFace.__init__.
    name='mobilenet0.25',
    # Anchor sizes per feature-map level, read by PriorBox.
    min_sizes=[[16, 32], [64, 128], [256, 512]],
    # Divisor applied to the image size per level to get the feature-map size.
    steps=[8, 16, 32],
    # Scaling factors used by the encode/decode helpers.
    variance=[0.1, 0.2],
    # Whether PriorBox clips anchors to [0, 1].
    clip=False,
    # The next seven keys are not read in this file; presumably training
    # hyper-parameters - confirm against the training script.
    loc_weight=2.0,
    gpu_train=True,
    batch_size=32,
    ngpu=1,
    epoch=250,
    decay1=190,
    decay2=220,
    image_size=640,
    # When true, RetinaFace loads the backbone checkpoint at construction.
    pretrain=True,
    # Not read in this file; presumably a stage-name-to-index map.
    return_layers={'stage1': 1, 'stage2': 2, 'stage3': 3},
    # Base channel count; RetinaFace feeds FPN with 2x, 4x and 8x of it.
    in_channel=32,
    # Channel count of the FPN/SSH outputs and the head inputs.
    out_channel=64,
)
##net = RetinaFace(cfg=cfg_mnet, phase = 'test')
#net.eval()
2,预选框生成与网络结果后处理
先生成anchors, 使用推理结果和anchors进行解码
In [12]
# 预选框生成, 如下方代码示例:
import paddle
from itertools import product as product
from math import ceil
class PriorBox(object):
    """Generate the anchor (prior) boxes for a given image size."""

    def __init__(self, cfg, image_size=None, phase='train'):
        """
        Args:
            cfg: dict providing 'min_sizes', 'steps' and 'clip'.
            image_size: ``(height, width)`` of the network input; it is
                indexed immediately, so it must be supplied despite the
                ``None`` default.
            phase: accepted but not used by this class.
        """
        super(PriorBox, self).__init__()
        self.min_sizes = cfg['min_sizes']
        self.steps = cfg['steps']
        self.clip = cfg['clip']
        self.image_size = image_size
        height, width = self.image_size[0], self.image_size[1]
        # One [rows, cols] entry per step.
        self.feature_maps = [[ceil(height / step), ceil(width / step)] for step in self.steps]
        self.name = "s"

    def forward(self):
        """Return a ``[-1, 4]`` tensor of ``(cx, cy, w, h)`` anchors.

        All values are divided by the image width (x, w) or height (y, h).
        Anchors are emitted level by level, row-major over the feature map,
        one per entry of that level's ``min_sizes``.
        """
        image_h, image_w = self.image_size[0], self.image_size[1]
        anchors = []
        for k, f in enumerate(self.feature_maps):
            min_sizes = self.min_sizes[k]
            for i, j in product(range(f[0]), range(f[1])):
                for min_size in min_sizes:
                    box_w = min_size / image_w
                    box_h = min_size / image_h
                    # Anchor centre is the centre of the feature-map cell.
                    cx = (j + 0.5) * self.steps[k] / image_w
                    cy = (i + 0.5) * self.steps[k] / image_h
                    anchors.extend([cx, cy, box_w, box_h])

        # Flat Python list -> [num_anchors, 4] tensor.
        output = paddle.to_tensor(anchors).reshape([-1, 4])
        if self.clip:
            output = output.clip(max=1, min=0)
        return output
In [13]
# 同时添加如下代码, 这样每次环境(kernel)启动的时候只要运行下方代码即可:
import paddle
import numpy as np
def index_fill(input, index, update):
    """Write ``update`` into ``input`` at every position listed in ``index``.

    A minimal stand-in for ``Tensor.index_fill``, written only for this
    repository's needs.

    Args:
        input: indexable container, modified in place.
        index: sequence of positions to overwrite.
        update: value stored at each position.

    Returns:
        The same ``input`` object after modification.
    """
    total = len(index)
    for k in range(total):
        position = index[k]
        input[position] = update
    return input
def point_form(boxes):
    """Convert centre-size boxes to corner form.

    Args:
        boxes: (tensor) rows of ``(cx, cy, w, h)``.
    Return:
        (tensor) rows of ``(xmin, ymin, xmax, ymax)``.
    """
    centers = boxes[:, :2]
    half_sizes = boxes[:, 2:] / 2
    top_left = centers - half_sizes      # xmin, ymin
    bottom_right = centers + half_sizes  # xmax, ymax
    return paddle.concat((top_left, bottom_right), 1)
def center_size(boxes):
    """Convert corner-form boxes to centre-size form.

    The original call passed the two halves as separate positional arguments
    to ``paddle.concat``, so the second tensor landed in the ``axis`` slot.
    The halves are now passed as one list.

    Args:
        boxes: (tensor) rows of ``(xmin, ymin, xmax, ymax)``.
    Return:
        (tensor) rows of ``(cx, cy, w, h)``.
    """
    top_left = boxes[:, :2]
    bottom_right = boxes[:, 2:]
    centers = (bottom_right + top_left) / 2  # cx, cy
    sizes = bottom_right - top_left          # w, h
    return paddle.concat([centers, sizes], axis=1)
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of corner-form boxes.

    Both corner sets are expanded to ``[A, B, 2]`` so every box in ``box_a``
    is compared against every box in ``box_b``.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    pair_shape = [box_a.shape[0], box_b.shape[0], 2]

    a_bottom_right = box_a[:, 2:].unsqueeze(1).expand(pair_shape)
    b_bottom_right = box_b[:, 2:].unsqueeze(0).expand(pair_shape)
    a_top_left = box_a[:, :2].unsqueeze(1).expand(pair_shape)
    b_top_left = box_b[:, :2].unsqueeze(0).expand(pair_shape)

    overlap_max = paddle.minimum(a_bottom_right, b_bottom_right)
    overlap_min = paddle.maximum(a_top_left, b_top_left)

    # Disjoint pairs give a negative extent; clamp it to zero.
    extent = paddle.clip(overlap_max - overlap_min, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (IoU) of two sets of boxes.

    IoU = A ∩ B / (area(A) + area(B) - A ∩ B)

    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes in corner form, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.shape[0], box_b.shape[0]]
    """
    overlap = intersect(box_a, box_b)

    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]

    # Broadcast each per-box area to the [A,B] layout of ``overlap``.
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)

    return overlap / (area_a + area_b - overlap)  # [A,B]
def matrix_iou(a, b):
    """Return the pairwise IoU of two corner-form box arrays (numpy version).

    Args:
        a: array of shape [N, 4] holding ``(x1, y1, x2, y2)`` rows.
        b: array of shape [M, 4] holding ``(x1, y1, x2, y2)`` rows.

    Returns:
        Array of shape [N, M] with the IoU of every (a, b) pair.
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Pairs that do not overlap on both axes contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter = np.prod(bottom_right - top_left, axis=2) * has_overlap

    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    size_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union = size_a[:, np.newaxis] + size_b - inter
    return inter / union
def matrix_iof(a, b):
    """Return pairwise intersection-over-foreground (numpy version).

    The intersection of each (a, b) pair is divided by the area of the box
    from ``a``; that area is clamped to at least 1 before dividing.

    Args:
        a: array of shape [N, 4] holding ``(x1, y1, x2, y2)`` rows.
        b: array of shape [M, 4] holding ``(x1, y1, x2, y2)`` rows.

    Returns:
        Array of shape [N, M].
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Pairs that do not overlap on both axes contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter = np.prod(bottom_right - top_left, axis=2) * has_overlap

    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denominator = np.maximum(size_a[:, np.newaxis], 1)
    return inter / denominator
def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the boxes and landmarks, and store the per-prior targets
    for batch item ``idx``.
    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        variances: Variances forwarded to encode() / encode_landm(), which
            read indices 0 and 1.
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched labels for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        None. Results are written in place to loc_t[idx], conf_t[idx] and
        landm_t[idx]. If no ground truth has a best overlap of at least 0.2,
        loc_t[idx] and conf_t[idx] are zeroed and landm_t[idx] is left as is.
    """
    # jaccard index
    overlaps = jaccard(
        truths,
        point_form(priors)
    )
    # (Bipartite Matching)
    # [1,num_objects] best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True), overlaps.argmax(1, keepdim=True)

    # ignore hard gt
    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx.masked_select(valid_gt_idx.unsqueeze(1)).unsqueeze(1)
    if best_prior_idx_filter.shape[0] <= 0:
        loc_t[idx] = 0
        conf_t[idx] = 0
        return

    # [1,num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True), overlaps.argmax(0, keepdim=True)
    best_truth_idx = best_truth_idx.squeeze(0)
    best_truth_overlap = best_truth_overlap.squeeze(0)
    best_prior_idx = best_prior_idx.squeeze(1)
    best_prior_idx_filter = best_prior_idx_filter.squeeze(1)
    best_prior_overlap = best_prior_overlap.squeeze(1)
    # Overlap 2 is above any real IoU, so these priors always pass the threshold below.
    best_truth_overlap = index_fill(best_truth_overlap, best_prior_idx_filter, 2)  # ensure best prior
    # TODO refactor: index best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.shape[0]):  # record which ground truth box this anchor predicts
        best_truth_idx[best_prior_idx[j]] = j
    matches = paddle.to_tensor(truths.numpy()[best_truth_idx.numpy()])  # Shape: [num_priors,4] ground truth box picked for every anchor
    conf = paddle.to_tensor(labels.numpy()[best_truth_idx.numpy()])  # Shape: [num_priors] label picked for every anchor
    temp_conf = conf.numpy()
    temp_conf[(best_truth_overlap < threshold).numpy()] = 0  # label as background: anchors with overlap below the threshold become negatives
    conf = paddle.to_tensor(temp_conf).astype('int32')
    loc = encode(matches, priors, variances)

    matches_landm = paddle.to_tensor(landms.numpy()[best_truth_idx.numpy()])
    landm = encode_landm(matches_landm, priors, variances)
    loc_t[idx] = loc  # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    landm_t[idx] = landm
def encode(matched, priors, variances):
    """Encode matched ground truth boxes as regression targets relative to
    their prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Centre offset, divided by the prior size scaled by variances[0].
    truth_centers = (matched[:, :2] + matched[:, 2:]) / 2
    center_offset = (truth_centers - prior_centers) / (variances[0] * prior_sizes)

    # Log of the size ratio, divided by variances[1].
    size_ratio = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    log_size = paddle.log(size_ratio) / variances[1]

    # return target for smooth_l1_loss
    return paddle.concat([center_offset, log_size], 1)  # [num_priors,4]
def encode_landm(matched, priors, variances):
    """Encode matched ground truth landmarks as regression targets relative
    to their prior boxes.
    Args:
        matched: (tensor) Landmark coords of ground truth for each prior
            Shape: [num_priors, 10].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded landm (tensor), Shape: [num_priors, 10]
    """
    count = matched.shape[0]
    # View the ten values as five (x, y) points.
    points = paddle.reshape(matched, [count, 5, 2])

    # Repeat every prior once per landmark point: [num_priors, 5, 4].
    tiled_priors = priors.unsqueeze(1).expand([count, 5, 4])
    prior_centers = tiled_priors[:, :, :2]
    prior_sizes = tiled_priors[:, :, 2:]

    # Point offset from the prior centre, scaled like the box centre offsets.
    offsets = (points - prior_centers) / (variances[0] * prior_sizes)

    # return target for smooth_l1_loss
    return offsets.reshape([offsets.shape[0], -1])
# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Turn location predictions back into corner-form boxes, undoing the
    offset encoding applied at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * paddle.exp(loc[:, 2:] * variances[1])

    # Centre-size -> corner form.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return paddle.concat((top_left, bottom_right), 1)
def decode_landm(pre, priors, variances):
    """Turn landmark predictions back into coordinates, undoing the offset
    encoding applied at train time.
    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_priors,10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Five (x, y) points, each decoded the same way as a box centre.
    decoded_points = []
    for point in range(5):
        start = 2 * point
        offsets = pre[:, start:start + 2]
        decoded_points.append(prior_centers + offsets * variances[0] * prior_sizes)
    return paddle.concat(decoded_points, axis=1)
def log_sum_exp(x):
    """Compute log(sum(exp(x))) along axis 1.

    The global maximum of ``x`` is subtracted before exponentiating and
    added back afterwards.
    Args:
        x (tensor): conf_preds from conf layers
    Return:
        (tensor) the reduction along axis 1, with that axis kept.
    """
    peak = x.max()
    shifted = paddle.exp(x - peak)
    summed = paddle.sum(shifted, 1, keepdim=True)
    return paddle.log(summed) + peak
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    The previous body still used PyTorch-only calls (``boxes.new()``,
    ``.long()``, ``index_select(..., out=...)``, ``resize_as_``, ``.le``)
    and PyTorch's argument order. It is rewritten with Paddle operations,
    and the empty-input path now returns the same ``(keep, count)`` pair as
    the normal path.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        (keep, count): ``keep`` is an int64 tensor of length num_priors whose
        first ``count`` entries are the indices of the kept boxes, highest
        score first; the remaining entries are zero.
    """
    keep = paddle.zeros([scores.shape[0]], dtype='int64')
    count = 0
    if boxes.shape[0] == 0:
        return keep, count

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = paddle.multiply(x2 - x1, y2 - y1)

    # Ascending sort; the last top_k indices are the highest scores.
    idx = paddle.argsort(scores, axis=0)[-top_k:]

    while idx.shape[0] > 0:
        i = int(idx[-1])  # index of current largest val
        keep[count] = i
        count += 1
        if idx.shape[0] == 1:
            break
        idx = idx[:-1]  # remove kept element from view

        # Corners of the overlap between box i and every remaining box.
        xx1 = paddle.maximum(paddle.index_select(x1, idx), x1[i])
        yy1 = paddle.maximum(paddle.index_select(y1, idx), y1[i])
        xx2 = paddle.minimum(paddle.index_select(x2, idx), x2[i])
        yy2 = paddle.minimum(paddle.index_select(y2, idx), y2[i])

        # Non-overlapping boxes give negative extents; clamp to zero.
        w = paddle.clip(xx2 - xx1, min=0.0)
        h = paddle.clip(yy2 - yy1, min=0.0)
        inter = w * h

        # IoU = i / (area(a) + area(b) - i)
        rem_areas = paddle.index_select(area, idx)
        union = (rem_areas - inter) + area[i]
        iou = inter / union

        # keep only elements with an IoU <= overlap
        idx = paddle.masked_select(idx, iou <= overlap)
    return keep, count
3,网络前向推理
In [14]
from __future__ import print_function
import argparse
import paddle
import numpy as np
import cv2
import time
def py_cpu_nms(dets, thresh):
    """Pure numpy non-maximum suppression.

    Args:
        dets: array whose first five columns are ``x1, y1, x2, y2, score``.
        thresh: boxes whose overlap with an already kept box exceeds this
            value are discarded.

    Returns:
        List of row indices into ``dets`` for the kept boxes, highest score
        first.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))

    # Areas use the inclusive-pixel (+1) convention.
    areas = (right - left + 1) * (bottom - top + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Overlap rectangle between the best box and every remaining box.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        ovr = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that overlap little enough survive to the next round.
        order = rest[ovr <= thresh]
    return keep
# Command-line options for the inference cell.
parser = argparse.ArgumentParser(description='Retinaface')

# (flags, keyword arguments) for each option, registered in this order.
_OPTION_SPECS = [
    (('-m', '--trained_model'),
     dict(default='./weights/mobilenetV1X0.25_pretrain.pdparams', type=str,
          help='Trained state_dict file path to open')),
    (('--network',),
     dict(default='mobile0.25', help='Backbone network mobile0.25 or resnet50')),
    (('--cpu',),
     dict(action="store_true", default=False, help='Use cpu inference')),
    (('--confidence_threshold',),
     dict(default=0.02, type=float, help='confidence_threshold')),
    (('--top_k',),
     dict(default=5000, type=int, help='top_k')),
    (('--nms_threshold',),
     dict(default=0.4, type=float, help='nms_threshold')),
    (('--keep_top_k',),
     dict(default=750, type=int, help='keep_top_k')),
    (('-s', '--save_image'),
     dict(action="store_true", default=True, help='show detection results')),
    (('--vis_thres',),
     dict(default=0.6, type=float, help='visualization_threshold')),
]
for _flags, _settings in _OPTION_SPECS:
    parser.add_argument(*_flags, **_settings)

# parse_known_args() is used instead of parse_args() so that arguments this
# parser does not recognise are ignored rather than causing an exit.
args = parser.parse_known_args()[0]
def check_keys(model, pretrained_state_dict):
    """Report how a checkpoint's keys line up with a model's state dict.

    Prints the number of missing, unused and used keys, then asserts that
    at least one key is shared.

    Args:
        model: object exposing ``state_dict()``.
        pretrained_state_dict: mapping loaded from the checkpoint.

    Returns:
        True when at least one checkpoint key matches a model key.
    """
    checkpoint_names = set(pretrained_state_dict.keys())
    model_names = set(model.state_dict().keys())

    shared = model_names.intersection(checkpoint_names)
    only_in_checkpoint = checkpoint_names.difference(model_names)
    only_in_model = model_names.difference(checkpoint_names)

    report = (
        ('Missing keys', only_in_model),
        ('Unused checkpoint keys', only_in_checkpoint),
        ('Used keys', shared),
    )
    for label, names in report:
        print('{}:{}'.format(label, len(names)))

    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True
def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` with one leading ``prefix`` stripped.

    Old style checkpoints store every parameter name with a common prefix
    such as 'module.'. Keys that do not start with ``prefix`` are copied
    unchanged.
    """
    print('remove prefix \'{}\''.format(prefix))
    renamed = {}
    for key, value in state_dict.items():
        if key.startswith(prefix):
            # Drop only the first occurrence of the prefix.
            key = key.partition(prefix)[2]
        renamed[key] = value
    return renamed
def load_model(model, pretrained_path):
    """Load a checkpoint from disk into ``model``.

    The checkpoint may be a bare state dict or a dict wrapping it under the
    'state_dict' key. A leading 'module.' is stripped from every key before
    the keys are checked and the weights are applied.

    Args:
        model: layer whose ``set_state_dict`` receives the weights.
        pretrained_path: path handed to ``paddle.load``.

    Returns:
        The same ``model`` object.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))
    loaded = paddle.load(pretrained_path)
    weights = loaded['state_dict'] if "state_dict" in loaded.keys() else loaded
    cleaned = remove_prefix(weights, 'module.')
    check_keys(model, cleaned)
    model.set_state_dict(cleaned)
    return model
# Single-image inference: load the detector, run one forward pass, decode the
# predictions against the anchors, filter / NMS them and draw the result.
# Gradients are disabled for everything below.
paddle.set_grad_enabled(False)
cfg = cfg_mnet
#args.network == "mobile0.25"
# net and model
net = RetinaFace(cfg=cfg_mnet, phase = 'test')
# NOTE(review): the checkpoint path is hard-coded; args.trained_model is not used.
net = load_model(net, 'test/mobilenet0.25_epoch_5.pdparams')
net.eval()
print('Finished loading model!')
# print(net)
resize = 1

# testing begin
image_path = "test.jpg"
img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

img = np.float32(img_raw)

im_height, im_width, _ = img.shape
# (width, height, width, height): scales the decoded box corners to pixels.
scale = paddle.to_tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
# Per-channel normalisation: subtract an offset, then divide by a scale.
img -= (104, 117, 123)
img /= (57.1,57.4,58.4)
# HWC -> CHW, then add a leading batch axis.
img = img.transpose(2, 0, 1)
img = paddle.to_tensor(img).unsqueeze(0)

tic = time.time()
loc, conf, landms = net(img)  # forward pass
print('net forward time: {:.4f}'.format(time.time() - tic))

# Anchors for this image size, then decode the box predictions against them.
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
prior_data = priors
boxes = decode(loc.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
# Column 1 of the softmax output is used as the detection score.
scores = conf.squeeze(0).cpu().numpy()[:, 1]
landms = decode_landm(landms.squeeze(0), prior_data, cfg['variance'])
# img is now [1, C, H, W], so shape[3] / shape[2] are width / height;
# repeated five times, once per (x, y) landmark pair.
scale1 = paddle.to_tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
landms = landms * scale1 / resize
landms = landms.cpu().numpy()

# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
scores = scores[inds]

# keep top-K before NMS
order = scores.argsort()[::-1][:args.top_k]
boxes = boxes[order]
landms = landms[order]
scores = scores[order]

# do NMS
# dets rows are (x1, y1, x2, y2, score).
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = py_cpu_nms(dets, args.nms_threshold)
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
dets = dets[keep, :]
landms = landms[keep]

# keep top-K faster NMS
dets = dets[:args.keep_top_k, :]
landms = landms[:args.keep_top_k, :]

# Each row becomes 15 values: box (4), score (1), landmarks (10).
dets = np.concatenate((dets, landms), axis=1)

# show image
if args.save_image:
    for b in dets:
        # Skip detections below the visualisation threshold.
        if b[4] < args.vis_thres:
            continue
        # Format the score before the row is truncated to integers.
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(img_raw, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        # landms
        cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
    # save image

    name = "test_out.jpg"
    cv2.imwrite(name, img_raw)
Loading pretrained model from test/mobilenet0.25_epoch_5.pdparams remove prefix 'module.' Missing keys:0 Unused checkpoint keys:0 Used keys:255 Finished loading model! net forward time: 0.0325
RetinaFace反向传播
主要包括:
4,网络损失函数
5,训练数据组织
6,训练设置,迭代
4,网络损失函数
正向传播需要对网络的输出结果进行解码,训练需要根据预选框和真实结果进行编码
In [15]
#网络损失函数
def index_fill(input, index, update):
    """Write ``update`` into ``input`` at every position listed in ``index``.

    A minimal stand-in for ``Tensor.index_fill``, written only for this
    repository's needs.

    Args:
        input: indexable container, modified in place.
        index: sequence of positions to overwrite.
        update: value stored at each position.

    Returns:
        The same ``input`` object after modification.
    """
    total = len(index)
    for k in range(total):
        position = index[k]
        input[position] = update
    return input
def point_form(boxes):
    """Convert centre-size boxes to corner form.

    Args:
        boxes: (tensor) rows of ``(cx, cy, w, h)``.
    Return:
        (tensor) rows of ``(xmin, ymin, xmax, ymax)``.
    """
    centers = boxes[:, :2]
    half_sizes = boxes[:, 2:] / 2
    top_left = centers - half_sizes      # xmin, ymin
    bottom_right = centers + half_sizes  # xmax, ymax
    return paddle.concat((top_left, bottom_right), 1)
def center_size(boxes):
    """Convert corner-form boxes to centre-size form.

    The original call passed the two halves as separate positional arguments
    to ``paddle.concat``, so the second tensor landed in the ``axis`` slot.
    The halves are now passed as one list.

    Args:
        boxes: (tensor) rows of ``(xmin, ymin, xmax, ymax)``.
    Return:
        (tensor) rows of ``(cx, cy, w, h)``.
    """
    top_left = boxes[:, :2]
    bottom_right = boxes[:, 2:]
    centers = (bottom_right + top_left) / 2  # cx, cy
    sizes = bottom_right - top_left          # w, h
    return paddle.concat([centers, sizes], axis=1)
def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of corner-form boxes.

    Both corner sets are expanded to ``[A, B, 2]`` so every box in ``box_a``
    is compared against every box in ``box_b``.

    Args:
        box_a: (tensor) bounding boxes, Shape: [A,4].
        box_b: (tensor) bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    pair_shape = [box_a.shape[0], box_b.shape[0], 2]

    a_bottom_right = box_a[:, 2:].unsqueeze(1).expand(pair_shape)
    b_bottom_right = box_b[:, 2:].unsqueeze(0).expand(pair_shape)
    a_top_left = box_a[:, :2].unsqueeze(1).expand(pair_shape)
    b_top_left = box_b[:, :2].unsqueeze(0).expand(pair_shape)

    overlap_max = paddle.minimum(a_bottom_right, b_bottom_right)
    overlap_min = paddle.maximum(a_top_left, b_top_left)

    # Disjoint pairs give a negative extent; clamp it to zero.
    extent = paddle.clip(overlap_max - overlap_min, min=0)
    return extent[:, :, 0] * extent[:, :, 1]
def jaccard(box_a, box_b):
    """Compute the pairwise jaccard overlap (IoU) of two sets of boxes.

    IoU = A ∩ B / (area(A) + area(B) - A ∩ B)

    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes in corner form, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.shape[0], box_b.shape[0]]
    """
    overlap = intersect(box_a, box_b)

    width_a = box_a[:, 2] - box_a[:, 0]
    height_a = box_a[:, 3] - box_a[:, 1]
    width_b = box_b[:, 2] - box_b[:, 0]
    height_b = box_b[:, 3] - box_b[:, 1]

    # Broadcast each per-box area to the [A,B] layout of ``overlap``.
    area_a = (width_a * height_a).unsqueeze(1).expand_as(overlap)
    area_b = (width_b * height_b).unsqueeze(0).expand_as(overlap)

    return overlap / (area_a + area_b - overlap)  # [A,B]
def matrix_iou(a, b):
    """Return the pairwise IoU of two corner-form box arrays (numpy version).

    Args:
        a: array of shape [N, 4] holding ``(x1, y1, x2, y2)`` rows.
        b: array of shape [M, 4] holding ``(x1, y1, x2, y2)`` rows.

    Returns:
        Array of shape [N, M] with the IoU of every (a, b) pair.
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Pairs that do not overlap on both axes contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter = np.prod(bottom_right - top_left, axis=2) * has_overlap

    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    size_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    union = size_a[:, np.newaxis] + size_b - inter
    return inter / union
def matrix_iof(a, b):
    """Return pairwise intersection-over-foreground (numpy version).

    The intersection of each (a, b) pair is divided by the area of the box
    from ``a``; that area is clamped to at least 1 before dividing.

    Args:
        a: array of shape [N, 4] holding ``(x1, y1, x2, y2)`` rows.
        b: array of shape [M, 4] holding ``(x1, y1, x2, y2)`` rows.

    Returns:
        Array of shape [N, M].
    """
    top_left = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    bottom_right = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Pairs that do not overlap on both axes contribute zero area.
    has_overlap = (top_left < bottom_right).all(axis=2)
    inter = np.prod(bottom_right - top_left, axis=2) * has_overlap

    size_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    denominator = np.maximum(size_a[:, np.newaxis], 1)
    return inter / denominator
def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the boxes and landmarks, and store the per-prior targets
    for batch item ``idx``.
    Args:
        threshold: (float) The overlap threshold used when matching boxes.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
        variances: Variances forwarded to encode() / encode_landm(), which
            read indices 0 and 1.
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        landms: (tensor) Ground truth landms, Shape [num_obj, 10].
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched labels for conf preds.
        landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
        idx: (int) current batch index
    Return:
        None. Results are written in place to loc_t[idx], conf_t[idx] and
        landm_t[idx]. If no ground truth has a best overlap of at least 0.2,
        loc_t[idx] and conf_t[idx] are zeroed and landm_t[idx] is left as is.
    """
    # jaccard index
    overlaps = jaccard(
        truths,
        point_form(priors)
    )
    # (Bipartite Matching)
    # [1,num_objects] best prior for each ground truth
    best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True), overlaps.argmax(1, keepdim=True)

    # ignore hard gt
    valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
    best_prior_idx_filter = best_prior_idx.masked_select(valid_gt_idx.unsqueeze(1)).unsqueeze(1)
    if best_prior_idx_filter.shape[0] <= 0:
        loc_t[idx] = 0
        conf_t[idx] = 0
        return

    # [1,num_priors] best ground truth for each prior
    best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True), overlaps.argmax(0, keepdim=True)
    best_truth_idx = best_truth_idx.squeeze(0)
    best_truth_overlap = best_truth_overlap.squeeze(0)
    best_prior_idx = best_prior_idx.squeeze(1)
    best_prior_idx_filter = best_prior_idx_filter.squeeze(1)
    best_prior_overlap = best_prior_overlap.squeeze(1)
    # Overlap 2 is above any real IoU, so these priors always pass the threshold below.
    best_truth_overlap = index_fill(best_truth_overlap, best_prior_idx_filter, 2)  # ensure best prior
    # TODO refactor: index best_prior_idx with long tensor
    # ensure every gt matches with its prior of max overlap
    for j in range(best_prior_idx.shape[0]):  # record which ground truth box this anchor predicts
        best_truth_idx[best_prior_idx[j]] = j
    matches = paddle.to_tensor(truths.numpy()[best_truth_idx.numpy()])  # Shape: [num_priors,4] ground truth box picked for every anchor
    conf = paddle.to_tensor(labels.numpy()[best_truth_idx.numpy()])  # Shape: [num_priors] label picked for every anchor
    temp_conf = conf.numpy()
    temp_conf[(best_truth_overlap < threshold).numpy()] = 0  # label as background: anchors with overlap below the threshold become negatives
    conf = paddle.to_tensor(temp_conf).astype('int32')
    loc = encode(matches, priors, variances)

    matches_landm = paddle.to_tensor(landms.numpy()[best_truth_idx.numpy()])
    landm = encode_landm(matches_landm, priors, variances)
    loc_t[idx] = loc  # [num_priors,4] encoded offsets to learn
    conf_t[idx] = conf  # [num_priors] top class label for each prior
    landm_t[idx] = landm
def encode(matched, priors, variances):
    """Turn matched ground-truth boxes into regression targets relative to priors.

    Args:
        matched: tensor of ground-truth boxes, one row per prior, read as
            corner form (columns 0-1 are the min corner, 2-3 the max corner).
        priors: tensor of prior boxes, one row per prior, read as
            center form (columns 0-1 are the center, 2-3 the size).
        variances: indexable pair; ``variances[0]`` scales the center offsets
            and ``variances[1]`` scales the log size ratios.

    Returns:
        Tensor with four columns per prior: the scaled center offset followed
        by the scaled log width/height ratio.
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]
    box_min = matched[:, :2]
    box_max = matched[:, 2:]

    # Offset between the box center and the prior center, normalised by the
    # prior size and the first variance.
    center_offset = (box_min + box_max) / 2 - prior_centers
    center_offset = center_offset / (variances[0] * prior_sizes)

    # Log of the box-size / prior-size ratio, normalised by the second variance.
    size_ratio = (box_max - box_min) / prior_sizes
    log_size = paddle.log(size_ratio) / variances[1]

    return paddle.concat([center_offset, log_size], 1)
def encode_landm(matched, priors, variances):
    """Turn matched landmark coordinates into regression targets relative to priors.

    Args:
        matched: tensor with ten values per prior; it is reshaped to
            ``[num_priors, 5, 2]``, i.e. five (x, y) points per row.
        priors: tensor of prior boxes, one row per prior, read as
            (cx, cy, w, h).
        variances: indexable; only ``variances[0]`` is used here.

    Returns:
        Tensor reshaped back to two dimensions (ten values per prior) holding
        each point's offset from the prior center divided by
        ``variances[0] * prior size``.
    """
    num_rows = matched.shape[0]
    points = paddle.reshape(matched, [num_rows, 5, 2])

    # Repeat every prior column once per landmark point so the priors line up
    # with `points` as a [num_priors, 5, 4] tensor.
    repeated_columns = [
        priors[:, col].unsqueeze(1).expand([num_rows, 5]).unsqueeze(2)
        for col in range(4)
    ]
    tiled_priors = paddle.concat(repeated_columns, axis=2)

    offsets = points[:, :, :2] - tiled_priors[:, :, :2]
    offsets = offsets / (variances[0] * tiled_priors[:, :, 2:])
    return offsets.reshape([offsets.shape[0], -1])
def log_sum_exp(x):
    """Compute a max-shifted log-sum-exp along axis 1.

    The global maximum of ``x`` is subtracted before exponentiating and added
    back after the logarithm.

    Args:
        x: tensor of confidence predictions; the sum runs over axis 1.

    Returns:
        Tensor with axis 1 reduced to size one (``keepdim=True``).
    """
    peak = x.max()
    shifted_exp = paddle.exp(x - peak)
    row_totals = paddle.sum(shifted_exp, 1, keepdim=True)
    return paddle.log(row_totals) + peak
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
对人脸框和人脸关键点进行编码之后,可以计算损失:
In [16]
GPU = cfg['gpu_train']


class MultiBoxLoss(nn.Layer):
    """Multi-task detection loss: box regression, classification and landmarks.

    For every image the priors are matched to the ground truth by ``match``;
    the loss then consists of

    1. a summed Smooth-L1 loss on the landmark offsets of priors whose
       matched label is > 0,
    2. a summed Smooth-L1 loss on the box offsets of priors whose matched
       label is != 0,
    3. a summed cross-entropy loss over those positives plus the
       highest-loss negatives (hard negative mining, ``neg_pos`` negatives
       per positive).

    Each sum is divided by the corresponding number of positives (at least 1).

    Args:
        num_classes: number of classification outputs per prior.
        overlap_thresh: overlap threshold forwarded to ``match``.
        prior_for_matching, bkg_label, neg_mining, neg_overlap, encode_target:
            stored on the instance; not read by ``forward`` in this class.
        neg_pos: number of negatives kept per positive during mining.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
        super(MultiBoxLoss, self).__init__()
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        self.variance = [0.1, 0.2]

    def forward(self, predictions, priors, targets):
        """Compute the three loss terms for one batch.

        Args:
            predictions: tuple ``(loc_data, conf_data, landm_data)``. The
                masks below are expanded to these tensors, and they are
                reshaped with last dimension 4, ``num_classes`` and 10.
            priors: prior boxes; ``priors.shape[0]`` is the prior count.
            targets: per-image annotation tensors. Columns 0-3 are read as
                the box, columns 4-13 as the landmarks and the last column
                as the label.

        Returns:
            Tuple ``(loss_l, loss_c, loss_landm)``.
        """
        loc_data, conf_data, landm_data = predictions
        num = loc_data.shape[0]
        num_priors = priors.shape[0]

        # Target buffers filled in place by `match`, one image at a time.
        # Entries of loc_t / landm_t that `match` leaves untouched are only
        # ever read through the positive masks below.
        loc_t = paddle.randn([num, num_priors, 4])
        landm_t = paddle.randn([num, num_priors, 10])
        conf_t = paddle.zeros([num, num_priors], dtype='int32')
        for idx in range(num):
            truths = targets[idx][:, :4]
            labels = targets[idx][:, -1]
            landms = targets[idx][:, 4:14]
            match(self.threshold, truths, priors, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)

        # Landmark loss (Smooth L1) over priors with a strictly positive label.
        pos1 = conf_t > 0
        num_pos_landm = pos1.astype('int64').sum(1, keepdim=True)
        N1 = max(num_pos_landm.sum().astype('float32'), 1)
        pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
        landm_p = landm_data.masked_select(pos_idx1).reshape([-1, 10])
        landm_t = landm_t.masked_select(pos_idx1).reshape([-1, 10])
        loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')

        # Every non-zero label counts as a positive for the box and class
        # losses; collapse those labels to class 1.
        pos = conf_t != 0
        conf_t_temp = conf_t.numpy()
        conf_t_temp[pos.numpy()] = 1
        conf_t = paddle.to_tensor(conf_t_temp)

        # Localization loss (Smooth L1) over the positives.
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data.masked_select(pos_idx).reshape([-1, 4])
        loc_t = loc_t.masked_select(pos_idx).reshape([-1, 4])
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior classification loss used only to rank negatives. The
        # one-hot width must equal the class dimension of batch_conf, so it
        # uses self.num_classes rather than a literal 2.
        batch_conf = conf_data.reshape([-1, self.num_classes])
        target_one_hot = paddle.nn.functional.one_hot(conf_t.reshape([-1, 1]), self.num_classes).squeeze(1)
        loss_c = log_sum_exp(batch_conf) - batch_conf.multiply(target_one_hot).sum(1).unsqueeze(1)

        # Hard negative mining: zero the loss of positives, then keep the
        # num_neg highest-loss priors of every image as negatives.
        loss_c = loss_c * (pos.reshape([-1, 1]) == 0).astype('float32')
        loss_c = loss_c.reshape([num, -1])
        loss_idx = loss_c.argsort(1, descending=True)
        idx_rank = loss_idx.argsort(1)
        num_pos = pos.astype('int64').sum(1, keepdim=True)
        num_neg = paddle.clip(self.negpos_ratio * num_pos, max=pos.shape[1] - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence loss over the selected positives and negatives.
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data.masked_select((pos_idx.logical_or(neg_idx)).astype('float32') > 0).reshape([-1, self.num_classes])
        targets_weighted = conf_t.masked_select((pos.logical_or(neg)).astype('float32') > 0)
        loss_c = F.cross_entropy(conf_p, targets_weighted.astype('int64'), reduction='sum')

        # Normalise each term by its number of positives (at least 1).
        N = max(num_pos.sum().astype('float32'), 1)
        loss_l /= N
        loss_c /= N
        loss_landm /= N1
        return loss_l, loss_c, loss_landm
5,训练数据组织
构建加载训练数据函数,数据增强,构建数据生成器
In [17]
#构建加载训练数据函数
from paddle.io import Dataset
from paddle.io import BatchSampler, DistributedBatchSampler, RandomSampler, SequenceSampler, DataLoader
class WiderFaceDetection(Dataset):
    """Dataset built from an annotation text file and the images it lists.

    Lines starting with ``#`` begin a new image (the text from the third
    character on is appended to the image directory); every other line is
    parsed as one space-separated list of floats describing a face.

    Args:
        txt_path: path of the annotation file. The image directory is derived
            by replacing ``'label.txt'`` with ``'images/'`` in this path.
        preproc: optional callable ``(img, target) -> (img, target)`` applied
            in ``__getitem__``.
    """

    def __init__(self, txt_path, preproc=None):
        self.preproc = preproc
        self.imgs_path = []
        self.words = []

        # Read under a context manager so the file handle is always closed.
        with open(txt_path, 'r') as f:
            lines = f.readlines()

        image_dir = txt_path.replace('label.txt', 'images/')
        labels = []
        seen_header = False
        for raw_line in lines:
            line = raw_line.rstrip()
            if line.startswith('#'):
                # A new header closes the label list of the previous image.
                if seen_header:
                    self.words.append(labels)
                    labels = []
                seen_header = True
                self.imgs_path.append(image_dir + line[2:])
            else:
                labels.append([float(x) for x in line.split(' ')])
        # Labels of the last image in the file.
        self.words.append(labels)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.imgs_path)

    def __getitem__(self, index):
        """Load one image and build its ``(num_faces, 15)`` annotation array.

        Columns 0-3 are the box as (x1, y1, x2, y2), where x2/y2 are computed
        from label fields 2/3 added to x1/y1. Columns 4-13 are five landmark
        (x, y) pairs taken from label fields 4,5,7,8,10,11,13,14,16,17.
        Column 14 is -1 when the first landmark x is negative, else 1.

        Returns:
            ``(img, target)``, after ``preproc`` when one was given.
        """
        img = cv2.imread(self.imgs_path[index])
        # The sizes are unused, but unpacking still requires a 3-D array.
        height, width, _ = img.shape

        labels = self.words[index]
        if len(labels) == 0:
            # NOTE(review): this returns a bare (0, 15) array instead of an
            # (img, target) pair, unlike the path below - confirm callers
            # never hit it.
            return np.zeros((0, 15))

        landmark_fields = (4, 5, 7, 8, 10, 11, 13, 14, 16, 17)
        annotations = np.zeros((len(labels), 15))
        for row, label in zip(annotations, labels):
            # bbox: label holds x, y, w, h
            row[0] = label[0]
            row[1] = label[1]
            row[2] = label[0] + label[2]
            row[3] = label[1] + label[3]
            # landmarks: every third label field is skipped
            for col, field in enumerate(landmark_fields, start=4):
                row[col] = label[field]
            row[14] = -1 if row[4] < 0 else 1

        target = np.array(annotations)
        if self.preproc is not None:
            img, target = self.preproc(img, target)
        return img, target
def detection_collate(batch):
    """Collate samples whose images share a shape but whose annotations vary.

    Args:
        batch: iterable of samples where ``sample[0]`` is the image array and
            ``sample[1]`` the annotation array of that image.

    Returns:
        Tuple ``(images, targets)``: ``images`` is the float32 images stacked
        on a new leading axis, ``targets`` is a list with one float32
        annotation array per sample.
    """
    imgs = []
    targets = []
    for sample in batch:
        imgs.append(sample[0].astype('float32'))
        targets.append(sample[1].astype('float32'))
    return (np.stack(imgs, 0), targets)
def make_dataloader(dataset, shuffle=True, batchsize=12, distributed=False, num_workers=0, num_iters=None, start_iter=0, collate_fn=None):
    """Build a ``DataLoader`` that yields full batches only (``drop_last=True``).

    Args:
        dataset: dataset to draw samples from.
        shuffle: sample in random order when true, sequentially otherwise.
        batchsize: number of samples per batch.
        distributed: use a ``DistributedBatchSampler`` when true.
        num_workers: forwarded to ``DataLoader``.
        num_iters: when given (non-distributed only), the batch sampler is
            wrapped in ``IterationBasedBatchSampler`` with this count.
        start_iter: starting iteration passed to that wrapper.
        collate_fn: forwarded to ``DataLoader``.

    Returns:
        The configured ``DataLoader``.
    """
    if distributed:
        # Return immediately: previously the non-distributed fallback below
        # also ran and replaced this loader with a sequential one.
        # `num_iters` / `start_iter` are not applied on this path.
        batch_sampler = DistributedBatchSampler(dataset, batch_size=batchsize, shuffle=shuffle, drop_last=True)
        return DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn)

    sampler = RandomSampler(dataset) if shuffle else SequenceSampler(dataset)
    batch_sampler = BatchSampler(sampler=sampler, batch_size=batchsize, drop_last=True)
    if num_iters is not None:
        batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iters, start_iter)
    return DataLoader(dataset=dataset, batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn)
class IterationBasedBatchSampler(BatchSampler):
    """Re-iterate a wrapped batch sampler until an iteration budget is used up.

    Args:
        batch_sampler: the batch sampler to draw batches from; it is
            restarted whenever it is exhausted.
        num_iterations: iteration count at which sampling stops.
        start_iter: value the internal iteration counter starts from.
    """

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        step = self.start_iter
        while step <= self.num_iterations:
            # Give samplers that expose `set_epoch` the current counter
            # before each pass over the wrapped batch sampler.
            inner_sampler = self.batch_sampler.sampler
            if hasattr(inner_sampler, "set_epoch"):
                inner_sampler.set_epoch(step)
            for indices in self.batch_sampler:
                step += 1
                if step > self.num_iterations:
                    break
                yield indices

    def __len__(self):
        return self.num_iterations
#数据增强
def _crop(image, boxes, labels, landm, img_dim):
    """Try up to 250 random square crops and return the first usable one.

    A crop is usable when ``matrix_iof`` reports a value >= 1 for at least one
    box, at least one box center lies strictly inside the crop, and at least
    one box remains after clipping.

    Args:
        image: H x W x C image array.
        boxes: box array with columns (x1, y1, x2, y2).
        labels: per-box labels.
        landm: per-box landmarks, ten values per box.
        img_dim: training image size used to rescale the clipped box sizes.

    Returns:
        ``(image, boxes, labels, landmarks, pad_image_flag)``. On success the
        cropped data is returned with the flag set to False; if no attempt
        succeeds the inputs are returned unchanged with the flag set to True.
    """
    height, width, _ = image.shape
    scales = [0.3, 0.45, 0.6, 0.8, 1.0]
    short_side = min(width, height)

    for _ in range(250):
        # Square crop whose side is a random fraction of the short side.
        side = int(random.choice(scales) * short_side)
        w = h = side
        left = 0 if width == w else random.randrange(width - w)
        top = 0 if height == h else random.randrange(height - h)
        roi = np.array((left, top, left + w, top + h))

        # Require at least one box with matrix_iof >= 1 against the crop.
        if not (matrix_iof(boxes, roi[np.newaxis]) >= 1).any():
            continue

        # Keep only the boxes whose center falls strictly inside the crop.
        centers = (boxes[:, :2] + boxes[:, 2:]) / 2
        inside = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
        boxes_t = boxes[inside].copy()
        labels_t = labels[inside].copy()
        landms_t = landm[inside].copy().reshape([-1, 5, 2])
        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        # Clip the boxes to the crop and move them into crop coordinates.
        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # Same for the landmarks: shift, then clamp into [0, crop size].
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # Box sizes rescaled to the training image size. The threshold is
        # 0.0, so only boxes with a non-positive rescaled side are dropped.
        # NOTE(review): the original comment mentioned a 16-pixel minimum -
        # confirm which is intended.
        scaled_w = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        scaled_h = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        keep = np.minimum(scaled_w, scaled_h) > 0.0
        boxes_t = boxes_t[keep]
        labels_t = labels_t[keep]
        landms_t = landms_t[keep]
        if boxes_t.shape[0] == 0:
            continue

        pad_image_flag = False
        return image_t, boxes_t, labels_t, landms_t, pad_image_flag

    pad_image_flag = True
    return image, boxes, labels, landm, pad_image_flag
def _distort(image):
    """Apply random photometric distortions to a copy of ``image``.

    Brightness is always considered first. A coin flip then decides whether
    contrast is considered before or after the saturation/hue step, which runs
    in HSV space. Each individual distortion is applied with its own coin
    flip.

    Args:
        image: image array; it is converted with ``cv2.COLOR_BGR2HSV`` and
            back, and is not modified.

    Returns:
        The distorted copy.
    """
    def _scale_shift(target, alpha=1, beta=0):
        # In-place `target * alpha + beta`, clamped to [0, 255].
        adjusted = target.astype(float) * alpha + beta
        adjusted[adjusted < 0] = 0
        adjusted[adjusted > 255] = 255
        target[:] = adjusted

    def _maybe_brightness(img):
        if random.randrange(2):
            _scale_shift(img, beta=random.uniform(-32, 32))

    def _maybe_contrast(img):
        if random.randrange(2):
            _scale_shift(img, alpha=random.uniform(0.5, 1.5))

    def _maybe_saturation_hue(img):
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        # saturation distortion
        if random.randrange(2):
            _scale_shift(hsv[:, :, 1], alpha=random.uniform(0.5, 1.5))
        # hue distortion, wrapped modulo 180
        if random.randrange(2):
            hue = hsv[:, :, 0].astype(int) + random.randint(-18, 18)
            hue %= 180
            hsv[:, :, 0] = hue
        return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    image = image.copy()
    contrast_before_hsv = random.randrange(2)
    _maybe_brightness(image)
    if contrast_before_hsv:
        _maybe_contrast(image)
        image = _maybe_saturation_hue(image)
    else:
        image = _maybe_saturation_hue(image)
        _maybe_contrast(image)
    return image
def _expand(image, boxes, fill, p):
    """Randomly paste ``image`` onto a larger canvas filled with ``fill``.

    With probability one half the inputs are returned untouched. Otherwise the
    canvas is the image scaled by a factor drawn uniformly from ``[1, p]``,
    the image is placed at a random offset inside it, and the boxes are
    shifted by that offset.

    Args:
        image: H x W x C image array.
        boxes: box array with columns (x1, y1, x2, y2).
        fill: value broadcast over the whole canvas before pasting.
        p: upper bound of the random scale factor.

    Returns:
        ``(image, boxes)``; the boxes are a shifted copy when expansion ran.
    """
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape
    scale = random.uniform(1, p)
    canvas_w = int(scale * width)
    canvas_h = int(scale * height)
    left = random.randint(0, canvas_w - width)
    top = random.randint(0, canvas_h - height)

    offset = (left, top)
    shifted = boxes.copy()
    shifted[:, :2] += offset
    shifted[:, 2:] += offset

    canvas = np.empty((canvas_h, canvas_w, depth), dtype=image.dtype)
    canvas[:, :] = fill
    canvas[top:top + height, left:left + width] = image
    return canvas, shifted
def _mirror(image, boxes, landms):
    """Flip the sample horizontally with probability one half.

    When flipping, the box x-coordinates are mirrored (and swapped so x1 stays
    the smaller one), the landmark x-coordinates are mirrored, and landmark
    points 0/1 and 3/4 trade places.

    Args:
        image: H x W x C image array.
        boxes: box array with columns (x1, y1, x2, y2).
        landms: landmark array with ten values (five x, y pairs) per row.

    Returns:
        ``(image, boxes, landms)``; the inputs themselves when no flip
        happened, otherwise a flipped view of the image and modified copies.
    """
    _, width, _ = image.shape
    if not random.randrange(2):
        return image, boxes, landms

    flipped = image[:, ::-1]

    mirrored_boxes = boxes.copy()
    mirrored_boxes[:, 0::2] = width - mirrored_boxes[:, 2::-2]

    points = landms.copy().reshape([-1, 5, 2])
    points[:, :, 0] = width - points[:, :, 0]
    # Exchange the point pairs 0 <-> 1 and 3 <-> 4.
    points[:, [0, 1], :] = points[:, [1, 0], :]
    points[:, [3, 4], :] = points[:, [4, 3], :]

    return flipped, mirrored_boxes, points.reshape([-1, 10])
def _pad_to_square(image, rgb_mean, pad_image_flag):
    """Pad ``image`` to a square filled with ``rgb_mean`` when requested.

    Args:
        image: H x W x C image array.
        rgb_mean: per-channel fill value for the padded area.
        pad_image_flag: when false, ``image`` is returned unchanged.

    Returns:
        ``image`` itself, or a new ``max(H, W)``-sided, 3-channel array with
        the image in the top-left corner.
    """
    if not pad_image_flag:
        return image

    height, width, _ = image.shape
    side = max(width, height)
    canvas = np.empty((side, side, 3), dtype=image.dtype)
    canvas[:, :] = rgb_mean
    canvas[:height, :width] = image
    return canvas
def _resize_subtract_mean(image, insize, rgb_mean, rgb_std):
    """Resize to ``insize`` x ``insize``, normalise, and move channels first.

    The interpolation method is picked at random from five OpenCV modes.

    Args:
        image: H x W x C image array.
        insize: side length of the resized image.
        rgb_mean: value subtracted from the float32 image.
        rgb_std: value the result is divided by.

    Returns:
        float32 array with axes reordered to (2, 0, 1).
    """
    candidates = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    chosen = candidates[random.randrange(5)]
    resized = cv2.resize(image, (insize, insize), interpolation=chosen)

    # Normalise in place so the array stays float32.
    normalized = resized.astype(np.float32)
    normalized -= rgb_mean
    normalized /= rgb_std
    return normalized.transpose(2, 0, 1)
class preproc(object):
    """Training-time augmentation pipeline for one image and its annotations.

    The steps, in order: random crop, photometric distortion, optional pad to
    square, random mirror, resize + normalisation. Box and landmark
    coordinates are finally divided by the width/height the image had just
    before resizing.

    Args:
        img_dim: side length of the output image.
        rgb_means: fill and mean value forwarded to the pad and normalise steps.
        rgb_stds: divisor forwarded to the normalise step.
    """

    def __init__(self, img_dim, rgb_means, rgb_stds):
        self.img_dim = img_dim
        self.rgb_means = rgb_means
        self.rgb_stds = rgb_stds

    def __call__(self, image, targets):
        """Augment one sample.

        Args:
            image: image array.
            targets: 2-D annotation array; columns 0-3 are the box, the last
                column is the label and the columns in between the landmarks.

        Returns:
            ``(image_t, targets_t)``, where ``targets_t`` stacks the boxes,
            landmarks and labels column-wise.
        """
        assert targets.shape[0] > 0, "this image does not have gt"

        boxes = targets[:, :4].copy()
        landm = targets[:, 4:-1].copy()
        labels = targets[:, -1].copy()

        cropped, boxes_t, labels_t, landm_t, needs_pad = _crop(image, boxes, labels, landm, self.img_dim)
        augmented = _distort(cropped)
        augmented = _pad_to_square(augmented, self.rgb_means, needs_pad)
        augmented, boxes_t, landm_t = _mirror(augmented, boxes_t, landm_t)

        # Size before resizing; used to scale the coordinates below.
        height, width, _ = augmented.shape
        image_t = _resize_subtract_mean(augmented, self.img_dim, self.rgb_means, self.rgb_stds)

        for coords in (boxes_t, landm_t):
            coords[:, 0::2] /= width
            coords[:, 1::2] /= height

        label_column = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, label_column))
        return image_t, targets_t
6,训练参数设置,迭代
加载数据,构建网络,构建损失函数,训练迭代次数,学习率,优化器设置
In [9]
from __future__ import print_function
import os
import paddle
import paddle.optimizer as optim
import time
import datetime
import math
import random
# ---- Command-line options -------------------------------------------------
parser = argparse.ArgumentParser(description='Retinaface Training')
parser.add_argument('--training_dataset', default='data/widerface/train/label.txt', help='Training dataset directory')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading')
parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
parser.add_argument('--resume_net', default=None, help='resume net for retraining')
parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining')
parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD')
parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD')
parser.add_argument('--save_folder', default='./test/', help='Location to save checkpoint models')
# parse_known_args tolerates arguments this parser does not define.
args = parser.parse_known_args()[0]

# ---- Settings taken from the command line ----------------------------------
num_workers = args.num_workers
momentum = args.momentum
weight_decay = args.weight_decay
initial_lr = args.lr
gamma = args.gamma
training_dataset = args.training_dataset
save_folder = args.save_folder

# ---- Settings taken from cfg and fixed constants ---------------------------
rgb_mean = (104, 117, 123)  # bgr order
rgb_std = (57.1, 57.4, 58.4)
num_classes = 2
img_dim = cfg['image_size']
num_gpu = cfg['ngpu']
batch_size = cfg['batch_size']
max_epoch = cfg['epoch']
gpu_train = cfg['gpu_train']

# ---- Network ----------------------------------------------------------------
net = RetinaFace(cfg=cfg)
print("Printing net...")
print(net)

if args.resume_net is not None:
    print('Loading resume network...')
    state_dict = paddle.load(args.resume_net)
    # Rebuild the state dict with any leading `module.` stripped from the keys.
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        key = k[7:] if k.startswith('module.') else k
        new_state_dict[key] = v
    net.set_state_dict(new_state_dict)

if num_gpu > 1 and gpu_train:
    net = paddle.DataParallel(net)

# ---- Optimizer, loss and prior boxes ----------------------------------------
optimizer = optim.Momentum(parameters=net.parameters(), learning_rate=initial_lr, momentum=momentum, weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with paddle.no_grad():
    priors = priorbox.forward()
def train():
    """Train ``net`` on the WIDER FACE annotations and save checkpoints.

    Uses the module-level configuration (``args``, ``cfg``, ``net``,
    ``optimizer``, ``criterion``, ``priors`` and the derived settings). A
    checkpoint is written at the start of every fifth epoch after epoch 0,
    and a final one once the loop ends.
    """
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')
    dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean, rgb_std))

    epoch_size = math.ceil(len(dataset) / batch_size)
    # Iterations at which the learning-rate step index is increased.
    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0
    # NOTE(review): step_index restarts at 0 when resuming, even if
    # start_iter already lies beyond a decay step - confirm this is intended.

    start_iter = args.resume_epoch * epoch_size if args.resume_epoch > 0 else 0
    # `iteration` below is absolute (it starts at start_iter), so progress and
    # ETA are measured against the absolute total; `max_iter` is only the
    # number of batches still to be drawn from the loader.
    total_iter = max_epoch * epoch_size
    max_iter = total_iter - start_iter

    # NOTE(review): num_workers is fixed to 0 here although --num_workers is
    # parsed at module level - confirm whether it should be forwarded.
    batch_iterator = make_dataloader(dataset, shuffle=True, batchsize=batch_size, distributed=False, num_workers=0, num_iters=max_iter, start_iter=0, collate_fn=detection_collate)

    for iteration, (images, labels) in enumerate(batch_iterator, start=start_iter):
        if iteration % epoch_size == 0:
            if epoch % 5 == 0 and (epoch > 0 or epoch > cfg['decay1']):
                paddle.save(net.state_dict(), save_folder + cfg['name'] + '_epoch_' + str(epoch) + '.pdparams')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # forward
        out = net(images)

        # backprop
        loss_l, loss_c, loss_landm = criterion(out, priors, list(labels))
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()
        optimizer.clear_gradients()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (total_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
              .format(epoch, max_epoch, (iteration % epoch_size) + 1,
                      epoch_size, iteration + 1, total_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta))))

    paddle.save(net.state_dict(), save_folder + cfg['name'] + '_Final.pdparams')
def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size):
    """Compute the learning rate for this step and set it on the optimizer.

    Outside the warm-up window the rate is the module-level ``initial_lr``
    multiplied by ``gamma ** step_index``. The warm-up window is
    ``epoch <= warmup_epoch`` with ``warmup_epoch`` fixed to -1.

    Args:
        optimizer: object exposing ``set_lr``.
        gamma: multiplicative decay factor per step.
        epoch: current epoch number.
        step_index: number of decay steps applied so far.
        iteration: current iteration, used only by the warm-up formula.
        epoch_size: iterations per epoch, used only by the warm-up formula.

    Returns:
        The learning rate that was set.
    """
    warmup_epoch = -1
    in_warmup = epoch <= warmup_epoch
    if not in_warmup:
        lr = initial_lr * (gamma ** (step_index))
    else:
        lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch)
    optimizer.set_lr(lr)
    return lr
# Start training only when this module is the entry point.
if __name__ == '__main__':
    train()
Loading Dataset...
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel_launcher.py:9: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
if __name__ == '__main__':
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel_launcher.py:152: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
Epoch:1/250 || Epochiter: 1/403 || Iter: 1/100750 || Loc: 5.5039 Cla: 18.9180 Landm: 21.8879 || LR: 0.00100000 || Batchtime: 0.8275 s || ETA: 23:09:30
Epoch:1/250 || Epochiter: 2/403 || Iter: 2/100750 || Loc: 5.5344 Cla: 18.5185 Landm: 21.6857 || LR: 0.00100000 || Batchtime: 0.5799 s || ETA: 16:13:40
Epoch:1/250 || Epochiter: 3/403 || Iter: 3/100750 || Loc: 5.1545 Cla: 16.1501 Landm: 20.6097 || LR: 0.00100000 || Batchtime: 0.3594 s || ETA: 10:03:30
Epoch:1/250 || Epochiter: 4/403 || Iter: 4/100750 || Loc: 5.1112 Cla: 14.2228 Landm: 20.8187 || LR: 0.00100000 || Batchtime: 0.4798 s || ETA: 13:25:35
Epoch:1/250 || Epochiter: 5/403 || Iter: 5/100750 || Loc: 4.9339 Cla: 12.7024 Landm: 20.1817 || LR: 0.00100000 || Batchtime: 0.3922 s || ETA: 10:58:28
Epoch:1/250 || Epochiter: 6/403 || Iter: 6/100750 || Loc: 4.9495 Cla: 12.1982 Landm: 21.1817 || LR: 0.00100000 || Batchtime: 0.4395 s || ETA: 12:17:59
Epoch:1/250 || Epochiter: 7/403 || Iter: 7/100750 || Loc: 4.8757 Cla: 11.8493 Landm: 19.9921 || LR: 0.00100000 || Batchtime: 0.3520 s || ETA: 9:51:00
Epoch:1/250 || Epochiter: 8/403 || Iter: 8/100750 || Loc: 4.7777 Cla: 10.8863 Landm: 20.3447 || LR: 0.00100000 || Batchtime: 0.4070 s || ETA: 11:23:19
Epoch:1/250 || Epochiter: 9/403 || Iter: 9/100750 || Loc: 4.7849 Cla: 8.2906 Landm: 19.6271 || LR: 0.00100000 || Batchtime: 0.5299 s || ETA: 14:49:42
Epoch:1/250 || Epochiter: 10/403 || Iter: 10/100750 || Loc: 4.4964 Cla: 9.6688 Landm: 20.5572 || LR: 0.00100000 || Batchtime: 0.4078 s || ETA: 11:24:44
Epoch:1/250 || Epochiter: 11/403 || Iter: 11/100750 || Loc: 4.8028 Cla: 8.2961 Landm: 19.0736 || LR: 0.00100000 || Batchtime: 0.4680 s || ETA: 13:05:47
Epoch:1/250 || Epochiter: 12/403 || Iter: 12/100750 || Loc: 4.4659 Cla: 8.8183 Landm: 19.5600 || LR: 0.00100000 || Batchtime: 0.4338 s || ETA: 12:08:17
Epoch:1/250 || Epochiter: 13/403 || Iter: 13/100750 || Loc: 4.7572 Cla: 7.3842 Landm: 19.5515 || LR: 0.00100000 || Batchtime: 0.3566 s || ETA: 9:58:45
Epoch:1/250 || Epochiter: 14/403 || Iter: 14/100750 || Loc: 4.4520 Cla: 7.4838 Landm: 19.3934 || LR: 0.00100000 || Batchtime: 0.4300 s || ETA: 12:01:54
Epoch:1/250 || Epochiter: 15/403 || Iter: 15/100750 || Loc: 4.4183 Cla: 7.6439 Landm: 19.8088 || LR: 0.00100000 || Batchtime: 0.3954 s || ETA: 11:03:46
Epoch:1/250 || Epochiter: 16/403 || Iter: 16/100750 || Loc: 4.5246 Cla: 6.6345 Landm: 19.4181 || LR: 0.00100000 || Batchtime: 0.3990 s || ETA: 11:09:51
Epoch:1/250 || Epochiter: 17/403 || Iter: 17/100750 || Loc: 4.5214 Cla: 5.8815 Landm: 18.9220 || LR: 0.00100000 || Batchtime: 0.3920 s || ETA: 10:58:05
Epoch:1/250 || Epochiter: 18/403 || Iter: 18/100750 || Loc: 4.4529 Cla: 6.2802 Landm: 18.9054 || LR: 0.00100000 || Batchtime: 0.4728 s || ETA: 13:13:41
Epoch:1/250 || Epochiter: 19/403 || Iter: 19/100750 || Loc: 4.3458 Cla: 5.9653 Landm: 19.6959 || LR: 0.00100000 || Batchtime: 0.3720 s || ETA: 10:24:32
Epoch:1/250 || Epochiter: 20/403 || Iter: 20/100750 || Loc: 4.3320 Cla: 5.3611 Landm: 18.2379 || LR: 0.00100000 || Batchtime: 0.4461 s || ETA: 12:28:56
Epoch:1/250 || Epochiter: 21/403 || Iter: 21/100750 || Loc: 4.2572 Cla: 5.0751 Landm: 19.0970 || LR: 0.00100000 || Batchtime: 0.4399 s || ETA: 12:18:28
Epoch:1/250 || Epochiter: 22/403 || Iter: 22/100750 || Loc: 4.3705 Cla: 4.8014 Landm: 17.8947 || LR: 0.00100000 || Batchtime: 0.6961 s || ETA: 19:28:33
Epoch:1/250 || Epochiter: 23/403 || Iter: 23/100750 || Loc: 4.2417 Cla: 4.6365 Landm: 18.2330 || LR: 0.00100000 || Batchtime: 0.5477 s || ETA: 15:19:28
Epoch:1/250 || Epochiter: 24/403 || Iter: 24/100750 || Loc: 3.9981 Cla: 4.8140 Landm: 17.9373 || LR: 0.00100000 || Batchtime: 0.4679 s || ETA: 13:05:25
Epoch:1/250 || Epochiter: 25/403 || Iter: 25/100750 || Loc: 4.2646 Cla: 4.2984 Landm: 19.7434 || LR: 0.00100000 || Batchtime: 0.3951 s || ETA: 11:03:14
Epoch:1/250 || Epochiter: 26/403 || Iter: 26/100750 || Loc: 4.3223 Cla: 4.3522 Landm: 18.5542 || LR: 0.00100000 || Batchtime: 0.4015 s || ETA: 11:14:05
Epoch:1/250 || Epochiter: 27/403 || Iter: 27/100750 || Loc: 4.1652 Cla: 4.4780 Landm: 17.2275 || LR: 0.00100000 || Batchtime: 0.3671 s || ETA: 10:16:20
Epoch:1/250 || Epochiter: 28/403 || Iter: 28/100750 || Loc: 4.1778 Cla: 4.2569 Landm: 17.9795 || LR: 0.00100000 || Batchtime: 0.3724 s || ETA: 10:25:06
Epoch:1/250 || Epochiter: 29/403 || Iter: 29/100750 || Loc: 4.2219 Cla: 4.1784 Landm: 18.4573 || LR: 0.00100000 || Batchtime: 0.3764 s || ETA: 10:31:52
Epoch:1/250 || Epochiter: 30/403 || Iter: 30/100750 || Loc: 3.8601 Cla: 4.1411 Landm: 17.5710 || LR: 0.00100000 || Batchtime: 0.4443 s || ETA: 12:25:55
Epoch:1/250 || Epochiter: 31/403 || Iter: 31/100750 || Loc: 4.1699 Cla: 3.9541 Landm: 17.5772 || LR: 0.00100000 || Batchtime: 0.4302 s || ETA: 12:02:06
Epoch:1/250 || Epochiter: 32/403 || Iter: 32/100750 || Loc: 3.9781 Cla: 3.9588 Landm: 17.0395 || LR: 0.00100000 || Batchtime: 0.4304 s || ETA: 12:02:33
Epoch:1/250 || Epochiter: 33/403 || Iter: 33/100750 || Loc: 4.0470 Cla: 3.8377 Landm: 17.3675 || LR: 0.00100000 || Batchtime: 0.4221 s || ETA: 11:48:36
Epoch:1/250 || Epochiter: 34/403 || Iter: 34/100750 || Loc: 3.8755 Cla: 3.9044 Landm: 17.1435 || LR: 0.00100000 || Batchtime: 0.4630 s || ETA: 12:57:07
Epoch:1/250 || Epochiter: 35/403 || Iter: 35/100750 || Loc: 3.7425 Cla: 3.8776 Landm: 16.4029 || LR: 0.00100000 || Batchtime: 0.3946 s || ETA: 11:02:25
Epoch:1/250 || Epochiter: 36/403 || Iter: 36/100750 || Loc: 3.9764 Cla: 3.9707 Landm: 16.8414 || LR: 0.00100000 || Batchtime: 0.5842 s || ETA: 16:20:40
Epoch:1/250 || Epochiter: 37/403 || Iter: 37/100750 || Loc: 3.7441 Cla: 3.8408 Landm: 17.6619 || LR: 0.00100000 || Batchtime: 0.4351 s || ETA: 12:10:19
Epoch:1/250 || Epochiter: 38/403 || Iter: 38/100750 || Loc: 3.9364 Cla: 3.7047 Landm: 18.6355 || LR: 0.00100000 || Batchtime: 0.3682 s || ETA: 10:18:01
Epoch:1/250 || Epochiter: 39/403 || Iter: 39/100750 || Loc: 3.7350 Cla: 3.8814 Landm: 16.8114 || LR: 0.00100000 || Batchtime: 0.3889 s || ETA: 10:52:45
Epoch:1/250 || Epochiter: 40/403 || Iter: 40/100750 || Loc: 3.5163 Cla: 3.8605 Landm: 16.5092 || LR: 0.00100000 || Batchtime: 0.4335 s || ETA: 12:07:37
Epoch:1/250 || Epochiter: 41/403 || Iter: 41/100750 || Loc: 3.7482 Cla: 3.6833 Landm: 16.7456 || LR: 0.00100000 || Batchtime: 0.4363 s || ETA: 12:12:24
Epoch:1/250 || Epochiter: 42/403 || Iter: 42/100750 || Loc: 3.6396 Cla: 3.6600 Landm: 16.9524 || LR: 0.00100000 || Batchtime: 0.3985 s || ETA: 11:08:51
Epoch:1/250 || Epochiter: 43/403 || Iter: 43/100750 || Loc: 3.6277 Cla: 3.5343 Landm: 15.3870 || LR: 0.00100000 || Batchtime: 0.4476 s || ETA: 12:31:15
Epoch:1/250 || Epochiter: 44/403 || Iter: 44/100750 || Loc: 3.6305 Cla: 3.8723 Landm: 15.8916 || LR: 0.00100000 || Batchtime: 0.3889 s || ETA: 10:52:42
Epoch:1/250 || Epochiter: 45/403 || Iter: 45/100750 || Loc: 3.4999 Cla: 3.7035 Landm: 16.0365 || LR: 0.00100000 || Batchtime: 0.3781 s || ETA: 10:34:32
Epoch:1/250 || Epochiter: 46/403 || Iter: 46/100750 || Loc: 3.5424 Cla: 3.6261 Landm: 16.0393 || LR: 0.00100000 || Batchtime: 0.4740 s || ETA: 13:15:38
Epoch:1/250 || Epochiter: 47/403 || Iter: 47/100750 || Loc: 3.7910 Cla: 3.6140 Landm: 16.3321 || LR: 0.00100000 || Batchtime: 0.3976 s || ETA: 11:07:20
Epoch:1/250 || Epochiter: 48/403 || Iter: 48/100750 || Loc: 3.5407 Cla: 3.6659 Landm: 15.9662 || LR: 0.00100000 || Batchtime: 0.3946 s || ETA: 11:02:21
Epoch:1/250 || Epochiter: 49/403 || Iter: 49/100750 || Loc: 3.2723 Cla: 3.5321 Landm: 15.0109 || LR: 0.00100000 || Batchtime: 0.4122 s || ETA: 11:31:50
Epoch:1/250 || Epochiter: 50/403 || Iter: 50/100750 || Loc: 3.6369 Cla: 3.6536 Landm: 16.7837 || LR: 0.00100000 || Batchtime: 0.3930 s || ETA: 10:59:35
Epoch:1/250 || Epochiter: 51/403 || Iter: 51/100750 || Loc: 3.2572 Cla: 3.5005 Landm: 16.94