1. Compute the anchor IoUs
   Compute the IoU between the nine anchor boxes and the ground-truth box (a minimal sketch follows this list).
2. Compute the grid position
3. Fill in the positive samples
   Fill the ground-truth information into the target tensors.
4. Filter the negative samples
   Drop the boxes that should not be counted as negatives (their IoU with a ground truth is too high); otherwise there would be far too many negatives.
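As a rough, hedged illustration of steps 1 and 2, the sketch below matches one normalized ground-truth box against nine anchors using a width/height-only IoU and derives the grid cell that holds its center. The anchor sizes, the 19x19 grid, and the helper name wh_iou are assumptions made for this example only, not values taken from the implementation further down.

import numpy as np

def wh_iou(gt_wh, anchors_wh):
    # IoU computed from width/height alone, with every box anchored at the origin
    inter = np.minimum(gt_wh, anchors_wh).prod(axis=1)
    union = gt_wh.prod() + anchors_wh.prod(axis=1) - inter
    return inter / union

# assumed values: one box normalized to [0, 1] and nine anchors already scaled
# to a 19x19 feature map
grid = 19
gt = np.array([0.2, 0.7, 0.25, 0.4])                          # cx, cy, w, h in [0, 1]
anchors = np.array([[0.9, 1.2], [1.9, 2.8], [3.4, 5.5],
                    [4.7, 3.6], [6.2, 8.1], [9.9, 7.4],
                    [7.3, 11.6], [12.2, 9.8], [14.9, 16.1]])

gw, gh = gt[2] * grid, gt[3] * grid                           # box size on the feature map
gi, gj = int(gt[0] * grid), int(gt[1] * grid)                 # grid cell holding the center: (3, 13)
best_n = int(np.argmax(wh_iou(np.array([gw, gh]), anchors)))  # absolute index of the best anchor
print(gi, gj, best_n)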
1. Iterate over the YOLO heads (76, 38, 19)
2. Build the masks
   mask: [1, 3, 19, 19] = 0 — positive-sample mask; initialized to all zeros, set to 1 wherever a positive sample lands
   noobj_mask: [1, 3, 19, 19] = 1 — negative-sample mask; initialized to all ones (assume there are no objects, so everything starts as a negative), then set to 0 where needed
   t_box: [1, 3, 19, 19, 4] = 0
   tconf: [1, 3, 19, 19] = 0
   tcls: [1, 3, 19, 19, num_classes] = 0
3. Record the absolute indices of the 3 anchors that belong to the current head
4. Compute the IoU between the ground-truth label and the 9 anchors and find the best index
   Find the index and convert it to a head-relative one; the grid position (i, j) is computed from the target coordinates.
5. Fill in the masks
   Filling the masks requires (i, j) and the head-relative anchor index: (i, j) comes from the target, and the relative index comes from the anchors and the target together.
   1. Get (i, j) from the target
   2. Compute the IoU between the target and the anchors, take the maximum to get the absolute anchor index, then derive the head-relative index
   3. Fill the masks using (i, j) and the relative index
6. Filter the negative samples: drop the ambiguous samples that sit between positive and negative so they do not take part in the loss
   Obtain the predicted boxes
   1. Compute the IoU between the predicted boxes and the targets
   2. Threshold that IoU to drop the ambiguous samples (a minimal sketch of this step follows this list)
   3. Obtain the negative-sample mask
   Return the predicted boxes and the negative-sample mask
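Before the full implementation, here is a minimal sketch of step 6, assuming the decoded predicted boxes and the ground-truth boxes are already expressed as (cx, cy, w, h) on the feature map. The function name ignore_ambiguous and the 0.7 threshold mirror the ignore logic described above but are written for illustration only, not copied from the code below.

import torch

def ignore_ambiguous(pred_boxes, gt_boxes, noobj_mask, ignore_threshold=0.7):
    # pred_boxes: (3, H, W, 4) decoded boxes in xywh; gt_boxes: (n, 4) in xywh.
    # Wherever a prediction overlaps any ground truth by more than the threshold,
    # clear noobj_mask so that box is treated as neither positive nor negative.
    num_anchors, h, w, _ = pred_boxes.shape
    flat = pred_boxes.reshape(-1, 4)
    p_min = flat[:, :2] - flat[:, 2:] / 2
    p_max = flat[:, :2] + flat[:, 2:] / 2
    for gt in gt_boxes:
        g_min = gt[:2] - gt[2:] / 2
        g_max = gt[:2] + gt[2:] / 2
        inter = (torch.min(p_max, g_max) - torch.max(p_min, g_min)).clamp(min=0)
        inter_area = inter[:, 0] * inter[:, 1]
        union = flat[:, 2] * flat[:, 3] + gt[2] * gt[3] - inter_area
        ious = (inter_area / union.clamp(min=1e-6)).view(num_anchors, h, w)
        noobj_mask[ious > ignore_threshold] = 0
    return noobj_mask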
import cv2
from random import shuffle
import numpy as np
import torch
import torch.nn as nn
import math
import torch.nn.functional as F
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
from PIL import Image
from utils.utils import bbox_iou, merge_bboxes

def iou(_box_a, _box_b):
    b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2
    b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2
    b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2
    b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2
    box_a = torch.zeros_like(_box_a)
    box_b = torch.zeros_like(_box_b)
    box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2
    box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2
    A = box_a.size(0)
    B = box_b.size(0)
    max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
                       box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
    min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
                       box_b[:, :2].unsqueeze(0).expand(A, B, 2))
    inter = torch.clamp((max_xy - min_xy), min=0)
    inter = inter[:, :, 0] * inter[:, :, 1]
    # areas of the anchor boxes and the ground-truth boxes
    area_a = ((box_a[:, 2]-box_a[:, 0]) * (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
    area_b = ((box_b[:, 2]-box_b[:, 0]) * (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
    # IoU
    union = area_a + area_b - inter
    return inter / union  # [A,B]

#---------------------------------------------------#
#   label smoothing
#---------------------------------------------------#
def smooth_labels(y_true, label_smoothing, num_classes):
    return y_true * (1.0 - label_smoothing) + label_smoothing / num_classes

def box_ciou(b1, b2):
    """
    Input:
    ----------
    b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    Returns:
    -------
    ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    # top-left and bottom-right corners of the predicted boxes
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    # top-left and bottom-right corners of the ground-truth boxes
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    # IoU between the predicted and ground-truth boxes
    intersect_mins = torch.max(b1_mins, b2_mins)
    intersect_maxes = torch.min(b1_maxes, b2_maxes)
    intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / torch.clamp(union_area, min=1e-6)

    # squared distance between the box centers
    center_distance = torch.sum(torch.pow((b1_xy - b2_xy), 2), axis=-1)

    # top-left and bottom-right corners of the smallest enclosing box
    enclose_mins = torch.min(b1_mins, b2_mins)
    enclose_maxes = torch.max(b1_maxes, b2_maxes)
    enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes))
    # squared diagonal of the enclosing box
    enclose_diagonal = torch.sum(torch.pow(enclose_wh, 2), axis=-1)
    ciou = iou - 1.0 * (center_distance) / torch.clamp(enclose_diagonal, min=1e-6)

    v = (4 / (math.pi ** 2)) * torch.pow((torch.atan(b1_wh[..., 0] / torch.clamp(b1_wh[..., 1], min=1e-6)) -
                                          torch.atan(b2_wh[..., 0] / torch.clamp(b2_wh[..., 1], min=1e-6))), 2)
    alpha = v / torch.clamp((1.0 - iou + v), min=1e-6)
    ciou = ciou - alpha * v
    return ciou

def clip_by_tensor(t, t_min, t_max):
    t = t.float()
    result = (t >= t_min).float() * t + (t < t_min).float() * t_min
    result = (result <= t_max).float() * result + (result > t_max).float() * t_max
    return result

def MSELoss(pred, target):
    return (pred - target) ** 2

def BCELoss(pred, target):
    epsilon = 1e-7
    pred = clip_by_tensor(pred, epsilon, 1.0 - epsilon)
    output = -target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
    return output

class YOLOLoss(nn.Module):
    def __init__(self, anchors, num_classes, img_size, label_smooth=0, cuda=True):
        super(YOLOLoss, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.bbox_attrs = 5 + num_classes
        self.img_size = img_size
        self.feature_length = [img_size[0]//8, img_size[0]//16, img_size[0]//32]
        self.label_smooth = label_smooth

        self.ignore_threshold = 0.7
        self.lambda_conf = 1.0
        self.lambda_cls = 1.0
        self.lambda_loc = 1.0
        self.cuda = cuda

    def forward(self, input, targets=None):
        # input: (bs, 3*(5+num_classes), 13, 13)
        # number of images in the batch
        bs = input.size(0)
        # feature map height
        in_h = input.size(2)
        # feature map width
        in_w = input.size(3)

        # compute the stride:
        # how many pixels of the original image one feature point covers;
        # for a 13x13 feature map, one feature point covers 32 pixels
        stride_h = self.img_size[1] / in_h
        stride_w = self.img_size[0] / in_w

        # rescale the anchors to the feature-map scale,
        # i.e. the anchor width/height expressed on the feature map
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

        # bs,3*(5+num_classes),13,13 -> bs,3,13,13,(5+num_classes)
        prediction = input.view(bs, int(self.num_anchors/3),
                                self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()

        # apply activations to the prediction
        conf = torch.sigmoid(prediction[..., 4])       # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # find which anchors contain an object
        mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y = \
            self.get_target(targets, scaled_anchors, in_w, in_h, self.ignore_threshold)
        noobj_mask, pred_boxes_for_ciou = self.get_ignore(prediction, targets, scaled_anchors, in_w, in_h, noobj_mask)

        if self.cuda:
            mask, noobj_mask = mask.cuda(), noobj_mask.cuda()
            box_loss_scale_x, box_loss_scale_y = box_loss_scale_x.cuda(), box_loss_scale_y.cuda()
            tconf, tcls = tconf.cuda(), tcls.cuda()
            pred_boxes_for_ciou = pred_boxes_for_ciou.cuda()
            t_box = t_box.cuda()

        box_loss_scale = 2 - box_loss_scale_x * box_loss_scale_y
        # losses.
        ciou = box_ciou(pred_boxes_for_ciou[mask.bool()], t_box[mask.bool()])
        loss_ciou = 1 - ciou
        loss_ciou = loss_ciou * box_loss_scale[mask.bool()]
        # ciou = (1 - box_ciou( pred_boxes_for_ciou[mask.bool()], t_box[mask.bool()]))* box_loss_scale[mask.bool()]
        loss_loc = torch.sum(loss_ciou / bs)

        loss_conf = torch.sum(BCELoss(conf, mask) * mask / bs) + \
                    torch.sum(BCELoss(conf, mask) * noobj_mask / bs)

        # print(smooth_labels(tcls[mask == 1],self.label_smooth,self.num_classes))
        loss_cls = torch.sum(BCELoss(pred_cls[mask == 1],
                                     smooth_labels(tcls[mask == 1], self.label_smooth, self.num_classes)) / bs)
        # print(loss_loc,loss_conf,loss_cls)

        loss = loss_conf * self.lambda_conf + loss_cls * self.lambda_cls + loss_loc * self.lambda_loc
        return loss, loss_conf.item(), loss_cls.item(), loss_loc.item()

    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        # number of images
        bs = len(target)
        # anchors that belong to this head, and the offset to its first absolute index
        anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)]
        subtract_index = [0,3,6][self.feature_length.index(in_w)]
        # create the all-zero / all-one target arrays
        mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)

        tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        t_box = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, 4, requires_grad=False)
        tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False)

        box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        for b in range(bs):
            for t in range(target[b].shape[0]):
                # box = [0.2,0.7,0.25,0.4] ---> [gx,gy,gw,gh], offsets relative to the image
                # (the 608*608 image / 608 = 1*1) ===> 0.2*608, 0.7*608, 0.25*608, 0.4*608
                # box * 19 = [3.8, 13.3, 4.75, 7.6]
                # i = int(3.8) = 3, j = int(13.3) = 13
                # position on the feature map
                gx = target[b][t, 0] * in_w
                gy = target[b][t, 1] * in_h
                gw = target[b][t, 2] * in_w
                gh = target[b][t, 3] * in_h

                # which grid cell the box center falls into
                gi = int(gx)
                gj = int(gy)

                # ground-truth box (width/height only)
                gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)

                # all anchor boxes (width/height only)
                anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),
                                                                  np.array(anchors)), 1))
                # overlap between the ground truth and the anchors
                anch_ious = bbox_iou(gt_box, anchor_shapes)

                # Find the best matching anchor box
                # compute the IoU between the ground-truth label and the 9 anchors and find the index;
                # e.g. if best_n = 6, it falls on the 19x19 head and the head-relative index is 0
                best_n = np.argmax(anch_ious)
                if best_n not in anchor_index:
                    continue
                # Masks
                if (gj < in_h) and (gi < in_w):
                    best_n = best_n - subtract_index
                    # mark which anchors actually contain an object
                    noobj_mask[b, best_n, gj, gi] = 0
                    mask[b, best_n, gj, gi] = 1
                    # anchor center targets
                    tx[b, best_n, gj, gi] = gx
                    ty[b, best_n, gj, gi] = gy
                    # anchor width/height targets
                    tw[b, best_n, gj, gi] = gw
                    th[b, best_n, gj, gi] = gh
                    # normalized w/h, used for the box loss scale
                    box_loss_scale_x[b, best_n, gj, gi] = target[b][t, 2]
                    box_loss_scale_y[b, best_n, gj, gi] = target[b][t, 3]
                    # objectness
                    tconf[b, best_n, gj, gi] = 1
                    # class
                    tcls[b, best_n, gj, gi, int(target[b][t, 4])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
                    continue
        t_box[..., 0] = tx
        t_box[..., 1] = ty
        t_box[..., 2] = tw
        t_box[..., 3] = th
        return mask, noobj_mask, t_box, tconf, tcls, box_loss_scale_x, box_loss_scale_y

    def get_ignore(self, prediction, target, scaled_anchors, in_w, in_h, noobj_mask):
        bs = len(target)
        anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)]
        scaled_anchors = np.array(scaled_anchors)[anchor_index]
        # center offsets of the anchors
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        # width/height adjustments of the anchors
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height

        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

        # build the grid: the anchor centers sit at the top-left corner of each cell
        # grid_x = [1,3,19,19]
        # grid_y = [1,3,19,19]
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_w, 1).repeat(
            int(bs*self.num_anchors/3), 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_h, 1).t().repeat(
            int(bs*self.num_anchors/3), 1, 1).view(y.shape).type(FloatTensor)

        # anchor widths and heights
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)

        # decoded box centers and sizes
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x + grid_x
        pred_boxes[..., 1] = y + grid_y
        pred_boxes[..., 2] = torch.exp(w) * anchor_w
        pred_boxes[..., 3] = torch.exp(h) * anchor_h
        for i in range(bs):
            pred_boxes_for_ignore = pred_boxes[i]
            pred_boxes_for_ignore = pred_boxes_for_ignore.view(-1, 4)
            if len(target[i]) > 0:
                gx = target[i][:, 0:1] * in_w
                gy = target[i][:, 1:2] * in_h
                gw = target[i][:, 2:3] * in_w
                gh = target[i][:, 3:4] * in_h
                gt_box = torch.FloatTensor(np.concatenate([gx, gy, gw, gh], -1)).type(FloatTensor)

                anch_ious = iou(gt_box, pred_boxes_for_ignore)
                for t in range(target[i].shape[0]):
                    anch_iou = anch_ious[t].view(pred_boxes[i].size()[:3])
                    noobj_mask[i][anch_iou > self.ignore_threshold] = 0
        return noobj_mask, pred_boxes
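A hedged usage sketch of the class above: the anchor list, class count, input size, and the single dummy target are placeholder values chosen for illustration. targets is a list with one numpy array per image whose rows are normalized (cx, cy, w, h, class), which is how get_target and get_ignore read it.

import numpy as np
import torch

# placeholder configuration for illustration only
anchors = [(12, 16), (19, 36), (40, 28),
           (36, 75), (76, 55), (72, 146),
           (142, 110), (192, 243), (459, 401)]
num_classes = 20
img_size = (608, 608)

criterion = YOLOLoss(anchors, num_classes, img_size, label_smooth=0.01, cuda=False)

# output of the 19x19 head: (bs, 3*(5+num_classes), 19, 19)
bs = 1
head_out = torch.randn(bs, 3 * (5 + num_classes), 19, 19)

# one ground-truth box per image: normalized cx, cy, w, h and the class index
targets = [np.array([[0.2, 0.7, 0.25, 0.4, 5]], dtype=np.float32)]

loss, conf_loss, cls_loss, loc_loss = criterion(head_out, targets)
print(loss.item(), conf_loss, cls_loss, loc_loss)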