Loss Functions in Detection Algorithms

  • Faster R-CNN can be trained in two ways, alternating training or multi-task (approximate joint) training; for the latter, the loss computation can be found in train.py of Faster-RCNN_TF (a sketch of the smooth L1 helper it uses follows the excerpt below).

    RPN

    # classification loss
    rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
    rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
    rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))), [-1, 2])
    rpn_label = tf.reshape(tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))

    # bounding box regression L1 loss
    rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
    rpn_bbox_targets = tf.transpose(self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(self.net.get_output('rpn-data')[3], [0, 2, 3, 1])
    rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # R-CNN
    # classification loss
    cls_score = self.net.get_output('cls_score')
    label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label))

    # bounding box regression L1 loss
    bbox_pred = self.net.get_output('bbox_pred')
    bbox_targets = self.net.get_output('roi-data')[2]
    bbox_inside_weights = self.net.get_output('roi-data')[3]
    bbox_outside_weights = self.net.get_output('roi-data')[4]
    smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # final loss
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
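
    Both regression terms call a `_modified_smooth_l1` helper. A minimal NumPy sketch of what such a helper computes is shown below, assuming the usual sigma-parameterized smooth L1 with inside/outside weights; the function name comes from the excerpt, but this reconstruction is illustrative only and the exact TensorFlow implementation lives in the repository.

    import numpy as np

    def modified_smooth_l1_sketch(sigma, bbox_pred, bbox_targets, inside_w, outside_w):
        """Illustrative sigma-parameterized smooth L1 (not the repo's exact code).

        smooth_l1(d) = 0.5 * (sigma * d)^2   if |d| < 1 / sigma^2
                     = |d| - 0.5 / sigma^2   otherwise
        """
        sigma2 = sigma ** 2
        diff = inside_w * (bbox_pred - bbox_targets)   # inside weights zero out unassigned anchors/ROIs
        abs_diff = np.abs(diff)
        small = (abs_diff < 1.0 / sigma2).astype(diff.dtype)
        per_elem = small * 0.5 * sigma2 * diff ** 2 + (1.0 - small) * (abs_diff - 0.5 / sigma2)
        return outside_w * per_elem                    # outside weights handle normalization
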
  • The loss computation in darknet can be found in network.c (an illustrative Python rendering follows the excerpt):

    void calc_network_cost(network *netp)
    {
        network net = *netp;
        int i;
        float sum = 0;
        int count = 0;
        for(i = 0; i < net.n; ++i){
            if(net.layers[i].cost){
                sum += net.layers[i].cost[0];
                ++count;
            }
        }
        *net.cost = sum/count;
    }
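
    In other words, the cost darknet reports is the mean of layer.cost[0] over the layers that define a cost, which in a detection network is typically the region/detection head(s). A small Python rendering with hypothetical names:

    def calc_network_cost_sketch(layers):
        """Mean of the per-layer costs of layers that carry one (illustrative only)."""
        costs = [layer["cost"] for layer in layers if layer.get("cost") is not None]
        return sum(costs) / len(costs)

    # Example: three layers, only the final region layer carries a cost.
    layers = [{"type": "conv"}, {"type": "conv"}, {"type": "region", "cost": 42.5}]
    print(calc_network_cost_sketch(layers))  # -> 42.5
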

  • The loss function of pytorch-yolo2 can be found in the forward function of region_loss.py; the corresponding code in the yolo2-pytorch project is in darknet.py (the "for training" section around line 216). A sketch of the box decoding used in this function follows the excerpt.

    def forward(self, output, target):
        # output : B x nA*(4+1+num_classes) x H x W
        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        # reshape to batch x anchor x (5+nC) x feature-map height x width
        output = output.view(nB, nA, (5+nC), nH, nW)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        # objectness confidence predicted by the network
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # class scores
        cls = output.index_select(2, Variable(torch.linspace(5, 5+nC-1, nC).long().cuda()))
        cls = cls.view(nB*nA, nC, nH*nW).transpose(1, 2).contiguous().view(nB*nA*nH*nW, nC)
        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB*nA*nH*nW)
        grid_x = torch.linspace(0, nW-1, nW).repeat(nH, 1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW, 1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH*nW).view(nB*nA*nH*nW)
        # x/y are offsets relative to the output feature-map grid cells
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        # w/h scale the anchor width/height exponentially
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        # convert to an (nB*nA*nH*nW) x 4 matrix
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = \
            build_targets(pred_boxes, target.data, self.anchors, nA, nC,
                          nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum().data[0])

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)
        t3 = time.time()

        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x*coord_mask, tx*coord_mask)/2.0
        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y*coord_mask, ty*coord_mask)/2.0
        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w*coord_mask, tw*coord_mask)/2.0
        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h*coord_mask, th*coord_mask)/2.0
        # bounding-box confidence
        loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0
        # classification loss
        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
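
    The pred_boxes lines above implement the YOLOv2 box decoding: a sigmoid-bounded offset inside the responsible grid cell plus an exponential scaling of the anchor. A self-contained sketch of the same transform for a single raw prediction, with hypothetical argument names, is shown here (in the forward pass above, the sigmoid on x and y has already been applied before the addition).

    import math

    def decode_yolo2_box(tx, ty, tw, th, grid_x, grid_y, anchor_w, anchor_h):
        """Decode one raw YOLOv2 prediction, all values in feature-map (grid) units."""
        sigmoid = lambda v: 1.0 / (1.0 + math.exp(-v))
        bx = sigmoid(tx) + grid_x          # center stays inside cell (grid_x, grid_y)
        by = sigmoid(ty) + grid_y
        bw = anchor_w * math.exp(tw)       # width/height scale the anchor exponentially
        bh = anchor_h * math.exp(th)
        return bx, by, bw, bh

    # Example: a zero prediction in cell (3, 5) with a 2x3 (grid-unit) anchor.
    print(decode_yolo2_box(0.0, 0.0, 0.0, 0.0, 3, 5, 2.0, 3.0))  # -> (3.5, 5.5, 2.0, 3.0)
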
  • The loss function of PyTorch-YOLOv3 can be found in models.py (a minimal sketch of its masking pattern follows the excerpt):

    Get outputs

    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # Calculate offsets for each grid
    grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
    grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
    scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
    anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
    anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    # Add offset and scale with anchors
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

    # Training
    if targets is not None:

        if x.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            self.ce_loss = self.ce_loss.cuda()

        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes=pred_boxes.cpu().data,
            pred_conf=pred_conf.cpu().data,
            pred_cls=pred_cls.cpu().data,
            target=targets.cpu().data,
            anchors=scaled_anchors.cpu().data,
            num_anchors=nA,
            num_classes=self.num_classes,
            grid_size=nG,
            ignore_thres=self.ignore_thres,
            img_dim=self.image_dim,
        )

        nProposals = int((pred_conf > 0.5).sum().item())
        recall = float(nCorrect / nGT) if nGT else 1
        precision = float(nCorrect / nProposals)

        # Handle masks
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))

        # Handle target variables
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(LongTensor), requires_grad=False)

        # Get conf mask where gt and where there is no gt
        conf_mask_true = mask
        conf_mask_false = conf_mask - mask

        # Mask outputs to ignore non-existing objects
        loss_x = self.mse_loss(x[mask], tx[mask])
        loss_y = self.mse_loss(y[mask], ty[mask])
        loss_w = self.mse_loss(w[mask], tw[mask])
        loss_h = self.mse_loss(h[mask], th[mask])
        loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
            pred_conf[conf_mask_true], tconf[conf_mask_true]
        )
        loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
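
    The coordinate and class terms above only see predictions selected by mask (the cells/anchors responsible for a ground-truth box), while the confidence loss is split into object and no-object cells. A minimal, self-contained sketch of that masking pattern with made-up tensors (the real masks and targets come from build_targets) follows:

    import torch
    import torch.nn as nn

    torch.manual_seed(0)
    pred_x = torch.rand(2, 3, 13, 13)       # predicted center-x per batch, anchor, cell (toy values)
    target_x = torch.rand(2, 3, 13, 13)
    pred_conf = torch.rand(2, 3, 13, 13)

    obj_mask = torch.zeros(2, 3, 13, 13, dtype=torch.bool)
    obj_mask[0, 1, 6, 6] = True             # one anchor/cell responsible for a ground-truth box
    noobj_mask = ~obj_mask                  # everything else is treated as background

    mse = nn.MSELoss()
    bce = nn.BCELoss()
    loss_x = mse(pred_x[obj_mask], target_x[obj_mask])
    loss_conf = bce(pred_conf[noobj_mask], torch.zeros_like(pred_conf[noobj_mask])) + \
                bce(pred_conf[obj_mask], torch.ones_like(pred_conf[obj_mask]))
    print(loss_x.item(), loss_conf.item())
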
  • The loss function of ssd.pytorch (MultiBoxLoss) can be found in multibox_loss.py (a toy example of its hard-negative-mining sort trick follows the excerpt):

    Localization Loss (Smooth L1)

    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

    # Compute max conf across batch for hard negative mining
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard Negative Mining
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # filter out pos boxes for now (view before masking so the mask shape matches)
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence Loss Including Positive and Negative Examples
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos+neg).gt(0)]
    loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

    # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
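
    The two consecutive sorts are the core of the hard negative mining: sorting the per-prior losses and then sorting the resulting indices gives each prior's rank by loss, so idx_rank < num_neg keeps only the hardest negatives. A toy example with a made-up loss tensor:

    import torch

    loss_c = torch.tensor([[0.2, 0.9, 0.1, 0.5]])  # made-up per-prior losses (1 image, 4 priors)
    _, loss_idx = loss_c.sort(1, descending=True)  # loss_idx = [[1, 3, 0, 2]]: priors ordered hardest first
    _, idx_rank = loss_idx.sort(1)                 # idx_rank = [[2, 0, 3, 1]]: rank of each prior by loss
    num_neg = torch.tensor([[2]])                  # keep the 2 hardest negatives (negpos_ratio * num_pos)
    neg = idx_rank < num_neg.expand_as(idx_rank)   # [[False, True, False, True]] -> priors 1 and 3
    print(neg)
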

References:

  1. https://github.com/smallcorgi/Faster-RCNN_TF
  2. https://github.com/pjreddie/darknet
  3. https://github.com/marvis/pytorch-yolo2
  4. https://github.com/longcw/yolo2-pytorch
  5. https://github.com/eriklindernoren/PyTorch-YOLOv3
  6. https://github.com/amdegroot/ssd.pytorch
