函数compute_loss

# Accumulate the three loss terms over the three output scales (i = 0, 1, 2).
for i in range(3):
    # pred_result is read in pairs: entry 2*i is passed to compute_loss as
    # `conv` and entry 2*i+1 as `pred`.
    conv, pred = pred_result[i*2], pred_result[i*2+1]
    # target[i] is unpacked into the positional arguments between `conv` and
    # `i` (label and bboxes in compute_loss's signature) -- presumably a
    # 2-element sequence; verify against the data pipeline.
    loss_items = compute_loss(pred, conv, *target[i], i)
    # compute_loss returns (giou_loss, conf_loss, prob_loss) in that order.
    giou_loss += loss_items[0]
    conf_loss += loss_items[1]
    prob_loss += loss_items[2]

def compute_loss(pred, conv, label, bboxes, i=0):
    """Compute the GIoU, confidence and class losses for one output scale.

    Args:
        pred: Prediction tensor sliced as ``[..., 0:4]`` (box x, y, w, h) and
            ``[..., 4:5]`` (objectness confidence). Presumably the decoded
            counterpart of ``conv`` -- the decode step is not shown here.
        conv: Raw network output for this scale. It is reshaped to
            ``(batch, size, size, 3, 5 + NUM_CLASS)`` and its confidence and
            class channels are used as logits.
        label: Ground-truth tensor sliced as ``[..., 0:4]`` (box),
            ``[..., 4:5]`` (object mask) and ``[..., 5:]`` (class targets).
        bboxes: Ground-truth boxes; indexed below as
            ``(batch, num_boxes, 4)``.
        i: Index into ``STRIDES`` selecting the stride of this scale.

    Returns:
        Tuple ``(giou_loss, conf_loss, prob_loss)``; each term is summed over
        axes 1-4 and then averaged over the batch axis.
    """
    raw_shape = tf.shape(conv)
    batch_size, grid_size = raw_shape[0], raw_shape[1]
    # Per the original note, __C.YOLO.STRIDES = [8, 16, 32]; stride * grid
    # size gives the network input resolution for this scale.
    input_size = tf.cast(STRIDES[i] * grid_size, tf.float32)
    conv = tf.reshape(conv, (batch_size, grid_size, grid_size, 3, 5 + NUM_CLASS))

    # Raw (pre-activation) confidence and class outputs, used as logits.
    conf_logits = conv[:, :, :, :, 4:5]
    prob_logits = conv[:, :, :, :, 5:]

    # Predicted box coordinates and predicted objectness.
    box_pred = pred[:, :, :, :, 0:4]
    conf_pred = pred[:, :, :, :, 4:5]

    # Ground truth: box coordinates, object mask, class targets.
    box_true = label[:, :, :, :, 0:4]
    # Presumably 1 where a cell/anchor holds an object and 0 otherwise.
    obj_mask = label[:, :, :, :, 4:5]
    prob_true = label[:, :, :, :, 5:]

    # (1) Box regression loss, counted only where obj_mask is non-zero.
    giou = tf.expand_dims(bbox_giou(box_pred, box_true), axis=-1)
    true_w = box_true[:, :, :, :, 2:3]
    true_h = box_true[:, :, :, :, 3:4]
    # Weight in (1, 2]: the smaller the box relative to the input, the larger
    # its weight.
    box_scale = 2.0 - 1.0 * true_w * true_h / (input_size ** 2)
    giou_loss = obj_mask * box_scale * (1 - giou)

    # IoU of every prediction against every ground-truth box of its image.
    # The np.newaxis entries line the two tensors up for broadcasting:
    # predictions become (batch, size, size, 3, 1, 4) and boxes become
    # (batch, 1, 1, 1, num_boxes, 4), so the result has one IoU per
    # (prediction, box) pair along the last axis.
    iou = bbox_iou(
        box_pred[:, :, :, :, np.newaxis, :],
        bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :],
    )
    best_iou = tf.reduce_max(iou, axis=-1, keepdims=True)

    # Background = no object assigned AND best IoU below the threshold.
    # Predictions without an object but with a high best IoU are in neither
    # mask and therefore contribute nothing to the confidence loss.
    bg_mask = (1.0 - obj_mask) * tf.cast(best_iou < IOU_LOSS_THRESH, tf.float32)

    # (2) Confidence loss: cross-entropy on the raw logits, weighted by the
    # squared error of the predicted confidence.
    # NOTE(review): conf_pred is presumably sigmoid(conf_logits) -- confirm
    # against the decode function.
    focal_weight = tf.pow(obj_mask - conf_pred, 2)
    conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=obj_mask, logits=conf_logits)
    conf_loss = focal_weight * (obj_mask * conf_ce + bg_mask * conf_ce)

    # (3) Class loss, counted only where obj_mask is non-zero.
    prob_ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=prob_true, logits=prob_logits)
    prob_loss = obj_mask * prob_ce

    def _sum_then_batch_mean(per_cell):
        # Sum everything except the batch axis, then average over the batch.
        return tf.reduce_mean(tf.reduce_sum(per_cell, axis=[1, 2, 3, 4]))

    return (
        _sum_then_batch_mean(giou_loss),
        _sum_then_batch_mean(conf_loss),
        _sum_then_batch_mean(prob_loss),
    )

1、tf.reshape

给定张量 tensor，此操作返回一个新的 tf.Tensor：其元素的值和顺序与 tensor 相同，但形状为 shape 指定的新形状。

tf.reshape(
    tensor, shape, name=None
)

t1 = [[1, 2, 3],
      [4, 5, 6]]
print(tf.shape(t1).numpy())

[2 3]

t2 = tf.reshape(t1, [6])
t2

<tf.Tensor: id=8, shape=(6,), dtype=int32, numpy=array([1, 2, 3, 4, 5, 6], dtype=int32)>

2、tf.expand_dims

返回一个在索引 axis 处插入了长度为 1 的轴的张量。

tf.expand_dims(
    input, axis, name=None
)

给定张量 input，该操作在 input 形状的维度索引 axis 处插入一个长度为 1 的维度。维度索引遵循 Python 索引规则：从零开始；若为负数，则从末尾开始倒数。

image = tf.zeros([10,10,3])
tf.expand_dims(image, axis=0).shape.as_list()

[1, 10, 10, 3]

tf.expand_dims(image, axis=1).shape.as_list()

[10, 1, 10, 3]

tf.expand_dims(image, -1).shape.as_list()

[10, 10, 3, 1]

3、tf.math.reduce_max

计算张量维度上的元素最大值。

x = tf.constant([5, 1, 2, 4])
tf.reduce_max(x,axis=-1)

<tf.Tensor: id=14, shape=(), dtype=int32, numpy=5>

x = tf.constant([-5, -1, -2, -4])
tf.reduce_max(x)

<tf.Tensor: id=22, shape=(), dtype=int32, numpy=-1>

4、tf.math.pow

计算一个值对另一个值的幂。

tf.math.pow(
    x, y, name=None
)

x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[8, 16], [2, 3]])
tf.pow(x, y)  # [[256, 65536], [9, 27]]

<tf.Tensor: id=26, shape=(2, 2), dtype=int32, numpy=
array([[  256, 65536],
       [    9,    27]], dtype=int32)>

5、tf.nn.sigmoid_cross_entropy_with_logits

计算给定 logits 的 sigmoid 交叉熵。

tf.nn.sigmoid_cross_entropy_with_logits(
    labels=None, logits=None, name=None
)

6、tf.math.reduce_sum

计算跨张量维度的元素之和。

x = tf.constant([[1, 1, 1], [1, 1, 1]])
tf.reduce_sum(x).numpy()

7、tf.math.reduce_mean

计算跨张量维度的元素的均值。

函数bbox_giou

pred_xywh     = pred[:, :, :, :, 0:4]
label_xywh    = label[:, :, :, :, 0:4]
bbox_giou(pred_xywh, label_xywh)

def bbox_giou(boxes1, boxes2):
    """Compute the Generalized IoU (GIoU) between two sets of boxes.

    Args:
        boxes1: Tensor whose last axis holds (x_center, y_center, w, h).
        boxes2: Tensor in the same layout, broadcast-compatible with
            ``boxes1``.

    Returns:
        Tensor of GIoU values with the last (coordinate) axis removed.
        Where a denominator is zero (degenerate zero-area boxes) the
        corresponding quotient is 0 rather than NaN/Inf, so a single
        degenerate box cannot poison the loss.
    """
    # Convert (x, y, w, h) to (x - 0.5*w, y - 0.5*h, x + 0.5*w, y + 0.5*h).
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Enforce (min corner, max corner) ordering. If a width or height is
    # negative, the two corners computed above are swapped; sorting them
    # keeps the areas below non-negative.
    boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
                        tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
    boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
                        tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)

    # Area of each box.
    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Top-left and bottom-right corners of the intersection rectangle.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Width/height of the intersection; negative when the boxes do not
    # overlap, hence the clamp to zero.
    inter_section = tf.maximum(right_down - left_up, 0.0)

    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    # divide_no_nan yields 0 where union_area == 0 instead of NaN.
    iou = tf.math.divide_no_nan(inter_area, union_area)

    # Smallest axis-aligned rectangle C enclosing both boxes.
    enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
    enclose_area = enclose[..., 0] * enclose[..., 1]

    # GIoU = IoU - (|C| - |union|) / |C|, guarded against |C| == 0.
    giou = iou - 1.0 * tf.math.divide_no_nan(enclose_area - union_area, enclose_area)

    return giou

1、tf.math.minimum

逐元素返回 x 和 y 中的较小值（支持广播）。

tf.math.minimum(
    x, y, name=None
)

x = tf.constant([0., 0., 0., 0.])
y = tf.constant([-5., -2., 0., 3.])
tf.math.minimum(x, y)

<tf.Tensor: id=31, shape=(4,), dtype=float32, numpy=array([-5., -2.,  0.,  0.], dtype=float32)>

x = tf.constant([-5., 0., 0., 0.])
y = tf.constant([-3.])
tf.math.minimum(x, y)

<tf.Tensor: id=35, shape=(4,), dtype=float32, numpy=array([-5., -3., -3., -3.], dtype=float32)>

x = tf.constant([-5., 0., 0., 0.])
y = tf.constant([-3., -1.])
tf.math.minimum(x, y)

InvalidArgumentError: Incompatible shapes: [4] vs. [2] [Op:Minimum]

2、tf.convert_to_tensor

将给定的 value 转换为 Tensor。

tf.convert_to_tensor(
    value, dtype=None, dtype_hint=None, name=None
)

此函数将各种类型的Python对象转换为Tensor 对象。它接受Tensor对象，numpy数组，Python列表和Python标量

import numpy as np
def my_func(arg):
  """Return ``arg`` converted to a float32 tensor via tf.convert_to_tensor."""
  return tf.convert_to_tensor(arg, dtype=tf.float32)
value_1 = my_func(tf.constant([[1.0, 2.0], [3.0, 4.0]]))
print(value_1)

tf.Tensor(
[[1. 2.]
 [3. 4.]], shape=(2, 2), dtype=float32)

value_2 = my_func([[1.0, 2.0], [3.0, 4.0]])
print(value_2)

tf.Tensor(
[[1. 2.]
 [3. 4.]], shape=(2, 2), dtype=float32)

value_3 = my_func(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32))
print(value_3)

tf.Tensor(
[[1. 2.]
 [3. 4.]], shape=(2, 2), dtype=float32)

函数bbox_iou

iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], 
				 bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])

def bbox_iou(boxes1, boxes2):
    """Compute the IoU between two broadcast-compatible sets of boxes.

    Args:
        boxes1: Tensor whose last axis holds (x_center, y_center, w, h).
        boxes2: Tensor in the same layout, broadcast-compatible with
            ``boxes1``.

    Returns:
        Tensor of IoU values with the last (coordinate) axis removed.
        Where the union area is zero (e.g. two zero-area boxes) the result
        is 0 rather than NaN.
    """
    # Areas are taken directly from width * height.
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # Convert (x, y, w, h) to (x_min, y_min, x_max, y_max).
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Corners of the intersection rectangle.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    # Clamp to zero so non-overlapping boxes get zero intersection.
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # divide_no_nan yields 0 where union_area == 0 instead of NaN.
    return tf.math.divide_no_nan(1.0 * inter_area, union_area)

TF2.0 API学习（Python）六：函数compute_loss、函数bbox_giou、函数bbox_iou