YoloV3目标检测

转换生成yolo.h5

#! /usr/bin/env python
"""
Reads Darknet53 config and weights and creates Keras model with TF backend.

Currently only supports layers in Darknet53 config.

yad2k.py的作用主要是使用yolo的网络结构配置文件和权重文件转换成keras的.h5文件
"""

import argparse#argparse的作用就是为py文件封装好可以选择的参数，使他们更加灵活，
import configparser#该模块在python中用来读取配置文件，配置文件的格式可以包含一个或多个节，每个节可以有多个参数
import io
import os
from collections import defaultdict

import numpy as np
from keras import backend as K
from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Reshape,
                          ZeroPadding2D, UpSampling2D, Activation, Lambda, MaxPooling2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate, add
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model as plot
'''
arser = argparse.ArgumentParser()的目的是创建一个解析对象
add_argument的作用是向该对象中添加你要关注的命令行参数和选项，
每一个add_argument方法对应一个你要关注的参数或选项
最后调用parse_args方法进行解析，解析成功之后即可使用
'''

parser = argparse.ArgumentParser(description='Yet Another Darknet To Keras Converter.')
parser.add_argument('config_path', help='Path to Darknet cfg file.')
parser.add_argument('weights_path', help='Path to Darknet weights file.')
parser.add_argument('output_path', help='Path to output Keras model file.')
parser.add_argument(
    '-p',
    '--plot_model',
    help='Plot generated Keras model and save as image.',
    action='store_true')
parser.add_argument(
    '-flcl',
    '--fully_convolutional',
    help='Model is fully convolutional so set input shape to (None, None, 3). '
    'WARNING: This experimental option does not work properly for YOLO_v2.',
    action='store_true')


def unique_config_sections(config_file):
    """Convert all config sections to have unique names.
    Adds unique suffixes to config sections for compability with configparser.

    转换所有config部分，使其具有唯一名称，并为config解析器的兼容性向配置部分添加唯一后缀

    """
    # defaultdict的作用是当字典里的key不存在但被查找时，返回的不是keyError而是一个默认值，
    # 这个默认值是多少根据defaultdict()中的值决定，如defaultdict(int)就表示这个默认值是0
    section_counters = defaultdict(int)

    output_stream = io.StringIO()#io.StringIO表示在内存中以io流的方式读写str
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                section = line.strip().strip('[]')
                _section = section + '_' + str(section_counters[section])
                section_counters[section] += 1
                line = line.replace(section, _section)
            output_stream.write(line)
    output_stream.seek(0)#把文件指针移动到文件开始处
    return output_stream


def _main(args):
    # 返回参数，参数中开头的~被替换成user的主目录;如果扩展失败或者参数path不是以~打头，则直接返回参数（path）。
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    # assert断言是声明其布尔值必须为真的判定，如果发生异常就说明表达示为假。
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    # ndarray 是一个多维的数组对象，它有一个特点是同构，即其中所有元素的类型必须相同
    weights_header = np.ndarray(shape=(5, ), dtype='int32', buffer=weights_file.read(20))
    print('Weights Header: ', weights_header)
    # TODO: Check transpose flag when implementing fully connected layers.
    # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    if args.fully_convolutional:#如果模型是全卷积的
        image_height, image_width = None, None
    else:
        image_height = int(cfg_parser['net_0']['height'])
        image_width = int(cfg_parser['net_0']['width'])

    prev_layer = Input(shape=(image_height, image_width, 3))
    all_layers = [prev_layer]
    outputs = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0

    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            # TODO: This assumes channel last dim_ordering.
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn'
                  if batch_normalize else '  ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                # TODO: Keras BatchNormalization mistakenly refers to var
                # as std.
                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            # TODO: Add check for Theano dim ordering.
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            padding = 'same' if pad == 1 and stride == 1 else 'valid'
            # Adjust padding model for darknet.
            if stride == 2:
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Create Conv2D layer
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)

            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(prev_layer)

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    padding='same',
                    pool_size=(size, size),
                    strides=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            if cfg_parser.items(section) != []:
                raise ValueError('{} with params unsupported.'.format(section))
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            if len(ids) == 2:
                for i, item in enumerate(ids):
                    if item != -1:
                        ids[i] = item + 1

            layers = [all_layers[i] for i in ids]

            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = concatenate(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('shortcut'):
            ids = [int(i) for i in cfg_parser[section]['from'].split(',')][0]
            activation = cfg_parser[section]['activation']
            shortcut = add([all_layers[ids], prev_layer])
            if activation == 'linear':
                shortcut = Activation('linear')(shortcut)
            all_layers.append(shortcut)
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                UpSampling2D(
                    size=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            classes = int(cfg_parser[section]['classes'])
            # num = int(cfg_parser[section]['num'])
            # mask = int(cfg_parser[section]['mask'])
            n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2])
            n3 = 3
            n4 = (4 + 1 + classes)
            yolo = Reshape((n1, n2, n3, n4))(prev_layer)
            all_layers.append(yolo)
            prev_layer = all_layers[-1]
            outputs.append(len(all_layers) - 1)

        elif (section.startswith('net')):
            pass  # Configs not currently handled during model definition.
        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    model = Model(inputs=all_layers[0],
                  outputs=[all_layers[i] for i in outputs])
    print(model.summary())
    model.save('{}'.format(output_path))
    print('Saved Keras model to {}'.format(output_path))
    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))
    plot(model, to_file='{}.png'.format(output_root), show_shapes=True)

    plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
    print('Saved model plot to {}.png'.format(output_root))


if __name__ == '__main__':
    _main(parser.parse_args())

demo

"""Demo for use yolo v3
"""
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO


def process_image(img):
    """Resize, reduce and expand image.

    # Argument:
        img: original image.

    # Returns
        image: ndarray(64, 64, 3), processed image.
    """
    # 图像缩放函数，将图像缩放至指定大小,image是416x416x3矩阵，其中416和416是指定的图像的宽和高的大小，3指图像的通道为3，这表示处理的是彩色图像
    image = cv2.resize(img, (416, 416),interpolation=cv2.INTER_CUBIC)
    # 将图像转化为指定类型的数组
    image = np.array(image, dtype='float32')
    image /= 255.#将图像矩阵转化至0~1之间
    # np.expand_dims:用于扩展数组的形状，这里axis=0使image矩阵变成1x416x416x3矩阵了
    image = np.expand_dims(image, axis=0)

    return image


def get_classes(file):
    """Get classes name.

    # Argument:
        file: classes name for database.

    # Returns
        class_names: List, classes name.

    """
    with open(file) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    return class_names


def draw(image, boxes, scores, classes, all_classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box #x,y用于平行；w,h用于缩放
        # 得到上下左右坐标
        top = max(0, np.floor(x + 0.5).astype(int))
        left = max(0, np.floor(y + 0.5).astype(int))
        right = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        bottom = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)#通过对角线画矩形
        cv2.putText(image, '{0} {1:.2f}'.format(all_classes[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)#为图片添加文字，其中(top, left - 6)指的是左上角坐标

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()


def detect_image(image, yolo, all_classes):
    """Use yolo v3 to detect images.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: processed image.
    """
    pimage = process_image(image)

    start = time.time()
    boxes, classes, scores = yolo.predict(pimage, image.shape)
    '''
    检测的核心输出，其中
    boxes：框的四个点坐标，(top, left, bottom, right)；
    scores：框的类别置信度，融合框置信度和类别置信度；
    classes：框的类别；
    '''

    end = time.time()

    print('time: {0:.2f}s'.format(end - start))#输出所用时间

    if boxes is not None:
        draw(image, boxes, scores, classes, all_classes)

    return image


def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

    # Prepare for saving the detected video
    sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # cv2.VideoWriter_fourcc()函数的作用是输入四个字符代码即可得到对应的视频编码器。
    fourcc = cv2.VideoWriter_fourcc(*'mpeg')

    vout = cv2.VideoWriter()
    vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

    while True:#以下代码的作用是在视频中截取图片
        res, frame = camera.read()

        if not res:
            break

        image = detect_image(frame, yolo, all_classes)
        cv2.imshow("detection", image)

        # Save the video frame by frame
        vout.write(image)

        if cv2.waitKey(110) & 0xff == 27:
                break

    vout.release()
    camera.release()
    

if __name__ == '__main__':
    # 0.6和0.5分别表示threshold for object以及threshold for box；就是定义的两个参数
    yolo = YOLO(0.6, 0.5)
    file = 'data/coco_classes.txt'
    all_classes = get_classes(file)
    '''
    # detect images in test floder.
    # os.walk返回的(root，dirs,files)分别表示：root 所指的是当前正在遍历的这个文件夹的本身的地址；
    # dirs 是一个 list ，内容是该文件夹中所有的目录的名字(不包括子目录)；
    # files 同样是 list , 内容是该文件夹中所有的文件(不包括子目录)
    '''
    # for (root, dirs, files) in os.walk('images/test'):
    #     if files:
    #         for f in files:
    #             print(f)
    #             path = os.path.join(root, f)
    #             image = cv2.imread(path)
    #             image = detect_image(image, yolo, all_classes)
    #             cv2.imwrite('images/res/' + f, image)
    #
    # # detect videos one at a time in videos/test folder

    video = 'library1.mp4'
    detect_video(video, yolo, all_classes)

猜你喜欢