yolo v2 源码分析(一)

detector.c 文件,这里仅分析 train_detector 函数

 void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
    list *options = read_data_cfg(datacfg);
    char *train_images = option_find_str(options, "train", "data/train.list");
    char *backup_directory = option_find_str(options, "backup", "/backup/");

    /*srand函数是随机数发生器的初始化函数。srand和rand()配合使用产生伪随机数序列。
    rand函数在产生随机数前,需要系统提供的生成伪随机数序列的种子,rand根据这个种子的值产生一系列随机数。
    如果系统提供的种子没有变化,每次调用rand函数生成的伪随机数序列都是一样的。*/
    srand(time(0));

    /*第三个参数是:`cfg/yolo.train.cfg`,`basecfg()`这个函数把`cfg/yolo.train.cfg`
    变成了`yolo0train.cfg`,然后用base指针指向`yolo0train.cfg`*/
    char *base = basecfg(cfgfile);
    printf("%s\n", base); //打印"yolo"字样
    float avg_loss = -1;
    network *nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    int i;
    for(i = 0; i < ngpus; ++i)
    {
        srand(seed);
#ifdef GPU
        cuda_set_device(gpus[i]);
#endif
        nets[i] = parse_network_cfg(cfgfile);//解析网络构架,下面会仔细分析该函数
        if(weightfile)
        {
            load_weights(&nets[i], weightfile);//加载预训练参数,下面会仔细分析该函数
        }
        if(clear) *nets[i].seen = 0;
        nets[i].learning_rate *= ngpus;
    }
    srand(time(0));
    network net = nets[0];

    /*imgs是一次加载到内存的图像数量,如果占内存太大的话可以把subdivisions调大或者batch调小一点 */
    int imgs = net.batch * net.subdivisions * ngpus;
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    data train, buffer;

    layer l = net.layers[net.n - 1];

    int classes = l.classes;
    float jitter = l.jitter; //jitter是什么意思呢?可以参考这篇博客:[非均衡数据集处理:利用抖动(jittering)生成额外数据]

    list *plist = get_paths(train_images);
    //int N = plist->size;
    char **paths = (char **)list_to_array(plist);

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs; //n就是一次加载到内存中的图片数量
    args.m = plist->size;//m是待训练图片的总数量
    args.classes = classes;
    args.jitter = jitter;
    args.num_boxes = l.max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    args.threads = 8;

    //调节图片旋转角度、曝光度、饱和度、色调等,来增加图片数量
    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data(args);
    clock_t time;
    int count = 0;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net.max_batches)
    {
        //进行10次迭代后,调整一次网络大小
        if(l.random && count++%10 == 0)
        {
             printf("Resizing\n");
             int dim = (rand() % 10 + 10) * 32;//dim为320,352,384,416。。。
             if (get_current_batch(net)+100 > net.max_batches) 
                dim = 544;
             //int dim = (rand() % 4 + 16) * 32;
             printf("%d\n", dim);
             //网络输入图片的宽高可调节,dim最小为320,最大为618,这样可以更好使用多尺度的目标
             args.w = dim;
             args.h = dim;

             pthread_join(load_thread, 0);
             train = buffer;
             free_data(train);
             load_thread = load_data(args);

             for(i = 0; i < ngpus; ++i){
                resize_network(nets + i, dim, dim);
             }
            net = nets[0];
        }
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data(args);


        printf("Loaded: %lf seconds\n", sec(clock()-time));

        time=clock();
        float loss = 0;
#ifdef GPU
        if(ngpus == 1)
        {
            loss = train_network(net, train);
        } 
        else 
        {
            loss = train_networks(nets, ngpus, train, 4);//开始训练
        }
#else
        loss = train_network(net, train); //开始训练
#endif
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        i = get_current_batch(net);
        printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);

        //每100次或者1000次保存一次权重
        if(i%1000==0 || (i < 1000 && i%100 == 0))
        {
#ifdef GPU
            if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
     }
#ifdef GPU
    if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
    char buff[256];
    sprintf(buff, "%s/%s_final.weights", backup_directory, base);
    save_weights(net, buff);
}

注意

  1. resize网络是yolo v2版本新加的功能。即每进行10次迭代就会resize一次网络输入图片的宽和高,这样保证了网络可以适应各种不同尺度的目标,这样一来,即使没有dropout层,训练出来的网络也不会过拟合。
  2. 在imgs = net.batch * net.subdivisions * ngpus中,net.batch并不是cfg文件中的batch值,而是cfg文件中的batch值除以net.subdivisions,这样一来,一次加载imgs张图片到内存,while循环每执行一次,就会处理完这些图片,完成一次迭代。比如,cfg文件中的batch为64,subdivisions为16,对应在计算imgs时,net.batch=64/16=4, net.subdivisions=16,因此在ngpus=1时imgs=64。为什么net.batch并不对应cfg文件中的batch值,请看3。
  3. net在初始化时调用了parse_network_cfg函数,该函数调用parse_net_options,该函数修改了net->batch的值。

    net->batch = option_find_int(options, "batch", 1);

    int subdivs = option_find_int(options, "subdivisions", 1);

    net->batch /= subdivs;

    net->subdivisions = subdivs;
    这里写图片描述

猜你喜欢

转载自blog.csdn.net/nongfu_spring/article/details/54140453
今日推荐