Understanding and annotating the YOLOv3 parameters

1. Corrections are welcome wherever these notes are wrong.

2. Two parameters that were unclear to me at first:

burn_in=1000

batch_normalize=1		brief explanations are given inline where these first appear below; corrections from readers who know better are still welcome. Thanks!

[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64   				number of training samples per batch; the weights are updated once per batch
subdivisions=64				batch/subdivisions samples are fed into the trainer at a time;
					if memory is short, the batch is split into subdivisions sub-batches

					if your machine has little memory, reduce batch; a larger batch generally trains better,
					while a larger subdivisions value reduces GPU memory pressure (a small example follows)
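For instance (subdivisions=16 is used here purely for illustration):

    batch, subdivisions = 64, 16
    mini_batch = batch // subdivisions   # 4 images per forward/backward pass
    # the weights are updated only after all 16 sub-batches have been processed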

width=416			
height=416
channels=3
					the three lines above describe the network input; width and height set the resolution
					at which the network processes the image and therefore affect precision; they must be multiples of 32

momentum=0.9				momentum coefficient of the SGD optimizer; it affects how quickly gradient descent moves toward the optimum
decay=0.0005				weight-decay regularization term; guards against overfitting
angle=0					generates extra training samples by rotating images
saturation = 1.5			generates extra training samples by varying saturation
exposure = 1.5				generates extra training samples by varying exposure
hue=.1					generates extra training samples by varying hue

learning_rate=0.001			the learning rate controls how fast the weights are updated: too high and the result overshoots
					the optimum, too low and convergence is slow.
					When tuning by hand, the rate has to be revised repeatedly: start relatively high,
					then reduce it after a certain number of epochs.
					In practice the rate is scheduled dynamically by iteration count:
					at the start of training, 0.01 to 0.001 works well;
					after some epochs, decay it gradually;
					near the end of training, it should have decayed by a factor of 100 or more.
					For learning rate tuning, see https://blog.csdn.net/qq_33485434/article/details/80452941

burn_in=1000				warm-up: for the first burn_in iterations, darknet ramps the learning rate up from 0 as
					learning_rate * (iter / burn_in)^power, and only afterwards applies the configured policy
max_batches = 500200		        training stops once max_batches iterations are reached
policy=steps				the learning rate adjustment policy; the options are constant, steps, exp, poly, step, sig, random
					see https://nanfei.ink/2018/01/23/YOLOv2%E8%B0%83%E5%8F%82%E6%80%BB%E7%BB%93/#more
steps=400000,450000			steps and scales together define the learning rate changes: here the rate is multiplied by 0.1
scales=.1,.1				at iteration 400000 and by 0.1 again at 450000
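Taken together, burn_in, policy=steps, steps, and scales define the whole schedule. A minimal Python sketch of how darknet evaluates it (my paraphrase of the darknet source; power=4 is darknet's default warm-up exponent):

    def learning_rate(iteration, base_lr=0.001, burn_in=1000, power=4,
                      steps=(400000, 450000), scales=(0.1, 0.1)):
        if iteration < burn_in:
            # warm-up: ramp the rate from 0 up to base_lr
            return base_lr * (iteration / burn_in) ** power
        lr = base_lr
        for step, scale in zip(steps, scales):
            if iteration >= step:
                lr *= scale          # decay each time a step threshold is passed
        return lr

    print(learning_rate(500))        # mid warm-up: 0.001 * 0.5**4 = 6.25e-05
    print(learning_rate(10000))      # 0.001
    print(learning_rate(400001))     # 0.0001
    print(learning_rate(450001))     # ~1e-05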

[convolutional]
batch_normalize=1 			whether to apply batch normalization to this layer (1 = yes); see also the reference notes at the end of this post
filters=32			        number of output feature maps
size=3					kernel size
stride=1				convolution stride
pad=1					if pad is 0, the padding is taken from the padding parameter;
					if pad is 1, the padding is size/2, i.e. the number of pixels added around the border of the input feature map
activation=leaky			activation function type
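A quick sanity check of how size, stride, and pad determine the output resolution (standard convolution arithmetic, not darknet code):

    def conv_output_size(in_size, size=3, stride=1, pad=1):
        padding = size // 2 if pad else 0    # pad=1 means "pad by size/2"
        return (in_size + 2 * padding - size) // stride + 1

    print(conv_output_size(416, size=3, stride=1))   # 416: resolution preserved
    print(conv_output_size(416, size=3, stride=2))   # 208: the "# Downsample" blocks halve it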

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear


[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=18				in the last convolutional layer before each [region/yolo] layer,
					filters = <number of mask entries in the following [yolo] layer> * (classes + 5);
					the 5 stands for the five predictions in the paper: tx, ty, tw, th, to
					(here 3 * (1 + 5) = 18; a quick check follows this block)
activation=linear
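A quick check of the filter-count rule (the function name is mine):

    def head_filters(classes, masks=3, coords=4):
        # each of the `masks` anchors predicts coords (tx, ty, tw, th) + objectness (to) + per-class scores
        return masks * (coords + 1 + classes)

    print(head_filters(classes=1))    # 18, matching filters=18 above
    print(head_filters(classes=80))   # 255, the value in the stock COCO yolov3.cfg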


[yolo] 					the [yolo] layer was called [region] in YOLOv2
mask = 6,7,8				indices into the anchors list below: this layer uses anchors 6, 7, and 8 (the three largest)
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
					anchors are precomputed prior boxes; they depend on the image set, width, height,
					and the number of clusters (which should match num below, i.e. the number of anchors to use).
					They can be picked by hand or learned from the training samples with k-means
					(a sketch follows this block); darknet can also compute them via a cmd command

classes=1
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1				setting random to 1 improves detection precision
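A minimal k-means sketch of how such anchors can be derived from the (width, height) pairs of the training labels. This is illustrative only: the function names are mine, the distance is 1 - IoU as in the YOLOv2 paper, and the supported route is the calc_anchors command quoted near the end of this post.

    import random

    def iou_wh(a, b):
        # IoU of two boxes aligned at a shared corner; only widths and heights matter
        inter = min(a[0], b[0]) * min(a[1], b[1])
        return inter / (a[0] * a[1] + b[0] * b[1] - inter)

    def kmeans_anchors(boxes, k=9, iterations=100):
        # boxes: (w, h) pairs scaled to network-input pixels (e.g. for 416x416)
        centroids = random.sample(boxes, k)
        for _ in range(iterations):
            clusters = [[] for _ in range(k)]
            for box in boxes:
                # assign each box to the centroid with the smallest 1 - IoU distance
                best = max(range(k), key=lambda j: iou_wh(box, centroids[j]))
                clusters[best].append(box)
            centroids = [(sum(b[0] for b in c) / len(c),
                          sum(b[1] for b in c) / len(c)) if c else centroids[j]
                         for j, c in enumerate(clusters)]
        return sorted(centroids, key=lambda wh: wh[0] * wh[1])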


[route]
layers = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61



[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear


[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=1
num=9	  				number of boxes each grid cell predicts; must equal the number of anchors. To use more anchors,
					increase num; if the Obj value then tends toward 0 during training, try increasing object_scale
jitter=.3			        data jittering to generate more data; YOLOv2 uses crop, flip, and the angle from the [net] layer.
					flip is random, and jitter is the crop parameter: tiny-yolo-voc.cfg uses jitter=.3, i.e. crops in the 0 to 0.3 range
ignore_thresh = .5			decides whether an IoU error is counted: above ignore_thresh, the IoU error is not added to the cost function
truth_thresh = 1
random=1				if 1, the network input size is randomly resampled from 320 to 608 in steps of 32 during training;
					if 0, training always uses the configured input size (a sketch follows this block)
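The candidate sizes under random=1 are easy to enumerate:

    import random
    sizes = list(range(320, 608 + 32, 32))   # [320, 352, ..., 576, 608]: ten sizes
    print(random.choice(sizes))              # the size used for the next few iterations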



[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36



[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear


[yolo]
mask = 0,1,2
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=1
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1	

Key points from the official README:
    You can collect image samples yourself or download public datasets; the COCO and VOC datasets may also contain usable samples.

    Note: If during training you see nan values for avg (loss) field - then training goes wrong, 
    but if nan is in some other lines - then training goes well.

    When should I stop training:
    When you see that average loss 0.xxxxxx avg no longer decreases at many iterations then you should stop training.
    Once training is stopped, you should take some of last .weights-files from darknet\build\darknet\x64\backup and choose the best of them.

    Overfitting - is case when you can detect objects on images from training-dataset, 
    but can't detect objects on any others images. You should get weights from Early Stopping Point.
	
    IoU (intersection over union) - average intersection over union between objects and detections for a certain threshold = 0.24

    How to improve object detection:
    Before training:
    set flag random=1 in your .cfg-file - it will increase precision by training Yolo for different resolutions.
    increase network resolution in your .cfg-file (height=608, width=608 or any value multiple of 32) - it will increase precision.
    recalculate anchors for your dataset for width and height from cfg-file: 
    darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416 then set the same 9 anchors in each of 3 [yolo]-layers in your cfg-file
    (i.e. recalculate and set the anchors for your dataset)
	
    desirable that your training dataset include images with objects at different: 
    scales, rotations, lightings, from different sides, on different backgrounds
    Make the training samples as diverse as possible: lighting, rotation, background, object position, and scale.

    desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounding boxes (empty .txt files)
    You can add images without annotation boxes, together with empty .txt files, as negative samples.
	
    for training with a large number of objects in each image, add the parameter max=200 or higher value in the last layer [region] in your cfg-file

    to speedup training (with decreasing detection accuracy) do Fine-Tuning instead of Transfer-Learning, 
    set param stopbackward=1 in one of the penultimate convolutional layers before the 1-st [yolo]-layer, 
    for example here: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L598
    You can add stopbackward=1 at the end of the penultimate [convolutional] layer before the first [yolo] layer to speed up training (at the cost of some detection accuracy).

    After training - for detection:
    Increase network-resolution by set in your .cfg-file (height=608 and width=608) or (height=832 and width=832) 
    or (any value multiple of 32) - this increases the precision and makes it possible to detect small objects,
    you do not need to train the network again, just use .weights-file already trained for 416x416 resolution.
    Even after training at 416x416, you can set a larger width and height in the cfg file to raise the network's input resolution, making small objects more likely to be detected, without retraining.

    if error Out of memory occurs then in .cfg-file you should increase subdivisions=16, 32 or 64
    Out-of-memory errors are resolved by increasing subdivisions.
 
 

Reference article: YOLO parameters

[net]
batch=64: number of images used per batch; within GPU-memory limits, a larger batch usually converges more finely, though it can also converge to a local optimum.
subdivisions=4: number of sub-batches the batch is divided into.
width=416, height=416: network input size.
channels=3: number of image channels.

momentum=0.9: a weakness of plain SGD is that the update direction depends entirely on the gradient computed from the current batch, which makes it very unstable. The Momentum method borrows the concept of momentum from physics: it simulates the inertia of a moving object, keeping part of the previous update direction and using the current batch's gradient to fine-tune the final direction. This adds stability, speeds up learning, and gives some ability to escape local optima:

    v_t = \gamma \cdot v_{t-1} + \alpha \cdot \nabla_\Theta J(\Theta)
    \Theta = \Theta - v_t

Momentum looks at the historical gradient v_{t-1}: if the current gradient points in the same direction (suggesting the current sample is probably not an outlier), that direction is reinforced; if it points the other way, the accumulated gradient decays. An intuitive picture: a ball pushed downhill gathers momentum and keeps speeding up; \gamma acts as air resistance, and if the ball changes direction the momentum decays. For the weights the update reads

    \Delta w_t = m \cdot \Delta w_{t-1} - \alpha \cdot \nabla_w J(w), \quad w = w + \Delta w_t

where m is the tunable momentum coefficient, typically 0.5, 0.9, or 0.99; \alpha can also be varied over time, starting small and growing later.
Properties: when successive gradients agree in direction, learning accelerates; when they disagree, oscillation is damped.

decay=0.0005: in practice, to avoid overfitting, regularization terms must be added to the cost function; in SGD the usual L2 form is

    \tilde{J}(\Theta) = J(\Theta) + \frac{\lambda}{2} \lVert \Theta \rVert^2

The basic idea is to shrink the influence of unimportant parameters on the final result; weights the network actually uses are not much affected by weight decay. The larger this term, the stronger the protection against overfitting.

angle=0: rotation angle range for data augmentation.
saturation = 1.5: saturation range.
exposure = 1.5: exposure range.
hue=.1: hue variation range.
learning_rate=0.001: initial learning rate.
max_batches = 200000: training stops after max_batches iterations.
policy=steps: the learning rate policy; the options are constant, steps, exp, poly, step, sig, random.
    constant: keep the learning rate constant (called "fixed" in Caffe).
    steps: change the learning rate at the listed iterations, e.g. steps=-1,400,100000,150000 with scales=.1,10,.1,.1; steps and scales correspond one-to-one. When training on your own dataset there is a fair chance you will need a warm-up trick, though this depends on the dataset. Warm-up methods: constant warmup uses a small constant learning rate for the first few epochs (usually the first 5), but for a large target rate a constant warmup does not initialize the network well; gradual warmup raises the learning rate from small to large over the first few epochs so training converges healthily at the start. Key reference: https://arxiv.org/abs/1706.02677
    exp: returns base_lr * gamma^iter, where iter is the current iteration, e.g. gamma=0.98.
    poly: polynomial decay of the learning rate, e.g. power=4, max_batches=800000.
    sig: sigmoid decay of the learning rate, e.g. gamma=0.05, step=200.
    step: returns net.learning_rate * pow(net.scale, batch_num / net.step).
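A minimal scalar sketch of the momentum-plus-weight-decay update described above (the names are illustrative, not darknet code):

    def sgd_momentum_step(w, grad, v, lr=0.001, momentum=0.9, decay=0.0005):
        g = grad + decay * w         # weight decay: gradient of the L2 penalty
        v = momentum * v - lr * g    # keep part of the previous direction, fine-tune with the current gradient
        return w + v, v              # updated weight and velocity

    w, v = 1.0, 0.0
    for grad in (0.30, 0.28, 0.31):  # consistent gradient directions: the step size grows
        w, v = sgd_momentum_step(w, grad, v)
        print(w, v)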

[convolutional]
batch_normalize=1: whether to apply batch normalization.
filters=32: number of output feature maps.
size=3: kernel size.
stride=1: convolution stride.
pad=1: if pad is 0, the padding is given by the padding parameter; if pad is 1, the padding is size/2.
activation=leaky: the available activations are logistic, loggy, relu, elu, relie, plse, hardtan, lhtan, linear, ramp, leaky, tanh, stair; leaky is the most common choice these days.

[maxpool]
size=2: pooling window size.
stride=2: pooling stride.

Special case, the last convolutional layer:
[convolutional] size=1 stride=1 pad=1 filters=125: the filter count of the last convolutional layer before a [region] layer is fixed by filters = num * (classes + 5), here 5 * (20 + 5) = 125; the 5 stands for the five coordinates tx, ty, tw, th, to from the paper. activation=linear

[region]
anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52: initial widths and heights of the predicted boxes (w first, then h); num * 2 values in total. The clustering script is on GitHub.
bias_match=1: only selects the anchor whose width and height are closest to the ground-truth box, then regresses width/height offsets on top of the selected anchor; the final predicted box need not match the anchor exactly.
classes=20: number of classes.
coords=4: the bounding-box coordinates tx, ty, tw, th; tx and ty are offsets relative to the top-left corner of the grid cell, expressed as a fraction of the cell, while tw and th are logarithms of the width and height.
num=5: number of bounding boxes predicted per grid cell.
softmax=1
jitter=.2: data jittering to generate more data; YOLOv2 uses crop, flip, and the angle from the [net] layer; flip is random, and jitter is the crop parameter: tiny-yolo-voc.cfg uses jitter=.2, i.e. crops in the 0 to 0.2 range.
rescore=1: decides how the IoU error is computed: 1 uses the current best IoU, 0 uses the constant 1.
object_scale=5, noobject_scale=1, class_scale=1, coord_scale=1: coefficients of the loss terms.
absolute=1
thresh = .6: decides whether the IoU error is counted: above thresh, the IoU error is not added to the cost function.
random=1: if 1, the input image size is randomly picked from 320 to 608 (step 32) during training; if 0, the training size always matches the configured input size.


Reposted from blog.csdn.net/qq_33485434/article/details/80907040