After saving the model, we can evaluate the model through the script provided by PaddleSeg
python val.py \
--config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \
--model_path output/iter_1000/model.pdparams
To perform multi-scale flip evaluation, enable it by passing --aug_eval and specify the scale factors via --scales; --flip_horizontal turns on horizontal flipping, and --flip_vertical turns on vertical flipping. An example is as follows:
python val.py \
--config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \
--model_path output/iter_1000/model.pdparams \
--aug_eval \
--scales 0.75 1.0 1.25 \
--flip_horizontal
To perform sliding window evaluation, enable it by passing --is_slide, set the window size with --crop_size, and set the step size with --stride. An example is as follows:
python val.py \
--config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \
--model_path output/iter_1000/model.pdparams \
--is_slide \
--crop_size 256 256 \
--stride 128 128
First of all, you can understand the workflow of the evaluation process through the following figure.
Let's interpret the code of val.py below.
if __name__ == '__main__':
    # Parse the command-line arguments and pass them straight to the main routine.
    main(parse_args())
Let us understand which input parameters the val.py script supports by interpreting the parse_args function.
def parse_args():
    """Build the evaluation command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace: Parsed options with the attributes ``cfg``,
        ``model_path``, ``num_workers``, ``aug_eval``, ``scales``,
        ``flip_horizontal``, ``flip_vertical``, ``is_slide``, ``crop_size``
        and ``stride``.
    """
    parser = argparse.ArgumentParser(description='Model evaluation')
    add_option = parser.add_argument

    # --- Basic evaluation parameters -------------------------------------
    # Path of the configuration file (stored as ``cfg``).
    add_option('--config', dest='cfg', type=str, default=None,
               help='The config file.')
    # Path of the trained model weights.
    add_option('--model_path', type=str, default=None,
               help='The path of model for evaluation')
    # Number of worker processes used by the data loader.
    add_option('--num_workers', type=int, default=0,
               help='Num workers for data loader')

    # --- Augmented (multi-scale / flip) evaluation -----------------------
    # Switch that enables multi-scale flip evaluation.
    add_option('--aug_eval', action='store_true',
               help='Whether to use mulit-scales and flip augment for evaluation')
    # One or more space-separated scale factors; 1.0 keeps the size unchanged.
    # When the flag is omitted the value is the bare float 1.0, not a list.
    add_option('--scales', nargs='+', type=float, default=1.0,
               help='Scales for augment')
    # Switch that enables horizontal flipping.
    add_option('--flip_horizontal', action='store_true',
               help='Whether to use flip horizontally augment')
    # Switch that enables vertical flipping.
    add_option('--flip_vertical', action='store_true',
               help='Whether to use flip vertically augment')

    # --- Sliding window evaluation ---------------------------------------
    # Switch that enables sliding window evaluation.
    add_option('--is_slide', action='store_true',
               help='Whether to evaluate by sliding window')
    # Window size, given as two integers: width then height.
    add_option('--crop_size', nargs=2, type=int, default=None,
               help='The crop size of sliding window, the first is width and the second is height.')
    # Window step, given as two integers: horizontal then vertical.
    add_option('--stride', nargs=2, type=int, default=None,
               help='The stride of sliding window, the first is width and the second is height.')

    return parser.parse_args()
The above is the analysis of the input parameters. In the main function, the evaluate function in the core/val.py module is mainly used to evaluate the model.
First look at the code summary of the evaluate function.
Then interpret the code of the evaluate function.
def evaluate(model,
             eval_dataset,
             aug_eval=False,
             scales=1.0,
             flip_horizontal=True,
             flip_vertical=False,
             is_slide=False,
             stride=None,
             crop_size=None,
             num_workers=0):
    """Evaluate ``model`` on ``eval_dataset`` and log mIoU, Acc and Kappa.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        eval_dataset: Dataset to evaluate on. Its ``transforms``,
            ``num_classes`` and ``ignore_index`` attributes are read.
        aug_eval: Use ``infer.aug_inference`` (multi-scale / flip) instead of
            ``infer.inference``.
        scales: Scale factor(s) forwarded to ``infer.aug_inference``.
        flip_horizontal: Forwarded to ``infer.aug_inference``.
        flip_vertical: Forwarded to ``infer.aug_inference``.
        is_slide: Forwarded to the inference function (sliding window switch).
        stride: Forwarded to the inference function (sliding window step).
        crop_size: Forwarded to the inference function (sliding window size).
        num_workers: Number of worker processes for the data loader.

    Returns:
        tuple: ``(miou, acc)`` as produced by ``metrics.mean_iou`` and
        ``metrics.accuracy``.
    """
    # Put the model into evaluation mode.
    model.eval()

    # To stay compatible with multi-card runs, query the number of cards and
    # the index of the card this process is running on.
    parallel_env = paddle.distributed.ParallelEnv()
    nranks = parallel_env.nranks
    local_rank = parallel_env.local_rank
    multi_card = nranks > 1
    if multi_card:
        # Initialize the parallel environment if that has not been done yet.
        ctx_ready = paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized()
        if not ctx_ready:
            paddle.distributed.init_parallel_env()

    # Evaluation uses batch size 1, keeps the sample order, and must not drop
    # the trailing samples.
    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=1, shuffle=False, drop_last=False)
    # num_workers > 0 enables multi-process loading, which communicates through
    # shared memory; if shared memory is too small and an error is raised, try
    # num_workers=0 to disable it.
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    num_samples = len(eval_dataset)
    # Number of iterations this process will run.
    total_iters = len(loader)

    # Running totals of the per-class areas.
    intersect_area_all = 0
    pred_area_all = 0
    label_area_all = 0

    logger.info("Start evaluating (total_samples={}, total_iters={})...".format(
        num_samples, total_iters))
    # Progress bar and timer used for the per-iteration cost report.
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    timer = Timer()

    with paddle.no_grad():
        for step, (im, label) in enumerate(loader):
            reader_cost = timer.elapsed_time()
            label = label.astype('int64')
            ori_shape = label.shape[-2:]

            # Keyword arguments shared by both inference entry points.
            common_kwargs = dict(
                ori_shape=ori_shape,
                transforms=eval_dataset.transforms.transforms,
                is_slide=is_slide,
                stride=stride,
                crop_size=crop_size)
            if aug_eval:
                # Multi-scale / flip inference.
                pred = infer.aug_inference(
                    model,
                    im,
                    scales=scales,
                    flip_horizontal=flip_horizontal,
                    flip_vertical=flip_vertical,
                    **common_kwargs)
            else:
                # Regular inference.
                pred = infer.inference(model, im, **common_kwargs)

            # Per-class area of the prediction, of the label, and of their
            # intersection.
            intersect_area, pred_area, label_area = metrics.calculate_area(
                pred,
                label,
                eval_dataset.num_classes,
                ignore_index=eval_dataset.ignore_index)

            if multi_card:
                # Gather the areas computed on every rank.
                gathered_intersect = []
                gathered_pred = []
                gathered_label = []
                paddle.distributed.all_gather(gathered_intersect, intersect_area)
                paddle.distributed.all_gather(gathered_pred, pred_area)
                paddle.distributed.all_gather(gathered_label, label_area)

                # In the last iteration some samples may have been evaluated
                # twice across ranks; keep only the valid leading entries.
                if (step + 1) * nranks > num_samples:
                    valid = num_samples - step * nranks
                    gathered_intersect = gathered_intersect[:valid]
                    gathered_pred = gathered_pred[:valid]
                    gathered_label = gathered_label[:valid]

                # Accumulate the gathered areas.
                for rank_intersect, rank_pred, rank_label in zip(
                        gathered_intersect, gathered_pred, gathered_label):
                    intersect_area_all = intersect_area_all + rank_intersect
                    pred_area_all = pred_area_all + rank_pred
                    label_area_all = label_area_all + rank_label
            else:
                # Single card: accumulate directly.
                intersect_area_all = intersect_area_all + intersect_area
                pred_area_all = pred_area_all + pred_area
                label_area_all = label_area_all + label_area

            batch_cost = timer.elapsed_time()
            timer.restart()

            # Only the first rank reports progress.
            if local_rank == 0:
                progbar_val.update(step + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])

    # Per-class IoU and mean IoU.
    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
                                       label_area_all)
    # The function is named ``accuracy``; per the original author's note the
    # per-class values it returns are precision.
    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
    # Kappa coefficient, a consistency measure.
    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)

    # Report the evaluation metrics.
    logger.info("[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".format(
        num_samples, miou, acc, kappa))
    logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
    logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
    return miou, acc
First, the evaluation program obtains three areas through the calculate_area function, which are:
- pred_area: contains the area of the prediction results of each category
- label_area: contains the area of the sample label for each category
- intersect_area: contains the area of the intersection of pred_area and label_area for each category.
Using the above three kinds of data, three evaluation indicators can be calculated: IoU (Intersection over Union), Precision, and the Kappa coefficient. The calculation methods and meanings of these three indicators are introduced below.
- IOU: the intersection over union of each category, calculated as follows:
$$IOU = \frac{intersect\_area[i]}{pred\_area[i] + label\_area[i] - intersect\_area[i]}$$
- MIOU: Average IOU, that is, the average value of IOU for each category, the formula is as follows:
$$MIOU = \frac{IOU[1] + IOU[2] + ... + IOU[N]}{N}$$
It can be understood from the formula that the closer the values of IOU and MIOU are to 1, the better the effect. This is an important indicator to measure the performance of a model.
- Precision: precision rate, the following formula can be used in image segmentation to calculate the precision rate of each category:
$$Precision = \frac{intersect\_area[i]}{pred\_area[i]}$$
- Kappa coefficient: Kappa coefficient is used for consistency testing and can also be used to measure classification accuracy. Calculated as follows:
$$kappa = \frac{PO - PE}{1 - PE}$$
PO: The sum of the number of samples correctly classified in each category divided by the total number of samples, which is the accuracy.
PE: Assuming that the number of real samples of each category is stored in the label_area list, the predicted number of samples of each category is stored in the pred_area list, and the total number of samples is the sum of the values in label_area, then:
$$PO = \frac{SUM(intersect\_area)}{SUM(label\_area)}$$
$$PE = \frac{SUM(pred\_area * label\_area)}{SUM(label\_area) * SUM(label\_area)}$$
In the above code, different inference functions are called according to different input parameters. The inference function is introduced below.
def inference(model,
              im,
              ori_shape=None,
              transforms=None,
              is_slide=False,
              stride=None,
              crop_size=None):
    """Run a single forward pass, optionally through a sliding window.

    Args:
        model: Callable network; in the non-sliding path it must return a
            sequence (e.g. list or tuple) whose first element is the logit.
        im: Input passed to ``model`` (or to ``slide_inference``).
        ori_shape: When not None, the result is turned into a class-index
            prediction and passed through ``reverse_transform`` with this shape.
        transforms: Forwarded to ``reverse_transform``.
        is_slide: Use ``slide_inference`` instead of calling ``model`` directly.
        stride: Sliding window step, forwarded to ``slide_inference``.
        crop_size: Sliding window size, forwarded to ``slide_inference``.

    Returns:
        The raw logit when ``ori_shape`` is None, otherwise the prediction
        returned by ``reverse_transform``.

    Raises:
        TypeError: If ``model(im)`` does not return a sequence.
    """
    if is_slide:
        # Sliding window enabled: delegate the prediction.
        logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
    else:
        # Plain forward pass; the network is expected to return a sequence.
        outputs = model(im)
        if not isinstance(outputs, collections.abc.Sequence):
            raise TypeError(
                "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
                .format(type(outputs)))
        logit = outputs[0]

    # Without a target shape the caller wants the raw logit.
    if ori_shape is None:
        return logit

    # argmax over axis 1 picks the index of the highest-scoring class per pixel.
    pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
    return reverse_transform(pred, ori_shape, transforms)
def slide_inference(model, im, crop_size, stride):
    """Predict with a sliding window and average the overlapping logits.

    Args:
        model: Callable network; must return a sequence (e.g. list or tuple)
            whose first element exposes ``.numpy()``.
        im: Input image batch; its last two dimensions are read as height and
            width, and it is sliced as ``im[:, :, h1:h2, w1:w2]``.
        crop_size: Window size as ``(width, height)``.
        stride: Window step as ``(width, height)``.

    Returns:
        The averaged logit, converted with ``paddle.to_tensor``. The
        accumulation buffer is allocated with a leading dimension of 1.

    Raises:
        TypeError: If the model output is not a sequence.
        RuntimeError: If some pixels are covered by no window (e.g. the stride
            is larger than the window).
    """
    # Height and width of the input image.
    h_im, w_im = im.shape[-2:]
    # Width and height of the window.
    w_crop, h_crop = crop_size
    # Horizontal and vertical step of the window.
    w_stride, h_stride = stride

    # Number of window positions along each axis. The builtin ``int`` is used
    # because the ``np.int`` alias was removed from NumPy.
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
    # An image no larger than the window needs exactly one (clamped) window;
    # without this the count can drop to zero or below and nothing is predicted.
    rows = 1 if h_im <= h_crop else rows
    cols = 1 if w_im <= w_crop else cols

    # TODO 'Tensor' object does not support item assignment. If support, use tensor to calculation.
    final_logit = None
    # Counts how many windows contributed to each pixel.
    count = np.zeros([1, 1, h_im, w_im])

    for r in range(rows):
        for c in range(cols):
            # Window position, clamped so that it stays inside the image while
            # keeping the full window size whenever possible.
            h1 = r * h_stride
            w1 = c * w_stride
            h2 = min(h1 + h_crop, h_im)
            w2 = min(w1 + w_crop, w_im)
            h1 = max(h2 - h_crop, 0)
            w1 = max(w2 - w_crop, 0)

            # Crop the image and predict on the crop.
            im_crop = im[:, :, h1:h2, w1:w2]
            logits = model(im_crop)
            if not isinstance(logits, collections.abc.Sequence):
                raise TypeError(
                    "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
                    .format(type(logits)))
            logit = logits[0].numpy()

            # Allocate the accumulation buffer once the class count is known.
            if final_logit is None:
                final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
            # Add this window's logits to the running sum and bump the counter.
            final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
            count[:, :, h1:h2, w1:w2] += 1

    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixel not predicted. It is possible that stride is greater than crop_size'
        )

    # Overlapping windows were summed several times, so take the mean.
    final_logit = final_logit / count
    # Convert the ndarray back into a tensor.
    final_logit = paddle.to_tensor(final_logit)
    return final_logit
Let's take a look at the code summary of the aug_inference function.
Then take a look at the code interpretation of aug_inference.
def aug_inference(model,
                  im,
                  ori_shape,
                  transforms,
                  scales=1.0,
                  flip_horizontal=False,
                  flip_vertical=False,
                  is_slide=False,
                  stride=None,
                  crop_size=None):
    """Predict with multi-scale and flip augmentation and fuse the results.

    For every scale and every flip combination the input is resized, flipped
    and passed to ``inference``; the logit is flipped back, resized to the
    input size, passed through softmax and summed. The final prediction is the
    argmax of that sum, passed through ``reverse_transform``.

    Args:
        model: Network forwarded to ``inference``.
        im: Input image batch; its last two dimensions are read as height and
            width.
        ori_shape: Shape forwarded to ``reverse_transform``.
        transforms: Transforms forwarded to ``reverse_transform``.
        scales: A single number, or a tuple/list of scale factors.
        flip_horizontal: Forwarded to ``flip_combination``.
        flip_vertical: Forwarded to ``flip_combination``.
        is_slide: Forwarded to ``inference`` (sliding window switch).
        stride: Forwarded to ``inference`` (sliding window step).
        crop_size: Forwarded to ``inference`` (sliding window size).

    Returns:
        The prediction returned by ``reverse_transform``.

    Raises:
        TypeError: If ``scales`` is neither a number nor a tuple/list.
    """
    # Accept a bare int as well as a float so that e.g. ``scales=1`` works.
    if isinstance(scales, (int, float)):
        scales = [scales]
    elif not isinstance(scales, (tuple, list)):
        raise TypeError(
            '`scales` expects float/tuple/list type, but received {}'.format(
                type(scales)))

    final_logit = 0
    h_input, w_input = im.shape[-2], im.shape[-1]
    # List of flip settings derived from the two flip switches.
    flip_comb = flip_combination(flip_horizontal, flip_vertical)

    for scale in scales:
        # Target height and width for this scale factor.
        h = int(h_input * scale + 0.5)
        w = int(w_input * scale + 0.5)
        # Always resize from the untouched input: rebinding ``im`` here would
        # make every later scale resample an already-resampled image.
        im_scaled = F.interpolate(im, (h, w), mode='bilinear')

        for flip in flip_comb:
            # Flip the resized image and run regular inference on it.
            im_flip = tensor_flip(im_scaled, flip)
            logit = inference(
                model,
                im_flip,
                is_slide=is_slide,
                crop_size=crop_size,
                stride=stride)
            # Undo the flip on the logit, then resize it back to the input size.
            logit = tensor_flip(logit, flip)
            logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
            # Softmax over axis 1, then accumulate across augmentations.
            logit = F.softmax(logit, axis=1)
            final_logit = final_logit + logit

    # argmax over axis 1 picks the index of the highest-scoring class per pixel.
    pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
    # Map the prediction back so that it matches the original input image.
    pred = reverse_transform(pred, ori_shape, transforms)
    return pred
PaddleSeg repository address: https://github.com/PaddlePaddle/PaddleSeg