Post-processing deep learning model outputs with traditional image-processing methods for secondary correction

In practice, a deep learning model often cannot maintain good accuracy across many scenes or a wide range of target types. For example, even a well-trained detection model may fail to reliably detect certain targets whose appearance varies widely. A practical remedy is to correct the results with a secondary classification or filtering step.

Below are some snippets of code I wrote myself while working on a project, used to further vet the output of a deep learning model.
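
None of the snippets below show their imports or the NpEncoder class passed to json.dumps. The block below is a minimal sketch of what they assume: the standard imports, plus a typical NpEncoder, i.e. a json.JSONEncoder subclass that converts NumPy types into plain Python types. The original post does not include its own definition, so this is an assumption, not the author's exact code.

import json
import os

import cv2
import numpy as np


# Sketch of the NpEncoder helper referenced by json.dumps(..., cls=NpEncoder).
# A typical implementation converts NumPy scalars and arrays so they can be
# serialized; the project's actual definition may differ.
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)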

  • Template matching algorithm:

def makejson(img_name, bboxes, signbit, filename):
    offer = {}
    offer['img_name'] = img_name
    offer['objects'] = []
    for i in range(len(bboxes)):
        # The keys hold the box in bbox order: x1 = x_min, x2 = y_min,
        # y1 = x_max, y2 = y_max, so img[x2:y2, x1:y1] crops rows y_min:y_max
        # and columns x_min:x_max.
        temp = {
            "x1": int(bboxes[i][0]),
            "x2": int(bboxes[i][1]),
            "y1": int(bboxes[i][2]),
            "y2": int(bboxes[i][3]),
            "confidence": float('%.2f' % bboxes[i][4]),
            "category": 0
        }
        offer['objects'].append(temp)
    objects = offer['objects']  # list of detection dicts
    num_length = len(objects)
    i = 0
    img = cv2.imread(img_name, 0)  # read the image as grayscale
    if signbit == 0:  # template-collection mode
        if os.path.exists(filename):
            img_list = os.listdir(filename)
            p = len(img_list)
            while i < num_length:  # iterate over the detections
                obj = objects[i]
                x1 = obj['x1']
                x2 = obj['x2']
                y1 = obj['y1']
                y2 = obj['y2']
                img1 = img[x2:y2, x1:y1]  # crop the detection region
                # thres, img2 = cv2.threshold(img1, 0, 255, cv2.THRESH_OTSU)
                img2 = cv2.Canny(img1, 150, 300)  # edge map used as the template
                save_img = filename + '/' + str(i + p) + '.bmp'
                cv2.imwrite(save_img, img2)
                i += 1
        else:
            os.mkdir(filename)
            while i < num_length:
                obj = objects[i]
                x1 = obj['x1']
                x2 = obj['x2']
                y1 = obj['y1']
                y2 = obj['y2']
                img1 = img[x2:y2, x1:y1]
                # thres, img2 = cv2.threshold(img1, 0, 255, cv2.THRESH_OTSU)
                img2 = cv2.Canny(img1, 150, 300)
                save_img = filename + '/' + str(i) + '.bmp'
                cv2.imwrite(save_img, img2)
                i += 1
    else:  # matching mode: compare each detection against the saved templates
        img_list = os.listdir(filename)
        img_num = len(img_list)
        while i < num_length:  # iterate over the detections
            obj = objects[i]
            x1 = obj['x1']
            x2 = obj['x2']
            y1 = obj['y1']
            y2 = obj['y2']
            img1 = img[x2:y2, x1:y1]  # crop the detection region
            img2 = cv2.Canny(img1, 150, 300)
            for j in range(img_num):
                temp_path = filename + '/' + img_list[j]
                template = cv2.imread(temp_path, 0)
                height1, width1 = img2.shape
                height2, width2 = template.shape
                if (height1 >= height2 and width1 >= width2) or (
                        height1 < height2 and width1 < width2):  # work around the size-mismatch error between template and crop
                    res = cv2.matchTemplate(img2, template, cv2.TM_CCOEFF_NORMED)
                    # minMaxLoc returns (min_val, max_val, min_loc, max_loc);
                    # the rejection here is keyed off min_val
                    min_val, max_val, _, _ = cv2.minMaxLoc(res)
                    if min_val > 0.05:
                        del objects[i]
                        i = i - 1
                        num_length -= 1
                        break
                else:
                    img3 = cv2.resize(img2, (width2, height2))
                    res = cv2.matchTemplate(img3, template, cv2.TM_CCOEFF_NORMED)
                    min_val, max_val, _, _ = cv2.minMaxLoc(res)
                    print(min_val)
                    if min_val > 0.05:
                        del objects[i]
                        i = i - 1
                        num_length -= 1
                        break
            i += 1
    final = {}
    final['img_name'] = img_name
    final['objects'] = objects
    return json.dumps(final, cls=NpEncoder)
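
For reference, cv2.minMaxLoc returns (min_val, max_val, min_loc, max_loc), and for TM_CCOEFF_NORMED the conventional match score is max_val (close to 1.0 for a strong match); the snippet above keys its rejection off min_val instead. A small self-contained sketch of the conventional reading, using synthetic placeholder arrays rather than the project's data and assuming the shared imports above:

# Synthetic edge map and a sub-window of it used as the template.
crop_edges = np.zeros((64, 64), dtype=np.uint8)
crop_edges[16:48, 16:48] = 255
template = crop_edges[8:40, 8:40].copy()

res = cv2.matchTemplate(crop_edges, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
print(max_val, max_loc)  # max_val near 1.0 means a strong match at max_loc
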
  • Projection algorithm:

def makejson(img_name, bboxes):
    offer = {}
    offer['img_name'] = img_name
    offer['objects'] = []
    for i in range(len(bboxes)):
        temp = {
            "x1": int(bboxes[i][0]),
            "x2": int(bboxes[i][1]),
            "y1": int(bboxes[i][2]),
            "y2": int(bboxes[i][3]),
            "confidence": float('%.2f' % bboxes[i][4]),
            "category": 0
        }
        offer['objects'].append(temp)
    objects = offer['objects']  # list of detection dicts
    num_length = len(objects)
    i = 0
    while i < num_length:  # iterate over the detections
        obj = objects[i]
        x1 = obj['x1']
        x2 = obj['x2']
        y1 = obj['y1']
        y2 = obj['y2']
        img = cv2.imread(img_name, 0)  # read the image as grayscale
        img1 = img[x2:y2, x1:y1]  # crop the detection region
        height, width = img1.shape  # crop size
        thres, img2 = cv2.threshold(img1, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)  # Otsu threshold, inverted
        # ---- vertical projection ----
        paint = np.zeros(img1.shape, dtype=np.uint8)  # unused in this snippet
        # count of foreground pixels in each column
        pointSum = np.zeros(width, dtype=np.int32)  # int32 so counts do not wrap for crops taller than 255 px
        for x in range(width):
            for y in range(height):
                if img2[y][x]:
                    pointSum[x] = pointSum[x] + 1
        # print(pointSum)
        # width of the vertical-projection distribution
        subX = 0
        for x in range(width):
            if pointSum[x] != 0:
                subX = subX + 1
        pro = round(subX / width, 3)  # projection ratio
        if 0.95 > pro > 0.35:
            del objects[i]
            i = i - 1
            num_length -= 1
        i += 1
    final = {}
    final['img_name'] = img_name
    final['objects'] = objects
    return json.dumps(final, cls=NpEncoder)
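
The nested loops above count, for each column, the number of foreground pixels, then count how many columns contain any foreground at all. A behavior-equivalent NumPy sketch of the same projection ratio, on a synthetic binarized crop and assuming the shared imports above:

# img2 here is a synthetic stand-in for the thresholded crop (0/255 values).
img2 = np.zeros((40, 60), dtype=np.uint8)
img2[5:35, 10:30] = 255                      # a block of foreground pixels

point_sum = np.count_nonzero(img2, axis=0)   # foreground pixels per column
sub_x = np.count_nonzero(point_sum)          # columns that contain any foreground
pro = round(sub_x / img2.shape[1], 3)        # same projection ratio as the loop version
print(pro)                                   # 20 foreground columns out of 60 -> 0.333
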
  • Area method:

def makejson(img_name, bboxes):
    offer = {}
    offer['img_name'] = img_name
    offer['objects'] = []
    for i in range(len(bboxes)):
        temp = {
            "x1": int(bboxes[i][0]),
            "x2": int(bboxes[i][1]),
            "y1": int(bboxes[i][2]),
            "y2": int(bboxes[i][3]),
            "confidence": float('%.2f' % bboxes[i][4]),
            "category": 0
        }
        offer['objects'].append(temp)
    objects = offer['objects']  # list of detection dicts
    num_length = len(objects)
    m = 0
    while m < num_length:  # iterate over the detections
        obj = objects[m]
        x1 = obj['x1']
        x2 = obj['x2']
        y1 = obj['y1']
        y2 = obj['y2']
        img = cv2.imread(img_name, 0)  # read the image as grayscale
        img1 = img[x2:y2, x1:y1]  # crop the detection region
        thres, img0 = cv2.threshold(img1, 80, 255, cv2.THRESH_BINARY)
        x, y = img0.shape
        bk0 = 0
        bk1 = 0
        # black-pixel ratio over the whole thresholded crop
        for i in range(x):
            for j in range(y):
                if img0[i, j] == 0:
                    bk0 += 1
        rate0 = bk0 / (x * y)
        if rate0 > 0.25:
            del objects[m]
            num_length -= 1
            continue  # the same index now refers to the next detection
        # black-pixel ratio over the central region (half the width and height,
        # i.e. a quarter of the crop area)
        for i in range(int(x / 2)):
            for j in range(int(y / 2)):
                if img0[i + int(x / 4), j + int(y / 4)] == 0:
                    bk1 += 1
        rate1 = bk1 / int((x * y) / 4)
        if rate1 > 0.40:
            del objects[m]
            m = m - 1
            num_length -= 1
        m += 1
    final = {}
    final['img_name'] = img_name
    final['objects'] = objects
    return json.dumps(final, cls=NpEncoder)
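
The two nested loops above count black pixels over the whole thresholded crop and over its central region. A NumPy sketch of the same two ratios (matching the loop version for even crop sizes), on a synthetic binary crop and assuming the shared imports above:

# img0 here is a synthetic stand-in for the thresholded crop (0/255 values).
img0 = np.full((40, 60), 255, dtype=np.uint8)
img0[10:30, 15:45] = 0                                # a black block in the middle

x, y = img0.shape
rate0 = np.count_nonzero(img0 == 0) / (x * y)         # ratio over the whole crop
center = img0[x // 4: x // 4 + x // 2, y // 4: y // 4 + y // 2]
rate1 = np.count_nonzero(center == 0) / center.size   # ratio over the central region
print(rate0, rate1)
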
  • Minimum bounding rectangle method:

def makejson(img_name, bboxes):
    offer = {}
    offer['img_name'] = img_name
    offer['objects'] = []
    for i in range(len(bboxes)):
        temp = {
            "x1": int(bboxes[i][0]),
            "x2": int(bboxes[i][1]),
            "y1": int(bboxes[i][2]),
            "y2": int(bboxes[i][3]),
            "confidence": float('%.2f' % bboxes[i][4]),
            "category": 0
        }
        offer['objects'].append(temp)
    objects = offer['objects']  # list of detection dicts
    num_length = len(objects)
    i = 0
    while i < num_length:  # iterate over the detections
        obj = objects[i]
        x1 = obj['x1']
        x2 = obj['x2']
        y1 = obj['y1']
        y2 = obj['y2']
        img = cv2.imread(img_name, 0)  # read the image as grayscale
        img1 = img[x2:y2, x1:y1]  # crop the detection region
        _, img2 = cv2.threshold(img1, 65, 255, cv2.THRESH_BINARY)
        contours, hierarchy = cv2.findContours(img2, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        temp0 = 0
        for c in range(len(contours)):
            rect = cv2.minAreaRect(contours[c])  # minimum-area rectangle: (center(x, y), (w, h), rotation angle)
            box = cv2.boxPoints(rect)  # the 4 corner points of the rectangle
            box = np.int64(box)  # convert to integers, otherwise later calls raise an error
            length = cv2.arcLength(box, True)  # perimeter of the rectangle
            if len(contours) == 1:
                temp0 = length
            else:
                if box[2, 0] == 0 and box[2, 1] == 0:
                    pass  # ignore boxes whose third corner sits at the crop origin
                else:
                    if length > temp0:
                        temp0 = length  # keep the largest perimeter
        if 75 < temp0 < 150:
            del objects[i]
            i = i - 1
            num_length -= 1
        i += 1
    final = {}
    final['img_name'] = img_name
    final['objects'] = objects
    return json.dumps(final, cls=NpEncoder)
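
Finally, a hypothetical example of how one of these functions would be called on detector output. The file name and coordinates below are placeholders, and the boxes are assumed to be [x_min, y_min, x_max, y_max, confidence] rows from the detection model; the template-matching variant takes the same arguments plus signbit (0 to collect templates, anything else to match) and filename (the template directory).

# Hypothetical call site; "test.bmp" and the coordinates are placeholders.
bboxes = [
    [120, 40, 260, 180, 0.87],   # [x_min, y_min, x_max, y_max, confidence]
    [300, 90, 350, 150, 0.42],
]
result_json = makejson("test.bmp", bboxes)   # any of the two-argument variants above
print(result_json)                           # detections that survived the secondary check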

Reprinted from: blog.csdn.net/qq_42308217/article/details/113749456