通常の状況では、深層学習モデルは複数のシーンや広範囲のターゲットタイプに対して十分な精度を得られないことがあります。たとえば、検出モデルが十分にトレーニングされていても、変化の大きい特定の対象については正しく検出できません。現時点での良い解決策は、二次分類によって検出結果を修正することです。
以下は、深層学習モデルの出力をさらに判定するために、プロジェクトに取り組んでいる際に自分で書いたコードの抜粋です。
def _bboxes_to_objects(bboxes):
    """Convert raw detector rows [x1, x2, y1, y2, confidence] into the
    JSON-serializable object dicts used throughout this module."""
    return [
        {
            "x1": int(box[0]),
            "x2": int(box[1]),
            "y1": int(box[2]),
            "y2": int(box[3]),
            "confidence": float('%.2f' % box[4]),
            "category": 0,
        }
        for box in bboxes
    ]


def _crop_edges(img, obj):
    """Crop one detection out of the grayscale image and return its Canny
    edge map (thresholds 150/300, as used throughout this module).

    NOTE(review): the slice order (rows = x2:y2, cols = x1:y1) looks odd for
    these key names, but it is exactly what the original code did -- the keys
    appear to be mislabeled upstream; confirm against the bbox producer.
    """
    crop = img[obj['x2']:obj['y2'], obj['x1']:obj['y1']]
    return cv2.Canny(crop, 150, 300)


def _matches_any_template(edges, dirname, template_names):
    """Return True when ``edges`` matches any template stored in ``dirname``.

    NOTE(review): the match test thresholds the *minimum* of the
    TM_CCOEFF_NORMED response map (> 0.05), not the maximum; this mirrors the
    original code exactly, but the maximum is the conventional match score
    for this method -- worth double-checking.
    """
    h1, w1 = edges.shape
    for name in template_names:
        template = cv2.imread(dirname + '/' + name, 0)
        h2, w2 = template.shape
        if (h1 >= h2 and w1 >= w2) or (h1 < h2 and w1 < w2):
            # matchTemplate handles this pair (OpenCV swaps the two images
            # internally when the "image" is smaller in both axes).
            probe = edges
        else:
            # Mixed size relation: rescale the edge map to the template size
            # so matchTemplate gets a valid image/template pair.
            probe = cv2.resize(edges, (w2, h2))
        res = cv2.matchTemplate(probe, template, cv2.TM_CCOEFF_NORMED)
        min_val, _, _, _ = cv2.minMaxLoc(res)
        if min_val > 0.05:
            return True
    return False


def makejson(img_name, bboxes, signbit, filename):
    """Build the detection JSON for one image, with a template-based
    second-stage filter.

    With ``signbit == 0`` every detection crop is Canny-edged and saved into
    the template directory ``filename`` (created if missing; new files are
    numbered after any already present).  With any other ``signbit`` each
    crop is compared against every stored template, and detections that
    match one are dropped as false positives.

    Args:
        img_name: path of the source image; read in grayscale.
        bboxes:   iterable of [x1, x2, y1, y2, confidence] rows.
        signbit:  0 = collect templates, anything else = filter detections.
        filename: template directory path.

    Returns:
        JSON string {"img_name": ..., "objects": [...]} of surviving boxes,
        encoded with the module's NpEncoder.
    """
    objects = _bboxes_to_objects(bboxes)
    # The source image is loop-invariant: read it once.
    img = cv2.imread(img_name, 0)

    if signbit == 0:
        # Template collection.  The two branches of the original code were
        # identical except for the numbering offset; unified here.
        if os.path.exists(filename):
            offset = len(os.listdir(filename))
        else:
            os.mkdir(filename)
            offset = 0
        for idx, obj in enumerate(objects):
            edges = _crop_edges(img, obj)
            cv2.imwrite(filename + '/' + str(idx + offset) + '.bmp', edges)
    else:
        # Filtering: keep only detections that match no stored template.
        # (Replaces the original delete-while-iterating loop; a leftover
        # debug print of the match score was removed.)
        template_names = os.listdir(filename)
        objects = [
            obj for obj in objects
            if not _matches_any_template(_crop_edges(img, obj),
                                         filename, template_names)
        ]

    final = {'img_name': img_name, 'objects': objects}
    return json.dumps(final, cls=NpEncoder)
def makejson(img_name, bboxes):
    """Build the detection JSON for one image, dropping boxes whose crop has
    foreground ink spread over 35%-95% of its columns.

    Each crop is binarized with Otsu (inverted polarity), and the fraction of
    columns containing at least one foreground pixel is computed; detections
    with 0.35 < fraction < 0.95 are discarded.

    Args:
        img_name: path of the source image; read in grayscale.
        bboxes:   iterable of [x1, x2, y1, y2, confidence] rows.

    Returns:
        JSON string {"img_name": ..., "objects": [...]} of surviving boxes,
        encoded with the module's NpEncoder.
    """
    objects = [
        {
            "x1": int(box[0]),
            "x2": int(box[1]),
            "y1": int(box[2]),
            "y2": int(box[3]),
            "confidence": float('%.2f' % box[4]),
            "category": 0,
        }
        for box in bboxes
    ]

    # BUGFIX: the image is loop-invariant -- the original re-read it from
    # disk once per detection.
    img = cv2.imread(img_name, 0)

    kept = []
    for obj in objects:
        # NOTE(review): slice order (rows = x2:y2, cols = x1:y1) is kept
        # exactly as the original wrote it; the keys look mislabeled upstream.
        crop = img[obj['x2']:obj['y2'], obj['x1']:obj['y1']]
        _, binary = cv2.threshold(crop, 0, 255,
                                  cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)
        # Fraction of columns containing any foreground pixel.
        # BUGFIX: the original accumulated per-column pixel counts in a uint8
        # array, which wraps at 256 and can make a populated column read as
        # empty for crops taller than 255 px.  (Also replaces the O(h*w)
        # Python pixel loops and drops the unused `paint` buffer.)
        width = binary.shape[1]
        ink_cols = int(np.count_nonzero(binary.any(axis=0)))
        pro = round(ink_cols / width, 3)
        if not 0.95 > pro > 0.35:
            kept.append(obj)

    final = {'img_name': img_name, 'objects': kept}
    return json.dumps(final, cls=NpEncoder)
def makejson(img_name, bboxes):
    """Build the detection JSON for one image, dropping boxes whose crop is
    too dark overall or in its central window.

    Each crop is binarized at a fixed threshold of 80.  A detection is
    discarded when more than 25% of all its pixels are black, or when more
    than 40% of the pixels in the centered half-width/half-height window are
    black.

    Args:
        img_name: path of the source image; read in grayscale.
        bboxes:   iterable of [x1, x2, y1, y2, confidence] rows.

    Returns:
        JSON string {"img_name": ..., "objects": [...]} of surviving boxes,
        encoded with the module's NpEncoder.
    """
    objects = [
        {
            "x1": int(box[0]),
            "x2": int(box[1]),
            "y1": int(box[2]),
            "y2": int(box[3]),
            "confidence": float('%.2f' % box[4]),
            "category": 0,
        }
        for box in bboxes
    ]

    # BUGFIX: the image is loop-invariant -- the original re-read it from
    # disk once per detection.
    img = cv2.imread(img_name, 0)

    kept = []
    for obj in objects:
        # NOTE(review): slice order (rows = x2:y2, cols = x1:y1) kept exactly
        # as the original wrote it; the keys look mislabeled upstream.
        crop = img[obj['x2']:obj['y2'], obj['x1']:obj['y1']]
        _, binary = cv2.threshold(crop, 80, 255, cv2.THRESH_BINARY)
        h, w = binary.shape

        # Whole-crop dark ratio (vectorized; replaces the O(h*w) Python loop).
        dark_total = int(np.count_nonzero(binary == 0))
        if dark_total / (h * w) > 0.25:
            continue  # drop: mostly dark overall

        # Dark ratio of the centered h/2 x w/2 window.  The denominator
        # int(h*w/4) is kept from the original even though it differs from
        # the exact window pixel count for odd dimensions.
        center = binary[h // 4:h // 4 + h // 2, w // 4:w // 4 + w // 2]
        dark_center = int(np.count_nonzero(center == 0))
        if dark_center / ((h * w) // 4) > 0.40:
            continue  # drop: dark center

        kept.append(obj)

    final = {'img_name': img_name, 'objects': kept}
    return json.dumps(final, cls=NpEncoder)
def makejson(img_name, bboxes):
    """Build the detection JSON for one image, dropping boxes whose dominant
    contour perimeter falls in the open interval (75, 150).

    Each crop is binarized at a fixed threshold of 65 and its contours are
    extracted.  For every contour the perimeter of its min-area rectangle is
    measured: with a single contour that perimeter is used directly;
    otherwise the maximum perimeter over contours whose third box corner is
    not at the origin is used.  Detections with 75 < perimeter < 150 are
    discarded.

    Args:
        img_name: path of the source image; read in grayscale.
        bboxes:   iterable of [x1, x2, y1, y2, confidence] rows.

    Returns:
        JSON string {"img_name": ..., "objects": [...]} of surviving boxes,
        encoded with the module's NpEncoder.
    """
    objects = [
        {
            "x1": int(box[0]),
            "x2": int(box[1]),
            "y1": int(box[2]),
            "y2": int(box[3]),
            "confidence": float('%.2f' % box[4]),
            "category": 0,
        }
        for box in bboxes
    ]

    # BUGFIX: the image is loop-invariant -- the original re-read it from
    # disk once per detection.
    img = cv2.imread(img_name, 0)

    kept = []
    for obj in objects:
        # NOTE(review): slice order (rows = x2:y2, cols = x1:y1) kept exactly
        # as the original wrote it; the keys look mislabeled upstream.
        crop = img[obj['x2']:obj['y2'], obj['x1']:obj['y1']]
        _, binary = cv2.threshold(crop, 65, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(binary, cv2.RETR_LIST,
                                       cv2.CHAIN_APPROX_NONE)

        perimeter = 0
        for contour in contours:
            rect_pts = np.int64(cv2.boxPoints(cv2.minAreaRect(contour)))
            length = cv2.arcLength(rect_pts, True)
            if len(contours) == 1:
                # Single contour: use its perimeter unconditionally.
                perimeter = length
            elif not (rect_pts[2, 0] == 0 and rect_pts[2, 1] == 0):
                # Skip boxes whose third corner sits at the origin (replaces
                # the original's dead "t = 1" placeholder branch); otherwise
                # track the maximum perimeter.
                perimeter = max(perimeter, length)

        if not 75 < perimeter < 150:
            kept.append(obj)

    final = {'img_name': img_name, 'objects': kept}
    return json.dumps(final, cls=NpEncoder)