基于九宫格与bbox逻辑回归的细粒度高密度目标检测

# -*- coding: utf-8 -*-
"""
Created on Fri Oct 19 21:13:41 2018

@author: Lenovo
"""

from keras.applications.vgg16 import VGG16
from keras.layers import Dense,Conv2D,MaxPooling2D,Average,Concatenate,Reshape,Flatten,Input,Add,Multiply,Lambda,Dropout,TimeDistributed
from keras.models import Model
from keras import backend as K
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.utils import plot_model
from PIL import Image as Im
import numpy as np
import os
import random
import tensorflow as tf
'''
1 2 3
4 0 6
7 8 9
'''

def sum_lambda(a):
    """Sum tensor `a` over its last axis.

    Intended as the function wrapped by a Keras ``Lambda`` layer. The
    shape of the incoming tensor is printed first as a debugging aid.
    """
    print(a.shape)
    collapsed = K.sum(a, axis=-1)
    return collapsed

def bbox_loss(yp,yt):
    """Return the mean of the element-wise squared difference of two tensors.

    The expression is symmetric in `yp` and `yt`, so the argument order
    does not affect the result.
    """
    squared_error = tf.square(yp - yt)
    return tf.reduce_mean(squared_error)

def get_model():
    """Build and compile the nine-grid classification + bbox regression model.

    Inputs (in order): nine 50x50x3 image patches (the centre cell followed
    by its eight neighbours) and one 4-value bbox vector.

    Outputs:
        'fenlei' -- 1 sigmoid unit (binary classification).
        'huigui' -- 4 sigmoid units (bbox regression).

    The model is compiled with binary cross-entropy for 'fenlei', MSE for
    'huigui' (equal loss weights) and SGD. As side effects the layer summary
    is printed and the graph is rendered to 'model.png'.

    Returns:
        The compiled Keras ``Model``.
    """
    input_list = [Input(shape=(50, 50, 3)) for _ in range(9)]
    input_bbox = Input(shape=(4,))
    # A single VGG16 backbone is applied to all nine cells.
    vgg16 = VGG16(include_top=False, weights='imagenet')

    # Every cell gets its own 200-unit projection of the backbone features.
    vgg16_dense_list = [
        Dense(200, activation='relu')(Flatten()(vgg16(inp)))
        for inp in input_list
    ]

    # Flat vector of all nine cells (9 * 200 values); it drives the
    # 9-way softmax that weights the cells against each other.
    sudoku_concat = Concatenate()(vgg16_dense_list)
    sudoku_weights = Dense(9, activation='softmax')(sudoku_concat)

    # Stack the cells so that the LAST axis indexes the cell: (200, 9).
    # A plain Reshape((200, 9)) of the concatenated vector fills rows in
    # row-major order, which puts nine consecutive features of one cell in
    # each row instead of one feature from each of the nine cells, so the
    # per-cell weights below would be applied to the wrong axis.
    sudoku_stack = Concatenate(axis=-1)(
        [Reshape((200, 1))(cell) for cell in vgg16_dense_list])

    # Weight each cell's feature column, then sum over the cells -> (200,).
    sudoku_multi = Multiply()([sudoku_stack, sudoku_weights])
    sum_lambda_out = Lambda(sum_lambda)(sudoku_multi)

    # Classification head.
    drop = Dropout(0.5)(sum_lambda_out)
    out = Dense(1, activation='sigmoid', name='fenlei')(drop)

    # Bbox head: predict 4 scale factors, apply them to the input bbox,
    # then map the result to the 4 regression outputs.
    bbox100dense = Dense(100, activation='relu')(sum_lambda_out)
    bbox4w = Dense(4, activation='relu')(bbox100dense)
    bboxrs_out = Multiply()([input_bbox, bbox4w])
    bboxout = Dense(4, activation='sigmoid', name='huigui')(bboxrs_out)

    model = Model(inputs=input_list + [input_bbox], outputs=[out, bboxout])

    model.compile(loss={'fenlei': 'binary_crossentropy',
                        'huigui': 'mse'},
                  loss_weights={'fenlei': 1.,
                                'huigui': 1.},
                  optimizer=SGD(lr=0.001, momentum=0.99, decay=1e-5),
                  metrics=['accuracy'])
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()
    plot_model(model, to_file='model.png')
    return model

_GRID_PAD = 100              # white margin (pixels) added on every side of an image
_GRID_PATCH_SIZE = (50, 50)  # every crop is resized to this size
_GRID_NEGATIVES = 5          # random non-annotated boxes drawn per image
_GRID_VAL_IMAGES = 300       # trailing annotation lines used for validation


def _grid_boxes(x, y, w, h):
    """Return the nine crop boxes for bbox (x, y, w, h).

    Order: the centre cell first, then neighbours 1, 2, 3, 4, 6, 7, 8, 9
    of the 3x3 grid (row by row, top-left to bottom-right).
    """
    return [
        (x, y, x + w, y + h),                  # 0: the box itself
        (x - w, y - h, x, y),                  # 1
        (x, y - h, x + w, y),                  # 2
        (x + w, y - h, x + 2 * w, y),          # 3
        (x - w, y, x, y + h),                  # 4
        (x + w, y, x + 2 * w, y + h),          # 6
        (x - w, y + h, x, y + 2 * h),          # 7
        (x, y + h, x + w, y + 2 * h),          # 8
        (x + w, y + h, x + 2 * w, y + 2 * h),  # 9
    ]


def _append_sample(image, box, label, cells, bboxes, labels, targets):
    """Crop the nine grid patches of `box` from `image` and record one sample."""
    x, y, w, h = box
    for cell, crop_box in zip(cells, _grid_boxes(x, y, w, h)):
        patch = image.crop(crop_box).resize(_GRID_PATCH_SIZE, Im.LANCZOS)
        cell.append(np.array(patch))
    bboxes.append(np.array([x, y, w, h]))
    labels.append(np.array([label]))
    # Regression target: the bbox normalised by the padded image size.
    width, height = image.size
    targets.append(np.array([x / width, y / height, w / width, h / height]))


def _iter_grid_batches(annotation_lines):
    """Yield training batches built from the given annotation lines.

    Each line is split on whitespace: token 0 is an image file name
    (relative to 'data/'); starting at token 2, every group of five tokens
    supplies x, y, w, h in its last four positions (token 1 and the first
    token of each group are not read).

    Yields:
        ``(inputs, outputs)`` where `inputs` is a list of ten arrays (nine
        stacks of 50x50x3 patches plus the stacked [x, y, w, h] boxes in
        padded-image pixels) and `outputs` is ``[labels, targets]``: labels
        are 1 for annotated boxes and 0 for random boxes, targets are the
        boxes divided by the padded image width/height.
    """
    pad = _GRID_PAD
    for line in annotation_lines:
        fields = line.split()
        if not fields:
            # Blank line: there is no image name to open.
            continue

        # Force three channels so padding and patch shapes are well defined
        # for grayscale / palette / RGBA files too; close the file promptly.
        with Im.open('data/' + fields[0]) as raw:
            pixels = np.array(raw.convert('RGB'))
        padded = np.pad(pixels, ((pad, pad), (pad, pad), (0, 0)),
                        'constant', constant_values=255)
        yuanimg = Im.fromarray(padded)
        # Writable copy in which annotated boxes get painted white, so that
        # random "negative" crops do not show an annotated object.
        masked = np.array(yuanimg)

        cells = [[] for _ in range(9)]
        bboxes, labels, targets = [], [], []

        # Positive samples: every annotated bbox. The upper bound skips an
        # incomplete trailing group instead of raising IndexError.
        for k in range(2, len(fields) - 4, 5):
            x = int(fields[k + 1]) + pad
            y = int(fields[k + 2]) + pad
            w = int(fields[k + 3])
            h = int(fields[k + 4])
            _append_sample(yuanimg, (x, y, w, h), 1,
                           cells, bboxes, labels, targets)
            masked[y:y + h, x:x + w] = 255

        # Negative samples: random boxes cropped from the masked image.
        background = Im.fromarray(masked)
        for _ in range(_GRID_NEGATIVES):
            x = random.randint(pad, background.size[0] - pad)
            y = random.randint(pad, background.size[1] - pad)
            w = random.randint(15, 35)
            h = random.randint(15, 35)
            _append_sample(background, (x, y, w, h), 0,
                           cells, bboxes, labels, targets)
            # NOTE(review): the batch is yielded after EACH negative, so an
            # image produces five cumulative batches. This looks like an
            # indentation slip, but the step counts used by the training
            # call depend on it, so it is kept as is -- confirm intent.
            yield (
                [np.array(cell) for cell in cells] + [np.array(bboxes)],
                [np.array(labels), np.array(targets)],
            )


def sudoku_data_generator():
    """Yield training batches for every annotation line under 'data/'.

    The generator makes a single pass and then stops.

    NOTE(review): the pass also covers the trailing lines that
    `sudoku_val_data_generator` uses, so validation data is seen during
    training -- confirm whether that overlap is intended.
    """
    for batch in _iter_grid_batches(get_all_img_path()):
        yield batch


def sudoku_val_data_generator():
    """Yield validation batches from the last 300 annotation lines.

    If fewer than 300 lines exist, all of them are used. The generator
    makes a single pass and then stops.
    """
    imglist = get_all_img_path()
    for batch in _iter_grid_batches(imglist[-_GRID_VAL_IMAGES:]):
        yield batch
        
   
    
def get_all_img_path():
    """Collect the annotation lines of every '.txt' file under 'data/'.

    Files are read in sorted name order so the resulting list (and any
    split taken from its tail) is reproducible; ``os.listdir`` itself
    returns entries in arbitrary order. Blank lines are dropped because
    they carry no image name.

    Returns:
        list[str]: the non-blank lines, each still ending with its newline.
    """
    names = sorted(os.listdir(r'data/'))
    print(names)
    imgnamelist = []
    for name in names:
        # Match on the extension; a bare substring test would also pick up
        # unrelated files that merely contain "txt" in their name.
        if not name.endswith('.txt'):
            continue
        with open('data/' + name) as f:
            imgnamelist.extend(line for line in f if line.strip())
    return imgnamelist

print(1)

model = get_model()

# The model has two named outputs, so accuracy is logged per output
# (e.g. 'fenlei_acc' / 'val_fenlei_acc') and a plain 'val_acc' is never
# reported; monitoring it makes the checkpoint skip every save. Look the
# classification metric up by prefix so both the 'acc' and 'accuracy'
# spellings used by different Keras releases are handled.
_fenlei_acc = next((name for name in model.metrics_names
                    if name.startswith('fenlei_acc')), 'fenlei_acc')

# Keep only the weights with the best validation classification accuracy.
mp = ModelCheckpoint(filepath='rentou_best_predict_score_sudoki.h5',
                     save_best_only=True, monitor='val_' + _fenlei_acc,
                     mode='max', verbose=1)
cll = [mp]
model.fit_generator(sudoku_data_generator(),
                  steps_per_epoch=9500,
                  epochs=2,
                  verbose=1,
                  callbacks=cll,
                  validation_data=sudoku_val_data_generator(),
                  validation_steps=100,
                  class_weight=None,
                  shuffle=True)
基于九宫格与bbox逻辑回归的细粒度高密度目标检测

猜你喜欢

基于九宫格与bbox逻辑回归的细粒度高密度目标检测