tflearn 中文汉字识别模型试验汇总

def get_model(width, height, classes=40):
    """Build a reduced VGG-style CNN for single-channel images.

    Compared with a full VGG stack, every stage here keeps a single
    convolution (the duplicate convolutions and the 4096/1024-unit dense
    layers of earlier experiments were dropped).

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network; it checkpoints to the
        ``'checkpoint'`` path and keeps at most one checkpoint.
    """
    # One grey-scale channel; an RGB variant would use a trailing 3.
    net = input_data(shape=[None, width, height, 1])

    # Three identical stages: 3x3 ReLU convolution followed by 2x2 pooling.
    for n_filters in (64, 128, 256):
        net = conv_2d(net, n_filters, 3, activation='relu')
        net = max_pool_2d(net, 2, strides=2)

    # Last stage: the extra positional 2 is conv_2d's fourth argument
    # (strides in tflearn's signature), so this convolution also downsamples.
    net = conv_2d(net, 512, 3, 2, activation='relu')
    net = max_pool_2d(net, 2, strides=2)

    # Classifier head.
    net = fully_connected(net, 2048, activation='relu')
    net = dropout(net, 0.8)
    net = fully_connected(net, classes, activation='softmax')

    net = regression(
        net,
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        learning_rate=0.0001,
    )
    return tflearn.DNN(
        net,
        checkpoint_path='checkpoint',
        max_checkpoints=1,
        tensorboard_verbose=1,
    )

# Script entry point: load the images, train the model from get_model(),
# save it, then report metrics over the complete data set.
if __name__ == "__main__":
    # Every picture is resized to width x height before training.
    width, height = 32, 32
    # NOTE(review): load_data is defined elsewhere; X / Y are presumably the
    # images and their one-hot labels, org_labels the integer class ids
    # (they are compared against argmax predictions below) -- confirm.
    X, Y, org_labels = load_data(dirname="data", resize_pics=(width, height))
    # Hold out 20% of the samples for validation; the seed is fixed so the
    # split is reproducible.
    trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=666)
    # Dump one sample from each split as a quick sanity check.
    print("sample data:")
    print(trainX[0])
    print(trainY[0])
    print(testX[-1])
    print(testY[-1])

    # The output layer is built with 100 units, overriding get_model's default of 40.
    model = get_model(width, height, classes=100)

    filename = 'cnn_handwrite-acc0.8.tflearn'
    # try to load model and resume training
    #try:
    #    model.load(filename)
    #    print("Model loaded OK. Resume training!")
    #except:
    #    pass

    # Initialize our callback with desired accuracy threshold.
    early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.9)
    # NOTE(review): the callback presumably raises StopIteration once the
    # threshold is reached, which is why that exception is treated as a
    # normal way to leave fit() -- confirm against EarlyStoppingCallback.
    try:
        model.fit(trainX, trainY, validation_set=(testX, testY), n_epoch=500, shuffle=True,
                  snapshot_epoch=True, # Snapshot (save & evaluate) model every epoch.
                  show_metric=True, batch_size=32, callbacks=early_stopping_cb, run_id='cnn_handwrite')
    except StopIteration as e:
        print("OK, stop iterate!Good!")

    model.save(filename)

    # predict all data and calculate confusion_matrix
    # The file that was just written is loaded back before predicting.
    model.load(filename)

    # The metrics below cover all of X, i.e. training and validation samples together.
    pro_arr =model.predict(X)
    # Pick the index of the highest score in each prediction row as the label.
    predict_labels = np.argmax(pro_arr, axis=1)
    print(classification_report(org_labels, predict_labels))
    print(confusion_matrix(org_labels, predict_labels))

上述模型效果：

。。。

def get_model(width, height, classes=40):
    """Build a small two-stage CNN with batch normalization.

    Layout: two (3x3 conv + 2x2 max-pool) stages with 64 and 128 filters,
    batch normalization, a 1024-unit dense layer with dropout, and a
    softmax output.

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network; it checkpoints to the
        ``'checkpoint'`` path and keeps at most one checkpoint.
    """
    # One grey-scale channel; an RGB variant would use a trailing 3.
    network = input_data(shape=[None, width, height, 1])
    network = conv_2d(network, 64, 3, activation='relu')
    network = max_pool_2d(network, 2, strides=2)
    network = conv_2d(network, 128, 3, activation='relu')
    network = max_pool_2d(network, 2, strides=2)
    # Fix: the result used to be bound to the misspelled name ``netword`` and
    # was never used, so batch normalization was silently left out of the
    # graph. Rebinding ``network`` actually inserts the layer.
    network = tflearn.batch_normalization(network)
    network = fully_connected(network, 1024, activation='relu')
    network = dropout(network, 0.8)
    network = fully_connected(network, classes, activation='softmax')
    network = regression(network, optimizer='rmsprop',
                         loss='categorical_crossentropy',
                         learning_rate=0.0001)
    model = tflearn.DNN(network, checkpoint_path='checkpoint',
                        max_checkpoints=1, tensorboard_verbose=1)
    return model

上述模型效果：

Inception模型：

def get_model(width, height, classes=40):
    """Build a single Inception-style block classifier.

    A 3x3 stem convolution feeds three parallel 64-filter branches
    (1x1, 3x3 and 5x5 kernels) whose outputs are concatenated along
    axis 3, max-pooled, regularised with dropout and classified by a
    softmax layer. An earlier variant trained with rmsprop at a learning
    rate of 0.0001; this one uses momentum at 0.001.

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network; it checkpoints to the
        ``'checkpoint'`` path and keeps at most one checkpoint.
    """
    # One grey-scale channel; an RGB variant would use a trailing 3.
    stem = input_data(shape=[None, width, height, 1])
    stem = conv_2d(stem, 64, 3, activation='relu')

    # Parallel branches over the same stem output, differing only in kernel size.
    branch_1x1 = conv_2d(stem, 64, filter_size=1, activation='relu',
                         name='inception_3b_1_1')
    branch_3x3 = conv_2d(stem, 64, filter_size=3, activation='relu',
                         name='inception_3b_3_3')
    branch_5x5 = conv_2d(stem, 64, filter_size=5, activation='relu',
                         name='inception_3b_5_5')
    joined = merge(
        [branch_1x1, branch_3x3, branch_5x5],
        mode='concat',
        axis=3,
        name='inception_3b_output',
    )

    head = max_pool_2d(joined, kernel_size=3, strides=2, name='pool3_3_3')
    head = dropout(head, 0.4)
    head = fully_connected(head, classes, activation='softmax')
    head = regression(
        head,
        optimizer='momentum',
        loss='categorical_crossentropy',
        learning_rate=0.001,
    )
    return tflearn.DNN(
        head,
        checkpoint_path='checkpoint',
        max_checkpoints=1,
        tensorboard_verbose=1,
    )

上述模型效果：

--
Training Step: 597  | total loss: 4.36442 | time: 342.271s
| Momentum | epoch: 001 | loss: 4.36442 - acc: 0.0578 | val_loss: 4.30726 - val_acc: 0.1274 -- iter: 19094/19094
--
Terminating training at the end of epoch 1
Training Step: 1193  | total loss: 3.02893 | time: 322.366s
Training Step: 1194  | total loss: 3.00916 | time: 339.206s
| Momentum | epoch: 002 | loss: 3.00916 - acc: 0.2988 | val_loss: 2.71907 - val_acc: 0.4845 -- iter: 19094/19094
--
Terminating training at the end of epoch 2
Training Step: 1791  | total loss: 2.23406 | time: 347.633s
| Momentum | epoch: 003 | loss: 2.23406 - acc: 0.4559 | val_loss: 1.84004 - val_acc: 0.5888 -- iter: 19094/19094

换成avg pool跑起来很慢：

    #network = max_pool_2d(inception_3b_output, kernel_size=3, strides=2, name='pool3_3_3')
    network = avg_pool_2d(inception_3b_output, kernel_size=7, strides=1) # acc: 0.0217 | val_loss: 4.50712 - val_acc: 0.0630 -- iter: 19094/19094

花费时间长，而且看不到什么效果。

resnet结构：

def get_model(width, height, classes=40):
    """Build a small residual network from ``tflearn.residual_block``.

    Three residual stages (16, 32 and 64 filters) follow an L2-regularised
    3x3 stem convolution; the second and third stages start with a
    downsampling block. Global average pooling feeds the softmax classifier.

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network; it checkpoints to the
        ``'model_resnet_cifar10'`` path and keeps up to ten checkpoints.
    """
    # Blocks per stage. Reference depths quoted with the original recipe:
    # 32 layers: n=5, 56 layers: n=9, 110 layers: n=18.
    blocks_per_stage = 2

    # One grey-scale channel; an RGB variant would use a trailing 3.
    inputs = input_data(shape=[None, width, height, 1])

    x = tflearn.conv_2d(inputs, 16, 3, regularizer='L2', weight_decay=0.0001)

    # Stage 1: 16 filters.
    x = tflearn.residual_block(x, blocks_per_stage, 16)
    # Stage 2: one downsampling block, then the remaining 32-filter blocks.
    x = tflearn.residual_block(x, 1, 32, downsample=True)
    x = tflearn.residual_block(x, blocks_per_stage - 1, 32)
    # Stage 3: one downsampling block, then the remaining 64-filter blocks.
    x = tflearn.residual_block(x, 1, 64, downsample=True)
    x = tflearn.residual_block(x, blocks_per_stage - 1, 64)

    x = tflearn.batch_normalization(x)
    x = tflearn.activation(x, 'relu')
    x = tflearn.global_avg_pool(x)

    # Classifier and training op: momentum with a staircase learning-rate decay.
    x = tflearn.fully_connected(x, classes, activation='softmax')
    momentum_opt = tflearn.Momentum(
        0.1, lr_decay=0.1, decay_step=32000, staircase=True
    )
    x = tflearn.regression(
        x, optimizer=momentum_opt, loss='categorical_crossentropy'
    )

    return tflearn.DNN(
        x,
        checkpoint_path='model_resnet_cifar10',
        max_checkpoints=10,
        tensorboard_verbose=0,
        clip_gradients=0.,
    )

--
Terminating training at the end of epoch 7
Training Step: 4776 | total loss: 0.13311 | time: 132.182s
| Momentum | epoch: 008 | loss: 0.13311 - acc: 0.9561 | val_loss: 0.22734 - val_acc: 0.9370 -- iter: 19094/19094
--
Terminating training at the end of epoch 8
Successfully left training! Final model accuracy: 0.95614439249
OK, stop iterate!Good!
avg / total 0.97 0.96 0.96 23868

resnet加深结构：

def get_model(width, height, classes=40):
    """Build a deeper residual network from ``tflearn.residual_bottleneck``.

    A bias-free 3x3 ReLU stem convolution is followed by three bottleneck
    stages (16/64, 32/128 and 64/256 bottleneck/output sizes); the second
    and third stages start with a downsampling block. Global average
    pooling feeds the softmax classifier.

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network; it checkpoints to the
        ``'model_resnet_mnist'`` path and keeps up to ten checkpoints.
    """
    # One grey-scale channel; an RGB variant would use a trailing 3.
    inputs = input_data(shape=[None, width, height, 1])

    # Stem convolution.
    x = tflearn.conv_2d(inputs, 64, 3, activation='relu', bias=False)

    # Stage 1.
    x = tflearn.residual_bottleneck(x, 3, 16, 64)
    # Stage 2: downsampling block followed by two regular blocks.
    x = tflearn.residual_bottleneck(x, 1, 32, 128, downsample=True)
    x = tflearn.residual_bottleneck(x, 2, 32, 128)
    # Stage 3: downsampling block followed by two regular blocks.
    x = tflearn.residual_bottleneck(x, 1, 64, 256, downsample=True)
    x = tflearn.residual_bottleneck(x, 2, 64, 256)

    x = tflearn.batch_normalization(x)
    x = tflearn.activation(x, 'relu')
    x = tflearn.global_avg_pool(x)

    # Classifier and training op.
    x = tflearn.fully_connected(x, classes, activation='softmax')
    x = tflearn.regression(
        x,
        optimizer='momentum',
        loss='categorical_crossentropy',
        learning_rate=0.1,
    )

    return tflearn.DNN(
        x,
        checkpoint_path='model_resnet_mnist',
        max_checkpoints=10,
        tensorboard_verbose=0,
    )

结果是训练的时间更久了。

--
Terminating training at the end of epoch 5
Training Step: 3582 | total loss: 0.14701 | time: 313.084s
| Momentum | epoch: 006 | loss: 0.14701 - acc: 0.9516 | val_loss: 0.30464 - val_acc: 0.9103 -- iter: 19094/19094
--
Terminating training at the end of epoch 6
Successfully left training! Final model accuracy: 0.951571881771
OK, stop iterate!Good!

avg / total 0.94 0.93 0.93 23868

resnet加入预处理：

def get_model(width, height, classes=40):
    """Start of the ResNet variant that adds real-time input preprocessing.

    Only the input stage is shown; the rest of the network is elided below.
    """
    # TODO, modify model
    # Real-time data preprocessing
    img_prep = tflearn.ImagePreprocessing()
    # Register featurewise zero-centering with per_channel=True on the preprocessor.
    img_prep.add_featurewise_zero_center(per_channel=True)
    # Attach the preprocessor to the single-channel input placeholder.
    network = input_data(shape=[None, width, height, 1], data_preprocessing=img_prep)  # if RGB, 224,224,3

    # NOTE(review): the remaining layers were elided in the write-up
    # (presumably the same ResNet body as the earlier variants -- confirm).
    ...

效果：

highway模型：又快又好！

def get_model(width, height, classes=40):
    """Build a highway-convolution classifier.

    Three identical stages each stack highway convolutions with kernel
    sizes 3, 2 and 1 (16 filters, ELU), then apply 2x2 max-pooling and
    batch normalization. Two ELU dense layers (128 and 256 units) feed
    the softmax output, trained with Adam.

    Args:
        width: First spatial dimension of the input placeholder.
        height: Second spatial dimension of the input placeholder.
        classes: Number of output units of the final softmax layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network (no checkpoint path is set).
    """
    stage_count = 3
    kernel_sizes = (3, 2, 1)

    # One grey-scale channel; an RGB variant would use a trailing 3.
    net = input_data(shape=[None, width, height, 1])

    # Highway convolutions with pooling and batch normalization per stage.
    for _ in range(stage_count):
        for kernel in kernel_sizes:
            net = highway_conv_2d(net, 16, kernel, activation='elu')
        net = max_pool_2d(net, 2)
        net = batch_normalization(net)

    # Dense head.
    for units in (128, 256):
        net = fully_connected(net, units, activation='elu')
    net = fully_connected(net, classes, activation='softmax')

    net = regression(
        net,
        optimizer='adam',
        learning_rate=0.01,
        loss='categorical_crossentropy',
        name='target',
    )
    return tflearn.DNN(net, tensorboard_verbose=0)

tflearn 中文汉字识别模型试验汇总

猜你喜欢