TensorFlow CNN image learning

 Following on from the previous article, this is the CNN image-recognition code. Plenty of the code available online can actually be run as-is, but I wrote it out myself to understand it again.

 

# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

# Scale factors multiplied into the random-normal initial values of the
# weight and bias variables.
w_alpha = 0.01
b_alpha = 0.1

# Input image size in pixels (height, width).
IMAGE_HEIGHT, IMAGE_WIDTH = 240, 320
# Number of labels per image; multiplies CHAR_SET_LEN in the label width.
MAX_CAPTCHA = 1
# Number of image classes (length of a one-hot label vector).
CHAR_SET_LEN = 37
# Keep probability fed to the dropout placeholder during training.
dropout = 0.7


# Trainable parameters of the network, keyed by name.
#
# Each entry of _CONV_CHANNELS is the (input channels, output channels) pair
# of one 3x3 convolution layer. The first layer takes 3 input channels
# because the images are colour images.
_CONV_CHANNELS = ((3, 32), (32, 64), (64, 128), (128, 128))

conv_dict = {}
for _idx, (_in_ch, _out_ch) in enumerate(_CONV_CHANNELS, start=1):
    _w_name = 'w_%d' % _idx
    _b_name = 'b_%d' % _idx
    # Kernel first, then bias, so variables are created in layer order.
    conv_dict[_w_name] = tf.Variable(
        w_alpha * tf.random_normal([3, 3, _in_ch, _out_ch]), name=_w_name)
    conv_dict[_b_name] = tf.Variable(
        b_alpha * tf.random_normal([_out_ch]), name=_b_name)

# Output layer: maps the 1024-unit dense layer to one logit per class.
conv_dict['out'] = tf.Variable(tf.random_normal([1024, CHAR_SET_LEN]))
conv_dict['out_add'] = tf.Variable(tf.random_normal([CHAR_SET_LEN]))

def batch_normal(wx_plus_b, out_size):
    """Batch-normalize a layer output using statistics of the current batch.

    Intended to counter vanishing gradients. The mean and variance are taken
    over axes 0, 1 and 2 and the channel axis is left out, so each channel is
    normalized separately. Every call creates a fresh pair of scale/shift
    variables.

    Args:
        wx_plus_b: Tensor to normalize. Presumably a 4-D
            [batch, height, width, channels] image tensor, given the axes
            used -- confirm against the caller.
        out_size: Number of channels of ``wx_plus_b``; it sizes the scale and
            shift variables.

    Returns:
        The normalized tensor.
    """
    reduce_axes = [0, 1, 2]
    batch_mean, batch_var = tf.nn.moments(wx_plus_b, axes=reduce_axes)

    # Learnable per-channel scale (starts at 1) and shift (starts at 0).
    gamma = tf.Variable(tf.ones([out_size]))
    beta = tf.Variable(tf.zeros([out_size]))

    return tf.nn.batch_normalization(
        wx_plus_b,
        batch_mean,
        batch_var,
        offset=beta,
        scale=gamma,
        variance_epsilon=0.001,
    )



# Batch of input images: [batch, height, width, 3 colour channels].
X = tf.placeholder(dtype=tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3])
# Batch of one-hot labels.
Y = tf.placeholder(dtype=tf.float32, shape=[None, MAX_CAPTCHA * CHAR_SET_LEN])
# Batch-normalization switch passed to conv2d (the step it controls is disabled there).
NOR = tf.placeholder(dtype=tf.float32)
# Dropout keep probability.
keep_prob = tf.placeholder(dtype=tf.float32)

# Convert a single number to an array
def one_hot_n(x, n):
    x = np.array(x)
    return np.eye(n)[x]


def conv2d(conv, cd1, cd2, out_size, nor):
    """Apply one convolution stage: conv -> bias -> ReLU -> 2x2 max-pool -> dropout.

    Args:
        conv: Input tensor of the stage.
        cd1: Convolution kernel variable.
        cd2: Bias variable added to the convolution output.
        out_size: Number of output channels; only relevant to the disabled
            batch-normalization step, otherwise unused.
        nor: Batch-normalization switch; currently unused.

    Returns:
        The stage output after pooling and dropout. Dropout reads the
        module-level ``keep_prob`` placeholder.
    """
    convolved = tf.nn.conv2d(conv, cd1, strides=[1, 1, 1, 1], padding='SAME')
    biased = tf.nn.bias_add(convolved, cd2)
    # Batch normalization (batch_normal(biased, out_size) when nor > 1) is
    # intentionally left disabled here.
    activated = tf.nn.relu(biased)
    # Stride-2 pooling with SAME padding halves height and width.
    pooled = tf.nn.max_pool(
        activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # Dropout guards against overfitting.
    return tf.nn.dropout(pooled, keep_prob)


def crack_captcha_cnn():
    """Build the CNN graph on the module-level placeholders and return the logits.

    The network is four conv/pool/dropout stages (see ``conv2d``), one
    fully connected ReLU layer, and a linear output layer producing one
    logit per class.

    The flattened size fed to the dense layer is taken from the static shape
    of the last convolution output rather than hard-coded. For the default
    240x320 input it equals 15 * 20 * 128. A hard-coded size combined with the
    ``-1`` reshape would silently fold spatial data into the batch dimension
    if the image size were changed.

    Returns:
        Tensor of unnormalized class scores, one row per input image.
    """
    # Four convolution + pooling stages.
    conv1 = conv2d(X, conv_dict['w_1'], conv_dict['b_1'], 32, NOR)
    conv2 = conv2d(conv1, conv_dict['w_2'], conv_dict['b_2'], 64, NOR)
    conv3 = conv2d(conv2, conv_dict['w_3'], conv_dict['b_3'], 128, NOR)
    conv4 = conv2d(conv3, conv_dict['w_4'], conv_dict['b_4'], 128, NOR)

    # Static [height, width, channels] of the last stage.
    height, width, channels = conv4.get_shape().as_list()[1:]
    flat_size = height * width * channels
    # The dense layer width must match the first dimension of the output weights.
    hidden_units = conv_dict['out'].get_shape().as_list()[0]

    # Fully connected layer.
    w_d = tf.Variable(w_alpha * tf.random_normal([flat_size, hidden_units]))
    b_d = tf.Variable(b_alpha * tf.random_normal([hidden_units]))

    dense = tf.reshape(conv4, [-1, flat_size])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))

    # Linear output layer producing the logits.
    out = tf.add(tf.matmul(dense, conv_dict['out']), conv_dict['out_add'])

    return out

def read_and_decode(filename):
    """Build graph ops that read one (image, label) example from a TFRecords file.

    Args:
        filename: Path of the TFRecords file. Each record must hold an int64
            ``label`` feature and a raw-bytes ``img_raw`` feature.

    Returns:
        A ``(img, label)`` pair of tensors: ``img`` is a float32
        [IMAGE_HEIGHT, IMAGE_WIDTH, 3] tensor scaled to [-0.5, 0.5], and
        ``label`` is the int32 class index.
    """
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }

    file_queue = tf.train.string_input_producer([filename])
    _, record = tf.TFRecordReader().read(file_queue)
    parsed = tf.parse_single_example(record, features=feature_spec)

    # Raw bytes -> uint8 pixels -> image-shaped tensor.
    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    pixels = tf.reshape(pixels, [IMAGE_HEIGHT, IMAGE_WIDTH, 3])

    # Normalize pixel values from [0, 255] to [-0.5, 0.5].
    img = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(parsed['label'], tf.int32)
    return img, label

def train_crack_captcha_cnn():
    """Train the CNN on ``anm_pic_train.tfrecords`` until a batch accuracy target is met.

    Builds the model, loss, optimizer and accuracy ops, then pulls shuffled
    batches of 30 examples from the TFRecords queue and runs Adam steps in a
    loop. Every 50 steps the accuracy is evaluated on the current training
    batch with dropout disabled. Once it exceeds 0.5, a checkpoint named
    ``crack_capcha.model`` is saved and training stops.

    The queue-runner threads are always stopped and joined, including when
    an exception escapes the loop.
    """
    output = crack_captcha_cnn()
    # Softmax cross-entropy suits one label per image; sigmoid cross-entropy
    # would be the choice for several independent labels.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=Y))
    # A larger learning rate that decays over time would speed up training.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # A prediction is correct when the arg-max class of the logits matches
    # the arg-max of the one-hot label; accuracy is the mean over the batch.
    correct_pred = tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    img, label = read_and_decode("anm_pic_train.tfrecords")
    img_batch, label_batch = tf.train.shuffle_batch(
        [img, label], batch_size=30, capacity=7000, min_after_dequeue=1000)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        try:
            step = 0
            while True:
                imgs, labs = sess.run([img_batch, label_batch])
                # One-hot encode in NumPy. Building a tf.cast op here would add
                # new nodes to the graph on every iteration.
                one_hot_labs = one_hot_n(labs, CHAR_SET_LEN).astype(np.float32)
                sess.run(optimizer,
                         feed_dict={X: imgs, Y: one_hot_labs,
                                    keep_prob: dropout, NOR: 1.})
                if step % 50 == 0:
                    # Evaluate on the same batch with dropout switched off.
                    acc = sess.run(accuracy,
                                   feed_dict={X: imgs, Y: one_hot_labs,
                                              keep_prob: 1., NOR: 1.})
                    print(step, acc)
                    if acc > 0.5:
                        saver.save(sess, "crack_capcha.model", global_step=step)
                        print("Complete!!")
                        break
                step += 1
        finally:
            # Stop the input threads on every exit path; the session itself
            # is closed by the enclosing ``with`` block.
            coord.request_stop()
            coord.join(threads)

# Build the graph and start training as soon as this file is executed
# (this also happens if the file is imported as a module).
train_crack_captcha_cnn()

 

 1. batch_normal is meant to prevent vanishing gradients, but I am not sure whether it should be placed before the activation, or how to write the `if` statement for it ;) ....

 2. This code was run on a CPU. Don't ask me why I use a CPU: I can't afford a GPU. If you have the means, it is best to run it on a GPU; don't worry.

 

The result of running for half a day:



 

 

Recommended blog address:

http://blog.topspeedsnail.com

https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-13-BN/

 

Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=326609815&siteId=291194637