[tensorflow] 入门day1-数据整理与展示

tensorflow真是一个我绕不开的坑（苍天饶过谁.jpg）

其实tensorflow1和2的差别挺大的，暂时从1入坑，2的话之后简单过一下。

tf2中更改的函数（供参考）：https://docs.google.com/spreadsheets/d/1FLFJLzg7WNP6JHODX5q8BDgptKafq_slHpnHVbJIteQ/edit#gid=0

本文仅记录我的踩坑历程。

参考文献：https://www.datacamp.com/community/tutorials/tensorflow-tutorial

数据来源：https://btsd.ethz.ch/shareddata/

基础知识部分另外编写，这里只记录操作和结果。

import skimage
import tensorflow as tf
from skimage import io # [MUST] for skimage.io.imread
import os
import matplotlib.pyplot as plt # draw distribution graph
from skimage import transform
from skimage.color import rgb2gray # convert img to grayscale
import numpy as np

def first_try():
    """Multiply two constant tensors element-wise and print the outcome.

    Two things are printed: first the symbolic tensor returned by
    ``tf.multiply`` and then the values obtained by running that tensor
    inside a ``tf.Session``.
    """
    # Two constant operands of equal length.
    lhs = tf.constant([1,2,3,4])
    rhs = tf.constant([5,6,7,8])
    # This only yields a tensor handle, not the numbers: nothing has been
    # computed yet, TensorFlow has merely derived a graph node.
    product = tf.multiply(lhs, rhs)
    print(product) # Tensor("Mul:0", shape=(4,), dtype=int32)
    # Evaluating the tensor requires a session; the context manager closes
    # the session automatically when the block exits.
    with tf.Session() as session:
        values = session.run(product)
        print(values)

def load_data(data_dir):
    """Load every ``.ppm`` image stored one directory level below *data_dir*.

    Each immediate sub-directory of ``data_dir`` is treated as one class.
    The sub-directory name is converted with ``int()`` and used as the label
    of every ``.ppm`` file found directly inside it.

    Directories and files are visited in sorted order. ``os.listdir`` makes
    no promise about ordering, so without sorting the position of a given
    image in the returned lists could differ between runs or platforms,
    which would make index-based sampling of the result unreproducible.

    Args:
        data_dir: Path of the directory that contains the class folders.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the loaded
        images (whatever ``skimage.io.imread`` returns) and their integer
        labels.

    Raises:
        ValueError: If a sub-directory that contains ``.ppm`` files has a
            name that cannot be parsed as an integer.
    """
    labels = []
    images = []
    # One sub-directory per type of sign.
    for d in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, d)
        if not os.path.isdir(label_dir):
            # Plain files next to the class folders are ignored.
            continue
        # Full paths of the .ppm files of this class.
        file_names = sorted(
            os.path.join(label_dir, f)
            for f in os.listdir(label_dir)
            if f.endswith(".ppm")
        )
        if not file_names:
            # Nothing to load, so the folder name is never parsed; a
            # non-numeric but image-free folder must not raise.
            continue
        label = int(d)
        for f in file_names:
            images.append(skimage.io.imread(f))
            labels.append(label)
    return images, labels

def random_show(images, name, cmap=None):
    """Display selected images side by side and print basic statistics.

    Args:
        images: Indexable collection of image arrays.
        name: Sequence of indices into ``images``; one subplot is drawn per
            entry, all on a single row.
        cmap: Colormap forwarded to ``plt.imshow``. Pass ``"gray"`` for
            grayscale pictures, otherwise they are drawn in wrong colors.
    """
    total = len(name)
    for slot, idx in enumerate(name, start=1):
        plt.subplot(1, total, slot)
        plt.axis('off')
        picture = images[idx]
        plt.imshow(picture, cmap)
        plt.subplots_adjust(wspace=0.5)
        stats = (picture.shape, picture.min(), picture.max())
        print("shape: {0}, min: {1}, max: {2}".format(*stats))
    plt.show()


def show_each_label_pic(labels, images=None):
    """Plot the first image of every distinct label, titled with its count.

    Args:
        labels: Sequence of labels, one per image. The labels must be
            hashable and mutually orderable (they are plotted in sorted
            order).
        images: Collection of images aligned with ``labels``. When omitted,
            the module-level variable ``images`` is used, which is what this
            function read implicitly before the parameter existed.

    Raises:
        NameError: If ``images`` is omitted and no module-level ``images``
            exists.
    """
    if images is None:
        # Backward compatibility for callers that pass only ``labels``.
        try:
            images = globals()["images"]
        except KeyError:
            raise NameError("name 'images' is not defined") from None

    # Single pass: remember where each label first occurs and how often it
    # appears, instead of calling labels.index()/labels.count() per label.
    first_index = {}
    counts = {}
    for idx, label in enumerate(labels):
        first_index.setdefault(label, idx)
        counts[label] = counts.get(label, 0) + 1

    # Keep the 8x8 layout whenever it is large enough; grow the square grid
    # only when there are more than 64 distinct labels, because plt.subplot
    # rejects positions outside the grid.
    side = 8
    while side * side < len(counts):
        side += 1

    # Initialize the figure.
    plt.figure(figsize=(15, 15))
    for position, label in enumerate(sorted(counts), start=1):
        plt.subplot(side, side, position)
        plt.axis('off')
        plt.title("Label {0} ({1})".format(label, counts[label]))
        # Show the first image that carries this label.
        plt.imshow(images[first_index[label]])
    plt.show()

def transform_img(images, rows, cols):
    """Resize every image to ``rows`` x ``cols``.

    Args:
        images: Iterable of image arrays.
        rows: Target height passed to ``transform.resize``.
        cols: Target width passed to ``transform.resize``.

    Returns:
        A new list holding the resized images in input order.
    """
    target_shape = (rows, cols)
    resized = []
    for picture in images:
        resized.append(transform.resize(picture, target_shape))
    return resized

def to_gray(images):
    """Convert a collection of images to grayscale.

    Args:
        images: Sequence of images; it is packed into a single NumPy array
            first because ``rgb2gray`` needs an array rather than a list.

    Returns:
        The result of ``rgb2gray`` applied to the stacked array.
    """
    stacked = np.array(images)
    return rgb2gray(stacked)

if __name__=="__main__":
    # Location of the dataset on the author's machine.
    ROOT_PATH = r"G:/share/testTF"
    train_data_dir = ROOT_PATH + "/Training"
    # Indices of the pictures that are displayed as samples.
    sample_indices = [300, 2250, 3650, 4000]

    # NOTE: `images` and `labels` must keep these module-level names;
    # show_each_label_pic() can read the global `images`.
    images, labels = load_data(train_data_dir)

    # Exploration steps that were run one at a time while writing the post:
    #print(len(set(labels))) # 62, because there are 62 types of traffic signs
    #print(len(images)) # 4575
    #plt.hist(labels, 63) # draw a bar graph of the label distribution
    #plt.show()
    #random_show(images, sample_indices)
    #print(type(images[0])) # <class 'numpy.ndarray'>
    #show_each_label_pic(labels)

    # Bring every picture to 28x28, then drop the color information.
    images28 = transform_img(images, 28, 28)
    #random_show(images28, sample_indices)
    gray_images28 = to_gray(images28)
    random_show(gray_images28, sample_indices, cmap="gray")

图像：

条形图：

随机查看的四个图：

统计一下每个label有多少个图：

另外，resize 之后像素值实际上已经被归一化了：skimage 的 transform.resize 默认会把图像转换为浮点数，取值范围落在 [0, 1] 之内。

灰度图的效果：这里转化成灰度图，是因为参考教程的作者认为，在当前问题中颜色对分类不起作用。这一点我随后会再验证。

[tensorflow] 入门day1-数据整理与展示

猜你喜欢