搜罗CIFAR-10资料时发现Kevin的视频教程系列与网上的资料略有出入。

网上关于CIFAR-10的程序大致相同,以下博客都是相同的程序代码及解释:

http://www.cnblogs.com/lixiaoran/p/6740022.html
http://shartoo.github.io/tensorflow-sourcecode-input/
http://blog.csdn.net/diligent_321/article/details/53130913
程序链接https://github.com/tensorflow/models/blob/master/tutorials/image/cifar10

下载到的cifar10程序共包括

1. cifar10.py

2. cifar10_eval.py
3. cifar10_input.py
4. cifar10_multi_gpu_train.py
5. cifar10_train.py
只讨论CPU版本时,可以忽略cifar10_multi_gpu_train.py文件。另外,为加快程序调试、避免在程序运行时再去自动下载资源,可以提前从 http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 下载好图片压缩文件,放在默认位置 /tmp/cifar10_data 中即可。

本文是针对Kevin视频教程的学习笔记,代码部分主要如下:

(不懂的函数可查阅TensorFlow官方网站的API手册:https://www.tensorflow.org/versions/r0.12/api_docs/python/nn.html#neural-network)
1. cifar10_input.py

#By @Kevin Xu
#kevin28520@gmail.com
#Youtube: https://www.youtube.com/channel/UCVCSn4qQXTDAtGWpWAe4Plw
#Chinese weibo: http://bit.ly/2nAmOcO
#The aim of this project is to use TensorFlow to process our own data.
#    - cifar10_input.py:  read in data and generate batches
#    - cifar10.py: build the model architecture, train, evaluate
# I used Ubuntu with Python 3.5, TensorFlow 1.0*, other OS should also be good.
# I didn't use data augmentation; training took less than 30 mins for 10K steps.
# data: cifar10 binary version
# https://www.cs.toronto.edu/~kriz/cifar.html
# data size: ~184M
# How to run?
# 0. you need to change the data directory
# 1. run cifar10.py
# 2. call train() in the console to train the model
# 3. call evaluate() in the console to test on the test data
 
# Note: 
# it is suggested to restart your kernel before training the model again
# (in order to clear all the variables in the memory)
# Otherwise errors may occur: conv1/weights/biases already exist......
#%%
import tensorflow as tf
import numpy as np
import os
#%% Reading data
 
def read_cifar10(data_dir, is_train, batch_size, shuffle):
    """Build the queue-based input pipeline for the CIFAR-10 binary files.

    Every record in the binary files is 1 label byte followed by
    32*32*3 image bytes stored channel-major (depth, height, width).

    Args:
        data_dir: directory containing ``data_batch_1.bin`` ...
            ``data_batch_5.bin`` and ``test_batch.bin``.
        is_train: boolean; True reads the five training files, False reads
            the test file.
        batch_size: number of examples per returned batch.
        shuffle: boolean; True batches with ``tf.train.shuffle_batch``,
            False batches with ``tf.train.batch``.

    Returns:
        images: 4D tensor, [batch_size, height, width, 3], tf.float32,
            standardized per image.
        labels: 2D tensor, [batch_size, 10], one-hot encoded.

    Raises:
        ValueError: if one of the expected data files does not exist.
    """
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width * img_height * img_depth
    n_classes = 10

    with tf.name_scope('input'):
        if is_train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in range(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        # Fail early with a clear message; otherwise a missing file only
        # surfaces later inside the queue-runner threads.
        for filename in filenames:
            if not tf.gfile.Exists(filename):
                raise ValueError('Failed to find file: ' + filename)

        filename_queue = tf.train.string_input_producer(filenames)

        # One fixed-length record = label byte(s) + image bytes.
        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)

        # The first byte is the label.
        label = tf.slice(record_bytes, [0], [label_bytes])
        label = tf.cast(label, tf.int32)

        # The remaining bytes are the image, stored as D/H/W.
        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])
        image = tf.transpose(image_raw, (1, 2, 0))  # convert from D/H/W to H/W/D
        image = tf.cast(image, tf.float32)

#        # data augmentation (disabled)
#        image = tf.random_crop(image, [24, 24, 3])# randomly crop the image size to 24 x 24
#        image = tf.image.random_flip_left_right(image)
#        image = tf.image.random_brightness(image, max_delta=63)
#        image = tf.image.random_contrast(image,lower=0.2,upper=1.8)

        # Subtract the per-image mean and divide by the per-image
        # standard deviation.
        image = tf.image.per_image_standardization(image)

        if shuffle:
            images, label_batch = tf.train.shuffle_batch(
                [image, label],
                batch_size=batch_size,
                num_threads=16,
                capacity=2000,
                min_after_dequeue=1500)
        else:
            images, label_batch = tf.train.batch(
                [image, label],
                batch_size=batch_size,
                num_threads=16,
                capacity=2000)

        # Sparse-label alternative (class indices instead of one-hot):
#        return images, tf.reshape(label_batch, [batch_size])

        # One-hot encode: label_batch is [batch_size, 1], so one_hot yields
        # [batch_size, 1, n_classes]; the reshape drops the middle axis.
        label_batch = tf.one_hot(label_batch, depth=n_classes)
        return images, tf.reshape(label_batch, [batch_size, n_classes])
    
#%%   TEST
# Visual sanity check of the generated batches.
# The check is guarded by ``__main__`` so that importing this module does not
# build a second input pipeline or open plot windows; it only runs when this
# file is executed directly.

if __name__ == '__main__':
    import matplotlib.pyplot as plt

    #data_dir = '/home/kevin/tensorflow/CIFAR10/data/cifar-10-batches-bin/'
    data_dir = 'D:/cat_VS_dog/CIFAR10/data/cifar-10-batches-bin/'  #My dir for data bin
    BATCH_SIZE = 10
    image_batch, label_batch = read_cifar10(data_dir,
                                            is_train=True,
                                            batch_size=BATCH_SIZE,
                                            shuffle=True)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            # just test one batch
            if not coord.should_stop():
                img, label = sess.run([image_batch, label_batch])

                for j in range(BATCH_SIZE):
                    # Labels are one-hot vectors, so print the index of the
                    # hot entry instead of formatting the array with %d.
                    print('label: %d' % np.argmax(label[j]))

                    # The images were standardized, so their values are not
                    # in [0, 1]; rescale for display only.
                    pixels = img[j, :, :, :]
                    span = pixels.max() - pixels.min()
                    if span > 0:
                        pixels = (pixels - pixels.min()) / span
                    plt.imshow(pixels)
                    plt.show()

        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
        coord.join(threads)

1.在Test中,print('label: %d' %label[j])这句运行时,弹出错误如下:

Traceback (most recent call last):
  File "<ipython-input-7-5a78e53decff>", line 23, in <module>
    print('label: %d' %label[j])
TypeError: %d format: a number is required, not numpy.ndarray

原因是标签已经做了one-hot编码,label[j]是一个长度为10的数组而不是单个数字。改成 print('label: %s' %label[j])(或注释掉该句)后不再报错,每张图片对应的输出形如:label: [ 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
2. 显示图片可能看上去较怪,这是因为代码中加了句:

image = tf.image.per_image_standardization(image)#substract off the mean and divide by the variance

如想看原图显示效果,可将该句注释掉,不过显示效果怪不影响训练。

2. cifar10.py

#By @Kevin Xu
#kevin28520@gmail.com
#Youtube: https://www.youtube.com/channel/UCVCSn4qQXTDAtGWpWAe4Plw
#Chinese weibo: http://bit.ly/2nAmOcO
 
    
#The aim of this project is to use TensorFlow to process our own data.
#    - cifar10_input.py:  read in data and generate batches
#    - cifar10.py: build the model architecture, train, evaluate
 
 
# I used Ubuntu with Python 3.5, TensorFlow 1.0*, other OS should also be good.
# I didn't use data augmentation; training took less than 30 mins for 10K steps.
 
 
# data: cifar10 binary version
# https://www.cs.toronto.edu/~kriz/cifar.html
# data size: ~184M
 
# How to run?
# 0. you need to change the data directory
# 1. run cifar10.py
# 2. call train() in the console to train the model
# 3. call evaluate() in the console to test on the test data
 
# Note: 
# it is suggested to restart your kernel before training the model again
# (in order to clear all the variables in the memory)
# Otherwise errors may occur: conv1/weights/biases already exist......
 
 
#%%
 
import os
import os.path
import math
 
import numpy as np
import tensorflow as tf
 
import cifar10_input
 
#%%
 
BATCH_SIZE = 128
learning_rate = 0.05
MAX_STEP = 10000 # with this setting, it took less than 30 mins on my laptop to train.
 
 
#%%
 
def _weight_variable(name, shape, stddev):
    """Get a float32 variable with truncated-normal init in the current scope."""
    return tf.get_variable(
        name,
        shape=shape,
        dtype=tf.float32,
        initializer=tf.truncated_normal_initializer(stddev=stddev,
                                                    dtype=tf.float32))


def _bias_variable(shape, value):
    """Get a float32 'biases' variable filled with `value` in the current scope."""
    return tf.get_variable(
        'biases',
        shape=shape,
        dtype=tf.float32,
        initializer=tf.constant_initializer(value))


def inference(images):
    '''Build the CNN that maps a batch of images to class logits.

    Architecture: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2
    -> local3 -> local4 -> softmax_linear.

    Args:
        images: 4D tensor [batch_size, img_height, img_width, img_channel].
            The height, width and channel dimensions must be statically
            known, because the size of the first fully connected layer is
            derived from them.
    Returns:
        2D tensor [batch_size, 10] of unnormalized logits (no softmax is
        applied here).
    Notes:
        In each conv layer, the kernel shape is:
        [kernel_size, kernel_size, number of input channels, number of output channels].
        The number of input channels comes from the previous layer; if the
        previous layer is THE input layer, it is the number of image channels.
    '''
    # conv1, [3, 3, 3, 96]: the first two dimensions are the patch size,
    # the next is the number of input channels,
    # the last is the number of output channels
    with tf.variable_scope('conv1') as scope:
        weights = _weight_variable('weights', [3, 3, 3, 96], stddev=0.05)
        biases = _bias_variable([96], 0.0)
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # pool1 and norm1 (stride 2 halves the spatial size)
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm1')

    # conv2, [3, 3, 96, 64]
    with tf.variable_scope('conv2') as scope:
        weights = _weight_variable('weights', [3, 3, 96, 64], stddev=0.05)
        biases = _bias_variable([64], 0.1)
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # norm2 and pool2 (stride 1: the spatial size is kept)
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001/9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                               padding='SAME', name='pooling2')

    # local3
    with tf.variable_scope('local3') as scope:
        # Flatten using the static per-example feature size instead of the
        # module-level BATCH_SIZE, so any batch size works and a mismatched
        # batch cannot be silently reshaped into the wrong rows.
        dim = int(np.prod(pool2.get_shape().as_list()[1:]))
        reshape = tf.reshape(pool2, shape=[-1, dim])
        weights = _weight_variable('weights', [dim, 384], stddev=0.004)
        biases = _bias_variable([384], 0.1)
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _weight_variable('weights', [384, 192], stddev=0.004)
        biases = _bias_variable([192], 0.1)
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # softmax (linear layer only; the softmax itself is applied in the loss)
    with tf.variable_scope('softmax_linear') as scope:
        # NOTE: the weight variable is named 'softmax_linear' rather than
        # 'weights'; the name is kept so existing checkpoints still restore.
        weights = _weight_variable('softmax_linear', [192, 10], stddev=0.004)
        biases = _bias_variable([10], 0.1)
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear
 
#%%
 
def losses(logits, labels):
    """Compute the mean softmax cross-entropy loss and log it as a summary.

    Args:
        logits: 2D tensor [batch_size, n_classes] of unnormalized scores.
        labels: 2D tensor [batch_size, n_classes], one-hot encoded.
    Returns:
        Scalar tensor: the cross-entropy averaged over the batch.
    """
    with tf.variable_scope('loss') as scope:

        # The dense cross-entropy op treats labels as per-class
        # probabilities, so they must be floating point like the logits
        # (an integer cast is only appropriate for the sparse variant below).
        labels = tf.cast(labels, logits.dtype)

        # to use this loss function, one-hot encoding is needed!
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')

        # Sparse alternative, for integer class-index labels:
#        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits\
#                        (logits=logits, labels=labels, name='xentropy_per_example')

        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name+'/loss', loss)

    return loss
 
#%% Train the model on the training data
# you need to change the training data directory below
 
def train(data_dir='D:/cat_VS_dog/CIFAR10/data/cifar-10-batches-bin/',
          log_dir='D:/cat_VS_dog/CIFAR10/logs'):
    """Train the model on the CIFAR-10 training set.

    The graph is built inside a fresh ``tf.Graph`` so that calling this
    function more than once in the same process does not fail on variables
    that already exist.

    Args:
        data_dir: directory containing the CIFAR-10 binary training files.
            (Linux example: '/home/kevin/tensorflow/CIFAR10/data/cifar-10-batches-bin/')
        log_dir: directory where summaries and checkpoints are written.
            (Linux example: '/home/kevin/tensorflow/CIFAR10/logs234/')
    """
    with tf.Graph().as_default():
        my_global_step = tf.Variable(0, name='global_step', trainable=False)

        images, labels = cifar10_input.read_cifar10(data_dir=data_dir,
                                                    is_train=True,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=True)
        logits = inference(images)
        loss = losses(logits, labels)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(loss, global_step=my_global_step)

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        init = tf.global_variables_initializer()

        # The context manager closes the session even if training fails.
        with tf.Session() as sess:
            sess.run(init)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

            try:
                for step in range(MAX_STEP):
                    if coord.should_stop():
                        break

                    if step % 100 == 0:
                        # Fetch the summary in the same run as the train
                        # step: a separate sess.run(summary_op) would
                        # dequeue another batch and log a loss for data the
                        # step was not trained on.
                        _, loss_value, summary_str = sess.run(
                            [train_op, loss, summary_op])
                        summary_writer.add_summary(summary_str, step)
                    else:
                        _, loss_value = sess.run([train_op, loss])

                    if step % 50 == 0:
                        print ('Step: %d, loss: %.4f' % (step, loss_value))

                    if step % 2000 == 0 or (step + 1) == MAX_STEP:
                        checkpoint_path = os.path.join(log_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)

            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                # Stop and join the input threads before the session closes,
                # and flush pending summaries to disk.
                coord.request_stop()
                coord.join(threads)
                summary_writer.close()
 
 
#%% To test the model on the test data
 
def evaluate(log_dir='/home/kevin/tensorflow/CIFAR10/logs10000/',
             test_dir='/home/kevin/tensorflow/CIFAR10/data/cifar-10-batches-bin/',
             n_test=10000):
    """Restore the latest checkpoint and print top-1 precision on the test set.

    Args:
        log_dir: directory containing the checkpoints to restore.
            NOTE(review): this default differs from the directory the
            hard-coded paths in train() write to; pass the training log
            directory explicitly when evaluating a freshly trained model.
        test_dir: directory containing ``test_batch.bin``.
        n_test: number of test examples to evaluate.

    The number of evaluated samples is rounded up to a whole number of
    batches (ceil(n_test / BATCH_SIZE) * BATCH_SIZE), and the precision is
    computed over that rounded count.
    """
    with tf.Graph().as_default():

        # reading test data
        images, labels = cifar10_input.read_cifar10(data_dir=test_dir,
                                                    is_train=False,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)

        logits = inference(images)
        # The input pipeline yields one-hot labels, but in_top_k expects
        # integer class indices of shape [batch_size].
        class_ids = tf.argmax(labels, 1)
        top_k_op = tf.nn.in_top_k(logits, class_ids, 1)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:

            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(log_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return

            # Checkpoint paths end in '-<global_step>'.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess = sess, coord = coord)

            try:
                num_iter = int(math.ceil(n_test / BATCH_SIZE))
                total_sample_count = num_iter * BATCH_SIZE
                true_count = 0
                step = 0

                while step < num_iter and not coord.should_stop():
                    predictions = sess.run(top_k_op)
                    true_count += np.sum(predictions)
                    step += 1

                # Compute the ratio once, after all batches were counted.
                precision = true_count / total_sample_count
                print('precision = %.3f' % precision)
            except Exception as e:
                # Hand the error to the coordinator; coord.join re-raises it.
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)
    
#%%

待续