【CNN】Implementation



September 26, 2018    Author: Guofei

Category: 0x25_CV    Article No.: 262

Copyright: this article was written by Guofei. Feel free to repost; just include a link to the original. My email
Original link: https://www.guofei.site/2018/09/26/cnn2.html


Convolution and Pooling

Convolution kernels

tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')
# kernel[0], kernel[1]: spatial size (height, width) of the convolution kernel
# kernel[2]: number of input channels; 1 for grayscale images, 3 for RGB
# kernel[3]: number of kernels, i.e. the number of output channels
# padding='SAME': zero-pad the borders so that (with stride 1) the output has the same spatial size as the input
# padding='VALID': no padding; the kernel never slides past the border
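
To make the two padding modes concrete, here is a minimal shape check (a sketch in TF 1.x; the 28x28 input and 5x5 kernel are illustrative, not from the post):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))  # a batch of grayscale images
kernel = tf.Variable(tf.truncated_normal((5, 5, 1, 32), stddev=0.1))  # 32 kernels of size 5x5
conv_same = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')
print(conv_same.get_shape())   # (?, 28, 28, 32): SAME keeps 28x28, channels become 32
conv_valid = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
print(conv_valid.get_shape())  # (?, 24, 24, 32): VALID shrinks each side by kernel_size - 1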

Pooling

tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
# pooling window 3*3, stride 2 in each spatial direction
# padding='SAME': zero-pad the borders; the output spatial size is ceil(input_size / stride)
# padding='VALID': no padding, the window never slides past the border; the output size is floor((input_size - ksize) / stride) + 1
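
The same kind of shape check for pooling (illustrative shapes again; note that SAME with stride 2 still halves the spatial size, it only avoids dropping border pixels):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 28, 28, 32))
pool_same = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
print(pool_same.get_shape())   # (?, 14, 14, 32): ceil(28 / 2) = 14
pool_valid = tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
print(pool_valid.get_shape())  # (?, 13, 13, 32): floor((28 - 3) / 2) + 1 = 13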

FC

  • tf.contrib.layers.flatten(P): given an input P, this function flattens each example into a 1D vector while maintaining the batch size. It returns a flattened tensor with shape [batch_size, k].

  • tf.contrib.layers.fully_connected(F, num_outputs): given the flattened input F, it returns the output computed with a fully connected layer.

In the last function above (tf.contrib.layers.fully_connected), the fully connected layer automatically initializes the weights in the graph and keeps training them as you train the model. Hence, you do not need to initialize those weights yourself when initializing the parameters.

tf.contrib.layers.flatten(inputs, outputs_collections=None, scope=None)
# inputs: A tensor of size [batch_size, ...]
# outputs_collections: Collection to add the outputs.
# scope: Optional scope for name_scope.
# Returns: A flattened tensor with shape [batch_size, k].

tf.contrib.layers.fully_connected(
    inputs,
    num_outputs,
    activation_fn=tf.nn.relu,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=tf.zeros_initializer(),
    biases_regularizer=None,
    trainable=True,
    scope=None
)
# inputs: a tensor of at least rank 2, e.g. [batch_size, depth] or [None, None, None, channels]
# activation_fn: activation function; the default is ReLU
# scope: Optional scope for variable_scope.
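
Putting the two together, a typical CNN head might look like this (a sketch; the 7x7x64 input shape is illustrative):

import tensorflow as tf

p = tf.placeholder(tf.float32, shape=(None, 7, 7, 64))        # output of the last pooling layer
flat = tf.contrib.layers.flatten(p)                           # each example flattened, batch dim kept
fc = tf.contrib.layers.fully_connected(flat, num_outputs=10)  # weights and biases are created and trained automatically
print(flat.get_shape(), fc.get_shape())                       # (?, 3136) (?, 10), since 7 * 7 * 64 = 3136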

Worked example

Step 1: Get the data

import requests
url = 'https://github.com/guofei9987/datasets_for_ml/blob/master/MNIST/MNIST_data_csv.zip?raw=true'
r = requests.get(url)
with open('MNIST_data_csv.zip', 'wb') as f:
    f.write(r.content)
import zipfile
azip = zipfile.ZipFile('MNIST_data_csv.zip')
azip.extractall()
# %%
import pandas as pd
mnist_train_images = pd.read_csv('mnist_train_images.csv', header=None).values
mnist_test_images = pd.read_csv('mnist_test_images.csv', header=None).values
mnist_train_labels = pd.read_csv('mnist_train_labels.csv', header=None).values
mnist_test_labels = pd.read_csv('mnist_test_labels.csv', header=None).values

# %%
import numpy as np

X_train = np.array([img.reshape(28, 28) for img in mnist_train_images])
X_test = np.array([img.reshape(28, 28) for img in mnist_test_images])
# add a channel dimension: these are grayscale images, so num_channels = 1
X_train = np.expand_dims(X_train, 3)
X_test = np.expand_dims(X_test, 3)
Y_train = mnist_train_labels
Y_test = mnist_test_labels
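
A quick sanity check on the resulting arrays (assuming the CSVs hold the standard MNIST splits with one-hot labels):

print(X_train.shape, Y_train.shape)  # expected: (60000, 28, 28, 1) (60000, 10)
print(X_test.shape, Y_test.shape)    # expected: (10000, 28, 28, 1) (10000, 10)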

Step 2: Define the network

# %%
data_size, image_width, image_height, num_channels = X_train.shape
target_size = mnist_train_labels.shape[1]

conv1_features = 25
conv2_features = 50
max_pool_size1 = 2
max_pool_size2 = 2
fully_connected_size1 = 100

# %%
import tensorflow as tf


x = tf.placeholder(dtype=tf.float32, shape=(None, image_width, image_height, num_channels))
y = tf.placeholder(dtype=tf.float32, shape=(None, target_size))


# conv + ReLU + pool + conv + ReLU + pool
conv1_weight = tf.Variable(tf.truncated_normal(shape=(4, 4, num_channels, conv1_features), stddev=0.1))
conv1_bias = tf.Variable(tf.zeros(shape=(conv1_features)))

conv1 = tf.nn.conv2d(x, conv1_weight, strides=(1, 1, 1, 1), padding='SAME')
relu1 = tf.nn.relu(conv1 + conv1_bias)
max_pool1 = tf.nn.max_pool(relu1, ksize=(1, max_pool_size1, max_pool_size1, 1), strides=(1, 2, 2, 1), padding='SAME')


conv2_weight = tf.Variable(tf.truncated_normal(shape=(4, 4, conv1_features, conv2_features), stddev=0.1))
conv2_bias = tf.Variable(tf.zeros(shape=(conv2_features)))

conv2 = tf.nn.conv2d(max_pool1, conv2_weight, strides=(1, 1, 1, 1), padding='SAME')
relu2 = tf.nn.relu(conv2 + conv2_bias)
max_pool2 = tf.nn.max_pool(relu2, ksize=(1, max_pool_size2, max_pool_size2, 1), strides=(1, 2, 2, 1), padding='SAME')
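
Illustrative shape checks for the stack so far (not part of the original code): each SAME-padded pool with stride 2 halves the spatial size.

print(max_pool1.get_shape())  # (?, 14, 14, 25): 28 -> 14 after the first pool
print(max_pool2.get_shape())  # (?, 7, 7, 50): 14 -> 7 after the second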




# %%
# fully connected layers

# flatten the conv output first
final_conv_shape = max_pool2.get_shape().as_list()
final_shape = final_conv_shape[1] * final_conv_shape[2] * final_conv_shape[3]  # 7 * 7 * 50 = 2450
flat_output = tf.reshape(tensor=max_pool2, shape=(-1, final_shape))

# fully connected layer 1
full1_weight = tf.Variable(tf.truncated_normal((final_shape, fully_connected_size1), stddev=0.1))
full1_bias = tf.Variable(tf.truncated_normal([1, fully_connected_size1], stddev=0.1))
fully_connected1 = tf.nn.relu(tf.matmul(flat_output, full1_weight) + full1_bias)

# dropout: keep_prob is fed as 0.75 during training and 1.0 at test time;
# tf.nn.dropout scales the kept activations by 1/keep_prob, so no extra rescaling is needed at test time
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(fully_connected1, keep_prob)

# fully connected layer 2 + softmax
full2_weight = tf.Variable(tf.truncated_normal((fully_connected_size1, target_size), stddev=0.1))
full2_bias = tf.Variable(tf.truncated_normal([1, target_size], stddev=0.1, dtype=tf.float32))
y_hat = tf.nn.softmax(tf.matmul(h_fc1_drop, full2_weight) + full2_bias)  # feed the dropout output, not fully_connected1, so dropout actually takes effect


# %%
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_hat), axis=1))
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cross_entropy)
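
One caveat: tf.log(y_hat) yields -inf whenever the softmax output hits exactly 0, which can turn the loss into NaN. A numerically safer alternative (a substitute sketch, not what this post runs) feeds the pre-softmax logits to TensorFlow's fused op and keeps y_hat = tf.nn.softmax(logits) only for prediction:

logits = tf.matmul(h_fc1_drop, full2_weight) + full2_bias  # scores before the softmax
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))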

Step 3: Train the network

sess = tf.Session()
sess.run(tf.global_variables_initializer())
batch_size = 100

for i in range(2000):
    choice_indexes = np.random.choice(data_size, batch_size)
    feed_dict = {x: X_train[choice_indexes], y: Y_train[choice_indexes], keep_prob: 0.75}
    sess.run(train_step, feed_dict=feed_dict)
    if i % 100 == 0:
        value_cross_entropy = sess.run(cross_entropy, feed_dict={x: X_test[:100], y: Y_test[:100], keep_prob: 1})
        print('iteration {i}: cross_entropy on the test set is {value_cross_entropy}'
              .format(i=i, value_cross_entropy=value_cross_entropy))

Step 4: Predict and evaluate

# model evaluation: confusion matrix on the test set
Y_test_predict = sess.run(y_hat, feed_dict={x: X_test, keep_prob: 1})
from sklearn import metrics
metrics.confusion_matrix(Y_test.argmax(axis=1), Y_test_predict.argmax(axis=1))
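
From the same predictions you can also read off an overall accuracy (a small addition, not in the original evaluation):

accuracy = (Y_test.argmax(axis=1) == Y_test_predict.argmax(axis=1)).mean()
print('test accuracy: {:.4f}'.format(accuracy))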
