Personal conventions
X, Y, X_train, Y_train, X_test, Y_test: raw data, usually np.array
x, y: placeholders, filled with real data at sess.run time
y_hat: a tensor (TF type) computed from x, w, etc.
y_train_predict, y_test_predict: the model's predictions on X_train and X_test
Extensions
Gradients
tf.gradients first finds the forward paths from the inputs to C, then backtracks along them to compute the gradients: [db,dW,dx]=tf.gradients(C,[b,W,x])
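A minimal sketch of a tf.gradients call (the cost C and the shapes here are purely illustrative, not taken from the cases below):

import tensorflow as tf
x = tf.placeholder(shape=(None, 1), dtype=tf.float32)
W = tf.Variable(tf.truncated_normal(shape=(1, 1)))
b = tf.Variable(tf.truncated_normal(shape=(1, 1)))
C = tf.reduce_sum(tf.square(tf.matmul(x, W) + b))  # a scalar cost built from x, W, b
db, dW, dx = tf.gradients(C, [b, W, x])            # one gradient tensor per listed input
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([db, dW, dx], feed_dict={x: [[1.0], [2.0]]}))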
Classic CNN:
input layer → (convolutional layer+ → pooling layer?)+ → fully connected layer+
"convolutional layer+" means convolutional layers can be stacked back to back, usually at most three in a row
"pooling layer?" means the pooling layer is optional (a sketch of the pattern follows)
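A minimal sketch of this pattern in TF 1.x style (the input shape, filter counts and layer sizes are my own illustrative choices, not from any of the cases below):

import tensorflow as tf
x_img = tf.placeholder(shape=(None, 28, 28, 1), dtype=tf.float32)                                  # input layer
conv1 = tf.layers.conv2d(x_img, filters=16, kernel_size=3, padding='same', activation=tf.nn.relu)  # convolutional layer
conv2 = tf.layers.conv2d(conv1, filters=16, kernel_size=3, padding='same', activation=tf.nn.relu)  # stacked convolutional layer
pool1 = tf.layers.max_pooling2d(conv2, pool_size=2, strides=2)                                     # optional pooling layer
flat = tf.layers.flatten(pool1)
fc1 = tf.layers.dense(flat, units=64, activation=tf.nn.relu)                                       # fully connected layer
logits = tf.layers.dense(fc1, units=10)                                                            # output layer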
Case studies
Case 1
- A simple linear regression
- Stochastic gradient descent (the mini-batch sampling uses np; would tf be better for massive data?)
- Plotting the loss curve, etc.
import tensorflow as tf
import numpy as np
X=np.linspace(start=0,stop=5,num=300).reshape(300,-1)
Y=3*X+np.random.normal(loc=0,scale=1,size=300).reshape(300,-1)
#%%
sess=tf.Session()
x=tf.placeholder(shape=(None,1),dtype=tf.float32,name='x')
y=tf.placeholder(shape=(None,1),dtype=tf.float32,name='y')
A=tf.Variable(tf.truncated_normal(shape=(1,1)))
b=tf.Variable(tf.truncated_normal(shape=(1,1)))
y_hat=tf.matmul(x,A)+b
loss_l2=tf.reduce_sum(tf.square(y-y_hat))
train_step=tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss_l2)
sess.run(tf.global_variables_initializer())
loss_list_l2=[]
batch_size=10
for i in range(1000):
    choice_index=np.random.choice(len(X),size=batch_size)  # sample a mini-batch of indices
    sess.run(train_step,feed_dict={x:X[choice_index],y:Y[choice_index]})
    loss_list_l2.append(sess.run(loss_l2,feed_dict={x:X,y:Y}))  # track the loss on the full data
y_predict_l2=sess.run(y_hat,feed_dict={x:X})
#%%
import matplotlib.pyplot as plt
plt.plot(X,Y,'.')
plt.plot(X,y_predict_l2,'-')
plt.figure()
plt.plot(loss_list_l2)
plt.show()
# L1 regression
loss_l1=tf.reduce_sum(tf.abs(y-y_hat))
# Deming regression (total regression): the loss is built from each point's perpendicular distance to the fitted line (averaged here with reduce_mean)
loss_total=tf.reduce_mean(tf.div(tf.abs(y-y_hat),tf.sqrt(1+tf.square(A))))
# Elastic net (in the code examples of 《TensorFlow机器学习实战指南》 both the regularization terms and the error are computed with reduce_mean, whereas the elastic-net formula on Wikipedia corresponds to reduce_sum; I haven't yet looked into which is more appropriate)
el_param1=tf.constant(1,dtype=tf.float32)
el_param2=tf.constant(1,dtype=tf.float32)
l1_term=el_param1*tf.reduce_sum(tf.abs(A))
l2_term=el_param2*tf.reduce_sum(tf.square(A))
loss=tf.reduce_sum(tf.square(y-y_hat))+l1_term+l2_term
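Any of these alternative losses plugs into the same training loop as the L2 version above; a minimal sketch, reusing the session, placeholders and variables already defined, with loss_l1 chosen purely as an example (train_step_l1 and loss_list_l1 are names introduced here):

train_step_l1=tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss_l1)
sess.run(tf.global_variables_initializer())  # re-initialize A and b before retraining
loss_list_l1=[]
for i in range(1000):
    choice_index=np.random.choice(len(X),size=batch_size)
    sess.run(train_step_l1,feed_dict={x:X[choice_index],y:Y[choice_index]})
    loss_list_l1.append(sess.run(loss_l1,feed_dict={x:X,y:Y}))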
Case 2: classification
import pandas as pd
from sklearn import datasets
import tensorflow as tf
dataset = datasets.load_iris()
X = dataset.data
Y = pd.get_dummies(pd.Series(dataset.target)).values
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
# %%
sess = tf.Session()
w1 = tf.Variable(tf.truncated_normal(shape=(4, 3), stddev=1), name='weight1')
b1 = tf.Variable(tf.truncated_normal(shape=(1, 3), stddev=1), name='bias1')
x = tf.placeholder(shape=(None, 4), dtype=tf.float32)
y = tf.placeholder(shape=(None, 3), dtype=tf.float32)
y_hat = tf.nn.softmax(tf.matmul(x, w1) + b1)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_hat), axis=1))
# Alternative formulation using the logits directly (in theory there should be little difference, but in practice it seems to converge slightly faster):
# a=tf.matmul(x,w1)+b1
# y_hat=tf.nn.sigmoid(a)
# cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=a, labels=y))
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(cross_entropy)
sess.run(tf.global_variables_initializer())
# %%
import random
dataset_size = X_train.shape[0]
batch_size = 10
cross_entropy_list = []
for i in range(50000):
    choice = random.choices(range(dataset_size), k=batch_size)  # sample a mini-batch of training indices
    sess.run(train_step, feed_dict={x: X_train[choice], y: Y_train[choice]})  # train on the training set only
    cross_entropy_list.append(sess.run(cross_entropy, feed_dict={x: X_train, y: Y_train}))
# %%
y_train_predict = sess.run(y_hat, feed_dict={x: X_train})
y_test_predict = sess.run(y_hat, feed_dict={x: X_test})
import sklearn.metrics as metrics
print('train', metrics.confusion_matrix(Y_train.argmax(axis=1), y_train_predict.argmax(axis=1)))
print('test', metrics.confusion_matrix(Y_test.argmax(axis=1), y_test_predict.argmax(axis=1)))
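If an overall accuracy number is wanted alongside the confusion matrices, a minimal sketch (accuracy_score is from the sklearn.metrics module already imported above):

print('train accuracy', metrics.accuracy_score(Y_train.argmax(axis=1), y_train_predict.argmax(axis=1)))
print('test accuracy', metrics.accuracy_score(Y_test.argmax(axis=1), y_test_predict.argmax(axis=1)))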
# sess.close()
# %%
import matplotlib.pyplot as plt
plt.plot(cross_entropy_list)
plt.show()
References
- Nick McClure: 《TensorFlow机器学习实战指南》, 机械工业出版社
- Ian Goodfellow: 《深度学习》, 人民邮电出版社
- 王琛 et al.: 《深度学习原理与TensorFlow实战》, 电子工业出版社
- 李嘉璇: 《TensorFlow技术解析与实战》, 人民邮电出版社
- 黄文坚: 《TensorFlow实战》, 电子工业出版社
- 郑泽宇 et al.: 《TensorFlow实战Google深度学习框架》, 电子工业出版社