General code
Import packages and load the data
import tensorflow as tf
from tensorflow import keras
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
x_valid, y_valid = train_images[:5000], train_labels[:5000]
x_train, y_train = train_images[5000:], train_labels[5000:]
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
Build and run the model
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(10, activation='softmax')
])
# you can also add layers one at a time with model.add(keras.layers.Dense(128, activation='relu'))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
history = model.fit(x=x_train, y=y_train, epochs=10, validation_data=(x_valid, y_valid))
# alternatively, validation_split=0.01 also splits off a validation set automatically
predictions = model.predict(test_images)
Inspecting the model
model.layers
model.summary()
history.history # holds the metric values recorded during training, one list per metric
model.evaluate(test_images, test_labels)
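# a quick look at what history.history holds; with the compile settings above,
# recent TF versions record the keys 'loss', 'accuracy', 'val_loss', 'val_accuracy'
print(history.history.keys())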
# inspect the parameters
layer = model.layers[1] # take a Dense layer as an example
layer.variables # a list made up of the two attributes below
layer.kernel, layer.bias
layer.trainable_variables
# a layer can be called directly, like a function:
layer(tf.zeros([10, 784])) # 784 matches this layer's input dimension
A hand-rolled snippet to plot how the metrics evolve
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)
for name, values in history.history.items():
    ax.plot(values, label=name)
ax.set_ylim(0,1)
ax.legend()
plt.show()
Some building blocks
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
loss_object(y_true=y, y_pred=model(x)) # y: a batch of labels, x: the matching inputs
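Note that from_logits=True expects raw, pre-softmax scores; with the softmax model above you would use from_logits=False instead. A minimal self-contained sketch (the logits and labels here are made up for illustration):
import tensorflow as tf
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
logits = tf.constant([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]]) # raw scores: 2 samples, 3 classes
labels = tf.constant([0, 1])
print(loss_object(y_true=labels, y_pred=logits).numpy()) # mean cross-entropy over the batch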
callbacks
- EarlyStopping
- ModelCheckpoint
- TensorBoard
import os
logdir='.\\callbacks'
output_model_file=os.path.join(logdir,'fashion_mnist_model.h5')
callbacks=[keras.callbacks.TensorBoard(logdir),
keras.callbacks.ModelCheckpoint(output_model_file,save_best_only=True), # if False, the last model is saved instead
keras.callbacks.EarlyStopping(min_delta=1e-3,patience=5)
]
history = model.fit(x=train_images, y=train_labels, epochs=100, validation_split = 0.01 ,callbacks=callbacks)
TensorBoard
When using TensorBoard, Windows has a bug with log paths written as plain strings; handle it like this (works on both Linux and Windows):
import pathlib
import shutil
import tempfile
logdir = pathlib.Path(tempfile.mkdtemp())/"tensorboard_logs"
shutil.rmtree(logdir, ignore_errors=True)
name = "model/tiny_model" # when running several models, change this name and their curves end up plotted together
tf.keras.callbacks.TensorBoard(logdir/name)
The command to launch TensorBoard
tensorboard --logdir=callbacks
ModelCheckpoint
step 1: train and save a model (only the weights here)
import tensorflow as tf
from tensorflow import keras
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
x_valid, y_valid = train_images[:5000], train_labels[:5000]
x_train, y_train = train_images[5000:], train_labels[5000:]
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(10, activation='softmax')
])
# you can also add layers one at a time with model.add(keras.layers.Dense(128, activation='relu'))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
history = model.fit(x=x_train, y=y_train, epochs=10,
validation_data=(x_valid, y_valid),
callbacks=[keras.callbacks.ModelCheckpoint('saved_model/cp.ckpt',
save_weights_only=True,
verbose=1)])
step 2: rebuild the same model
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(10, activation='softmax')
])
# you can also add layers one at a time with model.add(keras.layers.Dense(128, activation='relu'))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
step 3: load the weights and evaluate again
model.load_weights('saved_model/cp.ckpt')
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
Save the entire model
model.save('my_model.h5')
new_model = tf.keras.models.load_model('my_model.h5')
This saves all of the model's information:
- The weight values
- The model’s configuration(architecture)
- The optimizer configuration
(However, the current version of TF cannot yet save tf.train optimizers; you have to recreate them after loading.)
EarlyStopping
keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
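A hedged usage sketch: adding restore_best_weights=True also rolls the weights back to the best epoch when training stops early (variable names reuse the fashion-MNIST setup above):
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                           restore_best_weights=True)
history = model.fit(x=x_train, y=y_train, epochs=100,
                    validation_data=(x_valid, y_valid),
                    callbacks=[early_stop])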
DNN tricks
regularizers
from tensorflow.keras import regularizers
keras.layers.Dense(100, activation='elu', kernel_regularizer=regularizers.l2(0.001))
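For placement, a minimal sketch of a full model using such a layer; the L2 penalty 0.001 * sum(w^2) gets added onto the training loss (layer sizes here are illustrative):
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(100, activation='elu',
                       kernel_regularizer=regularizers.l2(0.001)),
    keras.layers.Dense(10, activation='softmax')
])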
dropout
Dropout is usually not added to every layer, only to the last few.
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.AlphaDropout(rate=0.5))
About AlphaDropout (see the sketch after this list):
- it keeps the mean and variance unchanged
- so the normalization property is preserved
- and it can therefore be combined with BatchNormalization
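Putting the notes above together, a minimal sketch: selu layers with AlphaDropout only near the output (layer sizes are arbitrary):
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(100, activation='selu'),
    keras.layers.Dense(100, activation='selu'),
    keras.layers.AlphaDropout(rate=0.5), # only the last few layers, as noted above
    keras.layers.Dense(10, activation='softmax')
])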
Normalization
Normalize X first; a minimal sketch follows.
The batch normalization below only works at its best when the input data has been normalized.
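A minimal sketch of that normalization step for the fashion-MNIST arrays used above (dividing by 255 rescales the pixels to [0, 1]; StandardScaler-style standardization works too):
x_train_scaled = x_train / 255.0
x_valid_scaled = x_valid / 255.0
test_images_scaled = test_images / 255.0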
batch normalization
Batch normalization alleviates vanishing gradients in deep networks, because each layer's inputs stay on a more regular scale.
There are three ways to place it (in the code below):
- normalize first, then activate,
- activate first, then normalize,
- use selu as the activation function: it has normalization built in and, by comparison, trains faster with better results
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28))
])
for _ in range(20):
    model.add(keras.layers.Dense(100, activation='selu'))
    # activate first, then normalize
    # model.add(keras.layers.Dense(100, activation='relu'))
    # model.add(keras.layers.BatchNormalization())
    # normalize first, then activate
    # model.add(keras.layers.Dense(100))
    # model.add(keras.layers.BatchNormalization())
    # model.add(keras.layers.Activation('relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
The Wide & Deep model
A model Google published in 2016, usable for both classification and regression; it powered the recommendation algorithm in Google Play.
Sparse features vs. dense features (a code contrast follows the list)
Sparse features
- what they are
  - discrete-valued features
  - one-hot encoding produces them
- pros and cons
  - pro: widely used in industry
  - cons: require manual design; can overfit; crossing features makes the number of combinations explode
Dense features
- what they are: real-valued vector representations
- pros and cons
  - pros: distances carry information; they handle feature combinations never seen in training; less manual work
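To make the contrast concrete, a small sketch (the vocabulary and ids are made up): one-hot gives a sparse vector per id, while an Embedding layer gives a learned dense vector:
import tensorflow as tf
ids = tf.constant([0, 2, 3]) # three category ids from a vocabulary of size 4
one_hot = tf.one_hot(ids, depth=4) # sparse: mostly zeros, a single 1 per row
embedding = tf.keras.layers.Embedding(input_dim=4, output_dim=2)
dense = embedding(ids) # dense: real-valued 2-d vectors; distances carry information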
step 1: load the data
import tensorflow as tf
from tensorflow import keras
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
housing = fetch_california_housing()
x_train_all, x_test, y_train_all, y_test = train_test_split(
housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid = train_test_split(
x_train_all, y_train_all, random_state = 11)
step 2: standardize
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)
step 3: build the model
input = keras.layers.Input(shape=x_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation='relu')(input)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
# function composition: f(x) = h(g(x))
concat = keras.layers.concatenate([input, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs = [input],
outputs = [output])
model.compile(optimizer='adam',
loss='mean_squared_error')
history = model.fit(x=x_train_scaled, y=y_train, epochs=100, validation_data=(x_valid_scaled, y_valid))
# alternatively, validation_split=0.01 also splits off a validation set automatically
predictions = model.predict(x_test_scaled) # use the scaled test set, matching the training data
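The model above feeds the same input into both the wide path (the concatenate skip connection) and the deep path. A hedged sketch of a genuinely two-input variant, splitting the 8 housing features into overlapping "wide" and "deep" slices (the split is arbitrary, for illustration):
input_wide = keras.layers.Input(shape=[5]) # e.g. features 0-4
input_deep = keras.layers.Input(shape=[6]) # e.g. features 2-7
hidden1 = keras.layers.Dense(30, activation='relu')(input_deep)
hidden2 = keras.layers.Dense(30, activation='relu')(hidden1)
concat = keras.layers.concatenate([input_wide, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_wide, input_deep], outputs=[output])
model.compile(optimizer='adam', loss='mean_squared_error')
# fit/predict then take two arrays, e.g. [x_train_scaled[:, :5], x_train_scaled[:, 2:]]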
A regression example
Import the packages and data, split off the training set
from tensorflow import keras
import tensorflow as tf
from sklearn import model_selection
from sklearn import datasets
X, y, coef = \
datasets.make_regression(n_samples=10000,
n_features=5,
n_informative=3, # 3 of the features carry information
n_targets=1, # number of targets
bias=1, # i.e. the intercept
coef=True, # when True, the true coef values are returned
noise=1, # standard deviation of the noise
)
X_train_all, X_test, y_train_all, y_test=model_selection.train_test_split(X,y,test_size=0.3)
X_train, X_valid, y_train, y_valid=model_selection.train_test_split(X_train_all,y_train_all,test_size=0.1)
Build the model
model=keras.Sequential()
model.add(keras.layers.Dense(64,activation='relu',input_shape=X_train.shape[1:]))
model.add(keras.layers.Dense(64,activation='relu'))
model.add(keras.layers.Dense(1))
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
loss='mse',
metrics=['mae', 'mse'])
Training
history=model.fit(x=X_train,y=y_train,epochs=100,validation_data=(X_valid,y_valid),
callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)])
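A brief follow-up to check generalization on the held-out split:
model.evaluate(X_test, y_test, verbose=2) # reports the mse loss plus the mae and mse metrics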
Defining your own layer
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        # add_weight replaces the deprecated add_variable
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.num_outputs])

    def call(self, input):
        return tf.matmul(input, self.kernel)
layer = MyDenseLayer(10)
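A hedged usage sketch: build() runs on the first call, which is when the kernel is created:
y = layer(tf.zeros([10, 5])) # builds a kernel of shape (5, 10); output shape (10, 10)
print([var.name for var in layer.trainable_variables])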
Defining a whole network
class ResnetIdentityBlock(tf.keras.Model):
    def __init__(self, kernel_size, filters):
        super(ResnetIdentityBlock, self).__init__(name='')
        filters1, filters2, filters3 = filters
        self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1))
        self.bn2a = tf.keras.layers.BatchNormalization()
        self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same')
        self.bn2b = tf.keras.layers.BatchNormalization()
        self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1))
        self.bn2c = tf.keras.layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        x = self.conv2a(input_tensor)
        x = self.bn2a(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2b(x)
        x = self.bn2b(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2c(x)
        x = self.bn2c(x, training=training)
        x += input_tensor
        return tf.nn.relu(x)
block = ResnetIdentityBlock(1, [1, 2, 3])
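A hedged usage sketch. For the residual addition x += input_tensor to be shape-compatible, the input's channel count must equal filters3 (3 here):
out = block(tf.zeros([1, 2, 3, 3])) # NHWC batch with 3 channels
print(out.shape) # (1, 2, 3, 3)
print(len(block.trainable_variables)) # conv kernels/biases plus BN gammas/betas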