Underfitting:
from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.datasets import mnist
def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # history.history holds the per-epoch metrics recorded by model.fit()
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
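# A deliberately tiny model: a single softmax layer (essentially logistic regression)
# has too little capacity for MNIST, so it cannot drive the loss very low (underfitting).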
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)
The book Deep Learning with Python describes this underfitting behavior and its main remedy: give the model more capacity (more layers and/or more units per layer).
OK, with that idea in mind, let's improve the model:
from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.datasets import mnist
def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # history.history holds the per-epoch metrics recorded by model.fit()
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)
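# A much larger model: two hidden layers of 128 units give plenty of capacity for
# MNIST, so after a few epochs the validation loss starts rising again (overfitting).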
model = keras.Sequential([
    layers.Dense(128, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_large_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_large_model)
Now we can see clear signs of overfitting!
Next, let's look at L1/L2 regularization and dropout as ways of dealing with overfitting.
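First, a rough NumPy sketch of what the two techniques do, just for intuition (this is not how Keras implements them internally, and the shapes and values below are made up):

import numpy as np

rng = np.random.default_rng(0)

# L2 weight regularization: regularizers.l2(0.002) adds a penalty of
# 0.002 * w**2 for every weight w in the layer's kernel to the training loss.
kernel = rng.normal(size=(16, 16))        # hypothetical layer weight matrix
l2_penalty = 0.002 * np.sum(kernel ** 2)  # extra term added to the loss

# Dropout(0.5) at training time: randomly zero about half of the layer's output
# features, and rescale the survivors by 1 / (1 - rate) so the expected
# activation stays the same ("inverted dropout", which is what Keras does).
layer_output = rng.normal(size=(4, 16))   # hypothetical batch of activations
mask = rng.random(layer_output.shape) >= 0.5
dropped_output = layer_output * mask / 0.5

print("L2 penalty:", l2_penalty)
print("fraction of features zeroed:", 1 - mask.mean())

Now the full script: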
from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
from tensorflow.keras.datasets import mnist
def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # history.history holds the per-epoch metrics recorded by model.fit()
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()
# (The MNIST experiments from the two scripts above were left here commented out;
#  only the IMDB experiments below are actually run.)
# L1/L2 and dropout
############################################################################
(train_data, train_labels), _ = imdb.load_data(num_words=10000)
def vectorize_sequences(sequences, dimension=10000):
    # Multi-hot encode each review: results[i, j] = 1 if word index j appears in review i
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
train_data = vectorize_sequences(train_data)
# A small model (low capacity):
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_small_original = model.fit(train_data, train_labels,
                                   epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_small_original)
# A larger model has the capacity we need, but it overfits:
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_original)
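# Reference only: lower- and higher-capacity variants from the book, kept inside
# the string below so they are not run.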
"""
Version of the model with lower capacity
model = keras.Sequential([
layers.Dense(4, activation="relu"),
layers.Dense(4, activation="relu"),
layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
loss="binary_crossentropy",
metrics=["accuracy"])
history_smaller_model = model.fit(
train_data, train_labels,
epochs=20, batch_size=512, validation_split=0.4)
Version of the model with higher capacity
model = keras.Sequential([
layers.Dense(512, activation="relu"),
layers.Dense(512, activation="relu"),
layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
loss="binary_crossentropy",
metrics=["accuracy"])
history_larger_model = model.fit(
train_data, train_labels,
epochs=20, batch_size=512, validation_split=0.4)
"""
### Adding L2 weight regularization to the model
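# l2(0.002) means every coefficient in the layer's weight matrix adds
# 0.002 * weight_coefficient_value ** 2 to the model's total loss,
# which pushes the weights toward smaller values.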
model = keras.Sequential([
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_l2_reg = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_l2_reg)
# Alternative regularizers (the `regularizers` module is already imported above):
# regularizers.l1(0.001)
# regularizers.l1_l2(l1=0.001, l2=0.001)
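### Adding dropout to the model
# Dropout(0.5) randomly zeroes 50% of the layer's output features during
# training, which makes it harder for the model to memorize training patterns.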
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_dropout = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_dropout)
The original underfitted model:
The overfitted model:
After adding L2 regularization:
After adding dropout: