Underfitting:

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist


def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # Plot validation loss and accuracy from a Keras History object
    # on shared axes.
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()


(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
# A single softmax layer: too little capacity, so the model underfits.
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)

  

[Figure: validation loss/accuracy curves for the single-layer softmax model, showing underfitting]

The book Deep Learning with Python describes this underfitting phenomenon and the ideas for fixing it:

[Figure: excerpt from Deep Learning with Python on underfitting and its remedies]

OK, with those ideas in mind, let's improve the model:

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist


def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # Plot validation loss and accuracy from a Keras History object
    # on shared axes.
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()


(train_images, train_labels), _ = mnist.load_data()
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
# The same small model as before, kept for comparison.
model = keras.Sequential([layers.Dense(10, activation="softmax")])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_small_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_small_model)

# A larger model: two hidden layers give it enough capacity to fit MNIST.
model = keras.Sequential([
    layers.Dense(128, activation="relu"),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
history_large_model = model.fit(
    train_images, train_labels,
    epochs=20,
    batch_size=128,
    validation_split=0.2)
plot_val_loss_and_acc(history_large_model)

  

Now we can see signs of overfitting!

[Figure: validation curves for the larger model, showing overfitting]

Next, let's see how L1/L2 regularization and dropout can handle the overfitting.
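As a quick reminder (a standard formulation, not from the book's code): L1/L2 regularization adds a weight penalty to the training loss, while dropout randomly zeroes a fraction of a layer's activations during training. The regularized loss is

$$\mathcal{L}_{\text{total}} = \mathcal{L}_{\text{data}} + \lambda_1 \sum_i |w_i| + \lambda_2 \sum_i w_i^2$$

where lambda_1 and lambda_2 control the penalty strength; the 0.002 passed to regularizers.l2 below plays the role of lambda_2. The full script: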

from tensorflow.keras import regularizers
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb


def plot_val_loss_and_acc(history):
    import matplotlib.pyplot as plt
    # Plot validation loss and accuracy from a Keras History object
    # on shared axes.
    val_loss = history.history["val_loss"]
    epochs = range(1, len(val_loss) + 1)
    plt.plot(epochs, val_loss, "b--",
             label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()

    val_acc = history.history["val_accuracy"]
    plt.plot(epochs, val_acc, "b-",
             label="Validation accuracy")
    plt.title("Validation loss and accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy/Loss")
    plt.legend()
    plt.show()


# L1/L2 and dropout
############################################################################

(train_data, train_labels), _ = imdb.load_data(num_words=10000)

def vectorize_sequences(sequences, dimension=10000):
    # Multi-hot encode: each review becomes a 10000-dim binary vector
    # with 1s at the indices of the words it contains.
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results
train_data = vectorize_sequences(train_data)
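# Quick sanity check (my illustration, not from the book): a review containing
# word indices 3 and 5 becomes a 10000-dim multi-hot vector.
assert vectorize_sequences([[3, 5]])[0][3] == 1.0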

# A small model: too little capacity, so it underfits.
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_small_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_small_original)


# A larger model has enough capacity, but now it overfits!
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_original = model.fit(train_data, train_labels,
                             epochs=20, batch_size=512, validation_split=0.4)

plot_val_loss_and_acc(history_original)

"""
Version of the model with lower capacity
model = keras.Sequential([
    layers.Dense(4, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_smaller_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)

Version of the model with higher capacity    
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(512, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_larger_model = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
"""

### Adding L2 weight regularization to the model
model = keras.Sequential([
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(16,
                 kernel_regularizer=regularizers.l2(0.002),
                 activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_l2_reg = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)

plot_val_loss_and_acc(history_l2_reg)

# Other available weight regularizers:
# regularizers.l1(0.001)
# regularizers.l1_l2(l1=0.001, l2=0.001)
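# Sanity check (a sketch of mine, not from the book): Keras exposes each
# layer's regularization penalty via `model.losses`. Recomputing
# 0.002 * sum(w**2) over the regularized kernels by hand should match it,
# which shows exactly what l2(0.002) adds to the training loss.
import tensorflow as tf

manual_penalty = sum(
    0.002 * tf.reduce_sum(tf.square(layer.kernel))
    for layer in model.layers
    if getattr(layer, "kernel_regularizer", None) is not None)
print(float(manual_penalty), float(tf.add_n(model.losses)))  # the two should match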

model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(16, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
history_dropout = model.fit(
    train_data, train_labels,
    epochs=20, batch_size=512, validation_split=0.4)
plot_val_loss_and_acc(history_dropout)
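To see what layers.Dropout(0.5) is doing under the hood, here is a minimal NumPy sketch of the "inverted dropout" scheme (my illustration; the rate and shapes are arbitrary): during training each activation is zeroed with probability rate, and the survivors are scaled up by 1 / (1 - rate), so no rescaling is needed at inference time.

import numpy as np

def dropout_train(layer_output, rate=0.5):
    # Keep each unit with probability (1 - rate), then rescale so the
    # expected magnitude of the activations stays the same.
    rng = np.random.default_rng()
    mask = rng.random(layer_output.shape) >= rate
    return layer_output * mask / (1.0 - rate)

# At inference time, inverted dropout is simply the identity function.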

 

The original small (underfitting) model:

[Figure: validation curves for the small IMDB model]

The overfitting model:

[Figure: validation curves for the larger IMDB model]

After adding L2 regularization:

[Figure: validation curves with L2 weight regularization]

After adding dropout:

[Figure: validation curves with dropout]