My CNN Image Classification Learning Journey (1)

The CNN Learning Path

This is my first learning log since registering here. It is meant mainly as a set of notes along my own learning path, though I would be honored if any part of it proves useful as a reference for others. Without further ado, let the record begin!!

A Brief Introduction to AlexNet

Before starting on CNN image classification, I had already studied the basics of Python and the architecture of Keras neural networks, and I build on that foundation here. The network that opened the door to CNN-based image classification is AlexNet (the original paper is here: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf; interested readers can look it up themselves, though I admit I still have not read the whole paper closely). After this network, others such as VGGNet, GoogLeNet, and ResNet sprang up like mushrooms after rain, and image classification took a big step forward. My CNN journey therefore also starts with AlexNet; later I will keep training on the same tasks with other networks to discover each network's strengths and weaknesses.

MNIST Handwritten Digit Recognition

MNIST handwritten digit recognition is the entry-level dataset for learning image classification, and MNIST was also the first dataset I used. Here I implemented the recognition task in three ways:

  1. a multi-layer fully connected network;
  2. a small custom CNN;
  3. the AlexNet network.

For some reason, testing on the MNIST dataset here kept throwing errors; I will post the results next time. The code is as follows:

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.datasets import mnist
from tensorflow.keras import utils
def load_datasets1():
    """Dataset for the fully connected network: flattened 784-dim vectors."""
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_x = train_x.reshape(-1, 784).astype('float32') / 255  # scale pixels to [0, 1]
    test_x = test_x.reshape(-1, 784).astype('float32') / 255
    train_y = utils.to_categorical(train_y, 10)
    test_y = utils.to_categorical(test_y, 10)
    return train_x, train_y, test_x, test_y
def load_datasets2():
    """Dataset for the CNN variants: 28x28x1 image tensors."""
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_x = train_x.reshape(-1, 28, 28, 1).astype('float32') / 255  # scale pixels to [0, 1]
    test_x = test_x.reshape(-1, 28, 28, 1).astype('float32') / 255
    train_y = utils.to_categorical(train_y, 10)
    test_y = utils.to_categorical(test_y, 10)
    return train_x, train_y, test_x, test_y
def model1():
    """Multi-layer fully connected network."""
    model = Sequential()
    model.add(Dense(4096, activation='relu', input_dim=784))
    model.add(Dense(4096, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return model
def model2():
    """Small custom CNN: two conv/pool stages followed by dense layers."""
    model = Sequential()
    model.add(Conv2D(96, (11, 11), strides=(1, 1), input_shape=(28, 28, 1), padding='same', activation='relu',
                     kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(256, (5, 5), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dense(4096, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return model
def model3():
    # AlexNet-style architecture adapted to the 28x28x1 MNIST input
    model = Sequential()
    model.add(Conv2D(96, (11, 11), strides=(1, 1), input_shape=(28, 28, 1), padding='same', activation='relu',
                     kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(256, (5, 5), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(384, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(Conv2D(384, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return model
def train_model():
    epochs = 32
    batch_size = 256
    # Swap in load_datasets2() with model2() or model3() to try the CNN variants.
    train_x, train_y, test_x, test_y = load_datasets1()
    model = model1()
    history = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,
                        validation_data=(test_x, test_y))
    return history
if __name__ == '__main__':
    history = train_model()
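
Since the MNIST results are still pending, here is a minimal sketch of how I intend to inspect the returned history once training runs cleanly. This is my own addition, assuming matplotlib is installed and TF2-style metric names:

import matplotlib.pyplot as plt

def plot_history(history):
    """Plot training vs. validation accuracy from a Keras History object."""
    # Older standalone Keras used the keys 'acc'/'val_acc' instead.
    plt.plot(history.history['accuracy'], label='train accuracy')
    plt.plot(history.history['val_accuracy'], label='validation accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()

# Usage: plot_history(train_model())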

CIFAR-10 Image Classification

For CIFAR-10 image classification, the dataset contains 50,000 training samples and 10,000 test samples. Here I trained directly with the AlexNet network, in two runs: the first without any data augmentation for 32 epochs, and the second with data augmentation for 100 epochs.
In the run without augmentation, training-set accuracy reached 94.88% while test-set accuracy was only 63.01%, a clear case of overfitting. I believe the main cause is the lack of data augmentation, which keeps the model from generalizing well to the test set; hence the second run with augmentation, whose detailed results will have to wait for the next post. (A small standalone illustration of the augmentation idea follows the figures below.)

[Figure: CNN image classification flowchart]

[Figure: CNN image classification flowchart (2)]
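
To make the augmentation idea concrete before the full script, here is a minimal standalone sketch (my own illustration, not part of the two recorded runs) that draws a few randomly transformed variants of a single channels-last image, the format the loader below produces:

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# Modest transformations: small rotations, small shifts, horizontal flips.
augmenter = ImageDataGenerator(rotation_range=15,
                               width_shift_range=0.1,
                               height_shift_range=0.1,
                               horizontal_flip=True)

image = np.random.rand(1, 32, 32, 3)            # stand-in for one CIFAR-10 image
batches = augmenter.flow(image, batch_size=1)
variants = [next(batches)[0] for _ in range(4)]  # four randomly transformed copies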

import pickle
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import utils

def unpickle(file):
    """Load one CIFAR-10 batch file into a dict (keys are byte strings)."""
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch
def load_datasets():
    """Load the five CIFAR-10 training batches as numpy arrays."""
    labels = []
    datas = []
    for i in range(1, 6):
        filename = r"D:/学习/python学习/计算机视觉/CIFAR-10/data_batch_" + str(i)
        dict_data = unpickle(filename)
        labels.append(dict_data[b'labels'])
        datas.append(dict_data[b'data'])
    label = labels[0] + labels[1] + labels[2] + labels[3] + labels[4]
    label = utils.to_categorical(label, 10)
    # Each row is 3072 bytes laid out as three 32x32 colour planes (R, G, B):
    # reshape to (N, 3, 32, 32), then transpose to channels-last (N, 32, 32, 3).
    data = np.concatenate(datas).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1) / 255
    return label, data
def load_test_data():
    """Load the CIFAR-10 test batch as numpy arrays."""
    test_dict = unpickle("D:/学习/python学习/计算机视觉/CIFAR-10/test_batch")
    label_test = utils.to_categorical(np.array(test_dict[b'labels']), 10)
    data_test = np.array(test_dict[b'data']).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1) / 255
    return label_test, data_test

def image_generator():
    """Create an image generator with the augmentation operations to apply.
    (Not called by train_model below; kept as a standalone configuration example.)"""
    label, data = load_datasets()
    datagen = ImageDataGenerator(rotation_range=90,
                                 shear_range=0.2,
                                 zoom_range=0.2,
                                 horizontal_flip=True,
                                 vertical_flip=True,
                                 fill_mode='nearest')
    # Fit the generator on the training data (needed for feature-wise statistics).
    datagen.fit(data)
    return datagen
def train_model():
    data_augmentation = False  # toggle to True to train with real-time data augmentation
    batch_size = 1024
    epochs = 32
    train_y, train_x = load_datasets()
    test_y, test_x = load_test_data()
    model = Sequential()
    model.add(Conv2D(96, (11, 11), strides=(1, 1), input_shape=(32, 32, 3), padding='same', activation='relu',
                     kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(256, (5, 5), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    # 3x3 max pooling with stride 2
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(384, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(Conv2D(384, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(Conv2D(256, (3, 3), strides=(1, 1), padding='same', activation='relu', kernel_initializer='uniform'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    if not data_augmentation:
        print("Not using data augmentation")
        history = model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs,
                            validation_data=(test_x, test_y), shuffle=True)
    else:
        print("Using real-time data augmentation")
        data_generate = ImageDataGenerator(featurewise_center=False,  # set the input mean to 0 over the dataset
                                           samplewise_center=False,  # set each sample's mean to 0
                                           featurewise_std_normalization=False,  # divide inputs by the dataset std, feature-wise
                                           samplewise_std_normalization=False,  # divide each sample by its own std
                                           zca_epsilon=1e-6,  # epsilon for ZCA whitening, default 1e-6
                                           zca_whitening=True,  # whether to apply ZCA whitening
                                           rotation_range=0,  # degree range for random rotations (integer)
                                           width_shift_range=0,  # horizontal shift (float; treated as pixels when > 1)
                                           height_shift_range=0,  # vertical shift (float; treated as pixels when > 1)
                                           shear_range=0,  # shear intensity (float)
                                           zoom_range=0,  # random zoom range (float)
                                           channel_shift_range=0,  # range for random channel shifts (float)
                                           fill_mode='nearest',  # how to fill points outside the boundary; also constant, reflect, wrap
                                           cval=0.,  # fill value, used when fill_mode='constant'
                                           horizontal_flip=True,  # random horizontal flips
                                           vertical_flip=True,  # random vertical flips
                                           rescale=None,  # rescaling factor; no rescaling when None or 0
                                           preprocessing_function=None,  # function applied to each input
                                           validation_split=0.0)
        # ZCA whitening (like any feature-wise option) requires fitting the generator first.
        data_generate.fit(train_x)

        # Fit the model on batches generated with real-time data augmentation.
        history = model.fit(data_generate.flow(train_x, train_y, batch_size),
                            steps_per_epoch=len(train_x) // batch_size,
                            epochs=epochs, validation_data=(test_x, test_y))
    return history

if __name__ == '__main__':
    history = train_model()
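    # --- My own addition (hedged sketch; the file name is hypothetical) ---
    # Persist the metric curves so the augmented-vs-plain comparison promised
    # above can be plotted later without retraining.
    with open("cifar10_alexnet_history.pkl", "wb") as f:
        pickle.dump(history.history, f)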