The dataset contains 102 different categories of flowers. Using the official PyTorch pretrained ResNet-152 model file, we implement transfer learning. This article is divided into two steps: training and evaluation.

     The dataset and the trained .pth model files used in the code have been uploaded; download them if needed: https://pan.baidu.com/s/1I40-fMFVxeW4KASveSfTYQ extraction code: 8q5k 

Training the final fully connected layer (fc) of ResNet-152

        In the original model, the final fully connected layer outputs 1000 classes. This task has 102 classes, so we freeze the weight parameters of every layer before the fully connected layer, change the output dimension of the final layer to 102, and then train only that layer.
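Reduced to a minimal sketch using only the standard torchvision API, the idea looks like this (the full version below additionally loads local weights and appends a LogSoftmax):

from torchvision import models
import torch.nn as nn

model = models.resnet152()                        # backbone ending in a 1000-way fc layer
for p in model.parameters():                      # freeze every pretrained weight
    p.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, 102)   # new 102-way head; fresh parameters default to requires_grad=True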

from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader
import os
import torch.nn as nn
import torch
import numpy as np
import json
import matplotlib.pyplot as plt
import time
import copy

data_dir = './dataset/flower_data/'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
pth_path='./dataset/pth/resnet152-b121ed2d.pth'


# Map numeric category ids to flower names
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)


def im_convert(tensor):
    # Convert a normalized CHW tensor back into a displayable HWC numpy image
    image = tensor.to("cpu").clone().detach()
    image = image.numpy().squeeze()
    image = image.transpose(1, 2, 0)
    # Undo the Normalize transform: x * std + mean
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    image = image.clip(0, 1)
    return image


def show_image():
    fig = plt.figure(figsize=(20, 12))
    columns = 4
    rows = 2

    dataiter = iter(dataloaders['valid'])
    inputs, classes = next(dataiter)  # fetch one batch of validation images

    for idx in range(columns * rows):
        ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
        ax.set_title(cat_to_name[str(int(class_names[classes[idx]]))])
        plt.imshow(im_convert(inputs[idx]))
    plt.show()


# Data preprocessing
data_transforms = {
    'train': transforms.Compose([
        # transforms.RandomRotation(45),  # random rotation, chosen between -45 and 45 degrees (disabled here)
        transforms.CenterCrop(224),  # crop from the center
        transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip with the given probability
        transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),  # brightness, contrast, saturation, hue
        transforms.RandomGrayscale(p=0.025),  # convert to grayscale with a small probability (still 3 channels, R=G=B)
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # mean, standard deviation
    ]),
    'valid': transforms.Compose([transforms.Resize(256),
                                 transforms.CenterCrop(224),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                 ]),
}
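As a quick sanity check of this pipeline (the image path below is hypothetical), the train transform maps a PIL image to a normalized 3×224×224 tensor:

from PIL import Image

img = Image.open('./dataset/flower_data/train/1/example.jpg')  # hypothetical file name
x = data_transforms['train'](img)
print(x.shape)  # torch.Size([3, 224, 224])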


def create_dataset():
    # Build the datasets and loaders for the train / valid splits
    batch_size = 16
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
    dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in ['train', 'valid']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
    class_names = image_datasets['train'].classes  # sorted folder names, e.g. '1', '10', '100', ...
    return image_datasets, dataloaders, dataset_sizes, class_names
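One detail worth spelling out: ImageFolder assigns integer labels by the sorted order of the folder names ('1', '10', '100', ...), so an integer label must be mapped back through class_names before being looked up in cat_to_name. A small sketch:

image_datasets, dataloaders, dataset_sizes, class_names = create_dataset()
_, y = image_datasets['train'][0]   # y is the integer index assigned by ImageFolder
folder = class_names[y]             # the folder name, e.g. '1' (a string, not equal to y in general)
print(cat_to_name[folder])          # the human-readable flower name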


def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=False):
    """
    :param model_name: model name
    :param num_classes: number of classes
    :param feature_extract: if True, freeze the backbone and train only the new head
    :param use_pretrained: whether to download the pretrained weights (a local .pth is loaded below either way)
    :return: the model, model_ft
    """
    model_ft = None

    if model_name == "resnet":
        """ 
            The last two layers of the original ResNet-152 are:
                (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
                (fc): Linear(in_features=2048, out_features=1000, bias=True)  
        """
        model_ft = models.resnet152(pretrained=use_pretrained)
        model_ft.load_state_dict(torch.load(pth_path))  # load the locally downloaded pretrained weights
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features  # input dimension of the final fc layer: 2048
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes),
                                    nn.LogSoftmax(dim=1))
        """ 
            After changing the output dimension of the linear layer, the tail becomes:
                (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
                (fc): Sequential(
                    (0): Linear(in_features=2048, out_features=102, bias=True)
                    (1): LogSoftmax()
                )
        """
        print(model_ft)
    return model_ft
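A quick way to confirm that feature extraction leaves only the new head trainable is to count parameters; with feature_extract=True the trainable total should be exactly 2048 × 102 weights plus 102 biases = 208,998:

model = initialize_model('resnet', num_classes=102, feature_extract=True, use_pretrained=False)
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable)  # 208998 — only the new fc head (2048 * 102 + 102)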


def train_model(model, dataloaders, criterion, optimizer, num_epoch, filename):
    start = time.time()
    best_acc = 0
    model.to(device)

    val_acc_history = []
    train_acc_history = []
    train_loss = []
    valid_loss = []
    LRs = [optimizer.param_groups[0]['lr']]
    best_model_wts = copy.deepcopy(model.state_dict())  # keep a copy of the best model weights

    for epoch in range(num_epoch):
        print(f'Epoch {epoch} / {num_epoch - 1}')
        print('-' * 20)

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)  # inputs shape : torch.Size([16, 3, 224, 224])
                labels = labels.to(device)
                # zero the gradients
                optimizer.zero_grad()

                # compute gradients (and update weights) only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)  # torch.Size([16, 102])
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)  # argmax over the class dimension (dim=1: max of each row)
                    # update weights during the training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # accumulate the loss and the number of correct predictions
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            time_spend = time.time() - start
            print('Time spend {:.0f}m {:.0f}s'.format(time_spend // 60, time_spend % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # keep the model that performs best on the validation set
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_loss.append(epoch_loss)
                scheduler.step()  # StepLR decays by epoch count; it takes no metric argument
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_loss.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])

    time_spend_all = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(time_spend_all // 60, time_spend_all % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)  # restore the best weights before returning
    return model, val_acc_history, train_acc_history, valid_loss, train_loss, LRs



if __name__ == '__main__':
    # Whether to reuse the pretrained features as-is: True = do not change the backbone weights, False = retrain the weights of all layers yourself
    feature_extract = True

    image_datasets, dataloaders, dataset_sizes, class_names = create_dataset()

    if not torch.cuda.is_available():
        print('CUDA is not available.  Training on CPU ...')
    else:
        print('CUDA is available!  Training on GPU ...')

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_ft = initialize_model(model_name='resnet', num_classes=102, feature_extract=feature_extract,
                                use_pretrained=False)
    model_ft = model_ft.to(device)

    # Decide which parameters will be trained
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in model_ft.named_parameters():
            if param.requires_grad:
                print("\t", name)

    # Optimizer setup
    print('params_to_update:', params_to_update)
    optimizer = torch.optim.Adam(params_to_update, lr=0.01)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1)  # decay the learning rate to 1/10 every 6 epochs
    # The last layer already applies LogSoftmax(), so nn.CrossEntropyLoss() cannot be used here:
    # nn.CrossEntropyLoss() is equivalent to LogSoftmax() combined with nn.NLLLoss()
    criterion = nn.NLLLoss()

    # Start training
    train_model(model_ft, dataloaders, criterion, optimizer, num_epoch=20, filename="resnet152_fc.pth")  # train only the fully connected (fc) parameters
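The loss-function comment above is easy to verify numerically: nn.CrossEntropyLoss on raw logits gives the same value as nn.NLLLoss on log-softmax outputs. A small standalone check:

import torch
import torch.nn as nn

logits = torch.randn(4, 102)                   # fake batch of raw scores
target = torch.randint(0, 102, (4,))           # fake labels
a = nn.CrossEntropyLoss()(logits, target)
b = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
print(torch.allclose(a, b))  # True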

 Results:

Epoch 0 / 19
--------------------
Time spend 1m 23s
train Loss: 6.3684 Acc: 0.4066
Time spend 1m 32s
valid Loss: 5.8717 Acc: 0.4976
Optimizer learning rate : 0.0100000
Epoch 1 / 19
--------------------
Time spend 2m 56s
train Loss: 4.2060 Acc: 0.6039
Time spend 3m 4s
valid Loss: 5.4534 Acc: 0.6015
Optimizer learning rate : 0.0100000
Epoch 2 / 19
--------------------
Time spend 4m 29s
train Loss: 3.8017 Acc: 0.6664
Optimizer learning rate : 0.0100000
Epoch 3 / 19
--------------------
Time spend 6m 2s
train Loss: 3.7740 Acc: 0.7015
Time spend 6m 11s
valid Loss: 7.5827 Acc: 0.6002
Optimizer learning rate : 0.0010000
Epoch 4 / 19
--------------------
Time spend 7m 43s
valid Loss: 2.8858 Acc: 0.7579
Optimizer learning rate : 0.0100000
Epoch 5 / 19
--------------------
Time spend 9m 8s
train Loss: 4.0601 Acc: 0.7067
Time spend 9m 16s
valid Loss: 9.9845 Acc: 0.5575
Optimizer learning rate : 0.0010000
Epoch 6 / 19
--------------------
Time spend 10m 48s
valid Loss: 3.0394 Acc: 0.7592
Optimizer learning rate : 0.0100000
Epoch 7 / 19
--------------------
Time spend 12m 13s
train Loss: 4.0818 Acc: 0.7245
Optimizer learning rate : 0.0100000
Epoch 9 / 19
--------------------
Time spend 15m 18s
train Loss: 3.9790 Acc: 0.7477
Time spend 15m 27s
valid Loss: 7.6528 Acc: 0.6699
Optimizer learning rate : 0.0010000
Epoch 10 / 19
--------------------
Time spend 16m 59s
valid Loss: 3.7281 Acc: 0.7800
Optimizer learning rate : 0.0100000
Epoch 11 / 19
--------------------
Time spend 18m 23s
train Loss: 4.0878 Acc: 0.7512
Time spend 18m 32s
valid Loss: 9.4204 Acc: 0.6186
Optimizer learning rate : 0.0010000
Epoch 12 / 19
--------------------
Time spend 19m 55s
train Loss: 1.2848 Acc: 0.8829
Time spend 20m 3s
valid Loss: 3.6463 Acc: 0.7910
Optimizer learning rate : 0.0100000
Epoch 13 / 19
--------------------
Time spend 21m 29s
train Loss: 3.9060 Acc: 0.7694
Time spend 21m 37s
valid Loss: 8.6173 Acc: 0.6785
Optimizer learning rate : 0.0010000
Epoch 14 / 19
--------------------
Time spend 23m 0s
train Loss: 1.2803 Acc: 0.8872
Time spend 23m 8s
valid Loss: 3.9265 Acc: 0.7934
Optimizer learning rate : 0.0100000
Epoch 15 / 19
--------------------
Time spend 24m 33s
train Loss: 3.9600 Acc: 0.7717
Time spend 24m 41s
valid Loss: 9.4567 Acc: 0.6638
Optimizer learning rate : 0.0010000
Epoch 16 / 19
--------------------
Time spend 26m 5s
train Loss: 1.2917 Acc: 0.8915
Time spend 26m 13s
valid Loss: 3.7734 Acc: 0.7958
Optimizer learning rate : 0.0100000
Epoch 17 / 19
--------------------
Time spend 27m 38s
train Loss: 3.8783 Acc: 0.7831
Time spend 29m 11s
train Loss: 1.4112 Acc: 0.8936
Time spend 29m 19s
valid Loss: 4.2978 Acc: 0.8081
Optimizer learning rate : 0.0100000
Epoch 19 / 19
--------------------
Time spend 30m 45s
train Loss: 3.7042 Acc: 0.8001

Training all layers' weight parameters

"""训练所有层"""
    print('开始训练所有层参数...')
    file_path = pth_path + 'resnet152_fc.pth'

    checkpoint = torch.load(file_path)
    best_acc = checkpoint['best_acc']
    print('=' * 50)
    print(best_acc)
    model_ft = initialize_model('resnet', 102, feature_extract, use_pretrained=False)
    model_ft.load_state_dict(checkpoint['state_dict'])

    model_ft = model_ft.to(device)

    # 将所有参数的requires_grad 设为True 微调 训练所有层
    for param in model_ft.parameters():
        param.requires_grad = True

    optimizer = torch.optim.Adam(model_ft.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1)
    # 损失函数
    criterion = nn.NLLLoss()

    # optimizer.load_state_dict(checkpoint['optimizer'])

    train_model(model_ft, dataloaders, criterion, optimizer, num_epoch=10, filename='resnet152_all_layers.pth')
Training all layers...
==================================================
tensor(0.4267, device='cuda:0', dtype=torch.float64)
Epoch 0 / 9
--------------------
Time spend 2m 15s
train Loss: 1.3265 Acc: 0.7640
Time spend 2m 24s
valid Loss: 3.1559 Acc: 0.7066
Optimizer learning rate : 0.0001000
Epoch 1 / 9
--------------------
Time spend 4m 42s
train Loss: 0.4628 Acc: 0.8860
Time spend 4m 50s
valid Loss: 1.1325 Acc: 0.8007
Optimizer learning rate : 0.0001000
Epoch 2 / 9
--------------------
Time spend 7m 9s
train Loss: 0.2922 Acc: 0.9202
Time spend 7m 18s
valid Loss: 1.4380 Acc: 0.7775
Optimizer learning rate : 0.0001000
Epoch 3 / 9
--------------------
Time spend 9m 32s
train Loss: 0.2818 Acc: 0.9269
Time spend 9m 41s
valid Loss: 1.1434 Acc: 0.8264
Optimizer learning rate : 0.0001000
Epoch 4 / 9
--------------------
Time spend 11m 60s
train Loss: 0.1942 Acc: 0.9431
Time spend 12m 8s
valid Loss: 1.0701 Acc: 0.8178
Optimizer learning rate : 0.0001000
Epoch 5 / 9
--------------------
Time spend 14m 23s
train Loss: 0.1809 Acc: 0.9481
Time spend 14m 32s
valid Loss: 1.4222 Acc: 0.7934
Optimizer learning rate : 0.0001000
Epoch 6 / 9
--------------------
Time spend 16m 47s
train Loss: 0.1690 Acc: 0.9518
Time spend 16m 56s
valid Loss: 0.8131 Acc: 0.8435
Optimizer learning rate : 0.0001000
Epoch 7 / 9
--------------------
Time spend 19m 15s
train Loss: 0.1576 Acc: 0.9545
Time spend 19m 23s
valid Loss: 0.9752 Acc: 0.8081
Optimizer learning rate : 0.0001000
Epoch 8 / 9
--------------------
Time spend 21m 38s
train Loss: 0.1399 Acc: 0.9588
Time spend 21m 47s
valid Loss: 1.0644 Acc: 0.8093
Optimizer learning rate : 0.0001000
Epoch 9 / 9
--------------------
Time spend 24m 2s
train Loss: 0.1374 Acc: 0.9617
Time spend 24m 11s
valid Loss: 0.9745 Acc: 0.8032
Optimizer learning rate : 0.0001000
Training complete in 24m 11s

Evaluation results

def evaluate():
    print('Starting evaluation...')
    model_ft = initialize_model('resnet', 102, feature_extract, use_pretrained=False)
    file_path = 'resnet152_all_layers.pth'  # checkpoint saved by the all-layers training stage

    checkpoint = torch.load(file_path)
    best_acc = checkpoint['best_acc']
    model_ft.load_state_dict(checkpoint['state_dict'])
    print('=' * 50)
    print(best_acc)

    # GPU模式
    model_ft = model_ft.to(device)
    # fetch one batch of validation data
    dataiter = iter(dataloaders['valid'])
    images, labels = next(dataiter)

    model_ft.eval()
    train_on_gpu = torch.cuda.is_available()
    if train_on_gpu:
        output = model_ft(images.cuda())
    else:
        output = model_ft(images)

    _, preds_tensor = torch.max(output, 1)

    preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())

    # visualize the results
    fig = plt.figure(figsize=(20, 20))
    columns = 4
    rows = 4

    for idx in range(columns * rows):
        ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
        plt.imshow(im_convert(images[idx]))
        # map indices back through class_names: ImageFolder indices are not the folder names themselves
        ax.set_title("{} ({})".format(cat_to_name[class_names[preds[idx]]], cat_to_name[class_names[labels[idx].item()]]),
                     color=("green" if preds[idx] == labels[idx].item() else "red"))
    plt.savefig('./img_show.png')
    plt.close(fig)
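Because the network ends in LogSoftmax, its raw outputs are log-probabilities; exponentiating them recovers per-class probabilities, which is useful when you want a confidence score or a top-5 readout instead of only the argmax. A small sketch reusing the output tensor from evaluate():

probs = torch.exp(output)               # log-probabilities -> probabilities (each row sums to 1)
top_p, top_idx = probs.topk(5, dim=1)   # per-image top-5 confidences and class indices
print(top_p[0])                                             # confidences of the top-5 guesses for the first image
print([cat_to_name[class_names[i]] for i in top_idx[0].cpu().tolist()])  # their flower names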

Best accuracy on the validation set: tensor(0.8435, device='cuda:0', dtype=torch.float64) 


[Figure: predicted (true) labels for one batch of validation images]

Green titles indicate correct predictions; red indicates incorrect ones.

 Summary

        1. Modify the structure of the official PyTorch pretrained ResNet-152 model to fit this task, then train the parameters of the modified layer on their own.

        2. Train all layers to improve accuracy.

        3. Evaluate the results.