Cats vs Dogs based on VGG16

1.准备数据集

from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive
!unzip cat_dog.zip

2.数据预处理

#先为数据集划分合理的文件存储结构
import shutil
import os

os.mkdir('train_/cat/')
os.mkdir('train_/dog/')

for f in os.listdir('cat_dog/train'):
    if f.split('_')[0] == 'cat':
        shutil.move('cat_dog/train/'+f,'train_/cat/'+f)
    else:
        shutil.move('cat_dog/train/'+f,'train_/dog/'+f)
        
train_dir = 'train_'
# 对训练集做数据扩充
train_transform = transforms.Compose([
  transforms.Resize(256),
  transforms.CenterCrop(224),
  transforms.RandomRotation(30),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


train_dataset = datasets.ImageFolder(train_dir,transform=train_transform)

train_dataloader = torch.utils.data.DataLoader(
  train_dataset,  
  batch_size = 128,
  shuffle = True
)

查看图片

import matplotlib.pyplot as plt

def imshow(image, ax=None, title=None, normalize=True):
    """Imshow for Tensor."""
    if ax is None:
        fig, ax = plt.subplots()
    image = image.numpy().transpose((1, 2, 0))

    if normalize:
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        image = std * image + mean
        image = np.clip(image, 0, 1)

    ax.imshow(image)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.tick_params(axis='both', length=0)
    ax.set_xticklabels('')
    ax.set_yticklabels('')

    return ax



images, labels = next(iter(train_dataloader))

title = 'Dog' if labels[0].item() == 1 else 'Cat'
imshow(images[0])

我的猫狗大战分类_特征提取

3.训练模型

#先获取预训练好的VGG16
model = models.vgg16(pretrained=True)
model

冻结特征提取部分的参数

for param in model.parameters():
  param.requires_grad = False

修改自己的分类器

from collections import OrderedDict

#修改2个层
classifier = nn.Sequential(OrderedDict([
  # Layer 1
  ('dropout1',nn.Dropout(0.3)),
  ('fc1', nn.Linear(25088,500)),
  ('relu', nn.ReLU()),
  # output layer
  ('fc2', nn.Linear(500,2)),
  ('output', nn.LogSoftmax(dim=1))
]))

model.classifier = classifier

# 损失函数选取NLLLoss()
criterion = nn.NLLLoss()

# 优化器采用Adam
optimizer = optim.Adam(model.classifier.parameters(),lr =0.001)

model = model.to(device)

迭代训练

from tqdm import tqdm
epochs = 5


for e in range(epochs):
  running_loss, total, correct = 0, 0 , 0

  model.train()
    
  for images,labels in tqdm(train_dataloader):
    
    # Moving input to GPU
    images, labels = images.to(device), labels.to(device)

    # Forward prop
    outputs = model(images)
    loss = criterion(outputs,labels)

    # Backward prop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Metrics 
    running_loss += loss.item()
    total += labels.size(0)

    _, predicted = torch.max(torch.exp(outputs).data,1)
    correct += (predicted == labels).sum().item()
  else:
    # Logs 
    print(f'Epoch {e} Training: Loss={running_loss:.5f} Acc={correct/total * 100:.2f}')

4.保存模型

checkpoints = {
     'pre-trained':'vgg16',
     'classifier':nn.Sequential(OrderedDict([
          # 修改的Layer 1
          ('dropout1',nn.Dropout(0.3)),
          ('fc1', nn.Linear(25088,500)),
          ('relu', nn.ReLU()),
          # 修改的output layer
          ('fc2', nn.Linear(500,2)),
          ('output', nn.LogSoftmax(dim=1))
    ])),
    'state_dict':model.state_dict()
}

torch.save(checkpoints,'vgg16_catsVdogs.pth')
def load_saved_model(path):
    checkpoint = torch.load(path)
    
    model = models.vgg16(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoints['state_dict'])
    model.eval()
    return model
loaded_model = load_saved_model('vgg16_catsVdogs.pth')
loaded_model.to(device)

5.测试模型

test_transform = transforms.Compose([
          transforms.Resize(256),
          transforms.CenterCrop(224),
          transforms.ToTensor(),
          transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

predictions = []

for i in tqdm(range(0,2000)):
    path = 'catdog/test/'+str(i)+'.jpg'
    X = Image.open(path).convert('RGB')
    X = test_transform(X)[:3,:,:]
    X = X.unsqueeze(0)
    X = X.to(device)
    outputs = loaded_model(X)
    predictions.append(torch.argmax(outputs).item())

6.生成结果

# 对测试集的2000图片预测
data = {'id':list(range(0,2000)),'label':predictions}
df = pd.DataFrame(data)
df.to_csv('cats-dogs-submission.csv',index=False)

7.总结与结果分析

  • 仅仅训练最后一层时,得到的准确率为96.5
  • 多训练一层参数时(Layer1),得到的准确率为97.45
  • 加入数据扩增方法(对原图片进行水平竖直平移、翻转、缩放)、换用损失函数$NLLLoss$、增加迭代epoch为5后得到的准确率为98.45

我的猫狗大战分类_损失函数_02