Cats vs Dogs based on VGG16
1.准备数据集
# Mount Google Drive inside Colab so the dataset archive is reachable,
# then change into it and unpack the cats-vs-dogs archive in place.
# NOTE: %cd and !unzip are IPython magics/shell escapes, not plain Python —
# this cell only runs inside a notebook environment.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive
!unzip cat_dog.zip
2.数据预处理
#先为数据集划分合理的文件存储结构
import os
import shutil

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torchvision import datasets, models, transforms
# Reorganize the flat 'cat_dog/train' folder into the class-per-subdirectory
# layout that torchvision's ImageFolder expects:
#   train_/cat/*.jpg  and  train_/dog/*.jpg
#
# Bug fix: os.mkdir('train_/cat/') raised FileNotFoundError because the
# parent directory 'train_' was never created (and FileExistsError on
# re-runs); makedirs(..., exist_ok=True) handles both cases.
os.makedirs('train_/cat/', exist_ok=True)
os.makedirs('train_/dog/', exist_ok=True)

for fname in os.listdir('cat_dog/train'):
    # File names look like 'cat_123.jpg' / 'dog_456.jpg'; the prefix before
    # the first underscore is the class label.
    if fname.split('_')[0] == 'cat':
        shutil.move('cat_dog/train/' + fname, 'train_/cat/' + fname)
    else:
        shutil.move('cat_dog/train/' + fname, 'train_/dog/' + fname)
# Root of the restructured training data (one subdirectory per class).
train_dir = 'train_'

# Training pipeline: resize/crop to the 224x224 input VGG16 expects,
# apply light augmentation (random rotation + horizontal flip), then
# normalize with the ImageNet channel statistics the backbone was
# trained with.
train_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# ImageFolder labels classes alphabetically: cat -> 0, dog -> 1.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=128, shuffle=True
)
查看图片
import matplotlib.pyplot as plt

def imshow(image, ax=None, title=None, normalize=True):
    """Display a CHW image tensor on a matplotlib axis.

    Args:
        image: torch.Tensor of shape (C, H, W) as produced by ToTensor,
            optionally normalized with the ImageNet mean/std.
        ax: axis to draw on; a new figure/axis is created when None.
        title: optional title for the axis.
            Bug fix: this parameter was previously accepted but ignored.
        normalize: when True, undo the ImageNet normalization before display.

    Returns:
        The matplotlib axis the image was drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots()
    # Tensors are (C, H, W); matplotlib expects (H, W, C).
    image = image.numpy().transpose((1, 2, 0))
    if normalize:
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        image = std * image + mean
        image = np.clip(image, 0, 1)
    ax.imshow(image)
    if title is not None:
        ax.set_title(title)
    # Hide the frame and ticks — only the picture should be visible.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', length=0)
    # set_xticklabels('') is fragile in modern matplotlib; clearing the
    # ticks themselves achieves the same blank axes robustly.
    ax.set_xticks([])
    ax.set_yticks([])
    return ax
# Preview the first image of a training batch with its class name.
# ImageFolder assigns labels alphabetically: cat -> 0, dog -> 1.
images, labels = next(iter(train_dataloader))
title = 'Dog' if labels[0].item() == 1 else 'Cat'
# Bug fix: `title` was computed but never passed, so images showed untitled.
imshow(images[0], title=title)
3.训练模型
#先获取预训练好的VGG16
model = models.vgg16(pretrained=True)
model
冻结特征提取部分的参数
# Freeze every pre-trained weight so that only the (about to be replaced)
# classifier head receives gradient updates during training.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False
修改自己的分类器
from collections import OrderedDict

# Replace VGG16's stock 3-layer classifier with a small 2-layer head:
# 25088 flattened conv features -> 500 hidden units -> 2 log-probabilities.
head_layers = OrderedDict()
head_layers['dropout1'] = nn.Dropout(0.3)      # regularize the hidden layer
head_layers['fc1'] = nn.Linear(25088, 500)
head_layers['relu'] = nn.ReLU()
head_layers['fc2'] = nn.Linear(500, 2)         # two classes: cat / dog
head_layers['output'] = nn.LogSoftmax(dim=1)   # pairs with NLLLoss below
classifier = nn.Sequential(head_layers)
model.classifier = classifier
# Negative log-likelihood loss: matches the LogSoftmax output of the new head.
criterion = nn.NLLLoss()
# Optimize only the classifier head — the convolutional features are frozen.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)
# Bug fix: `device` was used here and throughout the script but never
# defined; select the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
迭代训练
from tqdm import tqdm

epochs = 5
for e in range(epochs):
    # Per-epoch accumulators: summed batch losses, samples seen, correct hits.
    running_loss = 0
    total = 0
    correct = 0
    model.train()
    for batch_images, batch_labels in tqdm(train_dataloader):
        # Move the batch to the same device as the model.
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        # Forward pass: model emits log-probabilities (LogSoftmax head).
        log_probs = model(batch_images)
        loss = criterion(log_probs, batch_labels)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate metrics; exp() turns log-probs back into probabilities
        # (argmax is the same either way, kept for parity with the original).
        running_loss += loss.item()
        total += batch_labels.size(0)
        _, batch_preds = torch.max(torch.exp(log_probs).data, 1)
        correct += (batch_preds == batch_labels).sum().item()
    # Epoch summary (the original used for/else, which is equivalent here
    # since the loop has no break).
    print(f'Epoch {e} Training: Loss={running_loss:.5f} Acc={correct/total * 100:.2f}')
4.保存模型
# Persist everything needed to rebuild the fine-tuned model: the backbone
# identifier, the classifier head module, and all trained weights.
checkpoints = {
    'pre-trained': 'vgg16',
    # Save the actual classifier module instead of rebuilding a freshly
    # initialized duplicate of its architecture — the loader only needs
    # the module structure, and the real weights come from state_dict.
    'classifier': model.classifier,
    'state_dict': model.state_dict(),
}
torch.save(checkpoints, 'vgg16_catsVdogs.pth')
def load_saved_model(path):
    """Rebuild the fine-tuned VGG16 from a saved checkpoint.

    Args:
        path: path to the .pth file written by torch.save above.

    Returns:
        The model in eval mode: frozen pre-trained VGG16 features plus the
        custom classifier with its trained weights restored.
    """
    checkpoint = torch.load(path)
    model = models.vgg16(pretrained=True)
    # Freeze the feature extractor, mirroring the training setup.
    for param in model.parameters():
        param.requires_grad = False
    model.classifier = checkpoint['classifier']
    # Bug fix: previously read the *global* `checkpoints` instead of the
    # local `checkpoint`, so loading only worked while the save cell's
    # variable happened to still be alive in the session.
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model

loaded_model = load_saved_model('vgg16_catsVdogs.pth')
loaded_model.to(device)
5.测试模型
# Deterministic preprocessing for evaluation: no augmentation, just the
# resize/crop/normalize matching the training-time (and ImageNet) setup.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

predictions = []
# Inference only: no_grad skips autograd bookkeeping for every image.
with torch.no_grad():
    for i in tqdm(range(0, 2000)):
        # Bug fix: the archive unpacks to 'cat_dog/...' (as used for the
        # training split above), not 'catdog/...'.
        path = 'cat_dog/test/' + str(i) + '.jpg'
        X = Image.open(path).convert('RGB')
        X = test_transform(X)[:3, :, :]
        X = X.unsqueeze(0)
        X = X.to(device)
        outputs = loaded_model(X)
        predictions.append(torch.argmax(outputs).item())
6.生成结果
# Write the submission file: one row per test-image id with the predicted
# label (0 = cat, 1 = dog, per ImageFolder's alphabetical class ordering).
submission = pd.DataFrame({'id': list(range(0, 2000)), 'label': predictions})
submission.to_csv('cats-dogs-submission.csv', index=False)
7.总结与结果分析
- 仅仅训练最后一层时,得到的准确率为96.5
- 多训练一层参数时(Layer1),得到的准确率为97.45
- 加入数据扩增方法(对原图片进行随机旋转、水平翻转)、换用损失函数$NLLLoss$、增加迭代epoch为5后得到的准确率为98.45