Kaggle Competition: Dogs vs. Cats Classification (AlexNet)
- Dataset Introduction
- Data Processing
- Network
- Training and Results
- Summary
Dataset Introduction
The task in this dataset is to look at an image and decide whether it shows a cat or a dog; the training and test sets contain only these two classes.
Data Processing
- Obtain the data and the label files (loading the Kaggle data is a bit convoluted; I lazily reused someone else's working code and won't paste it here, but a rough sketch of the idea follows this list);
- Analyze the data and do a bit of visualization;
- Split the data into a training set and a validation set;
- Wrap the data, i.e. batch it.
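Since the loading code is omitted above, here is a minimal sketch of what that step typically looks like in a Kaggle notebook. The paths (`/kaggle/input/dogs-vs-cats/train.zip`, `tmp_dir`, `train_dir`, `test_dir`) and the zip layout are assumptions for illustration, not the exact code used in this post.

import os
import zipfile

# Assumed locations; adjust to whatever the competition actually mounts.
COMPETITION_ZIP = '/kaggle/input/dogs-vs-cats/train.zip'  # assumption
tmp_dir = '/kaggle/working/tmp'                           # assumption

os.makedirs(tmp_dir, exist_ok=True)

# Extract train.zip so the 25000 images end up under tmp_dir/train
with zipfile.ZipFile(COMPETITION_ZIP, 'r') as zf:
    zf.extractall(tmp_dir)

# Separate folders for this experiment's own train/test split
train_dir = os.path.join(tmp_dir, 'my_train')  # assumption
test_dir = os.path.join(tmp_dir, 'my_test')    # assumption
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)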
Let's look at what the dataset contains:
import os
import numpy as np

list_of_fnames = os.listdir(os.path.join(tmp_dir, 'train'))
print('Total number of of images in tmp/train is {0}'.format(len(list_of_fnames)))
list_of_cats_fnames = [i for i in list_of_fnames if 'CAT' in i.upper()]
list_of_dogs_fnames = [i for i in list_of_fnames if 'DOG' in i.upper()]
TOTAL_CATS = len(list_of_cats_fnames)
TOTAL_DOGS = len(list_of_dogs_fnames)
print('{0} CATS images'.format(TOTAL_CATS))
print('{0} DOGS images'.format(TOTAL_DOGS))
Result:
Total number of of images in tmp/train is 25000
12500 CATS images
12500 DOGS images
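The bullet list above mentions doing some light visualization. Here is a minimal sketch of what that step can look like, just a grid of a few sample images with matplotlib; it is not the exact plot made for this post.

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Show the first few cat and dog images side by side as a quick sanity check.
sample_fnames = list_of_cats_fnames[:4] + list_of_dogs_fnames[:4]
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, fname in zip(axes.flat, sample_fnames):
    img = mpimg.imread(os.path.join(tmp_dir, 'train', fname))
    ax.imshow(img)
    ax.set_title(fname, fontsize=8)
    ax.axis('off')
plt.tight_layout()
plt.show()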
Split into training and validation sets
TRAIN_TEST_SPLIT_AT = 0.9
BATCH_SIZE = 100
TARGET_SIZE = (128, 128)
NO_OF_EPOCHS = 1
EXPERIMENT_SIZE = 10000
NO_OF_FOLDS = 5
from shutil import copyfile
np.random.shuffle(list_of_cats_fnames)
np.random.shuffle(list_of_dogs_fnames)
tmp_train_dir = os.path.join(tmp_dir, 'train')
c = 0
for i in list_of_cats_fnames:
    if c < round(TRAIN_TEST_SPLIT_AT * EXPERIMENT_SIZE):
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(train_dir, i))
    else:
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(test_dir, i))
    c += 1
    if c >= EXPERIMENT_SIZE:
        break
c = 0
for i in list_of_dogs_fnames:
    if c < round(TRAIN_TEST_SPLIT_AT * EXPERIMENT_SIZE):
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(train_dir, i))
    else:
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(test_dir, i))
    c += 1
    if c >= EXPERIMENT_SIZE:
        break
# Note: train_dir and test_dir contain both cat and dog images; the counts below are totals
print('Total training cat images :', len(os.listdir(train_dir)))
print('Total test dog images :', len(os.listdir(test_dir)))
train_X = [img_fname for img_fname in os.listdir(train_dir)]
train_X = np.array(train_X)
#
train_labels = [l.split('/')[-1].split('.')[0].strip('0123456789') for l in train_X]
train_labels = np.array(train_labels)
#
print ('Training shape:', train_X.shape, train_labels.shape)
#
print(train_X[:5], train_labels[:5])
test_X = [img_fname for img_fname in os.listdir(test_dir)]
test_X = np.array(test_X)
print ('testing shape:', test_X.shape)
#
print(test_X[:5])
Result:
Total training cat images : 18000
Total test dog images : 2000
Training shape: (18000,) (18000,)
['dog.890.jpg' 'dog.7845.jpg' 'cat.3660.jpg' 'dog.814.jpg' 'dog.2066.jpg'] ['dog' 'dog' 'cat' 'dog' 'dog']
testing shape: (18000,)
['dog.890.jpg' 'dog.7845.jpg' 'cat.3660.jpg' 'dog.814.jpg' 'dog.2066.jpg']
Next up is batching.
import cv2
import torch
from torch.autograd import Variable

NAMES = ['cat', 'dog']
# returns a dict mapping class name to id,
# e.g. {'cat': 0, 'dog': 1}
def get_names():
    category2id = {}
    for i, item in enumerate(NAMES):
        category2id[item] = i
    return category2id
# build the list of image paths in a directory
def get_annotations(cname2cid, datadir):
    train_records = []
    for tmp in os.listdir(datadir):
        train_records.append(datadir + '/' + tmp)
    return train_records
# load a single image from file
def get_img_data_from_file(record, size=227):
    # image array
    img = cv2.imread(record)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
    # image size (after resizing)
    h = img.shape[0]
    w = img.shape[1]
    # label, taken from the filename prefix ('cat' or 'dog')
    label = get_names()[record.split('/')[-1].split('.')[0]]
    return img, label, (h, w)
# read an image, normalize it with the ImageNet mean/std,
# and convert it from (227, 227, 3) to (3, 227, 227) so a batch becomes (batch, 3, 227, 227)
def get_img_data(record, size=227):
    img, labels, scales = get_img_data_from_file(record, size)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    mean = np.array(mean).reshape((1, 1, -1))
    std = np.array(std).reshape((1, 1, -1))
    img = (img / 255.0 - mean) / std
    img = img.astype('float32').transpose((2, 0, 1))
    return img, labels, scales
# convert a batch given as a list of (img, label) tuples into a pair of tensors
def make_array(batch_data):
    img_array = np.array([item[0] for item in batch_data], dtype='float32')
    img_array = torch.from_numpy(img_array)
    img_array = Variable(img_array)
    labels_array = np.array([item[1] for item in batch_data], dtype='int64')
    labels_array = torch.from_numpy(labels_array)
    labels_array = Variable(labels_array)
    return img_array, labels_array
# read the data in batches; every image is resized to the same
# 227x227 shape by get_img_data above, so all batches share the same image size
def data_loader(datadir, batch_size=4, mode='train'):
    cname2cid = get_names()
    records = get_annotations(cname2cid, datadir)
    # the dataset is large; to save training time I only train on part of it,
    # since the main goal is to get the pipeline running
    # records = records[0:2000]
    def reader():
        if mode == 'train':
            np.random.shuffle(records)
        batch_data = []
        for record in records:
            # print(record)
            img, labels, im_shape = get_img_data(record)
            batch_data.append((img, labels))
            if len(batch_data) == batch_size:
                yield make_array(batch_data)
                batch_data = []
        if len(batch_data) > 0:
            yield make_array(batch_data)
    return reader
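A quick way to sanity-check the loader is to pull one batch and look at the tensor shapes (a small sketch; the batch size is just an example):

loader = data_loader(train_dir, batch_size=4, mode='train')
imgs, labels = next(loader())
print(imgs.shape, labels.shape)  # expected: torch.Size([4, 3, 227, 227]) torch.Size([4])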
Network
We use AlexNet. There are plenty of tutorials on this network, so I just paste the code; search for the original paper if you need the details. I train it by fine-tuning: load a pretrained AlexNet and replace the final fully connected layer so that it outputs the 2 classes of this task.
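As an aside, the same idea (keep the pretrained AlexNet and swap only the last layer) can be written more compactly directly on the torchvision model. This is only a sketch of that alternative, not the code used in this post; the full BuildAlexNet class used here follows.

import torch.nn as nn
from torchvision import models

alexnet_ft = models.alexnet(pretrained=True)
# classifier[6] is the final Linear(4096, 1000) layer in torchvision's AlexNet;
# replace it with a 2-way output for cat vs dog
alexnet_ft.classifier[6] = nn.Linear(4096, 2)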
import torch.nn as nn
from torchvision import models

class BuildAlexNet(nn.Module):
    def __init__(self, model_type, n_output):
        super(BuildAlexNet, self).__init__()
        self.model_type = model_type
        # fine-tuning: reuse the pretrained features and the first two FC layers
        if model_type == 'pre':
            model = models.alexnet(pretrained=True)
            self.features = model.features
            fc1 = nn.Linear(9216, 4096)
            fc1.bias = model.classifier[1].bias
            fc1.weight = model.classifier[1].weight
            fc2 = nn.Linear(4096, 4096)
            fc2.bias = model.classifier[4].bias
            fc2.weight = model.classifier[4].weight
            self.classifier = nn.Sequential(
                nn.Dropout(),
                fc1,
                nn.ReLU(inplace=True),
                nn.Dropout(),
                fc2,
                nn.ReLU(inplace=True),
                nn.Linear(4096, n_output))
        # the same structure, trained from scratch
        if model_type == 'new':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, 11, 4, 2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(3, 2, 0),
                nn.Conv2d(64, 192, 5, 1, 2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(3, 2, 0),
                nn.Conv2d(192, 384, 3, 1, 1),
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 256, 3, 1, 1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(3, 2, 0))
            self.classifier = nn.Sequential(
                nn.Dropout(),
                nn.Linear(9216, 4096),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, n_output))

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        out = self.classifier(x)
        return out
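Before training, a quick shape check on the fine-tuned model (a sketch; the input is just random data standing in for one 227x227 RGB image):

net = BuildAlexNet('pre', 2)
dummy = torch.randn(1, 3, 227, 227)  # one fake RGB image
print(net(dummy).shape)              # expected: torch.Size([1, 2])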
Training and Results
# set up the model
model_type = 'pre'
n_output = 2
alexnet = BuildAlexNet(model_type, n_output)
# define the loss function and the optimizer:
# cross-entropy loss, SGD with momentum
import torch.optim as optim
from tqdm import tqdm

use_gpu = torch.cuda.is_available()
criterion = nn.CrossEntropyLoss()
if use_gpu:
    alexnet = alexnet.cuda()
    criterion = criterion.cuda()
optimizer = optim.SGD(alexnet.parameters(), lr=0.001, momentum=0.9)
d = data_loader(train_dir, batch_size=3, mode='train')
# train the network
# iterate over epochs
for epoch in range(20):
    running_loss = 0.0
    for i, data in tqdm(enumerate(d(), 0)):
        # get one batch
        inputs, labels = data
        if use_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        # zero the gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # backpropagate
        optimizer.step()                   # update the parameters
        # print statistics
        running_loss += loss.item()
        if i % 250 == 249:
            # print the average loss every 250 iterations
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 250))
            running_loss = 0.0
    # save the model state every epoch (handy for resuming training)
    state = {'net': alexnet.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
    filepath = os.path.join('/kaggle/output/dogs-vs-cats/', 'checkpoint_model_epoch_{}.pth'.format(epoch))
    torch.save(state, filepath)
print('Finished Training')
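The checkpoints above are saved to make it easy to resume training. A minimal sketch of restoring one of them (the epoch number in the filename is only an example):

checkpoint = torch.load(os.path.join('/kaggle/output/dogs-vs-cats/', 'checkpoint_model_epoch_0.pth'))
alexnet.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1  # continue from the next epoch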
Finally, let's test how well the trained network does.
# Model testing. This code is a bit rough (the commented-out version below is preferable)!
# build the list of held-out test images (2000 files in test_dir)
records = get_annotations(get_names(), test_dir)
alexnet.eval()  # evaluation mode (disables dropout)
i = 0
for record in tqdm(records):
    img, labels, im_shape = get_img_data(record)
    img_array = np.array([img], dtype='float32')
    img_array = torch.from_numpy(img_array)
    img_array = Variable(img_array)
    if use_gpu:
        img_array = img_array.cuda()
    y = alexnet(img_array)
    _, pre = torch.max(y.data, 1)
    if pre.item() == labels:
        i += 1
print("acc:{}".format(i / len(records)))
# The version below is preferable, but I have not run it yet; I'll fill it in when I have time
# from sklearn.metrics import accuracy_score
# v = data_loader(test_dir, batch_size=100)
# imgs_v, labels_v = next(v())
# labels_v = labels_v.detach().numpy()
# outputs_v = alexnet(imgs_v)
# _, pre = torch.max(outputs_v.data, 1)
# score = accuracy_score(pre, labels_v)
# print("score:{}".format(score))
Result:
100%|██████████| 2000/2000 [01:19<00:00, 25.23it/s]
acc:0.776
Summary
This is a fairly simple classification dataset; I mainly worked through it to get the whole training pipeline running end to end and to keep a small record of it. The project was done quite a while ago and the code is rough: it has few safety checks (image loading, for example, could use some asserts), only part of the training set was used to save time, and most hyperparameters were left untuned. I will improve it when I have time.
















