Kaggle competition: dog vs cat classification (AlexNet)

  • Dataset overview
  • Data processing
  • Network
  • Training and results
  • Summary


Dataset overview

The task is to look at an image and decide whether it shows a cat or a dog; both the training and test sets contain only these two classes.

Data processing

  1. Obtain the data and annotation files (loading the Kaggle data is a bit convoluted; I lazily reused someone else's working code, so it is not shown here);
  2. Analyze the data and do some simple visualization (see the sketch after the dataset counts below);
  3. Split the data into a training set and a validation set;
  4. Wrap the data into batches.

First, look at how the dataset is composed:

import os
import numpy as np

list_of_fnames = os.listdir(os.path.join(tmp_dir, 'train'))
print('Total number of images in tmp/train is {0}'.format(len(list_of_fnames)))
list_of_cats_fnames = [i for i in list_of_fnames if 'CAT' in i.upper()]
list_of_dogs_fnames = [i for i in list_of_fnames if 'DOG' in i.upper()]
TOTAL_CATS = len(list_of_cats_fnames)
TOTAL_DOGS = len(list_of_dogs_fnames)
print('{0} CATS images'.format(TOTAL_CATS))
print('{0} DOGS images'.format(TOTAL_DOGS))

Result

Total number of images in tmp/train is 25000
12500 CATS images
12500 DOGS images
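
As mentioned in step 2 above, it is worth eyeballing a few samples before training. The snippet below is a small visualization sketch I added for illustration; it assumes matplotlib is available and reuses the filename lists built above.

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# show the first few cat and dog images from tmp/train
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, fname in zip(axes[0], list_of_cats_fnames[:4]):
    ax.imshow(mpimg.imread(os.path.join(tmp_dir, 'train', fname)))
    ax.set_title(fname)
    ax.axis('off')
for ax, fname in zip(axes[1], list_of_dogs_fnames[:4]):
    ax.imshow(mpimg.imread(os.path.join(tmp_dir, 'train', fname)))
    ax.set_title(fname)
    ax.axis('off')
plt.show()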

Split into training and validation sets

TRAIN_TEST_SPLIT_AT = 0.9
BATCH_SIZE = 100
TARGET_SIZE = (128, 128)
NO_OF_EPOCHS = 1
EXPERIMENT_SIZE = 10000
NO_OF_FOLDS = 5

from shutil import copyfile

np.random.shuffle(list_of_cats_fnames)
np.random.shuffle(list_of_dogs_fnames)

# tmp_train_dir holds all 25000 original images; train_dir and test_dir were created by the data-loading code that is not shown here
tmp_train_dir = os.path.join(tmp_dir, 'train')
c = 0
for i in list_of_cats_fnames:
    if c < (round(TRAIN_TEST_SPLIT_AT * EXPERIMENT_SIZE)):
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(train_dir, i))
    else:
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(test_dir, i))
    c += 1
    if c >= EXPERIMENT_SIZE:
        break

c = 0
for i in list_of_dogs_fnames:
    if c < (round(TRAIN_TEST_SPLIT_AT * EXPERIMENT_SIZE)):
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(train_dir, i))
    else:
        copyfile(os.path.join(tmp_train_dir, i), os.path.join(test_dir, i))
    c += 1
    if c >= EXPERIMENT_SIZE:
        break

# both directories contain cat and dog images; the counts below are totals over both classes
print('Total training images :', len(os.listdir(train_dir)))
print('Total test images :', len(os.listdir(test_dir)))

train_X = [img_fname for img_fname in os.listdir(train_dir)]
train_X = np.array(train_X)
# labels come from the filename prefix, e.g. 'cat.3660.jpg' -> 'cat'
train_labels = [l.split('/')[-1].split('.')[0].strip('0123456789') for l in train_X]
train_labels = np.array(train_labels)

print('Training shape:', train_X.shape, train_labels.shape)
print(train_X[:5], train_labels[:5])

test_X = [img_fname for img_fname in os.listdir(test_dir)]
test_X = np.array(test_X)

print('testing shape:', test_X.shape)
print(test_X[:5])

结果

Total training images : 18000
Total test images : 2000
Training shape: (18000,) (18000,)
['dog.890.jpg' 'dog.7845.jpg' 'cat.3660.jpg' 'dog.814.jpg' 'dog.2066.jpg'] ['dog' 'dog' 'cat' 'dog' 'dog']
testing shape: (2000,)
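
As a quick sanity check on the split (my own snippet, not from the original run), the training labels should be perfectly balanced: with the 0.9 split over 10000 images per class, there should be 9000 cats and 9000 dogs.

# count how many images of each class ended up in the training split
labels, counts = np.unique(train_labels, return_counts=True)
print(dict(zip(labels, counts)))  # expected: 9000 of each class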

The next step is batching:

import cv2
import torch
from torch.autograd import Variable

NAMES = ['cat', 'dog']

# returns a dict mapping class name to id:
# {'cat': 0, 'dog': 1}
def get_names():
    category2id = {}
    for i, item in enumerate(NAMES):
        category2id[item] = i

    return category2id
 
# build the list of image paths in the dataset directory
def get_annotations(cname2cid, datadir):
    train_records = []
    for tmp in os.listdir(datadir):
        train_records.append(datadir + '/' + tmp)
    return train_records

# global name -> id map used by get_img_data_from_file below
cname2cid = get_names()

# load one image from file
def get_img_data_from_file(record, size=227):
    # image array: BGR -> RGB, resized to size x size
    img = cv2.imread(record)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
    # image size
    h = img.shape[0]
    w = img.shape[1]
    # label from the filename prefix ('cat' / 'dog')
    label = cname2cid[record.split('/')[-1].split('.')[0]]
    return img, label, (h, w)

# read an image, normalize it with the ImageNet mean/std,
# and convert it from (227, 227, 3) to (3, 227, 227); the batch dimension is added later in make_array
def get_img_data(record, size=227):
    img, labels, scales = get_img_data_from_file(record, size=size)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    mean = np.array(mean).reshape((1, 1, -1))
    std = np.array(std).reshape((1, 1, -1))
    img = (img / 255.0 - mean) / std
    img = img.astype('float32').transpose((2, 0, 1))
    return img, labels, scales

# convert a batch given as a list of (img, label) tuples into a pair of tensors
def make_array(batch_data):
    img_array = np.array([item[0] for item in batch_data], dtype='float32')
    img_array = torch.from_numpy(img_array)
    img_array = Variable(img_array)  # Variable is a no-op in PyTorch >= 0.4; plain tensors also work
    labels_array = np.array([item[1] for item in batch_data], dtype='int64')
    labels_array = torch.from_numpy(labels_array)
    labels_array = Variable(labels_array)
    return img_array, labels_array

# read the data in batches; every image is resized to 227 x 227 by get_img_data,
# so all images in a batch have the same shape
def data_loader(datadir, batch_size= 4, mode='train'):
    cname2cid = get_names()
    records = get_annotations(cname2cid, datadir)
    # the dataset is large; to save training time I only used part of it, mainly to get the pipeline running
    # records = records[0:2000]

    def reader():
        if mode == 'train':
            np.random.shuffle(records)
        batch_data = []
        for record in records:
            #print(record)
            img, labels, im_shape = get_img_data(record)
            batch_data.append((img, labels))
            if len(batch_data) == batch_size:
                yield make_array(batch_data)
                batch_data = []
        if len(batch_data) > 0:
            yield make_array(batch_data)

    return reader
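
Before wiring the loader into training, a quick sanity check helps. This is a minimal sketch I added (not from the original post); it just calls the data_loader defined above on train_dir and inspects one batch.

# pull one batch and check its shape
train_loader = data_loader(train_dir, batch_size=4, mode='train')
imgs, labels = next(train_loader())
print(imgs.shape)   # expected: torch.Size([4, 3, 227, 227])
print(labels)       # tensor of 0s (cat) and 1s (dog)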

Network

I use AlexNet; there are plenty of tutorials on this network, so I just paste the code here and you can look up the original paper if you need the details. I train by fine-tuning: load a pretrained AlexNet, reuse its convolutional features and the first two fully connected layers, and replace the final fully connected layer with a new one that outputs the 2 classes of this task.

import torch.nn as nn
from torchvision import models

class BuildAlexNet(nn.Module):
    def __init__(self, model_type, n_output):
        super(BuildAlexNet, self).__init__()
        self.model_type = model_type
        # fine-tuning structure: reuse the pretrained feature extractor and the first two FC layers
        if model_type == 'pre':
            model = models.alexnet(pretrained=True)
            self.features = model.features
            fc1 = nn.Linear(9216, 4096)
            fc1.bias = model.classifier[1].bias
            fc1.weight = model.classifier[1].weight
            
            fc2 = nn.Linear(4096, 4096)
            fc2.bias = model.classifier[4].bias
            fc2.weight = model.classifier[4].weight
            
            self.classifier = nn.Sequential(
                    nn.Dropout(),
                    fc1,
                    nn.ReLU(inplace=True),
                    nn.Dropout(),
                    fc2,
                    nn.ReLU(inplace=True),
                    nn.Linear(4096, n_output))  
        # structure for training from scratch
        if model_type == 'new':
            self.features = nn.Sequential(
                    nn.Conv2d(3, 64, 11, 4, 2),
                    nn.ReLU(inplace = True),
                    nn.MaxPool2d(3, 2, 0),
                    nn.Conv2d(64, 192, 5, 1, 2),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(3, 2, 0),
                    nn.Conv2d(192, 384, 3, 1, 1),
                    nn.ReLU(inplace = True),
                    nn.Conv2d(384, 256, 3, 1, 1),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(3, 2, 0))
            self.classifier = nn.Sequential(
                    nn.Dropout(),
                    nn.Linear(9216, 4096),
                    nn.ReLU(inplace=True),
                    nn.Dropout(),
                    nn.Linear(4096, 4096),
                    nn.ReLU(inplace=True),
                    nn.Linear(4096, n_output))
            
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        out  = self.classifier(x)
        return out
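
Before training, it is worth confirming that the fine-tuned model is wired correctly. The sketch below is my own addition: it pushes a random 227x227 batch through the network and, optionally, freezes the pretrained convolutional features so that only the classifier is updated.

import torch

# sanity check: a random 227x227 input should produce a (batch, 2) output
net = BuildAlexNet('pre', 2)
dummy = torch.randn(1, 3, 227, 227)
print(net(dummy).shape)  # expected: torch.Size([1, 2])

# optional: freeze the pretrained feature extractor and train only the classifier
for p in net.features.parameters():
    p.requires_grad = False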

Training and results

import torch.optim as optim
from tqdm import tqdm

# set up the model
model_type = 'pre'
n_output = 2
alexnet = BuildAlexNet(model_type, n_output)

# define the loss function and the optimizer:
# cross-entropy loss and SGD with momentum
criterion = nn.CrossEntropyLoss()
use_gpu = torch.cuda.is_available()
if use_gpu:
    alexnet = alexnet.cuda()
    criterion = criterion.cuda()
optimizer = optim.SGD(alexnet.parameters(), lr=0.001, momentum=0.9)

d = data_loader(train_dir, batch_size=3, mode='train')

# train the network
# loop over epochs
for epoch in range(20):
    running_loss = 0.0
    for i, data in tqdm(enumerate(d(), 0)):
        # get a batch of data
        inputs, labels = data
        if use_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()  

        # forward + backward + optimize
        # the model and the inputs are already on the GPU when use_gpu is True
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # backpropagate
        optimizer.step()                   # update the parameters

        # print statistics
        running_loss += loss.item()
        
        if i % 250 == 249:
            # print the average loss every 250 iterations
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 250))
            running_loss = 0.0
    
    # save the training state after each epoch (to make it easy to resume training)
    state = {'net': alexnet.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
    filepath = os.path.join('/kaggle/output/dogs-vs-cats/', 'checkpoint_model_epoch_{}.pth'.format(epoch))
    torch.save(state, filepath)
        
print('Finished Training')
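
Because the loop above saves a dict with 'net', 'optimizer' and 'epoch' after every epoch, training can be resumed from a checkpoint. A minimal sketch (the exact path depends on which checkpoint was written):

# resume training from a saved checkpoint
checkpoint = torch.load('/kaggle/output/dogs-vs-cats/checkpoint_model_epoch_0.pth')
alexnet.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1  # continue from the following epoch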

Finally, test the trained network.

# model testing; this code is a bit rough (the batched version below is better)
alexnet.eval()
records = get_annotations(cname2cid, test_dir)  # the 2000 held-out test images
i = 0
for record in tqdm(records):
    img, labels, im_shape = get_img_data(record)
    img_array = np.array([img], dtype='float32')
    img_array = torch.from_numpy(img_array)
    img_array = Variable(img_array)
    if use_gpu:
        img_array = img_array.cuda()
    y = alexnet(img_array)
    _, pre = torch.max(y.data, 1)
    if pre.item() == labels:
        i += 1
print("acc:{}".format(i / len(records)))

# The batched code below is better, but I haven't run it; I'll fill it in when I have time
# from sklearn.metrics import accuracy_score
# v = data_loader(test_dir, batch_size=100)
# imgs_v, labels_v = next(v())
# labels_v = labels_v.detach().numpy()
# outputs_v = alexnet(imgs_v)
# _, pre = torch.max(outputs_v.data, 1)
# score = accuracy_score(labels_v, pre)
# print("score:{}".format(score))

Result

100%|██████████| 2000/2000 [01:19<00:00, 25.23it/s]
acc:0.776
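
For reference, here is a fuller version of the commented-out idea above: evaluate the whole test set batch by batch with the data_loader. This is my own sketch rather than code from the original run.

# evaluate the full test set in batches
alexnet.eval()
correct, total = 0, 0
test_loader = data_loader(test_dir, batch_size=100, mode='test')
with torch.no_grad():
    for imgs_v, labels_v in test_loader():
        if use_gpu:
            imgs_v, labels_v = imgs_v.cuda(), labels_v.cuda()
        outputs_v = alexnet(imgs_v)
        _, pre = torch.max(outputs_v, 1)
        correct += (pre == labels_v).sum().item()
        total += labels_v.size(0)
print("accuracy: {:.3f}".format(correct / total))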

Summary

This is a fairly simple classification dataset; I mainly worked on it to get a full training pipeline running and to keep a small record of it. The project was done quite a while ago and the code is rough: there are almost no defensive checks (for example, asserts when reading images would help), only part of the training set was used to save time, and most of the hyperparameters were never tuned. I will improve it when I have time.