Contents
lastDay
1. Image Classification
1.1 Case 1: Image Classification on Kaggle (CIFAR-10)
Image Augmentation
Loading the Dataset
Defining the Model
Training and Testing
Note!!!
1.2 Case 2: Dog Breed Identification on Kaggle (ImageNet Dogs)
Image Augmentation
Reading the Data
Defining the Model
Defining the Training Function
Hyperparameter Tuning
lastDay
Image Classification; GAN; DCGAN
1. Image Classification
1.1 Case 1: Image Classification on Kaggle (CIFAR-10)
https://www.boyuai.com/elites/course/cZu18YmweLv10OeV/jupyter/SEnCZF82BrKQURdU9O3Sh
Image Augmentation
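A typical augmentation pipeline for the 32x32 CIFAR-10 images pads and randomly crops, flips horizontally, and normalizes each channel. The sketch below is a minimal example, assuming the commonly used CIFAR-10 per-channel means and standard deviations; the exact transforms used in the course notebook may differ.
import torchvision.transforms as transforms

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # pad to 40x40, then randomly crop back to 32x32
    transforms.RandomHorizontalFlip(),     # flip horizontally with probability 0.5
    transforms.ToTensor(),
    # commonly used CIFAR-10 per-channel means and standard deviations
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# only deterministic operations for evaluation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])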
Loading the Dataset
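A minimal sketch of building the trainloader and testloader that the training code below relies on, assuming torchvision's built-in CIFAR10 dataset, a local ./data download directory, and a batch size of 32 (matching BATCH_SIZE later):
import torch
import torchvision

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)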
Defining the Model
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResidualBlock(nn.Module):  # new networks are usually defined by subclassing torch.nn.Module
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        # torch.nn.Sequential is a sequential container: modules are added in the order they are passed to the constructor
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            # first convolution layer, created with nn.Conv2d()
            nn.BatchNorm2d(outchannel),  # batch normalization
            nn.ReLU(inplace=True),       # rectified linear unit, a commonly used activation function
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        # the shortcut must make Y = self.left(X) and X the same shape so they can be added in forward()
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):  # add the features of the two branches and apply ReLU to get the final features
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet(nn.Module):
    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(  # a 3x3 convolution replaces the original 7x7 one, which suits 32x32 inputs and reduces parameters
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        # the first ResidualBlock uses the stride passed to make_layer;
        # the remaining num_blocks - 1 blocks use stride 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)       # 4x4 average pooling reduces the 4x4 feature map to 1x1
        out = out.view(out.size(0), -1)  # flatten to (batch_size, 512)
        out = self.fc(out)
        return out

def ResNet18():
    return ResNet(ResidualBlock)
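As a quick sanity check, a forward pass on a small random batch of 3x32x32 inputs should yield logits of shape (batch_size, 10). The snippet below is a minimal sketch of that check; the variable names are only illustrative.
tmp_net = ResNet18()
dummy = torch.randn(2, 3, 32, 32)   # a fake batch of two CIFAR-10-sized images
logits = tmp_net(dummy)
print(logits.shape)                 # expected: torch.Size([2, 10])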
Training and Testing
import torch.optim as optim

# use the GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# hyperparameters
EPOCH = 20      # number of passes over the training set
pre_epoch = 0   # number of epochs already completed
BATCH_SIZE = 32
LR = 0.01       # learning rate

# model definition: ResNet-18
net = ResNet18().to(device)

# loss function and optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy loss, commonly used for multi-class classification
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
# mini-batch momentum SGD with L2 regularization (weight decay)
# training
if __name__ == "__main__":
    best_acc = 0
    print("Start Training, ResNet-18!")
    num_iters = 0
    for epoch in range(pre_epoch, EPOCH):
        print('\nEpoch: %d' % (epoch + 1))
        net.train()
        sum_loss = 0.0
        correct = 0.0
        total = 0
        for i, data in enumerate(trainloader, 0):
            # enumerate wraps an iterable and yields (index, item) pairs, with the index starting at 0
            # prepare the data
            length = len(trainloader)
            num_iters += 1
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # clear the gradients
            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item() * labels.size(0)
            _, predicted = torch.max(outputs, 1)  # for each sample, take the class with the largest logit as the prediction
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # print the loss and accuracy every 20 batches
            if (i + 1) % 20 == 0:
                print('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                      % (epoch + 1, num_iters, sum_loss / total, 100. * correct / total))
        # evaluate accuracy on the test set after each epoch
        print("Waiting Test!")
        with torch.no_grad():
            correct = 0
            total = 0
            net.eval()  # switch to evaluation mode (BatchNorm uses its running statistics)
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                # take the class with the highest score (the index into outputs.data)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            acc = 100. * correct / total
            print('EPOCH ACC = %.3f%%' % acc)
            # keep track of the best test accuracy
            if acc > best_acc:
                best_acc = acc
                print('best_acc = %.3f%%' % best_acc)
    print("Training Finished, TotalEPOCH=%d" % EPOCH)
Note!!!
Because the network uses BatchNorm, its behavior during training differs slightly from that during testing, so remember to switch modes with
net.train()
net.eval()
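A minimal illustration of the difference: in training mode BatchNorm normalizes with the statistics of the current batch and updates its running estimates, while in eval mode it uses the accumulated running mean and variance, so the same input is generally normalized differently.
import torch
import torch.nn as nn

bn = nn.BatchNorm2d(3)
x = torch.randn(8, 3, 4, 4)

bn.train()
y_train = bn(x)   # normalized with this batch's statistics; running estimates are updated

bn.eval()
y_eval = bn(x)    # normalized with the accumulated running mean/variance

print(torch.allclose(y_train, y_eval))  # generally False: the two modes normalize differently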
1.2 Case 2: Dog Breed Identification on Kaggle (ImageNet Dogs)
https://www.boyuai.com/elites/course/cZu18YmweLv10OeV/jupyter/6N7O3g4unlGHLY_kfyTa-
import os
import random
import shutil
import pandas as pd

data_dir = '/home/kesci/input/Kaggle_Dog6357/dog-breed-identification'  # dataset directory
label_file, train_dir, test_dir = 'labels.csv', 'train', 'test'  # files and folders inside data_dir
new_data_dir = './train_valid_test'  # directory for the reorganized data
valid_ratio = 0.1  # fraction of the training data used for validation

def mkdir_if_not_exist(path):
    # create the directory path if it does not exist yet
    if not os.path.exists(os.path.join(*path)):
        os.makedirs(os.path.join(*path))

def reorg_dog_data(data_dir, label_file, train_dir, test_dir, new_data_dir, valid_ratio):
    # read the training labels
    labels = pd.read_csv(os.path.join(data_dir, label_file))
    id2label = {Id: label for Id, label in labels.values}  # {id: label}

    # shuffle the training files
    train_files = os.listdir(os.path.join(data_dir, train_dir))
    random.shuffle(train_files)

    # original training set
    valid_ds_size = int(len(train_files) * valid_ratio)  # size of the validation set
    for i, file in enumerate(train_files):
        img_id = file.split('.')[0]  # file is a string of the form id.jpg
        img_label = id2label[img_id]
        if i < valid_ds_size:
            mkdir_if_not_exist([new_data_dir, 'valid', img_label])
            shutil.copy(os.path.join(data_dir, train_dir, file),
                        os.path.join(new_data_dir, 'valid', img_label))
        else:
            mkdir_if_not_exist([new_data_dir, 'train', img_label])
            shutil.copy(os.path.join(data_dir, train_dir, file),
                        os.path.join(new_data_dir, 'train', img_label))
        mkdir_if_not_exist([new_data_dir, 'train_valid', img_label])
        shutil.copy(os.path.join(data_dir, train_dir, file),
                    os.path.join(new_data_dir, 'train_valid', img_label))

    # test set
    mkdir_if_not_exist([new_data_dir, 'test', 'unknown'])
    for test_file in os.listdir(os.path.join(data_dir, test_dir)):
        shutil.copy(os.path.join(data_dir, test_dir, test_file),
                    os.path.join(new_data_dir, 'test', 'unknown'))

reorg_dog_data(data_dir, label_file, train_dir, test_dir, new_data_dir, valid_ratio)
Image Augmentation
from torchvision import transforms

transform_train = transforms.Compose([
    # randomly crop a region whose area is 0.08 to 1 times that of the original image and whose
    # aspect ratio is between 3/4 and 4/3, then resize it to 224x224
    transforms.RandomResizedCrop(224, scale=(0.08, 1.0),
                                 ratio=(3.0/4.0, 4.0/3.0)),
    # flip horizontally with probability 0.5
    transforms.RandomHorizontalFlip(),
    # randomly change brightness, contrast and saturation
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    # normalize each channel; (0.485, 0.456, 0.406) and (0.229, 0.224, 0.225) are the
    # per-channel means and standard deviations computed on ImageNet
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# only deterministic operations are applied to the test images
transform_test = transforms.Compose([
    transforms.Resize(256),
    # crop out the central 224x224 square
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
Reading the Data
import torch
import torchvision

# new_data_dir contains four sub-directories: train, valid, train_valid and test;
# in each of them, every sub-directory corresponds to one class and holds all images of that class
train_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'train'),
                                            transform=transform_train)
valid_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'valid'),
                                            transform=transform_test)
train_valid_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'train_valid'),
                                                  transform=transform_train)
test_ds = torchvision.datasets.ImageFolder(root=os.path.join(new_data_dir, 'test'),
                                           transform=transform_test)

batch_size = 128
train_iter = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size, shuffle=True)
train_valid_iter = torch.utils.data.DataLoader(train_valid_ds, batch_size=batch_size, shuffle=True)
test_iter = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)  # keep the test set in order
Defining the Model
The data in this competition is a subset of the ImageNet dataset, so we use fine-tuning: a model pre-trained on the full ImageNet dataset extracts image features, which then serve as the input to a small custom output network.
Here we use a pre-trained ResNet-34 and directly reuse the input to its output layer, i.e. the extracted features. We then redefine the output layer and train only the parameters of this new output layer, while the feature-extraction part keeps the pre-trained parameters unchanged.
from torchvision import models

def get_net(device):
    finetune_net = models.resnet34(pretrained=False)  # ResNet-34; the pre-trained weights are loaded from a local file below
    finetune_net.load_state_dict(torch.load('/home/kesci/input/resnet347742/resnet34-333f7ec4.pth'))
    for param in finetune_net.parameters():  # freeze the pre-trained parameters
        param.requires_grad = False
    # the original finetune_net.fc is a fully connected layer with 512 inputs and 1000 outputs;
    # replace it: the parameters of the new finetune_net.fc will record gradients
    finetune_net.fc = nn.Sequential(
        nn.Linear(in_features=512, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=120)  # 120 output classes
    )
    return finetune_net
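As a quick check that only the new output layer will be trained, one can list the parameters that still require gradients. This is a minimal sketch; it assumes the pre-trained weight file loaded in get_net is available, and check_net is only an illustrative name.
check_net = get_net(torch.device('cpu'))
trainable = [name for name, p in check_net.named_parameters() if p.requires_grad]
print(trainable)  # expected: only the parameters of the new head, e.g. fc.0.weight, fc.0.bias, fc.2.weight, fc.2.bias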
Defining the Training Function
import time

def evaluate_loss_acc(data_iter, net, device):
    # compute the average loss and accuracy on data_iter
    loss = nn.CrossEntropyLoss()
    is_training = net.training  # bool: whether net is currently in training mode
    net.eval()
    l_sum, acc_sum, n = 0, 0, 0
    with torch.no_grad():
        for X, y in data_iter:
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l_sum += l.item() * y.shape[0]
            acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
    net.train(is_training)  # restore the train/eval state of net
    return l_sum / n, acc_sum / n

def train(net, train_iter, valid_iter, num_epochs, lr, wd, device, lr_period,
          lr_decay):
    loss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.fc.parameters(), lr=lr, momentum=0.9, weight_decay=wd)
    net = net.to(device)
    for epoch in range(num_epochs):
        train_l_sum, n, start = 0.0, 0, time.time()
        if epoch > 0 and epoch % lr_period == 0:  # decay the learning rate every lr_period epochs
            lr = lr * lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            train_l_sum += l.item() * y.shape[0]
            n += y.shape[0]
        time_s = "time %.2f sec" % (time.time() - start)
        if valid_iter is not None:
            valid_loss, valid_acc = evaluate_loss_acc(valid_iter, net, device)
            epoch_s = ("epoch %d, train loss %f, valid loss %f, valid acc %f, "
                       % (epoch + 1, train_l_sum / n, valid_loss, valid_acc))
        else:
            epoch_s = ("epoch %d, train loss %f, "
                       % (epoch + 1, train_l_sum / n))
        print(epoch_s + time_s + ', lr ' + str(lr))
Hyperparameter Tuning
num_epochs, lr_period, lr_decay = 20, 10, 0.1
lr, wd = 0.03, 1e-4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = get_net(device)
train(net, train_iter, valid_iter, num_epochs, lr, wd, device, lr_period, lr_decay)
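Once the hyperparameters look reasonable on the validation set, a common next step is to retrain on train_valid_iter and write the softmax probabilities for test_iter into a Kaggle submission file. The sketch below assumes a submission file named submission.csv and relies on ImageFolder's sorted file ordering; it is only one possible continuation.
# retrain a fresh model on the combined train+valid data, then predict on the test set
net = get_net(device)
train(net, train_valid_iter, None, num_epochs, lr, wd, device, lr_period, lr_decay)

preds = []
net.eval()
with torch.no_grad():
    for X, _ in test_iter:
        output = torch.softmax(net(X.to(device)), dim=1)
        preds.extend(output.cpu().numpy().tolist())

# ImageFolder sorts file names, so sorted(listdir) matches the order of test_iter
ids = sorted(os.listdir(os.path.join(new_data_dir, 'test', 'unknown')))
with open('submission.csv', 'w') as f:
    f.write('id,' + ','.join(train_valid_ds.classes) + '\n')
    for img_id, output in zip(ids, preds):
        f.write(img_id.split('.')[0] + ',' + ','.join(str(num) for num in output) + '\n')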