Below is example PyTorch code for implementing a stacked autoencoder, together with helper functions for training and testing. Two examples are provided.
Example 1
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import matplotlib.pyplot as plt
# Define the Stacked Autoencoder class
class StackedAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dims):
        super(StackedAutoencoder, self).__init__()
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        # Define the encoder layers
        self.encoder1 = nn.Linear(input_dim, hidden_dims[0])
        self.encoder2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.encoder3 = nn.Linear(hidden_dims[1], hidden_dims[2])
        # Define the decoder layers
        self.decoder3 = nn.Linear(hidden_dims[2], hidden_dims[1])
        self.decoder2 = nn.Linear(hidden_dims[1], hidden_dims[0])
        self.decoder1 = nn.Linear(hidden_dims[0], input_dim)
        # Define the activation function
        self.activation = nn.ReLU()

    def encoder(self, x):
        z1 = self.activation(self.encoder1(x))
        z2 = self.activation(self.encoder2(z1))
        z3 = self.activation(self.encoder3(z2))
        return z3

    def decoder(self, z):
        xhat3 = self.activation(self.decoder3(z))
        xhat2 = self.activation(self.decoder2(xhat3))
        xhat1 = self.decoder1(xhat2)
        return xhat1

    def forward(self, x):
        z = self.encoder(x)
        xhat = self.decoder(z)
        return xhat

    def get_encoder_output(self, x):
        return self.encoder(x)
# Define the training function
def train(model, train_loader, num_epochs, learning_rate, device):
    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Train the model
    for epoch in range(num_epochs):
        for data in train_loader:
            # For an autoencoder the input is also the reconstruction target;
            # the MNIST class labels are discarded
            inputs, _ = data
            inputs = inputs.view(-1, 28*28).to(device)
            # Zero the gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
        # Print the loss after each epoch
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Define the test function
def test(model, test_loader, device):
    # Define the loss function
    criterion = nn.MSELoss()
    # Evaluate the model
    test_loss = 0
    with torch.no_grad():
        for data in test_loader:
            # The input is also the reconstruction target
            inputs, _ = data
            inputs = inputs.view(-1, 28*28).to(device)
            # Forward pass
            outputs = model(inputs)
            test_loss += criterion(outputs, inputs).item()
    # Print the average test loss (mean of the per-batch MSE values)
    test_loss /= len(test_loader)
    print('Average Test Loss: {:.4f}'.format(test_loss))
Main program
# Define the main function
def main():
    # Set the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Set the hyperparameters
    input_dim = 28*28
    hidden_dims = [256, 128, 64]
    num_epochs = 10
    batch_size = 128
    learning_rate = 0.001
    # Download the MNIST dataset and create data loaders
    train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Create the Stacked Autoencoder model and move it to the device
    model = StackedAutoencoder(input_dim, hidden_dims).to(device)
    # Train the model
    train(model, train_loader, num_epochs, learning_rate, device)
    # Test the model
    test(model, test_loader, device)
    # Sample a random latent vector, decode it, and display the result
    with torch.no_grad():
        z = torch.randn(1, hidden_dims[-1]).to(device)
        xhat = model.decoder(z)
        xhat = xhat.view(28, 28).cpu().numpy()
    plt.imshow(xhat, cmap='gray')
    plt.show()

if __name__ == '__main__':
    main()
In the main() function, the device is set first, the hyperparameters are defined, and the MNIST dataset is downloaded and wrapped in data loaders. The stacked autoencoder model is then created and moved to the device. Next, train() is called to train the model and test() is called to evaluate it. Finally, a random latent vector is sampled, decoded into an image, and displayed. In train(), the loss function and optimizer are defined and the model is trained. In test(), the loss function is defined and the model is evaluated; the procedure mirrors training, except that no gradient updates are performed, and the average test loss is printed at the end.
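The class also defines get_encoder_output(), which the example above never calls. As a minimal sketch of how it could be used for feature extraction (assuming the trained model, test_loader, and device from Example 1), the learned 64-dimensional codes can be collected like this:

# Sketch: extract latent codes with get_encoder_output (hypothetical usage,
# assuming the trained `model`, `test_loader`, and `device` from Example 1)
features = []
with torch.no_grad():
    for inputs, _ in test_loader:
        inputs = inputs.view(-1, 28*28).to(device)
        z = model.get_encoder_output(inputs)  # shape: (batch_size, 64)
        features.append(z.cpu())
features = torch.cat(features)  # (10000, 64) codes for the MNIST test set
print(features.shape)

These codes could then serve as compact inputs to a downstream classifier or clustering step.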
Example 2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# Data preprocessing: ToTensor scales to [0, 1], Normalize shifts to [-1, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
# Load the MNIST training set
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# Define the autoencoder model
class StackedAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dims):
        super(StackedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dims[0]),
            nn.ReLU(),
            nn.Linear(hidden_dims[0], hidden_dims[1]),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dims[1], hidden_dims[0]),
            nn.ReLU(),
            nn.Linear(hidden_dims[0], input_dim),
            # Tanh matches the [-1, 1] range of the normalized inputs;
            # a final ReLU could never reproduce negative pixel values
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Train the autoencoder
input_dim = 28 * 28  # MNIST images are 28x28
hidden_dims = [256, 128]  # hidden-layer dimensions
model = StackedAutoencoder(input_dim, hidden_dims)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, _ in trainloader:
        images = images.view(images.size(0), -1)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, images)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(trainloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
print("Training finished!")
# Use the autoencoder to reconstruct test images
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False)
dataiter = iter(testloader)
images, labels = next(dataiter)  # dataiter.next() was removed in newer PyTorch
images = images.view(images.size(0), -1)
with torch.no_grad():  # no gradients needed for inference
    outputs = model(images)
# Visualize the original and reconstructed images
import numpy as np
import matplotlib.pyplot as plt

def imshow(img):
    img = img / 2 + 0.5  # undo the normalization back to [0, 1]
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')
    plt.show()

# Show the original images
imshow(torchvision.utils.make_grid(images.view(-1, 1, 28, 28)))
# Show the reconstructed images
imshow(torchvision.utils.make_grid(outputs.view(-1, 1, 28, 28)))
This example implements a simple two-layer autoencoder that reconstructs handwritten-digit images from the MNIST dataset. A StackedAutoencoder class is defined, consisting of an encoder and a decoder: the encoder is a sequence of fully connected layers with ReLU activations, and the decoder mirrors it, ending in a Tanh so the output matches the normalized pixel range. The model is trained with an MSE loss and the Adam optimizer. During training, each image is flattened into a 784-dimensional vector and fed through the model; the loss between the reconstruction and the original image is computed, followed by backpropagation and a parameter update. Finally, the trained model reconstructs a batch of test images, and the originals and reconstructions are visualized side by side.
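To put a number on reconstruction quality beyond the visual comparison, the per-image MSE can also be computed directly. A minimal sketch, reusing the flattened images and outputs tensors from the example above:

# Sketch: per-image reconstruction error for the test batch above
# (assumes `images` and `outputs` are the flattened tensors from Example 2)
per_image_mse = ((outputs - images) ** 2).mean(dim=1)  # one MSE per image
for i, err in enumerate(per_image_mse.tolist()):
    print(f"image {i}: reconstruction MSE = {err:.4f}")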
An autoencoder can include more hidden layers, a more complex architecture, and more training steps in order to learn better representations of the data. Beyond that, adding dropout layers in the encoder and decoder, or trying different activation functions, can further improve the model, as in the sketch below.
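As one illustration of those suggestions (a sketch under assumed settings, not a tuned design), here is a hypothetical variant of Example 2's model with an extra hidden layer, Dropout for regularization, and LeakyReLU in place of ReLU:

import torch.nn as nn

# Sketch: a deeper autoencoder variant with Dropout and LeakyReLU.
# Layer sizes and the dropout rate are illustrative assumptions, not tuned values.
class RegularizedAutoencoder(nn.Module):
    def __init__(self, input_dim=28 * 28, hidden_dims=(256, 128, 64), p_drop=0.2):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dims[0]),
            nn.LeakyReLU(),
            nn.Dropout(p_drop),          # regularizes the learned representation
            nn.Linear(hidden_dims[0], hidden_dims[1]),
            nn.LeakyReLU(),
            nn.Linear(hidden_dims[1], hidden_dims[2]),
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dims[2], hidden_dims[1]),
            nn.LeakyReLU(),
            nn.Linear(hidden_dims[1], hidden_dims[0]),
            nn.LeakyReLU(),
            nn.Linear(hidden_dims[0], input_dim),
            nn.Tanh(),                   # matches inputs normalized to [-1, 1]
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))

Because only the model construction differs, this variant can be dropped into Example 2's training loop unchanged.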