Implementing Autoencoders in PyTorch
Below is example code that implements an autoencoder in PyTorch. The first case defines a stacked autoencoder with three encoder/decoder layer pairs, along with helper functions for training and testing it; a second, simpler case follows.

Case 1
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np
import matplotlib.pyplot as plt

# Define the Stacked Autoencoder class
class StackedAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dims):
        super(StackedAutoencoder, self).__init__()
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        # Define the encoder layers
        self.encoder1 = nn.Linear(input_dim, hidden_dims[0])
        self.encoder2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.encoder3 = nn.Linear(hidden_dims[1], hidden_dims[2])
        # Define the decoder layers
        self.decoder3 = nn.Linear(hidden_dims[2], hidden_dims[1])
        self.decoder2 = nn.Linear(hidden_dims[1], hidden_dims[0])
        self.decoder1 = nn.Linear(hidden_dims[0], input_dim)
        # Define the activation function
        self.activation = nn.ReLU()

    def encoder(self, x):
        z1 = self.activation(self.encoder1(x))
        z2 = self.activation(self.encoder2(z1))
        z3 = self.activation(self.encoder3(z2))
        return z3

    def decoder(self, z):
        xhat3 = self.activation(self.decoder3(z))
        xhat2 = self.activation(self.decoder2(xhat3))
        xhat1 = self.decoder1(xhat2)
        return xhat1

    def forward(self, x):
        z = self.encoder(x)
        xhat = self.decoder(z)
        return xhat

    def get_encoder_output(self, x):
        return self.encoder(x)
# Define the training function
def train(model, train_loader, num_epochs, learning_rate, device):
    # Define the loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Train the model
    for epoch in range(num_epochs):
        for data in train_loader:
            # For an autoencoder, the input is also the reconstruction target;
            # the class labels returned by the loader are not needed
            inputs, _ = data
            inputs = inputs.view(-1, 28*28).to(device)
            # Zero the gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, inputs)
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
        # Print the loss after each epoch
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Define the test function
def test(model, test_loader, device):
    # Define the loss function
    criterion = nn.MSELoss()
    # Evaluate the model
    test_loss = 0
    with torch.no_grad():
        for data in test_loader:
            # The input image is also the reconstruction target
            inputs, _ = data
            inputs = inputs.view(-1, 28*28).to(device)
            # Forward pass
            outputs = model(inputs)
            test_loss += criterion(outputs, inputs).item()
    # Average the per-batch losses over the number of batches
    test_loss /= len(test_loader)
    print('Average Test Loss: {:.4f}'.format(test_loss))
Main program
# Define the main function
def main():
    # Set the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Set the hyperparameters
    input_dim = 28*28
    hidden_dims = [256, 128, 64]
    num_epochs = 10
    batch_size = 128
    learning_rate = 0.001
    # Download the MNIST dataset and create data loaders
    train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    # Create the Stacked Autoencoder model and move it to the device
    model = StackedAutoencoder(input_dim, hidden_dims).to(device)
    # Train the model
    train(model, train_loader, num_epochs, learning_rate, device)
    # Test the model
    test(model, test_loader, device)
    # Decode a random latent vector (note: a plain autoencoder does not
    # regularize its latent space, so the result may not resemble a digit)
    with torch.no_grad():
        z = torch.randn(1, hidden_dims[-1]).to(device)
        xhat = model.decoder(z)
        xhat = xhat.view(28, 28).cpu().numpy()
    plt.imshow(xhat, cmap='gray')
    plt.show()

if __name__ == '__main__':
    main()
In main(), the device is set first, the hyperparameters are defined, the MNIST dataset is downloaded, and the data loaders are created. The stacked autoencoder model is then built and moved to the device. Next, train() is called to train the model and test() to evaluate it. Finally, a random latent vector is decoded into an image and displayed. In train(), the loss function and optimizer are defined and the model is trained batch by batch. In test(), the same loss function is used to evaluate the model; the procedure mirrors training but needs no gradient updates, and the average test loss is reported at the end.
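The class also exposes get_encoder_output(), which the script above never calls. A common use of a trained autoencoder is to discard the decoder and keep the encoder as a feature extractor. Here is a minimal sketch of that idea, assuming the model, test_loader, and device from main() are in scope:

# A minimal sketch: extract latent features with the trained encoder.
# Assumes `model`, `test_loader`, and `device` are defined as in main().
features, labels = [], []
model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.view(-1, 28*28).to(device)
        z = model.get_encoder_output(inputs)  # shape: (batch_size, 64)
        features.append(z.cpu())
        labels.append(targets)
features = torch.cat(features)  # latent codes for the whole test set
labels = torch.cat(labels)      # matching digit labels
print(features.shape, labels.shape)

The 64-dimensional codes collected this way can then be fed to a simple classifier or a 2-D visualization, which is the usual motivation for the stacked pretraining.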
Case 2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the MNIST dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Define the autoencoder model
class StackedAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dims):
        super(StackedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dims[0]),
            nn.ReLU(),
            nn.Linear(hidden_dims[0], hidden_dims[1]),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dims[1], hidden_dims[0]),
            nn.ReLU(),
            nn.Linear(hidden_dims[0], input_dim),
            # Tanh keeps outputs in [-1, 1], matching the Normalize((0.5,), (0.5,)) inputs
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
# Train the autoencoder
input_dim = 28 * 28  # MNIST images are 28x28
hidden_dims = [256, 128]  # hidden layer dimensions
model = StackedAutoencoder(input_dim, hidden_dims)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, _ in trainloader:
        images = images.view(images.size(0), -1)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, images)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    epoch_loss = running_loss / len(trainloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
print("Training finished!")
# Reconstruct images with the autoencoder
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=False)

dataiter = iter(testloader)
images, labels = next(dataiter)
images = images.view(images.size(0), -1)
with torch.no_grad():
    outputs = model(images)

# Visualize the original and reconstructed images
import matplotlib.pyplot as plt

def imshow(img):
    img = img / 2 + 0.5  # undo the normalization
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')
    plt.show()

# Show the original images
imshow(torchvision.utils.make_grid(images.view(-1, 1, 28, 28)))
# Show the reconstructed images
imshow(torchvision.utils.make_grid(outputs.view(-1, 1, 28, 28)))
This case implements a simple stacked autoencoder that reconstructs handwritten digit images from the MNIST dataset. A StackedAutoencoder class is defined containing an encoder and a decoder: the encoder is a sequence of fully connected layers with ReLU activations, and the decoder mirrors it, ending in a Tanh so the outputs match the normalized pixel range. The model is trained with the MSE loss and the Adam optimizer. During training, each image is flattened into a 784-dimensional vector and passed through the model; the loss between the reconstruction and the original image is computed, followed by backpropagation and a parameter update. Finally, the trained model reconstructs a batch of test images, and the originals and reconstructions are visualized side by side.
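Because the training objective is per-pixel MSE, the same quantity can serve as a per-image reconstruction-error score, which is one common way to use an autoencoder for spotting unusual inputs. A minimal sketch, assuming the images and outputs variables from the code above are in scope:

# A minimal sketch: per-image reconstruction error.
# Assumes `images` and `outputs` from the reconstruction step above.
with torch.no_grad():
    errors = ((outputs - images) ** 2).mean(dim=1)  # one MSE value per image
for i, err in enumerate(errors):
    print(f"image {i}: reconstruction error = {err.item():.4f}")
# Images with unusually high error are reconstructed poorly by the model,
# which is the basic idea behind autoencoder-based anomaly detection.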
An autoencoder can include more hidden layers, a more complex structure, and more training steps to learn better representations of the data. You can also try adding dropout layers to the encoder and decoder, using different activation functions, and so on to improve the model's performance, as sketched below.
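As one illustration of those suggestions, here is a hypothetical variant: the class name RegularizedAutoencoder, the layer sizes, and the dropout rate are all illustrative choices rather than part of the original code. It adds nn.Dropout after the hidden layers and swaps ReLU for LeakyReLU:

# A hypothetical deeper variant with dropout and a different activation.
# All layer sizes and the dropout rate are illustrative, not tuned values.
import torch.nn as nn

class RegularizedAutoencoder(nn.Module):
    def __init__(self, input_dim=28 * 28, dropout=0.2):
        super(RegularizedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 256), nn.LeakyReLU(), nn.Dropout(dropout),
            nn.Linear(256, 128), nn.LeakyReLU(), nn.Dropout(dropout),
            nn.Linear(128, 64), nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(64, 128), nn.LeakyReLU(), nn.Dropout(dropout),
            nn.Linear(128, 256), nn.LeakyReLU(),
            nn.Linear(256, input_dim), nn.Tanh()
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))

This variant trains with exactly the same loop as Case 2; just remember to call model.eval() at inference time so that dropout is disabled when reconstructing images.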