实践
MLP
import time
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
# Reproducibility: make cuDNN deterministic when running on GPU.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
# Select GPU if available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 1
learning_rate = 0.1
num_epochs = 25
batch_size = 64
# Network architecture parameters
num_features = 784   # 28x28 MNIST images flattened
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
# Load data
# transforms.ToTensor() scales pixel values into the [0, 1] range.
train_data = datasets.MNIST(root="datasets",
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_data = datasets.MNIST(root="datasets",
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)
# Data iterators
train_data_loader = DataLoader(dataset=train_data,
                               batch_size=batch_size,
                               shuffle=True)
test_data_loader = DataLoader(dataset=test_data,
                              batch_size=batch_size,
                              shuffle=False)
# Inspect the dataset (debugging aid)
# for images, labels in train_data_loader:
#     print("image batch dimensions: ", images.shape)  # torch.Size([64, 1, 28, 28])
#     print("Label batch dimensions: ", labels.shape)  # torch.Size([64])
#     print(labels.size(0))  # 64
#     break
# 定义模型结构
class MultilayerPerceptron(nn.Module):
    """Fully connected network: two ReLU hidden layers plus a linear output layer.

    forward() returns both the raw logits (for CrossEntropyLoss) and the
    softmax class probabilities (for prediction).
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes):
        super(MultilayerPerceptron, self).__init__()
        # PyTorch's default layer initialization is kept for all three layers.
        self.linear_1 = nn.Linear(num_features, num_hidden_1)
        self.linear_2 = nn.Linear(num_hidden_1, num_hidden_2)
        self.linear_out = nn.Linear(num_hidden_2, num_classes)

    def forward(self, X):
        """Return (logits, class probabilities) for a batch of flattened images."""
        hidden = F.relu(self.linear_1(X))
        hidden = F.relu(self.linear_2(hidden))
        logits = self.linear_out(hidden)
        # Softmax turns the logits into a probability distribution over classes.
        probabilities = F.softmax(logits, dim=1)
        return logits, probabilities
np.random.seed(random_seed)          # fix the NumPy RNG seed
torch.manual_seed(random_seed)       # seed the CPU RNG
torch.cuda.manual_seed(random_seed)  # seed the GPU RNG
model = MultilayerPerceptron(num_features, num_hidden_1, num_hidden_2, num_classes)
model = model.to(device)
# Loss function (expects raw logits, applies log-softmax internally)
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(device)
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Accuracy computation
def compute_accuracy(net, data_loader):
    """Return the percentage of correctly classified examples in data_loader."""
    net.eval()  # evaluation mode
    correct, seen = 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for features, labels in data_loader:
            # Flatten each 28x28 image into a 784-dim vector.
            features = features.reshape(-1, 28 * 28).to(device)
            labels = labels.to(device)
            _, probabilities = net(features)
            # torch.max returns (values, indices); the index is the predicted class.
            predictions = torch.max(probabilities, 1)[1]
            seen += labels.size(0)
            correct += (predictions == labels).sum()
    return correct.float() / seen * 100
start_time = time.time()
for epoch in range(num_epochs):
    model.train()  # training mode
    for batch_idx, (features, labels) in enumerate(train_data_loader):
        features = features.reshape(-1, 28*28).to(device)  # flatten images to vectors
        labels = labels.to(device)
        out, out_prob = model(features)  # forward pass
        optimizer.zero_grad()            # reset accumulated gradients
        loss = loss_func(out, labels)    # loss is computed on the logits
        loss.backward()                  # backpropagation
        optimizer.step()                 # update parameters
        if not batch_idx % 50:           # log every 50 batches
            print("Epoch: %03d/%03d | Batch: %03d/%03d | Loss: %.4f"
                  %(epoch+1, num_epochs, batch_idx, len(train_data_loader), loss))
    with torch.no_grad():
        print("Epoch: %03d/%03d | training accuracy: %.2f%%"
              % (epoch + 1, num_epochs, compute_accuracy(model, train_data_loader)))
    # Time elapsed since training started
    print("Time elapsed: %.2f min" % ((time.time() - start_time)/60))
# Total time after all epochs finished
print("Total Training Time %.2f min" % ((time.time() - start_time)/60))
print("Test accuracy: %.2f%%" % (compute_accuracy(model, test_data_loader)))
MLP+BN
为了保证非线性,一般 BatchNorm 与激活函数的顺序如下:
- 如果激活函数使用的是 sigmoid 或者 tanh 函数,那么就是先进行非线性激活,再进行 BatchNorm。
- 如果激活函数使用的是 relu 函数,那么可以先进行 BatchNorm,再进行非线性激活。
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
# Reproducibility: make cuDNN deterministic when running on GPU.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
# GPU or CPU
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# DEVICE = torch.device("cpu")
# Hyperparameters
RANDOM_SEED = 1       # random seed
BATCH_SIZE = 64       # batch size
NUM_EPOCHS = 25       # number of training epochs
LEARNING_RATE = 0.01  # learning rate
# Network architecture parameters
num_features = 784   # 28x28 MNIST images flattened
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
# Dataset path
dataset_path = "/mnt/wxh_data/practice_datasets/"
# Load data
# transforms.ToTensor() scales pixel values into the [0, 1] range.
train_data = datasets.MNIST(root=dataset_path,
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_data = datasets.MNIST(root=dataset_path,
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)
# Build data iterators
train_data_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_data_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)
# Model definition
class MultilayerPerceptron(nn.Module):
    """MLP with two hidden blocks (Linear -> BatchNorm1d -> ReLU) and a linear output.

    With ReLU activations, BatchNorm is applied before the nonlinearity.
    forward() returns both the raw logits (for CrossEntropyLoss) and the
    softmax class probabilities (for prediction).
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes):
        super(MultilayerPerceptron, self).__init__()
        self.model = nn.Sequential(nn.Linear(num_features, num_hidden_1),
                                   nn.BatchNorm1d(num_hidden_1),
                                   nn.ReLU(),
                                   nn.Linear(num_hidden_1, num_hidden_2),
                                   nn.BatchNorm1d(num_hidden_2),
                                   nn.ReLU(),
                                   nn.Linear(num_hidden_2, num_classes))

    def forward(self, X):
        """Return (logits, class probabilities) for a batch of flattened images."""
        out = self.model(X)
        # torch.softmax instead of nn.Softmax(dim=1)(out): the original built a
        # throwaway nn.Softmax module on every forward call; the functional form
        # computes the same result without the per-call allocation.
        out_prob = torch.softmax(out, dim=1)
        return out, out_prob
torch.manual_seed(RANDOM_SEED)       # seed the CPU RNG
torch.cuda.manual_seed(RANDOM_SEED)  # seed the GPU RNG
# Accuracy computation
def compute_accuracy(net, data_loader):
    """Return the percentage of correctly classified examples in data_loader."""
    net.eval()  # evaluation mode: batch-norm uses its running statistics
    correct, seen = 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for features, labels in data_loader:
            # Flatten each 28x28 image into a 784-dim vector.
            features = features.reshape(-1, 28 * 28).to(DEVICE)
            labels = labels.to(DEVICE)
            seen += labels.size(0)
            _, probabilities = net(features)
            # torch.max returns (values, indices); the index is the predicted class.
            predictions = torch.max(probabilities, 1)[1]
            correct += (predictions == labels).sum()
    return correct / seen * 100
# Create the model
model = MultilayerPerceptron(num_features, num_hidden_1, num_hidden_2, num_classes)
model = model.to(DEVICE)
# Loss function (expects raw logits, applies log-softmax internally)
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(DEVICE)
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
# Training
start_time = time.time()
for epoch in range(NUM_EPOCHS):
    model.train()  # training mode: batch-norm uses per-batch statistics
    for batch_idx, (features, labels) in enumerate(train_data_loader):
        features = features.reshape(-1, 28*28).to(DEVICE)  # flatten images to vectors
        labels = labels.to(DEVICE)
        optimizer.zero_grad()            # reset accumulated gradients
        out, out_prob = model(features)  # forward pass
        loss = loss_func(out, labels)    # loss is computed on the logits
        loss.backward()                  # backpropagation
        optimizer.step()                 # update parameters
        if not batch_idx % 50:           # log every 50 batches
            print("Epoch: %03d/%03d | Batch: %03d/%03d | Loss: %.4f"
                  % (epoch+1, NUM_EPOCHS, batch_idx, len(train_data_loader), loss))
    print("Epoch: %03d/%03d | training accuracy: %.2f%%"
          % (epoch+1, NUM_EPOCHS, compute_accuracy(model, train_data_loader)))
    # Time elapsed since training started
    print("Time elapsed: %.2f min" % ((time.time() - start_time) / 60))
# Total time after all epochs finished
print("Total training time: %.2f min" % ((time.time() - start_time) / 60))
# Test
print("Test accuracy: %.2f%%" % compute_accuracy(model, test_data_loader))