实践
MLP
import time
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
# Reproducibility: make cuDNN deterministic when running on GPU.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
# Select GPU if available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 1
learning_rate = 0.1
num_epochs = 25
batch_size = 64
# Network architecture parameters
num_features = 784   # 28x28 MNIST images flattened
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
# Load data
# transforms.ToTensor() scales pixel values into the [0, 1] range.
train_data = datasets.MNIST(root="datasets",
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_data = datasets.MNIST(root="datasets",
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)
# Data iterators
train_data_loader = DataLoader(dataset=train_data,
                               batch_size=batch_size,
                               shuffle=True)
test_data_loader = DataLoader(dataset=test_data,
                              batch_size=batch_size,
                              shuffle=False)
# Inspect the dataset (debugging aid)
# for images, labels in train_data_loader:
#     print("image batch dimensions: ", images.shape)  # torch.Size([64, 1, 28, 28])
#     print("Label batch dimensions: ", labels.shape)  # torch.Size([64])
#     print(labels.size(0))  # 64
#     break
# 定义模型结构
class MultilayerPerceptron(nn.Module):
    """Fully connected network: two ReLU hidden layers plus a linear output layer.

    forward() returns both the raw logits (for CrossEntropyLoss) and the
    softmax class probabilities (for prediction).
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes):
        super(MultilayerPerceptron, self).__init__()
        # PyTorch's default layer initialization is kept for all three layers.
        self.linear_1 = nn.Linear(num_features, num_hidden_1)
        self.linear_2 = nn.Linear(num_hidden_1, num_hidden_2)
        self.linear_out = nn.Linear(num_hidden_2, num_classes)

    def forward(self, X):
        """Return (logits, class probabilities) for a batch of flattened images."""
        hidden = F.relu(self.linear_1(X))
        hidden = F.relu(self.linear_2(hidden))
        logits = self.linear_out(hidden)
        # Softmax turns the logits into a probability distribution over classes.
        probabilities = F.softmax(logits, dim=1)
        return logits, probabilities
np.random.seed(random_seed)          # fix the NumPy RNG seed
torch.manual_seed(random_seed)       # seed the CPU RNG
torch.cuda.manual_seed(random_seed)  # seed the GPU RNG
model = MultilayerPerceptron(num_features, num_hidden_1, num_hidden_2, num_classes)
model = model.to(device)
# Loss function (expects raw logits, applies log-softmax internally)
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(device)
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Accuracy computation
def compute_accuracy(net, data_loader):
    """Return the percentage of correctly classified examples in data_loader."""
    net.eval()  # evaluation mode
    correct, seen = 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for features, labels in data_loader:
            # Flatten each 28x28 image into a 784-dim vector.
            features = features.reshape(-1, 28 * 28).to(device)
            labels = labels.to(device)
            _, probabilities = net(features)
            # torch.max returns (values, indices); the index is the predicted class.
            predictions = torch.max(probabilities, 1)[1]
            seen += labels.size(0)
            correct += (predictions == labels).sum()
    return correct.float() / seen * 100
start_time = time.time()
for epoch in range(num_epochs):
    model.train()  # training mode
    for batch_idx, (features, labels) in enumerate(train_data_loader):
        features = features.reshape(-1, 28*28).to(device)  # flatten images to vectors
        labels = labels.to(device)
        out, out_prob = model(features)  # forward pass
        optimizer.zero_grad()            # reset accumulated gradients
        loss = loss_func(out, labels)    # loss is computed on the logits
        loss.backward()                  # backpropagation
        optimizer.step()                 # update parameters
        if not batch_idx % 50:           # log every 50 batches
            print("Epoch: %03d/%03d | Batch: %03d/%03d | Loss: %.4f"
                  %(epoch+1, num_epochs, batch_idx, len(train_data_loader), loss))
    with torch.no_grad():
        print("Epoch: %03d/%03d | training accuracy: %.2f%%"
              % (epoch + 1, num_epochs, compute_accuracy(model, train_data_loader)))
    # Time elapsed since training started
    print("Time elapsed: %.2f min" % ((time.time() - start_time)/60))
# Total time after all epochs finished
print("Total Training Time %.2f min" % ((time.time() - start_time)/60))
print("Test accuracy: %.2f%%" % (compute_accuracy(model, test_data_loader)))
MLP+BN
为了保证非线性,一般 BatchNorm 与激活函数的顺序如下:
- 如果激活函数使用的是 sigmoid 或者 tanh 函数,那么就是先进行非线性激活,再进行 BatchNorm。
- 如果激活函数使用的是 relu 函数,那么可以先进行 BatchNorm,再进行非线性激活。
import time
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
# Reproducibility: make cuDNN deterministic when running on GPU.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
# GPU or CPU
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# DEVICE = torch.device("cpu")
# Hyperparameters
RANDOM_SEED = 1       # random seed
BATCH_SIZE = 64       # batch size
NUM_EPOCHS = 25       # number of training epochs
LEARNING_RATE = 0.01  # learning rate
# Network architecture parameters
num_features = 784   # 28x28 MNIST images flattened
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
# Dataset path
dataset_path = "/mnt/wxh_data/practice_datasets/"
# Load data
# transforms.ToTensor() scales pixel values into the [0, 1] range.
train_data = datasets.MNIST(root=dataset_path,
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_data = datasets.MNIST(root=dataset_path,
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)
# Build data iterators
train_data_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_data_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)
# Model definition
class MultilayerPerceptron(nn.Module):
    """MLP with two hidden blocks (Linear -> BatchNorm1d -> ReLU) and a linear output.

    With ReLU activations, BatchNorm is applied before the nonlinearity.
    forward() returns both the raw logits (for CrossEntropyLoss) and the
    softmax class probabilities (for prediction).
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes):
        super(MultilayerPerceptron, self).__init__()
        self.model = nn.Sequential(nn.Linear(num_features, num_hidden_1),
                                   nn.BatchNorm1d(num_hidden_1),
                                   nn.ReLU(),
                                   nn.Linear(num_hidden_1, num_hidden_2),
                                   nn.BatchNorm1d(num_hidden_2),
                                   nn.ReLU(),
                                   nn.Linear(num_hidden_2, num_classes))

    def forward(self, X):
        """Return (logits, class probabilities) for a batch of flattened images."""
        out = self.model(X)
        # torch.softmax instead of nn.Softmax(dim=1)(out): the original built a
        # throwaway nn.Softmax module on every forward call; the functional form
        # computes the same result without the per-call allocation.
        out_prob = torch.softmax(out, dim=1)
        return out, out_prob
torch.manual_seed(RANDOM_SEED)       # seed the CPU RNG
torch.cuda.manual_seed(RANDOM_SEED)  # seed the GPU RNG
# Accuracy computation
def compute_accuracy(net, data_loader):
    """Return the percentage of correctly classified examples in data_loader."""
    net.eval()  # evaluation mode: batch-norm uses its running statistics
    correct, seen = 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for features, labels in data_loader:
            # Flatten each 28x28 image into a 784-dim vector.
            features = features.reshape(-1, 28 * 28).to(DEVICE)
            labels = labels.to(DEVICE)
            seen += labels.size(0)
            _, probabilities = net(features)
            # torch.max returns (values, indices); the index is the predicted class.
            predictions = torch.max(probabilities, 1)[1]
            correct += (predictions == labels).sum()
    return correct / seen * 100
# Create the model
model = MultilayerPerceptron(num_features, num_hidden_1, num_hidden_2, num_classes)
model = model.to(DEVICE)
# Loss function (expects raw logits, applies log-softmax internally)
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(DEVICE)
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
# Training
start_time = time.time()
for epoch in range(NUM_EPOCHS):
    model.train()  # training mode: batch-norm uses per-batch statistics
    for batch_idx, (features, labels) in enumerate(train_data_loader):
        features = features.reshape(-1, 28*28).to(DEVICE)  # flatten images to vectors
        labels = labels.to(DEVICE)
        optimizer.zero_grad()            # reset accumulated gradients
        out, out_prob = model(features)  # forward pass
        loss = loss_func(out, labels)    # loss is computed on the logits
        loss.backward()                  # backpropagation
        optimizer.step()                 # update parameters
        if not batch_idx % 50:           # log every 50 batches
            print("Epoch: %03d/%03d | Batch: %03d/%03d | Loss: %.4f"
                  % (epoch+1, NUM_EPOCHS, batch_idx, len(train_data_loader), loss))
    print("Epoch: %03d/%03d | training accuracy: %.2f%%"
          % (epoch+1, NUM_EPOCHS, compute_accuracy(model, train_data_loader)))
    # Time elapsed since training started
    print("Time elapsed: %.2f min" % ((time.time() - start_time) / 60))
# Total time after all epochs finished
print("Total training time: %.2f min" % ((time.time() - start_time) / 60))
# Test
print("Test accuracy: %.2f%%" % compute_accuracy(model, test_data_loader))