Deep Neural Network (DNN) for Predicting HIV Positivity: Model Training and Evaluation with PyTorch
# Import the required libraries
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
# Load the data (raw string avoids '\U' escape errors in the Windows path)
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\output_data1.xlsx')
x = data.iloc[:, 1:].values
y = data.iloc[:, 0].values
# Normalize the features, keeping the training statistics for later reuse on the validation set
x_mean = x.mean()
x_std = x.std()
x = (x - x_mean) / x_std
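# Note: x.mean() and x.std() above are single scalars computed over the whole feature matrix.
# A per-feature alternative (a sketch of a common variant, not what the original code does):
#   x = (x - x.mean(axis=0)) / x.std(axis=0)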
# Split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
# Convert numpy arrays to tensors
x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)
x_test = torch.Tensor(x_test)
y_test = torch.Tensor(y_test)
# Define the model
class DNN(nn.Module):
    def __init__(self, input_size, num_classes, hidden_size=8):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        #self.fc2 = nn.Linear(8, 4)
        #self.fc3 = nn.Linear(4, 8)
        self.out = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.5)
        # Attention block kept from the original code; it is built here but not used in forward()
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, 1),
            nn.Tanh(),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        out = torch.relu(self.fc1(x))
        #out = self.dropout(out)
        #out = torch.relu(self.fc2(out))
        #out = self.dropout(out)
        #out = torch.relu(self.fc3(out))
        #out = self.dropout(out)
        # attention_weights = self.attention(out)
        # out = attention_weights * out
        out = self.out(out)
        return out
# Hyperparameters
input_size = 16
hidden_size = 8
num_classes = 2
learning_rate = 0.001
num_epochs = 200
# Instantiate the model
model = DNN(input_size, num_classes, hidden_size)
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
train_loss = []
train_accuracy = []
for epoch in range(num_epochs):
    # Forward pass and backpropagation
    outputs = model(x_train)
    loss = criterion(outputs, y_train.long())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Compute training accuracy
    _, predicted = torch.max(outputs.data, 1)
    total = y_train.size(0)
    correct = (predicted == y_train.long()).sum().item()
    accuracy = correct / total
    train_accuracy.append(accuracy)
    train_loss.append(loss.item())
    # Log training progress
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, loss.item(), accuracy * 100))
# Evaluate on the test set
with torch.no_grad():
    outputs = model(x_test)
    loss = criterion(outputs, y_test.long())
    _, predicted = torch.max(outputs.data, 1)
    total = y_test.size(0)
    correct = (predicted == y_test.long()).sum().item()
    accuracy = correct / total
    print('Test Accuracy: {:.2f}%'.format(accuracy * 100))
    # Compute the AUC
    prob = torch.softmax(outputs, dim=1)[:, 1]
    fpr, tpr, _ = roc_curve(y_test.numpy(), prob.numpy())
    roc_auc = auc(fpr, tpr)
    print('AUC: {:.2f}'.format(roc_auc))
    # Print the predicted probability of the positive class for each test sample
    print('Per-sample probabilities:', prob.numpy().reshape(-1, 1))
# Load the validation data
val_data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\验证.xlsx')
val_x = val_data.iloc[:, 1:].values
val_y = val_data.iloc[:, 0].values
# Convert numpy arrays to tensors (x and y are the full, already-normalized dataset,
# kept for the retraining steps outlined in the placeholder comments below)
x = torch.Tensor(x)
y = torch.Tensor(y)
val_x = torch.Tensor(val_x)
val_y = torch.Tensor(val_y)
# Normalize the validation features with the statistics of the original (raw) training data;
# the original code recomputed mean/std from the already-normalized x, which does not match
# the scaling applied during training
val_x = (val_x - x_mean) / x_std
# Define the model, hyperparameters, loss function and optimizer...
# Train the model...
# Print the per-sample probabilities...
# Plot the accuracy curve...
# Plot the loss curve...
# Plot the ROC curve...
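# The three plotting placeholders above are left empty in the original post. The lines below
# are a minimal illustrative sketch, assuming only the train_accuracy, train_loss, fpr, tpr
# and roc_auc variables computed earlier in this script; axis labels and titles are illustrative.
plt.figure()
plt.plot(range(1, num_epochs + 1), train_accuracy)
plt.xlabel('Epoch')
plt.ylabel('Training accuracy')
plt.title('Training accuracy per epoch')
plt.show()

plt.figure()
plt.plot(range(1, num_epochs + 1), train_loss)
plt.xlabel('Epoch')
plt.ylabel('Training loss')
plt.title('Training loss per epoch')
plt.show()

plt.figure()
plt.plot(fpr, tpr, label='ROC curve (AUC = {:.2f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve on the test set')
plt.legend(loc='lower right')
plt.show()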
# Evaluate on the validation set
model.eval()
with torch.no_grad():
    val_outputs = model(val_x)
    val_loss = criterion(val_outputs, val_y.long())
    # Compute validation accuracy
    _, val_predicted = torch.max(val_outputs.data, 1)
    val_total = val_y.size(0)
    val_correct = (val_predicted == val_y.long()).sum().item()
    val_accuracy = val_correct / val_total
    print('Validation Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss.item(), val_accuracy * 100))
    # Print the predicted probability of the positive class for each validation sample
    val_prob = torch.softmax(val_outputs, dim=1)[:, 1]
    print('Validation per-sample probabilities:', val_prob.numpy().reshape(-1, 1))
Does the test set in the code above take part in training? No: the test set is split off with train_test_split before the training loop, only x_train and y_train are used inside the loop, and the test set is used only afterwards to evaluate the trained model.
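A quick way to confirm this from the script itself is to compare the split sizes: train_test_split partitions the rows into two disjoint subsets, so their counts add up to the full dataset. A small sanity-check sketch, assuming only the x, x_train and x_test variables defined above:

print(x_train.shape[0], x_test.shape[0])  # roughly an 80/20 split with test_size=0.2
print(x_train.shape[0] + x_test.shape[0] == x.shape[0])  # True: every sample is in exactly one split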