导入必要的库

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split

读取数据

# Load the training spreadsheet. The path is a raw string: in a normal string
# literal the "\U" in "C:\Users" starts a unicode escape and is a SyntaxError.
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\output_data1.xlsx')
# Column 0 holds the label; every remaining column is a feature.
x = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

数据归一化

# Standardise with ONE mean and ONE standard deviation taken over the whole
# feature matrix (axis=None), i.e. not column by column.
# NOTE(review): the statistics are computed before the train/test split, so
# the test rows contribute to them - confirm this is intended.
x = (x - x.mean(axis=None)) / x.std(axis=None)

划分训练集和测试集

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

将 numpy 数组转换为张量

# Convert the numpy splits to float32 tensors (the same dtype the legacy
# torch.Tensor(...) constructor produces). Labels stay float here and are cast
# with .long() where the loss is computed.
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

定义模型

class DNN(nn.Module):
    """Small fully connected classifier: input -> 8 -> 4 -> 8 -> num_classes.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits per sample.

    ``forward`` returns raw logits (no softmax), so it should be paired with a
    loss that applies log-softmax itself, such as ``nn.CrossEntropyLoss``.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 8)
        self.fc2 = nn.Linear(8, 4)
        self.fc3 = nn.Linear(4, 8)
        self.out = nn.Linear(8, num_classes)

        # The two modules below are registered but deliberately NOT applied in
        # forward(); they are kept so the set of parameters stays the same.
        self.dropout = nn.Dropout(p=0.5)
        # Sized from fc3 rather than from a module-level `hidden_size` global,
        # so the class no longer depends on definition order in the script.
        # NOTE(review): Softmax(dim=1) over a width-1 output is always 1.0, so
        # this "attention" would be a no-op if it were ever enabled.
        self.attention = nn.Sequential(
            nn.Linear(self.fc3.out_features, 1),
            nn.Tanh(),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return the class logits for a batch ``x`` of feature rows."""
        out = torch.relu(self.fc1(x))
        out = torch.relu(self.fc2(out))
        out = torch.relu(self.fc3(out))
        return self.out(out)

定义超参数

# Hyper-parameters, one assignment per line.
input_size = 16        # number of feature columns fed to the network
hidden_size = 8
num_classes = 2
learning_rate = 0.001  # step size passed to the optimizer
num_epochs = 100       # number of passes over the training set

初始化模型

# Build the network from the hyper-parameters defined above.
model = DNN(input_size=input_size, num_classes=num_classes)

定义损失函数和优化器

# Cross-entropy over the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

训练模型

# Per-epoch history of the training loss and accuracy.
train_loss = []
train_accuracy = []

model.train()
for epoch in range(num_epochs):
    # Forward and backward pass over the whole training set (one batch).
    outputs = model(x_train)
    loss = criterion(outputs, y_train.long())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy, computed from the logits produced before this
    # epoch's weight update.
    _, predicted = torch.max(outputs.detach(), 1)
    total = y_train.size(0)
    correct = (predicted == y_train.long()).sum().item()
    accuracy = correct / total
    train_accuracy.append(accuracy)
    train_loss.append(loss.item())

    # Progress report for this epoch.
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, loss.item(), accuracy * 100))

在测试集上进行预测

# Evaluate on the held-out test split; no gradients are needed here.
with torch.no_grad():
    outputs = model(x_test)
    loss = criterion(outputs, y_test.long())
    _, predicted = torch.max(outputs, 1)
    total = y_test.size(0)
    correct = (predicted == y_test.long()).sum().item()
    accuracy = correct / total
    print('Test Accuracy: {:.2f}%'.format(accuracy * 100))

    # AUC from the softmax probability of class 1. The tensors are converted
    # to numpy explicitly before being handed to scikit-learn.
    prob = torch.softmax(outputs, dim=1)[:, 1]
    fpr, tpr, _ = roc_curve(y_test.numpy(), prob.numpy())
    roc_auc = auc(fpr, tpr)
    print('AUC: {:.2f}'.format(roc_auc))

输出每个样本的概率

# Probability of class 1 for every test sample, printed as a column vector.
prob = torch.softmax(outputs, dim=1)[:, 1]
print('每个样本的概率:', prob.detach().numpy().reshape(-1, 1))

读取验证集数据

# Load the external validation spreadsheet. The path is a raw string: in a
# normal string literal the "\U" in "C:\Users" is a SyntaxError.
val_data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\验证.xlsx')
# Column 0 holds the label; every remaining column is a feature.
val_x = val_data.iloc[:, 1:].values
val_y = val_data.iloc[:, 0].values

将 numpy 数组转换为张量

# Convert the full training arrays and the validation arrays to float32
# tensors (the same dtype the legacy torch.Tensor(...) constructor produces).
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)
val_x = torch.tensor(val_x, dtype=torch.float32)
val_y = torch.tensor(val_y, dtype=torch.float32)

数据归一化

# Scale the validation features with the SAME statistics that were used to
# standardise the training features. `x` has already been standardised at this
# point, so its own mean/std are ~0/~1 and would leave `val_x` effectively
# unscaled. The statistics are therefore recomputed from the raw feature
# columns of `data`, using numpy so they match the earlier standardisation.
raw_features = data.iloc[:, 1:].values
x_mean = float(raw_features.mean())
x_std = float(raw_features.std())
# `x` is left untouched: it was standardised before being turned into a tensor.
val_x = (val_x - x_mean) / x_std

定义模型、超参数、损失函数和优化器...

训练模型...

输出每个样本的概率...

绘制准确率变化的图...

绘制损失变化的图...

绘制 ROC 图...

验证模型

# Switch to inference mode and score the validation set; gradients are not
# needed, so the forward pass runs under no_grad.
model.eval()
with torch.no_grad():
    val_outputs = model(val_x)
    val_loss = criterion(val_outputs, val_y.long())

计算准确率

# Predicted class = index of the largest logit; accuracy = share of matches.
_, val_predicted = torch.max(val_outputs.detach(), 1)
val_total = val_y.size(0)
val_correct = (val_predicted == val_y.long()).sum().item()
val_accuracy = val_correct / val_total
print('Validation Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss.item(), val_accuracy * 100))

输出验证集每个样本的概率

# Probability of class 1 for every validation sample, printed as a column.
val_prob = torch.softmax(val_outputs, dim=1)[:, 1]
print('Validation 每个样本的概率:', val_prob.detach().numpy().reshape(-1, 1))

问题：在总共 130 个样本的基础上，为什么上述代码在验证集上的准确率很低？回答：可能是由于模型过拟合了训练集，导致在验证集上表现不佳。可以考虑加入正则化方法、调整模型结构或者增加数据量等方式来缓解过拟合问题。另外，也可以尝试使用交叉验证等方法来更准确地评估模型的性能。

深度神经网络 (DNN) 模型用于 HIV 预测 - Python 实现

导入必要的库

读取数据

数据归一化

划分训练集和测试集

将 numpy 数组转换为张量

定义模型

定义超参数

初始化模型

定义损失函数和优化器

训练模型

在测试集上进行预测

输出每个样本的概率

读取验证集数据

将 numpy 数组转换为张量

数据归一化

定义模型、超参数、损失函数和优化器...

训练模型...

输出每个样本的概率...

绘制准确率变化的图...

绘制损失变化的图...

绘制 ROC 图...

验证模型

计算准确率

输出验证集每个样本的概率