基于深度神经网络的HIV预测模型
基于深度神经网络的HIV预测模型
本代码使用深度神经网络 (DNN) 构建了一个 HIV 预测模型。
1. 导入必要的库
import torch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
2. 读取数据
# Load the training spreadsheet. The path is written as a raw string: in a
# normal string literal the "\U" of "C:\Users" starts a \UXXXXXXXX escape and
# the file fails to compile with a unicode-escape SyntaxError.
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\output_data.xlsx')
# Column 0 is taken as the label; every remaining column is a feature.
x = data.iloc[:, 1:].values
y = data.iloc[:, 0].values
3. 数据归一化
# Standardise the features with the global mean / standard deviation of the
# training matrix. The statistics are stored in x_mean / x_std because the
# validation step reuses them; plain Python floats are used so they combine
# with both NumPy arrays and torch tensors.
x_mean = float(x.mean())
x_std = float(x.std())
x = (x - x_mean) / x_std
4. 将numpy数组转换为张量
# Wrap the feature matrix and the label vector as torch tensors. The labels
# stay floating point here and are cast with .long() where the loss is computed.
x, y = torch.Tensor(x), torch.Tensor(y)
5. 定义模型
class DNN(nn.Module):
    """Three-hidden-layer MLP classifier with a feature-wise attention gate.

    Every hidden layer is Linear -> ReLU -> Dropout(p=0.5). The last hidden
    activation is re-weighted by attention weights and then projected to
    ``num_classes`` raw logits (no softmax is applied here).

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.5)
        # One score per hidden unit, normalised across the feature axis.
        # Scoring with a single output unit (Linear(hidden_size, 1)) makes
        # Softmax(dim=1) run over a size-1 axis, so every weight is exactly
        # 1.0 and the attention step does nothing.
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Softmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits of shape (batch, num_classes) for a (batch, input_size) input."""
        out = self.dropout(torch.relu(self.fc1(x)))
        out = self.dropout(torch.relu(self.fc2(out)))
        out = self.dropout(torch.relu(self.fc3(out)))
        # Weights are (batch, hidden_size) and sum to 1 along dim 1.
        attention_weights = self.attention(out)
        out = attention_weights * out
        return self.out(out)
6. 定义超参数
# Network dimensions: feature count, hidden width, number of output classes.
input_size, hidden_size, num_classes = 16, 128, 2
# Optimisation settings: optimizer step size and number of training epochs.
learning_rate = 1e-3
num_epochs = 100
7. 初始化模型
# Build the network from the hyperparameters defined above.
model = DNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)
8. 定义损失函数和优化器
# Adam updates every model parameter; cross-entropy is applied to the raw logits.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
9. 训练模型
# Per-epoch history, consumed by the plotting code after training.
train_loss = []
train_accuracy = []
targets = y.long()  # CrossEntropyLoss expects integer class indices
total = targets.size(0)
model.train()  # make sure dropout is active while fitting
for epoch in range(num_epochs):
    # Forward and backward pass over the whole training set in one batch.
    outputs = model(x)
    loss = criterion(outputs, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Training accuracy of this epoch's forward pass.
    _, predicted = torch.max(outputs.detach(), 1)
    correct = (predicted == targets).sum().item()
    accuracy = correct / total
    train_accuracy.append(accuracy)
    train_loss.append(loss.item())
    # Report progress.
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, loss.item(), accuracy * 100))
# Per-sample probability of class 1, scored once with the final weights.
# Dropout is switched off and autograd disabled so the reported values (and
# the ROC curve built from them) are deterministic instead of coming from a
# dropout-perturbed forward pass made before the last optimizer step.
model.eval()
with torch.no_grad():
    outputs = model(x)
prob = torch.softmax(outputs, dim=1)[:, 1]
print('每个样本的概率:', prob.detach().numpy().reshape(-1, 1))
# Draw the accuracy history and then the loss history, one figure each;
# the series name doubles as the figure title and the y-axis label.
for history, caption in ((train_accuracy, 'Accuracy'), (train_loss, 'Loss')):
    plt.plot(history)
    plt.title(caption)
    plt.xlabel('Epoch')
    plt.ylabel(caption)
    plt.show()
# ROC curve on the training set, scored with the class-1 probabilities.
train_scores = prob.detach().numpy()
fpr, tpr, threshold = roc_curve(y, train_scores)
roc_auc = auc(fpr, tpr)
auc_label = 'AUC = %0.2f' % roc_auc
plt.title('ROC Curve')
plt.plot(fpr, tpr, 'b', label=auc_label)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
10. 读取验证集数据
# Load the validation spreadsheet. The path is a raw string because the "\U"
# of "C:\Users" is otherwise parsed as a \UXXXXXXXX escape, which is a
# SyntaxError.
val_data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\HIV数据.xlsx')
# Same layout as the training sheet: column 0 is the label, the rest are features.
val_x = val_data.iloc[:, 1:].values
val_y = val_data.iloc[:, 0].values
11. 数据归一化
# Convert the validation arrays to tensors and scale the features.
# NOTE(review): x_mean / x_std must hold the statistics that were used to
# normalise the training features; make sure they are defined before this runs.
val_y = torch.Tensor(val_y)
val_x = (torch.Tensor(val_x) - x_mean) / x_std
12. 验证模型
model.eval()  # disable dropout for evaluation
val_targets = val_y.long()  # CrossEntropyLoss expects integer class indices
# Evaluation needs no gradients; no_grad avoids building an autograd graph.
with torch.no_grad():
    val_outputs = model(val_x)
    val_loss = criterion(val_outputs, val_targets)
# Accuracy: the predicted class is the index of the largest logit.
_, val_predicted = torch.max(val_outputs, 1)
val_total = val_y.size(0)
val_correct = (val_predicted == val_targets).sum().item()
val_accuracy = val_correct / val_total
print('Validation Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss.item(), val_accuracy * 100))
# Per-sample probability of class 1 on the validation set.
val_prob = torch.softmax(val_outputs, dim=1)[:, 1]
print('Validation每个样本的概率:', val_prob.detach().numpy().reshape(-1, 1))
# ROC curve on the validation set, scored with the class-1 probabilities.
val_scores = val_prob.detach().numpy()
val_fpr, val_tpr, val_threshold = roc_curve(val_y, val_scores)
val_roc_auc = auc(val_fpr, val_tpr)
val_auc_label = 'AUC = %0.2f' % val_roc_auc
plt.title('Validation ROC Curve')
plt.plot(val_fpr, val_tpr, 'b', label=val_auc_label)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
13. 解决错误
在验证阶段,当进行归一化时,代码出现了错误:TypeError: unsupported operand type(s) for -: 'numpy.ndarray' and 'Tensor'
出现该错误是因为在做归一化时 val_x 仍然是 numpy 数组,而 x_mean 和 x_std 是 PyTorch 张量。为了解决这个问题,需要在归一化之前先将 val_x 转换为 PyTorch 张量。另外,x_mean 和 x_std 必须是第 3 步中训练数据的均值和标准差,并在归一化训练数据时保存下来,否则这一步会抛出 NameError。
修改后的代码:
# Read the validation data. The path is a raw string because the "\U" of
# "C:\Users" is otherwise parsed as a \UXXXXXXXX escape, which is a SyntaxError.
val_data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\HIV数据.xlsx')
val_x = val_data.iloc[:, 1:].values
val_y = val_data.iloc[:, 0].values
# Convert the NumPy arrays to tensors before doing any tensor arithmetic.
val_x = torch.Tensor(val_x)
val_y = torch.Tensor(val_y)
# Normalise with the training statistics.
# NOTE(review): x_mean / x_std must already hold the mean and standard
# deviation used for the training features.
val_x = (val_x - x_mean) / x_std
通过将 val_x 转换为 PyTorch 张量,可以解决该错误。
总结
本代码展示了如何使用深度神经网络构建一个 HIV 预测模型,并使用注意力机制来提升模型性能。通过训练和验证,模型取得了不错的效果。
原文地址: https://www.cveoy.top/t/topic/ndC7 著作权归作者所有。请勿转载和采集!