# 基于基因表达量的疾病预测:使用 PyTorch 构建 DNN 神经网络
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_curve, auc
# 读取数据
# Load the input table from a local Excel workbook.
# The path is a raw string: in a normal literal the backslash sequences
# (notably "\U" in "\Users") are parsed as escapes and make the file fail to
# compile with a unicode-escape SyntaxError.
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\data1.xlsx')
# 标准化处理
# Standardise every column after the first: subtract the column mean and
# divide by the column standard deviation, writing the result back in place.
# Column 0 is left untouched.
features = data.iloc[:, 1:]
data.iloc[:, 1:] = (features - features.mean()) / features.std()
# 转换为tensor格式
# Column 0 becomes the target vector `y`; all remaining columns become the
# feature matrix `x`. Both are materialised as float32 tensors.
y = torch.tensor(
    data.iloc[:, 0].values,
    dtype=torch.float32,
)
x = torch.tensor(
    data.iloc[:, 1:].values,
    dtype=torch.float32,
)
# 构建模型
class Net(torch.nn.Module):
    """Fully connected network with three hidden layers and a gating step.

    Each hidden layer is a ``Linear`` followed by ReLU and dropout. The
    activation of the third hidden layer is multiplied by a softmax-normalised
    score produced by a bias-free ``Linear(n_hidden3, 1)`` before being fed to
    the output layer. The output is returned as raw values (no sigmoid).

    Args:
        n_feature: Width of the input.
        n_hidden1: Width of the first hidden layer.
        n_hidden2: Width of the second hidden layer.
        n_hidden3: Width of the third hidden layer.
        n_output: Width of the output layer.
        dropout_rate: Probability passed to ``torch.nn.Dropout``; the same
            dropout module is applied after every hidden layer.
    """

    # The constructor must be named ``__init__`` (with the dunder underscores);
    # a plain ``init`` method is never invoked by ``Net(...)``, so no layers
    # would be registered and the keyword arguments would be rejected.
    def __init__(self, n_feature, n_hidden1, n_hidden2, n_hidden3, n_output,
                 dropout_rate):
        super().__init__()
        self.hidden1 = torch.nn.Linear(n_feature, n_hidden1)
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)
        self.hidden3 = torch.nn.Linear(n_hidden2, n_hidden3)
        self.output = torch.nn.Linear(n_hidden3, n_output)
        self.dropout = torch.nn.Dropout(dropout_rate)
        self.attention = torch.nn.Linear(n_hidden3, 1, bias=False)

    def forward(self, x):
        """Run the network on ``x`` and return the output layer's raw values."""
        x = F.relu(self.hidden1(x))
        x = self.dropout(x)
        x = F.relu(self.hidden2(x))
        x = self.dropout(x)
        x = F.relu(self.hidden3(x))
        x = self.dropout(x)
        # Attention-style gating: one score per row, normalised with softmax.
        # NOTE(review): the softmax runs over dim 0, i.e. across the rows of
        # the input, so each row's weight depends on the other rows passed in
        # the same call - confirm this is intended.
        atten = F.softmax(self.attention(x), dim=0)
        x = x * atten
        return self.output(x)
# Build the model. The input width is read from the feature matrix instead of
# being hard-coded, so the first layer always matches the number of feature
# columns actually loaded.
net = Net(
    n_feature=x.shape[1],
    n_hidden1=64,
    n_hidden2=32,
    n_hidden3=16,
    n_output=1,
    dropout_rate=0.5,
)
# 定义优化器和损失函数
# Loss: binary cross-entropy computed directly on the network's raw outputs.
loss_func = torch.nn.BCEWithLogitsLoss()

# Optimiser: Adam over all of the model's parameters, learning rate 0.001.
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=0.001,
)
# 训练模型
# Full-batch training: each epoch does one forward/backward pass over all of
# `x` and records the loss and the training accuracy for later plotting.
epochs = 200
train_loss_list = []
train_acc_list = []
for epoch in range(epochs):
    # Forward pass; flatten the (N, 1) output so it lines up with the 1-D y.
    logits = net(x).view(-1)
    loss = loss_func(logits, y)

    # Backward pass and parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Accuracy at a 0.5 probability threshold, from the same forward pass.
    # Predictions and labels are compared as 1-D tensors: comparing an (N, 1)
    # column against the (N,) labels would broadcast to an (N, N) matrix and
    # report a wrong (possibly >100%) accuracy.
    with torch.no_grad():
        y_pred = (torch.sigmoid(logits) > 0.5).float()
        acc = (y_pred == y).sum().item() / len(y)

    # Record and report this epoch's metrics.
    train_loss_list.append(loss.item())
    train_acc_list.append(acc)
    print('Epoch [{}/{}], Loss: {:.4f}, Acc: {:.2f}%'.format(epoch+1, epochs, loss.item(), acc*100))
# 输出每个样本的概率
# Per-sample predicted probabilities for the whole data set.
# The model is switched to eval mode first so the dropout layers are disabled;
# in training mode they randomly zero activations and make the reported
# probabilities differ from run to run. Gradients are not needed here.
net.eval()
with torch.no_grad():
    y_pred_prob = torch.sigmoid(net(x))
print(y_pred_prob)
# 绘制准确率变化的图
# Line plot of the per-epoch training accuracy, drawn on the current axes.
acc_ax = plt.gca()
acc_ax.plot(train_acc_list)
acc_ax.set_title('Training Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
plt.show()
# 绘制损失变化的图
# Line plot of the per-epoch training loss, drawn on the current axes.
loss_ax = plt.gca()
loss_ax.plot(train_loss_list)
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
plt.show()
# 绘制ROC图
# ROC curve of the predicted probabilities against the true labels.
# roc_curve documents 1-D array inputs, so the label tensor and the (N, 1)
# probability tensor are converted to flat NumPy arrays explicitly instead of
# relying on implicit coercion.
y_true = y.detach().numpy().ravel()
y_score = y_pred_prob.detach().numpy().ravel()
fpr, tpr, thresholds = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, lw=1, alpha=0.8, label='ROC(area = %0.2f)' % roc_auc)
# Diagonal reference line for a classifier that guesses at random.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Random Chance')
plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()
# 原文地址: https://www.cveoy.top/t/topic/ndzC 著作权归作者所有。请勿转载和采集!