基于基因表达量的患者疾病预测:Python DNN神经网络模型
基于基因表达量的患者疾病预测:Python DNN神经网络模型
本项目使用 Python 和 PyTorch 框架构建深度神经网络 (DNN) 模型,根据基因表达量预测患者是否患病。该模型由三个级联的子网络组成,依次进行 8 分类、4 分类和 2 分类,后一个子网络以前一个子网络的输出作为输入;前两个子网络加入 Dropout 层以防止过拟合。模型参数可调,可在 JetBrains PyCharm 环境中运行。
1. 数据准备
- 导入所需库
import torch
import torch.nn as nn
import pandas as pd
from sklearn import preprocessing
- 读入 Excel 表格
# Raw string literal: in a normal string the "\U" of "C:\Users" starts a
# unicode escape, which makes the whole file fail with a SyntaxError.
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\data1.xlsx')
- 数据标准化
# Standardize every column except the first with sklearn's preprocessing.scale
# and write the result back into the DataFrame in place. Column 0 is left
# untouched here.
data.iloc[:, 1:] = preprocessing.scale(data.iloc[:, 1:])
- 构建张量数据集和数据加载器
# Column 0 is converted to integer class labels; every remaining column
# becomes a float32 input feature.
X = torch.tensor(
    data.iloc[:, 1:].values,
    dtype=torch.float32,
)
y = torch.tensor(
    data.iloc[:, 0].values,
    dtype=torch.long,
)

# Pair features with labels and serve them in shuffled mini-batches of 64.
train_data = torch.utils.data.TensorDataset(X, y)
train_loader = torch.utils.data.DataLoader(
    train_data,
    shuffle=True,
    batch_size=64,
)
2. 模型定义
2.1 模型 1 (8 分类)
class Model1(nn.Module):
    """First-stage classifier: a three-layer MLP with dropout.

    With the default arguments the network is the same as the previously
    hard-coded one (58 inputs -> 128 -> 64 -> 8 logits, dropout p=0.5).

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits.
        dropout_p: Drop probability applied after each hidden activation.
    """

    def __init__(self, in_features=58, hidden1=128, hidden2=64,
                 num_classes=8, dropout_p=0.5):
        super().__init__()
        # Attribute names and creation order are kept so existing state
        # dicts and seeded initialisation stay compatible.
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.dropout(torch.relu(self.fc2(x)))
        return self.fc3(x)
2.2 模型 2 (4 分类)
class Model2(nn.Module):
    """Second-stage classifier: a three-layer MLP with dropout.

    With the default arguments the network is the same as the previously
    hard-coded one (8 inputs -> 32 -> 16 -> 4 logits, dropout p=0.5).

    Args:
        in_features: Number of input features per sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits.
        dropout_p: Drop probability applied after each hidden activation.
    """

    def __init__(self, in_features=8, hidden1=32, hidden2=16,
                 num_classes=4, dropout_p=0.5):
        super().__init__()
        # Attribute names and creation order are kept so existing state
        # dicts and seeded initialisation stay compatible.
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.dropout(torch.relu(self.fc2(x)))
        return self.fc3(x)
2.3 模型 3 (2 分类)
class Model3(nn.Module):
    """Final-stage classifier: a single linear layer.

    With the default arguments the layer is the same as the previously
    hard-coded one (4 inputs -> 2 logits).

    Args:
        in_features: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=4, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        return self.fc1(x)
3. 模型初始化
# Instantiate the three stages left to right, so weight initialisation
# consumes the RNG in the same order as three separate statements would.
model1, model2, model3 = Model1(), Model2(), Model3()
4. 定义损失函数和优化器
# One shared cross-entropy criterion, and an independent Adam optimizer
# (lr=0.001) for each of the three models.
criterion = nn.CrossEntropyLoss()
optimizer1, optimizer2, optimizer3 = (
    torch.optim.Adam(net.parameters(), lr=0.001)
    for net in (model1, model2, model3)
)
5. 模型训练
5.1 训练模型 1
# Train model1 on the input features for 100 epochs.
# Each batch is unpacked directly into (inputs, labels), so the loop no longer
# rebinds the module-level name `data` (the DataFrame) to a batch.
model1.train()  # explicit: dropout must be active while fitting
for epoch in range(100):
    running_loss = 0.0
    total = 0
    correct = 0
    for inputs, labels in train_loader:
        optimizer1.zero_grad()
        outputs = model1(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer1.step()
        running_loss += loss.item()
        # Predicted class = index of the largest logit. detach() replaces
        # the legacy `.data` accessor.
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    # Mean batch loss and accuracy over this epoch, measured on the training
    # batches while dropout is active.
    print('Epoch: %d, Loss: %.3f, Accuracy: %.3f' % (epoch+1, running_loss/len(train_loader), correct/total))
5.2 训练模型 2
# Build model2's training set from model1's outputs.
# eval() turns dropout off; without it model1 is still in training mode and
# would randomly zero and rescale activations, feeding noisy features to
# model2 (torch.no_grad() only disables gradient tracking, not dropout).
model1.eval()
train_X = []
train_y = []
with torch.no_grad():
    for inputs, labels in train_loader:
        train_X.append(model1(inputs))
        train_y.append(labels)
train_X = torch.cat(train_X, 0)
train_y = torch.cat(train_y, 0)
train_data2 = torch.utils.data.TensorDataset(train_X, train_y)
train_loader2 = torch.utils.data.DataLoader(train_data2, batch_size=64, shuffle=True)

# NOTE(review): model2 is trained on the same label tensor that model1 used.
# CrossEntropyLoss requires every label to be smaller than model2's number of
# output logits - confirm the labels are remapped for this stage.
model2.train()  # explicit: dropout must be active while fitting
for epoch in range(100):
    running_loss = 0.0
    total = 0
    correct = 0
    for inputs, labels in train_loader2:
        optimizer2.zero_grad()
        outputs = model2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer2.step()
        running_loss += loss.item()
        # Predicted class = index of the largest logit. detach() replaces
        # the legacy `.data` accessor.
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    # Mean batch loss and accuracy over this epoch (training batches).
    print('Epoch: %d, Loss: %.3f, Accuracy: %.3f' % (epoch+1, running_loss/len(train_loader2), correct/total))
5.3 训练模型 3
# Build model3's training set from model2's outputs.
# eval() turns dropout off; without it model2 is still in training mode and
# would randomly zero and rescale activations, feeding noisy features to
# model3 (torch.no_grad() only disables gradient tracking, not dropout).
model2.eval()
train_X2 = []
train_y2 = []
with torch.no_grad():
    for inputs, labels in train_loader2:
        train_X2.append(model2(inputs))
        train_y2.append(labels)
train_X2 = torch.cat(train_X2, 0)
train_y2 = torch.cat(train_y2, 0)
train_data3 = torch.utils.data.TensorDataset(train_X2, train_y2)
train_loader3 = torch.utils.data.DataLoader(train_data3, batch_size=64, shuffle=True)

# NOTE(review): model3 is trained on the labels carried over from
# train_loader2. CrossEntropyLoss requires every label to be smaller than
# model3's number of output logits - confirm the labels are remapped for
# this stage.
model3.train()
for epoch in range(100):
    running_loss = 0.0
    total = 0
    correct = 0
    for inputs, labels in train_loader3:
        optimizer3.zero_grad()
        outputs = model3(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer3.step()
        running_loss += loss.item()
        # Predicted class = index of the largest logit. detach() replaces
        # the legacy `.data` accessor.
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    # Mean batch loss and accuracy over this epoch (training batches).
    print('Epoch: %d, Loss: %.3f, Accuracy: %.3f' % (epoch+1, running_loss/len(train_loader3), correct/total))
6. 模型评估
在训练过程中,代码会输出每个 epoch 的训练损失和准确率。需要注意,这些指标是在训练集上计算的,只能反映模型对训练数据的拟合程度;要评估泛化性能,应另外划分验证集或测试集。您可以根据这些指标来评估模型性能,并调整模型参数或训练策略以优化模型。
7. 模型保存
您可以使用 torch.save 函数将训练好的模型保存到文件中(例如 torch.save(model1.state_dict(), 'model1.pth')),以便在需要时进行加载和使用。
8. 模型使用
训练完成后,您可以使用保存的模型来预测新数据的患者患病状态。预测前需要对新数据做与训练数据相同的标准化处理,并调用 model.eval() 关闭 Dropout。
总结
本项目提供了一个基于基因表达量的患者疾病预测的 DNN 模型构建示例。您可以根据自己的需求调整模型结构、参数和训练策略,以构建更适合您的应用场景的模型。
原文地址: https://www.cveoy.top/t/topic/myKd 著作权归作者所有。请勿转载和采集!