使用 Python 编写 DNN 神经网络预测患者患病状态 - 基于基因表达量
使用 Python 编写 DNN 神经网络根据基因的表达量来预测患者是否患病
本项目使用 Python 和 PyTorch 框架构建 DNN 神经网络模型,根据基因表达量预测患者是否患病。模型包含两个子模型:
- 第一个模型: 进行四分类,输入为基因的个数,输出为 4 个分类。
- 第二个模型: 进行二分类,输入为第一个模型的输出,输出为是否患病。
模型要求:
- 读入 Excel 表格,第一行为患者状态标志 state(1 为患病,0 为正常)和基因名称,第 0 列为患者是否患病的真值,其余列为各基因及其表达量。
- 定义两个模型,模型的各项参数可调。
- 模型调用分为两次,第一次调用第一个模型,第二次调用第二个模型。
- 给出详细注释。
- 模型加入 Dropout 层。
- 能够在 JetBrains PyCharm 2018.3.7 x64 上面运行。
- Excel 路径为 'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\data1.xlsx'(注意:在 Python 代码中该路径需写为原始字符串 r'...',否则反斜杠会被当作转义字符导致语法错误)。
- 数据全部作为训练集,没有测试集,即全部把数据拿去训练。
- 将每次训练的准确率和损失值两者进行输出。
- 使用 PyTorch 框架。
- 数据进行标准化。
- 对神经网络模型进行贝叶斯优化。
代码示例:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler
from bayes_opt import BayesianOptimization
# 读入 Excel 表格 / Load the Excel spreadsheet.
# BUGFIX: a plain string literal here is a SyntaxError in Python 3 —
# '\U' in 'C:\Users\...' starts a unicode escape — so the Windows path
# must be a raw string (r'...').
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\data1.xlsx')

# Column 0 holds the ground-truth disease state (1 = diseased, 0 = healthy);
# the remaining columns hold per-gene expression values.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

# 数据标准化 / Standardize each gene (feature) to zero mean, unit variance.
scaler = StandardScaler()
X = scaler.fit_transform(X)
# 定义第一个模型 / Stage-1 network.
class Model1(nn.Module):
    """First-stage DNN: maps gene-expression features to 4 class scores.

    Three hidden Linear layers, each followed by ReLU activation and a
    Dropout layer, then a final Linear projection to ``output_size``
    raw scores (no softmax is applied here).
    """

    def __init__(self, input_size, output_size, hidden_size, dropout):
        super(Model1, self).__init__()
        # Each layer is kept as its own attribute (same names as before)
        # so all parameters register with the module as usual.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(dropout)
        self.fc4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Run the input through every stage in order; equivalent to the
        # hand-unrolled chain fc1 -> relu1 -> dropout1 -> ... -> fc4.
        stages = (
            self.fc1, self.relu1, self.dropout1,
            self.fc2, self.relu2, self.dropout2,
            self.fc3, self.relu3, self.dropout3,
            self.fc4,
        )
        for stage in stages:
            x = stage(x)
        return x
# 定义第二个模型 / Stage-2 network.
class Model2(nn.Module):
    """Second-stage DNN: maps stage-1 scores to a disease probability.

    One hidden Linear layer with ReLU and Dropout, then a Linear layer
    squashed through a Sigmoid, so the output lies in (0, 1).
    """

    def __init__(self, input_size, output_size, hidden_size, dropout):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Same pipeline as the original, expressed as a sequential walk.
        for stage in (self.fc1, self.relu1, self.dropout1, self.fc2, self.sigmoid):
            x = stage(x)
        return x
# 定义模型训练函数 / Mini-batch training loop.
def train(model, X, y, optimizer, criterion, batch_size, num_epochs):
    """Train ``model`` on the full data set (no held-out test set, per spec).

    Parameters
    ----------
    model : nn.Module — network to optimize.
    X : array (n_samples, n_features) — standardized inputs.
    y : array (n_samples,) — ground-truth labels.
    optimizer : torch.optim.Optimizer.
    criterion : nn.CrossEntropyLoss or nn.BCELoss.
    batch_size, num_epochs : int.

    Prints accuracy and loss over the whole data set after every epoch.

    BUGFIX vs. original: CrossEntropyLoss requires integer (long) class
    indices, and BCELoss requires float targets shaped like the (n, 1)
    model output — the original passed float 1-D targets in both cases.
    """
    is_multiclass = isinstance(criterion, nn.CrossEntropyLoss)

    num_samples = X.shape[0]
    num_batches = num_samples // batch_size
    for epoch in range(num_epochs):
        model.train()  # make sure Dropout is active while fitting
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            inputs = torch.tensor(X[start:end], dtype=torch.float32)
            if is_multiclass:
                targets = torch.tensor(y[start:end], dtype=torch.long)
            else:
                targets = torch.tensor(y[start:end], dtype=torch.float32).view(-1, 1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        # Fresh names so the last batch ``loss`` above is not shadowed.
        epoch_acc, epoch_loss = evaluate(model, X, y, criterion)
        print('Epoch [{}/{}], Accuracy: {:.4f}, Loss: {:.4f}'
              .format(epoch + 1, num_epochs, epoch_acc, epoch_loss))
# 定义模型评估函数 / Whole-data-set evaluation.
def evaluate(model, X, y, criterion):
    """Compute (accuracy, loss) of ``model`` over the whole data set.

    Supports both stages: multi-class logits scored with CrossEntropyLoss
    (prediction = argmax over classes) and the sigmoid output of the binary
    model scored with BCELoss (prediction = round).

    BUGFIX vs. original: the original applied ``torch.round`` to raw
    4-class logits and compared against float targets, which is not a
    valid accuracy for the multi-class model.  Dropout is also disabled
    here (eval mode) so the measurement is deterministic; the caller's
    training mode is restored afterwards.
    """
    was_training = model.training
    model.eval()  # Dropout off for a deterministic measurement
    with torch.no_grad():
        inputs = torch.tensor(X, dtype=torch.float32)
        outputs = model(inputs)
        if isinstance(criterion, nn.CrossEntropyLoss):
            # Integer class indices for CE; prediction is the arg-max logit.
            targets = torch.tensor(y, dtype=torch.long)
            loss = criterion(outputs, targets)
            predicted = outputs.argmax(dim=1)
        else:
            # BCELoss wants float targets shaped like the (n, 1) output.
            targets = torch.tensor(y, dtype=torch.float32).view(-1, 1)
            loss = criterion(outputs, targets)
            predicted = torch.round(outputs)
        correct = (predicted == targets).sum().item()
        accuracy = correct / targets.shape[0]
    if was_training:
        model.train()  # restore the caller's mode
    return accuracy, loss.item()
# 定义第一个模型的优化函数 / Bayesian-optimization objective for Model1.
def optimize_model1(input_size, hidden_size, dropout):
    """Train a Model1 candidate and return its training accuracy.

    ``input_size`` stays in the signature because it appears in the search
    space, but the first Linear layer must match the actual feature count,
    so ``X.shape[1]`` is used regardless of the sampled value.
    BUGFIX vs. original: the original built the model with the sampled
    ``input_size`` and crashed with a shape mismatch whenever it differed
    from X.shape[1].
    """
    model = Model1(input_size=X.shape[1], output_size=4,
                   hidden_size=int(hidden_size), dropout=dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    train(model, X, y, optimizer, criterion, batch_size=256, num_epochs=20)
    acc, loss = evaluate(model, X, y, criterion)
    return acc
# 定义第二个模型的优化函数 / Bayesian-optimization objective for Model2.
def optimize_model2(input_size, hidden_size, dropout):
    """Train the two-stage pipeline and return Model2's training accuracy.

    A fresh Model1 is trained with fixed hyper-parameters; its detached
    outputs become the features for Model2.  ``input_size`` is accepted
    for the search-space signature, but Model2's input width must equal
    the feature count Model1 produces, so ``inputs2.shape[1]`` is used.
    """
    # Stage 1: train the 4-way classifier that produces the features.
    model1 = Model1(input_size=X.shape[1], output_size=4, hidden_size=32, dropout=0.5)
    optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.001)
    criterion1 = nn.CrossEntropyLoss()
    train(model1, X, y, optimizer1, criterion1, batch_size=256, num_epochs=20)

    # BUGFIX vs. original: extract features deterministically — eval()
    # turns Dropout off, no_grad() avoids building an unused autograd graph.
    model1.eval()
    with torch.no_grad():
        inputs2 = model1(torch.tensor(X, dtype=torch.float32)).numpy()

    # Stage 2: train the binary classifier on those features.
    model2 = Model2(input_size=inputs2.shape[1], output_size=1,
                    hidden_size=int(hidden_size), dropout=dropout)
    optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.001)
    criterion2 = nn.BCELoss()
    train(model2, inputs2, y, optimizer2, criterion2, batch_size=256, num_epochs=20)
    acc, loss = evaluate(model2, inputs2, y, criterion2)
    return acc
# 使用贝叶斯优化调参 / Hyper-parameter search with Bayesian optimization.
# BUGFIX vs. original: ``input_size`` is not actually tunable — Model1's
# input must equal X.shape[1] and Model2's must equal the 4 features that
# Model1 emits — so both bounds are pinned to the single valid value.
# Only hidden_size and dropout are genuinely searched.
pbounds1 = {'input_size': (X.shape[1], X.shape[1]),
            'hidden_size': (16, 64), 'dropout': (0.1, 0.5)}
pbounds2 = {'input_size': (4, 4),
            'hidden_size': (16, 64), 'dropout': (0.1, 0.5)}
optimizer1 = BayesianOptimization(f=optimize_model1, pbounds=pbounds1, random_state=1)
optimizer2 = BayesianOptimization(f=optimize_model2, pbounds=pbounds2, random_state=1)

# Search stage 1 and read back the best parameters.
optimizer1.maximize(init_points=5, n_iter=10)
input_size1 = int(optimizer1.max['params']['input_size'])
hidden_size1 = int(optimizer1.max['params']['hidden_size'])
dropout1 = optimizer1.max['params']['dropout']

# Search stage 2 and read back the best parameters.
optimizer2.maximize(init_points=5, n_iter=10)
input_size2 = int(optimizer2.max['params']['input_size'])
hidden_size2 = int(optimizer2.max['params']['hidden_size'])
dropout2 = optimizer2.max['params']['dropout']
# 训练第一个模型 / Train the final stage-1 model with the best hyper-parameters.
model1 = Model1(input_size=X.shape[1], output_size=4,
                hidden_size=hidden_size1, dropout=dropout1)
optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.001)
criterion1 = nn.CrossEntropyLoss()
train(model1, X, y, optimizer1, criterion1, batch_size=256, num_epochs=20)

# 训练第二个模型 / Produce stage-2 features deterministically.
# BUGFIX vs. original: run Model1 in eval mode (Dropout off) and under
# no_grad so the extracted features are stable and graph-free.
model1.eval()
with torch.no_grad():
    inputs2 = model1(torch.tensor(X, dtype=torch.float32)).numpy()

# BUGFIX vs. original: Model2's input width must equal the feature count
# Model1 emits; the original passed the Bayesian-sampled ``input_size2``,
# which crashes whenever it differs from inputs2.shape[1].
model2 = Model2(input_size=inputs2.shape[1], output_size=1,
                hidden_size=hidden_size2, dropout=dropout2)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=0.001)
criterion2 = nn.BCELoss()
train(model2, inputs2, y, optimizer2, criterion2, batch_size=256, num_epochs=20)

# 输出准确率和损失值 / Report final training-set accuracy and loss.
acc1, loss1 = evaluate(model1, X, y, criterion1)
acc2, loss2 = evaluate(model2, inputs2, y, criterion2)
print('Model 1 - Accuracy: {:.4f}, Loss: {:.4f}'.format(acc1, loss1))
print('Model 2 - Accuracy: {:.4f}, Loss: {:.4f}'.format(acc2, loss2))
注意:
- 代码示例仅供参考,需要根据实际情况进行调整。
- 由于本人是 AI 语言模型,无法在本地运行程序,请自行修改代码并运行。
- 代码中包含详细注释,方便理解。
- 模型的各项参数可以根据实际情况进行调整,例如学习率、隐藏层大小、Dropout 比例等。
- 建议使用更大的数据集进行训练,以提高模型的性能。
原文地址: https://www.cveoy.top/t/topic/mOk4 著作权归作者所有。请勿转载和采集!