Using Python, write a DNN that predicts whether a patient is diseased from gene-expression levels, meeting the following requirements:
1. Read an Excel sheet: the first row holds the patient state flag `state` (1 = diseased, 0 = normal) plus the gene names; column 0 holds the ground-truth disease label, and the remaining columns hold each gene and its expression level.
2. Define two models.
3. All model parameters must be adjustable.
4. The models are called in two stages: the first stage calls model 1, whose input size is the number of genes and whose output is a 4-way classification.
5. The second stage calls model 2, a binary classifier that takes model 1's output as its input.
7. Give detailed comments.
8. 模
Import the required libraries
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch import nn
from torch.utils.data import Dataset, DataLoader
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
Define the dataset class
class GeneDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data.iloc[idx, 1:].values.astype(float)  # extract the gene-expression values
        y = self.data.iloc[idx, 0]  # extract the patient state label (1 = diseased, 0 = normal)
        if self.transform:
            x = self.transform(x)
        return x, y
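A minimal sanity check of the dataset class (the tiny frame and column names below are made up for illustration, not taken from the real data):

demo = pd.DataFrame({'state': [1, 0], 'geneA': [2.3, 1.1], 'geneB': [0.7, 0.9]})
ds = GeneDataset(demo)
x0, y0 = ds[0]
print(len(ds), x0.shape, y0)  # 2 (2,) 1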
Define neural network model 1
class Model1(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate):
        super(Model1, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # gene features -> hidden layer
        self.relu = nn.ReLU()                           # non-linearity
        self.dropout = nn.Dropout(dropout_rate)         # regularization
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden layer -> 4-way output

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        return out
Define neural network model 2
class Model2(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_rate):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # 4-way output of model 1 -> hidden layer
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden layer -> single logit
        self.sigmoid = nn.Sigmoid()                     # squash to a disease probability in [0, 1]

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out
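To make the two-stage pipeline of requirements 4 and 5 concrete, here is a minimal forward-pass sketch (the sizes below are made-up illustrations, not values from the data):

m1 = Model1(input_size=20, hidden_size=32, output_size=4, dropout_rate=0.2)
m2 = Model2(input_size=4, hidden_size=16, output_size=1, dropout_rate=0.2)
m1.eval(); m2.eval()            # disable dropout for inference
x = torch.randn(5, 20)          # a batch of 5 patients with 20 genes each
with torch.no_grad():
    four_class = m1(x)          # (5, 4) logits from model 1
    prob_sick = m2(four_class)  # (5, 1) disease probabilities from model 2
print(prob_sick.shape)          # torch.Size([5, 1])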
Define a function to standardize the data
def transform(x):
    # Standardize one gene column to zero mean and unit variance.
    scaler = StandardScaler()
    x = scaler.fit_transform(x.reshape(-1, 1))  # shape (n_samples, 1) so the scaler fits across patients
    return x.reshape(-1)
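As a quick sanity check (example values of my own, not from the original data), a column run through transform should come out with mean ~0 and standard deviation ~1:

col = np.array([1.0, 2.0, 3.0, 4.0])
z = transform(col)
print(z.mean(), z.std())  # ~0.0 and ~1.0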
Define a function to train model 1 and report its loss and accuracy
def train_and_test_model1(model, dataloader, criterion, optimizer):
    total_loss = 0
    total_correct = 0
    total_samples = 0
    model.train()
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs.float())
        loss = criterion(outputs, labels.long())  # CrossEntropyLoss expects integer class labels
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * labels.size(0)  # weight the batch-mean loss by batch size
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    train_loss = total_loss / total_samples
    train_acc = total_correct / total_samples
    return train_loss, train_acc
Define a function to train model 2 and report its loss and accuracy
def train_and_test_model2(model, dataloader, criterion, optimizer):
    total_loss = 0
    total_correct = 0
    total_samples = 0
    model.train()
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs.float())
        loss = criterion(outputs, labels.float())  # BCELoss expects float targets of matching shape
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * labels.size(0)
        predicted = torch.round(outputs.data)  # threshold the sigmoid output at 0.5
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    train_loss = total_loss / total_samples
    train_acc = total_correct / total_samples
    return train_loss, train_acc
Define a function to train the two models in sequence
def train_models(model1, model2, trainloader, criterion1, criterion2, optimizer1, optimizer2):
    # Stage 1: train model 1 on the raw gene-expression features.
    train_loss1, train_acc1 = train_and_test_model1(model1, trainloader, criterion1, optimizer1)
    # Stage 2: run the trained model 1 over the data (no gradients) to build model 2's inputs.
    model1.eval()
    inputs_list, labels_list = [], []
    with torch.no_grad():
        for inputs, labels in trainloader:
            inputs_list.append(model1(inputs.float()))
            labels_list.append(labels.float().view(-1, 1))
    train_inputs = torch.cat(inputs_list, dim=0)  # (n_samples, 4)
    train_labels = torch.cat(labels_list, dim=0)  # (n_samples, 1)
    train_dataset = torch.utils.data.TensorDataset(train_inputs, train_labels)
    trainloader2 = DataLoader(train_dataset, batch_size=32, shuffle=True)
    train_loss2, train_acc2 = train_and_test_model2(model2, trainloader2, criterion2, optimizer2)
    return train_loss1, train_acc1, train_loss2, train_acc2
Read in the data
filename = r'C:\Users\lenovo\Desktop\HIV\GSE6740GSE50011基因降低\output_data.xlsx'  # raw string so backslashes are not treated as escape sequences
data = pd.read_excel(filename, sheet_name='Sheet1')
Standardize the data
# Standardize every gene column (all columns except the label in column 0).
data.iloc[:, 1:] = data.iloc[:, 1:].apply(lambda col: transform(col.values))
Split training and test sets
train_data = data  # all rows are used for training here; see the optional split sketch below
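If a genuine held-out split is wanted, one option (my addition, not part of the original script) is sklearn's train_test_split, stratified on the state label in column 0:

from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(
    data, test_size=0.2, random_state=0, stratify=data.iloc[:, 0])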
Define the hyperparameter search space
space = [
    Integer(10, 100, name='hidden_size'),   # hidden width of model 1
    Real(0.01, 0.5, name='dropout_rate'),   # dropout rate of model 1
    Integer(10, 100, name='hidden_size2'),  # hidden width of model 2
    Real(0.01, 0.5, name='dropout_rate2'),  # dropout rate of model 2
    Real(0.001, 0.01, name='lr'),           # learning rate of model 1
    Real(0.001, 0.01, name='lr2'),          # learning rate of model 2
]
Define the training (objective) function
@use_named_args(space)
def train_model(hidden_size, dropout_rate, hidden_size2, dropout_rate2, lr, lr2):
    # Define the models; model 1's input size is the number of gene columns.
    input_size = train_data.shape[1] - 1
    model1 = Model1(input_size, hidden_size, 4, dropout_rate)
    model2 = Model2(4, hidden_size2, 1, dropout_rate2)
    # Define the loss functions and optimizers
    criterion1 = nn.CrossEntropyLoss()  # 4-way classification loss for model 1
    criterion2 = nn.BCELoss()           # binary cross-entropy for model 2's sigmoid output
    optimizer1 = torch.optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = torch.optim.Adam(model2.parameters(), lr=lr2)

    # Define the dataset and data loader; the data was already standardized
    # column-wise above, so no per-sample transform is applied here.
    train_dataset = GeneDataset(train_data, transform=None)
    trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    # Train the two models in sequence
    train_loss1, train_acc1, train_loss2, train_acc2 = train_models(
        model1, model2, trainloader, criterion1, criterion2, optimizer1, optimizer2)
    # Print the hyperparameters and training results for this trial
    print('hidden_size:', hidden_size)
    print('dropout_rate:', dropout_rate)
    print('hidden_size2:', hidden_size2)
    print('dropout_rate2:', dropout_rate2)
    print('lr:', lr)
    print('lr2:', lr2)
    print('train_loss1:', train_loss1)
    print('train_acc1:', train_acc1)
    print('train_loss2:', train_loss2)
    print('train_acc2:', train_acc2)
    # Return the value for gp_minimize to minimize: model 2's training loss
    return train_loss2
Run Bayesian optimization
res_gp = gp_minimize(train_model, space, n_calls=50, random_state=0)
Print the best hyperparameters
print("Best: %f" % res_gp.fun) print("Best parameters: " + str(res_gp.x))