# DNN model that predicts patient disease status from gene-expression levels.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from bayes_opt import BayesianOptimization
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
# Load the Excel spreadsheet.
# BUG FIX: a raw string is required -- in the original, the '\U' in 'C:\Users'
# started a \Uxxxxxxxx unicode escape and made the literal a SyntaxError.
file_path = r'C:\Users\lenovo\Desktop\HIV\GSE6740GSE50011基因降低\output_data.xlsx'
data = pd.read_excel(file_path, header=0)
# Data preprocessing: column 0 is the label, the remaining columns are features.
x = data.iloc[:, 1:].values.astype(np.float32)  # features as float32
y = data.iloc[:, 0].values.astype(np.int64)     # labels as int64
# Standardize features to zero mean / unit variance.
sc = StandardScaler()
x = sc.fit_transform(x)
# Dataset wrapping pre-computed feature/label arrays.
class MyDataset(Dataset):
    """Index-parallel (features, labels) pairs for a DataLoader."""

    def __init__(self, x, y):
        # BUG FIX: the original defined `init` (no underscores), so the
        # constructor never ran and instances had no x/y attributes.
        self.x = x  # per-sample features, indexable
        self.y = y  # per-sample labels, same length as x

    def __getitem__(self, index):
        # Return one (features, label) pair.
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)
# First model: 4-layer MLP emitting raw class logits (paired with CrossEntropyLoss).
class Model1(nn.Module):
    """MLP: input -> 3x(hidden + ReLU + dropout) -> output logits."""

    def __init__(self, input_size, output_size, hidden_size, dropout_rate):
        # BUG FIX: the original defined `init` and called `self.init()`,
        # so nn.Module was never initialized and no layers were created.
        super(Model1, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return unnormalized logits; no softmax (CrossEntropyLoss applies it)."""
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc4(out)
        return out
# Second model: same MLP body but with a final sigmoid (paired with BCELoss).
class Model2(nn.Module):
    """MLP: input -> 3x(hidden + ReLU + dropout) -> sigmoid probability."""

    def __init__(self, input_size, output_size, hidden_size, dropout_rate):
        # BUG FIX: the original defined `init` and called `self.init()`,
        # so nn.Module was never initialized and no layers were created.
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid-squashed outputs in [0, 1] (BCELoss expects probabilities)."""
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out
# Single-epoch training loop shared by both models.
def train(model, dataloader, optimizer, criterion):
    """Run one optimization epoch over `dataloader`.

    Returns (accuracy_percent, mean_batch_loss).

    Works for both heads used in this script:
    - multi-class logits: predicted class = argmax over dim 1 (original behavior);
    - a single sigmoid unit (output width 1): predicted class = (p > 0.5).
      The original argmax over a width-1 output always predicted 0, so the
      reported accuracy for the binary model was meaningless.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in dataloader:  # enumerate index was unused in the original
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if outputs.dim() > 1 and outputs.size(1) == 1:
            # Binary head: threshold the sigmoid probability.
            predicted = (outputs.detach() > 0.5).to(labels.dtype).view(-1)
        else:
            # Multi-class head: argmax over the class dimension.
            # (.detach() replaces the deprecated .data attribute.)
            _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        # view(-1) makes (B,1) float targets comparable; a no-op for (B,) labels.
        correct += (predicted == labels.view(-1)).sum().item()
    acc = 100 * correct / total
    epoch_loss = running_loss / len(dataloader)
    return acc, epoch_loss
# Objective function for Bayesian optimization.
def bayesian_optimization(input_size, output_size, hidden_size, dropout_rate):
    """Train the two-stage model and return the second stage's final accuracy.

    BayesianOptimization samples every parameter as a float, so the integer
    hyper-parameters are rounded before being used as layer sizes (the
    original passed floats straight into nn.Linear, which raises).

    `input_size` cannot actually vary: model1's first layer must match the
    number of feature columns in `x`, so it is coerced to x.shape[1].
    """
    hidden_size = int(round(hidden_size))
    output_size = int(round(output_size))
    in_features = x.shape[1]  # first layer must match the data's feature count

    # Stage 1: multi-class classifier over the raw features.
    model1 = Model1(in_features, output_size, hidden_size, dropout_rate)
    # Stage 2: binary classifier over stage 1's single predicted label.
    # BUG FIX: its input size is 1 (x2 has one column), not hidden_size.
    model2 = Model2(1, output_size=1, hidden_size=hidden_size, dropout_rate=dropout_rate)

    criterion1 = nn.CrossEntropyLoss()
    criterion2 = nn.BCELoss()
    optimizer1 = optim.SGD(model1.parameters(), lr=0.01, momentum=0.9)
    optimizer2 = optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)

    dataset = MyDataset(x, y)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Train the first model.
    for epoch in range(100):
        acc1, loss1 = train(model1, dataloader, optimizer1, criterion1)
        print('Epoch [{}/{}], Loss1: {:.4f}, Acc1: {:.2f}%'.format(epoch + 1, 100, loss1, acc1))

    # Collect stage-1 predictions, one sample at a time (1-D input, argmax over dim 0).
    model1.eval()
    y_pred1 = []
    with torch.no_grad():
        for i in range(len(x)):
            inputs = torch.from_numpy(x[i])
            outputs = model1(inputs)
            _, predicted = torch.max(outputs.data, 0)
            y_pred1.append(predicted.item())
    y_pred1 = np.array(y_pred1)

    # Stage-2 input: the standardized stage-1 predictions, one column.
    x2 = y_pred1.reshape(-1, 1).astype(np.float32)
    sc2 = StandardScaler()
    x2 = sc2.fit_transform(x2)
    # BUG FIX: BCELoss needs float targets shaped like the (batch, 1) outputs;
    # the original fed the int64 (batch,) labels, which raises at loss time.
    y2 = y.astype(np.float32).reshape(-1, 1)
    dataset2 = MyDataset(x2, y2)
    dataloader2 = DataLoader(dataset2, batch_size=32, shuffle=True)

    # Train the second model; its last-epoch accuracy is the objective value.
    for epoch in range(100):
        acc2, loss2 = train(model2, dataloader2, optimizer2, criterion2)
        print('Epoch [{}/{}], Loss2: {:.4f}, Acc2: {:.2f}%'.format(epoch + 1, 100, loss2, acc2))

    return acc2
# Run Bayesian optimization over the hyper-parameters.
# BUG FIX: input_size is pinned to the real feature count -- Model1's first
# layer must equal x.shape[1], so the original (1, x.shape[1]) range sampled
# sizes that crash the forward pass with a shape mismatch.
pbounds = {
    'input_size': (x.shape[1], x.shape[1]),
    'output_size': (2, 10),
    'hidden_size': (10, 100),
    'dropout_rate': (0.1, 0.9),
}
optimizer = BayesianOptimization(f=bayesian_optimization, pbounds=pbounds, verbose=2)
optimizer.maximize(init_points=10, n_iter=10)
# Original source: https://www.cveoy.top/t/topic/mOoK -- all rights reserved by the author.