# Import required libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import StandardScaler

from bayes_opt import BayesianOptimization
# Dataset class definition
class GeneDataset(Dataset):
    """Dataset over a DataFrame whose first column is the label and the
    remaining columns are features.

    X: float tensor of shape (n_rows, n_features) built from columns 1..end.
    y: long tensor of shape (n_rows,) built from column 0 (class ids).
    """

    def __init__(self, data):
        # Bug fix: the constructor was named `init`, so it was never called
        # and instances had no X/y attributes. Must be the dunder __init__.
        self.data = data
        # All columns after the first are the feature matrix.
        self.X = torch.from_numpy(data.iloc[:, 1:].values).float()
        # Column 0 holds the integer class labels.
        self.y = torch.from_numpy(data.iloc[:, 0].values).long()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.X[index], self.y[index]
# First model class definition
class Model1(nn.Module):
    """Four-layer fully-connected classifier with ReLU activations and
    dropout after each hidden layer. Returns raw class logits (no softmax),
    as expected by ``nn.CrossEntropyLoss``.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout):
        # Bug fix: `def init` / `super().init()` must be the dunders
        # __init__ / super().__init__(); otherwise nn.Module is never
        # initialized and registering submodules raises at assignment.
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)    # fully connected layer 1
        self.fc2 = nn.Linear(hidden_size, hidden_size)   # fully connected layer 2
        self.fc3 = nn.Linear(hidden_size, hidden_size)   # fully connected layer 3
        self.fc4 = nn.Linear(hidden_size, output_size)   # fully connected layer 4
        self.dropout = nn.Dropout(dropout)               # shared dropout module

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.relu(self.fc3(x))
        x = self.dropout(x)
        # Final layer: logits, no activation.
        return self.fc4(x)
# Second model class definition
class Model2(nn.Module):
    """Two-layer binary classifier head. Returns raw logits (no sigmoid),
    as expected by ``nn.BCEWithLogitsLoss``.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout):
        # Bug fix: `def init` / `super().init()` must be the dunders
        # __init__ / super().__init__().
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # fully connected layer 1
        self.fc2 = nn.Linear(hidden_size, output_size)  # fully connected layer 2
        self.dropout = nn.Dropout(dropout)              # dropout layer

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        # Bug fix: the original returned x.sigmoid() even though this model
        # is trained with nn.BCEWithLogitsLoss, which applies sigmoid
        # internally — a double sigmoid that flattens gradients. Return the
        # raw logit; apply torch.sigmoid explicitly at inference time.
        return self.fc2(x)
# Training function
def train(model, optimizer, criterion, train_loader, device):
    """Run one training epoch over ``train_loader``.

    Returns a (mean_loss, accuracy) pair averaged over the whole dataset,
    where accuracy counts samples whose argmax over dim 1 matches the label.
    """
    model.train()  # enable training mode (dropout active)
    running_loss = 0.0
    n_correct = 0
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(features)
        batch_loss = criterion(preds, labels)
        batch_loss.backward()
        optimizer.step()
        # Weight the batch loss by batch size so the epoch mean is exact.
        running_loss += batch_loss.item() * features.size(0)
        n_correct += (preds.argmax(1) == labels).sum().item()
    n_samples = len(train_loader.dataset)
    return running_loss / n_samples, n_correct / n_samples
# Main function
def main():
    """Load the gene-expression table, standardize the features, and run
    Bayesian optimization over the hyper-parameters of a two-stage
    neural-network classifier (multi-class Model1 feeding binary Model2).
    """
    # Bug fix: the original path was a plain string literal; '\U' begins an
    # illegal unicode escape and is a SyntaxError. Use a raw string.
    df = pd.read_excel(
        r'C:\Users\lenovo\Desktop\HIV\GSE6740GSE50011基因降低\output_data.xlsx',
        index_col=0)
    # Standardize feature columns only; column 0 holds the labels.
    scaler = StandardScaler()
    df.iloc[:, 1:] = scaler.fit_transform(df.iloc[:, 1:])
    # Hyper-parameter search space for the Bayesian optimizer.
    pbounds = {'input_size': (10, len(df.columns) - 1),
               'hidden_size': (10, 200),
               'output_size': (2, 5),
               'dropout': (0, 0.5)}

    def target_function(input_size, hidden_size, output_size, dropout):
        """Objective: train both stages and return model2's binary accuracy."""
        # Fixed (non-searched) training hyper-parameters.
        learning_rate = 0.01
        batch_size = 32
        num_epochs = 100
        # bayes_opt samples floats; the layer sizes must be ints.
        input_size = int(input_size)
        hidden_size = int(hidden_size)
        output_size = int(output_size)
        dataset = GeneDataset(df)
        # Bug fix: Model1 was built with the sampled `input_size` while the
        # loader always yielded ALL features, crashing on shape mismatch.
        # Honor the sampled size by using only the first `input_size`
        # feature columns.
        X = dataset.X[:, :input_size]
        y = dataset.y
        train_loader = DataLoader(torch.utils.data.TensorDataset(X, y),
                                  batch_size=batch_size, shuffle=True)
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model1 = Model1(input_size=input_size, hidden_size=hidden_size,
                        output_size=output_size, dropout=dropout).to(device)
        model2 = Model2(input_size=output_size, hidden_size=50,
                        output_size=1, dropout=dropout).to(device)
        optimizer1 = torch.optim.Adam(model1.parameters(), lr=learning_rate)
        optimizer2 = torch.optim.Adam(model2.parameters(), lr=learning_rate)
        criterion1 = nn.CrossEntropyLoss()      # model1 outputs class logits
        criterion2 = nn.BCEWithLogitsLoss()     # model2 outputs a binary logit
        # Stage 1: train the multi-class model.
        for epoch in range(num_epochs):
            loss1, acc1 = train(model1, optimizer1, criterion1, train_loader, device)
            print('Epoch [{}/{}], Loss1: {:.4f}, Accuracy1: {:.2f}%'.format(
                epoch + 1, num_epochs, loss1, acc1 * 100))
        # Bug fix: stage-1 outputs were computed with the model still in
        # train mode (dropout active) and with gradient tracking. Switch to
        # eval mode and disable autograd for the feature hand-off.
        model1.eval()
        with torch.no_grad():
            output1 = torch.softmax(model1(X.to(device)), dim=1)
        targets = y.float().unsqueeze(1)
        dataset2 = torch.utils.data.TensorDataset(output1.cpu(), targets)
        train_loader2 = DataLoader(dataset2, batch_size=batch_size, shuffle=True)
        # Stage 2: train the binary head on stage-1 class probabilities.
        for epoch in range(num_epochs):
            loss2, acc2 = train(model2, optimizer2, criterion2, train_loader2, device)
            print('Epoch [{}/{}], Loss2: {:.4f}, Accuracy2: {:.2f}%'.format(
                epoch + 1, num_epochs, loss2, acc2 * 100))
        # Bug fix: `train` measures accuracy with argmax over dim 1, which is
        # always index 0 for model2's single output unit, so the returned
        # objective was meaningless. Compute real binary accuracy by
        # thresholding the sigmoid probability at 0.5.
        model2.eval()
        with torch.no_grad():
            probs = torch.sigmoid(model2(output1)).cpu()
        return ((probs >= 0.5).float() == targets).float().mean().item()

    # Run the Bayesian optimization over the search space.
    optimizer = BayesianOptimization(f=target_function, pbounds=pbounds)
    optimizer.maximize(init_points=5, n_iter=20)
# Bug fix: the guard must compare the dunder `__name__` against '__main__';
# the original compared an undefined `name` to 'main' (NameError at import).
if __name__ == '__main__':
    main()
# Source: https://www.cveoy.top/t/topic/bDJf — copyright belongs to the original author; do not republish or scrape.