import matplotlib.pyplot as plt
import pandas as pd
import torch
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset

# Read the data

# Load the spreadsheet; header=0 takes the column names from the first row.
data = pd.read_excel(r'C:\Users\lenovo\Desktop\HIV\DNN神经网络测试\data1.xlsx', header=0)
# Column 0 is used as the label; every remaining column is a feature.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

# Standardize the data

# Fit the scaler on the feature matrix and replace X with the scaled values.
# NOTE(review): the scaler is fitted on the full dataset; no hold-out split
# is made anywhere in this script.
sc = StandardScaler()
X = sc.fit_transform(X)

# Define the dataset class

class HIVDataset(Dataset):
    """In-memory dataset pairing each feature row with its label.

    Both inputs are converted to ``float32`` tensors once, at construction.
    """

    def __init__(self, X, y):
        """Store the features and labels as tensors.

        Args:
            X: Array-like of features, one sample per row.
            y: Array-like of labels, one entry per sample.

        Raises:
            ValueError: If ``X`` and ``y`` hold different numbers of samples.
        """
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        if len(self.X) != len(self.y):
            raise ValueError(
                'X and y must have the same number of samples, got {} and {}'.format(
                    len(self.X), len(self.y)
                )
            )

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

# Define the model

class DNN(nn.Module):
    """Fully connected network with three ReLU hidden layers.

    The last hidden activation is scaled by a learned scalar weight per
    sample before the output layer; the output is passed through a softmax,
    so ``forward`` returns class probabilities rather than logits.
    """

    def __init__(self, input_dim, output_dim, hidden_dim1, hidden_dim2, hidden_dim3):
        """Build the layers.

        Args:
            input_dim: Number of input features.
            output_dim: Number of output classes.
            hidden_dim1: Width of the first hidden layer.
            hidden_dim2: Width of the second hidden layer.
            hidden_dim3: Width of the third hidden layer.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, output_dim)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        # One scalar score per row, normalized with a softmax over dim 0.
        # NOTE(review): for a (batch, features) input dim 0 is the batch axis,
        # so each sample's weight depends on the other samples in the batch.
        self.attention = nn.Sequential(
            nn.Linear(hidden_dim3, 1),
            nn.Softmax(dim=0),
        )

    def forward(self, x):
        """Return softmax class probabilities for ``x``.

        Args:
            x: Input tensor whose last dimension is ``input_dim``; the softmax
                over dim 1 means a 2-D ``(batch, input_dim)`` input is expected.

        Returns:
            Tensor of per-class probabilities; each row sums to 1.
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        attn_weight = self.attention(x)
        # Broadcast the per-row weight across the hidden features.
        x = x * attn_weight
        x = self.fc4(x)
        x = self.softmax(x)
        return x

# Define the training function

def train(hidden_dim1, hidden_dim2, hidden_dim3, lr, weight_decay,
          *, num_epochs=50, batch_size=32):
    """Train a fresh DNN on the module-level ``X``/``y`` and report its accuracy.

    Used as the objective function for the Bayesian optimizer, which calls
    it with keyword arguments named after the search-space keys.

    Args:
        hidden_dim1: Width of the first hidden layer (truncated to ``int``).
        hidden_dim2: Width of the second hidden layer (truncated to ``int``).
        hidden_dim3: Width of the third hidden layer (truncated to ``int``).
        lr: Learning rate passed to Adam.
        weight_decay: Weight decay passed to Adam.
        num_epochs: Number of passes over the data.
        batch_size: Mini-batch size for the data loader.

    Returns:
        Training-set accuracy measured during the last epoch (0.0 when
        ``num_epochs`` is 0).
    """
    # Build the model
    model = DNN(
        input_dim=X.shape[1],
        output_dim=2,
        hidden_dim1=int(hidden_dim1),
        hidden_dim2=int(hidden_dim2),
        hidden_dim3=int(hidden_dim3),
    )

    # Loss function and optimizer.
    # DNN.forward already ends with a softmax, so the model outputs are
    # probabilities. CrossEntropyLoss expects unnormalized logits and would
    # apply a second softmax; NLLLoss on log-probabilities is the matching loss.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Dataset and data loader
    dataset = HIVDataset(X, y)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Training loop
    epoch_acc = 0.0
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for batch_x, batch_y in loader:
            targets = batch_y.long()
            optimizer.zero_grad()
            outputs = model(batch_x)
            # Clamp before the log so a probability of exactly 0 cannot yield -inf.
            loss = criterion(torch.log(torch.clamp(outputs, min=1e-12)), targets)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            total += batch_y.size(0)
            correct += (predicted == targets).sum().item()
            # Weight the batch-mean loss by batch size so the epoch value is a
            # per-sample mean even when the last batch is smaller.
            running_loss += loss.item() * batch_y.size(0)

        epoch_loss = running_loss / total
        epoch_acc = correct / total
        print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(
            epoch + 1, num_epochs, epoch_loss, epoch_acc))

    return epoch_acc

# Use Bayesian optimization to search for the best hyper-parameters

# Search space: (lower, upper) bounds for every argument of train().
pbounds = {
    'hidden_dim1': (10, 100),
    'hidden_dim2': (10, 100),
    'hidden_dim3': (10, 100),
    'lr': (0.0001, 0.1),
    'weight_decay': (0, 0.1),
}
optimizer = BayesianOptimization(f=train, pbounds=pbounds)
# 5 initial probes followed by 10 optimization iterations; each one calls
# train() once.
optimizer.maximize(init_points=5, n_iter=10)

# Print the best parameters and the best result

print('Maximum accuracy value: {:.4f}'.format(optimizer.max['target']))
print('Best parameters: ', optimizer.max['params'])

# Keep the best parameters; the name `optimizer` is rebound to the Adam
# optimizer further down, so they must be captured here.
best_params = optimizer.max['params']

# Train the model

# Rebuild the network with the best hyper-parameters found by the search.
model = DNN(
    input_dim=X.shape[1],
    output_dim=2,
    hidden_dim1=int(best_params['hidden_dim1']),
    hidden_dim2=int(best_params['hidden_dim2']),
    hidden_dim3=int(best_params['hidden_dim3']),
)
# DNN.forward already ends with a softmax, so the model outputs are
# probabilities. CrossEntropyLoss expects unnormalized logits and would apply
# a second softmax; NLLLoss on log-probabilities is the matching loss.
criterion = nn.NLLLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=best_params['lr'],
    weight_decay=best_params['weight_decay'],
)

dataset = HIVDataset(X, y)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Per-epoch history, consumed by the plots below.
train_loss = []
train_acc = []

for epoch in range(50):
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_x, batch_y in loader:
        targets = batch_y.long()
        optimizer.zero_grad()
        outputs = model(batch_x)
        # Clamp before the log so a probability of exactly 0 cannot yield -inf.
        loss = criterion(torch.log(torch.clamp(outputs, min=1e-12)), targets)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        total += batch_y.size(0)
        correct += (predicted == targets).sum().item()
        # Weight the batch-mean loss by batch size so the epoch value is a
        # per-sample mean even when the last batch is smaller.
        running_loss += loss.item() * batch_y.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch + 1, 50, epoch_loss, epoch_acc))

# Plot the accuracy curve

# Training accuracy per epoch.
plt.figure(figsize=(8, 6))
plt.plot(train_acc)
plt.title('Accuracy of the DNN model')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

# Plot the loss curve

# Training loss per epoch.
plt.figure(figsize=(8, 6))
plt.plot(train_loss)
plt.title('Loss of the DNN model')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Compute and plot the ROC curve

# Score every sample in one forward pass; column 1 of the model output is
# used as the score for the ROC computation.
# NOTE(review): this evaluates on the same data the model was trained on, and
# the whole dataset is fed as a single batch although training used batches
# of 32 - the model's attention softmax runs over dim 0, so confirm that this
# is intended.
with torch.no_grad():  # inference only, no autograd graph needed
    y_prob = model(torch.tensor(X, dtype=torch.float32)).numpy()[:, 1]

fpr, tpr, thresholds = roc_curve(y, y_prob)
roc_auc = roc_auc_score(y, y_prob)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic of the DNN model')
plt.legend(loc="lower right")
plt.show()

# Bayesian-optimized DNN model for HIV prediction: performance evaluation and visualization