# 导入必要的库

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 读取 Excel 数据

# Load the expression table from 'data.xlsx' (the path is relative to the
# current working directory). The rest of the script reads column 0 as the
# disease label and every remaining column as a feature.
data = pd.read_excel(io='data.xlsx')

# 提取患病标签和基因表达数据

# Split the table by column position: every column after the first forms the
# feature matrix, and the first column holds the disease label.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

# 将标签转换为二元变量

# Encode the labels as integers: a value equal to the string 'Yes' becomes 1
# and any other value becomes 0 (the comparison is exact and case-sensitive).
y = np.array([int(label == 'Yes') for label in y])

# 划分训练集和测试集

# Hold out 20% of the samples as a test split; the fixed seed makes the
# partition reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# 数据标准化

# Standardise the features. The scaler is fitted on the training split only
# and then re-used on the test split, so no test-set statistics leak into
# the preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 定义 DNN 神经网络

# Fully connected binary classifier: two ReLU hidden layers (64 then 32
# units) followed by a single sigmoid output unit. The input width is taken
# from the number of columns in the training matrix.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(64, activation='relu', input_dim=X_train.shape[1])
)
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# 定义优化器和损失函数

# Adam with a 1e-3 learning rate, paired with binary cross-entropy to match
# the single sigmoid output of the network.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.BinaryCrossentropy()

# 编译模型

# Attach the optimizer and loss to the model and track accuracy alongside
# the loss during training and evaluation.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy'],
)

# 训练模型

# Train for 100 epochs in mini-batches of 32 and keep the per-epoch metrics.
# NOTE: the test split doubles as the validation set here, so the 'val_*'
# curves are computed on the same samples that are scored for the ROC later.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# 绘制损失函数曲线

# Plot training loss against validation loss per epoch. The legend order
# follows the order of the two plot calls ('loss' first, then 'val_loss').
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

# 预测测试集标签

# Score the held-out samples. The network ends in a single sigmoid unit, so
# each row receives one score between 0 and 1 rather than a hard 0/1 label.
y_pred = model.predict(x=X_test)

# 绘制 ROC 曲线

# Build the ROC curve from the predicted scores and report the area under it.
# The prediction array has one column per output unit, so it is flattened to
# the 1-D score vector that roc_curve documents as its input.
fpr, tpr, threshold = roc_curve(y_test, y_pred.ravel())
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# 计算特征重要性

# Rank genes by the weights of the first Dense layer.
#
# get_weights()[0] is that layer's kernel, a 2-D array with one row per input
# gene and one column per hidden unit (64 here). Flattening it would produce
# 64 values per gene and no longer line up with the gene names, so each row
# is collapsed to a single score: the sum of the absolute outgoing weights.
# This is only a rough heuristic, not a calibrated importance measure.
first_layer_kernel = model.layers[0].get_weights()[0]
importance = np.abs(first_layer_kernel).sum(axis=1)

gene_names = data.columns[1:]
gene_importance = pd.DataFrame({'Gene': gene_names, 'Importance': importance})
gene_importance = gene_importance.sort_values(by='Importance', ascending=False)

# 绘制特征重要性图

# Horizontal bar chart with one bar per gene, drawn in the row order of
# gene_importance.
plt.figure(figsize=(12, 8))
sns.barplot(x='Importance', y='Gene', data=gene_importance)
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Gene')
plt.show()

# 绘制热图

# Pearson correlation between the gene columns. Column 0 is excluded because
# it carries the text labels (the script compares it against 'Yes'), and
# DataFrame.corr raises on non-numeric columns in pandas >= 2.0 instead of
# silently dropping them.
gene_correlation = data.iloc[:, 1:].corr(method='pearson')
sns.heatmap(gene_correlation, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# 使用 t-SNE 将高维数据转换为二维空间

# Embed all samples into two dimensions with t-SNE (seeded for repeatable
# layouts). The input is the full feature matrix X as extracted from the
# table, not the standardised train/test splits.
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)

# 绘制 t-SNE 图

# Scatter the two t-SNE coordinates, colouring each sample by its 0/1 label.
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
plt.title('t-SNE')
plt.show()

# 使用 Python 构建 DNN 神经网络，基于基因表达数据预测患者疾病

# 原文地址：https://www.cveoy.top/t/topic/l6Lr 著作权归作者所有。请勿转载和采集！

# 免费 AI 点我，无需注册和登录