# 使用 Python 构建 DNN 神经网络,基于基因表达数据预测患者疾病
# 导入必要的库
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# 读取 Excel 数据
# Load the whole spreadsheet into a DataFrame. The rest of the script treats
# column 0 as the disease label and every remaining column as one gene.
data = pd.read_excel(io='data.xlsx')
# 提取患病标签和基因表达数据
# Split the table into features and labels (one statement per line; the
# original had both assignments fused on a single line, which does not parse).
# X: every column after the first, as a 2-D array of expression values.
# y: the first column, holding the raw disease labels.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values
# 将标签转换为二元变量
# Encode the labels as integers: 1 where the raw label is exactly 'Yes',
# 0 for every other value.
y = np.array([int(label == 'Yes') for label in y])
# 划分训练集和测试集
# Hold out 20% of the samples for testing. The fixed random_state makes the
# split reproducible from run to run.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# 数据标准化
# Standardize every gene column. The scaler is fitted on the training split
# only and then re-used for the test split, so no statistics from the test
# data influence the transformation.
# (The original had all three statements fused on one line, which does not
# parse.)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# 定义 DNN 神经网络
# Fully connected binary classifier: genes -> 64 -> 32 -> 1.
# The input width is declared with an explicit Input object instead of the
# legacy `input_dim=` keyword on the first Dense layer (Keras 3 warns about
# the keyword). Sequential.layers does not list the input object, so
# model.layers[0] is still the first Dense layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # A single sigmoid unit outputs a score in (0, 1) for the positive class.
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
# 定义优化器和损失函数
# Adam optimizer with a learning rate of 1e-3, and binary cross-entropy as
# the training loss (matches the single sigmoid output of the model).
# (The original had both assignments fused on one line, which does not parse.)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss = tf.keras.losses.BinaryCrossentropy()
# 编译模型
# Attach the optimizer and loss to the model and track accuracy during
# training and evaluation.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy'],
)
# 训练模型
# Train for 100 epochs in mini-batches of 32 samples. The test split is
# passed as validation data, so its loss and accuracy are recorded after every
# epoch alongside the training metrics in the returned History object.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_test, y_test),
)
# 绘制损失函数曲线
# Plot the per-epoch training loss against the loss on the validation data
# (the test split) recorded during fitting.
# (The original had all plotting calls fused on one line, which does not
# parse.)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# Legend entries follow the order of the two plot() calls above.
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()
# 预测测试集标签
# Score the test samples. The model ends in a sigmoid unit, so these are
# scores between 0 and 1 rather than hard 0/1 labels.
y_pred = model.predict(x=X_test)
# 绘制 ROC 曲线
# Compute the ROC curve of the predicted scores on the test split and the
# area under it, then plot the curve against the chance diagonal.
# (The original had all statements fused on one line, which does not parse.)
fpr, tpr, threshold = roc_curve(y_test, y_pred)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, color='darkorange', lw=2,
         label=f'ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal: the curve a random classifier would produce.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
# Slight head-room above 1.0 so the top of the curve is not clipped.
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()
# 计算特征重要性
# Rough per-gene importance derived from the first Dense layer.
# That layer's kernel has one row per input gene and one column per hidden
# unit (64 here). Flattening it, as the original did, yields 64 values per
# gene and makes the DataFrame constructor fail because the lengths no longer
# match the gene names. Instead, collapse each row to a single score: the
# mean absolute weight connecting that gene to the hidden units.
# NOTE: this is a simple heuristic, not a rigorous attribution method.
first_layer_kernel = model.layers[0].get_weights()[0]
importance = np.abs(first_layer_kernel).mean(axis=1)

# Gene names are the column headers after the label column.
gene_names = data.columns[1:]
gene_importance = pd.DataFrame({'Gene': gene_names, 'Importance': importance})
# Most important genes first.
gene_importance = gene_importance.sort_values(by='Importance', ascending=False)
# 绘制特征重要性图
# Horizontal bar chart with one bar per gene, drawn in the row order of
# gene_importance.
# (The original had all plotting calls fused on one line, which does not
# parse.)
plt.figure(figsize=(12, 8))
sns.barplot(x='Importance', y='Gene', data=gene_importance)
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Gene')
plt.show()
# 绘制热图
# Heatmap of pairwise Pearson correlations between the numeric columns.
# The label column holds strings (it is compared against 'Yes' earlier in the
# script); pandas >= 2.0 no longer drops non-numeric columns silently in
# DataFrame.corr() and raises instead, so restrict the frame to numeric
# columns explicitly before correlating.
# (The original also had all statements fused on one line, which does not
# parse.)
numeric_data = data.select_dtypes(include='number')
sns.heatmap(numeric_data.corr(method='pearson'), cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()
# 使用 t-SNE 将高维数据转换为二维空间
# Embed all samples (the raw, unscaled feature matrix X) into two dimensions
# with t-SNE. The fixed random_state keeps the embedding reproducible.
# (The original had both statements fused on one line, which does not parse.)
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X)
# 绘制 t-SNE 图
# Scatter plot of the 2-D embedding, with each point coloured by its binary
# label in y.
# (The original had all three calls fused on one line, which does not parse.)
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y)
plt.title('t-SNE')
plt.show()
# 原文地址: https://www.cveoy.top/t/topic/l6Lr 著作权归作者所有。请勿转载和采集!