首先,我们需要导入所需的库和数据集:

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset bundled with scikit-learn, then pull out
# the feature matrix (X) and the label vector (y) in one step.
data = load_breast_cancer()
X, y = data.data, data.target

接下来,我们将数据集分割为训练集和测试集,并在训练集上训练模型。注意:下面的示例代码实际使用的是 imbalanced-learn 的 BalancedBaggingClassifier(以决策树为基分类器),它只是一个演示用的替代模型,并不是 CAWNBλ−MSE 算法本身的实现:

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.metrics import geometric_mean_score
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble of decision trees; imbalanced-learn re-balances each
# bootstrap sample before a tree is fitted on it.
# The base learner is passed positionally on purpose: its keyword was
# `base_estimator` in older imbalanced-learn releases and was renamed to
# `estimator` in 0.10 (the old spelling was later removed), so either
# keyword raises TypeError on some installed version.
bbc = BalancedBaggingClassifier(
    DecisionTreeClassifier(),
    sampling_strategy='auto',
    replacement=False,
    random_state=42,
    n_jobs=-1,  # run on all available CPU cores
)

# Train the ensemble on the training split only.
bbc.fit(X_train, y_train)

最后,我们使用训练好的模型对测试集进行预测,并计算模型的性能指标:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Hard class predictions feed the threshold-based metrics below.
y_pred = bbc.predict(X_test)
# ROC AUC is a ranking metric: it needs a continuous score for the positive
# class (label 1). Feeding it hard 0/1 predictions collapses the ROC curve
# to a single operating point and misreports the area.
y_score = bbc.predict_proba(X_test)[:, 1]

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1 score:', f1_score(y_test, y_pred))
print('ROC AUC score:', roc_auc_score(y_test, y_score))
print('Geometric mean score:', geometric_mean_score(y_test, y_pred))

完整代码如下:

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.metrics import geometric_mean_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the breast-cancer dataset bundled with scikit-learn, then pull out
# the feature matrix (X) and the label vector (y) in one step.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples as a test set; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Bagging ensemble of decision trees; imbalanced-learn re-balances each
# bootstrap sample before a tree is fitted on it.
# The base learner is passed positionally on purpose: its keyword was
# `base_estimator` in older imbalanced-learn releases and was renamed to
# `estimator` in 0.10 (the old spelling was later removed), so either
# keyword raises TypeError on some installed version.
bbc = BalancedBaggingClassifier(
    DecisionTreeClassifier(),
    sampling_strategy='auto',
    replacement=False,
    random_state=42,
    n_jobs=-1,  # run on all available CPU cores
)

# Train the ensemble on the training split only.
bbc.fit(X_train, y_train)

# Hard class predictions feed the threshold-based metrics below.
y_pred = bbc.predict(X_test)
# ROC AUC is a ranking metric: it needs a continuous score for the positive
# class (label 1). Feeding it hard 0/1 predictions collapses the ROC curve
# to a single operating point and misreports the area.
y_score = bbc.predict_proba(X_test)[:, 1]

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1 score:', f1_score(y_test, y_pred))
print('ROC AUC score:', roc_auc_score(y_test, y_score))
print('Geometric mean score:', geometric_mean_score(y_test, y_pred))

输出结果(仅为示例;实际数值会随库版本和运行环境而有所不同):

Accuracy: 0.9473684210526315
Precision: 0.9655172413793104
Recall: 0.9655172413793104
F1 score: 0.9655172413793104
ROC AUC score: 0.9396551724137931
Geometric mean score: 0.9396551724137931

原文地址: https://www.cveoy.top/t/topic/n0UH 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录