Python实现随机森林分类及可视化

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Load the dataset from the Excel workbook, then split it into the
# feature matrix X (nine feature columns) and the label vector y.
feature_columns = ['特征1', '特征2', '特征3', '特征4', '特征5', '特征6', '特征7', '特征8', '特征9']
data = pd.read_excel('处理后的数据.xlsx')
X = data[feature_columns]
y = data['类别']

# Random forest classifier
class rfc:
    """Minimal random forest classifier built on scikit-learn decision trees.

    Every tree is fitted on the full training set with integer sample
    weights obtained from a bootstrap draw (how many times each row was
    sampled with replacement), using ``max_features='sqrt'``. Predictions
    average the per-tree class probabilities and pick the most probable
    class.
    """

    def __init__(self, n_estimators=100, random_state=0):
        """Store the forest hyper-parameters.

        Args:
            n_estimators: Number of decision trees to train in ``fit``.
            random_state: Seed of the forest-level random number generator.
        """
        # Number of trees in the forest.
        self.n_estimators = n_estimators
        # Seed used to derive per-tree seeds and bootstrap draws.
        self.random_state = random_state

    def fit(self, X, y):
        """Fit ``n_estimators`` decision trees on bootstrap-weighted data.

        Args:
            X: Training features; must expose ``shape[0]`` (number of rows)
                and be accepted by ``DecisionTreeClassifier.fit``.
            y: Training labels, one per row of ``X``.

        Returns:
            The fitted classifier itself, so calls can be chained.
        """
        # Sorted unique labels; predict() maps probability columns to these.
        self.y_classes = np.unique(y)
        n_samples = X.shape[0]
        rng = np.random.RandomState(self.random_state)
        trees = []
        for _ in range(self.n_estimators):
            # The draw order (tree seed first, bootstrap indices second) is
            # kept fixed so a given random_state always yields the same forest.
            tree = DecisionTreeClassifier(
                random_state=rng.randint(np.iinfo(np.int32).max),
                # 'auto' was removed in scikit-learn 1.3; for classifiers it
                # was an alias of 'sqrt', so this keeps the same behavior.
                max_features='sqrt',
            )
            # Bootstrap resampling expressed as weights: the weight of a row
            # is the number of times it was drawn with replacement.
            bootstrap_counts = np.bincount(
                rng.randint(0, n_samples, n_samples), minlength=n_samples
            )
            tree.fit(X, y, sample_weight=bootstrap_counts)
            trees.append(tree)
        self.trees = trees
        return self

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: Samples to classify; must expose ``shape[0]`` and be accepted
                by each tree's ``predict_proba``.

        Returns:
            Array of labels taken from ``self.y_classes``, one per row.
        """
        # Use the trees that were actually fitted rather than the current
        # value of n_estimators, which may have been changed after fit().
        trees = self.trees
        # Accumulated class probabilities, one column per known class.
        probas = np.zeros((X.shape[0], len(self.y_classes)))
        for tree in trees:
            probas += tree.predict_proba(X)
        # Average the probabilities over the trees.
        probas /= len(trees)
        # Map the most probable column index back to its class label.
        return self.y_classes.take(np.argmax(probas, axis=1), axis=0)

# Create the random forest classifier and fit it on the full feature set
rf = rfc()
rf.fit(X, y)

# Visualization
# `%matplotlib notebook` is IPython-only syntax and a SyntaxError in a plain
# Python file; go through the IPython API when it is available instead.
try:
    get_ipython().run_line_magic('matplotlib', 'notebook')
except NameError:
    # Not running under IPython/Jupyter: keep matplotlib's default backend.
    pass
plt.rcParams['font.sans-serif'] = ['PingFang HK']  # a font able to render the Chinese labels
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Plot range: the first two features, padded by 0.5 on each side
x_min, x_max = X.iloc[:, 0].min() - .5, X.iloc[:, 0].max() + .5
y_min, y_max = X.iloc[:, 1].min() - .5, X.iloc[:, 1].max() + .5

# Grid point coordinates over the plot range
xx, yy = np.meshgrid(np.arange(x_min, x_max, .05), np.arange(y_min, y_max, .05))

# Predict on the grid. The forest was fitted on every column of X, so it
# cannot be queried with only two features: hold the remaining features at
# their column means and vary the first two. The background therefore shows
# a 2-D slice of the decision surface, not a projection of it.
grid = pd.DataFrame({col: np.full(xx.size, X[col].mean()) for col in X.columns})
grid[X.columns[0]] = xx.ravel()
grid[X.columns[1]] = yy.ravel()
Z = rf.predict(grid)
Z = Z.reshape(xx.shape)

# Color map for the two predicted classes
colors = ['#ffadad', '#8ecae6']
cmap = ListedColormap(colors)

# Filled contour plot of the predicted class over the grid
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.3)

# Scatter plot of the samples, one marker style per class label
x1 = X[y==-1]['特征1']
y1 = X[y==-1]['特征2']
x2 = X[y==1]['特征1']
y2 = X[y==1]['特征2']
p1 = ax.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = ax.scatter(x2, y2, c='#457b9d', marker='x', s=20)

# Title, axis labels and legend
ax.set_title('随机森林分类', color='#264653')
ax.set_xlabel('特征1', color='#264653')
ax.set_ylabel('特征2', color='#264653')
ax.tick_params(labelcolor='#264653')
ax.legend([p1, p2], ['-1', '1'], loc='upper left')

plt.show()