# Python实现随机森林分类及可视化
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Load the preprocessed dataset from the Excel workbook
data = pd.read_excel('处理后的数据.xlsx')
# Feature matrix: the nine feature columns, selected by label
X = data.loc[:, [
    '特征1', '特征2', '特征3',
    '特征4', '特征5', '特征6',
    '特征7', '特征8', '特征9',
]]
# Target vector: the class label column
y = data.loc[:, '类别']
# Random forest classifier
class rfc:
    """Minimal random forest classifier built from scikit-learn decision trees.

    Each tree is trained on a bootstrap resample of the training set
    (expressed as integer sample weights) and is limited to a random subset
    of features at every split. Predictions average the class probabilities
    of all trees and pick the most probable class.

    Attributes set by ``fit``:
        y_classes: sorted unique class labels found in ``y``.
        trees: list of the fitted ``DecisionTreeClassifier`` instances.
    """

    def __init__(self, n_estimators=100, random_state=0):
        """Store the hyper-parameters; no training happens here.

        Args:
            n_estimators: number of decision trees to grow in ``fit``.
            random_state: seed for the generator that drives both the
                per-tree seeds and the bootstrap resampling.
        """
        # Size of the forest
        self.n_estimators = n_estimators
        # Seed of the forest's random generator
        self.random_state = random_state

    def fit(self, X, y):
        """Grow ``n_estimators`` trees on bootstrap resamples of ``(X, y)``.

        Args:
            X: training samples; must expose ``shape`` (ndarray, DataFrame).
            y: class labels, one per row of ``X``.

        Returns:
            The classifier itself, so calls can be chained.
        """
        self.y_classes = np.unique(y)
        n_samples = X.shape[0]
        rs = np.random.RandomState(self.random_state)
        max_seed = np.iinfo(np.int32).max

        trees = []
        for _ in range(self.n_estimators):
            # 'sqrt' is what the legacy 'auto' alias meant for classifiers;
            # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
            tree = DecisionTreeClassifier(
                random_state=rs.randint(max_seed),
                max_features='sqrt',
            )
            # Bootstrap: draw n_samples row indices with replacement and turn
            # the draw counts into sample weights (rows never drawn get 0).
            # The tree still receives the full y.
            weights = np.bincount(
                rs.randint(0, n_samples, n_samples), minlength=n_samples
            )
            tree.fit(X, y, sample_weight=weights)
            trees.append(tree)

        self.trees = trees
        return self

    def predict_proba(self, X):
        """Return the class probabilities averaged over all fitted trees.

        Args:
            X: samples to score; must expose ``shape``.

        Returns:
            Array of shape ``(X.shape[0], len(self.y_classes))``.

        Raises:
            ValueError: if the forest contains no trees.
        """
        if not self.trees:
            raise ValueError('the forest contains no fitted trees')
        total = np.zeros((X.shape[0], len(self.y_classes)))
        # Iterate over the trees that were actually fitted rather than over
        # range(n_estimators), which may have been changed after fit().
        for tree in self.trees:
            total += tree.predict_proba(X)
        return total / len(self.trees)

    def predict(self, X):
        """Predict the class label of every row of ``X``.

        Args:
            X: samples to classify; must expose ``shape``.

        Returns:
            Array with one entry of ``y_classes`` per row of ``X``: the class
            with the highest averaged probability.
        """
        best = np.argmax(self.predict_proba(X), axis=1)
        return self.y_classes.take(best, axis=0)
# Create the random forest classifier and fit it on the full feature set
rf = rfc()
rf.fit(X, y)

# Visualization
# Switch to the interactive notebook backend only when running under
# IPython/Jupyter; the bare `%matplotlib` magic is a syntax error in a plain
# Python script, and get_ipython() does not exist outside IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'notebook')
except NameError:
    pass
plt.rcParams['font.sans-serif'] = ['PingFang HK']  # a font with Chinese glyphs
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# The decision regions are drawn in the (特征1, 特征2) plane, so they need a
# model trained on exactly those two columns: `rf` was fitted on every column
# of X and cannot score a two-column grid. NOTE: the shaded regions therefore
# belong to this two-feature forest, not to `rf`.
X_plot = X[['特征1', '特征2']].to_numpy()
rf_2d = rfc()
rf_2d.fit(X_plot, y)

# Plot range: data extent padded by 0.5 on each side
x_min, x_max = X_plot[:, 0].min() - .5, X_plot[:, 0].max() + .5
y_min, y_max = X_plot[:, 1].min() - .5, X_plot[:, 1].max() + .5
# Grid of points covering the plot range with step 0.05
xx, yy = np.meshgrid(np.arange(x_min, x_max, .05), np.arange(y_min, y_max, .05))
# Predict the class of every grid point and restore the grid shape
Z = rf_2d.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Two-colour map for the two class regions
colors = ['#ffadad', '#8ecae6']
cmap = ListedColormap(colors)
# Filled contours showing the predicted class regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.3)

# Scatter the samples, one series per class label (-1 and 1)
neg_mask = (y == -1).to_numpy()
pos_mask = (y == 1).to_numpy()
p1 = ax.scatter(X_plot[neg_mask, 0], X_plot[neg_mask, 1], c='#e63946', marker='o', s=20)
p2 = ax.scatter(X_plot[pos_mask, 0], X_plot[pos_mask, 1], c='#457b9d', marker='x', s=20)

# Title, axis labels and legend
ax.set_title('随机森林分类', color='#264653')
ax.set_xlabel('特征1', color='#264653')
ax.set_ylabel('特征2', color='#264653')
ax.tick_params(labelcolor='#264653')
ax.legend([p1, p2], ['-1', '1'], loc='upper left')
plt.show()
# 原文地址: https://www.cveoy.top/t/topic/f1fY 著作权归作者所有。请勿转载和采集!