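# NOTE(review): the text below is a truncated page excerpt whose punctuation
# was stripped during extraction. It appears to preview a LocalOutlierFactor
# variant of the script that follows; it is reconstructed here as a comment
# so that it is readable and does not execute.
#
#   import pandas as pd
#   import numpy as np
#   import matplotlib.pyplot as plt
#   from sklearn.neighbors import LocalOutlierFactor
#   # Load the dataset
#   df = pd.read_csv('1.csv')
#   # Visualize the initial data distribution
#   plt.scatter(df['x'], df['y'], c='b', s=10, alpha=0.5)
#   plt.show()
#   # Train LOF ... (excerpt ends here)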
# Outlier detection on a 2-D point set with Isolation Forest.
#
# The script reads '1.csv' (the 'x' and 'y' columns are used for plotting),
# shows a scatter plot of the raw data, fits an Isolation Forest on every
# column of the file, and then shows a second scatter plot in which the
# predicted outliers are drawn larger and in red.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest

# Load the dataset.
df = pd.read_csv('1.csv')

# Visualize the initial data distribution.
plt.scatter(df['x'], df['y'], c='b', s=10, alpha=0.5)
plt.show()

# Train the Isolation Forest model. The model is fitted on the whole frame
# (all columns), and contamination=0.1 sets the expected outlier share to 10%.
clf = IsolationForest(n_estimators=100, contamination=0.1)
clf.fit(df)
y_pred = clf.predict(df)

# Visualize the outliers. predict() labels outliers as -1 and inliers as 1;
# select each subset once and reuse it for both plot coordinates.
outliers = df[y_pred == -1]
inliers = df[y_pred == 1]
plt.scatter(outliers['x'], outliers['y'], c='r', s=30, alpha=0.7)
plt.scatter(inliers['x'], inliers['y'], c='b', s=10, alpha=0.5)
plt.show()
# Original source: https://www.cveoy.top/t/topic/e9yM — copyright belongs to the author. Do not repost or scrape.