Python 聚类性能指标计算:DBI、DI、CHI 和 SI(使用 NumPy 和 SciPy)
以下示例代码仅依赖 NumPy 和 SciPy,用于计算四个聚类内部评价指标:DBI(Davies-Bouldin 指数)、DI(Dunn 指数)、CHI(Calinski-Harabasz 指数)和 SI(轮廓系数,Silhouette Index):
import numpy as np
from scipy.stats import f_oneway
from scipy.spatial.distance import cdist
def dbi(data, labels):
    """Return the Davies-Bouldin index (DBI) of a clustering.

    The scatter of a cluster is the mean Euclidean distance of its members
    to the cluster centroid. For each cluster the worst-case similarity
    ``(scatter_i + scatter_j) / distance(centroid_i, centroid_j)`` over all
    other clusters is taken, and the index is the mean of those worst cases.
    Lower values indicate more compact, better separated clusters.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) with one cluster label per
            sample. Labels may be arbitrary values; they do not have to be
            the integers 0..k-1.

    Returns:
        The Davies-Bouldin index as a float. If two centroids coincide the
        division by their zero distance yields ``inf`` (or ``nan`` when both
        scatters are also zero).

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    cluster_ids = np.unique(labels)
    if cluster_ids.size < 2:
        raise ValueError("dbi requires at least two clusters")

    # Select members by label *value*, never by positional index.
    centers = np.array(
        [data[labels == cid].mean(axis=0) for cid in cluster_ids]
    )
    scatter = np.array([
        cdist(data[labels == cid], center[np.newaxis, :]).mean()
        for cid, center in zip(cluster_ids, centers)
    ])

    center_dists = cdist(centers, centers)
    # An infinite self-distance turns the diagonal ratios into 0, so a
    # cluster is never compared with itself when taking the row maximum.
    np.fill_diagonal(center_dists, np.inf)

    ratios = (scatter[:, np.newaxis] + scatter[np.newaxis, :]) / center_dists
    return float(ratios.max(axis=1).mean())
def di(data, labels):
    """Return the Dunn index (DI) of a clustering.

    The index is the smallest Euclidean distance between two samples that
    belong to different clusters, divided by the largest cluster diameter
    (the largest distance between two samples of the same cluster). Higher
    values indicate compact, well separated clusters.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) with one cluster label per
            sample.

    Returns:
        The Dunn index as a float; ``inf`` when every cluster has zero
        diameter (for example when all clusters are single points).

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    cluster_ids = np.unique(labels)
    if cluster_ids.size < 2:
        raise ValueError("di requires at least two clusters")

    members = [data[labels == cid] for cid in cluster_ids]

    max_diameter = max(cdist(pts, pts).max() for pts in members)
    # Distances are symmetric, so each unordered pair is visited only once.
    min_separation = min(
        cdist(first, second).min()
        for idx, first in enumerate(members)
        for second in members[idx + 1:]
    )

    if max_diameter == 0.0:
        # Avoid a division by zero when no cluster has any extent.
        return float("inf")
    return float(min_separation / max_diameter)
def chi(data, labels):
    """Return the Calinski-Harabasz index (CHI) of a clustering.

    The index is the ratio of between-cluster dispersion to within-cluster
    dispersion, each divided by its degrees of freedom::

        CHI = (B / (k - 1)) / (W / (n - k))

    where ``B`` is the size-weighted sum of squared distances from every
    cluster centroid to the overall mean, ``W`` is the sum of squared
    distances from every sample to its own centroid, ``n`` is the number of
    samples and ``k`` the number of clusters. Higher values are better.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) with one cluster label per
            sample. Labels may be arbitrary values.

    Returns:
        The Calinski-Harabasz index as a float. When the within-cluster
        dispersion is exactly zero, 1.0 is returned instead of dividing by
        zero (the same convention scikit-learn uses).

    Raises:
        ValueError: If the number of clusters is not between 2 and
            n_samples - 1 inclusive.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    cluster_ids = np.unique(labels)
    n_samples = data.shape[0]
    n_clusters = cluster_ids.size
    if not 1 < n_clusters < n_samples:
        raise ValueError(
            "chi requires between 2 and n_samples - 1 clusters"
        )

    overall_mean = data.mean(axis=0)
    between = 0.0
    within = 0.0
    for cid in cluster_ids:
        # Select members by label value, not by position in the centre list.
        members = data[labels == cid]
        center = members.mean(axis=0)
        between += len(members) * np.sum((center - overall_mean) ** 2)
        within += np.sum((members - center) ** 2)

    if within == 0.0:
        return 1.0
    return float(
        between * (n_samples - n_clusters) / (within * (n_clusters - 1))
    )
def si(data, labels):
    """Return the mean silhouette coefficient (SI) of a clustering.

    For every sample, ``a`` is the mean Euclidean distance to the other
    members of its own cluster and ``b`` is the smallest mean distance to
    the members of any other cluster. The sample's silhouette is
    ``(b - a) / max(a, b)``; samples in single-member clusters score 0.
    The function returns the mean over all samples, a value in [-1, 1]
    where higher is better.

    The full (n_samples, n_samples) distance matrix is held in memory.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) with one cluster label per
            sample. Labels may be arbitrary values.

    Returns:
        The mean silhouette coefficient as a float.

    Raises:
        ValueError: If the number of clusters is not between 2 and
            n_samples - 1 inclusive.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    # ``own`` maps every sample to the column index of its own cluster.
    cluster_ids, own = np.unique(labels, return_inverse=True)
    n_samples = data.shape[0]
    if not 1 < cluster_ids.size < n_samples:
        raise ValueError(
            "si requires between 2 and n_samples - 1 clusters"
        )

    pairwise = cdist(data, data)
    masks = [labels == cid for cid in cluster_ids]
    sizes = np.array([mask.sum() for mask in masks])
    # dist_sums[i, k]: summed distance from sample i to all members of k.
    dist_sums = np.column_stack(
        [pairwise[:, mask].sum(axis=1) for mask in masks]
    )

    rows = np.arange(n_samples)
    own_size = sizes[own]
    # The self-distance is 0, so the sum already excludes the sample itself;
    # only the divisor shrinks by one. Singletons are zeroed out below.
    a = dist_sums[rows, own] / np.maximum(own_size - 1, 1)

    mean_dists = dist_sums / sizes
    mean_dists[rows, own] = np.inf  # never pick the own cluster as nearest
    b = mean_dists.min(axis=1)

    denom = np.maximum(a, b)
    scores = np.zeros(n_samples)
    # Samples with a == b == 0 (exact duplicates across clusters) stay at 0.
    np.divide(b - a, denom, out=scores, where=denom > 0)
    scores[own_size == 1] = 0.0
    return float(scores.mean())
使用示例:
# Usage example: score a random labelling of random points with all four
# metrics and print each result.
data = np.random.rand(100, 3)  # 100 points, 3 coordinates each, in [0, 1)
labels = np.random.randint(0, 3, size=100)  # random label in {0, 1, 2}

# Evaluate the metrics in the order DBI, DI, CHI, SI.
dbi_score, di_score, chi_score, si_score = (
    metric(data, labels) for metric in (dbi, di, chi, si)
)

for _tag, _value in (
    ('DBI:', dbi_score),
    ('DI:', di_score),
    ('CHI:', chi_score),
    ('SI:', si_score),
):
    print(_tag, _value)
原文地址: https://www.cveoy.top/t/topic/oN52 著作权归作者所有。请勿转载和采集!