实现聚类性能指标 DBI、DI、CHI 和 SI 的计算,只能调用 numpy 和 scipy.stats 包,数据特征为三维。
以下是使用 numpy 和 scipy(scipy.stats、scipy.spatial.distance)计算 DBI、DI、CHI 和 SI 的示例代码:
import numpy as np
from scipy.stats import f_oneway
from scipy.spatial.distance import cdist
def dbi(data, labels):
    """Return the Davies-Bouldin index of a clustering (lower is better).

    For every cluster ``i`` the scatter ``s_i`` is the mean Euclidean distance
    of its members to the cluster centroid.  The index is the average, over
    clusters, of ``max_{j != i} (s_i + s_j) / d(c_i, c_j)`` where ``d`` is the
    distance between centroids.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of n_samples cluster labels.  Labels may be any
            values; they do not have to be 0..k-1.

    Returns:
        The Davies-Bouldin index as a float.  If two centroids coincide the
        corresponding ratio is a division by zero, so the result is ``inf``
        (or ``nan`` when both scatters are zero as well).

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels).ravel()
    unique_labels = np.unique(labels)
    if unique_labels.size < 2:
        raise ValueError("DBI requires at least two clusters")

    # Select members by label value, not by enumeration position, so that
    # arbitrary label values work.
    members = [data[labels == label] for label in unique_labels]
    centers = np.array([points.mean(axis=0) for points in members])
    scatters = np.array([
        np.linalg.norm(points - center, axis=1).mean()
        for points, center in zip(members, centers)
    ])

    # Pairwise centroid distances; the diagonal is set to inf so that the
    # i == j ratio becomes 0 and can never win the per-row maximum.
    center_dists = np.linalg.norm(
        centers[:, None, :] - centers[None, :, :], axis=-1
    )
    np.fill_diagonal(center_dists, np.inf)

    ratios = (scatters[:, None] + scatters[None, :]) / center_dists
    return float(ratios.max(axis=1).mean())
def di(data, labels):
    """Return the Dunn index of a clustering (higher is better).

    The index is the smallest Euclidean distance between two points that
    belong to different clusters, divided by the largest cluster diameter
    (the greatest distance between two points of the same cluster).

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of n_samples cluster labels (any values).

    Returns:
        The Dunn index as a float.  When every cluster has zero diameter the
        result is ``inf`` if the clusters are separated and ``nan`` if they
        are not.

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels).ravel()
    clusters = [data[labels == label] for label in np.unique(labels)]
    if len(clusters) < 2:
        raise ValueError("DI requires at least two clusters")

    def pairwise(left, right):
        # Full Euclidean distance matrix between two point sets.
        return np.linalg.norm(left[:, None, :] - right[None, :, :], axis=-1)

    max_diameter = max(pairwise(points, points).max() for points in clusters)
    # Each unordered cluster pair is visited once; distance is symmetric.
    min_separation = min(
        pairwise(first, second).min()
        for idx, first in enumerate(clusters)
        for second in clusters[idx + 1:]
    )

    if max_diameter == 0:
        return float("inf") if min_separation > 0 else float("nan")
    return float(min_separation / max_diameter)
def chi(data, labels):
    """Return the Calinski-Harabasz index of a clustering (higher is better).

    The index is ``(B / (k - 1)) / (W / (n - k))`` where ``B`` is the
    between-cluster dispersion (size-weighted squared distance of each
    centroid to the overall mean), ``W`` is the within-cluster dispersion
    (squared distance of every point to its own centroid), ``k`` is the number
    of clusters and ``n`` the number of samples.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of n_samples cluster labels (any values).

    Returns:
        The Calinski-Harabasz index as a float; ``inf`` when the within-cluster
        dispersion is exactly zero.

    Raises:
        ValueError: If the number of clusters is not in ``2 .. n_samples - 1``,
            for which the normalisation terms are undefined.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels).ravel()
    unique_labels = np.unique(labels)
    n_samples = data.shape[0]
    n_clusters = unique_labels.size
    if not 1 < n_clusters < n_samples:
        raise ValueError(
            "CHI requires between 2 and n_samples - 1 clusters"
        )

    overall_mean = data.mean(axis=0)
    between = 0.0
    within = 0.0
    for label in unique_labels:
        points = data[labels == label]
        center = points.mean(axis=0)
        between += points.shape[0] * np.sum((center - overall_mean) ** 2)
        within += np.sum((points - center) ** 2)

    if within == 0:
        return float("inf")
    return float(
        (between / (n_clusters - 1)) / (within / (n_samples - n_clusters))
    )
def si(data, labels):
    """Return the mean silhouette coefficient of a clustering, in [-1, 1].

    For each sample, ``a`` is the mean Euclidean distance to the other members
    of its own cluster and ``b`` is the smallest mean distance to the members
    of any other cluster.  The sample's silhouette is ``(b - a) / max(a, b)``;
    samples that are alone in their cluster score 0.  The function returns
    the average over all samples.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of n_samples cluster labels (any values).

    Returns:
        The mean silhouette coefficient as a float.

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels).ravel()
    unique_labels, own = np.unique(labels, return_inverse=True)
    n_samples = data.shape[0]
    n_clusters = unique_labels.size
    if n_clusters < 2:
        raise ValueError("SI requires at least two clusters")

    dists = np.linalg.norm(data[:, None, :] - data[None, :, :], axis=-1)

    # membership[j, c] is 1.0 when sample j belongs to cluster c, so
    # dist_sums[i, c] is the total distance from sample i to cluster c.
    membership = (own[:, None] == np.arange(n_clusters)).astype(float)
    sizes = membership.sum(axis=0)
    dist_sums = dists @ membership

    rows = np.arange(n_samples)
    own_sizes = sizes[own]
    has_peers = own_sizes > 1

    # The own-cluster sum contains the zero self-distance, so divide by
    # (size - 1) to average over the *other* members only.
    a = np.zeros(n_samples)
    a[has_peers] = dist_sums[rows, own][has_peers] / (own_sizes[has_peers] - 1)

    # Mask out the own cluster before taking the nearest foreign cluster.
    mean_dists = dist_sums / sizes
    mean_dists[rows, own] = np.inf
    b = mean_dists.min(axis=1)

    denom = np.maximum(a, b)
    scores = np.zeros(n_samples)
    valid = has_peers & (denom > 0)
    scores[valid] = (b[valid] - a[valid]) / denom[valid]
    return float(scores.mean())
使用示例:
# Demo: score a random labelling of random 3-D points with all four metrics.
n_samples, n_features, n_clusters = 100, 3, 3
data = np.random.rand(n_samples, n_features)
labels = np.random.randint(0, n_clusters, size=n_samples)

# Evaluate every metric first (in DBI, DI, CHI, SI order), then report.
dbi_score, di_score, chi_score, si_score = (
    metric(data, labels) for metric in (dbi, di, chi, si)
)

for title, score in (
    ("DBI:", dbi_score),
    ("DI:", di_score),
    ("CHI:", chi_score),
    ("SI:", si_score),
):
    print(title, score)
``
原文地址: https://www.cveoy.top/t/topic/hljj 著作权归作者所有。请勿转载和采集!