以下是使用 NumPy 和 SciPy 计算四个聚类评价指标的示例代码:Davies-Bouldin 指数(DBI)、Dunn 指数(DI)、Calinski-Harabasz 指数(CHI)和轮廓系数(SI):

import numpy as np
from scipy.stats import f_oneway
from scipy.spatial.distance import cdist

def dbi(data, labels):
    """Return the Davies-Bouldin index (DBI) of a clustering; lower is better.

    For every cluster the index takes the worst-case ratio
    ``(s_i + s_j) / d_ij`` over all other clusters ``j``, where ``s`` is the
    mean Euclidean distance of a cluster's points to its centroid and
    ``d_ij`` is the Euclidean distance between the two centroids.  The index
    is the mean of those worst-case ratios.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) holding one cluster label
            per sample.  Labels may be arbitrary values; they do not have to
            be the consecutive integers 0..k-1.

    Returns:
        The Davies-Bouldin index as a float.

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    if len(unique_labels) < 2:
        raise ValueError("dbi requires at least two clusters")

    # Select members by label value, never by enumeration index, so that
    # label sets such as {3, 7} work.
    members = [data[labels == label] for label in unique_labels]
    centers = np.array([points.mean(axis=0) for points in members])
    scatters = np.array([
        cdist(points, center[np.newaxis, :]).mean()
        for points, center in zip(members, centers)
    ])

    center_dists = cdist(centers, centers)
    # A cluster is never compared with itself: an infinite self-distance
    # turns the diagonal ratios into zero so they cannot win the max below.
    np.fill_diagonal(center_dists, np.inf)
    ratios = (scatters[:, np.newaxis] + scatters[np.newaxis, :]) / center_dists
    return float(ratios.max(axis=1).mean())

def di(data, labels):
    """Return the Dunn index (DI) of a clustering; higher is better.

    The index is the smallest Euclidean distance between two points that
    belong to different clusters, divided by the largest cluster diameter
    (the greatest Euclidean distance between two points of one cluster).

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) holding one cluster label
            per sample.

    Returns:
        The Dunn index as a float.  When every cluster has zero diameter the
        division follows NumPy floating-point rules (``inf`` or ``nan``).

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    clusters = [data[labels == label] for label in np.unique(labels)]
    if len(clusters) < 2:
        raise ValueError("di requires at least two clusters")

    max_diameter = max(cdist(points, points).max() for points in clusters)
    # Distances are symmetric, so each unordered pair of clusters is
    # visited once only.
    min_separation = min(
        cdist(first, second).min()
        for index, first in enumerate(clusters)
        for second in clusters[index + 1:]
    )
    return float(min_separation / max_diameter)

def chi(data, labels):
    """Return the Calinski-Harabasz index (CHI); higher is better.

    The index is the ratio of between-cluster dispersion to within-cluster
    dispersion, each scaled by its degrees of freedom::

        CHI = [B / (k - 1)] / [W / (n - k)]

    where ``B`` is the size-weighted sum of squared distances from each
    cluster centroid to the overall mean, ``W`` is the sum of squared
    distances from each point to its own centroid, ``n`` is the number of
    samples and ``k`` the number of clusters.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) holding one cluster label
            per sample.  Labels may be arbitrary values.

    Returns:
        The Calinski-Harabasz index as a float.  When the within-cluster
        dispersion is exactly zero, 1.0 is returned instead of dividing by
        zero (the convention scikit-learn uses).

    Raises:
        ValueError: If the number of distinct labels is not in the range
            2..n_samples-1.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    n_samples = data.shape[0]
    unique_labels = np.unique(labels)
    n_clusters = len(unique_labels)
    if not 1 < n_clusters < n_samples:
        raise ValueError(
            "chi requires between 2 and n_samples - 1 clusters"
        )

    overall_mean = data.mean(axis=0)
    between = 0.0
    within = 0.0
    for label in unique_labels:
        # Select by label value so non-consecutive label sets are handled.
        points = data[labels == label]
        center = points.mean(axis=0)
        between += len(points) * np.sum((center - overall_mean) ** 2)
        within += np.sum((points - center) ** 2)

    if within == 0:
        return 1.0
    return float(between * (n_samples - n_clusters) / (within * (n_clusters - 1)))

def si(data, labels):
    """Return the mean silhouette coefficient (SI); higher is better.

    For each sample, ``a`` is the mean Euclidean distance to the other
    members of its own cluster and ``b`` is the smallest mean distance to
    the members of any other cluster.  The sample's silhouette is
    ``(b - a) / max(a, b)``; the function returns the mean over all samples.

    Args:
        data: Array-like of shape (n_samples, n_features).
        labels: Array-like of shape (n_samples,) holding one cluster label
            per sample.  Labels may be arbitrary values.

    Returns:
        The mean silhouette coefficient as a float in [-1, 1].

    Raises:
        ValueError: If fewer than two distinct labels are present.
    """
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)
    if len(unique_labels) < 2:
        raise ValueError("si requires at least two clusters")

    n_samples = data.shape[0]
    pairwise = cdist(data, data)
    own_mean = np.zeros(n_samples)               # a(i)
    nearest_other = np.full(n_samples, np.inf)   # b(i)
    singleton = np.zeros(n_samples, dtype=bool)

    for label in unique_labels:
        mask = labels == label
        size = int(mask.sum())
        # Sum of distances from every sample to the members of this cluster.
        dist_sums = pairwise[:, mask].sum(axis=1)
        if size > 1:
            # The zero self-distance is part of the sum, so divide by
            # size - 1 to average over the *other* members only.
            own_mean[mask] = dist_sums[mask] / (size - 1)
        else:
            singleton |= mask
        outside = ~mask
        nearest_other[outside] = np.minimum(
            nearest_other[outside], dist_sums[outside] / size
        )

    scale = np.maximum(own_mean, nearest_other)
    scores = np.zeros(n_samples)
    # By convention a sample alone in its cluster scores 0; samples with
    # a == b == 0 also stay at 0 rather than producing 0/0.
    usable = (scale > 0) & ~singleton
    scores[usable] = (nearest_other[usable] - own_mean[usable]) / scale[usable]
    return float(scores.mean())

使用示例:

# Demo input: 100 random points with 3 features each, and a random label
# drawn from {0, 1, 2} for every point.
data = np.random.rand(100, 3)
labels = np.random.randint(0, 3, size=100)

# Evaluate all four indices (in DBI, DI, CHI, SI order) before printing.
dbi_score, di_score, chi_score, si_score = (
    metric(data, labels) for metric in (dbi, di, chi, si)
)

for caption, value in (
    ('DBI:', dbi_score),
    ('DI:', di_score),
    ('CHI:', chi_score),
    ('SI:', si_score),
):
    print(caption, value)

原文地址: https://www.cveoy.top/t/topic/oN52 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录