K-means 聚类算法实现宿舍分配 - Java 代码示例
该代码实现了 K-means 聚类算法,并演示如何将学生分配到宿舍中。auto() 方法实现分配逻辑:遍历传入的宿舍列表,通过 stuinfoMapper 查询每间宿舍的已住人数,筛选出仍有空余床位的宿舍,然后从这些宿舍中随机选择一间,把学生分配进去并更新数据库。方法中还会生成三组随机的二维示例数据,并用 K-means 算法(k = 3)对其聚类;聚类结果只打印到控制台,并不参与宿舍的选择。
K-means 算法的实现过程如下:
1. 首先从数据集中选取 k 个数据点作为初始的 k 个中心节点(通常随机选取;本示例代码直接取数据集的前 k 个点)。
2. 对于每个数据点,计算它到 k 个中心节点的距离,并将该数据点归入距离最近的中心节点所在的簇。
3. 对于每个簇,重新计算该簇的中心节点(簇内所有数据点的均值)。
4. 重复步骤 2 和 3,直到中心节点的位置变化量小于某个阈值或达到最大迭代次数。
5. 返回每个数据点所属的簇。
package com.code.admin;
import com.code.entity.Dorminfo;
import com.code.entity.Stuinfo;
import com.code.mapper.DorminfoMapper;
import com.code.mapper.StuinfoMapper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Controller;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
public class KmeansAlgorithm {
private static final int T = 10; // 最大迭代次数
private static final double THRESHOLD = 0.1; // 中心节点位置变化大小的阈值
public void auto(Stuinfo stuinfo, List<Dorminfo> dorminfoList, StuinfoMapper stuinfoMapper, DorminfoMapper dorminfoMapper) {
final int CLUSTER1_NUM = 4;
final int CLUSTER2_NUM = 4;
final int CLUSTER3_NUM = 4;
ArrayList<ArrayList<Double>> dataSet = new ArrayList<ArrayList<Double>>();
// 产生簇 1
for (int i = 0; i < CLUSTER1_NUM; i++) {
ArrayList<Double> cluster1 = new ArrayList<Double>();
cluster1.add(1 + Math.random() * 2);
cluster1.add(1 + Math.random() * 2);
dataSet.add(cluster1);
}
// 产生簇 2
for (int i = 0; i < CLUSTER2_NUM; i++) {
ArrayList<Double> cluster2 = new ArrayList<Double>();
cluster2.add(Math.random());
cluster2.add(Math.random());
dataSet.add(cluster2);
}
List<Dorminfo> dorminfoListSend = new ArrayList<Dorminfo>();
System.out.println('dorminfoList = ' + dorminfoList);
for (Dorminfo dorminfo : dorminfoList) {
System.out.println('dorminfo = ' + dorminfo);
System.out.println('stuinfoMapper = ' + stuinfoMapper);
List<Stuinfo> stuinfosListTwo = stuinfoMapper.countSusheID(dorminfo.getDormid());
if (stuinfosListTwo.size() < dorminfo.getDormnum()) {
dorminfoListSend.add(dorminfo);
}
}
// 产生簇 3
for (int i = 0; i < CLUSTER3_NUM; i++) {
ArrayList<Double> cluster3 = new ArrayList<Double>();
cluster3.add(3 + Math.random());
cluster3.add(3 + Math.random());
dataSet.add(cluster3);
}
int num = new Random().nextInt(dorminfoListSend.size());
Dorminfo dorminfo = dorminfoListSend.get(num);
stuinfo.setSusheid(dorminfo.getDormid() + '');
stuinfoMapper.updateById(stuinfo);
KmeansAlgorithm d = new KmeansAlgorithm();
ArrayList<ArrayList<Double>> dd = d.getClusters(dataSet, 3);
System.out.println(dd);
}
public ArrayList<ArrayList<Double>> getClusters(ArrayList<ArrayList<Double>> dataSet, int k) {
int dataDimension = 0;
if (null != dataSet && dataSet.size() < k) {
System.out.println('data size is smaller than the number to be clustered');
} else {
dataDimension = dataSet.get(0).size();
}
// 为每条数据赋初始类别 0
for (int i = 0; i < dataSet.size(); i++) {
dataSet.get(i).add(0d);
}
// 随机从数据集中选注 k 个点作为初始的 k 个中心节点
ArrayList<ArrayList<Double>> centerData = new ArrayList<ArrayList<Double>>();
for (int i = 0; i < k; i++) {
centerData.add(dataSet.get(i));
}
for (int i = 0; i < T; i++) {
for (int j = 0; j < dataSet.size(); j++) {
double classify = 0; // classify 取值为 0 到 k-1 代表 k 个类别
double minDistance = computeDistance(dataSet.get(j), centerData.get(0));
for (int l = 1; l < centerData.size(); l++) {
if (computeDistance(dataSet.get(j), centerData.get(l)) < minDistance) {
minDistance = computeDistance(dataSet.get(j), centerData.get(l));
classify = l;
}
}
dataSet.get(j).set(dataDimension, classify);
}
// 每次分类后计算中心节点的位置变化情况
double variance = computeChange(dataSet, centerData, k, dataDimension);
if (variance < THRESHOLD) {
break;
}
// 每次分类后重新计算中心节点
centerData = computeCenterData(dataSet, k, dataDimension);
}
return dataSet;
}
/**
* @return double
* @throws
* @Title: computeDistance
* @Description: 计算任意两个节点间的距离
*/
public double computeDistance(ArrayList<Double> d1, ArrayList<Double> d2) {
double squareSum = 0;
for (int i = 0; i < d1.size() - 1; i++) {
squareSum += (d1.get(i) - d2.get(i)) * (d1.get(i) - d2.get(i));
}
return Math.sqrt(squareSum);
}
/**
* @return ArrayList<Double>
* @throws
* @Title: computeCenterData
* @Description: 计算中心节点
*/
public ArrayList<ArrayList<Double>> computeCenterData(ArrayList<ArrayList<Double>> dataSet, int k, int dataDimension) {
ArrayList<ArrayList<Double>> res = new ArrayList<ArrayList<Double>>();
for (int i = 0; i < k; i++) {
int ClassNum = 0;
ArrayList<Double> tmp = new ArrayList<Double>();
for (int l = 0; l < dataDimension; l++) {
tmp.add(0d);
}
for (int j = 0; j < dataSet.size(); j++) {
if (dataSet.get(j).get(dataDimension) == i) {
ClassNum++;
for (int m = 0; m < dataDimension; m++) {
tmp.set(m, tmp.get(m) + dataSet.get(j).get(m));
}
}
}
for (int l = 0; l < dataDimension; l++) {
tmp.set(l, tmp.get(l) / (double) ClassNum);
}
res.add(tmp);
}
return res;
}
/**
* @return double
* @throws
* @Title: computeChange
* @Description: 计算两轮迭代中心节点位置的变化量
*/
public double computeChange(ArrayList<ArrayList<Double>> dataSet, ArrayList<ArrayList<Double>> centerData, int k, int dataDimension) {
double variance = 0;
ArrayList<ArrayList<Double>> originalCenterData = computeCenterData(dataSet, k, dataDimension);
for (int i = 0; i < centerData.size(); i++) {
variance += computeDistance(originalCenterData.get(i), centerData.get(i));
}
return variance;
}
public static void main(String[] args) {
final int CLUSTER1_NUM = 4;
final int CLUSTER2_NUM = 4;
final int CLUSTER3_NUM = 4;
ArrayList<ArrayList<Double>> dataSet = new ArrayList<ArrayList<Double>>();
// 产生簇 1
for (int i = 0; i < CLUSTER1_NUM; i++) {
ArrayList<Double> cluster1 = new ArrayList<Double>();
cluster1.add(1 + Math.random() * 2);
cluster1.add(1 + Math.random() * 2);
dataSet.add(cluster1);
}
// 产生簇 2
for (int i = 0; i < CLUSTER2_NUM; i++) {
ArrayList<Double> cluster2 = new ArrayList<Double>();
cluster2.add(Math.random());
cluster2.add(Math.random());
dataSet.add(cluster2);
}
// 产生簇 3
for (int i = 0; i < CLUSTER3_NUM; i++) {
ArrayList<Double> cluster3 = new ArrayList<Double>();
cluster3.add(3 + Math.random());
cluster3.add(3 + Math.random());
dataSet.add(cluster3);
}
KmeansAlgorithm d = new KmeansAlgorithm();
ArrayList<ArrayList<Double>> dd = d.getClusters(dataSet, 3);
System.out.println(dd);
}
}
原文地址: https://www.cveoy.top/t/topic/nW50 著作权归作者所有。请勿转载和采集!