# import numpy as npfrom sklearndatasets import load_irisfrom sklearnmodel_selection import train_test_splitnprandomseed20def predictx_test x_train y_train k = 3 param x_test 测试集特征值 param x_tr
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Seed NumPy's global random state so repeated runs of this script see the
# same pseudo-random sequence. train_test_split is called without an explicit
# random_state in this file, so it falls back to this global state.
np.random.seed(20)
def predict(x_test, x_train, y_train, k=3):
    """Classify test samples with a distance-weighted k-nearest-neighbours vote.

    Each test sample is compared with every training sample using the
    Euclidean distance. The ``k`` closest training samples vote for their
    label with weight ``1 / distance`` and the label with the largest total
    weight wins; on a tie the smallest label wins, because ``np.argmax``
    returns the first maximum.

    If any of the ``k`` nearest neighbours coincides exactly with the test
    sample (distance 0), only those exact matches vote, one vote each. This
    avoids dividing by zero and the resulting infinite weights.

    :param x_test: test-set feature values, one sample per row
    :param x_train: training-set feature values, one sample per row
    :param y_train: training-set target values; they are passed to
        ``np.bincount`` and therefore must be non-negative integers
    :param k: number of neighbours that vote; tune it for the data at hand.
        Values larger than the training set use every training sample.
    :return: the predicted labels as a numpy array, one entry per test sample
    :raises ValueError: if ``k`` is smaller than 1, if the training set is
        empty, or if ``x_train`` and ``y_train`` differ in length
    """
    # ********** Begin ********** #
    if k < 1:
        # A negative k would otherwise silently slice off the *last* |k|
        # neighbours instead of selecting the nearest ones.
        raise ValueError("k must be a positive integer")

    train = np.asarray(x_train, dtype=float)
    labels = np.asarray(y_train)
    n_train = len(train)
    if n_train == 0:
        raise ValueError("x_train must contain at least one sample")
    if len(labels) != n_train:
        raise ValueError("x_train and y_train must have the same length")
    if train.ndim != 2:
        # One flat row per training sample, so the distances to all of them
        # can be computed with a single vectorised expression.
        train = train.reshape(n_train, -1)

    result = []
    for test_sample in x_test:
        sample = np.asarray(test_sample, dtype=float).ravel()
        # Euclidean distance from this test sample to every training sample.
        dists = np.sqrt(np.sum((train - sample) ** 2, axis=1))
        # Indices of the k nearest samples; the stable sort keeps equally
        # distant neighbours in training order.
        nearest = np.argsort(dists, kind="stable")[:k]
        near_dists = dists[nearest]
        near_labels = labels[nearest]

        exact = near_dists == 0
        if exact.any():
            # Exact matches decide on their own by simple majority.
            votes = np.bincount(near_labels[exact])
        else:
            # Accumulate the inverse-distance weight of each label.
            votes = np.bincount(near_labels, weights=1.0 / near_dists)
        # The label with the largest accumulated vote is the prediction.
        result.append(np.argmax(votes))
    result = np.array(result)
    # ********** End ********** #
    return result
if __name__ == '__main__':
    # Self-check: hold out 20% of the iris samples, classify them with
    # predict() using its default k, and require at least 90% accuracy.
    iris = load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2
    )
    result = predict(x_test, x_train, y_train)
    # Accuracy: fraction of test samples whose predicted label is correct.
    score = np.sum(result == y_test) / len(result)
    if score >= 0.9:
        print("测试通过")  # "test passed"
    else:
        print("测试失败")  # "test failed"
# 原文地址: https://www.cveoy.top/t/topic/fgkb 著作权归作者所有。请勿转载和采集!