基于SVM和KNN的DAS数据分类Python实现
基于SVM和KNN的DAS数据分类Python实现
本文使用 SVM 和 KNN 两种机器学习方法对 DAS 数据进行分类，并提供完整的 Python 代码实现，方便进行实验和学习。
# 1. SVM分类模型 (SVM classification model)
# -*- coding: utf-8 -*-
import datetime
import sys

import numpy as np


class Logger(object):
    """File-like object that writes every message to a stream and to a log file.

    Assigning an instance to ``sys.stdout`` makes ``print`` output appear on
    the original stream and in ``filename`` at the same time.
    """

    def __init__(self, filename='default.log', stream=sys.stdout):
        """Open ``filename`` for writing (truncating it) and remember ``stream``."""
        self.terminal = stream
        self.log = open(filename, 'w')

    def write(self, message):
        """Write ``message`` to both the wrapped stream and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both targets so the log file does not lag behind the stream."""
        self.terminal.flush()
        self.log.flush()


def confusion_matrix_metrics(matrix):
    """Compute accuracy and per-class metrics from a square confusion matrix.

    Rows are treated as true classes and columns as predicted classes.

    Args:
        matrix: square array-like of counts.

    Returns:
        Tuple ``(accuracy, precision, nar, f1_score)``. ``accuracy`` is a
        scalar; the other three are 1-D arrays with one entry per class:
        ``precision`` is diagonal / column sum, ``nar`` is the fraction of a
        true class's samples that were assigned to another class, and
        ``f1_score`` is ``2 * diagonal / (column sum + row sum)``.
    """
    matrix = np.asarray(matrix)
    correct = np.diag(matrix)
    true_totals = matrix.sum(axis=1)       # row sums: samples per true class
    predicted_totals = matrix.sum(axis=0)  # column sums: samples per predicted class

    accuracy = correct.sum() / matrix.sum()
    precision = correct / predicted_totals
    nar = (true_totals - correct) / true_totals
    f1_score = 2 * correct / (predicted_totals + true_totals)
    return accuracy, precision, nar, f1_score


def run_svm_experiment():
    """Train an SVM on the training set, evaluate on the test set, print and plot results."""
    # The ML / plotting stack and the project-local loader are imported here so
    # that Logger and confusion_matrix_metrics stay importable without them.
    import matplotlib.font_manager as fm
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
    from sklearn import preprocessing, svm
    from sklearn.metrics import confusion_matrix

    from get_das_data import get_das_data

    rootpath = 'das_data'
    train_rootpath = rootpath + '/train'
    train_labelpath = rootpath + '/train/label.txt'
    test_rootpath = rootpath + '/test'
    test_labelpath = rootpath + '/test/label.txt'

    # NOTE: the training timer starts before the data is loaded, so the
    # reported "train time" covers loading, scaling and fitting.
    start_train = datetime.datetime.now()
    X_train, y_train = get_das_data(train_rootpath, train_labelpath)
    X_test, y_test = get_das_data(test_rootpath, test_labelpath)

    # Turn the label vector into a column so it can be appended to the features.
    # Assumes y_test is a 1-D NumPy array -- TODO confirm against get_das_data.
    pre_y_test = y_test[:, np.newaxis]

    # Fit the scaler on the training data only and reuse the same min/max for
    # the test data; re-fitting on the test set would scale it differently.
    minMaxScaler = preprocessing.MinMaxScaler()
    trainingData = minMaxScaler.fit_transform(X_train)
    testData = minMaxScaler.transform(X_test)

    # Save the scaled test features with the true label as the last column.
    # Output names use the 'svm' prefix to match svm_result.log.
    feature_data = np.concatenate((testData, pre_y_test), axis=1)
    np.savetxt('svm_feature_data.csv', feature_data, delimiter=',')

    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                  decision_function_shape='ovo', degree=3, gamma='auto',
                  kernel='rbf', max_iter=-1, probability=False,
                  random_state=None, shrinking=True, tol=0.001, verbose=False)
    clf.fit(trainingData, y_train)
    end_train = datetime.datetime.now()

    train_result = clf.predict(trainingData)

    start_test = datetime.datetime.now()
    test_result = clf.predict(testData)
    end_test = datetime.datetime.now()

    train_matrix = confusion_matrix(y_train, train_result)
    test_matrix = confusion_matrix(y_test, test_result)
    print('train_matrix: ', train_matrix)
    print('test_matrix: ', test_matrix)
    print('train time is ', end_train - start_train)
    print('test time is ', end_test - start_test)

    C = test_matrix
    # Tick labels are 1..n, derived from the matrix size instead of a fixed 6.
    class_names = [str(i) for i in range(1, C.shape[0] + 1)]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    df = pd.DataFrame(C)
    f1 = fm.FontProperties(family='Times New Roman', size=15)
    sns.heatmap(df, fmt='g', annot=True, annot_kws={'size': 10},
                xticklabels=class_names, yticklabels=class_names,
                cmap='Blues', ax=ax)
    ax.set_xlabel('Predicted label', fontproperties=f1)  # x axis
    ax.set_ylabel('True label', fontproperties=f1)  # y axis
    plt.savefig('./svm_confusion_matrix.jpg')
    plt.show()

    Acc, precision, nar, f1_score = confusion_matrix_metrics(C)
    print('acc: %.3f' % Acc)
    for i, (p, n, f) in enumerate(zip(precision, nar, f1_score), start=1):
        print('precision_%d: %.3f' % (i, p))
        print('NAR_%d: %.3f' % (i, n))
        print('F1_score_%d: %.3f' % (i, f))


def main():
    """Run the SVM experiment with stdout mirrored into svm_result.log."""
    logger = Logger('svm_result.log', sys.stdout)
    sys.stdout = logger
    try:
        run_svm_experiment()
    finally:
        # Restore the real stdout and close the log even if the run fails.
        sys.stdout = logger.terminal
        logger.log.close()


if __name__ == '__main__':
    main()
# 2. KNN分类模型 (KNN classification model)
import datetime
import sys

import numpy as np


class Logger(object):
    """File-like object that writes every message to a stream and to a log file.

    Assigning an instance to ``sys.stdout`` makes ``print`` output appear on
    the original stream and in ``filename`` at the same time.
    """

    def __init__(self, filename='default.log', stream=sys.stdout):
        """Open ``filename`` for writing (truncating it) and remember ``stream``."""
        self.terminal = stream
        self.log = open(filename, 'w')

    def write(self, message):
        """Write ``message`` to both the wrapped stream and the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both targets so the log file does not lag behind the stream."""
        self.terminal.flush()
        self.log.flush()


def confusion_matrix_metrics(matrix):
    """Compute accuracy and per-class metrics from a square confusion matrix.

    Rows are treated as true classes and columns as predicted classes.

    Args:
        matrix: square array-like of counts.

    Returns:
        Tuple ``(accuracy, precision, nar, f1_score)``. ``accuracy`` is a
        scalar; the other three are 1-D arrays with one entry per class:
        ``precision`` is diagonal / column sum, ``nar`` is the fraction of a
        true class's samples that were assigned to another class, and
        ``f1_score`` is ``2 * diagonal / (column sum + row sum)``.
    """
    matrix = np.asarray(matrix)
    correct = np.diag(matrix)
    true_totals = matrix.sum(axis=1)       # row sums: samples per true class
    predicted_totals = matrix.sum(axis=0)  # column sums: samples per predicted class

    accuracy = correct.sum() / matrix.sum()
    precision = correct / predicted_totals
    nar = (true_totals - correct) / true_totals
    f1_score = 2 * correct / (predicted_totals + true_totals)
    return accuracy, precision, nar, f1_score


def run_knn_experiment():
    """Train a 3-nearest-neighbour classifier, evaluate on the test set, print and plot results."""
    # The ML / plotting stack and the project-local loader are imported here so
    # that Logger and confusion_matrix_metrics stay importable without them.
    import matplotlib.font_manager as fm
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
    from sklearn import neighbors, preprocessing
    from sklearn.metrics import confusion_matrix

    from get_das_data import get_das_data

    rootpath = 'das_data'  # data root; edit rootpath to match your layout
    train_rootpath = rootpath + '/train'
    train_labelpath = rootpath + '/train/label.txt'
    test_rootpath = rootpath + '/test'
    test_labelpath = rootpath + '/test/label.txt'

    # NOTE: the training timer starts before the data is loaded, so the
    # reported "train time" covers loading, scaling and fitting.
    start_train = datetime.datetime.now()
    X_train, y_train = get_das_data(train_rootpath, train_labelpath)
    X_test, y_test = get_das_data(test_rootpath, test_labelpath)

    # Turn the label vector into a column so it can be appended to the features.
    # Assumes y_test is a 1-D NumPy array -- TODO confirm against get_das_data.
    pre_y_test = y_test[:, np.newaxis]

    # Fit the scaler on the training data only and reuse the same min/max for
    # the test data; re-fitting on the test set would scale it differently.
    minMaxScaler = preprocessing.MinMaxScaler()
    trainingData = minMaxScaler.fit_transform(X_train)
    testData = minMaxScaler.transform(X_test)

    # Save the scaled test features with the true label as the last column.
    feature_data = np.concatenate((testData, pre_y_test), axis=1)
    np.savetxt('5km_10km_knn_feature_data.csv', feature_data, delimiter=',')

    clf = neighbors.KNeighborsClassifier(n_neighbors=3)
    clf.fit(trainingData, y_train)
    end_train = datetime.datetime.now()

    train_result = clf.predict(trainingData)

    start_test = datetime.datetime.now()
    test_result = clf.predict(testData)
    end_test = datetime.datetime.now()

    train_matrix = confusion_matrix(y_train, train_result)
    test_matrix = confusion_matrix(y_test, test_result)
    print('train_matrix: ', train_matrix)
    print('test_matrix: ', test_matrix)
    print('train time is ', end_train - start_train)
    print('test time is ', end_test - start_test)

    C = test_matrix
    # Tick labels are 1..n, derived from the matrix size instead of a fixed 6.
    class_names = [str(i) for i in range(1, C.shape[0] + 1)]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    df = pd.DataFrame(C)
    f1 = fm.FontProperties(family='Times New Roman', size=15)
    sns.heatmap(df, fmt='g', annot=True, annot_kws={'size': 10},
                xticklabels=class_names, yticklabels=class_names,
                cmap='Blues', ax=ax)
    ax.set_xlabel('Predicted label', fontproperties=f1)  # x axis
    ax.set_ylabel('True label', fontproperties=f1)  # y axis
    plt.savefig('./5km_10km_knn_confusion_matrix.jpg')
    plt.show()

    Acc, precision, nar, f1_score = confusion_matrix_metrics(C)
    print('acc: %.3f' % Acc)
    for i, (p, n, f) in enumerate(zip(precision, nar, f1_score), start=1):
        print('precision_%d: %.3f' % (i, p))
        print('NAR_%d: %.3f' % (i, n))
        print('F1_score_%d: %.3f' % (i, f))


def main():
    """Run the KNN experiment with stdout mirrored into knn_result.log."""
    logger = Logger('knn_result.log', sys.stdout)
    sys.stdout = logger
    try:
        run_knn_experiment()
    finally:
        # Restore the real stdout and close the log even if the run fails.
        sys.stdout = logger.terminal
        logger.log.close()


if __name__ == '__main__':
    main()
问题解决: FileNotFoundError
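根据代码分析，KNN 模型报错 'FileNotFoundError' 时，应首先检查 'rootpath' 变量：在第一段代码中，'rootpath' 被定义为 'das_data'，而出错的第二段代码中原先被定义为 './das_data'。请将两段代码中的 'rootpath' 统一为 'das_data'（上文第二段代码已按此修改），然后重新运行代码。需要注意的是，'das_data' 与 './das_data' 都是相对于当前工作目录的相对路径，指向同一位置；如果修改后仍然报错，请确认运行脚本时的工作目录下确实存在 'das_data/train' 和 'das_data/test' 目录，以及其中的 'label.txt' 文件。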
原文地址: http://www.cveoy.top/t/topic/brNs 著作权归作者所有。请勿转载和采集!