使用 Python 在 breast-cancer 数据集上实现类特定属性值加权的朴素贝叶斯:利用 L-BFGS 优化算法最小化 λ-CLL 与 λ-MSE 目标函数,从而确定最优权重矩阵 W
import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_breast_cancer
加载数据集
# Load the scikit-learn breast-cancer dataset and unpack the feature matrix
# and the label vector from the returned bunch object.
data = load_breast_cancer()
X, y = data.data, data.target
将数据集分为训练集和测试集
# Positional hold-out split: the first 400 rows are used for training and the
# remaining rows for testing (no shuffling is performed).
train_X, test_X = X[:400], X[400:]
train_y, test_y = y[:400], y[400:]
定义类别特定的属性值加权朴素贝叶斯类
class NaiveBayes:
    """Gaussian naive Bayes classifier with flattenable parameters.

    ``fit`` estimates one mean and one variance per (class, feature) pair and
    one prior per class.  ``get_params`` / ``set_params`` expose those arrays
    as a single flat vector so that an external optimiser can adjust them.

    Attributes set by ``fit``:
        classes:    sorted unique labels, shape (n_classes,)
        n_classes:  number of distinct labels
        n_features: number of columns of the training matrix
        mean, var:  per-class feature statistics, shape (n_classes, n_features)
        prior:      relative class frequencies, shape (n_classes,)
    """

    # Added to every variance so a zero-variance feature cannot divide by zero.
    _EPS = 1e-6

    def __init__(self, alpha=1.0):
        """Store ``alpha``, the constant factor ``pdf`` applies to each density."""
        self.alpha = alpha

    def fit(self, X, y):
        """Estimate per-class means, variances and priors from ``X`` and ``y``.

        Args:
            X: training matrix with one row per sample.
            y: label of each row of ``X``.

        Returns:
            The fitted instance, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.n_classes = len(self.classes)
        self.n_features = X.shape[1]
        self.mean = np.zeros((self.n_classes, self.n_features))
        self.var = np.zeros((self.n_classes, self.n_features))
        self.prior = np.zeros(self.n_classes)
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[i] = X_c.mean(axis=0)
            self.var[i] = X_c.var(axis=0)
            self.prior[i] = X_c.shape[0] / X.shape[0]
        return self

    def predict(self, X):
        """Return, for every row of ``X``, the label with the highest joint log-probability."""
        X = np.asarray(X, dtype=float)
        joint_log_prob = np.empty((X.shape[0], self.n_classes))
        for i in range(self.n_classes):
            # Work in log space directly: log(pdf(...)) underflows to -inf for
            # points far from every class mean, which would make argmax pick
            # class 0 regardless of the data.
            log_density = self._log_pdf(X, self.mean[i], self.var[i], self.alpha)
            joint_log_prob[:, i] = np.log(self.prior[i]) + log_density.sum(axis=1)
        return self.classes[np.argmax(joint_log_prob, axis=1)]

    def pdf(self, X, mean, var, alpha):
        """Return the element-wise Gaussian density of ``X`` multiplied by ``alpha``.

        ``var`` is increased by a small epsilon before use.
        """
        smoothed = var + self._EPS
        return np.exp(-(X - mean) ** 2 / (2 * smoothed)) / np.sqrt(2 * np.pi * smoothed) * alpha

    def _log_pdf(self, X, mean, var, alpha):
        """Return ``log(pdf(X, mean, var, alpha))`` without forming the density itself."""
        smoothed = var + self._EPS
        return -0.5 * ((X - mean) ** 2 / smoothed + np.log(2 * np.pi * smoothed)) + np.log(alpha)

    def get_params(self):
        """Return means, variances and priors concatenated into one flat vector."""
        return np.hstack((self.mean.ravel(), self.var.ravel(), self.prior.ravel()))

    def set_params(self, params):
        """Load means, variances and priors from a vector laid out as in ``get_params``.

        Requires ``n_classes`` and ``n_features`` to be set already (``fit``
        does this).
        """
        params = np.asarray(params, dtype=float)
        block = self.n_classes * self.n_features
        shape = (self.n_classes, self.n_features)
        # Copy each slice so the model never aliases the caller's buffer.
        self.mean = params[:block].reshape(shape).copy()
        self.var = params[block:2 * block].reshape(shape).copy()
        self.prior = params[2 * block:2 * block + self.n_classes].copy()
定义损失函数
def loss_func(weights, model, X, y):
    """Return the alpha-weighted mix of negative log-likelihood and MSE.

    The flat vector ``weights`` is first loaded into ``model`` through
    ``model.set_params``.  The loss is then

        alpha * NLL + (1 - alpha) * MSE

    where NLL is the negated sum, over all samples, of the log prior of the
    sample's true class plus the summed log of ``model.pdf`` over its
    features, and MSE is the mean squared difference between ``y`` and
    ``model.predict(X)``.

    Args:
        weights: flat parameter vector accepted by ``model.set_params``.
        model: object providing ``set_params``, ``predict``, ``pdf``,
            ``prior``, ``mean``, ``var`` and ``alpha``.
        X: feature matrix, one row per sample.
        y: true label of each row of ``X``.

    Returns:
        The scalar loss value.
    """
    model.set_params(weights)
    y = np.asarray(y)
    y_pred = model.predict(X)

    # The parameter arrays are indexed by class *position*, not by label value,
    # so translate labels through the sorted class array when the model has
    # one.  Indexing with raw labels only works when labels are 0..K-1.
    classes = getattr(model, "classes", None)
    rows = y if classes is None else np.searchsorted(classes, y)

    # Clip before taking logs: an underflowed density (exactly 0.0) would
    # otherwise yield log(0) = -inf and an infinite loss the optimiser cannot
    # recover from.
    tiny = np.finfo(float).tiny
    density = model.pdf(X, model.mean[rows], model.var[rows], model.alpha)
    log_prior = np.log(np.maximum(model.prior[rows], tiny))
    log_density = np.log(np.maximum(density, tiny)).sum(axis=1)

    cll = -np.sum(log_prior + log_density)
    mse = np.mean((y - y_pred) ** 2)
    return model.alpha * cll + (1 - model.alpha) * mse
初始化模型
# Instantiate the classifier with its default constructor arguments.
nb = NaiveBayes()
最小化损失函数,得到最优权重矩阵
# Fit first: get_params() reads the mean/var/prior arrays that only fit()
# creates, so calling it on an unfitted model raises AttributeError.
nb.fit(train_X, train_y)

# Box constraints in get_params() order (means, variances, priors).  Without
# them the objective is unbounded: priors can grow without limit to shrink the
# loss, and variances can turn negative, which makes the density NaN.
n_moments = nb.mean.size
param_bounds = (
    [(None, None)] * n_moments
    + [(0.0, None)] * n_moments
    + [(1e-12, 1.0)] * nb.prior.size
)

# Refine the fitted parameters with L-BFGS-B, then load the result into the model.
res = minimize(
    loss_func,
    nb.get_params(),
    args=(nb, train_X, train_y),
    method='L-BFGS-B',
    bounds=param_bounds,
)
nb.set_params(res.x)
在测试集上进行预测
# Classify the held-out rows and report the fraction predicted correctly.
y_pred = nb.predict(test_X)
accuracy = (y_pred == test_y).mean()
# The original print call was missing its closing parenthesis.
print('Accuracy:', accuracy)
原文地址: https://www.cveoy.top/t/topic/e0Bi 著作权归作者所有。请勿转载和采集!