下面是一个使用朴素贝叶斯算法解决多分类问题的示例代码:

import numpy as np

class NaiveBayes:
    """Categorical naive Bayes classifier for small non-negative integer features.

    Every feature is treated as a categorical variable taking values in
    ``range(num_values)``. Conditional probabilities are estimated with
    additive (Laplace) smoothing controlled by ``alpha``.

    Attributes set by ``fit``:
        classes: sorted unique labels found in ``y``.
        num_classes: number of distinct labels.
        num_features: number of columns of the training matrix.
        priors: array of shape (num_classes,) holding P(class).
        likelihoods: array of shape (num_classes, num_features, num_values)
            holding P(feature == value | class).
    """

    def __init__(self, alpha=1.0, num_values=256):
        """Create an unfitted classifier.

        Args:
            alpha: additive smoothing constant added to every count
                (non-negative; 0 disables smoothing).
            num_values: number of distinct values a feature may take; valid
                feature values are ``0 .. num_values - 1``.

        Raises:
            ValueError: if ``alpha`` is negative or ``num_values`` < 1.
        """
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        if num_values < 1:
            raise ValueError("num_values must be a positive integer")
        self.alpha = alpha  # smoothing strength, not a learning rate
        self.num_values = int(num_values)

    @staticmethod
    def _as_index_matrix(X, num_values):
        """Return X as a 2-D integer array whose entries lie in range(num_values)."""
        arr = np.asarray(X)
        if arr.ndim != 2:
            raise ValueError(
                "X must be a 2-D array of shape (n_samples, n_features)"
            )
        if arr.dtype.kind not in "iu":
            # Accept integer-valued floats/bools, reject anything lossy.
            as_int = arr.astype(np.int64)
            if not np.array_equal(as_int, arr):
                raise ValueError("X must contain integer values")
            arr = as_int
        # Negative values would otherwise wrap around when used as indices.
        if arr.size and (arr.min() < 0 or arr.max() >= num_values):
            raise ValueError(
                "feature values must lie in [0, %d)" % num_values
            )
        return arr.astype(np.intp, copy=False)

    def fit(self, X, y):
        """Estimate class priors and per-feature conditional probabilities.

        Args:
            X: array-like of shape (n_samples, n_features) with integer
                values in ``range(num_values)``.
            y: array-like of shape (n_samples,) with class labels.

        Raises:
            ValueError: if the shapes are inconsistent, the training set is
                empty, or a feature value is out of range.
        """
        X = self._as_index_matrix(X, self.num_values)
        y = np.asarray(y)
        if y.ndim != 1 or len(y) != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        if len(y) == 0:
            raise ValueError("cannot fit on an empty training set")

        self.classes, class_idx = np.unique(y, return_inverse=True)
        class_idx = class_idx.reshape(-1)
        self.num_classes = len(self.classes)
        self.num_features = X.shape[1]

        # Prior of each class = fraction of training samples with that label.
        class_counts = np.bincount(class_idx, minlength=self.num_classes)
        self.priors = class_counts / len(y)

        # counts[c, f, v] = number of class-c samples whose feature f equals v.
        # np.add.at accumulates correctly even when index triples repeat.
        counts = np.zeros(
            (self.num_classes, self.num_features, self.num_values)
        )
        feature_idx = np.arange(self.num_features)
        np.add.at(counts, (class_idx[:, None], feature_idx, X), 1)

        # Additive smoothing keeps unseen values from getting probability 0.
        denominators = class_counts[:, None, None] + self.alpha * self.num_values
        self.likelihoods = (counts + self.alpha) / denominators

    def predict(self, X):
        """Predict the most probable class for every row of X.

        Scores are accumulated as sums of log-probabilities; multiplying the
        raw probabilities underflows to 0.0 for every class once there are
        many features, which makes the argmax meaningless.

        Args:
            X: array-like of shape (n_samples, n_features) with integer
                values in the range the model was fitted on.

        Returns:
            A list with one predicted label (an element of ``classes``) per
            row of X.

        Raises:
            ValueError: if X has the wrong number of features or contains an
                out-of-range value.
        """
        if len(X) == 0:
            return []
        X = self._as_index_matrix(X, self.likelihoods.shape[2])
        if X.shape[1] != self.num_features:
            raise ValueError(
                "X has %d features, expected %d"
                % (X.shape[1], self.num_features)
            )

        # With alpha == 0 some probabilities are exactly 0; log(0) = -inf is
        # the intended score there, so silence the divide warning.
        with np.errstate(divide="ignore"):
            log_priors = np.log(self.priors)
            log_likelihoods = np.log(self.likelihoods)

        # Gather log P(x[n, f] | c) for all n, f, c -> shape (C, N, F).
        feature_idx = np.arange(self.num_features)
        per_feature = log_likelihoods[:, feature_idx, X]
        scores = per_feature.sum(axis=2).T + log_priors  # shape (N, C)
        return list(self.classes[np.argmax(scores, axis=1)])


# Fix the RNG seed so the synthetic data below is reproducible.
np.random.seed(0)

# Synthetic training split: 100 samples, 5 features in [0, 256), labels in {0, 1, 2}.
X_train = np.random.randint(0, 256, size=(100, 5))
y_train = np.random.randint(0, 3, size=100)

# Synthetic validation split (30 samples).
X_val = np.random.randint(0, 256, size=(30, 5))
y_val = np.random.randint(0, 3, size=30)

# Synthetic test split (20 samples).
X_test = np.random.randint(0, 256, size=(20, 5))
y_test = np.random.randint(0, 3, size=20)

# Build the classifier and fit it on the training split.
naive_bayes = NaiveBayes(alpha=1.0)
naive_bayes.fit(X_train, y_train)

# Labels are drawn independently of the features, so the accuracies printed
# below only demonstrate the API; they are not a measure of model quality.
y_val_pred = naive_bayes.predict(X_val)
val_accuracy = np.mean(np.asarray(y_val_pred) == y_val)
print("Validation Accuracy:", val_accuracy)

y_test_pred = naive_bayes.predict(X_test)
test_accuracy = np.mean(np.asarray(y_test_pred) == y_test)
print("Test Accuracy:", test_accuracy)

在上面的示例代码中,首先定义了一个NaiveBayes类,其构造函数接收一个平滑参数alpha(即拉普拉斯平滑系数,而不是学习率)。fit方法用于训练模型,接收训练集X和对应的标签y,并计算每个类别的先验概率和每个特征的条件概率。predict方法用于预测新样本的类别。然后,使用随机生成的训练集、验证集和测试集进行模型训练和预测,并输出准确率。


原文地址: https://www.cveoy.top/t/topic/hXUr 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录