PaddlePaddle实现鸢尾花分类:两种方法深度解析

本文将介绍如何使用PaddlePaddle框架实现两种不同的鸢尾花分类方法:

  1. 直接进行多分类: 将问题视为一个多分类任务,一次性预测所有类别。
  2. 拆分为多个二分类问题: 为每个类别训练一个独立的二分类器,分别判断样本是否属于该类别,再综合各二分类器的输出得到最终类别(即 One-vs-Rest,一对多策略)。

我们将提供完整的代码示例,并比较两种方法的性能,帮助你深入理解多分类问题的解决思路。

方法一:直接进行多分类

import paddle
from paddle.io import Dataset
from paddle.vision.transforms import ToTensor

# Iris dataset wrapper, consumable by paddle.io.DataLoader.
class IrisDataset(Dataset):
    def __init__(self, mode='train'):
        """Create the dataset for one split.

        Args:
            mode: which split to load, e.g. 'train' or 'test';
                forwarded to load_data().
        """
        super(IrisDataset, self).__init__()
        self.mode = mode  # split selector read by load_data()
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Load iris features and integer class labels for ``self.mode``.

        Returns:
            tuple: (data, labels) — two equal-length, indexable sequences.
        """
        # The original article leaves this as a stub. Raise explicitly
        # instead of implicitly returning None, which would otherwise
        # surface as an opaque TypeError at the tuple unpack in __init__.
        raise NotImplementedError(
            'load_data must return (features, labels) for mode=%r' % self.mode)

    def __getitem__(self, index):
        # One (feature_vector, label) pair per sample.
        data = self.data[index]
        label = self.labels[index]
        return data, label

    def __len__(self):
        # Dataset size equals the number of feature rows.
        return len(self.data)

# ---- Data loader ----
train_dataset = IrisDataset(mode='train')
train_loader = paddle.io.DataLoader(train_dataset, batch_size=16, shuffle=True)

# ---- Model: 4 input features -> 3 class logits ----
model = paddle.nn.Sequential(
    paddle.nn.Linear(4, 64),
    paddle.nn.ReLU(),
    paddle.nn.Linear(64, 32),
    paddle.nn.ReLU(),
    paddle.nn.Linear(32, 3)
)

# CrossEntropyLoss expects raw logits plus int64 class indices,
# so no softmax layer is needed on the model output.
loss_fn = paddle.nn.CrossEntropyLoss()
optimizer = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=0.001)

# ---- Training loop ----
for epoch in range(10):
    for batch_data, batch_labels in train_loader:
        pred = model(batch_data)
        loss = loss_fn(pred, batch_labels)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()

# ---- Evaluation on the test split ----
test_dataset = IrisDataset(mode='test')
test_loader = paddle.io.DataLoader(test_dataset, batch_size=16, shuffle=False)

correct = 0
total = 0

model.eval()
with paddle.no_grad():
    for batch_data, batch_labels in test_loader:
        pred = model(batch_data)
        # BUG FIX: paddle.max returns only the max values (unlike
        # torch.max it does not return (values, indices)), so the
        # original `_, predicted = paddle.max(pred, axis=1)` fails.
        # argmax yields the predicted class index directly.
        predicted = paddle.argmax(pred, axis=1)
        total += batch_labels.shape[0]
        # NOTE(review): assumes batch_labels has shape [N] matching
        # `predicted`; reshape first if the loader yields [N, 1].
        correct += (predicted == batch_labels).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy}')

方法二:拆分为二分类问题

import paddle
from paddle.io import Dataset
from paddle.vision.transforms import ToTensor

# Iris dataset wrapper, consumable by paddle.io.DataLoader.
class IrisDataset(Dataset):
    def __init__(self, mode='train'):
        """Create the dataset for one split.

        Args:
            mode: which split to load, e.g. 'train' or 'test';
                forwarded to load_data().
        """
        super(IrisDataset, self).__init__()
        self.mode = mode  # split selector read by load_data()
        self.data, self.labels = self.load_data()

    def load_data(self):
        """Load iris features and integer class labels for ``self.mode``.

        Returns:
            tuple: (data, labels) — two equal-length, indexable sequences.
        """
        # The original article leaves this as a stub. Raise explicitly
        # instead of implicitly returning None, which would otherwise
        # surface as an opaque TypeError at the tuple unpack in __init__.
        raise NotImplementedError(
            'load_data must return (features, labels) for mode=%r' % self.mode)

    def __getitem__(self, index):
        # One (feature_vector, label) pair per sample.
        data = self.data[index]
        label = self.labels[index]
        return data, label

    def __len__(self):
        # Dataset size equals the number of feature rows.
        return len(self.data)

# ---- Data loader ----
train_dataset = IrisDataset(mode='train')
train_loader = paddle.io.DataLoader(train_dataset, batch_size=16, shuffle=True)

def _make_binary_model():
    """Build one one-vs-rest binary head: 4 features -> 2 logits."""
    return paddle.nn.Sequential(
        paddle.nn.Linear(4, 64),
        paddle.nn.ReLU(),
        paddle.nn.Linear(64, 32),
        paddle.nn.ReLU(),
        paddle.nn.Linear(32, 2)
    )

# One independent binary classifier per iris class (one-vs-rest).
model_setosa = _make_binary_model()
model_versicolor = _make_binary_model()
model_virginica = _make_binary_model()

# CrossEntropyLoss with 2 logits and a 0/1 target handles each
# binary sub-problem; each head gets its own optimizer.
loss_fn = paddle.nn.CrossEntropyLoss()
optimizer_setosa = paddle.optimizer.Adam(parameters=model_setosa.parameters(), learning_rate=0.001)
optimizer_versicolor = paddle.optimizer.Adam(parameters=model_versicolor.parameters(), learning_rate=0.001)
optimizer_virginica = paddle.optimizer.Adam(parameters=model_virginica.parameters(), learning_rate=0.001)

# ---- Training loop ----
# The three copy-pasted training stanzas collapse into one loop:
# head k is trained against the binary target (label == k).
_heads = (
    (0, model_setosa, optimizer_setosa),
    (1, model_versicolor, optimizer_versicolor),
    (2, model_virginica, optimizer_virginica),
)
for epoch in range(10):
    for batch_data, batch_labels in train_loader:
        for class_id, head, opt in _heads:
            pred = head(batch_data)
            # Binary target: 1 if the sample belongs to class_id, else 0.
            loss = loss_fn(pred, (batch_labels == class_id).astype('int64'))
            loss.backward()
            opt.step()
            opt.clear_grad()

# ---- Evaluation on the test split ----
test_dataset = IrisDataset(mode='test')
test_loader = paddle.io.DataLoader(test_dataset, batch_size=16, shuffle=False)

correct = 0
total = 0

model_setosa.eval()
model_versicolor.eval()
model_virginica.eval()

with paddle.no_grad():
    for batch_data, batch_labels in test_loader:
        # BUG FIX: paddle.max returns only the max values (no indices),
        # so the original `_, predicted = paddle.max(...)` unpacking
        # fails. argmax gives each binary head's 0/1 decision directly.
        predicted_setosa = paddle.argmax(model_setosa(batch_data), axis=1)
        predicted_versicolor = paddle.argmax(model_versicolor(batch_data), axis=1)

        # Combine the one-vs-rest decisions with a fixed priority:
        # setosa (class 0) wins, then versicolor (class 1), and
        # everything else falls through to virginica (class 2). The
        # virginica head is therefore never consulted here — any sample
        # where neither of the first two heads fires is silently labeled
        # class 2. A confidence-based combination (argmax over the three
        # heads' positive-class probabilities) would be more robust.
        predicted = paddle.where(predicted_setosa == 1, paddle.to_tensor(0),
                                 paddle.where(predicted_versicolor == 1, paddle.to_tensor(1),
                                              paddle.to_tensor(2)))
        total += batch_labels.shape[0]
        # NOTE(review): assumes batch_labels has shape [N] matching
        # `predicted`; reshape first if the loader yields [N, 1].
        correct += (predicted == batch_labels).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy}')

两种方法的比较

  • 直接多分类(Softmax 多分类): 单个模型一次性输出所有类别的得分,结构简单、训练效率高,是类别互斥场景下的标准做法;当类别数量很多时,单个模型需要同时拟合所有类别边界,可能影响泛化能力。
  • 拆分为二分类(One-vs-Rest,一对多策略): 需要为每个类别训练一个独立模型,整体复杂度和训练时间更高;但在类别数量较多或类别不均衡的情况下,各二分类器可以独立调优,有助于提高模型的鲁棒性。

总结

选择哪种方法取决于具体的应用场景和数据集特点。建议根据实际情况进行实验,比较两种方法的性能,选择最优的方案。

两种方法实现PaddlePaddle鸢尾花分类任务(多分类与二分类)

原文地址: https://www.cveoy.top/t/topic/bfUW 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录