PyTorch文本情绪分类实战：基于CSV数据的完整Demo

以下是一个完整的、基于PyTorch的文本情绪分类示例代码。假设数据源是一个不含标题行的CSV文件，共两列：第一列为文本，第二列为整数类别标签（0或1）。文本会先转换为词索引序列，再经由嵌入层（embedding layer）输入模型：

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split

# Define the model
class SentimentClassifier(nn.Module):
    """Bag-of-embeddings text classifier.

    Word indices are embedded, averaged over the sequence dimension, passed
    through one ReLU hidden layer and projected to per-class logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each word embedding.
        hidden_dim: Width of the hidden fully connected layer.
        output_dim: Number of logits produced per example.
        padding_idx: Optional index used to pad sequences in a batch. When
            given, that embedding row is kept at zero and padded positions
            are excluded from the average. When ``None`` (the default) every
            position is averaged, exactly as before.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim,
                 padding_idx=None):
        super().__init__()
        self.padding_idx = padding_idx
        self.embedding = nn.Embedding(vocab_size, embedding_dim,
                                      padding_idx=padding_idx)
        self.fc = nn.Linear(embedding_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        """Return class logits for a batch of index sequences.

        Args:
            text: Integer tensor of word indices; dimension 1 is the one that
                is averaged away (expected layout: ``(batch, seq_len)``).

        Returns:
            Tensor of logits with ``output_dim`` values per example.
        """
        embedded = self.embedding(text)
        if self.padding_idx is None:
            pooled = embedded.mean(dim=1)
        else:
            # Average over real tokens only, so the result does not depend on
            # how much padding the batch happened to need.
            mask = (text != self.padding_idx).unsqueeze(-1).to(embedded.dtype)
            # clamp avoids 0/0 for a row that consists solely of padding.
            token_counts = mask.sum(dim=1).clamp(min=1.0)
            pooled = (embedded * mask).sum(dim=1) / token_counts
        hidden = torch.relu(self.fc(pooled))
        return self.output(hidden)

# Load the data
# The CSV has no header row: column 0 holds the text, column 1 the label.
data = pd.read_csv('data.csv', header=None)
# Blank cells are parsed as NaN (a float). A NaN text would crash the later
# `text.split()` call and a NaN label forces the whole column to float, so
# rows that are missing either value are dropped up front.
data = data.dropna(subset=[0, 1])
texts = data.iloc[:, 0].astype(str).tolist()  # text column, always str
labels = data.iloc[:, 1].astype(int).tolist()  # integer class indices for the loss

# Split into training and test sets (80% / 20%, fixed seed)
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42)

# Text preprocessing
def preprocess_text(text):
    """Return the preprocessed form of ``text``.

    This is currently a pass-through placeholder: the input is returned
    unchanged. Steps such as tokenization or stop-word removal belong here.
    """
    processed = text  # no transformation is applied yet
    return processed

# Run every text of both splits through the preprocessing hook
train_texts = list(map(preprocess_text, train_texts))
test_texts = list(map(preprocess_text, test_texts))

# Build the vocabulary
# Index 0 is reserved for padding and index 1 for out-of-vocabulary words.
# Every other word gets the next free index in order of first appearance in
# the training texts, so the mapping is deterministic for a given training set.
PAD_TOKEN = '<pad>'
UNK_TOKEN = '<unk>'
vocab = {PAD_TOKEN: 0, UNK_TOKEN: 1}
for text in train_texts:
    for word in text.split():
        vocab.setdefault(word, len(vocab))

# Includes the two reserved tokens, so every index is a valid embedding row.
vocab_size = len(vocab)

# Convert a text into a sequence of integer indices
def text_to_sequence(text, vocab):
    """Map a whitespace-separated text to a 1-D LongTensor of word indices.

    Args:
        text: The text to convert; it is split on whitespace.
        vocab: Mapping from word to index; must contain ``UNK_TOKEN``.

    Returns:
        A ``torch.long`` tensor with one index per word. Words missing from
        ``vocab`` (e.g. words that only occur in the test set) map to the
        ``UNK_TOKEN`` index instead of raising. A text without any words
        yields a single ``UNK_TOKEN`` index so the sequence is never empty.
    """
    unk_index = vocab[UNK_TOKEN]
    sequence = [vocab.get(word, unk_index) for word in text.split()]
    if not sequence:
        # Averaging over a zero-length sequence would produce NaN.
        sequence = [unk_index]
    return torch.tensor(sequence, dtype=torch.long)

train_sequences = [text_to_sequence(text, vocab) for text in train_texts]
test_sequences = [text_to_sequence(text, vocab) for text in test_texts]

# Create the DataLoaders
def collate_batch(batch):
    """Pad the sequences of one batch to a common length.

    The default collate function stacks tensors and therefore fails on
    sequences of different lengths, so shorter ones are right-padded with the
    ``PAD_TOKEN`` index.

    Args:
        batch: List of ``(sequence_tensor, label)`` pairs.

    Returns:
        Tuple ``(texts, labels)``: a ``(batch, max_len)`` LongTensor and a
        ``(batch,)`` LongTensor.
    """
    sequences, labels = zip(*batch)
    padded = nn.utils.rnn.pad_sequence(
        list(sequences), batch_first=True, padding_value=vocab[PAD_TOKEN])
    return padded, torch.tensor(labels, dtype=torch.long)

train_data = list(zip(train_sequences, train_labels))
test_data = list(zip(test_sequences, test_labels))
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=16, shuffle=True, collate_fn=collate_batch)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=16, shuffle=False, collate_fn=collate_batch)

# Model hyperparameters
embedding_dim = 100
hidden_dim = 256
output_dim = 2

# Build the model, the loss function and the optimizer
model = SentimentClassifier(vocab_size, embedding_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a fixed number of epochs; after each epoch, measure and print the
# accuracy on the test set.
num_epochs = 10
for epoch in range(num_epochs):
    # Training pass: one optimizer step per mini-batch
    model.train()
    for batch_texts, batch_labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(batch_texts), batch_labels)
        loss.backward()
        optimizer.step()

    # Evaluation pass: no gradients are needed
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_texts, batch_labels in test_loader:
            # The predicted class is the index of the largest logit
            predicted = model(batch_texts).argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Accuracy: {accuracy}%')

请确保将数据文件命名为data.csv，并放在运行脚本时的当前工作目录中（通常即代码文件所在的目录）。此代码将训练一个基于PyTorch的文本情绪分类模型，并在每个epoch结束时打印出测试集的准确率。希望对您有所帮助！