以下是一个完整的基于PyTorch的文本情绪分类代码示例,演示了如何使用PyTorch构建一个简单的模型来对文本数据进行情感分析。

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split

# 定义文本情绪分类模型
class SentimentClassifier(nn.Module):
    """Classifier that averages token embeddings and feeds them to a small MLP.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding vector.
        hidden_dim: Width of the hidden linear layer.
        output_dim: Number of logits produced per example.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # parameter initialisation and state_dict keys are unchanged.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embedding_dim
        )
        self.fc = nn.Linear(in_features=embedding_dim, out_features=hidden_dim)
        self.output = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Return unnormalised logits for a batch of token-index tensors.

        Args:
            text: Integer tensor of token indices; dimension 1 is averaged
                away (assumed layout is (batch, sequence) -- TODO confirm).

        Returns:
            Tensor of raw scores with ``output_dim`` values in the last axis.
        """
        token_vectors = self.embedding(text)
        # Mean-pool over dimension 1 to get one vector per example.
        pooled = token_vectors.mean(dim=1)
        activated = self.fc(pooled).relu()
        return self.output(activated)

# Load the CSV file (read without a header row) and split it into train/test sets.
data = pd.read_csv('data.csv', header=None)
# Column 0 is used as the texts, column 1 as the labels.
texts, labels = data[0].tolist(), data[1].tolist()
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# 预处理文本数据并创建词汇表
def preprocess_text(text):
    """Return ``text`` as-is; currently a pass-through with no cleaning applied."""
    processed = text
    return processed

train_texts = [preprocess_text(text) for text in train_texts]
test_texts = [preprocess_text(text) for text in test_texts]

# Build the vocabulary as an ordered list rather than a set: a set has no
# .index() method and no stable iteration order across runs, so word ids could
# neither be looked up nor reproduced.  Index 0 ("<pad>") and index 1 ("<unk>")
# are reserved for padding and out-of-vocabulary words.
vocab = ["<pad>", "<unk>"]
vocab.extend(
    sorted({word for text in train_texts for word in text.split()} - set(vocab))
)

# Only training texts contribute words, so test-only words are not in the list.
vocab_size = len(vocab)

# 将文本转换为整数序列并创建DataLoader
def text_to_sequence(text, vocab):
    """Convert whitespace-separated ``text`` into a tensor of vocabulary indices.

    Args:
        text: String to encode; it is tokenised with ``str.split()``.
        vocab: Either a mapping ``word -> index`` or an iterable of words.
            For a list/tuple the index is the position of the first
            occurrence (same as ``list.index``).  A ``set``/``frozenset`` has
            no order, so it is sorted first to obtain reproducible indices.

    Returns:
        A 1-D ``torch.long`` tensor.  Words missing from ``vocab`` map to the
        index of ``"<unk>"`` when the vocabulary has such an entry; otherwise
        they are skipped, so the result can be empty.
    """
    if isinstance(vocab, dict):
        word_to_index = vocab
    else:
        ordered = sorted(vocab) if isinstance(vocab, (set, frozenset)) else vocab
        word_to_index = {}
        for position, word in enumerate(ordered):
            # setdefault keeps the first position, mirroring list.index().
            word_to_index.setdefault(word, position)

    unknown_index = word_to_index.get("<unk>")
    sequence = []
    for word in text.split():
        index = word_to_index.get(word, unknown_index)
        if index is not None:
            sequence.append(index)
    # Explicit dtype: an empty list would otherwise become a float tensor,
    # which cannot be used as embedding indices.
    return torch.tensor(sequence, dtype=torch.long)

train_sequences = [text_to_sequence(text, vocab) for text in train_texts]
test_sequences = [text_to_sequence(text, vocab) for text in test_texts]

train_data = list(zip(train_sequences, train_labels))
test_data = list(zip(test_sequences, test_labels))


def _pad_collate(batch):
    """Collate ``(sequence, label)`` pairs into a padded index batch and a label batch.

    The default collate function stacks tensors and therefore fails when the
    sequences in a batch have different lengths.  Here every sequence is
    right-padded with index 0 up to the longest sequence in the batch.

    Args:
        batch: List of ``(1-D index tensor, label)`` pairs.

    Returns:
        Tuple ``(padded, labels)`` where ``padded`` has one row per pair and
        ``labels`` is collated the same way the default collate would do it.
    """
    sequences, targets = zip(*batch)
    padded = torch.nn.utils.rnn.pad_sequence(
        list(sequences), batch_first=True, padding_value=0
    )
    # Labels keep the default collation so their type handling is unchanged.
    return padded, torch.utils.data.dataloader.default_collate(list(targets))


train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=16, shuffle=True, collate_fn=_pad_collate
)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=16, shuffle=False, collate_fn=_pad_collate
)

# 训练和测试函数
def train(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over every batch yielded by ``train_loader``.

    Args:
        model: Module to optimise; it is switched to training mode first.
        train_loader: Iterable yielding ``(inputs, targets)`` pairs.
        criterion: Callable returning a loss from ``(model_output, targets)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.

    Returns:
        None. The model parameters are updated in place.
    """
    model.train()
    for inputs, targets in train_loader:
        # Clear gradients left over from the previous batch before backprop.
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for text, label in test_loader:
            output = model(text)
            _, predicted = torch.max(output.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
    accuracy = 100 * correct / total
    return accuracy

# Model hyperparameters
embedding_dim = 100
hidden_dim = 256
output_dim = 2

# Build the model, the loss function and the optimizer
model = SentimentClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Train the model, evaluating on the test loader after every epoch
num_epochs = 10
for epoch in range(num_epochs):
    train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
    )
    accuracy = test(model=model, test_loader=test_loader)
    # epoch is zero-based, so the printed counter is shifted by one.
    print(f'Epoch {epoch+1}/{num_epochs}, Accuracy: {accuracy}%')

请确保将数据文件命名为data.csv并与代码文件放在同一目录下。该文件不应包含表头:第一列为文本,第二列为整数类别标签(0或1)。这是一个基本的文本情绪分类的代码示例,您可以根据需要进行修改和调整。希望对您有所帮助!

PyTorch文本情绪分类代码示例:完整指南

原文地址: https://www.cveoy.top/t/topic/NY3 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录