import copy

import torch
from torch import nn
from torch import optim

import torchtext
from torchtext import data
from torchtext import datasets

# Token field (lowercased, batch-first tensors) and label field for SST.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

# Load the data splits.

# Stanford Sentiment Treebank: train / validation / test splits.
train_data, val_data, test_data = datasets.SST.splits(
    TEXT,
    LABEL,
)

# Build the vocabularies.

# Vocabularies are built from the training split only.
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

# Hyperparameters.

vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
# Look up the field's actual pad token ('<pad>' by default).  The original
# used TEXT.vocab.stoi[''], but stoi is a defaultdict, so the empty string
# silently maps to 0 — the <unk> index — making nn.Embedding treat <unk> as
# padding instead of the real pad token.
padding_idx = TEXT.vocab.stoi[TEXT.pad_token]
embedding_dim = 128
hidden_dim = 128

# Build the iterators.

# Bucketed iterators group similar-length examples to minimise padding.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32,
)

# 1. Define the training and evaluation function.

def train_model(model, train_iter, val_iter, optimizer, criterion, num_epochs=5):
    """Train ``model`` and return a deep copy of the best-validation epoch.

    Args:
        model: classifier called as ``model(batch.text)`` returning logits.
        train_iter: iterable of batches exposing ``.text`` and ``.label``.
        val_iter: like ``train_iter``, used for per-epoch evaluation.
        optimizer: optimizer already bound to ``model.parameters()``.
        criterion: loss function taking ``(logits, labels)``.
        num_epochs: number of full passes over ``train_iter``.

    Returns:
        A ``copy.deepcopy`` of the model snapshot with the highest validation
        accuracy (the untrained model if nothing ever beats 0.0 accuracy or
        ``num_epochs == 0``).
    """
    best_val_acc = 0.0
    # Start from a copy of the input model so the function never returns
    # None (the original did when no epoch improved on 0.0 accuracy).
    best_model = copy.deepcopy(model)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_seen = 0

        for batch in train_iter:
            text = batch.text
            labels = batch.label

            optimizer.zero_grad()
            output = model(text)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (output.argmax(1) == labels).sum().item()
            train_seen += labels.size(0)

        # Loss is averaged per batch; accuracy must be averaged per *example*.
        # The original divided the correct-prediction count by the number of
        # batches, inflating "accuracy" by roughly the batch size.
        train_loss /= max(len(train_iter), 1)
        train_acc = train_correct / max(train_seen, 1)

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_seen = 0

        with torch.no_grad():
            for batch in val_iter:
                text = batch.text
                labels = batch.label

                output = model(text)
                loss = criterion(output, labels)

                val_loss += loss.item()
                val_correct += (output.argmax(1) == labels).sum().item()
                val_seen += labels.size(0)

        val_loss /= max(len(val_iter), 1)
        val_acc = val_correct / max(val_seen, 1)

        print(f'Epoch {epoch + 1}/{num_epochs}:')
        print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.4f}')
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')

        # Keep a snapshot of the best-performing weights.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = copy.deepcopy(model)

    return best_model

# 2. Build an RNN model for sentiment analysis.

class RNNClassifier(nn.Module):
    """Single-layer RNN sentence classifier: embed -> RNN -> last step -> linear.

    Args:
        vocab_size: size of the token vocabulary.
        embedding_dim: dimensionality of the token embeddings.
        hidden_dim: dimensionality of the RNN hidden state.
        label_size: number of output classes (logits returned, no softmax).
        padding_idx: vocabulary index of the pad token (its embedding stays 0).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.num_layers = 1

        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim,
                                      padding_idx=padding_idx)
        self.rnn = nn.RNN(self.embedding_dim, self.hidden_dim,
                          num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_dim, self.label_size)

    def zero_state(self, batch_size):
        """Return an all-zero initial hidden state of shape (layers, batch, hidden).

        Allocated on the same device as the model's parameters — the original
        always allocated on the CPU, which fails once the model is moved to a
        GPU.
        """
        device = next(self.parameters()).device
        return torch.zeros(self.num_layers, batch_size, self.hidden_dim,
                           device=device)

    def forward(self, text):
        # text: (batch, seq_len) token ids; batch_first=True throughout.
        embedded = self.embedding(text)
        hidden = self.zero_state(text.size(0))
        output, _ = self.rnn(embedded, hidden)
        # Classify from the RNN output at the final time step.
        # NOTE(review): with right-padded batches this is the output at a pad
        # position, not the last real token — consider pack_padded_sequence.
        return self.fc(output[:, -1, :])

# 3. Train the model and compute the accuracy.

# Instantiate the classifier, loss, and optimizer, then train.
model = RNNClassifier(
    vocab_size, embedding_dim, hidden_dim, label_size, padding_idx
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

best_model = train_model(model, train_iter, val_iter, optimizer, criterion)

# 4. Train a model with better accuracy.

# You can try different optimizers and hyperparameters here to improve the
# accuracy. For example:

# Variant: plain SGD with a fixed learning rate.
model_sgd = RNNClassifier(
    vocab_size, embedding_dim, hidden_dim, label_size, padding_idx
)
optimizer_sgd = optim.SGD(model_sgd.parameters(), lr=0.01)
best_model_sgd = train_model(model_sgd, train_iter, val_iter, optimizer_sgd, criterion)

# Variant: Adam with an explicit learning rate.
model_adam = RNNClassifier(
    vocab_size, embedding_dim, hidden_dim, label_size, padding_idx
)
optimizer_adam = optim.Adam(model_adam.parameters(), lr=0.001)
best_model_adam = train_model(model_adam, train_iter, val_iter, optimizer_adam, criterion)

# Sentiment Analysis with PyTorch RNN: A Comprehensive Guide
#
# Source: https://www.cveoy.top/t/topic/o9cm — rights reserved by the
# original author.