Sentiment Analysis with PyTorch RNN: A Comprehensive Guide
# --- Imports and data setup -------------------------------------------------
import copy

import torch
from torch import nn
from torch import optim
import torchtext
from torchtext import data
from torchtext import datasets

# Fields: tokenized, batch-first, lowercased text; categorical labels.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

# Load the Stanford Sentiment Treebank data splits.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# Build the vocabularies from the training split only (avoids test leakage).
TEXT.build_vocab(train_data)
LABEL.build_vocab(train_data)

# Hyperparameters.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
embedding_dim = 100  # NOTE(review): dropped by the scrape; conventional value, used below
hidden_dim = 256     # NOTE(review): dropped by the scrape; conventional value, used below
# Index of the '<pad>' token so the embedding can ignore padding positions.
# (The original line was truncated mid string literal.)
padding_idx = TEXT.vocab.stoi['<pad>']

# Bucketed iterators batch examples of similar length to minimize padding.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data), batch_size=32)
# 1. Training and evaluation function ----------------------------------------
def train_model(model, train_iter, val_iter, optimizer, criterion, num_epochs=5):
    """Train ``model`` and return a deep copy of the best checkpoint.

    Args:
        model: nn.Module mapping a batch of inputs to class logits.
        train_iter: iterable of training batches exposing ``.text`` / ``.label``.
        val_iter: iterable of validation batches with the same attributes.
        optimizer: torch optimizer built over ``model.parameters()``.
        criterion: loss on ``(logits, labels)``, e.g. ``nn.CrossEntropyLoss``.
        num_epochs: number of full passes over ``train_iter``.

    Returns:
        A ``copy.deepcopy`` of the model at its best validation accuracy,
        or ``None`` if validation accuracy never exceeded 0.
    """
    best_val_acc = 0.0
    best_model = None
    for epoch in range(num_epochs):
        # --- training pass ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        for batch in train_iter:
            text = batch.text
            labels = batch.label
            optimizer.zero_grad()
            output = model(text)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_correct += (output.argmax(1) == labels).sum().item()
            train_total += labels.size(0)
        train_loss /= len(train_iter)
        # BUG FIX: the original divided the per-sample correct count by the
        # number of *batches*, so "accuracy" could exceed 1 and scaled with
        # batch size. Divide by the number of examples instead.
        train_acc = train_correct / max(train_total, 1)

        # --- validation pass (no gradients) ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for batch in val_iter:
                text = batch.text
                labels = batch.label
                output = model(text)
                loss = criterion(output, labels)
                val_loss += loss.item()
                val_correct += (output.argmax(1) == labels).sum().item()
                val_total += labels.size(0)
        val_loss /= len(val_iter)
        val_acc = val_correct / max(val_total, 1)  # same fix as above

        print(f'Epoch {epoch + 1}/{num_epochs}:')
        print(f'Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.4f}')
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')

        # Snapshot the best model seen so far (deep copy so later epochs
        # cannot mutate it).
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = copy.deepcopy(model)
    return best_model
# 2. RNN model for sentiment analysis ----------------------------------------
class RNNClassifier(nn.Module):
    """Single-layer RNN classifier: embedding -> RNN -> linear head.

    Takes a LongTensor of token ids shaped (batch, seq_len) and returns
    unnormalized class logits shaped (batch, label_size).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size,
                 padding_idx):
        # BUG FIX: the scraped original lost the dunder underscores
        # ("def init" / "self.init()"), so nn.Module was never initialised.
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.num_layers = 1
        # padding_idx keeps the pad token's embedding at zero and untrained.
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim,
                                      padding_idx=padding_idx)
        self.rnn = nn.RNN(self.embedding_dim, self.hidden_dim,
                          num_layers=self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_dim, self.label_size)

    def zero_state(self, batch_size, device=None):
        """Return an all-zero initial hidden state, shape
        (num_layers, batch_size, hidden_dim)."""
        return torch.zeros(self.num_layers, batch_size, self.hidden_dim,
                           device=device)

    def forward(self, text):
        """text: LongTensor (batch, seq_len) -> logits (batch, label_size)."""
        embedding = self.embedding(text)
        # Create the initial state on the same device as the input so the
        # model also works on GPU (the original always allocated on CPU).
        hidden = self.zero_state(text.size(0), device=text.device)
        output, _ = self.rnn(embedding, hidden)
        # Classify from the RNN output at the final time step.
        return self.fc(output[:, -1, :])
# 3. Train the model and compute the accuracy --------------------------------
# Baseline run: Adam with default settings, cross-entropy loss.
model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size,
                      padding_idx)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
best_model = train_model(model, train_iter, val_iter, optimizer, criterion)
# 4. Train a model with better accuracy --------------------------------------
# Different optimizers / hyperparameters can be tried here to improve
# accuracy. Two examples: plain SGD and Adam with an explicit learning rate.
model_sgd = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size,
                          padding_idx)
optimizer_sgd = optim.SGD(model_sgd.parameters(), lr=0.01)
best_model_sgd = train_model(model_sgd, train_iter, val_iter, optimizer_sgd,
                             criterion)

model_adam = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size,
                           padding_idx)
optimizer_adam = optim.Adam(model_adam.parameters(), lr=0.001)
best_model_adam = train_model(model_adam, train_iter, val_iter, optimizer_adam,
                              criterion)
Original article URL: https://www.cveoy.top/t/topic/o9cm — copyright belongs to the author. Do not repost or scrape!