使用注意力机制的RNN情感分类模型
import copy
import torch
from torch import nn
from torch import optim
import torchtext
from torchtext import data
from torchtext import datasets
# Field definitions: lower-cased, batch-first token sequences plus a label field.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

# Load the SST train / validation / test splits.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# Build both vocabularies from the training split only.
# unk_init receives an empty tensor for every token missing from GloVe and has
# to hand back a tensor of the same size. torch.Tensor.normal_ fills it in
# place with N(0, 1) samples; torch.Tensor.norm (used previously) instead
# reduces the tensor to a single scalar norm, which is not an initializer.
# NOTE(review): in the code visible in this file the vectors loaded here are
# never copied into the model's embedding layer - confirm whether that is
# intended.
TEXT.build_vocab(
    train_data,
    vectors='glove.840B.300d',
    unk_init=torch.Tensor.normal_,
)
LABEL.build_vocab(train_data)

# Hyperparameters derived from the vocabularies, plus fixed model sizes.
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 300  # matches the 300-d GloVe vectors requested above
hidden_dim = 128

# Batch iterators over the three splits.
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
# Training function
def train(model, train_loader, optimizer, criterion):
    """Run one training epoch and return the per-sample loss and accuracy.

    Args:
        model: Module whose forward pass returns ``(logits, attention_weights)``.
        train_loader: Iterable of batches exposing ``.text`` and ``.label``
            tensors, with the batch on dimension 0.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, labels)``. It is assumed
            to return the batch mean (the ``nn.CrossEntropyLoss`` default),
            which is why it is re-weighted by the batch size below.

    Returns:
        ``(avg_loss, accuracy)`` as Python floats averaged over every sample
        seen in the epoch, or ``(0.0, 0.0)`` when the loader yields nothing.
    """
    model.train()

    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` global; fall back to the CPU for models that
    # have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    for batch in train_loader:
        text = batch.text.to(target_device)
        labels = batch.label.to(target_device)

        optimizer.zero_grad()
        logits, _ = model(text)  # attention weights are not needed for training
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        batch_size = text.size(0)
        total_loss += loss.item() * batch_size
        total_correct += (logits.argmax(dim=1) == labels).sum().item()
        # Count samples directly so any iterable of batches works, not only
        # loaders that expose a ``.dataset`` attribute.
        total_samples += batch_size

    if total_samples == 0:
        return 0.0, 0.0
    return total_loss / total_samples, total_correct / total_samples
def evaluate(model, iterator, criterion):
    """Evaluate ``model`` without gradient tracking; return loss and accuracy.

    Args:
        model: Module whose forward pass returns ``(logits, attention_weights)``.
        iterator: Iterable of batches exposing ``.text`` and ``.label``
            tensors, with the batch on dimension 0.
        criterion: Loss called as ``criterion(logits, labels)``. It is assumed
            to return the batch mean (the ``nn.CrossEntropyLoss`` default),
            which is why it is re-weighted by the batch size below.

    Returns:
        ``(avg_loss, accuracy)`` as Python floats averaged over every sample,
        or ``(0.0, 0.0)`` when the iterator yields nothing.
    """
    model.eval()

    # Move batches to wherever the model lives instead of relying on a
    # module-level ``device`` global; fall back to the CPU for models that
    # have no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for batch in iterator:
            text = batch.text.to(target_device)
            labels = batch.label.to(target_device)
            logits, _ = model(text)

            # Score against the labels that were moved to the model's device.
            # Using ``batch.label`` here would mix devices when the model is
            # on the GPU and the iterator yields CPU tensors.
            loss = criterion(logits, labels)

            # Weight by batch size so a smaller final batch does not count as
            # much as a full one.
            batch_size = text.size(0)
            total_loss += loss.item() * batch_size
            total_correct += (logits.argmax(dim=1) == labels).sum().item()
            total_samples += batch_size

    if total_samples == 0:
        return 0.0, 0.0
    return total_loss / total_samples, total_correct / total_samples
def accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        predictions: Score tensor; the maximum is taken along dimension 1.
        labels: Tensor of target class indices compared element-wise with the
            predicted indices.

    Returns:
        A scalar tensor: number of matches divided by the size of the first
        dimension of the match tensor.
    """
    predicted_labels = predictions.max(dim=1).indices
    hits = predicted_labels.eq(labels).float()
    return hits.sum() / hits.shape[0]
class Attention(nn.Module):
    """Pool a sequence of hidden states into one vector with learned weights.

    Each time step is scored by a single linear layer, the scores are
    normalised with a softmax over the time dimension, and the hidden states
    are summed with those weights.
    """

    def __init__(self, hidden_dim):
        """Create the scoring layer for hidden states of width ``hidden_dim``."""
        super().__init__()
        self.hidden_dim = hidden_dim
        self.attention_weights = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output, mask=None):
        """Return the attention-pooled vector and the attention weights.

        Args:
            lstm_output: Tensor of shape ``(batch, seq_len, hidden_dim)``.
            mask: Optional ``(batch, seq_len)`` tensor that is truthy at
                positions allowed to receive attention. ``None`` (the default)
                attends over every position.

        Returns:
            ``(attention_output, attention_weights)`` with shapes
            ``(batch, hidden_dim)`` and ``(batch, seq_len)``.
        """
        attention_scores = self.attention_weights(lstm_output).squeeze(2)
        if mask is not None:
            # Use the most negative finite value rather than -inf so that a
            # fully masked row yields uniform weights instead of NaN.
            fill_value = torch.finfo(attention_scores.dtype).min
            attention_scores = attention_scores.masked_fill(~mask.bool(), fill_value)
        attention_weights = torch.softmax(attention_scores, dim=1)
        # (batch, hidden, seq) @ (batch, seq, 1) -> (batch, hidden, 1)
        attention_output = torch.bmm(
            lstm_output.transpose(1, 2), attention_weights.unsqueeze(2)
        ).squeeze(2)
        return attention_output, attention_weights


class RNNClassifier(nn.Module):
    """Two-layer bidirectional LSTM classifier with attention pooling."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx):
        """Build the embedding, LSTM, attention and output layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embedding_dim: Width of each token embedding.
            hidden_dim: Hidden size of the LSTM, per direction.
            label_size: Number of output classes.
            padding_idx: Index of the padding token. It is passed to the
                embedding layer and used to mask padding in the attention.
        """
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.padding_idx = padding_idx
        self.num_layers = 2
        self.dropout_num = 0.5
        self.bidirectional = True
        self.num_directions = 2 if self.bidirectional else 1
        # Width of every LSTM output step: both directions are concatenated.
        lstm_output_dim = hidden_dim * self.num_directions

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.embedding_dropout = nn.Dropout(self.dropout_num)
        # LSTM layer
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=self.bidirectional,
        )
        self.lstm_dropout = nn.Dropout(self.dropout_num)
        # Attention layer
        self.attention = Attention(lstm_output_dim)
        # Fully connected output layer
        self.fc = nn.Linear(lstm_output_dim, label_size)
        self.fc_dropout = nn.Dropout(self.dropout_num)
        # Kept as an attribute for compatibility; forward() returns raw logits
        # and does not apply it.
        self.softmax = nn.LogSoftmax(dim=1)

    def zero_state(self, batch_size):
        """Return zero ``(hidden, cell)`` LSTM states for ``batch_size`` rows.

        The states are created with the device and dtype of the embedding
        weights, so they follow the model wherever it is moved.
        """
        state_shape = (self.num_layers * self.num_directions, batch_size, self.hidden_dim)
        weight = self.embedding.weight
        hidden = torch.zeros(state_shape, device=weight.device, dtype=weight.dtype)
        cell = torch.zeros(state_shape, device=weight.device, dtype=weight.dtype)
        return hidden, cell

    def forward(self, text):
        """Classify a batch of token-id sequences.

        Args:
            text: Integer tensor of shape ``(batch, seq_len)``.

        Returns:
            ``(logits, attention_weights)`` with shapes ``(batch, label_size)``
            and ``(batch, seq_len)``.
        """
        # Padding positions must not receive attention weight.
        mask = text.ne(self.padding_idx) if self.padding_idx is not None else None

        emb = self.embedding_dropout(self.embedding(text))
        h0, c0 = self.zero_state(text.size(0))
        output, _ = self.lstm(emb, (h0, c0))
        output = self.lstm_dropout(output)
        attention_output, attention_weights = self.attention(output, mask)
        # Dropout regularises the features fed to the classifier. Applying it
        # after the linear layer would randomly zero the class logits during
        # training.
        logits = self.fc(self.fc_dropout(attention_output))
        return logits, attention_weights
# Build the classifier on the selected device, then the optimizer and the loss.
# NOTE(review): nothing visible in this file copies TEXT.vocab.vectors into
# model.embedding, so the embedding appears to start from random weights -
# confirm whether that is intended.
model = RNNClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    label_size=label_size,
    padding_idx=padding_idx,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Number of training epochs.
num_epochs = 50

# Per-epoch history, appended to by model_running.
train_losses, val_losses = [], []
train_accs, val_accs = [], []
def model_running(model, train_iter, val_iter, optimizer, criterion):
    """Train for ``num_epochs`` epochs and return the best weights seen.

    After every epoch the training and validation metrics are printed and
    appended to the module-level lists ``train_losses``, ``val_losses``,
    ``train_accs`` and ``val_accs``.

    Args:
        model: Model to train in place.
        train_iter: Batches passed to ``train``.
        val_iter: Batches passed to ``evaluate``.
        optimizer: Optimizer passed to ``train``.
        criterion: Loss passed to ``train`` and ``evaluate``.

    Returns:
        A state dict copied at the epoch with the highest validation accuracy,
        or the initial weights if validation accuracy never rises above zero.
    """
    best_val_acc = 0.0
    # Snapshot up front so there is always something to return, even when no
    # epoch improves on the initial best_val_acc of 0.0.
    best_model = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train(model, train_iter, optimizer, criterion)
        val_loss, val_acc = evaluate(model, val_iter, criterion)

        # Print progress
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}'
              .format(epoch, num_epochs, train_loss, val_loss, train_acc, val_acc))

        # Save loss and accuracy to the history lists
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() hands back references to the live parameter
            # tensors, so later optimizer steps would overwrite an uncopied
            # "best" snapshot with the final weights.
            best_model = copy.deepcopy(model.state_dict())

    return best_model
# Run the training loop, then load the state dict it returns back into the model.
best_model = model_running(model,train_iter, val_iter,optimizer, criterion)
model.load_state_dict(best_model)
# Report loss and accuracy on the test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc:.4f}')
该模型在验证集上的准确率为0.5867,测试集上的准确率为0.6094。可以看出测试集准确率略高于验证集,二者差距不大,但整体都偏低;如果训练集上的准确率明显高于这两个数值,则说明模型存在一定的过拟合问题。
优化建议:
- 可以尝试增加模型的复杂度,例如增加隐藏层的数量或隐藏单元的数量,以提高模型的表达能力。
- 可以尝试使用更大的嵌入维度和隐藏维度,以增加模型的容量。
- 可以尝试使用其他的优化算法,例如SGD或AdamW,以提高模型的收敛速度和效果。
- 可以尝试使用更大的训练集,以提高模型的泛化能力。
- 模型已经使用了比例为0.5的dropout,可以尝试进一步调整dropout比例,或加入权重衰减(weight decay)等其他正则化技术,以减少模型的过拟合。
总结起来,可以尝试调整模型的复杂度和参数设置,增加训练数据量,使用其他优化算法和正则化技术等方法,以提高模型的性能和泛化能力。
原文地址: http://www.cveoy.top/t/topic/pgjg 著作权归作者所有。请勿转载和采集!