使用注意力机制的RNN情感分类模型

import copy
import torch
from torch import nn
from torch import optim
import torchtext
from torchtext import data
from torchtext import datasets

# Field definitions: lower-cased, batch-first text and a categorical label.
TEXT = data.Field(sequential=True, batch_first=True, lower=True)
LABEL = data.LabelField()

# Load the SST train / validation / test splits.
train_data, val_data, test_data = datasets.SST.splits(TEXT, LABEL)

# Build the vocabularies from the training split only.
# unk_init receives an uninitialised vector for every word that has no GloVe
# entry and must return the initialised vector. Tensor.normal_ fills it with
# N(0, 1) samples; Tensor.norm would instead return a single scalar norm.
TEXT.build_vocab(train_data, vectors='glove.840B.300d', unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)

# Hyperparameters
vocab_size = len(TEXT.vocab)
label_size = len(LABEL.vocab)
padding_idx = TEXT.vocab.stoi['<pad>']
embedding_dim = 300  # matches the 300-d GloVe vectors requested above
hidden_dim = 128

# Build iterators
train_iter, val_iter, test_iter = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=32)

# Run on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
  
# Training function  
def train(model, train_loader, optimizer, criterion):
    """Run one training epoch over ``train_loader``.

    Returns:
        ``(avg_loss, accuracy)``, both divided by ``len(train_loader.dataset)``.
    """
    model.train()
    running_loss = 0.0
    n_correct = 0
    for batch in train_loader:
        inputs, targets = batch.text.to(device), batch.label.to(device)

        optimizer.zero_grad()
        logits, _ = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the number of rows in the batch so the
        # epoch figure is a per-example average.
        running_loss += batch_loss.item() * inputs.size(0)
        hits = logits.argmax(dim=1) == targets
        n_correct += hits.sum().item()

    n_examples = len(train_loader.dataset)
    return running_loss / n_examples, n_correct / n_examples

def evaluate(model, iterator, criterion):
    """Evaluate ``model`` on ``iterator`` with gradients disabled.

    Loss and accuracy are averaged per example (the same convention ``train``
    uses), so a smaller final batch does not skew the result.

    Returns:
        ``(avg_loss, accuracy)`` as Python floats; ``(0.0, 0.0)`` when the
        iterator yields no examples.
    """
    total_loss = 0.0
    total_correct = 0
    total_examples = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, labels = batch.text.to(device), batch.label.to(device)
            predictions, _ = model(text)

            # Compare against the labels that were moved to `device`; the
            # predictions live there, so the raw batch.label may be on a
            # different device.
            loss = criterion(predictions, labels)

            batch_size = labels.size(0)
            # Assumes criterion returns a per-batch mean (as train() does),
            # hence the re-weighting by batch size.
            total_loss += loss.item() * batch_size
            total_correct += (predictions.argmax(dim=1) == labels).sum().item()
            total_examples += batch_size

    if total_examples == 0:
        return 0.0, 0.0
    return total_loss / total_examples, total_correct / total_examples


def accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    The class is taken as the index of the maximum along dim 1 of
    ``predictions``; the result is a 0-dim float tensor.
    """
    predicted_labels = predictions.max(dim=1)[1]
    hits = predicted_labels.eq(labels).float()
    return hits.sum() / len(hits)

class Attention(nn.Module):
    """Pool a sequence of vectors into a single vector with learned weights.

    A linear layer maps every position to one scalar score, the scores are
    softmax-normalised along dim 1, and the inputs are summed using those
    weights.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Scoring layer: hidden_dim features -> one score per position.
        self.attention_weights = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """Return ``(pooled, weights)`` for ``lstm_output``.

        The indexing below implies a 3-D input with the feature axis last,
        i.e. (batch, seq, hidden_dim); ``pooled`` is then (batch, hidden_dim)
        and ``weights`` is (batch, seq).
        """
        scores = self.attention_weights(lstm_output).squeeze(2)
        weights = scores.softmax(dim=1)
        # (batch, hidden, seq) @ (batch, seq, 1) -> (batch, hidden, 1)
        pooled = lstm_output.transpose(1, 2).bmm(weights.unsqueeze(2))
        return pooled.squeeze(2), weights

class RNNClassifier(nn.Module):
    """Two-layer bidirectional LSTM classifier with attention pooling.

    Pipeline: embedding -> dropout -> LSTM -> dropout -> attention pooling ->
    dropout -> linear layer producing one unnormalised score per label.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, label_size, padding_idx):
        super(RNNClassifier, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.label_size = label_size
        self.num_layers = 2
        self.dropout_num = 0.5
        self.bidirectional = True

        # A bidirectional LSTM concatenates the features of both directions.
        num_directions = 2 if self.bidirectional else 1
        lstm_out_dim = hidden_dim * num_directions

        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.embedding_dropout = nn.Dropout(self.dropout_num)

        # LSTM layer
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=self.num_layers,
            batch_first=True,
            bidirectional=self.bidirectional,
        )
        self.lstm_dropout = nn.Dropout(self.dropout_num)

        # Attention layer
        self.attention = Attention(lstm_out_dim)

        # Fully connected layer
        self.fc = nn.Linear(lstm_out_dim, label_size)
        self.fc_dropout = nn.Dropout(self.dropout_num)
        # Not applied in forward(); kept so code that reads `model.softmax`
        # continues to work.
        self.softmax = nn.LogSoftmax(dim=1)

    def zero_state(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` initial LSTM states.

        Each tensor has shape (num_layers * num_directions, batch_size,
        hidden_dim).
        """
        num_directions = 2 if self.bidirectional else 1
        shape = (self.num_layers * num_directions, batch_size, self.hidden_dim)
        # Allocate alongside the model's own weights rather than on a
        # module-level device, so the states match wherever the model lives.
        ref = self.embedding.weight
        hidden = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        cell = torch.zeros(shape, dtype=ref.dtype, device=ref.device)
        return hidden, cell

    def forward(self, text):
        """Classify a batch of token-id sequences.

        Args:
            text: integer token ids; dim 0 is used as the batch size
                (presumably (batch, seq), given ``batch_first=True``).

        Returns:
            ``(logits, attention_weights)`` where ``logits`` are the raw
            outputs of the final linear layer.
        """
        emb = self.embedding(text)
        emb = self.embedding_dropout(emb)

        h0, c0 = self.zero_state(text.size(0))
        output, _ = self.lstm(emb, (h0, c0))
        output = self.lstm_dropout(output)

        attention_output, attention_weights = self.attention(output)

        # Regularise the pooled features, not the logits: dropout on the
        # logits would randomly zero class scores and rescale the others
        # during training.
        attention_output = self.fc_dropout(attention_output)
        output = self.fc(attention_output)

        return output, attention_weights

model = RNNClassifier(vocab_size, embedding_dim, hidden_dim, label_size, padding_idx).to(device)

# The vocabulary carries pretrained vectors, but a freshly constructed
# nn.Embedding is randomly initialised; copy the vectors in so they are used.
pretrained_vectors = TEXT.vocab.vectors
if pretrained_vectors is not None:
    with torch.no_grad():
        model.embedding.weight.copy_(pretrained_vectors)
        # Keep the padding row at zero, as nn.Embedding(padding_idx=...) does.
        model.embedding.weight[padding_idx].zero_()

optimizer = optim.Adam(model.parameters(), lr=0.001)
# CrossEntropyLoss expects raw logits, which is what the model returns.
criterion = nn.CrossEntropyLoss()
num_epochs = 50

# Per-epoch history, appended to by model_running().
train_losses = []
val_losses = []
train_accs = []
val_accs = []

def model_running(model, train_iter, val_iter, optimizer, criterion):
    """Train for ``num_epochs`` epochs and return the best checkpoint.

    After every epoch the train/validation loss and accuracy are printed and
    appended to the module-level history lists.

    Returns:
        A deep-copied ``state_dict`` taken at the epoch with the highest
        validation accuracy, or the initial weights if no epoch scores
        above 0.0.
    """
    best_val_acc = 0.0
    # Snapshot up front so there is always a state dict to return.
    best_model = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        train_loss, train_acc = train(model, train_iter, optimizer, criterion)
        val_loss, val_acc = evaluate(model, val_iter, criterion)

        # Print progress
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Train Acc: {:.4f}, Val Acc: {:.4f}'
              .format(epoch, num_epochs, train_loss, val_loss, train_acc, val_acc))

        # Save loss and accuracy to list
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        # Compare after every epoch. state_dict() hands back references to
        # the live parameters, so deep-copy to freeze this epoch's weights.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = copy.deepcopy(model.state_dict())

    return best_model

# Train, then load the state dict that model_running() returns.
best_model = model_running(
    model=model,
    train_iter=train_iter,
    val_iter=val_iter,
    optimizer=optimizer,
    criterion=criterion,
)
model.load_state_dict(best_model)

# Final evaluation on the test split.
test_loss, test_acc = evaluate(model, test_iter, criterion)
print(f'Test loss: {test_loss:.4f}, Test accuracy: {test_acc:.4f}')

该模型在验证集上的准确率为0.5867，测试集上的准确率为0.6094。可以看出模型在训练集上的准确率相对较高，而在验证集和测试集上的准确率相对较低（测试集略高于验证集），存在一定的过拟合问题。

优化建议：

可以尝试增加模型的复杂度，例如增加隐藏层的数量或隐藏单元的数量，以提高模型的表达能力；但由于模型已出现过拟合，增大容量时应同时加强正则化。
可以尝试使用更大的嵌入维度和隐藏维度，以增加模型的容量（同样需要配合正则化，否则可能加剧过拟合）。
可以尝试使用其他的优化算法，例如SGD或AdamW，以提高模型的收敛速度和效果。
可以尝试使用更大的训练集，以提高模型的泛化能力。
可以尝试调整正则化强度，例如调整dropout比例（当前为0.5）或在优化器中加入权重衰减，以减少模型的过拟合。

总结起来，可以尝试调整模型的复杂度和参数设置，增加训练数据量，使用其他优化算法和正则化技术等方法，以提高模型的性能和泛化能力。