import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

class PinyinDataset(Dataset):
    """Next-token dataset over integer-encoded sequences.

    Each item is a pair ``(x, y)`` built from one stored sequence: ``x`` is the
    sequence without its last element and ``y`` is the sequence without its
    first element, so ``y[t]`` is the token that follows ``x[t]`` and both
    tensors have the same length.
    """

    def __init__(self, data):
        """Store the sequences.

        Args:
            data: Indexable, sized collection of sequences. Each sequence is
                sliced and handed to ``torch.tensor`` when an item is read.
        """
        self.data = data

    def __len__(self):
        """Return the number of stored sequences."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` tensors for the sequence at ``idx``."""
        sequence = self.data[idx]
        x = torch.tensor(sequence[:-1])
        # The target is the same sequence shifted one time step ahead.
        y = torch.tensor(sequence[1:])
        return x, y

class RNNModel(nn.Module):
    """Plain ``nn.RNN`` followed by a linear layer applied at every time step."""

    def __init__(self, input_size, hidden_size, output_size, num_layers, batch_first=True):
        """Build the recurrent layer and the per-step output projection.

        Args:
            input_size: Number of features per time step fed to the RNN.
            hidden_size: Size of the RNN hidden state.
            output_size: Number of values produced per time step by the
                linear layer.
            num_layers: Number of stacked RNN layers.
            batch_first: Forwarded to ``nn.RNN``.
        """
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=batch_first)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        """Run the RNN over ``x`` and project every time step.

        Args:
            x: Input sequence. A floating-point tensor is passed to the RNN
                unchanged. A non-floating tensor (e.g. integer token ids) is
                treated as one scalar feature per time step: it gets a
                trailing feature dimension and is cast to the layer dtype,
                which only matches an RNN built with ``input_size == 1``.
            h0: Optional initial hidden state; ``None`` lets ``nn.RNN`` start
                from zeros.

        Returns:
            Tuple ``(out, hn)``: the linear projection of the RNN output at
            every time step, and the final hidden state.
        """
        if not torch.is_floating_point(x):
            # nn.RNN needs a float tensor with an explicit feature axis;
            # integer id tensors have neither.
            x = x.unsqueeze(-1).to(self.fc.weight.dtype)
        out, hn = self.rnn(x, h0)
        out = self.fc(out)
        return out, hn

def train(model, device, train_loader, optimizer, criterion, clip):
    """Run one training pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: Module whose call returns ``(output, hidden)``; only ``output``
            is used here.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable yielding ``(x, y)`` tensor pairs.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called with the output flattened to
            ``(-1, output.shape[-1])`` and the target flattened to ``(-1,)``.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        The per-batch loss averaged over the number of batches.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output, _ = model(x)
        # reshape (not view) so non-contiguous outputs/targets are accepted.
        loss = criterion(output.reshape(-1, output.shape[-1]), y.reshape(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # Batches are counted instead of calling len(), so loaders without a
    # length also work.
    if num_batches == 0:
        raise ValueError("train_loader yielded no batches")
    return total_loss / num_batches

def evaluate(model, device, val_loader, criterion):
    """Compute the mean batch loss over ``val_loader`` without updating the model.

    Args:
        model: Module whose call returns ``(output, hidden)``; only ``output``
            is used here.
        device: Device the batches are moved to before the forward pass.
        val_loader: Iterable yielding ``(x, y)`` tensor pairs.
        criterion: Loss called with the output flattened to
            ``(-1, output.shape[-1])`` and the target flattened to ``(-1,)``.

    Returns:
        The per-batch loss averaged over the number of batches.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            output, _ = model(x)
            # reshape (not view) so non-contiguous outputs/targets are accepted.
            loss = criterion(output.reshape(-1, output.shape[-1]), y.reshape(-1))
            total_loss += loss.item()
            num_batches += 1
    # Batches are counted instead of calling len(), so loaders without a
    # length also work.
    if num_batches == 0:
        raise ValueError("val_loader yielded no batches")
    return total_loss / num_batches

def predict(model, device, prefix, k):
    """Greedily predict the ``k`` tokens that follow ``prefix``.

    The prefix is run through the model once. After that, only the most
    recently predicted token is fed back together with the carried hidden
    state, so every token is consumed exactly once.

    Args:
        model: Module called as ``model(tokens)`` / ``model(tokens, hidden)``
            and returning ``(output, hidden)``, where ``output`` holds one
            score vector per time step of a batch-first input.
        device: Device on which the token tensors are created.
        prefix: Non-empty sequence of token ids to condition on.
        k: Number of tokens to predict (``k=1`` is single-step prediction).

    Returns:
        List with the ``k`` predicted token ids; the prefix is not included.

    Raises:
        ValueError: If ``prefix`` is empty.
    """
    model.eval()
    tokens = torch.as_tensor(prefix, device=device).unsqueeze(0)  # add batch dim
    if tokens.numel() == 0:
        raise ValueError("prefix must contain at least one token")

    predictions = []
    with torch.no_grad():
        out, hn = model(tokens)
        for step in range(k):
            # Scores at the last time step decide the next token (greedy).
            next_token = out[:, -1].argmax(dim=-1, keepdim=True)
            predictions.append(next_token.item())
            if step + 1 < k:
                # Feed only the new token; hn already summarizes the history.
                out, hn = model(next_token, hn)
    return predictions

# Demo driver: trains the RNN on a toy corpus and prints a 3-step prediction.
# Guarded so that importing this module for its classes/functions does not
# start a training run.
if __name__ == "__main__":
    # Toy corpus: each sequence counts upward by one from a different start.
    data = [
        [0, 1, 2, 3, 4, 5],
        [1, 2, 3, 4, 5, 6],
        [2, 3, 4, 5, 6, 7],
        [3, 4, 5, 6, 7, 8],
        [4, 5, 6, 7, 8, 9],
    ]
    dataset = PinyinDataset(data)
    train_loader = DataLoader(dataset, batch_size=2, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = RNNModel(input_size=1, hidden_size=16, output_size=10, num_layers=1).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(100):
        train_loss = train(model, device, train_loader, optimizer, criterion, clip=1)
        # NOTE: there is no held-out split; the "Val Loss" below is measured
        # on the training loader.
        val_loss = evaluate(model, device, train_loader, criterion)
        print("Epoch: {}, Train Loss: {:.4f}, Val Loss: {:.4f}".format(epoch + 1, train_loss, val_loss))

    prefix = [0, 1, 2, 3, 4]
    predicted = predict(model, device, prefix, k=3)
    # The counting pattern in `data` continues this prefix with 5, 6, 7.
    print(predicted)

# 使用循环神经网络学习汉语拼音的拼写。
# 样本是时间相关的,分别实现序列的随机采样和序列的顺序划分;标签 Y 与 X 同形状,但时间超前 1。
# 准备数据:一次梯度更新使用的数据形状为(时间步,Batch,类别数)。
# 实现基本循环神经网络模型:循环单元为 nn.RNN 或 GRU;输出层的全连接使用 RNN 所有时间步的输出;隐状态初始值为 0;测试前向传播;如果采用顺序划分,需梯度截断。
# 训练:损失函数为平均交叉熵。
# 预测:给定一个前缀,进行单步预测和 K 步预测。

# 原文地址: https://www.cveoy.top/t/topic/fFzj 著作权归作者所有。请勿转载和采集!
#
# 免费AI点我,无需注册和登录