Experiment: use a recurrent neural network to learn the spelling of Chinese pinyin. The focus of this experiment is preparing the data and the model. The pinyin data carries no tone marks: https://www.jianguoyun.com/p/DQ3els0Q-rqYBhi3pIgFIAA
Defining the dataset: a character-level model is used, so each character is one sample, and every sample is one-hot encoded. Because the samples are time-dependent, both random sampling of sequences and sequential partitioning of sequences are implemented (the code below uses sequential partitioning; a random-sampling sketch follows it). The labels Y have the same shape as X but run one time step ahead.
Preparing the data: the data used for one gradient update has the shape (time steps, batch size, vocabulary size).
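As a tiny illustration of the one-step offset between X and Y described above (a minimal sketch over a made-up string, not the actual pinyin corpus):

# Character-level samples: the label sequence is the input sequence shifted
# ahead by one time step.
text = "nihao"                 # toy string for illustration only
X_chars = list(text[:-1])      # ['n', 'i', 'h', 'a']
Y_chars = list(text[1:])       # ['i', 'h', 'a', 'o']
# Each character is then one-hot encoded over the vocabulary, so the encoded
# X and Y tensors end up with identical shapes.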
The model is implemented with PyTorch; the full code is given below:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
Define the dataset
with open('pinyin.txt', 'r', encoding='utf-8') as f:
    data = f.read()
chars = list(set(data))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
Define the shape of the data used in one sampling step
seq_length = 50
batch_size = 64
num_batches = int((data_size - 1) / seq_length)
Split the data into X and Y
X = np.zeros((num_batches, seq_length, vocab_size), dtype=np.float32)
Y = np.zeros((num_batches, seq_length, vocab_size), dtype=np.float32)
for i in range(num_batches):
    X_sequence = data[i*seq_length:(i+1)*seq_length]
    X_sequence_int = [char_to_int[ch] for ch in X_sequence]
    input_sequence = np.zeros((seq_length, vocab_size), dtype=np.float32)
    for j in range(seq_length):
        input_sequence[j][X_sequence_int[j]] = 1.0
    X[i] = input_sequence
    Y_sequence = data[i*seq_length+1:(i+1)*seq_length+1]
    Y_sequence_int = [char_to_int[ch] for ch in Y_sequence]
    output_sequence = np.zeros((seq_length, vocab_size), dtype=np.float32)
    for j in range(seq_length):
        output_sequence[j][Y_sequence_int[j]] = 1.0
    Y[i] = output_sequence
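The loop above implements sequential partitioning. For the random sampling of sequences mentioned in the experiment description, one possible sketch is given below; it reuses data, data_size, char_to_int, seq_length, and vocab_size from above, and the helper name random_sample_batch is only illustrative:

import random

def random_sample_batch(num_samples):
    # Pick random starting offsets and build one-hot (X, Y) pairs,
    # with Y shifted one character ahead of X.
    Xr = np.zeros((num_samples, seq_length, vocab_size), dtype=np.float32)
    Yr = np.zeros((num_samples, seq_length, vocab_size), dtype=np.float32)
    for n in range(num_samples):
        start = random.randint(0, data_size - seq_length - 2)
        for j in range(seq_length):
            Xr[n, j, char_to_int[data[start + j]]] = 1.0
            Yr[n, j, char_to_int[data[start + j + 1]]] = 1.0
    return Xr, Yr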
Define the model
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, batch_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, h):
        out, h = self.rnn(x, h)
        out = out.contiguous().view(-1, self.hidden_size)
        out = self.fc(out)
        return out, h

    def init_hidden(self, batch_size=None):
        # Default to the batch size given at construction; callers that feed
        # one sequence at a time can pass batch_size=1 explicitly.
        if batch_size is None:
            batch_size = self.batch_size
        return torch.zeros(self.num_layers, batch_size, self.hidden_size)
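A quick shape check of the model (a hypothetical smoke test, not part of the original experiment; the vocabulary size of 10 is arbitrary, and the hidden state is created for a batch of one sequence, matching how the training function below feeds data):

_model = RNN(input_size=10, hidden_size=16, output_size=10, num_layers=2, batch_size=1)
_x = torch.zeros(1, 5, 10)               # (batch, time steps, vocab), since batch_first=True
_h = _model.init_hidden(batch_size=1)    # (num_layers, batch, hidden_size)
_out, _h = _model(_x, _h)
print(_out.shape)                         # torch.Size([5, 10]) -- one row of logits per time step
print(_h.shape)                           # torch.Size([2, 1, 16])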
Define the training function
def train(model, criterion, optimizer, X, Y):
    model.train()
    # One sequence is fed at a time, so the hidden state is sized for a batch of 1.
    hidden = model.init_hidden(batch_size=1)
    total_loss = 0.0
    for i in range(X.shape[0]):
        optimizer.zero_grad()
        inputs = torch.from_numpy(X[i]).view(1, seq_length, vocab_size)
        targets = torch.from_numpy(Y[i]).view(1, seq_length, vocab_size)
        hidden = hidden.detach()
        output, hidden = model(inputs, hidden)
        # CrossEntropyLoss expects class indices, so convert the one-hot targets.
        loss = criterion(output, targets.view(-1, vocab_size).argmax(dim=1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        total_loss += loss.item()
    return total_loss / X.shape[0]
Define the prediction function
def predict(model, prefix, k):
    model.eval()
    hidden = model.init_hidden(batch_size=1)
    input = torch.zeros(1, 1, vocab_size)
    # Feed all prefix characters except the last to warm up the hidden state;
    # the last prefix character becomes the first input of the sampling loop.
    for ch in prefix[:-1]:
        input.zero_()
        input[0][0][char_to_int[ch]] = 1.0
        _, hidden = model(input, hidden)
    input.zero_()
    input[0][0][char_to_int[prefix[-1]]] = 1.0
    result = []
    for i in range(k):
        output, hidden = model(input, hidden)
        output = nn.functional.softmax(output, dim=1)
        output = output.detach().numpy()[0]
        output_idx = np.random.choice(range(vocab_size), p=output.ravel())
        result.append(int_to_char[output_idx])
        input.zero_()
        input[0][0][output_idx] = 1.0
    return ''.join(result)
Define the hyperparameters
input_size = vocab_size
hidden_size = 128
output_size = vocab_size
num_layers = 2
learning_rate = 0.01
num_epochs = 200
Define the model, loss function, and optimizer
model = RNN(input_size, hidden_size, output_size, num_layers, batch_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
Train the model
for epoch in range(num_epochs):
    loss = train(model, criterion, optimizer, X, Y)
    if epoch % 10 == 0:
        prefix = 'ni'
        k = 10
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss))
        print('Sample:', predict(model, prefix, k))