Python 实现基础 RNN 模型 - 代码示例及说明 - 常规

以下是 Python 实现基本的 RNN 模型的示例代码：

import numpy as np

class RNN:
    """Minimal single-layer recurrent network with a tanh hidden state.

    The recurrence implemented by :meth:`forward` is::

        z_t = Wxh @ x_t + Whh @ h_{t-1} + bh
        h_t = tanh(z_t)
        y_t = Why @ h_t + by

    with ``h_{-1}`` fixed to zeros at the start of every sequence.

    Attributes:
        Wxh: input-to-hidden weights, shape ``(hidden_size, input_size)``.
        Whh: hidden-to-hidden weights, shape ``(hidden_size, hidden_size)``.
        Why: hidden-to-output weights, shape ``(output_size, hidden_size)``.
        bh: hidden bias, shape ``(hidden_size, 1)``.
        by: output bias, shape ``(output_size, 1)``.
        cache: dict written by :meth:`forward` and read by :meth:`backward`.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters: small random weights and zero biases."""
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weight matrices, drawn from a standard normal and scaled by 0.01.
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01

        # Bias vectors, kept as column vectors.
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, x):
        """Run the network over a whole input sequence.

        Args:
            x: sequence of length ``T``; each ``x[t]`` holds ``input_size``
                values (a ``(input_size, 1)`` column or any array-like that
                can be reshaped to one).

        Returns:
            Array of shape ``(output_size, T)``; column ``t`` is ``y_t``.

        Raises:
            ValueError: if a time step does not hold ``input_size`` values.
        """
        inputs = [
            np.asarray(x_t, dtype=float).reshape(self.input_size, 1)
            for x_t in x
        ]
        steps = len(inputs)

        h = np.zeros((self.hidden_size, 1))
        # hidden_states[t] is the state *entering* step t, so the state
        # produced by step t lives at hidden_states[t + 1].
        hidden_states = [h]
        pre_activations = []
        # One output column per time step (not a single column).
        y = np.zeros((self.output_size, steps))

        for t, x_t in enumerate(inputs):
            z = np.dot(self.Wxh, x_t) + np.dot(self.Whh, h) + self.bh
            h = np.tanh(z)
            pre_activations.append(z)
            hidden_states.append(h)
            y[:, t] = (np.dot(self.Why, h) + self.by)[:, 0]

        self.cache = {
            'x': inputs,            # normalized (input_size, 1) columns
            'h': h,                 # final hidden state
            'hs': hidden_states,    # full history, length T + 1
            'y': y,
            'z': pre_activations,
        }
        return y

    def backward(self, dy):
        """Back-propagate through time for the most recent :meth:`forward`.

        This method only computes gradients; it does not modify the
        parameters. :meth:`train` applies the update.

        Args:
            dy: gradient of the loss with respect to the outputs, shape
                ``(output_size, T)`` matching the last forward pass.

        Returns:
            Tuple ``(dWxh, dWhh, dWhy, dbh, dby)`` of gradients, each with
            the same shape as the corresponding parameter.

        Raises:
            ValueError: if ``dy`` does not have shape ``(output_size, T)``.
        """
        inputs = self.cache['x']
        hidden_states = self.cache['hs']
        steps = len(self.cache['z'])

        dy = np.asarray(dy, dtype=float)
        if dy.shape != (self.output_size, steps):
            raise ValueError(
                "dy must have shape (%d, %d), got %r"
                % (self.output_size, steps, dy.shape)
            )

        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        # Gradient flowing into h_t from step t + 1 (zero after the last step).
        dh_next = np.zeros((self.hidden_size, 1))

        for t in reversed(range(steps)):
            dyt = dy[:, t].reshape(-1, 1)
            h_t = hidden_states[t + 1]
            h_prev = hidden_states[t]

            # Output layer: y_t = Why h_t + by.
            dWhy += np.dot(dyt, h_t.T)
            dby += dyt

            # Hidden layer: h_t = tanh(z_t), so dz = (1 - h_t^2) * dh.
            dh = np.dot(self.Why.T, dyt) + dh_next
            dz = (1.0 - h_t ** 2) * dh
            dWxh += np.dot(dz, inputs[t].T)
            # The recurrent weights multiply the *previous* hidden state.
            dWhh += np.dot(dz, h_prev.T)
            dbh += dz
            dh_next = np.dot(self.Whh.T, dz)

        return dWxh, dWhh, dWhy, dbh, dby

    def train(self, x, y, learning_rate=0.1):
        """Perform one gradient-descent step on the sum of squared errors.

        Args:
            x: input sequence, as accepted by :meth:`forward`.
            y: targets, broadcastable to the prediction shape
                ``(output_size, T)`` without changing that shape.
            learning_rate: step size applied to every gradient.

        Returns:
            The loss ``sum((y - y_pred) ** 2)`` measured before the update.

        Raises:
            ValueError: if ``y`` is not compatible with the prediction shape.
        """
        # Forward pass.
        y_pred = self.forward(x)

        # Loss: sum of squared errors over all outputs and time steps.
        error = y_pred - np.asarray(y, dtype=float)
        if error.shape != y_pred.shape:
            raise ValueError(
                "y with shape %r is not compatible with predictions of "
                "shape %r" % (np.shape(y), y_pred.shape)
            )
        loss = np.sum(error ** 2)

        # Backward pass: d(loss)/d(y_pred) = 2 * (y_pred - y).
        dWxh, dWhh, dWhy, dbh, dby = self.backward(2.0 * error)

        # Single parameter update, scaled by the learning rate.
        self.Wxh -= learning_rate * dWxh
        self.Whh -= learning_rate * dWhh
        self.Why -= learning_rate * dWhy
        self.bh -= learning_rate * dbh
        self.by -= learning_rate * dby

        return loss

在这个 RNN 模型中，隐藏层使用了 tanh 激活函数，而不是 ReLU 或 sigmoid，因为 tanh 函数可以将隐藏状态限制在 (-1, 1) 之间，并且以零为中心，这有助于控制隐藏状态在各时间步之间传递时的取值范围（输出层是线性的，因此模型的最终输出本身并不受此范围限制）。另外，我们还使用了随时间反向传播（BPTT）算法来计算梯度，并使用梯度下降算法来更新权重和偏置项。在训练过程中，我们使用平方误差之和（即未取平均的均方误差）作为损失函数。