import tensorflow as tf
import numpy as np

class Seq2SeqModel:
    def __init__(self, vocab_size, embedding_dim, hidden_dim, max_seq_len,
                 batch_size, start_token, end_token, learning_rate=0.001):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.max_seq_len = max_seq_len
        self.batch_size = batch_size
        self.start_token = start_token
        self.end_token = end_token
        self.learning_rate = learning_rate

        self.build_model()

        # One session shared by train() and predict() so the trained weights persist.
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def build_model(self):
        # Padded integer token ids; the decoder sequences are one step longer
        # because of the prepended start token / appended end token.
        self.encoder_inputs = tf.placeholder(shape=(self.batch_size, self.max_seq_len), dtype=tf.int32)
        self.decoder_inputs = tf.placeholder(shape=(self.batch_size, self.max_seq_len + 1), dtype=tf.int32)
        self.decoder_targets = tf.placeholder(shape=(self.batch_size, self.max_seq_len + 1), dtype=tf.int32)

        # Separate embedding tables for the encoder and decoder vocabularies.
        self.encoder_embeddings = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_dim], -1.0, 1.0))
        self.decoder_embeddings = tf.Variable(tf.random_uniform([self.vocab_size, self.embedding_dim], -1.0, 1.0))

        encoder_inputs_embedded = tf.nn.embedding_lookup(self.encoder_embeddings, self.encoder_inputs)
        decoder_inputs_embedded = tf.nn.embedding_lookup(self.decoder_embeddings, self.decoder_inputs)

        # Distinct variable scopes keep the encoder and decoder LSTM weights separate.
        encoder_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim)
        _, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs_embedded,
                                             dtype=tf.float32, scope='encoder')

        decoder_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim)
        decoder_outputs, _ = tf.nn.dynamic_rnn(decoder_cell, decoder_inputs_embedded,
                                               initial_state=encoder_state,
                                               dtype=tf.float32, scope='decoder')

        # Project each decoder output to a distribution over the vocabulary.
        self.decoder_logits = tf.layers.dense(decoder_outputs, self.vocab_size, activation=None)

        self.decoder_prediction = tf.argmax(self.decoder_logits, 2)

        # All positions are weighted equally, since every sequence is padded to max_seq_len + 1.
        masks = tf.sequence_mask(lengths=tf.fill([self.batch_size], self.max_seq_len + 1),
                                 maxlen=self.max_seq_len + 1, dtype=tf.float32)

        self.loss = tf.contrib.seq2seq.sequence_loss(logits=self.decoder_logits,
                                                     targets=self.decoder_targets,
                                                     weights=masks)

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.train_op = optimizer.minimize(self.loss)

    def train(self, input_data, target_data, num_epochs=100):
        for epoch in range(num_epochs):
            epoch_loss = 0.0
            for i in range(input_data.shape[0] // self.batch_size):
                input_batch = input_data[i * self.batch_size:(i + 1) * self.batch_size, :]
                target_batch = target_data[i * self.batch_size:(i + 1) * self.batch_size, :]

                # Decoder input is the target prefixed with the start token;
                # decoder target is the target suffixed with the end token.
                decoder_input_batch = np.concatenate(
                    (np.full((self.batch_size, 1), self.start_token), target_batch), axis=1).astype(np.int32)
                decoder_target_batch = np.concatenate(
                    (target_batch, np.full((self.batch_size, 1), self.end_token)), axis=1).astype(np.int32)

                _, loss = self.sess.run(
                    [self.train_op, self.loss],
                    feed_dict={self.encoder_inputs: input_batch,
                               self.decoder_inputs: decoder_input_batch,
                               self.decoder_targets: decoder_target_batch})
                epoch_loss += loss

            print('Epoch:', epoch + 1, ' Loss:', epoch_loss)

    def predict(self, input_data):
        outputs = []

        for i in range(input_data.shape[0] // self.batch_size):
            input_batch = input_data[i * self.batch_size:(i + 1) * self.batch_size, :]

            # Greedy decoding: start from the start token and feed each predicted
            # token back in as the next decoder input.
            decoder_input_batch = np.full((self.batch_size, self.max_seq_len + 1),
                                          self.start_token, dtype=np.int32)
            for t in range(self.max_seq_len):
                output_batch = self.sess.run(
                    self.decoder_prediction,
                    feed_dict={self.encoder_inputs: input_batch,
                               self.decoder_inputs: decoder_input_batch})
                decoder_input_batch[:, t + 1] = output_batch[:, t]

            for j in range(self.batch_size):
                output_seq = output_batch[j, :]
                # Truncate at the first end token, if one was produced.
                end_idx = np.where(output_seq == self.end_token)[0]
                if len(end_idx) > 0:
                    output_seq = output_seq[:end_idx[0]]
                outputs.append(output_seq)

        return outputs

vocab_size = 10000
embedding_dim = 256
hidden_dim = 512
max_seq_len = 20
batch_size = 64
start_token = 0
end_token = 1
learning_rate = 0.001

model = Seq2SeqModel(vocab_size, embedding_dim, hidden_dim, max_seq_len, batch_size, start_token, end_token, learning_rate)

# Dummy training data: random token sequences; each target is its input shifted
# left by one position with the end token appended.
input_data = np.random.randint(0, vocab_size, size=(10000, max_seq_len))
target_data = np.concatenate((input_data[:, 1:], np.full((10000, 1), end_token)), axis=1)

model.train(input_data, target_data)

test_data = np.random.randint(0, vocab_size, size=(128, max_seq_len))
outputs = model.predict(test_data)

for i in range(5):
    print('Input:', test_data[i, :])
    print('Output:', outputs[i])

Original question: write a chatbot program based on a seq2seq model with attention.
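The question asks for an attention-based model, while the code above uses a plain LSTM encoder-decoder. Below is a minimal sketch of how the decoder could be given Luong attention with the same TF 1.x contrib API; it assumes build_model keeps the encoder's per-step outputs (encoder_outputs) instead of discarding them, and all other names match the code above.

    # Sketch only (TF 1.x contrib API). Assumes build_model keeps the encoder outputs:
    # encoder_outputs, encoder_state = tf.nn.dynamic_rnn(..., scope='encoder')
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        num_units=self.hidden_dim,
        memory=encoder_outputs)          # attend over the encoder's per-step outputs

    decoder_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim)
    attn_cell = tf.contrib.seq2seq.AttentionWrapper(
        decoder_cell, attention_mechanism,
        attention_layer_size=self.hidden_dim)

    # The wrapped cell has its own state structure; seed its LSTM part with the
    # encoder's final state.
    initial_state = attn_cell.zero_state(self.batch_size, tf.float32).clone(
        cell_state=encoder_state)

    decoder_outputs, _ = tf.nn.dynamic_rnn(
        attn_cell, decoder_inputs_embedded,
        initial_state=initial_state, scope='decoder')

Luong (multiplicative) attention is used here for brevity; tf.contrib.seq2seq.BahdanauAttention is a drop-in alternative. The rest of the model (projection to logits, sequence_loss, and the training loop) stays unchanged.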

