"""Reinforcement Learning Agent for the CartPole Environment."""
def run(self):
    """Run the training loop over ``self.episodes`` episodes.

    Interacts with the module-level ``env`` (gym-style environment),
    stores transitions in a replay buffer, trains on sampled minibatches
    once enough transitions are buffered, and checkpoints the model
    whenever an episode beats ``self.best_reward``.

    NOTE(review): the ``def run(self):`` header was lost in the original
    paste; reconstructed because the script entry point calls ``agent.run()``.
    """
    # Replay buffer sized by observation/action dimensions.
    # NOTE(review): CartPole's action space is Discrete, whose .shape is (),
    # so the original `.shape[0]` would raise IndexError; `.n` is the action
    # count — confirm against Memory's expected constructor arguments.
    # `n_steps` is presumably defined elsewhere in the file — verify.
    memory = Memory(n_steps, env.observation_space.shape, env.action_space.n)

    # Number of episodes to train.
    self.episodes = 5

    for e in range(self.episodes):
        state = env.reset()
        total_reward = 0
        for step in range(self.max_steps):
            action = self.act(state, memory)
            next_state, reward, done, info = env.step(action)
            memory.add_step((state, action, reward, next_state, done))
            state = next_state
            total_reward += reward
            if done:
                break
        # Train only once the buffer holds more than one batch.
        if memory.size > self.batch_size:
            self.train(memory)
        print('Episode: {}/{}, score: {}, epsilon: {}'.format(e, self.episodes, total_reward, self.epsilon))
        # Checkpoint on a new best episode reward.
        if total_reward > self.best_reward:
            self.save()
            self.best_reward = total_reward
    # Close the environment.
    env.close()
def act(self, state, memory):
    """Select an action for ``state`` with an epsilon-greedy policy.

    With probability ``self.epsilon`` a random action is sampled from the
    module-level ``env``'s action space (exploration); otherwise the action
    with the highest predicted Q-value is chosen (exploitation).

    ``memory`` is accepted for interface compatibility but unused here.
    """
    # Draw a uniform random number in [0, 1).
    if np.random.rand() <= self.epsilon:
        # Explore: uniformly random action.
        action = env.action_space.sample()
    else:
        # Exploit: greedy action w.r.t. the model's Q-value estimates.
        action = np.argmax(self.model.predict(np.array([state])))
    return action
def train(self, memory):
    """Fit the Q-network on one minibatch sampled from the replay buffer.

    Builds Bellman targets ``r + gamma * max_a' Q(s', a')`` (zeroed at
    terminal transitions), overwrites only the taken action's Q-value in
    the model's current predictions, runs one epoch of fitting, and
    anneals the exploration rate.
    """
    batch = memory.sample(self.batch_size)
    # Unpack the transitions column-wise into arrays.
    states, actions, rewards, next_states, dones = (
        np.array(col) for col in zip(*batch)
    )
    # Q-values for the successor states.
    next_qs = self.model.predict(next_states)
    # Bellman targets; (1 - dones) masks out terminal transitions.
    targets = rewards + self.gamma * np.max(next_qs, axis=1) * (1 - dones)
    # Start from current predictions so untaken actions keep their values,
    # then overwrite the taken action's entry in each row.
    targets_full = self.model.predict(states)
    targets_full[np.arange(self.batch_size), actions] = targets
    # One gradient-descent pass over the minibatch.
    self.model.fit(states, targets_full, epochs=1, verbose=0)
    # Decay epsilon toward its floor.
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay
def save(self):
    """Persist the current model to disk (Keras HDF5 format)."""
    self.model.save('cartpole_model.h5')
# Script entry point: the original `if name == 'main'` would raise
# NameError — the dunder names are required.
if __name__ == '__main__':
    agent = Agent()
    agent.run()
# Source: https://www.cveoy.top/t/topic/lidR — copyright belongs to the author; do not repost or scrape.