Soccer HFO using Reinforcement Learning

OpenAI gym-soccer environment using TensorFlow

HFO Setup

  1. cd HFO
  2. Run mkdir build && cd build
  3. Run cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
  4. Run make -j4
  5. Run make install
  6. cd ..
  7. Open one terminal and run ./bin/HFO --offense-agents=1 --defense-agents=1 --offense-npcs=2 --defense-npcs=2 (see the Python launcher sketch below)
  8. In another terminal, run ./example/passing_agents.sh and python3 example/high_level_custom_agent.py
  9. To stop the server, press Ctrl+C
  10. The rcssserver process keeps running even after that. To stop it, run killall -9 rcssserver
HFO environment
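If you prefer to drive the server from a script instead of a separate terminal, here is a minimal Python sketch (a helper of my own, not part of the HFO repo) that launches the server with the same flags as step 7 and cleans up rcssserver as in step 10. It assumes it is run from the HFO root directory:

# launch_hfo.py -- hypothetical helper, run from the HFO root directory
import signal
import subprocess

server = subprocess.Popen([
    "./bin/HFO",
    "--offense-agents=1", "--defense-agents=1",
    "--offense-npcs=2", "--defense-npcs=2",
])

try:
    server.wait()                        # block until the server exits or Ctrl+C
except KeyboardInterrupt:
    server.send_signal(signal.SIGINT)    # same effect as Ctrl+C in the terminal
finally:
    # rcssserver can outlive the HFO wrapper, so kill it explicitly (step 10)
    subprocess.call(["killall", "-9", "rcssserver"])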

Gym Setup

  1. Open the Python gym module in the site-packages directory. For me, it is located at /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/gym/envs
  2. The format of the existing __init__.py is:
from gym.envs.registration import registry, register, make, spec

# Algorithmic
# ----------------------------------------
register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)
register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    max_episode_steps=200,
    reward_threshold=75.0,
)
register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows': 2},
    max_episode_steps=200,
    reward_threshold=25.0,
)

  3. Create a new directory called multiplayer inside gym/envs.

  4. We need to register the Soccer environments here by editing __init__.py. Add the lines below to the file.

register(
    id='Soccer-v0',
    entry_point='gym.envs.multiplayer:SoccerEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)
register(
    id='SoccerAgainstKeeper-v0',
    entry_point='gym.envs.multiplayer:SoccerAgainstKeeperEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)
register(
    id='SoccerEmptyGoal-v0',
    entry_point='gym.envs.multiplayer:SoccerEmptyGoalEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)

  5. In the multiplayer folder, add the files soccer_env.py, soccer_empty_goal.py, and soccer_against_keeper.py, which were cloned earlier from the gym-soccer repo. You can verify the registration with the short sketch below.
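As a quick sanity check, here is a minimal sketch (assuming gym is installed and the registrations above are in place) that locates the gym/envs directory and confirms the Soccer IDs are registered:

# Hypothetical check script, not part of gym or gym-soccer
import os
import gym

# Where gym/envs/__init__.py lives on this machine
print(os.path.join(os.path.dirname(gym.__file__), "envs"))

# If the registration worked, the Soccer IDs appear in the registry
print([env_id for env_id in gym.envs.registry.env_specs if env_id.startswith("Soccer")])

# Constructing the environment exercises the entry_point; depending on the
# gym-soccer version this may also launch or connect to an HFO server
env = gym.make("SoccerEmptyGoal-v0")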

Code

import tensorflow as tf
import gym
import numpy as np

# Network and environment dimensions
num_inputs = 59
num_hidden = 10
num_outputs = 5
learning_rate = 0.01

initializer = tf.contrib.layers.variance_scaling_initializer()

# Simple one-hidden-layer policy network
X = tf.placeholder(tf.float32, shape=[None, num_inputs])
hidden_layer = tf.layers.dense(X, num_hidden, activation=tf.nn.elu,
                               kernel_initializer=initializer)
logits = tf.layers.dense(hidden_layer, num_outputs)
# outputs = tf.nn.dense(logits, num_outputs)  # probability of action 0 (left)
action = logits
# probabilities = tf.concat(axis=1, values=[outputs, 1 - outputs, outputs])
# action = tf.multinomial(probabilities, num_samples=1)

# y is derived from the network output and used as the target for the loss
y = 1. - tf.to_float(action)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
gradients_and_variables = optimizer.compute_gradients(cross_entropy)

# Placeholders so reward-weighted mean gradients can be fed back in later
gradients = []
gradient_placeholders = []
grads_and_vars_feed = []
for gradient, variable in gradients_and_variables:
    gradients.append(gradient)
    gradient_placeholder = tf.placeholder(tf.float32, shape=gradient.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))
training_op = optimizer.apply_gradients(grads_and_vars_feed)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

def helper_discount_rewards(rewards, discount_rate):
    """Discount the rewards of one episode back through time."""
    discounted_rewards = np.zeros(len(rewards))
    cumulative_rewards = 0
    for step in reversed(range(len(rewards))):
        cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
        discounted_rewards[step] = cumulative_rewards
    return discounted_rewards

def discount_and_normalize_rewards(all_rewards, discount_rate):
    """Discount every episode's rewards, then normalize across all episodes."""
    all_discounted_rewards = []
    for rewards in all_rewards:
        all_discounted_rewards.append(helper_discount_rewards(rewards, discount_rate))
    flat_rewards = np.concatenate(all_discounted_rewards)
    reward_mean = flat_rewards.mean()
    reward_std = flat_rewards.std()
    return [(discounted_rewards - reward_mean) / reward_std
            for discounted_rewards in all_discounted_rewards]

env = gym.make("SoccerEmptyGoal-v0")

num_game_rounds = 2   # 10
max_game_steps = 1000
num_iterations = 10   # 250
discount_rate = 0.95

with tf.Session() as sess:
    sess.run(init)
    for iteration in range(num_iterations):
        print("Currently on Iteration: {} \n".format(iteration))
        # obs = env.render()
        # print(obs.shape)
        all_rewards = []
        all_gradients = []
        for game in range(num_game_rounds):
            current_rewards = []
            current_gradients = []
            observations = env.reset()
            # print(observations)
            for step in range(max_game_steps):
                action_val, gradients_val = sess.run(
                    [action, gradients],
                    feed_dict={X: observations.reshape(1, num_inputs)})
                try:
                    # NOTE: a fixed action vector is passed for now; the sampled
                    # action_val[0][0] is not used yet
                    observations, reward, done, info = env.step([0, 1, 2, 3, 4, 5])
                    current_rewards.append(reward)
                    current_gradients.append(gradients_val)
                except Exception as e:
                    print("====>", e)
                    done = True
                if done:
                    break
            all_rewards.append(current_rewards)
            all_gradients.append(current_gradients)

        # Weight each step's gradients by its normalized discounted reward
        all_rewards = discount_and_normalize_rewards(all_rewards, discount_rate)
        feed_dict = {}
        for var_index, gradient_placeholder in enumerate(gradient_placeholders):
            mean_gradients = np.mean(
                [reward * all_gradients[game_index][step][var_index]
                 for game_index, rewards in enumerate(all_rewards)
                 for step, reward in enumerate(rewards)], axis=0)
            feed_dict[gradient_placeholder] = mean_gradients
        sess.run(training_op, feed_dict=feed_dict)

    print('SAVING GRAPH AND SESSION')
    meta_graph_def = tf.train.export_meta_graph(filename='models/my-650-step-model.meta')
    saver.save(sess, 'models/my-650-step-model')

env = gym.make('SoccerEmptyGoal-v0')
observations = env.reset()

with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('models/my-650-step-model.meta')
    new_saver.restore(sess, 'models/my-650-step-model')
    for x in range(500):
        env.render()
        action_val, gradients_val = sess.run(
            [action, gradients],
            feed_dict={X: observations.reshape(1, num_inputs)})
        observations, reward, done, info = env.step([0, 1, 2, 3, 4, 5])  # action_val[0][0]
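To make the reward shaping concrete, here is a small self-contained example (with made-up numbers) of what helper_discount_rewards computes before the normalization step:

import numpy as np

def helper_discount_rewards(rewards, discount_rate):
    # Same helper as above: G_t = r_t + discount_rate * G_{t+1}
    discounted_rewards = np.zeros(len(rewards))
    cumulative_rewards = 0
    for step in reversed(range(len(rewards))):
        cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
        discounted_rewards[step] = cumulative_rewards
    return discounted_rewards

# One short episode with rewards [0, 0, 1] and discount_rate = 0.95
print(helper_discount_rewards([0, 0, 1], 0.95))
# -> [0.9025, 0.95, 1.0]: earlier steps get credit for the later reward,
#    scaled down by the discount factor

discount_and_normalize_rewards then subtracts the mean of all discounted rewards across games and divides by their standard deviation, so better-than-average steps end up with positive weights and worse-than-average steps with negative weights when the gradients are averaged.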
