Soccer HFO using Reinforcement Learning
OpenAI gym-soccer environment using TensorFlow
HFO Setup
- Clone the HFO repo
git clone https://github.com/LARG/HFO.git
cd HFO
- Run
mkdir build && cd build
- Run
cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
- Run
make -j4
- Run
make install
cd ..
- Open one terminal and run
./bin/HFO --offense-agents=1 --defense-agents=1 --offense-npcs=2 --defense-npcs=2
- In another terminal run
./example/passing_agents.sh
and python3 example/high_level_custom_agent.py (a minimal standalone agent sketch follows this list)
- To stop the server, press
Ctrl+C
- The rcssserver process is still not stopped after that. To stop it, run
killall -9 rcssserver
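To drive an agent from your own Python script instead of the bundled example scripts, HFO installs a Python interface (the hfo module) alongside the binaries. Here is a minimal sketch along the lines of the repo's high-level example agents; treat the exact connect arguments as assumptions and compare against example/high_level_random_agent.py in your checkout.

import itertools
from hfo import HFOEnvironment, HIGH_LEVEL_FEATURE_SET, IN_GAME, SERVER_DOWN, DRIBBLE, QUIT

hfo = HFOEnvironment()
# Connect to the server started above (default port 6000).
hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
                    'bin/teams/base/config/formations-dt',
                    6000, 'localhost', 'base_left', False)

for episode in itertools.count():
    status = IN_GAME
    while status == IN_GAME:
        features = hfo.getState()   # high-level feature vector for this agent
        hfo.act(DRIBBLE)            # placeholder policy: always dribble
        status = hfo.step()
    if status == SERVER_DOWN:
        hfo.act(QUIT)
        break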

Gym Setup
- Clone the gym-soccer repo
git clone https://github.com/openai/gym-soccer.git
- Open the gym package under the site-packages directory of your Python installation. For me, it is located here
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/gym/envs
- The existing __init__.py in gym/envs is formatted like this (excerpt):
from gym.envs.registration import registry, register, make, spec

# Algorithmic
# ----------------------------------------
register(
    id='Copy-v0',
    entry_point='gym.envs.algorithmic:CopyEnv',
    max_episode_steps=200,
    reward_threshold=25.0,
)

register(
    id='RepeatCopy-v0',
    entry_point='gym.envs.algorithmic:RepeatCopyEnv',
    max_episode_steps=200,
    reward_threshold=75.0,
)

register(
    id='ReversedAddition-v0',
    entry_point='gym.envs.algorithmic:ReversedAdditionEnv',
    kwargs={'rows' : 2},
    max_episode_steps=200,
    reward_threshold=25.0,
)
4. Create a new directory called multiplayer inside gym/envs.
5. Register the Soccer environments by editing the gym/envs/__init__.py shown above. Add the lines below to the file.
register(
    id='Soccer-v0',
    entry_point='gym.envs.multiplayer:SoccerEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)

register(
    id='SoccerAgainstKeeper-v0',
    entry_point='gym.envs.multiplayer:SoccerAgainstKeeperEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)

register(
    id='SoccerEmptyGoal-v0',
    entry_point='gym.envs.multiplayer:SoccerEmptyGoalEnv',
    reward_threshold=-100.0,
    max_episode_steps=500,
)
6. In the multiplayer folder, add the files soccer_env.py, soccer_empty_goal.py, and soccer_against_keeper.py, which were cloned previously from the gym-soccer repo. For the entry points above to resolve, the multiplayer folder also needs an __init__.py that imports the env classes; see the sketch after this list.
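Since the register() calls point at gym.envs.multiplayer:SoccerEnv and friends, gym.make has to find those classes at the package level. Below is a minimal sketch of what that __init__.py could look like (module and class names are taken from the file names and entry points above; double-check them against the files you copied), followed by a quick check that the registration worked. Note that constructing the env will try to launch the HFO server, so run the check on a machine where HFO is installed.

# gym/envs/multiplayer/__init__.py
from gym.envs.multiplayer.soccer_env import SoccerEnv
from gym.envs.multiplayer.soccer_empty_goal import SoccerEmptyGoalEnv
from gym.envs.multiplayer.soccer_against_keeper import SoccerAgainstKeeperEnv

# Quick registration check (run separately, e.g. in a Python shell):
import gym
env = gym.make('SoccerEmptyGoal-v0')
print(env.observation_space)   # feature vector the agent observes (59 values in the code below)
print(env.action_space)        # parameterized soccer actions
env.close()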
Code
I am using TensorFlow 1.4. The code below builds a small policy network over the 59-value observation and trains it with a REINFORCE-style policy-gradient update: per-step gradients are collected during play, weighted by discounted and normalized rewards, averaged, and applied.
import tensorflow as tf
import gym
import numpy as np

num_inputs = 59      # size of the observation vector from the soccer env
num_hidden = 10
num_outputs = 5
learning_rate = 0.01

initializer = tf.contrib.layers.variance_scaling_initializer()

# Policy network: one hidden ELU layer on top of the observation.
X = tf.placeholder(tf.float32, shape=[None, num_inputs])
hidden_layer = tf.layers.dense(X, num_hidden, activation=tf.nn.elu, kernel_initializer=initializer)
logits = tf.layers.dense(hidden_layer, num_outputs)
# outputs = tf.nn.dense(logits, num_outputs) # probability of action 0 (left)
action = logits
# probabilities = tf.concat(axis=1, values=[outputs, 1 - outputs, outputs])
# action = tf.multinomial(probabilities, num_samples=1)

y = 1. - tf.to_float(action)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
gradients_and_variables = optimizer.compute_gradients(cross_entropy)

# Keep the raw gradients plus one placeholder per variable so reward-weighted
# gradients can be fed back in at the end of each iteration.
gradients = []
gradient_placeholders = []
grads_and_vars_feed = []
for gradient, variable in gradients_and_variables:
    gradients.append(gradient)
    gradient_placeholder = tf.placeholder(tf.float32, shape=gradient.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))
training_op = optimizer.apply_gradients(grads_and_vars_feed)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

def helper_discount_rewards(rewards, discount_rate):
    # Fold rewards backwards so each step gets its discounted return.
    discounted_rewards = np.zeros(len(rewards))
    cumulative_rewards = 0
    for step in reversed(range(len(rewards))):
        cumulative_rewards = rewards[step] + cumulative_rewards * discount_rate
        discounted_rewards[step] = cumulative_rewards
    return discounted_rewards

def discount_and_normalize_rewards(all_rewards, discount_rate):
    # Discount every game's rewards, then normalize across all games.
    all_discounted_rewards = []
    for rewards in all_rewards:
        all_discounted_rewards.append(helper_discount_rewards(rewards, discount_rate))
    flat_rewards = np.concatenate(all_discounted_rewards)
    reward_mean = flat_rewards.mean()
    reward_std = flat_rewards.std()
    return [(discounted_rewards - reward_mean) / reward_std
            for discounted_rewards in all_discounted_rewards]

env = gym.make("SoccerEmptyGoal-v0")

num_game_rounds = 2   # 10
max_game_steps = 1000
num_iterations = 10   # 250
discount_rate = 0.95

with tf.Session() as sess:
    sess.run(init)
    for iteration in range(num_iterations):
        print("Currently on Iteration: {} \n".format(iteration))
        # obs = env.render()
        # print(obs.shape)
        all_rewards = []
        all_gradients = []
        for game in range(num_game_rounds):
            current_rewards = []
            current_gradients = []
            observations = env.reset()
            # print(observations)
            for step in range(max_game_steps):
                action_val, gradients_val = sess.run([action, gradients],
                    feed_dict={X: observations.reshape(1, num_inputs)})
                try:
                    observations, reward, done, info = env.step([0, 1, 2, 3, 4, 5])  # action_val[0][0]
                    current_rewards.append(reward)
                    current_gradients.append(gradients_val)
                except Exception as e:
                    print("====>", e)
                    done = True
                if done:
                    break
            all_rewards.append(current_rewards)
            all_gradients.append(current_gradients)

        all_rewards = discount_and_normalize_rewards(all_rewards, discount_rate)
        feed_dict = {}
        for var_index, gradient_placeholder in enumerate(gradient_placeholders):
            # Weight each step's gradient by its normalized discounted reward.
            mean_gradients = np.mean([reward * all_gradients[game_index][step][var_index]
                                      for game_index, rewards in enumerate(all_rewards)
                                      for step, reward in enumerate(rewards)], axis=0)
            feed_dict[gradient_placeholder] = mean_gradients
        sess.run(training_op, feed_dict=feed_dict)

    print('SAVING GRAPH AND SESSION')
    meta_graph_def = tf.train.export_meta_graph(filename='models/my-650-step-model.meta')
    saver.save(sess, 'models/my-650-step-model')

# Reload the saved model and watch the agent play.
env = gym.make('SoccerEmptyGoal-v0')
observations = env.reset()
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('models/my-650-step-model.meta')
    new_saver.restore(sess, 'models/my-650-step-model')
    for x in range(500):
        env.render()
        action_val, gradients_val = sess.run([action, gradients],
            feed_dict={X: observations.reshape(1, num_inputs)})
        observations, reward, done, info = env.step([0, 1, 2, 3, 4, 5])  # action_val[0][0]
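As a quick sanity check of the reward shaping used above: helper_discount_rewards folds an end-of-episode reward back onto every earlier step, decayed by the discount rate, and discount_and_normalize_rewards then rescales those values to zero mean and unit standard deviation across all games before they weight the gradients. For example:

# Using the helper defined above: a reward of 1 at the last of three steps
print(helper_discount_rewards([0, 0, 1], 0.95))
# -> [0.9025 0.95   1.    ]   i.e. (1 * 0.95**2, 1 * 0.95, 1)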