OpenAI’s Multi-Agent Particle Environments

Multi-Agent Environments include Particle interaction

The widely known Gym environments are Classic Control, Atari, Box2D, and MuJoCo. These are not helpful for multi-agent training. For this, OpenAI created an open-source set of environments. Although these environments have no web page like the others have, they are very useful for training multiple agents.

Setup

Common Errors

# Fix: replace the prng-based line (kept commented out below) with the
# np.random.RandomState() call that follows it.
# random_array = prng.np_random.rand(self.num_discrete_space)
random_array = np.random.RandomState().rand(self.num_discrete_space)
# Ref: here

2.

# Fix: Open rendering.py file. Comment out below line
# from gym.utils import reraise
# Ref: here

Search and replace all by and remove parameter.

3. This issue will not come up if you are using OpenAI Baselines. I am using Stable Baselines without the MPI dependency, because my macOS Sierra doesn't support the new GCC version and hence cannot use the LLVM compiler. I am running out of disk space, so I did not install LLVM. If you have the LLVM compiler preinstalled, or if you can install it successfully, I suggest you use OpenAI Baselines.

OK, let's fix this. To get started, I am going to use this code.

  1. Clone the above project, go to main.py and replace line 12
# from utils.env_wrappers import SubprocVecEnv, DummyVecEnv
from stable_baselines.common.vec_env import SubprocVecEnv, DummyVecEnv

2. Open utils/make_envs.py and comment line 47

# Build the environment from the world and the scenario's reset, reward and
# observation callbacks. The discrete_action keyword is deliberately left out:
#     discrete_action=discrete_action
env = MultiAgentEnv(
    world,
    scenario.reset_world,
    scenario.reward,
    scenario.observation,
)

3. Open stable_baselines/common/vec_env/utils.py. Replace lines 71–72

# Lines being replaced:
#     shapes[key] = box.shape
#     dtypes[key] = box.dtype
# The shape is taken from an array built out of `box`, and the dtype entry is
# the Python type of its first element.
box_as_array = np.array(box)
shapes[key] = box_as_array.shape
dtypes[key] = type(box[0])

4. Go to stable_baselines/common/vec_env/dummy_vec_env.py and replace __init__() and step_wait() with the code below

def __init__(self, env_fns):
    """Create every sub-environment and allocate the per-environment buffers.

    Args:
        env_fns: Sequence of zero-argument callables; each one is called once
            and its return value is stored as a sub-environment.
    """
    self.envs = [fn() for fn in env_fns]
    # The first environment supplies the spaces, agents and metadata used below.
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
    obs_space = env.observation_space
    self.keys, shapes, dtypes = obs_space_info(obs_space)
    # One zero-filled buffer per observation key, with a leading num_envs axis.
    # NOTE(review): self.num_envs is presumably set by VecEnv.__init__ - confirm.
    self.buf_obs = OrderedDict(
        (k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]))
        for k in self.keys
    )
    # Builtin `bool` rather than the `np.bool` alias, which NumPy deprecated
    # in 1.20 and removed in 1.24.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    # Label each agent 'adversary' or 'agent'; when any agent lacks the
    # `adversary` attribute, every agent is labelled 'agent'.
    if all(hasattr(a, 'adversary') for a in env.agents):
        self.agent_types = [
            'adversary' if a.adversary else 'agent' for a in env.agents
        ]
    else:
        self.agent_types = ['agent' for _ in env.agents]
    # Per-environment step counter (incremented and reset in step_wait).
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
    self.metadata = env.metadata
def step_wait(self):
    """Step every sub-environment with its pending action.

    Reads ``self.actions`` (one entry per environment), steps each
    environment, and resets any environment whose ``done`` entries are all
    true, replacing its observation with the one returned by ``reset()``.

    Returns:
        Tuple ``(obs, rews, dones, infos)``; each element is a NumPy array
        stacked over the environments.
    """
    results = [env.step(action) for action, env in zip(self.actions, self.envs)]
    # Transpose the list of per-env (obs, rew, done, info) tuples into four
    # arrays, one per field.
    obs, rews, dones, infos = map(np.array, zip(*results))
    self.ts += 1
    for i, done in enumerate(dones):
        # An environment is restarted only once all of its done flags are set.
        if all(done):
            obs[i] = self.envs[i].reset()
            self.ts[i] = 0
    # The pending actions have been consumed.
    self.actions = None
    return np.array(obs), np.array(rews), np.array(dones), infos
# NOTE(review): the triple-quoted string below is never executed. It presumably
# holds the stock __init__()/step_wait() that the definitions above replace,
# kept only for reference - confirm.
'''
def __init__(self, env_fns):
self.envs = [fn() for fn in env_fns]
env = self.envs[0]
VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
obs_space = env.observation_space
self.keys, shapes, dtypes = obs_space_info(obs_space)
self.buf_obs = OrderedDict([
(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]))
for k in self.keys])
self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
self.buf_infos = [{} for _ in range(self.num_envs)]
self.actions = None
self.metadata = env.metadata
def step_wait(self):
for env_idx in range(self.num_envs):
obs, self.buf_rews[env_idx], self.buf_dones[env_idx], self.buf_infos[env_idx] =\
self.envs[env_idx].step(self.actions[env_idx])
if self.buf_dones[env_idx]:
# save final observation where user can get it, then reset
self.buf_infos[env_idx]['terminal_observation'] = obs
obs = self.envs[env_idx].reset()
self._save_obs(env_idx, obs)
return (self._obs_from_buf(), np.copy(self.buf_rews), np.copy(self.buf_dones),
self.buf_infos.copy())
'''

5. All set. Run to test

ma-Gym

Setup

git clone https://github.com/koulanurag/ma-gym.git
cd ma-gym
pip install -e .

Code sample

import gym
import ma_gym

env = gym.make('Switch2-v0')
# One done flag per agent; the episode runs until every flag is true.
done_n = [False for _ in range(env.n_agents)]
ep_reward = 0

obs_n = env.reset()
while not all(done_n):
    env.render()
    # Step the environment with an action sampled from its action space.
    obs_n, reward_n, done_n, info = env.step(env.action_space.sample())
    # Accumulate the sum of the per-agent rewards.
    ep_reward += sum(reward_n)
env.close()

Custom code changes

# Go to ma_gym -> switch_one_corridor.py, lines 113, 115, 117, 119, 121
# Change `move == 0` => `move[0]` (and likewise for indices 1-4), so that each
# branch tests one element of `move` instead of comparing `move` to an integer.
if move[0]:  # down
    next_pos = [curr_pos[0] + 1, curr_pos[1]]
elif move[1]:  # left
    next_pos = [curr_pos[0], curr_pos[1] - 1]
elif move[2]:  # up
    next_pos = [curr_pos[0] - 1, curr_pos[1]]
elif move[3]:  # right
    next_pos = [curr_pos[0], curr_pos[1] + 1]
elif move[4]:  # no-op
    pass

Consultant