# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
from grid2op.Environment.baseMultiProcessEnv import BaseMultiProcessEnvironment
[docs]class SingleEnvMultiProcess(BaseMultiProcessEnvironment):
"""
This class allows to evaluate a single agent instance on multiple environments running in parallel.
It is a kind of :class:`BaseMultiProcessEnvironment`. For more information you can consult the
documentation of this parent class. It allows to interact at the same time with different copy of the
(same) environment in parallel
Attributes
-----------
env: `list::grid2op.Environment.Environment`
Al list of environments for which the evaluation will be made in parallel.
nb_env: ``int``
Number of parallel underlying environment that will be handled. It is also the size of the list of actions
that need to be provided in :func:`MultiEnvironment.step` and the return sizes of the list of this
same function.
Examples
--------
An example on how you can best leverage this class is given in the getting_started notebooks. Another simple
example is:
.. code-block:: python
from grid2op.BaseAgent import DoNothingAgent
from grid2op.MakeEnv import make
from grid2op.Environment import SingleEnvMultiProcess
# create a simple environment
env = make("l2rpn_case14_sandbox")
# number of parrallel environment
nb_env = 2 # change that to adapt to your system
NB_STEP = 100 # number of step for each environment
# create a simple agent
agent = DoNothingAgent(env.action_space)
# create the multi environment class
multi_envs = SingleEnvMultiProcess(env=env, nb_env=nb_env)
# making is usable
obs = multi_envs.reset()
rews = [env.reward_range[0] for i in range(nb_env)]
dones = [False for i in range(nb_env)]
# performs the appropriated steps
for i in range(NB_STEP):
acts = [None for _ in range(nb_env)]
for env_act_id in range(nb_env):
acts[env_act_id] = agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
obs, rews, dones, infos = multi_envs.step(acts)
# DO SOMETHING WITH THE AGENT IF YOU WANT
# close the environments
multi_envs.close()
# close the initial environment
env.close()
"""
def __init__(self, env, nb_env, obs_as_class=True, return_info=True, logger=None):
envs = [env for _ in range(nb_env)]
super().__init__(
envs,
obs_as_class=obs_as_class,
return_info=return_info,
logger=logger.getChild("SingleEnvMultiProcess")
if logger is not None
else None,
)
if __name__ == "__main__":
from tqdm import tqdm
from grid2op import make
from grid2op.Agent import DoNothingAgent
nb_env = 8 # change that to adapt to your system
NB_STEP = 100 # number of step for each environment
env = make("l2rpn_case14_sandbox")
env.seed(42)
agent = DoNothingAgent(env.action_space)
multi_envs = SingleEnvMultiProcess(env, nb_env)
obs = multi_envs.reset()
rews = [env.reward_range[0] for i in range(nb_env)]
dones = [False for i in range(nb_env)]
total_reward = 0.0
for i in tqdm(range(NB_STEP)):
acts = [None for _ in range(nb_env)]
for env_act_id in range(nb_env):
acts[env_act_id] = agent.act(
obs[env_act_id], rews[env_act_id], dones[env_act_id]
)
obs, rews, dones, infos = multi_envs.step(acts)
total_reward += np.sum(rews)
len(rews)
multi_envs.close()
ob = env.reset()
rew = env.reward_range[0]
done = False
total_reward_single = 0
for i in tqdm(range(NB_STEP)):
act = agent.act(ob, rew, done)
ob, rew, done, info = env.step(act)
if done:
ob = env.reset()
total_reward_single += np.sum(rew)
env.close()
print("total_reward mluti_env: {}".format(total_reward))
print("total_reward single env: {}".format(total_reward_single))