# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
from grid2op.dtypes import dt_int
from grid2op.Exceptions import MultiEnvException
from grid2op.Environment.baseMultiProcessEnv import BaseMultiProcessEnvironment
class MultiEnvMultiProcess(BaseMultiProcessEnvironment):
    """
    This class allows to evaluate a single agent instance on multiple environments running in parallel.

    It is a kind of :class:`BaseMultiProcessEnvironment`. For more information you can consult the
    documentation of this parent class. This class allows to interact at the same time with different copies of
    possibly different environments in parallel.

    Attributes
    -----------
    envs: `list:grid2op.Environment.Environment`
        A list of environments for which the evaluation will be made in parallel.

    nb_envs: ``list:int``
        Number of parallel underlying environment that will be handled.
        MUST be the same length as the parameter `envs`.
        The total number of subprocesses will be the sum of this list.

    Examples
    --------
    This class can be used as:

    .. code-block:: python

        import grid2op
        from grid2op.Environment import MultiEnvMultiProcess

        env0 = grid2op.make("l2rpn_case14_sandbox")  # create an environment
        env1 = grid2op.make("l2rpn_case14_sandbox")  # create a second environment, that can be similar, or not
        # it is recommended to filter or create the environment with different parameters, otherwise this class
        # is of little interest
        envs = [env0, env1]  # list of all environments created
        nb_envs = [1, 7]  # number of "copies" of each environment that will be made.
        # in this case the first one will be copied only once, and the second one 7 times.
        # the total number of environments used in the multi env will be the sum(nb_envs), here 8.

        multi_env = MultiEnvMultiProcess(envs=envs, nb_envs=nb_envs)
        # and now you can use it like any other grid2op environment (almost)
        observations = multi_env.reset()

    """

    def __init__(self, envs, nb_envs, obs_as_class=True, return_info=True, logger=None):
        """
        Build the flat list of environments (each ``envs[i]`` repeated ``nb_envs[i]`` times)
        and hand it over to the parent class constructor.

        Parameters
        ----------
        envs:
            Sequence of environments; ``envs[i]`` is repeated ``nb_envs[i]`` times.

        nb_envs:
            One-dimensional sequence of strictly positive integers, of the same length as ``envs``.

        obs_as_class:
            Forwarded unchanged to the parent class constructor.

        return_info:
            Forwarded unchanged to the parent class constructor.

        logger:
            If not ``None``, its child logger named ``"MultiEnvMultiProcess"`` (obtained with
            ``getChild``) is forwarded to the parent class constructor; otherwise ``None`` is forwarded.

        Raises
        ------
        MultiEnvException
            If ``nb_envs`` cannot be converted to an array of integers, is not one-dimensional,
            does not have the same length as ``envs``, or contains a negative or a zero value.
        """
        try:
            nb_envs = np.array(nb_envs)
            nb_envs = nb_envs.astype(dt_int)
        except Exception as exc_:
            # chain the original error so that the root cause stays visible in the traceback
            raise MultiEnvException(
                '"nb_envs" argument should be a list of integers. We could not '
                'convert it to such with error "{}"'.format(exc_)
            ) from exc_

        # a scalar or a nested list would otherwise fail later with an unrelated TypeError
        if nb_envs.ndim != 1:
            raise MultiEnvException(
                '"nb_envs" argument should be a flat (one dimensional) list of integers, '
                "but it has {} dimension(s).".format(nb_envs.ndim)
            )

        # enforce the documented contract: one number of copies per environment. Without this
        # check a longer "nb_envs" raises a bare IndexError and a shorter one silently drops
        # the trailing environments.
        nb_different_envs = len(envs)
        if nb_different_envs != nb_envs.shape[0]:
            raise MultiEnvException(
                '"envs" and "nb_envs" arguments should have the same length, but {} '
                "environment(s) and {} number(s) of copies have been provided."
                "".format(nb_different_envs, nb_envs.shape[0])
            )

        if (nb_envs < 0).any():
            raise MultiEnvException(
                'You ask to perform "{}" copy of an environment. This is a negative '
                'integer. I cannot do that. Please make sure "nb_envs" argument '
                "is all made of strictly positive integers and not {}."
                "".format(np.min(nb_envs), nb_envs)
            )

        if (nb_envs == 0).any():
            raise MultiEnvException(
                "You ask to perform 0 copy of an environment. This is not supported at "
                'the moment. Please make sure "nb_envs" argument '
                "is all made of strictly positive integers and not {}."
                "".format(nb_envs)
            )

        # repeat each environment as many times as requested, keeping the order of "envs"
        all_envs = [env for env, nb_copy in zip(envs, nb_envs) for _ in range(nb_copy)]

        child_logger = (
            logger.getChild("MultiEnvMultiProcess") if logger is not None else None
        )
        super().__init__(
            all_envs,
            obs_as_class=obs_as_class,
            return_info=return_info,
            logger=child_logger,
        )
if __name__ == "__main__":
    # Demo script: run the same agent for NB_STEP steps on a MultiEnvMultiProcess and
    # then on a single environment, and print the cumulated reward of both runs.
    from tqdm import tqdm
    from grid2op import make
    from grid2op.Agent import DoNothingAgent

    nb_env = [2, 2, 1, 1, 2]  # change that to adapt to your system
    NB_STEP = 100  # number of step for each environment

    env = make("l2rpn_case14_sandbox")
    try:
        env.seed(42)
        envs = [env, env, env, env, env]

        agent = DoNothingAgent(env.action_space)
        multi_envs = MultiEnvMultiProcess(envs, nb_env)
        try:
            obs = multi_envs.reset()
            rews = [env.reward_range[0] for _ in range(multi_envs.nb_env)]
            dones = [False for _ in range(multi_envs.nb_env)]
            total_reward = 0.0
            for _ in tqdm(range(NB_STEP)):
                # one action per underlying environment, computed from its last observation
                acts = [
                    agent.act(obs[env_act_id], rews[env_act_id], dones[env_act_id])
                    for env_act_id in range(multi_envs.nb_env)
                ]
                obs, rews, dones, infos = multi_envs.step(acts)
                total_reward += np.sum(rews)
        finally:
            # always close the multi environment, even if the rollout above failed
            multi_envs.close()

        # same experiment on the single environment
        ob = env.reset()
        rew = env.reward_range[0]
        done = False
        total_reward_single = 0
        for _ in tqdm(range(NB_STEP)):
            act = agent.act(ob, rew, done)
            ob, rew, done, info = env.step(act)
            if done:
                ob = env.reset()
            total_reward_single += np.sum(rew)
    finally:
        # always close the single environment, even if something above failed
        env.close()

    print("total_reward mluti_env: {}".format(total_reward))
    print("total_reward single env: {}".format(total_reward_single))