# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import numpy as np
from typing import Literal, Dict, Tuple, Any, Optional, Union, Generic
from grid2op.dtypes import dt_int
from grid2op.Chronics import Multifolder
from grid2op.Environment import Environment
from grid2op.typing_variables import STEP_INFO_TYPING, RESET_OPTIONS_TYPING
from grid2op.gym_compat.utils import (GYM_AVAILABLE,
GYMNASIUM_AVAILABLE,
check_gym_version,
sample_seed,
ObsType,
ActType)
def conditional_decorator(condition):
    """Build a decorator that keeps a function only when `condition` holds.

    Parameters
    ----------
    condition:
        Evaluated for truthiness each time the returned decorator is applied.

    Returns
    -------
    A decorator. Applied to a function, it returns that very function
    (untouched) when `condition` is truthy, and an instance of
    ``NotImplementedError`` otherwise, so that the decorated name is bound
    to something that is not callable.
    """
    def _keep_or_disable(func):
        # nothing is wrapped: either the original object is handed back as
        # is, or it is replaced by a non-callable placeholder
        return func if condition else NotImplementedError()

    return _keep_or_disable
# Key used for the identifier of the time series: it is written in the
# `info` dictionary returned by `reset` and looked up in the reset `options`.
_TIME_SERIE_ID = "time serie id"

# Typing of the `info` dictionary returned by the gym / gymnasium `reset`.
RESET_INFO_GYM_TYPING = Dict[
    Literal[
        "time serie id",
        "seed",
        "grid2op_env_seed",
        "underlying_env_seeds",
    ],
    Any,
]
class __AuxGymEnv(Generic[ObsType, ActType]):
    """
    fully implements the gymnasium API by using the :class:`GymActionSpace` and :class:`GymObservationSpace`
    for compliance with gymnasium.

    They can handle action_space_converter or observation_space converter to change the representation of data
    that will be fed to the agent.  #TODO

    .. warning::
        The `gym` package has some breaking API change since its version 0.26. Depending on the version installed,
        we attempted, in grid2op, to maintain compatibility both with former version and later one. This makes this
        class behave differently depending on the version of gymnasium / gym you have installed !

        The main changes involve the functions `env.step` and `env.reset`

    If you want to use the same version of the GymEnv regardless of the gym / gymnasium version installed you can use:

    - :class:`GymnasiumEnv` if `gymnasium` is available
    - :class:`GymEnv_Legacy` for gym < 0.26
    - :class:`GymEnv_Modern` for gym >= 0.26

    .. warning::
        Depending on the presence or absence of the `gymnasium` and `gym` packages this class might
        behave differently.

        In grid2op we tried to maintain compatibility both with gymnasium (newest) and gym (legacy,
        no more maintained) RL packages. The behaviour is the following:

        - :class:`GymEnv` will inherit from gymnasium if it's installed
          (in this case it will be :class:`GymnasiumEnv`), otherwise it will
          inherit from gym (and will be exactly :class:`GymEnv_Legacy` - gym < 0.26-
          or :class:`GymEnv_Modern` - for gym >= 0.26)
        - :class:`GymnasiumEnv` will inherit from gymnasium if it's available and never
          from gym
        - :class:`GymEnv_Legacy` and :class:`GymEnv_Modern` will inherit from gym if it's
          available and never from gymnasium

        See :ref:`gymnasium_gym` for more information

    Notes
    ------
    The environment passed as input is copied. It is not modified by this "gymnasium environment"

    Examples
    --------
    This can be used like:

    .. code-block:: python

        import grid2op
        from grid2op.gym_compat import GymEnv

        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name)
        gym_env = GymEnv(env)  # is a gymnasium environment properly inheriting from gym.Env !

    There are a few differences between "raw" grid2op environment and gymnasium environments.

    One of the major differences is that, to our knowledge, gymnasium does not support the
    `simulate` feature (which allows an agent to test the impact of a given action
    on the grid without having to perform a `step` see :ref:`model_based_rl` for more information)
    [NB if you know or better are developing some "model based RL library" let us know !]

    Another difference is in the way to do some actions. In grid2op, actions are a dedicated class
    and can be made with an `action_space` and a dictionary, or using the properties of the action
    class.

    In gym, there are no specific representations of the action class. More precisely, for each action
    type (:class:`MultiDiscreteActSpace`, :class:`DiscreteActSpace`, :class:`BoxGymActSpace` or
    :class:`GymActionSpace`) there is a way to encode it. For example, by default (:class:`GymActionSpace`)
    an action is represented through a Dict (`from collections import OrderedDict`)
    """

    def __init__(self,
                 env_init: Environment,
                 shuffle_chronics: Optional[bool]=True,
                 render_mode: Literal["rgb_array"]="rgb_array",
                 with_forecast: bool=False):
        """Build the gym / gymnasium wrapper around a copy of `env_init`.

        Parameters
        ----------
        env_init:
            The grid2op environment. It is used to build the action and
            observation spaces and is then copied: the copy is stored in
            ``self.init_env``.
        shuffle_chronics:
            If truthy, and if the time series of the environment are handled
            by a :class:`Multifolder`, a new time series is sampled when the
            environment is reset.
        render_mode:
            Value assigned to the ``render_mode`` attribute of the copied
            environment.
        with_forecast:
            If ``False`` (default) the forecasts are deactivated on the copied
            environment and the environment used for them is closed and
            dropped.
        """
        cls = type(self)
        # `_gymnasium`, `_ActionSpaceType` and `_ObservationSpaceType` are class
        # attributes injected when the concrete class is created with `type(...)`
        check_gym_version(cls._gymnasium)
        self.action_space = cls._ActionSpaceType(env_init)
        self.observation_space = cls._ObservationSpaceType(env_init)
        self.reward_range = env_init.reward_range
        self.metadata = env_init.metadata
        self.init_env = env_init.copy()
        self.init_env.render_mode = render_mode
        self._shuffle_chronics = shuffle_chronics
        if not with_forecast:
            # default in grid2op 1.10.3
            # to improve pickle compatibility and speed
            self.init_env.deactivate_forecast()
            obs_space = self.init_env._observation_space
            # the "forecast" environment might already have been removed
            # (presumably when `env_init` had no forecast to begin with):
            # only close it when it exists
            if obs_space.obs_env is not None:
                obs_space.obs_env.close()
            obs_space.obs_env = None
            obs_space._ObsEnv_class = None
            # also drop the references kept by the cached observations
            for cached_obs in (self.init_env._last_obs, self.init_env.current_obs):
                if cached_obs is not None:
                    cached_obs._obs_env = None
                    cached_obs._ptr_kwargs_env = False

        super().__init__()  # super should reference either gym.Env or gymnasium.Env
        if not hasattr(self, "_np_random"):
            # for older version of gymnasium it does not exist
            self._np_random = np.random.RandomState()

    def _aux_step(self, gym_action: ActType) -> Tuple[ObsType, float, bool, STEP_INFO_TYPING]:
        """Perform a step, "gym < 0.26" flavour.

        The gym action is converted to a grid2op action, the step is done on
        ``self.init_env`` and the resulting observation is converted back to
        gym.

        Returns
        -------
        The gym observation, the reward (as a python float), the `done` flag
        and the `info` dictionary, the last two exactly as returned by the
        grid2op environment.
        """
        # used for gym < 0.26
        g2op_act = self.action_space.from_gym(gym_action)
        g2op_obs, reward, done, info = self.init_env.step(g2op_act)
        gym_obs = self.observation_space.to_gym(g2op_obs)
        return gym_obs, float(reward), done, info

    def _aux_step_new(self, gym_action: ActType) -> Tuple[ObsType, float, bool, bool, STEP_INFO_TYPING]:
        """Perform a step, "gym >= 0.26" / gymnasium flavour.

        Same as :func:`_aux_step` except that:

        - the `done` flag of grid2op is returned as `terminated`
        - a `truncated` flag (always ``False``) is added
        - the exceptions stored in ``info["exception"]`` (if any) are
          converted to strings
        """
        # used for gym >= 0.26
        gym_obs, reward, terminated, info = self._aux_step(gym_action)
        truncated = False  # see https://github.com/openai/gym/pull/2752
        if "exception" in info:
            info["exception"] = [str(el) for el in info["exception"]]
        return gym_obs, reward, terminated, truncated, info

    def _aux_reset(self,
                   seed: Optional[int]=None,
                   return_info: Optional[bool]=None,
                   options: RESET_OPTIONS_TYPING=None) -> Union[ObsType, Tuple[ObsType, RESET_INFO_GYM_TYPING]]:
        """Reset the environment, "gym < 0.26" flavour.

        Parameters
        ----------
        seed:
            If not ``None``, used to seed this environment, its spaces and
            the underlying grid2op environment (see :func:`_aux_seed`).
        return_info:
            If truthy, a tuple ``(observation, info)`` is returned instead of
            the observation alone.
        options:
            Forwarded as is to the `reset` of the grid2op environment.

        Returns
        -------
        The gym observation, or ``(observation, info)`` if `return_info` is
        truthy. `info` contains the id of the time series and, when a seed
        was provided, the seeds that have been used.
        """
        # used for gym < 0.26
        # NOTE(review): unlike `_aux_reset_new`, a time series is sampled here
        # even if `options` specifies one. Kept as is to not change the
        # sequence of random numbers that are drawn.
        if self._shuffle_chronics and isinstance(
            self.init_env.chronics_handler.real_data, Multifolder
        ):
            self.init_env.chronics_handler.sample_next_chronics()

        if seed is not None:
            _, next_seed, underlying_env_seeds = self._aux_seed(seed)

        g2op_obs = self.init_env.reset(options=options)
        gym_obs = self.observation_space.to_gym(g2op_obs)

        if not return_info:
            return gym_obs

        chron_id = self.init_env.chronics_handler.get_id()
        info = {_TIME_SERIE_ID: chron_id}
        if seed is not None:
            info["seed"] = seed
            info["grid2op_env_seed"] = next_seed
            info["underlying_env_seeds"] = underlying_env_seeds
        return gym_obs, info

    def _aux_reset_new(self,
                       seed: Optional[int]=None,
                       options: RESET_OPTIONS_TYPING=None) -> Tuple[ObsType, RESET_INFO_GYM_TYPING]:
        """Reset the environment, "gym >= 0.26" / gymnasium flavour.

        Parameters
        ----------
        seed:
            Forwarded to the `reset` of the parent (gym / gymnasium) class.
            If not ``None``, the spaces and the underlying grid2op
            environment are seeded too.
        options:
            Forwarded as is to the `reset` of the grid2op environment. If it
            contains the key ``"time serie id"`` no time series is sampled.

        Returns
        -------
        The gym observation and the `info` dictionary, which contains the id
        of the time series and, when a seed was provided, the seeds that have
        been used.
        """
        # used for gym >= 0.26
        super().reset(seed=seed)  # seed gymnasium env
        if seed is not None:
            self._aux_seed_spaces()
            seed, next_seed, underlying_env_seeds = self._aux_seed_g2op(seed)

        # do not sample a time series if the user explicitly asked for one
        ts_id_requested = options is not None and _TIME_SERIE_ID in options
        if (self._shuffle_chronics and
            isinstance(self.init_env.chronics_handler.real_data, Multifolder) and
            not ts_id_requested):
            self.init_env.chronics_handler.sample_next_chronics()

        # we don't seed grid2op with reset as it is done
        # earlier
        g2op_obs = self.init_env.reset(seed=None, options=options)
        gym_obs = self.observation_space.to_gym(g2op_obs)

        chron_id = self.init_env.chronics_handler.get_id()
        info = {_TIME_SERIE_ID: chron_id}
        if seed is not None:
            info["seed"] = seed
            info["grid2op_env_seed"] = next_seed
            info["underlying_env_seeds"] = underlying_env_seeds
        return gym_obs, info

    def render(self):
        """for compatibility with open ai gymnasium render function"""
        return self.init_env.render()

    def close(self) -> None:
        """Close the grid2op environment and both spaces (if still there)
        and set the corresponding attributes to ``None``.

        It can be called multiple times, and on an object that has only been
        partially initialized.
        """
        if getattr(self, "init_env", None) is not None:
            self.init_env.close()
            self.init_env = None

        if getattr(self, "action_space", None) is not None:
            self.action_space.close()
            self.action_space = None

        if getattr(self, "observation_space", None) is not None:
            self.observation_space.close()
            self.observation_space = None

    def _aux_seed_spaces(self):
        """Seed the action space, then the observation space, each with a
        seed drawn from ``self._np_random``."""
        max_ = np.iinfo(dt_int).max
        next_seed = sample_seed(max_, self._np_random)
        self.action_space.seed(next_seed)
        next_seed = sample_seed(max_, self._np_random)
        self.observation_space.seed(next_seed)

    def _aux_seed_g2op(self, seed):
        """Seed the underlying grid2op environment with a seed drawn from
        ``self._np_random``.

        Returns
        -------
        A tuple made of `seed` (returned unchanged), the seed drawn for the
        grid2op environment and whatever the `seed` function of the grid2op
        environment returned.
        """
        # then seed the underlying grid2op env
        max_ = np.iinfo(dt_int).max
        next_seed = sample_seed(max_, self._np_random)
        underlying_env_seeds = self.init_env.seed(next_seed)
        return seed, next_seed, underlying_env_seeds

    def _aux_seed(self, seed: Optional[int]=None):
        """Seed everything, "gym < 0.26" flavour.

        If `seed` is ``None`` nothing is done and ``(None, None, None)`` is
        returned. Otherwise the parent class and ``self._np_random`` are
        seeded with `seed`, then the spaces and the grid2op environment are
        seeded (see :func:`_aux_seed_spaces` and :func:`_aux_seed_g2op`, the
        result of the latter is returned).
        """
        # deprecated in gym >=0.26
        if seed is None:
            return None, None, None

        # seed the gym env
        super().seed(seed)
        self._np_random.seed(seed)
        self._aux_seed_spaces()
        return self._aux_seed_g2op(seed)

    def __del__(self):
        # delete possible dangling reference
        self.close()
if GYM_AVAILABLE:
    from gym import Env as LegacyGymEnv
    from grid2op.gym_compat.gym_obs_space import LegacyGymObservationSpace
    from grid2op.gym_compat.gym_act_space import LegacyGymActionSpace

    # Concrete base class for the (legacy) `gym` package: the generic
    # implementation is combined with `gym.Env` and the class attributes it
    # reads in its constructor are filled in.
    _AuxGymEnv = type("_AuxGymEnv",
                      (__AuxGymEnv, LegacyGymEnv),
                      {"_gymnasium": False,
                       "_ActionSpaceType": LegacyGymActionSpace,
                       "_ObservationSpaceType": LegacyGymObservationSpace,
                       "__module__": __name__})
    _AuxGymEnv.__doc__ = __AuxGymEnv.__doc__

    class GymEnv_Legacy(_AuxGymEnv):
        # for old version of gym
        def reset(self, *args, **kwargs) -> Union[ObsType, Tuple[ObsType, RESET_INFO_GYM_TYPING]]:
            """Reset the environment (gym < 0.26 API).

            All arguments are forwarded to `_aux_reset`, which accepts
            `seed`, `return_info` and `options`. The observation is returned,
            together with an `info` dictionary if `return_info` is truthy.
            """
            return self._aux_reset(*args, **kwargs)

        def step(self, action: ActType) -> Tuple[ObsType, float, bool, STEP_INFO_TYPING]:
            """Perform a step (gym < 0.26 API): returns the observation, the
            reward, the `done` flag and the `info` dictionary."""
            return self._aux_step(action)

        def seed(self, seed: Optional[int]=None) -> Tuple[Optional[int], Optional[int], Any]:
            """Seed the environment, its spaces and the grid2op environment.

            Returns what `_aux_seed` returns: a tuple of 3 elements,
            ``(None, None, None)`` when `seed` is ``None``.
            """
            # defined only on some cases
            return self._aux_seed(seed)

    class GymEnv_Modern(_AuxGymEnv):
        # for new version of gym
        def reset(self,
                  *,
                  seed: Optional[int]=None,
                  options: RESET_OPTIONS_TYPING = None) -> Tuple[
                      ObsType,
                      RESET_INFO_GYM_TYPING
                  ]:
            """Reset the environment (gym >= 0.26 API): returns the
            observation and the `info` dictionary."""
            return self._aux_reset_new(seed, options)

        def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, STEP_INFO_TYPING]:
            """Perform a step (gym >= 0.26 API): returns the observation, the
            reward, the `terminated` and `truncated` flags and the `info`
            dictionary."""
            return self._aux_step_new(action)

    # the documentation shown for both classes is the one of the generic class
    GymEnv_Legacy.__doc__ = __AuxGymEnv.__doc__
    GymEnv_Modern.__doc__ = __AuxGymEnv.__doc__
if GYMNASIUM_AVAILABLE:
    from gymnasium import Env
    from grid2op.gym_compat.gym_act_space import GymnasiumActionSpace
    from grid2op.gym_compat.gym_obs_space import GymnasiumObservationSpace

    # Concrete base class for the `gymnasium` package: the generic
    # implementation is combined with `gymnasium.Env` and the class attributes
    # it reads in its constructor are filled in.
    _AuxGymnasiumEnv = type("_AuxGymnasiumEnv",
                            (__AuxGymEnv, Env),
                            {"_gymnasium": True,
                             "_ActionSpaceType": GymnasiumActionSpace,
                             "_ObservationSpaceType": GymnasiumObservationSpace,
                             "__module__": __name__})
    _AuxGymnasiumEnv.__doc__ = __AuxGymEnv.__doc__

    class GymnasiumEnv(_AuxGymnasiumEnv):
        # for gymnasium
        def reset(self,
                  *,
                  seed: Optional[int]=None,
                  options: RESET_OPTIONS_TYPING = None) -> Tuple[
                      ObsType,
                      RESET_INFO_GYM_TYPING
                  ]:
            """This function will reset the underlying grid2op environment
            and return the next state of the grid (as the gymnasium observation)
            and some other information.

            Parameters
            ----------
            seed : Optional[int], optional
                The seed for this new environment, by default None
            options : RESET_OPTIONS_TYPING, optional
                See the documentation of :func:`grid2op.Environment.Environment.reset`
                for more information about it, by default None

            Returns
            -------
            Tuple[ ObsType, RESET_INFO_GYM_TYPING ]
                - observation: the gymnasium observation after the reset
                - info: a dictionary with the id of the time series used
                  (key ``"time serie id"``) and, only if a seed was provided,
                  the seeds used (keys ``"seed"``, ``"grid2op_env_seed"`` and
                  ``"underlying_env_seeds"``)
            """
            return self._aux_reset_new(seed, options)

        def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, STEP_INFO_TYPING]:
            """Run one timestep of the environment's dynamics using the agent actions.

            When the end of an episode is reached (terminated or truncated),
            it is necessary to call reset() to reset this environment's state for the next episode.

            Parameters
            ----------
            action : ``ActType``
                An action that can be process by the :func:`grid2op.gym_compat.gym_act_space.GymActionSpace.from_gym`
                (given in the form of a gymnasium action belonging to a gymnasium space.).

                For example it can be a sorted dictionary if you are using default
                :class:`grid2op.gym_compat.gym_act_space.GymActionSpace`
                or a numpy array if you are using :class:`grid2op.gym_compat.box_gym_actspace.BoxGymnasiumActSpace`

            Returns
            -------
            Tuple[ObsType, float, bool, bool, STEP_INFO_TYPING]

                - observation: an instance of the current observation space (can be a dictionary, a numpy array etc.)
                - reward: the reward for the previous action
                - terminated: whether the episode is over (this is the `done` flag returned by
                  the `step` function of the grid2op environment)
                - truncated: always ``False`` in this implementation
                - info: other information, see :func:`grid2op.Environment.BaseEnv.step` for more
                  information about the available informations. If it contains the key
                  ``"exception"``, the exceptions are given as strings.
            """
            return self._aux_step_new(action)

    # the documentation shown for this class is the one of the generic class
    GymnasiumEnv.__doc__ = __AuxGymEnv.__doc__