Source code for grid2op.Reward.n1Reward

# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import copy
from grid2op.Reward import BaseReward
from grid2op.Action._backendAction import _BackendAction


[docs]class N1Reward(BaseReward): """ This class implements a reward that is inspired by the "n-1" criterion widely used in power system. More specifically it returns the maximum flows (on all the powerlines) after a given (as input) a powerline has been disconnected. Examples -------- This can be used as: .. code-block:: python import grid2op from grid2op.Reward import N1Reward L_ID = 0 env = grid2op.make("l2rpn_case14_sandbox", reward_class=N1Reward(l_id=L_ID) ) obs = env.reset() obs, reward, *_ = env.step(env.action_space()) print(f"reward: {reward:.3f}") print("We can check that it is exactly like 'simulate' on the current step the disconnection of the same powerline") obs_n1, *_ = obs.simulate(env.action_space({"set_line_status": [(L_ID, -1)]}), time_step=0) print(f"\tmax flow after disconnection of line {L_ID}: {obs_n1.rho.max():.3f}") Notes ----- It is also possible to use the `other_rewards` argument to simulate multiple powerline disconnections, for example: .. code-block:: python import grid2op from grid2op.Reward import N1Reward L_ID = 0 env = grid2op.make("l2rpn_case14_sandbox", other_rewards={f"line_{l_id}": N1Reward(l_id=l_id) for l_id in [0, 1]} ) obs = env.reset() obs, reward, *_ = env.step(env.action_space()) print(f"reward: {reward:.3f}") print("We can check that it is exactly like 'simulate' on the current step the disconnection of the same powerline") obs_n1, *_ = obs.simulate(env.action_space({"set_line_status": [(L_ID, -1)]}), time_step=0) print(f"\tmax flow after disconnection of line {L_ID}: {obs_n1.rho.max():.3f}") """
[docs] def __init__(self, l_id=0, logger=None): BaseReward.__init__(self, logger=logger) self._backend = None self._backend_action = None self.l_id = l_id
[docs] def initialize(self, env): self._backend = env.backend.copy() bk_act_cls = _BackendAction.init_grid(env.backend) self._backend_action = bk_act_cls()
[docs] def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): if is_done: return self.reward_min self._backend_action.reset() act = env.backend.get_action_to_set() th_lim = env.get_thermal_limit() th_lim[th_lim <= 1] = 1 # assign 1 for the thermal limit this_n1 = copy.deepcopy(act) self._backend_action += this_n1 self._backend.apply_action(self._backend_action) self._backend._disconnect_line(self.l_id) div_exc_ = None try: # TODO there is a bug in lightsimbackend that make it crash instead of diverging conv, div_exc_ = self._backend.runpf() except Exception as exc_: conv = False div_exc_ = exc_ if conv: flow = self._backend.get_line_flow() res = (flow / th_lim).max() else: self.logger.info(f"Divergence of the backend at step {env.nb_time_step} for N1Reward with error `{div_exc_}`") res = -1 return res
[docs] def close(self): self._backend.close() del self._backend self._backend = None