# Source code for grid2op.Reward.RewardHelper

# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import copy

from grid2op.Reward.BaseReward import BaseReward
from grid2op.Reward.ConstantReward import ConstantReward
from grid2op.Exceptions import Grid2OpException

class RewardHelper:
    """
    INTERNAL

    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        It is a class internal to the :class:`grid2op.Environment.Environment` do not use outside
        of its purpose and do not attempt to modify it.

    This class aims at making the creation of rewards class more automatic by the
    :class:`grid2op.Environment`.

    It is not recommended to derive from or modify this class. If a different reward needs to be
    used, it is recommended to build another object of this class, and change the
    :attr:`RewardHelper.rewardClass` attribute.

    Attributes
    ----------
    rewardClass: ``type``
        Type of reward that will be used by this helper. Note that the type (and not an
        instance / object of that type) must be given here. It defaults to :class:`ConstantReward`

    template_reward: :class:`BaseReward`
        An object of class :attr:`RewardHelper.rewardClass` used to compute the rewards.

    logger:
        Logger forwarded to the reward when it is built from a class (may be ``None``).

    """

    def __init__(self, reward_func=ConstantReward, logger=None):
        self.rewardClass = None
        self.template_reward = None
        # the logger must be set before `change_reward`, which reads it
        self.logger = logger
        self.change_reward(reward_func)

    def initialize(self, env):
        """
        This function initializes the template_reward with the environment. It is used especially
        for using :func:`RewardHelper.range`.

        Parameters
        ----------
        env: :class:`grid2op.Environment.Environment`
            The current used environment.

        """
        self.template_reward.initialize(env)

    def range(self):
        """
        Provides the range of the rewards.

        Returns
        -------
        res: ``(float, float)``
            The minimum reward per time step (possibly infinity) and the maximum reward per
            timestep (possibly infinity)

        """
        return self.template_reward.get_range()

    def reset(self, env):
        """called each time `env.reset` is invoked"""
        self.template_reward.reset(env)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        """
        Gives the reward that follows the execution of the :class:`grid2op.BaseAction.BaseAction`
        action in the :class:`grid2op.Environment.Environment` env;

        Parameters
        ----------
        action: :class:`grid2op.Action.Action`
            The action performed by the BaseAgent.

        env: :class:`grid2op.Environment.Environment`
            The current environment.

        has_error: ``bool``
            Did the action cause an error, such as a diverging powerflow for example
            (``True``: the action caused an error)

        is_done: ``bool``
            Is the game over (``True`` = the game is over)

        is_illegal: ``bool``
            Is the action legal or not (``True`` = the action was illegal).
            See :class:`grid2op.Exceptions.IllegalAction` for more information.

        is_ambiguous: ``bool``
            Is the action ambiguous or not (``True`` = the action was ambiguous).
            See :class:`grid2op.Exceptions.AmbiguousAction` for more information.

        Returns
        -------
        res: ``float``
            The computed reward

        """
        return self.template_reward(
            action, env, has_error, is_done, is_illegal, is_ambiguous
        )

    def change_reward(self, reward_func):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

            Use `env.change_reward` instead (:func:`grid2op.Environment.BaseEnv.change_reward`)

        Parameters
        ----------
        reward_func:
            Either an instance of :class:`BaseReward` (it is deep-copied and the copy is used as
            the template) or a class inheriting from :class:`BaseReward` (it is instantiated).

        Raises
        ------
        Grid2OpException
            If ``reward_func`` is neither a :class:`BaseReward` instance nor a subclass of it.

        """
        if isinstance(reward_func, BaseReward):
            # reward object given directly
            self.rewardClass = type(reward_func)
            self.template_reward = copy.deepcopy(reward_func)
        elif isinstance(reward_func, type) and issubclass(reward_func, BaseReward):
            # reward is provided as a class
            # (the `isinstance(..., type)` guard prevents `issubclass` from raising a raw
            # TypeError on non-class inputs, so they reach the Grid2OpException below)
            self.rewardClass = reward_func
            try:
                self.template_reward = reward_func(logger=self.logger)
            except TypeError as exc_:
                # the logger is optional: only report when one is available
                if self.logger is not None:
                    self.logger.warning(
                        f'Reward "{reward_func.__name__}" does not support the logger '
                        f"feature. Error was : {exc_}"
                    )
                # old (<= 1.7.0) behaviour
                self.template_reward = reward_func()
        else:
            raise Grid2OpException(
                f"Impossible to build a reward with input reward_func={reward_func}. "
                f"NB `reward_func` should be either an object of type `BaseReward` (or "
                f"one of its derivative) "
                f"or a class that inherit from `BaseReward`"
            )

    def close(self):
        """close the reward helper (in case there are specific behaviours for certain rewards)"""
        self.template_reward.close()