# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import sys
import numpy as np
import re
from grid2op._glop_platform_info import _IS_WINDOWS, _IS_LINUX, _IS_MACOS
from grid2op.Exceptions import Grid2OpException
from grid2op.Reward.baseReward import BaseReward
from grid2op.dtypes import dt_float
class RedispReward(BaseReward):
"""
This reward can be used for environments where redispatching is available. It assigns a cost to redispatching action
and penalizes with the losses.
This is the closest reward to the score used for the l2RPN competitions.
    Examples
    ---------
    You can use this reward in any environment with:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import RedispReward

        # then you create your environment with it:
        NAME_OF_THE_ENVIRONMENT = "l2rpn_case14_sandbox"
        env = grid2op.make(NAME_OF_THE_ENVIRONMENT, reward_class=RedispReward)

        # and do a step with a "do nothing" action
        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # the reward is computed with the RedispReward class
        # NB this is the default reward of many environments in the grid2op framework

    This class depends on some "meta parameters". These meta parameters can be changed when the class is created
    in the following way:

    .. code-block:: python

        import grid2op
        from grid2op.Reward import RedispReward

        reward_cls = RedispReward.generate_class_custom_params(alpha_redisph=5,
                                                               min_load_ratio=0.1,
                                                               worst_losses_ratio=0.05,
                                                               min_reward=-10.,
                                                               reward_illegal_ambiguous=0.,
                                                               least_losses_ratio=0.015)
        env_name = "l2rpn_case14_sandbox"  # or any other name
        env = grid2op.make(env_name, reward_class=reward_cls)

    These meta parameters mean:

    - alpha_redisph: extra cost paid when performing redispatching: each MW of redispatching is charged
      "alpha_redisph" times the marginal cost
    - min_load_ratio: fraction of the total generation capacity (sum of gen_pmax) used as the minimum load
      on the grid
    - worst_losses_ratio: worst losses possible on the grid, as a fraction of the load (5% is an upper bound
      for a normal grid)
    - min_reward: the minimum reward of this class (can be parametrized, and is only used when there is
      a game over)
    - reward_illegal_ambiguous: reward given when the action is illegal or ambiguous
    - least_losses_ratio: the minimum losses you can have, as a fraction of the load (1.5% of the total demand
      is a lower bound for a real grid)

    Notes
    ------
    On Windows and macOS, due to a compatibility issue with multi-processing, it is not possible to have
    several "RedispReward" classes with different meta parameters (see the "Examples" section).
    """
_alpha_redisp = dt_float(5.0)
_min_load_ratio = dt_float(0.1) # min load = min_load_ratio * max_load
_worst_losses_ratio = dt_float(
0.05
) # worst_losses = worst_losses_ratio * worst_load
_min_reward = dt_float(-10.0) # reward when game over
_reward_illegal_ambiguous = dt_float(
0.0
) # reward when action is illegal or ambiguous
_least_losses_ratio = dt_float(
0.015
) # least_losses = least_losses_ratio * least_loads

    def __init__(self, logger=None):
BaseReward.__init__(self, logger=logger)
self.reward_min = None
self.reward_max = None
self.max_regret = dt_float(0.0)
self.reward_illegal_ambiguous = None
@classmethod
def generate_class_custom_params(
cls,
alpha_redisph=5.0,
min_load_ratio=0.1, # min load = min_load_ratio * max_load
worst_losses_ratio=0.05, # worst_losses = worst_losses_ratio * worst_load
min_reward=-10.0,
least_losses_ratio=0.015, # least_losses = least_losses_ratio * least_loads
reward_illegal_ambiguous=0.0,
):
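        """
        Create a RedispReward class with the given meta parameters.

        On Linux a new class (whose name is derived from the parameters) is created and returned, so several
        rewards with different meta parameters can coexist. On Windows and macOS the meta parameters of this
        class are overwritten in place and the class itself is returned (see the "Notes" section of
        RedispReward).
        """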
if _IS_LINUX:
# on linux it's fine, i can create new classes for each meta parameters
nm_res = f"RedispReward_{alpha_redisph:.2f}_{min_load_ratio:.2f}_{worst_losses_ratio:.2f}"
nm_res += f"_{min_reward:.2f}_{least_losses_ratio:.2f}_{reward_illegal_ambiguous:.2f}"
nm_res = nm_res.replace(".", "@")
cls_attr_as_dict = {
"_alpha_redisp": dt_float(alpha_redisph),
"_min_load_ratio": dt_float(min_load_ratio),
"_worst_losses_ratio": dt_float(worst_losses_ratio),
"_min_reward": dt_float(min_reward),
"_least_losses_ratio": dt_float(least_losses_ratio),
"_reward_illegal_ambiguous": dt_float(reward_illegal_ambiguous),
}
res_cls = type(nm_res, (cls,), cls_attr_as_dict)
res_cls.__module__ = cls.__module__
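            # register the dynamically created class under its module (and in globals()) so it can be
            # looked up by name, e.g. when it has to be pickled for multi-processing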
setattr(sys.modules[cls.__module__], nm_res, res_cls)
globals()[nm_res] = res_cls
else:
# i mess with the default parameters in the base class, i know i know it's not pretty, but hey...
# TODO make that prettier and clean the way to make the reward in the env (for example allow to pass
# objects and not just class)
cls._alpha_redisp = dt_float(alpha_redisph)
cls._min_load_ratio = dt_float(min_load_ratio)
cls._worst_losses_ratio = dt_float(worst_losses_ratio)
cls._min_reward = dt_float(min_reward)
cls._least_losses_ratio = dt_float(least_losses_ratio)
cls._reward_illegal_ambiguous = dt_float(reward_illegal_ambiguous)
res_cls = cls
return res_cls

    def initialize(self, env):
        if not env.redispatching_unit_commitment_availble:
            raise Grid2OpException(
                "Impossible to use the RedispReward reward with an environment that does not provide "
                "generator costs. Please make sure env.redispatching_unit_commitment_availble is set to True."
            )
cls_ = type(self)
worst_marginal_cost = np.max(env.gen_cost_per_MW)
worst_load = env.gen_pmax.sum(dtype=dt_float)
# it's not the worst, but definitely an upper bound
worst_losses = dt_float(cls_._worst_losses_ratio) * worst_load
worst_redisp = cls_._alpha_redisp * env.gen_pmax.sum() # not realistic, but an upper bound
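        # upper bound (in monetary units) on the per-step cost of losses plus redispatching;
        # the factor delta_time_seconds / 3600 converts MW into MWh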
self.max_regret = (worst_losses + worst_redisp) * worst_marginal_cost * env.delta_time_seconds / 3600.0
self.reward_min = dt_float(cls_._min_reward)
        least_loads = dt_float(
            worst_load * cls_._min_load_ratio
        )  # minimal load, as a fraction (min_load_ratio) of the total installed capacity
        least_losses = dt_float(
            cls_._least_losses_ratio * least_loads * env.delta_time_seconds / 3600.0
        )  # least_losses_ratio (1.5% by default) of the minimal load, converted to MWh
least_redisp = dt_float(0.0) # lower_bound is 0
base_marginal_cost = np.min(env.gen_cost_per_MW[env.gen_cost_per_MW > 0.0])
min_regret = (least_losses + least_redisp) * base_marginal_cost
self.reward_max = dt_float((self.max_regret - min_regret) / least_loads)
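        # the reward returned at each step is (max_regret - regret) / total load, so reward_max is the
        # optimistic bound obtained with minimal losses, no redispatching and minimal load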
self.reward_illegal_ambiguous = cls_._reward_illegal_ambiguous

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
res = None
        if is_done:
            # if the episode is over and it's my fault (i did a blackout) i strongly penalize
            if has_error or is_illegal or is_ambiguous:
                res = self.reward_min
        elif is_illegal or is_ambiguous:
            res = self._reward_illegal_ambiguous
if res is None:
# compute the losses
gen_p, *_ = env.backend.generators_info()
load_p, *_ = env.backend.loads_info()
# don't forget to convert MW to MWh !
losses = (gen_p.sum() - load_p.sum()) * env.delta_time_seconds / 3600.0
# compute the marginal cost
gen_activeprod_t = env._gen_activeprod_t
marginal_cost = np.max(env.gen_cost_per_MW[gen_activeprod_t > 0.0])
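            # i.e. the cost of the most expensive generator currently producing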
# redispatching amount
actual_dispatch = env._actual_dispatch
redisp_cost = (
self._alpha_redisp * np.abs(actual_dispatch).sum() * marginal_cost * env.delta_time_seconds / 3600.0
)
# cost of losses
losses_cost = losses * marginal_cost
# cost of storage
c_storage = np.abs(env._storage_power).sum() * marginal_cost * env.delta_time_seconds / 3600.0
# total "regret"
regret = losses_cost + redisp_cost + c_storage
# compute reward
reward = self.max_regret - regret
# divide it by load, to be less sensitive to load variation
res = dt_float(reward / load_p.sum())
return res
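

if __name__ == "__main__":
    # Minimal usage sketch, mirroring the examples of the class docstring. It assumes grid2op is installed
    # and that the "l2rpn_case14_sandbox" dataset has been downloaded; the meta parameter values below are
    # purely illustrative.
    import grid2op

    custom_reward_cls = RedispReward.generate_class_custom_params(alpha_redisph=2.0, min_reward=-5.0)
    env = grid2op.make("l2rpn_case14_sandbox", reward_class=custom_reward_cls)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space())  # "do nothing" action
    print(f"reward for the do-nothing action: {reward:.3f}")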