# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
from grid2op.Reward.baseReward import BaseReward
from grid2op.dtypes import dt_float
class CombinedReward(BaseReward):
    """
    This class allows you to combine multiple predefined rewards. The reward it
    computes is the weighted sum of all the sub-rewards it is made of.

    Each sub-reward is identified by a key.

    It is used a bit differently than the other rewards. See the "Examples"
    section below for more information.

    Examples
    --------
    .. code-block:: python

        import grid2op
        from grid2op.Reward import GameplayReward, FlatReward, CombinedReward

        env = grid2op.make(..., reward_class=CombinedReward)
        cr = env.get_reward_instance()
        cr.addReward("Gameplay", GameplayReward(), 1.0)
        cr.addReward("Flat", FlatReward(), 1.0)
        cr.initialize(env)

        obs = env.reset()
        obs, reward, done, info = env.step(env.action_space())
        # `reward` here is the sum of what `GameplayReward` and `FlatReward`
        # would each have given, weighted by the weights passed to `addReward`

    """

    def __init__(self, logger=None):
        BaseReward.__init__(self, logger=logger)
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(0.0)
        self.rewards = {}

    def addReward(self, reward_name, reward_instance, reward_weight=1.0):
        """Register a sub-reward under ``reward_name`` with the given weight."""
        self.rewards[reward_name] = {
            "instance": reward_instance,
            "weight": dt_float(reward_weight),
        }
        return True

    def removeReward(self, reward_name):
        """Remove the sub-reward registered under ``reward_name``. Returns ``True`` if it was found."""
        if reward_name in self.rewards:
            self.rewards.pop(reward_name)
            return True
        return False

    def updateRewardWeight(self, reward_name, reward_weight):
        """Change the weight of the sub-reward registered under ``reward_name``. Returns ``True`` if it was found."""
        if reward_name in self.rewards:
            self.rewards[reward_name]["weight"] = reward_weight
            return True
        return False

    def __iter__(self):
        # Yield the attributes of the base reward, then one entry per
        # sub-reward: its own serialized form plus its weight
        for k, v in super().__iter__():
            yield (k, v)
        for k, v in self.rewards.items():
            r_dict = dict(v["instance"])
            r_dict["weight"] = float(v["weight"])
            yield (k, r_dict)

    def initialize(self, env):
        self.reward_min = dt_float(0.0)
        self.reward_max = dt_float(0.0)
        # The bounds of the combined reward are the weighted sums of the
        # bounds of every registered sub-reward
        for key, reward in self.rewards.items():
            reward_w = reward["weight"]
            reward_instance = reward["instance"]
            reward_instance.initialize(env)
            self.reward_max += dt_float(reward_instance.reward_max * reward_w)
            self.reward_min += dt_float(reward_instance.reward_min * reward_w)
        env.reward_range = self.get_range()

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        res = dt_float(0.0)
        # Loop over registered rewards
        for key, reward in self.rewards.items():
            r_instance = reward["instance"]
            # Call individual reward
            r = r_instance(action, env, has_error, is_done, is_illegal, is_ambiguous)
            # Accumulate the weighted result
            w = dt_float(reward["weight"])
            res += dt_float(r) * w
        # Return total sum
        return res

    def close(self):
        for key, reward in self.rewards.items():
            reward["instance"].close()