# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import warnings
import numpy as np
import copy
from grid2op.Opponent.baseOpponent import BaseOpponent
from grid2op.Exceptions import OpponentError
[docs]class WeightedRandomOpponent(BaseOpponent):
"""
This opponent will disconnect lines randomly among the attackable lines `lines_attacked`.
The sampling is weighted by the lines current usage rate divided by some factor `rho_normalization`
(see init for more details).
When an attack becomes possible, the time of the attack will be sampled uniformly
in the next `attack_period` steps (see init).
"""
def __init__(self, action_space):
BaseOpponent.__init__(self, action_space)
self._do_nothing = None
self._attacks = None
self._lines_ids = None
self._next_attack_time = None
self._attack_period = None
self._rho_normalization = None
# this is the constructor:
# it should have the exact same signature as here
[docs] def init(
self,
partial_env,
lines_attacked=[],
rho_normalization=[],
attack_period=12 * 24,
**kwargs,
):
"""
Generic function used to initialize the derived classes. For example, if an opponent reads from a file, the
path where is the file is located should be pass with this method.
Parameters
----------
lines_attacked: ``list``
The list of lines that the WeightedRandomOpponent should be able to disconnect
rho_normalization: ``list``
The list of mean usage rates for the attackable lines. Should have
the same length as lines_attacked. If no value is given, no normalization will be performed.
The weights for sampling the attacked line are rho / rho_normalization.
attack_period: ``int``
The number of steps among which the attack may happen.
If attack_period=10, then whenever an attack can be made, it will happen in the 10
next steps.
"""
if len(lines_attacked) == 0:
warnings.warn(
f"The opponent is deactivated as there is no information as to which line to attack. "
f'You can set the argument "kwargs_opponent" to the list of the line names you want '
f' the opponent to attack in the "make" function.'
)
# Store attackable lines IDs
self._lines_ids = []
for l_name in lines_attacked:
l_id = (self.action_space.name_line == l_name).nonzero()
if len(l_id) and len(l_id[0]):
self._lines_ids.append(l_id[0][0])
else:
raise OpponentError(
'Unable to find the powerline named "{}" on the grid. For '
"information, powerlines on the grid are : {}"
"".format(l_name, sorted(self.action_space.name_line))
)
# Pre-build attacks actions
self._do_nothing = self.action_space({})
self._attacks = []
for l_id in self._lines_ids:
a = self.action_space({"set_line_status": [(l_id, -1)]})
self._attacks.append(a)
self._attacks = np.array(self._attacks)
# Usage rates normalization
self._rho_normalization = np.ones_like(lines_attacked)
if len(rho_normalization) == 0:
warnings.warn(
"The usage rate normalization is not specified. No normalization will be performed."
)
elif len(rho_normalization) != len(lines_attacked):
raise Warning(
f"The usage rate normalization must have the same length as the number "
f"of attacked lines. No normalization will be performed."
)
else:
self._rho_normalization = np.array(rho_normalization)
# Opponent's attack period
self._attack_period = attack_period
if self._attack_period <= 0:
raise OpponentError("Opponent attack cooldown need to be > 0")
[docs] def reset(self, initial_budget):
self._next_attack_time = None
[docs] def tell_attack_continues(self, observation, agent_action, env_action, budget):
self._next_attack_time = None
[docs] def attack(self, observation, agent_action, env_action, budget, previous_fails):
"""
This method is the equivalent of "attack" for a regular agent.
Opponent, in this framework can have more information than a regular agent (in particular it can
view time step t+1), it has access to its current budget etc.
Parameters
----------
observation: :class:`grid2op.Observation.Observation`
The last observation (at time t)
opp_reward: ``float``
THe opponent "reward" (equivalent to the agent reward, but for the opponent) TODO do i add it back ???
done: ``bool``
Whether the game ended or not TODO do i add it back ???
agent_action: :class:`grid2op.Action.Action`
The action that the agent took
env_action: :class:`grid2op.Action.Action`
The modification that the environment will take.
budget: ``float``
The current remaining budget (if an action is above this budget, it will be replaced by a do nothing.
previous_fails: ``bool``
Wheter the previous attack failed (due to budget or ambiguous action)
Returns
-------
attack: :class:`grid2op.Action.Action`
The attack performed by the opponent. In this case, a do nothing, all the time.
duration: ``int``
The duration of the attack
"""
# TODO maybe have a class "GymOpponent" where the observation would include the budget and all other
# TODO information, and forward something to the "act" method.
# During creation of the environment, do not attack
if observation is None:
return None, 0
# Decide the time of the next attack
if self._next_attack_time is None:
self._next_attack_time = 1 + self.space_prng.randint(self._attack_period)
self._next_attack_time -= 1
# If the attack time has not come yet, do not attack
if self._next_attack_time > 0:
return None, 0
# If all attackable lines are disconnected, do not attack
status = observation.line_status[self._lines_ids]
if not status.sum():
return None, 0
available_attacks = self._attacks[status]
rho = observation.rho[self._lines_ids][status] / self._rho_normalization[status]
rho_sum = rho.sum()
if rho_sum <= 0.0:
# this case can happen if a powerline has a flow of 0.0 but is connected, and it's the only one
# that can be attacked... Pretty rare hey !
return None, 0
attack = self.space_prng.choice(available_attacks, p=rho / rho_sum)
return attack, None
def _custom_deepcopy_for_copy(self, new_obj, dict_=None):
super()._custom_deepcopy_for_copy(new_obj, dict_)
if dict_ is None:
dict_ = {}
new_obj._attacks = copy.deepcopy(self._attacks)
new_obj._lines_ids = copy.deepcopy(self._lines_ids)
new_obj._next_attack_time = copy.deepcopy(self._next_attack_time)
new_obj._attack_period = copy.deepcopy(self._attack_period)
new_obj._rho_normalization = copy.deepcopy(self._rho_normalization)