Source code for grid2op.Opponent.OpponentSpace

# Copyright (c) 2019-2020, RTE (
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import numpy as np

from grid2op.Exceptions import OpponentError

[docs]class OpponentSpace(object): """ Is similar to the action space, but for the opponent. This class is used to express some "constraints" on the opponent attack. The opponent is free to attack whatever it wants, for how long it wants and when it wants. This class ensures that the opponent does not break any rules. Attributes ---------- action_space: :class:`grid2op.Action.ActionSpace` The action space defining which action the Opponent are allowed to take init_budget: ``float`` The initial budget of the opponent compute_budget: :class:`grid2op.Opponent.ActionBudget` The tool used to compute the budget opponent: :class:`grid2op.Opponent.BaseOpponent` The agent that will take malicious actions. previous_fails: ``bool`` Whether the last attack of the opponent failed or not budget_per_timestep: ``float`` The increase of the opponent budget per time step (if any) """ def __init__(self, compute_budget, init_budget, opponent, attack_duration, # maximum duration of an attack attack_cooldown, # minimum duration between two consecutive attack budget_per_timestep=0., action_space=None): if action_space is not None: if not isinstance(action_space, compute_budget.action_space): raise OpponentError("BaseAction space provided to build the agent is not a subclass from the" "action space to compute the cost of each action.") self.action_space = action_space else: self.action_space = compute_budget.action_space self.init_budget = init_budget self.budget = init_budget self.compute_budget = compute_budget self.opponent = opponent self._do_nothing = self.action_space() self.previous_fails = False self.budget_per_timestep = budget_per_timestep self.attack_max_duration = attack_duration self.attack_cooldown = attack_cooldown self.current_attack_duration = 0 self.current_attack_cooldown = attack_cooldown self.last_attack = None if init_budget < 0.: raise OpponentError("An opponent should at least have a positive (or null) budget. If you " "want to deactivate the opponent set its budget to 0 and use the" "DontAct class as the \"opponent_class\"") # TODO do i add it back # if not isinstance(opponent_reward_class, BaseReward): # raise OpponentError("Impossible to build an opponent reward with a reward of type {}".format(opponent_reward_class)) # self.opp_reward_helper = RewardHelper(opponent_reward_class)
[docs] def init_opponent(self, partial_env, **kwargs): """ Generic function used to initialize the opponent. For example, if an opponent reads from a file, the path where is the file is located should be pass with this method. """ self.opponent.init(partial_env=partial_env, **kwargs)
[docs] def reset(self): """ Reset the state of the Opponent to its original state, in particular re assign the proper budget to it. """ self.budget = self.init_budget self.previous_fails = False self.current_attack_duration = 0 self.current_attack_cooldown = self.attack_cooldown self.last_attack = None self.opponent.reset(self.budget) self.previous_fails = False
def _get_state(self): # used for simulate state_me = self.budget, self.previous_fails, self.current_attack_duration, \ self.current_attack_cooldown, self.last_attack state_opp = self.opponent.get_state() return state_me, state_opp def _set_state(self, my_state, opp_state=None): # used for simulate (and for deep copy) if opp_state is not None: self.opponent.set_state(opp_state) budget, previous_fails, current_attack_duration, current_attack_cooldown, last_attack = my_state self.budget = budget self.previous_fails = previous_fails self.current_attack_duration = current_attack_duration self.current_attack_cooldown = current_attack_cooldown self.last_attack = last_attack
[docs] def has_failed(self): """ This signal is sent by the environment and indicated the opponent attack could not be implmented on the powergrid, most likely due to the attack to be ambiguous. """ self.previous_fails = True
[docs] def attack(self, observation, agent_action, env_action): """ This function calls the attack from the opponent. It check whether the budget is consistent with the attack (budget should be more that the cosst associated with the attack). If the attack cost too much, then it is replaced by a "do nothing" action. Otherwise, the attack will be implemented by the environment. Note that if the attack is "ambiguous" it will fails (the environment will replace it by a "do nothing" action), but the budget will still be consumed. **NB** it is expected that this function update the :attr:`OpponentSpace.last_attack` attribute with ``None`` if the opponent choose not to attack, or with the attack of the opponent otherwise. Parameters ---------- observation: :class:`grid2op.Observation.Observation` The last observation (at time t) agent_action: :class:`grid2op.Action.Action` The action that the agent took env_action: :class:`grid2op.Action.Action` The modification that the environment will take. Returns ------- res: :class:`grid2op.Action.Action` : The attack the opponent wants to perform (or "do nothing" if the attack was too costly) or class:`NoneType` : Returns None if no action is taken """ if observation is None: # this is the first time step, which is not a "real" one # just here to load the data properly, so opponent do not attack there return None, 0 # Update variables self.budget += self.budget_per_timestep self.current_attack_duration = max(0, self.current_attack_duration - 1) self.current_attack_cooldown = max(0, self.current_attack_cooldown - 1) attack_called = False if self.current_attack_duration > 0: # previous attack is not over attack = self.last_attack elif self.current_attack_cooldown > self.attack_cooldown: # minimum time between two consecutive attack not met attack = None # If the opponent can attack else: attack_called = True attack, duration = self.opponent.attack(observation, agent_action, env_action, self.budget, self.previous_fails) if duration is None: if np.isfinite(self.attack_max_duration): duration = self.attack_max_duration else: duration = 1 self.previous_fails = False if duration > self.attack_max_duration: # duration chosen by the opponent would exceed the maximum duration allowed attack = None self.previous_fails = True # If the cost is too high final_budget = self.budget # TODO add the: + self.budget_per_timestep * (self.attack_duration - 1) # i did not do it in case an attack is ok at the beginning, ok at the end, but at some point in the attack # process it is not (but i'm not sure this can happen, and don't have time to think about it right now) if duration * self.compute_budget(attack) > final_budget: attack = None self.previous_fails = True # If we can afford the attack if attack is not None: # even if it's "do nothing", it's sill an attack. Too bad if the opponent chose to do nothing. self.current_attack_duration = duration self.current_attack_cooldown += self.attack_cooldown if not attack_called: self.opponent.tell_attack_continues(observation, agent_action, env_action, self.budget) self.previous_fails = False self.budget -= self.compute_budget(attack) self.last_attack = attack attack_duration = self.current_attack_duration if attack is None: attack_duration = 0 return attack, attack_duration
[docs] def close(self): """if this has a reference to a backend, you need to close it for grid2op to work properly. Do not forget to do it.""" pass