Source code for grid2op.Opponent.baseOpponent

# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import warnings

import numpy as np

from grid2op.Space import RandomObject
from grid2op.Exceptions import OpponentError

class BaseOpponent(RandomObject):
    """
    Base class of the opponents.

    An opponent plays a role comparable to an agent: its :func:`BaseOpponent.attack` method is the
    equivalent of "act". This base implementation holds no internal state and never attacks
    (:func:`BaseOpponent.attack` returns ``(None, None)``). Derived classes are expected to override the
    methods they need.

    Attributes
    ----------
    action_space:
        The action space used to build the opponent's actions. It is treated as constant (it is shared,
        not copied, when the opponent is copied).

    _do_nothing:
        The action obtained by calling ``action_space`` without argument.
    """

    def __init__(self, action_space):
        """
        Parameters
        ----------
        action_space:
            Callable action space; calling it without argument is used to build the "do nothing" action.
        """
        RandomObject.__init__(self)
        self.action_space = action_space
        self._do_nothing = self.action_space()

    def init(self, partial_env, **kwargs):
        """
        Generic function used to initialize the derived classes. For example, if an opponent reads from a file,
        the path where the file is located should be passed to this method.

        The base implementation does nothing.

        Parameters
        ----------
        partial_env:
            Not used by the base implementation.

        kwargs:
            Extra keyword arguments for the derived classes. Not used by the base implementation.
        """
        pass

    def reset(self, initial_budget):
        """
        This function is called at the end of an episode, when the episode is over. It aims at resetting the
        opponent and preparing it for a new episode.

        The base implementation does nothing.

        Parameters
        ----------
        initial_budget: ``float``
            The initial budget the opponent has
        """
        pass

    def attack(self, observation, agent_action, env_action, budget, previous_fails):
        """
        This method is the equivalent of "act" for a regular agent.

        Opponent, in this framework can have more information than a regular agent (in particular it can
        view time step t+1), it has access to its current budget etc.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
            The last observation (at time t)

        agent_action: :class:`grid2op.Action.Action`
            The action that the agent took

        env_action: :class:`grid2op.Action.Action`
            The modification that the environment will take.

        budget: ``float``
            The current remaining budget (if an action is above this budget, it will be replaced by a do nothing).

        previous_fails: ``bool``
            Whether the previous attack failed (due to budget or ambiguous action)

        Returns
        -------
        attack: :class:`grid2op.Action.Action`
            The attack performed by the opponent. The base implementation never attacks and returns ``None``.

        duration: ``int``
            The duration of the attack. The base implementation returns ``None``.

        Notes
        -----
        ``opp_reward`` (the opponent "reward", equivalent to the agent reward but for the opponent) and ``done``
        (whether the game ended or not) are NOT part of the signature of this method.
        TODO decide whether they should be added back.

        """
        # TODO maybe have a class "GymOpponent" where the observation would include the budget  and all other
        # TODO information, and forward something to the "act" method.
        return None, None

    def tell_attack_continues(self, observation, agent_action, env_action, budget):
        """
        The purpose of this method is to tell the opponent that its attack is being continued
        and to indicate the current state of the grid.

        At every time step, either "attack" or "tell_attack_continues" is called exactly once.

        The base implementation does nothing.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
            The last observation (at time t)

        agent_action: :class:`grid2op.Action.Action`
            The action that the agent took

        env_action: :class:`grid2op.Action.Action`
            The modification that the environment will take.

        budget: ``float``
            The current remaining budget (if an action is above this budget, it will be replaced by a do nothing).
        """
        pass

    def get_state(self):
        """
        This function should return the internal state of the Opponent.

        This means that after a call to `opponent.set_state(opponent.get_state())` the opponent should do the exact
        same things than without these calls.

        Returns
        -------
        state:
            The internal state of the opponent. The base implementation has no internal state and
            returns ``None``.

        """
        return None

    def set_state(self, my_state):
        """
        This function is used to set the internal state of the Opponent.

        The base implementation does nothing.

        Parameters
        ----------
        my_state:
            A state, as returned by :func:`BaseOpponent.get_state`.

        """
        pass

    def _custom_deepcopy_for_copy(self, new_obj, dict_=None):
        """
        Copy the attributes of this opponent onto ``new_obj``.

        Parameters
        ----------
        new_obj:
            The object receiving the copy. It is modified in place.

        dict_:
            Not used by this implementation (and not forwarded to the parent class).
        """
        super()._custom_deepcopy_for_copy(new_obj)
        new_obj.action_space = self.action_space  # const: shared, not copied
        # rebuild the do nothing action from the action space rather than copying the existing one
        new_obj._do_nothing = new_obj.action_space()
        # transfer the internal state through the public get_state / set_state pair so that
        # derived classes overriding them are copied properly
        new_obj.set_state(self.get_state())

    def _set_line_id(
        self,
        lines_attacked
        ):
        """
        Convert the names of the attackable powerlines into their ids and store them in ``self._lines_ids``.

        If ``lines_attacked`` is empty a warning is issued and ``self._lines_ids`` is set to an empty list.

        Parameters
        ----------
        lines_attacked:
            Sized iterable of the names of the powerlines the opponent is allowed to attack.

        Raises
        ------
        OpponentError
            If a name of ``lines_attacked`` is not found in ``self.action_space.name_line``.
        """
        if len(lines_attacked) == 0:
            warnings.warn(
                "The opponent is deactivated as there is no information as to which line to attack. "
                'You can set the argument "kwargs_opponent" to the list of the line names you want '
                ' the opponent to attack in the "make" function.'
            )

        # Store attackable lines IDs
        self._lines_ids = []
        for l_name in lines_attacked:
            # NOTE(review): assumes `name_line` is a numpy array, so that `==` is element-wise and
            # `.nonzero()` returns a tuple of index arrays -- confirm against the action space
            l_id = (self.action_space.name_line == l_name).nonzero()
            if not (len(l_id) and len(l_id[0])):
                raise OpponentError(
                    'Unable to find the powerline named "{}" on the grid. For '
                    "information, powerlines on the grid are : {}"
                    "".format(l_name, sorted(self.action_space.name_line))
                )
            # only the first match is kept
            self._lines_ids.append(l_id[0][0])

    def _prebuild_action(self):
        """
        Build, once and for all, the actions used by the opponent.

        It sets ``self._do_nothing`` (action built from an empty dictionary) and ``self._attacks``, a numpy
        array containing, for each id of ``self._lines_ids`` (in the same order), the action setting the
        status of this powerline to ``-1``.

        ``self._lines_ids`` must exist before calling this method (see ``_set_line_id``).
        """
        self._do_nothing = self.action_space({})
        self._attacks = np.array(
            [
                self.action_space({"set_line_status": [(l_id, -1)]})
                for l_id in self._lines_ids
            ]
        )