Source code for grid2op.Chronics.handlers.csvHandler

# Copyright (c) 2019-2023, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import os
import pandas as pd
import numpy as np
import copy
from typing import Tuple

from grid2op.Exceptions import (
    ChronicsError, HandlerError
)

from grid2op.dtypes import dt_int, dt_float
from grid2op.Chronics.handlers.baseHandler import BaseHandler


class CSVHandler(BaseHandler):
    """Reads and produces time series given by a csv file (possibly compressed).
    The separator used can be specified as input.

    The file name should match the "array_name": for example, if the data you
    want to use for "load_p" in the environment are in the file
    "my_load_p_data.csv.bz2", you should name this handler "my_load_p_data"
    and not "load_p" nor "my_load_p_data.csv" nor "my_load_p_data.csv.bz2".

    The csv should be structured as follows:

    - it should not have any "index" column: only data used by grid2op will be read
    - each element (for example a load) is represented by a `column`
    - it should have a header with the names of the elements it "handles", and
      these names should match the ones in the environment. For example if
      "load_1_0" is the name of a load and you read data for "load_p" or
      "load_q", then one column of your csv should be named "load_1_0".
    - each time step is represented as a `row`, in order. For example (ignoring
      the header), row 1 (first row) will be step 1, row 2 will be step 2 etc.
    - only floating point numbers should be present in the data (no bool or
      string; integers will be cast to float)

    .. warning::
        Use this class only for the ENVIRONMENT data ("load_p", "load_q",
        "prod_p" or "prod_v") and not for maintenance (in this case use
        :class:`CSVMaintenanceHandler`) nor for forecast (in this case use
        :class:`CSVForecastHandler`)

    This is the default way to provide data to grid2op and it is used for most
    l2rpn environments.
    """

    def __init__(self,
                 array_name,  # eg "load_p"
                 sep=";",
                 chunk_size=None,
                 max_iter=-1) -> None:
        super().__init__(array_name, max_iter)
        self.path = None
        self._file_ext = None
        self.tmp_max_index = None  # maximum size of the tables currently in memory
        self.array = None  # numpy array corresponding to the current active values in the power grid

        self.current_index = -1
        self.sep = sep

        self.names_chronics_to_backend = None

        # added to provide an easier access to read data in chunk
        self.chunk_size = chunk_size
        self._data_chunk = {}
        self._order_array = None
        # self._order_backend_arrays = None
        # self._nb_row_per_step = 1

    def _clear(self):
        """reset to a state as if it was just created"""
        super()._clear()
        self.path = None
        self._file_ext = None
        self.tmp_max_index = None
        self.array = None
        self.current_index = -1
        self.names_chronics_to_backend = None
        self._data_chunk = {}
        self._order_array = None
        self._order_backend_arrays = None
        return self
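    # A minimal usage sketch (hypothetical directory and file names, not part
    # of this module): assuming "./my_chronics" contains "my_load_p_data.csv.bz2"
    # whose header matches the load names of the environment, the handler could
    # be wired up roughly as follows:
    #
    #     from grid2op.Chronics.handlers import CSVHandler
    #     handler = CSVHandler("my_load_p_data", sep=";", chunk_size=None)
    #     handler.set_path("./my_chronics")
    #     # grid2op then calls initialize(...) and load_next(...) internally,
    #     # consuming one csv row per environment step.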
    def set_path(self, path):
        self._file_ext = self._get_fileext(path)
        self.path = os.path.join(path, f"{self.array_name}{self._file_ext}")
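    # For instance (hypothetical layout): with array_name "load_p" and a
    # directory "./chronics" containing "load_p.csv.bz2", _get_fileext detects
    # ".csv.bz2" and self.path becomes "./chronics/load_p.csv.bz2".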
    def initialize(self, order_backend_arrays, names_chronics_to_backend):
        self._order_backend_arrays = copy.deepcopy(order_backend_arrays)
        self.names_chronics_to_backend = copy.deepcopy(names_chronics_to_backend)

        # read the data
        array_iter = self._get_data()

        if not self.names_chronics_to_backend:
            self.names_chronics_to_backend = {}
            self.names_chronics_to_backend[self.array_name] = {
                k: k for k in self._order_backend_arrays
            }

        # put the proper name in order
        order_backend_arrays = {el: i for i, el in enumerate(order_backend_arrays)}

        if self.chunk_size is None:
            array = array_iter
            if array is not None:
                self.tmp_max_index = array.shape[0]
            else:
                raise HandlerError(
                    'No files are found in directory "{}". If you don\'t want to load any chronics,'
                    ' use "DoNothingHandler" (`from grid2op.Chronics.handlers import DoNothingHandler`) '
                    'and not "{}" to load chronics.'
                    "".format(self.path, type(self))
                )
        else:
            self._data_chunk = {
                self.array_name: array_iter,
            }
            array = self._get_next_chunk()

        # get the chronics in order
        order_chronics = self._get_orders(array, order_backend_arrays)

        # now "sort" the columns of each chunk of data
        self._order_array = np.argsort(order_chronics)

        self._init_attrs(array)

        self.curr_iter = 0
        if self.chunk_size is None:
            self.max_episode_duration = self.array.shape[0] - 1
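    # Illustration of the re-ordering computed above (hypothetical names):
    # if the backend expects ["load_0", "load_1"] but the csv header reads
    # ["load_1", "load_0"], then order_chronics == [1, 0], so
    # np.argsort([1, 0]) == [1, 0] and array.values[:, [1, 0]] puts the
    # "load_0" column first, matching the backend order.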
    def done(self):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Compared to :func:`GridValue.done`, an episode can be over for 2 main reasons:

        - :attr:`GridValue.max_iter` has been reached
        - there are no more data in the csv

        The episode is done if one of the above conditions is met.

        Returns
        -------
        res: ``bool``
            Whether the episode has reached its end or not.
        """
        if self.max_iter > 0 and self.curr_iter > self.max_iter:
            return True
        if self.chunk_size is None and self.current_index >= self.array.shape[0]:
            return True
        return False
    def load_next(self, dict_):
        self.current_index += 1

        if not self._data_in_memory():
            try:
                self._load_next_chunk_in_memory()
            except StopIteration as exc_:
                raise StopIteration from exc_

        if self.current_index > self.tmp_max_index:
            raise StopIteration

        if self.max_iter > 0:
            if self.curr_iter >= self.max_iter:
                raise StopIteration

        return copy.deepcopy(self.array[self.current_index, :])
    def get_max_iter(self):
        if self.max_iter != -1:
            return self.max_iter
        if self.max_episode_duration is not None:
            return self.max_episode_duration
        if self.chunk_size is None and self.array is not None:
            return self.array.shape[0] - 1
        if self.array is None:
            return -1
        import warnings
        warnings.warn(
            "Unable to read the 'max_iter' when there is a chunk size set and no \"max_iter\""
        )
        return -1  # TODO
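    # For instance (values are illustrative only): with max_iter == -1, no
    # chunking and an array of 8065 rows (one initial state plus 8064 steps),
    # this returns 8064.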
    def check_validity(self, backend):
        # TODO
        return True
    def _init_attrs(self, array):
        self.array = None
        if array is not None:
            self.array = copy.deepcopy(
                array.values[:, self._order_array].astype(dt_float)
            )

    def _get_fileext(self, path_tmp):
        # in csvhandler
        read_compressed = ".csv"
        if not os.path.exists(os.path.join(path_tmp, "{}.csv".format(self.array_name))):
            # try to read compressed data
            if os.path.exists(os.path.join(path_tmp, "{}.csv.bz2".format(self.array_name))):
                read_compressed = ".csv.bz2"
            elif os.path.exists(os.path.join(path_tmp, "{}.zip".format(self.array_name))):
                read_compressed = ".zip"
            elif os.path.exists(
                os.path.join(path_tmp, "{}.csv.gzip".format(self.array_name))
            ):
                read_compressed = ".csv.gzip"
            elif os.path.exists(os.path.join(path_tmp, "{}.csv.xz".format(self.array_name))):
                read_compressed = ".csv.xz"
            else:
                read_compressed = None
        return read_compressed

    def _get_data(self, chunksize=-1, nrows=None):
        # in csvhandler
        if nrows is None:
            if self.max_iter > 0:
                nrows = self.max_iter + self._nb_row_per_step

        if self._file_ext is not None:
            if chunksize == -1:
                chunksize = self.chunk_size
            res = pd.read_csv(
                self.path,
                sep=self.sep,
                chunksize=chunksize,
                nrows=nrows,
            )
        else:
            res = None
        return res

    def _get_orders(
        self,
        array,  # eg load_p
        order_arrays,  # eg order_backend_loads
    ):
        order_chronics_arrays = None
        if array is not None:
            self._assert_correct_second_stage(
                array.columns, self.names_chronics_to_backend
            )
            order_chronics_arrays = np.array(
                [
                    order_arrays[self.names_chronics_to_backend[self.array_name][el]]
                    for el in array.columns
                ]
            ).astype(dt_int)
        return order_chronics_arrays

    def _assert_correct_second_stage(self, pandas_name, dict_convert):
        for i, el in enumerate(pandas_name):
            if el not in dict_convert[self.array_name]:
                raise ChronicsError(
                    "Element named {} is found in the data (column {}) but it is not found on the "
                    'powergrid for data of type "{}".\nData in files are: {}\n'
                    "Converter data are: {}".format(
                        el,
                        i + 1,
                        self.array_name,
                        sorted(list(pandas_name)),
                        sorted(list(dict_convert[self.array_name].keys())),
                    )
                )
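    # Sketch of the chunked reading used by _get_data: when `chunksize` is an
    # integer, pandas.read_csv returns an iterator of DataFrames rather than a
    # single DataFrame (hypothetical file name):
    #
    #     import pandas as pd
    #     for chunk in pd.read_csv("load_p.csv", sep=";", chunksize=100):
    #         print(chunk.shape)  # at most 100 rows per chunk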
    def set_chunk_size(self, chunk_size):
        self.chunk_size = int(chunk_size)
    def _get_next_chunk(self):
        array = None
        if self._data_chunk[self.array_name] is not None:
            array = next(self._data_chunk[self.array_name])
            self.tmp_max_index = array.shape[0]
        return array

    def _data_in_memory(self):
        if self.chunk_size is None:
            # if chunks are not used, all the data are already in memory
            return True
        if self.current_index == 0:
            # data are loaded at the first iteration
            return True
        if self.current_index % self.chunk_size != 0:
            # data are already in ram
            return True
        return False

    def _load_next_chunk_in_memory(self):
        # load the next chunk as a dataframe
        array = self._get_next_chunk()  # array: eg load_p
        # put the dataframe columns in the right order
        self._init_attrs(array)
        # do not forget to reset the reading index to 0
        self.current_index = 0
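    # For example, with chunk_size=100 the index sequence within a chunk is
    # 0, 1, ..., 99; when load_next increments current_index to 100 (a non-zero
    # multiple of chunk_size), _data_in_memory() returns False, the next chunk
    # is read and the reading index is reset to 0.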
    def forecast(self,
                 forecast_horizon_id: int,
                 inj_dict_env: dict,
                 inj_dict_previous_forecast: dict,
                 # eg gen_p_handler if this is set to gen_p_for_handler:
                 env_handler: "BaseHandler",
                 # list of the 4 env handlers: (load_p_handler, load_q_handler, gen_p_handler, gen_v_handler)
                 env_handlers: Tuple["BaseHandler", "BaseHandler", "BaseHandler", "BaseHandler"]):
        raise HandlerError(
            f"forecast {self.array_name}: You should only use this class for ENVIRONMENT data, and not for FORECAST data. "
            "Please consider using `CSVForecastHandler` (`from grid2op.Chronics.handlers import CSVForecastHandler`) "
            "for your forecast data."
        )
    def get_available_horizons(self):
        raise HandlerError(
            f"get_available_horizons {self.array_name}: You should only use this class for ENVIRONMENT data, and not for FORECAST data. "
            "Please consider using `CSVForecastHandler` (`from grid2op.Chronics.handlers import CSVForecastHandler`) "
            "for your forecast data."
        )
    def load_next_maintenance(self):
        raise HandlerError(
            f"load_next_maintenance {self.array_name}: You should only use this class for ENVIRONMENT data, and not for FORECAST data nor MAINTENANCE data. "
            "Please consider using `CSVMaintenanceHandler` (`from grid2op.Chronics.handlers import CSVMaintenanceHandler`) "
            "for your maintenance data."
        )
    def load_next_hazard(self):
        raise HandlerError(
            f"load_next_hazard {self.array_name}: You should only use this class for ENVIRONMENT data, and not for FORECAST "
            "data nor MAINTENANCE nor HAZARDS data (NB HAZARDS data are not yet supported by handlers)."
        )
    def next_chronics(self):
        self.current_index = -1
        self.curr_iter = 0
        if self.chunk_size is not None:
            # when reading chunk by chunk, everything needs to be reloaded from
            # the file; if all data are already in memory there is nothing to do
            self._clear()
    def get_future_data(self, horizon: int, quiet_warnings: bool = False):
        horizon = int(horizon)
        tmp_index = self.current_index + horizon // (self.time_interval.total_seconds() // 60)
        tmp_index = int(tmp_index)
        if tmp_index < self.array.shape[0]:
            res = self.array[tmp_index, :]
        else:
            if not quiet_warnings:
                import warnings
                warnings.warn(
                    f"{type(self)} {self.array_name}: No more data to get, the last known data is returned."
                )
            res = self.array[-1, :]
        return copy.deepcopy(res)
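    # Worked example of the index arithmetic above: assuming 5-minute steps
    # (time_interval.total_seconds() // 60 == 5.0), a call with horizon=30
    # (minutes) from current_index=10 reads row 10 + 30 // 5.0 == 16.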