diff --git a/antarest/study/business/arrow_utils.py b/antarest/study/business/arrow_utils.py
new file mode 100644
index 0000000000..8f23a7b232
--- /dev/null
+++ b/antarest/study/business/arrow_utils.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2024, RTE (https://www.rte-france.com)
+#
+# See AUTHORS.txt
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This file is part of the Antares project.
+import os
+import tempfile
+import typing as t
+from io import BytesIO
+
+import pandas as pd
+import pyarrow as pa
+from pyarrow import feather
+from pyarrow.feather import write_feather
+
+
+def dataframe_to_bytes(df: pd.DataFrame, metadata: t.Optional[t.Dict[str | bytes, str | bytes]]) -> bytes:
+    table: pa.Table = pa.Table.from_pandas(df, preserve_index=False)
+
+    if metadata:
+        metadata_bytes = {str(k): str(v) for k, v in metadata.items()}
+        schema_metadata: t.Dict[str | bytes, str | bytes] = {k: v for k, v in metadata_bytes.items()}
+        table = table.replace_schema_metadata(schema_metadata)
+
+    buffer = BytesIO()
+    write_feather(df=table, dest=buffer)  # type:ignore
+
+    return buffer.getvalue()
+
+
+def bytes_to_dataframe(buffer: bytes) -> pd.DataFrame:
+    data = BytesIO(buffer)
+    table = feather.read_table(data)
+
+    df = table.to_pandas()
+
+    metadata = table.schema.metadata
+    if metadata:
+        df.metadata = {k.decode("utf8"): v.decode("utf8") for k, v in metadata.items()}
+
+    return df
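Reviewer note: a minimal round-trip sketch of the two helpers above, assuming only pandas and pyarrow are installed. The DataFrame contents and the metadata value are invented for illustration; the column name mirrors the "ts-0" convention used later in this PR.

```python
# Illustrative round trip through dataframe_to_bytes / bytes_to_dataframe.
# The data below is made up for the example; only the helper calls come from this PR.
import pandas as pd

from antarest.study.business.arrow_utils import bytes_to_dataframe, dataframe_to_bytes

df = pd.DataFrame({"ts-0": [0.0, 1.5, 3.0]})
payload = dataframe_to_bytes(df, metadata={"start_date": "2018-01-01 00:00:00"})

restored = bytes_to_dataframe(payload)
assert list(restored.columns) == ["ts-0"]
# replace_schema_metadata drops the auto-generated "pandas" key, so only the
# caller-supplied metadata survives the round trip.
assert restored.metadata == {"start_date": "2018-01-01 00:00:00"}
```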
diff --git a/antarest/study/business/load_management.py b/antarest/study/business/load_management.py
new file mode 100644
index 0000000000..fce3d33a23
--- /dev/null
+++ b/antarest/study/business/load_management.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, RTE (https://www.rte-france.com)
+#
+# See AUTHORS.txt
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This file is part of the Antares project.
+
+import typing as t
+
+import pandas as pd
+
+from antarest.study.model import MatrixIndex, Study
+from antarest.study.storage.rawstudy.model.filesystem.matrix.input_series_matrix import InputSeriesMatrix
+from antarest.study.storage.storage_service import StudyStorageService
+from antarest.study.storage.utils import get_start_date
+
+LOAD_PATH = "input/load/series/load_{area_id}"
+matrix_columns = ["ts-0"]
+
+
+class LoadManager:
+    def __init__(self, storage_service: StudyStorageService) -> None:
+        self.storage_service = storage_service
+
+    def get_load_matrix(self, study: Study, area_id: str) -> t.Tuple[pd.DataFrame, t.Dict[str | bytes, str | bytes]]:
+        file_study = self.storage_service.get_storage(study).get_raw(study)
+        load_path = LOAD_PATH.format(area_id=area_id).split("/")
+
+        node = file_study.tree.get_node(load_path)
+
+        if not isinstance(node, InputSeriesMatrix):
+            raise TypeError(f"Expected node of type 'InputSeriesMatrix', but got '{type(node).__name__}'")
+
+        matrix_data = InputSeriesMatrix.parse(node, return_dataframe=True)
+
+        matrix_df = t.cast(pd.DataFrame, matrix_data)
+        matrix_df.columns = matrix_df.columns.map(str)
+
+        matrix_df.columns = pd.Index(matrix_columns)
+
+        matrix_index: MatrixIndex = get_start_date(file_study)
+
+        metadata: t.Dict[str | bytes, str | bytes] = {
+            "start_date": str(matrix_index.start_date),
+            "steps": str(matrix_index.steps),
+            "first_week_size": str(matrix_index.first_week_size),
+            "level": str(matrix_index.level),
+        }
+
+        return matrix_df, metadata
diff --git a/antarest/study/model.py b/antarest/study/model.py
index b8378aa356..15fffc8f1e 100644
--- a/antarest/study/model.py
+++ b/antarest/study/model.py
@@ -16,6 +16,7 @@
 import typing as t
 import uuid
 from datetime import datetime, timedelta
+from enum import StrEnum
 from pathlib import Path
 
 from antares.study.version import StudyVersion
@@ -521,6 +522,11 @@ def suffix(self) -> str:
         return mapping[self]
 
 
+class MatrixFormat(StrEnum):
+    JSON = "json"
+    ARROW = "arrow"
+
+
 class StudyDownloadDTO(AntaresBaseModel):
     """
     DTO used to download outputs
diff --git a/antarest/study/service.py b/antarest/study/service.py
index 7e1198c6b7..3b148daec9 100644
--- a/antarest/study/service.py
+++ b/antarest/study/service.py
@@ -89,6 +89,7 @@
 from antarest.study.business.district_manager import DistrictManager
 from antarest.study.business.general_management import GeneralManager
 from antarest.study.business.link_management import LinkManager
+from antarest.study.business.load_management import LoadManager
 from antarest.study.business.matrix_management import MatrixManager, MatrixManagerError
 from antarest.study.business.model.link_model import LinkBaseDTO, LinkDTO
 from antarest.study.business.optimization_management import OptimizationManager
@@ -394,6 +395,7 @@ def __init__(
         self.adequacy_patch_manager = AdequacyPatchManager(self.storage_service)
         self.advanced_parameters_manager = AdvancedParamsManager(self.storage_service)
         self.hydro_manager = HydroManager(self.storage_service)
+        self.load_manager = LoadManager(self.storage_service)
         self.allocation_manager = AllocationManager(self.storage_service)
         self.properties_manager = PropertiesManager(self.storage_service)
         self.renewable_manager = RenewableManager(self.storage_service)
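Side note: the new MatrixFormat enum is not referenced by the endpoint added further down in this diff, which always answers in Arrow. A hedged sketch of how the enum could later drive the response encoding; the encode_matrix helper and its JSON fallback are hypothetical, not part of this PR.

```python
# Hypothetical dispatch on MatrixFormat (not wired into any endpoint in this PR).
import pandas as pd

from antarest.study.business.arrow_utils import dataframe_to_bytes
from antarest.study.model import MatrixFormat


def encode_matrix(df: pd.DataFrame, fmt: MatrixFormat = MatrixFormat.ARROW) -> bytes:
    if fmt == MatrixFormat.ARROW:
        # Reuse the Arrow serializer introduced by this PR.
        return dataframe_to_bytes(df, metadata=None)
    # Hypothetical JSON fallback, shown only to illustrate the enum.
    return df.to_json(orient="split").encode("utf-8")
```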
diff --git a/antarest/study/web/study_data_blueprint.py b/antarest/study/web/study_data_blueprint.py
index 9f267df774..a166e1de56 100644
--- a/antarest/study/web/study_data_blueprint.py
+++ b/antarest/study/web/study_data_blueprint.py
@@ -17,7 +17,7 @@
 
 import typing_extensions as te
 from fastapi import APIRouter, Body, Depends, Query
-from starlette.responses import RedirectResponse
+from starlette.responses import RedirectResponse, Response
 
 from antarest.core.config import Config
 from antarest.core.jwt import JWTUser
@@ -53,6 +53,7 @@
     ThermalClusterOutput,
     ThermalManager,
 )
+from antarest.study.business.arrow_utils import dataframe_to_bytes
 from antarest.study.business.binding_constraint_management import (
     ConstraintCreation,
     ConstraintFilters,
@@ -543,6 +544,32 @@ def update_inflow_structure(
         study = study_service.check_study_access(uuid, StudyPermissionType.WRITE, params)
         study_service.hydro_manager.update_inflow_structure(study, area_id, values)
 
+    @bp.get(
+        "/studies/{uuid}/{area_id}/load/series",
+        tags=[APITag.study_data],
+        summary="Get load series data",
+    )
+    def get_load_series(
+        uuid: str,
+        area_id: str,
+        current_user: JWTUser = Depends(auth.get_current_user),
+    ) -> Response:
+        """Return the load matrix in ARROW format."""
+        logger.info(
+            msg=f"Getting load series data for area {area_id} of study {uuid}",
+            extra={"user": current_user.id},
+        )
+        params = RequestParameters(user=current_user)
+        study = study_service.check_study_access(uuid, StudyPermissionType.READ, params)
+
+        try:
+            df, metadata = study_service.load_manager.get_load_matrix(study, area_id)
+        except TypeError as e:
+            return Response(content=str(e), status_code=400)
+
+        buffer = dataframe_to_bytes(df, metadata)
+        return Response(content=buffer, media_type="application/vnd.apache.arrow.file")
+
     @bp.put(
         "/studies/{uuid}/matrix",
         tags=[APITag.study_data],
diff --git a/requirements.txt b/requirements.txt
index 60c782d945..5246430395 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,6 +38,8 @@ pandas~=2.2.3
 paramiko~=3.4.1
 plyer~=2.0.0
 psycopg2-binary~=2.9.9
+pyarrow~=18.1.0
+pyarrow-stubs~=10.0.1.7
 py7zr~=0.20.6
 python-json-logger~=2.0.7
 PyYAML~=5.3.1
diff --git a/tests/integration/study_data_blueprint/test_load.py b/tests/integration/study_data_blueprint/test_load.py
new file mode 100644
index 0000000000..67e5f88fb2
--- /dev/null
+++ b/tests/integration/study_data_blueprint/test_load.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2024, RTE (https://www.rte-france.com)
+#
+# See AUTHORS.txt
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# SPDX-License-Identifier: MPL-2.0
+#
+# This file is part of the Antares project.
+from io import BytesIO
+
+import pytest
+from starlette.testclient import TestClient
+
+from antarest.study.business.arrow_utils import bytes_to_dataframe
+from tests.integration.prepare_proxy import PreparerProxy
+
+
+@pytest.mark.unit_test
+class TestLoad:
+    @pytest.mark.parametrize("study_type", ["raw", "variant"])
+    def test_load(self, client: TestClient, user_access_token: str, study_type: str) -> None:
+        client.headers = {"Authorization": f"Bearer {user_access_token}"}  # type: ignore
+
+        preparer = PreparerProxy(client, user_access_token)
+        study_id = preparer.create_study("foo", version=880)
+
+        if study_type == "variant":
+            study_id = preparer.create_variant(study_id, name="Variant 1")
+
+        area1_id = preparer.create_area(study_id, name="Area1")["id"]
+
+        # Test simple get ARROW
+
+        res = client.get(f"/v1/studies/{study_id}/{area1_id}/load/series")
+        assert res.status_code == 200
+        assert res.headers["content-type"] == "application/vnd.apache.arrow.file"
+
+        df = bytes_to_dataframe(res.content)
+
+        column_name = ["ts-0"]
+        assert column_name == list(df.columns)
+        assert df.metadata == {
+            "first_week_size": "7",
+            "level": "hourly",
+            "start_date": "2018-01-01 00:00:00",
+            "steps": "8760",
+        }
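The integration test above decodes the payload with the project's bytes_to_dataframe helper. A consumer outside this codebase could read the same response with plain pyarrow; a minimal sketch follows, where the base URL, study id, area id and token are placeholders, not values taken from this diff.

```python
# Hypothetical external client for GET /v1/studies/{uuid}/{area_id}/load/series.
# BASE_URL, STUDY_ID, AREA_ID and TOKEN are placeholders.
from io import BytesIO

import requests
from pyarrow import feather

BASE_URL = "http://localhost:8080"
STUDY_ID = "some-study-uuid"
AREA_ID = "area1"
TOKEN = "some-jwt-token"

resp = requests.get(
    f"{BASE_URL}/v1/studies/{STUDY_ID}/{AREA_ID}/load/series",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
resp.raise_for_status()

table = feather.read_table(BytesIO(resp.content))
df = table.to_pandas()
# Schema metadata round-trips as bytes keys/values (e.g. b"start_date").
metadata = {k.decode("utf8"): v.decode("utf8") for k, v in (table.schema.metadata or {}).items()}
print(df.shape, metadata)
```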