From debaecaa4db4f33c682c8859eea64f5d1983ed25 Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Thu, 6 Feb 2025 18:40:55 +0100
Subject: [PATCH] handle the link case

---
 antarest/matrixstore/uri_resolver_service.py  |  6 +--
 .../rawstudy/model/filesystem/lazy_node.py    | 16 +++-----
 .../filesystem/matrix/input_series_matrix.py  |  3 ++
 .../model/filesystem/matrix/matrix.py         | 40 ++++++++-----------
 .../test_fetch_raw_data.py                    |  2 +-
 ...est_generate_thermal_cluster_timeseries.py |  2 +-
 .../filesystem/matrix/test_matrix_node.py     |  3 ++
 .../repository/filesystem/test_lazy_node.py   | 20 ----------
 8 files changed, 34 insertions(+), 58 deletions(-)

diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py
index 542df1a49b..ae0bc4f7cd 100644
--- a/antarest/matrixstore/uri_resolver_service.py
+++ b/antarest/matrixstore/uri_resolver_service.py
@@ -15,7 +15,7 @@
 
 import pandas as pd
 
-from antarest.core.model import SUB_JSON
+from antarest.core.model import JSON
 from antarest.matrixstore.service import ISimpleMatrixService
 
 
@@ -23,7 +23,7 @@ class UriResolverService:
     def __init__(self, matrix_service: ISimpleMatrixService):
         self.matrix_service = matrix_service
 
-    def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON:
+    def resolve(self, uri: str, formatted: bool = True) -> JSON | str | None:
         res = UriResolverService._extract_uri_components(uri)
         if res:
             protocol, uuid = res
@@ -49,7 +49,7 @@ def extract_id(uri: str) -> Optional[str]:
         res = UriResolverService._extract_uri_components(uri)
         return res[1] if res else None
 
-    def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON:
+    def _resolve_matrix(self, id: str, formatted: bool = True) -> JSON | str:
         data = self.matrix_service.get(id)
         if not data:
             raise ValueError(f"id matrix {id} not found")
diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
index 2a9204d71e..2387b6f605 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -51,7 +51,8 @@ def _get_real_file_path(
         if self.config.archive_path:
             path, tmp_dir = self._extract_file_to_tmp_dir(self.config.archive_path)
         else:
-            path = self.config.path
+            link_path = self.get_link_path()
+            path = link_path if link_path.exists() else self.config.path
         return path, tmp_dir
 
     def file_exists(self) -> bool:
@@ -82,17 +83,12 @@ def _get(
         if get_node:
             return self
 
-        if self.get_link_path().exists():
-            link = self.get_link_path().read_text()
-            if expanded:
-                return link
-            else:
-                return t.cast(G, self.context.resolver.resolve(link, formatted))
-
         if expanded:
+            if self.get_link_path().exists():
+                return self.get_link_path().read_text()
             return self.get_lazy_content()
-        else:
-            return self.load(url, depth, expanded, formatted)
+
+        return self.load(url, depth, expanded, formatted)
 
     @override
     def get(
diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py
index eeb217d72e..919cb83686 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py
@@ -55,6 +55,7 @@ def __init__(
         self.default_empty = np.copy(default_empty)
         self.default_empty.flags.writeable = True
 
+    @override
     def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame:
         file_path = file_path or self.config.path
         try:
@@ -82,6 +83,8 @@ def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame
                     raise ChildNotFoundError(f"File '{relpath}' not found in the study '{study_id}'") from e
             stopwatch.log_elapsed(lambda x: logger.info(f"Matrix parsed in {x}s"))
             final_matrix = matrix.dropna(how="any", axis=1)
+            if final_matrix.empty:
+                raise EmptyDataError
             return final_matrix
         except EmptyDataError:
             logger.warning(f"Empty file found when parsing {file_path}")
diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py
index 5d720340b2..c8e4b7f750 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py
@@ -14,7 +14,7 @@
 from abc import ABC, abstractmethod
 from enum import StrEnum
 from pathlib import Path
-from typing import List, Optional, Union, cast
+from typing import List, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -92,13 +92,11 @@ def normalize(self) -> None:
         if self.get_link_path().exists() or self.config.archive_path:
             return
 
-        matrix = self.parse_as_json()
-
-        if "data" in matrix:
-            data = cast(List[List[float]], matrix["data"])
-            uuid = self.context.matrix.create(data)
-            self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
-            self.config.path.unlink()
+        matrix = self.parse_as_dataframe()
+        data = matrix.to_numpy().tolist()
+        uuid = self.context.matrix.create(data)
+        self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
+        self.config.path.unlink()
 
     @override
     def denormalize(self) -> None:
@@ -128,27 +126,16 @@ def load(
         expanded: bool = False,
         formatted: bool = True,
     ) -> Union[bytes, JSON]:
-        file_path, tmp_dir = self._get_real_file_path()
+        file_path, _ = self._get_real_file_path()
 
         if formatted:
             return self.parse_as_json(file_path)
 
-        if not file_path.exists():
-            logger.warning(f"Missing file {self.config.path}")
-            if tmp_dir:
-                tmp_dir.cleanup()
-            return b""
-
-        file_content = file_path.read_bytes()
-        if file_content != b"":
-            return file_content
-
-        # If the content is empty, we should return the default matrix to do the same as `parse_as_json()`
-        default_matrix = self.get_default_empty_matrix()
-        if default_matrix is None:
+        df = self.parse_as_dataframe(file_path)
+        if df.empty:
             return b""
         buffer = io.BytesIO()
-        np.savetxt(buffer, default_matrix, delimiter="\t")
+        np.savetxt(buffer, df, delimiter="\t")
         return buffer.getvalue()
 
     @abstractmethod
@@ -158,6 +145,13 @@ def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
+        """
+        Parse the matrix content and return it as a DataFrame object
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
         """
diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py
index 878fd0896b..b0fb30c5c8 100644
--- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py
+++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py
@@ -232,7 +232,7 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
             written_data = res.json()["data"]
             if not content.decode("utf-8"):
                 # The `GET` returns the default matrix when it's empty
-                expected = 8760 * [[0]] if study_type == "raw" else [[]]
+                expected = 8760 * [[0]]
             else:
                 df = pd.read_csv(io.BytesIO(content), delimiter=delimiter, header=None).replace(",", ".", regex=True)
                 df = df.dropna(axis=1, how="all")  # We want to remove columns full of NaN at the import
diff --git a/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py b/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py
index c489a17ae5..c99b52de75 100644
--- a/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py
+++ b/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py
@@ -97,7 +97,7 @@ def test_lifecycle_nominal(self, client: TestClient, user_access_token: str) ->
         )
         assert res.status_code == 200
         data = res.json()["data"]
-        assert data == [[]]  # no generation c.f. gen-ts parameter
+        assert data == 8760 * [[0]]  # no generation c.f. gen-ts parameter -> empty file -> default simulator value
 
     @pytest.mark.parametrize("study_type", ["raw", "variant"])
     def test_errors_and_limit_cases(self, client: TestClient, user_access_token: str, study_type: str) -> None:
diff --git a/tests/storage/repository/filesystem/matrix/test_matrix_node.py b/tests/storage/repository/filesystem/matrix/test_matrix_node.py
index 7edaa94c6e..aae0ee031a 100644
--- a/tests/storage/repository/filesystem/matrix/test_matrix_node.py
+++ b/tests/storage/repository/filesystem/matrix/test_matrix_node.py
@@ -43,6 +43,9 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig) -> None:
     def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
         return MOCK_MATRIX_JSON
 
+    def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
+        return pd.DataFrame(MOCK_MATRIX_DTO)
+
     def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
         pass
 
diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py
index 8564a80e1b..d1f231d408 100644
--- a/tests/storage/repository/filesystem/test_lazy_node.py
+++ b/tests/storage/repository/filesystem/test_lazy_node.py
@@ -62,26 +62,6 @@ def test_get_no_expanded_txt(tmp_path: Path):
     assert "Mock Matrix Content" == node.get(expanded=False)
 
 
-def test_get_no_expanded_link(tmp_path: Path):
-    uri = "matrix://my-link"
-
-    file = tmp_path / "my-study/lazy.txt"
-    file.parent.mkdir()
-    (file.parent / "lazy.txt.link").write_text(uri)
-
-    config = FileStudyTreeConfig(study_path=file, path=file, version=-1, study_id="my-study")
-
-    resolver = Mock()
-    resolver.resolve.return_value = "Mock Matrix Content"
-
-    node = MockLazyNode(
-        context=ContextServer(matrix=Mock(), resolver=resolver),
-        config=config,
-    )
-    assert "Mock Matrix Content" == node.get(expanded=False)
-    resolver.resolve.assert_called_once_with(uri, True)
-
-
 def test_get_expanded_txt(tmp_path: Path):
     file = tmp_path / "my-study/lazy.txt"
     file.parent.mkdir()