handle the link case

AntaresSimulatorTeam · Feb 6, 2025 · debaeca · debaeca
1 parent 3fae17f
commit debaeca
Show file tree

Hide file tree

Showing 8 changed files with 34 additions and 58 deletions.
diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py
@@ -15,15 +15,15 @@
 
 import pandas as pd
 
-from antarest.core.model import SUB_JSON
+from antarest.core.model import JSON
 from antarest.matrixstore.service import ISimpleMatrixService
 
 
 class UriResolverService:
     def __init__(self, matrix_service: ISimpleMatrixService):
         self.matrix_service = matrix_service
 
-    def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON:
+    def resolve(self, uri: str, formatted: bool = True) -> JSON | str | None:
         res = UriResolverService._extract_uri_components(uri)
         if res:
             protocol, uuid = res
@@ -49,7 +49,7 @@ def extract_id(uri: str) -> Optional[str]:
         res = UriResolverService._extract_uri_components(uri)
         return res[1] if res else None
 
-    def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON:
+    def _resolve_matrix(self, id: str, formatted: bool = True) -> JSON | str:
         data = self.matrix_service.get(id)
         if not data:
             raise ValueError(f"id matrix {id} not found")

diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -51,7 +51,8 @@ def _get_real_file_path(
         if self.config.archive_path:
             path, tmp_dir = self._extract_file_to_tmp_dir(self.config.archive_path)
         else:
-            path = self.config.path
+            link_path = self.get_link_path()
+            path = link_path if link_path.exists() else self.config.path
         return path, tmp_dir
 
     def file_exists(self) -> bool:
@@ -82,17 +83,12 @@ def _get(
         if get_node:
             return self
 
-        if self.get_link_path().exists():
-            link = self.get_link_path().read_text()
-            if expanded:
-                return link
-            else:
-                return t.cast(G, self.context.resolver.resolve(link, formatted))
-
         if expanded:
+            if self.get_link_path().exists():
+                return self.get_link_path().read_text()
             return self.get_lazy_content()
-        else:
-            return self.load(url, depth, expanded, formatted)
+
+        return self.load(url, depth, expanded, formatted)
 
     @override
     def get(

diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py
@@ -55,6 +55,7 @@ def __init__(
             self.default_empty = np.copy(default_empty)
             self.default_empty.flags.writeable = True
 
+    @override
     def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame:
         file_path = file_path or self.config.path
         try:
@@ -82,6 +83,8 @@ def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame
                     raise ChildNotFoundError(f"File '{relpath}' not found in the study '{study_id}'") from e
             stopwatch.log_elapsed(lambda x: logger.info(f"Matrix parsed in {x}s"))
             final_matrix = matrix.dropna(how="any", axis=1)
+            if final_matrix.empty:
+                raise EmptyDataError
             return final_matrix
         except EmptyDataError:
             logger.warning(f"Empty file found when parsing {file_path}")

diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py
@@ -14,7 +14,7 @@
 from abc import ABC, abstractmethod
 from enum import StrEnum
 from pathlib import Path
-from typing import List, Optional, Union, cast
+from typing import List, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -92,13 +92,11 @@ def normalize(self) -> None:
         if self.get_link_path().exists() or self.config.archive_path:
             return
 
-        matrix = self.parse_as_json()
-
-        if "data" in matrix:
-            data = cast(List[List[float]], matrix["data"])
-            uuid = self.context.matrix.create(data)
-            self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
-            self.config.path.unlink()
+        matrix = self.parse_as_dataframe()
+        data = matrix.to_numpy().tolist()
+        uuid = self.context.matrix.create(data)
+        self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
+        self.config.path.unlink()
 
     @override
     def denormalize(self) -> None:
@@ -128,27 +126,16 @@ def load(
         expanded: bool = False,
         formatted: bool = True,
     ) -> Union[bytes, JSON]:
-        file_path, tmp_dir = self._get_real_file_path()
+        file_path, _ = self._get_real_file_path()
 
         if formatted:
             return self.parse_as_json(file_path)
 
-        if not file_path.exists():
-            logger.warning(f"Missing file {self.config.path}")
-            if tmp_dir:
-                tmp_dir.cleanup()
-            return b""
-
-        file_content = file_path.read_bytes()
-        if file_content != b"":
-            return file_content
-
-        # If the content is empty, we should return the default matrix to do the same as `parse_as_json()`
-        default_matrix = self.get_default_empty_matrix()
-        if default_matrix is None:
+        df = self.parse_as_dataframe(file_path)
+        if df.empty:
             return b""
         buffer = io.BytesIO()
-        np.savetxt(buffer, default_matrix, delimiter="\t")
+        np.savetxt(buffer, df, delimiter="\t")
         return buffer.getvalue()
 
     @abstractmethod
@@ -158,6 +145,13 @@ def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
+        """
+        Parse the matrix content and return it as a DataFrame object
+        """
+        raise NotImplementedError()
+
     @abstractmethod
     def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
         """

diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py
@@ -232,7 +232,7 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
             written_data = res.json()["data"]
             if not content.decode("utf-8"):
                 # The `GET` returns the default matrix when it's empty
-                expected = 8760 * [[0]] if study_type == "raw" else [[]]
+                expected = 8760 * [[0]]
             else:
                 df = pd.read_csv(io.BytesIO(content), delimiter=delimiter, header=None).replace(",", ".", regex=True)
                 df = df.dropna(axis=1, how="all")  # We want to remove columns full of NaN at the import

diff --git a/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py b/tests/integration/study_data_blueprint/test_generate_thermal_cluster_timeseries.py
@@ -97,7 +97,7 @@ def test_lifecycle_nominal(self, client: TestClient, user_access_token: str) ->
         )
         assert res.status_code == 200
         data = res.json()["data"]
-        assert data == [[]]  # no generation c.f. gen-ts parameter
+        assert data == 8760 * [[0]]  # no generation cf. gen-ts parameter -> empty file -> default simulator value
 
     @pytest.mark.parametrize("study_type", ["raw", "variant"])
     def test_errors_and_limit_cases(self, client: TestClient, user_access_token: str, study_type: str) -> None:

diff --git a/tests/storage/repository/filesystem/matrix/test_matrix_node.py b/tests/storage/repository/filesystem/matrix/test_matrix_node.py
@@ -43,6 +43,9 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig) -> None:
     def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
         return MOCK_MATRIX_JSON
 
+    def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
+        return pd.DataFrame(MOCK_MATRIX_DTO)
+
     def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
         pass
 

diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py
@@ -62,26 +62,6 @@ def test_get_no_expanded_txt(tmp_path: Path):
     assert "Mock Matrix Content" == node.get(expanded=False)
 
 
-def test_get_no_expanded_link(tmp_path: Path):
-    uri = "matrix://my-link"
-
-    file = tmp_path / "my-study/lazy.txt"
-    file.parent.mkdir()
-    (file.parent / "lazy.txt.link").write_text(uri)
-
-    config = FileStudyTreeConfig(study_path=file, path=file, version=-1, study_id="my-study")
-
-    resolver = Mock()
-    resolver.resolve.return_value = "Mock Matrix Content"
-
-    node = MockLazyNode(
-        context=ContextServer(matrix=Mock(), resolver=resolver),
-        config=config,
-    )
-    assert "Mock Matrix Content" == node.get(expanded=False)
-    resolver.resolve.assert_called_once_with(uri, True)
-
-
 def test_get_expanded_txt(tmp_path: Path):
     file = tmp_path / "my-study/lazy.txt"
     file.parent.mkdir()