Skip to content

Commit

Permalink
handle the link case
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinBelthle committed Feb 6, 2025
1 parent 3fae17f commit debaeca
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 58 deletions.
6 changes: 3 additions & 3 deletions antarest/matrixstore/uri_resolver_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@

import pandas as pd

from antarest.core.model import SUB_JSON
from antarest.core.model import JSON
from antarest.matrixstore.service import ISimpleMatrixService


class UriResolverService:
def __init__(self, matrix_service: ISimpleMatrixService):
self.matrix_service = matrix_service

def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON:
def resolve(self, uri: str, formatted: bool = True) -> JSON | str | None:
res = UriResolverService._extract_uri_components(uri)
if res:
protocol, uuid = res
Expand All @@ -49,7 +49,7 @@ def extract_id(uri: str) -> Optional[str]:
res = UriResolverService._extract_uri_components(uri)
return res[1] if res else None

def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON:
def _resolve_matrix(self, id: str, formatted: bool = True) -> JSON | str:
data = self.matrix_service.get(id)
if not data:
raise ValueError(f"id matrix {id} not found")
Expand Down
16 changes: 6 additions & 10 deletions antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def _get_real_file_path(
if self.config.archive_path:
path, tmp_dir = self._extract_file_to_tmp_dir(self.config.archive_path)
else:
path = self.config.path
link_path = self.get_link_path()
path = link_path if link_path.exists() else self.config.path
return path, tmp_dir

def file_exists(self) -> bool:
Expand Down Expand Up @@ -82,17 +83,12 @@ def _get(
if get_node:
return self

if self.get_link_path().exists():
link = self.get_link_path().read_text()
if expanded:
return link
else:
return t.cast(G, self.context.resolver.resolve(link, formatted))

if expanded:
if self.get_link_path().exists():
return self.get_link_path().read_text()
return self.get_lazy_content()
else:
return self.load(url, depth, expanded, formatted)

return self.load(url, depth, expanded, formatted)

@override
def get(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(
self.default_empty = np.copy(default_empty)
self.default_empty.flags.writeable = True

@override
def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame:
file_path = file_path or self.config.path
try:
Expand Down Expand Up @@ -82,6 +83,8 @@ def parse_as_dataframe(self, file_path: t.Optional[Path] = None) -> pd.DataFrame
raise ChildNotFoundError(f"File '{relpath}' not found in the study '{study_id}'") from e
stopwatch.log_elapsed(lambda x: logger.info(f"Matrix parsed in {x}s"))
final_matrix = matrix.dropna(how="any", axis=1)
if final_matrix.empty:
raise EmptyDataError
return final_matrix
except EmptyDataError:
logger.warning(f"Empty file found when parsing {file_path}")
Expand Down
40 changes: 17 additions & 23 deletions antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from abc import ABC, abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import List, Optional, Union, cast
from typing import List, Optional, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -92,13 +92,11 @@ def normalize(self) -> None:
if self.get_link_path().exists() or self.config.archive_path:
return

matrix = self.parse_as_json()

if "data" in matrix:
data = cast(List[List[float]], matrix["data"])
uuid = self.context.matrix.create(data)
self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
self.config.path.unlink()
matrix = self.parse_as_dataframe()
data = matrix.to_numpy().tolist()
uuid = self.context.matrix.create(data)
self.get_link_path().write_text(self.context.resolver.build_matrix_uri(uuid))
self.config.path.unlink()

@override
def denormalize(self) -> None:
Expand Down Expand Up @@ -128,27 +126,16 @@ def load(
expanded: bool = False,
formatted: bool = True,
) -> Union[bytes, JSON]:
file_path, tmp_dir = self._get_real_file_path()
file_path, _ = self._get_real_file_path()

if formatted:
return self.parse_as_json(file_path)

if not file_path.exists():
logger.warning(f"Missing file {self.config.path}")
if tmp_dir:
tmp_dir.cleanup()
return b""

file_content = file_path.read_bytes()
if file_content != b"":
return file_content

# If the content is empty, we should return the default matrix to do the same as `parse_as_json()`
default_matrix = self.get_default_empty_matrix()
if default_matrix is None:
df = self.parse_as_dataframe(file_path)
if df.empty:
return b""
buffer = io.BytesIO()
np.savetxt(buffer, default_matrix, delimiter="\t")
np.savetxt(buffer, df, delimiter="\t")
return buffer.getvalue()

@abstractmethod
Expand All @@ -158,6 +145,13 @@ def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
"""
raise NotImplementedError()

@abstractmethod
def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
"""
Parse the matrix content and return it as a DataFrame object.

Abstract hook: this definition only raises, so each concrete matrix node
must supply its own parsing logic.

`file_path` is optional and defaults to None.
NOTE(review): what a subclass falls back to when `file_path` is None is
not fixed here -- confirm against each implementation.

Raises NotImplementedError when this base definition is called directly.
"""
raise NotImplementedError()

@abstractmethod
def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def test_get_study_data(self, client: TestClient, user_access_token: str, intern
written_data = res.json()["data"]
if not content.decode("utf-8"):
# The `GET` returns the default matrix when it's empty
expected = 8760 * [[0]] if study_type == "raw" else [[]]
expected = 8760 * [[0]]
else:
df = pd.read_csv(io.BytesIO(content), delimiter=delimiter, header=None).replace(",", ".", regex=True)
df = df.dropna(axis=1, how="all") # We want to remove columns full of NaN at the import
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_lifecycle_nominal(self, client: TestClient, user_access_token: str) ->
)
assert res.status_code == 200
data = res.json()["data"]
assert data == [[]] # no generation c.f. gen-ts parameter
assert data == 8760 * [[0]] # no generation c.f. gen-ts parameter -> empty file -> default simulator value

@pytest.mark.parametrize("study_type", ["raw", "variant"])
def test_errors_and_limit_cases(self, client: TestClient, user_access_token: str, study_type: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig) -> None:
# Test stub: ignores `file_path` and always returns the fixed MOCK_MATRIX_JSON.
def parse_as_json(self, file_path: Optional[Path] = None) -> JSON:
return MOCK_MATRIX_JSON

# Test stub: ignores `file_path` and builds a new DataFrame from MOCK_MATRIX_DTO
# on every call.
def parse_as_dataframe(self, file_path: Optional[Path] = None) -> pd.DataFrame:
return pd.DataFrame(MOCK_MATRIX_DTO)

# Test stub: the body is a bare `pass`, so the call implicitly returns None,
# which the Optional return annotation allows.
def get_default_empty_matrix(self) -> Optional[npt.NDArray[np.float64]]:
pass

Expand Down
20 changes: 0 additions & 20 deletions tests/storage/repository/filesystem/test_lazy_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,26 +62,6 @@ def test_get_no_expanded_txt(tmp_path: Path):
assert "Mock Matrix Content" == node.get(expanded=False)


# Non-expanded `get` on a node whose sibling `.link` file exists: the URI stored
# in the link is expected to be passed to the resolver, and the resolver's
# return value is expected to come back from `get`.
def test_get_no_expanded_link(tmp_path: Path):
uri = "matrix://my-link"

# Only the `.link` file is created; `lazy.txt` itself is never written.
file = tmp_path / "my-study/lazy.txt"
file.parent.mkdir()
(file.parent / "lazy.txt.link").write_text(uri)

config = FileStudyTreeConfig(study_path=file, path=file, version=-1, study_id="my-study")

# The resolver is a Mock, so the test controls what the link resolves to.
resolver = Mock()
resolver.resolve.return_value = "Mock Matrix Content"

node = MockLazyNode(
context=ContextServer(matrix=Mock(), resolver=resolver),
config=config,
)
assert "Mock Matrix Content" == node.get(expanded=False)
# The second positional argument is expected to be True.
# NOTE(review): presumably the default `formatted` flag -- confirm against `get`.
resolver.resolve.assert_called_once_with(uri, True)


def test_get_expanded_txt(tmp_path: Path):
file = tmp_path / "my-study/lazy.txt"
file.parent.mkdir()
Expand Down

0 comments on commit debaeca

Please sign in to comment.