Skip to content

Commit

Permalink
feat(raw-api): add an endpoint to retrieve files in their original format (#2244)
Browse files Browse the repository at this point in the history

Co-authored-by: belthlemar <[email protected]>
  • Loading branch information
mabw-rte and MartinBelthle authored Dec 17, 2024
1 parent 88a7a53 commit ad70926
Show file tree
Hide file tree
Showing 17 changed files with 598 additions and 284 deletions.
5 changes: 5 additions & 0 deletions antarest/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,11 @@ def __init__(self, message: str) -> None:
super().__init__(HTTPStatus.NOT_FOUND, message)


class PathIsAFolderError(HTTPException):
    """
    HTTP error carrying the `UNPROCESSABLE_ENTITY` status.

    Intended for requests whose path designates a folder where a file
    was expected.
    """

    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class WorkspaceNotFound(HTTPException):
"""
This will be raised when we try to load a workspace that does not exist
Expand Down
7 changes: 4 additions & 3 deletions antarest/core/swagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
#
# This file is part of the Antares project.

from typing import Any, List, Tuple
import typing as t

from fastapi import FastAPI
from fastapi.openapi.models import Example
from fastapi.routing import APIRoute

sim = "{sim} = simulation index <br/>"
Expand All @@ -21,7 +22,7 @@
attachment = "User-defined file attachment <br/>"

# noinspection SpellCheckingInspection
urls: List[Tuple[str, str]] = [
urls: t.List[t.Tuple[str, str]] = [
("layers/layers", ""),
("settings/generaldata", ""),
("output/{sim}/about-the-study/parameters", sim),
Expand All @@ -41,7 +42,7 @@
]


def get_path_examples() -> Any:
def get_path_examples() -> t.Dict[str, Example]:
    """
    Build the path examples from the module-level `urls` table.

    Returns:
        A mapping keyed by URL; each entry holds the URL itself under
        "value" and its help text under "description".
    """
    examples = {}
    for url, des in urls:
        examples[url] = {"value": url, "description": des}
    return examples


Expand Down
25 changes: 20 additions & 5 deletions antarest/core/utils/archives.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def extract_file_to_tmp_dir(archive_path: Path, inside_archive_path: Path) -> t.
return path, tmp_dir


def read_file_from_archive(archive_path: Path, posix_path: str) -> str:
def read_original_file_in_archive(archive_path: Path, posix_path: str) -> bytes:
    """
    Read a file from an archive and return its raw, undecoded content.

    Args:
        archive_path: the path to the archive file.
        posix_path: path to the file inside the archive.

    Returns:
        The content of the file as `bytes`.

    Raises:
        ValueError: if the suffix of `archive_path` is not one of the
            archive formats handled here (ZIP or 7z).
    """
    if archive_path.suffix == ArchiveFormat.ZIP:
        with zipfile.ZipFile(archive_path) as zip_obj:
            with zip_obj.open(posix_path) as f:
                # No decoding here: callers that need text decode the bytes themselves.
                return f.read()
    elif archive_path.suffix == ArchiveFormat.SEVEN_ZIP:
        with py7zr.SevenZipFile(archive_path, mode="r") as szf:
            # `read` returns a mapping from member name to a file-like object.
            output: bytes = szf.read([posix_path])[posix_path].read()
            return output
    else:
        raise ValueError(f"Unsupported {archive_path.suffix} archive format for {archive_path}")


def read_file_from_archive(archive_path: Path, posix_path: str) -> str:
    """
    Read a text file stored inside an archive.

    Args:
        archive_path: the path to the archive file.
        posix_path: path to the file inside the archive.

    Returns:
        The content of the file, decoded as UTF-8.
    """
    raw_content = read_original_file_in_archive(archive_path, posix_path)
    return raw_content.decode("utf-8")


def extract_lines_from_archive(root: Path, posix_path: str) -> t.List[str]:
"""
Extract text lines from various types of files.
Expand Down
5 changes: 1 addition & 4 deletions antarest/study/business/matrix_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,7 @@ def update_matrix(

try:
logger.info(f"Loading matrix data from node '{path}'...")
matrix_df = cast(
pd.DataFrame,
matrix_node.parse(return_dataframe=True),
)
matrix_df = matrix_node.parse_as_dataframe()
except ValueError as exc:
raise MatrixManagerError(f"Cannot parse matrix: {exc}") from exc

Expand Down
18 changes: 18 additions & 0 deletions antarest/study/common/studystorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from antarest.study.model import Study, StudyMetadataDTO, StudyMetadataPatchDTO, StudySimResultDTO
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfigDTO
from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy
from antarest.study.storage.rawstudy.model.filesystem.inode import OriginalFile

T = t.TypeVar("T", bound=Study)

Expand Down Expand Up @@ -56,6 +57,23 @@ def get(
"""

@abstractmethod
def get_file(
    self,
    metadata: T,
    url: str = "",
) -> OriginalFile:
    """
    Entry point to fetch a specific file inside a study folder.

    Args:
        metadata: study
        url: path inside the study to reach the file

    Returns:
        The study file content and its extension, as an `OriginalFile`.
    """

@abstractmethod
def exists(self, metadata: T) -> bool:
"""
Expand Down
26 changes: 25 additions & 1 deletion antarest/study/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
from antarest.study.storage.matrix_profile import adjust_matrix_columns_index
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfigDTO
from antarest.study.storage.rawstudy.model.filesystem.ini_file_node import IniFileNode
from antarest.study.storage.rawstudy.model.filesystem.inode import INode
from antarest.study.storage.rawstudy.model.filesystem.inode import INode, OriginalFile
from antarest.study.storage.rawstudy.model.filesystem.matrix.input_series_matrix import InputSeriesMatrix
from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import MatrixFrequency
from antarest.study.storage.rawstudy.model.filesystem.matrix.output_series_matrix import OutputSeriesMatrix
Expand Down Expand Up @@ -451,6 +451,30 @@ def get(

return self.storage_service.get_storage(study).get(study, url, depth, formatted)

def get_file(
    self,
    uuid: str,
    url: str,
    params: RequestParameters,
) -> OriginalFile:
    """
    Retrieve a file from a study folder.

    Args:
        uuid: study uuid
        url: route to follow inside study structure
        params: request parameters

    Returns:
        The `OriginalFile` returned by the storage that holds the study.
    """
    study = self.get_study(uuid)
    # Read permission on the study is checked before anything is fetched.
    assert_permission(params.user, study, StudyPermissionType.READ)

    storage = self.storage_service.get_storage(study)
    return storage.get_file(study, url)

def aggregate_output_data(
self,
uuid: str,
Expand Down
27 changes: 25 additions & 2 deletions antarest/study/storage/abstract_storage_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
from pathlib import Path
from uuid import uuid4

import py7zr

from antarest.core.config import Config
from antarest.core.exceptions import BadOutputError, StudyOutputNotFoundError
from antarest.core.interfaces.cache import CacheConstants, ICache
Expand All @@ -45,6 +43,7 @@
from antarest.study.storage.rawstudy.model.filesystem.config.files import get_playlist
from antarest.study.storage.rawstudy.model.filesystem.config.model import Simulation
from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy, StudyFactory
from antarest.study.storage.rawstudy.model.filesystem.inode import OriginalFile
from antarest.study.storage.rawstudy.model.helpers import FileStudyHelpers
from antarest.study.storage.utils import extract_output_name, fix_study_root, remove_from_cache

Expand Down Expand Up @@ -171,6 +170,30 @@ def get(
del study
return data

def get_file(
    self,
    metadata: T,
    url: str = "",
    use_cache: bool = True,
) -> OriginalFile:
    """
    Entry point to fetch a file inside a study.

    Args:
        metadata: study
        url: path inside the study to reach the file
        use_cache: indicate if the cache must be used

    Returns:
        The file content with its extension and name.
    """
    self._check_study_exists(metadata)
    file_study = self.get_raw(metadata, use_cache)

    # Empty segments (leading, trailing or doubled "/") are dropped.
    segments = list(filter(None, url.split("/")))

    node = file_study.tree.get_node(segments)
    return node.get_file_content()

def get_study_sim_result(
self,
study: T,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
import typing as t
from abc import ABC, abstractmethod

from antarest.core.exceptions import ChildNotFoundError
from antarest.core.exceptions import ChildNotFoundError, PathIsAFolderError
from antarest.core.model import JSON, SUB_JSON
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfig
from antarest.study.storage.rawstudy.model.filesystem.context import ContextServer
from antarest.study.storage.rawstudy.model.filesystem.inode import TREE, INode
from antarest.study.storage.rawstudy.model.filesystem.inode import TREE, INode, OriginalFile


class FilterError(Exception):
Expand Down Expand Up @@ -216,3 +216,7 @@ def extract_child(self, children: TREE, url: t.List[str]) -> t.Tuple[t.List[str]
if not isinstance(children[name], child_class):
raise FilterError("Filter selection has different classes")
return names, sub_url

def get_file_content(self) -> OriginalFile:
    """
    Always fail: this node is a folder, so it has no file content.

    Raises:
        PathIsAFolderError: unconditionally; the message carries the
            node's path relative to the study path.
    """
    node_path = self.config.path
    study_root = self.config.study_path
    relative_path = node_path.relative_to(study_root).as_posix()
    raise PathIsAFolderError(f"Node at {relative_path} is a folder node.")
25 changes: 24 additions & 1 deletion antarest/study/storage/rawstudy/model/filesystem/inode.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,26 @@
# This file is part of the Antares project.

from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar

from antarest.core.exceptions import WritingInsideZippedFileException
from antarest.core.utils.archives import extract_file_to_tmp_dir
from antarest.core.utils.archives import extract_file_to_tmp_dir, read_original_file_in_archive
from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfig

G = TypeVar("G")
S = TypeVar("S")
V = TypeVar("V")


@dataclass
class OriginalFile:
    """
    A file in its original form: raw content plus its name and extension.
    """

    # File extension (filled from `Path.suffix` by `INode.get_file_content`)
    suffix: str
    # Raw, undecoded bytes of the file
    content: bytes
    # File name (filled from `Path.name` by `INode.get_file_content`)
    filename: str


class INode(ABC, Generic[G, S, V]):
"""
Abstract tree element, have to be implemented to create hub or left.
Expand Down Expand Up @@ -124,6 +132,21 @@ def denormalize(self) -> None:
"""
raise NotImplementedError()

def get_file_content(self) -> OriginalFile:
    """
    Return the file behind this node in its original form.

    The bytes are read from the archive when the node's configuration has
    an archive path, and directly from the configured path otherwise.

    Returns:
        An `OriginalFile` holding the raw bytes, the file name and its suffix.
    """
    node_path = self.config.path
    extension = node_path.suffix
    name = node_path.name

    archive_path = self.config.archive_path
    if not archive_path:
        # Regular study: the file is read straight from the configured path.
        return OriginalFile(content=node_path.read_bytes(), suffix=extension, filename=name)

    inner_path = self.get_relative_path_inside_archive(archive_path)
    raw = read_original_file_in_archive(archive_path, inner_path)
    return OriginalFile(suffix=extension, filename=name, content=raw)

def get_relative_path_inside_archive(self, archive_path: Path) -> str:
    """
    Compute the POSIX path of this node relative to the study root.

    The study root is taken to be the directory named after the study id,
    located next to the archive file.

    Args:
        archive_path: path to the archive file of the study.

    Returns:
        The node path relative to that root, with "/" separators.
    """
    study_root = archive_path.parent / self.config.study_id
    inner_path = self.config.path.relative_to(study_root)
    return inner_path.as_posix()

def _assert_url_end(self, url: Optional[List[str]] = None) -> None:
"""
Raise error if elements remain in url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# SPDX-License-Identifier: MPL-2.0
#
# This file is part of the Antares project.

import typing as t
from abc import ABC, abstractmethod
from dataclasses import dataclass
Expand Down
Loading

0 comments on commit ad70926

Please sign in to comment.