From 3b607e593ef62db0c16adfd8a5ab436d9f617759 Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Tue, 28 May 2024 11:18:23 +0200 Subject: [PATCH 01/13] feat(archive-output): use ".7z" format to archive outputs --- .../study/storage/abstract_storage_service.py | 49 +++++++++++++------ .../storage/rawstudy/raw_study_service.py | 2 + antarest/study/storage/utils.py | 22 ++++++--- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 782eb5516f..738499d081 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -51,6 +51,14 @@ logger = logging.getLogger(__name__) +def find_output_archive(study: T, output_id: str) -> t.Optional[Path]: + if (Path(study.path) / "output" / f"{output_id}.7z").exists(): + return Path(study.path) / "output" / f"{output_id}.7z" + elif (Path(study.path) / "output" / f"{output_id}.zip").exists(): + return Path(study.path) / "output" / f"{output_id}.zip" + return None + + class AbstractStorageService(IStudyStorageService[T], ABC): def __init__( self, @@ -244,17 +252,19 @@ def import_output( study_id = metadata.id path_output.mkdir(parents=True) output_full_name: t.Optional[str] - is_zipped = False + is_archived = False + extension = "" stopwatch = StopWatch() try: if isinstance(output, Path): - if output != path_output and output.suffix != ArchiveFormat.ZIP: + if output != path_output and output.suffix not in {".zip", ".7z"}: shutil.copytree(output, path_output / "imported") - elif output.suffix == ArchiveFormat.ZIP: - is_zipped = True + elif output.suffix in {".zip", ".7z"}: + is_archived = True path_output.rmdir() - path_output = Path(str(path_output) + f"{ArchiveFormat.ZIP}") + path_output = Path(str(path_output) + output.suffix) shutil.copyfile(output, path_output) + extension = ".zip" if output.suffix == ".zip" else ".7z" else: extract_archive(output, path_output) @@ -273,7 +283,7 @@ def import_output( except Exception as e: logger.error("Failed to import output", exc_info=e) shutil.rmtree(path_output, ignore_errors=True) - if is_zipped: + if is_archived: Path(str(path_output) + f"{ArchiveFormat.ZIP}").unlink(missing_ok=True) output_full_name = None @@ -337,11 +347,12 @@ def _read_additional_data_from_files(self, file_study: FileStudy) -> StudyAdditi def archive_study_output(self, study: T, output_id: str) -> bool: try: + # use 7zip to compress the output folder archive_dir( Path(study.path) / "output" / output_id, - Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}", + Path(study.path) / "output" / f"{output_id}{ArchiveFormat.SEVEN_ZIP}", remove_source_dir=True, - archive_format=ArchiveFormat.ZIP, + archive_format=ArchiveFormat.SEVEN_ZIP, ) remove_from_cache(self.cache, study.id) return True @@ -352,18 +363,26 @@ def archive_study_output(self, study: T, output_id: str) -> bool: ) return False - def unarchive_study_output(self, study: T, output_id: str, keep_src_zip: bool) -> bool: - if not (Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}").exists(): + def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> bool: + archive_path = find_output_archive(study, output_id) + if archive_path is None: logger.warning( f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived", ) return False try: - unzip( - Path(study.path) / "output" / output_id, - Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}", - remove_source_zip=not keep_src_zip, - ) + # use 7zip to uncompress the output folder + if archive_path.suffix == ".7z": + with py7zr.SevenZipFile(archive_path, "r") as szf: + szf.extractall(Path(study.path) / "output" / output_id) + if not keep_src_archive: + archive_path.unlink() + else: + unzip( + Path(study.path) / "output" / output_id, + Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}", + remove_source_zip=not keep_src_archive, + ) remove_from_cache(self.cache, study.id) return True except Exception as e: diff --git a/antarest/study/storage/rawstudy/raw_study_service.py b/antarest/study/storage/rawstudy/raw_study_service.py index e90b9b33ed..0f4504e4c7 100644 --- a/antarest/study/storage/rawstudy/raw_study_service.py +++ b/antarest/study/storage/rawstudy/raw_study_service.py @@ -304,6 +304,8 @@ def delete_output(self, metadata: RawStudy, output_name: str) -> None: output_path = study_path / "output" / output_name if output_path.exists() and output_path.is_dir(): shutil.rmtree(output_path, ignore_errors=True) + elif (output_path.parent / f"{output_name}.7z").exists(): + (output_path.parent / f"{output_name}.7z").unlink(missing_ok=True) else: output_path = output_path.parent / f"{output_name}.zip" output_path.unlink(missing_ok=True) diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 85ba04b630..97ce7e3bcf 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -23,6 +23,8 @@ from uuid import uuid4 from zipfile import ZipFile +import py7zr + from antares.study.version import StudyVersion from antarest.core.exceptions import StudyValidationError, UnsupportedStudyVersion @@ -79,12 +81,12 @@ def fix_study_root(study_path: Path) -> None: Args: study_path: the study initial root path """ - # TODO: what if it is a zipped output ? - if is_archive_format(study_path.suffix): + # TODO: what if it is a archived output ? + if study_path.suffix in {".zip", ".7z"}: return None if not study_path.is_dir(): - raise StudyValidationError("Not a directory: '{study_path}'") + raise StudyValidationError(f"Not a directory: '{study_path}'") root_path = study_path contents = os.listdir(root_path) @@ -125,13 +127,17 @@ def is_output_archived(path_output: Path) -> bool: def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str: ini_reader = IniReader() - archived = is_output_archived(path_output) - if archived: + is_output_archived = path_output.suffix in {".zip", ".7z"} + if is_output_archived: temp_dir = tempfile.TemporaryDirectory() s = StopWatch() - with ZipFile(path_output, "r") as zip_obj: - zip_obj.extract("info.antares-output", temp_dir.name) - info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output") + if path_output.suffix == ".zip": + with ZipFile(path_output, "r") as zip_obj: + zip_obj.extract("info.antares-output", temp_dir.name) + else: + with py7zr.SevenZipFile(path_output, mode="r") as archive: + archive.extract(targets=["info.antares-output"], path=temp_dir.name) + info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output") s.log_elapsed(lambda x: logger.info(f"info.antares_output has been read in {x}s")) temp_dir.cleanup() From 32bb478622cfc04b048faf8c6b68553a228c5140 Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Tue, 28 May 2024 11:18:38 +0200 Subject: [PATCH 02/13] test(archive-output): use ".7z" format to archive outputs --- tests/core/utils/test_extract_zip.py | 1 - .../business/test_raw_study_service.py | 27 ++++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/core/utils/test_extract_zip.py b/tests/core/utils/test_extract_zip.py index 5d24e6c772..9327bfc21e 100644 --- a/tests/core/utils/test_extract_zip.py +++ b/tests/core/utils/test_extract_zip.py @@ -20,7 +20,6 @@ from antarest.core.exceptions import BadArchiveContent from antarest.core.utils.archives import extract_archive - class TestExtractArchive: """ Test the `extract_zip` function. diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index 2e9bb25c5a..f07955bbe1 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -20,6 +20,7 @@ from unittest.mock import Mock from zipfile import ZIP_DEFLATED, ZipFile +import py7zr import pytest from antarest.core.config import Config, StorageConfig, WorkspaceConfig @@ -464,7 +465,7 @@ def test_copy_study( @pytest.mark.unit_test -def test_zipped_output(tmp_path: Path) -> None: +def test_archived_output(tmp_path: Path) -> None: if not platform.platform().startswith("Windows"): os.environ["TZ"] = "Europe/Paris" # set new timezone time.tzset() @@ -484,10 +485,9 @@ def test_zipped_output(tmp_path: Path) -> None: md = RawStudy(id=name, workspace="foo", path=str(study_path)) - zipped_output = tmp_path / "output.zip" - with ZipFile(zipped_output, "w", ZIP_DEFLATED) as output_data: + archived_output = tmp_path / "output.7z" + with py7zr.SevenZipFile(archived_output, "w") as output_data: output_data.writestr( - "info.antares-output", """[general] version = 700 name = 11mc @@ -496,32 +496,33 @@ def test_zipped_output(tmp_path: Path) -> None: title = 2020.09.07 - 16:15 timestamp = 1599488150 """, + "info.antares-output", ) expected_output_name = "20200907-1615eco-11mc" - output_name = study_service.import_output(md, zipped_output) + output_name = study_service.import_output(md, archived_output) if output_name != expected_output_name: # because windows sucks... expected_output_name = "20200907-1415eco-11mc" assert output_name == expected_output_name - assert (study_path / "output" / (expected_output_name + ".zip")).exists() + assert (study_path / "output" / (expected_output_name + ".7z")).exists() study_service.unarchive_study_output(md, expected_output_name, False) assert (study_path / "output" / expected_output_name).exists() - assert not (study_path / "output" / (expected_output_name + ".zip")).exists() + assert not (study_path / "output" / (expected_output_name + ".7z")).exists() study_service.delete_output(md, output_name) assert not (study_path / "output" / expected_output_name).exists() - output_name = study_service.import_output(md, zipped_output) + output_name = study_service.import_output(md, archived_output) study_service.unarchive_study_output(md, expected_output_name, True) - assert (study_path / "output" / (expected_output_name + ".zip")).exists() - os.unlink(study_path / "output" / (expected_output_name + ".zip")) - assert not (study_path / "output" / (expected_output_name + ".zip")).exists() + assert (study_path / "output" / (expected_output_name + ".7z")).exists() + os.unlink(study_path / "output" / (expected_output_name + ".7z")) + assert not (study_path / "output" / (expected_output_name + ".7z")).exists() study_service.archive_study_output(md, expected_output_name) assert not (study_path / "output" / expected_output_name).exists() - assert (study_path / "output" / (expected_output_name + ".zip")).exists() + assert (study_path / "output" / (expected_output_name + ".7z")).exists() study_service.delete_output(md, output_name) - assert not (study_path / "output" / (expected_output_name + ".zip")).exists() + assert not (study_path / "output" / (expected_output_name + ".7z")).exists() @pytest.mark.unit_test From fe068de74760418d34f9f63ad2da13b6a6d0eed6 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Wed, 5 Jun 2024 22:17:04 +0200 Subject: [PATCH 03/13] feat(archive-output): improved archive search function --- .../study/storage/abstract_storage_service.py | 15 ++++++++------- tests/storage/business/test_raw_study_service.py | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 738499d081..c5a6029970 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -51,12 +51,13 @@ logger = logging.getLogger(__name__) -def find_output_archive(study: T, output_id: str) -> t.Optional[Path]: - if (Path(study.path) / "output" / f"{output_id}.7z").exists(): - return Path(study.path) / "output" / f"{output_id}.7z" - elif (Path(study.path) / "output" / f"{output_id}.zip").exists(): - return Path(study.path) / "output" / f"{output_id}.zip" - return None +def _search_output_archive(study_path: str, output_id: str) -> t.Optional[Path]: + """Search for an output archive file in the study path, return ``None`` if not found.""" + locations = [ + Path(study_path) / "output" / f"{output_id}.7z", + Path(study_path) / "output" / f"{output_id}.zip", + ] + return next(iter(path for path in locations if path.exists()), None) class AbstractStorageService(IStudyStorageService[T], ABC): @@ -364,7 +365,7 @@ def archive_study_output(self, study: T, output_id: str) -> bool: return False def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> bool: - archive_path = find_output_archive(study, output_id) + archive_path = _search_output_archive(study.path, output_id) if archive_path is None: logger.warning( f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived", diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index f07955bbe1..20494e8f10 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -18,7 +18,6 @@ from pathlib import Path from typing import Callable from unittest.mock import Mock -from zipfile import ZIP_DEFLATED, ZipFile import py7zr import pytest From a09c12eecd3e8e48dcf9ebefac82f25fdd51eb63 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Wed, 5 Jun 2024 22:24:14 +0200 Subject: [PATCH 04/13] style(archive-output): improved typing in unit tests --- .../business/test_raw_study_service.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index 20494e8f10..8eaee65579 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -15,8 +15,8 @@ import platform import re import time +import typing as t from pathlib import Path -from typing import Callable from unittest.mock import Mock import py7zr @@ -35,7 +35,7 @@ def build_config( study_path: Path, workspace_name: str = DEFAULT_WORKSPACE_NAME, allow_deletion: bool = False, -): +) -> Config: return Config( storage=StorageConfig( workspaces={workspace_name: WorkspaceConfig(path=study_path)}, @@ -45,7 +45,7 @@ def build_config( @pytest.mark.unit_test -def test_get(tmp_path: str, project_path) -> None: +def test_get(tmp_path: str, project_path: Path) -> None: """ path_to_studies |_study1 (d) @@ -117,7 +117,7 @@ def test_get_cache(tmp_path: str) -> None: config=Mock(), cache=cache, study_factory=study_factory, - path_resources="", + path_resources=Path(), patch_service=Mock(), ) @@ -132,7 +132,7 @@ def test_get_cache(tmp_path: str) -> None: @pytest.mark.unit_test -def test_check_errors(): +def test_check_errors() -> None: study = Mock() study.check_errors.return_value = ["Hello"] @@ -156,7 +156,7 @@ def test_check_errors(): @pytest.mark.unit_test -def test_assert_study_exist(tmp_path: str, project_path) -> None: +def test_assert_study_exist(tmp_path: str, project_path: Path) -> None: tmp = Path(tmp_path) (tmp / "study1").mkdir() (tmp / "study.antares").touch() @@ -182,7 +182,7 @@ def test_assert_study_exist(tmp_path: str, project_path) -> None: @pytest.mark.unit_test -def test_assert_study_not_exist(tmp_path: str, project_path) -> None: +def test_assert_study_not_exist(tmp_path: str, project_path: Path) -> None: # Create folders tmp = Path(tmp_path) (tmp / "study1").mkdir() @@ -246,7 +246,7 @@ def test_create(tmp_path: Path, project_path: Path) -> None: @pytest.mark.unit_test -def test_create_study_versions(tmp_path: str, project_path) -> None: +def test_create_study_versions(tmp_path: str, project_path: Path) -> None: path_studies = Path(tmp_path) study = Mock() @@ -264,7 +264,7 @@ def test_create_study_versions(tmp_path: str, project_path) -> None: patch_service=Mock(), ) - def create_study(version: str): + def create_study(version: str) -> RawStudy: metadata = RawStudy( id=f"study{version}", workspace=DEFAULT_WORKSPACE_NAME, @@ -410,7 +410,7 @@ def create_study(version: str): @pytest.mark.unit_test def test_copy_study( tmp_path: str, - clean_ini_writer: Callable, + clean_ini_writer: t.Callable[[Path, str], None], ) -> None: path_studies = Path(tmp_path) source_name = "study1" @@ -432,8 +432,7 @@ def test_copy_study( study.get.return_value = value study_factory = Mock() - config = Mock() - study_factory.create_from_fs.return_value = FileStudy(config, study) + study_factory.create_from_fs.return_value = FileStudy(Mock(spec=Config), study) study_factory.create_from_config.return_value = study url_engine = Mock() @@ -463,6 +462,17 @@ def test_copy_study( study.get.assert_called_once_with(["study"]) +GENERAL_SECTION = """\ +[general] +version = 700 +name = 11mc +mode = Economy +date = 2020.09.07 - 16:15 +title = 2020.09.07 - 16:15 +timestamp = 1599488150 +""" + + @pytest.mark.unit_test def test_archived_output(tmp_path: Path) -> None: if not platform.platform().startswith("Windows"): @@ -486,17 +496,7 @@ def test_archived_output(tmp_path: Path) -> None: archived_output = tmp_path / "output.7z" with py7zr.SevenZipFile(archived_output, "w") as output_data: - output_data.writestr( - """[general] -version = 700 -name = 11mc -mode = Economy -date = 2020.09.07 - 16:15 -title = 2020.09.07 - 16:15 -timestamp = 1599488150 - """, - "info.antares-output", - ) + output_data.writestr(GENERAL_SECTION, "info.antares-output") expected_output_name = "20200907-1615eco-11mc" output_name = study_service.import_output(md, archived_output) @@ -513,6 +513,7 @@ def test_archived_output(tmp_path: Path) -> None: assert not (study_path / "output" / expected_output_name).exists() output_name = study_service.import_output(md, archived_output) + assert output_name is not None study_service.unarchive_study_output(md, expected_output_name, True) assert (study_path / "output" / (expected_output_name + ".7z")).exists() os.unlink(study_path / "output" / (expected_output_name + ".7z")) @@ -589,7 +590,7 @@ def test_initialize_additional_data(tmp_path: Path) -> None: assert not study_service.initialize_additional_data(raw_study) - study_service._read_additional_data_from_files = Mock(return_value=study_additional_data) + study_service._read_additional_data_from_files = Mock(return_value=study_additional_data) # type: ignore assert study_service.initialize_additional_data(raw_study) From bf015df3ad2f69d8beb62bbd14ed6d308d79e606 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Wed, 5 Jun 2024 22:54:20 +0200 Subject: [PATCH 05/13] feat(archive-output): improved study output un-archiving --- .../study/storage/abstract_storage_service.py | 26 ++++++++++--------- antarest/study/storage/utils.py | 26 +++++++++---------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index c5a6029970..11679db61f 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -14,6 +14,7 @@ import shutil import tempfile import typing as t +import zipfile from abc import ABC from pathlib import Path from uuid import uuid4 @@ -263,9 +264,9 @@ def import_output( elif output.suffix in {".zip", ".7z"}: is_archived = True path_output.rmdir() - path_output = Path(str(path_output) + output.suffix) + path_output = path_output.with_suffix(output.suffix) shutil.copyfile(output, path_output) - extension = ".zip" if output.suffix == ".zip" else ".7z" + extension = output.suffix else: extract_archive(output, path_output) @@ -285,7 +286,7 @@ def import_output( logger.error("Failed to import output", exc_info=e) shutil.rmtree(path_output, ignore_errors=True) if is_archived: - Path(str(path_output) + f"{ArchiveFormat.ZIP}").unlink(missing_ok=True) + path_output.with_suffix(extension).unlink(missing_ok=True) output_full_name = None return output_full_name @@ -371,19 +372,20 @@ def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: boo f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived", ) return False + try: - # use 7zip to uncompress the output folder if archive_path.suffix == ".7z": - with py7zr.SevenZipFile(archive_path, "r") as szf: + with py7zr.SevenZipFile(archive_path, mode="r") as szf: szf.extractall(Path(study.path) / "output" / output_id) - if not keep_src_archive: - archive_path.unlink() + elif archive_path.suffix == ".zip": + with zipfile.ZipFile(archive_path, mode="r") as zipf: + zipf.extractall(Path(study.path) / "output" / output_id) else: - unzip( - Path(study.path) / "output" / output_id, - Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}", - remove_source_zip=not keep_src_archive, - ) + raise NotImplementedError(f"Unsupported archive format {archive_path.suffix}") + + if not keep_src_archive: + archive_path.unlink() + remove_from_cache(self.cache, study.id) return True except Exception as e: diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 97ce7e3bcf..f268f25028 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -128,21 +128,21 @@ def is_output_archived(path_output: Path) -> bool: def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str: ini_reader = IniReader() is_output_archived = path_output.suffix in {".zip", ".7z"} + info_filename = "info.antares-output" if is_output_archived: - temp_dir = tempfile.TemporaryDirectory() - s = StopWatch() - if path_output.suffix == ".zip": - with ZipFile(path_output, "r") as zip_obj: - zip_obj.extract("info.antares-output", temp_dir.name) - else: - with py7zr.SevenZipFile(path_output, mode="r") as archive: - archive.extract(targets=["info.antares-output"], path=temp_dir.name) - info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output") - s.log_elapsed(lambda x: logger.info(f"info.antares_output has been read in {x}s")) - temp_dir.cleanup() + with tempfile.TemporaryDirectory() as temp_dir: + s = StopWatch() + if path_output.suffix == ".zip": + with ZipFile(path_output, mode="r") as zip_obj: + zip_obj.extract(info_filename, temp_dir) + else: + with py7zr.SevenZipFile(path_output, mode="r") as archive: + archive.extract(targets=[info_filename], path=temp_dir) + info_antares_output = ini_reader.read(Path(temp_dir) / info_filename) + s.log_elapsed(lambda x: logger.info(f"'{info_filename}' has been read in {x}s")) else: - info_antares_output = ini_reader.read(path_output / "info.antares-output") + info_antares_output = ini_reader.read(path_output / info_filename) general_info = info_antares_output["general"] @@ -155,7 +155,7 @@ def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = No general_info["name"] = suffix_name if not archived: ini_writer = IniWriter() - ini_writer.write(info_antares_output, path_output / "info.antares-output") + ini_writer.write(info_antares_output, path_output / info_filename) else: logger.warning("Could not rewrite the new name inside the output: the output is archived") From f92f67c004fa10cfbe4d2ce59c932a63d5fe362c Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Wed, 5 Jun 2024 23:03:40 +0200 Subject: [PATCH 06/13] feat(archive-output): improved study output deletion --- .../storage/rawstudy/raw_study_service.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/antarest/study/storage/rawstudy/raw_study_service.py b/antarest/study/storage/rawstudy/raw_study_service.py index 0f4504e4c7..ddf4aa40b6 100644 --- a/antarest/study/storage/rawstudy/raw_study_service.py +++ b/antarest/study/storage/rawstudy/raw_study_service.py @@ -292,23 +292,20 @@ def delete(self, metadata: RawStudy) -> None: def delete_output(self, metadata: RawStudy, output_name: str) -> None: """ - Delete output folder - Args: - metadata: study - output_name: output simulation - - Returns: + Delete output folder or archive (zip or 7z) of a study. + Args: + metadata: The study metadata. + output_name: Output name to be deleted. """ study_path = self.get_study_path(metadata) output_path = study_path / "output" / output_name - if output_path.exists() and output_path.is_dir(): + if output_path.is_dir(): shutil.rmtree(output_path, ignore_errors=True) - elif (output_path.parent / f"{output_name}.7z").exists(): - (output_path.parent / f"{output_name}.7z").unlink(missing_ok=True) else: - output_path = output_path.parent / f"{output_name}.zip" - output_path.unlink(missing_ok=True) + locations = [output_path.with_suffix(".7z"), output_path.with_suffix(".zip")] + for path in locations: + path.unlink(missing_ok=True) remove_from_cache(self.cache, metadata.id) def import_study(self, metadata: RawStudy, stream: t.BinaryIO) -> Study: From 8e9b6b5e1102a44dac770d21dfc75cfe7dbdcb84 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Thu, 6 Jun 2024 09:19:18 +0200 Subject: [PATCH 07/13] feat(archive-output): improve error message handling in archiving/unarchiving --- antarest/study/common/studystorage.py | 23 ++++- antarest/study/service.py | 42 ++++----- .../study/storage/abstract_storage_service.py | 91 ++++++++----------- .../business/test_raw_study_service.py | 27 +++++- 4 files changed, 101 insertions(+), 82 deletions(-) diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index 906564da35..1da82c4e42 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -264,10 +264,25 @@ def initialize_additional_data(self, study: T) -> bool: """Initialize additional data for a study.""" @abstractmethod - def archive_study_output(self, study: T, output_id: str) -> bool: - """Archive a study output.""" + def archive_study_output(self, study: T, output_id: str) -> None: + """ + Archive study output in a 7z file and remove the output folder. + + Args: + study: Study to archive the output from. + output_id: Output to archive. + """ # noinspection SpellCheckingInspection @abstractmethod - def unarchive_study_output(self, study: T, output_id: str, keep_src_zip: bool) -> bool: - """Un-archive a study output.""" + def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> None: + """ + Unarchive a study output archive and remove the archive file if needed. + + The output archive file could be a 7z file or a zip file. + + Args: + study: Study to unarchive the output from. + output_id: Output to unarchive. + keep_src_archive: Whether to keep the source archive file. + """ diff --git a/antarest/study/service.py b/antarest/study/service.py index 28afa70858..78a35331c8 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -2360,24 +2360,22 @@ def archive_output( if len(list(filter(lambda t: t.name in archive_task_names, study_tasks))): raise TaskAlreadyRunning() - def archive_output_task( - notifier: TaskUpdateNotifier, - ) -> TaskResult: + def archive_output_task(_: TaskUpdateNotifier) -> TaskResult: try: - study = self.get_study(study_id) - stopwatch = StopWatch() - self.storage_service.get_storage(study).archive_study_output(study, output_id) - stopwatch.log_elapsed(lambda x: logger.info(f"Output {output_id} of study {study_id} archived in {x}s")) + _study = self.get_study(study_id) + _stopwatch = StopWatch() + self.storage_service.get_storage(_study).archive_study_output(_study, output_id) + _stopwatch.log_elapsed( + lambda x: logger.info(f"Output {output_id} of study {study_id} archived in {x}s") + ) return TaskResult( success=True, message=f"Study output {study_id}/{output_id} successfully archived", ) except Exception as e: - logger.warning( - f"Could not archive the output {study_id}/{output_id}", - exc_info=e, - ) - raise e + _err_msg = f"Could not archive the output {study_id}/{output_id}" + logger.warning(_err_msg, exc_info=e) + return TaskResult(success=False, message=f"Unhandled exception: {_err_msg}: {e}") task_id = self.task_service.add_task( archive_output_task, @@ -2422,14 +2420,12 @@ def unarchive_output( if len(list(filter(lambda t: t.name in archive_task_names, study_tasks))): raise TaskAlreadyRunning() - def unarchive_output_task( - notifier: TaskUpdateNotifier, - ) -> TaskResult: + def unarchive_output_task(_: TaskUpdateNotifier) -> TaskResult: try: - study = self.get_study(study_id) - stopwatch = StopWatch() - self.storage_service.get_storage(study).unarchive_study_output(study, output_id, keep_src_zip) - stopwatch.log_elapsed( + _study = self.get_study(study_id) + _stopwatch = StopWatch() + self.storage_service.get_storage(_study).unarchive_study_output(_study, output_id, keep_src_zip) + _stopwatch.log_elapsed( lambda x: logger.info(f"Output {output_id} of study {study_id} unarchived in {x}s") ) return TaskResult( @@ -2437,11 +2433,9 @@ def unarchive_output_task( message=f"Study output {study_id}/{output_id} successfully unarchived", ) except Exception as e: - logger.warning( - f"Could not unarchive the output {study_id}/{output_id}", - exc_info=e, - ) - raise e + _err_msg = f"Could not unarchive the output {study_id}/{output_id}" + logger.warning(_err_msg, exc_info=e) + return TaskResult(success=False, message=f"Unhandled exception: {_err_msg}: {e}") task_id: t.Optional[str] = None workspace = getattr(study, "workspace", DEFAULT_WORKSPACE_NAME) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 11679db61f..b609814044 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -52,15 +52,6 @@ logger = logging.getLogger(__name__) -def _search_output_archive(study_path: str, output_id: str) -> t.Optional[Path]: - """Search for an output archive file in the study path, return ``None`` if not found.""" - locations = [ - Path(study_path) / "output" / f"{output_id}.7z", - Path(study_path) / "output" / f"{output_id}.zip", - ] - return next(iter(path for path in locations if path.exists()), None) - - class AbstractStorageService(IStudyStorageService[T], ABC): def __init__( self, @@ -347,50 +338,46 @@ def _read_additional_data_from_files(self, file_study: FileStudy) -> StudyAdditi study_additional_data = StudyAdditionalData(horizon=horizon, author=author, patch=patch.model_dump_json()) return study_additional_data - def archive_study_output(self, study: T, output_id: str) -> bool: - try: - # use 7zip to compress the output folder - archive_dir( - Path(study.path) / "output" / output_id, - Path(study.path) / "output" / f"{output_id}{ArchiveFormat.SEVEN_ZIP}", - remove_source_dir=True, - archive_format=ArchiveFormat.SEVEN_ZIP, - ) - remove_from_cache(self.cache, study.id) - return True - except Exception as e: - logger.warning( - f"Failed to archive study {study.name} output {output_id}", - exc_info=e, - ) - return False + def archive_study_output(self, study: T, output_id: str) -> None: + study_path = Path(study.path) + output_dir = study_path / "output" / output_id + if not output_dir.exists(): + _err_msg = f"Output '{output_id}' not found in study '{study.name}'" + actual_files = [p.relative_to(study_path) for p in study_path.glob("output/*") if p.is_dir()] + logger.error(f"{_err_msg}. Found folders: {', '.join([str(file) for file in actual_files])}") + raise FileNotFoundError(_err_msg) - def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> bool: - archive_path = _search_output_archive(study.path, output_id) - if archive_path is None: - logger.warning( - f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived", - ) - return False + archive_path = study_path / "output" / f"{output_id}.7z" + with py7zr.SevenZipFile(archive_path, "w") as szf: + szf.writeall(output_dir, arcname=".") - try: - if archive_path.suffix == ".7z": - with py7zr.SevenZipFile(archive_path, mode="r") as szf: - szf.extractall(Path(study.path) / "output" / output_id) - elif archive_path.suffix == ".zip": - with zipfile.ZipFile(archive_path, mode="r") as zipf: - zipf.extractall(Path(study.path) / "output" / output_id) - else: - raise NotImplementedError(f"Unsupported archive format {archive_path.suffix}") + shutil.rmtree(output_dir) + remove_from_cache(self.cache, study.id) + + def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> None: + # Search for an output archive file in the study path + study_path = Path(study.path) + locations = [ + study_path / "output" / f"{output_id}.7z", + study_path / "output" / f"{output_id}.zip", + ] + archive_path = next(iter(path for path in locations if path.exists()), None) + if archive_path is None: + _err_msg = f"Archive for study '{study.name}' output '{output_id}' not found" + actual_files = [p.relative_to(study_path) for p in study_path.glob("output/*") if p.is_file()] + logger.error(f"{_err_msg}. Found files: {', '.join([str(file) for file in actual_files])}") + raise FileNotFoundError(_err_msg) + + if archive_path.suffix == ".7z": + with py7zr.SevenZipFile(archive_path, mode="r") as szf: + szf.extractall(study_path / "output" / output_id) + elif archive_path.suffix == ".zip": + with zipfile.ZipFile(archive_path, mode="r") as zipf: + zipf.extractall(study_path / "output" / output_id) + else: + raise NotImplementedError(f"Unsupported archive format {archive_path.suffix}") - if not keep_src_archive: - archive_path.unlink() + if not keep_src_archive: + archive_path.unlink() - remove_from_cache(self.cache, study.id) - return True - except Exception as e: - logger.warning( - f"Failed to unarchive study {study.name} output {output_id}", - exc_info=e, - ) - return False + remove_from_cache(self.cache, study.id) diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index 8eaee65579..49586bd3aa 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -11,16 +11,19 @@ # This file is part of the Antares project. import datetime +import logging import os import platform import re import time import typing as t +import uuid from pathlib import Path from unittest.mock import Mock import py7zr import pytest +from _pytest.logging import LogCaptureFixture from antarest.core.config import Config, StorageConfig, WorkspaceConfig from antarest.core.exceptions import StudyDeletionNotAllowed, StudyNotFoundError @@ -474,7 +477,7 @@ def test_copy_study( @pytest.mark.unit_test -def test_archived_output(tmp_path: Path) -> None: +def test_archived_output(tmp_path: Path, caplog: LogCaptureFixture) -> None: if not platform.platform().startswith("Windows"): os.environ["TZ"] = "Europe/Paris" # set new timezone time.tzset() @@ -492,7 +495,7 @@ def test_archived_output(tmp_path: Path) -> None: patch_service=Mock(), ) - md = RawStudy(id=name, workspace="foo", path=str(study_path)) + md = RawStudy(id=str(uuid.uuid4()), workspace="foo", path=str(study_path), name=name) archived_output = tmp_path / "output.7z" with py7zr.SevenZipFile(archived_output, "w") as output_data: @@ -514,10 +517,30 @@ def test_archived_output(tmp_path: Path) -> None: output_name = study_service.import_output(md, archived_output) assert output_name is not None + + # If we try to unarchive an output that does not exist, we should have an error + # and an error message should be displayed in the logs to list the files + # present in the `output` folder. + with caplog.at_level(logging.ERROR): + with pytest.raises(FileNotFoundError) as ctx: + study_service.unarchive_study_output(md, "unknown", False) + assert str(ctx.value) == f"Archive for study '{name}' output 'unknown' not found" + assert f"output/{expected_output_name}.7z" in caplog.text + study_service.unarchive_study_output(md, expected_output_name, True) assert (study_path / "output" / (expected_output_name + ".7z")).exists() os.unlink(study_path / "output" / (expected_output_name + ".7z")) assert not (study_path / "output" / (expected_output_name + ".7z")).exists() + + # If we try to archive an output that does not exist, we should have an error + # and an error message should be displayed in the logs to list the files + # present in the `output` folder. + with caplog.at_level(logging.ERROR): + with pytest.raises(FileNotFoundError) as ctx: + study_service.archive_study_output(md, "unknown") + assert str(ctx.value) == f"Output 'unknown' not found in study '{name}'" + assert f"output/{expected_output_name}" in caplog.text + study_service.archive_study_output(md, expected_output_name) assert not (study_path / "output" / expected_output_name).exists() assert (study_path / "output" / (expected_output_name + ".7z")).exists() From 5bf668932b15897e990554dbca1b1eeaf8b61e7e Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Thu, 6 Jun 2024 10:29:36 +0200 Subject: [PATCH 08/13] feat(archive-output): improve implementation of simulation results listing --- antarest/study/storage/abstract_storage_service.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index b609814044..38ca363c74 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -44,7 +44,6 @@ ) from antarest.study.storage.patch_service import PatchService from antarest.study.storage.rawstudy.model.filesystem.config.files import get_playlist -from antarest.study.storage.rawstudy.model.filesystem.config.model import Simulation from antarest.study.storage.rawstudy.model.filesystem.factory import FileStudy, StudyFactory from antarest.study.storage.rawstudy.model.helpers import FileStudyHelpers from antarest.study.storage.utils import extract_output_name, fix_study_root, remove_from_cache @@ -187,10 +186,10 @@ def get_study_sim_result( results: t.List[StudySimResultDTO] = [] if study_data.config.outputs is not None: reference = (patch_metadata.outputs or PatchOutputs()).reference - for output in study_data.config.outputs: - output_data: Simulation = study_data.config.outputs[output] + for output, output_data in study_data.config.outputs.items(): try: - file_metadata = FileStudyHelpers.get_config(study_data, output_data.get_file()) + output_file = output_data.get_file() + file_metadata = FileStudyHelpers.get_config(study_data, output_file) settings = StudySimSettingsDTO( general=file_metadata["general"], input=file_metadata["input"], @@ -204,7 +203,7 @@ def get_study_sim_result( results.append( StudySimResultDTO( - name=output_data.get_file(), + name=output_file, type=output_data.mode, settings=settings, completionDate="", From 10a0b9654574016816ab6d0770eda59440c14279 Mon Sep 17 00:00:00 2001 From: Laurent LAPORTE Date: Thu, 6 Jun 2024 11:02:14 +0200 Subject: [PATCH 09/13] feat(archive-output): support `.7z` format for outputs parsing --- .../rawstudy/model/filesystem/config/files.py | 42 +++++++++------ .../rawstudy/model/filesystem/config/model.py | 3 +- .../filesystem/config/test_files.py | 53 +++++++++++++++---- 3 files changed, 70 insertions(+), 28 deletions(-) diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py index 8f23fecd25..104fd4359f 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/files.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/files.py @@ -23,6 +23,8 @@ import py7zr from antares.study.version import StudyVersion +import py7zr + from antarest.core.model import JSON from antarest.core.serialization import from_json from antarest.core.utils.archives import ( @@ -285,9 +287,9 @@ def parse_outputs(output_path: Path) -> t.Dict[str, Simulation]: try: if suffix == ".tmp" or path_name.startswith("~"): continue - elif suffix == ".zip": + elif suffix in {".zip", ".7z"}: if path.stem not in sims: - if simulation := parse_simulation_zip(path): + if simulation := parse_simulation_archive(path): sims[path.stem] = simulation elif (path / "about-the-study/parameters.ini").exists(): if simulation := parse_simulation(path, canonical_name=path_name): @@ -297,26 +299,32 @@ def parse_outputs(output_path: Path) -> t.Dict[str, Simulation]: return sims -def parse_simulation_zip(path: Path) -> Simulation: +def parse_simulation_archive(path: Path) -> Simulation: xpansion_path = "expansion/out.json" ini_path = "about-the-study/parameters.ini" integrity_path = "checkIntegrity.txt" with tempfile.TemporaryDirectory(dir=path.parent, prefix=f"~{path.stem}-", suffix=".tmp") as output_dir: try: - with zipfile.ZipFile(path) as zf: - try: - zf.extract(ini_path, output_dir) - except KeyError: - raise SimulationParsingError( - path, - f"Parameters file '{ini_path}' not found", - ) from None - if xpansion_path in zf.namelist(): - zf.extract(xpansion_path, output_dir) - if integrity_path in zf.namelist(): - zf.extract(integrity_path, output_dir) - except zipfile.BadZipFile as exc: - raise SimulationParsingError(path, f"Bad ZIP file: {exc}") from exc + if path.suffix == ".zip": + with zipfile.ZipFile(path) as zf: + try: + zf.extract(ini_path, output_dir) + except KeyError: + raise SimulationParsingError(path, f"Parameters file '{ini_path}' not found") from None + if xpansion_path in zf.namelist(): + zf.extract(xpansion_path, output_dir) + if integrity_path in zf.namelist(): + zf.extract(integrity_path, output_dir) + elif path.suffix == ".7z": + with py7zr.SevenZipFile(path, mode="r") as z: + z.extract(output_dir, [ini_path, xpansion_path, integrity_path]) + if not (Path(output_dir) / ini_path).exists(): + raise SimulationParsingError(path, f"Parameters file '{ini_path}' not found") + else: + raise ValueError(f"Unsupported file format: {path.suffix}") + + except (zipfile.BadZipFile, py7zr.exceptions.Bad7zFile) as exc: + raise SimulationParsingError(path, f"Bad archive file: {exc}") from exc simulation = parse_simulation(Path(output_dir), canonical_name=path.stem) simulation.archived = True return simulation diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/model.py b/antarest/study/storage/rawstudy/model/filesystem/config/model.py index 64c5be0ee7..73202d8be8 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/model.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/model.py @@ -193,7 +193,8 @@ def __init__( def next_file(self, name: str, is_output: bool = False) -> "FileStudyTreeConfig": if is_output and name in self.outputs and self.outputs[name].archived: - archive_path: t.Optional[Path] = self.path / f"{name}.zip" + locations = [self.path / f"{name}.7z", self.path / f"{name}.zip"] + archive_path = next((p for p in locations if p.exists())) else: archive_path = self.archive_path diff --git a/tests/storage/repository/filesystem/config/test_files.py b/tests/storage/repository/filesystem/config/test_files.py index fbdf160bce..1e623daf70 100644 --- a/tests/storage/repository/filesystem/config/test_files.py +++ b/tests/storage/repository/filesystem/config/test_files.py @@ -14,17 +14,18 @@ from pathlib import Path from unittest.mock import Mock, patch +import py7zr import pytest from antarest.study.storage.rawstudy.model.filesystem.config.exceptions import SimulationParsingError -from antarest.study.storage.rawstudy.model.filesystem.config.files import parse_simulation_zip +from antarest.study.storage.rawstudy.model.filesystem.config.files import parse_simulation_archive from antarest.study.storage.rawstudy.model.filesystem.config.model import Simulation PARSE_SIMULATION_NAME = "antarest.study.storage.rawstudy.model.filesystem.config.files.parse_simulation" class TestParseSimulationZip: - def test_parse_simulation_zip__nominal(self, tmp_path: Path): + def test_parse_simulation_archive__nominal_zip(self, tmp_path: Path): # prepare a ZIP file with the following files archived_files = [ "about-the-study/parameters.ini", @@ -49,14 +50,46 @@ def my_parse_simulation(path: Path, canonical_name: str) -> Simulation: assert uncompressed.is_file(), f"Missing {name_}" return Mock(spec=Simulation) - # Call the `parse_simulation_zip` but using `my_parse_simulation` + # Call the `parse_simulation_archive` but using `my_parse_simulation` with patch(PARSE_SIMULATION_NAME, new=my_parse_simulation): - actual = parse_simulation_zip(zip_path) + actual = parse_simulation_archive(zip_path) # check the result assert actual.archived is True - def test_parse_simulation_zip__missing_required_files(self, tmp_path: Path): + def test_parse_simulation_archive__nominal_7z(self, tmp_path: Path): + # prepare a ZIP file with the following files + archived_files = [ + "about-the-study/parameters.ini", + "expansion/out.json", + "checkIntegrity.txt", + ] + archive_path = tmp_path.joinpath("dummy.7z") + with py7zr.SevenZipFile(archive_path, mode="w") as zf: + for name in archived_files: + zf.writestr(b"dummy data", name) + + def my_parse_simulation(path: Path, canonical_name: str) -> Simulation: + """ + Mock function of the function `parse_simulation`, which is used to: + - avoid calling the original function which do a lot of stuff, + - check that the required files exist in a temporary directory. + """ + assert path.is_dir() + assert canonical_name == archive_path.stem + for name_ in archived_files: + uncompressed = path.joinpath(name_) + assert uncompressed.is_file(), f"Missing {name_}" + return Mock(spec=Simulation) + + # Call the `parse_simulation_archive` but using `my_parse_simulation` + with patch(PARSE_SIMULATION_NAME, new=my_parse_simulation): + actual = parse_simulation_archive(archive_path) + + # check the result + assert actual.archived is True + + def test_parse_simulation_archive__missing_required_files(self, tmp_path: Path): # prepare a ZIP file with the following files archived_files = [ # "about-the-study/parameters.ini", # <- required @@ -68,17 +101,17 @@ def test_parse_simulation_zip__missing_required_files(self, tmp_path: Path): for name in archived_files: zf.writestr(name, b"dummy data") - # Call the `parse_simulation_zip` but using `my_parse_simulation` + # Call the `parse_simulation_archive` but using `my_parse_simulation` with patch(PARSE_SIMULATION_NAME): with pytest.raises(SimulationParsingError): - parse_simulation_zip(zip_path) + parse_simulation_archive(zip_path) - def test_parse_simulation_zip__bad_zip_file(self, tmp_path: Path): + def test_parse_simulation_archive__bad_zip_file(self, tmp_path: Path): # prepare a bad ZIP file zip_path = tmp_path.joinpath("dummy.zip") zip_path.write_bytes(b"PK") - # Call the `parse_simulation_zip` but using `my_parse_simulation` + # Call the `parse_simulation_archive` but using `my_parse_simulation` with patch(PARSE_SIMULATION_NAME): with pytest.raises(SimulationParsingError): - parse_simulation_zip(zip_path) + parse_simulation_archive(zip_path) From b42e2128789a29c0c1ac58f1a3c93704599cb1d3 Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Mon, 21 Oct 2024 16:04:44 +0200 Subject: [PATCH 10/13] fix(archive-output): correct formatting and typing issues --- .../study/storage/rawstudy/model/filesystem/config/files.py | 3 --- antarest/study/storage/utils.py | 1 - 2 files changed, 4 deletions(-) diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py index 104fd4359f..656ddaeca7 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/files.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/files.py @@ -23,12 +23,9 @@ import py7zr from antares.study.version import StudyVersion -import py7zr - from antarest.core.model import JSON from antarest.core.serialization import from_json from antarest.core.utils.archives import ( - ArchiveFormat, extract_lines_from_archive, is_archive_format, read_file_from_archive, diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index f268f25028..9da3394920 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -24,7 +24,6 @@ from zipfile import ZipFile import py7zr - from antares.study.version import StudyVersion from antarest.core.exceptions import StudyValidationError, UnsupportedStudyVersion From 5e49ae066cb814d4fa764609ea3451f7272d24c8 Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Thu, 24 Oct 2024 21:09:15 +0200 Subject: [PATCH 11/13] fix(archive-output): correct code errors and formatting --- antarest/study/storage/abstract_storage_service.py | 2 +- .../study/storage/rawstudy/model/filesystem/config/files.py | 6 +----- .../study/storage/rawstudy/model/filesystem/config/model.py | 2 +- antarest/study/storage/utils.py | 2 +- tests/core/utils/test_extract_zip.py | 1 + 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 38ca363c74..94773d0011 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -263,7 +263,7 @@ def import_output( stopwatch.log_elapsed(lambda elapsed_time: logger.info(f"Copied output for {study_id} in {elapsed_time}s")) fix_study_root(path_output) output_full_name = extract_output_name(path_output, output_name) - extension = f"{ArchiveFormat.ZIP}" if is_zipped else "" + extension = f"{ArchiveFormat.SEVEN_ZIP}" if is_archived else "" path_output = path_output.rename(Path(path_output.parent, output_full_name + extension)) data = self.get(metadata, f"output/{output_full_name}", 1, use_cache=False) diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/files.py b/antarest/study/storage/rawstudy/model/filesystem/config/files.py index 656ddaeca7..ba5865fd3a 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/files.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/files.py @@ -25,11 +25,7 @@ from antarest.core.model import JSON from antarest.core.serialization import from_json -from antarest.core.utils.archives import ( - extract_lines_from_archive, - is_archive_format, - read_file_from_archive, -) +from antarest.core.utils.archives import extract_lines_from_archive, is_archive_format, read_file_from_archive from antarest.study.model import STUDY_VERSION_8_1, STUDY_VERSION_8_6 from antarest.study.storage.rawstudy.ini_reader import IniReader from antarest.study.storage.rawstudy.model.filesystem.config.binding_constraint import ( diff --git a/antarest/study/storage/rawstudy/model/filesystem/config/model.py b/antarest/study/storage/rawstudy/model/filesystem/config/model.py index 73202d8be8..0812aee9ff 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/config/model.py +++ b/antarest/study/storage/rawstudy/model/filesystem/config/model.py @@ -194,7 +194,7 @@ def __init__( def next_file(self, name: str, is_output: bool = False) -> "FileStudyTreeConfig": if is_output and name in self.outputs and self.outputs[name].archived: locations = [self.path / f"{name}.7z", self.path / f"{name}.zip"] - archive_path = next((p for p in locations if p.exists())) + archive_path: t.Optional[Path] = next((p for p in locations if p.exists())) else: archive_path = self.archive_path diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 9da3394920..9d8093b7d5 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -152,7 +152,7 @@ def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = No if new_suffix_name: suffix_name = new_suffix_name general_info["name"] = suffix_name - if not archived: + if not is_output_archived: ini_writer = IniWriter() ini_writer.write(info_antares_output, path_output / info_filename) else: diff --git a/tests/core/utils/test_extract_zip.py b/tests/core/utils/test_extract_zip.py index 9327bfc21e..5d24e6c772 100644 --- a/tests/core/utils/test_extract_zip.py +++ b/tests/core/utils/test_extract_zip.py @@ -20,6 +20,7 @@ from antarest.core.exceptions import BadArchiveContent from antarest.core.utils.archives import extract_archive + class TestExtractArchive: """ Test the `extract_zip` function. From 44a6c7858a9974dfa02770d51727773c5401e32d Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Fri, 25 Oct 2024 10:52:24 +0200 Subject: [PATCH 12/13] refactor(archive-output): move generic functions to `antarest.core.utils.archives.py` --- antarest/study/service.py | 10 +--- antarest/study/storage/utils.py | 83 +++++++++++++++------------------ 2 files changed, 40 insertions(+), 53 deletions(-) diff --git a/antarest/study/service.py b/antarest/study/service.py index 78a35331c8..7f66ea1900 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -58,7 +58,7 @@ from antarest.core.serialization import to_json from antarest.core.tasks.model import TaskListFilter, TaskResult, TaskStatus, TaskType from antarest.core.tasks.service import ITaskService, TaskUpdateNotifier, noop_notifier -from antarest.core.utils.archives import ArchiveFormat, is_archive_format +from antarest.core.utils.archives import ArchiveFormat, is_archive_format, is_output_archived from antarest.core.utils.fastapi_sqlalchemy import db from antarest.core.utils.utils import StopWatch from antarest.login.model import Group @@ -140,13 +140,7 @@ from antarest.study.storage.storage_service import StudyStorageService from antarest.study.storage.study_download_utils import StudyDownloader, get_output_variables_information from antarest.study.storage.study_upgrader import StudyUpgrader, check_versions_coherence, find_next_version -from antarest.study.storage.utils import ( - assert_permission, - get_start_date, - is_managed, - is_output_archived, - remove_from_cache, -) +from antarest.study.storage.utils import assert_permission, get_start_date, is_managed, remove_from_cache from antarest.study.storage.variantstudy.business.utils import transform_command_to_dto from antarest.study.storage.variantstudy.model.command.generate_thermal_cluster_timeseries import ( GenerateThermalClusterTimeSeries, diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index 9d8093b7d5..71f9d3d11d 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -32,7 +32,6 @@ from antarest.core.model import PermissionInfo, StudyPermissionType from antarest.core.permissions import check_permission from antarest.core.requests import UserHasNotPermissionError -from antarest.core.utils.archives import is_archive_format from antarest.core.utils.utils import StopWatch from antarest.study.model import ( DEFAULT_WORKSPACE_NAME, @@ -118,50 +117,6 @@ def find_single_output_path(all_output_path: Path) -> Path: return all_output_path -def is_output_archived(path_output: Path) -> bool: - # Returns True it the given path is archived or if adding a suffix to the path points to an existing path - suffixes = [".zip"] - return path_output.suffix in suffixes or any(path_output.with_suffix(suffix).exists() for suffix in suffixes) - - -def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str: - ini_reader = IniReader() - is_output_archived = path_output.suffix in {".zip", ".7z"} - info_filename = "info.antares-output" - if is_output_archived: - with tempfile.TemporaryDirectory() as temp_dir: - s = StopWatch() - if path_output.suffix == ".zip": - with ZipFile(path_output, mode="r") as zip_obj: - zip_obj.extract(info_filename, temp_dir) - else: - with py7zr.SevenZipFile(path_output, mode="r") as archive: - archive.extract(targets=[info_filename], path=temp_dir) - info_antares_output = ini_reader.read(Path(temp_dir) / info_filename) - s.log_elapsed(lambda x: logger.info(f"'{info_filename}' has been read in {x}s")) - - else: - info_antares_output = ini_reader.read(path_output / info_filename) - - general_info = info_antares_output["general"] - - date = datetime.fromtimestamp(int(general_info["timestamp"])).strftime("%Y%m%d-%H%M") - - mode = "eco" if general_info["mode"] == "Economy" else "adq" - suffix_name = general_info["name"] or "" - if new_suffix_name: - suffix_name = new_suffix_name - general_info["name"] = suffix_name - if not is_output_archived: - ini_writer = IniWriter() - ini_writer.write(info_antares_output, path_output / info_filename) - else: - logger.warning("Could not rewrite the new name inside the output: the output is archived") - - name = f"-{suffix_name}" if suffix_name else "" - return f"{date}{mode}{name}" - - def is_managed(study: Study) -> bool: return not hasattr(study, "workspace") or study.workspace == DEFAULT_WORKSPACE_NAME @@ -395,3 +350,41 @@ def ignore_outputs(directory: str, _: t.Sequence[str]) -> t.Sequence[str]: study.tree.denormalize() duration = "{:.3f}".format(time.time() - stop_time) logger.info(f"Study '{study_dir}' denormalized in {duration}s") + + +def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str: + ini_reader = IniReader() + is_output_archived = path_output.suffix in {".zip", ".7z"} + info_filename = "info.antares-output" + if is_output_archived: + with tempfile.TemporaryDirectory() as temp_dir: + s = StopWatch() + if path_output.suffix == ".zip": + with ZipFile(path_output, mode="r") as zip_obj: + zip_obj.extract(info_filename, temp_dir) + else: + with py7zr.SevenZipFile(path_output, mode="r") as archive: + archive.extract(targets=[info_filename], path=temp_dir) + info_antares_output = ini_reader.read(Path(temp_dir) / info_filename) + s.log_elapsed(lambda x: logger.info(f"'{info_filename}' has been read in {x}s")) + + else: + info_antares_output = ini_reader.read(path_output / info_filename) + + general_info = info_antares_output["general"] + + date = datetime.fromtimestamp(int(general_info["timestamp"])).strftime("%Y%m%d-%H%M") + + mode = "eco" if general_info["mode"] == "Economy" else "adq" + suffix_name = general_info["name"] or "" + if new_suffix_name: + suffix_name = new_suffix_name + general_info["name"] = suffix_name + if not is_output_archived: + ini_writer = IniWriter() + ini_writer.write(info_antares_output, path_output / info_filename) + else: + logger.warning("Could not rewrite the new name inside the output: the output is archived") + + name = f"-{suffix_name}" if suffix_name else "" + return f"{date}{mode}{name}" From 3eae66e50c8d54e5168104296f1de8d286a9582a Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Fri, 25 Oct 2024 10:53:43 +0200 Subject: [PATCH 13/13] fix(archive-output): add `.7z` as an archiving format --- antarest/core/utils/archives.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/antarest/core/utils/archives.py b/antarest/core/utils/archives.py index a128cd4438..a5af9a36a6 100644 --- a/antarest/core/utils/archives.py +++ b/antarest/core/utils/archives.py @@ -34,6 +34,12 @@ def is_archive_format(suffix: str) -> bool: return suffix in {ArchiveFormat.ZIP, ArchiveFormat.SEVEN_ZIP} +def is_output_archived(path_output: Path) -> bool: + # Returns True it the given path is archived or if adding a suffix to the path points to an existing path + suffixes = [".zip", ".7z"] + return path_output.suffix in suffixes or any(path_output.with_suffix(suffix).exists() for suffix in suffixes) + + def archive_dir( src_dir_path: Path, target_archive_path: Path,