feat(archive-output): use ".7z" format to archive outputs

AntaresSimulatorTeam · mabw-rte · May 28, 2024 · May 28, 2024 · Jun 5, 2024 · Jun 5, 2024
commit 3b607e593ef62db0c16adfd8a5ab436d9f617759
diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py
@@ -51,6 +51,14 @@
 logger = logging.getLogger(__name__)
 
 
+def find_output_archive(study: T, output_id: str) -> t.Optional[Path]:
+    """Locate the archive of a study output: ".7z" is probed first, then ".zip"; ``None`` if neither exists."""
+    output_dir = Path(study.path) / "output"
+    # Lazy generator: the ".zip" candidate is only checked when no ".7z" file is found.
+    candidates = (output_dir / f"{output_id}{suffix}" for suffix in (".7z", ".zip"))
+    return next((candidate for candidate in candidates if candidate.exists()), None)
+
+
 class AbstractStorageService(IStudyStorageService[T], ABC):
     def __init__(
         self,
@@ -244,17 +252,19 @@ def import_output(
         study_id = metadata.id
         path_output.mkdir(parents=True)
         output_full_name: t.Optional[str]
-        is_zipped = False
+        is_archived = False
+        extension = ""
         stopwatch = StopWatch()
         try:
             if isinstance(output, Path):
-                if output != path_output and output.suffix != ArchiveFormat.ZIP:
+                if output != path_output and output.suffix not in {".zip", ".7z"}:
                     shutil.copytree(output, path_output / "imported")
-                elif output.suffix == ArchiveFormat.ZIP:
-                    is_zipped = True
+                elif output.suffix in {".zip", ".7z"}:
+                    is_archived = True
                     path_output.rmdir()
-                    path_output = Path(str(path_output) + f"{ArchiveFormat.ZIP}")
+                    path_output = Path(str(path_output) + output.suffix)
                     shutil.copyfile(output, path_output)
+                    extension = ".zip" if output.suffix == ".zip" else ".7z"
             else:
                 extract_archive(output, path_output)
 
@@ -273,7 +283,7 @@ def import_output(
         except Exception as e:
             logger.error("Failed to import output", exc_info=e)
             shutil.rmtree(path_output, ignore_errors=True)
-            if is_zipped:
+            if is_archived:
                 Path(str(path_output) + f"{ArchiveFormat.ZIP}").unlink(missing_ok=True)
             output_full_name = None
 
@@ -337,11 +347,12 @@ def _read_additional_data_from_files(self, file_study: FileStudy) -> StudyAdditi
 
     def archive_study_output(self, study: T, output_id: str) -> bool:
         try:
+            # use 7zip to compress the output folder
             archive_dir(
                 Path(study.path) / "output" / output_id,
-                Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
+                Path(study.path) / "output" / f"{output_id}{ArchiveFormat.SEVEN_ZIP}",
                 remove_source_dir=True,
-                archive_format=ArchiveFormat.ZIP,
+                archive_format=ArchiveFormat.SEVEN_ZIP,
             )
             remove_from_cache(self.cache, study.id)
             return True
@@ -352,18 +363,26 @@ def archive_study_output(self, study: T, output_id: str) -> bool:
             )
             return False
 
-    def unarchive_study_output(self, study: T, output_id: str, keep_src_zip: bool) -> bool:
-        if not (Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}").exists():
+    def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> bool:
+        archive_path = find_output_archive(study, output_id)
+        if archive_path is None:
             logger.warning(
                 f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived",
             )
             return False
         try:
-            unzip(
-                Path(study.path) / "output" / output_id,
-                Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
-                remove_source_zip=not keep_src_zip,
-            )
+            # extract the output with the tool matching the archive format (".7z" or ".zip")
+            if archive_path.suffix == ".7z":
+                with py7zr.SevenZipFile(archive_path, "r") as szf:
+                    szf.extractall(Path(study.path) / "output" / output_id)
+                if not keep_src_archive:
+                    archive_path.unlink()
+            else:
+                unzip(
+                    Path(study.path) / "output" / output_id,
+                    Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
+                    remove_source_zip=not keep_src_archive,
+                )
             remove_from_cache(self.cache, study.id)
             return True
         except Exception as e:

diff --git a/antarest/study/storage/rawstudy/raw_study_service.py b/antarest/study/storage/rawstudy/raw_study_service.py
@@ -304,6 +304,8 @@ def delete_output(self, metadata: RawStudy, output_name: str) -> None:
         output_path = study_path / "output" / output_name
         if output_path.exists() and output_path.is_dir():
             shutil.rmtree(output_path, ignore_errors=True)
+        elif (output_path.parent / f"{output_name}.7z").exists():
+            (output_path.parent / f"{output_name}.7z").unlink(missing_ok=True)
         else:
             output_path = output_path.parent / f"{output_name}.zip"
             output_path.unlink(missing_ok=True)

diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py
@@ -23,6 +23,8 @@
 from uuid import uuid4
 from zipfile import ZipFile
 
+import py7zr
+
 from antares.study.version import StudyVersion
 
 from antarest.core.exceptions import StudyValidationError, UnsupportedStudyVersion
@@ -79,12 +81,12 @@ def fix_study_root(study_path: Path) -> None:
     Args:
         study_path: the study initial root path
     """
-    # TODO: what if it is a zipped output ?
-    if is_archive_format(study_path.suffix):
+    # TODO: what if it is an archived output?
+    if study_path.suffix in {".zip", ".7z"}:
         return None
 
     if not study_path.is_dir():
-        raise StudyValidationError("Not a directory: '{study_path}'")
+        raise StudyValidationError(f"Not a directory: '{study_path}'")
 
     root_path = study_path
     contents = os.listdir(root_path)
@@ -125,13 +127,17 @@ def is_output_archived(path_output: Path) -> bool:
 
 def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str:
     ini_reader = IniReader()
-    archived = is_output_archived(path_output)
-    if archived:
+    is_output_archived = path_output.suffix in {".zip", ".7z"}
+    if is_output_archived:
         temp_dir = tempfile.TemporaryDirectory()
         s = StopWatch()
-        with ZipFile(path_output, "r") as zip_obj:
-            zip_obj.extract("info.antares-output", temp_dir.name)
-            info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output")
+        if path_output.suffix == ".zip":
+            with ZipFile(path_output, "r") as zip_obj:
+                zip_obj.extract("info.antares-output", temp_dir.name)
+        else:
+            with py7zr.SevenZipFile(path_output, mode="r") as archive:
+                archive.extract(targets=["info.antares-output"], path=temp_dir.name)
+        info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output")
         s.log_elapsed(lambda x: logger.info(f"info.antares_output has been read in {x}s"))
         temp_dir.cleanup()