Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(archive-output): add .7z archiving and unarchiving support for study outputs #2039

Draft
wants to merge 13 commits into
base: dev
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
feat(archive-output): use ".7z" format to archive outputs
  • Loading branch information
mabw-rte committed Oct 25, 2024
commit 3b607e593ef62db0c16adfd8a5ab436d9f617759
49 changes: 34 additions & 15 deletions antarest/study/storage/abstract_storage_service.py
Original file line number Diff line number Diff line change
@@ -51,6 +51,14 @@
logger = logging.getLogger(__name__)


def find_output_archive(study: T, output_id: str) -> t.Optional[Path]:
    """
    Locate the archive file of a study output, if one exists.

    The study's ``output`` directory is probed for ``<output_id>.7z`` first
    and ``<output_id>.zip`` second; the first path that exists is returned.

    Args:
        study: object whose ``path`` attribute is the study root directory.
        output_id: name of the output, without any archive extension.

    Returns:
        The path of the archive that was found, or ``None`` when neither exists.
    """
    output_dir = Path(study.path) / "output"
    # Order matters: a ".7z" archive wins over a ".zip" one when both are present.
    for suffix in (".7z", ".zip"):
        candidate = output_dir / f"{output_id}{suffix}"
        if candidate.exists():
            return candidate
    return None


class AbstractStorageService(IStudyStorageService[T], ABC):
def __init__(
self,
@@ -244,17 +252,19 @@ def import_output(
study_id = metadata.id
path_output.mkdir(parents=True)
output_full_name: t.Optional[str]
is_zipped = False
is_archived = False
extension = ""
stopwatch = StopWatch()
try:
if isinstance(output, Path):
if output != path_output and output.suffix != ArchiveFormat.ZIP:
if output != path_output and output.suffix not in {".zip", ".7z"}:
shutil.copytree(output, path_output / "imported")
elif output.suffix == ArchiveFormat.ZIP:
is_zipped = True
elif output.suffix in {".zip", ".7z"}:
is_archived = True
path_output.rmdir()
path_output = Path(str(path_output) + f"{ArchiveFormat.ZIP}")
path_output = Path(str(path_output) + output.suffix)
shutil.copyfile(output, path_output)
extension = ".zip" if output.suffix == ".zip" else ".7z"
else:
extract_archive(output, path_output)

@@ -273,7 +283,7 @@ def import_output(
except Exception as e:
logger.error("Failed to import output", exc_info=e)
shutil.rmtree(path_output, ignore_errors=True)
if is_zipped:
if is_archived:
Path(str(path_output) + f"{ArchiveFormat.ZIP}").unlink(missing_ok=True)
output_full_name = None

@@ -337,11 +347,12 @@ def _read_additional_data_from_files(self, file_study: FileStudy) -> StudyAdditi

def archive_study_output(self, study: T, output_id: str) -> bool:
try:
# use 7zip to compress the output folder
archive_dir(
Path(study.path) / "output" / output_id,
Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
Path(study.path) / "output" / f"{output_id}{ArchiveFormat.SEVEN_ZIP}",
remove_source_dir=True,
archive_format=ArchiveFormat.ZIP,
archive_format=ArchiveFormat.SEVEN_ZIP,
)
remove_from_cache(self.cache, study.id)
return True
@@ -352,18 +363,26 @@ def archive_study_output(self, study: T, output_id: str) -> bool:
)
return False

def unarchive_study_output(self, study: T, output_id: str, keep_src_zip: bool) -> bool:
if not (Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}").exists():
def unarchive_study_output(self, study: T, output_id: str, keep_src_archive: bool) -> bool:
archive_path = find_output_archive(study, output_id)
if archive_path is None:
logger.warning(
f"Failed to archive study {study.name} output {output_id}. Maybe it's already unarchived",
)
return False
try:
unzip(
Path(study.path) / "output" / output_id,
Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
remove_source_zip=not keep_src_zip,
)
# use 7zip to uncompress the output folder
if archive_path.suffix == ".7z":
with py7zr.SevenZipFile(archive_path, "r") as szf:
szf.extractall(Path(study.path) / "output" / output_id)
if not keep_src_archive:
archive_path.unlink()
else:
unzip(
Path(study.path) / "output" / output_id,
Path(study.path) / "output" / f"{output_id}{ArchiveFormat.ZIP}",
remove_source_zip=not keep_src_archive,
)
remove_from_cache(self.cache, study.id)
return True
except Exception as e:
2 changes: 2 additions & 0 deletions antarest/study/storage/rawstudy/raw_study_service.py
Original file line number Diff line number Diff line change
@@ -304,6 +304,8 @@ def delete_output(self, metadata: RawStudy, output_name: str) -> None:
output_path = study_path / "output" / output_name
if output_path.exists() and output_path.is_dir():
shutil.rmtree(output_path, ignore_errors=True)
elif (output_path.parent / f"{output_name}.7z").exists():
(output_path.parent / f"{output_name}.7z").unlink(missing_ok=True)
else:
output_path = output_path.parent / f"{output_name}.zip"
output_path.unlink(missing_ok=True)
22 changes: 14 additions & 8 deletions antarest/study/storage/utils.py
Original file line number Diff line number Diff line change
@@ -23,6 +23,8 @@
from uuid import uuid4
from zipfile import ZipFile

import py7zr

from antares.study.version import StudyVersion

from antarest.core.exceptions import StudyValidationError, UnsupportedStudyVersion
@@ -79,12 +81,12 @@ def fix_study_root(study_path: Path) -> None:
Args:
study_path: the study initial root path
"""
# TODO: what if it is a zipped output ?
if is_archive_format(study_path.suffix):
# TODO: what if it is an archived output?
if study_path.suffix in {".zip", ".7z"}:
return None

if not study_path.is_dir():
raise StudyValidationError("Not a directory: '{study_path}'")
raise StudyValidationError(f"Not a directory: '{study_path}'")

root_path = study_path
contents = os.listdir(root_path)
@@ -125,13 +127,17 @@ def is_output_archived(path_output: Path) -> bool:

def extract_output_name(path_output: Path, new_suffix_name: t.Optional[str] = None) -> str:
ini_reader = IniReader()
archived = is_output_archived(path_output)
if archived:
is_output_archived = path_output.suffix in {".zip", ".7z"}
if is_output_archived:
temp_dir = tempfile.TemporaryDirectory()
s = StopWatch()
with ZipFile(path_output, "r") as zip_obj:
zip_obj.extract("info.antares-output", temp_dir.name)
info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output")
if path_output.suffix == ".zip":
with ZipFile(path_output, "r") as zip_obj:
zip_obj.extract("info.antares-output", temp_dir.name)
else:
with py7zr.SevenZipFile(path_output, mode="r") as archive:
archive.extract(targets=["info.antares-output"], path=temp_dir.name)
info_antares_output = ini_reader.read(Path(temp_dir.name) / "info.antares-output")
s.log_elapsed(lambda x: logger.info(f"info.antares_output has been read in {x}s"))
temp_dir.cleanup()