From 0c9b16f123d5df4c4ea1c0b18c8276b540f70493 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Tue, 5 Sep 2023 15:52:44 +0200 Subject: [PATCH 01/39] try getting just stage name, but fall back to str representation of stage; close #197 --- pypiper/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypiper/pipeline.py b/pypiper/pipeline.py index 69474c2..a5546ce 100644 --- a/pypiper/pipeline.py +++ b/pypiper/pipeline.py @@ -314,7 +314,7 @@ def run(self, start_point=None, stop_before=None, stop_after=None): # between results from different stages. skip_mode = False - print("Running stage: {}".format(stage)) + print(f"Running stage: {getattr(stage, "name", str(stage))}") stage.run() self.executed.append(stage) From 0b430c7ab416f35389a85683c56c2258adb31e60 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 4 Oct 2023 15:56:14 -0400 Subject: [PATCH 02/39] version 0.13.3a1 for pipestat 0.6.0a1 --- docs/changelog.md | 4 ++++ pypiper/_version.py | 2 +- pypiper/manager.py | 21 ++++++++++++--------- pypiper/utils.py | 12 ++++++------ requirements/requirements-pypiper.txt | 2 +- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index d7281c6..81a0729 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,9 @@ # Changelog +## [0.13.3a1] -- 2023-10-04 +### Fixed +- refactor for pipestat 0.6.0a1 + ## [0.13.2] -- 2023-08-02 ### Fixed - fixed self.new_start overriding checkpoints. diff --git a/pypiper/_version.py b/pypiper/_version.py index 83ce76f..78a3de9 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.13.2" +__version__ = "0.13.3a1" diff --git a/pypiper/manager.py b/pypiper/manager.py index c3f4cf6..65adad4 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -344,7 +344,7 @@ def _get_arg(args_dict, arg_name): return None if arg_name not in args_dict else args_dict[arg_name] self._pipestat_manager = PipestatManager( - sample_name=self.pipestat_sample_name + record_identifier=self.pipestat_sample_name or _get_arg(args_dict, "pipestat_sample_name") or DEFAULT_SAMPLE_NAME, pipeline_name=self.name, @@ -724,7 +724,8 @@ def start_pipeline(self, args=None, multi=False): self.info("\n----------------------------------------\n") self.status = "running" self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier="running" + record_identifier=self._pipestat_manager.sample_name, + status_identifier="running", ) # Record the start in PIPE_profile and PIPE_commands output files so we @@ -770,7 +771,8 @@ def _set_status_flag(self, status): prev_status = self.status self.status = status self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier=status + record_identifier=self._pipestat_manager.sample_name, + status_identifier=status, ) self.debug("\nChanged status from {} to {}.".format(prev_status, self.status)) @@ -1419,7 +1421,7 @@ def _wait_for_lock(self, lock_file): ) # self._set_status_flag(WAIT_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.sample_name, status_identifier="waiting", ) first_message_flag = True @@ -1443,7 +1445,7 @@ def _wait_for_lock(self, lock_file): self.timestamp("File unlocked.") # self._set_status_flag(RUN_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.sample_name, status_identifier="running", ) @@ -1602,7 +1604,7 @@ def report_result(self, key, value, nolog=False, result_formatter=None): reported_result = self.pipestat.report( values={key: value}, - sample_name=self.pipestat_sample_name, + record_identifier=self.pipestat_sample_name, result_formatter=rf, ) @@ -1684,7 +1686,7 @@ def report_object( val = {key: message_raw.replace("\t", " ")} reported_result = self.pipestat.report( - values=val, sample_name=self.pipestat_sample_name, result_formatter=rf + values=val, record_identifier=self.pipestat_sample_name, result_formatter=rf ) if not nolog: for r in reported_result: @@ -2026,7 +2028,7 @@ def fail_pipeline(self, exc, dynamic_recover=False): self.info("Failure reason: " + str(exc)) # self._set_status_flag(FAIL_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.sample_name, status_identifier="failed", ) @@ -2087,7 +2089,8 @@ def stop_pipeline(self, status=COMPLETE_FLAG): """ # self._set_status_flag(status) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier=status + record_identifier=self._pipestat_manager.sample_name, + status_identifier=status, ) self._cleanup() elapsed_time_this_run = str( diff --git a/pypiper/utils.py b/pypiper/utils.py index 2c5ac75..3d64071 100644 --- a/pypiper/utils.py +++ b/pypiper/utils.py @@ -387,20 +387,20 @@ def split_by_pipes(cmd): cmdlist = [] newcmd = str() for char in cmd: - if char is "{": + if char == "{": stack_brace.append("{") - elif char is "}": + elif char == "}": stack_brace.pop() - elif char is "(": + elif char == "(": stack_paren.append("(") - elif char is ")": + elif char == ")": stack_paren.pop() if len(stack_brace) > 0 or len(stack_paren) > 0: # We are inside a parenthetic of some kind; emit character # no matter what it is newcmd += char - elif char is "|": + elif char == "|": # if it's a pipe, finish the command and start a new one cmdlist.append(newcmd) newcmd = str() @@ -1110,7 +1110,7 @@ def _add_args(parser, args, required): return parser -def result_formatter_markdown(pipeline_name, sample_name, res_id, value) -> str: +def result_formatter_markdown(pipeline_name, record_identifier, res_id, value) -> str: """ Returns Markdown formatted value as string """ diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 886be3e..04e6661 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -4,4 +4,4 @@ psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.4.0 +pipestat>=0.6.0a1 From d9c818d3135e924d8696ab1ea9deeba8a92f67a8 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 7 Nov 2023 22:44:07 +0100 Subject: [PATCH 03/39] updated to pipestat 0.6.0 --- pypiper/manager.py | 47 ++++++++++++++++++++++++++-------------------- pypiper/utils.py | 4 +++- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index c3f4cf6..57af110 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -330,7 +330,6 @@ def __init__( # pipestat setup self.pipestat_sample_name = pipestat_sample_name or DEFAULT_SAMPLE_NAME - # getattr(self, "sample_name", DEFAULT_SAMPLE_NAME) # don't force default pipestat_results_file value unless # pipestat config not provided @@ -344,7 +343,7 @@ def _get_arg(args_dict, arg_name): return None if arg_name not in args_dict else args_dict[arg_name] self._pipestat_manager = PipestatManager( - sample_name=self.pipestat_sample_name + record_identifier=self.pipestat_sample_name or _get_arg(args_dict, "pipestat_sample_name") or DEFAULT_SAMPLE_NAME, pipeline_name=self.name, @@ -437,7 +436,7 @@ def _completed(self): :return bool: Whether the managed pipeline is in a completed state. """ return ( - self.pipestat.get_status(self._pipestat_manager.sample_name) + self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) == COMPLETE_FLAG ) @@ -448,7 +447,10 @@ def _failed(self): :return bool: Whether the managed pipeline is in a failed state. """ - return self.pipestat.get_status(self._pipestat_manager.sample_name) == FAIL_FLAG + return ( + self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) + == FAIL_FLAG + ) @property def halted(self): @@ -457,7 +459,8 @@ def halted(self): :return bool: Whether the managed pipeline is in a paused/halted state. """ return ( - self.pipestat.get_status(self._pipestat_manager.sample_name) == PAUSE_FLAG + self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) + == PAUSE_FLAG ) @property @@ -724,7 +727,8 @@ def start_pipeline(self, args=None, multi=False): self.info("\n----------------------------------------\n") self.status = "running" self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier="running" + record_identifier=self._pipestat_manager.cfg["record_identifier"], + status_identifier="running", ) # Record the start in PIPE_profile and PIPE_commands output files so we @@ -770,7 +774,8 @@ def _set_status_flag(self, status): prev_status = self.status self.status = status self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier=status + record_identifier=self._pipestat_manager.cfg["record_identifier"], + status_identifier=status, ) self.debug("\nChanged status from {} to {}.".format(prev_status, self.status)) @@ -786,7 +791,7 @@ def _flag_file_path(self, status=None): """ flag_file_name = "{}_{}_{}".format( - self._pipestat_manager["_pipeline_name"], + self._pipestat_manager.cfg["pipeline_name"], self.pipestat_sample_name, flag_name(status or self.status), ) @@ -1419,7 +1424,7 @@ def _wait_for_lock(self, lock_file): ) # self._set_status_flag(WAIT_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.cfg["record_identifier"], status_identifier="waiting", ) first_message_flag = True @@ -1443,7 +1448,7 @@ def _wait_for_lock(self, lock_file): self.timestamp("File unlocked.") # self._set_status_flag(RUN_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.cfg["record_identifier"], status_identifier="running", ) @@ -1602,7 +1607,7 @@ def report_result(self, key, value, nolog=False, result_formatter=None): reported_result = self.pipestat.report( values={key: value}, - sample_name=self.pipestat_sample_name, + record_identifier=self.pipestat_sample_name, result_formatter=rf, ) @@ -1684,7 +1689,7 @@ def report_object( val = {key: message_raw.replace("\t", " ")} reported_result = self.pipestat.report( - values=val, sample_name=self.pipestat_sample_name, result_formatter=rf + values=val, record_identifier=self.pipestat_sample_name, result_formatter=rf ) if not nolog: for r in reported_result: @@ -1851,10 +1856,12 @@ def _refresh_stats(self): _, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file") print(data) pipeline_key = list( - data[self.pipestat["_pipeline_name"]][self.pipestat["_pipeline_type"]] + data[self._pipestat_manager.cfg["pipeline_name"]][ + self.pipestat["_pipeline_type"] + ] )[0] if self.name == pipeline_key: - for key, value in data[self.pipestat["_pipeline_name"]][ + for key, value in data[self._pipestat_manager.cfg["pipeline_name"]][ self.pipestat["_pipeline_type"] ][pipeline_key].items(): self.stats_dict[key] = value.strip() @@ -1989,12 +1996,12 @@ def complete(self): """Stop a completely finished pipeline.""" self.stop_pipeline(status=COMPLETE_FLAG) - def fail_pipeline(self, exc, dynamic_recover=False): + def fail_pipeline(self, exc: Exception, dynamic_recover: bool = False): """ If the pipeline does not complete, this function will stop the pipeline gracefully. It sets the status flag to failed and skips the normal success completion procedure. - :param Exception e: Exception to raise. + :param Exception exc: Exception to raise. :param bool dynamic_recover: Whether to recover e.g. for job termination. """ # Take care of any active running subprocess @@ -2024,9 +2031,8 @@ def fail_pipeline(self, exc, dynamic_recover=False): total_time = datetime.timedelta(seconds=self.time_elapsed(self.starttime)) self.info("Total time: " + str(total_time)) self.info("Failure reason: " + str(exc)) - # self._set_status_flag(FAIL_FLAG) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, + record_identifier=self._pipestat_manager.cfg["record_identifier"], status_identifier="failed", ) @@ -2087,7 +2093,8 @@ def stop_pipeline(self, status=COMPLETE_FLAG): """ # self._set_status_flag(status) self.pipestat.set_status( - sample_name=self._pipestat_manager.sample_name, status_identifier=status + record_identifier=self._pipestat_manager.cfg["record_identifier"], + status_identifier=status, ) self._cleanup() elapsed_time_this_run = str( @@ -2457,7 +2464,7 @@ def _cleanup(self, dry_run=False): for fn in glob.glob(self.outfolder + flag_name("*")) if COMPLETE_FLAG not in os.path.basename(fn) and not "{}_{}_{}".format( - self._pipestat_manager["_pipeline_name"], + self._pipestat_manager.cfg["pipeline_name"], self.pipestat_sample_name, run_flag, ) diff --git a/pypiper/utils.py b/pypiper/utils.py index 2c5ac75..54677c8 100644 --- a/pypiper/utils.py +++ b/pypiper/utils.py @@ -1110,9 +1110,11 @@ def _add_args(parser, args, required): return parser -def result_formatter_markdown(pipeline_name, sample_name, res_id, value) -> str: +def result_formatter_markdown(pipeline_name, record_identifier, res_id, value) -> str: """ Returns Markdown formatted value as string + + # Pipeline_name and record_identifier should be kept because pipestat needs it """ message_markdown = "\n> `{key}`\t{value}\t_RES_".format(key=res_id, value=value) From e00e8d8d930f22cc3b0312ed0f066db6027ebb88 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 7 Nov 2023 22:49:45 +0100 Subject: [PATCH 04/39] updated requirements --- requirements/requirements-pypiper.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 04e6661..0b74f55 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -1,7 +1,6 @@ -attmap>=0.12.5 logmuse>=0.2.4 psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.6.0a1 +pipestat>=0.6.0a4 From 39b6e9931f19cc46ecf95c9432a58ea5bc6c37b3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 7 Nov 2023 22:56:38 +0100 Subject: [PATCH 05/39] testing, drop python 3.7 --- .github/workflows/run-pytest.yml | 2 +- pypiper/manager.py | 2 -- setup.py | 5 ++++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 6bf573b..cc3d35d 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.10"] os: [ubuntu-latest] steps: diff --git a/pypiper/manager.py b/pypiper/manager.py index fa73352..57af110 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -728,7 +728,6 @@ def start_pipeline(self, args=None, multi=False): self.status = "running" self.pipestat.set_status( record_identifier=self._pipestat_manager.cfg["record_identifier"], - status_identifier="running", ) @@ -1426,7 +1425,6 @@ def _wait_for_lock(self, lock_file): # self._set_status_flag(WAIT_FLAG) self.pipestat.set_status( record_identifier=self._pipestat_manager.cfg["record_identifier"], - status_identifier="waiting", ) first_message_flag = True diff --git a/setup.py b/setup.py index d485071..06a9d08 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,10 @@ def read_reqs_file(reqs_name): classifiers=[ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Bio-Informatics", ], author="Nathan Sheffield, Johanna Klughammer, Andre Rendeiro", From 1c2c844dacf29c1d693af20e57f20e9cc1b60d4d Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 8 Nov 2023 14:34:47 -0500 Subject: [PATCH 06/39] fix f-string quote issue for python 3.10 --- pypiper/pipeline.py | 2 +- requirements/requirements-pypiper.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pypiper/pipeline.py b/pypiper/pipeline.py index 652d6bf..95d31f2 100644 --- a/pypiper/pipeline.py +++ b/pypiper/pipeline.py @@ -330,7 +330,7 @@ def run(self, start_point=None, stop_before=None, stop_after=None): # between results from different stages. skip_mode = False - print(f"Running stage: {getattr(stage, "name", str(stage))}") + print(f"Running stage: {getattr(stage, 'name', str(stage))}") stage.run() self.executed.append(stage) diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 0b74f55..dd3bca5 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -3,4 +3,4 @@ psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.6.0a4 +pipestat>=0.6.0a5 From 5b4668291cea761f5ca1de3750207291b21b5d6b Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:48:31 -0500 Subject: [PATCH 07/39] minor refactor to use pipestat properties instead of cfg dict --- pypiper/manager.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 57af110..660ed85 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -329,7 +329,7 @@ def __init__( signal.signal(signal.SIGTERM, self._signal_term_handler) # pipestat setup - self.pipestat_sample_name = pipestat_sample_name or DEFAULT_SAMPLE_NAME + self.pipestat_record_identifier = pipestat_sample_name or DEFAULT_SAMPLE_NAME # don't force default pipestat_results_file value unless # pipestat config not provided @@ -343,7 +343,7 @@ def _get_arg(args_dict, arg_name): return None if arg_name not in args_dict else args_dict[arg_name] self._pipestat_manager = PipestatManager( - record_identifier=self.pipestat_sample_name + record_identifier=self.pipestat_record_identifier or _get_arg(args_dict, "pipestat_sample_name") or DEFAULT_SAMPLE_NAME, pipeline_name=self.name, @@ -436,7 +436,7 @@ def _completed(self): :return bool: Whether the managed pipeline is in a completed state. """ return ( - self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) + self.pipestat.get_status(self._pipestat_manager.record_identifier) == COMPLETE_FLAG ) @@ -448,7 +448,7 @@ def _failed(self): :return bool: Whether the managed pipeline is in a failed state. """ return ( - self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) + self.pipestat.get_status(self._pipestat_manager.record_identifier) == FAIL_FLAG ) @@ -459,7 +459,7 @@ def halted(self): :return bool: Whether the managed pipeline is in a paused/halted state. """ return ( - self.pipestat.get_status(self._pipestat_manager.cfg["record_identifier"]) + self.pipestat.get_status(self._pipestat_manager.record_identifier) == PAUSE_FLAG ) @@ -723,11 +723,11 @@ def start_pipeline(self, args=None, multi=False): results = self._pipestat_manager.__str__().split("\n") for i in results: self.info("* " + i) - self.info("* Sample name: " + self.pipestat_sample_name + "\n") + self.info("* Sample name: " + self.pipestat_record_identifier + "\n") self.info("\n----------------------------------------\n") self.status = "running" self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier="running", ) @@ -774,7 +774,7 @@ def _set_status_flag(self, status): prev_status = self.status self.status = status self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier=status, ) self.debug("\nChanged status from {} to {}.".format(prev_status, self.status)) @@ -791,8 +791,8 @@ def _flag_file_path(self, status=None): """ flag_file_name = "{}_{}_{}".format( - self._pipestat_manager.cfg["pipeline_name"], - self.pipestat_sample_name, + self._pipestat_manager.pipeline_name, + self.pipestat_record_identifier, flag_name(status or self.status), ) return pipeline_filepath(self, filename=flag_file_name) @@ -1424,7 +1424,7 @@ def _wait_for_lock(self, lock_file): ) # self._set_status_flag(WAIT_FLAG) self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier="waiting", ) first_message_flag = True @@ -1448,7 +1448,7 @@ def _wait_for_lock(self, lock_file): self.timestamp("File unlocked.") # self._set_status_flag(RUN_FLAG) self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier="running", ) @@ -1607,7 +1607,7 @@ def report_result(self, key, value, nolog=False, result_formatter=None): reported_result = self.pipestat.report( values={key: value}, - record_identifier=self.pipestat_sample_name, + record_identifier=self.pipestat_record_identifier, result_formatter=rf, ) @@ -1689,7 +1689,9 @@ def report_object( val = {key: message_raw.replace("\t", " ")} reported_result = self.pipestat.report( - values=val, record_identifier=self.pipestat_sample_name, result_formatter=rf + values=val, + record_identifier=self.pipestat_record_identifier, + result_formatter=rf, ) if not nolog: for r in reported_result: @@ -1856,12 +1858,12 @@ def _refresh_stats(self): _, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file") print(data) pipeline_key = list( - data[self._pipestat_manager.cfg["pipeline_name"]][ + data[self._pipestat_manager.pipeline_name][ self.pipestat["_pipeline_type"] ] )[0] if self.name == pipeline_key: - for key, value in data[self._pipestat_manager.cfg["pipeline_name"]][ + for key, value in data[self._pipestat_manager.pipeline_name][ self.pipestat["_pipeline_type"] ][pipeline_key].items(): self.stats_dict[key] = value.strip() @@ -2032,7 +2034,7 @@ def fail_pipeline(self, exc: Exception, dynamic_recover: bool = False): self.info("Total time: " + str(total_time)) self.info("Failure reason: " + str(exc)) self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier="failed", ) @@ -2093,7 +2095,7 @@ def stop_pipeline(self, status=COMPLETE_FLAG): """ # self._set_status_flag(status) self.pipestat.set_status( - record_identifier=self._pipestat_manager.cfg["record_identifier"], + record_identifier=self._pipestat_manager.record_identifier, status_identifier=status, ) self._cleanup() @@ -2464,8 +2466,8 @@ def _cleanup(self, dry_run=False): for fn in glob.glob(self.outfolder + flag_name("*")) if COMPLETE_FLAG not in os.path.basename(fn) and not "{}_{}_{}".format( - self._pipestat_manager.cfg["pipeline_name"], - self.pipestat_sample_name, + self._pipestat_manager.pipeline_name, + self.pipestat_record_identifier, run_flag, ) == os.path.basename(fn) From d360ba1425d6dfd2cc37232835517bbbb72c138d Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:54:22 -0500 Subject: [PATCH 08/39] update changelog and version number --- docs/changelog.md | 6 ++++++ pypiper/_version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 81a0729..472efbe 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,11 @@ # Changelog +## [0.13.3a2] -- 2023-11-xx +### Changed +- minor refactor for pipestat 0.6.0 release +- drop python 2.7 +- updated requirements + ## [0.13.3a1] -- 2023-10-04 ### Fixed - refactor for pipestat 0.6.0a1 diff --git a/pypiper/_version.py b/pypiper/_version.py index 78a3de9..7c16d15 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.13.3a1" +__version__ = "0.13.3a2" From 4ea21c2bb99e66e0b85a101af31a900ab0162f1c Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 14 Nov 2023 08:46:02 -0500 Subject: [PATCH 09/39] update v0.13.3 and changelog --- docs/changelog.md | 10 ++++------ pypiper/_version.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 472efbe..d0fe4f9 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,14 +1,12 @@ # Changelog -## [0.13.3a2] -- 2023-11-xx +## [0.13.3] -- 2023-11-xx ### Changed -- minor refactor for pipestat 0.6.0 release +- refactor for pipestat v0.6.0 release - drop python 2.7 - updated requirements - -## [0.13.3a1] -- 2023-10-04 -### Fixed -- refactor for pipestat 0.6.0a1 +- ### Fixed +- fixed #196 and #197 ## [0.13.2] -- 2023-08-02 ### Fixed diff --git a/pypiper/_version.py b/pypiper/_version.py index 7c16d15..26c36ca 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.13.3a2" +__version__ = "0.13.3" From ed6b7fb43140526d831ce976d68f6c554efaf166 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:06:06 -0500 Subject: [PATCH 10/39] fix _refresh_stats bug and change version to 0.14.0 --- docs/changelog.md | 2 +- pypiper/_version.py | 2 +- pypiper/manager.py | 15 ++++++++------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index d0fe4f9..fb4b8c7 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,6 @@ # Changelog -## [0.13.3] -- 2023-11-xx +## [0.14.0] -- 2023-11-xx ### Changed - refactor for pipestat v0.6.0 release - drop python 2.7 diff --git a/pypiper/_version.py b/pypiper/_version.py index 26c36ca..9e78220 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.13.3" +__version__ = "0.14.0" diff --git a/pypiper/manager.py b/pypiper/manager.py index 660ed85..d3c058d 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -1856,17 +1856,18 @@ def _refresh_stats(self): if os.path.isfile(self.pipeline_stats_file): _, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file") - print(data) - pipeline_key = list( + record_identifier = list( data[self._pipestat_manager.pipeline_name][ - self.pipestat["_pipeline_type"] + self._pipestat_manager.pipeline_type ] )[0] - if self.name == pipeline_key: + + # Confirm that the loaded stats file is the same namespace as the pipeline manager + if record_identifier == self._pipestat_manager.record_identifier: for key, value in data[self._pipestat_manager.pipeline_name][ - self.pipestat["_pipeline_type"] - ][pipeline_key].items(): - self.stats_dict[key] = value.strip() + self._pipestat_manager.pipeline_type + ][record_identifier].items(): + self.stats_dict[key] = value def get_stat(self, key): """ From b0c5f8d5a06c039e2b9df7fc5a60407dd2a4ddfe Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 15 Nov 2023 15:14:52 -0500 Subject: [PATCH 11/39] potential fix for #201 --- pypiper/manager.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index d3c058d..c87ec9c 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -1587,7 +1587,9 @@ def _report_profile( with open(self.pipeline_profile_file, "a") as myfile: myfile.write(message_raw + "\n") - def report_result(self, key, value, nolog=False, result_formatter=None): + def report_result( + self, key, value, nolog=False, result_formatter=None, force_overwrite=False + ): """ Writes a key:value pair to self.pipeline_stats_file. @@ -1597,6 +1599,7 @@ def report_result(self, key, value, nolog=False, result_formatter=None): logfile. Use sparingly in case you will be printing the result in a different format. :param str result_formatter: function for formatting via pipestat backend + :param bool force_overwrite: overwrite results if they already exist? :return str reported_result: the reported result is returned as a list of formatted strings. """ @@ -1609,11 +1612,17 @@ def report_result(self, key, value, nolog=False, result_formatter=None): values={key: value}, record_identifier=self.pipestat_record_identifier, result_formatter=rf, + force_overwrite=force_overwrite, ) if not nolog: - for r in reported_result: - self.info(r) + if isinstance( + reported_result, bool + ): # Pipestat can return False if results are NOT reported. + self.info("Result successfully reported? " + str(reported_result)) + else: + for r in reported_result: + self.info(r) return reported_result @@ -1626,6 +1635,7 @@ def report_object( annotation=None, nolog=False, result_formatter=None, + force_overwrite=False, ): """ Writes a key:value pair to self.pipeline_stats_file. Note: this function @@ -1646,6 +1656,7 @@ def report_object( logfile. Use sparingly in case you will be printing the result in a different format. :param str result_formatter: function for formatting via pipestat backend + :param bool force_overwrite: overwrite results if they already exist? :return str reported_result: the reported result is returned as a list of formatted strings. """ warnings.warn( @@ -1692,11 +1703,17 @@ def report_object( values=val, record_identifier=self.pipestat_record_identifier, result_formatter=rf, + force_overwrite=force_overwrite, ) + if not nolog: - for r in reported_result: - self.info(r) - return reported_result + if isinstance( + reported_result, bool + ): # Pipestat can return False if results are NOT reported. + self.info("Result successfully reported? " + str(reported_result)) + else: + for r in reported_result: + self.info(r) def _safe_write_to_file(self, file, message): """ From d7b9c8c70d6e87805b0257d5cf510dba4e2c153a Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 15 Nov 2023 15:17:13 -0500 Subject: [PATCH 12/39] changelog --- docs/changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index fb4b8c7..2c21384 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,6 +7,8 @@ - updated requirements - ### Fixed - fixed #196 and #197 +- ### Added +- added `force_overwrite` to `report_result` and `report_object` ## [0.13.2] -- 2023-08-02 ### Fixed From f92d034b5d5ee4c01f4d7e7077d883a79d93b30e Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 15 Nov 2023 18:12:54 -0500 Subject: [PATCH 13/39] v0.14.0a1 prerelease --- pypiper/_version.py | 2 +- requirements/requirements-pypiper.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pypiper/_version.py b/pypiper/_version.py index 9e78220..679c8ec 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.14.0" +__version__ = "0.14.0a1" diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index dd3bca5..17bf456 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -3,4 +3,4 @@ psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.6.0a5 +pipestat>=0.6.0a7 From 1a677dad34ffe77dd14cff1034d02e9fde09c117 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 20 Nov 2023 14:41:28 -0500 Subject: [PATCH 14/39] report_object -> change message_raw to be a values dict to conform with pipestat output schemas --- pypiper/manager.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index c87ec9c..d9ae454 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -1675,29 +1675,14 @@ def report_object( anchor_text = str(key).strip() # better to use a relative path in this file # convert any absolute paths into relative paths - relative_filename = ( - os.path.relpath(filename, self.outfolder) - if os.path.isabs(filename) - else filename - ) - - if anchor_image: - relative_anchor_image = ( - os.path.relpath(anchor_image, self.outfolder) - if os.path.isabs(anchor_image) - else anchor_image - ) - else: - relative_anchor_image = "None" - message_raw = "{filename}\t{anchor_text}\t{anchor_image}\t{annotation}".format( - filename=relative_filename, - anchor_text=anchor_text, - anchor_image=relative_anchor_image, - annotation=annotation, - ) - - val = {key: message_raw.replace("\t", " ")} + values = { + "path": filename, + "thumbnail_path": anchor_image, + "title": anchor_text, + "annotation": annotation, + } + val = {key: values} reported_result = self.pipestat.report( values=val, From ed95993f5031dc4e25f8fb162b7184e26786177a Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 20 Nov 2023 17:26:41 -0500 Subject: [PATCH 15/39] self.pipestat_results_file should take priority over self.pipeline_stats_file related to https://github.com/databio/pepatac/issues/257 --- pypiper/manager.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index d9ae454..903badc 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -338,6 +338,9 @@ def __init__( self, filename="pipestat_results.yaml" ) + if pipestat_results_file: + self.pipestat_results_file = pipestat_results_file + def _get_arg(args_dict, arg_name): """safely get argument from arg dict -- return None if doesn't exist""" return None if arg_name not in args_dict else args_dict[arg_name] @@ -350,8 +353,9 @@ def _get_arg(args_dict, arg_name): schema_path=pipestat_schema or _get_arg(args_dict, "pipestat_schema") or default_pipestat_output_schema(sys.argv[0]), - results_file_path=self.pipeline_stats_file - or _get_arg(args_dict, "pipestat_results_file"), + results_file_path=self.pipestat_results_file + or _get_arg(args_dict, "pipestat_results_file") + or self.pipeline_stats_file, config_file=pipestat_config or _get_arg(args_dict, "pipestat_config"), multi_pipelines=multi, ) From cc8407089f8df7aac666ed68727b69d7bd369bba Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:22:49 -0500 Subject: [PATCH 16/39] make pipestat_results_file = pipeline_stats_file if it is not provided --- pypiper/manager.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 903badc..5b6bb3c 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -334,11 +334,8 @@ def __init__( # don't force default pipestat_results_file value unless # pipestat config not provided if pipestat_config is None and pipestat_results_file is None: - pipestat_results_file = pipeline_filepath( - self, filename="pipestat_results.yaml" - ) - - if pipestat_results_file: + self.pipestat_results_file = self.pipeline_stats_file + elif pipestat_results_file: self.pipestat_results_file = pipestat_results_file def _get_arg(args_dict, arg_name): From 2de4e842d6ba555cc1f18473403c04085dc586b2 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 27 Nov 2023 13:06:49 -0500 Subject: [PATCH 17/39] set pipeline_stats_file if pipestat_results_file IS provided, remove checking for the first record_identifier during get_stat --- pypiper/manager.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 5b6bb3c..1a4b1ab 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -337,6 +337,7 @@ def __init__( self.pipestat_results_file = self.pipeline_stats_file elif pipestat_results_file: self.pipestat_results_file = pipestat_results_file + self.pipeline_stats_file = self.pipestat_results_file def _get_arg(args_dict, arg_name): """safely get argument from arg dict -- return None if doesn't exist""" @@ -1859,18 +1860,11 @@ def _refresh_stats(self): if os.path.isfile(self.pipeline_stats_file): _, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file") - record_identifier = list( - data[self._pipestat_manager.pipeline_name][ - self._pipestat_manager.pipeline_type - ] - )[0] - - # Confirm that the loaded stats file is the same namespace as the pipeline manager - if record_identifier == self._pipestat_manager.record_identifier: - for key, value in data[self._pipestat_manager.pipeline_name][ - self._pipestat_manager.pipeline_type - ][record_identifier].items(): - self.stats_dict[key] = value + + for key, value in data[self._pipestat_manager.pipeline_name][ + self._pipestat_manager.pipeline_type + ][self._pipestat_manager.record_identifier].items(): + self.stats_dict[key] = value def get_stat(self, key): """ From d5446448656d6f52b637c4f4cb739b5b7a385777 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 8 Dec 2023 18:38:25 -0500 Subject: [PATCH 18/39] add pipestat_pipeline_type, defaulting to sample --- pypiper/manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pypiper/manager.py b/pypiper/manager.py index 1a4b1ab..67441d6 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -141,6 +141,7 @@ def __init__( pipestat_schema=None, pipestat_results_file=None, pipestat_config=None, + pipestat_pipeline_type=None, pipestat_result_formatter=None, **kwargs, ): @@ -330,6 +331,7 @@ def __init__( # pipestat setup self.pipestat_record_identifier = pipestat_sample_name or DEFAULT_SAMPLE_NAME + self.pipestat_pipeline_type = pipestat_pipeline_type or "sample" # don't force default pipestat_results_file value unless # pipestat config not provided @@ -356,6 +358,7 @@ def _get_arg(args_dict, arg_name): or self.pipeline_stats_file, config_file=pipestat_config or _get_arg(args_dict, "pipestat_config"), multi_pipelines=multi, + pipeline_type=self.pipestat_pipeline_type, ) self.start_pipeline(args, multi) From 649c985ed4d74034aaba2056307faf016037d1fc Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:56:15 -0500 Subject: [PATCH 19/39] pipestat req version bump, v0.14.0a2 bump for pre-release --- pypiper/_version.py | 2 +- requirements/requirements-docs.txt | 2 +- requirements/requirements-pypiper.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pypiper/_version.py b/pypiper/_version.py index 679c8ec..04ffdb1 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.14.0a1" +__version__ = "0.14.0a2" diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt index 4471914..45b990b 100644 --- a/requirements/requirements-docs.txt +++ b/requirements/requirements-docs.txt @@ -2,5 +2,5 @@ mkdocs>=1.0 markdown-include pydoc-markdown piper -pipestat>=0.4.0 +pipestat>=0.6.0a9 https://github.com/databio/mkdocs-databio/archive/master.zip \ No newline at end of file diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 17bf456..2b9fe4b 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -3,4 +3,4 @@ psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.6.0a7 +pipestat>=0.6.0a9 From 9349aa8279433b1acac144318d9593f2cdbeb737 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 22 Dec 2023 11:37:37 -0500 Subject: [PATCH 20/39] v0.14.0 release prep --- docs/changelog.md | 4 +++- pypiper/_version.py | 2 +- requirements/requirements-docs.txt | 2 +- requirements/requirements-pypiper.txt | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 2c21384..edbdfef 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,14 +1,16 @@ # Changelog -## [0.14.0] -- 2023-11-xx +## [0.14.0] -- 2023-12-22 ### Changed - refactor for pipestat v0.6.0 release - drop python 2.7 - updated requirements +- changed message_raw to be a value_dict when reporting to conform to pipestat - ### Fixed - fixed #196 and #197 - ### Added - added `force_overwrite` to `report_result` and `report_object` +- added pipestat_pipeline_type, defaulting to sample-level ## [0.13.2] -- 2023-08-02 ### Fixed diff --git a/pypiper/_version.py b/pypiper/_version.py index 04ffdb1..9e78220 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.14.0a2" +__version__ = "0.14.0" diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt index 45b990b..ef5b4e6 100644 --- a/requirements/requirements-docs.txt +++ b/requirements/requirements-docs.txt @@ -2,5 +2,5 @@ mkdocs>=1.0 markdown-include pydoc-markdown piper -pipestat>=0.6.0a9 +pipestat>=0.6.0 https://github.com/databio/mkdocs-databio/archive/master.zip \ No newline at end of file diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 2b9fe4b..9a35f34 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -3,4 +3,4 @@ psutil pandas ubiquerg>=0.4.5 yacman -pipestat>=0.6.0a9 +pipestat>=0.6.0 From b7d3f6bc8cb1b88a9d14a6af5ef1bab9d228ae1d Mon Sep 17 00:00:00 2001 From: nsheff Date: Tue, 16 Jan 2024 21:11:35 -0500 Subject: [PATCH 21/39] https updates --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 046003d..7e6db31 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Documentation Status](https://readthedocs.org/projects/pypiper/badge/?version=latest)](http://pypiper.readthedocs.org/en/latest/?badge=latest) [![Build Status](https://github.com/databio/pypiper/actions/workflows/run-pytest.yml/badge.svg)](https://github.com/databio/pypiper/actions/workflows/run-pytest.yml) -[![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) +[![PEP compatible](https://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) [![pypi-badge](https://img.shields.io/pypi/v/piper)](https://pypi.org/project/piper) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) From 87f1074bc781d94eef45758c5f7cbab6d48d7a88 Mon Sep 17 00:00:00 2001 From: nsheff Date: Sat, 17 Feb 2024 14:42:07 -0500 Subject: [PATCH 22/39] log outfile directly, and clean up logging code. Fix #210 --- .gitignore | 3 ++ pypiper/manager.py | 95 ++++++++++++---------------------------------- 2 files changed, 27 insertions(+), 71 deletions(-) diff --git a/.gitignore b/.gitignore index bc05997..c1e53a2 100644 --- a/.gitignore +++ b/.gitignore @@ -86,3 +86,6 @@ piper.egg-info/ *ipynb_checkpoints* *.egg-info* + + +example_pipelines/pipeline_output diff --git a/pypiper/manager.py b/pypiper/manager.py index 67441d6..d18eabc 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -631,88 +631,41 @@ def start_pipeline(self, args=None, multi=False): # Print out a header section in the pipeline log: # Wrap things in backticks to prevent markdown from interpreting underscores as emphasis. # print("----------------------------------------") - self.info("### Pipeline run code and environment:\n") - self.info( - "* " + "Command".rjust(20) + ": " + "`" + str(" ".join(sys.argv)) + "`" - ) - self.info("* " + "Compute host".rjust(20) + ": " + platform.node()) - self.info("* " + "Working dir".rjust(20) + ": " + os.getcwd()) - self.info("* " + "Outfolder".rjust(20) + ": " + self.outfolder) + def logfmt(key, value=None, padding=16): + padded_key = key.rjust(padding) + formatted_val = f"`{value}`" if value else "" + return f"* {padded_key}: {formatted_val}" - self.timestamp("* " + "Pipeline started at".rjust(20) + ": ") + self.info("### Pipeline run code and environment:\n") + self.info(logfmt("Command", str(" ".join(sys.argv)))) + self.info(logfmt("Compute host", platform.node())) + self.info(logfmt("Working dir", os.getcwd())) + self.info(logfmt("Outfolder", self.outfolder)) + self.info(logfmt("Log file", self.pipeline_log_file)) + self.timestamp(logfmt("Start time")) self.info("\n### Version log:\n") - self.info("* " + "Python version".rjust(20) + ": " + platform.python_version()) + self.info(logfmt("Python version", platform.python_version())) try: - self.info( - "* " - + "Pypiper dir".rjust(20) - + ": " - + "`" - + gitvars["pypiper_dir"].strip() - + "`" - ) - self.info("* " + "Pypiper version".rjust(20) + ": " + __version__) - self.info( - "* " + "Pypiper hash".rjust(20) + ": " + str(gitvars["pypiper_hash"]) - ) - self.info( - "* " - + "Pypiper branch".rjust(20) - + ": " - + str(gitvars["pypiper_branch"]) - ) - self.info( - "* " + "Pypiper date".rjust(20) + ": " + str(gitvars["pypiper_date"]) - ) + self.info(logfmt("Pypiper dir", gitvars["pypiper_dir"].strip())) + self.info(logfmt("Pypiper version", __version__)) + self.info(logfmt("Pypiper hash", gitvars["pypiper_hash"])) + self.info(logfmt("Pypiper branch", gitvars["pypiper_branch"])) + self.info(logfmt("Pypiper date", gitvars["pypiper_date"])) if gitvars["pypiper_diff"]: - self.info( - "* " - + "Pypiper diff".rjust(20) - + ": " - + str(gitvars["pypiper_diff"]) - ) + self.info(logfmt("Pypiper diff", gitvars["pypiper_diff"])) except KeyError: # It is ok if keys aren't set, it means pypiper isn't in a git repo. pass try: - self.info( - "* " - + "Pipeline dir".rjust(20) - + ": " - + "`" - + gitvars["pipe_dir"].strip() - + "`" - ) - self.info( - "* " + "Pipeline version".rjust(20) + ": " + str(self.pl_version) - ) - self.info( - "* " - + "Pipeline hash".rjust(20) - + ": " - + str(gitvars["pipe_hash"]).strip() - ) - self.info( - "* " - + "Pipeline branch".rjust(20) - + ": " - + str(gitvars["pipe_branch"]).strip() - ) - self.info( - "* " - + "Pipeline date".rjust(20) - + ": " - + str(gitvars["pipe_date"]).strip() - ) + self.info(logfmt("Pipeline dir", gitvars["pipe_dir"].strip())) + self.info(logfmt("Pipeline version", self.pl_version)) + self.info(logfmt("Pipeline hash", gitvars["pipe_hash"]).strip()) + self.info(logfmt("Pipeline branch", gitvars["pipe_branch"]).strip()) + self.info(logfmt("Pipeline date", gitvars["pipe_date"]).strip()) if gitvars["pipe_diff"] != "": - self.info( - "* " - + "Pipeline diff".rjust(20) - + ": " - + str(gitvars["pipe_diff"]).strip() - ) + self.info(logfmt("Pipeline diff", gitvars["pipe_diff"]).strip()) except KeyError: # It is ok if keys aren't set, it means the pipeline isn't a git repo. pass From 65ed31d91b5f8f6977361cb04e5e27372648bd0f Mon Sep 17 00:00:00 2001 From: nsheff Date: Sat, 17 Feb 2024 14:48:07 -0500 Subject: [PATCH 23/39] use f-strings in example pipelines --- example_pipelines/basic.py | 4 ++-- example_pipelines/hello_pypiper.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/example_pipelines/basic.py b/example_pipelines/basic.py index 34a0d37..c7497b2 100755 --- a/example_pipelines/basic.py +++ b/example_pipelines/basic.py @@ -26,7 +26,7 @@ tgt = "pipeline_output/test.out" # build the command -cmd = "shuf -i 1-500000000 -n 10000000 > " + tgt +cmd = f"shuf -i 1-500000000 -n 10000000 > {tgt}" # and run with run(). pm.run(cmd, target=tgt) @@ -34,7 +34,7 @@ # Now copy the data into a new file. # first specify target file and build command: tgt = "pipeline_output/copied.out" -cmd = "cp pipeline_output/test.out " + tgt +cmd = f"cp pipeline_output/test.out {tgt}" pm.run(cmd, target=tgt) # You can also string multiple commands together, which will execute diff --git a/example_pipelines/hello_pypiper.py b/example_pipelines/hello_pypiper.py index 88abecf..17d424a 100755 --- a/example_pipelines/hello_pypiper.py +++ b/example_pipelines/hello_pypiper.py @@ -12,7 +12,7 @@ # Now build a command-line command however you like, and pass it to pm.run() target_file = "hello_pypiper_results/output.txt" -cmd = "echo 'Hello, Pypiper!' > " + target_file +cmd = f"echo 'Hello, Pypiper!' > {target_file}" pm.run(cmd, target_file) pm.stop_pipeline() From bddeddcb220dc0ab8515945b6e514dd6ccd77d0a Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 14:01:41 +0100 Subject: [PATCH 24/39] simplify syntax --- pypiper/manager.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 67441d6..7e9dc2d 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -193,10 +193,7 @@ def __init__( # If no starting point was specified, assume that the pipeline's # execution is to begin right away and set the internal flag so that # run() is let loose to execute instructions given. - if not self.start_point: - self._active = True - else: - self._active = False + self._active = not self.start_point # Pipeline-level variables to track global state and pipeline stats # Pipeline settings From 005869e2a2896b99fa573d726d0c2594fdd04141 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 14:03:44 +0100 Subject: [PATCH 25/39] separate what can and can't cause exception --- pypiper/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 7e9dc2d..7a73f0c 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -223,10 +223,11 @@ def __init__( logger_kwargs.setdefault("name", default_logname) try: self._logger = logmuse.logger_via_cli(args) - self.debug("Logger set with logmuse.logger_via_cli") except logmuse.est.AbsentOptionException: self._logger = logmuse.init_logger("pypiper", level="DEBUG") self.debug("logger_via_cli failed; Logger set with logmuse.init_logger") + else: + self.debug("Logger set with logmuse.logger_via_cli") # Keep track of an ID for the number of processes attempted self.proc_count = 0 From d8504f6bf972c1d38e603f535af84e69edef8b9b Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 14:12:15 +0100 Subject: [PATCH 26/39] copy the kwargs to avoid modification --- pypiper/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 7a73f0c..00c6653 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -8,6 +8,7 @@ """ import atexit +import copy import datetime import errno import glob @@ -208,7 +209,7 @@ def __init__( self.testmode = params["testmode"] # Set up logger - logger_kwargs = logger_kwargs or {} + logger_kwargs = copy.deepcopy(logger_kwargs) or {} default_logname = ".".join([__name__, self.__class__.__name__, self.name]) if not args: # strict is only for logger_via_cli. From 28a608aa36fb34ddab13e3f5eeeda04652b18579 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 14:12:45 +0100 Subject: [PATCH 27/39] homogenize messaging about logger establishment --- pypiper/manager.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 00c6653..0f160ae 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -224,11 +224,13 @@ def __init__( logger_kwargs.setdefault("name", default_logname) try: self._logger = logmuse.logger_via_cli(args) - except logmuse.est.AbsentOptionException: + except logmuse.est.AbsentOptionException as e: self._logger = logmuse.init_logger("pypiper", level="DEBUG") - self.debug("logger_via_cli failed; Logger set with logmuse.init_logger") + logger_builder_method = "init_logger" + self.debug(f"logger_via_cli failed: {e}") else: - self.debug("Logger set with logmuse.logger_via_cli") + logger_builder_method = "logger_via_cli" + self.debug(f"Logger set with logmuse.{logger_builder_method}") # Keep track of an ID for the number of processes attempted self.proc_count = 0 From 9b2ccbc5f83894b1731a45b9ca282c160591a12f Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:00:42 +0100 Subject: [PATCH 28/39] organize logger construction for pipeline manager, and homogenize treatment of arguments across branches --- pypiper/manager.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 0f160ae..76bb306 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -209,28 +209,27 @@ def __init__( self.testmode = params["testmode"] # Set up logger - logger_kwargs = copy.deepcopy(logger_kwargs) or {} + logger_kwargs = logger_kwargs or {} default_logname = ".".join([__name__, self.__class__.__name__, self.name]) - if not args: + self._logger = None + if args: + logger_builder_method = "logger_via_cli" + try: + self._logger = logmuse.logger_via_cli(args, **logger_kwargs) + except logmuse.est.AbsentOptionException as e: + # Defer logger construction to init_logger. + self.debug(f"logger_via_cli failed: {e}") + if self._logger is None: + logger_builder_method = "init_logger" + # covers cases of bool(args) being False, or failure of logger_via_cli. # strict is only for logger_via_cli. - kwds = {k: v for k, v in logger_kwargs.items() if k != "strict"} + logger_kwargs = {k: v for k, v in logger_kwargs.items() if k != "strict"} try: - name = kwds.pop("name") + name = logger_kwargs.pop("name") except KeyError: name = default_logname - self._logger = logmuse.init_logger(name, **kwds) - self.debug("Logger set with logmuse.init_logger") - else: - logger_kwargs.setdefault("name", default_logname) - try: - self._logger = logmuse.logger_via_cli(args) - except logmuse.est.AbsentOptionException as e: - self._logger = logmuse.init_logger("pypiper", level="DEBUG") - logger_builder_method = "init_logger" - self.debug(f"logger_via_cli failed: {e}") - else: - logger_builder_method = "logger_via_cli" - self.debug(f"Logger set with logmuse.{logger_builder_method}") + self._logger = logmuse.init_logger(name, **logger_kwargs) + self.debug(f"Logger set with logmuse.{logger_builder_method}") # Keep track of an ID for the number of processes attempted self.proc_count = 0 From 76ffdc6ba7988b5e62d510c09e37b052d0f5167a Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:00:58 +0100 Subject: [PATCH 29/39] tweak syntax --- pypiper/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypiper/utils.py b/pypiper/utils.py index 8973466..37b396c 100644 --- a/pypiper/utils.py +++ b/pypiper/utils.py @@ -788,7 +788,7 @@ def pipeline_filepath(pm, filename=None, suffix=None): if filename is None and suffix is None: raise TypeError( - "Provide filename and/or suffix to create " "path to a pipeline file." + "Provide filename and/or suffix to create path to a pipeline file." ) filename = (filename or pm.name) + (suffix or "") From 7baa056fda3d35a83c022565223b9f75f38ec466 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:01:20 +0100 Subject: [PATCH 30/39] use default strict=False behavior for logger via CLI --- pypiper/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 76bb306..80b8d1e 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -215,7 +215,7 @@ def __init__( if args: logger_builder_method = "logger_via_cli" try: - self._logger = logmuse.logger_via_cli(args, **logger_kwargs) + self._logger = logger_via_cli(args, **logger_kwargs) except logmuse.est.AbsentOptionException as e: # Defer logger construction to init_logger. self.debug(f"logger_via_cli failed: {e}") @@ -229,7 +229,7 @@ def __init__( except KeyError: name = default_logname self._logger = logmuse.init_logger(name, **logger_kwargs) - self.debug(f"Logger set with logmuse.{logger_builder_method}") + self.debug(f"Logger set with {logger_builder_method}") # Keep track of an ID for the number of processes attempted self.proc_count = 0 From a83025bc550f8d224bf3e352c26cbe6c8400dfc8 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:05:48 +0100 Subject: [PATCH 31/39] move up logfile setting to set up solution for #212 --- pypiper/manager.py | 9 +++++---- pypiper/utils.py | 2 -- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 80b8d1e..7473d97 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -8,7 +8,6 @@ """ import atexit -import copy import datetime import errno import glob @@ -208,6 +207,11 @@ def __init__( self.output_parent = params["output_parent"] self.testmode = params["testmode"] + # Establish the log file to check safety with logging keyword arguments. + # Establish the output folder since it's required for the log file. + self.outfolder = os.path.join(outfolder, "") # trailing slash + self.pipeline_log_file = pipeline_filepath(self, suffix="_log.md") + # Set up logger logger_kwargs = logger_kwargs or {} default_logname = ".".join([__name__, self.__class__.__name__, self.name]) @@ -276,10 +280,7 @@ def __init__( # self.output_parent = os.path.join(os.getcwd(), self.output_parent) # File paths: - self.outfolder = os.path.join(outfolder, "") # trailing slash self.make_sure_path_exists(self.outfolder) - self.pipeline_log_file = pipeline_filepath(self, suffix="_log.md") - self.pipeline_profile_file = pipeline_filepath(self, suffix="_profile.tsv") # Stats and figures are general and so lack the pipeline name. diff --git a/pypiper/utils.py b/pypiper/utils.py index 37b396c..1c65f59 100644 --- a/pypiper/utils.py +++ b/pypiper/utils.py @@ -785,12 +785,10 @@ def pipeline_filepath(pm, filename=None, suffix=None): filename as given or determined by the pipeline name, and suffix appended if given. """ - if filename is None and suffix is None: raise TypeError( "Provide filename and/or suffix to create path to a pipeline file." ) - filename = (filename or pm.name) + (suffix or "") # Note that Pipeline and PipelineManager define the same outfolder. From ba423a428ff8279b508951efdc20d1206c1f14c1 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:08:48 +0100 Subject: [PATCH 32/39] check that logfile for logger doesn't match manager's own logfile; #212 --- pypiper/manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pypiper/manager.py b/pypiper/manager.py index 7473d97..6cae014 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -214,6 +214,10 @@ def __init__( # Set up logger logger_kwargs = logger_kwargs or {} + if logger_kwargs.get("logfile") == self.pipeline_log_file: + raise ValueError( + f"The logfile given for the pipeline manager's logger matches that which will be used by the manager itself: {self.pipeline_log_file}" + ) default_logname = ".".join([__name__, self.__class__.__name__, self.name]) self._logger = None if args: From 97821e986a5c248cba2dd885ace7d34bf3d5f3e5 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:16:23 +0100 Subject: [PATCH 33/39] test solution for #212 --- pypiper/manager.py | 3 ++- .../test_manager_constructor.py | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 6cae014..df661b3 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -58,6 +58,7 @@ LOCK_PREFIX = "lock." +LOGFILE_SUFFIX = "_log.md" class Unbuffered(object): @@ -210,7 +211,7 @@ def __init__( # Establish the log file to check safety with logging keyword arguments. # Establish the output folder since it's required for the log file. self.outfolder = os.path.join(outfolder, "") # trailing slash - self.pipeline_log_file = pipeline_filepath(self, suffix="_log.md") + self.pipeline_log_file = pipeline_filepath(self, suffix=LOGFILE_SUFFIX) # Set up logger logger_kwargs = logger_kwargs or {} diff --git a/tests/pipeline_manager/test_manager_constructor.py b/tests/pipeline_manager/test_manager_constructor.py index 0792bf1..b327fb2 100644 --- a/tests/pipeline_manager/test_manager_constructor.py +++ b/tests/pipeline_manager/test_manager_constructor.py @@ -1,10 +1,11 @@ """ Test effects of construction of a pipeline manager. """ import argparse +import os import pytest -from pypiper.manager import CHECKPOINT_SPECIFICATIONS +from pypiper.manager import CHECKPOINT_SPECIFICATIONS, LOGFILE_SUFFIX from tests.helpers import named_param __author__ = "Vince Reuter" @@ -24,6 +25,22 @@ def test_manager_starts_in_null_checkpoint_state(get_pipe_manager, checkpoint_ty assert getattr(pm, checkpoint_type) is None +def test_logger_logfile_collision_with_manager_logfile_is_expected_error__issue_212( + get_pipe_manager, tmpdir +): + pipe_name = "test_issue212" + with pytest.raises(ValueError) as err_ctx: + get_pipe_manager( + name=pipe_name, + logger_kwargs={ + "logfile": os.path.join(tmpdir.strpath, pipe_name + LOGFILE_SUFFIX) + }, + ) + assert str(err_ctx.value).startswith( + f"The logfile given for the pipeline manager's logger matches that which will be used by the manager itself" + ) + + class ManagerConstructorCheckpointSpecificationTests: """Tests for manager's constructor's ability to parse and set checkpoint specifications, which can determine aspects of control flow.""" From fcb02facd2da19b3e129769a67729f35a571696d Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Sun, 18 Feb 2024 15:50:28 +0100 Subject: [PATCH 34/39] apply new black formatting --- pypiper/const.py | 1 - pypiper/ngstk.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pypiper/const.py b/pypiper/const.py index 2749529..0bfbdb6 100644 --- a/pypiper/const.py +++ b/pypiper/const.py @@ -1,6 +1,5 @@ """ Pypiper constants. """ - CHECKPOINT_EXTENSION = ".checkpoint" DEFAULT_SAMPLE_NAME = "DEFAULT_SAMPLE_NAME" PIPELINE_CHECKPOINT_DELIMITER = "_" diff --git a/pypiper/ngstk.py b/pypiper/ngstk.py index 329b321..b607913 100755 --- a/pypiper/ngstk.py +++ b/pypiper/ngstk.py @@ -153,8 +153,7 @@ def get_file_size(self, filenames): return sum([self.get_file_size(filename) for filename in filenames]) return round( - sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) - / (1024**2), + sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) / (1024**2), 4, ) From 94806df7f097e70ec93afe43853f7212ec50c63e Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Mon, 19 Feb 2024 09:49:28 -0500 Subject: [PATCH 35/39] Fix #213, remove pipestat_project_name, refactor pipestat_sample_name to pipestat_record_identifier, update doc strings --- docs/changelog.md | 6 ++++++ pypiper/manager.py | 13 ++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index edbdfef..42d8f16 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,11 @@ # Changelog +## [0.15.0] -- 2024-xx-xx +### Changed +- remove pipestat_project_name from PipelineManager parameters +- refactor pipestat_sample_name to pipestat_record_identifier in PipelineManager parameters + + ## [0.14.0] -- 2023-12-22 ### Changed - refactor for pipestat v0.6.0 release diff --git a/pypiper/manager.py b/pypiper/manager.py index d18eabc..ce1e385 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -112,6 +112,12 @@ class PipelineManager(object): protect from a case in which a restart begins upstream of a stage for which a checkpoint file already exists, but that depends on the upstream stage and thus should be rerun if it's "parent" is rerun. + :param str pipestat_record_identifier: record_identifier to report results via pipestat + :param str pipestat_schema: output schema used by pipestat to report results + :param str pipestat_results_file: path to file backend for reporting results + :param str pipestat_config_file: path to pipestat configuration file + :param str pipestat_pipeline_type: Sample or Project level pipeline + :param pipestat_result_formatter: function used to style reported results, defaults to result_formatter_markdown :raise TypeError: if start or stop point(s) are provided both directly and via args namespace, or if both stopping types (exclusive/prospective and inclusive/retrospective) are provided. @@ -136,8 +142,7 @@ def __init__( output_parent=None, overwrite_checkpoints=False, logger_kwargs=None, - pipestat_project_name=None, - pipestat_sample_name=None, + pipestat_record_identifier=None, pipestat_schema=None, pipestat_results_file=None, pipestat_config=None, @@ -330,7 +335,9 @@ def __init__( signal.signal(signal.SIGTERM, self._signal_term_handler) # pipestat setup - self.pipestat_record_identifier = pipestat_sample_name or DEFAULT_SAMPLE_NAME + self.pipestat_record_identifier = ( + pipestat_record_identifier or DEFAULT_SAMPLE_NAME + ) self.pipestat_pipeline_type = pipestat_pipeline_type or "sample" # don't force default pipestat_results_file value unless From 04abc1fba2325b6c418dc2d82ba90ddaf79d1950 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:23:47 -0400 Subject: [PATCH 36/39] #209 Switch force_overwrite to default to True --- pypiper/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index 5aad867..e4e72cb 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -1559,7 +1559,7 @@ def _report_profile( myfile.write(message_raw + "\n") def report_result( - self, key, value, nolog=False, result_formatter=None, force_overwrite=False + self, key, value, nolog=False, result_formatter=None, force_overwrite=True ): """ Writes a key:value pair to self.pipeline_stats_file. @@ -1606,7 +1606,7 @@ def report_object( annotation=None, nolog=False, result_formatter=None, - force_overwrite=False, + force_overwrite=True, ): """ Writes a key:value pair to self.pipeline_stats_file. Note: this function From 5413eecc040da9d41c81959a80cf4049db6a359b Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 4 Apr 2024 14:00:03 -0400 Subject: [PATCH 37/39] version bump on requirements #216 --- pypiper/manager.py | 4 ++-- pypiper/ngstk.py | 3 ++- requirements/requirements-docs.txt | 2 +- requirements/requirements-pypiper.txt | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pypiper/manager.py b/pypiper/manager.py index e4e72cb..965945b 100644 --- a/pypiper/manager.py +++ b/pypiper/manager.py @@ -52,7 +52,7 @@ default_pipestat_output_schema, result_formatter_markdown, ) -from pipestat.helpers import read_yaml_data +from yacman import load_yaml __all__ = ["PipelineManager"] @@ -1828,7 +1828,7 @@ def _refresh_stats(self): """ if os.path.isfile(self.pipeline_stats_file): - _, data = read_yaml_data(path=self.pipeline_stats_file, what="stats_file") + data = load_yaml(filepath=self.pipeline_stats_file) for key, value in data[self._pipestat_manager.pipeline_name][ self._pipestat_manager.pipeline_type diff --git a/pypiper/ngstk.py b/pypiper/ngstk.py index b607913..329b321 100755 --- a/pypiper/ngstk.py +++ b/pypiper/ngstk.py @@ -153,7 +153,8 @@ def get_file_size(self, filenames): return sum([self.get_file_size(filename) for filename in filenames]) return round( - sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) / (1024**2), + sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) + / (1024**2), 4, ) diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt index ef5b4e6..9ae381f 100644 --- a/requirements/requirements-docs.txt +++ b/requirements/requirements-docs.txt @@ -2,5 +2,5 @@ mkdocs>=1.0 markdown-include pydoc-markdown piper -pipestat>=0.6.0 +pipestat>=0.9.0a1 https://github.com/databio/mkdocs-databio/archive/master.zip \ No newline at end of file diff --git a/requirements/requirements-pypiper.txt b/requirements/requirements-pypiper.txt index 9a35f34..180ad3d 100644 --- a/requirements/requirements-pypiper.txt +++ b/requirements/requirements-pypiper.txt @@ -1,6 +1,6 @@ logmuse>=0.2.4 psutil pandas -ubiquerg>=0.4.5 -yacman -pipestat>=0.6.0 +ubiquerg>=0.8.0 +yacman>=0.9.3 +pipestat>=0.9.0a1 From 3e102133327be3c76366ba49903afd9e1ee78831 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 19 Apr 2024 09:49:32 -0400 Subject: [PATCH 38/39] update version and changelog for 0.14.1 release --- docs/changelog.md | 4 +++- pypiper/_version.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 42d8f16..feba103 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,9 +1,11 @@ # Changelog -## [0.15.0] -- 2024-xx-xx +## [0.14.1] -- 2024-04-19 ### Changed - remove pipestat_project_name from PipelineManager parameters - refactor pipestat_sample_name to pipestat_record_identifier in PipelineManager parameters +- update requirements for pipestat 0.9.0, ubiquerg 0.8.0, and yacman 0.9.3 +- set `force_overwrite` to default to true, Issue #209 ## [0.14.0] -- 2023-12-22 diff --git a/pypiper/_version.py b/pypiper/_version.py index 9e78220..f075dd3 100644 --- a/pypiper/_version.py +++ b/pypiper/_version.py @@ -1 +1 @@ -__version__ = "0.14.0" +__version__ = "0.14.1" From 703580d2f99c04d65b0a2ed57a14813fb96997a7 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Fri, 19 Apr 2024 09:53:19 -0400 Subject: [PATCH 39/39] lint --- pypiper/ngstk.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pypiper/ngstk.py b/pypiper/ngstk.py index 329b321..b607913 100755 --- a/pypiper/ngstk.py +++ b/pypiper/ngstk.py @@ -153,8 +153,7 @@ def get_file_size(self, filenames): return sum([self.get_file_size(filename) for filename in filenames]) return round( - sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) - / (1024**2), + sum([float(os.stat(f).st_size) for f in filenames.split(" ")]) / (1024**2), 4, )