diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index be1e6755..63152ac5 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -2,9 +2,9 @@ name: Run pytests on: push: - branches: [master] + branches: [master, dev] pull_request: - branches: [master] + branches: [master, dev] workflow_dispatch: inputs: null diff --git a/.gitignore b/.gitignore index 8b31233b..55dc4909 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,4 @@ dmypy.json # Pyre type checker .pyre/ +/tests/data/reports/ diff --git a/MANIFEST.in b/MANIFEST.in index 9a4e5c11..d198e4e0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,4 @@ include requirements/* include README.md include pipestat/schemas/* include pipestat/backends/* +include pipestat/jinja_templates/* diff --git a/codecov.yml b/codecov.yml index 35424d84..5f02882d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,6 +5,10 @@ coverage: target: 80% # the required coverage value threshold: 1% # the leniency in hitting the target informational: true + patch: + default: + target: 80% + informational: true ignore: - "*/argparser.py" - "*/cli.py" diff --git a/docs/api_docs.md b/docs/api_docs.md index 5de67cb4..625b8578 100644 --- a/docs/api_docs.md +++ b/docs/api_docs.md @@ -39,7 +39,7 @@ Pipestat standardizes reporting of pipeline results and pipeline status manageme ```python -def __init__(self, sample_name: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=, multi_pipelines: bool=False) +def __init__(self, sample_name: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, 
config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=, multi_pipelines: bool=False) ``` Initialize the PipestatManager object @@ -292,6 +292,12 @@ Status schema source +```python +def summarize(self, *args, **kwargs) +``` + + + ```python def validate_schema(self) -> None ``` @@ -307,4 +313,4 @@ Check schema for any possible issues -*Version Information: `pipestat` v0.4.0, generated by `lucidoc` v0.4.4* +*Version Information: `pipestat` v0.5.0, generated by `lucidoc` v0.4.4* diff --git a/docs/changelog.md b/docs/changelog.md index eb38c086..6fb68177 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.5.0] - 2023-08-08 +### Added + +- Add summarize function to generate static html results report. ## [0.4.1] - 2023-07-26 diff --git a/docs/usage.md b/docs/usage.md index 5b2eb8c5..8e88225c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -5,19 +5,21 @@ Pipestat offers a CLI that can be access via the `pipestat` command in the shell Here you can see the command-line usage instructions for the main command and for each subcommand: ## `pipestat --help` ```console -version: 0.4.0 -usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev] {report,inspect,remove,retrieve,status,init} ... +version: 0.5.0 +usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev] + {report,inspect,remove,retrieve,status,init,summarize} ... pipestat - report pipeline results positional arguments: - {report,inspect,remove,retrieve,status,init} + {report,inspect,remove,retrieve,status,init,summarize} report Report a result. inspect Inspect a database. remove Remove a result. retrieve Retrieve a result. 
status Manage pipeline status. init Initialize generic config file + summarize Generates HTML Report options: -h, --help show this help message and exit @@ -26,10 +28,12 @@ options: --verbosity V Set logging level (1-5 or logging module level name) --logdev Expand content of logging message format. -Pipestat standardizes reporting of pipeline results and pipeline status management. It formalizes a way for pipeline -developers and downstream tools developers to communicate -- results produced by a pipeline can easily and -reliably become an input for downstream analyses. A PipestatManager object exposes an API for interacting with the -results and pipeline status and can be backed by either a YAML-formatted file or a database. +Pipestat standardizes reporting of pipeline results and pipeline status +management. It formalizes a way for pipeline developers and downstream tools +developers to communicate -- results produced by a pipeline can easily and +reliably become an input for downstream analyses. A PipestatManager object +exposes an API for interacting with the results and pipeline status and can be +backed by either a YAML-formatted file or a database. ``` ## `pipestat report --help` @@ -55,8 +59,8 @@ options: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. Default will be used if not - provided: /usr/local/lib/python3.10/site- - packages/pipestat/schemas/status_schema.yaml + provided: /home/drc/GITHUB/pipestat/pipestat/venv/lib/pytho + n3.10/site-packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. 
-i I, --result-identifier I ID of the result to report; needs to be defined in the @@ -69,7 +73,6 @@ options: name clashes -t, --skip-convert Whether skip result type conversion into the required class in case it does not meet the schema requirements - ``` ## `pipestat inspect --help` @@ -79,9 +82,9 @@ usage: pipestat inspect [-h] [-n N] [-f F] [-c C] [-a] [-s S] [--status-schema S Inspect a database. -optional arguments: +options: -h, --help show this help message and exit - -n N, --project-name N Name of the pipeline to report result for. If not provided + -n N, --project-name N Name of the pipeline to report result for. If not provided 'PIPESTAT_PROJECT_NAME' env var will be used. Currently not set -f F, --results-file F Path to the YAML file where the results will be stored. This file will be used as pipestat backend and to restore the @@ -94,7 +97,7 @@ optional arguments: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. Default will be used if not provided: - /usr/local/lib/python3.9/site- + /home/drc/GITHUB/pipestat/pipestat/venv/lib/python3.10/site- packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. @@ -124,8 +127,8 @@ options: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. Default will be used if not - provided: /usr/local/lib/python3.10/site- - packages/pipestat/schemas/status_schema.yaml + provided: /home/drc/GITHUB/pipestat/pipestat/venv/lib/pytho + n3.10/site-packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. -i I, --result-identifier I ID of the result to report; needs to be defined in the @@ -133,7 +136,6 @@ options: -r R, --sample-name R ID of the record to report the result for. 
If not provided 'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not set - ``` ## `pipestat retrieve --help` @@ -159,8 +161,8 @@ options: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. Default will be used if not - provided: /usr/local/lib/python3.10/site- - packages/pipestat/schemas/status_schema.yaml + provided: /home/drc/GITHUB/pipestat/pipestat/venv/lib/pytho + n3.10/site-packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. -i I, --result-identifier I ID of the result to report; needs to be defined in the @@ -168,7 +170,6 @@ options: -r R, --sample-name R ID of the record to report the result for. If not provided 'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not set - ``` ## `pipestat status --help` @@ -182,7 +183,7 @@ positional arguments: set Set status. get Get status. -optional arguments: +options: -h, --help show this help message and exit ``` @@ -208,8 +209,8 @@ options: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. Default will be used if not provided: - /usr/local/lib/python3.10 - /site-packages/pipestat/schemas/status_schema.yaml + /home/drc/GITHUB/pipestat/pipestat/venv/lib/python3.10/site- + packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. -r R, --sample-name R ID of the record to report the result for. If not provided @@ -242,9 +243,11 @@ options: reported. If not provided 'PIPESTAT_RESULTS_SCHEMA' env var will be used. Currently not set --status-schema ST Path to the status schema. 
Default will be used if not provided: - /usr/local/lib/python3.10/site-packages/pipestat/schemas/status_schema.yaml + /home/drc/GITHUB/pipestat/pipestat/venv/lib/python3.10/site- + packages/pipestat/schemas/status_schema.yaml --flag-dir FD Path to the flag directory in case YAML file is the pipestat backend. -r R, --sample-name R ID of the record to report the result for. If not provided 'PIPESTAT_SAMPLE_NAME' env var will be used. Currently not set ``` + diff --git a/docs_jupyter/cli.ipynb b/docs_jupyter/cli.ipynb index 625424ae..b604f536 100644 --- a/docs_jupyter/cli.ipynb +++ b/docs_jupyter/cli.ipynb @@ -484,6 +484,15 @@ "cat /usr/local/lib/python3.9/site-packages/pipestat/schemas/status_schema.yaml" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HTML Report Generation\n", + "\n", + "To generate a static html report, call `pipestat summarize --results-file PIPESTAT_RESULTS_FILE --schema PIPESTAT_RESULTS_SCHEMA`" + ] + }, { "cell_type": "code", "execution_count": 51, diff --git a/docs_jupyter/python_api.ipynb b/docs_jupyter/python_api.ipynb index 59f40faf..88cebed4 100644 --- a/docs_jupyter/python_api.ipynb +++ b/docs_jupyter/python_api.ipynb @@ -1128,12 +1128,23 @@ "psm_no_schema.get_status(sample_name=\"sample1\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate static HTML Report using the `summarize` command\n", + "\n", + "You can generate a static browsable html report using the `summarize` function:" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "psm.summarize()" + ] } ], "metadata": { diff --git a/pipestat/_version.py b/pipestat/_version.py index 3d26edf7..3d187266 100644 --- a/pipestat/_version.py +++ b/pipestat/_version.py @@ -1 +1 @@ -__version__ = "0.4.1" +__version__ = "0.5.0" diff --git a/pipestat/argparser.py b/pipestat/argparser.py index 850871dd..e67420a1 100644 --- a/pipestat/argparser.py +++ 
b/pipestat/argparser.py @@ -13,6 +13,7 @@ RETRIEVE_CMD = "retrieve" STATUS_CMD = "status" INIT_CMD = "init" +SUMMARIZE_CMD = "summarize" SUBPARSER_MESSAGES = { REPORT_CMD: "Report a result.", INSPECT_CMD: "Inspect a database.", @@ -20,6 +21,7 @@ RETRIEVE_CMD: "Retrieve a result.", STATUS_CMD: "Manage pipeline status.", INIT_CMD: "Initialize generic config file", + SUMMARIZE_CMD: "Generates HTML Report", } STATUS_GET_CMD = "get" @@ -255,4 +257,30 @@ def add_subparser( "-d", "--data", action="store_true", help="Whether to display the data" ) + # Summarize + for cmd in [SUMMARIZE_CMD]: + sps[cmd].add_argument( + "-f", + "--results-file", + type=str, + metavar="F", + help=f"Path to the YAML file where the results will be stored. " + f"This file will be used as {PKG_NAME} backend and to restore" + f" the reported results across sessions", + ) + sps[cmd].add_argument( + "-c", + "--config", + type=str, + metavar="C", + help=f"Path to the YAML configuration file. {_env_txt('config')}", + ) + sps[cmd].add_argument( + "-s", + "--schema", + type=str, + metavar="S", + help=f"Path to the schema that defines the results that can be reported. 
{_env_txt('schema')}", + ) + return parser diff --git a/pipestat/backends/abstract.py b/pipestat/backends/abstract.py index 45db9937..f719ff58 100644 --- a/pipestat/backends/abstract.py +++ b/pipestat/backends/abstract.py @@ -87,6 +87,13 @@ def count_records( _LOGGER.warning("Not implemented yet for this backend") pass + def get_samples( + self, + pipeline_type: Optional[str] = None, + ): + _LOGGER.warning("Not implemented yet for this backend") + pass + def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]: _LOGGER.warning("Not implemented yet for this backend") @@ -137,3 +144,7 @@ def remove_record( pipeline_type: Optional[str] = None, ) -> bool: _LOGGER.warning("Not implemented yet for this backend") + + def summarize(self) -> None: + _LOGGER.warning("Not implemented yet for this backend") + pass diff --git a/pipestat/backends/dbbackend.py b/pipestat/backends/dbbackend.py index 0fb899e0..7ba55a41 100644 --- a/pipestat/backends/dbbackend.py +++ b/pipestat/backends/dbbackend.py @@ -138,6 +138,42 @@ def get_one_record(self, table_name: str, rid: Optional[str] = None): if record: return record + def get_samples( + self, + pipeline_type: Optional[str] = None, + ) -> Optional[list]: + """Returns list of sample names and pipeline type as a list of tuples that have been reported, regardless of sample or project level""" + all_samples_list = [] + + if pipeline_type is not None: + table_name = self.get_table_name(pipeline_type) + mod = self.get_model(table_name=table_name, strict=True) + with self.session as s: + sample_list = [] + stmt = sql_select(mod) + records = s.exec(stmt).all() + for i in records: + pair = (i.sample_name, pipeline_type) + sample_list.append(pair) + + return sample_list + else: + pipelines = ["sample", "project"] + for i in pipelines: + pipeline_type = i + table_name = self.get_table_name(pipeline_type) + mod = self.get_model(table_name=table_name, strict=True) + with self.session as s: + sample_list = [] + 
stmt = sql_select(mod) + records = s.exec(stmt).all() + for i in records: + pair = (i.sample_name, pipeline_type) + sample_list.append(pair) + + all_samples_list += sample_list + return all_samples_list + def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]: """ Get pipeline status diff --git a/pipestat/backends/filebackend.py b/pipestat/backends/filebackend.py index d62fff00..dae76864 100644 --- a/pipestat/backends/filebackend.py +++ b/pipestat/backends/filebackend.py @@ -144,6 +144,29 @@ def get_flag_file(self, sample_name: str = None) -> Union[str, List[str], None]: return None pass + def get_samples( + self, + pipeline_type: Optional[str] = None, + ) -> Optional[list]: + """Returns list of sample names and pipeline type as a list of tuples that have been reported, regardless of sample or project level""" + all_samples_list = [] + + if pipeline_type is not None: + for k in list(self._data.data[self.pipeline_name][pipeline_type].keys()): + pair = (k, pipeline_type) + all_samples_list.append(pair) + return all_samples_list + + else: + keys = self._data.data[self.pipeline_name].keys() + for k in keys: + sample_list = [] + for i in list(self._data.data[self.pipeline_name][k].keys()): + pair = (i, k) + sample_list.append(pair) + all_samples_list += sample_list + return all_samples_list + def get_status(self, sample_name: str, pipeline_type: Optional[str] = None) -> Optional[str]: """ Get the current pipeline status @@ -442,6 +465,23 @@ def set_status( if prev_status: _LOGGER.debug(f"Changed status from '{prev_status}' to '{status_identifier}'") + def summarize(self) -> None: + """ + summarize all reported results by building html report + """ + _LOGGER.debug("Make HTML report here") + print("DEBUG SUMMARIZE") + self._htmlreportbuilder() + + def _htmlreportbuilder(self): + """ + build html report based on all reported results + """ + + # build new folder for the report + self.reports_dir = os.path.join(self.results_file_path, 
"reports") + _LOGGER.debug(f"Reports dir: {self.reports_dir}") + def _init_results_file(self) -> None: """ Initialize YAML results file if it does not exist. diff --git a/pipestat/cli.py b/pipestat/cli.py index 133c356b..dd8aee44 100644 --- a/pipestat/cli.py +++ b/pipestat/cli.py @@ -15,6 +15,7 @@ STATUS_GET_CMD, STATUS_SET_CMD, INIT_CMD, + SUMMARIZE_CMD, ) from .const import * from .exceptions import SchemaNotFoundError, PipestatStartupError @@ -46,6 +47,18 @@ def main(): "constructor or via environment variable. \nPlease see: http://pipestat.databio.org/en/dev/cli/" ) raise PipestatStartupError(msg) + if args.command == SUMMARIZE_CMD: + psm = PipestatManager( + schema_path=args.schema, + results_file_path=args.results_file, + config_file=args.config, + ) + results_path = args.config or args.results_file + html_report_path = psm.summarize() + _LOGGER.info(f"\nGenerating HTML Report from {results_path} at: {html_report_path}\n") + + sys.exit(0) + psm = PipestatManager( schema_path=args.schema, results_file_path=args.results_file, @@ -105,4 +118,5 @@ def main(): status_identifier=args.status_identifier, sample_name=args.sample_name, ) + sys.exit(0) diff --git a/pipestat/const.py b/pipestat/const.py index 1a386096..ec67074b 100644 --- a/pipestat/const.py +++ b/pipestat/const.py @@ -50,6 +50,13 @@ "RESULT_FORMATTER", "DEFAULT_PIPELINE_NAME", "MULTI_PIPELINE", + "OBJECT_TYPES", + "OUTPUT_SCHEMA_KEY", + "BUTTON_APPEARANCE_BY_FLAG", + "APPEARANCE_BY_FLAG", + "TEMPLATES_DIRNAME", + "NO_DATA_PLACEHOLDER", + "PROFILE_COLNAMES", ] PKG_NAME = "pipestat" @@ -140,3 +147,40 @@ RESULT_FORMATTER = "_result_formatter" DEFAULT_PIPELINE_NAME = "default_pipeline_name" MULTI_PIPELINE = "_multi_pipelines" +TEMPLATES_DIRNAME = "jinja_templates" + + +OBJECT_TYPES = ["object", "file", "image", "array"] +OUTPUT_SCHEMA_KEY = "output_schema" +NO_DATA_PLACEHOLDER = "NA" +# this strongly depends on pypiper's profile.tsv format +PROFILE_COLNAMES = ["pid", "hash", "cid", "runtime", "mem", "cmd", 
"lock"] + +APPEARANCE_BY_FLAG = { + "completed": {"button_class": "{type}-success", "flag": "Completed"}, + "running": {"button_class": "{type}-primary", "flag": "Running"}, + "failed": {"button_class": "{type}-danger", "flag": "Failed"}, + "parital": {"button_class": "{type}-warning", "flag": "Partial"}, + "waiting": {"button_class": "{type}-info", "flag": "Waiting"}, +} + + +def _get_apperance_dict(type, templ=APPEARANCE_BY_FLAG): + """ + Based on the type of the HTML element provided construct the appearence + mapping using the template + + :param dict templ: appearance template to populate + :param str type: type of HTML element to populate template with + :return dict: populated appearance template + """ + from copy import deepcopy + + ret = deepcopy(templ) + for flag, app_dict in ret.items(): + for key, app in app_dict.items(): + ret[flag][key] = ret[flag][key].format(type=type) + return ret + + +BUTTON_APPEARANCE_BY_FLAG = _get_apperance_dict("btn btn") diff --git a/pipestat/html_reports_pipestat.py b/pipestat/html_reports_pipestat.py new file mode 100644 index 00000000..416bb551 --- /dev/null +++ b/pipestat/html_reports_pipestat.py @@ -0,0 +1,954 @@ +""" Generate HTML reports """ + +from logging import getLogger +import os +import sys +from datetime import timedelta +from json import dumps + +import jinja2 +import pandas as _pd +from eido import read_schema +from peppy.const import * + +from ._version import __version__ as v +from .const import * + + +_LOGGER = getLogger(PKG_NAME) + + +class HTMLReportBuilder(object): + """Generate HTML summary report for project/samples""" + + def __init__(self, prj): + """ + The Project defines the instance. 
+ + :param PipestatManager prj: Project with which to work/operate on + """ + + self.prj = prj + self.jinja_env = get_jinja_env() + results_file_path = getattr(self.prj.backend, "results_file_path", None) + config_path = getattr(self.prj, "config_path", None) + self.output_dir = results_file_path or config_path + self.output_dir = os.path.dirname(self.output_dir) + self.reports_dir = os.path.join(self.output_dir, "reports") + _LOGGER.debug(f"Reports dir: {self.reports_dir}") + + def __call__(self, pipeline_name, project_index_html=None, amendment=None): + """ + Generate HTML report. + + :param str pipeline_name: ID of the pipeline to generate the report for + :param Iterable[str] amendment: name indicating amendment to use, optional + :return str: path to the index page of the generated HTML report + """ + # Generate HTML report + self.pipeline_name = pipeline_name + self.amendment = amendment + self.amendments_str = "_".join(self.amendment) if self.amendment else "" + self.pipeline_reports = os.path.join( + self.reports_dir, + f"{self.pipeline_name}_{self.amendments_str}" + if self.amendments_str + else self.pipeline_name, + ) + self.prj_index_html_path = project_index_html + self.index_html_path = os.path.join(self.pipeline_reports, "index.html") + schema_path = self.prj.schema_path + self.schema = read_schema(schema_path)[0] + navbar = self.create_navbar( + navbar_links=self.create_navbar_links( + wd=self.pipeline_reports, + project_index_html_relpath=os.path.relpath( + self.prj_index_html_path, self.pipeline_reports + ) + if self.prj_index_html_path + else None, + ), + index_html_relpath=os.path.relpath(self.index_html_path, self.pipeline_reports), + ) + self.create_index_html(navbar, self.create_footer()) + return self.index_html_path + + def create_object_parent_html(self, navbar, footer): + """ + Generates a page listing all the project objects with links + to individual object pages + + :param str navbar: HTML to be included as the navbar in the main 
summary page + :param str footer: HTML to be included as the footer + :return str: Rendered parent objects HTML file + """ + if not os.path.exists(self.pipeline_reports): + os.makedirs(self.pipeline_reports) + pages = [] + labels = [] + obj_result_ids = self.get_nonhighlighted_results(OBJECT_TYPES) + + for key in obj_result_ids: + desc = ( + self.prj.result_schemas[key]["description"] + if "description" in self.prj.result_schemas[key] + else "" + ) + labels.append(f"{key.replace('_', ' ')}: {desc}") + page_path = os.path.join(self.pipeline_reports, f"{key}.html".lower()) + pages.append(os.path.relpath(page_path, self.pipeline_reports)) + + template_vars = dict( + navbar=navbar, footer=footer, labels=labels, pages=pages, header="Objects" + ) + _LOGGER.debug(f"object navbar_list_parent.html | template_vars:" f"\n{template_vars}") + return render_jinja_template("navbar_list_parent.html", self.jinja_env, template_vars) + + def create_sample_parent_html(self, navbar, footer): + """ + Generates a page listing all the project samples with links + to individual sample pages + :param str navbar: HTML to be included as the navbar in the main summary page + :param str footer: HTML to be included as the footer + :return str: Rendered parent samples HTML file + """ + if not os.path.exists(self.pipeline_reports): + os.makedirs(self.pipeline_reports) + pages = [] + labels = [] + for sample in self.prj.backend.get_samples(): + sample_name = sample[0] + pipeline_type = sample[1] + sample_dir = self.pipeline_reports + + # Confirm sample directory exists, then build page + if os.path.exists(sample_dir): + page_path = os.path.join( + self.pipeline_reports, + f"{sample_name}.html".replace(" ", "_").lower(), + ) + page_relpath = os.path.relpath(page_path, self.pipeline_reports) + pages.append(page_relpath) + labels.append(sample_name) + + template_vars = dict( + navbar=navbar, footer=footer, labels=labels, pages=pages, header="Samples" + ) + _LOGGER.debug(f"sample 
navbar_list_parent.html | template_vars:" f"\n{template_vars}") + return render_jinja_template("navbar_list_parent.html", self.jinja_env, template_vars) + + def create_navbar(self, navbar_links, index_html_relpath): + """ + Creates the navbar using the provided links + + :param str navbar_links: HTML list of links to be inserted into a navbar + :return str: navbar HTML + """ + template_vars = dict(navbar_links=navbar_links, index_html=index_html_relpath) + return render_jinja_template("navbar.html", self.jinja_env, template_vars) + + def create_footer(self): + """ + Renders the footer from the templates directory + + :return str: footer HTML + """ + return render_jinja_template("footer.html", self.jinja_env, dict(version=v)) + + def create_navbar_links(self, wd=None, context=None, project_index_html_relpath=None): + """ + Return a string containing the navbar prebuilt html. + + Generates links to each page relative to the directory of interest + (wd arg) or uses the provided context to create the paths (context arg) + + :param path wd: the working directory of the current HTML page being + generated, enables navbar links relative to page + :param list[str] context: the context the links will be used in. + The sequence of directories to be prepended to the HTML file in + the resulting navbar + :return str: navbar links as HTML-formatted string + """ + # determine paths + if wd is None and context is None: + raise ValueError( + "Either 'wd' (path the links should be relative to) or " + "'context' (the context for the links) has to be provided." 
+ ) + status_relpath = _make_relpath( + file_name=os.path.join(self.pipeline_reports, "status.html"), + wd=wd, + context=context, + ) + objects_relpath = _make_relpath( + file_name=os.path.join(self.pipeline_reports, "objects.html"), + wd=wd, + context=context, + ) + samples_relpath = _make_relpath( + file_name=os.path.join(self.pipeline_reports, "samples.html"), + wd=wd, + context=context, + ) + # determine the outputs IDs by type + obj_result_ids = self.get_nonhighlighted_results(OBJECT_TYPES) + dropdown_keys_objects = None + dropdown_relpaths_objects = None + sample_names = None + if len(obj_result_ids) > 0: + # If the number of objects is 20 or less, use a drop-down menu + if len(obj_result_ids) <= 20: + ( + dropdown_relpaths_objects, + dropdown_keys_objects, + ) = self._get_navbar_dropdown_data_objects( + objs=obj_result_ids, wd=wd, context=context + ) + else: + dropdown_relpaths_objects = objects_relpath + if self.prj.record_count <= 20: + ( + dropdown_relpaths_samples, + sample_names, + ) = self._get_navbar_dropdown_data_samples(wd=wd, context=context) + else: + # Create a menu link to the samples parent page + dropdown_relpaths_samples = samples_relpath + template_vars = dict( + status_html_page=status_relpath, + status_page_name="Status", + dropdown_keys_objects=dropdown_keys_objects, + objects_page_name="Objects", + samples_page_name="Samples", + objects_html_page=dropdown_relpaths_objects, + samples_html_page=dropdown_relpaths_samples, + menu_name_objects="Objects", + menu_name_samples="Samples", + sample_names=sample_names, + all_samples=samples_relpath, + all_objects=objects_relpath, + sample_reports_parent=None, + project_report=project_index_html_relpath, + ) + _LOGGER.debug(f"navbar_links.html | template_vars:\n{template_vars}") + return render_jinja_template("navbar_links.html", self.jinja_env, template_vars) + + def create_object_htmls(self, navbar, footer): + """ + Generates a page for an individual object type with all of its + plots from each 
sample + + :param str navbar: HTML to be included as the navbar in the main summary page + :param str footer: HTML to be included as the footer + """ + file_results = self.get_nonhighlighted_results(["file"]) + image_results = self.get_nonhighlighted_results(["image"]) + + if not os.path.exists(self.pipeline_reports): + os.makedirs(self.pipeline_reports) + for file_result in file_results: + links = [] + html_page_path = os.path.join(self.pipeline_reports, f"{file_result}.html".lower()) + + for sample in self.prj.backend.get_samples(): + sample_name = sample[0] + pipeline_type = sample[1] + sample_result = fetch_pipeline_results( + project=self.prj, + pipeline_name=self.pipeline_name, + sample_name=sample_name, + pipeline_type=pipeline_type, + ) + if file_result not in sample_result: + pass + else: + try: + links.append( + [ + sample_name, + os.path.relpath( + sample_result[file_result]["path"], self.pipeline_reports + ), + ] + ) + except: + links.append(["LinkPathNotFound"]) + else: + link_desc = ( + self.prj.result_schemas[file_result]["description"] + if "description" in self.prj.result_schemas[file_result] + else "No description in schema" + ) + template_vars = dict( + navbar=navbar, + footer=footer, + name=file_result, + figures=[], + links=links, + desc=link_desc, + ) + save_html( + html_page_path, + render_jinja_template("object.html", self.jinja_env, args=template_vars), + ) + + for image_result in image_results: + html_page_path = os.path.join(self.pipeline_reports, f"{image_result}.html".lower()) + figures = [] + + for sample in self.prj.backend.get_samples(): + sample_name = sample[0] + pipeline_type = sample[1] + sample_result = fetch_pipeline_results( + project=self.prj, + pipeline_name=self.pipeline_name, + sample_name=sample_name, + pipeline_type=pipeline_type, + ) + if image_result not in sample_result: + pass + else: + try: + figures.append( + [ + os.path.relpath( + sample_result[image_result]["path"], self.pipeline_reports + ), + sample_name, + 
os.path.relpath( + sample_result[image_result]["thumbnail_path"], + self.pipeline_reports, + ), + ] + ) + except: + figures.append(["FigurePathNotFound"]) + else: + img_desc = ( + self.prj.result_schemas[image_result]["description"] + if "description" in self.prj.result_schemas[image_result] + else "No description in schema" + ) + template_vars = dict( + navbar=navbar, + footer=footer, + name=image_result, + figures=figures, + links=[], + desc=img_desc, + ) + _LOGGER.debug(f"object.html | template_vars:\n{template_vars}") + save_html( + html_page_path, + render_jinja_template("object.html", self.jinja_env, args=template_vars), + ) + + def create_sample_html(self, sample_stats, navbar, footer, sample_name, pipeline_type): + """ + Produce an HTML page containing all of a sample's objects + and the sample summary statistics + + :param str sample_name: the name of the current sample + :param dict sample_stats: pipeline run statistics for the current sample + :param str navbar: HTML to be included as the navbar in the main summary page + :param str footer: HTML to be included as the footer + :param str pipeline_type: pipeline_type, 'project' or 'sample' + :return str: path to the produced HTML page + """ + if not os.path.exists(self.pipeline_reports): + os.makedirs(self.pipeline_reports) + html_page = os.path.join(self.pipeline_reports, f"{sample_name}.html".lower()) + + flag = self.prj.get_status(sample_name=sample_name, pipeline_type=pipeline_type) + if not flag: + button_class = "btn btn-secondary" + flag = "Missing" + else: + try: + flag_dict = BUTTON_APPEARANCE_BY_FLAG[flag] + except KeyError: + button_class = "btn btn-secondary" + flag = "Unknown" + else: + button_class = flag_dict["button_class"] + flag = flag_dict["flag"] + highlighted_results = fetch_pipeline_results( + project=self.prj, + pipeline_name=self.prj.pipeline_name, + sample_name=sample_name, + inclusion_fun=lambda x: x == "file", + highlighted=True, + pipeline_type=pipeline_type, + ) + + for k in 
highlighted_results.keys(): + highlighted_results[k]["path"] = os.path.relpath( + highlighted_results[k]["path"], self.pipeline_reports + ) + + links = [] + file_results = fetch_pipeline_results( + project=self.prj, + pipeline_name=self.pipeline_name, + sample_name=sample_name, + inclusion_fun=lambda x: x == "file", + pipeline_type=pipeline_type, + ) + for result_id, result in file_results.items(): + desc = ( + self.schema[result_id]["description"] + if "description" in self.schema[result_id] + else "" + ) + links.append( + [ + f"{result['title']}: {desc}", + os.path.relpath(result["path"], self.pipeline_reports), + ] + ) + image_results = fetch_pipeline_results( + project=self.prj, + pipeline_name=self.pipeline_name, + sample_name=sample_name, + inclusion_fun=lambda x: x == "image", + pipeline_type=pipeline_type, + ) + figures = [] + for result_id, result in image_results.items(): + figures.append( + [ + os.path.relpath(result["path"], self.pipeline_reports), + result["title"], + os.path.relpath(result["thumbnail_path"], self.pipeline_reports), + ] + ) + + template_vars = dict( + report_class="Sample", + navbar=navbar, + footer=footer, + sample_name=sample_name, + links=links, + figures=figures, + button_class=button_class, + sample_stats=sample_stats, + flag=flag, + highlighted_results=highlighted_results, + pipeline_name=self.pipeline_name, + amendments="", + ) + _LOGGER.debug(f"sample.html | template_vars:\n{template_vars}") + save_html(html_page, render_jinja_template("sample.html", self.jinja_env, template_vars)) + return html_page + + def create_status_html(self, status_table, navbar, footer): + """ + Generates a page listing all the samples, their run status, their + log file, and the total runtime if completed. 
def create_index_html(self, navbar, footer):
    """
    Generate an index.html style project home page w/ sample summary
    statistics

    Along the way this also writes one page per sample, the samples and
    objects parent pages, the individual object pages and the status page.

    :param str navbar: HTML to be included as the navbar in the main
        summary page
    :param str footer: HTML to be included as the footer
    """
    _LOGGER.info(f"Building index page for pipeline: {self.pipeline_name}")

    def _summary_link(appendix):
        # Link (relative to the reports dir) to a project-level summary file,
        # or None when that file has not been produced.
        file_path = get_file_for_project(
            prj=self.prj,
            pipeline_name=self.prj.pipeline_name,
            appendix=appendix,
            reportdir=self.reports_dir,
        )
        if not os.path.exists(file_path):
            return None
        # os.path.relpath accepts only (path, start)
        return os.path.relpath(file_path, self.pipeline_reports)

    # Button links for stats_summary.tsv and objs_summary.yaml
    stats_file_path = _summary_link("stats_summary.tsv")
    objs_file_path = _summary_link("objs_summary.yaml")

    # Build the table rows and produce the individual sample pages
    table_row_data = []
    _LOGGER.info(" * Creating sample pages")
    for sample in self.prj.backend.get_samples():
        sample_name, pipeline_type = sample[0], sample[1]
        sample_stat_results = fetch_pipeline_results(
            project=self.prj,
            pipeline_name=self.pipeline_name,
            sample_name=sample_name,
            inclusion_fun=None,
            casting_fun=str,
            pipeline_type=pipeline_type,
        )
        sample_html = self.create_sample_html(
            sample_stat_results, navbar, footer, sample_name, pipeline_type
        )
        rel_sample_html = os.path.relpath(sample_html, self.pipeline_reports)
        # the first cell is a [link, label] pair pointing at the sample page,
        # the second holds all of the sample's stringified results
        table_row_data.append(
            [[rel_sample_html, sample_name], list(sample_stat_results.values())]
        )

    # Parent samples page with links to each sample
    save_html(
        path=os.path.join(self.pipeline_reports, "samples.html"),
        template=self.create_sample_parent_html(navbar, footer),
    )
    _LOGGER.info(" * Creating object pages")
    self.create_object_htmls(navbar, footer)

    # Parent objects page with links to each object type
    save_html(
        path=os.path.join(self.pipeline_reports, "objects.html"),
        template=self.create_object_parent_html(navbar, footer),
    )
    # Status page with each sample's status listed
    status_tab = create_status_table(
        pipeline_name=self.prj.pipeline_name,
        project=self.prj,
        pipeline_reports_dir=self.pipeline_reports,
    )
    save_html(
        path=os.path.join(self.pipeline_reports, "status.html"),
        template=self.create_status_html(status_tab, navbar, footer),
    )

    # Finally render the index page itself
    columns = ["Record Identifiers", "Results"]
    template_vars = dict(
        navbar=navbar,
        stats_file_path=stats_file_path,
        objs_file_path=objs_file_path,
        columns=columns,
        columns_json=dumps(columns),
        table_row_data=table_row_data,
        project_name=self.prj.project_name,
        pipeline_name=self.prj.pipeline_name,
        stats_json=self._stats_to_json_str(),
        footer=footer,
        amendments="",
    )
    _LOGGER.debug(f"index.html | template_vars:\n{template_vars}")
    save_html(
        self.index_html_path,
        render_jinja_template("index.html", self.jinja_env, template_vars),
    )


def get_nonhighlighted_results(self, types):
    """
    Get a list of non-highlighted results in the schema

    Sample-level results are listed first, then project-level ones.

    :param list[str] types: types to narrow down the results
    :return list[str]: result ID that are of the requested type and
        are not highlighted
    """
    results = []
    for level in ("samples", "project"):
        for result_id, result_schema in self.schema[level].items():
            if result_schema["type"] not in types:
                continue
            # A result counts as non-highlighted when 'highlight' is absent or
            # is literally False; other falsy values (None, 0, "") are
            # deliberately not treated as False.
            if result_schema.get("highlight", False) is False:
                results.append(result_id)
    return results


def _stats_to_json_str(self):
    """
    Serialize the non-object results of every sample to a JSON string

    :return str: JSON object mapping sample name to its stringified results
    """
    results = {}
    for sample in self.prj.backend.get_samples():
        sample_name, pipeline_type = sample[0], sample[1]
        results[sample_name] = fetch_pipeline_results(
            project=self.prj,
            sample_name=sample_name,
            pipeline_name=self.prj.pipeline_name,
            inclusion_fun=lambda x: x not in OBJECT_TYPES,
            casting_fun=str,
            pipeline_type=pipeline_type,
        )
    return dumps(results)


def _get_navbar_dropdown_data_objects(self, objs, wd, context):
    """
    Build the link targets and labels for the objects dropdown of the navbar

    :param list[str] objs: object (result) IDs to list
    :param str wd: the dir the links should be relative to
    :param list[str] context: directories to prepend to each link, passed
        through to _make_relpath
    :return (list[str], list[str]): relative page paths and display labels,
        or (None, None) when there are no objects
    """
    if objs is None or len(objs) == 0:
        return None, None
    relpaths = []
    displayable_ids = []
    for obj_id in objs:
        displayable_ids.append(obj_id.replace("_", " "))
        page_name = os.path.join(
            self.pipeline_reports, (obj_id + ".html").replace(" ", "_").lower()
        )
        relpaths.append(_make_relpath(page_name, wd, context))
    return relpaths, displayable_ids
def render_jinja_template(name, jinja_env, args=None):
    """
    Render template in the specified jinja environment using the provided args

    :param str name: name of the template
    :param jinja2.Environment jinja_env: the initialized environment to use
    :param dict args: arguments to pass to the template; None means no arguments
    :return str: rendered template
    :raise TypeError: if args is neither None nor a dict
    """
    # None instead of a shared mutable default dict
    if args is None:
        args = {}
    if not isinstance(args, dict):
        raise TypeError("args has to be a dict")
    template = jinja_env.get_template(name)
    return template.render(**args)


def save_html(path, template):
    """
    Save rendered template as an HTML file

    Missing parent directories are created. A failure to write is logged,
    not raised.

    :param str path: the desired location for the file to be produced
    :param str template: the template or just string
    """
    parent = os.path.dirname(path)
    # a bare file name has an empty dirname, which os.makedirs rejects
    if parent:
        os.makedirs(parent, exist_ok=True)
    try:
        # explicit encoding so non-ASCII sample names/results are written the
        # same way regardless of the platform's locale
        with open(path, "w", encoding="utf-8") as f:
            f.write(template)
    except IOError:
        _LOGGER.error("Could not write the HTML file: {}".format(path))


def get_jinja_env(templates_dirname=None):
    """
    Create jinja environment with the provided path to the templates directory

    :param str templates_dirname: path to the templates directory; defaults to
        the TEMPLATES_DIRNAME directory located next to this module
    :return jinja2.Environment: jinja environment
    """
    if templates_dirname is None:
        file_dir = os.path.dirname(os.path.realpath(__file__))
        templates_dirname = os.path.join(file_dir, TEMPLATES_DIRNAME)
    _LOGGER.debug("Using templates dir: " + templates_dirname)
    return jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dirname))


def _get_file_for_sample(prj, sample_name, appendix, pipeline_name=None, basename=False):
    """
    Safely looks for files matching the appendix in the specified
    location for the sample

    The file is expected at
    {prj.results_folder}/{sample_name}/{pipeline_name}_{amendments}_{appendix}
    where the pipeline name and amendments parts are optional.

    :param prj: project object; its results_folder attribute is read
    :param str sample_name: name of the sample that the file name
        should be found for
    :param str appendix: the ending specific for the file
    :param str pipeline_name: optional pipeline name to prefix the file name with
    :param bool basename: whether to return basename only
    :return str: the name of the matched file
    :raise FileNotFoundError: if the file does not exist
    """
    prefix = pipeline_name or ""
    amendments = getattr(prj, AMENDMENTS_KEY, None)
    if amendments:
        prefix += f"_{'_'.join(amendments)}"
    if prefix:
        prefix += "_"
    fp = os.path.join(prj.results_folder, sample_name, f"{prefix}{appendix}")
    if not os.path.exists(fp):
        raise FileNotFoundError(fp)
    return os.path.basename(fp) if basename else fp


def _get_relpath_to_file(file_name, sample_name, location, relative_to):
    """
    Safely gets the relative path for the file for the specified sample

    :param str file_name: name of the file
    :param str sample_name: name of the sample that the file path
        should be found for
    :param str location: where to look for the file
    :param str relative_to: path the result path should be relative to
    :return str: a path to the file, or None if file_name is None or the
        file does not exist
    """
    # check for None first: os.path.join raises TypeError on a None component
    if file_name is None:
        return None
    abs_file_path = os.path.join(location, sample_name, file_name)
    if not os.path.exists(abs_file_path):
        return None
    return os.path.relpath(abs_file_path, relative_to)
def _read_csv_encodings(path, encodings=None, **kwargs):
    """
    Try to read file with the provided encodings

    The encodings are tried in order and the first successful read wins.

    :param str path: path to file
    :param list encodings: list of encodings to try; defaults to
        ["utf-8", "ascii"]
    :param kwargs: passed through to pandas.read_csv
    :return pandas.DataFrame | None: the parsed table, or None (after logging
        a warning) if the file could not be decoded with any of the encodings
    """
    # None instead of a shared mutable default list
    if encodings is None:
        encodings = ["utf-8", "ascii"]
    for encoding in encodings:
        try:
            return _pd.read_csv(path, encoding=encoding, **kwargs)
        except UnicodeDecodeError:
            continue
    _LOGGER.warning(f"Could not read the log file '{path}' with encodings '{encodings}'")
    return None


def _read_tsv_to_json(path):
    """
    Read a tsv file to a JSON formatted string

    :param path: to file path
    :return str: JSON formatted string
    :raise FileNotFoundError: if the file does not exist
    """
    # explicit raise: an assert would be stripped when running with -O
    if not os.path.exists(path):
        raise FileNotFoundError("The file '{}' does not exist".format(path))
    _LOGGER.debug("Reading TSV from '{}'".format(path))
    df = _pd.read_csv(path, sep="\t", index_col=False, header=None)
    return df.to_json()


def fetch_pipeline_results(
    project,
    pipeline_name,
    sample_name=None,
    inclusion_fun=None,
    casting_fun=None,
    highlighted=False,
    pipeline_type=None,
):
    """
    Get the specific pipeline results for sample based on inclusion function

    :param project: object to get the results for; its retrieve method and its
        result_schemas and highlighted_results attributes are used
    :param str pipeline_name: pipeline ID (accepted but not used here)
    :param str sample_name: sample ID
    :param callable(str) inclusion_fun: a function that determines whether the
        result should be returned based on its type. Example input that the
        function will be fed with is: 'image' or 'integer'. By default every
        result that is defined in the schema is included
    :param callable(str) casting_fun: a function that will be used to cast
        each of the results to a proper type before returning, e.g int, str.
        By default the results are returned as they are
    :param bool highlighted: return the highlighted or regular results
    :param str pipeline_type: pipeline_type, 'project' or 'sample'
    :return dict: selected pipeline results
    """

    def _identity(x):
        return x

    include = inclusion_fun or _identity
    cast = casting_fun or _identity
    reported = project.retrieve(sample_name=sample_name, pipeline_type=pipeline_type)
    # drop results that are not in the schema or whose type is not wanted
    selected = {
        result_id: cast(value)
        for result_id, value in reported.items()
        if result_id in project.result_schemas
        and include(project.result_schemas[result_id]["type"])
    }
    # keep either only the highlighted results or only the regular ones
    return {
        result_id: value
        for result_id, value in selected.items()
        if (result_id in project.highlighted_results) == bool(highlighted)
    }


def uniqify(seq):
    """Fast way to uniqify while preserving input order."""
    # http://stackoverflow.com/questions/480214/
    seen = set()
    unique = []
    for item in seq:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def create_status_table(project, pipeline_name, pipeline_reports_dir):
    """
    Creates status table, the core of the status page.

    Every piece of information that cannot be determined for a sample is
    replaced with a placeholder and a warning is logged.

    :param project: object to report on; its backend.get_samples, get_status
        and retrieve methods and its status_schema attribute are used
    :param str pipeline_name: pipeline name (accepted but not used here)
    :param str pipeline_reports_dir: directory the log links are relative to
    :return str: rendered status table HTML
    """

    def _rgb2hex(r, g, b):
        return "#{:02x}{:02x}{:02x}".format(r, g, b)

    def _warn(what, e, sn):
        _LOGGER.warning(
            f"Caught exception: {e}\n"
            f"Could not determine {what} for sample: {sn}. "
            f"Not reported or pipestat status schema is faulty."
        )

    def _existing_result_path(result_identifier):
        # NOTE(review): retrieve is called without a sample name here, unlike
        # get_status above -- confirm it returns the current sample's result.
        path = project.retrieve(result_identifier=result_identifier)["path"]
        if not os.path.exists(path):
            raise FileNotFoundError(f"Not found: {path}")
        return path

    log_paths = []
    log_link_names = []
    sample_paths = []
    sample_names = []
    statuses = []
    status_styles = []
    times = []
    mems = []
    status_descs = []
    for sample in project.backend.get_samples():
        sample_name, pipeline_type = sample[0], sample[1]
        sample_names.append(sample_name)
        # status and status style
        try:
            status = project.get_status(sample_name=sample_name, pipeline_type=pipeline_type)
            statuses.append(status)
            status_metadata = project.status_schema[status]
            status_styles.append(_rgb2hex(*status_metadata["color"]))
            status_descs.append(status_metadata["description"])
        except Exception as e:
            _warn("status", e, sample_name)
            statuses.append(NO_DATA_PLACEHOLDER)
            status_styles.append(NO_DATA_PLACEHOLDER)
            status_descs.append(NO_DATA_PLACEHOLDER)
        sample_paths.append(f"{sample_name}.html".replace(" ", "_").lower())
        # log file path
        try:
            log = _existing_result_path("log")
            log_link_names.append(os.path.basename(log))
            log_paths.append(os.path.relpath(log, pipeline_reports_dir))
        except Exception as e:
            # warn with the sample's name, as the status branch does
            _warn("log", e, sample_name)
            log_link_names.append(NO_DATA_PLACEHOLDER)
            log_paths.append("")
        # runtime and peak mem
        try:
            profile = _existing_result_path("profile")
            df = _pd.read_csv(profile, sep="\t", comment="#", names=PROFILE_COLNAMES)
            df["runtime"] = _pd.to_timedelta(df["runtime"])
            times.append(_get_runtime(df))
            mems.append(_get_maxmem(df))
        except Exception as e:
            _warn("profile", e, sample_name)
            times.append(NO_DATA_PLACEHOLDER)
            mems.append(NO_DATA_PLACEHOLDER)

    template_vars = dict(
        sample_names=sample_names,
        log_paths=log_paths,
        status_styles=status_styles,
        statuses=statuses,
        times=times,
        mems=mems,
        sample_paths=sample_paths,
        log_link_names=log_link_names,
        status_descs=status_descs,
    )
    _LOGGER.debug(f"status_table.html | template_vars:\n{template_vars}")
    return render_jinja_template("status_table.html", get_jinja_env(), template_vars)


def _get_maxmem(profile):
    """
    Get current peak memory

    :param pandas.core.frame.DataFrame profile: a data frame representing
        the current profile.tsv for a sample
    :return str: max memory, "0 GB" when no memory values are recorded
    """
    mem = profile["mem"]
    peak = 0 if mem.empty else max(mem)
    return f"{str(peak)} GB"


def _get_runtime(profile_df):
    """
    Collect the unique and last duplicated runtimes, sum them and then
    return in str format

    :param pandas.core.frame.DataFrame profile_df: a data frame representing
        the current profile.tsv for a sample
    :return str: sum of runtimes, without the fractional seconds
    """
    # for a command ID that occurs more than once only the last row counts
    is_superseded = profile_df.duplicated("cid", keep="last").values
    unique_df = profile_df[~is_superseded]
    total_seconds = sum(unique_df["runtime"].apply(lambda x: x.total_seconds()))
    return str(timedelta(seconds=total_seconds)).split(".")[0]
+ Takes the possibility of amendment being activated at the time + + Format of the output path: + {output_dir}/{directory}/{p.name}_{pipeline_name}_{active_amendments}_{appendix} + + :param looper.Project prj: project object + :param str pipeline_name: name of the pipeline to get the file for + :param str appendix: the appendix of the file to create the path for, + like 'objs_summary.tsv' for objects summary file + :return str: path to the file + """ + if prj["project_name"] is None: + fp = os.path.join(reportdir, directory or "", f"NO_PROJECT_NAME_{pipeline_name}") + else: + fp = os.path.join(reportdir, directory or "", f"{prj['project_name']}_{pipeline_name}") + + if hasattr(prj, "amendments") and getattr(prj, "amendments"): + fp += f"_{'_'.join(prj.amendments)}" + fp += f"_{appendix}" + return fp diff --git a/pipestat/jinja_templates/footer.html b/pipestat/jinja_templates/footer.html new file mode 100644 index 00000000..7b3e0c5e --- /dev/null +++ b/pipestat/jinja_templates/footer.html @@ -0,0 +1,3 @@ +
+Generated with looper v{{ version }} +© 2018-2021 Sheffield Computational Biology Lab diff --git a/pipestat/jinja_templates/footer_index.html b/pipestat/jinja_templates/footer_index.html new file mode 100644 index 00000000..b2d5fc65 --- /dev/null +++ b/pipestat/jinja_templates/footer_index.html @@ -0,0 +1,76 @@ + + + + + diff --git a/pipestat/jinja_templates/head.html b/pipestat/jinja_templates/head.html new file mode 100644 index 00000000..0f20ab19 --- /dev/null +++ b/pipestat/jinja_templates/head.html @@ -0,0 +1,150 @@ + + + + + + + + + diff --git a/pipestat/jinja_templates/index.html b/pipestat/jinja_templates/index.html new file mode 100644 index 00000000..d519663c --- /dev/null +++ b/pipestat/jinja_templates/index.html @@ -0,0 +1,309 @@ + + + + + {% include "head.html" %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Looper: {{ project_name }} summary + + +{{ navbar }} + + +
+
+
+

Looper summary

+

Project name: {{ project_name }}

+

Pipeline name: {{ pipeline_name }}

+ {% if amendments is not none %} +

Amendments: {{ amendments|join(', ') }}

+ {% endif %} +
+
+ +
+
+
+
+ + + + {% for column in columns %} + + {% endfor %} + + + + {% for table_cell_data in table_row_data %} + + {% for values in table_cell_data %} + {% if values is not string %} + + {% else %} + {% if values|length > 60 %} + + {% else %} + + {% endif %} + {% endif %} + {% endfor %} + + {% endfor %} + +
{{ column }}
+ {{ values[1] }} + {{ values|truncate(60,True) }} {{ values }}
+
+
+
+
+
+
    +
  • Plot a column
  • +
+
+
+
+
+
+
+
+
+
+
+ +
+
+ {{ footer }} + {% include "footer_index.html" %} +
+ + diff --git a/pipestat/jinja_templates/logo.html b/pipestat/jinja_templates/logo.html new file mode 100644 index 00000000..2932c944 --- /dev/null +++ b/pipestat/jinja_templates/logo.html @@ -0,0 +1 @@ +LOOPER diff --git a/pipestat/jinja_templates/navbar.html b/pipestat/jinja_templates/navbar.html new file mode 100644 index 00000000..65265f09 --- /dev/null +++ b/pipestat/jinja_templates/navbar.html @@ -0,0 +1,16 @@ +
+ diff --git a/pipestat/jinja_templates/navbar_links.html b/pipestat/jinja_templates/navbar_links.html new file mode 100644 index 00000000..de0b1189 --- /dev/null +++ b/pipestat/jinja_templates/navbar_links.html @@ -0,0 +1,62 @@ + {% if project_report is not none %} + + {% endif %} + {% if sample_reports_parent is not none %} + + {% endif %} + {% if status_page_name is not none %} + + + {% endif %} + {% if dropdown_keys_objects is none %} + + {% if objects_html_page is not none %} + + {% endif %} + {% else %} + + + {% endif %} + {% if sample_names is none %} + + {% if samples_html_page is not none %} + + {% endif %} + {% else %} + + + {% endif %} diff --git a/pipestat/jinja_templates/navbar_list_parent.html b/pipestat/jinja_templates/navbar_list_parent.html new file mode 100644 index 00000000..bc41dce1 --- /dev/null +++ b/pipestat/jinja_templates/navbar_list_parent.html @@ -0,0 +1,24 @@ + + + + {% include "head.html" %} + Looper: {{ header }} + + {{ navbar }} + +
+
+

{{ header }}

+
+ {% for label in labels %} + {% set i = loop.index - 1 %} + {{ label }} + {% endfor %} +
+
+ +
+
+ {{ footer }} +
+ diff --git a/pipestat/jinja_templates/object.html b/pipestat/jinja_templates/object.html new file mode 100644 index 00000000..41a6c115 --- /dev/null +++ b/pipestat/jinja_templates/object.html @@ -0,0 +1,50 @@ + + + + + {% include "head.html" %} + Looper: {{ name }} objects + +{{ navbar }} + + +
+
+ {% if links[0] is defined or figures[0] is defined %} +

{{ name }}: {{ desc }}

+ {% else %} +

No objects to display for: {{ name }} +

+ {% endif %} + + {% if links[0] is defined %} +

    +
  • Get a reported object for sample
  • + {% for link in links %} + {{ link[0] }} + {% endfor %} +
+ {% endif %} + {% if figures[0] is defined %} +
+ {% for figure in figures %} +
+ + + + +
Fig.{{ loop.index }} {{ figure[1] }} +
+
+
+ {% endfor %} +
+ {% endif %} +
+ +
+
+ {{ footer }} +
+ + diff --git a/pipestat/jinja_templates/project_object.html b/pipestat/jinja_templates/project_object.html new file mode 100644 index 00000000..5d9e4873 --- /dev/null +++ b/pipestat/jinja_templates/project_object.html @@ -0,0 +1,49 @@ +{% if links[0] is defined or figures[0] is defined %} +
+
+

Looper project objects

+
+
+{% endif %} +{% if figures[0] is defined %} +
+
+
Figures
+
+
+
+
+ +
+ {% for figure in figures %} +
+ + + + +
Fig.{{ loop.index }} {{ figure[1] }} +
+
+
+ {% endfor %} +
+
+
+{% endif %} +{% if links[0] is defined %} +
+
+
Links
+
+
+
+
+ +
+ {% for link in links %} + {{ link[0] }} + {% endfor %} +
+
+
+{% endif %} diff --git a/pipestat/jinja_templates/sample.html b/pipestat/jinja_templates/sample.html new file mode 100644 index 00000000..eca4b25f --- /dev/null +++ b/pipestat/jinja_templates/sample.html @@ -0,0 +1,101 @@ + + + + + {% include "head.html" %} + + Looper: {{ sample_name }} + +{{ navbar }} + + +
+
+

{{ report_class }} name: {{ sample_name }}

+

Pipeline name: {{ pipeline_name }}

+ {% if amendments is not none %} +

Amendments: {{ amendments|join(', ') }}

+ {% endif %} +
+
+

+ +

+
    +
  • + Get files produced by the pipeline run +
  • + {% for hr_id, hr in highlighted_results.items() %} + {{ hr["title"] + }} + {% endfor %} +
+
+
+
+

Reported statistics

+
+ + + {% for row_name, row_value in sample_stats.items() %} + + + {% if row_value|length > 60 %} + + {% else %} + + {% endif %} + + {% endfor %} + +
{{ row_name }}{{ row_value|truncate(60,True) }} {{ row_value }}
+
+
+
+ {% if links[0] is defined or figures[0] is defined %} +
+

Reported objects

+ {% endif %} + + {% if links[0] is defined %} +
Links
+
    +
  • Get a reported object for sample +
  • + {% for link in links %} + {{ link[0] }} + {% endfor %} +
+ {% endif %} + {% if figures[0] is defined %} +
Figures
+ {% for figure in figures %} +
+ + + + +
Fig.{{ loop.index }} {{ figure[1] }} +
+
+
+ {% endfor %} + {% endif %} +
+
+ +
+
+ {{ footer }} +
+ + diff --git a/pipestat/jinja_templates/status.html b/pipestat/jinja_templates/status.html new file mode 100644 index 00000000..d213c329 --- /dev/null +++ b/pipestat/jinja_templates/status.html @@ -0,0 +1,20 @@ + + + + {% include "head.html" %} + Looper: status + + {{ navbar }} + +
+
+

Status by sample

+
+ {{ status_table }} +
+ +
+
+ {{ footer }} +
+ diff --git a/pipestat/jinja_templates/status_table.html b/pipestat/jinja_templates/status_table.html new file mode 100644 index 00000000..476e4299 --- /dev/null +++ b/pipestat/jinja_templates/status_table.html @@ -0,0 +1,50 @@ +
+
+
+ + + + + + + + + + + + {% for element in sample_paths %} + {% set i = loop.index - 1 %} + + + + + + + + {% endfor %} + +
Sample nameStatusLog fileRuntimePeak memory use
+ {{ sample_names[i] }} + + {{ statuses[i] }} + + {{ log_link_names[i] }} + + {{ times[i] }} + + {{ mems[i] }} +
+
+
+
+ + + + + + + diff --git a/pipestat/jinja_templates/status_table_no_links.html b/pipestat/jinja_templates/status_table_no_links.html new file mode 100644 index 00000000..a0c77f1a --- /dev/null +++ b/pipestat/jinja_templates/status_table_no_links.html @@ -0,0 +1,58 @@ +
+ + + + + + + + + + + {% for element in sample_link_names %} + {% set i = loop.index - 1 %} + + + + + + + {% endfor %} + + +
Sample nameStatusRuntimePeak memory use
+ {{ sample_link_names[i] }} + + {{ flags[i] }} + + {{ times[i] }} + + {{ mems[i] }} +
+
+ + + + + + diff --git a/pipestat/parsed_schema.py b/pipestat/parsed_schema.py index 1d5904fd..419683a8 100644 --- a/pipestat/parsed_schema.py +++ b/pipestat/parsed_schema.py @@ -7,7 +7,7 @@ from pydantic import create_model # from sqlalchemy.dialects.postgresql import ARRAY -from sqlalchemy import Column +from sqlalchemy import Column, null from sqlalchemy.dialects.postgresql import JSONB from sqlmodel import Field, SQLModel from .const import * @@ -91,9 +91,11 @@ def __init__(self, data: Union[Dict[str, Any], Path, str]) -> None: self._status_data = _safe_pop_one_mapping(key="status", data=data, info_name="status") if data: - raise SchemaError( - f"Extra top-level key(s) in given schema data: {', '.join(data.keys())}" + _LOGGER.info( + "Top-Level arguments found in output schema. They will be assigned to project-level." ) + extra_project_data = _recursively_replace_custom_types(data) + self._project_level_data.update(extra_project_data) # Check that no reserved keywords were used as data items. 
resv_kwds = {"id", SAMPLE_NAME} @@ -112,6 +114,23 @@ def __init__(self, data: Union[Dict[str, Any], Path, str]) -> None: f"Overlap between project- and sample-level keys: {', '.join(project_sample_overlap)}" ) + def __str__(self): + """ + Generate string representation of the object + + :return str: string representation of the object + """ + res = f"{self.__class__.__name__} ({self._pipeline_name})" + if self._project_level_data is not None: + res += f"\n Project Level Data:" + for k, v in self._project_level_data.items(): + res += f"\n - {k} : {v}" + if self._sample_level_data is not None: + res += f"\n Sample Level Data:" + for k, v in self._sample_level_data.items(): + res += f"\n - {k} : {v}" + return res + @property def pipeline_name(self): return self._pipeline_name @@ -158,7 +177,7 @@ def _make_field_definitions(self, data: Dict[str, Any], require_type: bool): if data_type == CLASSES_BY_TYPE["object"] or data_type == CLASSES_BY_TYPE["array"]: defs[name] = ( data_type, - Field(sa_column=Column(JSONB), default={}), + Field(sa_column=Column(JSONB), default=null()), ) else: defs[name] = ( @@ -187,6 +206,7 @@ def build_project_model(self): field_defs = self._make_field_definitions(data, require_type=True) field_defs = self._add_status_field(field_defs) field_defs = self._add_sample_name_field(field_defs) + field_defs = self._add_project_name_field(field_defs) field_defs = self._add_id_field(field_defs) if not field_defs: return None diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py index 072759c2..68b93187 100644 --- a/pipestat/pipestat.py +++ b/pipestat/pipestat.py @@ -11,6 +11,9 @@ from .helpers import * from .parsed_schema import ParsedSchema +from .html_reports_pipestat import HTMLReportBuilder, fetch_pipeline_results + + _LOGGER = getLogger(PKG_NAME) @@ -438,6 +441,22 @@ def set_status( pipeline_type = pipeline_type or self[PIPELINE_TYPE] self.backend.set_status(status_identifier, sample_name, pipeline_type) + @require_backend + def summarize( + 
self, + amendment: Optional[str] = None, + ) -> None: + """ + Builds a browsable html report for reported results. + :param Iterable[str] amendment: name indicating amendment to use, optional + :return str: report_path + + """ + + html_report_builder = HTMLReportBuilder(prj=self) + report_path = html_report_builder(pipeline_name=self.pipeline_name, amendment=amendment) + return report_path + def _get_attr(self, attr: str) -> Any: """ Safely get the name of the selected attribute of this object diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index cbc754f5..73654d31 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -9,3 +9,4 @@ yacman>=0.9.1 PyYAML pandas eido + diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 6da9fa9f..e79e9bf1 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -2,4 +2,5 @@ black coveralls pytest>=4.6.9 pytest-cov>=2.8.1 -jinja2 \ No newline at end of file +jinja2 + diff --git a/tests/conftest.py b/tests/conftest.py index 7d347e47..cf62a478 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -81,3 +81,8 @@ def custom_status_schema(): @pytest.fixture def custom_status_schema2(): return get_data_file_path("custom_status_schema_2.yaml") + + +@pytest.fixture +def output_schema_html_report(): + return get_data_file_path("output_schema_html_report.yaml") diff --git a/tests/data/output_schema_html_report.yaml b/tests/data/output_schema_html_report.yaml new file mode 100644 index 00000000..e235ed0b --- /dev/null +++ b/tests/data/output_schema_html_report.yaml @@ -0,0 +1,33 @@ +pipeline_name: default_pipeline_name +project: + number_of_things: + type: integer + description: "Number of things" + percentage_of_things: + type: number + description: "Percentage of things" + name_of_something: + type: string + description: "Name of something" + switch_value: + type: boolean + description: "Is the 
switch on or off" +samples: + smooth_bw: + path: "aligned_{genome}/{sample_name}_smooth.bw" + type: string + description: "A smooth bigwig file" + aligned_bam: + path: "aligned_{genome}/{sample_name}_sort.bam" + type: string + description: "A sorted, aligned BAM file" + peaks_bed: + path: "peak_calling_{genome}/{sample_name}_peaks.bed" + type: string + description: "Peaks in BED format" + output_file: + type: file + description: "This a path to the output file" + output_image: + type: image + description: "This a path to the output image" \ No newline at end of file diff --git a/tests/test_parsed_schema.py b/tests/test_parsed_schema.py index eee1c18b..c097faa3 100644 --- a/tests/test_parsed_schema.py +++ b/tests/test_parsed_schema.py @@ -149,12 +149,18 @@ def test_empty__fails_with_missing_pipeline_name(prepare_schema_from_mapping): for attr, exp in attr_exp_pairs ], ) -def test_parsed_schema__has_correct_data(prepare_schema_from_file, filename, attr_name, expected): +def test_parsed_schema__has_correct_data_and_print( + prepare_schema_from_file, filename, attr_name, expected +): data_file = get_data_file_path(filename) raw_schema = prepare_schema_from_file(data_file) schema = ParsedSchema(raw_schema) observed = getattr(schema, attr_name) assert observed == expected + try: + print(str(schema)) + except: + assert False SCHEMA_DATA_TUPLES_WITHOUT_PIPELINE_NAME = [ @@ -197,13 +203,6 @@ def test_parsed_schema__has_correct_data(prepare_schema_from_file, filename, att f"Could not find valid pipeline identifier (key '{SCHEMA_PIPELINE_NAME_KEY}') in given schema data", ) for data in SCHEMA_DATA_TUPLES_WITHOUT_PIPELINE_NAME - ] - + [ - ( - dict(data + [(SCHEMA_PIPELINE_NAME_KEY, "test_pipe"), ("extra_key", "placeholder")]), - "Extra top-level key(s) in given schema data: extra_key", - ) - for data in SCHEMA_DATA_TUPLES_WITHOUT_PIPELINE_NAME ], ) def test_insufficient_schema__raises_expected_error_and_message(schema_data, expected_message): diff --git 
a/tests/test_pipestat.py b/tests/test_pipestat.py index 121830f3..cad0355b 100644 --- a/tests/test_pipestat.py +++ b/tests/test_pipestat.py @@ -72,9 +72,15 @@ def test_basics( psm = PipestatManager(**args) psm.report(sample_name=rec_id, values=val, force_overwrite=True) val_name = list(val.keys())[0] + psm.set_status(status_identifier="running", sample_name=rec_id) + status = psm.get_status(sample_name=rec_id) + assert status == "running" assert val_name in psm.retrieve(sample_name=rec_id) psm.remove(sample_name=rec_id, result_identifier=val_name) if backend == "file": + psm.clear_status(sample_name=rec_id) + status = psm.get_status(sample_name=rec_id) + assert status is None with pytest.raises(PipestatDataError): psm.retrieve(sample_name=rec_id) if backend == "db": @@ -701,3 +707,73 @@ def test_manager_has_correct_status_schema_and_status_schema_source( psm = PipestatManager(schema_path=schema_file_path, **backend_data) assert psm.status_schema == exp_status_schema assert psm.status_schema_source == exp_status_schema_path + + +class TestHTMLReport: + @pytest.mark.parametrize( + ["rec_id", "val"], + [ + ("sample1", {"name_of_something": "test_name"}), + ], + ) + @pytest.mark.parametrize("backend", ["file", "db"]) + def test_basics( + self, + rec_id, + val, + config_file_path, + output_schema_html_report, + results_file_path, + backend, + ): + values_project = [ + {"sample2": {"number_of_things": 2}}, + {"sample3": {"name_of_something": "name of something string"}}, + ] + values_sample = [ + {"sample4": {"smooth_bw": "smooth_bw string"}}, + {"sample5": {"output_file": {"path": "path_string", "title": "title_string"}}}, + {"sample4": {"aligned_bam": "aligned_bam string"}}, + {"sample6": {"output_file": {"path": "path_string", "title": "title_string"}}}, + { + "sample7": { + "output_image": { + "path": "path_string", + "thumbnail_path": "path_string", + "title": "title_string", + } + } + }, + ] + with NamedTemporaryFile() as f, ContextManagerDBTesting(DB_URL): + 
results_file_path = f.name + args = dict(schema_path=output_schema_html_report, database_only=False) + backend_data = ( + {"config_file": config_file_path} + if backend == "db" + else {"results_file_path": results_file_path} + ) + args.update(backend_data) + psm = PipestatManager(**args) + psm.report( + sample_name=rec_id, values=val, force_overwrite=True, pipeline_type="project" + ) + psm.set_status( + sample_name=rec_id, status_identifier="completed", pipeline_type="project" + ) + for i in values_project: + for r, v in i.items(): + psm.report( + sample_name=r, values=v, force_overwrite=True, pipeline_type="project" + ) + psm.set_status( + sample_name=r, status_identifier="running", pipeline_type="project" + ) + for i in values_sample: + for r, v in i.items(): + psm.report( + sample_name=r, values=v, force_overwrite=True, pipeline_type="sample" + ) + psm.set_status(sample_name=r, status_identifier="running") + + htmlreportpath = psm.summarize(amendment="")