diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index b58acf258..5e9be8bae 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -11,25 +11,23 @@ on: jobs: build: - runs-on: ${{ matrix.os }} - env: - JDK_VERSION: ${{ matrix.jdk }} - strategy: fail-fast: false matrix: os: [ windows-latest, macOS-latest, ubuntu-latest ] jdk: [ 17 ] + runs-on: ${{ matrix.os }} + steps: - uses: actions/checkout@v2 - name: Set up JDK uses: actions/setup-java@v2 with: - java-version: '17' + java-version: ${{ matrix.jdk }} distribution: 'adopt' - name: Build with Maven - run: ./mvnw --batch-mode verify + run: ./mvnw --quiet --batch-mode verify diff --git a/.github/workflows/pages-latest.yml b/.github/workflows/pages-latest.yml deleted file mode 100644 index 9974797bd..000000000 --- a/.github/workflows/pages-latest.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Simple workflow for deploying static content to GitHub Pages -name: Deploy the latest documentation to GitHub pages - -on: - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - # Runs on pushes targeting the main branch - push: - branches: [ develop ] - -jobs: - build-docs: - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 # otherwise, you will fail to push refs to dest repo - ref: develop - - - name: Set up Python 3 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: Install Python dependencies - run: | - pip install sphinx - pip install sphinx-rtd-theme - - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'adopt' - - - name: Build LIRICAL - run: ./mvnw -Prelease -DskipTests package # We test elsewhere - - - name: Build documentation - run: | - ## Init the target folder. - # We will put all site documentation there. 
- DOCS_VERSION=latest - mkdir -p gh-pages/${DOCS_VERSION} - touch gh-pages/.nojekyll - - ## Copy Javadoc - # Copy aggregated Javadoc into `apidocs` folder. - # The aggregated docs are built by Maven in `package` phase. - - APIDOCS=$(pwd)/gh-pages/${DOCS_VERSION}/apidocs - printf "Copying Javadocs from %s to %s\n" $(pwd)/target/site/apidocs ${APIDOCS} - cp -r target/site/apidocs ${APIDOCS} - - ## Build the docs - # Generate the HTML pages and move the generated content into the target folder. - printf "Building the ${DOCS_VERSION} documentation\n" - cd docs/ - make html - cd .. - mv docs/_build/html/* gh-pages/${DOCS_VERSION}/ - - - - name: Deploy documentation - if: ${{ github.event_name == 'push' }} - uses: JamesIves/github-pages-deploy-action@v4.4.1 - with: - folder: gh-pages - force: false diff --git a/.github/workflows/pages-stable.yml b/.github/workflows/pages-stable.yml deleted file mode 100644 index 04e7ea1eb..000000000 --- a/.github/workflows/pages-stable.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Simple workflow for deploying static content to GitHub Pages -name: Deploy the stable documentation to GitHub pages - -on: - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - # Runs on pushes targeting the main branch - push: - branches: [ master ] - -jobs: - build-docs: - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 # otherwise, you will fail to push refs to dest repo - ref: master - - - name: Set up Python 3 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - - name: Install Python dependencies - run: | - pip install sphinx - pip install sphinx-rtd-theme - - - name: Set up JDK 17 - uses: actions/setup-java@v3 - with: - java-version: '17' - distribution: 'adopt' - - - name: Build LIRICAL - run: ./mvnw -Prelease -DskipTests package # We test elsewhere - - - name: Build documentation - run: | - ## Init the target folder. 
- # We will put all site documentation there. - DOCS_VERSION=stable - mkdir -p gh-pages/${DOCS_VERSION} - touch gh-pages/.nojekyll - - ## Copy Javadoc - # Copy aggregated Javadoc into `apidocs` folder. - # The aggregated docs are built by Maven in `package` phase. - - APIDOCS=$(pwd)/gh-pages/${DOCS_VERSION}/apidocs - printf "Copying Javadocs from %s to %s\n" $(pwd)/target/site/apidocs ${APIDOCS} - cp -r target/site/apidocs ${APIDOCS} - - ## Build the docs - # Generate the HTML pages and move the generated content into the target folder. - printf "Building the ${DOCS_VERSION} documentation\n" - cd docs/ - make html - cd .. - mv docs/_build/html/* gh-pages/${DOCS_VERSION}/ - - - - name: Deploy documentation - if: ${{ github.event_name == 'push' }} - uses: JamesIves/github-pages-deploy-action@v4.4.1 - with: - folder: gh-pages - force: false diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 000000000..a7038d466 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,84 @@ +# Simple workflow for deploying static content to GitHub Pages +name: Deploy docs to GitHub pages + +on: + # Runs on pushes targeting the master and develop branches + push: + branches: [ master, develop ] + +jobs: + build-docs: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 # otherwise, you will fail to push refs to dest repo + ref: develop + + - name: Set up Python 3 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install Python dependencies + run: | + pip install sphinx + pip install sphinx-rtd-theme + + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + + - name: Build documentation + run: | + ## Init the target folder. + # We will put all site documentation there. 
+ mkdir -p gh-pages + touch gh-pages/.nojekyll + + function build_docs { + # The function will checkout a branch and build the Javadoc & documentation + # into provided documentation directory. + BRANCH=${1} + DOCDIR=${2} + + git checkout ${BRANCH} + git fetch + git pull + ## Init the target folder. + # We will put all site documentation there. + mkdir -p gh-pages/${DOCDIR} + + ## Javadoc + # Build the aggregated Javadoc + ./mvnw --quiet -Ddoclint=none -Dinherited=false clean javadoc:aggregate + # Copy aggregated Javadoc into `apidocs` folder. + APIDOCS=$(pwd)/gh-pages/${DOCDIR}/apidocs + printf "Copying Javadocs from %s to %s\n" $(pwd)/target/site/apidocs ${APIDOCS} + cp -r target/site/apidocs ${APIDOCS} + + ## Build the docs + # Generate the HTML pages and move the generated content into the target folder. + printf "Building the %s documentation\n" ${DOCDIR} + cd docs/ + make html + cd .. + mv docs/_build/html/* gh-pages/${DOCDIR} + } + + # We store the docs for `master` in `stable` dir + build_docs master stable + # We store the docs for `develop` in `latest` dir + build_docs develop latest + + + - name: Deploy documentation + if: ${{ github.event_name == 'push' }} + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + folder: gh-pages + force: false diff --git a/.gitignore b/.gitignore index 1aed115da..f715ac990 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ target/ *.iml *.log data/ +results/ dependency-reduced-pom.xml .settings .project diff --git a/README.md b/README.md index c60b3da4f..8fccafa6d 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,40 @@ +# LIRICAL + [![GitHub release](https://img.shields.io/github/release/TheJacksonLaboratory/LIRICAL.svg)](https://github.com/TheJacksonLaboratory/LIRICAL/releases) [![Java CI with Maven](https://github.com/TheJacksonLaboratory/LIRICAL/workflows/Java%20CI%20with%20Maven/badge.svg)](https://github.com/TheJacksonLaboratory/LIRICAL/actions/workflows/maven.yml) -# LIRICAL LIRICAL (LIkelihood Ratio 
Interpretation of Clinical AbnormaLities) is designed to provide clinically interpretable computational analysis of phenotypic abnormalities (encoded using the [Human Phenotype Ontology](http://www.human-phenotype-ontology.org)), optionally combined with an analysis of variants and genotypes if a VCF file is provided with the results of diagnostic gene panel, exome, or genome sequencing. -A manuscript describing LIRICAL is available at the -[American Journal of Human Genetics](https://pubmed.ncbi.nlm.nih.gov/32755546/). +The prioritized diseases are reported in human-friendly HTML report. +The report summarizes the most likely differential diagnoses: + +Top matches + +and breaks down the contributions of the HPO terms and deleterious variants in the associated genes for each diagnosis: + + + +The report is also available in JSON/TSV formats suitable for programmatic post-processing. ## Availability -Most users should download the latest distribution ZIP file from -the [Releases page](https://github.com/TheJacksonLaboratory/LIRICAL/releases). +Most users should download the latest distribution ZIP file from the [Releases page](https://github.com/TheJacksonLaboratory/LIRICAL/releases). + + +## Learn more -## Documentation -Please consult the documentation for installation instructions and a tutorial: -- [reference documentation](https://thejacksonlaboratory.github.io/LIRICAL/stable) -- [edge release documentation](https://thejacksonlaboratory.github.io/LIRICAL/latest) +Read more about the LIRICAL *algorithm* +in the manuscript available at the [American Journal of Human Genetics](https://pubmed.ncbi.nlm.nih.gov/32755546/). 
+Consult the documentation for *installation instructions* and a *tutorial*: +- [Stable documentation](https://thejacksonlaboratory.github.io/LIRICAL/stable) +- [Edge release documentation](https://thejacksonlaboratory.github.io/LIRICAL/latest) -## API docs -Developers can access Javadoc at: -- [reference](https://thejacksonlaboratory.github.io/LIRICAL/stable/apidocs) -- [edge release](https://thejacksonlaboratory.github.io/LIRICAL/latest/apidocs) +Developers can access *API reference* at: +- [Stable](https://thejacksonlaboratory.github.io/LIRICAL/stable/apidocs) +- [Edge release](https://thejacksonlaboratory.github.io/LIRICAL/latest/apidocs) diff --git a/docs/_static/lirical-sparkline-lds2.top5.png b/docs/_static/lirical-sparkline-lds2.top5.png new file mode 100644 index 000000000..d9b84a91e Binary files /dev/null and b/docs/_static/lirical-sparkline-lds2.top5.png differ diff --git a/docs/conf.py b/docs/conf.py index 5ddc3479d..d926b1675 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ # The short X.Y version. version = u'2.0' # The full version, including alpha/beta/rc tags. -release = u'2.0.0-RC2' +release = u'2.0.0-RC3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 1bed1977d..fc5f36492 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ as `Human Phenotype Ontology (HPO) `_ t setup tutorial running + input-sanitation output explanations advanced diff --git a/docs/input-sanitation.rst b/docs/input-sanitation.rst new file mode 100644 index 000000000..f358cfcc7 --- /dev/null +++ b/docs/input-sanitation.rst @@ -0,0 +1,179 @@ +.. _rst-input-sanitation: + +========================= +Analysis input validation +========================= + +LIRICAL performs Q/C checks and sanitation before running the analysis. + +Here we summarize the requirements and checks performed on all sections of the analysis input. 
+ +Analysis requirements +^^^^^^^^^^^^^^^^^^^^^ + +Here we summarize the requirements of inputs that LIRICAL needs for the analysis. + +Sample identifier +~~~~~~~~~~~~~~~~~ + +Sample identifier MUST be provided if the analysis is run with a multi-sample VCF file. Otherwise, LIRICAL is unable +to choose the variant genotypes. +The identifier is *optional* if running a phenotype-only analysis or with a single-sample VCF file, +where LIRICAL uses the identifier found in the VCF file. + +The analysis will stop if run with multi-sample VCF file and the identifier is not available, +or if the provided identifier is not found in the VCF file (applies to single-sample VCFs as well). + + +Phenotypic features +~~~~~~~~~~~~~~~~~~~ + +LIRICAL uses a set of phenotypic features that were observed or specifically excluded in the subject to prioritize +the diseases and several checks are applied to mitigate common errors and ensure correctness of the analysis. + +The checks focus on the following: + +- At least one present or excluded HPO term is provided. +- All phenotypic features are formatted as *Compact Uniform Resource Identifiers* (CURIEs), such as ``HP:0001250`` + for *Seizure*. A valid CURIE consists of a prefix (e.g. ``HP``), delimiter (``:`` or ``_``), and id (e.g. ``0001250``). +- The CURIEs are *unique*, i.e. used at most once. +- The CURIEs correspond to identifiers of *current* or *obsolete* HPO terms. +- The HPO terms are descendants of `Phenotypic abnormality `_ branch. +- The HPO terms are logically consistent: + + - The subject is not annotated with an HPO term in observed and excluded state at the same time. + - The subject is not annotated with an observed HPO term and its observed or excluded ancestor. + - The subject is not annotated with an excluded HPO term and its excluded ancestor. + +Age +~~~ + +LIRICAL does not use the age of the subject at the moment. However, if set, the age must be formatted +as ISO8601 duration. 
For instance ``P1Y8M`` for 1 year and 8 months of age. + +Sex +~~~ + +The sex must be provided as one of {``MALE``, ``FEMALE``, ``UNKNOWN``}. If the input is not parsable, +``UNKNOWN`` is used by default. + +VCF file +~~~~~~~~ + +The path to VCF file can be provided via CLI or through phenopacket/YAML file. The path must point to a file +that is readable by the user running the LIRICAL process. + + +Validation policy +^^^^^^^^^^^^^^^^^ + +LIRICAL enforces the requirements depending on the validation policy. There are three validation policies: + +- *MINIMAL* +- *LENIENT* +- *STRICT* + +with increasing sanity requirements. + +The input validation results are always logged in the output. The log includes the following line if the input is OK:: + + Input sanitation found no issues + +Alternatively, the issues are logged to the terminal. For instance:: + + Found issues 1 errors and 1 warnings + Errors 😱 + - Sample must not be annotated with Clonic seizure [HP:0020221] while its ancestor + Seizure [HP:0001250] is excluded. Resolve the logical inconsistency by choosing + one of the terms. + Warnings 😧 + - Sample should not be annotated with Patent foramen ovale [HP:0001655] and its ancestor + Atrial septal defect [HP:0001631]. Remove Atrial septal defect [HP:0001631] + from the phenotype terms. + +The issues are classified as errors and warnings. +*Error* is a serious issue that MUST be fixed and human intervention is required. +Warning is an issue that SHOULD be fixed. However, unlike an error, warning can be fixed automatically. +The output includes a suggested resolution, e.g. choosing *Clonic seizure* or *Seizure* in the error above. + +The warnings are fixed depending on the validation policy in use. + + +`MINIMAL` validation policy +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Minimal validation policy enforces the least constraint on the analysis inputs. +The analysis is run *"as is"* and the run is aborted only if the most important information is missing. 
+Only the most rudimentary sanitation is applied. + +Requirements +############ + +The analysis requires the following: + +- At least one HPO term is provided. +- VCF path points to a readable file, if provided. +- Sample identifier is provided if run with a multi-sample VCF and the sample identifier + must be present in the VCF file. + +Sanitation +########## + +The following actions are performed on the analysis input: + +- Malformed CURIEs are removed. +- CURIEs that do not correspond to current or obsolete HPO terms are removed. +- The obsolete HPO term identifiers are replaced with the current identifiers. + + +`LENIENT` validation policy +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Lenient validation policy attempts to fix as many issues as possible. + +Requirements +############ + +The policy requires all points of the minimal policy, plus: + +- The subject is *NOT* annotated with an HPO term that is both present and excluded. +- The subject is *NOT* annotated with a present HPO term and its excluded ancestor. + +Sanitation +########## + +The actions of the minimal policy are performed, plus: + +- Duplicate HPO terms are removed such that each term is present at most once. +- The HPO terms that are not descendants of Phenotypic abnormality are removed. +- The logical inconsistencies are resolved: + + - If the subject is annotated with an excluded HPO term (e.g. no Focal seizure) and its excluded ancestor + (e.g. no Seizure) then the term is removed and the ancestor is kept. + - If the subject is annotated with a present HPO term (e.g. Focal seizure) and its present ancestor (e.g. Seizure), + then the ancestor is removed and the term is kept. + +`STRICT` validation policy +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Strict validation policy adds no additional requirements than those of *lenient* policy. However, the analysis +is not run unless no errors or warnings are found. 
+ +Requirements +############ + +On top of the lenient policy, strict policy requires the following: + +- HPO terms are unique. +- HPO terms are descendants of Phenotypic abnormality. +- There are no logical inconsistencies in HPO terms. +- Age is well formatted, if provided. +- Sex is well formatted, if provided. + +Sanitation +########## + +Strict policy applies no sanitation. + + +Use the ``--dry-run`` option to check if the inputs can be run under given validation policy. diff --git a/docs/lirical-tsv.rst b/docs/lirical-tsv.rst index 58af9cccd..f619666a8 100644 --- a/docs/lirical-tsv.rst +++ b/docs/lirical-tsv.rst @@ -12,19 +12,27 @@ For example, the following command will run LIRICAL on a Phenopacket and output By default, LIRICAL outputs the data to a file called ``lirical.tsv``. This can be altered with the ``-x `` option. +The TSV output consists of the header and the body. The header includes lines that start with an exclamation mark, +to provide information about the HPO terms used to run the analysis. + +The body section summarizes the matches between the patient data and the diseases, one disease per row, ranked by +the post-test probability. +Each row includes the disease credentials, the pre-test and post-test probabilities, the composite likelihood ratio. +If the analysis was run with a VCF file, the report includes two extra columns with the gene associated with the disease +and the variants found in the gene. .. list-table:: LIRICAL's TSV format :header-rows: 1 :widths: 40 60 - * - Item + * - Column name - Explanation * - rank - - placement of the candidate diagnosis by LIRICAL + - Placement of the candidate diagnosis by LIRICAL * - diseaseName - Name of the candidate disease * - diseaseCurie - - disease ID, e.g., OMIM:154700 + - Disease identifier, e.g., `OMIM:154700` * - pretestprob - Pretest probability of the candidate disease * - postestprob @@ -32,11 +40,7 @@ By default, LIRICAL outputs the data to a file called ``lirical.tsv``. 
This can * - compositeLR - Combined likelihood ratio of the candidate disease (logarithm of the product of all individual LRs) * - entrezGeneId - - Identifier of the candidate disease gene (if available) + - Identifier of the candidate disease gene (if run with a VCF file) * - variants - - variant evaluation (if available) - - -The file begins with comment lines (that start with an exclamation mark) that provide information about the -HPO terms used to run the analysis. + - Variant evaluation (if run with a VCF file) diff --git a/docs/running.rst b/docs/running.rst index 191de7162..17c891749 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -51,10 +51,10 @@ LIRICAL offers several commands for receiving phenotype and genotype inputs via However the commands share many CLI arguments for setting up the resource paths, the analysis configuration, and where results should be written. We describe the shared CLI arguments in this section. -Resource paths -~~~~~~~~~~~~~~ +Resources +~~~~~~~~~ -The options from this group point LIRICAL to resources required for analysis. +The options from this group set up resources required for LIRICAL analysis. * ``-d | --data``: path to LIRICAL data directory. Required if the ``data`` folder is not set up next to the LIRICAL JAR file. @@ -65,6 +65,7 @@ The options from this group point LIRICAL to resources required for analysis. * ``-b | --background``: path to file with background variant frequencies for genes. This option should not be used unless there is a very good reason to do that. The background variant frequencies are bundled with the LIRICAL code. See :ref:`rstbg-var-freqs` for more info. +* ``--parallelism``: the number of workers/threads to use. The value must be a positive integer (default: ``1``). Configuration options ~~~~~~~~~~~~~~~~~~~~~ @@ -74,11 +75,22 @@ The configuration options tweak the analysis. * ``-g | --global``: global analysis, see :ref:`rstglobal-mode` for more info (default: ``false``). 
* ``--ddndv``: disregard a disease if no deleterious variants are found in the gene associated with the disease. Used only if running with a VCF file (default: ``true``). + **Deprecation note**: the option has been deprecated and will be removed since `v2.0.0` because + it was not possible to be unset. Using the option **will stop the analysis**. + Use ``--sdwndv`` as a replacement. +* ``--sdwndv``: show diseases even if no deleterious variants are found in the gene associated with the disease. + The option is a flag (takes no value) and its presence will lead to showing *all* diseases, + even those with no deleterious variants. + Only applicable to the HTML report when running with a VCF file (genotype-aware mode). * ``--transcript-db``: transcript database (default: ``RefSeq``), see :ref:`rsttx-dbs` for more info. -* ``--use-orphanet``: use `Orphanet `_ annotations (default: ``false``) +* ``--use-orphanet``: use `Orphanet `_ annotations (default: ``false``). * ``--strict``: use strict penalties if the genotype does not match the disease model in terms of number of called pathogenic alleles (default: ``false``). * ``--pathogenicity-threshold``: Variants with greater pathogenicity score is considered deleterious (default: ``0.8``). +* ``--validation-policy``: set the level of input sanity check, see :ref:`rst-input-sanitation` for more info. + Choose from `MINIMAL`, `LENIENT`, `STRICT` (default ``MINIMAL``). +* ``--dry-run``: check if the inputs meet the validation policy requirements, report any issues, + and exit without running the analysis (default: ``false``). Output options ~~~~~~~~~~~~~~ @@ -87,9 +99,9 @@ The output options dictate the format and location for the analysis results. * ``-o | --output-directory``: where to write the analysis outputs (default: current working directory). * ``-f | --output-format``: Output format to use for writing the results, can be provided multiple times. 
- Choose from `html`, `tsv`, and `json` (default: ``html``) + Choose from `html`, `tsv`, and `json` (default: ``html``). * ``-x | --prefix``: prefix of the output files (default: ``lirical``) -* ``-t | --threshold``: minimum post-test probability to show diagnosis in the HTML report. +* ``-t | --threshold``: minimum post-test probability to show diagnosis in the HTML report. The value must be in range :math:`[0, 1]`. The option must not be used with ``-m | -mindiff`` option at the same time. * ``-m | --mindiff``: Minimal number of differential diagnoses to show. * ``--display-all-variants``: Display all variants in the HTML report, not just the variants passing @@ -116,8 +128,8 @@ The ``prioritize`` command takes the following options: that correspond to the phenotype terms negated/excluded in the proband. * ``--assembly`` genome build, choose from `hg19` or `hg38`, must be provided if ``--vcf`` is used (default: ``hg38``). * ``--vcf``: path to VCF file with exome/genome sequencing results. The file can be compressed. -* ``--sample-id``: proband's identifier (default: `Sample`). -* ``--age``: proband's age as an ISO8601 duration +* ``--sample-id``: proband's identifier, must be provided if running with a multi-sample VCF file (default: `subject`). +* ``--age``: proband's age as an ISO8601 duration. (e.g. ``P9Y`` for 9 years, ``P2Y3M`` for 2 years and 3 months, or ``P33W`` for the 33th gestational week). * ``--sex``: proband's sex, choose from `MALE`, `FEMALE`, `UNKNOWN` (default: `UNKNOWN`). diff --git a/docs/setup.rst b/docs/setup.rst index 1d64cc97c..94e2196ae 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -19,7 +19,7 @@ build LIRICAL from source, then the build process described below requires .. note:: The v1 of LIRICAL was written in Java 8 but starting from v2 we require Java 17 or better to take advantage - of numerous performance improvements and novel language features. + of the novel Java features. 
Building from sources ~~~~~~~~~~~~~~~~~~~~~ diff --git a/lirical-background/pom.xml b/lirical-background/pom.xml index fdf7813ea..7efe01e44 100644 --- a/lirical-background/pom.xml +++ b/lirical-background/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -17,6 +17,18 @@ lirical-configuration ${project.parent.version} + + org.monarchinitiative.phenol + phenol-core + + + org.monarchinitiative.svart + svart + + + org.monarchinitiative.phenol + phenol-annotations + info.picocli picocli diff --git a/lirical-cli/pom.xml b/lirical-cli/pom.xml index 8cc803526..3f5aa210e 100644 --- a/lirical-cli/pom.xml +++ b/lirical-cli/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -18,16 +18,32 @@ ${project.parent.version} - org.monarchinitiative.biodownload - biodownload + de.charite.compbio + jannovar-core + + + info.picocli + picocli org.apache.commons commons-csv - info.picocli - picocli + org.monarchinitiative.biodownload + biodownload + + + org.monarchinitiative.phenol + phenol-core + + + org.monarchinitiative.phenol + phenol-annotations + + + org.monarchinitiative.svart + svart ch.qos.logback diff --git a/lirical-cli/src/examples/LDS2.v2.json b/lirical-cli/src/examples/LDS2.v2.json index 6d9b4d094..86bc5e808 100644 --- a/lirical-cli/src/examples/LDS2.v2.json +++ b/lirical-cli/src/examples/LDS2.v2.json @@ -33,18 +33,6 @@ "label": "author statement used in manual assertion" } }] - }, { - "type": { - "id": "HP:0001631", - "label": "Atrial septal defect" - }, - "excluded": false, - "evidence": [{ - "evidenceCode": { - "id": "ECO:0000302", - "label": "author statement used in manual assertion" - } - }] }, { "type": { "id": "HP:0000193", diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/Main.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/Main.java index 5af027d90..ad8ea60d8 100644 --- 
a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/Main.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/Main.java @@ -1,6 +1,7 @@ package org.monarchinitiative.lirical.cli; import org.monarchinitiative.lirical.cli.cmd.*; +import org.monarchinitiative.lirical.cli.cmd.experimental.ExperimentalCommand; import picocli.CommandLine; import java.util.concurrent.Callable; @@ -15,7 +16,7 @@ footer = Main.FOOTER) public class Main implements Callable { - public static final String VERSION = "lirical v2.0.0-RC2"; + public static final String VERSION = "lirical v2.0.0-RC3"; public static final int WIDTH = 120; public static final String FOOTER = "\nSee the full documentation at https://thejacksonlaboratory.github.io/LIRICAL/stable"; @@ -38,6 +39,8 @@ public static void main(String[] args) { .addSubcommand("prioritize", new PrioritizeCommand()) .addSubcommand("phenopacket", new PhenopacketCommand()) .addSubcommand("yaml", new YamlCommand()) + // hidden commands + .addSubcommand("experimental", new ExperimentalCommand()) .addSubcommand("benchmark", new BenchmarkCommand()); cline.setToggleBooleanFlags(false); System.exit(cline.execute(args)); diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java index f502d0c06..e35653fec 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AbstractPrioritizeCommand.java @@ -3,70 +3,32 @@ import org.monarchinitiative.lirical.core.Lirical; import org.monarchinitiative.lirical.core.analysis.*; import org.monarchinitiative.lirical.core.exception.LiricalException; +import org.monarchinitiative.lirical.core.model.FilteringStats; +import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import 
org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; import org.monarchinitiative.lirical.core.output.*; -import org.monarchinitiative.lirical.core.exception.LiricalRuntimeException; -import org.monarchinitiative.lirical.core.model.*; +import org.monarchinitiative.lirical.core.sanitize.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import picocli.CommandLine; import java.io.IOException; -import java.nio.file.Path; import java.time.LocalDateTime; -import java.time.Period; -import java.time.format.DateTimeParseException; -import java.util.*; +import java.util.List; +import java.util.Optional; /** - * This is a common superclass for {@link YamlCommand}, {@link PhenopacketCommand}, and {@link PrioritizeCommand}. - * Its purpose is to provide command line parameters and variables that are used - * in the same way by all the subclasses. + * The driver class for an analysis of a single individual. + *

+ * This class is the superclass for {@link YamlCommand}, {@link PhenopacketCommand}, and {@link PrioritizeCommand}. + * The subclasses must provide the input data and the driver takes care of the rest. * * @author Peter N Robinson + * @author Daniel Danis */ -abstract class AbstractPrioritizeCommand extends LiricalConfigurationCommand { +abstract class AbstractPrioritizeCommand extends OutputCommand { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractPrioritizeCommand.class); - private static final String UNKNOWN_VERSION_PLACEHOLDER = "UNKNOWN VERSION"; - - // ---------------------------------------------- OUTPUTS ---------------------------------------------------------- - @CommandLine.ArgGroup(validate = false, heading = "Output options:%n") - public Output output = new Output(); - - public static class Output { - @CommandLine.Option(names = {"-o", "--output-directory"}, - description = "Directory into which to write output (default: ${DEFAULT-VALUE}).") - public Path outdir = Path.of(""); - - @CommandLine.Option(names = {"-f", "--output-format"}, - arity = "0..*", - description = { - "An output format to use for writing the results, can be provided multiple times.", - "Choose from {${COMPLETION-CANDIDATES}}", - "Default: ${DEFAULT-VALUE}" - }) - public Set outputFormats = Set.of(OutputFormat.HTML); - /** - * Prefix of the output file. For instance, if the user enters {@code -x sample1} and an HTML file is output, - * the name of the HTML file will be {@code sample1.html}. If a TSV file is output, the name of the file will - * be {@code sample1.tsv}. - */ - @CommandLine.Option(names = {"-x", "--prefix"}, - description = "Prefix of outfile (default: ${DEFAULT-VALUE}).") - public String outfilePrefix = "lirical"; - - @CommandLine.Option(names = {"-t", "--threshold"}, - description = "Minimum post-test probability to show diagnosis in HTML output. 
The value should range between [0,1].") - public Double lrThreshold = null; - - @CommandLine.Option(names = {"-m", "--mindiff"}, - description = "Minimal number of differential diagnoses to show.") - public Integer minDifferentialsToShow = null; - - @CommandLine.Option(names = {"--display-all-variants"}, - description = "Display all variants in output, not just variants passing pathogenicity threshold (default ${DEFAULT-VALUE})") - public boolean displayAllVariants = false; - } @Override public Integer execute() { @@ -76,7 +38,7 @@ public Integer execute() { if (!errors.isEmpty()) { LOGGER.error("Errors:"); for (String error : errors) - LOGGER.error("- {}", error); + LOGGER.error(" {}", error); return 1; } @@ -87,27 +49,62 @@ public Integer execute() { LOGGER.debug("Using {} transcripts", runConfiguration.transcriptDb); TranscriptDatabase transcriptDb = runConfiguration.transcriptDb; + LOGGER.info("Parsing the analysis inputs"); + SanitationInputs inputs = procureSanitationInputs(); + // 1 - bootstrap the app + LOGGER.info("Bootstrapping LIRICAL"); Lirical lirical = bootstrapLirical(genomeBuild); LOGGER.info("Configured LIRICAL {}", lirical.version() .map("v%s"::formatted) .orElse(UNKNOWN_VERSION_PLACEHOLDER)); - // 2 - prepare inputs - LOGGER.info("Preparing the analysis data"); - AnalysisData analysisData = prepareAnalysisData(lirical, genomeBuild, transcriptDb); - if (analysisData.presentPhenotypeTerms().isEmpty() && analysisData.negatedPhenotypeTerms().isEmpty()) { - LOGGER.warn("No phenotype terms were provided. Aborting.."); - return 1; + // 2 - sanitize inputs + InputSanitizerFactory sanitizerFactory = new InputSanitizerFactory(lirical.phenotypeService().hpo()); + InputSanitizer sanitizer = selectSanitizer(sanitizerFactory); + SanitationResult result = sanitizer.sanitize(inputs); + LOGGER.info(summarizeSanitationResult(result)); + + // We abort on dry run or if the issues are above the failure policy tolerance. 
+ if (runConfiguration.dryRun) { + boolean canBeRun = switch (runConfiguration.validationPolicy) { + case STRICT -> !result.hasErrorOrWarnings(); + case LENIENT, MINIMAL -> !result.hasErrors(); + }; + LOGGER.info("The analysis can be run under {} validation policy: {}", + runConfiguration.validationPolicy.name(), canBeRun); + LOGGER.info("Aborting due to `--dry-run` option"); + return 0; + } else { + switch (runConfiguration.validationPolicy) { + case STRICT -> { + if (result.hasErrorOrWarnings()) { + LOGGER.info("Aborting the run. Fix the errors and warnings or use more permissive failure policy"); + return 1; + } + } + case LENIENT, MINIMAL -> { + if (result.hasErrors()) { + LOGGER.info("Aborting the run due to input errors. Fix the errors and try again"); + return 1; + } + } + default -> throw new IllegalStateException("Unexpected value: " + runConfiguration.validationPolicy); + } } - // 3 - run the analysis + // 3 - prepare analysis data + AnalysisData analysisData = prepareAnalysisData(lirical, genomeBuild, transcriptDb, result.sanitizedInputs()); + + // 4 - run the analysis AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical, genomeBuild, transcriptDb); LOGGER.info("Starting the analysis"); - LiricalAnalysisRunner analysisRunner = lirical.analysisRunner(); - AnalysisResults results = analysisRunner.run(analysisData, analysisOptions); + AnalysisResults results; + try (LiricalAnalysisRunner analysisRunner = lirical.analysisRunner()) { + results = analysisRunner.run(analysisData, analysisOptions); + } - // 4 - write out the results + // 5 - write out the results LOGGER.info("Writing out the results"); FilteringStats filteringStats = analysisData.genes().computeFilteringStats(); AnalysisResultsMetadata metadata = AnalysisResultsMetadata.builder() @@ -115,16 +112,16 @@ public Integer execute() { .setHpoVersion(lirical.phenotypeService().hpo().version().orElse(UNKNOWN_VERSION_PLACEHOLDER)) .setTranscriptDatabase(transcriptDb.toString()) 
.setLiricalPath(dataSection.liricalDataDirectory.toAbsolutePath().toString()) - .setExomiserPath(dataSection.exomiserDatabase == null ? "" : dataSection.exomiserDatabase.toAbsolutePath().toString()) + .setExomiserPath(figureOutExomiserPath()) .setAnalysisDate(LocalDateTime.now().toString()) .setSampleName(analysisData.sampleId()) - .setnGoodQualityVariants(filteringStats.nGoodQualityVariants()) + .setnPassingVariants(filteringStats.nPassingVariants()) .setnFilteredVariants(filteringStats.nFilteredVariants()) - .setGenesWithVar(0) // TODO + .setGenesWithVar(filteringStats.genesWithVariants()) .setGlobalMode(runConfiguration.globalAnalysisMode) .build(); - OutputOptions outputOptions = createOutputOptions(); + OutputOptions outputOptions = createOutputOptions(output.outfilePrefix); AnalysisResultWriterFactory factory = lirical.analysisResultsWriterFactory(); for (OutputFormat fmt : output.outputFormats) { @@ -143,46 +140,36 @@ public Integer execute() { return 0; } - protected List checkInput() { - List errors = super.checkInput(); - - // thresholds - if (output.lrThreshold != null && output.minDifferentialsToShow != null) { - String msg = "Only one of the options -t/--threshold and -m/--mindiff can be used at once."; - LOGGER.error(msg); - errors.add(msg); - } - if (output.lrThreshold != null) { - if (output.lrThreshold < 0.0 || output.lrThreshold > 1.0) { - String msg = "Post-test probability (-t/--threshold) must be between 0.0 and 1.0."; - LOGGER.error(msg); - errors.add(msg); - } + protected abstract SanitationInputs procureSanitationInputs() throws LiricalParseException; + + private static AnalysisData prepareAnalysisData(Lirical lirical, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDb, + SanitizedInputs inputs) throws LiricalParseException { + // Read VCF file if present. 
+ String sampleId; + GenesAndGenotypes genes; + if (inputs.vcf() == null) { + // Use placeholder, because the user did not provide sample ID, + // and we're running phenotype-only analysis. + sampleId = "subject"; + genes = GenesAndGenotypes.empty(); + } else { + SampleIdAndGenesAndGenotypes sampleAndGenotypes = readVariantsFromVcfFile(inputs.sampleId(), + inputs.vcf(), + genomeBuild, + transcriptDb, + lirical.variantParserFactory()); + sampleId = sampleAndGenotypes.sampleId(); + genes = sampleAndGenotypes.genesAndGenotypes(); } - return errors; - } - protected abstract AnalysisData prepareAnalysisData(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDb) throws LiricalParseException; - - protected OutputOptions createOutputOptions() { - LrThreshold lrThreshold = output.lrThreshold == null ? LrThreshold.notInitialized() : LrThreshold.setToUserDefinedThreshold(output.lrThreshold); - MinDiagnosisCount minDiagnosisCount = output.minDifferentialsToShow == null ? MinDiagnosisCount.notInitialized() : MinDiagnosisCount.setToUserDefinedMinCount(output.minDifferentialsToShow); - return new OutputOptions(lrThreshold, minDiagnosisCount, runConfiguration.pathogenicityThreshold, - output.displayAllVariants, output.outdir, output.outfilePrefix); + // Put together the analysis data + return AnalysisData.of(sampleId, + inputs.age(), + inputs.sex(), + inputs.presentHpoTerms(), + inputs.excludedHpoTerms(), + genes); } - - protected static Age parseAge(String age) { - if (age == null) { - LOGGER.debug("The age was not provided"); - return Age.ageNotKnown(); - } - try { - Period period = Period.parse(age); - LOGGER.info("Using age {}", period); - return Age.parse(period); - } catch (DateTimeParseException e) { - throw new LiricalRuntimeException("Unable to parse age '" + age + "': " + e.getMessage(), e); - } - } - } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java 
b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java deleted file mode 100644 index 54bf99d90..000000000 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/AnalysisDataParserAwareCommand.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.monarchinitiative.lirical.cli.cmd; - -import org.monarchinitiative.lirical.core.Lirical; -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.GenomeBuild; -import org.monarchinitiative.lirical.core.model.TranscriptDatabase; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.lirical.io.analysis.AnalysisDataParserFactory; - -abstract class AnalysisDataParserAwareCommand extends AbstractPrioritizeCommand { - - @Override - protected AnalysisData prepareAnalysisData(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDb) throws LiricalParseException { - HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); - AnalysisDataParserFactory parserFactory = new AnalysisDataParserFactory(sanitizer, lirical.variantParserFactory(), lirical.phenotypeService().associationData()); - return prepareAnalysisData(parserFactory); - } - - protected abstract AnalysisData prepareAnalysisData(AnalysisDataParserFactory factory) throws LiricalParseException; -} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java index 896f34ea7..5721cccd3 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/BenchmarkCommand.java @@ -3,26 +3,26 @@ import de.charite.compbio.jannovar.annotation.VariantEffect; import 
org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; +import org.monarchinitiative.lirical.cli.cmd.util.Util; import org.monarchinitiative.lirical.core.Lirical; import org.monarchinitiative.lirical.core.analysis.*; import org.monarchinitiative.lirical.core.exception.LiricalException; import org.monarchinitiative.lirical.core.io.VariantParser; import org.monarchinitiative.lirical.core.model.*; +import org.monarchinitiative.lirical.core.sanitize.InputSanitizer; +import org.monarchinitiative.lirical.core.sanitize.InputSanitizerFactory; +import org.monarchinitiative.lirical.core.sanitize.SanitationResult; +import org.monarchinitiative.lirical.core.sanitize.SanitizedInputs; import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; import org.monarchinitiative.lirical.core.service.VariantMetadataService; import org.monarchinitiative.lirical.io.analysis.PhenopacketData; -import org.monarchinitiative.lirical.io.analysis.PhenopacketImporter; -import org.monarchinitiative.lirical.io.analysis.PhenopacketImporters; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStreamWriter; +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; @@ -44,8 +44,7 @@ public class BenchmarkCommand extends LiricalConfigurationCommand { private static final Logger LOGGER = LoggerFactory.getLogger(BenchmarkCommand.class); - @CommandLine.Option(names = {"-p", "--phenopacket"}, - arity = "1..*", + @CommandLine.Parameters(arity = "1..*", description = "Path(s) to phenopacket JSON file(s).") protected List phenopacketPaths; @@ -91,22 +90,44 @@ public Integer execute() { AnalysisOptions 
analysisOptions = prepareAnalysisOptions(lirical, genomeBuild, transcriptDb); List backgroundVariants = readBackgroundVariants(lirical, genomeBuild, transcriptDb); - try (BufferedWriter writer = openWriter(outputPath); + List sanitationResults = sanitizePhenopackets(phenopacketPaths, + lirical.phenotypeService().hpo()); + + boolean proceed = true; + for (DataAndSanitationResultsAndPath result : sanitationResults) { + SanitationResult sanitationResult = result.result(); + if (!Util.phenopacketIsEligibleForAnalysis(sanitationResult, runConfiguration.validationPolicy)) { + LOGGER.info("Found issues in {}", result.path().toAbsolutePath()); + proceed = false; + LOGGER.info(result.path().toFile().getName()); + LOGGER.info(summarizeSanitationResult(sanitationResult)); + } + } + + if (!proceed) { + // Abort unless all phenopackets are eligible for analysis under the failure policy. + LOGGER.info("Aborting due to errors in phenopackets"); + return 1; + } + + try (LiricalAnalysisRunner analysisRunner = lirical.analysisRunner(); + BufferedWriter writer = openWriter(outputPath); CSVPrinter printer = CSVFormat.DEFAULT.print(writer)) { printer.printRecord("phenopacket", "background_vcf", "sample_id", "rank", "is_causal", "disease_id", "post_test_proba"); // header - for (Path phenopacketPath : phenopacketPaths) { + for (int i = 0; i < sanitationResults.size(); i++) { + DataAndSanitationResultsAndPath result = sanitationResults.get(i); + LOGGER.info("Starting the analysis of [{}/{}] {}", i + 1, sanitationResults.size(), + result.path().toFile().getName()); // 3 - prepare benchmark data per phenopacket - BenchmarkData benchmarkData = prepareBenchmarkData(lirical, backgroundVariants, phenopacketPath); + BenchmarkData benchmarkData = prepareBenchmarkData(lirical, genomeBuild, transcriptDb, backgroundVariants, result); // 4 - run the analysis. 
- LOGGER.info("Starting the analysis: {}", analysisOptions); - LiricalAnalysisRunner analysisRunner = lirical.analysisRunner(); AnalysisResults results = analysisRunner.run(benchmarkData.analysisData(), analysisOptions); // 5 - summarize the results. - String phenopacketName = phenopacketPath.toFile().getName(); + String phenopacketName = result.path().toFile().getName(); String backgroundVcf = vcfPath == null ? "" : vcfPath.toFile().getName(); writeResults(phenopacketName, backgroundVcf, benchmarkData, results, printer); } @@ -128,11 +149,8 @@ protected List checkInput() { // Check if all phenopackets are valid and die quickly if not. LOGGER.info("Checking validity of {} phenopackets", phenopacketPaths.size()); for (Path phenopacketPath : phenopacketPaths) { - try { - readPhenopacketData(phenopacketPath); - } catch (LiricalParseException e) { - errors.add("Invalid phenopacket %s: %s".formatted(phenopacketPath.toAbsolutePath(), e.getMessage())); - } + if (!Files.isRegularFile(phenopacketPath) || !Files.isReadable(phenopacketPath)) + errors.add("%s does not point to a readable file".formatted(phenopacketPath.toAbsolutePath())); } return errors; @@ -147,11 +165,12 @@ private List readBackgroundVariants(Lirical lirical, GenomeBuild genomeBuild, TranscriptDatabase transcriptDatabase) throws LiricalParseException { if (vcfPath == null) { - LOGGER.info("Path to VCF file was not provided."); + LOGGER.info("Path to VCF file was not provided"); return List.of(); } - Optional parser = lirical.variantParserFactory().forPath(vcfPath, genomeBuild, transcriptDatabase); + Optional parser = lirical.variantParserFactory() + .forPath(vcfPath, genomeBuild, transcriptDatabase); if (parser.isEmpty()) { LOGGER.warn("Cannot obtain parser for processing the VCF file {} with {} {} due to missing resources", vcfPath.toAbsolutePath(), genomeBuild, transcriptDatabase); @@ -160,7 +179,7 @@ private List readBackgroundVariants(Lirical lirical, try (VariantParser variantParser = parser.get()) { 
// Read variants - LOGGER.info("Reading background variants from {}.", vcfPath.toAbsolutePath()); + LOGGER.info("Reading background variants from {}", vcfPath.toAbsolutePath()); ProgressReporter progressReporter = new ProgressReporter(10_000, "variants"); List variants = variantParser.variantStream() .peek(v -> progressReporter.log()) @@ -173,15 +192,10 @@ private List readBackgroundVariants(Lirical lirical, } private BenchmarkData prepareBenchmarkData(Lirical lirical, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase, List backgroundVariants, - Path phenopacketPath) throws LiricalParseException { - LOGGER.info("Reading phenopacket from {}.", phenopacketPath.toAbsolutePath()); - PhenopacketData data = readPhenopacketData(phenopacketPath); - - HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); - List presentTerms = data.getHpoTerms().map(sanitizer::replaceIfObsolete).flatMap(Optional::stream).toList(); - List excludedTerms = data.getNegatedHpoTerms().map(sanitizer::replaceIfObsolete).flatMap(Optional::stream).toList(); - + DataAndSanitationResultsAndPath resultsAndPath) throws LiricalParseException { GenesAndGenotypes genes; if (phenotypeOnly) { // We omit the VCF even if provided. @@ -193,10 +207,15 @@ private BenchmarkData prepareBenchmarkData(Lirical lirical, genes = GenesAndGenotypes.empty(); else { // Annotate the causal variants found in the phenopacket. 
- FunctionalVariantAnnotator annotator = lirical.functionalVariantAnnotator(); - VariantMetadataService metadataService = lirical.variantMetadataService(); + FunctionalVariantAnnotator annotator = lirical.functionalVariantAnnotatorService() + .getFunctionalAnnotator(genomeBuild, transcriptDatabase).orElseThrow(); + VariantMetadataService metadataService = lirical.variantMetadataServiceFactory() + .getVariantMetadataService(genomeBuild).orElseThrow(); List backgroundAndCausal = new ArrayList<>(backgroundVariants.size() + 10); - for (GenotypedVariant variant : data.getVariants()) { + + backgroundAndCausal.addAll(backgroundVariants); + + for (GenotypedVariant variant : resultsAndPath.phenopacketData().variants()) { List annotations = annotator.annotate(variant.variant()); List effects = annotations.stream() .map(TranscriptAnnotation::getVariantEffects) @@ -210,47 +229,20 @@ private BenchmarkData prepareBenchmarkData(Lirical lirical, } // Read the VCF file. - genes = prepareGenesAndGenotypes(backgroundAndCausal); + genes = GenesAndGenotypes.fromVariants(List.of(resultsAndPath.phenopacketData().sampleId()), backgroundAndCausal); } } - AnalysisData analysisData = AnalysisData.of(data.getSampleId(), - data.getAge().orElse(null), - data.getSex().orElse(null), - presentTerms, - excludedTerms, + SanitationResult sanitationResult = resultsAndPath.result(); + SanitizedInputs sanitized = sanitationResult.sanitizedInputs(); + AnalysisData analysisData = AnalysisData.of(sanitized.sampleId(), + sanitized.age(), + sanitized.sex(), + sanitized.presentHpoTerms(), + sanitized.excludedHpoTerms(), genes); - return new BenchmarkData(data.getDiseaseIds().get(0), analysisData); - } - - private static PhenopacketData readPhenopacketData(Path phenopacketPath) throws LiricalParseException { - PhenopacketData data = null; - try (InputStream is = Files.newInputStream(phenopacketPath)) { - PhenopacketImporter v2 = PhenopacketImporters.v2(); - data = v2.read(is); - LOGGER.debug("Success!"); - } 
catch (Exception e) { - LOGGER.debug("Unable to parse as v2 phenopacket, trying v1."); - } - - if (data == null) { - try (InputStream is = Files.newInputStream(phenopacketPath)) { - PhenopacketImporter v1 = PhenopacketImporters.v1(); - data = v1.read(is); - LOGGER.debug("Success!"); - } catch (IOException e) { - LOGGER.debug("Unable to parser as v1 phenopacket."); - throw new LiricalParseException("Unable to parse phenopacket from " + phenopacketPath.toAbsolutePath()); - } - } - - // Check we have exactly one disease ID. - if (data.getDiseaseIds().isEmpty()) - throw new LiricalParseException("Missing disease ID which is required for the benchmark!"); - else if (data.getDiseaseIds().size() > 1) - throw new LiricalParseException("Saw >1 disease IDs {}, but we need exactly one for the benchmark!"); - return data; + return new BenchmarkData(resultsAndPath.phenopacketData().diseaseIds().get(0), analysisData); } /** @@ -288,4 +280,26 @@ private static BufferedWriter openWriter(Path outputPath) throws IOException { private record BenchmarkData(TermId diseaseId, AnalysisData analysisData) { } + + private List sanitizePhenopackets(List phenopackets, + MinimalOntology hpo) { + InputSanitizerFactory factory = new InputSanitizerFactory(hpo); + InputSanitizer sanitizer = selectSanitizer(factory); + List sanitationResults = new ArrayList<>(phenopackets.size()); + for (Path phenopacketPath : phenopackets) { + DataAndSanitationResultsAndPath resultAndPath; + try { + PhenopacketData inputs = PhenopacketUtil.readPhenopacketData(phenopacketPath); + SanitationResult sanitationResult = sanitizer.sanitize(inputs); + resultAndPath = new DataAndSanitationResultsAndPath(inputs, sanitationResult, phenopacketPath); + } catch (LiricalException e) { + resultAndPath = new DataAndSanitationResultsAndPath(null, null, phenopacketPath); + } + sanitationResults.add(resultAndPath); + } + return sanitationResults; + } + + private record DataAndSanitationResultsAndPath(PhenopacketData phenopacketData, 
SanitationResult result, Path path) { + } } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/LiricalConfigurationCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/LiricalConfigurationCommand.java index 1c3857819..eddfcc25f 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/LiricalConfigurationCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/LiricalConfigurationCommand.java @@ -9,10 +9,13 @@ import org.monarchinitiative.lirical.core.analysis.probability.PretestDiseaseProbability; import org.monarchinitiative.lirical.core.io.VariantParser; import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.lirical.core.model.*; +import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.LiricalVariant; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; +import org.monarchinitiative.lirical.core.sanitize.*; import org.monarchinitiative.lirical.io.LiricalDataException; import org.monarchinitiative.lirical.io.background.CustomBackgroundVariantFrequencyServiceFactory; -import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.ontology.data.Identified; import org.monarchinitiative.phenol.ontology.data.TermId; @@ -31,6 +34,7 @@ abstract class LiricalConfigurationCommand extends BaseCommand { private static final Logger LOGGER = LoggerFactory.getLogger(LiricalConfigurationCommand.class); + protected static final String UNKNOWN_VERSION_PLACEHOLDER = "UNKNOWN VERSION"; // ---------------------------------------------- RESOURCES -------------------------------------------------------- @CommandLine.ArgGroup(validate = false, heading = "Resource paths:%n") @@ -57,6 +61,13 @@ public 
static class DataSection { @CommandLine.Option(names = {"-b", "--background"}, description = "Path to non-default background frequency file.") public Path backgroundFrequencyFile = null; + + @CommandLine.Option(names = "--parallelism", + description = { + "The number of workers/threads to use.", + "The value must be a positive integer.", + "Default: ${DEFAULT-VALUE}"}) + public int parallelism = 1; } @@ -74,9 +85,23 @@ public static class RunConfiguration { public boolean globalAnalysisMode = false; @CommandLine.Option(names = {"--ddndv"}, - description = "Disregard a disease if no deleterious variants are found in the gene associated with the disease. " - + "Used only if running with a VCF file. (default: ${DEFAULT-VALUE})") - public boolean disregardDiseaseWithNoDeleteriousVariants = true; + description = { + "Disregard a disease if no deleterious variants are found in the gene associated with the disease.", + "Used only if running with a VCF file.", + "NOTE: the option has been DEPRECATED, use `--sdwndv` instead", + "(default: ${DEFAULT-VALUE})" + }) + // REMOVE(v2.0.0) + @Deprecated(forRemoval = true) + public Boolean disregardDiseaseWithNoDeleteriousVariants = null; + + @CommandLine.Option(names = {"--sdwndv"}, + description = { + "Include diseases even if no deleterious variants are found in the gene associated with the disease.", + "Only applicable to the HTML report when running with a VCF file (genotype-aware mode).", + "(default: ${DEFAULT-VALUE})" + }) + public boolean showDiseasesWithNoDeleteriousVariants = false; @CommandLine.Option(names = {"--transcript-db"}, paramLabel = "{REFSEQ,UCSC}", @@ -112,6 +137,18 @@ public static class RunConfiguration { "NOTE: the option has been DEPRECATED" }) public float defaultAlleleFrequency = Float.NaN; + + @CommandLine.Option(names = {"--validation-policy"}, + paramLabel = "{STRICT, LENIENT, MINIMAL}", + description = {"Validation policy for the analysis", "(default: ${DEFAULT-VALUE})."}) + public ValidationPolicy 
validationPolicy = ValidationPolicy.MINIMAL; + + @CommandLine.Option(names ={"--dry-run"}, + description = { + "Validate the input, report potential issues, and exit without running the analysis.", + "(default ${DEFAULT-VALUE})" + }) + public boolean dryRun = false; } protected List checkInput() { @@ -126,30 +163,36 @@ protected List checkInput() { dataSection.liricalDataDirectory = codeHomeDataDir; } else { String msg = "Path to LIRICAL data directory must be provided via `-d | --data` option"; - LOGGER.error(msg); errors.add(msg); } } - LOGGER.info("Using data folder at {}", dataSection.liricalDataDirectory.toAbsolutePath()); + if (dataSection.liricalDataDirectory != null) + LOGGER.info("Using data folder at {}", dataSection.liricalDataDirectory.toAbsolutePath()); + + LOGGER.debug("Analysis input validation policy: {}", runConfiguration.validationPolicy.name()); // Obsolete options must/should not be used if (dataSection.exomiserDatabase != null) { // Check the obsolete `-e | --exomiser` option is not being used. String msg = "`-e | --exomiser` option has been deprecated. Use `-e19 or -e38` to set paths to Exomiser variant databases for hg19 and hg38, respectively"; - LOGGER.error(msg); errors.add(msg); } if (!Float.isNaN(runConfiguration.defaultAlleleFrequency)) { String msg = "`--default-allele-frequency` option has been deprecated."; - LOGGER.error(msg); + LOGGER.warn(msg); + } + + if (runConfiguration.disregardDiseaseWithNoDeleteriousVariants != null) { + String msg = "`--ddndv` option has been deprecated and must not be used. Use `--sdwndv` if you want to show all diseases in the HTML report."; + LOGGER.warn(msg); + errors.add(msg); } Optional genomeBuild = GenomeBuild.parse(getGenomeBuild()); if (genomeBuild.isEmpty()) { // We must have genome build! String msg = "Genome build must be set"; - LOGGER.error(msg); errors.add(msg); } else { // Check Exomiser db seem to match the genome build. 
@@ -157,20 +200,23 @@ protected List checkInput() { case HG19 -> { if (dataSection.exomiserHg19Database == null && dataSection.exomiserHg38Database != null) { String msg = "Genome build set to %s but Exomiser variant database is set for %s: %s".formatted(GenomeBuild.HG19, GenomeBuild.HG38, dataSection.exomiserHg38Database.toAbsolutePath()); - LOGGER.error(msg); errors.add(msg); } } case HG38 -> { if (dataSection.exomiserHg38Database == null && dataSection.exomiserHg19Database != null) { String msg = "Genome build set to %s but Exomiser variant database is set for %s: %s".formatted(GenomeBuild.HG38, GenomeBuild.HG19, dataSection.exomiserHg19Database.toAbsolutePath()); - LOGGER.error(msg); errors.add(msg); } } } } + if (dataSection.parallelism <= 0) { + String msg = "Parallelism must be a positive integer but was %d".formatted(dataSection.parallelism); + errors.add(msg); + } + return errors; } @@ -200,7 +246,8 @@ protected Lirical bootstrapLirical(GenomeBuild genomeBuild) throws LiricalDataEx builder.backgroundVariantFrequencyServiceFactory(backgroundFreqFactory); } - return builder.build(); + return builder.parallelism(dataSection.parallelism) + .build(); } protected abstract String getGenomeBuild(); @@ -256,65 +303,127 @@ protected AnalysisOptions prepareAnalysisOptions(Lirical lirical, GenomeBuild ge PretestDiseaseProbability pretestDiseaseProbability = PretestDiseaseProbabilities.uniform(diseaseIds); builder.pretestProbability(pretestDiseaseProbability); - LOGGER.debug("Disregarding diseases with no deleterious variants? {}", runConfiguration.disregardDiseaseWithNoDeleteriousVariants); - builder.disregardDiseaseWithNoDeleteriousVariants(runConfiguration.disregardDiseaseWithNoDeleteriousVariants); + LOGGER.debug("Showing diseases with no deleterious variants in the gene associated with the disease? 
{}", runConfiguration.showDiseasesWithNoDeleteriousVariants); + builder.includeDiseasesWithNoDeleteriousVariants(!runConfiguration.showDiseasesWithNoDeleteriousVariants); return builder.build(); } - protected static GenesAndGenotypes readVariantsFromVcfFile(String sampleId, - Path vcfPath, - GenomeBuild genomeBuild, - TranscriptDatabase transcriptDatabase, - VariantParserFactory parserFactory) throws LiricalParseException { - if (parserFactory == null) { - LOGGER.warn("Cannot process the provided VCF file {}, resources are not set.", vcfPath.toAbsolutePath()); - return GenesAndGenotypes.empty(); - } - + protected static SampleIdAndGenesAndGenotypes readVariantsFromVcfFile(String sampleId, + Path vcfPath, + GenomeBuild genomeBuild, + TranscriptDatabase transcriptDatabase, + VariantParserFactory parserFactory) throws LiricalParseException { + LOGGER.debug("Getting variant parser to parse a VCF file using {} assembly and {} transcripts", genomeBuild, transcriptDatabase); Optional parser = parserFactory.forPath(vcfPath, genomeBuild, transcriptDatabase); if (parser.isEmpty()) { - LOGGER.warn("Cannot obtain parser for processing the VCF file {} with {} {} due to missing resources", - vcfPath.toAbsolutePath(), genomeBuild, transcriptDatabase); - return GenesAndGenotypes.empty(); + throw new LiricalParseException( + "Cannot obtain parser for processing the VCF file %s with %s %s due to missing resources" + .formatted(vcfPath.toAbsolutePath(), genomeBuild, transcriptDatabase) + ); } - List variants; + try (VariantParser variantParser = parser.get()) { - // Ensure the VCF file contains the sample - if (!variantParser.sampleNames().contains(sampleId)) - throw new LiricalParseException("The sample " + sampleId + " is not present in VCF at '" + vcfPath.toAbsolutePath() + '\''); - LOGGER.debug("Found sample {} in the VCF file at {}", sampleId, vcfPath.toAbsolutePath()); + Collection sampleNames = variantParser.sampleNames(); + String usedId = validateSampleId(sampleId, vcfPath, 
sampleNames); // Read variants LOGGER.info("Reading variants from {}", vcfPath.toAbsolutePath()); ProgressReporter progressReporter = new ProgressReporter(); - variants = variantParser.variantStream() + List variants = variantParser.variantStream() .peek(v -> progressReporter.log()) .toList(); progressReporter.summarize(); + + GenesAndGenotypes genesAndGenotypes = GenesAndGenotypes.fromVariants(sampleNames, variants); + return new SampleIdAndGenesAndGenotypes(usedId, genesAndGenotypes); } catch (Exception e) { throw new LiricalParseException(e); } + } - return prepareGenesAndGenotypes(variants); + /** + * Check if the VCF file is a single-sample or multi-sample VCF file with the given sample ID. If `sampleId` is + * {@code null}, we can only accept a single-sample VCF, mainly as a convenience. + * + * @throws LiricalParseException if the VCF includes no sample data, the sample is not present, + * or it is a multi-sample file and the sample ID is unset. + */ + private static String validateSampleId(String sampleId, + Path vcfPath, + Collection sampleNames) throws LiricalParseException { + if (sampleNames.isEmpty()) + throw new LiricalParseException("No samples found in the VCF file at '" + vcfPath.toAbsolutePath() + '\''); + if (sampleId == null) { + if (sampleNames.size() != 1) { + // The user did not provide the sample ID. We can proceed if the variant source contains 1 subject only. + throw new LiricalParseException(("The VCF file includes %d samples but the ID of the index sample " + + "is unset. Set the sample ID if VCF reports >1 sample").formatted(sampleNames.size())); + } else { + String inferredId = sampleNames.iterator().next(); + LOGGER.info("Sample ID is unset. 
However, since the VCF file includes just a single sample (" + + inferredId + "), we will proceed with that one"); + return inferredId; + } + } else if (!sampleNames.contains(sampleId)) { + String included = sampleNames.stream().collect(Collectors.joining(", ", "{", "}")); + throw new LiricalParseException( + "The VCF at '%s' includes samples %s but it does not include the index sample %s" + .formatted(vcfPath.toAbsolutePath(), included, sampleId) + ); + } else { + LOGGER.debug("Found the index sample {} in the VCF file at {}", sampleId, vcfPath.toAbsolutePath()); + } + return sampleId; } - protected static GenesAndGenotypes prepareGenesAndGenotypes(List variants) { - // Group variants by Entrez ID. - Map> gene2Genotype = new HashMap<>(); - for (LiricalVariant variant : variants) { - variant.annotations().stream() - .map(TranscriptAnnotation::getGeneId) - .distinct() - .forEach(geneId -> gene2Genotype.computeIfAbsent(geneId, e -> new LinkedList<>()).add(variant)); + protected static String summarizeSanitationResult(SanitationResult sanitationResult) { + if (sanitationResult.hasErrorOrWarnings()) { + Map> byLevel = sanitationResult.issues().stream() + .collect(Collectors.groupingBy(SanityIssue::level)); + + List errors = byLevel.getOrDefault(SanityLevel.ERROR, List.of()); + List warnings = byLevel.getOrDefault(SanityLevel.WARNING, List.of()); + + List lines = new ArrayList<>(); + lines.add("Input sanitation found %d errors and %d warnings".formatted(errors.size(), warnings.size())); + if (!errors.isEmpty()) { + lines.add(" Errors \uD83D\uDE31"); + for (SanityIssue issue : errors) { + lines.add(" - %s. %s".formatted(issue.message(), issue.solution())); + } + } + + if (!warnings.isEmpty()) { + lines.add(" Warnings \uD83D\uDE27"); + for (SanityIssue issue : warnings) { + lines.add(" - %s. 
%s".formatted(issue.message(), issue.solution())); + } + } + + return String.join(System.lineSeparator(), lines); + } else { + return "Input sanitation found no issues"; } + } - // Collect the variants into Gene2Genotype container - List g2g = gene2Genotype.entrySet().stream() - .map(e -> Gene2Genotype.of(e.getKey(), e.getValue())) - .toList(); + protected String figureOutExomiserPath() { + if (dataSection.exomiserHg19Database == null && dataSection.exomiserHg38Database == null) { + return ""; + } else { + if (dataSection.exomiserHg19Database == null) { + return dataSection.exomiserHg38Database.toAbsolutePath().toString(); + } else { + return dataSection.exomiserHg19Database.toAbsolutePath().toString(); + } + } + } - return GenesAndGenotypes.of(g2g); + protected InputSanitizer selectSanitizer(InputSanitizerFactory factory) { + return switch (runConfiguration.validationPolicy) { + case STRICT, LENIENT -> factory.forType(SanitizerType.COMPREHENSIVE); + case MINIMAL -> factory.forType(SanitizerType.MINIMAL); + }; } protected static void reportElapsedTime(long startTime, long stopTime) { @@ -340,4 +449,7 @@ private static Path codeHomeDir() { return Path.of(codePath).toAbsolutePath().getParent(); } + protected record SampleIdAndGenesAndGenotypes(String sampleId, GenesAndGenotypes genesAndGenotypes) { + } + } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/OutputCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/OutputCommand.java new file mode 100644 index 000000000..3e3801310 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/OutputCommand.java @@ -0,0 +1,77 @@ +package org.monarchinitiative.lirical.cli.cmd; + +import org.monarchinitiative.lirical.core.output.LrThreshold; +import org.monarchinitiative.lirical.core.output.MinDiagnosisCount; +import org.monarchinitiative.lirical.core.output.OutputFormat; +import org.monarchinitiative.lirical.core.output.OutputOptions; +import 
picocli.CommandLine; + +import java.nio.file.Path; +import java.util.List; +import java.util.Set; + +public abstract class OutputCommand extends LiricalConfigurationCommand { + + // ---------------------------------------------- OUTPUTS ---------------------------------------------------------- + @CommandLine.ArgGroup(validate = false, heading = "Output options:%n") + public Output output = new Output(); + + public static class Output { + @CommandLine.Option(names = {"-o", "--output-directory"}, + description = "Directory into which to write output (default: ${DEFAULT-VALUE}).") + public Path outdir = Path.of(""); + + @CommandLine.Option(names = {"-f", "--output-format"}, + arity = "0..*", + description = { + "An output format to use for writing the results, can be provided multiple times.", + "Choose from {${COMPLETION-CANDIDATES}}", + "Default: ${DEFAULT-VALUE}" + }) + public Set outputFormats = Set.of(OutputFormat.HTML); + /** + * Prefix of the output file. For instance, if the user enters {@code -x sample1} and an HTML file is output, + * the name of the HTML file will be {@code sample1.html}. If a TSV file is output, the name of the file will + * be {@code sample1.tsv}. + */ + @CommandLine.Option(names = {"-x", "--prefix"}, + description = "Prefix of outfile (default: ${DEFAULT-VALUE}).") + public String outfilePrefix = "lirical"; + + @CommandLine.Option(names = {"-t", "--threshold"}, + description = "Minimum post-test probability to show diagnosis in HTML output. 
The value should range between [0,1].") + public Double lrThreshold = null; + + @CommandLine.Option(names = {"-m", "--mindiff"}, + description = "Minimal number of differential diagnoses to show.") + public Integer minDifferentialsToShow = null; + + @CommandLine.Option(names = {"--display-all-variants"}, + description = "Display all variants in output, not just variants passing pathogenicity threshold (default ${DEFAULT-VALUE})") + public boolean displayAllVariants = false; + } + + protected List checkInput() { + List errors = super.checkInput(); + + // thresholds + if (output.lrThreshold != null && output.minDifferentialsToShow != null) { + String msg = "Only one of the options -t/--threshold and -m/--mindiff can be used at once."; + errors.add(msg); + } + if (output.lrThreshold != null) { + if (output.lrThreshold < 0.0 || output.lrThreshold > 1.0) { + String msg = "Post-test probability (-t/--threshold) must be between 0.0 and 1.0."; + errors.add(msg); + } + } + return errors; + } + + protected OutputOptions createOutputOptions(String prefix) { + LrThreshold lrThreshold = output.lrThreshold == null ? LrThreshold.notInitialized() : LrThreshold.setToUserDefinedThreshold(output.lrThreshold); + MinDiagnosisCount minDiagnosisCount = output.minDifferentialsToShow == null ? 
MinDiagnosisCount.notInitialized() : MinDiagnosisCount.setToUserDefinedMinCount(output.minDifferentialsToShow); + return new OutputOptions(lrThreshold, minDiagnosisCount, runConfiguration.pathogenicityThreshold, + output.displayAllVariants, output.outdir, prefix); + } +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java index 153dcd7b7..2b051c9a4 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketCommand.java @@ -1,25 +1,10 @@ package org.monarchinitiative.lirical.cli.cmd; -import org.monarchinitiative.lirical.core.Lirical; -import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; -import org.monarchinitiative.lirical.core.model.GenomeBuild; -import org.monarchinitiative.lirical.core.model.TranscriptDatabase; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.lirical.io.analysis.*; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.monarchinitiative.lirical.core.sanitize.SanitationInputs; import picocli.CommandLine; -import java.io.BufferedInputStream; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; import java.nio.file.Path; -import java.util.List; -import java.util.Optional; /** * Run LIRICAL from a Phenopacket -- with or without accompanying VCF file. 
@@ -34,8 +19,6 @@ description = "Run LIRICAL from a Phenopacket.") public class PhenopacketCommand extends AbstractPrioritizeCommand { - private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketCommand.class); - @CommandLine.Option(names = {"--assembly"}, paramLabel = "{hg19,hg38}", description = "Genome build (default: ${DEFAULT-VALUE}).") @@ -48,7 +31,7 @@ public class PhenopacketCommand extends AbstractPrioritizeCommand { @CommandLine.Option(names = {"--vcf"}, description = "Path to a VCF file. This path has priority over any VCF files described in phenopacket.") - public Path vcfPath; + public String vcfPath; @Override protected String getGenomeBuild() { @@ -56,52 +39,21 @@ protected String getGenomeBuild() { } @Override - protected AnalysisData prepareAnalysisData(Lirical lirical, - GenomeBuild genomeBuild, - TranscriptDatabase transcriptDb) throws LiricalParseException { - LOGGER.info("Reading phenopacket from {}", phenopacketPath.toAbsolutePath()); - - PhenopacketData data = null; - try (InputStream is = new BufferedInputStream(new FileInputStream(phenopacketPath.toFile()))) { - PhenopacketImporter v2 = PhenopacketImporters.v2(); - data = v2.read(is); - LOGGER.info("Success!"); - } catch (PhenopacketImportException | IOException e) { - LOGGER.info("Unable to parse as v2 phenopacket, trying v1"); - } - - if (data == null) { - try (InputStream is = new BufferedInputStream(new FileInputStream(phenopacketPath.toFile()))) { - PhenopacketImporter v1 = PhenopacketImporters.v1(); - data = v1.read(is); - } catch (PhenopacketImportException | IOException e) { - LOGGER.info("Unable to parse as v1 phenopacket"); - throw new LiricalParseException("Unable to parse phenopacket from " + phenopacketPath.toAbsolutePath()); - } - } - - HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); - List presentTerms = data.getHpoTerms().map(sanitizer::replaceIfObsolete).flatMap(Optional::stream).toList(); - List excludedTerms = 
data.getNegatedHpoTerms().map(sanitizer::replaceIfObsolete).flatMap(Optional::stream).toList(); - - // Read VCF file. - GenesAndGenotypes genes; - // Path to VCF set via CLI has priority. - Path vcfPath = this.vcfPath != null - ? this.vcfPath - : data.getVcfPath().orElse(null); - String sampleId = data.getSampleId(); - if (vcfPath == null) { - genes = GenesAndGenotypes.empty(); - } else { - genes = readVariantsFromVcfFile(sampleId, vcfPath, genomeBuild, transcriptDb, lirical.variantParserFactory()); - } - return AnalysisData.of(sampleId, - data.getAge().orElse(null), - data.getSex().orElse(null), - presentTerms, - excludedTerms, - genes); + protected SanitationInputs procureSanitationInputs() throws LiricalParseException { + // We could have returned `data` right away, but we must ensure that VCF handed over via CLI has + // a greater priority + SanitationInputs data = PhenopacketUtil.readPhenopacketData(phenopacketPath); + + String vcf = vcfPath != null + ? vcfPath + : data.vcf(); + + return new SanitationInputsDefault(data.sampleId(), + data.presentHpoTerms(), + data.excludedHpoTerms(), + data.age(), + data.sex(), + vcf); } } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketUtil.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketUtil.java new file mode 100644 index 000000000..3ea15b526 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PhenopacketUtil.java @@ -0,0 +1,47 @@ +package org.monarchinitiative.lirical.cli.cmd; + +import org.monarchinitiative.lirical.core.analysis.LiricalParseException; +import org.monarchinitiative.lirical.io.analysis.PhenopacketData; +import org.monarchinitiative.lirical.io.analysis.PhenopacketImportException; +import org.monarchinitiative.lirical.io.analysis.PhenopacketImporter; +import org.monarchinitiative.lirical.io.analysis.PhenopacketImporters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +public class PhenopacketUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketUtil.class); + + private PhenopacketUtil() { + } + + public static PhenopacketData readPhenopacketData(Path phenopacket) throws LiricalParseException { + LOGGER.trace("Reading phenopacket from {}", phenopacket.toAbsolutePath()); + PhenopacketData data = null; + try (InputStream is = new BufferedInputStream(Files.newInputStream(phenopacket))) { + PhenopacketImporter v2 = PhenopacketImporters.v2(); + data = v2.read(is); + LOGGER.trace("Success!"); + } catch (PhenopacketImportException | IOException e) { + LOGGER.trace("Unable to parse as v2 phenopacket, trying v1"); + } + + if (data == null) { + try (InputStream is = new BufferedInputStream(Files.newInputStream(phenopacket))) { + PhenopacketImporter v1 = PhenopacketImporters.v1(); + data = v1.read(is); + } catch (PhenopacketImportException | IOException e) { + LOGGER.trace("Unable to parse as v1 phenopacket"); + throw new LiricalParseException("Unable to parse phenopacket from " + phenopacket.toAbsolutePath()); + } + } + return data; + } + +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java index 3e2b2019b..ed206f8e1 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/PrioritizeCommand.java @@ -1,20 +1,11 @@ package org.monarchinitiative.lirical.cli.cmd; -import org.monarchinitiative.lirical.core.Lirical; -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; 
-import org.monarchinitiative.lirical.core.model.GenomeBuild; -import org.monarchinitiative.lirical.core.model.Sex; -import org.monarchinitiative.lirical.core.model.TranscriptDatabase; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.phenol.ontology.data.TermId; +import org.monarchinitiative.lirical.core.sanitize.SanitationInputs; import picocli.CommandLine; -import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Optional; @CommandLine.Command(name = "prioritize", aliases = {"R"}, @@ -38,11 +29,11 @@ public class PrioritizeCommand extends AbstractPrioritizeCommand { @CommandLine.Option(names = {"--vcf"}, description = "Path to VCF file (optional).") - public Path vcfPath = null; + public String vcfPath = null; @CommandLine.Option(names = {"--sample-id"}, description = "Proband's identifier (default: ${DEFAULT-VALUE}).") - public String sampleId = "Sample"; + public String sampleId = null; @CommandLine.Option(names = {"--age"}, description = "Proband's age.") @@ -51,7 +42,7 @@ public class PrioritizeCommand extends AbstractPrioritizeCommand { @CommandLine.Option(names = {"--sex"}, paramLabel = "{MALE,FEMALE,UNKNOWN}", description = "Proband's sex (default: ${DEFAULT-VALUE}).") - public Sex sex = Sex.UNKNOWN; + public String sex = "UNKNOWN"; @Override @@ -60,43 +51,24 @@ protected String getGenomeBuild() { } @Override - protected AnalysisData prepareAnalysisData(Lirical lirical, - GenomeBuild genomeBuild, - TranscriptDatabase transcriptDb) throws LiricalParseException { - HpoTermSanitizer sanitizer = new HpoTermSanitizer(lirical.phenotypeService().hpo()); - - List observedTerms; - if (observed != null) - observedTerms = Arrays.stream(observed.split(",")) + protected SanitationInputs procureSanitationInputs() { + List presentTerms = new ArrayList<>(); + if (observed != null) { + Arrays.stream(observed.split(",")) .map(String::trim) - .map(TermId::of) - 
.map(sanitizer::replaceIfObsolete) - .flatMap(Optional::stream) .distinct() - .toList(); - else - observedTerms = List.of(); + .forEachOrdered(presentTerms::add); + } - List negatedTerms; - if (negated != null) - negatedTerms = Arrays.stream(negated.split(",")) + List excludedTerms = new ArrayList<>(); + if (negated != null) { + Arrays.stream(negated.split(",")) .map(String::trim) - .map(TermId::of) - .map(sanitizer::replaceIfObsolete) - .flatMap(Optional::stream) .distinct() - .toList(); - else - negatedTerms = List.of(); - - GenesAndGenotypes genes; - if (vcfPath == null) { - genes = GenesAndGenotypes.empty(); - } else { - genes = readVariantsFromVcfFile(sampleId, vcfPath, genomeBuild, transcriptDb, lirical.variantParserFactory()); + .forEachOrdered(excludedTerms::add); } - return AnalysisData.of(sampleId, parseAge(age), sex, observedTerms, negatedTerms, genes); + return new SanitationInputsDefault(sampleId, presentTerms, excludedTerms, age, sex, vcfPath); } } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/SanitationInputsDefault.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/SanitationInputsDefault.java new file mode 100644 index 000000000..2ebf5fa2a --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/SanitationInputsDefault.java @@ -0,0 +1,15 @@ +package org.monarchinitiative.lirical.cli.cmd; + +import org.monarchinitiative.lirical.core.sanitize.SanitationInputs; + +import java.util.List; + +record SanitationInputsDefault( + String sampleId, + List presentHpoTerms, + List excludedHpoTerms, + String age, + String sex, + String vcf +) implements SanitationInputs { +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/ValidationPolicy.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/ValidationPolicy.java new file mode 100644 index 000000000..9f6bad8a2 --- /dev/null +++ 
b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/ValidationPolicy.java @@ -0,0 +1,21 @@ +package org.monarchinitiative.lirical.cli.cmd; + +/** + * What to do in case of errors or imperfections in the input data. + */ +public enum ValidationPolicy { + /** + * Only run the analysis if no errors or warnings are found. + */ + STRICT, + + /** + * Run the analysis if the user input contains non-controversial issues that can be fixed automatically. + */ + LENIENT, + + /** + * Run the bare minimum of checks required to run the analysis but not even a bit more. + */ + MINIMAL +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/YamlCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/YamlCommand.java index 11b1f622e..5e0ac049b 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/YamlCommand.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/YamlCommand.java @@ -1,10 +1,8 @@ package org.monarchinitiative.lirical.cli.cmd; -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.analysis.AnalysisDataParser; import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.io.analysis.AnalysisDataFormat; -import org.monarchinitiative.lirical.io.analysis.AnalysisDataParserFactory; +import org.monarchinitiative.lirical.cli.yaml.YamlParser; +import org.monarchinitiative.lirical.core.sanitize.SanitationInputs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; @@ -24,7 +22,7 @@ sortOptions = false, mixinStandardHelpOptions = true, description = "Run LIRICAL from a YAML file.") -public class YamlCommand extends AnalysisDataParserAwareCommand { +public class YamlCommand extends AbstractPrioritizeCommand { private static final Logger LOGGER = LoggerFactory.getLogger(YamlCommand.class); @CommandLine.Option(names = {"-y","--yaml"}, @@ -43,12 
+41,10 @@ protected String getGenomeBuild() { } @Override - protected AnalysisData prepareAnalysisData(AnalysisDataParserFactory factory) throws LiricalParseException { - AnalysisDataParser parser = factory.forFormat(AnalysisDataFormat.YAML); - + protected SanitationInputs procureSanitationInputs() throws LiricalParseException { LOGGER.info("Parsing YAML input file at {}", yamlPath); try (InputStream is = Files.newInputStream(yamlPath)) { - return parser.parse(is); + return YamlParser.parse(is); } catch (IOException e) { throw new LiricalParseException(e); } diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/ExperimentalCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/ExperimentalCommand.java new file mode 100644 index 000000000..cd3f757e7 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/ExperimentalCommand.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.lirical.cli.cmd.experimental; + +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +@CommandLine.Command(name = "experimental", + hidden = true, + subcommands = { + PhenopacketsCommand.class, + }, + sortOptions = false, + mixinStandardHelpOptions = true, + description = "Run experimental LIRICAL commands.") +public class ExperimentalCommand implements Callable { + + @Override + public Integer call() { + // Work is done in subcommands. 
+ return 0; + } +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/PhenopacketsCommand.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/PhenopacketsCommand.java new file mode 100644 index 000000000..93a63487d --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/PhenopacketsCommand.java @@ -0,0 +1,193 @@ +package org.monarchinitiative.lirical.cli.cmd.experimental; + +import org.monarchinitiative.lirical.cli.cmd.OutputCommand; +import org.monarchinitiative.lirical.cli.cmd.PhenopacketUtil; +import org.monarchinitiative.lirical.cli.cmd.util.Util; +import org.monarchinitiative.lirical.core.Lirical; +import org.monarchinitiative.lirical.core.analysis.*; +import org.monarchinitiative.lirical.core.exception.LiricalException; +import org.monarchinitiative.lirical.core.model.FilteringStats; +import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.output.*; +import org.monarchinitiative.lirical.core.sanitize.*; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.IOException; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * Run LIRICAL in phenotype-only mode on a collection of phenopackets. 
+ */ +@CommandLine.Command(name = "phenopackets", + sortOptions = false, + mixinStandardHelpOptions = true, + description = "Run LIRICAL for several phenopackets at once in phenotype-only mode.") +public class PhenopacketsCommand extends OutputCommand { + + private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketsCommand.class); + + @CommandLine.Option(names = {"--assembly"}, + paramLabel = "{hg19,hg38}", + description = "Genome build (default: ${DEFAULT-VALUE}).") + public String genomeBuild = "hg38"; + + @CommandLine.Parameters( + paramLabel = "phenopacket file(s)", + description = { + "Input phenopacket(s).", + } + ) + public List phenopacketPaths = null; + + @Override + protected String getGenomeBuild() { + return genomeBuild; + } + + @Override + protected Integer execute() { + long start = System.currentTimeMillis(); + // 0 - check input + List errors = checkInput(); + if (!errors.isEmpty()) { + LOGGER.error("Errors:"); + for (String error : errors) + LOGGER.error(" {}", error); + return 1; + } + + Lirical lirical; + GenomeBuild genomeBuild; + try { + genomeBuild = parseGenomeBuild(getGenomeBuild()); + LOGGER.debug("Using genome build {}", genomeBuild); + LOGGER.debug("Using {} transcripts", runConfiguration.transcriptDb); + + // 1 - bootstrap the app + lirical = bootstrapLirical(genomeBuild); + LOGGER.info("Configured LIRICAL {}", lirical.version() + .map("v%s"::formatted) + .orElse(UNKNOWN_VERSION_PLACEHOLDER)); + } catch (LiricalException e) { + LOGGER.error("Error: {}", e.getMessage()); + LOGGER.debug("More info:", e); + return 1; + } + + // 2 - sanitize the input data + LOGGER.info("Reading and sanitizing {} phenopacket(s)", phenopacketPaths.size()); + + MinimalOntology hpo = lirical.phenotypeService().hpo(); + List sanitationResults = sanitizePhenopackets(phenopacketPaths, hpo); + if (runConfiguration.dryRun) { + // summarize and quit + for (SanitationResultsAndPath result : sanitationResults) { + LOGGER.info("Summary for {}", 
result.path().toAbsolutePath()); + LOGGER.info(summarizeSanitationResult(result.result())); + } + return 0; + } + + // 3 - process phenopackets + LOGGER.info("Processing phenopackets"); + AnalysisOptions analysisOptions = prepareAnalysisOptions(lirical, genomeBuild, runConfiguration.transcriptDb); + + try (LiricalAnalysisRunner analysisRunner = lirical.analysisRunner()) { + for (SanitationResultsAndPath result : sanitationResults) { + SanitationResult sanitationResult = result.result(); + if (!Util.phenopacketIsEligibleForAnalysis(sanitationResult, runConfiguration.validationPolicy)) { + LOGGER.info(result.path().toAbsolutePath().toString()); + LOGGER.info(summarizeSanitationResult(sanitationResult)); + } else { + LOGGER.info("Processing {}", result.path().toAbsolutePath()); + } + try { + SanitizedInputs sanitized = sanitationResult.sanitizedInputs(); + AnalysisData analysisData = AnalysisData.of(sanitized.sampleId(), + sanitized.age(), sanitized.sex(), + sanitized.presentHpoTerms(), + sanitized.excludedHpoTerms(), + GenesAndGenotypes.empty()); + + LOGGER.debug("Running the analysis"); + AnalysisResults results = analysisRunner.run(analysisData, analysisOptions); + + LOGGER.debug("Writing out the results"); + FilteringStats filteringStats = analysisData.genes().computeFilteringStats(); + AnalysisResultsMetadata metadata = AnalysisResultsMetadata.builder() + .setLiricalVersion(lirical.version().orElse(UNKNOWN_VERSION_PLACEHOLDER)) + .setHpoVersion(hpo.version().orElse(UNKNOWN_VERSION_PLACEHOLDER)) + .setTranscriptDatabase(runConfiguration.transcriptDb.toString()) + .setLiricalPath(dataSection.liricalDataDirectory.toAbsolutePath().toString()) + .setExomiserPath(figureOutExomiserPath()) + .setAnalysisDate(LocalDateTime.now().toString()) + .setSampleName(analysisData.sampleId()) + .setnPassingVariants(filteringStats.nPassingVariants()) + .setnFilteredVariants(filteringStats.nFilteredVariants()) + .setGenesWithVar(filteringStats.genesWithVariants()) + 
.setGlobalMode(runConfiguration.globalAnalysisMode) + .build(); + + AnalysisResultWriterFactory factory = lirical.analysisResultsWriterFactory(); + + OutputOptions outputOptions = createOutputOptions(sanitized.sampleId()); + for (OutputFormat fmt : output.outputFormats) { + Optional writer = factory.getWriter(fmt); + if (writer.isPresent()) { + writer.get().process(analysisData, results, metadata, outputOptions); + } + } + } catch (IOException | LiricalException e) { + LOGGER.error("Error processing {}: {}", result.path(), e.getMessage()); + LOGGER.debug("More info:", e); + return 1; + } + } + reportElapsedTime(start, System.currentTimeMillis()); + } catch (IOException e) { + LOGGER.error("Error occurred: {}", e.getMessage()); + LOGGER.debug("More info:", e); + return 1; + } + + return 0; + } + + protected List checkInput() { + List errors = super.checkInput(); + + if (phenopacketPaths == null || phenopacketPaths.isEmpty()) + errors.add("At least one phenopacket path must be provided"); + + return errors; + } + + private List sanitizePhenopackets(List phenopackets, + MinimalOntology hpo) { + InputSanitizerFactory factory = new InputSanitizerFactory(hpo); + InputSanitizer sanitizer = selectSanitizer(factory); + + List sanitationResults = new ArrayList<>(phenopackets.size()); + for (Path phenopacketPath : phenopackets) { + try { + SanitationInputs inputs = PhenopacketUtil.readPhenopacketData(phenopacketPath); + SanitationResult sanitationResult = sanitizer.sanitize(inputs); + sanitationResults.add(new SanitationResultsAndPath(sanitationResult, phenopacketPath)); + } catch (LiricalException e) { + sanitationResults.add(new SanitationResultsAndPath(null, phenopacketPath)); + } + } + return sanitationResults; + } + + private record SanitationResultsAndPath(SanitationResult result, Path path) { + } +} diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/package-info.java 
b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/package-info.java new file mode 100644 index 000000000..c22c6f573 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/experimental/package-info.java @@ -0,0 +1,4 @@ +/** + * Package with experimental LIRICAL commands. The commands are hidden from the public CLI. + */ +package org.monarchinitiative.lirical.cli.cmd.experimental; \ No newline at end of file diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/package-info.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/package-info.java index 0e5e4c8c4..6eee9015e 100644 --- a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/package-info.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/package-info.java @@ -1,2 +1,4 @@ -/** Commands to run LIRICAL in different configurations. */ +/** + * Commands to run LIRICAL in different configurations. + */ package org.monarchinitiative.lirical.cli.cmd; \ No newline at end of file diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/util/Util.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/util/Util.java new file mode 100644 index 000000000..d92c8bf27 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/cmd/util/Util.java @@ -0,0 +1,15 @@ +package org.monarchinitiative.lirical.cli.cmd.util; + +import org.monarchinitiative.lirical.cli.cmd.ValidationPolicy; +import org.monarchinitiative.lirical.core.sanitize.SanitationResult; + +public class Util { + private Util(){} + + public static boolean phenopacketIsEligibleForAnalysis(SanitationResult result, ValidationPolicy validationPolicy) { + return switch (validationPolicy) { + case STRICT -> !result.hasErrorOrWarnings(); + case LENIENT, MINIMAL -> !result.hasErrors(); + }; + } +} diff --git 
a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlConfig.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlConfig.java similarity index 77% rename from lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlConfig.java rename to lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlConfig.java index 04c7ec1d6..d8f73771d 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlConfig.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlConfig.java @@ -1,4 +1,6 @@ -package org.monarchinitiative.lirical.io.analysis; +package org.monarchinitiative.lirical.cli.yaml; + +import org.monarchinitiative.lirical.core.sanitize.SanitationInputs; import java.nio.file.Path; import java.util.List; @@ -8,7 +10,7 @@ * This class is used to input the YAML configuration file. * @author Peter Robinson */ -public class YamlConfig { +public class YamlConfig implements SanitationInputs { private String sampleId; private List hpoIds; @@ -53,6 +55,21 @@ public List getHpoIds() { public List getNegatedHpoIds() { return negatedHpoIds == null ? 
List.of() : negatedHpoIds; } + @Override + public String sampleId() { + return sampleId; + } + + @Override + public List presentHpoTerms() { + return hpoIds == null ? List.of() : hpoIds; // null-safe, mirrors getNegatedHpoIds() + } + + @Override + public List excludedHpoTerms() { + return getNegatedHpoIds(); // null-safe: empty list when no negated terms were provided + } + public String age() { return age; } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlParser.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlParser.java similarity index 90% rename from lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlParser.java rename to lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlParser.java index 21e7e9fab..a9bb5f0a0 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlParser.java +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/YamlParser.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.io.analysis; +package org.monarchinitiative.lirical.cli.yaml; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; @@ -14,6 +14,8 @@ */ public class YamlParser { + private YamlParser(){} + public static YamlConfig parse(Path path) throws IOException { try (InputStream is = Files.newInputStream(path)) { return parse(is); diff --git a/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/package-info.java b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/package-info.java new file mode 100644 index 000000000..cd8d95560 --- /dev/null +++ b/lirical-cli/src/main/java/org/monarchinitiative/lirical/cli/yaml/package-info.java @@ -0,0 +1,4 @@ +/** + * Read user input formatted into YAML file. 
+ */ +package org.monarchinitiative.lirical.cli.yaml; \ No newline at end of file diff --git a/lirical-cli/src/main/resources/logback.xml b/lirical-cli/src/main/resources/logback.xml index 6d55578e9..724be14f0 100644 --- a/lirical-cli/src/main/resources/logback.xml +++ b/lirical-cli/src/main/resources/logback.xml @@ -1,6 +1,9 @@ + + + System.err diff --git a/lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/TestResources.java b/lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/TestResources.java new file mode 100644 index 000000000..a2dd8723d --- /dev/null +++ b/lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/TestResources.java @@ -0,0 +1,15 @@ +package org.monarchinitiative.lirical.cli; + +import java.nio.file.Path; + +/** + * Utility class with lazily-loaded resources for testing + */ +public class TestResources { + + public static final Path TEST_BASE = Path.of("src/test/resources"); + public static final Path LIRICAL_TEST_BASE = TestResources.TEST_BASE.resolve("org").resolve("monarchinitiative").resolve("lirical").resolve("cli"); + + private TestResources() { + } +} diff --git a/lirical-io/src/test/java/org/monarchinitiative/lirical/io/analysis/YamlParserTest.java b/lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/yaml/YamlParserTest.java similarity index 93% rename from lirical-io/src/test/java/org/monarchinitiative/lirical/io/analysis/YamlParserTest.java rename to lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/yaml/YamlParserTest.java index 16d5ac2f4..f950e1a19 100644 --- a/lirical-io/src/test/java/org/monarchinitiative/lirical/io/analysis/YamlParserTest.java +++ b/lirical-cli/src/test/java/org/monarchinitiative/lirical/cli/yaml/YamlParserTest.java @@ -1,7 +1,7 @@ -package org.monarchinitiative.lirical.io.analysis; +package org.monarchinitiative.lirical.cli.yaml; import org.junit.jupiter.api.Test; -import org.monarchinitiative.lirical.io.TestResources; +import 
org.monarchinitiative.lirical.cli.TestResources; import java.nio.file.Path; import java.util.List; diff --git a/lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/BBS1.yml b/lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/BBS1.yml similarity index 100% rename from lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/BBS1.yml rename to lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/BBS1.yml diff --git a/lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/example1.yml b/lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/example1.yml similarity index 100% rename from lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/example1.yml rename to lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/example1.yml diff --git a/lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/example2.yml b/lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/example2.yml similarity index 100% rename from lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/example2.yml rename to lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/example2.yml diff --git a/lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/hpo_and_vcf.yml b/lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/hpo_and_vcf.yml similarity index 100% rename from lirical-io/src/test/resources/org/monarchinitiative/lirical/io/yaml/hpo_and_vcf.yml rename to lirical-cli/src/test/resources/org/monarchinitiative/lirical/cli/yaml/hpo_and_vcf.yml diff --git a/lirical-configuration/pom.xml b/lirical-configuration/pom.xml index d3578d54b..510da9aac 100644 --- a/lirical-configuration/pom.xml +++ b/lirical-configuration/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -22,6 +22,22 @@ lirical-exomiser-db-adapter ${project.parent.version} + + 
org.monarchinitiative.phenol + phenol-core + + + org.monarchinitiative.phenol + phenol-io + + + org.monarchinitiative.phenol + phenol-annotations + + + ch.qos.logback + logback-classic + diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/AnalysisResultWriterFactoryImpl.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/AnalysisResultWriterFactoryImpl.java index 3b815e444..7a718ed36 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/AnalysisResultWriterFactoryImpl.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/AnalysisResultWriterFactoryImpl.java @@ -3,22 +3,22 @@ import org.monarchinitiative.lirical.core.output.AnalysisResultWriterFactory; import org.monarchinitiative.lirical.core.output.AnalysisResultsWriter; import org.monarchinitiative.lirical.core.output.OutputFormat; -import org.monarchinitiative.lirical.core.output.TemplateBasedAnalysisResultsWriter; import org.monarchinitiative.lirical.io.output.JsonAnalysisResultWriter; +import org.monarchinitiative.lirical.io.output.TemplateBasedAnalysisResultsWriter; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import java.util.Objects; import java.util.Optional; class AnalysisResultWriterFactoryImpl implements AnalysisResultWriterFactory { - private final Ontology hpo; + private final MinimalOntology hpo; private final HpoDiseases diseases; private AnalysisResultsWriter html, tsv, json; - AnalysisResultWriterFactoryImpl(Ontology hpo, HpoDiseases diseases) { + AnalysisResultWriterFactoryImpl(MinimalOntology hpo, HpoDiseases diseases) { this.hpo = Objects.requireNonNull(hpo); this.diseases = Objects.requireNonNull(diseases); } diff --git 
a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java index 9a07a880f..7414cdaa0 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LiricalBuilder.java @@ -2,6 +2,7 @@ import org.monarchinitiative.lirical.configuration.impl.BundledBackgroundVariantFrequencyServiceFactory; import org.monarchinitiative.lirical.core.Lirical; +import org.monarchinitiative.lirical.core.LiricalOptions; import org.monarchinitiative.lirical.core.analysis.probability.PretestDiseaseProbability; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLikelihoodRatio; import org.monarchinitiative.lirical.core.likelihoodratio.PhenotypeLikelihoodRatio; @@ -18,7 +19,7 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +44,8 @@ public class LiricalBuilder { private VariantMetadataServiceFactory variantMetadataServiceFactory = null; private FunctionalVariantAnnotatorService functionalVariantAnnotatorService = null; + private int parallelism = 1; + public static LiricalBuilder builder(Path liricalDataDirectory) throws LiricalDataException { return new LiricalBuilder(liricalDataDirectory); } @@ -251,6 +254,16 @@ public LiricalBuilder functionalVariantAnnotator(FunctionalVariantAnnotator func return this; } + /** + * Set the number of threads/workers in the LIRICAL worker pool. 
+ */ + public LiricalBuilder parallelism(int parallelism) { + if (parallelism <=0) + throw new IllegalArgumentException("Parallelism %d must be greater than 0".formatted(parallelism)); + this.parallelism = parallelism; + return this; + } + public Lirical build() throws LiricalDataException { // First, services if (phenotypeService == null) { @@ -291,18 +304,22 @@ public Lirical build() throws LiricalDataException { // Analysis result writer factory AnalysisResultWriterFactory analysisResultWriterFactory = new AnalysisResultWriterFactoryImpl(phenotypeService.hpo(), phenotypeService.diseases()); + // Last, the global options. + LiricalOptions options = new LiricalOptions(LIRICAL_VERSION, parallelism); + return Lirical.of( variantParserFactory, phenotypeService, backgroundVariantFrequencyServiceFactory, variantMetadataServiceFactory, + functionalVariantAnnotatorService, analysisResultWriterFactory, - LIRICAL_VERSION); + options); } private static PhenotypeService configurePhenotypeService(Path dataDirectory, HpoDiseaseLoaderOptions options) throws LiricalDataException { LiricalDataResolver liricalDataResolver = LiricalDataResolver.of(dataDirectory); - Ontology hpo = LoadUtils.loadOntology(liricalDataResolver.hpoJson()); + MinimalOntology hpo = LoadUtils.loadOntology(liricalDataResolver.hpoJson()); HpoDiseases diseases = LoadUtils.loadHpoDiseases(liricalDataResolver.phenotypeAnnotations(), hpo, options); HpoAssociationData associationData = HpoAssociationData.builder(hpo) .hgncCompleteSetArchive(liricalDataResolver.hgncCompleteSet()) diff --git a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java index 256082d75..238c8baee 100644 --- a/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java +++ b/lirical-configuration/src/main/java/org/monarchinitiative/lirical/configuration/LoadUtils.java @@ 
-6,8 +6,8 @@ import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaders; import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenol.io.OntologyLoader; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.io.MinimalOntologyLoader; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,17 +22,17 @@ private LoadUtils() { } - static Ontology loadOntology(Path ontologyPath) throws LiricalDataException { + static MinimalOntology loadOntology(Path ontologyPath) throws LiricalDataException { try { LOGGER.debug("Loading HPO from {}", ontologyPath.toAbsolutePath()); - return OntologyLoader.loadOntology(ontologyPath.toFile()); + return MinimalOntologyLoader.loadOntology(ontologyPath.toFile()); } catch (PhenolRuntimeException e) { throw new LiricalDataException(e); } } static HpoDiseases loadHpoDiseases(Path annotationPath, - Ontology hpo, + MinimalOntology hpo, HpoDiseaseLoaderOptions options) throws LiricalDataException { try { LOGGER.debug("Loading HPO annotations from {}", annotationPath.toAbsolutePath()); diff --git a/lirical-core/pom.xml b/lirical-core/pom.xml index 31f8d7f72..7e739ce98 100644 --- a/lirical-core/pom.xml +++ b/lirical-core/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -27,25 +27,25 @@ de.charite.compbio jannovar-core - - - - commons-net - commons-net - - - org.ini4j - ini4j - - - commons-lang - commons-lang + com.fasterxml.jackson.core + jackson-annotations + provided - org.freemarker - freemarker + com.fasterxml.jackson.core + jackson-core + provided + + + com.fasterxml.jackson.core + jackson-databind + provided + + + org.apache.commons + commons-lang3 diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java 
b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java index 29264f0a8..12761cd23 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/Lirical.java @@ -14,13 +14,14 @@ public class Lirical { private final VariantParserFactory variantParserFactory; private final PhenotypeService phenotypeService; + private final FunctionalVariantAnnotatorService functionalVariantAnnotatorService; private final VariantMetadataServiceFactory variantMetadataServiceFactory; private final LiricalAnalysisRunner analysisRunner; private final AnalysisResultWriterFactory analysisResultWriterFactory; - private final String version; // nullable + private final LiricalOptions options; /** - * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, AnalysisResultWriterFactory, String)} } + * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, FunctionalVariantAnnotatorService, AnalysisResultWriterFactory, LiricalOptions)} instead. * instead */ // REMOVE(v2.0.0) @@ -35,7 +36,7 @@ public static Lirical of(VariantParserFactory variantParserFactory, } /** - * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, AnalysisResultWriterFactory, String)} instead. + * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, FunctionalVariantAnnotatorService, AnalysisResultWriterFactory, LiricalOptions)} instead. 
*/ // REMOVE(v2.0.0) @Deprecated(since = "2.0.0-RC2", forRemoval = true) @@ -43,40 +44,67 @@ public static Lirical of(VariantParserFactory variantParserFactory, PhenotypeService phenotypeService, BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory variantMetadataService, + FunctionalVariantAnnotatorService functionalVariantAnnotatorService, AnalysisResultWriterFactory analysisResultWriterFactory) { return of(variantParserFactory, phenotypeService, backgroundVariantFrequencyServiceFactory, variantMetadataService, + functionalVariantAnnotatorService, analysisResultWriterFactory, - null); + (String) null); } + /** + * @deprecated use {@link #of(VariantParserFactory, PhenotypeService, BackgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory, FunctionalVariantAnnotatorService, AnalysisResultWriterFactory, LiricalOptions)} instead. + */ + // REMOVE(v2.0.0) + @Deprecated(since = "2.0.0-RC2", forRemoval = true) public static Lirical of(VariantParserFactory variantParserFactory, PhenotypeService phenotypeService, BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory variantMetadataService, + FunctionalVariantAnnotatorService functionalVariantAnnotatorService, AnalysisResultWriterFactory analysisResultWriterFactory, String version) { return new Lirical(variantParserFactory, phenotypeService, backgroundVariantFrequencyServiceFactory, variantMetadataService, + functionalVariantAnnotatorService, + analysisResultWriterFactory, + new LiricalOptions(version, 2)); + } + + public static Lirical of(VariantParserFactory variantParserFactory, + PhenotypeService phenotypeService, + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + VariantMetadataServiceFactory variantMetadataService, + FunctionalVariantAnnotatorService functionalVariantAnnotatorService, + AnalysisResultWriterFactory analysisResultWriterFactory, + 
LiricalOptions options) { + return new Lirical(variantParserFactory, + phenotypeService, + backgroundVariantFrequencyServiceFactory, + variantMetadataService, + functionalVariantAnnotatorService, analysisResultWriterFactory, - version); + options); } private Lirical(VariantParserFactory variantParserFactory, PhenotypeService phenotypeService, BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, VariantMetadataServiceFactory variantMetadataServiceFactory, + FunctionalVariantAnnotatorService functionalVariantAnnotatorService, AnalysisResultWriterFactory analysisResultWriterFactory, - String version) { + LiricalOptions options) { this.variantParserFactory = Objects.requireNonNull(variantParserFactory); this.phenotypeService = Objects.requireNonNull(phenotypeService); this.variantMetadataServiceFactory = Objects.requireNonNull(variantMetadataServiceFactory); - this.version = version; // nullable - this.analysisRunner = LiricalAnalysisRunnerImpl.of(phenotypeService, backgroundVariantFrequencyServiceFactory); + this.functionalVariantAnnotatorService = Objects.requireNonNull(functionalVariantAnnotatorService); + this.options = Objects.requireNonNull(options); + this.analysisRunner = LiricalAnalysisRunnerImpl.of(phenotypeService, backgroundVariantFrequencyServiceFactory, options.parallelism()); this.analysisResultWriterFactory = Objects.requireNonNull(analysisResultWriterFactory); } @@ -91,12 +119,19 @@ public PhenotypeService phenotypeService() { return phenotypeService; } + /** + * @deprecated use {@link #functionalVariantAnnotatorService()} instead + */ @Deprecated(since = "2.0.0-RC2", forRemoval = true) // REMOVE(v2.0.0) public FunctionalVariantAnnotator functionalVariantAnnotator() { return null; } + public FunctionalVariantAnnotatorService functionalVariantAnnotatorService() { + return functionalVariantAnnotatorService; + } + /** * * @deprecated use {@link #variantMetadataServiceFactory()} instead @@ -120,6 +155,6 @@ public 
AnalysisResultWriterFactory analysisResultsWriterFactory() { } public Optional version() { - return Optional.ofNullable(version); + return options.version(); } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java new file mode 100644 index 000000000..370574150 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java @@ -0,0 +1,27 @@ +package org.monarchinitiative.lirical.core; + +import java.util.Optional; + +/** + * Global options to parameterize LIRICAL execution. + *

+ * Note, these options do not parameterize the analyses. + */ +public class LiricalOptions { + + private final String version; // nullable + private final int parallelism; + + public LiricalOptions(String version, int parallelism) { + this.version = version; + this.parallelism = parallelism; + } + + public Optional version() { + return Optional.ofNullable(version); + } + + public int parallelism() { + return parallelism; + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java index 75d3f651d..2a9e5d55f 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java @@ -1,22 +1,36 @@ package org.monarchinitiative.lirical.core.analysis; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.monarchinitiative.lirical.core.model.Age; import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; import org.monarchinitiative.lirical.core.model.Sex; import org.monarchinitiative.phenol.ontology.data.TermId; +import java.util.Collection; import java.util.List; +import java.util.Optional; /** - * An interface for representing proband data. + * Representation of subject data required by LIRICAL analysis. */ public interface AnalysisData { + /** + * Construct analysis data from the inputs. + * + * @param sampleId non-null sample identifier. + * @param age subject's age or {@code null} if not available. + * @param sex non-null sex. + * @param presentPhenotypeTerms a collection of observed HPO terms. + * @param negatedPhenotypeTerms a collection of excluded HPO terms. + * @param genes non-null container of genes and genotypes. 
+ */ static AnalysisData of(String sampleId, Age age, Sex sex, - List presentPhenotypeTerms, - List negatedPhenotypeTerms, + Collection presentPhenotypeTerms, + Collection negatedPhenotypeTerms, GenesAndGenotypes genes) { return new AnalysisDataDefault(sampleId, age, @@ -26,16 +40,40 @@ static AnalysisData of(String sampleId, genes); } + /** + * @return a non-null sample ID. + */ + @JsonGetter String sampleId(); - Age age(); + /** + * @return an optional with age or empty optional if age is not available. + */ + @JsonGetter + Optional age(); + /** + * @return a non-null sex of the subject. + */ + @JsonGetter(value = "sex") Sex sex(); + /** + * @return a list of the HPO terms that were observed in the subject. + */ + @JsonGetter(value = "observedPhenotypicFeatures") List presentPhenotypeTerms(); + /** + * @return a list of the HPO terms whose presence was explicitly excluded in the subject. + */ + @JsonGetter(value = "excludedPhenotypicFeatures") List negatedPhenotypeTerms(); + /** + * @return container with genes and genotypes observed in the subject. 
+ */ + @JsonIgnore GenesAndGenotypes genes(); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java index 1820dd0fb..8acccf6c4 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java @@ -1,19 +1,97 @@ package org.monarchinitiative.lirical.core.analysis; -import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; import org.monarchinitiative.lirical.core.model.Age; +import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; import org.monarchinitiative.lirical.core.model.Sex; import org.monarchinitiative.phenol.ontology.data.TermId; +import java.util.Collection; import java.util.List; +import java.util.Objects; +import java.util.Optional; /** * Default implementation of {@link AnalysisData}. 
*/ -record AnalysisDataDefault(String sampleId, - Age age, - Sex sex, - List presentPhenotypeTerms, - List negatedPhenotypeTerms, - GenesAndGenotypes genes) implements AnalysisData { +final class AnalysisDataDefault implements AnalysisData { + private final String sampleId; + private final Age age; + private final Sex sex; + private final List presentPhenotypeTerms; + private final List negatedPhenotypeTerms; + private final GenesAndGenotypes genes; + + AnalysisDataDefault(String sampleId, + Age age, + Sex sex, + Collection presentPhenotypeTerms, + Collection negatedPhenotypeTerms, + GenesAndGenotypes genes) { + this.sampleId = Objects.requireNonNull(sampleId); + this.age = age; + this.sex = Objects.requireNonNull(sex); + this.presentPhenotypeTerms = List.copyOf(Objects.requireNonNull(presentPhenotypeTerms)); + this.negatedPhenotypeTerms = List.copyOf(Objects.requireNonNull(negatedPhenotypeTerms)); + this.genes = Objects.requireNonNull(genes); + } + + @Override + public String sampleId() { + return sampleId; + } + + @Override + public Optional age() { + return Optional.ofNullable(age); + } + + @Override + public Sex sex() { + return sex; + } + + @Override + public List presentPhenotypeTerms() { + return presentPhenotypeTerms; + } + + @Override + public List negatedPhenotypeTerms() { + return negatedPhenotypeTerms; + } + + @Override + public GenesAndGenotypes genes() { + return genes; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (AnalysisDataDefault) obj; + return Objects.equals(this.sampleId, that.sampleId) && + Objects.equals(this.age, that.age) && + Objects.equals(this.sex, that.sex) && + Objects.equals(this.presentPhenotypeTerms, that.presentPhenotypeTerms) && + Objects.equals(this.negatedPhenotypeTerms, that.negatedPhenotypeTerms) && + Objects.equals(this.genes, that.genes); + } + + @Override + public int hashCode() { + return 
Objects.hash(sampleId, age, sex, presentPhenotypeTerms, negatedPhenotypeTerms, genes); + } + + @Override + public String toString() { + return "AnalysisDataDefault[" + + "sampleId=" + sampleId + ", " + + "age=" + age + ", " + + "sex=" + sex + ", " + + "presentPhenotypeTerms=" + presentPhenotypeTerms + ", " + + "negatedPhenotypeTerms=" + negatedPhenotypeTerms + ", " + + "genes=" + genes + ']'; + } + } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java index c205c84fd..fd521014a 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java @@ -1,9 +1,23 @@ package org.monarchinitiative.lirical.core.analysis; +import org.monarchinitiative.lirical.core.model.GenomeBuild; +import org.monarchinitiative.lirical.core.model.TranscriptDatabase; + import java.io.InputStream; +// REMOVE(v2.0.0) +@Deprecated(forRemoval = true) public interface AnalysisDataParser { - AnalysisData parse(InputStream is) throws LiricalParseException; + /** + * @deprecated use {@link #parse(InputStream, GenomeBuild, TranscriptDatabase)} instead. 
+ */ + // REMOVE(v2.0.0) + @Deprecated(forRemoval = true) + default AnalysisData parse(InputStream is) throws LiricalParseException { + return parse(is, GenomeBuild.HG38, TranscriptDatabase.REFSEQ); + } + + AnalysisData parse(InputStream is, GenomeBuild build, TranscriptDatabase transcriptDatabase) throws LiricalParseException; } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java index 07dc95869..16c911b5f 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java @@ -120,8 +120,21 @@ default float defaultVariantAlleleFrequency() { * with the disease. The option is used only if the variants are available for the investigated individual. * * @return true if the candidate disease should be disregarded. + * @deprecated use {@link #includeDiseasesWithNoDeleteriousVariants()} instead */ - boolean disregardDiseaseWithNoDeleteriousVariants(); + // REMOVE(v2.0.0) + @Deprecated(forRemoval = true) + default boolean disregardDiseaseWithNoDeleteriousVariants() { + return !includeDiseasesWithNoDeleteriousVariants(); + } + + /** + * Include a disease if no known or predicted deleterious variants are found in the gene associated + * with the disease. The option is used only if the variants are available for the investigated individual. + * + * @return true if the candidate disease should be included. + */ + boolean includeDiseasesWithNoDeleteriousVariants(); /** * Variant with pathogenicity value greater or equal to this threshold is considered deleterious. 
@@ -150,7 +163,7 @@ class Builder { private boolean useStrictPenalties = false; private boolean useGlobal = false; private PretestDiseaseProbability pretestDiseaseProbability = null; - private boolean disregardDiseaseWithNoDeleteriousVariants = true; + private boolean includeDiseasesWithNoDeleteriousVariants = false; private Builder() { } @@ -226,9 +239,18 @@ public Builder pretestProbability(PretestDiseaseProbability pretestDiseaseProbab return this; } - + /** + * @deprecated use {@link #includeDiseasesWithNoDeleteriousVariants} instead. Note, that you'll have + * to negate the value to obtain the same result + */ + @Deprecated(forRemoval = true) public Builder disregardDiseaseWithNoDeleteriousVariants(boolean disregardDiseaseWithNoDeleteriousVariants) { - this.disregardDiseaseWithNoDeleteriousVariants = disregardDiseaseWithNoDeleteriousVariants; + this.includeDiseasesWithNoDeleteriousVariants = !disregardDiseaseWithNoDeleteriousVariants; + return this; + } + + public Builder includeDiseasesWithNoDeleteriousVariants(boolean includeDiseasesWithNoDeleteriousVariants) { + this.includeDiseasesWithNoDeleteriousVariants = includeDiseasesWithNoDeleteriousVariants; return this; } @@ -241,7 +263,7 @@ public AnalysisOptions build() { useStrictPenalties, useGlobal, pretestDiseaseProbability, - disregardDiseaseWithNoDeleteriousVariants); + includeDiseasesWithNoDeleteriousVariants); } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java index 24f44e308..f5ae32c3d 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java @@ -16,6 +16,6 @@ record AnalysisOptionsDefault( boolean useStrictPenalties, boolean useGlobal, PretestDiseaseProbability 
pretestDiseaseProbability, - boolean disregardDiseaseWithNoDeleteriousVariants + boolean includeDiseasesWithNoDeleteriousVariants ) implements AnalysisOptions { } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java index 4233dc038..39e8483e7 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java @@ -1,5 +1,6 @@ package org.monarchinitiative.lirical.core.analysis; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.monarchinitiative.phenol.ontology.data.TermId; import java.util.Comparator; @@ -26,18 +27,23 @@ static AnalysisResults of(List results) { /** * @return test result count */ + @JsonIgnore int size(); + @JsonIgnore default boolean isEmpty() { return size() == 0; } + @JsonIgnore Optional resultByDiseaseId(TermId diseaseId); + @JsonIgnore default Stream results() { return StreamSupport.stream(spliterator(), false); } + @JsonIgnore default Stream resultsWithDescendingPostTestProbability() { return results().sorted(Comparator.comparingDouble(TestResult::posttestProbability).reversed()); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java index acba94f4a..c0a8762f4 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java @@ -2,12 +2,14 @@ import org.monarchinitiative.lirical.core.exception.LiricalAnalysisException; +import java.io.Closeable; + /** * The analysis runner runs LIRICAL analysis on provided analysis subject ({@link AnalysisData}). 
The analysis * is parametrized by {@link AnalysisOptions}. The runner throws {@link LiricalAnalysisException} if the analysis * cannot be run as dictated by the options. */ -public interface LiricalAnalysisRunner { +public interface LiricalAnalysisRunner extends Closeable { /** * Run analysis parametrized by {@code analysisOptions} on {@code analysisData}. diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java index 854832ce0..e88a91a70 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java @@ -5,6 +5,7 @@ /** * An exception thrown when user-provided input is invalid. */ +// TODO - move to CLI after removing AnalysisDataParser. public class LiricalParseException extends LiricalException { public LiricalParseException() { diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java index 60136bd93..1aa0dc069 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java @@ -1,6 +1,8 @@ package org.monarchinitiative.lirical.core.analysis; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.likelihoodratio.LrWithExplanation; import org.monarchinitiative.phenol.ontology.data.TermId; @@ -94,6 +96,7 @@ private static double calculateCompositeLR(List observed, Lis return observedLr * excludedLr * 
genotypeLrForCalculationOfCompositeLr; } + @JsonGetter(value = "observedPhenotypicFeatures") public List observedResults() { return observedResults; } @@ -102,6 +105,7 @@ public List observedTerms() { return observedResults.stream().map(LrWithExplanation::queryTerm).toList(); } + @JsonGetter(value = "excludedPhenotypicFeatures") public List excludedResults() { return excludedResults; } @@ -113,6 +117,7 @@ public List excludedTerms() { /** * @return the composite likelihood ratio (product of the LRs of the individual tests). */ + @JsonGetter public double getCompositeLR() { return compositeLR; } @@ -120,6 +125,7 @@ public double getCompositeLR() { /** * @return the total count of tests performed (excluding genotype). */ + @JsonIgnore public int getNumberOfTests() { return observedResults.size() + excludedResults.size(); } @@ -138,7 +144,7 @@ public double posttestOdds() { return pretestOdds() * getCompositeLR(); } - + @JsonGetter public double pretestProbability() { return pretestProbability; } @@ -148,6 +154,7 @@ private double calculatePosttestProbability() { return po / (1 + po); } + @JsonGetter public double posttestProbability() { return posttestProbability; } @@ -189,6 +196,7 @@ public double getExcludedPhenotypeRatio(int i) { /** * @return name of the disease being tested. 
*/ + @JsonGetter public TermId diseaseId() { return diseaseId; } @@ -202,10 +210,12 @@ public boolean hasGenotypeLR() { return false; } + @JsonGetter(value = "genotypeLR") public Optional genotypeLr() { return Optional.ofNullable(genotypeLr); } + @JsonIgnore @Deprecated(forRemoval = true) // get explanations from results // REMOVE(v2.0.0) public List getObservedPhenotypeExplanation() { @@ -215,6 +225,7 @@ public List getObservedPhenotypeExplanation() { .toList(); } + @JsonIgnore @Deprecated(forRemoval = true) // get explanations from excludedResults // REMOVE(v2.0.0) public List getExcludedPhenotypeExplanation() { @@ -229,6 +240,7 @@ public List getExcludedPhenotypeExplanation() { * * @return maximum abs(LR) */ + @JsonIgnore public double getMaximumIndividualLR() { double m1 = this.observedResults.stream() .map(LrWithExplanation::lr) diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java index 3178678d5..e8869eb9d 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java @@ -29,18 +29,20 @@ public class LiricalAnalysisRunnerImpl implements LiricalAnalysisRunner { private final ForkJoinPool pool; public static LiricalAnalysisRunnerImpl of(PhenotypeService phenotypeService, - BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) { - return new LiricalAnalysisRunnerImpl(phenotypeService, backgroundVariantFrequencyServiceFactory); + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + int parallelism) { + return new LiricalAnalysisRunnerImpl(phenotypeService, + backgroundVariantFrequencyServiceFactory, + parallelism); } private 
LiricalAnalysisRunnerImpl(PhenotypeService phenotypeService, - BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) { + BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory, + int parallelism) { this.phenotypeService = Objects.requireNonNull(phenotypeService); this.phenotypeLrEvaluator = new PhenotypeLikelihoodRatio(phenotypeService.hpo(), phenotypeService.diseases()); this.bgFreqFactory = backgroundVariantFrequencyServiceFactory; - // TODO - set parallelism - int parallelism = Runtime.getRuntime().availableProcessors(); - LOGGER.debug("Creating LIRICAL pool with {} workers.", parallelism); + LOGGER.debug("Creating LIRICAL pool with {} worker(s).", parallelism); this.pool = new ForkJoinPool(parallelism, LiricalWorkerThread::new, null, false); } @@ -118,16 +120,16 @@ private Optional analyzeDisease(GenotypeLikelihoodRatio genotypeLike GenotypeLrWithExplanation candidate = genotypeLikelihoodRatio.evaluateGenotype(analysisData.sampleId(), g2g, disease.modesOfInheritance()); bestGenotypeLr = takeNonNullOrGreaterLr(bestGenotypeLr, candidate); - if (options.disregardDiseaseWithNoDeleteriousVariants()) { + if (!options.includeDiseasesWithNoDeleteriousVariants()) { // has at least one pathogenic clinvar variant or predicted pathogenic variant? 
if (g2g.pathogenicClinVarCount(analysisData.sampleId()) > 0 - || g2g.pathogenicAlleleCount(analysisData.sampleId(), options.variantDeleteriousnessThreshold()) > 0) { + || g2g.deleteriousAlleleCount(analysisData.sampleId(), options.variantDeleteriousnessThreshold()) > 0) { noPredictedDeleteriousVariantsWereFound = false; } } } - if (options.disregardDiseaseWithNoDeleteriousVariants() && noPredictedDeleteriousVariantsWereFound) + if (!options.includeDiseasesWithNoDeleteriousVariants() && noPredictedDeleteriousVariantsWereFound) return Optional.empty(); /* @@ -182,4 +184,10 @@ private Optional configureGenotypeLikelihoodRatio(Genom }); } + @Override + public void close() { + LOGGER.debug("Shutting down the analysis runner"); + // TODO - use close after updating Java to 19+ + pool.shutdownNow(); + } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java new file mode 100644 index 000000000..5f1c4b3b4 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java @@ -0,0 +1,4 @@ +/** + * Default LIRICAL analysis implementation. + */ +package org.monarchinitiative.lirical.core.analysis.impl; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java index 8696a3557..36e03cb6c 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java @@ -1,4 +1,11 @@ /** - * Classes for coordinating the main Lirical analysis goals. + * A high-level representation of LIRICAL analysis. + *

+ * The analysis subject is provided as {@link org.monarchinitiative.lirical.core.analysis.AnalysisData}. The analysis + * is parameterized by {@link org.monarchinitiative.lirical.core.analysis.AnalysisOptions}. + * {@link org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner} executes the analysis. The output + * is wrapped into {@link org.monarchinitiative.lirical.core.analysis.AnalysisResults} which reports results + * of matching the subject to computational disease models, + * one {@link org.monarchinitiative.lirical.core.analysis.TestResult} per disease. */ package org.monarchinitiative.lirical.core.analysis; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java new file mode 100644 index 000000000..2445e98ba --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java @@ -0,0 +1,4 @@ +/** + * Model of pretest probability of diseases. + */ +package org.monarchinitiative.lirical.core.analysis.probability; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java index 0d8c4c51f..1bd447c83 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java @@ -3,7 +3,10 @@ /** * An exception thrown by {@link org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner} if the analysis * cannot be run. + * @deprecated will be moved into {@link org.monarchinitiative.lirical.core.analysis} package. */ +// TODO - move to analysis package. 
+@Deprecated(forRemoval = true) public class LiricalAnalysisException extends LiricalException { public LiricalAnalysisException() { super(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java new file mode 100644 index 000000000..50b907022 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java @@ -0,0 +1,4 @@ +/** + * Top-level exceptions. + */ +package org.monarchinitiative.lirical.core.exception; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java new file mode 100644 index 000000000..a9c674720 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java @@ -0,0 +1,14 @@ +/** + * APIs for reading and annotation of genomic variants. + *

+ * LIRICAL needs to read genomic variants, perform functional annotation, and fetch variant frequencies for the variants. + * LIRICAL does not care about how this is done, as long as the variants meet + * the {@link org.monarchinitiative.lirical.core.model.LiricalVariant} requirements. + *

+ * One way to configure the functional annotation is to implement {@link org.monarchinitiative.lirical.core.io.VariantParserFactory} + * which can provide a {@link org.monarchinitiative.lirical.core.io.VariantParser} to read variants + * from a {@link java.nio.file.Path} given {@link org.monarchinitiative.lirical.core.model.GenomeBuild} + * and {@link org.monarchinitiative.lirical.core.model.TranscriptDatabase}. For instance, to read variants + * from a VCF file. + */ +package org.monarchinitiative.lirical.core.io; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java index 8c7688502..74551bc27 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java @@ -15,7 +15,9 @@ /** - * This class is responsible for calculating the genotype-based likelihood ratio. + * This class is responsible for calculating the likelihood ratio for genotypes, as described + * in the Material and Methods | Likelihood Ratio for Genotypes section + * of the LIRICAL manuscript. * * @author Peter Robinson */ @@ -102,56 +104,57 @@ private double updateMax(double left, Double right) { * for autosomal recessive. * * @param g2g {@link Gene2Genotype} object with list of variants in current gene. Can be null if no variants were found in the gene - * @param inheritancemodes list of modes of inheritance associated with disease being investigated (usually with just one entry). + * @param inheritanceModes list of modes of inheritance associated with disease being investigated (usually with just one entry). 
* @return likelihood ratio of the genotype given the disease/geniId combination */ - public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype g2g, List inheritancemodes) { + public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype g2g, List inheritanceModes) { // special case 1: No variant found in this gene if (!g2g.hasVariants()) { - return getLRifNoVariantAtAllWasIdentified(inheritancemodes, g2g); + return getLRifNoVariantAtAllWasIdentified(inheritanceModes, g2g); } - // special case 2: Clinvar-pathogenic variant(s) found in this gene. + // special case 2: ClinVar-pathogenic or likely pathogenic (P/LP) variant(s) found in this gene. // The likelihood ratio is defined as 1000**count, where 1 for autosomal dominant and - // 2 for autosomal recessive. (If the count of pathogenic alleles does not match + // 2 for autosomal recessive. If the count of P/LP alleles does not match // the expected count, return 1000. int pathogenicClinVarAlleleCount = g2g.pathogenicClinVarCount(sampleId); if (pathogenicClinVarAlleleCount > 0) { - if (inheritancemodes.contains(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE)) { + if (inheritanceModes.contains(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE)) { if (pathogenicClinVarAlleleCount == 2) { return GenotypeLrWithExplanation.twoPathClinVarAllelesRecessive(g2g.geneId(),Math.pow(1000d, 2)); } + // A case of one ClinVar P/LP allele in an AR disease will fall through.. 
} else { // for all other MoI, including AD, assume that only one ClinVar allele is pathogenic return GenotypeLrWithExplanation.pathClinVar(g2g.geneId(), Math.pow(1000d, 1d)); } } - int pathogenicAlleleCount = g2g.pathogenicAlleleCount(sampleId, pathogenicityThreshold); - double observedWeightedPathogenicVariantCount = g2g.getSumOfPathBinScores(sampleId, pathogenicityThreshold); - if (pathogenicAlleleCount == 0 || observedWeightedPathogenicVariantCount < EPSILON) { - // no identified variant or the pathogenicity score of identified variant is close to zero + int deleteriousAlleleCount = g2g.deleteriousAlleleCount(sampleId, pathogenicityThreshold); + double observedWeightedDeleteriousVariantCount = g2g.getSumOfPathBinScores(sampleId, pathogenicityThreshold); + if (deleteriousAlleleCount == 0 || observedWeightedDeleteriousVariantCount < EPSILON) { + // no identified deleterious variant or the deleteriousness score of identified variant is close to zero // essentially same as no identified variant, this should happen rarely if ever. - return getLRifNoVariantAtAllWasIdentified(inheritancemodes, g2g); + return getLRifNoVariantAtAllWasIdentified(inheritanceModes, g2g); } // if we get here then // 1. g2g was not null // 2. There was at least one observed variant - // 3. There was no pathogenic variant listed in ClinVar. + // 3. There was no P/LP variant listed in ClinVar or at most one variant but the disease is AR. // Therefore, we apply the main algorithm for calculating the LR genotype score. 
double lambda_background = backgroundVariantFrequencyService.frequencyForGene(g2g.geneId().id()) .orElse(backgroundVariantFrequencyService.defaultVariantBackgroundFrequency()); - if (inheritancemodes == null || inheritancemodes.isEmpty()) { + if (inheritanceModes == null || inheritanceModes.isEmpty()) { // This is probably because the HPO annotation file is incomplete logger.warn("No inheritance mode annotation found for geneId {}, reverting to default", g2g.geneId().id().getValue()); // Add a default dominant mode to avoid not ranking this gene at all - inheritancemodes = List.of(HpoModeOfInheritanceTermIds.AUTOSOMAL_DOMINANT); + inheritanceModes = List.of(HpoModeOfInheritanceTermIds.AUTOSOMAL_DOMINANT); } // The following is a heuristic to avoid giving genes with a high background count // a better score for pathogenic than background -- the best explanation for // a gene with high background is that a variant is background (unless variant is ClinVar-path, see above). if (lambda_background > 1.0) { - lambda_background = Math.min(lambda_background, pathogenicAlleleCount); + lambda_background = Math.min(lambda_background, deleteriousAlleleCount); } // Use the following four vars to keep track of which option was the max. Double max = null; @@ -163,7 +166,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype //last if/else double B = 1.0; // background double D = 1.0; // disease - for (TermId inheritanceId : inheritancemodes) { + for (TermId inheritanceId : inheritanceModes) { double lambda_disease; PoissonDistribution pdDisease; if (inheritanceId.equals(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE) || inheritanceId.equals(HpoModeOfInheritanceTermIds.X_LINKED_RECESSIVE)) { @@ -178,16 +181,16 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype // will take the observed path weighted count to not be more than lambda_disease. 
// this will have the effect of not downweighting these genes // the user will have to judge whether one of the variants is truly pathogenic. - if (strict && pathogenicAlleleCount > (lambda_disease + EPSILON)) { - double HEURISTIC = HEURISTIC_PATH_ALLELE_COUNT_ABOVE_LAMBDA_D * (pathogenicAlleleCount - lambda_disease); + if (strict && deleteriousAlleleCount > (lambda_disease + EPSILON)) { + double HEURISTIC = HEURISTIC_PATH_ALLELE_COUNT_ABOVE_LAMBDA_D * (deleteriousAlleleCount - lambda_disease); max = updateMax(HEURISTIC, max); maxInheritanceMode = inheritanceId; heuristicPathCountAboveLambda = true; } else { // the following is the general case, where either the variant count // matches or we are not using the strict option. - D = pdDisease.probability(observedWeightedPathogenicVariantCount); + D = pdDisease.probability(observedWeightedDeleteriousVariantCount); PoissonDistribution pdBackground = new PoissonDistribution(lambda_background); - B = pdBackground.probability(observedWeightedPathogenicVariantCount); + B = pdBackground.probability(observedWeightedDeleteriousVariantCount); if (B > 0 && D > 0) { double ratio = D / B; if (max != null && ratio > max) { @@ -211,7 +214,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype returnvalue, maxInheritanceMode, lambda_background, - observedWeightedPathogenicVariantCount); + observedWeightedDeleteriousVariantCount); } else { return GenotypeLrWithExplanation.explanation(g2g.geneId(), returnvalue, @@ -219,7 +222,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype lambda_background, B, D, - observedWeightedPathogenicVariantCount); + observedWeightedDeleteriousVariantCount); } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java new file mode 100644 index 000000000..2cb60c4b7 --- /dev/null 
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java @@ -0,0 +1,82 @@ +package org.monarchinitiative.lirical.core.likelihoodratio; + +/** + * The enum for representing the type of the genotype likelihood ratio analysis performed for a gene. + * + * @see GenotypeLrWithExplanation + */ +public enum GenotypeLrMatchType { + + /** + * No variants were detected in a gene associated with a disease with autosomal dominant inheritance. + */ + NO_VARIANTS_DETECTED_AD, + + /** + * No variants were detected in a gene associated with a disease with autosomal recessive inheritance. + */ + NO_VARIANTS_DETECTED_AR, + + /** + * One ClinVar pathogenic or likely pathogenic allele discovered in a disease + * with autosomal dominant inheritance. + */ + ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD, + + /** + * Two ClinVar pathogenic or likely pathogenic alleles discovered in a disease + * with autosomal recessive inheritance. + */ + TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR, + + /** + * One deleterious allele detected with autosomal recessive disease. + */ + ONE_DELETERIOUS_VARIANT_IN_AR, + + /** + * Heuristic for the case where we have more called pathogenic variants than we should have + * in a gene without a high background count -- we will model this as technical error and + * will take the observed path weighted count to not be more than λdisease. + * this will have the effect of not down-weighting these genes + * the user will have to judge whether one of the variants is truly pathogenic. + */ + HIGH_NUMBER_OF_OBSERVED_PREDICTED_PATHOGENIC_VARIANTS, + + /** + * Gene scored using LIRICAL genotype LR model. + *

+ * For more details, consult the Material and Methods | Likelihood Ratio for Genotypes section + * of the LIRICAL manuscript. + */ + LIRICAL_GT_MODEL, + + /** + * DO NOT USE. + * + * @deprecated the constant has been deprecated and will be removed in v3.0.0. + * Use {@link #ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD} instead. + */ + @Deprecated(forRemoval = true, since = "v2.0.0") + // REMOVE(v3.0.0) + ONE_DELETERIOUS_CLINVAR_VARIANT_IN_AD, + + /** + * DO NOT USE. + * + * @deprecated the constant has been deprecated and will be removed in v3.0.0. + * Use {@link #TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR} instead. + */ + @Deprecated(forRemoval = true, since = "v2.0.0") + // REMOVE(v3.0.0) + TWO_DELETERIOUS_CLINVAR_VARIANTS_IN_AR, + + /** + * DO NOT USE. A placeholder value used in the deprecated methods for backward compatibility. + * + * @deprecated the field will be removed in v3.0.0. + */ + @Deprecated(forRemoval = true) + // REMOVE(v3.0.0) + UNKNOWN +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java index f8abcc07a..2ecdcfaa9 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java @@ -1,36 +1,41 @@ package org.monarchinitiative.lirical.core.likelihoodratio; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; import org.monarchinitiative.phenol.annotations.constants.hpo.HpoModeOfInheritanceTermIds; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.ontology.data.TermId; import java.util.Objects; - +/** + * Results of genotype likelihood ratio evaluation for a single gene. 
+ */ public class GenotypeLrWithExplanation { private final GeneIdentifier geneId; - /** The likelihood ratio of the genotype. */ + private final GenotypeLrMatchType matchType; + /** The untransformed likelihood ratio of the genotype. */ private final double lr; private final String explanation; static GenotypeLrWithExplanation noVariantsDetectedAutosomalRecessive(GeneIdentifier geneId, double ratio) { final String expl = String.format("log10(LR)=%.3f. No variants detected with autosomal recessive disease.", Math.log10(ratio)); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.NO_VARIANTS_DETECTED_AR, ratio, expl); } static GenotypeLrWithExplanation noVariantsDetectedAutosomalDominant(GeneIdentifier geneId, double ratio) { final String expl = String.format("log10(LR)=%.3f. No variants detected.", Math.log10(ratio)); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.NO_VARIANTS_DETECTED_AD, ratio, expl); } static GenotypeLrWithExplanation twoPathClinVarAllelesRecessive(GeneIdentifier geneId, double ratio) { final String expl = String.format("log10(LR)=%.3f. Two pathogenic ClinVar variants detected with autosomal recessive disease.", Math.log10(ratio)); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR, ratio, expl); } static GenotypeLrWithExplanation pathClinVar(GeneIdentifier geneId, double ratio) { final String expl = String.format("log10(LR)=%.3f. 
Pathogenic ClinVar variant detected.", Math.log10(ratio)); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD, ratio, expl); } static GenotypeLrWithExplanation explainOneAlleleRecessive(GeneIdentifier geneId, double ratio, double observedWeightedPathogenicVariantCount, double lambda_background) { @@ -38,7 +43,7 @@ static GenotypeLrWithExplanation explainOneAlleleRecessive(GeneIdentifier geneId String expl = String.format("log10(LR)=%.3f. One pathogenic allele detected with autosomal recessive disease. " + "Observed weighted pathogenic variant count: %.2f. λdisease=%d. λbackground=%.4f.", Math.log10(ratio), observedWeightedPathogenicVariantCount, lambda_disease, lambda_background); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.ONE_DELETERIOUS_VARIANT_IN_AR, ratio, expl); } @@ -50,18 +55,24 @@ static GenotypeLrWithExplanation explainPathCountAboveLambdaB(GeneIdentifier gen String expl = String.format("log10(LR)=%.3f. %s. Heuristic for high number of observed predicted pathogenic variants. " + "Observed weighted pathogenic variant count: %.2f. λdisease=%d. 
λbackground=%.4f.", Math.log10(ratio), getMoIString(MoI), observedWeightedPathogenicVariantCount, lambda_disease, lambda_background); - return new GenotypeLrWithExplanation(geneId, ratio, expl); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.HIGH_NUMBER_OF_OBSERVED_PREDICTED_PATHOGENIC_VARIANTS, ratio, expl); } - static GenotypeLrWithExplanation explanation(GeneIdentifier geneId, double ratio, TermId modeOfInh, double lambda_b, double D, double B, double observedWeightedPathogenicVariantCount) { + static GenotypeLrWithExplanation explanation(GeneIdentifier geneId, + double ratio, + TermId modeOfInh, + double lambda_b, + double D, + double B, + double observedWeightedDeleteriousVariantCount) { int lambda_disease = 1; if (modeOfInh.equals(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE) || modeOfInh.equals(HpoModeOfInheritanceTermIds.X_LINKED_RECESSIVE)) { lambda_disease = 2; } String msg = String.format("P(G|D)=%.4f. P(G|¬D)=%.4f", D, B); - msg = String.format("log10(LR)=%.3f %s. %s. Observed weighted pathogenic variant count: %.2f. λdisease=%d. λbackground=%.4f.", - Math.log10(ratio), msg, getMoIString(modeOfInh), observedWeightedPathogenicVariantCount, lambda_disease, lambda_b); - return new GenotypeLrWithExplanation(geneId, ratio, msg); + msg = String.format("log10(LR)=%.3f %s. %s. Observed weighted deleterious variant count: %.2f. λdisease=%d. λbackground=%.4f.", + Math.log10(ratio), msg, getMoIString(modeOfInh), observedWeightedDeleteriousVariantCount, lambda_disease, lambda_b); + return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.LIRICAL_GT_MODEL, ratio, msg); } private static String getMoIString(TermId MoI) { @@ -77,25 +88,66 @@ private static String getMoIString(TermId MoI) { return " Mode of inheritance: not available"; // should never happen } + /** + * @deprecated the method has been deprecated and will be removed in v3.0.0. + * Use {@link #of(GeneIdentifier, GenotypeLrMatchType, double, String)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "v2.0.0-RC3") public static GenotypeLrWithExplanation of(GeneIdentifier geneId, double lr, String explanation) { - return new GenotypeLrWithExplanation(geneId, lr, explanation); + return of(geneId, GenotypeLrMatchType.UNKNOWN, lr, explanation); + } + + public static GenotypeLrWithExplanation of(GeneIdentifier geneId, GenotypeLrMatchType matchType, double lr, String explanation) { + return new GenotypeLrWithExplanation(geneId, matchType, lr, explanation); } - private GenotypeLrWithExplanation(GeneIdentifier geneId, double lr, String explanation) { + private GenotypeLrWithExplanation(GeneIdentifier geneId, GenotypeLrMatchType matchType, double lr, String explanation) { this.geneId = Objects.requireNonNull(geneId); + this.matchType = Objects.requireNonNull(matchType); this.lr = lr; this.explanation = Objects.requireNonNull(explanation, "Explanation must not be null"); } - + /** + * Get the gene identifier for this genotype LR. + */ + @JsonGetter public GeneIdentifier geneId() { return geneId; } + /** + * Get the genotype LR match type. + */ + @JsonGetter + public GenotypeLrMatchType matchType() { + return matchType; + } + + /** + * Get the genotype likelihood ratio for the gene. Use {@link #log10Lr()} to get the log LR. + * + * @return the genotype likelihood ratio + */ + @JsonGetter public double lr() { return lr; } + /** + * Get the log10 LR for the gene. Use {@link #lr()} to get the non-transformed value. 
+ * + * @return the log10 of the genotype LR + */ + @JsonIgnore + public double log10Lr() { + return Math.log10(lr); + } + + /** + * @return an explanation of the genotype likelihood ratio + */ + @JsonGetter public String explanation() { return explanation; } @@ -105,18 +157,20 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; GenotypeLrWithExplanation that = (GenotypeLrWithExplanation) o; - return Double.compare(that.lr, lr) == 0 && Objects.equals(explanation, that.explanation); + return Double.compare(that.lr, lr) == 0 && Objects.equals(geneId, that.geneId) && matchType == that.matchType && Objects.equals(explanation, that.explanation); } @Override public int hashCode() { - return Objects.hash(lr, explanation); + return Objects.hash(geneId, matchType, lr, explanation); } @Override public String toString() { return "GenotypeLrWithExplanation{" + - "LR=" + lr + + "geneId=" + geneId + + ", matchType=" + matchType + + ", lr=" + lr + ", explanation='" + explanation + '\'' + '}'; } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java index 8274fe135..a889dfc0e 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java @@ -4,8 +4,7 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; import org.monarchinitiative.phenol.annotations.constants.hpo.HpoSubOntologyRootTermIds; -import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; 
import org.monarchinitiative.phenol.ontology.data.TermId; import java.util.*; @@ -42,8 +41,8 @@ public class InducedDiseaseGraph { private record CandidateMatch(TermId termId, int distance) { } - public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology) { - Map termFrequencies = new HashMap<>(disease.annotationCount()); + public static InducedDiseaseGraph create(HpoDisease disease, MinimalOntology ontology) { + Map termFrequencies = new HashMap<>(disease.annotations().size()); for (HpoDiseaseAnnotation annotation : disease.annotations()) { double frequency = annotation.frequency(); @@ -52,8 +51,7 @@ public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology) stack.push(cmatch); while (!stack.empty()) { CandidateMatch cm = stack.pop(); - Set parents = OntologyAlgorithm.getParentTerms(ontology, cm.termId, false); - for (TermId parentTermId : parents) { + for (TermId parentTermId : ontology.graph().getParents(cm.termId, false)) { if (parentTermId.equals(HpoSubOntologyRootTermIds.PHENOTYPIC_ABNORMALITY)) { continue; } @@ -68,10 +66,12 @@ public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology) } } } - Set absentPhenotypeTerms = disease.absentAnnotationsStream() + + Set negativeInducedGraph = disease.absentAnnotationsStream() .map(HpoDiseaseAnnotation::id) - .collect(Collectors.toUnmodifiableSet()); - Set negativeInducedGraph = OntologyAlgorithm.getAncestorTerms(ontology, absentPhenotypeTerms, true); + .distinct() + .flatMap(absent -> ontology.graph().getAncestorsStream(absent, true)) + .collect(Collectors.toSet()); return new InducedDiseaseGraph(disease, termFrequencies, negativeInducedGraph); } @@ -117,7 +117,7 @@ public HpoDisease getDisease() { * @param ontology HPO * @return The best hit */ - Term2Freq getClosestAncestor(TermId tid, Ontology ontology) { + Term2Freq getClosestAncestor(TermId tid, MinimalOntology ontology) { Queue queue = new LinkedList<>(); queue.add(tid); @@ -126,8 +126,8 @@ Term2Freq 
getClosestAncestor(TermId tid, Ontology ontology) { if (term2frequencyMap.containsKey(t)) { return new Term2Freq(t, term2frequencyMap.get(t)); } else { - Set parents = OntologyAlgorithm.getParentTerms(ontology, t, false); - queue.addAll(parents); + ontology.graph().getParents(t, false) + .forEach(queue::add); } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java index a6837c60f..3fdd2c313 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java @@ -1,7 +1,10 @@ package org.monarchinitiative.lirical.core.likelihoodratio; -import org.apache.commons.lang.StringUtils; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; +import org.apache.commons.lang3.StringUtils; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; /** @@ -36,22 +39,27 @@ private LrWithExplanation(TermId q, TermId m, LrMatchType mt, double lr, String this.explanation = explanation; } + @JsonGetter(value = "query") public TermId queryTerm() { return queryTerm; } + @JsonGetter(value = "match") public TermId matchingTerm() { return matchingTerm; } + @JsonGetter public LrMatchType matchType() { return matchType; } + @JsonGetter public double lr() { return lr; } + @JsonGetter public String explanation() { return explanation; } @@ -59,6 +67,7 @@ public String explanation() { /** * @return explanation text suitable for including in HTML documents */ + @JsonIgnore public String escapedExplanation() { return StringUtils.replaceEach(explanation, 
EXPLANATION_SEARCH_LIST, EXPLANATION_REPLACEMENT_LIST); } @@ -68,9 +77,9 @@ public String escapedExplanation() { */ // REMOVE(v2.0.0) @Deprecated(forRemoval = true) - public String getExplanation(Ontology ontology) { - String qtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.queryTerm).getName(), queryTerm.getValue()); - String mtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.matchingTerm).getName(), matchingTerm.getValue()); + public String getExplanation(MinimalOntology ontology) { + String qtermlabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue()); + String mtermlabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue()); double log10LR = Math.log10(lr); switch (this.matchType) { case EXACT_MATCH: @@ -105,9 +114,9 @@ public String getExplanation(Ontology ontology) { */ // REMOVE(v2.0.0) @Deprecated(forRemoval = true) - String getEscapedExplanation(Ontology ontology) { - String qtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.queryTerm).getName(), queryTerm.getValue()); - String mtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.matchingTerm).getName(), matchingTerm.getValue()); + String getEscapedExplanation(MinimalOntology ontology) { + String qtermlabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue()); + String mtermlabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue()); double log10LR = Math.log10(lr); switch (this.matchType) { case EXACT_MATCH: diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java index 9e0b2f353..c6f783787 
100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java @@ -1,13 +1,14 @@ package org.monarchinitiative.lirical.core.likelihoodratio; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; public class LrWithExplanationFactory { - private final Ontology ontology; + private final MinimalOntology ontology; - public LrWithExplanationFactory(Ontology ontology) { + public LrWithExplanationFactory(MinimalOntology ontology) { this.ontology = ontology; } @@ -20,8 +21,8 @@ public LrWithExplanation create(TermId queryTerm, TermId matchingTerm, LrMatchTy } private String getExplanation(TermId queryTerm, TermId matchingTerm, LrMatchType matchType, double lr) { - String queryTermLabel = String.format("%s[%s]", ontology.getTermMap().get(queryTerm).getName(), queryTerm.getValue()); - String matchTermLabel = String.format("%s[%s]", ontology.getTermMap().get(matchingTerm).getName(), matchingTerm.getValue()); + String queryTermLabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue()); + String matchTermLabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue()); double log10LR = Math.log10(lr); return switch (matchType) { case EXACT_MATCH -> String.format("E:%s[%.3f]", queryTermLabel, log10LR); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java index 10e407f64..acb5495b7 100644 --- 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java @@ -6,13 +6,14 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; +import java.util.stream.Collectors; /** * This class is designed to calculate the background and foreground frequencies of any HPO term in any disease @@ -38,7 +39,7 @@ public class PhenotypeLikelihoodRatio { /** The default frequency of a term in a disease if the explicit frequency is not available. */ public static final float DEFAULT_TERM_FREQUENCY = 1.f; // TODO - is this the right thing to do? /** The HPO ontology with all of its subontologies. */ - private final Ontology ontology; + private final MinimalOntology ontology; /** This map has one entry for each disease in our database. 
Key--the disease ID, e.g., OMIM:600200.*/ private final Map diseaseMap; private final LrWithExplanationFactory explanationFactory; @@ -60,7 +61,7 @@ public class PhenotypeLikelihoodRatio { * @param ontology The HPO ontology object * @param diseases List of all diseases for this simulation */ - public PhenotypeLikelihoodRatio(Ontology ontology, HpoDiseases diseases) { + public PhenotypeLikelihoodRatio(MinimalOntology ontology, HpoDiseases diseases) { this.ontology = ontology; this.diseaseMap = diseases.diseaseById(); this.explanationFactory = new LrWithExplanationFactory(ontology); // TODO - DI? @@ -77,7 +78,9 @@ public PhenotypeLikelihoodRatio(Ontology ontology, HpoDiseases diseases) { */ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph idg) { HpoDisease disease = idg.getDisease(); - Set queryAncestors = OntologyAlgorithm.getAncestorTerms(ontology,queryTid,true); + Set queryAncestors = ontology.graph() + .getAncestorsStream(queryTid, true) + .collect(Collectors.toSet()); if (disease.absentAnnotationsStream().anyMatch(a -> queryAncestors.contains(a.id()))) { // i.e., the query term is explicitly EXCLUDED in the disease definition return explanationFactory.create(queryTid, @@ -105,7 +108,7 @@ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph for (HpoDiseaseAnnotation annotation : disease.annotations()) { double frequency = annotation.frequency(); // is query an ancestor of a term that annotates the disease? 
- if (OntologyAlgorithm.isSubclass(ontology,annotation.id(),queryTid)) { + if (ontology.graph().isAncestorOf(queryTid, annotation.id())) { maximumFrequencyOfDescendantTerm=Math.max(maximumFrequencyOfDescendantTerm,frequency); diseaseMatchingTerm=annotation.id(); isAncestor=true; @@ -134,7 +137,7 @@ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph TermId bestMatchTermId = null; double denominatorForNonRootCommandAnc = getBackgroundFrequency(queryTid); for (HpoDiseaseAnnotation annotation : disease.annotations()) { - if (OntologyAlgorithm.isSubclass(ontology, queryTid, annotation.id())){ + if (ontology.graph().isAncestorOf(annotation.id(), queryTid)){ double proportionalFrequency = getProportionInChildren(queryTid,annotation.id()); double queryFrequency = annotation.frequency(); double f = proportionalFrequency*queryFrequency; @@ -218,10 +221,12 @@ public LrWithExplanation lrForExcludedTerm(TermId queryTid, InducedDiseaseGraph * @param ontology Reference to the HPO ontology * @return frequency of the term in the disease (including annotation propagation) */ - private static double getFrequencyOfTermInDiseaseWithAnnotationPropagation(TermId query, HpoDisease disease, Ontology ontology) { + private static double getFrequencyOfTermInDiseaseWithAnnotationPropagation(TermId query, HpoDisease disease, MinimalOntology ontology) { double maxFrequency = 0.0; for (HpoDiseaseAnnotation annotation : disease.annotations()) { - Set ancestors = ontology.getAncestorTermIds(annotation.id(),true); + Set ancestors = ontology.graph() + .getAncestorsStream(annotation.id(), true) + .collect(Collectors.toSet()); if (ancestors.contains(query)) maxFrequency = Math.max(maxFrequency, disease.getFrequencyOfTermInDisease(annotation.id()).map(Ratio::frequency).orElse(DEFAULT_TERM_FREQUENCY)); } @@ -263,14 +268,15 @@ private double getProportionInChildren(TermId queryTid, TermId diseaseTid) { if (queryTid.getId().equals(diseaseTid.getId())) { return 1.0; } - Set 
directChildren= OntologyAlgorithm.getChildTerms(ontology,diseaseTid,false); - if (directChildren.isEmpty()) { + List children = ontology.graph() + .getChildrenStream(diseaseTid, false) + .toList(); + if (children.isEmpty()) return 0.0; - } - for (TermId tid : directChildren) { + for (TermId tid : children) { if (queryTid.equals(tid)) { - return 1.0/(double)directChildren.size(); + return 1.0/(double) children.size(); } } // if we get here, there was no match @@ -301,7 +307,7 @@ private double getProportionInChildren(TermId queryTid, TermId diseaseTid) { * HPO terms in the ontology. */ private void initializeFrequencyMap() { Map mp = new HashMap<>(); - for (TermId tid : ontology.getNonObsoleteTermIds()) { + for (TermId tid : ontology.nonObsoleteTermIds()) { mp.put(tid, 0.0D); } Map mapbuilder = new HashMap<>(); @@ -313,16 +319,18 @@ private void initializeFrequencyMap() { for (HpoDiseaseAnnotation annotation : dis.annotations()) { TermId tid = annotation.id(); double termFrequency = annotation.frequency(); - TermId primaryTermId = ontology.getPrimaryTermId(tid); - if (primaryTermId == null) { + Optional term = ontology.termForTermId(tid); + if (term.isEmpty()) { logger.warn("Primary term ID for {} was not found!", tid.getValue()); continue; } + // All of the ancestor terms are implicitly annotated to tid // therefore, add this to their background frequencies. // Note we also include the original term here (third arg: true) - Set ancs = OntologyAlgorithm.getAncestorTerms(ontology,primaryTermId,true); - for (TermId at : ancs) { + // Regarding the unchecked `get()` below, we check that `term` is not empty above. 
+ //noinspection OptionalGetWithoutIsPresent + for (TermId at : ontology.graph().getAncestors(term.map(Term::id).get(), true)) { updateMap.putIfAbsent(at,termFrequency); // put the maximum frequency for this term given it is // an ancestor of one or more of the HPO terms that annotate diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java index 63abeb561..f27fabc45 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java @@ -1,4 +1,6 @@ -/** Classes related to the calculation of likelihood ratios for phenotypic or genotypic test results. +/** + * Package with logic for calculation of likelihood ratios for phenotypic or genotypic test results. + * * @author Peter Robinson */ package org.monarchinitiative.lirical.core.likelihoodratio; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java index f5d68c946..3ab068313 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java @@ -1,105 +1,130 @@ package org.monarchinitiative.lirical.core.model; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + import java.time.Period; import java.util.Objects; /** - * Convenience class to represent the age of a proband. Note that if (@link #initialized} is false, - * then we are representing the fact that we do not know the age we will disregard the feature - * in our calculations. 
We will represent prenatal age as number of completed gestational weeks and days, - * and {@link #isGestational()} flag will be set. + * Convenience class to represent the age of a subject. + *

+ * We represent both postnatal and gestational age. Use {@link #isGestational()} + * or {@link #isPostnatal()} to tell them apart. + *

+ * The postnatal age has {@link #getYears()}, {@link #getMonths()}, and {@link #getDays()} fields set + * and {@link #getWeeks()} should be ignored. + *

+ * The gestational age uses {@link #getWeeks()} and {@link #getDays()} fields. + * * @author Peter Robinson */ +@JsonSerialize(using = AgeSerializer.class) public class Age { - private final boolean isUnknown; private final boolean isGestational; private final int years; private final int months; private final int weeks; private final int days; - /** Used as a constant if we do not have information about the age of a proband. */ - private final static Age NOT_KNOWN = new Age(); - - private Age(int years, int months, int weeks, int days) { - this.years=years; - this.months=months; - this.weeks=weeks; - this.days=days; - this.isUnknown = false; - this.isGestational = weeks != 0; - } - - private Age() { - this.years=0; - this.months=0; - this.weeks=0; - this.days=0; - this.isUnknown = true; - this.isGestational = false; - } - public static Age ageNotKnown() { - return NOT_KNOWN; + private Age(int years, int months, int weeks, int days, boolean isGestational) { + this.years=requireNonNegativeInt(years, "Years must not be negative"); + this.months=requireNonNegativeInt(months, "Months must not be negative"); + this.weeks=requireNonNegativeInt(weeks, "Weeks must not be negative"); + this.days=requireNonNegativeInt(days, "Days must not be negative"); + this.isGestational = isGestational; } + @JsonIgnore public int getYears() { return years; } + @JsonIgnore public int getMonths() { return months; } + @JsonIgnore public int getWeeks() { return weeks; } + @JsonIgnore public int getDays() { return days; } - public boolean isUnknown() { - return isUnknown; - } - + @JsonIgnore public boolean isGestational() { return isGestational; } + @JsonIgnore public boolean isPostnatal() { return !isGestational; } + /** + * Create a postnatal age to represent {@code y} years of age. + * + * @param y a non-negative number of years. + */ public static Age ageInYears(int y) { return of(y,0,0); } + /** + * Create a postnatal age to represent {@code m} months of age. 
+ * + * @param m a non-negative number of months. + */ public static Age ageInMonths(int m) { return of(0,m,0); } + /** + * Create a postnatal age to represent {@code d} days of age. + * + * @param d a non-negative number of days. + */ public static Age ageInDays(int d) { return of(0,0,d); } /** * @param period representing postnatal (not gestational) age. - * @return age object */ public static Age parse(Period period) { Period normalized = period.normalized(); return of(normalized.getYears(), normalized.getMonths(), normalized.getDays()); } + /** + * Create a gestational age to represent {@code weeks} and {@code days}. + *

+ * {@code weeks} should generally not be greater than 42, and it must not be negative. + * {@code days} must be in range {@code [0,6]}. + * + * @param weeks a non-negative number of completed gestational weeks. + * @param days the number of completed gestational days. + */ public static Age gestationalAge(int weeks, int days) { - return new Age(0, 0, weeks, days); + return new Age(0, 0, weeks, days, true); } /** * Create a postnatal age from given inputs. */ public static Age of(int years, int months, int days) { - return new Age(years, months, 0, days); + return new Age(years, months, 0, days, false); + } + + private static int requireNonNegativeInt(int value, String msg) { + if (value < 0) { + throw new IllegalArgumentException(msg); + } else + return value; } @Override @@ -107,8 +132,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Age age = (Age) o; - return isUnknown == age.isUnknown && - years == age.years && + return years == age.years && months == age.months && weeks == age.weeks && days == age.days; @@ -116,14 +140,13 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(isUnknown, years, months, weeks, days); + return Objects.hash(years, months, weeks, days); } @Override public String toString() { return "Age{" + - "isUnknown=" + isUnknown + - ", years=" + years + + "years=" + years + ", months=" + months + ", weeks=" + weeks + ", days=" + days + diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java new file mode 100644 index 000000000..cddcc47f0 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java @@ -0,0 +1,25 @@ +package org.monarchinitiative.lirical.core.model; + +import com.fasterxml.jackson.core.JsonGenerator; +import
com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; + +import java.io.IOException; +import java.time.Period; + +class AgeSerializer extends StdSerializer { + + AgeSerializer() { + super(Age.class); + } + + AgeSerializer(Class t) { + super(t); + } + + @Override + public void serialize(Age age, JsonGenerator gen, SerializerProvider provider) throws IOException { + Period p = Period.of(age.getYears(), age.getMonths(), age.getDays()); + gen.writeString(p.normalized().toString()); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java new file mode 100644 index 000000000..63898430e --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java @@ -0,0 +1,73 @@ +package org.monarchinitiative.lirical.core.model; + +import java.util.Objects; +import java.util.Optional; + +/** + * A subset of ClinVar allele data relevant for LIRICAL analysis. + *

+ * We use the primary interpretation for prioritization and the allele ID for linking out + * (e.g. here for an allele ID 270003) + */ +public class ClinVarAlleleData { + + private final ClinvarClnSig clinvarClnSig; + private final Long alleleId; // we box since the alleleId is nullable. + + public static ClinVarAlleleData of(ClinvarClnSig clinvarClnSig, Long alleleId) { + return new ClinVarAlleleData(clinvarClnSig, alleleId); + } + + private ClinVarAlleleData(ClinvarClnSig clinvarClnSig, Long alleleId) { + this.clinvarClnSig = Objects.requireNonNull(clinvarClnSig); + this.alleleId = alleleId; // nullable + } + + /** + * @return the primary interpretation of the ClinVar data for the variant + */ + public ClinvarClnSig getClinvarClnSig() { + return clinvarClnSig; + } + + /** + * Get ClinVar allele ID. + *

+ * E.g. + * + * + * @return an {@linkplain Optional} ClinVar allele ID {@linkplain Long} or an empty {@linkplain Optional}. + */ + public Optional getAlleleId() { + return Optional.ofNullable(alleleId); + } + + /** + * @return ClinVar allele ID as {@linkplain String} + * @see #getAlleleId() + */ + public Optional getAlleleIdString() { + return alleleId == null ? Optional.empty() : Optional.of(alleleId.toString()); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClinVarAlleleData that = (ClinVarAlleleData) o; + return clinvarClnSig == that.clinvarClnSig && Objects.equals(alleleId, that.alleleId); + } + + @Override + public int hashCode() { + return Objects.hash(clinvarClnSig, alleleId); + } + + @Override + public String toString() { + return "ClinVarAlleleData{" + + "clinvarClnSig=" + clinvarClnSig + + ", alleleId=" + alleleId + + '}'; + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java index a68906c51..0b11ffb38 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java @@ -31,4 +31,21 @@ public boolean isPathogenicOrLikelyPathogenic() { default -> false; }; } + + /** + * @return {@code true} if the significance is one of {@link #BENIGN}, {@link #LIKELY_BENIGN}, or {@link #BENIGN_OR_LIKELY_BENIGN} + */ + public boolean isBenignOrLikelyBenign() { + return switch (this) { + case BENIGN, LIKELY_BENIGN, BENIGN_OR_LIKELY_BENIGN -> true; + default -> false; + }; + } + + /** + * @return {@code false} if the significance is one of {@link #BENIGN}, {@link #LIKELY_BENIGN}, or {@link #BENIGN_OR_LIKELY_BENIGN} + */ + public boolean notBenignOrLikelyBenign() { + return !isBenignOrLikelyBenign(); + } } diff 
--git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java index 2366f0c0d..aa4512def 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java @@ -1,9 +1,21 @@ package org.monarchinitiative.lirical.core.model; -public record FilteringStats(long nGoodQualityVariants, long nFilteredVariants) { +/** + * A summary of variant input and functional annotation. + * + * @param nPassingVariants number of variants that passed the input filtering and were subject to LIRICAL analysis. + * @param nFilteredVariants number of variants that failed the filtering and were not included in the analysis. + * @param genesWithVariants number of genes with one or more passing variant. + */ +public record FilteringStats(long nPassingVariants, + long nFilteredVariants, + long genesWithVariants) { + /** + * @return the total number of variants (good quality + filtered). 
+ */ public long variantCount() { - return nGoodQualityVariants + nFilteredVariants; + return nPassingVariants + nFilteredVariants; } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java index e209e3191..1be04f4be 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java @@ -4,14 +4,59 @@ import org.monarchinitiative.phenol.ontology.data.Identified; import org.monarchinitiative.phenol.ontology.data.TermId; -import java.util.Collection; -import java.util.Optional; +import java.util.*; import java.util.stream.Stream; +/** + * {@linkplain Gene2Genotype} represents variants that have been annotated to a single gene. The gene data includes + * the identifier of a gene, the variants annotated with respect to the gene, and convenience methods for using + * in the {@code LIRICAL} algorithm. + *

+ * Note, we only need the variants that passed the filtering for the analysis. + */ public interface Gene2Genotype extends Identified { + /** + * Create {@linkplain Gene2Genotype} from a collection of variants that can include the variants + * that failed the initial filtering. + *

+ * The failing variants will not be retained. + * + * @deprecated the method has been deprecated and will be removed in {@code v2.0.0}. + * Use {@link #of(GeneIdentifier, Collection, int)} instead. + * @param id the gene credentials. + * @param variants a collection of variants that passed/failed the initial filtering. + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") static Gene2Genotype of(GeneIdentifier id, Collection variants) { - return Gene2GenotypeDefault.of(id, variants); + int filteredOutVariantCount = 0; + List passingVariants = new ArrayList<>(variants.size()); + for (LiricalVariant variant : variants) { + if (variant.passedFilters()) + passingVariants.add(variant); + else + filteredOutVariantCount++; + } + return of(id, passingVariants, filteredOutVariantCount); + } + + /** + * Create {@linkplain Gene2Genotype} from provided data. + * + * @param geneId the gene credentials. + * @param passingVariants a collection of variants that passed the initial filtering. + * @param filteredOutVariantCount the number of variants that failed the initial filtering. + */ + static Gene2Genotype of(GeneIdentifier geneId, + Collection passingVariants, + int filteredOutVariantCount) { + Objects.requireNonNull(geneId, "Gene ID must not be null"); + Objects.requireNonNull(passingVariants, "Variants must not be null"); + if (passingVariants.isEmpty()) { + return new Gene2GenotypeDefault.Gene2GenotypeNoVariants(geneId, filteredOutVariantCount); + } else { + return new Gene2GenotypeDefault.Gene2GenotypeFull(geneId, passingVariants, filteredOutVariantCount); + } } // REMOVE(v2.0.0) @@ -21,6 +66,9 @@ default TermId id() { return geneId().id(); } + /** + * Get the credentials of the gene. + */ GeneIdentifier geneId(); /** @@ -33,25 +81,80 @@ default String symbol() { } /** + * Get a {@linkplain Stream} of variants annotated to this gene. * - * @return list of all variants found in this gene + * @return a stream of variants found in this gene. 
*/ Stream variants(); + /** + * Get the count of variants annotated to this gene that passed the filtering. + */ int variantCount(); + /** + * @return {@code true} if the gene is annotated with 1 or more variants that passed the filtering. + */ default boolean hasVariants() { return variantCount() != 0; } + /** + * Get the count of variants annotated to this gene which failed the filtering. + */ + default int filteredOutVariantCount() { + // This can explode if the number of variants overflows int. + // However, this is super unlikely to happen in practice. + return Math.toIntExact(variants().filter(LiricalVariant::failedFilters).count()); + } + + /** + * Get the number of predicted pathogenic/deleterious alleles in the gene for the {@code sampleId}. + *

+ * Note, only the variants that passed the filtering are considered. + */ default int pathogenicClinVarCount(String sampleId) { - return variants().filter(lv -> lv.clinvarClnSig().isPathogenicOrLikelyPathogenic()) + if (sampleId == null) + return 0; + return variants() + .filter(lv -> lv.clinVarAlleleData() + .map(cv -> cv.getClinvarClnSig().isPathogenicOrLikelyPathogenic()) + .orElse(false)) .mapToInt(var -> var.pathogenicClinVarAlleleCount(sampleId)) .sum(); } + /** + * @deprecated the method was deprecated and will be removed in v3.0.0. + * Use {@link #deleteriousAlleleCount(String, float)} instead. + * @see #deleteriousAlleleCount(String, float) + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") default int pathogenicAlleleCount(String sampleId, float pathogenicityThreshold) { - return variants().filter(var -> var.pathogenicityScore().map(f -> f >= pathogenicityThreshold).orElse(false)) + // REMOVE(v3.0.0) + return deleteriousAlleleCount(sampleId, pathogenicityThreshold); + } + + /** + * Get the count of alleles of predicted pathogenic/deleterious variants in the gene for the {@code sampleId}. + * The variants that are both not labeled as benign or likely benign by ClinVar and have the + * {@link LiricalVariant#pathogenicityScore()} at or above the provided {@code pathogenicityThreshold} + * are deemed to be predicted pathogenic/deleterious. + *

+ * Note, we take specific precautions to not clash with ClinVar variant interpretation and never consider ClinVar benign + * or likely benign variants as deleterious. + */ + default int deleteriousAlleleCount(String sampleId, float pathogenicityThreshold) { + if (sampleId == null) + return 0; + // The first part of the filter clause ensures we do not clash with ClinVar variant interpretation. + // In other words, a ClinVar benign or likely benign variant CANNOT be interpreted as deleterious + // based on in silico pathogenicity scores. + return variants() + .filter(var -> var.clinVarAlleleData() + .map(cv -> cv.getClinvarClnSig().notBenignOrLikelyBenign()) + .orElse(true) + && var.pathogenicityScore().map(f -> f >= pathogenicityThreshold).orElse(false)) .map(var -> var.alleleCount(sampleId)) .flatMap(Optional::stream) .mapToInt(AlleleCount::alt) @@ -59,7 +162,15 @@ default int pathogenicAlleleCount(String sampleId, float pathogenicityThreshold) } default double getSumOfPathBinScores(String sampleId, float pathogenicityThreshold) { - return variants().filter(variant -> variant.pathogenicityScore().orElse(0f) >= pathogenicityThreshold) + if (sampleId == null) + return 0.; + // Same as in `deleteriousAlleleCount(..)` above, the first part of the filter clause ensures + // we do not clash with ClinVar variant interpretation. 
+ return variants() + .filter(variant -> variant.clinVarAlleleData() + .map(cv -> cv.getClinvarClnSig().notBenignOrLikelyBenign()) + .orElse(true) + && variant.pathogenicityScore().orElse(0f) >= pathogenicityThreshold) .mapToDouble(variant -> { int altAlleleCount = variant.alleleCount(sampleId).map(AlleleCount::alt).orElse((byte) 0); return altAlleleCount * variant.pathogenicity(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java index ede1fb184..3004a1dfb 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java @@ -9,24 +9,16 @@ class Gene2GenotypeDefault { - static Gene2Genotype of(GeneIdentifier geneId, Collection variants) { - Objects.requireNonNull(geneId, "Gene ID must not be null"); - Objects.requireNonNull(variants, "Variants must not be null"); - if (variants.isEmpty()) { - return new Gene2GenotypeNoVariants(geneId); - } else { - return new Gene2GenotypeFull(geneId, variants); - } - } - - private static class Gene2GenotypeFull implements Gene2Genotype { + static class Gene2GenotypeFull implements Gene2Genotype { private final GeneIdentifier geneId; private final List variants; + private final int filteredOutVariantCount; - private Gene2GenotypeFull(GeneIdentifier geneId, Collection variants) { + Gene2GenotypeFull(GeneIdentifier geneId, Collection variants, int filteredOutVariantCount) { this.geneId = geneId; this.variants = List.copyOf(variants); + this.filteredOutVariantCount = filteredOutVariantCount; } @Override @@ -44,17 +36,22 @@ public int variantCount() { return variants.size(); } + @Override + public int filteredOutVariantCount() { + return filteredOutVariantCount; + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == 
null || getClass() != o.getClass()) return false; Gene2GenotypeFull that = (Gene2GenotypeFull) o; - return Objects.equals(geneId, that.geneId) && Objects.equals(variants, that.variants); + return filteredOutVariantCount == that.filteredOutVariantCount && Objects.equals(geneId, that.geneId) && Objects.equals(variants, that.variants); } @Override public int hashCode() { - return Objects.hash(geneId, variants); + return Objects.hash(geneId, variants, filteredOutVariantCount); } @Override @@ -62,11 +59,12 @@ public String toString() { return "Gene2GenotypeFull{" + "geneId=" + geneId + ", variants=" + variants + - '}'; + ", filteredOutVariantCount=" + filteredOutVariantCount + + '}'; } } - private record Gene2GenotypeNoVariants(GeneIdentifier geneId) implements Gene2Genotype { + record Gene2GenotypeNoVariants(GeneIdentifier geneId, int filteredOutVariantCount) implements Gene2Genotype { @Override public GeneIdentifier geneId() { @@ -85,5 +83,4 @@ public int variantCount() { } - } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java index d74cd7a2e..3af06cc33 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java @@ -1,6 +1,8 @@ package org.monarchinitiative.lirical.core.model; -import java.util.List; +import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; + +import java.util.*; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -14,12 +16,66 @@ static GenesAndGenotypes empty() { return GenesAndGenotypesDefault.empty(); } + /** + * @deprecated use {@link #fromVariants(Collection, Iterable)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") + static GenesAndGenotypes fromVariants(Iterable variants) { + return fromVariants(null, variants); + } + + static GenesAndGenotypes fromVariants(Collection sampleNames, Iterable variants) { + List g2g = groupVariantsByGenId(variants); + if (sampleNames == null) { + // TODO - remove after removal of the deprecated method above. + return of(g2g); + } else { + return of(sampleNames, g2g); + } + } + + private static List groupVariantsByGenId(Iterable variants) { + // Group variants by gene id. + Map> gene2Genotype = new HashMap<>(); + Map failedVariantCount = new HashMap<>(); + for (LiricalVariant variant : variants) { + Stream identifiers = variant.annotations().stream() + .map(TranscriptAnnotation::getGeneId) + .distinct(); + if (variant.passedFilters()) + identifiers.forEach(geneId -> gene2Genotype.computeIfAbsent(geneId, e -> new ArrayList<>()).add(variant)); + else + identifiers.forEach(geneId -> failedVariantCount.merge(geneId, 1, Integer::sum)); + } + + // Collect the variants into Gene2Genotype container + return gene2Genotype.entrySet().stream() + // We have 0 failed variants by default + .map(e -> Gene2Genotype.of(e.getKey(), e.getValue(), failedVariantCount.getOrDefault(e.getKey(), 0))) + .toList(); + } + + /** + * @deprecated use {@link #of(Collection, Collection)} instead. + */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") static GenesAndGenotypes of(List genes) { return genes.isEmpty() ? empty() : GenesAndGenotypesDefault.of(genes); } + static GenesAndGenotypes of(Collection sampleNames, Collection genes) { + return genes.isEmpty() + ? empty() + : GenesAndGenotypesDefault.of(sampleNames, genes); + } + + /** + * @return a collection with sample identifiers for whom we have the genotype data. + */ + Collection sampleNames(); + /** * @return number of genes in the container. 
*/ @@ -35,13 +91,15 @@ default Stream genes() { default FilteringStats computeFilteringStats() { AtomicLong passed = new AtomicLong(); AtomicLong failed = new AtomicLong(); - genes().flatMap(Gene2Genotype::variants) - .forEach(v -> { - if (v.passedFilters()) - passed.incrementAndGet(); - else failed.incrementAndGet(); - }); - return new FilteringStats(passed.get(), failed.get()); + AtomicLong genesWithVariants = new AtomicLong(); + genes().forEach(g -> { + if (g.hasVariants()) + genesWithVariants.incrementAndGet(); + passed.addAndGet(g.variantCount()); + failed.addAndGet(g.filteredOutVariantCount()); + }); + + return new FilteringStats(passed.get(), failed.get(), genesWithVariants.get()); } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java index 82b9672eb..6f36a0fa9 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java @@ -1,9 +1,7 @@ package org.monarchinitiative.lirical.core.model; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; +import java.util.*; +import java.util.stream.Collectors; class GenesAndGenotypesDefault { @@ -11,15 +9,28 @@ static GenesAndGenotypes empty() { return GenesAndGenotypesEmpty.INSTANCE; } - public static GenesAndGenotypes of(List genes) { - return new GenesAndGenotypesFull(genes); + /** + * @deprecated use {@link #of(Collection, Collection)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") + public static GenesAndGenotypes of(Collection genes) { + Set sampleNames = genes.stream() + .flatMap(Gene2Genotype::variants) + .flatMap(v -> v.sampleNames().stream()) + .collect(Collectors.toSet()); + return of(sampleNames, genes); } - record GenesAndGenotypesFull(List geneList) implements GenesAndGenotypes { + public static GenesAndGenotypes of(Collection sampleNames, + Collection genes) { + return new GenesAndGenotypesFull( + List.copyOf(Objects.requireNonNull(sampleNames, "Sample names must not be null")), + List.copyOf(Objects.requireNonNull(genes, "Gene list must not be null")) + ); + } - GenesAndGenotypesFull(List geneList) { - this.geneList = Objects.requireNonNull(geneList, "Gene list must not be null"); - } + record GenesAndGenotypesFull(List sampleNames, + List geneList) implements GenesAndGenotypes { @Override public int size() { @@ -39,6 +50,11 @@ private static class GenesAndGenotypesEmpty implements GenesAndGenotypes { private GenesAndGenotypesEmpty() { } + @Override + public Collection sampleNames() { + return List.of(); + } + @Override public int size() { return 0; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java index 0ea27c681..4404feda3 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java @@ -2,29 +2,74 @@ import org.monarchinitiative.svart.GenomicVariant; -import java.util.Map; -import java.util.Optional; -import java.util.Set; +import java.util.*; +/** + * A description of variant coordinates, sample genotypes, and filtering status for LIRICAL analysis. + *

+ * The variant has a {@link #genomeBuild()} to describe the reference system. + * The {@link #variant()} provides variant coordinates using Svart's {@link GenomicVariant} data structure. + * The variant genotypes for a set of samples can be accessed via {@link #alleleCount(String)}. + * Last, LIRICAL uses the variants that passed all filters in the analysis ({@link #passedFilters()}). + * However, we need to retain the failed variants too to report the passed/failed variants in the report. + */ public interface GenotypedVariant { + /** + * @deprecated deprecated in {@code v2.0.0} and subject to removal in {@code v3.0.0}. + * Use {@link #of(GenomeBuild, GenomicVariant, Collection, boolean)} instead. + */ + // REMOVE(v3.0.0) + @Deprecated(forRemoval = true, since = "2.0.0-RC3") static GenotypedVariant of(GenomeBuild genomeBuild, GenomicVariant variant, Map genotypes, boolean passedFilters) { - return new GenotypedVariantDefault(genomeBuild, variant, genotypes, passedFilters); + List alleleCounts = genotypes.entrySet().stream() + .map(e -> SampleAlleleCount.of(e.getKey(), e.getValue())) + .toList(); + return of(genomeBuild, variant, alleleCounts, passedFilters); + } + + static GenotypedVariant of(GenomeBuild genomeBuild, + GenomicVariant variant, + Collection alleleCounts, + boolean passedFilters) { + return GenotypedVariantDefault.of(genomeBuild, variant, alleleCounts, passedFilters); } + /** + * @return the genome build of the variant. + */ GenomeBuild genomeBuild(); + /** + * @return the variant coordinates in Svart's {@linkplain GenomicVariant}. + */ GenomicVariant variant(); + /** + * @return a set of sample identifiers where we have genotype data for this variant. + */ Set sampleNames(); - Optional alleleCount(String sample); + /** + * Get allele count for given sample. + * + * @param sampleId String with sample identifier. + * @return optional with the allele count or an empty optional if data for the sample is missing. 
+ */ + Optional alleleCount(String sampleId); /** - * @return true if the variant passed the filters in the variant source + * @return {@code true} if the variant passed the filters, according to the variant source (e.g. VCF file). */ boolean passedFilters(); + + /** + * @return {@code true} if the variant failed the filters, according to the variant source (e.g. VCF file). + */ + default boolean failedFilters() { + return !passedFilters(); + } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java index c8d4ad81f..683af3f2e 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java @@ -1,29 +1,39 @@ package org.monarchinitiative.lirical.core.model; +import org.monarchinitiative.lirical.core.util.BinarySearch; import org.monarchinitiative.svart.GenomicVariant; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; +import java.util.*; +import java.util.stream.Collectors; /** - * Implementation of {@link GenotypedVariant} with genotypes are stored in a {@link Map}. + * Implementation of {@link GenotypedVariant} with genotypes stored in an array. */ class GenotypedVariantDefault implements GenotypedVariant { private final GenomeBuild genomeBuild; private final GenomicVariant variant; - private final Map genotypes; + private final SampleAlleleCount[] alleleCounts; private final boolean passedFilters; + static GenotypedVariantDefault of(GenomeBuild genomeBuild, + GenomicVariant variant, + Collection alleleCounts, + boolean passedFilters) { + // We sort the counts by sample id to take advantage of the binary search. 
+ SampleAlleleCount[] counts = alleleCounts.stream() + .sorted(Comparator.comparing(SampleAlleleCount::getSampleId)) + .toArray(SampleAlleleCount[]::new); + return new GenotypedVariantDefault(genomeBuild, variant, counts, passedFilters); + } + GenotypedVariantDefault(GenomeBuild genomeBuild, GenomicVariant variant, - Map genotypes, + SampleAlleleCount[] alleleCounts, boolean passedFilters) { this.genomeBuild = Objects.requireNonNull(genomeBuild); this.variant = Objects.requireNonNull(variant); - this.genotypes = Objects.requireNonNull(genotypes); + this.alleleCounts = Objects.requireNonNull(alleleCounts); this.passedFilters = passedFilters; } @@ -40,12 +50,17 @@ public GenomicVariant variant() { @Override public Set sampleNames() { - return genotypes.keySet(); + return Arrays.stream(alleleCounts) + .map(SampleAlleleCount::getSampleId) + .collect(Collectors.toUnmodifiableSet()); } @Override - public Optional alleleCount(String sample) { - return Optional.ofNullable(genotypes.get(sample)); + public Optional alleleCount(String sampleId) { + if (sampleId == null) + return Optional.empty(); + return BinarySearch.binarySearch(alleleCounts, SampleAlleleCount::getSampleId, sampleId) + .map(SampleAlleleCount::getAlleleCount); } @Override @@ -58,12 +73,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; GenotypedVariantDefault that = (GenotypedVariantDefault) o; - return genomeBuild == that.genomeBuild && Objects.equals(variant, that.variant) && Objects.equals(genotypes, that.genotypes) && passedFilters == that.passedFilters; + return genomeBuild == that.genomeBuild && Objects.equals(variant, that.variant) && Arrays.equals(alleleCounts, that.alleleCounts) && passedFilters == that.passedFilters; } @Override public int hashCode() { - return Objects.hash(genomeBuild, variant, genotypes, passedFilters); + return Objects.hash(genomeBuild, variant, Arrays.hashCode(alleleCounts), passedFilters); } @Override 
@@ -71,7 +86,7 @@ public String toString() { return "GenotypedVariantDefault{" + "genomeBuild=" + genomeBuild + ", variant=" + variant + - ", genotypes=" + genotypes + + ", alleleCounts=" + Arrays.toString(alleleCounts) + ", passedFilters=" + passedFilters + '}'; } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java index a69a77463..49c4e6373 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java @@ -42,7 +42,7 @@ private HpoCase(String sampleId, List observedTerms, List exclud this.excludedAbnormalities = Objects.requireNonNull(excludedTerms); this.results = Objects.requireNonNull(results); this.sex = Objects.requireNonNull(sex); - this.age = Objects.requireNonNull(age); + this.age = age; } public String sampleId() { @@ -106,7 +106,7 @@ public Builder(String sampleId, List abnormalPhenotypes) { this.observedAbnormalities = List.copyOf(Objects.requireNonNull(abnormalPhenotypes)); excludedAbnormalities=List.of(); // default empty list sex=Sex.UNKNOWN; - age=Age.ageNotKnown(); + age=null; } public Builder excluded(List excludedPhenotypes) { diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java index 3376a16ba..2c7c12176 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java @@ -16,7 +16,9 @@ static LiricalVariant of(GenotypedVariant variant, List an * @return number of pathogenic alleles that are registered in ClinVar */ default int pathogenicClinVarAlleleCount(String sampleId) { - if (!clinvarClnSig().isPathogenicOrLikelyPathogenic()) { + if 
(sampleId == null) + return 0; + if (!clinVarAlleleData().map(cv -> cv.getClinvarClnSig().isPathogenicOrLikelyPathogenic()).orElse(false)) { return 0; } else { return alleleCount(sampleId) diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java index c3af7402b..50090cc9d 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java @@ -34,8 +34,8 @@ public Set sampleNames() { } @Override - public Optional alleleCount(String sample) { - return genotypedVariant.alleleCount(sample); + public Optional alleleCount(String sampleId) { + return genotypedVariant.alleleCount(sampleId); } @Override @@ -54,8 +54,8 @@ public float pathogenicity() { } @Override - public ClinvarClnSig clinvarClnSig() { - return variantMetadata.clinvarClnSig(); + public Optional clinVarAlleleData() { + return variantMetadata.clinVarAlleleData(); } @Override diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java new file mode 100644 index 000000000..0d43e43ec --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.lirical.core.model; + +import java.util.Objects; + +/** + * A container for associating sample id and the {@link AlleleCount}. 
+ */ +public class SampleAlleleCount { + + private final String sampleId; + private final AlleleCount alleleCount; + + public static SampleAlleleCount of(String sampleId, AlleleCount alleleCount) { + return new SampleAlleleCount(sampleId, alleleCount); + } + + private SampleAlleleCount(String sampleId, AlleleCount alleleCount) { + this.sampleId = Objects.requireNonNull(sampleId); + this.alleleCount = Objects.requireNonNull(alleleCount); + } + + public String getSampleId() { + return sampleId; + } + + public AlleleCount getAlleleCount() { + return alleleCount; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + SampleAlleleCount that = (SampleAlleleCount) o; + return Objects.equals(sampleId, that.sampleId) && Objects.equals(alleleCount, that.alleleCount); + } + + @Override + public int hashCode() { + return Objects.hash(sampleId, alleleCount); + } + + @Override + public String toString() { + return "SampleAlleleCount{" + + "sampleId='" + sampleId + '\'' + + ", alleleCount=" + alleleCount + + '}'; + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java index e1485d536..ae7ca240a 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java @@ -8,12 +8,21 @@ static VariantMetadata empty() { return VariantMetadataDefault.empty(); } + /** + * @deprecated from {@code 2.0.0-RC3}. Use {@link #of(float, float, ClinVarAlleleData)} instead. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") static VariantMetadata of(float frequency, float pathogenicity, ClinvarClnSig clinvarClnSig) { - return new VariantMetadataDefault(frequency, - pathogenicity, - clinvarClnSig); + ClinVarAlleleData data = ClinVarAlleleData.of(clinvarClnSig, null); + return of(frequency, pathogenicity, data); + } + + static VariantMetadata of(float frequency, + float pathogenicity, + ClinVarAlleleData clinVarAlleleData) { + return new VariantMetadataDefault(frequency, pathogenicity, clinVarAlleleData); } /** @@ -50,15 +59,29 @@ static VariantMetadata of(float frequency, default Optional pathogenicityScore() { // Heuristic -- Count ClinVar pathogenic or likely pathogenic as 1.0 (maximum pathogenicity score) // regardless of the Exomiser pathogenicity score - return clinvarClnSig().isPathogenicOrLikelyPathogenic() + return clinVarAlleleData() + .map(a -> a.getClinvarClnSig().isPathogenicOrLikelyPathogenic()) + .orElse(false) // go to the frequencyScore branch ? Optional.of(1f) : frequencyScore().map(fs -> fs * pathogenicity()); } /** + * @deprecated since 2.0.0-RC3 and will be removed in v3.0.0. Use {@link #clinVarAlleleData()} instead. * @return Clinvar clinical significance category. */ - ClinvarClnSig clinvarClnSig(); + // REMOVE(v3.0.0) + @Deprecated(forRemoval = true, since = "2.0.0-RC3") + default ClinvarClnSig clinvarClnSig() { + return clinVarAlleleData() + .map(ClinVarAlleleData::getClinvarClnSig) + .orElse(ClinvarClnSig.NOT_PROVIDED); + } + + /** + * @return ClinvarData for the variant, if available. + */ + Optional clinVarAlleleData(); /** * This is the frequency factor used for the Exomiser like pathogenicity score. It penalizes variants that have a higher @@ -77,8 +100,12 @@ default Optional frequencyScore() { }); } - + /** + * @deprecated the function has been deprecated without replacement and will be removed in v3.0.0. 
+ */ + @Deprecated(forRemoval = true, since = "2.0.0-RC3") static int compareByPathogenicity(VariantMetadata left, VariantMetadata right) { + // REMOVE(v3.0.0) return Float.compare(left.pathogenicity(), right.pathogenicity()); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java index 4b3c6fb87..b7b3ed5e2 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java @@ -5,7 +5,7 @@ class VariantMetadataDefault implements VariantMetadata { - private static final VariantMetadataDefault EMPTY = new VariantMetadataDefault(Float.NaN, Float.NaN, ClinvarClnSig.NOT_PROVIDED); + private static final VariantMetadataDefault EMPTY = new VariantMetadataDefault(Float.NaN, Float.NaN, null); static VariantMetadataDefault empty() { return EMPTY; @@ -13,14 +13,14 @@ static VariantMetadataDefault empty() { private final float frequency; private final float pathogenicity; - private final ClinvarClnSig clinvarClnSig; + private final ClinVarAlleleData clinVarAlleleData; VariantMetadataDefault(float frequency, float pathogenicity, - ClinvarClnSig clinvarClnSig) { + ClinVarAlleleData clinVarAlleleData) { this.frequency = frequency; this.pathogenicity = pathogenicity; - this.clinvarClnSig = Objects.requireNonNull(clinvarClnSig); + this.clinVarAlleleData = clinVarAlleleData; // nullable } @Override @@ -36,8 +36,8 @@ public float pathogenicity() { } @Override - public ClinvarClnSig clinvarClnSig() { - return clinvarClnSig; + public Optional clinVarAlleleData() { + return Optional.ofNullable(clinVarAlleleData); } @Override @@ -45,7 +45,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VariantMetadataDefault that = 
(VariantMetadataDefault) o; - return Float.compare(that.frequency, frequency) == 0 && Float.compare(that.pathogenicity, pathogenicity) == 0 && Objects.equals(clinvarClnSig, that.clinvarClnSig); + return Float.compare(that.frequency, frequency) == 0 && Float.compare(that.pathogenicity, pathogenicity) == 0 && Objects.equals(clinVarAlleleData, that.clinVarAlleleData); } @Override @@ -58,7 +58,7 @@ public String toString() { return "VariantMetadataDefault{" + "frequency=" + frequency + ", pathogenicity=" + pathogenicity + - ", clinvarClnSig=" + clinvarClnSig + + ", clinvarClnSig=" + clinVarAlleleData + '}'; } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java new file mode 100644 index 000000000..b11a05ebb --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java @@ -0,0 +1,4 @@ +/** + * Package with data models. 
+ */ +package org.monarchinitiative.lirical.core.model; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java index 0a56365ea..7d633dfcf 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java @@ -1,5 +1,8 @@ package org.monarchinitiative.lirical.core.output; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonIgnore; + public class AnalysisResultsMetadata { private String liricalVersion; private String hpoVersion; @@ -8,9 +11,9 @@ public class AnalysisResultsMetadata { private String exomiserPath; private String analysisDate; private String sampleName; - private long nGoodQualityVariants; + private long nPassingVariants; private long nFilteredVariants; - private int genesWithVar; + private long genesWithVar; private boolean globalMode; private AnalysisResultsMetadata(String liricalVersion, @@ -20,9 +23,9 @@ private AnalysisResultsMetadata(String liricalVersion, String exomiserPath, String analysisDate, String sampleName, - long nGoodQualityVariants, + long nPassingVariants, long nFilteredVariants, - int genesWithVar, + long genesWithVar, boolean globalMode) { this.liricalVersion = liricalVersion; this.hpoVersion = hpoVersion; @@ -31,7 +34,7 @@ private AnalysisResultsMetadata(String liricalVersion, this.exomiserPath = exomiserPath; this.analysisDate = analysisDate; this.sampleName = sampleName; - this.nGoodQualityVariants = nGoodQualityVariants; + this.nPassingVariants = nPassingVariants; this.nFilteredVariants = nFilteredVariants; this.genesWithVar = genesWithVar; this.globalMode = globalMode; @@ -57,6 +60,7 @@ public void setTranscriptDatabase(String transcriptDatabase) { 
this.transcriptDatabase = transcriptDatabase; } + @JsonIgnore public String getLiricalPath() { return liricalPath; } @@ -65,6 +69,7 @@ public void setLiricalPath(String liricalPath) { this.liricalPath = liricalPath; } + @JsonIgnore public String getExomiserPath() { return exomiserPath; } @@ -89,14 +94,16 @@ public void setSampleName(String sampleName) { this.sampleName = sampleName; } - public long getnGoodQualityVariants() { - return nGoodQualityVariants; + @JsonIgnore + public long getnPassingVariants() { + return nPassingVariants; } - public void setnGoodQualityVariants(long nGoodQualityVariants) { - this.nGoodQualityVariants = nGoodQualityVariants; + public void setnPassingVariants(long nPassingVariants) { + this.nPassingVariants = nPassingVariants; } + @JsonIgnore public long getnFilteredVariants() { return nFilteredVariants; } @@ -105,14 +112,16 @@ public void setnFilteredVariants(long nFilteredVariants) { this.nFilteredVariants = nFilteredVariants; } - public int getGenesWithVar() { + @JsonIgnore + public long getGenesWithVar() { return genesWithVar; } - public void setGenesWithVar(int genesWithVar) { + public void setGenesWithVar(long genesWithVar) { this.genesWithVar = genesWithVar; } + @JsonGetter(value = "isGlobalAnalysisMode") public boolean getGlobalMode() { return globalMode; } @@ -135,7 +144,7 @@ public String toString() { ", exomiserPath='" + exomiserPath + '\'' + ", analysisDate='" + analysisDate + '\'' + ", sampleName='" + sampleName + '\'' + - ", nGoodQualityVariants=" + nGoodQualityVariants + + ", nPassingVariants=" + nPassingVariants + ", nFilteredVariants=" + nFilteredVariants + ", genesWithVar=" + genesWithVar + ", globalMode=" + globalMode + @@ -150,9 +159,9 @@ public static class Builder { private String exomiserPath; private String analysisDate; private String sampleName = "SAMPLE_ID"; - private long nGoodQualityVariants; + private long nPassingVariants; private long nFilteredVariants; - private int genesWithVar; + private long 
genesWithVar; private boolean globalMode; private Builder() { @@ -193,8 +202,8 @@ public Builder setSampleName(String sampleName) { return this; } - public Builder setnGoodQualityVariants(long nGoodQualityVariants) { - this.nGoodQualityVariants = nGoodQualityVariants; + public Builder setnPassingVariants(long nPassingVariants) { + this.nPassingVariants = nPassingVariants; return this; } @@ -203,7 +212,7 @@ public Builder setnFilteredVariants(long nFilteredVariants) { return this; } - public Builder setGenesWithVar(int genesWithVar) { + public Builder setGenesWithVar(long genesWithVar) { this.genesWithVar = genesWithVar; return this; } @@ -221,7 +230,7 @@ public AnalysisResultsMetadata build() { exomiserPath, analysisDate, sampleName, - nGoodQualityVariants, + nPassingVariants, nFilteredVariants, genesWithVar, globalMode); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java index 9473d23b0..277126549 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java @@ -1,4 +1,4 @@ /** - * Classes for creating HTML or TSV output files for LIRICAL. + * Base data model for writing out the results of LIRICAL analysis. */ package org.monarchinitiative.lirical.core.output; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java new file mode 100644 index 000000000..99afd9954 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java @@ -0,0 +1,4 @@ +/** + * Core functionality of the LIRICAL algorithm. 
+ */ +package org.monarchinitiative.lirical.core; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java new file mode 100644 index 000000000..5b338edbf --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java @@ -0,0 +1,117 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; + +/** + * Shared functions for {@link InputSanitizer}s. + * + * @author Daniel Danis + */ +abstract class BaseInputSanitizer implements InputSanitizer { + + protected final MinimalOntology hpo; + + BaseInputSanitizer(MinimalOntology hpo) { + this.hpo = Objects.requireNonNull(hpo); + } + + protected void checkCuriesArePresentInHpo(List termIds, List issues) { + List toRemove = new ArrayList<>(); + int i = 0; + for (TermId termId : termIds) { + if (!hpo.containsTermId(termId)) { + issues.add(SanityIssue.warning( + "Term %s does not exist in HPO version %s".formatted(termId.getValue(), hpo.version().orElse("UNKNOWN")), + "Consider updating HPO or explore the HPO browser to choose alternative term")); + toRemove.add(i); + } + i++; + } + BaseInputSanitizer.removeElements(termIds, toRemove); + } + + protected static void removeElements(List termIds, Collection toRemove) { + toRemove.stream() + .distinct() + .sorted(Comparator.reverseOrder()) + .mapToInt(idx -> idx) + .forEachOrdered(termIds::remove); + } + + protected void checkTermsUsePrimaryIdentifiers(List termIds, List issues) { + List replacements = new ArrayList<>(termIds.size()); 
+ for (TermId termId : termIds) { + Term term = hpo.termForTermId(termId) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(termId.getValue()))); + + TermId primary = term.id(); + if (termId.equals(primary)) { + replacements.add(null); + } else { + issues.add(SanityIssue.warning( + "%s is an obsolete id of %s".formatted(termId.getValue(), term.getName()), + "Use %s instead".formatted(primary.getValue()))); + replacements.add(primary); + } + } + + for (int i = 0; i < replacements.size(); i++) { + TermId replacement = replacements.get(i); + if (replacement != null) + termIds.set(i, replacement); + } + } + + protected void checkVcf(String vcf, SanitizedInputs sanitized, List issues) { + if (vcf != null) { + Path path = Path.of(vcf); + if (Files.isRegularFile(path) && Files.isReadable(path)) { + sanitized.setVcf(path); + } else { + issues.add(SanityIssue.error( + "VCF path is set but %s does not point to a readable file".formatted(path.toAbsolutePath()), + "Update the path or the file permissions")); + } + + } + } + + protected static void checkCuriesAreWellFormed(SanitizedInputs sanitized, + List inputPresentTermIds, + List inputExcludedTermIds, + List issues) { + if (inputPresentTermIds.isEmpty() && inputExcludedTermIds.isEmpty()) { + issues.add(SanityIssue.error("No HPO terms were provided", "Add at least 1 HPO term to start")); + } else { + // We can check if the present terms are valid. + for (String curie : inputPresentTermIds) { + checkCurieIsValid(curie, sanitized.presentHpoTerms(), issues); + } + + // We can check if the excluded term IDs are valid. 
+ for (String curie : inputExcludedTermIds) { + checkCurieIsValid(curie, sanitized.excludedHpoTerms(), issues); + } + } + } + + private static void checkCurieIsValid(String curie, + List termIds, + List issues) { + try { + termIds.add(TermId.of(curie)); + } catch (PhenolRuntimeException e) { + issues.add(SanityIssue.warning( + "The term ID %s is invalid: %s".formatted(curie, e.getMessage()), + "Ensure the term ID consists of a valid prefix (e.g. `HP`) and id (e.g. `0001250`) " + + "joined by colon `:` or underscore `_`.")); + } + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java new file mode 100644 index 000000000..a69568cbf --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java @@ -0,0 +1,222 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.lirical.core.model.Age; +import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.time.Period; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +class ComprehensiveInputSanitizer extends BaseInputSanitizer { + + private static final TermId PHENOTYPIC_ABNORMALITY = TermId.of("HP:0000118"); + + public ComprehensiveInputSanitizer(MinimalOntology hpo){ + super(hpo); + } + + @Override + public SanitationResult sanitize(SanitationInputs inputs) { + List issues = new ArrayList<>(); + // sampleId is nullable, nothing to be checked there at this point. 
+ SanitizedInputs sanitized = new SanitizedInputs(inputs.sampleId()); + + // Check phenotypic features + checkCuriesAreWellFormed(sanitized, inputs.presentHpoTerms(), inputs.excludedHpoTerms(), issues); + checkPhenotypicFeatures(sanitized, issues); + + checkAge(inputs.age(), sanitized, issues); + checkSex(inputs.sex(), sanitized, issues); + + checkVcf(inputs.vcf(), sanitized, issues); + + return new SanitationResultDefault(sanitized, issues); + } + + private void checkPhenotypicFeatures(SanitizedInputs sanitized, List issues) { + checkTermsAreUnique(sanitized.presentHpoTerms(), issues); + checkTermsAreUnique(sanitized.excludedHpoTerms(), issues); + + checkCuriesArePresentInHpo(sanitized.presentHpoTerms(), issues); + checkCuriesArePresentInHpo(sanitized.excludedHpoTerms(), issues); + + checkTermsUsePrimaryIdentifiers(sanitized.presentHpoTerms(), issues); + checkTermsUsePrimaryIdentifiers(sanitized.excludedHpoTerms(), issues); + + checkTermsAreDescendantsOfPhenotypicAbnormality(sanitized.presentHpoTerms(), issues); + checkTermsAreDescendantsOfPhenotypicAbnormality(sanitized.excludedHpoTerms(), issues); + + checkTermsAreLogicallyConsistent(sanitized, issues); + } + + private void checkTermsAreUnique(List termIds, List issues) { + Map termCounts = termIds.stream() + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + + List toClean = new ArrayList<>(); + for (Map.Entry e : termCounts.entrySet()) { + if (e.getValue() > 1) { + issues.add(SanityIssue.warning( + "Term should be used at most once but %s is used %d times".formatted(e.getKey().getValue(), e.getValue()), + "Use a term at most once")); + toClean.add(e.getKey()); + } + } + + for (TermId termId : toClean) { + // Find indices to for removal. 
+ boolean found = false; + List toRemove = new ArrayList<>(); + for (int i = 0; i < termIds.size(); i++) { + TermId t = termIds.get(i); + if (t.equals(termId)) { + if (!found) { + found = true; + } else { + toRemove.add(i); + } + } + } + + // And then remove the terms + removeElements(termIds, toRemove); + } + } + + private void checkTermsAreDescendantsOfPhenotypicAbnormality(List termIds, List issues) { + List toRemove = new ArrayList<>(); + int i = 0; + for (TermId termId : termIds) { + if (!termId.equals(PHENOTYPIC_ABNORMALITY) && !hpo.graph().existsPath(termId, PHENOTYPIC_ABNORMALITY)) { + Term term = hpo.termForTermId(termId) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(termId.getValue()))); + issues.add(SanityIssue.warning( + "Term %s is not a descendant of Phenotypic abnormality".formatted( + formatTerm(term)), + "Consider removing %s from the phenotypic features".formatted(formatTerm(term)))); + toRemove.add(i); + } + i++; + } + removeElements(termIds, toRemove); + } + + private void checkTermsAreLogicallyConsistent(SanitizedInputs sanitized, List issues) { + pruneExcludedHpoTerms(sanitized.excludedHpoTerms(), issues); + prunePresentHpoTerms(sanitized.presentHpoTerms(), issues); + + checkNoPresentFeatureHasExcludedAncestor(sanitized, issues); + } + + private void pruneExcludedHpoTerms(List excludedTerms, + List issues) { + // Check the excluded features use the most general term. + // All terms whose ancestor is among excluded term ids must be removed. 
+ List toRemove = new ArrayList<>(); + int i = 0; + for (TermId termId : excludedTerms) { + for (TermId other : excludedTerms) { + if (!termId.equals(other) && hpo.graph().existsPath(termId, other)) { + Term term = hpo.termForTermId(termId) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(termId.getValue()))); + Term ancestor = hpo.termForTermId(other) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(other.getValue()))); + issues.add(SanityIssue.warning( + "Sample should not be annotated with excluded %s and its excluded ancestor %s".formatted(formatTerm(term), formatTerm(ancestor)), + "Remove %s from the phenotype terms".formatted(formatTerm(term)))); + toRemove.add(i); + break; + } + } + i++; + } + + removeElements(excludedTerms, toRemove); + } + + private void prunePresentHpoTerms(List presentTerms, List issues) { + // Check the present features use the most specific term. + // All ancestors of the present term ids must be removed. 
+ List toRemove = new ArrayList<>(); + int i = 0; + for (TermId termId : presentTerms) { + for (TermId other : presentTerms) { + if (!termId.equals(other) && hpo.graph().existsPath(other, termId)) { + Term term = hpo.termForTermId(other) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(other.getValue()))); + Term ancestor = hpo.termForTermId(termId) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(termId.getValue()))); + issues.add(SanityIssue.warning( + "Sample should not be annotated with %s and its ancestor %s".formatted(formatTerm(term), formatTerm(ancestor)), + "Remove %s from the phenotype terms".formatted(formatTerm(ancestor)))); + toRemove.add(i); + break; + } + } + i++; + } + + removeElements(presentTerms, toRemove); + } + + private void checkNoPresentFeatureHasExcludedAncestor(SanitizedInputs sanitized, List issues) { + for (TermId present : sanitized.presentHpoTerms()) { + for (TermId excluded : sanitized.excludedHpoTerms()) { + if (present.equals(excluded)) { + // Term is both present and excluded. + Term term = hpo.termForTermId(present) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(present.getValue()))); + issues.add(SanityIssue.error( + "Sample must not be annotated with %s in present and excluded state at the same time".formatted(formatTerm(term)), + "Make up your mind")); + } else if (hpo.graph().getAncestorsStream(present).anyMatch(anc -> anc.equals(excluded))) { + // Term has an excluded ancestor. 
+ Term presentTerm = hpo.termForTermId(present) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(present.getValue()))); + Term excludedTerm = hpo.termForTermId(excluded) + .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(excluded.getValue()))); + issues.add(SanityIssue.error( + "Sample must not be annotated with %s while its ancestor %s is excluded".formatted( + formatTerm(presentTerm), formatTerm(excludedTerm)), + "Resolve the logical inconsistency by choosing one of the terms")); + } + } + } + } + + private static void checkAge(String age, SanitizedInputs sanitized, List issues) { + if (age != null) { + try { + Period period = Period.parse(age); + sanitized.setAge(Age.parse(period)); + } catch (DateTimeParseException e) { + issues.add(SanityIssue.warning( + "Age %s could not be parsed: %s".formatted(age, e.getMessage()), + "Format age as a ISO8601 duration (e.g. `P22Y6M`)")); + } + } + } + + private static void checkSex(String sex, SanitizedInputs sanitized, List issues) { + if (sex != null) { + try { + sanitized.setSex(Sex.valueOf(sex.toUpperCase())); + } catch (IllegalArgumentException e) { + issues.add(SanityIssue.warning( + "Sex %s could not be parsed".formatted(sex), + "Use one of {'male', 'female', 'unknown'}")); + } + } + } + + private static String formatTerm(Term term) { + return "%s [%s]".formatted(term.getName(), term.id().getValue()); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java new file mode 100644 index 000000000..5c3ebe68f --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java @@ -0,0 +1,11 @@ +package org.monarchinitiative.lirical.core.sanitize; + +/** + * Sanitize the user input before running the analysis. 
+ * + * @author Daniel Danis + */ +public interface InputSanitizer { + + SanitationResult sanitize(SanitationInputs inputs); +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java new file mode 100644 index 000000000..a62cc62f5 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java @@ -0,0 +1,22 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; + +/** + * Get the input sanitizer with required level + */ +public class InputSanitizerFactory { + + private final MinimalOntology hpo; + + public InputSanitizerFactory(MinimalOntology hpo) { + this.hpo = hpo; + } + + public InputSanitizer forType(SanitizerType type) { + return switch (type) { + case COMPREHENSIVE -> new ComprehensiveInputSanitizer(hpo); + case MINIMAL -> new MinimalInputSanitizer(hpo); + }; + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java new file mode 100644 index 000000000..e478f6e50 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java @@ -0,0 +1,76 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.lirical.core.model.Age; +import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; + +import java.time.Period; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.List; + +/** + * Minimal sanitizer performs as few checks as possible. + *

+ * The HPO terms are checked if they are well-formed CURIEs that exist in given HPO. Obsolete term IDs are replaced + * with the current term IDs. + *

+ * If path to VCF is set, then it must point to a readable file. + * + * @author Daniel Danis + */ +class MinimalInputSanitizer extends BaseInputSanitizer { + + MinimalInputSanitizer(MinimalOntology hpo) { + super(hpo); + } + + @Override + public SanitationResult sanitize(SanitationInputs inputs) { + List issues = new ArrayList<>(); + + // sampleId is nullable, nothing to be checked there at this point. + SanitizedInputs sanitized = new SanitizedInputs(inputs.sampleId()); + + // Check phenotypic features + checkCuriesAreWellFormed(sanitized, inputs.presentHpoTerms(), inputs.excludedHpoTerms(), issues); + checkPhenotypicFeatures(sanitized, issues); + + // Convert the age and sex if possible, or ignore. + sanitized.setAge(parseAgeOrNull(inputs.age())); + sanitized.setSex(parseSexOrNull(inputs.sex())); + + // + checkVcf(inputs.vcf(), sanitized, issues); + + + return new SanitationResultDefault(sanitized, issues); + } + + private static Age parseAgeOrNull(String age) { + try { + return Age.parse(Period.parse(age)); + } catch (Exception ignored) { + return null; + } + } + + private static Sex parseSexOrNull(String sex) { + try { + return Sex.valueOf(sex.toUpperCase()); + } catch (Exception ignored) { + return null; + } + } + + /** + * Check that CURIEs are present in HPO and upgrade to primary identifier if the obsolete term is being used. 
+ */ + private void checkPhenotypicFeatures(SanitizedInputs sanitized, List issues) { + checkCuriesArePresentInHpo(sanitized.presentHpoTerms(), issues); + checkCuriesArePresentInHpo(sanitized.excludedHpoTerms(), issues); + + checkTermsUsePrimaryIdentifiers(sanitized.presentHpoTerms(), issues); + checkTermsUsePrimaryIdentifiers(sanitized.excludedHpoTerms(), issues); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java new file mode 100644 index 000000000..50244f054 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java @@ -0,0 +1,39 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import java.util.List; + +/** + * The sanitation requirements. + */ +public interface SanitationInputs { + /** + * @return a string with the sample ID or {@code null} if not available. + */ + String sampleId(); + + /** + * @return a list with CURIEs of HPO terms that represent the phenotypic features observed in the index patient. + */ + List presentHpoTerms(); + + /** + * @return a list with CURIEs of HPO terms that represent the phenotypic features that were investigated + * and excluded in the index patient. + */ + List excludedHpoTerms(); + + /** + * @return a string with the age or {@code null} if not available. + */ + String age(); + + /** + * @return a string with the sex or {@code null} if not available. + */ + String sex(); + + /** + * @return a string with the path of the VCF file with variants or {@code null} if not available. 
+ */ + String vcf(); +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java new file mode 100644 index 000000000..b1043c911 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java @@ -0,0 +1,41 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import java.util.Collection; + +/** + * Result of the input sanitation. + *

+ * The result consists of the inputs that were sanitized to the greatest extent possible + * and of the collection of issues that were found. Note that the sanitized data may be invalid + * even after the sanitation if further sanitation is impossible without manual intervention. + * + * @author Daniel Danis + */ +public interface SanitationResult { + + /** + * @return the inputs sanitized to the greatest extent possible. + */ + SanitizedInputs sanitizedInputs(); + + /** + * @return a collection with sanity issues found in the input data. + */ + Collection issues(); + + /** + * @return {@code true} if there is at least one issue in the analysis inputs. + */ + default boolean hasErrorOrWarnings() { + return !issues().isEmpty(); + } + + /** + * @return {@code true} if there is at least one serious issue/error in the analysis inputs. + */ + default boolean hasErrors() { + return issues().stream() + .anyMatch(i -> i.level().equals(SanityLevel.ERROR)); + } + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java new file mode 100644 index 000000000..c96512d8a --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java @@ -0,0 +1,16 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import java.util.Collection; + +/** + * Results of the sanitation of {@link SanitationInputs} by {@link InputSanitizer}. + *

+ * The {@link #sanitizedInputs()} provides data that were sanitized to the greatest extent possible. + * + * @param sanitizedInputs the sanitized data. + * @param issues a collection of issues found during sanitation. + */ +record SanitationResultDefault(SanitizedInputs sanitizedInputs, + Collection issues) implements SanitationResult { + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java new file mode 100644 index 000000000..19dd54d06 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java @@ -0,0 +1,47 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.lirical.core.model.Age; +import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.nio.file.Path; +import java.time.Period; +import java.time.format.DateTimeParseException; +import java.util.Collection; +import java.util.List; + +class SanitationResultNotRun implements SanitationResult { + + private final SanitizedInputs sanitizedInputs; + + private static Age parseAge(String input) { + if (input == null) + return null; + try { + return Age.parse(Period.parse(input)); + } catch (DateTimeParseException e) { + return null; + } + } + + SanitationResultNotRun(SanitationInputs inputs) { + sanitizedInputs = new SanitizedInputs(inputs.sampleId(), + inputs.presentHpoTerms().stream().map(TermId::of).toList(), + inputs.excludedHpoTerms().stream().map(TermId::of).toList(), + parseAge(inputs.age()), + inputs.sex() == null ? null : Sex.valueOf(inputs.sex()), /* sex is nullable like age and vcf; Enum.valueOf(null) would throw NullPointerException */ + inputs.vcf() == null ? 
null : Path.of(inputs.vcf()) + ); + } + + + @Override + public SanitizedInputs sanitizedInputs() { + return sanitizedInputs; + } + + @Override + public Collection issues() { + return List.of(); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java new file mode 100644 index 000000000..762539ac0 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java @@ -0,0 +1,88 @@ +package org.monarchinitiative.lirical.core.sanitize; + +import org.monarchinitiative.lirical.core.model.Age; +import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * Result of input sanitation. + *

+ * The HPO terms are guaranteed to be valid HPO CURIEs, age and sex are either well-formed or {@code null}, + * and VCF points to a readable file. + * + * @author Daniel Danis + */ +public final class SanitizedInputs { + private final String sampleId; + private final List presentHpoTerms = new ArrayList<>(); + private final List excludedHpoTerms = new ArrayList<>(); + private Age age; + private Sex sex; + private Path vcf; + + SanitizedInputs(String sampleId) { + this.sampleId = sampleId; + } + + SanitizedInputs(String sampleId, + Collection present, + List excluded, + Age age, + Sex sex, + Path vcf) { + this.sampleId = sampleId; + this.presentHpoTerms.addAll(present); + this.excludedHpoTerms.addAll(excluded); + this.age = age; // nullable + this.sex = sex; // nullable + this.vcf = vcf; // nullable + } + + public String sampleId() { + return sampleId; + } + + public List presentHpoTerms() { + return presentHpoTerms; + } + + public List excludedHpoTerms() { + return excludedHpoTerms; + } + + void setAge(Age age) { + this.age = age; + } + + public Age age() { + return age; + } + + + void setSex(Sex sex) { + this.sex = sex; + } + + public Sex sex() { + return sex; + } + + void setVcf(Path vcf) { + this.vcf = vcf; + } + + public Path vcf() { + return vcf; + } + + @Override + public String toString() { + return "SanitizedInputs[" + "sampleId=" + sampleId + ", " + "presentHpoTerms=" + presentHpoTerms + ", " + "excludedHpoTerms=" + excludedHpoTerms + ", " + "age=" + age + ", " + "sex=" + sex + ", " + "vcf=" + vcf + ']'; + } + +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java new file mode 100644 index 000000000..0bcc0a9d3 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java @@ -0,0 +1,18 @@ +package org.monarchinitiative.lirical.core.sanitize; + +/** + * Enum to 
represent the existing sanitizer types. + * + * @author Daniel Danis + */ +public enum SanitizerType { + /** + * Comprehensive sanitizer performs the broadest array of checks to point out all errors and warnings. + */ + COMPREHENSIVE, + + /** + * Minimal sanitizer performs the minimal checks required for the analysis to be runnable. + */ + MINIMAL +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java new file mode 100644 index 000000000..5a2eb5b95 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java @@ -0,0 +1,20 @@ +package org.monarchinitiative.lirical.core.sanitize; + +/** + * An issue that was found in the analysis input. + * + * @param level severity of the issue. + * @param message description of the issue for humans. + * @param solution the proposed solution or {@code null} if N/A. + * + * @author Daniel Danis + */ +public record SanityIssue(SanityLevel level, String message, String solution) { + public static SanityIssue error(String message, String solution) { + return new SanityIssue(SanityLevel.ERROR, message, solution); + } + + public static SanityIssue warning(String message, String solution) { + return new SanityIssue(SanityLevel.WARNING, message, solution); + } +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java new file mode 100644 index 000000000..db19186f9 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java @@ -0,0 +1,19 @@ +package org.monarchinitiative.lirical.core.sanitize; + +/** + * Represents the severity of an issue found during input data sanitation. 
+ * + * @author Daniel Danis + */ +public enum SanityLevel { + + /** + * Serious issues in the input data and the analysis cannot be carried out. + */ + ERROR, + + /** + * Something is not right, and you probably should not proceed. However, the analysis will likely complete. + */ + WARNING, +} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java new file mode 100644 index 000000000..50c136a98 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java @@ -0,0 +1,10 @@ +/** + * Sanitize the user-provided {@link org.monarchinitiative.lirical.core.sanitize.SanitationInputs}. + * + * @see org.monarchinitiative.lirical.core.sanitize.InputSanitizerFactory + * @see org.monarchinitiative.lirical.core.sanitize.InputSanitizer + * @see org.monarchinitiative.lirical.core.sanitize.SanitationResult + * + * @author Daniel Danis + */ +package org.monarchinitiative.lirical.core.sanitize; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java index e78e54777..33a84607c 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java @@ -1,6 +1,7 @@ package org.monarchinitiative.lirical.core.service; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,9 +12,9 @@ public class HpoTermSanitizer { private static final Logger LOGGER = 
LoggerFactory.getLogger(HpoTermSanitizer.class); - private final Ontology hpo; + private final MinimalOntology hpo; - public HpoTermSanitizer(Ontology hpo) { + public HpoTermSanitizer(MinimalOntology hpo) { this.hpo = hpo; } @@ -25,14 +26,15 @@ public HpoTermSanitizer(Ontology hpo) { * */ public Optional replaceIfObsolete(TermId termId) { - if (!hpo.getTermMap().containsKey(termId)) { + Optional term = hpo.termForTermId(termId); + if (term.isEmpty()) { LOGGER.warn("Dropping unknown HPO term id {}", termId.getValue()); return Optional.empty(); } - if (hpo.getObsoleteTermIds().contains(termId)) { - TermId primary = hpo.getPrimaryTermId(termId); - LOGGER.info("Replacing obsolete HPO term id {} with current id {}", termId, primary); - return Optional.of(primary); + Term t = term.get(); + if (!t.id().equals(termId)) { + LOGGER.info("Replacing obsolete HPO term id {} with current id {}", termId, t.id()); + return Optional.of(t.id()); } return Optional.of(termId); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java index 6a398aeeb..e58a06383 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java @@ -2,17 +2,17 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; public interface PhenotypeService { - static PhenotypeService of(Ontology ontology, + static PhenotypeService of(MinimalOntology ontology, HpoDiseases diseases, HpoAssociationData associationData) { return new PhenotypeServiceImpl(ontology, diseases, associationData); } - Ontology hpo(); + 
MinimalOntology hpo(); HpoDiseases diseases(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java index 03cf8ec06..2ef85c010 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java @@ -2,9 +2,9 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; -record PhenotypeServiceImpl(Ontology hpo, +record PhenotypeServiceImpl(MinimalOntology hpo, HpoDiseases diseases, HpoAssociationData associationData) implements PhenotypeService { } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java new file mode 100644 index 000000000..7ad044fe5 --- /dev/null +++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java @@ -0,0 +1,4 @@ +/** + * Package with services used across LIRICAL. 
+ */
+package org.monarchinitiative.lirical.core.service;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java
new file mode 100644
index 000000000..f81eaae91
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java
@@ -0,0 +1,76 @@
+package org.monarchinitiative.lirical.core.util;
+
+import java.util.Comparator;
+import java.util.Optional;
+import java.util.function.Function;
+
+/**
+ * Static utility class with the binary search implementation for arrays of items with custom key extractor function.
+ */
+public class BinarySearch {
+
+    private BinarySearch() {
+        // static utility class, no instances
+    }
+
+    /**
+     * Perform a binary search on an array of sorted {@code T}s using the {@code keyExtractor} function for extracting
+     * the key for comparison. The keys are compared using their natural order.
+     * <p>
+     * The array must be sorted by the key returned by {@code keyExtractor}. Otherwise, the behavior is undefined.
+     * If several items share the searched key, there is no guarantee which one of them is returned.
+     *
+     * @param haystack     an array of items sorted by the key returned by the {@code keyExtractor} function.
+     * @param keyExtractor a function for extracting a key with natural comparison order.
+     * @param needle       the key of the item we are searching for.
+     * @param <T>          type of the array items
+     * @param <U>          type of the comparison key
+     * @return an {@link Optional} with the found item or an empty optional if the item is not present in the array.
+     */
+    public static <T, U extends Comparable<U>> Optional<T> binarySearch(T[] haystack,
+                                                                         Function<T, U> keyExtractor,
+                                                                         U needle) {
+        return binarySearch(haystack, keyExtractor, U::compareTo, needle);
+    }
+
+    /**
+     * Perform a binary search on an array of sorted {@code T}s using the {@code keyExtractor} function for extracting
+     * the key and the {@code comparator} for comparing the keys.
+     * <p>
+     * The array must be sorted by the key returned by {@code keyExtractor}, in the order defined by {@code comparator}.
+     * Otherwise, the behavior is undefined.
+     * If several items share the searched key, there is no guarantee which one of them is returned.
+     *
+     * @param haystack     an array of items sorted by the {@code keyExtractor} and {@code comparator} functions.
+     * @param keyExtractor a function for extracting the comparison key from an array item.
+     * @param comparator   a function for comparing the key instances.
+     * @param needle       the key of the item we are searching for.
+     * @param <T>          type of the array items
+     * @param <U>          type of the comparison key
+     * @return an {@link Optional} with the found item or an empty optional if the item is not present in the array.
+     */
+    public static <T, U> Optional<T> binarySearch(T[] haystack,
+                                                  Function<T, U> keyExtractor,
+                                                  Comparator<U> comparator,
+                                                  U needle) {
+        // Search the closed index interval [low, high]. An empty array yields high == -1,
+        // so the loop is skipped and no dedicated empty/out-of-bounds guard is needed.
+        int low = 0;
+        int high = haystack.length - 1;
+
+        while (low <= high) {
+            int mid = low + ((high - low) / 2); // written this way to avoid int overflow of (low + high)
+            T item = haystack[mid];
+            int comparison = comparator.compare(needle, keyExtractor.apply(item));
+            if (comparison == 0) {
+                return Optional.ofNullable(item);
+            } else if (comparison < 0) {
+                high = mid - 1;
+            } else {
+                low = mid + 1;
+            }
+        }
+
+        return Optional.empty();
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java
new file mode 100644
index 000000000..078d35c2e
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Package with utility methods such as binary search with custom comparator for extracting the sorting key.
+ */ +package org.monarchinitiative.lirical.core.util; \ No newline at end of file diff --git a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl b/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl deleted file mode 100644 index cc22b0295..000000000 --- a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl +++ /dev/null @@ -1,11 +0,0 @@ -! LIRICAL TSV Output (${resultsMeta.liricalVersion}) -! Sample: ${resultsMeta.sampleName!"n/a"} -! Observed HPO terms -<#assign tab="\t"> -<#list observedHPOs as hpo> -! ${hpo} - -${header} -<#list diff as dd> -${dd.rank}${tab}${dd.diseaseName}${tab}${dd.diseaseCurie}${tab}${dd.pretestprob}${tab}${dd.posttestprob}${tab}${dd.compositeLR}${tab}${dd.entrezGeneId}${tab}${dd.varString} - \ No newline at end of file diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java index 50d7814ba..b12e3a61a 100644 --- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java @@ -48,6 +48,7 @@ public void testOneClinVarVariant() { Gene2Genotype g2g = setupGeneToGenotype(MADE_UP_GENE, 1, 1, 0.8); GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(Map.of(), 0.1), OPTIONS); GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_DOMINANT)); + assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD)); Assertions.assertEquals(1000, gle.lr(), EPSILON); } @@ -62,6 +63,7 @@ public void testTwoClinVarVariants() { GenotypeLikelihoodRatio glr = new 
GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(Map.of(), 0.1), OPTIONS); GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_RECESSIVE)); + assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR)); Assertions.assertEquals(1000. * 1000, gle.lr(), EPSILON); } @@ -80,6 +82,7 @@ public void testHLA_Bsituation() { GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(background, 0.1), OPTIONS); GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_DOMINANT)); // heuristic score + assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.NO_VARIANTS_DETECTED_AD)); Assertions.assertEquals(0.05, gle.lr(), EPSILON); } @@ -97,6 +100,7 @@ public void testRecessiveManyCalledPathVariants() { GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(g2background, 0.1), OPTIONS); GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_RECESSIVE)); // heuristic score for AR + assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.NO_VARIANTS_DETECTED_AR)); Assertions.assertEquals(0.05 * 0.05, gle.lr(), EPSILON); } @@ -108,7 +112,7 @@ public void thrbExample() { when(g2g.geneId()).thenReturn(thrbId); when(g2g.hasVariants()).thenReturn(true); when(g2g.pathogenicClinVarCount(SAMPLE_ID)).thenReturn(0); - when(g2g.pathogenicAlleleCount(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(56); + when(g2g.deleteriousAlleleCount(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(56); when(g2g.getSumOfPathBinScores(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(44.80000); Map gene2Background = Map.of(thrbId.id(), 0.006973); @@ -118,7 +122,8 @@ public void thrbExample() { // TODO - check assertThat(gle.geneId(), equalTo(thrbId)); + assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.LIRICAL_GT_MODEL)); assertThat(gle.lr(), is(closeTo(1.719420800179587e109, EPSILON))); - 
assertThat(gle.explanation(), equalTo("log10(LR)=109.235 P(G|D)=0.0000. P(G|¬D)=0.0000. Mode of inheritance: autosomal recessive. Observed weighted pathogenic variant count: 44.80. λdisease=2. λbackground=0.0070.")); + assertThat(gle.explanation(), equalTo("log10(LR)=109.235 P(G|D)=0.0000. P(G|¬D)=0.0000. Mode of inheritance: autosomal recessive. Observed weighted deleterious variant count: 44.80. λdisease=2. λbackground=0.0070.")); } } diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java index 61d06126c..6d8464701 100644 --- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java @@ -58,7 +58,7 @@ public void init() { List list1 = createTestList(some, 2.0, 3.0, 4.0); List excluded = List.of(); double prevalence = 0.025; - GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, 2.0, "Explanation"); + GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, GenotypeLrMatchType.LIRICAL_GT_MODEL, 2.0, "Explanation"); tresultWithGenotype = TestResult.of(d1.id(), prevalence, list1,excluded, genotypeLr); tresultNoGenotype = TestResult.of(d1.id(), prevalence, list1,excluded, null); } @@ -183,7 +183,7 @@ public void testTestResultSorting() { // The ranks of the objects get set in the evaluate method of HpoCase so cannot be tested here. 
// now add another test result, same as result3 but with additional genotype evidence // result4 should now be the top hit - GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, 2.0, "Explanation"); + GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, GenotypeLrMatchType.LIRICAL_GT_MODEL, 2.0, "Explanation"); TestResult result4= TestResult.of(d3.id(), prevalence, list3,excluded, genotypeLr); lst.add(result4); assertEquals(lst.get(3),result4); diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java new file mode 100644 index 000000000..359798208 --- /dev/null +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java @@ -0,0 +1,60 @@ +package org.monarchinitiative.lirical.core.model; + +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.monarchinitiative.svart.CoordinateSystem; +import org.monarchinitiative.svart.GenomicVariant; +import org.monarchinitiative.svart.Strand; +import org.monarchinitiative.svart.assembly.GenomicAssemblies; +import org.monarchinitiative.svart.assembly.GenomicAssembly; + +import java.util.List; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +public class GenesAndGenotypesTest { + + private static final GenomicAssembly HG38 = GenomicAssemblies.GRCh38p13(); + + @Test + public void computeFilteringStats() { + GenesAndGenotypes gag = prepareToyGenesAndGenotypes(); + + FilteringStats filteringStats = gag.computeFilteringStats(); + assertThat(filteringStats.nFilteredVariants(), equalTo(13L)); + assertThat(filteringStats.nPassingVariants(), equalTo(2L)); + assertThat(filteringStats.genesWithVariants(), equalTo(1L)); + 
} + + private static GenesAndGenotypes prepareToyGenesAndGenotypes() { + return GenesAndGenotypes.of(List.of( + Gene2Genotype.of( + GeneIdentifier.of(TermId.of("HGNC:1234"), "FAKE1234"), + List.of( + LiricalVariant.of( + GenotypedVariant.of(GenomeBuild.HG38, + GenomicVariant.of(HG38.contigByName("1"), "SNP1", + Strand.POSITIVE, CoordinateSystem.ONE_BASED, 101, + "C", "G"), + List.of(), + true), + List.of(), VariantMetadata.empty()), // irrelevant + LiricalVariant.of( + GenotypedVariant.of(GenomeBuild.HG38, + GenomicVariant.of(HG38.contigByName("1"), "SNP1", + Strand.POSITIVE, CoordinateSystem.ONE_BASED, 201, + "T", "A"), + List.of(), true), + List.of(), VariantMetadata.empty()) // irrelevant + ), + 3), + Gene2Genotype.of( + GeneIdentifier.of(TermId.of("HGNC:1234"), "FAKE1234"), + List.of(), + 10 + ) + )); + } +} \ No newline at end of file diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java index bc507af58..8eed271da 100644 --- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java @@ -9,8 +9,7 @@ import java.util.ArrayList; import java.util.List; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; /** @@ -74,7 +73,7 @@ public void testGetExcludedAbnormalities() { @Test public void testAge() { // we did not specify the age, so it should return not known - assertEquals(Age.ageNotKnown(),hpocase.getAge()); + assertNull(hpocase.getAge()); } @Test diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java new file mode 100644 index 000000000..b87c1d33d --- 
/dev/null +++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java @@ -0,0 +1,86 @@ +package org.monarchinitiative.lirical.core.util; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.MatcherAssert.*; +import static org.hamcrest.Matchers.*; + +public class BinarySearchTest { + @ParameterizedTest + @CsvSource({ + "1|2, 1", + "1|2, 2", + + "1|2|3|4, 1", + "1|2|3|4, 2", + "1|2|3|4, 3", + "1|2|3|4, 4", + }) + public void binarySearch_evenItemCount(String payload, int key) { + String[] array = payload.split("\\|"); + Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key); + + assertThat(resultArray.isPresent(), equalTo(true)); + assertThat(resultArray.get(), equalTo(String.valueOf(key))); + } + + @ParameterizedTest + @CsvSource({ + "1|3, 0", + "1|3, 2", + "1|3, 4", + }) + public void binarySearch_evenItemCount_notPresent(String payload, int key) { + String[] array = payload.split("\\|"); + Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key); + + assertThat(resultArray.isEmpty(), equalTo(true)); + } + + @ParameterizedTest + @CsvSource({ + "1, 1", + + "1|2|3, 1", + "1|2|3, 2", + "1|2|3, 3", + + "1|2|3|4|5, 1", + "1|2|3|4|5, 2", + "1|2|3|4|5, 3", + "1|2|3|4|5, 4", + "1|2|3|4|5, 5", + }) + public void binarySearch_oddItemCount(String payload, int key) { + String[] array = payload.split("\\|"); + Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key); + + assertThat(resultArray.isPresent(), equalTo(true)); + assertThat(resultArray.get(), equalTo(String.valueOf(key))); + } + + @ParameterizedTest + @CsvSource({ + "1|3|5, 0", + "1|3|5, 2", + "1|3|5, 4", + "1|3|5, 6", + }) + public void binarySearch_oddItemCount_notPresent(String payload, int key) { + String[] 
array = payload.split("\\|"); + Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key); + + assertThat(resultArray.isEmpty(), equalTo(true)); + } + + @Test + public void binarySearch_emptyCollection() { + Optional resultArray = BinarySearch.binarySearch(new String[0], Integer::parseInt, 1); + assertThat(resultArray.isEmpty(), equalTo(true)); + } +} \ No newline at end of file diff --git a/lirical-exomiser-db-adapter/pom.xml b/lirical-exomiser-db-adapter/pom.xml index 61ee297b5..05de45bb2 100644 --- a/lirical-exomiser-db-adapter/pom.xml +++ b/lirical-exomiser-db-adapter/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -18,12 +18,16 @@ ${project.parent.version} - com.google.protobuf - protobuf-java + org.monarchinitiative.svart + svart + + + de.charite.compbio + jannovar-core com.google.protobuf - protobuf-java-util + protobuf-java com.h2database diff --git a/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java index ca52223b1..0fbcb3783 100644 --- a/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java +++ b/lirical-exomiser-db-adapter/src/main/java/org/monarchinitiative/lirical/exomiser_db_adapter/ExomiserMvStoreMetadataService.java @@ -4,6 +4,7 @@ import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; import org.monarchinitiative.exomiser.core.proto.AlleleProto; +import org.monarchinitiative.lirical.core.model.ClinVarAlleleData; import org.monarchinitiative.lirical.core.model.ClinvarClnSig; import org.monarchinitiative.lirical.core.model.VariantMetadata; import org.monarchinitiative.lirical.core.service.VariantMetadataService; @@ -15,6 +16,8 @@ import org.monarchinitiative.svart.CoordinateSystem; import 
org.monarchinitiative.svart.GenomicVariant; import org.monarchinitiative.svart.Strand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.nio.file.Path; import java.util.List; @@ -26,6 +29,7 @@ @Deprecated public class ExomiserMvStoreMetadataService implements VariantMetadataService { + private static final Logger LOGGER = LoggerFactory.getLogger(ExomiserMvStoreMetadataService.class); // Note: Repeated retrieval of AlleleProperties from MVMap will hopefully not pose a huge performance issue // since MVMap uses caching (16MB, 16 segments) by default. @@ -63,7 +67,7 @@ public VariantMetadata metadata(GenomicVariant variant, List effe float frequency; float pathogenicity; - ClinvarClnSig clinvarClnSig; + ClinVarAlleleData clinVarAlleleData; if (alleleProp == null) { frequency = DEFAULT_FREQUENCY; @@ -71,7 +75,7 @@ public VariantMetadata metadata(GenomicVariant variant, List effe .map(VariantEffectPathogenicityScore::getPathogenicityScoreOf) .max(Float::compareTo) .orElse(0f); - clinvarClnSig = ClinvarClnSig.NOT_PROVIDED; + clinVarAlleleData = null; } else { FrequencyData frequencyData = AlleleProtoAdaptor.toFrequencyData(alleleProp); frequency = frequencyData.getMaxFreq(); @@ -79,24 +83,35 @@ public VariantMetadata metadata(GenomicVariant variant, List effe PathogenicityData pathogenicityData = AlleleProtoAdaptor.toPathogenicityData(alleleProp); pathogenicity = calculatePathogenicity(effects, pathogenicityData); - clinvarClnSig = processClinicalSignificance(pathogenicityData); + clinVarAlleleData = processClinicalSignificance(pathogenicityData); } - return VariantMetadata.of(frequency, pathogenicity, clinvarClnSig); + + return VariantMetadata.of(frequency, pathogenicity, clinVarAlleleData); } - private static ClinvarClnSig processClinicalSignificance(PathogenicityData pathogenicityData) { + private static ClinVarAlleleData processClinicalSignificance(PathogenicityData pathogenicityData) { ClinvarClnSig clinvarClnSig; ClinVarData cVarData = 
pathogenicityData.getClinVarData(); - // Only use ClinVar data if it is backed up by assertions. + String alleleId = cVarData.getAlleleId(); + if (cVarData.getReviewStatus().startsWith("no_assertion")) { clinvarClnSig = ClinvarClnSig.NOT_PROVIDED; } else { - ClinVarData.ClinSig primaryInterpretation = cVarData.getPrimaryInterpretation(); - clinvarClnSig = mapToClinvarClnSig(primaryInterpretation); + clinvarClnSig = mapToClinvarClnSig(cVarData.getPrimaryInterpretation()); + } + + if (clinvarClnSig.equals(ClinvarClnSig.NOT_PROVIDED) && alleleId.isBlank()) + return null; // we have no useful data + + try { + long alleleIdd = Long.parseLong(alleleId); + return ClinVarAlleleData.of(clinvarClnSig, alleleIdd); + } catch (NumberFormatException nfe) { + LOGGER.warn("Non-parsable ClinVar allele ID {}, please report to developers.", alleleId); + return ClinVarAlleleData.of(clinvarClnSig, null); } - return clinvarClnSig; } private static ClinvarClnSig mapToClinvarClnSig(ClinVarData.ClinSig primaryInterpretation) { diff --git a/lirical-io/pom.xml b/lirical-io/pom.xml index f4cddfb84..55be9053a 100644 --- a/lirical-io/pom.xml +++ b/lirical-io/pom.xml @@ -5,7 +5,7 @@ LIRICAL org.monarchinitiative.lirical - 2.0.0-RC2 + 2.0.0-RC3 4.0.0 @@ -18,18 +18,37 @@ ${project.parent.version} - org.phenopackets.phenopackettools - phenopacket-tools-io + org.monarchinitiative.svart + svart + + + org.monarchinitiative.phenol + phenol-core org.monarchinitiative.phenol - phenol-io + phenol-annotations com.github.samtools htsjdk - + + org.phenopackets + phenopacket-schema + + + org.phenopackets.phenopackettools + phenopacket-tools-core + + + org.phenopackets.phenopackettools + phenopacket-tools-io + + + org.phenopackets.phenopackettools + phenopacket-tools-util + com.google.protobuf protobuf-java @@ -38,6 +57,18 @@ com.google.protobuf protobuf-java-util + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.datatype + jackson-datatype-jdk8 + + + org.freemarker + freemarker + 
diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataFormat.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataFormat.java deleted file mode 100644 index ee844a251..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataFormat.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -public enum AnalysisDataFormat { - - PHENOPACKET_v1, - PHENOPACKET_v2, - YAML - -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataParserFactory.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataParserFactory.java deleted file mode 100644 index 559ca52c2..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/AnalysisDataParserFactory.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -import org.monarchinitiative.lirical.core.analysis.AnalysisDataParser; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; - -import java.util.Objects; - -public class AnalysisDataParserFactory { - - private static final PhenopacketImporter V1_PHENOPACKET_IMPORTER = PhenopacketImporters.v1(); - private static final PhenopacketImporter V2_PHENOPACKET_IMPORTER = PhenopacketImporters.v2(); - private final HpoTermSanitizer sanitizer; - private final VariantParserFactory variantParserFactory; - private final HpoAssociationData associationData; - - public AnalysisDataParserFactory(HpoTermSanitizer sanitizer, - VariantParserFactory variantParserFactory, - HpoAssociationData associationData) { - this.sanitizer = Objects.requireNonNull(sanitizer); - this.variantParserFactory = variantParserFactory; // nullable - this.associationData = associationData; // nullable 
- } - - public AnalysisDataParser forFormat(AnalysisDataFormat format) { - return switch (format) { - case PHENOPACKET_v1 -> new PhenopacketAnalysisDataParser(sanitizer, variantParserFactory, associationData, V1_PHENOPACKET_IMPORTER); - case PHENOPACKET_v2 -> new PhenopacketAnalysisDataParser(sanitizer, variantParserFactory, associationData, V2_PHENOPACKET_IMPORTER); - case YAML -> new YamlAnalysisDataParser(sanitizer, variantParserFactory, associationData); - }; - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/BaseAnalysisDataParser.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/BaseAnalysisDataParser.java deleted file mode 100644 index 141ec2d75..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/BaseAnalysisDataParser.java +++ /dev/null @@ -1,100 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -import org.monarchinitiative.lirical.core.analysis.AnalysisDataParser; -import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.*; -import org.monarchinitiative.lirical.core.io.VariantParser; -import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.nio.file.Path; -import java.time.Period; -import java.time.format.DateTimeParseException; -import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Consumer; -import java.util.stream.Collectors; - -abstract class BaseAnalysisDataParser implements AnalysisDataParser { - - private static final Logger LOGGER = LoggerFactory.getLogger(BaseAnalysisDataParser.class); - - private final VariantParserFactory variantParserFactory; - private final HpoAssociationData associationData; - - 
protected BaseAnalysisDataParser(VariantParserFactory variantParserFactory, HpoAssociationData associationData) { - this.variantParserFactory = variantParserFactory; // nullable - this.associationData = associationData; // nullable - } - - protected GenesAndGenotypes parseGeneToGenotype(String sampleId, Path vcfPath) throws LiricalParseException { - if (vcfPath == null) { - return GenesAndGenotypes.empty(); - } else { - if (variantParserFactory == null || associationData == null) { - LOGGER.warn("Unable to parse VCF at {} since parser or association data is missing", vcfPath.toAbsolutePath()); - return GenesAndGenotypes.empty(); - } else { - try (VariantParser variantParser = variantParserFactory.forPath(vcfPath)) { - // Ensure the VCF file contains the sample - if (!variantParser.sampleNames().contains(sampleId)) - throw new LiricalParseException("The sample " + sampleId + " is not present in VCF at '" + vcfPath.toAbsolutePath() + '\''); - LOGGER.debug("Found sample {} in the VCF file at {}", sampleId, vcfPath.toAbsolutePath()); - - // Read variants - LOGGER.info("Reading variants from {}", vcfPath.toAbsolutePath()); - AtomicInteger counter = new AtomicInteger(); - List variants = variantParser.variantStream() - .peek(logProgress(counter)) - .toList(); - LOGGER.info("Read {} variants", variants.size()); - - // Group variants by gene symbol. It would be better to group the variants by e.g. 
Entrez ID, - // but the ID is not available from TranscriptAnnotation - Map> gene2Genotype = new HashMap<>(); - for (LiricalVariant variant : variants) { - variant.annotations().stream() - .map(TranscriptAnnotation::getGeneId) - .distinct() - .forEach(geneId -> gene2Genotype.computeIfAbsent(geneId, e -> new LinkedList<>()).add(variant)); - } - - // Collect the variants into Gene2Genotype container - List g2g = gene2Genotype.entrySet().stream() - .map(e -> Gene2Genotype.of(e.getKey(), e.getValue())) - .toList(); - - return GenesAndGenotypes.of(g2g); - } catch (Exception e) { - throw new LiricalParseException(e); - } - } - } - } - - private static Consumer logProgress(AtomicInteger counter) { - return v -> { - int current = counter.incrementAndGet(); - if (current % 5000 == 0) - LOGGER.info("Read {} variants", current); - }; - } - - protected static Age parseAge(String age) { - if (age == null) { - LOGGER.debug("The age was not provided"); - return Age.ageNotKnown(); - } - try { - Period period = Period.parse(age); - LOGGER.info("Using age {}", period); - return Age.parse(period); - } catch (DateTimeParseException e) { - LOGGER.warn("Unable to parse age '{}': {}", age, e.getMessage()); - return Age.ageNotKnown(); - } - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketAnalysisDataParser.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketAnalysisDataParser.java deleted file mode 100644 index 7156b9ac9..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketAnalysisDataParser.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import 
org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; - -import java.io.InputStream; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; - -class PhenopacketAnalysisDataParser extends SanitizingAnalysisDataParser { - - private final PhenopacketImporter importer; - protected PhenopacketAnalysisDataParser(HpoTermSanitizer sanitizer, - VariantParserFactory variantParserFactory, - HpoAssociationData associationData, PhenopacketImporter importer) { - super(sanitizer, variantParserFactory, associationData); - this.importer = Objects.requireNonNull(importer); - } - - - @Override - public AnalysisData parse(InputStream is) throws LiricalParseException { - PhenopacketData data = importer.read(is); - - GenesAndGenotypes genes = parseGeneToGenotype(data.getSampleId(), data.getVcfPath().orElse(null)); - - return AnalysisData.of(data.getSampleId(), - data.getAge().orElse(null), - data.getSex().orElse(null), - data.getHpoTerms().map(this::sanitize).flatMap(Optional::stream).collect(Collectors.toList()), - data.getNegatedHpoTerms().map(this::sanitize).flatMap(Optional::stream).collect(Collectors.toList()), - genes); - } - -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketData.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketData.java index b80f22ff9..2d3a12ced 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketData.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketData.java @@ -1,37 +1,41 @@ package org.monarchinitiative.lirical.io.analysis; import org.monarchinitiative.lirical.core.model.Age; -import org.monarchinitiative.lirical.core.model.GenotypedVariant; import org.monarchinitiative.lirical.core.model.Sex; +import org.monarchinitiative.lirical.core.model.GenotypedVariant; +import 
org.monarchinitiative.lirical.core.sanitize.SanitationInputs; import org.monarchinitiative.phenol.ontology.data.TermId; -import java.nio.file.Path; +import java.time.Period; import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.stream.Stream; -public class PhenopacketData { +/** + * Phenopacket attributes that are relevant for LIRICAL. + */ +public class PhenopacketData implements SanitationInputs { private final String genomeAssembly; private final String sampleId; - private final List hpoTerms; - private final List negatedHpoTerms; + private final List hpoTerms; + private final List negatedHpoTerms; private final List variants; - private final Age age; - private final Sex sex; + private final String age; + private final String sex; private final List diseaseIds; - private final Path vcfPath; + private final String vcfPath; PhenopacketData(String genomeAssembly, String sampleId, - List hpoTerms, - List negatedHpoTerms, - Age age, - Sex sex, + List hpoTerms, + List negatedHpoTerms, + String age, + String sex, List diseaseIds, List variants, - Path vcfPath) { + String vcfPath) { this.genomeAssembly = genomeAssembly; this.sampleId = Objects.requireNonNull(sampleId); this.hpoTerms = Objects.requireNonNull(hpoTerms); @@ -43,39 +47,103 @@ public class PhenopacketData { this.vcfPath = vcfPath; } - public Optional getGenomeAssembly() { - return Optional.ofNullable(genomeAssembly); + @Override + public String sampleId() { + return sampleId; } - public String getSampleId() { - return sampleId; + @Override + public List presentHpoTerms() { + return hpoTerms; } - public Stream getHpoTerms() { - return hpoTerms.stream(); + public Stream presentHpoTermIds() { + return hpoTerms.stream().map(TermId::of); } - public Stream getNegatedHpoTerms() { - return negatedHpoTerms.stream(); + @Override + public List excludedHpoTerms() { + return negatedHpoTerms; } - public Iterable getVariants() { - return variants; + public Stream excludedHpoTermIds() 
{ + return negatedHpoTerms.stream().map(TermId::of); + } + + @Override + public String age() { + return age; + } + + /** + * Try to parse the age and return an empty optional if the parsing fails. + */ + public Optional parseAge() { + try { + return Optional.of(Age.parse(Period.parse(age))); + } catch (IllegalArgumentException | NullPointerException e) { + return Optional.empty(); + } + } + + @Override + public String sex() { + return sex; + } + + /** + * Try to parse the sex and return an empty optional if the parsing fails. + */ + public Optional parseSex() { + try { + return Optional.of(Sex.valueOf(sex.toUpperCase())); + } catch (IllegalArgumentException | NullPointerException e) { + return Optional.empty(); + } } - public Optional getAge() { - return Optional.ofNullable(age); + @Override + public String vcf() { + return vcfPath; } - public Optional getSex() { - return Optional.ofNullable(sex); + public Optional genomeAssembly() { + return Optional.ofNullable(genomeAssembly); } - public List getDiseaseIds() { + public Iterable variants() { + return variants; + } + + public List diseaseIds() { return diseaseIds; } - public Optional getVcfPath() { - return Optional.ofNullable(vcfPath); + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PhenopacketData that = (PhenopacketData) o; + return Objects.equals(genomeAssembly, that.genomeAssembly) && Objects.equals(sampleId, that.sampleId) && Objects.equals(hpoTerms, that.hpoTerms) && Objects.equals(negatedHpoTerms, that.negatedHpoTerms) && Objects.equals(variants, that.variants) && Objects.equals(age, that.age) && Objects.equals(sex, that.sex) && Objects.equals(diseaseIds, that.diseaseIds) && Objects.equals(vcfPath, that.vcfPath); + } + + @Override + public int hashCode() { + return Objects.hash(genomeAssembly, sampleId, hpoTerms, negatedHpoTerms, variants, age, sex, diseaseIds, vcfPath); + } + + @Override + public String toString() 
{ + return "PhenopacketData{" + + "genomeAssembly='" + genomeAssembly + '\'' + + ", sampleId='" + sampleId + '\'' + + ", hpoTerms=" + hpoTerms + + ", negatedHpoTerms=" + negatedHpoTerms + + ", variants=" + variants + + ", age='" + age + '\'' + + ", sex='" + sex + '\'' + + ", diseaseIds=" + diseaseIds + + ", vcfPath='" + vcfPath + '\'' + + '}'; } } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporter.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporter.java index 8a2b85bf1..4dbc425b0 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporter.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporter.java @@ -2,6 +2,9 @@ import java.io.InputStream; +/** + * Decode phenopacket data from an input stream. + */ public interface PhenopacketImporter { PhenopacketData read(InputStream is) throws PhenopacketImportException; diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporters.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporters.java index e799665d7..c9521f64e 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporters.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketImporters.java @@ -1,5 +1,8 @@ package org.monarchinitiative.lirical.io.analysis; +/** + * Get {@link PhenopacketImporter}s to ingest v1 or v2 phenopackets. 
+ */ public class PhenopacketImporters { private PhenopacketImporters() { diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV1Importer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV1Importer.java index e099cc2f4..72c449b25 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV1Importer.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV1Importer.java @@ -1,11 +1,6 @@ package org.monarchinitiative.lirical.io.analysis; -import com.google.protobuf.util.JsonFormat; - -import org.monarchinitiative.lirical.core.model.Age; -import org.monarchinitiative.lirical.core.model.AlleleCount; -import org.monarchinitiative.lirical.core.model.GenomeBuild; -import org.monarchinitiative.lirical.core.model.GenotypedVariant; +import org.monarchinitiative.lirical.core.model.*; import org.monarchinitiative.phenol.ontology.data.TermId; import org.monarchinitiative.svart.Contig; @@ -16,13 +11,11 @@ import org.monarchinitiative.svart.util.VcfConverter; import org.phenopackets.schema.v1.Phenopacket; import org.phenopackets.schema.v1.core.*; +import org.phenopackets.schema.v1.core.Sex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.InputStream; -import java.nio.file.Path; -import java.time.Period; -import java.time.format.DateTimeParseException; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -58,30 +51,24 @@ public PhenopacketData read(InputStream is) throws PhenopacketImportException { Individual subject = phenopacket.getSubject(); String sampleId = subject.getId(); - List observedTerms = phenopacket.getPhenotypicFeaturesList().stream() + List observedTerms = phenopacket.getPhenotypicFeaturesList().stream() .filter(pf -> !pf.getNegated()) .map(PhenotypicFeature::getType) .map(OntologyClass::getId) - .map(AnalysisIoUtils::createTermId) - .flatMap(Optional::stream) - .distinct() .toList(); 
- List negatedTerms = phenopacket.getPhenotypicFeaturesList().stream() + List negatedTerms = phenopacket.getPhenotypicFeaturesList().stream() .filter(PhenotypicFeature::getNegated) .map(PhenotypicFeature::getType) .map(OntologyClass::getId) - .map(AnalysisIoUtils::createTermId) - .flatMap(Optional::stream) - .distinct() .toList(); - org.monarchinitiative.lirical.core.model.Age age = subject.getAgeCase().equals(Individual.AgeCase.AGE_AT_COLLECTION) + String age = subject.getAgeCase().equals(Individual.AgeCase.AGE_AT_COLLECTION) ? mapToAge(subject.getAgeAtCollection()) : null; - org.monarchinitiative.lirical.core.model.Sex sex = toSex(subject.getSex()); + String sex = toSex(subject.getSex()); Optional firstVcf = phenopacket.getHtsFilesList().stream() @@ -104,8 +91,7 @@ public PhenopacketData read(InputStream is) throws PhenopacketImportException { .flatMap(Optional::stream) .toList(); - Path vcfPath = firstVcf.flatMap(hts -> SanitizingAnalysisDataParser.toUri(hts.getUri())) - .map(Path::of) + String vcfPath = firstVcf.map(HtsFile::getUri) .orElse(null); return new PhenopacketData(genomeBuild, @@ -168,7 +154,7 @@ private Function> toGenotypedVariant(String return Optional.empty(); } } - Map alleles = Map.of(sampleId, alleleCount); + List alleles = List.of(SampleAlleleCount.of(sampleId, alleleCount)); // 3 - assemble the final variant. Uff.. 
return Optional.of(GenotypedVariant.of(genomeBuild, @@ -192,21 +178,15 @@ private static Map parseVcfInfoFields(String info) { } } - private static Age mapToAge(org.phenopackets.schema.v1.core.Age age) { - try { - Period iso8601 = Period.parse(age.getAge()); - return Age.of(iso8601.getYears(), iso8601.getMonths(), iso8601.getDays()); - } catch (DateTimeParseException e) { - LOGGER.warn("Ignoring unparasble age {}", age.getAge()); - return null; - } + private static String mapToAge(org.phenopackets.schema.v1.core.Age age) { + return age.getAge(); } - private static org.monarchinitiative.lirical.core.model.Sex toSex(Sex sex) { + private static String toSex(Sex sex) { return switch (sex) { - case FEMALE -> org.monarchinitiative.lirical.core.model.Sex.FEMALE; - case MALE -> org.monarchinitiative.lirical.core.model.Sex.MALE; - case UNKNOWN_SEX, OTHER_SEX, UNRECOGNIZED -> org.monarchinitiative.lirical.core.model.Sex.UNKNOWN; + case FEMALE -> org.monarchinitiative.lirical.core.model.Sex.FEMALE.name(); + case MALE -> org.monarchinitiative.lirical.core.model.Sex.MALE.name(); + case UNKNOWN_SEX, OTHER_SEX, UNRECOGNIZED -> org.monarchinitiative.lirical.core.model.Sex.UNKNOWN.name(); }; } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV2Importer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV2Importer.java index 0004c0bbe..4fb0cbdf7 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV2Importer.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/PhenopacketV2Importer.java @@ -1,6 +1,5 @@ package org.monarchinitiative.lirical.io.analysis; -import org.monarchinitiative.lirical.core.model.Age; import org.monarchinitiative.lirical.core.model.Sex; import org.monarchinitiative.lirical.core.model.GenotypedVariant; import org.monarchinitiative.phenol.ontology.data.TermId; @@ -10,8 +9,7 @@ import org.slf4j.LoggerFactory; import 
java.io.InputStream; -import java.nio.file.Path; -import java.time.Period; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; @@ -34,38 +32,39 @@ public PhenopacketData read(InputStream is) throws PhenopacketImportException { String sampleId = subject.getId(); // Phenotype terms - List presentTerms = phenopacket.getPhenotypicFeaturesList().stream() + List presentTerms = phenopacket.getPhenotypicFeaturesList().stream() .filter(pf -> !pf.getExcluded()) .map(PhenotypicFeature::getType) .map(OntologyClass::getId) - .map(AnalysisIoUtils::createTermId) - .flatMap(Optional::stream) - .distinct() .toList(); - List negatedHpoTerms = phenopacket.getPhenotypicFeaturesList().stream() + List negatedHpoTerms = phenopacket.getPhenotypicFeaturesList().stream() .filter(PhenotypicFeature::getExcluded) .map(PhenotypicFeature::getType) .map(OntologyClass::getId) - .map(AnalysisIoUtils::createTermId) - .flatMap(Optional::stream) - .distinct() .toList(); // Age - Age age = parseAge(subject.getTimeAtLastEncounter(), subject.getId()); + String age = parseAge(subject.getTimeAtLastEncounter(), subject.getId()); // Sex - org.monarchinitiative.lirical.core.model.Sex sex = toSex(subject.getSex()); + String sex = toSex(subject.getSex()); // Disease IDs - List diseaseIds = phenopacket.getDiseasesList().stream() - .map(Disease::getTerm) - .map(OntologyClass::getId) - .map(AnalysisIoUtils::createTermId) - .flatMap(Optional::stream) - .distinct() - .toList(); + List diseaseIds = new ArrayList<>(); + for (Interpretation interp : phenopacket.getInterpretationsList()) { + AnalysisIoUtils.createTermId(interp.getDiagnosis().getDisease().getId()) + .ifPresent(diseaseIds::add); + } + if (diseaseIds.isEmpty()) { + diseaseIds = phenopacket.getDiseasesList().stream() + .map(Disease::getTerm) + .map(OntologyClass::getId) + .map(AnalysisIoUtils::createTermId) + .flatMap(Optional::stream) + .distinct() + .toList(); + } // Variants List variants = 
List.of(); // TODO - implement real variant parsing. @@ -81,8 +80,7 @@ public PhenopacketData read(InputStream is) throws PhenopacketImportException { Optional firstVcf = phenopacket.getFilesList().stream() .filter(file -> "vcf".equalsIgnoreCase(file.getFileAttributesOrDefault("fileFormat", ""))) .findFirst(); - Path firstVcfPath = firstVcf.flatMap(file -> SanitizingAnalysisDataParser.toUri(file.getUri())) - .map(Path::of) + String firstVcfPath = firstVcf.map(File::getUri) .orElse(null); String genomeAssembly = firstVcf.map(f -> f.getFileAttributesOrDefault("genomeAssembly", null)) .orElse(null); @@ -98,33 +96,33 @@ public PhenopacketData read(InputStream is) throws PhenopacketImportException { firstVcfPath); } - private static Sex toSex(org.phenopackets.schema.v2.core.Sex sex) { + private static String toSex(org.phenopackets.schema.v2.core.Sex sex) { return switch (sex) { - case FEMALE -> Sex.FEMALE; - case MALE -> Sex.MALE; - case UNKNOWN_SEX, OTHER_SEX, UNRECOGNIZED -> Sex.UNKNOWN; + case FEMALE -> Sex.FEMALE.name(); + case MALE -> Sex.MALE.name(); + case UNKNOWN_SEX, OTHER_SEX, UNRECOGNIZED -> Sex.UNKNOWN.name(); }; } - private static Age parseAge(TimeElement timeAtLastEncounter, String subjectId) { + private static String parseAge(TimeElement timeAtLastEncounter, String subjectId) { return switch (timeAtLastEncounter.getElementCase()) { case GESTATIONAL_AGE -> { GestationalAge ga = timeAtLastEncounter.getGestationalAge(); LOGGER.debug("Parsing gestational age {}w {}d of subject {}", ga.getWeeks(), ga.getDays(), subjectId); - yield Age.gestationalAge(ga.getWeeks(), ga.getDays()); + yield "P%dW%dD".formatted(ga.getWeeks(), ga.getDays()); } case AGE -> { org.phenopackets.schema.v2.core.Age a = timeAtLastEncounter.getAge(); LOGGER.debug("Parsing age {} of subject {}", a.getIso8601Duration(), subjectId); - yield Age.parse(Period.parse(a.getIso8601Duration())); + yield a.getIso8601Duration(); } case AGE_RANGE, ONTOLOGY_CLASS, TIMESTAMP, INTERVAL -> { 
LOGGER.warn("Ignoring unsupported age format {} for subject {}", timeAtLastEncounter.getElementCase(), subjectId); - yield Age.ageNotKnown(); + yield null; } case ELEMENT_NOT_SET -> { LOGGER.warn("Time at last encounter was not set for subject {}", subjectId); - yield Age.ageNotKnown(); + yield null; } }; } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/SanitizingAnalysisDataParser.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/SanitizingAnalysisDataParser.java deleted file mode 100644 index c464cc09e..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/SanitizingAnalysisDataParser.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.*; - -abstract class SanitizingAnalysisDataParser extends BaseAnalysisDataParser { - - private static final Logger LOGGER = LoggerFactory.getLogger(SanitizingAnalysisDataParser.class); - - protected final HpoTermSanitizer sanitizer; - - protected SanitizingAnalysisDataParser(HpoTermSanitizer sanitizer, - VariantParserFactory variantParserFactory, - HpoAssociationData associationData) { - super(variantParserFactory, associationData); - this.sanitizer = Objects.requireNonNull(sanitizer); - } - - protected Optional toTermId(String payload) { - try { - return sanitize(TermId.of(payload)); - } catch (PhenolRuntimeException e) { - LOGGER.warn("Skipping non-parsable term {}", payload); - return Optional.empty(); - } - } - - protected Optional 
sanitize(TermId termId) { - return sanitizer.replaceIfObsolete(termId); - } - - protected static Optional toUri(String uri) { - try { - return Optional.of(new URI(uri)); - } catch (URISyntaxException e) { - LOGGER.warn("Invalid URI {}: {}", uri, e.getMessage()); - return Optional.empty(); - } - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlAnalysisDataParser.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlAnalysisDataParser.java deleted file mode 100644 index 869d122bf..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/YamlAnalysisDataParser.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.monarchinitiative.lirical.io.analysis; - -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.analysis.LiricalParseException; -import org.monarchinitiative.lirical.core.model.Age; -import org.monarchinitiative.lirical.core.model.GenesAndGenotypes; -import org.monarchinitiative.lirical.core.model.Sex; -import org.monarchinitiative.lirical.core.service.HpoTermSanitizer; -import org.monarchinitiative.lirical.core.io.VariantParserFactory; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.Optional; - -class YamlAnalysisDataParser extends SanitizingAnalysisDataParser { - - private static final Logger LOGGER = LoggerFactory.getLogger(YamlAnalysisDataParser.class); - - YamlAnalysisDataParser(HpoTermSanitizer sanitizer, - VariantParserFactory variantParserFactory, - HpoAssociationData associationData) { - super(sanitizer, variantParserFactory, associationData); - } - - @Override - public AnalysisData parse(InputStream is) throws LiricalParseException { - YamlConfig config; - try { - 
config = YamlParser.parse(is); - } catch (IOException e) { - throw new LiricalParseException(e); - } - - String sampleId = config.getSampleId(); - Age age = parseAge(config.age()); - Sex sex = parseSex(config.sex()); - - List presentTerms = config.getHpoIds().stream() - .map(this::toTermId) - .flatMap(Optional::stream) - .distinct() - .toList(); - List absentTerms = config.getNegatedHpoIds().stream() - .map(this::toTermId) - .flatMap(Optional::stream) - .distinct() - .toList(); - - GenesAndGenotypes genes = parseGeneToGenotype(sampleId, config.vcfPath().orElse(null)); - - return AnalysisData.of(sampleId, age, sex, presentTerms, absentTerms, genes); - } - - - private static Sex parseSex(String sex) { - return switch (sex.toLowerCase()) { - case "male" -> Sex.MALE; - case "female" -> Sex.FEMALE; - default -> { - LOGGER.warn("Unknown sex {}", sex); - yield Sex.UNKNOWN; - } - }; - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/package-info.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/package-info.java new file mode 100644 index 000000000..4bbbb0ff2 --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/analysis/package-info.java @@ -0,0 +1,4 @@ +/** + * Read user input formatted as a v1 or v2 phenopacket. 
+ */ +package org.monarchinitiative.lirical.io.analysis; \ No newline at end of file diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/BaseDifferential.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/BaseDifferential.java similarity index 99% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/BaseDifferential.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/BaseDifferential.java index 0747e5490..48844aa96 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/BaseDifferential.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/BaseDifferential.java @@ -1,7 +1,7 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; -import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.analysis.TestResult; +import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.ontology.data.TermId; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/DifferentialDiagnosis.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/DifferentialDiagnosis.java similarity index 98% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/DifferentialDiagnosis.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/DifferentialDiagnosis.java index ff0f2f649..60daec779 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/DifferentialDiagnosis.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/DifferentialDiagnosis.java @@ -1,9 +1,8 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import 
org.monarchinitiative.lirical.core.analysis.TestResult; import org.monarchinitiative.phenol.ontology.data.TermId; - import java.util.List; import java.util.Objects; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/HtmlTemplate.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/HtmlTemplate.java similarity index 94% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/HtmlTemplate.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/HtmlTemplate.java index 9e728edca..faaf94390 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/HtmlTemplate.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/HtmlTemplate.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import freemarker.template.Template; import freemarker.template.TemplateException; @@ -6,11 +6,12 @@ import org.monarchinitiative.lirical.core.analysis.AnalysisResults; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.model.Gene2Genotype; -import org.monarchinitiative.lirical.core.output.svg.Lr2Svg; +import org.monarchinitiative.lirical.core.output.*; +import org.monarchinitiative.lirical.io.output.svg.Lr2Svg; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,7 +21,6 @@ import java.nio.file.Files; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; -import 
java.util.stream.Collectors; import java.util.stream.Stream; /** @@ -52,7 +52,7 @@ public class HtmlTemplate extends LiricalTemplate { /** * Constructor to initialize the data that will be needed to output an HTML page. */ - HtmlTemplate(Ontology hpo, + HtmlTemplate(MinimalOntology hpo, HpoDiseases diseases, AnalysisData analysisData, AnalysisResults analysisResults, @@ -72,11 +72,7 @@ public class HtmlTemplate extends LiricalTemplate { int N = totalDetailedDiagnosesToShow(analysisResults); List sparklinePackets = SparklinePacket.sparklineFactory(analysisResults, diseases, hpo, N); this.templateData.put("sparkline", sparklinePackets); - this.templateData.put("hasGenotypes", "true"); - if (symbolsWithoutGeneIds == null || symbolsWithoutGeneIds.isEmpty()) { - this.templateData.put("hasGeneSymbolsWithoutIds", "false"); - } else { - this.templateData.put("hasGeneSymbolsWithoutIds", "true"); + if (symbolsWithoutGeneIds != null && !symbolsWithoutGeneIds.isEmpty()) { this.templateData.put("geneSymbolsWithoutIds", symbolsWithoutGeneIds); } Map diseaseById = diseases.diseaseById(); @@ -154,7 +150,7 @@ private static String createGenotypeExplanation(GenotypeLrWithExplanation genoty return genotypeLr.explanation(); } } else { - return "No known disease gene"; + return null; } } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/ImprobableDifferential.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/ImprobableDifferential.java similarity index 97% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/ImprobableDifferential.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/ImprobableDifferential.java index 4db28c816..38773795f 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/ImprobableDifferential.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/ImprobableDifferential.java @@ -1,4 +1,4 @@ -package 
org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import java.text.DecimalFormat; diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/JsonAnalysisResultWriter.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/JsonAnalysisResultWriter.java index a09a6aa94..d0ebbdf6b 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/JsonAnalysisResultWriter.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/JsonAnalysisResultWriter.java @@ -1,16 +1,14 @@ package org.monarchinitiative.lirical.io.output; import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; -import com.fasterxml.jackson.databind.module.SimpleModule; +import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; import org.monarchinitiative.lirical.core.output.AnalysisResultsMetadata; import org.monarchinitiative.lirical.core.output.AnalysisResultsWriter; import org.monarchinitiative.lirical.core.output.OutputOptions; -import org.monarchinitiative.lirical.io.output.serialize.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,8 +21,6 @@ public class JsonAnalysisResultWriter implements AnalysisResultsWriter { private static final Logger LOGGER = LoggerFactory.getLogger(JsonAnalysisResultWriter.class); - - private static final Version VERSION = new Version(0, 1, 0, null, null, null); private static final JsonAnalysisResultWriter INSTANCE = new JsonAnalysisResultWriter(); private final ObjectMapper objectMapper; @@ -35,7 +31,7 @@ public static JsonAnalysisResultWriter of() { private JsonAnalysisResultWriter() { objectMapper = new ObjectMapper(); 
objectMapper.enable(SerializationFeature.INDENT_OUTPUT); - objectMapper.registerModule(prepareModule()); + objectMapper.registerModule(new Jdk8Module()); } @Override @@ -59,20 +55,4 @@ public void process(AnalysisData analysisData, } } - private static SimpleModule prepareModule() { - SimpleModule module = new SimpleModule("JsonAnalysisResultSerializer", VERSION); - - // serializers - module.addSerializer(new AnalysisResultsMetadataSerializer()); - module.addSerializer(new AnalysisDataSerializer()); - - module.addSerializer(new AnalysisResultsSerializer()); - module.addSerializer(new TestResultSerializer()); - module.addSerializer(new LrWithExplanationSerializer()); - module.addSerializer(new GenotypeLrWithExplanationSerializer()); - module.addSerializer(new GeneIdentifierSerializer()); - - return module; - } - } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/LiricalTemplate.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/LiricalTemplate.java similarity index 79% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/LiricalTemplate.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/LiricalTemplate.java index f6dbbcb49..094364ec8 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/LiricalTemplate.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/LiricalTemplate.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import freemarker.template.Configuration; import freemarker.template.Version; @@ -6,11 +6,11 @@ import org.monarchinitiative.lirical.core.exception.LiricalRuntimeException; import org.monarchinitiative.lirical.core.model.Gene2Genotype; import org.monarchinitiative.lirical.core.model.LiricalVariant; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import 
org.monarchinitiative.lirical.core.output.AnalysisResultsMetadata; +import org.monarchinitiative.lirical.core.output.OutputOptions; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; @@ -25,7 +25,6 @@ * @author Peter Robinson */ public abstract class LiricalTemplate { - private static final Logger logger = LoggerFactory.getLogger(LiricalTemplate.class); private final AnalysisData analysisData; /** Map of data that will be used for the FreeMark template. */ @@ -44,7 +43,7 @@ public abstract class LiricalTemplate { protected List topDiagnosisAnchors; protected final Map geneById; - protected LiricalTemplate(Ontology hpo, + protected LiricalTemplate(MinimalOntology hpo, AnalysisData analysisData, AnalysisResultsMetadata resultsMetadata, OutputOptions outputOptions) { @@ -62,28 +61,30 @@ protected LiricalTemplate(Ontology hpo, protected abstract String outputFormatString(); private void initTemplateData(AnalysisData analysisData, - Ontology ontology, + MinimalOntology ontology, AnalysisResultsMetadata resultsMetadata) { templateData.put("resultsMeta", resultsMetadata); List observedHPOs = new ArrayList<>(); for (TermId id:analysisData.presentPhenotypeTerms()) { - Term term = ontology.getTermMap().get(id); - String tstr = String.format("%s %s",term.getName(),id.getValue(),id.getValue()); + String termName = ontology.termForTermId(id) + .map(Term::getName) + .orElse("UNKNOWN"); + String tstr = String.format("%s %s",termName,id.getValue(),id.getValue()); observedHPOs.add(tstr); } - this.templateData.put("observedHPOs",observedHPOs); + templateData.put("observedHPOs",observedHPOs); List excludedHpos = new ArrayList<>(); for (TermId id:analysisData.negatedPhenotypeTerms()) { - Term term = ontology.getTermMap().get(id); - String tstr 
= String.format("%s %s",term.getName(),id.getValue(),id.getValue()); + String termName = ontology.termForTermId(id) + .map(Term::getName) + .orElse("UNKNOWN"); + String tstr = String.format("%s %s",termName,id.getValue(),id.getValue()); excludedHpos.add(tstr); } - this.templateData.put("excludedHPOs",excludedHpos); - // This is a flag for the output to only show the list if there are some phenotypes that were excluded in the - // proband. - if (excludedHpos.size()>0) { - this.templateData.put("hasExcludedHPOs","true"); - } + templateData.put("excludedHPOs",excludedHpos); + // Indicates that LIRICAL was run without a VCF file. + templateData.put("phenotypeOnly", analysisData.genes().size() == 0); + } /** Some of our name strings contain multiple synonyms. This function removes all but the first.*/ @@ -95,8 +96,6 @@ protected String shortName(String name) { return name; } - public Path getOutPath() { return outputPath;} - protected static Path createOutputFile(Path outdir, String prefix, String format) { if (!Files.isDirectory(outdir)) mkdirIfNotExist(outdir); @@ -120,7 +119,7 @@ protected static void mkdirIfNotExist(Path dir) { } protected Function toVisualizableVariant() { - return lv -> new VisualizableVariantDefault(analysisData.sampleId(), lv, isPassingPathogenicThreshold(lv)); + return lv -> VisualizableVariant.of(analysisData.sampleId(), lv, isPassingPathogenicThreshold(lv)); } private boolean isPassingPathogenicThreshold(LiricalVariant lv) { diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/SparklinePacket.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/SparklinePacket.java similarity index 95% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/SparklinePacket.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/SparklinePacket.java index 1fd0e4639..92b549edd 100644 --- 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/SparklinePacket.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/SparklinePacket.java @@ -1,14 +1,14 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; -import org.monarchinitiative.lirical.core.output.svg.Sparkline2Svg; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; -import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.analysis.TestResult; +import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; +import org.monarchinitiative.lirical.io.output.svg.Sparkline2Svg; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.TermId; import java.util.ArrayList; @@ -40,7 +40,7 @@ public class SparklinePacket { */ public static List sparklineFactory(AnalysisResults results, HpoDiseases diseases, - Ontology ontology, + MinimalOntology ontology, int N) { if (results.isEmpty()) return List.of(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TemplateBasedAnalysisResultsWriter.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TemplateBasedAnalysisResultsWriter.java similarity index 74% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TemplateBasedAnalysisResultsWriter.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TemplateBasedAnalysisResultsWriter.java index e3b08d1ea..2282e6eca 100644 --- 
a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TemplateBasedAnalysisResultsWriter.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TemplateBasedAnalysisResultsWriter.java @@ -1,9 +1,13 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; +import org.monarchinitiative.lirical.core.output.AnalysisResultsMetadata; +import org.monarchinitiative.lirical.core.output.AnalysisResultsWriter; +import org.monarchinitiative.lirical.core.output.OutputFormat; +import org.monarchinitiative.lirical.core.output.OutputOptions; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import java.util.List; import java.util.Objects; @@ -13,10 +17,10 @@ public class TemplateBasedAnalysisResultsWriter implements AnalysisResultsWriter { private final OutputFormat format; - private final Ontology hpo; + private final MinimalOntology hpo; private final HpoDiseases diseases; - public TemplateBasedAnalysisResultsWriter(OutputFormat format, Ontology hpo, HpoDiseases diseases) { + public TemplateBasedAnalysisResultsWriter(OutputFormat format, MinimalOntology hpo, HpoDiseases diseases) { this.format = Objects.requireNonNull(format); this.hpo = Objects.requireNonNull(hpo); this.diseases = Objects.requireNonNull(diseases); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvDifferential.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvDifferential.java similarity index 97% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvDifferential.java rename to 
lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvDifferential.java index ebb97404c..1263cce4f 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvDifferential.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvDifferential.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import org.monarchinitiative.lirical.core.analysis.TestResult; import org.monarchinitiative.lirical.core.model.TranscriptAnnotation; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvTemplate.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvTemplate.java similarity index 88% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvTemplate.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvTemplate.java index fa8cfb8c8..bd18cef51 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/TsvTemplate.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/TsvTemplate.java @@ -1,13 +1,15 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import freemarker.template.Template; import freemarker.template.TemplateException; import org.monarchinitiative.lirical.core.analysis.AnalysisData; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; +import org.monarchinitiative.lirical.core.output.AnalysisResultsMetadata; +import org.monarchinitiative.lirical.core.output.OutputOptions; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import 
org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,10 +31,7 @@ public class TsvTemplate extends LiricalTemplate { private static final Logger logger = LoggerFactory.getLogger(TsvTemplate.class); - private static final String[] tsvHeader = {"rank", "diseaseName", "diseaseCurie", "pretestprob", "posttestprob", - "compositeLR", "entrezGeneId", "variants"}; - - TsvTemplate(Ontology hpo, + TsvTemplate(MinimalOntology hpo, HpoDiseases diseases, AnalysisData analysisData, AnalysisResults analysisResults, @@ -40,7 +39,6 @@ public class TsvTemplate extends LiricalTemplate { OutputOptions outputOptions) { super(hpo, analysisData, resultsMetadata, outputOptions); cfg.setClassForTemplateLoading(TsvTemplate.class, ""); - templateData.put("header", String.join("\t", tsvHeader)); AtomicInteger rank = new AtomicInteger(); Map diseaseById = diseases.diseaseById(); List diff = new ArrayList<>(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariant.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariant.java similarity index 64% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariant.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariant.java index 841b58a01..c7364b00f 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariant.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariant.java @@ -1,5 +1,6 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; +import org.monarchinitiative.lirical.core.model.LiricalVariant; import org.monarchinitiative.lirical.core.model.TranscriptAnnotation; import java.util.List; @@ -10,6 +11,10 @@ public interface 
VisualizableVariant { // Note: the interface MUST be public, otherwise FreeMarker will not work. + static VisualizableVariant of(String sampleId, LiricalVariant lv, boolean isPassingPathogenicityThreshold) { + return new VisualizableVariantDefault(sampleId, lv, isPassingPathogenicityThreshold); + } + String contigName(); int pos(); diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariantDefault.java similarity index 92% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariantDefault.java index fc724ff91..30471201e 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/VisualizableVariantDefault.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/VisualizableVariantDefault.java @@ -1,7 +1,6 @@ -package org.monarchinitiative.lirical.core.output; +package org.monarchinitiative.lirical.io.output; import org.monarchinitiative.lirical.core.model.*; -import org.monarchinitiative.lirical.core.service.VariantMetadataService; import org.monarchinitiative.svart.CoordinateSystem; import org.monarchinitiative.svart.GenomicVariant; import org.monarchinitiative.svart.Strand; @@ -115,10 +114,11 @@ private static String genotypeFromAlleleCount(AlleleCount ac) { @Override public String getClinvar() { - ClinvarClnSig clnsig = variant.clinvarClnSig(); - return (clnsig.equals(ClinvarClnSig.NOT_PROVIDED)) - ? 
"n/a" - : clnsig.toString(); + return variant.clinVarAlleleData() + .map(ClinVarAlleleData::getClinvarClnSig) + .filter(cv -> !cv.equals(ClinvarClnSig.NOT_PROVIDED)) + .map(Object::toString) + .orElse("n/a"); } @Override diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/package-info.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/package-info.java new file mode 100644 index 000000000..5bbfe8f8f --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/package-info.java @@ -0,0 +1,4 @@ +/** + * Classes for writing out LIRICAL analysis results into HTML, TSV, and JSON files. + */ +package org.monarchinitiative.lirical.io.output; \ No newline at end of file diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisDataSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisDataSerializer.java deleted file mode 100644 index 9814ce15e..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisDataSerializer.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.analysis.AnalysisData; -import org.monarchinitiative.lirical.core.model.Age; -import org.monarchinitiative.phenol.ontology.data.TermId; - -import java.io.IOException; -import java.time.Period; - -public class AnalysisDataSerializer extends StdSerializer { - - public AnalysisDataSerializer() { - this(AnalysisData.class); - } - - protected AnalysisDataSerializer(Class t) { - super(t); - } - - @Override - public void serialize(AnalysisData value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - 
gen.writeStringField("sampleId", value.sampleId()); - if (!value.age().equals(Age.ageNotKnown())) { - Age age = value.age(); - Period p = Period.of(age.getYears(), age.getMonths(), age.getDays()); - gen.writeObjectField("age", p.normalized().toString()); - } - - gen.writeObjectField("sex", value.sex()); - - gen.writeArrayFieldStart("observedPhenotypicFeatures"); - for (TermId termId : value.presentPhenotypeTerms()) - gen.writeString(termId.getValue()); - gen.writeEndArray(); - - gen.writeArrayFieldStart("excludedPhenotypicFeatures"); - for (TermId termId : value.negatedPhenotypeTerms()) - gen.writeString(termId.getValue()); - gen.writeEndArray(); - - gen.writeEndObject(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsMetadataSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsMetadataSerializer.java deleted file mode 100644 index 871c56b03..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsMetadataSerializer.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.output.AnalysisResultsMetadata; - -import java.io.IOException; - -public class AnalysisResultsMetadataSerializer extends StdSerializer { - - - public AnalysisResultsMetadataSerializer() { - this(AnalysisResultsMetadata.class); - } - - protected AnalysisResultsMetadataSerializer(Class t) { - super(t); - } - - @Override - public void serialize(AnalysisResultsMetadata value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - gen.writeStringField("liricalVersion", value.getLiricalVersion()); - gen.writeStringField("hpoVersion", 
value.getHpoVersion()); - gen.writeStringField("transcriptDatabase", value.getTranscriptDatabase()); - gen.writeStringField("analysisDate", value.getAnalysisDate()); - gen.writeStringField("sampleName", value.getSampleName()); - gen.writeBooleanField("isGlobalAnalysisMode", value.getGlobalMode()); - - gen.writeEndObject(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsSerializer.java deleted file mode 100644 index fd69037a2..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/AnalysisResultsSerializer.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.analysis.AnalysisResults; -import org.monarchinitiative.lirical.core.analysis.TestResult; - -import java.io.IOException; - -public class AnalysisResultsSerializer extends StdSerializer { - - public AnalysisResultsSerializer() { - this(AnalysisResults.class); - } - - protected AnalysisResultsSerializer(Class t) { - super(t); - } - - @Override - public void serialize(AnalysisResults value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartArray(); - - for (TestResult result : value.resultsWithDescendingPostTestProbability().toList()) - gen.writeObject(result); - - gen.writeEndArray(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GeneIdentifierSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GeneIdentifierSerializer.java deleted file mode 100644 index 6cefdc84a..000000000 --- 
a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GeneIdentifierSerializer.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; - -import java.io.IOException; - -public class GeneIdentifierSerializer extends StdSerializer { - - public GeneIdentifierSerializer() { - this(GeneIdentifier.class); - } - - protected GeneIdentifierSerializer(Class t) { - super(t); - } - - @Override - public void serialize(GeneIdentifier value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - gen.writeStringField("id", value.id().getValue()); - gen.writeStringField("symbol", value.symbol()); - - gen.writeEndObject(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GenotypeLrWithExplanationSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GenotypeLrWithExplanationSerializer.java deleted file mode 100644 index e35bffaa0..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/GenotypeLrWithExplanationSerializer.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; - -import java.io.IOException; - -public class GenotypeLrWithExplanationSerializer extends StdSerializer { - - public GenotypeLrWithExplanationSerializer() { - this(GenotypeLrWithExplanation.class); - } - - protected 
GenotypeLrWithExplanationSerializer(Class t) { - super(t); - } - - @Override - public void serialize(GenotypeLrWithExplanation value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - gen.writeObjectField("geneId", value.geneId()); - gen.writeNumberField("lr", value.lr()); - gen.writeStringField("explanation", value.explanation()); - - gen.writeEndObject(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/LrWithExplanationSerializer.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/LrWithExplanationSerializer.java deleted file mode 100644 index b891056a0..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/LrWithExplanationSerializer.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.likelihoodratio.LrWithExplanation; - -import java.io.IOException; - -public class LrWithExplanationSerializer extends StdSerializer { - - public LrWithExplanationSerializer() { - this(LrWithExplanation.class); - } - - protected LrWithExplanationSerializer(Class t) { - super(t); - } - - @Override - public void serialize(LrWithExplanation value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - gen.writeStringField("query", value.queryTerm().getValue()); - gen.writeStringField("match", value.matchingTerm().getValue()); - gen.writeObjectField("matchType", value.matchType()); - gen.writeNumberField("lr", value.lr()); - gen.writeStringField("explanation", value.explanation()); - - gen.writeEndObject(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/TestResultSerializer.java 
b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/TestResultSerializer.java deleted file mode 100644 index 08a9b8e28..000000000 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/serialize/TestResultSerializer.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.monarchinitiative.lirical.io.output.serialize; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import org.monarchinitiative.lirical.core.analysis.TestResult; -import org.monarchinitiative.lirical.core.likelihoodratio.LrWithExplanation; - -import java.io.IOException; - -public class TestResultSerializer extends StdSerializer { - - public TestResultSerializer() { - this(TestResult.class); - } - - protected TestResultSerializer(Class t) { - super(t); - } - - @Override - public void serialize(TestResult value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeStartObject(); - - gen.writeStringField("diseaseId", value.diseaseId().getValue()); - gen.writeNumberField("pretestProbability", value.pretestProbability()); - - // observed phenotypic features - gen.writeArrayFieldStart("observedPhenotypicFeatures"); - for (LrWithExplanation lre : value.observedResults()) - gen.writeObject(lre); - gen.writeEndArray(); - - // excluded phenotypic features - gen.writeArrayFieldStart("excludedPhenotypicFeatures"); - for (LrWithExplanation lre : value.excludedResults()) - gen.writeObject(lre); - gen.writeEndArray(); - - - // genotypeLR - if (value.genotypeLr().isPresent()) - gen.writeObjectField("genotypeLR", value.genotypeLr().get()); - - - gen.writeNumberField("compositeLR", value.getCompositeLR()); - gen.writeNumberField("posttestProbability", value.posttestProbability()); - - gen.writeEndObject(); - } -} diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lirical2Svg.java 
b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lirical2Svg.java similarity index 98% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lirical2Svg.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lirical2Svg.java index 35a4457a4..5d1fe38c5 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lirical2Svg.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lirical2Svg.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.output.svg; +package org.monarchinitiative.lirical.io.output.svg; import java.io.IOException; import java.io.Writer; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lr2Svg.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lr2Svg.java similarity index 96% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lr2Svg.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lr2Svg.java index d1b3e0a03..115a3d963 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Lr2Svg.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Lr2Svg.java @@ -1,10 +1,10 @@ -package org.monarchinitiative.lirical.core.output.svg; +package org.monarchinitiative.lirical.io.output.svg; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.likelihoodratio.LrWithExplanation; import org.monarchinitiative.lirical.core.analysis.TestResult; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; @@ -23,7 +23,7 @@ public class Lr2Svg extends Lirical2Svg { /** * An 
object representing the Human Phenotype Ontology */ - private final Ontology hpo; + private final MinimalOntology hpo; /** * We show the results as an SVG diagram for this disease. */ @@ -82,7 +82,7 @@ public Lr2Svg(TestResult result, int rank, TermId diseaseId, String originalDiseaseName, - Ontology hpo, + MinimalOntology hpo, String symbol) { this.diseaseCURIE = diseaseId; this.diseaseName = prettifyDiseaseName(originalDiseaseName); @@ -250,8 +250,10 @@ private void writeLrBoxes(Writer writer) throws IOException { "onmouseover=\"showTooltip(evt,'%s')\"/>\n", BOX_HEIGHT, (int) boxwidth, currentY, (int) xstart, color, observedTerms.get(originalIndex).escapedExplanation())); } // add label of corresponding HPO term - Term term = hpo.getTermMap().get(tid); - String label = String.format("%s [%s]", term.getName(), tid.getValue()); + String termName = hpo.termForTermId(tid) + .map(Term::getName) + .orElse("UNKNOWN"); + String label = String.format("%s [%s]", termName, tid.getValue()); writer.write(String.format("%s\n", XbeginOfText, currentY + BOX_HEIGHT, label)); currentY += BOX_HEIGHT + BOX_OFFSET; explanationIndex++; @@ -280,8 +282,10 @@ private void writeLrBoxes(Writer writer) throws IOException { BOX_HEIGHT, (int) boxwidth, currentY, (int) xstart, color, excludedTerms.get(originalIndex).escapedExplanation())); } // add label of corresponding HPO term - Term term = hpo.getTermMap().get(tid); - String label = String.format("Excluded: %s [%s]", term.getName(), tid.getValue()); + String termName = hpo.termForTermId(tid) + .map(Term::getName) + .orElse("UNKNOWN"); + String label = String.format("Excluded: %s [%s]", termName, tid.getValue()); writer.write(String.format("%s\n", XbeginOfText, currentY + BOX_HEIGHT, label)); currentY += BOX_HEIGHT + BOX_OFFSET; explanationIndex++; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Posttest2Svg.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Posttest2Svg.java 
similarity index 99% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Posttest2Svg.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Posttest2Svg.java index b3a85450d..998277d79 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Posttest2Svg.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Posttest2Svg.java @@ -1,4 +1,4 @@ -package org.monarchinitiative.lirical.core.output.svg; +package org.monarchinitiative.lirical.io.output.svg; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Sparkline2Svg.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Sparkline2Svg.java similarity index 97% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Sparkline2Svg.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Sparkline2Svg.java index fe3493238..5171d72e6 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/Sparkline2Svg.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/Sparkline2Svg.java @@ -1,9 +1,10 @@ -package org.monarchinitiative.lirical.core.output.svg; +package org.monarchinitiative.lirical.io.output.svg; import org.monarchinitiative.lirical.core.analysis.AnalysisResults; import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation; import org.monarchinitiative.lirical.core.analysis.TestResult; -import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenol.ontology.data.MinimalOntology; +import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
@@ -59,18 +60,22 @@ public class Sparkline2Svg extends Lirical2Svg { * @param result A representation of the Case * */ - public Sparkline2Svg(TestResult result, boolean useGenotype, Ontology ontology) { + public Sparkline2Svg(TestResult result, boolean useGenotype, MinimalOntology ontology) { this.termIdList = result.observedTerms(); this.excludedTermIdList = result.excludedTerms(); observedTermToolTipLabels = new ArrayList<>(); for (TermId t : this.termIdList) { - String label = ontology.getTermMap().get(t).getName(); + String label = ontology.termForTermId(t) + .map(Term::getName) + .orElse("UNKNOWN"); String tooltip = String.format("%s [%s]", label, t.getValue()); this.observedTermToolTipLabels.add(tooltip); } excludedTermToolTipLabels = new ArrayList<>(); for (TermId t : this.excludedTermIdList) { - String label = ontology.getTermMap().get(t).getName(); + String label = ontology.termForTermId(t) + .map(Term::getName) + .orElse("UNKNOWN"); String tooltip = String.format("%s [%s]", label, t.getValue()); this.excludedTermToolTipLabels.add(tooltip); } diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/package-info.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/package-info.java similarity index 76% rename from lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/package-info.java rename to lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/package-info.java index df1f97bb7..169da3e89 100644 --- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/svg/package-info.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/output/svg/package-info.java @@ -1,4 +1,4 @@ /** Classes to create an SVG graphic representing the result of likelihood raio analysis of one HPO Case. 
* @author Peter Robinson */ -package org.monarchinitiative.lirical.core.output.svg; \ No newline at end of file +package org.monarchinitiative.lirical.io.output.svg; \ No newline at end of file diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/package-info.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/package-info.java index c653d40f4..a4605a425 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/package-info.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/package-info.java @@ -1,3 +1,5 @@ -/** Parsers for LIRICAL. +/** + * Parsers for LIRICAL. + * * @author Peter Robinson*/ package org.monarchinitiative.lirical.io; \ No newline at end of file diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotator.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotator.java index e583db662..9c7080f15 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotator.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarFunctionalVariantAnnotator.java @@ -9,6 +9,7 @@ import de.charite.compbio.jannovar.reference.PositionType; import org.monarchinitiative.lirical.core.model.TranscriptAnnotation; import org.monarchinitiative.lirical.core.service.FunctionalVariantAnnotator; +import org.monarchinitiative.lirical.core.util.BinarySearch; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; import org.monarchinitiative.phenol.annotations.formats.GeneIdentifiers; import org.monarchinitiative.svart.CoordinateSystem; @@ -19,7 +20,6 @@ import java.util.*; import java.util.function.Function; -import java.util.stream.Collectors; public class JannovarFunctionalVariantAnnotator implements FunctionalVariantAnnotator { @@ -28,7 +28,7 @@ public class JannovarFunctionalVariantAnnotator implements FunctionalVariantAnno 
private static final AnnotationBuilderOptions OPTIONS = new AnnotationBuilderOptions(); private final ReferenceDictionary rd; private final VariantAnnotator annotator; - private final Map symbolToGeneId; + private final GeneIdentifier[] identifiers; /** * @deprecated to be removed in v2.0.0, use {@link #of(JannovarData, GeneIdentifiers)} instead. @@ -46,8 +46,9 @@ public static JannovarFunctionalVariantAnnotator of(JannovarData jannovarData, G private JannovarFunctionalVariantAnnotator(JannovarData jannovarData, GeneIdentifiers geneIdentifiers) { this.rd = Objects.requireNonNull(jannovarData).getRefDict(); this.annotator = new VariantAnnotator(rd, jannovarData.getChromosomes(), OPTIONS); - this.symbolToGeneId = Objects.requireNonNull(geneIdentifiers).stream() - .collect(Collectors.toMap(GeneIdentifier::symbol, Function.identity())); + this.identifiers = geneIdentifiers.stream() + .sorted(Comparator.comparing(GeneIdentifier::symbol)) + .toArray(GeneIdentifier[]::new); } @Override @@ -88,13 +89,19 @@ private static String formatVariant(GenomicVariant variant) { private Function> toTranscriptAnnotation() { return ann -> { - GeneIdentifier id = symbolToGeneId.get(ann.getGeneSymbol()); - if (id == null) { + Optional id = BinarySearch.binarySearch(identifiers, GeneIdentifier::symbol, ann.getGeneSymbol()); + + if (id.isEmpty()) { LOGGER.trace("Unknown gene symbol {}", ann.getGeneSymbol()); return Optional.empty(); } - return Optional.of(new JannovarTranscriptAnnotation(id, ann)); + return Optional.of( + new SimpleTxAnnotation(id.get(), + ann.getTranscript().getAccession(), + List.copyOf(ann.getEffects()), + ann.getCDSNTChangeStr(), + ann.getProteinChangeStr())); }; } } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarTranscriptAnnotation.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarTranscriptAnnotation.java deleted file mode 100644 index cb3d750cb..000000000 --- 
a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/JannovarTranscriptAnnotation.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.monarchinitiative.lirical.io.service; - -import de.charite.compbio.jannovar.annotation.Annotation; -import de.charite.compbio.jannovar.annotation.VariantEffect; -import org.monarchinitiative.lirical.core.model.TranscriptAnnotation; -import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; - -import java.util.List; -import java.util.Objects; - -class JannovarTranscriptAnnotation implements TranscriptAnnotation { - - private final GeneIdentifier geneIdentifier; - private final Annotation annotation; - - JannovarTranscriptAnnotation(GeneIdentifier geneIdentifier, Annotation annotation) { - this.geneIdentifier = Objects.requireNonNull(geneIdentifier); - this.annotation = Objects.requireNonNull(annotation); - } - - - @Override - public GeneIdentifier getGeneId() { - return geneIdentifier; - } - - @Override - public String getAccession() { - return annotation.getTranscript().getAccession(); - } - - @Override - public VariantEffect getMostPathogenicVariantEffect() { - return annotation.getMostPathogenicVarType(); - } - - @Override - public List getVariantEffects() { - return List.copyOf(annotation.getEffects()); - } - - @Override - public String getVariantEffect() { - return String.join("+", - getVariantEffects().stream() - .map(VariantEffect::name) - .toList()); - } - - @Override - public String getHgvsCdna() { - return annotation.getCDSNTChangeStr(); - } - - @Override - public String getHgvsProtein() { - return annotation.getProteinChangeStr(); - } -} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/SimpleTxAnnotation.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/SimpleTxAnnotation.java new file mode 100644 index 000000000..253a17e2a --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/service/SimpleTxAnnotation.java @@ -0,0 +1,50 
@@ +package org.monarchinitiative.lirical.io.service; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import org.monarchinitiative.lirical.core.model.TranscriptAnnotation; +import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier; + +import java.util.List; +import java.util.stream.Collectors; + +record SimpleTxAnnotation( + GeneIdentifier geneId, + String accession, + List variantEffects, + String hgvsCdna, + String hgvsProtein +) implements TranscriptAnnotation { + + @Override + public GeneIdentifier getGeneId() { + return geneId; + } + + @Override + public String getAccession() { + return accession; + } + + @Override + public List getVariantEffects() { + return variantEffects; + } + + @Override + public String getVariantEffect() { + return getVariantEffects().stream() + .map(VariantEffect::name) + .collect(Collectors.joining("+")); + } + + @Override + public String getHgvsCdna() { + return hgvsCdna; + } + + @Override + public String getHgvsProtein() { + return hgvsProtein; + } + +} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/GenotypedVariantIterator.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/GenotypedVariantIterator.java index 99c97462c..4fd4f874e 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/GenotypedVariantIterator.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/GenotypedVariantIterator.java @@ -5,6 +5,7 @@ import org.monarchinitiative.lirical.core.model.AlleleCount; import org.monarchinitiative.lirical.core.model.GenomeBuild; import org.monarchinitiative.lirical.core.model.GenotypedVariant; +import org.monarchinitiative.lirical.core.model.SampleAlleleCount; import org.monarchinitiative.svart.Contig; import org.monarchinitiative.svart.GenomicVariant; import org.monarchinitiative.svart.assembly.GenomicAssembly; @@ -69,18 +70,18 @@ private void readNextVariant() { Allele ref = vc.getReference(); for (Allele alt : alts) { 
GenomicVariant variant = converter.convert(contig, vc.getID(), start, ref.getBaseString(), alt.getBaseString()); - Map countMap = countGenotypes(ref, alt, vc.getGenotypes()); - queue.add(GenotypedVariant.of(genomeBuild, variant, countMap, vc.isNotFiltered())); + List alleleCounts = countGenotypes(ref, alt, vc.getGenotypes()); + queue.add(GenotypedVariant.of(genomeBuild, variant, alleleCounts, vc.isNotFiltered())); } } break; } } - private static Map countGenotypes(Allele ref, - Allele alt, - GenotypesContext genotypes) { - Map countMap = new HashMap<>(genotypes.size()); + private static List countGenotypes(Allele ref, + Allele alt, + GenotypesContext genotypes) { + List alleleCounts = new ArrayList<>(genotypes.size()); for (Genotype gt : genotypes) { if (gt.isNoCall()) continue; @@ -105,9 +106,9 @@ private static Map countGenotypes(Allele ref, } else { ac = AlleleCount.of(((byte) refCount), (byte) altCount); } - countMap.put(gt.getSampleName(), ac); + alleleCounts.add(SampleAlleleCount.of(gt.getSampleName(), ac)); } - return Collections.unmodifiableMap(countMap); + return alleleCounts; } } diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantFailingFilters.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantFailingFilters.java new file mode 100644 index 000000000..6218b48c5 --- /dev/null +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantFailingFilters.java @@ -0,0 +1,56 @@ +package org.monarchinitiative.lirical.io.vcf; + +import org.monarchinitiative.lirical.core.model.*; +import org.monarchinitiative.svart.GenomicVariant; + +import java.util.List; +import java.util.Optional; +import java.util.Set; + +record LiricalVariantFailingFilters(GenotypedVariant gv, List annotations) implements LiricalVariant { + + @Override + public GenomeBuild genomeBuild() { + return gv.genomeBuild(); + } + + @Override + public GenomicVariant variant() { + return gv.variant(); + } + + 
@Override + public Set sampleNames() { + return gv.sampleNames(); + } + + @Override + public Optional alleleCount(String sampleId) { + return gv.alleleCount(sampleId); + } + + @Override + public boolean passedFilters() { + return false; + } + + @Override + public boolean failedFilters() { + return true; + } + + @Override + public Optional frequency() { + return Optional.empty(); + } + + @Override + public float pathogenicity() { + return 0; + } + + @Override + public Optional clinVarAlleleData() { + return Optional.empty(); + } +} diff --git a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantIterator.java b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantIterator.java index 0157972fc..9ed3781c0 100644 --- a/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantIterator.java +++ b/lirical-io/src/main/java/org/monarchinitiative/lirical/io/vcf/LiricalVariantIterator.java @@ -33,7 +33,11 @@ public boolean hasNext() { @Override public LiricalVariant next() { GenotypedVariant gv = iterator.next(); + List annotations = variantAnnotator.annotate(gv.variant()); + if (gv.failedFilters()) + // No point in further variant annotation if the variant failed the initial filtering. 
+ return new LiricalVariantFailingFilters(gv, annotations); List effects = annotations.stream() .map(TranscriptAnnotation::getVariantEffects) diff --git a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalHTML.ftl b/lirical-io/src/main/resources/org/monarchinitiative/lirical/io/output/liricalHTML.ftl similarity index 94% rename from lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalHTML.ftl rename to lirical-io/src/main/resources/org/monarchinitiative/lirical/io/output/liricalHTML.ftl index 17f31d9e5..f4b16c891 100644 --- a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalHTML.ftl +++ b/lirical-io/src/main/resources/org/monarchinitiative/lirical/io/output/liricalHTML.ftl @@ -470,7 +470,7 @@

-

${resultsMeta.sampleName!"n/a"} Phenotypic Features

+

Phenotypic features of ${resultsMeta.sampleName!"n/a"}

@@ -533,14 +533,14 @@

${topdifferentialcount}

- <#if hasGenotypes?has_content> + <#if phenotypeOnly> + - <#list sparkline as sprk> @@ -548,29 +548,29 @@ + -
Rank Post-test probability DiseaseID PhenotypesGene LR (log10)
${sprk.rank} ${sprk.posttestBarSvg} ${sprk.diseaseName}${sprk.diseaseAnchor} ${sprk.sparklineSvg}${sprk.geneSparklineSvg} ${sprk.compositeLikelihoodRatio}
<#else> - +
- - + + <#list sparkline as sprk> - + @@ -605,10 +605,10 @@
Rank Post-test probability DiseaseID PhenotypesLR (log)GeneLR (log10)
${sprk.rank} ${sprk.posttestBarSvg} ${sprk.diseaseName}${sprk.diseaseAnchor} ${sprk.sparklineSvg}${sprk.geneSparklineSvg} ${sprk.compositeLikelihoodRatio}
${dd.posttestprob}
-
+ <#if dd.entrezGeneId != "n/a">

${dd.geneSymbol} @@ -619,6 +619,7 @@

+ <#if dd.hasGenotypeExplanation()>

${dd.genotypeExplanation}

@@ -712,7 +713,7 @@ often accession numbers of poorly defined entities. The following list shows gene IDs that could not be identified in this run. If there are many entries in this list, we recommend trying a different - annotation source (e.g., refseq). See also the online documentation of LIRICAL.

+ annotation source (e.g., RefSeq). See also the online documentation of LIRICAL.

Show Table Hide Table @@ -738,7 +739,7 @@ It displays detailed information for the top differential diagnoses (by default all diseases with a posttest probability above ${postprobthreshold} and at least 5; these thresholds can be adjusted if desired). The following text provides brief explanations of the symbols used by - LIRICAL to explain how phenotype likelihood ratio scores were generaed.

+ LIRICAL to explain how phenotype likelihood ratio scores were generated.

  • E: Exact match between query term and disease term.
  • Q<D: Query term is a child of disease term.
  • @@ -748,7 +749,7 @@
  • X: Query term is explicitly annotated as being not present in disease
  • XX: Term excluded by query and explicitly annotated as being not present in disease
  • XA: Term excluded by query and not explicitly annotated as being present in disease
  • -
  • XP: Term excluded by query but is explictly annotated as being present in disease
  • +
  • XP: Term excluded by query but is explicitly annotated as being present in disease
  • U: Flag for unusual background query (please report to developers)
@@ -774,30 +775,32 @@

This LIRICAL run had the following configuration:

    - <#if resultsMeta.hpoVersion?has_content> -
  • Human Phenotype Ontology version: ${resultsMeta.hpoVersion}
  • - - <#if resultsMeta.transcriptDatabase?has_content> -
  • Transcript database: ${resultsMeta.transcriptDatabase}
  • - - <#if resultsMeta.nGoodQualityVariants?has_content> -
  • Good quality variants: ${resultsMeta.nGoodQualityVariants}
  • - - <#if resultsMeta.nFilteredVariants?has_content> -
  • Variants removed due to failing quality filter: ${resultsMeta.nFilteredVariants}
  • - - <#if resultsMeta.genesWithVar?has_content> -
  • Genes found to have at least one variant: ${resultsMeta.genesWithVar}
  • - <#if resultsMeta.liricalPath?has_content> -
  • Path to Lirical data directory: ${resultsMeta.liricalPath}
  • +
  • Path to LIRICAL data directory: ${resultsMeta.liricalPath}
  • - <#if resultsMeta.exomiserPath?has_content> -
  • Path to Exomiser data directory: ${resultsMeta.exomiserPath}
  • + <#if resultsMeta.hpoVersion?has_content> +
  • Human Phenotype Ontology version: ${resultsMeta.hpoVersion}
  • <#if resultsMeta.globalMode?has_content>
  • Global analysis mode: ${resultsMeta.globalMode?string("Yes", "No")}
  • + <#if !phenotypeOnly> + <#if resultsMeta.transcriptDatabase?has_content> +
  • Transcript database: ${resultsMeta.transcriptDatabase}
  • + + <#if resultsMeta.exomiserPath?has_content> +
  • Path to Exomiser database file: ${resultsMeta.exomiserPath}
  • + + <#if resultsMeta.nPassingVariants?has_content> +
  • Good quality variants: ${resultsMeta.nPassingVariants}
  • + + <#if resultsMeta.nFilteredVariants?has_content> +
  • Variants removed due to failing quality filter: ${resultsMeta.nFilteredVariants}
  • + + <#if resultsMeta.genesWithVar?has_content> +
  • Genes found to have at least one variant: ${resultsMeta.genesWithVar}
  • + +

@@ -805,7 +808,7 @@
-

LIRICAL ${resultsMeta.liricalVersion!""} © 2022

+

LIRICAL ${resultsMeta.liricalVersion!""} © 2023