version() {
- return Optional.ofNullable(version);
+ return options.version();
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java
new file mode 100644
index 000000000..370574150
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/LiricalOptions.java
@@ -0,0 +1,27 @@
+package org.monarchinitiative.lirical.core;
+
+import java.util.Optional;
+
+/**
+ * Global options to parameterize LIRICAL execution.
+ *
+ * Note, these options do not parameterize the analyses.
+ */
+public class LiricalOptions {
+
+ private final String version; // nullable
+ private final int parallelism;
+
+ public LiricalOptions(String version, int parallelism) {
+ this.version = version;
+ this.parallelism = parallelism;
+ }
+
+ public Optional version() {
+ return Optional.ofNullable(version);
+ }
+
+ public int parallelism() {
+ return parallelism;
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java
index 75d3f651d..2a9e5d55f 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisData.java
@@ -1,22 +1,36 @@
package org.monarchinitiative.lirical.core.analysis;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import org.monarchinitiative.lirical.core.model.Age;
import org.monarchinitiative.lirical.core.model.GenesAndGenotypes;
import org.monarchinitiative.lirical.core.model.Sex;
import org.monarchinitiative.phenol.ontology.data.TermId;
+import java.util.Collection;
import java.util.List;
+import java.util.Optional;
/**
- * An interface for representing proband data.
+ * Representation of subject data required by LIRICAL analysis.
*/
public interface AnalysisData {
+ /**
+ * Construct analysis data from the inputs.
+ *
+ * @param sampleId non-null sample identifier.
+ * @param age subject's age or {@code null} if not available.
+ * @param sex non-null sex.
+ * @param presentPhenotypeTerms a collection of observed HPO terms.
+ * @param negatedPhenotypeTerms a collection of excluded HPO terms.
+ * @param genes non-null container of genes and genotypes.
+ */
static AnalysisData of(String sampleId,
Age age,
Sex sex,
- List presentPhenotypeTerms,
- List negatedPhenotypeTerms,
+ Collection presentPhenotypeTerms,
+ Collection negatedPhenotypeTerms,
GenesAndGenotypes genes) {
return new AnalysisDataDefault(sampleId,
age,
@@ -26,16 +40,40 @@ static AnalysisData of(String sampleId,
genes);
}
+ /**
+ * @return a non-null sample ID.
+ */
+ @JsonGetter
String sampleId();
- Age age();
+ /**
+ * @return an optional with age or empty optional if age is not available.
+ */
+ @JsonGetter
+ Optional age();
+ /**
+ * @return a non-null sex of the subject.
+ */
+ @JsonGetter(value = "sex")
Sex sex();
+ /**
+ * @return a list of the HPO terms that were observed in the subject.
+ */
+ @JsonGetter(value = "observedPhenotypicFeatures")
List presentPhenotypeTerms();
+ /**
+ * @return a list of the HPO terms whose presence was explicitly excluded in the subject.
+ */
+ @JsonGetter(value = "excludedPhenotypicFeatures")
List negatedPhenotypeTerms();
+ /**
+ * @return container with genes and genotypes observed in the subject.
+ */
+ @JsonIgnore
GenesAndGenotypes genes();
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java
index 1820dd0fb..8acccf6c4 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataDefault.java
@@ -1,19 +1,97 @@
package org.monarchinitiative.lirical.core.analysis;
-import org.monarchinitiative.lirical.core.model.GenesAndGenotypes;
import org.monarchinitiative.lirical.core.model.Age;
+import org.monarchinitiative.lirical.core.model.GenesAndGenotypes;
import org.monarchinitiative.lirical.core.model.Sex;
import org.monarchinitiative.phenol.ontology.data.TermId;
+import java.util.Collection;
import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
/**
* Default implementation of {@link AnalysisData}.
*/
-record AnalysisDataDefault(String sampleId,
- Age age,
- Sex sex,
- List presentPhenotypeTerms,
- List negatedPhenotypeTerms,
- GenesAndGenotypes genes) implements AnalysisData {
+final class AnalysisDataDefault implements AnalysisData {
+ private final String sampleId;
+ private final Age age;
+ private final Sex sex;
+ private final List presentPhenotypeTerms;
+ private final List negatedPhenotypeTerms;
+ private final GenesAndGenotypes genes;
+
+ AnalysisDataDefault(String sampleId,
+ Age age,
+ Sex sex,
+ Collection presentPhenotypeTerms,
+ Collection negatedPhenotypeTerms,
+ GenesAndGenotypes genes) {
+ this.sampleId = Objects.requireNonNull(sampleId);
+ this.age = age;
+ this.sex = Objects.requireNonNull(sex);
+ this.presentPhenotypeTerms = List.copyOf(Objects.requireNonNull(presentPhenotypeTerms));
+ this.negatedPhenotypeTerms = List.copyOf(Objects.requireNonNull(negatedPhenotypeTerms));
+ this.genes = Objects.requireNonNull(genes);
+ }
+
+ @Override
+ public String sampleId() {
+ return sampleId;
+ }
+
+ @Override
+ public Optional age() {
+ return Optional.ofNullable(age);
+ }
+
+ @Override
+ public Sex sex() {
+ return sex;
+ }
+
+ @Override
+ public List presentPhenotypeTerms() {
+ return presentPhenotypeTerms;
+ }
+
+ @Override
+ public List negatedPhenotypeTerms() {
+ return negatedPhenotypeTerms;
+ }
+
+ @Override
+ public GenesAndGenotypes genes() {
+ return genes;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this) return true;
+ if (obj == null || obj.getClass() != this.getClass()) return false;
+ var that = (AnalysisDataDefault) obj;
+ return Objects.equals(this.sampleId, that.sampleId) &&
+ Objects.equals(this.age, that.age) &&
+ Objects.equals(this.sex, that.sex) &&
+ Objects.equals(this.presentPhenotypeTerms, that.presentPhenotypeTerms) &&
+ Objects.equals(this.negatedPhenotypeTerms, that.negatedPhenotypeTerms) &&
+ Objects.equals(this.genes, that.genes);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(sampleId, age, sex, presentPhenotypeTerms, negatedPhenotypeTerms, genes);
+ }
+
+ @Override
+ public String toString() {
+ return "AnalysisDataDefault[" +
+ "sampleId=" + sampleId + ", " +
+ "age=" + age + ", " +
+ "sex=" + sex + ", " +
+ "presentPhenotypeTerms=" + presentPhenotypeTerms + ", " +
+ "negatedPhenotypeTerms=" + negatedPhenotypeTerms + ", " +
+ "genes=" + genes + ']';
+ }
+
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java
index c205c84fd..fd521014a 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisDataParser.java
@@ -1,9 +1,23 @@
package org.monarchinitiative.lirical.core.analysis;
+import org.monarchinitiative.lirical.core.model.GenomeBuild;
+import org.monarchinitiative.lirical.core.model.TranscriptDatabase;
+
import java.io.InputStream;
+// REMOVE(v2.0.0)
+@Deprecated(forRemoval = true)
public interface AnalysisDataParser {
- AnalysisData parse(InputStream is) throws LiricalParseException;
+ /**
+ * @deprecated use {@link #parse(InputStream, GenomeBuild, TranscriptDatabase)} instead.
+ */
+ // REMOVE(v2.0.0)
+ @Deprecated(forRemoval = true)
+ default AnalysisData parse(InputStream is) throws LiricalParseException {
+ return parse(is, GenomeBuild.HG38, TranscriptDatabase.REFSEQ);
+ }
+
+ AnalysisData parse(InputStream is, GenomeBuild build, TranscriptDatabase transcriptDatabase) throws LiricalParseException;
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java
index 07dc95869..16c911b5f 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptions.java
@@ -120,8 +120,21 @@ default float defaultVariantAlleleFrequency() {
* with the disease. The option is used only if the variants are available for the investigated individual.
*
* @return {@code true} if the candidate disease should be disregarded.
+ * @deprecated use {@link #includeDiseasesWithNoDeleteriousVariants()} instead
*/
- boolean disregardDiseaseWithNoDeleteriousVariants();
+ // REMOVE(v2.0.0)
+ @Deprecated(forRemoval = true)
+ default boolean disregardDiseaseWithNoDeleteriousVariants() {
+ return !includeDiseasesWithNoDeleteriousVariants();
+ }
+
+ /**
+ * Include a disease if no known or predicted deleterious variants are found in the gene associated
+ * with the disease. The option is used only if the variants are available for the investigated individual.
+ *
+ * @return {@code true} if the candidate disease should be included.
+ */
+ boolean includeDiseasesWithNoDeleteriousVariants();
/**
* Variant with pathogenicity value greater or equal to this threshold is considered deleterious.
@@ -150,7 +163,7 @@ class Builder {
private boolean useStrictPenalties = false;
private boolean useGlobal = false;
private PretestDiseaseProbability pretestDiseaseProbability = null;
- private boolean disregardDiseaseWithNoDeleteriousVariants = true;
+ private boolean includeDiseasesWithNoDeleteriousVariants = false;
private Builder() {
}
@@ -226,9 +239,18 @@ public Builder pretestProbability(PretestDiseaseProbability pretestDiseaseProbab
return this;
}
-
+ /**
+ * @deprecated use {@link #includeDiseasesWithNoDeleteriousVariants} instead. Note that you'll have
+ * to negate the value to obtain the same result.
+ */
+ @Deprecated(forRemoval = true)
public Builder disregardDiseaseWithNoDeleteriousVariants(boolean disregardDiseaseWithNoDeleteriousVariants) {
- this.disregardDiseaseWithNoDeleteriousVariants = disregardDiseaseWithNoDeleteriousVariants;
+ this.includeDiseasesWithNoDeleteriousVariants = !disregardDiseaseWithNoDeleteriousVariants;
+ return this;
+ }
+
+ public Builder includeDiseasesWithNoDeleteriousVariants(boolean includeDiseasesWithNoDeleteriousVariants) {
+ this.includeDiseasesWithNoDeleteriousVariants = includeDiseasesWithNoDeleteriousVariants;
return this;
}
@@ -241,7 +263,7 @@ public AnalysisOptions build() {
useStrictPenalties,
useGlobal,
pretestDiseaseProbability,
- disregardDiseaseWithNoDeleteriousVariants);
+ includeDiseasesWithNoDeleteriousVariants);
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java
index 24f44e308..f5ae32c3d 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisOptionsDefault.java
@@ -16,6 +16,6 @@ record AnalysisOptionsDefault(
boolean useStrictPenalties,
boolean useGlobal,
PretestDiseaseProbability pretestDiseaseProbability,
- boolean disregardDiseaseWithNoDeleteriousVariants
+ boolean includeDiseasesWithNoDeleteriousVariants
) implements AnalysisOptions {
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java
index 4233dc038..39e8483e7 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/AnalysisResults.java
@@ -1,5 +1,6 @@
package org.monarchinitiative.lirical.core.analysis;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import org.monarchinitiative.phenol.ontology.data.TermId;
import java.util.Comparator;
@@ -26,18 +27,23 @@ static AnalysisResults of(List results) {
/**
* @return test result count
*/
+ @JsonIgnore
int size();
+ @JsonIgnore
default boolean isEmpty() {
return size() == 0;
}
+ @JsonIgnore
Optional resultByDiseaseId(TermId diseaseId);
+ @JsonIgnore
default Stream results() {
return StreamSupport.stream(spliterator(), false);
}
+ @JsonIgnore
default Stream resultsWithDescendingPostTestProbability() {
return results().sorted(Comparator.comparingDouble(TestResult::posttestProbability).reversed());
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java
index acba94f4a..c0a8762f4 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalAnalysisRunner.java
@@ -2,12 +2,14 @@
import org.monarchinitiative.lirical.core.exception.LiricalAnalysisException;
+import java.io.Closeable;
+
/**
* The analysis runner runs LIRICAL analysis on provided analysis subject ({@link AnalysisData}). The analysis
* is parametrized by {@link AnalysisOptions}. The runner throws {@link LiricalAnalysisException} if the analysis
* cannot be run as dictated by the options.
*/
-public interface LiricalAnalysisRunner {
+public interface LiricalAnalysisRunner extends Closeable {
/**
* Run analysis parametrized by {@code analysisOptions} on {@code analysisData}.
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java
index 854832ce0..e88a91a70 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/LiricalParseException.java
@@ -5,6 +5,7 @@
/**
* An exception thrown when user-provided input is invalid.
*/
+// TODO - move to CLI after removing AnalysisDataParser.
public class LiricalParseException extends LiricalException {
public LiricalParseException() {
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java
index 60136bd93..1aa0dc069 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/TestResult.java
@@ -1,6 +1,8 @@
package org.monarchinitiative.lirical.core.analysis;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import org.monarchinitiative.lirical.core.likelihoodratio.GenotypeLrWithExplanation;
import org.monarchinitiative.lirical.core.likelihoodratio.LrWithExplanation;
import org.monarchinitiative.phenol.ontology.data.TermId;
@@ -94,6 +96,7 @@ private static double calculateCompositeLR(List observed, Lis
return observedLr * excludedLr * genotypeLrForCalculationOfCompositeLr;
}
+ @JsonGetter(value = "observedPhenotypicFeatures")
public List observedResults() {
return observedResults;
}
@@ -102,6 +105,7 @@ public List observedTerms() {
return observedResults.stream().map(LrWithExplanation::queryTerm).toList();
}
+ @JsonGetter(value = "excludedPhenotypicFeatures")
public List excludedResults() {
return excludedResults;
}
@@ -113,6 +117,7 @@ public List excludedTerms() {
/**
* @return the composite likelihood ratio (product of the LRs of the individual tests).
*/
+ @JsonGetter
public double getCompositeLR() {
return compositeLR;
}
@@ -120,6 +125,7 @@ public double getCompositeLR() {
/**
* @return the total count of tests performed (excluding genotype).
*/
+ @JsonIgnore
public int getNumberOfTests() {
return observedResults.size() + excludedResults.size();
}
@@ -138,7 +144,7 @@ public double posttestOdds() {
return pretestOdds() * getCompositeLR();
}
-
+ @JsonGetter
public double pretestProbability() {
return pretestProbability;
}
@@ -148,6 +154,7 @@ private double calculatePosttestProbability() {
return po / (1 + po);
}
+ @JsonGetter
public double posttestProbability() {
return posttestProbability;
}
@@ -189,6 +196,7 @@ public double getExcludedPhenotypeRatio(int i) {
/**
* @return name of the disease being tested.
*/
+ @JsonGetter
public TermId diseaseId() {
return diseaseId;
}
@@ -202,10 +210,12 @@ public boolean hasGenotypeLR() {
return false;
}
+ @JsonGetter(value = "genotypeLR")
public Optional genotypeLr() {
return Optional.ofNullable(genotypeLr);
}
+ @JsonIgnore
@Deprecated(forRemoval = true) // get explanations from results
// REMOVE(v2.0.0)
public List getObservedPhenotypeExplanation() {
@@ -215,6 +225,7 @@ public List getObservedPhenotypeExplanation() {
.toList();
}
+ @JsonIgnore
@Deprecated(forRemoval = true) // get explanations from excludedResults
// REMOVE(v2.0.0)
public List getExcludedPhenotypeExplanation() {
@@ -229,6 +240,7 @@ public List getExcludedPhenotypeExplanation() {
*
* @return maximum abs(LR)
*/
+ @JsonIgnore
public double getMaximumIndividualLR() {
double m1 = this.observedResults.stream()
.map(LrWithExplanation::lr)
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java
index 3178678d5..e8869eb9d 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/LiricalAnalysisRunnerImpl.java
@@ -29,18 +29,20 @@ public class LiricalAnalysisRunnerImpl implements LiricalAnalysisRunner {
private final ForkJoinPool pool;
public static LiricalAnalysisRunnerImpl of(PhenotypeService phenotypeService,
- BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) {
- return new LiricalAnalysisRunnerImpl(phenotypeService, backgroundVariantFrequencyServiceFactory);
+ BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory,
+ int parallelism) {
+ return new LiricalAnalysisRunnerImpl(phenotypeService,
+ backgroundVariantFrequencyServiceFactory,
+ parallelism);
}
private LiricalAnalysisRunnerImpl(PhenotypeService phenotypeService,
- BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory) {
+ BackgroundVariantFrequencyServiceFactory backgroundVariantFrequencyServiceFactory,
+ int parallelism) {
this.phenotypeService = Objects.requireNonNull(phenotypeService);
this.phenotypeLrEvaluator = new PhenotypeLikelihoodRatio(phenotypeService.hpo(), phenotypeService.diseases());
this.bgFreqFactory = backgroundVariantFrequencyServiceFactory;
- // TODO - set parallelism
- int parallelism = Runtime.getRuntime().availableProcessors();
- LOGGER.debug("Creating LIRICAL pool with {} workers.", parallelism);
+ LOGGER.debug("Creating LIRICAL pool with {} worker(s).", parallelism);
this.pool = new ForkJoinPool(parallelism, LiricalWorkerThread::new, null, false);
}
@@ -118,16 +120,16 @@ private Optional analyzeDisease(GenotypeLikelihoodRatio genotypeLike
GenotypeLrWithExplanation candidate = genotypeLikelihoodRatio.evaluateGenotype(analysisData.sampleId(), g2g, disease.modesOfInheritance());
bestGenotypeLr = takeNonNullOrGreaterLr(bestGenotypeLr, candidate);
- if (options.disregardDiseaseWithNoDeleteriousVariants()) {
+ if (!options.includeDiseasesWithNoDeleteriousVariants()) {
// has at least one pathogenic clinvar variant or predicted pathogenic variant?
if (g2g.pathogenicClinVarCount(analysisData.sampleId()) > 0
- || g2g.pathogenicAlleleCount(analysisData.sampleId(), options.variantDeleteriousnessThreshold()) > 0) {
+ || g2g.deleteriousAlleleCount(analysisData.sampleId(), options.variantDeleteriousnessThreshold()) > 0) {
noPredictedDeleteriousVariantsWereFound = false;
}
}
}
- if (options.disregardDiseaseWithNoDeleteriousVariants() && noPredictedDeleteriousVariantsWereFound)
+ if (!options.includeDiseasesWithNoDeleteriousVariants() && noPredictedDeleteriousVariantsWereFound)
return Optional.empty();
/*
@@ -182,4 +184,10 @@ private Optional configureGenotypeLikelihoodRatio(Genom
});
}
+ @Override
+ public void close() {
+ LOGGER.debug("Shutting down the analysis runner");
+ // TODO - use close after updating Java to 19+
+ pool.shutdownNow();
+ }
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java
new file mode 100644
index 000000000..5f1c4b3b4
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/impl/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Default LIRICAL analysis implementation.
+ */
+package org.monarchinitiative.lirical.core.analysis.impl;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java
index 8696a3557..36e03cb6c 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/package-info.java
@@ -1,4 +1,11 @@
/**
- * Classes for coordinating the main Lirical analysis goals.
+ * A high-level representation of LIRICAL analysis.
+ *
+ * The analysis subject is provided as {@link org.monarchinitiative.lirical.core.analysis.AnalysisData}. The analysis
+ * is parameterized by {@link org.monarchinitiative.lirical.core.analysis.AnalysisOptions}.
+ * {@link org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner} executes the analysis. The output
+ * is wrapped into {@link org.monarchinitiative.lirical.core.analysis.AnalysisResults} which reports results
+ * of matching the subject to computational disease models,
+ * one {@link org.monarchinitiative.lirical.core.analysis.TestResult} per disease.
*/
package org.monarchinitiative.lirical.core.analysis;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java
new file mode 100644
index 000000000..2445e98ba
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/analysis/probability/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Model of pretest probability of diseases.
+ */
+package org.monarchinitiative.lirical.core.analysis.probability;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java
index 0d8c4c51f..1bd447c83 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/LiricalAnalysisException.java
@@ -3,7 +3,10 @@
/**
* An exception thrown by {@link org.monarchinitiative.lirical.core.analysis.LiricalAnalysisRunner} if the analysis
* cannot be run.
+ * @deprecated will be moved into {@link org.monarchinitiative.lirical.core.analysis} package.
*/
+// TODO - move to analysis package.
+@Deprecated(forRemoval = true)
public class LiricalAnalysisException extends LiricalException {
public LiricalAnalysisException() {
super();
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java
new file mode 100644
index 000000000..50b907022
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/exception/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Top-level exceptions.
+ */
+package org.monarchinitiative.lirical.core.exception;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java
new file mode 100644
index 000000000..a9c674720
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/io/package-info.java
@@ -0,0 +1,14 @@
+/**
+ * APIs for reading and annotation of genomic variants.
+ *
+ * LIRICAL needs to read genomic variants, perform functional annotation, and fetch variant frequencies for the variants.
+ * LIRICAL does not care about how this is done, as long as the variants meet
+ * the {@link org.monarchinitiative.lirical.core.model.LiricalVariant} requirements.
+ *
+ * One way to configure the functional annotation is to implement {@link org.monarchinitiative.lirical.core.io.VariantParserFactory}
+ * which can provide a {@link org.monarchinitiative.lirical.core.io.VariantParser} to read variants
+ * from a {@link java.nio.file.Path} given {@link org.monarchinitiative.lirical.core.model.GenomeBuild}
+ * and {@link org.monarchinitiative.lirical.core.model.TranscriptDatabase}. For instance, to read variants
+ * from a VCF file.
+ */
+package org.monarchinitiative.lirical.core.io;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java
index 8c7688502..74551bc27 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatio.java
@@ -15,7 +15,9 @@
/**
- * This class is responsible for calculating the genotype-based likelihood ratio.
+ * This class is responsible for calculating the likelihood ratio for genotypes, as described
+ * in the Material and Methods | Likelihood Ratio for Genotypes section
+ * of the LIRICAL manuscript.
*
* @author Peter Robinson
*/
@@ -102,56 +104,57 @@ private double updateMax(double left, Double right) {
* for autosomal recessive.
*
* @param g2g {@link Gene2Genotype} object with list of variants in current gene. Can be null if no variants were found in the gene
- * @param inheritancemodes list of modes of inheritance associated with disease being investigated (usually with just one entry).
+ * @param inheritanceModes list of modes of inheritance associated with disease being investigated (usually with just one entry).
* @return likelihood ratio of the genotype given the disease/geneId combination
*/
- public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype g2g, List inheritancemodes) {
+ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype g2g, List inheritanceModes) {
// special case 1: No variant found in this gene
if (!g2g.hasVariants()) {
- return getLRifNoVariantAtAllWasIdentified(inheritancemodes, g2g);
+ return getLRifNoVariantAtAllWasIdentified(inheritanceModes, g2g);
}
- // special case 2: Clinvar-pathogenic variant(s) found in this gene.
+ // special case 2: ClinVar-pathogenic or likely pathogenic (P/LP) variant(s) found in this gene.
// The likelihood ratio is defined as 1000**count, where 1 for autosomal dominant and
- // 2 for autosomal recessive. (If the count of pathogenic alleles does not match
+ // 2 for autosomal recessive. If the count of P/LP alleles does not match
// the expected count, return 1000.
int pathogenicClinVarAlleleCount = g2g.pathogenicClinVarCount(sampleId);
if (pathogenicClinVarAlleleCount > 0) {
- if (inheritancemodes.contains(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE)) {
+ if (inheritanceModes.contains(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE)) {
if (pathogenicClinVarAlleleCount == 2) {
return GenotypeLrWithExplanation.twoPathClinVarAllelesRecessive(g2g.geneId(),Math.pow(1000d, 2));
}
+ // A case of one ClinVar P/LP allele in an AR disease falls through to the main algorithm below.
} else { // for all other MoI, including AD, assume that only one ClinVar allele is pathogenic
return GenotypeLrWithExplanation.pathClinVar(g2g.geneId(), Math.pow(1000d, 1d));
}
}
- int pathogenicAlleleCount = g2g.pathogenicAlleleCount(sampleId, pathogenicityThreshold);
- double observedWeightedPathogenicVariantCount = g2g.getSumOfPathBinScores(sampleId, pathogenicityThreshold);
- if (pathogenicAlleleCount == 0 || observedWeightedPathogenicVariantCount < EPSILON) {
- // no identified variant or the pathogenicity score of identified variant is close to zero
+ int deleteriousAlleleCount = g2g.deleteriousAlleleCount(sampleId, pathogenicityThreshold);
+ double observedWeightedDeleteriousVariantCount = g2g.getSumOfPathBinScores(sampleId, pathogenicityThreshold);
+ if (deleteriousAlleleCount == 0 || observedWeightedDeleteriousVariantCount < EPSILON) {
+ // no identified deleterious variant or the deleteriousness score of identified variant is close to zero
// essentially same as no identified variant, this should happen rarely if ever.
- return getLRifNoVariantAtAllWasIdentified(inheritancemodes, g2g);
+ return getLRifNoVariantAtAllWasIdentified(inheritanceModes, g2g);
}
// if we get here then
// 1. g2g was not null
// 2. There was at least one observed variant
- // 3. There was no pathogenic variant listed in ClinVar.
+ // 3. There was no P/LP variant listed in ClinVar, or the disease is AR and the P/LP allele count is not exactly two.
// Therefore, we apply the main algorithm for calculating the LR genotype score.
double lambda_background = backgroundVariantFrequencyService.frequencyForGene(g2g.geneId().id())
.orElse(backgroundVariantFrequencyService.defaultVariantBackgroundFrequency());
- if (inheritancemodes == null || inheritancemodes.isEmpty()) {
+ if (inheritanceModes == null || inheritanceModes.isEmpty()) {
// This is probably because the HPO annotation file is incomplete
logger.warn("No inheritance mode annotation found for geneId {}, reverting to default", g2g.geneId().id().getValue());
// Add a default dominant mode to avoid not ranking this gene at all
- inheritancemodes = List.of(HpoModeOfInheritanceTermIds.AUTOSOMAL_DOMINANT);
+ inheritanceModes = List.of(HpoModeOfInheritanceTermIds.AUTOSOMAL_DOMINANT);
}
// The following is a heuristic to avoid giving genes with a high background count
// a better score for pathogenic than background -- the best explanation for
// a gene with high background is that a variant is background (unless variant is ClinVar-path, see above).
if (lambda_background > 1.0) {
- lambda_background = Math.min(lambda_background, pathogenicAlleleCount);
+ lambda_background = Math.min(lambda_background, deleteriousAlleleCount);
}
// Use the following four vars to keep track of which option was the max.
Double max = null;
@@ -163,7 +166,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype
//last if/else
double B = 1.0; // background
double D = 1.0; // disease
- for (TermId inheritanceId : inheritancemodes) {
+ for (TermId inheritanceId : inheritanceModes) {
double lambda_disease;
PoissonDistribution pdDisease;
if (inheritanceId.equals(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE) || inheritanceId.equals(HpoModeOfInheritanceTermIds.X_LINKED_RECESSIVE)) {
@@ -178,16 +181,16 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype
// will take the observed path weighted count to not be more than lambda_disease.
// this will have the effect of not downweighting these genes
// the user will have to judge whether one of the variants is truly pathogenic.
- if (strict && pathogenicAlleleCount > (lambda_disease + EPSILON)) {
- double HEURISTIC = HEURISTIC_PATH_ALLELE_COUNT_ABOVE_LAMBDA_D * (pathogenicAlleleCount - lambda_disease);
+ if (strict && deleteriousAlleleCount > (lambda_disease + EPSILON)) {
+ double HEURISTIC = HEURISTIC_PATH_ALLELE_COUNT_ABOVE_LAMBDA_D * (deleteriousAlleleCount - lambda_disease);
max = updateMax(HEURISTIC, max);
maxInheritanceMode = inheritanceId;
heuristicPathCountAboveLambda = true;
} else { // the following is the general case, where either the variant count
// matches or we are not using the strict option.
- D = pdDisease.probability(observedWeightedPathogenicVariantCount);
+ D = pdDisease.probability(observedWeightedDeleteriousVariantCount);
PoissonDistribution pdBackground = new PoissonDistribution(lambda_background);
- B = pdBackground.probability(observedWeightedPathogenicVariantCount);
+ B = pdBackground.probability(observedWeightedDeleteriousVariantCount);
if (B > 0 && D > 0) {
double ratio = D / B;
if (max != null && ratio > max) {
@@ -211,7 +214,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype
returnvalue,
maxInheritanceMode,
lambda_background,
- observedWeightedPathogenicVariantCount);
+ observedWeightedDeleteriousVariantCount);
} else {
return GenotypeLrWithExplanation.explanation(g2g.geneId(),
returnvalue,
@@ -219,7 +222,7 @@ public GenotypeLrWithExplanation evaluateGenotype(String sampleId, Gene2Genotype
lambda_background,
B,
D,
- observedWeightedPathogenicVariantCount);
+ observedWeightedDeleteriousVariantCount);
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java
new file mode 100644
index 000000000..2cb60c4b7
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrMatchType.java
@@ -0,0 +1,82 @@
+package org.monarchinitiative.lirical.core.likelihoodratio;
+
+/**
+ * The enum for representing the type of the genotype likelihood ratio analysis performed for a gene.
+ *
+ * @see GenotypeLrWithExplanation
+ */
+public enum GenotypeLrMatchType {
+
+ /**
+ * No variants were detected in a gene associated with a disease with autosomal dominant inheritance.
+ */
+ NO_VARIANTS_DETECTED_AD,
+
+ /**
+ * No variants were detected in a gene associated with a disease with autosomal recessive inheritance.
+ */
+ NO_VARIANTS_DETECTED_AR,
+
+ /**
+ * One ClinVar pathogenic or likely pathogenic allele discovered in a disease
+ * with autosomal dominant inheritance.
+ */
+ ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD,
+
+ /**
+ * Two ClinVar pathogenic or likely pathogenic alleles discovered in a disease
+ * with autosomal recessive inheritance.
+ */
+ TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR,
+
+ /**
+ * One deleterious allele detected with autosomal recessive disease.
+ */
+ ONE_DELETERIOUS_VARIANT_IN_AR,
+
+ /**
+ * Heuristic for the case where we have more called pathogenic variants than we should have
+ * in a gene without a high background count. We model this as a technical error and
+ * take the observed path weighted count to be no more than λdisease.
+ * This has the effect of not down-weighting these genes;
+ * the user will have to judge whether one of the variants is truly pathogenic.
+ */
+ HIGH_NUMBER_OF_OBSERVED_PREDICTED_PATHOGENIC_VARIANTS,
+
+ /**
+ * Gene scored using LIRICAL genotype LR model.
+ *
+ * For more details, consult the Material and Methods | Likelihood Ratio for Genotypes section
+ * of the LIRICAL manuscript.
+ */
+ LIRICAL_GT_MODEL,
+
+ /**
+ * DO NOT USE.
+ *
+ * @deprecated the enum constant has been deprecated and will be removed in v3.0.0.
+ * Use {@link #ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD} instead.
+ */
+ @Deprecated(forRemoval = true, since = "v2.0.0")
+ // REMOVE(v3.0.0)
+ ONE_DELETERIOUS_CLINVAR_VARIANT_IN_AD,
+
+ /**
+ * DO NOT USE.
+ *
+ * @deprecated the enum constant has been deprecated and will be removed in v3.0.0.
+ * Use {@link #TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR} instead.
+ */
+ @Deprecated(forRemoval = true, since = "v2.0.0")
+ // REMOVE(v3.0.0)
+ TWO_DELETERIOUS_CLINVAR_VARIANTS_IN_AR,
+
+ /**
+ * DO NOT USE. A placeholder value used in the deprecated methods for backward compatibility.
+ *
+ * @deprecated the field will be removed in v3.0.0.
+ */
+ @Deprecated(forRemoval = true)
+ // REMOVE(v3.0.0)
+ UNKNOWN
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java
index f8abcc07a..2ecdcfaa9 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLrWithExplanation.java
@@ -1,36 +1,41 @@
package org.monarchinitiative.lirical.core.likelihoodratio;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import org.monarchinitiative.phenol.annotations.constants.hpo.HpoModeOfInheritanceTermIds;
import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier;
import org.monarchinitiative.phenol.ontology.data.TermId;
import java.util.Objects;
-
+/**
+ * Results of genotype likelihood ratio evaluation for a single gene.
+ */
public class GenotypeLrWithExplanation {
private final GeneIdentifier geneId;
- /** The likelihood ratio of the genotype. */
+ private final GenotypeLrMatchType matchType;
+ /** The untransformed likelihood ratio of the genotype. */
private final double lr;
private final String explanation;
static GenotypeLrWithExplanation noVariantsDetectedAutosomalRecessive(GeneIdentifier geneId, double ratio) {
final String expl = String.format("log10(LR)=%.3f. No variants detected with autosomal recessive disease.", Math.log10(ratio));
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.NO_VARIANTS_DETECTED_AR, ratio, expl);
}
static GenotypeLrWithExplanation noVariantsDetectedAutosomalDominant(GeneIdentifier geneId, double ratio) {
final String expl = String.format("log10(LR)=%.3f. No variants detected.", Math.log10(ratio));
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.NO_VARIANTS_DETECTED_AD, ratio, expl);
}
static GenotypeLrWithExplanation twoPathClinVarAllelesRecessive(GeneIdentifier geneId, double ratio) {
final String expl = String.format("log10(LR)=%.3f. Two pathogenic ClinVar variants detected with autosomal recessive disease.", Math.log10(ratio));
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR, ratio, expl);
}
static GenotypeLrWithExplanation pathClinVar(GeneIdentifier geneId, double ratio) {
final String expl = String.format("log10(LR)=%.3f. Pathogenic ClinVar variant detected.", Math.log10(ratio));
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD, ratio, expl);
}
static GenotypeLrWithExplanation explainOneAlleleRecessive(GeneIdentifier geneId, double ratio, double observedWeightedPathogenicVariantCount, double lambda_background) {
@@ -38,7 +43,7 @@ static GenotypeLrWithExplanation explainOneAlleleRecessive(GeneIdentifier geneId
String expl = String.format("log10(LR)=%.3f. One pathogenic allele detected with autosomal recessive disease. " +
"Observed weighted pathogenic variant count: %.2f. λdisease=%d. λbackground=%.4f.",
Math.log10(ratio), observedWeightedPathogenicVariantCount, lambda_disease, lambda_background);
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.ONE_DELETERIOUS_VARIANT_IN_AR, ratio, expl);
}
@@ -50,18 +55,24 @@ static GenotypeLrWithExplanation explainPathCountAboveLambdaB(GeneIdentifier gen
String expl = String.format("log10(LR)=%.3f. %s. Heuristic for high number of observed predicted pathogenic variants. "
+ "Observed weighted pathogenic variant count: %.2f. λdisease=%d. λbackground=%.4f.",
Math.log10(ratio), getMoIString(MoI), observedWeightedPathogenicVariantCount, lambda_disease, lambda_background);
- return new GenotypeLrWithExplanation(geneId, ratio, expl);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.HIGH_NUMBER_OF_OBSERVED_PREDICTED_PATHOGENIC_VARIANTS, ratio, expl);
}
- static GenotypeLrWithExplanation explanation(GeneIdentifier geneId, double ratio, TermId modeOfInh, double lambda_b, double D, double B, double observedWeightedPathogenicVariantCount) {
+ static GenotypeLrWithExplanation explanation(GeneIdentifier geneId,
+ double ratio,
+ TermId modeOfInh,
+ double lambda_b,
+ double D,
+ double B,
+ double observedWeightedDeleteriousVariantCount) {
int lambda_disease = 1;
if (modeOfInh.equals(HpoModeOfInheritanceTermIds.AUTOSOMAL_RECESSIVE) || modeOfInh.equals(HpoModeOfInheritanceTermIds.X_LINKED_RECESSIVE)) {
lambda_disease = 2;
}
String msg = String.format("P(G|D)=%.4f. P(G|¬D)=%.4f", D, B);
- msg = String.format("log10(LR)=%.3f %s. %s. Observed weighted pathogenic variant count: %.2f. λdisease=%d. λbackground=%.4f.",
- Math.log10(ratio), msg, getMoIString(modeOfInh), observedWeightedPathogenicVariantCount, lambda_disease, lambda_b);
- return new GenotypeLrWithExplanation(geneId, ratio, msg);
+ msg = String.format("log10(LR)=%.3f %s. %s. Observed weighted deleterious variant count: %.2f. λdisease=%d. λbackground=%.4f.",
+ Math.log10(ratio), msg, getMoIString(modeOfInh), observedWeightedDeleteriousVariantCount, lambda_disease, lambda_b);
+ return new GenotypeLrWithExplanation(geneId, GenotypeLrMatchType.LIRICAL_GT_MODEL, ratio, msg);
}
private static String getMoIString(TermId MoI) {
@@ -77,25 +88,66 @@ private static String getMoIString(TermId MoI) {
return " Mode of inheritance: not available"; // should never happen
}
+ /**
+ * @deprecated the method has been deprecated and will be removed in v3.0.0.
+ * Use {@link #of(GeneIdentifier, GenotypeLrMatchType, double, String)} instead.
+ */
+ @Deprecated(forRemoval = true, since = "v2.0.0-RC3")
public static GenotypeLrWithExplanation of(GeneIdentifier geneId, double lr, String explanation) {
- return new GenotypeLrWithExplanation(geneId, lr, explanation);
+ return of(geneId, GenotypeLrMatchType.UNKNOWN, lr, explanation);
+ }
+
+ public static GenotypeLrWithExplanation of(GeneIdentifier geneId, GenotypeLrMatchType matchType, double lr, String explanation) {
+ return new GenotypeLrWithExplanation(geneId, matchType, lr, explanation);
}
- private GenotypeLrWithExplanation(GeneIdentifier geneId, double lr, String explanation) {
+ private GenotypeLrWithExplanation(GeneIdentifier geneId, GenotypeLrMatchType matchType, double lr, String explanation) {
this.geneId = Objects.requireNonNull(geneId);
+ this.matchType = Objects.requireNonNull(matchType);
this.lr = lr;
this.explanation = Objects.requireNonNull(explanation, "Explanation must not be null");
}
-
+ /**
+ * Get the gene identifier for this genotype LR.
+ */
+ @JsonGetter
public GeneIdentifier geneId() {
return geneId;
}
+ /**
+ * Get the genotype LR match type.
+ */
+ @JsonGetter
+ public GenotypeLrMatchType matchType() {
+ return matchType;
+ }
+
+ /**
+ * Get the genotype likelihood ratio for the gene. Use {@link #log10Lr()} to get the log LR.
+ *
+ * @return the genotype likelihood ratio
+ */
+ @JsonGetter
public double lr() {
return lr;
}
+ /**
+ * Get the log10 LR for the gene. Use {@link #lr()} to get the non-transformed value.
+ *
+ * @return the log10 of the genotype LR
+ */
+ @JsonIgnore
+ public double log10Lr() {
+ return Math.log10(lr);
+ }
+
+ /**
+ * @return an explanation of the genotype likelihood ratio
+ */
+ @JsonGetter
public String explanation() {
return explanation;
}
@@ -105,18 +157,20 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
GenotypeLrWithExplanation that = (GenotypeLrWithExplanation) o;
- return Double.compare(that.lr, lr) == 0 && Objects.equals(explanation, that.explanation);
+ return Double.compare(that.lr, lr) == 0 && Objects.equals(geneId, that.geneId) && matchType == that.matchType && Objects.equals(explanation, that.explanation);
}
@Override
public int hashCode() {
- return Objects.hash(lr, explanation);
+ return Objects.hash(geneId, matchType, lr, explanation);
}
@Override
public String toString() {
return "GenotypeLrWithExplanation{" +
- "LR=" + lr +
+ "geneId=" + geneId +
+ ", matchType=" + matchType +
+ ", lr=" + lr +
", explanation='" + explanation + '\'' +
'}';
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java
index 8274fe135..a889dfc0e 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/InducedDiseaseGraph.java
@@ -4,8 +4,7 @@
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation;
import org.monarchinitiative.phenol.annotations.constants.hpo.HpoSubOntologyRootTermIds;
-import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
import org.monarchinitiative.phenol.ontology.data.TermId;
import java.util.*;
@@ -42,8 +41,8 @@ public class InducedDiseaseGraph {
private record CandidateMatch(TermId termId, int distance) {
}
- public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology) {
- Map termFrequencies = new HashMap<>(disease.annotationCount());
+ public static InducedDiseaseGraph create(HpoDisease disease, MinimalOntology ontology) {
+ Map termFrequencies = new HashMap<>(disease.annotations().size());
for (HpoDiseaseAnnotation annotation : disease.annotations()) {
double frequency = annotation.frequency();
@@ -52,8 +51,7 @@ public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology)
stack.push(cmatch);
while (!stack.empty()) {
CandidateMatch cm = stack.pop();
- Set parents = OntologyAlgorithm.getParentTerms(ontology, cm.termId, false);
- for (TermId parentTermId : parents) {
+ for (TermId parentTermId : ontology.graph().getParents(cm.termId, false)) {
if (parentTermId.equals(HpoSubOntologyRootTermIds.PHENOTYPIC_ABNORMALITY)) {
continue;
}
@@ -68,10 +66,12 @@ public static InducedDiseaseGraph create(HpoDisease disease, Ontology ontology)
}
}
}
- Set absentPhenotypeTerms = disease.absentAnnotationsStream()
+
+ Set negativeInducedGraph = disease.absentAnnotationsStream()
.map(HpoDiseaseAnnotation::id)
- .collect(Collectors.toUnmodifiableSet());
- Set negativeInducedGraph = OntologyAlgorithm.getAncestorTerms(ontology, absentPhenotypeTerms, true);
+ .distinct()
+ .flatMap(absent -> ontology.graph().getAncestorsStream(absent, true))
+ .collect(Collectors.toSet());
return new InducedDiseaseGraph(disease, termFrequencies, negativeInducedGraph);
}
@@ -117,7 +117,7 @@ public HpoDisease getDisease() {
* @param ontology HPO
* @return The best hit
*/
- Term2Freq getClosestAncestor(TermId tid, Ontology ontology) {
+ Term2Freq getClosestAncestor(TermId tid, MinimalOntology ontology) {
Queue queue = new LinkedList<>();
queue.add(tid);
@@ -126,8 +126,8 @@ Term2Freq getClosestAncestor(TermId tid, Ontology ontology) {
if (term2frequencyMap.containsKey(t)) {
return new Term2Freq(t, term2frequencyMap.get(t));
} else {
- Set parents = OntologyAlgorithm.getParentTerms(ontology, t, false);
- queue.addAll(parents);
+ ontology.graph().getParents(t, false)
+ .forEach(queue::add);
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java
index a6837c60f..3fdd2c313 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanation.java
@@ -1,7 +1,10 @@
package org.monarchinitiative.lirical.core.likelihoodratio;
-import org.apache.commons.lang.StringUtils;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import org.apache.commons.lang3.StringUtils;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
/**
@@ -36,22 +39,27 @@ private LrWithExplanation(TermId q, TermId m, LrMatchType mt, double lr, String
this.explanation = explanation;
}
+ @JsonGetter(value = "query")
public TermId queryTerm() {
return queryTerm;
}
+ @JsonGetter(value = "match")
public TermId matchingTerm() {
return matchingTerm;
}
+ @JsonGetter
public LrMatchType matchType() {
return matchType;
}
+ @JsonGetter
public double lr() {
return lr;
}
+ @JsonGetter
public String explanation() {
return explanation;
}
@@ -59,6 +67,7 @@ public String explanation() {
/**
* @return explanation text suitable for including in HTML documents
*/
+ @JsonIgnore
public String escapedExplanation() {
return StringUtils.replaceEach(explanation, EXPLANATION_SEARCH_LIST, EXPLANATION_REPLACEMENT_LIST);
}
@@ -68,9 +77,9 @@ public String escapedExplanation() {
*/
// REMOVE(v2.0.0)
@Deprecated(forRemoval = true)
- public String getExplanation(Ontology ontology) {
- String qtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.queryTerm).getName(), queryTerm.getValue());
- String mtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.matchingTerm).getName(), matchingTerm.getValue());
+ public String getExplanation(MinimalOntology ontology) {
+ String qtermlabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue());
+ String mtermlabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue());
double log10LR = Math.log10(lr);
switch (this.matchType) {
case EXACT_MATCH:
@@ -105,9 +114,9 @@ public String getExplanation(Ontology ontology) {
*/
// REMOVE(v2.0.0)
@Deprecated(forRemoval = true)
- String getEscapedExplanation(Ontology ontology) {
- String qtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.queryTerm).getName(), queryTerm.getValue());
- String mtermlabel = String.format("%s[%s]", ontology.getTermMap().get(this.matchingTerm).getName(), matchingTerm.getValue());
+ String getEscapedExplanation(MinimalOntology ontology) {
+ String qtermlabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue());
+ String mtermlabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue());
double log10LR = Math.log10(lr);
switch (this.matchType) {
case EXACT_MATCH:
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java
index 9e0b2f353..c6f783787 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/LrWithExplanationFactory.java
@@ -1,13 +1,14 @@
package org.monarchinitiative.lirical.core.likelihoodratio;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
public class LrWithExplanationFactory {
- private final Ontology ontology;
+ private final MinimalOntology ontology;
- public LrWithExplanationFactory(Ontology ontology) {
+ public LrWithExplanationFactory(MinimalOntology ontology) {
this.ontology = ontology;
}
@@ -20,8 +21,8 @@ public LrWithExplanation create(TermId queryTerm, TermId matchingTerm, LrMatchTy
}
private String getExplanation(TermId queryTerm, TermId matchingTerm, LrMatchType matchType, double lr) {
- String queryTermLabel = String.format("%s[%s]", ontology.getTermMap().get(queryTerm).getName(), queryTerm.getValue());
- String matchTermLabel = String.format("%s[%s]", ontology.getTermMap().get(matchingTerm).getName(), matchingTerm.getValue());
+ String queryTermLabel = String.format("%s[%s]", ontology.termForTermId(queryTerm).map(Term::getName).orElse("UNKNOWN"), queryTerm.getValue());
+ String matchTermLabel = String.format("%s[%s]", ontology.termForTermId(matchingTerm).map(Term::getName).orElse("UNKNOWN"), matchingTerm.getValue());
double log10LR = Math.log10(lr);
return switch (matchType) {
case EXACT_MATCH -> String.format("E:%s[%.3f]", queryTermLabel, log10LR);
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java
index 10e407f64..acb5495b7 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/PhenotypeLikelihoodRatio.java
@@ -6,13 +6,14 @@
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases;
-import org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
+import java.util.stream.Collectors;
/**
* This class is designed to calculate the background and foreground frequencies of any HPO term in any disease
@@ -38,7 +39,7 @@ public class PhenotypeLikelihoodRatio {
/** The default frequency of a term in a disease if the explicit frequency is not available. */
public static final float DEFAULT_TERM_FREQUENCY = 1.f; // TODO - is this the right thing to do?
/** The HPO ontology with all of its subontologies. */
- private final Ontology ontology;
+ private final MinimalOntology ontology;
/** This map has one entry for each disease in our database. Key--the disease ID, e.g., OMIM:600200.*/
private final Map diseaseMap;
private final LrWithExplanationFactory explanationFactory;
@@ -60,7 +61,7 @@ public class PhenotypeLikelihoodRatio {
* @param ontology The HPO ontology object
* @param diseases List of all diseases for this simulation
*/
- public PhenotypeLikelihoodRatio(Ontology ontology, HpoDiseases diseases) {
+ public PhenotypeLikelihoodRatio(MinimalOntology ontology, HpoDiseases diseases) {
this.ontology = ontology;
this.diseaseMap = diseases.diseaseById();
this.explanationFactory = new LrWithExplanationFactory(ontology); // TODO - DI?
@@ -77,7 +78,9 @@ public PhenotypeLikelihoodRatio(Ontology ontology, HpoDiseases diseases) {
*/
public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph idg) {
HpoDisease disease = idg.getDisease();
- Set queryAncestors = OntologyAlgorithm.getAncestorTerms(ontology,queryTid,true);
+ Set queryAncestors = ontology.graph()
+ .getAncestorsStream(queryTid, true)
+ .collect(Collectors.toSet());
if (disease.absentAnnotationsStream().anyMatch(a -> queryAncestors.contains(a.id()))) {
// i.e., the query term is explicitly EXCLUDED in the disease definition
return explanationFactory.create(queryTid,
@@ -105,7 +108,7 @@ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph
for (HpoDiseaseAnnotation annotation : disease.annotations()) {
double frequency = annotation.frequency();
// is query an ancestor of a term that annotates the disease?
- if (OntologyAlgorithm.isSubclass(ontology,annotation.id(),queryTid)) {
+ if (ontology.graph().isAncestorOf(queryTid, annotation.id())) {
maximumFrequencyOfDescendantTerm=Math.max(maximumFrequencyOfDescendantTerm,frequency);
diseaseMatchingTerm=annotation.id();
isAncestor=true;
@@ -134,7 +137,7 @@ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph
TermId bestMatchTermId = null;
double denominatorForNonRootCommandAnc = getBackgroundFrequency(queryTid);
for (HpoDiseaseAnnotation annotation : disease.annotations()) {
- if (OntologyAlgorithm.isSubclass(ontology, queryTid, annotation.id())){
+ if (ontology.graph().isAncestorOf(annotation.id(), queryTid)){
double proportionalFrequency = getProportionInChildren(queryTid,annotation.id());
double queryFrequency = annotation.frequency();
double f = proportionalFrequency*queryFrequency;
@@ -218,10 +221,12 @@ public LrWithExplanation lrForExcludedTerm(TermId queryTid, InducedDiseaseGraph
* @param ontology Reference to the HPO ontology
* @return frequency of the term in the disease (including annotation propagation)
*/
- private static double getFrequencyOfTermInDiseaseWithAnnotationPropagation(TermId query, HpoDisease disease, Ontology ontology) {
+ private static double getFrequencyOfTermInDiseaseWithAnnotationPropagation(TermId query, HpoDisease disease, MinimalOntology ontology) {
double maxFrequency = 0.0;
for (HpoDiseaseAnnotation annotation : disease.annotations()) {
- Set ancestors = ontology.getAncestorTermIds(annotation.id(),true);
+ Set ancestors = ontology.graph()
+ .getAncestorsStream(annotation.id(), true)
+ .collect(Collectors.toSet());
if (ancestors.contains(query))
maxFrequency = Math.max(maxFrequency, disease.getFrequencyOfTermInDisease(annotation.id()).map(Ratio::frequency).orElse(DEFAULT_TERM_FREQUENCY));
}
@@ -263,14 +268,15 @@ private double getProportionInChildren(TermId queryTid, TermId diseaseTid) {
if (queryTid.getId().equals(diseaseTid.getId())) {
return 1.0;
}
- Set directChildren= OntologyAlgorithm.getChildTerms(ontology,diseaseTid,false);
- if (directChildren.isEmpty()) {
+ List children = ontology.graph()
+ .getChildrenStream(diseaseTid, false)
+ .toList();
+ if (children.isEmpty())
return 0.0;
- }
- for (TermId tid : directChildren) {
+ for (TermId tid : children) {
if (queryTid.equals(tid)) {
- return 1.0/(double)directChildren.size();
+ return 1.0/(double) children.size();
}
}
// if we get here, there was no match
@@ -301,7 +307,7 @@ private double getProportionInChildren(TermId queryTid, TermId diseaseTid) {
* HPO terms in the ontology. */
private void initializeFrequencyMap() {
Map mp = new HashMap<>();
- for (TermId tid : ontology.getNonObsoleteTermIds()) {
+ for (TermId tid : ontology.nonObsoleteTermIds()) {
mp.put(tid, 0.0D);
}
Map mapbuilder = new HashMap<>();
@@ -313,16 +319,18 @@ private void initializeFrequencyMap() {
for (HpoDiseaseAnnotation annotation : dis.annotations()) {
TermId tid = annotation.id();
double termFrequency = annotation.frequency();
- TermId primaryTermId = ontology.getPrimaryTermId(tid);
- if (primaryTermId == null) {
+ Optional term = ontology.termForTermId(tid);
+ if (term.isEmpty()) {
logger.warn("Primary term ID for {} was not found!", tid.getValue());
continue;
}
+
// All of the ancestor terms are implicitly annotated to tid
// therefore, add this to their background frequencies.
// Note we also include the original term here (third arg: true)
- Set ancs = OntologyAlgorithm.getAncestorTerms(ontology,primaryTermId,true);
- for (TermId at : ancs) {
+ // Regarding the unchecked `get()` below, we check that `term` is not empty above.
+ //noinspection OptionalGetWithoutIsPresent
+ for (TermId at : ontology.graph().getAncestors(term.map(Term::id).get(), true)) {
updateMap.putIfAbsent(at,termFrequency);
// put the maximum frequency for this term given it is
// an ancestor of one or more of the HPO terms that annotate
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java
index 63abeb561..f27fabc45 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/likelihoodratio/package-info.java
@@ -1,4 +1,6 @@
-/** Classes related to the calculation of likelihood ratios for phenotypic or genotypic test results.
+/**
+ * Package with logic for calculation of likelihood ratios for phenotypic or genotypic test results.
+ *
* @author Peter Robinson
*/
package org.monarchinitiative.lirical.core.likelihoodratio;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java
index f5d68c946..3ab068313 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Age.java
@@ -1,105 +1,130 @@
package org.monarchinitiative.lirical.core.model;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
import java.time.Period;
import java.util.Objects;
/**
- * Convenience class to represent the age of a proband. Note that if (@link #initialized} is false,
- * then we are representing the fact that we do not know the age we will disregard the feature
- * in our calculations. We will represent prenatal age as number of completed gestational weeks and days,
- * and {@link #isGestational()} flag will be set.
+ * Convenience class to represent the age of a subject.
+ *
+ * We represent both postnatal and gestational age. Use {@link #isGestational()}
+ * or {@link #isPostnatal()} to tell them apart.
+ *
+ * The postnatal age has {@link #getYears()}, {@link #getMonths()}, and {@link #getDays()} fields set
+ * and {@link #getWeeks()} should be ignored.
+ *
+ * The gestational age uses {@link #getWeeks()} and {@link #getDays()} fields.
+ *
* @author Peter Robinson
*/
+@JsonSerialize(using = AgeSerializer.class)
public class Age {
- private final boolean isUnknown;
private final boolean isGestational;
private final int years;
private final int months;
private final int weeks;
private final int days;
- /** Used as a constant if we do not have information about the age of a proband. */
- private final static Age NOT_KNOWN = new Age();
-
- private Age(int years, int months, int weeks, int days) {
- this.years=years;
- this.months=months;
- this.weeks=weeks;
- this.days=days;
- this.isUnknown = false;
- this.isGestational = weeks != 0;
- }
-
- private Age() {
- this.years=0;
- this.months=0;
- this.weeks=0;
- this.days=0;
- this.isUnknown = true;
- this.isGestational = false;
- }
- public static Age ageNotKnown() {
- return NOT_KNOWN;
+ private Age(int years, int months, int weeks, int days, boolean isGestational) {
+ this.years=requireNonNegativeInt(years, "Years must not be negative");
+ this.months=requireNonNegativeInt(months, "Months must not be negative");
+ this.weeks=requireNonNegativeInt(weeks, "Weeks must not be negative");
+ this.days=requireNonNegativeInt(days, "Days must not be negative");
+ this.isGestational = isGestational;
}
+ @JsonIgnore
public int getYears() {
return years;
}
+ @JsonIgnore
public int getMonths() {
return months;
}
+ @JsonIgnore
public int getWeeks() {
return weeks;
}
+ @JsonIgnore
public int getDays() {
return days;
}
- public boolean isUnknown() {
- return isUnknown;
- }
-
+ @JsonIgnore
public boolean isGestational() {
return isGestational;
}
+ @JsonIgnore
public boolean isPostnatal() {
return !isGestational;
}
+ /**
+ * Create a postnatal age to represent {@code y} years of age.
+ *
+ * @param y a non-negative number of years.
+ */
public static Age ageInYears(int y) {
return of(y,0,0);
}
+ /**
+ * Create a postnatal age to represent {@code m} months of age.
+ *
+ * @param m a non-negative number of months.
+ */
public static Age ageInMonths(int m) {
return of(0,m,0);
}
+ /**
+ * Create a postnatal age to represent {@code d} days of age.
+ *
+ * @param d a non-negative number of days.
+ */
public static Age ageInDays(int d) {
return of(0,0,d);
}
/**
* @param period representing postnatal (not gestational) age.
- * @return age object
*/
public static Age parse(Period period) {
Period normalized = period.normalized();
return of(normalized.getYears(), normalized.getMonths(), normalized.getDays());
}
+ /**
+ * Create a gestational age to represent {@code weeks} and {@code days}.
+ *
+ * {@code weeks} should generally not be greater than 42, and it must not be negative.
+ * {@code days} must be in range {@code [0,6]}.
+ *
+ * @param weeks a non-negative number of completed gestational weeks.
+ * @param days the number of completed gestational days.
+ */
public static Age gestationalAge(int weeks, int days) {
- return new Age(0, 0, weeks, days);
+ return new Age(0, 0, weeks, days, true);
}
/**
* Create a postnatal age from given inputs.
*/
public static Age of(int years, int months, int days) {
- return new Age(years, months, 0, days);
+ return new Age(years, months, 0, days, false);
+ }
+
+ private static int requireNonNegativeInt(int value, String msg) {
+ if (value < 0) {
+ throw new IllegalArgumentException(msg);
+ } else
+ return value;
}
@Override
@@ -107,8 +132,7 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Age age = (Age) o;
- return isUnknown == age.isUnknown &&
- years == age.years &&
+ return years == age.years &&
months == age.months &&
weeks == age.weeks &&
days == age.days;
@@ -116,14 +140,13 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
- return Objects.hash(isUnknown, years, months, weeks, days);
+ return Objects.hash(years, months, weeks, days);
}
@Override
public String toString() {
return "Age{" +
- "isUnknown=" + isUnknown +
- ", years=" + years +
+ "years=" + years +
", months=" + months +
", weeks=" + weeks +
", days=" + days +
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java
new file mode 100644
index 000000000..cddcc47f0
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/AgeSerializer.java
@@ -0,0 +1,31 @@
+package org.monarchinitiative.lirical.core.model;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+import java.io.IOException;
+import java.time.Period;
+
+/**
+ * Jackson serializer that writes an {@link Age} as the string form of a normalized
+ * {@link Period} built from its years, months, and days.
+ *
+ * NOTE(review): {@link Age#getWeeks()} is not written, so a gestational age is serialized
+ * without its week component - confirm this is intended.
+ */
+class AgeSerializer extends StdSerializer<Age> {
+
+    AgeSerializer() {
+        super(Age.class);
+    }
+
+    AgeSerializer(Class<Age> t) {
+        super(t);
+    }
+
+    @Override
+    public void serialize(Age age, JsonGenerator gen, SerializerProvider provider) throws IOException {
+        Period p = Period.of(age.getYears(), age.getMonths(), age.getDays());
+        gen.writeString(p.normalized().toString());
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java
new file mode 100644
index 000000000..63898430e
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinVarAlleleData.java
@@ -0,0 +1,73 @@
+package org.monarchinitiative.lirical.core.model;
+
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * A subset of ClinVar allele data relevant for LIRICAL analysis.
+ *
+ * We use the primary interpretation for prioritization and the allele ID for linking out
+ * (e.g. <a href="https://www.ncbi.nlm.nih.gov/clinvar/?term=270003[alleleid]">here</a> for an allele ID {@code 270003})
+ */
+public class ClinVarAlleleData {
+
+ private final ClinvarClnSig clinvarClnSig;
+ private final Long alleleId; // we box since the alleleId is nullable.
+
+ public static ClinVarAlleleData of(ClinvarClnSig clinvarClnSig, Long alleleId) {
+ return new ClinVarAlleleData(clinvarClnSig, alleleId);
+ }
+
+ private ClinVarAlleleData(ClinvarClnSig clinvarClnSig, Long alleleId) {
+ this.clinvarClnSig = Objects.requireNonNull(clinvarClnSig);
+ this.alleleId = alleleId; // nullable
+ }
+
+ /**
+ * @return the primary interpretation of the ClinVar data for the variant
+ */
+ public ClinvarClnSig getClinvarClnSig() {
+ return clinvarClnSig;
+ }
+
+    /**
+     * Get ClinVar allele ID.
+     *
+     * E.g. {@code 270003}.
+     *
+     *
+     * @return an {@linkplain Optional} ClinVar allele ID {@linkplain Long} or an empty {@linkplain Optional}.
+     */
+    public Optional<Long> getAlleleId() {
+        return Optional.ofNullable(alleleId);
+    }
+
+    /**
+     * @return ClinVar allele ID as {@linkplain String}, or an empty {@linkplain Optional} if the allele ID is missing.
+     * @see #getAlleleId()
+     */
+    public Optional<String> getAlleleIdString() {
+        return Optional.ofNullable(alleleId).map(Object::toString);
+    }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ ClinVarAlleleData that = (ClinVarAlleleData) o;
+ return clinvarClnSig == that.clinvarClnSig && Objects.equals(alleleId, that.alleleId);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(clinvarClnSig, alleleId);
+ }
+
+ @Override
+ public String toString() {
+ return "ClinVarAlleleData{" +
+ "clinvarClnSig=" + clinvarClnSig +
+ ", alleleId=" + alleleId +
+ '}';
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java
index a68906c51..0b11ffb38 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/ClinvarClnSig.java
@@ -31,4 +31,21 @@ public boolean isPathogenicOrLikelyPathogenic() {
default -> false;
};
}
+
+ /**
+ * @return {@code true} if the significance is one of {@link #BENIGN}, {@link #LIKELY_BENIGN}, or {@link #BENIGN_OR_LIKELY_BENIGN}
+ */
+ public boolean isBenignOrLikelyBenign() {
+ return switch (this) {
+ case BENIGN, LIKELY_BENIGN, BENIGN_OR_LIKELY_BENIGN -> true;
+ default -> false;
+ };
+ }
+
+ /**
+ * @return {@code false} if the significance is one of {@link #BENIGN}, {@link #LIKELY_BENIGN}, or {@link #BENIGN_OR_LIKELY_BENIGN}
+ */
+ public boolean notBenignOrLikelyBenign() {
+ return !isBenignOrLikelyBenign();
+ }
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java
index 2366f0c0d..aa4512def 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/FilteringStats.java
@@ -1,9 +1,21 @@
package org.monarchinitiative.lirical.core.model;
-public record FilteringStats(long nGoodQualityVariants, long nFilteredVariants) {
+/**
+ * A summary of variant input and functional annotation.
+ *
+ * @param nPassingVariants number of variants that passed the input filtering and were subject to LIRICAL analysis.
+ * @param nFilteredVariants number of variants that failed the filtering and were not included in the analysis.
+ * @param genesWithVariants number of genes with one or more passing variant.
+ */
+public record FilteringStats(long nPassingVariants,
+ long nFilteredVariants,
+ long genesWithVariants) {
+ /**
+ * @return the total number of variants (passing + filtered).
+ */
public long variantCount() {
- return nGoodQualityVariants + nFilteredVariants;
+ return nPassingVariants + nFilteredVariants;
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java
index e209e3191..1be04f4be 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2Genotype.java
@@ -4,14 +4,59 @@
import org.monarchinitiative.phenol.ontology.data.Identified;
import org.monarchinitiative.phenol.ontology.data.TermId;
-import java.util.Collection;
-import java.util.Optional;
+import java.util.*;
import java.util.stream.Stream;
+/**
+ * {@linkplain Gene2Genotype} represents variants that have been annotated to a single gene. The gene data includes
+ * the identifier of a gene, the variants annotated with respect to the gene, and convenience methods for using
+ * in the {@code LIRICAL} algorithm.
+ *
+ * Note, we only need the variants that passed the filtering for the analysis.
+ */
public interface Gene2Genotype extends Identified {
+    /**
+     * Create {@linkplain Gene2Genotype} from a collection of variants that can include the variants
+     * that failed the initial filtering.
+     *
+     * The failing variants will not be retained, only their count is kept.
+     *
+     * @deprecated the method has been deprecated and will be removed in {@code v2.0.0}.
+     * Use {@link #of(GeneIdentifier, Collection, int)} instead.
+     * @param id the gene credentials.
+     * @param variants a collection of variants that passed/failed the initial filtering.
+     */
+    @Deprecated(forRemoval = true, since = "2.0.0-RC3")
     static Gene2Genotype of(GeneIdentifier id, Collection<LiricalVariant> variants) {
-        return Gene2GenotypeDefault.of(id, variants);
+        int filteredOutVariantCount = 0;
+        List<LiricalVariant> passingVariants = new ArrayList<>(variants.size());
+        for (LiricalVariant variant : variants) {
+            if (variant.passedFilters())
+                passingVariants.add(variant);
+            else
+                filteredOutVariantCount++;
+        }
+        return of(id, passingVariants, filteredOutVariantCount);
+    }
+
+    /**
+     * Create {@linkplain Gene2Genotype} from the gene ID and the variants that passed the initial filtering.
+     *
+     * @param geneId the gene credentials, must not be {@code null}.
+     * @param passingVariants a collection of variants that passed the initial filtering, must not be {@code null}.
+     * @param filteredOutVariantCount the number of variants that failed the initial filtering.
+     */
+    static Gene2Genotype of(GeneIdentifier geneId,
+                            Collection<LiricalVariant> passingVariants,
+                            int filteredOutVariantCount) {
+        Objects.requireNonNull(geneId, "Gene ID must not be null");
+        Objects.requireNonNull(passingVariants, "Variants must not be null");
+        if (passingVariants.isEmpty()) {
+            return new Gene2GenotypeDefault.Gene2GenotypeNoVariants(geneId, filteredOutVariantCount);
+        } else {
+            return new Gene2GenotypeDefault.Gene2GenotypeFull(geneId, passingVariants, filteredOutVariantCount);
+        }
     }
// REMOVE(v2.0.0)
@@ -21,6 +66,9 @@ default TermId id() {
return geneId().id();
}
+ /**
+ * Get the credentials of the gene.
+ */
GeneIdentifier geneId();
/**
@@ -33,25 +81,80 @@ default String symbol() {
}
/**
+ * Get a {@linkplain Stream} of variants annotated to this gene.
*
- * @return list of all variants found in this gene
+ * @return a stream of variants found in this gene.
*/
Stream variants();
+ /**
+ * Get the count of variants annotated to this gene that passed the filtering.
+ */
int variantCount();
+ /**
+ * @return {@code true} if the gene is annotated with 1 or more variants that passed the filtering.
+ */
default boolean hasVariants() {
return variantCount() != 0;
}
+ /**
+ * Get the count of variants annotated to this gene which failed the filtering.
+ */
+ default int filteredOutVariantCount() {
+ // This can explode if the number of variants overflows int.
+ // However, this is super unlikely to happen in practice.
+ return Math.toIntExact(variants().filter(LiricalVariant::failedFilters).count());
+ }
+
+ /**
+ * Get the number of alleles of ClinVar pathogenic or likely pathogenic variants in the gene for the {@code sampleId}.
+ *
+ * Note, only the variants that passed the filtering are considered.
+ */
default int pathogenicClinVarCount(String sampleId) {
- return variants().filter(lv -> lv.clinvarClnSig().isPathogenicOrLikelyPathogenic())
+ if (sampleId == null)
+ return 0;
+ return variants()
+ .filter(lv -> lv.clinVarAlleleData()
+ .map(cv -> cv.getClinvarClnSig().isPathogenicOrLikelyPathogenic())
+ .orElse(false))
.mapToInt(var -> var.pathogenicClinVarAlleleCount(sampleId))
.sum();
}
+ /**
+ * @deprecated the method was deprecated and will be removed in {@code v3.0.0}.
+ * Use {@link #deleteriousAlleleCount(String, float)} instead.
+ * @see #deleteriousAlleleCount(String, float)
+ */
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
default int pathogenicAlleleCount(String sampleId, float pathogenicityThreshold) {
- return variants().filter(var -> var.pathogenicityScore().map(f -> f >= pathogenicityThreshold).orElse(false))
+ // REMOVE(v3.0.0)
+ return deleteriousAlleleCount(sampleId, pathogenicityThreshold);
+ }
+
+ /**
+ * Get the count of alleles of predicted pathogenic/deleterious variants in the gene for the {@code sampleId}.
+ * The variants that are both not labeled as benign or likely benign by ClinVar and have the
+ * {@link LiricalVariant#pathogenicityScore()} at or above the provided {@code pathogenicityThreshold}
+ * are deemed to be predicted pathogenic/deleterious.
+ *
+ * Note, we take specific precautions to not clash with ClinVar variant interpretation and never consider ClinVar benign
+ * or likely benign variants as deleterious.
+ */
+ default int deleteriousAlleleCount(String sampleId, float pathogenicityThreshold) {
+ if (sampleId == null)
+ return 0;
+ // The first part of the filter clause ensures we do not clash with ClinVar variant interpretation.
+ // In other words, a ClinVar benign or likely benign variant CANNOT be interpreted as deleterious
+ // based on in silico pathogenicity scores.
+ return variants()
+ .filter(var -> var.clinVarAlleleData()
+ .map(cv -> cv.getClinvarClnSig().notBenignOrLikelyBenign())
+ .orElse(true)
+ && var.pathogenicityScore().map(f -> f >= pathogenicityThreshold).orElse(false))
.map(var -> var.alleleCount(sampleId))
.flatMap(Optional::stream)
.mapToInt(AlleleCount::alt)
@@ -59,7 +162,15 @@ default int pathogenicAlleleCount(String sampleId, float pathogenicityThreshold)
}
default double getSumOfPathBinScores(String sampleId, float pathogenicityThreshold) {
- return variants().filter(variant -> variant.pathogenicityScore().orElse(0f) >= pathogenicityThreshold)
+ if (sampleId == null)
+ return 0.;
+ // Same as in `deleteriousAlleleCount(..)` above, the first part of the filter clause ensures
+ // we do not clash with ClinVar variant interpretation.
+ return variants()
+ .filter(variant -> variant.clinVarAlleleData()
+ .map(cv -> cv.getClinvarClnSig().notBenignOrLikelyBenign())
+ .orElse(true)
+ && variant.pathogenicityScore().orElse(0f) >= pathogenicityThreshold)
.mapToDouble(variant -> {
int altAlleleCount = variant.alleleCount(sampleId).map(AlleleCount::alt).orElse((byte) 0);
return altAlleleCount * variant.pathogenicity();
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java
index ede1fb184..3004a1dfb 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/Gene2GenotypeDefault.java
@@ -9,24 +9,16 @@
class Gene2GenotypeDefault {
- static Gene2Genotype of(GeneIdentifier geneId, Collection variants) {
- Objects.requireNonNull(geneId, "Gene ID must not be null");
- Objects.requireNonNull(variants, "Variants must not be null");
- if (variants.isEmpty()) {
- return new Gene2GenotypeNoVariants(geneId);
- } else {
- return new Gene2GenotypeFull(geneId, variants);
- }
- }
-
- private static class Gene2GenotypeFull implements Gene2Genotype {
+ static class Gene2GenotypeFull implements Gene2Genotype {
private final GeneIdentifier geneId;
private final List variants;
+ private final int filteredOutVariantCount;
- private Gene2GenotypeFull(GeneIdentifier geneId, Collection variants) {
+ Gene2GenotypeFull(GeneIdentifier geneId, Collection variants, int filteredOutVariantCount) {
this.geneId = geneId;
this.variants = List.copyOf(variants);
+ this.filteredOutVariantCount = filteredOutVariantCount;
}
@Override
@@ -44,17 +36,22 @@ public int variantCount() {
return variants.size();
}
+ @Override
+ public int filteredOutVariantCount() {
+ return filteredOutVariantCount;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Gene2GenotypeFull that = (Gene2GenotypeFull) o;
- return Objects.equals(geneId, that.geneId) && Objects.equals(variants, that.variants);
+ return filteredOutVariantCount == that.filteredOutVariantCount && Objects.equals(geneId, that.geneId) && Objects.equals(variants, that.variants);
}
@Override
public int hashCode() {
- return Objects.hash(geneId, variants);
+ return Objects.hash(geneId, variants, filteredOutVariantCount);
}
@Override
@@ -62,11 +59,12 @@ public String toString() {
return "Gene2GenotypeFull{" +
"geneId=" + geneId +
", variants=" + variants +
- '}';
+ ", filteredOutVariantCount=" + filteredOutVariantCount
+ + '}';
}
}
- private record Gene2GenotypeNoVariants(GeneIdentifier geneId) implements Gene2Genotype {
+ record Gene2GenotypeNoVariants(GeneIdentifier geneId, int filteredOutVariantCount) implements Gene2Genotype {
@Override
public GeneIdentifier geneId() {
@@ -85,5 +83,4 @@ public int variantCount() {
}
-
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java
index d74cd7a2e..3af06cc33 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypes.java
@@ -1,6 +1,8 @@
package org.monarchinitiative.lirical.core.model;
-import java.util.List;
+import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier;
+
+import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@@ -14,12 +16,66 @@ static GenesAndGenotypes empty() {
return GenesAndGenotypesDefault.empty();
}
+    /**
+     * @deprecated use {@link #fromVariants(Collection, Iterable)} instead.
+     */
+    @Deprecated(forRemoval = true, since = "2.0.0-RC3")
+    static GenesAndGenotypes fromVariants(Iterable<LiricalVariant> variants) {
+        return fromVariants(null, variants);
+    }
+
+    static GenesAndGenotypes fromVariants(Collection<String> sampleNames, Iterable<LiricalVariant> variants) {
+        List<Gene2Genotype> g2g = groupVariantsByGenId(variants);
+        if (sampleNames == null) {
+            // Null sample names come from the deprecated overload. TODO - remove after removal of the deprecated method above.
+            return of(g2g);
+        } else {
+            return of(sampleNames, g2g);
+        }
+    }
+
+    private static List<Gene2Genotype> groupVariantsByGenId(Iterable<LiricalVariant> variants) {
+        // Group the passing variants by gene id and count the failed variants per gene id.
+        Map<GeneIdentifier, List<LiricalVariant>> gene2Genotype = new HashMap<>();
+        Map<GeneIdentifier, Integer> failedVariantCount = new HashMap<>();
+        for (LiricalVariant variant : variants) {
+            Stream<GeneIdentifier> identifiers = variant.annotations().stream()
+                    .map(TranscriptAnnotation::getGeneId)
+                    .distinct();
+            if (variant.passedFilters())
+                identifiers.forEach(geneId -> gene2Genotype.computeIfAbsent(geneId, e -> new ArrayList<>()).add(variant));
+            else
+                identifiers.forEach(geneId -> failedVariantCount.merge(geneId, 1, Integer::sum));
+        }
+
+        // Collect the variants into Gene2Genotype container. NOTE(review): genes with only failed variants are not emitted, so their failed counts are dropped - confirm intended.
+        return gene2Genotype.entrySet().stream()
+                // We have 0 failed variants by default
+                .map(e -> Gene2Genotype.of(e.getKey(), e.getValue(), failedVariantCount.getOrDefault(e.getKey(), 0)))
+                .toList();
+    }
+
+ /**
+ * @deprecated use {@link #of(Collection, Collection)} instead.
+ */
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
static GenesAndGenotypes of(List genes) {
return genes.isEmpty()
? empty()
: GenesAndGenotypesDefault.of(genes);
}
+ static GenesAndGenotypes of(Collection sampleNames, Collection genes) {
+ return genes.isEmpty()
+ ? empty()
+ : GenesAndGenotypesDefault.of(sampleNames, genes);
+ }
+
+ /**
+ * @return a collection with sample identifiers for whom we have the genotype data.
+ */
+ Collection sampleNames();
+
/**
* @return number of genes in the container.
*/
@@ -35,13 +91,15 @@ default Stream genes() {
default FilteringStats computeFilteringStats() {
AtomicLong passed = new AtomicLong();
AtomicLong failed = new AtomicLong();
- genes().flatMap(Gene2Genotype::variants)
- .forEach(v -> {
- if (v.passedFilters())
- passed.incrementAndGet();
- else failed.incrementAndGet();
- });
- return new FilteringStats(passed.get(), failed.get());
+ AtomicLong genesWithVariants = new AtomicLong();
+ genes().forEach(g -> {
+ if (g.hasVariants())
+ genesWithVariants.incrementAndGet();
+ passed.addAndGet(g.variantCount());
+ failed.addAndGet(g.filteredOutVariantCount());
+ });
+
+ return new FilteringStats(passed.get(), failed.get(), genesWithVariants.get());
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java
index 82b9672eb..6f36a0fa9 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesDefault.java
@@ -1,9 +1,7 @@
package org.monarchinitiative.lirical.core.model;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Objects;
+import java.util.*;
+import java.util.stream.Collectors;
class GenesAndGenotypesDefault {
@@ -11,15 +9,28 @@ static GenesAndGenotypes empty() {
return GenesAndGenotypesEmpty.INSTANCE;
}
- public static GenesAndGenotypes of(List genes) {
- return new GenesAndGenotypesFull(genes);
+    /**
+     * @deprecated use {@link #of(Collection, Collection)} instead.
+     */
+    @Deprecated(forRemoval = true, since = "2.0.0-RC3")
+    public static GenesAndGenotypes of(Collection<Gene2Genotype> genes) {
+        Set<String> sampleNames = genes.stream()
+                .flatMap(Gene2Genotype::variants)
+                .flatMap(v -> v.sampleNames().stream())
+                .collect(Collectors.toSet());
+        return of(sampleNames, genes);
     }
- record GenesAndGenotypesFull(List geneList) implements GenesAndGenotypes {
+    public static GenesAndGenotypes of(Collection<String> sampleNames,
+                                       Collection<Gene2Genotype> genes) {
+        return new GenesAndGenotypesFull(
+                List.copyOf(Objects.requireNonNull(sampleNames, "Sample names must not be null")),
+                List.copyOf(Objects.requireNonNull(genes, "Gene list must not be null"))
+        );
+    }
- GenesAndGenotypesFull(List geneList) {
- this.geneList = Objects.requireNonNull(geneList, "Gene list must not be null");
- }
+ record GenesAndGenotypesFull(List sampleNames,
+ List geneList) implements GenesAndGenotypes {
@Override
public int size() {
@@ -39,6 +50,11 @@ private static class GenesAndGenotypesEmpty implements GenesAndGenotypes {
private GenesAndGenotypesEmpty() {
}
+ @Override
+ public Collection sampleNames() {
+ return List.of();
+ }
+
@Override
public int size() {
return 0;
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java
index 0ea27c681..4404feda3 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariant.java
@@ -2,29 +2,74 @@
import org.monarchinitiative.svart.GenomicVariant;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
+/**
+ * A description of variant coordinates, sample genotypes, and filtering status for LIRICAL analysis.
+ *
+ * The variant has a {@link #genomeBuild()} to describe the reference system.
+ * The {@link #variant()} provides variant coordinates using Svart's {@link GenomicVariant} data structure.
+ * The variant genotypes for a set of samples can be accessed via {@link #alleleCount(String)}.
+ * Last, LIRICAL uses the variants that passed all filters in the analysis ({@link #passedFilters()}).
+ * However, we need to retain the failed variants too to report the passed/failed variants in the report.
+ */
public interface GenotypedVariant {
+ /**
+ * @deprecated deprecated in {@code v2.0.0} and subject to removal in {@code v3.0.0}.
+ * Use {@link #of(GenomeBuild, GenomicVariant, Collection, boolean)} instead.
+ */
+ // REMOVE(v3.0.0)
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
static GenotypedVariant of(GenomeBuild genomeBuild,
GenomicVariant variant,
Map genotypes,
boolean passedFilters) {
- return new GenotypedVariantDefault(genomeBuild, variant, genotypes, passedFilters);
+ List alleleCounts = genotypes.entrySet().stream()
+ .map(e -> SampleAlleleCount.of(e.getKey(), e.getValue()))
+ .toList();
+ return of(genomeBuild, variant, alleleCounts, passedFilters);
+ }
+
+ static GenotypedVariant of(GenomeBuild genomeBuild,
+ GenomicVariant variant,
+ Collection alleleCounts,
+ boolean passedFilters) {
+ return GenotypedVariantDefault.of(genomeBuild, variant, alleleCounts, passedFilters);
}
+ /**
+ * @return the genome build of the variant.
+ */
GenomeBuild genomeBuild();
+ /**
+ * @return the variant coordinates in Svart's {@linkplain GenomicVariant}.
+ */
GenomicVariant variant();
+ /**
+ * @return a set of sample identifiers where we have genotype data for this variant.
+ */
Set sampleNames();
- Optional alleleCount(String sample);
+ /**
+ * Get allele count for given sample.
+ *
+ * @param sampleId String with sample identifier.
+ * @return optional with the allele count or an empty optional if data for the sample is missing.
+ */
+ Optional alleleCount(String sampleId);
/**
- * @return true if the variant passed the filters in the variant source
+ * @return {@code true} if the variant passed the filters, according to the variant source (e.g. VCF file).
*/
boolean passedFilters();
+
+ /**
+ * @return {@code true} if the variant failed the filters, according to the variant source (e.g. VCF file).
+ */
+ default boolean failedFilters() {
+ return !passedFilters();
+ }
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java
index c8d4ad81f..683af3f2e 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/GenotypedVariantDefault.java
@@ -1,29 +1,39 @@
package org.monarchinitiative.lirical.core.model;
+import org.monarchinitiative.lirical.core.util.BinarySearch;
import org.monarchinitiative.svart.GenomicVariant;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;
/**
- * Implementation of {@link GenotypedVariant} with genotypes are stored in a {@link Map}.
+ * Implementation of {@link GenotypedVariant} with genotypes stored in an array.
*/
class GenotypedVariantDefault implements GenotypedVariant {
private final GenomeBuild genomeBuild;
private final GenomicVariant variant;
- private final Map genotypes;
+ private final SampleAlleleCount[] alleleCounts;
private final boolean passedFilters;
+    static GenotypedVariantDefault of(GenomeBuild genomeBuild,
+                                      GenomicVariant variant,
+                                      Collection<SampleAlleleCount> alleleCounts,
+                                      boolean passedFilters) {
+        // We sort the counts by sample id to take advantage of the binary search.
+        SampleAlleleCount[] counts = alleleCounts.stream()
+                .sorted(Comparator.comparing(SampleAlleleCount::getSampleId))
+                .toArray(SampleAlleleCount[]::new);
+        return new GenotypedVariantDefault(genomeBuild, variant, counts, passedFilters);
+    }
+
GenotypedVariantDefault(GenomeBuild genomeBuild,
GenomicVariant variant,
- Map genotypes,
+ SampleAlleleCount[] alleleCounts,
boolean passedFilters) {
this.genomeBuild = Objects.requireNonNull(genomeBuild);
this.variant = Objects.requireNonNull(variant);
- this.genotypes = Objects.requireNonNull(genotypes);
+ this.alleleCounts = Objects.requireNonNull(alleleCounts);
this.passedFilters = passedFilters;
}
@@ -40,12 +50,17 @@ public GenomicVariant variant() {
@Override
public Set sampleNames() {
- return genotypes.keySet();
+ return Arrays.stream(alleleCounts)
+ .map(SampleAlleleCount::getSampleId)
+ .collect(Collectors.toUnmodifiableSet());
}
@Override
- public Optional alleleCount(String sample) {
- return Optional.ofNullable(genotypes.get(sample));
+ public Optional alleleCount(String sampleId) {
+ if (sampleId == null)
+ return Optional.empty();
+ return BinarySearch.binarySearch(alleleCounts, SampleAlleleCount::getSampleId, sampleId)
+ .map(SampleAlleleCount::getAlleleCount);
}
@Override
@@ -58,12 +73,12 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
GenotypedVariantDefault that = (GenotypedVariantDefault) o;
- return genomeBuild == that.genomeBuild && Objects.equals(variant, that.variant) && Objects.equals(genotypes, that.genotypes) && passedFilters == that.passedFilters;
+ return genomeBuild == that.genomeBuild && Objects.equals(variant, that.variant) && Arrays.equals(alleleCounts, that.alleleCounts) && passedFilters == that.passedFilters;
}
@Override
public int hashCode() {
- return Objects.hash(genomeBuild, variant, genotypes, passedFilters);
+ return Objects.hash(genomeBuild, variant, Arrays.hashCode(alleleCounts), passedFilters);
}
@Override
@@ -71,7 +86,7 @@ public String toString() {
return "GenotypedVariantDefault{" +
"genomeBuild=" + genomeBuild +
", variant=" + variant +
- ", genotypes=" + genotypes +
+ ", alleleCounts=" + Arrays.toString(alleleCounts) +
", passedFilters=" + passedFilters +
'}';
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java
index a69a77463..49c4e6373 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/HpoCase.java
@@ -42,7 +42,7 @@ private HpoCase(String sampleId, List observedTerms, List exclud
this.excludedAbnormalities = Objects.requireNonNull(excludedTerms);
this.results = Objects.requireNonNull(results);
this.sex = Objects.requireNonNull(sex);
- this.age = Objects.requireNonNull(age);
+ this.age = age;
}
public String sampleId() {
@@ -106,7 +106,7 @@ public Builder(String sampleId, List abnormalPhenotypes) {
this.observedAbnormalities = List.copyOf(Objects.requireNonNull(abnormalPhenotypes));
excludedAbnormalities=List.of(); // default empty list
sex=Sex.UNKNOWN;
- age=Age.ageNotKnown();
+ age=null;
}
public Builder excluded(List excludedPhenotypes) {
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java
index 3376a16ba..2c7c12176 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariant.java
@@ -16,7 +16,9 @@ static LiricalVariant of(GenotypedVariant variant, List an
* @return number of pathogenic alleles that are registered in ClinVar
*/
default int pathogenicClinVarAlleleCount(String sampleId) {
- if (!clinvarClnSig().isPathogenicOrLikelyPathogenic()) {
+ if (sampleId == null)
+ return 0;
+ if (!clinVarAlleleData().map(cv -> cv.getClinvarClnSig().isPathogenicOrLikelyPathogenic()).orElse(false)) {
return 0;
} else {
return alleleCount(sampleId)
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java
index c3af7402b..50090cc9d 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/LiricalVariantDefault.java
@@ -34,8 +34,8 @@ public Set sampleNames() {
}
@Override
- public Optional alleleCount(String sample) {
- return genotypedVariant.alleleCount(sample);
+ public Optional alleleCount(String sampleId) {
+ return genotypedVariant.alleleCount(sampleId);
}
@Override
@@ -54,8 +54,8 @@ public float pathogenicity() {
}
@Override
- public ClinvarClnSig clinvarClnSig() {
- return variantMetadata.clinvarClnSig();
+ public Optional clinVarAlleleData() {
+ return variantMetadata.clinVarAlleleData();
}
@Override
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java
new file mode 100644
index 000000000..0d43e43ec
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/SampleAlleleCount.java
@@ -0,0 +1,50 @@
+package org.monarchinitiative.lirical.core.model;
+
+import java.util.Objects;
+
+/**
+ * A container for associating sample id and the {@link AlleleCount}.
+ */
+public class SampleAlleleCount {
+
+    private final String sampleId;
+    private final AlleleCount alleleCount;
+
+    private SampleAlleleCount(String sampleId, AlleleCount alleleCount) {
+        // Both components are mandatory.
+        this.sampleId = Objects.requireNonNull(sampleId);
+        this.alleleCount = Objects.requireNonNull(alleleCount);
+    }
+
+    /**
+     * Pair the sample id with its allele count.
+     *
+     * @param sampleId    sample identifier, must not be {@code null}.
+     * @param alleleCount allele count of the sample, must not be {@code null}.
+     * @return the new container.
+     * @throws NullPointerException if any argument is {@code null}.
+     */
+    public static SampleAlleleCount of(String sampleId, AlleleCount alleleCount) {
+        return new SampleAlleleCount(sampleId, alleleCount);
+    }
+
+    /**
+     * @return the sample identifier, never {@code null}.
+     */
+    public String getSampleId() {
+        return sampleId;
+    }
+
+    /**
+     * @return the allele count of the sample, never {@code null}.
+     */
+    public AlleleCount getAlleleCount() {
+        return alleleCount;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null || o.getClass() != getClass()) {
+            return false;
+        }
+        SampleAlleleCount other = (SampleAlleleCount) o;
+        // The fields are never null, see the constructor.
+        return sampleId.equals(other.sampleId) && alleleCount.equals(other.alleleCount);
+    }
+
+    @Override
+    public int hashCode() {
+        // Same value as Objects.hash(sampleId, alleleCount).
+        return 31 * (31 + sampleId.hashCode()) + alleleCount.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("SampleAlleleCount{");
+        sb.append("sampleId='").append(sampleId).append('\'');
+        sb.append(", alleleCount=").append(alleleCount);
+        return sb.append('}').toString();
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java
index e1485d536..ae7ca240a 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadata.java
@@ -8,12 +8,21 @@ static VariantMetadata empty() {
return VariantMetadataDefault.empty();
}
+ /**
+ * @deprecated from {@code 2.0.0-RC3}. Use {@link #of(float, float, ClinVarAlleleData)} instead.
+ */
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
static VariantMetadata of(float frequency,
float pathogenicity,
ClinvarClnSig clinvarClnSig) {
- return new VariantMetadataDefault(frequency,
- pathogenicity,
- clinvarClnSig);
+ ClinVarAlleleData data = ClinVarAlleleData.of(clinvarClnSig, null);
+ return of(frequency, pathogenicity, data);
+ }
+
+ static VariantMetadata of(float frequency,
+ float pathogenicity,
+ ClinVarAlleleData clinVarAlleleData) {
+ return new VariantMetadataDefault(frequency, pathogenicity, clinVarAlleleData);
}
/**
@@ -50,15 +59,29 @@ static VariantMetadata of(float frequency,
default Optional pathogenicityScore() {
// Heuristic -- Count ClinVar pathogenic or likely pathogenic as 1.0 (maximum pathogenicity score)
// regardless of the Exomiser pathogenicity score
- return clinvarClnSig().isPathogenicOrLikelyPathogenic()
+ return clinVarAlleleData()
+ .map(a -> a.getClinvarClnSig().isPathogenicOrLikelyPathogenic())
+ .orElse(false) // go to the frequencyScore branch
? Optional.of(1f)
: frequencyScore().map(fs -> fs * pathogenicity());
}
/**
+ * @deprecated since {@code 2.0.0-RC3} and will be removed in {@code v3.0.0}. Use {@link #clinVarAlleleData()} instead.
* @return Clinvar clinical significance category.
*/
- ClinvarClnSig clinvarClnSig();
+ // REMOVE(v3.0.0)
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
+ default ClinvarClnSig clinvarClnSig() {
+ return clinVarAlleleData()
+ .map(ClinVarAlleleData::getClinvarClnSig)
+ .orElse(ClinvarClnSig.NOT_PROVIDED);
+ }
+
+ /**
+ * @return {@link ClinVarAlleleData} for the variant, if available.
+ */
+ Optional clinVarAlleleData();
/**
* This is the frequency factor used for the Exomiser like pathogenicity score. It penalizes variants that have a higher
@@ -77,8 +100,12 @@ default Optional frequencyScore() {
});
}
-
+ /**
+ * @deprecated the function has been deprecated without replacement and will be removed in {@code v3.0.0}.
+ */
+ @Deprecated(forRemoval = true, since = "2.0.0-RC3")
static int compareByPathogenicity(VariantMetadata left, VariantMetadata right) {
+ // REMOVE(v3.0.0)
return Float.compare(left.pathogenicity(), right.pathogenicity());
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java
index 4b3c6fb87..b7b3ed5e2 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/VariantMetadataDefault.java
@@ -5,7 +5,7 @@
class VariantMetadataDefault implements VariantMetadata {
- private static final VariantMetadataDefault EMPTY = new VariantMetadataDefault(Float.NaN, Float.NaN, ClinvarClnSig.NOT_PROVIDED);
+ private static final VariantMetadataDefault EMPTY = new VariantMetadataDefault(Float.NaN, Float.NaN, null);
static VariantMetadataDefault empty() {
return EMPTY;
@@ -13,14 +13,14 @@ static VariantMetadataDefault empty() {
private final float frequency;
private final float pathogenicity;
- private final ClinvarClnSig clinvarClnSig;
+ private final ClinVarAlleleData clinVarAlleleData;
VariantMetadataDefault(float frequency,
float pathogenicity,
- ClinvarClnSig clinvarClnSig) {
+ ClinVarAlleleData clinVarAlleleData) {
this.frequency = frequency;
this.pathogenicity = pathogenicity;
- this.clinvarClnSig = Objects.requireNonNull(clinvarClnSig);
+ this.clinVarAlleleData = clinVarAlleleData; // nullable
}
@Override
@@ -36,8 +36,8 @@ public float pathogenicity() {
}
@Override
- public ClinvarClnSig clinvarClnSig() {
- return clinvarClnSig;
+ public Optional clinVarAlleleData() {
+ return Optional.ofNullable(clinVarAlleleData);
}
@Override
@@ -45,7 +45,7 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
VariantMetadataDefault that = (VariantMetadataDefault) o;
- return Float.compare(that.frequency, frequency) == 0 && Float.compare(that.pathogenicity, pathogenicity) == 0 && Objects.equals(clinvarClnSig, that.clinvarClnSig);
+ return Float.compare(that.frequency, frequency) == 0 && Float.compare(that.pathogenicity, pathogenicity) == 0 && Objects.equals(clinVarAlleleData, that.clinVarAlleleData);
}
@Override
@@ -58,7 +58,7 @@ public String toString() {
return "VariantMetadataDefault{" +
"frequency=" + frequency +
", pathogenicity=" + pathogenicity +
- ", clinvarClnSig=" + clinvarClnSig +
+ ", clinVarAlleleData=" + clinVarAlleleData +
'}';
}
}
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java
new file mode 100644
index 000000000..b11a05ebb
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/model/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Package with data models.
+ */
+package org.monarchinitiative.lirical.core.model;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java
index 0a56365ea..7d633dfcf 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/AnalysisResultsMetadata.java
@@ -1,5 +1,8 @@
package org.monarchinitiative.lirical.core.output;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
public class AnalysisResultsMetadata {
private String liricalVersion;
private String hpoVersion;
@@ -8,9 +11,9 @@ public class AnalysisResultsMetadata {
private String exomiserPath;
private String analysisDate;
private String sampleName;
- private long nGoodQualityVariants;
+ private long nPassingVariants;
private long nFilteredVariants;
- private int genesWithVar;
+ private long genesWithVar;
private boolean globalMode;
private AnalysisResultsMetadata(String liricalVersion,
@@ -20,9 +23,9 @@ private AnalysisResultsMetadata(String liricalVersion,
String exomiserPath,
String analysisDate,
String sampleName,
- long nGoodQualityVariants,
+ long nPassingVariants,
long nFilteredVariants,
- int genesWithVar,
+ long genesWithVar,
boolean globalMode) {
this.liricalVersion = liricalVersion;
this.hpoVersion = hpoVersion;
@@ -31,7 +34,7 @@ private AnalysisResultsMetadata(String liricalVersion,
this.exomiserPath = exomiserPath;
this.analysisDate = analysisDate;
this.sampleName = sampleName;
- this.nGoodQualityVariants = nGoodQualityVariants;
+ this.nPassingVariants = nPassingVariants;
this.nFilteredVariants = nFilteredVariants;
this.genesWithVar = genesWithVar;
this.globalMode = globalMode;
@@ -57,6 +60,7 @@ public void setTranscriptDatabase(String transcriptDatabase) {
this.transcriptDatabase = transcriptDatabase;
}
+ @JsonIgnore
public String getLiricalPath() {
return liricalPath;
}
@@ -65,6 +69,7 @@ public void setLiricalPath(String liricalPath) {
this.liricalPath = liricalPath;
}
+ @JsonIgnore
public String getExomiserPath() {
return exomiserPath;
}
@@ -89,14 +94,16 @@ public void setSampleName(String sampleName) {
this.sampleName = sampleName;
}
- public long getnGoodQualityVariants() {
- return nGoodQualityVariants;
+ @JsonIgnore
+ public long getnPassingVariants() {
+ return nPassingVariants;
}
- public void setnGoodQualityVariants(long nGoodQualityVariants) {
- this.nGoodQualityVariants = nGoodQualityVariants;
+ public void setnPassingVariants(long nPassingVariants) {
+ this.nPassingVariants = nPassingVariants;
}
+ @JsonIgnore
public long getnFilteredVariants() {
return nFilteredVariants;
}
@@ -105,14 +112,16 @@ public void setnFilteredVariants(long nFilteredVariants) {
this.nFilteredVariants = nFilteredVariants;
}
- public int getGenesWithVar() {
+ @JsonIgnore
+ public long getGenesWithVar() {
return genesWithVar;
}
- public void setGenesWithVar(int genesWithVar) {
+ public void setGenesWithVar(long genesWithVar) {
this.genesWithVar = genesWithVar;
}
+ @JsonGetter(value = "isGlobalAnalysisMode")
public boolean getGlobalMode() {
return globalMode;
}
@@ -135,7 +144,7 @@ public String toString() {
", exomiserPath='" + exomiserPath + '\'' +
", analysisDate='" + analysisDate + '\'' +
", sampleName='" + sampleName + '\'' +
- ", nGoodQualityVariants=" + nGoodQualityVariants +
+ ", nPassingVariants=" + nPassingVariants +
", nFilteredVariants=" + nFilteredVariants +
", genesWithVar=" + genesWithVar +
", globalMode=" + globalMode +
@@ -150,9 +159,9 @@ public static class Builder {
private String exomiserPath;
private String analysisDate;
private String sampleName = "SAMPLE_ID";
- private long nGoodQualityVariants;
+ private long nPassingVariants;
private long nFilteredVariants;
- private int genesWithVar;
+ private long genesWithVar;
private boolean globalMode;
private Builder() {
@@ -193,8 +202,8 @@ public Builder setSampleName(String sampleName) {
return this;
}
- public Builder setnGoodQualityVariants(long nGoodQualityVariants) {
- this.nGoodQualityVariants = nGoodQualityVariants;
+ public Builder setnPassingVariants(long nPassingVariants) {
+ this.nPassingVariants = nPassingVariants;
return this;
}
@@ -203,7 +212,7 @@ public Builder setnFilteredVariants(long nFilteredVariants) {
return this;
}
- public Builder setGenesWithVar(int genesWithVar) {
+ public Builder setGenesWithVar(long genesWithVar) {
this.genesWithVar = genesWithVar;
return this;
}
@@ -221,7 +230,7 @@ public AnalysisResultsMetadata build() {
exomiserPath,
analysisDate,
sampleName,
- nGoodQualityVariants,
+ nPassingVariants,
nFilteredVariants,
genesWithVar,
globalMode);
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java
index 9473d23b0..277126549 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/output/package-info.java
@@ -1,4 +1,4 @@
/**
- * Classes for creating HTML or TSV output files for LIRICAL.
+ * Base data model for writing out the results of LIRICAL analysis.
*/
package org.monarchinitiative.lirical.core.output;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java
new file mode 100644
index 000000000..99afd9954
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Core functionality of the LIRICAL algorithm.
+ */
+package org.monarchinitiative.lirical.core;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java
new file mode 100644
index 000000000..5b338edbf
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/BaseInputSanitizer.java
@@ -0,0 +1,117 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+
+/**
+ * Shared functions for {@link InputSanitizer}s.
+ *
+ * @author Daniel Danis
+ */
+abstract class BaseInputSanitizer implements InputSanitizer {
+
+ protected final MinimalOntology hpo;
+
+ /**
+ * @param hpo HPO ontology used by the checks, must not be {@code null}.
+ * @throws NullPointerException if {@code hpo} is {@code null}.
+ */
+ BaseInputSanitizer(MinimalOntology hpo) {
+ this.hpo = Objects.requireNonNull(hpo);
+ }
+
+ /**
+  * Drop the term IDs that are absent from the HPO and report each of them as a warning.
+  *
+  * @param termIds term IDs to check, the list is modified in place.
+  * @param issues  collector for the warnings.
+  */
+ protected void checkCuriesArePresentInHpo(List termIds, List issues) {
+     List<Integer> missing = new ArrayList<>();
+     for (int idx = 0; idx < termIds.size(); idx++) {
+         TermId candidate = termIds.get(idx);
+         if (hpo.containsTermId(candidate))
+             continue;
+
+         String hpoVersion = hpo.version().orElse("UNKNOWN");
+         issues.add(SanityIssue.warning(
+                 "Term %s does not exist in HPO version %s".formatted(candidate.getValue(), hpoVersion),
+                 "Consider updating HPO or explore the HPO browser to choose alternative term"));
+         missing.add(idx);
+     }
+     // The removal is postponed until the scan is over to keep the indices stable.
+     removeElements(termIds, missing);
+ }
+
+ /**
+  * Remove the elements at the given indices from the list.
+  *
+  * @param termIds  the list to modify in place.
+  * @param toRemove indices of the elements to remove, duplicate indices are tolerated.
+  */
+ protected static void removeElements(List termIds, Collection toRemove) {
+     // The set drops duplicate indices and orders them from the highest to the lowest,
+     // so that removing an element never shifts an element that is yet to be removed.
+     Set<Integer> descending = new TreeSet<>(Comparator.reverseOrder());
+     descending.addAll(toRemove);
+     for (int idx : descending) {
+         termIds.remove(idx); // `int` argument: removal by index
+     }
+ }
+
+ /**
+  * Replace each term ID that differs from the ID of the HPO term it resolves to,
+  * and report each replacement as a warning.
+  *
+  * @param termIds term IDs to check, the list is modified in place. All IDs must resolve to an HPO term.
+  * @param issues  collector for the warnings.
+  * @throws RuntimeException if a term ID does not resolve to an HPO term.
+  */
+ protected void checkTermsUsePrimaryIdentifiers(List termIds, List issues) {
+     // First pass: find the positions to update. Nothing is modified until all term IDs are resolved.
+     Map<Integer, TermId> primaryByIndex = new LinkedHashMap<>();
+     int idx = 0;
+     for (TermId current : termIds) {
+         Term term = hpo.termForTermId(current)
+                 .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(current.getValue())));
+
+         TermId primary = term.id();
+         if (!current.equals(primary)) {
+             issues.add(SanityIssue.warning(
+                     "%s is an obsolete id of %s".formatted(current.getValue(), term.getName()),
+                     "Use %s instead".formatted(primary.getValue())));
+             primaryByIndex.put(idx, primary);
+         }
+         idx++;
+     }
+
+     // Second pass: swap in the primary IDs.
+     primaryByIndex.forEach(termIds::set);
+ }
+
+ /**
+  * Set the VCF path to the sanitized inputs if it points to a readable regular file, or report an error.
+  *
+  * @param vcf       path of the VCF file or {@code null} if no VCF was provided.
+  * @param sanitized sanitized inputs to update.
+  * @param issues    collector for the error.
+  */
+ protected void checkVcf(String vcf, SanitizedInputs sanitized, List issues) {
+     if (vcf == null)
+         return; // VCF is optional
+
+     Path candidate = Path.of(vcf);
+     boolean usable = Files.isRegularFile(candidate) && Files.isReadable(candidate);
+     if (!usable) {
+         issues.add(SanityIssue.error(
+                 "VCF path is set but %s does not point to a readable file".formatted(candidate.toAbsolutePath()),
+                 "Update the path or the file permissions"));
+         return;
+     }
+
+     sanitized.setVcf(candidate);
+ }
+
+ /**
+  * Parse the CURIEs of the present and excluded HPO terms into the sanitized inputs.
+  * An error is reported if no term was provided at all, a warning for each CURIE that cannot be parsed.
+  *
+  * @param sanitized            sanitized inputs whose term lists receive the parsed term IDs.
+  * @param inputPresentTermIds  CURIEs of the present HPO terms.
+  * @param inputExcludedTermIds CURIEs of the excluded HPO terms.
+  * @param issues               collector for the issues.
+  */
+ protected static void checkCuriesAreWellFormed(SanitizedInputs sanitized,
+                                                List inputPresentTermIds,
+                                                List inputExcludedTermIds,
+                                                List issues) {
+     boolean noTerms = inputPresentTermIds.isEmpty() && inputExcludedTermIds.isEmpty();
+     if (noTerms) {
+         issues.add(SanityIssue.error("No HPO terms were provided", "Add at least 1 HPO term to start"));
+         return;
+     }
+
+     // Present terms go first, the excluded terms follow.
+     for (String present : inputPresentTermIds)
+         checkCurieIsValid(present, sanitized.presentHpoTerms(), issues);
+
+     for (String excluded : inputExcludedTermIds)
+         checkCurieIsValid(excluded, sanitized.excludedHpoTerms(), issues);
+ }
+
+ /**
+  * Parse the CURIE and append the term ID to the list, or report a warning if the CURIE cannot be parsed.
+  *
+  * @param curie   the CURIE to parse.
+  * @param termIds list to receive the parsed term ID.
+  * @param issues  collector for the warning.
+  */
+ private static void checkCurieIsValid(String curie,
+                                       List termIds,
+                                       List issues) {
+     TermId parsed;
+     try {
+         parsed = TermId.of(curie);
+     } catch (PhenolRuntimeException e) {
+         issues.add(SanityIssue.warning(
+                 "The term ID %s is invalid: %s".formatted(curie, e.getMessage()),
+                 "Ensure the term ID consists of a valid prefix (e.g. `HP`) and id (e.g. `0001250`) " +
+                         "joined by colon `:` or underscore `_`."));
+         return;
+     }
+     termIds.add(parsed);
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java
new file mode 100644
index 000000000..a69568cbf
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/ComprehensiveInputSanitizer.java
@@ -0,0 +1,222 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.lirical.core.model.Age;
+import org.monarchinitiative.lirical.core.model.Sex;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+
+import java.time.Period;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+class ComprehensiveInputSanitizer extends BaseInputSanitizer {
+
+ private static final TermId PHENOTYPIC_ABNORMALITY = TermId.of("HP:0000118");
+
+ /**
+ * @param hpo HPO ontology passed to the base sanitizer, which rejects {@code null}.
+ */
+ public ComprehensiveInputSanitizer(MinimalOntology hpo){
+ super(hpo);
+ }
+
+ /**
+ * Run all checks of the comprehensive sanitizer.
+ * The phenotypic features, age, sex, and VCF path are checked in this order.
+ *
+ * @param inputs the inputs to sanitize.
+ * @return the sanitized inputs together with the issues found by the checks.
+ */
+ @Override
+ public SanitationResult sanitize(SanitationInputs inputs) {
+ List issues = new ArrayList<>();
+ // sampleId is nullable, nothing to be checked there at this point.
+ SanitizedInputs sanitized = new SanitizedInputs(inputs.sampleId());
+
+ // Check phenotypic features
+ // The CURIEs are parsed into the term lists of `sanitized` first, the remaining checks work with the parsed term IDs.
+ checkCuriesAreWellFormed(sanitized, inputs.presentHpoTerms(), inputs.excludedHpoTerms(), issues);
+ checkPhenotypicFeatures(sanitized, issues);
+
+ // Age and sex are set only if they can be parsed, a warning is reported otherwise.
+ checkAge(inputs.age(), sanitized, issues);
+ checkSex(inputs.sex(), sanitized, issues);
+
+ // The VCF path is set only if it points to a readable file, an error is reported otherwise.
+ checkVcf(inputs.vcf(), sanitized, issues);
+
+ return new SanitationResultDefault(sanitized, issues);
+ }
+
+ /**
+ * Check the present and excluded HPO terms of the sanitized inputs.
+ * The term lists are modified in place and the findings are added to the issues.
+ *
+ * @param sanitized sanitized inputs with the term lists to check.
+ * @param issues collector for the issues.
+ */
+ private void checkPhenotypicFeatures(SanitizedInputs sanitized, List issues) {
+ // Remove the duplicates.
+ checkTermsAreUnique(sanitized.presentHpoTerms(), issues);
+ checkTermsAreUnique(sanitized.excludedHpoTerms(), issues);
+
+ // Remove the terms that are unknown to HPO. This must precede the checks below,
+ // because they throw if a term ID cannot be resolved to an HPO term.
+ checkCuriesArePresentInHpo(sanitized.presentHpoTerms(), issues);
+ checkCuriesArePresentInHpo(sanitized.excludedHpoTerms(), issues);
+
+ // Replace the obsolete term IDs with the primary ones.
+ checkTermsUsePrimaryIdentifiers(sanitized.presentHpoTerms(), issues);
+ checkTermsUsePrimaryIdentifiers(sanitized.excludedHpoTerms(), issues);
+
+ // Remove the terms that are not descendants of Phenotypic abnormality.
+ checkTermsAreDescendantsOfPhenotypicAbnormality(sanitized.presentHpoTerms(), issues);
+ checkTermsAreDescendantsOfPhenotypicAbnormality(sanitized.excludedHpoTerms(), issues);
+
+ // Remove the redundant terms and report the conflicts between the present and excluded terms.
+ checkTermsAreLogicallyConsistent(sanitized, issues);
+ }
+
+ /**
+  * Report each term that is used more than once as a warning and keep only its first occurrence.
+  *
+  * @param termIds term IDs to check, the list is modified in place.
+  * @param issues  collector for the warnings.
+  */
+ private void checkTermsAreUnique(List termIds, List issues) {
+     Map<TermId, Long> occurrences = termIds.stream()
+             .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
+
+     // Report every term that is used more than once.
+     List<TermId> duplicated = new ArrayList<>();
+     occurrences.forEach((termId, count) -> {
+         if (count > 1) {
+             issues.add(SanityIssue.warning(
+                     "Term should be used at most once but %s is used %d times".formatted(termId.getValue(), count),
+                     "Use a term at most once"));
+             duplicated.add(termId);
+         }
+     });
+
+     // Keep the first occurrence of a duplicated term and drop all later ones.
+     // Walking from the back keeps the indices of the elements yet to be visited valid.
+     for (TermId duplicate : duplicated) {
+         int first = termIds.indexOf(duplicate);
+         for (int idx = termIds.size() - 1; idx > first; idx--) {
+             if (termIds.get(idx).equals(duplicate))
+                 termIds.remove(idx);
+         }
+     }
+ }
+
+ /**
+  * Drop the terms that are neither Phenotypic abnormality nor connected to it by a path in the HPO graph,
+  * and report each of them as a warning.
+  *
+  * @param termIds term IDs to check, the list is modified in place. All IDs must resolve to an HPO term.
+  * @param issues  collector for the warnings.
+  * @throws RuntimeException if a reported term ID does not resolve to an HPO term.
+  */
+ private void checkTermsAreDescendantsOfPhenotypicAbnormality(List termIds, List issues) {
+     List<Integer> offending = new ArrayList<>();
+     for (int idx = 0; idx < termIds.size(); idx++) {
+         TermId candidate = termIds.get(idx);
+         boolean isPhenotype = candidate.equals(PHENOTYPIC_ABNORMALITY)
+                 || hpo.graph().existsPath(candidate, PHENOTYPIC_ABNORMALITY);
+         if (isPhenotype)
+             continue;
+
+         Term term = hpo.termForTermId(candidate)
+                 .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(candidate.getValue())));
+         String label = formatTerm(term);
+         issues.add(SanityIssue.warning(
+                 "Term %s is not a descendant of Phenotypic abnormality".formatted(label),
+                 "Consider removing %s from the phenotypic features".formatted(label)));
+         offending.add(idx);
+     }
+     removeElements(termIds, offending);
+ }
+
+ /**
+ * Remove the redundant excluded and present terms, and then report the present terms
+ * that are in conflict with the excluded terms.
+ *
+ * @param sanitized sanitized inputs with the term lists to check.
+ * @param issues collector for the issues.
+ */
+ private void checkTermsAreLogicallyConsistent(SanitizedInputs sanitized, List issues) {
+ // Pruning modifies the term lists in place.
+ pruneExcludedHpoTerms(sanitized.excludedHpoTerms(), issues);
+ prunePresentHpoTerms(sanitized.presentHpoTerms(), issues);
+
+ // The conflicts are only reported, the terms stay in the lists.
+ checkNoPresentFeatureHasExcludedAncestor(sanitized, issues);
+ }
+
+ /**
+  * Keep only the most general excluded terms.
+  * A term is removed and reported as a warning if one of its ancestors is among the excluded terms too.
+  *
+  * @param excludedTerms excluded term IDs, the list is modified in place. All IDs must resolve to an HPO term.
+  * @param issues        collector for the warnings.
+  * @throws RuntimeException if a reported term ID does not resolve to an HPO term.
+  */
+ private void pruneExcludedHpoTerms(List excludedTerms,
+                                    List issues) {
+     List<Integer> redundant = new ArrayList<>();
+     for (int idx = 0; idx < excludedTerms.size(); idx++) {
+         TermId candidate = excludedTerms.get(idx);
+         for (TermId other : excludedTerms) {
+             if (candidate.equals(other) || !hpo.graph().existsPath(candidate, other))
+                 continue;
+
+             // A path leads from `candidate` to `other`, hence `other` is handled as the ancestor.
+             Term term = hpo.termForTermId(candidate)
+                     .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(candidate.getValue())));
+             Term ancestor = hpo.termForTermId(other)
+                     .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(other.getValue())));
+             issues.add(SanityIssue.warning(
+                     "Sample should not be annotated with excluded %s and its excluded ancestor %s".formatted(formatTerm(term), formatTerm(ancestor)),
+                     "Remove %s from the phenotype terms".formatted(formatTerm(term))));
+             redundant.add(idx);
+             break; // one excluded ancestor is enough to drop the term
+         }
+     }
+
+     removeElements(excludedTerms, redundant);
+ }
+
+ /**
+  * Keep only the most specific present terms.
+  * A term is removed and reported as a warning if one of its descendants is among the present terms too.
+  *
+  * @param presentTerms present term IDs, the list is modified in place. All IDs must resolve to an HPO term.
+  * @param issues       collector for the warnings.
+  * @throws RuntimeException if a reported term ID does not resolve to an HPO term.
+  */
+ private void prunePresentHpoTerms(List presentTerms, List issues) {
+     List<Integer> redundant = new ArrayList<>();
+     for (int idx = 0; idx < presentTerms.size(); idx++) {
+         TermId candidate = presentTerms.get(idx);
+         for (TermId other : presentTerms) {
+             if (candidate.equals(other) || !hpo.graph().existsPath(other, candidate))
+                 continue;
+
+             // A path leads from `other` to `candidate`, hence `candidate` is handled as the ancestor.
+             Term term = hpo.termForTermId(other)
+                     .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(other.getValue())));
+             Term ancestor = hpo.termForTermId(candidate)
+                     .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(candidate.getValue())));
+             issues.add(SanityIssue.warning(
+                     "Sample should not be annotated with %s and its ancestor %s".formatted(formatTerm(term), formatTerm(ancestor)),
+                     "Remove %s from the phenotype terms".formatted(formatTerm(ancestor))));
+             redundant.add(idx);
+             break; // one present descendant is enough to drop the term
+         }
+     }
+
+     removeElements(presentTerms, redundant);
+ }
+
+ /**
+  * Report an error for each present term that is also excluded or that has an excluded ancestor.
+  * The term lists are left unchanged.
+  *
+  * @param sanitized sanitized inputs with the present and excluded terms. All IDs must resolve to an HPO term.
+  * @param issues    collector for the errors.
+  * @throws RuntimeException if a reported term ID does not resolve to an HPO term.
+  */
+ private void checkNoPresentFeatureHasExcludedAncestor(SanitizedInputs sanitized, List issues) {
+     for (TermId present : sanitized.presentHpoTerms()) {
+         for (TermId excluded : sanitized.excludedHpoTerms()) {
+             boolean sameTerm = present.equals(excluded);
+             if (!sameTerm && hpo.graph().getAncestorsStream(present).noneMatch(anc -> anc.equals(excluded)))
+                 continue; // no conflict between the two terms
+
+             Term presentTerm = hpo.termForTermId(present)
+                     .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(present.getValue())));
+             if (sameTerm) {
+                 // Term is both present and excluded.
+                 issues.add(SanityIssue.error(
+                         "Sample must not be annotated with %s in present and excluded state at the same time".formatted(formatTerm(presentTerm)),
+                         "Make up your mind"));
+             } else {
+                 // Term has an excluded ancestor.
+                 Term excludedTerm = hpo.termForTermId(excluded)
+                         .orElseThrow(() -> new RuntimeException("%s should be a term from HPO at this point".formatted(excluded.getValue())));
+                 issues.add(SanityIssue.error(
+                         "Sample must not be annotated with %s while its ancestor %s is excluded".formatted(
+                                 formatTerm(presentTerm), formatTerm(excludedTerm)),
+                         "Resolve the logical inconsistency by choosing one of the terms"));
+             }
+         }
+     }
+ }
+
+ /**
+  * Parse the age and set it to the sanitized inputs, or report a warning if the age cannot be parsed.
+  *
+  * @param age       age formatted as an ISO8601 duration (e.g. {@code P22Y6M}) or {@code null} if not available.
+  * @param sanitized sanitized inputs to update.
+  * @param issues    collector for the warning.
+  */
+ private static void checkAge(String age, SanitizedInputs sanitized, List issues) {
+     if (age == null)
+         return; // age is optional
+
+     try {
+         sanitized.setAge(Age.parse(Period.parse(age)));
+     } catch (DateTimeParseException e) {
+         issues.add(SanityIssue.warning(
+                 "Age %s could not be parsed: %s".formatted(age, e.getMessage()),
+                 "Format age as a ISO8601 duration (e.g. `P22Y6M`)"));
+     }
+ }
+
+ /**
+  * Parse the sex and set it to the sanitized inputs, or report a warning if the sex cannot be parsed.
+  * The letter case of the input is ignored.
+  *
+  * @param sex       name of the sex or {@code null} if not available.
+  * @param sanitized sanitized inputs to update.
+  * @param issues    collector for the warning.
+  */
+ private static void checkSex(String sex, SanitizedInputs sanitized, List issues) {
+     if (sex == null)
+         return; // sex is optional
+
+     try {
+         Sex parsed = Sex.valueOf(sex.toUpperCase());
+         sanitized.setSex(parsed);
+     } catch (IllegalArgumentException e) {
+         issues.add(SanityIssue.warning(
+                 "Sex %s could not be parsed".formatted(sex),
+                 "Use one of {'male', 'female', 'unknown'}"));
+     }
+ }
+
+ /**
+  * Format the term as its name followed by the term ID in square brackets.
+  */
+ private static String formatTerm(Term term) {
+     String name = term.getName();
+     String curie = term.id().getValue();
+     return String.format("%s [%s]", name, curie);
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java
new file mode 100644
index 000000000..5c3ebe68f
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizer.java
@@ -0,0 +1,11 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+/**
+ * Sanitize the user input before running the analysis.
+ *
+ * @author Daniel Danis
+ */
+public interface InputSanitizer {
+
+ /**
+ * Sanitize the inputs and report the issues that were found.
+ *
+ * @param inputs the user inputs to sanitize.
+ * @return the sanitation result.
+ */
+ SanitationResult sanitize(SanitationInputs inputs);
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java
new file mode 100644
index 000000000..a62cc62f5
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/InputSanitizerFactory.java
@@ -0,0 +1,22 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+
+/**
+ * Get the input sanitizer with required level
+ */
+public class InputSanitizerFactory {
+
+    private final MinimalOntology hpo;
+
+    /**
+     * @param hpo HPO ontology handed over to the created sanitizers, must not be {@code null}.
+     * @throws NullPointerException if {@code hpo} is {@code null}.
+     */
+    public InputSanitizerFactory(MinimalOntology hpo) {
+        // The sanitizers reject null ontology in their constructors.
+        // Fail here already instead of on the first `forType` call.
+        this.hpo = java.util.Objects.requireNonNull(hpo, "hpo must not be null");
+    }
+
+    /**
+     * Create a new sanitizer of the given type.
+     *
+     * @param type the type of the sanitizer to create.
+     * @return the new sanitizer backed by the ontology of this factory.
+     */
+    public InputSanitizer forType(SanitizerType type) {
+        return switch (type) {
+            case COMPREHENSIVE -> new ComprehensiveInputSanitizer(hpo);
+            case MINIMAL -> new MinimalInputSanitizer(hpo);
+        };
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java
new file mode 100644
index 000000000..e478f6e50
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/MinimalInputSanitizer.java
@@ -0,0 +1,76 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.lirical.core.model.Age;
+import org.monarchinitiative.lirical.core.model.Sex;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+
+import java.time.Period;
+import java.time.format.DateTimeParseException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Minimal sanitizer performs as few checks as possible.
+ *
+ * The HPO terms are checked if they are well-formed CURIEs that exist in given HPO. Obsolete term IDs are replaced
+ * with the current term IDs.
+ *
+ * If path to VCF is set, then it must point to a readable file.
+ *
+ * @author Daniel Danis
+ */
+class MinimalInputSanitizer extends BaseInputSanitizer {
+
+ /**
+ * @param hpo HPO ontology passed to the base sanitizer, which rejects {@code null}.
+ */
+ MinimalInputSanitizer(MinimalOntology hpo) {
+ super(hpo);
+ }
+
+ /**
+ * Run the checks of the minimal sanitizer.
+ * Issues are reported for the HPO terms and for the VCF path only.
+ *
+ * @param inputs the inputs to sanitize.
+ * @return the sanitized inputs together with the issues found by the checks.
+ */
+ @Override
+ public SanitationResult sanitize(SanitationInputs inputs) {
+ List issues = new ArrayList<>();
+
+ // sampleId is nullable, nothing to be checked there at this point.
+ SanitizedInputs sanitized = new SanitizedInputs(inputs.sampleId());
+
+ // Check phenotypic features
+ // The CURIEs are parsed into the term lists of `sanitized` first, the other check works with the parsed term IDs.
+ checkCuriesAreWellFormed(sanitized, inputs.presentHpoTerms(), inputs.excludedHpoTerms(), issues);
+ checkPhenotypicFeatures(sanitized, issues);
+
+ // Convert the age and sex if possible, or ignore.
+ // No issue is reported here, the value is set to null if it cannot be parsed.
+ sanitized.setAge(parseAgeOrNull(inputs.age()));
+ sanitized.setSex(parseSexOrNull(inputs.sex()));
+
+ // The VCF path is set only if it points to a readable file, an error is reported otherwise.
+ checkVcf(inputs.vcf(), sanitized, issues);
+
+
+ return new SanitationResultDefault(sanitized, issues);
+ }
+
+ /**
+  * Parse the age from an ISO 8601 period string.
+  *
+  * @param age period string (e.g. {@code P22Y6M}) or {@code null} if not available.
+  * @return the parsed age, or {@code null} if {@code age} is {@code null} or cannot be parsed.
+  */
+ private static Age parseAgeOrNull(String age) {
+     if (age == null)
+         return null; // age was not provided, no need to provoke and catch an exception
+
+     try {
+         return Age.parse(Period.parse(age));
+     } catch (Exception ignored) {
+         // Best effort: the minimal sanitizer drops the age that cannot be parsed without reporting an issue.
+         return null;
+     }
+ }
+
+ /**
+  * Parse the sex from its name. The letter case of the input is ignored.
+  *
+  * @param sex name of the sex or {@code null} if not available.
+  * @return the parsed sex, or {@code null} if {@code sex} is {@code null} or cannot be parsed.
+  */
+ private static Sex parseSexOrNull(String sex) {
+     if (sex == null)
+         return null; // sex was not provided, no need to provoke and catch an exception
+
+     try {
+         return Sex.valueOf(sex.toUpperCase());
+     } catch (Exception ignored) {
+         // Best effort: the minimal sanitizer drops the sex that cannot be parsed without reporting an issue.
+         return null;
+     }
+ }
+
+ /**
+ * Check that CURIEs are present in HPO and upgrade to primary identifier if the obsolete term is being used.
+ * The term lists are modified in place and the findings are added to the issues.
+ *
+ * @param sanitized sanitized inputs with the term lists to check.
+ * @param issues collector for the issues.
+ */
+ private void checkPhenotypicFeatures(SanitizedInputs sanitized, List issues) {
+ // Remove the terms that are unknown to HPO. This must come first,
+ // because the check of the primary identifiers throws if a term ID cannot be resolved to an HPO term.
+ checkCuriesArePresentInHpo(sanitized.presentHpoTerms(), issues);
+ checkCuriesArePresentInHpo(sanitized.excludedHpoTerms(), issues);
+
+ // Replace the obsolete term IDs with the primary ones.
+ checkTermsUsePrimaryIdentifiers(sanitized.presentHpoTerms(), issues);
+ checkTermsUsePrimaryIdentifiers(sanitized.excludedHpoTerms(), issues);
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java
new file mode 100644
index 000000000..50244f054
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationInputs.java
@@ -0,0 +1,39 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import java.util.List;
+
+/**
+ * The raw, user-provided inputs that are to be sanitized.
+ */
+public interface SanitationInputs {
+ /**
+ * @return a string with the sample ID or {@code null} if not available.
+ */
+ String sampleId();
+
+ /**
+ * @return a list with CURIEs of HPO terms that represent the phenotypic features observed in the index patient.
+ */
+ List presentHpoTerms();
+
+ /**
+ * @return a list with CURIEs of HPO terms that represent the phenotypic features that were investigated
+ * and excluded in the index patient.
+ */
+ List excludedHpoTerms();
+
+ /**
+ * @return a string with the age, expected as an ISO-8601 period such as {@code P1Y6M}, or {@code null} if not available.
+ */
+ String age();
+
+ /**
+ * @return a string with the sex, expected to name a {@code Sex} constant, or {@code null} if not available.
+ */
+ String sex();
+
+ /**
+ * @return a string with the path of the VCF file with variants or {@code null} if not available.
+ */
+ String vcf();
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java
new file mode 100644
index 000000000..b1043c911
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResult.java
@@ -0,0 +1,41 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import java.util.Collection;
+
+/**
+ * Result of the input sanitation.
+ *
+ * The result consists of the inputs that were sanitized to the greatest extent possible
+ * and of the collection of issues that were found. Note that the sanitized data may be invalid
+ * even after the sanitation if further sanitation is impossible without manual intervention.
+ *
+ * @author Daniel Danis
+ */
+public interface SanitationResult {
+
+    /**
+     * @return the inputs sanitized to the greatest extent possible.
+     */
+    SanitizedInputs sanitizedInputs();
+
+    /**
+     * @return a collection with sanity issues found in the input data.
+     */
+    Collection<SanityIssue> issues();
+
+    /**
+     * @return {@code true} if there is at least one issue in the analysis inputs.
+     */
+    default boolean hasErrorOrWarnings() {
+        return !issues().isEmpty();
+    }
+
+    /**
+     * @return {@code true} if there is at least one serious issue/error in the analysis inputs.
+     */
+    default boolean hasErrors() {
+        return issues().stream()
+                .anyMatch(issue -> issue.level() == SanityLevel.ERROR); // == also tolerates a null level
+    }
+
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java
new file mode 100644
index 000000000..c96512d8a
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultDefault.java
@@ -0,0 +1,16 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import java.util.Collection;
+
+/**
+ * Results of the sanitation of {@link SanitationInputs} by {@link InputSanitizer}.
+ *
+ * The {@link #sanitizedInputs()} provides data that were sanitized to the greatest extent possible.
+ *
+ * @param sanitizedInputs the sanitized data.
+ * @param issues a collection of issues found during sanitation; stored as provided, without a defensive copy.
+ */
+record SanitationResultDefault(SanitizedInputs sanitizedInputs,
+                               Collection<SanityIssue> issues) implements SanitationResult {
+
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java
new file mode 100644
index 000000000..19dd54d06
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitationResultNotRun.java
@@ -0,0 +1,47 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.lirical.core.model.Age;
+import org.monarchinitiative.lirical.core.model.Sex;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+
+import java.nio.file.Path;
+import java.time.Period;
+import java.time.format.DateTimeParseException;
+import java.util.Collection;
+import java.util.List;
+
+class SanitationResultNotRun implements SanitationResult {
+    private final SanitizedInputs sanitizedInputs;
+
+    /** Parse an ISO-8601 period (e.g. {@code P1Y6M}) into {@link Age}; {@code null} or malformed text yields {@code null}. */
+    private static Age parseAge(String input) {
+        if (input == null)
+            return null;
+        try {
+            return Age.parse(Period.parse(input));
+        } catch (DateTimeParseException e) {
+            return null;
+        }
+    }
+
+    /** Convert the raw inputs as they are, without running any sanity checks. */
+    SanitationResultNotRun(SanitationInputs inputs) {
+        sanitizedInputs = new SanitizedInputs(inputs.sampleId(),
+                inputs.presentHpoTerms().stream().map(TermId::of).toList(),
+                inputs.excludedHpoTerms().stream().map(TermId::of).toList(),
+                parseAge(inputs.age()),
+                inputs.sex() == null ? null : Sex.valueOf(inputs.sex()), // sex may be null; valueOf(null) would throw NPE
+                inputs.vcf() == null ? null : Path.of(inputs.vcf())
+        );
+    }
+
+    @Override
+    public SanitizedInputs sanitizedInputs() {
+        return sanitizedInputs;
+    }
+
+    @Override
+    public Collection<SanityIssue> issues() {
+        return List.of(); // the sanitation was not run, hence there are no issues to report
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java
new file mode 100644
index 000000000..762539ac0
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizedInputs.java
@@ -0,0 +1,88 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+import org.monarchinitiative.lirical.core.model.Age;
+import org.monarchinitiative.lirical.core.model.Sex;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Result of input sanitation.
+ *
+ * The HPO terms are guaranteed to be valid HPO CURIEs, age and sex are either well-formed or {@code null},
+ * and VCF is either {@code null} or points to a readable file.
+ *
+ * @author Daniel Danis
+ */
+public final class SanitizedInputs {
+ private final String sampleId;
+ private final List presentHpoTerms = new ArrayList<>();
+ private final List excludedHpoTerms = new ArrayList<>();
+ private Age age; // nullable
+ private Sex sex; // nullable
+ private Path vcf; // nullable
+
+ SanitizedInputs(String sampleId) { // both HPO term lists start empty; age, sex and vcf stay null until set
+ this.sampleId = sampleId;
+ }
+
+ SanitizedInputs(String sampleId,
+ Collection present,
+ List excluded,
+ Age age,
+ Sex sex,
+ Path vcf) {
+ this.sampleId = sampleId;
+ this.presentHpoTerms.addAll(present); // elements are copied, the caller's collection is not retained
+ this.excludedHpoTerms.addAll(excluded); // elements are copied, the caller's list is not retained
+ this.age = age; // nullable
+ this.sex = sex; // nullable
+ this.vcf = vcf; // nullable
+ }
+
+ public String sampleId() {
+ return sampleId;
+ }
+
+ public List presentHpoTerms() { // returns the internal mutable list, not a copy
+ return presentHpoTerms;
+ }
+
+ public List excludedHpoTerms() { // returns the internal mutable list, not a copy
+ return excludedHpoTerms;
+ }
+
+ void setAge(Age age) {
+ this.age = age;
+ }
+
+ public Age age() {
+ return age;
+ }
+
+
+ void setSex(Sex sex) {
+ this.sex = sex;
+ }
+
+ public Sex sex() {
+ return sex;
+ }
+
+ void setVcf(Path vcf) {
+ this.vcf = vcf;
+ }
+
+ public Path vcf() {
+ return vcf;
+ }
+
+ @Override
+ public String toString() {
+ return "SanitizedInputs[" + "sampleId=" + sampleId + ", " + "presentHpoTerms=" + presentHpoTerms + ", " + "excludedHpoTerms=" + excludedHpoTerms + ", " + "age=" + age + ", " + "sex=" + sex + ", " + "vcf=" + vcf + ']';
+ }
+
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java
new file mode 100644
index 000000000..0bcc0a9d3
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanitizerType.java
@@ -0,0 +1,18 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+/**
+ * Enum to represent the existing sanitizer types.
+ *
+ * @author Daniel Danis
+ */
+public enum SanitizerType {
+ /**
+ * Comprehensive sanitizer performs the broadest array of checks to point out all errors and warnings.
+ */
+ COMPREHENSIVE,
+
+ /**
+ * Minimal sanitizer performs the minimal checks required for the analysis to be runnable.
+ */
+ MINIMAL
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java
new file mode 100644
index 000000000..5a2eb5b95
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityIssue.java
@@ -0,0 +1,20 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+/**
+ * An issue that was found in the analysis input.
+ *
+ * @param level severity of the issue.
+ * @param message description of the issue for humans.
+ * @param solution the proposed solution or {@code null} if N/A.
+ *
+ * @author Daniel Danis
+ */
+public record SanityIssue(SanityLevel level, String message, String solution) {
+ public static SanityIssue error(String message, String solution) { // shortcut for an issue with SanityLevel.ERROR
+ return new SanityIssue(SanityLevel.ERROR, message, solution);
+ }
+
+ public static SanityIssue warning(String message, String solution) { // shortcut for an issue with SanityLevel.WARNING
+ return new SanityIssue(SanityLevel.WARNING, message, solution);
+ }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java
new file mode 100644
index 000000000..db19186f9
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/SanityLevel.java
@@ -0,0 +1,19 @@
+package org.monarchinitiative.lirical.core.sanitize;
+
+/**
+ * Represents the severity of an issue found during input data sanitation.
+ *
+ * @author Daniel Danis
+ */
+public enum SanityLevel {
+
+ /**
+ * A serious issue in the input data; the analysis cannot be carried out.
+ */
+ ERROR,
+
+ /**
+ * Something is not right, and you probably should not proceed. However, the analysis will likely complete.
+ */
+ WARNING,
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java
new file mode 100644
index 000000000..50c136a98
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/sanitize/package-info.java
@@ -0,0 +1,10 @@
+/**
+ * Sanitize the user-provided {@link org.monarchinitiative.lirical.core.sanitize.SanitationInputs}.
+ *
+ * @see org.monarchinitiative.lirical.core.sanitize.InputSanitizerFactory
+ * @see org.monarchinitiative.lirical.core.sanitize.InputSanitizer
+ * @see org.monarchinitiative.lirical.core.sanitize.SanitationResult
+ *
+ * @author Daniel Danis
+ */
+package org.monarchinitiative.lirical.core.sanitize;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java
index e78e54777..33a84607c 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/HpoTermSanitizer.java
@@ -1,6 +1,7 @@
package org.monarchinitiative.lirical.core.service;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
+import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -11,9 +12,9 @@ public class HpoTermSanitizer {
private static final Logger LOGGER = LoggerFactory.getLogger(HpoTermSanitizer.class);
- private final Ontology hpo;
+ private final MinimalOntology hpo;
- public HpoTermSanitizer(Ontology hpo) {
+ public HpoTermSanitizer(MinimalOntology hpo) {
this.hpo = hpo;
}
@@ -25,14 +26,15 @@ public HpoTermSanitizer(Ontology hpo) {
*
*/
public Optional replaceIfObsolete(TermId termId) {
- if (!hpo.getTermMap().containsKey(termId)) {
+ Optional term = hpo.termForTermId(termId);
+ if (term.isEmpty()) {
LOGGER.warn("Dropping unknown HPO term id {}", termId.getValue());
return Optional.empty();
}
- if (hpo.getObsoleteTermIds().contains(termId)) {
- TermId primary = hpo.getPrimaryTermId(termId);
- LOGGER.info("Replacing obsolete HPO term id {} with current id {}", termId, primary);
- return Optional.of(primary);
+ Term t = term.get();
+ if (!t.id().equals(termId)) {
+ LOGGER.info("Replacing obsolete HPO term id {} with current id {}", termId, t.id());
+ return Optional.of(t.id());
}
return Optional.of(termId);
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java
index 6a398aeeb..e58a06383 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeService.java
@@ -2,17 +2,17 @@
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
public interface PhenotypeService {
- static PhenotypeService of(Ontology ontology,
+ static PhenotypeService of(MinimalOntology ontology,
HpoDiseases diseases,
HpoAssociationData associationData) {
return new PhenotypeServiceImpl(ontology, diseases, associationData);
}
- Ontology hpo();
+ MinimalOntology hpo();
HpoDiseases diseases();
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java
index 03cf8ec06..2ef85c010 100644
--- a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/PhenotypeServiceImpl.java
@@ -2,9 +2,9 @@
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenol.ontology.data.MinimalOntology;
-record PhenotypeServiceImpl(Ontology hpo,
+record PhenotypeServiceImpl(MinimalOntology hpo,
HpoDiseases diseases,
HpoAssociationData associationData) implements PhenotypeService {
}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java
new file mode 100644
index 000000000..7ad044fe5
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/service/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Package with services used across LIRICAL.
+ */
+package org.monarchinitiative.lirical.core.service;
\ No newline at end of file
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java
new file mode 100644
index 000000000..f81eaae91
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/BinarySearch.java
@@ -0,0 +1,75 @@
+package org.monarchinitiative.lirical.core.util;
+
+import java.util.Comparator;
+import java.util.Optional;
+import java.util.function.Function;
+
+/**
+ * Static utility class with the binary search implementation for arrays of items with custom key extractor function.
+ */
+public class BinarySearch {
+
+    private BinarySearch() {
+    }
+
+    /**
+     * Perform a binary search on an array of sorted {@code T}s using the {@code keyExtractor} function for extracting
+     * the key for comparison. The keys are compared using their natural order.
+     *
+     * The array must be sorted by the {@code keyExtractor} function. Otherwise, the behavior is undefined.
+     *
+     * @param haystack an array of items sorted by {@code keyExtractor} function.
+     * @param keyExtractor a function for extracting a key with natural comparison order.
+     * @param needle the key of the item we are searching for.
+     * @param <T> type of the array items
+     * @param <U> type of the comparison key
+     * @return an {@link Optional} with the found item or an empty optional if the item is not present in the array.
+     */
+    public static <T, U extends Comparable<U>> Optional<T> binarySearch(T[] haystack,
+                                                                        Function<T, U> keyExtractor,
+                                                                        U needle) {
+        return binarySearch(haystack, keyExtractor, U::compareTo, needle);
+    }
+
+    /**
+     * Perform a binary search on an array of sorted {@code T}s using the {@code keyExtractor} function for extracting
+     * the key for comparison.
+     *
+     * The array must be sorted by the {@code keyExtractor} and {@code comparator} functions.
+     * Otherwise, the behavior is undefined.
+     *
+     * @param haystack an array of items sorted by the extracted keys, in the order given by {@code comparator}.
+     * @param keyExtractor a function for extracting the key that is compared by {@code comparator}.
+     * @param comparator a function for comparing the key instances.
+     * @param needle the key of the item we are searching for.
+     * @param <T> type of the array items
+     * @param <U> type of the comparison key
+     * @return an {@link Optional} with the found item or an empty optional if the item is not present in the array.
+     */
+    public static <T, U> Optional<T> binarySearch(T[] haystack,
+                                                  Function<T, U> keyExtractor,
+                                                  Comparator<? super U> comparator,
+                                                  U needle) {
+        // Search the closed interval [low, high]; an empty array gives high == -1 and the loop is skipped.
+        int low = 0;
+        int high = haystack.length - 1;
+
+        while (low <= high) {
+            // Midpoint computed this way cannot overflow, unlike (low + high) / 2.
+            int mid = low + ((high - low) / 2);
+            T item = haystack[mid];
+            int comparison = comparator.compare(needle, keyExtractor.apply(item));
+            if (comparison == 0) {
+                return Optional.ofNullable(item);
+            } else if (comparison < 0) {
+                // The needle sorts before the item: continue in the left half.
+                high = mid - 1;
+            } else {
+                // The needle sorts after the item: continue in the right half.
+                low = mid + 1;
+            }
+        }
+
+        return Optional.empty();
+    }
+}
diff --git a/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java
new file mode 100644
index 000000000..078d35c2e
--- /dev/null
+++ b/lirical-core/src/main/java/org/monarchinitiative/lirical/core/util/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Package with utility methods such as binary search with custom comparator for extracting the sorting key.
+ */
+package org.monarchinitiative.lirical.core.util;
\ No newline at end of file
diff --git a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl b/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl
deleted file mode 100644
index cc22b0295..000000000
--- a/lirical-core/src/main/resources/org/monarchinitiative/lirical/core/output/liricalTSV.ftl
+++ /dev/null
@@ -1,11 +0,0 @@
-! LIRICAL TSV Output (${resultsMeta.liricalVersion})
-! Sample: ${resultsMeta.sampleName!"n/a"}
-! Observed HPO terms
-<#assign tab="\t">
-<#list observedHPOs as hpo>
-! ${hpo}
-#list>
-${header}
-<#list diff as dd>
-${dd.rank}${tab}${dd.diseaseName}${tab}${dd.diseaseCurie}${tab}${dd.pretestprob}${tab}${dd.posttestprob}${tab}${dd.compositeLR}${tab}${dd.entrezGeneId}${tab}${dd.varString}
-#list>
\ No newline at end of file
diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java
index 50d7814ba..b12e3a61a 100644
--- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java
+++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/GenotypeLikelihoodRatioTest.java
@@ -48,6 +48,7 @@ public void testOneClinVarVariant() {
Gene2Genotype g2g = setupGeneToGenotype(MADE_UP_GENE, 1, 1, 0.8);
GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(Map.of(), 0.1), OPTIONS);
GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_DOMINANT));
+ assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.ONE_P_OR_LP_CLINVAR_ALLELE_IN_AD));
Assertions.assertEquals(1000, gle.lr(), EPSILON);
}
@@ -62,6 +63,7 @@ public void testTwoClinVarVariants() {
GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(Map.of(), 0.1), OPTIONS);
GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_RECESSIVE));
+ assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.TWO_P_OR_LP_CLINVAR_ALLELES_IN_AR));
Assertions.assertEquals(1000. * 1000, gle.lr(), EPSILON);
}
@@ -80,6 +82,7 @@ public void testHLA_Bsituation() {
GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(background, 0.1), OPTIONS);
GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_DOMINANT));
// heuristic score
+ assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.NO_VARIANTS_DETECTED_AD));
Assertions.assertEquals(0.05, gle.lr(), EPSILON);
}
@@ -97,6 +100,7 @@ public void testRecessiveManyCalledPathVariants() {
GenotypeLikelihoodRatio glr = new GenotypeLikelihoodRatio(BackgroundVariantFrequencyService.of(g2background, 0.1), OPTIONS);
GenotypeLrWithExplanation gle = glr.evaluateGenotype(SAMPLE_ID, g2g, List.of(AUTOSOMAL_RECESSIVE));
// heuristic score for AR
+ assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.NO_VARIANTS_DETECTED_AR));
Assertions.assertEquals(0.05 * 0.05, gle.lr(), EPSILON);
}
@@ -108,7 +112,7 @@ public void thrbExample() {
when(g2g.geneId()).thenReturn(thrbId);
when(g2g.hasVariants()).thenReturn(true);
when(g2g.pathogenicClinVarCount(SAMPLE_ID)).thenReturn(0);
- when(g2g.pathogenicAlleleCount(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(56);
+ when(g2g.deleteriousAlleleCount(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(56);
when(g2g.getSumOfPathBinScores(SAMPLE_ID, PATHOGENICITY_THRESHOLD)).thenReturn(44.80000);
Map gene2Background = Map.of(thrbId.id(), 0.006973);
@@ -118,7 +122,8 @@ public void thrbExample() {
// TODO - check
assertThat(gle.geneId(), equalTo(thrbId));
+ assertThat(gle.matchType(), equalTo(GenotypeLrMatchType.LIRICAL_GT_MODEL));
assertThat(gle.lr(), is(closeTo(1.719420800179587e109, EPSILON)));
- assertThat(gle.explanation(), equalTo("log10(LR)=109.235 P(G|D)=0.0000. P(G|¬D)=0.0000. Mode of inheritance: autosomal recessive. Observed weighted pathogenic variant count: 44.80. λdisease=2. λbackground=0.0070."));
+ assertThat(gle.explanation(), equalTo("log10(LR)=109.235 P(G|D)=0.0000. P(G|¬D)=0.0000. Mode of inheritance: autosomal recessive. Observed weighted deleterious variant count: 44.80. λdisease=2. λbackground=0.0070."));
}
}
diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java
index 61d06126c..6d8464701 100644
--- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java
+++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/likelihoodratio/TestResultTest.java
@@ -58,7 +58,7 @@ public void init() {
List list1 = createTestList(some, 2.0, 3.0, 4.0);
List excluded = List.of();
double prevalence = 0.025;
- GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, 2.0, "Explanation");
+ GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, GenotypeLrMatchType.LIRICAL_GT_MODEL, 2.0, "Explanation");
tresultWithGenotype = TestResult.of(d1.id(), prevalence, list1,excluded, genotypeLr);
tresultNoGenotype = TestResult.of(d1.id(), prevalence, list1,excluded, null);
}
@@ -183,7 +183,7 @@ public void testTestResultSorting() {
// The ranks of the objects get set in the evaluate method of HpoCase so cannot be tested here.
// now add another test result, same as result3 but with additional genotype evidence
// result4 should now be the top hit
- GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, 2.0, "Explanation");
+ GenotypeLrWithExplanation genotypeLr = GenotypeLrWithExplanation.of(MADE_UP_GENE, GenotypeLrMatchType.LIRICAL_GT_MODEL, 2.0, "Explanation");
TestResult result4= TestResult.of(d3.id(), prevalence, list3,excluded, genotypeLr);
lst.add(result4);
assertEquals(lst.get(3),result4);
diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java
new file mode 100644
index 000000000..359798208
--- /dev/null
+++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/GenesAndGenotypesTest.java
@@ -0,0 +1,60 @@
+package org.monarchinitiative.lirical.core.model;
+
+import org.junit.jupiter.api.Test;
+import org.monarchinitiative.phenol.annotations.formats.GeneIdentifier;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+import org.monarchinitiative.svart.CoordinateSystem;
+import org.monarchinitiative.svart.GenomicVariant;
+import org.monarchinitiative.svart.Strand;
+import org.monarchinitiative.svart.assembly.GenomicAssemblies;
+import org.monarchinitiative.svart.assembly.GenomicAssembly;
+
+import java.util.List;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+public class GenesAndGenotypesTest {
+
+ private static final GenomicAssembly HG38 = GenomicAssemblies.GRCh38p13();
+
+ @Test
+ public void computeFilteringStats() {
+ GenesAndGenotypes gag = prepareToyGenesAndGenotypes();
+
+ FilteringStats filteringStats = gag.computeFilteringStats();
+ assertThat(filteringStats.nFilteredVariants(), equalTo(13L)); // presumably 3 + 10, the last arguments of Gene2Genotype.of below - TODO confirm
+ assertThat(filteringStats.nPassingVariants(), equalTo(2L)); // presumably the two variants listed for the first gene
+ assertThat(filteringStats.genesWithVariants(), equalTo(1L)); // only the first gene is created with a non-empty variant list
+ }
+
+ private static GenesAndGenotypes prepareToyGenesAndGenotypes() {
+ return GenesAndGenotypes.of(List.of(
+ Gene2Genotype.of(
+ GeneIdentifier.of(TermId.of("HGNC:1234"), "FAKE1234"),
+ List.of(
+ LiricalVariant.of(
+ GenotypedVariant.of(GenomeBuild.HG38,
+ GenomicVariant.of(HG38.contigByName("1"), "SNP1",
+ Strand.POSITIVE, CoordinateSystem.ONE_BASED, 101,
+ "C", "G"),
+ List.of(),
+ true),
+ List.of(), VariantMetadata.empty()), // irrelevant
+ LiricalVariant.of(
+ GenotypedVariant.of(GenomeBuild.HG38,
+ GenomicVariant.of(HG38.contigByName("1"), "SNP1", // NOTE(review): same variant id as the first variant - intentional?
+ Strand.POSITIVE, CoordinateSystem.ONE_BASED, 201,
+ "T", "A"),
+ List.of(), true),
+ List.of(), VariantMetadata.empty()) // irrelevant
+ ),
+ 3),
+ Gene2Genotype.of(
+ GeneIdentifier.of(TermId.of("HGNC:1234"), "FAKE1234"), // NOTE(review): same gene identifier as the first gene - intentional?
+ List.of(),
+ 10
+ )
+ ));
+ }
+}
\ No newline at end of file
diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java
index bc507af58..8eed271da 100644
--- a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java
+++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/model/HpoCaseTest.java
@@ -9,8 +9,7 @@
import java.util.ArrayList;
import java.util.List;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.*;
/**
@@ -74,7 +73,7 @@ public void testGetExcludedAbnormalities() {
@Test
public void testAge() {
// we did not specify the age, so it should return not known
- assertEquals(Age.ageNotKnown(),hpocase.getAge());
+ assertNull(hpocase.getAge());
}
@Test
diff --git a/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java
new file mode 100644
index 000000000..b87c1d33d
--- /dev/null
+++ b/lirical-core/src/test/java/org/monarchinitiative/lirical/core/util/BinarySearchTest.java
@@ -0,0 +1,86 @@
+package org.monarchinitiative.lirical.core.util;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.hamcrest.MatcherAssert.*;
+import static org.hamcrest.Matchers.*;
+
+public class BinarySearchTest {
+ @ParameterizedTest
+ @CsvSource({
+ "1|2, 1",
+ "1|2, 2",
+
+ "1|2|3|4, 1",
+ "1|2|3|4, 2",
+ "1|2|3|4, 3",
+ "1|2|3|4, 4",
+ })
+ public void binarySearch_evenItemCount(String payload, int key) { // payload: '|'-separated sorted integers; the key is expected to be found
+ String[] array = payload.split("\\|");
+ Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key); // despite the name, an Optional with the found item
+
+ assertThat(resultArray.isPresent(), equalTo(true));
+ assertThat(resultArray.get(), equalTo(String.valueOf(key)));
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ "1|3, 0",
+ "1|3, 2",
+ "1|3, 4",
+ })
+ public void binarySearch_evenItemCount_notPresent(String payload, int key) { // keys below, between and above the array items
+ String[] array = payload.split("\\|");
+ Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key);
+
+ assertThat(resultArray.isEmpty(), equalTo(true));
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ "1, 1",
+
+ "1|2|3, 1",
+ "1|2|3, 2",
+ "1|2|3, 3",
+
+ "1|2|3|4|5, 1",
+ "1|2|3|4|5, 2",
+ "1|2|3|4|5, 3",
+ "1|2|3|4|5, 4",
+ "1|2|3|4|5, 5",
+ })
+ public void binarySearch_oddItemCount(String payload, int key) { // every item of an odd-sized array is expected to be found
+ String[] array = payload.split("\\|");
+ Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key);
+
+ assertThat(resultArray.isPresent(), equalTo(true));
+ assertThat(resultArray.get(), equalTo(String.valueOf(key)));
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ "1|3|5, 0",
+ "1|3|5, 2",
+ "1|3|5, 4",
+ "1|3|5, 6",
+ })
+ public void binarySearch_oddItemCount_notPresent(String payload, int key) { // keys below, between and above the array items
+ String[] array = payload.split("\\|");
+ Optional resultArray = BinarySearch.binarySearch(array, Integer::parseInt, key);
+
+ assertThat(resultArray.isEmpty(), equalTo(true));
+ }
+
+ @Test
+ public void binarySearch_emptyCollection() { // an empty array must yield an empty Optional
+ Optional resultArray = BinarySearch.binarySearch(new String[0], Integer::parseInt, 1);
+ assertThat(resultArray.isEmpty(), equalTo(true));
+ }
+}
\ No newline at end of file
diff --git a/lirical-exomiser-db-adapter/pom.xml b/lirical-exomiser-db-adapter/pom.xml
index 61ee297b5..05de45bb2 100644
--- a/lirical-exomiser-db-adapter/pom.xml
+++ b/lirical-exomiser-db-adapter/pom.xml
@@ -5,7 +5,7 @@
LIRICAL
org.monarchinitiative.lirical
- 2.0.0-RC2
+ 2.0.0-RC3
4.0.0
@@ -18,12 +18,16 @@
${project.parent.version}