Skip to content

Commit

Permalink
Merge pull request #669 from TheJacksonLaboratory/release-2.0.4
Browse files Browse the repository at this point in the history
Make release 2.0.4
  • Loading branch information
ielis authored Jan 31, 2025
2 parents 7cd5019 + 5f03369 commit d9b4c64
Show file tree
Hide file tree
Showing 14 changed files with 140 additions and 35 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
# The short X.Y version.
version = u'2.0'
# The full version, including alpha/beta/rc tags.
release = u'2.0.3'
release = u'2.0.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
4 changes: 4 additions & 0 deletions docs/running.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ The configuration options tweak the analysis.
The JSON report will include *all* diseases all the time.
* ``--transcript-db``: transcript database (default: ``RefSeq``), see :ref:`rsttx-dbs` for more info.
* ``--use-orphanet``: use `Orphanet <https://www.orpha.net/consor/cgi-bin/index.php>`_ annotations (default: ``false``).
* ``--target-diseases``: limit the analysis to the provided disease IDs.
Expecting a comma-separated list of disease IDs, such as ``OMIM:614102,OMIM:619340``.
The ``--use-orphanet`` option is ignored if at least one disease ID is provided.
All diseases are analyzed by default.
* ``--strict``: use strict penalties if the genotype does not match the disease model
in terms of number of called pathogenic alleles (default: ``false``).
* ``--pathogenicity-threshold``: Variants with a pathogenicity score greater than this threshold are considered deleterious (default: ``0.8``).
Expand Down
2 changes: 1 addition & 1 deletion lirical-background/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>LIRICAL</artifactId>
<groupId>org.monarchinitiative.lirical</groupId>
<version>2.0.3</version>
<version>2.0.4</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion lirical-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>LIRICAL</artifactId>
<groupId>org.monarchinitiative.lirical</groupId>
<version>2.0.3</version>
<version>2.0.4</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
Expand All @@ -35,6 +37,7 @@ abstract class LiricalConfigurationCommand extends BaseCommand {

private static final Logger LOGGER = LoggerFactory.getLogger(LiricalConfigurationCommand.class);
protected static final String UNKNOWN_VERSION_PLACEHOLDER = "UNKNOWN VERSION";
private static final Pattern DISEASE_ID = Pattern.compile("^\\w+:\\w+$");

// ---------------------------------------------- RESOURCES --------------------------------------------------------
@CommandLine.ArgGroup(validate = false, heading = "Resource paths:%n")
Expand Down Expand Up @@ -97,6 +100,16 @@ public static class RunConfiguration {
description = "Use Orphanet annotation data (default: ${DEFAULT-VALUE}).")
public boolean useOrphanet = false;

/**
 * Raw disease IDs given via {@code --target-diseases}; the option value is split on commas.
 * <p>
 * The field is initialized to {@code null}, and the rest of this class only validates
 * and applies the IDs when the list is not {@code null}.
 */
@CommandLine.Option(names = {"--target-diseases"},
split = ",",
paramLabel = "disease",
description = {
"Limit the analysis to the provided disease IDs. ",
"(default: analyze all diseases)."
}
)
public List<String> targetDiseases= null;

@CommandLine.Option(names = {"--strict"},
description = "Use strict penalties if the genotype does not match the disease model in terms " +
"of number of called pathogenic alleles. (default: ${DEFAULT-VALUE}).")
Expand Down Expand Up @@ -175,6 +188,16 @@ protected List<String> checkInput() {
errors.add(msg);
}

if (runConfiguration.targetDiseases != null
&& !runConfiguration.targetDiseases.stream()
.allMatch(DISEASE_ID.asMatchPredicate())) {
String failures = runConfiguration.targetDiseases.stream()
.filter(Predicate.not(DISEASE_ID.asMatchPredicate()))
.collect(Collectors.joining(","));
String msg = "One or more target disease IDs do not look like a compact URI: %s".formatted(failures);
errors.add(msg);
}

return errors;
}

Expand Down Expand Up @@ -234,6 +257,15 @@ protected AnalysisOptions prepareAnalysisOptions(Lirical lirical, GenomeBuild ge
LOGGER.debug("Using disease databases {}", usedDatabasesSummary);
builder.setDiseaseDatabases(diseaseDatabases);

if (runConfiguration.targetDiseases != null) {
String usedDiseaseIds = runConfiguration.targetDiseases.stream().collect(Collectors.joining(", ", "[", "]"));
LOGGER.debug("Limiting the analysis to the following diseases: {}", usedDiseaseIds);
List<TermId> targetDiseases = runConfiguration.targetDiseases.stream()
.map(TermId::of)
.toList();
builder.setTargetDiseases(targetDiseases);
}

// The rest..
LOGGER.debug("Variants with pathogenicity score >{} are considered deleterious", runConfiguration.pathogenicityThreshold);
builder.variantDeleteriousnessThreshold(runConfiguration.pathogenicityThreshold);
Expand Down
2 changes: 1 addition & 1 deletion lirical-configuration/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>LIRICAL</artifactId>
<groupId>org.monarchinitiative.lirical</groupId>
<version>2.0.3</version>
<version>2.0.4</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion lirical-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>LIRICAL</artifactId>
<groupId>org.monarchinitiative.lirical</groupId>
<version>2.0.3</version>
<version>2.0.4</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.monarchinitiative.lirical.core.model.GenomeBuild;
import org.monarchinitiative.lirical.core.model.TranscriptDatabase;
import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -33,6 +34,14 @@ static Builder builder() {
*/
Set<DiseaseDatabase> diseaseDatabases();

/**
 * Get the IDs of the diseases that the analysis should be limited to.
 * <p>
 * A {@code null} return value is a legal result and means "no restriction",
 * so callers must check for {@code null} before using the collection.
 *
 * @return a collection of disease IDs of the diseases of interest or {@code null}
 * if <em>all</em> diseases should be tested.
 */
Collection<TermId> targetDiseases();

/**
* @return threshold for determining if the variant is deleterious or not.
* The threshold range must be in range of <code>[0,1]</code>.
Expand Down Expand Up @@ -71,14 +80,16 @@ static Builder builder() {

/**
* A builder for {@link AnalysisOptions}.
* <p>
* The builder is <em>NOT</em> thread safe!
*/
class Builder {

private static final Logger LOGGER = LoggerFactory.getLogger(Builder.class);

private GenomeBuild genomeBuild = GenomeBuild.HG38;
private TranscriptDatabase transcriptDatabase = TranscriptDatabase.REFSEQ;
private final Set<DiseaseDatabase> diseaseDatabases = new HashSet<>(List.of(DiseaseDatabase.OMIM, DiseaseDatabase.DECIPHER));
private Set<TermId> targetDiseases = null; // null = test all diseases
private float variantDeleteriousnessThreshold = .8f;
private double defaultVariantBackgroundFrequency = .1;
private boolean useStrictPenalties = false;
Expand Down Expand Up @@ -135,6 +146,42 @@ public Builder setDiseaseDatabases(Collection<DiseaseDatabase> diseaseDatabases)
return this;
}

/**
 * Remove any restriction on the target diseases so that <em>all</em> diseases are tested again.
 * <p>
 * The field is reset to {@code null}, which is the marker for "test all diseases".
 * Emptying the set in place would leave an empty, non-{@code null} collection,
 * and a membership-based disease filter would then match no disease at all.
 *
 * @return this builder
 */
public Builder clearTargetDiseases() {
    // Drop the reference instead of calling `clear()`: the same set instance is passed on
    // by `build()`, so clearing it in place could also change previously built options.
    this.targetDiseases = null;
    return this;
}

/**
 * Add the given disease IDs to the diseases the analysis is limited to.
 * <p>
 * Convenience overload: the array is wrapped into a list and the call is delegated
 * to {@link #addTargetDiseases(Collection)}.
 *
 * @param diseaseIds the disease IDs to add
 * @return this builder
 */
public Builder addTargetDiseases(TermId... diseaseIds) {
    List<TermId> wrapped = Arrays.asList(diseaseIds);
    return addTargetDiseases(wrapped);
}

/**
 * Add the given disease IDs to the diseases the analysis is limited to.
 * <p>
 * The backing set is created on the first call with a non-{@code null} argument,
 * so even an empty collection switches the builder from "all diseases" ({@code null})
 * to an explicit, possibly empty, selection.
 *
 * @param diseaseIds the disease IDs to add; a {@code null} value is logged as a warning and ignored
 * @return this builder
 */
public Builder addTargetDiseases(Collection<TermId> diseaseIds) {
    if (diseaseIds != null) {
        if (this.targetDiseases == null) {
            this.targetDiseases = new HashSet<>();
        }
        this.targetDiseases.addAll(diseaseIds);
    } else {
        LOGGER.warn("Target disease IDs must not be `null`!");
    }
    return this;
}

/**
 * Replace the current target diseases with the given disease IDs.
 * <p>
 * An existing backing set is emptied and refilled in place; a new set is only created
 * if there was none before.
 *
 * @param diseaseIds the disease IDs to use; a {@code null} value is logged as a warning
 *                   and the current selection is left untouched
 * @return this builder
 */
public Builder setTargetDiseases(Collection<TermId> diseaseIds) {
    if (diseaseIds != null) {
        if (this.targetDiseases == null) {
            this.targetDiseases = new HashSet<>();
        } else {
            // Discard the previous selection before adding the new IDs.
            this.targetDiseases.clear();
        }
        this.targetDiseases.addAll(diseaseIds);
    } else {
        LOGGER.warn("Target disease IDs must not be `null`!");
    }
    return this;
}

public Builder variantDeleteriousnessThreshold(float variantDeleteriousnessThreshold) {
this.variantDeleteriousnessThreshold = variantDeleteriousnessThreshold;
return this;
Expand Down Expand Up @@ -169,6 +216,7 @@ public AnalysisOptions build() {
return new AnalysisOptionsDefault(genomeBuild,
transcriptDatabase,
diseaseDatabases,
targetDiseases,
variantDeleteriousnessThreshold,
defaultVariantBackgroundFrequency,
useStrictPenalties,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import org.monarchinitiative.lirical.core.model.GenomeBuild;
import org.monarchinitiative.lirical.core.model.TranscriptDatabase;
import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase;
import org.monarchinitiative.phenol.ontology.data.TermId;

import java.util.Collection;
import java.util.Set;

record AnalysisOptionsDefault(
GenomeBuild genomeBuild,
TranscriptDatabase transcriptDatabase,
Set<DiseaseDatabase> diseaseDatabases,
Collection<TermId> targetDiseases,
float variantDeleteriousnessThreshold,
double defaultVariantBackgroundFrequency,
boolean useStrictPenalties,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.function.Predicate;
import java.util.stream.Stream;

public class LiricalAnalysisRunnerImpl implements LiricalAnalysisRunner {
Expand Down Expand Up @@ -48,22 +49,20 @@ private LiricalAnalysisRunnerImpl(PhenotypeService phenotypeService,

@Override
public AnalysisResults run(AnalysisData data, AnalysisOptions options) throws LiricalAnalysisException {
Collection<String> diseaseDatabasePrefixes = options.diseaseDatabases().stream()
.map(DiseaseDatabase::prefix)
.toList();
Map<TermId, List<Gene2Genotype>> diseaseToGenotype = groupDiseasesByGene(data.genes());

Optional<GenotypeLikelihoodRatio> genotypeLikelihoodRatio = configureGenotypeLikelihoodRatio(options.genomeBuild(),
options.variantDeleteriousnessThreshold(),
options.defaultVariantBackgroundFrequency(),
options.useStrictPenalties());
if (genotypeLikelihoodRatio.isEmpty())
if (genotypeLikelihoodRatio.isEmpty()) {
throw new LiricalAnalysisException("Cannot configure genotype LR for %s".formatted(options.genomeBuild()));
}

ProgressReporter progressReporter = new ProgressReporter(1_000, "diseases");
Stream<TestResult> testResultStream = phenotypeService.diseases().hpoDiseases()
.parallel() // why not?
.filter(disease -> diseaseDatabasePrefixes.contains(disease.id().getPrefix()))
.filter(prepareDiseaseFilter(options.diseaseDatabases(), options.targetDiseases()))
.peek(d -> progressReporter.log())
.map(disease -> analyzeDisease(genotypeLikelihoodRatio.get(), disease, data, options, diseaseToGenotype))
.flatMap(Optional::stream);
Expand All @@ -78,6 +77,22 @@ public AnalysisResults run(AnalysisData data, AnalysisOptions options) throws Li
}
}

/**
 * Create the predicate that decides which diseases are analyzed.
 * <p>
 * If target diseases are provided, only diseases with a matching ID pass and the disease
 * databases are not consulted. Otherwise, a disease passes if the prefix of its ID
 * is the prefix of one of the provided disease databases.
 *
 * @param diseaseDatabasePrefixes the disease databases to use when no target diseases are given
 * @param targetDiseases the IDs of the diseases of interest or {@code null} to select by database
 * @return the disease filter
 */
private static Predicate<HpoDisease> prepareDiseaseFilter(
        Set<DiseaseDatabase> diseaseDatabasePrefixes,
        Collection<TermId> targetDiseases
) {
    if (targetDiseases != null) {
        // Restrict the analysis to the selected diseases.
        return candidate -> targetDiseases.contains(candidate.id());
    }

    // Restrict the analysis to the diseases with the chosen prefixes.
    List<String> allowedPrefixes = new ArrayList<>();
    for (DiseaseDatabase database : diseaseDatabasePrefixes) {
        allowedPrefixes.add(database.prefix());
    }
    return candidate -> allowedPrefixes.contains(candidate.id().getPrefix());
}

private Map<TermId, List<Gene2Genotype>> groupDiseasesByGene(GenesAndGenotypes genes) {
Map<TermId, Collection<TermId>> geneToDisease = phenotypeService.associationData().associations().geneIdToDiseaseIds();
Map<TermId, List<Gene2Genotype>> diseaseToGenotype = new HashMap<>(genes.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

/**
* This class is designed to calculate the background and foreground frequencies of any HPO term in any disease
* (This is calculated by {@link #initializeFrequencyMap()} and stored in {@link #hpoTerm2OverallFrequency}).
* (This is calculated by {@link #initializeFrequencyMap(MinimalOntology, HpoDiseases)} and stored in {@link #hpoTerm2OverallFrequency}).
* The main entry point into this class is the function {@link #lrForObservedTerm}, which is called by
* {@link LiricalAnalysisRunner} once for each HPO term
* to which the case is annotated; it calls it once for each disease in our
Expand All @@ -39,11 +39,9 @@ public class PhenotypeLikelihoodRatio {
public static final float DEFAULT_TERM_FREQUENCY = 1.f; // TODO - is this the right thing to do?
/** The HPO ontology with all of its subontologies. */
private final MinimalOntology ontology;
/** This map has one entry for each disease in our database. Key--the disease ID, e.g., OMIM:600200.*/
private final Map<TermId, HpoDisease> diseaseMap;
private final LrWithExplanationFactory explanationFactory;
/** Overall, i.e., background frequency of each HPO term. */
private Map<TermId, Double> hpoTerm2OverallFrequency = null;
private final Map<TermId, Double> hpoTerm2OverallFrequency;
/**
* This is the probability of a finding if the disease is not annotated to it and there
* is no common ancestor except the root. There are many possible causes of findings called
Expand All @@ -62,9 +60,8 @@ public class PhenotypeLikelihoodRatio {
*/
public PhenotypeLikelihoodRatio(MinimalOntology ontology, HpoDiseases diseases) {
this.ontology = ontology;
this.diseaseMap = diseases.diseaseById();
this.explanationFactory = new LrWithExplanationFactory(ontology); // TODO - DI?
initializeFrequencyMap();
this.hpoTerm2OverallFrequency = initializeFrequencyMap(ontology, diseases);
}

/**
Expand Down Expand Up @@ -153,7 +150,7 @@ public LrWithExplanation lrForObservedTerm(TermId queryTid, InducedDiseaseGraph
}
// If we get here, queryId is not directly annotated in the disease, and it is not a child
// of a disease term, nor is a disease term a subclass of queryTid. The next bit of code
// checks whether they have a common ancestor that is more specfic that Phenotypic_Abnormality
// checks whether they have a common ancestor that is more specific than Phenotypic_Abnormality
Term2Freq t2f = idg.getClosestAncestor(queryTid, ontology);
if (t2f.nonRootCommonAncestor()) {
double numerator = t2f.frequency();
Expand Down Expand Up @@ -312,14 +309,25 @@ private double getProportionInChildren(TermId queryTid, TermId diseaseTid) {

/**
* Initialize the {@link #hpoTerm2OverallFrequency} object that has the background frequencies of each of the
* HPO terms in the ontology. */
private void initializeFrequencyMap() {
* HPO terms in the ontology.
*
* @return the frequency map
*/
private static Map<TermId, Double> initializeFrequencyMap(
MinimalOntology ontology,
HpoDiseases diseases
) {
Map<TermId, Double> mp = new HashMap<>();
for (TermId tid : ontology.nonObsoleteTermIds()) {
mp.put(tid, 0.0D);
}
Map<TermId, Double> mapbuilder = new HashMap<>();
for (HpoDisease dis : this.diseaseMap.values()) {

Set<TermId> seenDiseases = new HashSet<>(); // Make sure we only see each disease once.
for (HpoDisease dis : diseases) {
if (!seenDiseases.add(dis.id()))
// `add` returns true if the ID was added (was new).
// Therefore, we continue if the ID was NOT added.
continue;
// We construct a map in order to get the maximum frequencies for any
// given ancestor term, also in order to avoid double counting.
Map<TermId, Double> updateMap=new HashMap<>();
Expand Down Expand Up @@ -349,19 +357,14 @@ private void initializeFrequencyMap() {
: previous + updateMap.get(tid)); // cumulative
}
}
Map<TermId, Double> builder = new HashMap<>();
// Now we need to normalize by the number of diseases.
double N = getNumberOfDiseases();
for (Map.Entry<TermId, Double> me : mp.entrySet()) {
double f = me.getValue() / N;
mapbuilder.put(me.getKey(), f);
double f = me.getValue() / diseases.size();
builder.put(me.getKey(), f);
}
hpoTerm2OverallFrequency = Map.copyOf(mapbuilder);
logger.trace("Got data on background frequency for " + hpoTerm2OverallFrequency.size() + " terms");
}

/** @return the number of diseases we are using for the calculations. */
private int getNumberOfDiseases() {
return diseaseMap.size();
logger.trace("Got data on background frequency for {} terms", builder.size());
return Map.copyOf(builder);
}

}
2 changes: 1 addition & 1 deletion lirical-exomiser-db-adapter/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>LIRICAL</artifactId>
<groupId>org.monarchinitiative.lirical</groupId>
<version>2.0.3</version>
<version>2.0.4</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Loading

0 comments on commit d9b4c64

Please sign in to comment.