diff --git a/src/main/java/genepi/riskscore/commands/CreateCollectionCommand.java b/src/main/java/genepi/riskscore/commands/CreateCollectionCommand.java index 75959ea..5f3dee0 100644 --- a/src/main/java/genepi/riskscore/commands/CreateCollectionCommand.java +++ b/src/main/java/genepi/riskscore/commands/CreateCollectionCommand.java @@ -10,7 +10,9 @@ import genepi.riskscore.io.csv.CsvWithHeaderTableReader; import genepi.riskscore.io.csv.CsvWithHeaderTableWriter; import genepi.riskscore.io.formats.PGSCatalogFormat; +import genepi.riskscore.io.formats.PGSCatalogHarmonizedFormat; import genepi.riskscore.io.formats.RiskScoreFormatImpl; +import genepi.riskscore.io.scores.MergedRiskScoreCollection; import picocli.CommandLine.Command; import picocli.CommandLine.Option; import picocli.CommandLine.Parameters; @@ -18,281 +20,280 @@ @Command(name = "create-collection", version = App.VERSION) public class CreateCollectionCommand implements Callable { - private static final String CHROMOSOME = "hm_chr"; + @Option(names = "--out", description = "output score file", required = false) + private String output = null; - private static final String POSITION = "hm_pos"; - - @Option(names = "--out", description = "output score file", required = false) - private String output = null; - - @Parameters(description = "score files") - private String[] filenames; - - public static String[] chromosomeOrder = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "XY"}; - - public static Map chromosomeOrderIndex = new HashMap(); - - static { - for (int i = 0 ; i < chromosomeOrder.length; i++){ - chromosomeOrderIndex.put(chromosomeOrder[i], i); - } - } - - @Override - public Integer call() throws Exception { - - String[] names = new String[filenames.length]; - CsvWithHeaderTableReader[] readers = new CsvWithHeaderTableReader[filenames.length]; - RiskScoreFormatImpl[] formats = new RiskScoreFormatImpl[filenames.length]; - Variant[] variants = new Variant[filenames.length]; - - for (int i = 0; i < filenames.length; i++){ - names[i] = RiskScoreFile.getName(filenames[i]); - formats[i] = new PGSCatalogFormat(filenames[i], false); - readers[i] = new CsvWithHeaderTableReader(filenames[i],formats[i].getSeparator()); - try { - variants[i] = readVariant(readers[i], formats[i]); - }catch (Exception e){ - throw new RuntimeException("File " + filenames[i], e); - } - } - - List header = new Vector(); - header.add("#Date=" + new Date()); - header.add("#Scores=" + filenames.length); - - CsvWithHeaderTableWriter writer = null; - if (output != null){ - writer = new CsvWithHeaderTableWriter(output, '\t', header); - } else { - writer = new CsvWithHeaderTableWriter('\t', header); - } - - String[] variantColumns = new String[]{"chr_name", "chr_position","effect_allele","other_allele"}; - String[] columns = merge(variantColumns, names); - writer.setColumns(columns); - - int variantsWritten = 0; - - while(!isEmpty(variants)){ - Variant variant = findMinVariant(variants); - addVariant(writer, variant); - for (int i = 0 ; i < variants.length; i++){ - if (variants[i] != null && variants[i].matches(variant)){ - writeVariant(writer, names[i], variants[i].getNormalizedEffect(variant)); - Variant nextVariant = null; - try { - nextVariant = readVariant(readers[i], formats[i]); - }catch (Exception e){ - throw new RuntimeException("File " + filenames[i], e); - } - if (nextVariant != null && nextVariant.isBefore(variants[i])){ - throw new RuntimeException(filenames[i] + ": Not sorted. " + nextVariant + " is before " + variants[i] ); - } - variants[i] = nextVariant; - } else { - writeMissing(writer, names[i]); - } - } - writer.next(); - variantsWritten++; - } - - - writer.close(); - - for (ITableReader reader: readers){ - reader.close(); - } - - System.err.println("Wrote " + variantsWritten + " unique variants and " + filenames.length + " scores."); - - return 0; - - } - - public void setOutput(String output) { - this.output = output; - } - - public void setFilenames(String[] filenames) { - this.filenames = filenames; - } - - - public boolean isEmpty(Variant[] variants){ + @Parameters(description = "score files") + private String[] filenames; + + public static String[] chromosomeOrder = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "XY"}; + + public static Map chromosomeOrderIndex = new HashMap(); + + static { + for (int i = 0; i < chromosomeOrder.length; i++) { + chromosomeOrderIndex.put(chromosomeOrder[i], i); + } + } + + @Override + public Integer call() throws Exception { + + String[] names = new String[filenames.length]; + CsvWithHeaderTableReader[] readers = new CsvWithHeaderTableReader[filenames.length]; + RiskScoreFormatImpl[] formats = new RiskScoreFormatImpl[filenames.length]; + Variant[] variants = new Variant[filenames.length]; + + for (int i = 0; i < filenames.length; i++) { + names[i] = RiskScoreFile.getName(filenames[i]); + formats[i] = new PGSCatalogHarmonizedFormat(); + readers[i] = new CsvWithHeaderTableReader(filenames[i], formats[i].getSeparator()); + try { + variants[i] = readVariant(readers[i], formats[i]); + } catch (Exception e) { + throw new RuntimeException("File " + filenames[i], e); + } + } + + List header = new Vector(); + header.add(MergedRiskScoreCollection.HEADER); + header.add("#Date=" + new Date()); + header.add("#Scores=" + filenames.length); + + CsvWithHeaderTableWriter writer = null; + if (output != null) { + writer = new CsvWithHeaderTableWriter(output, '\t', header); + } else { + writer = new CsvWithHeaderTableWriter('\t', header); + } + + String[] variantColumns = new String[]{MergedRiskScoreCollection.COLUMN_CHROMOSOME, + MergedRiskScoreCollection.COLUMN_POSITION, MergedRiskScoreCollection.COLUMN_EFFECT_ALLELE, + MergedRiskScoreCollection.COLUMN_OTHER_ALLELE}; + String[] columns = merge(variantColumns, names); + writer.setColumns(columns); + + int variantsWritten = 0; + + while (!isEmpty(variants)) { + Variant variant = findMinVariant(variants); + addVariant(writer, variant); + for (int i = 0; i < variants.length; i++) { + if (variants[i] != null && variants[i].matches(variant)) { + writeVariant(writer, names[i], variants[i].getNormalizedEffect(variant)); + Variant nextVariant = null; + try { + nextVariant = readVariant(readers[i], formats[i]); + } catch (Exception e) { + throw new RuntimeException("File " + filenames[i], e); + } + if (nextVariant != null && nextVariant.isBefore(variants[i])) { + throw new RuntimeException(filenames[i] + ": Not sorted. " + nextVariant + " is before " + variants[i]); + } + variants[i] = nextVariant; + } else { + writeMissing(writer, names[i]); + } + } + writer.next(); + variantsWritten++; + } + + + writer.close(); + + for (ITableReader reader : readers) { + reader.close(); + } + + System.err.println("Wrote " + variantsWritten + " unique variants and " + filenames.length + " scores."); + + return 0; + + } + + public void setOutput(String output) { + this.output = output; + } + + public void setFilenames(String[] filenames) { + this.filenames = filenames; + } + + + public boolean isEmpty(Variant[] variants) { for (Variant variant : variants) { if (variant != null) { return false; } } - return true; - } - - public Variant findMinVariant(Variant[] variants){ - Variant minVariant = variants[0]; - for (int i = 1; i < variants.length; i++){ - if (minVariant == null) { - minVariant = variants[i]; - continue; - } - if (variants[i] == null){ - continue; - } - if (variants[i].isBefore(minVariant)) { - minVariant = variants[i]; - } - } - return minVariant; - } - - private String[] merge(String[] first, String[] second) { - int fal = first.length; - int sal = second.length; - String[] result = new String[fal + sal]; - System.arraycopy(first, 0, result, 0, fal); - System.arraycopy(second, 0, result, fal, sal); - return result; - } - - public Variant readVariant(ITableReader reader, RiskScoreFormatImpl format) { - if (!reader.next()){ - return null; - } - Variant variant = new Variant(); - variant.setChromosome(reader.getString(CHROMOSOME)); - if (reader.getString(POSITION).isEmpty()){ - throw new RuntimeException("Not position found."); - } - variant.setPosition(reader.getInteger(POSITION)); - variant.setEffectAllele(reader.getString(format.getEffectAllele())); - variant.setOtherAllele(reader.getString(format.getOtherAllele())); - variant.setEffect(reader.getDouble(format.getEffectWeight())); - return variant; - } - - - public void addVariant(ITableWriter writer, Variant variant){ - writer.setString("chr_name", variant.getChromosome()); - writer.setInteger("chr_position", variant.getPosition()); - writer.setString("effect_allele", variant.getEffectAllele()); - writer.setString("other_allele", variant.getOtherAllele()); - } - - public void writeVariant(ITableWriter writer, String score, double effect){ - writer.setDouble(score, effect); - } - - public void writeMissing(ITableWriter writer, String score){ - writer.setString(score, ""); - } - - public class Variant { - private int position = 0; - private String chromosome = null; - private double effect = 0; - private String effectAllele = null; - private String otherAllele = null; - - public int getPosition() { - return position; - } - - public void setPosition(int position) { - this.position = position; - } - - public String getChromosome() { - return chromosome; - } - - public void setChromosome(String chromosome) { - this.chromosome = chromosome; - } - - public double getEffect() { - return effect; - } - - public void setEffect(double effect) { - this.effect = effect; - } - - public String getEffectAllele() { - return effectAllele; - } - - public void setEffectAllele(String effectAllele) { - this.effectAllele = effectAllele; - } - - public String getOtherAllele() { - return otherAllele; - } - - public void setOtherAllele(String otherAllele) { - this.otherAllele = otherAllele; - } - - public double getNormalizedEffect(Variant variant) { - if (this.hasSameAlleles(variant)) { - return effect; - } - - if (this.hasSwappedAlleles(variant)) { - return -effect; - } - - throw new RuntimeException("Error. Wrong alleles!!"); - } - - private boolean hasSameAlleles(Variant variant) { - return this.effectAllele.equals(variant.effectAllele) && this.otherAllele.equals(variant.otherAllele); - } - - private boolean hasSwappedAlleles(Variant variant) { - return this.effectAllele.equals(variant.otherAllele) && this.otherAllele.equals(variant.effectAllele); - } - - public boolean hasSamePosition(Variant variant) { - return this.getPosition() == variant.getPosition() && this.getChromosome().equals(variant.getChromosome()); - } - - private int getChromosomeOrder(String chr) { - if (chromosomeOrderIndex.containsKey(chr)) { - return chromosomeOrderIndex.get(chr); - } - - throw new RuntimeException("Unknown Chromosome: " + chr); - } - - public int compare(Variant variant) { - int chrOrderA = getChromosomeOrder(getChromosome()); - int chrOrderB = getChromosomeOrder(variant.getChromosome()); - - if (chrOrderA == chrOrderB) { - return Integer.compare(getPosition(), variant.getPosition()); - } else if (chrOrderA < chrOrderB) { - return -1; - } else { - return 1; - } - - } - - public boolean isBefore(Variant variant) { - return this.compare(variant) < 0; - } - - @Override - public String toString() { - return chromosome + ":" + position; - } - - public boolean matches(Variant variant) { - return hasSamePosition(variant) && (hasSameAlleles(variant) || hasSwappedAlleles(variant)); - } - } + return true; + } + + public Variant findMinVariant(Variant[] variants) { + Variant minVariant = variants[0]; + for (int i = 1; i < variants.length; i++) { + if (minVariant == null) { + minVariant = variants[i]; + continue; + } + if (variants[i] == null) { + continue; + } + if (variants[i].isBefore(minVariant)) { + minVariant = variants[i]; + } + } + return minVariant; + } + + private String[] merge(String[] first, String[] second) { + int fal = first.length; + int sal = second.length; + String[] result = new String[fal + sal]; + System.arraycopy(first, 0, result, 0, fal); + System.arraycopy(second, 0, result, fal, sal); + return result; + } + + public Variant readVariant(ITableReader reader, RiskScoreFormatImpl format) { + if (!reader.next()) { + return null; + } + Variant variant = new Variant(); + variant.setChromosome(reader.getString(format.getChromosome())); + if (reader.getString(format.getPosition()).isEmpty()) { + throw new RuntimeException("Not position found."); + } + variant.setPosition(reader.getInteger(format.getPosition())); + variant.setEffectAllele(reader.getString(format.getEffectAllele())); + variant.setOtherAllele(reader.getString(format.getOtherAllele())); + variant.setEffect(reader.getDouble(format.getEffectWeight())); + return variant; + } + + + public void addVariant(ITableWriter writer, Variant variant) { + writer.setString(MergedRiskScoreCollection.COLUMN_CHROMOSOME, variant.getChromosome()); + writer.setInteger(MergedRiskScoreCollection.COLUMN_POSITION, variant.getPosition()); + writer.setString(MergedRiskScoreCollection.COLUMN_EFFECT_ALLELE, variant.getEffectAllele()); + writer.setString(MergedRiskScoreCollection.COLUMN_OTHER_ALLELE, variant.getOtherAllele()); + } + + public void writeVariant(ITableWriter writer, String score, double effect) { + writer.setDouble(score, effect); + } + + public void writeMissing(ITableWriter writer, String score) { + writer.setString(score, ""); + } + + public class Variant { + private int position = 0; + private String chromosome = null; + private double effect = 0; + private String effectAllele = null; + private String otherAllele = null; + + public int getPosition() { + return position; + } + + public void setPosition(int position) { + this.position = position; + } + + public String getChromosome() { + return chromosome; + } + + public void setChromosome(String chromosome) { + this.chromosome = chromosome; + } + + public double getEffect() { + return effect; + } + + public void setEffect(double effect) { + this.effect = effect; + } + + public String getEffectAllele() { + return effectAllele; + } + + public void setEffectAllele(String effectAllele) { + this.effectAllele = effectAllele; + } + + public String getOtherAllele() { + return otherAllele; + } + + public void setOtherAllele(String otherAllele) { + this.otherAllele = otherAllele; + } + + public double getNormalizedEffect(Variant variant) { + if (this.hasSameAlleles(variant)) { + return effect; + } + + if (this.hasSwappedAlleles(variant)) { + return -effect; + } + + throw new RuntimeException("Error. Wrong alleles!!"); + } + + private boolean hasSameAlleles(Variant variant) { + return this.effectAllele.equals(variant.effectAllele) && this.otherAllele.equals(variant.otherAllele); + } + + private boolean hasSwappedAlleles(Variant variant) { + return this.effectAllele.equals(variant.otherAllele) && this.otherAllele.equals(variant.effectAllele); + } + + public boolean hasSamePosition(Variant variant) { + return this.getPosition() == variant.getPosition() && this.getChromosome().equals(variant.getChromosome()); + } + + private int getChromosomeOrder(String chr) { + if (chromosomeOrderIndex.containsKey(chr)) { + return chromosomeOrderIndex.get(chr); + } + + throw new RuntimeException("Unknown Chromosome: " + chr); + } + + public int compare(Variant variant) { + int chrOrderA = getChromosomeOrder(getChromosome()); + int chrOrderB = getChromosomeOrder(variant.getChromosome()); + + if (chrOrderA == chrOrderB) { + return Integer.compare(getPosition(), variant.getPosition()); + } else if (chrOrderA < chrOrderB) { + return -1; + } else { + return 1; + } + + } + + public boolean isBefore(Variant variant) { + return this.compare(variant) < 0; + } + + @Override + public String toString() { + return chromosome + ":" + position; + } + + public boolean matches(Variant variant) { + return hasSamePosition(variant) && (hasSameAlleles(variant) || hasSwappedAlleles(variant)); + } + } } diff --git a/src/main/java/genepi/riskscore/io/scores/MergedRiskScoreCollection.java b/src/main/java/genepi/riskscore/io/scores/MergedRiskScoreCollection.java index 06972fe..16bcca0 100644 --- a/src/main/java/genepi/riskscore/io/scores/MergedRiskScoreCollection.java +++ b/src/main/java/genepi/riskscore/io/scores/MergedRiskScoreCollection.java @@ -32,6 +32,8 @@ public class MergedRiskScoreCollection implements IRiskScoreCollection { private Map> variantsIndex = new HashMap>(); + public static String HEADER = "# PGS-Collection v1"; + public static String COLUMN_CHROMOSOME = "chr_name"; public static String COLUMN_POSITION = "chr_position"; diff --git a/src/main/java/genepi/riskscore/tasks/ApplyScoreTask.java b/src/main/java/genepi/riskscore/tasks/ApplyScoreTask.java index 3f8f9b3..d1f0019 100644 --- a/src/main/java/genepi/riskscore/tasks/ApplyScoreTask.java +++ b/src/main/java/genepi/riskscore/tasks/ApplyScoreTask.java @@ -169,8 +169,9 @@ public void run(ITaskMonitor monitor) throws Exception { throw new Exception("Reference score or collection can not be null or empty."); } - //TODO "## PGS-Collection v1" - if (riskScoreFilenames.length == 1 && new File(riskScoreFilenames[0]).exists() && RiskScoreFormatFactory.readHeader(riskScoreFilenames[0]).startsWith("#Date=")) { + //TODO: move to factory + if (riskScoreFilenames.length == 1 && new File(riskScoreFilenames[0]).exists() && + RiskScoreFormatFactory.readHeader(riskScoreFilenames[0]).startsWith(MergedRiskScoreCollection.HEADER)) { collection = new MergedRiskScoreCollection(riskScoreFilenames[0]); } else { collection = new RiskScoreCollection(riskScoreFilenames, formats);