Skip to content

Commit

Permalink
Merge pull request #90 from genepi/features/pgs-formats
Browse files Browse the repository at this point in the history
Features/pgs formats
  • Loading branch information
seppinho authored Sep 20, 2022
2 parents 4532d31 + 47ff370 commit c2af781
Show file tree
Hide file tree
Showing 8 changed files with 255 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import genepi.io.FileUtil;
import genepi.io.text.LineWriter;
import genepi.riskscore.io.MetaFile;
import genepi.riskscore.io.OutputFile;
import genepi.riskscore.io.ReportFile;
import genepi.riskscore.tasks.CreateHtmlReportTask;
import genepi.riskscore.tasks.MergeReportTask;
Expand Down Expand Up @@ -303,12 +302,18 @@ public boolean run(WorkflowContext context) {

String folder = getFolder(CompressionEncryption.class);

MetaFile metaFile = MetaFile.load(FileUtil.path(folder, "pgs-catalog.json"));
report.mergeWithMeta(metaFile);
String metaFilename = pgsPanel.getMeta() != null ? pgsPanel.getMeta()
: FileUtil.path(folder, "pgs-catalog.json");

if (new File(metaFilename).exists()) {
MetaFile metaFile = MetaFile.load(metaFilename);
report.mergeWithMeta(metaFile);
}

CreateHtmlReportTask htmlReportTask = new CreateHtmlReportTask();
htmlReportTask.setApplicationName("");
htmlReportTask.setVersion("PGS Server Beta <small>(" + ImputationPipeline.PIPELINE_VERSION + ")</small>");
htmlReportTask
.setVersion("PGS Server Beta <small>(" + ImputationPipeline.PIPELINE_VERSION + ")</small>");
htmlReportTask.setShowCommand(false);
htmlReportTask.setReport(report);
htmlReportTask.setOutput(outputFileHtml);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class ImputationJob extends HadoopJob {
public static final String R2_FILTER = "R2_FILTER";

public static final String PHASING_ONLY = "PHASING_ONLY";

public static final String PHASING_REQUIRED = "PHASING_REQUIRED";

public static final String PHASING_ENGINE = "PHASING_ENGINE";
Expand Down Expand Up @@ -174,6 +174,9 @@ protected void setupDistributedCache(CacheStore cache) throws IOException {
for (String score : scores) {
if (HdfsUtil.exists(score)) {
cache.addFile(score);
if (HdfsUtil.exists(score + ".format")) {
cache.addFile(score + ".format");
}
} else {
log.info("PGS score file '" + score + "' not found.");
throw new IOException("PGS score file '" + score + "' not found.");
Expand Down Expand Up @@ -267,7 +270,7 @@ public void setBuild(String build) {
/**
 * Registers the R2 filter value for this job by handing it to
 * {@code set} under the {@link #R2_FILTER} key.
 *
 * @param r2Filter the value to store for {@code R2_FILTER}
 */
public void setR2Filter(String r2Filter) {
    this.set(R2_FILTER, r2Filter);
}

/**
 * Registers the "phasing required" value for this job by handing it to
 * {@code set} under the {@link #PHASING_REQUIRED} key.
 *
 * @param phasingRequired the value to store for {@code PHASING_REQUIRED}
 */
public void setPhasingRequired(String phasingRequired) {
    this.set(PHASING_REQUIRED, phasingRequired);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ protected void setup(Context context) throws IOException, InterruptedException {
String eagleCommand = cache.getFile("eagle");
String beagleCommand = cache.getFile("beagle.jar");
String tabixCommand = cache.getFile("tabix");

// create temp directory
DefaultPreferenceStore store = new DefaultPreferenceStore(context.getConfiguration());
folder = store.getString("minimac.tmp");
folder = FileUtil.path(folder, context.getTaskAttemptID().toString());
boolean created = FileUtil.createDirectory(folder);

if (!created) {
throw new IOException(folder + " is not writable!");
}

// scores
String scoresFilenames = parameters.get(ImputationJob.SCORES);
Expand All @@ -168,23 +178,21 @@ protected void setup(Context context) throws IOException, InterruptedException {
String name = FileUtil.getFilename(filename);
String localFilename = cache.getFile(name);
scores[i] = localFilename;
// check if score file has format file
String formatFile = cache.getFile(name + ".format");
if (formatFile != null) {
// create symbolic links for the score file and its format file; both have to be in the same folder
Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name)), Paths.get(localFilename));
Files.createSymbolicLink(Paths.get(FileUtil.path(folder,name+".format")), Paths.get(formatFile));
scores[i] = FileUtil.path(folder,name);
}
}
System.out.println("Loaded " + scores.length + " score files from distributed cache");

} else {
System.out.println("No scores files et.");
}

// create temp directory
DefaultPreferenceStore store = new DefaultPreferenceStore(context.getConfiguration());
folder = store.getString("minimac.tmp");
folder = FileUtil.path(folder, context.getTaskAttemptID().toString());
boolean created = FileUtil.createDirectory(folder);

if (!created) {
throw new IOException(folder + " is not writable!");
}

// create symbolic link --> index file is in the same folder as data
if (refEagleFilename != null) {
Files.createSymbolicLink(Paths.get(FileUtil.path(folder, "ref.bcf")), Paths.get(refEagleFilename));
Expand Down Expand Up @@ -303,7 +311,7 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio

FileMerger.splitIntoHeaderAndData(outputChunk.getImputedVcfFilename(), outHeader, outData,
imputationParameters);

// store vcf file (remove header)
BgzipSplitOutputStream outDataMeta = new BgzipSplitOutputStream(
HdfsUtil.create(HdfsUtil.path(output, chunk + ".data.empiricalDose.vcf.gz")));
Expand All @@ -313,8 +321,7 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio

FileMerger.splitIntoHeaderAndData(outputChunk.getMetaVcfFilename(), outHeaderMeta, outDataMeta,
imputationParameters);



long end = System.currentTimeMillis();

statistics.setImportTime((end - start) / 1000);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import genepi.io.FileUtil;
import genepi.riskscore.io.Chunk;
import genepi.riskscore.io.PGSCatalog;
import genepi.riskscore.io.formats.RiskScoreFormatFactory.RiskScoreFormat;
import genepi.riskscore.tasks.ApplyScoreTask;
import groovy.text.SimpleTemplateEngine;
import htsjdk.samtools.util.StopWatch;
Expand Down Expand Up @@ -344,6 +345,14 @@ private boolean runPgsCalc(VcfChunkOutput output) {
task.setVcfFilename(output.getImputedVcfFilename());
task.setChunk(scoreChunk);
task.setRiskScoreFilenames(scores);

for (String file : scores) {
String autoFormat = file + ".format";
if (new File(autoFormat).exists()) {
task.setRiskScoreFormat(file, RiskScoreFormat.MAPPING_FILE);
}
}

task.setOutputReportFilename(output.getScoreFilename() + ".json");
task.setOutput(output.getScoreFilename());

Expand Down
10 changes: 10 additions & 0 deletions src/main/java/genepi/imputationserver/util/PgsPanel.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public class PgsPanel {

private String build = "";

private String meta = null;

private List<String> scores = new Vector<>();

private PgsPanel() {
Expand Down Expand Up @@ -58,4 +60,12 @@ public String getBuild() {
return build;
}

/**
 * Returns the meta value of this panel.
 *
 * @return the current meta value, or {@code null} when none has been set
 *         (the field is initialised to {@code null})
 */
public String getMeta() {
    return this.meta;
}

/**
 * Sets the meta value of this panel.
 *
 * @param meta the new meta value; {@code null} is the field's initial
 *             value. NOTE(review): appears to be the filename of a
 *             meta file - confirm against callers.
 */
public void setMeta(String meta) {
this.meta = meta;
}

}
98 changes: 95 additions & 3 deletions src/test/java/genepi/imputationserver/steps/ImputationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -803,10 +803,102 @@ public void testPipelineWithEagleAndScores() throws IOException, ZipException {
}
readerExpected.close();
readerActual.close();

//check if html report file exisits

// check if html report file exists
new File("test-data/tmp/local/scores.html").exists();

FileUtil.deleteDirectory("test-data/tmp");

}

/**
 * Runs input validation, QC, imputation and export on the unphased chr20
 * test data with a PRSweb score file that is accompanied by a
 * {@code .format} file, then compares the exported scores with the scores
 * computed directly by {@code ApplyScoreCommand} and verifies that the
 * HTML report was written.
 *
 * @throws IOException  declared for the file and HDFS operations used by the test
 * @throws ZipException declared for the zip extraction of the exported results
 */
@Test
public void testPipelineWithEagleAndScoresAndFormat() throws IOException, ZipException {

    String configFolder = "test-data/configs/hapmap-chr20";
    String inputFolder = "test-data/data/chr20-unphased";

    // import score file and its format file into hdfs (same target folder)
    String score1 = "test-data/data/prsweb/PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS.txt";
    String format1 = "test-data/data/prsweb/PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS.txt.format";

    String targetScore1 = HdfsUtil.path("scores-hdfs", "PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS.txt");
    HdfsUtil.put(score1, targetScore1);

    String targetFormat1 = HdfsUtil.path("scores-hdfs",
            "PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS.txt.format");
    HdfsUtil.put(format1, targetFormat1);

    // create workflow context and set scores
    WorkflowTestContext context = buildContext(inputFolder, "hapmap2");
    context.setOutput("outputScores", "cloudgene2-hdfs");

    Map<String, Object> pgsPanel = new HashMap<String, Object>();
    List<String> scores = new Vector<String>();
    scores.add("PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS.txt");
    pgsPanel.put("location", "scores-hdfs");
    pgsPanel.put("scores", scores);
    pgsPanel.put("build", "hg19");
    context.setData("pgsPanel", pgsPanel);

    // run qc to create chunkfile

    InputValidation inputValidation = new InputValidationMock(configFolder);
    // run and test
    boolean result = run(context, inputValidation);
    assertTrue(result);

    QcStatisticsMock qcStats = new QcStatisticsMock(configFolder);
    result = run(context, qcStats);

    assertTrue(result);

    // add panel to hdfs
    importRefPanel(FileUtil.path(configFolder, "ref-panels"));
    // importMinimacMap("test-data/B38_MAP_FILE.map");
    importBinaries("files/bin");

    // run imputation
    ImputationMinimac3Mock imputation = new ImputationMinimac3Mock(configFolder);
    result = run(context, imputation);
    assertTrue(result);

    // run export
    CompressionEncryptionMock export = new CompressionEncryptionMock("files");
    result = run(context, export);
    assertTrue(result);

    ZipFile zipFile = new ZipFile("test-data/tmp/local/chr_20.zip", PASSWORD.toCharArray());
    zipFile.extractAll("test-data/tmp");

    VcfFile file = VcfFileUtil.load("test-data/tmp/chr20.dose.vcf.gz", 100000000, false);

    assertEquals("20", file.getChromosome());
    assertEquals(51, file.getNoSamples());
    assertEquals(true, file.isPhased());
    assertEquals(TOTAL_REFPANEL_CHR20_B37, file.getNoSnps());

    // info file has one header line
    int snpInInfo = getLineCount("test-data/tmp/chr20.info.gz") - 1;
    assertEquals(snpInInfo, file.getNoSnps());

    // compute the expected scores directly from the imputed dosages
    String[] args = { "test-data/tmp/chr20.dose.vcf.gz", "--ref", score1, "--out", "test-data/tmp/expected.txt" };
    int resultScore = new CommandLine(new ApplyScoreCommand()).execute(args);
    assertEquals(0, resultScore);

    zipFile = new ZipFile("test-data/tmp/local/scores.zip", PASSWORD.toCharArray());
    zipFile.extractAll("test-data/tmp");
    CsvTableReader readerExpected = new CsvTableReader("test-data/tmp/expected.txt", ',');
    CsvTableReader readerActual = new CsvTableReader("test-data/tmp/scores.txt", ',');

    // close both readers even when a comparison fails
    try {
        while (readerExpected.next() && readerActual.next()) {
            assertEquals(readerExpected.getDouble("PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS"),
                    readerActual.getDouble("PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608_WEIGHTS"), 0.00001);
        }
    } finally {
        readerExpected.close();
        readerActual.close();
    }

    // check if html report file exists; the result must be asserted,
    // otherwise a missing report goes unnoticed
    assertTrue(new File("test-data/tmp/local/scores.html").exists());

    FileUtil.deleteDirectory("test-data/tmp");

}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
## PRSweb reference PRSWEB_PHECODE153_CRC-Huyghe_PT_UKB_20200608
## PRSweb LD reference MGI
## PRSweb date 20200608
## GWAS source 30510241
## GWAS reference PUBMED
## GWAS phenotype Colorectal cancer
## GWAS id CRC_Huyghe
## GWAS URL https://www.nature.com/articles/s41588-018-0286-6#Sec35
## PRS method LD Clumping (MAF >= 1%, r^2 <= 0.1) & P-value thresholding (see tuning parameter)
## PRS tuning parameter 7.8e-06
## PRS evaluation in UKB
## Genome build GRCh37/hg19
CHROM POS REF ALT EA OA PVALUE WEIGHT
1 38455891 G C G C 3.8e-09 0.0523
1 55246035 T C C T 3.3e-11 0.0665
1 183002639 A G A G 2.4e-16 0.073
1 222112634 A G G A 6.1e-16 0.0877
2 159964552 T C C T 4.4e-08 0.0511
2 199612407 T C C T 5e-09 0.0535
2 199781586 T C T C 3.7e-11 0.0627
2 219191256 T C T C 1.5e-11 0.0613
3 40915239 A G G A 1.2e-16 0.0994
3 66365163 G A A G 7.1e-08 0.0597
3 112999560 G A G A 1.4e-08 0.1761
3 133701119 G A A G 3.8e-09 0.0597
3 169517436 C T C T 7.8e-06 0.0453
4 94938618 C A A C 1.2e-08 0.052
4 106128760 G A A G 1.6e-08 0.0522
4 145659064 T C C T 2.9e-08 0.0842
5 1240204 C T T C 5.1e-09 0.1119
5 1296486 A G G A 1.4e-22 0.0865
5 40102443 G A A G 4.2e-09 0.0545
5 40280076 G A A G 9.3e-25 0.1013
5 134467220 C T C T 4.8e-15 0.0693
6 31449620 C T C T 1.8e-10 0.1118
6 32593080 A G G A 4.9e-14 0.0889
6 35569562 A G A G 3.6e-08 0.0778
6 36623379 G A A G 8.6e-08 0.054
6 55712124 C T C T 1.1e-11 0.0724
7 45136423 T C T C 4.7e-08 0.065
8 117630683 A C C A 7.3e-28 0.2099
8 128413305 G T G T 1.1e-15 0.1052
8 128571855 G T G T 1.8e-09 0.0608
9 22103183 G T G T 1.4e-08 0.0504
9 101679752 T G T G 3.1e-08 0.0818
9 113671403 T C C T 2.8e-09 0.0637
10 8739580 T A T A 1.3e-25 0.1064
10 52648454 C T C T 5e-10 0.073
10 80819132 A G G A 1.8e-17 0.0765
10 101351704 A G G A 1e-17 0.0889
10 114288619 T C C T 1.3e-11 0.0975
10 114722621 G A A G 7e-07 0.0527
11 61549025 G A G A 1.2e-11 0.0636
11 74280012 T G G T 8.9e-19 0.078
11 74427921 C T C T 3.7e-16 0.1934
11 101656397 T A T A 1.1e-09 0.0537
11 111156836 T C T C 1.9e-31 0.1122
12 4368607 T C C T 3.6e-14 0.089
12 4388271 C T T C 1.6e-15 0.1181
12 4400808 C T T C 2.4e-09 0.055
12 6421174 A T T A 4.1e-09 0.0597
12 43134191 A G G A 1.3e-09 0.053
12 51171090 A G G A 1.9e-23 0.0896
12 57533690 C A A C 9.4e-09 0.053
12 111973358 A G G A 2.6e-16 0.0737
12 115890922 T C C T 8.1e-14 0.066
13 34092164 C T C T 3.4e-07 0.0468
13 37462010 A G G A 6.3e-13 0.0758
13 73791554 T C C T 2.6e-08 0.0982
13 111075881 C T T C 1.8e-09 0.0549
14 54419106 A C C A 2.1e-23 0.0912
14 54445157 G A G A 3.1e-07 0.0465
14 59189361 G A G A 9.9e-07 0.0691
15 32992836 G A G A 1.1e-06 0.0464
15 33010736 G A A G 2.3e-29 0.1248
15 33156386 G A A G 1.5e-10 0.0705
15 67402824 T C C T 2.4e-13 0.0689
16 68743939 A C A C 3.1e-08 0.055
16 80043258 C A C A 2.1e-08 0.0498
16 86339315 T C T C 2.8e-08 0.0487
16 86703949 C T T C 6.6e-06 0.0481
17 809643 G A G A 6.8e-08 0.0514
17 10707241 G A A G 6.6e-12 0.0748
17 70413253 G A A G 5.6e-09 0.0595
18 46453156 A T A T 3.8e-74 0.1606
19 16417198 C T T C 4.2e-10 0.0868
19 33519927 T G T G 3.7e-23 0.1939
19 41871573 G A A G 9.5e-07 0.0441
19 59079096 C T T C 4.2e-08 0.0632
20 6376457 G C G C 1.1e-16 0.0795
20 6603622 C T C T 6.9e-12 0.0627
20 6699595 T G G T 2.3e-18 0.0819
20 6762221 C T T C 3.3e-14 0.0714
20 7740976 A G G A 3.4e-13 0.0874
20 33213196 A C C A 3e-07 0.045
20 42666475 C T T C 6.8e-09 0.0597
20 47340117 A G A G 5.9e-15 0.0719
20 49055318 C T C T 3.3e-09 0.0547
20 60932414 T C C T 1.1e-26 0.1146
20 62308612 T G T G 5.3e-08 0.0593
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"chromosome": "CHROM",
"position": "POS",
"effect_weight": "WEIGHT",
"otherAllele": "OA",
"effectAllele": "EA"
}

0 comments on commit c2af781

Please sign in to comment.