diff --git a/src/main/java/genepi/imputationserver/steps/FastQualityControl.java b/src/main/java/genepi/imputationserver/steps/FastQualityControl.java index 6acea130..5456093b 100644 --- a/src/main/java/genepi/imputationserver/steps/FastQualityControl.java +++ b/src/main/java/genepi/imputationserver/steps/FastQualityControl.java @@ -189,6 +189,7 @@ public boolean run(WorkflowContext context) { double sampleCallrate = panel.getQcFilterByKey("sampleCallrate"); double mixedGenotypesChrX = panel.getQcFilterByKey("mixedGenotypeschrX"); int strandFlips = (int) (panel.getQcFilterByKey("strandFlips")); + int alleleSwitches = (int) (panel.getQcFilterByKey("alleleSwitches")); String ranges = panel.getRange(); if (ranges != null) { @@ -336,6 +337,15 @@ else if (task.getStrandFlipSimple() + task.getStrandFlipAndAlleleSwitch() > stra return false; } + // Check if too many allele switches are detected + else if (task.getAlleleSwitch() > alleleSwitches) { + text.append("
Error: More than " + alleleSwitches + + " allele switches have been detected. Imputation cannot be started!"); + context.error(text.toString()); + + return false; + } + else if (task.isChrXMissingRate()) { text.append( "
Error: Chromosome X nonPAR region includes > 10 % mixed genotypes. Imputation cannot be started!"); diff --git a/src/main/java/genepi/imputationserver/util/RefPanel.java b/src/main/java/genepi/imputationserver/util/RefPanel.java index 1f009f48..31f57692 100644 --- a/src/main/java/genepi/imputationserver/util/RefPanel.java +++ b/src/main/java/genepi/imputationserver/util/RefPanel.java @@ -13,6 +13,7 @@ public class RefPanel { + public static final String ALLELE_SWITCHES = String.valueOf(Integer.MAX_VALUE); public static final String STRAMD_FLIPS = "100"; public static final String SAMPLE_CALL_RATE = "0.5"; public static final String MIN_SNPS = "3"; @@ -57,6 +58,7 @@ public RefPanel() { defaultQcFilter.put("sampleCallrate", SAMPLE_CALL_RATE); defaultQcFilter.put("mixedGenotypeschrX", CHR_X_MIXED_GENOTYPES); defaultQcFilter.put("strandFlips", STRAMD_FLIPS); + defaultQcFilter.put("alleleSwitches", ALLELE_SWITCHES); } public String getId() { diff --git a/src/test/java/genepi/imputationserver/steps/FastQualityControlTest.java b/src/test/java/genepi/imputationserver/steps/FastQualityControlTest.java index ef798b40..8e9bb827 100644 --- a/src/test/java/genepi/imputationserver/steps/FastQualityControlTest.java +++ b/src/test/java/genepi/imputationserver/steps/FastQualityControlTest.java @@ -519,6 +519,47 @@ public void testQcStatisticsDontAllowStrandFlips() throws IOException { "Error: More than -1 obvious strand flips have been detected. Please check strand. 
Imputation cannot be started!")); } + + public void testQcStatisticsAllowAlleleSwitches() throws IOException { + + String configFolder = "test-data/configs/hapmap-3chr"; + String inputFolder = "test-data/data/simulated-chip-3chr-imputation-switches"; + + // create workflow context + WorkflowTestContext context = buildContext(inputFolder, "hapmap2"); + + // create step instance + FastQualityControlMock qcStats = new FastQualityControlMock(configFolder); + + // run and test + boolean result = run(context, qcStats); + + // check statistics + + assertTrue(context.hasInMemory("Excluded sites in total: 2,967")); + assertTrue(context.hasInMemory("Allele switch: 118,209")); + } + + public void testQcStatisticsDontAllowAlleleSwitches() throws IOException { + + String configFolder = "test-data/configs/hapmap-3chr"; + String inputFolder = "test-data/data/simulated-chip-3chr-imputation-switches"; + + // create workflow context + WorkflowTestContext context = buildContext(inputFolder, "hapmap2-qcfilter-alleleswitches"); + + // create step instance + FastQualityControlMock qcStats = new FastQualityControlMock(configFolder); + + // run and test + boolean result = run(context, qcStats); + + // check statistics and the error text emitted when the alleleSwitches limit (33 for this panel) is exceeded + + assertTrue(context.hasInMemory("Excluded sites in total: 2,967")); + assertTrue(context.hasInMemory("Allele switch: 118,209")); + assertTrue(context.hasInMemory("Error: More than 33 allele switches have been detected. Imputation cannot be started!")); + } public void testQcStatisticsFilterOverlap() throws IOException { diff --git a/test-data/configs/hapmap-3chr/panels.txt b/test-data/configs/hapmap-3chr/panels.txt index 4eea42e8..bf1e7ae0 100644 --- a/test-data/configs/hapmap-3chr/panels.txt +++ b/test-data/configs/hapmap-3chr/panels.txt @@ -71,4 +71,18 @@ panels: qcFilter: sampleCallrate: 1.01 strandFlips: 100 + + - id: hapmap2-qcfilter-alleleswitches + hdfs: ref-panels/hapmap_r22.chr$chr.CEU.hg19.msav + legend: ref-panels/hapmap_r22.chr$chr.CEU.hg19_impute.legend.gz + mapEagle: ref-panels/genetic_map_hg19_chr1.txt + refEagle: ref-panels/hapmap_r22.eagle/hapmap_r22.chr$chr.CEU.hg19.recode.bcf + samples: + eur: 60 + mixed: -1 + populations: + eur: EUR + mixed: Mixed + qcFilter: + alleleSwitches: 33 \ No newline at end of file diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz new file mode 100644 index 00000000..17370ef6 Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz differ diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz.tbi b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz.tbi new file mode 100644 index 00000000..7c5d9220 Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr1.HumanHap550.small.recode.switches.vcf.gz.tbi differ diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz new file mode 100644 index 00000000..17370ef6 
Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz differ diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz.tbi b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz.tbi new file mode 100644 index 00000000..7c5d9220 Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr2.HumanHap550.small.recode.switches.vcf.gz.tbi differ diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz new file mode 100644 index 00000000..17370ef6 Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz differ diff --git a/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz.tbi b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz.tbi new file mode 100644 index 00000000..7c5d9220 Binary files /dev/null and b/test-data/data/simulated-chip-3chr-imputation-switches/1000genomes.chr3.HumanHap550.small.recode.switches.vcf.gz.tbi differ