-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
776 additions
and
133 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,2 @@ | ||
# DivImpute | ||
|
||
Scripts to start imputation jobs on the virtual machines of a project running on the de.NBI Cloud. | ||
Three virtual machines are used in this particular setup: one head node, which starts the jobs by running the scripts, and two computational nodes. All nodes run Ubuntu and have the software listed under "Required Tools" installed. The nodes store their data on a Cinder volume mounted at /volumes/volume001. | ||
|
||
## Required Tools | ||
|
||
- [Beagle4.1](https://faculty.washington.edu/browning/beagle/b4_1.html) and bref | ||
- [bcftools 1.10](http://www.htslib.org/download/) | ||
|
||
## Usage | ||
|
||
1. Install required tools | ||
2. Run *batch_impute.sh* or *imputeOnReferencePanel.sh* on the head node; they run the enumerated scripts, which requires that the head node can access the computational nodes. | ||
3. The single-node example can be run with the script *03_singleFileImputation.sh*. | ||
|
||
## Parameters | ||
|
||
### 01_split.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | Input VCF | input.vcf.gz | ||
$2 | Output Name | output | ||
|
||
### 02_send_data.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | List of Chromosome Names | chr_names.txt | ||
$2 | IP Address Computational Node | 192.168.0.80 | ||
$3 | Private Key | .ssh/my-private-key | ||
|
||
### 03_singleFileImputation.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
|
||
### 03_start_job_gt.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
|
||
### 03_start_job.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
|
||
### 04_receive_data.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
|
||
### 05_get_logs.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
|
||
### 06_cleanup.sh | ||
Parameter | Description | Example | ||
--- | --- | --- | ||
$1 | IP Address Computational Node | 192.168.0.80 | ||
$2 | Private Key | .ssh/my-private-key | ||
# DivImpute | ||
|
This file was deleted.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Empty file.
Empty file.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
// Build a bcftools index for the input VCF.
// Emits the VCF itself (`vcf`) and the `.csi` file expected next to it (`index`);
// both are copied to <outdir>/raw.
process indexVCF {
    container 'quay.io/biocontainers/bcftools:1.18--h8b25389_0'
    publishDir "${params.outdir}/raw", mode: 'copy'

    input:
    path inputVcf

    output:
    path "${inputVcf}", emit: vcf
    path "${inputVcf}.csi", emit: index

    script:
    """
    bcftools index ${inputVcf}
    """
}
|
||
// Dump one "<chromosome><TAB><position>" line per record of the VCF into variants.txt.
// The index is staged next to the VCF but is not referenced by the command itself.
process listVariants {
    container 'quay.io/biocontainers/bcftools:1.18--h8b25389_0'

    input:
    path sourceVcf
    path sourceVcfIndex

    output:
    path 'variants.txt'

    script:
    """
    bcftools query -f '%CHROM\t%POS\n' ${sourceVcf} > variants.txt
    """
}
|
||
// Partition the variants of every chromosome into overlapping windows and write one
// region string "<chrom>:<first position>-<last position>" per window to windows.txt
// (copied to <outdir>).
//
// Inputs:
//   variantFile - tab-separated file without header: column 0 chromosome, column 1 position
//   windowSize  - maximum number of variants per window (must be > 0)
//   overlap     - number of variants shared by consecutive windows (0 <= overlap < windowSize)
//
// Windows are generated until the last variant of the chromosome is covered. The previous
// fixed iteration count (ceil(n / windowSize) + 1) ignored that each window only advances
// by windowSize - overlap variants, so the tail of long chromosomes was never emitted and
// short chromosomes got a redundant trailing window.
process writeWindows {
    container 'quay.io/biocontainers/pandas:1.5.2'
    publishDir params.outdir, mode: 'copy'

    input:
    path(variantFile)
    val(windowSize)
    val(overlap)

    output:
    path("windows.txt")

    script:
    """
    #!/usr/bin/env python3
    import pandas as pd

    window_size = int($windowSize)
    overlap = int($overlap)
    # An overlap >= window size would never advance the window start.
    if window_size <= 0 or not 0 <= overlap < window_size:
        raise ValueError('windowSize must be > 0 and overlap must satisfy 0 <= overlap < windowSize')

    # Read chromosome names as strings so numeric and non-numeric names are handled alike.
    variants = pd.read_csv('${variantFile}', header=None, sep='\\t', dtype={0: str})

    with open('windows.txt', 'w') as out:
        for chrom, chrom_variants in variants.groupby(0):
            positions = chrom_variants[1].to_numpy()
            n_variants = len(positions)
            start = 0
            while True:
                end = min(start + window_size, n_variants)
                out.write(f'{chrom}:{positions[start]}-{positions[end - 1]}\\n')
                if end == n_variants:
                    # Last variant of this chromosome is covered.
                    break
                # Next window re-uses the last `overlap` variants of this one.
                start = end - overlap
    """
}
|
||
// Extract the records of one region from the indexed VCF into its own bgzipped VCF,
// named "<region>_<original file name>" and copied to <outdir>/windows.
process splitVCFByWindow {
    container 'quay.io/biocontainers/bcftools:1.18--h8b25389_0'
    publishDir "${params.outdir}/windows", mode: 'copy'

    input:
    val region
    path sourceVcf
    path sourceVcfIndex

    output:
    path "${region}_${sourceVcf}"

    script:
    """
    bcftools view -r ${region} ${sourceVcf} -Oz -o ${region}_${sourceVcf}
    """
}
|
||
// Run Beagle on one window VCF. The result is expected as "imputed_<input file name>"
// and is copied to <outdir>/imputed/windows.
//
// Beagle receives an output prefix (`out=`), so the ".vcf.gz" suffix of the input name
// is removed before building the prefix. The suffix is matched literally and only at
// the end of the name; the previous pattern '.vcf.gz' was an unanchored regex whose
// dots matched any character.
process imputeWindows {
    container 'quay.io/biocontainers/beagle:5.4_22Jul22.46e--hdfd78af_0'
    publishDir params.outdir+'/imputed/windows', mode: 'copy'

    input:
    path(vcfFile)

    output:
    path("imputed_${vcfFile}")

    script:
    def outPrefix = 'imputed_' + vcfFile.name.replaceFirst(/\.vcf\.gz$/, '')
    """
    beagle gt=${vcfFile} out=${outPrefix}
    """
}
|
||
// Index a single imputed window VCF with bcftools and emit the `.csi` file
// expected next to it as `index`.
process indexImputedWindow {
    container 'quay.io/biocontainers/bcftools:1.18--h8b25389_0'

    input:
    path imputedVcf

    output:
    path "${imputedVcf}.csi", emit: index

    script:
    """
    bcftools index ${imputedVcf}
    """
}
|
||
// Concatenate all imputed window VCFs into merged_imputed.vcf.gz (copied to <outdir>/imputed/).
//
// Inputs:
//   vcfFiles   - the imputed window VCFs (staged as imputed*.vcf.gz)
//   vcfIndices - their index files, staged alongside so bcftools can read them
//
// Consecutive windows may share variants at their edges and the shell glob orders files
// lexicographically rather than by genomic position, so plain `bcftools concat` is not
// sufficient. --allow-overlaps lets bcftools merge the indexed inputs by coordinate and
// --remove-duplicates writes records present in several windows only once.
// NOTE(review): which window's imputed genotypes are kept for a shared record is decided
// by bcftools - confirm this is acceptable for downstream analyses.
process mergeImputedWindows {
    container 'quay.io/biocontainers/bcftools:1.18--h8b25389_0'
    publishDir params.outdir+'/imputed/', mode: 'copy'

    input:
    path(vcfFiles)
    path(vcfIndices)

    output:
    path("merged_imputed.vcf.gz")

    script:
    """
    bcftools concat --allow-overlaps --remove-duplicates imputed*.vcf.gz -Oz -o merged_imputed.vcf.gz
    """
}
|
||
// Entry workflow: index the input VCF, derive overlapping variant windows, impute every
// window separately and merge the imputed windows into a single VCF.
workflow {
    // Resolve the parameter to a file object up front: relative paths are accepted and a
    // missing input fails immediately instead of inside the first task.
    indexVCF(file(params.vcf, checkIfExists: true))
        | listVariants

    // One region string per line of windows.txt -> one channel item per window.
    writeWindows(listVariants.out, params.windowSize, params.overlap)
        | splitText
        | map { it.trim() }
        | set { ch_windows }

    // Cut the VCF into windows, impute each one and gather all imputed files.
    splitVCFByWindow(ch_windows, indexVCF.out.vcf, indexVCF.out.index)
        | imputeWindows
        | collect
        | set { ch_imputedWindows }

    // Index every imputed window and gather the indices.
    indexImputedWindow(imputeWindows.out)
        | collect
        | set { ch_windowIndices }

    mergeImputedWindows(ch_imputedWindows, ch_windowIndices)
}
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
// Pipeline parameters.
// Input VCF that is indexed, split into windows and imputed.
params.vcf        = '/home/ubuntu/divimpute/data/bridge_core1000_renamed.vcf.gz'
// Directory that receives all published results.
params.outdir     = './results'
// Window size and overlap handed to the writeWindows process.
params.windowSize = 5000
params.overlap    = 1000
|
||
// Run every process inside its declared container using Docker.
docker.enabled = true
This file was deleted.
Oops, something went wrong.