diff --git a/README.md b/README.md index 5e59b78..cae9931 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ ![logo](.misc/logo.png) - -_It's Lep-Map3, but with snakes_ 🐍🐍 +_It's Lep-Map3, but with snakes 🐍🐍_ [![alt text](https://img.shields.io/badge/docs-wiki-75ae6c?style=for-the-badge&logo=Read%20The%20Docs)](https://github.com/pdimens/LepWrap/wiki) # LepWrap -LepWrap is a reusable pipeline to use the linkage map software [Lep-Map3](https://sourceforge.net/projects/lep-map3/) and [Lep-Anchor](https://sourceforge.net/projects/lep-anchor/). It is the Snakemake-based successor to [LepMapp3r](https://github.com/pdimens/LepMapp3r). Check out [the wiki](https://github.com/pdimens/LepWrap/wiki) for detailed installation, usage, and workflow information. +LepWrap is a reusable pipeline to use the linkage map software [Lep-Map3](https://sourceforge.net/projects/lep-map3/). It is the Snakemake-based successor to [LepMapp3r](https://github.com/pdimens/LepMapp3r). Check out [the wiki](https://github.com/pdimens/LepWrap/wiki) for detailed installation, usage, and workflow information. @@ -24,9 +23,7 @@ git clone https://github.com/pdimens/LepWrap.git Assuming you have `anaconda` or `miniconda` installed: ```bash cd LepWrap - conda env create -f conda_setup.yml - ``` This will create an environment called `lepwrap` that can be activated with: ```bash @@ -44,10 +41,9 @@ where `` is an integer of the maximum number of cores/threads y LepWrap does things a certain way, employing the most common/reasonable way of using Lep-Map3 (and LepAnchor more or less). Version `3.2+` is **a lot** more flexible that its predecessors, but might still lack something you're looking for. Your study is unique, and I encourage youto clone/fork this repository and adapt LepWrap to your needs! All of the code in LepWrap is written in human-readable bash or aggressively annotated R, so give it a shot and adapt it to your workflow. PR's always welcome! - ## Citation If using LepWrap in a publication, cite **Pasi Rastas** for their work on Lep-Map3/Lep-Anchor and please include a link to this repository. If you like using it, give me (Pavel) a shout out on Twitter [@pvdimens](https://twitter.com/PVDimens) [![alt text](http://i.imgur.com/wWzX9uB.png)](https://twitter.com/PVDimens) =) > Pasi Rastas, Lep-MAP3: robust linkage mapping even for low-coverage whole genome sequencing data, Bioinformatics, Volume 33, Issue 23, 01 December 2017, Pages 3726–3732,https://doi.org/10.1093/bioinformatics/btx494 -> Pasi Rastas, Lep-Anchor: automated construction of linkage map anchored haploid genomes, Bioinformatics, Volume 36, Issue 8, 15 April 2020, Pages 2359–2364, https://doi.org/10.1093/bioinformatics/btz978 \ No newline at end of file +> Pasi Rastas, Lep-Anchor: automated construction of linkage map anchored haploid genomes, Bioinformatics, Volume 36, Issue 8, 15 April 2020, Pages 2359–2364, https://doi.org/10.1093/bioinformatics/btz978 diff --git a/config.yaml b/config.yaml index d74c0a5..c70e73f 100644 --- a/config.yaml +++ b/config.yaml @@ -1,14 +1,14 @@ # Configuration file for LepWrap -################### -#### Lep-Map 3 #### -################### +######################################################### +# Lep-Map 3 # +######################################################### #---------------# # ParentCall2 # #---------------# # the filtered VCF file with your genotype likelihoods: -vcf: "out.15.miss95recode.vcf" +vcf: "out.15.missrecode90.vcf" # Instructions to create pedigree file: https://sourceforge.net/p/lep-map3/wiki/software/LepMap3 Home/#parentcall2 # the pedigree file associated with your data @@ -35,16 +35,16 @@ extra_params_Filtering: "" # LOD score in the range of lod_min to lod_max # The minimum LOD for SeperateChromosomes2 -lod_min: 20 +lod_min: 10 # The maximum LOD for SeperateChromosomes2 -lod_max: 40 +lod_max: 50 # Use only markers with informative father (1), mother(2), both parents(3) or neither parent(0) -informative: "informativeMask=123" +informative: "informativeMask=3" # If there are any additional parameters you would like to use for SeparateChromosomes2 (e.g. distrotionLOD=1), add them here -extra_params_SeparateChromosomes: "sizeLimit=5 distortionLOD=1" +extra_params_SeparateChromosomes: "sizeLimit=5 distortionLod=1" #-------------------# @@ -61,7 +61,7 @@ lod_limit: "lodLimit=2" lod_difference: "lodDifference=2" # If there are any additional parameters you would like to use for JoinSingles2All (e.g. iterate=1), add them here -extra_params_JoinSingles: "iterate=1 distortionLOD=1" +extra_params_JoinSingles: "iterate=1 distortionLod=1" #-----------------# @@ -72,11 +72,9 @@ extra_params_JoinSingles: "iterate=1 distortionLOD=1" # Set exp_lg to your expected number of chromosomes exp_lg: 24 -# Set iterations to the number of iterations you want per chromosome (more is better). Recommend 30 or more -iterations: 100 - # If there are any additional parameters you would like to use for OrderMarkers2 (e.g. hyperPhaser=1), add them here -extra_params_OrderMarkers: "hyperPhaser=1 useKosambi=1 phasingIterations=2" +# I recommend setting numMergeIterations to ~100 (Lep-Map3 default is 6) +extra_params_OrderMarkers: "hyperPhaser=1 useKosambi=1 phasingIterations=2 numMergeIterations=100" #-----------------# @@ -99,25 +97,25 @@ trim_cutoff: 100 #--------------------# # The second round of OrderMarkers will use the same basic parameters as the first round (but not the extra params) # If there are additional parameters you would like to use, add them here: -extra_params_reOrderMarkers: "improveOrder=1 useKosambi=1" - +extra_params_reOrderMarkers: "improveOrder=1 useKosambi=1 numMergeIterations=75" #-----------------------# # Calculate Distances # #-----------------------# # If you used useKosambi=1 or useMorgan=1 for Ordering/reOrdering, add that same # parameter to distance_method: -distance_method: "" +distance_method: "useKosambi=1" + +######################################################### +# Lep-Anchor # +######################################################### -#################### -#### Lep-Anchor #### -#################### # change this to true if you also want to run Lep-Anchor run_lepanchor: true # the path to the genome assembly you are trying to anchor -# ideally it *is not* gzipped and ends in .fa, but there are built-in workarounds if it is. +# ideally it is *not* gzipped and ends in .fa, but there are built-in workarounds if it is. assembly: "YFT.genome.latest.fasta" # the number of linkage groups you have @@ -182,4 +180,4 @@ extra_params_PlaceOrient: "keepEmptyIntervals=1 numRuns=10" LA_edge_length: 20 # Set trim_cuttoff to the centiMorgan distance cutoff (5 is reasonable) -LA_trim_cutoff: 3 \ No newline at end of file +LA_trim_cutoff: 5 \ No newline at end of file diff --git a/rules/LepMap3/LepMap3.smk b/rules/LepMap3/LepMap3.smk index a7c9f93..4c53bb0 100644 --- a/rules/LepMap3/LepMap3.smk +++ b/rules/LepMap3/LepMap3.smk @@ -13,7 +13,7 @@ data_tol=config["data_tol"] filtering_extra = config["extra_params_Filtering"] # separate chromosomes # lod_max = str(config["lod_max"]) -lod_range = list(range(config["lod_min"], config["lod_max"]+1)) +lod_range = list(range(config["lod_min"], lod_max+1)) informative = config["informative"] sepchrom_extra = config["extra_params_SeparateChromosomes"] # join singles # @@ -22,16 +22,14 @@ lod_lim = config["lod_limit"] lod_diff = config["lod_difference"] js2a_extra = config["extra_params_JoinSingles"] # ordering # -lg_range = list(range(1, config["exp_lg"]+1)) lg_count = config["exp_lg"] -ITER = config["iterations"] +lg_range = list(range(1, lg_count+1)) order_extra = config["extra_params_OrderMarkers"] # trimming # edge_len = str(config["edge_length"]) trim_thresh = str(config["trim_cutoff"]) # ordering II # reorder_extra = config["extra_params_reOrderMarkers"] -ITER2 = round(ITER/2) # distances # dist_method = config["distance_method"] diff --git a/rules/LepMap3/generate_map.smk b/rules/LepMap3/generate_map.smk index 1adb2cb..5612f06 100644 --- a/rules/LepMap3/generate_map.smk +++ b/rules/LepMap3/generate_map.smk @@ -3,7 +3,7 @@ rule separate_chromosomes: output: "3_SeparateChromosomes/LOD.{lod_range}" log: "3_SeparateChromosomes/logs/LOD.{lod_range}.log" message: "Clustering markers for lodLimit={params.lod} >> {output}" - threads: 30 + threads: 40 params: lod = "{lod_range}", extra = sepchrom_extra, @@ -12,19 +12,34 @@ rule separate_chromosomes: zcat {input} | java -cp software/LepMap3 SeparateChromosomes2 data=- {params.extra} {informative} lodLimit={params.lod} numThreads={threads} > {output} 2> {log} """ + rule map_summary: input: expand("3_SeparateChromosomes/LOD.{LOD}", LOD = lod_range) output: "3_SeparateChromosomes/all.LOD.summary" message: "Summarizing SeperateChromosomes2 maps >> {output}" shell: "scripts/MapSummary.r 3_SeparateChromosomes" + +rule choose_map: + input: "3_SeparateChromosomes/all.LOD.summary" + output: "3_SeparateChromosomes/chosen.LOD" + message: "Examine {input} and decide on a map of a given LOD limit before proceeding" + shell: + """ + echo -n -e '\nWhich map would you like to use (e.g. LOD.15)? LOD.' + read -r + echo -e "# the map chosen to use with OrderMarkers2\n3_SeparateChromosomes/LOD.$REPLY" > {output} + echo "A record of your choice can be found in {output}" + sleep 2s + """ + + rule join_singles: input: datacall = "2_Filtering/data.filtered.lepmap3.gz", - map_summ = "3_SeparateChromosomes/all.LOD.summary" + map_choice = "3_SeparateChromosomes/chosen.LOD" output: "LOD.master" - log: "3_SeparateChromosomes/chosen.LOD" - threads: 30 + threads: 40 message: "Joining singles to linkage groups" params: run_js2all = joinsingles, @@ -33,16 +48,13 @@ rule join_singles: extra = js2a_extra shell: """ - echo -n -e '\nWhich map would you like to use (e.g. LOD.15)? LOD.' - read -r - echo -e "# the map chosen to use with OrderMarkers2\nLOD.$REPLY" > {log} - echo "A record of your choice can be found in {log}" JS2A=$(echo {params.run_js2all} | tr '[:upper:]' '[:lower:]') + THEMAP=$(tail -1 {input.map_choice}) if [ $JS2A == "true" ]; then - zcat {input.datacall} | java -cp software/LepMap3 JoinSingles2All map=3_SeparateChromosomes/LOD.$REPLY data=- {params.extra} {params.lod_limit} {params.lod_diff} numThreads={threads} > {output} + zcat {input.datacall} | java -cp software/LepMap3 JoinSingles2All map=$THEMAP data=- {params.extra} {params.lod_limit} {params.lod_diff} numThreads={threads} > {output} else - echo -e "\nSkipping JoinSingles2All and creating a symlink instead" - ln -sr 3_SeparateChromosomes/LOD.$REPLY {output} + echo -e "\nSkipping JoinSingles2All and creating a symlink to $THEMAP instead" + ln -sr $THEMAP {output} fi sleep 2s - """ + """ \ No newline at end of file diff --git a/rules/LepMap3/order.smk b/rules/LepMap3/order.smk index 885924d..618611d 100644 --- a/rules/LepMap3/order.smk +++ b/rules/LepMap3/order.smk @@ -2,23 +2,23 @@ rule order_markers: input: datacall = "2_Filtering/data.filtered.lepmap3.gz", filt_map = "LOD.master" - output: "4_OrderMarkers/ordered.{lg_range}" + output: + lg = "4_OrderMarkers/ordered.{lg_range}", + runlog = temp("4_OrderMarkers/logs/ordered.{lg_range}.running") log: - runlog = temp("4_OrderMarkers/logs/ordered.{lg_range}.running"), run = "4_OrderMarkers/logs/ordered.{lg_range}.log", recomb = "4_OrderMarkers/recombination/ordered.{lg_range}.recombinations" message: "Ordering linkage group {params.chrom} with {params.iterations} iterations" params: chrom = "{lg_range}", - iterations = ITER, extra = order_extra threads: 2 shell: """ - zcat {input.datacall} | java -cp software/LepMap3 OrderMarkers2 map={input.filt_map} {params.extra} data=- numThreads={threads} numMergeIterations={params.iterations} chromosome={params.chrom} &> {log.runlog} - sed -n '/\*\*\* LG \=/,$p' {log.runlog} > {output} - grep "recombin" {log.runlog} > {log.recomb} - awk '/#java/{{flag=1}} flag; /logL/{{flag=0}}' {log.runlog} > {log.run} + zcat {input.datacall} | java -cp software/LepMap3 OrderMarkers2 map={input.filt_map} {params.extra} data=- numThreads={threads} chromosome={params.chrom} &> {output.runlog} + sed -n '/\*\*\* LG \=/,$p' {output.runlog} > {output.lg} + grep "recombin" {output.runlog} > {log.recomb} + awk '/#java/{{flag=1}} flag; /logL/{{flag=0}}' {output.runlog} > {log.run} """ rule recomb_summary: diff --git a/rules/LepMap3/reorder.smk b/rules/LepMap3/reorder.smk index 819d3c1..d82c260 100644 --- a/rules/LepMap3/reorder.smk +++ b/rules/LepMap3/reorder.smk @@ -3,23 +3,23 @@ rule reorder_markers: datacall = "2_Filtering/data.filtered.lepmap3.gz", filt_map = "LOD.master", lg_order = "5_Trim/ordered.{lg_range}.trimmed" - output: "6_OrderMarkers/ordered.{lg_range}" + output: + lg = "6_OrderMarkers/ordered.{lg_range}", + runlog = temp("6_OrderMarkers/logs/ordered.{lg_range}.running") log: - runlog = temp("6_OrderMarkers/logs/ordered.{lg_range}.running"), run = "6_OrderMarkers/logs/ordered.{lg_range}.log", recomb = "6_OrderMarkers/recombination/ordered.{lg_range}.recombination" message: "Reordering linkage group {params.lg} with {params.iterations} iterations" params: lg = "{lg_range}", - iterations = ITER2, extra = reorder_extra threads: 2 shell: """ - zcat {input.datacall} | java -cp software/LepMap3 OrderMarkers2 {params.extra} map={input.filt_map} data=- numThreads={threads} evaluateOrder={input.lg_order} numMergeIterations={params.iterations} &> {log.runlog} - sed -n '/\*\*\* LG \=/,$p' {log.runlog} > {output} - grep "recombin" {log.runlog} > {log.recomb} - awk '/#java/{{flag=1}} flag; /logL/{{flag=0}}' {log.runlog} > {log.run} + zcat {input.datacall} | java -cp software/LepMap3 OrderMarkers2 {params.extra} map={input.filt_map} data=- numThreads={threads} evaluateOrder={input.lg_order} &> {output.runlog} + sed -n '/\*\*\* LG \=/,$p' {output.runlog} > {output.lg} + grep "recombin" {output.runlog} > {log.recomb} + awk '/#java/{{flag=1}} flag; /logL/{{flag=0}}' {output.runlog} > {log.run} """ rule reorder_summary: diff --git a/scripts/MapSummary.r b/scripts/MapSummary.r index 0df8b32..03b3899 100755 --- a/scripts/MapSummary.r +++ b/scripts/MapSummary.r @@ -62,8 +62,6 @@ write.table(summtable, file = out_tmp, quote = FALSE, row.names = FALSE, col.nam cmd <- paste("column -t", out_tmp, ">", out_file, "&& rm", out_tmp) system(cmd) -cat(paste0("Examine the map summary (", out_file, ") and decide on the best map before proceeding\n\n")) - } else { cat("Error: the argument must be either a mapfile or a directory of mapfiles") } \ No newline at end of file diff --git a/scripts/params b/scripts/params new file mode 100755 index 0000000..6cec06c --- /dev/null +++ b/scripts/params @@ -0,0 +1,33 @@ +#! /usr/bin/env bash + +if [[ -z "$1" ]]; then +cat < +[example]: scripts/params OrderMarkers2 + +LepMap3 modules: +- ParentCall2 +- Filtering2 +- SeparateChromosomes2 +- JoinSingles2All +- OrderMarkers2 + +LepAnchor modules: +- Map2Bed +- CleanMap +- PlaceAndOrientContigs + +EOF + exit 1 +fi + +if [[ $1 == "Map2Bed" || $1 == "CleanMap" || $1 == "PlaceAndOrientContigs" ]]; then + java -cp software/LepAnchor $1 2>&1 +else + java -cp software/LepMap3 $1 2>&1 | tail -n +2 +fi \ No newline at end of file