update

joybio · Feb 28, 2023 · aacf3b9 · aacf3b9
1 parent ff8f762
commit aacf3b9
Show file tree

Hide file tree

Showing 8 changed files with 772 additions and 226 deletions.
diff --git a/multiPrime.yaml b/multiPrime.yaml
@@ -45,7 +45,7 @@ max_seq: 500
 ## primer design by degePrimer
 ##-------------------------------------------------------------------------------
 #param in DegePrime.pl -d {}. number of the degenerate nucleotide
-degeneracy: 2 
+degeneracy: 10 
 #param in DegePrime.pl -l {}. length of the degenerate primers
 primer_len: 18
 
@@ -63,10 +63,10 @@ primer_len: 18
 coverage: 0.7
 
 #PCR PRODUCT SIZE. Filter primers by PRODUCT size. Default [150,400].
-PRODUCT_size: 300,700
+PRODUCT_size: 150,1200
 
 #Filter primers by GC content. default [0.45,0.65].
-gc_content: 0.4,0.6
+gc_content: 0.2,0.7
 
 #Filter param of hairpin, which means distance of the minimal paired bases. Default: 4. Example:(number of X) AGCT[XXXX]AGCT
 distance: 4

diff --git a/multiPrime2.py b/multiPrime2.py
@@ -124,7 +124,9 @@ def aggregate_input(wildcards):
 	params:
 		script = config["scripts_dir"],
 		max_seq = config["max_seq"],
-		threshold =  config["seq_number_ANI"]
+		threshold = config["seq_number_ANI"],
+		drop = config["drop"],
+		ani = config["ani"]
 	message:
 		"Step5: extract fasta in each cluster from cd-hit results .."
 	shell:
@@ -133,7 +135,7 @@ def aggregate_input(wildcards):
 			 -m {params.max_seq} -o {output[0]} -y {output[2]} -d {output[1]};
 
 		python {params.script}/merge_cluster_by_ANI.py -i {output[0]} -p 20 -t {params.threshold} \
-			-o {output[3]}
+			-o {output[3]} -d {params.drop} -a {params.ani}
 		'''
 
 #-------------------------------------------------------------------------------------------

diff --git a/multiPrime2.yaml b/multiPrime2.yaml
@@ -41,7 +41,12 @@ identity: 0.7
 # greater than {seq_number_ANI}. It is used to reduce cluster number. 
 # If seq_number_ANI = 0, then all clusters will be processed 
 # If seq_number_ANI = 1, then no clusters will be processed
-seq_number_ANI: 1
+seq_number_ANI: 60
+# Threshold of average nucleotide identity, minimum: 0.7.
+ani: 0.8
+# Merge or drop clusters that have rare sequences and show high ANI with others. T: drop; F: merge into others
+drop: "T"
+
 #Max sequence number of each cluster used in multi-alignment {muscle}. 
 #We randomly choose {maxseq} sequences in cluster for next steps.
 #It won't affect the calculation of primer coverage in the final primer set, 

diff --git a/scripts/extract_cluster_V3.py b/scripts/extract_cluster_V3.py
@@ -74,20 +74,20 @@ def argsParse():
                            'It will be used as input file of snakemake pipeline')
     parser.add_option('-m', '--max',
                       dest='max',
-                      default=1000,
+                      default=500,
                       type="int",
-                      help='max sequence number in 1 cluster. Default: 1000.')
+                      help='max sequence number in 1 cluster. Default: 500.')
     parser.add_option('-d', '--dir',
                       dest='dir',
-                      default="Cluster_fa",
+                      default="Clusters_fa",
                       type="str",
-                      help='directory of output fasta: clusters information. Default: Cluster_fa.')
+                      help='directory of output fasta: clusters information. Default: Clusters_fa.')
 
     parser.add_option('-p', '--proc',
                       dest='proc',
-                      default="10",
+                      default="1",
                       type="int",
-                      help='Number of process to launch.  default: 10.')
+                      help='Number of process to launch.  default: 1.')
 
     (options, args) = parser.parse_args()
     import sys

diff --git a/scripts/merge_cluster_by_ANI.py b/scripts/merge_cluster_by_ANI.py
diff --git a/scripts/merge_cluster_by_ANI.py b/scripts/merge_cluster_by_ANI.py
@@ -0,0 +1 @@
+merge_cluster_by_ANI_V3.py