-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a956183
commit 7b1c36c
Showing
5 changed files
with
190 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,51 @@ | ||
# RNA-Seq分析流程 | ||
## 安装 | ||
此流程用OSA进行比对,安装OSA参考此[网页](http://www.arrayserver.com/wiki/index.php?title=Oshell#Overview)。 | ||
此流程依赖于array suite,安装array suite参考此[网页](http://www.arrayserver.com/wiki/index.php?title=Oshell#Overview)。 | ||
|
||
sample.list格式为所有样品目录路径,目录下为fastq文件 | ||
## 主要参数 | ||
主程序为`RNA_seq_align_all_module.pl`,用`-h`显示所有参数如下: | ||
``` | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S1 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S2 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S3 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S4 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S5 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S6 | ||
RNA-Seq pipeline | ||
Usage: RNA_seq_align_all_module.pl <options> | ||
-sample_list sample list | ||
-out_dir output result folder | ||
-ref_dir reference OSA base path | ||
-ref reference name | ||
-gene_model gene_model name | ||
-gtf gene gtf file | ||
-gtf_exon exon gff file for DEXSeq | ||
-step align or merge_rmdup or count or count_exon or tdf | ||
-h or -help Show Help , have a choice | ||
``` | ||
|
||
nohup /usr/local/bin/bcl2fastq --runfolder-dir /home/yliu/data/Xin/2016_08_01_ZY/ --output-dir 2016_08_01_ZY_100bp --sample-sheet 2016_08_01_ZY.csv --use-bases-mask Y100n51,I6,Y100n51 | ||
|
||
mono /home/others/xli/soft/oshell/16-8-11/oshell.exe --runscript /home/others/xli/data/genome /home/others/xli/RNA-Seq/Alignment.oscript /tmp/ /opt/apps/mono/bin/mono | ||
`sample.list`为所有样品目录路径,目录下为fastq.gz文件,格式如下: | ||
``` | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S1 | ||
/home/others/xli/raw_data/2016_08_01_ZY/RNA-Seq/S2 | ||
``` | ||
`-ref`默认基因组版本Human.B37.3 | ||
`-gene_model`默认版本为RefGene | ||
基因组和基因注释版本对应信息参考此[网页](http://www.arrayserver.com/wiki/index.php?title=A_list_of_compiled_genome_and_gene_model_from_OmicSoft) | ||
|
||
|
||
perl ~/scripts/RNA_seq_align_all_module.pl -sample_list sample.list -step align | ||
## alignment | ||
``` | ||
perl ~/scripts/rnaseq_pipeline/RNA_seq_align_all_module.pl -sample_list sample.list -out_dir /home/others/xli/RNA-Seq -step align | ||
``` | ||
生成`align.sh`文件和响应的`*.oscript`文件。用`qsub`提交`align.sh`运行alignment步骤。完成后所有结果在`/home/others/xli/RNA-Seq/sample_name`目录。 | ||
|
||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -ref_dir ~/amber3/no_back_up/data/genome/hg19_OSA/ -ref Human.B37 -gene_model RefGene -step align | ||
## 合并bam文件去duplicates | ||
|
||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -ref_dir ~/amber3/no_back_up/data/genome/galgal4/for_OSA/ -ref gal4 -gene_model gal4gene -step merge | ||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -gtf ~/amber3/no_back_up/data/gene/chicken/Gallus_gallus.Galgal4.71.gtf -step count | ||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -gtf_exon ~/amber3/no_back_up/data/gene/chicken/Gallus_gallus.Galgal4.71.for.dexseq.gff -step count_exon | ||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -ref_dir ~/amber3/no_back_up/data/genome/galgal4/for_OSA/ -ref gal4 -gene_model gal4gene -step align_sum | ||
perl ~/scripts/RNA_seq_align.pl -sample_list sample.list -ref_dir ~/amber3/no_back_up/data/genome/galgal4/for_OSA/ -step tdf | ||
## 生成可供IGV broswer浏览的tdf文件 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,9 @@ | |
|
||
chomp($fastq_files); | ||
|
||
my $slash="\\\\"; | ||
my $back="\b"; | ||
|
||
print O <<"EOT"; | ||
Begin Macro; | ||
|
@@ -53,156 +56,157 @@ | |
\@GeneModelName\@ $gene_model; | ||
End; | ||
EOT | ||
|
||
|
||
|
||
print O <<'EOT2'; | ||
#region Oshell run | ||
//Create the OmicSoft project enviroment | ||
Begin NewProject; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
File "@ProjectFolder@/@[email protected]"; | ||
Options /Distributed=true; | ||
End; | ||
#region Raw data QC | ||
//QC: Raw sequence, using NgsQCWizard | ||
Begin NgsQCWizard /Namespace=NgsLib; | ||
Files | ||
"\@FileNames\@"; | ||
Options /ThreadNumberPerJob=\@ThreadNum\@ /FileFormat=AUTO /CompressionMethod=\@CompressionMethod\@ /QualityEncoding=Automatic /PreviewMode=false | ||
"@FileNames@"; | ||
Options /ThreadNumberPerJob=@ThreadNum@ /FileFormat=AUTO /CompressionMethod=@CompressionMethod@ /QualityEncoding=Automatic /PreviewMode=false | ||
/BasicStatistics=true /BaseDistribution=true /QualityBoxPlot=true /KMerAnalysis=true /SequenceDuplication=true; | ||
Output rawseq_qc; | ||
End; | ||
// save the OmicSoft project enviroment after each major step | ||
Begin SaveProject; | ||
Project \@ProjectName\@; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
Project @ProjectName@; | ||
File "@ProjectFolder@/@[email protected]"; | ||
End; | ||
#endregion | ||
#region RNA-Seq alignment to human reference using OSA | ||
Begin MapRnaSeqReadsToGenome /Namespace=NgsLib; | ||
Files | ||
"\@FileNames\@"; | ||
Reference \@ReferenceName\@; | ||
GeneModel \@GeneModelName\@; | ||
"@FileNames@"; | ||
Reference @ReferenceName@; | ||
GeneModel @GeneModelName@; | ||
Trimming /Mode=TrimByQuality /ReadTrimQuality=2; | ||
Options /BamSubFolder=primary_alignment /ParallelJobNumber=1 /PairedEnd=\@PairedEnd\@ /FileFormat=AUTO /AutoPenalty=True | ||
Options /BamSubFolder=primary_alignment /ParallelJobNumber=1 /PairedEnd=@PairedEnd@ /FileFormat=AUTO /AutoPenalty=True | ||
/FixedPenalty=2 /Greedy=false /IndelPenalty=2 /DetectIndels=False /MaxMiddleInsertionSize=10 /MaxMiddleDeletionSize=10 | ||
/MaxEndInsertionSize=10 /MaxEndDeletionSize=10 /MinDistalEndSize=3 /ExcludeNonUniqueMapping=True /ReportCutoff=10 | ||
/WriteReadsInSeparateFiles=True /OutputFolder="" /GenerateSamFiles=True /ThreadNumberPerJob=\@ThreadNum\@ | ||
/WriteReadsInSeparateFiles=True /OutputFolder="" /GenerateSamFiles=True /ThreadNumberPerJob=@ThreadNum@ | ||
/InsertSizeStandardDeviation=40 /ExpectedInsertSize=300 /InsertOnSameStrand=False /InsertOnDifferentStrand=True | ||
/QualityEncoding=Automatic /CompressionMethod=\@CompressionMethod\@ /Gzip=\@Gzip\@ /SearchNovelExonJunction=True /ExcludeUnmappedInBam=True; | ||
/QualityEncoding=Automatic /CompressionMethod=@CompressionMethod@ /Gzip=@Gzip@ /SearchNovelExonJunction=True /ExcludeUnmappedInBam=True; | ||
Output primary_alignment; | ||
End; | ||
Begin SaveProject; | ||
Project \@ProjectName\@; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
Project @ProjectName@; | ||
File "@ProjectFolder@/@[email protected]"; | ||
End; | ||
#endregion | ||
#region Alignment QC | ||
//Alignment QC Metrics using RnaSeqQCMetrics | ||
Begin RnaSeqQCMetrics /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Metrics Alignment,Flag,Profile,Source,InsertSize,Duplication,Coverage,Strand; | ||
Options /ThreadNumberPerJob=\@ThreadNum\@ /ExcludeFailedAlignments=true /ExcludeSecondaryAlignments=true | ||
Options /ThreadNumberPerJob=@ThreadNum@ /ExcludeFailedAlignments=true /ExcludeSecondaryAlignments=true | ||
/ExcludeMultiReads=false /ExcludeSingletons=false; | ||
Output alignment_qc; | ||
End; | ||
//RNA-Seq 5'->3' trend using SummarizeRnaSeqTrend53 | ||
Begin SummarizeRnaSeqTrend53 /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Options /OutputFolder="\@ProjectFolder\@/\@ProjectName\@/primary_alignment" /ThreadNumberPerJob=\@ThreadNum\@ | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Options /OutputFolder="@ProjectFolder@/@ProjectName@/primary_alignment" /ThreadNumberPerJob=@ThreadNum@ | ||
/BinNumber=100 /TranscriptLengthBins=500, 1000, 2000, 3000, 4000, 5000 /ExcludeGenesWithMultipleIsoforms=true | ||
/ScaleCoverage=true /ReportTranscriptData=false; | ||
Output qc_trend53; | ||
End; | ||
Begin SaveProject; | ||
Project \@ProjectName\@; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
Project @ProjectName@; | ||
File "@ProjectFolder@/@[email protected]"; | ||
End; | ||
#endregion | ||
#region Quantification at gene, isoform, exon and exon-junction level | ||
//gene level rsem estimation | ||
Begin ReportGeneTranscriptCounts /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Options /ExpressionMeasurement=RPKM /ThreadNumberPerJob=\@ThreadNum\@ /Add1=False /CountFragments=True /ExcludeMultiReads=False | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Options /ExpressionMeasurement=RPKM /ThreadNumberPerJob=@ThreadNum@ /Add1=False /CountFragments=True /ExcludeMultiReads=False | ||
/UseEffectiveTranscriptLength=True /CountStrandedReads=True /CountReverseStrandedReads=True; | ||
Output quantification_gene; | ||
End; | ||
//transcript level rsem estimation | ||
Begin ReportGeneTranscriptCounts /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Options /ExpressionMeasurement=RPKM_Transcript /ThreadNumberPerJob=\@ThreadNum\@ /Add1=False /CountFragments=True /ExcludeMultiReads=False | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Options /ExpressionMeasurement=RPKM_Transcript /ThreadNumberPerJob=@ThreadNum@ /Add1=False /CountFragments=True /ExcludeMultiReads=False | ||
/UseEffectiveTranscriptLength=True /CountStrandedReads=True /CountReverseStrandedReads=True; | ||
Output quantification_tx; | ||
End; | ||
//quantify the known exon and exon junctions based on a gene model | ||
Begin ReportExonCounts /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Options /ThreadNumberPerJob=\@ThreadNum\@ /ReportExonCounts=True /ReportExonJunctionCounts=True /ExcludeSingletons=False /Rpkm=True | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Options /ThreadNumberPerJob=@ThreadNum@ /ReportExonCounts=True /ReportExonJunctionCounts=True /ExcludeSingletons=False /Rpkm=True | ||
/CountStrandedReads=True /CountReverseStrandedReads=True; | ||
Output quantification_known; | ||
End; | ||
//quantify the each exon junction based on alignment and annotate with gene model info (known or novel exon junctions) | ||
Begin ReportExonJunctions /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
Options /ExcludeMultiReads=False /ExcludeSingletons=False /NMCutoff=4 /JunctionOverhangCutoff=4 /GenerateReport=True | ||
/GenerateBedFile=False /BedFileOutputFolder="" /MinimalHit=1 /ThreadNumberPerJob=\@ThreadNum\@ | ||
/OutputFolder="\@ProjectFolder\@/\@ProjectName\@/primary_alignment"; | ||
/GenerateBedFile=False /BedFileOutputFolder="" /MinimalHit=1 /ThreadNumberPerJob=@ThreadNum@ | ||
/OutputFolder="@ProjectFolder@/@ProjectName@/primary_alignment"; | ||
Output quantification_jxn; | ||
End; | ||
Begin SaveProject; | ||
Project \@ProjectName\@; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
Project @ProjectName@; | ||
File "@ProjectFolder@/@[email protected]"; | ||
End; | ||
#endregion | ||
#region Fusion gene detection | ||
//Detect fusion based on inter-transcript read pairs (PE) | ||
Begin ReportPairedEndFusionGenes /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
GeneModel \@GeneModelName\@; | ||
Options /GenerateData=True /OutputFusionReads=True /MinimalHit=2 /FusionReportCutoff=1 /ThreadNumberPerJob=\@ThreadNum\@ | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
GeneModel @GeneModelName@; | ||
Options /GenerateData=True /OutputFusionReads=True /MinimalHit=2 /FusionReportCutoff=1 /ThreadNumberPerJob=@ThreadNum@ | ||
/FilterBy=DefaultList /DefaultFilterListVersion=v1 /FilterGeneListFileName="" /FilterGeneFamilyFileName="" | ||
/GenerateTableland=True /OutputFolder="\@ProjectFolder\@/\@ProjectName\@/fusion_PEReads"; | ||
/GenerateTableland=True /OutputFolder="@ProjectFolder@/@ProjectName@/fusion_PEReads"; | ||
Output FusionPE; | ||
End; | ||
//Detect fusion based alignment of junction spanning reads (SE) | ||
Begin MapFusionReads /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
Reference \@ReferenceName\@; | ||
GeneModel \@GeneModelName\@; | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
Reference @ReferenceName@; | ||
GeneModel @GeneModelName@; | ||
Trimming /Mode=TrimByQuality /ReadTrimQuality=2; | ||
Options /FusionVersion=2 /ParallelJobNumber=1 /PairedEnd=False /RnaMode=True /FileFormat=BAM /AutoPenalty=True /FixedPenalty=2 | ||
/OutputFolder="\@ProjectFolder\@/\@ProjectName\@/fusion_SE_alignment" /MaxMiddleInsertionSize=10 | ||
/ThreadNumberPerJob=\@ThreadNum\@ /QualityEncoding=Automatic /CompressionMethod=None /Gzip=False /MinimalFusionAlignmentLength=25 | ||
/OutputFolder="@ProjectFolder@/@ProjectName@/fusion_SE_alignment" /MaxMiddleInsertionSize=10 | ||
/ThreadNumberPerJob=@ThreadNum@ /QualityEncoding=Automatic /CompressionMethod=None /Gzip=False /MinimalFusionAlignmentLength=25 | ||
/FilterUnlikelyFusionReads=True /FullLengthPenaltyProportion=8 /OutputFusionReads=True /MinimalHit=4 /MinimalFusionSpan=5000 | ||
/FusionReportCutoff=1 /NonCanonicalSpliceJunctionPenalty=2 /FilterBy=DefaultList /DefaultFilterListVersion=v1 | ||
/FilterGeneListFileName="" /FilterGeneFamilyFileName="" /GenerateTableland=True; | ||
|
@@ -213,47 +217,47 @@ | |
#region Mutation detection | ||
//detect mutation and annotate with dbSNP; will download dbSNP from omicsoft at first time | ||
Begin SummarizeMutation /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\primary_alignment; | ||
Project @ProjectName@; | ||
Data @ProjectName@\\primary_alignment; | ||
Options /BaseQualityCutoff=20 /MapQualityCutoff=20 /MinimalIndelSize=1 /ExcludeSingletons=False /ExcludeMultiReads=False | ||
/LeftExclusion=5 /RightExclusion=5 /ThreadNumberPerJob=\@ThreadNum\@ /MinimalTotalHit=20 /MinimalMutationHit=5 /MinimalMutationFrequency=0.15 | ||
/LeftExclusion=5 /RightExclusion=5 /ThreadNumberPerJob=@ThreadNum@ /MinimalTotalHit=20 /MinimalMutationHit=5 /MinimalMutationFrequency=0.15 | ||
/ExcludeNonMutantSites=True /GenerateSummarizedReport=True /GenerateIndividualReport=False /GenerateTableland=True | ||
/MaxFrequencyCutoff=0.15 /DbsnpVersion=v135 /OutputFolder="\@ProjectFolder\@/\@ProjectName\@/mutation"; | ||
/MaxFrequencyCutoff=0.15 /DbsnpVersion=v135 /OutputFolder="@ProjectFolder@/@ProjectName@/mutation"; | ||
Output MutationSummary; | ||
End; | ||
//Annotate Mutation | ||
Begin AnnotateMutation /Namespace=NgsLib; | ||
Project \@ProjectName\@; | ||
Data \@ProjectName\@\\MutationSummary.MutationReport; | ||
Project @ProjectName@; | ||
Data @ProjectName@\\MutationSummary.MutationReport; | ||
ID ID; | ||
Chromosome Chromosome; | ||
Position Position; | ||
Mutation Mutation; | ||
Other (default); | ||
Options /ReferenceLibraryID=\@ReferenceName\@ /GeneModelID=\@GeneModelName\@ /DbsnpVersion=v135 /GenerateClusteringFlag=True | ||
Options /ReferenceLibraryID=@ReferenceName@ /GeneModelID=@GeneModelName@ /DbsnpVersion=v135 /GenerateClusteringFlag=True | ||
/ClusteringFlagWindowSize=10 /GenerateTableland=True /AnnotateLongestTranscriptOnly=False /AnnotateFunctionalMutation=False | ||
/AnnotateSomaticMutation=False; | ||
Output MutationAnnotated.MutationReport; | ||
End; | ||
Begin SaveProject; | ||
Project \@ProjectName\@; | ||
File "\@ProjectFolder\@/\@ProjectName\@.osprj"; | ||
Project @ProjectName@; | ||
File "@ProjectFolder@/@[email protected]"; | ||
End; | ||
#endregion | ||
Begin ExportView; | ||
Project \@ProjectName\@; | ||
OutputFolder "\@ProjectFolder\@/\@ProjectName\@/ExportedResults"; | ||
Project @ProjectName@; | ||
OutputFolder "@ProjectFolder@/@ProjectName@/ExportedResults"; | ||
End; | ||
Begin CloseProject; | ||
Project \@ProjectName\@; | ||
Project @ProjectName@; | ||
End; | ||
EOT | ||
EOT2 | ||
|
||
|
||
|
||
|
Oops, something went wrong.