-
Notifications
You must be signed in to change notification settings - Fork 1
/
config_AML.yaml
65 lines (46 loc) · 2.81 KB
/
config_AML.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#############################################
# Output
#############################################
# Output directory.
output_dir: "results_AML"

# Number of top pyjacker hits for which a detailed report with figures is
# generated.
#   -1             figures are generated for all putative enhancer hijacking
#                  events.
#   smaller value  shorter runtime, since fewer (or even zero) figures need
#                  to be generated.
n_reports: -1

# Format of the images generated in the report: svg, png or pdf.
image_format: "svg"

# Resolution of the images generated in the report, if the format is png.
image_dpi: 200
#############################################
# Required inputs
#############################################
# NOTE(review): the descriptions below are inferred from the key and file
# names only; confirm the expected file formats against the tool's
# documentation.

# Expression table; presumably TPM values (per the key name).
RNA_TPM_file: "data/TPM_ckAML.tsv"

# Breakpoints table; presumably the breakpoints reported by the SV caller.
breakpoints: "data/breakpoints_ckAML.tsv"

# Gene annotation in GTF format (GRCh37 release 75, judging by the file name).
# Quoted like every other path in this file; the parsed value is unchanged.
gtf: "data/Homo_sapiens.GRCh37.75.gtf.gz"

# Cytoband table (hg19, judging by the file name).
cytobands: "data/cytobands_hg19.tsv"
#############################################
# Optional but strongly recommended inputs
#############################################
# Copy number alterations. They are used to correct for gene expression, and
# also to add breakpoints that were missed by the SV caller.
CNAs: "data/CNAs_ckAML.tsv"

# Directory containing the output of fast_ase (or GATK ASEReadCounter).
# It holds one tsv file per sample, named {sample}.tsv.
ase_dir: "data/ASE_ckAML"

# If imprinted genes are provided, allele-specific expression is ignored for
# them.
imprinted_genes_file: "data/imprinted_genes.txt"

# pyjacker looks for SVs in the same TAD as a gene (+ a margin).
# If no TADs are provided, it instead looks for SVs that lie up to
# max_dist_bp2tss from the TSS.
TADs_file: "data/TADs_HSPC_hg19.bed"
#max_dist_bp2tss: 1500000

# Enhancers scored by ROSE, for the correct cell type.
# Optional; if provided, they are used to score the candidate enhancer
# hijacking events, in addition to outlier expression and allele-specific
# expression.
enhancers: "data/enhancers_myeloid_hg19.tsv"
##############################################
# Additional parameters
##############################################
# Fusion transcripts, used for annotating the results.
fusions: "data/fusions_ckAML.tsv"

# The weights below influence the scoring of the candidate enhancer hijacking
# events; their default values should work well.
# Weight for the outlier high expression score.
weight_OHE: 4
# Weight for the allele-specific expression.
weight_ASE: 2
# Weight for the enhancers score.
weight_enhancers: 1
# Weight for the deletion (penalizes a gene that is deleted in a sample).
weight_deletion: 5

# Number of times all genes are iterated through when generating the null
# distribution for the false discovery rate. Fewer iterations run faster,
# but the resulting FDR is less precise.
n_iterations_FDR: 50

# Number of threads to use.
n_threads: 6