forked from polyaB/nn_anopheles
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_big_region_prediction.py
48 lines (42 loc) · 2.65 KB
/
test_big_region_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import sys
# Make the bundled source trees importable. Every path is anchored at the
# directory of the launched script (sys.argv[0]), not the current directory.
_script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
source_path, source_path2, source_path3, source_path4 = (
    _script_dir + _suffix
    for _suffix in (
        "/basenji/source",
        "/basenji/basenji",
        "/3Dpredictor/source",
        "/source",
    )
)
sys.path.extend([source_path, source_path2, source_path3, source_path4])
from shared import Interval
import json
import dataset, dna_io, seqnn
from Predictions_interpeter import from_upper_triu, predict_big_region_from_seq
# Directory with the training output: params.json plus the saved weights.
# Alternative location used earlier: './explore_best_model/'
model_dir = '/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles/dataset_like_Akita/data/Aste_2048/train_out/'
params_file = model_dir + 'params.json'
# Alternative checkpoint used earlier: model_dir + 'model_check.h5'
model_file = model_dir + 'model_best.h5'

# Only the 'model' and 'train' sections of params.json are read here.
with open(params_file) as params_open:
    params = json.load(params_open)
params_model = params['model']
params_train = params['train']

# Build the network from the 'model' section, then load the saved weights.
# Previously model_file was assigned but never used, so the model handed to
# the prediction step was constructed without restoring any weights.
# NOTE(review): assumes predict_big_region_from_seq does not restore weights
# itself (it is never given model_file) - confirm.
seqnn_model = seqnn.SeqNN(params_model)
seqnn_model.restore(model_file)
# Load the dataset statistics stored alongside the training data.
data_dir = '/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles/dataset_like_Akita/data/Aste_2048'
data_stats_file = data_dir + '/statistics.json'
with open(data_stats_file) as data_stats_open:
    data_stats = json.load(data_stats_open)

# Values taken directly from statistics.json.
_pool_width = data_stats['pool_width']
seq_length = data_stats['seq_length']
target_length = data_stats['target_length']
hic_diags = data_stats['diagonal_offset']

# Derived sizes: crop and sequence length divided by pool_width, and the
# length that remains after removing the crop from both ends.
target_crop = data_stats['crop_bp'] // _pool_width
target_length1 = seq_length // _pool_width
target_length1_cropped = target_length1 - 2 * target_crop
# Run the prediction for the 2L:20,000,000-25,000,000 interval.
# binsize is pool_width and the stride is 400 * pool_width, both read from
# the dataset statistics; control data is enabled via use_control=True.
_region = Interval("2L", 20000000, 25000000)
_prediction_kwargs = dict(
    binsize=data_stats['pool_width'],
    seq_len=seq_length,
    stride=400 * data_stats['pool_width'],
    fasta_file="/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles/input/genomes/AsteI2_V4.fa",
    seqnn_model=seqnn_model,
    crop_bp=data_stats['crop_bp'],
    target_length_cropped=target_length1_cropped,
    hic_diags=hic_diags,
    prediction_folder="/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles//dataset_like_Akita/data/Aste_2048/prediction/",
    genome_hic_expected_file='/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles/input/coolers/Aste_2048.expected',
    use_control=True,
    genome_cool_file='/mnt/scratch/ws/psbelokopytova/202103211631polina/nn_anopheles/input/coolers/Aste_2048.cool',
)
predict_big_region_from_seq(_region, **_prediction_kwargs)