Skip to content

Commit

Permalink
Merge pull request #60 from databio/dev_igd_45
Browse files Browse the repository at this point in the history
Dev igd 45
  • Loading branch information
donaldcampbelljr authored Dec 20, 2024
2 parents b8afd94 + abaeb96 commit b33c233
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 117 deletions.
2 changes: 1 addition & 1 deletion gtars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ glob = "0.3.1"


[dev-dependencies]
rstest = "0.18.2"
rstest = "0.23.0"
tempfile = "3.8.1"
pretty_assertions = "1.4.0"
22 changes: 16 additions & 6 deletions gtars/src/igd/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ pub struct igd_t {
pub mctg: i32, //data type: 0, 1, 2 etc; size differs
pub total: i64, // total region in each ctg
pub ctg: Vec<ctg_t>, // this is the list of contigs (of size n-ctg) // this might need to be a reference
pub total_regions: i32,
pub total_average: f32,
pub average_length: f32,
}

impl igd_t {
Expand Down Expand Up @@ -100,11 +103,11 @@ pub fn igd_get_create_matches(matches: &ArgMatches) {
.get_one::<String>("dbname")
.expect("File list path is required");

create_igd_f(output_path, filelist, db_output_name);
let _igd = create_igd_f(output_path, filelist, db_output_name);
}

/// Creates an IGD database from a directory of bed files.
pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) {
pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) -> igd_t {
//println!("{}",db_output_name);
//Initialize IGD into Memory
let mut igd = igd_t::new();
Expand Down Expand Up @@ -373,14 +376,20 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St
// Sort tile data and save into single files per ctg
igd_save_db(&mut igd, output_path, db_output_name);

igd.total_regions=total_regions;
igd.total_average=total_avg_size;
igd.average_length= total_avg_size / total_regions as f32;

let save_path = format!("{}{}{}", output_path, db_output_name, ".igd");
println!("IGD saved to: {}", save_path);
println!(
"Total Intervals: {}, l_avg: {}",
total_regions,
total_avg_size / total_regions as f32
igd.total_regions,
igd.average_length
);
println!("nctg:{} nbp:{}", igd.nctg, igd.nbp);

igd // return for testing purposes
}

/// Saves the primary .igd database file by reading the temp_tiles, sorting them, and then writing the sorted tiles to disk.
Expand Down Expand Up @@ -560,7 +569,7 @@ pub fn igd_save_db(igd: &mut igd_t, output_path: &String, db_output_name: &Strin
let _ = main_db_file.write_all(&temp_buffer);
}

q.nCnts = 0;
//q.nCnts = 0;
}
}

Expand Down Expand Up @@ -631,7 +640,7 @@ pub fn igd_saveT(igd: &mut igd_t, output_file_path: &String) {
}
file.write_all(&buffer).unwrap();

current_tile.nCnts = current_tile.ncnts + 1;
current_tile.nCnts = current_tile.nCnts + current_tile.ncnts;

if current_tile.ncnts > 8 {
current_tile.mcnts = 8;
Expand Down Expand Up @@ -811,6 +820,7 @@ pub fn igd_add(
gdata.start = start;
gdata.end = end;
gdata.value = v;
//println!("Adding to igd, start {}, idx {}", start,idx);
gdata.idx = idx as i32;

igd.total += 1;
Expand Down
22 changes: 14 additions & 8 deletions gtars/src/igd/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,14 +296,14 @@ fn get_overlaps(
// );

if tmpi > 0 {
if n1 != *preIdx || ichr != *preChr {

// println!(
// "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n",
// n1, preIdx, ichr, preChr
// );

//println!("Seek start here: {}",IGD.tIdx[ichr as usize][n1 as usize]);

//let ichr = 1;
db_reader
.seek(SeekFrom::Start(IGD.tIdx[ichr as usize][n1 as usize] as u64))
.unwrap();
Expand Down Expand Up @@ -333,8 +333,9 @@ fn get_overlaps(
let end = rdr.read_i32::<LittleEndian>().unwrap();
let value = rdr.read_i32::<LittleEndian>().unwrap();

//println!("for tmpi>0 where tmpi = {}", tmpi);
//println!("Looping through g_datat in temp files\n");
// println!("idx: {} start: {} end: {}\n", idx,start,end);
//println!("idx: {} start: {} end: {}\n", idx,start,end);

gData[i as usize] = gdata_t {
idx: idx,
Expand All @@ -352,7 +353,7 @@ fn get_overlaps(

if query_end > gData[0].start {
// sorted by start
//println!("query_end > gData[0].start: {} > {}", query_end,gData[0].start);
//println!("n1 != *preIdx || ichr != *preChr query_end > gData[0].start: {} > {}", query_end,gData[0].start);
// find the 1st rs<qe
tL = 0;
tR = tmpi1;
Expand All @@ -371,17 +372,19 @@ fn get_overlaps(
}
//--------------------------
for i in (0..=tL).rev() {
//println!("Countdownfrom TL");
// count down from tL (inclusive to tL)
//println!("iterate over i: {} ", i);
//println!("iterate over i: {} from tL {}", i, tL);
//println!("gdata[i].end {} vs query start {}",gData[i as usize].end,query_start);
if gData[i as usize].end > query_start {
//println!("ADDING TO HITS");
//println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start);
hits[gData[i as usize].idx as usize] =
hits[gData[i as usize].idx as usize] + 1;
}
}
}
}


if n2 > n1 {
//println!("n2>n1 {} vs {} ", n2, n1);
Expand Down Expand Up @@ -423,8 +426,9 @@ fn get_overlaps(
let end = rdr.read_i32::<LittleEndian>().unwrap();
let value = rdr.read_i32::<LittleEndian>().unwrap();


//println!("Looping through g_datat in temp files\n");
//println!("idx: {} start: {} end: {}\n", idx,start,end);
// println!("idx: {} start: {} end: {}\n", idx,start,end);

gData.push(gdata_t {
idx: idx,
Expand All @@ -439,6 +443,7 @@ fn get_overlaps(
}

if query_end > gData[0].start {
//println!("n2>n1 query_end > gData[0].start: {} > {}", query_end,gData[0].start);
tS = 0;

while tS < tmpi && gData[tS as usize].start < bd {
Expand Down Expand Up @@ -478,6 +483,7 @@ fn get_overlaps(
}
}
}
//println!("here are the hits {:?}", hits);
return nols; //TODO this is from the original code but it's not actually being used for anything. hits vec IS the main thing.
}

Expand Down Expand Up @@ -567,7 +573,7 @@ pub fn get_igd_info(
reader.read_exact(&mut buffer)?;
let nCtg = i32::from_le_bytes(buffer);

//println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg);
println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg);

igd.nbp = nbp;
igd.gType = gType;
Expand Down
15 changes: 0 additions & 15 deletions gtars/tests/data/igd_file_list/bad_bed_file.notbed

This file was deleted.

8 changes: 0 additions & 8 deletions gtars/tests/data/igd_file_list/bad_bed_file_2.notbed

This file was deleted.

4 changes: 0 additions & 4 deletions gtars/tests/data/igd_file_list/igd_bed_file_1.bed

This file was deleted.

37 changes: 0 additions & 37 deletions gtars/tests/data/igd_file_list/igd_bed_file_2.notbed

This file was deleted.

8 changes: 8 additions & 0 deletions gtars/tests/data/igd_file_list_01/igd_bed_file_1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
8 changes: 8 additions & 0 deletions gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
4 changes: 4 additions & 0 deletions gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
chr2 652554 652780 SRX4150706.05_peak_5 157 . 2.14622 20.42377 15.73019 44
chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191
chr11 951681 952010 SRX4150706.05_peak_247 205 . 11.82913 25.65609 20.56433 139
chr11 1248894 1249428 SRX4150706.05_peak_248 252 . 11.83432 30.63056 25.20567 179
8 changes: 8 additions & 0 deletions gtars/tests/data/igd_query_files/query1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
2 changes: 2 additions & 0 deletions gtars/tests/data/igd_query_files/query2.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chr3 49152 49352
chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191
Loading

0 comments on commit b33c233

Please sign in to comment.