From d17c7dadd807614c5672fffe67c3c6ed15373fe7 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 13:36:28 -0500 Subject: [PATCH 01/11] begin adding more tests to cover igd workflow --- gtars/src/igd/create.rs | 6 +- gtars/src/igd/search.rs | 4 +- .../data/igd_file_list_01/igd_bed_file_2.bed | 8 ++ gtars/tests/test.rs | 91 ++++++++++++++----- 4 files changed, 80 insertions(+), 29 deletions(-) create mode 100644 gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed diff --git a/gtars/src/igd/create.rs b/gtars/src/igd/create.rs index 1bceea3e..ebd31ef3 100644 --- a/gtars/src/igd/create.rs +++ b/gtars/src/igd/create.rs @@ -100,11 +100,11 @@ pub fn igd_get_create_matches(matches: &ArgMatches) { .get_one::("dbname") .expect("File list path is required"); - create_igd_f(output_path, filelist, db_output_name); + let _igd = create_igd_f(output_path, filelist, db_output_name); } /// Creates IGD database from a directory of bed files. -pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) { +pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) -> igd_t { //println!("{}",db_output_name); //Initialize IGD into Memory let mut igd = igd_t::new(); @@ -381,6 +381,8 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St total_avg_size / total_regions as f32 ); println!("nctg:{} nbp:{}", igd.nctg, igd.nbp); + + igd // return for testing purposes } /// Saves the primary .igd database file by reading the temp_tiles, sorting them, and then writing the sorted tiles to disk. diff --git a/gtars/src/igd/search.rs b/gtars/src/igd/search.rs index 7426706b..1e0b9cc2 100644 --- a/gtars/src/igd/search.rs +++ b/gtars/src/igd/search.rs @@ -303,7 +303,7 @@ fn get_overlaps( // ); //println!("Seek start here: {}",IGD.tIdx[ichr as usize][n1 as usize]); - + //let ichr = 1; db_reader .seek(SeekFrom::Start(IGD.tIdx[ichr as usize][n1 as usize] as u64)) .unwrap(); @@ -567,7 +567,7 @@ pub fn get_igd_info( reader.read_exact(&mut buffer)?; let nCtg = i32::from_le_bytes(buffer); - //println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg); + println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg); igd.nbp = nbp; igd.gType = gType; diff --git a/gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed b/gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed new file mode 100644 index 00000000..daae26c5 --- /dev/null +++ b/gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed @@ -0,0 +1,8 @@ +chr1 1 100 +chr1 200 300 +chr1 32768 32868 +chr1 49152 49352 +chr2 1 100 +chr2 200 300 +chr3 32768 32868 +chr3 49152 49352 diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index dd39cfc5..7502671d 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -111,18 +111,53 @@ mod tests { #[rstest] fn test_igd_create() { + //let tempdir = tempfile::tempdir().unwrap(); + //let path = PathBuf::from(&tempdir.path()); + // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); + // let db_output_path = db_path_unwrapped; + + let db_output_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/"); + + let path_to_crate = env!("CARGO_MANIFEST_DIR"); + //let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); + let testfilelists = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/source_files/"); + + let demo_name = String::from("demo"); + + create_igd_f(&db_output_path, &testfilelists, &demo_name); + } + #[rstest] + fn test_igd_create_short_long_regions() { + // Depending on start and end coordinates which are divided by nbp=16384 + // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately let tempdir = tempfile::tempdir().unwrap(); let path = PathBuf::from(&tempdir.path()); - let db_path_unwrapped = path.into_os_string().into_string().unwrap(); let db_output_path = db_path_unwrapped; + //let db_output_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/"); + let path_to_crate = env!("CARGO_MANIFEST_DIR"); - let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); + let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/"); + //let testfilelists = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/source_files/"); let demo_name = String::from("demo"); - create_igd_f(&db_output_path, &testfilelists, &demo_name); + let igd = create_igd_f(&db_output_path, &testfilelists, &demo_name); + assert_eq!(igd.ctg[0].name, "chr1"); + assert_eq!(igd.ctg[1].name, "chr2"); + assert_eq!(igd.ctg[2].name, "chr3"); + assert_eq!(igd.nctg, 3); + + + + assert_eq!(igd.ctg[0].mTiles, 4); // chr1 has 4 Tiles because of the 32768, and 49152 starts + assert_eq!(igd.ctg[1].mTiles, 1); // chr only has 1 Tile due to the 200 start + + assert_eq!(igd.ctg[0].gTile[0].gList[0].start, 1); + assert_eq!(igd.ctg[0].gTile[(igd.ctg[0].mTiles-1)as usize].gList[0].start,49152) + + } // #[rstest] @@ -146,28 +181,34 @@ mod tests { // First must create temp igd // Temp dir to hold igd - let tempdir = tempfile::tempdir().unwrap(); - let path = PathBuf::from(&tempdir.path()); - let db_path_unwrapped = path.into_os_string().into_string().unwrap(); - let db_output_path = db_path_unwrapped; - - // bed files used to create IGD - let path_to_crate = env!("CARGO_MANIFEST_DIR"); - let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); - - let demo_name = String::from("demo"); - - // Create IGD from directory of bed files - create_igd_f(&db_output_path, &testfilelists, &demo_name); - - // Get a query file path from test files - let query_file = format!( - "{}{}", - path_to_crate, "/tests/data/igd_file_list/igd_bed_file_1.bed" - ); - - // the final db path will be constructed within igd_save_db like so - let final_db_save_path = format!("{}{}{}", db_output_path, demo_name, ".igd"); + // let tempdir = tempfile::tempdir().unwrap(); + // let path = PathBuf::from(&tempdir.path()); + // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); + // let db_output_path = db_path_unwrapped; + // + // // bed files used to create IGD + // let path_to_crate = env!("CARGO_MANIFEST_DIR"); + // let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); + // + // let demo_name = String::from("demo"); + // + // // Create IGD from directory of bed files + // create_igd_f(&db_output_path, &testfilelists, &demo_name); + // + // // Get a query file path from test files + // let query_file = format!( + // "{}{}", + // path_to_crate, "/tests/data/igd_file_list/igd_bed_file_1.bed" + // ); + // + // // the final db path will be constructed within igd_save_db like so + // let final_db_save_path = format!("{}{}{}", db_output_path, demo_name, ".igd"); + + // let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/rust_test.igd"); + // let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/search_file/query4.bed"); + + let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/output_files/rust_test2.igd"); + let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/query2.bed"); let res = igd_search(&final_db_save_path, &query_file).expect("Error during testing:"); From 5c53208d6d574a350be05fe940cc00a29429ed17 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:19:02 -0500 Subject: [PATCH 02/11] change nCnts incrementing --- gtars/src/igd/create.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtars/src/igd/create.rs b/gtars/src/igd/create.rs index ebd31ef3..68780974 100644 --- a/gtars/src/igd/create.rs +++ b/gtars/src/igd/create.rs @@ -633,7 +633,7 @@ pub fn igd_saveT(igd: &mut igd_t, output_file_path: &String) { } file.write_all(&buffer).unwrap(); - current_tile.nCnts = current_tile.ncnts + 1; + current_tile.nCnts = current_tile.nCnts + current_tile.ncnts; if current_tile.ncnts > 8 { current_tile.mcnts = 8; From d28ff7d05c010b574f2165e6cd9f8c3fc5920189 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:34:01 -0500 Subject: [PATCH 03/11] do not reset nCnts, use it for tests --- gtars/src/igd/create.rs | 2 +- gtars/tests/test.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/gtars/src/igd/create.rs b/gtars/src/igd/create.rs index 68780974..e6080be8 100644 --- a/gtars/src/igd/create.rs +++ b/gtars/src/igd/create.rs @@ -562,7 +562,7 @@ pub fn igd_save_db(igd: &mut igd_t, output_path: &String, db_output_name: &Strin let _ = main_db_file.write_all(&temp_buffer); } - q.nCnts = 0; + //q.nCnts = 0; } } diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 7502671d..d33a4409 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -154,8 +154,12 @@ mod tests { assert_eq!(igd.ctg[0].mTiles, 4); // chr1 has 4 Tiles because of the 32768, and 49152 starts assert_eq!(igd.ctg[1].mTiles, 1); // chr only has 1 Tile due to the 200 start - assert_eq!(igd.ctg[0].gTile[0].gList[0].start, 1); - assert_eq!(igd.ctg[0].gTile[(igd.ctg[0].mTiles-1)as usize].gList[0].start,49152) + assert_eq!(igd.ctg[0].gTile[0].gList[0].start, 1); // look specific tile's start + assert_eq!(igd.ctg[0].gTile[(igd.ctg[0].mTiles-1)as usize].gList[0].start,49152); // look specific tile's start + + assert_eq!(igd.ctg[0].gTile[0].nCnts, 2); // look at nCnts + assert_eq!(igd.ctg[0].gTile[1].nCnts, 0); // look at nCnts + assert_eq!(igd.ctg[0].gTile[2].nCnts, 1); // look at nCnts } From 93fef4cc2d64e1b3b75f510d33e36869c1af4c1d Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:53:28 -0500 Subject: [PATCH 04/11] add fields to igd_t struct to help with testing during creation --- gtars/src/igd/create.rs | 11 +++++++++-- gtars/tests/test.rs | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/gtars/src/igd/create.rs b/gtars/src/igd/create.rs index e6080be8..adb1ca1b 100644 --- a/gtars/src/igd/create.rs +++ b/gtars/src/igd/create.rs @@ -69,6 +69,9 @@ pub struct igd_t { pub mctg: i32, //data type: 0, 1, 2 etc; size differs pub total: i64, // total region in each ctg pub ctg: Vec, // this is the list of contigs (of size n-ctg) // this might need to be a reference + pub total_regions: i32, + pub total_average: f32, + pub average_length: f32, } impl igd_t { @@ -373,12 +376,16 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St // Sort tile data and save into single files per ctg igd_save_db(&mut igd, output_path, db_output_name); + igd.total_regions=total_regions; + igd.total_average=total_avg_size; + igd.average_length= total_avg_size / total_regions as f32; + let save_path = format!("{}{}{}", output_path, db_output_name, ".igd"); println!("IGD saved to: {}", save_path); println!( "Total Intervals: {}, l_avg: {}", - total_regions, - total_avg_size / total_regions as f32 + igd.total_regions, + igd.average_length ); println!("nctg:{} nbp:{}", igd.nctg, igd.nbp); diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index d33a4409..322735ce 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -161,6 +161,10 @@ mod tests { assert_eq!(igd.ctg[0].gTile[1].nCnts, 0); // look at nCnts assert_eq!(igd.ctg[0].gTile[2].nCnts, 1); // look at nCnts + assert_eq!(igd.total_regions, 8); + assert_eq!(igd.total_average, 998.0); + assert_eq!(igd.average_length, 124.75); + } From af8bbbcc25e6422be057255f6449b5a8df688392 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:58:36 -0500 Subject: [PATCH 05/11] some clean up --- gtars/tests/test.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 322735ce..9b2f4574 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -110,7 +110,7 @@ mod tests { } #[rstest] - fn test_igd_create() { + fn test_igd_create_local() { //let tempdir = tempfile::tempdir().unwrap(); //let path = PathBuf::from(&tempdir.path()); // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); @@ -135,11 +135,8 @@ mod tests { let db_path_unwrapped = path.into_os_string().into_string().unwrap(); let db_output_path = db_path_unwrapped; - //let db_output_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/"); - let path_to_crate = env!("CARGO_MANIFEST_DIR"); let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/"); - //let testfilelists = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/source_files/"); let demo_name = String::from("demo"); @@ -149,8 +146,6 @@ mod tests { assert_eq!(igd.ctg[2].name, "chr3"); assert_eq!(igd.nctg, 3); - - assert_eq!(igd.ctg[0].mTiles, 4); // chr1 has 4 Tiles because of the 32768, and 49152 starts assert_eq!(igd.ctg[1].mTiles, 1); // chr only has 1 Tile due to the 200 start @@ -161,6 +156,7 @@ mod tests { assert_eq!(igd.ctg[0].gTile[1].nCnts, 0); // look at nCnts assert_eq!(igd.ctg[0].gTile[2].nCnts, 1); // look at nCnts + // Overall stats assert_eq!(igd.total_regions, 8); assert_eq!(igd.total_average, 998.0); assert_eq!(igd.average_length, 124.75); @@ -168,6 +164,7 @@ mod tests { } + // TODO this test will need to copy files to temp directory, create a new textfile with the temp files and then read in the txt file // #[rstest] // fn test_igd_create_txt() { // let tempdir = tempfile::tempdir().unwrap(); From 2998139dc2d2fb85f2bd3634be822b84a2cd5ba4 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:32:36 -0500 Subject: [PATCH 06/11] add new test_igd_create_then_load_from_disk --- gtars/tests/test.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 9b2f4574..b1521661 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -73,7 +73,7 @@ fn path_to_core_bedgraph_output() -> &'static str { mod tests { use super::*; use gtars::igd::create::{create_igd_f, igd_add, igd_saveT, igd_save_db, igd_t, parse_bed}; - use gtars::igd::search::igd_search; + use gtars::igd::search::{getOverlaps, get_file_info_tsv, get_igd_info, get_tsv_path, igd_search, igd_t_from_disk}; use gtars::uniwig::{uniwig_main, Chromosome}; @@ -85,6 +85,7 @@ mod tests { use gtars::uniwig::writing::write_bw_files; use std::collections::HashMap; + use gtars::common::consts::{BED_FILE_EXTENSION, IGD_FILE_EXTENSION}; // IGD TESTS #[rstest] @@ -181,6 +182,63 @@ mod tests { // create_igd_f(&db_output_path, &testfilelists, &demo_name); // } + #[rstest] + fn test_igd_create_then_load_from_disk() { + // Depending on start and end coordinates which are divided by nbp=16384 + // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately + let tempdir = tempfile::tempdir().unwrap(); + let path = PathBuf::from(&tempdir.path()); + let mut db_path_unwrapped = path.into_os_string().into_string().unwrap(); + db_path_unwrapped.push_str("/"); + let db_output_path = db_path_unwrapped.clone(); + + let path_to_crate = env!("CARGO_MANIFEST_DIR"); + let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/"); + + let demo_name = String::from("demo"); + + let igd_saved = create_igd_f(&db_output_path, &testfilelists, &demo_name); + + println!("dboutput_path {}", db_output_path); + + db_path_unwrapped.push_str("/demo.igd"); + + let mut hash_table: HashMap = HashMap::new(); + + // Create IGD Struct from database + let mut igd_from_disk: igd_t_from_disk = get_igd_info(&db_path_unwrapped, &mut hash_table).expect("Could not open IGD"); + let tsv_path = get_tsv_path(db_path_unwrapped.as_str()).unwrap(); + get_file_info_tsv(tsv_path, &mut igd_from_disk).unwrap(); //sets igd.finfo + + assert_eq!(igd_saved.ctg.len(), igd_from_disk.nCtg as usize); + + assert_eq!(igd_from_disk.nFiles, 1); + + assert_eq!(igd_from_disk.nCnt[0].len(), igd_saved.ctg[0].mTiles as usize); + assert_eq!(igd_from_disk.nCnt[1].len(), igd_saved.ctg[1].mTiles as usize); + assert_eq!(igd_from_disk.nCnt[2].len(), igd_saved.ctg[2].mTiles as usize); + + assert_eq!(igd_from_disk.nCnt[0][0], igd_saved.ctg[0].gTile[0].nCnts); + assert_eq!(igd_from_disk.nCnt[0][1], igd_saved.ctg[0].gTile[1].nCnts); + assert_eq!(igd_from_disk.nCnt[0][2], igd_saved.ctg[0].gTile[2].nCnts); + assert_eq!(igd_from_disk.nCnt[0][3], igd_saved.ctg[0].gTile[3].nCnts); + + //assert_eq!(igd.total_regions, 8); + + // Finally, can we get overlaps? + let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; + + let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_2.bed"); + + let overlaps = getOverlaps(&mut igd_from_disk,&db_path_unwrapped,&queryfile,&mut hits, &mut hash_table); + + assert_eq!(overlaps, igd_saved.total_regions); + + println!("done"); + + + + } #[rstest] fn test_igd_search() { // First must create temp igd From 6f383aa007e841367898a664e79e142f127c10d8 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:40:47 -0500 Subject: [PATCH 07/11] attempt to read from buffer for test_igd_create_then_load_from_disk for test assertions --- gtars/tests/test.rs | 80 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 4 deletions(-) diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index b1521661..9e7a93f2 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -72,7 +72,7 @@ fn path_to_core_bedgraph_output() -> &'static str { mod tests { use super::*; - use gtars::igd::create::{create_igd_f, igd_add, igd_saveT, igd_save_db, igd_t, parse_bed}; + use gtars::igd::create::{create_igd_f, gdata_t, igd_add, igd_saveT, igd_save_db, igd_t, parse_bed}; use gtars::igd::search::{getOverlaps, get_file_info_tsv, get_igd_info, get_tsv_path, igd_search, igd_t_from_disk}; use gtars::uniwig::{uniwig_main, Chromosome}; @@ -85,6 +85,10 @@ mod tests { use gtars::uniwig::writing::write_bw_files; use std::collections::HashMap; + use std::fs::OpenOptions; + use std::io::{Seek, SeekFrom}; + use anyhow::Context; + use byteorder::{LittleEndian, ReadBytesExt}; use gtars::common::consts::{BED_FILE_EXTENSION, IGD_FILE_EXTENSION}; // IGD TESTS @@ -225,14 +229,82 @@ mod tests { //assert_eq!(igd.total_regions, 8); - // Finally, can we get overlaps? - let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; + // let parent_path = db_path_unwrapped.clone(); + let dbpath = std::path::Path::new(&db_path_unwrapped); + + let db_file = OpenOptions::new() + .create(true) + .append(true) + .read(true) + .open(dbpath) + .unwrap(); + + let mut db_reader = BufReader::new(db_file); + + for k in 0..2 { + let nCnt_len = igd_from_disk.nCnt[k].len(); + + for l in 0..nCnt_len { + + let tmpi = igd_from_disk.nCnt[k][l]; + + db_reader + .seek(SeekFrom::Start(igd_from_disk.tIdx[k][l] as u64)) + .unwrap(); + + let mut gData: Vec = Vec::new(); + + for j in 0..tmpi { + gData.push(gdata_t::default()) + } + + for i in 0..tmpi { + let mut buf = [0u8; 16]; + + let n = db_reader.read(&mut buf).unwrap(); + + if n == 0 { + //println!("Breaking loop while reading tempfile"); + break; + } else if n != 16 { + //panic!("Cannot read temp file."); + break; + } + + let mut rdr = &buf[..] as &[u8]; + let idx = rdr.read_i32::().unwrap(); + let start = rdr.read_i32::().unwrap(); + let end = rdr.read_i32::().unwrap(); + let value = rdr.read_i32::().unwrap(); + + //println!("Looping through g_datat in temp files"); + //println!("idx: {} start: {} end: {}", idx, start, end); + + gData[i as usize] = gdata_t { + idx: idx, + start, + end, + value, + }; + } + + println!("here is k {}, l {}",k,l); + for g in gData.iter(){ + println!("Start {}, End {}", g.start,g.end); + } + //println!("Before assertion, k {}, l, {}, gData[0].start {}, igd_saved.ctg[k].gTile[l].gList[0].start {}",k,l,gData[0].start,igd_saved.ctg[k].gTile[l].gList[0].start); + //assert_eq!(gData[0].start, igd_saved.ctg[k].gTile[l].gList[0].start); + } + } + + // Finally, can we get overlaps? + let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_2.bed"); let overlaps = getOverlaps(&mut igd_from_disk,&db_path_unwrapped,&queryfile,&mut hits, &mut hash_table); - assert_eq!(overlaps, igd_saved.total_regions); + //assert_eq!(overlaps, igd_saved.total_regions); println!("done"); From 925c05695c66d0cb0207b622065e1fd5e89458ac Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 20:05:55 -0500 Subject: [PATCH 08/11] update test assertions --- gtars/src/igd/create.rs | 1 + gtars/tests/test.rs | 41 +++++++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/gtars/src/igd/create.rs b/gtars/src/igd/create.rs index adb1ca1b..3f698c03 100644 --- a/gtars/src/igd/create.rs +++ b/gtars/src/igd/create.rs @@ -820,6 +820,7 @@ pub fn igd_add( gdata.start = start; gdata.end = end; gdata.value = v; + //println!("Adding to igd, start {}, idx {}", start,idx); gdata.idx = idx as i32; igd.total += 1; diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 9e7a93f2..60791f0d 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -2,6 +2,7 @@ use std::fs::File; use std::io::{BufRead, BufReader, Read}; use std::path::{Path, PathBuf}; + use rstest::*; #[fixture] @@ -85,6 +86,7 @@ mod tests { use gtars::uniwig::writing::write_bw_files; use std::collections::HashMap; + use std::collections::HashSet; use std::fs::OpenOptions; use std::io::{Seek, SeekFrom}; use anyhow::Context; @@ -227,38 +229,39 @@ mod tests { assert_eq!(igd_from_disk.nCnt[0][2], igd_saved.ctg[0].gTile[2].nCnts); assert_eq!(igd_from_disk.nCnt[0][3], igd_saved.ctg[0].gTile[3].nCnts); - //assert_eq!(igd.total_regions, 8); - - // let parent_path = db_path_unwrapped.clone(); + // Check to see if the regions on disk are the same as the original igd (minus the unused zeros) let dbpath = std::path::Path::new(&db_path_unwrapped); - let db_file = OpenOptions::new() .create(true) .append(true) .read(true) .open(dbpath) .unwrap(); - let mut db_reader = BufReader::new(db_file); - for k in 0..2 { + for k in 0..3 { let nCnt_len = igd_from_disk.nCnt[k].len(); for l in 0..nCnt_len { + let mut a: HashSet= Default::default(); + let mut b: HashSet= Default::default(); - let tmpi = igd_from_disk.nCnt[k][l]; + let tmpi = igd_from_disk.nCnt[k][l]; // number of gdata_t to read + //println!("Here is k {}, l {}, and igd_from_disk.tIdx[k][l] {}",k,l, igd_from_disk.tIdx[k][l]); db_reader - .seek(SeekFrom::Start(igd_from_disk.tIdx[k][l] as u64)) + .seek(SeekFrom::Start(igd_from_disk.tIdx[k][l] as u64)) // [k]contig [l] tile position .unwrap(); let mut gData: Vec = Vec::new(); + //println!("Creating gData with tmpi {}", tmpi); for j in 0..tmpi { gData.push(gdata_t::default()) } - for i in 0..tmpi { + for i in 0..tmpi { // number of gdata_t to read + //println!("Iterating with i {} of tmpi {} ",i,tmpi); let mut buf = [0u8; 16]; let n = db_reader.read(&mut buf).unwrap(); @@ -278,7 +281,7 @@ mod tests { let value = rdr.read_i32::().unwrap(); //println!("Looping through g_datat in temp files"); - //println!("idx: {} start: {} end: {}", idx, start, end); + //println!("Chr_name: {} Filename: {} start: {} end: {}", igd_from_disk.cName[k], igd_from_disk.file_info[idx as usize].fileName, start, end); gData[i as usize] = gdata_t { idx: idx, @@ -288,16 +291,26 @@ mod tests { }; } - println!("here is k {}, l {}",k,l); + //println!("here is k {}, l {}",k,l); for g in gData.iter(){ - println!("Start {}, End {}", g.start,g.end); + //println!("Inserting {} from gData on Disk", g.start); + a.insert(g.start); } - //println!("Before assertion, k {}, l, {}, gData[0].start {}, igd_saved.ctg[k].gTile[l].gList[0].start {}",k,l,gData[0].start,igd_saved.ctg[k].gTile[l].gList[0].start); - //assert_eq!(gData[0].start, igd_saved.ctg[k].gTile[l].gList[0].start); + for g in igd_saved.ctg[k].gTile[l].gList.iter(){ + //println!("Inserting {} from original gList ", g.start); + b.insert(g.start); + } + //println!("A: {:?}", a); + //println!("B: {:?}", b); + // There difference should at most be a 0 from unused tiles, therefore the difference length should at MOST be 1. + let diff = b.difference(&a).collect::>(); + //println!("Difference: {:?}", diff); + assert!(diff.len() <=1 ) } } + // Finally, can we get overlaps? let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_2.bed"); From e53e457d320beffc7eef3c565977bc4041de12ee Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:31:47 -0500 Subject: [PATCH 09/11] add igd test create then search --- gtars/src/igd/search.rs | 32 ++++++++----- ...{igd_bed_file_2.bed => igd_bed_file_1.bed} | 0 .../data/igd_file_list_02/igd_bed_file_1.bed | 8 ++++ .../data/igd_file_list_02/igd_bed_file_2.bed | 3 ++ gtars/tests/test.rs | 48 +++++++++++++++++-- 5 files changed, 74 insertions(+), 17 deletions(-) rename gtars/tests/data/igd_file_list_01/{igd_bed_file_2.bed => igd_bed_file_1.bed} (100%) create mode 100644 gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed create mode 100644 gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed diff --git a/gtars/src/igd/search.rs b/gtars/src/igd/search.rs index 1e0b9cc2..3ee25fd1 100644 --- a/gtars/src/igd/search.rs +++ b/gtars/src/igd/search.rs @@ -297,10 +297,10 @@ fn get_overlaps( if tmpi > 0 { if n1 != *preIdx || ichr != *preChr { - // println!( - // "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n", - // n1, preIdx, ichr, preChr - // ); + println!( + "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n", + n1, preIdx, ichr, preChr + ); //println!("Seek start here: {}",IGD.tIdx[ichr as usize][n1 as usize]); //let ichr = 1; @@ -333,8 +333,9 @@ fn get_overlaps( let end = rdr.read_i32::().unwrap(); let value = rdr.read_i32::().unwrap(); - //println!("Looping through g_datat in temp files\n"); - // println!("idx: {} start: {} end: {}\n", idx,start,end); + println!("for tmpi>0 where tmpi = {}", tmpi); + println!("Looping through g_datat in temp files\n"); + println!("idx: {} start: {} end: {}\n", idx,start,end); gData[i as usize] = gdata_t { idx: idx, @@ -352,7 +353,7 @@ fn get_overlaps( if query_end > gData[0].start { // sorted by start - //println!("query_end > gData[0].start: {} > {}", query_end,gData[0].start); + println!("n1 != *preIdx || ichr != *preChr query_end > gData[0].start: {} > {}", query_end,gData[0].start); // find the 1st rs query_start { - //println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start); + println!("ADDING TO HITS"); + println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start); hits[gData[i as usize].idx as usize] = hits[gData[i as usize].idx as usize] + 1; } @@ -384,7 +387,7 @@ fn get_overlaps( } if n2 > n1 { - //println!("n2>n1 {} vs {} ", n2, n1); + println!("n2>n1 {} vs {} ", n2, n1); let mut bd = IGD.nbp * (n1 + 1); // only keep the first for j in (n1 + 1)..=n2 { @@ -423,8 +426,9 @@ fn get_overlaps( let end = rdr.read_i32::().unwrap(); let value = rdr.read_i32::().unwrap(); - //println!("Looping through g_datat in temp files\n"); - //println!("idx: {} start: {} end: {}\n", idx,start,end); + + println!("Looping through g_datat in temp files\n"); + println!("idx: {} start: {} end: {}\n", idx,start,end); gData.push(gdata_t { idx: idx, @@ -439,6 +443,7 @@ fn get_overlaps( } if query_end > gData[0].start { + println!("n2>n1 query_end > gData[0].start: {} > {}", query_end,gData[0].start); tS = 0; while tS < tmpi && gData[tS as usize].start < bd { @@ -478,6 +483,7 @@ fn get_overlaps( } } } + println!("here are the hits {:?}", hits); return nols; //TODO this is from the original code but its not actually being used for anything. hits vec IS the main thing. } diff --git a/gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed b/gtars/tests/data/igd_file_list_01/igd_bed_file_1.bed similarity index 100% rename from gtars/tests/data/igd_file_list_01/igd_bed_file_2.bed rename to gtars/tests/data/igd_file_list_01/igd_bed_file_1.bed diff --git a/gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed b/gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed new file mode 100644 index 00000000..daae26c5 --- /dev/null +++ b/gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed @@ -0,0 +1,8 @@ +chr1 1 100 +chr1 200 300 +chr1 32768 32868 +chr1 49152 49352 +chr2 1 100 +chr2 200 300 +chr3 32768 32868 +chr3 49152 49352 diff --git a/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed b/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed new file mode 100644 index 00000000..1c1d4886 --- /dev/null +++ b/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed @@ -0,0 +1,3 @@ +chr4 400 500 +chr4 600 700 +chr5 65536 65636 \ No newline at end of file diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 60791f0d..2b37d070 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -312,18 +312,58 @@ mod tests { // Finally, can we get overlaps? - let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; - let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_2.bed"); + //let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; + //let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_1.bed"); - let overlaps = getOverlaps(&mut igd_from_disk,&db_path_unwrapped,&queryfile,&mut hits, &mut hash_table); + //let _overlaps = getOverlaps(&mut igd_from_disk,&db_path_unwrapped,&queryfile,&mut hits, &mut hash_table); - //assert_eq!(overlaps, igd_saved.total_regions); + //assert_eq!(hits.len(), igd_saved.total_regions); println!("done"); + } + + #[rstest] + fn test_igd_create_then_search() { + // Depending on start and end coordinates which are divided by nbp=16384 + // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately + let tempdir = tempfile::tempdir().unwrap(); + let path = PathBuf::from(&tempdir.path()); + let mut db_path_unwrapped = path.into_os_string().into_string().unwrap(); + db_path_unwrapped.push_str("/"); + let db_output_path = db_path_unwrapped.clone(); + let path_to_crate = env!("CARGO_MANIFEST_DIR"); + let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/"); + + let demo_name = String::from("demo"); + + let igd_saved = create_igd_f(&db_output_path, &testfilelists, &demo_name); + + println!("dboutput_path {}", db_output_path); + + db_path_unwrapped.push_str("/demo.igd"); + + let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_1.bed"); + let res = igd_search(&db_path_unwrapped, &queryfile).expect("Error during testing:"); + let mut res_iter = res[1].split('\t'); + + // Skip the first two columns + res_iter.next().unwrap(); + + // Extract the third and fourth columns + let second_column = res_iter.next().unwrap().to_string(); + let third_column = res_iter.next().unwrap().to_string(); + + println!("Number of Regions: {}", second_column); + println!("Number of Hits: {}", third_column); + + assert_eq!(second_column,"8"); + assert_eq!(second_column,"6"); } + + #[rstest] fn test_igd_search() { // First must create temp igd From 8f3dc68b029c604577d879ddc2cd8b23224c5d28 Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:23:29 -0500 Subject: [PATCH 10/11] potential fix #45, comment out debugging lines --- gtars/src/igd/search.rs | 40 ++++++++++++++++++++-------------------- gtars/tests/test.rs | 8 ++++---- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/gtars/src/igd/search.rs b/gtars/src/igd/search.rs index 3ee25fd1..fc31c31d 100644 --- a/gtars/src/igd/search.rs +++ b/gtars/src/igd/search.rs @@ -296,11 +296,11 @@ fn get_overlaps( // ); if tmpi > 0 { - if n1 != *preIdx || ichr != *preChr { - println!( - "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n", - n1, preIdx, ichr, preChr - ); + + // println!( + // "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n", + // n1, preIdx, ichr, preChr + // ); //println!("Seek start here: {}",IGD.tIdx[ichr as usize][n1 as usize]); //let ichr = 1; @@ -333,9 +333,9 @@ fn get_overlaps( let end = rdr.read_i32::().unwrap(); let value = rdr.read_i32::().unwrap(); - println!("for tmpi>0 where tmpi = {}", tmpi); - println!("Looping through g_datat in temp files\n"); - println!("idx: {} start: {} end: {}\n", idx,start,end); + //println!("for tmpi>0 where tmpi = {}", tmpi); + //println!("Looping through g_datat in temp files\n"); + //println!("idx: {} start: {} end: {}\n", idx,start,end); gData[i as usize] = gdata_t { idx: idx, @@ -353,7 +353,7 @@ fn get_overlaps( if query_end > gData[0].start { // sorted by start - println!("n1 != *preIdx || ichr != *preChr query_end > gData[0].start: {} > {}", query_end,gData[0].start); + //println!("n1 != *preIdx || ichr != *preChr query_end > gData[0].start: {} > {}", query_end,gData[0].start); // find the 1st rs query_start { - println!("ADDING TO HITS"); - println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start); + //println!("ADDING TO HITS"); + //println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start); hits[gData[i as usize].idx as usize] = hits[gData[i as usize].idx as usize] + 1; } } } - } + if n2 > n1 { - println!("n2>n1 {} vs {} ", n2, n1); + //println!("n2>n1 {} vs {} ", n2, n1); let mut bd = IGD.nbp * (n1 + 1); // only keep the first for j in (n1 + 1)..=n2 { @@ -427,8 +427,8 @@ fn get_overlaps( let value = rdr.read_i32::().unwrap(); - println!("Looping through g_datat in temp files\n"); - println!("idx: {} start: {} end: {}\n", idx,start,end); + //println!("Looping through g_datat in temp files\n"); + // println!("idx: {} start: {} end: {}\n", idx,start,end); gData.push(gdata_t { idx: idx, @@ -443,7 +443,7 @@ fn get_overlaps( } if query_end > gData[0].start { - println!("n2>n1 query_end > gData[0].start: {} > {}", query_end,gData[0].start); + //println!("n2>n1 query_end > gData[0].start: {} > {}", query_end,gData[0].start); tS = 0; while tS < tmpi && gData[tS as usize].start < bd { @@ -483,7 +483,7 @@ fn get_overlaps( } } } - println!("here are the hits {:?}", hits); + //println!("here are the hits {:?}", hits); return nols; //TODO this is from the original code but its not actually being used for anything. hits vec IS the main thing. } diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index 2b37d070..ac9939be 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -359,13 +359,13 @@ mod tests { println!("Number of Hits: {}", third_column); assert_eq!(second_column,"8"); - assert_eq!(second_column,"6"); + assert_eq!(second_column,"8"); } #[rstest] - fn test_igd_search() { + fn test_igd_search_local() { // First must create temp igd // Temp dir to hold igd @@ -395,8 +395,8 @@ mod tests { // let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/rust_test.igd"); // let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/search_file/query4.bed"); - let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/output_files/rust_test2.igd"); - let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/query2.bed"); + let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/output/rust_test2.igd"); + let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/test4/igd_bed_file_1.bed"); let res = igd_search(&final_db_save_path, &query_file).expect("Error during testing:"); From abaeb960c4c28cee3fd32b7167e0806324920e2a Mon Sep 17 00:00:00 2001 From: Donald Campbell <125581724+donaldcampbelljr@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:52:44 -0500 Subject: [PATCH 11/11] update rstest, use cases for new test, rethink source bedfiles and query, remove local tests --- gtars/Cargo.toml | 2 +- .../data/igd_file_list/bad_bed_file.notbed | 15 --- .../data/igd_file_list/bad_bed_file_2.notbed | 8 -- .../data/igd_file_list/igd_bed_file_1.bed | 4 - .../data/igd_file_list/igd_bed_file_2.notbed | 37 ------- .../data/igd_file_list_02/igd_bed_file_2.bed | 7 +- gtars/tests/data/igd_query_files/query1.bed | 8 ++ gtars/tests/data/igd_query_files/query2.bed | 2 + gtars/tests/test.rs | 99 ++----------------- 9 files changed, 23 insertions(+), 159 deletions(-) delete mode 100644 gtars/tests/data/igd_file_list/bad_bed_file.notbed delete mode 100644 gtars/tests/data/igd_file_list/bad_bed_file_2.notbed delete mode 100644 gtars/tests/data/igd_file_list/igd_bed_file_1.bed delete mode 100644 gtars/tests/data/igd_file_list/igd_bed_file_2.notbed create mode 100644 gtars/tests/data/igd_query_files/query1.bed create mode 100644 gtars/tests/data/igd_query_files/query2.bed diff --git a/gtars/Cargo.toml b/gtars/Cargo.toml index 7e13f7a5..be23b212 100644 --- a/gtars/Cargo.toml +++ b/gtars/Cargo.toml @@ -32,6 +32,6 @@ glob = "0.3.1" [dev-dependencies] -rstest = "0.18.2" +rstest = "0.23.0" tempfile = "3.8.1" pretty_assertions = "1.4.0" diff --git a/gtars/tests/data/igd_file_list/bad_bed_file.notbed b/gtars/tests/data/igd_file_list/bad_bed_file.notbed deleted file mode 100644 index e31a333e..00000000 --- a/gtars/tests/data/igd_file_list/bad_bed_file.notbed +++ /dev/null @@ -1,15 +0,0 @@ -chr1 7 10 -chr1 8 12 -chr1 9 15 -chr1 10 17 -chr1 11 18 -chr1 12 19 -chr1 13 20 -chr1 14 22 -chr1 16 23 -chr1 18 24 -chr1 19 27 -chr1 20 28 -chr1 22 30 -chr1 23 31 -chr1 24 32 \ No newline at end of file diff --git a/gtars/tests/data/igd_file_list/bad_bed_file_2.notbed b/gtars/tests/data/igd_file_list/bad_bed_file_2.notbed deleted file mode 100644 index 1b91112d..00000000 --- a/gtars/tests/data/igd_file_list/bad_bed_file_2.notbed +++ /dev/null @@ -1,8 +0,0 @@ -chr11 10 50 -chr11 20 76 -chr12 769 2395 -chr13 771 3000 -chr14 800 2900 -chr21 1 30 -chr21 2 19 -chr21 16 31 diff --git a/gtars/tests/data/igd_file_list/igd_bed_file_1.bed b/gtars/tests/data/igd_file_list/igd_bed_file_1.bed deleted file mode 100644 index ab24a1b0..00000000 --- a/gtars/tests/data/igd_file_list/igd_bed_file_1.bed +++ /dev/null @@ -1,4 +0,0 @@ -chr1 632554 632780 SRX4150706.05_peak_5 157 . 2.14622 20.42377 15.73019 44 -chr1 633837 634214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191 -chr10 931681 932010 SRX4150706.05_peak_247 205 . 11.82913 25.65609 20.56433 139 -chr10 1048894 1049428 SRX4150706.05_peak_248 252 . 11.83432 30.63056 25.20567 179 \ No newline at end of file diff --git a/gtars/tests/data/igd_file_list/igd_bed_file_2.notbed b/gtars/tests/data/igd_file_list/igd_bed_file_2.notbed deleted file mode 100644 index d1b2de09..00000000 --- a/gtars/tests/data/igd_file_list/igd_bed_file_2.notbed +++ /dev/null @@ -1,37 +0,0 @@ -chr1 32481 32787 SRX4150706.05_peak_1 92 . 7.69231 13.22648 9.25988 155 -chr1 629094 630022 SRX4150706.05_peak_2 820 . 3.81936 88.76474 82.09715 743 -chr1 630770 631348 SRX4150706.05_peak_3 333 . 2.69642 39.15731 33.36833 464 -chr1 631874 632292 SRX4150706.05_peak_4 513 . 3.14391 57.55429 51.34151 169 -chr10 3172518 3172964 SRX4150706.05_peak_249 114 . 8.40708 15.69710 11.46197 371 -chr10 3785332 3785624 SRX4150706.05_peak_250 140 . 9.57811 18.59647 14.07850 164 -chr10 4848619 4848897 SRX4150706.05_peak_251 148 . 10.09615 19.45367 14.85063 121 -chr10 4867612 4867959 SRX4150706.05_peak_252 148 . 10.40312 19.46796 14.86100 138 -chr12 26274777 26275010 SRX4150706.05_peak_502 155 . 11.35647 20.23804 15.56519 190 -chr12 30754778 30755141 SRX4150706.05_peak_503 146 . 9.98811 19.27493 14.68905 175 -chr12 31066520 31066788 SRX4150706.05_peak_504 94 . 8.08625 13.48456 9.48825 107 -chr12 31728967 31729242 SRX4150706.05_peak_505 197 . 12.33933 24.77604 19.74551 126 -chr12 40105822 40106052 SRX4150706.05_peak_506 112 . 9.06516 15.49433 11.28455 71 -chr12 42144779 42145013 SRX4150706.05_peak_507 128 . 9.88372 17.27142 12.88671 94 -chr12 43758834 43759073 SRX4150706.05_peak_508 87 . 7.83217 12.71157 8.79783 147 -chr16 1678069 1678364 SRX4150706.05_peak_757 114 . 9.18221 15.69259 11.46152 121 -chr16 1782651 1782896 SRX4150706.05_peak_758 161 . 10.92328 20.82612 16.10091 109 -chr16 1943243 1943468 SRX4150706.05_peak_759 88 . 8.14941 12.77668 8.85488 116 -chr16 2136005 2136235 SRX4150706.05_peak_760 145 . 10.16518 19.07285 14.50998 104 -chr16 2214862 2215110 SRX4150706.05_peak_761 111 . 8.74036 15.35579 11.15965 171 -chr16 2223339 2223636 SRX4150706.05_peak_762 128 . 9.88372 17.27142 12.88671 145 -chr16 3003944 3004198 SRX4150706.05_peak_763 114 . 9.18221 15.69259 11.46152 106 -chr16 3400901 3401238 SRX4150706.05_peak_764 101 . 8.82852 14.21739 10.13631 147 -chr16 4307669 4307938 SRX4150706.05_peak_765 145 . 10.49724 19.15774 14.58114 107 -chr17 10697460 10697723 SRX4150706.05_peak_821 76 . 7.47029 11.37055 7.60573 50 -chr17 15490746 15490988 SRX4150706.05_peak_822 153 . 11.37124 19.94566 15.30242 133 -chr17 15651622 15651906 SRX4150706.05_peak_823 125 . 10.03344 16.89878 12.54836 108 -chr17 15699452 15699766 SRX4150706.05_peak_824 148 . 11.20841 19.40026 14.80545 161 -chr17 15999582 15999891 SRX4150706.05_peak_825 153 . 11.19751 19.95225 15.30478 125 -chr17 16535698 16535959 SRX4150706.05_peak_826 120 . 9.55224 16.32735 12.03429 147 -chr17 17972524 17972813 SRX4150706.05_peak_827 131 . 10.24000 17.54836 13.13781 133 -chr17 19062312 19062585 SRX4150706.05_peak_828 140 . 8.64086 18.53730 14.02305 137 -chr19 1275440 1275769 SRX4150706.05_peak_900 80 . 6.87433 11.89345 8.07370 138 -chr19 1812463 1812867 SRX4150706.05_peak_901 74 . 7.09413 11.16432 7.41911 181 -chr19 2042147 2042419 SRX4150706.05_peak_902 106 . 8.83652 14.74695 10.61464 170 -chr19 2151617 2151889 SRX4150706.05_peak_903 133 . 9.94475 17.78651 13.34663 162 -chr19 4471718 4472167 SRX4150706.05_peak_904 109 . 8.83978 15.11550 10.94480 106 diff --git a/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed b/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed index 1c1d4886..23f3e131 100644 --- a/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed +++ b/gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed @@ -1,3 +1,4 @@ -chr4 400 500 -chr4 600 700 -chr5 65536 65636 \ No newline at end of file +chr2 652554 652780 SRX4150706.05_peak_5 157 . 2.14622 20.42377 15.73019 44 +chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191 +chr11 951681 952010 SRX4150706.05_peak_247 205 . 11.82913 25.65609 20.56433 139 +chr11 1248894 1249428 SRX4150706.05_peak_248 252 . 11.83432 30.63056 25.20567 179 \ No newline at end of file diff --git a/gtars/tests/data/igd_query_files/query1.bed b/gtars/tests/data/igd_query_files/query1.bed new file mode 100644 index 00000000..daae26c5 --- /dev/null +++ b/gtars/tests/data/igd_query_files/query1.bed @@ -0,0 +1,8 @@ +chr1 1 100 +chr1 200 300 +chr1 32768 32868 +chr1 49152 49352 +chr2 1 100 +chr2 200 300 +chr3 32768 32868 +chr3 49152 49352 diff --git a/gtars/tests/data/igd_query_files/query2.bed b/gtars/tests/data/igd_query_files/query2.bed new file mode 100644 index 00000000..6c6ece21 --- /dev/null +++ b/gtars/tests/data/igd_query_files/query2.bed @@ -0,0 +1,2 @@ +chr3 49152 49352 +chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191 \ No newline at end of file diff --git a/gtars/tests/test.rs b/gtars/tests/test.rs index ac9939be..c20f186f 100644 --- a/gtars/tests/test.rs +++ b/gtars/tests/test.rs @@ -91,7 +91,6 @@ mod tests { use std::io::{Seek, SeekFrom}; use anyhow::Context; use byteorder::{LittleEndian, ReadBytesExt}; - use gtars::common::consts::{BED_FILE_EXTENSION, IGD_FILE_EXTENSION}; // IGD TESTS #[rstest] @@ -116,23 +115,6 @@ mod tests { assert_eq!(end, 32787); } - #[rstest] - fn test_igd_create_local() { - //let tempdir = tempfile::tempdir().unwrap(); - //let path = PathBuf::from(&tempdir.path()); - // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); - // let db_output_path = db_path_unwrapped; - - let db_output_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/"); - - let path_to_crate = env!("CARGO_MANIFEST_DIR"); - //let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); - let testfilelists = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/source_files/"); - - let demo_name = String::from("demo"); - - create_igd_f(&db_output_path, &testfilelists, &demo_name); - } #[rstest] fn test_igd_create_short_long_regions() { // Depending on start and end coordinates which are divided by nbp=16384 @@ -171,22 +153,6 @@ mod tests { } - // TODO this test will need to copy files to temp directory, create a new textfile with the temp files and then read in the txt file - // #[rstest] - // fn test_igd_create_txt() { - // let tempdir = tempfile::tempdir().unwrap(); - // let path = PathBuf::from(&tempdir.path()); - // - // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); - // let db_output_path = db_path_unwrapped; - // - // let path_to_crate = env!("CARGO_MANIFEST_DIR"); - // let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igdlist.txt"); - // - // let demo_name = String::from("demo"); - // - // create_igd_f(&db_output_path, &testfilelists, &demo_name); - // } #[rstest] fn test_igd_create_then_load_from_disk() { @@ -310,23 +276,13 @@ mod tests { } } - - // Finally, can we get overlaps? - //let mut hits: Vec = vec![0; igd_from_disk.nFiles as usize]; - //let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_1.bed"); - - //let _overlaps = getOverlaps(&mut igd_from_disk,&db_path_unwrapped,&queryfile,&mut hits, &mut hash_table); - - //assert_eq!(hits.len(), igd_saved.total_regions); - - println!("done"); - } #[rstest] - fn test_igd_create_then_search() { - // Depending on start and end coordinates which are divided by nbp=16384 - // the number of tiles per ctg are adjusted, this tests to ensure they are created appropriately + #[case("/tests/data/igd_file_list_01/","/tests/data/igd_query_files/query1.bed" ,8, 8)] + #[case("/tests/data/igd_file_list_02/","/tests/data/igd_query_files/query2.bed" ,4, 1)] + fn test_igd_create_then_search(#[case] input: &str, #[case] query_file: &str,#[case] expected_regions: u32, #[case] expected_hits: u32) { + let tempdir = tempfile::tempdir().unwrap(); let path = PathBuf::from(&tempdir.path()); let mut db_path_unwrapped = path.into_os_string().into_string().unwrap(); @@ -334,7 +290,7 @@ mod tests { let db_output_path = db_path_unwrapped.clone(); let path_to_crate = env!("CARGO_MANIFEST_DIR"); - let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/"); + let testfilelists = format!("{}{}", path_to_crate, input); let demo_name = String::from("demo"); @@ -344,7 +300,7 @@ mod tests { db_path_unwrapped.push_str("/demo.igd"); - let queryfile = format!("{}{}", path_to_crate, "/tests/data/igd_file_list_01/igd_bed_file_1.bed"); + let queryfile = format!("{}{}", path_to_crate, query_file); let res = igd_search(&db_path_unwrapped, &queryfile).expect("Error during testing:"); let mut res_iter = res[1].split('\t'); @@ -358,51 +314,12 @@ mod tests { println!("Number of Regions: {}", second_column); println!("Number of Hits: {}", third_column); - assert_eq!(second_column,"8"); - assert_eq!(second_column,"8"); + assert_eq!(second_column,expected_regions.to_string()); + assert_eq!(third_column,expected_hits.to_string()); } - #[rstest] - fn test_igd_search_local() { - // First must create temp igd - - // Temp dir to hold igd - // let tempdir = tempfile::tempdir().unwrap(); - // let path = PathBuf::from(&tempdir.path()); - // let db_path_unwrapped = path.into_os_string().into_string().unwrap(); - // let db_output_path = db_path_unwrapped; - // - // // bed files used to create IGD - // let path_to_crate = env!("CARGO_MANIFEST_DIR"); - // let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/"); - // - // let demo_name = String::from("demo"); - // - // // Create IGD from directory of bed files - // create_igd_f(&db_output_path, &testfilelists, &demo_name); - // - // // Get a query file path from test files - // let query_file = format!( - // "{}{}", - // path_to_crate, "/tests/data/igd_file_list/igd_bed_file_1.bed" - // ); - // - // // the final db path will be constructed within igd_save_db like so - // let final_db_save_path = format!("{}{}{}", db_output_path, demo_name, ".igd"); - - // let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/output/rust_test.igd"); - // let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/search_file/query4.bed"); - - let final_db_save_path = String::from("/home/drc/Downloads/igd_testing_17dec2024/test2/output/rust_test2.igd"); - let query_file = String::from("/home/drc/Downloads/igd_testing_17dec2024/test4/igd_bed_file_1.bed"); - - let res = igd_search(&final_db_save_path, &query_file).expect("Error during testing:"); - - - } - #[rstest] fn test_igd_add() { // First create a new igd struct