-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from databio/region_scoring
Add fragment file region scoring matrix creation
- Loading branch information
Showing
25 changed files
with
540 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
use std::str::FromStr; | ||
|
||
use anyhow::Result; | ||
|
||
use crate::common::models::Region; | ||
|
||
#[allow(unused)] | ||
pub struct Fragment { | ||
pub chr: String, | ||
pub start: u32, | ||
pub end: u32, | ||
pub barcode: String, | ||
pub read_support: u32, | ||
} | ||
|
||
impl FromStr for Fragment { | ||
type Err = anyhow::Error; | ||
|
||
fn from_str(s: &str) -> Result<Self> { | ||
let parts: Vec<&str> = s.split_whitespace().collect(); | ||
// dont check file integrity right now | ||
// if parts.len() != 6 { | ||
// anyhow::bail!( | ||
// "Error parsing fragment file line: {}. Is your fragment file malformed? Found {} parts.", | ||
// s, | ||
// parts.len() | ||
// ) | ||
// } | ||
|
||
let start = parts[1].parse::<u32>()?; | ||
let end = parts[2].parse::<u32>()?; | ||
let read_support = parts[4].parse::<u32>()?; | ||
|
||
Ok(Fragment { | ||
chr: parts[0].to_string(), | ||
start, | ||
end, | ||
barcode: parts[3].to_string(), | ||
read_support, | ||
}) | ||
} | ||
} | ||
|
||
impl From<Fragment> for Region { | ||
fn from(val: Fragment) -> Self { | ||
Region { | ||
chr: val.chr, | ||
start: val.start, | ||
end: val.end, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
use std::collections::HashSet; | ||
use std::io::BufRead; | ||
use std::path::PathBuf; | ||
|
||
use anyhow::Result; | ||
use clap::{arg, Arg, ArgMatches, Command}; | ||
|
||
use super::*; | ||
use crate::scoring::{region_scoring_from_fragments, ConsensusSet, FragmentFileGlob}; | ||
|
||
pub fn make_fscoring_cli() -> Command { | ||
Command::new(consts::FSCORING_CMD) | ||
.author("Nathan LeRoy") | ||
.about("Create a scoring matrix for a set of fragment files over a consensus peak set.") | ||
.arg(Arg::new("fragments")) | ||
.arg(Arg::new("consensus")) | ||
.arg(arg!(--mode <mode>)) | ||
.arg(arg!(--output <output>)) | ||
} | ||
|
||
pub mod handlers { | ||
|
||
use std::str::FromStr; | ||
|
||
use consts::DEFAULT_SCORING_MODE; | ||
|
||
use super::*; | ||
|
||
pub fn region_fragment_scoring(matches: &ArgMatches) -> Result<()> { | ||
// get arguments from CLI | ||
let fragments = matches | ||
.get_one::<String>("fragments") | ||
.expect("A path to fragment files is required."); | ||
|
||
let consensus = matches | ||
.get_one::<String>("consensus") | ||
.expect("A path to a mapping file is required."); | ||
|
||
let default_out = consts::DEFAULT_OUT.to_string(); | ||
let output = matches.get_one::<String>("output").unwrap_or(&default_out); | ||
let mode = match matches.get_one::<String>("mode") { | ||
Some(mode) => { | ||
let supplied_mode = ScoringMode::from_str(mode); | ||
match supplied_mode { | ||
Ok(mode) => mode, | ||
Err(_err) => anyhow::bail!("Unknown scoring mode supplied: {}", mode) | ||
} | ||
}, | ||
None => DEFAULT_SCORING_MODE, | ||
}; | ||
|
||
// coerce arguments to types | ||
let mut fragments = FragmentFileGlob::new(fragments)?; | ||
let consensus = PathBuf::from(consensus); | ||
let consensus = ConsensusSet::new(consensus)?; | ||
|
||
let count_mat = region_scoring_from_fragments( | ||
&mut fragments, | ||
&consensus, | ||
mode, | ||
)?; | ||
|
||
count_mat.write_to_file(output)?; | ||
|
||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
use crate::scoring::ScoringMode; | ||
|
||
pub const FSCORING_CMD: &str = "fscoring"; | ||
pub const DEFAULT_OUT: &str = "fscoring.csv.gz"; | ||
pub const DEFAULT_SCORING_MODE: ScoringMode = ScoringMode::Atac; | ||
pub const START_SHIFT: i8 = 4; | ||
pub const END_SHIFT: i8 = 5; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
use std::fs::File; | ||
use std::io::{BufWriter, Write}; | ||
use std::ops::{Add, AddAssign}; | ||
|
||
use anyhow::Result; | ||
use flate2::write::GzEncoder; | ||
use flate2::Compression; | ||
|
||
pub struct CountMatrix<T> { | ||
data: Vec<T>, | ||
pub rows: usize, | ||
pub cols: usize, | ||
} | ||
|
||
pub struct RowIterator<'a, T> { | ||
matrix: &'a CountMatrix<T>, | ||
current_row: usize, | ||
} | ||
|
||
impl<T> CountMatrix<T> | ||
where | ||
T: Copy + Default + Add<Output = T> + AddAssign + From<u8>, | ||
{ | ||
pub fn new(rows: usize, cols: usize) -> Self { | ||
Self { | ||
data: vec![T::default(); rows * cols], | ||
rows, | ||
cols, | ||
} | ||
} | ||
|
||
pub fn get(&self, row: usize, col: usize) -> Option<&T> { | ||
self.data.get(row * self.cols + col) | ||
} | ||
|
||
pub fn set(&mut self, row: usize, col: usize, value: T) -> Result<(), String> { | ||
if row < self.rows && col < self.cols { | ||
self.data[row * self.cols + col] = value; | ||
Ok(()) | ||
} else { | ||
Err(format!("Index out of bounds: row {}, col {}", row, col)) | ||
} | ||
} | ||
|
||
pub fn increment(&mut self, row: usize, col: usize) { | ||
if row < self.rows && col < self.cols { | ||
let index = row * self.cols + col; | ||
if let Some(value) = self.data.get_mut(index) { | ||
*value += 1.into(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<'a, T> Iterator for RowIterator<'a, T> | ||
where | ||
T: Copy + Default, | ||
{ | ||
type Item = &'a [T]; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
if self.current_row < self.matrix.rows { | ||
let start = self.current_row * self.matrix.cols; | ||
let end = start + self.matrix.cols; | ||
self.current_row += 1; | ||
Some(&self.matrix.data[start..end]) | ||
} else { | ||
None | ||
} | ||
} | ||
} | ||
|
||
impl<T> CountMatrix<T> | ||
where | ||
T: Copy + Default, | ||
{ | ||
pub fn iter_rows(&self) -> RowIterator<T> { | ||
RowIterator { | ||
matrix: self, | ||
current_row: 0, | ||
} | ||
} | ||
} | ||
|
||
impl<T> CountMatrix<T> | ||
where | ||
T: Copy + Default + ToString, | ||
{ | ||
pub fn write_to_file(&self, filename: &str) -> std::io::Result<()> { | ||
let file = File::create(filename)?; | ||
let mut buf_writer = BufWriter::new(GzEncoder::new(file, Compression::default())); | ||
|
||
for row in self.iter_rows() { | ||
let row_str: String = row | ||
.iter() | ||
.map(|v| v.to_string()) | ||
.collect::<Vec<String>>() | ||
.join(","); | ||
buf_writer.write_all(row_str.as_bytes())?; | ||
buf_writer.write_all(b"\n")?; // Add a newline after each row | ||
} | ||
|
||
Ok(()) | ||
} | ||
} |
Oops, something went wrong.