-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Basic iterator trait, range pushing methods, to coitrees, and more!
- tests and test utilities - rough join stuff - interval/range conversion methods - new GRanges methods refined - iteration methods - read_seqlens - PathBuf-based arguments in io - lazy BED parsing - invalid BED tests - adjust range operation with tests - clippy & fmt - GitHub Rust workflow added
- Loading branch information
Showing
25 changed files
with
1,529 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Continuous integration: build the crate and run its test suite.
name: Rust

# Run on pushes to, and pull requests against, the main branch.
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

env:
  # Keep cargo's colored output in the CI logs.
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    # v4 replaces v3, which targets a Node.js runtime GitHub has deprecated.
    - uses: actions/checkout@v4
    - name: Build
      run: cargo build --verbose
    - name: Run tests
      run: cargo test --verbose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,33 @@ | ||
[package] | ||
name = "granges2" | ||
name = "granges" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
license = "MIT" | ||
authors = ["Vince Buffalo <[email protected]>"] | ||
keywords = ["genomics", "bioinformatics", "compbio"] | ||
categories = ["science"] | ||
documentation = "https://docs.rs/granges/" | ||
repository = "https://github.com/vsbuffalo/granges" | ||
description = "A Rust library and command line tool for genomic range operations." | ||
|
||
[dependencies] | ||
# clap = { version = "4.4.18", features = ["derive"], optional = true } | ||
clap = { version = "4.4.18", features = ["derive"] } | ||
coitrees = { version = "0.4.0", features = ["nosimd"] } | ||
genomap = "0.1.5" | ||
flate2 = "1.0.28" | ||
genomap = "0.2.6" | ||
indexmap = "2.2.3" | ||
ndarray = "0.15.6" | ||
noodles = { version = "0.63.0", features = ["core", "bed"] } | ||
rand = "0.8.5" | ||
thiserror = "1.0.57" | ||
|
||
# [features] | ||
# cli = [ "clap" ] | ||
|
||
[[bin]] | ||
name = "granges" | ||
path = "src/main/mod.rs" | ||
# required-features = ["cli"] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
//! Data container implementations. | ||
pub mod vec; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
//! Data container implementations for [`ndarray::Array1`] and [`ndarray::Array2`]. | ||
use ndarray::{Array1, Array2, ArrayView1}; | ||
use crate::traits::IndexedDataContainer; | ||
|
||
impl<'a, U> IndexedDataContainer<'a> for Array1<U> | ||
where | ||
U: Copy + Default + 'a, | ||
{ | ||
type Item = U; | ||
type Output = Array1<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self[index] | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
index < self.shape()[0] | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
Array1::from_iter(indices.iter().map(|&idx| self.get_value(idx))) | ||
} | ||
} | ||
|
||
impl<'a, U> IndexedDataContainer<'a> for Array2<U> | ||
where | ||
U: Copy + Default + 'a, | ||
{ | ||
type Item = ArrayView1<'a, U>; | ||
type Output = Array2<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self.row(index) | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.shape()[0] | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
index < self.shape()[0] | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
let cols = self.shape()[1]; | ||
|
||
let rows_data: Vec<U> = indices | ||
.iter() | ||
.flat_map(|&idx| self.row(idx).iter().cloned().collect::<Vec<_>>()) | ||
.collect(); | ||
|
||
// create a new Array2<U> from the rows | ||
// shape is (number of indices, number of columns) | ||
Array2::from_shape_vec((indices.len(), cols), rows_data) | ||
.expect("Shape and collected data size mismatch") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//! Data container implementations for [`Vec<U>`]. | ||
|
||
/// Trait methods for the commonly-used `Vec<U>` data container. | ||
/// | ||
/// Note that the associated `Item` type is always a *reference* to the data elements. | ||
impl<'a, U> IndexedDataContainer<'a> for Vec<U> | ||
where | ||
U: Clone + 'a, | ||
{ | ||
type Item = &'a U; | ||
type Output = Vec<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self.get(index).unwrap() | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
self.get(index).is_some() | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
Vec::from_iter(indices.iter().map(|&idx| (*self.get_value(idx)).clone())) | ||
} | ||
} | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,33 @@ | ||
use std::num::{ParseIntError, ParseFloatError}; | ||
|
||
use genomap::GenomeMapError; | ||
use thiserror::Error; | ||
|
||
use crate::Position; | ||
|
||
#[derive(Debug, Error)] | ||
pub enum GRangesError { | ||
// IO related errors | ||
#[error("File reading eror: {0}")] | ||
IOError(#[from] std::io::Error), | ||
|
||
// File parsing related errors | ||
#[error("Integer parsing error: {0}")] | ||
ParseIntError(#[from] ParseIntError), | ||
#[error("Float parsing error: {0}")] | ||
ParseFloatError(#[from] ParseFloatError), | ||
#[error("Bed-like file has too few columns. The first three columns must be sequence name, and start and end positions.\nLine: {0}")] | ||
BedlikeTooFewColumns(String), | ||
#[error("File has invalid column type entry: {0}")] | ||
InvalidColumnType(String), | ||
|
||
// Invalid genomic range errors | ||
#[error("Range invalid: start ({0}) must be greater than end ({1})")] | ||
InvalidGenomicRange(Position, Position), | ||
|
||
#[error("Range [{0}, {1}] is invalid for sequence of length {2}")] | ||
InvalidGenomicRangeForSequence(Position, Position, Position), | ||
|
||
#[error("Sequence name '{0}' is not the ranges container")] | ||
MissingSequence(String), | ||
#[error("Error encountered in genomap::GenomeMap")] | ||
GenomeMapError(#[from] GenomeMapError), | ||
} |
Oops, something went wrong.