Skip to content

Commit

Permalink
Merge pull request #58 from databio/digests
Browse files Browse the repository at this point in the history
add ga4gh refget digest functionality
  • Loading branch information
nsheff authored Dec 20, 2024
2 parents a05b2ed + 86ffa77 commit 47b6316
Show file tree
Hide file tree
Showing 11 changed files with 311 additions and 8 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

This repo is organized like so:

1. A rust library crate (`/gtars/lib.rs`) that provides functions, traits, and structs for working with genomic interval data.
2. A rust binary crate (in `/gtars/main.rs`), a small, wrapper command-line interface for the library crate.
3. A rust crate (in `/bindings`) that provides Python bindings, and a resulting Python package, so that it can be used within Python.
1. The main gtars rust package in `/gtars`, which contains two crates:
1a. A rust library crate (`/gtars/lib.rs`) that provides functions, traits, and structs for working with genomic interval data.
1b. A rust binary crate (in `/gtars/main.rs`), a small wrapper command-line interface for the library crate.
2. Python bindings (in `/bindings/python`), which consist of a rust package with a library crate (no binary crate) and a Python package.
3. R bindings (in `/bindings/r`), which consist of an R package.

This repository is a work in progress, and still in early development.

Expand Down
19 changes: 18 additions & 1 deletion bindings/python/README.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,35 @@
# gtars

This is a python wrapper around the `gtars` crate. It provides an easy interface for using `gtars` in python. It is currently in early development, and as such, it does not have a lot of functionality yet, but new tools are being worked on right now.

## Installation

You can get `gtars` from PyPI:

```bash
pip install gtars
```

## Usage

Import the package, and use the tools:
```python
import gtars as gt

gt.prune_universe(...)
```
## Developer docs
Write the developer docs here...

To build for development:

```bash
cd bindings/python
maturin build --release
```

Then install the local wheel that was just built:

```
version=`grep '^version =' Cargo.toml | cut -d '"' -f 2`
pip install --force-reinstall target/wheels/gtars-${version}-cp312-cp312-manylinux_2_38_x86_64.whl
```
1 change: 1 addition & 0 deletions bindings/python/gtars/digests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .gtars.digests import * # noqa: F403
71 changes: 71 additions & 0 deletions bindings/python/src/digests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// This is intended to provide minimal Python bindings to functions in the `digests` module of the `gtars` crate.

use pyo3::prelude::*;
use gtars::digests::{sha512t24u, md5, DigestResult};

// Python-callable wrapper: forwards `readable` to `gtars::digests::sha512t24u`
// and hands the resulting digest string back to the caller unchanged.
#[pyfunction]
pub fn sha512t24u_digest(readable: &str) -> String {
    sha512t24u(readable)
}

// Python-callable wrapper: forwards `readable` to `gtars::digests::md5`
// and hands the resulting digest string back to the caller unchanged.
#[pyfunction]
pub fn md5_digest(readable: &str) -> String {
    md5(readable)
}

// Python-callable wrapper around `gtars::digests::digest_fasta`.
//
// `fasta` is passed straight through to the library function. On success each
// returned `DigestResult` is converted into its Python-facing `PyDigestResult`.
// On failure the library error is rendered into the message of a Python
// `IOError`.
#[pyfunction]
pub fn digest_fasta(fasta: &str) -> PyResult<Vec<PyDigestResult>> {
    let digest_results = gtars::digests::digest_fasta(fasta).map_err(|err| {
        PyErr::new::<pyo3::exceptions::PyIOError, _>(format!(
            "Error processing FASTA file: {}",
            err
        ))
    })?;

    Ok(digest_results
        .into_iter()
        .map(PyDigestResult::from)
        .collect())
}

// Python-facing counterpart of `gtars::digests::DigestResult`, exposed to
// Python under the class name `DigestResult`. Instances are what the
// `digest_fasta` binding returns, one per element of the library's result.
// Every field is both readable and writable from Python (`get,set`).
#[pyclass]
#[pyo3(name="DigestResult")]
pub struct PyDigestResult {
// Identifier of the record; `__str__` labels it as the sequence this result is for.
#[pyo3(get,set)]
pub id: String,
// NOTE(review): presumably the length of the sequence; units are not visible here, confirm against `gtars::digests`.
#[pyo3(get,set)]
pub length: usize,
// The `sha512t24u` digest string copied from the library result.
#[pyo3(get,set)]
pub sha512t24u: String,
// The `md5` digest string copied from the library result.
#[pyo3(get,set)]
pub md5: String
}

#[pymethods]
impl PyDigestResult {
    // Compact one-line representation that names only the record's `id`.
    fn __repr__(&self) -> String {
        let Self { id, .. } = self;
        format!("<DigestResult for {}>", id)
    }

    // Multi-line, human-readable summary listing every field of the record.
    fn __str__(&self) -> PyResult<String> {
        let summary = format!(
            "DigestResult for sequence {}\n length: {}\n sha512t24u: {}\n md5: {}",
            self.id, self.length, self.sha512t24u, self.md5
        );
        Ok(summary)
    }
}

impl From<DigestResult> for PyDigestResult {
fn from(value: DigestResult) -> Self {
PyDigestResult {
id: value.id,
length: value.length,
sha512t24u: value.sha512t24u,
md5: value.md5
}
}
}

// Definition of the `digests` Python module: registers the three digest
// functions and the `DigestResult` class on the module object.
#[pymodule]
pub fn digests(_py: Python, module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Free functions.
    module.add_function(wrap_pyfunction!(sha512t24u_digest, module)?)?;
    module.add_function(wrap_pyfunction!(md5_digest, module)?)?;
    module.add_function(wrap_pyfunction!(digest_fasta, module)?)?;

    // Classes.
    module.add_class::<PyDigestResult>()?;

    Ok(())
}

4 changes: 4 additions & 0 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod ailist;
mod models;
mod tokenizers;
mod utils;
mod digests;

pub const VERSION: &str = env!("CARGO_PKG_VERSION");

Expand All @@ -14,11 +15,13 @@ fn gtars(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
let ailist_module = pyo3::wrap_pymodule!(ailist::ailist);
let utils_module = pyo3::wrap_pymodule!(utils::utils);
let models_module = pyo3::wrap_pymodule!(models::models);
let digests_module = pyo3::wrap_pymodule!(digests::digests);

m.add_wrapped(tokenize_module)?;
m.add_wrapped(ailist_module)?;
m.add_wrapped(utils_module)?;
m.add_wrapped(models_module)?;
m.add_wrapped(digests_module)?;

let sys = PyModule::import_bound(py, "sys")?;
let binding = sys.getattr("modules")?;
Expand All @@ -29,6 +32,7 @@ fn gtars(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
sys_modules.set_item("gtars.ailist", m.getattr("ailist")?)?;
sys_modules.set_item("gtars.utils", m.getattr("utils")?)?;
sys_modules.set_item("gtars.models", m.getattr("models")?)?;
sys_modules.set_item("gtars.digests", m.getattr("digests")?)?;

// add constants
m.add("__version__", VERSION)?;
Expand Down
5 changes: 4 additions & 1 deletion gtars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ bigtools = "0.5.4"
tokio = "1.40.0"
os_pipe = "1.2.1"
glob = "0.3.1"

base64-url = "2.0.0"
sha2 = "0.10.7"
md-5 = "0.10.5"
seq_io = "0.3.2"


[dev-dependencies]
Expand Down
26 changes: 23 additions & 3 deletions gtars/src/common/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,25 @@ use std::io::{BufRead, BufReader};
use std::path::Path;

use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use flate2::read::MultiGzDecoder;
use rust_lapper::{Interval, Lapper};

use crate::common::models::region::Region;
use crate::common::models::universe::Universe;

///
/// Function to return a reader for either a gzip'd or non-gzip'd file.
/// Get a reader for either a gzip'd or non-gzip'd file.
///
/// # Arguments
///
/// - path: path to the file to read
///
pub fn get_dynamic_reader(path: &Path) -> Result<BufReader<Box<dyn Read>>> {
let is_gzipped = path.extension() == Some(OsStr::new("gz"));
let file = File::open(path).with_context(|| "Failed to open bed file.")?;

let file: Box<dyn Read> = match is_gzipped {
true => Box::new(GzDecoder::new(file)),
true => Box::new(MultiGzDecoder::new(file)),
false => Box::new(file),
};

Expand All @@ -32,6 +33,25 @@ pub fn get_dynamic_reader(path: &Path) -> Result<BufReader<Box<dyn Read>>> {
Ok(reader)
}

/// Get a reader for either a gzipped, non-gzipped file, or stdin
///
/// # Arguments
///
/// - file_path: path to the file to read, or '-' for stdin
///
/// # Returns
///
/// A `BufReader` object for a given file path or stdin.
pub fn get_dynamic_reader_w_stdin(file_path_str: &str) -> Result<BufReader<Box<dyn Read>>> {
if file_path_str == "-" {
Ok(BufReader::new(Box::new(std::io::stdin()) as Box<dyn Read>))

Check warning on line 47 in gtars/src/common/utils.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/common/utils.rs#L45-L47

Added lines #L45 - L47 were not covered by tests
} else {
let file_path = Path::new(file_path_str);
return get_dynamic_reader(&file_path);

Check warning on line 50 in gtars/src/common/utils.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/common/utils.rs#L49-L50

Added lines #L49 - L50 were not covered by tests
}
}

Check warning on line 52 in gtars/src/common/utils.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/common/utils.rs#L52

Added line #L52 was not covered by tests


///
/// Create a region-to-id hash-map from a list of regions
///
Expand Down
Loading

0 comments on commit 47b6316

Please sign in to comment.