Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dunder methods to the Python Record class #91

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Changes from 13 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7a46cfb
Add dunder methods for the Python Record class
apcamargo Dec 19, 2024
afe778c
Improve `Record.__repr__()`
apcamargo Dec 19, 2024
19a6040
Add a constructor to `Record`
apcamargo Dec 19, 2024
3497b4e
Expose the `Record` class
apcamargo Dec 19, 2024
c417c3d
Turn `is_fasta` and `is_fastq` into properties
apcamargo Dec 19, 2024
786d6b8
Refactor snippet generation
apcamargo Dec 20, 2024
b39ab08
Add the `name` and `description` properties
apcamargo Dec 20, 2024
3f348ec
Condense long strings in Record's __repr__
apcamargo Dec 20, 2024
a4687e8
Shorten Record's __repr__
apcamargo Dec 20, 2024
ce44c50
Do not wrap sequences in __str__
apcamargo Jan 22, 2025
3770636
Include a newline at the end of FASTX strings
apcamargo Jan 22, 2025
9cdf8e5
Make is_fasta and is_fastq regular methods again
apcamargo Jan 22, 2025
95e2553
Merge branch 'onecodex:master' into dunder-methods
apcamargo Jan 23, 2025
01fe91e
Add docstrings to the Python classes and functions
apcamargo Jan 23, 2025
78d8a7f
Add a to-do list to the beginning of the file
apcamargo Jan 23, 2025
7cd2c47
Add item to to-do list
apcamargo Jan 23, 2025
b900fb8
Fix indentation in `normalize_seq` docstring
apcamargo Jan 24, 2025
eaba5b1
Update `parse_fastx_file` to accept `pathlib.Path` objects
apcamargo Jan 24, 2025
4fcf73b
Set the default value of the iupac parameter to False
apcamargo Jan 24, 2025
89d94f2
Add new task to to-do
apcamargo Jan 24, 2025
1e8b7f4
Update Python tests
apcamargo Jan 24, 2025
4de47d9
Fix linting issue
apcamargo Jan 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 98 additions & 6 deletions src/python.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
//! Python bindings for needletail

use std::io::Cursor;

use pyo3::prelude::*;
use pyo3::{create_exception, wrap_pyfunction};

use crate::sequence::{complement, normalize};
use crate::{
parse_fastx_file as rs_parse_fastx_file, parse_fastx_reader, parser::SequenceRecord,
FastxReader,
};

use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::{create_exception, wrap_pyfunction};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::Cursor;

create_exception!(needletail, NeedletailError, pyo3::exceptions::PyException);

// Avoid some boilerplate with the error handling
Expand All @@ -25,6 +26,16 @@ pub struct PyFastxReader {
reader: Box<dyn FastxReader>,
}

/// Condenses `seq` to at most `max_len` characters for display.
///
/// Strings that already fit are returned unchanged. Longer strings are
/// rendered as `<head>…<tail>`, where the tail is the last three characters
/// and the head fills the remaining `max_len - 4` characters, so the result
/// is exactly `max_len` characters long.
///
/// All cuts are made on character boundaries, so non-ASCII input never
/// panics. When `max_len` is too small to hold the ellipsis plus the tail
/// (`max_len < 4`), the string is simply truncated to its first `max_len`
/// characters.
fn get_seq_snippet(seq: &str, max_len: usize) -> String {
    const TAIL_CHARS: usize = 3;
    const ELLIPSIS: char = '…';

    // Cheap check first: the byte length is an upper bound on the char count.
    if seq.len() <= max_len || seq.chars().count() <= max_len {
        return seq.to_string();
    }

    // Not enough room for "…" + tail: fall back to a plain truncation instead
    // of underflowing `max_len - 4`.
    let Some(head_chars) = max_len.checked_sub(TAIL_CHARS + 1) else {
        return seq.chars().take(max_len).collect();
    };

    // Translate character counts into byte offsets that are valid slice bounds.
    let head_end = seq
        .char_indices()
        .nth(head_chars)
        .map_or(seq.len(), |(offset, _)| offset);
    let tail_start = seq
        .char_indices()
        .rev()
        .nth(TAIL_CHARS - 1)
        .map_or(0, |(offset, _)| offset);

    format!("{}{}{}", &seq[..head_end], ELLIPSIS, &seq[tail_start..])
}

#[pymethods]
impl PyFastxReader {
fn __repr__(&self) -> PyResult<String> {
Expand Down Expand Up @@ -67,6 +78,24 @@ impl Record {

#[pymethods]
impl Record {
/// The record name: the part of `id` that precedes the first whitespace
/// character, or the whole `id` when it contains no whitespace.
#[getter]
pub fn name(&self) -> PyResult<&str> {
    // `split` always yields at least one piece, so the fallback is only a
    // formality that keeps this free of `unwrap`.
    let first_token = self
        .id
        .split(char::is_whitespace)
        .next()
        .unwrap_or(&self.id);
    Ok(first_token)
}

#[getter]
pub fn description(&self) -> PyResult<Option<&str>> {
if let Some(pos) = self.id.find(char::is_whitespace) {
Ok(Some(&self.id[pos..].trim_start()))
} else {
Ok(None)
}
}

/// Returns `true` when the record has no quality string (`qual` is `None`).
pub fn is_fasta(&self) -> PyResult<bool> {
apcamargo marked this conversation as resolved.
Show resolved Hide resolved
Ok(self.qual.is_none())
}
Expand All @@ -81,6 +110,69 @@ impl Record {
}
Ok(())
}

/// Builds a `Record` from an identifier, a sequence and an optional quality
/// string.
///
/// # Errors
/// Returns a `PyValueError` when `qual` is provided and its byte length
/// differs from that of `seq`.
#[new]
#[pyo3(signature = (id, seq, qual=None))]
fn new(id: String, seq: String, qual: Option<String>) -> PyResult<Record> {
    // A missing quality string is always acceptable; a present one must
    // line up with the sequence.
    let length_mismatch = matches!(&qual, Some(quality) if quality.len() != seq.len());
    if length_mismatch {
        return Err(PyValueError::new_err(
            "Sequence and quality strings must have the same length",
        ));
    }
    Ok(Record { id, seq, qual })
}

/// Hashes the record by feeding `id`, then `seq`, then `qual` (only when
/// present) into a `DefaultHasher`.
pub fn __hash__(&self) -> PyResult<u64> {
    let mut state = DefaultHasher::new();
    for field in [&self.id, &self.seq] {
        field.hash(&mut state);
    }
    // The quality string is hashed directly rather than through `Option`,
    // so a missing quality contributes nothing to the hash.
    if let Some(quality) = &self.qual {
        quality.hash(&mut state);
    }
    Ok(state.finish())
}

/// Two records are equal when their `id`, `seq` and `qual` all match.
pub fn __eq__(&self, other: &Record) -> PyResult<bool> {
    let lhs = (&self.id, &self.seq, &self.qual);
    let rhs = (&other.id, &other.seq, &other.qual);
    Ok(lhs == rhs)
}

/// Length of the record, defined as the byte length of `seq`.
pub fn __len__(&self) -> PyResult<usize> {
    let seq_len = self.seq.len();
    Ok(seq_len)
}

/// Formats the record as FASTX text, terminated by a newline.
///
/// Records without a quality string are written as FASTA
/// (`>id\nseq\n`); records with one are written as FASTQ
/// (`@id\nseq\n+\nqual\n`). The sequence is not line-wrapped.
pub fn __str__(&self) -> PyResult<String> {
    // Matching on a borrow avoids cloning the quality string and removes
    // the `unwrap` the `is_none()` check used to guard.
    let text = match &self.qual {
        None => format!(">{}\n{}\n", self.id, self.seq),
        Some(qual) => format!("@{}\n{}\n+\n{}\n", self.id, self.seq, qual),
    };
    Ok(text)
}

/// Builds a compact, single-line representation of the record:
/// `Record(id=…, seq=…, qual=…)`.
///
/// The id is shown as the record name, followed by `…` when the full id
/// carries more than the name. The sequence and quality strings are
/// condensed with `get_seq_snippet`; a missing quality is shown as `None`.
fn __repr__(&self) -> PyResult<String> {
    // Maximum number of characters shown for the sequence and quality.
    const MAX_SNIPPET_LEN: usize = 25;

    // Propagate with `?` rather than carrying a fallback arm for an error
    // that `name()` never produces.
    let name = self.name()?;
    let id_snippet = if name == self.id {
        name.to_string()
    } else {
        format!("{}…", name)
    };
    let seq_snippet = get_seq_snippet(&self.seq, MAX_SNIPPET_LEN);
    let quality_snippet = self.qual.as_deref().map_or_else(
        || "None".to_string(),
        |qual| get_seq_snippet(qual, MAX_SNIPPET_LEN),
    );
    Ok(format!(
        "Record(id={}, seq={}, qual={})",
        id_snippet, seq_snippet, quality_snippet
    ))
}
}

// TODO: what would be really nice is to detect the type of the PyObject so it would also work on file objects, etc.
Expand Down Expand Up @@ -121,11 +213,11 @@ pub fn reverse_complement(seq: &str) -> String {
/// Initializer for the `needletail` Python module.
///
/// Registers the `PyFastxReader` and `Record` classes, the
/// `parse_fastx_file`, `parse_fastx_string`, `normalize_seq` and
/// `reverse_complement` functions, and the `NeedletailError` exception.
#[pymodule]
fn needletail(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
// Classes.
m.add_class::<PyFastxReader>()?;
m.add_class::<Record>()?;
// Module-level functions.
m.add_wrapped(wrap_pyfunction!(parse_fastx_file))?;
m.add_wrapped(wrap_pyfunction!(parse_fastx_string))?;
m.add_wrapped(wrap_pyfunction!(normalize_seq))?;
m.add_wrapped(wrap_pyfunction!(reverse_complement))?;
// Exception type, exposed under the name Python code will import.
m.add("NeedletailError", py.get_type_bound::<NeedletailError>())?;

Ok(())
}
Loading