Skip to content

Commit

Permalink
Merge branch 'onecodex:master' into dunder-methods
Browse files Browse the repository at this point in the history
  • Loading branch information
apcamargo authored Jan 23, 2025
2 parents 9cdf8e5 + ba898bd commit 95e2553
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 27 deletions.
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
[build-system]
requires = ["maturin>=0.14,<0.15"]
requires = ["maturin>=1.7,<2.0"]
build-backend = "maturin"

[project]
name = "needletail"
dynamic = ["version"]
classifier = [
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
]

[tool.maturin]
bindings = "pyo3"
features = ["python"]
87 changes: 83 additions & 4 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,31 +97,51 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>(
GZ_MAGIC => {
let mut gz_reader = MultiGzDecoder::new(new_reader);
let mut first = [0; 1];
gz_reader.read_exact(&mut first)?;
gz_reader
.read_exact(&mut first)
.map_err(|e| match e.kind() {
io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(),
_ => e.into(),
})?;
let r = Cursor::new(first).chain(gz_reader);
get_fastx_reader(r, first[0])
}
#[cfg(feature = "bzip2")]
BZ_MAGIC => {
let mut bz_reader = BzDecoder::new(new_reader);
let mut first = [0; 1];
bz_reader.read_exact(&mut first)?;
bz_reader
.read_exact(&mut first)
.map_err(|e| match e.kind() {
io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(),
_ => e.into(),
})?;
let r = Cursor::new(first).chain(bz_reader);
get_fastx_reader(r, first[0])
}
#[cfg(feature = "xz2")]
XZ_MAGIC => {
let mut xz_reader = XzDecoder::new(new_reader);
let mut first = [0; 1];
xz_reader.read_exact(&mut first)?;
xz_reader
.read_exact(&mut first)
.map_err(|e| match e.kind() {
io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(),
_ => e.into(),
})?;
let r = Cursor::new(first).chain(xz_reader);
get_fastx_reader(r, first[0])
}
#[cfg(feature = "zstd")]
ZST_MAGIC => {
let mut zst_reader = ZstdDecoder::new(new_reader)?;
let mut first = [0; 1];
zst_reader.read_exact(&mut first)?;
zst_reader
.read_exact(&mut first)
.map_err(|e| match e.kind() {
io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(),
_ => e.into(),
})?;
let r = Cursor::new(first).chain(zst_reader);
get_fastx_reader(r, first[0])
}
Expand Down Expand Up @@ -150,6 +170,14 @@ pub use utils::{Format, LineEnding};
mod test {
use crate::errors::ParseErrorKind;
use crate::parse_fastx_reader;
#[cfg(feature = "bzip2")]
use bzip2::{read::BzEncoder, Compression as BzCompression};
#[cfg(feature = "flate2")]
use flate2::{write::GzEncoder, Compression as GzCompression};
#[cfg(feature = "xz2")]
use liblzma::write::XzEncoder;
#[cfg(feature = "zstd")]
use zstd::stream::write::Encoder as ZstdEncoder;

#[test]
fn test_empty_file_raises_parser_error_of_same_kind() {
Expand All @@ -172,4 +200,55 @@ mod test {
let expected_err = ParseErrorKind::EmptyFile;
assert_eq!(actual_err, expected_err);
}

// Parsing a gzip stream that was produced without writing any payload must
// fail with `ParseErrorKind::EmptyFile`.
#[cfg(feature = "flate2")]
#[test]
fn test_empty_gz_raises_empty_file_error() {
    // Finish the encoder immediately: nothing was ever written to it.
    let gz_bytes = GzEncoder::new(Vec::new(), GzCompression::default())
        .finish()
        .unwrap();

    let outcome = parse_fastx_reader(gz_bytes.as_slice());
    assert!(outcome.is_err());

    // Compare on the error kind only, with the actual value on the left.
    let observed_kind = outcome.err().unwrap().kind;
    assert_eq!(observed_kind, ParseErrorKind::EmptyFile);
}

// Parsing a bzip2 stream built from an empty input must fail with
// `ParseErrorKind::EmptyFile`.
#[cfg(feature = "bzip2")]
#[test]
fn test_empty_bz_raises_empty_file_error() {
    // The encoder wraps an empty byte slice and is handed to the parser
    // directly as its reader.
    let empty_input = "".as_bytes();
    let bz_stream = BzEncoder::new(empty_input, BzCompression::default());

    let outcome = parse_fastx_reader(bz_stream);
    assert!(outcome.is_err());

    // Compare on the error kind only, with the actual value on the left.
    let observed_kind = outcome.err().unwrap().kind;
    assert_eq!(observed_kind, ParseErrorKind::EmptyFile);
}

// Parsing an xz stream that was produced without writing any payload must
// fail with `ParseErrorKind::EmptyFile`.
#[cfg(feature = "xz2")]
#[test]
fn test_empty_xz_raises_empty_file_error() {
    // Finish the encoder immediately: nothing was ever written to it.
    let xz_bytes = XzEncoder::new(Vec::new(), 9).finish().unwrap();

    let outcome = parse_fastx_reader(xz_bytes.as_slice());
    assert!(outcome.is_err());

    // Compare on the error kind only, with the actual value on the left.
    let observed_kind = outcome.err().unwrap().kind;
    assert_eq!(observed_kind, ParseErrorKind::EmptyFile);
}

// Parsing a zstd stream that was produced without writing any payload must
// fail with `ParseErrorKind::EmptyFile`.
#[cfg(feature = "zstd")]
#[test]
fn test_empty_zstd_raises_empty_file_error() {
    // Finish the encoder immediately: nothing was ever written to it.
    let zst_bytes = ZstdEncoder::new(Vec::new(), zstd::DEFAULT_COMPRESSION_LEVEL)
        .unwrap()
        .finish()
        .unwrap();

    let outcome = parse_fastx_reader(zst_bytes.as_slice());
    assert!(outcome.is_err());

    // Compare on the error kind only, with the actual value on the left.
    let observed_kind = outcome.err().unwrap().kind;
    assert_eq!(observed_kind, ParseErrorKind::EmptyFile);
}
}
33 changes: 12 additions & 21 deletions src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,20 @@ fn get_seq_snippet(seq: &str, max_len: usize) -> String {
#[pymethods]
impl PyFastxReader {
fn __repr__(&self) -> PyResult<String> {
Ok("<FastxParser>".to_string())
Ok("<FastxReader>".to_string())
}

fn __iter__(slf: PyRefMut<Self>, py: Python<'_>) -> PyResult<FastxReaderIterator> {
Ok(FastxReaderIterator { t: slf.into_py(py) })
fn __iter__(slf: PyRefMut<Self>) -> PyRefMut<Self> {
slf
}

fn __next__(mut slf: PyRefMut<Self>) -> PyResult<Option<Record>> {
if let Some(rec) = slf.reader.next() {
let record = py_try!(rec);
Ok(Some(Record::from_sequence_record(&record)))
} else {
Ok(None)
}
}
}

Expand Down Expand Up @@ -166,24 +175,6 @@ impl Record {
}
}

// Python-visible class holding a single Python object in `t`.
// `__next__` expects that object to be extractable as a mutable
// `PyFastxReader`.
#[pyclass]
pub struct FastxReaderIterator {
    t: PyObject,
}

#[pymethods]
impl FastxReaderIterator {
    // Pulls the next item from the wrapped reader.
    //
    // Returns `Ok(Some(record))` while the reader yields items and `Ok(None)`
    // once it is exhausted. A failed extraction of `t` is propagated with `?`;
    // a failed item is handled by the `py_try!` macro (defined outside this
    // view).
    fn __next__(slf: PyRef<Self>, py: Python<'_>) -> PyResult<Option<Record>> {
        let mut reader_ref: PyRefMut<PyFastxReader> = slf.t.extract(py)?;
        match reader_ref.reader.next() {
            Some(parsed) => {
                let sequence = py_try!(parsed);
                Ok(Some(Record::from_sequence_record(&sequence)))
            }
            None => Ok(None),
        }
    }
}

// TODO: it would be really nice to detect the type of the PyObject so this could also
// work on file objects etc. Not for the initial release, though.

Expand Down

0 comments on commit 95e2553

Please sign in to comment.