From a63a3901130b8e672fa2d79aa7ef7882dad470d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?= Date: Thu, 19 Dec 2024 04:58:37 -0800 Subject: [PATCH 1/7] Set pyo3 binding in pyproject.toml --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6339e16..ae6a18e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,4 +9,8 @@ classifier = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Topic :: Scientific/Engineering :: Bio-Informatics", -] \ No newline at end of file +] + +[tool.maturin] +bindings = "pyo3" +features = ["python"] From ae8653fe7b977539b19019cadaa4ee8363d6a782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?= Date: Thu, 19 Dec 2024 14:11:50 -0800 Subject: [PATCH 2/7] Bump `maturin` requirements --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ae6a18e..9d3703d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=0.14,<0.15"] +requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" [project] From 88d177f041a43ff492f1e2b60e752de5b18e1297 Mon Sep 17 00:00:00 2001 From: Austin Richardson Date: Tue, 21 Jan 2025 19:01:31 -0800 Subject: [PATCH 3/7] Add dynamic version based on Cargo to pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9d3703d..3394db4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,6 +4,7 @@ build-backend = "maturin" [project] name = "needletail" +dynamic = ["version"] classifier = [ "Intended Audience :: Science/Research", "Programming Language :: Python :: 3", From 5bfa6239cb2c2eab4cc83da4e3725b54af4e71e5 Mon Sep 17 00:00:00 2001 From: Josh Chorlton Date: Mon, 20 Jan 2025 20:27:21 +0000 Subject: [PATCH 4/7] Use same empty-file error if compressed files are empty --- src/parser/mod.rs | 81 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9c70bf7..6075c30 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -97,7 +97,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( GZ_MAGIC => { let mut gz_reader = MultiGzDecoder::new(new_reader); let mut first = [0; 1]; - gz_reader.read_exact(&mut first)?; + gz_reader.read_exact(&mut first).map_err(|e| { + match e.kind() { + io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), + _ => e.into() + } + })?; let r = Cursor::new(first).chain(gz_reader); get_fastx_reader(r, first[0]) } @@ -105,7 +110,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( BZ_MAGIC => { let mut bz_reader = BzDecoder::new(new_reader); let mut first = [0; 1]; - bz_reader.read_exact(&mut first)?; + bz_reader.read_exact(&mut first).map_err(|e| { + match e.kind() { + io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), + _ => e.into() + } + })?; let r = Cursor::new(first).chain(bz_reader); get_fastx_reader(r, first[0]) } @@ -113,7 +123,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( XZ_MAGIC => { let mut xz_reader = XzDecoder::new(new_reader); let mut first = [0; 1]; - xz_reader.read_exact(&mut first)?; + xz_reader.read_exact(&mut first).map_err(|e| { + match e.kind() { + io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), + _ => e.into() + } + })?; let r = Cursor::new(first).chain(xz_reader); get_fastx_reader(r, first[0]) } @@ -121,7 +136,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( ZST_MAGIC => { let mut zst_reader = ZstdDecoder::new(new_reader)?; let mut first = [0; 1]; - zst_reader.read_exact(&mut first)?; + zst_reader.read_exact(&mut first).map_err(|e| { + match e.kind() { + io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), + _ => e.into() + } + })?; let r = Cursor::new(first).chain(zst_reader); get_fastx_reader(r, first[0]) } @@ -150,6 +170,12 @@ pub use utils::{Format, LineEnding}; mod test { use crate::errors::ParseErrorKind; use crate::parse_fastx_reader; + use flate2::write::GzEncoder; + use flate2::Compression as GzCompression; + use bzip2::read::BzEncoder; + use bzip2::Compression as BzCompressionn; + use liblzma::write::XzEncoder; + use zstd::stream::write::Encoder as ZstdEncoder; #[test] fn test_empty_file_raises_parser_error_of_same_kind() { @@ -172,4 +198,51 @@ mod test { let expected_err = ParseErrorKind::EmptyFile; assert_eq!(actual_err, expected_err); } + + #[test] + fn test_empty_gz_raises_empty_file_error() { + let encoder = GzEncoder::new(Vec::new(), GzCompression::default()); + let compressed_bytes = encoder.finish().unwrap(); + let actual = parse_fastx_reader(compressed_bytes.as_slice()); + assert!(actual.is_err()); + + let actual_err = actual.err().unwrap().kind; + let expected_err = ParseErrorKind::EmptyFile; + assert_eq!(actual_err, expected_err); + } + + #[test] + fn test_empty_bz_raises_empty_file_error() { + let encoder = BzEncoder::new("".as_bytes(), BzCompressionn::default()); + let actual = parse_fastx_reader(encoder); + assert!(actual.is_err()); + + let actual_err = actual.err().unwrap().kind; + let expected_err = ParseErrorKind::EmptyFile; + assert_eq!(actual_err, expected_err); + } + + #[test] + fn test_empty_xz_raises_empty_file_error() { + let encoder = XzEncoder::new(Vec::new(), 9); + let compressed_bytes = encoder.finish().unwrap(); + let actual = parse_fastx_reader(compressed_bytes.as_slice()); + assert!(actual.is_err()); + + let actual_err = actual.err().unwrap().kind; + let expected_err = ParseErrorKind::EmptyFile; + assert_eq!(actual_err, expected_err); + } + + #[test] + fn test_empty_zstd_raises_empty_file_error() { + let encoder = ZstdEncoder::new(Vec::new(), zstd::DEFAULT_COMPRESSION_LEVEL).unwrap(); + let compressed_bytes = encoder.finish().unwrap(); + let actual = parse_fastx_reader(compressed_bytes.as_slice()); + assert!(actual.is_err()); + + let actual_err = actual.err().unwrap().kind; + let expected_err = ParseErrorKind::EmptyFile; + assert_eq!(actual_err, expected_err); + } } From 81095598a1ac55c42d17b3cd4b020bf2a09c7752 Mon Sep 17 00:00:00 2001 From: Josh Chorlton Date: Mon, 20 Jan 2025 20:27:40 +0000 Subject: [PATCH 5/7] fmt --- src/parser/mod.rs | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6075c30..57e6dd2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -97,12 +97,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( GZ_MAGIC => { let mut gz_reader = MultiGzDecoder::new(new_reader); let mut first = [0; 1]; - gz_reader.read_exact(&mut first).map_err(|e| { - match e.kind() { + gz_reader + .read_exact(&mut first) + .map_err(|e| match e.kind() { io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), - _ => e.into() - } - })?; + _ => e.into(), + })?; let r = Cursor::new(first).chain(gz_reader); get_fastx_reader(r, first[0]) } @@ -110,12 +110,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( BZ_MAGIC => { let mut bz_reader = BzDecoder::new(new_reader); let mut first = [0; 1]; - bz_reader.read_exact(&mut first).map_err(|e| { - match e.kind() { + bz_reader + .read_exact(&mut first) + .map_err(|e| match e.kind() { io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), - _ => e.into() - } - })?; + _ => e.into(), + })?; let r = Cursor::new(first).chain(bz_reader); get_fastx_reader(r, first[0]) } @@ -123,12 +123,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( XZ_MAGIC => { let mut xz_reader = XzDecoder::new(new_reader); let mut first = [0; 1]; - xz_reader.read_exact(&mut first).map_err(|e| { - match e.kind() { + xz_reader + .read_exact(&mut first) + .map_err(|e| match e.kind() { io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), - _ => e.into() - } - })?; + _ => e.into(), + })?; let r = Cursor::new(first).chain(xz_reader); get_fastx_reader(r, first[0]) } @@ -136,12 +136,12 @@ pub fn parse_fastx_reader<'a, R: 'a + io::Read + Send>( ZST_MAGIC => { let mut zst_reader = ZstdDecoder::new(new_reader)?; let mut first = [0; 1]; - zst_reader.read_exact(&mut first).map_err(|e| { - match e.kind() { + zst_reader + .read_exact(&mut first) + .map_err(|e| match e.kind() { io::ErrorKind::UnexpectedEof => ParseError::new_empty_file(), - _ => e.into() - } - })?; + _ => e.into(), + })?; let r = Cursor::new(first).chain(zst_reader); get_fastx_reader(r, first[0]) } @@ -170,10 +170,10 @@ pub use utils::{Format, LineEnding}; mod test { use crate::errors::ParseErrorKind; use crate::parse_fastx_reader; - use flate2::write::GzEncoder; - use flate2::Compression as GzCompression; use bzip2::read::BzEncoder; use bzip2::Compression as BzCompressionn; + use flate2::write::GzEncoder; + use flate2::Compression as GzCompression; use liblzma::write::XzEncoder; use zstd::stream::write::Encoder as ZstdEncoder; From f19eba70271436309cdc6823631f70bec76bd663 Mon Sep 17 00:00:00 2001 From: Josh Chorlton Date: Wed, 22 Jan 2025 04:51:36 +0000 Subject: [PATCH 6/7] gate tests --- src/parser/mod.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 57e6dd2..1c8b867 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -170,11 +170,13 @@ pub use utils::{Format, LineEnding}; mod test { use crate::errors::ParseErrorKind; use crate::parse_fastx_reader; - use bzip2::read::BzEncoder; - use bzip2::Compression as BzCompressionn; - use flate2::write::GzEncoder; - use flate2::Compression as GzCompression; + #[cfg(feature = "bzip2")] + use bzip2::{read::BzEncoder, Compression as BzCompression}; + #[cfg(feature = "flate2")] + use flate2::{write::GzEncoder, Compression as GzCompression}; + #[cfg(feature = "xz2")] use liblzma::write::XzEncoder; + #[cfg(feature = "zstd")] use zstd::stream::write::Encoder as ZstdEncoder; #[test] @@ -199,6 +201,7 @@ mod test { assert_eq!(actual_err, expected_err); } + #[cfg(feature = "flate2")] #[test] fn test_empty_gz_raises_empty_file_error() { let encoder = GzEncoder::new(Vec::new(), GzCompression::default()); @@ -211,9 +214,10 @@ mod test { assert_eq!(actual_err, expected_err); } + #[cfg(feature = "bzip2")] #[test] fn test_empty_bz_raises_empty_file_error() { - let encoder = BzEncoder::new("".as_bytes(), BzCompressionn::default()); + let encoder = BzEncoder::new("".as_bytes(), BzCompression::default()); let actual = parse_fastx_reader(encoder); assert!(actual.is_err()); @@ -222,6 +226,7 @@ mod test { assert_eq!(actual_err, expected_err); } + #[cfg(feature = "xz2")] #[test] fn test_empty_xz_raises_empty_file_error() { let encoder = XzEncoder::new(Vec::new(), 9); @@ -234,6 +239,7 @@ mod test { assert_eq!(actual_err, expected_err); } + #[cfg(feature = "zstd")] #[test] fn test_empty_zstd_raises_empty_file_error() { let encoder = ZstdEncoder::new(Vec::new(), zstd::DEFAULT_COMPRESSION_LEVEL).unwrap(); From ba898bd4f6ab6bb7e6732bf964fc62c864247238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ant=C3=B4nio=20Camargo?= Date: Thu, 19 Dec 2024 12:01:34 -0800 Subject: [PATCH 7/7] Add a `__next__` method to `PyFastxReader` --- src/python.rs | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/python.rs b/src/python.rs index 35b2822..e6f1408 100644 --- a/src/python.rs +++ b/src/python.rs @@ -28,11 +28,20 @@ pub struct PyFastxReader { #[pymethods] impl PyFastxReader { fn __repr__(&self) -> PyResult { - Ok("".to_string()) + Ok("".to_string()) } - fn __iter__(slf: PyRefMut, py: Python<'_>) -> PyResult { - Ok(FastxReaderIterator { t: slf.into_py(py) }) + fn __iter__(slf: PyRefMut) -> PyRefMut { + slf + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + if let Some(rec) = slf.reader.next() { + let record = py_try!(rec); + Ok(Some(Record::from_sequence_record(&record))) + } else { + Ok(None) + } } } @@ -74,24 +83,6 @@ impl Record { } } -#[pyclass] -pub struct FastxReaderIterator { - t: PyObject, -} - -#[pymethods] -impl FastxReaderIterator { - fn __next__(slf: PyRef, py: Python<'_>) -> PyResult> { - let mut parser: PyRefMut = slf.t.extract(py)?; - if let Some(rec) = parser.reader.next() { - let record = py_try!(rec); - Ok(Some(Record::from_sequence_record(&record))) - } else { - Ok(None) - } - } -} - // TODO: what would be really nice is to detect the type of pyobject so it would on file object etc // not for initial release though