Skip to content

Commit

Permalink
Add needletail (#52723)
Browse files Browse the repository at this point in the history
* Add needletail

* Add test file

* Add test files

* Include test files in meta.yaml

* Include test files in meta.yaml

* Fix test file

* Build with pip

* Add cffi requirement

* Remove ARM builds

* Rename test.py to run_test.py

* remove obsolete test commands

* copy orjson recipe

* Delete recipes/needletail/build.sh

* skip version constraint lint

* use correct key for skip and maintainer

* set bindings and features for maturin

* add ARM builds

---------

Co-authored-by: Michael Hall <[email protected]>
Co-authored-by: Luiz Irber <[email protected]>
  • Loading branch information
3 people authored Dec 19, 2024
1 parent 5815cf1 commit c779ff8
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 0 deletions.
62 changes: 62 additions & 0 deletions recipes/needletail/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Conda recipe for needletail, built from the upstream GitHub release tarball.
{% set name = "needletail" %}
{% set version = "0.6.1" %}
{% set sha256 = "58c1e04fc706060192fa2669327d45ebad1ab99fb15f73f2e040e4f8b1d051d9" %}

package:
  name: {{ name|lower }}
  version: {{ version }}

source:
  url: https://github.com/onecodex/{{ name }}/archive/refs/tags/v{{ version }}.tar.gz
  sha256: {{ sha256 }}
  # Applied to the unpacked source before the build script runs.
  patches:
    - patches/01-maturin-pyo3.patch

build:
  number: 0
  run_exports:
    - {{ pin_subpackage(name|lower, max_pin="x.x") }}
  # Step 1 installs the package with pip; step 2 writes THIRDPARTY.yml,
  # which is listed under about/license_file below.
  script:
    - {{ PYTHON }} -m pip install . --no-deps --no-build-isolation --disable-pip-version-check
    - cargo-bundle-licenses --format yaml --output THIRDPARTY.yml

requirements:
  # Entries carrying a selector are only used when the build platform
  # differs from the target platform (cross-compilation).
  build:
    - python # [build_platform != target_platform]
    - cross-python_{{ target_platform }} # [build_platform != target_platform]
    - crossenv # [build_platform != target_platform]
    - maturin >=1,<2 # [build_platform != target_platform]
    - {{ compiler("c") }}
    - {{ compiler("rust") }}
    - {{ stdlib("c") }}
    - cargo-bundle-licenses
  host:
    - python
    - pip
    - maturin >=1,<2
  run:
    - python

test:
  # Data files read by the test script (opened by relative name).
  files:
    - test.fa
    - test.fq

about:
  home: https://github.com/onecodex/needletail
  license: MIT
  license_family: MIT
  license_file:
    - LICENSE
    - THIRDPARTY.yml
  summary: Fast FASTX parsing in Python
  dev_url: https://github.com/onecodex/needletail

extra:
  skip-lints:
    - version_constraints_missing_whitespace # see https://github.com/bioconda/bioconda-recipes/issues/51185
  recipe-maintainers:
    - apcamargo
  # Extra platforms to build for, in addition to the defaults.
  additional-platforms:
    - linux-aarch64
    - osx-arm64
15 changes: 15 additions & 0 deletions recipes/needletail/patches/01-maturin-pyo3.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
diff --git a/pyproject.toml b/pyproject.toml
index 6339e16..ae6a18e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,4 +9,8 @@ classifier = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Topic :: Scientific/Engineering :: Bio-Informatics",
-]
\ No newline at end of file
+]
+
+[tool.maturin]
+bindings = "pyo3"
+features = ["python"]
102 changes: 102 additions & 0 deletions recipes/needletail/run_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import unittest

from needletail import (
parse_fastx_file,
parse_fastx_string,
NeedletailError,
reverse_complement,
normalize_seq,
)


# Relative paths of the FASTA and FASTQ fixtures read by the tests below.
FASTA_FILE = "test.fa"
FASTQ_FILE = "test.fq"


class ParsingTestCase(unittest.TestCase):
    """Parse the FASTA/FASTQ fixtures by path and verify every record.

    Subclasses may override ``get_fasta_reader`` / ``get_fastq_reader`` to run
    the same checks against a different way of constructing the reader.
    """

    # Expected (id, seq, qual) per record, in file order. ``qual`` is None for
    # FASTA records, which carry no quality string.
    EXPECTED_FASTA = (
        ("test", "AGCTGATCGA", None),
        ("test2", "TAGC", None),
    )
    EXPECTED_FASTQ = (
        (
            "EAS54_6_R1_2_1_413_324",
            "CCCTTCTTGTCTTCAGCGTTTCTCC",
            ";;3;;;;;;;;;;;;7;;;;;;;88",
        ),
        (
            "EAS54_6_R1_2_1_540_792",
            "TTGGCAGGCCAAGGCCGATGGATCA",
            ";;;;;;;;;;;7;;;;;-;;;3;83",
        ),
        (
            "EAS54_6_R1_2_1_443_348",
            "GTTGCTTCTGGCGTGGGTGGGGGGG",
            ";;;;;;;;;;;9;7;;.7;393333",
        ),
    )

    def get_fasta_reader(self):
        """Return an iterable of records parsed from the FASTA fixture."""
        return parse_fastx_file(FASTA_FILE)

    def get_fastq_reader(self):
        """Return an iterable of records parsed from the FASTQ fixture."""
        return parse_fastx_file(FASTQ_FILE)

    def _check_records(self, reader, expected, fastq):
        """Assert that ``reader`` yields exactly the records in ``expected``.

        Args:
            reader: iterable of parsed records.
            expected: sequence of ``(id, seq, qual)`` tuples in file order.
            fastq: True to require ``is_fastq()``, False to require
                ``is_fasta()`` on every record.
        """
        seen = 0
        # Stream through the reader instead of materialising it, exactly as
        # a consumer of the parser would.
        for index, record in enumerate(reader):
            self.assertLess(
                index, len(expected), "parser yielded more records than expected"
            )
            want_id, want_seq, want_qual = expected[index]

            self.assertEqual(record.id, want_id)
            self.assertEqual(record.seq, want_seq)
            if want_qual is None:
                self.assertIsNone(record.qual)
            else:
                self.assertEqual(record.qual, want_qual)

            # The fixtures are already upper-case ACGT, so normalising must
            # leave the sequence untouched.
            record.normalize(iupac=False)
            self.assertEqual(record.seq, want_seq)

            if fastq:
                self.assertTrue(record.is_fastq())
            else:
                self.assertTrue(record.is_fasta())
            seen = index + 1

        # Without this an empty reader would pass: every assertion above
        # lives inside the loop body.
        self.assertEqual(seen, len(expected))

    def test_can_parse_fasta_file(self):
        """Every FASTA record matches the fixture and nothing is missing."""
        self._check_records(self.get_fasta_reader(), self.EXPECTED_FASTA, fastq=False)

    def test_can_parse_fastq_file(self):
        """Every FASTQ record matches the fixture and nothing is missing."""
        self._check_records(self.get_fastq_reader(), self.EXPECTED_FASTQ, fastq=True)


class ParsingStrTestCase(ParsingTestCase):
    """Repeat the inherited parsing tests using in-memory text.

    Each fixture is read fully into a string and handed to
    ``parse_fastx_string`` instead of being parsed by path.
    """

    @staticmethod
    def _reader_from_text(path):
        """Read ``path`` as text and return a reader over its contents."""
        with open(path) as handle:
            text = handle.read()
        return parse_fastx_string(text)

    def get_fasta_reader(self):
        """Return a reader over the FASTA fixture's text."""
        return self._reader_from_text(FASTA_FILE)

    def get_fastq_reader(self):
        """Return a reader over the FASTQ fixture's text."""
        return self._reader_from_text(FASTQ_FILE)


class MiscelleanousTestCase(unittest.TestCase):
    """Checks for the standalone sequence helper functions."""

    def test_normalize_seq(self):
        """normalize_seq returns the expected string for each input."""
        # (raw input, iupac flag, expected result)
        cases = (
            ("ACGTU", False, "ACGTT"),
            ("acgtu", False, "ACGTT"),
            ("N.N-N~N N", False, "N-N-N-NN"),
            ("BDHVRYSWKM", True, "BDHVRYSWKM"),
            ("bdhvryswkm", True, "BDHVRYSWKM"),
        )
        for raw, allow_iupac, expected in cases:
            self.assertEqual(normalize_seq(raw, iupac=allow_iupac), expected)

    def test_reverse_complement(self):
        """reverse_complement handles single bases and a short sequence."""
        single_bases = (("a", "t"), ("c", "g"), ("g", "c"), ("n", "n"))
        for base, complement in single_bases:
            self.assertEqual(reverse_complement(base), complement)

        self.assertEqual(reverse_complement("atcg"), "cgat")


class ErroringTestCase(unittest.TestCase):
    """Verify that bad input is reported as ``NeedletailError``."""

    def test_file_not_found(self):
        """Parsing the path "hey" (expected not to exist) must raise."""
        with self.assertRaises(NeedletailError):
            parse_fastx_file("hey")

    def test_invalid_record(self):
        """Parsing text that is not a valid record must raise."""
        with self.assertRaises(NeedletailError):
            # Drain the reader so the error is caught whether it is raised
            # when the reader is created or only while records are pulled.
            for _ in parse_fastx_string("Not a valid file"):
                pass


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
4 changes: 4 additions & 0 deletions recipes/needletail/test.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>test
AGCTGATCGA
>test2
TAGC
12 changes: 12 additions & 0 deletions recipes/needletail/test.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@EAS54_6_R1_2_1_413_324
CCCTTCTTGTCTTCAGCGTTTCTCC
+
;;3;;;;;;;;;;;;7;;;;;;;88
@EAS54_6_R1_2_1_540_792
TTGGCAGGCCAAGGCCGATGGATCA
+
;;;;;;;;;;;7;;;;;-;;;3;83
@EAS54_6_R1_2_1_443_348
GTTGCTTCTGGCGTGGGTGGGGGGG
+
;;;;;;;;;;;9;7;;.7;393333

0 comments on commit c779ff8

Please sign in to comment.