Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add needletail #52723

Merged
merged 17 commits into from
Dec 19, 2024
Merged
62 changes: 62 additions & 0 deletions recipes/needletail/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{% set name = "needletail" %}
{% set version = "0.6.1" %}
{% set sha256 = "58c1e04fc706060192fa2669327d45ebad1ab99fb15f73f2e040e4f8b1d051d9" %}

package:
name: {{ name|lower }}
version: {{ version }}

source:
url: https://github.com/onecodex/{{ name }}/archive/refs/tags/v{{ version }}.tar.gz
sha256: {{ sha256 }}
patches:
- patches/01-maturin-pyo3.patch

# Build settings: build number, downstream pinning, and the build commands.
build:
number: 0
run_exports:
# Packages built against this one are pinned to the same major.minor series.
- {{ pin_subpackage(name|lower, max_pin="x.x") }}
script:
# Install from the unpacked source tree; --no-deps and --no-build-isolation
# make pip rely on the packages listed under `requirements` instead of
# resolving or downloading its own.
- {{ PYTHON }} -m pip install . --no-deps --no-build-isolation --disable-pip-version-check
# Writes THIRDPARTY.yml, which is referenced under about/license_file.
- cargo-bundle-licenses --format yaml --output THIRDPARTY.yml

requirements:
build:
# The entries carrying the `[build_platform != target_platform]` selector are
# only included when cross-compiling, i.e. when the machine running the build
# differs from the platform the package targets.
- python # [build_platform != target_platform]
- cross-python_{{ target_platform }} # [build_platform != target_platform]
- crossenv # [build_platform != target_platform]
# Same version range as the maturin entry in the host requirements.
- maturin >=1,<2 # [build_platform != target_platform]

This comment was marked as outdated.

- {{ compiler("c") }}
- {{ compiler("rust") }}
- {{ stdlib("c") }}
- cargo-bundle-licenses
host:
- python
- pip
- maturin >=1,<2

This comment was marked as outdated.

run:
- python

# Fixture files made available to the test phase; run_test.py opens them by
# these relative names (see FASTA_FILE / FASTQ_FILE there).
test:
files:
- test.fa
- test.fq

about:
home: https://github.com/onecodex/needletail
license: MIT
license_family: MIT
license_file:
- LICENSE
- THIRDPARTY.yml
summary: Fast FASTX parsing in Python
dev_url: https://github.com/onecodex/needletail

extra:
skip-lints:
- version_constraints_missing_whitespace # see https://github.com/bioconda/bioconda-recipes/issues/51185
recipe-maintainers:
- apcamargo
additional-platforms:
- linux-aarch64
- osx-arm64
15 changes: 15 additions & 0 deletions recipes/needletail/patches/01-maturin-pyo3.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
diff --git a/pyproject.toml b/pyproject.toml
index 6339e16..ae6a18e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,4 +9,8 @@ classifier = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Topic :: Scientific/Engineering :: Bio-Informatics",
-]
\ No newline at end of file
+]
+
+[tool.maturin]
+bindings = "pyo3"
+features = ["python"]
102 changes: 102 additions & 0 deletions recipes/needletail/run_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import unittest

from needletail import (
parse_fastx_file,
parse_fastx_string,
NeedletailError,
reverse_complement,
normalize_seq,
)


# Relative paths of the FASTA and FASTQ fixture files read by the test cases.
FASTA_FILE = "test.fa"
FASTQ_FILE = "test.fq"


class ParsingTestCase(unittest.TestCase):
    """Check that the FASTA/FASTQ fixtures are parsed into the expected records.

    The readers are obtained through ``get_fasta_reader`` / ``get_fastq_reader``
    so that subclasses can run the same checks against another record source.
    """

    # Expected (id, seq) of every record in test.fa, in file order.
    EXPECTED_FASTA = (
        ("test", "AGCTGATCGA"),
        ("test2", "TAGC"),
    )

    # Expected (id, seq, qual) of every record in test.fq, in file order.
    EXPECTED_FASTQ = (
        (
            "EAS54_6_R1_2_1_413_324",
            "CCCTTCTTGTCTTCAGCGTTTCTCC",
            ";;3;;;;;;;;;;;;7;;;;;;;88",
        ),
        (
            "EAS54_6_R1_2_1_540_792",
            "TTGGCAGGCCAAGGCCGATGGATCA",
            ";;;;;;;;;;;7;;;;;-;;;3;83",
        ),
        (
            "EAS54_6_R1_2_1_443_348",
            "GTTGCTTCTGGCGTGGGTGGGGGGG",
            ";;;;;;;;;;;9;7;;.7;393333",
        ),
    )

    def get_fasta_reader(self):
        """Return an iterable of records parsed from the FASTA fixture file."""
        return parse_fastx_file(FASTA_FILE)

    def get_fastq_reader(self):
        """Return an iterable of records parsed from the FASTQ fixture file."""
        return parse_fastx_file(FASTQ_FILE)

    def test_can_parse_fasta_file(self):
        """Every FASTA record matches the fixture and none is missing or extra."""
        expected = self.EXPECTED_FASTA
        seen = 0
        for index, record in enumerate(self.get_fasta_reader()):
            # Fail with a clear message instead of an IndexError on extra records.
            self.assertLess(index, len(expected), "more records than expected")
            expected_id, expected_seq = expected[index]
            self.assertEqual(record.id, expected_id)
            self.assertEqual(record.seq, expected_seq)
            self.assertIsNone(record.qual)
            # The fixture sequences are already upper-case ACGT, so normalizing
            # must leave them untouched.
            record.normalize(iupac=False)
            self.assertEqual(record.seq, expected_seq)
            self.assertTrue(record.is_fasta())
            seen += 1

        # Without this check the test would pass if the reader yielded nothing.
        self.assertEqual(seen, len(expected))

    def test_can_parse_fastq_file(self):
        """Every FASTQ record matches the fixture and none is missing or extra."""
        expected = self.EXPECTED_FASTQ
        seen = 0
        for index, record in enumerate(self.get_fastq_reader()):
            # Fail with a clear message instead of an IndexError on extra records.
            self.assertLess(index, len(expected), "more records than expected")
            expected_id, expected_seq, expected_qual = expected[index]
            self.assertEqual(record.id, expected_id)
            self.assertEqual(record.seq, expected_seq)
            self.assertEqual(record.qual, expected_qual)
            # The fixture sequences are already upper-case ACGT, so normalizing
            # must leave them untouched.
            record.normalize(iupac=False)
            self.assertEqual(record.seq, expected_seq)
            self.assertTrue(record.is_fastq())
            seen += 1

        # Without this check the test would pass if the reader yielded nothing.
        self.assertEqual(seen, len(expected))


class ParsingStrTestCase(ParsingTestCase):
    """Run the inherited parsing checks on in-memory strings instead of paths."""

    @staticmethod
    def _reader_from_contents(path):
        # Read the whole fixture as text, then hand the string to the parser.
        with open(path) as handle:
            text = handle.read()
        return parse_fastx_string(text)

    def get_fasta_reader(self):
        """Return records parsed from the FASTA fixture's text content."""
        return self._reader_from_contents(FASTA_FILE)

    def get_fastq_reader(self):
        """Return records parsed from the FASTQ fixture's text content."""
        return self._reader_from_contents(FASTQ_FILE)


class MiscelleanousTestCase(unittest.TestCase):
    """Checks for the standalone sequence helper functions."""

    def test_normalize_seq(self):
        """normalize_seq returns the expected string for each input/flag pair."""
        # (input, iupac flag, expected output), checked in this order.
        cases = (
            ("ACGTU", False, "ACGTT"),
            ("acgtu", False, "ACGTT"),
            ("N.N-N~N N", False, "N-N-N-NN"),
            ("BDHVRYSWKM", True, "BDHVRYSWKM"),
            ("bdhvryswkm", True, "BDHVRYSWKM"),
        )
        for raw, use_iupac, normalized in cases:
            self.assertEqual(normalize_seq(raw, iupac=use_iupac), normalized)

    def test_reverse_complement(self):
        """reverse_complement returns the expected string for each input."""
        # (input, expected output), checked in this order.
        pairs = (
            ("a", "t"),
            ("c", "g"),
            ("g", "c"),
            ("n", "n"),
            ("atcg", "cgat"),
        )
        for sequence, complemented in pairs:
            self.assertEqual(reverse_complement(sequence), complemented)


class ErroringTestCase(unittest.TestCase):
    """Checks that invalid inputs raise NeedletailError."""

    def test_file_not_found(self):
        """Parsing the path "hey" raises NeedletailError."""
        self.assertRaises(NeedletailError, parse_fastx_file, "hey")

    def test_invalid_record(self):
        """Parsing and iterating over non-FASTX text raises NeedletailError."""
        with self.assertRaises(NeedletailError):
            # Both creating the reader and iterating it stay inside the
            # context, so the error is accepted from either step.
            reader = parse_fastx_string("Not a valid file")
            for index, _record in enumerate(reader):
                print(index)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
4 changes: 4 additions & 0 deletions recipes/needletail/test.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>test
AGCTGATCGA
>test2
TAGC
12 changes: 12 additions & 0 deletions recipes/needletail/test.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@EAS54_6_R1_2_1_413_324
CCCTTCTTGTCTTCAGCGTTTCTCC
+
;;3;;;;;;;;;;;;7;;;;;;;88
@EAS54_6_R1_2_1_540_792
TTGGCAGGCCAAGGCCGATGGATCA
+
;;;;;;;;;;;7;;;;;-;;;3;83
@EAS54_6_R1_2_1_443_348
GTTGCTTCTGGCGTGGGTGGGGGGG
+
;;;;;;;;;;;9;7;;.7;393333