From 13a931fd9a61eeba4744c14ed7188f45a8bee918 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 11 Jun 2021 23:37:59 +0200 Subject: [PATCH 01/68] adapt all methods of align to support empty component --- lib/bx/align/core.py | 52 +++++++++++++++++++++++-------- lib/bx/align/maf_tests.py | 65 ++++++++++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 19 deletions(-) diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index d565e610..b42d3421 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -108,9 +108,6 @@ def get_component_by_src_start(self, src): def slice(self, start, end): new = Alignment(score=self.score, attributes=self.attributes) for component in self.components: - # FIXME: Is this the right solution? - if component.empty: - continue new.components.append(component.slice(start, end)) new.text_size = end - start return new @@ -149,8 +146,11 @@ def slice_by_component(self, component_index, start, end): return self.slice(start_col, end_col) def column_iter(self): + # FIXME: The empty component are not present + # in column_iter. + # Maybe it would be good to use - and = for i in range(self.text_size): - yield [c.text[i] for c in self.components] + yield [c.text[i] for c in self.components if not c.empty] def limit_to_species(self, species): new = Alignment(score=self.score, attributes=self.attributes) @@ -167,6 +167,8 @@ def remove_all_gap_columns(self): """ seqs = [] for c in self.components: + if c.empty: + seqs.append(None) try: seqs.append(list(c.text)) except TypeError: @@ -236,7 +238,7 @@ def __init__(self, src='', start=0, size=0, strand=None, src_size=None, text='') self.synteny_right = None self.synteny_empty = None # If true, this component actually represents a non-aligning region, - # and has no text. + # and text is None. 
self.empty = False # Index maps a coordinate (distance along + strand from + start) to alignment column self.index = None @@ -289,16 +291,33 @@ def reverse_complement(self): strand = "-" else: strand = "+" - comp = [ch for ch in self.text.translate(DNA_COMP)] - comp.reverse() - text = "".join(comp) + if self.empty: + text = None + else: + comp = [ch for ch in self.text.translate(DNA_COMP)] + comp.reverse() + text = "".join(comp) new = Component(self.src, start, self.size, strand, self._src_size, text) + if self.empty: + new.empty = True + new.synteny_empty = self.synteny_empty + # Propagate supplementary info + if self.synteny_left: + new.synteny_right = self.synteny_left + if self.synteny_right: + new.synteny_left = self.synteny_right new._alignment = self._alignment return new def slice(self, start, end): new = Component(src=self.src, start=self.start, strand=self.strand, src_size=self._src_size) new._alignment = self._alignment + if self.empty: + new.empty = True + new.size = self.size + new.text = None + new.synteny_empty = self.synteny_empty + return new new.text = self.text[start:end] # for i in range( 0, start ): @@ -312,8 +331,14 @@ def slice(self, start, end): # one of the ends changes. In general the 'i' rows of a MAF only # make sense in context (relative to the previous and next alignments # in a stream, slicing breaks that). - new.synteny_left = self.synteny_left - new.synteny_right = self.synteny_right + # LD: Indeed, I think it is wrong to keep them. Let's keep the info + # only when the boundaries are kept. + if self.synteny_left: + if start == 0: + new.synteny_left = self.synteny_left + if self.synteny_right: + if end == self.size: + new.synteny_right = self.synteny_right return new @@ -337,6 +362,8 @@ def coord_to_col(self, pos): pos is relative to the + strand, regardless of the component's strand. """ + if self.empty: + raise ValueError("There is no column index. 
It is empty.") start, end = self.get_forward_strand_start(), self.get_forward_strand_end() if pos < start or pos > end: raise ValueError("Range error: %d not in %d-%d" % (pos, start, end)) @@ -438,10 +465,11 @@ def get_indexed(format, filename, index_filename=None, keep_open=False, species_ def shuffle_columns(a): """Randomize the columns of an alignment""" - mask = range(a.text_size) + mask = list(range(a.text_size)) random.shuffle(mask) for c in a.components: - c.text = ''.join([c.text[i] for i in mask]) + if not c.empty: + c.text = ''.join([c.text[i] for i in mask]) def src_split(src): # splits src into species,chrom diff --git a/lib/bx/align/maf_tests.py b/lib/bx/align/maf_tests.py index 7db7ab52..993a4ddb 100644 --- a/lib/bx/align/maf_tests.py +++ b/lib/bx/align/maf_tests.py @@ -55,6 +55,17 @@ """ +complex_maf = align.Alignment() +complex_maf.score = "7009" +complex_maf.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="ACA-TTACT")) +complex_maf.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="ACAATTGCT")) +complex_maf.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) +complex_maf.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) +complex_maf.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="---ATT---")) +complex_maf.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) +complex_maf.components[-1].empty = True +complex_maf.components[-1].synteny_empty = maf.MAF_INSERT_STATUS +complex_maf.text_size = 9 def test_reader(): @@ -108,15 +119,14 @@ def test_writer(): def test_slice(): - a = align.Alignment() - a.score = "7009" - a.components.append(align.Component(src="human_hoxa", start=100, size=9, strand="+", src_size=100257, text="ACA-TTACT")) - a.components.append(align.Component(src="horse_hoxa", start=120, size=10, strand="-", 
src_size=98892, text="ACAATTGCT")) - - b = a.slice_by_component(0, 101, 105) + b = complex_maf.slice_by_component(0, 101, 105) check_component(b.components[0], src="human_hoxa", start=101, size=4, strand="+", src_size=100257, text="CA-TT") check_component(b.components[1], src="horse_hoxa", start=121, size=5, strand="-", src_size=98892, text="CAATT") + check_component(b.components[2], src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="--ATT") + check_component(b.components[3], src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None) + assert b.components[3].empty + assert b.components[3].synteny_empty == maf.MAF_INSERT_STATUS # test slicing with + strand src reader = maf.Reader(StringIO(test_maf_3)) @@ -134,7 +144,48 @@ def test_slice(): assert a is None -def test_with_synteny(): +def test_reverse_complement(): + + b = complex_maf.reverse_complement() + + check_component(b.components[0], src="human_hoxa", start=100257-100-8, size=8, strand="-", src_size=100257, text="AGTAA-TGT") + check_component(b.components[1], src="horse_hoxa", start=98892-120-9, size=9, strand="+", src_size=98892, text="AGCAATTGT") + assert b.components[1].synteny_right == (maf.MAF_NEW_STATUS, 0) + assert b.components[1].synteny_left == (maf.MAF_CONTIG_STATUS, 0) + check_component(b.components[2], src="unknown_1", start=98892-150-3, size=3, strand="+", src_size=98892, text="---AAT---") + check_component(b.components[3], src="unknown_2", start=1200-12-1000, size=1000, strand="-", src_size=1200, text=None) + assert b.components[3].empty + assert b.components[3].synteny_empty == maf.MAF_INSERT_STATUS + +def test_column_iter(): + expected = [['A', 'A', '-'], + ['C', 'C', '-'], + ['A', 'A', '-'], + ['-', 'A', 'A'], + ['T', 'T', 'T'], + ['T', 'T', 'T'], + ['A', 'G', '-'], + ['C', 'C', '-'], + ['T', 'T', '-']] + for i, c in enumerate(complex_maf.column_iter()): + assert c == expected[i] + +def test_remove_all_gap_column(): + complex_maf_gap = align.Alignment() + 
complex_maf_gap.score = "7009" + complex_maf_gap.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="-ACA--TTACT")) + complex_maf_gap.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="-ACA-ATTGCT")) + complex_maf_gap.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) + complex_maf_gap.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) + complex_maf_gap.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="-----ATT---")) + complex_maf_gap.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) + complex_maf_gap.components[-1].empty = True + complex_maf_gap.components[-1].synteny_empty = maf.MAF_INSERT_STATUS + complex_maf_gap.text_size = 11 + complex_maf_gap.remove_all_gap_columns() + assert complex_maf_gap == complex_maf + +def test_read_with_synteny(): reader = maf.Reader(StringIO(test_maf_2), parse_e_rows=True) a = next(reader) From 709b73835c0343626830a8c390c6828a8c56edfe Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 11 Jun 2021 23:39:15 +0200 Subject: [PATCH 02/68] fix fuse for synteny_right prevent trying to fuse when empty lines --- lib/bx/align/tools/fuse.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/bx/align/tools/fuse.py b/lib/bx/align/tools/fuse.py index 807a6b03..f36af5e4 100644 --- a/lib/bx/align/tools/fuse.py +++ b/lib/bx/align/tools/fuse.py @@ -36,12 +36,14 @@ def fuse(m1, m2): >>> block1 = bx.align.maf.from_string( ''' ... a score=0.0 ... s hg18.chr10 52686 44 + 135374737 GTGCTAACTTACTGCTCCACAGAAAACATCAATTCTGCTCATGC + ... i hg18.chr10 N 0 C 0 ... s panTro1.chrUn_random 208115356 44 - 240967748 GTGCTAACTGACTGCTCCAGAGAAAACATCAATTCTGTTCATGT ... ''' ) >>> block2 = bx.align.maf.from_string( ''' ... a score=0.0 ... 
s hg18.chr10 52730 69 + 135374737 GCAGGTACAATTCATCAAGAAAGGAATTACAACTTCAGAAATGTGTTCAAAATATATCCATACTTTGAC + ... i hg18.chr10 C 0 I 12 ... s panTro1.chrUn_random 208115400 69 - 240967748 GCAGCTACTATTCATCAAGAAAGGGATTACAACTTCAGAAATGTGTTCAAAGTGTATCCATACTTTGAT ... ''' ) @@ -50,10 +52,12 @@ def fuse(m1, m2): >>> print(fused) a score=0.0 s hg18.chr10 52686 113 + 135374737 GTGCTAACTTACTGCTCCACAGAAAACATCAATTCTGCTCATGCGCAGGTACAATTCATCAAGAAAGGAATTACAACTTCAGAAATGTGTTCAAAATATATCCATACTTTGAC + i hg18.chr10 N 0 I 12 s panTro1.chrUn_random 208115356 113 - 240967748 GTGCTAACTGACTGCTCCAGAGAAAACATCAATTCTGTTCATGTGCAGCTACTATTCATCAAGAAAGGGATTACAACTTCAGAAATGTGTTCAAAGTGTATCCATACTTTGAT """ - # Check if the blocks are adjacent, return none if not. + # Check if the blocks are adjacent and easily fusable + # return none if not. if len(m1.components) != len(m2.components): return None for c1, c2 in zip(m1.components, m2.components): @@ -63,11 +67,15 @@ def fuse(m1, m2): return None if c1.end != c2.start: return None + if c1.empty or c2.empty: + return None # Try to fuse: n = deepcopy(m1) for c1, c2 in zip(n.components, m2.components): c1.text += c2.text c1.size += c2.size + # Propagate the synteny right + c1.synteny_right = c2.synteny_right n.text_size = len(n.components[0].text) return n From b70684e872b1ce22cf71bcd33b2598e6f584c44c Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 11 Jun 2021 23:42:06 +0200 Subject: [PATCH 03/68] use method remove_all_gap_columns --- lib/bx/align/tools/thread.py | 27 +-------------------------- scripts/maf_thread_for_species.py | 5 ++--- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/lib/bx/align/tools/thread.py b/lib/bx/align/tools/thread.py index ad68261b..bdb845e6 100644 --- a/lib/bx/align/tools/thread.py +++ b/lib/bx/align/tools/thread.py @@ -58,10 +58,10 @@ def thread(mafs, species): new_maf = deepcopy(m) new_components = get_components_for_species(new_maf, species) if new_components: - remove_all_gap_columns(new_components) 
new_maf.components = new_components new_maf.score = 0.0 new_maf.text_size = len(new_components[0].text) + new_maf.remove_all_gap_columns() yield new_maf @@ -77,28 +77,3 @@ def get_components_for_species(alignment, species): return [index[s] for s in species] except Exception: return None - - -def remove_all_gap_columns(components): - """ - Remove any columns containing only gaps from a set of alignment components, - text of components is modified IN PLACE. - - TODO: Optimize this with Pyrex. - """ - seqs = [list(c.text) for c in components] - i = 0 - text_size = len(seqs[0]) - while i < text_size: - all_gap = True - for seq in seqs: - if seq[i] != '-': - all_gap = False - if all_gap: - for seq in seqs: - del seq[i] - text_size -= 1 - else: - i += 1 - for i in range(len(components)): - components[i].text = ''.join(seqs[i]) diff --git a/scripts/maf_thread_for_species.py b/scripts/maf_thread_for_species.py index 49f95a56..931d70a0 100755 --- a/scripts/maf_thread_for_species.py +++ b/scripts/maf_thread_for_species.py @@ -15,8 +15,7 @@ import bx.align.maf from bx.align.tools.fuse import FusingAlignmentWriter from bx.align.tools.thread import ( - get_components_for_species, - remove_all_gap_columns + get_components_for_species ) from bx.cookbook import doc_optparse @@ -43,9 +42,9 @@ def main(): for m in maf_reader: new_components = get_components_for_species(m, species) if new_components: - remove_all_gap_columns(new_components) m.components = new_components m.score = 0.0 + m.remove_all_gap_columns() maf_writer.write(m) maf_reader.close() From 4d08a8c604c2be82161e9d007e1e15f4210aabdb Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Fri, 11 Jun 2021 23:45:07 +0200 Subject: [PATCH 04/68] use parse_e_rows=True in scripts --- scripts/maf_build_index.py | 2 +- scripts/maf_chunk.py | 2 +- scripts/maf_extract_ranges.py | 4 ++-- scripts/maf_filter.py | 2 +- scripts/maf_filter_max_wc.py | 2 +- scripts/maf_limit_to_species.py | 2 +- scripts/maf_select.py | 2 +- 
scripts/maf_shuffle_columns.py | 2 +- scripts/maf_thread_for_species.py | 2 +- scripts/maf_truncate.py | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/maf_build_index.py b/scripts/maf_build_index.py index 1c7c51c6..5dd1f831 100755 --- a/scripts/maf_build_index.py +++ b/scripts/maf_build_index.py @@ -58,7 +58,7 @@ def main(): doc_optparse.exception() maf_in = TextIOWrapper(maf_in, encoding="ascii") - maf_reader = bx.align.maf.Reader(maf_in) + maf_reader = bx.align.maf.Reader(maf_in, parse_e_rows=True) indexes = interval_index_file.Indexes() diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index feadd500..0dd3ab30 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -31,7 +31,7 @@ def __main__(): out_dir = args[1] prob = options.prob - maf_reader = bx.align.maf.Reader(sys.stdin) + maf_reader = bx.align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = None diff --git a/scripts/maf_extract_ranges.py b/scripts/maf_extract_ranges.py index ac24accf..388eb791 100755 --- a/scripts/maf_extract_ranges.py +++ b/scripts/maf_extract_ranges.py @@ -49,7 +49,7 @@ def __main__(): # Iterate over input MAF - for maf in bx.align.maf.Reader(sys.stdin): + for maf in bx.align.maf.Reader(sys.stdin, parse_e_rows=True): ref = maf.components[refindex] # Find overlap with reference component intersections = sorted(intersecter.find(ref.get_forward_strand_start(), ref.get_forward_strand_end())) @@ -61,7 +61,7 @@ def __main__(): sliced = maf.slice_by_component(refindex, start, end) good = True for c in sliced.components: - if c.size < 1: + if c.size < 1 and not c.empty: good = False if good and sliced.text_size > mincols: out.write(sliced) diff --git a/scripts/maf_filter.py b/scripts/maf_filter.py index b12d2791..dfe3592f 100755 --- a/scripts/maf_filter.py +++ b/scripts/maf_filter.py @@ -37,7 +37,7 @@ def __main__(): if expr: expr = compile(expr, '', 'eval') - maf_reader = maf.Reader(sys.stdin) + maf_reader = maf.Reader(sys.stdin, 
parse_e_rows=True) maf_writer = maf.Writer(sys.stdout) for m in maf_reader: diff --git a/scripts/maf_filter_max_wc.py b/scripts/maf_filter_max_wc.py index 2979c809..322e495d 100755 --- a/scripts/maf_filter_max_wc.py +++ b/scripts/maf_filter_max_wc.py @@ -20,7 +20,7 @@ def main(): min_good = int(sys.argv[1]) min_species = int(sys.argv[2]) - maf_reader = maf.Reader(sys.stdin) + maf_reader = maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = maf.Writer(sys.stdout) for m in maf_reader: diff --git a/scripts/maf_limit_to_species.py b/scripts/maf_limit_to_species.py index 03212b21..0ed9b42b 100755 --- a/scripts/maf_limit_to_species.py +++ b/scripts/maf_limit_to_species.py @@ -16,7 +16,7 @@ def main(): species = sys.argv[1].split(',') - maf_reader = bx.align.maf.Reader(sys.stdin) + maf_reader = bx.align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = bx.align.maf.Writer(sys.stdout) for m in maf_reader: diff --git a/scripts/maf_select.py b/scripts/maf_select.py index a0328e6e..b95f9f44 100755 --- a/scripts/maf_select.py +++ b/scripts/maf_select.py @@ -23,7 +23,7 @@ def __main__(): feature_vector = [int(line) for line in open(feature_file)] - maf_reader = bx.align.maf.Reader(sys.stdin) + maf_reader = bx.align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = bx.align.maf.Writer(sys.stdout) index = 0 diff --git a/scripts/maf_shuffle_columns.py b/scripts/maf_shuffle_columns.py index c916789e..17dbd7e5 100755 --- a/scripts/maf_shuffle_columns.py +++ b/scripts/maf_shuffle_columns.py @@ -15,7 +15,7 @@ def __main__(): - maf_reader = align.maf.Reader(sys.stdin) + maf_reader = align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = align.maf.Writer(sys.stdout) for m in maf_reader: diff --git a/scripts/maf_thread_for_species.py b/scripts/maf_thread_for_species.py index 931d70a0..5571bdbc 100755 --- a/scripts/maf_thread_for_species.py +++ b/scripts/maf_thread_for_species.py @@ -33,7 +33,7 @@ def main(): except Exception: doc_optparse.exit() - maf_reader = 
bx.align.maf.Reader(sys.stdin) + maf_reader = bx.align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = bx.align.maf.Writer(sys.stdout) if fuse: diff --git a/scripts/maf_truncate.py b/scripts/maf_truncate.py index f21c196f..134f6351 100755 --- a/scripts/maf_truncate.py +++ b/scripts/maf_truncate.py @@ -19,7 +19,7 @@ def __main__(): (options, args) = parser.parse_args() - maf_reader = maf.Reader(sys.stdin) + maf_reader = maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = maf.Writer(sys.stdout) if not options.cols: From 9b769901419b609243ad58f91663a12672f3e650 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 16:57:04 +0200 Subject: [PATCH 05/68] fix maf_chunk --- scripts/maf_chunk.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index 0dd3ab30..37eb3433 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -14,10 +14,11 @@ import random import sys from optparse import OptionParser +import numpy as np import bx.align.maf -INF = "inf" +INF = np.inf def __main__(): @@ -46,8 +47,6 @@ def __main__(): interval_file = open("%s/intervals.txt" % out_dir, "w") for m in maf_reader: - chunk_min = min(chunk_min, m.components[0].start) - chunk_max = max(chunk_max, m.components[0].end) if not maf_writer or count + m.text_size > chunk_size: current_chunk += 1 # Finish the last chunk @@ -70,11 +69,14 @@ def __main__(): maf_writer.write(m) # count += m.text_size count += m.components[0].size + chunk_min = min(chunk_min, m.components[0].start) + chunk_max = max(chunk_max, m.components[0].end) if maf_writer: maf_writer.close() interval_file.write(f"{chunk_min} {chunk_max}\n") - interval_file.close() + + interval_file.close() if __name__ == "__main__": From 3b3d85d1c8a444cc12cb9ec74e6f03e7e72f10d3 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 16:58:33 +0200 Subject: [PATCH 06/68] add tests for index and maf_extract_ranges_indexed --- 
script_tests/maf_build_index_tests.py | 12 +++ .../maf_extract_ranges_indexed_tests.py | 17 ++++ test_data/maf_tests/empty.maf | 1 + test_data/maf_tests/hg18.bed | 1 + test_data/maf_tests/mm10_chr12_lessspe.maf | 90 ++++++++++++++++++ .../maf_tests/mm10_chr12_lessspe.maf.index | Bin 0 -> 32204 bytes test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf | 1 + .../maf_tests/mm8_chr7_tiny_mm8_ind.maf.index | Bin 0 -> 13680 bytes test_data/maf_tests/test_hg18.maf | 36 +++++++ 9 files changed, 158 insertions(+) create mode 100644 test_data/maf_tests/empty.maf create mode 100644 test_data/maf_tests/hg18.bed create mode 100644 test_data/maf_tests/mm10_chr12_lessspe.maf create mode 100644 test_data/maf_tests/mm10_chr12_lessspe.maf.index create mode 120000 test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf create mode 100644 test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf.index create mode 100644 test_data/maf_tests/test_hg18.maf diff --git a/script_tests/maf_build_index_tests.py b/script_tests/maf_build_index_tests.py index 9c2ced8e..475b78b7 100644 --- a/script_tests/maf_build_index_tests.py +++ b/script_tests/maf_build_index_tests.py @@ -19,3 +19,15 @@ class Test3(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_build_index.py ${maf_lzo} ${maf_index}" input_maf_lzo = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf.lzo") output_maf_index = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf.index") + + +class TestindexOnlyRef(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_build_index.py -s mm8 ${maf} ${maf_index}" + input_maf = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf") + output_maf_index = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf.index") + + +class TestindexWithElines(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_build_index.py ${maf} ${maf_index}" + input_maf = 
base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") + output_maf_index = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf.index") diff --git a/script_tests/maf_extract_ranges_indexed_tests.py b/script_tests/maf_extract_ranges_indexed_tests.py index 863032b8..15bead54 100644 --- a/script_tests/maf_extract_ranges_indexed_tests.py +++ b/script_tests/maf_extract_ranges_indexed_tests.py @@ -7,3 +7,20 @@ class Test(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny.maf -c -m 5 -p mm8." input_stdin = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") + +class TestAccessNotRef(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny.maf -c -m 5 -p hg18." + input_stdin = base.TestFile(filename="./test_data/maf_tests/hg18.bed") + output_stdout = base.TestFile(filename="./test_data/maf_tests/test_hg18.maf") + + +class TestAccessRef(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p mm8." + input_stdin = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.bed") + output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") + + +class TestAccessNotRefNotIndexed(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p hg18." 
+ input_stdin = base.TestFile(filename="./test_data/maf_tests/hg18.bed") + output_stdout = base.TestFile(filename="./test_data/maf_tests/empty.maf") diff --git a/test_data/maf_tests/empty.maf b/test_data/maf_tests/empty.maf new file mode 100644 index 00000000..ce2c5560 --- /dev/null +++ b/test_data/maf_tests/empty.maf @@ -0,0 +1 @@ +##maf version=1 diff --git a/test_data/maf_tests/hg18.bed b/test_data/maf_tests/hg18.bed new file mode 100644 index 00000000..11eb46d3 --- /dev/null +++ b/test_data/maf_tests/hg18.bed @@ -0,0 +1 @@ +chr15 88557590 88557866 \ No newline at end of file diff --git a/test_data/maf_tests/mm10_chr12_lessspe.maf b/test_data/maf_tests/mm10_chr12_lessspe.maf new file mode 100644 index 00000000..f81a9bed --- /dev/null +++ b/test_data/maf_tests/mm10_chr12_lessspe.maf @@ -0,0 +1,90 @@ +##maf version=1 +a score=-22139.000000 +s mm10.chr12 56694975 11 + 120129022 GTCCTGTG---CTC-- +s hetGla2.JH602151 6912237 8 - 7060556 GTT---TG---CGC-- +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109306 8 + 248211 TTG---TG---CGC-- +i micMur1.scaffold_1897 C 0 I 1 +s tupBel1.scaffold_149545.1-136892 2298 8 + 136892 CCG---TG---CGC-- +i tupBel1.scaffold_149545.1-136892 C 0 I 1 +s pteVam1.scaffold_182 42249 8 + 455609 CTG---TG---CGC-- +i pteVam1.scaffold_182 C 0 I 1 +s eriEur1.scaffold_370206 11234 8 + 65867 CTG---AA---AAT-- +i eriEur1.scaffold_370206 C 0 I 1 +s sorAra1.scaffold_233549 569 11 + 65803 CTT---GGTGCACT-- +i sorAra1.scaffold_233549 C 0 I 1 +s loxAfr3.scaffold_9 12283154 10 + 83325590 CGG---TG---CGATC +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=-247111.000000 +s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i 
micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=18354.000000 +s mm10.chr12 56695026 1 + 120129022 C +s hetGla2.JH602151 6912285 1 - 7060556 T +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109352 1 + 248211 C +i micMur1.scaffold_1897 C 0 C 0 +s tupBel1.scaffold_149545.1-136892 2349 1 + 136892 C +i tupBel1.scaffold_149545.1-136892 C 0 C 0 +s pteVam1.scaffold_182 42297 1 + 455609 C +i pteVam1.scaffold_182 I 2 C 0 +s eriEur1.scaffold_370206 11281 1 + 65867 C +i eriEur1.scaffold_370206 I 2 C 0 +s sorAra1.scaffold_233549 621 1 + 65803 C +i sorAra1.scaffold_233549 I 2 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + +a score=36657.000000 +s mm10.chr12 56695027 7 + 120129022 CTGGCCT +s hetGla2.JH602151 6912286 6 - 7060556 GCGGAC- +i hetGla2.JH602151 C 0 I 1 +s micMur1.scaffold_1897 109353 7 + 248211 GTGACCC +i micMur1.scaffold_1897 C 0 C 0 +s tupBel1.scaffold_149545.1-136892 2350 7 + 136892 GCGGCTA +i tupBel1.scaffold_149545.1-136892 C 0 C 0 +s bosTau7.chr21 47705765 7 + 69078422 GGCCTCC +i bosTau7.chr21 I 182 C 0 +s pteVam1.scaffold_182 42298 4 + 455609 --AG-CT +i pteVam1.scaffold_182 C 0 C 0 +s eriEur1.scaffold_370206 11282 7 + 65867 TGAGCCC +i eriEur1.scaffold_370206 C 0 C 0 
+s sorAra1.scaffold_233549 622 7 + 65803 CCTGGCC +i sorAra1.scaffold_233549 C 0 C 0 +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + +a score=99018.000000 +s mm10.chr12 56695034 37 + 120129022 CTGCTAGGAT--------CC------------------------------------------------------------------------------------------------TGCTTGACTGA-GG--TTTAGCC-------------CCTC------------------A +s hetGla2.JH602151 6912293 37 - 7060556 TTGCCTGGAC--------CT------------------------------------------------------------------------------------------------CGCGTAGCCAG-GG--ATTGGTC-------------CTGC------------------G +i hetGla2.JH602151 I 1 C 0 +s micMur1.scaffold_1897 109360 57 + 248211 CTGCCTC-AGCCTGCAAACC------------------------------------------------------------------------------------------------TGCGCGGCTTA-GT--GTTGGCCCGCGCTCCCAGACGAGC------------------C +i micMur1.scaffold_1897 C 0 C 0 +s tupBel1.scaffold_149545.1-136892 2357 33 + 136892 CTGCCTCGAG--------CC------------------------------------------------------------------------------------------------TGCTCGACTAA-GG--GTTGACC-----------------------------------C +i tupBel1.scaffold_149545.1-136892 C 0 C 0 +s bosTau7.chr21 47705772 33 + 69078422 ATCTCTGGCT--------CC------------------------------------------------------------------------------------------------T-----GCTCg-tg--tttagtc-------------gctc-----------------ag +i bosTau7.chr21 C 0 C 0 +s pteVam1.scaffold_182 42302 133 + 455609 CTGCCTGGAG--------CCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCACTAA-GG--TTTGTCC-------------CCGC------------------G +i pteVam1.scaffold_182 C 0 C 0 +s eriEur1.scaffold_370206 11289 37 + 65867 CTGCCTGGAC--------CG------------------------------------------------------------------------------------------------TGAGCTGCTGG-GG--TTTGGGC-------------CTGA------------------G +i eriEur1.scaffold_370206 C 0 C 0 +s sorAra1.scaffold_233549 629 36 + 65803 
CTGCTGGGAG--------CC------------------------------------------------------------------------------------------------CAAGGGGCTAT-GG--TTT-GAC-------------CCGA------------------G +i sorAra1.scaffold_233549 C 0 C 0 +s loxAfr3.scaffold_9 12283188 30 + 83325590 ------TTAG--------CC------------------------------------------------------------------------------------------------TGCTTGGCGAG-GG--TTCGCCC--------------TGA------------------G +i loxAfr3.scaffold_9 C 0 C 0 diff --git a/test_data/maf_tests/mm10_chr12_lessspe.maf.index b/test_data/maf_tests/mm10_chr12_lessspe.maf.index new file mode 100644 index 0000000000000000000000000000000000000000..06cc54f4455620875bc2e9908de503c23f020c32 GIT binary patch literal 32204 zcmeI5O=}ZT6o&8IY1OF~V^M2CRIq4^Y0TuKO~tDnH@@J(t}F%=Hf#V zGv&n#6)%&zI69clxOoikU5WQ~RjN0Kr-N*Bcu}bnYw_L#{_NEF55e6+aY(80?X!3I zzMGb7ax-}MRy=I~{9<{+^INXTEnxrBqj-P!LNyo;yq5i$Y&M_6nU{OxLyj%h=FgQY zEr+J*p<%md~D zR)JoB|52}h2;nB0zP;7-ok`gbd#Kc&`=YRyIJ_XMNLkh;u=PAVu=dm|*c=v$?d zdzG@cp!;B5C@q?dG}$Au;6syV8?olr#!wBkg7MgXsp8s9m661iY9s33HV?X_l`fq$ zxu8pZ@KYv7M0M%3$sJK$x~e3nM0E-IB)Nv#w9f%*vc}$tPvtx?54aA<0djyGAP2|+ za)2Bl2gm_(fE*wP$N_SI93ThC0mA`&Hg0}ki-e#?T;M!#9%KfX2h0QJ0oMUJKn{=t z>exiBdfL>tiCXp9?bti}+rOf9*^-Gx z0tz_|1%4)Hwj@J!UBSTsamV(C#V8a{2D%d&POru2@+IW)#->3xh? 
zdIUBX#nL|f8&ND#U5jFg>U)h?`XG7{FW<&^Y)0j}Uqr8W;a;8ScLd6@;SWTyMIH3U Qm0rq5^HFRE>P$%N@4@??T>t<8 literal 0 HcmV?d00001 diff --git a/test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf b/test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf new file mode 120000 index 00000000..ca89cabf --- /dev/null +++ b/test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf @@ -0,0 +1 @@ +mm8_chr7_tiny.maf \ No newline at end of file diff --git a/test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf.index b/test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf.index new file mode 100644 index 0000000000000000000000000000000000000000..794120070a9d7896c70179af5ab778dd70d4c93e GIT binary patch literal 13680 zcmeI(EeZlr6vgqQyf7%pfJtFMFia7fXcAn(bgRWOAg(~S-~z;CbPa;MY;h}oG`)AQ z*p2^iI4`|$dH2`e;Js-`s#|K7q%0@H#d1BibI%eiN{+jhG)nzUGY8%Qdw>HtfCD&y z12}*KIDi8>KCllq0X)D1JYWxS00(dY2XFuf{_jAD_r2w8H>chqK63j8Rd`ztQuE>+ f;QPu`Y7>So@StCY>XLwwDo{?yxYlk)8cW?8S$ literal 0 HcmV?d00001 diff --git a/test_data/maf_tests/test_hg18.maf b/test_data/maf_tests/test_hg18.maf new file mode 100644 index 00000000..eed2a6a0 --- /dev/null +++ b/test_data/maf_tests/test_hg18.maf @@ -0,0 +1,36 @@ +##maf version=1 +a score=10542.0 +s mm8.chr7 80082352 16 + 145134094 G---AGGGCGGTCC--------------CAGCA- +s rn4.chr1 136011803 16 + 267910886 G---AGGGCGGTCC--------------CAGCA- +s oryCun1.scaffold_199771 14034 30 - 75077 G---AGGGGAACCTCTCCTCCCCTCCGACAAAG- +s hg18.chr15 88557590 17 + 100338915 GA--AGGGAAGCCC--------------CAGAA- +s panTro2.chr15 87959847 17 + 100063422 GA--AGGGAAGCCC--------------CAGAA- +s rheMac2.chr7 69864725 17 + 169801366 GA--AGGGAAGCCC--------------CAGAA- +s canFam2.chr3 56030590 19 + 94715083 AAGGAGGAGAATCC--------------CTATG- +s dasNov1.scaffold_106893 7453 16 + 9831 G---AGGGGGCTGC--------------CCACA- +s loxAfr1.scaffold_8298 30282 20 + 78952 GAGGAGGGGAACCC--------------CTAGGA +s echTel1.scaffold_304651 611 20 - 10007 TAGGAGGGGAATCC--------------CCAGGA + +a score=-33148.0 +s mm8.chr7 80082368 103 + 145134094 
TGAGAGGGCATGCT-GTGAAGGGACTGTGCT---CAGTTCAAGGCATAGTCCACTTCC--------CTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGT-------------------------------------C---- +s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTT-ATGAAGGCACTGTGCT--------------------CACTTTC--------CATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGT-------------------------------------C---- +s oryCun1.scaffold_199771 14064 74 - 75077 TAGGACTGCCTGGTGGGGGGGGCCCTGCACC--------------------TACTTCTGCAAGGCACGTCCCGCG----------TCTGTGCCTTCGCCGCA-----------T-------------------------------------C---- +s hg18.chr15 88557607 128 + 100338915 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTCTACTCCCAGCATGGCTGGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s panTro2.chr15 87959864 116 + 100063422 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTA------------GGCTAGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s rheMac2.chr7 69864742 107 + 169801366 GGAGAAAGCCTGGT-TAAGGGGCCCTTCA-----CTCTCCAAGGCACATTCCACTTTC--------TGTCCCTTTGTCATTCCATTCACTCTACTCCCCGCATGGCTAGAGGGC----------------------TGG---------AGGC---- +s canFam2.chr3 56030609 103 + 94715083 AGGGAATGCATGGTGTATGGGGGCCCCCGTC--------------------CACTTC---------TGTCCCGTTGCTATTTCCTTGACCATACTTCCAGTATGACTGGGGGAG---GTGCGG---TGGAGCAGGTTC------------------ +s loxAfr1.scaffold_8298 30302 144 + 78952 --TGGATGCCTGGT-TTAAGGATCC-GCTCACCCACTTCTGAGTCACGTTACACTTTC--------TGCCCCTTTGCCATTTCATTTATGGTACTCCCAACACCGGGGGAGGGTGCGCTTTGGTTCTTGAGCAGTTTGTGTATATAGGGGGCTGAG +s echTel1.scaffold_304651 631 67 - 10007 --TGGAGGGCTACT-TTAAGAAACC----CTCCCGTTTCTCAG-------------CC--------TGCTTC---------------------------------------------CTTTGGGTTTGAGGTACTTTGT----------------G + +a score=87527.0 +s mm8.chr7 80082471 121 + 145134094 CTG-AGC---------------CGCTGGCCCCTGGGCTTCCCCTCCAGCCTGGCTTGACTTTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CCCAGGCTGAAGTGGAGGGGGTGTTGAGCTGCCACCTGGGACTT +s rn4.chr1 136011905 121 + 267910886 
TCG-GAC---------------CGCTGGCACCCAGGCTTCCCCTCCAGCCTGGCCTGACTCTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CGCAGGCAAAAGTGGAGGGGATGTTGAGCTGCCACCTGGAACTT +s oryCun1.scaffold_199771 14138 103 - 75077 CCGCAGT---------------GGATCCCACCTCGGCTGTAGCAGTAGGCCAACCAGG----GCCCGACAGGCGCCCGGCTGTGCTGGCTTCCA-CACCCTCTCCCAGGC---------------------CTGCCACCCAGGC--- +s hg18.chr15 88557735 127 + 100338915 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s panTro2.chr15 87959980 127 + 100063422 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGTGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s rheMac2.chr7 69864849 116 + 169801366 CTG-GGCTGAACCAGGGGCT--GGCTGGTCTGCAG----------------GGCTGCACTCTGTCTATAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTTCCAGTCGTAAGTGGG------GTTGAGCTGCCACCTGGGACTG +s bosTau2.scaffold2397 93191 110 + 117874 CTG-GGC---------------AGCTGGCGCCTCGGCTGCCCCTCCCACCTGGCT-------------GTGACCCTTGGCAAG-TCTCCCCGCCCCCCATGCCCCCAGGCCTGAGCAAG------GCTGAGCTGCCACCT-GGACTA +s canFam2.chr3 56030712 116 + 94715083 TCT-AGC---------------AGCTGGCGCCCCAGCTGTCCTTCCAACCTGGCTGTGCTCTGTCTACGTGACCTTTGGCAGA-TTGCCACTCC-------CTCCCAGGCCCGAGCAGG------GCCAAGCTGCCACCT-GGATGG +s loxAfr1.scaffold_8298 30446 129 + 78952 CTG-AAC-----CAGGGACTGCAGCTAGTGCCTGGGCCACCGCTCCAGCCTGGCTGTGCTCTGTCTACAGGACGCATGGCAAG-TTGCCACCCC----CCTCTCCCAGG-CTAGGTGGG------GCTAAGCTGCCACTTGAAACTT +s echTel1.scaffold_304651 698 101 - 10007 CTG-GAC-----CAGGAACTGCAGCT---------GCTGCCCCTCTAGCCTACCTGTGC---------------CTTGGCAGG-TTGCCAGCCC-------CTCCCAGGCCTAGGTGGG------GTGACGCTGCCTCCTGGGAC-- + From 4ba019d7d82e6050ee02b6dfa6d0c03545f10d99 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 16:59:19 +0200 Subject: [PATCH 07/68] add tests for maf_chunk --- script_tests/base/__init__.py | 11 +++ script_tests/maf_chunk_tests.py | 15 ++++ test_data/maf_tests/chunk1/000000000.maf | 13 ++++ 
test_data/maf_tests/chunk1/000000001.maf | 12 ++++ test_data/maf_tests/chunk1/000000002.maf | 13 ++++ test_data/maf_tests/chunk1/000000003.maf | 12 ++++ test_data/maf_tests/chunk1/000000004.maf | 10 +++ test_data/maf_tests/chunk1/000000005.maf | 10 +++ test_data/maf_tests/chunk1/000000006.maf | 8 +++ test_data/maf_tests/chunk1/000000007.maf | 5 ++ test_data/maf_tests/chunk1/intervals.txt | 8 +++ test_data/maf_tests/chunk1000/000000000.maf | 76 +++++++++++++++++++++ test_data/maf_tests/chunk1000/intervals.txt | 1 + 13 files changed, 194 insertions(+) create mode 100644 script_tests/maf_chunk_tests.py create mode 100644 test_data/maf_tests/chunk1/000000000.maf create mode 100644 test_data/maf_tests/chunk1/000000001.maf create mode 100644 test_data/maf_tests/chunk1/000000002.maf create mode 100644 test_data/maf_tests/chunk1/000000003.maf create mode 100644 test_data/maf_tests/chunk1/000000004.maf create mode 100644 test_data/maf_tests/chunk1/000000005.maf create mode 100644 test_data/maf_tests/chunk1/000000006.maf create mode 100644 test_data/maf_tests/chunk1/000000007.maf create mode 100644 test_data/maf_tests/chunk1/intervals.txt create mode 100644 test_data/maf_tests/chunk1000/000000000.maf create mode 100644 test_data/maf_tests/chunk1000/intervals.txt diff --git a/script_tests/base/__init__.py b/script_tests/base/__init__.py index 9e7a9374..cf464812 100644 --- a/script_tests/base/__init__.py +++ b/script_tests/base/__init__.py @@ -40,6 +40,7 @@ def test_script(self): # Accumulate parameters input_files = dict() output_files = dict() + out_dir = None stdin = stdout = stderr = None for key in dir(self): if key == 'command_line': @@ -54,6 +55,9 @@ def test_script(self): assert isinstance(value, TestFile) arg_name = key[7:] output_files[arg_name] = value + elif key == 'out_dir': + out_dir = getattr(self, key) + assert os.path.isdir(out_dir) # Build the command line input_fnames = dict() output_fnames = dict() @@ -73,6 +77,13 @@ def test_script(self): if key == 
'stderr': stderr = open(output_fnames[key], 'w') stdout.flush() + if out_dir is not None: + temp_out_dir = tempfile.mkdtemp() + all_fnames['out_dir'] = temp_out_dir + for root, _, files in os.walk(out_dir): + for file in files: + output_files[os.path.join(root, file)] = TestFile(filename=os.path.join(root, file)) + output_fnames[os.path.join(root, file)] = os.path.join(temp_out_dir, file) real_command = string.Template(command_line).substitute(all_fnames) # Augment PYTHONPATH, bit of a HACK here! need to suck this data from setuptools or something? env = dict(os.environ) diff --git a/script_tests/maf_chunk_tests.py b/script_tests/maf_chunk_tests.py new file mode 100644 index 00000000..9dac0e7d --- /dev/null +++ b/script_tests/maf_chunk_tests.py @@ -0,0 +1,15 @@ +import unittest + +import base + +class Test1(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_chunk.py 1 ${out_dir}" + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf") + out_dir = "./test_data/maf_tests/chunk1" + + +class Test2(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_chunk.py 1000 ${out_dir}" + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf") + out_dir = "./test_data/maf_tests/chunk1000" + diff --git a/test_data/maf_tests/chunk1/000000000.maf b/test_data/maf_tests/chunk1/000000000.maf new file mode 100644 index 00000000..383e0e17 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000000.maf @@ -0,0 +1,13 @@ +##maf version=1 +a score=10542.0 +s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA- +s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA- +s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG- +s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA- +s panTro2.chr15 87959837 27 + 100063422 
--------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA- +s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA- +s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG- +s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA- +s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA +s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA + diff --git a/test_data/maf_tests/chunk1/000000001.maf b/test_data/maf_tests/chunk1/000000001.maf new file mode 100644 index 00000000..2b8658c9 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000001.maf @@ -0,0 +1,12 @@ +##maf version=1 +a score=-33148.0 +s mm8.chr7 80082368 103 + 145134094 TGAGAGGGCATGCT-GTGAAGGGACTGTGCT---CAGTTCAAGGCATAGTCCACTTCC--------CTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGT-------------------------------------C---- +s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTT-ATGAAGGCACTGTGCT--------------------CACTTTC--------CATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGT-------------------------------------C---- +s oryCun1.scaffold_199771 14064 74 - 75077 TAGGACTGCCTGGTGGGGGGGGCCCTGCACC--------------------TACTTCTGCAAGGCACGTCCCGCG----------TCTGTGCCTTCGCCGCA-----------T-------------------------------------C---- +s hg18.chr15 88557607 128 + 100338915 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTCTACTCCCAGCATGGCTGGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s panTro2.chr15 87959864 116 + 100063422 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTA------------GGCTAGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s rheMac2.chr7 69864742 107 + 169801366 GGAGAAAGCCTGGT-TAAGGGGCCCTTCA-----CTCTCCAAGGCACATTCCACTTTC--------TGTCCCTTTGTCATTCCATTCACTCTACTCCCCGCATGGCTAGAGGGC----------------------TGG---------AGGC---- +s canFam2.chr3 56030609 
103 + 94715083 AGGGAATGCATGGTGTATGGGGGCCCCCGTC--------------------CACTTC---------TGTCCCGTTGCTATTTCCTTGACCATACTTCCAGTATGACTGGGGGAG---GTGCGG---TGGAGCAGGTTC------------------ +s loxAfr1.scaffold_8298 30302 144 + 78952 --TGGATGCCTGGT-TTAAGGATCC-GCTCACCCACTTCTGAGTCACGTTACACTTTC--------TGCCCCTTTGCCATTTCATTTATGGTACTCCCAACACCGGGGGAGGGTGCGCTTTGGTTCTTGAGCAGTTTGTGTATATAGGGGGCTGAG +s echTel1.scaffold_304651 631 67 - 10007 --TGGAGGGCTACT-TTAAGAAACC----CTCCCGTTTCTCAG-------------CC--------TGCTTC---------------------------------------------CTTTGGGTTTGAGGTACTTTGT----------------G + diff --git a/test_data/maf_tests/chunk1/000000002.maf b/test_data/maf_tests/chunk1/000000002.maf new file mode 100644 index 00000000..ea7ce440 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000002.maf @@ -0,0 +1,13 @@ +##maf version=1 +a score=87527.0 +s mm8.chr7 80082471 121 + 145134094 CTG-AGC---------------CGCTGGCCCCTGGGCTTCCCCTCCAGCCTGGCTTGACTTTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CCCAGGCTGAAGTGGAGGGGGTGTTGAGCTGCCACCTGGGACTT +s rn4.chr1 136011905 121 + 267910886 TCG-GAC---------------CGCTGGCACCCAGGCTTCCCCTCCAGCCTGGCCTGACTCTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CGCAGGCAAAAGTGGAGGGGATGTTGAGCTGCCACCTGGAACTT +s oryCun1.scaffold_199771 14138 103 - 75077 CCGCAGT---------------GGATCCCACCTCGGCTGTAGCAGTAGGCCAACCAGG----GCCCGACAGGCGCCCGGCTGTGCTGGCTTCCA-CACCCTCTCCCAGGC---------------------CTGCCACCCAGGC--- +s hg18.chr15 88557735 127 + 100338915 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s panTro2.chr15 87959980 127 + 100063422 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGTGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s rheMac2.chr7 69864849 116 + 169801366 CTG-GGCTGAACCAGGGGCT--GGCTGGTCTGCAG----------------GGCTGCACTCTGTCTATAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTTCCAGTCGTAAGTGGG------GTTGAGCTGCCACCTGGGACTG +s bosTau2.scaffold2397 93191 110 + 
117874 CTG-GGC---------------AGCTGGCGCCTCGGCTGCCCCTCCCACCTGGCT-------------GTGACCCTTGGCAAG-TCTCCCCGCCCCCCATGCCCCCAGGCCTGAGCAAG------GCTGAGCTGCCACCT-GGACTA +s canFam2.chr3 56030712 116 + 94715083 TCT-AGC---------------AGCTGGCGCCCCAGCTGTCCTTCCAACCTGGCTGTGCTCTGTCTACGTGACCTTTGGCAGA-TTGCCACTCC-------CTCCCAGGCCCGAGCAGG------GCCAAGCTGCCACCT-GGATGG +s loxAfr1.scaffold_8298 30446 129 + 78952 CTG-AAC-----CAGGGACTGCAGCTAGTGCCTGGGCCACCGCTCCAGCCTGGCTGTGCTCTGTCTACAGGACGCATGGCAAG-TTGCCACCCC----CCTCTCCCAGG-CTAGGTGGG------GCTAAGCTGCCACTTGAAACTT +s echTel1.scaffold_304651 698 101 - 10007 CTG-GAC-----CAGGAACTGCAGCT---------GCTGCCCCTCTAGCCTACCTGTGC---------------CTTGGCAGG-TTGCCAGCCC-------CTCCCAGGCCTAGGTGGG------GTGACGCTGCCTCCTGGGAC-- + diff --git a/test_data/maf_tests/chunk1/000000003.maf b/test_data/maf_tests/chunk1/000000003.maf new file mode 100644 index 00000000..2b3b3b92 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000003.maf @@ -0,0 +1,12 @@ +##maf version=1 +a score=185399.0 +s mm8.chr7 80082592 121 + 145134094 GTGCTTATCTCGGACTCTTGGCATTTCTGTTTCTGGACAGAACCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGTGCAGGCAGATAAGCTCTGG +s rn4.chr1 136012026 121 + 267910886 GTGCTTATCTTGGCCTCTTGGCATTTCTGTATCTGGACAGAATCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGAGCAGGCAGATAAGCTCTGG +s oryCun1.scaffold_199771 14241 119 - 75077 ---CTTATCTCCGACTGCTGGCATTGCTGTGTCTGGGCAGAGGCCAAGGGCGGCCTCCCGCACAGACACTCGGGGCCC----GCCCAGGTAGAAGTG-CCCCTCCTGTGCAGGCAGATAAGCGCTGG +s hg18.chr15 88557862 119 + 100338915 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG +s panTro2.chr15 87960107 119 + 100063422 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG +s rheMac2.chr7 69864965 114 + 169801366 AGGCTTATCTCTGATCCTTGGCATTTCTGTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCC----GCCCAGGTG-----G--CTCTTC--TGCAGGTAGATAAGCATGGG +s 
bosTau2.scaffold2397 93301 123 + 117874 AAGCTTATCTCTGACCTTTGGCATTCCTGTGTGTGGACAGATTGCAAGAGCAGCCTCT-GCCCAGGCTTACGGGGACCTGCTGCCTCGGTAGAAATG-CGCCTCCTCTGTAGGCAGATAAGCCCT-- +s canFam2.chr3 56030828 121 + 94715083 CAACTTATCTTTGACCTTCGGCATTTCTATATCTGGATGGATCCTAAGTGCAGCCTCCAGCCTAGACTTCCAGGACCC----ACCCTGGGA-AGATG-CCCCTCCTGTGTGGGCAGATAAATGTTGG +s echTel1.scaffold_304651 799 118 - 10007 ATGACAATCT--GACCTTTGACATT--TGTTTTAGGATAGGTTCCAAGTGAAGCCTCCTGCCTAGACTTCCTGATTCT-----CCCAGATAGAAGCGCCCCCTTCTTGGAAGACAGATAAGCGATAA + diff --git a/test_data/maf_tests/chunk1/000000004.maf b/test_data/maf_tests/chunk1/000000004.maf new file mode 100644 index 00000000..c825ddd6 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000004.maf @@ -0,0 +1,10 @@ +##maf version=1 +a score=30120.0 +s mm8.chr7 80082713 54 + 145134094 CAA-------ACCAAAGGCAGCCTGT-GCTTCCAGAAAACCTT-GAGGGGTGCAAGAGATAAA +s rn4.chr1 136012147 54 + 267910886 CAA-------ACCAGAGGCAGCCTAC-GTTTCCAGAAAACCTT-GAGGGGTACAAGAGATAAA +s hg18.chr15 88557981 62 + 100338915 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG +s panTro2.chr15 87960226 62 + 100063422 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG +s rheMac2.chr7 69865079 62 + 169801366 CAACCAGCTTATCTGAACCAGCCCTC-GTTTCCAGGTAACTCTGGAAAAATCCAAAAGATGAG +s canFam2.chr3 56030949 40 + 94715083 -------CATATTTGACCCAGCCCTTGGCTTTCAGAAAACC------------ACAA----AG +s echTel1.scaffold_304651 917 55 - 10007 CAA-------ATTCCATCCCACCCTT-CGTTCTGGACGGGCTGGGAGGGGTACAAAAGATAAA + diff --git a/test_data/maf_tests/chunk1/000000005.maf b/test_data/maf_tests/chunk1/000000005.maf new file mode 100644 index 00000000..cdb331f1 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000005.maf @@ -0,0 +1,10 @@ +##maf version=1 +a score=58255.0 +s mm8.chr7 80082767 128 + 145134094 GGGGTGCAGGAGCTGTG----TGTCTTGATCTCCCAGA----GTCTTCGTGAGCCT-----------CACTTTTTGTCTTATCCCT---GTGATACACACAGG-AAGCCACAGTGAATTCAGTGGGTGTCAT---------ACAGAAGGGCCTCC-TGGAG- +s rn4.chr1 136012201 139 + 267910886 
GGGGTACAGGAGCTGTG----TG-CTTGATGTCGCTGA----GCCTTCGTGAGGCTCCTGTGAGCTGCACTTTTTGTCTCGTCCCT---GTGATAGACACAAG-AAGCCACAGTGAATTCAGTGGGTATCAT---------ATGGAAGGGCCTCCTTGGAC- +s hg18.chr15 88558043 143 + 100338915 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG- +s panTro2.chr15 87960288 143 + 100063422 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG- +s rheMac2.chr7 69865141 147 + 169801366 GAGGGACCACAG-TGTCTGTTTGTCCTGGTCTCAC--------TCCTCATGAGACTCCTGTGAT---CTTTGTATGTCTCATTCCTCCTGTGACATGTATGAG-AATGTACAGCTCAGTGAGATGATGTCTCTGCTGCCTGACAGAAGTGCCTAC-TTGAG- +s bosTau2.scaffold2397 93775 133 + 117874 GGACTGCAGTGGCCATT----TGCTCTGGCCTCACTGA----CTCCTTGTGAGCCCGCTGTGAG---TTTTGTTT---TCATTATCCCCAT------TATGAGAAAACTCCAGTTTGGTGAGATGGCATCTACCCTGCCCT--------ACAAAC-ATGgtg +s canFam2.chr3 56030989 153 + 94715083 GGGATGTGGAAGACGTT----TGCCCTCGTCTCACAGACTCCCTCCTTGTAAGGCTGCTGGGAG---TCATATTTTGCTCATTATCCCTGCGGTATGTATGAG-AAGCCAAAGGTCAGTGAGCTGGAGTTTGCACTGCCCTCCAGAGGGACCGAC-ATGgtg + diff --git a/test_data/maf_tests/chunk1/000000006.maf b/test_data/maf_tests/chunk1/000000006.maf new file mode 100644 index 00000000..cbe64d87 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000006.maf @@ -0,0 +1,8 @@ +##maf version=1 +a score=2607.0 +s mm8.chr7 80082895 114 + 145134094 CTTCTCAGAGTGTAGT-----------CCTTGGGCTACC-TCCTCCTAAGTCACTGGG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCCGGGTGATGTT +s rn4.chr1 136012340 112 + 267910886 CTTCTCAGA--GTAGT-----------CCTTGGGCCACC-TCCTTCTAAGTTACTGAG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCAAGGTACTGTT +s rheMac2.chr7 69865323 119 + 169801366 
CTTCTTGTTGACTAGTGTCACCCCCACCCGAGGGCTTCCTTCCTCATTTGCTGCCAGGTGTAAAGCTGAGCTTC-------agctgggcgcagtgg------ctcacacccataatcctagca--ttttgggag------------------------------ +s bosTau2.scaffold2397 93908 136 + 117874 cttctcaaagtgtgct-----------ccatgagcctcc-tacttcagaatcccctgg---------gagattcaaaaccttgcatgttc-tcaggccccatcacgggccagcatcgtcagagtcttcagggtcagctcgtggatctagagtgtaggt------ +s canFam2.chr3 56031142 126 + 94715083 cttttcagagggtggt-----------ccctgggcctcc-cactttggaattgcctgg---------gag-ctcatagaattgcccgttg-tcagg--ccatcccagggcagtggcagcag-gcctctagggcaggcct------------ttcaggtgacttt + diff --git a/test_data/maf_tests/chunk1/000000007.maf b/test_data/maf_tests/chunk1/000000007.maf new file mode 100644 index 00000000..94f735f6 --- /dev/null +++ b/test_data/maf_tests/chunk1/000000007.maf @@ -0,0 +1,5 @@ +##maf version=1 +a score=8132.0 +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC + diff --git a/test_data/maf_tests/chunk1/intervals.txt b/test_data/maf_tests/chunk1/intervals.txt new file mode 100644 index 00000000..314ea263 --- /dev/null +++ b/test_data/maf_tests/chunk1/intervals.txt @@ -0,0 +1,8 @@ +80082334 80082368 +80082368 80082471 +80082471 80082592 +80082592 80082713 +80082713 80082767 +80082767 80082895 +80082895 80083009 +80083009 80083156 diff --git a/test_data/maf_tests/chunk1000/000000000.maf b/test_data/maf_tests/chunk1000/000000000.maf new file mode 100644 index 00000000..d21c5676 --- /dev/null +++ b/test_data/maf_tests/chunk1000/000000000.maf @@ -0,0 +1,76 @@ +##maf version=1 +a score=10542.0 +s mm8.chr7 80082334 34 + 145134094 
GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA- +s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA- +s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG- +s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA- +s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA- +s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA- +s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG- +s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA- +s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA +s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA + +a score=-33148.0 +s mm8.chr7 80082368 103 + 145134094 TGAGAGGGCATGCT-GTGAAGGGACTGTGCT---CAGTTCAAGGCATAGTCCACTTCC--------CTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGT-------------------------------------C---- +s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTT-ATGAAGGCACTGTGCT--------------------CACTTTC--------CATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGT-------------------------------------C---- +s oryCun1.scaffold_199771 14064 74 - 75077 TAGGACTGCCTGGTGGGGGGGGCCCTGCACC--------------------TACTTCTGCAAGGCACGTCCCGCG----------TCTGTGCCTTCGCCGCA-----------T-------------------------------------C---- +s hg18.chr15 88557607 128 + 100338915 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTCTACTCCCAGCATGGCTGGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s panTro2.chr15 87959864 116 + 100063422 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTA------------GGCTAGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC---- +s rheMac2.chr7 69864742 107 + 169801366 
GGAGAAAGCCTGGT-TAAGGGGCCCTTCA-----CTCTCCAAGGCACATTCCACTTTC--------TGTCCCTTTGTCATTCCATTCACTCTACTCCCCGCATGGCTAGAGGGC----------------------TGG---------AGGC---- +s canFam2.chr3 56030609 103 + 94715083 AGGGAATGCATGGTGTATGGGGGCCCCCGTC--------------------CACTTC---------TGTCCCGTTGCTATTTCCTTGACCATACTTCCAGTATGACTGGGGGAG---GTGCGG---TGGAGCAGGTTC------------------ +s loxAfr1.scaffold_8298 30302 144 + 78952 --TGGATGCCTGGT-TTAAGGATCC-GCTCACCCACTTCTGAGTCACGTTACACTTTC--------TGCCCCTTTGCCATTTCATTTATGGTACTCCCAACACCGGGGGAGGGTGCGCTTTGGTTCTTGAGCAGTTTGTGTATATAGGGGGCTGAG +s echTel1.scaffold_304651 631 67 - 10007 --TGGAGGGCTACT-TTAAGAAACC----CTCCCGTTTCTCAG-------------CC--------TGCTTC---------------------------------------------CTTTGGGTTTGAGGTACTTTGT----------------G + +a score=87527.0 +s mm8.chr7 80082471 121 + 145134094 CTG-AGC---------------CGCTGGCCCCTGGGCTTCCCCTCCAGCCTGGCTTGACTTTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CCCAGGCTGAAGTGGAGGGGGTGTTGAGCTGCCACCTGGGACTT +s rn4.chr1 136011905 121 + 267910886 TCG-GAC---------------CGCTGGCACCCAGGCTTCCCCTCCAGCCTGGCCTGACTCTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CGCAGGCAAAAGTGGAGGGGATGTTGAGCTGCCACCTGGAACTT +s oryCun1.scaffold_199771 14138 103 - 75077 CCGCAGT---------------GGATCCCACCTCGGCTGTAGCAGTAGGCCAACCAGG----GCCCGACAGGCGCCCGGCTGTGCTGGCTTCCA-CACCCTCTCCCAGGC---------------------CTGCCACCCAGGC--- +s hg18.chr15 88557735 127 + 100338915 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s panTro2.chr15 87959980 127 + 100063422 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGTGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG +s rheMac2.chr7 69864849 116 + 169801366 CTG-GGCTGAACCAGGGGCT--GGCTGGTCTGCAG----------------GGCTGCACTCTGTCTATAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTTCCAGTCGTAAGTGGG------GTTGAGCTGCCACCTGGGACTG +s bosTau2.scaffold2397 93191 110 + 117874 
CTG-GGC---------------AGCTGGCGCCTCGGCTGCCCCTCCCACCTGGCT-------------GTGACCCTTGGCAAG-TCTCCCCGCCCCCCATGCCCCCAGGCCTGAGCAAG------GCTGAGCTGCCACCT-GGACTA +s canFam2.chr3 56030712 116 + 94715083 TCT-AGC---------------AGCTGGCGCCCCAGCTGTCCTTCCAACCTGGCTGTGCTCTGTCTACGTGACCTTTGGCAGA-TTGCCACTCC-------CTCCCAGGCCCGAGCAGG------GCCAAGCTGCCACCT-GGATGG +s loxAfr1.scaffold_8298 30446 129 + 78952 CTG-AAC-----CAGGGACTGCAGCTAGTGCCTGGGCCACCGCTCCAGCCTGGCTGTGCTCTGTCTACAGGACGCATGGCAAG-TTGCCACCCC----CCTCTCCCAGG-CTAGGTGGG------GCTAAGCTGCCACTTGAAACTT +s echTel1.scaffold_304651 698 101 - 10007 CTG-GAC-----CAGGAACTGCAGCT---------GCTGCCCCTCTAGCCTACCTGTGC---------------CTTGGCAGG-TTGCCAGCCC-------CTCCCAGGCCTAGGTGGG------GTGACGCTGCCTCCTGGGAC-- + +a score=185399.0 +s mm8.chr7 80082592 121 + 145134094 GTGCTTATCTCGGACTCTTGGCATTTCTGTTTCTGGACAGAACCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGTGCAGGCAGATAAGCTCTGG +s rn4.chr1 136012026 121 + 267910886 GTGCTTATCTTGGCCTCTTGGCATTTCTGTATCTGGACAGAATCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGAGCAGGCAGATAAGCTCTGG +s oryCun1.scaffold_199771 14241 119 - 75077 ---CTTATCTCCGACTGCTGGCATTGCTGTGTCTGGGCAGAGGCCAAGGGCGGCCTCCCGCACAGACACTCGGGGCCC----GCCCAGGTAGAAGTG-CCCCTCCTGTGCAGGCAGATAAGCGCTGG +s hg18.chr15 88557862 119 + 100338915 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG +s panTro2.chr15 87960107 119 + 100063422 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG +s rheMac2.chr7 69864965 114 + 169801366 AGGCTTATCTCTGATCCTTGGCATTTCTGTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCC----GCCCAGGTG-----G--CTCTTC--TGCAGGTAGATAAGCATGGG +s bosTau2.scaffold2397 93301 123 + 117874 AAGCTTATCTCTGACCTTTGGCATTCCTGTGTGTGGACAGATTGCAAGAGCAGCCTCT-GCCCAGGCTTACGGGGACCTGCTGCCTCGGTAGAAATG-CGCCTCCTCTGTAGGCAGATAAGCCCT-- +s canFam2.chr3 56030828 121 + 94715083 
CAACTTATCTTTGACCTTCGGCATTTCTATATCTGGATGGATCCTAAGTGCAGCCTCCAGCCTAGACTTCCAGGACCC----ACCCTGGGA-AGATG-CCCCTCCTGTGTGGGCAGATAAATGTTGG +s echTel1.scaffold_304651 799 118 - 10007 ATGACAATCT--GACCTTTGACATT--TGTTTTAGGATAGGTTCCAAGTGAAGCCTCCTGCCTAGACTTCCTGATTCT-----CCCAGATAGAAGCGCCCCCTTCTTGGAAGACAGATAAGCGATAA + +a score=30120.0 +s mm8.chr7 80082713 54 + 145134094 CAA-------ACCAAAGGCAGCCTGT-GCTTCCAGAAAACCTT-GAGGGGTGCAAGAGATAAA +s rn4.chr1 136012147 54 + 267910886 CAA-------ACCAGAGGCAGCCTAC-GTTTCCAGAAAACCTT-GAGGGGTACAAGAGATAAA +s hg18.chr15 88557981 62 + 100338915 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG +s panTro2.chr15 87960226 62 + 100063422 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG +s rheMac2.chr7 69865079 62 + 169801366 CAACCAGCTTATCTGAACCAGCCCTC-GTTTCCAGGTAACTCTGGAAAAATCCAAAAGATGAG +s canFam2.chr3 56030949 40 + 94715083 -------CATATTTGACCCAGCCCTTGGCTTTCAGAAAACC------------ACAA----AG +s echTel1.scaffold_304651 917 55 - 10007 CAA-------ATTCCATCCCACCCTT-CGTTCTGGACGGGCTGGGAGGGGTACAAAAGATAAA + +a score=58255.0 +s mm8.chr7 80082767 128 + 145134094 GGGGTGCAGGAGCTGTG----TGTCTTGATCTCCCAGA----GTCTTCGTGAGCCT-----------CACTTTTTGTCTTATCCCT---GTGATACACACAGG-AAGCCACAGTGAATTCAGTGGGTGTCAT---------ACAGAAGGGCCTCC-TGGAG- +s rn4.chr1 136012201 139 + 267910886 GGGGTACAGGAGCTGTG----TG-CTTGATGTCGCTGA----GCCTTCGTGAGGCTCCTGTGAGCTGCACTTTTTGTCTCGTCCCT---GTGATAGACACAAG-AAGCCACAGTGAATTCAGTGGGTATCAT---------ATGGAAGGGCCTCCTTGGAC- +s hg18.chr15 88558043 143 + 100338915 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG- +s panTro2.chr15 87960288 143 + 100063422 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG- +s rheMac2.chr7 69865141 147 + 169801366 
GAGGGACCACAG-TGTCTGTTTGTCCTGGTCTCAC--------TCCTCATGAGACTCCTGTGAT---CTTTGTATGTCTCATTCCTCCTGTGACATGTATGAG-AATGTACAGCTCAGTGAGATGATGTCTCTGCTGCCTGACAGAAGTGCCTAC-TTGAG- +s bosTau2.scaffold2397 93775 133 + 117874 GGACTGCAGTGGCCATT----TGCTCTGGCCTCACTGA----CTCCTTGTGAGCCCGCTGTGAG---TTTTGTTT---TCATTATCCCCAT------TATGAGAAAACTCCAGTTTGGTGAGATGGCATCTACCCTGCCCT--------ACAAAC-ATGgtg +s canFam2.chr3 56030989 153 + 94715083 GGGATGTGGAAGACGTT----TGCCCTCGTCTCACAGACTCCCTCCTTGTAAGGCTGCTGGGAG---TCATATTTTGCTCATTATCCCTGCGGTATGTATGAG-AAGCCAAAGGTCAGTGAGCTGGAGTTTGCACTGCCCTCCAGAGGGACCGAC-ATGgtg + +a score=2607.0 +s mm8.chr7 80082895 114 + 145134094 CTTCTCAGAGTGTAGT-----------CCTTGGGCTACC-TCCTCCTAAGTCACTGGG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCCGGGTGATGTT +s rn4.chr1 136012340 112 + 267910886 CTTCTCAGA--GTAGT-----------CCTTGGGCCACC-TCCTTCTAAGTTACTGAG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCAAGGTACTGTT +s rheMac2.chr7 69865323 119 + 169801366 CTTCTTGTTGACTAGTGTCACCCCCACCCGAGGGCTTCCTTCCTCATTTGCTGCCAGGTGTAAAGCTGAGCTTC-------agctgggcgcagtgg------ctcacacccataatcctagca--ttttgggag------------------------------ +s bosTau2.scaffold2397 93908 136 + 117874 cttctcaaagtgtgct-----------ccatgagcctcc-tacttcagaatcccctgg---------gagattcaaaaccttgcatgttc-tcaggccccatcacgggccagcatcgtcagagtcttcagggtcagctcgtggatctagagtgtaggt------ +s canFam2.chr3 56031142 126 + 94715083 cttttcagagggtggt-----------ccctgggcctcc-cactttggaattgcctgg---------gag-ctcatagaattgcccgttg-tcagg--ccatcccagggcagtggcagcag-gcctctagggcaggcct------------ttcaggtgacttt + +a score=8132.0 +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 
TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC + diff --git a/test_data/maf_tests/chunk1000/intervals.txt b/test_data/maf_tests/chunk1000/intervals.txt new file mode 100644 index 00000000..0d6af52e --- /dev/null +++ b/test_data/maf_tests/chunk1000/intervals.txt @@ -0,0 +1 @@ +80082334 80083156 From 2efd3074e748156aec9bb29d7f129952cc3bdaab Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 17:37:53 +0200 Subject: [PATCH 08/68] fix syntheny right in slice --- lib/bx/align/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index b42d3421..476144a5 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -337,7 +337,7 @@ def slice(self, start, end): if start == 0: new.synteny_left = self.synteny_left if self.synteny_right: - if end == self.size: + if end == len(self.text): new.synteny_right = self.synteny_right return new From 63e7cee92cf517678764bf1ed403643f80483e2f Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 17:54:13 +0200 Subject: [PATCH 09/68] fix maf_extract_ranges_indexed for 'e' lines with size 0 --- scripts/maf_extract_ranges_indexed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/maf_extract_ranges_indexed.py b/scripts/maf_extract_ranges_indexed.py index 5103b19f..ee60f58c 100755 --- a/scripts/maf_extract_ranges_indexed.py +++ b/scripts/maf_extract_ranges_indexed.py @@ -103,7 +103,7 @@ def main(): if sliced.get_component_by_src(src).size < 1: continue # Keep only components that are not empty - sliced.components = [c for c in sliced.components if c.size > 0] + sliced.components = [c for c in sliced.components if c.size > 0 or c.empty] # Reverse complement if needed if strand is not None and ref.strand != strand: sliced = sliced.reverse_complement() From 
9a1e39042ac03b9f73494ff0c266ee579c4ec6ac Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 17:54:28 +0200 Subject: [PATCH 10/68] add note in maf_extract_ranges --- scripts/maf_extract_ranges.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/maf_extract_ranges.py b/scripts/maf_extract_ranges.py index 388eb791..93345a73 100755 --- a/scripts/maf_extract_ranges.py +++ b/scripts/maf_extract_ranges.py @@ -9,6 +9,9 @@ NOTE: chromosome/src information in the MAF is ignored by this variant. +NOTE: if a single alignment in a block become empty during slicing, the block + is ignored. + usage: %prog interval_file refindex [options] < maf_file -m, --mincols=10: Minimum length (columns) required for alignment to be output """ From 3fc97b296065babf86c4b323a31e90ec5df59ffd Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 17:54:55 +0200 Subject: [PATCH 11/68] add tests for maf_extract_ranges --- .../maf_extract_ranges_indexed_tests.py | 6 +++ script_tests/maf_extract_ranges_tests.py | 16 +++++++ test_data/maf_tests/mm10_chr12.bed | 1 + test_data/maf_tests/mm10_chr12_slice.maf | 47 +++++++++++++++++++ test_data/maf_tests/mm10_chr12_slice2.maf | 37 +++++++++++++++ .../maf_tests/mm8_chr7_tiny_no_index.maf | 1 + 6 files changed, 108 insertions(+) create mode 100644 script_tests/maf_extract_ranges_tests.py create mode 100644 test_data/maf_tests/mm10_chr12.bed create mode 100644 test_data/maf_tests/mm10_chr12_slice.maf create mode 100644 test_data/maf_tests/mm10_chr12_slice2.maf create mode 120000 test_data/maf_tests/mm8_chr7_tiny_no_index.maf diff --git a/script_tests/maf_extract_ranges_indexed_tests.py b/script_tests/maf_extract_ranges_indexed_tests.py index 15bead54..ae89cf70 100644 --- a/script_tests/maf_extract_ranges_indexed_tests.py +++ b/script_tests/maf_extract_ranges_indexed_tests.py @@ -24,3 +24,9 @@ class TestAccessNotRefNotIndexed(base.BaseScriptTest, unittest.TestCase): command_line = 
"./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p hg18." input_stdin = base.TestFile(filename="./test_data/maf_tests/hg18.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/empty.maf") + + +class TestELines(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm10_chr12_lessspe.maf -c -m 5 -p mm10." + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12.bed") + output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_slice.maf") diff --git a/script_tests/maf_extract_ranges_tests.py b/script_tests/maf_extract_ranges_tests.py new file mode 100644 index 00000000..3c6d80d9 --- /dev/null +++ b/script_tests/maf_extract_ranges_tests.py @@ -0,0 +1,16 @@ +import unittest + +import base + + +class Test(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges.py ${inverval_file} 0" + input_inverval_file = base.TestFile("80082367 80083066") + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") + output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") + +class TestElines(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_extract_ranges.py ${inverval_file} 0 -m 5" + input_inverval_file = base.TestFile("56694985 56695040") + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") + output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_slice2.maf") diff --git a/test_data/maf_tests/mm10_chr12.bed b/test_data/maf_tests/mm10_chr12.bed new file mode 100644 index 00000000..c3001f2c --- /dev/null +++ b/test_data/maf_tests/mm10_chr12.bed @@ -0,0 +1 @@ +chr12 56694985 56695040 \ No newline at end of file diff --git a/test_data/maf_tests/mm10_chr12_slice.maf b/test_data/maf_tests/mm10_chr12_slice.maf new file mode 100644 index 00000000..9687bf67 --- /dev/null +++ 
b/test_data/maf_tests/mm10_chr12_slice.maf @@ -0,0 +1,47 @@ +##maf version=1 +a score=-247111.0 +s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=36657.0 +s mm10.chr12 56695027 7 + 120129022 CTGGCCT +s hetGla2.JH602151 6912286 6 - 7060556 GCGGAC- +i hetGla2.JH602151 C 0 I 1 +s micMur1.scaffold_1897 109353 7 + 248211 GTGACCC +i micMur1.scaffold_1897 C 0 C 0 +s tupBel1.scaffold_149545.1-136892 2350 7 + 136892 GCGGCTA +i tupBel1.scaffold_149545.1-136892 C 0 C 0 +s bosTau7.chr21 47705765 7 + 69078422 GGCCTCC +i bosTau7.chr21 I 182 C 0 +s pteVam1.scaffold_182 42298 4 + 455609 --AG-CT +i pteVam1.scaffold_182 C 0 C 0 +s eriEur1.scaffold_370206 11282 7 + 65867 TGAGCCC +i eriEur1.scaffold_370206 C 0 C 0 +s sorAra1.scaffold_233549 622 7 + 65803 CCTGGCC +i sorAra1.scaffold_233549 C 0 C 0 +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + +a score=99018.0 +s mm10.chr12 56695034 6 + 120129022 CTGCTA 
+s hetGla2.JH602151 6912293 6 - 7060556 TTGCCT +s micMur1.scaffold_1897 109360 6 + 248211 CTGCCT +s tupBel1.scaffold_149545.1-136892 2357 6 + 136892 CTGCCT +s bosTau7.chr21 47705772 6 + 69078422 ATCTCT +s pteVam1.scaffold_182 42302 6 + 455609 CTGCCT +s eriEur1.scaffold_370206 11289 6 + 65867 CTGCCT +s sorAra1.scaffold_233549 629 6 + 65803 CTGCTG + diff --git a/test_data/maf_tests/mm10_chr12_slice2.maf b/test_data/maf_tests/mm10_chr12_slice2.maf new file mode 100644 index 00000000..8f44acc1 --- /dev/null +++ b/test_data/maf_tests/mm10_chr12_slice2.maf @@ -0,0 +1,37 @@ +##maf version=1 +a score=-247111.0 +s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=36657.0 +s mm10.chr12 56695027 7 + 120129022 CTGGCCT +s hetGla2.JH602151 6912286 6 - 7060556 GCGGAC- +i hetGla2.JH602151 C 0 I 1 +s micMur1.scaffold_1897 109353 7 + 248211 GTGACCC +i micMur1.scaffold_1897 C 0 C 0 +s 
tupBel1.scaffold_149545.1-136892 2350 7 + 136892 GCGGCTA +i tupBel1.scaffold_149545.1-136892 C 0 C 0 +s bosTau7.chr21 47705765 7 + 69078422 GGCCTCC +i bosTau7.chr21 I 182 C 0 +s pteVam1.scaffold_182 42298 4 + 455609 --AG-CT +i pteVam1.scaffold_182 C 0 C 0 +s eriEur1.scaffold_370206 11282 7 + 65867 TGAGCCC +i eriEur1.scaffold_370206 C 0 C 0 +s sorAra1.scaffold_233549 622 7 + 65803 CCTGGCC +i sorAra1.scaffold_233549 C 0 C 0 +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + diff --git a/test_data/maf_tests/mm8_chr7_tiny_no_index.maf b/test_data/maf_tests/mm8_chr7_tiny_no_index.maf new file mode 120000 index 00000000..ca89cabf --- /dev/null +++ b/test_data/maf_tests/mm8_chr7_tiny_no_index.maf @@ -0,0 +1 @@ +mm8_chr7_tiny.maf \ No newline at end of file From 69e06ed05c1d3bea59b4954d14eb5e5001a4bd2c Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 18:14:55 +0200 Subject: [PATCH 12/68] add test for limit_to_species --- script_tests/maf_limit_to_species_tests.py | 15 +++++++++ ...0_chr12_lessspe_onlymouse_cow_elephant.maf | 31 +++++++++++++++++ .../mm8_chr7_tiny_only_mouse_rat.maf | 33 +++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 script_tests/maf_limit_to_species_tests.py create mode 100644 test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf create mode 100644 test_data/maf_tests/mm8_chr7_tiny_only_mouse_rat.maf diff --git a/script_tests/maf_limit_to_species_tests.py b/script_tests/maf_limit_to_species_tests.py new file mode 100644 index 00000000..f352fb96 --- /dev/null +++ b/script_tests/maf_limit_to_species_tests.py @@ -0,0 +1,15 @@ +import unittest + +import base + +class Test1(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_limit_to_species.py mm8,rn4" + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") + output_stdout = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_only_mouse_rat.maf") + + +class TestWithE(base.BaseScriptTest, 
unittest.TestCase): + command_line = "./scripts/maf_limit_to_species.py mm10,bosTau7,loxAfr3" + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") + output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf") + diff --git a/test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf b/test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf new file mode 100644 index 00000000..a7c51fab --- /dev/null +++ b/test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf @@ -0,0 +1,31 @@ +##maf version=1 +a score=-22139.0 +s mm10.chr12 56694975 11 + 120129022 GTCCTGTGCTC-- +s loxAfr3.scaffold_9 12283154 10 + 83325590 CGG---TGCGATC +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=-247111.0 +s mm10.chr12 56694986 40 + 120129022 CTCTTTAGTCTGGTTTTTTAATTTTTTTTTCC-TCACTGCA +s loxAfr3.scaffold_9 12283164 24 + 83325590 ---------GCTTGTAGTTAAGTTCTC---GGTACA----- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +a score=18354.0 +s mm10.chr12 56695026 1 + 120129022 C +e bosTau7.chr21 47705583 182 + 69078422 I +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + +a score=36657.0 +s mm10.chr12 56695027 7 + 120129022 CTGGCCT +s bosTau7.chr21 47705765 7 + 69078422 GGCCTCC +i bosTau7.chr21 I 182 C 0 +e loxAfr3.scaffold_9 12283188 0 + 83325590 C + +a score=99018.0 +s mm10.chr12 56695034 37 + 120129022 CTGCTAGGATCCTGCTTGACTGAGGTTTAGCCCCTC-A +s bosTau7.chr21 47705772 33 + 69078422 ATCTCTGGCTCCT-----GCTCgtgtttagtcgctcag +i bosTau7.chr21 C 0 C 0 +s loxAfr3.scaffold_9 12283188 30 + 83325590 ------TTAGCCTGCTTGGCGAGGGTTCGCCC-TGA-G +i loxAfr3.scaffold_9 C 0 C 0 + diff --git a/test_data/maf_tests/mm8_chr7_tiny_only_mouse_rat.maf b/test_data/maf_tests/mm8_chr7_tiny_only_mouse_rat.maf new file mode 100644 index 00000000..1bd8befb --- /dev/null +++ b/test_data/maf_tests/mm8_chr7_tiny_only_mouse_rat.maf @@ -0,0 +1,33 @@ +##maf version=1 +a 
score=10542.0 +s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGCAGGGATGGAGGGCGGTCCCAGCA +s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGCAGGGACGGAGGGCGGTCCCAGCA + +a score=-33148.0 +s mm8.chr7 80082368 103 + 145134094 TGAGAGGGCATGCTGTGAAGGGACTGTGCTCAGTTCAAGGCATAGTCCACTTCCCTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGTC +s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTTATGAAGGCACTGTGCT-----------------CACTTTCCATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGTC + +a score=87527.0 +s mm8.chr7 80082471 121 + 145134094 CTGAGCCGCTGGCCCCTGGGCTTCCCCTCCAGCCTGGCTTGACTTTGTCTGAGGGACCCTGGGCAGCTTGCCATCCACCCAGGCTGAAGTGGAGGGGGTGTTGAGCTGCCACCTGGGACTT +s rn4.chr1 136011905 121 + 267910886 TCGGACCGCTGGCACCCAGGCTTCCCCTCCAGCCTGGCCTGACTCTGTCTGAGGGACCCTGGGCAGCTTGCCATCCACGCAGGCAAAAGTGGAGGGGATGTTGAGCTGCCACCTGGAACTT + +a score=185399.0 +s mm8.chr7 80082592 121 + 145134094 GTGCTTATCTCGGACTCTTGGCATTTCTGTTTCTGGACAGAACCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCCACCCAGGTGGAAATGCCCTCCGGTGCAGGCAGATAAGCTCTGG +s rn4.chr1 136012026 121 + 267910886 GTGCTTATCTTGGCCTCTTGGCATTTCTGTATCTGGACAGAATCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCCACCCAGGTGGAAATGCCCTCCGGAGCAGGCAGATAAGCTCTGG + +a score=30120.0 +s mm8.chr7 80082713 54 + 145134094 CAAACCAAAGGCAGCCTGTGCTTCCAGAAAACCTTGAGGGGTGCAAGAGATAAA +s rn4.chr1 136012147 54 + 267910886 CAAACCAGAGGCAGCCTACGTTTCCAGAAAACCTTGAGGGGTACAAGAGATAAA + +a score=58255.0 +s mm8.chr7 80082767 128 + 145134094 GGGGTGCAGGAGCTGTGTGTCTTGATCTCCCAGAGTCTTCGTGAGCCT-----------CACTTTTTGTCTTATCCCTGTGATACACACAGGAAGCCACAGTGAATTCAGTGGGTGTCATACAGAAGGGCCTCC-TGGAG +s rn4.chr1 136012201 139 + 267910886 GGGGTACAGGAGCTGTGTG-CTTGATGTCGCTGAGCCTTCGTGAGGCTCCTGTGAGCTGCACTTTTTGTCTCGTCCCTGTGATAGACACAAGAAGCCACAGTGAATTCAGTGGGTATCATATGGAAGGGCCTCCTTGGAC + +a score=2607.0 +s mm8.chr7 80082895 114 + 145134094 CTTCTCAGAGTGTAGTCCTTGGGCTACCTCCTCCTAAGTCACTGGGAGCTGGTCAAGAGGCTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCTGGAGTCCGGGTGATGTT +s rn4.chr1 136012340 112 + 267910886 
CTTCTCAGA--GTAGTCCTTGGGCCACCTCCTTCTAAGTTACTGAGAGCTGGTCAAGAGGCTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCTGGAGTCAAGGTACTGTT + +a score=8132.0 +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC + From ab57496d41353f7f9094b0dcb6e8eb017bdd2b63 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 18:27:49 +0200 Subject: [PATCH 13/68] add maf_select_tests --- script_tests/maf_select_tests.py | 55 ++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 script_tests/maf_select_tests.py diff --git a/script_tests/maf_select_tests.py b/script_tests/maf_select_tests.py new file mode 100644 index 00000000..a7f84ecd --- /dev/null +++ b/script_tests/maf_select_tests.py @@ -0,0 +1,55 @@ +import unittest + +import base + + +class Test(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_select.py ${features}" + input_features = base.TestFile("""0 + 0 + 0 + 0 + 0 + 0 + 0 + 1""") + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") + output_stdout = base.TestFile("""##maf version=1 +a score=8132.0 +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC + +""") + + +class 
TestWithE(base.BaseScriptTest, unittest.TestCase): + command_line = "./scripts/maf_select.py ${features}" + input_features = base.TestFile("""0 + 1 + 0 + 0 + 0 + 0 + 0 + 0""") + input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") + output_stdout = base.TestFile("""##maf version=1 +a score=-247111.0 +s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + +""") From 32cab6e999f8327d21723790fc869732dca55374 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Mon, 14 Jun 2021 18:42:22 +0200 Subject: [PATCH 14/68] fix maf_shuffle_column import --- scripts/maf_shuffle_columns.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/maf_shuffle_columns.py b/scripts/maf_shuffle_columns.py index 17dbd7e5..f74c688b 100755 --- a/scripts/maf_shuffle_columns.py +++ b/scripts/maf_shuffle_columns.py @@ -11,12 +11,13 @@ import sys from bx import align +from 
bx.align import maf def __main__(): - maf_reader = align.maf.Reader(sys.stdin, parse_e_rows=True) - maf_writer = align.maf.Writer(sys.stdout) + maf_reader = maf.Reader(sys.stdin, parse_e_rows=True) + maf_writer = maf.Writer(sys.stdout) for m in maf_reader: From 9403ba84a14d9a7f49659e1049cc4dfda97791f7 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 8 Jul 2021 10:48:06 +0200 Subject: [PATCH 15/68] linting --- lib/bx/align/maf_tests.py | 10 +++-- script_tests/maf_chunk_tests.py | 2 +- .../maf_extract_ranges_indexed_tests.py | 3 +- script_tests/maf_extract_ranges_tests.py | 1 + script_tests/maf_limit_to_species_tests.py | 2 +- script_tests/maf_select_tests.py | 37 +++++++++---------- scripts/maf_chunk.py | 2 +- 7 files changed, 31 insertions(+), 26 deletions(-) diff --git a/lib/bx/align/maf_tests.py b/lib/bx/align/maf_tests.py index 993a4ddb..1de58f03 100644 --- a/lib/bx/align/maf_tests.py +++ b/lib/bx/align/maf_tests.py @@ -60,13 +60,14 @@ complex_maf.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="ACA-TTACT")) complex_maf.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="ACAATTGCT")) complex_maf.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) -complex_maf.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) +complex_maf.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) complex_maf.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="---ATT---")) complex_maf.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) complex_maf.components[-1].empty = True complex_maf.components[-1].synteny_empty = maf.MAF_INSERT_STATUS complex_maf.text_size = 9 + def test_reader(): reader = maf.Reader(StringIO(test_maf)) @@ -151,12 +152,13 @@ def test_reverse_complement(): check_component(b.components[0], src="human_hoxa", start=100257-100-8, 
size=8, strand="-", src_size=100257, text="AGTAA-TGT") check_component(b.components[1], src="horse_hoxa", start=98892-120-9, size=9, strand="+", src_size=98892, text="AGCAATTGT") assert b.components[1].synteny_right == (maf.MAF_NEW_STATUS, 0) - assert b.components[1].synteny_left == (maf.MAF_CONTIG_STATUS, 0) + assert b.components[1].synteny_left == (maf.MAF_CONTIG_STATUS, 0) check_component(b.components[2], src="unknown_1", start=98892-150-3, size=3, strand="+", src_size=98892, text="---AAT---") check_component(b.components[3], src="unknown_2", start=1200-12-1000, size=1000, strand="-", src_size=1200, text=None) assert b.components[3].empty assert b.components[3].synteny_empty == maf.MAF_INSERT_STATUS + def test_column_iter(): expected = [['A', 'A', '-'], ['C', 'C', '-'], @@ -170,13 +172,14 @@ def test_column_iter(): for i, c in enumerate(complex_maf.column_iter()): assert c == expected[i] + def test_remove_all_gap_column(): complex_maf_gap = align.Alignment() complex_maf_gap.score = "7009" complex_maf_gap.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="-ACA--TTACT")) complex_maf_gap.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="-ACA-ATTGCT")) complex_maf_gap.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) - complex_maf_gap.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) + complex_maf_gap.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) complex_maf_gap.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="-----ATT---")) complex_maf_gap.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) complex_maf_gap.components[-1].empty = True @@ -185,6 +188,7 @@ def test_remove_all_gap_column(): complex_maf_gap.remove_all_gap_columns() assert complex_maf_gap == complex_maf + def test_read_with_synteny(): reader = 
maf.Reader(StringIO(test_maf_2), parse_e_rows=True) diff --git a/script_tests/maf_chunk_tests.py b/script_tests/maf_chunk_tests.py index 9dac0e7d..9962e4d7 100644 --- a/script_tests/maf_chunk_tests.py +++ b/script_tests/maf_chunk_tests.py @@ -2,6 +2,7 @@ import base + class Test1(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_chunk.py 1 ${out_dir}" input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf") @@ -12,4 +13,3 @@ class Test2(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_chunk.py 1000 ${out_dir}" input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny.maf") out_dir = "./test_data/maf_tests/chunk1000" - diff --git a/script_tests/maf_extract_ranges_indexed_tests.py b/script_tests/maf_extract_ranges_indexed_tests.py index ae89cf70..a5d5df87 100644 --- a/script_tests/maf_extract_ranges_indexed_tests.py +++ b/script_tests/maf_extract_ranges_indexed_tests.py @@ -8,6 +8,7 @@ class Test(base.BaseScriptTest, unittest.TestCase): input_stdin = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") + class TestAccessNotRef(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny.maf -c -m 5 -p hg18." input_stdin = base.TestFile(filename="./test_data/maf_tests/hg18.bed") @@ -18,7 +19,7 @@ class TestAccessRef(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p mm8." 
input_stdin = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") - + class TestAccessNotRefNotIndexed(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p hg18." diff --git a/script_tests/maf_extract_ranges_tests.py b/script_tests/maf_extract_ranges_tests.py index 3c6d80d9..d37435de 100644 --- a/script_tests/maf_extract_ranges_tests.py +++ b/script_tests/maf_extract_ranges_tests.py @@ -9,6 +9,7 @@ class Test(base.BaseScriptTest, unittest.TestCase): input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") + class TestElines(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_extract_ranges.py ${inverval_file} 0 -m 5" input_inverval_file = base.TestFile("56694985 56695040") diff --git a/script_tests/maf_limit_to_species_tests.py b/script_tests/maf_limit_to_species_tests.py index f352fb96..2866eb16 100644 --- a/script_tests/maf_limit_to_species_tests.py +++ b/script_tests/maf_limit_to_species_tests.py @@ -2,6 +2,7 @@ import base + class Test1(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_limit_to_species.py mm8,rn4" input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") @@ -12,4 +13,3 @@ class TestWithE(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_limit_to_species.py mm10,bosTau7,loxAfr3" input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe_onlymouse_cow_elephant.maf") - diff --git a/script_tests/maf_select_tests.py b/script_tests/maf_select_tests.py index a7f84ecd..b41d17b1 100644 --- a/script_tests/maf_select_tests.py +++ 
b/script_tests/maf_select_tests.py @@ -16,8 +16,8 @@ class Test(base.BaseScriptTest, unittest.TestCase): input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") output_stdout = base.TestFile("""##maf version=1 a score=8132.0 -s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC -s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC """) @@ -35,21 +35,20 @@ class TestWithE(base.BaseScriptTest, unittest.TestCase): input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") output_stdout = base.TestFile("""##maf version=1 a score=-247111.0 -s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA -s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA -i hetGla2.JH602151 C 0 C 0 -s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- -i micMur1.scaffold_1897 I 1 C 0 -s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- -i tupBel1.scaffold_149545.1-136892 I 1 C 0 -s pteVam1.scaffold_182 42258 37 + 455609 
AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- -i pteVam1.scaffold_182 I 1 I 2 -s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- -i eriEur1.scaffold_370206 I 1 I 2 -s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- -i sorAra1.scaffold_233549 I 1 I 2 -s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- -i loxAfr3.scaffold_9 C 0 C 0 -e bosTau7.chr21 47705583 182 + 69078422 I - +s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I """) diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index 37eb3433..953d00c7 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -75,7 +75,7 @@ def __main__(): if maf_writer: maf_writer.close() interval_file.write(f"{chunk_min} {chunk_max}\n") - + interval_file.close() 
From 2ea243347770a7cc30ca2fded101116db6b81ed6 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 8 Jul 2021 10:52:42 +0200 Subject: [PATCH 16/68] separate Third Party import from Stdlib import --- scripts/maf_chunk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index 953d00c7..f29bfc34 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -14,6 +14,7 @@ import random import sys from optparse import OptionParser + import numpy as np import bx.align.maf From 9e7df2b688d20de44edec8f960de7bdae54e6820 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 8 Jul 2021 11:26:07 +0200 Subject: [PATCH 17/68] use output files to have both tests and linting passing --- script_tests/maf_select_tests.py | 27 ++----------------- .../mm10_chr12_lessspe_one_selected.maf | 19 +++++++++++++ .../maf_tests/mm8_chr7_tiny_last_selected.maf | 5 ++++ 3 files changed, 26 insertions(+), 25 deletions(-) create mode 100644 test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf create mode 100644 test_data/maf_tests/mm8_chr7_tiny_last_selected.maf diff --git a/script_tests/maf_select_tests.py b/script_tests/maf_select_tests.py index b41d17b1..2268c656 100644 --- a/script_tests/maf_select_tests.py +++ b/script_tests/maf_select_tests.py @@ -14,12 +14,7 @@ class Test(base.BaseScriptTest, unittest.TestCase): 0 1""") input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") - output_stdout = base.TestFile("""##maf version=1 -a score=8132.0 -s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC -s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC - -""") + 
output_stdout = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_last_selected.maf") class TestWithE(base.BaseScriptTest, unittest.TestCase): @@ -33,22 +28,4 @@ class TestWithE(base.BaseScriptTest, unittest.TestCase): 0 0""") input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") - output_stdout = base.TestFile("""##maf version=1 -a score=-247111.0 -s mm10.chr12 56694986 40 + 120129022 CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA -s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA -i hetGla2.JH602151 C 0 C 0 -s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- -i micMur1.scaffold_1897 I 1 C 0 -s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- -i tupBel1.scaffold_149545.1-136892 I 1 C 0 -s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- -i pteVam1.scaffold_182 I 1 I 2 -s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- -i eriEur1.scaffold_370206 I 1 I 2 -s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- -i sorAra1.scaffold_233549 I 1 I 2 -s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- -i loxAfr3.scaffold_9 C 0 C 0 -e bosTau7.chr21 47705583 182 + 69078422 I -""") + output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf") diff --git a/test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf b/test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf new file mode 100644 index 00000000..262ccf46 --- /dev/null +++ b/test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf @@ -0,0 +1,19 @@ +##maf version=1 +a score=-247111.0 +s mm10.chr12 56694986 40 + 120129022 
CTCTTTAG---TCTGG--------TTTTTTAATTTTTTTTTCC-T------CA-----CTGCA +s hetGla2.JH602151 6912245 40 - 7060556 AATTTCAGCCCCCCCG--------ATGCCTAGGTTTCC---CC-G------CA-----CTGGA +i hetGla2.JH602151 C 0 C 0 +s micMur1.scaffold_1897 109315 37 + 248211 CGCTTCAG--CTGCAC--------GTGTTTAAATTCCC---GGCA------CA-----TGG-- +i micMur1.scaffold_1897 I 1 C 0 +s tupBel1.scaffold_149545.1-136892 2307 42 + 136892 AGCTTTCG--CTGCGT--------GTGGTTACTTTCTC---CGCA------CACCGCGCAG-- +i tupBel1.scaffold_149545.1-136892 I 1 C 0 +s pteVam1.scaffold_182 42258 37 + 455609 AGCTTTAG--CTGCAA--------GTGGTTACATTCTC---TGCA------CA-----CCT-- +i pteVam1.scaffold_182 I 1 I 2 +s eriEur1.scaffold_370206 11243 36 + 65867 ACTTTAAG--TAACTTAAAGAAACTCGGCTACACACTC------------------------- +i eriEur1.scaffold_370206 I 1 I 2 +s sorAra1.scaffold_233549 581 38 + 65803 GCTTTTAG--CCACTCAAGG----TCGGTTATGTCCAC---TGCA------CA---------- +i sorAra1.scaffold_233549 I 1 I 2 +s loxAfr3.scaffold_9 12283164 24 + 83325590 ------------GCTT--------GTAGTTAAGTTCTC---GGTA------CA---------- +i loxAfr3.scaffold_9 C 0 C 0 +e bosTau7.chr21 47705583 182 + 69078422 I + diff --git a/test_data/maf_tests/mm8_chr7_tiny_last_selected.maf b/test_data/maf_tests/mm8_chr7_tiny_last_selected.maf new file mode 100644 index 00000000..94f735f6 --- /dev/null +++ b/test_data/maf_tests/mm8_chr7_tiny_last_selected.maf @@ -0,0 +1,5 @@ +##maf version=1 +a score=8132.0 +s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC +s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC + From f4e6a5c93e719db69b5798b6fdd9b167da358316 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 25 Nov 2021 03:26:09 +0000 Subject: [PATCH 
18/68] Release 0.8.12 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index 2863f686..0a5ca3c0 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = '0.8.11' +__version__ = '0.8.12' From f8775d0f2dc9b93e2d04e3f68b70159ac30a07b9 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Mon, 13 Dec 2021 19:25:55 +0000 Subject: [PATCH 19/68] Release 0.8.13 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index 0a5ca3c0..ab26e3de 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = '0.8.12' +__version__ = '0.8.13' From 183a9e9f5417afa08efcea7bbd0a65e0085dbcba Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 15 Dec 2021 01:26:29 +0000 Subject: [PATCH 20/68] Replace nose with pytest nose doesn't work on Python 3.10 . --- lib/bx/bbi/bigwig_tests.py | 35 +++++++++++------------------------ lib/bx/phylo/newick_tests.py | 12 ++++-------- lib/bx/seq/twobit_tests.py | 17 ++++++----------- pytest.ini | 4 ++++ setup.cfg | 10 ---------- tox.ini | 10 +++++----- 6 files changed, 30 insertions(+), 58 deletions(-) create mode 100644 pytest.ini diff --git a/lib/bx/bbi/bigwig_tests.py b/lib/bx/bbi/bigwig_tests.py index ab8be5d9..302c0b53 100644 --- a/lib/bx/bbi/bigwig_tests.py +++ b/lib/bx/bbi/bigwig_tests.py @@ -1,9 +1,8 @@ import os import sys -import unittest -from functools import partial import numpy +import pytest try: sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) @@ -21,7 +20,8 @@ def allclose(a, b, tol=0.00001): return numpy.all(numpy.isnan(d) | (d < tol)) -class TestBigWig(unittest.TestCase): +class TestBigWig: + @pytest.fixture(autouse=True) def setUp(self): f = open("test_data/bbi_tests/test.bw", 'rb') self.bw = BigWigFile(file=f) @@ -40,8 +40,8 @@ def test_get_summary(self): data = self.bw.query("chr1", 10000, 20000, 1) maxs = [x['max'] 
for x in data] mins = [x['min'] for x in data] - self.assertEqual([float(_) for _ in maxs], [0.289000004529953]) - self.assertEqual([float(_) for _ in mins], [-3.9100000858306885]) + assert [float(_) for _ in maxs] == [0.289000004529953] + assert [float(_) for _ in mins] == [-3.9100000858306885] def test_get_leaf(self): data = self.bw.query("chr1", 11000, 11005, 5) @@ -52,20 +52,15 @@ def test_get_leaf(self): data = self.bw.query("chr1", 11000, 11005, 1) maxs = [x['max'] for x in data] mins = [x['min'] for x in data] - self.assertEqual([float(_) for _ in maxs], [0.050842501223087311]) - self.assertEqual([float(_) for _ in mins], [-2.4589500427246094]) + assert [float(_) for _ in maxs] == [0.050842501223087311] + assert [float(_) for _ in mins] == [-2.4589500427246094] def test_wrong_nochrom(self): data = self.bw.query("chr2", 0, 10000, 10) - self.assertEqual(data, None) + assert data is None -# Nose test generator - - -def test_summaries_from_file(): - bw = BigWigFile(file=open("test_data/bbi_tests/test.bw", 'rb')) - - def check_summary(line): + @pytest.mark.parametrize("line", open("test_data/bbi_tests/test.expectation").readlines()) + def test_summary_from_file(self, line): fields = line.split() chrom = fields[0] start = int(fields[1]) @@ -73,7 +68,7 @@ def check_summary(line): n = int(fields[3]) t = fields[4] values = [float(v.replace('n/a', 'NaN')) for v in fields[5:]] - sd = bw.summarize(chrom, start, end, n) + sd = self.bw.summarize(chrom, start, end, n) if t == 'mean': print(sd.sum_data / sd.valid_count) print(values) @@ -84,11 +79,3 @@ def check_summary(line): assert allclose(sd.max_val, values) # elif t == 'std': # assert numpy.allclose( sd.max_val, values ) - for i, line in enumerate(open("test_data/bbi_tests/test.expectation")): - f = partial(check_summary, line) - f.description = "Test summaries line %d: %s" % (i, line[:40]) - yield (f, ) - - -if __name__ == '__main__': - unittest.main() diff --git a/lib/bx/phylo/newick_tests.py 
b/lib/bx/phylo/newick_tests.py index c226e3b9..1a1cd15b 100644 --- a/lib/bx/phylo/newick_tests.py +++ b/lib/bx/phylo/newick_tests.py @@ -2,7 +2,7 @@ Tests for `bx.phylo.newick`. """ -from nose.tools import ok_ +import pytest from bx.phylo.newick import ( Edge, @@ -29,10 +29,6 @@ (Tree(None, [Edge(None, Tree(None, [Edge(None, Tree('A', None)), Edge(None, Tree('D', None))])), Edge(None, Tree(None, [Edge(None, Tree('C', None)), Edge(None, Tree('B', None))]))])), ] -def tests(): - for i in range(len(trees)): - def _(): - return ok_(newick_parser.parse_string(trees[i]) == results[i]) - - _.description = "check tree parsing " + str(i) - yield _, +@pytest.mark.parametrize("tree,result", zip(trees, results)) +def test_newick_tree(tree, result): + assert newick_parser.parse_string(tree) == result diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index 83d71b78..f5f28ddc 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -1,5 +1,7 @@ import random +import pytest + from . 
import twobit @@ -21,17 +23,10 @@ def quick_fasta_iter(f): current_sequence = [] -def test(): - """ - Nose test generator - """ - for t in ["test", "testN", "testMask"]: - test_fa = "test_data/seq_tests/%s.fa" % t - test_twobit = "test_data/seq_tests/%s.2bit" % t - yield check_random_subseq_matches, test_fa, test_twobit - - -def check_random_subseq_matches(test_fa, test_twobit): +@pytest.mark.parametrize("filename", ["test", "testN", "testMask"]) +def test_random_subseq_matches(filename): + test_fa = f"test_data/seq_tests/{filename}.fa" + test_twobit = f"test_data/seq_tests/{filename}.2bit" # Load Fasta data expected = {} with open(test_fa) as f: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..e77e08f2 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +addopts = --doctest-cython --doctest-modules +python_files = *_tests.py +testpaths = lib script_tests/ diff --git a/setup.cfg b/setup.cfg index fc403370..c95d15eb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,9 +34,6 @@ package_dir = py_modules = psyco_full python_requires = >=3.6 -tests_require = - nose - python-lzo zip_safe = False [options.package_data] @@ -55,13 +52,6 @@ exclude = .git,.tox,.venv,build,doc/source/conf.py import-order-style = smarkets application-import-names = bx,bx_extras -[nosetests] -tests=lib/, script_tests/ -#verbosity=2 -#detailed-errors=1 -with-doctest=1 -doctest-extension=pyx - [build_sphinx] source-dir = doc/source build-dir = doc/docbuild diff --git a/tox.ini b/tox.ini index 7f4cb79e..e17f80cc 100644 --- a/tox.ini +++ b/tox.ini @@ -2,11 +2,11 @@ skip_install = true commands_pre = python setup.py build_ext --inplace - pip install . 
commands = - nosetests {posargs} + pytest {posargs} deps = Cython - nose - numpy - python-lzo + numpy + pytest + pytest-cython + git+https://github.com/jd-boyd/python-lzo.git@master # https://github.com/jd-boyd/python-lzo/issues/52 From 1283016a55ac4e8ced6dc623215ea6d19a18d0a2 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 14 Dec 2021 12:25:42 +0000 Subject: [PATCH 21/68] Drop support for Python 3.6, add 3.10 Python 3.6 is reaching End Of Life this month. Also: - Upgrade syntax to Python 3.7 using `ack --type=python -f | xargs pyupgrade --py37-plus` --- .github/workflows/test.yaml | 4 ++-- lib/bx/align/core.py | 2 +- lib/bx/align/epo_tests.py | 2 +- lib/bx/align/maf.py | 4 ++-- lib/bx/align/score_tests.py | 12 ++++++++---- lib/bx/bitset_utils.py | 4 ++-- lib/bx/cookbook/argparse.py | 8 ++++---- lib/bx/gene_reader.py | 4 ++-- lib/bx/intervals/intersection_tests.py | 2 +- lib/bx/intervals/io.py | 2 +- lib/bx/intervals/random_intervals.py | 2 +- lib/bx/phylo/newick.py | 6 +++--- lib/bx/pwm/maf_select_motifs.py | 2 +- lib/bx/pwm/position_weight_matrix.py | 12 ++++++------ lib/bx/pwm/pwm_tests.py | 6 +++--- lib/bx/seq/seq.py | 2 +- lib/bx/seq/seq_tests.py | 4 ++-- lib/bx_extras/pyparsing.py | 10 +++++----- lib/bx_extras/stats.py | 2 +- scripts/axt_to_fasta.py | 2 +- setup.cfg | 4 ++-- 21 files changed, 50 insertions(+), 46 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e8324e6c..07f10da6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.6', '3.9'] + python-version: ['3.7', '3.10'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 diff --git a/lib/bx/align/core.py 
b/lib/bx/align/core.py index 476144a5..a80b4381 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -63,7 +63,7 @@ def set_score(self, score): def __str__(self): s = "a score=" + str(self.score) for key in self.attributes: - s += " {}={}".format(key, self.attributes[key]) + s += f" {key}={self.attributes[key]}" s += "\n" # Components for c in self.components: diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py index a448e013..ff58b4ce 100644 --- a/lib/bx/align/epo_tests.py +++ b/lib/bx/align/epo_tests.py @@ -173,7 +173,7 @@ def cch(cigar, s, e): for s, t, q in zip(S, T, Q): if not (cch(c1, th, th+s) and cch(c2, th, th+s)): pdb.set_trace() - assert cch(c1, th, th+s) and cch(c2, th, th+s), "{} and {}".format(c1[th:th+s], c2[th:th+s]) + assert cch(c1, th, th+s) and cch(c2, th, th+s), f"{c1[th:th+s]} and {c2[th:th+s]}" if t > q: cch(c1, th+s, th+s+t) and c1[th+s:th+s+t] == '-'*t else: diff --git a/lib/bx/align/maf.py b/lib/bx/align/maf.py index ed47951c..24ae8c89 100644 --- a/lib/bx/align/maf.py +++ b/lib/bx/align/maf.py @@ -112,13 +112,13 @@ def __init__(self, file, attributes=None): for key in attributes: if key == 'version': continue - self.file.writelines(" {}={}".format(key, attributes[key])) + self.file.writelines(f" {key}={attributes[key]}") self.file.write("\n") def write(self, alignment): self.file.write("a score=" + str(alignment.score)) for key in alignment.attributes: - self.file.write(" {}={}".format(key, alignment.attributes[key])) + self.file.write(f" {key}={alignment.attributes[key]}") self.file.write("\n") # Components rows = [] diff --git a/lib/bx/align/score_tests.py b/lib/bx/align/score_tests.py index e0907443..0e57623a 100644 --- a/lib/bx/align/score_tests.py +++ b/lib/bx/align/score_tests.py @@ -74,10 +74,14 @@ def test_align(self): def test_accumulate(self): ss = bx.align.score.hox70 - self.assert_(allclose(bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA"), - cumsum(array([-430, -30, -30, -30, -30, -31, 91, 
91, -123])))) - self.assert_(allclose(bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA", skip_ref_gaps=True), - cumsum(array([-581, 91, 91, -123])))) + self.assertTrue(allclose( + bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA"), + cumsum(array([-430, -30, -30, -30, -30, -31, 91, 91, -123])) + )) + self.assertTrue(allclose( + bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA", skip_ref_gaps=True), + cumsum(array([-581, 91, 91, -123])) + )) def test_nonsymm_scoring(self): ss = nonsymm_scheme diff --git a/lib/bx/bitset_utils.py b/lib/bx/bitset_utils.py index 432a23b8..60aa34d8 100644 --- a/lib/bx/bitset_utils.py +++ b/lib/bx/bitset_utils.py @@ -51,8 +51,8 @@ def bitset_complement(exons): bits.invert() # only complement within the range of the list - ex_start = min([a[0] for a in exons]) - ex_end = max([a[1] for a in exons]) + ex_start = min(a[0] for a in exons) + ex_end = max(a[1] for a in exons) end = ex_start len = ex_end while True: diff --git a/lib/bx/cookbook/argparse.py b/lib/bx/cookbook/argparse.py index 4b198382..39f2e99e 100644 --- a/lib/bx/cookbook/argparse.py +++ b/lib/bx/cookbook/argparse.py @@ -280,7 +280,7 @@ def add_argument(self, action): invocations.append(get_invocation(subaction)) # update the maximum item length - invocation_length = max([len(s) for s in invocations]) + invocation_length = max(len(s) for s in invocations) action_length = invocation_length + self._current_indent self._action_max_length = max(self._action_max_length, action_length) @@ -1151,7 +1151,7 @@ def __call__(self, string): def __repr__(self): args = [self._mode, self._bufsize] args_str = ', '.join([repr(arg) for arg in args if arg is not None]) - return '{}({})'.format(type(self).__name__, args_str) + return f'{type(self).__name__}({args_str})' # =========================== # Optional and Positional Parsing @@ -1911,10 +1911,10 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals 
preceding the next option - next_option_string_index = min([ + next_option_string_index = min( index for index in option_string_indices - if index >= start_index]) + if index >= start_index) if start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) diff --git a/lib/bx/gene_reader.py b/lib/bx/gene_reader.py index b5bdfcd0..06f87a8e 100644 --- a/lib/bx/gene_reader.py +++ b/lib/bx/gene_reader.py @@ -150,7 +150,7 @@ def CDSReader(fh, format='gff'): # for gene in genelist.values(): for gene in grouplist: chrom, strand, cds_exons = genelist[gene] - seqlen = sum([a[1]-a[0] for a in cds_exons]) + seqlen = sum(a[1]-a[0] for a in cds_exons) overhang = seqlen % 3 if overhang > 0: if strand == '+': @@ -288,7 +288,7 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None introns = bitset_union(introns) # assure CDS is a multiple of 3, trim from last exon if necessary - seqlen = sum([a[1]-a[0] for a in cds_exons]) + seqlen = sum(a[1]-a[0] for a in cds_exons) overhang = seqlen % 3 if overhang > 0: if strand == '+': diff --git a/lib/bx/intervals/intersection_tests.py b/lib/bx/intervals/intersection_tests.py index 32b1a439..3993ab11 100644 --- a/lib/bx/intervals/intersection_tests.py +++ b/lib/bx/intervals/intersection_tests.py @@ -201,7 +201,7 @@ def test_empty(self): def test_public_interval(self): def fn(ival): - return self.assert_(ival.interval) + return self.assertTrue(ival.interval) self.iv.traverse(fn) diff --git a/lib/bx/intervals/io.py b/lib/bx/intervals/io.py index 7522f3d2..ffd2feda 100644 --- a/lib/bx/intervals/io.py +++ b/lib/bx/intervals/io.py @@ -185,7 +185,7 @@ def binned_bitsets(self, upstream_pad=0, downstream_pad=0, lens=None): except ValueError as e: # We will only reach here when constructing this bitset from the lens dict # since the value of MAX is always safe. - raise Exception("Invalid chrom length {} in 'lens' dictionary. 
{}".format(str(size), str(e))) + raise Exception(f"Invalid chrom length {str(size)} in 'lens' dictionary. {str(e)}") bitsets[chrom] = bbs last_chrom = chrom last_bitset = bitsets[chrom] diff --git a/lib/bx/intervals/random_intervals.py b/lib/bx/intervals/random_intervals.py index 7869495d..bd5f343c 100644 --- a/lib/bx/intervals/random_intervals.py +++ b/lib/bx/intervals/random_intervals.py @@ -82,7 +82,7 @@ def throw_random_intervals(lengths, regions, save_interval_func=None, allow_over region with start and end modified. """ # Copy regions - regions = sorted([(x[1]-x[0], x[0], x) for x in regions]) + regions = sorted((x[1]-x[0], x[0], x) for x in regions) # Sort (long regions first) regions.reverse() # Throw diff --git a/lib/bx/phylo/newick.py b/lib/bx/phylo/newick.py index 911cbf5e..37a077a1 100644 --- a/lib/bx/phylo/newick.py +++ b/lib/bx/phylo/newick.py @@ -50,7 +50,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): - return "Tree( {}, {} )".format(repr(self.label), repr(self.edges)) + return f"Tree( {repr(self.label)}, {repr(self.edges)} )" @total_ordering @@ -60,7 +60,7 @@ def __init__(self, length, tip): self.tip = tip def pretty(self): - return "Edge( {}, \n{}\n)".format(repr(self.length), indent(repr(self.tip))) + return f"Edge( {repr(self.length)}, \n{indent(repr(self.tip))}\n)" def __lt__(self, other): return self.__dict__ < other.__dict__ @@ -69,7 +69,7 @@ def __eq__(self, other): return self.__dict__ == other.__dict__ def __repr__(self): - return "Edge( {}, {} )".format(repr(self.length), repr(self.tip)) + return f"Edge( {repr(self.length)}, {repr(self.tip)} )" def create_parser(): diff --git a/lib/bx/pwm/maf_select_motifs.py b/lib/bx/pwm/maf_select_motifs.py index 33e39985..a21d6409 100755 --- a/lib/bx/pwm/maf_select_motifs.py +++ b/lib/bx/pwm/maf_select_motifs.py @@ -39,7 +39,7 @@ def main(): def mafwrite(alignment, kvec=None, jvec=None, file=sys.stdout): file.write("a score=" + str(alignment.score)) for key in 
alignment.attributes: - file.write(" {}={}".format(key, alignment.attributes[key])) + file.write(f" {key}={alignment.attributes[key]}") file.write("\n") rows = [] if not kvec: diff --git a/lib/bx/pwm/position_weight_matrix.py b/lib/bx/pwm/position_weight_matrix.py index 93f69599..448c3f45 100755 --- a/lib/bx/pwm/position_weight_matrix.py +++ b/lib/bx/pwm/position_weight_matrix.py @@ -267,7 +267,7 @@ def __init__(self, id, rows, alphabet, background=None, score_correction=True): # Reference 2: Gertz et al.: Genome Res. 2005 Aug;15(8):1145-52. def information_content_calculation(self, i, counts): # Reference 1) - return 2 + sum([self.information_base_content(base, i, counts) for base in self.alphabet]) + return 2 + sum(self.information_base_content(base, i, counts) for base in self.alphabet) # Reference 2) # return sum( [ self.information_base_content(base,i,counts) for base in self.alphabet ] ) @@ -522,8 +522,8 @@ def score_quantum_seq(self, seq): raw = 0 try: for i, nt in enumerate(subseq): - numer = sum([subseq[i][nt] * self.probs[i][nt] for nt in subseq[i]]) - denom = sum([subseq[i][nt] * self.background[nt] for nt in subseq[i]]) + numer = sum(subseq[i][nt] * self.probs[i][nt] for nt in subseq[i]) + denom = sum(subseq[i][nt] * self.background[nt] for nt in subseq[i]) raw += math.log(numer/denom, 2) scaled = self.scaled(raw) except KeyError: @@ -559,7 +559,7 @@ def simple_probability(self, freq, base, i): # ---------------------- # sum(f(base,{A,C,G,T})) - return float(freq[i][base]) / sum([freq[i][nt] for nt in self.alphabet]) + return float(freq[i][base]) / sum(freq[i][nt] for nt in self.alphabet) def corrected_probability_score(self, freq, base, i): # p(base,i) = f(base,i) + s(base) @@ -812,7 +812,7 @@ def sum_of_squares(x, y=None): xmean = float(sum(x)) / len(x) ymean = float(sum(y)) / len(y) assert len(x) == len(y) - return sum([float(xi)*float(yi) for xi, yi in zip(x, y)]) - len(x)*xmean*ymean + return sum(float(xi)*float(yi) for xi, yi in zip(x, y)) - 
len(x)*xmean*ymean def consensus_symbol(pattern): @@ -857,7 +857,7 @@ def consensus_symbol(pattern): if tops[1] > 0.5 and tops[1] >= 2 * tops[0]: return symbols[f.index(tops[1])] elif tops[0] < 0.5 and sum(tops) >= 0.75: - degen = frozenset([symbols[f.index(v)] for v in tops]) + degen = frozenset(symbols[f.index(v)] for v in tops) for degenSymbol, wobbles in wobblers.items(): # print >>sys.stderr,wobbles if degen == wobbles: diff --git a/lib/bx/pwm/pwm_tests.py b/lib/bx/pwm/pwm_tests.py index 6a6e0e3f..8c4cc06e 100644 --- a/lib/bx/pwm/pwm_tests.py +++ b/lib/bx/pwm/pwm_tests.py @@ -78,7 +78,7 @@ def testReader(self): wm = wms[0] dScores = wm.score_seq(dSeq) assert len(dScores) == 2 - assert "{:.4f} {:.4f} {:.4f} {:.4f}".format(dScores[0][0], dScores[0][1], dScores[1][0], dScores[1][1]) == dScoresExpected + assert f"{dScores[0][0]:.4f} {dScores[0][1]:.4f} {dScores[1][0]:.4f} {dScores[1][1]:.4f}" == dScoresExpected qdSeq = [] for (ix, nt) in enumerate(dSeq): @@ -86,8 +86,8 @@ def testReader(self): qdSeq[ix][nt] = 1.0 qScores = wm.score_seq(qdSeq) assert len(qScores) == 2 - assert "{:.4f} {:.4f} {:.4f} {:.4f}".format(qScores[0][0], qScores[0][1], qScores[1][0], qScores[1][1]) == dScoresExpected + assert f"{qScores[0][0]:.4f} {qScores[0][1]:.4f} {qScores[1][0]:.4f} {qScores[1][1]:.4f}" == dScoresExpected qScores = wm.score_seq(qSeq) assert len(qScores) == 1 - assert "{:.4f} {:.4f}".format(qScores[0][0], qScores[0][1]) == qScoresExpected + assert f"{qScores[0][0]:.4f} {qScores[0][1]:.4f}" == qScoresExpected diff --git a/lib/bx/seq/seq.py b/lib/bx/seq/seq.py index 79aa153a..2a948218 100644 --- a/lib/bx/seq/seq.py +++ b/lib/bx/seq/seq.py @@ -90,7 +90,7 @@ def get(self, start, length): assert length >= 0, "Length must be non-negative (got %d)" % length assert start >= 0, "Start must be greater than 0 (got %d)" % start assert start + length <= self.length, \ - "Interval beyond end of sequence ({}..{} > {})".format(start, start + length, self.length) + f"Interval beyond end 
of sequence ({start}..{start + length} > {self.length})" # Fetch sequence and reverse complement if necesary if not self.revcomp: return self.raw_fetch(start, length) diff --git a/lib/bx/seq/seq_tests.py b/lib/bx/seq/seq_tests.py index 3761e977..126d8218 100644 --- a/lib/bx/seq/seq_tests.py +++ b/lib/bx/seq/seq_tests.py @@ -44,8 +44,8 @@ def test_get_reader(self): text = "%s" % seq fields = text.split() assert (len(fields) == 2), "SeqReader.__str__ returns incorrect sequence string \"%s\" (%d)" % text - assert (fields[0] == valid2_fa[ix][0]), "FastaReader returned the wrong name ({},{})".format(fields[0], valid2_fa[ix][0]) - assert (fields[1] == valid2_fa[ix][1]), "FastaReader returned the wrong text ({},{})".format(fields[1], valid2_fa[ix][1]) + assert (fields[0] == valid2_fa[ix][0]), f"FastaReader returned the wrong name ({fields[0]},{valid2_fa[ix][0]})" + assert (fields[1] == valid2_fa[ix][1]), f"FastaReader returned the wrong text ({fields[1]},{valid2_fa[ix][1]})" def check_get(seqfile, valid_seq, start, len): diff --git a/lib/bx_extras/pyparsing.py b/lib/bx_extras/pyparsing.py index 03e0dc12..d5b60f58 100644 --- a/lib/bx_extras/pyparsing.py +++ b/lib/bx_extras/pyparsing.py @@ -383,7 +383,7 @@ def __iadd__(self, other): return self def __repr__(self): - return "({}, {})".format(repr(self.__toklist), repr(self.__tokdict)) + return f"({repr(self.__toklist)}, {repr(self.__tokdict)})" def __str__(self): out = "[" @@ -1665,7 +1665,7 @@ def charsAsStr(s): return s if self.initCharsOrig != self.bodyCharsOrig: - self.strRepr = "W:({},{})".format(charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig)) + self.strRepr = f"W:({charsAsStr(self.initCharsOrig)},{charsAsStr(self.bodyCharsOrig)})" else: self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) @@ -2204,7 +2204,7 @@ def __str__(self): pass if self.strRepr is None: - self.strRepr = "{}:({})".format(self.__class__.__name__, str(self.exprs)) + self.strRepr = f"{self.__class__.__name__}:({str(self.exprs)})" 
return self.strRepr def streamline(self): @@ -2599,7 +2599,7 @@ def __str__(self): pass if self.strRepr is None and self.expr is not None: - self.strRepr = "{}:({})".format(self.__class__.__name__, str(self.expr)) + self.strRepr = f"{self.__class__.__name__}:({str(self.expr)})" return self.strRepr @@ -3410,7 +3410,7 @@ def pa(s, l, tokens): if attrName not in tokens: raise ParseException(s, l, "no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s, l, "attribute '{}' has value '{}', must be '{}'".format(attrName, tokens[attrName], attrValue)) + raise ParseException(s, l, f"attribute '{attrName}' has value '{tokens[attrName]}', must be '{attrValue}'") return pa diff --git a/lib/bx_extras/stats.py b/lib/bx_extras/stats.py index 40474743..ff0f0c53 100644 --- a/lib/bx_extras/stats.py +++ b/lib/bx_extras/stats.py @@ -3829,7 +3829,7 @@ def asign(a): Returns: array shape of a, with -1 where a<0 and +1 where a>=0 """ a = N.asarray(a) - if ((isinstance(a, type(1.4))) or (isinstance(a, type(1)))): + if ((isinstance(a, float)) or (isinstance(a, int))): return a-a-N.less(a, 0)+N.greater(a, 0) else: return N.zeros(N.shape(a))-N.less(a, 0)+N.greater(a, 0) diff --git a/scripts/axt_to_fasta.py b/scripts/axt_to_fasta.py index f059adab..512e148d 100755 --- a/scripts/axt_to_fasta.py +++ b/scripts/axt_to_fasta.py @@ -47,7 +47,7 @@ def main(): # $$$ this should be moved to a bx.align.fasta module def print_component_as_fasta(c, id=None): - header = ">{}_{}_{}".format(c.src, c.start, c.start + c.size) + header = f">{c.src}_{c.start}_{c.start + c.size}" if id is not None: header += " " + id print(header) diff --git a/setup.cfg b/setup.cfg index c95d15eb..0b8287d1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,10 +8,10 @@ classifiers = License :: OSI Approved :: MIT License Operating System :: POSIX Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 
3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Topic :: Scientific/Engineering :: Bio-Informatics Topic :: Software Development :: Libraries :: Python Modules name = bx-python @@ -33,7 +33,7 @@ package_dir = =lib py_modules = psyco_full -python_requires = >=3.6 +python_requires = >=3.7 zip_safe = False [options.package_data] From b7712e94e80b89f6120109887c985fbafaa6a589 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 15 Dec 2021 01:28:18 +0000 Subject: [PATCH 22/68] Fix numpy and flake8-bugbear warnings --- lib/bx/bbi/bigwig_tests.py | 3 --- lib/bx/binned_array.py | 24 ++++++++++++------------ lib/bx/binned_array_tests.py | 8 ++++---- lib/bx/seq/twobit_tests.py | 2 +- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/lib/bx/bbi/bigwig_tests.py b/lib/bx/bbi/bigwig_tests.py index 302c0b53..4cba88fa 100644 --- a/lib/bx/bbi/bigwig_tests.py +++ b/lib/bx/bbi/bigwig_tests.py @@ -29,7 +29,6 @@ def setUp(self): def test_get_summary(self): data = self.bw.query("chr1", 10000, 20000, 10) means = [x['mean'] for x in data] - print(means) assert numpy.allclose([float(_) for _ in means], [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998]) # Summarize variant @@ -70,8 +69,6 @@ def test_summary_from_file(self, line): values = [float(v.replace('n/a', 'NaN')) for v in fields[5:]] sd = self.bw.summarize(chrom, start, end, n) if t == 'mean': - print(sd.sum_data / sd.valid_count) - print(values) assert allclose(sd.sum_data / sd.valid_count, values) elif t == 'min': assert allclose(sd.min_val, values) diff --git a/lib/bx/binned_array.py b/lib/bx/binned_array.py index ed2490b7..1038d415 100644 --- a/lib/bx/binned_array.py +++ b/lib/bx/binned_array.py @@ -19,7 +19,7 @@ from numpy import ( array, concatenate, - fromstring, + 
frombuffer, NaN, resize, zeros @@ -43,9 +43,9 @@ # Compression types -comp_types = dict() - -comp_types['none'] = (lambda x: x, lambda x: x) +comp_types = { + 'none': (lambda x: x, lambda x: x) +} try: import zlib @@ -150,7 +150,7 @@ def to_file(self, f, comp_type='zlib'): # around that by byteswapping the array if platform_is_little_endian: a = a.byteswap() - f.write(a.tostring()) + f.write(a.tobytes()) # Save current position (start of bin offsets) index_start_pos = f.tell() # Skip forward to save space for index @@ -163,9 +163,9 @@ def to_file(self, f, comp_type='zlib'): else: assert bin.dtype.char == self.typecode if platform_is_little_endian: - s = bin.byteswap().tostring() + s = bin.byteswap().tobytes() else: - s = bin.tostring() + s = bin.tobytes() compressed = compress(s) bin_pos_and_size.append((f.tell(), len(compressed))) f.write(compressed) @@ -201,7 +201,7 @@ def __init__(self, f, cache=32): self.decompress = comp_types[self.comp_type][1] # Read default value s = f.read(calcsize(self.typecode)) - a = fromstring(s, self.typecode) + a = frombuffer(s, self.typecode) if platform_is_little_endian: a = a.byteswap() self.default = a[0] @@ -220,7 +220,7 @@ def load_bin(self, index): assert self.bin_pos[index] != 0 self.f.seek(self.bin_pos[index]) raw = self.f.read(self.bin_sizes[index]) - a = fromstring(self.decompress(raw), self.typecode) + a = frombuffer(self.decompress(raw), self.typecode) if platform_is_little_endian: a = a.byteswap() assert len(a) == self.bin_size @@ -307,7 +307,7 @@ def write_header(self): # around that by byteswapping the array if platform_is_little_endian: a = a.byteswap() - self.f.write(a.tostring()) + self.f.write(a.tobytes()) # Save current position (start of bin offsets) self.index_pos = self.f.tell() self.data_offset = self.index_pos + (self.nbins * calcsize(">2I")) @@ -344,9 +344,9 @@ def flush(self): if self.buffer_contains_values: pos = self.f.tell() if platform_is_little_endian: - s = self.buffer.byteswap().tostring() + s = 
self.buffer.byteswap().tobytes() else: - s = self.buffer.tostring() + s = self.buffer.tobytes() compressed = self.compress(s) size = len(compressed) assert len(self.bin_index) == self.bin diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index 5e40083b..22cb7b0a 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -29,7 +29,7 @@ def setup(): global source global target source = [] - for i in range(13): + for _ in range(13): if random() < 0.5: source = concatenate((source, random(CHUNK_SIZE_RANDOM))) else: @@ -48,7 +48,7 @@ def test_simple(): for i in range(len(source)): assert source[i] == target[i], "No match, index: %d, source: %f, target: %f, len( source ): %d" % (i, source[i], target[i], len(source)) # Verify with slices - for i in range(10): + for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: @@ -65,7 +65,7 @@ def test_file(): assert source[i] == target2[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target2[i]) # Verify with slices target2 = FileBinnedArray(open("/tmp/foo", 'rb')) - for i in range(10): + for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: @@ -83,7 +83,7 @@ def test_file_lzo(): assert source[i] == target3[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target3[i]) # Verify with slices target3 = FileBinnedArray(open("/tmp/foo3", 'rb')) - for i in range(10): + for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index f5f28ddc..f286962b 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -40,7 +40,7 @@ def test_random_subseq_matches(filename): assert k in t.index # assert t.index[k].size == len(s) length = len(s) - for i in range(100): + for _ in range(100): start = random.randint(0, length-2) end = random.randint(start+1, length) assert 
t[k].get(start, end) == s[start:end] From 88428965dc4a38c915920337ce653289be839db8 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 14 Dec 2021 12:28:58 +0000 Subject: [PATCH 23/68] Skip building the PyPy 3.7 wheel for macOS Numpy doesn't have a wheel on PyPI for this combination and it fails to install since macos-latest has changed to version 11. --- .github/workflows/deploy.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 5dbef45a..13ae549d 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -30,7 +30,9 @@ jobs: CIBW_ARCHS: ${{matrix.arch}} # Skip building musllinux wheels for aarch64, each one currently takes # more than 2 hours to build. - CIBW_SKIP: '*-musllinux_aarch64' + # Skip also building the PyPy 3.7 wheel for macOS, because numpy + # doesn't have a wheel on PyPI and it fails to install. + CIBW_SKIP: '*-musllinux_aarch64 pp37-macosx_x86_64' - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v2 From b42b95f2bfe9e34e603b29f7e05dddacca89f67d Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 4 Jan 2022 20:22:20 +0000 Subject: [PATCH 24/68] Use new python-lzo release supporting Python 3.10 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index e17f80cc..fc996be9 100644 --- a/tox.ini +++ b/tox.ini @@ -9,4 +9,4 @@ deps = numpy pytest pytest-cython - git+https://github.com/jd-boyd/python-lzo.git@master # https://github.com/jd-boyd/python-lzo/issues/52 + python-lzo >= 1.14 # Python 3.10 support From a55598b9e1939031dfe4b188de2fde40103d2ab9 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 25 Aug 2022 01:15:28 +0100 Subject: [PATCH 25/68] Fix new E275 linting error Introduced in pycodestyle 2.9.0 . 
--- lib/bx/align/core.py | 4 +- lib/bx/align/score.py | 2 +- lib/bx/align/sitemask/sitemask_tests.py | 6 +-- lib/bx/seqmapping_tests.py | 8 ++-- lib/bx_extras/fpconst.py | 52 ++++++++++++------------- lib/bx_extras/pyparsing.py | 18 ++++----- scripts/lzop_build_offset_table.py | 4 +- scripts/maf_thread_for_species.py | 2 +- 8 files changed, 48 insertions(+), 48 deletions(-) diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index a80b4381..2ae95f08 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -211,7 +211,7 @@ def __eq__(self, other): return True def __ne__(self, other): - return not(self.__eq__(other)) + return not (self.__eq__(other)) def __deepcopy__(self, memo): from copy import deepcopy @@ -405,7 +405,7 @@ def __eq__(self, other): and self.empty == other.empty) def __ne__(self, other): - return not(self.__eq__(other)) + return not (self.__eq__(other)) def __deepcopy__(self, memo): new = Component(src=self.src, start=self.start, size=self.size, strand=self.strand, src_size=self._src_size, text=self.text) diff --git a/lib/bx/align/score.py b/lib/bx/align/score.py index ffdb7a6d..cf8068a6 100644 --- a/lib/bx/align/score.py +++ b/lib/bx/align/score.py @@ -329,7 +329,7 @@ def accumulate_scores(scoring_scheme, text1, text2, skip_ref_gaps=False): else: score += scoring_scheme._get_score((ord(a), ord(b))) last_gap_a = last_gap_b = False - if not(skip_ref_gaps) or a != scoring_scheme.gap1: + if not (skip_ref_gaps) or a != scoring_scheme.gap1: rval[pos] = score pos += 1 return rval diff --git a/lib/bx/align/sitemask/sitemask_tests.py b/lib/bx/align/sitemask/sitemask_tests.py index a87e7b8a..61d096c0 100644 --- a/lib/bx/align/sitemask/sitemask_tests.py +++ b/lib/bx/align/sitemask/sitemask_tests.py @@ -45,7 +45,7 @@ def test_cpg_inclusive(): j = 0 for line in open(out.name): line = line.strip() - if not(line): + if not (line): continue assert cpg_inclusive_result[j] == ",".join(line.split()) j += 1 @@ -61,7 +61,7 @@ def test_cpg_restricted(): j = 0 
for line in open(out.name): line = line.strip() - if not(line): + if not (line): continue assert cpg_restricted_result[j] == ",".join(line.split()) j += 1 @@ -77,7 +77,7 @@ def test_non_cpg(): j = 0 for line in open(out.name): line = line.strip() - if not(line): + if not (line): continue assert noncpg_result[j] == ",".join(line.split()) j += 1 diff --git a/lib/bx/seqmapping_tests.py b/lib/bx/seqmapping_tests.py index 1aec59ad..8c9ddfee 100644 --- a/lib/bx/seqmapping_tests.py +++ b/lib/bx/seqmapping_tests.py @@ -16,12 +16,12 @@ class CharMappingTests(unittest.TestCase): __test__ = False def test_DNA(self): - assert(allclose( + assert (allclose( bx.seqmapping.DNA.translate("ACGTacgt-?X"), [0, 1, 2, 3, 0, 1, 2, 3, 4, -1, -1])) def test_DNA_list(self): - assert(allclose( + assert (allclose( bx.seqmapping.DNA.translate_list(["ACGTA", "TGCAX"]), [0 + 3*6, 1 + 2*6, 2 + 1*6, 3 + 0*6, -1])) @@ -29,7 +29,7 @@ def test_other(self): m = bx.seqmapping.CharToIntArrayMapping() m.set_mapping("A", 0) m.set_mapping("B", 7) - assert(allclose(m.translate("ABCCBA"), [0, 7, -1, -1, 7, 0])) + assert (allclose(m.translate("ABCCBA"), [0, 7, -1, -1, 7, 0])) class IntMappingTests(unittest.TestCase): @@ -41,7 +41,7 @@ def test_simple(self): m.set_mapping(2, 0) m.set_mapping(1, 1) m.set_mapping(3, 1) - assert(allclose(m.translate(array([0, 1, 2, 3, 4], 'i')), array([0, 1, 0, 1, -1]))) + assert (allclose(m.translate(array([0, 1, 2, 3, 4], 'i')), array([0, 1, 0, 1, -1]))) eight_species_mapping = """TTTTTTTT 0 diff --git a/lib/bx_extras/fpconst.py b/lib/bx_extras/fpconst.py index 2369a5a3..1f56b3ce 100644 --- a/lib/bx_extras/fpconst.py +++ b/lib/bx_extras/fpconst.py @@ -34,7 +34,7 @@ _big_endian = struct.pack('i', 1)[:1] != b'\x01' # and define appropriate constants -if(_big_endian): +if (_big_endian): NaN = struct.unpack('d', b'\x7F\xF8\x00\x00\x00\x00\x00\x00')[0] PosInf = struct.unpack('d', b'\x7F\xF0\x00\x00\x00\x00\x00\x00')[0] NegInf = -PosInf @@ -131,43 +131,43 @@ def isNegInf(value): def 
test_isNaN(): - assert(not isNaN(PosInf)) - assert(not isNaN(NegInf)) - assert(isNaN(NaN)) - assert(not isNaN(1.0)) - assert(not isNaN(-1.0)) + assert (not isNaN(PosInf)) + assert (not isNaN(NegInf)) + assert (isNaN(NaN)) + assert (not isNaN(1.0)) + assert (not isNaN(-1.0)) def test_isInf(): - assert(isInf(PosInf)) - assert(isInf(NegInf)) - assert(not isInf(NaN)) - assert(not isInf(1.0)) - assert(not isInf(-1.0)) + assert (isInf(PosInf)) + assert (isInf(NegInf)) + assert (not isInf(NaN)) + assert (not isInf(1.0)) + assert (not isInf(-1.0)) def test_isFinite(): - assert(not isFinite(PosInf)) - assert(not isFinite(NegInf)) - assert(not isFinite(NaN)) - assert(isFinite(1.0)) - assert(isFinite(-1.0)) + assert (not isFinite(PosInf)) + assert (not isFinite(NegInf)) + assert (not isFinite(NaN)) + assert (isFinite(1.0)) + assert (isFinite(-1.0)) def test_isPosInf(): - assert(isPosInf(PosInf)) - assert(not isPosInf(NegInf)) - assert(not isPosInf(NaN)) - assert(not isPosInf(1.0)) - assert(not isPosInf(-1.0)) + assert (isPosInf(PosInf)) + assert (not isPosInf(NegInf)) + assert (not isPosInf(NaN)) + assert (not isPosInf(1.0)) + assert (not isPosInf(-1.0)) def test_isNegInf(): - assert(not isNegInf(PosInf)) - assert(isNegInf(NegInf)) - assert(not isNegInf(NaN)) - assert(not isNegInf(1.0)) - assert(not isNegInf(-1.0)) + assert (not isNegInf(PosInf)) + assert (isNegInf(NegInf)) + assert (not isNegInf(NaN)) + assert (not isNegInf(1.0)) + assert (not isNegInf(-1.0)) # overall test diff --git a/lib/bx_extras/pyparsing.py b/lib/bx_extras/pyparsing.py index d5b60f58..91b784bc 100644 --- a/lib/bx_extras/pyparsing.py +++ b/lib/bx_extras/pyparsing.py @@ -137,11 +137,11 @@ def __getattr__(self, aname): - col - returns the column number of the exception text - line - returns the line containing the exception text """ - if(aname == "lineno"): + if (aname == "lineno"): return lineno(self.loc, self.pstr) - elif(aname in ("col", "column")): + elif (aname in ("col", "column")): return 
col(self.loc, self.pstr) - elif(aname == "line"): + elif (aname == "line"): return line(self.loc, self.pstr) else: raise AttributeError(aname) @@ -1617,7 +1617,7 @@ def parseImpl(self, instring, loc, doActions=True): loc = result.end() return loc, result.group() - if not(instring[loc] in self.initChars): + if not (instring[loc] in self.initChars): # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc @@ -1956,7 +1956,7 @@ def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): self.minLen = exact def parseImpl(self, instring, loc, doActions=True): - if not(instring[loc] in self.matchWhite): + if not (instring[loc] in self.matchWhite): exc = self.myException exc.loc = loc exc.pstr = instring @@ -2026,7 +2026,7 @@ def preParse(self, instring, loc): return loc def parseImpl(self, instring, loc, doActions=True): - if not(loc == 0 + if not (loc == 0 or (loc == self.preParse(instring, 0)) or (instring[loc-1] == "\n")): # col(loc, instring) != 1: exc = self.myException @@ -2219,7 +2219,7 @@ def streamline(self): if len(self.exprs) == 2: other = self.exprs[0] if (isinstance(other, self.__class__) - and not(other.parseAction) + and not (other.parseAction) and other.resultsName is None and not other.debug): self.exprs = other.exprs[:] + [self.exprs[1]] @@ -2229,7 +2229,7 @@ def streamline(self): other = self.exprs[-1] if (isinstance(other, self.__class__) - and not(other.parseAction) + and not (other.parseAction) and other.resultsName is None and not other.debug): self.exprs = self.exprs[:-1] + other.exprs[:] @@ -3575,7 +3575,7 @@ def checkUnindent(s, l, t): if l >= len(s): return curCol = col(l, s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + if not (indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s, l, "not an unindent") indentStack.pop() diff --git a/scripts/lzop_build_offset_table.py b/scripts/lzop_build_offset_table.py index 24189ad3..8e32483c 
100755 --- a/scripts/lzop_build_offset_table.py +++ b/scripts/lzop_build_offset_table.py @@ -57,7 +57,7 @@ def main(): assert 1 <= method <= 3, "Only LZO compression is currently supported" f.get("!B") # level flags = f.get("!I") - assert not(flags & F_H_FILTER), "LZOP filters not supported" + assert not (flags & F_H_FILTER), "LZOP filters not supported" has_compressed_crc = (flags & F_CRC32_C or flags & F_ADLER32_C) has_uncompressed_crc = (flags & F_CRC32_D or flags & F_ADLER32_D) f.get("!I") # mode @@ -79,7 +79,7 @@ def main(): size = f.get("!I") if size == 0: break - assert not(expect_no_more), \ + assert not (expect_no_more), \ "Encountered an undersized block that was not the last block" if block_size is None: print("s", size) diff --git a/scripts/maf_thread_for_species.py b/scripts/maf_thread_for_species.py index 5571bdbc..5a287624 100755 --- a/scripts/maf_thread_for_species.py +++ b/scripts/maf_thread_for_species.py @@ -29,7 +29,7 @@ def main(): # Allow a comma separated list, TODO: allow a newick format tree if len(species) == 1 and ',' in species[0]: species = species[0].split(',') - fuse = not(bool(options.nofuse)) + fuse = not (bool(options.nofuse)) except Exception: doc_optparse.exit() From 981890f7aaefac000d4e6c9dad39e8800eab4309 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Thu, 25 Aug 2022 02:55:49 +0200 Subject: [PATCH 26/68] Upgrade GitHub Actions (#83) --- .github/workflows/deploy.yaml | 18 +++++++++++------- .github/workflows/test.yaml | 8 ++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 13ae549d..498b8e14 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -15,8 +15,10 @@ jobs: - os: ubuntu-latest arch: aarch64 steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.x' - name: Set up QEMU to build non-native 
architectures if: ${{ matrix.arch == 'aarch64' }} uses: docker/setup-qemu-action@v1 @@ -35,7 +37,7 @@ jobs: CIBW_SKIP: '*-musllinux_aarch64 pp37-macosx_x86_64' - name: Check packages run: twine check dist/* - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: packages path: dist/ @@ -43,8 +45,10 @@ jobs: build_sdist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.x' - name: Install required Python packages run: | python -m pip install --upgrade pip setuptools wheel @@ -59,7 +63,7 @@ jobs: python -c 'import bx, bx.align, bx.align.sitemask, bx.align.tools, bx.arrays, bx.bbi, bx.cookbook, bx.intervals, bx.intervals.operations, bx.intseq, bx.misc, bx.motif, bx.motif.io, bx.motif.logo, bx.phylo, bx.pwm, bx.seq, bx.tabular, bx_extras' - name: Check packages run: twine check dist/* - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: packages path: dist/ @@ -69,7 +73,7 @@ jobs: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: packages path: dist diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 07f10da6..291cd36e 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -10,8 +10,8 @@ jobs: matrix: python-version: ['3.7', '3.10'] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install flake8 @@ -25,8 +25,8 @@ jobs: matrix: python-version: ['3.7', '3.8', '3.9', '3.10'] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install 
liblzo2-dev From f8beab2f2e72024098b44b95f350467ba5da202e Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 25 Aug 2022 03:20:50 +0100 Subject: [PATCH 27/68] Index TwoBitFile sequences by str not bytes Fix https://github.com/bxlab/bx-python/issues/31 . Also: - Add some type annotations. - Remove unused attributes of TwoBitSequence objects. - Fix use of TwoBitFile in scripts and tests. --- lib/bx/seq/twobit.py | 31 ++++++++++++++++++++----------- lib/bx/seq/twobit_tests.py | 1 - scripts/maf_tile_2bit.py | 5 ++--- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/lib/bx/seq/twobit.py b/lib/bx/seq/twobit.py index 5ba7e3f9..b558c6f1 100644 --- a/lib/bx/seq/twobit.py +++ b/lib/bx/seq/twobit.py @@ -3,6 +3,12 @@ """ from collections.abc import Mapping from struct import calcsize, unpack +from typing import ( + BinaryIO, + Dict, + List, + Tuple, +) from . import _twobit @@ -14,13 +20,16 @@ class TwoBitSequence: + masked_block_sizes: List + masked_block_starts: List + n_block_sizes: List + n_block_starts: List + def __init__(self, tbf, header_offset=None): self.tbf = tbf self.header_offset = header_offset self.sequence_offset = None self.size = None - self.n_blocks = None - self.masked_blocks = None self.loaded = False def __getitem__(self, slice): @@ -49,7 +58,7 @@ def get(self, start, end): class TwoBitFile(Mapping): - def __init__(self, file, do_mask=True): + def __init__(self, file: BinaryIO, do_mask: bool = True): self.do_mask = do_mask # Read magic and determine byte order self.byte_order = ">" @@ -71,14 +80,14 @@ def __init__(self, file, do_mask=True): # Header contains some reserved space self.reserved = self.read("L") # Read index of sequence names to offsets - index = dict() + index: Dict[str, TwoBitSequence] = dict() for _ in range(self.seq_count): name = self.read_p_string() offset = self.read("L") index[name] = TwoBitSequence(self, offset) self.index = index - def __getitem__(self, name): + def __getitem__(self, name: str) -> 
TwoBitSequence: seq = self.index[name] if not seq.loaded: self.load_sequence(name) @@ -87,10 +96,10 @@ def __getitem__(self, name): def __iter__(self): return iter(self.index.keys()) - def __len__(self): + def __len__(self) -> int: return len(self.index) - def load_sequence(self, name): + def load_sequence(self, name: str) -> None: seq = self.index[name] # Seek to start of sequence block self.file.seek(seq.header_offset) @@ -106,7 +115,7 @@ def load_sequence(self, name): # Mark as loaded seq.loaded = True - def read_block_coords(self): + def read_block_coords(self) -> Tuple[list, list]: block_count = self.read("L") if block_count == 0: return [], [] @@ -114,16 +123,16 @@ def read_block_coords(self): sizes = self.read(str(block_count) + "L", untuple=False) return list(starts), list(sizes) - def read(self, pattern, untuple=True): + def read(self, pattern: str, untuple: bool = True): rval = unpack(self.byte_order + pattern, self.file.read(calcsize(self.byte_order + pattern))) if untuple and len(rval) == 1: return rval[0] return rval - def read_p_string(self): + def read_p_string(self) -> str: """ Read a length-prefixed string """ length = self.read("B") - return self.file.read(length) + return self.file.read(length).decode() diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index f286962b..ab94c436 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -36,7 +36,6 @@ def test_random_subseq_matches(filename): with open(test_twobit, 'rb') as f: t = twobit.TwoBitFile(f) for k, s in expected.items(): - k = k.encode() assert k in t.index # assert t.index[k].size == len(s) length = len(s) diff --git a/scripts/maf_tile_2bit.py b/scripts/maf_tile_2bit.py index 43d1a0bd..abc1e5c6 100755 --- a/scripts/maf_tile_2bit.py +++ b/scripts/maf_tile_2bit.py @@ -27,7 +27,6 @@ -s, --strand: Use strand information for intervals, reveres complement if '-' """ -import string import sys import bx.align as align @@ -36,7 +35,7 @@ import bx.seq.twobit 
from bx.cookbook import doc_optparse -tree_tx = string.maketrans("(),", " ") +tree_tx = str.maketrans("(),", " ") def main(): @@ -44,7 +43,7 @@ def main(): options, args = doc_optparse.parse(__doc__) try: sources = args[0].translate(tree_tx).split() - ref_2bit = bx.seq.twobit.TwoBitFile(open(args[1])) + ref_2bit = bx.seq.twobit.TwoBitFile(open(args[1], "rb")) index = maf.MultiIndexed(args[2:]) out = maf.Writer(sys.stdout) From d962e71197625071fabad60cf1d5631ca287a3a5 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 25 Aug 2022 03:28:18 +0100 Subject: [PATCH 28/68] Fix maketrans use in scripts --- scripts/gene_fourfold_sites.py | 3 +-- scripts/maf_tile.py | 3 +-- scripts/maf_tile_2.py | 3 +-- scripts/maf_translate_chars.py | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/scripts/gene_fourfold_sites.py b/scripts/gene_fourfold_sites.py index 1baf787f..c28be7b7 100755 --- a/scripts/gene_fourfold_sites.py +++ b/scripts/gene_fourfold_sites.py @@ -14,7 +14,6 @@ import os import re -import string import sys from bx.cookbook import doc_optparse @@ -123,7 +122,7 @@ def getnib(nibdir): return seqs -REVMAP = string.maketrans("ACGTacgt", "TGCAtgca") +REVMAP = str.maketrans("ACGTacgt", "TGCAtgca") def revComp(seq): diff --git a/scripts/maf_tile.py b/scripts/maf_tile.py index 81d3925b..8e299845 100755 --- a/scripts/maf_tile.py +++ b/scripts/maf_tile.py @@ -15,7 +15,6 @@ -m, --missingData: Inserts wildcards for missing block rows instead of '-' """ -import string import sys import bx.align as align @@ -23,7 +22,7 @@ import bx.seq.nib from bx.cookbook import doc_optparse -tree_tx = string.maketrans("(),", " ") +tree_tx = str.maketrans("(),", " ") def main(): diff --git a/scripts/maf_tile_2.py b/scripts/maf_tile_2.py index 68a72e42..0b757028 100755 --- a/scripts/maf_tile_2.py +++ b/scripts/maf_tile_2.py @@ -29,7 +29,6 @@ -s, --strand: Use strand information for intervals, reveres complement if '-' """ -import string import sys from cookbook import 
doc_optparse @@ -38,7 +37,7 @@ import bx.align.maf as maf import bx.seq.nib -tree_tx = string.maketrans("(),", " ") +tree_tx = str.maketrans("(),", " ") def main(): diff --git a/scripts/maf_translate_chars.py b/scripts/maf_translate_chars.py index 69d9482f..3205a0ec 100755 --- a/scripts/maf_translate_chars.py +++ b/scripts/maf_translate_chars.py @@ -11,12 +11,11 @@ usage: %prog < maf > maf """ -import string import sys from bx.align import maf -table = string.maketrans("#=X@", "-***") +table = str.maketrans("#=X@", "-***") def main(): From 1f5650ffd1267297f58a5775bb3fe97e2da6c99d Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 25 Aug 2022 13:09:49 +0100 Subject: [PATCH 29/68] Skip building also other musslinux wheels --- .github/workflows/deploy.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 498b8e14..326375f7 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -30,11 +30,11 @@ jobs: run: python -m cibuildwheel --output-dir dist env: CIBW_ARCHS: ${{matrix.arch}} - # Skip building musllinux wheels for aarch64, each one currently takes - # more than 2 hours to build. + # Skip building musllinux wheels for now, they take too long to build, + # mainly because numpy doesn't have musllinux wheels on PyPI yet. # Skip also building the PyPy 3.7 wheel for macOS, because numpy # doesn't have a wheel on PyPI and it fails to install. 
- CIBW_SKIP: '*-musllinux_aarch64 pp37-macosx_x86_64' + CIBW_SKIP: '*-musllinux* pp37-macosx_x86_64' - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v3 From a100c005f0714ebca78ec7770f770d6522b3b870 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Fri, 26 Aug 2022 12:35:14 +0100 Subject: [PATCH 30/68] Format Python code with black and isort --- .github/workflows/test.yaml | 2 +- .isort.cfg | 11 + doc/source/conf.py | 88 +- lib/bx/__init__.py | 2 +- lib/bx/_seqmapping.pyx | 6 +- lib/bx/align/_epo.pyx | 5 +- lib/bx/align/axt.py | 47 +- lib/bx/align/core.py | 108 +- lib/bx/align/epo.py | 114 +- lib/bx/align/epo_tests.py | 121 +- lib/bx/align/lav.py | 138 +- lib/bx/align/lav_tests.py | 43 +- lib/bx/align/maf.py | 56 +- lib/bx/align/maf_tests.py | 127 +- lib/bx/align/score.py | 68 +- lib/bx/align/score_tests.py | 79 +- lib/bx/align/sitemask/_cpg.pyx | 1 + lib/bx/align/sitemask/core.py | 2 +- lib/bx/align/sitemask/cpg.py | 20 +- lib/bx/align/sitemask/quality.py | 28 +- lib/bx/align/sitemask/sitemask_tests.py | 18 +- lib/bx/align/tools/chop.py | 2 +- lib/bx/align/tools/thread.py | 2 +- lib/bx/align/tools/tile.py | 11 +- lib/bx/arrays/array_tree.pyx | 6 +- lib/bx/arrays/array_tree_tests.py | 40 +- lib/bx/bbi/bbi_file.pxd | 5 +- lib/bx/bbi/bbi_file.pyx | 14 +- lib/bx/bbi/bigbed_file.pyx | 11 +- lib/bx/bbi/bigwig_file.pyx | 12 +- lib/bx/bbi/bigwig_tests.py | 65 +- lib/bx/bbi/bpt_file.pxd | 1 + lib/bx/bbi/cirtree_file.pxd | 1 + lib/bx/binned_array.py | 40 +- lib/bx/binned_array_tests.py | 55 +- lib/bx/bitset.pyx | 1 + lib/bx/bitset_builders.py | 22 +- lib/bx/bitset_tests.py | 13 +- lib/bx/cookbook/__init__.py | 6 +- lib/bx/cookbook/argparse.py | 861 ++--- lib/bx/cookbook/attribute.py | 44 +- lib/bx/cookbook/doc_optparse.py | 6 +- lib/bx/cookbook/progress_bar.py | 14 +- lib/bx/gene_reader.py | 114 +- lib/bx/interval_index_file.py | 53 +- lib/bx/interval_index_file_tests.py | 8 +- lib/bx/intervals/cluster_tests.py | 46 +- 
lib/bx/intervals/intersection.pyx | 1 + lib/bx/intervals/intersection_tests.py | 29 +- lib/bx/intervals/io.py | 46 +- lib/bx/intervals/operations/__init__.py | 2 +- lib/bx/intervals/operations/complement.py | 23 +- lib/bx/intervals/operations/coverage.py | 8 +- lib/bx/intervals/operations/find_clusters.py | 2 +- lib/bx/intervals/operations/intersect.py | 8 +- lib/bx/intervals/operations/join.py | 26 +- lib/bx/intervals/operations/merge.py | 2 +- lib/bx/intervals/operations/quicksect.py | 2 +- lib/bx/intervals/operations/subtract.py | 4 +- lib/bx/intervals/random_intervals.py | 29 +- lib/bx/intseq/ngramcount.pyx | 1 + lib/bx/misc/__init__.py | 2 +- lib/bx/misc/_seekbzip2.pyx | 5 +- lib/bx/misc/binary_file.py | 14 +- lib/bx/misc/cdb.py | 6 +- lib/bx/misc/cdb_tests.py | 6 +- lib/bx/misc/filecache.py | 4 +- lib/bx/misc/readlengths.py | 2 +- lib/bx/misc/seekbzip2.py | 2 +- lib/bx/misc/seekbzip2_tests.py | 5 +- lib/bx/misc/seeklzop.py | 8 +- lib/bx/motif/_pwm.pyx | 1 + lib/bx/motif/io/transfac.py | 14 +- lib/bx/motif/io/transfac_tests.py | 4 +- lib/bx/motif/logo/__init__.py | 26 +- lib/bx/motif/pwm.py | 2 +- lib/bx/motif/pwm_tests.py | 22 +- lib/bx/phylo/newick.py | 19 +- lib/bx/phylo/newick_tests.py | 231 +- lib/bx/phylo/phast_tests.py | 27 +- lib/bx/pwm/bed_score_aligned_pwm.py | 14 +- lib/bx/pwm/bed_score_aligned_string.py | 14 +- lib/bx/pwm/maf_select_motifs.py | 4 +- lib/bx/pwm/position_weight_matrix.py | 238 +- lib/bx/pwm/pwm_score_maf.py | 26 +- lib/bx/pwm/pwm_score_motifs.py | 12 +- lib/bx/pwm/pwm_score_positions.py | 12 +- lib/bx/pwm/pwm_tests.py | 54 +- lib/bx/seq/_nib.pyx | 5 +- lib/bx/seq/_twobit.pyx | 6 +- lib/bx/seq/core.py | 42 +- lib/bx/seq/fasta.py | 38 +- lib/bx/seq/fasta_tests.py | 19 +- lib/bx/seq/nib.py | 14 +- lib/bx/seq/nib_tests.py | 21 +- lib/bx/seq/qdna.py | 29 +- lib/bx/seq/qdna_tests.py | 19 +- lib/bx/seq/seq.py | 27 +- lib/bx/seq/seq_tests.py | 31 +- lib/bx/seq/twobit.py | 8 +- lib/bx/seq/twobit_tests.py | 15 +- lib/bx/seqmapping_tests.py | 
32 +- lib/bx/tabular/io.py | 6 +- lib/bx/wiggle.py | 20 +- lib/bx/wiggle_tests.py | 10 +- lib/bx_extras/fpconst.py | 86 +- lib/bx_extras/lrucache.py | 12 +- lib/bx_extras/pstat.py | 649 ++-- lib/bx_extras/pyparsing.py | 1319 ++++--- lib/bx_extras/stats.py | 3352 +++++++++-------- lib/psyco_full.py | 1 + pyproject.toml | 8 + script_tests/base/__init__.py | 27 +- script_tests/bnMapper_tests.py | 4 +- script_tests/line_select_tests.py | 18 +- .../maf_extract_ranges_indexed_tests.py | 12 +- script_tests/maf_select_tests.py | 12 +- scripts/aggregate_scores_in_intervals.py | 9 +- scripts/axt_to_fasta.py | 4 +- scripts/axt_to_lav.py | 13 +- scripts/axt_to_maf.py | 36 +- scripts/bed_bigwig_profile.py | 10 +- scripts/bed_count_by_interval.py | 8 +- scripts/bed_count_overlapping.py | 8 +- scripts/bed_coverage_by_interval.py | 4 +- scripts/bed_intersect.py | 6 +- scripts/bed_rand_intersect.py | 7 +- scripts/bnMapper.py | 217 +- scripts/div_snp_table_chr.py | 22 +- scripts/gene_fourfold_sites.py | 140 +- scripts/get_scores_in_intervals.py | 2 +- scripts/int_seqs_to_char_strings.py | 2 +- scripts/interval_count_intersections.py | 6 +- scripts/lav_to_axt.py | 2 +- scripts/lav_to_maf.py | 2 +- scripts/line_select.py | 2 +- scripts/lzop_build_offset_table.py | 10 +- scripts/mMK_bitset.py | 28 +- scripts/maf_build_index.py | 12 +- scripts/maf_chunk.py | 3 +- scripts/maf_col_counts.py | 2 +- scripts/maf_col_counts_all.py | 6 +- scripts/maf_count.py | 2 +- scripts/maf_covered_ranges.py | 8 +- scripts/maf_covered_regions.py | 23 +- scripts/maf_div_sites.py | 6 +- scripts/maf_extract_chrom_ranges.py | 2 +- scripts/maf_extract_ranges_indexed.py | 2 +- scripts/maf_filter.py | 2 +- scripts/maf_filter_max_wc.py | 2 +- scripts/maf_gap_frequency.py | 2 +- scripts/maf_gc_content.py | 10 +- scripts/maf_interval_alignibility.py | 19 +- scripts/maf_limit_to_species.py | 4 +- scripts/maf_mapping_word_frequency.py | 12 +- scripts/maf_mask_cpg.py | 2 +- scripts/maf_mean_length_ungapped_piece.py | 2 
+- scripts/maf_percent_identity.py | 2 +- scripts/maf_print_chroms.py | 2 +- scripts/maf_print_scores.py | 14 +- scripts/maf_region_coverage_by_src.py | 4 +- scripts/maf_species_in_all_files.py | 2 +- scripts/maf_split_by_src.py | 3 +- scripts/maf_thread_for_species.py | 8 +- scripts/maf_tile.py | 12 +- scripts/maf_tile_2.py | 50 +- scripts/maf_tile_2bit.py | 48 +- scripts/maf_to_concat_fasta.py | 4 +- scripts/maf_to_fasta.py | 2 +- scripts/maf_to_int_seqs.py | 2 +- scripts/maf_word_frequency.py | 2 +- scripts/mask_quality.py | 10 +- scripts/out_to_chain.py | 23 +- scripts/prefix_lines.py | 2 +- scripts/qv_to_bqv.py | 4 +- scripts/random_lines.py | 2 +- scripts/table_add_column.py | 2 +- scripts/table_filter.py | 4 +- scripts/ucsc_gene_table_to_intervals.py | 47 +- scripts/wiggle_to_array_tree.py | 2 +- setup.cfg | 2 +- setup.py | 91 +- tox.ini | 16 +- 183 files changed, 5875 insertions(+), 4480 deletions(-) create mode 100644 .isort.cfg diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 291cd36e..a4c9ff81 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -15,7 +15,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install flake8 - run: pip install flake8 flake8-import-order + run: pip install flake8 - name: Lint run: flake8 . test: diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..e740b8c4 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,11 @@ +[settings] +force_alphabetical_sort_within_sections=true +# Override force_grid_wrap value from profile=black, but black is still happy +force_grid_wrap=2 +known_first_party=bx,bx_extras +# Same line length as for black +line_length=120 +no_lines_before=LOCALFOLDER +profile=black +reverse_relative=true +skip_gitignore=true diff --git a/doc/source/conf.py b/doc/source/conf.py index 59c781e9..43935eff 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -23,23 +23,23 @@ # Add any Sphinx extension module names here, as strings. 
They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.intersphinx'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['templates'] +templates_path = ["templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'bx-python' -copyright = '2017, James Taylor' +project = "bx-python" +copyright = "2017, James Taylor" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -53,37 +53,37 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = [] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). 
-#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # Options for HTML output @@ -92,40 +92,40 @@ # The style sheet to use for HTML and HTML Help pages. A file of that name # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. -html_style = 'base.css' +html_style = "base.css" # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['static'] +html_static_path = ["static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. 
-html_index = 'index.html' -html_sidebars = {'index': 'indexsidebar.html'} +html_index = "index.html" +html_sidebars = {"index": "indexsidebar.html"} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -134,61 +134,61 @@ ##} # If false, no module index is generated. -#html_use_modindex = True +# html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, the reST sources are included in the HTML build as _sources/. -#html_copy_source = True +# html_copy_source = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. -htmlhelp_basename = 'bx-doc' +htmlhelp_basename = "bx-doc" # Options for LaTeX output # ------------------------ # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). -latex_documents = [( - 'index', 'bx-python.tex', 'bx-python Documentation', - 'James Taylor', 'manual'), ] +latex_documents = [ + ("index", "bx-python.tex", "bx-python Documentation", "James Taylor", "manual"), +] # The name of an image file (relative to this directory) to place at the top of # the title page. 
-#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_use_modindex = True +# latex_use_modindex = True # Example configuration for intersphinx: refer to the Python standard library. -#intersphinx_mapping = {'http://docs.python.org/dev': None} +# intersphinx_mapping = {'http://docs.python.org/dev': None} diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index ab26e3de..07508a60 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = '0.8.13' +__version__ = "0.8.13" diff --git a/lib/bx/_seqmapping.pyx b/lib/bx/_seqmapping.pyx index 29c7e5e6..ecdbaa5f 100644 --- a/lib/bx/_seqmapping.pyx +++ b/lib/bx/_seqmapping.pyx @@ -11,10 +11,12 @@ cdef extern from "Python.h": int PyObject_AsWriteBuffer(object, void **, Py_ssize_t *) except -1 int PyBytes_AsStringAndSize(object, char **, Py_ssize_t *) except -1 -from numpy import zeros -from math import floor import random import sys +from math import floor + +from numpy import zeros + cdef class CharToIntArrayMapping: """Mapping for converting strings to int arrays""" diff --git a/lib/bx/align/_epo.pyx b/lib/bx/align/_epo.pyx index e2b95455..ba8db44a 100644 --- a/lib/bx/align/_epo.pyx +++ b/lib/bx/align/_epo.pyx @@ -1,8 +1,11 @@ -import logging, gzip +import gzip +import logging from collections import namedtuple + import numpy + cimport numpy log = logging.getLogger(__name__) diff --git a/lib/bx/align/axt.py b/lib/bx/align/axt.py index 00848e65..b8ec7f9f 100644 --- a/lib/bx/align/axt.py +++ b/lib/bx/align/axt.py @@ -9,7 +9,7 @@ from bx.align import ( Alignment, Component, - src_split + src_split, ) # Tools for dealing with pairwise 
alignments in AXT format @@ -31,7 +31,16 @@ def get(self, src, start, end): class Indexed: """Indexed access to a axt using overlap queries, requires an index file""" - def __init__(self, axt_filename, index_filename=None, keep_open=False, species1=None, species2=None, species_to_lengths=None, support_ids=False): + def __init__( + self, + axt_filename, + index_filename=None, + keep_open=False, + species1=None, + species2=None, + species_to_lengths=None, + support_ids=False, + ): if index_filename is None: index_filename = axt_filename + ".index" self.indexes = interval_index_file.Indexes(filename=index_filename) @@ -44,7 +53,7 @@ def __init__(self, axt_filename, index_filename=None, keep_open=False, species1= if self.species2 is None: self.species2 = "species2" self.species_to_lengths = species_to_lengths - self.support_ids = support_ids # for extra text at end of axt header lines + self.support_ids = support_ids # for extra text at end of axt header lines if keep_open: self.f = open(axt_filename) else: @@ -80,7 +89,7 @@ def __init__(self, file, species1=None, species2=None, species_to_lengths=None, if self.species2 is None: self.species2 = "species2" self.species_to_lengths = species_to_lengths - self.support_ids = support_ids # for extra text at end of axt header lines + self.support_ids = support_ids # for extra text at end of axt header lines self.attributes = {} def __next__(self): @@ -108,7 +117,6 @@ def __next__(self): class Writer: - def __init__(self, file, attributes=None): if attributes is None: attributes = {} @@ -120,9 +128,7 @@ def __init__(self, file, attributes=None): def write(self, alignment): if len(alignment.components) != 2: - raise ValueError( - "%d-component alignment is not compatible with axt" % - len(alignment.components)) + raise ValueError("%d-component alignment is not compatible with axt" % len(alignment.components)) c1 = alignment.components[0] c2 = alignment.components[1] @@ -137,10 +143,19 @@ def write(self, alignment): chr1, chr2 = 
c1.src, c2.src self.file.write( - "%d %s %d %d %s %d %d %s %s\n" % - (self.block, chr1, c1.start+1, c1.start+c1.size, - chr2, c2.start+1, c2.start+c2.size, c2.strand, - alignment.score)) + "%d %s %d %d %s %d %d %s %s\n" + % ( + self.block, + chr1, + c1.start + 1, + c1.start + c1.size, + chr2, + c2.start + 1, + c2.start + c2.size, + c2.strand, + alignment.score, + ) + ) self.file.write("%s\n" % c1.text) self.file.write("%s\n" % c2.text) self.file.write("\n") @@ -149,6 +164,7 @@ def write(self, alignment): def close(self): self.file.close() + # ---- Helper methods --------------------------------------------------------- # typical axt block: @@ -183,8 +199,9 @@ def read_next_axt(file, species1, species2, species_to_lengths=None, support_ids component.src = fields[1] if species1 != "": component.src = species1 + "." + component.src - component.start = int(fields[2]) - 1 # (axt intervals are origin-1 - end = int(fields[3]) # and inclusive on both ends) + # axt intervals are origin-1 and inclusive on both ends + component.start = int(fields[2]) - 1 + end = int(fields[3]) component.size = end - component.start component.strand = "+" component.text = seq1.strip() @@ -217,5 +234,5 @@ def readline(file, skip_blank=False): line = file.readline() if not line: return None - if line[0] != '#' and not (skip_blank and line.isspace()): + if line[0] != "#" and not (skip_blank and line.isspace()): return line diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index 2ae95f08..f8e7e376 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -16,7 +16,6 @@ class Alignment: - def __init__(self, score=0, attributes=None, species_to_lengths=None): # species_to_lengths is needed only for file formats that don't provide # chromosome lengths; it maps each species name to one of these: @@ -58,6 +57,7 @@ def set_score(self, score): except ValueError: pass self.__score = score + score = property(fget=get_score, fset=set_score) def __str__(self): @@ -79,7 +79,7 @@ def 
src_size(self, src): chrom_to_length = self.species_to_lengths else: raise ValueError("no src_size (no length file for %s)" % species) - if isinstance(chrom_to_length, int): # (if it's a single length) + if isinstance(chrom_to_length, int): # (if it's a single length) return chrom_to_length if isinstance(chrom_to_length, str): # (if it's a file name) chrom_to_length = read_lengths_file(chrom_to_length) @@ -141,7 +141,7 @@ def slice_by_component(self, component_index, start, end): raise ValueError("can't figure out what to do") start_col = ref.coord_to_col(start) end_col = ref.coord_to_col(end) - if ref.strand == '-': + if ref.strand == "-": (start_col, end_col) = (end_col, start_col) return self.slice(start_col, end_col) @@ -156,7 +156,7 @@ def limit_to_species(self, species): new = Alignment(score=self.score, attributes=self.attributes) new.text_size = self.text_size for component in self.components: - if component.src.split('.')[0] in species: + if component.src.split(".")[0] in species: new.add_component(component) return new @@ -180,7 +180,7 @@ def remove_all_gap_columns(self): for seq in seqs: if seq is None: continue - if seq[i] != '-': + if seq[i] != "-": all_gap = False if all_gap: for seq in seqs: @@ -193,7 +193,7 @@ def remove_all_gap_columns(self): for i in range(len(self.components)): if seqs[i] is None: continue - self.components[i].text = ''.join(seqs[i]) + self.components[i].text = "".join(seqs[i]) self.text_size = text_size def __eq__(self, other): @@ -215,21 +215,25 @@ def __ne__(self, other): def __deepcopy__(self, memo): from copy import deepcopy - new = Alignment(score=self.score, attributes=deepcopy(self.attributes), species_to_lengths=deepcopy(self.species_to_lengths)) + + new = Alignment( + score=self.score, attributes=deepcopy(self.attributes), species_to_lengths=deepcopy(self.species_to_lengths) + ) for component in self.components: new.add_component(deepcopy(component)) return new class Component: - - def __init__(self, src='', start=0, 
size=0, strand=None, src_size=None, text=''): + def __init__(self, src="", start=0, size=0, strand=None, src_size=None, text=""): self._alignment = None self.src = src - self.start = start # Nota Bene: start,size,strand are as they - self.size = size # .. appear in a MAF file-- origin-zero, end - self.strand = strand # .. excluded, and minus strand counts from - self._src_size = src_size # .. end of sequence + # Nota Bene: start, size, strand are as they appear in a MAF file: + # origin-zero, end excluded, and minus strand counts from end of sequence + self.start = start + self.size = size + self.strand = strand + self._src_size = src_size self.text = text self.quality = None # Optional fields to keep track of synteny status (only makes sense @@ -246,18 +250,28 @@ def __init__(self, src='', start=0, size=0, strand=None, src_size=None, text='') def __str__(self): if self.empty: rval = "e %s %d %d %s %d %s" % ( - self.src, self.start, self.size, self.strand, self.src_size, self.synteny_empty) + self.src, + self.start, + self.size, + self.strand, + self.src_size, + self.synteny_empty, + ) else: - rval = "s %s %d %d %s %d %s" % ( - self.src, self.start, self.size, self.strand, self.src_size, self.text) + rval = "s %s %d %d %s %d %s" % (self.src, self.start, self.size, self.strand, self.src_size, self.text) if self.synteny_left and self.synteny_right: rval += "\ni %s %s %d %s %d" % ( - self.src, self.synteny_left[0], self.synteny_left[1], - self.synteny_right[0], self.synteny_right[1]) + self.src, + self.synteny_left[0], + self.synteny_left[1], + self.synteny_right[0], + self.synteny_right[1], + ) return rval def get_end(self): return self.start + self.size + end = property(fget=get_end) def get_src_size(self): @@ -269,20 +283,23 @@ def get_src_size(self): def set_src_size(self, src_size): self._src_size = src_size + src_size = property(fget=get_src_size, fset=set_src_size) def get_forward_strand_start(self): - if self.strand == '-': + if self.strand == "-": return 
self.src_size - self.end else: return self.start + forward_strand_start = property(fget=get_forward_strand_start) def get_forward_strand_end(self): - if self.strand == '-': + if self.strand == "-": return self.src_size - self.start else: return self.end + forward_strand_end = property(fget=get_forward_strand_end) def reverse_complement(self): @@ -324,8 +341,8 @@ def slice(self, start, end): # if self.text[i] != '-': new.start += 1 # for c in new.text: # if c != '-': new.size += 1 - new.start += start - self.text.count('-', 0, start) - new.size = len(new.text) - new.text.count('-') + new.start += start - self.text.count("-", 0, start) + new.size = len(new.text) - new.text.count("-") # FIXME: This annotation probably means nothing after slicing if # one of the ends changes. In general the 'i' rows of a MAF only @@ -351,7 +368,7 @@ def slice_by_coord(self, start, end): """ start_col = self.coord_to_col(start) end_col = self.coord_to_col(end) - if (self.strand == '-'): + if self.strand == "-": (start_col, end_col) = (end_col, start_col) return self.slice(start_col, end_col) @@ -369,18 +386,18 @@ def coord_to_col(self, pos): raise ValueError("Range error: %d not in %d-%d" % (pos, start, end)) if not self.index: self.index = list() - if self.strand == '-': + if self.strand == "-": # nota bene: for - strand self.index[x] maps to one column # higher than is actually associated with the position; thus # when slice_by_component() and slice_by_coord() flip the ends, # the resulting slice is correct - for x in range(len(self.text)-1, -1, -1): - if not self.text[x] == '-': + for x in range(len(self.text) - 1, -1, -1): + if not self.text[x] == "-": self.index.append(x + 1) self.index.append(0) else: for x in range(len(self.text)): - if not self.text[x] == '-': + if not self.text[x] == "-": self.index.append(x) self.index.append(len(self.text)) x = None @@ -393,22 +410,26 @@ def coord_to_col(self, pos): def __eq__(self, other): if other is None or not isinstance(other, 
type(self)): return False - return (self.src == other.src - and self.start == other.start - and self.size == other.size - and self.strand == other.strand - and self._src_size == other._src_size - and self.text == other.text - and self.synteny_left == other.synteny_left - and self.synteny_right == other.synteny_right - and self.synteny_empty == other.synteny_empty - and self.empty == other.empty) + return ( + self.src == other.src + and self.start == other.start + and self.size == other.size + and self.strand == other.strand + and self._src_size == other._src_size + and self.text == other.text + and self.synteny_left == other.synteny_left + and self.synteny_right == other.synteny_right + and self.synteny_empty == other.synteny_empty + and self.empty == other.empty + ) def __ne__(self, other): return not (self.__eq__(other)) def __deepcopy__(self, memo): - new = Component(src=self.src, start=self.start, size=self.size, strand=self.strand, src_size=self._src_size, text=self.text) + new = Component( + src=self.src, start=self.start, size=self.size, strand=self.strand, src_size=self._src_size, text=self.text + ) new._alignment = self._alignment new.quality = self.quality new.synteny_left = self.synteny_left @@ -423,6 +444,7 @@ def get_reader(format, infile, species_to_lengths=None): import bx.align.axt import bx.align.lav import bx.align.maf + if format == "maf": return bx.align.maf.Reader(infile, species_to_lengths) elif format == "axt": @@ -437,6 +459,7 @@ def get_writer(format, outfile, attributes=None): import bx.align.axt import bx.align.lav import bx.align.maf + if attributes is None: attributes = {} if format == "maf": @@ -453,6 +476,7 @@ def get_indexed(format, filename, index_filename=None, keep_open=False, species_ import bx.align.axt import bx.align.lav import bx.align.maf + if format == "maf": return bx.align.maf.Indexed(filename, index_filename, keep_open, species_to_lengths) elif format == "axt": @@ -469,7 +493,7 @@ def shuffle_columns(a): 
random.shuffle(mask) for c in a.components: if not c.empty: - c.text = ''.join([c.text[i] for i in mask]) + c.text = "".join(c.text[i] for i in mask) def src_split(src): # splits src into species,chrom @@ -477,7 +501,7 @@ def src_split(src): # splits src into species,chrom if dot == -1: return None, src else: - return src[:dot], src[dot+1:] + return src[:dot], src[dot + 1 :] def src_merge(species, chrom, contig=None): # creates src (inverse of src_split) @@ -489,16 +513,18 @@ def src_merge(species, chrom, contig=None): # creates src (inverse of src_split src += "[%s]" % contig return src + # ---- Read C extension if available --------------------------------------- try: from ._core import coord_to_col except ImportError: + def coord_to_col(start, text, pos): col = 0 while start < pos: - if text[col] != '-': + if text[col] != "-": start += 1 col += 1 return col diff --git a/lib/bx/align/epo.py b/lib/bx/align/epo.py index f0ba7fd1..e8a1a582 100644 --- a/lib/bx/align/epo.py +++ b/lib/bx/align/epo.py @@ -7,18 +7,17 @@ import re from collections import namedtuple - from ._epo import ( # noqa: F401 bed_union, cummulative_intervals, fastLoadChain, - rem_dash + rem_dash, ) log = logging.getLogger(__name__) -class Chain(namedtuple('Chain', 'score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id')): +class Chain(namedtuple("Chain", "score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id")): """A Chain header as in http://genome.ucsc.edu/goldenPath/help/chain.html chain coordinates are with respect to the strand, so for example tStart on the + strand is the @@ -27,7 +26,9 @@ class Chain(namedtuple('Chain', 'score tName tSize tStrand tStart tEnd qName qSi __slots__ = () def __str__(self): - return "chain {score} {tName} {tSize} {tStrand} {tStart} {tEnd} {qName} {qSize} {qStrand} {qStart} {qEnd} {id}".format(**self._asdict()) + return "chain {score} {tName} {tSize} {tStrand} {tStart} {tEnd} {qName} {qSize} {qStrand} {qStart} {qEnd} 
{id}".format( + **self._asdict() + ) @classmethod def _strfactory(cls, line): @@ -66,8 +67,8 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes): S, T, Q = [], [], [] # the target strand of the chain must be on the forward strand - trg_intervals = trg_comp.intervals(reverse=trg_comp.strand == '-') - qr_intervals = qr_comp.intervals(reverse=trg_comp.strand == '-') + trg_intervals = trg_comp.intervals(reverse=trg_comp.strand == "-") + qr_intervals = qr_comp.intervals(reverse=trg_comp.strand == "-") if len(trg_intervals) == 0 or len(qr_intervals) == 0: log.warning("deletion/insertion only intervals") return None @@ -103,39 +104,59 @@ def _make_from_epo(cls, trg_comp, qr_comp, trg_chrom_sizes, qr_chrom_sizes): qSize = qr_chrom_sizes[qr_comp.chrom] # UCSC coordinates are 0-based, half-open and e! coordinates are 1-base, closed # chain_start = epo_start - 1 and chain_end = epo_end - if qr_comp.strand == '+': + if qr_comp.strand == "+": chain = Chain( - 0, trg_comp.chrom, tSize, "+", - (trg_comp.start - 1) + tr_start_correction, trg_comp.end - tr_end_correction, - qr_comp.chrom, qSize, (qr_comp.strand == trg_comp.strand and '+' or '-'), - (qr_comp.start - 1) + qr_start_correction, qr_comp.end - qr_end_correction, - qr_comp.gabid) + 0, + trg_comp.chrom, + tSize, + "+", + (trg_comp.start - 1) + tr_start_correction, + trg_comp.end - tr_end_correction, + qr_comp.chrom, + qSize, + (qr_comp.strand == trg_comp.strand and "+" or "-"), + (qr_comp.start - 1) + qr_start_correction, + qr_comp.end - qr_end_correction, + qr_comp.gabid, + ) else: chain = Chain( - 0, trg_comp.chrom, tSize, "+", - (trg_comp.start - 1) + tr_start_correction, trg_comp.end - tr_end_correction, - qr_comp.chrom, qSize, (qr_comp.strand == trg_comp.strand and '+' or '-'), - (qr_comp.start - 1) + qr_end_correction, qr_comp.end - qr_start_correction, - qr_comp.gabid) + 0, + trg_comp.chrom, + tSize, + "+", + (trg_comp.start - 1) + tr_start_correction, + trg_comp.end - tr_end_correction, + 
qr_comp.chrom, + qSize, + (qr_comp.strand == trg_comp.strand and "+" or "-"), + (qr_comp.start - 1) + qr_end_correction, + qr_comp.end - qr_start_correction, + qr_comp.gabid, + ) # strand correction. in UCSC coordinates this is: size - coord - if chain.qStrand == '-': - chain = chain._replace( - qEnd=chain.qSize - chain.qStart, - qStart=chain.qSize - chain.qEnd) + if chain.qStrand == "-": + chain = chain._replace(qEnd=chain.qSize - chain.qStart, qStart=chain.qSize - chain.qEnd) assert chain.tEnd - chain.tStart == sum(S) + sum(T), "[%s] %d != %d" % ( - str(chain), chain.tEnd - chain.tStart, sum(S) + sum(T)) + str(chain), + chain.tEnd - chain.tStart, + sum(S) + sum(T), + ) assert chain.qEnd - chain.qStart == sum(S) + sum(Q), "[%s] %d != %d" % ( - str(chain), chain.qEnd - chain.qStart, sum(S) + sum(Q)) + str(chain), + chain.qEnd - chain.qStart, + sum(S) + sum(Q), + ) return chain, S, T, Q def slice(self, who): "return the slice entry (in a bed6 format), AS IS in the chain header" - assert who in ('t', 'q'), "who should be 't' or 'q'" + assert who in ("t", "q"), "who should be 't' or 'q'" - if who == 't': + if who == "t": return (self.tName, self.tStart, self.tEnd, self.id, self.score, self.tStrand) else: return (self.qName, self.qStart, self.qEnd, self.id, self.score, self.qStrand) @@ -143,16 +164,16 @@ def slice(self, who): def bedInterval(self, who): "return a BED6 entry, thus DOES coordinate conversion for minus strands" - if who == 't': + if who == "t": st, en = self.tStart, self.tEnd - if self.tStrand == '-': - st, en = self.tSize-en, self.tSize-st + if self.tStrand == "-": + st, en = self.tSize - en, self.tSize - st return (self.tName, st, en, self.id, self.score, self.tStrand) else: st, en = self.qStart, self.qEnd - if self.qStrand == '-': - st, en = self.qSize-en, self.qSize-st - assert en-st == self.qEnd - self.qStart + if self.qStrand == "-": + st, en = self.qSize - en, self.qSize - st + assert en - st == self.qEnd - self.qStart return (self.qName, st, en, 
self.id, self.score, self.qStrand) @classmethod @@ -165,7 +186,7 @@ def _parse_file(cls, path, pickle=False): if fname.endswith(".gz"): fname = path[:-3] - if fname.endswith('.pkl'): + if fname.endswith(".pkl"): # you asked for the pickled file. I'll give it to you log.debug("loading pickled file %s ...", fname) with open(fname, "rb") as f: @@ -182,14 +203,14 @@ def _parse_file(cls, path, pickle=False): log.warning("Loading pickled file %s.pkl failed", fname) data = fastLoadChain(path, cls._strfactory) - if pickle and not os.path.isfile('%s.pkl' % fname): + if pickle and not os.path.isfile("%s.pkl" % fname): log.info("pickling to %s.pkl", fname) - with open('%s.pkl' % fname, 'wb') as f: + with open("%s.pkl" % fname, "wb") as f: cPickle.dump(data, f) return data -class EPOitem(namedtuple('Epo_item', 'species gabid chrom start end strand cigar')): +class EPOitem(namedtuple("Epo_item", "species gabid chrom start end strand cigar")): "this format is how alignments are delivered from e!" __slots__ = () @@ -214,12 +235,15 @@ def _strfactory(cls, line): if not chrom.startswith("chr"): chrom = "chr%s" % chrom instance = tuple.__new__( - cls, - (cmp[0], cmp[1], chrom, int(cmp[3]), int(cmp[4]), {'1': '+', '-1': '-'}[cmp[5]], cmp[6])) + cls, (cmp[0], cmp[1], chrom, int(cmp[3]), int(cmp[4]), {"1": "+", "-1": "-"}[cmp[5]], cmp[6]) + ) span = instance.end - instance.start + 1 m_num = sum((t[1] == "M" and [t[0]] or [0])[0] for t in instance.cigar_iter(False)) if span != m_num: - log.warning("[{gabid}] {species}.{chrom}:{start}-{end}.".format(**instance._asdict()) + "(span) %d != %d (matches)" % (span, m_num)) + log.warning( + "[{gabid}] {species}.{chrom}:{start}-{end}.".format(**instance._asdict()) + + "(span) %d != %d (matches)" % (span, m_num) + ) return None return instance @@ -256,7 +280,7 @@ def cigar_iter(self, reverse): parsed_cigar = parsed_cigar[::-1] for _l, t in parsed_cigar: # 1M is encoded as M - l = (_l and int(_l) or 1) # int(_l) cannot be 0 + l = _l and int(_l) or 
1 # int(_l) cannot be 0 data.append((l, t)) return data @@ -280,13 +304,17 @@ def intervals(self, reverse, thr=0): dl = tup[0] else: s = d[-1][1] + dl - d.append((s, s+tup[0])) + d.append((s, s + tup[0])) assert d[0] == (thr, thr) # assert that nr. of Ms in the interval == sum of produced intervals - assert sum(t[0] for t in self.cigar_iter(False) if t[1] == "M") == sum(t[1]-t[0] for t in d) + assert sum(t[0] for t in self.cigar_iter(False) if t[1] == "M") == sum(t[1] - t[0] for t in d) - d_sum = sum(t[1]-t[0] for t in d) + d_sum = sum(t[1] - t[0] for t in d) assert self.end - self.start + 1 == d_sum, "[ (%d, %d) = %d ] != %d" % ( - self.start, self.end, self.end-self.start+1, d_sum) + self.start, + self.end, + self.end - self.start + 1, + d_sum, + ) return d[1:] # clip the (thr, thr) entry diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py index ff58b4ce..99f714d8 100644 --- a/lib/bx/align/epo_tests.py +++ b/lib/bx/align/epo_tests.py @@ -12,7 +12,7 @@ ) from bx.align.epo import ( Chain, - EPOitem + EPOitem, ) @@ -30,7 +30,7 @@ def test_ci(self): for i in range(self.N): assert C[i, 1] - C[i, 0] == S[i] for i in range(1, self.N): - assert C[i, 0] - C[i-1, 1] == D[i-1], "[%d] %d != %d" % (i, C[i, 0] - C[i-1, 1], D[i-1]) + assert C[i, 0] - C[i - 1, 1] == D[i - 1], "[%d] %d != %d" % (i, C[i, 0] - C[i - 1, 1], D[i - 1]) def test_elem_u(self): # back to back, so should return a single interval @@ -38,7 +38,7 @@ def test_elem_u(self): th = 0 for i in range(self.N): size = random.randint(1, 20) - EL.append((th, th+size)) + EL.append((th, th + size)) th += size U = bed_union(np.array(EL, dtype=np.uint64)) assert U[0, 0] == 0 and U[0, 1] == th @@ -48,8 +48,8 @@ def test_elem_u(self): th = 0 for i in range(self.N): size = random.randint(1, 20) - EL.append((th, th+size)) - th += (size + 1) + EL.append((th, th + size)) + th += size + 1 U = bed_union(np.array(EL, dtype=np.uint64)) for i in range(U.shape[0]): assert (U[i, 0], U[i, 1]) == EL[i] @@ -59,37 +59,51 @@ 
def test_elem_u(self): th = 0 for i in range(self.N): size = random.randint(1, 20) - EL.append((th, th+size)) - th += random.randint(1, size+size) # 50% of overlapping + EL.append((th, th + size)) + th += random.randint(1, size + size) # 50% of overlapping U = bed_union(np.array(EL, dtype=np.uint64)) assert U[0, 1] > U[0, 0] for i in range(1, U.shape[0]): assert U[i, 1] > U[i, 0] - assert U[i, 0] > U[i-1, 1] + assert U[i, 0] > U[i - 1, 1] cigar_pairs = [ - ("GGACCTGGAGAGATCAG---------------------------GACTTCAACTGTGTG-------------TCTTAGACTGGG--------AGGGTGTTA", - "AGGCCAGGAGAGATCAGGTAAGTCTTAATTTAATAAAGAGATAGGACCTGAACTGTGTCTAACAATAGGTAATATTAGACTGGGGGAGAGAGAAGACTTTC"), - ("TTT--------------------------------------------------------------------------------------------------------------------T", - "CTTGTACCAAGGACAGTACTGGCAGCCTAATTGCTAACACTTTGTGGTGGATTGGTCCACTCAATATTTGTTCCCACCTCTTTTCAGTCCAGTTCTATAAAGGACAGAAAGTTGAAAACT"), - ("A-------------------------------------------------ACACTGGACACAGCACTAACACGATTACTTA", - "ACATTTCCCACACTCCCTTGCAGCTAGGTTTCTAGATATAATTTAGATTCCA----------------------------A"), - ("TTTGGTCCTCTGGA------CGAGCAGCCAGTGCT---------------------------------------------------------------------------AAAAAAAA", - "T---CATTCTAGCAGGTGCTGCAGCAGCAGGTAGCCCTGGAGCCAACAGTTGTGGCTATGATTCTTGATCATCAGATTTGGCTCAAGTGATGTGTTCCTCTAGCATGCACTTGAGATA"), - ("G-----------------------C----------------------------------------------------------------------------------------A", - "GGCCTGCACTGCCAGTAATTTTAACAAATTTTTAGGCACTGAATTCCCTGTATTAAATCTGTTTTCCTTAGCGTAAACAGATCTCTGTTAAATGAAACTAAACCCTGACTGATA"), - ("TATT----------------------------------T", - "TCCTTCATTTTATTTCTCCCTTAAAATTTTTTTTATTACT"), - ("TAAAAA--A------A------------------------------------------------------------TTTTTTTTTTT", - "T---AATTATTTTGCAGCAGGTCCTTGATAACATATCATCTATAAATATTTCAGCAAGAATCTCTAAAAGGCAAGAACCTCCTTCTT"), - ("AAACAA---------------------------------------TT---T", - "AAACAATACCACTGCATCACTATCAAACCCAAAAAATAACAAAAATTGGGT"), - 
("TCTTAAC---TGCTGAGCCATCCCTCCAGCTCCTGTTTTATTTTTATTATGAAGTAATAATA--ATAG--TAATAATAATGATG", - "TACACTTAATTCTAAAACTTGTTATGAATCATCA----------TTGG--TTTTTTATTGTGAAGAACTAATATAATCAGA--G"), - ("ATGATAATGGTATCCTAGCTCAACACCTG-GAGTTCACCCCAACAGTTAACTAA----GTTTGAGGAAGTGTTAACAAGCCTA---ACAAAGAGGACATGCCAATAGCTGACAGAGTCAC", - "A-------CCTCTGCTAGCTCAACTCCTGAGAATCAATTATATAAGCTAGGTCAGTGGTTTTGAGAAAGTATTAGTAGACATTTCTCCAAAGAATACATAAAAATGGCC-A--CAAGTAT") + ( + "GGACCTGGAGAGATCAG---------------------------GACTTCAACTGTGTG-------------TCTTAGACTGGG--------AGGGTGTTA", + "AGGCCAGGAGAGATCAGGTAAGTCTTAATTTAATAAAGAGATAGGACCTGAACTGTGTCTAACAATAGGTAATATTAGACTGGGGGAGAGAGAAGACTTTC", + ), + ( + "TTT--------------------------------------------------------------------------------------------------------------------T", + "CTTGTACCAAGGACAGTACTGGCAGCCTAATTGCTAACACTTTGTGGTGGATTGGTCCACTCAATATTTGTTCCCACCTCTTTTCAGTCCAGTTCTATAAAGGACAGAAAGTTGAAAACT", + ), + ( + "A-------------------------------------------------ACACTGGACACAGCACTAACACGATTACTTA", + "ACATTTCCCACACTCCCTTGCAGCTAGGTTTCTAGATATAATTTAGATTCCA----------------------------A", + ), + ( + "TTTGGTCCTCTGGA------CGAGCAGCCAGTGCT---------------------------------------------------------------------------AAAAAAAA", + "T---CATTCTAGCAGGTGCTGCAGCAGCAGGTAGCCCTGGAGCCAACAGTTGTGGCTATGATTCTTGATCATCAGATTTGGCTCAAGTGATGTGTTCCTCTAGCATGCACTTGAGATA", + ), + ( + "G-----------------------C----------------------------------------------------------------------------------------A", + "GGCCTGCACTGCCAGTAATTTTAACAAATTTTTAGGCACTGAATTCCCTGTATTAAATCTGTTTTCCTTAGCGTAAACAGATCTCTGTTAAATGAAACTAAACCCTGACTGATA", + ), + ("TATT----------------------------------T", "TCCTTCATTTTATTTCTCCCTTAAAATTTTTTTTATTACT"), + ( + "TAAAAA--A------A------------------------------------------------------------TTTTTTTTTTT", + "T---AATTATTTTGCAGCAGGTCCTTGATAACATATCATCTATAAATATTTCAGCAAGAATCTCTAAAAGGCAAGAACCTCCTTCTT", + ), + ("AAACAA---------------------------------------TT---T", 
"AAACAATACCACTGCATCACTATCAAACCCAAAAAATAACAAAAATTGGGT"), + ( + "TCTTAAC---TGCTGAGCCATCCCTCCAGCTCCTGTTTTATTTTTATTATGAAGTAATAATA--ATAG--TAATAATAATGATG", + "TACACTTAATTCTAAAACTTGTTATGAATCATCA----------TTGG--TTTTTTATTGTGAAGAACTAATATAATCAGA--G", + ), + ( + "ATGATAATGGTATCCTAGCTCAACACCTG-GAGTTCACCCCAACAGTTAACTAA----GTTTGAGGAAGTGTTAACAAGCCTA---ACAAAGAGGACATGCCAATAGCTGACAGAGTCAC", + "A-------CCTCTGCTAGCTCAACTCCTGAGAATCAATTATATAAGCTAGGTCAGTGGTTTTGAGAAAGTATTAGTAGACATTTCTCCAAAGAATACATAAAAATGGCC-A--CAAGTAT", + ), ] @@ -107,13 +121,13 @@ def toCigar(species, id, s): L.insert(0, 0) size = NZ[i] start = L.index(size) - I.append((I[-1][1] + start, I[-1][1]+start+size)) - L = L[start+1:] + I.append((I[-1][1] + start, I[-1][1] + start + size)) + L = L[start + 1 :] if len(L): I.append((I[-1][1] + len(L), I[-1][1] + len(L))) C = [] for i in range(1, len(I)): - dl = I[i][0] - I[i-1][1] + dl = I[i][0] - I[i - 1][1] ml = I[i][1] - I[i][0] dc = "" @@ -124,10 +138,10 @@ def toCigar(species, id, s): if ml: mc = (ml > 1 and str(ml) or "") + "M" - C.append(dc+mc) - MSUM = sum(i[1]-i[0] for i in I) + C.append(dc + mc) + MSUM = sum(i[1] - i[0] for i in I) start = random.randint(50, 10000) - return "%s\t%d\t1\t%d\t%d\t%d\t%s" % (species, id, start, start+MSUM-1, random.choice((-1, 1)), "".join(C)) + return "%s\t%d\t1\t%d\t%d\t%d\t%s" % (species, id, start, start + MSUM - 1, random.choice((-1, 1)), "".join(C)) class TestEpo(unittest.TestCase): @@ -144,10 +158,10 @@ def test_out(self): def ch(c, ci): th = 0 for l, t in ci: - if t == 'M': - assert c[th:th+l].find('-') == -1 + if t == "M": + assert c[th : th + l].find("-") == -1 else: - assert c[th:th+l] == '-' * l + assert c[th : th + l] == "-" * l th += l for (a, b) in self.epo_records: @@ -157,7 +171,7 @@ def ch(c, ci): def test_make_chain(self): def cch(cigar, s, e): - return cigar[s:e].find('-') == -1 + return cigar[s:e].find("-") == -1 for p in self.epo_records: chain = Chain._make_from_epo(p[0], p[1], {"chr1": 500}, {"chr1": 800}) @@ -166,18 
+180,18 @@ def cch(cigar, s, e): ch, S, T, Q = chain i = int(ch.id) c1, c2 = cigar_pairs[i] - if p[0].strand == '-': + if p[0].strand == "-": c1 = c1[::-1] c2 = c2[::-1] th = 0 for s, t, q in zip(S, T, Q): - if not (cch(c1, th, th+s) and cch(c2, th, th+s)): + if not (cch(c1, th, th + s) and cch(c2, th, th + s)): pdb.set_trace() - assert cch(c1, th, th+s) and cch(c2, th, th+s), f"{c1[th:th+s]} and {c2[th:th+s]}" + assert cch(c1, th, th + s) and cch(c2, th, th + s), f"{c1[th:th+s]} and {c2[th:th+s]}" if t > q: - cch(c1, th+s, th+s+t) and c1[th+s:th+s+t] == '-'*t + cch(c1, th + s, th + s + t) and c1[th + s : th + s + t] == "-" * t else: - cch(c2, th+s, th+s+q) and c1[th+s:th+s+q] == '-'*q + cch(c2, th + s, th + s + q) and c1[th + s : th + s + q] == "-" * q th = th + s + max(t, q) def test_rem_dash(self): @@ -192,8 +206,13 @@ def test_rem_dash(self): tStart = random.randint(0, 1000) qStart = random.randint(0, 1000) epo_pair = ( - EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+12-1, "4M2D4M%dD4M" % (dash_cols+3))), - EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+14-1, "7M%dD7M" % (dash_cols+3)))) + EPOitem._strfactory( + "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + 12 - 1, "4M2D4M%dD4M" % (dash_cols + 3)) + ), + EPOitem._strfactory( + "mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart + 14 - 1, "7M%dD7M" % (dash_cols + 3)) + ), + ) chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800}) ti = epo_pair[0].intervals(False) qi = epo_pair[1].intervals(False) @@ -216,8 +235,14 @@ def test_rem_dash(self): qStart = random.randint(0, 1000) epo_pair = ( - EPOitem._strfactory("homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart+tm-1, "%dD%dM" % (dash_cols+1, tm))), - EPOitem._strfactory("mus_musculus\t0\t1\t%d\t%d\t1\t%s" % (qStart, qStart+qm+1-1, "M%dD%dM" % (dash_cols+tm-qm, qm)))) + EPOitem._strfactory( + "homo_sapiens\t0\t1\t%d\t%d\t1\t%s" % (tStart, tStart + tm - 1, "%dD%dM" % 
(dash_cols + 1, tm)) + ), + EPOitem._strfactory( + "mus_musculus\t0\t1\t%d\t%d\t1\t%s" + % (qStart, qStart + qm + 1 - 1, "M%dD%dM" % (dash_cols + tm - qm, qm)) + ), + ) chain = Chain._make_from_epo(epo_pair[0], epo_pair[1], {"chr1": 500}, {"chr1": 800}) if chain[1][-1] != qm: pdb.set_trace() @@ -226,5 +251,5 @@ def test_rem_dash(self): assert (qStart + 1) - 1 == chain[0].qStart, "%d != %d" % (qStart + 1, chain[0].qStart) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/lib/bx/align/lav.py b/lib/bx/align/lav.py index 61b797f6..102ef346 100644 --- a/lib/bx/align/lav.py +++ b/lib/bx/align/lav.py @@ -13,7 +13,7 @@ Alignment, Component, src_merge, - src_split + src_split, ) @@ -23,9 +23,11 @@ class Reader: def __init__(self, file, path_subs=None, fail_to_ns=False): self.file = file self.lineNumber = 0 - self.path_subs = path_subs # list of (prefix,replacement) to allow - if self.path_subs is None: # .. redirection of sequence file paths - self.path_subs = [] # .. 
on different machines + # list of (prefix, replacement) to allow redirection of sequence file + # paths on different machines + self.path_subs = path_subs + if self.path_subs is None: + self.path_subs = [] self.fail_to_ns = fail_to_ns # True => if sequences fail to open, create a fake file of all Ns self.d_stanza_text = None @@ -53,13 +55,13 @@ def __init__(self, file, path_subs=None, fail_to_ns=False): def __next__(self): while True: line = self.fetch_line(strip=None, requireLine=False) - assert (line), "unexpected end of file (missing #:eof)" + assert line, "unexpected end of file (missing #:eof)" line = line.rstrip() if line == "": # (allow blank lines between stanzas) continue if line == "#:eof": line = self.file.readline().rstrip() - assert (not line), "extra line after #:eof (line %d, \"%s\")" % (self.lineNumber, line) + assert not line, 'extra line after #:eof (line %d, "%s")' % (self.lineNumber, line) return None if line == "#:lav": continue @@ -78,7 +80,7 @@ def __next__(self): if line.endswith("{"): self.parse_unknown_stanza() continue - raise ValueError("incomprehensible line (line %d, \"%s\")" % (self.lineNumber, line)) + raise ValueError('incomprehensible line (line %d, "%s")' % (self.lineNumber, line)) return self.build_alignment(score, pieces) def __iter__(self): @@ -157,8 +159,9 @@ def open_seqs(self): length1 = self.seq1_file.length length2 = self.seq2_file.length - assert (species1 != species2) or (chrom1 != chrom2) or (length1 == length2), \ - "conflicting lengths for %s (%d and %d)" % (self.seq1_src, length1, length2) + assert ( + (species1 != species2) or (chrom1 != chrom2) or (length1 == length2) + ), "conflicting lengths for %s (%d and %d)" % (self.seq1_src, length1, length2) self.species_to_lengths = {} self.species_to_lengths[species1] = {} @@ -177,22 +180,17 @@ def close_seqs(self): def parse_s_stanza(self): self.close_seqs() line = self.fetch_line(report=" in s-stanza") - (self.seq1_filename, - self.seq1_start, - self.seq1_end, - 
self.seq1_strand, - self.seq1_contig) = self.parse_s_seq(line) + (self.seq1_filename, self.seq1_start, self.seq1_end, self.seq1_strand, self.seq1_contig) = self.parse_s_seq( + line + ) line = self.fetch_line(report=" in s-stanza") - (self.seq2_filename, - self.seq2_start, - self.seq2_end, - self.seq2_strand, - self.seq2_contig) = self.parse_s_seq(line) + (self.seq2_filename, self.seq2_start, self.seq2_end, self.seq2_strand, self.seq2_contig) = self.parse_s_seq( + line + ) line = self.fetch_line(report=" in s-stanza") - assert (line == "}"), "improper s-stanza terminator (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert line == "}", 'improper s-stanza terminator (line %d, "%s")' % (self.lineNumber, line) def parse_s_seq(self, line): fields = line.split() @@ -205,7 +203,7 @@ def parse_s_seq(self, line): else: strand = "+" if filename.endswith("-"): - assert (strand == "-"), "strand mismatch in \"%s\"" % line + assert strand == "-", 'strand mismatch in "%s"' % line filename = filename[:-1] filename = do_path_subs(filename, self.path_subs) return (filename, start, end, strand, contig) @@ -236,18 +234,16 @@ def parse_h_stanza(self): self.seq2_header = "seq2" line = self.fetch_line(report=" in h-stanza") - assert (line == "}"), "improper h-stanza terminator (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert line == "}", 'improper h-stanza terminator (line %d, "%s")' % (self.lineNumber, line) def parse_a_stanza(self): """returns the pair (score,pieces) - where pieces is a list of ungapped segments (start1,start2,length,pctId) - with start1,start2 origin-0""" + where pieces is a list of ungapped segments (start1,start2,length,pctId) + with start1,start2 origin-0""" # 's' line -- score, 1 field line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert (fields[0] == "s"), "s line expected in a-stanza (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert fields[0] == "s", 's line expected in a-stanza (line %d, "%s")' % (self.lineNumber, 
line) try: score = int(fields[1]) except ValueError: @@ -256,18 +252,16 @@ def parse_a_stanza(self): # 'b' line -- begin positions in seqs, 2 fields line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert (fields[0] == "b"), "b line expected in a-stanza (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert fields[0] == "b", 'b line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line) # 'e' line -- end positions in seqs, 2 fields line = self.fetch_line(report=" in a-stanza") fields = line.split() - assert (fields[0] == "e"), "e line expected in a-stanza (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert fields[0] == "e", 'e line expected in a-stanza (line %d, "%s")' % (self.lineNumber, line) # 'l' lines pieces = [] - while (True): + while True: line = self.fetch_line(report=" in a-stanza") fields = line.split() if fields[0] != "l": @@ -280,17 +274,16 @@ def parse_a_stanza(self): pctId = int(fields[5]) except ValueError: pctId = float(fields[5]) - assert (length2 == length), "length mismatch in a-stanza" - pieces.append((start1+self.seq1_start, start2+self.seq2_start, length, pctId)) - assert (line == "}"), "improper a-stanza terminator (line %d, \"%s\")" \ - % (self.lineNumber, line) + assert length2 == length, "length mismatch in a-stanza" + pieces.append((start1 + self.seq1_start, start2 + self.seq2_start, length, pctId)) + assert line == "}", 'improper a-stanza terminator (line %d, "%s")' % (self.lineNumber, line) return (score, pieces) def parse_unknown_stanza(self): lines = [] - while (True): + while True: line = self.fetch_line() - assert (line), "unexpected end of file (missing #:eof)" + assert line, "unexpected end of file (missing #:eof)" if line == "}": break lines.append(line) @@ -305,8 +298,7 @@ def fetch_line(self, strip=True, requireLine=True, report=""): line = self.file.readline().strip().strip(strip) self.lineNumber += 1 if requireLine: - assert (line), "unexpected blank line or end of file%s (line %d)" \ - 
% (report, self.lineNumber) + assert line, "unexpected blank line or end of file%s (line %d)" % (report, self.lineNumber) return line def d_stanza(self): @@ -327,20 +319,28 @@ def s_stanza(self): else: seq2_strand = "0" - s = " \"%s\" %d %d %s %d\n"\ - % (self.seq1_filename, self.seq2_start+1, self.seq1_end, - seq1_strand, self.seq1_contig) - s += " \"%s\" %d %d %s %d\n"\ - % (self.seq2_filename, self.seq2_start+1, self.seq2_end, - seq2_strand, self.seq2_contig) + s = ' "%s" %d %d %s %d\n' % ( + self.seq1_filename, + self.seq2_start + 1, + self.seq1_end, + seq1_strand, + self.seq1_contig, + ) + s += ' "%s" %d %d %s %d\n' % ( + self.seq2_filename, + self.seq2_start + 1, + self.seq2_end, + seq2_strand, + self.seq2_contig, + ) return "s {\n%s}" % s def h_stanza(self): if self.seq1_header is None: return "" - s = f" \"{self.seq1_header_prefix}{self.seq1_header}\"\n" - s += f" \"{self.seq2_header_prefix}{self.seq2_header}\"\n" + s = f' "{self.seq1_header_prefix}{self.seq1_header}"\n' + s += f' "{self.seq2_header_prefix}{self.seq2_header}"\n' return "h {\n%s}" % s def build_alignment(self, score, pieces): @@ -352,11 +352,11 @@ def build_alignment(self, score, pieces): for (start1, start2, length, _pctId) in pieces: if end1 is not None: if start1 == end1: # insertion in sequence 2 - text1 += self.seq1_gap * (start2-end2) - text2 += self.seq2_file.get(end2, start2-end2) + text1 += self.seq1_gap * (start2 - end2) + text2 += self.seq2_file.get(end2, start2 - end2) else: # insertion in sequence 1 - text1 += self.seq1_file.get(end1, start1-end1) - text2 += self.seq2_gap * (start1-end1) + text1 += self.seq1_file.get(end1, start1 - end1) + text2 += self.seq2_gap * (start1 - end1) text1 += self.seq1_file.get(start1, length) text2 += self.seq2_file.get(start2, length) @@ -389,13 +389,13 @@ def path_to_src_name(self, path_name): slash = path_name.rfind("/") if slash == -1: return path_name - name = path_name[slash+1:] + name = path_name[slash + 1 :] path_name = path_name[:slash] if 
path_name.endswith("/seq"): path_name = path_name[:-4] slash = path_name.rfind("/") if slash != -1: - path_name = path_name[slash+1:] + path_name = path_name[slash + 1 :] return path_name + "." + name def header_to_src_name(self, header): @@ -431,7 +431,7 @@ def __next__(self): class LavAsPiecesReader(Reader): """Iterate over all lav blocks in a file in order, returning alignments - as score and pieces, as returned by Reader.parse_a_stanza""" + as score and pieces, as returned by Reader.parse_a_stanza""" def build_alignment(self, score, pieces): return (score, pieces) @@ -465,9 +465,7 @@ def __init__(self, file, attributes=None): def write(self, alignment): if len(alignment.components) != 2: - raise ValueError( - "%d-component alignment is not compatible with lav" % - len(alignment.components)) + raise ValueError("%d-component alignment is not compatible with lav" % len(alignment.components)) c1 = alignment.components[0] c2 = alignment.components[1] @@ -505,16 +503,16 @@ def write_s_stanza(self): fname1 = build_filename(self.fname1, self.src1) fname2 = build_filename(self.fname2, self.src2) print("s {", file=self.file) - print(" \"%s%s\" 1 %d %d 1" % (fname1, strand1, self.length1, flag1), file=self.file) - print(" \"%s%s\" 1 %d %d 1" % (fname2, strand2, self.length2, flag2), file=self.file) + print(' "%s%s" 1 %d %d 1' % (fname1, strand1, self.length1, flag1), file=self.file) + print(' "%s%s" 1 %d %d 1' % (fname2, strand2, self.length2, flag2), file=self.file) print("}", file=self.file) def write_h_stanza(self): strand1 = rc_or_nothing(self.strand1) strand2 = rc_or_nothing(self.strand2) print("h {", file=self.file) - print(f" \"> {self.src1}{strand1}\"", file=self.file) - print(f" \"> {self.src2}{strand2}\"", file=self.file) + print(f' "> {self.src1}{strand1}"', file=self.file) + print(f' "> {self.src2}{strand2}"', file=self.file) print("}", file=self.file) def write_a_stanza(self, alignment): @@ -542,7 +540,7 @@ def write_a_stanza(self, alignment): idCount += 1 
elif piece1 is not None: # new gap starts size = pos1 - piece1 - pctId = (200*idCount + size) / (2*size) + pctId = (200 * idCount + size) / (2 * size) pieces.append((piece1, piece2, size, pctId)) piece1 = None @@ -553,7 +551,7 @@ def write_a_stanza(self, alignment): if piece1 is not None: size = pos1 - piece1 - pctId = (200*idCount + size) / (2*size) + pctId = (200 * idCount + size) / (2 * size) pieces.append((piece1, piece2, size, pctId)) # write the block @@ -562,7 +560,7 @@ def write_a_stanza(self, alignment): end1 = start1 + size end2 = start2 + size - (start1, start2, size, pctId) = pieces[0] # get start of first piece + (start1, start2, size, pctId) = pieces[0] # get start of first piece score = int(round(alignment.score)) @@ -582,8 +580,10 @@ def write_trailer(self): def sort_keys_by_chrom(keys): - decorated = sorted((chrom_key(src1), strand1, chrom_key(src2), strand2, (src1, strand1, src2, strand2)) - for (src1, strand1, src2, strand2) in keys) + decorated = sorted( + (chrom_key(src1), strand1, chrom_key(src2), strand2, (src1, strand1, src2, strand2)) + for (src1, strand1, src2, strand2) in keys + ) return [key for (src1, strand1, src2, strand2, key) in decorated] @@ -627,5 +627,5 @@ def rc_or_nothing(strand): def do_path_subs(path, path_subs): for (prefix, replacement) in path_subs: if path.startswith(prefix): - return replacement + path[len(prefix):] + return replacement + path[len(prefix) :] return path diff --git a/lib/bx/align/lav_tests.py b/lib/bx/align/lav_tests.py index 75539f22..f62fc771 100644 --- a/lib/bx/align/lav_tests.py +++ b/lib/bx/align/lav_tests.py @@ -10,7 +10,6 @@ class lavTestCase(unittest.TestCase): - def testReader(self): reader = lav.Reader(open(test_lav)) @@ -18,14 +17,46 @@ def testReader(self): a = next(reader) assert a.score == 10286, "a.score is wrong: %s" % a.score assert len(a.components) == 2 - check_component(a.components[0], "apple", 106, 252, "+", 411, 
"GTCCGGCCGGCTGAGAGCTACAATACACATGCACGCAGTTTGGCCACTCACATTAAGTATATGAGGAAGGGTTAGCATGAGTTGTACTATAAGGCAGCGGATAGCAGGTTGTGGAAAAATATCCTCCCGATTCAAATCCCCAGGTGCCTAAA----------------GTAGGGCCGGTAGTTGAATGCTTGCCTGTCAGACTGGATGACCAAGTTCAGTATCAACACAATATAGTGCCAGGAGCTAATTGTTCCCCAGCAGCGTGAC") - check_component(a.components[1], "lav_tests.orange", 53, 252, "+", 361, "GTCCGGCCGGCTGTGTGCTACAATACACGTTCACGCAGTTTGGCCAATCACTTTAAGTATATACGAAATGGTTACCATGAGTTGTACTGTAAGGCAGCGGAAAGC---TTGTTAA--------CTCCTGGGCGACATT----GGGGCTGCAACATCGTTTATCCTCCTCTACAACCAATAGCTG-TTGCTTCTTGGTTCAAGTATATCCCATGGATTAGTATCAACACGATATAGTGTCAGGAGCTAATTGTTCCCCAGCAGCGTGAC") + check_component( + a.components[0], + "apple", + 106, + 252, + "+", + 411, + "GTCCGGCCGGCTGAGAGCTACAATACACATGCACGCAGTTTGGCCACTCACATTAAGTATATGAGGAAGGGTTAGCATGAGTTGTACTATAAGGCAGCGGATAGCAGGTTGTGGAAAAATATCCTCCCGATTCAAATCCCCAGGTGCCTAAA----------------GTAGGGCCGGTAGTTGAATGCTTGCCTGTCAGACTGGATGACCAAGTTCAGTATCAACACAATATAGTGCCAGGAGCTAATTGTTCCCCAGCAGCGTGAC", + ) + check_component( + a.components[1], + "lav_tests.orange", + 53, + 252, + "+", + 361, + "GTCCGGCCGGCTGTGTGCTACAATACACGTTCACGCAGTTTGGCCAATCACTTTAAGTATATACGAAATGGTTACCATGAGTTGTACTGTAAGGCAGCGGAAAGC---TTGTTAA--------CTCCTGGGCGACATT----GGGGCTGCAACATCGTTTATCCTCCTCTACAACCAATAGCTG-TTGCTTCTTGGTTCAAGTATATCCCATGGATTAGTATCAACACGATATAGTGTCAGGAGCTAATTGTTCCCCAGCAGCGTGAC", + ) a = next(reader) assert a.score == 3586, "a.score is wrong: %s" % a.score assert len(a.components) == 2 - check_component(a.components[0], "apple", 52, 72, "+", 411, "TGCATATCGACTATTACAGCCACGCGAGTTACATTCCTCTTTTTTTTTGCTGGCGTCCGGCCGGCTGAGAGC") - check_component(a.components[1], "lav_tests.orange", 2, 72, "-", 361, "TGCATATCGACTAGTACAGCCTCTCGAGTTACCCCCCCCATTCCTCTTGCTGACGTCACGCTGCTGGGGAAC") + check_component( + a.components[0], + "apple", + 52, + 72, + "+", + 411, + "TGCATATCGACTATTACAGCCACGCGAGTTACATTCCTCTTTTTTTTTGCTGGCGTCCGGCCGGCTGAGAGC", + ) + check_component( + a.components[1], + "lav_tests.orange", + 2, + 72, + "-", + 361, + 
"TGCATATCGACTAGTACAGCCTCTCGAGTTACCCCCCCCATTCCTCTTGCTGACGTCACGCTGCTGGGGAAC", + ) a = next(reader) assert a is None @@ -40,4 +71,4 @@ def check_component(c, src, start, size, strand, src_size, text): assert c.size == size, f"c.size = {c.size} (expected {size})" assert c.strand == strand, f"c.strand = {c.strand} (expected {strand})" assert c.src_size == src_size, f"c.src_size = {c.src_size} (expected {src_size})" - assert c.text == text, f"c.text = \"{c.text}\" (expected \"{text}\")" + assert c.text == text, f'c.text = "{c.text}" (expected "{text}")' diff --git a/lib/bx/align/maf.py b/lib/bx/align/maf.py index 24ae8c89..c747bf74 100644 --- a/lib/bx/align/maf.py +++ b/lib/bx/align/maf.py @@ -12,18 +12,18 @@ from bx import interval_index_file from bx.align import ( Alignment, - Component + Component, ) -MAF_INVERSE_STATUS = 'V' -MAF_INSERT_STATUS = 'I' -MAF_CONTIG_STATUS = 'C' -MAF_CONTIG_NESTED_STATUS = 'c' -MAF_NEW_STATUS = 'N' -MAF_NEW_NESTED_STATUS = 'n' -MAF_MAYBE_NEW_STATUS = 'S' -MAF_MAYBE_NEW_NESTED_STATUS = 's' -MAF_MISSING_STATUS = 'M' +MAF_INVERSE_STATUS = "V" +MAF_INSERT_STATUS = "I" +MAF_CONTIG_STATUS = "C" +MAF_CONTIG_NESTED_STATUS = "c" +MAF_NEW_STATUS = "N" +MAF_NEW_NESTED_STATUS = "n" +MAF_MAYBE_NEW_STATUS = "S" +MAF_MAYBE_NEW_NESTED_STATUS = "s" +MAF_MISSING_STATUS = "M" class MAFIndexedAccess(interval_index_file.AbstractIndexedAccess): @@ -47,6 +47,7 @@ class MAFMultiIndexedAccess(interval_index_file.AbstractMultiIndexedAccess): """ Indexed access to multiple MAF files. 
""" + indexed_access_class = MAFIndexedAccess @@ -67,7 +68,7 @@ def __init__(self, file, **kwargs): self.maf_kwargs = kwargs # Read and verify maf header, store any attributes fields = self.file.readline().split() - if fields[0] != '##maf': + if fields[0] != "##maf": raise Exception("File does not have MAF header") self.attributes = parse_attributes(fields[1:]) @@ -100,17 +101,16 @@ def __next__(self): class Writer: - def __init__(self, file, attributes=None): if attributes is None: attributes = {} self.file = file # Write header, Webb's maf code wants version first, we accomodate - if 'version' not in attributes: - attributes['version'] = 1 - self.file.write("##maf version=%s" % attributes['version']) + if "version" not in attributes: + attributes["version"] = 1 + self.file.write("##maf version=%s" % attributes["version"]) for key in attributes: - if key == 'version': + if key == "version": continue self.file.writelines(f" {key}={attributes[key]}") self.file.write("\n") @@ -141,6 +141,7 @@ def write(self, alignment): def close(self): self.file.close() + # ---- Helper methods ------------------------------------------------------- @@ -160,12 +161,12 @@ def read_next_maf(file, species_to_lengths=None, parse_e_rows=False): if not line: return None fields = line.split() - if fields[0] != 'a': + if fields[0] != "a": raise Exception("Expected 'a ...' 
line") alignment.attributes = parse_attributes(fields[1:]) - if 'score' in alignment.attributes: - alignment.score = alignment.attributes['score'] - del alignment.attributes['score'] + if "score" in alignment.attributes: + alignment.score = alignment.attributes["score"] + del alignment.attributes["score"] else: alignment.score = 0 # Sequence lines @@ -179,7 +180,7 @@ def read_next_maf(file, species_to_lengths=None, parse_e_rows=False): break # Parse row fields = line.split() - if fields[0] == 's': + if fields[0] == "s": # An 's' row contains sequence for a component component = Component() component.src = fields[1] @@ -192,7 +193,7 @@ def read_next_maf(file, species_to_lengths=None, parse_e_rows=False): # Add to set alignment.add_component(component) last_component = component - elif fields[0] == 'e': + elif fields[0] == "e": # An 'e' row, when no bases align for a given species this tells # us something about the synteny if parse_e_rows: @@ -205,18 +206,17 @@ def read_next_maf(file, species_to_lengths=None, parse_e_rows=False): component.src_size = int(fields[5]) component.text = None synteny = fields[6].strip() - assert len(synteny) == 1, \ - "Synteny status in 'e' rows should be denoted with a single character code" + assert len(synteny) == 1, "Synteny status in 'e' rows should be denoted with a single character code" component.synteny_empty = synteny alignment.add_component(component) last_component = component - elif fields[0] == 'i': + elif fields[0] == "i": # An 'i' row, indicates left and right synteny status for the # previous component, we hope ;) assert fields[1] == last_component.src, "'i' row does not follow matching 's' row" last_component.synteny_left = (fields[2], int(fields[3])) last_component.synteny_right = (fields[4], int(fields[5])) - elif fields[0] == 'q': + elif fields[0] == "q": assert fields[1] == last_component.src, "'q' row does not follow matching 's' row" # TODO: Should convert this to an integer array? 
last_component.quality = fields[2] @@ -230,7 +230,7 @@ def readline(file, skip_blank=False): line = file.readline() if not line: return None - if line[0] != '#' and not (skip_blank and line.isspace()): + if line[0] != "#" and not (skip_blank and line.isspace()): return line @@ -238,7 +238,7 @@ def parse_attributes(fields): """Parse list of key=value strings into a dict""" attributes = {} for field in fields: - pair = field.split('=') + pair = field.split("=") attributes[pair[0]] = pair[1] return attributes diff --git a/lib/bx/align/maf_tests.py b/lib/bx/align/maf_tests.py index 1de58f03..4e2f0f4f 100644 --- a/lib/bx/align/maf_tests.py +++ b/lib/bx/align/maf_tests.py @@ -57,12 +57,20 @@ complex_maf = align.Alignment() complex_maf.score = "7009" -complex_maf.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="ACA-TTACT")) -complex_maf.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="ACAATTGCT")) +complex_maf.components.append( + align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="ACA-TTACT") +) +complex_maf.components.append( + align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="ACAATTGCT") +) complex_maf.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) complex_maf.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) -complex_maf.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="---ATT---")) -complex_maf.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) +complex_maf.components.append( + align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="---ATT---") +) +complex_maf.components.append( + align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None) +) complex_maf.components[-1].empty = True 
complex_maf.components[-1].synteny_empty = maf.MAF_INSERT_STATUS complex_maf.text_size = 9 @@ -97,25 +105,32 @@ def test_reader(): def test_writer(): val = StringIO() - writer = maf.Writer(val, {'scoring': 'foobar'}) + writer = maf.Writer(val, {"scoring": "foobar"}) a = align.Alignment() a.score = 7009 - a.components.append(align.Component(src="human_hoxa", start=100, size=9, strand="+", src_size=1000257, text="ACA-TTACT")) - a.components.append(align.Component(src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT")) + a.components.append( + align.Component(src="human_hoxa", start=100, size=9, strand="+", src_size=1000257, text="ACA-TTACT") + ) + a.components.append( + align.Component(src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT") + ) check_component(a.components[0], "human_hoxa", 100, 9, "+", 1000257, "ACA-TTACT") check_component(a.components[1], "horse_hoxa", 120, 10, "-", 98892, "ACAATTGCT") writer.write(a) - assert val.getvalue() == """##maf version=1 scoring=foobar + assert ( + val.getvalue() + == """##maf version=1 scoring=foobar a score=7009 s human_hoxa 100 9 + 1000257 ACA-TTACT s horse_hoxa 120 10 - 98892 ACAATTGCT """ # noqa: W291 + ) def test_slice(): @@ -133,13 +148,33 @@ def test_slice(): reader = maf.Reader(StringIO(test_maf_3)) a = next(reader) b = a.slice_by_component(0, 40, 62) - check_component(b.components[0], src="apple", start=40, size=22, strand="+", src_size=110, text="TTCGTCACT------GTCGTAAGGGTTC") - check_component(b.components[1], src="orange", start=28, size=22, strand="-", src_size=100, text="TT--TCACTGCTATCGTCGTA----TTC") + check_component( + b.components[0], src="apple", start=40, size=22, strand="+", src_size=110, text="TTCGTCACT------GTCGTAAGGGTTC" + ) + check_component( + b.components[1], src="orange", start=28, size=22, strand="-", src_size=100, text="TT--TCACTGCTATCGTCGTA----TTC" + ) # test slicing with - strand src b = a.slice_by_component(1, 30, 68) - 
check_component(b.components[0], src="apple", start=46, size=41, strand="+", src_size=110, text="ACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTG") - check_component(b.components[1], src="orange", start=32, size=38, strand="-", src_size=100, text="ACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTG") + check_component( + b.components[0], + src="apple", + start=46, + size=41, + strand="+", + src_size=110, + text="ACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTG", + ) + check_component( + b.components[1], + src="orange", + start=32, + size=38, + strand="-", + src_size=100, + text="ACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTG", + ) a = next(reader) assert a is None @@ -149,26 +184,36 @@ def test_reverse_complement(): b = complex_maf.reverse_complement() - check_component(b.components[0], src="human_hoxa", start=100257-100-8, size=8, strand="-", src_size=100257, text="AGTAA-TGT") - check_component(b.components[1], src="horse_hoxa", start=98892-120-9, size=9, strand="+", src_size=98892, text="AGCAATTGT") + check_component( + b.components[0], src="human_hoxa", start=100257 - 100 - 8, size=8, strand="-", src_size=100257, text="AGTAA-TGT" + ) + check_component( + b.components[1], src="horse_hoxa", start=98892 - 120 - 9, size=9, strand="+", src_size=98892, text="AGCAATTGT" + ) assert b.components[1].synteny_right == (maf.MAF_NEW_STATUS, 0) assert b.components[1].synteny_left == (maf.MAF_CONTIG_STATUS, 0) - check_component(b.components[2], src="unknown_1", start=98892-150-3, size=3, strand="+", src_size=98892, text="---AAT---") - check_component(b.components[3], src="unknown_2", start=1200-12-1000, size=1000, strand="-", src_size=1200, text=None) + check_component( + b.components[2], src="unknown_1", start=98892 - 150 - 3, size=3, strand="+", src_size=98892, text="---AAT---" + ) + check_component( + b.components[3], src="unknown_2", start=1200 - 12 - 1000, size=1000, strand="-", src_size=1200, text=None + ) assert b.components[3].empty assert 
b.components[3].synteny_empty == maf.MAF_INSERT_STATUS def test_column_iter(): - expected = [['A', 'A', '-'], - ['C', 'C', '-'], - ['A', 'A', '-'], - ['-', 'A', 'A'], - ['T', 'T', 'T'], - ['T', 'T', 'T'], - ['A', 'G', '-'], - ['C', 'C', '-'], - ['T', 'T', '-']] + expected = [ + ["A", "A", "-"], + ["C", "C", "-"], + ["A", "A", "-"], + ["-", "A", "A"], + ["T", "T", "T"], + ["T", "T", "T"], + ["A", "G", "-"], + ["C", "C", "-"], + ["T", "T", "-"], + ] for i, c in enumerate(complex_maf.column_iter()): assert c == expected[i] @@ -176,12 +221,20 @@ def test_column_iter(): def test_remove_all_gap_column(): complex_maf_gap = align.Alignment() complex_maf_gap.score = "7009" - complex_maf_gap.components.append(align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="-ACA--TTACT")) - complex_maf_gap.components.append(align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="-ACA-ATTGCT")) + complex_maf_gap.components.append( + align.Component(src="human_hoxa", start=100, size=8, strand="+", src_size=100257, text="-ACA--TTACT") + ) + complex_maf_gap.components.append( + align.Component(src="horse_hoxa", start=120, size=9, strand="-", src_size=98892, text="-ACA-ATTGCT") + ) complex_maf_gap.components[-1].synteny_left = (maf.MAF_NEW_STATUS, 0) complex_maf_gap.components[-1].synteny_right = (maf.MAF_CONTIG_STATUS, 0) - complex_maf_gap.components.append(align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="-----ATT---")) - complex_maf_gap.components.append(align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None)) + complex_maf_gap.components.append( + align.Component(src="unknown_1", start=150, size=3, strand="-", src_size=98892, text="-----ATT---") + ) + complex_maf_gap.components.append( + align.Component(src="unknown_2", start=12, size=1000, strand="+", src_size=1200, text=None) + ) complex_maf_gap.components[-1].empty = True 
complex_maf_gap.components[-1].synteny_empty = maf.MAF_INSERT_STATUS complex_maf_gap.text_size = 11 @@ -193,8 +246,18 @@ def test_read_with_synteny(): reader = maf.Reader(StringIO(test_maf_2), parse_e_rows=True) a = next(reader) - check_component(a.components[0], "hg17.chr1", 2005, 34, "+", 245522847, "TGTAACTTAATACCACAACCAGGCATAGGGG--AAA-------------") - check_component(a.components[1], "rheMac2.chr11", 9625228, 31, "+", 134511895, "TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------") + check_component( + a.components[0], "hg17.chr1", 2005, 34, "+", 245522847, "TGTAACTTAATACCACAACCAGGCATAGGGG--AAA-------------" + ) + check_component( + a.components[1], + "rheMac2.chr11", + 9625228, + 31, + "+", + 134511895, + "TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------", + ) print(a.components[1].synteny_left) assert a.components[1].synteny_left == (maf.MAF_CONTIG_STATUS, 0) assert a.components[1].synteny_right == (maf.MAF_INSERT_STATUS, 1678) @@ -208,7 +271,7 @@ def test_write_with_synteny(): reader = maf.Reader(StringIO(test_maf_2), parse_e_rows=True) a = next(reader) val = StringIO() - writer = maf.Writer(val, {'scoring': 'foobar'}) + writer = maf.Writer(val, {"scoring": "foobar"}) writer.write(a) actual = val.getvalue() expected = """##maf version=1 scoring=foobar diff --git a/lib/bx/align/score.py b/lib/bx/align/score.py index cf8068a6..0aa1f9b9 100644 --- a/lib/bx/align/score.py +++ b/lib/bx/align/score.py @@ -7,13 +7,25 @@ float32, int32, ones, - zeros + zeros, ) class ScoringScheme: # note that gap_open and gap_extend are penalties, which means you should make them positive - def __init__(self, gap_open, gap_extend, default=-100, alphabet1="ACGT", alphabet2=None, gap1="-", gap2=None, text1_range=128, text2_range=None, typecode=int32): + def __init__( + self, + gap_open, + gap_extend, + default=-100, + alphabet1="ACGT", + alphabet2=None, + gap1="-", + gap2=None, + text1_range=128, + text2_range=None, + typecode=int32, + ): if text2_range is None: text2_range = 
text1_range if alphabet2 is None: @@ -47,17 +59,17 @@ def set_score(self, a, b, val, foldcase1=False, foldcase2=False): self._set_score((a, b), val) if foldcase1: aCh = chr(a) - if (aCh.isupper()): + if aCh.isupper(): aa = ord(aCh.lower()) - elif (aCh.islower()): + elif aCh.islower(): aa = ord(aCh.upper()) else: foldcase1 = False if foldcase2: bCh = chr(b) - if (bCh.isupper()): + if bCh.isupper(): bb = ord(bCh.lower()) - elif (bCh.islower()): + elif bCh.islower(): bb = ord(bCh.upper()) else: foldcase2 = False @@ -84,12 +96,12 @@ def __str__(self): for a in self.alphabet1: for b in self.alphabet2: score = self._get_score((ord(a), ord(b))) - if (isinstance(score, float)): + if isinstance(score, float): s = "%8.6f" % score else: s = "%s" % score - if (len(s)+1 > width): - width = len(s)+1 + if len(s) + 1 > width: + width = len(s) + 1 lines = [] line = [] if labelRows: @@ -103,7 +115,7 @@ def __str__(self): else: s = "%02X" % ord(b) line.append("%*s" % (width, s)) - lines.append(("".join(line))+"\n") + lines.append(("".join(line)) + "\n") for a in self.alphabet1: line = [] if labelRows: @@ -113,12 +125,12 @@ def __str__(self): line.append("%02X" % ord(a)) for b in self.alphabet2: score = self._get_score((ord(a), ord(b))) - if (isinstance(score, float)): + if isinstance(score, float): s = "%8.6f" % score else: s = "%s" % score line.append("%*s" % (width, s)) - lines.append(("".join(line))+"\n") + lines.append(("".join(line)) + "\n") return "".join(lines) @@ -128,11 +140,11 @@ def read_scoring_scheme(f, gap_open, gap_extend, gap1="-", gap2=None, **kwargs): f can be either a file or the name of a file. 
""" close_it = False - if (isinstance(f, str)): + if isinstance(f, str): f = open(f) close_it = True ss = build_scoring_scheme("".join([line for line in f]), gap_open, gap_extend, gap1=gap1, gap2=gap2, **kwargs) - if (close_it): + if close_it: f.close() return ss @@ -167,12 +179,12 @@ def build_scoring_scheme(s, gap_open, gap_extend, gap1="-", gap2=None, **kwargs) a_la_blastz = True for i, line in enumerate(lines): row_scores = line.split() - if len(row_scores) == len(symbols2): # blastz-style row + if len(row_scores) == len(symbols2): # blastz-style row if symbols1 is None: if len(lines) != len(symbols2): raise bad_matrix symbols1 = symbols2 - elif (rows_have_syms): + elif rows_have_syms: raise bad_matrix elif len(row_scores) == len(symbols2) + 1: # row starts with symbol if symbols1 is None: @@ -217,7 +229,18 @@ def build_scoring_scheme(s, gap_open, gap_extend, gap1="-", gap2=None, **kwargs) typecode = float32 if isinstance(gap_extend, float): typecode = float32 - ss = ScoringScheme(gap_open, gap_extend, alphabet1=alphabet1, alphabet2=alphabet2, gap1=gap1, gap2=gap2, text1_range=text1_range, text2_range=text2_range, typecode=typecode, **kwargs) + ss = ScoringScheme( + gap_open, + gap_extend, + alphabet1=alphabet1, + alphabet2=alphabet2, + gap1=gap1, + gap2=gap2, + text1_range=text1_range, + text2_range=text2_range, + typecode=typecode, + **kwargs, + ) # fill matrix for i, row_scores in enumerate(rows): for j, score in enumerate(map(int_or_float, row_scores)): @@ -239,6 +262,7 @@ def int_or_float(s): except ValueError: return float(s) + # convert possible two-char symbol to a single character @@ -255,7 +279,7 @@ def score_alignment(scoring_scheme, a): score = 0 ncomps = len(a.components) for i in range(ncomps): - for j in range(i+1, ncomps): + for j in range(i + 1, ncomps): score += score_texts(scoring_scheme, a.components[i].text, a.components[j].text) return score @@ -335,8 +359,12 @@ def accumulate_scores(scoring_scheme, text1, text2, skip_ref_gaps=False): 
return rval -hox70 = build_scoring_scheme(""" A C G T +hox70 = build_scoring_scheme( + """ A C G T 91 -114 -31 -123 -114 100 -125 -31 -31 -125 100 -114 - -123 -31 -114 91 """, 400, 30) + -123 -31 -114 91 """, + 400, + 30, +) diff --git a/lib/bx/align/score_tests.py b/lib/bx/align/score_tests.py index 0e57623a..b3d86895 100644 --- a/lib/bx/align/score_tests.py +++ b/lib/bx/align/score_tests.py @@ -13,21 +13,25 @@ import bx.align.maf import bx.align.score -aligns = [("CCACTAGTTTTTAAATAATCTACTATCAAATAAAAGATTTGTTAATAATAAATTTTAAATCATTAACACTT", - "CCATTTGGGTTCAAAAATTGATCTATCA----------TGGTGGATTATTATTTAGCCATTAAGGACAAAT", - -111), - ("CCACTAGTTTTTAAATAATCTAC-----AATAAAAGATTTGTTAATAAT---AAATTTTAAATCATTAA-----CACTT", - "CCATTTGGGTTCAAAAATTGATCTATCA----------TGGTGGAT---TATTATTT-----AGCCATTAAGGACAAAT", - -3626), - ("CCACTAGTTTTTGATTC", - "CCATTTGGGTTC-----", - -299), - ("CTTAGTTTTTGATCACC", - "-----CTTGGGTTTACC", - -299), - ("gggaattgaacaatgagaacacatggacacaggaaggggaacatcacacacc----------ggggcctgttgtggggtggggggaag", - "ggaactagaacaagggagacacatacaaacaacaacaacaacaacacagcccttcccttcaaagagcttatagtctgatggaggagag", - 1690)] +aligns = [ + ( + "CCACTAGTTTTTAAATAATCTACTATCAAATAAAAGATTTGTTAATAATAAATTTTAAATCATTAACACTT", + "CCATTTGGGTTCAAAAATTGATCTATCA----------TGGTGGATTATTATTTAGCCATTAAGGACAAAT", + -111, + ), + ( + "CCACTAGTTTTTAAATAATCTAC-----AATAAAAGATTTGTTAATAAT---AAATTTTAAATCATTAA-----CACTT", + "CCATTTGGGTTCAAAAATTGATCTATCA----------TGGTGGAT---TATTATTT-----AGCCATTAAGGACAAAT", + -3626, + ), + ("CCACTAGTTTTTGATTC", "CCATTTGGGTTC-----", -299), + ("CTTAGTTTTTGATCACC", "-----CTTGGGTTTACC", -299), + ( + "gggaattgaacaatgagaacacatggacacaggaaggggaacatcacacacc----------ggggcctgttgtggggtggggggaag", + "ggaactagaacaagggagacacatacaaacaacaacaacaacaacacagcccttcccttcaaagagcttatagtctgatggaggagag", + 1690, + ), +] mafs = """##maf a score=2883.0 @@ -40,28 +44,31 @@ s rheMac1.SCAFFOLD45837 26063 33 - 31516 TGTGTGATTAATGCCTGAGATTGTGTGAAGTAA------- """ -nonsymm_scheme = bx.align.score.build_scoring_scheme(""" 
A C G T +nonsymm_scheme = bx.align.score.build_scoring_scheme( + """ A C G T 91 0 -31 -123 -114 100 -125 -31 -31 -125 100 -114 - -123 -31 -114 91 """, 400, 30) + -123 -31 -114 91 """, + 400, + 30, +) -aligns_for_nonsymm_scheme = [("AAAACCCCGGGGTTTT", - "ACGTACGTACGTACGT", - -580)] +aligns_for_nonsymm_scheme = [("AAAACCCCGGGGTTTT", "ACGTACGTACGTACGT", -580)] -asymm_scheme = bx.align.score.build_scoring_scheme(""" 01 02 A C G T +asymm_scheme = bx.align.score.build_scoring_scheme( + """ 01 02 A C G T 01 200 -200 -50 100 -50 100 02 -200 200 100 -50 100 -50 """, - 0, 0, gap1='\x00') + 0, + 0, + gap1="\x00", +) -aligns_for_asymm_scheme = [("\x01\x01\x01\x01\x01\x01", - "ACGT\x01\x02", - 100)] +aligns_for_asymm_scheme = [("\x01\x01\x01\x01\x01\x01", "ACGT\x01\x02", 100)] class BasicTests(unittest.TestCase): - def test_scoring_text(self): ss = bx.align.score.hox70 for t1, t2, score in aligns: @@ -74,14 +81,18 @@ def test_align(self): def test_accumulate(self): ss = bx.align.score.hox70 - self.assertTrue(allclose( - bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA"), - cumsum(array([-430, -30, -30, -30, -30, -31, 91, 91, -123])) - )) - self.assertTrue(allclose( - bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA", skip_ref_gaps=True), - cumsum(array([-581, 91, 91, -123])) - )) + self.assertTrue( + allclose( + bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA"), + cumsum(array([-430, -30, -30, -30, -30, -31, 91, 91, -123])), + ) + ) + self.assertTrue( + allclose( + bx.align.score.accumulate_scores(ss, "-----CTTT", "CTTAGTTTA", skip_ref_gaps=True), + cumsum(array([-581, 91, 91, -123])), + ) + ) def test_nonsymm_scoring(self): ss = nonsymm_scheme diff --git a/lib/bx/align/sitemask/_cpg.pyx b/lib/bx/align/sitemask/_cpg.pyx index 44284d08..c4e342a2 100644 --- a/lib/bx/align/sitemask/_cpg.pyx +++ b/lib/bx/align/sitemask/_cpg.pyx @@ -5,6 +5,7 @@ sequences. 
from cpython.version cimport PY_MAJOR_VERSION + cdef extern from "find_cpg.h": int next_cpg( char * sp1, char * sp2, int start) int next_cpg_restricted( char * sp1, char *sp2, int start) diff --git a/lib/bx/align/sitemask/core.py b/lib/bx/align/sitemask/core.py index d12341ef..ce0cddcc 100644 --- a/lib/bx/align/sitemask/core.py +++ b/lib/bx/align/sitemask/core.py @@ -43,5 +43,5 @@ def __call__(self, block): try: masker.__call__ except AttributeError: - raise Exception("Masker in pipeline does not implement \"filter( self, block )\".") + raise Exception('Masker in pipeline does not implement "filter( self, block )".') masker(block) diff --git a/lib/bx/align/sitemask/cpg.py b/lib/bx/align/sitemask/cpg.py index a21a18f0..7b3be2e5 100644 --- a/lib/bx/align/sitemask/cpg.py +++ b/lib/bx/align/sitemask/cpg.py @@ -12,7 +12,7 @@ # Restricted. Only mask out sites that are defitely CpG class Restricted(Masker): - def __init__(self, mask='?'): + def __init__(self, mask="?"): self.mask = mask self.masked = 0 self.total = 0 @@ -22,9 +22,7 @@ def __call__(self, block): return block if len(block.components) < 2: return - cpglist = list_cpg_restricted( - block.components[0].text.upper(), - block.components[1].text.upper()) + cpglist = list_cpg_restricted(block.components[0].text.upper(), block.components[1].text.upper()) # now we have a fast list of CpG columns, iterate/mask self.masked += len(cpglist) @@ -34,11 +32,12 @@ def __call__(self, block): return block + # Inclusive. Mask out all sites that are not non-CpG sites. 
class Inclusive(Masker): - def __init__(self, mask='?'): + def __init__(self, mask="?"): self.mask = mask self.masked = 0 self.total = 0 @@ -48,9 +47,7 @@ def __call__(self, block): return block if len(block.components) < 2: return - cpglist = list_cpg( - block.components[0].text.upper(), - block.components[1].text.upper()) + cpglist = list_cpg(block.components[0].text.upper(), block.components[1].text.upper()) self.masked += len(cpglist) self.total += len(block.components[0].text) @@ -59,11 +56,12 @@ def __call__(self, block): return block + # Mak nonCpG sites class nonCpG(Masker): - def __init__(self, mask='?'): + def __init__(self, mask="?"): self.mask = mask self.masked = 0 self.total = 0 @@ -73,9 +71,7 @@ def __call__(self, block): return block if len(block.components) < 2: return - noncpglist = list_non_cpg( - block.components[0].text.upper(), - block.components[1].text.upper()) + noncpglist = list_non_cpg(block.components[0].text.upper(), block.components[1].text.upper()) # now we have a fast list of non-CpG columns, iterate/mask self.masked += len(noncpglist) diff --git a/lib/bx/align/sitemask/quality.py b/lib/bx/align/sitemask/quality.py index 60674a3c..606c6fb7 100644 --- a/lib/bx/align/sitemask/quality.py +++ b/lib/bx/align/sitemask/quality.py @@ -41,8 +41,8 @@ def __init__(self, qualfiles=None, qualspecies=None, minqual=None, mask="?", cac specdict = {} for chrom in self.qualspecies[species]: specdict[chrom] = FileBinnedArray( - open(qualfile + "." + chrom + ".bqv", "rb"), - cache=self.cache/len(qualfiles)) + open(qualfile + "." 
+ chrom + ".bqv", "rb"), cache=self.cache / len(qualfiles) + ) self.qualities[species] = specdict def __call__(self, block): @@ -57,17 +57,17 @@ def __call__(self, block): # get quality slice, for + strand qual = self.qualities[qualspec][chrom][start:end] x = 0 - while start+x < end: + while start + x < end: self.total += 1 # got the column in the alignment for this particular base if qual[x] < self.minqual: - col = comp.coord_to_col(start+x) + col = comp.coord_to_col(start + x) self.masked += 1 for component in block.components: if component.text[col] != "-": - component.text = component.text[0:col] + \ - self.mask + \ - component.text[col+1:len(component.text)] + component.text = ( + component.text[0:col] + self.mask + component.text[col + 1 : len(component.text)] + ) # iterate through quality x += 1 return block @@ -104,8 +104,8 @@ def __init__(self, qualfiles=None, qualspecies=None, minqual=None, mask="?", cac specdict = {} for chrom in self.qualspecies[species]: specdict[chrom] = FileBinnedArray( - open(qualfile + "." + chrom + ".bqv", "rb"), - cache=self.cache/len(qualfiles)) + open(qualfile + "." 
+ chrom + ".bqv", "rb"), cache=self.cache / len(qualfiles) + ) self.qualities[species] = specdict def __call__(self, block): @@ -118,17 +118,17 @@ def __call__(self, block): # get quality slice, for + strand qual = self.qualities[qualspec][chrom][start:end] x = 0 - while start+x < end: + while start + x < end: self.total += 1 # got the column in the alignment for this particular base if qual[x] < self.minqual: - col = comp.coord_to_col(start+x) + col = comp.coord_to_col(start + x) self.masked += 1 for component in block.components: if component.text[col] != "-": - component.text = component.text[0:col] + \ - self.mask + \ - component.text[col+1:len(component.text)] + component.text = ( + component.text[0:col] + self.mask + component.text[col + 1 : len(component.text)] + ) # iterate through quality x += 1 return block diff --git a/lib/bx/align/sitemask/sitemask_tests.py b/lib/bx/align/sitemask/sitemask_tests.py index 61d096c0..dd7fec2b 100644 --- a/lib/bx/align/sitemask/sitemask_tests.py +++ b/lib/bx/align/sitemask/sitemask_tests.py @@ -17,29 +17,29 @@ "##maf,version=1", "a,score=0", "s,apple,34,64,+,110,AGGGA---GTTCGTCACT------GT##TAAGGGTTCAGA--CTGTCTATGTATACACAAGTTGTGTTGCA--ACCG", - "s,orange,19,61,-,100,AGGGATG#GTT--TCACTGCTAT#GT##TA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG" + "s,orange,19,61,-,100,AGGGATG#GTT--TCACTGCTAT#GT##TA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG", ] cpg_restricted_result = [ "##maf,version=1", "a,score=0", "s,apple,34,64,+,110,A##GA---#TT##TC#C#------#T##TA###GTTC#GA--C##TC#A#G#ATAC####GT#G#GT#GC#--AC#G", - "s,orange,19,61,-,100,A##GA#G##TT--TC#C#GC#AT##T##TA----TTC#GAC#T##-C#A#C#------##GT#G#---GC#TTAC#G" + "s,orange,19,61,-,100,A##GA#G##TT--TC#C#GC#AT##T##TA----TTC#GAC#T##-C#A#C#------##GT#G#---GC#TTAC#G", ] noncpg_result = [ "##maf,version=1", "a,score=0", "s,apple,34,64,+,110,#GG##---G##CG##A#T------G#CG##AGG####A##--#TG##T#T#T####ACAA##T#T##T##A--##CG", - 
"s,orange,19,61,-,100,#GG##T#CG##--##A#T##T##CG#CG##----###A###T#CG-#T#T#T------GA##T#T---##A####CG" + "s,orange,19,61,-,100,#GG##T#CG##--##A#T##T##CG#CG##----###A###T#CG-#T#T#T------GA##T#T---##A####CG", ] def test_cpg_inclusive(): reader = bx.align.maf.Reader(StringIO(test_maf_cpg)) - out = tempfile.NamedTemporaryFile('w') + out = tempfile.NamedTemporaryFile("w") writer = bx.align.maf.Writer(out) - cpgfilter = cpg.Inclusive(mask='#') + cpgfilter = cpg.Inclusive(mask="#") cpgfilter.run(reader, writer.write) out.seek(0) j = 0 @@ -53,9 +53,9 @@ def test_cpg_inclusive(): def test_cpg_restricted(): reader = bx.align.maf.Reader(StringIO(test_maf_cpg)) - out = tempfile.NamedTemporaryFile('w') + out = tempfile.NamedTemporaryFile("w") writer = bx.align.maf.Writer(out) - cpgfilter = cpg.Restricted(mask='#') + cpgfilter = cpg.Restricted(mask="#") cpgfilter.run(reader, writer.write) out.seek(0) j = 0 @@ -69,9 +69,9 @@ def test_cpg_restricted(): def test_non_cpg(): reader = bx.align.maf.Reader(StringIO(test_maf_cpg)) - out = tempfile.NamedTemporaryFile('w') + out = tempfile.NamedTemporaryFile("w") writer = bx.align.maf.Writer(out) - cpgfilter = cpg.nonCpG(mask='#') + cpgfilter = cpg.nonCpG(mask="#") cpgfilter.run(reader, writer.write) out.seek(0) j = 0 diff --git a/lib/bx/align/tools/chop.py b/lib/bx/align/tools/chop.py index 13736381..5285ae84 100644 --- a/lib/bx/align/tools/chop.py +++ b/lib/bx/align/tools/chop.py @@ -14,7 +14,7 @@ def chop_list(blocks, src, start, end): for block in blocks: ref = block.get_component_by_src(src) # If the reference component is on the '-' strand we should complement the interval - if ref.strand == '-': + if ref.strand == "-": slice_start = max(ref.src_size - end, ref.start) slice_end = max(ref.src_size - start, ref.end) else: diff --git a/lib/bx/align/tools/thread.py b/lib/bx/align/tools/thread.py index bdb845e6..a8f20395 100644 --- a/lib/bx/align/tools/thread.py +++ b/lib/bx/align/tools/thread.py @@ -72,7 +72,7 @@ def 
get_components_for_species(alignment, species): if len(alignment.components) < len(species): return None # Otherwise, build an index of components by species, then lookup - index = {c.src.split('.')[0]: c for c in alignment.components} + index = {c.src.split(".")[0]: c for c in alignment.components} try: return [index[s] for s in species] except Exception: diff --git a/lib/bx/align/tools/tile.py b/lib/bx/align/tools/tile.py index 7ee802e9..96c07022 100644 --- a/lib/bx/align/tools/tile.py +++ b/lib/bx/align/tools/tile.py @@ -19,8 +19,9 @@ def tile_interval(sources, index, ref_src, start, end, seq_db=None): `seq_db`: a mapping for source names in the reference species to nib files """ # First entry in sources should also be on the reference species - assert sources[0].split('.')[0] == ref_src.split('.')[0], \ - "{} != {}".format(sources[0].split('.')[0], ref_src.split('.')[0]) + assert sources[0].split(".")[0] == ref_src.split(".")[0], "{} != {}".format( + sources[0].split(".")[0], ref_src.split(".")[0] + ) base_len = end - start blocks = index.get(ref_src, start, end) # From low to high score @@ -32,7 +33,7 @@ def tile_interval(sources, index, ref_src, start, end, seq_db=None): slice_start = max(start, ref.start) slice_end = min(end, ref.end) for j in range(slice_start, slice_end): - mask[j-start] = i + mask[j - start] = i tiled = [] for i in range(len(sources)): tiled.append([]) @@ -41,9 +42,9 @@ def tile_interval(sources, index, ref_src, start, end, seq_db=None): if index < 0: # Get sequence if available, otherwise just use 'N' if seq_db: - tiled[0].append(bx.seq.nib.NibFile(open(seq_db[ref_src])).get(start+ss, ee-ss)) + tiled[0].append(bx.seq.nib.NibFile(open(seq_db[ref_src])).get(start + ss, ee - ss)) else: - tiled[0].append("N" * (ee-ss)) + tiled[0].append("N" * (ee - ss)) # Gaps in all other species for row in tiled[1:]: row.append("-" * (ee - ss)) diff --git a/lib/bx/arrays/array_tree.pyx b/lib/bx/arrays/array_tree.pyx index 77215e29..d6575819 100644 --- 
a/lib/bx/arrays/array_tree.pyx +++ b/lib/bx/arrays/array_tree.pyx @@ -4,11 +4,15 @@ __all__ = [ 'ArrayTree', 'FileArrayTreeDict', 'array_tree_dict_from_reader' ] import numpy from numpy import * + cimport numpy cimport bx.arrays.wiggle -from bx.misc.binary_file import BinaryFileWriter, BinaryFileReader +from bx.misc.binary_file import ( + BinaryFileReader, + BinaryFileWriter, +) from bx.misc.cdb import FileCDBDict """ diff --git a/lib/bx/arrays/array_tree_tests.py b/lib/bx/arrays/array_tree_tests.py index 5be2dc51..cec7473e 100644 --- a/lib/bx/arrays/array_tree_tests.py +++ b/lib/bx/arrays/array_tree_tests.py @@ -8,7 +8,10 @@ except Exception: sys.path.insert(0, os.path.dirname(os.path.abspath("."))) -from bx.arrays.array_tree import ArrayTree, FileArrayTreeDict +from bx.arrays.array_tree import ( + ArrayTree, + FileArrayTreeDict, +) class TestArrayTree(unittest.TestCase): @@ -24,23 +27,34 @@ def setUp(self): tree.set_range(5000, 9001, 100) tree.root.build_summary() - d = {'test': tree} + d = {"test": tree} f = tempfile.TemporaryFile() FileArrayTreeDict.dict_to_file(d, f) f.seek(0) self.filearraytreedict = FileArrayTreeDict(f) - self.filearraytree = self.filearraytreedict['test'] + self.filearraytree = self.filearraytreedict["test"] def test_get_summary(self): f = self.filearraytree lvl1 = f.get_summary(0, 1) - self.assertEqual([float(_) for _ in lvl1.sums/lvl1.counts], [4.5, 14.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5, 84.5, 94.5]) + self.assertEqual( + [float(_) for _ in lvl1.sums / lvl1.counts], [4.5, 14.5, 24.5, 34.5, 44.5, 54.5, 64.5, 74.5, 84.5, 94.5] + ) lvl2 = f.get_summary(0, 2) - self.assertEqual([float(_) for _ in lvl2.sums/lvl2.counts], [49.5, 149.5, 249.5, 349.5, 449.5, 549.5, 649.5, 749.5, 849.5, 949.5]) + self.assertEqual( + [float(_) for _ in lvl2.sums / lvl2.counts], + [49.5, 149.5, 249.5, 349.5, 449.5, 549.5, 649.5, 749.5, 849.5, 949.5], + ) lvl3 = f.get_summary(0, 3) - self.assertEqual([float(_) for _ in lvl3.sums/lvl3.counts], [499.5, 1499.5, 
2499.5, 3499.5, 4499.5, 100.0, 100.0, 100.0, 100.0, 100.0]) + self.assertEqual( + [float(_) for _ in lvl3.sums / lvl3.counts], + [499.5, 1499.5, 2499.5, 3499.5, 4499.5, 100.0, 100.0, 100.0, 100.0, 100.0], + ) lvl2_2 = f.get_summary(3000, 2) - self.assertEqual([float(_) for _ in lvl2_2.sums/lvl2_2.counts], [3049.5, 3149.5, 3249.5, 3349.5, 3449.5, 3549.5, 3649.5, 3749.5, 3849.5, 3949.5]) + self.assertEqual( + [float(_) for _ in lvl2_2.sums / lvl2_2.counts], + [3049.5, 3149.5, 3249.5, 3349.5, 3449.5, 3549.5, 3649.5, 3749.5, 3849.5, 3949.5], + ) def test_get_leaf(self): f = self.filearraytree @@ -68,14 +82,14 @@ def test_big(self): tree.set_range(14000000, 15000000, 200) tree.root.build_summary() - d = {'test': tree} + d = {"test": tree} f = tempfile.TemporaryFile() FileArrayTreeDict.dict_to_file(d, f) f.seek(0) - at = FileArrayTreeDict(f)['test'] + at = FileArrayTreeDict(f)["test"] lvl1 = at.get_summary(14000000, 1) - avgs = [float(_) for _ in lvl1.sums/lvl1.counts] + avgs = [float(_) for _ in lvl1.sums / lvl1.counts] self.assertEqual(len(avgs), 1000) self.assertEqual(avgs, [200 for i in range(0, 1000)]) @@ -84,7 +98,9 @@ def test_get_frequencies(self): self.assertEqual([float(_) for _ in f.get_summary(0, 1).frequencies], ([20] * 10)) self.assertEqual([float(_) for _ in f.get_summary(4000, 1).frequencies], ([10] * 10)) self.assertEqual([float(_) for _ in f.get_summary(0, 2).frequencies], ([200] * 10)) - self.assertEqual([int(_) for _ in f.get_summary(0, 3).frequencies], [2000, 2000, 2000, 1000, 1000, 1000, 1000, 1000, 1000, 1]) + self.assertEqual( + [int(_) for _ in f.get_summary(0, 3).frequencies], [2000, 2000, 2000, 1000, 1000, 1000, 1000, 1000, 1000, 1] + ) def test_wrong_dictkey(self): self.assertRaises(KeyError, self.filearraytreedict.__getitem__, "non-existing") @@ -95,5 +111,5 @@ def test_higher_level_than_tree(self): self.assertRaises(ValueError, f.get_summary, 0, 4) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git 
a/lib/bx/bbi/bbi_file.pxd b/lib/bx/bbi/bbi_file.pxd index bfe7a887..7117c561 100644 --- a/lib/bx/bbi/bbi_file.pxd +++ b/lib/bx/bbi/bbi_file.pxd @@ -1,10 +1,13 @@ +from types cimport * + from bpt_file cimport BPTFile from cirtree_file cimport CIRTreeFile -from types cimport * import numpy + cimport numpy + cdef class SummaryBlock: """ A block of summary data from disk diff --git a/lib/bx/bbi/bbi_file.pyx b/lib/bx/bbi/bbi_file.pyx index 4066742e..8b80e23d 100644 --- a/lib/bx/bbi/bbi_file.pyx +++ b/lib/bx/bbi/bbi_file.pyx @@ -8,23 +8,29 @@ mirrors Jim Kent's 'bbiRead.c' mostly. """ from cpython.version cimport PY_MAJOR_VERSION + import sys cimport cython from collections import deque -from bpt_file cimport BPTFile -from cirtree_file cimport CIRTreeFile + from types cimport * +from bpt_file cimport BPTFile +from cirtree_file cimport CIRTreeFile from libc cimport limits import numpy + cimport numpy -from bx.misc.binary_file import BinaryFileReader +import math +import zlib from io import BytesIO -import zlib, math + +from bx.misc.binary_file import BinaryFileReader + cdef extern from "Python.h": char * PyBytes_AsString( object ) diff --git a/lib/bx/bbi/bigbed_file.pyx b/lib/bx/bbi/bigbed_file.pyx index 3cea9c2d..b00d204f 100644 --- a/lib/bx/bbi/bigbed_file.pyx +++ b/lib/bx/bbi/bigbed_file.pyx @@ -4,13 +4,18 @@ BigBed file. from bbi_file cimport * from cirtree_file cimport CIRTreeFile + import numpy -cimport numpy + from types cimport * + +cimport numpy + +import zlib +from io import BytesIO + from bx.intervals.io import GenomicInterval from bx.misc.binary_file import BinaryFileReader -from io import BytesIO -import zlib DEF big_bed_sig = 0x8789F2EB diff --git a/lib/bx/bbi/bigwig_file.pyx b/lib/bx/bbi/bigwig_file.pyx index e86bd752..99bf0138 100644 --- a/lib/bx/bbi/bigwig_file.pyx +++ b/lib/bx/bbi/bigwig_file.pyx @@ -3,14 +3,20 @@ BigWig file. 
""" from collections import deque + from bbi_file cimport * from cirtree_file cimport CIRTreeFile + import numpy -cimport numpy + from types cimport * -from bx.misc.binary_file import BinaryFileReader -from io import BytesIO + +cimport numpy + import zlib +from io import BytesIO + +from bx.misc.binary_file import BinaryFileReader DEF big_wig_sig = 0x888FFC26 DEF bwg_bed_graph = 1 diff --git a/lib/bx/bbi/bigwig_tests.py b/lib/bx/bbi/bigwig_tests.py index 4cba88fa..5f0901a5 100644 --- a/lib/bx/bbi/bigwig_tests.py +++ b/lib/bx/bbi/bigwig_tests.py @@ -23,34 +23,71 @@ def allclose(a, b, tol=0.00001): class TestBigWig: @pytest.fixture(autouse=True) def setUp(self): - f = open("test_data/bbi_tests/test.bw", 'rb') + f = open("test_data/bbi_tests/test.bw", "rb") self.bw = BigWigFile(file=f) def test_get_summary(self): data = self.bw.query("chr1", 10000, 20000, 10) - means = [x['mean'] for x in data] - assert numpy.allclose([float(_) for _ in means], [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998]) + means = [x["mean"] for x in data] + assert numpy.allclose( + [float(_) for _ in means], + [ + -0.17557571594973645, + -0.054009292602539061, + -0.056892242431640622, + -0.03650328826904297, + 0.036112907409667966, + 0.0064466032981872557, + 0.036949024200439454, + 0.076638259887695306, + 0.043518108367919923, + 0.01554749584197998, + ], + ) # Summarize variant sd = self.bw.summarize("chr1", 10000, 20000, 10) - assert numpy.allclose(sd.sum_data / sd.valid_count, [-0.17557571594973645, -0.054009292602539061, -0.056892242431640622, -0.03650328826904297, 0.036112907409667966, 0.0064466032981872557, 0.036949024200439454, 0.076638259887695306, 0.043518108367919923, 0.01554749584197998]) + assert numpy.allclose( + sd.sum_data / sd.valid_count, + [ + -0.17557571594973645, + -0.054009292602539061, + 
-0.056892242431640622, + -0.03650328826904297, + 0.036112907409667966, + 0.0064466032981872557, + 0.036949024200439454, + 0.076638259887695306, + 0.043518108367919923, + 0.01554749584197998, + ], + ) # Test min and max for this entire summary region data = self.bw.query("chr1", 10000, 20000, 1) - maxs = [x['max'] for x in data] - mins = [x['min'] for x in data] + maxs = [x["max"] for x in data] + mins = [x["min"] for x in data] assert [float(_) for _ in maxs] == [0.289000004529953] assert [float(_) for _ in mins] == [-3.9100000858306885] def test_get_leaf(self): data = self.bw.query("chr1", 11000, 11005, 5) - means = [x['mean'] for x in data] - assert numpy.allclose([float(_) for _ in means], [0.050842501223087311, -2.4589500427246094, 0.050842501223087311, 0.050842501223087311, 0.050842501223087311]) + means = [x["mean"] for x in data] + assert numpy.allclose( + [float(_) for _ in means], + [ + 0.050842501223087311, + -2.4589500427246094, + 0.050842501223087311, + 0.050842501223087311, + 0.050842501223087311, + ], + ) # Test min and max for this entire leaf region data = self.bw.query("chr1", 11000, 11005, 1) - maxs = [x['max'] for x in data] - mins = [x['min'] for x in data] + maxs = [x["max"] for x in data] + mins = [x["min"] for x in data] assert [float(_) for _ in maxs] == [0.050842501223087311] assert [float(_) for _ in mins] == [-2.4589500427246094] @@ -66,13 +103,13 @@ def test_summary_from_file(self, line): end = int(fields[2]) n = int(fields[3]) t = fields[4] - values = [float(v.replace('n/a', 'NaN')) for v in fields[5:]] + values = [float(v.replace("n/a", "NaN")) for v in fields[5:]] sd = self.bw.summarize(chrom, start, end, n) - if t == 'mean': + if t == "mean": assert allclose(sd.sum_data / sd.valid_count, values) - elif t == 'min': + elif t == "min": assert allclose(sd.min_val, values) - elif t == 'max': + elif t == "max": assert allclose(sd.max_val, values) # elif t == 'std': # assert numpy.allclose( sd.max_val, values ) diff --git 
a/lib/bx/bbi/bpt_file.pxd b/lib/bx/bbi/bpt_file.pxd index d5c2e2a9..a2097ca0 100644 --- a/lib/bx/bbi/bpt_file.pxd +++ b/lib/bx/bbi/bpt_file.pxd @@ -2,6 +2,7 @@ from bx.misc.binary_file import BinaryFileReader from types cimport * + cdef class BPTFile: """ On disk B+ tree compatible with Jim Kent's bPlusTree.c diff --git a/lib/bx/bbi/cirtree_file.pxd b/lib/bx/bbi/cirtree_file.pxd index f6ef36f5..94d057c3 100644 --- a/lib/bx/bbi/cirtree_file.pxd +++ b/lib/bx/bbi/cirtree_file.pxd @@ -1,5 +1,6 @@ from types cimport * + cdef class CIRTreeFile: cdef object file cdef object reader diff --git a/lib/bx/binned_array.py b/lib/bx/binned_array.py index 1038d415..7b54201b 100644 --- a/lib/bx/binned_array.py +++ b/lib/bx/binned_array.py @@ -22,12 +22,12 @@ frombuffer, NaN, resize, - zeros + zeros, ) from bx_extras.lrucache import LRUCache -platform_is_little_endian = (sys.byteorder == 'little') +platform_is_little_endian = sys.byteorder == "little" MAGIC = 0x4AB04612 @@ -43,23 +43,23 @@ # Compression types -comp_types = { - 'none': (lambda x: x, lambda x: x) -} +comp_types = {"none": (lambda x: x, lambda x: x)} try: import zlib - comp_types['zlib'] = (zlib.compress, zlib.decompress) + + comp_types["zlib"] = (zlib.compress, zlib.decompress) except Exception: pass try: import lzo - comp_types['lzo'] = (lzo.compress, lzo.decompress) + + comp_types["lzo"] = (lzo.compress, lzo.decompress) except Exception: pass -MAX = 512*1024*1024 +MAX = 512 * 1024 * 1024 def bytesify(s): @@ -70,7 +70,7 @@ def bytesify(s): class BinnedArray: - def __init__(self, bin_size=512*1024, default=NaN, max_size=MAX, typecode="f"): + def __init__(self, bin_size=512 * 1024, default=NaN, max_size=MAX, typecode="f"): self.max_size = max_size self.bin_size = bin_size self.nbins = int(math.ceil(max_size / self.bin_size)) @@ -116,11 +116,11 @@ def get_range(self, start, end): size = 0 else: if delta < size: - rval.append(self.bins[bin][offset:offset+delta]) + rval.append(self.bins[bin][offset : offset + delta]) size 
-= delta start += delta else: - rval.append(self.bins[bin][offset:offset+size]) + rval.append(self.bins[bin][offset : offset + size]) size = 0 return concatenate(rval) @@ -135,13 +135,13 @@ def __getitem__(self, key): def __setitem__(self, key, value): return self.set(key, value) - def to_file(self, f, comp_type='zlib'): + def to_file(self, f, comp_type="zlib"): # Get compress method compress, _ = comp_types[comp_type] # Write header write_packed(f, ">5I", MAGIC, VERSION, self.max_size, self.bin_size, self.nbins) # save type code - f.write(pack('c', bytesify(self.typecode))) + f.write(pack("c", bytesify(self.typecode))) # save compression type f.write(bytesify(comp_type[0:4].ljust(4))) # write default value @@ -190,14 +190,14 @@ def __init__(self, f, cache=32): self.bins = LRUCache(size=cache) # Read typecode if V >= 1: - self.typecode = (unpack('c', f.read(1))[0]).decode() + self.typecode = (unpack("c", f.read(1))[0]).decode() else: - self.typecode = 'f' + self.typecode = "f" # Read compression type if V >= 2: self.comp_type = f.read(4).strip().decode() else: - self.comp_type = 'zlib' + self.comp_type = "zlib" self.decompress = comp_types[self.comp_type][1] # Read default value s = f.read(calcsize(self.typecode)) @@ -255,11 +255,11 @@ def get_range(self, start, end): size = 0 else: if delta < size: - rval.append(self.bins[bin][offset:offset+delta]) + rval.append(self.bins[bin][offset : offset + delta]) size -= delta start += delta else: - rval.append(self.bins[bin][offset:offset+size]) + rval.append(self.bins[bin][offset : offset + size]) size = 0 return concatenate(rval) @@ -273,7 +273,7 @@ def __getitem__(self, key): class BinnedArrayWriter: - def __init__(self, f, bin_size=512*1024, default=NaN, max_size=MAX, typecode="f", comp_type='zlib'): + def __init__(self, f, bin_size=512 * 1024, default=NaN, max_size=MAX, typecode="f", comp_type="zlib"): # All parameters in the constructor are immutable after creation self.f = f self.max_size = max_size @@ -297,7 +297,7 
@@ def write_header(self): # Write header write_packed(self.f, ">5I", MAGIC, VERSION, self.max_size, self.bin_size, self.nbins) # save type code - self.f.write(pack('c', bytesify(self.typecode))) + self.f.write(pack("c", bytesify(self.typecode))) # write default value a = array(self.default, self.typecode) # write comp type diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index 22cb7b0a..c61c8671 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -6,7 +6,7 @@ allclose, concatenate, NaN, - zeros + zeros, ) from numpy.random import random_sample as random @@ -33,8 +33,8 @@ def setup(): if random() < 0.5: source = concatenate((source, random(CHUNK_SIZE_RANDOM))) else: - source = concatenate((source, zeros(CHUNK_SIZE_ZEROS, 'f'))) - source = source.astype('f') + source = concatenate((source, zeros(CHUNK_SIZE_ZEROS, "f"))) + source = source.astype("f") # Set on target target = BinnedArray(128, NaN, len(source)) for i in range(len(source)): @@ -46,61 +46,82 @@ def setup(): def test_simple(): # Verify for i in range(len(source)): - assert source[i] == target[i], "No match, index: %d, source: %f, target: %f, len( source ): %d" % (i, source[i], target[i], len(source)) + assert source[i] == target[i], "No match, index: %d, source: %f, target: %f, len( source ): %d" % ( + i, + source[i], + target[i], + len(source), + ) # Verify with slices for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % \ - (a, b, ",".join(map(str, source[a:a+10])), ",".join(map(str, target[a:a+10]))) + assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + a, + b, + ",".join(map(str, source[a : a + 10])), + ",".join(map(str, target[a : a + 10])), + ) def test_file(): # With a file (zlib) target.to_file(open("/tmp/foo", "wb")) - target2 = 
FileBinnedArray(open("/tmp/foo", 'rb')) + target2 = FileBinnedArray(open("/tmp/foo", "rb")) for i in range(len(source)): assert source[i] == target2[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target2[i]) # Verify with slices - target2 = FileBinnedArray(open("/tmp/foo", 'rb')) + target2 = FileBinnedArray(open("/tmp/foo", "rb")) for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % \ - (a, b, ",".join(map(str, source[a:a+10])), ",".join(map(str, target2[a:a+10]))) + assert allclose(source[a:b], target[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + a, + b, + ",".join(map(str, source[a : a + 10])), + ",".join(map(str, target2[a : a + 10])), + ) def test_file_lzo(): # With a file (lzo) target.to_file(open("/tmp/foo3", "wb"), comp_type="lzo") - target3 = FileBinnedArray(open("/tmp/foo3", 'rb')) + target3 = FileBinnedArray(open("/tmp/foo3", "rb")) # Verify for i in range(len(source)): assert source[i] == target3[i], "No match, index: %d, source: %d, target: %d" % (i, source[i], target3[i]) # Verify with slices - target3 = FileBinnedArray(open("/tmp/foo3", 'rb')) + target3 = FileBinnedArray(open("/tmp/foo3", "rb")) for _ in range(10): a = int(random() * len(source)) b = int(random() * len(source)) if b < a: a, b = b, a - assert allclose(source[a:b], target3[a:b]), "No match, index: %d:%d, source: %s, target: %s" % \ - (a, b, ",".join(map(str, source[a:a+10])), ",".join(map(str, target3[a:a+10]))) + assert allclose(source[a:b], target3[a:b]), "No match, index: %d:%d, source: %s, target: %s" % ( + a, + b, + ",".join(map(str, source[a : a + 10])), + ",".join(map(str, target3[a : a + 10])), + ) def test_binned_array_writer(): # Test with ba writer o = open("/tmp/foo4", "wb") - w = BinnedArrayWriter(o, 128, comp_type='lzo') + w = BinnedArrayWriter(o, 128, comp_type="lzo") for val in source: 
w.write(val) w.finish() o.close() # Verify - target4 = FileBinnedArray(open("/tmp/foo4", 'rb')) + target4 = FileBinnedArray(open("/tmp/foo4", "rb")) for i in range(len(source)): - assert allclose(source[i], target4[i]), "No match, index: %d, source: %d, target: %d" % (i, source[i], target4[i]) + assert allclose(source[i], target4[i]), "No match, index: %d, source: %d, target: %d" % ( + i, + source[i], + target4[i], + ) diff --git a/lib/bx/bitset.pyx b/lib/bx/bitset.pyx index 6a68ec98..367e11c2 100644 --- a/lib/bx/bitset.pyx +++ b/lib/bx/bitset.pyx @@ -10,6 +10,7 @@ testing spans larger than the bin size, it can be much faster. import sys + cdef extern from "common.h": ctypedef int boolean diff --git a/lib/bx/bitset_builders.py b/lib/bx/bitset_builders.py index faf55da3..d8657c1f 100644 --- a/lib/bx/bitset_builders.py +++ b/lib/bx/bitset_builders.py @@ -10,11 +10,13 @@ from bx.bitset import ( BinnedBitSet, - MAX + MAX, ) -def binned_bitsets_from_file(f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={}): +def binned_bitsets_from_file( + f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={} +): """ Read a file into a dictionary of bitsets. The defaults arguments @@ -48,11 +50,13 @@ def binned_bitsets_from_file(f, chrom_col=0, start_col=1, end_col=2, strand_col= end = min(size, end + downstream_pad) if start > end: warn("Interval start after end!") - last_bitset.set_range(start, end-start) + last_bitset.set_range(start, end - start) return bitsets -def binned_bitsets_from_bed_file(f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={}): +def binned_bitsets_from_bed_file( + f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={} +): """ Read a file into a dictionary of bitsets. 
The defaults arguments @@ -96,7 +100,7 @@ def binned_bitsets_from_bed_file(f, chrom_col=0, start_col=1, end_col=2, strand_ end = min(size, end + downstream_pad) if start > end: warn("Interval start after end!") - last_bitset.set_range(start, end-start) + last_bitset.set_range(start, end - start) return bitsets @@ -108,7 +112,6 @@ def binned_bitsets_proximity(f, chrom_col=0, start_col=1, end_col=2, strand_col= for line in f: if line.startswith("#"): continue -# print "input=%s" % ( line ), fields = line.split() strand = "+" if len(fields) >= strand_col + 1: @@ -131,9 +134,8 @@ def binned_bitsets_proximity(f, chrom_col=0, start_col=1, end_col=2, strand_col= end = min(MAX, end + upstream) if downstream: start = max(0, start - downstream) -# print "set: start=%d\tend=%d" % ( start, end ) - if end-start > 0: - last_bitset.set_range(start, end-start) + if end - start > 0: + last_bitset.set_range(start, end - start) return bitsets @@ -163,5 +165,5 @@ def binned_bitsets_by_chrom(f, chrom, chrom_col=0, start_col=1, end_col=2): fields = line.split() if fields[chrom_col] == chrom: start, end = int(fields[start_col]), int(fields[end_col]) - bitset.set_range(start, end-start) + bitset.set_range(start, end - start) return bitset diff --git a/lib/bx/bitset_tests.py b/lib/bx/bitset_tests.py index 8a0dc693..d4854581 100644 --- a/lib/bx/bitset_tests.py +++ b/lib/bx/bitset_tests.py @@ -8,7 +8,6 @@ class AbstractTests: - def assert_bits(self, bits, list): assert bits.size == len(list), "Bitset size and verification list size do not match" for i in range(bits.size): @@ -44,7 +43,7 @@ def test_range_access(self): self.assert_bits(bits, l) # Set some positions for b, e in ((11, 14), (20, 75), (90, 99)): - bits.set_range(b, e-b) + bits.set_range(b, e - b) for pos in range(b, e): l[pos] = 1 self.assert_bits(bits, l) @@ -54,7 +53,7 @@ def test_count(self): bits = self.new_bits(100) # Set some positions for b, e in ((11, 14), (20, 75), (90, 100)): - bits.set_range(b, e-b) + bits.set_range(b, 
e - b) self.assertEqual(bits.count_range(0, 0), 0) self.assertEqual(bits.count_range(0, 20), 3) self.assertEqual(bits.count_range(25, 25), 25) @@ -66,7 +65,7 @@ def test_find(self): bits = self.new_bits(100) # Set some positions for b, e in ((11, 14), (20, 75), (90, 100)): - bits.set_range(b, e-b) + bits.set_range(b, e - b) # Next set self.assertEqual(bits.next_set(0), 11) self.assertEqual(bits.next_set(13), 13) @@ -83,7 +82,7 @@ def test_and(self): bits1.set_range(20, 40) bits2.set_range(50, 25) bits1.iand(bits2) - l = [0]*100 + l = [0] * 100 for i in range(50, 60): l[i] = 1 self.assert_bits(bits1, l) @@ -94,7 +93,7 @@ def test_or(self): bits1.set_range(20, 40) bits2.set_range(50, 25) bits1.ior(bits2) - l = [0]*100 + l = [0] * 100 for i in range(20, 75): l[i] = 1 self.assert_bits(bits1, l) @@ -103,7 +102,7 @@ def test_not(self): bits = self.new_bits(100) bits.set_range(20, 40) bits.invert() - l = [1]*100 + l = [1] * 100 for i in range(20, 60): l[i] = 0 self.assert_bits(bits, l) diff --git a/lib/bx/cookbook/__init__.py b/lib/bx/cookbook/__init__.py index dec6d6c3..2e3008b7 100644 --- a/lib/bx/cookbook/__init__.py +++ b/lib/bx/cookbook/__init__.py @@ -3,6 +3,7 @@ """ import types + seq_types = type(()), type([]) @@ -21,7 +22,7 @@ def cross_lists(*sets): digits = [next(it) for it in wheels] while True: yield digits[:] - for i in range(len(digits)-1, -1, -1): + for i in range(len(digits) - 1, -1, -1): try: digits[i] = next(wheels[i]) break @@ -31,6 +32,7 @@ def cross_lists(*sets): else: break + # Cached / memoized methods @@ -83,7 +85,7 @@ def __repr__(self): class ImmutableDict(dict): - '''A hashable dict.''' + """A hashable dict.""" def __init__(self, *args, **kwds): dict.__init__(self, *args, **kwds) diff --git a/lib/bx/cookbook/argparse.py b/lib/bx/cookbook/argparse.py index 39f2e99e..74205410 100644 --- a/lib/bx/cookbook/argparse.py +++ b/lib/bx/cookbook/argparse.py @@ -61,24 +61,24 @@ still considered an implementation detail.) 
""" -__version__ = '1.2.1' +__version__ = "1.2.1" __all__ = [ - 'ArgumentParser', - 'ArgumentError', - 'ArgumentTypeError', - 'FileType', - 'HelpFormatter', - 'ArgumentDefaultsHelpFormatter', - 'RawDescriptionHelpFormatter', - 'RawTextHelpFormatter', - 'Namespace', - 'Action', - 'ONE_OR_MORE', - 'OPTIONAL', - 'PARSER', - 'REMAINDER', - 'SUPPRESS', - 'ZERO_OR_MORE', + "ArgumentParser", + "ArgumentError", + "ArgumentTypeError", + "FileType", + "HelpFormatter", + "ArgumentDefaultsHelpFormatter", + "RawDescriptionHelpFormatter", + "RawTextHelpFormatter", + "Namespace", + "Action", + "ONE_OR_MORE", + "OPTIONAL", + "PARSER", + "REMAINDER", + "SUPPRESS", + "ZERO_OR_MORE", ] @@ -112,17 +112,17 @@ def sorted(iterable, reverse=False): def _callable(obj): - return hasattr(obj, '__call__') or hasattr(obj, '__bases__') + return hasattr(obj, "__call__") or hasattr(obj, "__bases__") -SUPPRESS = '==SUPPRESS==' +SUPPRESS = "==SUPPRESS==" -OPTIONAL = '?' -ZERO_OR_MORE = '*' -ONE_OR_MORE = '+' -PARSER = 'A...' -REMAINDER = '...' -_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' +OPTIONAL = "?" +ZERO_OR_MORE = "*" +ONE_OR_MORE = "+" +PARSER = "A..." +REMAINDER = "..." +_UNRECOGNIZED_ARGS_ATTR = "_unrecognized_args" # ============================= # Utility functions and classes @@ -144,8 +144,8 @@ def __repr__(self): for arg in self._get_args(): arg_strings.append(repr(arg)) for name, value in self._get_kwargs(): - arg_strings.append(f'{name}={value!r}') - return '{}({})'.format(type_name, ', '.join(arg_strings)) + arg_strings.append(f"{name}={value!r}") + return "{}({})".format(type_name, ", ".join(arg_strings)) def _get_kwargs(self): return sorted(self.__dict__.items()) @@ -164,6 +164,7 @@ def _ensure_value(namespace, name, value): # Formatting Help # =============== + class HelpFormatter: """Formatter for generating usage messages and argument help strings. @@ -171,16 +172,12 @@ class HelpFormatter: provided by the class are considered an implementation detail. 
""" - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): + def __init__(self, prog, indent_increment=2, max_help_position=24, width=None): # default setting for width if width is None: try: - width = int(_os.environ['COLUMNS']) + width = int(_os.environ["COLUMNS"]) except (KeyError, ValueError): width = 80 width -= 2 @@ -197,8 +194,8 @@ def __init__(self, self._root_section = self._Section(self, None) self._current_section = self._root_section - self._whitespace_matcher = _re.compile(r'\s+') - self._long_break_matcher = _re.compile(r'\n\n\n+') + self._whitespace_matcher = _re.compile(r"\s+") + self._long_break_matcher = _re.compile(r"\n\n\n+") # =============================== # Section and indentation methods @@ -209,11 +206,10 @@ def _indent(self): def _dedent(self): self._current_indent -= self._indent_increment - assert self._current_indent >= 0, 'Indent decreased below 0.' + assert self._current_indent >= 0, "Indent decreased below 0." self._level -= 1 class _Section: - def __init__(self, formatter, parent, heading=None): self.formatter = formatter self.parent = parent @@ -233,17 +229,17 @@ def format_help(self): # return nothing if the section was empty if not item_help: - return '' + return "" # add the heading if the section was non-empty if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) + heading = "%*s%s:\n" % (current_indent, "", self.heading) else: - heading = '' + heading = "" # join the section-initial newline, the heading and the help - return join(['\n', heading, item_help, '\n']) + return join(["\n", heading, item_help, "\n"]) def _add_item(self, func, args): self._current_section.items.append((func, args)) @@ -282,8 +278,7 @@ def add_argument(self, action): # update the maximum item length invocation_length = max(len(s) for s in invocations) action_length = invocation_length + 
self._current_indent - self._action_max_length = max(self._action_max_length, - action_length) + self._action_max_length = max(self._action_max_length, action_length) # add the item to the list self._add_item(self._format_action, [action]) @@ -298,18 +293,16 @@ def add_arguments(self, actions): def format_help(self): help = self._root_section.format_help() if help: - help = self._long_break_matcher.sub('\n\n', help) - help = help.strip('\n') + '\n' + help = self._long_break_matcher.sub("\n\n", help) + help = help.strip("\n") + "\n" return help def _join_parts(self, part_strings): - return ''.join([part - for part in part_strings - if part and part is not SUPPRESS]) + return "".join(part for part in part_strings if part and part is not SUPPRESS) def _format_usage(self, usage, actions, groups, prefix): if prefix is None: - prefix = _('usage: ') + prefix = _("usage: ") # if usage is specified, use that if usage is not None: @@ -317,11 +310,11 @@ def _format_usage(self, usage, actions, groups, prefix): # if no optionals or positionals are available, usage is just prog elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) + usage = "%(prog)s" % dict(prog=self._prog) # if optionals and positionals are available, calculate usage elif usage is None: - prog = '%(prog)s' % dict(prog=self._prog) + prog = "%(prog)s" % dict(prog=self._prog) # split optionals from positionals optionals = [] @@ -335,20 +328,20 @@ def _format_usage(self, usage, actions, groups, prefix): # build full usage string format = self._format_actions_usage action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = " ".join(s for s in [prog, action_usage] if s) # wrap the usage parts if it's too long text_width = self._width - self._current_indent if len(prefix) + len(usage) > text_width: # break usage into wrappable parts - part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' + part_regexp = r"\(.*?\)+|\[.*?\]+|\S+" opt_usage = 
format(optionals, groups) pos_usage = format(positionals, groups) opt_parts = _re.findall(part_regexp, opt_usage) pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + assert " ".join(opt_parts) == opt_usage + assert " ".join(pos_parts) == pos_usage # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -360,20 +353,20 @@ def get_lines(parts, indent, prefix=None): line_len = len(indent) - 1 for part in parts: if line_len + 1 + len(part) > text_width: - lines.append(indent + ' '.join(line)) + lines.append(indent + " ".join(line)) line = [] line_len = len(indent) - 1 line.append(part) line_len += len(part) + 1 if line: - lines.append(indent + ' '.join(line)) + lines.append(indent + " ".join(line)) if prefix is not None: - lines[0] = lines[0][len(indent):] + lines[0] = lines[0][len(indent) :] return lines # if prog is short, follow it with optionals or positionals if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) + indent = " " * (len(prefix) + len(prog) + 1) if opt_parts: lines = get_lines([prog] + opt_parts, indent, prefix) lines.extend(get_lines(pos_parts, indent)) @@ -384,7 +377,7 @@ def get_lines(parts, indent, prefix=None): # if prog is long, put it on its own line else: - indent = ' ' * len(prefix) + indent = " " * len(prefix) parts = opt_parts + pos_parts lines = get_lines(parts, indent) if len(lines) > 1: @@ -394,10 +387,10 @@ def get_lines(parts, indent, prefix=None): lines = [prog] + lines # join lines into usage - usage = '\n'.join(lines) + usage = "\n".join(lines) # prefix with 'usage:' - return f'{prefix}{usage}\n\n' + return f"{prefix}{usage}\n\n" def _format_actions_usage(self, actions, groups): # find group indices and identify actions in groups @@ -415,18 +408,18 @@ def _format_actions_usage(self, actions, groups): group_actions.add(action) if not group.required: if start in inserts: - inserts[start] += ' [' 
+ inserts[start] += " [" else: - inserts[start] = '[' - inserts[end] = ']' + inserts[start] = "[" + inserts[end] = "]" else: if start in inserts: - inserts[start] += ' (' + inserts[start] += " (" else: - inserts[start] = '(' - inserts[end] = ')' + inserts[start] = "(" + inserts[end] = ")" for i in range(start + 1, end): - inserts[i] = '|' + inserts[i] = "|" # collect all actions format strings parts = [] @@ -436,9 +429,9 @@ def _format_actions_usage(self, actions, groups): # remove | separators for suppressed arguments if action.help is SUPPRESS: parts.append(None) - if inserts.get(i) == '|': + if inserts.get(i) == "|": inserts.pop(i) - elif inserts.get(i + 1) == '|': + elif inserts.get(i + 1) == "|": inserts.pop(i + 1) # produce all arg strings @@ -447,7 +440,7 @@ def _format_actions_usage(self, actions, groups): # if it's in a group, strip the outer [] if action in group_actions: - if part[0] == '[' and part[-1] == ']': + if part[0] == "[" and part[-1] == "]": part = part[1:-1] # add the action string to the list @@ -460,18 +453,18 @@ def _format_actions_usage(self, actions, groups): # if the Optional doesn't take a value, format is: # -s or --long if action.nargs == 0: - part = '%s' % option_string + part = "%s" % option_string # if the Optional takes a value, format is: # -s ARGS or --long ARGS else: default = action.dest.upper() args_string = self._format_args(action, default) - part = f'{option_string} {args_string}' + part = f"{option_string} {args_string}" # make it look optional if it's not required or in a group if not action.required and action not in group_actions: - part = '[%s]' % part + part = "[%s]" % part # add the action string to the list parts.append(part) @@ -481,50 +474,49 @@ def _format_actions_usage(self, actions, groups): parts[i:i] = [inserts[i]] # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) + text = " ".join(item for item in parts if item is not None) # clean up separators for 
mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(fr'{open} *{close}', r'', text) - text = _re.sub(r'\(([^|]*)\)', r'\1', text) + open = r"[\[(]" + close = r"[\])]" + text = _re.sub(r"(%s) " % open, r"\1", text) + text = _re.sub(r" (%s)" % close, r"\1", text) + text = _re.sub(rf"{open} *{close}", r"", text) + text = _re.sub(r"\(([^|]*)\)", r"\1", text) text = text.strip() # return the text return text def _format_text(self, text): - if '%(prog)' in text: + if "%(prog)" in text: text = text % dict(prog=self._prog) text_width = self._width - self._current_indent - indent = ' ' * self._current_indent - return self._fill_text(text, text_width, indent) + '\n\n' + indent = " " * self._current_indent + return self._fill_text(text, text_width, indent) + "\n\n" def _format_action(self, action): # determine the required width and the entry label - help_position = min(self._action_max_length + 2, - self._max_help_position) + help_position = min(self._action_max_length + 2, self._max_help_position) help_width = self._width - help_position action_width = help_position - self._current_indent - 2 action_header = self._format_action_invocation(action) # ho nelp; start on same line and add a final newline if not action.help: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup + tup = self._current_indent, "", action_header + action_header = "%*s%s\n" % tup # short action name; start on the same line and pad two spaces elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header - action_header = '%*s%-*s ' % tup + tup = self._current_indent, "", action_width, action_header + action_header = "%*s%-*s " % tup indent_first = 0 # long action name; start on the next line else: - tup = self._current_indent, '', action_header - action_header = '%*s%s\n' % tup + tup = self._current_indent, "", 
action_header + action_header = "%*s%s\n" % tup indent_first = help_position # collect the pieces of the action help @@ -534,13 +526,13 @@ def _format_action(self, action): if action.help: help_text = self._expand_help(action) help_lines = self._split_lines(help_text, help_width) - parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) + parts.append("%*s%s\n" % (indent_first, "", help_lines[0])) for line in help_lines[1:]: - parts.append('%*s%s\n' % (help_position, '', line)) + parts.append("%*s%s\n" % (help_position, "", line)) # or add a newline if the description doesn't end with one - elif not action_header.endswith('\n'): - parts.append('\n') + elif not action_header.endswith("\n"): + parts.append("\n") # if there are any sub-actions, add their help as well for subaction in self._iter_indented_subactions(action): @@ -551,7 +543,7 @@ def _format_action(self, action): def _format_action_invocation(self, action): if not action.option_strings: - metavar, = self._metavar_formatter(action, action.dest)(1) + (metavar,) = self._metavar_formatter(action, action.dest)(1) return metavar else: @@ -568,16 +560,16 @@ def _format_action_invocation(self, action): default = action.dest.upper() args_string = self._format_args(action, default) for option_string in action.option_strings: - parts.append(f'{option_string} {args_string}') + parts.append(f"{option_string} {args_string}") - return ', '.join(parts) + return ", ".join(parts) def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = "{%s}" % ",".join(choice_strs) else: result = default_metavar @@ -585,26 +577,27 @@ def format(tuple_size): if isinstance(result, tuple): return result else: - return (result, ) * tuple_size + return (result,) * tuple_size + return format def _format_args(self, action, default_metavar): 
get_metavar = self._metavar_formatter(action, default_metavar) if action.nargs is None: - result = '%s' % get_metavar(1) + result = "%s" % get_metavar(1) elif action.nargs == OPTIONAL: - result = '[%s]' % get_metavar(1) + result = "[%s]" % get_metavar(1) elif action.nargs == ZERO_OR_MORE: - result = '[%s [%s ...]]' % get_metavar(2) + result = "[%s [%s ...]]" % get_metavar(2) elif action.nargs == ONE_OR_MORE: - result = '%s [%s ...]' % get_metavar(2) + result = "%s [%s ...]" % get_metavar(2) elif action.nargs == REMAINDER: - result = '...' + result = "..." elif action.nargs == PARSER: - result = '%s ...' % get_metavar(1) + result = "%s ..." % get_metavar(1) else: - formats = ['%s' for _ in range(action.nargs)] - result = ' '.join(formats) % get_metavar(action.nargs) + formats = ["%s" for _ in range(action.nargs)] + result = " ".join(formats) % get_metavar(action.nargs) return result def _expand_help(self, action): @@ -613,11 +606,11 @@ def _expand_help(self, action): if params[name] is SUPPRESS: del params[name] for name in list(params): - if hasattr(params[name], '__name__'): + if hasattr(params[name], "__name__"): params[name] = params[name].__name__ - if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str + if params.get("choices") is not None: + choices_str = ", ".join(str(c) for c in params["choices"]) + params["choices"] = choices_str return self._get_help_string(action) % params def _iter_indented_subactions(self, action): @@ -631,11 +624,11 @@ def _iter_indented_subactions(self, action): self._dedent() def _split_lines(self, text, width): - text = self._whitespace_matcher.sub(' ', text).strip() + text = self._whitespace_matcher.sub(" ", text).strip() return _textwrap.wrap(text, width) def _fill_text(self, text, width, indent): - text = self._whitespace_matcher.sub(' ', text).strip() + text = self._whitespace_matcher.sub(" ", text).strip() return _textwrap.fill(text, width, 
initial_indent=indent, subsequent_indent=indent) def _get_help_string(self, action): @@ -650,7 +643,7 @@ class RawDescriptionHelpFormatter(HelpFormatter): """ def _fill_text(self, text, width, indent): - return ''.join([indent + line for line in text.splitlines(True)]) + return "".join(indent + line for line in text.splitlines(True)) class RawTextHelpFormatter(RawDescriptionHelpFormatter): @@ -673,11 +666,11 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter): def _get_help_string(self, action): help = action.help - if '%(default)' not in action.help: + if "%(default)" not in action.help: if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' + help += " (default: %(default)s)" return help @@ -685,11 +678,12 @@ def _get_help_string(self, action): # Options and Arguments # ===================== + def _get_action_name(argument): if argument is None: return None elif argument.option_strings: - return '/'.join(argument.option_strings) + return "/".join(argument.option_strings) elif argument.metavar not in (None, SUPPRESS): return argument.metavar elif argument.dest not in (None, SUPPRESS): @@ -711,11 +705,10 @@ def __init__(self, argument, message): def __str__(self): if self.argument_name is None: - format = '%(message)s' + format = "%(message)s" else: - format = 'argument %(argument_name)s: %(message)s' - return format % dict(message=self.message, - argument_name=self.argument_name) + format = "argument %(argument_name)s: %(message)s" + return format % dict(message=self.message, argument_name=self.argument_name) class ArgumentTypeError(Exception): @@ -726,6 +719,7 @@ class ArgumentTypeError(Exception): # Action classes # ============== + class Action(_AttributeHolder): """Information about how to convert command line strings to Python objects. @@ -777,17 +771,19 @@ class Action(_AttributeHolder): help string. 
If None, the 'dest' value will be used as the name. """ - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): + def __init__( + self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None, + ): self.option_strings = option_strings self.dest = dest self.nargs = nargs @@ -801,41 +797,44 @@ def __init__(self, def _get_kwargs(self): names = [ - 'option_strings', - 'dest', - 'nargs', - 'const', - 'default', - 'type', - 'choices', - 'help', - 'metavar', + "option_strings", + "dest", + "nargs", + "const", + "default", + "type", + "choices", + "help", + "metavar", ] return [(name, getattr(self, name)) for name in names] def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) + raise NotImplementedError(_(".__call__() not defined")) class _StoreAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): + def __init__( + self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None, + ): if nargs == 0: - raise ValueError('nargs for store actions must be > 0; if you ' - 'have nothing to store, actions such as store ' - 'true or store const may be more appropriate') + raise ValueError( + "nargs for store actions must be > 0; if you " + "have nothing to store, actions such as store " + "true or store const may be more appropriate" + ) if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) + raise ValueError("nargs must be %r to supply const" % OPTIONAL) super().__init__( option_strings=option_strings, dest=dest, @@ -846,22 +845,15 @@ def 
__init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + ) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) class _StoreConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): + def __init__(self, option_strings, dest, const, default=None, required=False, help=None, metavar=None): super().__init__( option_strings=option_strings, dest=dest, @@ -869,65 +861,49 @@ def __init__(self, const=const, default=default, required=required, - help=help) + help=help, + ) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.const) class _StoreTrueAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=False, - required=False, - help=None): + def __init__(self, option_strings, dest, default=False, required=False, help=None): super().__init__( - option_strings=option_strings, - dest=dest, - const=True, - default=default, - required=required, - help=help) + option_strings=option_strings, dest=dest, const=True, default=default, required=required, help=help + ) class _StoreFalseAction(_StoreConstAction): - - def __init__(self, - option_strings, - dest, - default=True, - required=False, - help=None): + def __init__(self, option_strings, dest, default=True, required=False, help=None): super().__init__( - option_strings=option_strings, - dest=dest, - const=False, - default=default, - required=required, - help=help) + option_strings=option_strings, dest=dest, const=False, default=default, required=required, help=help + ) class _AppendAction(Action): - - def __init__(self, - option_strings, - dest, - nargs=None, - const=None, - default=None, - type=None, - choices=None, - required=False, - help=None, - metavar=None): + def __init__( + self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + 
choices=None, + required=False, + help=None, + metavar=None, + ): if nargs == 0: - raise ValueError('nargs for append actions must be > 0; if arg ' - 'strings are not supplying the value to append, ' - 'the append const action may be more appropriate') + raise ValueError( + "nargs for append actions must be > 0; if arg " + "strings are not supplying the value to append, " + "the append const action may be more appropriate" + ) if const is not None and nargs != OPTIONAL: - raise ValueError('nargs must be %r to supply const' % OPTIONAL) + raise ValueError("nargs must be %r to supply const" % OPTIONAL) super().__init__( option_strings=option_strings, dest=dest, @@ -938,7 +914,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + ) def __call__(self, parser, namespace, values, option_string=None): items = _copy.copy(_ensure_value(namespace, self.dest, [])) @@ -947,15 +924,7 @@ def __call__(self, parser, namespace, values, option_string=None): class _AppendConstAction(Action): - - def __init__(self, - option_strings, - dest, - const, - default=None, - required=False, - help=None, - metavar=None): + def __init__(self, option_strings, dest, const, default=None, required=False, help=None, metavar=None): super().__init__( option_strings=option_strings, dest=dest, @@ -964,7 +933,8 @@ def __init__(self, default=default, required=required, help=help, - metavar=metavar) + metavar=metavar, + ) def __call__(self, parser, namespace, values, option_string=None): items = _copy.copy(_ensure_value(namespace, self.dest, [])) @@ -973,20 +943,10 @@ def __call__(self, parser, namespace, values, option_string=None): class _CountAction(Action): - - def __init__(self, - option_strings, - dest, - default=None, - required=False, - help=None): + def __init__(self, option_strings, dest, default=None, required=False, help=None): super().__init__( - option_strings=option_strings, - dest=dest, - nargs=0, - default=default, - 
required=required, - help=help) + option_strings=option_strings, dest=dest, nargs=0, default=default, required=required, help=help + ) def __call__(self, parser, namespace, values, option_string=None): new_count = _ensure_value(namespace, self.dest, 0) + 1 @@ -994,18 +954,8 @@ def __call__(self, parser, namespace, values, option_string=None): class _HelpAction(Action): - - def __init__(self, - option_strings, - dest=SUPPRESS, - default=SUPPRESS, - help=None): - super().__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) + def __init__(self, option_strings, dest=SUPPRESS, default=SUPPRESS, help=None): + super().__init__(option_strings=option_strings, dest=dest, default=default, nargs=0, help=help) def __call__(self, parser, namespace, values, option_string=None): parser.print_help() @@ -1013,19 +963,15 @@ def __call__(self, parser, namespace, values, option_string=None): class _VersionAction(Action): - - def __init__(self, - option_strings, - version=None, - dest=SUPPRESS, - default=SUPPRESS, - help="show program's version number and exit"): - super().__init__( - option_strings=option_strings, - dest=dest, - default=default, - nargs=0, - help=help) + def __init__( + self, + option_strings, + version=None, + dest=SUPPRESS, + default=SUPPRESS, + help="show program's version number and exit", + ): + super().__init__(option_strings=option_strings, dest=dest, default=default, nargs=0, help=help) self.version = version def __call__(self, parser, namespace, values, option_string=None): @@ -1038,20 +984,12 @@ def __call__(self, parser, namespace, values, option_string=None): class _SubParsersAction(Action): - class _ChoicesPseudoAction(Action): - def __init__(self, name, help): sup = super(_SubParsersAction._ChoicesPseudoAction, self) sup.__init__(option_strings=[], dest=name, help=help) - def __init__(self, - option_strings, - prog, - parser_class, - dest=SUPPRESS, - help=None, - metavar=None): + def __init__(self, 
option_strings, prog, parser_class, dest=SUPPRESS, help=None, metavar=None): self._prog_prefix = prog self._parser_class = parser_class @@ -1064,16 +1002,17 @@ def __init__(self, nargs=PARSER, choices=self._name_parser_map, help=help, - metavar=metavar) + metavar=metavar, + ) def add_parser(self, name, **kwargs): # set prog from the existing prefix - if kwargs.get('prog') is None: - kwargs['prog'] = f'{self._prog_prefix} {name}' + if kwargs.get("prog") is None: + kwargs["prog"] = f"{self._prog_prefix} {name}" # create a pseudo-action to hold the choice help - if 'help' in kwargs: - help = kwargs.pop('help') + if "help" in kwargs: + help = kwargs.pop("help") choice_action = self._ChoicesPseudoAction(name, help) self._choices_actions.append(choice_action) @@ -1097,8 +1036,8 @@ def __call__(self, parser, namespace, values, option_string=None): try: parser = self._name_parser_map[parser_name] except KeyError: - tup = parser_name, ', '.join(self._name_parser_map) - msg = _('unknown parser %r (choices: %s)' % tup) + tup = parser_name, ", ".join(self._name_parser_map) + msg = _("unknown parser %r (choices: %s)" % tup) raise ArgumentError(self, msg) # parse all the remaining options into the namespace @@ -1114,6 +1053,7 @@ def __call__(self, parser, namespace, values, option_string=None): # Type classes # ============== + class FileType: """Factory for creating file object types @@ -1127,16 +1067,16 @@ class FileType: the builtin open() function. 
""" - def __init__(self, mode='r', bufsize=None): + def __init__(self, mode="r", bufsize=None): self._mode = mode self._bufsize = bufsize def __call__(self, string): # the special argument "-" means sys.std{in,out} - if string == '-': - if 'r' in self._mode: + if string == "-": + if "r" in self._mode: return _sys.stdin - elif 'w' in self._mode: + elif "w" in self._mode: return _sys.stdout else: msg = _('argument "-" with mode %r' % self._mode) @@ -1150,8 +1090,9 @@ def __call__(self, string): def __repr__(self): args = [self._mode, self._bufsize] - args_str = ', '.join([repr(arg) for arg in args if arg is not None]) - return f'{type(self).__name__}({args_str})' + args_str = ", ".join(repr(arg) for arg in args if arg is not None) + return f"{type(self).__name__}({args_str})" + # =========================== # Optional and Positional Parsing @@ -1182,12 +1123,7 @@ def __contains__(self, key): class _ActionsContainer: - - def __init__(self, - description, - prefix_chars, - argument_default, - conflict_handler): + def __init__(self, description, prefix_chars, argument_default, conflict_handler): super().__init__() self.description = description @@ -1199,17 +1135,17 @@ def __init__(self, self._registries = {} # register actions - self.register('action', None, _StoreAction) - self.register('action', 'store', _StoreAction) - self.register('action', 'store_const', _StoreConstAction) - self.register('action', 'store_true', _StoreTrueAction) - self.register('action', 'store_false', _StoreFalseAction) - self.register('action', 'append', _AppendAction) - self.register('action', 'append_const', _AppendConstAction) - self.register('action', 'count', _CountAction) - self.register('action', 'help', _HelpAction) - self.register('action', 'version', _VersionAction) - self.register('action', 'parsers', _SubParsersAction) + self.register("action", None, _StoreAction) + self.register("action", "store", _StoreAction) + self.register("action", "store_const", _StoreConstAction) + 
self.register("action", "store_true", _StoreTrueAction) + self.register("action", "store_false", _StoreFalseAction) + self.register("action", "append", _AppendAction) + self.register("action", "append_const", _AppendConstAction) + self.register("action", "count", _CountAction) + self.register("action", "help", _HelpAction) + self.register("action", "version", _VersionAction) + self.register("action", "parsers", _SubParsersAction) # raise an exception if the conflict handler is invalid self._get_handler() @@ -1226,7 +1162,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + self._negative_number_matcher = _re.compile(r"^-\d+$|^-\d*\.\d+$") # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited @@ -1275,8 +1211,8 @@ def add_argument(self, *args, **kwargs): # argument chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: - if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + if args and "dest" in kwargs: + raise ValueError("dest supplied twice for positional argument") kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument @@ -1284,12 +1220,12 @@ def add_argument(self, *args, **kwargs): kwargs = self._get_optional_kwargs(*args, **kwargs) # if no default was supplied, use the parser-level default - if 'default' not in kwargs: - dest = kwargs['dest'] + if "default" not in kwargs: + dest = kwargs["dest"] if dest in self._defaults: - kwargs['default'] = self._defaults[dest] + kwargs["default"] = self._defaults[dest] elif self.argument_default is not None: - kwargs['default'] = self.argument_default + kwargs["default"] = self.argument_default # create the action object, and add it to the parser action_class = self._pop_action_class(kwargs) @@ -1298,9 +1234,9 @@ def 
add_argument(self, *args, **kwargs): action = action_class(**kwargs) # raise an error if the action type is not callable - type_func = self._registry_get('type', action.type, action.type) + type_func = self._registry_get("type", action.type, action.type) if not _callable(type_func): - raise ValueError('%r is not callable' % type_func) + raise ValueError("%r is not callable" % type_func) return self._add_action(action) @@ -1343,7 +1279,7 @@ def _add_container_actions(self, container): title_group_map = {} for group in self._action_groups: if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') + msg = _("cannot merge actions - two groups are named %r") raise ValueError(msg % (group.title)) title_group_map[group.title] = group @@ -1355,9 +1291,8 @@ def _add_container_actions(self, container): # create a new group matching the container's group if group.title not in title_group_map: title_group_map[group.title] = self.add_argument_group( - title=group.title, - description=group.description, - conflict_handler=group.conflict_handler) + title=group.title, description=group.description, conflict_handler=group.conflict_handler + ) # map the actions to their new group for action in group._group_actions: @@ -1367,8 +1302,7 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( - required=group.required) + mutex_group = self.add_mutually_exclusive_group(required=group.required) # map the actions to their new mutex group for action in group._group_actions: @@ -1380,16 +1314,16 @@ def _add_container_actions(self, container): def _get_positional_kwargs(self, dest, **kwargs): # make sure required is not specified - if 'required' in kwargs: + if "required" in kwargs: msg = _("'required' is an invalid argument for 
positionals") raise TypeError(msg) # mark positional arguments as required if at least one is # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: - kwargs['required'] = True + if kwargs.get("nargs") not in [OPTIONAL, ZERO_OR_MORE]: + kwargs["required"] = True + if kwargs.get("nargs") == ZERO_OR_MORE and "default" not in kwargs: + kwargs["required"] = True # return the keyword arguments with no option strings return dict(kwargs, dest=dest, option_strings=[]) @@ -1401,8 +1335,7 @@ def _get_optional_kwargs(self, *args, **kwargs): for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: - msg = _('invalid option string %r: ' - 'must start with a character %r') + msg = _("invalid option string %r: " "must start with a character %r") tup = option_string, self.prefix_chars raise ValueError(msg % tup) @@ -1414,7 +1347,7 @@ def _get_optional_kwargs(self, *args, **kwargs): long_option_strings.append(option_string) # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' - dest = kwargs.pop('dest', None) + dest = kwargs.pop("dest", None) if dest is None: if long_option_strings: dest_option_string = long_option_strings[0] @@ -1422,24 +1355,24 @@ def _get_optional_kwargs(self, *args, **kwargs): dest_option_string = option_strings[0] dest = dest_option_string.lstrip(self.prefix_chars) if not dest: - msg = _('dest= is required for options like %r') + msg = _("dest= is required for options like %r") raise ValueError(msg % option_string) - dest = dest.replace('-', '_') + dest = dest.replace("-", "_") # return the updated keyword arguments return dict(kwargs, dest=dest, option_strings=option_strings) def _pop_action_class(self, kwargs, default=None): - action = kwargs.pop('action', default) - return self._registry_get('action', action, action) + action = kwargs.pop("action", 
default) + return self._registry_get("action", action, action) def _get_handler(self): # determine function from conflict handler string - handler_func_name = '_handle_conflict_%s' % self.conflict_handler + handler_func_name = "_handle_conflict_%s" % self.conflict_handler try: return getattr(self, handler_func_name) except AttributeError: - msg = _('invalid conflict_resolution value: %r') + msg = _("invalid conflict_resolution value: %r") raise ValueError(msg % self.conflict_handler) def _check_conflict(self, action): @@ -1457,10 +1390,8 @@ def _check_conflict(self, action): conflict_handler(action, confl_optionals) def _handle_conflict_error(self, action, conflicting_actions): - message = _('conflicting option string(s): %s') - conflict_string = ', '.join([option_string - for option_string, _2 - in conflicting_actions]) + message = _("conflicting option string(s): %s") + conflict_string = ", ".join(option_string for option_string, _2 in conflicting_actions) raise ArgumentError(action, message % conflict_string) def _handle_conflict_resolve(self, action, conflicting_actions): @@ -1478,13 +1409,12 @@ def _handle_conflict_resolve(self, action, conflicting_actions): class _ArgumentGroup(_ActionsContainer): - def __init__(self, container, title=None, description=None, **kwargs): # add any missing keyword arguments by checking the container update = kwargs.setdefault - update('conflict_handler', container.conflict_handler) - update('prefix_chars', container.prefix_chars) - update('argument_default', container.argument_default) + update("conflict_handler", container.conflict_handler) + update("prefix_chars", container.prefix_chars) + update("argument_default", container.argument_default) super_init = super().__init__ super_init(description=description, **kwargs) @@ -1497,8 +1427,7 @@ def __init__(self, container, title=None, description=None, **kwargs): self._actions = container._actions self._option_string_actions = container._option_string_actions self._defaults = 
container._defaults - self._has_negative_number_optionals = \ - container._has_negative_number_optionals + self._has_negative_number_optionals = container._has_negative_number_optionals def _add_action(self, action): action = super()._add_action(action) @@ -1511,7 +1440,6 @@ def _remove_action(self, action): class _MutuallyExclusiveGroup(_ArgumentGroup): - def __init__(self, container, required=False): super().__init__(container) self.required = required @@ -1519,7 +1447,7 @@ def __init__(self, container, required=False): def _add_action(self, action): if action.required: - msg = _('mutually exclusive arguments must be optional') + msg = _("mutually exclusive arguments must be optional") raise ValueError(msg) action = self._container._add_action(action) self._group_actions.append(action) @@ -1548,33 +1476,40 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - add_help -- Add a -h/-help option """ - def __init__(self, - prog=None, - usage=None, - description=None, - epilog=None, - version=None, - parents=[], - formatter_class=HelpFormatter, - prefix_chars='-', - fromfile_prefix_chars=None, - argument_default=None, - conflict_handler='error', - add_help=True): + def __init__( + self, + prog=None, + usage=None, + description=None, + epilog=None, + version=None, + parents=[], + formatter_class=HelpFormatter, + prefix_chars="-", + fromfile_prefix_chars=None, + argument_default=None, + conflict_handler="error", + add_help=True, + ): if version is not None: import warnings + warnings.warn( """The "version" argument to ArgumentParser is deprecated. 
""" """Please use """ """"add_argument(..., action='version', version="N", ...)" """ - """instead""", DeprecationWarning) + """instead""", + DeprecationWarning, + ) superinit = super().__init__ - superinit(description=description, - prefix_chars=prefix_chars, - argument_default=argument_default, - conflict_handler=conflict_handler) + superinit( + description=description, + prefix_chars=prefix_chars, + argument_default=argument_default, + conflict_handler=conflict_handler, + ) # default setting for prog if prog is None: @@ -1589,32 +1524,39 @@ def __init__(self, self.add_help = add_help add_group = self.add_argument_group - self._positionals = add_group(_('positional arguments')) - self._optionals = add_group(_('optional arguments')) + self._positionals = add_group(_("positional arguments")) + self._optionals = add_group(_("optional arguments")) self._subparsers = None # register types def identity(string): return string - self.register('type', None, identity) + + self.register("type", None, identity) # add help and version arguments if necessary # (using explicit default to override global argument_default) - if '-' in prefix_chars: - default_prefix = '-' + if "-" in prefix_chars: + default_prefix = "-" else: default_prefix = prefix_chars[0] if self.add_help: self.add_argument( - default_prefix+'h', default_prefix*2+'help', - action='help', default=SUPPRESS, - help=_('show this help message and exit')) + default_prefix + "h", + default_prefix * 2 + "help", + action="help", + default=SUPPRESS, + help=_("show this help message and exit"), + ) if self.version: self.add_argument( - default_prefix+'v', default_prefix*2+'version', - action='version', default=SUPPRESS, + default_prefix + "v", + default_prefix * 2 + "version", + action="version", + default=SUPPRESS, version=self.version, - help=_("show program's version number and exit")) + help=_("show program's version number and exit"), + ) # add parent arguments and defaults for parent in parents: @@ -1631,13 +1573,13 
@@ def identity(string): # ======================= def _get_kwargs(self): names = [ - 'prog', - 'usage', - 'description', - 'version', - 'formatter_class', - 'conflict_handler', - 'add_help', + "prog", + "usage", + "description", + "version", + "formatter_class", + "conflict_handler", + "add_help", ] return [(name, getattr(self, name)) for name in names] @@ -1646,29 +1588,29 @@ def _get_kwargs(self): # ================================== def add_subparsers(self, **kwargs): if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) + self.error(_("cannot have multiple subparser arguments")) # add the parser class to the arguments if it's not present - kwargs.setdefault('parser_class', type(self)) + kwargs.setdefault("parser_class", type(self)) - if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) + if "title" in kwargs or "description" in kwargs: + title = _(kwargs.pop("title", "subcommands")) + description = _(kwargs.pop("description", None)) self._subparsers = self.add_argument_group(title, description) else: self._subparsers = self._positionals # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix - if kwargs.get('prog') is None: + if kwargs.get("prog") is None: formatter = self._get_formatter() positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') - kwargs['prog'] = formatter.format_help().strip() + formatter.add_usage(self.usage, positionals, groups, "") + kwargs["prog"] = formatter.format_help().strip() # create the parsers action and add it to the positionals list - parsers_class = self._pop_action_class(kwargs, 'parsers') + parsers_class = self._pop_action_class(kwargs, "parsers") action = parsers_class(option_strings=[], **kwargs) self._subparsers._add_action(action) @@ -1683,14 
+1625,10 @@ def _add_action(self, action): return action def _get_optional_actions(self): - return [action - for action in self._actions - if action.option_strings] + return [action for action in self._actions if action.option_strings] def _get_positional_actions(self): - return [action - for action in self._actions - if not action.option_strings] + return [action for action in self._actions if not action.option_strings] # ===================================== # Command line argument parsing methods @@ -1698,8 +1636,8 @@ def _get_positional_actions(self): def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _("unrecognized arguments: %s") + self.error(msg % " ".join(argv)) return args def parse_known_args(self, args=None, namespace=None): @@ -1750,7 +1688,7 @@ def _parse_known_args(self, arg_strings, namespace): for i, mutex_action in enumerate(mutex_group._group_actions): conflicts = action_conflicts.setdefault(mutex_action, []) conflicts.extend(group_actions[:i]) - conflicts.extend(group_actions[i + 1:]) + conflicts.extend(group_actions[i + 1 :]) # find all option indices, and determine the arg_string_pattern # which has an 'O' if there is an option at an index, @@ -1761,24 +1699,24 @@ def _parse_known_args(self, arg_strings, namespace): for i, arg_string in enumerate(arg_strings_iter): # all args after -- are non-options - if arg_string == '--': - arg_string_pattern_parts.append('-') + if arg_string == "--": + arg_string_pattern_parts.append("-") for arg_string in arg_strings_iter: - arg_string_pattern_parts.append('A') + arg_string_pattern_parts.append("A") # otherwise, add the arg to the arg strings # and note the index if it was an option else: option_tuple = self._parse_optional(arg_string) if option_tuple is None: - pattern = 'A' + pattern = "A" else: option_string_indices[i] = option_tuple - pattern = 'O' + pattern = 
"O" arg_string_pattern_parts.append(pattern) # join the pieces together to form the pattern - arg_strings_pattern = ''.join(arg_string_pattern_parts) + arg_strings_pattern = "".join(arg_string_pattern_parts) # converts arg strings to the appropriate and then takes the action seen_actions = set() @@ -1795,7 +1733,7 @@ def take_action(action, argument_strings, option_string=None): seen_non_default_actions.add(action) for conflict_action in action_conflicts.get(action, []): if conflict_action in seen_non_default_actions: - msg = _('not allowed with argument %s') + msg = _("not allowed with argument %s") action_name = _get_action_name(conflict_action) raise ArgumentError(action, msg % action_name) @@ -1825,7 +1763,7 @@ def consume_optional(start_index): # if there is an explicit argument, try to match the # optional's string arguments to only this if explicit_arg is not None: - arg_count = match_argument(action, 'A') + arg_count = match_argument(action, "A") # if the action is a single-dash option and takes no # arguments, try to parse more single-dash options out @@ -1841,7 +1779,7 @@ def consume_optional(start_index): action = optionals_map[option_string] explicit_arg = new_explicit_arg else: - msg = _('ignored explicit argument %r') + msg = _("ignored explicit argument %r") raise ArgumentError(action, msg % explicit_arg) # if the action expect exactly one argument, we've @@ -1855,7 +1793,7 @@ def consume_optional(start_index): # error if a double-dash option did not use the # explicit argument else: - msg = _('ignored explicit argument %r') + msg = _("ignored explicit argument %r") raise ArgumentError(action, msg % explicit_arg) # if there is no explicit argument, try to match the @@ -1891,13 +1829,13 @@ def consume_positionals(start_index): # slice off the appropriate arg strings for each Positional # and add the Positional and its args to the list for action, arg_count in zip(positionals, arg_counts): - args = arg_strings[start_index: start_index + arg_count] + 
args = arg_strings[start_index : start_index + arg_count] start_index += arg_count take_action(action, args) # slice off the Positionals that we just parsed and return the # index at which the Positionals' string args stopped - positionals[:] = positionals[len(arg_counts):] + positionals[:] = positionals[len(arg_counts) :] return start_index # consume Positionals and Optionals alternately, until we have @@ -1911,10 +1849,7 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals preceding the next option - next_option_string_index = min( - index - for index in option_string_indices - if index >= start_index) + next_option_string_index = min(index for index in option_string_indices if index >= start_index) if start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) @@ -1945,14 +1880,14 @@ def consume_positionals(start_index): # if we didn't use all the Positional objects, there were too few # arg strings supplied. 
if positionals: - self.error(_('too few arguments')) + self.error(_("too few arguments")) # make sure all required actions were present for action in self._actions: if action.required: if action not in seen_actions: name = _get_action_name(action) - self.error(_('argument %s is required') % name) + self.error(_("argument %s is required") % name) # make sure all required groups had one option present for group in self._mutually_exclusive_groups: @@ -1963,11 +1898,9 @@ def consume_positionals(start_index): # if no actions were used, report the error else: - names = [_get_action_name(action) - for action in group._group_actions - if action.help is not SUPPRESS] - msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) + names = [_get_action_name(action) for action in group._group_actions if action.help is not SUPPRESS] + msg = _("one of the arguments %s is required") + self.error(msg % " ".join(names)) # return the updated namespace and the extra arguments return namespace, extras @@ -2012,11 +1945,11 @@ def _match_argument(self, action, arg_strings_pattern): # raise an exception if we weren't able to find a match if match is None: nargs_errors = { - None: _('expected one argument'), - OPTIONAL: _('expected at most one argument'), - ONE_OR_MORE: _('expected at least one argument'), + None: _("expected one argument"), + OPTIONAL: _("expected at most one argument"), + ONE_OR_MORE: _("expected at least one argument"), } - default = _('expected %s argument(s)') % action.nargs + default = _("expected %s argument(s)") % action.nargs msg = nargs_errors.get(action.nargs, default) raise ArgumentError(action, msg) @@ -2029,8 +1962,7 @@ def _match_arguments_partial(self, actions, arg_strings_pattern): result = [] for i in range(len(actions), 0, -1): actions_slice = actions[:i] - pattern = ''.join([self._get_nargs_pattern(action) - for action in actions_slice]) + pattern = "".join(self._get_nargs_pattern(action) for action in actions_slice) match = 
_re.match(pattern, arg_strings_pattern) if match is not None: result.extend([len(string) for string in match.groups()]) @@ -2058,8 +1990,8 @@ def _parse_optional(self, arg_string): return None # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) + if "=" in arg_string: + option_string, explicit_arg = arg_string.split("=", 1) if option_string in self._option_string_actions: action = self._option_string_actions[option_string] return action, option_string, explicit_arg @@ -2070,14 +2002,14 @@ def _parse_optional(self, arg_string): # if multiple actions match, the option string was ambiguous if len(option_tuples) > 1: - options = ', '.join([_1 for _0, _1, _2 in option_tuples]) + options = ", ".join(_1 for _0, _1, _2 in option_tuples) tup = arg_string, options - self.error(_('ambiguous option: %s could match %s') % tup) + self.error(_("ambiguous option: %s could match %s") % tup) # if exactly one action matched, this segmentation is good, # so return the parsed action elif len(option_tuples) == 1: - option_tuple, = option_tuples + (option_tuple,) = option_tuples return option_tuple # if it was not found as an option, but it looks like a negative @@ -2088,7 +2020,7 @@ def _parse_optional(self, arg_string): return None # if it contains a space, it was meant to be a positional - if ' ' in arg_string: + if " " in arg_string: return None # it was meant to be an optional but there is no such option @@ -2102,8 +2034,8 @@ def _get_option_tuples(self, option_string): # split at the '=' chars = self.prefix_chars if option_string[0] in chars and option_string[1] in chars: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) + if "=" in option_string: + option_prefix, explicit_arg = option_string.split("=", 1) else: option_prefix = option_string explicit_arg = None @@ -2134,7 +2066,7 @@ def _get_option_tuples(self, option_string): # shouldn't ever get 
here else: - self.error(_('unexpected option string: %s') % option_string) + self.error(_("unexpected option string: %s") % option_string) # return the collected option tuples return result @@ -2146,36 +2078,36 @@ def _get_nargs_pattern(self, action): # the default (None) is assumed to be a single argument if nargs is None: - nargs_pattern = '(-*A-*)' + nargs_pattern = "(-*A-*)" # allow zero or one arguments elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' + nargs_pattern = "(-*A?-*)" # allow zero or more arguments elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' + nargs_pattern = "(-*[A-]*)" # allow one or more arguments elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' + nargs_pattern = "(-*A[A-]*)" # allow any number of options or arguments elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' + nargs_pattern = "([-AO]*)" # allow one argument followed by any number of options or arguments elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' + nargs_pattern = "(-*A[-AO]*)" # all others should be integers else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) + nargs_pattern = "(-*%s-*)" % "-*".join("A" * nargs) # if this is an optional action, -- is not allowed if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') + nargs_pattern = nargs_pattern.replace("-*", "") + nargs_pattern = nargs_pattern.replace("-", "") # return the pattern return nargs_pattern @@ -2186,7 +2118,7 @@ def _get_nargs_pattern(self, action): def _get_values(self, action, arg_strings): # for everything but PARSER args, strip out '--' if action.nargs not in [PARSER, REMAINDER]: - arg_strings = [s for s in arg_strings if s != '--'] + arg_strings = [s for s in arg_strings if s != "--"] # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2200,8 +2132,7 @@ def _get_values(self, action, arg_strings): # when nargs='*' on a positional, if there were 
no command-line # args, use the default if it is anything other than None - elif (not arg_strings and action.nargs == ZERO_OR_MORE - and not action.option_strings): + elif not arg_strings and action.nargs == ZERO_OR_MORE and not action.option_strings: if action.default is not None: value = action.default else: @@ -2210,7 +2141,7 @@ def _get_values(self, action, arg_strings): # single argument or optional argument produces a single value elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: - arg_string, = arg_strings + (arg_string,) = arg_strings value = self._get_value(action, arg_string) self._check_value(action, value) @@ -2233,9 +2164,9 @@ def _get_values(self, action, arg_strings): return value def _get_value(self, action, arg_string): - type_func = self._registry_get('type', action.type, action.type) + type_func = self._registry_get("type", action.type, action.type) if not _callable(type_func): - msg = _('%r is not callable') + msg = _("%r is not callable") raise ArgumentError(action, msg % type_func) # convert the value to the appropriate type @@ -2244,14 +2175,14 @@ def _get_value(self, action, arg_string): # ArgumentTypeErrors indicate errors except ArgumentTypeError: - name = getattr(action.type, '__name__', repr(action.type)) + name = getattr(action.type, "__name__", repr(action.type)) msg = str(_sys.exc_info()[1]) raise ArgumentError(action, msg) # TypeErrors or ValueErrors also indicate errors except (TypeError, ValueError): - name = getattr(action.type, '__name__', repr(action.type)) - msg = _('invalid %s value: %r') + name = getattr(action.type, "__name__", repr(action.type)) + msg = _("invalid %s value: %r") raise ArgumentError(action, msg % (name, arg_string)) # return the converted value @@ -2260,8 +2191,8 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) if action.choices is not None and value not in action.choices: - tup = value, ', 
'.join(map(repr, action.choices)) - msg = _('invalid choice: %r (choose from %s)') % tup + tup = value, ", ".join(map(repr, action.choices)) + msg = _("invalid choice: %r (choose from %s)") % tup raise ArgumentError(action, msg) # ======================= @@ -2269,16 +2200,14 @@ def _check_value(self, action, value): # ======================= def format_usage(self): formatter = self._get_formatter() - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) + formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups) return formatter.format_help() def format_help(self): formatter = self._get_formatter() # usage - formatter.add_usage(self.usage, self._actions, - self._mutually_exclusive_groups) + formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups) # description formatter.add_text(self.description) @@ -2298,10 +2227,12 @@ def format_help(self): def format_version(self): import warnings + warnings.warn( 'The format_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) + "argument to ArgumentParser is no longer supported.", + DeprecationWarning, + ) formatter = self._get_formatter() formatter.add_text(self.version) return formatter.format_help() @@ -2324,10 +2255,12 @@ def print_help(self, file=None): def print_version(self, file=None): import warnings + warnings.warn( 'The print_version method is deprecated -- the "version" ' - 'argument to ArgumentParser is no longer supported.', - DeprecationWarning) + "argument to ArgumentParser is no longer supported.", + DeprecationWarning, + ) self._print_message(self.format_version(), file) def _print_message(self, message, file=None): @@ -2354,4 +2287,4 @@ def error(self, message): should either exit or raise an exception. 
""" self.print_usage(_sys.stderr) - self.exit(2, _('%s: error: %s\n') % (self.prog, message)) + self.exit(2, _("%s: error: %s\n") % (self.prog, message)) diff --git a/lib/bx/cookbook/attribute.py b/lib/bx/cookbook/attribute.py index cfaf62e1..4775185f 100644 --- a/lib/bx/cookbook/attribute.py +++ b/lib/bx/cookbook/attribute.py @@ -51,18 +51,18 @@ def del_baz(self): baz = property(fget=get_baz, fset=set_baz, fdel=del_baz, doc="baz") """ -__all__ = ['attribute', 'readable', 'writable'] -__version__ = '3.0' -__author__ = 'Sean Ross' -__credits__ = ['Guido van Rossum', 'Garth Kidd'] -__created__ = '10/21/02' +__all__ = ["attribute", "readable", "writable"] +__version__ = "3.0" +__author__ = "Sean Ross" +__credits__ = ["Guido van Rossum", "Garth Kidd"] +__created__ = "10/21/02" import sys def mangle(classname, attrname): """mangles name according to python name-mangling - conventions for private variables""" + conventions for private variables""" return f"_{classname}__{attrname}" @@ -73,44 +73,49 @@ def class_space(classlevel=3): classdict = frame.f_locals return classname, classdict + # convenience function def readable(**kwds): "returns one read-only property for each (key,value) pair in kwds" - return _attribute(permission='r', **kwds) + return _attribute(permission="r", **kwds) + # convenience function def writable(**kwds): "returns one write-only property for each (key,value) pair in kwds" - return _attribute(permission='w', **kwds) + return _attribute(permission="w", **kwds) + # needed because of the way class_space is resolved in _attribute -def attribute(permission='rwd', **kwds): +def attribute(permission="rwd", **kwds): """returns one property for each (key,value) pair in kwds; - each property provides the specified level of access(permission): - 'r': readable, 'w':writable, 'd':deletable + each property provides the specified level of access(permission): + 'r': readable, 'w':writable, 'd':deletable """ return _attribute(permission, **kwds) + # based on code 
by Guido van Rossum, comp.lang.python 2001-07-31 -def _attribute(permission='rwd', **kwds): +def _attribute(permission="rwd", **kwds): """returns one property for each (key,value) pair in kwds; - each property provides the specified level of access(permission): - 'r': readable, 'w':writable, 'd':deletable + each property provides the specified level of access(permission): + 'r': readable, 'w':writable, 'd':deletable """ classname, classdict = class_space() def _property(attrname, default): propname, attrname = attrname, mangle(classname, attrname) fget, fset, fdel, doc = None, None, None, propname - if 'r' in permission: + if "r" in permission: + def fget(self): value = default try: @@ -118,10 +123,14 @@ def fget(self): except AttributeError: setattr(self, attrname, default) return value - if 'w' in permission: + + if "w" in permission: + def fset(self, value): setattr(self, attrname, value) - if 'd' in permission: + + if "d" in permission: + def fdel(self): try: delattr(self, attrname) @@ -129,6 +138,7 @@ def fdel(self): pass # calling fget can restore this attribute, so remove property delattr(self.__class__, propname) + return property(fget=fget, fset=fset, fdel=fdel, doc=doc) for attrname, default in kwds.items(): diff --git a/lib/bx/cookbook/doc_optparse.py b/lib/bx/cookbook/doc_optparse.py index 559c0396..81f9afcd 100644 --- a/lib/bx/cookbook/doc_optparse.py +++ b/lib/bx/cookbook/doc_optparse.py @@ -33,7 +33,7 @@ import sys import traceback -USAGE = re.compile(r'(?s)\s*usage: (.*?)(\n[ \t]*\n|$)') +USAGE = re.compile(r"(?s)\s*usage: (.*?)(\n[ \t]*\n|$)") def nonzero(self): # will become the nonzero method of optparse.Values @@ -74,12 +74,12 @@ def parse(docstring, arglist=None): try: p = optparse.OptionParser(optlines[0], conflict_handler="resolve") for line in optlines[1:]: - opt, help = line.split(':')[:2] + opt, help = line.split(":")[:2] # Make both short and long optional (but at least one) # Old: short,long=opt.split(',')[:2] opt_strings = [] action = 
"store_true" - for k in opt.split(', '): + for k in opt.split(", "): k = k.strip() if k.startswith("--") and "=" in k: action = "store" diff --git a/lib/bx/cookbook/progress_bar.py b/lib/bx/cookbook/progress_bar.py index 2bc6c304..01cd33c1 100644 --- a/lib/bx/cookbook/progress_bar.py +++ b/lib/bx/cookbook/progress_bar.py @@ -12,12 +12,12 @@ class ProgressBar: def __init__(self, minValue=0, maxValue=10, totalWidth=72): - self.progBar = "[]" # This holds the progress bar string + self.progBar = "[]" # This holds the progress bar string self.min = minValue self.max = maxValue self.span = maxValue - minValue self.width = totalWidth - self.amount = 0 # When amount == max, we are 100% done + self.amount = 0 # When amount == max, we are 100% done self.update(0) # Build progress bar string def update(self, newAmount=0): @@ -40,20 +40,20 @@ def update(self, newAmount=0): # build a progress bar with hashes and spaces if allFull == numHashes: - self.progBar = "[" + '='*(numHashes) + "]" + self.progBar = "[" + "=" * (numHashes) + "]" else: - self.progBar = "[" + '='*(numHashes-1) + '>' + ' '*(allFull-numHashes) + "]" + self.progBar = "[" + "=" * (numHashes - 1) + ">" + " " * (allFull - numHashes) + "]" # figure out where to put the percentage, roughly centered percentPlace = (len(self.progBar) / 2) - len(str(percentDone)) percentString = str(percentDone) + "%" # slice the percentage into the bar - self.progBar = self.progBar[0:percentPlace] + percentString + self.progBar[percentPlace+len(percentString):] + self.progBar = self.progBar[0:percentPlace] + percentString + self.progBar[percentPlace + len(percentString) :] def update_and_print(self, newAmount=0, f=sys.stdout): self.update(newAmount) - print("\r", self, end=' ', file=f) + print("\r", self, end=" ", file=f) f.flush() def __str__(self): @@ -75,7 +75,7 @@ def iterprogress(sized_iterable): for i in range(1000): bar.update(i) - print("\r", bar, end=' ') + print("\r", bar, end=" ") sys.stdout.flush() print() diff --git 
a/lib/bx/gene_reader.py b/lib/bx/gene_reader.py index 06f87a8e..15cd36a9 100644 --- a/lib/bx/gene_reader.py +++ b/lib/bx/gene_reader.py @@ -19,15 +19,15 @@ ) -def GeneReader(fh, format='gff'): - """ yield chrom, strand, gene_exons, name """ +def GeneReader(fh, format="gff"): + """yield chrom, strand, gene_exons, name""" - known_formats = ('gff', 'gtf', 'bed') + known_formats = ("gff", "gtf", "bed") if format not in known_formats: - print('{} format not in {}'.format(format, ",".join(known_formats)), file=sys.stderr) - raise Exception('?') + print("{} format not in {}".format(format, ",".join(known_formats)), file=sys.stderr) + raise Exception("?") - if format == 'bed': + if format == "bed": for line in fh: f = line.strip().split() chrom = f[0] @@ -37,23 +37,23 @@ def GeneReader(fh, format='gff'): int(f[6]) # cdsStart int(f[7]) # cdsEnd int(f[9]) # blockCount - blockSizes = [int(i) for i in f[10].strip(',').split(',')] - blockStarts = [chrom_start + int(i) for i in f[11].strip(',').split(',')] + blockSizes = [int(i) for i in f[10].strip(",").split(",")] + blockStarts = [chrom_start + int(i) for i in f[11].strip(",").split(",")] # grab cdsStart - cdsEnd gene_exons = [] for base, offset in zip(blockStarts, blockSizes): exon_start = base - exon_end = base+offset + exon_end = base + offset gene_exons.append((exon_start, exon_end)) yield chrom, strand, gene_exons, name genelist = {} grouplist = [] - if format == 'gff' or format == 'gtf': + if format == "gff" or format == "gtf": for line in fh: - if line.startswith('#'): + if line.startswith("#"): continue - fields = line.strip().split('\t') + fields = line.strip().split("\t") if len(fields) < 9: continue @@ -64,8 +64,8 @@ def GeneReader(fh, format='gff'): ex_end = int(fields[4]) # + 1 # make exclusive strand = fields[6] - if format == 'gtf': - group = fields[8].split(';')[0] + if format == "gtf": + group = fields[8].split(";")[0] else: group = fields[8] @@ -83,15 +83,15 @@ def GeneReader(fh, format='gff'): yield chrom, 
strand, gene_exons, gene -def CDSReader(fh, format='gff'): - """ yield chrom, strand, cds_exons, name """ +def CDSReader(fh, format="gff"): + """yield chrom, strand, cds_exons, name""" - known_formats = ('gff', 'gtf', 'bed') + known_formats = ("gff", "gtf", "bed") if format not in known_formats: - print('{} format not in {}'.format(format, ",".join(known_formats)), file=sys.stderr) - raise Exception('?') + print("{} format not in {}".format(format, ",".join(known_formats)), file=sys.stderr) + raise Exception("?") - if format == 'bed': + if format == "bed": for line in fh: f = line.strip().split() chrom = f[0] @@ -101,8 +101,8 @@ def CDSReader(fh, format='gff'): cdsStart = int(f[6]) cdsEnd = int(f[7]) int(f[9]) # blockCount - blockSizes = [int(i) for i in f[10].strip(',').split(',')] - blockStarts = [chrom_start + int(i) for i in f[11].strip(',').split(',')] + blockSizes = [int(i) for i in f[10].strip(",").split(",")] + blockStarts = [chrom_start + int(i) for i in f[11].strip(",").split(",")] # grab cdsStart - cdsEnd cds_exons = [] @@ -112,20 +112,20 @@ def CDSReader(fh, format='gff'): if base > cdsEnd: continue exon_start = max(base, cdsStart) - exon_end = min(base+offset, cdsEnd) + exon_end = min(base + offset, cdsEnd) cds_exons.append((exon_start, exon_end)) yield chrom, strand, cds_exons, name genelist = {} grouplist = [] - if format == 'gff' or format == 'gtf': + if format == "gff" or format == "gtf": for line in fh: - if line.startswith('#'): + if line.startswith("#"): continue - fields = line.strip().split('\t') + fields = line.strip().split("\t") if len(fields) < 9: continue - if fields[2] not in ('CDS', 'stop_codon', 'start_codon'): + if fields[2] not in ("CDS", "stop_codon", "start_codon"): continue # fields @@ -135,8 +135,8 @@ def CDSReader(fh, format='gff'): ex_end = int(fields[4]) # + 1 # make exclusive strand = fields[6] - if format == 'gtf': - group = fields[8].split(';')[0] + if format == "gtf": + group = fields[8].split(";")[0] else: group = 
fields[8] @@ -150,10 +150,10 @@ def CDSReader(fh, format='gff'): # for gene in genelist.values(): for gene in grouplist: chrom, strand, cds_exons = genelist[gene] - seqlen = sum(a[1]-a[0] for a in cds_exons) + seqlen = sum(a[1] - a[0] for a in cds_exons) overhang = seqlen % 3 if overhang > 0: - if strand == '+': + if strand == "+": cds_exons[-1] = (cds_exons[-1][0], cds_exons[-1][1] - overhang) else: cds_exons[0] = (cds_exons[0][0] + overhang, cds_exons[0][1]) @@ -161,7 +161,7 @@ def CDSReader(fh, format='gff'): yield chrom, strand, cds_exons, gene -def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None): +def FeatureReader(fh, format="gff", alt_introns_subtract="exons", gtf_parse=None): """ yield chrom, strand, cds_exons, introns, exons, name @@ -172,12 +172,12 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None for chrom, strand, cds_exons, introns, exons, name in FeatureReader( sys.stdin, format='gtf', gtf_parse=gene_name ) """ - known_formats = ('gff', 'gtf', 'bed') + known_formats = ("gff", "gtf", "bed") if format not in known_formats: - print('{} format not in {}'.format(format, ",".join(known_formats)), file=sys.stderr) - raise Exception('?') + print("{} format not in {}".format(format, ",".join(known_formats)), file=sys.stderr) + raise Exception("?") - if format == 'bed': + if format == "bed": for line in fh: f = line.strip().split() chrom = f[0] @@ -187,8 +187,8 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None cdsStart = int(f[6]) cdsEnd = int(f[7]) int(f[9]) # blockCount - blockSizes = [int(i) for i in f[10].strip(',').split(',')] - blockStarts = [chrom_start + int(i) for i in f[11].strip(',').split(',')] + blockSizes = [int(i) for i in f[10].strip(",").split(",")] + blockStarts = [chrom_start + int(i) for i in f[11].strip(",").split(",")] # grab cdsStart - cdsEnd cds_exons = [] @@ -201,11 +201,11 @@ def FeatureReader(fh, format='gff', 
alt_introns_subtract="exons", gtf_parse=None continue # exons exon_start = base - exon_end = base+offset + exon_end = base + offset exons.append((exon_start, exon_end)) # cds exons exon_start = max(base, cdsStart) - exon_end = min(base+offset, cdsEnd) + exon_end = min(base + offset, cdsEnd) cds_exons.append((exon_start, exon_end)) cds_exons = bitset_union(cds_exons) exons = bitset_union(exons) @@ -214,11 +214,11 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None genelist = {} grouplist = [] - if format == 'gff' or format == 'gtf': + if format == "gff" or format == "gtf": for line in fh: - if line.startswith('#'): + if line.startswith("#"): continue - fields = line.strip().split('\t') + fields = line.strip().split("\t") if len(fields) < 9: continue @@ -229,9 +229,9 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None ex_end = int(fields[4]) # + 1 # make exclusive strand = fields[6] - if format == 'gtf': + if format == "gtf": if not gtf_parse: - group = fields[8].split(';')[0] + group = fields[8].split(";")[0] else: group = gtf_parse(fields[8]) else: @@ -245,24 +245,24 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None # chrom, strand, cds_exons, introns, exons, cds_start, cds_end genelist[group] = [chrom, strand, [], [], [], None, None] - if fields[2] == 'exon': + if fields[2] == "exon": genelist[group][4].append((ex_st, ex_end)) - elif fields[2] in ('CDS', 'stop_codon', 'start_codon'): + elif fields[2] in ("CDS", "stop_codon", "start_codon"): genelist[group][2].append((ex_st, ex_end)) - if fields[2] == 'start_codon': - if strand == '+': + if fields[2] == "start_codon": + if strand == "+": genelist[group][5] = ex_st else: genelist[group][5] = ex_end - if fields[2] == 'stop_codon': - if strand == '+': + if fields[2] == "stop_codon": + if strand == "+": genelist[group][5] = ex_end else: genelist[group][5] = ex_st - elif fields[2] == 'intron': + elif fields[2] == "intron": 
genelist[group][3].append((ex_st, ex_end)) for gene in grouplist: @@ -273,25 +273,25 @@ def FeatureReader(fh, format='gff', alt_introns_subtract="exons", gtf_parse=None # assure that cds exons were within the cds range if cds_start is not None and cds_end is not None: - if strand == '+': + if strand == "+": cds_exons = bitset_intersect(cds_exons, [(cds_start, cds_end)]) else: cds_exons = bitset_intersect(cds_exons, [(cds_end, cds_start)]) # assure that introns are non-overlapping with themselves or exons if alt_introns_subtract: - if alt_introns_subtract == 'exons': + if alt_introns_subtract == "exons": introns = bitset_subtract(introns, exons) - if alt_introns_subtract == 'cds_exons': + if alt_introns_subtract == "cds_exons": introns = bitset_subtract(introns, cds_exons) else: introns = bitset_union(introns) # assure CDS is a multiple of 3, trim from last exon if necessary - seqlen = sum(a[1]-a[0] for a in cds_exons) + seqlen = sum(a[1] - a[0] for a in cds_exons) overhang = seqlen % 3 if overhang > 0: - if strand == '+': + if strand == "+": cds_exons[-1] = (cds_exons[-1][0], cds_exons[-1][1] - overhang) else: cds_exons[0] = (cds_exons[0][0] + overhang, cds_exons[0][1]) diff --git a/lib/bx/interval_index_file.py b/lib/bx/interval_index_file.py index dc5db458..1adab781 100644 --- a/lib/bx/interval_index_file.py +++ b/lib/bx/interval_index_file.py @@ -84,12 +84,12 @@ import sys from bisect import ( insort, - insort_right + insort_right, ) from struct import ( calcsize, pack, - unpack + unpack, ) from warnings import warn @@ -105,15 +105,15 @@ except ImportError: seeklzop = None -__all__ = ['Indexes', 'Index'] +__all__ = ["Indexes", "Index"] -MAGIC = 0x2cff800a +MAGIC = 0x2CFF800A VERSION = 2 # These three constants determine the structure of the default binning strategy -BIN_LEVELS = 6 # Number of levels of bins to build +BIN_LEVELS = 6 # Number of levels of bins to build BIN_FIRST_SHIFT = 17 # Number of bits for the bottom level bin -BIN_NEXT_SHIFT = 3 # Number of 
bits for each higher level bin +BIN_NEXT_SHIFT = 3 # Number of bits for each higher level bin # Build offset and max size arrays for each bin level BIN_OFFSETS = [1, 0] @@ -129,7 +129,7 @@ MIN = 0 OLD_MAX = 512 * 1024 * 1024 # Maximum size supported by versions < 2 DEFAULT_MAX = 512 * 1024 * 1024 # Default max size to use when none is passed -MAX = 2 ** 31 # Absolute max size (limited by file format) +MAX = 2**31 # Absolute max size (limited by file format) def offsets_for_max_size(max_size): @@ -141,7 +141,7 @@ def offsets_for_max_size(max_size): break else: raise Exception("%d is larger than the maximum possible size (%d)" % (max_size, BIN_OFFSETS_MAX[0])) - return BIN_OFFSETS[(len(BIN_OFFSETS) - i - 1):] + return BIN_OFFSETS[(len(BIN_OFFSETS) - i - 1) :] def bin_for_range(start, end, offsets=None): @@ -164,13 +164,14 @@ class AbstractMultiIndexedAccess: """ Allows accessing multiple indexes / files as if they were one """ + indexed_access_class = None def __init__(self, filenames, index_filenames=None, keep_open=False, use_cache=False, **kwargs): # TODO: Handle index_filenames argument self.indexes = [ - self.new_indexed_access(fname, keep_open=keep_open, use_cache=use_cache, **kwargs) - for fname in filenames] + self.new_indexed_access(fname, keep_open=keep_open, use_cache=use_cache, **kwargs) for fname in filenames + ] def new_indexed_access(self, data_filename, index_filename=None, keep_open=False, **kwargs): return self.indexed_access_class(data_filename, index_filename, keep_open, **kwargs) @@ -239,7 +240,7 @@ def close(self): def open_data(self): if self.file_type == "plain": - return open(self.data_filename, 'rb') + return open(self.data_filename, "rb") elif self.file_type == "bz2t": f = seekbzip2.SeekableBzip2File(self.data_filename, self.table_filename) if self.use_cache: @@ -251,9 +252,7 @@ def open_data(self): block_cache_size = 20 else: block_cache_size = 0 - f = seeklzop.SeekableLzopFile(self.data_filename, - self.table_filename, - 
block_cache_size=block_cache_size) + f = seeklzop.SeekableLzopFile(self.data_filename, self.table_filename, block_cache_size=block_cache_size) return f def get(self, src, start, end): @@ -299,7 +298,9 @@ def add(self, name, start, end, val, max=DEFAULT_MAX): def get(self, name): if self.indexes[name] is None: offset, value_size = self.offsets[name] - self.indexes[name] = Index(filename=self.filename, offset=offset, value_size=value_size, version=self.version) + self.indexes[name] = Index( + filename=self.filename, offset=offset, value_size=value_size, version=self.version + ) return self.indexes[name] def find(self, name, start, end): @@ -311,12 +312,16 @@ def find(self, name, start, end): def open(self, filename): self.filename = filename self.offsets = dict() # (will map key to (offset,value_size)) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: magic, version, length = read_packed(f, ">3I") if magic != MAGIC: raise Exception("File does not have expected header") if version > VERSION: - warn("File claims version %d, I don't known anything about versions beyond %d. Attempting to continue", version, VERSION) + warn( + "File claims version %d, I don't known anything about versions beyond %d. 
Attempting to continue", + version, + VERSION, + ) self.version = version for _ in range(length): key_len = read_packed(f, ">I") @@ -358,10 +363,9 @@ def write(self, f): class Index: - def __init__(self, min=MIN, max=DEFAULT_MAX, filename=None, offset=0, value_size=None, version=None): self._value_size = value_size - self.max_val = 1 # (1, rather than 0, to force value_size > 0) + self.max_val = 1 # (1, rather than 0, to force value_size > 0) if filename is None: self.new(min, max) else: @@ -372,6 +376,7 @@ def get_value_size(self): return self._value_size else: return round_up_to_4(bytes_of(self.max_val)) + value_size = property(fget=get_value_size) def new(self, min, max): @@ -391,7 +396,7 @@ def open(self, filename, offset, version): self.filename = filename self.offset = offset # Open the file and seek to where we expect our header - f = open(filename, 'rb') + f = open(filename, "rb") f.seek(offset) # Read min/max min, max = read_packed(f, ">2I") @@ -445,14 +450,14 @@ def load_bin(self, index): if self.bin_sizes[index] == 0: self.bins[index] = bin return - f = open(self.filename, 'rb') + f = open(self.filename, "rb") f.seek(self.bin_offsets[index]) # One big read for happy NFS item_size = self.value_size + calcsize(">2I") buffer = f.read(self.bin_sizes[index] * item_size) for i in range(self.bin_sizes[index]): - start, end = unpack(">2I", buffer[i*item_size:i*item_size+8]) - val = unpack_uints(buffer[i*item_size+8:(i+1)*item_size]) + start, end = unpack(">2I", buffer[i * item_size : i * item_size + 8]) + val = unpack_uints(buffer[i * item_size + 8 : (i + 1) * item_size]) bin.append((start, end, val)) self.bins[index] = bin f.close() @@ -507,7 +512,7 @@ def write_packed_uints(f, v, num_bytes): def unpack_uints(parts): - chunks = len(parts)/4 + chunks = len(parts) / 4 vals = unpack(">%dI" % chunks, parts) val = vals[0] for v in vals[1:]: diff --git a/lib/bx/interval_index_file_tests.py b/lib/bx/interval_index_file_tests.py index 60994763..0b94fb92 100644 --- 
a/lib/bx/interval_index_file_tests.py +++ b/lib/bx/interval_index_file_tests.py @@ -6,7 +6,13 @@ def test_offsets(): - assert interval_index_file.offsets_for_max_size(512*1024*1024 - 1) == [512 + 64 + 8 + 1, 64 + 8 + 1, 8 + 1, 1, 0] + assert interval_index_file.offsets_for_max_size(512 * 1024 * 1024 - 1) == [ + 512 + 64 + 8 + 1, + 64 + 8 + 1, + 8 + 1, + 1, + 0, + ] def test_interval_index_file(): diff --git a/lib/bx/intervals/cluster_tests.py b/lib/bx/intervals/cluster_tests.py index 65645450..2a51ffe3 100644 --- a/lib/bx/intervals/cluster_tests.py +++ b/lib/bx/intervals/cluster_tests.py @@ -1,6 +1,7 @@ import os import sys import unittest + try: sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) except Exception: @@ -53,10 +54,10 @@ def test_startbeforeend(self): def test_large_sorted(self): upto = 100000 - pairs = [(2*i + 1, 2*i + 2) for i in range(upto)] + pairs = [(2 * i + 1, 2 * i + 2) for i in range(upto)] self.insertpairs(pairs) - self.tree.insert(0, upto*3, upto) - self.assertEqual([(0, upto*3, [x for x in range(upto+1)])], self.tree.getregions()) + self.tree.insert(0, upto * 3, upto) + self.assertEqual([(0, upto * 3, [x for x in range(upto + 1)])], self.tree.getregions()) def test_minregions(self): self.tree = ClusterTree(0, 2) @@ -80,14 +81,45 @@ def test_merge_left_right(self): self.assertEqual([(1, 2, [3]), (3, 8, [0, 1, 4]), (9, 10, [2])], self.tree.getregions()) def test_larger(self): - pairs = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16), (17, 18), (19, 20), - (1, 3), (4, 10), (10, 15), (15, 20), (21, 22)] + pairs = [ + (1, 2), + (3, 4), + (5, 6), + (7, 8), + (9, 10), + (11, 12), + (13, 14), + (15, 16), + (17, 18), + (19, 20), + (1, 3), + (4, 10), + (10, 15), + (15, 20), + (21, 22), + ] self.insertpairs(pairs) self.assertEqual([(1, 20, [x for x in range(14)]), (21, 22, [14])], self.tree.getregions()) def test_another(self): - pairs = [(3, 4, 1), (13, 14, 6), (21, 22, 14), (5, 6, 2), (4, 10, 11), (1, 2, 0), (11, 
12, 5), (1, 3, 10), (7, 8, 3), (15, 16, 7), (15, 20, 13), (19, 20, 9), (10, 15, 12), (17, 18, 8), (9, 10, 4)] + pairs = [ + (3, 4, 1), + (13, 14, 6), + (21, 22, 14), + (5, 6, 2), + (4, 10, 11), + (1, 2, 0), + (11, 12, 5), + (1, 3, 10), + (7, 8, 3), + (15, 16, 7), + (15, 20, 13), + (19, 20, 9), + (10, 15, 12), + (17, 18, 8), + (9, 10, 4), + ] # pairs = [(3, 4, 1), (13, 14, 6), (21, 22, 14), (5, 6, 2), (4, 10, 11), (1, 2, 0), (11, 12, 5), (1, 3, 10), (7, 8, 3), (15, 16, 7), (15, 20, 13), (19, 20, 9), (10, 15, 12), (9, 10, 4)] for s, e, i in pairs: self.tree.insert(s, e, i) @@ -101,5 +133,5 @@ def test_none(self): self.assertEqual([], self.tree.getregions()) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/lib/bx/intervals/intersection.pyx b/lib/bx/intervals/intersection.pyx index 062c6a4d..2b0be0c1 100644 --- a/lib/bx/intervals/intersection.pyx +++ b/lib/bx/intervals/intersection.pyx @@ -21,6 +21,7 @@ preserves all information about the intervals (unlike bitset projection methods) import operator + cdef extern from "stdlib.h": int ceil(float f) float log(float f) diff --git a/lib/bx/intervals/intersection_tests.py b/lib/bx/intervals/intersection_tests.py index 3993ab11..059d87ef 100644 --- a/lib/bx/intervals/intersection_tests.py +++ b/lib/bx/intervals/intersection_tests.py @@ -7,13 +7,14 @@ except Exception: sys.path.insert(0, os.path.dirname(os.path.abspath("."))) -from bx.intervals.intersection import Interval -from bx.intervals.intersection import IntervalNode -from bx.intervals.intersection import IntervalTree +from bx.intervals.intersection import ( + Interval, + IntervalNode, + IntervalTree, +) class NeighborTestCase(unittest.TestCase): - def setUp(self): iv = IntervalNode(50, 59, Interval(50, 59)) for i in range(0, 110, 10): @@ -40,7 +41,7 @@ def test_right(self): self.assertEqual(str(iv.left(60, n=2)), str([Interval(50, 59), Interval(40, 49)])) def get_right_start(b10): - r = iv.right(b10+1, n=1) + r = iv.right(b10 + 1, 
n=1) assert len(r) == 1 return r[0].start @@ -48,13 +49,12 @@ def get_right_start(b10): self.assertEqual(get_right_start(i), i + 10) for i in range(0, 100, 10): - r = iv.right(i-1, max_dist=10, n=1) + r = iv.right(i - 1, max_dist=10, n=1) print(r) self.assertEqual(r[0].start, i) class UpDownStreamTestCase(unittest.TestCase): - def setUp(self): iv = IntervalTree() iv.add_interval(Interval(50, 59)) @@ -102,7 +102,7 @@ def test_n(self): class LotsaTestCase(unittest.TestCase): - """ put lotsa data in the tree and make sure it works""" + """put lotsa data in the tree and make sure it works""" def setUp(self): iv = IntervalNode(1, 2, Interval(1, 2)) @@ -152,9 +152,7 @@ def test_find(self): results = iv.find(start, end) for feat in results: - self.assertTrue( - (feat.end >= start and feat.end <= end) - or (feat.start <= end and feat.start >= start)) + self.assertTrue((feat.end >= start and feat.end <= end) or (feat.start <= end and feat.start >= start)) class IntervalTreeTest(unittest.TestCase): @@ -163,13 +161,13 @@ def setUp(self): iv = IntervalTree() n = 0 for i in range(1, 1000, 80): - iv.insert(i, i + 10, dict(value=i*i)) + iv.insert(i, i + 10, dict(value=i * i)) # add is synonym for insert. - iv.add(i + 20, i + 30, dict(astr=str(i*i))) + iv.add(i + 20, i + 30, dict(astr=str(i * i))) # or insert/add an interval object with start, end attrs. 
- iv.insert_interval(Interval(i + 40, i + 50, value=dict(astr=str(i*i)))) - iv.add_interval(Interval(i + 60, i + 70, value=dict(astr=str(i*i)))) + iv.insert_interval(Interval(i + 40, i + 50, value=dict(astr=str(i * i)))) + iv.add_interval(Interval(i + 60, i + 70, value=dict(astr=str(i * i)))) n += 4 self.intervals = self.iv = iv @@ -199,7 +197,6 @@ def test_empty(self): self.assertEqual(None, iv.traverse(lambda x: x.append(1))) def test_public_interval(self): - def fn(ival): return self.assertTrue(ival.interval) diff --git a/lib/bx/intervals/io.py b/lib/bx/intervals/io.py index ffd2feda..e41f6a9a 100644 --- a/lib/bx/intervals/io.py +++ b/lib/bx/intervals/io.py @@ -4,7 +4,7 @@ from bx.bitset import ( BinnedBitSet, - MAX + MAX, ) from bx.tabular.io import ( ParseError, @@ -98,7 +98,9 @@ def __str__(self): return "\t".join(self.fields) def copy(self): - return GenomicInterval(self.reader, list(self.fields), self.chrom_col, self.start_col, self.end_col, self.strand_col, self.strand) + return GenomicInterval( + self.reader, list(self.fields), self.chrom_col, self.start_col, self.end_col, self.strand_col, self.strand + ) class GenomicIntervalReader(TableReader): @@ -133,8 +135,21 @@ class GenomicIntervalReader(TableReader): >>> assert isinstance(elements[4], GenomicInterval) """ - def __init__(self, input, chrom_col=0, start_col=1, end_col=2, strand_col=5, - default_strand="+", return_header=True, return_comments=True, force_header=None, fix_strand=False, comment_lines_startswith=None, allow_spaces=False): + def __init__( + self, + input, + chrom_col=0, + start_col=1, + end_col=2, + strand_col=5, + default_strand="+", + return_header=True, + return_comments=True, + force_header=None, + fix_strand=False, + comment_lines_startswith=None, + allow_spaces=False, + ): if comment_lines_startswith is None: comment_lines_startswith = ["#", "track "] TableReader.__init__(self, input, return_header, return_comments, force_header, comment_lines_startswith) @@ -156,9 +171,15 @@ def 
parse_row(self, line): for i, sep in enumerate(seps): try: return GenomicInterval( - self, line.split(sep), self.chrom_col, self.start_col, - self.end_col, self.strand_col, self.default_strand, - fix_strand=self.fix_strand) + self, + line.split(sep), + self.chrom_col, + self.start_col, + self.end_col, + self.strand_col, + self.default_strand, + fix_strand=self.fix_strand, + ) except Exception as e: # Catch and store the initial error if i == 0: @@ -191,7 +212,7 @@ def binned_bitsets(self, upstream_pad=0, downstream_pad=0, lens=None): last_bitset = bitsets[chrom] start = max(int(interval[self.start_col]), 0) end = min(int(interval[self.end_col]), last_bitset.size) - last_bitset.set_range(start, end-start) + last_bitset.set_range(start, end - start) return bitsets @@ -249,7 +270,14 @@ def __init__(self, reader, lens=None): # It is assumed that the reader is an interval reader, i.e. it has chr_col, start_col, end_col and strand_col attributes. if lens is None: lens = {} - NiceReaderWrapper.__init__(self, reader.input, chrom_col=reader.chrom_col, start_col=reader.start_col, end_col=reader.end_col, strand_col=reader.strand_col) + NiceReaderWrapper.__init__( + self, + reader.input, + chrom_col=reader.chrom_col, + start_col=reader.start_col, + end_col=reader.end_col, + strand_col=reader.strand_col, + ) self.lens = lens def __next__(self): diff --git a/lib/bx/intervals/operations/__init__.py b/lib/bx/intervals/operations/__init__.py index d6ca0da6..6b352a31 100644 --- a/lib/bx/intervals/operations/__init__.py +++ b/lib/bx/intervals/operations/__init__.py @@ -4,7 +4,7 @@ """ BED_DEFAULT_COLS = 0, 1, 2, 5 -MAX_END = 512*1024*1024 +MAX_END = 512 * 1024 * 1024 def bits_set_in_range(bits, range_start, range_end): diff --git a/lib/bx/intervals/operations/complement.py b/lib/bx/intervals/operations/complement.py index cb2b3a20..6a161243 100644 --- a/lib/bx/intervals/operations/complement.py +++ b/lib/bx/intervals/operations/complement.py @@ -5,7 +5,7 @@ from bx.bitset import MAX 
from bx.intervals.io import ( BitsetSafeReaderWrapper, - GenomicInterval + GenomicInterval, ) from bx.intervals.operations import bits_set_in_range @@ -25,20 +25,35 @@ def complement(reader, lens): try: # Write the intervals for start, end in out_intervals: - fields = ["." for x in range(max(complement_reader.chrom_col, complement_reader.start_col, complement_reader.end_col)+1)] + fields = [ + "." + for x in range( + max(complement_reader.chrom_col, complement_reader.start_col, complement_reader.end_col) + 1 + ) + ] # default the column to a + if it exists if complement_reader.strand_col < len(fields) and complement_reader.strand_col >= 0: fields[complement_reader.strand_col] = "+" fields[complement_reader.chrom_col] = chrom fields[complement_reader.start_col] = start fields[complement_reader.end_col] = end - new_interval = GenomicInterval(complement_reader, fields, complement_reader.chrom_col, complement_reader.start_col, complement_reader.end_col, complement_reader.strand_col, "+") + new_interval = GenomicInterval( + complement_reader, + fields, + complement_reader.chrom_col, + complement_reader.start_col, + complement_reader.end_col, + complement_reader.strand_col, + "+", + ) yield new_interval except IndexError as e: complement_reader.skipped += 1 # no reason to stuff an entire bad file into memmory if complement_reader.skipped < 10: - complement_reader.skipped_lines.append((complement_reader.linenum, complement_reader.current_line, str(e))) + complement_reader.skipped_lines.append( + (complement_reader.linenum, complement_reader.current_line, str(e)) + ) continue diff --git a/lib/bx/intervals/operations/coverage.py b/lib/bx/intervals/operations/coverage.py index 4799f397..83a9c1a6 100644 --- a/lib/bx/intervals/operations/coverage.py +++ b/lib/bx/intervals/operations/coverage.py @@ -10,7 +10,7 @@ ) from bx.tabular.io import ( Comment, - Header + Header, ) @@ -47,7 +47,9 @@ def coverage(readers, comments=True): primary.skipped += 1 # no reason to stuff an entire 
bad file into memmory if primary.skipped < 10: - primary.skipped_lines.append((primary.linenum, primary.current_line, "Interval start after end!")) + primary.skipped_lines.append( + (primary.linenum, primary.current_line, "Interval start after end!") + ) except Exception: pass continue @@ -56,7 +58,7 @@ def coverage(readers, comments=True): percent = 0.0 else: try: - bases_covered = bitsets[chrom].count_range(start, end-start) + bases_covered = bitsets[chrom].count_range(start, end - start) except IndexError as e: try: # This will only work if primary is a NiceReaderWrapper diff --git a/lib/bx/intervals/operations/find_clusters.py b/lib/bx/intervals/operations/find_clusters.py index eccfad02..29406465 100644 --- a/lib/bx/intervals/operations/find_clusters.py +++ b/lib/bx/intervals/operations/find_clusters.py @@ -50,7 +50,7 @@ def __init__(self, start, end, linenum, mincols, minregions): # uniform into a binomial because it naturally scales with # tree size. Also, python's uniform is perfect since the # upper limit is not inclusive, which gives us undefined here. 
- self.priority = math.ceil((-1.0 / math.log(.5)) * math.log(-1.0 / (random.uniform(0, 1) - 1))) + self.priority = math.ceil((-1.0 / math.log(0.5)) * math.log(-1.0 / (random.uniform(0, 1) - 1))) self.start = start self.end = end self.left = None diff --git a/lib/bx/intervals/operations/intersect.py b/lib/bx/intervals/operations/intersect.py index 7b413f1e..6832cccf 100644 --- a/lib/bx/intervals/operations/intersect.py +++ b/lib/bx/intervals/operations/intersect.py @@ -7,7 +7,7 @@ from bx.intervals.io import ( BitsetSafeReaderWrapper, - GenomicInterval + GenomicInterval, ) from bx.intervals.operations import bits_set_in_range from bx.tabular.io import ( @@ -52,14 +52,16 @@ def intersect(readers, mincols=1, upstream_pad=0, downstream_pad=0, pieces=True, primary.skipped += 1 # no reason to stuff an entire bad file into memmory if primary.skipped < 10: - primary.skipped_lines.append((primary.linenum, primary.current_line, "Interval start after end!")) + primary.skipped_lines.append( + (primary.linenum, primary.current_line, "Interval start after end!") + ) except Exception: pass continue out_intervals = [] # Intersect or Overlap try: - if bitsets[chrom].count_range(start, end-start) >= mincols: + if bitsets[chrom].count_range(start, end - start) >= mincols: if pieces: out_intervals = bits_set_in_range(bitsets[chrom], start, end) else: diff --git a/lib/bx/intervals/operations/join.py b/lib/bx/intervals/operations/join.py index 6044864d..2112d908 100644 --- a/lib/bx/intervals/operations/join.py +++ b/lib/bx/intervals/operations/join.py @@ -32,14 +32,20 @@ def join(leftSet, rightSet, mincols=1, leftfill=True, rightfill=True): rightTree.intersect(interval, lambda node: result.append(node)) overlap_not_met = 0 for item in result: - if item.start in range(interval.start, interval.end+1) and item.end not in range(interval.start, interval.end+1): - overlap = interval.end-item.start - elif item.end in range(interval.start, interval.end+1) and item.start not in 
range(interval.start, interval.end+1): - overlap = item.end-interval.start - elif item.start in range(interval.start, interval.end+1) and item.end in range(interval.start, interval.end+1): - overlap = item.end-item.start + if item.start in range(interval.start, interval.end + 1) and item.end not in range( + interval.start, interval.end + 1 + ): + overlap = interval.end - item.start + elif item.end in range(interval.start, interval.end + 1) and item.start not in range( + interval.start, interval.end + 1 + ): + overlap = item.end - interval.start + elif item.start in range(interval.start, interval.end + 1) and item.end in range( + interval.start, interval.end + 1 + ): + overlap = item.end - item.start else: # the intersecting item's start and end are outside the interval range - overlap = interval.end-interval.start + overlap = interval.end - interval.start if overlap < mincols: overlap_not_met += 1 continue @@ -54,9 +60,11 @@ def join(leftSet, rightSet, mincols=1, leftfill=True, rightfill=True): yield outfields if leftfill: + def report_unvisited(node, results): if not hasattr(node, "visited"): results.append(node) + results = [] rightTree.traverse(lambda x: report_unvisited(x, results)) for item in results: @@ -120,9 +128,9 @@ def findintersect(interval, sortedlist, mincols): lowerbound = x upperbound = x - while (lowerbound > -1) and (findoverlap(sortedlist[lowerbound-1][0], interval) >= mincols): + while (lowerbound > -1) and (findoverlap(sortedlist[lowerbound - 1][0], interval) >= mincols): lowerbound -= 1 - while (upperbound+1 < len(sortedlist)) and (findoverlap(sortedlist[upperbound+1][0], interval) >= mincols): + while (upperbound + 1 < len(sortedlist)) and (findoverlap(sortedlist[upperbound + 1][0], interval) >= mincols): upperbound += 1 return lowerbound, upperbound diff --git a/lib/bx/intervals/operations/merge.py b/lib/bx/intervals/operations/merge.py index 404ad0e1..d94202c3 100644 --- a/lib/bx/intervals/operations/merge.py +++ 
b/lib/bx/intervals/operations/merge.py @@ -6,7 +6,7 @@ from bx.intervals.io import BitsetSafeReaderWrapper from bx.intervals.operations import ( bits_set_in_range, - MAX_END + MAX_END, ) diff --git a/lib/bx/intervals/operations/quicksect.py b/lib/bx/intervals/operations/quicksect.py index 0a3d49d4..65abfd6b 100644 --- a/lib/bx/intervals/operations/quicksect.py +++ b/lib/bx/intervals/operations/quicksect.py @@ -44,7 +44,7 @@ def __init__(self, start, end, linenum=0, other=None): # uniform into a binomial because it naturally scales with # tree size. Also, python's uniform is perfect since the # upper limit is not inclusive, which gives us undefined here. - self.priority = math.ceil((-1.0 / math.log(.5)) * math.log(-1.0 / (random.uniform(0, 1) - 1))) + self.priority = math.ceil((-1.0 / math.log(0.5)) * math.log(-1.0 / (random.uniform(0, 1) - 1))) self.start = start self.end = end self.maxend = self.end diff --git a/lib/bx/intervals/operations/subtract.py b/lib/bx/intervals/operations/subtract.py index 8eb499ca..47ee7be8 100644 --- a/lib/bx/intervals/operations/subtract.py +++ b/lib/bx/intervals/operations/subtract.py @@ -10,7 +10,7 @@ from bx.intervals.io import ( BitsetSafeReaderWrapper, - GenomicInterval + GenomicInterval, ) from bx.intervals.operations import bits_clear_in_range from bx.tabular.io import ( @@ -56,7 +56,7 @@ def subtract(readers, mincols=1, upstream_pad=0, downstream_pad=0, pieces=True, # Find the intervals that meet the criteria (for the three sensible # permutations of reverse and pieces) try: - if bitsets[chrom].count_range(start, end-start) >= mincols: + if bitsets[chrom].count_range(start, end - start) >= mincols: if pieces: out_intervals = bits_clear_in_range(bitsets[chrom], start, end) else: diff --git a/lib/bx/intervals/random_intervals.py b/lib/bx/intervals/random_intervals.py index bd5f343c..30ac9668 100644 --- a/lib/bx/intervals/random_intervals.py +++ b/lib/bx/intervals/random_intervals.py @@ -6,7 +6,7 @@ from bx.bitset import BitSet 
-random = __import__('random') +random = __import__("random") class MaxtriesException(Exception): @@ -52,8 +52,8 @@ def throw_random_gap_list(lengths, mask, save_interval_func, allow_overlap=False if start == mask.size: break end = mask.next_set(start) - if end-start >= min_length: - gaps.append((end-start, start, None)) + if end - start >= min_length: + gaps.append((end - start, start, None)) # Sort (long regions first) gaps.sort() gaps.reverse() @@ -82,11 +82,11 @@ def throw_random_intervals(lengths, regions, save_interval_func=None, allow_over region with start and end modified. """ # Copy regions - regions = sorted((x[1]-x[0], x[0], x) for x in regions) + regions = sorted((x[1] - x[0], x[0], x) for x in regions) # Sort (long regions first) regions.reverse() # Throw - if (save_interval_func is not None): + if save_interval_func is not None: throw_random_private(lengths, regions, save_interval_func, allow_overlap) return else: @@ -181,7 +181,8 @@ def throw_random_private(lengths, regions, save_interval_func, allow_overlap=Fal if candidates == 0: raise MaxtriesException( "No region can fit an interval of length %d (we threw %d of %d)" - % (length, num_thrown, len(lengths))) + % (length, num_thrown, len(lengths)) + ) hi_rgn -= 1 # Select a candidate s = random.randrange(candidates) @@ -195,11 +196,11 @@ def throw_random_private(lengths, regions, save_interval_func, allow_overlap=Fal lo = 0 hi = hi_rgn while hi > lo: - mid = (lo + hi + 1) / 2 # (we round up to prevent infinite loop) + mid = (lo + hi + 1) / 2 # (we round up to prevent infinite loop) if s < cc[mid]: - hi = mid-1 # (s < num candidates from 0..mid-1) + hi = mid - 1 # (s < num candidates from 0..mid-1) else: - lo = mid # (s >= num candidates from 0..mid-1) + lo = mid # (s >= num candidates from 0..mid-1) s -= cc[lo] # If we are not allowing overlaps we will remove the placed interval # from the region list @@ -210,7 +211,13 @@ def throw_random_private(lengths, regions, save_interval_func, 
allow_overlap=Fal rgn_length, rgn_start, rgn_extra = regions.pop(lo) rgn_end = rgn_start + rgn_length assert s >= 0 - assert rgn_start + s + length <= rgn_end, "Expected: %d + %d + %d == %d <= %d" % (rgn_start, s, length, rgn_start + s + length, rgn_end) + assert rgn_start + s + length <= rgn_end, "Expected: %d + %d + %d == %d <= %d" % ( + rgn_start, + s, + length, + rgn_start + s + length, + rgn_end, + ) regions.reverse() if s >= min_length: bisect.insort(regions, (s, rgn_start, rgn_extra)) @@ -219,7 +226,7 @@ def throw_random_private(lengths, regions, save_interval_func, allow_overlap=Fal regions.reverse() prev_length = None # (force cc array construction) # Save the new interval - if (three_args): + if three_args: save_interval_func(rgn_start + s, rgn_start + s + length, rgn_extra) else: save_interval_func(rgn_start + s, rgn_start + s + length) diff --git a/lib/bx/intseq/ngramcount.pyx b/lib/bx/intseq/ngramcount.pyx index 50b12d4d..dc2e5c58 100644 --- a/lib/bx/intseq/ngramcount.pyx +++ b/lib/bx/intseq/ngramcount.pyx @@ -4,6 +4,7 @@ Tools for counting words (n-grams) in integer sequences. 
import numpy + cdef extern from "Python.h": ctypedef int Py_intptr_t diff --git a/lib/bx/misc/__init__.py b/lib/bx/misc/__init__.py index 0f701f26..93316314 100644 --- a/lib/bx/misc/__init__.py +++ b/lib/bx/misc/__init__.py @@ -6,7 +6,7 @@ import gzip -def open_compressed(filename, mode='r'): +def open_compressed(filename, mode="r"): if filename.endswith(".bz2"): return bz2.BZ2File(filename, mode) elif filename.endswith(".gz"): diff --git a/lib/bx/misc/_seekbzip2.pyx b/lib/bx/misc/_seekbzip2.pyx index 27bf970f..c71c92a5 100644 --- a/lib/bx/misc/_seekbzip2.pyx +++ b/lib/bx/misc/_seekbzip2.pyx @@ -32,9 +32,10 @@ cdef extern from "unistd.h": cdef extern from "stdlib.h": void free( void *ptr ) -import sys import os - +import sys + + cdef class SeekBzip2: cdef bunzip_data * bd diff --git a/lib/bx/misc/binary_file.py b/lib/bx/misc/binary_file.py index 37ccab38..bce48007 100644 --- a/lib/bx/misc/binary_file.py +++ b/lib/bx/misc/binary_file.py @@ -13,6 +13,7 @@ def bytesify(s): else: return s.encode() + # Standard size: # short is 8 bits # int and long are 32 bits @@ -44,14 +45,15 @@ def __init__(self, file, magic=None, is_little_endian=False): else: raise BadMagicNumber( "File does not have expected magic number: %x != %x or %x" - % (magic, struct.unpack(">I", bytes)[0], struct.unpack("I", bytes)[0], struct.unpack("> 8) + start = hash >> 8 for i in range(subtable_size): offset = subtable_offset + ((start + i) % subtable_size) * 8 self.io.seek(offset) diff --git a/lib/bx/misc/cdb_tests.py b/lib/bx/misc/cdb_tests.py index 35b0b45a..b6cd86a9 100644 --- a/lib/bx/misc/cdb_tests.py +++ b/lib/bx/misc/cdb_tests.py @@ -7,7 +7,7 @@ def test(): d = {} for i in range(10000): - d['foo' + str(i)] = 'bar' + str(i) + d["foo" + str(i)] = "bar" + str(i) # Open temporary file and get name file = NamedTemporaryFile() @@ -18,14 +18,14 @@ def test(): file.flush() # Open on disk - file2 = open(file_name, 'rb') + file2 = open(file_name, "rb") cdb = FileCDBDict(file2) for key, value in d.items(): 
assert cdb[key] == value try: - cdb['notin'] + cdb["notin"] assert False, "KeyError was not raised" except KeyError: pass diff --git a/lib/bx/misc/filecache.py b/lib/bx/misc/filecache.py index 955a34be..efa6eb25 100644 --- a/lib/bx/misc/filecache.py +++ b/lib/bx/misc/filecache.py @@ -3,7 +3,7 @@ from bx_extras.lrucache import LRUCache DEFAULT_CACHE_SIZE = 10 -DEFAULT_BLOCK_SIZE = 1024*1024*2 +DEFAULT_BLOCK_SIZE = 1024 * 1024 * 2 class FileCache: @@ -90,7 +90,7 @@ def readline(self): while True: line = self.current_block.readline() rval.append(line) - if len(line) > 0 and line[-1] == b'\n': + if len(line) > 0 and line[-1] == b"\n": break elif self.current_block_index == self.nblocks - 1: self.at_eof = True diff --git a/lib/bx/misc/readlengths.py b/lib/bx/misc/readlengths.py index f60979d2..b6851f27 100644 --- a/lib/bx/misc/readlengths.py +++ b/lib/bx/misc/readlengths.py @@ -14,7 +14,7 @@ def read_lengths_file(name): f = open(name) for line in f: line = line.strip() - if line == '' or line[0] == '#': + if line == "" or line[0] == "#": continue try: fields = line.split() diff --git a/lib/bx/misc/seekbzip2.py b/lib/bx/misc/seekbzip2.py index 994d9df0..6f4b6b52 100644 --- a/lib/bx/misc/seekbzip2.py +++ b/lib/bx/misc/seekbzip2.py @@ -67,7 +67,7 @@ def read(self, sizehint=-1): if sizehint < 0: chunks = [] while True: - val = self._read(1024*1024) + val = self._read(1024 * 1024) if val: chunks.append(val) else: diff --git a/lib/bx/misc/seekbzip2_tests.py b/lib/bx/misc/seekbzip2_tests.py index 618eb8fc..550b7cd0 100644 --- a/lib/bx/misc/seekbzip2_tests.py +++ b/lib/bx/misc/seekbzip2_tests.py @@ -25,7 +25,7 @@ def test_linear_reading(): chunk = 1221 pos = 0 for i in range((len(raw_data) // chunk) + 1): - a = raw_data[pos:pos+chunk] + a = raw_data[pos : pos + chunk] b = f.read(chunk) assert a == b pos += chunk @@ -41,13 +41,14 @@ def test_random_seeking(): f.seek(seek_to) a = f.read(chunk) - b = raw_data[seek_to: seek_to + chunk] + b = raw_data[seek_to : seek_to + chunk] 
assert a == b, "'%s' != '%s' on %dth attempt" % (encode(a, "hex"), encode(b, "hex"), i) assert f.tell() == min(seek_to + chunk, len(raw_data)) f.close() + if T and os.path.exists(T): def test_text_reading(): diff --git a/lib/bx/misc/seeklzop.py b/lib/bx/misc/seeklzop.py index 7124f5b8..ff44e58e 100644 --- a/lib/bx/misc/seeklzop.py +++ b/lib/bx/misc/seeklzop.py @@ -63,7 +63,7 @@ def load_block(self, index): self.file.seek(offset) data = self.file.read(csize) # Need to prepend a header for python-lzo module (silly) - data = b''.join((b'\xf0', struct.pack("!I", size), data)) + data = b"".join((b"\xf0", struct.pack("!I", size), data)) value = lzo.decompress(data) if self.cache is not None: self.cache[index] = value @@ -112,7 +112,7 @@ def read(self, sizehint=-1): if sizehint < 0: chunks = [] while True: - val = self._read(1024*1024) + val = self._read(1024 * 1024) if val: chunks.append(val) else: @@ -124,7 +124,7 @@ def read(self, sizehint=-1): def _read(self, size): if self.dirty: self.fix_dirty() - val = b'' + val = b"" while size: part = self.current_block.read(size) size -= len(part) @@ -149,7 +149,7 @@ def readline(self): line = self.current_block.readline() self.file_pos += len(line) rval.append(line) - if len(line) > 0 and line[-1] == b'\n': + if len(line) > 0 and line[-1] == b"\n": break elif self.current_block_index == self.nblocks - 1: self.at_eof = True diff --git a/lib/bx/motif/_pwm.pyx b/lib/bx/motif/_pwm.pyx index 09c387fb..ba831b8d 100644 --- a/lib/bx/motif/_pwm.pyx +++ b/lib/bx/motif/_pwm.pyx @@ -4,6 +4,7 @@ Extensions used by the `pwm` module. 
from cpython.version cimport PY_MAJOR_VERSION + cdef extern from "Python.h": int PyBytes_AsStringAndSize(object obj, char **buffer, Py_ssize_t* length) except -1 diff --git a/lib/bx/motif/io/transfac.py b/lib/bx/motif/io/transfac.py index a08f1df5..f412cbee 100644 --- a/lib/bx/motif/io/transfac.py +++ b/lib/bx/motif/io/transfac.py @@ -6,7 +6,6 @@ class TransfacMotif: - def __init__(self): self.accession = None self.id = None @@ -34,7 +33,7 @@ def __init__(self): # For CREAD format files "TY": ("store_single", "type"), "AT": ("store_single_key_value", "attributes"), - "BS": ("store_single_list", "sites") + "BS": ("store_single_list", "sites"), } @@ -129,7 +128,7 @@ def parse_record(self, lines): # Add a single line value to a dictionary if action[0] == "store_single_key_value": key = action[1] - k, v = rest.strip().split('=', 1) + k, v = rest.strip().split("=", 1) if not getattr(motif, key): setattr(motif, key, {}) getattr(motif, key)[k] = v @@ -220,6 +219,13 @@ def write(self, motif): matrix = getattr(motif, key) print(prefix, " ", " ".join(s.rjust(6) for s in matrix.alphabet), file=output) for i in range(matrix.width): - print("%02d" % (i + 1), " ", " ".join(str(matrix.values[i, matrix.char_to_index[ord(s)]]).rjust(6) for s in matrix.alphabet), file=output) + print( + "%02d" % (i + 1), + " ", + " ".join( + str(matrix.values[i, matrix.char_to_index[ord(s)]]).rjust(6) for s in matrix.alphabet + ), + file=output, + ) print("XX", file=output) print("//") diff --git a/lib/bx/motif/io/transfac_tests.py b/lib/bx/motif/io/transfac_tests.py index 6fbed820..1d7c290d 100644 --- a/lib/bx/motif/io/transfac_tests.py +++ b/lib/bx/motif/io/transfac_tests.py @@ -91,7 +91,7 @@ def test_reader(): # Single value parse assert motifs[1].accession == "M00002" # Value list parse - assert motifs[1].dates == ['19.10.92 (created); ewi.', '16.10.95 (updated); ewi.'] + assert motifs[1].dates == ["19.10.92 (created); ewi.", "16.10.95 (updated); ewi."] # Matrix parse - assert 
motifs[1].matrix.sorted_alphabet == ['A', 'C', 'G', 'T'] + assert motifs[1].matrix.sorted_alphabet == ["A", "C", "G", "T"] assert allclose(motifs[1].matrix.values[0], [400, 400, 300, 0]) diff --git a/lib/bx/motif/logo/__init__.py b/lib/bx/motif/logo/__init__.py index ceae03b0..94ff37bf 100644 --- a/lib/bx/motif/logo/__init__.py +++ b/lib/bx/motif/logo/__init__.py @@ -6,17 +6,17 @@ ceil, log2, transpose, - where + where, ) PAD = 2 # Colors from rgb.txt, DNA_DEFAULT_COLORS = { - 'A': "0.00 1.00 0.00", # green - 'C': "0.00 0.00 1.00", # blue - 'G': "1.00 0.65 0.00", # orange red - 'T': "1.00 0.00 0.00" # red + "A": "0.00 1.00 0.00", # green + "C": "0.00 0.00 1.00", # blue + "G": "1.00 0.65 0.00", # orange red + "T": "1.00 0.00 0.00", # red } # Template is adapted from Jim Kent's lib/dnaMotif.pss to support aritrary @@ -37,7 +37,7 @@ def freqs_to_heights(matrix): # Ensure normalized f = f / sum(f, axis=0) # Shannon entropy (the where replaces 0 with 1 so that '0 log 0 == 0') - H = - sum(f * log2(where(f, f, 1)), axis=0) + H = -sum(f * log2(where(f, f, 1)), axis=0) # Height return transpose(f * (log2(n) - H)) @@ -52,19 +52,21 @@ def eps_logo(matrix, base_width, height, colors=DNA_DEFAULT_COLORS): alphabet = matrix.sorted_alphabet rval = StringIO() # Read header ans substitute in width / height - template_path = os.path.join(os.path.dirname(__file__), 'template.ps') + template_path = os.path.join(os.path.dirname(__file__), "template.ps") with open(template_path) as fh: template_str = fh.read() header = Template(template_str) - rval.write(header.substitute( - bounding_box_width=ceil(base_width * matrix.width) + PAD, - bounding_box_height=ceil(height) + PAD)) + rval.write( + header.substitute( + bounding_box_width=ceil(base_width * matrix.width) + PAD, bounding_box_height=ceil(height) + PAD + ) + ) # Determine heights heights = freqs_to_heights(matrix) height_scale = height / log2(len(alphabet)) # Draw each "row" of the matrix for i, row in enumerate(heights): - x = (i * 
base_width) + x = i * base_width y = 0 for j, base_height in enumerate(row): char = alphabet[j] @@ -72,7 +74,7 @@ def eps_logo(matrix, base_width, height, colors=DNA_DEFAULT_COLORS): # print matrix.alphabet[j], base_height, height_scale, page_height if page_height > 1: # Draw letter - rval.write("%s setrgbcolor\n" % colors.get(char, '0 0 0')) + rval.write("%s setrgbcolor\n" % colors.get(char, "0 0 0")) rval.write("%3.2f " % x) rval.write("%3.2f " % y) rval.write("%3.2f " % (x + base_width)) diff --git a/lib/bx/motif/pwm.py b/lib/bx/motif/pwm.py index f53179fd..e9147e6a 100644 --- a/lib/bx/motif/pwm.py +++ b/lib/bx/motif/pwm.py @@ -12,7 +12,7 @@ nan, newaxis, ones, - zeros + zeros, ) from . import _pwm diff --git a/lib/bx/motif/pwm_tests.py b/lib/bx/motif/pwm_tests.py index d50af218..b07685f0 100644 --- a/lib/bx/motif/pwm_tests.py +++ b/lib/bx/motif/pwm_tests.py @@ -1,28 +1,28 @@ from numpy import ( allclose, - isnan + isnan, ) from . import pwm def test_create(): - m = pwm.FrequencyMatrix.from_rows(['A', 'C', 'G', 'T'], get_ctcf_rows()) + m = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows()) # Alphabet sort - assert m.sorted_alphabet == ['A', 'C', 'G', 'T'] + assert m.sorted_alphabet == ["A", "C", "G", "T"] # Character to index mapping - assert m.char_to_index[ord('A')] == 0 - assert m.char_to_index[ord('C')] == 1 - assert m.char_to_index[ord('G')] == 2 - assert m.char_to_index[ord('T')] == 3 - assert m.char_to_index[ord('Q')] == -1 + assert m.char_to_index[ord("A")] == 0 + assert m.char_to_index[ord("C")] == 1 + assert m.char_to_index[ord("G")] == 2 + assert m.char_to_index[ord("T")] == 3 + assert m.char_to_index[ord("Q")] == -1 # Values assert allclose(m.values[0], [2620, 2052, 3013, 2314]) assert allclose(m.values[19], [3144, 3231, 3056, 567]) def test_scoring(): - m = pwm.FrequencyMatrix.from_rows(['A', 'C', 'G', 'T'], get_ctcf_rows()) + m = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows()) # Stormo method sm = 
m.to_stormo_scoring_matrix() # Forward matches @@ -43,7 +43,7 @@ def test_scoring(): def test_scoring_with_gaps(): - m = pwm.FrequencyMatrix.from_rows(['A', 'C', 'G', 'T'], get_ctcf_rows()) + m = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows()) # Stormo method sm = m.to_stormo_scoring_matrix() # Forward matches @@ -84,5 +84,5 @@ def get_ctcf_rows(): [3842, 0, 5545, 611], [0, 5895, 4104, 0], [1615, 4192, 1397, 2794], - [3144, 3231, 3056, 567] + [3144, 3231, 3056, 567], ] diff --git a/lib/bx/phylo/newick.py b/lib/bx/phylo/newick.py index 37a077a1..96fe3b95 100644 --- a/lib/bx/phylo/newick.py +++ b/lib/bx/phylo/newick.py @@ -16,7 +16,7 @@ Optional, QuotedString, Suppress, - Word + Word, ) __all__ = ["Tree", "Edge", "NewickParser", "newick_parser"] @@ -85,8 +85,10 @@ def create_parser(): """ # Basic tokens real = Combine( - Word("+-" + nums, nums) + Optional("." + Optional(Word(nums))) - + Optional(CaselessLiteral("E") + Word("+-" + nums, nums))) + Word("+-" + nums, nums) + + Optional("." 
+ Optional(Word(nums))) + + Optional(CaselessLiteral("E") + Word("+-" + nums, nums)) + ) lpar = Suppress("(") rpar = Suppress(")") colon = Suppress(":") @@ -100,13 +102,12 @@ def create_parser(): # Need to forward declare this due to circularity node_list = Forward() # A node might have an list of edges (for a subtree), a label, and/or a branch length - node = (Optional(node_list, None) + Optional(label, "") + Optional(colon + branch_length, None)) \ - .setParseAction(lambda s, l, t: Edge(t[2], Tree(t[1] or None, t[0]))) - node_list << (lpar + delimitedList(node) + rpar) \ - .setParseAction(lambda s, l, t: [t.asList()]) + node = (Optional(node_list, None) + Optional(label, "") + Optional(colon + branch_length, None)).setParseAction( + lambda s, l, t: Edge(t[2], Tree(t[1] or None, t[0])) + ) + node_list << (lpar + delimitedList(node) + rpar).setParseAction(lambda s, l, t: [t.asList()]) # The root cannot have a branch length - tree = (node_list + Optional(label, "") + semi)\ - .setParseAction(lambda s, l, t: Tree(t[1] or None, t[0])) + tree = (node_list + Optional(label, "") + semi).setParseAction(lambda s, l, t: Tree(t[1] or None, t[0])) # Return the outermost element return tree diff --git a/lib/bx/phylo/newick_tests.py b/lib/bx/phylo/newick_tests.py index 1a1cd15b..d1457fd5 100644 --- a/lib/bx/phylo/newick_tests.py +++ b/lib/bx/phylo/newick_tests.py @@ -7,26 +7,223 @@ from bx.phylo.newick import ( Edge, newick_parser, - Tree + Tree, ) -trees = [r"(B:6.0,(A:5.0,C:3.0,'Foo ''bar':4.0)Q_X:5.0,D:11.0)label;", - "((raccoon:19.19959,bear:6.80041):0.84600,((sea_lion:11.99700, seal:12.00300):7.52973,(( monkey:100.85930,cat:47.14069):20.59201, weasel:18.87953):2.09460):3.87382,dog:25.46154);", - "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460);", - "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, 
(P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);", - "(B,(A,C,E),D);", - "(,(,,),);", - "(A,(B,C),D);", - "((A,D),(C,B));"] +trees = [ + r"(B:6.0,(A:5.0,C:3.0,'Foo ''bar':4.0)Q_X:5.0,D:11.0)label;", + "((raccoon:19.19959,bear:6.80041):0.84600,((sea_lion:11.99700, seal:12.00300):7.52973,(( monkey:100.85930,cat:47.14069):20.59201, weasel:18.87953):2.09460):3.87382,dog:25.46154);", + "(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.21460);", + "(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.21460);", + "(B,(A,C,E),D);", + "(,(,,),);", + "(A,(B,C),D);", + "((A,D),(C,B));", +] -results = [(Tree('label', [Edge(6.0, Tree('B', None)), Edge(5.0, Tree('Q X', [Edge(5.0, Tree('A', None)), Edge(3.0, Tree('C', None)), Edge(4.0, Tree("Foo 'bar", None))])), Edge(11.0, Tree('D', None))])), - (Tree(None, [Edge(0.84599999999999997, Tree(None, [Edge(19.199590000000001, Tree('raccoon', None)), Edge(6.8004100000000003, Tree('bear', None))])), Edge(3.8738199999999998, Tree(None, [Edge(7.5297299999999998, Tree(None, [Edge(11.997, Tree('sea lion', None)), Edge(12.003, Tree('seal', None))])), Edge(2.0945999999999998, Tree(None, [Edge(20.592009999999998, Tree(None, [Edge(100.8593, Tree('monkey', None)), Edge(47.140689999999999, Tree('cat', None))])), Edge(18.879529999999999, Tree('weasel', None))]))])), Edge(25.461539999999999, Tree('dog', None))])), - (Tree(None, [Edge(0.69394999999999996, Tree('Bovine', None)), Edge(0.54939000000000004, Tree(None, [Edge(0.36079, Tree('Gibbon', None)), Edge(0.15057000000000001, Tree(None, [Edge(0.33635999999999999, Tree('Orang', None)), Edge(0.061240000000000003, Tree(None, [Edge(0.17147000000000001, Tree('Gorilla', None)), Edge(0.083860000000000004, Tree(None, [Edge(0.19267999999999999, Tree('Chimp', None)), 
Edge(0.11927, Tree('Human', None))]))]))]))])), Edge(1.2145999999999999, Tree('Mouse', None))])), - (Tree(None, [Edge(0.69394999999999996, Tree('Bovine', None)), Edge(0.54939000000000004, Tree(None, [Edge(0.36079, Tree('Hylobates', None)), Edge(0.15057000000000001, Tree(None, [Edge(0.33635999999999999, Tree('Pongo', None)), Edge(0.061240000000000003, Tree(None, [Edge(0.17147000000000001, Tree('G. Gorilla', None)), Edge(0.083860000000000004, Tree(None, [Edge(0.19267999999999999, Tree('P. paniscus', None)), Edge(0.11927, Tree('H. sapiens', None))]))]))]))])), Edge(1.2145999999999999, Tree('Rodent', None))])), - (Tree(None, [Edge(None, Tree('B', None)), Edge(None, Tree(None, [Edge(None, Tree('A', None)), Edge(None, Tree('C', None)), Edge(None, Tree('E', None))])), Edge(None, Tree('D', None))])), - (Tree(None, [Edge(None, Tree(None, None)), Edge(None, Tree(None, [Edge(None, Tree(None, None)), Edge(None, Tree(None, None)), Edge(None, Tree(None, None))])), Edge(None, Tree(None, None))])), - (Tree(None, [Edge(None, Tree('A', None)), Edge(None, Tree(None, [Edge(None, Tree('B', None)), Edge(None, Tree('C', None))])), Edge(None, Tree('D', None))])), - (Tree(None, [Edge(None, Tree(None, [Edge(None, Tree('A', None)), Edge(None, Tree('D', None))])), Edge(None, Tree(None, [Edge(None, Tree('C', None)), Edge(None, Tree('B', None))]))])), ] +results = [ + ( + Tree( + "label", + [ + Edge(6.0, Tree("B", None)), + Edge( + 5.0, + Tree( + "Q X", + [Edge(5.0, Tree("A", None)), Edge(3.0, Tree("C", None)), Edge(4.0, Tree("Foo 'bar", None))], + ), + ), + Edge(11.0, Tree("D", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge( + 0.84599999999999997, + Tree( + None, + [Edge(19.199590000000001, Tree("raccoon", None)), Edge(6.8004100000000003, Tree("bear", None))], + ), + ), + Edge( + 3.8738199999999998, + Tree( + None, + [ + Edge( + 7.5297299999999998, + Tree(None, [Edge(11.997, Tree("sea lion", None)), Edge(12.003, Tree("seal", None))]), + ), + Edge( + 2.0945999999999998, + Tree( + None, + 
[ + Edge( + 20.592009999999998, + Tree( + None, + [ + Edge(100.8593, Tree("monkey", None)), + Edge(47.140689999999999, Tree("cat", None)), + ], + ), + ), + Edge(18.879529999999999, Tree("weasel", None)), + ], + ), + ), + ], + ), + ), + Edge(25.461539999999999, Tree("dog", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(0.69394999999999996, Tree("Bovine", None)), + Edge( + 0.54939000000000004, + Tree( + None, + [ + Edge(0.36079, Tree("Gibbon", None)), + Edge( + 0.15057000000000001, + Tree( + None, + [ + Edge(0.33635999999999999, Tree("Orang", None)), + Edge( + 0.061240000000000003, + Tree( + None, + [ + Edge(0.17147000000000001, Tree("Gorilla", None)), + Edge( + 0.083860000000000004, + Tree( + None, + [ + Edge(0.19267999999999999, Tree("Chimp", None)), + Edge(0.11927, Tree("Human", None)), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + Edge(1.2145999999999999, Tree("Mouse", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(0.69394999999999996, Tree("Bovine", None)), + Edge( + 0.54939000000000004, + Tree( + None, + [ + Edge(0.36079, Tree("Hylobates", None)), + Edge( + 0.15057000000000001, + Tree( + None, + [ + Edge(0.33635999999999999, Tree("Pongo", None)), + Edge( + 0.061240000000000003, + Tree( + None, + [ + Edge(0.17147000000000001, Tree("G. Gorilla", None)), + Edge( + 0.083860000000000004, + Tree( + None, + [ + Edge(0.19267999999999999, Tree("P. paniscus", None)), + Edge(0.11927, Tree("H. 
sapiens", None)), + ], + ), + ), + ], + ), + ), + ], + ), + ), + ], + ), + ), + Edge(1.2145999999999999, Tree("Rodent", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(None, Tree("B", None)), + Edge( + None, + Tree(None, [Edge(None, Tree("A", None)), Edge(None, Tree("C", None)), Edge(None, Tree("E", None))]), + ), + Edge(None, Tree("D", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(None, Tree(None, None)), + Edge( + None, + Tree( + None, [Edge(None, Tree(None, None)), Edge(None, Tree(None, None)), Edge(None, Tree(None, None))] + ), + ), + Edge(None, Tree(None, None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(None, Tree("A", None)), + Edge(None, Tree(None, [Edge(None, Tree("B", None)), Edge(None, Tree("C", None))])), + Edge(None, Tree("D", None)), + ], + ) + ), + ( + Tree( + None, + [ + Edge(None, Tree(None, [Edge(None, Tree("A", None)), Edge(None, Tree("D", None))])), + Edge(None, Tree(None, [Edge(None, Tree("C", None)), Edge(None, Tree("B", None))])), + ], + ) + ), +] @pytest.mark.parametrize("tree,result", zip(trees, results)) diff --git a/lib/bx/phylo/phast_tests.py b/lib/bx/phylo/phast_tests.py index 3c404a59..469afeda 100644 --- a/lib/bx/phylo/phast_tests.py +++ b/lib/bx/phylo/phast_tests.py @@ -6,7 +6,7 @@ from numpy import ( allclose, - array + array, ) from bx.phylo.phast import TreeModel @@ -28,14 +28,23 @@ def test_parser(): tm = TreeModel.from_file(StringIO(test_data)) - assert tm.alphabet == ('A', 'C', 'G', 'T', '-') + assert tm.alphabet == ("A", "C", "G", "T", "-") assert tm.order == 0 assert tm.subst_mod == "HKY85+Gap" assert allclose(tm.background, [0.227006, 0.169993, 0.169307, 0.227262, 0.206432]) - assert allclose(tm.matrix, array( - [[-0.971735, 0.122443, 0.465361, 0.163692, 0.220238], - [0.163508, -1.130351, 0.121949, 0.624656, 0.220238], - [0.623952, 0.122443, -1.130326, 0.163692, 0.220238], - [0.163508, 0.467247, 0.121949, -0.972942, 0.220238], - [0.242187, 0.181362, 0.180630, 0.242461, -0.846640]])) - assert tm.tree == 
"((((((hg16:0.007738,panTro1:0.008356):0.027141,(baboon:0.009853,rheMac1:0.010187):0.035049):0.103138,galago:0.174770):0.019102,((rn3:0.092633,mm6:0.089667):0.273942,rabbit:0.230839):0.021927):0.023762,(canFam1:0.204637,(elephant:0.123777,tenrec:0.278910):0.085977):0.009439):0.306466,monDom1:0.401151)mammals;" + assert allclose( + tm.matrix, + array( + [ + [-0.971735, 0.122443, 0.465361, 0.163692, 0.220238], + [0.163508, -1.130351, 0.121949, 0.624656, 0.220238], + [0.623952, 0.122443, -1.130326, 0.163692, 0.220238], + [0.163508, 0.467247, 0.121949, -0.972942, 0.220238], + [0.242187, 0.181362, 0.180630, 0.242461, -0.846640], + ] + ), + ) + assert ( + tm.tree + == "((((((hg16:0.007738,panTro1:0.008356):0.027141,(baboon:0.009853,rheMac1:0.010187):0.035049):0.103138,galago:0.174770):0.019102,((rn3:0.092633,mm6:0.089667):0.273942,rabbit:0.230839):0.021927):0.023762,(canFam1:0.204637,(elephant:0.123777,tenrec:0.278910):0.085977):0.009439):0.306466,monDom1:0.401151)mammals;" + ) diff --git a/lib/bx/pwm/bed_score_aligned_pwm.py b/lib/bx/pwm/bed_score_aligned_pwm.py index 5f18c9ad..98d11ce8 100755 --- a/lib/bx/pwm/bed_score_aligned_pwm.py +++ b/lib/bx/pwm/bed_score_aligned_pwm.py @@ -25,7 +25,7 @@ def main(): # read in intervals regions = {} for line in open(sys.argv[1]): - if line.startswith('#'): + if line.startswith("#"): continue fields = line.strip().split() chrom, start, end = fields[0], int(fields[1]), int(fields[2]) @@ -47,11 +47,11 @@ def main(): species = [] - for sp in sys.argv[3].split(','): + for sp in sys.argv[3].split(","): species.append(sp) for maf in align_maf.Reader(inmaf): - mafchrom = maf.components[0].src.split('.')[1] + mafchrom = maf.components[0].src.split(".")[1] mafstart = maf.components[0].start mafend = maf.components[0].end reftext = maf.components[0].text @@ -60,7 +60,7 @@ def main(): for scoremax, width, headers in MafBlockScorer(pwm, species, maf): blocklength = width mafsrc, mafstart, mafend = headers[0] - mafchrom = mafsrc.split('.')[1] + 
mafchrom = mafsrc.split(".")[1] # lists of scores for each position in scoremax for mx_name, mx in scoremax.items(): @@ -68,7 +68,7 @@ def main(): # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: - refstart = mafstart + offset - reftext.count('-', 0, offset) + refstart = mafstart + offset - reftext.count("-", 0, offset) refend = refstart + len(pwm[mx_name]) data = " ".join(["%.2f" % mx[x][offset] for x in range(len(species))]) @@ -78,10 +78,10 @@ def main(): region_label = r[0].value else: continue - v_name = mx_name.replace(' ', '_') + v_name = mx_name.replace(" ", "_") print(mafchrom, refstart, refend, region_label, v_name, data) break -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/bed_score_aligned_string.py b/lib/bx/pwm/bed_score_aligned_string.py index 611208af..e00935f8 100755 --- a/lib/bx/pwm/bed_score_aligned_string.py +++ b/lib/bx/pwm/bed_score_aligned_string.py @@ -24,7 +24,7 @@ def main(): # read in intervals regions = {} for line in open(sys.argv[1]): - if line.startswith('#'): + if line.startswith("#"): continue fields = line.strip().split() chrom, start, end = fields[0], int(fields[1]), int(fields[2]) @@ -44,11 +44,11 @@ def main(): species = [] - for sp in sys.argv[3].split(','): + for sp in sys.argv[3].split(","): species.append(sp) for maf in align_maf.Reader(inmaf): - mafchrom = maf.components[0].src.split('.')[1] + mafchrom = maf.components[0].src.split(".")[1] mafstart = maf.components[0].start mafend = maf.components[0].end reftext = maf.components[0].text @@ -60,7 +60,7 @@ def main(): for scoremax, width, headers in MafMotifScorer(species, maf, motif_strings): blocklength = width mafsrc, mafstart, mafend = headers[0] - mafchrom = mafsrc.split('.')[1] + mafchrom = mafsrc.split(".")[1] # lists of scores for each position in scoremax for mx_name, mx in scoremax.items(): @@ -69,7 +69,7 @@ def main(): # scan all species with threshold for i in range(len(species)): 
if mx[i][offset] > threshold: - refstart = mafstart + offset - reftext.count('-', 0, offset) + refstart = mafstart + offset - reftext.count("-", 0, offset) refend = refstart + len(mx_name) data = " ".join(["%.2f" % mx[x][offset] for x in range(len(species))]) @@ -80,10 +80,10 @@ def main(): else: # region_label = 0 continue - v_name = mx_name.replace(' ', '_') + v_name = mx_name.replace(" ", "_") print(mafchrom, refstart, refend, region_label, v_name, data) break -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/maf_select_motifs.py b/lib/bx/pwm/maf_select_motifs.py index a21d6409..01367bb8 100755 --- a/lib/bx/pwm/maf_select_motifs.py +++ b/lib/bx/pwm/maf_select_motifs.py @@ -33,7 +33,7 @@ def main(): for maf in align_maf.Reader(inmaf): for mafmotif, pwm_score, motif_score in MafMotifSelect(maf, pwm, motif, threshold): print(mafmotif, pwm_score, motif_score) - print('zzzzzzzzzzzzzzzzzzzzzzzzzzzzz') + print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzz") def mafwrite(alignment, kvec=None, jvec=None, file=sys.stdout): @@ -70,5 +70,5 @@ def format_tabular(rows, align=None): return rval -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/position_weight_matrix.py b/lib/bx/pwm/position_weight_matrix.py index 448c3f45..4edef1af 100755 --- a/lib/bx/pwm/position_weight_matrix.py +++ b/lib/bx/pwm/position_weight_matrix.py @@ -3,7 +3,12 @@ import math import sys -from numpy import float32, putmask, shape, zeros +from numpy import ( + float32, + putmask, + shape, + zeros, +) # This is the average of all species in the alignment outside of exons # > mean(r) @@ -13,7 +18,7 @@ # A T C G # 0.01316192 0.01371148 0.01293836 0.01386655 -ENCODE_NONCODING_BACKGROUND = {'A': 0.2863776, 'T': 0.2878264, 'G': 0.2128400, 'C': 0.2129560} +ENCODE_NONCODING_BACKGROUND = {"A": 0.2863776, "T": 0.2878264, "G": 0.2128400, "C": 0.2129560} class Align: @@ -26,10 +31,13 @@ def __init__(self, seqrows, headers=None): if ncol is None: ncol = 
len(row) elif ncol != len(row): - raise ValueError("Align: __init__:alignment block:row %d does not have %d columns, it has %d" % (rownum, ncol, len(row))) + raise ValueError( + "Align: __init__:alignment block:row %d does not have %d columns, it has %d" + % (rownum, ncol, len(row)) + ) except Exception: print(row) - raise Exception('') + raise Exception("") self.ncols = ncol self.dims = (self.nrows, self.ncols) self.headers = headers @@ -40,7 +48,7 @@ def __str__(self): class AlignScoreMatrix: def __init__(self, align): - nan = float('nan') + nan = float("nan") matrix = zeros((align.nrows, align.ncols), float32) @@ -75,11 +83,11 @@ def score_align_motif(align, motif, gapmask=None, byPosition=True): for start in range(ncols): - if align.rows[ir][start] == '-': + if align.rows[ir][start] == "-": continue - elif align.rows[ir][start] == 'n': + elif align.rows[ir][start] == "n": continue - elif align.rows[ir][start] == 'N': + elif align.rows[ir][start] == "N": continue # get enough sequence for the weight matrix @@ -91,13 +99,13 @@ def score_align_motif(align, motif, gapmask=None, byPosition=True): break char = align.rows[ir][ic].upper() ic += 1 - if char == '-' or char == 'N': + if char == "-" or char == "N": continue else: subseq += char if len(subseq) == minSeqLen: - end = ic+1 + end = ic + 1 for_score = int(match_consensus(subseq, motif)) revseq = reverse_complement(subseq) rev_score = int(match_consensus(revseq, motif)) @@ -120,9 +128,10 @@ def score_align_motif(align, motif, gapmask=None, byPosition=True): # mask gap characters if gapmask is None: gapmask = score_align_gaps(align) - putmask(scoremax, gapmask, float('nan')) + putmask(scoremax, gapmask, float("nan")) return scoremax + # ----------- # # WeightMatrix-- @@ -150,20 +159,21 @@ class PositionWeightMatrix: # IUPAC-IUB symbols = { - 'A': frozenset(['A']), - 'C': frozenset(['C']), - 'G': frozenset(['G']), - 'T': frozenset(['T']), - 'R': frozenset(['A', 'G']), - 'Y': frozenset(['C', 'T']), - 'M': 
frozenset(['A', 'C']), - 'K': frozenset(['G', 'T']), - 'S': frozenset(['G', 'C']), - 'W': frozenset(['A', 'T']), - 'H': frozenset(['A', 'C', 'T']), - 'B': frozenset(['G', 'T', 'C']), - 'V': frozenset(['G', 'C', 'A']), - 'D': frozenset(['G', 'T', 'A'])} + "A": frozenset(["A"]), + "C": frozenset(["C"]), + "G": frozenset(["G"]), + "T": frozenset(["T"]), + "R": frozenset(["A", "G"]), + "Y": frozenset(["C", "T"]), + "M": frozenset(["A", "C"]), + "K": frozenset(["G", "T"]), + "S": frozenset(["G", "C"]), + "W": frozenset(["A", "T"]), + "H": frozenset(["A", "C", "T"]), + "B": frozenset(["G", "T", "C"]), + "V": frozenset(["G", "C", "A"]), + "D": frozenset(["G", "T", "A"]), + } def __init__(self, id, rows, alphabet, background=None, score_correction=True): @@ -179,11 +189,11 @@ def __init__(self, id, rows, alphabet, background=None, score_correction=True): sorted_alphabet = [] sorted_alphabet[:] = self.alphabet[:] sorted_alphabet.sort() - if ['A', 'C', 'G', 'T'] == sorted_alphabet: + if ["A", "C", "G", "T"] == sorted_alphabet: self.background = ENCODE_NONCODING_BACKGROUND else: for x in self.alphabet: - self.background[x] = float(1)/len(self.alphabet) + self.background[x] = float(1) / len(self.alphabet) if score_correction: self.score_correction = self.corrected_probability_score @@ -213,11 +223,6 @@ def __init__(self, id, rows, alphabet, background=None, score_correction=True): rows[i][x] = (w, s) scale = max(s, scale) - # except: - # print >>sys.stderr,rows - # raise ValueError - # raise ValueError, "pwm row %s has wrong field count" % " ".join(fields) - self.consensus.append(consensus) hashRows = [] @@ -230,7 +235,7 @@ def __init__(self, id, rows, alphabet, background=None, score_correction=True): hashRows.append(dict()) for x, sym in enumerate(alphabet): (w, s) = rows[i][x] - hashRows[i][sym] = w * scale/s + hashRows[i][sym] = w * scale / s assert hashRows[i][sym] >= 0 if sym not in self.matrix_base_counts: self.matrix_base_counts[sym] = 0 @@ -291,23 +296,23 @@ def 
__add__(self, other): if p == q == 0: width = max(len(self), len(other)) elif p > 0: - width = max(len(other)+p, len(self)) + width = max(len(other) + p, len(self)) elif q > 0: - width = max(len(self)+q, len(other)) + width = max(len(self) + q, len(other)) - sumx = zeros((width, len(self.alphabet)), dtype='int') + sumx = zeros((width, len(self.alphabet)), dtype="int") selfx = self.to_count_matrix() otherx = other.to_count_matrix() if p == q == 0: - sumx[:len(self)] += selfx - sumx[:len(other)] += otherx + sumx[: len(self)] += selfx + sumx[: len(other)] += otherx elif p > 0: - sumx[p:p+len(other)] += otherx - sumx[:len(self)] += selfx + sumx[p : p + len(other)] += otherx + sumx[: len(self)] += selfx else: - sumx[:len(other)] += otherx - sumx[q:q+len(self)] += selfx + sumx[: len(other)] += otherx + sumx[q : q + len(self)] += selfx newRows = [] for x in sumx: @@ -315,7 +320,7 @@ def __add__(self, other): y.append(consensus_symbol(y)) y = [str(yi) for yi in y] newRows.append(y) - return PositionWeightMatrix(self.id+other.id, newRows, self.alphabet, self.background) + return PositionWeightMatrix(self.id + other.id, newRows, self.alphabet, self.background) def __old_add__(self, other, maxp=None): @@ -326,9 +331,9 @@ def __old_add__(self, other, maxp=None): prsq = self.correlation(other) maxp = prsq.index(max(prsq)) - leftpad = ' ' * maxp + leftpad = " " * maxp rightsize = bigN - smallN - rightpad = ' ' * rightsize + rightpad = " " * rightsize leftStrings = [] rightStrings = [] @@ -345,7 +350,7 @@ def __old_add__(self, other, maxp=None): sumx = zeros([bigN, len(self.alphabet)]) sumx += larger.to_count_matrix() - sumx[maxp:maxp+smallN] += smaller.to_count_matrix() + sumx[maxp : maxp + smallN] += smaller.to_count_matrix() newRows = [] for i, x in enumerate(sumx): @@ -355,7 +360,7 @@ def __old_add__(self, other, maxp=None): newRows.append(y) # return PositionWeightMatrix(self.id+other.id,newRows[maxp:maxp+smallN],self.alphabet,self.background) - return 
PositionWeightMatrix(self.id+other.id, newRows, self.alphabet, self.background) + return PositionWeightMatrix(self.id + other.id, newRows, self.alphabet, self.background) def to_matrix(self): m = zeros([len(self), len(self.alphabet)]) @@ -365,7 +370,7 @@ def to_matrix(self): return m def to_count_matrix(self): - m = zeros([len(self), len(self.alphabet)], dtype='int') + m = zeros([len(self), len(self.alphabet)], dtype="int") for i in range(len(self)): for j, a in enumerate(self.alphabet): m[i][j] = self.counts[i][a] @@ -384,28 +389,28 @@ def slide_correlation(self, other): rsq = [] ixtuple = [] # self staggered over other, scan self backwards until flush - for q in range(len(other)-1, -1, -1): + for q in range(len(other) - 1, -1, -1): r = 0 n = 0 for p in range(len(self)): - if q+p < len(other): - r += rsquared(list(selfx[p]), list(otherx[q+p])) + if q + p < len(other): + r += rsquared(list(selfx[p]), list(otherx[q + p])) n += 1 else: n += 1 - rsq.append(r/n) + rsq.append(r / n) ixtuple.append((0, q)) # other staggered below self , scan other forward for p in range(1, len(self)): r = 0 n = 0 for q in range(len(other)): - if p+q < len(self): - r += rsquared(list(selfx[p+q]), list(otherx[q])) + if p + q < len(self): + r += rsquared(list(selfx[p + q]), list(otherx[q])) n += 1 else: n += 1 - rsq.append(r/n) + rsq.append(r / n) ixtuple.append((p, 0)) return rsq, ixtuple @@ -423,10 +428,10 @@ def correlation(self, otherwmx): # slide small over large, for ave rsq for p in range(bigN): - if p+smallN <= bigN: + if p + smallN <= bigN: r = 0 for q in range(smallN): - r += rsquared(list(smaller[q]), list(larger[p+q])) + r += rsquared(list(smaller[q]), list(larger[p + q])) position_rsq.append(r / smallN) return position_rsq @@ -445,11 +450,11 @@ def score_align(self, align, gapmask=None, byPosition=True): continue for start in range(ncols): - if align.rows[ir][start] == '-': + if align.rows[ir][start] == "-": continue - elif align.rows[ir][start] == 'n': + elif 
align.rows[ir][start] == "n": continue - elif align.rows[ir][start] == 'N': + elif align.rows[ir][start] == "N": continue # get enough sequence for the weight matrix @@ -458,13 +463,13 @@ def score_align(self, align, gapmask=None, byPosition=True): for ic in range(start, ncols): char = align.rows[ir][ic] - if char == '-' or char == 'N': + if char == "-" or char == "N": continue else: subseq += char if len(subseq) == minSeqLen: - end = ic+1 + end = ic + 1 # forward scores = self.score_seq(subseq) @@ -488,7 +493,7 @@ def score_align(self, align, gapmask=None, byPosition=True): # mask gap characters if gapmask is None: gapmask = score_align_gaps(align) - putmask(scoremax, gapmask, float('nan')) + putmask(scoremax, gapmask, float("nan")) return scoremax # seq can be a string, a list of characters, or a quantum sequence (a list @@ -502,14 +507,14 @@ def score_seq(self, seq): for start in range(len(seq)): if start + len(self) > len(seq): break - subseq = seq[start:start+len(self)] + subseq = seq[start : start + len(self)] raw = 0 try: for i, nt in enumerate(subseq): raw += self.rows[i][nt.upper()] scaled = self.scaled(raw) except KeyError: - raw, scaled = float('nan'), float('nan') + raw, scaled = float("nan"), float("nan") scores.append((raw, scaled)) return scores @@ -518,20 +523,20 @@ def score_quantum_seq(self, seq): for start in range(len(seq)): if start + len(self) > len(seq): break - subseq = seq[start:start+len(self)] + subseq = seq[start : start + len(self)] raw = 0 try: for i, nt in enumerate(subseq): numer = sum(subseq[i][nt] * self.probs[i][nt] for nt in subseq[i]) denom = sum(subseq[i][nt] * self.background[nt] for nt in subseq[i]) - raw += math.log(numer/denom, 2) + raw += math.log(numer / denom, 2) scaled = self.scaled(raw) except KeyError: - raw, scaled = float('nan'), float('nan') + raw, scaled = float("nan"), float("nan") except OverflowError: - raw, scaled = float('nan'), float('nan') + raw, scaled = float("nan"), float("nan") except ValueError: - raw, 
scaled = float('nan'), float('nan') + raw, scaled = float("nan"), float("nan") scores.append((raw, scaled)) return scores @@ -547,6 +552,7 @@ def scaled(self, val): def pseudocount(self, base=None): def f(count): return math.sqrt(count + 1) + if base in self.alphabet: return f(self.matrix_base_counts[base]) elif base is None: @@ -585,15 +591,15 @@ def pwm_score(self, base, i, freq, background=None): # print >>sys.stderr, "k %d %c" % (i,base),freq[i][base] b = background[base] try: - return math.log(p/b, 2) + return math.log(p / b, 2) except OverflowError: # print >>sys.stderr,"base=%c, math.log(%.3f / %.3f)" % (base,p,b) # print >>sys.stderr,self.id - return float('nan') + return float("nan") except ValueError: # print >>sys.stderr,"base=%c, math.log(%.3f / %.3f)" % (base,p,b) # print >>sys.stderr,self.id - return float('nan') + return float("nan") def parse_weight(self, weightString): @@ -607,9 +613,9 @@ def parse_weight(self, weightString): if len(fields) == 2: for _ in range(0, len(fields[1])): s *= 10 - w = s*w + int(fields[1]) + w = s * w + int(fields[1]) - return (w, s) # w = the weight + return (w, s) # w = the weight # s = the scale used (a power of 10) def __str__(self): @@ -619,7 +625,14 @@ def __str__(self): for ix in range(0, len(self.rows)): weights = ["%d" % self.counts[ix][nt] for nt in self.alphabet] # lines.append(("%02d\t" % ix) + "\t".join(weights) + "\t" + self.consensus[ix]) - lines.append(("%02d\t" % ix) + "\t".join(weights) + "\t" + str(sum(self.counts[ix].values())) + "\t" + self.consensus[ix]) + lines.append( + ("%02d\t" % ix) + + "\t".join(weights) + + "\t" + + str(sum(self.counts[ix].values())) + + "\t" + + self.consensus[ix] + ) return "\n".join(lines) @@ -643,12 +656,13 @@ def score_align_gaps(align): continue # scan for gaps for pos in range(ncols): - if align.rows[ir][pos] == '-': + if align.rows[ir][pos] == "-": scoremax[ir][pos] = 1 else: scoremax[ir][pos] = 0 return scoremax + # ----------- # # WeightMatrix Reader-- @@ -660,7 
+674,7 @@ def score_align_gaps(align): class Reader: """Iterate over all interesting weight matrices in a file""" - def __init__(self, file, tfIds=None, name=None, format='basic', background=None, score_correction=True): + def __init__(self, file, tfIds=None, name=None, format="basic", background=None, score_correction=True): self.tfIds = tfIds self.file = file self.name = name @@ -679,9 +693,9 @@ def where(self): return "line %d in %s" % (self.lineNumber, self.name) def __iter__(self): - if self.format == 'basic': + if self.format == "basic": return self.read_as_basic() - elif self.format == 'transfac': + elif self.format == "transfac": return self.read_as_transfac() else: raise ValueError("unknown weight matrix file format: '%s'" % self.format) @@ -690,8 +704,8 @@ def read_as_basic(self): tfId = None pwmRows = None - alphabet = ['A', 'C', 'G', 'T'] - while (True): + alphabet = ["A", "C", "G", "T"] + while True: line = self.file.readline() if not line: break @@ -712,7 +726,9 @@ def read_as_basic(self): # print >>sys.stderr,[ "%.2f" % (float(v)/sum(vals)) for v in vals], tokens[-1] pwmRows.append(tokens) if pwmRows is not None: # we've finished collecting a desired matrix - yield PositionWeightMatrix(tfId, pwmRows, alphabet, background=self.background, score_correction=self.score_correction) + yield PositionWeightMatrix( + tfId, pwmRows, alphabet, background=self.background, score_correction=self.score_correction + ) def read_as_transfac(self): self.tfToPwm = {} @@ -730,7 +746,13 @@ def read_as_transfac(self): if pwmRows is not None: # we've finished collecting a desired matrix try: # FIXME: alphabet is undefined here! 
- yield PositionWeightMatrix(tfId, pwmRows, alphabet, background=self.background, score_correction=self.score_correction) # noqa: F821 + yield PositionWeightMatrix( + tfId, + pwmRows, + alphabet, # noqa: F821 + background=self.background, + score_correction=self.score_correction, + ) except Exception: print("Failed to read", tfId, file=sys.stderr) tfId = None @@ -741,7 +763,7 @@ def read_as_transfac(self): raise ValueError("bad line, need two fields (%s)" % self.where()) tfId = tokens[1] if self.tfIds is not None and (tfId not in self.tfIds): - continue # ignore it, this isn't a desired matrix + continue # ignore it, this isn't a desired matrix if tfId in self.tfToPwm: raise ValueError(f"transcription factor {tfId} appears twice ({self.where()})") pwmRows = [] # start collecting a desired matrix @@ -754,7 +776,7 @@ def read_as_transfac(self): continue # name, if present, added to ID - if line.startswith('NA'): + if line.startswith("NA"): words = line.strip().split() tfId = tfId + "\t" + " ".join(words[1:]) @@ -770,7 +792,7 @@ def read_as_transfac(self): tokens = line.split() try: index = int(tokens[0]) - if index != len(pwmRows)+1: + if index != len(pwmRows) + 1: raise ValueError except Exception: raise ValueError("bad line, bad index (%s)" % self.where()) @@ -782,7 +804,9 @@ def read_as_transfac(self): pwmRows = None continue if pwmRows is not None: # we've finished collecting a desired matrix - yield PositionWeightMatrix(tfId, pwmRows, alphabet, background=self.background, score_correction=self.score_correction) + yield PositionWeightMatrix( + tfId, pwmRows, alphabet, background=self.background, score_correction=self.score_correction + ) # clean up self.tfToPwm = None @@ -800,7 +824,7 @@ def reverse_complement(nukes): def rsquared(x, y): try: - return sum_of_squares(x, y)**2 / (sum_of_squares(x) * sum_of_squares(y)) + return sum_of_squares(x, y) ** 2 / (sum_of_squares(x) * sum_of_squares(y)) except ZeroDivisionError: # return float('nan') return 0 @@ -812,7 
+836,7 @@ def sum_of_squares(x, y=None): xmean = float(sum(x)) / len(x) ymean = float(sum(y)) / len(y) assert len(x) == len(y) - return sum(float(xi)*float(yi) for xi, yi in zip(x, y)) - len(x)*xmean*ymean + return sum(float(xi) * float(yi) for xi, yi in zip(x, y)) - len(x) * xmean * ymean def consensus_symbol(pattern): @@ -826,24 +850,25 @@ def consensus_symbol(pattern): # IUPAC-IUB nomenclature for wobblers wobblers = { - 'R': frozenset(['A', 'G']), - 'Y': frozenset(['C', 'T']), - 'M': frozenset(['A', 'C']), - 'K': frozenset(['G', 'T']), - 'S': frozenset(['G', 'C']), - 'W': frozenset(['A', 'T']), - 'H': frozenset(['A', 'C', 'T']), - 'B': frozenset(['G', 'T', 'C']), - 'V': frozenset(['G', 'C', 'A']), - 'D': frozenset(['G', 'T', 'A'])} - - symbols = ['A', 'C', 'G', 'T'] + "R": frozenset(["A", "G"]), + "Y": frozenset(["C", "T"]), + "M": frozenset(["A", "C"]), + "K": frozenset(["G", "T"]), + "S": frozenset(["G", "C"]), + "W": frozenset(["A", "T"]), + "H": frozenset(["A", "C", "T"]), + "B": frozenset(["G", "T", "C"]), + "V": frozenset(["G", "C", "A"]), + "D": frozenset(["G", "T", "A"]), + } + + symbols = ["A", "C", "G", "T"] if isinstance(pattern, dict): pattern = [pattern[u] for u in symbols] total = sum(pattern) - f = [(space/1e5)+(float(x)/total) for space, x in enumerate(pattern)] + f = [(space / 1e5) + (float(x) / total) for space, x in enumerate(pattern)] copy = [] copy[:] = f[:] copy.sort() @@ -863,9 +888,9 @@ def consensus_symbol(pattern): if degen == wobbles: return degenSymbol else: - return 'N' + return "N" print(pattern, file=sys.stderr) - raise Exception('?') + raise Exception("?") # import C extensions @@ -875,12 +900,13 @@ def consensus_symbol(pattern): def match_consensus(sequence, pattern): return c_match_consensus(sequence, pattern, len(sequence)) + # print >>sys.stderr, "C match_consensus used" except ImportError: # print >>sys.stderr, "python match_consensus used" def match_consensus(sequence, pattern, size): for s, p in zip(sequence, pattern): - 
if p == 'N': + if p == "N": continue if s not in PositionWeightMatrix.symbols[p]: return False diff --git a/lib/bx/pwm/pwm_score_maf.py b/lib/bx/pwm/pwm_score_maf.py index 1f914386..d86c0e8b 100755 --- a/lib/bx/pwm/pwm_score_maf.py +++ b/lib/bx/pwm/pwm_score_maf.py @@ -13,7 +13,7 @@ def isnan(x): return True -NaN = float('nan') +NaN = float("nan") def main(): @@ -27,25 +27,25 @@ def main(): # read alignment species species = [] - for sp in splist.split(','): + for sp in splist.split(","): species.append(sp) # read weight matrices pwm = {} - for wm in pwmx.Reader(open(pwm_file), format='basic'): + for wm in pwmx.Reader(open(pwm_file), format="basic"): pwm[wm.id] = wm fbunch = {} for scoremax, index, headers in MafScorer(pwm, species, inmaf): for k, matrix in scoremax.items(): - fname = k + '.mx' + fname = k + ".mx" if fname not in fbunch: - fbunch[fname] = open(fname, 'w') + fbunch[fname] = open(fname, "w") print("Writing", fname, file=sys.stderr) for i in range(len(matrix)): for j in range(len(matrix[i])): - print("%.2f" % matrix[i][j], end=' ', file=fbunch[fname]) + print("%.2f" % matrix[i][j], end=" ", file=fbunch[fname]) print(file=fbunch[fname]) for file in fbunch.values(): @@ -91,17 +91,17 @@ def MafMotifSelect(mafblock, pwm, motif=None, threshold=0): # record the text sizes from the alignment rows for start in range(ncols - minSeqLen): - if align.rows[0][start] == '-': + if align.rows[0][start] == "-": continue subseq = "" pwm_score_vec = [] motif_score_vec = [] max_cols = 0 for ir in range(nrows): - expanded = align.rows[ir].count('-', start, minSeqLen) - subtext = align.rows[ir][start: minSeqLen+expanded] + expanded = align.rows[ir].count("-", start, minSeqLen) + subtext = align.rows[ir][start : minSeqLen + expanded] max_cols = max(len(subtext), max_cols) - subseq = subtext.replace('-', '') + subseq = subtext.replace("-", "") revseq = pwmx.reverse_complement(subseq) # pwm score nill, f_score = pwm.score_seq(subseq)[0] @@ -178,7 +178,7 @@ def 
MafBlockScorer(pwm, species, maf): headers = [(c.src, c.start, c.end) for c in maf.components] # expand block rows to full - mafBlockSpecies = [specName.src.split('.')[0] for specName in maf.components] + mafBlockSpecies = [specName.src.split(".")[0] for specName in maf.components] alignlist = [] for sp in species: try: @@ -201,7 +201,7 @@ def MafMotifScorer(species, maf, motifs): headers = [(c.src, c.start, c.end) for c in maf.components] # expand block rows to full - mafBlockSpecies = [specName.src.split('.')[0] for specName in maf.components] + mafBlockSpecies = [specName.src.split(".")[0] for specName in maf.components] alignlist = [] for sp in species: try: @@ -223,5 +223,5 @@ def MafMotifScorer(species, maf, motifs): yield scoremax, width, headers -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/pwm_score_motifs.py b/lib/bx/pwm/pwm_score_motifs.py index cddbafcb..623c0077 100755 --- a/lib/bx/pwm/pwm_score_motifs.py +++ b/lib/bx/pwm/pwm_score_motifs.py @@ -25,11 +25,11 @@ def main(): species = [] - for sp in sys.argv[3].split(','): + for sp in sys.argv[3].split(","): species.append(sp) for maf in align_maf.Reader(inmaf): - mafchrom = maf.components[0].src.split('.')[1] + mafchrom = maf.components[0].src.split(".")[1] mafstart = maf.components[0].start mafend = maf.components[0].end reftext = maf.components[0].text @@ -38,7 +38,7 @@ def main(): for scoremax, width, headers in MafMotifScorer(species, maf, targmotif): blocklength = width mafsrc, mafstart, mafend = headers[0] - mafchrom = mafsrc.split('.')[1] + mafchrom = mafsrc.split(".")[1] # lists of scores for each position in scoremax mx = scoremax @@ -47,13 +47,13 @@ def main(): # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: - refstart = mafstart + offset - reftext.count('-', 0, offset) + refstart = mafstart + offset - reftext.count("-", 0, offset) refend = refstart + len(targmotif) data = " ".join(["%.2f" % mx[x][offset] 
for x in range(len(species))]) # quote the motif - print(mafchrom, refstart, refend, "'"+targmotif+"'", data) + print(mafchrom, refstart, refend, "'" + targmotif + "'", data) break -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/pwm_score_positions.py b/lib/bx/pwm/pwm_score_positions.py index 7167fe05..9ea6a1d8 100755 --- a/lib/bx/pwm/pwm_score_positions.py +++ b/lib/bx/pwm/pwm_score_positions.py @@ -31,11 +31,11 @@ def main(): species = [] - for sp in sys.argv[5].split(','): + for sp in sys.argv[5].split(","): species.append(sp) for maf in align_maf.Reader(inmaf): - mafchrom = maf.components[0].src.split('.')[1] + mafchrom = maf.components[0].src.split(".")[1] mafstart = maf.components[0].start mafend = maf.components[0].end reftext = maf.components[0].text @@ -44,7 +44,7 @@ def main(): for scoremax, width, headers in MafBlockScorer(pwm, species, maf): blocklength = width mafsrc, mafstart, mafend = headers[0] - mafchrom = mafsrc.split('.')[1] + mafchrom = mafsrc.split(".")[1] # lists of scores for each position in scoremax for id, mx in scoremax.items(): @@ -53,13 +53,13 @@ def main(): # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: - refstart = mafstart + offset - reftext.count('-', 0, offset) + refstart = mafstart + offset - reftext.count("-", 0, offset) refend = refstart + len(pwm[id]) data = " ".join(["%.2f" % mx[x][offset] for x in range(len(species))]) # underscore spaces in the name - print(mafchrom, refstart, refend, id.replace(' ', '_'), data) + print(mafchrom, refstart, refend, id.replace(" ", "_"), data) break -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/lib/bx/pwm/pwm_tests.py b/lib/bx/pwm/pwm_tests.py index 8c4cc06e..fe017b01 100644 --- a/lib/bx/pwm/pwm_tests.py +++ b/lib/bx/pwm/pwm_tests.py @@ -3,8 +3,7 @@ import bx.pwm.position_weight_matrix as pwm -basicPwm = \ - """>MA0101 c-REL REL +basicPwm = """>MA0101 c-REL REL 0 5 8 4 0 1 15 1 1 
0 15 1 @@ -17,8 +16,7 @@ 1 16 0 0 """ -transfacPwm = \ - """ID TATA +transfacPwm = """ID TATA XX P0 A C G T 01 33 73 78 16 S @@ -39,40 +37,48 @@ XX """ -background = {'A': .28, 'C': .21, 'G': .24, 'T': .27} +background = {"A": 0.28, "C": 0.21, "G": 0.24, "T": 0.27} dSeq = "ACCGAGTTAGCGTAAA" dScoresExpected = "-15.3697 0.4240 -16.5309 0.4027" -qSeq = [{'A': 0.27, 'C': 0.34, 'G': 0.07, 'T': 0.32}, - {'A': 0.24, 'C': 0.32, 'G': 0.09, 'T': 0.35}, - {'A': 0.80, 'C': 0.11, 'G': 0.03, 'T': 0.06}, - {'A': 0.07, 'C': 0.22, 'G': 0.37, 'T': 0.34}, - {'A': 0.07, 'C': 0.44, 'G': 0.03, 'T': 0.46}, - {'A': 0.43, 'C': 0.04, 'G': 0.18, 'T': 0.35}, - {'A': 0.84, 'C': 0.14, 'G': 0.01, 'T': 0.01}, - {'A': 0.31, 'C': 0.52, 'G': 0.13, 'T': 0.04}, - {'A': 0.22, 'C': 0.22, 'G': 0.45, 'T': 0.11}, - {'A': 0.36, 'C': 0.15, 'G': 0.42, 'T': 0.07}, - {'A': 0.11, 'C': 0.78, 'G': 0.07, 'T': 0.04}, - {'A': 0.07, 'C': 0.16, 'G': 0.64, 'T': 0.13}, - {'A': 0.34, 'C': 0.59, 'G': 0.03, 'T': 0.04}, - {'A': 0.32, 'C': 0.15, 'G': 0.07, 'T': 0.46}, - {'A': 0.07, 'C': 0.03, 'G': 0.59, 'T': 0.31}] +qSeq = [ + {"A": 0.27, "C": 0.34, "G": 0.07, "T": 0.32}, + {"A": 0.24, "C": 0.32, "G": 0.09, "T": 0.35}, + {"A": 0.80, "C": 0.11, "G": 0.03, "T": 0.06}, + {"A": 0.07, "C": 0.22, "G": 0.37, "T": 0.34}, + {"A": 0.07, "C": 0.44, "G": 0.03, "T": 0.46}, + {"A": 0.43, "C": 0.04, "G": 0.18, "T": 0.35}, + {"A": 0.84, "C": 0.14, "G": 0.01, "T": 0.01}, + {"A": 0.31, "C": 0.52, "G": 0.13, "T": 0.04}, + {"A": 0.22, "C": 0.22, "G": 0.45, "T": 0.11}, + {"A": 0.36, "C": 0.15, "G": 0.42, "T": 0.07}, + {"A": 0.11, "C": 0.78, "G": 0.07, "T": 0.04}, + {"A": 0.07, "C": 0.16, "G": 0.64, "T": 0.13}, + {"A": 0.34, "C": 0.59, "G": 0.03, "T": 0.04}, + {"A": 0.32, "C": 0.15, "G": 0.07, "T": 0.46}, + {"A": 0.07, "C": 0.03, "G": 0.59, "T": 0.31}, +] qScoresExpected = "4.1106 0.7810" -class PWMTestCase (unittest.TestCase): - +class PWMTestCase(unittest.TestCase): def testReader(self): # test basic format: i.e. 
for jaspar - wms = [wm for wm in pwm.Reader(StringIO(basicPwm), format="basic", background=background, score_correction=False)] + wms = [ + wm for wm in pwm.Reader(StringIO(basicPwm), format="basic", background=background, score_correction=False) + ] assert len(wms) == 1 # test transfac format - wms = [wm for wm in pwm.Reader(StringIO(transfacPwm), format="transfac", background=background, score_correction=False)] + wms = [ + wm + for wm in pwm.Reader( + StringIO(transfacPwm), format="transfac", background=background, score_correction=False + ) + ] assert len(wms) == 1 wm = wms[0] diff --git a/lib/bx/seq/_nib.pyx b/lib/bx/seq/_nib.pyx index e848dab0..aa2c7488 100644 --- a/lib/bx/seq/_nib.pyx +++ b/lib/bx/seq/_nib.pyx @@ -1,10 +1,13 @@ from cpython.version cimport PY_MAJOR_VERSION + cdef extern from "Python.h": char * PyBytes_AsString( object ) object PyBytes_FromStringAndSize( char *, Py_ssize_t ) -import struct, sys +import struct +import sys + cdef char * NIB_I2C_TABLE cdef char * NIB_I2C_TABLE_FIRST diff --git a/lib/bx/seq/_twobit.pyx b/lib/bx/seq/_twobit.pyx index ae665069..348631f9 100644 --- a/lib/bx/seq/_twobit.pyx +++ b/lib/bx/seq/_twobit.pyx @@ -1,5 +1,6 @@ from cpython.version cimport PY_MAJOR_VERSION + cdef extern from "Python.h": char * PyBytes_AsString( object ) object PyBytes_FromStringAndSize( char *, Py_ssize_t ) @@ -10,10 +11,11 @@ cdef extern from "ctype.h": cdef extern from "string.h": void * memset( void *, int, size_t ) -import struct, sys - +import struct +import sys from bisect import bisect + cdef char* valToNt valToNt = "TCAG" diff --git a/lib/bx/seq/core.py b/lib/bx/seq/core.py index 3be97ed1..09ee8994 100644 --- a/lib/bx/seq/core.py +++ b/lib/bx/seq/core.py @@ -8,14 +8,20 @@ import struct -from . import fasta, nib, qdna +from . 
import ( + fasta, + nib, + qdna, +) # DNA reverse complement table -DNA_COMP = " - " \ - " TVGH CD M KN YSA BWXR tvgh cd m kn ysa bwxr " \ - " " \ - " " +DNA_COMP = ( + " - " + " TVGH CD M KN YSA BWXR tvgh cd m kn ysa bwxr " + " " + " " +) def reverse_complement(text): @@ -23,18 +29,18 @@ def reverse_complement(text): def seq_file(file, format=None, revcomp=False, name="", gap=None, contig=None): - if (format is None): + if format is None: format = infer_format(file) if (contig is not None) and (format not in ["fasta", None]): raise ValueError("Contigs are not supported for format %s" % format) - if (format == "fasta"): + if format == "fasta": return fasta.FastaFile(file, revcomp=revcomp, name=name, gap=gap, contig=contig) - elif (format == "nib"): + elif format == "nib": return nib.NibFile(file, revcomp=revcomp, name=name, gap=gap) - elif (format == "qdna"): + elif format == "qdna": return qdna.QdnaFile(file, revcomp=revcomp, name=name, gap=gap) else: - if (format is None): + if format is None: format = "" else: format = " " + format @@ -42,24 +48,24 @@ def seq_file(file, format=None, revcomp=False, name="", gap=None, contig=None): def seq_reader(file, format=None, revcomp=False, name="", gap=None): - if (format is None): + if format is None: format = infer_format(file) - if (format == "fasta"): + if format == "fasta": return fasta.FastaReader(file, revcomp=revcomp, name=name, gap=gap) - elif (format == "nib"): + elif format == "nib": return nib.NibReader(file, revcomp=revcomp, name=name, gap=gap) - elif (format == "qdna"): + elif format == "qdna": return qdna.QdnaReader(file, revcomp=revcomp, name=name, gap=gap) else: raise ValueError("Unknown sequence format %s" % format) def seq_writer(outfile, format=None, name=""): - if (format == "fasta"): + if format == "fasta": return fasta.FastaWriter(outfile) - elif (format == "nib"): + elif format == "nib": return nib.NibWriter(outfile) - elif (format == "qdna"): + elif format == "qdna": return qdna.QdnaWriter(outfile) 
else: raise ValueError("Unknown sequence format %s" % format) @@ -74,7 +80,7 @@ def infer_format(file): format = "qdna" else: file.seek(0) - if (file.read(1) == b">"): + if file.read(1) == b">": format = "fasta" file.seek(0) return format diff --git a/lib/bx/seq/fasta.py b/lib/bx/seq/fasta.py index 2b7c8ab1..c8133781 100644 --- a/lib/bx/seq/fasta.py +++ b/lib/bx/seq/fasta.py @@ -31,35 +31,37 @@ """ -from bx.seq.seq import SeqFile, SeqReader +from bx.seq.seq import ( + SeqFile, + SeqReader, +) class FastaFile(SeqFile): - def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, contig=None): SeqFile.__init__(self, file, revcomp, name, gap) self.lookahead = lookahead - if (contig is None): + if contig is None: contig = 1 - assert (contig >= 1), "contig %d is not legal" % contig + assert contig >= 1, "contig %d is not legal" % contig # nota bene: certainly not the most efficient or elegant implementation currContig = 1 - while (True): - if (self.lookahead is not None): + while True: + if self.lookahead is not None: (line, self.lookahead) = (self.lookahead, None) else: line = self.file.readline() if not isinstance(line, str): line = line.decode() - if (line == ""): + if line == "": break if not line: break - if (line.startswith(">")): - if (self.text is not None): - if (currContig == contig): + if line.startswith(">"): + if self.text is not None: + if currContig == contig: self.lookahead = line # (next sequence header) break currContig += 1 @@ -67,26 +69,24 @@ def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, conti self.text = [] continue line = line.split() # (remove whitespace) - if (self.text is None): + if self.text is None: self.text = line # (allows headerless fasta) else: self.text.extend(line) - assert (currContig == contig), \ - "contig %d is not legal (file contains only %d)" % (contig, currContig) - if (self.text is not None): + assert currContig == contig, "contig %d is not legal (file contains only %d)" % (contig, 
currContig) + if self.text is not None: self.text = "".join(self.text) self.length = len(self.text) class FastaReader(SeqReader): - def __init__(self, file, revcomp=False, name="", gap=None): SeqReader.__init__(self, file, revcomp, name, gap) self.lookahead = None def __next__(self): seq = FastaFile(self.file, self.revcomp, self.name, self.gap, self.lookahead) - if (seq.text is None): + if seq.text is None: return self.lookahead = seq.lookahead self.seqs_read += 1 @@ -94,7 +94,6 @@ def __next__(self): class FastaWriter: - def __init__(self, file, columns=50): self.file = file self.columns = columns @@ -103,11 +102,10 @@ def write(self, seq): print(">%s" % seq.name, file=self.file) text = seq.text if (self.columns is not None) and (self.columns > 0): - text = "\n".join([text[ix:ix+self.columns] - for ix in range(0, len(text), self.columns)]) + text = "\n".join(text[ix : ix + self.columns] for ix in range(0, len(text), self.columns)) print(text, file=self.file) def close(self): - assert (self.file is not None) + assert self.file is not None self.file.close() self.file = None diff --git a/lib/bx/seq/fasta_tests.py b/lib/bx/seq/fasta_tests.py index 4879085e..df4105bf 100644 --- a/lib/bx/seq/fasta_tests.py +++ b/lib/bx/seq/fasta_tests.py @@ -10,20 +10,21 @@ # Same sequence data as stored in test.fa -valid_seq = "TGGAGGCATTTGTGATTCAATAGATGCAGAAAGAAACCTTCCTAGAGCTG" \ - + "GCGTTCTCTAACTAAAAGTGGAAAGTTCTGAGGAATGAGGACTGTTATAA" \ - + "ATCCCACCCCACACCGCACCTTCTCCAGGGAAGTTTCATGGCCGTGAAGA" \ - + "GGACAGAAAGTGAGAACCAAGATggaactgaataaacaagcttcacactg" \ - + "ttagtttccccatatgcttaccttcccacagatgccaaccttggaggcct" \ - + "aagaggcctagaatattatcctttgtctgatcatttctctacaaatttat" \ - + "tgttctttgttaagatgctacataagcccaaattctaaccacccctttga" \ +valid_seq = ( + "TGGAGGCATTTGTGATTCAATAGATGCAGAAAGAAACCTTCCTAGAGCTG" + + "GCGTTCTCTAACTAAAAGTGGAAAGTTCTGAGGAATGAGGACTGTTATAA" + + "ATCCCACCCCACACCGCACCTTCTCCAGGGAAGTTTCATGGCCGTGAAGA" + + "GGACAGAAAGTGAGAACCAAGATggaactgaataaacaagcttcacactg" + + 
"ttagtttccccatatgcttaccttcccacagatgccaaccttggaggcct" + + "aagaggcctagaatattatcctttgtctgatcatttctctacaaatttat" + + "tgttctttgttaagatgctacataagcccaaattctaaccacccctttga" + "gttacccatcatcaagtttctcccatgtg" +) valid_seq_len = len(valid_seq) class FASTATestCase(unittest.TestCase): - def test_get(self): fastafile = fasta.FastaFile(open(test_fa, "rb")) check_get(fastafile, 0, valid_seq_len) @@ -32,4 +33,4 @@ def test_get(self): def check_get(fastafile, start, len): - assert fastafile.get(start, len) == valid_seq[start:start+len] + assert fastafile.get(start, len) == valid_seq[start : start + len] diff --git a/lib/bx/seq/nib.py b/lib/bx/seq/nib.py index 024200e8..95ddd5ef 100644 --- a/lib/bx/seq/nib.py +++ b/lib/bx/seq/nib.py @@ -23,7 +23,10 @@ import math import struct -from bx.seq.seq import SeqFile, SeqReader +from bx.seq.seq import ( + SeqFile, + SeqReader, +) from . import _nib NIB_MAGIC_NUMBER = 0x6BE93D3A @@ -33,13 +36,12 @@ class NibFile(SeqFile): - def __init__(self, file, revcomp=False, name="", gap=None): SeqFile.__init__(self, file, revcomp, name, gap) self.byte_order = ">" magic = struct.unpack(">L", file.read(NIB_MAGIC_SIZE))[0] - if (magic != NIB_MAGIC_NUMBER): + if magic != NIB_MAGIC_NUMBER: if magic == NIB_MAGIC_NUMBER_SWAP: self.byte_order = "<" else: @@ -63,12 +65,11 @@ def raw_fetch(self, start, length): class NibReader(SeqReader): - def __init__(self, file, revcomp=False, name="", gap=None): SeqReader.__init__(self, file, revcomp, name, gap) def __next__(self): - if (self.seqs_read != 0): + if self.seqs_read != 0: return # nib files have just one sequence seq = NibFile(self.file, self.revcomp, self.name, self.gap) self.seqs_read += 1 @@ -76,12 +77,11 @@ def __next__(self): class NibWriter: - def __init__(self, file): self.file = file def write(self, seq): - assert (False), "NibWriter.write() is not implemented yet" + assert False, "NibWriter.write() is not implemented yet" def close(self): self.file.close() diff --git a/lib/bx/seq/nib_tests.py 
b/lib/bx/seq/nib_tests.py index f179f009..f54ac988 100644 --- a/lib/bx/seq/nib_tests.py +++ b/lib/bx/seq/nib_tests.py @@ -10,22 +10,23 @@ # Same sequence data as stored in test.nib -valid_seq = "TGGAGGCATTTGTGATTCAATAGATGCAGAAAGAAACCTTCCTAGAGCTG" \ - + "GCGTTCTCTAACTAAAAGTGGAAAGTTCTGAGGAATGAGGACTGTTATAA" \ - + "ATCCCACCCCACACCGCACCTTCTCCAGGGAAGTTTCATGGCCGTGAAGA" \ - + "GGACAGAAAGTGAGAACCAAGATggaactgaataaacaagcttcacactg" \ - + "ttagtttccccatatgcttaccttcccacagatgccaaccttggaggcct" \ - + "aagaggcctagaatattatcctttgtctgatcatttctctacaaatttat" \ - + "tgttctttgttaagatgctacataagcccaaattctaaccacccctttga" \ +valid_seq = ( + "TGGAGGCATTTGTGATTCAATAGATGCAGAAAGAAACCTTCCTAGAGCTG" + + "GCGTTCTCTAACTAAAAGTGGAAAGTTCTGAGGAATGAGGACTGTTATAA" + + "ATCCCACCCCACACCGCACCTTCTCCAGGGAAGTTTCATGGCCGTGAAGA" + + "GGACAGAAAGTGAGAACCAAGATggaactgaataaacaagcttcacactg" + + "ttagtttccccatatgcttaccttcccacagatgccaaccttggaggcct" + + "aagaggcctagaatattatcctttgtctgatcatttctctacaaatttat" + + "tgttctttgttaagatgctacataagcccaaattctaaccacccctttga" + "gttacccatcatcaagtttctcccatgtg" +) valid_seq_len = len(valid_seq) class NIBTestCase(unittest.TestCase): - def test_get(self): - nibfile = nib.NibFile(open(test_nib, 'rb')) + nibfile = nib.NibFile(open(test_nib, "rb")) # Try all combinations of even / odd boundaries check_get(nibfile, 0, 10) check_get(nibfile, 1, 10) @@ -44,4 +45,4 @@ def test_get(self): def check_get(nibfile, start, len): - assert nibfile.get(start, len) == valid_seq[start:start+len] + assert nibfile.get(start, len) == valid_seq[start : start + len] diff --git a/lib/bx/seq/qdna.py b/lib/bx/seq/qdna.py index 933ccde1..aa9dbad9 100644 --- a/lib/bx/seq/qdna.py +++ b/lib/bx/seq/qdna.py @@ -42,14 +42,16 @@ import struct from io import StringIO -from bx.seq.seq import SeqFile, SeqReader +from bx.seq.seq import ( + SeqFile, + SeqReader, +) -qdnaMagic = 0xC4B47197 # big endian magic number for qdna files +qdnaMagic = 0xC4B47197 # big endian magic number for qdna files qdnaMagicSwap = 0x9771B4C4 class 
QdnaFile(SeqFile): - def __init__(self, file, revcomp=False, name="", gap=None, codebook=None): SeqFile.__init__(self, file, revcomp, name, gap) if gap is None: @@ -69,13 +71,11 @@ def __init__(self, file, revcomp=False, name="", gap=None, codebook=None): # process header - self.version = struct.unpack("%sL" % self.byte_order, - self.file.read(4))[0] + self.version = struct.unpack("%sL" % self.byte_order, self.file.read(4))[0] if self.version not in [0x100, 0x200]: raise ValueError("unsupported quantum-dna (version=%08X)" % self.version) - self.headerLength = struct.unpack("%sL" % self.byte_order, - self.file.read(4))[0] + self.headerLength = struct.unpack("%sL" % self.byte_order, self.file.read(4))[0] if self.headerLength < 0x10: raise ValueError("unsupported quantum-dna (header len=%08X)" % self.headerLength) if self.version == 0x100 and self.headerLength != 0x10: @@ -87,8 +87,7 @@ def __init__(self, file, revcomp=False, name="", gap=None, codebook=None): self.propOffset = 0 if self.headerLength >= 0x14: - self.propOffset = struct.unpack("%sL" % self.byte_order, - self.file.read(4))[0] + self.propOffset = struct.unpack("%sL" % self.byte_order, self.file.read(4))[0] self.name = "" if self.nameOffset != 0: @@ -97,7 +96,7 @@ def __init__(self, file, revcomp=False, name="", gap=None, codebook=None): if self.propOffset != 0: self.file.seek(self.propOffset) - while (True): + while True: name = self.read_string() if len(name) == 0: break @@ -114,7 +113,7 @@ def set_property(self, name, value): def read_string(self): s = b"" - while (True): + while True: ch = self.file.read(1) if ch == b"\0": break @@ -133,7 +132,6 @@ def get_quantum(self, start, length): class QdnaReader(SeqReader): - def __init__(self, file, revcomp=False, name="", gap=None, codebook=None): SeqReader.__init__(self, file, revcomp, name, gap) self.codebook = codebook @@ -169,7 +167,6 @@ def __next__(self): class QdnaCodebook: - def __init__(self, file): (self.alphabet, self.codeToProbs) = 
self.read_codebook(file) @@ -185,8 +182,7 @@ def vector_text(self, codeNum): for sym in self.alphabet: if sym not in vec: vec[sym] = 0.0 - return ("%02X\t" % ord(codeNum)) \ - + "\t".join(["%.6f" % vec[sym] for sym in self.alphabet]) + return ("%02X\t" % ord(codeNum)) + "\t".join(["%.6f" % vec[sym] for sym in self.alphabet]) def __getitem__(self, codeNum): return self.codeToProbs[codeNum] @@ -226,7 +222,7 @@ def read_codebook(self, codeF): p = float(fields[ix]) if p < 0 or p > 1: raise ValueError - vec[alphabet[ix-1]] = p + vec[alphabet[ix - 1]] = p except Exception: raise ValueError("%s is a bad probability value (line %d)" % (fields[ix], lineNum)) @@ -236,7 +232,6 @@ def read_codebook(self, codeF): class QdnaWriter: - def __init__(self, file): self.file = file diff --git a/lib/bx/seq/qdna_tests.py b/lib/bx/seq/qdna_tests.py index 649dc0e5..9c67bb55 100644 --- a/lib/bx/seq/qdna_tests.py +++ b/lib/bx/seq/qdna_tests.py @@ -10,20 +10,21 @@ # Same sequence data as stored in test.qdna -valid_seq = "C7wMwHQrMKqEtSREuUv5nsLinpTS8l7jXpbI7IipvCbHnhOdgx" \ - + "5tzRgzYl4j85d:xSlvKPEKEIvZkfiX1YPkBi1Ibhfn9fTZd8gG" \ - + "Wy284hJnwf93W4eHOjeRk7LuVYmH{UTYkYM:b4J4MruMq1ihhv" \ - + "1Yl5W[xXEmi8[JuuLRgooBpy23PllMuUiIiKVIK5vzhjPPYp5Y" \ - + "1eqPxo[e5I24KeCdTV94MZWNybUb:McC:1n4Jczk8JqnR4q1gY" \ - + "HjLS4Bes3s5YvvWdKzS4VrFZy2erhd7YoWRoS[UK8JtSp1{Z1o" \ - + "5:TpvN8mrmWrghiNw{S6nT8DSfF{1ff6kNGpI:FsZE2RgipTVO" \ +valid_seq = ( + "C7wMwHQrMKqEtSREuUv5nsLinpTS8l7jXpbI7IipvCbHnhOdgx" + + "5tzRgzYl4j85d:xSlvKPEKEIvZkfiX1YPkBi1Ibhfn9fTZd8gG" + + "Wy284hJnwf93W4eHOjeRk7LuVYmH{UTYkYM:b4J4MruMq1ihhv" + + "1Yl5W[xXEmi8[JuuLRgooBpy23PllMuUiIiKVIK5vzhjPPYp5Y" + + "1eqPxo[e5I24KeCdTV94MZWNybUb:McC:1n4Jczk8JqnR4q1gY" + + "HjLS4Bes3s5YvvWdKzS4VrFZy2erhd7YoWRoS[UK8JtSp1{Z1o" + + "5:TpvN8mrmWrghiNw{S6nT8DSfF{1ff6kNGpI:FsZE2RgipTVO" + "mJN6vPm8MUgNYd7MDBEu37YOPzPjO1dr" +) valid_seq_len = len(valid_seq) class QDNATestCase(unittest.TestCase): - def test_get(self): qdnafile = 
qdna.QdnaFile(open(test_qdna, "rb")) check_get(qdnafile, 0, valid_seq_len) @@ -32,4 +33,4 @@ def test_get(self): def check_get(qdnafile, start, len): - assert qdnafile.get(start, len) == valid_seq[start:start+len] + assert qdnafile.get(start, len) == valid_seq[start : start + len] diff --git a/lib/bx/seq/seq.py b/lib/bx/seq/seq.py index 2a948218..34365952 100644 --- a/lib/bx/seq/seq.py +++ b/lib/bx/seq/seq.py @@ -6,10 +6,12 @@ # DNA reverse complement table -DNA_COMP = " - " \ - " TVGH CD M KN YSA BWXR tvgh cd m kn ysa bwxr " \ - " " \ - " " +DNA_COMP = ( + " - " + " TVGH CD M KN YSA BWXR tvgh cd m kn ysa bwxr " + " " + " " +) class SeqFile: @@ -57,7 +59,7 @@ def __init__(self, file=None, revcomp=False, name="", gap=None): self.length = 0 # length or they most override get()) def close(self): - assert (self.file is not None) + assert self.file is not None self.file.close() self.file = None @@ -89,19 +91,20 @@ def get(self, start, length): # Check parameters assert length >= 0, "Length must be non-negative (got %d)" % length assert start >= 0, "Start must be greater than 0 (got %d)" % start - assert start + length <= self.length, \ - f"Interval beyond end of sequence ({start}..{start + length} > {self.length})" + assert ( + start + length <= self.length + ), f"Interval beyond end of sequence ({start}..{start + length} > {self.length})" # Fetch sequence and reverse complement if necesary if not self.revcomp: return self.raw_fetch(start, length) if self.revcomp == "-3'": return self.reverse_complement(self.raw_fetch(start, length)) assert self.revcomp == "-5'", "unrecognized reverse complement scheme" - start = self.length - (start+length) + start = self.length - (start + length) return self.reverse_complement(self.raw_fetch(start, length)) def raw_fetch(self, start, length): - return self.text[start:start+length] + return self.text[start : start + length] def reverse_complement(self, text): comp = [ch for ch in text.translate(DNA_COMP)] @@ -125,8 +128,10 @@ def 
close(self): def __iter__(self): return SeqReaderIter(self) - def __next__(self): # subclasses should override this method and return the - return # .. next sequence (of type SeqFile or a subclass) read from self.file + def __next__(self): + # subclasses should override this method and return the next sequence + # (of type SeqFile or a subclass) read from self.file + return class SeqReaderIter: diff --git a/lib/bx/seq/seq_tests.py b/lib/bx/seq/seq_tests.py index 126d8218..cc71e311 100644 --- a/lib/bx/seq/seq_tests.py +++ b/lib/bx/seq/seq_tests.py @@ -5,7 +5,11 @@ import unittest import bx.seq -from bx.seq import fasta_tests, nib_tests, qdna_tests +from bx.seq import ( + fasta_tests, + nib_tests, + qdna_tests, +) test_fa = "test_data/seq_tests/test.fa" test2_fa = "test_data/seq_tests/test2.fa" @@ -18,13 +22,14 @@ # Same sequences as stored in test2.fa -valid2_fa = [("apple", "GGCGCTGCGATAAGGTTGCGACAACACGGACCTTCTTTTGCCTACCTCTGTTCTTGGCACG"), - ("orange", "CGTGCCGAGAACAGAAAATACGCCGGGCGGTGCAGTAGTATCTTGGTATCCGATATGCAGG"), - ("grapefruit", "CCTGCATATCGACTAGTACACCCTCCCGAGGTACCCCACCCATCCCTCTTTTCTCGGCGCG")] +valid2_fa = [ + ("apple", "GGCGCTGCGATAAGGTTGCGACAACACGGACCTTCTTTTGCCTACCTCTGTTCTTGGCACG"), + ("orange", "CGTGCCGAGAACAGAAAATACGCCGGGCGGTGCAGTAGTATCTTGGTATCCGATATGCAGG"), + ("grapefruit", "CCTGCATATCGACTAGTACACCCTCCCGAGGTACCCCACCCATCCCTCTTTTCTCGGCGCG"), +] -class SEQTestCase (unittest.TestCase): - +class SEQTestCase(unittest.TestCase): def test_get_fasta(self): fastafile = bx.seq.seq_file(open(test_fa, "rb")) check_get(fastafile, valid_fasta, 3, 40) @@ -40,13 +45,17 @@ def test_get_qdna(self): def test_get_reader(self): reader = bx.seq.seq_reader(open(test2_fa, "rb")) for (ix, seq) in enumerate(reader): - assert (ix < len(valid2_fa)), "FastaReader returns too many sequences" + assert ix < len(valid2_fa), "FastaReader returns too many sequences" text = "%s" % seq fields = text.split() - assert (len(fields) == 2), "SeqReader.__str__ returns incorrect sequence string \"%s\" 
(%d)" % text - assert (fields[0] == valid2_fa[ix][0]), f"FastaReader returned the wrong name ({fields[0]},{valid2_fa[ix][0]})" - assert (fields[1] == valid2_fa[ix][1]), f"FastaReader returned the wrong text ({fields[1]},{valid2_fa[ix][1]})" + assert len(fields) == 2, 'SeqReader.__str__ returns incorrect sequence string "%s" (%d)' % text + assert ( + fields[0] == valid2_fa[ix][0] + ), f"FastaReader returned the wrong name ({fields[0]},{valid2_fa[ix][0]})" + assert ( + fields[1] == valid2_fa[ix][1] + ), f"FastaReader returned the wrong text ({fields[1]},{valid2_fa[ix][1]})" def check_get(seqfile, valid_seq, start, len): - assert seqfile.get(start, len) == valid_seq[start:start+len] + assert seqfile.get(start, len) == valid_seq[start : start + len] diff --git a/lib/bx/seq/twobit.py b/lib/bx/seq/twobit.py index b558c6f1..77a7d34d 100644 --- a/lib/bx/seq/twobit.py +++ b/lib/bx/seq/twobit.py @@ -2,7 +2,10 @@ Access to files containing sequence data in 'twobit' format. """ from collections.abc import Mapping -from struct import calcsize, unpack +from struct import ( + calcsize, + unpack, +) from typing import ( BinaryIO, Dict, @@ -124,8 +127,7 @@ def read_block_coords(self) -> Tuple[list, list]: return list(starts), list(sizes) def read(self, pattern: str, untuple: bool = True): - rval = unpack(self.byte_order + pattern, - self.file.read(calcsize(self.byte_order + pattern))) + rval = unpack(self.byte_order + pattern, self.file.read(calcsize(self.byte_order + pattern))) if untuple and len(rval) == 1: return rval[0] return rval diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index ab94c436..3f63e17b 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -33,15 +33,20 @@ def test_random_subseq_matches(filename): for h, s in quick_fasta_iter(f): expected[h] = s # Open 2bit - with open(test_twobit, 'rb') as f: + with open(test_twobit, "rb") as f: t = twobit.TwoBitFile(f) for k, s in expected.items(): assert k in t.index # assert 
t.index[k].size == len(s) length = len(s) for _ in range(100): - start = random.randint(0, length-2) - end = random.randint(start+1, length) + start = random.randint(0, length - 2) + end = random.randint(start + 1, length) assert t[k].get(start, end) == s[start:end] - assert t[k][start:end] == s[start:end], \ - "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % (k, start, end, s[start:end], t.get(k, start, end)) + assert t[k][start:end] == s[start:end], "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % ( + k, + start, + end, + s[start:end], + t.get(k, start, end), + ) diff --git a/lib/bx/seqmapping_tests.py b/lib/bx/seqmapping_tests.py index 8c9ddfee..879c4f65 100644 --- a/lib/bx/seqmapping_tests.py +++ b/lib/bx/seqmapping_tests.py @@ -16,20 +16,18 @@ class CharMappingTests(unittest.TestCase): __test__ = False def test_DNA(self): - assert (allclose( - bx.seqmapping.DNA.translate("ACGTacgt-?X"), - [0, 1, 2, 3, 0, 1, 2, 3, 4, -1, -1])) + assert allclose(bx.seqmapping.DNA.translate("ACGTacgt-?X"), [0, 1, 2, 3, 0, 1, 2, 3, 4, -1, -1]) def test_DNA_list(self): - assert (allclose( - bx.seqmapping.DNA.translate_list(["ACGTA", "TGCAX"]), - [0 + 3*6, 1 + 2*6, 2 + 1*6, 3 + 0*6, -1])) + assert allclose( + bx.seqmapping.DNA.translate_list(["ACGTA", "TGCAX"]), [0 + 3 * 6, 1 + 2 * 6, 2 + 1 * 6, 3 + 0 * 6, -1] + ) def test_other(self): m = bx.seqmapping.CharToIntArrayMapping() m.set_mapping("A", 0) m.set_mapping("B", 7) - assert (allclose(m.translate("ABCCBA"), [0, 7, -1, -1, 7, 0])) + assert allclose(m.translate("ABCCBA"), [0, 7, -1, -1, 7, 0]) class IntMappingTests(unittest.TestCase): @@ -41,7 +39,7 @@ def test_simple(self): m.set_mapping(2, 0) m.set_mapping(1, 1) m.set_mapping(3, 1) - assert (allclose(m.translate(array([0, 1, 2, 3, 4], 'i')), array([0, 1, 0, 1, -1]))) + assert allclose(m.translate(array([0, 1, 2, 3, 4], "i")), array([0, 1, 0, 1, -1])) eight_species_mapping = """TTTTTTTT 0 @@ -77,14 +75,16 @@ def test_simple(self): ------G- 2 """ -rows = 
["AAATTGT-----ATGTCCATCCTTTAAAGGTCATTCCTTTAATGGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATCAAAC-----------------TTTCTTCCCTCCC-TACTTCAGTG", - "AAATTGT-----ATGTCCATCCTTTAAAGGTCATTCCTTTAATGGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATCAAAC-----------------TTTCTTCCCTCCC-TACTTCAGTG", - "AAATTTT-----ATGTCTATCCTTTAAAGGTCATTCCTCTAATAGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATTAAAC-----------------TTTCTTCCCTCCC-TACCTCAGTG", - "AAACTGT-----ATCACCACCTTTTTAAGGTCATTTCTCTAATGATCCTGTT-GCATACCAGTAGGGGGCAGAAGTGTTCCGCTGATTTCCGCCCTCCTCCCCACCCCCCCACCCCCC-TTATTCAAAG", - "*********************************************************************************************************************************", - "-TATTAT-----ATGGCCATGTTCAAAAGGTTGTTTCTCTAATGATTCCTTC-TGATACCAGTAGGGGTCAGAAGTGGTCCATTGATT---------------------CTTTTCCTC-TGATTC-AAG", - "AAATTGA--AAGATCTCACTCTTTGCCAGGTAGTCCATCTAAGGGTCACATATGGATACCAGCAGGGCCT-GAAGAAGCCCATTGAAT------------------------TTTCCC-ATCTTCAAGG", - "AAATTCATGATAGTGTCACTCTTAAATAGATGATTC--------TTCACAT---GATGCCAGCAGGGGGC-AGAGCAGGCTGTGAAAT------------------------TTTCCCTTTCTTCAAAG"] +rows = [ + "AAATTGT-----ATGTCCATCCTTTAAAGGTCATTCCTTTAATGGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATCAAAC-----------------TTTCTTCCCTCCC-TACTTCAGTG", + "AAATTGT-----ATGTCCATCCTTTAAAGGTCATTCCTTTAATGGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATCAAAC-----------------TTTCTTCCCTCCC-TACTTCAGTG", + "AAATTTT-----ATGTCTATCCTTTAAAGGTCATTCCTCTAATAGTCTTTTCTGGACACCACTAGGGGTCAGAAGTAGTTCATTAAAC-----------------TTTCTTCCCTCCC-TACCTCAGTG", + "AAACTGT-----ATCACCACCTTTTTAAGGTCATTTCTCTAATGATCCTGTT-GCATACCAGTAGGGGGCAGAAGTGTTCCGCTGATTTCCGCCCTCCTCCCCACCCCCCCACCCCCC-TTATTCAAAG", + "*********************************************************************************************************************************", + "-TATTAT-----ATGGCCATGTTCAAAAGGTTGTTTCTCTAATGATTCCTTC-TGATACCAGTAGGGGTCAGAAGTGGTCCATTGATT---------------------CTTTTCCTC-TGATTC-AAG", + 
"AAATTGA--AAGATCTCACTCTTTGCCAGGTAGTCCATCTAAGGGTCACATATGGATACCAGCAGGGCCT-GAAGAAGCCCATTGAAT------------------------TTTCCC-ATCTTCAAGG", + "AAATTCATGATAGTGTCACTCTTAAATAGATGATTC--------TTCACAT---GATGCCAGCAGGGGGC-AGAGCAGGCTGTGAAAT------------------------TTTCCCTTTCTTCAAAG", +] class AlignmentMappingTests(unittest.TestCase): diff --git a/lib/bx/tabular/io.py b/lib/bx/tabular/io.py index 4a846f44..63d8e517 100644 --- a/lib/bx/tabular/io.py +++ b/lib/bx/tabular/io.py @@ -87,7 +87,9 @@ class TableReader: Reader for iterating tabular data """ - def __init__(self, input, return_header=True, return_comments=True, force_header=None, comment_lines_startswith=["#"]): + def __init__( + self, input, return_header=True, return_comments=True, force_header=None, comment_lines_startswith=["#"] + ): self.input = input self.return_comments = return_comments self.return_header = return_header @@ -105,7 +107,7 @@ def __next__(self): line = line.rstrip("\r\n") # Catch blank lines (throw a warning?) # This will end up adding a '#' at the beginning of blank lines - if line == '': + if line == "": if self.return_comments: return Comment(line) else: diff --git a/lib/bx/wiggle.py b/lib/bx/wiggle.py index ca4454dd..101c3edf 100644 --- a/lib/bx/wiggle.py +++ b/lib/bx/wiggle.py @@ -10,7 +10,7 @@ def parse_header(line): - return dict([field.split('=') for field in line.split()[1:]]) + return dict([field.split("=") for field in line.split()[1:]]) def IntervalReader(f): @@ -24,7 +24,7 @@ def IntervalReader(f): current_step = None # always for wiggle data - strand = '+' + strand = "+" mode = "bed" @@ -33,21 +33,21 @@ def IntervalReader(f): continue elif line.startswith("variableStep"): header = parse_header(line) - current_chrom = header['chrom'] + current_chrom = header["chrom"] current_pos = None current_step = None - if 'span' in header: - current_span = int(header['span']) + if "span" in header: + current_span = int(header["span"]) else: current_span = 1 mode = "variableStep" elif 
line.startswith("fixedStep"): header = parse_header(line) - current_chrom = header['chrom'] - current_pos = int(header['start']) - 1 - current_step = int(header['step']) - if 'span' in header: - current_span = int(header['span']) + current_chrom = header["chrom"] + current_pos = int(header["start"]) - 1 + current_step = int(header["step"]) + if "span" in header: + current_span = int(header["span"]) else: current_span = 1 mode = "fixedStep" diff --git a/lib/bx/wiggle_tests.py b/lib/bx/wiggle_tests.py index 6a3e0d60..67451775 100644 --- a/lib/bx/wiggle_tests.py +++ b/lib/bx/wiggle_tests.py @@ -45,7 +45,7 @@ "chr19,59304900,59304904,+,12.5", "chr19,59307400,59307403,+,1000.0", "chr19,59307700,59307703,+,900.0", - "chr19,59308000,59308003,+,800.0" + "chr19,59308000,59308003,+,800.0", ] position_reader_result = [ @@ -75,7 +75,7 @@ "chr19,59307702,900.0", "chr19,59308000,800.0", "chr19,59308001,800.0", - "chr19,59308002,800.0" + "chr19,59308002,800.0", ] @@ -86,8 +86,10 @@ def test_reader(self): def test_interval_reader(self): # Test interval reader reader - assert interval_reader_result == [",".join(map(str, value)) for value in wiggle.IntervalReader(StringIO(test_wig))] + assert interval_reader_result == [ + ",".join(map(str, value)) for value in wiggle.IntervalReader(StringIO(test_wig)) + ] -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/lib/bx_extras/fpconst.py b/lib/bx_extras/fpconst.py index 1f56b3ce..7b52187c 100644 --- a/lib/bx_extras/fpconst.py +++ b/lib/bx_extras/fpconst.py @@ -31,26 +31,27 @@ # check endianess -_big_endian = struct.pack('i', 1)[:1] != b'\x01' +_big_endian = struct.pack("i", 1)[:1] != b"\x01" # and define appropriate constants -if (_big_endian): - NaN = struct.unpack('d', b'\x7F\xF8\x00\x00\x00\x00\x00\x00')[0] - PosInf = struct.unpack('d', b'\x7F\xF0\x00\x00\x00\x00\x00\x00')[0] +if _big_endian: + NaN = struct.unpack("d", b"\x7F\xF8\x00\x00\x00\x00\x00\x00")[0] + PosInf = struct.unpack("d", 
b"\x7F\xF0\x00\x00\x00\x00\x00\x00")[0] NegInf = -PosInf else: - NaN = struct.unpack('d', b'\x00\x00\x00\x00\x00\x00\xf8\xff')[0] - PosInf = struct.unpack('d', b'\x00\x00\x00\x00\x00\x00\xf0\x7f')[0] + NaN = struct.unpack("d", b"\x00\x00\x00\x00\x00\x00\xf8\xff")[0] + PosInf = struct.unpack("d", b"\x00\x00\x00\x00\x00\x00\xf0\x7f")[0] NegInf = -PosInf def _double_as_bytes(dval): "Use struct.unpack to decode a double precision float into eight bytes" - tmp = list(struct.unpack('8B', struct.pack('d', dval))) + tmp = list(struct.unpack("8B", struct.pack("d", dval))) if not _big_endian: tmp.reverse() return tmp + ## # Functions to extract components of the IEEE 754 floating point format ## @@ -71,7 +72,7 @@ def _exponent(dval): by subtracting 1023 from the value returned by this function """ bb = _double_as_bytes(dval) - return (bb[0] << 4 | bb[1] >> 4) & 0x7ff + return (bb[0] << 4 | bb[1] >> 4) & 0x7FF def _mantissa(dval): @@ -79,7 +80,7 @@ def _mantissa(dval): point value.""" bb = _double_as_bytes(dval) - mantissa = bb[1] & 0x0f << 48 + mantissa = bb[1] & 0x0F << 48 mantissa += bb[2] << 40 mantissa += bb[3] << 32 mantissa += bb[4] @@ -90,7 +91,8 @@ def _zero_mantissa(dval): """Determine whether the mantissa bits of the given double are all zero.""" bb = _double_as_bytes(dval) - return ((bb[1] & 0x0f) | reduce(operator.or_, bb[2:])) == 0 + return ((bb[1] & 0x0F) | reduce(operator.or_, bb[2:])) == 0 + ## # Functions to test for IEEE 754 special values @@ -99,31 +101,30 @@ def _zero_mantissa(dval): def isNaN(value): "Determine if the argument is a IEEE 754 NaN (Not a Number) value." 
- return (_exponent(value) == 0x7ff and not _zero_mantissa(value)) + return _exponent(value) == 0x7FF and not _zero_mantissa(value) def isInf(value): """Determine if the argument is an infinite IEEE 754 value (positive or negative inifinity)""" - return (_exponent(value) == 0x7ff and _zero_mantissa(value)) + return _exponent(value) == 0x7FF and _zero_mantissa(value) def isFinite(value): """Determine if the argument is an finite IEEE 754 value (i.e., is not NaN, positive or negative inifinity)""" - return (_exponent(value) != 0x7ff) + return _exponent(value) != 0x7FF def isPosInf(value): "Determine if the argument is a IEEE 754 positive infinity value" - return (_sign(value) == 0 and _exponent(value) == 0x7ff - and _zero_mantissa(value)) + return _sign(value) == 0 and _exponent(value) == 0x7FF and _zero_mantissa(value) def isNegInf(value): "Determine if the argument is a IEEE 754 negative infinity value" - return (_sign(value) == 1 and _exponent(value) == 0x7ff - and _zero_mantissa(value)) + return _sign(value) == 1 and _exponent(value) == 0x7FF and _zero_mantissa(value) + ## # Functions to test public functions. 
@@ -131,43 +132,44 @@ def isNegInf(value): def test_isNaN(): - assert (not isNaN(PosInf)) - assert (not isNaN(NegInf)) - assert (isNaN(NaN)) - assert (not isNaN(1.0)) - assert (not isNaN(-1.0)) + assert not isNaN(PosInf) + assert not isNaN(NegInf) + assert isNaN(NaN) + assert not isNaN(1.0) + assert not isNaN(-1.0) def test_isInf(): - assert (isInf(PosInf)) - assert (isInf(NegInf)) - assert (not isInf(NaN)) - assert (not isInf(1.0)) - assert (not isInf(-1.0)) + assert isInf(PosInf) + assert isInf(NegInf) + assert not isInf(NaN) + assert not isInf(1.0) + assert not isInf(-1.0) def test_isFinite(): - assert (not isFinite(PosInf)) - assert (not isFinite(NegInf)) - assert (not isFinite(NaN)) - assert (isFinite(1.0)) - assert (isFinite(-1.0)) + assert not isFinite(PosInf) + assert not isFinite(NegInf) + assert not isFinite(NaN) + assert isFinite(1.0) + assert isFinite(-1.0) def test_isPosInf(): - assert (isPosInf(PosInf)) - assert (not isPosInf(NegInf)) - assert (not isPosInf(NaN)) - assert (not isPosInf(1.0)) - assert (not isPosInf(-1.0)) + assert isPosInf(PosInf) + assert not isPosInf(NegInf) + assert not isPosInf(NaN) + assert not isPosInf(1.0) + assert not isPosInf(-1.0) def test_isNegInf(): - assert (not isNegInf(PosInf)) - assert (isNegInf(NegInf)) - assert (not isNegInf(NaN)) - assert (not isNegInf(1.0)) - assert (not isNegInf(-1.0)) + assert not isNegInf(PosInf) + assert isNegInf(NegInf) + assert not isNegInf(NaN) + assert not isNegInf(1.0) + assert not isNegInf(-1.0) + # overall test diff --git a/lib/bx_extras/lrucache.py b/lib/bx_extras/lrucache.py index 24902378..c1eea95e 100644 --- a/lib/bx_extras/lrucache.py +++ b/lib/bx_extras/lrucache.py @@ -42,8 +42,8 @@ ) __version__ = "0.2" -__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE'] -__docformat__ = 'reStructuredText en' +__all__ = ["CacheKeyError", "LRUCache", "DEFAULT_SIZE"] +__docformat__ = "reStructuredText en" DEFAULT_SIZE = 16 """Default size of a new LRUCache object, if no 'size' argument is 
given.""" @@ -126,9 +126,7 @@ def __ne__(self, other): return not self.__eq__(other) def __repr__(self): - return "<%s %s => %s (%s)>" % \ - (self.__class__, self.key, self.obj, - time.asctime(time.localtime(self.atime))) + return "<%s %s => %s (%s)>" % (self.__class__, self.key, self.obj, time.asctime(time.localtime(self.atime))) def __init__(self, size=DEFAULT_SIZE): # Check arguments @@ -195,7 +193,7 @@ def __iter__(self): def __setattr__(self, name, value): object.__setattr__(self, name, value) # automagically shrink heap on resize - if name == 'size': + if name == "size": while len(self.__heap) > value: lru = heappop(self.__heap) del self.__dict[lru.key] @@ -225,7 +223,7 @@ def mtime(self, key): print(cache) cache.size = 10 print(cache) - cache[46] = '46' + cache[46] = "46" print(cache) print(len(cache)) for c in cache: diff --git a/lib/bx_extras/pstat.py b/lib/bx_extras/pstat.py index 87ca1157..5b3b60ec 100644 --- a/lib/bx_extras/pstat.py +++ b/lib/bx_extras/pstat.py @@ -110,12 +110,14 @@ import sys if sys.version_info[0] > 2: + def cmp(x, y): """ Replacement for built-in function cmp that was removed in Python 3 """ return (x > y) - (x < y) + __version__ = 0.4 # =========================== LIST FUNCTIONS ========================== @@ -127,46 +129,45 @@ def cmp(x, y): def abut(source, *args): """ -Like the |Stat abut command. It concatenates two lists side-by-side -and returns the result. '2D' lists are also accomodated for either argument -(source or addon). CAUTION: If one list is shorter, it will be repeated -until it is as long as the longest list. If this behavior is not desired, -use pstat.simpleabut(). - -Usage: abut(source, args) where args=any # of lists -Returns: a list of lists as long as the LONGEST list past, source on the - 'left', lists in attached consecutively on the 'right' -""" + Like the |Stat abut command. It concatenates two lists side-by-side + and returns the result. 
'2D' lists are also accomodated for either argument + (source or addon). CAUTION: If one list is shorter, it will be repeated + until it is as long as the longest list. If this behavior is not desired, + use pstat.simpleabut(). + + Usage: abut(source, args) where args=any # of lists + Returns: a list of lists as long as the LONGEST list past, source on the + 'left', lists in attached consecutively on the 'right'""" if type(source) not in [list, tuple]: source = [source] for addon in args: if type(addon) not in [list, tuple]: addon = [addon] - if len(addon) < len(source): # is source list longer? - if len(source) % len(addon) == 0: # are they integer multiples? - repeats = len(source)/len(addon) # repeat addon n times + if len(addon) < len(source): # is source list longer? + if len(source) % len(addon) == 0: # are they integer multiples? + repeats = len(source) / len(addon) # repeat addon n times origadd = copy.deepcopy(addon) - for i in range(repeats-1): + for i in range(repeats - 1): addon = addon + origadd else: - repeats = len(source)/len(addon)+1 # repeat addon x times, + repeats = len(source) / len(addon) + 1 # repeat addon x times, origadd = copy.deepcopy(addon) # x is NOT an integer - for i in range(repeats-1): + for i in range(repeats - 1): addon = addon + origadd - addon = addon[0:len(source)] - elif len(source) < len(addon): # is addon list longer? - if len(addon) % len(source) == 0: # are they integer multiples? - repeats = len(addon)/len(source) # repeat source n times + addon = addon[0 : len(source)] + elif len(source) < len(addon): # is addon list longer? + if len(addon) % len(source) == 0: # are they integer multiples? 
+ repeats = len(addon) / len(source) # repeat source n times origsour = copy.deepcopy(source) - for i in range(repeats-1): + for i in range(repeats - 1): source = source + origsour else: - repeats = len(addon)/len(source)+1 # repeat source x times, + repeats = len(addon) / len(source) + 1 # repeat source x times, origsour = copy.deepcopy(source) # x is NOT an integer - for i in range(repeats-1): + for i in range(repeats - 1): source = source + origsour - source = source[0:len(addon)] + source = source[0 : len(addon)] source = simpleabut(source, addon) return source @@ -174,63 +175,61 @@ def abut(source, *args): def simpleabut(source, addon): """ -Concatenates two lists as columns and returns the result. '2D' lists -are also accomodated for either argument (source or addon). This DOES NOT -repeat either list to make the 2 lists of equal length. Beware of list pairs -with different lengths ... the resulting list will be the length of the -FIRST list passed. - -Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists) -Returns: a list of lists as long as source, with source on the 'left' and - addon on the 'right' -""" + Concatenates two lists as columns and returns the result. '2D' lists + are also accomodated for either argument (source or addon). This DOES NOT + repeat either list to make the 2 lists of equal length. Beware of list pairs + with different lengths ... the resulting list will be the length of the + FIRST list passed. 
+ + Usage: simpleabut(source,addon) where source, addon=list (or list-of-lists) + Returns: a list of lists as long as source, with source on the 'left' and + addon on the 'right'""" if type(source) not in [list, tuple]: source = [source] if type(addon) not in [list, tuple]: addon = [addon] minlen = min(len(source), len(addon)) - source_copy = copy.deepcopy(source) # start abut process + source_copy = copy.deepcopy(source) # start abut process if type(source[0]) not in [list, tuple]: if type(addon[0]) not in [list, tuple]: for i in range(minlen): - source_copy[i] = [source[i]] + [addon[i]] # source/addon = column + source_copy[i] = [source[i]] + [addon[i]] # source/addon = column else: for i in range(minlen): - source_copy[i] = [source[i]] + addon[i] # addon=list-of-lists + source_copy[i] = [source[i]] + addon[i] # addon=list-of-lists else: if type(addon[0]) not in [list, tuple]: for i in range(minlen): - source_copy[i] = source[i] + [addon[i]] # source=list-of-lists + source_copy[i] = source[i] + [addon[i]] # source=list-of-lists else: for i in range(minlen): - source_copy[i] = source[i] + addon[i] # source/addon = list-of-lists + source_copy[i] = source[i] + addon[i] # source/addon = list-of-lists source = source_copy return source def colex(listoflists, cnums): """ -Extracts from listoflists the columns specified in the list 'cnums' -(cnums can be an integer, a sequence of integers, or a string-expression that -corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex -columns 3 onward from the listoflists). - -Usage: colex (listoflists,cnums) -Returns: a list-of-lists corresponding to the columns from listoflists - specified by cnums, in the order the column numbers appear in cnums -""" + Extracts from listoflists the columns specified in the list 'cnums' + (cnums can be an integer, a sequence of integers, or a string-expression that + corresponds to a slice operation on the variable x ... 
e.g., 'x[3:]' will colex + columns 3 onward from the listoflists). + + Usage: colex (listoflists,cnums) + Returns: a list-of-lists corresponding to the columns from listoflists + specified by cnums, in the order the column numbers appear in cnums""" global index column = 0 - if type(cnums) in [list, tuple]: # if multiple columns to get + if type(cnums) in [list, tuple]: # if multiple columns to get index = cnums[0] column = [x[index] for x in listoflists] for col in cnums[1:]: index = col column = abut(column, [x[index] for x in listoflists]) - elif isinstance(cnums, str): # if an 'x[3:]' type expr. - evalstring = 'map(lambda x: x'+cnums+', listoflists)' + elif isinstance(cnums, str): # if an 'x[3:]' type expr. + evalstring = "map(lambda x: x" + cnums + ", listoflists)" column = eval(evalstring) - else: # else it's just 1 col to get + else: # else it's just 1 col to get index = cnums column = [x[index] for x in listoflists] return column @@ -238,26 +237,26 @@ def colex(listoflists, cnums): def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None): """ -Averages data in collapsecol, keeping all unique items in keepcols -(using unique, which keeps unique LISTS of column numbers), retaining the -unique sets of values in keepcols, the mean for each. Setting fcn1 -and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len) -will append those results (e.g., the sterr, N) after each calculated mean. -cfcn is the collapse function to apply (defaults to mean, defined here in the -pstat module to avoid circular imports with stats.py, but harmonicmean or -others could be passed). - -Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) -Returns: a list of lists with all unique permutations of entries appearing in - columns ("conditions") specified by keepcols, abutted with the result of - cfcn (if cfcn=None, defaults to the mean) of each column specified by - collapsecols. 
-""" + Averages data in collapsecol, keeping all unique items in keepcols + (using unique, which keeps unique LISTS of column numbers), retaining the + unique sets of values in keepcols, the mean for each. Setting fcn1 + and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len) + will append those results (e.g., the sterr, N) after each calculated mean. + cfcn is the collapse function to apply (defaults to mean, defined here in the + pstat module to avoid circular imports with stats.py, but harmonicmean or + others could be passed). + + Usage: collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) + Returns: a list of lists with all unique permutations of entries appearing in + columns ("conditions") specified by keepcols, abutted with the result of + cfcn (if cfcn=None, defaults to the mean) of each column specified by + collapsecols.""" + def collmean(inlist): s = 0 for item in inlist: s = s + item - return s/float(len(inlist)) + return s / float(len(inlist)) if type(keepcols) not in [list, tuple]: keepcols = [keepcols] @@ -266,7 +265,7 @@ def collmean(inlist): if cfcn is None: cfcn = collmean if keepcols == []: - means = [0]*len(collapsecols) + means = [0] * len(collapsecols) for i in range(len(collapsecols)): avgcol = colex(listoflists, collapsecols[i]) means[i] = cfcn(avgcol) @@ -274,13 +273,13 @@ def collmean(inlist): try: test = fcn1(avgcol) except Exception: - test = 'N/A' + test = "N/A" means[i] = [means[i], test] if fcn2: try: test = fcn2(avgcol) except Exception: - test = 'N/A' + test = "N/A" try: means[i] = means[i] + [len(avgcol)] except TypeError: @@ -303,13 +302,13 @@ def collmean(inlist): try: test = fcn1(avgcol) except Exception: - test = 'N/A' + test = "N/A" item.append(test) if fcn2 is not None: try: test = fcn2(avgcol) except Exception: - test = 'N/A' + test = "N/A" item.append(test) newlist.append(item) return newlist @@ -317,26 +316,24 @@ def collmean(inlist): def dm(listoflists, criterion): """ -Returns 
rows from the passed list of lists that meet the criteria in -the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9' -will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows -with column 2 equal to the string 'N'). + Returns rows from the passed list of lists that meet the criteria in + the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9' + will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows + with column 2 equal to the string 'N'). -Usage: dm (listoflists, criterion) -Returns: rows from listoflists that meet the specified criterion. -""" - function = 'filter(lambda x: '+criterion+',listoflists)' + Usage: dm (listoflists, criterion) + Returns: rows from listoflists that meet the specified criterion.""" + function = "filter(lambda x: " + criterion + ",listoflists)" lines = eval(function) return lines def flat(l): """ -Returns the flattened version of a '2D' list. List-correlate to the a.flat() -method of NumPy arrays. + Returns the flattened version of a '2D' list. List-correlate to the a.flat() + method of NumPy arrays. -Usage: flat(l) -""" + Usage: flat(l)""" newl = [] for i in range(len(l)): for j in range(len(l[i])): @@ -346,69 +343,66 @@ def flat(l): def linexand(listoflists, columnlist, valuelist): """ -Returns the rows of a list of lists where col (from columnlist) = val -(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]). -len(columnlist) must equal len(valuelist). + Returns the rows of a list of lists where col (from columnlist) = val + (from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]). + len(columnlist) must equal len(valuelist). 
-Usage: linexand (listoflists,columnlist,valuelist) -Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i -""" + Usage: linexand (listoflists,columnlist,valuelist) + Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i""" if type(columnlist) not in [list, tuple]: columnlist = [columnlist] if type(valuelist) not in [list, tuple]: valuelist = [valuelist] - criterion = '' + criterion = "" for i in range(len(columnlist)): if isinstance(valuelist[i], str): - critval = '\'' + valuelist[i] + '\'' + critval = "'" + valuelist[i] + "'" else: critval = str(valuelist[i]) - criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' - criterion = criterion[0:-3] # remove the "and" after the last crit - function = 'filter(lambda x: '+criterion+',listoflists)' + criterion = criterion + " x[" + str(columnlist[i]) + "]==" + critval + " and" + criterion = criterion[0:-3] # remove the "and" after the last crit + function = "filter(lambda x: " + criterion + ",listoflists)" lines = eval(function) return lines def linexor(listoflists, columnlist, valuelist): """ -Returns the rows of a list of lists where col (from columnlist) = val -(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[). -One value is required for each column in columnlist. If only one value -exists for columnlist but multiple values appear in valuelist, the -valuelist values are all assumed to pertain to the same column. - -Usage: linexor (listoflists,columnlist,valuelist) -Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i -""" + Returns the rows of a list of lists where col (from columnlist) = val + (from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[). + One value is required for each column in columnlist. If only one value + exists for columnlist but multiple values appear in valuelist, the + valuelist values are all assumed to pertain to the same column. 
+ + Usage: linexor (listoflists,columnlist,valuelist) + Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i""" if type(columnlist) not in [list, tuple]: columnlist = [columnlist] if type(valuelist) not in [list, tuple]: valuelist = [valuelist] - criterion = '' + criterion = "" if len(columnlist) == 1 and len(valuelist) > 1: - columnlist = columnlist*len(valuelist) - for i in range(len(columnlist)): # build an exec string + columnlist = columnlist * len(valuelist) + for i in range(len(columnlist)): # build an exec string if isinstance(valuelist[i], str): - critval = '\'' + valuelist[i] + '\'' + critval = "'" + valuelist[i] + "'" else: critval = str(valuelist[i]) - criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' - criterion = criterion[0:-2] # remove the "or" after the last crit - function = 'filter(lambda x: '+criterion+',listoflists)' + criterion = criterion + " x[" + str(columnlist[i]) + "]==" + critval + " or" + criterion = criterion[0:-2] # remove the "or" after the last crit + function = "filter(lambda x: " + criterion + ",listoflists)" lines = eval(function) return lines def linedelimited(inlist, delimiter): """ -Returns a string composed of elements in inlist, with each element -separated by 'delimiter.' Used by function writedelimited. Use '\t' -for tab-delimiting. + Returns a string composed of elements in inlist, with each element + separated by 'delimiter.' Used by function writedelimited. Use '\t' + for tab-delimiting. -Usage: linedelimited (inlist,delimiter) -""" - outstr = '' + Usage: linedelimited (inlist,delimiter)""" + outstr = "" for item in inlist: if not isinstance(item, str): item = str(item) @@ -419,36 +413,34 @@ def linedelimited(inlist, delimiter): def lineincols(inlist, colsize): """ -Returns a string composed of elements in inlist, with each element -right-aligned in columns of (fixed) colsize. 
+ Returns a string composed of elements in inlist, with each element + right-aligned in columns of (fixed) colsize. -Usage: lineincols (inlist,colsize) where colsize is an integer -""" - outstr = '' + Usage: lineincols (inlist,colsize) where colsize is an integer""" + outstr = "" for item in inlist: if not isinstance(item, str): item = str(item) size = len(item) if size <= colsize: - for i in range(colsize-size): - outstr = outstr + ' ' + for i in range(colsize - size): + outstr = outstr + " " outstr = outstr + item else: - outstr = outstr + item[0:colsize+1] + outstr = outstr + item[0 : colsize + 1] return outstr def lineincustcols(inlist, colsizes): """ -Returns a string composed of elements in inlist, with each element -right-aligned in a column of width specified by a sequence colsizes. The -length of colsizes must be greater than or equal to the number of columns -in inlist. - -Usage: lineincustcols (inlist,colsizes) -Returns: formatted string created from inlist -""" - outstr = '' + Returns a string composed of elements in inlist, with each element + right-aligned in a column of width specified by a sequence colsizes. The + length of colsizes must be greater than or equal to the number of columns + in inlist. + + Usage: lineincustcols (inlist,colsizes) + Returns: formatted string created from inlist""" + outstr = "" for i in range(len(inlist)): if not isinstance(inlist[i], str): item = str(inlist[i]) @@ -456,34 +448,32 @@ def lineincustcols(inlist, colsizes): item = inlist[i] size = len(item) if size <= colsizes[i]: - for j in range(colsizes[i]-size): - outstr = outstr + ' ' + for j in range(colsizes[i] - size): + outstr = outstr + " " outstr = outstr + item else: - outstr = outstr + item[0:colsizes[i]+1] + outstr = outstr + item[0 : colsizes[i] + 1] return outstr -def list2string(inlist, delimit=' '): +def list2string(inlist, delimit=" "): """ -Converts a 1D list to a single long string for file output, using -the string.join function. 
+ Converts a 1D list to a single long string for file output, using + the string.join function. -Usage: list2string (inlist,delimit=' ') -Returns: the string created from inlist -""" + Usage: list2string (inlist,delimit=' ') + Returns: the string created from inlist""" stringlist = [makestr(_) for _ in inlist] return string.join(stringlist, delimit) def makelol(inlist): """ -Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you -want to use put() to write a 1D list one item per line in the file. + Converts a 1D list to a 2D list (i.e., a list-of-lists). Useful when you + want to use put() to write a 1D list one item per line in the file. -Usage: makelol(inlist) -Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc. -""" + Usage: makelol(inlist) + Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.""" x = [] for item in inlist: x.append([item]) @@ -498,36 +488,35 @@ def makestr(x): def printcc(lst, extra=2): """ -Prints a list of lists in columns, customized by the max size of items -within the columns (max size of items in col, plus 'extra' number of spaces). -Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines, -respectively. + Prints a list of lists in columns, customized by the max size of items + within the columns (max size of items in col, plus 'extra' number of spaces). + Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines, + respectively. 
-Usage: printcc (lst,extra=2) -Returns: None -""" + Usage: printcc (lst,extra=2) + Returns: None""" if type(lst[0]) not in [list, tuple]: lst = [lst] rowstokill = [] list2print = copy.deepcopy(lst) for i in range(len(lst)): - if lst[i] == ['\n'] or lst[i] == '\n' or lst[i] == 'dashes' or lst[i] == '' or lst[i] == ['']: + if lst[i] == ["\n"] or lst[i] == "\n" or lst[i] == "dashes" or lst[i] == "" or lst[i] == [""]: rowstokill = rowstokill + [i] - rowstokill.reverse() # delete blank rows from the end + rowstokill.reverse() # delete blank rows from the end for row in rowstokill: del list2print[row] - maxsize = [0]*len(list2print[0]) + maxsize = [0] * len(list2print[0]) for col in range(len(list2print[0])): items = colex(list2print, col) items = [makestr(_) for _ in items] maxsize[col] = max(map(len, items)) + extra for row in lst: - if row == ['\n'] or row == '\n' or row == '' or row == ['']: + if row == ["\n"] or row == "\n" or row == "" or row == [""]: print() - elif row == ['dashes'] or row == 'dashes': - dashes = [0]*len(maxsize) + elif row == ["dashes"] or row == "dashes": + dashes = [0] * len(maxsize) for j in range(len(maxsize)): - dashes[j] = '-'*(maxsize[j]-2) + dashes[j] = "-" * (maxsize[j] - 2) print(lineincustcols(dashes, maxsize)) else: print(lineincustcols(row, maxsize)) @@ -536,12 +525,11 @@ def printcc(lst, extra=2): def printincols(listoflists, colsize): """ -Prints a list of lists in columns of (fixed) colsize width, where -colsize is an integer. + Prints a list of lists in columns of (fixed) colsize width, where + colsize is an integer. -Usage: printincols (listoflists,colsize) -Returns: None -""" + Usage: printincols (listoflists,colsize) + Returns: None""" for row in listoflists: print(lineincols(row, colsize)) return None @@ -549,14 +537,13 @@ def printincols(listoflists, colsize): def pl(listoflists): """ -Prints a list of lists, 1 list (row) at a time. + Prints a list of lists, 1 list (row) at a time. 
-Usage: pl(listoflists) -Returns: None -""" + Usage: pl(listoflists) + Returns: None""" for row in listoflists: - if row[-1] == '\n': - print(row, end=' ') + if row[-1] == "\n": + print(row, end=" ") else: print(row) return None @@ -570,11 +557,10 @@ def printl(listoflists): def replace(inlst, oldval, newval): """ -Replaces all occurrences of 'oldval' with 'newval', recursively. + Replaces all occurrences of 'oldval' with 'newval', recursively. -Usage: replace (inlst,oldval,newval) -""" - lst = inlst*1 + Usage: replace (inlst,oldval,newval)""" + lst = inlst * 1 for i in range(len(lst)): if type(lst[i]) not in [list, tuple]: if lst[i] == oldval: @@ -586,13 +572,12 @@ def replace(inlst, oldval, newval): def recode(inlist, listmap, cols=None): """ -Changes the values in a list to a new set of values (useful when -you need to recode data from (e.g.) strings to numbers. cols defaults -to None (meaning all columns are recoded). + Changes the values in a list to a new set of values (useful when + you need to recode data from (e.g.) strings to numbers. cols defaults + to None (meaning all columns are recoded). -Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list -Returns: inlist with the appropriate values replaced with new ones -""" + Usage: recode (inlist,listmap,cols=None) cols=recode cols, listmap=2D list + Returns: inlist with the appropriate values replaced with new ones""" lst = copy.deepcopy(inlist) if cols is not None: if type(cols) not in [list, tuple]: @@ -617,29 +602,27 @@ def recode(inlist, listmap, cols=None): def remap(listoflists, criterion): """ -Remaps values in a given column of a 2D list (listoflists). This requires -a criterion as a function of 'x' so that the result of the following is -returned ... map(lambda x: 'criterion',listoflists). + Remaps values in a given column of a 2D list (listoflists). This requires + a criterion as a function of 'x' so that the result of the following is + returned ... 
map(lambda x: 'criterion',listoflists). -Usage: remap(listoflists,criterion) criterion=string -Returns: remapped version of listoflists -""" - function = 'map(lambda x: '+criterion+',listoflists)' + Usage: remap(listoflists,criterion) criterion=string + Returns: remapped version of listoflists""" + function = "map(lambda x: " + criterion + ",listoflists)" lines = eval(function) return lines def roundlist(inlist, digits): """ -Goes through each element in a 1D or 2D inlist, and applies the following -function to all elements of float ... round(element,digits). + Goes through each element in a 1D or 2D inlist, and applies the following + function to all elements of float ... round(element,digits). -Usage: roundlist(inlist,digits) -Returns: list with rounded floats -""" + Usage: roundlist(inlist,digits) + Returns: list with rounded floats""" if type(inlist[0]) in [int, float]: inlist = [inlist] - l = inlist*1 + l = inlist * 1 for i in range(len(l)): for j in range(len(l[i])): if isinstance(l[i][j], float): @@ -649,31 +632,29 @@ def roundlist(inlist, digits): def sortby(listoflists, sortcols): """ -Sorts a list of lists on the column(s) specified in the sequence -sortcols. + Sorts a list of lists on the column(s) specified in the sequence + sortcols. -Usage: sortby(listoflists,sortcols) -Returns: sorted list, unchanged column ordering -""" + Usage: sortby(listoflists,sortcols) + Returns: sorted list, unchanged column ordering""" newlist = sorted(abut(colex(listoflists, sortcols), listoflists)) try: numcols = len(sortcols) except TypeError: numcols = 1 - crit = '[' + str(numcols) + ':]' + crit = "[" + str(numcols) + ":]" newlist = colex(newlist, crit) return newlist def unique(inlist): """ -Returns all unique items in the passed list. If the a list-of-lists -is passed, unique LISTS are found (i.e., items in the first dimension are -compared). + Returns all unique items in the passed list. 
If the a list-of-lists + is passed, unique LISTS are found (i.e., items in the first dimension are + compared). -Usage: unique (inlist) -Returns: the unique elements (or rows) in inlist -""" + Usage: unique (inlist) + Returns: the unique elements (or rows) in inlist""" uniques = [] for item in inlist: if item not in uniques: @@ -683,23 +664,21 @@ def unique(inlist): def duplicates(inlist): """ -Returns duplicate items in the FIRST dimension of the passed list. + Returns duplicate items in the FIRST dimension of the passed list. -Usage: duplicates (inlist) -""" + Usage: duplicates (inlist)""" dups = [] for i in range(len(inlist)): - if inlist[i] in inlist[i+1:]: + if inlist[i] in inlist[i + 1 :]: dups.append(inlist[i]) return dups def nonrepeats(inlist): """ -Returns items that are NOT duplicated in the first dim of the passed list. + Returns items that are NOT duplicated in the first dim of the passed list. -Usage: nonrepeats (inlist) -""" + Usage: nonrepeats (inlist)""" nonrepeats = [] for i in range(len(inlist)): if inlist.count(inlist[i]) == 1: @@ -724,20 +703,21 @@ def nonrepeats(inlist): # =================== PSTAT ARRAY FUNCTIONS ===================== # =================== PSTAT ARRAY FUNCTIONS ===================== -try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE +try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE import Numeric + N = Numeric def aabut(source, *args): """ - Like the |Stat abut command. It concatenates two arrays column-wise - and returns the result. CAUTION: If one array is shorter, it will be - repeated until it is as long as the other. + Like the |Stat abut command. It concatenates two arrays column-wise + and returns the result. CAUTION: If one array is shorter, it will be + repeated until it is as long as the other. - Usage: aabut (source, args) where args=any # of arrays - Returns: an array as long as the LONGEST array past, source appearing on the - 'left', arrays in attached on the 'right'. 
- """ + Usage: aabut (source, args) where args=any # of arrays + Returns: an array as long as the LONGEST array past, source appearing on the + 'left', arrays in attached on the 'right'. + """ if len(source.shape) == 1: width = 1 source = N.resize(source, [source.shape[0], width]) @@ -758,13 +738,13 @@ def aabut(source, *args): def acolex(a, indices, axis=1): """ - Extracts specified indices (a list) from passed array, along passed - axis (column extraction is default). BEWARE: A 1D array is presumed to be a - column-array (and that the whole array will be returned as a column). + Extracts specified indices (a list) from passed array, along passed + axis (column extraction is default). BEWARE: A 1D array is presumed to be a + column-array (and that the whole array will be returned as a column). - Usage: acolex (a,indices,axis=1) - Returns: the columns of a specified by indices - """ + Usage: acolex (a,indices,axis=1) + Returns: the columns of a specified by indices + """ if type(indices) not in [list, tuple, N.ArrayType]: indices = [indices] if len(N.shape(a)) == 1: @@ -775,16 +755,17 @@ def acolex(a, indices, axis=1): def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None): """ - Averages data in collapsecol, keeping all unique items in keepcols - (using unique, which keeps unique LISTS of column numbers), retaining - the unique sets of values in keepcols, the mean for each. If stderror or - N of the mean are desired, set either or both parameters to 1. - - Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) - Returns: unique 'conditions' specified by the contents of columns specified - by keepcols, abutted with the mean(s) of column(s) specified by - collapsecols - """ + Averages data in collapsecol, keeping all unique items in keepcols + (using unique, which keeps unique LISTS of column numbers), retaining + the unique sets of values in keepcols, the mean for each. 
If stderror or + N of the mean are desired, set either or both parameters to 1. + + Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) + Returns: unique 'conditions' specified by the contents of columns specified + by keepcols, abutted with the mean(s) of column(s) specified by + collapsecols + """ + def acollmean(inarray): return N.sum(N.ravel(inarray)) @@ -792,24 +773,24 @@ def acollmean(inarray): cfcn = acollmean if keepcols == []: avgcol = acolex(a, collapsecols) - means = N.sum(avgcol)/float(len(avgcol)) + means = N.sum(avgcol) / float(len(avgcol)) if fcn1 is not None: try: test = fcn1(avgcol) except Exception: - test = N.array(['N/A']*len(means)) + test = N.array(["N/A"] * len(means)) means = aabut(means, test) if fcn2 is not None: try: test = fcn2(avgcol) except Exception: - test = N.array(['N/A']*len(means)) + test = N.array(["N/A"] * len(means)) means = aabut(means, test) return means else: if type(keepcols) not in [list, tuple, N.ArrayType]: keepcols = [keepcols] - values = colex(a, keepcols) # so that "item" can be appended (below) + values = colex(a, keepcols) # so that "item" can be appended (below) uniques = sorted(unique(values)) # get a LIST, so .sort keeps rows intact newlist = [] for item in uniques: @@ -823,34 +804,34 @@ def acollmean(inarray): try: test = fcn1(avgcol) except Exception: - test = 'N/A' + test = "N/A" item.append(test) if fcn2 is not None: try: test = fcn2(avgcol) except Exception: - test = 'N/A' + test = "N/A" item.append(test) newlist.append(item) try: new_a = N.array(newlist) except TypeError: - new_a = N.array(newlist, 'O') + new_a = N.array(newlist, "O") return new_a def adm(a, criterion): """ - Returns rows from the passed list of lists that meet the criteria in - the passed criterion expression (a string as a function of x). + Returns rows from the passed list of lists that meet the criteria in + the passed criterion expression (a string as a function of x). 
- Usage: adm (a,criterion) where criterion is like 'x[2]==37' - """ - function = 'filter(lambda x: '+criterion+',a)' + Usage: adm (a,criterion) where criterion is like 'x[2]==37' + """ + function = "filter(lambda x: " + criterion + ",a)" lines = eval(function) try: lines = N.array(lines) except Exception: - lines = N.array(lines, 'O') + lines = N.array(lines, "O") return lines def isstring(x): @@ -861,125 +842,125 @@ def isstring(x): def alinexand(a, columnlist, valuelist): """ - Returns the rows of an array where col (from columnlist) = val - (from valuelist). One value is required for each column in columnlist. + Returns the rows of an array where col (from columnlist) = val + (from valuelist). One value is required for each column in columnlist. - Usage: alinexand (a,columnlist,valuelist) - Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i - """ + Usage: alinexand (a,columnlist,valuelist) + Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i + """ if type(columnlist) not in [list, tuple, N.ArrayType]: columnlist = [columnlist] if type(valuelist) not in [list, tuple, N.ArrayType]: valuelist = [valuelist] - criterion = '' + criterion = "" for i in range(len(columnlist)): if isinstance(valuelist[i], str): - critval = '\'' + valuelist[i] + '\'' + critval = "'" + valuelist[i] + "'" else: critval = str(valuelist[i]) - criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and' - criterion = criterion[0:-3] # remove the "and" after the last crit + criterion = criterion + " x[" + str(columnlist[i]) + "]==" + critval + " and" + criterion = criterion[0:-3] # remove the "and" after the last crit return adm(a, criterion) def alinexor(a, columnlist, valuelist): """ - Returns the rows of an array where col (from columnlist) = val (from - valuelist). One value is required for each column in columnlist. 
- The exception is if either columnlist or valuelist has only 1 value, - in which case that item will be expanded to match the length of the - other list. - - Usage: alinexor (a,columnlist,valuelist) - Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i - """ + Returns the rows of an array where col (from columnlist) = val (from + valuelist). One value is required for each column in columnlist. + The exception is if either columnlist or valuelist has only 1 value, + in which case that item will be expanded to match the length of the + other list. + + Usage: alinexor (a,columnlist,valuelist) + Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i + """ if type(columnlist) not in [list, tuple, N.ArrayType]: columnlist = [columnlist] if type(valuelist) not in [list, tuple, N.ArrayType]: valuelist = [valuelist] - criterion = '' + criterion = "" if len(columnlist) == 1 and len(valuelist) > 1: - columnlist = columnlist*len(valuelist) + columnlist = columnlist * len(valuelist) elif len(valuelist) == 1 and len(columnlist) > 1: - valuelist = valuelist*len(columnlist) + valuelist = valuelist * len(columnlist) for i in range(len(columnlist)): if isinstance(valuelist[i], str): - critval = '\'' + valuelist[i] + '\'' + critval = "'" + valuelist[i] + "'" else: critval = str(valuelist[i]) - criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or' - criterion = criterion[0:-2] # remove the "or" after the last crit + criterion = criterion + " x[" + str(columnlist[i]) + "]==" + critval + " or" + criterion = criterion[0:-2] # remove the "or" after the last crit return adm(a, criterion) def areplace(a, oldval, newval): """ - Replaces all occurrences of oldval with newval in array a. + Replaces all occurrences of oldval with newval in array a. 
- Usage: areplace(a,oldval,newval) - """ - newa = N.not_equal(a, oldval)*a - return newa+N.equal(a, oldval)*newval + Usage: areplace(a,oldval,newval) + """ + newa = N.not_equal(a, oldval) * a + return newa + N.equal(a, oldval) * newval - def arecode(a, listmap, col='all'): + def arecode(a, listmap, col="all"): """ - Remaps the values in an array to a new set of values (useful when - you need to recode data from (e.g.) strings to numbers as most stats - packages require. Can work on SINGLE columns, or 'all' columns at once. + Remaps the values in an array to a new set of values (useful when + you need to recode data from (e.g.) strings to numbers as most stats + packages require. Can work on SINGLE columns, or 'all' columns at once. - Usage: arecode (a,listmap,col='all') - Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1] - """ + Usage: arecode (a,listmap,col='all') + Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1] + """ ashape = a.shape - if col == 'all': + if col == "all": work = a.flat else: work = acolex(a, col) work = work.flat for pair in listmap: - if isinstance(pair[1], str) or work.typecode() == 'O' or a.typecode() == 'O': - work = N.array(work, 'O') - a = N.array(a, 'O') + if isinstance(pair[1], str) or work.typecode() == "O" or a.typecode() == "O": + work = N.array(work, "O") + a = N.array(a, "O") for i in range(len(work)): if work[i] == pair[0]: work[i] = pair[1] - if col == 'all': + if col == "all": return N.reshape(work, ashape) else: - return N.concatenate([a[:, 0:col], work[:, N.NewAxis], a[:, col+1:]], 1) - else: # must be a non-Object type array and replacement + return N.concatenate([a[:, 0:col], work[:, N.NewAxis], a[:, col + 1 :]], 1) + else: # must be a non-Object type array and replacement work = N.where(N.equal(work, pair[0]), pair[1], work) - return N.concatenate([a[:, 0:col], work[:, N.NewAxis], a[:, col+1:]], 1) + return N.concatenate([a[:, 0:col], work[:, N.NewAxis], a[:, col + 1 :]], 
1) def arowcompare(row1, row2): """ - Compares two numeric rows from an array, + Compares two numeric rows from an array, - Usage: arowcompare(row1,row2) - Returns: an array of equal length containing 1s where the two rows had - identical elements and 0 otherwise - """ + Usage: arowcompare(row1,row2) + Returns: an array of equal length containing 1s where the two rows had + identical elements and 0 otherwise + """ return N.equal(row1, row2) def arowsame(row1, row2): """ - Compares two rows from an array, regardless of whether it is an - array of numbers or of python objects (which requires the cmp function). + Compares two rows from an array, regardless of whether it is an + array of numbers or of python objects (which requires the cmp function). - Usage: arowsame(row1,row2) - Returns: 1 if the two rows are identical, 0 otherwise. - """ + Usage: arowsame(row1,row2) + Returns: 1 if the two rows are identical, 0 otherwise. + """ cmpval = N.alltrue(arowcompare(row1, row2)) return cmpval def asortrows(a, axis=0): """ - Sorts an array "by rows". This differs from the Numeric.sort() function, - which sorts elements WITHIN the given axis. Instead, this function keeps - the elements along the given axis intact, but shifts them 'up or down' - relative to one another. + Sorts an array "by rows". This differs from the Numeric.sort() function, + which sorts elements WITHIN the given axis. Instead, this function keeps + the elements along the given axis intact, but shifts them 'up or down' + relative to one another. - Usage: asortrows(a,axis=0) - Returns: sorted version of a - """ + Usage: asortrows(a,axis=0) + Returns: sorted version of a + """ if axis != 0: a = N.swapaxes(a, axis, 0) l = sorted(a.tolist()) @@ -990,69 +971,69 @@ def asortrows(a, axis=0): def aunique(inarray): """ - Returns unique items in the FIRST dimension of the passed array. Only - works on arrays NOT including string items. + Returns unique items in the FIRST dimension of the passed array. 
Only + works on arrays NOT including string items. - Usage: aunique (inarray) - """ + Usage: aunique (inarray) + """ uniques = N.array([inarray[0]]) - if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY + if len(uniques.shape) == 1: # IF IT'S A 1D ARRAY for item in inarray[1:]: if N.add.reduce(N.equal(uniques, item).flat) == 0: try: uniques = N.concatenate([uniques, N.array[N.NewAxis, :]]) except TypeError: uniques = N.concatenate([uniques, N.array([item])]) - else: # IT MUST BE A 2+D ARRAY - if inarray.typecode() != 'O': # not an Object array + else: # IT MUST BE A 2+D ARRAY + if inarray.typecode() != "O": # not an Object array for item in inarray[1:]: if not N.sum(N.alltrue(N.equal(uniques, item), 1)): try: uniques = N.concatenate([uniques, item[N.NewAxis, :]]) - except TypeError: # the item to add isn't a list + except TypeError: # the item to add isn't a list uniques = N.concatenate([uniques, N.array([item])]) else: pass # this item is already in the uniques array - else: # must be an Object array, alltrue/equal functions don't work + else: # must be an Object array, alltrue/equal functions don't work for item in inarray[1:]: newflag = 1 for unq in uniques: # NOTE: cmp --> 0=same, -1=<, 1=> # TODO fix this test = N.sum(abs(N.array(list(map(cmp, item, unq))))) - if test == 0: # if item identical to any 1 row in uniques + if test == 0: # if item identical to any 1 row in uniques newflag = 0 # then not a novel item to add break if newflag == 1: try: uniques = N.concatenate([uniques, item[N.NewAxis, :]]) - except TypeError: # the item to add isn't a list + except TypeError: # the item to add isn't a list uniques = N.concatenate([uniques, N.array([item])]) return uniques def aduplicates(inarray): """ - Returns duplicate items in the FIRST dimension of the passed array. Only - works on arrays NOT including string items. + Returns duplicate items in the FIRST dimension of the passed array. Only + works on arrays NOT including string items. 
- Usage: aunique (inarray) - """ + Usage: aunique (inarray) + """ inarray = N.array(inarray) - if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY + if len(inarray.shape) == 1: # IF IT'S A 1D ARRAY dups = [] inarray = inarray.tolist() for i in range(len(inarray)): - if inarray[i] in inarray[i+1:]: + if inarray[i] in inarray[i + 1 :]: dups.append(inarray[i]) dups = aunique(dups) - else: # IT MUST BE A 2+D ARRAY + else: # IT MUST BE A 2+D ARRAY dups = [] aslist = inarray.tolist() for i in range(len(aslist)): - if aslist[i] in aslist[i+1:]: + if aslist[i] in aslist[i + 1 :]: dups.append(aslist[i]) dups = unique(dups) dups = N.array(dups) return dups -except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs +except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs pass diff --git a/lib/bx_extras/pyparsing.py b/lib/bx_extras/pyparsing.py index 91b784bc..38cff70a 100644 --- a/lib/bx_extras/pyparsing.py +++ b/lib/bx_extras/pyparsing.py @@ -62,32 +62,114 @@ class names, and the use of '+', '|' and '^' operators. 
import copy import re +import sre_constants import string import sys import warnings import xml.sax.saxutils from weakref import ref as wkref -import sre_constants - __all__ = [ - 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', - 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', - 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', - 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', - 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', - 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', - 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', - 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', - 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', - 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', - 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', - 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', - 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', - 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', - 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', - 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', - 'indentedBlock', + "And", + "CaselessKeyword", + "CaselessLiteral", + "CharsNotIn", + "Combine", + "Dict", + "Each", + "Empty", + "FollowedBy", + "Forward", + "GoToColumn", + "Group", + "Keyword", + "LineEnd", + "LineStart", + "Literal", + "MatchFirst", + "NoMatch", + "NotAny", + "OneOrMore", + "OnlyOnce", + "Optional", + "Or", + 
"ParseBaseException", + "ParseElementEnhance", + "ParseException", + "ParseExpression", + "ParseFatalException", + "ParseResults", + "ParseSyntaxException", + "ParserElement", + "QuotedString", + "RecursiveGrammarException", + "Regex", + "SkipTo", + "StringEnd", + "StringStart", + "Suppress", + "Token", + "TokenConverter", + "Upcase", + "White", + "Word", + "WordEnd", + "WordStart", + "ZeroOrMore", + "alphanums", + "alphas", + "alphas8bit", + "anyCloseTag", + "anyOpenTag", + "cStyleComment", + "col", + "commaSeparatedList", + "commonHTMLEntity", + "countedArray", + "cppStyleComment", + "dblQuotedString", + "dblSlashComment", + "delimitedList", + "dictOf", + "downcaseTokens", + "empty", + "getTokensEndLoc", + "hexnums", + "htmlComment", + "javaStyleComment", + "keepOriginalText", + "line", + "lineEnd", + "lineStart", + "lineno", + "makeHTMLTags", + "makeXMLTags", + "matchOnlyAtCol", + "matchPreviousExpr", + "matchPreviousLiteral", + "nestedExpr", + "nullDebugAction", + "nums", + "oneOf", + "opAssoc", + "operatorPrecedence", + "printables", + "punc8bit", + "pythonStyleComment", + "quotedString", + "removeQuotes", + "replaceHTMLEntity", + "replaceWith", + "restOfLine", + "sglQuotedString", + "srange", + "stringEnd", + "stringStart", + "traceParseAction", + "unicodeString", + "upcaseTokens", + "withAttribute", + "indentedBlock", ] @@ -117,6 +199,7 @@ class _Constants: class ParseBaseException(Exception): """base exception class for all parsing runtime exceptions""" + __slots__ = ("loc", "msg", "pstr", "parserElement") # Performance tuning: we construct a *lot* of these, so keep this # constructor as small and fast as possible @@ -133,15 +216,15 @@ def __init__(self, pstr, loc=0, msg=None, elem=None): def __getattr__(self, aname): """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text + - lineno - returns the line 
number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text """ - if (aname == "lineno"): + if aname == "lineno": return lineno(self.loc, self.pstr) - elif (aname in ("col", "column")): + elif aname in ("col", "column"): return col(self.loc, self.pstr) - elif (aname == "line"): + elif aname == "line": return line(self.loc, self.pstr) else: raise AttributeError(aname) @@ -154,34 +237,33 @@ def __repr__(self): def markInputline(self, markerString=">!<"): """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. + the location of the exception with a special symbol. """ line_str = self.line line_column = self.column - 1 if markerString: - line_str = "".join([line_str[:line_column], - markerString, line_str[line_column:]]) + line_str = "".join([line_str[:line_column], markerString, line_str[line_column:]]) return line_str.strip() class ParseException(ParseBaseException): """exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text + supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text """ class ParseFatalException(ParseBaseException): """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" + is found; stops all parsing immediately""" class ParseSyntaxException(ParseFatalException): """just like ParseFatalException, but thrown internally when an - ErrorStop indicates that parsing is to stop immediately because - an unbacktrackable syntax error has been found""" + ErrorStop indicates that parsing is to stop 
immediately because + an unbacktrackable syntax error has been found""" def __init__(self, pe): super().__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement) @@ -210,10 +292,11 @@ def __repr__(self): class ParseResults: """Structured parse results, to provide multiple means of access to the parsed data: - - as a list (len(results)) - - by list index (results[0], results[1], etc.) - - by attribute (results.) - """ + - as a list (len(results)) + - by list index (results[0], results[1], etc.) + - by attribute (results.) + """ + __slots__ = ("__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__") def __new__(cls, toklist, name=None, asList=True, modal=True): @@ -246,7 +329,7 @@ def __init__(self, toklist, name=None, asList=True, modal=True): if isinstance(name, int): name = str(name) self.__name = name - if toklist not in (None, '', []): + if toklist not in (None, "", []): if isinstance(toklist, str): toklist = [toklist] if asList: @@ -292,7 +375,7 @@ def __delitem__(self, i): if isinstance(i, int): if i < 0: i += mylen - i = slice(i, i+1) + i = slice(i, i + 1) # get removed indices removed = list(range(*i.indices(mylen))) removed.reverse() @@ -328,15 +411,15 @@ def keys(self): def pop(self, index=-1): """Removes and returns item at specified index (default=last). 
- Will work with either numeric indices or dict-key indicies.""" + Will work with either numeric indices or dict-key indicies.""" ret = self[index] del self[index] return ret def get(self, key, defaultValue=None): """Returns named result matching the given key, or if there is no - such name, then returns the given defaultValue or None if no - defaultValue is specified.""" + such name, then returns the given defaultValue or None if no + defaultValue is specified.""" if key in self: return self[key] else: @@ -369,10 +452,14 @@ def __add__(self, other): def __iadd__(self, other): if other.__tokdict: offset = len(self.__toklist) - addoffset = (lambda a: (a < 0 and offset) or (a+offset)) + + def addoffset(a): + return (a < 0 and offset) or (a + offset) + otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) - for (k, vlist) in otheritems for v in vlist] + otherdictitems = [ + (k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) for (k, vlist) in otheritems for v in vlist + ] for k, v in otherdictitems: self[k] = v if isinstance(v[0], ParseResults): @@ -397,7 +484,7 @@ def __str__(self): out += "]" return out - def _asStringList(self, sep=''): + def _asStringList(self, sep=""): out = [] for item in self.__toklist: if out and sep: @@ -463,17 +550,9 @@ def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): for i, res in enumerate(worklist): if isinstance(res, ParseResults): if i in namedItems: - out += [res.asXML( - namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] + out += [res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent, formatted)] else: - out += [res.asXML( - None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] + out += [res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent, formatted)] else: # individual token, see if there is a name for it resTag = None @@ -507,27 +586,25 @@ def getName(self): 
return par.__lookup(self) else: return None - elif (len(self) == 1 - and len(self.__tokdict) == 1 - and self.__tokdict.values()[0][0][1] in (0, -1)): + elif len(self) == 1 and len(self.__tokdict) == 1 and self.__tokdict.values()[0][0][1] in (0, -1): return self.__tokdict.keys()[0] else: return None - def dump(self, indent='', depth=0): + def dump(self, indent="", depth=0): """Diagnostic method for listing out the contents of a ParseResults. - Accepts an optional indent argument so that this string can be embedded - in a nested display of other data.""" + Accepts an optional indent argument so that this string can be embedded + in a nested display of other data.""" out = [] - out.append(indent+str(self.asList())) + out.append(indent + str(self.asList())) keys = sorted(self.items()) for k, v in keys: if out: - out.append('\n') - out.append("{}{}- {}: ".format(indent, (' '*depth), k)) + out.append("\n") + out.append("{}{}- {}: ".format(indent, (" " * depth), k)) if isinstance(v, ParseResults): if v.keys(): - out.append(v.dump(indent, depth+1)) + out.append(v.dump(indent, depth + 1)) else: out.append(str(v)) else: @@ -536,11 +613,15 @@ def dump(self, indent='', depth=0): # add support for pickle protocol def __getstate__(self): - return (self.__toklist, ( - self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name)) + return ( + self.__toklist, + ( + self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name, + ), + ) def __setstate__(self, state): self.__toklist = state[0] @@ -555,39 +636,38 @@ def __setstate__(self, state): def col(loc, strg): """Returns current column within a string, counting newlines as line separators. - The first column is number 1. + The first column is number 1. - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. 
See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return (loc < len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return (loc < len(strg) and strg[loc] == "\n") and 1 or loc - strg.rfind("\n", 0, loc) def lineno(loc, strg): """Returns current line number within a string, counting newlines as line separators. - The first line is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ + The first line is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ return strg.count("\n", 0, loc) + 1 def line(loc, strg): - """Returns the line of text containing loc within a string, counting newlines as line separators. 
- """ + """Returns the line of text containing loc within a string, counting newlines as line separators.""" lastCR = strg.rfind("\n", 0, loc) nextCR = strg.find("\n", loc) if nextCR > 0: - return strg[lastCR+1:nextCR] + return strg[lastCR + 1 : nextCR] else: - return strg[lastCR+1:] + return strg[lastCR + 1 :] def _defaultStartDebugAction(instring, loc, expr): @@ -608,12 +688,13 @@ def nullDebugAction(*args): class ParserElement: """Abstract base level parser element class.""" + DEFAULT_WHITE_CHARS = " \n\t\r" def setDefaultWhitespaceChars(chars): - """Overrides the default whitespace chars - """ + """Overrides the default whitespace chars""" ParserElement.DEFAULT_WHITE_CHARS = chars + setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) def __init__(self, savelist=False): @@ -641,7 +722,7 @@ def __init__(self, savelist=False): def copy(self): """Make a copy of this ParserElement. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element.""" + for the same parsing pattern, using copies of the original parse element.""" cpy = copy.copy(self) cpy.parseAction = self.parseAction[:] cpy.ignoreExprs = self.ignoreExprs[:] @@ -659,10 +740,10 @@ def setName(self, name): def setResultsName(self, name, listAllMatches=False): """Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original ParserElement object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. + of the returned parse results. + NOTE: this returns a *copy* of the original ParserElement object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. 
""" newself = self.copy() newself.resultsName = name @@ -671,16 +752,18 @@ def setResultsName(self, name, listAllMatches=False): def setBreak(self, breakFlag=True): """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set breakFlag to True to enable, False to - disable. + about to be parsed. Set breakFlag to True to enable, False to + disable. """ if breakFlag: _parseMethod = self._parse def breaker(instring, loc, doActions=True, callPreParse=True): import pdb + pdb.set_trace() _parseMethod(instring, loc, doActions, callPreParse) + breaker._originalParseMethod = _parseMethod self._parse = breaker else: @@ -690,7 +773,7 @@ def breaker(instring, loc, doActions=True, callPreParse=True): def _normalizeParseActionArgs(f): """Internal method used to decorate parse actions that take fewer than 3 arguments, - so that all parse actions can be called as f(s,l,t).""" + so that all parse actions can be called as f(s,l,t).""" STAR_ARGS = 4 try: @@ -731,17 +814,25 @@ def _normalizeParseActionArgs(f): return f else: if numargs > 3: + def tmp(s, l, t): return f(f.__call__.__self__, s, l, t) + elif numargs == 2: + def tmp(s, l, t): return f(l, t) + elif numargs == 1: + def tmp(s, l, t): return f(t) + else: # ~ numargs == 0: + def tmp(s, l, t): return f() + try: tmp.__name__ = f.__name__ except (AttributeError, TypeError): @@ -758,27 +849,28 @@ def tmp(s, l, t): # no need for special handling if attribute doesnt exist pass return tmp + _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) def setParseAction(self, *fns, **kwargs): """Define action to perform when successfully matching parse element definition. 
- Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), - fn(loc,toks), fn(toks), or just fn(), where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a ParseResults object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ + Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), + fn(loc,toks), fn(toks), or just fn(), where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a ParseResults object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. 
+ """ self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) - self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + self.callDuringTry = "callDuringTry" in kwargs and kwargs["callDuringTry"] return self def addParseAction(self, *fns, **kwargs): @@ -789,14 +881,14 @@ def addParseAction(self, *fns, **kwargs): def setFailAction(self, fn): """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - fn(s,loc,expr,err) where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw ParseFatalException - if it is desired to stop parsing immediately.""" + Fail acton fn is a callable function that takes the arguments + fn(s,loc,expr,err) where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. 
It may throw ParseFatalException + if it is desired to stop parsing immediately.""" self.failAction = fn return self @@ -833,7 +925,7 @@ def postParse(self, instring, loc, tokenlist): # ~ @profile def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): - debugging = (self.debug) # and doActions ) + debugging = self.debug # and doActions ) if debugging or self.failAction: # ~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) @@ -883,7 +975,8 @@ def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): tokens, self.resultsName, asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults) + modal=self.modalResults, + ) except ParseBaseException as err: # ~ print "Exception raised in user parse action:", err if self.debugActions[2]: @@ -897,7 +990,8 @@ def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): tokens, self.resultsName, asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), - modal=self.modalResults) + modal=self.modalResults, + ) if debugging: # ~ print ("Matched",self,"->",retTokens.asList()) @@ -937,52 +1031,54 @@ def _parseCache(self, instring, loc, doActions=True, callPreParse=True): def resetCache(): ParserElement._exprArgCache.clear() + resetCache = staticmethod(resetCache) _packratEnabled = False def enablePackrat(): """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. 
To activate the packrat feature, your - program must call the class method ParserElement.enablePackrat(). If - your program uses psyco to "compile as you go", you must call - enablePackrat before calling psyco.full(). If you do not do this, - Python will crash. For best results, call enablePackrat() immediately - after importing pyparsing. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method ParserElement.enablePackrat(). If + your program uses psyco to "compile as you go", you must call + enablePackrat before calling psyco.full(). If you do not do this, + Python will crash. For best results, call enablePackrat() immediately + after importing pyparsing. """ if not ParserElement._packratEnabled: ParserElement._packratEnabled = True ParserElement._parse = ParserElement._parseCache + enablePackrat = staticmethod(enablePackrat) def parseString(self, instring, parseAll=False): """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set parseAll to True (equivalent to ending - the grammar with StringEnd()). - - Note: parseString implicitly calls expandtabs() on the input string, - in order to report proper column numbers in parse actions. 
- If the input string contains tabs and - the grammar uses parse actions that use the loc argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling parseWithTabs on your grammar before calling parseString - (see L{I{parseWithTabs}}) - - define your parse action using the full (s,loc,toks) signature, and - reference the input string using the parse action's s argument - - explictly expand the tabs in your input string before calling - parseString + This is the main interface to the client code, once the complete + expression has been built. + + If you want the grammar to require that the entire input string be + successfully parsed, then set parseAll to True (equivalent to ending + the grammar with StringEnd()). + + Note: parseString implicitly calls expandtabs() on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the loc argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling parseWithTabs on your grammar before calling parseString + (see L{I{parseWithTabs}}) + - define your parse action using the full (s,loc,toks) signature, and + reference the input string using the parse action's s argument + - explictly expand the tabs in your input string before calling + parseString """ ParserElement.resetCache() if not self.streamlined: @@ -999,12 +1095,12 @@ def parseString(self, instring, parseAll=False): def scanString(self, instring, maxMatches=_MAX_INT): """Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - maxMatches argument, to clip scanning after 'n' matches are found. + matching tokens, start location, and end location. 
May be called with optional + maxMatches argument, to clip scanning after 'n' matches are found. - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs.""" + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}} for more information on parsing + strings with embedded tabs.""" if not self.streamlined: self.streamline() for e in self.ignoreExprs: @@ -1023,7 +1119,7 @@ def scanString(self, instring, maxMatches=_MAX_INT): preloc = preparseFn(instring, loc) nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) except ParseException: - loc = preloc+1 + loc = preloc + 1 else: matches += 1 yield tokens, preloc, nextLoc @@ -1031,11 +1127,11 @@ def scanString(self, instring, maxMatches=_MAX_INT): def transformString(self, instring): """Extension to scanString, to modify matching text with modified tokens that may - be returned from a parse action. To use transformString, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking transformString() on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. transformString() returns the resulting transformed string.""" + be returned from a parse action. To use transformString, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking transformString() on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. 
transformString() returns the resulting transformed string.""" out = [] lastE = 0 # force preservation of s, to minimize unwanted transformation of string, and to @@ -1056,8 +1152,8 @@ def transformString(self, instring): def searchString(self, instring, maxMatches=_MAX_INT): """Another extension to scanString, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - maxMatches argument, to clip searching after 'n' matches are found. + to match the given parse expression. May be called with optional + maxMatches argument, to clip searching after 'n' matches are found. """ return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)]) @@ -1067,9 +1163,8 @@ def __add__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return And([self, other]) @@ -1079,9 +1174,8 @@ def __radd__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return other + self @@ -1091,9 +1185,8 @@ def __sub__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return And([self, And._ErrorStop(), other]) @@ -1103,9 +1196,8 @@ def __rsub__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine 
element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return other - self @@ -1126,12 +1218,14 @@ def __mul__(self, other): if other[0] == 1: return OneOrMore(self) else: - return self*other[0] + ZeroOrMore(self) + return self * other[0] + ZeroOrMore(self) elif isinstance(other[0], int) and isinstance(other[1], int): minElements, optElements = other optElements -= minElements else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]), type(other[1])) + raise TypeError( + "cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]), type(other[1]) + ) else: raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects") else: @@ -1145,23 +1239,25 @@ def __mul__(self, other): raise ValueError("cannot multiply ParserElement by 0 or (0,0)") if optElements: + def makeOptionalList(n): if n > 1: - return Optional(self + makeOptionalList(n-1)) + return Optional(self + makeOptionalList(n - 1)) else: return Optional(self) + if minElements: if minElements == 1: ret = self + makeOptionalList(optElements) else: - ret = And([self]*minElements) + makeOptionalList(optElements) + ret = And([self] * minElements) + makeOptionalList(optElements) else: ret = makeOptionalList(optElements) else: if minElements == 1: ret = self else: - ret = And([self]*minElements) + ret = And([self] * minElements) return ret def __rmul__(self, other): @@ -1173,9 +1269,8 @@ def __or__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return MatchFirst([self, other]) @@ -1185,9 +1280,8 @@ def __ror__(self, other): other = 
Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return other | self @@ -1197,9 +1291,8 @@ def __xor__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return Or([self, other]) @@ -1209,9 +1302,8 @@ def __rxor__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return other ^ self @@ -1221,9 +1313,8 @@ def __and__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return Each([self, other]) @@ -1233,9 +1324,8 @@ def __rand__(self, other): other = Literal(other) if not isinstance(other, ParserElement): warnings.warn( - "Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, - stacklevel=2) + "Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2 + ) return None return other & self @@ -1245,29 +1335,28 @@ def __invert__(self): def __call__(self, name): """Shortcut for setResultsName, with listAllMatches=default:: - userdata = Word(alphas).setResultsName("name") + 
Word(nums+"-").setResultsName("socsecno") - could be written as:: - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - """ + userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") + could be written as:: + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + """ return self.setResultsName(name) def suppress(self): """Suppresses the output of this ParserElement; useful to keep punctuation from - cluttering up returned output. + cluttering up returned output. """ return Suppress(self) def leaveWhitespace(self): """Disables the skipping of whitespace before matching the characters in the - ParserElement's defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. + ParserElement's defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. """ self.skipWhitespace = False return self def setWhitespaceChars(self, chars): - """Overrides the default whitespace chars - """ + """Overrides the default whitespace chars""" self.skipWhitespace = True self.whiteChars = chars self.copyDefaultWhiteChars = False @@ -1275,15 +1364,15 @@ def setWhitespaceChars(self, chars): def parseWithTabs(self): """Overrides default behavior to expand s to spaces before parsing the input string. - Must be called before parseString when the input grammar contains elements that - match characters.""" + Must be called before parseString when the input grammar contains elements that + match characters.""" self.keepTabs = True return self def ignore(self, other): """Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. 
""" if isinstance(other, Suppress): if other not in self.ignoreExprs: @@ -1294,15 +1383,17 @@ def ignore(self, other): def setDebugActions(self, startAction, successAction, exceptionAction): """Enable display of debugging messages while doing pattern matching.""" - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) + self.debugActions = ( + startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction, + ) self.debug = True return self def setDebug(self, flag=True): """Enable display of debugging messages while doing pattern matching. - Set flag to True to enable, False to disable.""" + Set flag to True to enable, False to disable.""" if flag: self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction) else: @@ -1329,8 +1420,8 @@ def validate(self, validateTrace=None): def parseFile(self, file_or_filename): """Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. 
""" try: file_contents = file_or_filename.read() @@ -1416,10 +1507,7 @@ def __init__(self, matchString): try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn( - "null string passed to Literal; use Empty() instead", - SyntaxWarning, - stacklevel=2) + warnings.warn("null string passed to Literal; use Empty() instead", SyntaxWarning, stacklevel=2) self.__class__ = Empty self.name = '"%s"' % str(self.match) self.errmsg = "Expected " + self.name @@ -1431,9 +1519,8 @@ def __init__(self, matchString): # short-circuit as quickly as possible, and avoid calling startswith # ~ @profile def parseImpl(self, instring, loc, doActions=True): - if (instring[loc] == self.firstMatchChar - and (self.matchLen == 1 or instring.startswith(self.match, loc))): - return loc+self.matchLen, self.match + if instring[loc] == self.firstMatchChar and (self.matchLen == 1 or instring.startswith(self.match, loc)): + return loc + self.matchLen, self.match # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc @@ -1446,15 +1533,16 @@ def parseImpl(self, instring, loc, doActions=True): class Keyword(Token): """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with Literal:: - Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. - Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' - Accepts two optional constructor arguments in addition to the keyword string: - identChars is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive - matching, default is False. + immediately followed by a non-keyword character. Compare with Literal:: + Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. 
+ Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' + Accepts two optional constructor arguments in addition to the keyword string: + identChars is a string of characters that would be valid identifier characters, + defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive + matching, default is False. """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" + + DEFAULT_KEYWORD_CHARS = alphanums + "_$" def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False): super().__init__() @@ -1463,10 +1551,7 @@ def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False try: self.firstMatchChar = matchString[0] except IndexError: - warnings.warn( - "null string passed to Keyword; use Empty() instead", - SyntaxWarning, - stacklevel=2) + warnings.warn("null string passed to Keyword; use Empty() instead", SyntaxWarning, stacklevel=2) self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name self.mayReturnEmpty = False @@ -1479,16 +1564,22 @@ def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False def parseImpl(self, instring, loc, doActions=True): if self.caseless: - if ((instring[loc:loc+self.matchLen].upper() == self.caselessmatch) - and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) - and (loc == 0 or instring[loc-1].upper() not in self.identChars)): - return loc+self.matchLen, self.match + if ( + (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) + and ( + loc >= len(instring) - self.matchLen or instring[loc + self.matchLen].upper() not in self.identChars + ) + and (loc == 0 or instring[loc - 1].upper() not in self.identChars) + ): + return loc + self.matchLen, self.match else: - if (instring[loc] == self.firstMatchChar - and (self.matchLen == 1 or instring.startswith(self.match, loc)) - and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) 
- and (loc == 0 or instring[loc-1] not in self.identChars)): - return loc+self.matchLen, self.match + if ( + instring[loc] == self.firstMatchChar + and (self.matchLen == 1 or instring.startswith(self.match, loc)) + and (loc >= len(instring) - self.matchLen or instring[loc + self.matchLen] not in self.identChars) + and (loc == 0 or instring[loc - 1] not in self.identChars) + ): + return loc + self.matchLen, self.match # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc @@ -1501,16 +1592,16 @@ def copy(self): return c def setDefaultKeywordChars(chars): - """Overrides the default Keyword chars - """ + """Overrides the default Keyword chars""" Keyword.DEFAULT_KEYWORD_CHARS = chars + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) class CaselessLiteral(Literal): """Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. 
""" def __init__(self, matchString): @@ -1521,8 +1612,8 @@ def __init__(self, matchString): self.errmsg = "Expected " + self.name def parseImpl(self, instring, loc, doActions=True): - if instring[loc:loc+self.matchLen].upper() == self.match: - return loc+self.matchLen, self.returnString + if instring[loc : loc + self.matchLen].upper() == self.match: + return loc + self.matchLen, self.returnString # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc @@ -1535,9 +1626,10 @@ def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS): super().__init__(matchString, identChars, caseless=True) def parseImpl(self, instring, loc, doActions=True): - if ((instring[loc:loc+self.matchLen].upper() == self.caselessmatch) - and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars)): - return loc+self.matchLen, self.match + if (instring[loc : loc + self.matchLen].upper() == self.caselessmatch) and ( + loc >= len(instring) - self.matchLen or instring[loc + self.matchLen].upper() not in self.identChars + ): + return loc + self.matchLen, self.match # ~ raise ParseException( instring, loc, self.errmsg ) exc = self.myException exc.loc = loc @@ -1547,12 +1639,12 @@ def parseImpl(self, instring, loc, doActions=True): class Word(Token): """Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for min is 1 (a - minimum value < 1 is not valid); the default values for max and exact - are 0, meaning no maximum or exact length restriction. 
+ Defined with string containing all allowed initial characters, + an optional string containing allowed body characters (if omitted, + defaults to the initial character set), and an optional minimum, + maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. """ def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False): @@ -1569,7 +1661,9 @@ def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=F self.maxSpecified = max > 0 if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + raise ValueError( + "cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted" + ) self.minLen = min @@ -1587,19 +1681,19 @@ def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=F self.mayIndexError = False self.asKeyword = asKeyword - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0): + if " " not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0): if self.bodyCharsOrig == self.initCharsOrig: self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) elif len(self.bodyCharsOrig) == 1: self.reString = "{}[{}]*".format( - re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig)) + re.escape(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig) + ) else: self.reString = "[{}][{}]*".format( - _escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig)) + _escapeRegexRangeChars(self.initCharsOrig), _escapeRegexRangeChars(self.bodyCharsOrig) + ) if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" + self.reString = r"\b" + self.reString + r"\b" try: self.re = re.compile(self.reString) except Exception: @@ -1638,7 +1732,7 @@ 
def parseImpl(self, instring, loc, doActions=True): if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: throwException = True if self.asKeyword: - if (start > 0 and instring[start-1] in bodychars) or (loc < instrlen and instring[loc] in bodychars): + if (start > 0 and instring[start - 1] in bodychars) or (loc < instrlen and instring[loc] in bodychars): throwException = True if throwException: @@ -1660,7 +1754,7 @@ def __str__(self): def charsAsStr(s): if len(s) > 4: - return s[:4]+"..." + return s[:4] + "..." else: return s @@ -1674,7 +1768,7 @@ def charsAsStr(s): class Regex(Token): """Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. """ def __init__(self, pattern, flags=0): @@ -1682,10 +1776,7 @@ def __init__(self, pattern, flags=0): super().__init__() if len(pattern) == 0: - warnings.warn( - "null string passed to Regex; use Empty() instead", - SyntaxWarning, - stacklevel=2) + warnings.warn("null string passed to Regex; use Empty() instead", SyntaxWarning, stacklevel=2) self.pattern = pattern self.flags = flags @@ -1694,10 +1785,7 @@ def __init__(self, pattern, flags=0): self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error: - warnings.warn( - "invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, - stacklevel=2) + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, SyntaxWarning, stacklevel=2) raise self.name = str(self) @@ -1734,18 +1822,17 @@ def __str__(self): class QuotedString(Token): - """Token for matching strings that are delimited by quoting characters. 
- """ + """Token for matching strings that are delimited by quoting characters.""" def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): """ - Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=None) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=False) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=False) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) """ super().__init__() @@ -1774,37 +1861,41 @@ def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unqu if multiline: self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'{}(?:[^{}{}]'.format( + self.pattern = r"{}(?:[^{}{}]".format( re.escape(self.quoteChar), _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '')) + (escChar is not None and _escapeRegexRangeChars(escChar) or ""), + ) 
else: self.flags = 0 - self.pattern = r'{}(?:[^{}\n\r{}]'.format( + self.pattern = r"{}(?:[^{}\n\r{}]".format( re.escape(self.quoteChar), _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '')) + (escChar is not None and _escapeRegexRangeChars(escChar) or ""), + ) if len(self.endQuoteChar) > 1: self.pattern += ( - '|(?:' + ')|(?:'.join(["{}[^{}]".format( - re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i]) - ) for i in range(len(self.endQuoteChar)-1, 0, -1)]) + ')') + "|(?:" + + ")|(?:".join( + [ + "{}[^{}]".format(re.escape(self.endQuoteChar[:i]), _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar) - 1, 0, -1) + ] + ) + + ")" + ) if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + self.pattern += r"|(?:%s)" % re.escape(escQuote) if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + self.pattern += r"|(?:%s.)" % re.escape(escChar) + self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + self.pattern += r")*%s" % re.escape(self.endQuoteChar) try: self.re = re.compile(self.pattern, self.flags) self.reString = self.pattern except sre_constants.error: - warnings.warn( - "invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, - stacklevel=2) + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, SyntaxWarning, stacklevel=2) raise self.name = str(self) @@ -1826,7 +1917,7 @@ def parseImpl(self, instring, loc, doActions=True): if self.unquoteResults: # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] + ret = ret[self.quoteCharLen : -self.endQuoteCharLen] if isinstance(ret, str): # replace escaped characters @@ -1853,10 +1944,10 @@ def __str__(self): class CharsNotIn(Token): """Token for matching words composed of characters *not* in a given set. 
- Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for min is 1 (a - minimum value < 1 is not valid); the default values for max and exact - are 0, meaning no maximum or exact length restriction. + Defined with string containing all disallowed characters, and an optional + minimum, maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. """ def __init__(self, notChars, min=1, max=0, exact=0): @@ -1865,7 +1956,9 @@ def __init__(self, notChars, min=1, max=0, exact=0): self.notChars = notChars if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") + raise ValueError( + "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted" + ) self.minLen = min @@ -1880,7 +1973,7 @@ def __init__(self, notChars, min=1, max=0, exact=0): self.name = str(self) self.errmsg = "Expected " + self.name - self.mayReturnEmpty = (self.minLen == 0) + self.mayReturnEmpty = self.minLen == 0 self.mayIndexError = False def parseImpl(self, instring, loc, doActions=True): @@ -1894,7 +1987,7 @@ def parseImpl(self, instring, loc, doActions=True): start = loc loc += 1 notchars = self.notChars - maxlen = min(start+self.maxLen, len(instring)) + maxlen = min(start + self.maxLen, len(instring)) while loc < maxlen and (instring[loc] not in notchars): loc += 1 @@ -1924,10 +2017,11 @@ def __str__(self): class White(Token): """Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is " \\t\\n". 
Also takes optional min, max, and exact arguments, - as defined for the Word class.""" + by pyparsing grammars. This class is included when some whitespace structures + are significant. Define with a string containing the whitespace characters to be + matched; default is " \\t\\n". Also takes optional min, max, and exact arguments, + as defined for the Word class.""" + whiteStrs = { " ": "", "\t": "", @@ -1940,7 +2034,7 @@ def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): super().__init__() self.matchWhite = ws self.setWhitespaceChars("".join([c for c in self.whiteChars if c not in self.matchWhite])) - self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) + self.name = "".join([White.whiteStrs[c] for c in self.matchWhite]) self.mayReturnEmpty = True self.errmsg = "Expected " + self.name @@ -2007,7 +2101,7 @@ def parseImpl(self, instring, loc, doActions=True): if thiscol > self.col: raise ParseException(instring, loc, "Text not in expected column", self) newloc = loc + self.col - thiscol - ret = instring[loc: newloc] + ret = instring[loc:newloc] return newloc, ret @@ -2026,9 +2120,9 @@ def preParse(self, instring, loc): return loc def parseImpl(self, instring, loc, doActions=True): - if not (loc == 0 - or (loc == self.preParse(instring, 0)) - or (instring[loc-1] == "\n")): # col(loc, instring) != 1: + if not ( + loc == 0 or (loc == self.preParse(instring, 0)) or (instring[loc - 1] == "\n") + ): # col(loc, instring) != 1: exc = self.myException exc.loc = loc exc.pstr = instring @@ -2047,14 +2141,14 @@ def __init__(self): def parseImpl(self, instring, loc, doActions=True): if loc < len(instring): if instring[loc] == "\n": - return loc+1, "\n" + return loc + 1, "\n" else: exc = self.myException exc.loc = loc exc.pstr = instring raise exc elif loc == len(instring): - return loc+1, [] + return loc + 1, [] else: exc = self.myException exc.loc = loc @@ -2094,7 +2188,7 @@ def parseImpl(self, instring, loc, doActions=True): exc.pstr = instring raise exc 
elif loc == len(instring): - return loc+1, [] + return loc + 1, [] elif loc > len(instring): return loc, [] else: @@ -2106,10 +2200,10 @@ def parseImpl(self, instring, loc, doActions=True): class WordStart(_PositionToken): """Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of wordChars - (default=printables). To emulate the \b behavior of regular expressions, - use WordStart(alphanums). WordStart will also match at the beginning of - the string being parsed, or at the beginning of a line. + is not preceded by any character in a given set of wordChars + (default=printables). To emulate the \b behavior of regular expressions, + use WordStart(alphanums). WordStart will also match at the beginning of + the string being parsed, or at the beginning of a line. """ def __init__(self, wordChars=printables): @@ -2119,8 +2213,7 @@ def __init__(self, wordChars=printables): def parseImpl(self, instring, loc, doActions=True): if loc != 0: - if (instring[loc-1] in self.wordChars - or instring[loc] not in self.wordChars): + if instring[loc - 1] in self.wordChars or instring[loc] not in self.wordChars: exc = self.myException exc.loc = loc exc.pstr = instring @@ -2130,10 +2223,10 @@ def parseImpl(self, instring, loc, doActions=True): class WordEnd(_PositionToken): """Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of wordChars - (default=printables). To emulate the \b behavior of regular expressions, - use WordEnd(alphanums). WordEnd will also match at the end of - the string being parsed, or at the end of a line. + is not followed by any character in a given set of wordChars + (default=printables). To emulate the \b behavior of regular expressions, + use WordEnd(alphanums). WordEnd will also match at the end of + the string being parsed, or at the end of a line. 
""" def __init__(self, wordChars=printables): @@ -2145,8 +2238,7 @@ def __init__(self, wordChars=printables): def parseImpl(self, instring, loc, doActions=True): instrlen = len(instring) if instrlen > 0 and loc < instrlen: - if (instring[loc] in self.wordChars - or instring[loc-1] not in self.wordChars): + if instring[loc] in self.wordChars or instring[loc - 1] not in self.wordChars: # ~ raise ParseException( instring, loc, "Expected end of word" ) exc = self.myException exc.loc = loc @@ -2178,7 +2270,7 @@ def append(self, other): def leaveWhitespace(self): """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on - all contained expressions.""" + all contained expressions.""" self.skipWhitespace = False self.exprs = [e.copy() for e in self.exprs] for e in self.exprs: @@ -2218,20 +2310,24 @@ def streamline(self): # (likewise for Or's and MatchFirst's) if len(self.exprs) == 2: other = self.exprs[0] - if (isinstance(other, self.__class__) - and not (other.parseAction) - and other.resultsName is None - and not other.debug): + if ( + isinstance(other, self.__class__) + and not (other.parseAction) + and other.resultsName is None + and not other.debug + ): self.exprs = other.exprs[:] + [self.exprs[1]] self.strRepr = None self.mayReturnEmpty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError other = self.exprs[-1] - if (isinstance(other, self.__class__) - and not (other.parseAction) - and other.resultsName is None - and not other.debug): + if ( + isinstance(other, self.__class__) + and not (other.parseAction) + and other.resultsName is None + and not other.debug + ): self.exprs = self.exprs[:-1] + other.exprs[:] self.strRepr = None self.mayReturnEmpty |= other.mayReturnEmpty @@ -2246,7 +2342,7 @@ def setResultsName(self, name, listAllMatches=False): def validate(self, validateTrace=None): if validateTrace is None: validateTrace = [] - tmp = validateTrace[:]+[self] + tmp = validateTrace[:] + [self] for e in self.exprs: 
e.validate(tmp) self.checkRecursion([]) @@ -2254,13 +2350,14 @@ def validate(self, validateTrace=None): class And(ParseExpression): """Requires all given ParseExpressions to be found in the given order. - Expressions may be separated by whitespace. - May be constructed using the '+' operator. + Expressions may be separated by whitespace. + May be constructed using the '+' operator. """ class _ErrorStop(Empty): def __new__(cls, *args, **kwargs): return And._ErrorStop.instance + _ErrorStop.instance = Empty() _ErrorStop.instance.leaveWhitespace() @@ -2321,8 +2418,8 @@ def __str__(self): class Or(ParseExpression): """Requires that at least one ParseExpression is found. - If two expressions match, the expression that matches the longest string will be used. - May be constructed using the '^' operator. + If two expressions match, the expression that matches the longest string will be used. + May be constructed using the '^' operator. """ def __init__(self, exprs, savelist=False): @@ -2383,8 +2480,8 @@ def checkRecursion(self, parseElementList): class MatchFirst(ParseExpression): """Requires that at least one ParseExpression is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the '|' operator. + If two expressions match, the first one listed is the one that will match. + May be constructed using the '|' operator. """ def __init__(self, exprs, savelist=False): @@ -2443,8 +2540,8 @@ def checkRecursion(self, parseElementList): class Each(ParseExpression): """Requires all given ParseExpressions to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the '&' operator. + Expressions may be separated by whitespace. + May be constructed using the '&' operator. 
""" def __init__(self, exprs, savelist=True): @@ -2579,7 +2676,7 @@ def streamline(self): def checkRecursion(self, parseElementList): if self in parseElementList: - raise RecursiveGrammarException(parseElementList+[self]) + raise RecursiveGrammarException(parseElementList + [self]) subRecCheckList = parseElementList[:] + [self] if self.expr is not None: self.expr.checkRecursion(subRecCheckList) @@ -2587,7 +2684,7 @@ def checkRecursion(self, parseElementList): def validate(self, validateTrace=None): if validateTrace is None: validateTrace = [] - tmp = validateTrace[:]+[self] + tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) self.checkRecursion([]) @@ -2629,7 +2726,7 @@ def __init__(self, expr): super().__init__(expr) self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+str(self.expr) + self.errmsg = "Found unwanted token, " + str(self.expr) def parseImpl(self, instring, loc, doActions=True): try: @@ -2664,7 +2761,7 @@ def parseImpl(self, instring, loc, doActions=True): tokens = [] try: loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) - hasIgnoreExprs = (len(self.ignoreExprs) > 0) + hasIgnoreExprs = len(self.ignoreExprs) > 0 while True: if hasIgnoreExprs: preloc = self._skipIgnorables(instring, loc) @@ -2700,7 +2797,7 @@ def parseImpl(self, instring, loc, doActions=True): # must be at least one loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) try: - hasIgnoreExprs = (len(self.ignoreExprs) > 0) + hasIgnoreExprs = len(self.ignoreExprs) > 0 while True: if hasIgnoreExprs: preloc = self._skipIgnorables(instring, loc) @@ -2732,6 +2829,7 @@ def setResultsName(self, name, listAllMatches=False): class _NullToken: def __bool__(self): return False + __nonzero__ = __bool__ def __str__(self): @@ -2743,8 +2841,8 @@ def __str__(self): class Optional(ParseElementEnhance): """Optional 
matching of the given expression. - A default return string can also be specified, if the optional expression - is not found. + A default return string can also be specified, if the optional expression + is not found. """ def __init__(self, exprs, default=_optionalNotMatched): @@ -2778,9 +2876,9 @@ def __str__(self): class SkipTo(ParseElementEnhance): """Token for skipping over all undefined text until the matched expression is found. - If include is set to true, the matched expression is also consumed. The ignore - argument is used to define grammars (typically quoted strings and comments) that - might contain false matches. + If include is set to true, the matched expression is also consumed. The ignore + argument is used to define grammars (typically quoted strings and comments) that + might contain false matches. """ def __init__(self, other, include=False, ignore=None): @@ -2792,7 +2890,7 @@ def __init__(self, other, include=False, ignore=None): self.mayIndexError = False self.includeMatch = include self.asList = False - self.errmsg = "No match found for "+str(self.expr) + self.errmsg = "No match found for " + str(self.expr) def parseImpl(self, instring, loc, doActions=True): startLoc = loc @@ -2823,17 +2921,17 @@ def parseImpl(self, instring, loc, doActions=True): class Forward(ParseElementEnhance): """Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the Forward variable using the '<<' operator. - - Note: take care when assigning to Forward not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. 
It is recommended that you - explicitly group the values inserted into the Forward:: - fwdExpr << (a | b | c) + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the Forward variable using the '<<' operator. + + Note: take care when assigning to Forward not to overlook precedence of operators. + Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the Forward:: + fwdExpr << (a | b | c) """ def __init__(self, other=None): @@ -2868,7 +2966,7 @@ def validate(self, validateTrace=None): if validateTrace is None: validateTrace = [] if self not in validateTrace: - tmp = validateTrace[:]+[self] + tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) self.checkRecursion([]) @@ -2885,7 +2983,7 @@ def __str__(self): retString = "None" finally: self.__class__ = Forward - return "Forward: "+retString + return "Forward: " + retString def copy(self): if self.expr is not None: @@ -2915,9 +3013,8 @@ class Upcase(TokenConverter): def __init__(self, *args): super().__init__(*args) warnings.warn( - "Upcase class is deprecated, use upcaseTokens parse action instead", - DeprecationWarning, - stacklevel=2) + "Upcase class is deprecated, use upcaseTokens parse action instead", DeprecationWarning, stacklevel=2 + ) def postParse(self, instring, loc, tokenlist): return list(map(string.upper, tokenlist)) @@ -2925,8 +3022,8 @@ def postParse(self, instring, loc, tokenlist): class Combine(TokenConverter): """Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying 'adjacent=False' in the constructor. 
+ By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying 'adjacent=False' in the constructor. """ def __init__(self, expr, joinString="", adjacent=True): @@ -2969,8 +3066,8 @@ def postParse(self, instring, loc, tokenlist): class Dict(TokenConverter): """Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. """ def __init__(self, exprs): @@ -3038,7 +3135,7 @@ def z(*paArgs): thisFunc = f.func_name s, l, t = paArgs[-3:] if len(paArgs) > 3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc + thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t)) try: ret = f(*paArgs) @@ -3047,12 +3144,14 @@ def z(*paArgs): raise sys.stderr.write(f"< "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be:: - a single character - an escaped character with a leading backslash (such as \- or \]) - an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) - an escaped octal character with a leading '\0' (\041, which is a '!' character) - a range of any of the above, separated by a dash ('a-z', etc.) - any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 
+ syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be:: + a single character + an escaped character with a leading backslash (such as \- or \]) + an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) + an escaped octal character with a leading '\0' (\041, which is a '!' character) + a range of any of the above, separated by a dash ('a-z', etc.) + any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) """ try: return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) @@ -3281,27 +3393,31 @@ def srange(s): def matchOnlyAtCol(n): """Helper method for defining parse actions that require matching at a specific - column in the input text. + column in the input text. """ + def verifyCol(strg, locn, toks): if col(locn, strg) != n: raise ParseException(strg, locn, "matched token not at column %d" % n) + return verifyCol def replaceWith(replStr): """Helper method for common parse actions that simply return a literal value. Especially - useful when used with transformString(). + useful when used with transformString(). """ + def _replFunc(*args): return [replStr] + return _replFunc def removeQuotes(s, l, t): """Helper parse action for removing quotation marks from parsed quoted strings. 
- To use, add this parse action to quoted string using:: - quotedString.setParseAction( removeQuotes ) + To use, add this parse action to quoted string using:: + quotedString.setParseAction( removeQuotes ) """ return t[0][1:-1] @@ -3318,7 +3434,7 @@ def downcaseTokens(s, l, t): def keepOriginalText(s, startLoc, t): """Helper parse action to preserve original parsed text, - overriding any nested parse actions.""" + overriding any nested parse actions.""" try: endloc = getTokensEndLoc() except ParseException: @@ -3330,8 +3446,9 @@ def keepOriginalText(s, startLoc, t): def getTokensEndLoc(): """Method to be called from within a parse action to determine the end - location of the parsed tokens.""" + location of the parsed tokens.""" import inspect + fstack = inspect.stack() try: # search up the stack (through intervening argument normalizers) for correct calling routine @@ -3340,7 +3457,9 @@ def getTokensEndLoc(): endloc = f[0].f_locals["loc"] return endloc else: - raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") + raise ParseFatalException( + "incorrect usage of getTokensEndLoc - may only be called from within a parse action" + ) finally: del fstack @@ -3353,22 +3472,36 @@ def _makeTags(tagStr, xml): else: resname = tagStr.name - tagAttrName = Word(alphas, alphanums+"_-:") + tagAttrName = Word(alphas, alphanums + "_-:") if xml: tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) - openTag = Suppress("<") + tagStr + \ - Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + \ - Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/') + Suppress(">") + openTag = ( + Suppress("<") + + tagStr + + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + + Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == "/") + + Suppress(">") + ) else: printablesLessRAbrack = "".join([c for c in 
printables if c not in ">"]) tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr + \ - Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)))) + \ - Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/') + Suppress(">") + openTag = ( + Suppress("<") + + tagStr + + Dict( + ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue))) + ) + + Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == "/") + + Suppress(">") + ) closeTag = Combine(_L("") - openTag = openTag.setResultsName("start"+"".join(resname.replace(":", " ").title().split())).setName("<%s>" % tagStr) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":", " ").title().split())).setName("" % tagStr) + openTag = openTag.setResultsName("start" + "".join(resname.replace(":", " ").title().split())).setName( + "<%s>" % tagStr + ) + closeTag = closeTag.setResultsName("end" + "".join(resname.replace(":", " ").title().split())).setName( + "" % tagStr + ) return openTag, closeTag @@ -3385,20 +3518,20 @@ def makeXMLTags(tagStr): def withAttribute(*args, **attrDict): """Helper to create a validating parse action to be used with start tags created - with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - or
. - - Call withAttribute with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in (class="Customer",align="right"), or - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. - - To verify that the attribute exists, but without specifying a value, pass - withAttribute.ANY_VALUE as the value. - """ + with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + or
. + + Call withAttribute with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in (class="Customer",align="right"), or + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + To verify that the attribute exists, but without specifying a value, pass + withAttribute.ANY_VALUE as the value. + """ if args: attrs = args[:] else: @@ -3410,7 +3543,10 @@ def pa(s, l, tokens): if attrName not in tokens: raise ParseException(s, l, "no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s, l, f"attribute '{attrName}' has value '{tokens[attrName]}', must be '{attrValue}'") + raise ParseException( + s, l, f"attribute '{attrName}' has value '{tokens[attrName]}', must be '{attrValue}'" + ) + return pa @@ -3423,30 +3559,30 @@ def pa(s, l, tokens): def operatorPrecedence(baseExpr, opList): """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. 
- - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants opAssoc.RIGHT and opAssoc.LEFT. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted) + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. + + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants opAssoc.RIGHT and opAssoc.LEFT. 
+ - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) """ ret = Forward() - lastExpr = baseExpr | (Suppress('(') + ret + Suppress(')')) + lastExpr = baseExpr | (Suppress("(") + ret + Suppress(")")) for operDef in opList: opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] if arity == 3: @@ -3459,12 +3595,15 @@ def operatorPrecedence(baseExpr, opList): matchExpr = FollowedBy(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr)) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr)) + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( + lastExpr + OneOrMore(opExpr + lastExpr) + ) else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) + matchExpr = FollowedBy(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + Group( + lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr + ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") elif rightLeftAssoc == opAssoc.RIGHT: @@ -3475,12 +3614,15 @@ def operatorPrecedence(baseExpr, opList): matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) elif arity == 2: if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr)) + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( + lastExpr + OneOrMore(opExpr + thisExpr) + ) else: matchExpr = FollowedBy(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr)) elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group(lastExpr + 
opExpr1 + thisExpr + opExpr2 + thisExpr) + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + Group( + lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr + ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") else: @@ -3493,42 +3635,51 @@ def operatorPrecedence(baseExpr, opList): return ret -dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") -sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") -quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()) +dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName( + "string enclosed in double quotes" +) +sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName( + "string enclosed in single quotes" +) +quotedString = Regex( + r"""(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')""" +).setName("quotedString using single or double quotes") +unicodeString = Combine(_L("u") + quotedString.copy()) def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): """Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). 
- - Parameters: - - opener - opening character for a nested list (default="("); can also be a pyparsing expression - - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - - content - expression for items within the nested lists (default=None) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the ignoreExpr argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an Or or MatchFirst. - The default is quotedString, but if no expressions are to be ignored, - then pass None for this argument. + delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the ignoreExpr argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an Or or MatchFirst. + The default is quotedString, but if no expressions are to be ignored, + then pass None for this argument. 
""" if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: if isinstance(opener, str) and isinstance(closer, str): if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS, exact=1))) - .setParseAction(lambda t: t[0].strip())) + content = Combine( + OneOrMore(~ignoreExpr + CharsNotIn(opener + closer + ParserElement.DEFAULT_WHITE_CHARS, exact=1)) + ).setParseAction(lambda t: t[0].strip()) else: - content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t: t[0].strip())) + content = empty + CharsNotIn(opener + closer + ParserElement.DEFAULT_WHITE_CHARS).setParseAction( + lambda t: t[0].strip() + ) else: raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() @@ -3541,20 +3692,21 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): def indentedBlock(blockStatementExpr, indentStack, indent=True): """Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=True) - - A valid block must contain at least one blockStatement. + those used to define block statements in Python source code. 
+ + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=True) + + A valid block must contain at least one blockStatement. """ + def checkPeerIndent(s, l, t): if l >= len(s): return @@ -3585,8 +3737,12 @@ def checkUnindent(s, l, t): UNDENT = Empty().setParseAction(checkUnindent) if indent: smExpr = Group( - Optional(NL) + FollowedBy(blockStatementExpr) - + INDENT + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) + UNDENT) + Optional(NL) + + FollowedBy(blockStatementExpr) + + INDENT + + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) + + UNDENT + ) else: smExpr = Group(Optional(NL) + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL)))) blockStatementExpr.ignore("\\" + LineEnd()) @@ -3596,7 +3752,7 @@ def checkUnindent(s, l, t): alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") -anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums+"_:")) +anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:")) commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";") _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), "><& '")) @@ -3616,7 +3772,11 @@ def replaceHTMLEntity(t): javaStyleComment = cppStyleComment pythonStyleComment = Regex(r"#.*").setName("Python style comment") _noncomma = "".join([c for c in printables if c != ","]) -_commasepitem = Combine(OneOrMore(Word(_noncomma) + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") +_commasepitem = ( + Combine(OneOrMore(Word(_noncomma) + 
Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))) + .streamline() + .setName("commaItem") +) commaSeparatedList = delimitedList(Optional(quotedString | _commasepitem, default="")).setName("commaSeparatedList") @@ -3634,7 +3794,7 @@ def test(teststring): except ParseBaseException as err: print(teststring + "->") print(err.line) - print(" "*(err.column-1) + "^") + print(" " * (err.column - 1) + "^") print(err) print() @@ -3648,9 +3808,10 @@ def test(teststring): tableNameList = Group(delimitedList(tableName)) # .setName("tables") simpleSQL = ( selectToken - + ('*' | columnNameList).setResultsName("columns") + + ("*" | columnNameList).setResultsName("columns") + fromToken - + tableNameList.setResultsName("tables")) + + tableNameList.setResultsName("tables") + ) test("SELECT * from XYZZY, ABC") test("select * from SYS.XYZZY") diff --git a/lib/bx_extras/stats.py b/lib/bx_extras/stats.py index ff0f0c53..4140b366 100644 --- a/lib/bx_extras/stats.py +++ b/lib/bx_extras/stats.py @@ -217,8 +217,7 @@ import math import string -from . import pstat # required 3rd party module - +from . import pstat # required 3rd party module __version__ = 0.6 @@ -227,20 +226,19 @@ class Dispatch: """ -The Dispatch class, care of David Ascher, allows different functions to -be called depending on the argument types. This way, there can be one -function name regardless of the argument type. To access function doc -in stats.py module, prefix the function with an 'l' or 'a' for list or -array arguments, respectively. That is, print stats.lmean.__doc__ or -print stats.amean.__doc__ or whatever. -""" + The Dispatch class, care of David Ascher, allows different functions to + be called depending on the argument types. This way, there can be one + function name regardless of the argument type. To access function doc + in stats.py module, prefix the function with an 'l' or 'a' for list or + array arguments, respectively. 
That is, print stats.lmean.__doc__ or + print stats.amean.__doc__ or whatever.""" def __init__(self, *tuples): self._dispatch = {} for func, types in tuples: for t in types: if t in self._dispatch.keys(): - raise ValueError("can't have two dispatches on "+str(t)) + raise ValueError("can't have two dispatches on " + str(t)) self._dispatch[t] = func self._types = list(self._dispatch.keys()) @@ -256,15 +254,15 @@ def __call__(self, arg1, *args, **kw): # CENTRAL TENDENCY + def lgeometricmean(inlist): """ -Calculates the geometric mean of the values in the passed list. -That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list. + Calculates the geometric mean of the values in the passed list. + That is: n-th root of (x1 * x2 * ... * xn). Assumes a '1D' list. -Usage: lgeometricmean(inlist) -""" + Usage: lgeometricmean(inlist)""" mult = 1.0 - one_over_n = 1.0/len(inlist) + one_over_n = 1.0 / len(inlist) for item in inlist: mult = mult * pow(item, one_over_n) return mult @@ -272,79 +270,74 @@ def lgeometricmean(inlist): def lharmonicmean(inlist): """ -Calculates the harmonic mean of the values in the passed list. -That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list. + Calculates the harmonic mean of the values in the passed list. + That is: n / (1/x1 + 1/x2 + ... + 1/xn). Assumes a '1D' list. -Usage: lharmonicmean(inlist) -""" + Usage: lharmonicmean(inlist)""" sum = 0 for item in inlist: - sum = sum + 1.0/item + sum = sum + 1.0 / item return len(inlist) / sum def lmean(inlist): """ -Returns the arithematic mean of the values in the passed list. -Assumes a '1D' list, but will function on the 1st dim of an array(!). + Returns the arithematic mean of the values in the passed list. + Assumes a '1D' list, but will function on the 1st dim of an array(!). 
-Usage: lmean(inlist) -""" + Usage: lmean(inlist)""" sum = 0 for item in inlist: sum = sum + item - return sum/float(len(inlist)) + return sum / float(len(inlist)) def lmedian(inlist, numbins=1000): """ -Returns the computed median value of a list of numbers, given the -number of bins to use for the histogram (more bins brings the computed value -closer to the median score, default number of bins = 1000). See G.W. -Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics. + Returns the computed median value of a list of numbers, given the + number of bins to use for the histogram (more bins brings the computed value + closer to the median score, default number of bins = 1000). See G.W. + Heiman's Basic Stats (1st Edition), or CRC Probability & Statistics. -Usage: lmedian (inlist, numbins=1000) -""" + Usage: lmedian (inlist, numbins=1000)""" (hist, smallest, binsize, extras) = histogram(inlist, numbins) # make histog - cumhist = cumsum(hist) # make cumulative histogram - for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score - if cumhist[i] >= len(inlist)/2.0: + cumhist = cumsum(hist) # make cumulative histogram + for i in range(len(cumhist)): # get 1st(!) index holding 50%ile score + if cumhist[i] >= len(inlist) / 2.0: cfbin = i break - LRL = smallest + binsize*cfbin # get lower read limit of that bin - cfbelow = cumhist[cfbin-1] - freq = float(hist[cfbin]) # frequency IN the 50%ile bin - median = LRL + ((len(inlist)/2.0 - cfbelow)/float(freq))*binsize # median formula + LRL = smallest + binsize * cfbin # get lower read limit of that bin + cfbelow = cumhist[cfbin - 1] + freq = float(hist[cfbin]) # frequency IN the 50%ile bin + median = LRL + ((len(inlist) / 2.0 - cfbelow) / float(freq)) * binsize # median formula return median def lmedianscore(inlist): """ -Returns the 'middle' score of the passed list. If there is an even -number of scores, the mean of the 2 middle scores is returned. + Returns the 'middle' score of the passed list. 
If there is an even + number of scores, the mean of the 2 middle scores is returned. -Usage: lmedianscore(inlist) -""" + Usage: lmedianscore(inlist)""" newlist = sorted(copy.deepcopy(inlist)) - if len(newlist) % 2 == 0: # if even number of scores, average middle 2 - index = len(newlist)/2 # integer division correct - median = float(newlist[index] + newlist[index-1]) / 2 + if len(newlist) % 2 == 0: # if even number of scores, average middle 2 + index = len(newlist) / 2 # integer division correct + median = float(newlist[index] + newlist[index - 1]) / 2 else: - index = len(newlist)/2 # int divsion gives mid value when count from 0 + index = len(newlist) / 2 # int divsion gives mid value when count from 0 median = newlist[index] return median def lmode(inlist): """ -Returns a list of the modal (most common) score(s) in the passed -list. If there is more than one such score, all are returned. The -bin-count for the mode(s) is also returned. + Returns a list of the modal (most common) score(s) in the passed + list. If there is more than one such score, all are returned. The + bin-count for the mode(s) is also returned. -Usage: lmode(inlist) -Returns: bin-count for mode(s), a list of modal value(s) -""" + Usage: lmode(inlist) + Returns: bin-count for mode(s), a list of modal value(s)""" scores = sorted(pstat.unique(inlist)) freq = [] @@ -366,14 +359,14 @@ def lmode(inlist): # MOMENTS + def lmoment(inlist, moment=1): """ -Calculates the nth moment about the mean for a sample (defaults to -the 1st moment). Used to calculate coefficients of skewness and kurtosis. + Calculates the nth moment about the mean for a sample (defaults to + the 1st moment). Used to calculate coefficients of skewness and kurtosis. -Usage: lmoment(inlist,moment=1) -Returns: appropriate moment (r) from ... 1/n * SUM((inlist(i)-mean)**r) -""" + Usage: lmoment(inlist,moment=1) + Returns: appropriate moment (r) from ... 
1/n * SUM((inlist(i)-mean)**r)""" if moment == 1: return 0.0 else: @@ -381,47 +374,43 @@ def lmoment(inlist, moment=1): n = len(inlist) s = 0 for x in inlist: - s = s + (x-mn)**moment - return s/float(n) + s = s + (x - mn) ** moment + return s / float(n) def lvariation(inlist): """ -Returns the coefficient of variation, as defined in CRC Standard -Probability and Statistics, p.6. + Returns the coefficient of variation, as defined in CRC Standard + Probability and Statistics, p.6. -Usage: lvariation(inlist) -""" - return 100.0*samplestdev(inlist)/float(mean(inlist)) + Usage: lvariation(inlist)""" + return 100.0 * samplestdev(inlist) / float(mean(inlist)) def lskew(inlist): """ -Returns the skewness of a distribution, as defined in Numerical -Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) + Returns the skewness of a distribution, as defined in Numerical + Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) -Usage: lskew(inlist) -""" - return moment(inlist, 3)/pow(moment(inlist, 2), 1.5) + Usage: lskew(inlist)""" + return moment(inlist, 3) / pow(moment(inlist, 2), 1.5) def lkurtosis(inlist): """ -Returns the kurtosis of a distribution, as defined in Numerical -Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) + Returns the kurtosis of a distribution, as defined in Numerical + Recipies (alternate defn in CRC Standard Probability and Statistics, p.6.) -Usage: lkurtosis(inlist) -""" - return moment(inlist, 4)/pow(moment(inlist, 2), 2.0) + Usage: lkurtosis(inlist)""" + return moment(inlist, 4) / pow(moment(inlist, 2), 2.0) def ldescribe(inlist): """ -Returns some descriptive statistics of the passed list (assumed to be 1D). + Returns some descriptive statistics of the passed list (assumed to be 1D). 
-Usage: ldescribe(inlist) -Returns: n, mean, standard deviation, skew, kurtosis -""" + Usage: ldescribe(inlist) + Returns: n, mean, standard deviation, skew, kurtosis""" n = len(inlist) mm = (min(inlist), max(inlist)) m = mean(inlist) @@ -433,14 +422,14 @@ def ldescribe(inlist): # FREQUENCY STATS + def litemfreq(inlist): """ -Returns a list of pairs. Each pair consists of one of the scores in inlist -and it's frequency count. Assumes a 1D list is passed. + Returns a list of pairs. Each pair consists of one of the scores in inlist + and it's frequency count. Assumes a 1D list is passed. -Usage: litemfreq(inlist) -Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies) -""" + Usage: litemfreq(inlist) + Returns: a 2D frequency table (col [0:n-1]=scores, col n=frequencies)""" scores = sorted(pstat.unique(inlist)) freq = [] for item in scores: @@ -450,85 +439,83 @@ def litemfreq(inlist): def lscoreatpercentile(inlist, percent): """ -Returns the score at a given percentile relative to the distribution -given by inlist. + Returns the score at a given percentile relative to the distribution + given by inlist. -Usage: lscoreatpercentile(inlist,percent) -""" + Usage: lscoreatpercentile(inlist,percent)""" if percent > 1: print("\nDividing percent>1 by 100 in lscoreatpercentile().\n") percent = percent / 100.0 - targetcf = percent*len(inlist) + targetcf = percent * len(inlist) h, lrl, binsize, extras = histogram(inlist) cumhist = cumsum(copy.deepcopy(h)) for i in range(len(cumhist)): if cumhist[i] >= targetcf: break - score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i) + score = binsize * ((targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + binsize * i) return score def lpercentileofscore(inlist, score, histbins=10, defaultlimits=None): """ -Returns the percentile value of a score relative to the distribution -given by inlist. Formula depends on the values used to histogram the data(!). 
+ Returns the percentile value of a score relative to the distribution + given by inlist. Formula depends on the values used to histogram the data(!). -Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None) -""" + Usage: lpercentileofscore(inlist,score,histbins=10,defaultlimits=None)""" h, lrl, binsize, extras = histogram(inlist, histbins, defaultlimits) cumhist = cumsum(copy.deepcopy(h)) - i = int((score - lrl)/float(binsize)) - pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inlist)) * 100 + i = int((score - lrl) / float(binsize)) + pct = (cumhist[i - 1] + ((score - (lrl + binsize * i)) / float(binsize)) * h[i]) / float(len(inlist)) * 100 return pct def lhistogram(inlist, numbins=10, defaultreallimits=None, printextras=0): """ -Returns (i) a list of histogram bin counts, (ii) the smallest value -of the histogram binning, and (iii) the bin width (the last 2 are not -necessarily integers). Default number of bins is 10. If no sequence object -is given for defaultreallimits, the routine picks (usually non-pretty) bins -spanning all the numbers in the inlist. - -Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0) -Returns: list of bin values, lowerreallimit, binsize, extrapoints -""" - if (defaultreallimits is not None): - if type(defaultreallimits) not in [list, tuple] or len(defaultreallimits) == 1: # only one limit given, assumed to be lower one & upper is calc'd + Returns (i) a list of histogram bin counts, (ii) the smallest value + of the histogram binning, and (iii) the bin width (the last 2 are not + necessarily integers). Default number of bins is 10. If no sequence object + is given for defaultreallimits, the routine picks (usually non-pretty) bins + spanning all the numbers in the inlist. 
+ + Usage: lhistogram (inlist, numbins=10, defaultreallimits=None,suppressoutput=0) + Returns: list of bin values, lowerreallimit, binsize, extrapoints""" + if defaultreallimits is not None: + if ( + type(defaultreallimits) not in [list, tuple] or len(defaultreallimits) == 1 + ): # only one limit given, assumed to be lower one & upper is calc'd lowerreallimit = defaultreallimits upperreallimit = 1.0001 * max(inlist) else: # assume both limits given lowerreallimit = defaultreallimits[0] upperreallimit = defaultreallimits[1] - binsize = (upperreallimit-lowerreallimit)/float(numbins) - else: # no limits given for histogram, both must be calc'd - estbinwidth = (max(inlist)-min(inlist))/float(numbins) + 1 # 1=>cover all - binsize = (max(inlist)-min(inlist)+estbinwidth)/float(numbins) - lowerreallimit = min(inlist) - binsize/2 # lower real limit,1st bin - bins = [0]*(numbins) + binsize = (upperreallimit - lowerreallimit) / float(numbins) + else: # no limits given for histogram, both must be calc'd + estbinwidth = (max(inlist) - min(inlist)) / float(numbins) + 1 # 1=>cover all + binsize = (max(inlist) - min(inlist) + estbinwidth) / float(numbins) + lowerreallimit = min(inlist) - binsize / 2 # lower real limit,1st bin + bins = [0] * (numbins) extrapoints = 0 for num in inlist: try: - if (num-lowerreallimit) < 0: + if (num - lowerreallimit) < 0: extrapoints = extrapoints + 1 else: - bintoincrement = int((num-lowerreallimit)/float(binsize)) + bintoincrement = int((num - lowerreallimit) / float(binsize)) bins[bintoincrement] = bins[bintoincrement] + 1 except Exception: extrapoints = extrapoints + 1 - if (extrapoints > 0 and printextras == 1): - print('\nPoints outside given histogram range =', extrapoints) + if extrapoints > 0 and printextras == 1: + print("\nPoints outside given histogram range =", extrapoints) return (bins, lowerreallimit, binsize, extrapoints) def lcumfreq(inlist, numbins=10, defaultreallimits=None): """ -Returns a cumulative frequency histogram, using the 
histogram function. + Returns a cumulative frequency histogram, using the histogram function. -Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None) -Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints -""" + Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None) + Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints""" h, l, b, e = histogram(inlist, numbins, defaultreallimits) cumhist = cumsum(copy.deepcopy(h)) return cumhist, l, b, e @@ -536,33 +523,32 @@ def lcumfreq(inlist, numbins=10, defaultreallimits=None): def lrelfreq(inlist, numbins=10, defaultreallimits=None): """ -Returns a relative frequency histogram, using the histogram function. + Returns a relative frequency histogram, using the histogram function. -Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None) -Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints -""" + Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None) + Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints""" h, l, b, e = histogram(inlist, numbins, defaultreallimits) for i in range(len(h)): - h[i] = h[i]/float(len(inlist)) + h[i] = h[i] / float(len(inlist)) return h, l, b, e # VARIABILITY FUNCTIONS + def lobrientransform(*args): """ -Computes a transform on input data (any number of columns). Used to -test for homogeneity of variance prior to running one-way stats. From -Maxwell and Delaney, p.112. + Computes a transform on input data (any number of columns). Used to + test for homogeneity of variance prior to running one-way stats. From + Maxwell and Delaney, p.112. 
-Usage: lobrientransform(*args) -Returns: transformed data for use in an ANOVA -""" + Usage: lobrientransform(*args) + Returns: transformed data for use in an ANOVA""" TINY = 1e-10 k = len(args) - n = [0.0]*k - v = [0.0]*k - m = [0.0]*k + n = [0.0] * k + v = [0.0] * k + m = [0.0] * k nargs = [] for i in range(k): nargs.append(copy.deepcopy(args[i])) @@ -571,109 +557,101 @@ def lobrientransform(*args): m[i] = mean(nargs[i]) for j in range(k): for i in range(n[j]): - t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2 - t2 = 0.5*v[j]*(n[j]-1.0) - t3 = (n[j]-1.0)*(n[j]-2.0) - nargs[j][i] = (t1-t2) / float(t3) + t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j]) ** 2 + t2 = 0.5 * v[j] * (n[j] - 1.0) + t3 = (n[j] - 1.0) * (n[j] - 2.0) + nargs[j][i] = (t1 - t2) / float(t3) check = 1 for j in range(k): if v[j] - mean(nargs[j]) > TINY: check = 0 if check != 1: - raise ValueError('Problem in obrientransform.') + raise ValueError("Problem in obrientransform.") else: return nargs def lsamplevar(inlist): """ -Returns the variance of the values in the passed list using -N for the denominator (i.e., DESCRIBES the sample variance only). + Returns the variance of the values in the passed list using + N for the denominator (i.e., DESCRIBES the sample variance only). -Usage: lsamplevar(inlist) -""" + Usage: lsamplevar(inlist)""" n = len(inlist) mn = mean(inlist) deviations = [] for item in inlist: - deviations.append(item-mn) - return ss(deviations)/float(n) + deviations.append(item - mn) + return ss(deviations) / float(n) def lsamplestdev(inlist): """ -Returns the standard deviation of the values in the passed list using -N for the denominator (i.e., DESCRIBES the sample stdev only). + Returns the standard deviation of the values in the passed list using + N for the denominator (i.e., DESCRIBES the sample stdev only). 
-Usage: lsamplestdev(inlist) -""" + Usage: lsamplestdev(inlist)""" return math.sqrt(samplevar(inlist)) def lvar(inlist): """ -Returns the variance of the values in the passed list using N-1 -for the denominator (i.e., for estimating population variance). + Returns the variance of the values in the passed list using N-1 + for the denominator (i.e., for estimating population variance). -Usage: lvar(inlist) -""" + Usage: lvar(inlist)""" n = len(inlist) mn = mean(inlist) - deviations = [0]*len(inlist) + deviations = [0] * len(inlist) for i in range(len(inlist)): deviations[i] = inlist[i] - mn - return ss(deviations)/float(n-1) + return ss(deviations) / float(n - 1) def lstdev(inlist): """ -Returns the standard deviation of the values in the passed list -using N-1 in the denominator (i.e., to estimate population stdev). + Returns the standard deviation of the values in the passed list + using N-1 in the denominator (i.e., to estimate population stdev). -Usage: lstdev(inlist) -""" + Usage: lstdev(inlist)""" return math.sqrt(var(inlist)) def lsterr(inlist): """ -Returns the standard error of the values in the passed list using N-1 -in the denominator (i.e., to estimate population standard error). + Returns the standard error of the values in the passed list using N-1 + in the denominator (i.e., to estimate population standard error). -Usage: lsterr(inlist) -""" + Usage: lsterr(inlist)""" return stdev(inlist) / float(math.sqrt(len(inlist))) def lsem(inlist): """ -Returns the estimated standard error of the mean (sx-bar) of the -values in the passed list. sem = stdev / sqrt(n) + Returns the estimated standard error of the mean (sx-bar) of the + values in the passed list. sem = stdev / sqrt(n) -Usage: lsem(inlist) -""" + Usage: lsem(inlist)""" sd = stdev(inlist) n = len(inlist) - return sd/math.sqrt(n) + return sd / math.sqrt(n) def lz(inlist, score): """ -Returns the z-score for a given input score, given that score and the -list from which that score came. 
Not appropriate for population calculations. + Returns the z-score for a given input score, given that score and the + list from which that score came. Not appropriate for population calculations. -Usage: lz(inlist, score) -""" - z = (score-mean(inlist))/samplestdev(inlist) + Usage: lz(inlist, score)""" + z = (score - mean(inlist)) / samplestdev(inlist) return z def lzs(inlist): """ -Returns a list of z-scores, one for each score in the passed list. + Returns a list of z-scores, one for each score in the passed list. -Usage: lzs(inlist) -""" + Usage: lzs(inlist)""" zscores = [] for item in inlist: zscores.append(z(inlist, item)) @@ -682,171 +660,170 @@ def lzs(inlist): # TRIMMING FUNCTIONS + def ltrimboth(l, proportiontocut): """ -Slices off the passed proportion of items from BOTH ends of the passed -list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost' -10% of scores. Assumes list is sorted by magnitude. Slices off LESS if -proportion results in a non-integer slice index (i.e., conservatively -slices off proportiontocut). - -Usage: ltrimboth (l,proportiontocut) -Returns: trimmed version of list l -""" - lowercut = int(proportiontocut*len(l)) + Slices off the passed proportion of items from BOTH ends of the passed + list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost' + 10% of scores. Assumes list is sorted by magnitude. Slices off LESS if + proportion results in a non-integer slice index (i.e., conservatively + slices off proportiontocut). + + Usage: ltrimboth (l,proportiontocut) + Returns: trimmed version of list l""" + lowercut = int(proportiontocut * len(l)) uppercut = len(l) - lowercut return l[lowercut:uppercut] -def ltrim1(l, proportiontocut, tail='right'): +def ltrim1(l, proportiontocut, tail="right"): """ -Slices off the passed proportion of items from ONE end of the passed -list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' -10% of scores). 
Slices off LESS if proportion results in a non-integer -slice index (i.e., conservatively slices off proportiontocut). - -Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left' -Returns: trimmed version of list l -""" - if tail == 'right': + Slices off the passed proportion of items from ONE end of the passed + list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' + 10% of scores). Slices off LESS if proportion results in a non-integer + slice index (i.e., conservatively slices off proportiontocut). + + Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left' + Returns: trimmed version of list l""" + if tail == "right": lowercut = 0 - uppercut = len(l) - int(proportiontocut*len(l)) - elif tail == 'left': - lowercut = int(proportiontocut*len(l)) + uppercut = len(l) - int(proportiontocut * len(l)) + elif tail == "left": + lowercut = int(proportiontocut * len(l)) uppercut = len(l) return l[lowercut:uppercut] # CORRELATION FUNCTIONS + def lpaired(x, y): """ -Interactively determines the type of data and then runs the -appropriated statistic for paired group data. - -Usage: lpaired(x,y) -Returns: appropriate statistic name, value, and probability -""" - samples = '' - while samples not in ['i', 'r', 'I', 'R', 'c', 'C']: - print('\nIndependent or related samples, or correlation (i,r,c): ', end=' ') + Interactively determines the type of data and then runs the + appropriated statistic for paired group data. 
+ + Usage: lpaired(x,y) + Returns: appropriate statistic name, value, and probability""" + samples = "" + while samples not in ["i", "r", "I", "R", "c", "C"]: + print("\nIndependent or related samples, or correlation (i,r,c): ", end=" ") samples = input() - if samples in ['i', 'I', 'r', 'R']: - print('\nComparing variances ...', end=' ') -# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 + if samples in ["i", "I", "r", "R"]: + print("\nComparing variances ...", end=" ") + # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 r = obrientransform(x, y) f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1)) if p < 0.05: - vartype = 'unequal, p='+str(round(p, 4)) + vartype = "unequal, p=" + str(round(p, 4)) else: - vartype = 'equal' + vartype = "equal" print(vartype) - if samples in ['i', 'I']: - if vartype[0] == 'e': + if samples in ["i", "I"]: + if vartype[0] == "e": t, p = ttest_ind(x, y, 0) - print('\nIndependent samples t-test: ', round(t, 4), round(p, 4)) + print("\nIndependent samples t-test: ", round(t, 4), round(p, 4)) else: if len(x) > 20 or len(y) > 20: z, p = ranksums(x, y) - print('\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4)) + print("\nRank Sums test (NONparametric, n>20): ", round(z, 4), round(p, 4)) else: u, p = mannwhitneyu(x, y) - print('\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4)) + print("\nMann-Whitney U-test (NONparametric, ns<20): ", round(u, 4), round(p, 4)) else: # RELATED SAMPLES - if vartype[0] == 'e': + if vartype[0] == "e": t, p = ttest_rel(x, y, 0) - print('\nRelated samples t-test: ', round(t, 4), round(p, 4)) + print("\nRelated samples t-test: ", round(t, 4), round(p, 4)) else: t, p = ranksums(x, y) - print('\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)) + print("\nWilcoxon T-test (NONparametric): ", round(t, 4), round(p, 4)) else: # CORRELATION ANALYSIS - corrtype = '' - while corrtype not in ['c', 'C', 'r', 'R', 'd', 
'D']: - print('\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', end=' ') + corrtype = "" + while corrtype not in ["c", "C", "r", "R", "d", "D"]: + print("\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ", end=" ") corrtype = input() - if corrtype in ['c', 'C']: + if corrtype in ["c", "C"]: m, b, r, p, see = linregress(x, y) - print('\nLinear regression for continuous variables ...') - lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]] + print("\nLinear regression for continuous variables ...") + lol = [ + ["Slope", "Intercept", "r", "Prob", "SEestimate"], + [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)], + ] pstat.printcc(lol) - elif corrtype in ['r', 'R']: + elif corrtype in ["r", "R"]: r, p = spearmanr(x, y) - print('\nCorrelation for ranked variables ...') + print("\nCorrelation for ranked variables ...") print("Spearman's r: ", round(r, 4), round(p, 4)) else: # DICHOTOMOUS r, p = pointbiserialr(x, y) - print('\nAssuming x contains a dichotomous variable ...') - print('Point Biserial r: ', round(r, 4), round(p, 4)) - print('\n\n') + print("\nAssuming x contains a dichotomous variable ...") + print("Point Biserial r: ", round(r, 4), round(p, 4)) + print("\n\n") return None def lpearsonr(x, y): """ -Calculates a Pearson correlation coefficient and the associated -probability value. Taken from Heiman's Basic Statistics for the Behav. -Sci (2nd), p.195. + Calculates a Pearson correlation coefficient and the associated + probability value. Taken from Heiman's Basic Statistics for the Behav. + Sci (2nd), p.195. -Usage: lpearsonr(x,y) where x and y are equal-length lists -Returns: Pearson's r value, two-tailed p-value -""" + Usage: lpearsonr(x,y) where x and y are equal-length lists + Returns: Pearson's r value, two-tailed p-value""" TINY = 1.0e-30 if len(x) != len(y): - raise ValueError('Input values not paired in pearsonr. 
Aborting.') + raise ValueError("Input values not paired in pearsonr. Aborting.") n = len(x) x = [float(_) for _ in x] y = [float(_) for _ in y] - r_num = n*(summult(x, y)) - sum(x)*sum(y) - r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y))) - r = (r_num / r_den) # denominator already a float - df = n-2 - t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) - prob = betai(0.5*df, 0.5, df/float(df+t*t)) + r_num = n * (summult(x, y)) - sum(x) * sum(y) + r_den = math.sqrt((n * ss(x) - square_of_sums(x)) * (n * ss(y) - square_of_sums(y))) + r = r_num / r_den # denominator already a float + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = betai(0.5 * df, 0.5, df / float(df + t * t)) return r, prob def lspearmanr(x, y): """ -Calculates a Spearman rank-order correlation coefficient. Taken -from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. + Calculates a Spearman rank-order correlation coefficient. Taken + from Heiman's Basic Statistics for the Behav. Sci (1st), p.192. -Usage: lspearmanr(x,y) where x and y are equal-length lists -Returns: Spearman's r, two-tailed p-value -""" + Usage: lspearmanr(x,y) where x and y are equal-length lists + Returns: Spearman's r, two-tailed p-value""" if len(x) != len(y): - raise ValueError('Input values not paired in spearmanr. Aborting.') + raise ValueError("Input values not paired in spearmanr. Aborting.") n = len(x) rankx = rankdata(x) ranky = rankdata(y) dsq = sumdiffsquared(rankx, ranky) - rs = 1 - 6*dsq / float(n*(n**2-1)) - t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs))) - df = n-2 - probrs = betai(0.5*df, 0.5, df/(df+t*t)) # t already a float -# probability values for rs are from part 2 of the spearman function in -# Numerical Recipies, p.510. They are close to tables, but not exact. (?) 
+ rs = 1 - 6 * dsq / float(n * (n**2 - 1)) + t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs))) + df = n - 2 + probrs = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float + # probability values for rs are from part 2 of the spearman function in + # Numerical Recipies, p.510. They are close to tables, but not exact. (?) return rs, probrs def lpointbiserialr(x, y): """ -Calculates a point-biserial correlation coefficient and the associated -probability value. Taken from Heiman's Basic Statistics for the Behav. -Sci (1st), p.194. + Calculates a point-biserial correlation coefficient and the associated + probability value. Taken from Heiman's Basic Statistics for the Behav. + Sci (1st), p.194. -Usage: lpointbiserialr(x,y) where x,y are equal-length lists -Returns: Point-biserial r, two-tailed p-value -""" + Usage: lpointbiserialr(x,y) where x,y are equal-length lists + Returns: Point-biserial r, two-tailed p-value""" TINY = 1e-30 if len(x) != len(y): - raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.') + raise ValueError("INPUT VALUES NOT PAIRED IN pointbiserialr. 
ABORTING.") data = pstat.abut(x, y) categories = pstat.unique(x) if len(categories) != 2: raise ValueError("Exactly 2 categories required for pointbiserialr().") - else: # there are 2 categories, continue + else: # there are 2 categories, continue codemap = pstat.abut(categories, range(2)) pstat.recode(data, codemap, 0) # recoded x = pstat.linexand(data, 0, categories[0]) @@ -854,31 +831,30 @@ def lpointbiserialr(x, y): xmean = mean(pstat.colex(x, 1)) ymean = mean(pstat.colex(y, 1)) n = len(data) - adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n))) - rpb = (ymean - xmean)/samplestdev(pstat.colex(data, 1))*adjust - df = n-2 - t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY))) - prob = betai(0.5*df, 0.5, df/(df+t*t)) # t already a float + adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n))) + rpb = (ymean - xmean) / samplestdev(pstat.colex(data, 1)) * adjust + df = n - 2 + t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY))) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) # t already a float return rpb, prob def lkendalltau(x, y): """ -Calculates Kendall's tau ... correlation of ordinal data. Adapted -from function kendl1 in Numerical Recipies. Needs good test-routine.@@@ + Calculates Kendall's tau ... correlation of ordinal data. Adapted + from function kendl1 in Numerical Recipies. 
Needs good test-routine.@@@ -Usage: lkendalltau(x,y) -Returns: Kendall's tau, two-tailed p-value -""" + Usage: lkendalltau(x,y) + Returns: Kendall's tau, two-tailed p-value""" n1 = 0 n2 = 0 iss = 0 - for j in range(len(x)-1): + for j in range(len(x) - 1): for k in range(j, len(y)): a1 = x[j] - x[k] a2 = y[j] - y[k] aa = a1 * a2 - if (aa): # neither list has a tie + if aa: # neither list has a tie n1 = n1 + 1 n2 = n2 + 1 if aa > 0: @@ -886,117 +862,112 @@ def lkendalltau(x, y): else: iss = iss - 1 else: - if (a1): + if a1: n1 = n1 + 1 else: n2 = n2 + 1 - tau = iss / math.sqrt(n1*n2) - svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1)) + tau = iss / math.sqrt(n1 * n2) + svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1)) z = tau / math.sqrt(svar) - prob = erfcc(abs(z)/1.4142136) + prob = erfcc(abs(z) / 1.4142136) return tau, prob def llinregress(x, y): """ -Calculates a regression line on x,y pairs. + Calculates a regression line on x,y pairs. -Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates -Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate -""" + Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates + Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate""" TINY = 1.0e-20 if len(x) != len(y): - raise ValueError('Input values not paired in linregress. Aborting.') + raise ValueError("Input values not paired in linregress. 
Aborting.") n = len(x) x = [float(_) for _ in x] y = [float(_) for _ in y] xmean = mean(x) ymean = mean(y) - r_num = float(n*(summult(x, y)) - sum(x)*sum(y)) - r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y))) + r_num = float(n * (summult(x, y)) - sum(x) * sum(y)) + r_den = math.sqrt((n * ss(x) - square_of_sums(x)) * (n * ss(y) - square_of_sums(y))) r = r_num / r_den - df = n-2 - t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) - prob = betai(0.5*df, 0.5, df/(df+t*t)) - slope = r_num / float(n*ss(x) - square_of_sums(x)) - intercept = ymean - slope*xmean - sterrest = math.sqrt(1-r*r)*samplestdev(y) + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) + slope = r_num / float(n * ss(x) - square_of_sums(x)) + intercept = ymean - slope * xmean + sterrest = math.sqrt(1 - r * r) * samplestdev(y) return slope, intercept, r, prob, sterrest # INFERENTIAL STATISTICS -def lttest_1samp(a, popmean, printit=0, name='Sample', writemode='a'): + +def lttest_1samp(a, popmean, printit=0, name="Sample", writemode="a"): """ -Calculates the t-obtained for the independent samples T-test on ONE group -of scores a, given a population mean. If printit=1, results are printed -to the screen. If printit='filename', the results are output to 'filename' -using the given writemode (default=append). Returns t-value, and prob. + Calculates the t-obtained for the independent samples T-test on ONE group + of scores a, given a population mean. If printit=1, results are printed + to the screen. If printit='filename', the results are output to 'filename' + using the given writemode (default=append). Returns t-value, and prob. 
-Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') -Returns: t-value, two-tailed prob -""" + Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a') + Returns: t-value, two-tailed prob""" x = mean(a) v = var(a) n = len(a) - df = n-1 - svar = ((n-1)*v)/float(df) - t = (x-popmean)/math.sqrt(svar*(1.0/n)) - prob = betai(0.5*df, 0.5, float(df)/(df+t*t)) + df = n - 1 + svar = ((n - 1) * v) / float(df) + t = (x - popmean) / math.sqrt(svar * (1.0 / n)) + prob = betai(0.5 * df, 0.5, float(df) / (df + t * t)) if printit != 0: - statname = 'Single-sample T-test.' - outputpairedstats(printit, writemode, - 'Population', '--', popmean, 0, 0, 0, - name, n, x, v, min(a), max(a), - statname, t, prob) + statname = "Single-sample T-test." + outputpairedstats( + printit, writemode, "Population", "--", popmean, 0, 0, 0, name, n, x, v, min(a), max(a), statname, t, prob + ) return t, prob -def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'): +def lttest_ind(a, b, printit=0, name1="Samp1", name2="Samp2", writemode="a"): """ -Calculates the t-obtained T-test on TWO INDEPENDENT samples of -scores a, and b. From Numerical Recipies, p.483. If printit=1, results -are printed to the screen. If printit='filename', the results are output -to 'filename' using the given writemode (default=append). Returns t-value, -and prob. - -Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a') -Returns: t-value, two-tailed prob -""" + Calculates the t-obtained T-test on TWO INDEPENDENT samples of + scores a, and b. From Numerical Recipies, p.483. If printit=1, results + are printed to the screen. If printit='filename', the results are output + to 'filename' using the given writemode (default=append). Returns t-value, + and prob. 
+ + Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a') + Returns: t-value, two-tailed prob""" x1 = mean(a) x2 = mean(b) - v1 = stdev(a)**2 - v2 = stdev(b)**2 + v1 = stdev(a) ** 2 + v2 = stdev(b) ** 2 n1 = len(a) n2 = len(b) - df = n1+n2-2 - svar = ((n1-1)*v1+(n2-1)*v2)/float(df) - t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2)) - prob = betai(0.5*df, 0.5, df/(df+t*t)) + df = n1 + n2 - 2 + svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df) + t = (x1 - x2) / math.sqrt(svar * (1.0 / n1 + 1.0 / n2)) + prob = betai(0.5 * df, 0.5, df / (df + t * t)) if printit != 0: - statname = 'Independent samples T-test.' - outputpairedstats(printit, writemode, - name1, n1, x1, v1, min(a), max(a), - name2, n2, x2, v2, min(b), max(b), - statname, t, prob) + statname = "Independent samples T-test." + outputpairedstats( + printit, writemode, name1, n1, x1, v1, min(a), max(a), name2, n2, x2, v2, min(b), max(b), statname, t, prob + ) return t, prob -def lttest_rel(a, b, printit=0, name1='Sample1', name2='Sample2', writemode='a'): +def lttest_rel(a, b, printit=0, name1="Sample1", name2="Sample2", writemode="a"): """ -Calculates the t-obtained T-test on TWO RELATED samples of scores, -a and b. From Numerical Recipies, p.483. If printit=1, results are -printed to the screen. If printit='filename', the results are output to -'filename' using the given writemode (default=append). Returns t-value, -and prob. - -Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a') -Returns: t-value, two-tailed prob -""" + Calculates the t-obtained T-test on TWO RELATED samples of scores, + a and b. From Numerical Recipies, p.483. If printit=1, results are + printed to the screen. If printit='filename', the results are output to + 'filename' using the given writemode (default=append). Returns t-value, + and prob. 
+ + Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a') + Returns: t-value, two-tailed prob""" if len(a) != len(b): - raise ValueError('Unequal length lists in ttest_rel.') + raise ValueError("Unequal length lists in ttest_rel.") x1 = mean(a) x2 = mean(b) v1 = var(a) @@ -1004,48 +975,45 @@ def lttest_rel(a, b, printit=0, name1='Sample1', name2='Sample2', writemode='a') n = len(a) cov = 0 for i in range(len(a)): - cov = cov + (a[i]-x1) * (b[i]-x2) - df = n-1 + cov = cov + (a[i] - x1) * (b[i] - x2) + df = n - 1 cov = cov / float(df) - sd = math.sqrt((v1+v2 - 2.0*cov)/float(n)) - t = (x1-x2)/sd - prob = betai(0.5*df, 0.5, df/(df+t*t)) + sd = math.sqrt((v1 + v2 - 2.0 * cov) / float(n)) + t = (x1 - x2) / sd + prob = betai(0.5 * df, 0.5, df / (df + t * t)) if printit != 0: - statname = 'Related samples T-test.' - outputpairedstats(printit, writemode, - name1, n, x1, v1, min(a), max(a), - name2, n, x2, v2, min(b), max(b), - statname, t, prob) + statname = "Related samples T-test." + outputpairedstats( + printit, writemode, name1, n, x1, v1, min(a), max(a), name2, n, x2, v2, min(b), max(b), statname, t, prob + ) return t, prob def lchisquare(f_obs, f_exp=None): """ -Calculates a one-way chi square for list of observed frequencies and returns -the result. If no expected frequencies are given, the total N is assumed to -be equally distributed across all groups. + Calculates a one-way chi square for list of observed frequencies and returns + the result. If no expected frequencies are given, the total N is assumed to + be equally distributed across all groups. -Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq. -Returns: chisquare-statistic, associated p-value -""" - k = len(f_obs) # number of groups + Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq. 
+ Returns: chisquare-statistic, associated p-value""" + k = len(f_obs) # number of groups if f_exp is None: - f_exp = [sum(f_obs)/float(k)] * len(f_obs) # create k bins with = freq. + f_exp = [sum(f_obs) / float(k)] * len(f_obs) # create k bins with = freq. chisq = 0 for i in range(len(f_obs)): - chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i]) - return chisq, chisqprob(chisq, k-1) + chisq = chisq + (f_obs[i] - f_exp[i]) ** 2 / float(f_exp[i]) + return chisq, chisqprob(chisq, k - 1) def lks_2samp(data1, data2): """ -Computes the Kolmogorov-Smirnof statistic on 2 samples. From -Numerical Recipies in C, page 493. + Computes the Kolmogorov-Smirnof statistic on 2 samples. From + Numerical Recipies in C, page 493. -Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions -Returns: KS D-value, associated p-value -""" + Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions + Returns: KS D-value, associated p-value""" j1 = 0 j2 = 0 fn1 = 0.0 @@ -1061,17 +1029,17 @@ def lks_2samp(data1, data2): d1 = data1[j1] d2 = data2[j2] if d1 <= d2: - fn1 = (j1)/float(en1) + fn1 = (j1) / float(en1) j1 = j1 + 1 if d2 <= d1: - fn2 = (j2)/float(en2) + fn2 = (j2) / float(en2) j2 = j2 + 1 - dt = (fn2-fn1) + dt = fn2 - fn1 if math.fabs(dt) > math.fabs(d): d = dt try: - en = math.sqrt(en1*en2/float(en1+en2)) - prob = ksprob((en+0.12+0.11/en)*abs(d)) + en = math.sqrt(en1 * en2 / float(en1 + en2)) + prob = ksprob((en + 0.12 + 0.11 / en) * abs(d)) except Exception: prob = 1.0 return d, prob @@ -1079,89 +1047,85 @@ def lks_2samp(data1, data2): def lmannwhitneyu(x, y): """ -Calculates a Mann-Whitney U statistic on the provided scores and -returns the result. Use only when the n in each condition is < 20 and -you have 2 independent samples of ranks. NOTE: Mann-Whitney U is -significant if the u-obtained is LESS THAN or equal to the critical -value of U found in the tables. Equivalent to Kruskal-Wallis H with -just 2 groups. 
- -Usage: lmannwhitneyu(data) -Returns: u-statistic, one-tailed p-value (i.e., p(z(U))) -""" + Calculates a Mann-Whitney U statistic on the provided scores and + returns the result. Use only when the n in each condition is < 20 and + you have 2 independent samples of ranks. NOTE: Mann-Whitney U is + significant if the u-obtained is LESS THAN or equal to the critical + value of U found in the tables. Equivalent to Kruskal-Wallis H with + just 2 groups. + + Usage: lmannwhitneyu(data) + Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))""" n1 = len(x) n2 = len(y) - ranked = rankdata(x+y) - rankx = ranked[0:n1] # get the x-ranks - u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x - u2 = n1*n2 - u1 # remainder is U for y + ranked = rankdata(x + y) + rankx = ranked[0:n1] # get the x-ranks + u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x + u2 = n1 * n2 - u1 # remainder is U for y bigu = max(u1, u2) smallu = min(u1, u2) T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores if T == 0: - raise ValueError('All numbers are identical in lmannwhitneyu') - sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0) - z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc + raise ValueError("All numbers are identical in lmannwhitneyu") + sd = math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0) + z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc return smallu, 1.0 - zprob(z) def ltiecorrect(rankvals): """ -Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See -Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences. -New York: McGraw-Hill. Code adapted from |Stat rankind.c code. + Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See + Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences. + New York: McGraw-Hill. Code adapted from |Stat rankind.c code. 
-Usage: ltiecorrect(rankvals) -Returns: T correction factor for U or H -""" + Usage: ltiecorrect(rankvals) + Returns: T correction factor for U or H""" sorted, posn = shellsort(rankvals) n = len(sorted) T = 0.0 i = 0 - while (i < n-1): - if sorted[i] == sorted[i+1]: + while i < n - 1: + if sorted[i] == sorted[i + 1]: nties = 1 - while (i < n-1) and (sorted[i] == sorted[i+1]): + while (i < n - 1) and (sorted[i] == sorted[i + 1]): nties = nties + 1 i = i + 1 T = T + nties**3 - nties - i = i+1 - T = T / float(n**3-n) + i = i + 1 + T = T / float(n**3 - n) return 1.0 - T def lranksums(x, y): """ -Calculates the rank sums statistic on the provided scores and -returns the result. Use only when the n in each condition is > 20 and you -have 2 independent samples of ranks. + Calculates the rank sums statistic on the provided scores and + returns the result. Use only when the n in each condition is > 20 and you + have 2 independent samples of ranks. -Usage: lranksums(x,y) -Returns: a z-statistic, two-tailed p-value -""" + Usage: lranksums(x,y) + Returns: a z-statistic, two-tailed p-value""" n1 = len(x) n2 = len(y) - alldata = x+y + alldata = x + y ranked = rankdata(alldata) x = ranked[:n1] y = ranked[n1:] s = sum(x) - expected = n1*(n1+n2+1) / 2.0 - z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0) - prob = 2*(1.0 - zprob(abs(z))) + expected = n1 * (n1 + n2 + 1) / 2.0 + z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0) + prob = 2 * (1.0 - zprob(abs(z))) return z, prob def lwilcoxont(x, y): """ -Calculates the Wilcoxon T-test for related samples and returns the -result. A non-parametric T-test. + Calculates the Wilcoxon T-test for related samples and returns the + result. A non-parametric T-test. -Usage: lwilcoxont(x,y) -Returns: a t-statistic, two-tail probability estimate -""" + Usage: lwilcoxont(x,y) + Returns: a t-statistic, two-tail probability estimate""" if len(x) != len(y): - raise ValueError('Unequal N in wilcoxont. 
Aborting.') + raise ValueError("Unequal N in wilcoxont. Aborting.") d = [] for i in range(len(x)): diff = x[i] - y[i] @@ -1178,25 +1142,24 @@ def lwilcoxont(x, y): else: r_plus = r_plus + absranked[i] wt = min(r_plus, r_minus) - mn = count * (count+1) * 0.25 - se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0) - z = math.fabs(wt-mn) / se - prob = 2*(1.0 - zprob(abs(z))) + mn = count * (count + 1) * 0.25 + se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0) + z = math.fabs(wt - mn) / se + prob = 2 * (1.0 - zprob(abs(z))) return wt, prob def lkruskalwallish(*args): """ -The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more -groups, requiring at least 5 subjects in each group. This function -calculates the Kruskal-Wallis H-test for 3 or more independent samples -and returns the result. + The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more + groups, requiring at least 5 subjects in each group. This function + calculates the Kruskal-Wallis H-test for 3 or more independent samples + and returns the result. 
-Usage: lkruskalwallish(*args) -Returns: H-statistic (corrected for ties), associated p-value -""" + Usage: lkruskalwallish(*args) + Returns: H-statistic (corrected for ties), associated p-value""" args = list(args) - n = [0]*len(args) + n = [0] * len(args) all = [] n = [len(_) for _ in args] for i in range(len(args)): @@ -1204,57 +1167,56 @@ def lkruskalwallish(*args): ranked = rankdata(all) T = tiecorrect(ranked) for i in range(len(args)): - args[i] = ranked[0:n[i]] - del ranked[0:n[i]] + args[i] = ranked[0 : n[i]] + del ranked[0 : n[i]] rsums = [] for i in range(len(args)): - rsums.append(sum(args[i])**2) + rsums.append(sum(args[i]) ** 2) rsums[i] = rsums[i] / float(n[i]) ssbn = sum(rsums) totaln = sum(n) - h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1) + h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1) df = len(args) - 1 if T == 0: - raise ValueError('All numbers are identical in lkruskalwallish') + raise ValueError("All numbers are identical in lkruskalwallish") h = h / float(T) return h, chisqprob(h, df) def lfriedmanchisquare(*args): """ -Friedman Chi-Square is a non-parametric, one-way within-subjects -ANOVA. This function calculates the Friedman Chi-square test for repeated -measures and returns the result, along with the associated probability -value. It assumes 3 or more repeated measures. Only 3 levels requires a -minimum of 10 subjects in the study. Four levels requires 5 subjects per -level(??). - -Usage: lfriedmanchisquare(*args) -Returns: chi-square statistic, associated p-value -""" + Friedman Chi-Square is a non-parametric, one-way within-subjects + ANOVA. This function calculates the Friedman Chi-square test for repeated + measures and returns the result, along with the associated probability + value. It assumes 3 or more repeated measures. Only 3 levels requires a + minimum of 10 subjects in the study. Four levels requires 5 subjects per + level(??). 
+ + Usage: lfriedmanchisquare(*args) + Returns: chi-square statistic, associated p-value""" k = len(args) if k < 3: - raise ValueError('Less than 3 levels. Friedman test not appropriate.') + raise ValueError("Less than 3 levels. Friedman test not appropriate.") n = len(args[0]) data = pstat.abut(*tuple(args)) for i in range(len(data)): data[i] = rankdata(data[i]) ssbn = 0 for i in range(k): - ssbn = ssbn + sum(args[i])**2 - chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1) - return chisq, chisqprob(chisq, k-1) + ssbn = ssbn + sum(args[i]) ** 2 + chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1) + return chisq, chisqprob(chisq, k - 1) # PROBABILITY CALCULATIONS + def lchisqprob(chisq, df): """ -Returns the (1-tailed) probability value associated with the provided -chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat. + Returns the (1-tailed) probability value associated with the provided + chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat. -Usage: lchisqprob(chisq,df) -""" + Usage: lchisqprob(chisq,df)""" BIG = 20.0 def ex(x): @@ -1277,7 +1239,7 @@ def ex(x): s = y else: s = 2.0 * zprob(-math.sqrt(chisq)) - if (df > 2): + if df > 2: chisq = 0.5 * (df - 1.0) if even: z = 1.0 @@ -1289,9 +1251,9 @@ def ex(x): else: e = math.log(math.sqrt(math.pi)) c = math.log(a) - while (z <= chisq): + while z <= chisq: e = math.log(z) + e - s = s + ex(c*z-a-e) + s = s + ex(c * z - a - e) z = z + 1.0 return s else: @@ -1300,25 +1262,44 @@ def ex(x): else: e = 1.0 / math.sqrt(math.pi) / math.sqrt(a) c = 0.0 - while (z <= chisq): - e = e * (a/float(z)) + while z <= chisq: + e = e * (a / float(z)) c = c + e z = z + 1.0 - return (c*y+s) + return c * y + s else: return s def lerfcc(x): """ -Returns the complementary error function erfc(x) with fractional -error everywhere less than 1.2e-7. Adapted from Numerical Recipies. + Returns the complementary error function erfc(x) with fractional + error everywhere less than 1.2e-7. 
Adapted from Numerical Recipies. -Usage: lerfcc(x) -""" + Usage: lerfcc(x)""" z = abs(x) - t = 1.0 / (1.0+0.5*z) - ans = t * math.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277))))))))) + t = 1.0 / (1.0 + 0.5 * z) + ans = t * math.exp( + -z * z + - 1.26551223 + + t + * ( + 1.00002368 + + t + * ( + 0.37409196 + + t + * ( + 0.09678418 + + t + * ( + -0.18628806 + + t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277)))) + ) + ) + ) + ) + ) if x >= 0: return ans else: @@ -1327,269 +1308,315 @@ def lerfcc(x): def lzprob(z): """ -Returns the area under the normal curve 'to the left of' the given z value. -Thus, - for z<0, zprob(z) = 1-tail probability - for z>0, 1.0-zprob(z) = 1-tail probability - for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability -Adapted from z.c in Gary Perlman's |Stat. - -Usage: lzprob(z) -""" - Z_MAX = 6.0 # maximum meaningful z-value + Returns the area under the normal curve 'to the left of' the given z value. + Thus, + for z<0, zprob(z) = 1-tail probability + for z>0, 1.0-zprob(z) = 1-tail probability + for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability + Adapted from z.c in Gary Perlman's |Stat. 
+ + Usage: lzprob(z)""" + Z_MAX = 6.0 # maximum meaningful z-value if z == 0.0: x = 0.0 else: y = 0.5 * math.fabs(z) - if y >= (Z_MAX*0.5): + if y >= (Z_MAX * 0.5): x = 1.0 - elif (y < 1.0): - w = y*y - x = ((((((((0.000124818987 * w - - 0.001075204047) * w + 0.005198775019) * w - - 0.019198292004) * w + 0.059054035642) * w - - 0.151968751364) * w + 0.319152932694) * w - - 0.531923007300) * w + 0.797884560593) * y * 2.0 + elif y < 1.0: + w = y * y + x = ( + ( + ( + ( + ( + ( + (((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w - 0.019198292004) + * w + + 0.059054035642 + ) + * w + - 0.151968751364 + ) + * w + + 0.319152932694 + ) + * w + - 0.531923007300 + ) + * w + + 0.797884560593 + ) + * y + * 2.0 + ) else: y = y - 2.0 - x = (((((((((((((-0.000045255659 * y - + 0.000152529290) * y - 0.000019538132) * y - - 0.000676904986) * y + 0.001390604284) * y - - 0.000794620820) * y - 0.002034254874) * y - + 0.006549791214) * y - 0.010557625006) * y - + 0.011630447319) * y - 0.009279453341) * y - + 0.005353579108) * y - 0.002141268741) * y - + 0.000535310849) * y + 0.999936657524 + x = ( + ( + ( + ( + ( + ( + ( + ( + ( + ( + ( + ((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) + * y + - 0.000676904986 + ) + * y + + 0.001390604284 + ) + * y + - 0.000794620820 + ) + * y + - 0.002034254874 + ) + * y + + 0.006549791214 + ) + * y + - 0.010557625006 + ) + * y + + 0.011630447319 + ) + * y + - 0.009279453341 + ) + * y + + 0.005353579108 + ) + * y + - 0.002141268741 + ) + * y + + 0.000535310849 + ) * y + 0.999936657524 if z > 0.0: - prob = ((x+1.0)*0.5) + prob = (x + 1.0) * 0.5 else: - prob = ((1.0-x)*0.5) + prob = (1.0 - x) * 0.5 return prob def lksprob(alam): """ -Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from -Numerical Recipies. + Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from + Numerical Recipies. 
-Usage: lksprob(alam) -""" + Usage: lksprob(alam)""" fac = 2.0 sum = 0.0 termbf = 0.0 - a2 = -2.0*alam*alam + a2 = -2.0 * alam * alam for j in range(1, 201): - term = fac*math.exp(a2*j*j) + term = fac * math.exp(a2 * j * j) sum = sum + term - if math.fabs(term) <= (0.001*termbf) or math.fabs(term) < (1.0e-8*sum): + if math.fabs(term) <= (0.001 * termbf) or math.fabs(term) < (1.0e-8 * sum): return sum fac = -fac termbf = math.fabs(term) - return 1.0 # Get here only if fails to converge; was 0.0!! + return 1.0 # Get here only if fails to converge; was 0.0!! def lfprob(dfnum, dfden, F): """ -Returns the (1-tailed) significance level (p-value) of an F -statistic given the degrees of freedom for the numerator (dfR-dfF) and -the degrees of freedom for the denominator (dfF). + Returns the (1-tailed) significance level (p-value) of an F + statistic given the degrees of freedom for the numerator (dfR-dfF) and + the degrees of freedom for the denominator (dfF). -Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn -""" - p = betai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F)) + Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn""" + p = betai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F)) return p def lbetacf(a, b, x): """ -This function evaluates the continued fraction form of the incomplete -Beta function, betai. (Adapted from: Numerical Recipies in C.) + This function evaluates the continued fraction form of the incomplete + Beta function, betai. (Adapted from: Numerical Recipies in C.) 
-Usage: lbetacf(a,b,x) -""" + Usage: lbetacf(a,b,x)""" ITMAX = 200 EPS = 3.0e-7 bm = az = am = 1.0 - qab = a+b - qap = a+1.0 - qam = a-1.0 - bz = 1.0-qab*x/qap - for i in range(ITMAX+1): - em = float(i+1) + qab = a + b + qap = a + 1.0 + qam = a - 1.0 + bz = 1.0 - qab * x / qap + for i in range(ITMAX + 1): + em = float(i + 1) tem = em + em - d = em*(b-em)*x/((qam+tem)*(a+tem)) - ap = az + d*am - bp = bz+d*bm - d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem)) - app = ap+d*az - bpp = bp+d*bz + d = em * (b - em) * x / ((qam + tem) * (a + tem)) + ap = az + d * am + bp = bz + d * bm + d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem)) + app = ap + d * az + bpp = bp + d * bz aold = az - am = ap/bpp - bm = bp/bpp - az = app/bpp + am = ap / bpp + bm = bp / bpp + az = app / bpp bz = 1.0 - if (abs(az-aold) < (EPS*abs(az))): + if abs(az - aold) < (EPS * abs(az)): return az - print('a or b too big, or ITMAX too small in Betacf.') + print("a or b too big, or ITMAX too small in Betacf.") def lgammln(xx): """ -Returns the gamma function of xx. - Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. -(Adapted from: Numerical Recipies in C.) + Returns the gamma function of xx. + Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. + (Adapted from: Numerical Recipies in C.) 
-Usage: lgammln(xx) -""" + Usage: lgammln(xx)""" - coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, - 0.120858003e-2, -0.536382e-5] + coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2, -0.536382e-5] x = xx - 1.0 tmp = x + 5.5 - tmp = tmp - (x+0.5)*math.log(tmp) + tmp = tmp - (x + 0.5) * math.log(tmp) ser = 1.0 for j in range(len(coeff)): x = x + 1 - ser = ser + coeff[j]/x - return -tmp + math.log(2.50662827465*ser) + ser = ser + coeff[j] / x + return -tmp + math.log(2.50662827465 * ser) def lbetai(a, b, x): """ -Returns the incomplete beta function: + Returns the incomplete beta function: - I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) + I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt) -where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma -function of a. The continued fraction formulation is implemented here, -using the betacf function. (Adapted from: Numerical Recipies in C.) + where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma + function of a. The continued fraction formulation is implemented here, + using the betacf function. (Adapted from: Numerical Recipies in C.) 
-Usage: lbetai(a,b,x) -""" - if (x < 0.0 or x > 1.0): - raise ValueError('Bad x in lbetai') - if (x == 0.0 or x == 1.0): + Usage: lbetai(a,b,x)""" + if x < 0.0 or x > 1.0: + raise ValueError("Bad x in lbetai") + if x == 0.0 or x == 1.0: bt = 0.0 else: - bt = math.exp(gammln(a+b)-gammln(a)-gammln(b)+a*math.log(x)+b - * math.log(1.0-x)) - if (x < (a+1.0)/(a+b+2.0)): - return bt*betacf(a, b, x)/float(a) + bt = math.exp(gammln(a + b) - gammln(a) - gammln(b) + a * math.log(x) + b * math.log(1.0 - x)) + if x < (a + 1.0) / (a + b + 2.0): + return bt * betacf(a, b, x) / float(a) else: - return 1.0-bt*betacf(b, a, 1.0-x)/float(b) + return 1.0 - bt * betacf(b, a, 1.0 - x) / float(b) # ANOVA CALCULATIONS + def lF_oneway(*lists): """ -Performs a 1-way ANOVA, returning an F-value and probability given -any number of groups. From Heiman, pp.394-7. + Performs a 1-way ANOVA, returning an F-value and probability given + any number of groups. From Heiman, pp.394-7. -Usage: F_oneway(*lists) where *lists is any number of lists, one per - treatment group -Returns: F value, one-tailed p-value -""" - a = len(lists) # ANOVA on 'a' groups, each in it's own list + Usage: F_oneway(*lists) where *lists is any number of lists, one per + treatment group + Returns: F value, one-tailed p-value""" + a = len(lists) # ANOVA on 'a' groups, each in it's own list alldata = [] for i in range(len(lists)): alldata = alldata + lists[i] alldata = N.array(alldata) bign = len(alldata) - sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign)) + sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign)) ssbn = 0 for list in lists: - ssbn = ssbn + asquare_of_sums(N.array(list))/float(len(list)) - ssbn = ssbn - (asquare_of_sums(alldata)/float(bign)) - sswn = sstot-ssbn - dfbn = a-1 + ssbn = ssbn + asquare_of_sums(N.array(list)) / float(len(list)) + ssbn = ssbn - (asquare_of_sums(alldata) / float(bign)) + sswn = sstot - ssbn + dfbn = a - 1 dfwn = bign - a - msb = ssbn/float(dfbn) - msw = sswn/float(dfwn) 
- f = msb/msw + msb = ssbn / float(dfbn) + msw = sswn / float(dfwn) + f = msb / msw prob = fprob(dfbn, dfwn, f) return f, prob def lF_value(ER, EF, dfnum, dfden): """ -Returns an F-statistic given the following: - ER = error associated with the null hypothesis (the Restricted model) - EF = error associated with the alternate hypothesis (the Full model) - dfR-dfF = degrees of freedom of the numerator - dfF = degrees of freedom associated with the denominator/Full model + Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR-dfF = degrees of freedom of the numerator + dfF = degrees of freedom associated with the denominator/Full model -Usage: lF_value(ER,EF,dfnum,dfden) -""" - return ((ER-EF)/float(dfnum) / (EF/float(dfden))) + Usage: lF_value(ER,EF,dfnum,dfden)""" + return (ER - EF) / float(dfnum) / (EF / float(dfden)) # SUPPORT FUNCTIONS -def writecc(listoflists, file, writetype='w', extra=2): + +def writecc(listoflists, file, writetype="w", extra=2): """ -Writes a list of lists to a file in columns, customized by the max -size of items within the columns (max size of items in col, +2 characters) -to specified file. File-overwrite is the default. + Writes a list of lists to a file in columns, customized by the max + size of items within the columns (max size of items in col, +2 characters) + to specified file. File-overwrite is the default. 
-Usage: writecc (listoflists,file,writetype='w',extra=2) -Returns: None -""" + Usage: writecc (listoflists,file,writetype='w',extra=2) + Returns: None""" if type(listoflists[0]) not in [list, tuple]: listoflists = [listoflists] outfile = open(file, writetype) rowstokill = [] list2print = copy.deepcopy(listoflists) for i in range(len(listoflists)): - if listoflists[i] == ['\n'] or listoflists[i] == '\n' or listoflists[i] == 'dashes': + if listoflists[i] == ["\n"] or listoflists[i] == "\n" or listoflists[i] == "dashes": rowstokill = rowstokill + [i] rowstokill.reverse() for row in rowstokill: del list2print[row] - maxsize = [0]*len(list2print[0]) + maxsize = [0] * len(list2print[0]) for col in range(len(list2print[0])): items = pstat.colex(list2print, col) items = [pstat.makestr(_) for _ in items] maxsize[col] = max(map(len, items)) + extra for row in listoflists: - if row == ['\n'] or row == '\n': - outfile.write('\n') - elif row == ['dashes'] or row == 'dashes': - dashes = [0]*len(maxsize) + if row == ["\n"] or row == "\n": + outfile.write("\n") + elif row == ["dashes"] or row == "dashes": + dashes = [0] * len(maxsize) for j in range(len(maxsize)): - dashes[j] = '-'*(maxsize[j]-2) + dashes[j] = "-" * (maxsize[j] - 2) outfile.write(pstat.lineincustcols(dashes, maxsize)) else: outfile.write(pstat.lineincustcols(row, maxsize)) - outfile.write('\n') + outfile.write("\n") outfile.close() return None -def lincr(l, cap): # to increment a list up to a max-list of 'cap' +def lincr(l, cap): # to increment a list up to a max-list of 'cap' """ -Simulate a counting system from an n-dimensional list. + Simulate a counting system from an n-dimensional list. 
-Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n -Returns: next set of values for list l, OR -1 (if overflow) -""" - l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap) + Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n + Returns: next set of values for list l, OR -1 (if overflow)""" + l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap) for i in range(len(l)): - if l[i] > cap[i] and i < len(l)-1: # if carryover AND not done + if l[i] > cap[i] and i < len(l) - 1: # if carryover AND not done l[i] = 0 - l[i+1] = l[i+1] + 1 - elif l[i] > cap[i] and i == len(l)-1: # overflow past last column, must be finished + l[i + 1] = l[i + 1] + 1 + elif l[i] > cap[i] and i == len(l) - 1: # overflow past last column, must be finished l = -1 return l def lsum(inlist): """ -Returns the sum of the items in the passed list. + Returns the sum of the items in the passed list. -Usage: lsum(inlist) -""" + Usage: lsum(inlist)""" s = 0 for item in inlist: s = s + item @@ -1598,150 +1625,146 @@ def lsum(inlist): def lcumsum(inlist): """ -Returns a list consisting of the cumulative sum of the items in the -passed list. + Returns a list consisting of the cumulative sum of the items in the + passed list. -Usage: lcumsum(inlist) -""" + Usage: lcumsum(inlist)""" newlist = copy.deepcopy(inlist) for i in range(1, len(newlist)): - newlist[i] = newlist[i] + newlist[i-1] + newlist[i] = newlist[i] + newlist[i - 1] return newlist def lss(inlist): """ -Squares each value in the passed list, adds up these squares and -returns the result. + Squares each value in the passed list, adds up these squares and + returns the result. -Usage: lss(inlist) -""" + Usage: lss(inlist)""" ss = 0 for item in inlist: - ss = ss + item*item + ss = ss + item * item return ss def lsummult(list1, list2): """ -Multiplies elements in list1 and list2, element by element, and -returns the sum of all resulting multiplications. Must provide equal -length lists. 
+ Multiplies elements in list1 and list2, element by element, and + returns the sum of all resulting multiplications. Must provide equal + length lists. -Usage: lsummult(list1,list2) -""" + Usage: lsummult(list1,list2)""" if len(list1) != len(list2): raise ValueError("Lists not equal length in summult.") s = 0 for item1, item2 in pstat.abut(list1, list2): - s = s + item1*item2 + s = s + item1 * item2 return s def lsumdiffsquared(x, y): """ -Takes pairwise differences of the values in lists x and y, squares -these differences, and returns the sum of these squares. + Takes pairwise differences of the values in lists x and y, squares + these differences, and returns the sum of these squares. -Usage: lsumdiffsquared(x,y) -Returns: sum[(x[i]-y[i])**2] -""" + Usage: lsumdiffsquared(x,y) + Returns: sum[(x[i]-y[i])**2]""" sds = 0 for i in range(len(x)): - sds = sds + (x[i]-y[i])**2 + sds = sds + (x[i] - y[i]) ** 2 return sds def lsquare_of_sums(inlist): """ -Adds the values in the passed list, squares the sum, and returns -the result. + Adds the values in the passed list, squares the sum, and returns + the result. -Usage: lsquare_of_sums(inlist) -Returns: sum(inlist[i])**2 -""" + Usage: lsquare_of_sums(inlist) + Returns: sum(inlist[i])**2""" s = sum(inlist) - return float(s)*s + return float(s) * s def lshellsort(inlist): """ -Shellsort algorithm. Sorts a 1D-list. + Shellsort algorithm. Sorts a 1D-list. 
-Usage: lshellsort(inlist) -Returns: sorted-inlist, sorting-index-vector (for original list) -""" + Usage: lshellsort(inlist) + Returns: sorted-inlist, sorting-index-vector (for original list)""" n = len(inlist) svec = copy.deepcopy(inlist) ivec = list(range(n)) - gap = n/2 # integer division needed + gap = n / 2 # integer division needed while gap > 0: for i in range(gap, n): - for j in range(i-gap, -1, -gap): - while j >= 0 and svec[j] > svec[j+gap]: + for j in range(i - gap, -1, -gap): + while j >= 0 and svec[j] > svec[j + gap]: temp = svec[j] - svec[j] = svec[j+gap] - svec[j+gap] = temp + svec[j] = svec[j + gap] + svec[j + gap] = temp itemp = ivec[j] - ivec[j] = ivec[j+gap] - ivec[j+gap] = itemp + ivec[j] = ivec[j + gap] + ivec[j + gap] = itemp gap = gap / 2 # integer division needed -# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]] + # svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]] return svec, ivec def lrankdata(inlist): """ -Ranks the data in inlist, dealing with ties appropritely. Assumes -a 1D inlist. Adapted from Gary Perlman's |Stat ranksort. + Ranks the data in inlist, dealing with ties appropritely. Assumes + a 1D inlist. Adapted from Gary Perlman's |Stat ranksort. 
-Usage: lrankdata(inlist) -Returns: a list of length equal to inlist, containing rank scores -""" + Usage: lrankdata(inlist) + Returns: a list of length equal to inlist, containing rank scores""" n = len(inlist) svec, ivec = shellsort(inlist) sumranks = 0 dupcount = 0 - newlist = [0]*n + newlist = [0] * n for i in range(n): sumranks = sumranks + i dupcount = dupcount + 1 - if i == n-1 or svec[i] != svec[i+1]: + if i == n - 1 or svec[i] != svec[i + 1]: averank = sumranks / float(dupcount) + 1 - for j in range(i-dupcount+1, i+1): + for j in range(i - dupcount + 1, i + 1): newlist[ivec[j]] = averank sumranks = 0 dupcount = 0 return newlist -def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2, n2, m2, se2, min2, max2, statname, stat, prob): +def outputpairedstats( + fname, writemode, name1, n1, m1, se1, min1, max1, name2, n2, m2, se2, min2, max2, statname, stat, prob +): """ -Prints or write to a file stats for two groups, using the name, n, -mean, sterr, min and max for each group, as well as the statistic name, -its value, and the associated p-value. - -Usage: outputpairedstats(fname,writemode, - name1,n1,mean1,stderr1,min1,max1, - name2,n2,mean2,stderr2,min2,max2, - statname,stat,prob) -Returns: None -""" - suffix = '' # for *s after the p-value + Prints or write to a file stats for two groups, using the name, n, + mean, sterr, min and max for each group, as well as the statistic name, + its value, and the associated p-value. 
+ + Usage: outputpairedstats(fname,writemode, + name1,n1,mean1,stderr1,min1,max1, + name2,n2,mean2,stderr2,min2,max2, + statname,stat,prob) + Returns: None""" + suffix = "" # for *s after the p-value try: prob.shape prob = prob[0] except Exception: pass if prob < 0.001: - suffix = ' ***' + suffix = " ***" elif prob < 0.01: - suffix = ' **' + suffix = " **" elif prob < 0.05: - suffix = ' *' - title = [['Name', 'N', 'Mean', 'SD', 'Min', 'Max']] - lofl = title+[[name1, n1, round(m1, 3), round(math.sqrt(se1), 3), min1, max1], - [name2, n2, round(m2, 3), round(math.sqrt(se2), 3), min2, max2]] + suffix = " *" + title = [["Name", "N", "Mean", "SD", "Min", "Max"]] + lofl = title + [ + [name1, n1, round(m1, 3), round(math.sqrt(se1), 3), min1, max1], + [name2, n2, round(m2, 3), round(math.sqrt(se2), 3), min2, max2], + ] if not isinstance(fname, str) or len(fname) == 0: print() print(statname) @@ -1755,14 +1778,14 @@ def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2, n prob = prob[0] except Exception: pass - print('Test statistic = ', round(stat, 3), ' p = ', round(prob, 3), suffix) + print("Test statistic = ", round(stat, 3), " p = ", round(prob, 3), suffix) print() else: file = open(fname, writemode) - file.write('\n'+statname+'\n\n') + file.write("\n" + statname + "\n\n") file.close() - writecc(lofl, fname, 'a') - file = open(fname, 'a') + writecc(lofl, fname, "a") + file = open(fname, "a") try: if stat.shape == (): stat = stat[0] @@ -1770,25 +1793,26 @@ def outputpairedstats(fname, writemode, name1, n1, m1, se1, min1, max1, name2, n prob = prob[0] except Exception: pass - file.write(pstat.list2string(['\nTest statistic = ', round(stat, 4), ' p = ', round(prob, 4), suffix, '\n\n'])) + file.write( + pstat.list2string(["\nTest statistic = ", round(stat, 4), " p = ", round(prob, 4), suffix, "\n\n"]) + ) file.close() return None def lfindwithin(data): """ -Returns an integer representing a binary vector, where 1=within- -subject factor, 0=between. 
Input equals the entire data 2D list (i.e., -column 0=random factor, column -1=measured values (those two are skipped). -Note: input data is in |Stat format ... a list of lists ("2D list") with -one row per measured value, first column=subject identifier, last column= -score, one in-between column per factor (these columns contain level -designations on each factor). See also stats.anova.__doc__. - -Usage: lfindwithin(data) data in |Stat format -""" + Returns an integer representing a binary vector, where 1=within- + subject factor, 0=between. Input equals the entire data 2D list (i.e., + column 0=random factor, column -1=measured values (those two are skipped). + Note: input data is in |Stat format ... a list of lists ("2D list") with + one row per measured value, first column=subject identifier, last column= + score, one in-between column per factor (these columns contain level + designations on each factor). See also stats.anova.__doc__. + + Usage: lfindwithin(data) data in |Stat format""" - numfact = len(data[0])-1 + numfact = len(data[0]) - 1 withinvec = 0 for col in range(1, numfact): examplelevel = pstat.unique(pstat.colex(data, col))[0] @@ -1803,89 +1827,219 @@ def lfindwithin(data): # DISPATCH LISTS AND TUPLES TO ABOVE FCNS # CENTRAL TENDENCY: -geometricmean = Dispatch((lgeometricmean, (list, tuple)), ) -harmonicmean = Dispatch((lharmonicmean, (list, tuple)), ) -mean = Dispatch((lmean, (list, tuple)), ) -median = Dispatch((lmedian, (list, tuple)), ) -medianscore = Dispatch((lmedianscore, (list, tuple)), ) -mode = Dispatch((lmode, (list, tuple)), ) +geometricmean = Dispatch( + (lgeometricmean, (list, tuple)), +) +harmonicmean = Dispatch( + (lharmonicmean, (list, tuple)), +) +mean = Dispatch( + (lmean, (list, tuple)), +) +median = Dispatch( + (lmedian, (list, tuple)), +) +medianscore = Dispatch( + (lmedianscore, (list, tuple)), +) +mode = Dispatch( + (lmode, (list, tuple)), +) # MOMENTS: -moment = Dispatch((lmoment, (list, tuple)), ) -variation = 
Dispatch((lvariation, (list, tuple)), ) -skew = Dispatch((lskew, (list, tuple)), ) -kurtosis = Dispatch((lkurtosis, (list, tuple)), ) -describe = Dispatch((ldescribe, (list, tuple)), ) +moment = Dispatch( + (lmoment, (list, tuple)), +) +variation = Dispatch( + (lvariation, (list, tuple)), +) +skew = Dispatch( + (lskew, (list, tuple)), +) +kurtosis = Dispatch( + (lkurtosis, (list, tuple)), +) +describe = Dispatch( + (ldescribe, (list, tuple)), +) # FREQUENCY STATISTICS: -itemfreq = Dispatch((litemfreq, (list, tuple)), ) -scoreatpercentile = Dispatch((lscoreatpercentile, (list, tuple)), ) -percentileofscore = Dispatch((lpercentileofscore, (list, tuple)), ) -histogram = Dispatch((lhistogram, (list, tuple)), ) -cumfreq = Dispatch((lcumfreq, (list, tuple)), ) -relfreq = Dispatch((lrelfreq, (list, tuple)), ) +itemfreq = Dispatch( + (litemfreq, (list, tuple)), +) +scoreatpercentile = Dispatch( + (lscoreatpercentile, (list, tuple)), +) +percentileofscore = Dispatch( + (lpercentileofscore, (list, tuple)), +) +histogram = Dispatch( + (lhistogram, (list, tuple)), +) +cumfreq = Dispatch( + (lcumfreq, (list, tuple)), +) +relfreq = Dispatch( + (lrelfreq, (list, tuple)), +) # VARIABILITY: -obrientransform = Dispatch((lobrientransform, (list, tuple)), ) -samplevar = Dispatch((lsamplevar, (list, tuple)), ) -samplestdev = Dispatch((lsamplestdev, (list, tuple)), ) -var = Dispatch((lvar, (list, tuple)), ) -stdev = Dispatch((lstdev, (list, tuple)), ) -sterr = Dispatch((lsterr, (list, tuple)), ) -sem = Dispatch((lsem, (list, tuple)), ) -z = Dispatch((lz, (list, tuple)), ) -zs = Dispatch((lzs, (list, tuple)), ) +obrientransform = Dispatch( + (lobrientransform, (list, tuple)), +) +samplevar = Dispatch( + (lsamplevar, (list, tuple)), +) +samplestdev = Dispatch( + (lsamplestdev, (list, tuple)), +) +var = Dispatch( + (lvar, (list, tuple)), +) +stdev = Dispatch( + (lstdev, (list, tuple)), +) +sterr = Dispatch( + (lsterr, (list, tuple)), +) +sem = Dispatch( + (lsem, (list, tuple)), +) +z = 
Dispatch( + (lz, (list, tuple)), +) +zs = Dispatch( + (lzs, (list, tuple)), +) # TRIMMING FCNS: -trimboth = Dispatch((ltrimboth, (list, tuple)), ) -trim1 = Dispatch((ltrim1, (list, tuple)), ) +trimboth = Dispatch( + (ltrimboth, (list, tuple)), +) +trim1 = Dispatch( + (ltrim1, (list, tuple)), +) # CORRELATION FCNS: -paired = Dispatch((lpaired, (list, tuple)), ) -pearsonr = Dispatch((lpearsonr, (list, tuple)), ) -spearmanr = Dispatch((lspearmanr, (list, tuple)), ) -pointbiserialr = Dispatch((lpointbiserialr, (list, tuple)), ) -kendalltau = Dispatch((lkendalltau, (list, tuple)), ) -linregress = Dispatch((llinregress, (list, tuple)), ) +paired = Dispatch( + (lpaired, (list, tuple)), +) +pearsonr = Dispatch( + (lpearsonr, (list, tuple)), +) +spearmanr = Dispatch( + (lspearmanr, (list, tuple)), +) +pointbiserialr = Dispatch( + (lpointbiserialr, (list, tuple)), +) +kendalltau = Dispatch( + (lkendalltau, (list, tuple)), +) +linregress = Dispatch( + (llinregress, (list, tuple)), +) # INFERENTIAL STATS: -ttest_1samp = Dispatch((lttest_1samp, (list, tuple)), ) -ttest_ind = Dispatch((lttest_ind, (list, tuple)), ) -ttest_rel = Dispatch((lttest_rel, (list, tuple)), ) -chisquare = Dispatch((lchisquare, (list, tuple)), ) -ks_2samp = Dispatch((lks_2samp, (list, tuple)), ) -mannwhitneyu = Dispatch((lmannwhitneyu, (list, tuple)), ) -ranksums = Dispatch((lranksums, (list, tuple)), ) -tiecorrect = Dispatch((ltiecorrect, (list, tuple)), ) -wilcoxont = Dispatch((lwilcoxont, (list, tuple)), ) -kruskalwallish = Dispatch((lkruskalwallish, (list, tuple)), ) -friedmanchisquare = Dispatch((lfriedmanchisquare, (list, tuple)), ) +ttest_1samp = Dispatch( + (lttest_1samp, (list, tuple)), +) +ttest_ind = Dispatch( + (lttest_ind, (list, tuple)), +) +ttest_rel = Dispatch( + (lttest_rel, (list, tuple)), +) +chisquare = Dispatch( + (lchisquare, (list, tuple)), +) +ks_2samp = Dispatch( + (lks_2samp, (list, tuple)), +) +mannwhitneyu = Dispatch( + (lmannwhitneyu, (list, tuple)), +) +ranksums = Dispatch( + 
(lranksums, (list, tuple)), +) +tiecorrect = Dispatch( + (ltiecorrect, (list, tuple)), +) +wilcoxont = Dispatch( + (lwilcoxont, (list, tuple)), +) +kruskalwallish = Dispatch( + (lkruskalwallish, (list, tuple)), +) +friedmanchisquare = Dispatch( + (lfriedmanchisquare, (list, tuple)), +) # PROBABILITY CALCS: -chisqprob = Dispatch((lchisqprob, (int, float)), ) -zprob = Dispatch((lzprob, (int, float)), ) -ksprob = Dispatch((lksprob, (int, float)), ) -fprob = Dispatch((lfprob, (int, float)), ) -betacf = Dispatch((lbetacf, (int, float)), ) -betai = Dispatch((lbetai, (int, float)), ) -erfcc = Dispatch((lerfcc, (int, float)), ) -gammln = Dispatch((lgammln, (int, float)), ) +chisqprob = Dispatch( + (lchisqprob, (int, float)), +) +zprob = Dispatch( + (lzprob, (int, float)), +) +ksprob = Dispatch( + (lksprob, (int, float)), +) +fprob = Dispatch( + (lfprob, (int, float)), +) +betacf = Dispatch( + (lbetacf, (int, float)), +) +betai = Dispatch( + (lbetai, (int, float)), +) +erfcc = Dispatch( + (lerfcc, (int, float)), +) +gammln = Dispatch( + (lgammln, (int, float)), +) # ANOVA FUNCTIONS: -F_oneway = Dispatch((lF_oneway, (list, tuple)), ) -F_value = Dispatch((lF_value, (list, tuple)), ) +F_oneway = Dispatch( + (lF_oneway, (list, tuple)), +) +F_value = Dispatch( + (lF_value, (list, tuple)), +) # SUPPORT FUNCTIONS: -incr = Dispatch((lincr, (list, tuple)), ) -sum = Dispatch((lsum, (list, tuple)), ) -cumsum = Dispatch((lcumsum, (list, tuple)), ) -ss = Dispatch((lss, (list, tuple)), ) -summult = Dispatch((lsummult, (list, tuple)), ) -square_of_sums = Dispatch((lsquare_of_sums, (list, tuple)), ) -sumdiffsquared = Dispatch((lsumdiffsquared, (list, tuple)), ) -shellsort = Dispatch((lshellsort, (list, tuple)), ) -rankdata = Dispatch((lrankdata, (list, tuple)), ) -findwithin = Dispatch((lfindwithin, (list, tuple)), ) +incr = Dispatch( + (lincr, (list, tuple)), +) +sum = Dispatch( + (lsum, (list, tuple)), +) +cumsum = Dispatch( + (lcumsum, (list, tuple)), +) +ss = Dispatch( + (lss, (list, 
tuple)), +) +summult = Dispatch( + (lsummult, (list, tuple)), +) +square_of_sums = Dispatch( + (lsquare_of_sums, (list, tuple)), +) +sumdiffsquared = Dispatch( + (lsumdiffsquared, (list, tuple)), +) +shellsort = Dispatch( + (lshellsort, (list, tuple)), +) +rankdata = Dispatch( + (lrankdata, (list, tuple)), +) +findwithin = Dispatch( + (lfindwithin, (list, tuple)), +) # ============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== @@ -1908,36 +2062,38 @@ def lfindwithin(data): # ============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== # ============= THE ARRAY-VERSION OF THE STATS FUNCTIONS =============== -try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE +try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE import Numeric + N = Numeric import LinearAlgebra + LA = LinearAlgebra -# ACENTRAL TENDENCY + # ACENTRAL TENDENCY def ageometricmean(inarray, dimension=None, keepdims=0): """ - Calculates the geometric mean of the values in the passed array. - That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in - the passed array. Use dimension=None to flatten array first. REMEMBER: if - dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and - if dimension is a sequence, it collapses over all specified dimensions. If - keepdims is set to 1, the resulting array will have as many dimensions as - inarray, with only 1 'level' per dim that was collapsed over. - - Usage: ageometricmean(inarray,dimension=None,keepdims=0) - Returns: geometric mean computed over dim(s) listed in dimension - """ + Calculates the geometric mean of the values in the passed array. + That is: n-th root of (x1 * x2 * ... * xn). Defaults to ALL values in + the passed array. Use dimension=None to flatten array first. REMEMBER: if + dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and + if dimension is a sequence, it collapses over all specified dimensions. 
If + keepdims is set to 1, the resulting array will have as many dimensions as + inarray, with only 1 'level' per dim that was collapsed over. + + Usage: ageometricmean(inarray,dimension=None,keepdims=0) + Returns: geometric mean computed over dim(s) listed in dimension + """ inarray = N.array(inarray, N.Float) if dimension is None: inarray = N.ravel(inarray) size = len(inarray) - mult = N.power(inarray, 1.0/size) + mult = N.power(inarray, 1.0 / size) mult = N.multiply.reduce(mult) elif type(dimension) in [int, float]: size = inarray.shape[dimension] - mult = N.power(inarray, 1.0/size) + mult = N.power(inarray, 1.0 / size) mult = N.multiply.reduce(mult, dimension) if keepdims == 1: shp = list(inarray.shape) @@ -1947,7 +2103,7 @@ def ageometricmean(inarray, dimension=None, keepdims=0): dims = sorted(dimension) dims.reverse() size = N.array(N.multiply.reduce(N.take(inarray.shape, dims)), N.Float) - mult = N.power(inarray, 1.0/size) + mult = N.power(inarray, 1.0 / size) for dim in dims: mult = N.multiply.reduce(mult, dim) if keepdims == 1: @@ -1959,17 +2115,17 @@ def ageometricmean(inarray, dimension=None, keepdims=0): def aharmonicmean(inarray, dimension=None, keepdims=0): """ - Calculates the harmonic mean of the values in the passed array. - That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in - the passed array. Use dimension=None to flatten array first. REMEMBER: if - dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and - if dimension is a sequence, it collapses over all specified dimensions. If - keepdims is set to 1, the resulting array will have as many dimensions as - inarray, with only 1 'level' per dim that was collapsed over. - - Usage: aharmonicmean(inarray,dimension=None,keepdims=0) - Returns: harmonic mean computed over dim(s) in dimension - """ + Calculates the harmonic mean of the values in the passed array. + That is: n / (1/x1 + 1/x2 + ... + 1/xn). Defaults to ALL values in + the passed array. 
Use dimension=None to flatten array first. REMEMBER: if + dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and + if dimension is a sequence, it collapses over all specified dimensions. If + keepdims is set to 1, the resulting array will have as many dimensions as + inarray, with only 1 'level' per dim that was collapsed over. + + Usage: aharmonicmean(inarray,dimension=None,keepdims=0) + Returns: harmonic mean computed over dim(s) in dimension + """ inarray = inarray.astype(N.Float) if dimension is None: inarray = N.ravel(inarray) @@ -1977,7 +2133,7 @@ def aharmonicmean(inarray, dimension=None, keepdims=0): s = N.add.reduce(1.0 / inarray) elif type(dimension) in [int, float]: size = float(inarray.shape[dimension]) - s = N.add.reduce(1.0/inarray, dimension) + s = N.add.reduce(1.0 / inarray, dimension) if keepdims == 1: shp = list(inarray.shape) shp[dimension] = 1 @@ -1988,7 +2144,7 @@ def aharmonicmean(inarray, dimension=None, keepdims=0): for i in range(len(inarray.shape)): if i not in dims: nondims.append(i) - tinarray = N.transpose(inarray, nondims+dims) # put keep-dims first + tinarray = N.transpose(inarray, nondims + dims) # put keep-dims first idx = [0] * len(nondims) if idx == []: size = len(N.ravel(inarray)) @@ -1997,10 +2153,10 @@ def aharmonicmean(inarray, dimension=None, keepdims=0): s = N.reshape([s], N.ones(len(inarray.shape))) else: idx[0] = -1 - loopcap = N.array(tinarray.shape[0:len(nondims)]) - 1 - s = N.zeros(loopcap+1, N.Float) + loopcap = N.array(tinarray.shape[0 : len(nondims)]) - 1 + s = N.zeros(loopcap + 1, N.Float) while incr(idx, loopcap) != -1: - s[idx] = asum(1.0/tinarray[idx]) + s[idx] = asum(1.0 / tinarray[idx]) size = N.multiply.reduce(N.take(inarray.shape, dims)) if keepdims == 1: shp = list(inarray.shape) @@ -2011,18 +2167,18 @@ def aharmonicmean(inarray, dimension=None, keepdims=0): def amean(inarray, dimension=None, keepdims=0): """ - Calculates the arithmatic mean of the values in the passed array. 
- That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the - passed array. Use dimension=None to flatten array first. REMEMBER: if - dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and - if dimension is a sequence, it collapses over all specified dimensions. If - keepdims is set to 1, the resulting array will have as many dimensions as - inarray, with only 1 'level' per dim that was collapsed over. - - Usage: amean(inarray,dimension=None,keepdims=0) - Returns: arithematic mean calculated over dim(s) in dimension - """ - if inarray.typecode() in ['l', 's', 'b']: + Calculates the arithmatic mean of the values in the passed array. + That is: 1/n * (x1 + x2 + ... + xn). Defaults to ALL values in the + passed array. Use dimension=None to flatten array first. REMEMBER: if + dimension=0, it collapses over dimension 0 ('rows' in a 2D array) only, and + if dimension is a sequence, it collapses over all specified dimensions. If + keepdims is set to 1, the resulting array will have as many dimensions as + inarray, with only 1 'level' per dim that was collapsed over. + + Usage: amean(inarray,dimension=None,keepdims=0) + Returns: arithematic mean calculated over dim(s) in dimension + """ + if inarray.typecode() in ["l", "s", "b"]: inarray = inarray.astype(N.Float) if dimension is None: inarray = N.ravel(inarray) @@ -2047,48 +2203,48 @@ def amean(inarray, dimension=None, keepdims=0): for dim in dims: shp[dim] = 1 sum = N.reshape(sum, shp) - return sum/denom + return sum / denom def amedian(inarray, numbins=1000): """ - Calculates the COMPUTED median value of an array of numbers, given the - number of bins to use for the histogram (more bins approaches finding the - precise median value of the array; default number of bins = 1000). From - G.W. Heiman's Basic Stats, or CRC Probability & Statistics. - NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first). 
- - Usage: amedian(inarray,numbins=1000) - Returns: median calculated over ALL values in inarray - """ + Calculates the COMPUTED median value of an array of numbers, given the + number of bins to use for the histogram (more bins approaches finding the + precise median value of the array; default number of bins = 1000). From + G.W. Heiman's Basic Stats, or CRC Probability & Statistics. + NOTE: THIS ROUTINE ALWAYS uses the entire passed array (flattens it first). + + Usage: amedian(inarray,numbins=1000) + Returns: median calculated over ALL values in inarray + """ inarray = N.ravel(inarray) (hist, smallest, binsize, extras) = ahistogram(inarray, numbins) - cumhist = N.cumsum(hist) # make cumulative histogram - otherbins = N.greater_equal(cumhist, len(inarray)/2.0) - otherbins = list(otherbins) # list of 0/1s, 1s start at median bin - cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score - LRL = smallest + binsize*cfbin # get lower read limit of that bin - cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. below bin - freq = hist[cfbin] # frequency IN the 50%ile bin - median = LRL + ((len(inarray)/2.0-cfbelow)/float(freq))*binsize # MEDIAN + cumhist = N.cumsum(hist) # make cumulative histogram + otherbins = N.greater_equal(cumhist, len(inarray) / 2.0) + otherbins = list(otherbins) # list of 0/1s, 1s start at median bin + cfbin = otherbins.index(1) # get 1st(!) index holding 50%ile score + LRL = smallest + binsize * cfbin # get lower read limit of that bin + cfbelow = N.add.reduce(hist[0:cfbin]) # cum. freq. below bin + freq = hist[cfbin] # frequency IN the 50%ile bin + median = LRL + ((len(inarray) / 2.0 - cfbelow) / float(freq)) * binsize # MEDIAN return median def amedianscore(inarray, dimension=None): """ - Returns the 'middle' score of the passed array. If there is an even - number of scores, the mean of the 2 middle scores is returned. 
Can function - with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can - be None, to pre-flatten the array, or else dimension must equal 0). + Returns the 'middle' score of the passed array. If there is an even + number of scores, the mean of the 2 middle scores is returned. Can function + with 1D arrays, or on the FIRST dimension of 2D arrays (i.e., dimension can + be None, to pre-flatten the array, or else dimension must equal 0). - Usage: amedianscore(inarray,dimension=None) - Returns: 'middle' score of the array, or the mean of the 2 middle scores - """ + Usage: amedianscore(inarray,dimension=None) + Returns: 'middle' score of the array, or the mean of the 2 middle scores + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 inarray = N.sort(inarray, dimension) - if inarray.shape[dimension] % 2 == 0: # if even number of elements - indx = inarray.shape[dimension]/2 # integer division correct - median = N.asarray(inarray[indx]+inarray[indx-1]) / 2.0 + if inarray.shape[dimension] % 2 == 0: # if even number of elements + indx = inarray.shape[dimension] / 2 # integer division correct + median = N.asarray(inarray[indx] + inarray[indx - 1]) / 2.0 else: indx = inarray.shape[dimension] / 2 # integer division correct median = N.take(inarray, [indx], dimension) @@ -2098,19 +2254,19 @@ def amedianscore(inarray, dimension=None): def amode(a, dimension=None): """ - Returns an array of the modal (most common) score in the passed array. - If there is more than one such score, ONLY THE FIRST is returned. - The bin-count for the modal values is also returned. Operates on whole - array (dimension=None), or on a given dimension. + Returns an array of the modal (most common) score in the passed array. + If there is more than one such score, ONLY THE FIRST is returned. + The bin-count for the modal values is also returned. Operates on whole + array (dimension=None), or on a given dimension. 
- Usage: amode(a, dimension=None) - Returns: array of bin-counts for mode(s), array of corresponding modal values - """ + Usage: amode(a, dimension=None) + Returns: array of bin-counts for mode(s), array of corresponding modal values + """ if dimension is None: a = N.ravel(a) dimension = 0 - scores = pstat.aunique(N.ravel(a)) # get ALL unique values + scores = pstat.aunique(N.ravel(a)) # get ALL unique values testshape = list(a.shape) testshape[dimension] = 1 oldmostfreq = N.zeros(testshape) @@ -2125,15 +2281,15 @@ def amode(a, dimension=None): def atmean(a, limits=None, inclusive=(1, 1)): """ - Returns the arithmetic mean of all values in an array, ignoring values - strictly outside the sequence passed to 'limits'. Note: either limit - in the sequence, or the value of limits itself, can be set to None. The - inclusive list/tuple determines whether the lower and upper limiting bounds - (respectively) are open/exclusive (0) or closed/inclusive (1). + Returns the arithmetic mean of all values in an array, ignoring values + strictly outside the sequence passed to 'limits'. Note: either limit + in the sequence, or the value of limits itself, can be set to None. The + inclusive list/tuple determines whether the lower and upper limiting bounds + (respectively) are open/exclusive (0) or closed/inclusive (1). 
- Usage: atmean(a,limits=None,inclusive=(1,1)) - """ - if a.typecode() in ['l', 's', 'b']: + Usage: atmean(a,limits=None,inclusive=(1,1)) + """ + if a.typecode() in ["l", "s", "b"]: a = a.astype(N.Float) if limits is None: return mean(a) @@ -2153,27 +2309,27 @@ def atmean(a, limits=None, inclusive=(1, 1)): elif limits[0] is not None and limits[1] is None: mask = lowerfcn(a, limits[0]) elif limits[0] is not None and limits[1] is not None: - mask = lowerfcn(a, limits[0])*upperfcn(a, limits[1]) - s = float(N.add.reduce(N.ravel(a*mask))) + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + s = float(N.add.reduce(N.ravel(a * mask))) n = float(N.add.reduce(N.ravel(mask))) - return s/n + return s / n def atvar(a, limits=None, inclusive=(1, 1)): """ - Returns the sample variance of values in an array, (i.e., using N-1), - ignoring values strictly outside the sequence passed to 'limits'. - Note: either limit in the sequence, or the value of limits itself, - can be set to None. The inclusive list/tuple determines whether the lower - and upper limiting bounds (respectively) are open/exclusive (0) or - closed/inclusive (1). - - Usage: atvar(a,limits=None,inclusive=(1,1)) - """ + Returns the sample variance of values in an array, (i.e., using N-1), + ignoring values strictly outside the sequence passed to 'limits'. + Note: either limit in the sequence, or the value of limits itself, + can be set to None. The inclusive list/tuple determines whether the lower + and upper limiting bounds (respectively) are open/exclusive (0) or + closed/inclusive (1). 
+ + Usage: atvar(a,limits=None,inclusive=(1,1)) + """ a = a.astype(N.Float) if limits is None or limits == [None, None]: - term1 = N.add.reduce(N.ravel(a*a)) + term1 = N.add.reduce(N.ravel(a * a)) n = float(len(N.ravel(a))) - 1 - term2 = N.add.reduce(N.ravel(a))**2 / n + term2 = N.add.reduce(N.ravel(a)) ** 2 / n print(term1, term2, n) return (term1 - term2) / n assert type(limits) in [list, tuple, N.ArrayType], "Wrong type for limits in atvar" @@ -2192,21 +2348,21 @@ def atvar(a, limits=None, inclusive=(1, 1)): elif limits[0] is not None and limits[1] is None: mask = lowerfcn(a, limits[0]) elif limits[0] is not None and limits[1] is not None: - mask = lowerfcn(a, limits[0])*upperfcn(a, limits[1]) - term1 = N.add.reduce(N.ravel(a*a*mask)) + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + term1 = N.add.reduce(N.ravel(a * a * mask)) n = float(N.add.reduce(N.ravel(mask))) - 1 - term2 = N.add.reduce(N.ravel(a*mask))**2 / n + term2 = N.add.reduce(N.ravel(a * mask)) ** 2 / n print(term1, term2, n) return (term1 - term2) / n def atmin(a, lowerlimit=None, dimension=None, inclusive=1): """ - Returns the minimum value of a, along dimension, including only values less - than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None, - all values in the array are used. + Returns the minimum value of a, along dimension, including only values less + than (or equal to, if inclusive=1) lowerlimit. If the limit is set to None, + all values in the array are used. 
- Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1) - """ + Usage: atmin(a,lowerlimit=None,dimension=None,inclusive=1) + """ if inclusive: lowerfcn = N.greater else: @@ -2215,19 +2371,19 @@ def atmin(a, lowerlimit=None, dimension=None, inclusive=1): a = N.ravel(a) dimension = 0 if lowerlimit is None: - lowerlimit = N.minimum.reduce(N.ravel(a))-11 + lowerlimit = N.minimum.reduce(N.ravel(a)) - 11 biggest = N.maximum.reduce(N.ravel(a)) ta = N.where(lowerfcn(a, lowerlimit), a, biggest) return N.minimum.reduce(ta, dimension) def atmax(a, upperlimit, dimension=None, inclusive=1): """ - Returns the maximum value of a, along dimension, including only values greater - than (or equal to, if inclusive=1) upperlimit. If the limit is set to None, - a limit larger than the max value in the array is used. + Returns the maximum value of a, along dimension, including only values greater + than (or equal to, if inclusive=1) upperlimit. If the limit is set to None, + a limit larger than the max value in the array is used. - Usage: atmax(a,upperlimit,dimension=None,inclusive=1) - """ + Usage: atmax(a,upperlimit,dimension=None,inclusive=1) + """ if inclusive: upperfcn = N.less else: @@ -2236,34 +2392,34 @@ def atmax(a, upperlimit, dimension=None, inclusive=1): a = N.ravel(a) dimension = 0 if upperlimit is None: - upperlimit = N.maximum.reduce(N.ravel(a))+1 + upperlimit = N.maximum.reduce(N.ravel(a)) + 1 smallest = N.minimum.reduce(N.ravel(a)) ta = N.where(upperfcn(a, upperlimit), a, smallest) return N.maximum.reduce(ta, dimension) def atstdev(a, limits=None, inclusive=(1, 1)): """ - Returns the standard deviation of all values in an array, ignoring values - strictly outside the sequence passed to 'limits'. Note: either limit - in the sequence, or the value of limits itself, can be set to None. The - inclusive list/tuple determines whether the lower and upper limiting bounds - (respectively) are open/exclusive (0) or closed/inclusive (1). 
+ Returns the standard deviation of all values in an array, ignoring values + strictly outside the sequence passed to 'limits'. Note: either limit + in the sequence, or the value of limits itself, can be set to None. The + inclusive list/tuple determines whether the lower and upper limiting bounds + (respectively) are open/exclusive (0) or closed/inclusive (1). - Usage: atstdev(a,limits=None,inclusive=(1,1)) - """ + Usage: atstdev(a,limits=None,inclusive=(1,1)) + """ return N.sqrt(tvar(a, limits, inclusive)) def atsem(a, limits=None, inclusive=(1, 1)): """ - Returns the standard error of the mean for the values in an array, - (i.e., using N for the denominator), ignoring values strictly outside - the sequence passed to 'limits'. Note: either limit in the sequence, - or the value of limits itself, can be set to None. The inclusive list/tuple - determines whether the lower and upper limiting bounds (respectively) are - open/exclusive (0) or closed/inclusive (1). - - Usage: atsem(a,limits=None,inclusive=(1,1)) - """ + Returns the standard error of the mean for the values in an array, + (i.e., using N for the denominator), ignoring values strictly outside + the sequence passed to 'limits'. Note: either limit in the sequence, + or the value of limits itself, can be set to None. The inclusive list/tuple + determines whether the lower and upper limiting bounds (respectively) are + open/exclusive (0) or closed/inclusive (1). 
+ + Usage: atsem(a,limits=None,inclusive=(1,1)) + """ sd = tstdev(a, limits, inclusive) if limits is None or limits == [None, None]: n = float(len(N.ravel(a))) @@ -2283,12 +2439,12 @@ def atsem(a, limits=None, inclusive=(1, 1)): elif limits[0] is not None and limits[1] is None: mask = lowerfcn(a, limits[0]) elif limits[0] is not None and limits[1] is not None: - mask = lowerfcn(a, limits[0])*upperfcn(a, limits[1]) - N.add.reduce(N.ravel(a*a*mask)) + mask = lowerfcn(a, limits[0]) * upperfcn(a, limits[1]) + N.add.reduce(N.ravel(a * a * mask)) n = float(N.add.reduce(N.ravel(mask))) - return sd/math.sqrt(n) + return sd / math.sqrt(n) -# AMOMENTS + # AMOMENTS def amoment(a, moment=1, dimension=None): """ @@ -2308,7 +2464,7 @@ def amoment(a, moment=1, dimension=None): return 0.0 else: mn = amean(a, dimension, 1) # 1=keepdims - s = N.power((a-mn), moment) + s = N.power((a - mn), moment) return amean(s, dimension) def avariation(a, dimension=None): @@ -2320,7 +2476,7 @@ def avariation(a, dimension=None): Usage: avariation(a,dimension=None) """ - return 100.0*asamplestdev(a, dimension)/amean(a, dimension) + return 100.0 * asamplestdev(a, dimension) / amean(a, dimension) def askew(a, dimension=None): """ @@ -2338,7 +2494,7 @@ def askew(a, dimension=None): if isinstance(denom, N.ArrayType) and asum(zero) != 0: print("Number of zeros in askew: ", asum(zero)) denom = denom + zero # prevent divide-by-zero - return N.where(zero, 0, amoment(a, 3, dimension)/denom) + return N.where(zero, 0, amoment(a, 3, dimension) / denom) def akurtosis(a, dimension=None): """ @@ -2356,7 +2512,7 @@ def akurtosis(a, dimension=None): if isinstance(denom, N.ArrayType) and asum(zero) != 0: print("Number of zeros in akurtosis: ", asum(zero)) denom = denom + zero # prevent divide-by-zero - return N.where(zero, 0, amoment(a, 4, dimension)/denom) + return N.where(zero, 0, amoment(a, 4, dimension) / denom) def adescribe(inarray, dimension=None): """ @@ -2378,7 +2534,7 @@ def adescribe(inarray, 
dimension=None): kurt = akurtosis(inarray, dimension) return n, mm, m, sd, skew, kurt -# NORMALITY TESTS + # NORMALITY TESTS def askewtest(a, dimension=None): """ @@ -2395,14 +2551,14 @@ def askewtest(a, dimension=None): dimension = 0 b2 = askew(a, dimension) n = float(a.shape[dimension]) - y = b2 * N.sqrt(((n+1)*(n+3)) / (6.0*(n-2))) - beta2 = (3.0*(n*n+27*n-70)*(n+1)*(n+3)) / ((n-2.0)*(n+5)*(n+7)*(n+9)) - W2 = -1 + N.sqrt(2*(beta2-1)) - delta = 1/N.sqrt(N.log(N.sqrt(W2))) - alpha = N.sqrt(2/(W2-1)) + y = b2 * N.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2))) + beta2 = (3.0 * (n * n + 27 * n - 70) * (n + 1) * (n + 3)) / ((n - 2.0) * (n + 5) * (n + 7) * (n + 9)) + W2 = -1 + N.sqrt(2 * (beta2 - 1)) + delta = 1 / N.sqrt(N.log(N.sqrt(W2))) + alpha = N.sqrt(2 / (W2 - 1)) y = N.where(N.equal(y, 0), 1, y) - Z = delta*N.log(y/alpha + N.sqrt((y/alpha)**2+1)) - return Z, (1.0-zprob(Z))*2 + Z = delta * N.log(y / alpha + N.sqrt((y / alpha) ** 2 + 1)) + return Z, (1.0 - zprob(Z)) * 2 def akurtosistest(a, dimension=None): """ @@ -2421,19 +2577,23 @@ def akurtosistest(a, dimension=None): if n < 20: print("akurtosistest only valid for n>=20 ... 
continuing anyway, n=", n) b2 = akurtosis(a, dimension) - E = 3.0*(n-1) / (n+1) - varb2 = 24.0*n*(n-2)*(n-3) / ((n+1)*(n+1)*(n+3)*(n+5)) - x = (b2-E)/N.sqrt(varb2) - sqrtbeta1 = 6.0*(n*n-5*n+2)/((n+7)*(n+9)) * N.sqrt((6.0*(n+3)*(n+5)) - / (n*(n-2)*(n-3))) - A = 6.0 + 8.0/sqrtbeta1 * (2.0/sqrtbeta1 + N.sqrt(1+4.0/(sqrtbeta1**2))) - term1 = 1 - 2/(9.0*A) - denom = 1 + x*N.sqrt(2/(A-4.0)) + E = 3.0 * (n - 1) / (n + 1) + varb2 = 24.0 * n * (n - 2) * (n - 3) / ((n + 1) * (n + 1) * (n + 3) * (n + 5)) + x = (b2 - E) / N.sqrt(varb2) + sqrtbeta1 = ( + 6.0 + * (n * n - 5 * n + 2) + / ((n + 7) * (n + 9)) + * N.sqrt((6.0 * (n + 3) * (n + 5)) / (n * (n - 2) * (n - 3))) + ) + A = 6.0 + 8.0 / sqrtbeta1 * (2.0 / sqrtbeta1 + N.sqrt(1 + 4.0 / (sqrtbeta1**2))) + term1 = 1 - 2 / (9.0 * A) + denom = 1 + x * N.sqrt(2 / (A - 4.0)) denom = N.where(N.less(denom, 0), 99, denom) - term2 = N.where(N.equal(denom, 0), term1, N.power((1-2.0/A)/denom, 1/3.0)) - Z = (term1 - term2) / N.sqrt(2/(9.0*A)) + term2 = N.where(N.equal(denom, 0), term1, N.power((1 - 2.0 / A) / denom, 1 / 3.0)) + Z = (term1 - term2) / N.sqrt(2 / (9.0 * A)) Z = N.where(N.equal(denom, 99), 0, Z) - return Z, (1.0-zprob(Z))*2 + return Z, (1.0 - zprob(Z)) * 2 def anormaltest(a, dimension=None): """ @@ -2453,7 +2613,7 @@ def anormaltest(a, dimension=None): k2 = N.power(s, 2) + N.power(k, 2) return k2, achisqprob(k2, 2) -# AFREQUENCY FUNCTIONS + # AFREQUENCY FUNCTIONS def aitemfreq(a): """ @@ -2476,65 +2636,65 @@ def ascoreatpercentile(inarray, percent): Returns: score at given percentile, relative to inarray distribution """ percent = percent / 100.0 - targetcf = percent*len(inarray) + targetcf = percent * len(inarray) h, lrl, binsize, extras = histogram(inarray) - cumhist = cumsum(h*1) + cumhist = cumsum(h * 1) for i in range(len(cumhist)): if cumhist[i] >= targetcf: break - score = binsize * ((targetcf - cumhist[i-1]) / float(h[i])) + (lrl+binsize*i) + score = binsize * ((targetcf - cumhist[i - 1]) / float(h[i])) + (lrl + 
binsize * i) return score def apercentileofscore(inarray, score, histbins=10, defaultlimits=None): """ - Note: result of this function depends on the values used to histogram - the data(!). + Note: result of this function depends on the values used to histogram + the data(!). - Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None) - Returns: percentile-position of score (0-100) relative to inarray - """ + Usage: apercentileofscore(inarray,score,histbins=10,defaultlimits=None) + Returns: percentile-position of score (0-100) relative to inarray + """ h, lrl, binsize, extras = histogram(inarray, histbins, defaultlimits) - cumhist = cumsum(h*1) - i = int((score - lrl)/float(binsize)) - pct = (cumhist[i-1]+((score-(lrl+binsize*i))/float(binsize))*h[i])/float(len(inarray)) * 100 + cumhist = cumsum(h * 1) + i = int((score - lrl) / float(binsize)) + pct = (cumhist[i - 1] + ((score - (lrl + binsize * i)) / float(binsize)) * h[i]) / float(len(inarray)) * 100 return pct def ahistogram(inarray, numbins=10, defaultlimits=None, printextras=1): """ - Returns (i) an array of histogram bin counts, (ii) the smallest value - of the histogram binning, and (iii) the bin width (the last 2 are not - necessarily integers). Default number of bins is 10. Defaultlimits - can be None (the routine picks bins spanning all the numbers in the - inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the - following: array of bin values, lowerreallimit, binsize, extrapoints. - - Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1) - Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range) - """ - inarray = N.ravel(inarray) # flatten any >1D arrays - if (defaultlimits is not None): + Returns (i) an array of histogram bin counts, (ii) the smallest value + of the histogram binning, and (iii) the bin width (the last 2 are not + necessarily integers). Default number of bins is 10. 
Defaultlimits + can be None (the routine picks bins spanning all the numbers in the + inarray) or a 2-sequence (lowerlimit, upperlimit). Returns all of the + following: array of bin values, lowerreallimit, binsize, extrapoints. + + Usage: ahistogram(inarray,numbins=10,defaultlimits=None,printextras=1) + Returns: (array of bin counts, bin-minimum, min-width, #-points-outside-range) + """ + inarray = N.ravel(inarray) # flatten any >1D arrays + if defaultlimits is not None: lowerreallimit = defaultlimits[0] upperreallimit = defaultlimits[1] - binsize = (upperreallimit-lowerreallimit) / float(numbins) + binsize = (upperreallimit - lowerreallimit) / float(numbins) else: Min = N.minimum.reduce(inarray) Max = N.maximum.reduce(inarray) - estbinwidth = float(Max - Min)/float(numbins) + 1 - binsize = (Max-Min+estbinwidth)/float(numbins) - lowerreallimit = Min - binsize/2.0 # lower real limit,1st bin + estbinwidth = float(Max - Min) / float(numbins) + 1 + binsize = (Max - Min + estbinwidth) / float(numbins) + lowerreallimit = Min - binsize / 2.0 # lower real limit,1st bin bins = N.zeros(numbins) extrapoints = 0 for num in inarray: try: - if (num-lowerreallimit) < 0: + if (num - lowerreallimit) < 0: extrapoints = extrapoints + 1 else: - bintoincrement = int((num-lowerreallimit) / float(binsize)) + bintoincrement = int((num - lowerreallimit) / float(binsize)) bins[bintoincrement] = bins[bintoincrement] + 1 except Exception: # point outside lower/upper limits extrapoints = extrapoints + 1 - if (extrapoints > 0 and printextras == 1): - print('\nPoints outside given histogram range =', extrapoints) + if extrapoints > 0 and printextras == 1: + print("\nPoints outside given histogram range =", extrapoints) return (bins, lowerreallimit, binsize, extrapoints) def acumfreq(a, numbins=10, defaultreallimits=None): @@ -2547,7 +2707,7 @@ def acumfreq(a, numbins=10, defaultreallimits=None): Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints """ h, l, b, e = 
histogram(a, numbins, defaultreallimits) - cumhist = cumsum(h*1) + cumhist = cumsum(h * 1) return cumhist, l, b, e def arelfreq(a, numbins=10, defaultreallimits=None): @@ -2560,10 +2720,10 @@ def arelfreq(a, numbins=10, defaultreallimits=None): Returns: array of cumfreq bin values, lowerreallimit, binsize, extrapoints """ h, l, b, e = histogram(a, numbins, defaultreallimits) - h = N.array(h/float(a.shape[0])) + h = N.array(h / float(a.shape[0])) return h, l, b, e -# AVARIABILITY FUNCTIONS + # AVARIABILITY FUNCTIONS def aobrientransform(*args): """ @@ -2589,29 +2749,29 @@ def aobrientransform(*args): m[i] = mean(nargs[i]) for j in range(k): for i in range(n[j]): - t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2 - t2 = 0.5*v[j]*(n[j]-1.0) - t3 = (n[j]-1.0)*(n[j]-2.0) - nargs[j][i] = (t1-t2) / float(t3) + t1 = (n[j] - 1.5) * n[j] * (nargs[j][i] - m[j]) ** 2 + t2 = 0.5 * v[j] * (n[j] - 1.0) + t3 = (n[j] - 1.0) * (n[j] - 2.0) + nargs[j][i] = (t1 - t2) / float(t3) check = 1 for j in range(k): if v[j] - mean(nargs[j]) > TINY: check = 0 if check != 1: - raise ValueError('Lack of convergence in obrientransform.') + raise ValueError("Lack of convergence in obrientransform.") else: return N.array(nargs) def asamplevar(inarray, dimension=None, keepdims=0): """ - Returns the sample standard deviation of the values in the passed - array (i.e., using N). Dimension can equal None (ravel array first), - an integer (the dimension over which to operate), or a sequence - (operate over multiple dimensions). Set keepdims=1 to return an array - with the same number of dimensions as inarray. + Returns the sample standard deviation of the values in the passed + array (i.e., using N). Dimension can equal None (ravel array first), + an integer (the dimension over which to operate), or a sequence + (operate over multiple dimensions). Set keepdims=1 to return an array + with the same number of dimensions as inarray. 
- Usage: asamplevar(inarray,dimension=None,keepdims=0) - """ + Usage: asamplevar(inarray,dimension=None,keepdims=0) + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 @@ -2623,7 +2783,7 @@ def asamplevar(inarray, dimension=None, keepdims=0): if isinstance(dimension, list): n = 1 for d in dimension: - n = n*inarray.shape[d] + n = n * inarray.shape[d] else: n = inarray.shape[dimension] svar = ass(deviations, dimension, keepdims) / float(n) @@ -2631,40 +2791,40 @@ def asamplevar(inarray, dimension=None, keepdims=0): def asamplestdev(inarray, dimension=None, keepdims=0): """ - Returns the sample standard deviation of the values in the passed - array (i.e., using N). Dimension can equal None (ravel array first), - an integer (the dimension over which to operate), or a sequence - (operate over multiple dimensions). Set keepdims=1 to return an array - with the same number of dimensions as inarray. + Returns the sample standard deviation of the values in the passed + array (i.e., using N). Dimension can equal None (ravel array first), + an integer (the dimension over which to operate), or a sequence + (operate over multiple dimensions). Set keepdims=1 to return an array + with the same number of dimensions as inarray. - Usage: asamplestdev(inarray,dimension=None,keepdims=0) - """ + Usage: asamplestdev(inarray,dimension=None,keepdims=0) + """ return N.sqrt(asamplevar(inarray, dimension, keepdims)) def asignaltonoise(instack, dimension=0): """ - Calculates signal-to-noise. Dimension can equal None (ravel array - first), an integer (the dimension over which to operate), or a - sequence (operate over multiple dimensions). + Calculates signal-to-noise. Dimension can equal None (ravel array + first), an integer (the dimension over which to operate), or a + sequence (operate over multiple dimensions). 
- Usage: asignaltonoise(instack,dimension=0): - Returns: array containing the value of (mean/stdev) along dimension, - or 0 when stdev=0 - """ + Usage: asignaltonoise(instack,dimension=0): + Returns: array containing the value of (mean/stdev) along dimension, + or 0 when stdev=0 + """ m = mean(instack, dimension) sd = stdev(instack, dimension) - return N.where(N.equal(sd, 0), 0, m/sd) + return N.where(N.equal(sd, 0), 0, m / sd) def avar(inarray, dimension=None, keepdims=0): """ - Returns the estimated population variance of the values in the passed - array (i.e., N-1). Dimension can equal None (ravel array first), an - integer (the dimension over which to operate), or a sequence (operate - over multiple dimensions). Set keepdims=1 to return an array with the - same number of dimensions as inarray. + Returns the estimated population variance of the values in the passed + array (i.e., N-1). Dimension can equal None (ravel array first), an + integer (the dimension over which to operate), or a sequence (operate + over multiple dimensions). Set keepdims=1 to return an array with the + same number of dimensions as inarray. - Usage: avar(inarray,dimension=None,keepdims=0) - """ + Usage: avar(inarray,dimension=None,keepdims=0) + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 @@ -2673,34 +2833,34 @@ def avar(inarray, dimension=None, keepdims=0): if isinstance(dimension, list): n = 1 for d in dimension: - n = n*inarray.shape[d] + n = n * inarray.shape[d] else: n = inarray.shape[dimension] - var = ass(deviations, dimension, keepdims)/float(n-1) + var = ass(deviations, dimension, keepdims) / float(n - 1) return var def astdev(inarray, dimension=None, keepdims=0): """ - Returns the estimated population standard deviation of the values in - the passed array (i.e., N-1). Dimension can equal None (ravel array - first), an integer (the dimension over which to operate), or a - sequence (operate over multiple dimensions). 
Set keepdims=1 to return - an array with the same number of dimensions as inarray. + Returns the estimated population standard deviation of the values in + the passed array (i.e., N-1). Dimension can equal None (ravel array + first), an integer (the dimension over which to operate), or a + sequence (operate over multiple dimensions). Set keepdims=1 to return + an array with the same number of dimensions as inarray. - Usage: astdev(inarray,dimension=None,keepdims=0) - """ + Usage: astdev(inarray,dimension=None,keepdims=0) + """ return N.sqrt(avar(inarray, dimension, keepdims)) def asterr(inarray, dimension=None, keepdims=0): """ - Returns the estimated population standard error of the values in the - passed array (i.e., N-1). Dimension can equal None (ravel array - first), an integer (the dimension over which to operate), or a - sequence (operate over multiple dimensions). Set keepdims=1 to return - an array with the same number of dimensions as inarray. + Returns the estimated population standard error of the values in the + passed array (i.e., N-1). Dimension can equal None (ravel array + first), an integer (the dimension over which to operate), or a + sequence (operate over multiple dimensions). Set keepdims=1 to return + an array with the same number of dimensions as inarray. - Usage: asterr(inarray,dimension=None,keepdims=0) - """ + Usage: asterr(inarray,dimension=None,keepdims=0) + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 @@ -2708,44 +2868,44 @@ def asterr(inarray, dimension=None, keepdims=0): def asem(inarray, dimension=None, keepdims=0): """ - Returns the standard error of the mean (i.e., using N) of the values - in the passed array. Dimension can equal None (ravel array first), an - integer (the dimension over which to operate), or a sequence (operate - over multiple dimensions). Set keepdims=1 to return an array with the - same number of dimensions as inarray. 
+ Returns the standard error of the mean (i.e., using N) of the values + in the passed array. Dimension can equal None (ravel array first), an + integer (the dimension over which to operate), or a sequence (operate + over multiple dimensions). Set keepdims=1 to return an array with the + same number of dimensions as inarray. - Usage: asem(inarray,dimension=None, keepdims=0) - """ + Usage: asem(inarray,dimension=None, keepdims=0) + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 if isinstance(dimension, list): n = 1 for d in dimension: - n = n*inarray.shape[d] + n = n * inarray.shape[d] else: n = inarray.shape[dimension] - s = asamplestdev(inarray, dimension, keepdims) / N.sqrt(n-1) + s = asamplestdev(inarray, dimension, keepdims) / N.sqrt(n - 1) return s def az(a, score): """ - Returns the z-score of a given input score, given thearray from which - that score came. Not appropriate for population calculations, nor for - arrays > 1D. + Returns the z-score of a given input score, given thearray from which + that score came. Not appropriate for population calculations, nor for + arrays > 1D. - Usage: az(a, score) - """ - z = (score-amean(a)) / asamplestdev(a) + Usage: az(a, score) + """ + z = (score - amean(a)) / asamplestdev(a) return z def azs(a): """ - Returns a 1D array of z-scores, one for each score in the passed array, - computed relative to the passed array. + Returns a 1D array of z-scores, one for each score in the passed array, + computed relative to the passed array. - Usage: azs(a) - """ + Usage: azs(a) + """ zscores = [] for item in a: zscores.append(z(a, item)) @@ -2753,17 +2913,17 @@ def azs(a): def azmap(scores, compare, dimension=0): """ - Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to - array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0 - of the compare array. 
+ Returns an array of z-scores the shape of scores (e.g., [x,y]), compared to + array passed to compare (e.g., [time,x,y]). Assumes collapsing over dim 0 + of the compare array. - Usage: azs(scores, compare, dimension=0) - """ + Usage: azs(scores, compare, dimension=0) + """ mns = amean(compare, dimension) sstd = asamplestdev(compare, 0) return (scores - mns) / sstd -# ATRIMMING FUNCTIONS + # ATRIMMING FUNCTIONS def around(a, digits=1): """ @@ -2772,6 +2932,7 @@ def around(a, digits=1): Usage: around(a,digits) Returns: a, where each value is rounded to 'digits' decimals """ + def ar(x, d=digits): return round(x, d) @@ -2779,30 +2940,30 @@ def ar(x, d=digits): try: a = N.array(a) except Exception: - a = N.array(a, 'O') + a = N.array(a, "O") shp = a.shape - if a.typecode() in ['f', 'F', 'd', 'D']: + if a.typecode() in ["f", "F", "d", "D"]: b = N.ravel(a) b = N.array([ar(_) for _ in b]) b.shape = shp - elif a.typecode() in ['o', 'O']: - b = N.ravel(a)*1 + elif a.typecode() in ["o", "O"]: + b = N.ravel(a) * 1 for i in range(len(b)): if isinstance(b[i], float): b[i] = round(b[i], digits) b.shape = shp else: # not a float, double or Object array - b = a*1 + b = a * 1 return b def athreshold(a, threshmin=None, threshmax=None, newval=0): """ - Like Numeric.clip() except that values threshmax are replaced - by newval instead of by threshmin/threshmax (respectively). + Like Numeric.clip() except that values threshmax are replaced + by newval instead of by threshmin/threshmax (respectively). 
- Usage: athreshold(a,threshmin=None,threshmax=None,newval=0) - Returns: a, with values threshmax replaced with newval - """ + Usage: athreshold(a,threshmin=None,threshmax=None,newval=0) + Returns: a, with values threshmax replaced with newval + """ mask = N.zeros(a.shape) if threshmin is not None: mask = mask + N.where(N.less(a, threshmin), 1, 0) @@ -2813,21 +2974,21 @@ def athreshold(a, threshmin=None, threshmax=None, newval=0): def atrimboth(a, proportiontocut): """ - Slices off the passed proportion of items from BOTH ends of the passed - array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND - 'rightmost' 10% of scores. You must pre-sort the array if you want - "proper" trimming. Slices off LESS if proportion results in a - non-integer slice index (i.e., conservatively slices off - proportiontocut). - - Usage: atrimboth (a,proportiontocut) - Returns: trimmed version of array a - """ - lowercut = int(proportiontocut*len(a)) + Slices off the passed proportion of items from BOTH ends of the passed + array (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND + 'rightmost' 10% of scores. You must pre-sort the array if you want + "proper" trimming. Slices off LESS if proportion results in a + non-integer slice index (i.e., conservatively slices off + proportiontocut). 
+ + Usage: atrimboth (a,proportiontocut) + Returns: trimmed version of array a + """ + lowercut = int(proportiontocut * len(a)) uppercut = len(a) - lowercut return a[lowercut:uppercut] - def atrim1(a, proportiontocut, tail='right'): + def atrim1(a, proportiontocut, tail="right"): """ Slices off the passed proportion of items from ONE end of the passed array (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost' @@ -2837,15 +2998,15 @@ def atrim1(a, proportiontocut, tail='right'): Usage: atrim1(a,proportiontocut,tail='right') or set tail='left' Returns: trimmed version of array a """ - if string.lower(tail) == 'right': + if string.lower(tail) == "right": lowercut = 0 - uppercut = len(a) - int(proportiontocut*len(a)) - elif string.lower(tail) == 'left': - lowercut = int(proportiontocut*len(a)) + uppercut = len(a) - int(proportiontocut * len(a)) + elif string.lower(tail) == "left": + lowercut = int(proportiontocut * len(a)) uppercut = len(a) return a[lowercut:uppercut] -# ACORRELATION FUNCTIONS + # ACORRELATION FUNCTIONS def acovariance(X): """ @@ -2879,59 +3040,62 @@ def apaired(x, y): Usage: apaired(x,y) x,y = the two arrays of values to be compared Returns: appropriate statistic name, value, and probability """ - samples = '' - while samples not in ['i', 'r', 'I', 'R', 'c', 'C']: - print('\nIndependent or related samples, or correlation (i,r,c): ', end=' ') + samples = "" + while samples not in ["i", "r", "I", "R", "c", "C"]: + print("\nIndependent or related samples, or correlation (i,r,c): ", end=" ") samples = input() - if samples in ['i', 'I', 'r', 'R']: - print('\nComparing variances ...', end=' ') + if samples in ["i", "I", "r", "R"]: + print("\nComparing variances ...", end=" ") # USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112 r = obrientransform(x, y) f, p = F_oneway(pstat.colex(r, 0), pstat.colex(r, 1)) if p < 0.05: - vartype = 'unequal, p='+str(round(p, 4)) + vartype = "unequal, p=" + str(round(p, 4)) else: - vartype 
= 'equal' + vartype = "equal" print(vartype) - if samples in ['i', 'I']: - if vartype[0] == 'e': + if samples in ["i", "I"]: + if vartype[0] == "e": t, p = ttest_ind(x, y, None, 0) - print('\nIndependent samples t-test: ', round(t, 4), round(p, 4)) + print("\nIndependent samples t-test: ", round(t, 4), round(p, 4)) else: if len(x) > 20 or len(y) > 20: z, p = ranksums(x, y) - print('\nRank Sums test (NONparametric, n>20): ', round(z, 4), round(p, 4)) + print("\nRank Sums test (NONparametric, n>20): ", round(z, 4), round(p, 4)) else: u, p = mannwhitneyu(x, y) - print('\nMann-Whitney U-test (NONparametric, ns<20): ', round(u, 4), round(p, 4)) + print("\nMann-Whitney U-test (NONparametric, ns<20): ", round(u, 4), round(p, 4)) else: # RELATED SAMPLES - if vartype[0] == 'e': + if vartype[0] == "e": t, p = ttest_rel(x, y, 0) - print('\nRelated samples t-test: ', round(t, 4), round(p, 4)) + print("\nRelated samples t-test: ", round(t, 4), round(p, 4)) else: t, p = ranksums(x, y) - print('\nWilcoxon T-test (NONparametric): ', round(t, 4), round(p, 4)) + print("\nWilcoxon T-test (NONparametric): ", round(t, 4), round(p, 4)) else: # CORRELATION ANALYSIS - corrtype = '' - while corrtype not in ['c', 'C', 'r', 'R', 'd', 'D']: - print('\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ', end=' ') + corrtype = "" + while corrtype not in ["c", "C", "r", "R", "d", "D"]: + print("\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ", end=" ") corrtype = input() - if corrtype in ['c', 'C']: + if corrtype in ["c", "C"]: m, b, r, p, see = linregress(x, y) - print('\nLinear regression for continuous variables ...') - lol = [['Slope', 'Intercept', 'r', 'Prob', 'SEestimate'], [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)]] + print("\nLinear regression for continuous variables ...") + lol = [ + ["Slope", "Intercept", "r", "Prob", "SEestimate"], + [round(m, 4), round(b, 4), round(r, 4), round(p, 4), round(see, 4)], + ] pstat.printcc(lol) - elif corrtype in 
['r', 'R']: + elif corrtype in ["r", "R"]: r, p = spearmanr(x, y) - print('\nCorrelation for ranked variables ...') + print("\nCorrelation for ranked variables ...") print("Spearman's r: ", round(r, 4), round(p, 4)) else: # DICHOTOMOUS r, p = pointbiserialr(x, y) - print('\nAssuming x contains a dichotomous variable ...') - print('Point Biserial r: ', round(r, 4), round(p, 4)) - print('\n\n') + print("\nAssuming x contains a dichotomous variable ...") + print("Point Biserial r: ", round(r, 4), round(p, 4)) + print("\n\n") return None def apearsonr(x, y, verbose=1): @@ -2944,12 +3108,12 @@ def apearsonr(x, y, verbose=1): """ TINY = 1.0e-20 n = len(x) - r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y) - r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y))) - r = (r_num / r_den) - df = n-2 - t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) - prob = abetai(0.5*df, 0.5, df/(df+t*t), verbose) + r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y) + r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) * (n * ass(y) - asquare_of_sums(y))) + r = r_num / r_den + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t), verbose) return r, prob def aspearmanr(x, y): @@ -2963,11 +3127,11 @@ def aspearmanr(x, y): n = len(x) rankx = rankdata(x) ranky = rankdata(y) - dsq = N.add.reduce((rankx-ranky)**2) - rs = 1 - 6*dsq / float(n*(n**2-1)) - t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs))) - df = n-2 - probrs = abetai(0.5*df, 0.5, df/(df+t*t)) + dsq = N.add.reduce((rankx - ranky) ** 2) + rs = 1 - 6 * dsq / float(n * (n**2 - 1)) + t = rs * math.sqrt((n - 2) / ((rs + 1.0) * (1.0 - rs))) + df = n - 2 + probrs = abetai(0.5 * df, 0.5, df / (df + t * t)) # probability values for rs are from part 2 of the spearman function in # Numerical Recipies, p.510. They close to tables, but not exact.(?) 
return rs, probrs @@ -2986,7 +3150,7 @@ def apointbiserialr(x, y): data = pstat.aabut(x, y) if len(categories) != 2: raise ValueError("Exactly 2 categories required (in x) for pointbiserialr().") - else: # there are 2 categories, continue + else: # there are 2 categories, continue codemap = pstat.aabut(categories, N.arange(2)) pstat.arecode(data, codemap, 0) # recoded x = pstat.alinexand(data, 0, categories[0]) @@ -2994,30 +3158,30 @@ def apointbiserialr(x, y): xmean = amean(pstat.acolex(x, 1)) ymean = amean(pstat.acolex(y, 1)) n = len(data) - adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n))) - rpb = (ymean - xmean)/asamplestdev(pstat.acolex(data, 1))*adjust - df = n-2 - t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY))) - prob = abetai(0.5*df, 0.5, df/(df+t*t)) + adjust = math.sqrt((len(x) / float(n)) * (len(y) / float(n))) + rpb = (ymean - xmean) / asamplestdev(pstat.acolex(data, 1)) * adjust + df = n - 2 + t = rpb * math.sqrt(df / ((1.0 - rpb + TINY) * (1.0 + rpb + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) return rpb, prob def akendalltau(x, y): """ - Calculates Kendall's tau ... correlation of ordinal data. Adapted - from function kendl1 in Numerical Recipies. Needs good test-cases.@@@ + Calculates Kendall's tau ... correlation of ordinal data. Adapted + from function kendl1 in Numerical Recipies. 
Needs good test-cases.@@@ - Usage: akendalltau(x,y) - Returns: Kendall's tau, two-tailed p-value - """ + Usage: akendalltau(x,y) + Returns: Kendall's tau, two-tailed p-value + """ n1 = 0 n2 = 0 iss = 0 - for j in range(len(x)-1): + for j in range(len(x) - 1): for k in range(j, len(y)): a1 = x[j] - x[k] a2 = y[j] - y[k] aa = a1 * a2 - if (aa): # neither array has a tie + if aa: # neither array has a tie n1 = n1 + 1 n2 = n2 + 1 if aa > 0: @@ -3025,25 +3189,25 @@ def akendalltau(x, y): else: iss = iss - 1 else: - if (a1): + if a1: n1 = n1 + 1 else: n2 = n2 + 1 - tau = iss / math.sqrt(n1*n2) - svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1)) + tau = iss / math.sqrt(n1 * n2) + svar = (4.0 * len(x) + 10.0) / (9.0 * len(x) * (len(x) - 1)) z = tau / math.sqrt(svar) - prob = erfcc(abs(z)/1.4142136) + prob = erfcc(abs(z) / 1.4142136) return tau, prob def alinregress(*args): """ - Calculates a regression line on two arrays, x and y, corresponding to x,y - pairs. If a single 2D array is passed, alinregress finds dim with 2 levels - and splits data into x,y pairs along that dim. + Calculates a regression line on two arrays, x and y, corresponding to x,y + pairs. If a single 2D array is passed, alinregress finds dim with 2 levels + and splits data into x,y pairs along that dim. - Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array - Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate - """ + Usage: alinregress(*args) args=2 equal-length arrays, or one 2D array + Returns: slope, intercept, r, two-tailed prob, sterr-of-the-estimate + """ TINY = 1.0e-20 if len(args) == 1: # more than 1D array? 
args = args[0] @@ -3059,20 +3223,20 @@ def alinregress(*args): n = len(x) xmean = amean(x) ymean = amean(y) - r_num = n*(N.add.reduce(x*y)) - N.add.reduce(x)*N.add.reduce(y) - r_den = math.sqrt((n*ass(x) - asquare_of_sums(x))*(n*ass(y)-asquare_of_sums(y))) + r_num = n * (N.add.reduce(x * y)) - N.add.reduce(x) * N.add.reduce(y) + r_den = math.sqrt((n * ass(x) - asquare_of_sums(x)) * (n * ass(y) - asquare_of_sums(y))) r = r_num / r_den - df = n-2 - t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY))) - prob = abetai(0.5*df, 0.5, df/(df+t*t)) - slope = r_num / (float(n)*ass(x) - asquare_of_sums(x)) - intercept = ymean - slope*xmean - sterrest = math.sqrt(1-r*r)*asamplestdev(y) + df = n - 2 + t = r * math.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY))) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) + slope = r_num / (float(n) * ass(x) - asquare_of_sums(x)) + intercept = ymean - slope * xmean + sterrest = math.sqrt(1 - r * r) * asamplestdev(y) return slope, intercept, r, prob, sterrest -# AINFERENTIAL STATISTICS + # AINFERENTIAL STATISTICS - def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a'): + def attest_1samp(a, popmean, printit=0, name="Sample", writemode="a"): """ Calculates the t-obtained for the independent samples T-test on ONE group of scores a, given a population mean. If printit=1, results are printed @@ -3087,33 +3251,47 @@ def attest_1samp(a, popmean, printit=0, name='Sample', writemode='a'): x = amean(a) v = avar(a) n = len(a) - df = n-1 - svar = ((n-1)*v) / float(df) - t = (x-popmean)/math.sqrt(svar*(1.0/n)) - prob = abetai(0.5*df, 0.5, df/(df+t*t)) + df = n - 1 + svar = ((n - 1) * v) / float(df) + t = (x - popmean) / math.sqrt(svar * (1.0 / n)) + prob = abetai(0.5 * df, 0.5, df / (df + t * t)) if printit != 0: - statname = 'Single-sample T-test.' 
- outputpairedstats(printit, writemode, - 'Population', '--', popmean, 0, 0, 0, - name, n, x, v, N.minimum.reduce(N.ravel(a)), - N.maximum.reduce(N.ravel(a)), - statname, t, prob) + statname = "Single-sample T-test." + outputpairedstats( + printit, + writemode, + "Population", + "--", + popmean, + 0, + 0, + 0, + name, + n, + x, + v, + N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + statname, + t, + prob, + ) return t, prob - def attest_ind(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', writemode='a'): + def attest_ind(a, b, dimension=None, printit=0, name1="Samp1", name2="Samp2", writemode="a"): + """ + Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores + a, and b. From Numerical Recipies, p.483. If printit=1, results are + printed to the screen. If printit='filename', the results are output + to 'filename' using the given writemode (default=append). Dimension + can equal None (ravel array first), or an integer (the dimension over + which to operate on a and b). + + Usage: attest_ind (a,b,dimension=None,printit=0, + Name1='Samp1',Name2='Samp2',writemode='a') + Returns: t-value, two-tailed p-value """ - Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores - a, and b. From Numerical Recipies, p.483. If printit=1, results are - printed to the screen. If printit='filename', the results are output - to 'filename' using the given writemode (default=append). Dimension - can equal None (ravel array first), or an integer (the dimension over - which to operate on a and b). 
- - Usage: attest_ind (a,b,dimension=None,printit=0, - Name1='Samp1',Name2='Samp2',writemode='a') - Returns: t-value, two-tailed p-value - """ if dimension is None: a = N.ravel(a) b = N.ravel(b) @@ -3124,13 +3302,13 @@ def attest_ind(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', wr v2 = avar(b, dimension) n1 = a.shape[dimension] n2 = b.shape[dimension] - df = n1+n2-2 - svar = ((n1-1)*v1+(n2-1)*v2) / float(df) + df = n1 + n2 - 2 + svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / float(df) zerodivproblem = N.equal(svar, 0) svar = N.where(zerodivproblem, 1, svar) # avoid zero-division in 1st place - t = (x1-x2)/N.sqrt(svar*(1.0/n1 + 1.0/n2)) # N-D COMPUTATION HERE!!!!!! - t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 - probs = abetai(0.5*df, 0.5, float(df)/(df+t*t)) + t = (x1 - x2) / N.sqrt(svar * (1.0 / n1 + 1.0 / n2)) # N-D COMPUTATION HERE!!!!!! + t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) if isinstance(t, N.ArrayType): probs = N.reshape(probs, t.shape) @@ -3142,99 +3320,125 @@ def attest_ind(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', wr t = t[0] if isinstance(probs, N.ArrayType): probs = probs[0] - statname = 'Independent samples T-test.' - outputpairedstats(printit, writemode, - name1, n1, x1, v1, N.minimum.reduce(N.ravel(a)), - N.maximum.reduce(N.ravel(a)), - name2, n2, x2, v2, N.minimum.reduce(N.ravel(b)), - N.maximum.reduce(N.ravel(b)), - statname, t, probs) + statname = "Independent samples T-test." 
+ outputpairedstats( + printit, + writemode, + name1, + n1, + x1, + v1, + N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + name2, + n2, + x2, + v2, + N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), + statname, + t, + probs, + ) return return t, probs - def attest_rel(a, b, dimension=None, printit=0, name1='Samp1', name2='Samp2', writemode='a'): + def attest_rel(a, b, dimension=None, printit=0, name1="Samp1", name2="Samp2", writemode="a"): + """ + Calculates the t-obtained T-test on TWO RELATED samples of scores, a + and b. From Numerical Recipies, p.483. If printit=1, results are + printed to the screen. If printit='filename', the results are output + to 'filename' using the given writemode (default=append). Dimension + can equal None (ravel array first), or an integer (the dimension over + which to operate on a and b). + + Usage: attest_rel(a,b,dimension=None,printit=0, + name1='Samp1',name2='Samp2',writemode='a') + Returns: t-value, two-tailed p-value """ - Calculates the t-obtained T-test on TWO RELATED samples of scores, a - and b. From Numerical Recipies, p.483. If printit=1, results are - printed to the screen. If printit='filename', the results are output - to 'filename' using the given writemode (default=append). Dimension - can equal None (ravel array first), or an integer (the dimension over - which to operate on a and b). 
- - Usage: attest_rel(a,b,dimension=None,printit=0, - name1='Samp1',name2='Samp2',writemode='a') - Returns: t-value, two-tailed p-value - """ if dimension is None: a = N.ravel(a) b = N.ravel(b) dimension = 0 if len(a) != len(b): - raise ValueError('Unequal length arrays.') + raise ValueError("Unequal length arrays.") x1 = amean(a, dimension) x2 = amean(b, dimension) v1 = avar(a, dimension) v2 = avar(b, dimension) n = a.shape[dimension] - df = float(n-1) - d = (a-b).astype('d') + df = float(n - 1) + d = (a - b).astype("d") - denom = N.sqrt((n*N.add.reduce(d*d, dimension) - N.add.reduce(d, dimension)**2) / df) + denom = N.sqrt((n * N.add.reduce(d * d, dimension) - N.add.reduce(d, dimension) ** 2) / df) zerodivproblem = N.equal(denom, 0) denom = N.where(zerodivproblem, 1, denom) # avoid zero-division in 1st place - t = N.add.reduce(d, dimension) / denom # N-D COMPUTATION HERE!!!!!! - t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 - probs = abetai(0.5*df, 0.5, float(df)/(df+t*t)) + t = N.add.reduce(d, dimension) / denom # N-D COMPUTATION HERE!!!!!! + t = N.where(zerodivproblem, 1.0, t) # replace NaN/wrong t-values with 1.0 + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) if isinstance(t, N.ArrayType): probs = N.reshape(probs, t.shape) if len(probs) == 1: probs = probs[0] if printit != 0: - statname = 'Related samples T-test.' - outputpairedstats(printit, writemode, - name1, n, x1, v1, N.minimum.reduce(N.ravel(a)), - N.maximum.reduce(N.ravel(a)), - name2, n, x2, v2, N.minimum.reduce(N.ravel(b)), - N.maximum.reduce(N.ravel(b)), - statname, t, probs) + statname = "Related samples T-test." 
+ outputpairedstats( + printit, + writemode, + name1, + n, + x1, + v1, + N.minimum.reduce(N.ravel(a)), + N.maximum.reduce(N.ravel(a)), + name2, + n, + x2, + v2, + N.minimum.reduce(N.ravel(b)), + N.maximum.reduce(N.ravel(b)), + statname, + t, + probs, + ) return return t, probs def achisquare(f_obs, f_exp=None): """ - Calculates a one-way chi square for array of observed frequencies and returns - the result. If no expected frequencies are given, the total N is assumed to - be equally distributed across all groups. + Calculates a one-way chi square for array of observed frequencies and returns + the result. If no expected frequencies are given, the total N is assumed to + be equally distributed across all groups. - Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq. - Returns: chisquare-statistic, associated p-value - """ + Usage: achisquare(f_obs, f_exp=None) f_obs = array of observed cell freq. + Returns: chisquare-statistic, associated p-value + """ k = len(f_obs) if f_exp is None: - f_exp = N.array([sum(f_obs)/float(k)] * len(f_obs), N.Float) + f_exp = N.array([sum(f_obs) / float(k)] * len(f_obs), N.Float) f_exp = f_exp.astype(N.Float) - chisq = N.add.reduce((f_obs-f_exp)**2 / f_exp) - return chisq, chisqprob(chisq, k-1) + chisq = N.add.reduce((f_obs - f_exp) ** 2 / f_exp) + return chisq, chisqprob(chisq, k - 1) def aks_2samp(data1, data2): """ - Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from - Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc- - like. + Computes the Kolmogorov-Smirnof statistic on 2 samples. Modified from + Numerical Recipies in C, page 493. Returns KS D-value, prob. Not ufunc- + like. 
- Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays - Returns: KS D-value, p-value - """ - j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE - j2 = 0 # N.zeros(data2.shape[1:]) + Usage: aks_2samp(data1,data2) where data1 and data2 are 1D arrays + Returns: KS D-value, p-value + """ + j1 = 0 # N.zeros(data1.shape[1:]) TRIED TO MAKE THIS UFUNC-LIKE + j2 = 0 # N.zeros(data2.shape[1:]) fn1 = 0.0 # N.zeros(data1.shape[1:],N.Float) fn2 = 0.0 # N.zeros(data2.shape[1:],N.Float) n1 = data1.shape[0] n2 = data2.shape[0] - en1 = n1*1 - en2 = n2*1 + en1 = n1 * 1 + en2 = n2 * 1 d = N.zeros(data1.shape[1:], N.Float) data1 = N.sort(data1, 0) data2 = N.sort(data2, 0) @@ -3242,17 +3446,17 @@ def aks_2samp(data1, data2): d1 = data1[j1] d2 = data2[j2] if d1 <= d2: - fn1 = (j1)/float(en1) + fn1 = (j1) / float(en1) j1 = j1 + 1 if d2 <= d1: - fn2 = (j2)/float(en2) + fn2 = (j2) / float(en2) j2 = j2 + 1 - dt = (fn2-fn1) + dt = fn2 - fn1 if abs(dt) > abs(d): d = dt try: - en = math.sqrt(en1*en2/float(en1+en2)) - prob = aksprob((en+0.12+0.11/en)*N.fabs(d)) + en = math.sqrt(en1 * en2 / float(en1 + en2)) + prob = aksprob((en + 0.12 + 0.11 / en) * N.fabs(d)) except Exception: prob = 1.0 return d, prob @@ -3271,16 +3475,16 @@ def amannwhitneyu(x, y): n1 = len(x) n2 = len(y) ranked = rankdata(N.concatenate((x, y))) - rankx = ranked[0:n1] # get the x-ranks - u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x - u2 = n1*n2 - u1 # remainder is U for y + rankx = ranked[0:n1] # get the x-ranks + u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - sum(rankx) # calc U for x + u2 = n1 * n2 - u1 # remainder is U for y bigu = max(u1, u2) smallu = min(u1, u2) T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores if T == 0: - raise ValueError('All numbers are identical in amannwhitneyu') - sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0) - z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc + raise ValueError("All numbers are identical in amannwhitneyu") + sd = 
math.sqrt(T * n1 * n2 * (n1 + n2 + 1) / 12.0) + z = abs((bigu - n1 * n2 / 2.0) / sd) # normal approximation for prob calc return smallu, 1.0 - zprob(z) def atiecorrect(rankvals): @@ -3297,25 +3501,25 @@ def atiecorrect(rankvals): n = len(sorted) T = 0.0 i = 0 - while (i < n-1): - if sorted[i] == sorted[i+1]: + while i < n - 1: + if sorted[i] == sorted[i + 1]: nties = 1 - while (i < n-1) and (sorted[i] == sorted[i+1]): + while (i < n - 1) and (sorted[i] == sorted[i + 1]): nties = nties + 1 i = i + 1 T = T + nties**3 - nties - i = i+1 - T = T / float(n**3-n) + i = i + 1 + T = T / float(n**3 - n) return 1.0 - T def aranksums(x, y): """ - Calculates the rank sums statistic on the provided scores and returns - the result. + Calculates the rank sums statistic on the provided scores and returns + the result. - Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions - Returns: z-statistic, two-tailed p-value - """ + Usage: aranksums(x,y) where x,y are arrays of values for 2 conditions + Returns: z-statistic, two-tailed p-value + """ n1 = len(x) n2 = len(y) alldata = N.concatenate((x, y)) @@ -3323,22 +3527,22 @@ def aranksums(x, y): x = ranked[:n1] y = ranked[n1:] s = sum(x) - expected = n1*(n1+n2+1) / 2.0 - z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0) - prob = 2*(1.0 - zprob(abs(z))) + expected = n1 * (n1 + n2 + 1) / 2.0 + z = (s - expected) / math.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.0) + prob = 2 * (1.0 - zprob(abs(z))) return z, prob def awilcoxont(x, y): """ - Calculates the Wilcoxon T-test for related samples and returns the - result. A non-parametric T-test. + Calculates the Wilcoxon T-test for related samples and returns the + result. A non-parametric T-test. 
- Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions - Returns: t-statistic, two-tailed p-value - """ + Usage: awilcoxont(x,y) where x,y are equal-length arrays for 2 conditions + Returns: t-statistic, two-tailed p-value + """ if len(x) != len(y): - raise ValueError('Unequal N in awilcoxont. Aborting.') - d = x-y + raise ValueError("Unequal N in awilcoxont. Aborting.") + d = x - y d = N.compress(N.not_equal(d, 0), d) # Keep all non-zero differences count = len(d) absd = abs(d) @@ -3351,26 +3555,26 @@ def awilcoxont(x, y): else: r_plus = r_plus + absranked[i] wt = min(r_plus, r_minus) - mn = count * (count+1) * 0.25 - se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0) - z = math.fabs(wt-mn) / se - z = math.fabs(wt-mn) / se - prob = 2*(1.0 - zprob(abs(z))) + mn = count * (count + 1) * 0.25 + se = math.sqrt(count * (count + 1) * (2.0 * count + 1.0) / 24.0) + z = math.fabs(wt - mn) / se + z = math.fabs(wt - mn) / se + prob = 2 * (1.0 - zprob(abs(z))) return wt, prob def akruskalwallish(*args): """ - The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more - groups, requiring at least 5 subjects in each group. This function - calculates the Kruskal-Wallis H and associated p-value for 3 or more - independent samples. + The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more + groups, requiring at least 5 subjects in each group. This function + calculates the Kruskal-Wallis H and associated p-value for 3 or more + independent samples. 
- Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions - Returns: H-statistic (corrected for ties), associated p-value - """ + Usage: akruskalwallish(*args) args are separate arrays for 3+ conditions + Returns: H-statistic (corrected for ties), associated p-value + """ assert len(args) == 3, "Need at least 3 groups in stats.akruskalwallish()" args = list(args) - n = [0]*len(args) + n = [0] * len(args) n = [len(_) for _ in args] all = [] for i in range(len(args)): @@ -3378,55 +3582,55 @@ def akruskalwallish(*args): ranked = rankdata(all) T = tiecorrect(ranked) for i in range(len(args)): - args[i] = ranked[0:n[i]] - del ranked[0:n[i]] + args[i] = ranked[0 : n[i]] + del ranked[0 : n[i]] rsums = [] for i in range(len(args)): - rsums.append(sum(args[i])**2) + rsums.append(sum(args[i]) ** 2) rsums[i] = rsums[i] / float(n[i]) ssbn = sum(rsums) totaln = sum(n) - h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1) + h = 12.0 / (totaln * (totaln + 1)) * ssbn - 3 * (totaln + 1) df = len(args) - 1 if T == 0: - raise ValueError('All numbers are identical in akruskalwallish') + raise ValueError("All numbers are identical in akruskalwallish") h = h / float(T) return h, chisqprob(h, df) def afriedmanchisquare(*args): """ - Friedman Chi-Square is a non-parametric, one-way within-subjects - ANOVA. This function calculates the Friedman Chi-square test for - repeated measures and returns the result, along with the associated - probability value. It assumes 3 or more repeated measures. Only 3 - levels requires a minimum of 10 subjects in the study. Four levels - requires 5 subjects per level(??). - - Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions - Returns: chi-square statistic, associated p-value - """ + Friedman Chi-Square is a non-parametric, one-way within-subjects + ANOVA. This function calculates the Friedman Chi-square test for + repeated measures and returns the result, along with the associated + probability value. 
It assumes 3 or more repeated measures. Only 3 + levels requires a minimum of 10 subjects in the study. Four levels + requires 5 subjects per level(??). + + Usage: afriedmanchisquare(*args) args are separate arrays for 2+ conditions + Returns: chi-square statistic, associated p-value + """ k = len(args) if k < 3: - raise ValueError('\nLess than 3 levels. Friedman test not appropriate.\n') + raise ValueError("\nLess than 3 levels. Friedman test not appropriate.\n") n = len(args[0]) data = pstat.aabut(*args) data = data.astype(N.Float) for i in range(len(data)): data[i] = arankdata(data[i]) - ssbn = asum(asum(args, 1)**2) - chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1) - return chisq, chisqprob(chisq, k-1) + ssbn = asum(asum(args, 1) ** 2) + chisq = 12.0 / (k * n * (k + 1)) * ssbn - 3 * n * (k + 1) + return chisq, chisqprob(chisq, k - 1) -# APROBABILITY CALCULATIONS + # APROBABILITY CALCULATIONS def achisqprob(chisq, df): """ - Returns the (1-tail) probability value associated with the provided chi-square - value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can - handle multiple dimensions. + Returns the (1-tail) probability value associated with the provided chi-square + value and df. Heavily modified from chisq.c in Gary Perlman's |Stat. Can + handle multiple dimensions. 
- Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom - """ + Usage: achisqprob(chisq,df) chisq=chisquare stat., df=degrees of freedom + """ BIG = 200.0 def ex(x): @@ -3445,13 +3649,13 @@ def ex(x): y = ex(-a) if df % 2 == 0: even = 1 - s = y*1 - s2 = s*1 + s = y * 1 + s2 = s * 1 else: even = 0 s = 2.0 * azprob(-N.sqrt(chisq)) - s2 = s*1 - if (df > 2): + s2 = s * 1 + if df > 2: chisq = 0.5 * (df - 1.0) if even: z = N.ones(probs.shape, N.Float) @@ -3468,12 +3672,11 @@ def ex(x): totalelements = N.multiply.reduce(N.array(probs.shape)) while asum(mask) != totalelements: e = N.log(z) + e - s = s + ex(c*z-a-e) + s = s + ex(c * z - a - e) z = z + 1.0 -# print z, e, s newmask = N.greater(z, chisq) - a_big_frozen = N.where(newmask*N.equal(mask, 0)*a_big, s, a_big_frozen) - mask = N.clip(newmask+mask, 0, 1) + a_big_frozen = N.where(newmask * N.equal(mask, 0) * a_big, s, a_big_frozen) + mask = N.clip(newmask + mask, 0, 1) if even: z = N.ones(probs.shape, N.Float) e = N.ones(probs.shape, N.Float) @@ -3484,104 +3687,179 @@ def ex(x): mask = N.zeros(probs.shape) a_notbig_frozen = -1 * N.ones(probs.shape, N.Float) while asum(mask) != totalelements: - e = e * (a/z.astype(N.Float)) + e = e * (a / z.astype(N.Float)) c = c + e z = z + 1.0 -# print '#2', z, e, c, s, c*y+s2 newmask = N.greater(z, chisq) - a_notbig_frozen = N.where(newmask*N.equal(mask, 0)*(1-a_big), - c*y+s2, a_notbig_frozen) - mask = N.clip(newmask+mask, 0, 1) - probs = N.where(N.equal(probs, 1), 1, - N.where(N.greater(a, BIG), a_big_frozen, a_notbig_frozen)) + a_notbig_frozen = N.where(newmask * N.equal(mask, 0) * (1 - a_big), c * y + s2, a_notbig_frozen) + mask = N.clip(newmask + mask, 0, 1) + probs = N.where(N.equal(probs, 1), 1, N.where(N.greater(a, BIG), a_big_frozen, a_notbig_frozen)) return probs else: return s def aerfcc(x): """ - Returns the complementary error function erfc(x) with fractional error - everywhere less than 1.2e-7. Adapted from Numerical Recipies. 
Can - handle multiple dimensions. + Returns the complementary error function erfc(x) with fractional error + everywhere less than 1.2e-7. Adapted from Numerical Recipies. Can + handle multiple dimensions. - Usage: aerfcc(x) - """ + Usage: aerfcc(x) + """ z = abs(x) - t = 1.0 / (1.0+0.5*z) - ans = t * N.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277))))))))) - return N.where(N.greater_equal(x, 0), ans, 2.0-ans) + t = 1.0 / (1.0 + 0.5 * z) + ans = t * N.exp( + -z * z + - 1.26551223 + + t + * ( + 1.00002368 + + t + * ( + 0.37409196 + + t + * ( + 0.09678418 + + t + * ( + -0.18628806 + + t + * (0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277)))) + ) + ) + ) + ) + ) + return N.where(N.greater_equal(x, 0), ans, 2.0 - ans) def azprob(z): """ - Returns the area under the normal curve 'to the left of' the given z value. - Thus, - for z<0, zprob(z) = 1-tail probability - for z>0, 1.0-zprob(z) = 1-tail probability - for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability - Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions. + Returns the area under the normal curve 'to the left of' the given z value. + Thus, + for z<0, zprob(z) = 1-tail probability + for z>0, 1.0-zprob(z) = 1-tail probability + for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability + Adapted from z.c in Gary Perlman's |Stat. Can handle multiple dimensions. 
+ + Usage: azprob(z) where z is a z-value + """ - Usage: azprob(z) where z is a z-value - """ def yfunc(y): - x = (((((((((((((-0.000045255659 * y - + 0.000152529290) * y - 0.000019538132) * y - - 0.000676904986) * y + 0.001390604284) * y - - 0.000794620820) * y - 0.002034254874) * y - + 0.006549791214) * y - 0.010557625006) * y - + 0.011630447319) * y - 0.009279453341) * y - + 0.005353579108) * y - 0.002141268741) * y - + 0.000535310849) * y + 0.999936657524 + x = ( + ( + ( + ( + ( + ( + ( + ( + ( + ( + ( + ((-0.000045255659 * y + 0.000152529290) * y - 0.000019538132) + * y + - 0.000676904986 + ) + * y + + 0.001390604284 + ) + * y + - 0.000794620820 + ) + * y + - 0.002034254874 + ) + * y + + 0.006549791214 + ) + * y + - 0.010557625006 + ) + * y + + 0.011630447319 + ) + * y + - 0.009279453341 + ) + * y + + 0.005353579108 + ) + * y + - 0.002141268741 + ) + * y + + 0.000535310849 + ) * y + 0.999936657524 return x def wfunc(w): - x = ((((((((0.000124818987 * w - - 0.001075204047) * w + 0.005198775019) * w - - 0.019198292004) * w + 0.059054035642) * w - - 0.151968751364) * w + 0.319152932694) * w - - 0.531923007300) * w + 0.797884560593) * N.sqrt(w) * 2.0 + x = ( + ( + ( + ( + ( + ( + (((0.000124818987 * w - 0.001075204047) * w + 0.005198775019) * w - 0.019198292004) + * w + + 0.059054035642 + ) + * w + - 0.151968751364 + ) + * w + + 0.319152932694 + ) + * w + - 0.531923007300 + ) + * w + + 0.797884560593 + ) + * N.sqrt(w) + * 2.0 + ) return x - Z_MAX = 6.0 # maximum meaningful z-value + Z_MAX = 6.0 # maximum meaningful z-value x = N.zeros(z.shape, N.Float) # initialize y = 0.5 * N.fabs(z) - x = N.where(N.less(y, 1.0), wfunc(y*y), yfunc(y-2.0)) # get x's - x = N.where(N.greater(y, Z_MAX*0.5), 1.0, x) # kill those with big Z - prob = N.where(N.greater(z, 0), (x+1)*0.5, (1-x)*0.5) + x = N.where(N.less(y, 1.0), wfunc(y * y), yfunc(y - 2.0)) # get x's + x = N.where(N.greater(y, Z_MAX * 0.5), 1.0, x) # kill those with big Z + prob = N.where(N.greater(z, 0), (x + 1) * 0.5, 
(1 - x) * 0.5) return prob def aksprob(alam): """ - Returns the probability value for a K-S statistic computed via ks_2samp. - Adapted from Numerical Recipies. Can handle multiple dimensions. + Returns the probability value for a K-S statistic computed via ks_2samp. + Adapted from Numerical Recipies. Can handle multiple dimensions. - Usage: aksprob(alam) - """ + Usage: aksprob(alam) + """ if isinstance(alam, N.ArrayType): frozen = -1 * N.ones(alam.shape, N.Float64) alam = alam.astype(N.Float64) arrayflag = 1 else: - frozen = N.array(-1.) + frozen = N.array(-1.0) alam = N.array(alam, N.Float64) mask = N.zeros(alam.shape) fac = 2.0 * N.ones(alam.shape, N.Float) sum = N.zeros(alam.shape, N.Float) termbf = N.zeros(alam.shape, N.Float) - a2 = N.array(-2.0*alam*alam, N.Float64) + a2 = N.array(-2.0 * alam * alam, N.Float64) totalelements = N.multiply.reduce(N.array(mask.shape)) for j in range(1, 201): if asum(mask) == totalelements: break - exponents = (a2*j*j) + exponents = a2 * j * j overflowmask = N.less(exponents, -746) frozen = N.where(overflowmask, 0, frozen) - mask = mask+overflowmask - term = fac*N.exp(exponents) + mask = mask + overflowmask + term = fac * N.exp(exponents) sum = sum + term - newmask = N.where(N.less_equal(abs(term), (0.001*termbf)) - + N.less(abs(term), 1.0e-8*sum), 1, 0) - frozen = N.where(newmask*N.equal(mask, 0), sum, frozen) - mask = N.clip(mask+newmask, 0, 1) + newmask = N.where(N.less_equal(abs(term), (0.001 * termbf)) + N.less(abs(term), 1.0e-8 * sum), 1, 0) + frozen = N.where(newmask * N.equal(mask, 0), sum, frozen) + mask = N.clip(mask + newmask, 0, 1) fac = -fac termbf = abs(term) if arrayflag: @@ -3591,25 +3869,25 @@ def aksprob(alam): def afprob(dfnum, dfden, F): """ - Returns the 1-tailed significance level (p-value) of an F statistic - given the degrees of freedom for the numerator (dfR-dfF) and the degrees - of freedom for the denominator (dfF). Can handle multiple dims for F. 
+ Returns the 1-tailed significance level (p-value) of an F statistic + given the degrees of freedom for the numerator (dfR-dfF) and the degrees + of freedom for the denominator (dfF). Can handle multiple dims for F. - Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn - """ + Usage: afprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn + """ if isinstance(F, N.ArrayType): - return abetai(0.5*dfden, 0.5*dfnum, dfden/(1.0*dfden+dfnum*F)) + return abetai(0.5 * dfden, 0.5 * dfnum, dfden / (1.0 * dfden + dfnum * F)) else: - return abetai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F)) + return abetai(0.5 * dfden, 0.5 * dfnum, dfden / float(dfden + dfnum * F)) def abetacf(a, b, x, verbose=1): """ - Evaluates the continued fraction form of the incomplete Beta function, - betai. (Adapted from: Numerical Recipies in C.) Can handle multiple - dimensions for x. + Evaluates the continued fraction form of the incomplete Beta function, + betai. (Adapted from: Numerical Recipies in C.) Can handle multiple + dimensions for x. 
- Usage: abetacf(a,b,x,verbose=1) - """ + Usage: abetacf(a,b,x,verbose=1) + """ ITMAX = 200 EPS = 3.0e-7 @@ -3622,32 +3900,32 @@ def abetacf(a, b, x, verbose=1): x = N.array([x]) mask = N.zeros(x.shape) bm = az = am = 1.0 - qab = a+b - qap = a+1.0 - qam = a-1.0 - bz = 1.0-qab*x/qap - for i in range(ITMAX+1): + qab = a + b + qap = a + 1.0 + qam = a - 1.0 + bz = 1.0 - qab * x / qap + for i in range(ITMAX + 1): if N.sum(N.ravel(N.equal(frozen, -1))) == 0: break - em = float(i+1) + em = float(i + 1) tem = em + em - d = em*(b-em)*x/((qam+tem)*(a+tem)) - ap = az + d*am - bp = bz+d*bm - d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem)) - app = ap+d*az - bpp = bp+d*bz - aold = az*1 - am = ap/bpp - bm = bp/bpp - az = app/bpp + d = em * (b - em) * x / ((qam + tem) * (a + tem)) + ap = az + d * am + bp = bz + d * bm + d = -(a + em) * (qab + em) * x / ((qap + tem) * (a + tem)) + app = ap + d * az + bpp = bp + d * bz + aold = az * 1 + am = ap / bpp + bm = bp / bpp + az = app / bpp bz = 1.0 - newmask = N.less(abs(az-aold), EPS*abs(az)) - frozen = N.where(newmask*N.equal(mask, 0), az, frozen) - mask = N.clip(mask+newmask, 0, 1) + newmask = N.less(abs(az - aold), EPS * abs(az)) + frozen = N.where(newmask * N.equal(mask, 0), az, frozen) + mask = N.clip(mask + newmask, 0, 1) noconverge = asum(N.equal(frozen, -1)) if noconverge != 0 and verbose: - print('a or b too big, or ITMAX too small in Betacf for ', noconverge, ' elements') + print("a or b too big, or ITMAX too small in Betacf for ", noconverge, " elements") if arrayflag: return frozen else: @@ -3655,23 +3933,22 @@ def abetacf(a, b, x, verbose=1): def agammln(xx): """ - Returns the gamma function of xx. - Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. - Adapted from: Numerical Recipies in C. Can handle multiple dims ... but - probably doesn't normally have to. + Returns the gamma function of xx. + Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt. + Adapted from: Numerical Recipies in C. Can handle multiple dims ... 
but + probably doesn't normally have to. - Usage: agammln(xx) - """ - coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, - 0.120858003e-2, -0.536382e-5] + Usage: agammln(xx) + """ + coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2, -0.536382e-5] x = xx - 1.0 tmp = x + 5.5 - tmp = tmp - (x+0.5)*N.log(tmp) + tmp = tmp - (x + 0.5) * N.log(tmp) ser = 1.0 for j in range(len(coeff)): x = x + 1 - ser = ser + coeff[j]/x - return -tmp + N.log(2.50662827465*ser) + ser = ser + coeff[j] / x + return -tmp + N.log(2.50662827465 * ser) def abetai(a, b, x, verbose=1): """ @@ -3688,43 +3965,46 @@ def abetai(a, b, x, verbose=1): """ TINY = 1e-15 if isinstance(a, N.ArrayType): - if asum(N.less(x, 0)+N.greater(x, 1)) != 0: - raise ValueError('Bad x in abetai') + if asum(N.less(x, 0) + N.greater(x, 1)) != 0: + raise ValueError("Bad x in abetai") x = N.where(N.equal(x, 0), TINY, x) - x = N.where(N.equal(x, 1.0), 1-TINY, x) + x = N.where(N.equal(x, 1.0), 1 - TINY, x) - bt = N.where(N.equal(x, 0)+N.equal(x, 1), 0, -1) - exponents = (gammln(a+b)-gammln(a)-gammln(b)+a*N.log(x)+b * N.log(1.0-x)) + bt = N.where(N.equal(x, 0) + N.equal(x, 1), 0, -1) + exponents = gammln(a + b) - gammln(a) - gammln(b) + a * N.log(x) + b * N.log(1.0 - x) # 746 (below) is the MAX POSSIBLE BEFORE OVERFLOW exponents = N.where(N.less(exponents, -740), -740, exponents) bt = N.exp(exponents) if isinstance(x, N.ArrayType): - ans = N.where(N.less(x, (a+1)/(a+b+2.0)), - bt*abetacf(a, b, x, verbose)/float(a), - 1.0-bt*abetacf(b, a, 1.0-x, verbose)/float(b)) + ans = N.where( + N.less(x, (a + 1) / (a + b + 2.0)), + bt * abetacf(a, b, x, verbose) / float(a), + 1.0 - bt * abetacf(b, a, 1.0 - x, verbose) / float(b), + ) else: - if x < (a+1)/(a+b+2.0): - ans = bt*abetacf(a, b, x, verbose)/float(a) + if x < (a + 1) / (a + b + 2.0): + ans = bt * abetacf(a, b, x, verbose) / float(a) else: - ans = 1.0-bt*abetacf(b, a, 1.0-x, verbose)/float(b) + ans = 1.0 - bt * abetacf(b, a, 1.0 - x, 
verbose) / float(b) return ans -# AANOVA CALCULATIONS + # AANOVA CALCULATIONS import LinearAlgebra + LA = LinearAlgebra def aglm(data, para): """ - Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken - from: - Peterson et al. Statistical limitations in functional neuroimaging - I. Non-inferential methods and statistical models. Phil Trans Royal Soc - Lond B 354: 1239-1260. - - Usage: aglm(data,para) - Returns: statistic, p-value ??? - """ + Calculates a linear model fit ... anova/ancova/lin-regress/t-test/etc. Taken + from: + Peterson et al. Statistical limitations in functional neuroimaging + I. Non-inferential methods and statistical models. Phil Trans Royal Soc + Lond B 354: 1239-1260. + + Usage: aglm(data,para) + Returns: statistic, p-value ??? + """ if len(para) != len(data): print("data and para must be same length in aglm") return @@ -3733,18 +4013,16 @@ def aglm(data, para): x = N.zeros((n, len(p))) # design matrix for l in range(len(p)): x[:, l] = N.equal(para, p[l]) - b = N.dot(N.dot(LA.inverse(N.dot(N.transpose(x), x)), # i.e., b=inv(X'X)X'Y - N.transpose(x)), - data) - diffs = (data - N.dot(x, b)) - s_sq = 1./(n-len(p)) * N.dot(N.transpose(diffs), diffs) + b = N.dot(N.dot(LA.inverse(N.dot(N.transpose(x), x)), N.transpose(x)), data) # i.e., b=inv(X'X)X'Y + diffs = data - N.dot(x, b) + s_sq = 1.0 / (n - len(p)) * N.dot(N.transpose(diffs), diffs) if len(p) == 2: # ttest_ind c = N.array([1, -1]) - df = n-2 - fact = asum(1.0/asum(x, 0)) # i.e., 1/n1 + 1/n2 + 1/n3 ... - t = N.dot(c, b) / N.sqrt(s_sq*fact) - probs = abetai(0.5*df, 0.5, float(df)/(df+t*t)) + df = n - 2 + fact = asum(1.0 / asum(x, 0)) # i.e., 1/n1 + 1/n2 + 1/n3 ... 
+ t = N.dot(c, b) / N.sqrt(s_sq * fact) + probs = abetai(0.5 * df, 0.5, float(df) / (df + t * t)) return t, probs def aF_oneway(*args): @@ -3760,29 +4038,29 @@ def aF_oneway(*args): alldata = [] alldata = N.concatenate(args) bign = len(alldata) - sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign)) + sstot = ass(alldata) - (asquare_of_sums(alldata) / float(bign)) ssbn = 0 for a in args: - ssbn = ssbn + asquare_of_sums(N.array(a))/float(len(a)) - ssbn = ssbn - (asquare_of_sums(alldata)/float(bign)) - sswn = sstot-ssbn - dfbn = na-1 + ssbn = ssbn + asquare_of_sums(N.array(a)) / float(len(a)) + ssbn = ssbn - (asquare_of_sums(alldata) / float(bign)) + sswn = sstot - ssbn + dfbn = na - 1 dfwn = bign - na - msb = ssbn/float(dfbn) - msw = sswn/float(dfwn) - f = msb/msw + msb = ssbn / float(dfbn) + msw = sswn / float(dfwn) + f = msb / msw prob = fprob(dfbn, dfwn, f) return f, prob def aF_value(ER, EF, dfR, dfF): """ - Returns an F-statistic given the following: - ER = error associated with the null hypothesis (the Restricted model) - EF = error associated with the alternate hypothesis (the Full model) - dfR = degrees of freedom the Restricted model - dfF = degrees of freedom associated with the Restricted model - """ - return ((ER-EF)/float(dfR-dfF) / (EF/float(dfF))) + Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model + """ + return (ER - EF) / float(dfR - dfF) / (EF / float(dfF)) def outputfstats(Enum, Eden, dfnum, dfden, f, prob): Enum = round(Enum, 3) @@ -3791,28 +4069,30 @@ def outputfstats(Enum, Eden, dfnum, dfden, f, prob): dfden = round(dfden, 3) f = round(f, 3) prob = round(prob, 3) - suffix = '' # for *s after the p-value + suffix = "" # for *s after the p-value if prob < 0.001: - suffix = ' ***' + suffix = " 
***" elif prob < 0.01: - suffix = ' **' + suffix = " **" elif prob < 0.05: - suffix = ' *' - title = [['EF/ER', 'DF', 'Mean Square', 'F-value', 'prob', '']] - lofl = title+[[Enum, dfnum, round(Enum/float(dfnum), 3), f, prob, suffix], - [Eden, dfden, round(Eden/float(dfden), 3), '', '', '']] + suffix = " *" + title = [["EF/ER", "DF", "Mean Square", "F-value", "prob", ""]] + lofl = title + [ + [Enum, dfnum, round(Enum / float(dfnum), 3), f, prob, suffix], + [Eden, dfden, round(Eden / float(dfden), 3), "", "", ""], + ] pstat.printcc(lofl) return def F_value_multivariate(ER, EF, dfnum, dfden): """ - Returns an F-statistic given the following: - ER = error associated with the null hypothesis (the Restricted model) - EF = error associated with the alternate hypothesis (the Full model) - dfR = degrees of freedom the Restricted model - dfF = degrees of freedom associated with the Restricted model - where ER and EF are matrices from a multivariate F calculation. - """ + Returns an F-statistic given the following: + ER = error associated with the null hypothesis (the Restricted model) + EF = error associated with the alternate hypothesis (the Full model) + dfR = degrees of freedom the Restricted model + dfF = degrees of freedom associated with the Restricted model + where ER and EF are matrices from a multivariate F calculation. 
+ """ if type(ER) in [int, float]: ER = N.array([[ER]]) if type(EF) in [int, float]: @@ -3821,33 +4101,33 @@ def F_value_multivariate(ER, EF, dfnum, dfden): d_en = LA.determinant(EF) / float(dfden) return n_um / d_en -# ASUPPORT FUNCTIONS + # ASUPPORT FUNCTIONS def asign(a): """ - Usage: asign(a) - Returns: array shape of a, with -1 where a<0 and +1 where a>=0 - """ + Usage: asign(a) + Returns: array shape of a, with -1 where a<0 and +1 where a>=0 + """ a = N.asarray(a) - if ((isinstance(a, float)) or (isinstance(a, int))): - return a-a-N.less(a, 0)+N.greater(a, 0) + if (isinstance(a, float)) or (isinstance(a, int)): + return a - a - N.less(a, 0) + N.greater(a, 0) else: - return N.zeros(N.shape(a))-N.less(a, 0)+N.greater(a, 0) + return N.zeros(N.shape(a)) - N.less(a, 0) + N.greater(a, 0) def asum(a, dimension=None, keepdims=0): """ - An alternative to the Numeric.add.reduce function, which allows one to - (1) collapse over multiple dimensions at once, and/or (2) to retain - all dimensions in the original array (squashing one down to size. - Dimension can equal None (ravel array first), an integer (the - dimension over which to operate), or a sequence (operate over multiple - dimensions). If keepdims=1, the resulting array will have as many - dimensions as the input array. - - Usage: asum(a, dimension=None, keepdims=0) - Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1 - """ - if isinstance(a, N.ArrayType) and a.typecode() in ['l', 's', 'b']: + An alternative to the Numeric.add.reduce function, which allows one to + (1) collapse over multiple dimensions at once, and/or (2) to retain + all dimensions in the original array (squashing one down to size. + Dimension can equal None (ravel array first), an integer (the + dimension over which to operate), or a sequence (operate over multiple + dimensions). If keepdims=1, the resulting array will have as many + dimensions as the input array. 
+ + Usage: asum(a, dimension=None, keepdims=0) + Returns: array summed along 'dimension'(s), same _number_ of dims if keepdims=1 + """ + if isinstance(a, N.ArrayType) and a.typecode() in ["l", "s", "b"]: a = a.astype(N.Float) if dimension is None: s = N.sum(N.ravel(a)) @@ -3872,13 +4152,13 @@ def asum(a, dimension=None, keepdims=0): def acumsum(a, dimension=None): """ - Returns an array consisting of the cumulative sum of the items in the - passed array. Dimension can equal None (ravel array first), an - integer (the dimension over which to operate), or a sequence (operate - over multiple dimensions, but this last one just barely makes sense). + Returns an array consisting of the cumulative sum of the items in the + passed array. Dimension can equal None (ravel array first), an + integer (the dimension over which to operate), or a sequence (operate + over multiple dimensions, but this last one just barely makes sense). - Usage: acumsum(a,dimension=None) - """ + Usage: acumsum(a,dimension=None) + """ if dimension is None: a = N.ravel(a) dimension = 0 @@ -3893,106 +4173,106 @@ def acumsum(a, dimension=None): def ass(inarray, dimension=None, keepdims=0): """ - Squares each value in the passed array, adds these squares & returns - the result. Unfortunate function name. :-) Defaults to ALL values in - the array. Dimension can equal None (ravel array first), an integer - (the dimension over which to operate), or a sequence (operate over - multiple dimensions). Set keepdims=1 to maintain the original number - of dimensions. + Squares each value in the passed array, adds these squares & returns + the result. Unfortunate function name. :-) Defaults to ALL values in + the array. Dimension can equal None (ravel array first), an integer + (the dimension over which to operate), or a sequence (operate over + multiple dimensions). Set keepdims=1 to maintain the original number + of dimensions. 
- Usage: ass(inarray, dimension=None, keepdims=0) - Returns: sum-along-'dimension' for (inarray*inarray) - """ + Usage: ass(inarray, dimension=None, keepdims=0) + Returns: sum-along-'dimension' for (inarray*inarray) + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 - return asum(inarray*inarray, dimension, keepdims) + return asum(inarray * inarray, dimension, keepdims) def asummult(array1, array2, dimension=None, keepdims=0): """ - Multiplies elements in array1 and array2, element by element, and - returns the sum (along 'dimension') of all resulting multiplications. - Dimension can equal None (ravel array first), an integer (the - dimension over which to operate), or a sequence (operate over multiple - dimensions). A trivial function, but included for completeness. + Multiplies elements in array1 and array2, element by element, and + returns the sum (along 'dimension') of all resulting multiplications. + Dimension can equal None (ravel array first), an integer (the + dimension over which to operate), or a sequence (operate over multiple + dimensions). A trivial function, but included for completeness. - Usage: asummult(array1,array2,dimension=None,keepdims=0) - """ + Usage: asummult(array1,array2,dimension=None,keepdims=0) + """ if dimension is None: array1 = N.ravel(array1) array2 = N.ravel(array2) dimension = 0 - return asum(array1*array2, dimension, keepdims) + return asum(array1 * array2, dimension, keepdims) def asquare_of_sums(inarray, dimension=None, keepdims=0): """ - Adds the values in the passed array, squares that sum, and returns the - result. Dimension can equal None (ravel array first), an integer (the - dimension over which to operate), or a sequence (operate over multiple - dimensions). If keepdims=1, the returned array will have the same - NUMBER of dimensions as the original. + Adds the values in the passed array, squares that sum, and returns the + result. 
Dimension can equal None (ravel array first), an integer (the + dimension over which to operate), or a sequence (operate over multiple + dimensions). If keepdims=1, the returned array will have the same + NUMBER of dimensions as the original. - Usage: asquare_of_sums(inarray, dimension=None, keepdims=0) - Returns: the square of the sum over dim(s) in dimension - """ + Usage: asquare_of_sums(inarray, dimension=None, keepdims=0) + Returns: the square of the sum over dim(s) in dimension + """ if dimension is None: inarray = N.ravel(inarray) dimension = 0 s = asum(inarray, dimension, keepdims) if isinstance(s, N.ArrayType): - return s.astype(N.Float)*s + return s.astype(N.Float) * s else: - return float(s)*s + return float(s) * s def asumdiffsquared(a, b, dimension=None, keepdims=0): """ - Takes pairwise differences of the values in arrays a and b, squares - these differences, and returns the sum of these squares. Dimension - can equal None (ravel array first), an integer (the dimension over - which to operate), or a sequence (operate over multiple dimensions). - keepdims=1 means the return shape = len(a.shape) = len(b.shape) + Takes pairwise differences of the values in arrays a and b, squares + these differences, and returns the sum of these squares. Dimension + can equal None (ravel array first), an integer (the dimension over + which to operate), or a sequence (operate over multiple dimensions). + keepdims=1 means the return shape = len(a.shape) = len(b.shape) - Usage: asumdiffsquared(a,b) - Returns: sum[ravel(a-b)**2] - """ + Usage: asumdiffsquared(a,b) + Returns: sum[ravel(a-b)**2] + """ if dimension is None: N.ravel(a) # inarray dimension = 0 - return asum((a-b)**2, dimension, keepdims) + return asum((a - b) ** 2, dimension, keepdims) def ashellsort(inarray): """ - Shellsort algorithm. Sorts a 1D-array. + Shellsort algorithm. Sorts a 1D-array. 
- Usage: ashellsort(inarray) - Returns: sorted-inarray, sorting-index-vector (for original array) - """ + Usage: ashellsort(inarray) + Returns: sorted-inarray, sorting-index-vector (for original array) + """ n = len(inarray) svec = inarray * 1.0 ivec = list(range(n)) - gap = n/2 # integer division needed + gap = n / 2 # integer division needed while gap > 0: for i in range(gap, n): - for j in range(i-gap, -1, -gap): - while j >= 0 and svec[j] > svec[j+gap]: + for j in range(i - gap, -1, -gap): + while j >= 0 and svec[j] > svec[j + gap]: temp = svec[j] - svec[j] = svec[j+gap] - svec[j+gap] = temp + svec[j] = svec[j + gap] + svec[j + gap] = temp itemp = ivec[j] - ivec[j] = ivec[j+gap] - ivec[j+gap] = itemp + ivec[j] = ivec[j + gap] + ivec[j + gap] = itemp gap = gap / 2 # integer division needed -# svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]] + # svec is now sorted input vector, ivec has the order svec[i] = vec[ivec[i]] return svec, ivec def arankdata(inarray): """ - Ranks the data in inarray, dealing with ties appropritely. Assumes - a 1D inarray. Adapted from Gary Perlman's |Stat ranksort. + Ranks the data in inarray, dealing with ties appropritely. Assumes + a 1D inarray. Adapted from Gary Perlman's |Stat ranksort. 
- Usage: arankdata(inarray) - Returns: array of length equal to inarray, containing rank scores - """ + Usage: arankdata(inarray) + Returns: array of length equal to inarray, containing rank scores + """ n = len(inarray) svec, ivec = ashellsort(inarray) sumranks = 0 @@ -4001,9 +4281,9 @@ def arankdata(inarray): for i in range(n): sumranks = sumranks + i dupcount = dupcount + 1 - if i == n-1 or svec[i] != svec[i+1]: + if i == n - 1 or svec[i] != svec[i + 1]: averank = sumranks / float(dupcount) + 1 - for j in range(i-dupcount+1, i+1): + for j in range(i - dupcount + 1, i + 1): newarray[ivec[j]] = averank sumranks = 0 dupcount = 0 @@ -4011,23 +4291,23 @@ def arankdata(inarray): def afindwithin(data): """ - Returns a binary vector, 1=within-subject factor, 0=between. Input - equals the entire data array (i.e., column 1=random factor, last - column = measured values. + Returns a binary vector, 1=within-subject factor, 0=between. Input + equals the entire data array (i.e., column 1=random factor, last + column = measured values. 
- Usage: afindwithin(data) data in |Stat format - """ - numfact = len(data[0])-2 - withinvec = [0]*numfact - for col in range(1, numfact+1): + Usage: afindwithin(data) data in |Stat format + """ + numfact = len(data[0]) - 2 + withinvec = [0] * numfact + for col in range(1, numfact + 1): rows = pstat.linexand(data, col, pstat.unique(pstat.colex(data, 1))[0]) # get 1 level of this factor - if len(pstat.unique(pstat.colex(rows, 0))) < len(rows): # if fewer subjects than scores on this factor - withinvec[col-1] = 1 + if len(pstat.unique(pstat.colex(rows, 0))) < len(rows): # if fewer subjects than scores on this factor + withinvec[col - 1] = 1 return withinvec # RE-DEFINE DISPATCHES TO INCLUDE ARRAYS -# CENTRAL TENDENCY: + # CENTRAL TENDENCY: geometricmean = Dispatch((lgeometricmean, (list, tuple)), (ageometricmean, (N.ArrayType,))) harmonicmean = Dispatch((lharmonicmean, (list, tuple)), (aharmonicmean, (N.ArrayType,))) mean = Dispatch((lmean, (list, tuple)), (amean, (N.ArrayType,))) @@ -4039,20 +4319,20 @@ def afindwithin(data): tstdev = Dispatch((atstdev, (N.ArrayType,))) tsem = Dispatch((atsem, (N.ArrayType,))) -# VARIATION: + # VARIATION: moment = Dispatch((lmoment, (list, tuple)), (amoment, (N.ArrayType,))) variation = Dispatch((lvariation, (list, tuple)), (avariation, (N.ArrayType,))) skew = Dispatch((lskew, (list, tuple)), (askew, (N.ArrayType,))) kurtosis = Dispatch((lkurtosis, (list, tuple)), (akurtosis, (N.ArrayType,))) describe = Dispatch((ldescribe, (list, tuple)), (adescribe, (N.ArrayType,))) -# DISTRIBUTION TESTS + # DISTRIBUTION TESTS skewtest = Dispatch((askewtest, (list, tuple)), (askewtest, (N.ArrayType,))) kurtosistest = Dispatch((akurtosistest, (list, tuple)), (akurtosistest, (N.ArrayType,))) normaltest = Dispatch((anormaltest, (list, tuple)), (anormaltest, (N.ArrayType,))) -# FREQUENCY STATS: + # FREQUENCY STATS: itemfreq = Dispatch((litemfreq, (list, tuple)), (aitemfreq, (N.ArrayType,))) scoreatpercentile = Dispatch((lscoreatpercentile, (list, 
tuple)), (ascoreatpercentile, (N.ArrayType,))) percentileofscore = Dispatch((lpercentileofscore, (list, tuple)), (apercentileofscore, (N.ArrayType,))) @@ -4060,11 +4340,13 @@ def afindwithin(data): cumfreq = Dispatch((lcumfreq, (list, tuple)), (acumfreq, (N.ArrayType,))) relfreq = Dispatch((lrelfreq, (list, tuple)), (arelfreq, (N.ArrayType,))) -# VARIABILITY: + # VARIABILITY: obrientransform = Dispatch((lobrientransform, (list, tuple)), (aobrientransform, (N.ArrayType,))) samplevar = Dispatch((lsamplevar, (list, tuple)), (asamplevar, (N.ArrayType,))) samplestdev = Dispatch((lsamplestdev, (list, tuple)), (asamplestdev, (N.ArrayType,))) - signaltonoise = Dispatch((asignaltonoise, (N.ArrayType,)),) + signaltonoise = Dispatch( + (asignaltonoise, (N.ArrayType,)), + ) var = Dispatch((lvar, (list, tuple)), (avar, (N.ArrayType,))) stdev = Dispatch((lstdev, (list, tuple)), (astdev, (N.ArrayType,))) sterr = Dispatch((lsterr, (list, tuple)), (asterr, (N.ArrayType,))) @@ -4072,12 +4354,14 @@ def afindwithin(data): z = Dispatch((lz, (list, tuple)), (az, (N.ArrayType,))) zs = Dispatch((lzs, (list, tuple)), (azs, (N.ArrayType,))) -# TRIMMING FCNS: - threshold = Dispatch((athreshold, (N.ArrayType,)),) + # TRIMMING FCNS: + threshold = Dispatch( + (athreshold, (N.ArrayType,)), + ) trimboth = Dispatch((ltrimboth, (list, tuple)), (atrimboth, (N.ArrayType,))) trim1 = Dispatch((ltrim1, (list, tuple)), (atrim1, (N.ArrayType,))) -# CORRELATION FCNS: + # CORRELATION FCNS: paired = Dispatch((lpaired, (list, tuple)), (apaired, (N.ArrayType,))) pearsonr = Dispatch((lpearsonr, (list, tuple)), (apearsonr, (N.ArrayType,))) spearmanr = Dispatch((lspearmanr, (list, tuple)), (aspearmanr, (N.ArrayType,))) @@ -4085,7 +4369,7 @@ def afindwithin(data): kendalltau = Dispatch((lkendalltau, (list, tuple)), (akendalltau, (N.ArrayType,))) linregress = Dispatch((llinregress, (list, tuple)), (alinregress, (N.ArrayType,))) -# INFERENTIAL STATS: + # INFERENTIAL STATS: ttest_1samp = Dispatch((lttest_1samp, 
(list, tuple)), (attest_1samp, (N.ArrayType,))) ttest_ind = Dispatch((lttest_ind, (list, tuple)), (attest_ind, (N.ArrayType,))) ttest_rel = Dispatch((lttest_rel, (list, tuple)), (attest_rel, (N.ArrayType,))) @@ -4098,7 +4382,7 @@ def afindwithin(data): kruskalwallish = Dispatch((lkruskalwallish, (list, tuple)), (akruskalwallish, (N.ArrayType,))) friedmanchisquare = Dispatch((lfriedmanchisquare, (list, tuple)), (afriedmanchisquare, (N.ArrayType,))) -# PROBABILITY CALCS: + # PROBABILITY CALCS: chisqprob = Dispatch((lchisqprob, (int, float)), (achisqprob, (N.ArrayType,))) zprob = Dispatch((lzprob, (int, float)), (azprob, (N.ArrayType,))) ksprob = Dispatch((lksprob, (int, float)), (aksprob, (N.ArrayType,))) @@ -4108,12 +4392,14 @@ def afindwithin(data): erfcc = Dispatch((lerfcc, (int, float)), (aerfcc, (N.ArrayType,))) gammln = Dispatch((lgammln, (int, float)), (agammln, (N.ArrayType,))) -# ANOVA FUNCTIONS: + # ANOVA FUNCTIONS: F_oneway = Dispatch((lF_oneway, (list, tuple)), (aF_oneway, (N.ArrayType,))) F_value = Dispatch((lF_value, (list, tuple)), (aF_value, (N.ArrayType,))) -# SUPPORT FUNCTIONS: - incr = Dispatch((lincr, (list, tuple, N.ArrayType)), ) + # SUPPORT FUNCTIONS: + incr = Dispatch( + (lincr, (list, tuple, N.ArrayType)), + ) sum = Dispatch((lsum, (list, tuple)), (asum, (N.ArrayType,))) cumsum = Dispatch((lcumsum, (list, tuple)), (acumsum, (N.ArrayType,))) ss = Dispatch((lss, (list, tuple)), (ass, (N.ArrayType,))) diff --git a/lib/psyco_full.py b/lib/psyco_full.py index ece4c855..961e6376 100644 --- a/lib/psyco_full.py +++ b/lib/psyco_full.py @@ -4,6 +4,7 @@ try: import psyco + psyco.full() except Exception: pass diff --git a/pyproject.toml b/pyproject.toml index 68103df0..69747c83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,11 @@ requires = ["cython", "oldest-supported-numpy", "setuptools", "wheel"] build-backend = "setuptools.build_meta" +[tool.black] +include = '\.pyi?$' +line-length = 120 +target-version = ['py37'] + [tool.cibuildwheel] 
test-command = "python -c 'import bx, bx.align, bx.align.sitemask, bx.align.tools, bx.arrays, bx.bbi, bx.cookbook, bx.intervals, bx.intervals.operations, bx.intseq, bx.misc, bx.motif, bx.motif.io, bx.motif.logo, bx.phylo, bx.pwm, bx.seq, bx.tabular, bx_extras'" @@ -30,3 +35,6 @@ include_dirs = $(brew --prefix openblas)/include runtime_library_dirs = $(brew --prefix openblas)/lib EOF """ + +[tool.darker] +isort = true diff --git a/script_tests/base/__init__.py b/script_tests/base/__init__.py index cf464812..ad42a6af 100644 --- a/script_tests/base/__init__.py +++ b/script_tests/base/__init__.py @@ -36,6 +36,7 @@ class BaseScriptTest: """ Helper class for testing a command line tool """ + def test_script(self): # Accumulate parameters input_files = dict() @@ -43,19 +44,19 @@ def test_script(self): out_dir = None stdin = stdout = stderr = None for key in dir(self): - if key == 'command_line': + if key == "command_line": command_line = getattr(self, key) - elif key.startswith('input_'): + elif key.startswith("input_"): value = getattr(self, key) assert isinstance(value, TestFile) arg_name = key[6:] input_files[arg_name] = value - elif key.startswith('output_'): + elif key.startswith("output_"): value = getattr(self, key) assert isinstance(value, TestFile) arg_name = key[7:] output_files[arg_name] = value - elif key == 'out_dir': + elif key == "out_dir": out_dir = getattr(self, key) assert os.path.isdir(out_dir) # Build the command line @@ -65,21 +66,21 @@ def test_script(self): for key, value in input_files.items(): input_fnames[key] = value.filename all_fnames[key] = input_fnames[key] - if key == 'stdin': + if key == "stdin": stdin = open(input_fnames[key]) for key in output_files.keys(): _, tf_name = tempfile.mkstemp() output_fnames[key] = tf_name all_fnames[key] = output_fnames[key] - if key == 'stdout': - stdout = open(output_fnames[key], 'w') + if key == "stdout": + stdout = open(output_fnames[key], "w") stdout.flush() - if key == 'stderr': - stderr = 
open(output_fnames[key], 'w') + if key == "stderr": + stderr = open(output_fnames[key], "w") stdout.flush() if out_dir is not None: temp_out_dir = tempfile.mkdtemp() - all_fnames['out_dir'] = temp_out_dir + all_fnames["out_dir"] = temp_out_dir for root, _, files in os.walk(out_dir): for file in files: output_files[os.path.join(root, file)] = TestFile(filename=os.path.join(root, file)) @@ -87,10 +88,10 @@ def test_script(self): real_command = string.Template(command_line).substitute(all_fnames) # Augment PYTHONPATH, bit of a HACK here! need to suck this data from setuptools or something? env = dict(os.environ) - if 'PYTHONPATH' in env: - env['PYTHONPATH'] = "./lib:" + env['PYTHONPATH'] + if "PYTHONPATH" in env: + env["PYTHONPATH"] = "./lib:" + env["PYTHONPATH"] else: - env['PYTHONPATH'] = "./lib" + env["PYTHONPATH"] = "./lib" # Run the command subprocess.check_call(real_command, stdin=stdin, stdout=stdout, stderr=stderr, shell=True, env=env) # Check the outputs diff --git a/script_tests/bnMapper_tests.py b/script_tests/bnMapper_tests.py index 039529bb..9a8d8c4f 100644 --- a/script_tests/bnMapper_tests.py +++ b/script_tests/bnMapper_tests.py @@ -35,7 +35,9 @@ class Test6(base.BaseScriptTest, unittest.TestCase): class Test7(base.BaseScriptTest, unittest.TestCase): - command_line = "./scripts/bnMapper.py ./test_data/epo_tests/hg19_one_peak.bed ./test_data/epo_tests/hg19.mm9.rBest.chain.gz" + command_line = ( + "./scripts/bnMapper.py ./test_data/epo_tests/hg19_one_peak.bed ./test_data/epo_tests/hg19.mm9.rBest.chain.gz" + ) output_stdout = base.TestFile(filename="./test_data/epo_tests/hg19_one_peak.mapped.bed") diff --git a/script_tests/line_select_tests.py b/script_tests/line_select_tests.py index 3777a0ae..91329d38 100644 --- a/script_tests/line_select_tests.py +++ b/script_tests/line_select_tests.py @@ -5,18 +5,24 @@ class Test(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/line_select.py ${features}" - input_features = base.TestFile("""0 + 
input_features = base.TestFile( + """0 1 1 0 1 - 0""") - input_stdin = base.TestFile("""a + 0""" + ) + input_stdin = base.TestFile( + """a b d e - f""") - output_stdout = base.TestFile("""b + f""" + ) + output_stdout = base.TestFile( + """b - e""") + e""" + ) diff --git a/script_tests/maf_extract_ranges_indexed_tests.py b/script_tests/maf_extract_ranges_indexed_tests.py index a5d5df87..c85abaae 100644 --- a/script_tests/maf_extract_ranges_indexed_tests.py +++ b/script_tests/maf_extract_ranges_indexed_tests.py @@ -16,18 +16,24 @@ class TestAccessNotRef(base.BaseScriptTest, unittest.TestCase): class TestAccessRef(base.BaseScriptTest, unittest.TestCase): - command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p mm8." + command_line = ( + "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p mm8." + ) input_stdin = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/dcking_ghp074.maf") class TestAccessNotRefNotIndexed(base.BaseScriptTest, unittest.TestCase): - command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p hg18." + command_line = ( + "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm8_chr7_tiny_mm8_ind.maf -c -m 5 -p hg18." + ) input_stdin = base.TestFile(filename="./test_data/maf_tests/hg18.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/empty.maf") class TestELines(base.BaseScriptTest, unittest.TestCase): - command_line = "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm10_chr12_lessspe.maf -c -m 5 -p mm10." + command_line = ( + "./scripts/maf_extract_ranges_indexed.py ./test_data/maf_tests/mm10_chr12_lessspe.maf -c -m 5 -p mm10." 
+ ) input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12.bed") output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_slice.maf") diff --git a/script_tests/maf_select_tests.py b/script_tests/maf_select_tests.py index 2268c656..6c46f472 100644 --- a/script_tests/maf_select_tests.py +++ b/script_tests/maf_select_tests.py @@ -5,27 +5,31 @@ class Test(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_select.py ${features}" - input_features = base.TestFile("""0 + input_features = base.TestFile( + """0 0 0 0 0 0 0 - 1""") + 1""" + ) input_stdin = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_no_index.maf") output_stdout = base.TestFile(filename="./test_data/maf_tests/mm8_chr7_tiny_last_selected.maf") class TestWithE(base.BaseScriptTest, unittest.TestCase): command_line = "./scripts/maf_select.py ${features}" - input_features = base.TestFile("""0 + input_features = base.TestFile( + """0 1 0 0 0 0 0 - 0""") + 0""" + ) input_stdin = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe.maf") output_stdout = base.TestFile(filename="./test_data/maf_tests/mm10_chr12_lessspe_one_selected.maf") diff --git a/scripts/aggregate_scores_in_intervals.py b/scripts/aggregate_scores_in_intervals.py index 36e4d2e9..68ca153c 100755 --- a/scripts/aggregate_scores_in_intervals.py +++ b/scripts/aggregate_scores_in_intervals.py @@ -19,7 +19,10 @@ import bx.wiggle from bx import misc -from bx.binned_array import BinnedArray, FileBinnedArray +from bx.binned_array import ( + BinnedArray, + FileBinnedArray, +) from bx.bitset_builders import binned_bitsets_from_file from bx.cookbook import doc_optparse from bx_extras.fpconst import isNaN @@ -84,7 +87,7 @@ def main(): score_fname = args[0] interval_fname = args[1] if len(args) > 2: - out_file = open(args[2], 'w') + out_file = open(args[2], "w") else: out_file = sys.stdout binned = bool(options.binned) @@ -123,7 +126,7 @@ def main(): max_score = max(score, max_score) 
min_score = min(score, min_score) if count > 0: - avg = total/count + avg = total / count else: avg = "nan" min_score = "nan" diff --git a/scripts/axt_to_fasta.py b/scripts/axt_to_fasta.py index 512e148d..cb5b87f0 100755 --- a/scripts/axt_to_fasta.py +++ b/scripts/axt_to_fasta.py @@ -31,8 +31,7 @@ def main(): # convert the alignment blocks - reader = bx.align.axt.Reader(sys.stdin, support_ids=True, - species1="", species2="") + reader = bx.align.axt.Reader(sys.stdin, support_ids=True, species1="", species2="") for a in reader: if "id" in a.attributes: @@ -46,6 +45,7 @@ def main(): # $$$ this should be moved to a bx.align.fasta module + def print_component_as_fasta(c, id=None): header = f">{c.src}_{c.start}_{c.start + c.size}" if id is not None: diff --git a/scripts/axt_to_lav.py b/scripts/axt_to_lav.py index ff559047..66ab9693 100755 --- a/scripts/axt_to_lav.py +++ b/scripts/axt_to_lav.py @@ -96,20 +96,13 @@ def main(): # read the alignments - out = bx.align.lav.Writer( - sys.stdout, - attributes={ - "name_format_1": primaryFile, - "name_format_2": secondaryFile}) + out = bx.align.lav.Writer(sys.stdout, attributes={"name_format_1": primaryFile, "name_format_2": secondaryFile}) axtsRead = 0 axtsWritten = 0 for axtBlock in bx.align.axt.Reader( - sys.stdin, - species_to_lengths=speciesToLengths, - species1=primary, - species2=secondary, - support_ids=True): + sys.stdin, species_to_lengths=speciesToLengths, species1=primary, species2=secondary, support_ids=True + ): axtsRead += 1 out.write(axtBlock) axtsWritten += 1 diff --git a/scripts/axt_to_maf.py b/scripts/axt_to_maf.py index 145e407a..4d0134a5 100755 --- a/scripts/axt_to_maf.py +++ b/scripts/axt_to_maf.py @@ -29,7 +29,7 @@ def usage(s=None): message = __doc__ - if (s is None): + if s is None: sys.exit(message) else: sys.exit(f"{s}\n{message}") @@ -49,14 +49,14 @@ def main(): # pick off options args = sys.argv[1:] - while (len(args) > 0): + while len(args) > 0: arg = args.pop(0) val = None fields = arg.split("=", 
1) - if (len(fields) == 2): + if len(fields) == 2: arg = fields[0] val = fields[1] - if (val == ""): + if val == "": usage("missing a value in %s=" % arg) if (arg == "--silent") and (val is None): @@ -68,20 +68,20 @@ def main(): else: usage("unknown argument: %s" % arg) - if (primary is None): + if primary is None: usage("missing primary species") - if (secondary is None): + if secondary is None: usage("missing secondary species") fields = primary.split(":") - if (len(fields) != 2): + if len(fields) != 2: usage("bad primary species (must be species:lengths_file") primary = fields[0] primaryLengths = fields[1] fields = secondary.split(":") - if (len(fields) != 2): + if len(fields) != 2: usage("bad secondary species (must be species:lengths_file") secondary = fields[0] secondaryLengths = fields[1] @@ -103,17 +103,15 @@ def main(): axtsRead = 0 axtsWritten = 0 for axtBlock in bx.align.axt.Reader( - sys.stdin, - species_to_lengths=speciesToLengths, - species1=primary, - species2=secondary): + sys.stdin, species_to_lengths=speciesToLengths, species1=primary, species2=secondary + ): axtsRead += 1 p = axtBlock.get_component_by_src_start(primary) - if (p is None): + if p is None: continue s = axtBlock.get_component_by_src_start(secondary) - if (s is None): + if s is None: continue mafBlock = bx.align.Alignment(axtBlock.score, axtBlock.attributes) @@ -123,7 +121,7 @@ def main(): out.write(mafBlock) axtsWritten += 1 - if (not silent): + if not silent: sys.stderr.write("%d blocks read, %d written\n" % (axtsRead, axtsWritten)) @@ -139,13 +137,13 @@ def read_lengths(fileName): for lineNumber, line in enumerate(f): line = line.strip() - if (line == ""): + if line == "": continue - if (line.startswith("#")): + if line.startswith("#"): continue fields = line.split() - if (len(fields) != 2): + if len(fields) != 2: raise ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) chrom = fields[0] @@ -154,7 +152,7 @@ def read_lengths(fileName): except ValueError: raise 
ValueError("bad lengths line (%s:%d): %s" % (fileName, lineNumber, line)) - if (chrom in chromToLength): + if chrom in chromToLength: raise ValueError("%s appears more than once (%s:%d): %s" % (chrom, fileName, lineNumber, line)) chromToLength[chrom] = length diff --git a/scripts/bed_bigwig_profile.py b/scripts/bed_bigwig_profile.py index ad175bb5..7a454e6f 100755 --- a/scripts/bed_bigwig_profile.py +++ b/scripts/bed_bigwig_profile.py @@ -18,7 +18,7 @@ int32, isnan, savetxt, - zeros + zeros, ) from bx.bbi.bigwig_file import BigWigFile @@ -26,8 +26,8 @@ bw = BigWigFile(open(sys.argv[1])) padding = int(sys.argv[2]) -totals = zeros(padding*2, dtype=float64) -valid = zeros(padding*2, dtype=int32) +totals = zeros(padding * 2, dtype=float64) +valid = zeros(padding * 2, dtype=int32) for interval in GenomicIntervalReader(sys.stdin): center = floor((interval.start + interval.end) / 2) @@ -36,6 +36,6 @@ invalid = isnan(values) values[invalid] = 0 totals += values - valid += (~ invalid) + valid += ~invalid -savetxt(sys.stdout, totals/valid) +savetxt(sys.stdout, totals / valid) diff --git a/scripts/bed_count_by_interval.py b/scripts/bed_count_by_interval.py index b79f2be8..82f5c6f1 100755 --- a/scripts/bed_count_by_interval.py +++ b/scripts/bed_count_by_interval.py @@ -9,7 +9,7 @@ from bx.intervals import ( Intersecter, - Interval + Interval, ) bed1, bed2 = sys.argv[1:3] @@ -17,7 +17,11 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - chrom, start, end, = fields[0], int(fields[1]), int(fields[2]) + chrom, start, end, = ( + fields[0], + int(fields[1]), + int(fields[2]), + ) if chrom not in ranges: ranges[chrom] = Intersecter() ranges[chrom].add_interval(Interval(start, end)) diff --git a/scripts/bed_count_overlapping.py b/scripts/bed_count_overlapping.py index b79f2be8..82f5c6f1 100755 --- a/scripts/bed_count_overlapping.py +++ b/scripts/bed_count_overlapping.py @@ -9,7 +9,7 @@ from bx.intervals import ( Intersecter, - Interval + Interval, ) bed1, bed2 = 
sys.argv[1:3] @@ -17,7 +17,11 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - chrom, start, end, = fields[0], int(fields[1]), int(fields[2]) + chrom, start, end, = ( + fields[0], + int(fields[1]), + int(fields[2]), + ) if chrom not in ranges: ranges[chrom] = Intersecter() ranges[chrom].add_interval(Interval(start, end)) diff --git a/scripts/bed_coverage_by_interval.py b/scripts/bed_coverage_by_interval.py index 00200239..13f89ff8 100755 --- a/scripts/bed_coverage_by_interval.py +++ b/scripts/bed_coverage_by_interval.py @@ -41,10 +41,10 @@ def clone(bits): chr, start, end = fields[0], int(fields[1]), int(fields[2]) bases_covered = 0 if chr in bitsets: - bases_covered = bitsets[chr].count_range(start, end-start) + bases_covered = bitsets[chr].count_range(start, end - start) length = end - start if mask and chr in mask: - bases_masked = mask[chr].count_range(start, end-start) + bases_masked = mask[chr].count_range(start, end - start) length -= bases_masked assert bases_covered <= length, f"{bases_covered!r}, {bases_masked!r}, {length!r}" if length == 0: diff --git a/scripts/bed_intersect.py b/scripts/bed_intersect.py index c4793abc..bb230722 100755 --- a/scripts/bed_intersect.py +++ b/scripts/bed_intersect.py @@ -50,14 +50,14 @@ start, end = int(fields[1]), int(fields[2]) if start > end: warn("Bed interval start after end!") - if fields[0] in bitsets and bitsets[fields[0]].count_range(start, end-start) >= mincols: + if fields[0] in bitsets and bitsets[fields[0]].count_range(start, end - start) >= mincols: if booleans: if reverse: print(0) else: print(1) elif not reverse: - print(line, end=' ') + print(line, end=" ") else: if booleans: if reverse: @@ -65,4 +65,4 @@ else: print(0) elif reverse: - print(line, end=' ') + print(line, end=" ") diff --git a/scripts/bed_rand_intersect.py b/scripts/bed_rand_intersect.py index b030a273..cc16b52d 100755 --- a/scripts/bed_rand_intersect.py +++ b/scripts/bed_rand_intersect.py @@ -157,10 +157,13 @@ def 
main(): print(total_samples[i, featnum], file=sys.stderr) fraction_overlap = total_samples / total_lengths2 print("\t".join(intervals2_fnames)) - print("\t".join(map(str, total_actual/total_lengths2))) + print("\t".join(map(str, total_actual / total_lengths2))) for row in fraction_overlap: print("\t".join(map(str, row))) - print("observed overlap: %d, sample mean: %d, sample stdev: %d" % (total_actual, stats.amean(total_samples), stats.asamplestdev(total_samples))) + print( + "observed overlap: %d, sample mean: %d, sample stdev: %d" + % (total_actual, stats.amean(total_samples), stats.asamplestdev(total_samples)) + ) print("z-score:", (total_actual - stats.amean(total_samples)) / stats.asamplestdev(total_samples)) print("percentile:", sum(total_actual > total_samples) / nsamples) diff --git a/scripts/bnMapper.py b/scripts/bnMapper.py index 2e37af97..21ebce7d 100755 --- a/scripts/bnMapper.py +++ b/scripts/bnMapper.py @@ -11,7 +11,10 @@ import sys from functools import reduce from itertools import groupby -from operator import attrgetter, itemgetter +from operator import ( + attrgetter, + itemgetter, +) import numpy as np @@ -23,10 +26,21 @@ IntervalTree, ) -elem_t = np.dtype([('chrom', np.str_, 30), ('start', np.int64), ('end', np.int64), ('id', np.str_, 100)]) -narrowPeak_t = np.dtype([('chrom', np.str_, 30), ('start', np.int64), ('end', np.int64), ('id', np.str_, 100), - ('score', np.int64), ('strand', np.str_, 1), ('signalValue', np.float), - ('pValue', np.float), ('qValue', np.float), ('peak', np.int64)]) +elem_t = np.dtype([("chrom", np.str_, 30), ("start", np.int64), ("end", np.int64), ("id", np.str_, 100)]) +narrowPeak_t = np.dtype( + [ + ("chrom", np.str_, 30), + ("start", np.int64), + ("end", np.int64), + ("id", np.str_, 100), + ("score", np.int64), + ("strand", np.str_, 1), + ("signalValue", np.float), + ("pValue", np.float), + ("qValue", np.float), + ("peak", np.int64), + ] +) LOG_LEVELS = {"info": logging.INFO, "debug": logging.DEBUG, "silent": 
logging.ERROR} logging.basicConfig() @@ -71,7 +85,7 @@ def transform(elem, chain_CT_CQ, max_gap): elem intersects this chain's ginterval. :return: a list of the type [(to_chr, start, end, elem[id]) ... ]""" (chain, CT, CQ) = chain_CT_CQ - start, end = max(elem['start'], chain.tStart) - chain.tStart, min(elem['end'], chain.tEnd) - chain.tStart + start, end = max(elem["start"], chain.tStart) - chain.tStart, min(elem["end"], chain.tEnd) - chain.tStart assert np.all((CT[:, 1] - CT[:, 0]) == (CQ[:, 1] - CQ[:, 0])) to_chrom = chain.qName @@ -85,24 +99,27 @@ def transform(elem, chain_CT_CQ, max_gap): # apply the gap threshold if max_gap >= 0 and start_idx < end_idx - 1: - if np.max(CT[(start_idx+1):end_idx, 0] - CT[start_idx:(end_idx-1), 1]) > max_gap or np.max(CQ[(start_idx+1):end_idx, 0] - CQ[start_idx:(end_idx-1), 1]) > max_gap: + if ( + np.max(CT[(start_idx + 1) : end_idx, 0] - CT[start_idx : (end_idx - 1), 1]) > max_gap + or np.max(CQ[(start_idx + 1) : end_idx, 0] - CQ[start_idx : (end_idx - 1), 1]) > max_gap + ): return [] assert start < CT[start_idx, 1] assert CT[end_idx, 0] < end to_start = CQ[start_idx, 0] + max(0, start - CT[start_idx, 0]) # correct if on middle of interval - to_end = CQ[end_idx, 1] - max(0, CT[end_idx, 1] - end) # idem + to_end = CQ[end_idx, 1] - max(0, CT[end_idx, 1] - end) # idem if start_idx == end_idx: # elem falls in a single run of matches slices = [(to_start, to_end)] else: slices = [(to_start, CQ[start_idx, 1])] - slices += [(CQ[i, 0], CQ[i, 1]) for i in range(start_idx+1, end_idx)] + slices += [(CQ[i, 0], CQ[i, 1]) for i in range(start_idx + 1, end_idx)] slices.append((CQ[end_idx, 0], to_end)) - if chain.qStrand == '-': + if chain.qStrand == "-": Sz = chain.qEnd - chain.qStart - slices = [(Sz-t[1], Sz-t[0]) for t in slices] - return [(to_chrom, to_gab_start + t[0], to_gab_start + t[1], elem['id']) for t in slices] + slices = [(Sz - t[1], Sz - t[0]) for t in slices] + return [(to_chrom, to_gab_start + t[0], to_gab_start + t[1], 
elem["id"]) for t in slices] def union_elements(elements): @@ -128,12 +145,12 @@ def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd): BED4_FRM = "%s\t%d\t%d\t%s\n" BED12_FRM = "%s\t%d\t%d\t%s\t1000\t+\t%d\t%d\t0,0,0\t%d\t%s\t%s\n" NPEAK_FRM = "%s\t%d\t%d\t%s\t%d\t%s\t%f\t%f\t%f\t%d\n" - assert len(set(from_elem_list['chrom'])) <= 1 + assert len(set(from_elem_list["chrom"])) <= 1 mapped_elem_count = 0 mapped_summit_count = 0 for from_elem in from_elem_list: - matching_block_ids = [attrgetter("value")(_) for _ in tree.find(chrom, from_elem['start'], from_elem['end'])] + matching_block_ids = [attrgetter("value")(_) for _ in tree.find(chrom, from_elem["start"], from_elem["end"])] # do the actual mapping to_elem_slices = [_ for _ in (transform(from_elem, all_epo[i], opt.gap) for i in matching_block_ids) if _] @@ -166,7 +183,7 @@ def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd): """ End AGD modifications """ # apply threshold - if (from_elem[2] - from_elem[1]) * opt.threshold > reduce(lambda b, a: a[2]-a[1] + b, to_elem_slices, 0): + if (from_elem[2] - from_elem[1]) * opt.threshold > reduce(lambda b, a: a[2] - a[1] + b, to_elem_slices, 0): log.debug("%s did not pass threshold" % (str(from_elem))) continue @@ -181,19 +198,41 @@ def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd): for tel in to_elem_list: out_fd.write(BED4_FRM % tel) elif opt.format == "BED12": - out_fd.write(BED12_FRM % ( - to_elem_list[0][0], start, end, from_elem['id'], - start, end, len(to_elem_list), - ",".join("%d" % (e[2]-e[1]) for e in to_elem_list), - ",".join("%d" % (e[1]-start) for e in to_elem_list))) + out_fd.write( + BED12_FRM + % ( + to_elem_list[0][0], + start, + end, + from_elem["id"], + start, + end, + len(to_elem_list), + ",".join("%d" % (e[2] - e[1]) for e in to_elem_list), + ",".join("%d" % (e[1] - start) for e in to_elem_list), + ) + ) else: # narrowPeak convention is to report the peak location relative to start - 
peak = int((start + end)/2) - start + peak = int((start + end) / 2) - start if opt.in_format == "narrowPeak": # Map the peak location # sys.stderr.write("{}\n".format(from_elem)) - matching_block_ids = [attrgetter("value")(_) for _ in tree.find(chrom, from_elem['peak'], from_elem['peak'])] - p_elem_slices = [_ for _ in (transform(np.array((chrom, from_elem['peak'], from_elem['peak'], '.'), dtype=elem_t), all_epo[i], opt.gap) for i in matching_block_ids) if _] + matching_block_ids = [ + attrgetter("value")(_) for _ in tree.find(chrom, from_elem["peak"], from_elem["peak"]) + ] + p_elem_slices = [ + _ + for _ in ( + transform( + np.array((chrom, from_elem["peak"], from_elem["peak"], "."), dtype=elem_t), + all_epo[i], + opt.gap, + ) + for i in matching_block_ids + ) + if _ + ] if len(p_elem_slices) >= 1: mapped_summit_count += 1 sys.stderr.write(f"{p_elem_slices}\n") @@ -202,13 +241,29 @@ def transform_by_chrom(all_epo, from_elem_list, tree, chrom, opt, out_fd): peak = p_elem_slices[0][0][1] - start else: mapped_summit_count -= 1 - log.debug(f"Warning: elem {from_elem} summit mapped location falls outside the mapped element start and end. Using the mapped elem midpoint instead.") + log.debug( + f"Warning: elem {from_elem} summit mapped location falls outside the mapped element start and end. Using the mapped elem midpoint instead." + ) else: - log.debug(f"Warning: elem {from_elem} summit maps to a gap region in the target alignment. Using the mapped elem midpoint instead.") - out_fd.write(NPEAK_FRM % (to_elem_list[0][0], start, end, from_elem['id'], - from_elem['score'], from_elem['strand'], from_elem['signalValue'], - from_elem['pValue'], from_elem['qValue'], peak)) + log.debug( + f"Warning: elem {from_elem} summit maps to a gap region in the target alignment. Using the mapped elem midpoint instead." 
+ ) + out_fd.write( + NPEAK_FRM + % ( + to_elem_list[0][0], + start, + end, + from_elem["id"], + from_elem["score"], + from_elem["strand"], + from_elem["signalValue"], + from_elem["pValue"], + from_elem["qValue"], + peak, + ) + ) log.info("%s: %d of %d elements mapped" % (chrom, mapped_elem_count, from_elem_list.shape[0])) if opt.format == "narrowPeak" and opt.in_format == "narrowPeak": log.info("%s: %d peak summits from %d mapped elements mapped" % (chrom, mapped_summit_count, mapped_elem_count)) @@ -219,16 +274,16 @@ def transform_file(ELEMS, ofname, EPO, TREE, opt): BED4_FRM = "%s\t%d\t%d\t%s\n" log.info("%s (%d) elements ..." % (opt.screen and "screening" or "transforming", ELEMS.shape[0])) - with open(ofname, 'w') as out_fd: + with open(ofname, "w") as out_fd: if opt.screen: for elem in ELEMS.flat: - matching_blocks = [attrgetter("value")(_) for _ in TREE.find(elem['chrom'], elem['start'], elem['end'])] + matching_blocks = [attrgetter("value")(_) for _ in TREE.find(elem["chrom"], elem["start"], elem["end"])] assert set(matching_blocks) <= set(EPO.keys()) if matching_blocks: out_fd.write(BED4_FRM % elem) else: - for chrom in set(ELEMS['chrom']): - transform_by_chrom(EPO, ELEMS[ELEMS['chrom'] == chrom], TREE, chrom, opt, out_fd) + for chrom in set(ELEMS["chrom"]): + transform_by_chrom(EPO, ELEMS[ELEMS["chrom"] == chrom], TREE, chrom, opt, out_fd) log.info("DONE!") @@ -240,13 +295,13 @@ def loadChains(path): # compute cummulative intervals for i in range(len(EPO)): ch, S, T, Q = EPO[i] - if ch.tStrand == '-': + if ch.tStrand == "-": ch = ch._replace(tEnd=ch.tSize - ch.tStart, tStart=ch.tSize - ch.tEnd) - if ch.qStrand == '-': + if ch.qStrand == "-": ch = ch._replace(qEnd=ch.qSize - ch.qStart, qStart=ch.qSize - ch.qEnd) EPO[i] = (ch, epo.cummulative_intervals(S, T), epo.cummulative_intervals(S, Q)) # now each element of epo is (chain_header, target_intervals, query_intervals) - assert all(t[0].tStrand == '+' for t in EPO), "all target strands should be +" + assert 
all(t[0].tStrand == "+" for t in EPO), "all target strands should be +" return EPO @@ -268,38 +323,80 @@ def loadFeatures(path, opt): with open(path) as fd: for line in fd: cols = line.split() - data.append(( - cols[0], int(cols[1]), int(cols[2]), cols[3], int(cols[4]), - cols[5], float(cols[6]), float(cols[7]), float(cols[8]), - int(cols[-1])+int(cols[1]))) + data.append( + ( + cols[0], + int(cols[1]), + int(cols[2]), + cols[3], + int(cols[4]), + cols[5], + float(cols[6]), + float(cols[7]), + float(cols[8]), + int(cols[-1]) + int(cols[1]), + ) + ) data = np.array(data, dtype=narrowPeak_t) return data if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__, epilog="Olgert Denas (Taylor Lab)", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument("input", nargs='+', - help="Input to process. If more than a file is specified, all files will be mapped and placed on --output, which should be a directory.") + parser = argparse.ArgumentParser( + description=__doc__, epilog="Olgert Denas (Taylor Lab)", formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "input", + nargs="+", + help="Input to process. If more than a file is specified, all files will be mapped and placed on --output, which should be a directory.", + ) parser.add_argument("alignment", help="Alignment file (.chain or .pkl)") - parser.add_argument("-f", '--format', choices=("BED4", "BED12", "narrowPeak"), default="BED4", - help="Output format. BED4 output reports all aligned blocks as separate BED records. BED12 reports a single BED record for each mapped element, with individual blocks given in the BED12 fields. 
NarrowPeak reports a single narrowPeak record for each mapped element, in which the chromosome, start, end, and peak positions are mapped to the target species and all other columns are passed through unchanged.") - parser.add_argument("-o", '--output', metavar="FILE", default='stdout', - type=lambda s: ((s in ('stdout', '-') and "/dev/stdout") or s), - help="Output file. Mandatory if more than on file in input.") - parser.add_argument("-t", '--threshold', metavar="FLOAT", default=0., type=float, - help="Mapping threshold i.e., |elem| * threshold <= |mapped_elem|") - parser.add_argument("-s", '--screen', default=False, action='store_true', - help="Only report elements in the alignment (without mapping). -t has not effect here (TODO)") - parser.add_argument('-g', '--gap', type=int, default=-1, - help="Ignore elements with an insertion/deletion of this or bigger size.") - parser.add_argument('-v', '--verbose', type=str, choices=list(LOG_LEVELS.keys()), default='info', - help='Verbosity level') - parser.add_argument("-k", '--keep_split', default=False, action='store_true', - help="If elements span multiple chains, report the segment with the longest overlap instead of silently dropping them. (This is the default behavior for liftOver.)") - parser.add_argument("-i", "--in_format", choices=["BED", "narrowPeak"], default="BED", - help="Input file format.") + parser.add_argument( + "-f", + "--format", + choices=("BED4", "BED12", "narrowPeak"), + default="BED4", + help="Output format. BED4 output reports all aligned blocks as separate BED records. BED12 reports a single BED record for each mapped element, with individual blocks given in the BED12 fields. 
NarrowPeak reports a single narrowPeak record for each mapped element, in which the chromosome, start, end, and peak positions are mapped to the target species and all other columns are passed through unchanged.", + ) + parser.add_argument( + "-o", + "--output", + metavar="FILE", + default="stdout", + type=lambda s: ((s in ("stdout", "-") and "/dev/stdout") or s), + help="Output file. Mandatory if more than one file in input.", + ) + parser.add_argument( + "-t", + "--threshold", + metavar="FLOAT", + default=0.0, + type=float, + help="Mapping threshold i.e., |elem| * threshold <= |mapped_elem|", + ) + parser.add_argument( + "-s", + "--screen", + default=False, + action="store_true", + help="Only report elements in the alignment (without mapping). -t has no effect here (TODO)", + ) + parser.add_argument( + "-g", "--gap", type=int, default=-1, help="Ignore elements with an insertion/deletion of this or bigger size." + ) + parser.add_argument( + "-v", "--verbose", type=str, choices=list(LOG_LEVELS.keys()), default="info", help="Verbosity level" + ) + parser.add_argument( + "-k", + "--keep_split", + default=False, + action="store_true", + help="If elements span multiple chains, report the segment with the longest overlap instead of silently dropping them. 
(This is the default behavior for liftOver.)", + ) + parser.add_argument("-i", "--in_format", choices=["BED", "narrowPeak"], default="BED", help="Input file format.") opt = parser.parse_args() log.setLevel(LOG_LEVELS[opt.verbose]) diff --git a/scripts/div_snp_table_chr.py b/scripts/div_snp_table_chr.py index 6f094927..ceb23440 100755 --- a/scripts/div_snp_table_chr.py +++ b/scripts/div_snp_table_chr.py @@ -30,7 +30,7 @@ def main(): else: suffix = "" - print("\nReading feature", end=' ', file=sys.stderr) + print("\nReading feature", end=" ", file=sys.stderr) interval_file = open(args[0]) feature = binned_bitsets_from_file(interval_file, lens=lens) interval_file.close() @@ -45,15 +45,15 @@ def main(): intervals[chrom].append([start, end]) interval_file.close() - print("\nReading ar", end=' ', file=sys.stderr) + print("\nReading ar", end=" ", file=sys.stderr) ar = binned_bitsets_from_file(open(args[1]), lens=lens) - print("\nReading snps", end=' ', file=sys.stderr) + print("\nReading snps", end=" ", file=sys.stderr) snp = binned_bitsets_from_file(open(args[2]), lens=lens) snp_mask = clone_inverted(snp) snp_copy = clone(snp) - print("\nMasking AR", end=' ', file=sys.stderr) + print("\nMasking AR", end=" ", file=sys.stderr) ar_mask = clone_inverted(ar) print(file=sys.stderr) @@ -87,9 +87,9 @@ def main(): if chr not in ar: continue - print("reading %s ..." % chr, end=' ', file=sys.stderr) + print("reading %s ..." 
% chr, end=" ", file=sys.stderr) try: - div = binned_bitsets_from_file(open(dirname + "/%s.bed" % (chr+suffix)), lens=lens) + div = binned_bitsets_from_file(open(dirname + "/%s.bed" % (chr + suffix)), lens=lens) except Exception: print("%s.bed not found" % chr, file=sys.stderr) continue @@ -97,12 +97,12 @@ def main(): div[chr].iand(snp_mask[chr]) # div/snp sites count snp-only div_copy = clone(div) - print("AR:", chr, end=' ', file=sys.stderr) + print("AR:", chr, end=" ", file=sys.stderr) snp[chr].iand(ar[chr]) div[chr].iand(ar[chr]) snp_count = snp[chr].count_range(0, snp[chr].size) ar_snp_count += snp_count - print(snp_count, end=' ', file=sys.stderr) + print(snp_count, end=" ", file=sys.stderr) try: div_count = div[chr].count_range(0, div[chr].size) ar_div_count += div_count @@ -112,7 +112,7 @@ def main(): div = div_copy snp[chr] = snp_copy[chr] - print("feature:", chr, end=' ', file=sys.stderr) + print("feature:", chr, end=" ", file=sys.stderr) feature[chr].iand(ar_mask[chr]) # clip to non-AR only snp[chr].iand(feature[chr]) div[chr].iand(feature[chr]) @@ -124,8 +124,8 @@ def main(): # Note: can loop over feature intervals here for individual counts if chr in intervals: for start, end in intervals[chr]: - ind_div_count = div[chr].count_range(start, end-start) - ind_snp_count = snp[chr].count_range(start, end-start) + ind_div_count = div[chr].count_range(start, end - start) + ind_snp_count = snp[chr].count_range(start, end - start) print(chr, start, end, ind_div_count, ind_snp_count) print("feature snp\t%d" % feature_snp_count) diff --git a/scripts/gene_fourfold_sites.py b/scripts/gene_fourfold_sites.py index c28be7b7..d9ea05a1 100755 --- a/scripts/gene_fourfold_sites.py +++ b/scripts/gene_fourfold_sites.py @@ -95,10 +95,10 @@ def translate(codon, genetic_code): """ parse the doc string to hash the genetic code""" GEN_CODE = {} -for line in GENETIC_CODE.split('\n'): - if line.strip() == '': +for line in GENETIC_CODE.split("\n"): + if line.strip() == "": continue 
- f = re.split(r'\s|\(|\)|\/', line) + f = re.split(r"\s|\(|\)|\/", line) codon = f[0] c1, c2, c3 = codon aminoacid = f[3] @@ -113,9 +113,9 @@ def translate(codon, genetic_code): def getnib(nibdir): seqs = {} for nibf in os.listdir(nibdir): - if not nibf.endswith('.nib'): + if not nibf.endswith(".nib"): continue - chr = nibf.replace('.nib', '') + chr = nibf.replace(".nib", "") file = os.path.join(nibdir, nibf) seqs[chr] = nib.NibFile(open(file)) @@ -144,7 +144,7 @@ def main(): if options.format: format = options.format else: - format = 'bed' + format = "bed" allpositions = bool(options.allpositions) include_name = bool(options.include_name) @@ -157,18 +157,18 @@ def main(): for chrom, strand, cds_exons, name in CDSReader(open(bedfile), format=format): - cds_seq = '' + cds_seq = "" # genome_seq_index maps the position in CDS to position on the genome genome_seq_index = [] for (c_start, c_end) in cds_exons: - cds_seq += nibs[chrom].get(c_start, c_end-c_start) + cds_seq += nibs[chrom].get(c_start, c_end - c_start) for i in range(c_start, c_end): genome_seq_index.append(i) cds_seq = cds_seq.upper() - if strand == '+': + if strand == "+": frsts = range(0, len(cds_seq), 3) offsign = 1 else: @@ -179,7 +179,7 @@ def main(): offone = 1 * offsign offtwo = 2 * offsign - all = ['A', 'C', 'G', 'T'] + all = ["A", "C", "G", "T"] for first_pos in frsts: c1 = first_pos @@ -195,9 +195,9 @@ def main(): degeneracy3 = str(list(GEN_CODE[codon[0]][codon[1]].values()).count(aa)) + "d" if not include_name: - name_text = '' + name_text = "" else: - name_text = name.replace(' ', '_') + name_text = name.replace(" ", "_") if allpositions: try: @@ -207,25 +207,115 @@ def main(): print(list(GEN_CODE.values()), file=sys.stderr) raise TypeError(s) - if strand == '+': - print(chrom, genome_seq_index[c1], genome_seq_index[c1] + 1, cds_seq[c1], degeneracy1, aa, name_text, file=out) - print(chrom, genome_seq_index[c2], genome_seq_index[c2] + 1, cds_seq[c2], degeneracy2, aa, name_text, file=out) - 
print(chrom, genome_seq_index[c3], genome_seq_index[c3] + 1, cds_seq[c3], degeneracy3, aa, name_text, file=out) + if strand == "+": + print( + chrom, + genome_seq_index[c1], + genome_seq_index[c1] + 1, + cds_seq[c1], + degeneracy1, + aa, + name_text, + file=out, + ) + print( + chrom, + genome_seq_index[c2], + genome_seq_index[c2] + 1, + cds_seq[c2], + degeneracy2, + aa, + name_text, + file=out, + ) + print( + chrom, + genome_seq_index[c3], + genome_seq_index[c3] + 1, + cds_seq[c3], + degeneracy3, + aa, + name_text, + file=out, + ) else: - print(chrom, genome_seq_index[c3], genome_seq_index[c3] + 1, cds_seq[c3], degeneracy3, aa, name_text, file=out) - print(chrom, genome_seq_index[c2], genome_seq_index[c2] + 1, cds_seq[c2], degeneracy2, aa, name_text, file=out) - print(chrom, genome_seq_index[c1], genome_seq_index[c1] + 1, cds_seq[c1], degeneracy1, aa, name_text, file=out) + print( + chrom, + genome_seq_index[c3], + genome_seq_index[c3] + 1, + cds_seq[c3], + degeneracy3, + aa, + name_text, + file=out, + ) + print( + chrom, + genome_seq_index[c2], + genome_seq_index[c2] + 1, + cds_seq[c2], + degeneracy2, + aa, + name_text, + file=out, + ) + print( + chrom, + genome_seq_index[c1], + genome_seq_index[c1] + 1, + cds_seq[c1], + degeneracy1, + aa, + name_text, + file=out, + ) else: - if strand == '+': + if strand == "+": for b in c1, c2: - print(chrom, genome_seq_index[b], genome_seq_index[b] + 1, cds_seq[b], "1d", aa, name_text, file=out) - print(chrom, genome_seq_index[c3], genome_seq_index[c3] + 1, cds_seq[c3], degeneracy3, aa, name_text, file=out) + print( + chrom, + genome_seq_index[b], + genome_seq_index[b] + 1, + cds_seq[b], + "1d", + aa, + name_text, + file=out, + ) + print( + chrom, + genome_seq_index[c3], + genome_seq_index[c3] + 1, + cds_seq[c3], + degeneracy3, + aa, + name_text, + file=out, + ) else: - print(chrom, genome_seq_index[c3], genome_seq_index[c3] + 1, cds_seq[c3], degeneracy3, aa, name_text, file=out) + print( + chrom, + genome_seq_index[c3], + 
genome_seq_index[c3] + 1, + cds_seq[c3], + degeneracy3, + aa, + name_text, + file=out, + ) for b in c2, c1: - print(chrom, genome_seq_index[b], genome_seq_index[b] + 1, cds_seq[b], "1d", aa, name_text, file=out) + print( + chrom, + genome_seq_index[b], + genome_seq_index[b] + 1, + cds_seq[b], + "1d", + aa, + name_text, + file=out, + ) out.close() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/scripts/get_scores_in_intervals.py b/scripts/get_scores_in_intervals.py index 25fc51a0..a8c3c5e5 100755 --- a/scripts/get_scores_in_intervals.py +++ b/scripts/get_scores_in_intervals.py @@ -34,7 +34,7 @@ def main(): score_file = open(args[0]) interval_file = open(args[1]) if len(args) > 2: - out_file = open(args[2], 'w') + out_file = open(args[2], "w") else: out_file = sys.stdout except Exception: diff --git a/scripts/int_seqs_to_char_strings.py b/scripts/int_seqs_to_char_strings.py index 152b0cf3..c7d61fbc 100755 --- a/scripts/int_seqs_to_char_strings.py +++ b/scripts/int_seqs_to_char_strings.py @@ -18,7 +18,7 @@ def main(): ints = [int(f) for f in line.split()] if max(ints) > len(table): raise ValueError("Alphabet size too large!") - print(str.join('', [table[i] for i in ints])) + print(str.join("", [table[i] for i in ints])) if __name__ == "__main__": diff --git a/scripts/interval_count_intersections.py b/scripts/interval_count_intersections.py index 20395352..3102185d 100755 --- a/scripts/interval_count_intersections.py +++ b/scripts/interval_count_intersections.py @@ -12,8 +12,10 @@ import sys -from bx import intervals -from bx import misc +from bx import ( + intervals, + misc, +) def main(): diff --git a/scripts/lav_to_axt.py b/scripts/lav_to_axt.py index 1b19b9b2..fd0cb6fa 100755 --- a/scripts/lav_to_axt.py +++ b/scripts/lav_to_axt.py @@ -33,7 +33,7 @@ def main(): for arg in sys.argv[1:]: if "=" in arg: ix = arg.find("=") - pathSubs.append((arg[:ix], arg[ix+1:])) + pathSubs.append((arg[:ix], arg[ix + 1 :])) elif arg == "--silent": silent = 
True else: diff --git a/scripts/lav_to_maf.py b/scripts/lav_to_maf.py index 27d20ac8..1a61f8c6 100755 --- a/scripts/lav_to_maf.py +++ b/scripts/lav_to_maf.py @@ -31,7 +31,7 @@ def main(): for arg in sys.argv[1:]: if "=" in arg: ix = arg.find("=") - pathSubs.append((arg[:ix], arg[ix+1:])) + pathSubs.append((arg[:ix], arg[ix + 1 :])) elif arg == "--silent": silent = True else: diff --git a/scripts/line_select.py b/scripts/line_select.py index 9cd838e3..bc00fd0f 100755 --- a/scripts/line_select.py +++ b/scripts/line_select.py @@ -26,7 +26,7 @@ def __main__(): for index, line in enumerate(sys.stdin): if feature_vector[index] == match: - print(line, end='') + print(line, end="") if __name__ == "__main__": diff --git a/scripts/lzop_build_offset_table.py b/scripts/lzop_build_offset_table.py index 8e32483c..80510752 100755 --- a/scripts/lzop_build_offset_table.py +++ b/scripts/lzop_build_offset_table.py @@ -58,8 +58,8 @@ def main(): f.get("!B") # level flags = f.get("!I") assert not (flags & F_H_FILTER), "LZOP filters not supported" - has_compressed_crc = (flags & F_CRC32_C or flags & F_ADLER32_C) - has_uncompressed_crc = (flags & F_CRC32_D or flags & F_ADLER32_D) + has_compressed_crc = flags & F_CRC32_C or flags & F_ADLER32_C + has_uncompressed_crc = flags & F_CRC32_D or flags & F_ADLER32_D f.get("!I") # mode f.get("!I") # time f.get("!I") # time_offset @@ -79,8 +79,7 @@ def main(): size = f.get("!I") if size == 0: break - assert not (expect_no_more), \ - "Encountered an undersized block that was not the last block" + assert not (expect_no_more), "Encountered an undersized block that was not the last block" if block_size is None: print("s", size) block_size = size @@ -94,8 +93,7 @@ def main(): f.get("!I") # compressed_crc print("o", f.file.tell(), compressed_size, size) compressed_data = f.read(compressed_size) - assert len(compressed_data) == compressed_size, \ - "EOF reading compressed data" + assert len(compressed_data) == compressed_size, "EOF reading compressed data" 
if __name__ == "__main__": diff --git a/scripts/mMK_bitset.py b/scripts/mMK_bitset.py index 4cb9a8fe..545215f9 100644 --- a/scripts/mMK_bitset.py +++ b/scripts/mMK_bitset.py @@ -26,7 +26,7 @@ def main(): step_size = int(args[4]) if options.outfile is not None: - out_file = open(options.outfile, 'w') + out_file = open(options.outfile, "w") # Generate snp and neutral bitsets AR_snp_bitsets = binned_bitsets_from_file(open(snp_filename)) @@ -45,7 +45,7 @@ def main(): continue # Chromosome, start, and stop of reference species alignment - chr = comp1.src.split('.')[1] + chr = comp1.src.split(".")[1] start = comp1.start # Get or create bitset for this chromosome @@ -58,9 +58,9 @@ def main(): # Iterate over text and set diverged bit pos = start for ch1, ch2 in zip(comp1.text.upper(), comp2.text.upper()): - if ch1 == '-': + if ch1 == "-": continue - if ch2 == '-': + if ch2 == "-": pos += 1 continue @@ -68,12 +68,6 @@ def main(): bitset.set(pos) pos += 1 - # Debugging Code -# for chr in AR_div_bitsets: -# for pos in range(0, AR_div_bitsets[chr].size): -# if AR_div_bitsets[pos]: -# print >> sys.stderr, chr, pos, pos+1 - # Copy div and snp bitsets nonAR_snp_bitsets = dict() for chr in AR_snp_bitsets: @@ -119,9 +113,15 @@ def main(): MK_pval = MK_fisher_pvalue(nonAR_snp, nonAR_div, AR_snp, AR_div) if options.outfile is not None: - out_file.write("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f\n" % (chr, window, window+window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval)) + out_file.write( + "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f\n" + % (chr, window, window + window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval) + ) else: - print("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f" % (chr, window, window+window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval)) + print( + "%s\t%d\t%d\t%d\t%d\t%d\t%d\t%1.15f" + % (chr, window, window + window_size, nonAR_snp, nonAR_div, AR_snp, AR_div, MK_pval) + ) if options.outfile is not None: out_file.close() @@ -134,14 +134,14 @@ def 
MK_fisher_pvalue(win_snp, win_div, AR_snp, AR_div): fisher_result = r.fisher_test(r.matrix(r.c([win_snp, win_div, AR_snp, AR_div]), nr=2)) - return fisher_result['p.value'] + return fisher_result["p.value"] def MK_chi_pvalue(win_snp, win_div, AR_snp, AR_div): chi_result = r.chisq_test(r.matrix(r.c([win_snp, win_div, AR_snp, AR_div]), nr=2)) - return chi_result['p.value'] + return chi_result["p.value"] main() diff --git a/scripts/maf_build_index.py b/scripts/maf_build_index.py index 5dd1f831..e8843e48 100755 --- a/scripts/maf_build_index.py +++ b/scripts/maf_build_index.py @@ -28,8 +28,7 @@ def main(): if maf_file.endswith(".bz2"): table_file = maf_file + "t" if not os.path.exists(table_file): - doc_optparse.exit("To index bz2 compressed files first " - "create a bz2t file with bzip-table.") + doc_optparse.exit("To index bz2 compressed files first " "create a bz2t file with bzip-table.") # Open with SeekableBzip2File so we have tell support maf_in = SeekableBzip2File(maf_file, table_file) # Strip .bz2 from the filename before adding ".index" @@ -37,8 +36,9 @@ def main(): elif maf_file.endswith(".lzo"): table_file = maf_file + "t" if not os.path.exists(table_file): - doc_optparse.exit("To index lzo compressed files first " - "create a lzot file with lzop_build_offset_table.") + doc_optparse.exit( + "To index lzo compressed files first " "create a lzot file with lzop_build_offset_table." 
+ ) # Open with SeekableBzip2File so we have tell support maf_in = SeekableLzopFile(maf_file, table_file) # Strip .lzo from the filename before adding ".index" @@ -69,11 +69,11 @@ def main(): if block is None: break for c in block.components: - if species is not None and c.src.split('.')[0] not in species: + if species is not None and c.src.split(".")[0] not in species: continue indexes.add(c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size) - out = open(index_file, 'wb') + out = open(index_file, "wb") indexes.write(out) out.close() diff --git a/scripts/maf_chunk.py b/scripts/maf_chunk.py index f29bfc34..cdb21e6d 100755 --- a/scripts/maf_chunk.py +++ b/scripts/maf_chunk.py @@ -24,8 +24,7 @@ def __main__(): parser = OptionParser("usage: %prog chunk_size out_dir") - parser.add_option("--prob", action="store", default=None, type="float", - help="Probability of writing a given chunk") + parser.add_option("--prob", action="store", default=None, type="float", help="Probability of writing a given chunk") (options, args) = parser.parse_args() diff --git a/scripts/maf_col_counts.py b/scripts/maf_col_counts.py index 850179e6..8a28515d 100755 --- a/scripts/maf_col_counts.py +++ b/scripts/maf_col_counts.py @@ -25,7 +25,7 @@ else: nspecies = len(block.components) # Increment count for each column - for col in zip(* [iter(comp.text.upper()) for comp in block.components]): + for col in zip(*[iter(comp.text.upper()) for comp in block.components]): try: counts[col] += 1 except Exception: diff --git a/scripts/maf_col_counts_all.py b/scripts/maf_col_counts_all.py index f921af70..64afd67e 100755 --- a/scripts/maf_col_counts_all.py +++ b/scripts/maf_col_counts_all.py @@ -33,8 +33,8 @@ else: nspecies = len(block.components) # Increment count for each column - for col in zip(* [iter(comp.text.upper()) for comp in block.components]): - col = ''.join(col) + for col in zip(*[iter(comp.text.upper()) for comp in block.components]): + col = "".join(col) try: counts[col] 
+= 1 except Exception: @@ -55,7 +55,7 @@ nucs += "*" for col in cross_lists(*([nucs] * nspecies)): - col = ''.join(col) + col = "".join(col) if wildcard and col.count("*") > max_wildcard: continue if col.count("-") == nspecies: diff --git a/scripts/maf_count.py b/scripts/maf_count.py index 64488b55..8f8450e3 100755 --- a/scripts/maf_count.py +++ b/scripts/maf_count.py @@ -52,7 +52,7 @@ def __main__(): count += m.text_size elif action == "bases": if skip: - count += (m.components[ref].size - m.components[ref].text.count(skip)) + count += m.components[ref].size - m.components[ref].text.count(skip) else: count += m.components[ref].size diff --git a/scripts/maf_covered_ranges.py b/scripts/maf_covered_ranges.py index 25d10203..8df8f6aa 100755 --- a/scripts/maf_covered_ranges.py +++ b/scripts/maf_covered_ranges.py @@ -17,7 +17,7 @@ def main(): options, args = doc_optparse.parse(__doc__) try: - species = args[0].split(',') + species = args[0].split(",") nrequired = int(args[1]) except Exception: doc_optparse.exit() @@ -30,7 +30,7 @@ def main(): for m in maf_reader: ref = m.components[0] # Does this alignment have enough of the required species - if nrequired <= len([comp for comp in m.components if comp.src.split('.')[0] in species]): + if nrequired <= len([comp for comp in m.components if comp.src.split(".")[0] in species]): if interval_start is None: interval_start = ref.start interval_end = ref.end @@ -39,12 +39,12 @@ def main(): interval_end = ref.end else: if interval_end - interval_start >= MIN: - print(ref.src.split('.')[1], interval_start, interval_end) + print(ref.src.split(".")[1], interval_start, interval_end) interval_start = ref.start interval_end = ref.end else: if interval_start is not None and interval_end - interval_start >= MIN: - print(ref.src.split('.')[1], interval_start, interval_end) + print(ref.src.split(".")[1], interval_start, interval_end) interval_start = None interval_end = None diff --git a/scripts/maf_covered_regions.py 
b/scripts/maf_covered_regions.py index 02dd6371..0099e881 100755 --- a/scripts/maf_covered_regions.py +++ b/scripts/maf_covered_regions.py @@ -22,14 +22,14 @@ def block_pid(comp1, comp2): t2 = comp2.text.lower() for i in range(0, len(t1)): a, b = t1[i], t2[i] - if a == '-' or b == '-': + if a == "-" or b == "-": continue elif a == b: match += 1 total += 1 if total == 0: return None - return (match / total) + return match / total def main(): @@ -38,18 +38,27 @@ def main(): out_files = dict() for block in bx.align.maf.Reader(sys.stdin): ref_comp = block.components[0] - ref_chrom = ref_comp.src.split('.')[1] + ref_chrom = ref_comp.src.split(".")[1] for comp in block.components[1:]: - comp_species, comp_chrom = comp.src.split('.')[:2] + comp_species, comp_chrom = comp.src.split(".")[:2] if comp_species not in out_files: f = open(f"{out_prefix}{comp_species}.bed", "w") out_files[comp_species] = f pid = block_pid(ref_comp, comp) if pid: out_files[comp_species].write( - "%s\t%d\t%d\t%s:%d-%d,%s\t%f\n" % - (ref_chrom, ref_comp.forward_strand_start, ref_comp.forward_strand_end, - comp_chrom, comp.start, comp.end, comp.strand, pid)) + "%s\t%d\t%d\t%s:%d-%d,%s\t%f\n" + % ( + ref_chrom, + ref_comp.forward_strand_start, + ref_comp.forward_strand_end, + comp_chrom, + comp.start, + comp.end, + comp.strand, + pid, + ) + ) for f in out_files.values(): f.close() diff --git a/scripts/maf_div_sites.py b/scripts/maf_div_sites.py index 35794744..a659a4d6 100755 --- a/scripts/maf_div_sites.py +++ b/scripts/maf_div_sites.py @@ -23,7 +23,7 @@ def main(): if not ref or not other: continue - ref_chrom = ref.src.split('.')[1] + ref_chrom = ref.src.split(".")[1] ref_start = ref.start chrom_size = ref.get_src_size() @@ -32,9 +32,9 @@ def main(): pos = ref_start for i, j in zip(ref.text.upper(), other.text.upper()): - if i != '-': + if i != "-": if i != j: # mismatch - if i != 'N' and j != 'N' and j != '-': + if i != "N" and j != "N" and j != "-": # set if all valid chars 
bitsets[ref_chrom].set(pos) pos += 1 diff --git a/scripts/maf_extract_chrom_ranges.py b/scripts/maf_extract_chrom_ranges.py index 9944c1e1..07c252f4 100755 --- a/scripts/maf_extract_chrom_ranges.py +++ b/scripts/maf_extract_chrom_ranges.py @@ -62,7 +62,7 @@ def __main__(): for maf in bx.align.maf.Reader(sys.stdin): if refname: - sourcenames = [cmp.src.split('.')[0] for cmp in maf.components] + sourcenames = [cmp.src.split(".")[0] for cmp in maf.components] try: refindex = sourcenames.index(refname) except Exception: diff --git a/scripts/maf_extract_ranges_indexed.py b/scripts/maf_extract_ranges_indexed.py index ee60f58c..bff7b864 100755 --- a/scripts/maf_extract_ranges_indexed.py +++ b/scripts/maf_extract_ranges_indexed.py @@ -86,7 +86,7 @@ def main(): blocks = index.get(src, start, end) # Open file if needed if dir: - out = bx.align.maf.Writer(open(os.path.join(dir, "%s:%09d-%09d.maf" % (src, start, end)), 'w')) + out = bx.align.maf.Writer(open(os.path.join(dir, "%s:%09d-%09d.maf" % (src, start, end)), "w")) # Write each intersecting block if chop: for block in blocks: diff --git a/scripts/maf_filter.py b/scripts/maf_filter.py index dfe3592f..9792d329 100755 --- a/scripts/maf_filter.py +++ b/scripts/maf_filter.py @@ -35,7 +35,7 @@ def __main__(): # Compile expression for SPEED if expr: - expr = compile(expr, '', 'eval') + expr = compile(expr, "", "eval") maf_reader = maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = maf.Writer(sys.stdout) diff --git a/scripts/maf_filter_max_wc.py b/scripts/maf_filter_max_wc.py index 322e495d..d3529d50 100755 --- a/scripts/maf_filter_max_wc.py +++ b/scripts/maf_filter_max_wc.py @@ -26,7 +26,7 @@ def main(): for m in maf_reader: good = 0 for col in m.column_iter(): - if col.count('*') <= min_species: + if col.count("*") <= min_species: good += 1 if good >= min_good: maf_writer.write(m) diff --git a/scripts/maf_gap_frequency.py b/scripts/maf_gap_frequency.py index 9558e377..3fd4f737 100755 --- a/scripts/maf_gap_frequency.py +++ 
b/scripts/maf_gap_frequency.py @@ -16,7 +16,7 @@ def main(): for m in bx.align.maf.Reader(sys.stdin): gaps = 0 for col in m.column_iter(): - if '-' in col: + if "-" in col: gaps += 1 print(gaps / m.text_size) diff --git a/scripts/maf_gc_content.py b/scripts/maf_gc_content.py index b0f897b8..d2973838 100755 --- a/scripts/maf_gc_content.py +++ b/scripts/maf_gc_content.py @@ -19,11 +19,11 @@ def __main__(): gc = 0 bases = 0 for c in m.components: - gc += c.text.count('G') - gc += c.text.count('C') - gc += c.text.count('g') - gc += c.text.count('c') - bases += (len(c.text) - c.text.count('-')) + gc += c.text.count("G") + gc += c.text.count("C") + gc += c.text.count("g") + gc += c.text.count("c") + bases += len(c.text) - c.text.count("-") print(gc / bases) diff --git a/scripts/maf_interval_alignibility.py b/scripts/maf_interval_alignibility.py index cbdfba18..635c7a1c 100755 --- a/scripts/maf_interval_alignibility.py +++ b/scripts/maf_interval_alignibility.py @@ -31,13 +31,11 @@ def main(): except Exception: doc_optparse.exit() # Open indexed access to mafs - index = bx.align.maf.MultiIndexed(maf_files, - parse_e_rows=True, - use_cache=use_cache) + index = bx.align.maf.MultiIndexed(maf_files, parse_e_rows=True, use_cache=use_cache) # Print header - print("#chr", "start", "end", end=' ') + print("#chr", "start", "end", end=" ") for s in species: - print(s, end=' ') + print(s, end=" ") print() # Iterate over input ranges for line in sys.stdin: @@ -61,8 +59,7 @@ def main(): # Determine the piece of the human interval this block covers, # relative to the start of the interval of interest ref = block.get_component_by_src(src) - assert ref.strand == "+", \ - "Reference species blocks must be on '+' strand" + assert ref.strand == "+", "Reference species blocks must be on '+' strand" rel_start = max(start, ref.start) - start rel_end = min(end, ref.end) - start # Check alignability for each species @@ -82,7 +79,7 @@ def main(): else: aligned_bits[i][rel_start:rel_end] = True # 
Now determine the total alignment coverage of each interval - print(chr, start, end, end=' ') + print(chr, start, end, end=" ") for i, s in enumerate(species): aligned = sum(aligned_bits[i]) missing = sum(missing_bits[i]) @@ -90,11 +87,11 @@ def main(): # present, or more than 100bp and less that 50bp present (yes, # arbitrary) if length < 100 and missing > (length / 2): - print("NA", end=' ') + print("NA", end=" ") elif length >= 100 and missing > 50: - print("NA", end=' ') + print("NA", end=" ") else: - print(aligned / (length - missing), end=' ') + print(aligned / (length - missing), end=" ") print() diff --git a/scripts/maf_limit_to_species.py b/scripts/maf_limit_to_species.py index 0ed9b42b..0733665c 100755 --- a/scripts/maf_limit_to_species.py +++ b/scripts/maf_limit_to_species.py @@ -14,7 +14,7 @@ def main(): - species = sys.argv[1].split(',') + species = sys.argv[1].split(",") maf_reader = bx.align.maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = bx.align.maf.Writer(sys.stdout) @@ -22,7 +22,7 @@ def main(): for m in maf_reader: new_components = [] for comp in m.components: - if comp.src.split('.')[0] in species: + if comp.src.split(".")[0] in species: new_components.append(comp) m.components = new_components m.remove_all_gap_columns() diff --git a/scripts/maf_mapping_word_frequency.py b/scripts/maf_mapping_word_frequency.py index f49b92cc..1329c868 100755 --- a/scripts/maf_mapping_word_frequency.py +++ b/scripts/maf_mapping_word_frequency.py @@ -12,9 +12,7 @@ import sys -from numpy import ( - zeros -) +from numpy import zeros import bx.align.maf from bx import seqmapping @@ -33,15 +31,15 @@ def main(): ints = alpha_map.translate(ints) # Count words radix = alpha_map.get_out_size() - counts = zeros(radix ** word_length, int) + counts = zeros(radix**word_length, int) total = 0 for i in range(word_length, len(ints)): index = 0 factor = 1 skip = False for j in range(word_length): - assert 0 < i-j < len(ints) - letter = ints[i-j] + assert 0 < i - j < 
len(ints) + letter = ints[i - j] if letter < 0: skip = True break @@ -53,7 +51,7 @@ def main(): counts[index] += 1 total += 1 # Write ints separated by tabs - print('\t'.join([str(total)] + [str(_) for _ in counts])) + print("\t".join([str(total)] + [str(_) for _ in counts])) if __name__ == "__main__": diff --git a/scripts/maf_mask_cpg.py b/scripts/maf_mask_cpg.py index 8478f551..3e0c8550 100644 --- a/scripts/maf_mask_cpg.py +++ b/scripts/maf_mask_cpg.py @@ -36,7 +36,7 @@ def main(): cpgfilter = bx.align.sitemask.cpg.Inclusive(mask=mask) cpgfilter.run(reader, writer.write) - print(str(float(cpgfilter.masked)/float(cpgfilter.total) * 100) + "% bases masked.", file=sys.stderr) + print(str(float(cpgfilter.masked) / float(cpgfilter.total) * 100) + "% bases masked.", file=sys.stderr) if __name__ == "__main__": diff --git a/scripts/maf_mean_length_ungapped_piece.py b/scripts/maf_mean_length_ungapped_piece.py index 17c7cda7..85f9ac10 100755 --- a/scripts/maf_mean_length_ungapped_piece.py +++ b/scripts/maf_mean_length_ungapped_piece.py @@ -21,7 +21,7 @@ def main(): in_ungapped = False for col in m.column_iter(): - is_gap = ('-' in col) + is_gap = "-" in col if not is_gap: ungapped_columns += 1 if in_ungapped and is_gap: diff --git a/scripts/maf_percent_identity.py b/scripts/maf_percent_identity.py index c6ab6249..6a92a6e3 100755 --- a/scripts/maf_percent_identity.py +++ b/scripts/maf_percent_identity.py @@ -25,7 +25,7 @@ def __main__(): for i in range(0, m.text_size): a = m.components[0].text[i].lower() b = m.components[1].text[i].lower() - if a == '-' or b == '-': + if a == "-" or b == "-": continue elif a == b: match += 1 diff --git a/scripts/maf_print_chroms.py b/scripts/maf_print_chroms.py index 54643ea5..02b65b08 100755 --- a/scripts/maf_print_chroms.py +++ b/scripts/maf_print_chroms.py @@ -31,7 +31,7 @@ def __main__(): for m in maf_reader: c = m.components[refindex].src - print(c[c.rfind("chr") + 3:]) + print(c[c.rfind("chr") + 3 :]) if __name__ == "__main__": diff 
--git a/scripts/maf_print_scores.py b/scripts/maf_print_scores.py index c79f5878..964be40e 100755 --- a/scripts/maf_print_scores.py +++ b/scripts/maf_print_scores.py @@ -14,7 +14,10 @@ import sys -from bx.align import maf, score +from bx.align import ( + maf, + score, +) from bx.cookbook import doc_optparse @@ -29,11 +32,16 @@ def main(): except Exception: doc_optparse.exit() - hox70 = score.build_scoring_scheme(""" A C G T + hox70 = score.build_scoring_scheme( + """ A C G T 91 -114 -31 -123 -114 100 -125 -31 -31 -125 100 -114 - -123 -31 -114 91 """, 400, 30, default=0) + -123 -31 -114 91 """, + 400, + 30, + default=0, + ) maf_reader = maf.Reader(sys.stdin) diff --git a/scripts/maf_region_coverage_by_src.py b/scripts/maf_region_coverage_by_src.py index 3d1a6395..a99d022e 100755 --- a/scripts/maf_region_coverage_by_src.py +++ b/scripts/maf_region_coverage_by_src.py @@ -56,10 +56,10 @@ def __main__(): length = overlap_end - overlap_start assert length > 0 for c in block.components[1:]: - species = c.src.split('.')[0] + species = c.src.split(".")[0] coverage[species] += length - print(line, end=' ') + print(line, end=" ") for key, value in coverage.items(): print(" ", key.ljust(10), "%0.2f" % (value / total_length)) diff --git a/scripts/maf_species_in_all_files.py b/scripts/maf_species_in_all_files.py index 559d1598..808b0ebd 100755 --- a/scripts/maf_species_in_all_files.py +++ b/scripts/maf_species_in_all_files.py @@ -20,7 +20,7 @@ s = set() for block in bx.align.maf.Reader(open(file)): for comp in block.components: - s.add(comp.src.split('.')[0]) + s.add(comp.src.split(".")[0]) sets.append(s) inter = reduce(operator.and_, sets) diff --git a/scripts/maf_split_by_src.py b/scripts/maf_split_by_src.py index eebf4998..ea5803de 100755 --- a/scripts/maf_split_by_src.py +++ b/scripts/maf_split_by_src.py @@ -19,7 +19,6 @@ import bx.align.maf - INF = "inf" @@ -44,7 +43,7 @@ def __main__(): for m in maf_reader: if comp is None: - writer_key = string.join([c.src for c in 
m.components], '_') + writer_key = string.join([c.src for c in m.components], "_") else: writer_key = m.components[comp].src diff --git a/scripts/maf_thread_for_species.py b/scripts/maf_thread_for_species.py index 5a287624..8c732945 100755 --- a/scripts/maf_thread_for_species.py +++ b/scripts/maf_thread_for_species.py @@ -14,9 +14,7 @@ import bx.align.maf from bx.align.tools.fuse import FusingAlignmentWriter -from bx.align.tools.thread import ( - get_components_for_species -) +from bx.align.tools.thread import get_components_for_species from bx.cookbook import doc_optparse @@ -27,8 +25,8 @@ def main(): try: species = args # Allow a comma separated list, TODO: allow a newick format tree - if len(species) == 1 and ',' in species[0]: - species = species[0].split(',') + if len(species) == 1 and "," in species[0]: + species = species[0].split(",") fuse = not (bool(options.nofuse)) except Exception: doc_optparse.exit() diff --git a/scripts/maf_tile.py b/scripts/maf_tile.py index 8e299845..e17a0af7 100755 --- a/scripts/maf_tile.py +++ b/scripts/maf_tile.py @@ -48,7 +48,7 @@ def main(): def load_seq_db(fname): db = {} for line in open(fname): - fields = line.split(',') + fields = line.split(",") src = fields[1] + "." 
+ fields[2] seq = fields[4] db[src] = seq.strip() @@ -57,7 +57,9 @@ def load_seq_db(fname): def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data): - assert sources[0].split('.')[0] == ref_src.split('.')[0], "{} != {}".format(sources[0].split('.')[0], ref_src.split('.')[0]) + assert sources[0].split(".")[0] == ref_src.split(".")[0], "{} != {}".format( + sources[0].split(".")[0], ref_src.split(".")[0] + ) base_len = end - start @@ -75,7 +77,7 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data): slice_start = max(start, ref.start) slice_end = min(end, ref.end) for j in range(slice_start, slice_end): - mask[j-start] = i + mask[j - start] = i tiled = [] for i in range(len(sources)): @@ -83,7 +85,7 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data): for ss, ee, index in intervals_from_mask(mask): if index < 0: - tiled[0].append(bx.seq.nib.NibFile(open(seq_db[ref_src])).get(start+ss, ee-ss)) + tiled[0].append(bx.seq.nib.NibFile(open(seq_db[ref_src])).get(start + ss, ee - ss)) for row in tiled[1:]: if missing_data: row.append("*" * (ee - ss)) @@ -114,7 +116,7 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data): if i == 0: if ref_src_size is None: ref_src_size = bx.seq.nib.NibFile(open(seq_db[ref_src])).length - c = align.Component(ref_src, start, end-start, "+", ref_src_size, text) + c = align.Component(ref_src, start, end - start, "+", ref_src_size, text) else: c = align.Component(name + ".fake", 0, size, "?", size, text) a.add_component(c) diff --git a/scripts/maf_tile_2.py b/scripts/maf_tile_2.py index 0b757028..bfeecb97 100755 --- a/scripts/maf_tile_2.py +++ b/scripts/maf_tile_2.py @@ -60,7 +60,7 @@ def main(): if use_strand and len(fields) > 5: strand = fields[5] else: - strand = '+' + strand = "+" do_interval(sources, index, out, ref_src, int(start), int(end), seq_db, missing_data, strand) out.close() @@ -69,7 +69,7 @@ def main(): def 
load_seq_db(fname): db = {} for line in open(fname): - fields = line.split(',') + fields = line.split(",") src = fields[1] + "." + fields[2] seq = fields[4] db[src] = seq.strip() @@ -84,8 +84,12 @@ def get_fill_char(maf_status): # assert maf_status not in (maf.MAF_CONTIG_NESTED_STATUS, maf.MAF_NEW_NESTED_STATUS, # maf.MAF_MAYBE_NEW_NESTED_STATUS ), \ # "Nested rows do not make sense in a single coverage MAF (or do they?)" - if maf_status in (maf.MAF_NEW_STATUS, maf.MAF_MAYBE_NEW_STATUS, - maf.MAF_NEW_NESTED_STATUS, maf.MAF_MAYBE_NEW_NESTED_STATUS): + if maf_status in ( + maf.MAF_NEW_STATUS, + maf.MAF_MAYBE_NEW_STATUS, + maf.MAF_NEW_NESTED_STATUS, + maf.MAF_MAYBE_NEW_NESTED_STATUS, + ): return "*" elif maf_status in (maf.MAF_INVERSE_STATUS, maf.MAF_INSERT_STATUS): return "=" @@ -105,7 +109,7 @@ def guess_fill_char(left_comp, right_comp): return "*" # First check that the blocks have the same src (not just species) and # orientation - if (left_comp.src == right_comp.src and left_comp.strand != right_comp.strand): + if left_comp.src == right_comp.src and left_comp.strand != right_comp.strand: # Are they completely contiguous? 
Easy to call that a gap if left_comp.end == right_comp.start: return "-" @@ -125,7 +129,7 @@ def remove_all_gap_columns(texts): while i < text_size: all_gap = True for seq in seqs: - if seq[i] not in ('-', '#', '*', '=', 'X', '@'): + if seq[i] not in ("-", "#", "*", "=", "X", "@"): all_gap = False if all_gap: for seq in seqs: @@ -133,7 +137,7 @@ def remove_all_gap_columns(texts): text_size -= 1 else: i += 1 - return [''.join(s) for s in seqs] + return ["".join(s) for s in seqs] def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, strand): @@ -144,8 +148,10 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, """ ref_src_size = None # Make sure the reference component is also the first in the source list - assert sources[0].split('.')[0] == ref_src.split('.')[0], "%s != %s" \ - % (sources[0].split('.')[0], ref_src.split('.')[0]) + assert sources[0].split(".")[0] == ref_src.split(".")[0], "%s != %s" % ( + sources[0].split(".")[0], + ref_src.split(".")[0], + ) # Counter for the last reference species base we have processed last_stop = start # Rows in maf blocks come in in arbitrary order, we'll convert things @@ -174,10 +180,10 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, block = block.slice_by_component(ref, max(start, ref.start), min(end, ref.end)) ref = block.get_component_by_src_start(ref_src) # print block - assert last_components[0] is None or ref.start >= last_components[0].end, \ - "MAF must be sorted and single coverage in reference species!" - assert ref.strand == "+", \ - "MAF must have all reference species blocks on the plus strand" + assert ( + last_components[0] is None or ref.start >= last_components[0].end + ), "MAF must be sorted and single coverage in reference species!" 
+ assert ref.strand == "+", "MAF must have all reference species blocks on the plus strand" # Store the size of the reference sequence for building fake block if ref_src_size is None: ref_src_size = ref.src_size @@ -217,12 +223,13 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, fill_char = guess_fill_char(last_components[source_index], comp) else: fill_char = get_fill_char(left_status) - tiled_rows[source_index] += (fill_char * cols_to_fill) + tiled_rows[source_index] += fill_char * cols_to_fill cols_needing_fill[source_index] = 0 # Okay, filled up to current position, now append the text tiled_rows[source_index] += comp.text - assert len(tiled_rows[source_index]) == len(tiled_rows[0]), \ - "length of tiled row should match reference row" + assert len(tiled_rows[source_index]) == len( + tiled_rows[0] + ), "length of tiled row should match reference row" last_components[source_index] = comp last_status[source_index] = right_status else: @@ -244,15 +251,14 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, if fill_needed > 0: if last_components[source_index] is None: # print >>sys.stderr, "Never saw any components for %s, filling with @" % source - fill_char = '@' + fill_char = "@" else: if last_status[source_index] is None: - fill_char = '*' + fill_char = "*" else: fill_char = get_fill_char(last_status[source_index]) tiled_rows[source_index] += fill_char * fill_needed - assert len(tiled_rows[source_index]) == len(tiled_rows[0]), \ - "length of tiled row should match reference row" + assert len(tiled_rows[source_index]) == len(tiled_rows[0]), "length of tiled row should match reference row" # Okay, now make up the fake alignment from the tiled rows. 
tiled_rows = remove_all_gap_columns(tiled_rows) a = align.Alignment() @@ -262,11 +268,11 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, if i == 0: if ref_src_size is None: ref_src_size = bx.seq.nib.NibFile(open(seq_db[ref_src])).length - c = align.Component(ref_src, start, end-start, "+", ref_src_size, text) + c = align.Component(ref_src, start, end - start, "+", ref_src_size, text) else: c = align.Component(name + ".fake", 0, size, "?", size, text) a.add_component(c) - if strand == '-': + if strand == "-": a = a.reverse_complement() out.write(a) diff --git a/scripts/maf_tile_2bit.py b/scripts/maf_tile_2bit.py index abc1e5c6..11a71b52 100755 --- a/scripts/maf_tile_2bit.py +++ b/scripts/maf_tile_2bit.py @@ -58,7 +58,7 @@ def main(): if use_strand and len(fields) > 5: strand = fields[5] else: - strand = '+' + strand = "+" do_interval(sources, index, out, ref_src, int(start), int(end), ref_2bit, missing_data, strand) out.close() @@ -72,8 +72,12 @@ def get_fill_char(maf_status): # assert maf_status not in (maf.MAF_CONTIG_NESTED_STATUS, maf.MAF_NEW_NESTED_STATUS, # maf.MAF_MAYBE_NEW_NESTED_STATUS ), \ # "Nested rows do not make sense in a single coverage MAF (or do they?)" - if maf_status in (maf.MAF_NEW_STATUS, maf.MAF_MAYBE_NEW_STATUS, - maf.MAF_NEW_NESTED_STATUS, maf.MAF_MAYBE_NEW_NESTED_STATUS): + if maf_status in ( + maf.MAF_NEW_STATUS, + maf.MAF_MAYBE_NEW_STATUS, + maf.MAF_NEW_NESTED_STATUS, + maf.MAF_MAYBE_NEW_NESTED_STATUS, + ): return "*" elif maf_status in (maf.MAF_INVERSE_STATUS, maf.MAF_INSERT_STATUS): return "=" @@ -113,7 +117,7 @@ def remove_all_gap_columns(texts): while i < text_size: all_gap = True for seq in seqs: - if seq[i] not in ('-', '#', '*', '=', 'X', '@'): + if seq[i] not in ("-", "#", "*", "=", "X", "@"): all_gap = False if all_gap: for seq in seqs: @@ -121,7 +125,7 @@ def remove_all_gap_columns(texts): text_size -= 1 else: i += 1 - return [''.join(s) for s in seqs] + return ["".join(s) for s in seqs] def 
do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data, strand): @@ -132,12 +136,14 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data """ ref_src_size = None # Make sure the reference component is also the first in the source list - assert sources[0].split('.')[0] == ref_src.split('.')[0], "%s != %s" \ - % (sources[0].split('.')[0], ref_src.split('.')[0]) + assert sources[0].split(".")[0] == ref_src.split(".")[0], "%s != %s" % ( + sources[0].split(".")[0], + ref_src.split(".")[0], + ) # Extract non-species part from ref_src for grabbing sequence ref_chr = ref_src if "." in ref_src: - ref_chr = ref_src[ref_src.index(".")+1:] + ref_chr = ref_src[ref_src.index(".") + 1 :] # Counter for the last reference species base we have processed last_stop = start # Rows in maf blocks come in in arbitrary order, we'll convert things @@ -166,10 +172,10 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data block = block.slice_by_component(ref, max(start, ref.start), min(end, ref.end)) ref = block.get_component_by_src_start(ref_src) # print block - assert last_components[0] is None or ref.start >= last_components[0].end, \ - "MAF must be sorted and single coverage in reference species!" - assert ref.strand == "+", \ - "MAF must have all reference species blocks on the plus strand" + assert ( + last_components[0] is None or ref.start >= last_components[0].end + ), "MAF must be sorted and single coverage in reference species!" 
+ assert ref.strand == "+", "MAF must have all reference species blocks on the plus strand" # Store the size of the reference sequence for building fake block if ref_src_size is None: ref_src_size = ref.src_size @@ -209,12 +215,13 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data fill_char = guess_fill_char(last_components[source_index], comp) else: fill_char = get_fill_char(left_status) - tiled_rows[source_index] += (fill_char * cols_to_fill) + tiled_rows[source_index] += fill_char * cols_to_fill cols_needing_fill[source_index] = 0 # Okay, filled up to current position, now append the text tiled_rows[source_index] += comp.text - assert len(tiled_rows[source_index]) == len(tiled_rows[0]), \ - "length of tiled row should match reference row" + assert len(tiled_rows[source_index]) == len( + tiled_rows[0] + ), "length of tiled row should match reference row" last_components[source_index] = comp last_status[source_index] = right_status else: @@ -236,15 +243,14 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data if fill_needed > 0: if last_components[source_index] is None: # print >>sys.stderr, "Never saw any components for %s, filling with @" % source - fill_char = '@' + fill_char = "@" else: if last_status[source_index] is None: - fill_char = '*' + fill_char = "*" else: fill_char = get_fill_char(last_status[source_index]) tiled_rows[source_index] += fill_char * fill_needed - assert len(tiled_rows[source_index]) == len(tiled_rows[0]), \ - "length of tiled row should match reference row" + assert len(tiled_rows[source_index]) == len(tiled_rows[0]), "length of tiled row should match reference row" # Okay, now make up the fake alignment from the tiled rows. 
tiled_rows = remove_all_gap_columns(tiled_rows) a = align.Alignment() @@ -254,11 +260,11 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data if i == 0: if ref_src_size is None: ref_src_size = ref_2bit[ref_chr].length - c = align.Component(ref_src, start, end-start, "+", ref_src_size, text) + c = align.Component(ref_src, start, end - start, "+", ref_src_size, text) else: c = align.Component(name + ".fake", 0, size, "?", size, text) a.add_component(c) - if strand == '-': + if strand == "-": a = a.reverse_complement() out.write(a) diff --git a/scripts/maf_to_concat_fasta.py b/scripts/maf_to_concat_fasta.py index e5c88f9a..635c6d81 100755 --- a/scripts/maf_to_concat_fasta.py +++ b/scripts/maf_to_concat_fasta.py @@ -27,7 +27,7 @@ def __main__(): species = [] for arg in args: - species.extend(arg.split(',')) + species.extend(arg.split(",")) fill = "" if options.fill: @@ -63,7 +63,7 @@ def print_n(s, n, f=sys.stdout): else: p = 0 while p < len(s): - print(s[p:min(p+n, len(s))], file=f) + print(s[p : min(p + n, len(s))], file=f) p += n diff --git a/scripts/maf_to_fasta.py b/scripts/maf_to_fasta.py index 4b49eb8f..7933b5e4 100755 --- a/scripts/maf_to_fasta.py +++ b/scripts/maf_to_fasta.py @@ -37,7 +37,7 @@ def __main__(): def print_n(s, n, f=sys.stdout): p = 0 while p < len(s): - print(s[p:min(p+n, len(s))], file=f) + print(s[p : min(p + n, len(s))], file=f) p += n diff --git a/scripts/maf_to_int_seqs.py b/scripts/maf_to_int_seqs.py index e4ab2ee4..09d5bec7 100755 --- a/scripts/maf_to_int_seqs.py +++ b/scripts/maf_to_int_seqs.py @@ -32,7 +32,7 @@ def main(): int_seq = alpha_map.translate(int_seq) # Write ints separated by spaces for i in int_seq: - print(i, end=' ') + print(i, end=" ") print() diff --git a/scripts/maf_word_frequency.py b/scripts/maf_word_frequency.py index a0dfdb9e..6415ebc1 100755 --- a/scripts/maf_word_frequency.py +++ b/scripts/maf_word_frequency.py @@ -31,7 +31,7 @@ def __main__(): for m in maf_reader: texts = 
[c.text.upper() for c in m.components] for i in range(m.text_size - motif_len): - motif = string.join([text[i: i + motif_len] for text in texts]) + motif = string.join(text[i : i + motif_len] for text in texts) if motif in big_map: big_map[motif] += 1 else: diff --git a/scripts/mask_quality.py b/scripts/mask_quality.py index 73656e12..243be269 100644 --- a/scripts/mask_quality.py +++ b/scripts/mask_quality.py @@ -70,9 +70,9 @@ def main(): if len(specieslist) != 2: print("AXT is pairwise only.") sys.exit() - reader = bx.align.axt.Reader(instream, species1=specieslist[0], - species2=specieslist[1], - species_to_lengths=species_to_lengths) + reader = bx.align.axt.Reader( + instream, species1=specieslist[0], species2=specieslist[1], species_to_lengths=species_to_lengths + ) elif outputformat == "maf": # load maf reader = bx.align.maf.Reader(instream, species_to_lengths=species_to_lengths) @@ -91,8 +91,8 @@ def main(): qualfilter.run(reader, writer.write) - print("For "+str(qualfilter.total)+" base pairs, "+str(qualfilter.masked)+" base pairs were masked.") - print(str(float(qualfilter.masked)/float(qualfilter.total) * 100)+"%") + print("For " + str(qualfilter.total) + " base pairs, " + str(qualfilter.masked) + " base pairs were masked.") + print(str(float(qualfilter.masked) / float(qualfilter.total) * 100) + "%") if __name__ == "__main__": diff --git a/scripts/out_to_chain.py b/scripts/out_to_chain.py index ed65a57a..2527b354 100755 --- a/scripts/out_to_chain.py +++ b/scripts/out_to_chain.py @@ -7,7 +7,10 @@ import numpy as np -from bx.align.epo import Chain, EPOitem +from bx.align.epo import ( + Chain, + EPOitem, +) from bx.cookbook import argparse logging.basicConfig(level=logging.INFO) @@ -15,9 +18,9 @@ def outFile(s): - if (s in ('-', 'stdout')) or (s is None): + if (s in ("-", "stdout")) or (s is None): return sys.stdout - return open(s, 'w') + return open(s, "w") def loadChrSizes(path): @@ -44,16 +47,22 @@ def convert_action(trg_comp, qr_comp, ts, qs, opt): 
log.warning(f"skipping chromosome/contig ({a.chrom}, {b.chrom})") -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser( description="""EPO alignments (.out) to .chain converter.""", epilog="Olgert Denas (Taylor Lab)", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) parser.add_argument("input", help="File to process.") - parser.add_argument("--species", nargs=2, default=["homo_sapiens", "mus_musculus"], help="Names of target and query species (respectively) in the alignment.") + parser.add_argument( + "--species", + nargs=2, + default=["homo_sapiens", "mus_musculus"], + help="Names of target and query species (respectively) in the alignment.", + ) parser.add_argument("--chrsizes", nargs=2, required=True, help="Chromosome sizes for the given species.") - parser.add_argument("-o", '--output', metavar="FILE", default='stdout', type=outFile, help="Output file") + parser.add_argument("-o", "--output", metavar="FILE", default="stdout", type=outFile, help="Output file") opt = parser.parse_args() diff --git a/scripts/prefix_lines.py b/scripts/prefix_lines.py index f5991641..10028719 100755 --- a/scripts/prefix_lines.py +++ b/scripts/prefix_lines.py @@ -7,4 +7,4 @@ import sys for line in sys.stdin: - print(sys.argv[1] + line, end=' ') + print(sys.argv[1] + line, end=" ") diff --git a/scripts/qv_to_bqv.py b/scripts/qv_to_bqv.py index b517d101..b72432ac 100644 --- a/scripts/qv_to_bqv.py +++ b/scripts/qv_to_bqv.py @@ -45,7 +45,7 @@ def main(): outfname = output_file + "." 
+ region + ".bqv" print("Writing region " + region + " to file " + outfname) outfile = open(outfname, "wb") - outbin = BinnedArrayWriter(outfile, typecode='b', default=0) + outbin = BinnedArrayWriter(outfile, typecode="b", default=0) base_count = 0 mega_count = 0 else: @@ -57,7 +57,7 @@ def main(): outbin.write(outval) base_count += 1 if (mega_count * 1000000) <= base_count: - sys.stdout.write(str(mega_count)+" ") + sys.stdout.write(str(mega_count) + " ") sys.stdout.flush() mega_count = base_count // 1000000 + 1 if outbin and outfile: diff --git a/scripts/random_lines.py b/scripts/random_lines.py index 4770f22c..230e8a42 100755 --- a/scripts/random_lines.py +++ b/scripts/random_lines.py @@ -13,4 +13,4 @@ ndesired = int(sys.argv[1]) for line in random.sample(sys.stdin.readlines(), ndesired): - print(line, end=' ') + print(line, end=" ") diff --git a/scripts/table_add_column.py b/scripts/table_add_column.py index 3f775724..dbfc4fd0 100755 --- a/scripts/table_add_column.py +++ b/scripts/table_add_column.py @@ -29,7 +29,7 @@ def __main__(): # Compile expression for SPEED if expr: - expr = compile(expr, '', 'eval') + expr = compile(expr, "", "eval") for element in bx.tabular.io.Reader(sys.stdin): if isinstance(element, bx.tabular.io.Header): diff --git a/scripts/table_filter.py b/scripts/table_filter.py index 11eadc52..057341d6 100755 --- a/scripts/table_filter.py +++ b/scripts/table_filter.py @@ -30,7 +30,7 @@ def __main__(): keep_comments = bool(options.comments) cols = [] if options.cols: - for c in options.cols.split(','): + for c in options.cols.split(","): try: v = int(c) except ValueError: @@ -49,7 +49,7 @@ def __main__(): # Compile expression for SPEED if expr: - expr = compile(expr, '', 'eval') + expr = compile(expr, "", "eval") for element in bx.tabular.io.TableReader(sys.stdin, force_header=force_header): if isinstance(element, bx.tabular.io.Header): diff --git a/scripts/ucsc_gene_table_to_intervals.py b/scripts/ucsc_gene_table_to_intervals.py index 
32f88749..e01da7ca 100755 --- a/scripts/ucsc_gene_table_to_intervals.py +++ b/scripts/ucsc_gene_table_to_intervals.py @@ -21,22 +21,33 @@ def main(): # Parse command line parser = optparse.OptionParser(usage="%prog [options] < gene_table.txt") - parser.add_option("-r", "--region", dest="region", default="transcribed", - help="Limit to region: one of coding, utr3, utr5, transcribed [default]") - parser.add_option("-e", "--exons", action="store_true", dest="exons", - help="Only print intervals overlapping an exon") - parser.add_option("-s", "--strand", action="store_true", dest="strand", - help="Print strand after interval") - parser.add_option("-b", "--nobin", action="store_false", dest="discard_first_column", default=True, - help="file doesn't contain a 'bin' column (use this for pre-hg18 files)") + parser.add_option( + "-r", + "--region", + dest="region", + default="transcribed", + help="Limit to region: one of coding, utr3, utr5, transcribed [default]", + ) + parser.add_option( + "-e", "--exons", action="store_true", dest="exons", help="Only print intervals overlapping an exon" + ) + parser.add_option("-s", "--strand", action="store_true", dest="strand", help="Print strand after interval") + parser.add_option( + "-b", + "--nobin", + action="store_false", + dest="discard_first_column", + default=True, + help="file doesn't contain a 'bin' column (use this for pre-hg18 files)", + ) options, args = parser.parse_args() - assert options.region in ('coding', 'utr3', 'utr5', 'transcribed'), "Invalid region argument" + assert options.region in ("coding", "utr3", "utr5", "transcribed"), "Invalid region argument" # Read table from stdin and handle each gene for line in sys.stdin: # Parse fields from gene tabls - fields = line.split('\t') + fields = line.split("\t") if options.discard_first_column: fields.pop(0) chrom = fields[1] @@ -47,17 +58,17 @@ def main(): cds_end = int(fields[6]) # Determine the subset of the transcribed region we are interested in - if options.region 
== 'utr3': - if strand == '-': + if options.region == "utr3": + if strand == "-": region_start, region_end = tx_start, cds_start else: region_start, region_end = cds_end, tx_end - elif options.region == 'utr5': - if strand == '-': + elif options.region == "utr5": + if strand == "-": region_start, region_end = cds_end, tx_end else: region_start, region_end = tx_start, cds_start - elif options.region == 'coding': + elif options.region == "coding": region_start, region_end = cds_start, cds_end else: region_start, region_end = tx_start, tx_end @@ -65,8 +76,8 @@ def main(): # If only interested in exons, print the portion of each exon overlapping # the region of interest, otherwise print the span of the region if options.exons: - exon_starts = [int(_) for _ in fields[8].rstrip(',\n').split(',')] - exon_ends = [int(_) for _ in fields[9].rstrip(',\n').split(',')] + exon_starts = [int(_) for _ in fields[8].rstrip(",\n").split(",")] + exon_ends = [int(_) for _ in fields[9].rstrip(",\n").split(",")] for start, end in zip(exon_starts, exon_ends): start = max(start, region_start) end = min(end, region_end) @@ -84,7 +95,7 @@ def main(): def print_tab_sep(*args): """Print items in `l` to stdout separated by tabs""" - print(string.join((str(f) for f in args), '\t')) + print(string.join((str(f) for f in args), "\t")) if __name__ == "__main__": diff --git a/scripts/wiggle_to_array_tree.py b/scripts/wiggle_to_array_tree.py index ce973e63..e798fbdb 100755 --- a/scripts/wiggle_to_array_tree.py +++ b/scripts/wiggle_to_array_tree.py @@ -11,7 +11,7 @@ from bx.arrays.array_tree import ( array_tree_dict_from_reader, - FileArrayTreeDict + FileArrayTreeDict, ) from bx.arrays.wiggle import WiggleReader diff --git a/setup.cfg b/setup.cfg index 0b8287d1..d362d1c5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,7 +47,7 @@ snapshot = egg_info -rb_DEV bdist_egg rotate -m.egg -k1 build_docs = build_sphinx build_apidocs [flake8] -ignore = E226,E501,E741,W503 +ignore = E203,E501,E741,W503 exclude = 
.git,.tox,.venv,build,doc/source/conf.py import-order-style = smarkets application-import-names = bx,bx_extras diff --git a/setup.py b/setup.py index 73565ce1..9e43209f 100644 --- a/setup.py +++ b/setup.py @@ -13,12 +13,11 @@ def main(): - metadata = dict( - scripts=glob("scripts/*.py"), - cmdclass=command_classes) + metadata = dict(scripts=glob("scripts/*.py"), cmdclass=command_classes) - if len(sys.argv) >= 2 and \ - ('--help' in sys.argv[1:] or sys.argv[1] in ('--help-commands', 'egg_info', '--version', 'clean')): + if len(sys.argv) >= 2 and ( + "--help" in sys.argv[1:] or sys.argv[1] in ("--help-commands", "egg_info", "--version", "clean") + ): # For these actions, NumPy is not required. # # They are required to succeed without Numpy for example when @@ -28,11 +27,12 @@ def main(): else: try: import numpy + # Suppress numpy tests numpy.test = None except Exception as e: raise Exception(f"numpy must be installed to build: {e}") - metadata['ext_modules'] = get_extension_modules(numpy_include=numpy.get_include()) + metadata["ext_modules"] = get_extension_modules(numpy_include=numpy.get_include()) setup(**metadata) @@ -43,7 +43,8 @@ def main(): command_classes = {} try: import Cython.Distutils - command_classes['build_ext'] = Cython.Distutils.build_ext + + command_classes["build_ext"] = Cython.Distutils.build_ext class build_ext_sdist(sdist): def run(self): @@ -51,7 +52,7 @@ def run(self): self.run_command("build_ext") super().run() - command_classes['sdist'] = build_ext_sdist + command_classes["sdist"] = build_ext_sdist except ImportError: pass @@ -89,8 +90,9 @@ def run(self): # Restore args and working directory sys.argv = old_argv os.chdir(old_cwd) + # Add to extra_commands - command_classes['build_apidocs'] = BuildAPIDocs + command_classes["build_apidocs"] = BuildAPIDocs except Exception: pass @@ -105,12 +107,13 @@ def run(self): def get_extension_modules(numpy_include=None): extensions = [] # Bitsets - extensions.append(Extension("bx.bitset", - 
["lib/bx/bitset.pyx", - "src/binBits.c", - "src/kent/bits.c", - "src/kent/common.c"], - include_dirs=["src/kent", "src"])) + extensions.append( + Extension( + "bx.bitset", + ["lib/bx/bitset.pyx", "src/binBits.c", "src/kent/bits.c", "src/kent/common.c"], + include_dirs=["src/kent", "src"], + ) + ) # Interval intersection extensions.append(Extension("bx.intervals.intersection", ["lib/bx/intervals/intersection.pyx"])) # Alignment object speedups @@ -122,28 +125,36 @@ def get_extension_modules(numpy_include=None): # Translation if character / integer strings extensions.append(Extension("bx._seqmapping", ["lib/bx/_seqmapping.pyx"])) # BGZF - extensions.append(Extension("bx.misc.bgzf", - ["lib/bx/misc/bgzf.pyx", "src/samtools/bgzf.c"], - include_dirs=["src/samtools"], - libraries=['z'])) + extensions.append( + Extension( + "bx.misc.bgzf", + ["lib/bx/misc/bgzf.pyx", "src/samtools/bgzf.c"], + include_dirs=["src/samtools"], + libraries=["z"], + ) + ) # The following extensions won't (currently) compile on windows - if platform.system() not in ('Microsoft', 'Windows'): + if platform.system() not in ("Microsoft", "Windows"): # Interval clustering - extensions.append(Extension("bx.intervals.cluster", - ["lib/bx/intervals/cluster.pyx", - "src/cluster.c"], - include_dirs=["src"])) + extensions.append( + Extension("bx.intervals.cluster", ["lib/bx/intervals/cluster.pyx", "src/cluster.c"], include_dirs=["src"]) + ) # Position weight matrices - extensions.append(Extension("bx.pwm._position_weight_matrix", - ["lib/bx/pwm/_position_weight_matrix.pyx", "src/pwm_utils.c"], - include_dirs=["src"])) + extensions.append( + Extension( + "bx.pwm._position_weight_matrix", + ["lib/bx/pwm/_position_weight_matrix.pyx", "src/pwm_utils.c"], + include_dirs=["src"], + ) + ) - extensions.append(Extension("bx.motif._pwm", ["lib/bx/motif/_pwm.pyx"], - include_dirs=[numpy_include])) + extensions.append(Extension("bx.motif._pwm", ["lib/bx/motif/_pwm.pyx"], include_dirs=[numpy_include])) # Sparse arrays 
with summaries organized as trees on disk - extensions.append(Extension("bx.arrays.array_tree", ["lib/bx/arrays/array_tree.pyx"], include_dirs=[numpy_include])) + extensions.append( + Extension("bx.arrays.array_tree", ["lib/bx/arrays/array_tree.pyx"], include_dirs=[numpy_include]) + ) # Reading UCSC "big binary index" files extensions.append(Extension("bx.bbi.bpt_file", ["lib/bx/bbi/bpt_file.pyx"])) @@ -160,19 +171,21 @@ def get_extension_modules(numpy_include=None): extensions.append(Extension("bx.arrays.wiggle", ["lib/bx/arrays/wiggle.pyx"])) # CpG masking - extensions.append(Extension("bx.align.sitemask._cpg", - ["lib/bx/align/sitemask/_cpg.pyx", - "lib/bx/align/sitemask/find_cpg.c"])) + extensions.append( + Extension("bx.align.sitemask._cpg", ["lib/bx/align/sitemask/_cpg.pyx", "lib/bx/align/sitemask/find_cpg.c"]) + ) # Counting n-grams in integer strings - extensions.append(Extension("bx.intseq.ngramcount", ["lib/bx/intseq/ngramcount.pyx"], - include_dirs=["src"])) + extensions.append(Extension("bx.intseq.ngramcount", ["lib/bx/intseq/ngramcount.pyx"], include_dirs=["src"])) # Seekable access to bzip2 files - extensions.append(Extension("bx.misc._seekbzip2", - ["lib/bx/misc/_seekbzip2.pyx", - "src/bunzip/micro-bunzip.c"], - include_dirs=["src/bunzip"])) + extensions.append( + Extension( + "bx.misc._seekbzip2", + ["lib/bx/misc/_seekbzip2.pyx", "src/bunzip/micro-bunzip.c"], + include_dirs=["src/bunzip"], + ) + ) return extensions diff --git a/tox.ini b/tox.ini index fc996be9..b0c8978d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,7 @@ +[tox] +envlist = lint, py + [testenv] -skip_install = true commands_pre = python setup.py build_ext --inplace commands = @@ -10,3 +12,15 @@ deps = pytest pytest-cython python-lzo >= 1.14 # Python 3.10 support +skip_install = true + +[testenv:lint] +commands_pre = +commands = + flake8 . + black --check --diff . + isort --check --diff . 
+deps = + black + flake8 + isort From 336cee492ddef64f5cc623426ba3a43729a159a1 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Fri, 26 Aug 2022 12:37:31 +0100 Subject: [PATCH 31/68] Add `.git-blame-ignore-revs` file for better blame view on GitHub See https://docs.github.com/en/repositories/working-with-files/using-files/viewing-a-file#ignore-commits-in-the-blame-view To use this locally, execute this once: ``` git config blame.ignoreRevsFile .git-blame-ignore-revs ``` and all following `git blame` will skip the revisions indicated in the file. --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..33ab8a83 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Format Python code with black and isort +a100c005f0714ebca78ec7770f770d6522b3b870 From a3bf7ed7ec6b5e940f1500e5018290f986d9e777 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Fri, 26 Aug 2022 12:50:12 +0100 Subject: [PATCH 32/68] Upgrade Python syntax with pyupgrade using `ack --type=python -f | xargs pyupgrade --py37-plus` --- lib/bx_extras/lrucache.py | 2 +- lib/bx_extras/pyparsing.py | 2 +- scripts/maf_tile_2.py | 2 +- scripts/maf_tile_2bit.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/bx_extras/lrucache.py b/lib/bx_extras/lrucache.py index c1eea95e..3e60bd09 100644 --- a/lib/bx_extras/lrucache.py +++ b/lib/bx_extras/lrucache.py @@ -126,7 +126,7 @@ def __ne__(self, other): return not self.__eq__(other) def __repr__(self): - return "<%s %s => %s (%s)>" % (self.__class__, self.key, self.obj, time.asctime(time.localtime(self.atime))) + return f"<{self.__class__} {self.key} => {self.obj} ({time.asctime(time.localtime(self.atime))})>" def __init__(self, size=DEFAULT_SIZE): # Check arguments diff --git a/lib/bx_extras/pyparsing.py b/lib/bx_extras/pyparsing.py index 38cff70a..5255e8b7 100644 --- 
a/lib/bx_extras/pyparsing.py +++ b/lib/bx_extras/pyparsing.py @@ -1878,7 +1878,7 @@ def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unqu "|(?:" + ")|(?:".join( [ - "{}[^{}]".format(re.escape(self.endQuoteChar[:i]), _escapeRegexRangeChars(self.endQuoteChar[i])) + f"{re.escape(self.endQuoteChar[:i])}[^{_escapeRegexRangeChars(self.endQuoteChar[i])}]" for i in range(len(self.endQuoteChar) - 1, 0, -1) ] ) diff --git a/scripts/maf_tile_2.py b/scripts/maf_tile_2.py index bfeecb97..f6e1dcb8 100755 --- a/scripts/maf_tile_2.py +++ b/scripts/maf_tile_2.py @@ -148,7 +148,7 @@ def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data, """ ref_src_size = None # Make sure the reference component is also the first in the source list - assert sources[0].split(".")[0] == ref_src.split(".")[0], "%s != %s" % ( + assert sources[0].split(".")[0] == ref_src.split(".")[0], "{} != {}".format( sources[0].split(".")[0], ref_src.split(".")[0], ) diff --git a/scripts/maf_tile_2bit.py b/scripts/maf_tile_2bit.py index 11a71b52..6d2ea5f2 100755 --- a/scripts/maf_tile_2bit.py +++ b/scripts/maf_tile_2bit.py @@ -136,7 +136,7 @@ def do_interval(sources, index, out, ref_src, start, end, ref_2bit, missing_data """ ref_src_size = None # Make sure the reference component is also the first in the source list - assert sources[0].split(".")[0] == ref_src.split(".")[0], "%s != %s" % ( + assert sources[0].split(".")[0] == ref_src.split(".")[0], "{} != {}".format( sources[0].split(".")[0], ref_src.split(".")[0], ) From 4f4a48d3f227ae390c1b22072867ba86e347bdef Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Sun, 28 Aug 2022 11:43:39 +0100 Subject: [PATCH 33/68] Release 0.9.0 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index 07508a60..3e2f46a3 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = "0.8.13" +__version__ = "0.9.0" From 
d2bbf1dda5f7aa03e4dca6a923c42bd5e8169f14 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Tue, 24 Jan 2023 15:32:13 +0100 Subject: [PATCH 34/68] do not use np.float --- scripts/bnMapper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/bnMapper.py b/scripts/bnMapper.py index 21ebce7d..58c82253 100755 --- a/scripts/bnMapper.py +++ b/scripts/bnMapper.py @@ -35,9 +35,9 @@ ("id", np.str_, 100), ("score", np.int64), ("strand", np.str_, 1), - ("signalValue", np.float), - ("pValue", np.float), - ("qValue", np.float), + ("signalValue", float), + ("pValue", float), + ("qValue", float), ("peak", np.int64), ] ) From 8effd42519d49aac4695c320cdb90849caea835d Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Tue, 24 Jan 2023 16:13:05 +0100 Subject: [PATCH 35/68] change numpy.int --- lib/bx/align/_epo.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/bx/align/_epo.pyx b/lib/bx/align/_epo.pyx index ba8db44a..97d57666 100644 --- a/lib/bx/align/_epo.pyx +++ b/lib/bx/align/_epo.pyx @@ -112,9 +112,9 @@ def fastLoadChain(fname, hf): N.append( (int(line[0]), 0, 0) ) s, t, q = zip( *N ) data.append( (hd, - numpy.array(s, dtype=numpy.int), - numpy.array(t, dtype=numpy.int), - numpy.array(q, dtype=numpy.int)) ) + numpy.array(s, dtype=int), + numpy.array(t, dtype=int), + numpy.array(q, dtype=int)) ) assert hd.tEnd - hd.tStart == sum(s) + sum(t) assert hd.qEnd - hd.qStart == sum(s) + sum(q) fd.readline() # a blank line From aa768ee8094879add1a17ade48779232b6859892 Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 26 Jan 2023 01:48:49 +0100 Subject: [PATCH 36/68] Fix exception message (#89) --- lib/bx/interval_index_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/interval_index_file.py b/lib/bx/interval_index_file.py index 1adab781..12e2be7f 100644 --- a/lib/bx/interval_index_file.py +++ b/lib/bx/interval_index_file.py @@ -157,7 +157,7 @@ def bin_for_range(start, end, offsets=None): 
else: start_bin >>= BIN_NEXT_SHIFT end_bin >>= BIN_NEXT_SHIFT - raise Exception("Interval (%d,%d) out of range") + raise Exception("Interval (%d,%d) out of range" % (start, end)) class AbstractMultiIndexedAccess: From 9789e4c3f4046712c1c03bbf5ccee37304cb610d Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 25 Jan 2023 12:46:53 +0000 Subject: [PATCH 37/68] Test importing all modules --- pyproject.toml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 69747c83..dccb8a55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,36 @@ line-length = 120 target-version = ['py37'] [tool.cibuildwheel] -test-command = "python -c 'import bx, bx.align, bx.align.sitemask, bx.align.tools, bx.arrays, bx.bbi, bx.cookbook, bx.intervals, bx.intervals.operations, bx.intseq, bx.misc, bx.motif, bx.motif.io, bx.motif.logo, bx.phylo, bx.pwm, bx.seq, bx.tabular, bx_extras'" +test-command = """ +python -c 'import bx, bx.align, bx.align.axt, bx.align.core, bx.align.epo, \ +bx.align.lav, bx.align.maf, bx.align.score, bx.align.sitemask, \ +bx.align.sitemask.core, bx.align.sitemask.cpg, bx.align.sitemask.quality, \ +bx.align.tools, bx.align.tools.chop, bx.align.tools.fuse, \ +bx.align.tools.thread, bx.arrays, bx.arrays.array_tree, bx.arrays.bed, \ +bx.arrays.wiggle, bx.bbi, bx.bbi.bbi_file, bx.bbi.bigwig_file, \ +bx.bbi.bpt_file, bx.bbi.cirtree_file, bx.binned_array, bx.bitset, \ +bx.bitset_builders, bx.bitset_utils, bx.cookbook, bx.cookbook.argparse, \ +bx.cookbook.attribute, bx.cookbook.doc_optparse, bx.cookbook.progress_bar, \ +bx.filter, bx.gene_reader, bx.interval_index_file, bx.intervals, \ +bx.intervals.cluster, bx.intervals.intersection, bx.intervals.io, \ +bx.intervals.operations, bx.intervals.operations.base_coverage, \ +bx.intervals.operations.complement, bx.intervals.operations.concat, \ +bx.intervals.operations.coverage, bx.intervals.operations.find_clusters, \ 
+bx.intervals.operations.intersect, bx.intervals.operations.join, \ +bx.intervals.operations.merge, bx.intervals.operations.quicksect, \ +bx.intervals.operations.subtract, bx.intervals.random_intervals, \ +bx.intseq, bx.intseq.ngramcount, bx.misc, bx.misc.bgzf, bx.misc.binary_file, \ +bx.misc.cdb, bx.misc.filecache, bx.misc.readlengths, bx.misc.seekbzip2, \ +bx.misc.seeklzop, bx.motif, bx.motif.io, bx.motif.logo, bx.motif.pwm, \ +bx.phylo, bx.phylo.newick, bx.phylo.phast, bx.pwm, \ +bx.pwm.bed_score_aligned_pwm, bx.pwm.bed_score_aligned_string, \ +bx.pwm.maf_select_motifs, bx.pwm.position_weight_matrix, bx.pwm.pwm_score_maf, \ +bx.pwm.pwm_score_motifs, bx.pwm.pwm_score_positions, bx.seq, bx.seq.core, \ +bx.seq.fasta, bx.seq.nib, bx.seq.qdna, bx.seq.seq, bx.seq.twobit, \ +bx.seqmapping, bx.tabular, bx.tabular.io, bx.wiggle, bx_extras, \ +bx_extras.fpconst, bx_extras.lrucache, bx_extras.pstat, bx_extras.pyparsing, \ +bx_extras.stats' +""" [tool.cibuildwheel.linux] before-all = """ From 6f7e5ca3d9b4801afb2de5452a93feee5a7cedd7 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 25 Jan 2023 17:21:02 +0000 Subject: [PATCH 38/68] Build arm64 macOS wheels. Drop PyPy 3.7 Linux wheels. --- .github/workflows/deploy.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 326375f7..e70c1517 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -9,9 +9,11 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] - arch: [auto] include: + - os: ubuntu-latest + arch: auto + - os: macos-latest + arch: x86_64 arm64 - os: ubuntu-latest arch: aarch64 steps: @@ -32,9 +34,8 @@ jobs: CIBW_ARCHS: ${{matrix.arch}} # Skip building musllinux wheels for now, they take too long to build, # mainly because numpy doesn't have musllinux wheels on PyPI yet. 
- # Skip also building the PyPy 3.7 wheel for macOS, because numpy - # doesn't have a wheel on PyPI and it fails to install. - CIBW_SKIP: '*-musllinux* pp37-macosx_x86_64' + # Skip also building for PyPy 3.7, which is deprecated upstream. + CIBW_SKIP: '*-musllinux* pp37-*' - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v3 From ab4e287dd31c593fc67e1e355ba2461e17c3f8ff Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:10:07 +0200 Subject: [PATCH 39/68] Fix integer division --- lib/bx/_seqmapping.pyx | 2 +- lib/bx/bbi/bbi_file.pyx | 8 ++++---- lib/bx/misc/_seekbzip2.pyx | 2 +- lib/bx/seq/_nib.pyx | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/bx/_seqmapping.pyx b/lib/bx/_seqmapping.pyx index ecdbaa5f..027d49ef 100644 --- a/lib/bx/_seqmapping.pyx +++ b/lib/bx/_seqmapping.pyx @@ -132,7 +132,7 @@ cdef class IntToIntMapping: cdef int *t_buf # Get direct access to string PyObject_AsReadBuffer( src, &s_buf, &s_len ) - s_len = s_len / sizeof( int ) + s_len = s_len // sizeof( int ) assert s_len == len( src ), "`src` argument must be a buffer of 32bit integers" # Initialize empty array rval = zeros( s_len, 'i' ) diff --git a/lib/bx/bbi/bbi_file.pyx b/lib/bx/bbi/bbi_file.pyx index 8b80e23d..836641bb 100644 --- a/lib/bx/bbi/bbi_file.pyx +++ b/lib/bx/bbi/bbi_file.pyx @@ -95,7 +95,7 @@ cdef class SummarizedData: e = self.end if s >= e: return - base_step = ( self.end - self.start ) / self.size + base_step = ( self.end - self.start ) // self.size for j from 0 <= j < self.size: base_start = self.start + ( base_step * j ) base_end = base_start + base_step @@ -206,8 +206,8 @@ cdef class BBIFile: # Find appropriate zoom level cdef bits32 base_size = end - start - cdef int full_reduction = base_size / summary_size - cdef int zoom = full_reduction / 2 + cdef int full_reduction = base_size // summary_size + cdef int zoom = full_reduction // 2 if zoom < 0: zoom = 0 cdef ZoomLevel zoom_level = self._best_zoom_level( 
zoom ) @@ -418,7 +418,7 @@ cdef class ZoomLevel: reader.seek( self.index_offset ) summaries = self._summary_blocks_in_region(chrom_id, start, end) - base_step = (end - start) / summary_size + base_step = (end - start) // summary_size base_start = start base_end = start diff --git a/lib/bx/misc/_seekbzip2.pyx b/lib/bx/misc/_seekbzip2.pyx index c71c92a5..45e1ccbf 100644 --- a/lib/bx/misc/_seekbzip2.pyx +++ b/lib/bx/misc/_seekbzip2.pyx @@ -62,7 +62,7 @@ cdef class SeekBzip2: cdef int n_bit # Break position into bit and byte offsets ## sys.stderr.write( "arg pos: %d\n" % position ) - n_byte = position / 8; + n_byte = position // 8; n_bit = position % 8; ## sys.stderr.write( "byte pos: %d\n" % n_byte ) ## sys.stderr.write( "bit pos: %d\n" % n_bit ) diff --git a/lib/bx/seq/_nib.pyx b/lib/bx/seq/_nib.pyx index aa2c7488..0cf970e5 100644 --- a/lib/bx/seq/_nib.pyx +++ b/lib/bx/seq/_nib.pyx @@ -42,7 +42,7 @@ def translate_raw_data( data, int start, int length ): p_data = p_data + 1 i = 1 # Two output values for each input value - for j from 0 <= j < (length-i)/2: + for j from 0 <= j < (length-i) // 2: #p_rval[i] = NIB_I2C_TABLE[ ( p_data[0] >> 4 ) & 0xF ]; #p_rval[i+1] = NIB_I2C_TABLE[ ( p_data[0] >> 0 ) & 0xF ]; p_rval[i] = NIB_I2C_TABLE_FIRST [ p_data[0] ] From a17c0478756fb670d9fbc0bc350d501e74d6c8dd Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:11:26 +0200 Subject: [PATCH 40/68] Fix print statement syntax --- lib/bx/intseq/ngramcount.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bx/intseq/ngramcount.pyx b/lib/bx/intseq/ngramcount.pyx index dc2e5c58..2d102756 100644 --- a/lib/bx/intseq/ngramcount.pyx +++ b/lib/bx/intseq/ngramcount.pyx @@ -76,10 +76,10 @@ cdef _count_ngrams( int* ints, int n_ints, int* rval, int n, int radix ): letter = ints[ i + j ] if letter < 0 or letter >= radix: # This word is bad, break out and do not increment counts - print "breaking, letter", letter + print("breaking, letter", letter) break index = 
index + letter * factor factor = factor * radix else: - print index + print(index) rval[ index ] = rval[ index ] + 1 From b76a509d10ee2e12b1c69ec6cecee1f3819f9b1e Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:14:17 +0200 Subject: [PATCH 41/68] Fix imports --- lib/bx/bbi/bbi_file.pxd | 6 +++--- lib/bx/bbi/bbi_file.pyx | 6 +++--- lib/bx/bbi/bigbed_file.pyx | 6 +++--- lib/bx/bbi/bigwig_file.pyx | 6 +++--- lib/bx/bbi/bpt_file.pxd | 2 +- lib/bx/bbi/cirtree_file.pxd | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/bx/bbi/bbi_file.pxd b/lib/bx/bbi/bbi_file.pxd index 7117c561..fe753f09 100644 --- a/lib/bx/bbi/bbi_file.pxd +++ b/lib/bx/bbi/bbi_file.pxd @@ -1,7 +1,7 @@ -from types cimport * +from .types cimport * -from bpt_file cimport BPTFile -from cirtree_file cimport CIRTreeFile +from .bpt_file cimport BPTFile +from .cirtree_file cimport CIRTreeFile import numpy diff --git a/lib/bx/bbi/bbi_file.pyx b/lib/bx/bbi/bbi_file.pyx index 836641bb..8cca6dcc 100644 --- a/lib/bx/bbi/bbi_file.pyx +++ b/lib/bx/bbi/bbi_file.pyx @@ -15,10 +15,10 @@ cimport cython from collections import deque -from types cimport * +from .types cimport * -from bpt_file cimport BPTFile -from cirtree_file cimport CIRTreeFile +from .bpt_file cimport BPTFile +from .cirtree_file cimport CIRTreeFile from libc cimport limits import numpy diff --git a/lib/bx/bbi/bigbed_file.pyx b/lib/bx/bbi/bigbed_file.pyx index b00d204f..cac8b61f 100644 --- a/lib/bx/bbi/bigbed_file.pyx +++ b/lib/bx/bbi/bigbed_file.pyx @@ -2,12 +2,12 @@ BigBed file. """ -from bbi_file cimport * -from cirtree_file cimport CIRTreeFile +from .bbi_file cimport * +from .cirtree_file cimport CIRTreeFile import numpy -from types cimport * +from .types cimport * cimport numpy diff --git a/lib/bx/bbi/bigwig_file.pyx b/lib/bx/bbi/bigwig_file.pyx index 99bf0138..75dc58f7 100644 --- a/lib/bx/bbi/bigwig_file.pyx +++ b/lib/bx/bbi/bigwig_file.pyx @@ -4,12 +4,12 @@ BigWig file. 
from collections import deque -from bbi_file cimport * -from cirtree_file cimport CIRTreeFile +from .bbi_file cimport * +from .cirtree_file cimport CIRTreeFile import numpy -from types cimport * +from .types cimport * cimport numpy diff --git a/lib/bx/bbi/bpt_file.pxd b/lib/bx/bbi/bpt_file.pxd index a2097ca0..df72ef50 100644 --- a/lib/bx/bbi/bpt_file.pxd +++ b/lib/bx/bbi/bpt_file.pxd @@ -1,6 +1,6 @@ from bx.misc.binary_file import BinaryFileReader -from types cimport * +from .types cimport * cdef class BPTFile: diff --git a/lib/bx/bbi/cirtree_file.pxd b/lib/bx/bbi/cirtree_file.pxd index 94d057c3..6f4d1ad7 100644 --- a/lib/bx/bbi/cirtree_file.pxd +++ b/lib/bx/bbi/cirtree_file.pxd @@ -1,4 +1,4 @@ -from types cimport * +from .types cimport * cdef class CIRTreeFile: From 55e146fac6bc0e151210a66c2553f7ad5b240491 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:15:19 +0200 Subject: [PATCH 42/68] Extend tests to include python 3.11 --- .github/workflows/test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a4c9ff81..7aa815bd 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.10'] + python-version: ['3.7', '3.11'] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 From cb81453e75629fbb6677c575f2451d44ff3a5426 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:15:43 +0200 Subject: [PATCH 43/68] Include Python 3.11 as officially supported --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index d362d1c5..eec006d5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,6 +12,7 
@@ classifiers = Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 Topic :: Scientific/Engineering :: Bio-Informatics Topic :: Software Development :: Libraries :: Python Modules name = bx-python From e9c0249ef8f5455969fa040c520d783c6c3da2e2 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 2023 12:21:22 +0200 Subject: [PATCH 44/68] Run black --- lib/bx/align/epo_tests.py | 2 +- lib/bx/align/lav.py | 7 +++---- lib/bx/align/lav_tests.py | 1 - lib/bx/align/maf_tests.py | 4 ---- lib/bx/cookbook/argparse.py | 14 -------------- lib/bx/interval_index_file_tests.py | 2 +- lib/bx/intervals/intersection_tests.py | 3 --- lib/bx/misc/cdb_tests.py | 1 - lib/bx/pwm/bed_score_aligned_pwm.py | 1 - lib/bx/pwm/bed_score_aligned_string.py | 2 -- lib/bx/pwm/maf_select_motifs.py | 1 - lib/bx/pwm/position_weight_matrix.py | 13 ------------- lib/bx/pwm/pwm_score_maf.py | 3 --- lib/bx/pwm/pwm_score_motifs.py | 1 - lib/bx/pwm/pwm_score_positions.py | 2 -- lib/bx/pwm/pwm_tests.py | 3 +-- lib/bx/seq/qdna.py | 2 +- lib/bx/seq/seq.py | 1 - lib/bx/seq/seq_tests.py | 2 +- lib/bx/seqmapping.py | 1 - lib/bx_extras/pyparsing.py | 1 - scripts/aggregate_scores_in_intervals.py | 1 - scripts/align_print_template.py | 1 - scripts/axt_to_maf.py | 1 - scripts/bed_count_by_interval.py | 6 +++++- scripts/bed_count_overlapping.py | 6 +++++- scripts/bnMapper.py | 2 +- scripts/div_snp_table_chr.py | 1 - scripts/gene_fourfold_sites.py | 4 +--- scripts/get_scores_in_intervals.py | 1 - scripts/interval_count_intersections.py | 1 - scripts/lav_to_axt.py | 1 - scripts/lav_to_maf.py | 1 - scripts/line_select.py | 1 - scripts/mMK_bitset.py | 2 -- scripts/maf_chop.py | 1 - scripts/maf_count.py | 1 - scripts/maf_extract_ranges.py | 1 - scripts/maf_filter.py | 2 -- scripts/maf_filter_max_wc.py | 1 - scripts/maf_gc_content.py | 1 - scripts/maf_mean_length_ungapped_piece.py | 2 -- 
scripts/maf_percent_columns_matching.py | 1 - scripts/maf_percent_identity.py | 1 - scripts/maf_print_chroms.py | 1 - scripts/maf_print_scores.py | 1 - scripts/maf_region_coverage_by_src.py | 1 - scripts/maf_select.py | 1 - scripts/maf_shuffle_columns.py | 2 -- scripts/maf_split_by_src.py | 2 -- scripts/maf_thread_for_species.py | 1 - scripts/maf_tile.py | 2 -- scripts/maf_tile_2.py | 1 - scripts/maf_tile_2bit.py | 1 - scripts/maf_to_axt.py | 1 - scripts/maf_to_fasta.py | 1 - scripts/maf_to_int_seqs.py | 1 - scripts/maf_translate_chars.py | 1 - scripts/maf_truncate.py | 1 - scripts/maf_word_frequency.py | 1 - scripts/mask_quality.py | 1 - scripts/nib_intervals_to_fasta.py | 1 - scripts/table_add_column.py | 1 - scripts/table_filter.py | 1 - scripts/ucsc_gene_table_to_intervals.py | 1 - scripts/wiggle_to_array_tree.py | 1 - scripts/wiggle_to_binned_array.py | 1 - 67 files changed, 20 insertions(+), 113 deletions(-) diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py index 99f714d8..7d4e3e66 100644 --- a/lib/bx/align/epo_tests.py +++ b/lib/bx/align/epo_tests.py @@ -164,7 +164,7 @@ def ch(c, ci): assert c[th : th + l] == "-" * l th += l - for (a, b) in self.epo_records: + for a, b in self.epo_records: ca, cb = cigar_pairs[int(a.gabid)] ch(ca, a.cigar_iter(False)) ch(cb, b.cigar_iter(False)) diff --git a/lib/bx/align/lav.py b/lib/bx/align/lav.py index 102ef346..50e7d347 100644 --- a/lib/bx/align/lav.py +++ b/lib/bx/align/lav.py @@ -349,7 +349,7 @@ def build_alignment(self, score, pieces): self.open_seqs() text1 = text2 = "" end1 = end2 = None - for (start1, start2, length, _pctId) in pieces: + for start1, start2, length, _pctId in pieces: if end1 is not None: if start1 == end1: # insertion in sequence 2 text1 += self.seq1_gap * (start2 - end2) @@ -438,7 +438,6 @@ def build_alignment(self, score, pieces): class Writer: - # blockHash is a hash from (src1,strand1,src2,strand2) to a list of blocks; # the blocks are collected on each call to write(), but the 
actual writing # does not occur until close(). @@ -568,7 +567,7 @@ def write_a_stanza(self, alignment): print(" s %s" % score, file=self.file) print(" b %d %d" % (start1 + 1, start2 + 1), file=self.file) print(" e %d %d" % (end1, end2), file=self.file) - for (start1, start2, size, pctId) in pieces: + for start1, start2, size, pctId in pieces: print(" l %d %d %d %d %d" % (start1 + 1, start2 + 1, start1 + size, start2 + size, pctId), file=self.file) print("}", file=self.file) @@ -625,7 +624,7 @@ def rc_or_nothing(strand): def do_path_subs(path, path_subs): - for (prefix, replacement) in path_subs: + for prefix, replacement in path_subs: if path.startswith(prefix): return replacement + path[len(prefix) :] return path diff --git a/lib/bx/align/lav_tests.py b/lib/bx/align/lav_tests.py index f62fc771..320a4e77 100644 --- a/lib/bx/align/lav_tests.py +++ b/lib/bx/align/lav_tests.py @@ -11,7 +11,6 @@ class lavTestCase(unittest.TestCase): def testReader(self): - reader = lav.Reader(open(test_lav)) a = next(reader) diff --git a/lib/bx/align/maf_tests.py b/lib/bx/align/maf_tests.py index 4e2f0f4f..94678501 100644 --- a/lib/bx/align/maf_tests.py +++ b/lib/bx/align/maf_tests.py @@ -77,7 +77,6 @@ def test_reader(): - reader = maf.Reader(StringIO(test_maf)) assert reader.attributes["version"] == "1" assert reader.attributes["scoring"] == "humor.v4" @@ -103,7 +102,6 @@ def test_reader(): def test_writer(): - val = StringIO() writer = maf.Writer(val, {"scoring": "foobar"}) @@ -134,7 +132,6 @@ def test_writer(): def test_slice(): - b = complex_maf.slice_by_component(0, 101, 105) check_component(b.components[0], src="human_hoxa", start=101, size=4, strand="+", src_size=100257, text="CA-TT") @@ -181,7 +178,6 @@ def test_slice(): def test_reverse_complement(): - b = complex_maf.reverse_complement() check_component( diff --git a/lib/bx/cookbook/argparse.py b/lib/bx/cookbook/argparse.py index 74205410..f557f50b 100644 --- a/lib/bx/cookbook/argparse.py +++ b/lib/bx/cookbook/argparse.py @@ 
-173,7 +173,6 @@ class HelpFormatter: """ def __init__(self, prog, indent_increment=2, max_help_position=24, width=None): - # default setting for width if width is None: try: @@ -268,7 +267,6 @@ def add_usage(self, usage, actions, groups, prefix=None): def add_argument(self, action): if action.help is not SUPPRESS: - # find all invocations get_invocation = self._format_action_invocation invocations = [get_invocation(action)] @@ -333,7 +331,6 @@ def _format_usage(self, usage, actions, groups, prefix): # wrap the usage parts if it's too long text_width = self._width - self._current_indent if len(prefix) + len(usage) > text_width: - # break usage into wrappable parts part_regexp = r"\(.*?\)+|\[.*?\]+|\S+" opt_usage = format(optionals, groups) @@ -424,7 +421,6 @@ def _format_actions_usage(self, actions, groups): # collect all actions format strings parts = [] for i, action in enumerate(actions): - # suppressed arguments are marked with None # remove | separators for suppressed arguments if action.help is SUPPRESS: @@ -990,7 +986,6 @@ def __init__(self, name, help): sup.__init__(option_strings=[], dest=name, help=help) def __init__(self, option_strings, prog, parser_class, dest=SUPPRESS, help=None, metavar=None): - self._prog_prefix = prog self._parser_class = parser_class self._name_parser_map = {} @@ -1286,7 +1281,6 @@ def _add_container_actions(self, container): # map each action to its group group_map = {} for group in container._action_groups: - # if a group with the title exists, use that, otherwise # create a new group matching the container's group if group.title not in title_group_map: @@ -1376,7 +1370,6 @@ def _get_handler(self): raise ValueError(msg % self.conflict_handler) def _check_conflict(self, action): - # find all options that conflict with this option confl_optionals = [] for option_string in action.option_strings: @@ -1397,7 +1390,6 @@ def _handle_conflict_error(self, action, conflicting_actions): def _handle_conflict_resolve(self, action, 
conflicting_actions): # remove all conflicting options for option_string, action in conflicting_actions: - # remove the conflicting option action.option_strings.remove(option_string) self._option_string_actions.pop(option_string, None) @@ -1491,7 +1483,6 @@ def __init__( conflict_handler="error", add_help=True, ): - if version is not None: import warnings @@ -1697,7 +1688,6 @@ def _parse_known_args(self, arg_strings, namespace): arg_string_pattern_parts = [] arg_strings_iter = iter(arg_strings) for i, arg_string in enumerate(arg_strings_iter): - # all args after -- are non-options if arg_string == "--": arg_string_pattern_parts.append("-") @@ -1744,7 +1734,6 @@ def take_action(action, argument_strings, option_string=None): # function to convert arg_strings into an optional action def consume_optional(start_index): - # get the optional identified at this index option_tuple = option_string_indices[start_index] action, option_string, explicit_arg = option_tuple @@ -1754,7 +1743,6 @@ def consume_optional(start_index): match_argument = self._match_argument action_tuples = [] while True: - # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) @@ -1847,7 +1835,6 @@ def consume_positionals(start_index): else: max_option_string_index = -1 while start_index <= max_option_string_index: - # consume any Positionals preceding the next option next_option_string_index = min(index for index in option_string_indices if index >= start_index) if start_index != next_option_string_index: @@ -1909,7 +1896,6 @@ def _read_args_from_files(self, arg_strings): # expand arguments referencing files new_arg_strings = [] for arg_string in arg_strings: - # for regular arguments, just add them back into the list if arg_string[0] not in self.fromfile_prefix_chars: new_arg_strings.append(arg_string) diff --git a/lib/bx/interval_index_file_tests.py b/lib/bx/interval_index_file_tests.py index 0b94fb92..62e0afb7 100644 --- 
a/lib/bx/interval_index_file_tests.py +++ b/lib/bx/interval_index_file_tests.py @@ -47,7 +47,7 @@ def test_interval_index_file(): if end < start: end, start = start, end query_intervals = set() - for (s, e, i) in intervals: + for s, e, i in intervals: if e > start and s < end: query_intervals.add((s, e, i)) result = ix.find(name, start, end) diff --git a/lib/bx/intervals/intersection_tests.py b/lib/bx/intervals/intersection_tests.py index 059d87ef..45febc18 100644 --- a/lib/bx/intervals/intersection_tests.py +++ b/lib/bx/intervals/intersection_tests.py @@ -157,7 +157,6 @@ def test_find(self): class IntervalTreeTest(unittest.TestCase): def setUp(self): - iv = IntervalTree() n = 0 for i in range(1, 1000, 80): @@ -174,7 +173,6 @@ def setUp(self): self.nintervals = n def test_find(self): - r = self.iv.find(100, 200) self.assertEqual(len(r), 5) @@ -204,5 +202,4 @@ def fn(ival): if __name__ == "__main__": - unittest.main() diff --git a/lib/bx/misc/cdb_tests.py b/lib/bx/misc/cdb_tests.py index b6cd86a9..7c83d3d0 100644 --- a/lib/bx/misc/cdb_tests.py +++ b/lib/bx/misc/cdb_tests.py @@ -4,7 +4,6 @@ def test(): - d = {} for i in range(10000): d["foo" + str(i)] = "bar" + str(i) diff --git a/lib/bx/pwm/bed_score_aligned_pwm.py b/lib/bx/pwm/bed_score_aligned_pwm.py index 98d11ce8..3813d705 100755 --- a/lib/bx/pwm/bed_score_aligned_pwm.py +++ b/lib/bx/pwm/bed_score_aligned_pwm.py @@ -17,7 +17,6 @@ def isnan(x): def main(): - if len(sys.argv) < 5: print("%s bedfile inmaf spec1,spec2,... motif_file " % sys.argv[0], file=sys.stderr) sys.exit(0) diff --git a/lib/bx/pwm/bed_score_aligned_string.py b/lib/bx/pwm/bed_score_aligned_string.py index e00935f8..31b3434a 100755 --- a/lib/bx/pwm/bed_score_aligned_string.py +++ b/lib/bx/pwm/bed_score_aligned_string.py @@ -16,7 +16,6 @@ def isnan(x): def main(): - if len(sys.argv) < 5: print("%s bedfile inmaf spec1,spec2,... 
string [string2,...]" % sys.argv[0], file=sys.stderr) sys.exit(0) @@ -65,7 +64,6 @@ def main(): # lists of scores for each position in scoremax for mx_name, mx in scoremax.items(): for offset in range(blocklength): - # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: diff --git a/lib/bx/pwm/maf_select_motifs.py b/lib/bx/pwm/maf_select_motifs.py index 01367bb8..62b0e33d 100755 --- a/lib/bx/pwm/maf_select_motifs.py +++ b/lib/bx/pwm/maf_select_motifs.py @@ -16,7 +16,6 @@ def isnan(x): def main(): - if len(sys.argv) < 5: print("%s transfac|basic pwmfile inmaf threshold [motif]" % sys.argv[0], file=sys.stderr) sys.exit(2) diff --git a/lib/bx/pwm/position_weight_matrix.py b/lib/bx/pwm/position_weight_matrix.py index 4edef1af..b661f1f0 100755 --- a/lib/bx/pwm/position_weight_matrix.py +++ b/lib/bx/pwm/position_weight_matrix.py @@ -82,7 +82,6 @@ def score_align_motif(align, motif, gapmask=None, byPosition=True): continue for start in range(ncols): - if align.rows[ir][start] == "-": continue elif align.rows[ir][start] == "n": @@ -154,7 +153,6 @@ def score_align_motif(align, motif, gapmask=None, byPosition=True): class PositionWeightMatrix: - complementMap = str.maketrans("ACGTacgt", "TGCAtgca") # IUPAC-IUB @@ -176,7 +174,6 @@ class PositionWeightMatrix: } def __init__(self, id, rows, alphabet, background=None, score_correction=True): - self.id = id self.alphabet = alphabet nsymbols = len(self.alphabet) @@ -210,7 +207,6 @@ def __init__(self, id, rows, alphabet, background=None, score_correction=True): scale = 1 for i in range(len(rows)): - # try: fields, consensus = rows[i][:nsymbols], rows[i][-1] for x, count in enumerate(fields): @@ -278,7 +274,6 @@ def information_content_calculation(self, i, counts): # return sum( [ self.information_base_content(base,i,counts) for base in self.alphabet ] ) def information_base_content(self, base, i, counts): - # Reference 1) # return self.score_correction(counts,base,i) * math.log ( 
self.score_correction(counts,base,i), 2) @@ -289,7 +284,6 @@ def __call__(self, seq): return self.score_seq(seq) def __add__(self, other): - assert self.alphabet == other.alphabet r, (p, q) = self.max_correlation(other) @@ -323,7 +317,6 @@ def __add__(self, other): return PositionWeightMatrix(self.id + other.id, newRows, self.alphabet, self.background) def __old_add__(self, other, maxp=None): - assert self.alphabet == other.alphabet bigN = max(len(self), len(other)) smallN = min(len(self), len(other)) @@ -436,7 +429,6 @@ def correlation(self, otherwmx): return position_rsq def score_align(self, align, gapmask=None, byPosition=True): - # a blank score matrix nrows, ncols = align.dims ascoremax = AlignScoreMatrix(align) @@ -444,7 +436,6 @@ def score_align(self, align, gapmask=None, byPosition=True): minSeqLen = len(self) for ir in range(nrows): - # row is missing data if isnan(align.rows[ir][0]): continue @@ -461,7 +452,6 @@ def score_align(self, align, gapmask=None, byPosition=True): subseq = "" end = 0 for ic in range(start, ncols): - char = align.rows[ir][ic] if char == "-" or char == "N": continue @@ -602,7 +592,6 @@ def pwm_score(self, base, i, freq, background=None): return float("nan") def parse_weight(self, weightString): - fields = weightString.split(".") if len(fields) > 2: raise ValueError @@ -840,7 +829,6 @@ def sum_of_squares(x, y=None): def consensus_symbol(pattern): - if isinstance(pattern, str): try: pattern = [int(x) for x in pattern.split()] @@ -898,7 +886,6 @@ def consensus_symbol(pattern): from ._position_weight_matrix import c_match_consensus def match_consensus(sequence, pattern): - return c_match_consensus(sequence, pattern, len(sequence)) # print >>sys.stderr, "C match_consensus used" diff --git a/lib/bx/pwm/pwm_score_maf.py b/lib/bx/pwm/pwm_score_maf.py index d86c0e8b..418f212c 100755 --- a/lib/bx/pwm/pwm_score_maf.py +++ b/lib/bx/pwm/pwm_score_maf.py @@ -17,7 +17,6 @@ def isnan(x): def main(): - pwm_file = sys.argv[1] splist = sys.argv[2] if 
len(sys.argv) == 4: @@ -53,7 +52,6 @@ def main(): def MafScorer(pwm, species, inmaf): - index = 0 scoremax, width = None, None for maf in align_maf.Reader(inmaf): @@ -79,7 +77,6 @@ def MafScorer(pwm, species, inmaf): def MafMotifSelect(mafblock, pwm, motif=None, threshold=0): - if motif is not None and len(motif) != len(pwm): raise Exception("pwm and motif must be the same length") # generic alignment diff --git a/lib/bx/pwm/pwm_score_motifs.py b/lib/bx/pwm/pwm_score_motifs.py index 623c0077..4cb3382d 100755 --- a/lib/bx/pwm/pwm_score_motifs.py +++ b/lib/bx/pwm/pwm_score_motifs.py @@ -43,7 +43,6 @@ def main(): # lists of scores for each position in scoremax mx = scoremax for offset in range(blocklength): - # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: diff --git a/lib/bx/pwm/pwm_score_positions.py b/lib/bx/pwm/pwm_score_positions.py index 9ea6a1d8..b7af02b1 100755 --- a/lib/bx/pwm/pwm_score_positions.py +++ b/lib/bx/pwm/pwm_score_positions.py @@ -16,7 +16,6 @@ def isnan(x): def main(): - if len(sys.argv) < 6: print("%s transfac|basic pwmfile inmaf threshold spec1,spec2,... " % sys.argv[0], file=sys.stderr) sys.exit(0) @@ -49,7 +48,6 @@ def main(): # lists of scores for each position in scoremax for id, mx in scoremax.items(): for offset in range(blocklength): - # scan all species with threshold for i in range(len(species)): if mx[i][offset] > threshold: diff --git a/lib/bx/pwm/pwm_tests.py b/lib/bx/pwm/pwm_tests.py index fe017b01..b2b7dfd7 100644 --- a/lib/bx/pwm/pwm_tests.py +++ b/lib/bx/pwm/pwm_tests.py @@ -65,7 +65,6 @@ class PWMTestCase(unittest.TestCase): def testReader(self): - # test basic format: i.e. 
for jaspar wms = [ wm for wm in pwm.Reader(StringIO(basicPwm), format="basic", background=background, score_correction=False) @@ -87,7 +86,7 @@ def testReader(self): assert f"{dScores[0][0]:.4f} {dScores[0][1]:.4f} {dScores[1][0]:.4f} {dScores[1][1]:.4f}" == dScoresExpected qdSeq = [] - for (ix, nt) in enumerate(dSeq): + for ix, nt in enumerate(dSeq): qdSeq.append(dict()) qdSeq[ix][nt] = 1.0 qScores = wm.score_seq(qdSeq) diff --git a/lib/bx/seq/qdna.py b/lib/bx/seq/qdna.py index aa9dbad9..f1cd58e4 100644 --- a/lib/bx/seq/qdna.py +++ b/lib/bx/seq/qdna.py @@ -194,7 +194,7 @@ def read_codebook(self, codeF): alphabet = "ACGT" codeToProbs = {} - for (lineNum, line) in enumerate(codeF): + for lineNum, line in enumerate(codeF): lineNum += 1 line = line.rstrip() stripped = line.strip() diff --git a/lib/bx/seq/seq.py b/lib/bx/seq/seq.py index 34365952..81cc92ec 100644 --- a/lib/bx/seq/seq.py +++ b/lib/bx/seq/seq.py @@ -37,7 +37,6 @@ class attributes: """ def __init__(self, file=None, revcomp=False, name="", gap=None): - self.file = file if revcomp: self.revcomp = "-5'" diff --git a/lib/bx/seq/seq_tests.py b/lib/bx/seq/seq_tests.py index cc71e311..391f1c44 100644 --- a/lib/bx/seq/seq_tests.py +++ b/lib/bx/seq/seq_tests.py @@ -44,7 +44,7 @@ def test_get_qdna(self): def test_get_reader(self): reader = bx.seq.seq_reader(open(test2_fa, "rb")) - for (ix, seq) in enumerate(reader): + for ix, seq in enumerate(reader): assert ix < len(valid2_fa), "FastaReader returns too many sequences" text = "%s" % seq fields = text.split() diff --git a/lib/bx/seqmapping.py b/lib/bx/seqmapping.py index 90c411b0..cae98ff4 100644 --- a/lib/bx/seqmapping.py +++ b/lib/bx/seqmapping.py @@ -72,7 +72,6 @@ def alignment_mapping_from_file(f, char_mapping=DNA): def second_mapping_from_file(f, first_mapping, char_mapping=DNA): - columns, symbols = [], [] for line in f: column, symbol = line.split() diff --git a/lib/bx_extras/pyparsing.py b/lib/bx_extras/pyparsing.py index 5255e8b7..a44ea7d1 100644 --- 
a/lib/bx_extras/pyparsing.py +++ b/lib/bx_extras/pyparsing.py @@ -1915,7 +1915,6 @@ def parseImpl(self, instring, loc, doActions=True): ret = result.group() if self.unquoteResults: - # strip off quotes ret = ret[self.quoteCharLen : -self.endQuoteCharLen] diff --git a/scripts/aggregate_scores_in_intervals.py b/scripts/aggregate_scores_in_intervals.py index 68ca153c..d5eabb05 100755 --- a/scripts/aggregate_scores_in_intervals.py +++ b/scripts/aggregate_scores_in_intervals.py @@ -80,7 +80,6 @@ def load_scores_ba_dir(dir): def main(): - # Parse command line options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/align_print_template.py b/scripts/align_print_template.py index 153a4cfb..b53f7841 100755 --- a/scripts/align_print_template.py +++ b/scripts/align_print_template.py @@ -23,7 +23,6 @@ def main(): - # Parse command line arguments options, args = doc_optparse.parse(__doc__) diff --git a/scripts/axt_to_maf.py b/scripts/axt_to_maf.py index 4d0134a5..a377a59a 100755 --- a/scripts/axt_to_maf.py +++ b/scripts/axt_to_maf.py @@ -130,7 +130,6 @@ def clone_component(c): def read_lengths(fileName): - chromToLength = {} f = open(fileName) diff --git a/scripts/bed_count_by_interval.py b/scripts/bed_count_by_interval.py index 82f5c6f1..da59f635 100755 --- a/scripts/bed_count_by_interval.py +++ b/scripts/bed_count_by_interval.py @@ -17,7 +17,11 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - chrom, start, end, = ( + ( + chrom, + start, + end, + ) = ( fields[0], int(fields[1]), int(fields[2]), diff --git a/scripts/bed_count_overlapping.py b/scripts/bed_count_overlapping.py index 82f5c6f1..da59f635 100755 --- a/scripts/bed_count_overlapping.py +++ b/scripts/bed_count_overlapping.py @@ -17,7 +17,11 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - chrom, start, end, = ( + ( + chrom, + start, + end, + ) = ( fields[0], int(fields[1]), int(fields[2]), diff --git a/scripts/bnMapper.py b/scripts/bnMapper.py index 
58c82253..fc84e155 100755 --- a/scripts/bnMapper.py +++ b/scripts/bnMapper.py @@ -134,7 +134,7 @@ def union_elements(elements): unioned_elements = [] for ch, chgrp in groupby(elements, key=itemgetter(0)): - for (s, e) in elem_u(np.array([itemgetter(1, 2)(_) for _ in chgrp], dtype=np.uint)): + for s, e in elem_u(np.array([itemgetter(1, 2)(_) for _ in chgrp], dtype=np.uint)): if s < e: unioned_elements.append((ch, s, e, el_id)) assert len(unioned_elements) <= len(elements) diff --git a/scripts/div_snp_table_chr.py b/scripts/div_snp_table_chr.py index ceb23440..ced1abc9 100755 --- a/scripts/div_snp_table_chr.py +++ b/scripts/div_snp_table_chr.py @@ -81,7 +81,6 @@ def main(): # collect snp and div for chr in feature.keys(): - if chr not in snp: continue if chr not in ar: diff --git a/scripts/gene_fourfold_sites.py b/scripts/gene_fourfold_sites.py index d9ea05a1..f10dafc9 100755 --- a/scripts/gene_fourfold_sites.py +++ b/scripts/gene_fourfold_sites.py @@ -134,7 +134,6 @@ def Comp(seq): def main(): - options, args = doc_optparse.parse(__doc__) try: if options.outfile: @@ -156,12 +155,11 @@ def main(): nibs = getnib(nibdir) for chrom, strand, cds_exons, name in CDSReader(open(bedfile), format=format): - cds_seq = "" # genome_seq_index maps the position in CDS to position on the genome genome_seq_index = [] - for (c_start, c_end) in cds_exons: + for c_start, c_end in cds_exons: cds_seq += nibs[chrom].get(c_start, c_end - c_start) for i in range(c_start, c_end): genome_seq_index.append(i) diff --git a/scripts/get_scores_in_intervals.py b/scripts/get_scores_in_intervals.py index a8c3c5e5..60d1081c 100755 --- a/scripts/get_scores_in_intervals.py +++ b/scripts/get_scores_in_intervals.py @@ -27,7 +27,6 @@ def read_scores(f): def main(): - # Parse command line options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/interval_count_intersections.py b/scripts/interval_count_intersections.py index 3102185d..f4fe5e75 100755 --- a/scripts/interval_count_intersections.py 
+++ b/scripts/interval_count_intersections.py @@ -19,7 +19,6 @@ def main(): - intersecters = {} # Read ranges diff --git a/scripts/lav_to_axt.py b/scripts/lav_to_axt.py index fd0cb6fa..64edd86e 100755 --- a/scripts/lav_to_axt.py +++ b/scripts/lav_to_axt.py @@ -24,7 +24,6 @@ def usage(s=None): def main(): - # parse the command line silent = False diff --git a/scripts/lav_to_maf.py b/scripts/lav_to_maf.py index 1a61f8c6..45ed4905 100755 --- a/scripts/lav_to_maf.py +++ b/scripts/lav_to_maf.py @@ -22,7 +22,6 @@ def usage(s=None): def main(): - # parse the command line silent = False diff --git a/scripts/line_select.py b/scripts/line_select.py index bc00fd0f..b7b844a1 100755 --- a/scripts/line_select.py +++ b/scripts/line_select.py @@ -14,7 +14,6 @@ def __main__(): - feature_file = sys.argv[1] if len(sys.argv) > 2: diff --git a/scripts/mMK_bitset.py b/scripts/mMK_bitset.py index 545215f9..00a312a2 100644 --- a/scripts/mMK_bitset.py +++ b/scripts/mMK_bitset.py @@ -128,7 +128,6 @@ def main(): def MK_fisher_pvalue(win_snp, win_div, AR_snp, AR_div): - if win_snp == 0 and win_div == 0 and AR_snp == 0 and AR_div == 0: return 1.0 @@ -138,7 +137,6 @@ def MK_fisher_pvalue(win_snp, win_div, AR_snp, AR_div): def MK_chi_pvalue(win_snp, win_div, AR_snp, AR_div): - chi_result = r.chisq_test(r.matrix(r.c([win_snp, win_div, AR_snp, AR_div]), nr=2)) return chi_result["p.value"] diff --git a/scripts/maf_chop.py b/scripts/maf_chop.py index b343f640..b11f8325 100755 --- a/scripts/maf_chop.py +++ b/scripts/maf_chop.py @@ -16,7 +16,6 @@ def main(): - # Parse command line arguments parser = OptionParser() diff --git a/scripts/maf_count.py b/scripts/maf_count.py index 8f8450e3..430b4518 100755 --- a/scripts/maf_count.py +++ b/scripts/maf_count.py @@ -45,7 +45,6 @@ def __main__(): count = 0 for m in maf_reader: - if action == "aligns": count += 1 elif action == "cols": diff --git a/scripts/maf_extract_ranges.py b/scripts/maf_extract_ranges.py index 93345a73..5af7e1da 100755 --- 
a/scripts/maf_extract_ranges.py +++ b/scripts/maf_extract_ranges.py @@ -24,7 +24,6 @@ def __main__(): - # Parse Command Line options, args = doc_optparse.parse(__doc__) diff --git a/scripts/maf_filter.py b/scripts/maf_filter.py index 9792d329..e8705585 100755 --- a/scripts/maf_filter.py +++ b/scripts/maf_filter.py @@ -19,7 +19,6 @@ def __main__(): - # Parse command line arguments parser = OptionParser() @@ -41,7 +40,6 @@ def __main__(): maf_writer = maf.Writer(sys.stdout) for m in maf_reader: - if component_count and len(m.components) != component_count: continue if min_cols and m.text_size < min_cols: diff --git a/scripts/maf_filter_max_wc.py b/scripts/maf_filter_max_wc.py index d3529d50..6641c1b6 100755 --- a/scripts/maf_filter_max_wc.py +++ b/scripts/maf_filter_max_wc.py @@ -16,7 +16,6 @@ def main(): - min_good = int(sys.argv[1]) min_species = int(sys.argv[2]) diff --git a/scripts/maf_gc_content.py b/scripts/maf_gc_content.py index d2973838..78cce22d 100755 --- a/scripts/maf_gc_content.py +++ b/scripts/maf_gc_content.py @@ -12,7 +12,6 @@ def __main__(): - maf_reader = maf.Reader(sys.stdin) for m in maf_reader: diff --git a/scripts/maf_mean_length_ungapped_piece.py b/scripts/maf_mean_length_ungapped_piece.py index 85f9ac10..f769c676 100755 --- a/scripts/maf_mean_length_ungapped_piece.py +++ b/scripts/maf_mean_length_ungapped_piece.py @@ -13,9 +13,7 @@ def main(): - for m in bx.align.maf.Reader(sys.stdin): - ungapped_columns = 0 ungapped_runs = 0 in_ungapped = False diff --git a/scripts/maf_percent_columns_matching.py b/scripts/maf_percent_columns_matching.py index 05ef46ba..ba978fe8 100755 --- a/scripts/maf_percent_columns_matching.py +++ b/scripts/maf_percent_columns_matching.py @@ -15,7 +15,6 @@ def __main__(): - maf_reader = maf.Reader(sys.stdin) for m in maf_reader: diff --git a/scripts/maf_percent_identity.py b/scripts/maf_percent_identity.py index 6a92a6e3..39774eef 100755 --- a/scripts/maf_percent_identity.py +++ b/scripts/maf_percent_identity.py @@ -16,7 
+16,6 @@ def __main__(): - maf_reader = maf.Reader(sys.stdin) for m in maf_reader: diff --git a/scripts/maf_print_chroms.py b/scripts/maf_print_chroms.py index 02b65b08..4c0bd3f6 100755 --- a/scripts/maf_print_chroms.py +++ b/scripts/maf_print_chroms.py @@ -18,7 +18,6 @@ def __main__(): - # Parse command line arguments options, args = doc_optparse.parse(__doc__) diff --git a/scripts/maf_print_scores.py b/scripts/maf_print_scores.py index 964be40e..cfea4237 100755 --- a/scripts/maf_print_scores.py +++ b/scripts/maf_print_scores.py @@ -22,7 +22,6 @@ def main(): - # Parse command line arguments options, args = doc_optparse.parse(__doc__) diff --git a/scripts/maf_region_coverage_by_src.py b/scripts/maf_region_coverage_by_src.py index a99d022e..ef398e5c 100755 --- a/scripts/maf_region_coverage_by_src.py +++ b/scripts/maf_region_coverage_by_src.py @@ -17,7 +17,6 @@ def __main__(): - # Parse Command Line options, args = doc_optparse.parse(__doc__) diff --git a/scripts/maf_select.py b/scripts/maf_select.py index b95f9f44..bbeb1f60 100755 --- a/scripts/maf_select.py +++ b/scripts/maf_select.py @@ -13,7 +13,6 @@ def __main__(): - feature_file = sys.argv[1] if len(sys.argv) > 2: diff --git a/scripts/maf_shuffle_columns.py b/scripts/maf_shuffle_columns.py index f74c688b..61b7a10a 100755 --- a/scripts/maf_shuffle_columns.py +++ b/scripts/maf_shuffle_columns.py @@ -15,12 +15,10 @@ def __main__(): - maf_reader = maf.Reader(sys.stdin, parse_e_rows=True) maf_writer = maf.Writer(sys.stdout) for m in maf_reader: - align.shuffle_columns(m) maf_writer.write(m) diff --git a/scripts/maf_split_by_src.py b/scripts/maf_split_by_src.py index ea5803de..27aaa3b3 100755 --- a/scripts/maf_split_by_src.py +++ b/scripts/maf_split_by_src.py @@ -23,7 +23,6 @@ def __main__(): - # Parse command line arguments parser = OptionParser() @@ -41,7 +40,6 @@ def __main__(): writers = {} for m in maf_reader: - if comp is None: writer_key = string.join([c.src for c in m.components], "_") else: diff --git 
a/scripts/maf_thread_for_species.py b/scripts/maf_thread_for_species.py index 8c732945..58daa765 100755 --- a/scripts/maf_thread_for_species.py +++ b/scripts/maf_thread_for_species.py @@ -19,7 +19,6 @@ def main(): - options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/maf_tile.py b/scripts/maf_tile.py index e17a0af7..0393b818 100755 --- a/scripts/maf_tile.py +++ b/scripts/maf_tile.py @@ -26,7 +26,6 @@ def main(): - options, args = doc_optparse.parse(__doc__) try: sources = args[0].translate(tree_tx).split() @@ -56,7 +55,6 @@ def load_seq_db(fname): def do_interval(sources, index, out, ref_src, start, end, seq_db, missing_data): - assert sources[0].split(".")[0] == ref_src.split(".")[0], "{} != {}".format( sources[0].split(".")[0], ref_src.split(".")[0] ) diff --git a/scripts/maf_tile_2.py b/scripts/maf_tile_2.py index f6e1dcb8..074ac2ae 100755 --- a/scripts/maf_tile_2.py +++ b/scripts/maf_tile_2.py @@ -41,7 +41,6 @@ def main(): - options, args = doc_optparse.parse(__doc__) try: sources = args[0].translate(tree_tx).split() diff --git a/scripts/maf_tile_2bit.py b/scripts/maf_tile_2bit.py index 6d2ea5f2..9a3cadfa 100755 --- a/scripts/maf_tile_2bit.py +++ b/scripts/maf_tile_2bit.py @@ -39,7 +39,6 @@ def main(): - options, args = doc_optparse.parse(__doc__) try: sources = args[0].translate(tree_tx).split() diff --git a/scripts/maf_to_axt.py b/scripts/maf_to_axt.py index 1766621c..816b938a 100755 --- a/scripts/maf_to_axt.py +++ b/scripts/maf_to_axt.py @@ -90,7 +90,6 @@ def clone_component(c): def remove_mutual_gaps(block): - if len(block.components) == 0: return diff --git a/scripts/maf_to_fasta.py b/scripts/maf_to_fasta.py index 7933b5e4..01278e6d 100755 --- a/scripts/maf_to_fasta.py +++ b/scripts/maf_to_fasta.py @@ -12,7 +12,6 @@ def __main__(): - maf_reader = maf.Reader(sys.stdin) # Confusing since maf_to_concat_fasta takes names. 
diff --git a/scripts/maf_to_int_seqs.py b/scripts/maf_to_int_seqs.py index 09d5bec7..ecf28003 100755 --- a/scripts/maf_to_int_seqs.py +++ b/scripts/maf_to_int_seqs.py @@ -18,7 +18,6 @@ def main(): - if len(sys.argv) > 1: _, alpha_map = seqmapping.alignment_mapping_from_file(open(sys.argv[1])) else: diff --git a/scripts/maf_translate_chars.py b/scripts/maf_translate_chars.py index 3205a0ec..5eaa757c 100755 --- a/scripts/maf_translate_chars.py +++ b/scripts/maf_translate_chars.py @@ -19,7 +19,6 @@ def main(): - maf_reader = maf.Reader(sys.stdin) maf_writer = maf.Writer(sys.stdout) diff --git a/scripts/maf_truncate.py b/scripts/maf_truncate.py index 134f6351..b64077d9 100755 --- a/scripts/maf_truncate.py +++ b/scripts/maf_truncate.py @@ -29,7 +29,6 @@ def __main__(): count = 0 for m in maf_reader: - maf_writer.write(m) count += m.text_size diff --git a/scripts/maf_word_frequency.py b/scripts/maf_word_frequency.py index 6415ebc1..ff93c967 100755 --- a/scripts/maf_word_frequency.py +++ b/scripts/maf_word_frequency.py @@ -20,7 +20,6 @@ def __main__(): - motif_len = int(sys.argv[1]) big_map = {} diff --git a/scripts/mask_quality.py b/scripts/mask_quality.py index 243be269..e97fa586 100644 --- a/scripts/mask_quality.py +++ b/scripts/mask_quality.py @@ -26,7 +26,6 @@ def main(): - options, args = doc_optparse.parse(__doc__) try: inputformat = options.input diff --git a/scripts/nib_intervals_to_fasta.py b/scripts/nib_intervals_to_fasta.py index 6bde93cc..d8a12e6f 100755 --- a/scripts/nib_intervals_to_fasta.py +++ b/scripts/nib_intervals_to_fasta.py @@ -12,7 +12,6 @@ def __main__(): - options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/table_add_column.py b/scripts/table_add_column.py index dbfc4fd0..f8459630 100755 --- a/scripts/table_add_column.py +++ b/scripts/table_add_column.py @@ -16,7 +16,6 @@ def __main__(): - # Parse command line arguments options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/table_filter.py b/scripts/table_filter.py 
index 057341d6..31384043 100755 --- a/scripts/table_filter.py +++ b/scripts/table_filter.py @@ -22,7 +22,6 @@ def __main__(): - # Parse command line arguments options, args = doc_optparse.parse(__doc__) try: diff --git a/scripts/ucsc_gene_table_to_intervals.py b/scripts/ucsc_gene_table_to_intervals.py index e01da7ca..301cb70f 100755 --- a/scripts/ucsc_gene_table_to_intervals.py +++ b/scripts/ucsc_gene_table_to_intervals.py @@ -45,7 +45,6 @@ def main(): # Read table from stdin and handle each gene for line in sys.stdin: - # Parse fields from gene tabls fields = line.split("\t") if options.discard_first_column: diff --git a/scripts/wiggle_to_array_tree.py b/scripts/wiggle_to_array_tree.py index e798fbdb..d5cfaec9 100755 --- a/scripts/wiggle_to_array_tree.py +++ b/scripts/wiggle_to_array_tree.py @@ -17,7 +17,6 @@ def main(): - sizes_fname = sys.argv[1] out_fname = sys.argv[2] diff --git a/scripts/wiggle_to_binned_array.py b/scripts/wiggle_to_binned_array.py index ca3be82e..5f2327a2 100755 --- a/scripts/wiggle_to_binned_array.py +++ b/scripts/wiggle_to_binned_array.py @@ -15,7 +15,6 @@ def main(): - # Parse command line options, args = doc_optparse.parse(__doc__) try: From 8eeb95d2569ba94e9371af6aedd20fc382166c5d Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 25 Jul 2023 12:48:01 +0100 Subject: [PATCH 45/68] Fixup --- scripts/bed_count_by_interval.py | 12 +++--------- scripts/bed_count_overlapping.py | 12 +++--------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/scripts/bed_count_by_interval.py b/scripts/bed_count_by_interval.py index da59f635..a900d0ed 100755 --- a/scripts/bed_count_by_interval.py +++ b/scripts/bed_count_by_interval.py @@ -17,15 +17,9 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - ( - chrom, - start, - end, - ) = ( - fields[0], - int(fields[1]), - int(fields[2]), - ) + chrom = fields[0] + start = int(fields[1]) + end = int(fields[2]) if chrom not in ranges: ranges[chrom] = Intersecter() 
ranges[chrom].add_interval(Interval(start, end)) diff --git a/scripts/bed_count_overlapping.py b/scripts/bed_count_overlapping.py index da59f635..a900d0ed 100755 --- a/scripts/bed_count_overlapping.py +++ b/scripts/bed_count_overlapping.py @@ -17,15 +17,9 @@ ranges = {} for line in open(bed2): fields = line.strip().split() - ( - chrom, - start, - end, - ) = ( - fields[0], - int(fields[1]), - int(fields[2]), - ) + chrom = fields[0] + start = int(fields[1]) + end = int(fields[2]) if chrom not in ranges: ranges[chrom] = Intersecter() ranges[chrom].add_interval(Interval(start, end)) From ffef6853bed8b5664e5a08911008c18cde343906 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 25 Jul 2023 12:41:12 +0100 Subject: [PATCH 46/68] Lint with black and isort in CI workflow Also: - Don't lint again in the test step of the CI workflow - Fix import order - Replace some star imports - Remove some trailing whitespaces --- .github/workflows/test.yaml | 8 ++++---- lib/bx/bbi/bbi_file.pxd | 14 +++++++++----- lib/bx/bbi/bbi_file.pyx | 26 ++++++++++++-------------- lib/bx/bbi/bigbed_file.pyx | 19 +++++++++++-------- lib/bx/bbi/bigwig_file.pyx | 23 +++++++++++++++-------- lib/bx/bbi/types.pxd | 12 ++++++------ 6 files changed, 57 insertions(+), 45 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 7aa815bd..fb64c9b4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -14,10 +14,10 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Install flake8 - run: pip install flake8 + - name: Install tox + run: pip install tox - name: Lint - run: flake8 . 
+ run: tox -e lint test: runs-on: ubuntu-latest strategy: @@ -34,4 +34,4 @@ jobs: - name: Install tox run: pip install tox - name: Test - run: tox + run: tox -e py diff --git a/lib/bx/bbi/bbi_file.pxd b/lib/bx/bbi/bbi_file.pxd index fe753f09..a5f598d0 100644 --- a/lib/bx/bbi/bbi_file.pxd +++ b/lib/bx/bbi/bbi_file.pxd @@ -1,12 +1,16 @@ -from .types cimport * - -from .bpt_file cimport BPTFile -from .cirtree_file cimport CIRTreeFile - import numpy cimport numpy +from .bpt_file cimport BPTFile +from .cirtree_file cimport CIRTreeFile +from .types cimport ( + bits16, + bits32, + bits64, + boolean, +) + cdef class SummaryBlock: """ diff --git a/lib/bx/bbi/bbi_file.pyx b/lib/bx/bbi/bbi_file.pyx index 8cca6dcc..bddd9682 100644 --- a/lib/bx/bbi/bbi_file.pyx +++ b/lib/bx/bbi/bbi_file.pyx @@ -9,27 +9,25 @@ mirrors Jim Kent's 'bbiRead.c' mostly. from cpython.version cimport PY_MAJOR_VERSION +import math import sys - -cimport cython - +import zlib from collections import deque - -from .types cimport * - -from .bpt_file cimport BPTFile -from .cirtree_file cimport CIRTreeFile -from libc cimport limits +from io import BytesIO import numpy -cimport numpy +from bx.misc.binary_file import BinaryFileReader -import math -import zlib -from io import BytesIO +cimport cython +from libc cimport limits -from bx.misc.binary_file import BinaryFileReader +from .bpt_file cimport BPTFile +from .cirtree_file cimport CIRTreeFile +from .types cimport ( + bits32, + bits64, +) cdef extern from "Python.h": diff --git a/lib/bx/bbi/bigbed_file.pyx b/lib/bx/bbi/bigbed_file.pyx index cac8b61f..ed871c4b 100644 --- a/lib/bx/bbi/bigbed_file.pyx +++ b/lib/bx/bbi/bigbed_file.pyx @@ -2,20 +2,23 @@ BigBed file. 
""" -from .bbi_file cimport * -from .cirtree_file cimport CIRTreeFile +import zlib +from io import BytesIO import numpy -from .types cimport * +from bx.intervals.io import GenomicInterval +from bx.misc.binary_file import BinaryFileReader cimport numpy -import zlib -from io import BytesIO - -from bx.intervals.io import GenomicInterval -from bx.misc.binary_file import BinaryFileReader +from .bbi_file cimport ( + BBIFile, + BlockHandler, + SummarizedData, +) +from .cirtree_file cimport CIRTreeFile +from .types cimport bits32 DEF big_bed_sig = 0x8789F2EB diff --git a/lib/bx/bbi/bigwig_file.pyx b/lib/bx/bbi/bigwig_file.pyx index 75dc58f7..e7a6f45b 100644 --- a/lib/bx/bbi/bigwig_file.pyx +++ b/lib/bx/bbi/bigwig_file.pyx @@ -2,21 +2,28 @@ BigWig file. """ +import zlib from collections import deque - -from .bbi_file cimport * -from .cirtree_file cimport CIRTreeFile +from io import BytesIO import numpy -from .types cimport * +from bx.misc.binary_file import BinaryFileReader cimport numpy -import zlib -from io import BytesIO - -from bx.misc.binary_file import BinaryFileReader +from .bbi_file cimport ( + BBIFile, + BlockHandler, + SummarizedData, +) +from .cirtree_file cimport CIRTreeFile +from .types cimport ( + bits8, + bits16, + bits32, + UBYTE, +) DEF big_wig_sig = 0x888FFC26 DEF bwg_bed_graph = 1 diff --git a/lib/bx/bbi/types.pxd b/lib/bx/bbi/types.pxd index 8c180bee..89522bb7 100644 --- a/lib/bx/bbi/types.pxd +++ b/lib/bx/bbi/types.pxd @@ -1,9 +1,9 @@ -ctypedef unsigned char UBYTE -ctypedef signed char BYTE -ctypedef unsigned short UWORD -ctypedef short WORD -ctypedef unsigned long long bits64 -ctypedef unsigned bits32 +ctypedef unsigned char UBYTE +ctypedef signed char BYTE +ctypedef unsigned short UWORD +ctypedef short WORD +ctypedef unsigned long long bits64 +ctypedef unsigned bits32 ctypedef unsigned short bits16 ctypedef unsigned char bits8 ctypedef int signed32 From 9bc5edaa307d1193bc9e2809b2c4fe1fb0ca9bb1 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 25 Jul 
2023 13:52:03 +0200 Subject: [PATCH 47/68] Include pxd files in source dist --- MANIFEST.in | 1 + lib/bx/arrays/array_tree.pyx | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 7b9c2d84..7fac4702 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,3 +4,4 @@ recursive-include src *.c recursive-include lib *.h recursive-include lib *.c recursive-include lib *.pyx +recursive-include lib *.pxd diff --git a/lib/bx/arrays/array_tree.pyx b/lib/bx/arrays/array_tree.pyx index d6575819..57e5cea8 100644 --- a/lib/bx/arrays/array_tree.pyx +++ b/lib/bx/arrays/array_tree.pyx @@ -7,8 +7,6 @@ from numpy import * cimport numpy -cimport bx.arrays.wiggle - from bx.misc.binary_file import ( BinaryFileReader, BinaryFileWriter, From ab55cde3d18eb734034fb0f67fce0fe553c425d9 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 25 Jul 2023 19:38:01 +0100 Subject: [PATCH 48/68] Drop PyPy 3.8 wheels --- .github/workflows/deploy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index e70c1517..0a214e62 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -34,8 +34,8 @@ jobs: CIBW_ARCHS: ${{matrix.arch}} # Skip building musllinux wheels for now, they take too long to build, # mainly because numpy doesn't have musllinux wheels on PyPI yet. - # Skip also building for PyPy 3.7, which is deprecated upstream. - CIBW_SKIP: '*-musllinux* pp37-*' + # Skip also building for PyPy 3.7-3.8, which are deprecated upstream. 
+ CIBW_SKIP: '*-musllinux* pp37-* pp38-*' - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v3 From 59750a3419eb9e53856dfd8d1d3283eba2c9bdef Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 26 Jul 2023 11:44:03 +0100 Subject: [PATCH 49/68] Release 0.10.0 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index 3e2f46a3..61fb31ca 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = "0.9.0" +__version__ = "0.10.0" From 7758bc4492626ffdbaa90c8fc5dd7620b1e2f3f8 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 27 Jul 2023 13:09:24 +0100 Subject: [PATCH 50/68] Update setup-qemu-action version --- .github/workflows/deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 0a214e62..2b3cb40f 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -23,7 +23,7 @@ jobs: python-version: '3.x' - name: Set up QEMU to build non-native architectures if: ${{ matrix.arch == 'aarch64' }} - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Install required Python packages run: | python -m pip install --upgrade pip setuptools wheel From ccd8c6bac984e895bf7b482666215c788e1a2b5a Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 7 Dec 2023 14:03:22 +0000 Subject: [PATCH 51/68] Update action versions --- .github/workflows/deploy.yaml | 10 +++++----- .github/workflows/test.yaml | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 2b3cb40f..8a82da86 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -17,13 +17,13 @@ jobs: - os: ubuntu-latest arch: aarch64 steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: 
actions/setup-python@v5 with: python-version: '3.x' - name: Set up QEMU to build non-native architectures if: ${{ matrix.arch == 'aarch64' }} - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Install required Python packages run: | python -m pip install --upgrade pip setuptools wheel @@ -46,8 +46,8 @@ jobs: build_sdist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install required Python packages diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fb64c9b4..2f563c42 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -10,8 +10,8 @@ jobs: matrix: python-version: ['3.7', '3.11'] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install tox @@ -25,8 +25,8 @@ jobs: matrix: python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install liblzo2-dev From 0a1faaa0cf799027ed13c078d473bd93d3f2b455 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Thu, 7 Dec 2023 14:09:38 +0000 Subject: [PATCH 52/68] Fix F811 flake8 errors --- lib/bx/cookbook/attribute.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/bx/cookbook/attribute.py b/lib/bx/cookbook/attribute.py index 4775185f..cd091cc0 100644 --- a/lib/bx/cookbook/attribute.py +++ b/lib/bx/cookbook/attribute.py @@ -113,7 +113,6 @@ def _attribute(permission="rwd", **kwds): def _property(attrname, default): propname, attrname = attrname, mangle(classname, attrname) - fget, fset, fdel, doc = None, None, None, propname if "r" in permission: 
def fget(self): @@ -124,11 +123,17 @@ def fget(self): setattr(self, attrname, default) return value + else: + fget = None + if "w" in permission: def fset(self, value): setattr(self, attrname, value) + else: + fset = None + if "d" in permission: def fdel(self): @@ -139,7 +144,9 @@ def fdel(self): # calling fget can restore this attribute, so remove property delattr(self.__class__, propname) - return property(fget=fget, fset=fset, fdel=fdel, doc=doc) + else: + fdel = None + return property(fget=fget, fset=fset, fdel=fdel, doc=propname) for attrname, default in kwds.items(): classdict[attrname] = _property(attrname, default) From 92518bba45374174838d95e2d8a65cf501695300 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 12 Dec 2023 03:28:38 +0000 Subject: [PATCH 53/68] site.cfg is unsupported in numpy>=1.26 --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index dccb8a55..0e1b900f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,8 @@ fi # for PyPy3.7), we need to build it using OpenBLAS (both before building the # bx-python wheel and when testing it), see # https://github.com/numpy/numpy/issues/15947#issuecomment-683355728 +# The ~/.numpy-site.cfg is supported by numpy <1.26.0 , see +# https://numpy.org/doc/stable/release/1.26.0-notes.html#numpy-specific-build-customization before-all = """ brew install openblas && cat > ~/.numpy-site.cfg < Date: Fri, 15 Dec 2023 21:44:12 +0000 Subject: [PATCH 54/68] Add now mandatory readthedocs config file Also: - Remove unnecessary and now unresolvable python-lzo doc requirement --- .readthedocs.yaml | 27 +++++++++++++++++++++++++++ doc/requirements.txt | 1 - 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..9237d9b7 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,27 @@ +# Read the Docs configuration file for Sphinx projects +# See 
https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: doc/source/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - path: . + - requirements: doc/requirements.txt diff --git a/doc/requirements.txt b/doc/requirements.txt index 08f3b8c2..24ce15ab 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,2 +1 @@ -https://bitbucket.org/james_taylor/python-lzo-static/get/63987d89fd1b.zip numpy From 9fee4eb0606e71f8b7717d0417941e40bcfc50b7 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 19 Dec 2023 19:50:36 +0000 Subject: [PATCH 55/68] Regenerate documentation with: ``` $ rm -rf doc $ sphinx-quickstart --sep -p bx-python -a "James Taylor" --ext-autodoc --ext-doctest --ext-intersphinx --extensions sphinx_rtd_theme doc $ cd doc/ $ sphinx-apidoc -d 3 --force --separate -o source/lib ../lib/ ``` --- doc/Makefile | 103 +--- doc/make.bat | 35 ++ doc/requirements.txt | 2 + doc/source/conf.py | 204 +------ doc/source/contents.rst | 18 - doc/source/index.rst | 53 +- doc/source/lib/bx.align.axt.rst | 6 +- doc/source/lib/bx.align.core.rst | 6 +- doc/source/lib/bx.align.epo.rst | 6 +- doc/source/lib/bx.align.epo_tests.rst | 10 +- doc/source/lib/bx.align.lav.rst | 6 +- doc/source/lib/bx.align.lav_tests.rst | 10 +- doc/source/lib/bx.align.maf.rst | 6 +- doc/source/lib/bx.align.maf_tests.rst | 10 +- doc/source/lib/bx.align.rst | 12 +- doc/source/lib/bx.align.score.rst | 6 +- doc/source/lib/bx.align.score_tests.rst | 10 +- doc/source/lib/bx.align.sitemask.core.rst | 6 +- 
doc/source/lib/bx.align.sitemask.cpg.rst | 6 +- doc/source/lib/bx.align.sitemask.quality.rst | 6 +- doc/source/lib/bx.align.sitemask.rst | 7 +- .../lib/bx.align.sitemask.sitemask_tests.rst | 10 +- doc/source/lib/bx.align.tools.chop.rst | 6 +- doc/source/lib/bx.align.tools.fuse.rst | 6 +- doc/source/lib/bx.align.tools.rst | 7 +- doc/source/lib/bx.align.tools.thread.rst | 6 +- doc/source/lib/bx.align.tools.tile.rst | 6 +- doc/source/lib/bx.arrays.array_tree.rst | 10 +- doc/source/lib/bx.arrays.array_tree_tests.rst | 10 +- doc/source/lib/bx.arrays.bed.rst | 6 +- doc/source/lib/bx.arrays.rst | 7 +- doc/source/lib/bx.arrays.wiggle.rst | 6 +- doc/source/lib/bx.bbi.bbi_file.rst | 10 +- doc/source/lib/bx.bbi.bigbed_file.rst | 10 +- doc/source/lib/bx.bbi.bigwig_file.rst | 10 +- doc/source/lib/bx.bbi.bigwig_tests.rst | 10 +- doc/source/lib/bx.bbi.bpt_file.rst | 10 +- doc/source/lib/bx.bbi.cirtree_file.rst | 10 +- doc/source/lib/bx.bbi.rst | 7 +- doc/source/lib/bx.binned_array.rst | 10 +- doc/source/lib/bx.binned_array_tests.rst | 10 +- doc/source/lib/bx.bitset.rst | 6 +- doc/source/lib/bx.bitset_builders.rst | 10 +- doc/source/lib/bx.bitset_tests.rst | 10 +- doc/source/lib/bx.bitset_utils.rst | 10 +- doc/source/lib/bx.cookbook.argparse.rst | 6 +- doc/source/lib/bx.cookbook.attribute.rst | 6 +- doc/source/lib/bx.cookbook.doc_optparse.rst | 10 +- doc/source/lib/bx.cookbook.progress_bar.rst | 10 +- doc/source/lib/bx.cookbook.rst | 7 +- doc/source/lib/bx.filter.rst | 6 +- doc/source/lib/bx.gene_reader.rst | 10 +- doc/source/lib/bx.interval_index_file.rst | 10 +- .../lib/bx.interval_index_file_tests.rst | 10 +- doc/source/lib/bx.intervals.cluster.rst | 6 +- doc/source/lib/bx.intervals.cluster_tests.rst | 10 +- doc/source/lib/bx.intervals.intersection.rst | 6 +- .../lib/bx.intervals.intersection_tests.rst | 10 +- doc/source/lib/bx.intervals.io.rst | 6 +- .../bx.intervals.operations.base_coverage.rst | 10 +- .../bx.intervals.operations.complement.rst | 6 +- 
.../lib/bx.intervals.operations.concat.rst | 6 +- .../lib/bx.intervals.operations.coverage.rst | 6 +- .../bx.intervals.operations.find_clusters.rst | 10 +- .../lib/bx.intervals.operations.intersect.rst | 6 +- .../lib/bx.intervals.operations.join.rst | 6 +- .../lib/bx.intervals.operations.merge.rst | 6 +- .../lib/bx.intervals.operations.quicksect.rst | 6 +- doc/source/lib/bx.intervals.operations.rst | 7 +- .../lib/bx.intervals.operations.subtract.rst | 6 +- .../lib/bx.intervals.random_intervals.rst | 10 +- doc/source/lib/bx.intervals.rst | 10 +- doc/source/lib/bx.intseq.ngramcount.rst | 6 +- doc/source/lib/bx.intseq.rst | 7 +- doc/source/lib/bx.misc.bgzf.rst | 6 +- doc/source/lib/bx.misc.bgzf_tests.rst | 10 +- doc/source/lib/bx.misc.binary_file.rst | 10 +- doc/source/lib/bx.misc.cdb.rst | 6 +- doc/source/lib/bx.misc.cdb_tests.rst | 10 +- doc/source/lib/bx.misc.filecache.rst | 6 +- doc/source/lib/bx.misc.filecache_tests.rst | 10 +- doc/source/lib/bx.misc.readlengths.rst | 6 +- doc/source/lib/bx.misc.rst | 7 +- doc/source/lib/bx.misc.seekbzip2.rst | 6 +- doc/source/lib/bx.misc.seekbzip2_tests.rst | 10 +- doc/source/lib/bx.misc.seeklzop.rst | 6 +- doc/source/lib/bx.misc.seeklzop_tests.rst | 10 +- doc/source/lib/bx.motif.io.rst | 7 +- doc/source/lib/bx.motif.io.transfac.rst | 6 +- doc/source/lib/bx.motif.io.transfac_tests.rst | 10 +- doc/source/lib/bx.motif.logo.rst | 6 +- doc/source/lib/bx.motif.pwm.rst | 6 +- doc/source/lib/bx.motif.pwm_tests.rst | 10 +- doc/source/lib/bx.motif.rst | 12 +- doc/source/lib/bx.phylo.newick.rst | 6 +- doc/source/lib/bx.phylo.newick_tests.rst | 10 +- doc/source/lib/bx.phylo.phast.rst | 6 +- doc/source/lib/bx.phylo.phast_tests.rst | 10 +- doc/source/lib/bx.phylo.rst | 7 +- .../lib/bx.pwm.bed_score_aligned_pwm.rst | 10 +- .../lib/bx.pwm.bed_score_aligned_string.rst | 10 +- doc/source/lib/bx.pwm.maf_select_motifs.rst | 10 +- .../lib/bx.pwm.position_weight_matrix.rst | 10 +- doc/source/lib/bx.pwm.pwm_score_maf.rst | 10 +- 
doc/source/lib/bx.pwm.pwm_score_motifs.rst | 10 +- doc/source/lib/bx.pwm.pwm_score_positions.rst | 10 +- doc/source/lib/bx.pwm.pwm_tests.rst | 10 +- doc/source/lib/bx.pwm.rst | 7 +- doc/source/lib/bx.rst | 34 +- doc/source/lib/bx.seq.core.rst | 6 +- doc/source/lib/bx.seq.fasta.rst | 6 +- doc/source/lib/bx.seq.fasta_tests.rst | 10 +- doc/source/lib/bx.seq.nib.rst | 6 +- doc/source/lib/bx.seq.nib_tests.rst | 10 +- doc/source/lib/bx.seq.qdna.rst | 6 +- doc/source/lib/bx.seq.qdna_tests.rst | 10 +- doc/source/lib/bx.seq.rst | 7 +- doc/source/lib/bx.seq.seq.rst | 6 +- doc/source/lib/bx.seq.seq_tests.rst | 10 +- doc/source/lib/bx.seq.twobit.rst | 6 +- doc/source/lib/bx.seq.twobit_tests.rst | 10 +- doc/source/lib/bx.seqmapping.rst | 6 +- doc/source/lib/bx.seqmapping_tests.rst | 10 +- doc/source/lib/bx.tabular.io.rst | 6 +- doc/source/lib/bx.tabular.rst | 7 +- doc/source/lib/bx.wiggle.rst | 6 +- doc/source/lib/bx.wiggle_tests.rst | 10 +- doc/source/lib/bx_extras.fpconst.rst | 10 +- doc/source/lib/bx_extras.lrucache.rst | 10 +- doc/source/lib/bx_extras.pstat.rst | 10 +- doc/source/lib/bx_extras.pyparsing.rst | 10 +- doc/source/lib/bx_extras.rst | 12 +- doc/source/lib/bx_extras.stats.rst | 10 +- doc/source/lib/modules.rst | 2 +- doc/source/lib/psyco_full.rst | 10 +- doc/source/static/base.css | 152 ------ doc/source/static/tripoli.base.css | 509 ------------------ doc/source/templates/index.html | 34 -- doc/source/templates/indexsidebar.html | 9 - doc/source/templates/layout.html | 51 -- 140 files changed, 654 insertions(+), 1585 deletions(-) create mode 100644 doc/make.bat delete mode 100644 doc/source/contents.rst delete mode 100644 doc/source/static/base.css delete mode 100644 doc/source/static/tripoli.base.css delete mode 100644 doc/source/templates/index.html delete mode 100644 doc/source/templates/indexsidebar.html delete mode 100644 doc/source/templates/layout.html diff --git a/doc/Makefile b/doc/Makefile index 7abd1c5a..d0c3cbf1 100644 --- a/doc/Makefile +++ 
b/doc/Makefile @@ -1,83 +1,20 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html web pickle htmlhelp latex changes linkcheck - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " apidoc to run epydoc" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview over all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - -clean: - -rm -rf docbuild/* - -html: - mkdir -p build/html build/doctrees - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) docbuild/html - @echo - @echo "Build finished. The HTML pages are in docbuild/html." - -apidoc: - mkdir -p build/html/apidoc - epydoc-2.6 --docformat restructuredtext ../lib/bx -o docbuild/html/apidoc - @echo - @echo "Epydoc finished. The pages are in docbuild/html/apidoc." - - -pickle: - mkdir -p build/pickle build/doctrees - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) docbuild/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -web: pickle - -json: - mkdir -p build/json build/doctrees - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) docbuild/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - mkdir -p build/htmlhelp build/doctrees - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) docbuild/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in build/htmlhelp." 
- -latex: - mkdir -p build/latex build/doctrees - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) docbuild/latex - @echo - @echo "Build finished; the LaTeX files are in build/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -changes: - mkdir -p build/changes build/doctrees - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) docbuild/changes - @echo - @echo "The overview file is in build/changes." - -linkcheck: - mkdir -p build/linkcheck build/doctrees - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) docbuild/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in build/linkcheck/output.txt." +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 00000000..dc1312ab --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/requirements.txt b/doc/requirements.txt index 24ce15ab..d090a2ea 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1 +1,3 @@ numpy +sphinx +sphinx-rtd-theme diff --git a/doc/source/conf.py b/doc/source/conf.py index 43935eff..b16bb3e9 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,194 +1,42 @@ +# Configuration file for the Sphinx documentation builder. # -# BxPython documentation build configuration file, created by -# sphinx-quickstart on Fri May 08 10:18:22 2009. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# The contents of this file are pickled, so don't put values in the namespace -# that aren't pickleable (module imports are okay, they're removed automatically). -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If your extensions are in another directory, add it here. If the directory -# is relative to the documentation root, use os.path.abspath to make it -# absolute, like shown here. -import bx +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html -# General configuration -# --------------------- +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8' - -# The master toctree document. -master_doc = "index" +import bx -# General information about the project. project = "bx-python" -copyright = "2017, James Taylor" - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. +copyright = "2005-2019, James Taylor; 2019-2023, Nicola Soranzo" +author = "James Taylor" version = bx.__version__ - -# The full version, including alpha/beta/rc tags. release = version -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -# unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. 
-# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -# Options for HTML output -# ----------------------- - -# The style sheet to use for HTML and HTML Help pages. A file of that name -# must exist either in Sphinx' static/ path, or in one of the custom paths -# given in html_static_path. -html_style = "base.css" - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -# html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -# html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["static"] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -html_index = "index.html" -html_sidebars = {"index": "indexsidebar.html"} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -##html_additional_pages = { -## 'index': 'index.html', -##} - -# If false, no module index is generated. 
-# html_use_modindex = True - -# If false, no index is generated. -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# html_split_index = False - -# If true, the reST sources are included in the HTML build as _sources/. -# html_copy_source = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = '' - -# Output file base name for HTML help builder. -htmlhelp_basename = "bx-doc" - - -# Options for LaTeX output -# ------------------------ - -# The paper size ('letter' or 'a4'). -# latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -# latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, document class [howto/manual]). -latex_documents = [ - ("index", "bx-python.tex", "bx-python Documentation", "James Taylor", "manual"), +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx_rtd_theme", ] -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# latex_use_parts = False +templates_path = ["_templates"] +exclude_patterns = [] -# Additional stuff for the LaTeX preamble. -# latex_preamble = '' -# Documents to append as an appendix to all manuals. -# latex_appendices = [] +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -# If false, no module index is generated. 
-# latex_use_modindex = True +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] +# -- Options for intersphinx extension --------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration -# Example configuration for intersphinx: refer to the Python standard library. -# intersphinx_mapping = {'http://docs.python.org/dev': None} +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} diff --git a/doc/source/contents.rst b/doc/source/contents.rst deleted file mode 100644 index c0d48d2f..00000000 --- a/doc/source/contents.rst +++ /dev/null @@ -1,18 +0,0 @@ - - -bx-python documentation contents -================================ - -Browse the Python API `class documentation `_ - -Contents: - -.. toctree:: - :maxdepth: 2 - - modules/index.rst - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - diff --git a/doc/source/index.rst b/doc/source/index.rst index 69ed3dfb..e257d4e3 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,28 +1,25 @@ -About bx-python -=============== - -The bx-python project is a python library and associated set of scripts to allow for rapid implementation of genome scale analyses. 
The library contains a variety of useful modules, but the particular strengths are: - - * Classes for reading and working with genome-scale multiple local alignments (in MAF, AXT, and LAV formats) - * Generic data structure for indexing on disk files that contain blocks of data associated with intervals on various sequences (used, for example, to provide random access to individual alignments in huge files; optimized for use over network filesystems) - * Data structures for working with intervals on sequences - * "Binned bitsets" which act just like chromosome sized bit arrays, but lazily allocate regions and allow large blocks of all set or all unset bits to be stored compactly - * "Intersecter" for performing fast intersection tests that preserve both query and target intervals and associated annotation - -These tools have been used in a variety of published research, and are a fundamental part of the ongoing Galaxy and ESPERR projects. - -Contents -======== - -.. toctree:: - :maxdepth: 5 - - Application Documentation - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - +Welcome to bx-python's documentation! +===================================== + +The bx-python project is a python library and associated set of scripts to allow for rapid implementation of genome scale analyses. 
The library contains a variety of useful modules, but the particular strengths are: + + * Classes for reading and working with genome-scale multiple local alignments (in MAF, AXT, and LAV formats) + * Generic data structure for indexing on disk files that contain blocks of data associated with intervals on various sequences (used, for example, to provide random access to individual alignments in huge files; optimized for use over network filesystems) + * Data structures for working with intervals on sequences + * "Binned bitsets" which act just like chromosome sized bit arrays, but lazily allocate regions and allow large blocks of all set or all unset bits to be stored compactly + * "Intersecter" for performing fast intersection tests that preserve both query and target intervals and associated annotation + +These tools have been used in a variety of published research, and are a fundamental part of the ongoing Galaxy and ESPERR projects. + +.. toctree:: + :maxdepth: 4 + :caption: Contents: + + Application Documentation + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/source/lib/bx.align.axt.rst b/doc/source/lib/bx.align.axt.rst index 807863a2..3eb45e26 100644 --- a/doc/source/lib/bx.align.axt.rst +++ b/doc/source/lib/bx.align.axt.rst @@ -2,6 +2,6 @@ bx.align.axt module =================== .. automodule:: bx.align.axt - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.core.rst b/doc/source/lib/bx.align.core.rst index d3337551..4ff436cc 100644 --- a/doc/source/lib/bx.align.core.rst +++ b/doc/source/lib/bx.align.core.rst @@ -2,6 +2,6 @@ bx.align.core module ==================== .. 
automodule:: bx.align.core - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.epo.rst b/doc/source/lib/bx.align.epo.rst index e0ed150d..8088515b 100644 --- a/doc/source/lib/bx.align.epo.rst +++ b/doc/source/lib/bx.align.epo.rst @@ -2,6 +2,6 @@ bx.align.epo module =================== .. automodule:: bx.align.epo - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.epo_tests.rst b/doc/source/lib/bx.align.epo_tests.rst index 14a8fee4..8a794c8d 100644 --- a/doc/source/lib/bx.align.epo_tests.rst +++ b/doc/source/lib/bx.align.epo_tests.rst @@ -1,7 +1,7 @@ -bx.align.epo_tests module -========================= +bx.align.epo\_tests module +========================== .. automodule:: bx.align.epo_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.lav.rst b/doc/source/lib/bx.align.lav.rst index 85a17d68..a2827a21 100644 --- a/doc/source/lib/bx.align.lav.rst +++ b/doc/source/lib/bx.align.lav.rst @@ -2,6 +2,6 @@ bx.align.lav module =================== .. automodule:: bx.align.lav - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.lav_tests.rst b/doc/source/lib/bx.align.lav_tests.rst index 1d465213..044aaa5b 100644 --- a/doc/source/lib/bx.align.lav_tests.rst +++ b/doc/source/lib/bx.align.lav_tests.rst @@ -1,7 +1,7 @@ -bx.align.lav_tests module -========================= +bx.align.lav\_tests module +========================== .. 
automodule:: bx.align.lav_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.maf.rst b/doc/source/lib/bx.align.maf.rst index b4d00c68..e5e62332 100644 --- a/doc/source/lib/bx.align.maf.rst +++ b/doc/source/lib/bx.align.maf.rst @@ -2,6 +2,6 @@ bx.align.maf module =================== .. automodule:: bx.align.maf - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.maf_tests.rst b/doc/source/lib/bx.align.maf_tests.rst index 6fda3ce8..1af3fd27 100644 --- a/doc/source/lib/bx.align.maf_tests.rst +++ b/doc/source/lib/bx.align.maf_tests.rst @@ -1,7 +1,7 @@ -bx.align.maf_tests module -========================= +bx.align.maf\_tests module +========================== .. automodule:: bx.align.maf_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.rst b/doc/source/lib/bx.align.rst index e8924e20..e742e310 100644 --- a/doc/source/lib/bx.align.rst +++ b/doc/source/lib/bx.align.rst @@ -5,14 +5,16 @@ Subpackages ----------- .. toctree:: + :maxdepth: 3 - bx.align.sitemask - bx.align.tools + bx.align.sitemask + bx.align.tools Submodules ---------- .. toctree:: + :maxdepth: 3 bx.align.axt bx.align.core @@ -29,6 +31,6 @@ Module contents --------------- .. automodule:: bx.align - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.score.rst b/doc/source/lib/bx.align.score.rst index 9a87e1f8..f425857c 100644 --- a/doc/source/lib/bx.align.score.rst +++ b/doc/source/lib/bx.align.score.rst @@ -2,6 +2,6 @@ bx.align.score module ===================== .. 
automodule:: bx.align.score - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.score_tests.rst b/doc/source/lib/bx.align.score_tests.rst index d8c758a0..bac70c36 100644 --- a/doc/source/lib/bx.align.score_tests.rst +++ b/doc/source/lib/bx.align.score_tests.rst @@ -1,7 +1,7 @@ -bx.align.score_tests module -=========================== +bx.align.score\_tests module +============================ .. automodule:: bx.align.score_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.sitemask.core.rst b/doc/source/lib/bx.align.sitemask.core.rst index 548052ce..8f9320ef 100644 --- a/doc/source/lib/bx.align.sitemask.core.rst +++ b/doc/source/lib/bx.align.sitemask.core.rst @@ -2,6 +2,6 @@ bx.align.sitemask.core module ============================= .. automodule:: bx.align.sitemask.core - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.sitemask.cpg.rst b/doc/source/lib/bx.align.sitemask.cpg.rst index 5a1454ff..e2ef3014 100644 --- a/doc/source/lib/bx.align.sitemask.cpg.rst +++ b/doc/source/lib/bx.align.sitemask.cpg.rst @@ -2,6 +2,6 @@ bx.align.sitemask.cpg module ============================ .. automodule:: bx.align.sitemask.cpg - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.sitemask.quality.rst b/doc/source/lib/bx.align.sitemask.quality.rst index 5061690d..660f2700 100644 --- a/doc/source/lib/bx.align.sitemask.quality.rst +++ b/doc/source/lib/bx.align.sitemask.quality.rst @@ -2,6 +2,6 @@ bx.align.sitemask.quality module ================================ .. 
automodule:: bx.align.sitemask.quality - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.sitemask.rst b/doc/source/lib/bx.align.sitemask.rst index 2c0d9ad5..85c4437c 100644 --- a/doc/source/lib/bx.align.sitemask.rst +++ b/doc/source/lib/bx.align.sitemask.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.align.sitemask.core bx.align.sitemask.cpg @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: bx.align.sitemask - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.sitemask.sitemask_tests.rst b/doc/source/lib/bx.align.sitemask.sitemask_tests.rst index 3d0593f5..a95798ec 100644 --- a/doc/source/lib/bx.align.sitemask.sitemask_tests.rst +++ b/doc/source/lib/bx.align.sitemask.sitemask_tests.rst @@ -1,7 +1,7 @@ -bx.align.sitemask.sitemask_tests module -======================================= +bx.align.sitemask.sitemask\_tests module +======================================== .. automodule:: bx.align.sitemask.sitemask_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.tools.chop.rst b/doc/source/lib/bx.align.tools.chop.rst index c1153cee..82b921cb 100644 --- a/doc/source/lib/bx.align.tools.chop.rst +++ b/doc/source/lib/bx.align.tools.chop.rst @@ -2,6 +2,6 @@ bx.align.tools.chop module ========================== .. automodule:: bx.align.tools.chop - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.tools.fuse.rst b/doc/source/lib/bx.align.tools.fuse.rst index 6fd51ab8..9f3acde0 100644 --- a/doc/source/lib/bx.align.tools.fuse.rst +++ b/doc/source/lib/bx.align.tools.fuse.rst @@ -2,6 +2,6 @@ bx.align.tools.fuse module ========================== .. 
automodule:: bx.align.tools.fuse - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.tools.rst b/doc/source/lib/bx.align.tools.rst index 63baf336..e8d318bd 100644 --- a/doc/source/lib/bx.align.tools.rst +++ b/doc/source/lib/bx.align.tools.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.align.tools.chop bx.align.tools.fuse @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: bx.align.tools - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.tools.thread.rst b/doc/source/lib/bx.align.tools.thread.rst index d7e36d7b..8b6bcee9 100644 --- a/doc/source/lib/bx.align.tools.thread.rst +++ b/doc/source/lib/bx.align.tools.thread.rst @@ -2,6 +2,6 @@ bx.align.tools.thread module ============================ .. automodule:: bx.align.tools.thread - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.align.tools.tile.rst b/doc/source/lib/bx.align.tools.tile.rst index acb45163..ff53de53 100644 --- a/doc/source/lib/bx.align.tools.tile.rst +++ b/doc/source/lib/bx.align.tools.tile.rst @@ -2,6 +2,6 @@ bx.align.tools.tile module ========================== .. automodule:: bx.align.tools.tile - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.arrays.array_tree.rst b/doc/source/lib/bx.arrays.array_tree.rst index 2b689227..e5fc65b9 100644 --- a/doc/source/lib/bx.arrays.array_tree.rst +++ b/doc/source/lib/bx.arrays.array_tree.rst @@ -1,7 +1,7 @@ -bx.arrays.array_tree module -=========================== +bx.arrays.array\_tree module +============================ .. 
automodule:: bx.arrays.array_tree - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.arrays.array_tree_tests.rst b/doc/source/lib/bx.arrays.array_tree_tests.rst index 9f04d019..169f8a5c 100644 --- a/doc/source/lib/bx.arrays.array_tree_tests.rst +++ b/doc/source/lib/bx.arrays.array_tree_tests.rst @@ -1,7 +1,7 @@ -bx.arrays.array_tree_tests module -================================= +bx.arrays.array\_tree\_tests module +=================================== .. automodule:: bx.arrays.array_tree_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.arrays.bed.rst b/doc/source/lib/bx.arrays.bed.rst index fcb5d36c..1ee1dd53 100644 --- a/doc/source/lib/bx.arrays.bed.rst +++ b/doc/source/lib/bx.arrays.bed.rst @@ -2,6 +2,6 @@ bx.arrays.bed module ==================== .. automodule:: bx.arrays.bed - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.arrays.rst b/doc/source/lib/bx.arrays.rst index a6417ec1..f4bbbff2 100644 --- a/doc/source/lib/bx.arrays.rst +++ b/doc/source/lib/bx.arrays.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.arrays.array_tree bx.arrays.array_tree_tests @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: bx.arrays - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.arrays.wiggle.rst b/doc/source/lib/bx.arrays.wiggle.rst index 560d4051..1f65fe23 100644 --- a/doc/source/lib/bx.arrays.wiggle.rst +++ b/doc/source/lib/bx.arrays.wiggle.rst @@ -2,6 +2,6 @@ bx.arrays.wiggle module ======================= .. 
automodule:: bx.arrays.wiggle - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.bbi_file.rst b/doc/source/lib/bx.bbi.bbi_file.rst index f4ddad43..066c598a 100644 --- a/doc/source/lib/bx.bbi.bbi_file.rst +++ b/doc/source/lib/bx.bbi.bbi_file.rst @@ -1,7 +1,7 @@ -bx.bbi.bbi_file module -====================== +bx.bbi.bbi\_file module +======================= .. automodule:: bx.bbi.bbi_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.bigbed_file.rst b/doc/source/lib/bx.bbi.bigbed_file.rst index 7219d4b8..5aba2c3d 100644 --- a/doc/source/lib/bx.bbi.bigbed_file.rst +++ b/doc/source/lib/bx.bbi.bigbed_file.rst @@ -1,7 +1,7 @@ -bx.bbi.bigbed_file module -========================= +bx.bbi.bigbed\_file module +========================== .. automodule:: bx.bbi.bigbed_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.bigwig_file.rst b/doc/source/lib/bx.bbi.bigwig_file.rst index badeec64..f0994758 100644 --- a/doc/source/lib/bx.bbi.bigwig_file.rst +++ b/doc/source/lib/bx.bbi.bigwig_file.rst @@ -1,7 +1,7 @@ -bx.bbi.bigwig_file module -========================= +bx.bbi.bigwig\_file module +========================== .. automodule:: bx.bbi.bigwig_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.bigwig_tests.rst b/doc/source/lib/bx.bbi.bigwig_tests.rst index a267a388..3cf8b803 100644 --- a/doc/source/lib/bx.bbi.bigwig_tests.rst +++ b/doc/source/lib/bx.bbi.bigwig_tests.rst @@ -1,7 +1,7 @@ -bx.bbi.bigwig_tests module -========================== +bx.bbi.bigwig\_tests module +=========================== .. 
automodule:: bx.bbi.bigwig_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.bpt_file.rst b/doc/source/lib/bx.bbi.bpt_file.rst index 8af609fc..c6acd733 100644 --- a/doc/source/lib/bx.bbi.bpt_file.rst +++ b/doc/source/lib/bx.bbi.bpt_file.rst @@ -1,7 +1,7 @@ -bx.bbi.bpt_file module -====================== +bx.bbi.bpt\_file module +======================= .. automodule:: bx.bbi.bpt_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.cirtree_file.rst b/doc/source/lib/bx.bbi.cirtree_file.rst index 5c9878b8..8e3035b0 100644 --- a/doc/source/lib/bx.bbi.cirtree_file.rst +++ b/doc/source/lib/bx.bbi.cirtree_file.rst @@ -1,7 +1,7 @@ -bx.bbi.cirtree_file module -========================== +bx.bbi.cirtree\_file module +=========================== .. automodule:: bx.bbi.cirtree_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bbi.rst b/doc/source/lib/bx.bbi.rst index 24b6d607..455c2e01 100644 --- a/doc/source/lib/bx.bbi.rst +++ b/doc/source/lib/bx.bbi.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.bbi.bbi_file bx.bbi.bigbed_file @@ -17,6 +18,6 @@ Module contents --------------- .. automodule:: bx.bbi - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.binned_array.rst b/doc/source/lib/bx.binned_array.rst index 918ea824..578b8563 100644 --- a/doc/source/lib/bx.binned_array.rst +++ b/doc/source/lib/bx.binned_array.rst @@ -1,7 +1,7 @@ -bx.binned_array module -====================== +bx.binned\_array module +======================= .. 
automodule:: bx.binned_array - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.binned_array_tests.rst b/doc/source/lib/bx.binned_array_tests.rst index a1c5ac9e..81e4f4f3 100644 --- a/doc/source/lib/bx.binned_array_tests.rst +++ b/doc/source/lib/bx.binned_array_tests.rst @@ -1,7 +1,7 @@ -bx.binned_array_tests module -============================ +bx.binned\_array\_tests module +============================== .. automodule:: bx.binned_array_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bitset.rst b/doc/source/lib/bx.bitset.rst index 3c5634b5..9c9c08aa 100644 --- a/doc/source/lib/bx.bitset.rst +++ b/doc/source/lib/bx.bitset.rst @@ -2,6 +2,6 @@ bx.bitset module ================ .. automodule:: bx.bitset - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bitset_builders.rst b/doc/source/lib/bx.bitset_builders.rst index 9c3e4b8f..baed34f8 100644 --- a/doc/source/lib/bx.bitset_builders.rst +++ b/doc/source/lib/bx.bitset_builders.rst @@ -1,7 +1,7 @@ -bx.bitset_builders module -========================= +bx.bitset\_builders module +========================== .. automodule:: bx.bitset_builders - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bitset_tests.rst b/doc/source/lib/bx.bitset_tests.rst index ebd3da70..a07676b8 100644 --- a/doc/source/lib/bx.bitset_tests.rst +++ b/doc/source/lib/bx.bitset_tests.rst @@ -1,7 +1,7 @@ -bx.bitset_tests module -====================== +bx.bitset\_tests module +======================= .. 
automodule:: bx.bitset_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.bitset_utils.rst b/doc/source/lib/bx.bitset_utils.rst index 6bfcd83d..a106355c 100644 --- a/doc/source/lib/bx.bitset_utils.rst +++ b/doc/source/lib/bx.bitset_utils.rst @@ -1,7 +1,7 @@ -bx.bitset_utils module -====================== +bx.bitset\_utils module +======================= .. automodule:: bx.bitset_utils - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.cookbook.argparse.rst b/doc/source/lib/bx.cookbook.argparse.rst index d1a79599..e5ca0126 100644 --- a/doc/source/lib/bx.cookbook.argparse.rst +++ b/doc/source/lib/bx.cookbook.argparse.rst @@ -2,6 +2,6 @@ bx.cookbook.argparse module =========================== .. automodule:: bx.cookbook.argparse - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.cookbook.attribute.rst b/doc/source/lib/bx.cookbook.attribute.rst index e3c5d657..8f58e7e9 100644 --- a/doc/source/lib/bx.cookbook.attribute.rst +++ b/doc/source/lib/bx.cookbook.attribute.rst @@ -2,6 +2,6 @@ bx.cookbook.attribute module ============================ .. automodule:: bx.cookbook.attribute - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.cookbook.doc_optparse.rst b/doc/source/lib/bx.cookbook.doc_optparse.rst index 7091c7c1..099ea9b3 100644 --- a/doc/source/lib/bx.cookbook.doc_optparse.rst +++ b/doc/source/lib/bx.cookbook.doc_optparse.rst @@ -1,7 +1,7 @@ -bx.cookbook.doc_optparse module -=============================== +bx.cookbook.doc\_optparse module +================================ .. 
automodule:: bx.cookbook.doc_optparse - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.cookbook.progress_bar.rst b/doc/source/lib/bx.cookbook.progress_bar.rst index 5d828a1b..2b050950 100644 --- a/doc/source/lib/bx.cookbook.progress_bar.rst +++ b/doc/source/lib/bx.cookbook.progress_bar.rst @@ -1,7 +1,7 @@ -bx.cookbook.progress_bar module -=============================== +bx.cookbook.progress\_bar module +================================ .. automodule:: bx.cookbook.progress_bar - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.cookbook.rst b/doc/source/lib/bx.cookbook.rst index 215de67d..a35bb144 100644 --- a/doc/source/lib/bx.cookbook.rst +++ b/doc/source/lib/bx.cookbook.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.cookbook.argparse bx.cookbook.attribute @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: bx.cookbook - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.filter.rst b/doc/source/lib/bx.filter.rst index 2e1cf1d9..5b3693ff 100644 --- a/doc/source/lib/bx.filter.rst +++ b/doc/source/lib/bx.filter.rst @@ -2,6 +2,6 @@ bx.filter module ================ .. automodule:: bx.filter - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.gene_reader.rst b/doc/source/lib/bx.gene_reader.rst index a99b1a70..d43a4e06 100644 --- a/doc/source/lib/bx.gene_reader.rst +++ b/doc/source/lib/bx.gene_reader.rst @@ -1,7 +1,7 @@ -bx.gene_reader module -===================== +bx.gene\_reader module +====================== .. 
automodule:: bx.gene_reader - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.interval_index_file.rst b/doc/source/lib/bx.interval_index_file.rst index 59428ba2..494429b6 100644 --- a/doc/source/lib/bx.interval_index_file.rst +++ b/doc/source/lib/bx.interval_index_file.rst @@ -1,7 +1,7 @@ -bx.interval_index_file module -============================= +bx.interval\_index\_file module +=============================== .. automodule:: bx.interval_index_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.interval_index_file_tests.rst b/doc/source/lib/bx.interval_index_file_tests.rst index b1d9a987..f56abc2f 100644 --- a/doc/source/lib/bx.interval_index_file_tests.rst +++ b/doc/source/lib/bx.interval_index_file_tests.rst @@ -1,7 +1,7 @@ -bx.interval_index_file_tests module -=================================== +bx.interval\_index\_file\_tests module +====================================== .. automodule:: bx.interval_index_file_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.cluster.rst b/doc/source/lib/bx.intervals.cluster.rst index ef3c8d47..4aff3138 100644 --- a/doc/source/lib/bx.intervals.cluster.rst +++ b/doc/source/lib/bx.intervals.cluster.rst @@ -2,6 +2,6 @@ bx.intervals.cluster module =========================== .. 
automodule:: bx.intervals.cluster - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.cluster_tests.rst b/doc/source/lib/bx.intervals.cluster_tests.rst index cb45a59c..cb974de3 100644 --- a/doc/source/lib/bx.intervals.cluster_tests.rst +++ b/doc/source/lib/bx.intervals.cluster_tests.rst @@ -1,7 +1,7 @@ -bx.intervals.cluster_tests module -================================= +bx.intervals.cluster\_tests module +================================== .. automodule:: bx.intervals.cluster_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.intersection.rst b/doc/source/lib/bx.intervals.intersection.rst index ce7febc2..6199303f 100644 --- a/doc/source/lib/bx.intervals.intersection.rst +++ b/doc/source/lib/bx.intervals.intersection.rst @@ -2,6 +2,6 @@ bx.intervals.intersection module ================================ .. automodule:: bx.intervals.intersection - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.intersection_tests.rst b/doc/source/lib/bx.intervals.intersection_tests.rst index c6ebb8df..d53a6aac 100644 --- a/doc/source/lib/bx.intervals.intersection_tests.rst +++ b/doc/source/lib/bx.intervals.intersection_tests.rst @@ -1,7 +1,7 @@ -bx.intervals.intersection_tests module -====================================== +bx.intervals.intersection\_tests module +======================================= .. 
automodule:: bx.intervals.intersection_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.io.rst b/doc/source/lib/bx.intervals.io.rst index 051c9575..1135f085 100644 --- a/doc/source/lib/bx.intervals.io.rst +++ b/doc/source/lib/bx.intervals.io.rst @@ -2,6 +2,6 @@ bx.intervals.io module ====================== .. automodule:: bx.intervals.io - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.base_coverage.rst b/doc/source/lib/bx.intervals.operations.base_coverage.rst index 5b6d01b1..b63c4392 100644 --- a/doc/source/lib/bx.intervals.operations.base_coverage.rst +++ b/doc/source/lib/bx.intervals.operations.base_coverage.rst @@ -1,7 +1,7 @@ -bx.intervals.operations.base_coverage module -============================================ +bx.intervals.operations.base\_coverage module +============================================= .. automodule:: bx.intervals.operations.base_coverage - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.complement.rst b/doc/source/lib/bx.intervals.operations.complement.rst index 434ddbcf..f37b3de0 100644 --- a/doc/source/lib/bx.intervals.operations.complement.rst +++ b/doc/source/lib/bx.intervals.operations.complement.rst @@ -2,6 +2,6 @@ bx.intervals.operations.complement module ========================================= .. 
automodule:: bx.intervals.operations.complement - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.concat.rst b/doc/source/lib/bx.intervals.operations.concat.rst index ad9ed441..332a7e2e 100644 --- a/doc/source/lib/bx.intervals.operations.concat.rst +++ b/doc/source/lib/bx.intervals.operations.concat.rst @@ -2,6 +2,6 @@ bx.intervals.operations.concat module ===================================== .. automodule:: bx.intervals.operations.concat - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.coverage.rst b/doc/source/lib/bx.intervals.operations.coverage.rst index 80c67bac..bc124b46 100644 --- a/doc/source/lib/bx.intervals.operations.coverage.rst +++ b/doc/source/lib/bx.intervals.operations.coverage.rst @@ -2,6 +2,6 @@ bx.intervals.operations.coverage module ======================================= .. automodule:: bx.intervals.operations.coverage - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.find_clusters.rst b/doc/source/lib/bx.intervals.operations.find_clusters.rst index 96c49929..81a66ead 100644 --- a/doc/source/lib/bx.intervals.operations.find_clusters.rst +++ b/doc/source/lib/bx.intervals.operations.find_clusters.rst @@ -1,7 +1,7 @@ -bx.intervals.operations.find_clusters module -============================================ +bx.intervals.operations.find\_clusters module +============================================= .. 
automodule:: bx.intervals.operations.find_clusters - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.intersect.rst b/doc/source/lib/bx.intervals.operations.intersect.rst index 46851427..4b2c97b5 100644 --- a/doc/source/lib/bx.intervals.operations.intersect.rst +++ b/doc/source/lib/bx.intervals.operations.intersect.rst @@ -2,6 +2,6 @@ bx.intervals.operations.intersect module ======================================== .. automodule:: bx.intervals.operations.intersect - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.join.rst b/doc/source/lib/bx.intervals.operations.join.rst index 608cc5de..82a1bdc3 100644 --- a/doc/source/lib/bx.intervals.operations.join.rst +++ b/doc/source/lib/bx.intervals.operations.join.rst @@ -2,6 +2,6 @@ bx.intervals.operations.join module =================================== .. automodule:: bx.intervals.operations.join - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.merge.rst b/doc/source/lib/bx.intervals.operations.merge.rst index bc2dc543..f41055c6 100644 --- a/doc/source/lib/bx.intervals.operations.merge.rst +++ b/doc/source/lib/bx.intervals.operations.merge.rst @@ -2,6 +2,6 @@ bx.intervals.operations.merge module ==================================== .. 
automodule:: bx.intervals.operations.merge - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.quicksect.rst b/doc/source/lib/bx.intervals.operations.quicksect.rst index dbeaaf22..7149a5d0 100644 --- a/doc/source/lib/bx.intervals.operations.quicksect.rst +++ b/doc/source/lib/bx.intervals.operations.quicksect.rst @@ -2,6 +2,6 @@ bx.intervals.operations.quicksect module ======================================== .. automodule:: bx.intervals.operations.quicksect - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.rst b/doc/source/lib/bx.intervals.operations.rst index d4c52278..936e2eff 100644 --- a/doc/source/lib/bx.intervals.operations.rst +++ b/doc/source/lib/bx.intervals.operations.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.intervals.operations.base_coverage bx.intervals.operations.complement @@ -21,6 +22,6 @@ Module contents --------------- .. automodule:: bx.intervals.operations - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.operations.subtract.rst b/doc/source/lib/bx.intervals.operations.subtract.rst index 2430b94c..cd5078df 100644 --- a/doc/source/lib/bx.intervals.operations.subtract.rst +++ b/doc/source/lib/bx.intervals.operations.subtract.rst @@ -2,6 +2,6 @@ bx.intervals.operations.subtract module ======================================= .. 
automodule:: bx.intervals.operations.subtract - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.random_intervals.rst b/doc/source/lib/bx.intervals.random_intervals.rst index 3970489e..e20e9f7a 100644 --- a/doc/source/lib/bx.intervals.random_intervals.rst +++ b/doc/source/lib/bx.intervals.random_intervals.rst @@ -1,7 +1,7 @@ -bx.intervals.random_intervals module -==================================== +bx.intervals.random\_intervals module +===================================== .. automodule:: bx.intervals.random_intervals - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intervals.rst b/doc/source/lib/bx.intervals.rst index e63d2feb..33a85db2 100644 --- a/doc/source/lib/bx.intervals.rst +++ b/doc/source/lib/bx.intervals.rst @@ -5,13 +5,15 @@ Subpackages ----------- .. toctree:: + :maxdepth: 3 - bx.intervals.operations + bx.intervals.operations Submodules ---------- .. toctree:: + :maxdepth: 3 bx.intervals.cluster bx.intervals.cluster_tests @@ -24,6 +26,6 @@ Module contents --------------- .. automodule:: bx.intervals - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intseq.ngramcount.rst b/doc/source/lib/bx.intseq.ngramcount.rst index 44665edb..dc33c2f4 100644 --- a/doc/source/lib/bx.intseq.ngramcount.rst +++ b/doc/source/lib/bx.intseq.ngramcount.rst @@ -2,6 +2,6 @@ bx.intseq.ngramcount module =========================== .. automodule:: bx.intseq.ngramcount - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.intseq.rst b/doc/source/lib/bx.intseq.rst index c1721503..9b7b809e 100644 --- a/doc/source/lib/bx.intseq.rst +++ b/doc/source/lib/bx.intseq.rst @@ -5,6 +5,7 @@ Submodules ---------- .. 
toctree:: + :maxdepth: 3 bx.intseq.ngramcount @@ -12,6 +13,6 @@ Module contents --------------- .. automodule:: bx.intseq - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.bgzf.rst b/doc/source/lib/bx.misc.bgzf.rst index bf4c413d..25c9d46d 100644 --- a/doc/source/lib/bx.misc.bgzf.rst +++ b/doc/source/lib/bx.misc.bgzf.rst @@ -2,6 +2,6 @@ bx.misc.bgzf module =================== .. automodule:: bx.misc.bgzf - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.bgzf_tests.rst b/doc/source/lib/bx.misc.bgzf_tests.rst index c78fd0d6..fb0495d0 100644 --- a/doc/source/lib/bx.misc.bgzf_tests.rst +++ b/doc/source/lib/bx.misc.bgzf_tests.rst @@ -1,7 +1,7 @@ -bx.misc.bgzf_tests module -========================= +bx.misc.bgzf\_tests module +========================== .. automodule:: bx.misc.bgzf_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.binary_file.rst b/doc/source/lib/bx.misc.binary_file.rst index c87493c2..b0ccd630 100644 --- a/doc/source/lib/bx.misc.binary_file.rst +++ b/doc/source/lib/bx.misc.binary_file.rst @@ -1,7 +1,7 @@ -bx.misc.binary_file module -========================== +bx.misc.binary\_file module +=========================== .. automodule:: bx.misc.binary_file - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.cdb.rst b/doc/source/lib/bx.misc.cdb.rst index c414a9c7..4863429d 100644 --- a/doc/source/lib/bx.misc.cdb.rst +++ b/doc/source/lib/bx.misc.cdb.rst @@ -2,6 +2,6 @@ bx.misc.cdb module ================== .. 
automodule:: bx.misc.cdb - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.cdb_tests.rst b/doc/source/lib/bx.misc.cdb_tests.rst index 048d97ec..93ea5347 100644 --- a/doc/source/lib/bx.misc.cdb_tests.rst +++ b/doc/source/lib/bx.misc.cdb_tests.rst @@ -1,7 +1,7 @@ -bx.misc.cdb_tests module -======================== +bx.misc.cdb\_tests module +========================= .. automodule:: bx.misc.cdb_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.filecache.rst b/doc/source/lib/bx.misc.filecache.rst index 8d5bd01f..7c97463f 100644 --- a/doc/source/lib/bx.misc.filecache.rst +++ b/doc/source/lib/bx.misc.filecache.rst @@ -2,6 +2,6 @@ bx.misc.filecache module ======================== .. automodule:: bx.misc.filecache - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.filecache_tests.rst b/doc/source/lib/bx.misc.filecache_tests.rst index f4464bdd..8d5055d5 100644 --- a/doc/source/lib/bx.misc.filecache_tests.rst +++ b/doc/source/lib/bx.misc.filecache_tests.rst @@ -1,7 +1,7 @@ -bx.misc.filecache_tests module -============================== +bx.misc.filecache\_tests module +=============================== .. automodule:: bx.misc.filecache_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.readlengths.rst b/doc/source/lib/bx.misc.readlengths.rst index 89f34fb5..b5710844 100644 --- a/doc/source/lib/bx.misc.readlengths.rst +++ b/doc/source/lib/bx.misc.readlengths.rst @@ -2,6 +2,6 @@ bx.misc.readlengths module ========================== .. 
automodule:: bx.misc.readlengths - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.rst b/doc/source/lib/bx.misc.rst index 7f67d46a..4ee4257c 100644 --- a/doc/source/lib/bx.misc.rst +++ b/doc/source/lib/bx.misc.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.misc.bgzf bx.misc.bgzf_tests @@ -23,6 +24,6 @@ Module contents --------------- .. automodule:: bx.misc - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.seekbzip2.rst b/doc/source/lib/bx.misc.seekbzip2.rst index a6192d8b..94bd6a7b 100644 --- a/doc/source/lib/bx.misc.seekbzip2.rst +++ b/doc/source/lib/bx.misc.seekbzip2.rst @@ -2,6 +2,6 @@ bx.misc.seekbzip2 module ======================== .. automodule:: bx.misc.seekbzip2 - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.seekbzip2_tests.rst b/doc/source/lib/bx.misc.seekbzip2_tests.rst index 40de576e..dee56266 100644 --- a/doc/source/lib/bx.misc.seekbzip2_tests.rst +++ b/doc/source/lib/bx.misc.seekbzip2_tests.rst @@ -1,7 +1,7 @@ -bx.misc.seekbzip2_tests module -============================== +bx.misc.seekbzip2\_tests module +=============================== .. automodule:: bx.misc.seekbzip2_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.seeklzop.rst b/doc/source/lib/bx.misc.seeklzop.rst index 675ee92c..9659f343 100644 --- a/doc/source/lib/bx.misc.seeklzop.rst +++ b/doc/source/lib/bx.misc.seeklzop.rst @@ -2,6 +2,6 @@ bx.misc.seeklzop module ======================= .. 
automodule:: bx.misc.seeklzop - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.misc.seeklzop_tests.rst b/doc/source/lib/bx.misc.seeklzop_tests.rst index 41e48500..5a99c75a 100644 --- a/doc/source/lib/bx.misc.seeklzop_tests.rst +++ b/doc/source/lib/bx.misc.seeklzop_tests.rst @@ -1,7 +1,7 @@ -bx.misc.seeklzop_tests module -============================= +bx.misc.seeklzop\_tests module +============================== .. automodule:: bx.misc.seeklzop_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.io.rst b/doc/source/lib/bx.motif.io.rst index 58004a0b..4f40ae53 100644 --- a/doc/source/lib/bx.motif.io.rst +++ b/doc/source/lib/bx.motif.io.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.motif.io.transfac bx.motif.io.transfac_tests @@ -13,6 +14,6 @@ Module contents --------------- .. automodule:: bx.motif.io - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.io.transfac.rst b/doc/source/lib/bx.motif.io.transfac.rst index 009b0f73..4d80891d 100644 --- a/doc/source/lib/bx.motif.io.transfac.rst +++ b/doc/source/lib/bx.motif.io.transfac.rst @@ -2,6 +2,6 @@ bx.motif.io.transfac module =========================== .. automodule:: bx.motif.io.transfac - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.io.transfac_tests.rst b/doc/source/lib/bx.motif.io.transfac_tests.rst index e0dbba97..a2c6f646 100644 --- a/doc/source/lib/bx.motif.io.transfac_tests.rst +++ b/doc/source/lib/bx.motif.io.transfac_tests.rst @@ -1,7 +1,7 @@ -bx.motif.io.transfac_tests module -================================= +bx.motif.io.transfac\_tests module +================================== .. 
automodule:: bx.motif.io.transfac_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.logo.rst b/doc/source/lib/bx.motif.logo.rst index 1fc74480..dd6530e1 100644 --- a/doc/source/lib/bx.motif.logo.rst +++ b/doc/source/lib/bx.motif.logo.rst @@ -5,6 +5,6 @@ Module contents --------------- .. automodule:: bx.motif.logo - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.pwm.rst b/doc/source/lib/bx.motif.pwm.rst index 9411e941..411c271a 100644 --- a/doc/source/lib/bx.motif.pwm.rst +++ b/doc/source/lib/bx.motif.pwm.rst @@ -2,6 +2,6 @@ bx.motif.pwm module =================== .. automodule:: bx.motif.pwm - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.pwm_tests.rst b/doc/source/lib/bx.motif.pwm_tests.rst index 0310f55f..64966447 100644 --- a/doc/source/lib/bx.motif.pwm_tests.rst +++ b/doc/source/lib/bx.motif.pwm_tests.rst @@ -1,7 +1,7 @@ -bx.motif.pwm_tests module -========================= +bx.motif.pwm\_tests module +========================== .. automodule:: bx.motif.pwm_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.motif.rst b/doc/source/lib/bx.motif.rst index dc2608cc..2d731726 100644 --- a/doc/source/lib/bx.motif.rst +++ b/doc/source/lib/bx.motif.rst @@ -5,14 +5,16 @@ Subpackages ----------- .. toctree:: + :maxdepth: 3 - bx.motif.io - bx.motif.logo + bx.motif.io + bx.motif.logo Submodules ---------- .. toctree:: + :maxdepth: 3 bx.motif.pwm bx.motif.pwm_tests @@ -21,6 +23,6 @@ Module contents --------------- .. 
automodule:: bx.motif - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.phylo.newick.rst b/doc/source/lib/bx.phylo.newick.rst index 9f4c8b23..cc5d84c8 100644 --- a/doc/source/lib/bx.phylo.newick.rst +++ b/doc/source/lib/bx.phylo.newick.rst @@ -2,6 +2,6 @@ bx.phylo.newick module ====================== .. automodule:: bx.phylo.newick - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.phylo.newick_tests.rst b/doc/source/lib/bx.phylo.newick_tests.rst index f2712dfe..bdebc6c0 100644 --- a/doc/source/lib/bx.phylo.newick_tests.rst +++ b/doc/source/lib/bx.phylo.newick_tests.rst @@ -1,7 +1,7 @@ -bx.phylo.newick_tests module -============================ +bx.phylo.newick\_tests module +============================= .. automodule:: bx.phylo.newick_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.phylo.phast.rst b/doc/source/lib/bx.phylo.phast.rst index 5ee8255c..d243a1cb 100644 --- a/doc/source/lib/bx.phylo.phast.rst +++ b/doc/source/lib/bx.phylo.phast.rst @@ -2,6 +2,6 @@ bx.phylo.phast module ===================== .. automodule:: bx.phylo.phast - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.phylo.phast_tests.rst b/doc/source/lib/bx.phylo.phast_tests.rst index 7925f217..37cb2ecf 100644 --- a/doc/source/lib/bx.phylo.phast_tests.rst +++ b/doc/source/lib/bx.phylo.phast_tests.rst @@ -1,7 +1,7 @@ -bx.phylo.phast_tests module -=========================== +bx.phylo.phast\_tests module +============================ .. 
automodule:: bx.phylo.phast_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.phylo.rst b/doc/source/lib/bx.phylo.rst index 13f6223a..d9439742 100644 --- a/doc/source/lib/bx.phylo.rst +++ b/doc/source/lib/bx.phylo.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.phylo.newick bx.phylo.newick_tests @@ -15,6 +16,6 @@ Module contents --------------- .. automodule:: bx.phylo - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.bed_score_aligned_pwm.rst b/doc/source/lib/bx.pwm.bed_score_aligned_pwm.rst index 43ed464d..e86703ba 100644 --- a/doc/source/lib/bx.pwm.bed_score_aligned_pwm.rst +++ b/doc/source/lib/bx.pwm.bed_score_aligned_pwm.rst @@ -1,7 +1,7 @@ -bx.pwm.bed_score_aligned_pwm module -=================================== +bx.pwm.bed\_score\_aligned\_pwm module +====================================== .. automodule:: bx.pwm.bed_score_aligned_pwm - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.bed_score_aligned_string.rst b/doc/source/lib/bx.pwm.bed_score_aligned_string.rst index 80c72c31..1c5cb315 100644 --- a/doc/source/lib/bx.pwm.bed_score_aligned_string.rst +++ b/doc/source/lib/bx.pwm.bed_score_aligned_string.rst @@ -1,7 +1,7 @@ -bx.pwm.bed_score_aligned_string module -====================================== +bx.pwm.bed\_score\_aligned\_string module +========================================= .. 
automodule:: bx.pwm.bed_score_aligned_string - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.maf_select_motifs.rst b/doc/source/lib/bx.pwm.maf_select_motifs.rst index 0c475f56..1bcc31aa 100644 --- a/doc/source/lib/bx.pwm.maf_select_motifs.rst +++ b/doc/source/lib/bx.pwm.maf_select_motifs.rst @@ -1,7 +1,7 @@ -bx.pwm.maf_select_motifs module -=============================== +bx.pwm.maf\_select\_motifs module +================================= .. automodule:: bx.pwm.maf_select_motifs - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.position_weight_matrix.rst b/doc/source/lib/bx.pwm.position_weight_matrix.rst index ff486904..dfb42fee 100644 --- a/doc/source/lib/bx.pwm.position_weight_matrix.rst +++ b/doc/source/lib/bx.pwm.position_weight_matrix.rst @@ -1,7 +1,7 @@ -bx.pwm.position_weight_matrix module -==================================== +bx.pwm.position\_weight\_matrix module +====================================== .. automodule:: bx.pwm.position_weight_matrix - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.pwm_score_maf.rst b/doc/source/lib/bx.pwm.pwm_score_maf.rst index d597544a..f19b5a49 100644 --- a/doc/source/lib/bx.pwm.pwm_score_maf.rst +++ b/doc/source/lib/bx.pwm.pwm_score_maf.rst @@ -1,7 +1,7 @@ -bx.pwm.pwm_score_maf module -=========================== +bx.pwm.pwm\_score\_maf module +============================= .. 
automodule:: bx.pwm.pwm_score_maf - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.pwm_score_motifs.rst b/doc/source/lib/bx.pwm.pwm_score_motifs.rst index 192a665a..84a8afb0 100644 --- a/doc/source/lib/bx.pwm.pwm_score_motifs.rst +++ b/doc/source/lib/bx.pwm.pwm_score_motifs.rst @@ -1,7 +1,7 @@ -bx.pwm.pwm_score_motifs module -============================== +bx.pwm.pwm\_score\_motifs module +================================ .. automodule:: bx.pwm.pwm_score_motifs - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.pwm_score_positions.rst b/doc/source/lib/bx.pwm.pwm_score_positions.rst index cba956b8..d0caa749 100644 --- a/doc/source/lib/bx.pwm.pwm_score_positions.rst +++ b/doc/source/lib/bx.pwm.pwm_score_positions.rst @@ -1,7 +1,7 @@ -bx.pwm.pwm_score_positions module -================================= +bx.pwm.pwm\_score\_positions module +=================================== .. automodule:: bx.pwm.pwm_score_positions - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.pwm_tests.rst b/doc/source/lib/bx.pwm.pwm_tests.rst index 1b65c126..7f47610a 100644 --- a/doc/source/lib/bx.pwm.pwm_tests.rst +++ b/doc/source/lib/bx.pwm.pwm_tests.rst @@ -1,7 +1,7 @@ -bx.pwm.pwm_tests module -======================= +bx.pwm.pwm\_tests module +======================== .. automodule:: bx.pwm.pwm_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.pwm.rst b/doc/source/lib/bx.pwm.rst index c00f09f0..7026146c 100644 --- a/doc/source/lib/bx.pwm.rst +++ b/doc/source/lib/bx.pwm.rst @@ -5,6 +5,7 @@ Submodules ---------- .. 
toctree:: + :maxdepth: 3 bx.pwm.bed_score_aligned_pwm bx.pwm.bed_score_aligned_string @@ -19,6 +20,6 @@ Module contents --------------- .. automodule:: bx.pwm - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.rst b/doc/source/lib/bx.rst index b667d663..14d4fda6 100644 --- a/doc/source/lib/bx.rst +++ b/doc/source/lib/bx.rst @@ -5,24 +5,26 @@ Subpackages ----------- .. toctree:: - - bx.align - bx.arrays - bx.bbi - bx.cookbook - bx.intervals - bx.intseq - bx.misc - bx.motif - bx.phylo - bx.pwm - bx.seq - bx.tabular + :maxdepth: 3 + + bx.align + bx.arrays + bx.bbi + bx.cookbook + bx.intervals + bx.intseq + bx.misc + bx.motif + bx.phylo + bx.pwm + bx.seq + bx.tabular Submodules ---------- .. toctree:: + :maxdepth: 3 bx.binned_array bx.binned_array_tests @@ -43,6 +45,6 @@ Module contents --------------- .. automodule:: bx - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.core.rst b/doc/source/lib/bx.seq.core.rst index 00cfcce5..f8919b17 100644 --- a/doc/source/lib/bx.seq.core.rst +++ b/doc/source/lib/bx.seq.core.rst @@ -2,6 +2,6 @@ bx.seq.core module ================== .. automodule:: bx.seq.core - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.fasta.rst b/doc/source/lib/bx.seq.fasta.rst index 3918d5b9..81217e9d 100644 --- a/doc/source/lib/bx.seq.fasta.rst +++ b/doc/source/lib/bx.seq.fasta.rst @@ -2,6 +2,6 @@ bx.seq.fasta module =================== .. 
automodule:: bx.seq.fasta - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.fasta_tests.rst b/doc/source/lib/bx.seq.fasta_tests.rst index f5bf7948..e6880de8 100644 --- a/doc/source/lib/bx.seq.fasta_tests.rst +++ b/doc/source/lib/bx.seq.fasta_tests.rst @@ -1,7 +1,7 @@ -bx.seq.fasta_tests module -========================= +bx.seq.fasta\_tests module +========================== .. automodule:: bx.seq.fasta_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.nib.rst b/doc/source/lib/bx.seq.nib.rst index 03f9c67a..84c7458d 100644 --- a/doc/source/lib/bx.seq.nib.rst +++ b/doc/source/lib/bx.seq.nib.rst @@ -2,6 +2,6 @@ bx.seq.nib module ================= .. automodule:: bx.seq.nib - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.nib_tests.rst b/doc/source/lib/bx.seq.nib_tests.rst index c701dd1e..b73fc129 100644 --- a/doc/source/lib/bx.seq.nib_tests.rst +++ b/doc/source/lib/bx.seq.nib_tests.rst @@ -1,7 +1,7 @@ -bx.seq.nib_tests module -======================= +bx.seq.nib\_tests module +======================== .. automodule:: bx.seq.nib_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.qdna.rst b/doc/source/lib/bx.seq.qdna.rst index 32055d4b..6a181e69 100644 --- a/doc/source/lib/bx.seq.qdna.rst +++ b/doc/source/lib/bx.seq.qdna.rst @@ -2,6 +2,6 @@ bx.seq.qdna module ================== .. 
automodule:: bx.seq.qdna - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.qdna_tests.rst b/doc/source/lib/bx.seq.qdna_tests.rst index ad33feca..23632143 100644 --- a/doc/source/lib/bx.seq.qdna_tests.rst +++ b/doc/source/lib/bx.seq.qdna_tests.rst @@ -1,7 +1,7 @@ -bx.seq.qdna_tests module -======================== +bx.seq.qdna\_tests module +========================= .. automodule:: bx.seq.qdna_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.rst b/doc/source/lib/bx.seq.rst index 4d781254..2475452b 100644 --- a/doc/source/lib/bx.seq.rst +++ b/doc/source/lib/bx.seq.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.seq.core bx.seq.fasta @@ -22,6 +23,6 @@ Module contents --------------- .. automodule:: bx.seq - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.seq.rst b/doc/source/lib/bx.seq.seq.rst index f483cdc7..d14d1891 100644 --- a/doc/source/lib/bx.seq.seq.rst +++ b/doc/source/lib/bx.seq.seq.rst @@ -2,6 +2,6 @@ bx.seq.seq module ================= .. automodule:: bx.seq.seq - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.seq_tests.rst b/doc/source/lib/bx.seq.seq_tests.rst index 98cc278f..257ad48a 100644 --- a/doc/source/lib/bx.seq.seq_tests.rst +++ b/doc/source/lib/bx.seq.seq_tests.rst @@ -1,7 +1,7 @@ -bx.seq.seq_tests module -======================= +bx.seq.seq\_tests module +======================== .. 
automodule:: bx.seq.seq_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.twobit.rst b/doc/source/lib/bx.seq.twobit.rst index 958cb2b0..b3bfd698 100644 --- a/doc/source/lib/bx.seq.twobit.rst +++ b/doc/source/lib/bx.seq.twobit.rst @@ -2,6 +2,6 @@ bx.seq.twobit module ==================== .. automodule:: bx.seq.twobit - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seq.twobit_tests.rst b/doc/source/lib/bx.seq.twobit_tests.rst index 7cbe2300..9bf1ca79 100644 --- a/doc/source/lib/bx.seq.twobit_tests.rst +++ b/doc/source/lib/bx.seq.twobit_tests.rst @@ -1,7 +1,7 @@ -bx.seq.twobit_tests module -========================== +bx.seq.twobit\_tests module +=========================== .. automodule:: bx.seq.twobit_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seqmapping.rst b/doc/source/lib/bx.seqmapping.rst index 6b5f1e14..ea2bb79a 100644 --- a/doc/source/lib/bx.seqmapping.rst +++ b/doc/source/lib/bx.seqmapping.rst @@ -2,6 +2,6 @@ bx.seqmapping module ==================== .. automodule:: bx.seqmapping - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.seqmapping_tests.rst b/doc/source/lib/bx.seqmapping_tests.rst index 6539435e..75512fdf 100644 --- a/doc/source/lib/bx.seqmapping_tests.rst +++ b/doc/source/lib/bx.seqmapping_tests.rst @@ -1,7 +1,7 @@ -bx.seqmapping_tests module -========================== +bx.seqmapping\_tests module +=========================== .. 
automodule:: bx.seqmapping_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.tabular.io.rst b/doc/source/lib/bx.tabular.io.rst index 213b3df0..330497fd 100644 --- a/doc/source/lib/bx.tabular.io.rst +++ b/doc/source/lib/bx.tabular.io.rst @@ -2,6 +2,6 @@ bx.tabular.io module ==================== .. automodule:: bx.tabular.io - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.tabular.rst b/doc/source/lib/bx.tabular.rst index 4b06698b..5da0affa 100644 --- a/doc/source/lib/bx.tabular.rst +++ b/doc/source/lib/bx.tabular.rst @@ -5,6 +5,7 @@ Submodules ---------- .. toctree:: + :maxdepth: 3 bx.tabular.io @@ -12,6 +13,6 @@ Module contents --------------- .. automodule:: bx.tabular - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.wiggle.rst b/doc/source/lib/bx.wiggle.rst index 50e6e43e..8f6e61bb 100644 --- a/doc/source/lib/bx.wiggle.rst +++ b/doc/source/lib/bx.wiggle.rst @@ -2,6 +2,6 @@ bx.wiggle module ================ .. automodule:: bx.wiggle - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx.wiggle_tests.rst b/doc/source/lib/bx.wiggle_tests.rst index 8c4408f6..f1f373c5 100644 --- a/doc/source/lib/bx.wiggle_tests.rst +++ b/doc/source/lib/bx.wiggle_tests.rst @@ -1,7 +1,7 @@ -bx.wiggle_tests module -====================== +bx.wiggle\_tests module +======================= .. 
automodule:: bx.wiggle_tests - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.fpconst.rst b/doc/source/lib/bx_extras.fpconst.rst index e193071d..208c54f0 100644 --- a/doc/source/lib/bx_extras.fpconst.rst +++ b/doc/source/lib/bx_extras.fpconst.rst @@ -1,7 +1,7 @@ -bx_extras.fpconst module -======================== +bx\_extras.fpconst module +========================= .. automodule:: bx_extras.fpconst - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.lrucache.rst b/doc/source/lib/bx_extras.lrucache.rst index e403ad6a..c717330c 100644 --- a/doc/source/lib/bx_extras.lrucache.rst +++ b/doc/source/lib/bx_extras.lrucache.rst @@ -1,7 +1,7 @@ -bx_extras.lrucache module -========================= +bx\_extras.lrucache module +========================== .. automodule:: bx_extras.lrucache - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.pstat.rst b/doc/source/lib/bx_extras.pstat.rst index 6aea33a6..c0a6a7ac 100644 --- a/doc/source/lib/bx_extras.pstat.rst +++ b/doc/source/lib/bx_extras.pstat.rst @@ -1,7 +1,7 @@ -bx_extras.pstat module -====================== +bx\_extras.pstat module +======================= .. automodule:: bx_extras.pstat - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.pyparsing.rst b/doc/source/lib/bx_extras.pyparsing.rst index 83f54313..7eab3e11 100644 --- a/doc/source/lib/bx_extras.pyparsing.rst +++ b/doc/source/lib/bx_extras.pyparsing.rst @@ -1,7 +1,7 @@ -bx_extras.pyparsing module -========================== +bx\_extras.pyparsing module +=========================== .. 
automodule:: bx_extras.pyparsing - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.rst b/doc/source/lib/bx_extras.rst index 239579f9..7e106af1 100644 --- a/doc/source/lib/bx_extras.rst +++ b/doc/source/lib/bx_extras.rst @@ -1,12 +1,14 @@ -bx_extras package -================= +bx\_extras package +================== Submodules ---------- .. toctree:: + :maxdepth: 3 bx_extras.fpconst + bx_extras.fpconst_tests bx_extras.lrucache bx_extras.pstat bx_extras.pyparsing @@ -16,6 +18,6 @@ Module contents --------------- .. automodule:: bx_extras - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/bx_extras.stats.rst b/doc/source/lib/bx_extras.stats.rst index 097eaed1..c13aa071 100644 --- a/doc/source/lib/bx_extras.stats.rst +++ b/doc/source/lib/bx_extras.stats.rst @@ -1,7 +1,7 @@ -bx_extras.stats module -====================== +bx\_extras.stats module +======================= .. automodule:: bx_extras.stats - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/lib/modules.rst b/doc/source/lib/modules.rst index a0bba841..7672e969 100644 --- a/doc/source/lib/modules.rst +++ b/doc/source/lib/modules.rst @@ -2,7 +2,7 @@ lib === .. toctree:: - :maxdepth: 4 + :maxdepth: 3 bx bx_extras diff --git a/doc/source/lib/psyco_full.rst b/doc/source/lib/psyco_full.rst index 370be057..6a398fe7 100644 --- a/doc/source/lib/psyco_full.rst +++ b/doc/source/lib/psyco_full.rst @@ -1,7 +1,7 @@ -psyco_full module -================= +psyco\_full module +================== .. 
automodule:: psyco_full - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/static/base.css b/doc/source/static/base.css deleted file mode 100644 index e8dbe6a6..00000000 --- a/doc/source/static/base.css +++ /dev/null @@ -1,152 +0,0 @@ -@import url(tripoli.base.css); - -html { - font-family: 'Verdana', sans-serif; - color: #333333; -} -body { - padding: 3em 3em; -} - -h1.pageheader { - font-variant: small-caps; - margin-top: 0; - border-top: solid 1px; - padding-top: 2px; - border-bottom: solid 1px; - border-color: #CCCCCC; - margin-bottom: 1em; -} - -h1.pageheader a { - color: inherit; - text-decoration: inherit; - border: none; -} - -.content h1, .content h2, .content h3, .content h4, .content h5, .content h6 { - font-family: 'Hoefler Text', 'Georgia', serif; - font-weight: normal; - color: #666666; - /* border-bottom: solid #666666 1px; */ -} - -.content h1.pagetitle { - color: #c33; -} - -#main { -} - -.colpad { - padding: 0 2em; -} - -#main > .inner { - min-width: 70em; - max-width: 90em; - margin: auto; - height: 100%; -} - -#left { - background: white; - margin-right: 36%; /* 31em; */ - padding-right: 3%; - height: 100%; -} - -#right { - float: right; - width: 33%; /* 28em; */ - padding-left: 3%; - border-left: solid #CCCCCC 1px; -} - -.sidebar { - font-size: 1em; -} - -.sidebar ul { - margin-left: 0; -} - -.sidebar ul li { - list-style-type: none; - margin-bottom: 0.6em; -} - -.sidebar ul.pages { - margin-left: 5px; - margin-top: 0.6em; -} - -.sidebar ul.pages li { - background: url(hbullet.png) 0 0.4em no-repeat; - padding-left: 25px; - list-style-type: none; -} - -.sidebar ul.pages li { -} - -.sidebar h1 { - clear: both; -} - -.sidebar .publications .info { - color: #666666; -} - -.postinfo { - color: #666666; - font-size: 92%; - margin-top: -1em; -} - -.postreadlink { - margin-top: -1em; -} - -.sidebar .posts .info { - color: #666666; -} - -.comments_title { - margin-top: 2em; -} - 
-label { - display: block; -} - -#footer { - clear: both; -} - -a, a:link, a:visited { - text-decoration: none; - border-bottom: dotted #666666 1px; - color: black; -} - -a:hover { - color: #CC3333; -} - -li { - list-style: square; -} - -table.layout td { - vertical-align: top; - padding-left: 2em; - padding-right: 2em; - border-left: solid #999999 1px -} - -hr { - border: none; - height: 1px; - background: #999999; -} diff --git a/doc/source/static/tripoli.base.css b/doc/source/static/tripoli.base.css deleted file mode 100644 index da578421..00000000 --- a/doc/source/static/tripoli.base.css +++ /dev/null @@ -1,509 +0,0 @@ -/* - * Tripoli is a generic CSS standard for HTML rendering. - * Copyright (C) 2007 David Hellsing - * - * http://devkick.com/lab/tripoli/ - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . -**/ - -/* -_______________________________ -RESET */ - -* -{ - text-decoration:none; - font-size:1em; - outline:none; - margin:0; - padding:0; -} - -code,kbd,samp,pre,tt,var,textarea,input,select,isindex,listing,xmp,plaintext -{ - font:inherit; - white-space:normal; -} - -a,img,a img,iframe,form,abbr,acronym,object,applet,table,a abbr,a acronym -{ - border-width:0; -} - -dfn,i,cite,var,address,em -{ - font-style:normal; -} - -th,b,strong,h1,h2,h3,h4,h5,h6,dt -{ - font-weight:normal; -} - -caption,th,td -{ - text-align:left; -} - -html -{ - background:white; - color:black; - line-height:1; - font-family:arial, sans-serif; -} - -/* \*/ - -html -{ - font-family:sans-serif; -} - -/* */ - -q -{ - quotes:"\201C""\201D""\2018""\2019"; -} - -ul,ol,dir,menu -{ - list-style:none; -} - -sub,sup -{ - vertical-align:baseline; -} - -a -{ - color:inherit; -} - -/* -_______________________________ -DISABLE DEPRECATED HTML */ - -font,basefont -{ - color:inherit; - font:inherit; - font-size:100%; -} - - -center,*[align] -{ - text-align:inherit; -} - -s,strike,u -{ - text-decoration:inherit; -} - -img -{ - 
border:none; - margin:0; -} - -ol -{ - list-style-type:decimal; -} - -body -{ - background-color:transparent; -} - -tr,th,td -{ - width:auto; - height:auto; - background-color:transparent; - vertical-align:inherit; - border:none; -} - -table[border],.content table[border] -{ - border-collapse:separate; - border-spacing:0; -} - -nobr -{ - white-space:normal; -} - -marquee -{ - overflow:visible; - -moz-binding:none; -} - -blink -{ - text-decoration:none; -} - -/* -_______________________________ -GENERAL */ - -html -{ - font-size:125%; -} - -body -{ - font-size:50%; -} - -a -{ - text-decoration:underline; -} - -strong,th,thead td,h1,h2,h3,h4,h5,h6,dt -{ - font-weight:bold; -} - -cite,em,dfn -{ - font-style:italic; -} - -code,kbd,samp,pre,tt,var,input[type='text'],input[type='password'],textarea -{ - font-size:100%; - font-family:mono-space,monospace; -} - -pre -{ - white-space:pre; -} - -pre * -{ - font-size:100%; - white-space:pre; -} - -del -{ - text-decoration:line-through; -} - -ins,dfn -{ - border-bottom:1px solid black; -} - -small,sup,sub -{ - font-size:85%; -} - -big -{ - font-size:125%; - line-height:80%; -} - -abbr,acronym -{ - text-transform:uppercase; - font-size:85%; - letter-spacing:.1em; -} - -abbr[title],acronym[title],dfn[title] -{ - cursor:help; - border-bottom:1px dotted black; -} - -sup -{ - vertical-align:super; -} - -sub -{ - vertical-align:sub; -} - -blockquote -{ - padding-left:2.2em; -} - -hr -{ - display:none; /* We will re-reset it later for content */ -} - -:lang(af),:lang(nl),:lang(pl) -{ - quotes:'\201E' '\201D' '\201A' '\2019'; -} - -:lang(bg),:lang(cs),:lang(de),:lang(is),:lang(lt),:lang(sk),:lang(sr),:lang(ro) -{ - quotes:'\201E' '\201C' '\201A' '\2018'; -} - -:lang(da),:lang(hr) -{ - quotes:'\00BB' '\00AB' '\203A' '\2039'; -} - -:lang(el),:lang(es),:lang(sq),:lang(tr) -{ - quotes:'\00AB' '\00BB' '\2039' '\203A'; -} - -:lang(en-GB) -{ - quotes:'\2018' '\2019' '\201C' '\201D'; -} - -:lang(fi),:lang(sv) -{ - quotes:'\201D' '\201D' 
'\2019' '\2019'; -} - -:lang(fr) -{ - quotes:'\ab\2005' '\2005\bb' '\2039\2005' '\2005\203a'; -} - -*[lang|='en'] q:before -{ - content:'\201C'; -} - -*[lang|='en'] q:after -{ - content:'\201D'; -} - -*[lang|='en'] q q:before -{ - content:'\2018'; -} - -*[lang|='en'] q q:after -{ - content:'\2019'; -} - -input,select,button -{ - cursor:pointer; -} - -input[type='text'],input[type='password'] -{ - cursor:text; -} - -input[type='hidden'] -{ - display:none; -} - -/* -_______________________________ -CONTENT */ - -.content -{ - font-size:1.2em; - line-height:1.6em; -} - -.content h1 -{ - font-size:1.6em; - line-height:1; - margin:1em 0 .5em; -} - -.content h2 -{ - font-size:1.5em; - line-height:1; - margin:1.07em 0 .535em; -} - -.content h3 -{ - font-size:1.4em; - line-height:1; - margin:1.14em 0 .57em; -} - -.content h4 -{ - font-size:1.3em; - line-height:1; - margin:1.23em 0 .615em; -} - -.content h5 -{ - font-size:1.2em; - line-height:1; - margin:1.33em 0 .67em; -} - -.content h6 -{ - font-size:1em; - line-height:1; - margin:1.6em 0 .8em; -} - -.content hr -{ - display:block; - background:black; - color:black; - width:100%; - height:1px; - border:none; -} - -.content ul -{ - list-style:disc outside; -} - -.content ol -{ - list-style:decimal outside; -} - -.content table -{ - border-collapse:collapse; -} - -.content hr,.content p,.content ul,.content ol,.content dl,.content pre, .content address,.content table,.content form -{ - margin-bottom:1.6em; -} - -.content p+p -{ - margin-top:-.8em; -} - -.content fieldset -{ - margin:1.6em 0; - padding:1.6em; -} - -/* \*/ - -.content legend -{ - padding-left:.8em; - padding-right:.8em; -} - -/* */ - -@media all and (min-width: 0px) /* for Opera 8 */ -{ - .content legend - { - margin-bottom:1.6em; - } - .content fieldset - { - margin-top:0; - } - .content[class^='content'] fieldset - { - margin-top:1.6em; - } -} - -.content fieldset>*:first-child -{ - margin-top:0; -} - -.content textarea,.content input[type='text'] -{ - 
padding:.1em .2em; -} - -.content input -{ - padding:.2em .1em; -} - -.content select -{ - padding:.2em .1em 0; -} - -.content select[multiple] -{ - margin-bottom:.8em; -} - -.content option -{ - padding:0 .4em .1em; -} - -.content button -{ - padding:.3em .5em; -} - -.content input[type='radio'] -{ - position:relative; - bottom:-.2em; -} - -.content dt -{ - margin-top:.8em; - margin-bottom:.4em; -} - -.content ul,.content ol -{ - margin-left:2.2em; -} - -.content caption,.content form div -{ - padding-bottom:.8em; -} - -.content ul ul,content ol ul,.content ul ol,content ol ol -{ - margin-bottom:0; -} - -/* -_______________________________ -END */ diff --git a/doc/source/templates/index.html b/doc/source/templates/index.html deleted file mode 100644 index 597f9307..00000000 --- a/doc/source/templates/index.html +++ /dev/null @@ -1,34 +0,0 @@ -{% extends "layout.html" %} -{% set title = 'bx python' %} -{% block body %} -

Welcome

- -

- - The bx-python project is a python library and associated set of scripts to allow for rapid implementation of genome scale analyses. The library contains a variety of useful modules, but the particular strengths are: - -

    -
  • Classes for reading and working with genome-scale multiple local alignments (in MAF, AXT, and LAV formats)
  • -
  • Generic data structure for indexing on disk files that contain blocks of data associated with intervals on various sequences (used, for example, to provide random access to individual alignments in huge files; optomized for use over network filesystems) -
  • -
  • Data structures for working with intervals on sequences
  • -
  • "Binned bitsets" which act just like chromosome sized bit arrays, but lazily allocate regions and allow large blocks of all set or all unset bits to be stored compactly
  • -
  • "Intersecter" for performing fast intersection tests that preserve both query and target intervals and associated annotation
  • - - These tools have been used in a variety of published research, and are a fundamental part of the ongoing Galaxy and ESPERR projects. - -
- -

- -

Documentation

- - - -{% endblock %} diff --git a/doc/source/templates/indexsidebar.html b/doc/source/templates/indexsidebar.html deleted file mode 100644 index 4247e03b..00000000 --- a/doc/source/templates/indexsidebar.html +++ /dev/null @@ -1,9 +0,0 @@ -

About bx-python

- -

Current version: {{ version }}

- -

Download

- -

- bx-python source -

diff --git a/doc/source/templates/layout.html b/doc/source/templates/layout.html deleted file mode 100644 index 48b10452..00000000 --- a/doc/source/templates/layout.html +++ /dev/null @@ -1,51 +0,0 @@ -{% extends "!layout.html" %} - -{%- block document %} -
-
-
-

- bx-python -

-
- -
-
- {%- if builder != 'htmlhelp' %} -
- {%- endif %} -
- {% block body %} {% endblock %} -
- {%- if builder != 'htmlhelp' %} -
- {%- endif %} -
-
-
-
-{%- endblock %} - -{% block doctype %} - -{% endblock %} - -{% block rootrellink %} -
  • bx-python home
  • -
  • Table of contents
  • -{% endblock %} - -{# Sidebar and already handled #} - -{% block relbar1 %}{% endblock %} -{% block relbar2 %}{% endblock %} - -{% block sidebar1 %}{% endblock %} -{% block sidebar2 %}{% endblock %} - -{% block footer %}{% endblock %} \ No newline at end of file From f48bf2dc9ac76cee808290f84432dc8aaa1b18f4 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 30 Jan 2024 17:34:13 +0000 Subject: [PATCH 56/68] Update to black 2024 stable style --- lib/bx/align/core.py | 1 + lib/bx/align/epo.py | 1 - lib/bx/align/maf.py | 1 + lib/bx/align/score_tests.py | 1 + lib/bx/align/sitemask/sitemask_tests.py | 1 + lib/bx/interval_index_file.py | 1 + lib/bx/intervals/operations/merge.py | 1 - lib/bx/misc/binary_file.py | 1 + lib/bx/misc/seekbzip2.py | 1 + lib/bx/misc/seekbzip2_tests.py | 1 + lib/bx/motif/pwm.py | 1 + lib/bx/seq/qdna.py | 1 + lib/bx/seq/twobit.py | 1 + lib/bx/seqmapping_tests.py | 1 + lib/bx/tabular/io.py | 1 + lib/bx/wiggle_tests.py | 1 + setup.cfg | 2 +- 17 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/bx/align/core.py b/lib/bx/align/core.py index f8e7e376..21aa53c9 100644 --- a/lib/bx/align/core.py +++ b/lib/bx/align/core.py @@ -1,6 +1,7 @@ """ Classes that represent alignments between multiple sequences. """ + import random import weakref diff --git a/lib/bx/align/epo.py b/lib/bx/align/epo.py index e8a1a582..3a2d4c1a 100644 --- a/lib/bx/align/epo.py +++ b/lib/bx/align/epo.py @@ -1,6 +1,5 @@ """Classes and utilities for mutliple alignments from the EPO pipeline""" - import logging import os import pickle as cPickle diff --git a/lib/bx/align/maf.py b/lib/bx/align/maf.py index c747bf74..50448768 100644 --- a/lib/bx/align/maf.py +++ b/lib/bx/align/maf.py @@ -4,6 +4,7 @@ .. _MAF: http://genome.ucsc.edu/FAQ/FAQformat.html#format5 .. 
_multiz: http://www.bx.psu.edu/miller_lab/ """ + from io import ( StringIO, TextIOWrapper, diff --git a/lib/bx/align/score_tests.py b/lib/bx/align/score_tests.py index b3d86895..a0e9dc9b 100644 --- a/lib/bx/align/score_tests.py +++ b/lib/bx/align/score_tests.py @@ -1,6 +1,7 @@ """ Tests for `bx.align.score`. """ + import unittest from io import StringIO diff --git a/lib/bx/align/sitemask/sitemask_tests.py b/lib/bx/align/sitemask/sitemask_tests.py index dd7fec2b..994c6973 100644 --- a/lib/bx/align/sitemask/sitemask_tests.py +++ b/lib/bx/align/sitemask/sitemask_tests.py @@ -1,6 +1,7 @@ """ Tests for `bx.align.maf.sitemask`. """ + import tempfile from io import StringIO diff --git a/lib/bx/interval_index_file.py b/lib/bx/interval_index_file.py index 12e2be7f..03a1cc87 100644 --- a/lib/bx/interval_index_file.py +++ b/lib/bx/interval_index_file.py @@ -80,6 +80,7 @@ ... ... ... ============ =========== ================================================= """ + import os.path import sys from bisect import ( diff --git a/lib/bx/intervals/operations/merge.py b/lib/bx/intervals/operations/merge.py index d94202c3..7b4dd68f 100644 --- a/lib/bx/intervals/operations/merge.py +++ b/lib/bx/intervals/operations/merge.py @@ -2,7 +2,6 @@ Merge overlapping regions in two sets of genomic intervals. """ - from bx.intervals.io import BitsetSafeReaderWrapper from bx.intervals.operations import ( bits_set_in_range, diff --git a/lib/bx/misc/binary_file.py b/lib/bx/misc/binary_file.py index bce48007..e16a139d 100644 --- a/lib/bx/misc/binary_file.py +++ b/lib/bx/misc/binary_file.py @@ -1,6 +1,7 @@ """ Wrappers for doing binary IO on file-like objects """ + import struct import sys diff --git a/lib/bx/misc/seekbzip2.py b/lib/bx/misc/seekbzip2.py index 6f4b6b52..03372e0c 100644 --- a/lib/bx/misc/seekbzip2.py +++ b/lib/bx/misc/seekbzip2.py @@ -1,6 +1,7 @@ """ Semi-random access to bz2 compressed data. 
""" + import bisect from ._seekbzip2 import SeekBzip2 diff --git a/lib/bx/misc/seekbzip2_tests.py b/lib/bx/misc/seekbzip2_tests.py index 550b7cd0..94bb759e 100644 --- a/lib/bx/misc/seekbzip2_tests.py +++ b/lib/bx/misc/seekbzip2_tests.py @@ -1,6 +1,7 @@ """ Tests for `bx.misc.seekbzip2`. """ + import bz2 import os import random diff --git a/lib/bx/motif/pwm.py b/lib/bx/motif/pwm.py index e9147e6a..32f433a5 100644 --- a/lib/bx/motif/pwm.py +++ b/lib/bx/motif/pwm.py @@ -1,6 +1,7 @@ """ Classes for working with position specific matrices. """ + from copy import copy import numpy diff --git a/lib/bx/seq/qdna.py b/lib/bx/seq/qdna.py index f1cd58e4..9f0df018 100644 --- a/lib/bx/seq/qdna.py +++ b/lib/bx/seq/qdna.py @@ -39,6 +39,7 @@ Recognized properties (at present only one): - codebook: A string in qdna code file format (see QdnaCodebook class for details). """ + import struct from io import StringIO diff --git a/lib/bx/seq/twobit.py b/lib/bx/seq/twobit.py index 77a7d34d..36e05710 100644 --- a/lib/bx/seq/twobit.py +++ b/lib/bx/seq/twobit.py @@ -1,6 +1,7 @@ """ Access to files containing sequence data in 'twobit' format. """ + from collections.abc import Mapping from struct import ( calcsize, diff --git a/lib/bx/seqmapping_tests.py b/lib/bx/seqmapping_tests.py index 879c4f65..efb5ab03 100644 --- a/lib/bx/seqmapping_tests.py +++ b/lib/bx/seqmapping_tests.py @@ -1,6 +1,7 @@ """ Tests for `bx.seqmapping`. """ + import unittest from io import StringIO diff --git a/lib/bx/tabular/io.py b/lib/bx/tabular/io.py index 63d8e517..79a01a1b 100644 --- a/lib/bx/tabular/io.py +++ b/lib/bx/tabular/io.py @@ -1,6 +1,7 @@ """ Reading and writing delimited data files (with headers and comments). """ + from itertools import count FIRST_LINE_IS_HEADER = object() diff --git a/lib/bx/wiggle_tests.py b/lib/bx/wiggle_tests.py index 67451775..3e3f4dcb 100644 --- a/lib/bx/wiggle_tests.py +++ b/lib/bx/wiggle_tests.py @@ -1,6 +1,7 @@ """ Tests for `bx.wiggle`. 
""" + import unittest from io import StringIO diff --git a/setup.cfg b/setup.cfg index eec006d5..e6938fc5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,7 @@ snapshot = egg_info -rb_DEV bdist_egg rotate -m.egg -k1 build_docs = build_sphinx build_apidocs [flake8] -ignore = E203,E501,E741,W503 +ignore = E203,E501,E701,E704,E741,W503 exclude = .git,.tox,.venv,build,doc/source/conf.py import-order-style = smarkets application-import-names = bx,bx_extras From 0501d899cfdc385cab654fd4eaa785334626f044 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 30 Jan 2024 18:01:04 +0000 Subject: [PATCH 57/68] Temporarily disable doctesting of C extension modules xref: https://github.com/lgpage/pytest-cython/issues/58 --- pytest.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index e77e08f2..76374a0b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] -addopts = --doctest-cython --doctest-modules +#addopts = --doctest-cython --doctest-modules # https://github.com/lgpage/pytest-cython/issues/58 +addopts = --doctest-modules python_files = *_tests.py testpaths = lib script_tests/ From e3e287b0b4ba546fb556116799ba6592727d831c Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 7 Feb 2024 13:28:10 +0000 Subject: [PATCH 58/68] Add support for Python 3.12. 
Drop support for Python 3.7 --- .github/workflows/deploy.yaml | 8 +++---- .github/workflows/test.yaml | 6 ++--- lib/bx/align/epo_tests.py | 2 +- lib/bx/cookbook/argparse.py | 3 +-- pyproject.toml | 2 +- setup.cfg | 4 ++-- setup.py | 43 ----------------------------------- tox.ini | 38 ++++++++++++++----------------- 8 files changed, 29 insertions(+), 77 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 8a82da86..0e921ea7 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -32,10 +32,10 @@ jobs: run: python -m cibuildwheel --output-dir dist env: CIBW_ARCHS: ${{matrix.arch}} - # Skip building musllinux wheels for now, they take too long to build, - # mainly because numpy doesn't have musllinux wheels on PyPI yet. - # Skip also building for PyPy 3.7-3.8, which are deprecated upstream. - CIBW_SKIP: '*-musllinux* pp37-* pp38-*' + # Skip building musllinux wheels for the Python versions for which the oldest + # supported numpy version doesn't have musllinux wheels on PyPI. + # Skip also building for PyPy 3.8, which is deprecated upstream. 
+ CIBW_SKIP: "cp38-musllinux_* cp39-musllinux_* cp310-musllinux_* cp311-musllinux_* pp38-*" - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v3 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2f563c42..473288e7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.11'] + python-version: ['3.8', '3.12'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -34,4 +34,4 @@ jobs: - name: Install tox run: pip install tox - name: Test - run: tox -e py + run: tox -e test diff --git a/lib/bx/align/epo_tests.py b/lib/bx/align/epo_tests.py index 7d4e3e66..186cf9d0 100644 --- a/lib/bx/align/epo_tests.py +++ b/lib/bx/align/epo_tests.py @@ -187,7 +187,7 @@ def cch(cigar, s, e): for s, t, q in zip(S, T, Q): if not (cch(c1, th, th + s) and cch(c2, th, th + s)): pdb.set_trace() - assert cch(c1, th, th + s) and cch(c2, th, th + s), f"{c1[th:th+s]} and {c2[th:th+s]}" + assert cch(c1, th, th + s) and cch(c2, th, th + s), f"{c1[th:th + s]} and {c2[th:th + s]}" if t > q: cch(c1, th + s, th + s + t) and c1[th + s : th + s + t] == "-" * t else: diff --git a/lib/bx/cookbook/argparse.py b/lib/bx/cookbook/argparse.py index f557f50b..9dc87efe 100644 --- a/lib/bx/cookbook/argparse.py +++ b/lib/bx/cookbook/argparse.py @@ -982,8 +982,7 @@ def __call__(self, parser, namespace, values, option_string=None): class _SubParsersAction(Action): class _ChoicesPseudoAction(Action): def __init__(self, name, help): - sup = super(_SubParsersAction._ChoicesPseudoAction, self) - sup.__init__(option_strings=[], dest=name, help=help) + super().__init__(option_strings=[], dest=name, help=help) 
def __init__(self, option_strings, prog, parser_class, dest=SUPPRESS, help=None, metavar=None): self._prog_prefix = prog diff --git a/pyproject.toml b/pyproject.toml index 0e1b900f..c7abb96d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [tool.black] include = '\.pyi?$' line-length = 120 -target-version = ['py37'] +target-version = ['py38'] [tool.cibuildwheel] test-command = """ diff --git a/setup.cfg b/setup.cfg index e6938fc5..bf9687e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,11 +8,11 @@ classifiers = License :: OSI Approved :: MIT License Operating System :: POSIX Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Topic :: Scientific/Engineering :: Bio-Informatics Topic :: Software Development :: Libraries :: Python Modules name = bx-python @@ -34,7 +34,7 @@ package_dir = =lib py_modules = psyco_full -python_requires = >=3.7 +python_requires = >=3.8 zip_safe = False [options.package_data] diff --git a/setup.py b/setup.py index 9e43209f..0d2d1523 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,5 @@ -import os -import os.path import platform import sys -from distutils.core import Command from glob import glob from setuptools import ( @@ -56,46 +53,6 @@ def run(self): except ImportError: pass -# Use epydoc if found -try: - import epydoc.cli - - # Create command class to build API documentation - class BuildAPIDocs(Command): - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - # Save working directory and args - old_argv = sys.argv - old_cwd = os.getcwd() - # Build command line for Epydoc - sys.argv = """epydoc.py bx --verbose --html --simple-term - --exclude=._ - --exclude=_tests - --docformat=reStructuredText - 
--output=../doc/docbuild/html/apidoc""".split() - # Make output directory - if not os.path.exists("./doc/docbuild/html/apidoc"): - os.mkdir("./doc/docbuild/html/apidoc") - # Move to lib directory (so bx package is in current directory) - os.chdir("./lib") - # Invoke epydoc - epydoc.cli.cli() - # Restore args and working directory - sys.argv = old_argv - os.chdir(old_cwd) - - # Add to extra_commands - command_classes["build_apidocs"] = BuildAPIDocs -except Exception: - pass - # ---- Extension Modules ---------------------------------------------------- # # suppress C++ #warning, e.g., to silence NumPy deprecation warnings: diff --git a/tox.ini b/tox.ini index b0c8978d..7993bd49 100644 --- a/tox.ini +++ b/tox.ini @@ -1,26 +1,22 @@ [tox] -envlist = lint, py +envlist = lint, test [testenv] -commands_pre = - python setup.py build_ext --inplace commands = - pytest {posargs} + test: pytest {posargs} + lint: flake8 . + lint: black --check --diff . + lint: isort --check --diff . deps = - Cython - numpy - pytest - pytest-cython - python-lzo >= 1.14 # Python 3.10 support -skip_install = true - -[testenv:lint] -commands_pre = -commands = - flake8 . - black --check --diff . - isort --check --diff . 
-deps = - black - flake8 - isort + test: Cython + test: numpy + test: pytest + test: pytest-cython + test: python-lzo >= 1.14 # Python 3.10 support + lint: black + lint: flake8 + lint: isort +package = + test: editable +skip_install = + lint: true From d884f25706e2655f439f3be1b3eef18666141eb5 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 7 Feb 2024 20:52:25 +0000 Subject: [PATCH 59/68] Upload upload-artifact/download-artifact actions to v4 --- .github/workflows/deploy.yaml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 0e921ea7..983dc5ac 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -11,18 +11,18 @@ jobs: matrix: include: - os: ubuntu-latest - arch: auto + archs: auto - os: macos-latest - arch: x86_64 arm64 + archs: x86_64 arm64 - os: ubuntu-latest - arch: aarch64 + archs: aarch64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.x' - name: Set up QEMU to build non-native architectures - if: ${{ matrix.arch == 'aarch64' }} + if: ${{ matrix.archs == 'aarch64' }} uses: docker/setup-qemu-action@v3 - name: Install required Python packages run: | @@ -31,16 +31,16 @@ jobs: - name: Build wheels run: python -m cibuildwheel --output-dir dist env: - CIBW_ARCHS: ${{matrix.arch}} + CIBW_ARCHS: ${{ matrix.archs }} # Skip building musllinux wheels for the Python versions for which the oldest # supported numpy version doesn't have musllinux wheels on PyPI. # Skip also building for PyPy 3.8, which is deprecated upstream. 
CIBW_SKIP: "cp38-musllinux_* cp39-musllinux_* cp310-musllinux_* cp311-musllinux_* pp38-*" - name: Check packages run: twine check dist/* - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: packages + name: "packages-${{ matrix.os }}-${{ matrix.archs }}" path: dist/ build_sdist: @@ -64,21 +64,24 @@ jobs: python -c 'import bx, bx.align, bx.align.sitemask, bx.align.tools, bx.arrays, bx.bbi, bx.cookbook, bx.intervals, bx.intervals.operations, bx.intseq, bx.misc, bx.motif, bx.motif.io, bx.motif.logo, bx.phylo, bx.pwm, bx.seq, bx.tabular, bx_extras' - name: Check packages run: twine check dist/* - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: - name: packages + name: packages-sdist path: dist/ upload_pypi: - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && github.repository_owner == 'bxlab' needs: [build_wheels, build_sdist] runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: - name: packages + merge-multiple: true path: dist + pattern: packages-* + - name: Display structure of downloaded files + run: ls -R dist/ - name: Publish to PyPI + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') && github.repository_owner == 'bxlab' uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ From 2cbe5628dca319aab800c5662787c71c6216a772 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Fri, 9 Feb 2024 16:27:37 +0000 Subject: [PATCH 60/68] Release 0.11.0 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index 61fb31ca..ae6db5f1 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = "0.10.0" +__version__ = "0.11.0" From 7fce74336815496865ec7ac9b4770837892158f6 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Tue, 27 Feb 2024 16:27:25 +0100 Subject: [PATCH 61/68] Add pytest 8.x compatability replace nose-style module setup function with a pytest fixture --- lib/bx/binned_array_tests.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index c61c8671..dc0b9726 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -2,6 +2,7 @@ Tests for `bx.binned_array`. """ +import pytest from numpy import ( allclose, concatenate, @@ -22,12 +23,9 @@ # CHUNK_SIZE_RANDOM=9456 # CHUNK_SIZE_ZEROS=8972 -source = target = None - -def setup(): - global source - global target +@pytest.fixture(scope="module") +def source_target(): source = [] for _ in range(13): if random() < 0.5: @@ -43,7 +41,8 @@ def setup(): return source, target -def test_simple(): +def test_simple(source_target): + source, target = source_target # Verify for i in range(len(source)): assert source[i] == target[i], "No match, index: %d, source: %f, target: %f, len( source ): %d" % ( @@ -66,7 +65,8 @@ def test_simple(): ) -def test_file(): +def test_file(source_target): + source, target = source_target # With a file (zlib) target.to_file(open("/tmp/foo", "wb")) target2 = FileBinnedArray(open("/tmp/foo", "rb")) @@ -87,7 +87,8 @@ def test_file(): ) -def test_file_lzo(): +def test_file_lzo(source_target): + source, target = source_target # With a file (lzo) target.to_file(open("/tmp/foo3", "wb"), comp_type="lzo") target3 = FileBinnedArray(open("/tmp/foo3", "rb")) @@ -109,7 +110,8 @@ def test_file_lzo(): ) -def test_binned_array_writer(): +def test_binned_array_writer(source_target): + source, target = source_target # Test with ba writer o = open("/tmp/foo4", "wb") w = BinnedArrayWriter(o, 128, comp_type="lzo") From c697626b9d4207ccd8f351a1cc4bfa61cc24b5fc Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Tue, 27 Feb 2024 18:16:08 +0100 Subject: [PATCH 62/68] enable post-installation testing --- lib/bx/align/sitemask/sitemask_tests.py | 2 +- lib/bx/binned_array_tests.py | 2 +- lib/bx/interval_index_file_tests.py | 4 ++-- lib/bx/intervals/cluster_tests.py | 10 +--------- lib/bx/misc/seekbzip2_tests.py | 2 +- lib/bx/motif/io/transfac_tests.py | 2 +- lib/bx/motif/pwm_tests.py | 2 +- lib/bx/seq/fasta_tests.py | 2 +- lib/bx/seq/nib_tests.py | 2 +- lib/bx/seq/qdna_tests.py | 2 +- lib/bx/seq/twobit_tests.py | 2 +- 11 files changed, 12 insertions(+), 20 deletions(-) diff --git a/lib/bx/align/sitemask/sitemask_tests.py b/lib/bx/align/sitemask/sitemask_tests.py index 994c6973..67c38ca7 100644 --- a/lib/bx/align/sitemask/sitemask_tests.py +++ b/lib/bx/align/sitemask/sitemask_tests.py @@ -6,7 +6,7 @@ from io import StringIO import bx.align.maf -from . import cpg +from bx.align.sitemask import cpg test_maf_cpg = """##maf version=1 scoring=none a score=0 diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index dc0b9726..8af0fb46 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -11,7 +11,7 @@ ) from numpy.random import random_sample as random -from .binned_array import ( +from bx.binned_array import ( BinnedArray, BinnedArrayWriter, FileBinnedArray, diff --git a/lib/bx/interval_index_file_tests.py b/lib/bx/interval_index_file_tests.py index 62e0afb7..1316a7a4 100644 --- a/lib/bx/interval_index_file_tests.py +++ b/lib/bx/interval_index_file_tests.py @@ -1,8 +1,8 @@ import random from tempfile import mktemp -from . 
import interval_index_file -from .interval_index_file import Indexes +from bx import interval_index_file +from bx.interval_index_file import Indexes def test_offsets(): diff --git a/lib/bx/intervals/cluster_tests.py b/lib/bx/intervals/cluster_tests.py index 2a51ffe3..1fa0957c 100644 --- a/lib/bx/intervals/cluster_tests.py +++ b/lib/bx/intervals/cluster_tests.py @@ -1,14 +1,6 @@ -import os -import sys import unittest -try: - sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) -except Exception: - sys.path.insert(0, os.path.dirname(os.path.abspath("."))) - -# from bx.intervals.cluster import ClusterTree -from .cluster import ClusterTree +from bx.intervals.cluster import ClusterTree class TestCluster(unittest.TestCase): diff --git a/lib/bx/misc/seekbzip2_tests.py b/lib/bx/misc/seekbzip2_tests.py index 94bb759e..8942b261 100644 --- a/lib/bx/misc/seekbzip2_tests.py +++ b/lib/bx/misc/seekbzip2_tests.py @@ -7,7 +7,7 @@ import random from codecs import encode -from . import seekbzip2 +from bx.misc import seekbzip2 F = None T = None diff --git a/lib/bx/motif/io/transfac_tests.py b/lib/bx/motif/io/transfac_tests.py index 1d7c290d..10f54343 100644 --- a/lib/bx/motif/io/transfac_tests.py +++ b/lib/bx/motif/io/transfac_tests.py @@ -2,7 +2,7 @@ from numpy import allclose -from . import transfac +from bx.motif.io import transfac sample = """ VV TRANSFAC MATRIX TABLE, Rel.3.2 26-06-1997 diff --git a/lib/bx/motif/pwm_tests.py b/lib/bx/motif/pwm_tests.py index b07685f0..1a6ccee1 100644 --- a/lib/bx/motif/pwm_tests.py +++ b/lib/bx/motif/pwm_tests.py @@ -3,7 +3,7 @@ isnan, ) -from . import pwm +from bx.motif import pwm def test_create(): diff --git a/lib/bx/seq/fasta_tests.py b/lib/bx/seq/fasta_tests.py index df4105bf..a659bda4 100644 --- a/lib/bx/seq/fasta_tests.py +++ b/lib/bx/seq/fasta_tests.py @@ -4,7 +4,7 @@ import unittest -from . 
import fasta +from bx.seq import fasta test_fa = "test_data/seq_tests/test.fa" diff --git a/lib/bx/seq/nib_tests.py b/lib/bx/seq/nib_tests.py index f54ac988..ce78d6da 100644 --- a/lib/bx/seq/nib_tests.py +++ b/lib/bx/seq/nib_tests.py @@ -4,7 +4,7 @@ import unittest -from . import nib +from bx.seq import nib test_nib = "test_data/seq_tests/test.nib" diff --git a/lib/bx/seq/qdna_tests.py b/lib/bx/seq/qdna_tests.py index 9c67bb55..03e0c4c5 100644 --- a/lib/bx/seq/qdna_tests.py +++ b/lib/bx/seq/qdna_tests.py @@ -4,7 +4,7 @@ import unittest -from . import qdna +from bx.seq import qdna test_qdna = "test_data/seq_tests/test.qdna" diff --git a/lib/bx/seq/twobit_tests.py b/lib/bx/seq/twobit_tests.py index 3f63e17b..62d525cf 100644 --- a/lib/bx/seq/twobit_tests.py +++ b/lib/bx/seq/twobit_tests.py @@ -2,7 +2,7 @@ import pytest -from . import twobit +from bx.seq import twobit def quick_fasta_iter(f): From ee2b8b819f10e303cddc29e4eeaab3d443e68259 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 14 Mar 2024 12:26:10 +0100 Subject: [PATCH 63/68] correct include for strlen() In Debian, as of dpkg 1.22.6, we automatically add -Werror=implicit-function-declaration to CFLAGS, which found this bug. 
--- src/pwm_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pwm_utils.c b/src/pwm_utils.c index 3303b106..1cd9867b 100644 --- a/src/pwm_utils.c +++ b/src/pwm_utils.c @@ -1,7 +1,7 @@ #include #include -#include +#include int symbol_match( char, char); int pattern_match( char*, char*, int); From 2e46af6b2954fec38ad1bc85839d812a2dd0919d Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Sun, 31 Mar 2024 17:55:02 +0100 Subject: [PATCH 64/68] Re-enable doctesting of C extension modules --- lib/bx/intervals/intersection.pyx | 2 -- pytest.ini | 3 +-- tox.ini | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/bx/intervals/intersection.pyx b/lib/bx/intervals/intersection.pyx index 2b0be0c1..b5c1783b 100644 --- a/lib/bx/intervals/intersection.pyx +++ b/lib/bx/intervals/intersection.pyx @@ -282,8 +282,6 @@ cdef class Interval: >>> f1 = Interval(23, 36) >>> f2 = Interval(34, 48, value=OrderedDict([('chr', 12), ('anno', 'transposon')])) - >>> f2 - Interval(34, 48, value=OrderedDict([('chr', 12), ('anno', 'transposon')])) """ cdef public int start, end diff --git a/pytest.ini b/pytest.ini index 76374a0b..e77e08f2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -#addopts = --doctest-cython --doctest-modules # https://github.com/lgpage/pytest-cython/issues/58 -addopts = --doctest-modules +addopts = --doctest-cython --doctest-modules python_files = *_tests.py testpaths = lib script_tests/ diff --git a/tox.ini b/tox.ini index 7993bd49..09123052 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ deps = test: Cython test: numpy test: pytest - test: pytest-cython + test: pytest-cython >= 0.2.2 # https://github.com/lgpage/pytest-cython/issues/58 test: python-lzo >= 1.14 # Python 3.10 support lint: black lint: flake8 From 5fc11f89437af4874c9ea42711e0cbe25d9f5542 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Sun, 31 Mar 2024 18:32:47 +0100 Subject: [PATCH 65/68] Ignore ``.tox/`` directory --- .gitignore | 3 +++ 
1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3e948f96..996c25bf 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,6 @@ nose*.egg # Built sdist directory dist + +# Testing +.tox/ From 335ca029d000d3f030f45ffe8a4e3c4a4b8f729c Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Mon, 1 Jul 2024 19:42:32 +0100 Subject: [PATCH 66/68] Support NumPy 2.0 --- lib/bx/binned_array.py | 6 +++--- lib/bx/binned_array_tests.py | 4 ++-- pyproject.toml | 8 +++++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/bx/binned_array.py b/lib/bx/binned_array.py index 7b54201b..66d0f131 100644 --- a/lib/bx/binned_array.py +++ b/lib/bx/binned_array.py @@ -20,7 +20,7 @@ array, concatenate, frombuffer, - NaN, + nan, resize, zeros, ) @@ -70,7 +70,7 @@ def bytesify(s): class BinnedArray: - def __init__(self, bin_size=512 * 1024, default=NaN, max_size=MAX, typecode="f"): + def __init__(self, bin_size=512 * 1024, default=nan, max_size=MAX, typecode="f"): self.max_size = max_size self.bin_size = bin_size self.nbins = int(math.ceil(max_size / self.bin_size)) @@ -273,7 +273,7 @@ def __getitem__(self, key): class BinnedArrayWriter: - def __init__(self, f, bin_size=512 * 1024, default=NaN, max_size=MAX, typecode="f", comp_type="zlib"): + def __init__(self, f, bin_size=512 * 1024, default=nan, max_size=MAX, typecode="f", comp_type="zlib"): # All parameters in the constructor are immutable after creation self.f = f self.max_size = max_size diff --git a/lib/bx/binned_array_tests.py b/lib/bx/binned_array_tests.py index 8af0fb46..d1403cb3 100644 --- a/lib/bx/binned_array_tests.py +++ b/lib/bx/binned_array_tests.py @@ -6,7 +6,7 @@ from numpy import ( allclose, concatenate, - NaN, + nan, zeros, ) from numpy.random import random_sample as random @@ -34,7 +34,7 @@ def source_target(): source = concatenate((source, zeros(CHUNK_SIZE_ZEROS, "f"))) source = source.astype("f") # Set on target - target = BinnedArray(128, NaN, len(source)) + target = 
BinnedArray(128, nan, len(source)) for i in range(len(source)): # if not isNaN( source[i] ): target[i] = source[i] diff --git a/pyproject.toml b/pyproject.toml index c7abb96d..053a6ac2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,11 @@ [build-system] -requires = ["cython", "oldest-supported-numpy", "setuptools", "wheel"] +requires = [ + "cython", + "numpy>=1.25.0; python_version>='3.9'", + "oldest-supported-numpy; python_version<'3.9'", + "setuptools", + "wheel", +] build-backend = "setuptools.build_meta" [tool.black] From fba2c50c7b1d2dcb3f9619c50951f63783ec7b02 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Tue, 2 Jul 2024 19:15:05 +0100 Subject: [PATCH 67/68] Build musllinux wheels for Python 3.9-3.11. Skip PyPy wheels on i686 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NumPy 2.0 has musllinux wheels for Python 3.9-3.12 on PyPI. Building NumPy 2.0 for PyPy on i686 fails with errors like: ``` ../numpy/_core/src/npysort/quicksort.cpp:86:51: error: no matches converting function ‘QSort_AVX512_ICL’ to type ‘void (*)(using TF = using Type = std::conditional::type*, intptr_t)’ {aka ‘void (*)(short int*, int)’} ``` --- .github/workflows/deploy.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 983dc5ac..a1f5f23a 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -32,10 +32,11 @@ jobs: run: python -m cibuildwheel --output-dir dist env: CIBW_ARCHS: ${{ matrix.archs }} - # Skip building musllinux wheels for the Python versions for which the oldest - # supported numpy version doesn't have musllinux wheels on PyPI. - # Skip also building for PyPy 3.8, which is deprecated upstream. 
- CIBW_SKIP: "cp38-musllinux_* cp39-musllinux_* cp310-musllinux_* cp311-musllinux_* pp38-*" + # Skip building musllinux wheels for the CPython versions for which the + # numpy version we build against doesn't have musllinux wheels on PyPI. + # Skip building for PyPy 3.8, which is deprecated upstream. + # Skip building for PyPy on i686 since NumPy 2.0 fails to build on it. + CIBW_SKIP: "cp38-musllinux_* pp38-* pp*-manylinux_i686" - name: Check packages run: twine check dist/* - uses: actions/upload-artifact@v4 From b6e40e783ff2b31067e2e278cc6847fd23293991 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 3 Jul 2024 01:39:30 +0100 Subject: [PATCH 68/68] Release 0.12.0 --- lib/bx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bx/__init__.py b/lib/bx/__init__.py index ae6db5f1..ea370a8e 100644 --- a/lib/bx/__init__.py +++ b/lib/bx/__init__.py @@ -1 +1 @@ -__version__ = "0.11.0" +__version__ = "0.12.0"