Skip to content

Commit

Permalink
Partial implementation of consensus (see issue #3)
Browse files Browse the repository at this point in the history
I can now build a consensus table that counts the characters in each
column of the alignment.

If we want to reduce these to a single consensus string, then we need to
decide how to resolve ties.

```
>seq_1
AAATATAT
>seq_2
AAAATTAT
>seq_3
AAATATAA
```

```
A       T
3       0
3       0
3       0
1       2
2       1
0       3
3       0
1       2
```
  • Loading branch information
arendsee committed Aug 29, 2018
1 parent 46081ae commit 53dd141
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions smof.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def parse(argv=None):
subcommands = [
Cut,
Clean,
Consensus,
Filter,
Grep,
Md5sum,
Expand Down Expand Up @@ -2136,6 +2137,49 @@ def sortterm(x):
yield s


class Consensus(Subcommand):
def _parse(self):
cmd_name = 'consensus'
parser = self.subparsers.add_parser(
cmd_name,
usage=self.usage.format(cmd_name),
help="finds the consensus sequence for aligned sequence",
description="""Given input in aligned FASTA file format, where all
sequences are of equal length (possibly with gaps), `consensus`
will find the most common character in each column. Optionall, it
will instead provide the counts or proportions of each character at
each position."""
)
parser.add_argument(
'fh',
help='input fasta sequence (default = stdin)',
metavar='INPUT',
nargs="*"
)
parser.set_defaults(func=self.func)

def generator(self, args, gen):
raise NotImplemented

def write(self, args, gen, out=sys.stdout):
seqs = [s for s in gen.next()]
imax = max([len(s.seq) for s in seqs])
try:
transpose = [[s.seq[i] for s in seqs] for i in range(0, imax)]
except IndexError:
err("All sequences must be of equivalent length")

counts = Counter(('').join([s.seq for s in seqs]))
characters = list(counts.keys())

out.write("\t".join(characters))
out.write("\n")
for column in transpose:
c = Counter(column)
out.write("\t".join([str(c[x]) for x in characters]))
out.write("\n")


# ==============
# UNIX EMULATORS
# ==============
Expand Down

0 comments on commit 53dd141

Please sign in to comment.