Skip to content

Commit

Permalink
Merge pull request #7 from DanielFaulkner/SortUtility
Browse files Browse the repository at this point in the history
New annotation sort utility and changes to trackobj class header line.
  • Loading branch information
DanielFaulkner authored Feb 7, 2021
2 parents 63c10a5 + f945a81 commit 3ebc092
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 1 deletion.
47 changes: 47 additions & 0 deletions annosort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/python3
# annosort
#
# A terminal prompt interface to sort entries within an annotation file.
#
# By Daniel R Faulkner

from lib import libAnnoSort
from lib import libAnnoShared
import argparse

## Command line options:
### Parse the command line arguments
parser = argparse.ArgumentParser(description="Sort annotation file by genomic position")
# Arguments:
# Required
parser.add_argument("input", help="Input filename", type=argparse.FileType('r'))
# Optional
parser.add_argument("-o", "--output", help="Output filename", type=argparse.FileType('w'))
parser.add_argument("-s", "--status", help="View current sort status", action="store_true")

# Any commands entered without a flag
args = parser.parse_args()

# Run the command.
# argparse has already opened the input file (and the output file, if one was
# given), so the work is wrapped in try/finally to guarantee they are closed
# even when indexing or sorting raises.
try:
    print("Indexing reference file")
    trackobj = libAnnoShared.loadTrackFile(args.input)
    if args.status:
        # Display the current status of the annotation file
        orderstr = "YES" if trackobj.ordered else "NO"
        sortstr = "YES" if trackobj.sorted else "NO"
        print("Annotation file grouped by chromosome: "+orderstr)
        print("Annotation file sorted by start position: "+sortstr)
    elif args.output:
        # Sort the file
        libAnnoSort.sort(trackobj, args.output)
    else:
        print("Status [-s] or Output filename [-o] option required")
finally:
    # Close files
    args.input.close()
    if args.output:
        args.output.close()
6 changes: 5 additions & 1 deletion lib/libAnnoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,11 @@ def __init__(self, fileobj, filetype="", fixedannotype="", indexlines=0, smallon
self.type=filetype.upper()
# Store the first line in case it is a header line needed later
self.fileobj.seek(0)
self.header = self.fileobj.readline()
line = self.fileobj.readline()
self.header = line # Store the first line as a header regardless
while line[0]=="#":
self.header = line # Replace with the last comment line if multiple lines are present
line = self.fileobj.readline()
self.fileobj.seek(0)
# Index the file for faster access times later
self.indexFile()
Expand Down
41 changes: 41 additions & 0 deletions lib/libAnnoSort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# libAnnoSort
# Functions related to the sorting of annotation files.
#
# By Daniel R Faulkner

from lib import libAnnoShared

# Groups annotations within a file by chromosome and sorts by start position.
# Example usage:
#trackobj = libAnnoShared.loadTrackFile(open("AnnotationFilename")) # Create track object from file
#sort(trackobj, open('OutputFilename','w')) # Perform sort
def sort(trackobj, outputfile):
    """Sort an annotation file by genomic position.

    Leading comment lines (starting with "#") are copied to the output
    unaltered. The entries are then written grouped by chromosome, in the
    sorted order of the keys of ``trackobj.chrIndex``, and within each
    chromosome in ascending order of ``alignStart``.

    Parameters:
        trackobj:   Track object providing ``fileobj`` (a seekable file
                    object), ``chrIndex`` (chromosome name -> file position),
                    ``type``, ``header`` and ``ordered``.
        outputfile: Writable file object which receives the sorted lines.
    """
    infile = trackobj.fileobj
    infile.seek(0)
    # Copy any preceding comment lines across unaltered.
    # startswith() is used rather than line[0] because readline() returns an
    # empty string at the end of the file; indexing it would raise IndexError
    # for an empty file or one which only contains comment lines.
    line = infile.readline()
    while line.startswith("#"):
        outputfile.write(line)
        line = infile.readline()
    # Process the chromosomes in sorted order
    for chromsome in sorted(trackobj.chrIndex):
        chrlineindex = []
        infile.seek(trackobj.chrIndex.get(chromsome.upper()))
        # Create a list of alignment start positions and line positions
        while True:
            # Record where the line begins BEFORE reading it. In text mode
            # tell() returns an opaque position and len(line) counts
            # characters, so tell()-len(line) is not a reliable way to
            # recover the start of a line (multi-byte characters, translated
            # newlines).
            linestart = infile.tell()
            line = infile.readline()
            if not line:
                break
            annoentry = libAnnoShared.Annotation(line, trackobj.type, trackobj.header)
            if annoentry.chrName.upper() == chromsome:
                chrlineindex.append([annoentry.alignStart, linestart])
            elif trackobj.ordered == 1:
                # The file is grouped by chromosome, so the first entry from
                # a different chromosome marks the end of this group.
                break
        # Write out the lines ordered by start position
        for item in sorted(chrlineindex):
            infile.seek(item[1])
            outputfile.write(infile.readline())

0 comments on commit 3ebc092

Please sign in to comment.