-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from DanielFaulkner/SortUtility
New annotation sort utility and changes to trackobj class header line.
- Loading branch information
Showing
3 changed files
with
93 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/python3 | ||
# annosort | ||
# | ||
# A terminal prompt interface to sort entries within an annotation file. | ||
# | ||
# By Daniel R Faulkner | ||
|
||
from lib import libAnnoSort | ||
from lib import libAnnoShared | ||
import argparse | ||
|
||
## Command line options:
### Parse the command line arguments
parser = argparse.ArgumentParser(description="Sort annotation file by genomic position")
# Arguments:
# Required
parser.add_argument("input", help="Input filename", type=argparse.FileType('r'))
# Optional
parser.add_argument("-o", "--output", help="Output filename", type=argparse.FileType('w'))
parser.add_argument("-s", "--status", help="View current sort status", action="store_true")

# argparse has already opened the input (and, if given, the output) file by
# the time parse_args() returns, so everything below runs inside try/finally
# to make sure they are closed even when loading or sorting raises.
args = parser.parse_args()

try:
    if not (args.status or args.output):
        # Nothing was requested: report it before spending time indexing.
        print("Status [-s] or Output filename [-o] option required")
    else:
        print("Indexing reference file")
        trackobj = libAnnoShared.loadTrackFile(args.input)
        if args.status:
            # Display the current status of the annotation file.
            # The status report takes priority when both -s and -o are given.
            orderstr = "YES" if trackobj.ordered else "NO"
            sortstr = "YES" if trackobj.sorted else "NO"
            print("Annotation file grouped by chromosome: "+orderstr)
            print("Annotation file sorted by start position: "+sortstr)
        else:
            # Sort the file
            libAnnoSort.sort(trackobj, args.output)
finally:
    # Close files
    args.input.close()
    if args.output is not None:
        args.output.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# libAnnoSort | ||
# Functions related to the sorting of annotation files. | ||
# | ||
# By Daniel R Faulkner | ||
|
||
from lib import libAnnoShared | ||
|
||
# Groups annotations within a file by chromosome and sorts by start position. | ||
# Example usage: | ||
#trackobj = libAnnoShared.loadTrackFile(open("AnnotationFilename")) # Create track object from file | ||
#sort(trackobj, open('OutputFilename','w')) # Perform sort | ||
def sort(trackobj, outputfile):
    """Sort an annotation file by genomic position.

    Leading '#' comment lines are copied to the output unchanged. The
    remaining entries are then written chromosome by chromosome (chromosome
    names in sorted order), with the entries of each chromosome ordered by
    their alignment start position.

    Args:
        trackobj: Track object, e.g. from libAnnoShared.loadTrackFile. This
            function uses its ``fileobj`` (seekable input file), ``chrIndex``
            (chromosome name -> file position to start scanning from),
            ``type``, ``header`` and ``ordered`` attributes.
        outputfile: Writable file object receiving the sorted annotations.
    """
    infile = trackobj.fileobj
    infile.seek(0)
    # Copy any preceding comment lines across unaltered. startswith() is
    # False for the empty string readline() returns at end of file, so an
    # empty or comment-only input ends the loop instead of raising IndexError.
    line = infile.readline()
    while line.startswith("#"):
        outputfile.write(line)
        line = infile.readline()
    # Process the chromosomes in sorted name order
    for chromosome in sorted(trackobj.chrIndex):
        # Entry names are compared case-insensitively against the index key.
        # NOTE(review): chrIndex keys are presumably already upper case -
        # confirm against libAnnoShared.
        chrname = chromosome.upper()
        entries = []
        infile.seek(trackobj.chrIndex[chromosome])
        # Create a list of (alignment start, line position) pairs
        while True:
            # Record the position *before* reading: text-mode tell() values
            # are opaque, so subtracting len(line) afterwards is not reliable
            # (multi-byte characters, translated newlines).
            position = infile.tell()
            line = infile.readline()
            if not line:
                break
            annoentry = libAnnoShared.Annotation(line, trackobj.type, trackobj.header)
            if annoentry.chrName.upper() == chrname:
                entries.append((annoentry.alignStart, position))
            elif trackobj.ordered:
                # File is grouped by chromosome, so the first entry for a
                # different chromosome ends this chromosome's run.
                break
        # Sort by start position (file position breaks ties)
        entries.sort()
        # Write out the lines in the correct order
        for _, position in entries:
            infile.seek(position)
            outputfile.write(infile.readline())