diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp index 06055df3..4a3e2a1d 100644 --- a/src/api/BamMultiReader.cpp +++ b/src/api/BamMultiReader.cpp @@ -381,16 +381,41 @@ bool BamMultiReader::SetRegion(const int& leftRefID, return d->SetRegion(region); } -/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order) +/*! \fn void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) \brief Sets the expected sorting order for reading across multiple BAM files. - Default is BamMultiReader::SortedByPosition. + Default is BamMultiReader::SortedByPosition and sorting ascending. + + The SortOrder determines how the reader determines which alignment is "next" + from among its open readers. + + \param sort Sorting object that specifies the expected sort order and direction +*/ +void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) { + d->SetSortOrder(sort); +} + +/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending) + \brief Sets the expected sorting order for reading across multiple BAM files. + + Default is BamMultiReader::SortedByPosition and sorting ascending. The SortOrder determines how the reader determines which alignment is "next" from among its open readers. \param order expected sort order + \param ascending sorting direction */ -void BamMultiReader::SetSortOrder(const SortOrder& order) { - d->SetSortOrder(order); +void BamTools::BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending) +{ + string sortCol=""; + switch(order){ + case BamMultiReader::SortedByAlignmentScore:sortCol="AS";break; + case BamMultiReader::SortedByPosition:sortCol="POS";break; + case BamMultiReader::SortedByReadName: sortCol="QNAME";break; + case BamMultiReader::Unsorted:sortCol="";break; + default:cerr<<"BamMultiReader.cpp ERROR: The selected sort order is not known\n"; + } + BamSortCriteria sort(sortCol, !ascending); + d->SetSortOrder(sort); } diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h index cc49ec8e..68271607 100644 --- a/src/api/BamMultiReader.h +++ b/src/api/BamMultiReader.h @@ -13,13 +13,14 @@ #include #include +#include #include #include #include #include namespace BamTools { - + class BamSortCriteria; namespace Internal { class BamMultiReaderPrivate; } // namespace Internal @@ -27,8 +28,9 @@ namespace Internal { class API_EXPORT BamMultiReader { public: - enum SortOrder { SortedByPosition = 0 - , SortedByReadName + enum SortOrder { SortedByPosition =0 + , SortedByReadName + , SortedByAlignmentScore , Unsorted }; @@ -78,8 +80,9 @@ class API_EXPORT BamMultiReader { bool GetNextAlignmentCore(BamAlignment& alignment); // sets the expected sorting order for reading across multiple BAM files - void SetSortOrder(const SortOrder& order); - + void SetSortOrder(const BamSortCriteria& sort); + + void SetSortOrder(const SortOrder& order, const bool& ascending =true); // ---------------------- // access auxiliary data // ---------------------- diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp new file mode 100644 index 00000000..cd815fcb --- /dev/null +++ b/src/api/BamSortCriteria.cpp @@ -0,0 +1,119 @@ +#include "BamSortCriteria.h" +#include +#include +#include + +using namespace BamTools; +const string BamSortCriteria::allowedTags[3]={"QNAME","POS","AS"}; +const string BamSortCriteria::coreTags[1]={"POS"}; + + + + +bool BamSortCriteria::isTagCoreAttribute() { + int length = sizeof(coreTags)/sizeof(coreTags[0]); + for(int i=0;i > > >(); + } else if (sortCriteria=="POS") { + return new CommonMultiMerger > > >(); + } else if (sortCriteria=="AS") { + return new CommonMultiMerger > > >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } + }else{ + if (sortCriteria=="QNAME") { + return new CommonMultiMerger > > >(); + } else if (sortCriteria=="POS") { + return new CommonMultiMerger > > >(); + } else if (sortCriteria=="AS") { + return new CommonMultiMerger > > >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } + } + cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; + return 0; +} + + +void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterator end) { + /** + * It seems like this step could not be simplified for the + * ascending and descending case + */ + if (!descending ) { + if (sortCriteria=="QNAME") { + sort(begin,end,SortName >()); + } else if (sortCriteria=="POS") { + sort(begin,end,SortPosition >()); + } else if (sortCriteria=="AS") { + sort(begin,end,SortAlignmentScore >()); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("< >()); + } else if (sortCriteria=="POS") { + sort(begin,end,SortPosition >()); + } else if (sortCriteria=="AS") { + sort(begin,end,SortAlignmentScore >()); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("< +#include +#include + +using namespace std; + + +using namespace BamTools::Internal; +namespace BamTools { + +typedef binary_function BamAlignmentBFunction; +typedef binary_function ReadAlignmentBFunction; +typedef vector< BamAlignment >::iterator BamAlignmentIterator; + + +class BamSortCriteria{ +private: + string sortCriteria; + bool descending; + static const string allowedTags[]; + static const string coreTags[]; +public: + static string getAllowedTags(); + BamSortCriteria():sortCriteria("QNAME"),descending(false){} + + + BamSortCriteria(string sortCriteria, bool descending):sortCriteria(sortCriteria),descending(descending){ + if(!isTagValid()){ + cerr << "BamSortCriteria ERROR: Requested sort order ("< +class SortPosition : public binary_function{ + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + COMP c; + if ( lhs.RefID != rhs.RefID ){ + return c(lhs.RefID, rhs.RefID); + }else { + int32_t lh,rh; + lh = lhs.Position; + rh = rhs.Position; + return c(lh,rh); + } + } + }; + + + // QNAME + template + class SortName : public binary_function { + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + COMP c; + return c(lhs.Name, rhs.Name); + } + }; + + // AS Alignment Score from BFAST + + template + class SortAlignmentScore : public BamAlignmentBFunction{ + public: + bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { + int32_t lh, rh; + lhs.GetTag("AS",lh); + rhs.GetTag("AS",rh); + + COMP c; + return c(lh,rh); + } + }; + // printf("%d %d %d %d\n",lh,rh,lhs.Position,rhs.Position); + // return c(lhs.Position, rhs.Position); + + + + //-------------------------------------------------------- + /** + * The two classes are used for a descending search as they flip the conditions of the defined + * SortLessThan* classes. One is used for the Reader and th other for the BamAlignment + **/ + template + class SortReaderAlignment: public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ + T t; + const BamAlignment l= *lhs.second; + const BamAlignment r= *rhs.second; + return t(l,r); + } + }; + +} + +#endif // BAMSORTCRITERIA_H diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt index 57efba25..1cd9ca95 100644 --- a/src/api/CMakeLists.txt +++ b/src/api/CMakeLists.txt @@ -18,6 +18,7 @@ set( BamToolsAPISources BamMultiReader.cpp BamReader.cpp BamWriter.cpp + BamSortCriteria.cpp SamHeader.cpp SamReadGroup.cpp SamReadGroupDictionary.cpp @@ -66,6 +67,7 @@ ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamSortCriteria.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir}) diff --git a/src/api/SamConstants.h b/src/api/SamConstants.h index 6412b3d0..5849994a 100644 --- a/src/api/SamConstants.h +++ b/src/api/SamConstants.h @@ -63,6 +63,7 @@ const std::string SAM_CO_BEGIN_TOKEN = "@CO"; // HD:SO values const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate"; const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname"; +const std::string SAM_HD_SORTORDER_ALIGNMENTSCORE = "alignmentscore"; const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted"; // HD:GO values diff --git a/src/api/internal/BamMultiMerger_p.h b/src/api/internal/BamMultiMerger_p.h index ae67eea2..92038f69 100644 --- a/src/api/internal/BamMultiMerger_p.h +++ b/src/api/internal/BamMultiMerger_p.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ typedef std::pair ReaderAlignment; class IBamMultiMerger { public: - IBamMultiMerger(void) { } + IBamMultiMerger(){ } virtual ~IBamMultiMerger(void) { } public: @@ -49,42 +50,15 @@ class IBamMultiMerger { virtual void Remove(BamReader* reader) =0; virtual int Size(void) const =0; virtual ReaderAlignment TakeFirst(void) =0; -}; - -// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position) -class PositionMultiMerger : public IBamMultiMerger { - - public: - PositionMultiMerger(void) : IBamMultiMerger() { } - ~PositionMultiMerger(void) { } - - public: - void Add(const ReaderAlignment& value); - void Clear(void); - const ReaderAlignment& First(void) const; - bool IsEmpty(void) const; - void Remove(BamReader* reader); - int Size(void) const; - ReaderAlignment TakeFirst(void); - - private: - typedef std::pair KeyType; - typedef ReaderAlignment ValueType; - typedef std::pair ElementType; - - typedef std::multimap ContainerType; - typedef ContainerType::iterator DataIterator; - typedef ContainerType::const_iterator DataConstIterator; - ContainerType m_data; }; -// IBamMultiMerger implementation - sorted on BamAlignment: Name -class ReadNameMultiMerger : public IBamMultiMerger { - public: - ReadNameMultiMerger(void) : IBamMultiMerger() { } - ~ReadNameMultiMerger(void) { } +template +class CommonMultiMerger : public IBamMultiMerger{ + public: + CommonMultiMerger() { } + ~CommonMultiMerger(void) { } public: void Add(const ReaderAlignment& value); @@ -94,16 +68,11 @@ class ReadNameMultiMerger : public IBamMultiMerger { void Remove(BamReader* reader); int Size(void) const; ReaderAlignment TakeFirst(void); - private: - typedef std::string KeyType; typedef ReaderAlignment ValueType; - typedef std::pair ElementType; - - typedef std::multimap ContainerType; - typedef ContainerType::iterator DataIterator; - typedef ContainerType::const_iterator DataConstIterator; - + typedef std::multiset ContainerType; + typedef typename ContainerType::iterator DataIterator; + typedef typename ContainerType::const_iterator DataConstIterator; ContainerType m_data; }; @@ -132,40 +101,40 @@ class UnsortedMultiMerger : public IBamMultiMerger { ContainerType m_data; }; -// ------------------------------------------ -// PositionMultiMerger implementation -inline void PositionMultiMerger::Add(const ReaderAlignment& value) { - const KeyType key( value.second->RefID, value.second->Position ); - m_data.insert( ElementType(key, value) ); +//--------------------------------------------------------------------------- +// CommonMultiMerger implementation +template +inline void CommonMultiMerger::Add(const ReaderAlignment& value) { + m_data.insert( value); } -inline void PositionMultiMerger::Clear(void) { +template +inline void CommonMultiMerger::Clear(void) { m_data.clear(); } -inline const ReaderAlignment& PositionMultiMerger::First(void) const { - const ElementType& entry = (*m_data.begin()); - return entry.second; +template +inline const ReaderAlignment& CommonMultiMerger::First(void) const { + const ValueType& entry = (*m_data.begin()); + return entry; } -inline bool PositionMultiMerger::IsEmpty(void) const { +template +inline bool CommonMultiMerger::IsEmpty(void) const { return m_data.empty(); } - -inline void PositionMultiMerger::Remove(BamReader* reader) { - +template +inline void CommonMultiMerger::Remove(BamReader* reader) { if ( reader == 0 ) return; const std::string filenameToRemove = reader->GetFilename(); - // iterate over readers in cache DataIterator dataIter = m_data.begin(); DataIterator dataEnd = m_data.end(); for ( ; dataIter != dataEnd; ++dataIter ) { - const ValueType& entry = (*dataIter).second; + const ValueType& entry = (*dataIter); const BamReader* entryReader = entry.first; if ( entryReader == 0 ) continue; - // remove iterator on match if ( entryReader->GetFilename() == filenameToRemove ) { m_data.erase(dataIter); @@ -173,74 +142,20 @@ inline void PositionMultiMerger::Remove(BamReader* reader) { } } } - -inline int PositionMultiMerger::Size(void) const { +template +inline int CommonMultiMerger::Size(void) const { return m_data.size(); } - -inline ReaderAlignment PositionMultiMerger::TakeFirst(void) { - DataIterator first = m_data.begin(); - ReaderAlignment next = (*first).second; - m_data.erase(first); - return next; -} - -// ------------------------------------------ -// ReadNameMultiMerger implementation - -inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) { - const KeyType key(value.second->Name); - m_data.insert( ElementType(key, value) ); -} - -inline void ReadNameMultiMerger::Clear(void) { - m_data.clear(); -} - -inline const ReaderAlignment& ReadNameMultiMerger::First(void) const { - const ElementType& entry = (*m_data.begin()); - return entry.second; -} - -inline bool ReadNameMultiMerger::IsEmpty(void) const { - return m_data.empty(); -} - -inline void ReadNameMultiMerger::Remove(BamReader* reader) { - - if ( reader == 0 ) return; - const std::string filenameToRemove = reader->GetFilename(); - - // iterate over readers in cache - DataIterator dataIter = m_data.begin(); - DataIterator dataEnd = m_data.end(); - for ( ; dataIter != dataEnd; ++dataIter ) { - const ValueType& entry = (*dataIter).second; - const BamReader* entryReader = entry.first; - if ( entryReader == 0 ) continue; - - // remove iterator on match - if ( entryReader->GetFilename() == filenameToRemove ) { - m_data.erase(dataIter); - return; - } - } - -} - -inline int ReadNameMultiMerger::Size(void) const { - return m_data.size(); -} - -inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) { +template +inline ReaderAlignment CommonMultiMerger::TakeFirst(void) { DataIterator first = m_data.begin(); - ReaderAlignment next = (*first).second; + ReaderAlignment next = (*first); m_data.erase(first); return next; } -// ------------------------------------------ -// UnsortedMultiMerger implementation +//---------------------------------------------------------------- +// MultiMerger for Unsorted Files inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) { m_data.push_back(value); diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp index 583085c7..690d10fa 100644 --- a/src/api/internal/BamMultiReader_p.cpp +++ b/src/api/internal/BamMultiReader_p.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace BamTools; using namespace BamTools::Internal; @@ -26,7 +27,6 @@ using namespace std; BamMultiReaderPrivate::BamMultiReaderPrivate(void) : m_alignments(0) , m_isCoreMode(false) - , m_sortOrder(BamMultiReader::SortedByPosition) { } // dtor @@ -97,7 +97,7 @@ void BamMultiReaderPrivate::CloseFiles(const vector& filenames) { // make sure alignment cache is cleared if all readers are now closed if ( m_readers.empty() && m_alignments != 0 ) m_alignments->Clear(); -} +}// // creates index files for BAM files that don't have them bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { @@ -119,16 +119,6 @@ bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { return result; } -IBamMultiMerger* BamMultiReaderPrivate::CreateMergerForCurrentSortOrder(void) const { - switch ( m_sortOrder ) { - case ( BamMultiReader::SortedByPosition ) : return new PositionMultiMerger; - case ( BamMultiReader::SortedByReadName ) : return new ReadNameMultiMerger; - case ( BamMultiReader::Unsorted ) : return new UnsortedMultiMerger; - default : - cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; - return 0; - } -} const string BamMultiReaderPrivate::ExtractReadGroup(const string& headerLine) const { @@ -450,7 +440,7 @@ bool BamMultiReaderPrivate::Open(const vector& filenames) { // create alignment cache if neccessary if ( m_alignments == 0 ) { - m_alignments = CreateMergerForCurrentSortOrder(); + m_alignments = m_sort.getMerger();//CreateMergerForCurrentSortOrder(); if ( m_alignments == 0 ) return false; } @@ -603,7 +593,7 @@ bool BamMultiReaderPrivate::RewindReaders(void) { void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) { // must be in core mode && NOT sorting by read name to call GNACore() - if ( m_isCoreMode && m_sortOrder != BamMultiReader::SortedByReadName ) { + if ( m_isCoreMode && m_sort.isTagCoreAttribute()) { if ( reader->GetNextAlignmentCore(*alignment) ) m_alignments->Add( make_pair(reader, alignment) ); } @@ -657,16 +647,18 @@ bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) { return true; } -void BamMultiReaderPrivate::SetSortOrder(const BamMultiReader::SortOrder& order) { +void BamMultiReaderPrivate::SetSortOrder(const BamSortCriteria& sort) { // skip if no change needed - if ( m_sortOrder == order ) return; - + //if ( (m_sort.getSortCriteria() == sort.getSortCriteria) && + // (m_sort.isDescending() == sort.isDescending())) return; + + //m_sort = sort; // set new sort order - m_sortOrder = order; + m_sort = sort; // create new alignment cache based on sort order - IBamMultiMerger* newAlignmentCache = CreateMergerForCurrentSortOrder(); + IBamMultiMerger* newAlignmentCache = m_sort.getMerger();//CreateMergerForCurrentSortOrder(); if ( newAlignmentCache == 0 ) return; // print error? // copy old cache contents to new cache diff --git a/src/api/internal/BamMultiReader_p.h b/src/api/internal/BamMultiReader_p.h index b34fb0c5..189776ca 100644 --- a/src/api/internal/BamMultiReader_p.h +++ b/src/api/internal/BamMultiReader_p.h @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -31,6 +32,7 @@ namespace Internal { class IBamMultiMerger; + class BamMultiReaderPrivate { // constructor / destructor @@ -57,7 +59,7 @@ class BamMultiReaderPrivate { bool GetNextAlignment(BamAlignment& al); bool GetNextAlignmentCore(BamAlignment& al); bool HasOpenReaders(void); - void SetSortOrder(const BamMultiReader::SortOrder& order); + void SetSortOrder(const BamSortCriteria& sort); // access auxiliary data SamHeader GetHeader(void) const; @@ -75,7 +77,9 @@ class BamMultiReaderPrivate { // 'internal' methods public: - IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; + template + IBamMultiMerger* CreateMergerForCurrentSortOrderDesc(bool desc) const; + //IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; const std::string ExtractReadGroup(const std::string& headerLine) const; bool HasAlignmentData(void) const; bool LoadNextAlignment(BamAlignment& al); @@ -93,7 +97,7 @@ class BamMultiReaderPrivate { IBamMultiMerger* m_alignments; bool m_isCoreMode; - BamMultiReader::SortOrder m_sortOrder; + BamSortCriteria m_sort; }; } // namespace Internal diff --git a/src/toolkit/bamtools_convert.h b/src/toolkit/bamtools_convert.h index 8dd68572..f711f03c 100644 --- a/src/toolkit/bamtools_convert.h +++ b/src/toolkit/bamtools_convert.h @@ -35,4 +35,4 @@ class ConvertTool : public AbstractTool { } // namespace BamTools -#endif // BAMTOOLS_CONVERT_H \ No newline at end of file +#endif // BAMTOOLS_CONVERT_H diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index 8d18f671..68153fa0 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -14,6 +14,7 @@ #include #include #include +#include using namespace BamTools; #include @@ -39,21 +40,6 @@ namespace BamTools { // ----------------------------------- // comparison objects (for sorting) - struct SortLessThanPosition { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID < rhs.RefID; - else - return lhs.Position < rhs.Position; - } - }; - - struct SortLessThanName { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name < rhs.Name; - } - }; - } // namespace BamTools // --------------------------------------------- @@ -76,11 +62,14 @@ class SortTool::SortToolPrivate { bool HandleBufferContents(vector& buffer); bool MergeSortedRuns(void); bool WriteTempFile(const vector& buffer, const string& tempFilename); - void SortBuffer(vector& buffer); + //void SortBuffer(vector& buffer); + template + BamAlignmentBFunction& getSortingFunction(); // data members private: SortTool::SortSettings* m_settings; + BamSortCriteria m_sort; string m_tempFilenameStub; int m_numberOfRuns; string m_headerText; @@ -98,7 +87,9 @@ struct SortTool::SortSettings { bool HasMaxBufferCount; bool HasMaxBufferMemory; bool HasOutputBamFilename; - bool IsSortingByName; + bool IsSortDescending; + string SortCriteria; + bool HasSortCriteria; // filenames string InputBamFilename; @@ -114,7 +105,9 @@ struct SortTool::SortSettings { , HasMaxBufferCount(false) , HasMaxBufferMemory(false) , HasOutputBamFilename(false) - , IsSortingByName(false) + , IsSortDescending(false) + , SortCriteria("QNAME") + , HasSortCriteria(false) , InputBamFilename(Options::StandardIn()) , OutputBamFilename(Options::StandardOut()) , MaxBufferCount(SORT_DEFAULT_MAX_BUFFER_COUNT) @@ -139,7 +132,9 @@ SortTool::SortTool(void) Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut()); OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods"); - Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts); + Options::AddValueOption("-tagname", "sort by tag name", "("+BamSortCriteria::getAllowedTags()+")", "", m_settings->HasSortCriteria, m_settings->SortCriteria, SortOpts, Options::StandardOut()); + Options::AddOption("-desc", "sort values descending", m_settings->IsSortDescending, SortOpts); +// Options::AddOption("-byname", "sort by alignment name", m_settings->SortCriteria, SortOpts); OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings"); Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT); @@ -164,10 +159,14 @@ int SortTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); + if(!BamSortCriteria::isTagValid(m_settings->SortCriteria)){ + cerr << "bamtools sort ERROR: The tag "<SortCriteria<<" is not valid...Aborting"<Run() ) return 0; else return 1; } @@ -178,6 +177,7 @@ int SortTool::Run(int argc, char* argv[]) { // constructor SortTool::SortToolPrivate::SortToolPrivate(SortTool::SortSettings* settings) : m_settings(settings) + , m_sort(settings->SortCriteria,settings->IsSortDescending) , m_numberOfRuns(0) { // set filename stub depending on inputfile path @@ -206,9 +206,7 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { // get basic data that will be shared by all temp/output files SamHeader header = inputReader.GetHeader(); - header.SortOrder = ( m_settings->IsSortingByName - ? Constants::SAM_HD_SORTORDER_QUERYNAME - : Constants::SAM_HD_SORTORDER_COORDINATE ); + header.SortOrder = m_sort.getSamHeaderSort(); m_headerText = header.ToString(); m_references = inputReader.GetReferenceData(); @@ -219,7 +217,7 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { // if sorting by name, we need to generate full char data // so can't use GetNextAlignmentCore() - if ( m_settings->IsSortingByName ) { + if (!m_sort.isTagCoreAttribute() ) { // iterate through file while ( inputReader.GetNextAlignment(al)) { @@ -244,8 +242,9 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { buffer.push_back(al); // if buffer is full, handle contents (sort & write to temp file) - if ( buffer.size() == m_settings->MaxBufferCount ) + if ( buffer.size() == m_settings->MaxBufferCount ){ HandleBufferContents(buffer); + } } } @@ -261,7 +260,8 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { bool SortTool::SortToolPrivate::HandleBufferContents(vector& buffer ) { // do sorting - SortBuffer(buffer); + m_sort.sortBuffer(buffer.begin(),buffer.end()); + //SortBuffer(buffer); // write sorted contents to temp file, store success/fail stringstream tempStr; @@ -274,7 +274,6 @@ bool SortTool::SortToolPrivate::HandleBufferContents(vector& buffe // clear buffer contents & update run counter buffer.clear(); ++m_numberOfRuns; - // return success/fail of writing to temp file // TODO: a failure returned here is not actually caught and handled anywhere return success; @@ -292,10 +291,7 @@ bool SortTool::SortToolPrivate::MergeSortedRuns(void) { } // set sort order for merge - if ( m_settings->IsSortingByName ) - multiReader.SetSortOrder(BamMultiReader::SortedByReadName); - else - multiReader.SetSortOrder(BamMultiReader::SortedByPosition); + multiReader.SetSortOrder(m_sort); // open writer for our completely sorted output BAM file BamWriter mergedWriter; @@ -336,18 +332,9 @@ bool SortTool::SortToolPrivate::Run(void) { else return false; } - -void SortTool::SortToolPrivate::SortBuffer(vector& buffer) { - - // ** add further custom sort options later ?? ** - - // sort buffer by desired method - if ( m_settings->IsSortingByName ) - sort ( buffer.begin(), buffer.end(), SortLessThanName() ); - else - sort ( buffer.begin(), buffer.end(), SortLessThanPosition() ); -} - + + + bool SortTool::SortToolPrivate::WriteTempFile(const vector& buffer, const string& tempFilename) { diff --git a/src/toolkit/bamtools_sort.h b/src/toolkit/bamtools_sort.h index 0241b025..28352cac 100644 --- a/src/toolkit/bamtools_sort.h +++ b/src/toolkit/bamtools_sort.h @@ -12,7 +12,7 @@ #define BAMTOOLS_SORT_H #include "bamtools_tool.h" - +#include "api/BamSortCriteria.h" namespace BamTools { class SortTool : public AbstractTool { @@ -28,7 +28,7 @@ class SortTool : public AbstractTool { private: struct SortSettings; SortSettings* m_settings; - + BamSortCriteria m_sort; struct SortToolPrivate; SortToolPrivate* m_impl; };