From ee3e1014347f6abdcbb457d124cb272cc867760e Mon Sep 17 00:00:00 2001 From: riedl Date: Tue, 29 Mar 2011 09:42:08 +0200 Subject: [PATCH 1/8] Sort Criterias seperated --- src/api/BamSortCriteria.cpp | 106 ++++++++++++++++++++++++++ src/api/BamSortCriteria.cpp~ | 106 ++++++++++++++++++++++++++ src/api/BamSortCriteria.h | 123 ++++++++++++++++++++++++++++++ src/api/BamSortCriteria.h~ | 142 +++++++++++++++++++++++++++++++++++ 4 files changed, 477 insertions(+) create mode 100644 src/api/BamSortCriteria.cpp create mode 100644 src/api/BamSortCriteria.cpp~ create mode 100644 src/api/BamSortCriteria.h create mode 100644 src/api/BamSortCriteria.h~ diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp new file mode 100644 index 00000000..d03443c8 --- /dev/null +++ b/src/api/BamSortCriteria.cpp @@ -0,0 +1,106 @@ +#include "BamSortCriteria.h" +#include +#include +#include + +const string BamSortCriteria::allowedTags[4]={"QNAME","POS","AS"}; +const string BamSortCriteria::coreTags[1]={"POS"}; + + +bool BamSortCriteria::isTagCoreAttribute() { + int length = sizeof(coreTags)/sizeof(coreTags[0]); + for(int i=0;i +IBamMultiMerger* BamSortCriteria::getMergerDesc() { + if (descending) { + return new CommonMultiMerger >; + } + return new CommonMultiMerger; +} + +IBamMultiMerger* BamSortCriteria::getMerger(void ) { + if (sortCriteria=="QNAME") { + return getMergerDesc >(); + } else if (sortCriteria=="POS") { + return getMergerDesc >(); + } else if (sortCriteria=="AS") { + return getMergerDesc >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } + cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; + return 0; +} + + +void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterator end) { + /** + * It seems like this step could not be simplified for the + * ascending and descending case + */ + if (!descending) { + if (sortCriteria=="QNAME") { + sort(begin,end,SortLessThanName()); + } else if 
(sortCriteria=="POS") { + sort(begin,end,SortLessThanPosition()); + } else if (sortCriteria=="AS") { + sort(begin,end,SortLessThanAlignmentScore()); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("<(SortLessThanName())); + } else if (sortCriteria=="POS") { + sort(begin,end,SortGreaterThanBamAlignment(SortLessThanPosition())); + } else if (sortCriteria=="AS") { + sort(begin,end,SortGreaterThanBamAlignment(SortLessThanAlignmentScore())); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("< +#include +#include + +const string BamSortCriteria::allowedTags[4]={"QNAME","POS","AS","CQ_MEAN"}; +const string BamSortCriteria::coreTags[1]={"POS"}; + + +bool BamSortCriteria::isTagCoreAttribute() { + int length = sizeof(coreTags)/sizeof(coreTags[0]); + for(int i=0;i +IBamMultiMerger* BamSortCriteria::getMergerDesc() { + if (descending) { + return new CommonMultiMerger >; + } + return new CommonMultiMerger; +} + +IBamMultiMerger* BamSortCriteria::getMerger(void ) { + if (sortCriteria=="QNAME") { + return getMergerDesc >(); + } else if (sortCriteria=="POS") { + return getMergerDesc >(); + } else if (sortCriteria=="AS") { + return getMergerDesc >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } + cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; + return 0; +} + + +void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterator end) { + /** + * It seems like this step could not be simplified for the + * ascending and descending case + */ + if (!descending) { + if (sortCriteria=="QNAME") { + sort(begin,end,SortLessThanName()); + } else if (sortCriteria=="POS") { + sort(begin,end,SortLessThanPosition()); + } else if (sortCriteria=="AS") { + sort(begin,end,SortLessThanAlignmentScore()); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("<(SortLessThanName())); + } else if (sortCriteria=="POS") { + sort(begin,end,SortGreaterThanBamAlignment(SortLessThanPosition())); + 
} else if (sortCriteria=="AS") { + sort(begin,end,SortGreaterThanBamAlignment(SortLessThanAlignmentScore())); + } else { + cerr << "BamMultiReader ERROR: requested sort order ("< +#include +#include + +using namespace std; +using namespace BamTools; +using namespace BamTools::Internal; + +typedef binary_function BamAlignmentBFunction; +typedef binary_function ReadAlignmentBFunction; +typedef vector< BamAlignment >::iterator BamAlignmentIterator; + +class BamSortCriteria{ +private: + string sortCriteria; + bool descending; + template + IBamMultiMerger* getMergerDesc(); + static const string allowedTags[]; + static const string coreTags[]; + string getAllowedTags(); +public: + + BamSortCriteria():sortCriteria("QNAME"),descending(false){} + BamSortCriteria(string sortCriteria, bool descending):sortCriteria(sortCriteria),descending(descending){ + if(!isTagValid()){ + cerr << "BamSortCriteria ERROR: Requested sort order ("<{ + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + + if ( lhs.RefID != rhs.RefID ) + return lhs.RefID < rhs.RefID; + else + return lhs.Position < rhs.Position; + } + }; + + // QNAME + class SortLessThanName : public binary_function { + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return lhs.Name < rhs.Name; + } + }; + + // AS Alignment Score from BFAST + class SortLessThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ + public: + bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { + uint32_t lh, rh; + lhs.GetTag("AS",lh); + rhs.GetTag("AS",rh); + return lh + class SortGreaterThanReaderAlignment : public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) { + F f; + return !f(lhs,rhs); + } + }; + + + + template + class SortLessReaderAlignment: public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ + T t; + const BamAlignment l= 
*lhs.second; + const BamAlignment r= *rhs.second; + return t(l,r); + } + }; + + template + class SortGreaterThanBamAlignment{ + private: + F func; + public: + SortGreaterThanBamAlignment(F f):func(f){} + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return !func(lhs,rhs); + } + }; + +#endif // BAMSORTCRITERIA_H diff --git a/src/api/BamSortCriteria.h~ b/src/api/BamSortCriteria.h~ new file mode 100644 index 00000000..70820d26 --- /dev/null +++ b/src/api/BamSortCriteria.h~ @@ -0,0 +1,142 @@ +#ifndef BAMSORTCRITERIA_H +#define BAMSORTCRITERIA_H +#include +#include +#include + +using namespace std; +using namespace BamTools; +using namespace BamTools::Internal; + +typedef binary_function BamAlignmentBFunction; +typedef binary_function ReadAlignmentBFunction; +typedef vector< BamAlignment >::iterator BamAlignmentIterator; + +class BamSortCriteria{ +private: + string sortCriteria; + bool descending; + template + IBamMultiMerger* getMergerDesc(); + static const string allowedTags[]; + static const string coreTags[]; + string getAllowedTags(); +public: + + BamSortCriteria():sortCriteria("QNAME"),descending(false){} + BamSortCriteria(string sortCriteria, bool descending):sortCriteria(sortCriteria),descending(descending){ + if(!isTagValid()){ + cerr << "BamSortCriteria ERROR: Requested sort order ("<{ + public: + + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return false; + } + + private: + double computeMean(const string& s){ + double mean =0.0; + int size = s.size(); + + for(int i=0;i{ + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + + if ( lhs.RefID != rhs.RefID ) + return lhs.RefID < rhs.RefID; + else + return lhs.Position < rhs.Position; + } + }; + + // QNAME + class SortLessThanName : public binary_function { + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return lhs.Name < rhs.Name; + } + }; + + // AS Alignment Score from BFAST + class 
SortLessThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ + public: + bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { + uint32_t lh, rh; + lhs.GetTag("AS",lh); + rhs.GetTag("AS",rh); + return lh + class SortGreaterThanReaderAlignment : public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) { + F f; + return !f(lhs,rhs); + } + }; + + + + template + class SortLessReaderAlignment: public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ + T t; + const BamAlignment l= *lhs.second; + const BamAlignment r= *rhs.second; + return t(l,r); + } + }; + + template + class SortGreaterThanBamAlignment{ + private: + F func; + public: + SortGreaterThanBamAlignment(F f):func(f){} + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return !func(lhs,rhs); + } + }; + +#endif // BAMSORTCRITERIA_H From ae24738885a19bcc854721547ff2fa13f161a872 Mon Sep 17 00:00:00 2001 From: riedl Date: Tue, 29 Mar 2011 10:11:31 +0200 Subject: [PATCH 2/8] modified: api/BamMultiReader.cpp modified: api/BamMultiReader.h deleted: api/BamSortCriteria.cpp~ deleted: api/BamSortCriteria.h~ modified: api/CMakeLists.txt modified: api/SamConstants.h modified: api/internal/BamMultiMerger_p.h modified: api/internal/BamMultiReader_p.cpp modified: api/internal/BamMultiReader_p.h modified: toolkit/bamtools_convert.h modified: toolkit/bamtools_sort.cpp modified: toolkit/bamtools_sort.h --- src/api/BamMultiReader.cpp | 4 +- src/api/BamMultiReader.h | 10 +- src/api/BamSortCriteria.cpp~ | 106 ------------------ src/api/BamSortCriteria.h~ | 142 ------------------------ src/api/CMakeLists.txt | 2 + src/api/SamConstants.h | 1 + src/api/internal/BamMultiMerger_p.h | 151 ++++++-------------------- src/api/internal/BamMultiReader_p.cpp | 30 ++--- src/api/internal/BamMultiReader_p.h | 9 +- src/toolkit/bamtools_convert.h | 2 +- src/toolkit/bamtools_sort.cpp | 
55 ++++------ src/toolkit/bamtools_sort.h | 4 +- 12 files changed, 85 insertions(+), 431 deletions(-) delete mode 100644 src/api/BamSortCriteria.cpp~ delete mode 100644 src/api/BamSortCriteria.h~ diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp index 06055df3..f40dc11a 100644 --- a/src/api/BamMultiReader.cpp +++ b/src/api/BamMultiReader.cpp @@ -391,6 +391,6 @@ bool BamMultiReader::SetRegion(const int& leftRefID, \param order expected sort order */ -void BamMultiReader::SetSortOrder(const SortOrder& order) { - d->SetSortOrder(order); +void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) { + d->SetSortOrder(sort); } diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h index cc49ec8e..fd948260 100644 --- a/src/api/BamMultiReader.h +++ b/src/api/BamMultiReader.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -27,10 +28,11 @@ namespace Internal { class API_EXPORT BamMultiReader { public: - enum SortOrder { SortedByPosition = 0 - , SortedByReadName + /*enum SortOrder { SortedByPosition = 0 + , SortedByReadName =1 + , SortedByAlignmentScore=2 , Unsorted - }; + };*/ // constructor / destructor public: @@ -78,7 +80,7 @@ class API_EXPORT BamMultiReader { bool GetNextAlignmentCore(BamAlignment& alignment); // sets the expected sorting order for reading across multiple BAM files - void SetSortOrder(const SortOrder& order); + void SetSortOrder(const BamSortCriteria& sort); // ---------------------- // access auxiliary data diff --git a/src/api/BamSortCriteria.cpp~ b/src/api/BamSortCriteria.cpp~ deleted file mode 100644 index 7aea77ec..00000000 --- a/src/api/BamSortCriteria.cpp~ +++ /dev/null @@ -1,106 +0,0 @@ -#include "BamSortCriteria.h" -#include -#include -#include - -const string BamSortCriteria::allowedTags[4]={"QNAME","POS","AS","CQ_MEAN"}; -const string BamSortCriteria::coreTags[1]={"POS"}; - - -bool BamSortCriteria::isTagCoreAttribute() { - int length = sizeof(coreTags)/sizeof(coreTags[0]); - for(int 
i=0;i -IBamMultiMerger* BamSortCriteria::getMergerDesc() { - if (descending) { - return new CommonMultiMerger >; - } - return new CommonMultiMerger; -} - -IBamMultiMerger* BamSortCriteria::getMerger(void ) { - if (sortCriteria=="QNAME") { - return getMergerDesc >(); - } else if (sortCriteria=="POS") { - return getMergerDesc >(); - } else if (sortCriteria=="AS") { - return getMergerDesc >(); - } else if (sortCriteria == "") { - return new UnsortedMultiMerger; - } - cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; - return 0; -} - - -void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterator end) { - /** - * It seems like this step could not be simplified for the - * ascending and descending case - */ - if (!descending) { - if (sortCriteria=="QNAME") { - sort(begin,end,SortLessThanName()); - } else if (sortCriteria=="POS") { - sort(begin,end,SortLessThanPosition()); - } else if (sortCriteria=="AS") { - sort(begin,end,SortLessThanAlignmentScore()); - } else { - cerr << "BamMultiReader ERROR: requested sort order ("<(SortLessThanName())); - } else if (sortCriteria=="POS") { - sort(begin,end,SortGreaterThanBamAlignment(SortLessThanPosition())); - } else if (sortCriteria=="AS") { - sort(begin,end,SortGreaterThanBamAlignment(SortLessThanAlignmentScore())); - } else { - cerr << "BamMultiReader ERROR: requested sort order ("< -#include -#include - -using namespace std; -using namespace BamTools; -using namespace BamTools::Internal; - -typedef binary_function BamAlignmentBFunction; -typedef binary_function ReadAlignmentBFunction; -typedef vector< BamAlignment >::iterator BamAlignmentIterator; - -class BamSortCriteria{ -private: - string sortCriteria; - bool descending; - template - IBamMultiMerger* getMergerDesc(); - static const string allowedTags[]; - static const string coreTags[]; - string getAllowedTags(); -public: - - BamSortCriteria():sortCriteria("QNAME"),descending(false){} - BamSortCriteria(string sortCriteria, bool 
descending):sortCriteria(sortCriteria),descending(descending){ - if(!isTagValid()){ - cerr << "BamSortCriteria ERROR: Requested sort order ("<{ - public: - - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return false; - } - - private: - double computeMean(const string& s){ - double mean =0.0; - int size = s.size(); - - for(int i=0;i{ - public: - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID < rhs.RefID; - else - return lhs.Position < rhs.Position; - } - }; - - // QNAME - class SortLessThanName : public binary_function { - public: - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name < rhs.Name; - } - }; - - // AS Alignment Score from BFAST - class SortLessThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ - public: - bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { - uint32_t lh, rh; - lhs.GetTag("AS",lh); - rhs.GetTag("AS",rh); - return lh - class SortGreaterThanReaderAlignment : public binary_function{ - public: - bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) { - F f; - return !f(lhs,rhs); - } - }; - - - - template - class SortLessReaderAlignment: public binary_function{ - public: - bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ - T t; - const BamAlignment l= *lhs.second; - const BamAlignment r= *rhs.second; - return t(l,r); - } - }; - - template - class SortGreaterThanBamAlignment{ - private: - F func; - public: - SortGreaterThanBamAlignment(F f):func(f){} - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return !func(lhs,rhs); - } - }; - -#endif // BAMSORTCRITERIA_H diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt index 57efba25..1cd9ca95 100644 --- a/src/api/CMakeLists.txt +++ b/src/api/CMakeLists.txt @@ -18,6 +18,7 @@ set( BamToolsAPISources BamMultiReader.cpp BamReader.cpp BamWriter.cpp + 
BamSortCriteria.cpp SamHeader.cpp SamReadGroup.cpp SamReadGroupDictionary.cpp @@ -66,6 +67,7 @@ ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir}) ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamSortCriteria.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir}) ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir}) diff --git a/src/api/SamConstants.h b/src/api/SamConstants.h index 6412b3d0..263c9183 100644 --- a/src/api/SamConstants.h +++ b/src/api/SamConstants.h @@ -63,6 +63,7 @@ const std::string SAM_CO_BEGIN_TOKEN = "@CO"; // HD:SO values const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate"; const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname"; +const std::string SAM_HD_SORTORDER_ALIGNMENTPOSTION = "alignmentposition"; const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted"; // HD:GO values diff --git a/src/api/internal/BamMultiMerger_p.h b/src/api/internal/BamMultiMerger_p.h index ae67eea2..92038f69 100644 --- a/src/api/internal/BamMultiMerger_p.h +++ b/src/api/internal/BamMultiMerger_p.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,7 +39,7 @@ typedef std::pair ReaderAlignment; class IBamMultiMerger { public: - IBamMultiMerger(void) { } + IBamMultiMerger(){ } virtual ~IBamMultiMerger(void) { } public: @@ -49,42 +50,15 @@ class IBamMultiMerger { virtual void Remove(BamReader* reader) =0; virtual int Size(void) const =0; virtual ReaderAlignment TakeFirst(void) =0; -}; - -// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position) -class PositionMultiMerger : public IBamMultiMerger { - - public: - PositionMultiMerger(void) : IBamMultiMerger() { } - ~PositionMultiMerger(void) { } - - public: - void Add(const ReaderAlignment& value); - void Clear(void); - const 
ReaderAlignment& First(void) const; - bool IsEmpty(void) const; - void Remove(BamReader* reader); - int Size(void) const; - ReaderAlignment TakeFirst(void); - - private: - typedef std::pair KeyType; - typedef ReaderAlignment ValueType; - typedef std::pair ElementType; - - typedef std::multimap ContainerType; - typedef ContainerType::iterator DataIterator; - typedef ContainerType::const_iterator DataConstIterator; - ContainerType m_data; }; -// IBamMultiMerger implementation - sorted on BamAlignment: Name -class ReadNameMultiMerger : public IBamMultiMerger { - public: - ReadNameMultiMerger(void) : IBamMultiMerger() { } - ~ReadNameMultiMerger(void) { } +template +class CommonMultiMerger : public IBamMultiMerger{ + public: + CommonMultiMerger() { } + ~CommonMultiMerger(void) { } public: void Add(const ReaderAlignment& value); @@ -94,16 +68,11 @@ class ReadNameMultiMerger : public IBamMultiMerger { void Remove(BamReader* reader); int Size(void) const; ReaderAlignment TakeFirst(void); - private: - typedef std::string KeyType; typedef ReaderAlignment ValueType; - typedef std::pair ElementType; - - typedef std::multimap ContainerType; - typedef ContainerType::iterator DataIterator; - typedef ContainerType::const_iterator DataConstIterator; - + typedef std::multiset ContainerType; + typedef typename ContainerType::iterator DataIterator; + typedef typename ContainerType::const_iterator DataConstIterator; ContainerType m_data; }; @@ -132,40 +101,40 @@ class UnsortedMultiMerger : public IBamMultiMerger { ContainerType m_data; }; -// ------------------------------------------ -// PositionMultiMerger implementation -inline void PositionMultiMerger::Add(const ReaderAlignment& value) { - const KeyType key( value.second->RefID, value.second->Position ); - m_data.insert( ElementType(key, value) ); +//--------------------------------------------------------------------------- +// CommonMultiMerger implementation +template +inline void CommonMultiMerger::Add(const ReaderAlignment& 
value) { + m_data.insert( value); } -inline void PositionMultiMerger::Clear(void) { +template +inline void CommonMultiMerger::Clear(void) { m_data.clear(); } -inline const ReaderAlignment& PositionMultiMerger::First(void) const { - const ElementType& entry = (*m_data.begin()); - return entry.second; +template +inline const ReaderAlignment& CommonMultiMerger::First(void) const { + const ValueType& entry = (*m_data.begin()); + return entry; } -inline bool PositionMultiMerger::IsEmpty(void) const { +template +inline bool CommonMultiMerger::IsEmpty(void) const { return m_data.empty(); } - -inline void PositionMultiMerger::Remove(BamReader* reader) { - +template +inline void CommonMultiMerger::Remove(BamReader* reader) { if ( reader == 0 ) return; const std::string filenameToRemove = reader->GetFilename(); - // iterate over readers in cache DataIterator dataIter = m_data.begin(); DataIterator dataEnd = m_data.end(); for ( ; dataIter != dataEnd; ++dataIter ) { - const ValueType& entry = (*dataIter).second; + const ValueType& entry = (*dataIter); const BamReader* entryReader = entry.first; if ( entryReader == 0 ) continue; - // remove iterator on match if ( entryReader->GetFilename() == filenameToRemove ) { m_data.erase(dataIter); @@ -173,74 +142,20 @@ inline void PositionMultiMerger::Remove(BamReader* reader) { } } } - -inline int PositionMultiMerger::Size(void) const { +template +inline int CommonMultiMerger::Size(void) const { return m_data.size(); } - -inline ReaderAlignment PositionMultiMerger::TakeFirst(void) { - DataIterator first = m_data.begin(); - ReaderAlignment next = (*first).second; - m_data.erase(first); - return next; -} - -// ------------------------------------------ -// ReadNameMultiMerger implementation - -inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) { - const KeyType key(value.second->Name); - m_data.insert( ElementType(key, value) ); -} - -inline void ReadNameMultiMerger::Clear(void) { - m_data.clear(); -} - -inline const 
ReaderAlignment& ReadNameMultiMerger::First(void) const { - const ElementType& entry = (*m_data.begin()); - return entry.second; -} - -inline bool ReadNameMultiMerger::IsEmpty(void) const { - return m_data.empty(); -} - -inline void ReadNameMultiMerger::Remove(BamReader* reader) { - - if ( reader == 0 ) return; - const std::string filenameToRemove = reader->GetFilename(); - - // iterate over readers in cache - DataIterator dataIter = m_data.begin(); - DataIterator dataEnd = m_data.end(); - for ( ; dataIter != dataEnd; ++dataIter ) { - const ValueType& entry = (*dataIter).second; - const BamReader* entryReader = entry.first; - if ( entryReader == 0 ) continue; - - // remove iterator on match - if ( entryReader->GetFilename() == filenameToRemove ) { - m_data.erase(dataIter); - return; - } - } - -} - -inline int ReadNameMultiMerger::Size(void) const { - return m_data.size(); -} - -inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) { +template +inline ReaderAlignment CommonMultiMerger::TakeFirst(void) { DataIterator first = m_data.begin(); - ReaderAlignment next = (*first).second; + ReaderAlignment next = (*first); m_data.erase(first); return next; } -// ------------------------------------------ -// UnsortedMultiMerger implementation +//---------------------------------------------------------------- +// MultiMerger for Unsorted Files inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) { m_data.push_back(value); diff --git a/src/api/internal/BamMultiReader_p.cpp b/src/api/internal/BamMultiReader_p.cpp index 583085c7..690d10fa 100644 --- a/src/api/internal/BamMultiReader_p.cpp +++ b/src/api/internal/BamMultiReader_p.cpp @@ -12,6 +12,7 @@ #include #include #include +#include using namespace BamTools; using namespace BamTools::Internal; @@ -26,7 +27,6 @@ using namespace std; BamMultiReaderPrivate::BamMultiReaderPrivate(void) : m_alignments(0) , m_isCoreMode(false) - , m_sortOrder(BamMultiReader::SortedByPosition) { } // dtor @@ -97,7 +97,7 @@ void 
BamMultiReaderPrivate::CloseFiles(const vector& filenames) { // make sure alignment cache is cleared if all readers are now closed if ( m_readers.empty() && m_alignments != 0 ) m_alignments->Clear(); -} +}// // creates index files for BAM files that don't have them bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { @@ -119,16 +119,6 @@ bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) { return result; } -IBamMultiMerger* BamMultiReaderPrivate::CreateMergerForCurrentSortOrder(void) const { - switch ( m_sortOrder ) { - case ( BamMultiReader::SortedByPosition ) : return new PositionMultiMerger; - case ( BamMultiReader::SortedByReadName ) : return new ReadNameMultiMerger; - case ( BamMultiReader::Unsorted ) : return new UnsortedMultiMerger; - default : - cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; - return 0; - } -} const string BamMultiReaderPrivate::ExtractReadGroup(const string& headerLine) const { @@ -450,7 +440,7 @@ bool BamMultiReaderPrivate::Open(const vector& filenames) { // create alignment cache if neccessary if ( m_alignments == 0 ) { - m_alignments = CreateMergerForCurrentSortOrder(); + m_alignments = m_sort.getMerger();//CreateMergerForCurrentSortOrder(); if ( m_alignments == 0 ) return false; } @@ -603,7 +593,7 @@ bool BamMultiReaderPrivate::RewindReaders(void) { void BamMultiReaderPrivate::SaveNextAlignment(BamReader* reader, BamAlignment* alignment) { // must be in core mode && NOT sorting by read name to call GNACore() - if ( m_isCoreMode && m_sortOrder != BamMultiReader::SortedByReadName ) { + if ( m_isCoreMode && m_sort.isTagCoreAttribute()) { if ( reader->GetNextAlignmentCore(*alignment) ) m_alignments->Add( make_pair(reader, alignment) ); } @@ -657,16 +647,18 @@ bool BamMultiReaderPrivate::SetRegion(const BamRegion& region) { return true; } -void BamMultiReaderPrivate::SetSortOrder(const BamMultiReader::SortOrder& order) { +void BamMultiReaderPrivate::SetSortOrder(const 
BamSortCriteria& sort) { // skip if no change needed - if ( m_sortOrder == order ) return; - + //if ( (m_sort.getSortCriteria() == sort.getSortCriteria) && + // (m_sort.isDescending() == sort.isDescending())) return; + + //m_sort = sort; // set new sort order - m_sortOrder = order; + m_sort = sort; // create new alignment cache based on sort order - IBamMultiMerger* newAlignmentCache = CreateMergerForCurrentSortOrder(); + IBamMultiMerger* newAlignmentCache = m_sort.getMerger();//CreateMergerForCurrentSortOrder(); if ( newAlignmentCache == 0 ) return; // print error? // copy old cache contents to new cache diff --git a/src/api/internal/BamMultiReader_p.h b/src/api/internal/BamMultiReader_p.h index b34fb0c5..d240bda1 100644 --- a/src/api/internal/BamMultiReader_p.h +++ b/src/api/internal/BamMultiReader_p.h @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -57,7 +58,7 @@ class BamMultiReaderPrivate { bool GetNextAlignment(BamAlignment& al); bool GetNextAlignmentCore(BamAlignment& al); bool HasOpenReaders(void); - void SetSortOrder(const BamMultiReader::SortOrder& order); + void SetSortOrder(const BamSortCriteria& sort); // access auxiliary data SamHeader GetHeader(void) const; @@ -75,7 +76,9 @@ class BamMultiReaderPrivate { // 'internal' methods public: - IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; + template + IBamMultiMerger* CreateMergerForCurrentSortOrderDesc(bool desc) const; + //IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; const std::string ExtractReadGroup(const std::string& headerLine) const; bool HasAlignmentData(void) const; bool LoadNextAlignment(BamAlignment& al); @@ -93,7 +96,7 @@ class BamMultiReaderPrivate { IBamMultiMerger* m_alignments; bool m_isCoreMode; - BamMultiReader::SortOrder m_sortOrder; + BamSortCriteria m_sort; }; } // namespace Internal diff --git a/src/toolkit/bamtools_convert.h b/src/toolkit/bamtools_convert.h index 8dd68572..f711f03c 100644 --- a/src/toolkit/bamtools_convert.h +++ 
b/src/toolkit/bamtools_convert.h @@ -35,4 +35,4 @@ class ConvertTool : public AbstractTool { } // namespace BamTools -#endif // BAMTOOLS_CONVERT_H \ No newline at end of file +#endif // BAMTOOLS_CONVERT_H diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index 8d18f671..174ab9d6 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -14,6 +14,7 @@ #include #include #include +#include using namespace BamTools; #include @@ -39,21 +40,6 @@ namespace BamTools { // ----------------------------------- // comparison objects (for sorting) - struct SortLessThanPosition { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID < rhs.RefID; - else - return lhs.Position < rhs.Position; - } - }; - - struct SortLessThanName { - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name < rhs.Name; - } - }; - } // namespace BamTools // --------------------------------------------- @@ -77,10 +63,13 @@ class SortTool::SortToolPrivate { bool MergeSortedRuns(void); bool WriteTempFile(const vector& buffer, const string& tempFilename); void SortBuffer(vector& buffer); + template + BamAlignmentBFunction& getSortingFunction(); // data members private: SortTool::SortSettings* m_settings; + BamSortCriteria m_sort; string m_tempFilenameStub; int m_numberOfRuns; string m_headerText; @@ -98,7 +87,9 @@ struct SortTool::SortSettings { bool HasMaxBufferCount; bool HasMaxBufferMemory; bool HasOutputBamFilename; - bool IsSortingByName; + bool IsSortDescending; + string SortCriteria; + bool HasSortCriteria; // filenames string InputBamFilename; @@ -114,7 +105,9 @@ struct SortTool::SortSettings { , HasMaxBufferCount(false) , HasMaxBufferMemory(false) , HasOutputBamFilename(false) - , IsSortingByName(false) + , IsSortDescending(false) + , SortCriteria("QNAME") + , HasSortCriteria(false) , InputBamFilename(Options::StandardIn()) , 
OutputBamFilename(Options::StandardOut()) , MaxBufferCount(SORT_DEFAULT_MAX_BUFFER_COUNT) @@ -139,7 +132,9 @@ SortTool::SortTool(void) Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut()); OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods"); - Options::AddOption("-byname", "sort by alignment name", m_settings->IsSortingByName, SortOpts); + Options::AddValueOption("-tagname", "sort by tag name", "(QNAME, AS, QPOS)", "", m_settings->HasSortCriteria, m_settings->SortCriteria, SortOpts, Options::StandardOut()); + Options::AddOption("-desc", "sort values descending", m_settings->IsSortDescending, SortOpts); +// Options::AddOption("-byname", "sort by alignment name", m_settings->SortCriteria, SortOpts); OptionGroup* MemOpts = Options::CreateOptionGroup("Memory Settings"); Options::AddValueOption("-n", "count", "max number of alignments per tempfile", "", m_settings->HasMaxBufferCount, m_settings->MaxBufferCount, MemOpts, SORT_DEFAULT_MAX_BUFFER_COUNT); @@ -178,6 +173,7 @@ int SortTool::Run(int argc, char* argv[]) { // constructor SortTool::SortToolPrivate::SortToolPrivate(SortTool::SortSettings* settings) : m_settings(settings) + , m_sort(settings->SortCriteria,settings->IsSortDescending) , m_numberOfRuns(0) { // set filename stub depending on inputfile path @@ -206,9 +202,7 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { // get basic data that will be shared by all temp/output files SamHeader header = inputReader.GetHeader(); - header.SortOrder = ( m_settings->IsSortingByName - ? 
Constants::SAM_HD_SORTORDER_QUERYNAME - : Constants::SAM_HD_SORTORDER_COORDINATE ); + header.SortOrder = m_sort.getSamHeaderSort(); m_headerText = header.ToString(); m_references = inputReader.GetReferenceData(); @@ -219,7 +213,7 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { // if sorting by name, we need to generate full char data // so can't use GetNextAlignmentCore() - if ( m_settings->IsSortingByName ) { + if (!m_sort.isTagCoreAttribute() ) { // iterate through file while ( inputReader.GetNextAlignment(al)) { @@ -292,10 +286,7 @@ bool SortTool::SortToolPrivate::MergeSortedRuns(void) { } // set sort order for merge - if ( m_settings->IsSortingByName ) - multiReader.SetSortOrder(BamMultiReader::SortedByReadName); - else - multiReader.SetSortOrder(BamMultiReader::SortedByPosition); + multiReader.SetSortOrder(m_sort); // open writer for our completely sorted output BAM file BamWriter mergedWriter; @@ -336,16 +327,12 @@ bool SortTool::SortToolPrivate::Run(void) { else return false; } - + + + void SortTool::SortToolPrivate::SortBuffer(vector& buffer) { - - // ** add further custom sort options later ?? 
** - // sort buffer by desired method - if ( m_settings->IsSortingByName ) - sort ( buffer.begin(), buffer.end(), SortLessThanName() ); - else - sort ( buffer.begin(), buffer.end(), SortLessThanPosition() ); + m_sort.sortBuffer(buffer.begin(),buffer.end()); } diff --git a/src/toolkit/bamtools_sort.h b/src/toolkit/bamtools_sort.h index 0241b025..28352cac 100644 --- a/src/toolkit/bamtools_sort.h +++ b/src/toolkit/bamtools_sort.h @@ -12,7 +12,7 @@ #define BAMTOOLS_SORT_H #include "bamtools_tool.h" - +#include "api/BamSortCriteria.h" namespace BamTools { class SortTool : public AbstractTool { @@ -28,7 +28,7 @@ class SortTool : public AbstractTool { private: struct SortSettings; SortSettings* m_settings; - + BamSortCriteria m_sort; struct SortToolPrivate; SortToolPrivate* m_impl; }; From 68a8086e4ae94e10d70cb192ec26f8df2b44fe30 Mon Sep 17 00:00:00 2001 From: riedl Date: Tue, 29 Mar 2011 11:10:58 +0200 Subject: [PATCH 3/8] Rename ALIGNMENTPOSITION to ALIGNMENTSCORE within SamConstants.h and BamSortCriteria.cpp --- src/api/BamSortCriteria.cpp | 2 +- src/api/SamConstants.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index d03443c8..25d4d033 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -98,7 +98,7 @@ string BamSortCriteria::getSamHeaderSort(){ }else if(sortCriteria =="POS"){ return Constants::SAM_HD_SORTORDER_COORDINATE; }else if(sortCriteria =="AS"){ - return Constants::SAM_HD_SORTORDER_ALIGNMENTPOSTION; + return Constants::SAM_HD_SORTORDER_ALIGNMENTSCORE; } cerr<<"bamtools sort ERROR: Sort criteria " << sortCriteria <<" could not be found"< Date: Wed, 30 Mar 2011 09:12:52 +0200 Subject: [PATCH 4/8] Segmentation fault solved. Reason: sort needs a strict weak ordering (comparing x with itself must return false). 
Therefore one sorting criteria can not be flipped to offer sorting in the other direction --- src/api/BamSortCriteria.cpp | 44 ++++++++++++++-------- src/api/BamSortCriteria.h | 71 ++++++++++++++++++++++++++--------- src/toolkit/bamtools_sort.cpp | 15 ++++---- 3 files changed, 90 insertions(+), 40 deletions(-) diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index 25d4d033..23a4501f 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -3,7 +3,7 @@ #include #include -const string BamSortCriteria::allowedTags[4]={"QNAME","POS","AS"}; +const string BamSortCriteria::allowedTags[3]={"QNAME","POS","AS"}; const string BamSortCriteria::coreTags[1]={"POS"}; @@ -38,24 +38,37 @@ bool BamSortCriteria::isTagValid() { return false; } - +/* template IBamMultiMerger* BamSortCriteria::getMergerDesc() { if (descending) { return new CommonMultiMerger >; } return new CommonMultiMerger; -} +}*/ IBamMultiMerger* BamSortCriteria::getMerger(void ) { - if (sortCriteria=="QNAME") { - return getMergerDesc >(); - } else if (sortCriteria=="POS") { - return getMergerDesc >(); - } else if (sortCriteria=="AS") { - return getMergerDesc >(); - } else if (sortCriteria == "") { - return new UnsortedMultiMerger; + + if(descending){ + if (sortCriteria=="QNAME") { + return new CommonMultiMerger >(); + } else if (sortCriteria=="POS") { + return new CommonMultiMerger >(); + } else if (sortCriteria=="AS") { + return new CommonMultiMerger >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } + }else{ + if (sortCriteria=="QNAME") { + return new CommonMultiMerger >(); + } else if (sortCriteria=="POS") { + return new CommonMultiMerger >(); + } else if (sortCriteria=="AS") { + return new CommonMultiMerger >(); + } else if (sortCriteria == "") { + return new UnsortedMultiMerger; + } } cerr << "BamMultiReader ERROR: requested sort order is unknown" << endl; return 0; @@ -67,7 +80,7 @@ void BamSortCriteria::sortBuffer(BamAlignmentIterator 
begin, BamAlignmentIterato * It seems like this step could not be simplified for the * ascending and descending case */ - if (!descending) { + if (!descending ) { if (sortCriteria=="QNAME") { sort(begin,end,SortLessThanName()); } else if (sortCriteria=="POS") { @@ -77,13 +90,14 @@ void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterato } else { cerr << "BamMultiReader ERROR: requested sort order ("<(SortLessThanName())); + sort(begin,end,SortGreaterThanName()); } else if (sortCriteria=="POS") { - sort(begin,end,SortGreaterThanBamAlignment(SortLessThanPosition())); + sort(begin,end,SortGreaterThanPosition()); } else if (sortCriteria=="AS") { - sort(begin,end,SortGreaterThanBamAlignment(SortLessThanAlignmentScore())); + sort(begin,end,SortGreaterThanAlignmentScore()); } else { cerr << "BamMultiReader ERROR: requested sort order ("<{ + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + + if ( lhs.RefID != rhs.RefID ) + return lhs.RefID > rhs.RefID; + else + return lhs.Position > rhs.Position; + } + }; + // QNAME class SortLessThanName : public binary_function { public: @@ -69,15 +80,32 @@ class SortLessThanPosition : public binary_function { + public: + bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + return lhs.Name > rhs.Name; + } + }; // AS Alignment Score from BFAST class SortLessThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ public: bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { - uint32_t lh, rh; - lhs.GetTag("AS",lh); - rhs.GetTag("AS",rh); - return lh{ + public: + bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { + uint32_t lh, rh; + lhs.GetTag("AS",lh); + rhs.GetTag("AS",rh); + return lh > rh; } }; @@ -87,18 +115,26 @@ class SortLessThanPosition : public binary_function + template + class SortReaderAlignment: public binary_function{ + public: + bool operator() (const ReaderAlignment& lhs, const 
ReaderAlignment& rhs){ + T t; + const BamAlignment l= *lhs.second; + const BamAlignment r= *rhs.second; + return t(l,r); + } + }; + /*template class SortGreaterThanReaderAlignment : public binary_function{ public: bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) { F f; return !f(lhs,rhs); } - }; - - + };*/ - template + /*template class SortLessReaderAlignment: public binary_function{ public: bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ @@ -107,17 +143,16 @@ class SortLessThanPosition : public binary_function class SortGreaterThanBamAlignment{ - private: - F func; public: - SortGreaterThanBamAlignment(F f):func(f){} + SortGreaterThanBamAlignment(){} bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { + F func; return !func(lhs,rhs); } - }; + };*/ #endif // BAMSORTCRITERIA_H diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index 174ab9d6..d27d7437 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -62,7 +62,7 @@ class SortTool::SortToolPrivate { bool HandleBufferContents(vector& buffer); bool MergeSortedRuns(void); bool WriteTempFile(const vector& buffer, const string& tempFilename); - void SortBuffer(vector& buffer); + //void SortBuffer(vector& buffer); template BamAlignmentBFunction& getSortingFunction(); @@ -132,7 +132,7 @@ SortTool::SortTool(void) Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputBamFilename, IO_Opts, Options::StandardOut()); OptionGroup* SortOpts = Options::CreateOptionGroup("Sorting Methods"); - Options::AddValueOption("-tagname", "sort by tag name", "(QNAME, AS, QPOS)", "", m_settings->HasSortCriteria, m_settings->SortCriteria, SortOpts, Options::StandardOut()); + Options::AddValueOption("-tagname", "sort by tag name", "("+BamSortCriteria::getAllowedTags()+")", "", m_settings->HasSortCriteria, m_settings->SortCriteria, SortOpts, 
Options::StandardOut()); Options::AddOption("-desc", "sort values descending", m_settings->IsSortDescending, SortOpts); // Options::AddOption("-byname", "sort by alignment name", m_settings->SortCriteria, SortOpts); @@ -238,8 +238,9 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { buffer.push_back(al); // if buffer is full, handle contents (sort & write to temp file) - if ( buffer.size() == m_settings->MaxBufferCount ) + if ( buffer.size() == m_settings->MaxBufferCount ){ HandleBufferContents(buffer); + } } } @@ -255,7 +256,8 @@ bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { bool SortTool::SortToolPrivate::HandleBufferContents(vector& buffer ) { // do sorting - SortBuffer(buffer); + m_sort.sortBuffer(buffer.begin(),buffer.end()); + //SortBuffer(buffer); // write sorted contents to temp file, store success/fail stringstream tempStr; @@ -268,7 +270,6 @@ bool SortTool::SortToolPrivate::HandleBufferContents(vector& buffe // clear buffer contents & update run counter buffer.clear(); ++m_numberOfRuns; - // return success/fail of writing to temp file // TODO: a failure returned here is not actually caught and handled anywhere return success; @@ -329,11 +330,11 @@ bool SortTool::SortToolPrivate::Run(void) { } - +/* void SortTool::SortToolPrivate::SortBuffer(vector& buffer) { // sort buffer by desired method m_sort.sortBuffer(buffer.begin(),buffer.end()); -} +}*/ bool SortTool::SortToolPrivate::WriteTempFile(const vector& buffer, const string& tempFilename) { From b9e2878920253dc01cbe7117664d8ea36a31f045 Mon Sep 17 00:00:00 2001 From: riedl Date: Wed, 30 Mar 2011 13:03:54 +0200 Subject: [PATCH 5/8] Add the signature BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending =true) and removing comments --- src/api/BamMultiReader.cpp | 31 ++++++++++++++++++++--- src/api/BamMultiReader.h | 13 +++++----- src/api/BamSortCriteria.cpp | 10 ++------ src/api/BamSortCriteria.h | 39 ++++++----------------------- 
src/api/internal/BamMultiReader_p.h | 1 + src/toolkit/bamtools_sort.cpp | 7 +----- 6 files changed, 47 insertions(+), 54 deletions(-) diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp index f40dc11a..4a3e2a1d 100644 --- a/src/api/BamMultiReader.cpp +++ b/src/api/BamMultiReader.cpp @@ -381,16 +381,41 @@ bool BamMultiReader::SetRegion(const int& leftRefID, return d->SetRegion(region); } -/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order) +/*! \fn void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) \brief Sets the expected sorting order for reading across multiple BAM files. - Default is BamMultiReader::SortedByPosition. + Default is BamMultiReader::SortedByPosition and sorting ascending. The SortOrder determines how the reader determines which alignment is "next" from among its open readers. - \param order expected sort order + \param sort Sorting object that specifies the expected sort order and direction */ void BamMultiReader::SetSortOrder(const BamSortCriteria& sort) { d->SetSortOrder(sort); } + +/*! \fn void BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending) + \brief Sets the expected sorting order for reading across multiple BAM files. + + Default is BamMultiReader::SortedByPosition and sorting ascending. + + The SortOrder determines how the reader determines which alignment is "next" + from among its open readers. 
+ + \param order expected sort order + \param ascending sorting direction +*/ +void BamTools::BamMultiReader::SetSortOrder(const SortOrder& order, const bool& ascending) +{ + string sortCol=""; + switch(order){ + case BamMultiReader::SortedByAlignmentScore:sortCol="AS";break; + case BamMultiReader::SortedByPosition:sortCol="POS";break; + case BamMultiReader::SortedByReadName: sortCol="QNAME";break; + case BamMultiReader::Unsorted:sortCol="";break; + default:cerr<<"BamMultiReader.cpp ERROR: The selected sort order is not known\n"; + } + BamSortCriteria sort(sortCol, !ascending); + d->SetSortOrder(sort); +} diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h index fd948260..68271607 100644 --- a/src/api/BamMultiReader.h +++ b/src/api/BamMultiReader.h @@ -20,7 +20,7 @@ #include namespace BamTools { - + class BamSortCriteria; namespace Internal { class BamMultiReaderPrivate; } // namespace Internal @@ -28,11 +28,11 @@ namespace Internal { class API_EXPORT BamMultiReader { public: - /*enum SortOrder { SortedByPosition = 0 - , SortedByReadName =1 - , SortedByAlignmentScore=2 + enum SortOrder { SortedByPosition =0 + , SortedByReadName + , SortedByAlignmentScore , Unsorted - };*/ + }; // constructor / destructor public: @@ -81,7 +81,8 @@ class API_EXPORT BamMultiReader { // sets the expected sorting order for reading across multiple BAM files void SetSortOrder(const BamSortCriteria& sort); - + + void SetSortOrder(const SortOrder& order, const bool& ascending =true); // ---------------------- // access auxiliary data // ---------------------- diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index 23a4501f..25412cdf 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -3,6 +3,7 @@ #include #include +using namespace BamTools; const string BamSortCriteria::allowedTags[3]={"QNAME","POS","AS"}; const string BamSortCriteria::coreTags[1]={"POS"}; @@ -38,14 +39,7 @@ bool BamSortCriteria::isTagValid() { return false; } -/* 
-template -IBamMultiMerger* BamSortCriteria::getMergerDesc() { - if (descending) { - return new CommonMultiMerger >; - } - return new CommonMultiMerger; -}*/ + IBamMultiMerger* BamSortCriteria::getMerger(void ) { diff --git a/src/api/BamSortCriteria.h b/src/api/BamSortCriteria.h index d326942a..de5e3094 100644 --- a/src/api/BamSortCriteria.h +++ b/src/api/BamSortCriteria.h @@ -1,29 +1,33 @@ #ifndef BAMSORTCRITERIA_H #define BAMSORTCRITERIA_H + #include #include #include using namespace std; -using namespace BamTools; + + using namespace BamTools::Internal; +namespace BamTools { typedef binary_function BamAlignmentBFunction; typedef binary_function ReadAlignmentBFunction; typedef vector< BamAlignment >::iterator BamAlignmentIterator; + class BamSortCriteria{ private: string sortCriteria; bool descending; - template - IBamMultiMerger* getMergerDesc(); static const string allowedTags[]; static const string coreTags[]; public: static string getAllowedTags(); BamSortCriteria():sortCriteria("QNAME"),descending(false){} + + BamSortCriteria(string sortCriteria, bool descending):sortCriteria(sortCriteria),descending(descending){ if(!isTagValid()){ cerr << "BamSortCriteria ERROR: Requested sort order ("< - class SortGreaterThanReaderAlignment : public binary_function{ - public: - bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs) { - F f; - return !f(lhs,rhs); - } - };*/ - /*template - class SortLessReaderAlignment: public binary_function{ - public: - bool operator() (const ReaderAlignment& lhs, const ReaderAlignment& rhs){ - T t; - const BamAlignment l= *lhs.second; - const BamAlignment r= *rhs.second; - return t(l,r); - } - };*/ - /* - template - class SortGreaterThanBamAlignment{ - public: - SortGreaterThanBamAlignment(){} - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - F func; - return !func(lhs,rhs); - } - };*/ +} #endif // BAMSORTCRITERIA_H diff --git a/src/api/internal/BamMultiReader_p.h 
b/src/api/internal/BamMultiReader_p.h index d240bda1..189776ca 100644 --- a/src/api/internal/BamMultiReader_p.h +++ b/src/api/internal/BamMultiReader_p.h @@ -32,6 +32,7 @@ namespace Internal { class IBamMultiMerger; + class BamMultiReaderPrivate { // constructor / destructor diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index d27d7437..1585886a 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -330,12 +330,7 @@ bool SortTool::SortToolPrivate::Run(void) { } -/* -void SortTool::SortToolPrivate::SortBuffer(vector& buffer) { - // sort buffer by desired method - m_sort.sortBuffer(buffer.begin(),buffer.end()); -}*/ - + bool SortTool::SortToolPrivate::WriteTempFile(const vector& buffer, const string& tempFilename) { From 6587d9075ef016c82d2475f694110cef9343ce2c Mon Sep 17 00:00:00 2001 From: riedl Date: Wed, 30 Mar 2011 13:52:04 +0200 Subject: [PATCH 6/8] Check the SortCriteria for the bamtools_sort given with the -tagname flag --- src/api/BamSortCriteria.cpp | 11 +++++++---- src/api/BamSortCriteria.h | 1 + src/toolkit/bamtools_sort.cpp | 6 +++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index 25412cdf..f4d4cdbe 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -28,17 +28,20 @@ string BamSortCriteria::getAllowedTags(){ return s; } - -bool BamSortCriteria::isTagValid() { - int length = sizeof(allowedTags)/sizeof(allowedTags[0]); +bool BamSortCriteria::isTagValid(const string& tag) { + int length = sizeof(allowedTags)/sizeof(allowedTags[0]); for(int i=0;iSortCriteria)){ + cerr << "bamtools sort ERROR: The tag "<SortCriteria<<" is not valid...aborting\n "; + return 1; + } // run internal SortTool implementation, return success/fail m_impl = new SortToolPrivate(m_settings); - + if ( m_impl->Run() ) return 0; else return 1; } From 047a543decb798a9edd5f3b03a367659463a5f11 Mon Sep 17 00:00:00 2001 From: riedl 
Date: Thu, 31 Mar 2011 11:37:28 +0200 Subject: [PATCH 7/8] Extract the comparison to single templates and use Less/Greater function of the STL. Correction: use int32_t instead of uint32_t. The unsigned type spoiled the order in files having entries without POS/AS --- src/api/BamSortCriteria.cpp | 32 ++++++++++------- src/api/BamSortCriteria.h | 65 ++++++++++++++--------------------- src/toolkit/bamtools_sort.cpp | 2 +- 3 files changed, 45 insertions(+), 54 deletions(-) diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index f4d4cdbe..cd098bd4 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -8,6 +8,8 @@ const string BamSortCriteria::allowedTags[3]={"QNAME","POS","AS"}; const string BamSortCriteria::coreTags[1]={"POS"}; + + bool BamSortCriteria::isTagCoreAttribute() { int length = sizeof(coreTags)/sizeof(coreTags[0]); for(int i=0;i >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria=="POS") { - return new CommonMultiMerger >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria=="AS") { - return new CommonMultiMerger >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria == "") { return new UnsortedMultiMerger; } }else{ if (sortCriteria=="QNAME") { - return new CommonMultiMerger >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria=="POS") { - return new CommonMultiMerger >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria=="AS") { - return new CommonMultiMerger >(); + return new CommonMultiMerger > > >(); } else if (sortCriteria == "") { return new UnsortedMultiMerger; } @@ -79,22 +81,26 @@ void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterato */ if (!descending ) { if (sortCriteria=="QNAME") { - sort(begin,end,SortLessThanName()); + //sort(begin,end,SortLessThanName()); + sort(begin,end,SortName >()); } else if (sortCriteria=="POS") { - sort(begin,end,SortLessThanPosition()); + //sort(begin,end,SortLessThanPosition()); + 
sort(begin,end,SortPosition >()); } else if (sortCriteria=="AS") { - sort(begin,end,SortLessThanAlignmentScore()); + sort(begin,end,SortAlignmentScore >()); } else { cerr << "BamMultiReader ERROR: requested sort order ("< >()); } else if (sortCriteria=="POS") { - sort(begin,end,SortGreaterThanPosition()); + //sort(begin,end,SortGreaterThanPosition()); + sort(begin,end,SortPosition >()); } else if (sortCriteria=="AS") { - sort(begin,end,SortGreaterThanAlignmentScore()); + sort(begin,end,SortAlignmentScore >()); } else { cerr << "BamMultiReader ERROR: requested sort order ("<{ +template +class SortPosition : public binary_function{ public: bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID < rhs.RefID; - else - return lhs.Position < rhs.Position; + COMP c; + if ( lhs.RefID != rhs.RefID ){ + return c(lhs.RefID, rhs.RefID); + }else { + int32_t lh,rh; + lh = lhs.Position; + rh = rhs.Position; + // printf("%d %d %d %d\n",lh,rh,lhs.Position,rhs.Position); + // return c(lhs.Position, rhs.Position); + return c(lh,rh); + } } }; - class SortGreaterThanPosition : public binary_function{ - public: - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - - if ( lhs.RefID != rhs.RefID ) - return lhs.RefID > rhs.RefID; - else - return lhs.Position > rhs.Position; - } - }; // QNAME - class SortLessThanName : public binary_function { - public: - bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name < rhs.Name; - } - }; - - class SortGreaterThanName : public binary_function { + template + class SortName : public binary_function { public: bool operator() (const BamAlignment& lhs, const BamAlignment& rhs) { - return lhs.Name > rhs.Name; + COMP c; + return c(lhs.Name, rhs.Name); } }; - + // AS Alignment Score from BFAST - class SortLessThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ + + template + class SortAlignmentScore : public 
BamAlignmentBFunction{ public: bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { - uint32_t lh, rh; + int32_t lh, rh; lhs.GetTag("AS",lh); rhs.GetTag("AS",rh); - return lh < rh; + + COMP c; + return c(lh,rh); } }; - class SortGreaterThanAlignmentScore : public BamAlignmentBFunction{//binary_function{ - public: - bool operator() ( const BamAlignment& lhs, const BamAlignment& rhs) const { - uint32_t lh, rh; - lhs.GetTag("AS",lh); - rhs.GetTag("AS",rh); - return lh > rh; - } - }; //-------------------------------------------------------- diff --git a/src/toolkit/bamtools_sort.cpp b/src/toolkit/bamtools_sort.cpp index 85049127..68153fa0 100644 --- a/src/toolkit/bamtools_sort.cpp +++ b/src/toolkit/bamtools_sort.cpp @@ -160,7 +160,7 @@ int SortTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); if(!BamSortCriteria::isTagValid(m_settings->SortCriteria)){ - cerr << "bamtools sort ERROR: The tag "<SortCriteria<<" is not valid...aborting\n "; + cerr << "bamtools sort ERROR: The tag "<SortCriteria<<" is not valid...Aborting"< Date: Thu, 31 Mar 2011 11:38:19 +0200 Subject: [PATCH 8/8] src cleaning --- src/api/BamSortCriteria.cpp | 4 ---- src/api/BamSortCriteria.h | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/api/BamSortCriteria.cpp b/src/api/BamSortCriteria.cpp index cd098bd4..cd815fcb 100644 --- a/src/api/BamSortCriteria.cpp +++ b/src/api/BamSortCriteria.cpp @@ -81,10 +81,8 @@ void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterato */ if (!descending ) { if (sortCriteria=="QNAME") { - //sort(begin,end,SortLessThanName()); sort(begin,end,SortName >()); } else if (sortCriteria=="POS") { - //sort(begin,end,SortLessThanPosition()); sort(begin,end,SortPosition >()); } else if (sortCriteria=="AS") { sort(begin,end,SortAlignmentScore >()); @@ -94,10 +92,8 @@ void BamSortCriteria::sortBuffer(BamAlignmentIterator begin, BamAlignmentIterato } else { if 
(sortCriteria=="QNAME") { - //sort(begin,end,SortGreaterThanName()); sort(begin,end,SortName >()); } else if (sortCriteria=="POS") { - //sort(begin,end,SortGreaterThanPosition()); sort(begin,end,SortPosition >()); } else if (sortCriteria=="AS") { sort(begin,end,SortAlignmentScore >()); diff --git a/src/api/BamSortCriteria.h b/src/api/BamSortCriteria.h index 95175386..0eab61c7 100644 --- a/src/api/BamSortCriteria.h +++ b/src/api/BamSortCriteria.h @@ -65,8 +65,6 @@ class SortPosition : public binary_function{ int32_t lh,rh; lh = lhs.Position; rh = rhs.Position; - // printf("%d %d %d %d\n",lh,rh,lhs.Position,rhs.Position); - // return c(lhs.Position, rhs.Position); return c(lh,rh); } } @@ -97,7 +95,9 @@ class SortPosition : public binary_function{ return c(lh,rh); } }; - + // printf("%d %d %d %d\n",lh,rh,lhs.Position,rhs.Position); + // return c(lhs.Position, rhs.Position); + //--------------------------------------------------------