Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for picard-style .bam/.bai indexes #103

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bin/
build/
include/
lib/
src/toolkit/bamtools_version.h
1 change: 1 addition & 0 deletions src/api/BamIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class API_EXPORT BamIndex {
// Index flavors that BamIndex knows about. Enumerators are numbered
// consecutively from zero in declaration order.
enum IndexType {
    BAMTOOLS = 0,  // index handled by BamToolsIndex
    STANDARD,      // index handled by BamStandardIndex
    PICARD         // index named by swapping the BAM filename's last character for 'i' (.bai instead of .bam.bai)
};

// ctor & dtor
Expand Down
51 changes: 45 additions & 6 deletions src/api/internal/index/BamIndexFactory_p.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
switch ( type ) {
case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
case ( BamIndex::PICARD ) : {
string picardIndexFilename = bamFilename;
picardIndexFilename[picardIndexFilename.size()-1] = 'i'; // .bai instead of .bam.bai
return picardIndexFilename;
}
default :
return string();
}
Expand All @@ -37,8 +42,9 @@ BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename,
return 0;

// create index based on extension
if ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
else if ( extension == BamToolsIndex::Extension() ) return new BamToolsIndex(reader);
if ( extension == BamStandardIndex::BamDotExtension() ) return new BamStandardIndex(reader);
else if ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
else if ( extension == BamToolsIndex::BamDotExtension() ) return new BamToolsIndex(reader);
else
return 0;
}
Expand Down Expand Up @@ -69,8 +75,36 @@ const string BamIndexFactory::FileExtension(const string& filename) {
if ( lastDotPosition == string::npos )
return string();

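// now determine if we have .bam.* or just .bai
// NOTE(review): find_last_of('.', pos) searches backwards starting AT pos, so passing lastDotPosition returns that same dot and the .bam.* branch below can never match; the search should start at lastDotPosition - 1 (guarding against lastDotPosition == 0)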
const size_t secondLastDotPosition = filename.find_last_of('.', lastDotPosition);

string ending;

if ( secondLastDotPosition != string::npos ) {
// it's possible we have a .bam.* ending
if ( filename.size() - secondLastDotPosition == 8) {
// and we do
if ( filename.substr(secondLastDotPosition, 5) == ".bam." ) {
ending = filename.substr(secondLastDotPosition);
}
}
}

// we didn't find a .bam.* ending, so we'll assume a bare .bai
if ( ending.empty() )
ending = filename.substr(lastDotPosition);

// return substring from last dot position
return filename.substr(lastDotPosition);
return ending;
}

// Reports whether stat() can retrieve information for @filename.
// Any stat() result other than -1 counts as "exists".
const bool BamIndexFactory::FileExists(const std::string& filename) {
    struct stat fileInfo;
    return ( stat(filename.c_str(), &fileInfo) != -1 );
}

// returns name of existing index file that corresponds to @bamFilename
Expand All @@ -86,19 +120,24 @@ const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
// try to find index of preferred type first
// return index filename if found
string indexFilename = CreateIndexFilename(bamFilename, preferredType);
if ( !indexFilename.empty() )
if ( !indexFilename.empty() && FileExists(indexFilename) )
return indexFilename;

// couldn't find preferred type, try the other supported types
// return index filename if found
if ( preferredType != BamIndex::STANDARD ) {
indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
if ( !indexFilename.empty() )
if ( !indexFilename.empty() && FileExists(indexFilename) )
return indexFilename;
}
if ( preferredType != BamIndex::BAMTOOLS ) {
indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
if ( !indexFilename.empty() )
if ( !indexFilename.empty() && FileExists(indexFilename) )
return indexFilename;
}
if ( preferredType != BamIndex::PICARD ) {
indexFilename = CreateIndexFilename(bamFilename, BamIndex::PICARD);
if ( !indexFilename.empty() && FileExists(indexFilename) )
return indexFilename;
}

Expand Down
4 changes: 4 additions & 0 deletions src/api/internal/index/BamIndexFactory_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "api/BamIndex.h"
#include <string>
#include <sys/stat.h>

namespace BamTools {
namespace Internal {
Expand Down Expand Up @@ -41,6 +42,9 @@ class BamIndexFactory {
const BamIndex::IndexType& type);
// retrieves file extension (including '.')
static const std::string FileExtension(const std::string& filename);

private:
static const bool FileExists(const std::string& filename);
};

} // namespace Internal
Expand Down
8 changes: 7 additions & 1 deletion src/api/internal/index/BamStandardIndex_p.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ using namespace std;
// NOTE(review): MAX_BIN and BAM_LIDX_SHIFT presumably mirror the binning constants of the BAM index format - confirm against the format specification
const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1
const int BamStandardIndex::BAM_LIDX_SHIFT = 14;
// bare extension, returned by Extension()
const string BamStandardIndex::BAI_EXTENSION = ".bai";
// extension including the ".bam" part, returned by BamDotExtension()
const string BamStandardIndex::BAM_BAI_EXTENSION = ".bam.bai";
// NOTE(review): presumably the leading bytes expected in an index file - confirm where it is read/written
const char* const BamStandardIndex::BAI_MAGIC = "BAI\1";
// byte sizes computed from the fixed-width integer types they are composed of
const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;
const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);
Expand Down Expand Up @@ -420,6 +421,11 @@ bool BamStandardIndex::Create(void) {
}

// yields a copy of BAM_BAI_EXTENSION, i.e. the extension
// in its ".bam.bai" form
const string BamStandardIndex::BamDotExtension() {
    return BamStandardIndex::BAM_BAI_EXTENSION;
}

// yields a copy of BAI_EXTENSION, i.e. the extension
// in its bare ".bai" form
const string BamStandardIndex::Extension() {
    return BamStandardIndex::BAI_EXTENSION;
}
Expand Down Expand Up @@ -934,7 +940,7 @@ void BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVecto
// make sure linear offsets are sorted before writing & saving summary
SortLinearOffsets(linearOffsets);

int64_t numBytesWritten = 0;
uint64_t numBytesWritten = 0;

// write number of linear offsets
int32_t offsetCount = linearOffsets.size();
Expand Down
2 changes: 2 additions & 0 deletions src/api/internal/index/BamStandardIndex_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class BamStandardIndex : public BamIndex {
public:
// returns format's file extension
static const std::string Extension(void);
static const std::string BamDotExtension(void);

// internal methods
private:
Expand Down Expand Up @@ -225,6 +226,7 @@ class BamStandardIndex : public BamIndex {
static const int MAX_BIN;
static const int BAM_LIDX_SHIFT;
static const std::string BAI_EXTENSION;
static const std::string BAM_BAI_EXTENSION;
static const char* const BAI_MAGIC;
static const int SIZEOF_ALIGNMENTCHUNK;
static const int SIZEOF_BINCORE;
Expand Down
8 changes: 7 additions & 1 deletion src/api/internal/index/BamToolsIndex_p.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ using namespace std;
// --------------------------------

const uint32_t BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;
// Bare extension, returned by Extension(). BamIndexFactory::CreateIndexFilename
// appends Extension() to the BAM filename, so this must NOT contain ".bam":
// otherwise "x.bam" would map to "x.bam.bam.bti". It is defined exactly once.
const string BamToolsIndex::BTI_EXTENSION = ".bti";
// Extension including the ".bam" part, returned by BamDotExtension().
const string BamToolsIndex::BAM_BTI_EXTENSION = ".bam.bti";
const char* const BamToolsIndex::BTI_MAGIC = "BTI\1";
// byte size computed from two int32_t fields plus one int64_t field
const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t)*2 + sizeof(int64_t);

Expand Down Expand Up @@ -275,6 +276,11 @@ bool BamToolsIndex::Create(void) {
}

// yields a copy of BAM_BTI_EXTENSION, i.e. the extension
// in its ".bam.bti" form
const std::string BamToolsIndex::BamDotExtension() {
    return BamToolsIndex::BAM_BTI_EXTENSION;
}

// yields a copy of BTI_EXTENSION
const std::string BamToolsIndex::Extension() {
    return BamToolsIndex::BTI_EXTENSION;
}
Expand Down
2 changes: 2 additions & 0 deletions src/api/internal/index/BamToolsIndex_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class BamToolsIndex : public BamIndex {
public:
// returns format's file extension
static const std::string Extension(void);
static const std::string BamDotExtension(void);

// internal methods
private:
Expand Down Expand Up @@ -176,6 +177,7 @@ class BamToolsIndex : public BamIndex {
private:
static const uint32_t DEFAULT_BLOCK_LENGTH;
static const std::string BTI_EXTENSION;
static const std::string BAM_BTI_EXTENSION;
static const char* const BTI_MAGIC;
static const int SIZEOF_BLOCK;
};
Expand Down