Skip to content

Commit

Permalink
Add option for taking as input a list of contigs to compare
Browse files Browse the repository at this point in the history
  • Loading branch information
morispi committed Jul 12, 2021
1 parent 5340e96 commit a91d0ee
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 21 deletions.
64 changes: 51 additions & 13 deletions src/barcodesComparison.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareRegi
ifstream in;
in.open(regions);
if (!in.is_open()) {
fprintf(stderr, "Unable to open barcodes index file %s. Please provide an existing and valid file.\n", regions.c_str());
fprintf(stderr, "Unable to open regions file %s. Please provide an existing and valid file.\n", regions.c_str());
exit(EXIT_FAILURE);
}

Expand Down Expand Up @@ -96,17 +96,7 @@ void computeCommonBarcodesCounts(robin_hood::unordered_map<pair<string, string>,
}
}

robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContig(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contig, int size) {
BamReader reader;
if (!reader.Open(bamFile)) {
fprintf(stderr, "Unable open BAM file %s. Please make sure the file exists.\n", bamFile.c_str());
exit(EXIT_FAILURE);
}
if (!reader.LocateIndex()) {
fprintf(stderr, "Unable to find a BAM index for file %s. Please build the BAM index or provide a BAM file for which the BAM index is built\n", bamFile.c_str());
exit(EXIT_FAILURE);
}

robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContig_BamReader(BamReader& reader, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contig, int size) {
int id = reader.GetReferenceID(contig);
if (id == -1) {
fprintf(stderr, "Cannot find refence with ID %s.\n", contig.c_str());
Expand All @@ -132,7 +122,55 @@ robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareCont
computeCommonBarcodesCounts(counts, BarcodesOffsetsIndex, reader, rv, size, qRegion);
}

reader.Close();
return counts;
}

robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContig(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contig, int size) {
BamReader reader;
if (!reader.Open(bamFile)) {
fprintf(stderr, "Unable open BAM file %s. Please make sure the file exists.\n", bamFile.c_str());
exit(EXIT_FAILURE);
}
if (!reader.LocateIndex()) {
fprintf(stderr, "Unable to find a BAM index for file %s. Please build the BAM index or provide a BAM file for which the BAM index is built\n", bamFile.c_str());
exit(EXIT_FAILURE);
}

robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> res = compareContig_BamReader(reader, BarcodesOffsetsIndex, contig, size);
reader.Close();

return res;
}

robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContigs(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contigs, int size) {
BamReader reader;
if (!reader.Open(bamFile)) {
fprintf(stderr, "Unable open BAM file %s. Please make sure the file exists.\n", bamFile.c_str());
exit(EXIT_FAILURE);
}
if (!reader.LocateIndex()) {
fprintf(stderr, "Unable to find a BAM index for file %s. Please build the BAM index or provide a BAM file for which the BAM index is built\n", bamFile.c_str());
exit(EXIT_FAILURE);
}

ifstream in;
in.open(contigs);
if (!in.is_open()) {
fprintf(stderr, "Unable to open contigs file %s. Please provide an existing and valid file.\n", contigs.c_str());
exit(EXIT_FAILURE);
}

// Process each contig
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> res;
string contig;
while (getline(in, contig)) {
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> tmp = compareContig_BamReader(reader, BarcodesOffsetsIndex, contig, size);
for (auto r : tmp) {
res[r.first] += r.second;
}
}

reader.Close();

return res;
}
22 changes: 22 additions & 0 deletions src/include/barcodesComparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareRegi
*/
void computeCommonBarcodesCounts(robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs>& counts, BarcodesOffsetsIndex& BarcodesOffsetsIndex, BamReader& reader, const RefVector& rv, int size, string& qRegion);

/**
Compare the barcodes of a given contig's extremities to the barcodes of other contigs' extremities.
@param reader BamReader open on the desired BAM file
@param BarcodesOffsetsIndex barcode index associating barcodes to the set of BamRegion they appear in
@param contig name of the contig of interest
@param size size of the contigs extremities to consider
@return a map associating pairs of contigs' extremities to their number of common barcodes
*/
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContig_BamReader(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contig, int size);

/**
Compare the barcodes of a given contig's extremities to the barcodes of other contigs' extremities.
Expand All @@ -77,4 +88,15 @@ void computeCommonBarcodesCounts(robin_hood::unordered_map<pair<string, string>,
*/
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContig(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contig, int size);

/**
Compare the barcodes of a given list of contigs' extremities to the barcodes of other contigs' extremities.
@param bamFile BAM file to compare contigs' extremities barcodes from
@param BarcodesOffsetsIndex barcode index associating barcodes to the set of BamRegion they appear in
@param contigs file containing a list of contigs of interest
@param size size of the contigs extremities to consider
@return a map associating pairs of contigs' extremities to their number of common barcodes
*/
robin_hood::unordered_map<pair<string, string>, unsigned, hashPairs> compareContigs(string bamFile, BarcodesOffsetsIndex& BarcodesOffsetsIndex, string contigs, int size);

#endif
2 changes: 1 addition & 1 deletion src/include/subcommands/help.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <string>

#define VERSION "LRez v.2.1.2"
#define VERSION "LRez v.2.1.3"

using namespace std;

Expand Down
30 changes: 24 additions & 6 deletions src/subcommands/compare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ void subcommandCompare(int argc, char* argv[]) {
string regionsFile;
string outputFile;
string contig;
string contigsFile;
unsigned size = 1000;
ofstream out;
BamRegion r;
Expand All @@ -18,6 +19,7 @@ void subcommandCompare(int argc, char* argv[]) {
{"bam", required_argument, 0, 'b'},
{"regions", required_argument, 0, 'r'},
{"contig", required_argument, 0, 'c'},
{"contigs", required_argument, 0, 'C'},
{"index", required_argument, 0, 'i'},
{"size", required_argument, 0, 's'},
{"output", required_argument, 0, 'o'},
Expand All @@ -26,7 +28,7 @@ void subcommandCompare(int argc, char* argv[]) {
int index;
int iarg = 0;

iarg = getopt_long(argc, argv, "b:r:c:i:s:o:", longopts, &index);
iarg = getopt_long(argc, argv, "b:r:c:i:s:o:C:", longopts, &index);
if (iarg == -1) {
subcommandHelp("compare");
}
Expand All @@ -41,6 +43,9 @@ void subcommandCompare(int argc, char* argv[]) {
case 'c':
contig = optarg;
break;
case 'C':
contigsFile = optarg;
break;
case 'i':
indexFile = optarg;
break;
Expand All @@ -55,22 +60,30 @@ void subcommandCompare(int argc, char* argv[]) {
subcommandHelp("extract");
break;
}
iarg = getopt_long(argc, argv, "b:r:c:i:s:o:", longopts, &index);
iarg = getopt_long(argc, argv, "b:r:c:i:s:o:C:", longopts, &index);
}

if (bamFile.empty()) {
fprintf(stderr, "Please specify a BAM file with option --bam/-b.\n");
exit(EXIT_FAILURE);
}
if (regionsFile.empty() and contig.empty()) {
fprintf(stderr, "Please specify a file containing a list of regions with option --region/-r, or specify a contig of interest with option --contig/-c.\n");
if (regionsFile.empty() and contig.empty() and contigsFile.empty()) {
fprintf(stderr, "Please specify a file containing a list of regions with option --region/-r, contig of interest with option --contig/-c, or a list of contigs of interest with option --contigs/-C.\n");
exit(EXIT_FAILURE);
}
if (!regionsFile.empty() and !contig.empty()) {
fprintf(stderr, "Options --regions/-r and --contig/-c cannot be used at the same time.\n");
exit(EXIT_FAILURE);
}
if (!contig.empty() and indexFile.empty()) {
if (!regionsFile.empty() and !contigsFile.empty()) {
fprintf(stderr, "Options --regions/-r and --contigs/-C cannot be used at the same time.\n");
exit(EXIT_FAILURE);
}
if (!contig.empty() and !contigsFile.empty()) {
fprintf(stderr, "Options --contig/-c and --contigs/-C cannot be used at the same time.\n");
exit(EXIT_FAILURE);
}
if ((!contig.empty() or !contigsFile.empty()) and indexFile.empty()) {
fprintf(stderr, "Please specify an index file with option --index/-i.\n");
exit(EXIT_FAILURE);
}
Expand All @@ -79,11 +92,16 @@ void subcommandCompare(int argc, char* argv[]) {
// Compare the barcodes of all regions provided in the file with each other and output the result "matrix"
if (!regionsFile.empty()) {
commonBarcodes = compareRegions(bamFile, regionsFile);
} else {
} else if (!contig.empty()) {
// Compare the barcodes contained in the extremities of the provided contig with the barcodes contained in the extremities of other contigs
BarcodesOffsetsIndex BarcodesOffsetsIndex;
BarcodesOffsetsIndex = loadBarcodesOffsetsIndex(indexFile);
commonBarcodes = compareContig(bamFile, BarcodesOffsetsIndex, contig, size);
} else {
// Compare the barcodes contained in the extremities of the provided contigs list with the barcodes contained in the extremities of other contigs
BarcodesOffsetsIndex BarcodesOffsetsIndex;
BarcodesOffsetsIndex = loadBarcodesOffsetsIndex(indexFile);
commonBarcodes = compareContigs(bamFile, BarcodesOffsetsIndex, contigsFile, size);
}

if (!outputFile.empty()) {
Expand Down
3 changes: 2 additions & 1 deletion src/subcommands/help.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ void subcommandHelp(std::string subcommand) {
printf("ARGS:\n");
printf("\t-b, --bam\t BAM file containing the alignments\n");
printf("\t-i, --index\t Barcodes offsets index built with the index bam subcommand\n");
printf("\t-r, --region\t File containing regions of interest in format chromosome:startPosition-endPosition\n");
printf("\t-r, --regions\t File containing regions of interest in format chromosome:startPosition-endPosition\n");
printf("\t-c, --contig\t Contig of interest\n");
printf("\t-C, --contigs\t File containing a list of contigs of interest\n");
printf("\t-s, --size\t Size of contigs' extremities to consider (optional, default: 1000)\n");
printf("\t-o, --output\t File where to output the results (optional, default: stdout)\n");
} else if (subcommand == "extract") {
Expand Down

0 comments on commit a91d0ee

Please sign in to comment.