Skip to content

Commit

Permalink
Use vectors with small buffer optimization to represent intermediate …
Browse files Browse the repository at this point in the history
…lists of gene indexes
  • Loading branch information
yp committed Jun 19, 2020
1 parent e71f01c commit 6f5a499
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ shark: main.o
@echo '* Compiling $<'
$(CXX) $(CXXFLAGS) -o $@ -c $<

main.o: common.hpp argument_parser.hpp bloomfilter.h BloomfilterFiller.hpp KmerBuilder.hpp FastaSplitter.hpp FastqSplitter.hpp ReadAnalyzer.hpp ReadOutput.hpp kmer_utils.hpp
main.o: common.hpp argument_parser.hpp bloomfilter.h BloomfilterFiller.hpp KmerBuilder.hpp FastaSplitter.hpp FastqSplitter.hpp ReadAnalyzer.hpp ReadOutput.hpp kmer_utils.hpp small_vector.hpp

clean:
rm -rf *.o
6 changes: 4 additions & 2 deletions bloomfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@
#include <string>

#include "kmer_utils.hpp"
#include "small_vector.hpp"

using namespace std;
using namespace sdsl;

class KmerBuilder;
class BloomfilterFiller;


class BF {
friend class KmerBuilder;
friend class BloomfilterFiller;
Expand All @@ -45,7 +47,7 @@ class BF {
typedef uint64_t hash_t;
typedef bit_vector bit_vector_t;
typedef bit_vector_t::rank_1_type rank_t;
typedef vector<int> index_t;
typedef small_vector_t index_t;
typedef vector<index_t> set_index_t;
typedef vector<uint16_t> index_kmer_t;
typedef bit_vector_t::select_1_type select_t;
Expand Down Expand Up @@ -88,7 +90,7 @@ class BF {
for (const auto bf_idx: kmers) {
int kmer_rank = _brank(bf_idx);
const auto size = _set_index[kmer_rank].size();
if (size == 0 || _set_index[kmer_rank][size-1] != input_idx)
if (size == 0 || _set_index[kmer_rank].last() != input_idx)
_set_index[kmer_rank].push_back(input_idx);
}
}
Expand Down
91 changes: 91 additions & 0 deletions small_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* shark - Mapping-free filtering of useless RNA-Seq reads
* Copyright (C) 2020 Tamara Ceccato, Luca Denti, Yuri Pirola, Marco Previtali
*
* This file is part of shark.
*
* shark is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* shark is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with shark; see the file LICENSE. If not, see
* <https://www.gnu.org/licenses/>.
**/

#include <cstdint>
#include <vector>

/**
 * Append-only sequence of uint16_t values with a small-buffer optimization.
 *
 * Up to three elements are stored inline in `v.s.arr`; on the fourth
 * push_back the elements are moved into a heap-allocated std::vector whose
 * address is stored in `v.l`, which shares storage with the inline fields.
 *
 * The active representation is recognised from bit 0 of `v.s.flag`:
 *   - 1: inline storage (`v.s.size` elements in `v.s.arr`);
 *   - 0: `v.l` points to an owned std::vector.
 * NOTE(review): this relies on `flag` overlaying a byte of the pointer whose
 * lowest bit is always zero (e.g. the least-significant byte on little-endian
 * targets, given the allocation alignment) and on reading the inactive union
 * member, which the compilers in use tolerate -- confirm before porting.
 *
 * The object owns the heap vector, so copy and move operations are defined
 * explicitly: copies are deep, moves steal the pointer and leave the source
 * as an empty inline vector.
 */
struct small_vector_t {
    union {
        struct {
            uint8_t flag;
            uint8_t size;
            uint16_t arr[3];
        } s;
        std::vector<uint16_t>* l;
    } v;

    // The tag bit is only free if heap vectors are at least 2-byte aligned.
    static_assert(alignof(std::vector<uint16_t>) >= 2,
                  "pointer tagging needs the low pointer bit to be zero");

    /// Creates an empty vector using the inline representation.
    small_vector_t() {
        reset_inline();
    }

    /// Deep copy: a heap-backed source gets its own freshly allocated vector.
    small_vector_t(const small_vector_t& other) {
        if (other.is_inline()) {
            v = other.v;  // trivial byte-wise copy of the inline fields
        } else {
            v.l = new std::vector<uint16_t>(*other.v.l);
        }
    }

    /// Steals the representation of `other`, leaving it empty and inline.
    small_vector_t(small_vector_t&& other) noexcept {
        v = other.v;
        other.reset_inline();
    }

    /// Deep-copy assignment; the copy is made before the old storage is freed
    /// so that `*this` is unchanged if the allocation throws.
    small_vector_t& operator=(const small_vector_t& other) {
        if (this != &other) {
            small_vector_t tmp(other);
            release();
            v = tmp.v;
            tmp.reset_inline();  // ownership now belongs to *this
        }
        return *this;
    }

    /// Move assignment: frees the current storage and steals that of `other`.
    small_vector_t& operator=(small_vector_t&& other) noexcept {
        if (this != &other) {
            release();
            v = other.v;
            other.reset_inline();
        }
        return *this;
    }

    /// Frees the heap vector when the heap representation is active.
    ~small_vector_t() {
        release();
    }

    /// Appends `x`, switching to the heap representation on the 4th element.
    void push_back(uint16_t x) {
        if (!is_inline()) {
            v.l->push_back(x);
            return;
        }
        if (v.s.size < 3) {
            v.s.arr[v.s.size++] = x;
            return;
        }
        // Build the spilled contents on the stack first so nothing leaks if
        // an allocation throws, then hand them over with a non-throwing swap.
        std::vector<uint16_t> spilled;
        spilled.reserve(4);
        spilled.assign(v.s.arr, v.s.arr + 3);
        spilled.push_back(x);
        std::vector<uint16_t>* ptr = new std::vector<uint16_t>();
        ptr->swap(spilled);
        v.l = ptr;  // overwrites the inline fields; tag bit becomes 0
    }

    /// Number of stored elements.
    size_t size() const {
        return is_inline() ? static_cast<size_t>(v.s.size) : v.l->size();
    }

    /// Most recently appended element.
    /// Precondition: size() > 0 (the result is undefined on an empty vector).
    uint16_t last() const {
        return is_inline() ? v.s.arr[v.s.size - 1] : v.l->back();
    }

    /// Pointer to the first element of the contiguous element range.
    const uint16_t* begin() const {
        return is_inline() ? v.s.arr : v.l->data();
    }

    /// Pointer one past the last element of the contiguous element range.
    const uint16_t* end() const {
        return is_inline() ? v.s.arr + v.s.size : v.l->data() + v.l->size();
    }

    /// True when the elements live in the inline buffer (tag bit set).
    bool is_inline() const noexcept {
        return (v.s.flag & 0x1) != 0;
    }

    /// Switches to the empty inline state WITHOUT freeing any heap vector.
    void reset_inline() noexcept {
        v.s.flag = 1u;
        v.s.size = 0;
    }

    /// Deletes the owned heap vector, if any; the state is left untouched.
    void release() noexcept {
        if (!is_inline()) {
            delete v.l;
        }
    }

};

0 comments on commit 6f5a499

Please sign in to comment.