Skip to content

Commit

Permalink
radix update
Browse files Browse the repository at this point in the history
  • Loading branch information
jerryidk committed Feb 16, 2024
1 parent e789be2 commit c06c5cd
Show file tree
Hide file tree
Showing 9 changed files with 420 additions and 597 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ option(BUILD_TESTING "Build tests." OFF)
option(BUILD_EXAMPLE "Build examples." OFF)
option(LEGACY_PAPI "Use Vikram's PAPI stuff to do performance monitering." OFF)
option(HIGH_LEVEL_PAPI "Use PAPI high-level monitoring" OFF)
option(VTUNE "Use VTUNE to do performance monitering." ON)
option(AGGR "Use aggregation hashtable (histogram)" OFF)
option(VTUNE "Use VTUNE to do performance monitering." OFF)
option(AGGR "Use aggregation hashtable (histogram)" ON)
option(BQUEUE "Enable bqueue tests" OFF)
option(XORWOW "Xorwow" OFF)
option(BQ_ZIPFIAN "Enable global zipfian distribution generation" ON)
option(BQ_ZIPFIAN_LOCAL "Enable local zipfian distribution generation" OFF)
option(CALC_STATS "Enable hashtable statistics" OFF)
option(CALC_STATS "Enable hashtable statistics" ON)
option(ZIPF_FAST "Enable faster zipfian distribution generation" ON)
option(LATENCY_COLLECTION "Enable latency data collection" OFF)
option(BQ_KMER_TEST "Bqueue kmer test" OFF)
Expand Down
2 changes: 1 addition & 1 deletion include/hashtables/cas_kht_single.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ class CASHashTableSingle : public BaseHashTable {
this->num_memcmps++;
#endif
if (!true || curr->compare_key(q)) {
curr->update(q);
curr->update_cas(q);
// hashtable[pidx].kmer_count++;
// hashtable_mutexes[pidx].unlock();

Expand Down
2 changes: 1 addition & 1 deletion include/hashtables/kvtypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ struct Value {
found = true;
vp.second[vp.first].id = elem->key_id;
vp.second[vp.first].value = this->value;
vp.first++;
vp.first++;
goto exit;
}
exit:
Expand Down
7 changes: 4 additions & 3 deletions include/input_reader/span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ class PartitionedSpan {
public:
PartitionedSpan(const std::span<T>& span, uint64_t part_id,
uint64_t num_parts) {
PLOG_WARNING_ONCE_IF(span.size() % num_parts)
<< "Partition with size " << span.size()
<< " does not divide evenly by " << num_parts << " partitions.";
if(span.size() % num_parts)
{
PLOG_WARNING << "Partition with size " << span.size() << " does not divide evenly by " << num_parts << " partitions.";
}
uint64_t num_tuples_per_part = span.size() / num_parts;
T* pointer = span.data() + part_id * num_tuples_per_part;
size_t size = num_tuples_per_part;
Expand Down
2 changes: 1 addition & 1 deletion include/tests/KmerTest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "input_reader/fastq.hpp"
#include "types.hpp"


namespace kmercounter {

class KmerTest {
Expand All @@ -22,7 +23,6 @@ class KmerTest {
std::barrier<VoidFn> *barrier,
RadixContext &context);


void count_kmer_radix_jerry(Shard *sh, const Configuration &config,
std::barrier<VoidFn> *barrier,
RadixContext &context, BaseHashTable *ht);
Expand Down
103 changes: 44 additions & 59 deletions include/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <absl/container/flat_hash_map.h>
#include <absl/hash/hash.h>
#include <plog/Log.h>
#include <sys/mman.h>

#include <atomic>
#include <cinttypes>
Expand All @@ -13,7 +14,6 @@
#include <span>
#include <string>
#include <utility>
#include <sys/mman.h>

#define PAGE_SIZE 4096
#define ALPHA 0.15
Expand Down Expand Up @@ -102,95 +102,81 @@ struct alignas(64) cacheline {

typedef uint64_t Kmer;

struct KmerChunk {
typedef struct KmerChunk {
size_t count;
Kmer* kmers;
};
} KmerChunk_t;

const uint64_t KMERSPERCACHELINE1 = (CACHELINE_SIZE / sizeof(Kmer));
const uint64_t MAX_CHUCKS = 5;

class PartitionChunks {
public:
size_t chunk_size;
size_t chunk_count;
KmerChunk chunks[MAX_CHUCKS];
uint64_t chunks_len = 0;
// KmerChunk chunk;
size_t chunk_size; // # bytes for a KmerChunk_t
size_t chunk_count; // # kmer in a KmerChunk_t
KmerChunk_t chunks[MAX_CHUCKS];
uint64_t chunks_len = 0; // # length of chunk array

PartitionChunks() = default;

PartitionChunks(size_t size_hint) {
// chuck size must be a multiple of CACHELINE_SIZE
chunk_size =
(size_hint + CACHELINE_SIZE - 1) / CACHELINE_SIZE * CACHELINE_SIZE * 3;
chunk_count = chunk_size / sizeof(Kmer);

// PLOGI.printf("chunk size: %llu", chunk_size);
// auto first_chunk = (Kmer*)std::aligned_alloc(PAGESIZE, chunk_size);
auto first_chunk = alloc(true);
memset(first_chunk, 0, chunk_size);
struct KmerChunk kc = {0, first_chunk};
chunks[chunks_len++] = std::move(kc);

// auto second_chunk = alloc(true);
// memset(second_chunk, 0, chunk_size);
// struct KmerChunk kc1 = {0, nullptr};
// chunk = std::move(kc1);
chunk_count = chunk_size / sizeof(Kmer) -
1; // subtract 1 because we keep track of count
alloc_kmerchunk();
}

void write_one(Kmer k) {
auto& last = chunks[chunks_len - 1];
KmerChunk last = chunks[chunks_len - 1];
if (last.count == chunk_count) {
assert(false);
// auto chunk = (Kmer*)std::aligned_alloc(PAGESIZE, chunk_size);
auto chunk = alloc(false);
struct KmerChunk kc = {0, chunk};
assert(chunks_len <= MAX_CHUCKS);
chunks[chunks_len++] = (std::move(kc));
// return chunk;
if (!alloc_kmerchunk()) {
PLOG_FATAL << "Allocation of KmerChunk failed \n";
}
}
last.kmers[last.count++] = k;
// return last.kmers + last.count;
last.kmers[last.count] = k;
last.count++;
}

Kmer* get_next() {
auto& last = chunks[chunks_len - 1];
KmerChunk last = chunks[chunks_len - 1];
if (last.count >= chunk_count) {
assert(false);
// auto chunk = (Kmer*)std::aligned_alloc(PAGESIZE, chunk_size);
auto chunk = alloc(false);
struct KmerChunk kc = {0, chunk};
assert(chunks_len <= MAX_CHUCKS);
chunks[chunks_len++] = (std::move(kc));
return chunk;
if (!alloc_kmerchunk()) {
PLOG_FATAL << "Allocation of KmerChunk failed \n";
}else
{
return chunks[chunks_len - 1].kmers;
}
}
return last.kmers + last.count;
return &last.kmers[last.count];
}

Kmer* alloc(bool mset) {
// PLOGI.printf("start mmap, chunk_size: %llu", chunk_size);
bool alloc_kmerchunk() {
if (chunks_len >= MAX_CHUCKS) return false;

KmerChunk_t kc;
kc.count = 0;
kc.kmers = (Kmer*)alloc();

#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
uint huge = chunk_size < (1 << 20)? 0: MAP_HUGE_2MB;
auto addr = (Kmer*) aligned_alloc(CACHELINE_SIZE, chunk_size);
// auto addr = (Kmer*) mmap(nullptr, /* 256*1024*1024*/ chunk_size, PROT_READ | PROT_WRITE,
// huge | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
// PLOGI.printf("end mmap");
chunks[chunks_len] = std::move(kc);
chunks_len++;

return true;
}

Kmer* alloc() {
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
uint huge = chunk_size < (1 << 20) ? 0 : MAP_HUGE_2MB;
auto addr = (Kmer*)aligned_alloc(CACHELINE_SIZE, chunk_size);
if (addr == MAP_FAILED) {
perror("mmap");
exit(1);
}
// if (mset) {
// std::memset(addr, 0, chunk_size);
// }
}
return addr;
// return (Kmer*)std::aligned_alloc(PAGESIZE, chunk_size);
}

void advance() {
// chunk.count += KMERSPERCACHELINE1;
chunks[chunks_len - 1].count += KMERSPERCACHELINE1;
}
void advance() { chunks[chunks_len - 1].count += KMERSPERCACHELINE1; }
};

class RadixContext {
Expand Down Expand Up @@ -222,8 +208,7 @@ class RadixContext {
parts = (uint64_t**)std::aligned_alloc(CACHELINE_SIZE,
fanOut * sizeof(uint64_t*));

partitions = std::vector<PartitionChunks*>(
num_threads, nullptr);
partitions = std::vector<PartitionChunks*>(num_threads, nullptr);
// for (auto& p : partitions) {
// p.reserve(fanOut);
// }
Expand Down
8 changes: 4 additions & 4 deletions jerry-benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function bench_regular()
sudo ./build/dramhit \
--mode 4 \
--ht-type 3 \
--numa-split 0 \
--numa-split 1 \
--num-threads 64 \
--ht-size 8589934592 \
--in-file /opt/dramhit/kmer_dataset/SRR1513870.fastq \
Expand All @@ -19,7 +19,7 @@ function bench_radix()
sudo ./build/dramhit \
--mode 14 \
--ht-type 3 \
--numa-split 0 \
--numa-split 1 \
--num-threads 64 \
--ht-size 8589934592 \
--in-file /opt/dramhit/kmer_dataset/SRR1513870.fastq \
Expand All @@ -28,11 +28,11 @@ function bench_radix()

function bench() {
bench_radix
bench_regular
#bench_regular
}

function build() {
cmake --build build/
cmake --build build/
}

if [ "$1" == "build" ]; then
Expand Down
5 changes: 3 additions & 2 deletions src/Application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ void Application::shard_thread(int tid,
kmer_ht = init_ht(config.ht_size, sh->shard_idx);
break;
case FASTQ_WITH_INSERT_RADIX:
kmer_ht = init_ht(config.ht_size, sh->shard_idx);
break;
case PREFETCH:
// kmer_ht = new PartitionedHashStore<Prefetch_KV, PrefetchKV_Queue>(
Expand Down Expand Up @@ -232,8 +233,8 @@ void Application::shard_thread(int tid,
config.materialize, barrier);
break;
case FASTQ_WITH_INSERT_RADIX:
this->test.kmer.count_kmer_radix_custom(sh, config, barrier,
this->radixContext);
this->test.kmer.count_kmer_radix_jerry(sh, config, barrier,
this->radixContext, kmer_ht);
break;
case FASTQ_WITH_INSERT:
this->test.kmer.count_kmer(sh, config, kmer_ht, barrier);
Expand Down
Loading

0 comments on commit c06c5cd

Please sign in to comment.