From abd41aacf81fe4bfa5bb95ceba7101323c48e1c6 Mon Sep 17 00:00:00 2001
From: Daniel Yuan
Date: Thu, 1 Dec 2022 22:27:22 -0500
Subject: [PATCH] Add linear hashing implementation.

---
 README.md                              |  57 +++++
 src/include/utils/linear_hashing.h     |  58 +++++
 src/utils/linear_hashing.cc            |  71 ++++++
 test/unit/utils/linear_hashing_test.cc | 291 +++++++++++++++++++++++++
 4 files changed, 477 insertions(+)
 create mode 100644 src/include/utils/linear_hashing.h
 create mode 100644 src/utils/linear_hashing.cc
 create mode 100644 test/unit/utils/linear_hashing_test.cc

diff --git a/README.md b/README.md
index dedc2b4..b7c1d9e 100644
--- a/README.md
+++ b/README.md
@@ -7,3 +7,60 @@ Dependencies
 ```
 apt install bison flex
 ```
+
+# Linear Hashing
+
+You can build and run the test suite with the following commands:
+- `cd build`
+- `cmake -DCMAKE_BUILD_TYPE=Debug ..`
+- `make check`
+- `ctest -R linear_hashing_test -V`
+
+Files:
+- `src/include/utils/linear_hashing.h`
+- `src/utils/linear_hashing.cc`
+- `test/unit/utils/linear_hashing_test.cc`
+
+## Architecture
+Collisions are resolved by probing the following slots in order, wrapping around at the end of the table. Deletion uses tombstones: an erased entry stays in its slot and is only marked as deleted. A tombstoned slot is never reused by a different key, so the usable capacity slowly shrinks as keys are erased; in a real implementation this would only be acceptable for a hash table created with far more slots than it will ever need. Because this implementation focuses on minimalism, I decided not to implement the logic for cleaning up the tombstones.
+
+## HashTable
+The hash table has a backing vector of Entries. Each Entry contains a key, a value, a tombstone boolean, and a full boolean. The tombstone boolean is set when the key is erased, and the full boolean is set when the slot in the vector is filled by an insert.
+
+## Lookup
+The lookup algorithm returns the value associated with the key. If the key is not found, it returns -1 (so a stored value of -1 cannot be told apart from a missing key).
+
+## Insert
+The key-value pair is inserted into the backing table.
+- If another entry with the same key is already in the table, it is replaced with the new value.
+- If a tombstone with the same key is in the table, the tombstone entry is revived with the new value. +- If the table is full, no entry is inserted. + +## Erase +The entry associated with the key is flagged as a tombstone and the value at the location is returned. If the key does not exist in the table, the function returns -1. + +# Tests +I included 15 tests which cover almost all of the possibilities that the table could encounter. + +General: +- InitTable + +For insert: +- InsertOne +- InsertFive +- InsertFull +- InsertOverflow +- InsertReplace +- InsertFiveCollision +- InsertThreeOneCollision + +For erase: +- EraseOne +- EraseAll +- EraseThreeOneCollision +- EraseKeyNotFound + +For lookup: +- LookupOne +- LookupEraseThreeOneCollision +- LookupKeyNotFound \ No newline at end of file diff --git a/src/include/utils/linear_hashing.h b/src/include/utils/linear_hashing.h new file mode 100644 index 0000000..029581d --- /dev/null +++ b/src/include/utils/linear_hashing.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include + +namespace buzzdb{ +namespace utils{ + +class LinearHashTable { + public: + struct Entry { + int key; + int val; + bool tombstone; + bool full; + + Entry(int key, int val): key(key), val(val), tombstone(false), full(true) {}; + Entry(int key, int val, bool ts, bool full): key(key), val(val), tombstone(ts), full(full) {}; + ~Entry() {}; + void set_tombstone(bool ts) { this->tombstone = ts; }; + void set_full(bool full) { this->full = full; }; + bool operator==(const Entry& e) const { + return key == e.key && val == e.val && tombstone == e.tombstone && full == e.full; + }; + friend std::ostream& operator<<(std::ostream& os, const Entry& e) { + os << "Entry(" << e.key << "," << e.val << "," << e.tombstone << "," << e.full << ")"; + return os; + }; + }; + + LinearHashTable(size_t capacity) { + this->capacity = capacity; + this->sz = 0; + + for (size_t i = 0; i < capacity; i++) { + table.push_back(Entry(0, 0, false, false)); + } + 
}; + ~LinearHashTable() {}; + void insert(int, int); + int erase(int); + int lookup(int); + size_t size(); + std::vector get_backing_vector() { return table; }; + + private: + size_t capacity; + size_t sz; + std::vector table; + + size_t hash(int); + Entry *lookup_entry(int); +}; + +} // namespace utils +} // namespace buzzdb \ No newline at end of file diff --git a/src/utils/linear_hashing.cc b/src/utils/linear_hashing.cc new file mode 100644 index 0000000..dc425c9 --- /dev/null +++ b/src/utils/linear_hashing.cc @@ -0,0 +1,71 @@ + +#include +#include +#include +#include + +namespace buzzdb { +namespace utils { + +#define UNUSED(p) ((void)(p)) + +size_t LinearHashTable::hash(int key) { + return key % capacity; +} + +size_t LinearHashTable::size() { + return sz; +} + +LinearHashTable::Entry *LinearHashTable::lookup_entry(int key) { + size_t index = hash(key); + Entry *e = &table[index]; + + for (size_t i = 0; i < capacity; i++) { + e = &table[(index + i) % capacity]; + + if (!e->full) return nullptr; + if (e->key == key) { + if (e->tombstone) return nullptr; + else break; + } + } + if (e->key != key) return nullptr; // iterated through vector + + return e; +} + +void LinearHashTable::insert(int key, int val) { + size_t index = hash(key); + Entry *e = &table[index];; + + for (size_t i = 0; i < capacity; i++) { + e = &table[(index + i) % capacity]; + + if (!e->full || e->key == key) { + break; + } + } + if (e->full && e->key != key) return; // vector full + + if (!e->full || e->tombstone) sz++; + *e = Entry(key, val); +} + +int LinearHashTable::erase(int key) { + Entry *e = lookup_entry(key); + + if (e == nullptr) return -1; + e->tombstone = true; + sz--; + + return e->val; +} + +int LinearHashTable::lookup(int key) { + Entry *e = lookup_entry(key); + return e == nullptr ? 
-1 : e->val; +} + +} // namespace utils +} // namespace buzzdb diff --git a/test/unit/utils/linear_hashing_test.cc b/test/unit/utils/linear_hashing_test.cc new file mode 100644 index 0000000..7a98908 --- /dev/null +++ b/test/unit/utils/linear_hashing_test.cc @@ -0,0 +1,291 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/linear_hashing.h" + +// using buzzdb::utils::LinearHashTable; + +namespace { + +TEST(LinearHashTableTests, InitTable) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + std::vector v = {ee, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertOne) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + std::vector v = {ee, onefive, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(1, 5); + + EXPECT_EQ(1, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertFive) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + buzzdb::utils::LinearHashTable::Entry fivetwo = buzzdb::utils::LinearHashTable::Entry(5, 2, 0, 1); + std::vector v = {ee, onefive, twothree, threesix, fournine, fivetwo, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable 
table = buzzdb::utils::LinearHashTable(10); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + table.insert(5, 2); + + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertFull) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefive, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertOverflow) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefive, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + table.insert(5, 2); + + EXPECT_EQ(5, table.size()); + 
EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertReplace) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + std::vector v = {ee, onefive, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(1, 8); + table.insert(1, 5); + + EXPECT_EQ(1, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertFiveCollision) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry eleventhree = buzzdb::utils::LinearHashTable::Entry(11, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry twentyone_six = buzzdb::utils::LinearHashTable::Entry(21, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry thirtyone_nine = buzzdb::utils::LinearHashTable::Entry(31, 9, 0, 1); + buzzdb::utils::LinearHashTable::Entry fortyone_two = buzzdb::utils::LinearHashTable::Entry(41, 2, 0, 1); + std::vector v = {ee, onefive, eleventhree, twentyone_six, thirtyone_nine, fortyone_two, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(10); + table.insert(1, 5); + table.insert(11, 3); + table.insert(21, 6); + table.insert(31, 9); + table.insert(41, 2); + + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, InsertThreeOneCollision) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry elevensix 
= buzzdb::utils::LinearHashTable::Entry(11, 6, 0, 1); + std::vector v = {ee, onefive, twothree, elevensix, ee, ee, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(10); + table.insert(1, 5); + table.insert(2, 3); + table.insert(11, 6); + + EXPECT_EQ(3, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, EraseOne) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefivets = buzzdb::utils::LinearHashTable::Entry(1, 5, 1, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefivets, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + table.erase(1); + + EXPECT_EQ(4, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, EraseAll) { + buzzdb::utils::LinearHashTable::Entry zerotwots = buzzdb::utils::LinearHashTable::Entry(0, 2, 1, 1); + buzzdb::utils::LinearHashTable::Entry onefivets = buzzdb::utils::LinearHashTable::Entry(1, 5, 1, 1); + buzzdb::utils::LinearHashTable::Entry twothreets = buzzdb::utils::LinearHashTable::Entry(2, 3, 1, 1); + buzzdb::utils::LinearHashTable::Entry threesixts = buzzdb::utils::LinearHashTable::Entry(3, 6, 1, 1); + buzzdb::utils::LinearHashTable::Entry fourninets = buzzdb::utils::LinearHashTable::Entry(4, 9, 1, 1); + std::vector v = {zerotwots, onefivets, twothreets, threesixts, fourninets}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 
2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + table.erase(0); + table.erase(1); + table.erase(2); + table.erase(3); + table.erase(4); + + EXPECT_EQ(0, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, EraseThreeOneCollision) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothreets = buzzdb::utils::LinearHashTable::Entry(2, 3, 1, 1); + buzzdb::utils::LinearHashTable::Entry elevensix = buzzdb::utils::LinearHashTable::Entry(11, 6, 0, 1); + std::vector v = {ee, onefive, twothreets, elevensix, ee, ee, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(10); + table.insert(1, 5); + table.insert(2, 3); + table.insert(11, 6); + table.erase(2); + + EXPECT_EQ(2, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + +TEST(LinearHashTableTests, EraseKeyNotFound) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefive, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + + EXPECT_EQ(-1, table.erase(5)); + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); +} + 
+TEST(LinearHashTableTests, LookupOne) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefive, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); + EXPECT_EQ(5, table.lookup(1)); +} + +TEST(LinearHashTableTests, LookupEraseThreeOneCollision) { + buzzdb::utils::LinearHashTable::Entry ee = buzzdb::utils::LinearHashTable::Entry(0, 0, 0, 0); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + buzzdb::utils::LinearHashTable::Entry twothreets = buzzdb::utils::LinearHashTable::Entry(2, 3, 1, 1); + buzzdb::utils::LinearHashTable::Entry elevensix = buzzdb::utils::LinearHashTable::Entry(11, 6, 0, 1); + std::vector v = {ee, onefive, twothreets, elevensix, ee, ee, ee, ee, ee, ee}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(10); + table.insert(1, 5); + table.insert(2, 3); + table.insert(11, 6); + table.erase(2); + + EXPECT_EQ(2, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); + EXPECT_EQ(6, table.lookup(11)); +} + +TEST(LinearHashTableTests, LookupKeyNotFound) { + buzzdb::utils::LinearHashTable::Entry zerotwo = buzzdb::utils::LinearHashTable::Entry(0, 2, 0, 1); + buzzdb::utils::LinearHashTable::Entry onefive = buzzdb::utils::LinearHashTable::Entry(1, 5, 0, 1); + 
buzzdb::utils::LinearHashTable::Entry twothree = buzzdb::utils::LinearHashTable::Entry(2, 3, 0, 1); + buzzdb::utils::LinearHashTable::Entry threesix = buzzdb::utils::LinearHashTable::Entry(3, 6, 0, 1); + buzzdb::utils::LinearHashTable::Entry fournine = buzzdb::utils::LinearHashTable::Entry(4, 9, 0, 1); + std::vector v = {zerotwo, onefive, twothree, threesix, fournine}; + + buzzdb::utils::LinearHashTable table = buzzdb::utils::LinearHashTable(5); + table.insert(0, 2); + table.insert(1, 5); + table.insert(2, 3); + table.insert(3, 6); + table.insert(4, 9); + + EXPECT_EQ(5, table.size()); + EXPECT_EQ(v, table.get_backing_vector()); + EXPECT_EQ(-1, table.lookup(5)); +} + + +} // namespace + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}