diff --git a/include/fruit/impl/data_structures/semistatic_graph.defn.h b/include/fruit/impl/data_structures/semistatic_graph.defn.h index 95873e8a..aebaf76c 100644 --- a/include/fruit/impl/data_structures/semistatic_graph.defn.h +++ b/include/fruit/impl/data_structures/semistatic_graph.defn.h @@ -143,7 +143,7 @@ inline typename SemistaticGraph::const_node_iterator SemistaticGra template inline typename SemistaticGraph::node_iterator SemistaticGraph::find(NodeId nodeId) { - InternalNodeId* internalNodeIdPtr = nodeIndexMap.find(nodeId); + const InternalNodeId* internalNodeIdPtr = nodeIndexMap.find(nodeId); if (internalNodeIdPtr == nullptr) { return node_iterator{nodes.end()}; } else { diff --git a/include/fruit/impl/data_structures/semistatic_graph.h b/include/fruit/impl/data_structures/semistatic_graph.h index b3079320..280bb295 100644 --- a/include/fruit/impl/data_structures/semistatic_graph.h +++ b/include/fruit/impl/data_structures/semistatic_graph.h @@ -29,6 +29,14 @@ namespace impl { // The alignas ensures that a SemistaticGraphInternalNodeId* always has 0 in the low-order bit. struct alignas(2) alignas(alignof(std::size_t)) SemistaticGraphInternalNodeId { std::size_t id; + + bool operator==(const SemistaticGraphInternalNodeId& x) const { + return id == x.id; + } + + bool operator<(const SemistaticGraphInternalNodeId& x) const { + return id < x.id; + } }; /** @@ -164,7 +172,7 @@ class SemistaticGraph { SemistaticGraph(NodeIter first, NodeIter last); SemistaticGraph(SemistaticGraph&&) = default; - SemistaticGraph(const SemistaticGraph&) = default; + SemistaticGraph(const SemistaticGraph&) = delete; // Creates a copy of x with the additional nodes in [first, last). The requirements on NodeIter as the same as for the 2-arg // constructor. @@ -174,7 +182,7 @@ class SemistaticGraph { template SemistaticGraph(const SemistaticGraph& x, NodeIter first, NodeIter last); - SemistaticGraph& operator=(const SemistaticGraph&) = default; + SemistaticGraph& operator=(const SemistaticGraph&) = delete; SemistaticGraph& operator=(SemistaticGraph&&) = default; node_iterator end(); diff --git a/include/fruit/impl/data_structures/semistatic_graph.templates.h b/include/fruit/impl/data_structures/semistatic_graph.templates.h index e4b00779..8fae6f12 100644 --- a/include/fruit/impl/data_structures/semistatic_graph.templates.h +++ b/include/fruit/impl/data_structures/semistatic_graph.templates.h @@ -151,34 +151,43 @@ template template SemistaticGraph::SemistaticGraph(const SemistaticGraph& x, NodeIter first, NodeIter last) // TODO: Do a shallow copy of the index map too. - : nodeIndexMap(x.nodeIndexMap), firstUnusedIndex(x.firstUnusedIndex), nodes(x.nodes) { + : firstUnusedIndex(x.firstUnusedIndex), nodes(x.nodes) { // TODO: The code below is very similar to the other constructor, extract the common parts in separate functions. std::size_t num_new_edges = 0; // Step 1: assign IDs to new nodes, fill `nodeIndexMap' and update `firstUnusedIndex'. - std::unordered_set nodeIds; + + // Step 1a: collect all new node IDs. + std::vector> nodeIds; for (NodeIter i = first; i != last; ++i) { - ++firstUnusedIndex; - nodeIndexMap.insert(i->getId(), InternalNodeId{firstUnusedIndex - 1}, [this](InternalNodeId x, InternalNodeId) { - // There was already an index for this TypeId, we don't need to allocate an index after all. - --firstUnusedIndex; - return x; - }); + if (x.nodeIndexMap.find(i->getId()) == nullptr) { + nodeIds.push_back(std::make_pair(i->getId(), InternalNodeId())); + } if (!i->isTerminal()) { for (auto j = i->getEdgesBegin(); j != i->getEdgesEnd(); ++j) { - ++firstUnusedIndex; - nodeIndexMap.insert(*j, InternalNodeId{firstUnusedIndex - 1}, [this](InternalNodeId x, InternalNodeId) { - // There was already an index for this TypeId, we don't need to allocate an index after all. - --firstUnusedIndex; - return x; - }); + if (x.nodeIndexMap.find(*j) == nullptr) { + nodeIds.push_back(std::make_pair(*j, InternalNodeId())); + } ++num_new_edges; } } } + // Step 1b: remove duplicates. + std::sort(nodeIds.begin(), nodeIds.end()); + nodeIds.erase(std::unique(nodeIds.begin(), nodeIds.end()), nodeIds.end()); + + // Step 1c: assign new IDs. + for (auto& p : nodeIds) { + p.second = InternalNodeId{firstUnusedIndex}; + ++firstUnusedIndex; + } + + // Step 1d: actually populate nodeIndexMap. + nodeIndexMap = SemistaticMap(x.nodeIndexMap, std::move(nodeIds)); + // Step 2: fill `nodes' and `edgesStorage' // Note that not all of these will be assigned in the loop below. @@ -188,17 +197,6 @@ SemistaticGraph::SemistaticGraph(const SemistaticGraph& x, NodeIte #endif Node(), 1}); -#ifdef FRUIT_EXTRA_DEBUG - { - std::cerr << "SemistaticGraph constructed with the following known types:" << std::endl; - std::size_t i = 0; - for (typename std::unordered_set::iterator itr = nodeIds.begin(); itr != nodeIds.end(); ++i, ++itr) { - nodes[i].key = *itr; - std::cerr << i << ": " << *itr << std::endl; - } - } -#endif - // edgesStorage[0] is unused, that's the reason for the +1 edgesStorage.reserve(num_new_edges + 1); edgesStorage.resize(1); diff --git a/include/fruit/impl/data_structures/semistatic_map.h b/include/fruit/impl/data_structures/semistatic_map.h index 05c1e124..b21bba03 100644 --- a/include/fruit/impl/data_structures/semistatic_map.h +++ b/include/fruit/impl/data_structures/semistatic_map.h @@ -38,6 +38,7 @@ class SemistaticMap { private: using Unsigned = std::uintptr_t; using NumBits = unsigned char; + using value_type = std::pair; static const unsigned char beta = 4; @@ -62,42 +63,50 @@ class SemistaticMap { } HashFunction hash_function; - // Given a key x, the candidate places for x are keys[lookup_table[hash_function.hash(x)]] and the following cells that hash to the same value. - std::vector lookup_table; - std::vector> values; + // Given a key x, if p=lookup_table[hash_function.hash(x)] the candidate places for x are [p.first, p.second). These pointers + // point to the values[] vector, but it might be either the one of this object or the one of an object that was shallow-copied + // into this one. + std::vector> lookup_table; + std::vector values; inline Unsigned hash(const Key& key) const { return hash_function.hash(std::hash::type>()(key)); } + // Inserts a range [elemsBegin, elemsEnd) of new (key,value) pairs with hash h. The keys must not exist in the map. + // Before calling this, ensure that the capacity of `values' is sufficient to contain the new values without re-allocating. + void insert(std::size_t h, + typename std::vector::const_iterator elemsBegin, + typename std::vector::const_iterator elemsEnd); + public: // Constructs an *invalid* map (as if this map was just moved from). SemistaticMap() = default; // Iter must be a forward iterator with value type std::pair. - // This constructor is *not* defined in semistatic_map.templates.h, but only in semistatic_map.cc. - // All instantiations must provide an extern template declaration and have a matching instantiation in semistatic_map.cc. template SemistaticMap(Iter begin, std::size_t num_values); - SemistaticMap(const SemistaticMap&) = default; + // Creates a shallow copy of `map' with the additional elements in newElements. + // The keys in newElements must be unique and must not be present in `map'. + // The new map will share data with `map', so must be destroyed before `map' is destroyed. + // NOTE: If more than O(1) elements are added, calls to at() and find() on the result will *not* be O(1). + // This is O(newElements.size()*log(newElements.size())). + SemistaticMap(const SemistaticMap& map, std::vector&& newElements); + SemistaticMap(SemistaticMap&&) = default; + SemistaticMap(const SemistaticMap&) = delete; - SemistaticMap& operator=(const SemistaticMap&) = default; SemistaticMap& operator=(SemistaticMap&&) = default; + SemistaticMap& operator=(const SemistaticMap&) = delete; // Precondition: `key' must exist in the map. // Unlike std::map::at(), this yields undefined behavior if the precondition isn't satisfied (instead of throwing). - Value& at(Key key); + const Value& at(Key key) const; // Prefer using at() when possible, this is slightly slower. // Returns nullptr if the key was not found. const Value* find(Key key) const; - Value* find(Key key); - - // Inserts (key, value). If `key' already exists, inserts (key, combine(oldValue, (*this)[key])) instead. - template - void insert(Key key, Value value, Combine combine); }; } // namespace impl diff --git a/include/fruit/impl/data_structures/semistatic_map.templates.h b/include/fruit/impl/data_structures/semistatic_map.templates.h index 22ef8978..f75663ac 100644 --- a/include/fruit/impl/data_structures/semistatic_map.templates.h +++ b/include/fruit/impl/data_structures/semistatic_map.templates.h @@ -33,43 +33,67 @@ namespace fruit { namespace impl { template -template -void SemistaticMap::insert(Key key, Value value, Combine combine) { - Unsigned h = hash(key); - Unsigned old_keys_size = values.size(); - Unsigned first_candidate_index = lookup_table[h]; - Unsigned last_candidate_index = old_keys_size; - - { - Unsigned i = first_candidate_index; - for (; i != last_candidate_index; ++i) { - if (values[i].first == key) { - values[i].second = combine(values[i].second, value); - return; - } - Unsigned h1 = hash(values[i].first); - if (h1 != h) { - break; - } - } - last_candidate_index = i; - // Now [first_candidate_index, last_candidate_index) contains only keys that hash to h. - } +void SemistaticMap::insert(std::size_t h, + typename std::vector::const_iterator elemsBegin, + typename std::vector::const_iterator elemsEnd) { + + value_type* oldBucketBegin = lookup_table[h].first; + value_type* oldBucketEnd = lookup_table[h].second; - // `key' is not in `keys'. + lookup_table[h].first = values.data() + values.size(); // Step 1: re-insert all keys with the same hash at the end (if any). - for (Unsigned i = first_candidate_index; i != last_candidate_index; ++i) { - // The copies make sure that the references passed to push_back dont't get invalidated by resizing. - values.emplace_back(Key(values[i].first), Value(values[i].second)); + for (value_type* p = oldBucketBegin; p != oldBucketEnd; ++p) { + values.push_back(*p); + } + + // Step 2: also insert the new keys and values + for (typename std::vector::const_iterator itr = elemsBegin; itr != elemsEnd; ++itr) { + values.push_back(*itr); } - // Step 2: also insert the new key and value - values.emplace_back(key, value); + lookup_table[h].second = values.data() + values.size(); - // Step 3: update the index in the lookup table to point to the newly-added sequence. // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be too slow. - lookup_table[h] = old_keys_size; +} + +template +SemistaticMap::SemistaticMap(const SemistaticMap& map, std::vector&& newElements) + : hash_function(map.hash_function), lookup_table(map.lookup_table) { + + // Sort by hash. + std::sort(newElements.begin(), newElements.end(), [this](const value_type& x, const value_type& y) { + return hash(x.first) < hash(y.first); + }); + + std::size_t additionalValues = newElements.size(); + // Add the space needed to store copies of the old buckets. + for (typename std::vector::iterator itr = newElements.begin(), itr_end = newElements.end(); + itr != itr_end; + /* no increment */) { + Unsigned h = hash(itr->first); + auto p = map.lookup_table[h]; + additionalValues += (p.second - p.first); + for (; itr != itr_end && hash(itr->first) == h; ++itr) { + } + } + + values.reserve(additionalValues); + + // Now actually perform the insertions. + + for (typename std::vector::iterator itr = newElements.begin(), itr_end = newElements.end(); + itr != itr_end; + /* no increment */) { + Unsigned h = hash(itr->first); + auto p = map.lookup_table[h]; + additionalValues += (p.second - p.first); + typename std::vector::iterator first = itr; + for (; itr != itr_end && hash(itr->first) == h; ++itr) { + } + typename std::vector::iterator last = itr; + insert(h, first, last); + } } template @@ -104,54 +128,44 @@ SemistaticMap::SemistaticMap(Iter valuesBegin, std::size_t num_value pick_another:; } - std::partial_sum(count.begin(), count.end(), count.begin()); - lookup_table = std::move(count); values.resize(num_values); + std::partial_sum(count.begin(), count.end(), count.begin()); + lookup_table.reserve(count.size()); + for (Unsigned n : count) { + lookup_table.push_back(make_pair(values.data() + n, values.data() + n)); + } + // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h. // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant. Iter itr = valuesBegin; for (std::size_t i = 0; i < num_values; ++i, ++itr) { - Unsigned& cell = lookup_table[hash((*itr).first)]; - --cell; - assert(cell < num_values); - values[cell] = *itr; + value_type*& firstValuePtr = lookup_table[hash((*itr).first)].first; + --firstValuePtr; + assert(values.data() <= firstValuePtr); + assert(firstValuePtr < values.data() + values.size()); + *firstValuePtr = *itr; } } template -Value& SemistaticMap::at(Key key) { +const Value& SemistaticMap::at(Key key) const { Unsigned h = hash(key); - Unsigned i = lookup_table[h]; - while (true) { - assert(i < values.size()); - if (values[i].first == key) { - return values[i].second; + for (const value_type* p = lookup_table[h].first; /* p!=lookup_table[h].second but no need to check */; ++p) { + assert(p != lookup_table[h].second); + if (p->first == key) { + return p->second; } - assert(hash(values[i].first) == h); - ++i; } } -template -Value* SemistaticMap::find(Key key) { - const SemistaticMap* cthis = this; - return const_cast(cthis->find(key)); -} - template const Value* SemistaticMap::find(Key key) const { Unsigned h = hash(key); - Unsigned first_candidate_index = lookup_table[h]; - Unsigned last_candidate_index = values.size(); - for (Unsigned i = first_candidate_index; i != last_candidate_index; ++i) { - if (values[i].first == key) { - return &(values[i].second); - } - Unsigned h1 = hash(values[i].first); - if (h1 != h) { - break; + for (const value_type *p = lookup_table[h].first, *p_end = lookup_table[h].second; p != p_end; ++p) { + if (p->first == key) { + return &(p->second); } } return nullptr; diff --git a/tests/data_structures/semistatic_graph.cpp b/tests/data_structures/semistatic_graph.cpp index c876fd0d..5dce26f1 100644 --- a/tests/data_structures/semistatic_graph.cpp +++ b/tests/data_structures/semistatic_graph.cpp @@ -173,22 +173,6 @@ void test_move_constructor() { assert(graph.find(5) == graph.end()); } -void test_copy_constructor() { - vector values = {{2, "foo", {}, false}, {3, "bar", {2}, false}}; - Graph graph1(values.begin(), values.end()); - Graph graph = graph1; - assert(graph.find(0) == graph.end()); - assert(!(graph.find(2) == graph.end())); - assert(graph.at(2).getNode() == "foo"); - assert(graph.at(2).isTerminal() == false); - assert(graph.at(3).getNode() == "bar"); - assert(graph.at(3).isTerminal() == false); - edge_iterator itr = graph.at(3).neighborsBegin(); - assert(itr.getNodeIterator(graph).getNode() == "foo"); - assert(itr.getNodeIterator(graph).isTerminal() == false); - assert(graph.find(5) == graph.end()); -} - void test_move_assignment() { vector values = {{2, "foo", {}, false}, {3, "bar", {2}, false}}; Graph graph1(values.begin(), values.end()); @@ -206,23 +190,6 @@ void test_move_assignment() { assert(graph.find(5) == graph.end()); } -void test_copy_assignment() { - vector values = {{2, "foo", {}, false}, {3, "bar", {2}, false}}; - Graph graph1(values.begin(), values.end()); - Graph graph; - graph = graph1; - assert(graph.find(0) == graph.end()); - assert(!(graph.find(2) == graph.end())); - assert(graph.at(2).getNode() == "foo"); - assert(graph.at(2).isTerminal() == false); - assert(graph.at(3).getNode() == "bar"); - assert(graph.at(3).isTerminal() == false); - edge_iterator itr = graph.at(3).neighborsBegin(); - assert(itr.getNodeIterator(graph).getNode() == "foo"); - assert(itr.getNodeIterator(graph).isTerminal() == false); - assert(graph.find(5) == graph.end()); -} - int main() { test_empty(); @@ -234,9 +201,7 @@ int main() { test_add_node(); test_set_terminal(); test_move_constructor(); - test_copy_constructor(); test_move_assignment(); - test_copy_assignment(); return 0; } diff --git a/tests/data_structures/semistatic_map.cpp b/tests/data_structures/semistatic_map.cpp index 56ad9964..a9172dd8 100644 --- a/tests/data_structures/semistatic_map.cpp +++ b/tests/data_structures/semistatic_map.cpp @@ -44,24 +44,15 @@ void test_1_elem() { void test_1_inserted_elem() { vector> values = {}; - SemistaticMap map(values.begin(), values.size()); - map.insert(2, "bar", [](std::string x, std::string){ return x; }); + SemistaticMap old_map(values.begin(), values.size()); + vector> new_values = {{2, "bar"}}; + SemistaticMap map(old_map, std::move(new_values)); assert(map.find(0) == nullptr); assert(map.find(2) != nullptr); assert(map.at(2) == "bar"); assert(map.find(5) == nullptr); } -void test_1_elem_combine() { - vector> values = {{2, "foo"}}; - SemistaticMap map(values.begin(), values.size()); - map.insert(2, "bar", [](std::string x, std::string y){ return x + y; }); - assert(map.find(0) == nullptr); - assert(map.find(2) != nullptr); - assert(map.at(2) == "foobar"); - assert(map.find(5) == nullptr); -} - void test_3_elem() { vector> values = {{1, "foo"}, {3, "bar"}, {4, "baz"}}; SemistaticMap map(values.begin(), values.size()); @@ -78,10 +69,9 @@ void test_3_elem() { void test_1_elem_2_inserted() { vector> values = {{1, "foo"}}; - SemistaticMap map(values.begin(), values.size()); - auto combine = [](std::string x, std::string y){ return x + y; }; - map.insert(3, "bar", combine); - map.insert(4, "baz", combine); + SemistaticMap old_map(values.begin(), values.size()); + vector> new_values = {{3, "bar"}, {4, "baz"}}; + SemistaticMap map(old_map, std::move(new_values)); assert(map.find(0) == nullptr); assert(map.find(1) != nullptr); assert(map.at(1) == "foo"); @@ -95,11 +85,9 @@ void test_1_elem_2_inserted() { void test_3_elem_3_inserted() { vector> values = {{1, "1"}, {3, "3"}, {5, "5"}}; - SemistaticMap map(values.begin(), values.size()); - auto combine = [](std::string x, std::string y){ return x + y; }; - map.insert(2, "2", combine); - map.insert(4, "4", combine); - map.insert(16, "16", combine); + SemistaticMap old_map(values.begin(), values.size()); + vector> new_values = {{2, "2"}, {4, "4"}, {16, "16"}}; + SemistaticMap map(old_map, std::move(new_values)); assert(map.find(0) == nullptr); assert(map.find(1) != nullptr); assert(map.at(1) == "1"); @@ -131,21 +119,6 @@ void test_move_constructor() { assert(map.find(5) == nullptr); } -void test_copy_constructor() { - vector> values = {{1, "foo"}, {3, "bar"}, {4, "baz"}}; - SemistaticMap map1(values.begin(), values.size()); - SemistaticMap map = map1; - assert(map.find(0) == nullptr); - assert(map.find(1) != nullptr); - assert(map.at(1) == "foo"); - assert(map.find(2) == nullptr); - assert(map.find(3) != nullptr); - assert(map.at(3) == "bar"); - assert(map.find(4) != nullptr); - assert(map.at(4) == "baz"); - assert(map.find(5) == nullptr); -} - void test_move_assignment() { vector> values = {{1, "foo"}, {3, "bar"}, {4, "baz"}}; SemistaticMap map1(values.begin(), values.size()); @@ -162,35 +135,16 @@ void test_move_assignment() { assert(map.find(5) == nullptr); } -void test_copy_assignment() { - vector> values = {{1, "foo"}, {3, "bar"}, {4, "baz"}}; - SemistaticMap map1(values.begin(), values.size()); - SemistaticMap map; - map = map1; - assert(map.find(0) == nullptr); - assert(map.find(1) != nullptr); - assert(map.at(1) == "foo"); - assert(map.find(2) == nullptr); - assert(map.find(3) != nullptr); - assert(map.at(3) == "bar"); - assert(map.find(4) != nullptr); - assert(map.at(4) == "baz"); - assert(map.find(5) == nullptr); -} - int main() { test_empty(); test_1_elem(); test_1_inserted_elem(); - test_1_elem_combine(); test_3_elem(); test_1_elem_2_inserted(); test_3_elem_3_inserted(); test_move_constructor(); - test_copy_constructor(); test_move_assignment(); - test_copy_assignment(); return 0; }