-
Notifications
You must be signed in to change notification settings - Fork 674
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add cpp examples * Add multithreaded cpp examples
- Loading branch information
Showing
18 changed files
with
743 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
# C++ examples | ||
|
||
Creating an index, inserting elements, searching, and serialization: | ||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Query the elements for themselves and measure recall | ||
float correct = 0; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); | ||
hnswlib::labeltype label = result.top().second; | ||
if (label == i) correct++; | ||
} | ||
float recall = correct / max_elements; | ||
std::cout << "Recall: " << recall << "\n"; | ||
|
||
// Serialize index | ||
std::string hnsw_path = "hnsw.bin"; | ||
alg_hnsw->saveIndex(hnsw_path); | ||
delete alg_hnsw; | ||
|
||
// Deserialize index and check recall | ||
alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, hnsw_path); | ||
correct = 0; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::priority_queue<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnn(data + i * dim, 1); | ||
hnswlib::labeltype label = result.top().second; | ||
if (label == i) correct++; | ||
} | ||
recall = (float)correct / max_elements; | ||
std::cout << "Recall of deserialized index: " << recall << "\n"; | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
An example of filtering with a boolean function during the search: | ||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
// Filter that allows labels divisible by divisor | ||
class PickDivisibleIds: public hnswlib::BaseFilterFunctor { | ||
unsigned int divisor = 1; | ||
public: | ||
PickDivisibleIds(unsigned int divisor): divisor(divisor) { | ||
assert(divisor != 0); | ||
} | ||
bool operator()(hnswlib::labeltype label_id) { | ||
return label_id % divisor == 0; | ||
} | ||
}; | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Create filter that allows only even labels | ||
PickDivisibleIds pickIdsDivisibleByTwo(2); | ||
|
||
// Query the elements for themselves with filter and check returned labels | ||
int k = 10; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo); | ||
for (auto item: result) { | ||
if (item.second % 2 == 1) std::cout << "Error: found odd label\n"; | ||
} | ||
} | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
An example of reusing the memory of deleted elements when new elements are added (via the `allow_replace_deleted` flag): | ||
```cpp | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction, 100, true); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Mark first half of elements as deleted | ||
int num_deleted = max_elements / 2; | ||
for (int i = 0; i < num_deleted; i++) { | ||
alg_hnsw->markDelete(i); | ||
} | ||
|
||
float* add_data = new float[dim * num_deleted]; | ||
for (int i = 0; i < dim * num_deleted; i++) { | ||
add_data[i] = distrib_real(rng); | ||
} | ||
|
||
// Replace deleted data with new elements | ||
// Maximum number of elements is reached therefore we cannot add new items, | ||
// but we can replace the deleted ones by using replace_deleted=true | ||
for (int i = 0; i < num_deleted; i++) { | ||
int label = max_elements + i; | ||
alg_hnsw->addPoint(add_data + i * dim, label, true); | ||
} | ||
|
||
delete[] data; | ||
delete[] add_data; | ||
delete alg_hnsw; | ||
return 0; | ||
} | ||
``` | ||
|
||
Multithreaded examples: | ||
* Creating index, inserting elements, searching [example_mt_search.cpp](example_mt_search.cpp) | ||
* Filtering during the search with a boolean function [example_mt_filter.cpp](example_mt_filter.cpp) | ||
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#include "../../hnswlib/hnswlib.h" | ||
|
||
|
||
// Filter that allows labels divisible by divisor | ||
class PickDivisibleIds: public hnswlib::BaseFilterFunctor { | ||
unsigned int divisor = 1; | ||
public: | ||
PickDivisibleIds(unsigned int divisor): divisor(divisor) { | ||
assert(divisor != 0); | ||
} | ||
bool operator()(hnswlib::labeltype label_id) { | ||
return label_id % divisor == 0; | ||
} | ||
}; | ||
|
||
|
||
int main() { | ||
int dim = 16; // Dimension of the elements | ||
int max_elements = 10000; // Maximum number of elements, should be known beforehand | ||
int M = 16; // Tightly connected with internal dimensionality of the data | ||
// strongly affects the memory consumption | ||
int ef_construction = 200; // Controls index search speed/build speed tradeoff | ||
|
||
// Initing index | ||
hnswlib::L2Space space(dim); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, max_elements, M, ef_construction); | ||
|
||
// Generate random data | ||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
float* data = new float[dim * max_elements]; | ||
for (int i = 0; i < dim * max_elements; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
|
||
// Add data to index | ||
for (int i = 0; i < max_elements; i++) { | ||
alg_hnsw->addPoint(data + i * dim, i); | ||
} | ||
|
||
// Create filter that allows only even labels | ||
PickDivisibleIds pickIdsDivisibleByTwo(2); | ||
|
||
// Query the elements for themselves with filter and check returned labels | ||
int k = 10; | ||
for (int i = 0; i < max_elements; i++) { | ||
std::vector<std::pair<float, hnswlib::labeltype>> result = alg_hnsw->searchKnnCloserFirst(data + i * dim, k, &pickIdsDivisibleByTwo); | ||
for (auto item: result) { | ||
if (item.second % 2 == 1) std::cout << "Error: found odd label\n"; | ||
} | ||
} | ||
|
||
delete[] data; | ||
delete alg_hnsw; | ||
return 0; | ||
} |
Oops, something went wrong.