Skip to content

Commit

Permalink
Improve: Test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Jan 5, 2024
1 parent 567f782 commit 3cf591f
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 36 deletions.
4 changes: 3 additions & 1 deletion include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,7 @@ SZ_PUBLIC sz_bool_t sz_string_grow(sz_string_t *string, sz_size_t new_space, sz_
string->on_heap.start = new_start;
string->on_heap.space = new_space;
string->on_heap.padding = 0;
string->on_heap.length = string_length;

// Deallocate the old string.
if (string_is_on_heap) allocator->free(string_start, string_space, allocator->handle);
Expand Down Expand Up @@ -2146,8 +2147,9 @@ SZ_PUBLIC void sz_string_erase(sz_string_t *string, sz_size_t offset, sz_size_t
}

/**
 *  @brief  Returns the string's heap buffer (if it has one) to the allocator and then
 *          re-initializes the string with `sz_string_init`.
 *
 *  @param string     String to release. It is passed to `sz_string_init` before returning.
 *  @param allocator  Allocator whose `free` callback receives the buffer start, its space
 *                    and the allocator `handle`. Only used when the string is not on stack.
 */
SZ_PUBLIC void sz_string_free(sz_string_t *string, sz_memory_allocator_t *allocator) {
    // Only strings that are not on stack have a buffer to hand back to the allocator.
    if (!sz_string_is_on_stack(string))
        allocator->free(string->on_heap.start, string->on_heap.space, allocator->handle);
    // Re-initialize on both paths, so the on-stack case is reset as well.
    sz_string_init(string);
}

SZ_PUBLIC void sz_fill_serial(sz_ptr_t target, sz_size_t length, sz_u8_t value) {
Expand Down
57 changes: 30 additions & 27 deletions scripts/bench_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,56 +14,59 @@

using namespace ashvardanian::stringzilla::scripts;

/**
 * @brief Converts a vector of one string type into a vector of another string type.
 *
 * Every output element is list-initialized from the `data()` pointer and the `size()`
 * of the matching input element, so both the order and the count are preserved.
 */
template <typename string_type_to, typename string_type_from>
std::vector<string_type_to> to(std::vector<string_type_from> const &strings) {
    std::vector<string_type_to> converted;
    converted.reserve(strings.size()); // one allocation up-front, the final size is known
    for (auto const &source : strings) {
        converted.push_back({source.data(), source.size()});
    }
    return converted;
}

/**
* @brief Evaluation for search string operations: find.
*/
template <typename container_at>
void bench(std::vector<typename container_at::key_type> const &strings) {
void bench(std::string name, std::vector<std::string_view> const &strings) {

using key_type = typename container_at::key_type;
std::vector<key_type> keys = to<key_type>(strings);

// Build up the container
container_at container;
for (key_type const &key : strings) container[key] = 0;
for (key_type const &key : keys) container[key] = 0;

tracked_function_gt<unary_function_t> variant;
variant.results = bench_on_tokens(strings, [&](key_type const &key) {
container[key]++;
return 1;
variant.name = name;
variant.results = bench_on_tokens(keys, [&](key_type const &key) {
container.find(key)->second++;
return key.size();
});

variant.print();
}

/**
 * @brief Converts a vector of one string type into a vector of another string type.
 *
 * Every output element is list-initialized from the `data()` pointer and the `size()`
 * of the matching input element, so both the order and the count are preserved.
 */
template <typename string_type_to, typename string_type_from>
std::vector<string_type_to> to(std::vector<string_type_from> const &strings) {
    std::vector<string_type_to> converted;
    converted.reserve(strings.size()); // one allocation up-front, the final size is known
    for (auto const &source : strings) {
        converted.push_back({source.data(), source.size()});
    }
    return converted;
}

/**
 * @brief Runs `bench` over ordered and unordered maps keyed by StringZilla and STL string types.
 *
 * Each call passes a label naming the container and key type, followed by the tokens.
 *
 * @param strings Tokens forwarded to every `bench` call; returns immediately when it is empty.
 */
template <typename strings_type>
void bench_tokens(strings_type const &strings) {
    if (strings.size() == 0) return;
    auto const &s = strings;

    // StringZilla structures
    bench<std::map<sz::string, int>>("map<sz::string>", s);
    bench<std::map<sz::string_view, int>>("map<sz::string_view>", s);
    bench<std::unordered_map<sz::string, int>>("unordered_map<sz::string>", s);
    bench<std::unordered_map<sz::string_view, int>>("unordered_map<sz::string_view>", s);

    // Pure STL
    bench<std::map<std::string, int>>("map<std::string>", s);
    bench<std::map<std::string_view, int>>("map<std::string_view>", s);
    bench<std::unordered_map<std::string, int>>("unordered_map<std::string>", s);
    bench<std::unordered_map<std::string_view, int>>("unordered_map<std::string_view>", s);

    // STL structures with StringZilla operations
    // bench<std::map<std::string, int, sz::less>>("map<std::string>", s);
    // bench<std::map<std::string_view, int, sz::less>>("map<std::string_view>", s);
    // bench<std::unordered_map<std::string, int, sz::hash, sz::equal_to>>("unordered_map<std::string>", s);
    // bench<std::unordered_map<std::string_view, int, sz::hash, sz::equal_to>>("unordered_map<std::string_view>", s);
}

int main(int argc, char const **argv) {
Expand Down
81 changes: 73 additions & 8 deletions scripts/test.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include <cassert> // assertions
#include <cstdio> // `std::printf`
#include <cstring> // `std::memcpy`
#include <iterator> // `std::distance`
#include <vector> // `std::vector`
#include <algorithm> // `std::transform`
#include <cassert> // assertions
#include <cstdio> // `std::printf`
#include <cstring> // `std::memcpy`
#include <iterator> // `std::distance`
#include <vector> // `std::vector`

#define SZ_USE_X86_AVX2 0
#define SZ_USE_X86_AVX512 0
Expand All @@ -26,6 +27,8 @@ template <typename stl_matcher_, typename sz_matcher_>
void eval(std::string_view haystack_pattern, std::string_view needle_stl, std::size_t misalignment) {
constexpr std::size_t max_repeats = 128;
alignas(64) char haystack[misalignment + max_repeats * haystack_pattern.size()];
std::vector<std::size_t> offsets_stl;
std::vector<std::size_t> offsets_sz;

for (std::size_t repeats = 0; repeats != 128; ++repeats) {
std::size_t haystack_length = (repeats + 1) * haystack_pattern.size();
Expand All @@ -47,16 +50,37 @@ void eval(std::string_view haystack_pattern, std::string_view needle_stl, std::s
auto count_stl = std::distance(begin_stl, end_stl);
auto count_sz = std::distance(begin_sz, end_sz);

// To simplify debugging, let's first export all the match offsets, and only then compare them
std::transform(begin_stl, end_stl, std::back_inserter(offsets_stl),
[&](auto const &match) { return match.data() - haystack_stl.data(); });
std::transform(begin_sz, end_sz, std::back_inserter(offsets_sz),
[&](auto const &match) { return match.data() - haystack_sz.data(); });

// Compare results
for (; begin_stl != end_stl && begin_sz != end_sz; ++begin_stl, ++begin_sz) {
for (std::size_t match_idx = 0; begin_stl != end_stl && begin_sz != end_sz;
++begin_stl, ++begin_sz, ++match_idx) {
auto match_stl = *begin_stl;
auto match_sz = *begin_sz;
assert(match_stl.data() == match_sz.data());
if (match_stl.data() != match_sz.data()) {
std::printf("Mismatch at index #%zu: %zu != %zu\n", match_idx, match_stl.data() - haystack_stl.data(),
match_sz.data() - haystack_sz.data());
std::printf("Breakdown of found matches:\n");
std::printf("- STL (%zu): ", offsets_stl.size());
for (auto offset : offsets_stl) std::printf("%zu ", offset);
std::printf("\n");
std::printf("- StringZilla (%zu): ", offsets_sz.size());
for (auto offset : offsets_sz) std::printf("%zu ", offset);
std::printf("\n");
assert(false);
}
}

// If one range is not finished, assert failure
assert(count_stl == count_sz);
assert(begin_stl == end_stl && begin_sz == end_sz);

offsets_stl.clear();
offsets_sz.clear();
}
}

Expand Down Expand Up @@ -107,23 +131,64 @@ void eval(std::string_view haystack_pattern, std::string_view needle_stl) {
int main(int argc, char const **argv) {
std::printf("Hi Ash! ... or is it someone else?!\n");

// Comparing relative order of the strings
assert("a"_sz.compare("a") == 0);
assert("a"_sz.compare("ab") == -1);
assert("ab"_sz.compare("a") == 1);
assert("a"_sz.compare("a\0"_sz) == -1);
assert("a\0"_sz.compare("a") == 1);
assert("a\0"_sz.compare("a\0"_sz) == 0);
assert("a"_sz == "a"_sz);
assert("a"_sz != "a\0"_sz);
assert("a\0"_sz == "a\0"_sz);

assert(sz_size_bit_ceil(0) == 1);
assert(sz_size_bit_ceil(1) == 1);
assert(sz_size_bit_ceil(2) == 2);
assert(sz_size_bit_ceil(3) == 4);
assert(sz_size_bit_ceil(127) == 128);
assert(sz_size_bit_ceil(128) == 128);

assert(sz::string("abc").edit_distance("_abc") == 1);
assert(sz::string("").edit_distance("_") == 1);
assert(sz::string("_").edit_distance("") == 1);
assert(sz::string("_").edit_distance("xx") == 2);
assert(sz::string("_").edit_distance("xx", 1) == 1);
assert(sz::string("_").edit_distance("xx", 0) == 0);

std::string_view alphabet = "abcdefghijklmnopqrstuvwxyz"; // 26 characters
std::string_view base64 = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-"; // 64 characters
std::string_view common = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-=@$%"; // 68 characters

// Make sure copy constructors work as expected:
{
std::vector<sz::string> strings;
for (std::size_t alphabet_slice = 0; alphabet_slice != alphabet.size(); ++alphabet_slice)
strings.push_back(alphabet.substr(0, alphabet_slice));
std::vector<sz::string> copies {strings};
std::vector<sz::string> assignments = strings;
assert(std::equal(strings.begin(), strings.end(), copies.begin()));
assert(std::equal(strings.begin(), strings.end(), assignments.begin()));
}

// When haystack is only formed of needles:
// eval("a", "a");
eval("a", "a");
eval("ab", "ab");
eval("abc", "abc");
eval("abcd", "abcd");
eval(alphabet, alphabet);
eval(base64, base64);
eval(common, common);

// When we are dealing with NUL characters inside the string
eval("\0", "\0");
eval("a\0", "a\0");
eval("ab\0", "ab");
eval("ab\0", "ab\0");
eval("abc\0", "abc");
eval("abc\0", "abc\0");
eval("abcd\0", "abcd");

// When haystack is formed of equidistant needles:
eval("ab", "a");
eval("abc", "a");
Expand Down

0 comments on commit 3cf591f

Please sign in to comment.