Skip to content

Commit

Permalink
Improve: Pointer casting rules
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvardanian committed Dec 26, 2024
1 parent 89be0cb commit a3f2f00
Showing 1 changed file with 34 additions and 21 deletions.
55 changes: 34 additions & 21 deletions include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -2018,10 +2018,11 @@ SZ_INTERNAL sz_u64_vec_t _sz_u64_each_2byte_equal(sz_u64_vec_t a, sz_u64_vec_t b
* @brief Find the first occurrence of a @b two-character needle in an arbitrary length haystack.
* This implementation uses hardware-agnostic SWAR technique, to process 8 possible offsets at a time.
*/
SZ_INTERNAL sz_cptr_t _sz_find_2byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) {
SZ_INTERNAL sz_cptr_t _sz_find_2byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {

// This is an internal method, and the haystack is guaranteed to be at least 2 bytes long.
sz_assert(h_length >= 2 && "The haystack is too short.");
sz_unused(n_length);
sz_cptr_t const h_end = h + h_length;

#if !SZ_USE_MISALIGNED_LOADS
Expand Down Expand Up @@ -2072,10 +2073,11 @@ SZ_INTERNAL sz_u64_vec_t _sz_u64_each_4byte_equal(sz_u64_vec_t a, sz_u64_vec_t b
* @brief Find the first occurrence of a @b four-character needle in an arbitrary length haystack.
* This implementation uses hardware-agnostic SWAR technique, to process 8 possible offsets at a time.
*/
SZ_INTERNAL sz_cptr_t _sz_find_4byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) {
SZ_INTERNAL sz_cptr_t _sz_find_4byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {

// This is an internal method, and the haystack is guaranteed to be at least 4 bytes long.
sz_assert(h_length >= 4 && "The haystack is too short.");
sz_unused(n_length);
sz_cptr_t const h_end = h + h_length;

#if !SZ_USE_MISALIGNED_LOADS
Expand Down Expand Up @@ -2136,10 +2138,11 @@ SZ_INTERNAL sz_u64_vec_t _sz_u64_each_3byte_equal(sz_u64_vec_t a, sz_u64_vec_t b
* @brief Find the first occurrence of a @b three-character needle in an arbitrary length haystack.
* This implementation uses hardware-agnostic SWAR technique, to process 8 possible offsets at a time.
*/
SZ_INTERNAL sz_cptr_t _sz_find_3byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) {
SZ_INTERNAL sz_cptr_t _sz_find_3byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {

// This is an internal method, and the haystack is guaranteed to be at least 3 bytes long.
sz_assert(h_length >= 3 && "The haystack is too short.");
sz_unused(n_length);
sz_cptr_t const h_end = h + h_length;

#if !SZ_USE_MISALIGNED_LOADS
Expand Down Expand Up @@ -2344,8 +2347,18 @@ SZ_INTERNAL sz_cptr_t _sz_rfind_with_suffix(sz_cptr_t h, sz_size_t h_length, sz_
return SZ_NULL_CHAR;
}

/**
 *  @brief  Adapter that exposes the single-byte forward search through the same four-argument
 *          signature as the other substring-search backends, so it can be stored in a dispatch
 *          table without a function-pointer cast.
 *
 *  Only `h`, `h_length`, and `n` are forwarded to `sz_find_byte_serial`; `n_length` is ignored.
 *
 *  @return Whatever `sz_find_byte_serial(h, h_length, n)` returns.
 */
SZ_INTERNAL sz_cptr_t _sz_find_byte_prefix_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_cptr_t const match = sz_find_byte_serial(h, h_length, n);
    sz_unused(n_length); // Accepted only to keep the signature uniform; never read.
    return match;
}

/**
 *  @brief  Adapter that exposes the single-byte reverse search through the same four-argument
 *          signature as the other substring-search backends, so it can be stored in a dispatch
 *          table without a function-pointer cast.
 *
 *  Only `h`, `h_length`, and `n` are forwarded to `sz_rfind_byte_serial`; `n_length` is ignored.
 *
 *  @return Whatever `sz_rfind_byte_serial(h, h_length, n)` returns.
 */
SZ_INTERNAL sz_cptr_t _sz_rfind_byte_prefix_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    sz_cptr_t const match = sz_rfind_byte_serial(h, h_length, n);
    sz_unused(n_length); // Accepted only to keep the signature uniform; never read.
    return match;
}

/**
 *  @brief  Search backend that delegates to `_sz_find_with_prefix`, using `_sz_find_4byte_serial`
 *          as the callback and passing 4 as the final argument (presumably the prefix length
 *          matched by that callback - confirm against `_sz_find_with_prefix`).
 *
 *  @param h        Haystack pointer, forwarded unchanged.
 *  @param h_length Haystack length, forwarded unchanged.
 *  @param n        Needle pointer, forwarded unchanged.
 *  @param n_length Needle length, forwarded unchanged.
 *  @return Whatever `_sz_find_with_prefix` returns.
 */
SZ_INTERNAL sz_cptr_t _sz_find_over_4bytes_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) {
    // `_sz_find_4byte_serial` takes `(h, h_length, n, n_length)`, so it is passed as-is.
    // Going through a `(sz_find_t)` cast would silence the compiler if the signatures ever
    // diverged, and calling through a mismatched function-pointer type is undefined behavior.
    return _sz_find_with_prefix(h, h_length, n, n_length, _sz_find_4byte_serial, 4);
}

SZ_INTERNAL sz_cptr_t _sz_find_horspool_over_256bytes_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n,
Expand All @@ -2364,24 +2377,24 @@ SZ_PUBLIC sz_cptr_t sz_find_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n,

#if SZ_DETECT_BIG_ENDIAN
sz_find_t backends[] = {
(sz_find_t)sz_find_byte_serial,
(sz_find_t)_sz_find_horspool_upto_256bytes_serial,
(sz_find_t)_sz_find_horspool_over_256bytes_serial,
_sz_find_byte_prefix_serial,
_sz_find_horspool_upto_256bytes_serial,
_sz_find_horspool_over_256bytes_serial,
};

return backends[(n_length > 1) + (n_length > 256)](h, h_length, n, n_length);
#else
sz_find_t backends[] = {
// For very short strings brute-force SWAR makes sense.
(sz_find_t)sz_find_byte_serial,
(sz_find_t)_sz_find_2byte_serial,
(sz_find_t)_sz_find_3byte_serial,
(sz_find_t)_sz_find_4byte_serial,
_sz_find_byte_prefix_serial,
_sz_find_2byte_serial,
_sz_find_3byte_serial,
_sz_find_4byte_serial,
// To avoid constructing the skip-table, let's use the prefixed approach.
(sz_find_t)_sz_find_over_4bytes_serial,
_sz_find_over_4bytes_serial,
// For longer needles - use skip tables.
(sz_find_t)_sz_find_horspool_upto_256bytes_serial,
(sz_find_t)_sz_find_horspool_over_256bytes_serial,
_sz_find_horspool_upto_256bytes_serial,
_sz_find_horspool_over_256bytes_serial,
};

return backends[
Expand All @@ -2401,16 +2414,16 @@ SZ_PUBLIC sz_cptr_t sz_rfind_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n

sz_find_t backends[] = {
// For very short strings brute-force SWAR makes sense.
(sz_find_t)sz_rfind_byte_serial,
_sz_rfind_byte_prefix_serial,
// TODO: implement reverse-order SWAR for 2/3/4 byte variants.
// TODO: (sz_find_t)_sz_rfind_2byte_serial,
// TODO: (sz_find_t)_sz_rfind_3byte_serial,
// TODO: (sz_find_t)_sz_rfind_4byte_serial,
// TODO: _sz_rfind_2byte_serial,
// TODO: _sz_rfind_3byte_serial,
// TODO: _sz_rfind_4byte_serial,
// To avoid constructing the skip-table, let's use the prefixed approach.
// (sz_find_t)_sz_rfind_over_4bytes_serial,
// _sz_rfind_over_4bytes_serial,
// For longer needles - use skip tables.
(sz_find_t)_sz_rfind_horspool_upto_256bytes_serial,
(sz_find_t)_sz_rfind_horspool_over_256bytes_serial,
_sz_rfind_horspool_upto_256bytes_serial,
_sz_rfind_horspool_over_256bytes_serial,
};

return backends[
Expand Down

0 comments on commit a3f2f00

Please sign in to comment.