diff --git a/librz/util/bitvector.c b/librz/util/bitvector.c
index b9ba66fec29..6cae8ff7b33 100644
--- a/librz/util/bitvector.c
+++ b/librz/util/bitvector.c
@@ -2,11 +2,14 @@
 // SPDX-License-Identifier: LGPL-3.0-only
 
 #include "rz_util.h"
+#include <limits.h>
 #include <stdlib.h>
 #include <stdio.h>
 
 #define NELEM(N, ELEMPER) ((N + (ELEMPER)-1) / (ELEMPER))
 #define BV_ELEM_SIZE 8U
+// Number of bits handled at once by the chunk helpers (the width of one ut32)
+#define RZ_BV_CHUNK_SIZE ((ut32)(sizeof(ut32) * CHAR_BIT))
 
 // optimization for reversing 8 bits which uses 32 bits
 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith32Bits
@@ -200,6 +203,34 @@ RZ_API ut32 rz_bv_copy(RZ_NONNULL const RzBitVector *src, RZ_NONNULL RzBitVector
 	return dst->_elem_len;
 }
 
+/**
+ * Get a 32-bit chunk from the specified position in the bit vector.
+ * \param bv RzBitVector, the bit vector from which to extract the chunk
+ * \param chunk_idx ut32, the index of the chunk to retrieve, the chunk must lie completely inside bv
+ * \return chunk ut32, the extracted 32-bit chunk, 0 on invalid arguments
+ */
+RZ_API ut32 rz_bv_get_chunk(RZ_NONNULL const RzBitVector *bv, ut32 chunk_idx) {
+	// Only chunks that lie completely inside the bit vector can be addressed
+	rz_return_val_if_fail(bv && chunk_idx < bv->len / RZ_BV_CHUNK_SIZE, 0);
+	// NOTE(review): assumes RzBitVector stores its bits in an array of ut32 words named data, confirm
+	// A chunk is as wide as one such word, so chunk N is simply word N
+	return bv->data[chunk_idx];
+}
+
+/**
+ * Set a 32-bit chunk at the specified position in the bit vector.
+ * \param bv RzBitVector, the bit vector in which to set the chunk
+ * \param chunk_idx ut32, the index of the chunk to set, the chunk must lie completely inside bv
+ * \param chunk ut32, the 32-bit chunk to set
+ */
+RZ_API void rz_bv_set_chunk(RZ_NONNULL RzBitVector *bv, ut32 chunk_idx, ut32 chunk) {
+	// Only chunks that lie completely inside the bit vector can be addressed
+	rz_return_if_fail(bv && chunk_idx < bv->len / RZ_BV_CHUNK_SIZE);
+	// NOTE(review): assumes RzBitVector stores its bits in an array of ut32 words named data, confirm
+	// A chunk is as wide as one such word, so chunk N is simply word N
+	bv->data[chunk_idx] = chunk;
+}
+
 /**
  * Copy n bits from start position of source to start position of dest, return num of copied bits
  * \param src RzBitVector, data source
@@ -209,24 +240,51 @@ RZ_API ut32 rz_bv_copy(RZ_NONNULL const RzBitVector *src, RZ_NONNULL RzBitVector
  * \param nbit ut32, control the size of copy (in bits)
  * \return copied_size ut32, Actual copied size
  */
 RZ_API ut32 rz_bv_copy_nbits(RZ_NONNULL const RzBitVector *src, ut32 src_start_pos, RZ_NONNULL RzBitVector *dst, ut32 dst_start_pos, ut32 nbit) {
 	rz_return_val_if_fail(src && dst, 0);
 
+	// Start positions past the end would make the subtractions below wrap around
+	if (src_start_pos > src->len || dst_start_pos > dst->len) {
+		return 0;
+	}
 	ut32 max_nbit = RZ_MIN((src->len - src_start_pos),
 		(dst->len - dst_start_pos));
 
 	// prevent overflow
 	if (max_nbit < nbit) {
 		return 0;
 	}
 
-	// normal case here
-	for (ut32 i = 0; i < nbit; ++i) {
-		bool c = rz_bv_get(src, src_start_pos + i);
-		rz_bv_set(dst, dst_start_pos + i, c);
+	ut32 remaining = nbit;
+
+	// Whole chunks can only be moved when both positions reach a chunk boundary
+	// together, i.e. when they start at the same offset inside a chunk
+	if (src_start_pos % RZ_BV_CHUNK_SIZE == dst_start_pos % RZ_BV_CHUNK_SIZE) {
+		// Unaligned prefix: bit by bit up to the next chunk boundary
+		while (remaining > 0 && src_start_pos % RZ_BV_CHUNK_SIZE != 0) {
+			bool bit = rz_bv_get(src, src_start_pos++);
+			rz_bv_set(dst, dst_start_pos++, bit);
+			--remaining;
+		}
+
+		// Aligned middle part: one whole chunk per iteration
+		while (remaining >= RZ_BV_CHUNK_SIZE) {
+			ut32 chunk = rz_bv_get_chunk(src, src_start_pos / RZ_BV_CHUNK_SIZE);
+			rz_bv_set_chunk(dst, dst_start_pos / RZ_BV_CHUNK_SIZE, chunk);
+			src_start_pos += RZ_BV_CHUNK_SIZE;
+			dst_start_pos += RZ_BV_CHUNK_SIZE;
+			remaining -= RZ_BV_CHUNK_SIZE;
+		}
+	}
+
+	// Remaining suffix bits, or the whole range when the offsets differ
+	while (remaining > 0) {
+		bool bit = rz_bv_get(src, src_start_pos++);
+		rz_bv_set(dst, dst_start_pos++, bit);
+		--remaining;
 	}
 
 	return nbit;
 }
 
 /**
@@ -1481,12 +1539,30 @@ RZ_API ut64 rz_bv_to_ut64(RZ_NONNULL const RzBitVector *x) {
  */
 RZ_API bool rz_bv_set_range(RZ_NONNULL RzBitVector *bv, ut32 pos_start, ut32 pos_end, bool b) {
 	rz_return_val_if_fail(bv, false);
-	if (pos_start > bv->len - 1 || pos_end > bv->len - 1) {
+	// Compare with >= rather than > len - 1, which wraps around for an empty bit vector
+	if (pos_start >= bv->len || pos_end >= bv->len || pos_start > pos_end) {
 		return false;
 	}
 
-	for (ut32 i = pos_start; i <= pos_end; ++i) {
-		rz_bv_set(bv, i, b);
+	// Work on the half-open range [pos, end); pos_end < bv->len, so the +1 cannot wrap
+	ut32 pos = pos_start;
+	const ut32 end = pos_end + 1;
+
+	// Unaligned prefix: bit by bit up to the next chunk boundary
+	while (pos < end && pos % RZ_BV_CHUNK_SIZE != 0) {
+		rz_bv_set(bv, pos++, b);
+	}
+
+	// Aligned middle part: fill one whole chunk per iteration
+	const ut32 fill_value = b ? UT32_MAX : 0;
+	while (end - pos >= RZ_BV_CHUNK_SIZE) {
+		rz_bv_set_chunk(bv, pos / RZ_BV_CHUNK_SIZE, fill_value);
+		pos += RZ_BV_CHUNK_SIZE;
+	}
+
+	// Remaining suffix bits
+	while (pos < end) {
+		rz_bv_set(bv, pos++, b);
 	}
 
 	return true;