Improved performance of rz_bv_copy_nbits and rz_bv_set_range #4740

Closed
wants to merge 11 commits
86 changes: 74 additions & 12 deletions librz/util/bitvector.c
@@ -2,11 +2,14 @@
// SPDX-License-Identifier: LGPL-3.0-only

#include "rz_util.h"
#include <rz_types.h>
#include <limits.h> // CHAR_BIT, used by RZ_BV_CHUNK_SIZE below
#include <stdlib.h>
#include <stdio.h>

#define NELEM(N, ELEMPER) ((N + (ELEMPER)-1) / (ELEMPER))
#define BV_ELEM_SIZE 8U
#define RZ_BV_CHUNK_SIZE (sizeof(unsigned long) * CHAR_BIT)
#define SIZE_OF_UNSIGNED_LONG sizeof(unsigned long)
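/*
 * Illustration (not part of the original patch), assuming an LP64 platform:
 * sizeof(unsigned long) == 8, so RZ_BV_CHUNK_SIZE == 64 bits, and
 * NELEM(100, BV_ELEM_SIZE) == (100 + 7) / 8 == 13 bytes to hold 100 bits.
 */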

// optimization for reversing 8 bits which uses 32 bits
// https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith32Bits
@@ -137,7 +140,6 @@ RZ_API RZ_OWN char *rz_bv_as_hex_string(RZ_NONNULL const RzBitVector *bv, bool p
if (!str) {
return NULL;
}

str[0] = '0';
str[1] = 'x';
ut32 j = 2;
@@ -209,24 +211,60 @@ RZ_API ut32 rz_bv_copy(RZ_NONNULL const RzBitVector *src, RZ_NONNULL RzBitVector
* \param nbit ut32, the number of bits to copy
* \return copied_size ut32, the number of bits actually copied (0 if the range does not fit)
*/
RZ_API ut32 rz_bv_copy_nbits(RZ_NONNULL const RzBitVector *src, ut32 src_start_pos, RZ_NONNULL RzBitVector *dst, ut32 dst_start_pos, ut32 nbit) {
rz_return_val_if_fail(src && dst, 0);

ut32 max_nbit = RZ_MIN((src->len - src_start_pos), (dst->len - dst_start_pos));

// the requested range must fit in both src and dst
if (max_nbit < nbit) {
return 0;
}

ut32 nbit_original = nbit;

// If src and dst are misaligned relative to each other, whole chunks can
// never line up; fall back to copying bit by bit.
if (src_start_pos % RZ_BV_CHUNK_SIZE != dst_start_pos % RZ_BV_CHUNK_SIZE) {
while (nbit > 0) {
rz_bv_set(dst, dst_start_pos++, rz_bv_get(src, src_start_pos++));
--nbit;
}
return nbit_original;
}

// Handle the unaligned prefix: copy single bits until both positions are chunk-aligned
while (nbit > 0 && src_start_pos % RZ_BV_CHUNK_SIZE != 0) {
rz_bv_set(dst, dst_start_pos++, rz_bv_get(src, src_start_pos++));
--nbit;
}

// Process aligned chunks: both positions are now chunk-aligned, so each
// iteration copies one whole word directly.
while (nbit >= RZ_BV_CHUNK_SIZE) {
unsigned long src_chunk = rz_bv_get_chunk(src, src_start_pos / RZ_BV_CHUNK_SIZE);
rz_bv_set_chunk(dst, dst_start_pos / RZ_BV_CHUNK_SIZE, src_chunk);

src_start_pos += RZ_BV_CHUNK_SIZE;
dst_start_pos += RZ_BV_CHUNK_SIZE;
nbit -= RZ_BV_CHUNK_SIZE;
}
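/*
 * Note (illustrative): the MaskedMerge bit hack
 * (https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge) computes
 *
 *     r = b ^ ((b ^ a) & mask);
 *
 * taking bits from a where mask is 1 and from b where mask is 0. It would be
 * needed only if a partially filled chunk were written; the loop above writes
 * full chunks only, so a direct store is sufficient.
 */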

// Handle remaining unaligned suffix bits
while (nbit > 0) {
bool bit = rz_bv_get(src, src_start_pos++);
rz_bv_set(dst, dst_start_pos++, bit);
--nbit;
}
return nbit_original;
}
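/*
 * Usage sketch (illustrative only, not part of this diff), assuming the
 * public RzBitVector API (rz_bv_new, rz_bv_set, rz_bv_free):
 *
 *   RzBitVector *src = rz_bv_new(128);
 *   RzBitVector *dst = rz_bv_new(128);
 *   rz_bv_set(src, 5, true);
 *   // copy src bits [0, 100) into dst starting at bit 3
 *   ut32 copied = rz_bv_copy_nbits(src, 0, dst, 3, 100);
 *   // copied == 100; dst bit 8 (= 5 + 3) is now set
 *   rz_bv_free(src);
 *   rz_bv_free(dst);
 */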

/**
@@ -1481,12 +1519,36 @@ RZ_API ut64 rz_bv_to_ut64(RZ_NONNULL const RzBitVector *x) {
*/
RZ_API bool rz_bv_set_range(RZ_NONNULL RzBitVector *bv, ut32 pos_start, ut32 pos_end, bool b) {
rz_return_val_if_fail(bv, false);
if (pos_start > bv->len - 1 || pos_end > bv->len - 1 || pos_start > pos_end) {
return false;
}

// Handle unaligned prefix bits
while (pos_start < pos_end && pos_start % RZ_BV_CHUNK_SIZE != 0) {
rz_bv_set(bv, pos_start++, b);
}

// Process aligned chunks
if (pos_start < pos_end) {
ut32 chunk_start = pos_start / RZ_BV_CHUNK_SIZE;
ut32 chunk_end = pos_end / RZ_BV_CHUNK_SIZE;

unsigned long fill_value = b ? ~0UL : 0UL;

for (ut32 i = chunk_start; i < chunk_end; ++i) {
rz_bv_set_chunk(bv, i, fill_value);
}

pos_start = chunk_end * RZ_BV_CHUNK_SIZE;
}
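/*
 * Worked example (illustrative): with 64-bit chunks, pos_start == 64 and
 * pos_end == 200 give chunk_start == 1 and chunk_end == 3, so chunks 1 and 2
 * (bits 64..191) are filled whole and bits 192..200 are handled by the
 * bit-by-bit suffix loop below.
 */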

// Handle remaining unaligned suffix bits
while (pos_start <= pos_end) {
rz_bv_set(bv, pos_start++, b);
}

return true;
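/*
 * Usage sketch (illustrative only, not part of this diff):
 *
 *   RzBitVector *bv = rz_bv_new(256);
 *   bool ok = rz_bv_set_range(bv, 10, 200, true);
 *   // ok == true; bits 10..200 inclusive are now set, all others stay clear
 *   rz_bv_free(bv);
 */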