# Avoid unnecessary hfield Creation/Deletion on updates in hashTypeSet. HSET updates improvement of ~10% (redis#13655)

This PR eliminates unnecessary creation and destruction of hfield
objects, ensuring only the required updates or insertions are performed.
This reduces overhead and improves performance by streamlining field
management in hash dictionaries, particularly in update-heavy scenarios
such as the benchmarks below (a simplified sketch of the resulting HSET
flow follows the links):
- [memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values.yml)
- [memtier_benchmark-10Mkeys-load-hash-5-fields-with-100B-values-pipeline-10](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-10Mkeys-load-hash-5-fields-with-100B-values-pipeline-10.yml)
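
Conceptually, the HSET write path now looks up the field by a precomputed hash first and only allocates an hfield when the field is missing. Below is a minimal sketch of that flow, not the actual t_hash.c change (which is not expanded in the diff on this page): `createHashField()` and `freeHashValue()` are hypothetical helpers, while `dictGetHash()`, `dictFindByHash()` and `dictAddNonExistsByHash()` are the dict APIs introduced by this commit.

```
#include <assert.h>
#include <stdint.h>
#include "dict.h"   /* dictGetHash, dictFindByHash, dictAddNonExistsByHash */
#include "sds.h"

/* Hypothetical helpers standing in for the real hfield/value management. */
void *createHashField(const sds field);
void freeHashValue(void *value);

/* Sketch only: set `field` to `value` in a hash dict without creating a
 * temporary hfield when the field already exists. */
static int hashSetSketch(dict *ht, sds field, sds value) {
    /* Hash the lookup key once and reuse it for both find and insert. */
    const uint64_t hash = dictGetHash(ht, field);

    dictEntry *de = dictFindByHash(ht, field, hash);
    if (de) {
        /* Field exists: update the value in place. No hfield object is
         * created or destroyed on this path. */
        freeHashValue(dictGetVal(de));        /* hypothetical helper */
        dictSetVal(ht, de, sdsdup(value));
        return 0;                             /* updated */
    }

    /* Field is known not to exist: allocate the hfield only now and
     * insert it without a second lookup or duplicate-key check. */
    void *hf = createHashField(field);        /* hypothetical helper */
    de = dictAddNonExistsByHash(ht, hf, hash);
    assert(de != NULL);
    dictSetVal(ht, de, sdsdup(value));
    return 1;                                 /* inserted */
}
```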


To test it, we can focus on the hfield-related tests:

```
tclsh tests/test_helper.tcl --single unit/type/hash-field-expire
tclsh tests/test_helper.tcl --single unit/type/hash
tclsh tests/test_helper.tcl --dump-logs --single unit/other
```

Extra check on full CI:
- [x] https://github.com/filipecosta90/redis/actions/runs/12225788759

## microbenchmark results 
16.7% improvement (drop in time) in `dictAddNonExistingRaw()` vs `dictAddRaw()`:
```
make REDIS_CFLAGS="-g -fno-omit-frame-pointer -O3 -DREDIS_TEST" -j
$ ./src/redis-server test dict --accurate
(...)
Inserting via dictAddRaw() non existing: 5000000 items in 2592 ms
(...)
Inserting via dictAddNonExistingRaw() non existing: 5000000 items in 2160 ms
```

8% improvement (drop in time) in finding a non-existing key and adding it via
`dictGetHash()+dictFindWithHash()+dictAddNonExistingRaw()` vs
`dictFind()+dictAddRaw()`:
```
make REDIS_CFLAGS="-g -fno-omit-frame-pointer -O3 -DREDIS_TEST" -j
$ ./src/redis-server test dict --accurate
(...)
Find() and inserting via dictFind()+dictAddRaw() non existing: 5000000 items in 2983 ms
Find() and inserting via dictGetHash()+dictFindWithHash()+dictAddNonExistingRaw() non existing: 5000000 items in 2740 ms

```
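
For reference, here is a minimal sketch of the two insertion patterns the microbenchmark compares, assuming a dict keyed by heap-allocated strings as in the `src/dict.c` benchmark (the helper names printed in the output above differ slightly from the final API names in the diff below, `dictFindByHash()` and `dictAddNonExistsByHash()`; the sketch uses the final names).

```
#include <assert.h>
#include <stdint.h>
#include "dict.h"

/* Baseline: the key is hashed twice, once inside dictFind() and once
 * inside dictAddRaw(), which also re-scans the bucket for an existing key. */
void insertBaseline(dict *d, void *key) {
    dictEntry *entry = dictFind(d, key);
    assert(entry == NULL);                    /* key known not to exist */
    entry = dictAddRaw(d, key, NULL);
    assert(entry != NULL);
}

/* Optimized: the key is hashed once; the same hash is reused for the
 * lookup and the insertion, and the duplicate-key scan is skipped. */
void insertWithPrecomputedHash(dict *d, void *key) {
    const uint64_t hash = dictGetHash(d, key);
    dictEntry *entry = dictFindByHash(d, key, hash);
    assert(entry == NULL);                    /* key known not to exist */
    entry = dictAddNonExistsByHash(d, key, hash);
    assert(entry != NULL);
}
```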

## benchmark results 


To benchmark:

```
pip3 install redis-benchmarks-specification==0.1.250
taskset -c 0 ./src/redis-server --save '' --protected-mode no --daemonize yes
redis-benchmarks-spec-client-runner --tests-regexp ".*load-hash.*" --flushall_on_every_test_start --flushall_on_every_test_end  --cpuset_start_pos 2 --override-memtier-test-time 60
```

Improvements in achievable throughput:

test | ops/sec unstable (59953d2) | ops/sec this PR (24af719) | % change
-- | -- | -- | --
memtier_benchmark-1key-load-hash-1K-fields-with-5B-values | 4097 | 5032 | 22.8%
memtier_benchmark-100Kkeys-load-hash-50-fields-with-100B-values | 37658 | 44688 | 18.7%
memtier_benchmark-100Kkeys-load-hash-50-fields-with-1000B-values | 14736 | 17350 | 17.7%
memtier_benchmark-1Mkeys-load-hash-5-fields-with-1000B-values-pipeline-10 | 131848 | 143485 | 8.8%
memtier_benchmark-1Mkeys-load-hash-hmset-5-fields-with-1000B-values | 82071 | 85681 | 4.4%
memtier_benchmark-1Mkeys-load-hash-5-fields-with-1000B-values | 82882 | 86336 | 4.2%
memtier_benchmark-10Mkeys-load-hash-5-fields-with-100B-values-pipeline-10 | 262502 | 273376 | 4.1%
memtier_benchmark-10Kkeys-load-hash-50-fields-with-10000B-values | 2821 | 2936 | 4.1%

---------

Co-authored-by: Moti Cohen <[email protected]>
fcostaoliveira and moticless authored Dec 12, 2024
1 parent c51c966 commit f8942f9
Showing 6 changed files with 205 additions and 63 deletions.
4 changes: 2 additions & 2 deletions src/defrag.c
@@ -296,7 +296,7 @@ void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) {
dictUseStoredKeyApi(d, 1);
uint64_t hash = dictGetHash(d, newhf);
dictUseStoredKeyApi(d, 0);
dictEntry *de = dictFindEntryByPtrAndHash(d, hf, hash);
dictEntry *de = dictFindByHashAndPtr(d, hf, hash);
serverAssert(de);
dictSetKey(d, de, newhf);
}
@@ -753,7 +753,7 @@ void defragKey(defragCtx *ctx, dictEntry *de) {
* the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */
uint64_t hash = kvstoreGetHash(db->expires, newsds);
dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash);
dictEntry *expire_de = kvstoreDictFindByHashAndPtr(db->expires, slot, keysds, hash);
if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
}

224 changes: 183 additions & 41 deletions src/dict.c
@@ -62,6 +62,7 @@ typedef struct {

static void _dictExpandIfNeeded(dict *d);
static void _dictShrinkIfNeeded(dict *d);
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx);
static signed char _dictNextExp(unsigned long size);
static int _dictInit(dict *d, dictType *type);
static dictEntry *dictGetNext(const dictEntry *de);
@@ -509,6 +510,39 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
return dictInsertAtPosition(d, key, position);
}

/* Low-level add function for non-existing keys:
* This function adds a new entry to the dictionary, assuming the key does not
* already exist.
* Parameters:
* - `dict *d`: Pointer to the dictionary structure.
* - `void *key`: Pointer to the key being added.
* - `const uint64_t hash`: hash of the key being added.
* Guarantees:
* - The key is assumed to be non-existing.
* Note:
* Ensure that the key's uniqueness is managed externally before calling this function. */
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash) {
/* Get the position for the new key, it should never be NULL. */
unsigned long idx, table;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

/* Expand the hash table if needed */
_dictExpandIfNeeded(d);

table = dictIsRehashing(d) ? 1 : 0;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
void *position = &d->ht_table[table][idx];
assert(position!=NULL);

/* Dup the key if necessary. */
if (d->type->keyDup) key = d->type->keyDup(d, key);

return dictInsertAtPosition(d, key, position);
}

/* Adds a key in the dict's hashtable at the position returned by a preceding
* call to dictFindPositionForInsert. This is a low level function which allows
* splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
@@ -608,17 +642,8 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);

@@ -734,32 +759,21 @@ void dictRelease(dict *d)
zfree(d);
}

dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash) {
dictEntry *he;
uint64_t h, idx, table;
uint64_t idx, table;

if (dictSize(d) == 0) return NULL; /* dict is empty */

h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);

if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);

/* Prefetch the bucket at the calculated index */
redis_prefetch_read(&d->ht_table[table][idx]);
@@ -781,6 +795,13 @@ dictEntry *dictFind(dict *d, const void *key)
return NULL;
}

dictEntry *dictFind(dict *d, const void *key)
{
if (dictSize(d) == 0) return NULL; /* dict is empty */
const uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
return dictFindByHash(d,key,hash);
}

void *dictFetchValue(dict *d, const void *key) {
dictEntry *he;

@@ -1566,6 +1587,21 @@ static void _dictShrinkIfNeeded(dict *d)
dictShrinkIfNeeded(d);
}

static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) {
if ((!dictIsRehashing(d)) || (d->pauserehash != 0))
return;
/* rehashing not in progress if rehashidx == -1 */
if ((long)visitedIdx >= d->rehashidx && d->ht_table[0][visitedIdx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, visitedIdx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
dictRehash(d,1);
}
}

/* Our hash table capability is a power of two */
static signed char _dictNextExp(unsigned long size)
{
@@ -1586,17 +1622,8 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
if (existing) *existing = NULL;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
@@ -1624,6 +1651,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
return bucket;
}


void dictEmpty(dict *d, void(callback)(dict*)) {
/* Someone may be monitoring a dict that started rehashing, before
* destroying the dict fake completion. */
@@ -1649,7 +1677,7 @@ uint64_t dictGetHash(dict *d, const void *key) {
* the hash value should be provided using dictGetHash.
* no string / key comparison is performed.
* return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash) {
dictEntry *he;
unsigned long idx, table;

@@ -1831,6 +1859,32 @@ char *stringFromLongLong(long long value) {
return s;
}

char *stringFromSubstring(void) {
#define LARGE_STRING_SIZE 10000
#define MIN_STRING_SIZE 100
#define MAX_STRING_SIZE 500
static char largeString[LARGE_STRING_SIZE + 1];
static int init = 0;
if (init == 0) {
/* Generate a large string */
for (size_t i = 0; i < LARGE_STRING_SIZE; i++) {
/* Random printable ASCII character (33 to 126) */
largeString[i] = 33 + (rand() % 94);
}
/* Null-terminate the large string */
largeString[LARGE_STRING_SIZE] = '\0';
init = 1;
}
/* Randomly choose a size between minSize and maxSize */
size_t substringSize = MIN_STRING_SIZE + (rand() % (MAX_STRING_SIZE - MIN_STRING_SIZE + 1));
size_t startIndex = rand() % (LARGE_STRING_SIZE - substringSize + 1);
/* Allocate memory for the substring (+1 for null terminator) */
char *s = zmalloc(substringSize + 1);
memcpy(s, largeString + startIndex, substringSize); // Copy the substring
s[substringSize] = '\0'; // Null-terminate the string
return s;
}

dictType BenchmarkDictType = {
hashCallback,
NULL,
@@ -1853,6 +1907,8 @@ int dictTest(int argc, char **argv, int flags) {
long long start, elapsed;
int retval;
dict *dict = dictCreate(&BenchmarkDictType);
dictEntry* de = NULL;
dictEntry* existing = NULL;
long count = 0;
unsigned long new_dict_size, current_dict_used, remain_keys;
int accurate = (flags & REDIS_TEST_ACCURATE);
@@ -1992,13 +2048,99 @@ int dictTest(int argc, char **argv, int flags) {
dictEmpty(dict, NULL);
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
}
srand(12345);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a dynamically allocated substring */
char *key = stringFromSubstring();

/* Insert the range directly from the large string */
de = dictAddRaw(dict, key, &existing);
assert(de != NULL || existing != NULL);
/* If key already exists NULL is returned so we need to free the temp key string */
if (de == NULL) zfree(key);
}
end_benchmark("Inserting random substrings (100-500B) from large string with symbols");
assert((long)dictSize(dict) <= count);
dictEmpty(dict, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
end_benchmark("Inserting");
end_benchmark("Inserting via dictAdd() non existing");
assert((long)dictSize(dict) == count);

dictEmpty(dict, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
de = dictAddRaw(dict,stringFromLongLong(j),NULL);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddRaw() non existing");
assert((long)dictSize(dict) == count);

start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
de = dictAddRaw(dict,key,&existing);
assert(existing != NULL);
zfree(key);
}
end_benchmark("Inserting via dictAddRaw() existing (no insertion)");
assert((long)dictSize(dict) == count);

dictEmpty(dict, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
const uint64_t hash = dictGetHash(dict, key);
de = dictAddNonExistsByHash(dict,key,hash);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddNonExistsByHash() non existing");
assert((long)dictSize(dict) == count);

/* Wait for rehashing. */
while (dictIsRehashing(dict)) {
dictRehashMicroseconds(dict,100*1000);
}

dictEmpty(dict, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);

/* Check if the key exists */
dictEntry *entry = dictFind(dict, key);
assert(entry == NULL);

/* Add the key */
dictEntry *de = dictAddRaw(dict, key, NULL);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictFind()+dictAddRaw() non existing");

dictEmpty(dict, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
uint64_t hash = dictGetHash(dict, key);

/* Check if the key exists */
dictEntry *entry = dictFindByHash(dict, key, hash);
assert(entry == NULL);
de = dictAddNonExistsByHash(dict, key, hash);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictGetHash()+dictFindByHash()+dictAddNonExistsByHash() non existing");
assert((long)dictSize(dict) == count);

/* Wait for rehashing. */
4 changes: 3 additions & 1 deletion src/dict.h
@@ -196,6 +196,7 @@ int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash);
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position);
dictEntry *dictAddOrFind(dict *d, void *key);
@@ -207,6 +208,8 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash);
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash);
void *dictFetchValue(dict *d, const void *key);
int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d);
@@ -249,7 +252,6 @@ uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);

size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);
4 changes: 2 additions & 2 deletions src/kvstore.c
@@ -766,12 +766,12 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx)
return dictGetFairRandomKey(d);
}

dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash)
dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash)
{
dict *d = kvstoreGetDict(kvs, didx);
if (!d)
return NULL;
return dictFindEntryByPtrAndHash(d, oldptr, hash);
return dictFindByHashAndPtr(d, oldptr, hash);
}

unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count)
2 changes: 1 addition & 1 deletion src/kvstore.h
@@ -73,7 +73,7 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id);
dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di);
dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count);
int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size);
unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
[Diff for the sixth changed file was not loaded on this page.]
