-
Notifications
You must be signed in to change notification settings - Fork 292
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1542 from divyalakshmi0/main
Added Robin Hood Hashing in Hashmap for Optimized Probe Length Management
- Loading branch information
Showing
2 changed files
with
175 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <stdbool.h> | ||
#include <string.h> | ||
|
||
// Number of slots in the open-addressing table.
#define TABLE_SIZE 100
// Sentinel written into an entry's key field to mark the slot as deleted
// (a tombstone). Parenthesized so the negative literal always expands as a
// single operand, whatever expression the macro appears in.
#define DELETED (-1)
|
||
// One stored key/value pair plus the bookkeeping Robin Hood hashing needs.
typedef struct {
    int key;            // key supplied by the caller; DELETED marks a tombstone
    int value;          // value associated with the key
    int probe_distance; // slots between this entry and its home index
} HashEntry;
|
||
// The table itself: one heap-allocated entry pointer per slot.
// A NULL slot has never held an entry; deleted entries stay in place as
// tombstones (key == DELETED).
HashEntry *hashTable[TABLE_SIZE];
|
||
// Hash function to calculate the index | ||
int hashFunction(int key) { | ||
return key % TABLE_SIZE; | ||
} | ||
|
||
// Initialize the hash table | ||
void initializeTable() { | ||
for (int i = 0; i < TABLE_SIZE; i++) { | ||
hashTable[i] = NULL; | ||
} | ||
} | ||
|
||
// Insert an element into the hash table | ||
void insert(int key, int value) { | ||
int index = hashFunction(key); | ||
int probe_distance = 0; | ||
HashEntry* newEntry = (HashEntry*)malloc(sizeof(HashEntry)); | ||
newEntry->key = key; | ||
newEntry->value = value; | ||
newEntry->probe_distance = 0; | ||
|
||
while (hashTable[index] != NULL && hashTable[index]->key != DELETED) { | ||
if (hashTable[index]->probe_distance < probe_distance) { | ||
// Swap the current entry with the new entry if its probe distance is smaller | ||
HashEntry* temp = hashTable[index]; | ||
hashTable[index] = newEntry; | ||
newEntry = temp; | ||
probe_distance = newEntry->probe_distance; | ||
} | ||
// Increment probe distance and find next index | ||
index = (index + 1) % TABLE_SIZE; | ||
probe_distance++; | ||
} | ||
|
||
hashTable[index] = newEntry; | ||
hashTable[index]->probe_distance = probe_distance; | ||
} | ||
|
||
// Search for an element in the hash table | ||
int search(int key) { | ||
int index = hashFunction(key); | ||
int probe_distance = 0; | ||
|
||
while (hashTable[index] != NULL) { | ||
if (hashTable[index]->key == key) { | ||
return hashTable[index]->value; | ||
} | ||
if (hashTable[index]->probe_distance < probe_distance) { | ||
break; | ||
} | ||
index = (index + 1) % TABLE_SIZE; | ||
probe_distance++; | ||
} | ||
return -1; // Key not found | ||
} | ||
|
||
// Delete an element from the hash table | ||
void delete(int key) { | ||
int index = hashFunction(key); | ||
int probe_distance = 0; | ||
|
||
while (hashTable[index] != NULL) { | ||
if (hashTable[index]->key == key) { | ||
hashTable[index]->key = DELETED; | ||
hashTable[index]->value = 0; | ||
hashTable[index]->probe_distance = 0; | ||
return; | ||
} | ||
if (hashTable[index]->probe_distance < probe_distance) { | ||
break; | ||
} | ||
index = (index + 1) % TABLE_SIZE; | ||
probe_distance++; | ||
} | ||
} | ||
|
||
// Display the hash table | ||
void displayTable() { | ||
for (int i = 0; i < TABLE_SIZE; i++) { | ||
if (hashTable[i] != NULL && hashTable[i]->key != DELETED) { | ||
printf("Index %d: Key = %d, Value = %d, Probe Distance = %d\n", i, hashTable[i]->key, hashTable[i]->value, hashTable[i]->probe_distance); | ||
} | ||
} | ||
} | ||
|
||
int main() { | ||
initializeTable(); | ||
insert(10, 100); | ||
insert(20, 200); | ||
insert(30, 300); | ||
insert(40, 400); | ||
insert(15, 150); // Causes probing with Robin Hood logic | ||
|
||
printf("Hash Table:\n"); | ||
displayTable(); | ||
|
||
printf("Searching for key 20: %d\n", search(20)); | ||
printf("Searching for key 40: %d\n", search(40)); | ||
|
||
printf("Deleting key 20...\n"); | ||
delete(20); | ||
printf("Searching for key 20: %d\n", search(20)); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Robin Hood Hashing Algorithm | ||
|
||
## Description | ||
|
||
Robin Hood Hashing is a variant of open addressing for hash tables, designed to even out probe lengths and keep lookups fast inside clusters. During insertion, whenever the element being inserted has already probed farther from its home slot than the element currently occupying a slot, the two swap places: the “poorer” element (the one with the longer probe distance) takes the slot, and insertion continues with the displaced “richer” element. This balancing strategy improves lookup times by reducing the maximum probe length and the variance of probe lengths across the hash table. | ||
|
||
## Key Features | ||
|
||
- **Efficient Collision Resolution**: Uses open addressing with probe length comparison to determine the optimal position for each element. | ||
- **Load Balancing**: Minimizes clustering by enforcing an even distribution of elements. | ||
- **Dynamic Swapping**: Displaces elements that sit close to their home slot in favor of elements that have probed farther, keeping probe distances as even as possible. | ||
|
||
## Problem Definition | ||
|
||
Given a set of key-value pairs, the goal of Robin Hood Hashing is to create a hash table that: | ||
- Minimizes the maximum probe distance for any element. | ||
- Balances the distribution of elements across the table, especially at high load factors. | ||
|
||
- **Input**: A set of keys and values to be inserted into the hash table. | ||
- **Output**: A hash table with balanced probe lengths to ensure efficient access and minimized clustering. | ||
|
||
## Algorithm Overview | ||
|
||
### 1. Hash Function | ||
- Compute the initial index for each key using `key % TABLE_SIZE`. | ||
|
||
### 2. Insertion | ||
- Insert each key-value pair at its computed index. | ||
- If a collision occurs and an existing element has a shorter probe distance than the new element, swap them. | ||
- Continue probing with the displaced element, repeating the comparison at each slot, until the element being carried reaches an empty slot. | ||
|
||
### 3. Search | ||
- Calculate the hash index and probe through the table. | ||
- If the probe reaches an element whose stored probe distance is shorter than the distance already travelled, the key cannot be in the table and the search terminates early. | ||
|
||
### 4. Deletion | ||
- Mark elements as deleted rather than physically removing them to retain the probe distance logic for neighboring elements. | ||
|
||
## Time Complexity | ||
|
||
- **Insertion**: Average O(1) with probe length adjustments. | ||
- **Search**: Average O(1) by leveraging minimized probe lengths. | ||
|
||
## Applications | ||
|
||
Robin Hood Hashing is beneficial for scenarios involving: | ||
- **High Load Factor Hash Tables**: Maintains efficiency in dense hash tables. | ||
- **Data Deduplication**: Handles duplicate elements efficiently without excessive clustering. | ||
- **Real-Time Applications**: Minimizes lookup and insertion times, ideal for high-throughput systems. | ||
|
||
## Conclusion | ||
|
||
Robin Hood Hashing optimizes hash table performance by balancing probe lengths, improving access times, and reducing clustering. This makes it a valuable technique for efficient data storage and retrieval in applications requiring high load factors and balanced hash distribution. |