Skip to content

Commit

Permalink
another performance improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
duggavo committed May 29, 2024
1 parent 44d8732 commit 2018848
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 26 deletions.
4 changes: 0 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +0,0 @@
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
42 changes: 20 additions & 22 deletions xelishash.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,15 @@ func XelisHash(input []byte, scratch_pad *ScratchPad) ([32]byte, error) {

for i := 0; i < ITERS; i++ {
for j := 0; j < len(small_pad)/SLOT_LENGTH; j++ {
// Initialize indices
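// Initialize indices and precompute the signed sum of the j-th small pad chunk
// (each value is added when the matching slot's top bit is 0, subtracted otherwise)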
var total_sum uint32 = 0
for k := 0; k < SLOT_LENGTH; k++ {
indices[k] = uint16(k)
if slots[k]>>31 == 0 {
total_sum += small_pad[j*SLOT_LENGTH+k]
} else {
total_sum -= small_pad[j*SLOT_LENGTH+k]
}
}

for slot_idx := SLOT_LENGTH - 1; slot_idx >= 0; slot_idx-- {
Expand All @@ -86,29 +92,21 @@ func XelisHash(input []byte, scratch_pad *ScratchPad) ([32]byte, error) {
index := int(indices[index_in_indices])
indices[index_in_indices] = indices[slot_idx]

// THIS IS THE MOST PERFORMANCE-CRITICAL SECTION

// Split the loop in two to avoid checking k == index
sum := slots[index]
offset := j * SLOT_LENGTH
for k := 0; k < index; k++ {
pad := small_pad[offset+k]
if slots[k]>>31 == 0 {
sum = sum + pad
} else {
sum = sum - pad
}
}
for k := index + 1; k < SLOT_LENGTH; k++ {
pad := small_pad[offset+k]
if slots[k]>>31 == 0 {
sum = sum + pad
} else {
sum = sum - pad
}
local_sum := total_sum
s1 := int32(slots[index] >> 31)
pad_value := small_pad[j*SLOT_LENGTH+index]
if s1 == 0 {
local_sum -= pad_value
} else {
local_sum += pad_value
}

slots[index] = sum
// Apply the sum to the slot
slots[index] += local_sum

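// Update the total sum: if the write flipped the slot's top bit (s1 != s2),
// its pad contribution changes sign, shifting the total by 2*pad (mod 2^32)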
s2 := int32(slots[index] >> 31)
total_sum -= 2 * small_pad[j*SLOT_LENGTH+index] * uint32(-s1+s2)
}
}
}
Expand Down

0 comments on commit 2018848

Please sign in to comment.