From 67b770aabdabe78edc255d13cb8a656f14e8e303 Mon Sep 17 00:00:00 2001 From: Yuku on yuku12 Date: Wed, 14 Feb 2024 12:11:28 +0800 Subject: [PATCH] Fix binary search on footnotes and xref: arrays of arif should be treated as unsigned ints Fixes #102 --- .idea/misc.xml | 8 ++- AlkitabYes2/build.gradle | 1 + .../yes2/section/FootnotesSection.java | 7 +- .../alkitab/yes2/section/XrefsSection.java | 7 +- .../alkitab/yes2/util/unsignedBinarySearch.kt | 30 +++++++++ .../yes2/util/UnsignedBinarySearchKtTest.kt | 65 +++++++++++++++++++ 6 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 AlkitabYes2/src/main/java/yuku/alkitab/yes2/util/unsignedBinarySearch.kt create mode 100644 AlkitabYes2/src/test/java/yuku/alkitab/yes2/util/UnsignedBinarySearchKtTest.kt diff --git a/.idea/misc.xml b/.idea/misc.xml index ef2471e29..c33d6afab 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -9,7 +9,7 @@ - + diff --git a/AlkitabYes2/build.gradle b/AlkitabYes2/build.gradle index 602aa5d79..92dbd3212 100644 --- a/AlkitabYes2/build.gradle +++ b/AlkitabYes2/build.gradle @@ -32,6 +32,7 @@ dependencies { implementation project(':BintexReader') implementation project(':BintexWriter') implementation project(':Snappy') + testImplementation 'junit:junit:4.12' } repositories { mavenCentral() diff --git a/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/FootnotesSection.java b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/FootnotesSection.java index 3f924d974..8309f2bad 100644 --- a/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/FootnotesSection.java +++ b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/FootnotesSection.java @@ -1,14 +1,13 @@ package yuku.alkitab.yes2.section; import android.util.Log; +import java.io.IOException; import yuku.alkitab.model.FootnoteEntry; import yuku.alkitab.yes2.io.RandomInputStream; import yuku.alkitab.yes2.section.base.SectionContent; +import yuku.alkitab.yes2.util.UnsignedBinarySearchKt; import yuku.bintex.BintexReader; -import java.io.IOException; -import java.util.Arrays; - // The writer is in another class (not here to save code amount for Alkitab app) // Section format: // { @@ -56,7 +55,7 @@ public class FootnotesSection extends SectionContent { } public FootnoteEntry getFootnoteEntry(final int arif) { - final int pos = Arrays.binarySearch(index_arifs, arif); + final int pos = UnsignedBinarySearchKt.unsignedIntBinarySearch(index_arifs, arif); if (pos < 0) { return null; } diff --git a/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/XrefsSection.java b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/XrefsSection.java index cbae4f948..8ca47cf15 100644 --- a/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/XrefsSection.java +++ b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/section/XrefsSection.java @@ -1,14 +1,13 @@ package yuku.alkitab.yes2.section; import android.util.Log; +import java.io.IOException; import yuku.alkitab.model.XrefEntry; import yuku.alkitab.yes2.io.RandomInputStream; import yuku.alkitab.yes2.section.base.SectionContent; +import yuku.alkitab.yes2.util.UnsignedBinarySearchKt; import yuku.bintex.BintexReader; -import java.io.IOException; -import java.util.Arrays; - // The writer is in another class (not here to save code amount for Alkitab app) // Section format: // { @@ -56,7 +55,7 @@ public class XrefsSection extends SectionContent { } public XrefEntry getXrefEntry(final int arif) { - final int pos = Arrays.binarySearch(index_arifs, arif); + final int pos = UnsignedBinarySearchKt.unsignedIntBinarySearch(index_arifs, arif); if (pos < 0) { return null; } diff --git a/AlkitabYes2/src/main/java/yuku/alkitab/yes2/util/unsignedBinarySearch.kt b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/util/unsignedBinarySearch.kt new file mode 100644 index 000000000..2db476037 --- /dev/null +++ b/AlkitabYes2/src/main/java/yuku/alkitab/yes2/util/unsignedBinarySearch.kt @@ -0,0 +1,30 @@ +package yuku.alkitab.yes2.util + +/** + * Treat [a] as a sorted array of unsigned ints and do binary search on it. + * + * @param key element to look for, treated as an unsigned int. + */ +fun unsignedIntBinarySearch(a: IntArray, key: Int): Int { + var low = 0 + var high = a.size - 1 + + while (low <= high) { + val mid = (low + high) ushr 1 + val midVal = a[mid] + + val cmp = uintCompare(midVal, key) + if (cmp < 0) { + low = mid + 1 + } else if (cmp > 0) { + high = mid - 1 + } else { + // key found + return mid + } + } + + return -(low + 1) // key not found. +} + +private fun uintCompare(v1: Int, v2: Int): Int = (v1 xor Int.MIN_VALUE).compareTo(v2 xor Int.MIN_VALUE) diff --git a/AlkitabYes2/src/test/java/yuku/alkitab/yes2/util/UnsignedBinarySearchKtTest.kt b/AlkitabYes2/src/test/java/yuku/alkitab/yes2/util/UnsignedBinarySearchKtTest.kt new file mode 100644 index 000000000..66ac65185 --- /dev/null +++ b/AlkitabYes2/src/test/java/yuku/alkitab/yes2/util/UnsignedBinarySearchKtTest.kt @@ -0,0 +1,65 @@ +@file:OptIn(ExperimentalUnsignedTypes::class) + +package yuku.alkitab.yes2.util + +import org.junit.Assert.assertArrayEquals +import org.junit.Assert.assertEquals +import org.junit.Before +import org.junit.Test + +class UnsignedBinarySearchKtTest { + + private val allPositiveArray = IntArray(100) { it + 10 } + private val allNegativeArray = IntArray(100) { (0xc000_0000L + it.toLong()).toInt() } + private val mixedSignArray = IntArray(100) { it } + IntArray(100) { (0xc000_0000L + it.toLong()).toInt() } + private val skipArray = IntArray(100) { it * 2 } + IntArray(100) { (0xc000_0000L + (it * 2).toLong()).toInt() } + + + @Before + fun checkSorted() { + // all arrays under test must be sorted + fun assertSorted(a: IntArray) { + val u = UIntArray(a.size) { a[it].toUInt() } + assertArrayEquals(u.toTypedArray(), u.sorted().toTypedArray()) + } + + assertSorted(allPositiveArray) + assertSorted(allNegativeArray) + assertSorted(mixedSignArray) + } + + @Test + fun unsignedBinarySearch() { + for (i in allPositiveArray.indices) { + assertEquals(i, unsignedIntBinarySearch(allPositiveArray, i + 10)) + } + assertEquals(0.inv(), unsignedIntBinarySearch(allPositiveArray, 0)) + assertEquals(100.inv(), unsignedIntBinarySearch(allPositiveArray, -1)) + assertEquals(100.inv(), unsignedIntBinarySearch(allPositiveArray, -2)) + assertEquals(100.inv(), unsignedIntBinarySearch(allPositiveArray, 400)) + + for (i in allNegativeArray.indices) { + assertEquals(i, unsignedIntBinarySearch(allNegativeArray, allNegativeArray[i])) + } + assertEquals(0.inv(), unsignedIntBinarySearch(allNegativeArray, 0)) + assertEquals(100.inv(), unsignedIntBinarySearch(allNegativeArray, -1)) + assertEquals(100.inv(), unsignedIntBinarySearch(allNegativeArray, -2)) + assertEquals(0.inv(), unsignedIntBinarySearch(allNegativeArray, 400)) + + for (i in mixedSignArray.indices) { + assertEquals(i, unsignedIntBinarySearch(mixedSignArray, mixedSignArray[i])) + } + assertEquals(0, unsignedIntBinarySearch(mixedSignArray, 0)) + assertEquals(200.inv(), unsignedIntBinarySearch(mixedSignArray, -1)) + assertEquals(200.inv(), unsignedIntBinarySearch(mixedSignArray, -2)) + // in the middle + assertEquals(100.inv(), unsignedIntBinarySearch(mixedSignArray, 400)) + + for (i in skipArray.indices) { + assertEquals(i, unsignedIntBinarySearch(skipArray, skipArray[i])) + } + assertEquals(0, unsignedIntBinarySearch(skipArray, 0)) + assertEquals(2.inv(), unsignedIntBinarySearch(skipArray, 3)) + assertEquals(103.inv(), unsignedIntBinarySearch(skipArray, (0xc000_0000L + 5L).toInt())) + } +}