Skip to content

Commit

Permalink
refactor: move bsearch function to C-code (#2945)
Browse files Browse the repository at this point in the history
This commit fixes issue #527 and move the bsearch function to a native
C-code. This change improves the performance of this function.
  • Loading branch information
eloycoto authored Jan 25, 2025
1 parent 18bd837 commit 562ffec
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 32 deletions.
35 changes: 35 additions & 0 deletions src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,40 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
}
}

/*
* Assuming the input array is sorted, bsearch/1 returns
* the index of the target if the target is in the input array; and otherwise
* (-1 - ix), where ix is the insertion point that would leave the array sorted.
* If the input is not sorted, bsearch will terminate but with irrelevant results.
*/
static jv f_bsearch(jq_state *jq, jv input, jv target) {
if (jv_get_kind(input) != JV_KIND_ARRAY) {
jv_free(target);
return type_error(input, "cannot be searched from");
}
int start = 0;
int end = jv_array_length(jv_copy(input));
jv answer = jv_invalid();
while (start < end) {
int mid = start + (end - start) / 2;
int result = jv_cmp(jv_copy(target), jv_array_get(jv_copy(input), mid));
if (result == 0) {
answer = jv_number(mid);
break;
} else if (result < 0) {
end = mid;
} else {
start = mid + 1;
}
}
if (!jv_is_valid(answer)) {
answer = jv_number(-1 - start);
}
jv_free(input);
jv_free(target);
return answer;
}

static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
if (jv_get_kind(input) == JV_KIND_ARRAY &&
jv_get_kind(keys) == JV_KIND_ARRAY &&
Expand Down Expand Up @@ -1844,6 +1878,7 @@ BINOPS
CFUNC(f_sort, "sort", 1),
CFUNC(f_sort_by_impl, "_sort_by_impl", 2),
CFUNC(f_group_by_impl, "_group_by_impl", 2),
CFUNC(f_bsearch, "bsearch", 2),
CFUNC(f_min, "min", 1),
CFUNC(f_max, "max", 1),
CFUNC(f_min_by_impl, "_min_by_impl", 2),
Expand Down
31 changes: 0 additions & 31 deletions src/builtin.jq
Original file line number Diff line number Diff line change
Expand Up @@ -219,37 +219,6 @@ def tostream:
getpath($p) |
reduce path(.[]?) as $q ([$p, .]; [$p+$q]);

# Assuming the input array is sorted, bsearch/1 returns
# the index of the target if the target is in the input array; and otherwise
# (-1 - ix), where ix is the insertion point that would leave the array sorted.
# If the input is not sorted, bsearch will terminate but with irrelevant results.
def bsearch($target):
if length == 0 then -1
elif length == 1 then
if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end
else . as $in
# state variable: [start, end, answer]
# where start and end are the upper and lower offsets to use.
| [0, length-1, null]
| until( .[0] > .[1] ;
if .[2] != null then (.[1] = -1) # i.e. break
else
( ( (.[1] + .[0]) / 2 ) | floor ) as $mid
| $in[$mid] as $monkey
| if $monkey == $target then (.[2] = $mid) # success
elif .[0] == .[1] then (.[1] = -1) # failure
elif $monkey < $target then (.[0] = ($mid + 1))
else (.[1] = ($mid - 1))
end
end )
| if .[2] == null then # compute the insertion point
if $in[ .[0] ] < $target then (-2 -.[0])
else (-1 -.[0])
end
else .[2]
end
end;

# Apply f to composite entities recursively, and to atoms
def walk(f):
def w:
Expand Down
12 changes: 11 additions & 1 deletion tests/jq.test
Original file line number Diff line number Diff line change
Expand Up @@ -1642,12 +1642,22 @@ ascii_upcase
"useful but not for é"
"USEFUL BUT NOT FOR é"

bsearch(0,2,4)
bsearch(0,1,2,3,4)
[1,2,3]
-1
0
1
2
-4

bsearch({x:1})
[{ "x": 0 },{ "x": 1 },{ "x": 2 }]
1

try ["OK", bsearch(0)] catch ["KO",.]
"aa"
["KO","string (\"aa\") cannot be searched from"]

# strptime tests are in optional.test

strftime("%Y-%m-%dT%H:%M:%SZ")
Expand Down

0 comments on commit 562ffec

Please sign in to comment.