Skip to content

Commit

Permalink
Save profiling of intersect+adj code
Browse files Browse the repository at this point in the history
  • Loading branch information
softwaredoug committed Dec 2, 2024
1 parent 29270ea commit ad39e54
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 0 deletions.
5 changes: 5 additions & 0 deletions searcharray/phrase/middle_out.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import logging
from collections import defaultdict, abc

from time import perf_counter


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -139,8 +141,11 @@ def _compute_phrase_freqs_right_to_left(encoded_posns: List[np.ndarray],
rhs = encoded_posns[-1]
for lhs in encoded_posns[-2::-1]:
# Only keep the count from the last bigram (ignoring the ones where priors did not match)
# begin = perf_counter()
phrase_freqs, conts = bigram_freqs(lhs, rhs,
cont=Continuation.LHS)
# end = perf_counter()
# print(f"bigram_freqs {len(lhs)}|{len(rhs)} took {end - begin} seconds")

assert conts[0] is not None
rhs = conts[0]
Expand Down
24 changes: 24 additions & 0 deletions searcharray/roaringish/intersect.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
# cython: language_level=3
cimport numpy as np
import numpy as np
cimport libc.stdint
from libc.stdio cimport printf
from libc.stdint cimport uint32_t
from libc.stdint cimport uint64_t
cimport cython


# Profiling helpers implemented in C in timer.h; both are declared nogil so
# they can be called from nogil sections.
cdef extern from "timer.h":
# Current timer reading (timer.h returns mach_absolute_time()).
cdef uint64_t timestamp() nogil
# NOTE(review): timer.h names the first argument `elapsed` and converts it as a
# duration, not a start time; the calls in this file pass accumulated
# differences of timestamp() values. The name `start` here is misleading.
cdef void print_elapsed(uint64_t start, const char* msg) nogil


cimport searcharray.roaringish.snp_ops
Expand Down Expand Up @@ -228,11 +238,18 @@ cdef DTYPE_t _gallop_int_and_adj_drop(intersect_args_t args,
cdef DTYPE_t* lhs_adj_result_ptr = &adj_lhs_out[0]
cdef DTYPE_t* rhs_adj_result_ptr = &adj_rhs_out[0]

cdef uint64_t gallop_time = 0
cdef uint64_t gallop_start = 0

cdef uint64_t collect_time = 0
cdef uint64_t collect_start = 0

while lhs_ptr < end_lhs_ptr and rhs_ptr < end_rhs_ptr:

# Gallop to adjacent or equal value
# if value_lhs < value_rhs - delta:
# Gallop past the current element
gallop_start = timestamp()
if (lhs_ptr[0] & args.mask) != (rhs_ptr[0] & args.mask):
while lhs_ptr < end_lhs_ptr and ((lhs_ptr[0] & args.mask) + delta) < (rhs_ptr[0] & args.mask):
lhs_ptr += (gallop * args.lhs_stride)
Expand All @@ -246,6 +263,8 @@ cdef DTYPE_t _gallop_int_and_adj_drop(intersect_args_t args,
gallop = 1
# Now lhs is at or before RHS - delta
# RHS is 4, LHS is at most 3
gallop_time += (timestamp() - gallop_start)
collect_start = timestamp()
# Collect adjacent values
if ((lhs_ptr[0] & args.mask) + delta) == ((rhs_ptr[0] & args.mask)):
if (last_adj & args.mask) != (lhs_ptr[0] & args.mask):
Expand All @@ -270,6 +289,10 @@ cdef DTYPE_t _gallop_int_and_adj_drop(intersect_args_t args,
lhs_result_ptr += 1
rhs_result_ptr += 1
rhs_ptr += args.rhs_stride
collect_time += (timestamp() - collect_start)

print_elapsed(gallop_time, "Gallop ")
print_elapsed(collect_time, "Collect")

adj_out_len[0] = lhs_adj_result_ptr - &adj_lhs_out[0]
return lhs_result_ptr - &args.lhs_out[0]
Expand Down Expand Up @@ -381,6 +404,7 @@ def intersect_with_adjacents(np.ndarray[DTYPE_t, ndim=1] lhs,
args.rhs_out = &rhs_out[0]
adj_lhs_out_begin = &adj_lhs_out[0]
adj_rhs_out_begin = &adj_rhs_out[0]
print(f"Lhs len: {lhs.shape[0]} | Rhs len: {rhs.shape[0]}")
with nogil:
amt_written = _gallop_int_and_adj_drop(args, delta,
adj_lhs_out_begin,
Expand Down
28 changes: 28 additions & 0 deletions searcharray/roaringish/timer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef TIMER_H
#define TIMER_H


#include <stdio.h>
#include "mach/mach_time.h"


uint64_t timestamp() {
return mach_absolute_time();
}


void print_elapsed(uint64_t elapsed, const char *msg) {
mach_timebase_info_data_t info;
mach_timebase_info(&info);
double nanos = elapsed * ((double)(info.numer) / (double)(info.denom));
if (nanos > 1000000000) {
printf("%s Elapsed time: %lf s\n", msg, nanos / 1000000000.0);
} else if (nanos > 1000000) {
printf("%s Elapsed time: %lf ms\n", msg, nanos / 1000000.0);
} else {
printf("%s Elapsed time: %lf ns\n", msg, nanos);
}
}


#endif // TIMER_H
31 changes: 31 additions & 0 deletions test/test_snp_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,37 @@ def intersect_many():
profiler.run(intersect_many)


@pytest.mark.skipif(not profile_enabled, reason="Profiling disabled")
@pytest.mark.parametrize("suffix", [128, 185, 24179, 27685, 44358, 45907, 90596])
def test_int_adj_saved(suffix, benchmark):
    """Profile the intersect/adjacent strategies on saved fixture arrays.

    For the given ``suffix`` the lhs, rhs and mask arrays are loaded from the
    ``fixtures/`` directory (resolved against the current working directory),
    then every strategy is run ten times under the profiler.
    """
    profiler = Profiler(benchmark)

    print(f"Running with {suffix}")
    fixture_paths = (
        f"fixtures/lhs_{suffix}.npy",
        f"fixtures/rhs_{suffix}.npy",
        f"fixtures/mask_{suffix}.npy",
    )
    lhs, rhs, mask = [np.load(path) for path in fixture_paths]
    print(lhs.shape, rhs.shape)

    # The nested function names are kept stable: they are what a profiler
    # report would show for each strategy.
    def int_adj():
        # Combined intersect + adjacent in a single call.
        intersect_with_adjacents(lhs, rhs, mask=mask)

    def two_ops():
        # The same work expressed as two separate calls.
        intersect(lhs, rhs, mask)
        adjacent(lhs, rhs, mask)

    def upper_bound():
        """An operation involving a scan, that's likely the upper bound of speed."""
        count_odds(lhs, rhs)

    def intersect_many():
        workloads = (two_ops, upper_bound, int_adj)
        for _ in range(10):
            for workload in workloads:
                workload()

    profiler.run(intersect_many)


@pytest.mark.skipif(not profile_enabled, reason="Profiling disabled")
def test_profile_masked_intersect_sparse_dense(benchmark):
profiler = Profiler(benchmark)
Expand Down

0 comments on commit ad39e54

Please sign in to comment.