dnbaker · dnbaker · Aug 3, 2023 · Jul 14, 2023 · Aug 1, 2023 · Aug 3, 2023
diff --git a/Makefile b/Makefile
@@ -19,9 +19,9 @@ WARNINGS=-Wall -Wextra -Wno-char-subscripts \
 		 -Wpointer-arith -Wwrite-strings -Wdisabled-optimization \
 		 -Wformat -Wcast-align -Wno-unused-function -Wno-unused-parameter \
 		 -pedantic -Wunused-variable\
-        -Wno-cast-align
+        -Wno-cast-align -Wno-sign-compare
 
-FLAGS=-O3 -funroll-loops -pipe $(TARGET_FLAG) -Iinclude/sketch -I. -Iinclude/blaze -Ivec -Ipybind11/include -Iinclude -fpic -Wall $(WARNINGS) \
+FLAGS=-O2 -funroll-loops -pipe $(TARGET_FLAG) -Iinclude/sketch -I. -Iinclude/blaze -Ivec -Ipybind11/include -Iinclude -fpic -Wall $(WARNINGS) \
      -fno-strict-aliasing
 
 CXXFLAGS=$(FLAGS) -Wreorder  \
@@ -49,7 +49,7 @@ STD?= -std=c++17
 
 #CCBIN?=-ccbin=clang++
 
-GPUFLAGS= $(CCBIN) -O3 -std=c++14 -Iinclude -I. -Xcompiler $(TARGET_FLAG) -Xcompiler -fopenmp -Iinclude/sketch \
+GPUFLAGS= $(CCBIN) -O2 -std=c++14 -Iinclude -I. -Xcompiler $(TARGET_FLAG) -Xcompiler -fopenmp -Iinclude/sketch \
 		-lz
 
 INCLUDES=-I`$(PYCONF) --includes` -Ipybind11/include
@@ -70,7 +70,7 @@ hpython: pybbmh.cpython.so
 	$(PYTHON) -c "import subprocess;import site; subprocess.check_call('cp pybbmh.py "*`$(PYCONF) --extension-suffix`" %s' % site.getsitepackages()[0], shell=True)"
 
 %.cpython.so: %.cpp
-	$(CXX) $(UNDEFSTR) $(INCLUDES) -fopenmp -O3 -Wall $(CXXFLAGS) -shared $(STD) -fPIC `python3 -m pybind11 --includes` $< -o $*$(SUF) -lz && \
+	$(CXX) $(UNDEFSTR) $(INCLUDES) -fopenmp -Wall $(CXXFLAGS) -O2 -shared $(STD) -fPIC `python3 -m pybind11 --includes` $< -o $*$(SUF) -lz && \
     ln -fs $*$(SUF) $@
 
 %.o: %.cpp

diff --git a/include/sketch/bbmh.h b/include/sketch/bbmh.h
@@ -361,8 +361,8 @@ struct FinalDivBBitMinHash {
         return std::max(0., frac / (1. - b2pow));
     }
     double containment_index(const FinalDivBBitMinHash &o) const {
-        double ji = jaccard_index(o);
-        double is = (est_cardinality_ + o.est_cardinality_) * ji / (1. + ji);
+        const double ji = jaccard_index(o);
+        const double is = (est_cardinality_ + o.est_cardinality_) * ji / (1. + ji);
         return is / est_cardinality_;
     }
     double intersection_size(const FinalDivBBitMinHash &o) const {
@@ -1623,9 +1623,6 @@ FinalBBitMinHash BBitMinHasher<T, Hasher>::finalize(uint32_t b) const {
     size_t ndef;
     double cest = -1.;
     if((ndef = std::count_if(core_.begin(), core_.end(), [](auto x) {return x == detail::default_val<T>();}))) {
-#ifndef NDEBUG
-        std::fprintf(stderr, "requires densification: %zu/%zu need to be densified\n", ndef, core_.size());
-#endif
         tmp = core_;
         cest = detail::harmonic_cardinality_estimate_impl(tmp);
         std::replace(tmp.begin(), tmp.end(), std::numeric_limits<T>::max() >> p_, detail::default_val<T>());

diff --git a/include/sketch/common.h b/include/sketch/common.h
@@ -48,8 +48,8 @@
 #define sk__xstr__(x) sk__str__(x)
 #define SKETCH_SHIFT 8
 #define SKETCH_MAJOR 0
-#define SKETCH_MINOR 18
-#define SKETCH_REVISION 1
+#define SKETCH_MINOR 19
+#define SKETCH_REVISION 0
 #define SKETCH_VERSION_INTEGER ((((SKETCH_MAJOR << SKETCH_SHIFT) | SKETCH_MINOR) << SKETCH_SHIFT) | SKETCH_REVISION)
 #define SKETCH_VERSION SKETCH_MAJOR.SKETCH_MINOR##SKETCH_REVISION
 #define SKETCH_VERSION_STR sk__xstr__(SKETCH_MAJOR.SKETCH_MINOR.SKETCH_REVISION)

diff --git a/include/sketch/hash.h b/include/sketch/hash.h
@@ -737,7 +737,7 @@ struct InvH {
     const InverseOperation iop;
 
     InvH(uint64_t seed):
-            seed_(seed | std::is_same<Operation, op::multiplies<uint64_t>>::value),
+            seed_(seed | std::is_same<Operation, op::multiplies<uint64_t>>::value), // Ensures that the seed is odd if multiplies, as an odd number is needed for reversibility.
             inverse_(multinv::Inverse64<Operation>()(seed_)), op(), iop() {}
     // To ensure that it is actually reversible.
     INLINE uint64_t inverse(uint64_t hv) const {
@@ -901,8 +901,9 @@ struct XorMultiply: public FusedReversible<InvXor, InvMul > {
 struct MultiplyAdd: public FusedReversible<InvMul, InvAdd> {
     MultiplyAdd(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): FusedReversible<InvMul, InvAdd>(seed1 | 1, seed2 | 1) {}
 };
-struct MultiplyAddXor: public FusedReversible3<InvMul,InvAdd,InvXor> {
-    MultiplyAddXor(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): FusedReversible3<InvMul,InvAdd,InvXor>(seed1 | 1, seed2 | 1) {}
+struct MultiplyAddXor: public FusedReversible3<InvAdd,InvMul,InvXor> {
+    using base = FusedReversible3<InvAdd,InvMul,InvXor>;
+    MultiplyAddXor(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): base(seed1 | 1, seed2 | 1) {}
 };
 template<size_t shift>
 struct MultiplyAddXoRot: public FusedReversible3<InvMul,InvXor,RotN<shift>> {

diff --git a/include/sketch/hll.h b/include/sketch/hll.h
@@ -76,12 +76,21 @@ static constexpr const char *JESTIM_STRINGS []
     "ORIGINAL", "ERTL_IMPROVED", "ERTL_MLE", "ERTL_JOINT_MLE"
 };
 enum JointEstimationMethod: uint8_t {
-    //ORIGINAL       = 0,
-    //ERTL_IMPROVED  = 1, // Improved but biased method
-    //ERTL_MLE       = 2, // element-wise max, followed by MLE
-    ERTL_JOINT_MLE = 3  // Ertl special version
+    J_ORIGINAL       = ORIGINAL,
+    J_ERTL_IMPROVED  = ERTL_IMPROVED, // Improved but biased method
+    J_ERTL_MLE       = ERTL_MLE, // element-wise max, followed by MLE
+    ERTL_JOINT_MLE = 3, // Ertl special version
+    J_ERTL_JOINT_MLE = ERTL_JOINT_MLE,
 };
 
+static inline std::string to_string(JointEstimationMethod est) {
+    switch(est) {case J_ORIGINAL: return "Original"; case J_ERTL_IMPROVED: return "Improved"; case J_ERTL_MLE: return "MLE"; case J_ERTL_JOINT_MLE: return "JMLE"; default: return "UNKNOWN";};
+}
+static inline std::string to_string(EstimationMethod est) {
+    return to_string(static_cast<JointEstimationMethod>(est));
+}
+
+
 static const char *EST_STRS [] {
     "original",
     "ertl_improved",
@@ -200,17 +209,17 @@ static constexpr double TWO_POW_32 = 1ull << 32;
 
 template<typename CountArrType>
 static double calculate_estimate(const CountArrType &counts,
-                                 EstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept {
+                                 JointEstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept {
     assert(estim <= 3);
 #if ENABLE_COMPUTED_GOTO
-    static constexpr void *arr [] {&&ORREST, &&ERTL_IMPROVED_EST, &&ERTL_MLE_EST};
+    static constexpr void *arr [] {&&ORREST, &&ERTL_IMPROVED_EST, &&ERTL_MLE_EST, &&ERTL_JOINT_MLE_EST};
     goto *arr[estim];
     ORREST: {
 #else
     switch(estim) {
-        case ORIGINAL: {
+        case J_ORIGINAL: {
 #endif
-        assert(estim != ERTL_MLE);
+        assert(estim != static_cast<JointEstimationMethod>(ERTL_MLE));
         double sum = counts[0];
         for(unsigned i = 1; i < 64 - p + 1; ++i) if(counts[i]) sum += std::ldexp(counts[i], -i); // 64 - p because we can't have more than that many leading 0s. This is just a speed thing.
         //for(unsigned i = 1; i < 64 - p + 1; ++i) sum += std::ldexp(counts[i], -i); // 64 - p because we can't have more than that many leading 0s. This is just a speed thing.
@@ -228,23 +237,35 @@ static double calculate_estimate(const CountArrType &counts,
         return value;
     }
 #if ENABLE_COMPUTED_GOTO
-    ERTL_IMPROVED_EST: {
+    ERTL_IMPROVED_EST:
 #else
-        case ERTL_IMPROVED: {
+        case J_ERTL_IMPROVED:
 #endif
+    {
         static const double divinv = 1. / (2.L*std::log(2.L));
         double z = m * detail::gen_tau(static_cast<double>((m-counts[64 - p + 1]))/static_cast<double>(m));
         for(unsigned i = 64-p; i; z += counts[i--], z *= 0.5); // Reuse value variable to avoid an additional allocation.
         z += m * detail::gen_sigma(static_cast<double>(counts[0])/static_cast<double>(m));
         return m * divinv * m / z;
     }
 #if ENABLE_COMPUTED_GOTO
-    ERTL_MLE_EST: return ertl_ml_estimate(counts, p, 64 - p, relerr);
+    ERTL_MLE_EST: ERTL_JOINT_MLE_EST: // Both MLE variants dispatch to the same estimator.
+        return ertl_ml_estimate(counts, p, 64 - p, relerr);
 #else
-    case ERTL_MLE: return ertl_ml_estimate(counts, p, 64 - p, relerr);
-    default: HEDLEY_UNREACHABLE();
-    }
+    case J_ERTL_MLE:
+    case J_ERTL_JOINT_MLE:
+        return ertl_ml_estimate(counts, p, 64 - p, relerr);
+    default: // Only legal inside the switch, so it stays within the #else branch.
+        std::fprintf(stderr, "Unknown estimation method.\n");
+        HEDLEY_UNREACHABLE();
+    } // Closes switch(estim); the computed-goto path has no switch to close.
 #endif
+}
+
+template<typename CountArrType>
+static double calculate_estimate(const CountArrType &counts,
+                                 EstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept {
+    return calculate_estimate(counts, static_cast<JointEstimationMethod>(estim), m, p, alpha, relerr);
 }
 
 template<typename CoreType>
@@ -696,7 +717,7 @@ std::array<double, 3> ertl_joint(const HllType &h1, const HllType &h2) {
     const double cAX = h1.get_is_ready() ? h1.creport() : ertl_ml_estimate(c1, h1.p(), h1.q());
     const double cBX = h2.get_is_ready() ? h2.creport() : ertl_ml_estimate(c2, h2.p(), h2.q());
     const double cABX = ertl_ml_estimate(cu, h1.p(), h1.q());
-    // std::fprintf(stderr, "Made initials: %lf, %lf, %lf\n", cAX, cBX, cABX);
+    VERBOSE_ONLY(std::fprintf(stderr, "Made initials: %lf, %lf, %lf\n", cAX, cBX, cABX);) // Debug trace; gated like this patch's other diagnostics.
     std::array<uint32_t, 64> countsAXBhalf;
     std::array<uint32_t, 64> countsBXAhalf;
     countsAXBhalf[q] = h1.m();

diff --git a/include/sketch/hmh.h b/include/sketch/hmh.h
@@ -289,8 +289,12 @@ struct hmh_t {
     template<typename IT> IT access(size_t index) const {
         return reinterpret_cast<const IT *>(data_.data())[index];
     }
+#ifndef _mm512_srli_epi16
 #define _mm512_srli_epi16(mm, Imm) _mm512_and_si512(_mm512_set1_epi16(0xFFFFu >> Imm), _mm512_srli_epi32(mm, Imm))
+#endif
+#ifndef _mm512_srli_epi8
 #define _mm512_srli_epi8(mm, Imm) _mm512_and_si512(_mm512_set1_epi8(0xFFu >> Imm), _mm512_srli_epi32(mm, Imm))
+#endif
     template<typename IT>
     std::array<uint32_t, 64> sum_counts() const {
         using hll::detail::SIMDHolder;

diff --git a/include/sketch/mh.h b/include/sketch/mh.h
@@ -109,9 +109,16 @@ struct RangeMinHash: public AbstractMinHash<T, Cmp> {
         AbstractMinHash<T, Cmp>(sketch_size), hf_(std::move(hf)), cmp_(std::move(cmp))
     {
     }
+    void show() {
+        std::fprintf(stderr, "%zu mins\n", minimizers_.size());
+        for(const auto x: minimizers_) {
+            std::fprintf(stderr, "%zu\n", size_t(x));
+        }
+    }
     RangeMinHash(std::string) {throw NotImplementedError("");}
     double cardinality_estimate() const {
-        return double(std::numeric_limits<T>::max()) / this->max_element() * minimizers_.size();
+        // Convert to double before dividing: with an integral T, max() / max_element() would truncate.
+        return static_cast<double>(std::numeric_limits<T>::max()) / this->max_element() * minimizers_.size();
     }
     RangeMinHash(gzFile fp) {
         if(!fp) throw std::runtime_error("Null file handle!");
@@ -127,8 +134,9 @@ struct RangeMinHash: public AbstractMinHash<T, Cmp> {
     }
     RangeMinHash &operator+=(const RangeMinHash &o) {
         minimizers_.insert(o.begin(), o.end());
-        while(minimizers_.size() > this->ss_)
+        while(minimizers_.size() > this->ss_) {
             minimizers_.erase(minimizers_.begin());
+        }
         return *this;
     }
     RangeMinHash operator+(const RangeMinHash &o) const {
@@ -222,8 +230,6 @@ struct RangeMinHash: public AbstractMinHash<T, Cmp> {
     void free() {clear();}
     final_type cfinalize() const {
         std::vector<T> reta(minimizers_.begin(), minimizers_.end());
-        if(reta.size() < this->ss_)
-            reta.insert(reta.end(), this->ss_ - reta.size(), std::numeric_limits<uint64_t>::max());
         return final_type(std::move(reta));
     }
     final_type finalize() & {
@@ -320,25 +326,36 @@ struct FinalRMinHash {
         tmp += o;
         return tmp;
     }
+    /*
     double union_size(const FinalRMinHash &o) const {
+        std::vector<T> total(o.begin(), o.end());
+        total.insert(total.end(), begin(), end());
+        std::sort(total.begin(), total.end());
+        total.resize(std::min(o.size(), size()));
+        const size_t maxv = total.back();
+        return (double(std::numeric_limits<T>::max()) / maxv) * this->size();
         PREC_REQ(this->size() == o.size(), "Non-matching parameters for FinalRMinHash comparison");
         size_t n_in_sketch = 0;
         auto i1 = this->rbegin(), i2 = o.rbegin();
-        T mv;
         while(n_in_sketch < first.size() - 1) {
             // Easier to branch-predict:  http://www.vldb.org/pvldb/vol8/p293-inoue.pdf
-            if(*i1 != *i2) ++i1, ++i2;
-            else {
-                const int c = *i1 < *i2;
-                i2 += !c; i1 += c;
+            if(*i1 == *i2) {
+                ++i1, ++i2;
+            } else if(*i1 < *i2) {
+                ++i1;
+            } else {
+                ++i2;
             }
             ++n_in_sketch;
         }
-        mv = *i1 < *i2 ? *i1: *i2;
         // TODO: test after refactoring
         assert(i1 < this->rend());
-        return double(std::numeric_limits<T>::max()) / (mv) * this->size();
+        const size_t mv = std::min(*i1, *i2);
+        const double est = double(std::numeric_limits<T>::max()) / mv * this->size();
+        std::fprintf(stderr, "mv: %zu. est: %g. Expected maxv %zu\n", size_t(mv), est, maxv);
+        return est;
     }
+    */
     double cardinality_estimate(MHCardinalityMode mode=ARITHMETIC_MEAN) const {
         // KMV (kth-minimum value) estimate
         return (static_cast<double>(std::numeric_limits<T>::max()) / double(this->max_element()) * first.size());
@@ -399,20 +416,16 @@ struct FinalRMinHash {
     void free() {
         decltype(first) tmp; std::swap(tmp, first);
     }
-    template<typename Alloc>
-    FinalRMinHash(const std::vector<T, Alloc> &ofirst): first(ofirst.size()) {std::copy(ofirst.begin(), ofirst.end(), first.begin()); sort();}
     template<typename It>
-    FinalRMinHash(It start, It end): first(std::distance(start, end)) {
-        std::copy(start, end, first.begin());
-        sort();
-    }
-    template<typename Alloc, typename=std::enable_if_t<std::is_same<Alloc, allocator>::value>>
-    FinalRMinHash(std::vector<T, Alloc> &&ofirst): first(std::move(ofirst)) {
+    FinalRMinHash(It start, It end) {
+        std::copy(start, end, std::back_inserter(first));
         sort();
     }
+    template<typename Alloc>
+    FinalRMinHash(const std::vector<T, Alloc> &ofirst): FinalRMinHash(ofirst.begin(), ofirst.end()) {}
     template<typename Hasher, bool is_bottom>
     FinalRMinHash(const BottomKHasher<Hasher, T, is_bottom> &bk): FinalRMinHash(bk.mpq_.getq().begin(), bk.mpq_.getq().end()) {}
-    FinalRMinHash(FinalRMinHash &&o): first(std::move(o.first)) {sort();}
+    FinalRMinHash(FinalRMinHash &&o) = default;
     ssize_t read(gzFile fp) {
         uint64_t sz;
         if(gzread(fp, &sz, sizeof(sz)) != sizeof(sz)) throw ZlibError("Failed to read");
@@ -453,7 +466,7 @@ struct FinalRMinHash {
     FinalRMinHash() {}
     FinalRMinHash &operator=(const FinalRMinHash &o) = default;
     void sort() {
-        common::sort::default_sort(this->first.begin(), this->first.end());
+        std::sort(this->first.begin(), this->first.end());
     }
 };
 

diff --git a/include/sketch/rnla.h b/include/sketch/rnla.h
@@ -372,9 +372,7 @@ struct PStableSketcher: public RNLASketcher<FloatType> {
                     tx(st * this->destdim() + ind, sind) = dist_(gen);
                 }
             }
-#if !NDEBUG
-            std::fprintf(stderr, "nonzeros: %zu. total: %zu\n", tx.nonZeros(), tx.rows() * tx.columns());
-#endif
+            VERBOSE_ONLY(std::fprintf(stderr, "nonzeros: %zu. total: %zu\n", tx.nonZeros(), tx.rows() * tx.columns());)
         }
     }
     PStableSketcher(const PStableSketcher &o): super(o.ntables(), o.destdim()), seed_(o.seed_), dist_(o.dist_), dense_(o.dense_) {

diff --git a/python/setup.py b/python/setup.py
@@ -51,8 +51,7 @@ def __str__(self):
     "../pybind11/include"
 ]
 
-__version__ = subprocess.check_output(
-    ["git", "describe", "--abbrev=4"]).decode().strip().split('-')[0]
+__version__ = "0.19.0"
 
 
 def make_namepair(name):

diff --git a/testsrc/bbmhtest.cpp b/testsrc/bbmhtest.cpp
@@ -73,18 +73,19 @@ void verify_popcount() {
     b2.addh(4);
     b2.addh(17);
     auto f1 = b1.cfinalize(), f2 = b2.cfinalize();
+VERBOSE_ONLY(
     std::fprintf(stderr, "f1 popcount: %" PRIu64 "\n", f1.popcnt());
     std::fprintf(stderr, "f2 popcount: %" PRIu64 "\n", f2.popcnt());
+)
 #if 0
     b1.show();
     b2.show();
 #endif
     auto b3 = b1 + b2;
     //b3.show();
     auto f3 = b3.finalize();
-    std::fprintf(stderr, "f3 popcount: %" PRIu64 "\n", f3.popcnt());
-    auto neqb12 = f1.equal_bblocks(f2);
-    std::fprintf(stderr, "eqb: %zu. With itself: %zu\n", size_t(neqb12), size_t(f1.equal_bblocks(f1)));
+    VERBOSE_ONLY(std::fprintf(stderr, "f3 popcount: %" PRIu64 "\n", f3.popcnt());)
+    VERBOSE_ONLY(std::fprintf(stderr, "eqb: %zu. With itself: %zu\n", size_t(f1.equal_bblocks(f2)), size_t(f1.equal_bblocks(f1)));)
 }
 
 int main(int argc, char *argv[]) {
@@ -154,7 +155,8 @@ int main(int argc, char *argv[]) {
             b2.densify();
             auto est = (b1 + b2).cardinality_estimate();
             auto usest = b1.union_size(b2);
-            std::fprintf(stderr, "union est by union: %f. by union_size: %f. difference: %12e\n", est, usest, (est - usest));
+            VERBOSE_ONLY(std::fprintf(stderr, "union est by union: %f. by union_size: %f. difference: %12e\n", est, usest, (est - usest));)
+            assert(std::abs(est - usest) < 1e-6);
             assert(est == usest);
             auto f1 = b1.finalize(), f2 = b2.finalize(), f3 = b3.finalize();
             assert(i <= 9 || std::abs(est - niter) < niter * .1 || !std::fprintf(stderr, "est: %lf. niter: %zu\n", est, size_t(niter)));
@@ -165,10 +167,10 @@ int main(int argc, char *argv[]) {
             auto smh1 = smhp2.finalize(16), smh2 = smhp21.finalize(16);
             auto smhd1 = smhdp.finalize(16), smhd2 = smhdp1.finalize(16);
             auto smh1ji = smh1.jaccard_index(smh1);
-            std::fprintf(stderr, "smh1ji: %g\n", smh1ji);
+            VERBOSE_ONLY(std::fprintf(stderr, "smh1ji: %g\n", smh1ji);)
             assert(smh1ji == 1.);
             auto pji = smh1.jaccard_index(smh2);
-            std::fprintf(stderr, "estimate: %f. nmin: %u. b: %u\n", pji, 1u << i, b);
+            VERBOSE_ONLY(std::fprintf(stderr, "estimate: %f. nmin: %u. b: %u\n", pji, 1u << i, b);)
             if(std::abs(pji - .5)  > 0.05) {
                 std::fprintf(stderr, "original (no b-bit): %f\n", b1.jaccard_index(b2));
                 std::fprintf(stderr, ">.05 error: estimate: %f. nmin: %u. b: %u. %f%% error\n", pji, 1u << i, b, std::abs(pji - .5) / .5 * 100);

diff --git a/testsrc/cmtest.cpp b/testsrc/cmtest.cpp
@@ -26,5 +26,5 @@ int main() {
     for(auto &c: cmf.second)
         c = 2;
     assert(c.count == cmf.intersection_size(cmf2)); // Make sure intersection size is still the same
-    std::fprintf(stderr, "%llu, %llu\n", c.count, cm.union_size(cm2));
+    std::fprintf(stderr, "%llu, %llu\n", (unsigned long long)c.count, (unsigned long long)cm.union_size(cm2));
 }