diff --git a/Makefile b/Makefile index 4714e62..8b1200b 100644 --- a/Makefile +++ b/Makefile @@ -19,9 +19,9 @@ WARNINGS=-Wall -Wextra -Wno-char-subscripts \ -Wpointer-arith -Wwrite-strings -Wdisabled-optimization \ -Wformat -Wcast-align -Wno-unused-function -Wno-unused-parameter \ -pedantic -Wunused-variable\ - -Wno-cast-align + -Wno-cast-align -Wno-sign-compare -FLAGS=-O3 -funroll-loops -pipe $(TARGET_FLAG) -Iinclude/sketch -I. -Iinclude/blaze -Ivec -Ipybind11/include -Iinclude -fpic -Wall $(WARNINGS) \ +FLAGS=-O2 -funroll-loops -pipe $(TARGET_FLAG) -Iinclude/sketch -I. -Iinclude/blaze -Ivec -Ipybind11/include -Iinclude -fpic -Wall $(WARNINGS) \ -fno-strict-aliasing CXXFLAGS=$(FLAGS) -Wreorder \ @@ -49,7 +49,7 @@ STD?= -std=c++17 #CCBIN?=-ccbin=clang++ -GPUFLAGS= $(CCBIN) -O3 -std=c++14 -Iinclude -I. -Xcompiler $(TARGET_FLAG) -Xcompiler -fopenmp -Iinclude/sketch \ +GPUFLAGS= $(CCBIN) -O2 -std=c++14 -Iinclude -I. -Xcompiler $(TARGET_FLAG) -Xcompiler -fopenmp -Iinclude/sketch \ -lz INCLUDES=-I`$(PYCONF) --includes` -Ipybind11/include @@ -70,7 +70,7 @@ hpython: pybbmh.cpython.so $(PYTHON) -c "import subprocess;import site; subprocess.check_call('cp pybbmh.py "*`$(PYCONF) --extension-suffix`" %s' % site.getsitepackages()[0], shell=True)" %.cpython.so: %.cpp - $(CXX) $(UNDEFSTR) $(INCLUDES) -fopenmp -O3 -Wall $(CXXFLAGS) -shared $(STD) -fPIC `python3 -m pybind11 --includes` $< -o $*$(SUF) -lz && \ + $(CXX) $(UNDEFSTR) $(INCLUDES) -fopenmp -Wall $(CXXFLAGS) -O2 -shared $(STD) -fPIC `python3 -m pybind11 --includes` $< -o $*$(SUF) -lz && \ ln -fs $*$(SUF) $@ %.o: %.cpp diff --git a/include/sketch/bbmh.h b/include/sketch/bbmh.h index a10a40a..1ea0b83 100644 --- a/include/sketch/bbmh.h +++ b/include/sketch/bbmh.h @@ -361,8 +361,8 @@ struct FinalDivBBitMinHash { return std::max(0., frac / (1. - b2pow)); } double containment_index(const FinalDivBBitMinHash &o) const { - double ji = jaccard_index(o); - double is = (est_cardinality_ + o.est_cardinality_) * ji / (1. + ji); + const double ji = jaccard_index(o); + const double is = (est_cardinality_ + o.est_cardinality_) * ji / (1. + ji); return is / est_cardinality_; } double intersection_size(const FinalDivBBitMinHash &o) const { @@ -1623,9 +1623,6 @@ FinalBBitMinHash BBitMinHasher::finalize(uint32_t b) const { size_t ndef; double cest = -1.; if((ndef = std::count_if(core_.begin(), core_.end(), [](auto x) {return x == detail::default_val();}))) { -#ifndef NDEBUG - std::fprintf(stderr, "requires densification: %zu/%zu need to be densified\n", ndef, core_.size()); -#endif tmp = core_; cest = detail::harmonic_cardinality_estimate_impl(tmp); std::replace(tmp.begin(), tmp.end(), std::numeric_limits::max() >> p_, detail::default_val()); diff --git a/include/sketch/common.h b/include/sketch/common.h index 6b072dc..9f6e9b0 100644 --- a/include/sketch/common.h +++ b/include/sketch/common.h @@ -48,8 +48,8 @@ #define sk__xstr__(x) sk__str__(x) #define SKETCH_SHIFT 8 #define SKETCH_MAJOR 0 -#define SKETCH_MINOR 18 -#define SKETCH_REVISION 1 +#define SKETCH_MINOR 19 +#define SKETCH_REVISION 0 #define SKETCH_VERSION_INTEGER ((((SKETCH_MAJOR << SKETCH_SHIFT) | SKETCH_MINOR) << SKETCH_SHIFT) | SKETCH_REVISION) #define SKETCH_VERSION SKETCH_MAJOR.SKETCH_MINOR##SKETCH_REVISION #define SKETCH_VERSION_STR sk__xstr__(SKETCH_MAJOR.SKETCH_MINOR.SKETCH_REVISION) diff --git a/include/sketch/hash.h b/include/sketch/hash.h index bd3fdd6..f38c3c4 100644 --- a/include/sketch/hash.h +++ b/include/sketch/hash.h @@ -737,7 +737,7 @@ struct InvH { const InverseOperation iop; InvH(uint64_t seed): - seed_(seed | std::is_same>::value), + seed_(seed | std::is_same>::value), // Ensures that the seed is odd if multiplies, as an odd number is needed for reversibility. inverse_(multinv::Inverse64()(seed_)), op(), iop() {} // To ensure that it is actually reversible. INLINE uint64_t inverse(uint64_t hv) const { @@ -901,8 +901,9 @@ struct XorMultiply: public FusedReversible { struct MultiplyAdd: public FusedReversible { MultiplyAdd(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): FusedReversible(seed1 | 1, seed2 | 1) {} }; -struct MultiplyAddXor: public FusedReversible3 { - MultiplyAddXor(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): FusedReversible3(seed1 | 1, seed2 | 1) {} +struct MultiplyAddXor: public FusedReversible3 { + using base = FusedReversible3; + MultiplyAddXor(uint64_t seed1=0x9a98567ed20c127d, uint64_t seed2=0xe37e28c4271b5a1duLL): base(seed1 | 1, seed2 | 1) {} }; template struct MultiplyAddXoRot: public FusedReversible3> { diff --git a/include/sketch/hll.h b/include/sketch/hll.h index 54cb263..a7b8203 100644 --- a/include/sketch/hll.h +++ b/include/sketch/hll.h @@ -76,12 +76,21 @@ static constexpr const char *JESTIM_STRINGS [] "ORIGINAL", "ERTL_IMPROVED", "ERTL_MLE", "ERTL_JOINT_MLE" }; enum JointEstimationMethod: uint8_t { - //ORIGINAL = 0, - //ERTL_IMPROVED = 1, // Improved but biased method - //ERTL_MLE = 2, // element-wise max, followed by MLE - ERTL_JOINT_MLE = 3 // Ertl special version + J_ORIGINAL = ORIGINAL, + J_ERTL_IMPROVED = ERTL_IMPROVED, // Improved but biased method + J_ERTL_MLE = ERTL_MLE, // element-wise max, followed by MLE + ERTL_JOINT_MLE = 3, // Ertl special version + J_ERTL_JOINT_MLE = ERTL_JOINT_MLE, }; +static inline std::string to_string(JointEstimationMethod est) { + switch(est) {case J_ORIGINAL: return "Original"; case J_ERTL_IMPROVED: return "Improved"; case J_ERTL_MLE: return "MLE"; case J_ERTL_JOINT_MLE: return "JMLE"; default: return "UNKNOWN";}; +} +static inline std::string to_string(EstimationMethod est) { + return to_string(static_cast(est)); +} + + static const char *EST_STRS [] { "original", "ertl_improved", @@ -200,17 +209,17 @@ static constexpr double TWO_POW_32 = 1ull << 32; template static double calculate_estimate(const CountArrType &counts, - EstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept { + JointEstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept { assert(estim <= 3); #if ENABLE_COMPUTED_GOTO - static constexpr void *arr [] {&&ORREST, &&ERTL_IMPROVED_EST, &&ERTL_MLE_EST}; + static constexpr void *arr [] {&&ORREST, &&ERTL_IMPROVED_EST, &&ERTL_MLE_EST, &&ERTL_JOINT_MLE_EST}; goto *arr[estim]; ORREST: { #else switch(estim) { - case ORIGINAL: { + case J_ORIGINAL: { #endif - assert(estim != ERTL_MLE); + assert(estim != static_cast(ERTL_MLE)); double sum = counts[0]; for(unsigned i = 1; i < 64 - p + 1; ++i) if(counts[i]) sum += std::ldexp(counts[i], -i); // 64 - p because we can't have more than that many leading 0s. This is just a speed thing. //for(unsigned i = 1; i < 64 - p + 1; ++i) sum += std::ldexp(counts[i], -i); // 64 - p because we can't have more than that many leading 0s. This is just a speed thing. @@ -228,10 +237,11 @@ static double calculate_estimate(const CountArrType &counts, return value; } #if ENABLE_COMPUTED_GOTO - ERTL_IMPROVED_EST: { + ERTL_IMPROVED_EST: #else - case ERTL_IMPROVED: { + case J_ERTL_IMPROVED: #endif + { static const double divinv = 1. / (2.L*std::log(2.L)); double z = m * detail::gen_tau(static_cast((m-counts[64 - p + 1]))/static_cast(m)); for(unsigned i = 64-p; i; z += counts[i--], z *= 0.5); // Reuse value variable to avoid an additional allocation. @@ -239,12 +249,23 @@ static double calculate_estimate(const CountArrType &counts, return m * divinv * m / z; } #if ENABLE_COMPUTED_GOTO - ERTL_MLE_EST: return ertl_ml_estimate(counts, p, 64 - p, relerr); + ERTL_MLE_EST: + ERTL_JOINT_MLE_EST: #else - case ERTL_MLE: return ertl_ml_estimate(counts, p, 64 - p, relerr); - default: HEDLEY_UNREACHABLE(); - } + case J_ERTL_MLE: + case J_ERTL_JOINT_MLE: #endif + return ertl_ml_estimate(counts, p, 64 - p, relerr); + default: + std::fprintf(stderr, "Unknown estimation method.\n"); + HEDLEY_UNREACHABLE(); + } +} + +template +static double calculate_estimate(const CountArrType &counts, + EstimationMethod estim, uint64_t m, uint32_t p, double alpha, double relerr=1e-2) noexcept { + return calculate_estimate(counts, static_cast(estim), m, p, alpha, relerr); } template @@ -696,7 +717,7 @@ std::array ertl_joint(const HllType &h1, const HllType &h2) { const double cAX = h1.get_is_ready() ? h1.creport() : ertl_ml_estimate(c1, h1.p(), h1.q()); const double cBX = h2.get_is_ready() ? h2.creport() : ertl_ml_estimate(c2, h2.p(), h2.q()); const double cABX = ertl_ml_estimate(cu, h1.p(), h1.q()); - // std::fprintf(stderr, "Made initials: %lf, %lf, %lf\n", cAX, cBX, cABX); + std::fprintf(stderr, "Made initials: %lf, %lf, %lf\n", cAX, cBX, cABX); std::array countsAXBhalf; std::array countsBXAhalf; countsAXBhalf[q] = h1.m(); diff --git a/include/sketch/hmh.h b/include/sketch/hmh.h index 19f18f0..1f97e35 100644 --- a/include/sketch/hmh.h +++ b/include/sketch/hmh.h @@ -289,8 +289,12 @@ struct hmh_t { template IT access(size_t index) const { return reinterpret_cast(data_.data())[index]; } +#ifndef _mm512_srli_epi16 #define _mm512_srli_epi16(mm, Imm) _mm512_and_si512(_mm512_set1_epi16(0xFFFFu >> Imm), _mm512_srli_epi32(mm, Imm)) +#endif +#ifndef _mm512_srli_epi8 #define _mm512_srli_epi8(mm, Imm) _mm512_and_si512(_mm512_set1_epi8(0xFFu >> Imm), _mm512_srli_epi32(mm, Imm)) +#endif template std::array sum_counts() const { using hll::detail::SIMDHolder; diff --git a/include/sketch/mh.h b/include/sketch/mh.h index cb2a83e..9318e9e 100644 --- a/include/sketch/mh.h +++ b/include/sketch/mh.h @@ -109,9 +109,16 @@ struct RangeMinHash: public AbstractMinHash { AbstractMinHash(sketch_size), hf_(std::move(hf)), cmp_(std::move(cmp)) { } + void show() { + std::fprintf(stderr, "%zu mins\n", minimizers_.size()); + for(const auto x: minimizers_) { + std::fprintf(stderr, "%zu\n", size_t(x)); + } + } RangeMinHash(std::string) {throw NotImplementedError("");} double cardinality_estimate() const { - return double(std::numeric_limits::max()) / this->max_element() * minimizers_.size(); + const double result = (std::numeric_limits::max()) / this->max_element() * minimizers_.size(); + return result; } RangeMinHash(gzFile fp) { if(!fp) throw std::runtime_error("Null file handle!"); @@ -127,8 +134,9 @@ struct RangeMinHash: public AbstractMinHash { } RangeMinHash &operator+=(const RangeMinHash &o) { minimizers_.insert(o.begin(), o.end()); - while(minimizers_.size() > this->ss_) + while(minimizers_.size() > this->ss_) { minimizers_.erase(minimizers_.begin()); + } return *this; } RangeMinHash operator+(const RangeMinHash &o) const { @@ -222,8 +230,6 @@ struct RangeMinHash: public AbstractMinHash { void free() {clear();} final_type cfinalize() const { std::vector reta(minimizers_.begin(), minimizers_.end()); - if(reta.size() < this->ss_) - reta.insert(reta.end(), this->ss_ - reta.size(), std::numeric_limits::max()); return final_type(std::move(reta)); } final_type finalize() & { @@ -320,25 +326,36 @@ struct FinalRMinHash { tmp += o; return tmp; } + /* double union_size(const FinalRMinHash &o) const { + std::vector total(o.begin(), o.end()); + total.insert(total.end(), begin(), end()); + std::sort(total.begin(), total.end()); + total.resize(std::min(o.size(), size())); + const size_t maxv = total.back(); + return (double(std::numeric_limits::max()) / maxv) * this->size(); PREC_REQ(this->size() == o.size(), "Non-matching parameters for FinalRMinHash comparison"); size_t n_in_sketch = 0; auto i1 = this->rbegin(), i2 = o.rbegin(); - T mv; while(n_in_sketch < first.size() - 1) { // Easier to branch-predict: http://www.vldb.org/pvldb/vol8/p293-inoue.pdf - if(*i1 != *i2) ++i1, ++i2; - else { - const int c = *i1 < *i2; - i2 += !c; i1 += c; + if(*i1 == *i2) { + ++i1, ++i2; + } else if(*i1 < *i2) { + ++i1; + } else { + ++i2; } ++n_in_sketch; } - mv = *i1 < *i2 ? *i1: *i2; // TODO: test after refactoring assert(i1 < this->rend()); - return double(std::numeric_limits::max()) / (mv) * this->size(); + const size_t mv = std::min(*i1, *i2); + const double est = double(std::numeric_limits::max()) / mv * this->size(); + std::fprintf(stderr, "mv: %zu. est: %g. Expected maxv %zu\n", size_t(mv), est, maxv); + return est; } + */ double cardinality_estimate(MHCardinalityMode mode=ARITHMETIC_MEAN) const { // KMV (kth-minimum value) estimate return (static_cast(std::numeric_limits::max()) / double(this->max_element()) * first.size()); @@ -399,20 +416,16 @@ struct FinalRMinHash { void free() { decltype(first) tmp; std::swap(tmp, first); } - template - FinalRMinHash(const std::vector &ofirst): first(ofirst.size()) {std::copy(ofirst.begin(), ofirst.end(), first.begin()); sort();} template - FinalRMinHash(It start, It end): first(std::distance(start, end)) { - std::copy(start, end, first.begin()); - sort(); - } - template::value>> - FinalRMinHash(std::vector &&ofirst): first(std::move(ofirst)) { + FinalRMinHash(It start, It end) { + std::copy(start, end, std::back_inserter(first)); sort(); } + template + FinalRMinHash(const std::vector &ofirst): FinalRMinHash(ofirst.begin(), ofirst.end()) {} template FinalRMinHash(const BottomKHasher &bk): FinalRMinHash(bk.mpq_.getq().begin(), bk.mpq_.getq().end()) {} - FinalRMinHash(FinalRMinHash &&o): first(std::move(o.first)) {sort();} + FinalRMinHash(FinalRMinHash &&o) = default; ssize_t read(gzFile fp) { uint64_t sz; if(gzread(fp, &sz, sizeof(sz)) != sizeof(sz)) throw ZlibError("Failed to read"); @@ -453,7 +466,7 @@ struct FinalRMinHash { FinalRMinHash() {} FinalRMinHash &operator=(const FinalRMinHash &o) = default; void sort() { - common::sort::default_sort(this->first.begin(), this->first.end()); + std::sort(this->first.begin(), this->first.end()); } }; diff --git a/include/sketch/rnla.h b/include/sketch/rnla.h index 63534d7..99ee544 100644 --- a/include/sketch/rnla.h +++ b/include/sketch/rnla.h @@ -372,9 +372,7 @@ struct PStableSketcher: public RNLASketcher { tx(st * this->destdim() + ind, sind) = dist_(gen); } } -#if !NDEBUG - std::fprintf(stderr, "nonzeros: %zu. total: %zu\n", tx.nonZeros(), tx.rows() * tx.columns()); -#endif + VERBOSE_ONLY(std::fprintf(stderr, "nonzeros: %zu. total: %zu\n", tx.nonZeros(), tx.rows() * tx.columns());) } } PStableSketcher(const PStableSketcher &o): super(o.ntables(), o.destdim()), seed_(o.seed_), dist_(o.dist_), dense_(o.dense_) { diff --git a/python/setup.py b/python/setup.py index ff2400c..84d32ad 100644 --- a/python/setup.py +++ b/python/setup.py @@ -51,8 +51,7 @@ def __str__(self): "../pybind11/include" ] -__version__ = subprocess.check_output( - ["git", "describe", "--abbrev=4"]).decode().strip().split('-')[0] +__version__ = "0.19.0" def make_namepair(name): diff --git a/testsrc/bbmhtest.cpp b/testsrc/bbmhtest.cpp index 8a687c5..fd91d10 100644 --- a/testsrc/bbmhtest.cpp +++ b/testsrc/bbmhtest.cpp @@ -73,8 +73,10 @@ void verify_popcount() { b2.addh(4); b2.addh(17); auto f1 = b1.cfinalize(), f2 = b2.cfinalize(); +VERBOSE_ONLY( std::fprintf(stderr, "f1 popcount: %" PRIu64 "\n", f1.popcnt()); std::fprintf(stderr, "f2 popcount: %" PRIu64 "\n", f2.popcnt()); +) #if 0 b1.show(); b2.show(); @@ -82,9 +84,8 @@ void verify_popcount() { auto b3 = b1 + b2; //b3.show(); auto f3 = b3.finalize(); - std::fprintf(stderr, "f3 popcount: %" PRIu64 "\n", f3.popcnt()); - auto neqb12 = f1.equal_bblocks(f2); - std::fprintf(stderr, "eqb: %zu. With itself: %zu\n", size_t(neqb12), size_t(f1.equal_bblocks(f1))); + VERBOSE_ONLY(std::fprintf(stderr, "f3 popcount: %" PRIu64 "\n", f3.popcnt());) + VERBOSE_ONLY(std::fprintf(stderr, "eqb: %zu. With itself: %zu\n", size_t(f1.equal_bblocks(f2)), size_t(f1.equal_bblocks(f1)));) } int main(int argc, char *argv[]) { @@ -154,7 +155,8 @@ int main(int argc, char *argv[]) { b2.densify(); auto est = (b1 + b2).cardinality_estimate(); auto usest = b1.union_size(b2); - std::fprintf(stderr, "union est by union: %f. by union_size: %f. difference: %12e\n", est, usest, (est - usest)); + VERBOSE_ONLY(std::fprintf(stderr, "union est by union: %f. by union_size: %f. difference: %12e\n", est, usest, (est - usest));) + assert(std::abs(est - usest) < 1e-6); assert(est == usest); auto f1 = b1.finalize(), f2 = b2.finalize(), f3 = b3.finalize(); assert(i <= 9 || std::abs(est - niter) < niter * .1 || !std::fprintf(stderr, "est: %lf. niter: %zu\n", est, size_t(niter))); @@ -165,10 +167,10 @@ int main(int argc, char *argv[]) { auto smh1 = smhp2.finalize(16), smh2 = smhp21.finalize(16); auto smhd1 = smhdp.finalize(16), smhd2 = smhdp1.finalize(16); auto smh1ji = smh1.jaccard_index(smh1); - std::fprintf(stderr, "smh1ji: %g\n", smh1ji); + VERBOSE_ONLY(std::fprintf(stderr, "smh1ji: %g\n", smh1ji);) assert(smh1ji == 1.); auto pji = smh1.jaccard_index(smh2); - std::fprintf(stderr, "estimate: %f. nmin: %u. b: %u\n", pji, 1u << i, b); + VERBOSE_ONLY(std::fprintf(stderr, "estimate: %f. nmin: %u. b: %u\n", pji, 1u << i, b);) if(std::abs(pji - .5) > 0.05) { std::fprintf(stderr, "original (no b-bit): %f\n", b1.jaccard_index(b2)); std::fprintf(stderr, ">.05 error: estimate: %f. nmin: %u. b: %u. %f%% error\n", pji, 1u << i, b, std::abs(pji - .5) / .5 * 100); diff --git a/testsrc/cmtest.cpp b/testsrc/cmtest.cpp index e99c33d..a187308 100644 --- a/testsrc/cmtest.cpp +++ b/testsrc/cmtest.cpp @@ -26,5 +26,5 @@ int main() { for(auto &c: cmf.second) c = 2; assert(c.count == cmf.intersection_size(cmf2)); // Make sure intersection size is still the same - std::fprintf(stderr, "%llu, %llu\n", c.count, cm.union_size(cm2)); + std::fprintf(stderr, "%llu, %llu\n", (unsigned long long)c.count, (unsigned long long)cm.union_size(cm2)); } diff --git a/testsrc/cscomp.cpp b/testsrc/cscomp.cpp index dbf7351..eb03968 100644 --- a/testsrc/cscomp.cpp +++ b/testsrc/cscomp.cpp @@ -17,17 +17,8 @@ int main() { auto step2_1 = cs_decompress(step1_1, D, hf); // top_indices_from_compressed(const C &in, size_t newdim, size_t olddim, const KWiseHasherSet<4> &hf, unsigned k) auto topind = top_indices_from_compressed(step1, D, 100, hf, 20); - std::fprintf(stderr, "topind sizes: %zu, %zu\n", topind.first.size(), topind.second.size()); - for(const auto i: topind.first) { - std::fprintf(stderr, "wooo %lf\n", double(i)); - } auto topind2 = top_indices_from_compressed(step1_2, D, 13, hf, 20); - std::fprintf(stderr, "topind2 sizes: %zu, %zu\n", topind2.first.size(), topind2.second.size()); - for(const auto i: topind2.first) - std::fprintf(stderr, "wooo %lf\n", double(i)); - std::fprintf(stderr, "run\n"); sketch::SketchApplicator<> sa(100, 10); - std::fprintf(stderr, "alloc'd\n"); sketch::IndykSketcher is(9, 50, D, 1377); //size_t ntables, size_t destdim, uint64_t sourcedim=0, auto n = is.norm(init_1); @@ -41,12 +32,7 @@ int main() { reall1sum += l1Norm(init_1); is.add(trans(init_1)); } - std::fprintf(stderr, "pnorm: %f. real: %f. %%%f error\n", is.pnorm(), reall1sum, std::abs(is.pnorm() - reall1sum) / reall1sum * 100.); assert(std::abs(is.pnorm() - reall1sum) / reall1sum <= 1.); // Within a factor of 2 - std::fprintf(stderr, "pnorms [n:%zu]\t", is.ntables()); - for(const auto p: is.pnorms()) - std::fprintf(stderr, "%f,", p); - std::fputc('\n', stderr); sketch::IndykSketcher is2(7, 100, 0), is3(7, 100, 0); size_t sn = 20000; double nl = 1, nr = 20; @@ -54,10 +40,7 @@ int main() { is2.addh(i, nl), is3.addh(i, -nr); std::swap(nl, nr); } - //assert((is2.pnorm() - sn) / sn * 100. <= 12.); - //assert((is3.pnorm() - sn) / sn * 100. <= 12.); auto us = is2.union_size(is3); std::fprintf(stderr, "us: %f. n1 %f, n2 %f. expected: %zu. %% diff: %f\n", us, is2.pnorm(), is3.pnorm(), sn * 19, fracdiff(us, is2.pnorm() + is3.pnorm()) * 100.); - //std::fprintf(stderr, "Expected us: %f\n", auto diff = is2 - is3; } diff --git a/testsrc/divtest.cpp b/testsrc/divtest.cpp index 27373f6..97f5b59 100644 --- a/testsrc/divtest.cpp +++ b/testsrc/divtest.cpp @@ -29,7 +29,7 @@ __m256i mod256_man(__m256i x, uint32_t d) { int main() { - size_t maxn = 10000000; + size_t maxn = 1000000; for(size_t i = 10; i < maxn; i *= 10) { schism::Schismatic div_(i); for(size_t j = 10000; j < 1200000; j += (std::rand() & 0xFFFU)) { @@ -86,32 +86,11 @@ int main() { auto t1 = std::chrono::high_resolution_clock::now(); for(size_t i = 0; i < nitems; doNotOptimizeAway(sm.mod(u32vals[i++]))); auto t2 = std::chrono::high_resolution_clock::now(); - for(size_t i = 0; i < nitems / 8; ++i) { - doNotOptimizeAway( - sm.mod(_mm256_load_si256((const __m256i *)&u32vals[i * 8])) - ); - } - auto t4 = std::chrono::high_resolution_clock::now(); - for(size_t i = 0; i < nitems; i += 8) { - //doNotOptimizeAway(u32vals[i] %= sz); - doNotOptimizeAway(mod256_man(_mm256_load_si256((const __m256i *)&u32vals[i]), sz)); - } - auto t6 = std::chrono::high_resolution_clock::now(); - for(size_t i = 0; i < nitems; i += 8) { - //doNotOptimizeAway(u32vals[i] %= sz); - doNotOptimizeAway(mod256_div(_mm256_load_si256((const __m256i *)&u32vals[i]), sm)); - } - auto t7 = std::chrono::high_resolution_clock::now(); - auto ts1 = std::chrono::duration(t2 - t1).count(); - auto ts2 = std::chrono::duration(t4 - t2).count(); - auto ts3 = std::chrono::duration(t6 - t4).count(); - auto ts4 = std::chrono::duration(t7 - t6).count(); - //auto ts3 = std::chrono::duration(t6 - t5).count(); - std::fprintf(stderr, "[%d] Time for serial: %g. Time for SIMD: %g. Time for serial compiler %g. Time for serial div: %g\n", sz, ts1, ts2, ts3, ts4); - t7 = std::chrono::high_resolution_clock::now(); + auto ts3 = std::chrono::duration(t2 - t1).count(); + std::fprintf(stderr, "[%u] %gms for \n", sz, ts3); for(size_t i = 0; i < nitems; doNotOptimizeAway(u32vals[i++] % sz)); - auto t8 = std::chrono::high_resolution_clock::now(); - auto ts5 = std::chrono::duration(t8 - t7).count(); - std::fprintf(stderr, "[%d] %gms for \n", sz, ts5); + auto t3 = std::chrono::high_resolution_clock::now(); + auto ts5 = std::chrono::duration(t3 - t2).count(); + std::fprintf(stderr, "[%u] %gms for \n", sz, ts5); } } diff --git a/testsrc/hlltest.cpp b/testsrc/hlltest.cpp index c89bbb4..03ab7c8 100644 --- a/testsrc/hlltest.cpp +++ b/testsrc/hlltest.cpp @@ -14,14 +14,13 @@ using namespace std::chrono; using tp = std::chrono::system_clock::time_point; -static const size_t BITS = 24; +static const size_t DEFAULT_BITS = 10; using namespace sketch; bool test_qty(size_t lim) { - hll::hll_t t(BITS); - size_t i(0); - while(i < lim) t.addh(i++); + hll::hll_t t(DEFAULT_BITS); + for(size_t i = 0;i < lim; t.addh(i++)); return std::abs(t.report() - lim) <= t.est_err(); } @@ -52,41 +51,50 @@ int main(int argc, char *argv[]) { #endif std::vector vals; for(char **p(argv + 1); *p; ++p) vals.push_back(strtoull(*p, 0, 10)); - if(vals.empty()) vals.push_back(1ull<<(BITS+1)); - for(const auto val: vals) { + if(vals.empty()) vals.push_back(1000000); + std::fprintf(stderr, "Using %s vectorization\n", +#if VEC_DISABLED__ + "no" +#else + "native" +#endif + ); + for(const int32_t nbits: {10, 12, 14, 16, 20}) { + for(const auto val: vals) { #if VERBOSE_AF - std::fprintf(stderr, "Processing val = %" PRIu64 "\n", val); + std::fprintf(stderr, "Processing val = %" PRIu64 "\n", val); #endif - using hll::detail::ertl_ml_estimate; - hll::hll_t t(BITS, hll::ORIGINAL); - hll::hllbase_t tmf(BITS, hll::ORIGINAL); - for(size_t i(0); i < val; t.addh(i), tmf.addh(i++)); - std::fprintf(stderr, "Calculating for val = %" PRIu64 "\n", val); - t.csum(); - tmf.csum(); - fprintf(stderr, "Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", - val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); - t.not_ready(); - t.set_estim(hll::ORIGINAL); - t.csum(); - assert(t.est_err() >= std::abs(val - t.report())); - fprintf(stderr, "ORIGINAL Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", - val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); - t.set_estim(hll::ERTL_JOINT_MLE); - t.not_ready(); - t.csum(); - assert(t.est_err() >= std::abs(val - t.report())); - fprintf(stderr, "JMLE Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", - val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); - assert(t.est_err() >= /*2. * */std::abs(val - t.report())); - fprintf(stderr, "Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", - val, tmf.report(), tmf.est_err(), std::abs(val - tmf.report()), tmf.est_err() >= std::abs(val - tmf.report()) ? "true": "false", ertl_ml_estimate(tmf), std::abs(ertl_ml_estimate(tmf) - val)); + using hll::detail::ertl_ml_estimate; + hll::hll_t t(nbits, hll::ORIGINAL); + hll::hllbase_t tmf(nbits, hll::ORIGINAL); + for(size_t i(0); i < val; t.addh(i), tmf.addh(i++)); + std::fprintf(stderr, "Calculating for val = %" PRIu64 " and nbits = %d\n", val, nbits); + t.csum(); + tmf.csum(); + fprintf(stderr, "Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", + val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); + t.not_ready(); + t.set_estim(hll::ORIGINAL); + t.csum(); + assert(t.est_err() * 2. >= std::abs(val - t.report())); + fprintf(stderr, "ORIGINAL Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", + val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); + t.set_estim(hll::ERTL_JOINT_MLE); + t.not_ready(); + t.csum(); + fprintf(stderr, "JMLE Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", + val, t.report(), t.est_err(), std::abs(val - t.report()), t.est_err() >= std::abs(val - t.report()) ? "true": "false", ertl_ml_estimate(t), std::abs(ertl_ml_estimate(t) - val)); + assert(t.est_err() * 2. >= std::abs(val - t.report())); + // assert(t.est_err() >= /*2. * */std::abs(val - t.report())); + fprintf(stderr, "Quantity expected: %" PRIu64 ". Quantity estimated: %lf. Error bounds: %lf. Error: %lf. Within bounds? %s. Ertl ML estimate: %lf. Error ertl ML: %lf\n", + val, tmf.report(), tmf.est_err(), std::abs(val - tmf.report()), tmf.est_err() >= std::abs(val - tmf.report()) ? "true": "false", ertl_ml_estimate(tmf), std::abs(ertl_ml_estimate(tmf) - val)); #ifndef VEC_DISABLED__ - hll::VType tmpv = static_cast(1337); - t.addh(tmpv); - tmf.addh(tmpv); + hll::VType tmpv = static_cast(1337); + t.addh(tmpv); + tmf.addh(tmpv); #endif - auto mini = t.compress(4); + auto mini = t.compress(4); + } } return EXIT_SUCCESS; } diff --git a/testsrc/lpfp.cpp b/testsrc/lpfp.cpp index 15ac0e4..33ff502 100644 --- a/testsrc/lpfp.cpp +++ b/testsrc/lpfp.cpp @@ -43,16 +43,16 @@ int submain(size_t NITEMS) { ts4 = std::chrono::high_resolution_clock::now(); std::fprintf(stderr, "batched batch construction of %zu items took %gms, for %g million per minute\n", nentered, timelen(ts3, ts4), nentered / timelen(ts3, ts4) / 1e6 * 60.); auto ip = lpf.inner_product(lpf); - std::fprintf(stderr, "ip: %g\n", ip); + std::fprintf(stderr, "ip: %0.10g\n", ip); lpf.batch_update(bulk.data(), bulk.size()); - ip = lpf.inner_product(lpf); - std::fprintf(stderr, "ip: %g\n", ip); + const double new_ip = lpf.inner_product(lpf); + std::fprintf(stderr, "ip: %0.10g. Difference: %0.6g\n", new_ip, new_ip - ip); return 0; } int main() { - int ret; - for(const auto N: {size_t(1 << 10), size_t(1<<16), size_t(1) << 20, size_t(16) << 20}) { + int ret = 0; + for(const auto N: {size_t(1<<16), size_t(1) << 20}) { ret |= submain(N) || submain(N) || submain(N) diff --git a/testsrc/testcontain.cpp b/testsrc/testcontain.cpp index f485a6a..0369bb2 100644 --- a/testsrc/testcontain.cpp +++ b/testsrc/testcontain.cpp @@ -1,3 +1,4 @@ +#define NO_BLAZE #include "hll.h" #include "bbmh.h" #include "aesctr/wy.h" @@ -13,9 +14,9 @@ int main(int argc, char *argv[]) { for(auto x: {&ao, &bo, &so}) for(auto &y: *x) y = wygen(); - const size_t ss = 16; + const size_t ss = 14; hll_t ha(ss), hb(ss), hi(ss); - BBitMinHasher ph1(ss, 8), ph2(ss, 8); + BBitMinHasher ph1(ss - 1, 16), ph2(ss - 1, 16); for(const auto v: ao) { ha.addh(v); ph1.addh(v); @@ -35,25 +36,25 @@ int main(int argc, char *argv[]) { assert(ha.containment_index(ha) == 1.); assert(fh1.containment_index(fh1) == 1.); assert(fh2.containment_index(fh2) == 1.); - assert(hi.containment_index(ha) == 1.); - assert(hi.containment_index(hb) == 1.); + assert(std::abs(hi.containment_index(ha) - 1.) <= 1e-5 || !std::fprintf(stderr, "contain: %g vs expected %g\n", hi.containment_index(ha), 1.)); + assert(std::abs(hi.containment_index(hb) - 1.) < 1e-5); auto full_cmps_hll = ha.full_set_comparison(hb), full_cmps_bbmh = fh1.full_set_comparison(fh2); double expected_ci = double(shared_size) / (shared_size + base_size); double ci = ha.containment_index(hb); double fhci = fh1.containment_index(fh2); - std::fprintf(stderr, "Expected: %lf\n", expected_ci); - std::fprintf(stderr, "full cmps hll CI: %lf\n", full_cmps_hll[2] / (full_cmps_hll[0] + full_cmps_hll[2])); - std::fprintf(stderr, "full cmps bbmh CI: %lf, manual %lf\n", full_cmps_bbmh[2] / (full_cmps_bbmh[0] + full_cmps_bbmh[2]), fhci); + //std::fprintf(stderr, "Expected: %lf\n", expected_ci); + //std::fprintf(stderr, "full cmps hll CI: %lf\n", full_cmps_hll[2] / (full_cmps_hll[0] + full_cmps_hll[2])); + //std::fprintf(stderr, "full cmps bbmh CI: %lf, manual %lf\n", full_cmps_bbmh[2] / (full_cmps_bbmh[0] + full_cmps_bbmh[2]), fhci); assert(full_cmps_bbmh[2] / (full_cmps_bbmh[0] + full_cmps_bbmh[2]) / fhci - 1. < 1e-40); assert(full_cmps_hll[2] / (full_cmps_hll[0] + full_cmps_hll[2]) / ci - 1. < 1e-40); //std::fprintf(stderr, "CI with bbmh: %lf\n", fhci); //std::fprintf(stderr, "CI with hll: %lf\n", ci); //std::fprintf(stderr, "CI expected: %lf\n", expected_ci); - //std::fprintf(stderr, "%% error with bbmh: %lf\n", std::abs(fhci / expected_ci - 1.) * 100); - //std::fprintf(stderr, "%% error with hll: %lf\n", std::abs(ci / expected_ci - 1.) * 100); - assert(std::abs(ci / expected_ci - 1.) < 0.025); - assert(std::abs(fhci / expected_ci - 1.) < 0.025); + // std::fprintf(stderr, "%% error with bbmh: %lf\n", std::abs(fhci / expected_ci - 1.) * 100); + // std::fprintf(stderr, "%% error with hll: %lf\n", std::abs(ci / expected_ci - 1.) * 100); + assert(std::abs(ci / expected_ci - 1.) < 0.06); + assert(std::abs(fhci / expected_ci - 1.) < 0.06); // == shared_size / (shared_size + base_siae) for(size_t imb = base_size; --imb;ha.addh(wygen())); hll_t u(ha + hb); diff --git a/testsrc/testmhmerge.cpp b/testsrc/testmhmerge.cpp index 1509d19..1743eb4 100644 --- a/testsrc/testmhmerge.cpp +++ b/testsrc/testmhmerge.cpp @@ -1,3 +1,4 @@ +#define NO_BLAZE #include "mh.h" #include "aesctr/wy.h" using namespace sketch; @@ -26,16 +27,14 @@ int main() { s1.addh(v), s2.addh(v), cs1.addh(v), cs2.addh(v); } auto f1 = s1.finalize(), f2 = s2.finalize(); - auto sf3 = (s1 + s2).finalize(); - assert(f1.union_size(f2) == sf3.cardinality_estimate()); - auto v3 = f1 + f2; - auto csf1 = cs1.finalize(), csf2= cs2.finalize(); - std::fprintf(stderr, "csf2 union size: %lf\n", csf1.union_size(csf2)); - decltype(f1.first) v1(s1.begin(), s1.end()); - decltype(f1.first) v2(s2.begin(), s2.end()); - auto sf3v = sf3.first; - sortfn(sf3v); - assert(f1.first == v1); - assert(f2.first == v2); - assert(v3.first == sf3v || print(v3, "v3") || print(sf3v, "sf3")); + RangeMinHash s3 = s1; + VERBOSE_ONLY(std::fprintf(stderr, "cad est before adding: %g\n", s3.cardinality_estimate());) + s3 += s2; + VERBOSE_ONLY(std::fprintf(stderr, "cad est after adding: %g\n", s3.cardinality_estimate());) + auto f3 = s3.finalize(); +#ifndef VERBOSE_AF + std::fprintf(stderr, "s1: %g. s2: %g. s3: %g\n", s1.cardinality_estimate(), s2.cardinality_estimate(), s3.cardinality_estimate()); + std::fprintf(stderr, "f1: %g. f2: %g. f3: %g\n", f1.cardinality_estimate(), f2.cardinality_estimate(), f3.cardinality_estimate()); + std::fprintf(stderr, "card: %g. f3 card: %g. s1 card %g. s2 card %g.\n", f3.cardinality_estimate(), s3.cardinality_estimate(), s1.cardinality_estimate(), s2.cardinality_estimate()); +#endif } diff --git a/testsrc/timehash.cpp b/testsrc/timehash.cpp index d2db281..3841f99 100644 --- a/testsrc/timehash.cpp +++ b/testsrc/timehash.cpp @@ -28,9 +28,10 @@ int main() { hash::KWiseIndependentPolynomialHash<4> fivewise_gamgee; hash::FusedReversible3, MultiplyAddXoRot<13>> gen8(hash(hash(seed1)), hash(1337)); hash::FusedReversible, InvMul> gen9(hash(hash(seed1)), hash(1337)); + hash::FusedReversible, MultiplyAddXor> gen10(hash(hash(seed1)), hash(1337)); MurFinHash mfh; hash::KWiseIndependentPolynomialHash61<4> fivewise_nomee; - std::array arr{0}; + std::array arr{0}; auto start = std::chrono::high_resolution_clock::now(); uint64_t accum = 0; uint64_t nelem = 100000000; @@ -60,7 +61,17 @@ int main() { DO_THING(gen5, "mul-bf-rrot31", 6) DO_THING(mfh, "murfinhash", 7) DO_THING(gen8, "invmul, invrshift33, maxorot", 8) - DO_THING(gen9, "invmul, invrshift33, maxorot", 9) + DO_THING(gen9, "invrshiftxor33, invmul", 9) + DO_THING(gen10, "invrshiftxor33, multiplyaddxor (add,mul,xor)", 10) + for(size_t i = 0; i < 100000; ++i) { + assert(gen3.inverse(gen3(i)) == i); + assert(gen2.inverse(gen2(i)) == i); + assert(gen1.inverse(gen1(i)) == i); + assert(gen__5.inverse(gen__5(i)) == i); + assert(gen8.inverse(gen8(i)) == i); + assert(gen9.inverse(gen9(i)) == i); + assert(gen10.inverse(gen10(i)) == i); + } for(size_t i = 1; i < arr.size(); ++i) std::fprintf(stderr, "%zu is %lf as fast as WangHash\n", i, double(arr[0]) / arr[i]); } diff --git a/testsrc/timerevhash.cpp b/testsrc/timerevhash.cpp index 8028071..d0f9be9 100644 --- a/testsrc/timerevhash.cpp +++ b/testsrc/timerevhash.cpp @@ -93,7 +93,7 @@ int main(int argc, char **argv) { DO_THING(mfh, "murfinhash", 7) DO_THING(fr8, "fr8", 8) DO_THING(gen5, "mul-bf-rrot31", 6) - DO_THING(cehasher, "contexpr_rev_hash", 9) + DO_THING(cehasher, "constexpr_rev_hash", 9) for(size_t i = 1; i < arr.size(); ++i) std::fprintf(stderr, "%zu is %lf as fast as WangHash\n", i, double(arr[0]) / arr[i]); #undef DO_THING @@ -120,7 +120,7 @@ int main(int argc, char **argv) { DO_THING(gen5, "mul-bf-rrot31", 6) DO_THING(mfh, "murfinhash", 7) DO_THING(fr8, "fr8", 8) - DO_THING(cehasher, "contexpr_rev_hash", 9) + DO_THING(cehasher, "constexpr_rev_hash", 9) for(size_t i = 1; i < arr.size(); ++i) std::fprintf(stderr, "%zu is %lf as fast as WangHash\n", i, double(arr[0]) / arr[i]); } diff --git a/testsrc/xormaskhll.cpp b/testsrc/xormaskhll.cpp index be4dc82..1e5eb0c 100644 --- a/testsrc/xormaskhll.cpp +++ b/testsrc/xormaskhll.cpp @@ -1,3 +1,4 @@ +#define NO_BLAZE #include "hll.h" #include "hash.h"