diff --git a/.github/composite-actions/download-libraries/action.yml b/.github/composite-actions/download-libraries/action.yml index 0cc80b53a1..2ca8ed1625 100644 --- a/.github/composite-actions/download-libraries/action.yml +++ b/.github/composite-actions/download-libraries/action.yml @@ -11,11 +11,31 @@ inputs: description: 'Download googletest' default: true - install-boost: + install-boost-gcc: type: boolean - description: 'Install boost' + description: 'Install boost built with GCC' default: true + install-boost-clang: + type: boolean + description: 'Install boost built with clang' + default: false + + install-boost-brew-clang: + type: boolean + description: 'Install boost built with Homebrew Clang' + default: false + + install-gcc: + type: boolean + description: 'Install GCC toolset (compiler and build tools)' + default: true + + install-clang: + type: boolean + description: 'Install clang toolset (compiler and build tools)' + default: false + runs: using: 'composite' steps: @@ -25,6 +45,19 @@ runs: sudo apt-get update -y sudo apt-get install gcc-10 g++-10 cmake build-essential -y shell: bash + if: inputs.install-gcc != 'false' + + - name: Install clang + # llvm.sh installs all needed libraries, no need in build-essential + # "all" option is needed to install libc++ and libc++abi + run: | + sudo apt-get update -y + sudo apt-get install cmake make -y + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 all + shell: bash + if: inputs.install-clang != 'false' - name: Make lib directory run: | @@ -35,7 +68,7 @@ runs: uses: ./.github/composite-actions/download-library with: directory: googletest - download-command: git clone https://github.com/google/googletest/ --branch release-1.12.1 --depth 1 + download-command: git clone https://github.com/google/googletest/ --branch v1.14.0 --depth 1 if: inputs.download-googletest != 'false' - name: Download easyloggingpp @@ -72,13 +105,34 @@ runs: directory: boost download-command: wget -O 
boost_1_81_0.tar.gz https://sourceforge.net/projects/boost/files/boost/1.81.0/boost_1_81_0.tar.gz/download && tar xzvf boost_1_81_0.tar.gz && mv boost_1_81_0 boost - - name: Install Boost + - name: Install Boost built with GCC run: | cd lib/boost ./bootstrap.sh --prefix=/usr sudo ./b2 install --prefix=/usr shell: bash - if: inputs.install-boost != 'false' + if: inputs.install-boost-gcc != 'false' + - name: Install Boost built with clang + run: | + cd lib/boost + ./bootstrap.sh --prefix=/usr --with-toolset=clang + ./b2 clean + ./b2 toolset=clang cxxflags="-stdlib=libc++" linkflags="-stdlib=libc++" + sudo ./b2 install --prefix=/usr + shell: bash + if: inputs.install-boost-clang != 'false' + - name: Install Boost built with Homebrew Clang + # b2 doesn't recognize custom compilers, so some trick is needed: + # Also, /usr is read-only, so install into /usr/local + run: | + cd lib/boost + export PATH=$(brew --prefix llvm@17)/bin:$PATH + ./bootstrap.sh --prefix=/usr/local --with-toolset=clang + ./b2 clean + ./b2 toolset=clang cxxflags="-std=c++11 -stdlib=libc++" linkflags="-stdlib=libc++" + sudo ./b2 install --prefix=/usr/local + shell: bash + if: inputs.install-boost-brew-clang != 'false' - name: Download frozen uses: ./.github/composite-actions/download-library with: diff --git a/.github/workflows/core-tests.yml b/.github/workflows/core-tests.yml index 058ca3d2fe..e755a7d140 100644 --- a/.github/workflows/core-tests.yml +++ b/.github/workflows/core-tests.yml @@ -27,7 +27,7 @@ on: #- examples/** workflow_dispatch: jobs: - run_tests: + run_tests_gcc: runs-on: ubuntu-latest strategy: matrix: @@ -44,6 +44,81 @@ jobs: uses: ./.github/composite-actions/download-datasets - name: Build run: | + export CC=gcc-10 + export CXX=g++-10 + if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then + ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} + else + ./build.sh + fi + - name: Test + working-directory: ${{github.workspace}}/build/target + shell: bash + run: 
./Desbordante_test --gtest_filter='*:-*HeavyDatasets*' + run_tests_clang: + runs-on: ubuntu-latest + strategy: + matrix: + cfg: + - { BUILD_TYPE: Release } + - { BUILD_TYPE: Debug } + - { BUILD_TYPE: Debug, SANITIZER : ADDRESS } + - { BUILD_TYPE: Debug, SANITIZER : UB } + steps: + - uses: actions/checkout@v3 + - name: Download libraries + uses: ./.github/composite-actions/download-libraries + with: + install-gcc: false + install-clang: true + install-boost-gcc: false + install-boost-clang: true + - name: Download datasets + uses: ./.github/composite-actions/download-datasets + - name: Build + run: | + export CC=clang-17 + export CXX=clang++-17 + export CXXFLAGS="-stdlib=libc++" + export LDFLAGS="-lc++abi" + if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then + ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} + else + ./build.sh + fi + - name: Test + working-directory: ${{github.workspace}}/build/target + shell: bash + run: ./Desbordante_test --gtest_filter='*:-*HeavyDatasets*' + run_tests_macos_clang: + runs-on: macos-14 + strategy: + matrix: + cfg: + - { BUILD_TYPE: Release } + - { BUILD_TYPE: Debug } + - { BUILD_TYPE: Debug, SANITIZER : ADDRESS } + - { BUILD_TYPE: Debug, SANITIZER : UB } + steps: + - uses: actions/checkout@v3 + - name: Download build system + run: brew install llvm@17 make + shell: bash + - name: Download libraries + uses: ./.github/composite-actions/download-libraries + with: + install-gcc: false + install-clang: false + install-boost-gcc: false + install-boost-clang: false + install-boost-brew-clang: true + - name: Download datasets + uses: ./.github/composite-actions/download-datasets + - name: Build + run: | + export CC=$(brew --prefix llvm@17)/bin/clang + export CXX=$(brew --prefix llvm@17)/bin/clang++ + export BOOST_ROOT=/usr/local if [[ "${{matrix.cfg.BUILD_TYPE}}" == "Debug" ]]; then ./build.sh --debug --sanitizer=${{ matrix.cfg.SANITIZER }} else diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 
451a6b4293..712d1206c0 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -75,7 +75,7 @@ jobs: with: download-pybind: true download-googletest: false - install-boost: false + install-boost-gcc: false - name: Build wheels uses: pypa/cibuildwheel@v2.16.2 diff --git a/CMakeLists.txt b/CMakeLists.txt index dd04905570..0c04298efd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,22 @@ else() if (ASAN) # Set DEBUG build options specific for build with ASAN set(ASAN_OPTS "-fsanitize=address") + + find_program(APT_FOUND apt-get) + if (APT_FOUND AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # alloc-dealloc-mismatch generates false positives on boost exceptions + # This applies only to Ubuntu package: + # https://github.com/llvm/llvm-project/issues/59432?ysclid=m4y0iqca2c577414782 + # Disable this check on files listed in address_sanitizer_ignore_list.txt if compiler + # is Clang and apt-get is installed on system: + # FIXME(senichenkov): apt-get is not an ideal check -- maybe it wouldn't be so hard to + # ask apt-get if repository is ubuntu-...? + message("Running on Ubuntu") + message(WARNING "ASAN is broken in Ubuntu package, therefore alloc-dealloc-mismatch") + message(WARNING "check will be supressed. 
Consider using another distro for full ASAN coverage.") + string(JOIN ";" ASAN_OPTS "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/address_sanitizer_ignore_list.txt") + endif() + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-O1" "-Wno-error" # Use of -Werror is discouraged with sanitizers @@ -81,6 +97,12 @@ else() "-fno-sanitize=signed-integer-overflow" # Remove this when CustomRandom gets fixed "-fno-sanitize=shift" # Remove this when CustomRandom gets fixed "-fno-sanitize-recover=all") # For tests to fail if UBSan finds an error + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_HOST_APPLE) + # Limit some UB sanitizer checks to "src" directory on macOS when building with Clang, + # because libraries (STL, googletest, boost, etc.) are somehow broken + string(JOIN ";" UBSAN_OPTS + "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/ub_sanitizer_ignore_list.txt") + endif() string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-O1" "${UBSAN_OPTS}") @@ -95,6 +117,16 @@ else() add_compile_options(-ggdb3) endif() + # Workaround clang-18 bug: + # https://github.com/llvm/llvm-project/issues/76515?ysclid=m406q4it5k674680045 + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + string(FIND "${CMAKE_CXX_COMPILER_VERSION}" "18" IDX) + if (IDX EQUAL 0) # clang major version is 18 + message(WARNING "C++ compiler is Clang++-18. Supressing deprecated declaration warnings. 
Consider using another version of Clang") + string(JOIN ";" DEBUG_BUILD_OPTS "${DEBUG_BUILD_OPTS}" "-Wno-deprecated-declarations") + endif() + endif() + add_compile_options("$<$<CONFIG:Debug>:${DEBUG_BUILD_OPTS}>") add_link_options("$<$<CONFIG:Debug>:${DEBUG_LINK_OPTS}>") @@ -122,7 +154,7 @@ include_directories(SYSTEM "lib/easyloggingpp/src" "lib/better-enums/" "lib/emha # adding submodules if (COMPILE_TESTS) - add_subdirectory("lib/googletest") + add_subdirectory("lib/googletest" SYSTEM) endif() set( CMAKE_BUILD_TYPE_COPY "${CMAKE_BUILD_TYPE}" ) diff --git a/README.md b/README.md index 375a2153fd..fdb79ccd58 100644 --- a/README.md +++ b/README.md @@ -245,15 +245,16 @@ The following instructions were tested on Ubuntu 20.04+ LTS and macOS Sonoma 14. ### Dependencies Prior to cloning the repository and attempting to build the project, ensure that you have the following software: -- GNU GCC, version 10+ +- GNU GCC, version 10+ or Clang, version 16+ - CMake, version 3.13+ -- Boost library built with GCC, version 1.81.0+ +- Boost library built with compiler you're going to use (GCC or Clang), version 1.81.0+ To use test datasets you will need: - Git Large File Storage, version 3.0.2+ #### Ubuntu dependencies installation +##### GCC Run the following commands: ```sh sudo apt install gcc g++ cmake libboost-all-dev git-lfs @@ -263,8 +264,48 @@ export CXX=g++ The last 2 lines set gcc as CMake compiler in your terminal session. You can also add them to the end of `~/.profile` to set this by default in all sessions. +##### Clang +Firstly, you'll need to build Boost with Clang, as packaged versions, distributed by package managers, are built with GCC and have different ABI. +Instructions below are given for Boost-1.81.0. If you want to use another version, you'll need to change `tar ...` command on step 3. +For further details on Boost installation, please consult [Boost documentation](https://www.boost.org/doc/libs/1_81_0/more/getting_started/unix-variants.html). 
+1) It's recommended to install Boost into `/usr/local`. You can use any other location, but you'll need to adapt instructions for it. +```sh +cd /usr/local +``` +2) Download an official Boost distribution from [SourceForge](https://sourceforge.net/projects/boost/files/boost/1.81.0/) +3) Unpack downloaded archive: +```sh +tar --bzip2 -xf boost_1_81_0.tar.bz2 +``` +4) Compile Boost: +```sh +./bootstrap.sh --with-toolset=clang +./b2 clean +./b2 toolset=clang cxxflags="-stdlib=libc++" linkflags="-stdlib=libc++" +``` +5) Install Boost: +```sh +sudo ./b2 install +``` + +Run the following commands: +```sh +sudo apt install cmake git-lfs +bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" +export CC=clang +export CXX=clang++ +export CXXFLAGS="-stdlib=libc++" +# libc++ is fully compatible with GCC's ABI, so you can omit the next line if you want to use libstdc++ ABI: +export LDFLAGS="-lc++abi" +export BOOST_ROOT="/usr/local/" +``` +Second command installs the latest version of LLVM (which includes Clang). For other installation options, see [LLVM packages page](https://apt.llvm.org/). +The last 5 lines set Clang as CMake compiler in your terminal session and set directory where Boost libraries are located. +You can also add them to the end of `~/.profile` to set this by default in all sessions. + #### MacOS dependencies installation +##### GCC Install Xcode Command Line Tools if you don't have them. Run: ```sh xcode-select --install @@ -309,6 +350,70 @@ The first two lines set GCC as the default compiler in CMake. The last export is the last MacOSX15.0.sdk used by CMake by default, you can read more about this [here](https://gist.github.com/scivision/d69faebbc56da9714798087b56de925a) and [here](https://github.com/iains/gcc-14-branch/issues/11). +##### Clang (LLVM) +Instructions below are given for Clang-17. To use another version, replace `llvm@17` with `llvm@vv` everywhere. + +Install Xcode Command Line Tools if you don't have them. 
Run: +```sh +xcode-select --install +``` +Follow the prompts to continue. + +To install Clang and CMake on macOS we recommend to use [Homebrew](https://brew.sh/) package manager. With Homebrew +installed, run the following commands: +```sh +brew install llvm@17 cmake +``` +After installation, check `cmake --version`. If the command is not found, you need to add the +Homebrew-installed packages to your environment path. To do this open `~/.zprofile` (for Zsh) or +`~/.bash_profile` (for Bash) and add to the end of the file the output of `brew shellenv`. +After that, restart the terminal and check the version of CMake again, now it should be displayed. + +Then you need to install Boost library built with LLVM Clang. Please avoid using Homebrew for this, as the Boost version provided by Homebrew +is built with Apple Clang, which has a different ABI. Instead, download the latest version of Boost from the [official website](https://www.boost.org/users/download/) and unpack the archive to the `/usr/local/` directory or another directory of your choice: +```sh +cd /usr/local/ +curl https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 --output "boost_1_86_0.tar.bz2" +tar xvjf boost_1_86_0.tar.bz2 +rm boost_1_86_0.tar.bz2 +cd boost_1_86_0 +``` + +To use LLVM Clang instead of Apple Clang run the following command: +```sh +export PATH=$(brew --prefix llvm@17)/bin:$PATH +``` +This will make LLVM Clang have greater priority than Apple Clang for your current session. +If you want to have LLVM Clang as default compiler, add this line to your `~/.zprofile` (for Zsh) or +`~/.bash_profile` (for Bash). In that case you can omit `$(brew --prefix llvm@17)/bin/` in all following commands +(e. g. `export CC=$(brew --prefix llvm@17)/bin/clang` becomes `export CC=clang`). + +To check that previous command succeeded, run +```sh +clang --version +``` +The first line must contain "`Homebrew Clang`". If there's "`Apple Clang`", return to the previous step. 
+ +Navigate to the unpacked Boost directory in the terminal and run the following commands: +```sh +./bootstrap.sh --with-toolset=clang +./b2 toolset=clang cxxflags="-std=c++11 -I$(brew --prefix llvm@17)/include" \ + linkflags="-L$(brew --prefix llvm@17)/lib/c++ -L$(brew --prefix llvm@17)/lib/unwind -lunwind" +./b2 install --layout=versioned +export BOOST_ROOT=$(pwd) # export Boost_ROOT=$(pwd) for CMake 3.26 and below. +``` +You can also add the last export with current path to `~/.zprofile` or `~/.bash_profile` to set this boost path by default. + +Before building the project you must set locally or in the above-mentioned dotfiles the following CMake environment variables: +```sh +export CC=$(brew --prefix llvm@17)/bin/clang +export CXX=$(brew --prefix llvm@17)/bin/clang++ +export CXXFLAGS="-I$(brew --prefix llvm@17)/include" +export LDFLAGS="-L$(brew --prefix llvm@17)/lib/c++ -L$(brew --prefix llvm@17)/lib/unwind -lunwind" +``` +The first two lines set LLVM Clang as the default compiler in CMake. Other two lines tell Clang to use +LLVM version of libc++. Note that commands are slightly different from the ones given in `brew info llvm@17`. + ### Building the project #### Building the Python module using pip diff --git a/address_sanitizer_ignore_list.txt b/address_sanitizer_ignore_list.txt new file mode 100644 index 0000000000..e14a6175f9 --- /dev/null +++ b/address_sanitizer_ignore_list.txt @@ -0,0 +1,4 @@ +# Disable alloc_dealloc_mismatch ASAN check +[alloc_dealloc_mismatch] +# in file: +src:typed_column_data.h diff --git a/build.sh b/build.sh index c89d854cab..15273fa434 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,8 @@ #!/bin/bash +# Stop on error: +set -e + function print_help() { cat << EOF Usage: ./build.sh [options] @@ -85,7 +88,7 @@ if [[ $NO_TESTS == true ]]; then PREFIX="$PREFIX -D COMPILE_TESTS=OFF" else if [[ ! 
-d "googletest" ]] ; then - git clone https://github.com/google/googletest/ --branch v1.13.0 --depth 1 + git clone https://github.com/google/googletest/ --branch v1.14.0 --depth 1 fi fi @@ -116,5 +119,5 @@ fi cd .. mkdir -p build cd build -rm CMakeCache.txt +rm -f CMakeCache.txt cmake $PREFIX .. && make $JOBS_OPTION diff --git a/src/core/algorithms/fd/fdep/fd_tree_element.cpp b/src/core/algorithms/fd/fdep/fd_tree_element.cpp index a1cd92678e..7afb76f689 100644 --- a/src/core/algorithms/fd/fdep/fd_tree_element.cpp +++ b/src/core/algorithms/fd/fdep/fd_tree_element.cpp @@ -1,6 +1,7 @@ #include "fd_tree_element.h" #include "boost/dynamic_bitset.hpp" +#include "util/bitset_extensions.h" FDTreeElement::FDTreeElement(size_t max_attribute_number) : max_attribute_number_(max_attribute_number) { @@ -45,7 +46,7 @@ bool FDTreeElement::ContainsGeneralization(std::bitset<kMaxAttrNum> const& lhs, return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -71,7 +72,7 @@ bool FDTreeElement::GetGeneralizationAndDelete(std::bitset<kMaxAttrNum> const& l return true; } - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { return false; } @@ -104,7 +105,7 @@ bool FDTreeElement::GetSpecialization(std::bitset<kMaxAttrNum> const& lhs, size_ bool found = false; size_t attr = (current_attr > 1 ? 
current_attr : 1); - size_t next_set_attr = lhs._Find_next(current_attr); + size_t next_set_attr = util::FindNext(lhs, current_attr); if (next_set_attr == kMaxAttrNum) { while (!found && attr <= this->max_attribute_number_) { @@ -153,7 +154,8 @@ void FDTreeElement::AddFunctionalDependency(std::bitset<kMaxAttrNum> const& lhs, FDTreeElement* current_node = this; this->AddRhsAttribute(attr_num); - for (size_t i = lhs._Find_first(); i != kMaxAttrNum; i = lhs._Find_next(i)) { + auto iter = util::MakeBitsetIterator(lhs); + for (size_t i = iter->Pos(); i != kMaxAttrNum; iter->Next(), i = iter->Pos()) { if (current_node->children_[i - 1] == nullptr) { current_node->children_[i - 1] = std::make_unique<FDTreeElement>(this->max_attribute_number_); @@ -215,8 +217,8 @@ void FDTreeElement::PrintDependencies(std::bitset<kMaxAttrNum>& active_path, std if (this->is_fd_[attr - 1]) { out = "{"; - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { + auto iter = util::MakeBitsetIterator(active_path); + for (size_t i = iter->Pos(); i != kMaxAttrNum; iter->Next(), i = iter->Pos()) { if (!column_id.empty()) out += column_id + std::to_string(std::stoi(column_names[i - 1]) + 1) + ","; else @@ -257,11 +259,8 @@ void FDTreeElement::TransformTreeFdCollection(std::bitset<kMaxAttrNum>& active_p for (size_t attr = 1; attr <= this->max_attribute_number_; ++attr) { if (this->is_fd_[attr - 1]) { - boost::dynamic_bitset<> lhs_bitset(this->max_attribute_number_); - for (size_t i = active_path._Find_first(); i != kMaxAttrNum; - i = active_path._Find_next(i)) { - lhs_bitset.set(i - 1); - } + auto lhs_bitset = + util::CreateShiftedDynamicBitset(active_path, this->max_attribute_number_); Vertical lhs(scheme.get(), lhs_bitset); Column rhs(scheme.get(), scheme->GetColumn(attr - 1)->GetName(), attr - 1); fd_collection.emplace_back(FD{lhs, rhs, scheme}); diff --git a/src/core/algorithms/fd/fdep/fdep.cpp b/src/core/algorithms/fd/fdep/fdep.cpp index 
293902a270..af0507f0e1 100644 --- a/src/core/algorithms/fd/fdep/fdep.cpp +++ b/src/core/algorithms/fd/fdep/fdep.cpp @@ -5,6 +5,7 @@ #include "config/equal_nulls/option.h" #include "config/tabular_data/input_table/option.h" #include "model/table/column_layout_relation_data.h" +#include "util/bitset_extensions.h" // #ifndef PRINT_FDS // #define PRINT_FDS @@ -96,8 +97,9 @@ void FDep::AddViolatedFDs(std::vector<size_t> const& t1, std::vector<size_t> con } equal_attr &= (~diff_attr); - for (size_t attr = diff_attr._Find_first(); attr != FDTreeElement::kMaxAttrNum; - attr = diff_attr._Find_next(attr)) { + auto iter = util::MakeBitsetIterator(diff_attr); + for (size_t attr = iter->Pos(); attr != FDTreeElement::kMaxAttrNum; + iter->Next(), attr = iter->Pos()) { this->neg_cover_tree_->AddFunctionalDependency(equal_attr, attr); } } diff --git a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h index d2933337e0..59f948bf9f 100644 --- a/src/core/algorithms/fd/pyrocommon/model/pli_cache.h +++ b/src/core/algorithms/fd/pyrocommon/model/pli_cache.h @@ -8,6 +8,7 @@ class ProfilingContext; #include "cache_eviction_method.h" #include "caching_method.h" #include "model/table/column_layout_relation_data.h" +#include "util/maybe_unused.h" namespace model { @@ -29,20 +30,23 @@ class PLICache { std::unique_ptr<VerticalMap<PositionListIndex>> index_; // usageCounter - for parallelism - int saved_intersections_ = 0; + // All these MAYBE_UNUSED variables are required to support Pyro's caching strategies from our + // ADBIS paper: https://link.springer.com/chapter/10.1007/978-3-030-30278-8_7 + + MAYBE_UNUSED int saved_intersections_ = 0; mutable std::mutex getting_pli_mutex_; CachingMethod caching_method_; - CacheEvictionMethod eviction_method_; - double caching_method_value_; + MAYBE_UNUSED CacheEvictionMethod eviction_method_; + MAYBE_UNUSED double caching_method_value_; // long long maximumAvailableMemory_ = 0; double 
maximum_entropy_; - double mean_entropy_; - double min_entropy_; - double median_entropy_; - double median_gini_; - double median_inverted_entropy_; + MAYBE_UNUSED double mean_entropy_; + MAYBE_UNUSED double min_entropy_; + MAYBE_UNUSED double median_entropy_; + MAYBE_UNUSED double median_gini_; + MAYBE_UNUSED double median_inverted_entropy_; std::variant<PositionListIndex*, std::unique_ptr<PositionListIndex>> CachingProcess( Vertical const& vertical, std::unique_ptr<PositionListIndex> pli, diff --git a/src/core/algorithms/fd/sfd/cords.cpp b/src/core/algorithms/fd/sfd/cords.cpp index af719871a7..5e5ec75c4f 100644 --- a/src/core/algorithms/fd/sfd/cords.cpp +++ b/src/core/algorithms/fd/sfd/cords.cpp @@ -163,7 +163,7 @@ unsigned long long Cords::ExecuteInternal() { Init(column_count, data); - auto start_time = std::chrono::high_resolution_clock::now(); + auto start_time = std::chrono::system_clock::now(); SetProgress(kTotalProgressPercent); ToNextProgressPhase(); diff --git a/src/core/algorithms/fd/sfd/frequency_handler.cpp b/src/core/algorithms/fd/sfd/frequency_handler.cpp index 0d2d700600..3157d14462 100644 --- a/src/core/algorithms/fd/sfd/frequency_handler.cpp +++ b/src/core/algorithms/fd/sfd/frequency_handler.cpp @@ -2,6 +2,7 @@ #include <algorithm> #include <cstddef> +#include <tuple> #include <unordered_map> #include <utility> #include <vector> @@ -30,7 +31,9 @@ void FrequencyHandler::InitFrequencyHandler(std::vector<model::TypedColumnData> auto cmp = [](std::pair<std::string, size_t> const &left, std::pair<std::string, size_t> const &right) { - return left.second > right.second; + // Compare frequencies. + // If frequencies are equal, compare values lexicographically. 
+ return std::tie(left.second, left.first) > std::tie(right.second, right.first); }; std::sort(values_ordered_by_frequencies.begin(), values_ordered_by_frequencies.end(), cmp); diff --git a/src/core/algorithms/fd/tane/pfdtane.cpp b/src/core/algorithms/fd/tane/pfdtane.cpp index 68c7ccc106..2dfd0a013b 100644 --- a/src/core/algorithms/fd/tane/pfdtane.cpp +++ b/src/core/algorithms/fd/tane/pfdtane.cpp @@ -1,5 +1,7 @@ #include "pfdtane.h" +#include <algorithm> + #include "config/error/option.h" #include "config/error_measure/option.h" #include "enums.h" @@ -48,10 +50,10 @@ config::ErrorType PFDTane::CalculatePFDError(model::PositionListIndex const* x_p std::deque<Cluster> xa_index = xa_pli->GetIndex(); std::shared_ptr<Cluster const> probing_table_ptr = x_pli->CalculateAndGetProbingTable(); auto const& probing_table = *probing_table_ptr; - std::sort(xa_index.begin(), xa_index.end(), - [&probing_table](Cluster const& a, Cluster const& b) { - return probing_table[a.front()] < probing_table[b.front()]; - }); + std::stable_sort(xa_index.begin(), xa_index.end(), + [&probing_table](Cluster const& a, Cluster const& b) { + return probing_table[a.front()] < probing_table[b.front()]; + }); double sum = 0.0; std::size_t cluster_rows_count = 0; std::deque<Cluster> const& x_index = x_pli->GetIndex(); diff --git a/src/core/algorithms/gfd/egfd_validation.cpp b/src/core/algorithms/gfd/egfd_validation.cpp index 21bf4be6a5..307b2fff42 100644 --- a/src/core/algorithms/gfd/egfd_validation.cpp +++ b/src/core/algorithms/gfd/egfd_validation.cpp @@ -342,7 +342,7 @@ void ReverseConstruction(std::set<vertex_t> const& lev, graph_t const& graph, gr std::map<vertex_t, std::set<vertex_t>>& candidates, std::map<vertex_t, int>& cnts, std::map<vertex_t, std::set<vertex_t>>& unvisited_neighbours) { - for (std::set<vertex_t>::iterator j = --lev.end(); j != std::next(lev.begin(), -1); --j) { + for (auto j = lev.rbegin(); j != lev.rend(); ++j) { vertex_t u = *j; int cnt = 0; if (unvisited_neighbours.find(u) 
!= unvisited_neighbours.end()) { diff --git a/src/core/algorithms/gfd/gfd_validation.cpp b/src/core/algorithms/gfd/gfd_validation.cpp index 1f9bf64dec..39a3e4b0c9 100644 --- a/src/core/algorithms/gfd/gfd_validation.cpp +++ b/src/core/algorithms/gfd/gfd_validation.cpp @@ -25,6 +25,10 @@ std::vector<std::vector<vertex_t>> GetPartition(std::vector<vertex_t> const& can config::ThreadNumType const& threads_num) { std::vector<std::vector<vertex_t>> result = {}; + if (candidates.empty()) { + return {}; + } + int musthave = candidates.size() / threads_num; int oversized_num = candidates.size() % threads_num; diff --git a/src/core/algorithms/md/hymd/utility/md_less.h b/src/core/algorithms/md/hymd/utility/md_less.h index 526a91bff0..2fd17e17b1 100644 --- a/src/core/algorithms/md/hymd/utility/md_less.h +++ b/src/core/algorithms/md/hymd/utility/md_less.h @@ -20,7 +20,19 @@ inline bool MdLessPairs(MdPair const& pair_left, MdPair const& pair_right) { } else if (cardinality_left > cardinality_right) { return false; } + +#if __cpp_lib_three_way_comparison == 201907L auto comp = lhs_left <=> lhs_right; +#else + signed char comp; + if (lhs_left < lhs_right) { + comp = -1; + } else if (lhs_left == lhs_right) { + comp = 0; + } else { + comp = 1; + } +#endif if (comp < 0) { return true; } else if (comp > 0) { diff --git a/src/core/algorithms/od/fastod/model/attribute_set.h b/src/core/algorithms/od/fastod/model/attribute_set.h index 7ac9cec2a6..c0744f5239 100644 --- a/src/core/algorithms/od/fastod/model/attribute_set.h +++ b/src/core/algorithms/od/fastod/model/attribute_set.h @@ -8,6 +8,7 @@ #include <boost/functional/hash.hpp> #include "model/table/column_index.h" +#include "util/bitset_extensions.h" namespace algos::fastod { @@ -93,11 +94,11 @@ class AttributeSet { } model::ColumnIndex FindFirst() const noexcept { - return bitset_._Find_first(); + return util::FindFirst(bitset_); } model::ColumnIndex FindNext(model::ColumnIndex pos) const noexcept { - return bitset_._Find_next(pos); + 
return util::FindNext(bitset_, pos); } std::string ToString() const; diff --git a/src/core/algorithms/od/fastod/util/timer.h b/src/core/algorithms/od/fastod/util/timer.h index 4601f980bc..23ada501b1 100644 --- a/src/core/algorithms/od/fastod/util/timer.h +++ b/src/core/algorithms/od/fastod/util/timer.h @@ -4,7 +4,7 @@ namespace algos::fastod { -using TimePoint = std::chrono::_V2::high_resolution_clock::time_point; +using TimePoint = std::chrono::high_resolution_clock::time_point; class Timer { private: diff --git a/src/core/algorithms/statistics/data_stats.cpp b/src/core/algorithms/statistics/data_stats.cpp index 809cf42863..121b044c8a 100644 --- a/src/core/algorithms/statistics/data_stats.cpp +++ b/src/core/algorithms/statistics/data_stats.cpp @@ -464,8 +464,8 @@ Statistic DataStats::GetMedianAD(size_t index) const { return all_stats_[index].median_ad; } mo::TypedColumnData const& col = col_data_[index]; - auto const& type = static_cast<mo::INumericType const&>(col.GetType()); if (!col.IsNumeric()) return {}; + auto const& type = static_cast<mo::INumericType const&>(col.GetType()); std::vector<std::byte const*> data = DeleteNullAndEmpties(index); std::byte* median = MedianOfNumericVector(data, type); diff --git a/src/core/config/exceptions.h b/src/core/config/exceptions.h index adb2641153..49e55296ec 100644 --- a/src/core/config/exceptions.h +++ b/src/core/config/exceptions.h @@ -1,6 +1,7 @@ #pragma once #include <stdexcept> +#include <string> namespace config { diff --git a/src/core/model/table/agree_set_factory.cpp b/src/core/model/table/agree_set_factory.cpp index f48cccdb78..73a27fc8e5 100644 --- a/src/core/model/table/agree_set_factory.cpp +++ b/src/core/model/table/agree_set_factory.cpp @@ -159,8 +159,8 @@ AgreeSetFactory::SetOfAgreeSets AgreeSetFactory::GenAsUsingMapOfIdSets() const { */ unsigned short const actual_threads_num = std::min(max_representation.size(), (size_t)config_.threads_num); - auto task = [&identifier_sets, &agree_sets, 
percent_per_cluster, actual_threads_num, - &map_init_mutex, this, &threads_agree_sets, &map_init_cv, + auto task = [&identifier_sets, percent_per_cluster, actual_threads_num, &map_init_mutex, + this, &threads_agree_sets, &map_init_cv, &map_initialized](SetOfVectors::value_type const& cluster) { std::thread::id const thread_id = std::this_thread::get_id(); diff --git a/src/core/model/table/vertical_map.cpp b/src/core/model/table/vertical_map.cpp index 1e57a1ac6a..6eb8a48a78 100644 --- a/src/core/model/table/vertical_map.cpp +++ b/src/core/model/table/vertical_map.cpp @@ -425,7 +425,7 @@ void VerticalMap<Value>::Shrink(double factor, std::function<bool(Entry, Entry)> key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; unsigned int target_size = size_ * factor; while (!key_queue.empty() && size_ > target_size) { auto key = key_queue.top().first; @@ -433,7 +433,7 @@ void VerticalMap<Value>::Shrink(double factor, std::function<bool(Entry, Entry)> // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); } shrink_invocations_++; @@ -467,14 +467,14 @@ void VerticalMap<Value>::Shrink(std::unordered_map<Vertical, unsigned int>& usag key_queue.push(entry); } }); - unsigned int num_of_removed = 0; + // unsigned int num_of_removed = 0; while (!key_queue.empty()) { auto key = key_queue.front().first; key_queue.pop(); // insert additional logging - num_of_removed++; + // num_of_removed++; Remove(key); RemoveFromUsageCounter(usage_counter, key); } diff --git a/src/core/util/auto_join_thread.h b/src/core/util/auto_join_thread.h new file mode 100644 index 0000000000..c217e564a0 --- /dev/null +++ b/src/core/util/auto_join_thread.h @@ -0,0 +1,53 @@ +#pragma once + +#include <stdexcept> +#include <thread> + +#include <easylogging++.h> + +namespace util::jthread { + +/// @brief Simple RAII wrapper for std::thread. Joins on destruction. 
+/// @remark The class is inspired by Scott Meyers' ThreadRAII (from Effective Modern C++)
+/// @remark Move assignment first joins the thread already owned by the target. A defaulted
+/// move assignment would forward to std::thread::operator=, which calls std::terminate when
+/// the target is still joinable.
+class AutoJoinThread {
+public:
+    /// @brief Take ownership of an already created thread.
+    explicit AutoJoinThread(std::thread&& t) : t_(std::move(t)) {}
+
+    AutoJoinThread(AutoJoinThread&&) = default;
+
+    /// @brief Join the currently owned thread (if joinable), then take over the thread of
+    /// `other`.
+    AutoJoinThread& operator=(AutoJoinThread&& other) noexcept {
+        if (this != &other) {
+            JoinLoggingErrors();
+            t_ = std::move(other.t_);
+        }
+        return *this;
+    }
+
+    // std::thread is not copyable. The non-const overloads make a copy from a non-const lvalue
+    // resolve to a deleted function rather than to the variadic constructor template below;
+    // the const overloads do the same for const objects.
+    AutoJoinThread(AutoJoinThread&) = delete;
+    AutoJoinThread(AutoJoinThread const&) = delete;
+    AutoJoinThread& operator=(AutoJoinThread&) = delete;
+    AutoJoinThread& operator=(AutoJoinThread const&) = delete;
+
+    /// @brief Start a new thread; `f` and `args` are forwarded to the std::thread constructor.
+    template <typename F, typename... Args>
+    explicit AutoJoinThread(F&& f, Args&&... args)
+        : AutoJoinThread(std::thread{std::forward<F>(f), std::forward<Args>(args)...}) {}
+
+    ~AutoJoinThread() {
+        JoinLoggingErrors();
+    }
+
+    /// @brief Access the wrapped std::thread.
+    std::thread& Get() {
+        return t_;
+    }
+
+private:
+    /// Join the owned thread if it is joinable. A std::system_error thrown by join() is logged
+    /// and not passed on, so neither the destructor nor move assignment propagates it.
+    void JoinLoggingErrors() noexcept {
+        try {
+            if (t_.joinable()) {
+                t_.join();
+            }
+        } catch (std::system_error const& e) {
+            LOG(ERROR) << e.what();
+        }
+    }
+
+    std::thread t_;
+};
+
+}  // namespace util::jthread
+
+namespace util {
+
+#ifdef __cpp_lib_jthread
+using JThread = std::jthread;
+#else
+using JThread = jthread::AutoJoinThread;
+#endif
+
+}  // namespace util
diff --git a/src/core/util/bitset_extensions.cpp b/src/core/util/bitset_extensions.cpp
new file mode 100644
index 0000000000..30bbfd2398
--- /dev/null
+++ b/src/core/util/bitset_extensions.cpp
@@ -0,0 +1,54 @@
+#include "bitset_extensions.h"
+
+#include <bit>
+#include <bitset>
+
+namespace util::bitset_extensions {
+
+/// @brief Extract byte number `byte_num` of `val` (kBytes[byte_num] is the mask of that byte).
+CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num) {
+    return (val & kBytes[byte_num]) >> (byte_num * 8);
+}
+
+/// @brief Index of the lowest set bit of `bs`, or kWidth if no bit is set.
+size_t FindFirstFixedWidth(std::bitset<kWidth> const& bs) {
+    if (bs.none()) {
+        return kWidth;
+    }
+    // The value is non-zero here, so the number of trailing zeros is the index of the first 1
+    return static_cast<size_t>(std::countr_zero(bs.to_ullong()));
+}
+
+/// @brief Index of the lowest set bit strictly above `pos`, or kWidth if there is none.
+size_t FindNextFixedWidth(std::bitset<kWidth> const& bs, size_t pos) {
+    // Nothing can follow the last position; this also keeps the shift below in range
+    if (pos >= kWidth - 1) {
+        return kWidth;
+    }
+    // Clear bits [0, pos] so that the lowest remaining set bit is the answer
+    unsigned long long const higher_bits = bs.to_ullong() & (~0ULL << (pos + 1));
+    if (higher_bits == 0) {
+        return kWidth;
+    }
+    return static_cast<size_t>(std::countr_zero(higher_bits));
+}
+
+}  // namespace util::bitset_extensions
diff --git a/src/core/util/bitset_extensions.h b/src/core/util/bitset_extensions.h
new file mode 100644
index 0000000000..78a30ce4f5
--- /dev/null
+++ b/src/core/util/bitset_extensions.h
@@ -0,0 +1,180 @@
+/* This file contains custom implementation of _Find_first and _Find_next gcc-specific methods
+(which come from SGI extensions) of std::bitset for 64-bit bitsets.
+These implementations are close to what is in SGI (and are competitive in terms of efficiency).
+It shouldn't be so hard to adapt them for bitsets of any width -- see, for example,
+https://cocode.se/c++/unsigned_split.html.
+If you need _Find_first or _Find_next methods, consider using FindFirst and FindNext from this file.
+FindFirst and FindNext are wrappers that use custom implementations if (and only if) gcc intrinsics
+aren't available. */
+
+#pragma once
+
+#include <bitset>
+#include <memory>
+#include <vector>
+
+#include <boost/dynamic_bitset.hpp>
+
+namespace util {
+
+/// @brief Wrapper for std::bitset to iterate through set bits.
+class IBitsetIterator {
+public:
+    virtual ~IBitsetIterator() {}
+
+    /// Position the iterator currently points at.
+    virtual size_t Pos() const noexcept = 0;
+    /// Advance the iterator to its next position.
+    virtual void Next() noexcept = 0;
+};
+
+namespace bitset_extensions {
+
+/// kBytes[i] is the mask that selects byte i of a 64-bit value (kBytes[0] is the lowest byte).
+static std::vector<unsigned long long> const kBytes{0xff,
+                                                    0xff'00,
+                                                    0xff'00'00,
+                                                    0xff'00'00'00,
+                                                    0xff'00'00'00'00,
+                                                    0xff'00'00'00'00'00,
+                                                    0xff'00'00'00'00'00'00,
+                                                    0xff'00'00'00'00'00'00'00};
+/// Number of entries in kBytes.
+constexpr static size_t kNumBytes = 8;
+/// Bitset width handled by the *FixedWidth functions declared below.
+constexpr static size_t kWidth = 64;
+
+// Expands to `constexpr` only when the standard library reports the feature-test value 201907L
+// for constexpr std::vector; otherwise it expands to nothing.
+#if (__cpp_lib_constexpr_vector == 201907L)
+#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR constexpr
+#else
+#define CONSTEXPR_IF_VECTOR_IS_CONSTEXPR /* Ignore */
+#endif
+
+/// Returns byte number `byte_num` of `val`.
+// NOTE(review): kBytes itself is not declared constexpr -- confirm that this function is really
+// usable in constant expressions when the macro expands to `constexpr`.
+CONSTEXPR_IF_VECTOR_IS_CONSTEXPR unsigned char GetByte(unsigned long long val, size_t byte_num);
+
+/// Position of the first set bit of a kWidth-bit bitset, kWidth if no bit is set.
+size_t FindFirstFixedWidth(std::bitset<kWidth> const&);
+
+/// Position of the first set bit after `pos` in a kWidth-bit bitset, kWidth if there is none.
+size_t FindNextFixedWidth(std::bitset<kWidth> const&, size_t pos);
+
+/// Satisfied when `bs._Find_first()` is a valid expression for an object of type Bitset.
+template <typename Bitset>
+concept HasFindFirst = requires(Bitset bs) { bs._Find_first(); };
+
+/// @brief Wrapper for std::bitset to iterate through set bits using temporary
+/// boost::dynamic_bitset.
+template <size_t S>
+class DynamicBitsetIterator : public IBitsetIterator {
+private:
+    // Copy of the source bitset, built from its string representation
+    boost::dynamic_bitset<> bs_;
+    // Current position; never larger than bs_.size() (see the clamping below)
+    size_t pos_;
+
+public:
+    DynamicBitsetIterator(std::bitset<S> const& bs) : bs_(bs.to_string()), pos_(bs_.find_first()) {
+        // Results beyond the bitset are clamped to its size (presumably this is the "not found"
+        // value of boost::dynamic_bitset -- confirm against the Boost documentation)
+        if (pos_ > bs_.size()) {
+            pos_ = bs_.size();
+        }
+    }
+
+    ~DynamicBitsetIterator() override = default;
+
+    size_t Pos() const noexcept override {
+        return pos_;
+    }
+
+    void Next() noexcept override {
+        pos_ = bs_.find_next(pos_);
+        // Same clamping as in the constructor
+        if (pos_ > bs_.size()) {
+            pos_ = bs_.size();
+        }
+    }
+};
+
+/// @brief Wrapper for std::bitset to iterate through set bits using GCC intrinsics.
+/// If reference to bitset is invalidated, behaviour is undefined!
+template <size_t S>
+class BitsetIterator : public IBitsetIterator {
+private:
+    // Not owned: the referenced bitset must outlive the iterator
+    std::bitset<S> const& bs_;
+    size_t pos_;
+
+public:
+    BitsetIterator(std::bitset<S> const& bs) : bs_(bs), pos_(bs_._Find_first()) {}
+
+    ~BitsetIterator() override = default;
+
+    size_t Pos() const noexcept override {
+        return pos_;
+    }
+
+    void Next() noexcept override {
+        pos_ = bs_._Find_next(pos_);
+    }
+};
+
+}  // namespace bitset_extensions
+
+/// @brief Position of the first set bit. This overload is selected when std::bitset provides
+/// _Find_first and simply calls it.
+template <size_t S>
+    requires bitset_extensions::HasFindFirst<std::bitset<S>>
+inline size_t FindFirst(std::bitset<S> const& bs) noexcept {
+    return bs._Find_first();
+}
+
+/// @brief Position of the first set bit, S if no bit is set. Fallback used when std::bitset
+/// has no _Find_first.
+/// @remark For widths other than bitset_extensions::kWidth a temporary boost::dynamic_bitset
+/// is built; the function is noexcept, so an allocation failure there terminates the program.
+template <size_t S>
+inline size_t FindFirst(std::bitset<S> const& bs) noexcept {
+    if constexpr (S == bitset_extensions::kWidth) {
+        return bitset_extensions::FindFirstFixedWidth(bs);
+    } else {
+        // FindFirstFixedWidth only accepts kWidth-bit bitsets, so other widths take the same
+        // route as FindNext below
+        boost::dynamic_bitset<> dbs(bs.to_string());
+        auto result = dbs.find_first();
+        return result <= S ? result : S;
+    }
+}
+
+/// @brief Position of the first set bit after `pos`. This overload is selected when
+/// std::bitset provides _Find_first (and therefore _Find_next) and simply calls _Find_next.
+template <size_t S>
+    requires bitset_extensions::HasFindFirst<std::bitset<S>>
+inline size_t FindNext(std::bitset<S> const& bs, size_t pos) noexcept {
+    return bs._Find_next(pos);
+}
+
+/// @brief Position of the first set bit after `pos`, S if there is none. Fallback used when
+/// std::bitset has no _Find_next.
+template <size_t S>
+inline size_t FindNext(std::bitset<S> const& bs, size_t pos) noexcept {
+    if constexpr (S == bitset_extensions::kWidth) {
+        return bitset_extensions::FindNextFixedWidth(bs, pos);
+    } else {
+        // FIXME(senichenkov): implement custom FindNext for 256-bit (or custom width) bitsets
+        boost::dynamic_bitset<> dbs(bs.to_string());
+        auto result = dbs.find_next(pos);
+        return result <= S ? result : S;
+    }
+}
+
+/// @brief If _Find_next is available, copy every set bit, else copy bitset to dynamic_bitset
+/// through string representation. Bitset is shifted 1 bit left.
+template <size_t S>
+    requires bitset_extensions::HasFindFirst<std::bitset<S>>
+inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset<S> const& bs,
+                                                          std::size_t size = S) noexcept {
+    boost::dynamic_bitset<> dyn_bitset(size);
+    // Bit i of bs becomes bit i - 1 of the result. Start after bit 0 (it has no place in the
+    // result) and stop at the first bit that would land at or beyond `size`; both cases would
+    // otherwise index dyn_bitset out of range.
+    for (size_t i = bs._Find_next(0); i < S && i <= size; i = bs._Find_next(i)) {
+        dyn_bitset.set(i - 1);
+    }
+    return dyn_bitset;
+}
+
+/// @brief Fallback used when std::bitset has no _Find_next: copy the bitset to a
+/// dynamic_bitset through its string representation. Bit i of `bs` becomes bit i - 1 of the
+/// result, which always has `size` bits.
+template <size_t S>
+inline boost::dynamic_bitset<> CreateShiftedDynamicBitset(std::bitset<S> const& bs,
+                                                          std::size_t size = S) noexcept {
+    // Only bits 1..S-1 can be copied (bit 0 is shifted out)
+    constexpr std::size_t kMaxCopied = S == 0 ? 0 : S - 1;
+    std::size_t const copied = size < kMaxCopied ? size : kMaxCopied;
+    // to_string() is most-significant-bit first: character k holds bit S - 1 - k, so bits
+    // copied..1 occupy characters [kMaxCopied - copied, kMaxCopied). Computing the start this
+    // way cannot underflow, unlike S - size - 1 with the default size == S.
+    std::size_t const start = kMaxCopied - copied;
+    boost::dynamic_bitset<> dyn_bitset(bs.to_string(), start, copied);
+    // Pad with zero bits when more bits were requested than could be copied
+    dyn_bitset.resize(size);
+    return dyn_bitset;
+}
+
+/// @brief If _Find_next is available, create std::bitset set-bits-iterator, else
+/// boost::dynamic_bitset set-bits-iterator
+template <size_t S>
+    requires bitset_extensions::HasFindFirst<std::bitset<S>>
+inline std::unique_ptr<IBitsetIterator> MakeBitsetIterator(std::bitset<S> const& bs) {
+    return std::make_unique<bitset_extensions::BitsetIterator<S>>(bs);
+}
+
+/// @brief If _Find_next is available, create std::bitset set-bits-iterator, else
+/// boost::dynamic_bitset set-bits-iterator
+template <size_t S>
+inline std::unique_ptr<IBitsetIterator> MakeBitsetIterator(std::bitset<S> const& bs) {
+    return std::make_unique<bitset_extensions::DynamicBitsetIterator<S>>(bs);
+}
+
+}  // namespace util
diff --git a/src/core/util/kdtree.h b/src/core/util/kdtree.h
index 5e47d1b7de..761f3b394a 100644
--- a/src/core/util/kdtree.h
+++ b/src/core/util/kdtree.h
@@ -191,12 +191,12 @@ size_t KDTree<PointType>::Size() const {
 
 template <SubscriptableOrder PointType>
 KDTree<PointType>::KDTree(std::vector<PointType> const& points) : KDTree<PointType>() {
-    std::for_each(points.begin(), points.end(), this->Insert);
+    std::for_each(points.begin(), points.end(), &Insert);
 }
 
 template <SubscriptableOrder PointType>
KDTree<PointType>::KDTree(std::initializer_list<PointType> const& points) : KDTree<PointType>() { - std::for_each(points.begin(), points.end(), this->Insert); + std::for_each(points.begin(), points.end(), &Insert); } template <SubscriptableOrder PointType> diff --git a/src/core/util/maybe_unused.h b/src/core/util/maybe_unused.h new file mode 100644 index 0000000000..11c06d619d --- /dev/null +++ b/src/core/util/maybe_unused.h @@ -0,0 +1,12 @@ +#pragma once + +// clang produces warning on unused private fields, so they need to be marked as [[maybe_unused]], +// but g++ doesn't recognize [[maybe_unused]] on class fields and produces warning. +// This macro expands to [[maybe_unused]], when compiler is clang, nop otherwise +// (see https://stackoverflow.com/questions/50646334/maybe-unused-on-member-variable-gcc-warns- +// incorrectly-that-attribute-is and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72789) +#ifdef __clang__ +#define MAYBE_UNUSED [[maybe_unused]] +#else +#define MAYBE_UNUSED /* Ignore */ +#endif diff --git a/src/core/util/worker_thread_pool.h b/src/core/util/worker_thread_pool.h index 9d4449245a..37e2cb9e9b 100644 --- a/src/core/util/worker_thread_pool.h +++ b/src/core/util/worker_thread_pool.h @@ -7,11 +7,11 @@ #include <future> #include <memory> #include <mutex> -#include <thread> #include <variant> #include <vector> #include "model/index.h" +#include "util/auto_join_thread.h" #include "util/barrier.h" #include "util/desbordante_assume.h" @@ -44,7 +44,7 @@ class WorkerThreadPool { }; Worker work_; - std::vector<std::jthread> worker_threads_; + std::vector<JThread> worker_threads_; std::vector<std::packaged_task<void(WorkerThreadPool&)>> tasks_; util::Barrier<Completion> barrier_; std::condition_variable working_var_; diff --git a/src/tests/test_nd_verifier.cpp b/src/tests/test_nd_verifier.cpp index 9a7884d366..de9e2e3f87 100644 --- a/src/tests/test_nd_verifier.cpp +++ b/src/tests/test_nd_verifier.cpp @@ -51,14 +51,14 @@ INSTANTIATE_TEST_SUITE_P( 
NDVerifyingParams({1, 2, 3}, {6}, 2) )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierHeavyDatasets, TestNDVerifying, ::testing::Values( NDVerifyingParams({5}, {6}, 1000000, kIowa1kk), // I just want to see execution time. Real weight doesn't matter (but it shouldn't be very big) NDVerifyingParams({16, 17, 18}, {20, 23}, 1000000, kIowa1kk) // Also, I want to see how execution time depends on number of columns )); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( NDVerifierTestNullEqualNull, TestNDVerifying, ::testing::Values( // 6-th column contains 2 values and 7 empty cells NDVerifyingParams({0}, {6}, 3, kTestND, true), diff --git a/src/tests/test_pfdtane.cpp b/src/tests/test_pfdtane.cpp index 1721ec7cf7..a93c085566 100644 --- a/src/tests/test_pfdtane.cpp +++ b/src/tests/test_pfdtane.cpp @@ -65,9 +65,9 @@ INSTANTIATE_TEST_SUITE_P( PFDTaneTestMiningSuite, TestPFDTaneMining, ::testing::Values( PFDTaneMiningParams(44381, 0.3, +algos::PfdErrorMeasure::per_value, kTestFD), - PFDTaneMiningParams(39491, 0.1, +algos::PfdErrorMeasure::per_value, kIris), + PFDTaneMiningParams(19266, 0.1, +algos::PfdErrorMeasure::per_value, kIris), PFDTaneMiningParams(10695, 0.01, +algos::PfdErrorMeasure::per_value, kIris), - PFDTaneMiningParams(7893, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), + PFDTaneMiningParams(44088, 0.1, +algos::PfdErrorMeasure::per_value, kNeighbors10k), PFDTaneMiningParams(41837, 0.01, +algos::PfdErrorMeasure::per_value, kNeighbors10k) )); diff --git a/src/tests/test_sfd.cpp b/src/tests/test_sfd.cpp index f754af2966..0392b6bda8 100644 --- a/src/tests/test_sfd.cpp +++ b/src/tests/test_sfd.cpp @@ -48,41 +48,41 @@ namespace tests { TEST(TestCordsUtils, FrequenciesOfIris) { std::vector<std::vector<std::pair<std::string, size_t>>> expected = { - {{"7.400000", 34}, {"7.600000", 33}, {"4.300000", 32}, {"7.100000", 31}, - {"6.100000", 11}, {"5.400000", 8}, {"6.600000", 25}, {"6.400000", 7}, - {"5.800000", 5}, {"4.800000", 14}, {"5.200000", 
18}, {"4.900000", 9}, - {"5.500000", 6}, {"5.700000", 4}, {"4.600000", 16}, {"5.100000", 2}, - {"6.000000", 10}, {"5.600000", 12}, {"4.500000", 27}, {"6.700000", 3}, - {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 15}, {"7.300000", 30}, - {"7.900000", 29}, {"6.900000", 17}, {"6.800000", 20}, {"7.000000", 28}, - {"5.900000", 21}, {"4.700000", 24}, {"5.000000", 0}, {"4.400000", 22}, - {"7.700000", 19}, {"7.200000", 23}, {"5.300000", 26}}, - - {{"4.200000", 21}, {"4.400000", 19}, {"4.000000", 18}, {"2.400000", 16}, - {"3.700000", 15}, {"4.100000", 20}, {"3.600000", 14}, {"2.800000", 1}, + {{"4.300000", 34}, {"4.500000", 33}, {"5.300000", 32}, {"7.100000", 30}, + {"5.400000", 11}, {"6.600000", 24}, {"6.100000", 8}, {"7.600000", 27}, + {"5.800000", 6}, {"4.800000", 14}, {"5.200000", 18}, {"6.400000", 5}, + {"4.900000", 12}, {"5.500000", 7}, {"5.700000", 4}, {"4.600000", 19}, + {"5.100000", 2}, {"6.000000", 9}, {"5.600000", 10}, {"6.700000", 3}, + {"6.300000", 1}, {"6.500000", 13}, {"6.200000", 17}, {"7.700000", 15}, + {"7.200000", 20}, {"7.300000", 29}, {"6.900000", 16}, {"7.900000", 26}, + {"6.800000", 21}, {"7.000000", 31}, {"5.900000", 22}, {"4.700000", 25}, + {"5.000000", 0}, {"4.400000", 23}, {"7.400000", 28}}, + + {{"4.000000", 21}, {"4.200000", 19}, {"4.400000", 18}, {"2.400000", 15}, + {"4.100000", 20}, {"3.600000", 14}, {"3.700000", 13}, {"2.800000", 1}, {"3.800000", 8}, {"3.200000", 2}, {"3.900000", 17}, {"3.100000", 4}, {"3.300000", 10}, {"2.900000", 5}, {"2.000000", 22}, {"2.500000", 7}, - {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 13}, + {"3.400000", 3}, {"3.500000", 9}, {"2.300000", 12}, {"2.200000", 16}, {"3.000000", 0}, {"2.700000", 6}, {"2.600000", 11}}, - {{"6.900000", 42}, {"6.300000", 41}, {"6.600000", 40}, {"1.000000", 39}, - {"1.100000", 38}, {"3.000000", 37}, {"3.600000", 34}, {"5.400000", 32}, - {"6.700000", 31}, {"5.300000", 30}, {"5.900000", 29}, {"4.900000", 7}, - {"5.800000", 15}, {"3.700000", 36}, {"5.200000", 27}, 
{"4.800000", 10}, - {"1.900000", 28}, {"4.500000", 3}, {"1.300000", 4}, {"5.100000", 2}, - {"6.000000", 23}, {"1.600000", 5}, {"4.400000", 12}, {"5.000000", 11}, - {"6.400000", 35}, {"3.500000", 24}, {"1.500000", 0}, {"4.200000", 13}, - {"6.100000", 17}, {"3.800000", 33}, {"1.400000", 1}, {"5.700000", 16}, - {"4.600000", 19}, {"5.500000", 18}, {"1.200000", 22}, {"4.100000", 20}, - {"4.000000", 8}, {"5.600000", 6}, {"3.900000", 21}, {"4.700000", 9}, - {"1.700000", 14}, {"3.300000", 25}, {"4.300000", 26}}, - - {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.900000", 13}, - {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 10}, - {"1.800000", 2}, {"2.200000", 15}, {"0.400000", 6}, {"1.500000", 3}, - {"0.100000", 9}, {"1.400000", 4}, {"2.300000", 5}, {"0.300000", 7}, - {"0.500000", 21}, {"2.500000", 17}, {"1.600000", 14}, {"2.000000", 11}, - {"1.200000", 12}, {"1.000000", 8}}, + {{"1.000000", 42}, {"1.100000", 41}, {"3.000000", 40}, {"3.600000", 39}, + {"6.300000", 36}, {"6.600000", 34}, {"6.900000", 33}, {"5.900000", 24}, + {"4.900000", 7}, {"4.500000", 3}, {"1.300000", 5}, {"1.900000", 31}, + {"4.800000", 11}, {"5.800000", 16}, {"3.700000", 38}, {"5.200000", 27}, + {"5.400000", 25}, {"5.100000", 2}, {"6.000000", 23}, {"1.600000", 4}, + {"4.400000", 12}, {"5.000000", 10}, {"6.700000", 22}, {"6.400000", 35}, + {"3.500000", 29}, {"1.500000", 0}, {"4.200000", 13}, {"6.100000", 15}, + {"3.300000", 30}, {"4.700000", 8}, {"1.700000", 14}, {"3.800000", 37}, + {"1.400000", 1}, {"5.700000", 17}, {"4.600000", 19}, {"5.500000", 18}, + {"1.200000", 32}, {"4.100000", 20}, {"4.000000", 9}, {"5.600000", 6}, + {"3.900000", 21}, {"5.300000", 26}, {"4.300000", 28}}, + + {{"0.600000", 20}, {"1.700000", 19}, {"1.100000", 18}, {"1.200000", 13}, + {"0.200000", 0}, {"2.400000", 16}, {"1.300000", 1}, {"2.100000", 9}, + {"1.800000", 2}, {"2.200000", 17}, {"0.400000", 7}, {"1.500000", 3}, + {"0.100000", 11}, {"2.300000", 4}, {"1.400000", 5}, {"1.000000", 6}, + 
{"0.300000", 8}, {"0.500000", 21}, {"2.500000", 15}, {"1.600000", 14}, + {"2.000000", 10}, {"1.900000", 12}}, {{"Iris-setosa", 2}, {"Iris-versicolor", 1}, {"Iris-virginica", 0}}}; diff --git a/src/tests/test_types.cpp b/src/tests/test_types.cpp index c0ccf2917f..d11a79deb2 100644 --- a/src/tests/test_types.cpp +++ b/src/tests/test_types.cpp @@ -97,8 +97,8 @@ TYPED_TEST(TestNumeric, Negate) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Abs) { @@ -108,8 +108,8 @@ TYPED_TEST(TestNumeric, Abs) { }; test(0); - test(-123.5); - test(321.4); + test(typename TypeParam::UnderlyingType(-123.5)); + test(typename TypeParam::UnderlyingType(321.4)); } TYPED_TEST(TestNumeric, Add) { @@ -135,7 +135,7 @@ TYPED_TEST(TestNumeric, Div) { test(0, 100); test(22, 1); test(123, 321); - test(11.4, 3.14); + test(Type(11.4), Type(3.14)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -150,7 +150,7 @@ TYPED_TEST(TestNumeric, Sub) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -166,7 +166,7 @@ TYPED_TEST(TestNumeric, Mul) { test(100, 0); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -182,10 +182,13 @@ TYPED_TEST(TestNumeric, Pow) { test(0, 100); test(22, 12); - test(123, 321); - test(2.72, 1.3123141); - test(-102, 11); - test(-123, 123); + test(Type(2.72), 1.3123141); + // 123^321, -102^11 and -123^123 won't fit into long (i. e. 
IntType) -- it's UB + if constexpr (!std::is_base_of_v<typename TypeParam::NumericType, mo::IntType>) { + test(123, 321); + test(-102, 11); + test(-123, 123); + } test(-21, -7); } @@ -200,7 +203,7 @@ TYPED_TEST(TestNumeric, Dist) { test(0, 100); test(22, 12); test(123, 321); - test(2.72, 1.3123141); + test(Type(2.72), Type(1.3123141)); test(-102, 11); test(-123, 123); test(-21, -7); @@ -214,8 +217,8 @@ TYPED_TEST(TestNumeric, ValueToString) { test(0); test(123); - test(3.14123123182387); - test(-1231.123456678987654321); + test(typename TypeParam::UnderlyingType(3.14123123182387)); + test(typename TypeParam::UnderlyingType(-1231.123456678987654321)); } struct TestStringParam { diff --git a/ub_sanitizer_ignore_list.txt b/ub_sanitizer_ignore_list.txt new file mode 100644 index 0000000000..b75b0bd24c --- /dev/null +++ b/ub_sanitizer_ignore_list.txt @@ -0,0 +1,6 @@ +# Disable UB sanitizer, "Indirect call of function through a pointer of the wrong type" check: +[function] +# in all files: +src:* +# enable again only in our code: +src:src/*=sanitize