From 8310de280472b789360833806d0ff1c151df8b84 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Tue, 26 Mar 2024 14:39:59 +0100 Subject: [PATCH 01/37] Remove executables from khiops conda package The `khiops` conda package now contains only the python code. The conda package with the Khiops executables (`khiops-core`) is now created in the Khiops repository. The new `khiops` package is "noarch" so there is now only one artifact. --- .github/workflows/conda.yml | 142 ++++++++---------------- .pre-commit-config.yaml | 2 +- packaging/conda/bld.bat | 34 ------ packaging/conda/build.sh | 121 -------------------- packaging/conda/conda_build_config.yaml | 13 --- packaging/conda/meta.yaml | 56 +--------- 6 files changed, 56 insertions(+), 312 deletions(-) delete mode 100644 packaging/conda/bld.bat delete mode 100644 packaging/conda/build.sh delete mode 100644 packaging/conda/conda_build_config.yaml diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 08ed5950..54e4fc3e 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -1,14 +1,16 @@ --- -name: Conda Packages +name: Conda Package env: - DEFAULT_KHIOPS_REVISION: main DEFAULT_SAMPLES_REVISION: main + # Note: The default Khiops version must never be an alpha release as they are + # ephemeral. To test alpha versions run the workflow manually. + DEFAULT_KHIOPS_CORE_VERSION: 10.2.1 on: workflow_dispatch: inputs: - khiops-revision: - default: main - description: khiops repo revision + khiops-core-version: + default: 10.2.1 + description: khiops-core version for testing samples-revision: default: main description: khiops-samples repo revision @@ -29,18 +31,7 @@ concurrency: cancel-in-progress: true jobs: build: - strategy: - fail-fast: false - matrix: - # Use the oldest supported Mac OS and Ubuntu versions for GLIBC compatibility - include: - - os: ubuntu-20.04 - os-family: linux - - os: windows-latest - os-family: windows - - os: macos-11 - os-family: macos - runs-on: ${{ matrix.os }} + runs-on: ubuntu-22.04 steps: - name: Checkout Sources uses: actions/checkout@v4 @@ -52,34 +43,19 @@ jobs: uses: conda-incubator/setup-miniconda@v3 with: miniconda-version: latest - python-version: '3.11' + python-version: '3.12' - name: Install Dependency Requirements for Building Conda Packages - run: conda install conda-build=3.27.0 conda-verify - # We need MacOS SDK 10.10 to build on Big Sur - - name: Install Mac OS SDK 10.10 - if: runner.os == 'macOS' - run: | - wget https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX10.10.sdk.tar.xz - sudo tar -zxvf MacOSX10.10.sdk.tar.xz -C /opt - - name: Set KHIOPS_REVISION build input parameter - run: | - KHIOPS_REVISION="${{ inputs.khiops-revision || env.DEFAULT_KHIOPS_REVISION }}" - echo "KHIOPS_REVISION=$KHIOPS_REVISION" >> "$GITHUB_ENV" - - name: Build Khiops Conda Package (Windows) - if: runner.os == 'Windows' - run: | - mkdir khiops-conda - conda build --output-folder khiops-conda ./packaging/conda - # In Linux/macOS we need the conda-forge channel to install their pinned versions - - name: Build Khiops Conda Package (Linux/macOS) - if: runner.os != 'Windows' + run: conda install conda-build + - name: Build the Conda Package + # Note: The "khiops-dev" conda channel is needed to retrieve the "khiops-core" package. + # The "test" part of the conda recipe needs this package. 
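+        # (conda build runs the recipe's "test" section by default right after the
+        # build, so the "khiops-core" package must be resolvable at build time.)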
run: | - mkdir khiops-conda - conda build --channel conda-forge --output-folder khiops-conda ./packaging/conda - - name: Upload Khiops Conda Package + conda build --channel conda-forge --channel khiops-dev \ + --output-folder ./khiops-conda ./packaging/conda + - name: Upload Conda Package Artifact uses: actions/upload-artifact@v4 with: - name: khiops-conda-${{ matrix.os-family }} + name: khiops-conda path: ./khiops-conda retention-days: 7 # Test Conda package on brand new environments @@ -88,16 +64,17 @@ jobs: strategy: fail-fast: false matrix: - env: - - {os: ubuntu-20.04, os-family: linux} - - {os: ubuntu-22.04, os-family: linux} - - {os: windows-2019, os-family: windows} - - {os: windows-2022, os-family: windows} - - {os: macos-11, os-family: macos} - - {os: macos-12, os-family: macos} - - {os: macos-13, os-family: macos} + os: + - ubuntu-20.04 + - ubuntu-22.04 + - windows-2019 + - windows-2022 + - macos-11 + - macos-12 + - macos-13 + - macos-14 python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - runs-on: ${{ matrix.env.os }} + runs-on: ${{ matrix.os }} env: KHIOPS_SAMPLES_DIR: ./khiops-samples-repo steps: @@ -116,15 +93,23 @@ jobs: - name: Download Conda Package Artifact uses: actions/download-artifact@v4 with: - name: khiops-conda-${{ matrix.env.os-family }} - path: khiops-conda + name: khiops-conda + path: ./khiops-conda + - name: Put the khiops-core Version in the Environment + run: | + KHIOPS_CORE_VERSION="${{ inputs.khiops-core-version || env.DEFAULT_KHIOPS_CORE_VERSION }}" + echo "KHIOPS_CORE_VERSION=$KHIOPS_CORE_VERSION" >> "$GITHUB_ENV" - name: Install the Khiops Conda pagkage (Windows) if: runner.os == 'Windows' - run: conda install -c ./khiops-conda/ khiops + run: | + conda install --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION + conda install --channel ./khiops-conda/ khiops # In Linux/macOS we need the conda-forge channel to install their pinned versions - name: Install the Khiops Conda package (Linux/macOS) if: runner.os != 'Windows' - run: conda install -c conda-forge -c ./khiops-conda/ khiops + run: | + conda install --channel conda-forge --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION + conda install --channel ./khiops-conda/ khiops - name: Test Khiops Installation Status run: kh-status - name: Test Conda Package Installation on Samples @@ -140,7 +125,7 @@ jobs: release: if: github.ref_type == 'tag' needs: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: contents: write steps: @@ -148,20 +133,19 @@ jobs: uses: actions/download-artifact@v4 with: # See the upload-artifact step in the build job for the explanation of this pattern + name: khiops-conda path: ./khiops-conda - pattern: khiops-conda-* - merge-multiple: true - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3 with: miniconda-version: latest - python-version: '3.11' - - name: Install requirement packages - run: conda install -y anaconda-client conda-build=3.27.0 + python-version: '3.12' + - name: Install Requirement Packages + run: conda install -y anaconda-client conda-index - name: Reindex the package directory - run: conda-index ./khiops-conda - - name: Upload the packages to anaconda.org - run: | + run: python -m conda_index ./khiops-conda + - name: Upload the Package to anaconda.org + run: |- # Set the anaconda.org channel ANACONDA_CHANNEL="${{ inputs.release-channel || 'khiops-dev' }}" @@ -169,37 +153,9 @@ jobs: if [[ "$ANACONDA_CHANNEL" == "khiops" ]] then anaconda --token "${{ secrets.KHIOPS_ANACONDA_CHANNEL_TOKEN }}" upload \ - --user "$ANACONDA_CHANNEL" 
./khiops-conda/*/*.tar.bz2 + --user "$ANACONDA_CHANNEL" ./khiops-conda/noarch/*.tar.bz2 # For the dev channel: upload with forcing else anaconda --token "${{ secrets.KHIOPS_DEV_ANACONDA_CHANNEL_TOKEN }}" upload \ - --user "$ANACONDA_CHANNEL" --force ./khiops-conda/*/*.tar.bz2 + --user "$ANACONDA_CHANNEL" --force ./khiops-conda/noarch/*.tar.bz2 fi - - name: Extract package version - run: | - PKG_VERSION=$(\ - conda search --override-channels --channel ./khiops-conda/ khiops \ - | awk '!/#|channels/ {print $2}' \ - | sort -u \ - ) - echo "PKG_VERSION=$PKG_VERSION" >> "$GITHUB_ENV" - - name: Create the release zip archive - uses: thedoctor0/zip-release@0.7.6 - with: - type: zip - path: ./khiops-conda/ - filename: khiops-${{ env.PKG_VERSION }}-conda.zip - - name: Upload conda package artifacts for all platforms - uses: actions/upload-artifact@v4 - with: - name: khiops-conda-all - path: ./khiops-${{ env.PKG_VERSION }}-conda.zip - - name: Release the zip archive - uses: ncipollo/release-action@v1 - with: - allowUpdates: true - artifacts: ./khiops-${{ env.PKG_VERSION }}-conda.zip - draft: false - makeLatest: false - prerelease: true - updateOnlyUnreleased: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9786ede1..1c217d93 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: rev: 1.15.0 hooks: - id: yamlfix - exclude: packaging/conda/(meta|conda_build_config).yaml + exclude: packaging/conda/meta.yaml - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.27.1 hooks: diff --git a/packaging/conda/bld.bat b/packaging/conda/bld.bat deleted file mode 100644 index fd2380a6..00000000 --- a/packaging/conda/bld.bat +++ /dev/null @@ -1,34 +0,0 @@ -REM Echo all output -@echo on - -REM Clone Khiops sources -git clone https://github.com/khiopsml/khiops.git khiops_bin -cd .\khiops_bin\ -git checkout "%KHIOPS_REVISION%" -cd .. - -REM Copy relevant Khiops files to current directory -robocopy .\khiops_bin\src .\src /e -robocopy .\khiops_bin\test .\test /e -mkdir .\packaging -robocopy .\khiops_bin\packaging\common .\packaging\common /e -if errorlevel 8 exit 1 -copy /y .\khiops_bin\CMakeLists.txt . -copy /y .\khiops_bin\CMakePresets.json . -copy /y .\khiops_bin\LICENSE . -copy /y .\khiops_bin\packaging\install.cmake .\packaging\ -copy /y .\khiops_bin\packaging\packaging.cmake .\packaging\ - -REM Build the Khiops binaries -cmake --preset windows-msvc-release -DBUILD_JARS=OFF -DTESTING=OFF -cmake --build --preset windows-msvc-release --parallel --target MODL MODL_Coclustering - -REM Copy the MODL binaries to the Conda PREFIX path -mkdir %PREFIX%\bin -copy build\windows-msvc-release\bin\MODL.exe %PREFIX%\bin -copy build\windows-msvc-release\bin\MODL_Coclustering.exe %PREFIX%\bin - -REM Build the Khiops Python package -"%PYTHON%" -m pip install . --no-deps --ignore-installed --no-cache-dir --no-build-isolation -vvv - -if errorlevel 1 exit 1 diff --git a/packaging/conda/build.sh b/packaging/conda/build.sh deleted file mode 100644 index 3bbb4527..00000000 --- a/packaging/conda/build.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/bash - -# Set-up the shell to behave more like a general-purpose programming language -set -euo pipefail - -# Clone Khiops sources (we change working directory there) -git clone https://github.com/khiopsml/khiops.git khiops-core -cd khiops-core -git checkout "$KHIOPS_REVISION" - -# Copy License file -cp ./LICENSE .. 
- -# Build MODL and MODL_Coclustering -# Note on macOS we need the macOS SDK 10.10 for this conda build to work -if [[ "$(uname)" == "Darwin" ]] -then - CMAKE_PRESET="macos-clang-release" -else - CMAKE_PRESET="linux-gcc-release" -fi -cmake --preset $CMAKE_PRESET -DBUILD_JARS=OFF -DTESTING=OFF -DCMAKE_CXX_COMPILER="$PREFIX/bin/mpicxx" -cmake --build --preset $CMAKE_PRESET --parallel --target MODL MODL_Coclustering - -# Copy the MODL binaries to the Conda PREFIX path -cp "./build/$CMAKE_PRESET/bin/MODL" "$PREFIX/bin" -cp "./build/$CMAKE_PRESET/bin/MODL_Coclustering" "$PREFIX/bin" - - -# Build the Khiops Python package in the base directory -cd .. -$PYTHON -m pip install . --no-deps --ignore-installed --no-cache-dir --no-build-isolation -vvv - -# Custom rpath relocation and signing executables for macOS in arm64 -# -# In osx-arm64 executing any binary that is not signed will make appear popups appearing demanding -# "accepting incoming connections". Since our application doesn't need any connections from the -# outside the machine this doesn't affect the execution but since it is launched with MPI the number -# of popups appearing is high. This is difficult to fix for the user because the if the artifact is -# not signed it will reappear even if we click in the "Allow" button. So we sign the MODL -# executables to solve this (only a single popup concerning mpiexec.hydra may appear but for this -# application pressing on "Allow" works). -# -# However, in the default settings, `conda build` relocalizes the executable by changing rpath of -# the library paths at $PREFIX by relative ones and in doing so it nullifies any signature. So we -# do ourselves this procedure first and then sign the binary. -# -# Note that in meta.yaml for osx-arm64 we have custom build.binary_relocation and -# build.detect_binary_files_with_prefix option -# -# This part must be executed in a root machine to be non-interactive (eg. 
GitHub runner) -# It also needs the following environment variable: -# - KHIOPS_APPLE_CERTIFICATE_COMMON_NAME: The second column of the `security find-identity` command -# A base64 encoded certificate may also be provided, the following 2 variables must be set -# - KHIOPS_APPLE_CERTIFICATE_BASE64: The identity file .p12 (certificate + private key) in base64 -# - KHIOPS_APPLE_CERTIFICATE_PASSWORD: Password for the certificate file -# - KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD: A temporary password to decrypt the certificate -# -if [[ "$(uname)" == "Darwin" && -n "${KHIOPS_APPLE_CERTIFICATE_COMMON_NAME-}" ]] -then - # Delete the rpath of each executable - # Delete two times for MODL because for some reason it is there 2 times - install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL" - install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL" - install_name_tool -delete_rpath "$PREFIX/lib" "$PREFIX/bin/MODL_Coclustering" - - # Add the relative rpath as conda build would - install_name_tool -add_rpath "@loader_path/../lib" "$PREFIX/bin/MODL" - install_name_tool -add_rpath "@loader_path/../lib" "$PREFIX/bin/MODL_Coclustering" - - if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]] - then - # Keychain setup slightly modified from: https://stackoverflow.com/a/68577995 - # Before importing identity - # - Set the default user login keychain - # - Create a temporary keychain - # - Append temporary keychain to the user domain - # - Remove relock timeout - # - Unlock the temporary keychain - sudo security list-keychains -d user -s login.keychain - sudo security create-keychain -p "$KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD" kh-tmp.keychain - sudo security list-keychains -d user -s kh-tmp.keychain \ - "$(security list-keychains -d user | sed s/\"//g)" - sudo security set-keychain-settings kh-tmp.keychain - sudo security unlock-keychain -p "$KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD" kh-tmp.keychain - - # Add identity (certificate + private key) to keychain - echo "$KHIOPS_APPLE_CERTIFICATE_BASE64" \ - | base64 --decode -i - -o kh-cert.p12 - sudo security import kh-cert.p12 \ - -k kh-tmp.keychain \ - -P "$KHIOPS_APPLE_CERTIFICATE_PASSWORD" \ - -A -T "/usr/bin/codesign" - rm -f kh-cert.p12 - - # Enable codesigning from a non user interactive shell - sudo security set-key-partition-list -S apple-tool:,apple:, \ - -s -k "$KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD" \ - -D "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" \ - -t private kh-tmp.keychain - fi - - # We make sure to use the default macOS/Xcode codesign tool. This is because the sigtool python - # package (installed by conda build as a dependency) makes an alias "codesign" which is prioritary - # in the build environment. The alias, however, alias doesn't support signing with a proper - # identity and makes the build fail! 
- CODESIGN="/usr/bin/codesign" - - # Sign the MODL executable and check - $CODESIGN --force --sign "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$PREFIX/bin/MODL" - $CODESIGN --force --sign "$KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" "$PREFIX/bin/MODL_Coclustering" - $CODESIGN -d -vvv "$PREFIX/bin/MODL" - $CODESIGN -d -vvv "$PREFIX/bin/MODL_Coclustering" - - # Remove the temporary keychain and restore the login keychain as default if created - if [[ -n "${KHIOPS_APPLE_CERTIFICATE_BASE64-}" ]] - then - sudo security delete-keychain kh-tmp.keychain - sudo security list-keychains -d user -s login.keychain - fi -fi diff --git a/packaging/conda/conda_build_config.yaml b/packaging/conda/conda_build_config.yaml deleted file mode 100644 index d7e92ce4..00000000 --- a/packaging/conda/conda_build_config.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -python: - - 3.8 - - 3.9 - - 3.10 - - 3.11 - - 3.12 - -# We need MacOS SDK 10.10 to be able to build on Big Sur for x64 -# Download: https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX10.10.sdk.tar.xz -# Decompress then to /opt: tar -zxvf MacOSX10.10.sdk.tar.xz -C /opt -CONDA_BUILD_SYSROOT: - - /opt/MacOSX10.10.sdk # [osx and not arm64] diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index a85ec1e8..0e62f11e 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -7,68 +7,26 @@ source: path: ../../ build: - script_env: - - KHIOPS_REVISION - # Variables for signing the MODL executables in osx-arm64. - {% if "KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" in os.environ %} - - KHIOPS_APPLE_CERTIFICATE_COMMON_NAME # [osx] - # Only available when "KHIOPS_APPLE_CERTIFICATE_BASE64" is defined in the environment. - {% if "KHIOPS_APPLE_CERTIFICATE_BASE64" in os.environ %} - - KHIOPS_APPLE_CERTIFICATE_BASE64 # [osx] - - KHIOPS_APPLE_CERTIFICATE_PASSWORD # [osx] - - KHIOPS_APPLE_TMP_KEYCHAIN_PASSWORD # [osx] - {% endif %} - {% endif %} + number: 0 + noarch: python entry_points: - kh-status = khiops.tools:kh_status_entry_point - kh-samples = khiops.tools:kh_samples_entry_point - kh-download-datasets = khiops.tools:kh_download_datasets_entry_point - pk-status = khiops.tools:pk_status_entry_point # deprecated - number: 0 - # Binary relocation of MODL and MODL_Coclustering is done in build.sh script - # This is to be able to sign it, see the script for more details. - # Only done when "KHIOPS_APPLE_CERTIFICATE_BASE64" is defined in the environment. - {% if "KHIOPS_APPLE_CERTIFICATE_COMMON_NAME" in os.environ %} - binary_relocation: # [osx] - - bin/kh-status # [osx] - - bin/kh-samples # [osx] - - bin/kh-download-datasets # [osx] - - bin/pk-status # [osx] - detect_binary_files_with_prefix: false # [osx] - {% endif %} + script: | + {{ PYTHON }} -m pip install . 
--no-deps --ignore-installed --no-cache-dir --no-build-isolation -vvv -# Note on version pinning: -# OSX: -# - mpich=3.4.3 because 4.* is still unstable -# - requires conda-forge -# Linux: -# - mpich=4.0.3 because of bugs of the 3.* series -# - requires conda-forge requirements: build: - - mpich 4.0.3 # [linux] - - mpich-mpicxx 4.0.3 # [linux] - - mpich 3.4.3 # [osx] - - mpich-mpicxx 3.4.3 # [osx] - - msmpi # [win] - - cmake - - ninja - python - setuptools - - {{ compiler('cxx') }} host: - - mpich 4.0.3 # [linux] - - mpich-mpicxx 4.0.3 # [linux] - - mpich 3.4.3 # [osx] - - mpich-mpicxx 3.4.3 # [osx] - - msmpi # [win] - python run: - - mpich 4.0.3 # [linux] - - mpich 3.4.3 # [osx] - - msmpi # [win] - - pandas >=0.25.3 - python + - khiops-core >=10.0.0,<11.0.0 + - pandas >=0.25.3 - scikit-learn >=0.22.2 run_constrained: - boto3 >=1.17.39 @@ -78,8 +36,6 @@ outputs: - name: {{ metadata.get('name') }} test: commands: - - MODL -v - - MODL_Coclustering -v - kh-status imports: - khiops.core.api From 0b120514ab739a3819a271b6911cbc94aacac639 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:07:36 +0200 Subject: [PATCH 02/37] Improve release checklist at CONTRIBUTING.md --- CONTRIBUTING.md | 53 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 990c5923..5e639bdb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -227,8 +227,8 @@ git stash pop # only when you have non-committed changes ### Package dependencies We should strive to minimize external package dependencies to minimize installation problems. The current dependency policy is: -- `pykhiops.core` should only depend on python built-in modules. -- `pykhiops.sklearn` should only depend on python built-in modules and the following mainstream +- `khiops.core` should only depend on python built-in modules. +- `khiops.sklearn` should only depend on python built-in modules and the following mainstream data-science packages: - [Scikit-learn](https://scikit-learn.org/stable/) - [Pandas](https://pandas.pydata.org/) @@ -244,22 +244,53 @@ carefree while still trying to not add too many dependencies. We follow a non-standard `MAJOR.MINOR.PATCH.INCREMENT[PRE_RELEASE]` versioning convention. The first three numbers `MAJOR.MINOR.PATCH` are the latest Khiops version that is compatible with the package. The number `INCREMENT` indicates the evolution of `khiops-python` followed by an optional -`[PRE_RELEASE` version for alpha, beta and release candidate releases (eg. `b2`). +`[PRE_RELEASE]` version for alpha, beta and release candidate releases (eg. `b2`). ## Releases + +## Pre-releases When tagging a revision the CI will create the packages and upload them to the `khiops-dev` channel. Prefer to augment the pre-release revision number to re-create a tag because the CI overwrites packages with the same tag in the `khiops-dev` channel. Do not forget to clean any temporary -pre-releases from `khiops-dev` and the releases github page. - -To make a public release, you must execute the `Conda Packages` CI workflow manually on a tag and +pre-releases from `khiops-dev` and the releases GitHub page. 
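+For example (hypothetical version numbers): if the pre-release tag `10.2.1.0b1` must be redone,
+prefer tagging the fixed revision as `10.2.1.0b2` rather than deleting and re-creating
+`10.2.1.0b1`.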
+
+## Public Releases
+Checklist:
+- Release issue and its related PR
+  - Update the API Docs if necessary
+  - Update `CHANGELOG.md`
+  - Update the default `khiops-core` version in `.github/workflows/conda.yml`
+- Git manipulations
+  - Update your local repo and save your work:
+    - `git stash # if necessary`
+    - `git fetch --tags --prune --prune-tags`
+    - `git switch dev`
+    - `git pull`
+    - `git switch main`
+    - `git pull`
+  - Merge the `dev` branch into `main`
+    - `git switch main`
+    - `git merge dev`
+  - Tag the merge commit with the release version (see Versioning above)
+    - `git switch main`
+    - `git tag 10.3.0.1 # Just an example`
+  - Make `dev` point to the merge commit just created in `main`
+    - This is necessary to include the merge commit in `main` so that Versioneer can calculate
+      intermediary versions.
+    - Steps:
+      - `git switch dev`
+      - `git reset --hard main`
+      - `git push origin dev` (you need to remove the branch protections of `dev` for this step)
+- Workflows
+  - Execute the `Conda Package` workflow specifying:
+    - The release tag
+    - `khiops` as the release channel
+  - Execute the `API Docs` workflow specifying "Deploy GH Pages".
+
+To make a public release, you must execute the `Conda Package` CI workflow manually on a tag and
 specify the `khiops` anaconda channel for upload. These uploads do not overwrite any packages in
 this channel, so you must correct any mistake manually.
 
 ### Git Manipulations upon a Major Release
 
 The following is the check list to be done upon a major release:
-- Merge the `dev` branch into `main`
-- Tag the merge commit with the release version
-- Rebase the `dev` branch onto `main`
-  - This is necessary to include the merge commit into master to calculate intermediary versions
-    with versioneer

From 6ba7ec5609ecefb18fdded5062c27ea37e7ab4f7 Mon Sep 17 00:00:00 2001
From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com>
Date: Mon, 8 Apr 2024 21:50:02 +0200
Subject: [PATCH 03/37] Add conda package tests for Rocky Linux

---
 .github/workflows/conda.yml | 46 ++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index 54e4fc3e..6da1f3df 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -1,7 +1,7 @@
 ---
 name: Conda Package
 env:
-  DEFAULT_SAMPLES_REVISION: main
+  DEFAULT_SAMPLES_VERSION: 10.2.0
   # Note: The default Khiops version must never be an alpha release as they are
   # ephemeral. To test alpha versions run the workflow manually.
   DEFAULT_KHIOPS_CORE_VERSION: 10.2.1
@@ -11,9 +11,9 @@ on:
       khiops-core-version:
         default: 10.2.1
        description: khiops-core version for testing
-      samples-revision:
-        default: main
-        description: khiops-samples repo revision
+      khiops-samples-version:
+        default: 10.2.0
+        description: khiops-samples version
       release-channel:
         type: choice
         default: khiops-dev
@@ -64,27 +64,21 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os:
-          - ubuntu-20.04
-          - ubuntu-22.04
-          - windows-2019
-          - windows-2022
-          - macos-11
-          - macos-12
-          - macos-13
-          - macos-14
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        env:
+          - {os: ubuntu-20.04, json-image: '{"image": null}'}
+          - {os: ubuntu-22.04, json-image: '{"image": null}'}
+          - {os: ubuntu-22.04, json-image: '{"image": "rockylinux:8"}'}
+          - {os: ubuntu-22.04, json-image: '{"image": "rockylinux:9"}'}
+          - {os: windows-2019, json-image: '{"image": null}'}
+          - {os: windows-2022, json-image: '{"image": null}'}
+          - {os: macos-11, json-image: '{"image": null}'}
+          - {os: macos-12, json-image: '{"image": null}'}
+          - {os: macos-13, json-image: '{"image": null}'}
+          - {os: macos-14, json-image: '{"image": null}'}
-    runs-on: ${{ matrix.os }}
-    env:
-      KHIOPS_SAMPLES_DIR: ./khiops-samples-repo
+    runs-on: ${{ matrix.env.os }}
+    container: ${{ fromJSON(matrix.env.json-image) }}
     steps:
-      - name: Checkout Khiops samples
-        uses: actions/checkout@v4
-        with:
-          repository: khiopsml/khiops-samples
-          ref: ${{ inputs.samples-revision || env.DEFAULT_SAMPLES_REVISION }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          path: ${{ env.KHIOPS_SAMPLES_DIR }}
       - name: Install Miniconda
         uses: conda-incubator/setup-miniconda@v3
         with:
@@ -99,7 +93,7 @@ jobs:
         run: |
           KHIOPS_CORE_VERSION="${{ inputs.khiops-core-version || env.DEFAULT_KHIOPS_CORE_VERSION }}"
           echo "KHIOPS_CORE_VERSION=$KHIOPS_CORE_VERSION" >> "$GITHUB_ENV"
-      - name: Install the Khiops Conda pagkage (Windows)
+      - name: Install the Khiops Conda package (Windows)
         if: runner.os == 'Windows'
         run: |
           conda install --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION
@@ -112,6 +106,10 @@ jobs:
           conda install --channel ./khiops-conda/ khiops
       - name: Test Khiops Installation Status
         run: kh-status
+      - name: Download Sample Datasets
+        run: |
+          kh-download-datasets \
+            --version ${{ inputs.khiops-samples-version || env.DEFAULT_SAMPLES_VERSION }}
       - name: Test Conda Package Installation on Samples
         run: |
           kh-samples core -i train_predictor -e

From fe0ad717b590a56a0d4d627861098ad89b12ee8d Mon Sep 17 00:00:00 2001
From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com>
Date: Tue, 9 Apr 2024 10:45:54 +0200
Subject: [PATCH 04/37] Update pre-commit repos

---
 .pre-commit-config.yaml         | 10 ++++-----
 doc/conf.py                     |  1 +
 doc/convert_tutorials.py        |  1 +
 khiops/core/api.py              | 20 +++++++++--------
 khiops/core/internals/runner.py |  8 ++++---
 khiops/core/internals/task.py   |  8 +++----
 scripts/update_copyright.py     |  1 +
 tests/test_core.py              | 38 +++++++++++++++++++++------------
 8 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1c217d93..beb7663b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,28 +1,28 @@
 ---
 repos:
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 23.10.1
+    rev: 24.3.0
     hooks:
       - id: black
         language_version: python3
   - repo: https://github.com/pycqa/pylint
-    rev: v3.0.1
+    rev: v3.1.0
     hooks:
       - id: pylint
         language_version: python3
         exclude: doc/convert_samples.py|doc/conf.py|versioneer.py|khiops/_version.py|setup.py
   - repo: https://github.com/pycqa/isort
-
rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort language_version: python3 - repo: https://github.com/lyz-code/yamlfix/ - rev: 1.15.0 + rev: 1.16.0 hooks: - id: yamlfix exclude: packaging/conda/meta.yaml - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.27.1 + rev: 0.28.1 hooks: - id: check-github-workflows args: [--verbose] diff --git a/doc/conf.py b/doc/conf.py index 51a0c670..ab39df32 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,4 +1,5 @@ """Khiops Python Sphinx configuration file""" + import os import sys from datetime import datetime diff --git a/doc/convert_tutorials.py b/doc/convert_tutorials.py index 349f1f93..452e45be 100644 --- a/doc/convert_tutorials.py +++ b/doc/convert_tutorials.py @@ -1,4 +1,5 @@ """Converts the Jupyter notebooks of the Khiops Python tutorial to reST""" + import argparse import glob import os diff --git a/khiops/core/api.py b/khiops/core/api.py index 5f304c95..ad99487f 100644 --- a/khiops/core/api.py +++ b/khiops/core/api.py @@ -128,15 +128,17 @@ def _run_task(task_name, task_args): # Create a command line options object command_line_options = CommandLineOptions( batch_mode=task_args["batch_mode"] if "batch_mode" in task_args else True, - log_file_path=task_args["log_file_path"] - if "log_file_path" in task_args - else "", - output_scenario_path=task_args["output_scenario_path"] - if "output_scenario_path" in task_args - else "", - task_file_path=task_args["task_file_path"] - if "task_file_path" in task_args - else "", + log_file_path=( + task_args["log_file_path"] if "log_file_path" in task_args else "" + ), + output_scenario_path=( + task_args["output_scenario_path"] + if "output_scenario_path" in task_args + else "" + ), + task_file_path=( + task_args["task_file_path"] if "task_file_path" in task_args else "" + ), ) # Clean the task_args to leave only the task arguments diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 0ae11f01..83181012 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -833,9 +833,11 @@ def _write_task_scenario_file( writer, task, task_args, - general_options - if general_options is not None - else self.general_options, + ( + general_options + if general_options is not None + else self.general_options + ), ) fs.write(scenario_path, scenario_stream.getvalue()) diff --git a/khiops/core/internals/task.py b/khiops/core/internals/task.py index b603d02c..80d33b81 100644 --- a/khiops/core/internals/task.py +++ b/khiops/core/internals/task.py @@ -304,10 +304,10 @@ def write_execution_scenario(self, writer, args): and args["output_additional_data_tables"] is not None ): for data_path in args["output_additional_data_tables"].keys(): - args["output_additional_data_tables"][ - data_path - ] = create_unambiguous_khiops_path( - args["output_additional_data_tables"][data_path] + args["output_additional_data_tables"][data_path] = ( + create_unambiguous_khiops_path( + args["output_additional_data_tables"][data_path] + ) ) # Transform to string-like parameters diff --git a/scripts/update_copyright.py b/scripts/update_copyright.py index 72c9b1fe..f31d1028 100644 --- a/scripts/update_copyright.py +++ b/scripts/update_copyright.py @@ -1,4 +1,5 @@ """Updates the copyright notice of the input files""" + import argparse from datetime import datetime diff --git a/tests/test_core.py b/tests/test_core.py index 78926ce4..1bab6354 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -404,20 +404,30 @@ def test_api_scenario_generation(self): 
bytes(f"{dataset}Deployed.csv", encoding="ascii"), ], "kwargs": { - "additional_data_tables": { - bytes(key, encoding="ascii"): bytes(value, encoding="ascii") - for key, value in additional_data_tables[dataset].items() - } - if additional_data_tables[dataset] is not None - else None, - "output_additional_data_tables": { - bytes(key, encoding="ascii"): bytes(value, encoding="ascii") - for key, value in output_additional_data_tables[ - dataset - ].items() - } - if output_additional_data_tables[dataset] is not None - else None, + "additional_data_tables": ( + { + bytes(key, encoding="ascii"): bytes( + value, encoding="ascii" + ) + for key, value in additional_data_tables[ + dataset + ].items() + } + if additional_data_tables[dataset] is not None + else None + ), + "output_additional_data_tables": ( + { + bytes(key, encoding="ascii"): bytes( + value, encoding="ascii" + ) + for key, value in output_additional_data_tables[ + dataset + ].items() + } + if output_additional_data_tables[dataset] is not None + else None + ), }, } for dataset in datasets From b1416e23280ea5ef8394f74c5ba3a2bcb124a0ab Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 2 Apr 2024 11:13:30 +0200 Subject: [PATCH 05/37] Rename PyKhiops* test class to Khiops* --- tests/test_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 55b33d76..963abb54 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -2249,7 +2249,7 @@ def test_parameter_transfer_coclustering_predict_from_file_dataset(self): ) -class PyKhiopsSklearnEstimatorStandardTests(unittest.TestCase): +class KhiopsSklearnEstimatorStandardTests(unittest.TestCase): """Tests to comply with `sklearn.util.estimator_checks.check_estimator`""" def test_sklearn_check_estimator(self): From 9bfa2b690c5c88ec7b71eace560e2000692cfec2 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:39:48 +0200 Subject: [PATCH 06/37] Give each particular *Table its own `__repr__` method The `dtypes` attribute has been removed in changeset 1e1422dd --- khiops/sklearn/tables.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/khiops/sklearn/tables.py b/khiops/sklearn/tables.py index 72287f6e..33dac075 100644 --- a/khiops/sklearn/tables.py +++ b/khiops/sklearn/tables.py @@ -962,13 +962,6 @@ def create_khiops_dictionary(self): def _get_all_column_ids(self): """Returns the column ids including the target""" - def __repr__(self): - dtypes_str = str(self.dtypes).replace("\n", ", ")[:-16].replace(" ", ":") - return ( - f"<{self.__class__.__name__}; cols={list(self.column_ids)}; " - f"dtypes={dtypes_str}; target={self.target_column_id}>" - ) - class PandasTable(DatasetTable): """Table encapsulating (X,y) pair with types (pandas.DataFrame, pandas.Series) @@ -1067,6 +1060,15 @@ def __init__( # Check key integrity self.check_key() + def __repr__(self): + dtypes_str = ( + str(self.dataframe.dtypes).replace("\n", ", ")[:-16].replace(" ", ":") + ) + return ( + f"<{self.__class__.__name__}; cols={list(self.column_ids)}; " + f"dtypes={dtypes_str}; target={self.target_column_id}>" + ) + def _get_all_column_ids(self): if self.target_column is not None: all_column_ids = list(self.column_ids) + [self.target_column_id] @@ -1197,6 +1199,13 @@ def __init__( } self.n_samples = len(self.array) + def __repr__(self): + dtype_str = str(self.array.dtype) + return ( + 
f"<{self.__class__.__name__}; cols={list(self.column_ids)}; " + f"dtype={dtype_str}; target={self.target_column_id}>" + ) + def _get_all_column_ids(self): n_columns = len(self.column_ids) if self.target_column is not None: From 62961d3c52de8625386ccc92fc1afd364dd7fd34 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:41:46 +0200 Subject: [PATCH 07/37] Update PandasTable docstring to account for dataframe label support --- khiops/sklearn/tables.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/khiops/sklearn/tables.py b/khiops/sklearn/tables.py index 33dac075..7a89de0c 100644 --- a/khiops/sklearn/tables.py +++ b/khiops/sklearn/tables.py @@ -964,7 +964,10 @@ def _get_all_column_ids(self): class PandasTable(DatasetTable): - """Table encapsulating (X,y) pair with types (pandas.DataFrame, pandas.Series) + """Table encapsulating the features dataframe X and the target labels y + + X is of type pandas.DataFrame. + y is of type pandas.Series or pandas.DataFrame. Parameters ---------- From 1cdc65f5f615362adb1e2d79694c5c754067c0ac Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:54:02 +0200 Subject: [PATCH 08/37] Add SciPy sparse matrix support All types mandated by SciPy / Sklearn are supported Empty rows in sparse matrices are supported as rows where all variables have missing values. This is needed so that Khiops yields predictions for such empty data as well, in order to comply with Scikit Learn standard estimator tests. related_to #42 --- doc/conf.py | 1 + khiops/sklearn/tables.py | 189 +++++++++++++++++++++++++++++++++++- tests/test_dataset_class.py | 73 +++++++++++++- tests/test_sklearn.py | 14 ++- 4 files changed, 265 insertions(+), 12 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index ab39df32..e4027f58 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -52,6 +52,7 @@ "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), "sklearn": ("https://scikit-learn.org/stable", None), "numpy": ("https://numpy.org/doc/stable", None), + "scipy": ("https://docs.scipy.org/doc/scipy", None), } ## Autosummary extension config diff --git a/khiops/sklearn/tables.py b/khiops/sklearn/tables.py index 7a89de0c..9bccad55 100644 --- a/khiops/sklearn/tables.py +++ b/khiops/sklearn/tables.py @@ -13,11 +13,13 @@ import numpy as np import pandas as pd +import scipy.sparse as sp from sklearn.utils import check_array from sklearn.utils.validation import column_or_1d import khiops.core as kh import khiops.core.internals.filesystems as fs +from khiops.core.dictionary import VariableBlock from khiops.core.internals.common import ( deprecation_message, is_dict_like, @@ -164,6 +166,11 @@ def __init__(self, X, y=None, categorical_target=True, key=None): y, categorical_target=categorical_target, ) + # A sparse matrix + elif isinstance(X, sp.spmatrix): + self._init_tables_from_sparse_matrix( + X, y, categorical_target=categorical_target + ) # A tuple spec elif isinstance(X, tuple): warnings.warn( @@ -226,6 +233,17 @@ def _init_tables_from_dataframe(self, X, y=None, categorical_target=True): ) self.secondary_tables = [] + def _init_tables_from_sparse_matrix(self, X, y=None, categorical_target=True): + """Initializes the dataset from a 'X' of type scipy.sparse.spmatrix""" + assert isinstance(X, sp.spmatrix), "'X' must be a scipy.sparse.spmatrix" + if y is not None and not hasattr(y, "__array__"): + raise TypeError(type_error_message("y", y, "array-like")) + + 
self.main_table = SparseTable( + "main_table", X, target_column=y, categorical_target=categorical_target + ) + self.secondary_tables = [] + def _init_tables_from_numpy_array(self, X, y=None, categorical_target=True): assert hasattr( X, "__array__" @@ -710,14 +728,14 @@ def is_in_memory(self): """Tests whether the dataset is in memory A dataset is in memory if it is constituted either of only pandas.DataFrame - tables or numpy.ndarray tables. + tables, numpy.ndarray, or scipy.sparse.spmatrix tables. Returns ------- bool `True` if the dataset is constituted of pandas.DataFrame tables. """ - return isinstance(self.main_table, (PandasTable, NumpyTable)) + return isinstance(self.main_table, (PandasTable, NumpyTable, SparseTable)) def is_multitable(self): """Tests whether the dataset is a multi-table one @@ -1261,6 +1279,173 @@ def create_table_file_for_khiops(self, output_dir, sort=True): return output_table_path +class SparseTable(DatasetTable): + """Table encapsulating feature matrix X and target array y + + X is of type scipy.sparse.spmatrix. + y is array-like. + + Parameters + ---------- + name : str + Name for the table. + matrix : `scipy.sparse.spmatrix` + The sparse matrix to be encapsulated. + key : list-like of str, optional + The names of the columns composing the key + target_column : :external:term:`array-like`, optional + The array containing the target column. + categorical_target : bool, default ``True``. + ``True`` if the target column is categorical. + """ + + def __init__( + self, name, matrix, key=None, target_column=None, categorical_target=True + ): + assert key is None, "'key' must be unset for sparse matrix tables" + # Call the parent method + super().__init__(name, key=key, categorical_target=categorical_target) + + # Check the sparse matrix types + if not isinstance(matrix, sp.spmatrix): + raise TypeError( + type_error_message("matrix", matrix, "scipy.sparse.spmatrix") + ) + if not np.issubdtype(matrix.dtype, np.number): + raise TypeError( + type_error_message("'matrix' dtype", matrix.dtype, "numeric") + ) + + # Check the target's types + if target_column is not None and not hasattr(target_column, "__array__"): + raise TypeError( + type_error_message("target_column", target_column, "array-like") + ) + + # Initialize the members + self.matrix = matrix + self.column_ids = list(range(self.matrix.shape[1])) + self.target_column_id = self.matrix.shape[1] + self.target_column = target_column + self.categorical_target = categorical_target + self.khiops_types = { + column_id: get_khiops_type(self.matrix.dtype) + for column_id in self.column_ids + } + self.n_samples = self.matrix.shape[0] + + def __repr__(self): + dtype_str = str(self.matrix.dtype) + return ( + f"<{self.__class__.__name__}; cols={list(self.column_ids)}; " + f"dtype={dtype_str}; target={self.target_column_id}>" + ) + + def create_khiops_dictionary(self): + """Creates a Khiops dictionary representing this sparse table + + Adds metadata to each sparse variable + + Returns + ------- + `.Dictionary`: + The Khiops Dictionary object describing this table's schema + + """ + + # create dictionary as usual + dictionary = super().create_khiops_dictionary() + + # create variable block for containing the sparse variables + variable_block = VariableBlock() + variable_block.name = "SparseVariables" + + # For each variable, add metadata, named `VarKey` + variable_names = [variable.name for variable in dictionary.variables] + target_column_variable_name = self.get_khiops_variable_name( + self.target_column_id + ) + for i, 
variable_name in enumerate(variable_names, 1): + if variable_name != target_column_variable_name: + variable = dictionary.remove_variable(variable_name) + variable.meta_data.add_value("VarKey", i) + variable_block.add_variable(variable) + dictionary.add_variable_block(variable_block) + + return dictionary + + def _get_all_column_ids(self): + n_columns = len(self.column_ids) + if self.target_column is not None: + n_columns += 1 + return list(range(n_columns)) + + def get_khiops_variable_name(self, column_id): + """Return the khiops variable name associated to a column id""" + assert column_id == self.target_column_id or column_id in self.column_ids + if isinstance(column_id, str): + variable_name = column_id + else: + assert isinstance(column_id, (np.int64, int)) + variable_name = f"Var{column_id}" + return variable_name + + def _write_sparse_block(self, row_index, stream, target=None): + assert row_index in range( + self.matrix.shape[0] + ), "'row_index' must be coherent with the shape of the sparse matrix" + if target is not None: + assert target in self.target_column, "'target' must be in the target column" + stream.write(f"{target}\t") + row = self.matrix.getrow(row_index) + # Empty row in the sparse matrix: use the first variable as missing data + # TODO: remove this part once Khiops bug + # https://github.com/KhiopsML/khiops/issues/235 is solved + if row.data.size == 0: + for variable_index in self.column_ids: + stream.write(f"{variable_index + 1}: ") + break + # Non-empty row in the sparse matrix: get non-missing data + else: + # Variable indices are not always sorted in `row.indices` + # Khiops needs variable indices to be sorted + sorted_indices = np.sort(row.indices, axis=-1, kind="mergesort") + sorted_data = row.data[sorted_indices.argsort()] + for variable_index, variable_value in zip(sorted_indices, sorted_data): + stream.write(f"{variable_index + 1}:{variable_value} ") + stream.write("\n") + + def create_table_file_for_khiops(self, output_dir, sort=True): + # Create the output table resource object + output_table_path = fs.get_child_path(output_dir, f"{self.name}.txt") + + # Write the sparse matrix to an internal table file + with io.StringIO() as output_sparse_matrix_stream: + if self.target_column is not None: + target_column_name = self.get_khiops_variable_name( + self.target_column_id + ) + output_sparse_matrix_stream.write( + f"{target_column_name}\tSparseVariables\n" + ) + for target, row_index in zip( + self.target_column, range(self.matrix.shape[0]) + ): + self._write_sparse_block( + row_index, output_sparse_matrix_stream, target=target + ) + else: + output_sparse_matrix_stream.write("SparseVariables\n") + for row_index in range(self.matrix.shape[0]): + self._write_sparse_block(row_index, output_sparse_matrix_stream) + fs.write( + output_table_path, + output_sparse_matrix_stream.getvalue().encode("utf-8"), + ) + + return output_table_path + + class FileTable(DatasetTable): """A table representing a delimited text file diff --git a/tests/test_dataset_class.py b/tests/test_dataset_class.py index 0fdeeeba..8d071998 100644 --- a/tests/test_dataset_class.py +++ b/tests/test_dataset_class.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +import scipy.sparse as sp from numpy.testing import assert_equal from pandas.testing import assert_frame_equal from sklearn import datasets @@ -521,8 +522,8 @@ def test_created_file_from_dataframe_monotable(self): ) def test_created_file_from_numpy_array_monotable(self): - """Test consistency of the created data file with the 
input dataframe""" - # Create a monotable dahaset from a numpy array + """Test consistency of the created data file with the input numpy array""" + # Create a monotable dataset from a numpy array iris = datasets.load_iris() spec = {"tables": {"iris": (iris.data, None)}} dataset = Dataset(spec, y=iris.target, categorical_target=True) @@ -541,6 +542,74 @@ def test_created_file_from_numpy_array_monotable(self): ), ) + def _create_test_sparse_matrix_with_target(self): + # Create sparse array that also contains missing data-only rows + sparse_array = np.eye(N=100, k=2) + np.eye(N=100, k=5) + + # Create scipy sparse (CSR) matrix from the sparse array + sparse_matrix = sp.csr_matrix(sparse_array) + + # Create targets: -1 for left-sided values; +1 for right-sided values, + # 0 for missing-data-only rows + target_array = np.array(50 * [-1] + 45 * [1] + 5 * [0]) + return sparse_matrix, target_array + + def _load_khiops_sparse_file(self, stream): + # skip header + next(stream) + target_vector = [] + feature_matrix = [] + for line in stream: + target, features = line.split(b"\t") + feature_row = np.zeros(100) + for feature in features.strip().split(b" "): + feature_index, feature_value = feature.split(b":") + try: + feature_value = float(feature_value) + # missing value, whence empty string + except ValueError: + feature_value = 0.0 + feature_row[int(feature_index) - 1] = feature_value + feature_matrix.append(feature_row) + target_vector.append(float(target)) + target_array = np.array(target_vector) + sparse_matrix = sp.csr_matrix(feature_matrix) + return sparse_matrix, target_array + + def test_created_file_from_sparse_matrix_monotable(self): + """Test consistency of the created data file with the input sparse matrix""" + + # Load input sparse matrix and target array + ( + input_sparse_matrix, + input_target, + ) = self._create_test_sparse_matrix_with_target() + + # Create monotable dataset from the sparse matrix + dataset = Dataset( + X=input_sparse_matrix, y=input_target, categorical_target=True + ) + # Create and load the intermediary Khiops file + created_table_path, _ = dataset.create_table_files_for_khiops(self.output_dir) + with open(created_table_path, "rb") as created_table_stream: + sparse_matrix, target_array = self._load_khiops_sparse_file( + created_table_stream + ) + + # Check that the arrays are equal + assert_equal( + np.concatenate( + ( + sparse_matrix.toarray(), + target_array.reshape(-1, 1), + ), + axis=1, + ), + np.concatenate( + (input_sparse_matrix.toarray(), input_target.reshape(-1, 1)), axis=1 + ), + ) + def test_created_file_from_data_file_monotable(self): """Test consistency of the created data file with the input data file diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 963abb54..0024f1b3 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -2271,15 +2271,13 @@ def test_sklearn_check_estimator(self): for estimator, check in check_estimator( khiops_estimator, generate_only=True ): - # Skip: - # - sparse data tests (not yet supported) - # - some checks for KhiopsEncoder as they yield "empty" deployed tables - # - To be implemented manually + # Skip some checks for KhiopsEncoder as they yield "empty" + # deployed tables; they need to be implemented manually check_name = check.func.__name__ - if check_name == "check_estimator_sparse_data" or ( - check_name in ["check_fit_score_takes_y", "check_fit_idempotent"] - and isinstance(estimator, KhiopsEncoder) - ): + if check_name in [ + "check_fit_score_takes_y", + "check_fit_idempotent", + ] and 
isinstance(estimator, KhiopsEncoder): continue with self.subTest( sklearn_check_name=check_name, sklearn_check_kwargs=check.keywords From a11fe422f0010d81446c38c7061da049d32bf2d6 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:13:23 +0200 Subject: [PATCH 09/37] Add sparse sklearn KhiopsClassifier sample related_to #42 --- doc/convert_samples.py | 2 + doc/samples/samples_sklearn.rst | 52 ++++++++++++++++++++++++ khiops/samples/samples_sklearn.ipynb | 60 ++++++++++++++++++++++++++++ khiops/samples/samples_sklearn.py | 53 ++++++++++++++++++++++++ 4 files changed, 167 insertions(+) diff --git a/doc/convert_samples.py b/doc/convert_samples.py index d4e472aa..ae204a8a 100644 --- a/doc/convert_samples.py +++ b/doc/convert_samples.py @@ -30,6 +30,8 @@ def create_boilerplate_code(script_name): "from sklearn.compose import ColumnTransformer\n", "from sklearn.experimental import enable_hist_gradient_boosting\n", "from sklearn.ensemble import HistGradientBoostingClassifier\n", + "from sklearn.datasets import fetch_20newsgroups\n", + "from sklearn.feature_extraction.text import HashingVectorizer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import OneHotEncoder\n", diff --git a/doc/samples/samples_sklearn.rst b/doc/samples/samples_sklearn.rst index 68d96603..2afed2c9 100644 --- a/doc/samples/samples_sklearn.rst +++ b/doc/samples/samples_sklearn.rst @@ -44,6 +44,8 @@ preamble: from sklearn.compose import ColumnTransformer from sklearn.experimental import enable_hist_gradient_boosting from sklearn.ensemble import HistGradientBoostingClassifier + from sklearn.datasets import fetch_20newsgroups + from sklearn.feature_extraction.text import HashingVectorizer from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import OneHotEncoder @@ -105,6 +107,56 @@ Samples print(f"Test accuracy = {test_accuracy}") print(f"Test auc = {test_auc}") +.. autofunction:: khiops_classifier_sparse +.. 
code-block:: python + + def khiops_classifier_sparse(): + + # Load 3 classes of the 20newsgroups dataset + categories = ["comp.graphics", "sci.space", "misc.forsale"] + data_train, y_train = fetch_20newsgroups( + subset="train", + categories=categories, + return_X_y=True, + ) + data_test, y_test = fetch_20newsgroups( + subset="test", + categories=categories, + return_X_y=True, + ) + + # Extract features from the training data using a sparse vectorizer + vectorizer = HashingVectorizer(n_features=2**10, stop_words="english") + X_train = vectorizer.fit_transform(data_train) + + # Extract features from the test data using the same vectorizer + X_test = vectorizer.transform(data_test) + + # Create the classifier object + khc = KhiopsClassifier() + + # Train the classifier + khc.fit(X_train, y_train) + + # Predict the classes on the test dataset + y_test_pred = khc.predict(X_test) + print("Predicted classes (first 10):") + print(y_test_pred[0:10]) + print("---") + + # Predict the class probabilities on the test dataset + y_test_probas = khc.predict_proba(X_test) + print(f"Class order: {khc.classes_}") + print("Predicted class probabilities (first 10):") + print(y_test_probas[0:10]) + print("---") + + # Evaluate accuracy and auc metrics on the test dataset + test_accuracy = metrics.accuracy_score(y_test, y_test_pred) + test_auc = metrics.roc_auc_score(y_test, y_test_probas, multi_class="ovr") + print(f"Test accuracy = {test_accuracy}") + print(f"Test auc = {test_auc}") + .. autofunction:: khiops_classifier_multiclass .. code-block:: python diff --git a/khiops/samples/samples_sklearn.ipynb b/khiops/samples/samples_sklearn.ipynb index 3d5e58a0..dd541edb 100644 --- a/khiops/samples/samples_sklearn.ipynb +++ b/khiops/samples/samples_sklearn.ipynb @@ -26,6 +26,8 @@ "from sklearn.compose import ColumnTransformer\n", "from sklearn.experimental import enable_hist_gradient_boosting\n", "from sklearn.ensemble import HistGradientBoostingClassifier\n", + "from sklearn.datasets import fetch_20newsgroups\n", + "from sklearn.feature_extraction.text import HashingVectorizer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import OneHotEncoder\n", @@ -93,6 +95,64 @@ "khiops_classifier()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def khiops_classifier_sparse():\n", + " \"\"\"Trains a `.KhiopsClassifier` on a monotable sparse matrix\"\"\"\n", + "\n", + " # Load 3 classes of the 20newsgroups dataset\n", + " categories = [\"comp.graphics\", \"sci.space\", \"misc.forsale\"]\n", + " data_train, y_train = fetch_20newsgroups(\n", + " subset=\"train\",\n", + " categories=categories,\n", + " return_X_y=True,\n", + " )\n", + " data_test, y_test = fetch_20newsgroups(\n", + " subset=\"test\",\n", + " categories=categories,\n", + " return_X_y=True,\n", + " )\n", + "\n", + " # Extract features from the training data using a sparse vectorizer\n", + " vectorizer = HashingVectorizer(n_features=2**10, stop_words=\"english\")\n", + " X_train = vectorizer.fit_transform(data_train)\n", + "\n", + " # Extract features from the test data using the same vectorizer\n", + " X_test = vectorizer.transform(data_test)\n", + "\n", + " # Create the classifier object\n", + " khc = KhiopsClassifier()\n", + "\n", + " # Train the classifier\n", + " khc.fit(X_train, y_train)\n", + "\n", + " # Predict the classes on the test dataset\n", + " y_test_pred = khc.predict(X_test)\n", + " print(\"Predicted 
classes (first 10):\")\n", + " print(y_test_pred[0:10])\n", + " print(\"---\")\n", + "\n", + " # Predict the class probabilities on the test dataset\n", + " y_test_probas = khc.predict_proba(X_test)\n", + " print(f\"Class order: {khc.classes_}\")\n", + " print(\"Predicted class probabilities (first 10):\")\n", + " print(y_test_probas[0:10])\n", + " print(\"---\")\n", + "\n", + " # Evaluate accuracy and auc metrics on the test dataset\n", + " test_accuracy = metrics.accuracy_score(y_test, y_test_pred)\n", + " test_auc = metrics.roc_auc_score(y_test, y_test_probas, multi_class=\"ovr\")\n", + " print(f\"Test accuracy = {test_accuracy}\")\n", + " print(f\"Test auc = {test_auc}\")\n", + "\n", + "#Run sample\n", + "khiops_classifier_sparse()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/khiops/samples/samples_sklearn.py b/khiops/samples/samples_sklearn.py index 189d8554..112bd95f 100644 --- a/khiops/samples/samples_sklearn.py +++ b/khiops/samples/samples_sklearn.py @@ -23,6 +23,9 @@ from sklearn.ensemble import HistGradientBoostingClassifier # isort: on +from sklearn.datasets import fetch_20newsgroups +from sklearn.feature_extraction.text import HashingVectorizer + # pylint: enable=unused-import from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline @@ -88,6 +91,55 @@ def khiops_classifier(): print(f"Test auc = {test_auc}") +def khiops_classifier_sparse(): + """Trains a `.KhiopsClassifier` on a monotable sparse matrix""" + + # Load 3 classes of the 20newsgroups dataset + categories = ["comp.graphics", "sci.space", "misc.forsale"] + data_train, y_train = fetch_20newsgroups( + subset="train", + categories=categories, + return_X_y=True, + ) + data_test, y_test = fetch_20newsgroups( + subset="test", + categories=categories, + return_X_y=True, + ) + + # Extract features from the training data using a sparse vectorizer + vectorizer = HashingVectorizer(n_features=2**10, stop_words="english") + X_train = vectorizer.fit_transform(data_train) + + # Extract features from the test data using the same vectorizer + X_test = vectorizer.transform(data_test) + + # Create the classifier object + khc = KhiopsClassifier() + + # Train the classifier + khc.fit(X_train, y_train) + + # Predict the classes on the test dataset + y_test_pred = khc.predict(X_test) + print("Predicted classes (first 10):") + print(y_test_pred[0:10]) + print("---") + + # Predict the class probabilities on the test dataset + y_test_probas = khc.predict_proba(X_test) + print(f"Class order: {khc.classes_}") + print("Predicted class probabilities (first 10):") + print(y_test_probas[0:10]) + print("---") + + # Evaluate accuracy and auc metrics on the test dataset + test_accuracy = metrics.accuracy_score(y_test, y_test_pred) + test_auc = metrics.roc_auc_score(y_test, y_test_probas, multi_class="ovr") + print(f"Test accuracy = {test_accuracy}") + print(f"Test auc = {test_auc}") + + def khiops_classifier_multiclass(): """Trains a multiclass `.KhiopsClassifier` on a monotable dataframe""" # Load the dataset into a pandas dataframe @@ -761,6 +813,7 @@ def khiops_classifier_multitable_star_file(): exported_samples = [ khiops_classifier, + khiops_classifier_sparse, khiops_classifier_multiclass, khiops_classifier_multitable_star, khiops_classifier_multitable_snowflake, From e6ba8222ffa1bdbeaeb6be45c7f9e9b10c909aef Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 5 Apr 2024 18:03:25 +0200 Subject: [PATCH 10/37] Support mapping-based dataset input 
with sparse matrices closes #42 --- khiops/sklearn/tables.py | 27 +++++++++++++++++++++++---- tests/test_dataset_class.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_dataset_errors.py | 4 ++-- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/khiops/sklearn/tables.py b/khiops/sklearn/tables.py index 9bccad55..a20631fe 100644 --- a/khiops/sklearn/tables.py +++ b/khiops/sklearn/tables.py @@ -437,6 +437,16 @@ def _init_tables_from_mapping(self, X, y=None, categorical_target=True): self.secondary_tables.append( PandasTable(table_name, table_source, key=table_key) ) + # Case of sparse matrices + elif isinstance(main_table_source, sp.spmatrix): + self.main_table = SparseTable( + main_table_name, + main_table_source, + key=main_table_key, + target_column=y, + categorical_target=categorical_target, + ) + self.secondary_tables = [] # Case of numpyarray else: self.main_table = NumpyTable( @@ -596,14 +606,14 @@ def _check_input_mapping(self, X, y=None): f"must have size 2 not {len(table_input)}" ) table_source, table_key = table_input - if not isinstance(table_source, (pd.DataFrame, str)) and not hasattr( - table_source, "__array__" - ): + if not isinstance( + table_source, (pd.DataFrame, sp.spmatrix, str) + ) and not hasattr(table_source, "__array__"): raise TypeError( type_error_message( f"Table source at X['tables']['{table_name}']", table_source, - "array-like", + "array-like or scipy.sparse.spmatrix", str, ) ) @@ -718,6 +728,15 @@ def _check_input_mapping(self, X, y=None): type_error_message("y", y, pd.Series, pd.DataFrame) + " (X's tables are of type pandas.DataFrame)" ) + if ( + isinstance(main_table_source, sp.spmatrix) + or hasattr(main_table_source, "__array__") + ) and not hasattr(y, "__array__"): + raise TypeError( + type_error_message("y", y, "array-like") + + " (X's tables are of type numpy.ndarray" + + " or scipy.sparse.spmatrix)" + ) if isinstance(main_table_source, str) and not isinstance(y, str): raise TypeError( type_error_message("y", y, str) diff --git a/tests/test_dataset_class.py b/tests/test_dataset_class.py index 8d071998..ae095c86 100644 --- a/tests/test_dataset_class.py +++ b/tests/test_dataset_class.py @@ -610,6 +610,40 @@ def test_created_file_from_sparse_matrix_monotable(self): ), ) + def test_created_file_from_sparse_matrix_monotable_specification(self): + """Test consistency of the created data file with the input sparse matrix""" + + # Load input sparse matrix and target array + ( + input_sparse_matrix, + input_target, + ) = self._create_test_sparse_matrix_with_target() + + # Create monotable dataset from input mapping with the sparse matrix + spec = {"tables": {"example_sparse_matrix": (input_sparse_matrix, None)}} + dataset = Dataset(spec, y=input_target, categorical_target=True) + + # Create and load the intermediary Khiops file + created_table_path, _ = dataset.create_table_files_for_khiops(self.output_dir) + with open(created_table_path, "rb") as created_table_stream: + sparse_matrix, target_array = self._load_khiops_sparse_file( + created_table_stream + ) + + # Check that the arrays are equal + assert_equal( + np.concatenate( + ( + sparse_matrix.toarray(), + target_array.reshape(-1, 1), + ), + axis=1, + ), + np.concatenate( + (input_sparse_matrix.toarray(), input_target.reshape(-1, 1)), axis=1 + ), + ) + def test_created_file_from_data_file_monotable(self): """Test consistency of the created data file with the input data file diff --git a/tests/test_dataset_errors.py b/tests/test_dataset_errors.py index 473f8aa6..3cd09f40 100644 --- 
a/tests/test_dataset_errors.py
+++ b/tests/test_dataset_errors.py
@@ -579,14 +579,14 @@ def test_dict_spec_table_input_tuple_must_have_size_2(self):
         expected_msg = "Table input tuple at X['tables']['D'] must have size 2 not 4"
         self.assert_dataset_fails(bad_spec, y, ValueError, expected_msg)
 
-    def test_dict_spec_source_table_type_must_be_array_like_or_str(self):
+    def test_dict_spec_source_table_type_must_be_adequate(self):
         """Test Dataset raising TypeError when a table entry is not str nor DataFrame"""
         bad_spec, y = self.create_fixture_dataset_spec()
         bad_spec["tables"]["D"] = (AnotherType(), bad_spec["tables"]["D"][-1])
         expected_msg = type_error_message(
             "Table source at X['tables']['D']",
             bad_spec["tables"]["D"][0],
-            "array-like",
+            "array-like or scipy.sparse.spmatrix",
             str,
         )
         self.assert_dataset_fails(bad_spec, y, TypeError, expected_msg)

From 99d00197d3b2ca5307f9c7ee89e7c8f1c6f815f2 Mon Sep 17 00:00:00 2001
From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com>
Date: Thu, 11 Apr 2024 19:56:51 +0200
Subject: [PATCH 11/37] Fix metric name search in performance report

---
 khiops/core/analysis_results.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/khiops/core/analysis_results.py b/khiops/core/analysis_results.py
index ea018a27..8cbb4cf4 100644
--- a/khiops/core/analysis_results.py
+++ b/khiops/core/analysis_results.py
@@ -2841,14 +2841,13 @@ def get_metric(self, metric_name):
         """
         # Search the lower cased metric name in the list, report error if not found
         lowercase_metric_name = metric_name.lower()
-        metric = None
-        for name in self.get_metric_names():
-            if lowercase_metric_name == name:
-                metric = getattr(self, lowercase_metric_name)
-        if metric is None:
+        metric_found = lowercase_metric_name in self.get_metric_names()
+        if metric_found:
+            metric = getattr(self, lowercase_metric_name)
+        else:
             metric_list_msg = ",".join(self.get_metric_names())
             raise ValueError(
-                f"Invalid metric: '{metric_name}'. Choose among {metric_list_msg}"
+                f"Invalid metric: '{metric_name}'. Choose among {metric_list_msg}."
             )
         return metric

From 5cc1d79b054c9c916df005f363ddcee6482b2b8f Mon Sep 17 00:00:00 2001
From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com>
Date: Fri, 12 Apr 2024 09:47:20 +0200
Subject: [PATCH 12/37] Fix interval number check in DataGridDimension

Change the minimal size of an interval partition from 2 to 1.
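After this change, a numerical data grid dimension may be described by a
single interval. The new IrisU2D reference report exercises this case: the
constant-like variable Dummy2 has the one-interval partition
[[0.00390625, 1]]. A minimal standalone sketch of the relaxed check
(illustrative only; the real validation lives in the DataGridDimension
parsing code patched below, and the KhiopsJSONError here stands in for the
one raised by khiops.core):

    # Illustrative sketch of the relaxed partition-length validation.
    # The real check is in khiops/core/analysis_results.py; this local
    # KhiopsJSONError only mimics the exception used there.
    class KhiopsJSONError(Exception):
        """A malformed Khiops JSON report was encountered."""

    def check_interval_partition(json_partition):
        # A one-interval partition such as [[0.00390625, 1]] is now
        # accepted; only an empty partition list is rejected.
        if len(json_partition) < 1:
            raise KhiopsJSONError(
                "'partition' for interval must have length at least 1"
            )

    check_interval_partition([[0.00390625, 1]])             # single interval: ok
    check_interval_partition([[0.95, 1.25], [1.25, 1.75]])  # two intervals: ok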
--- khiops/core/analysis_results.py | 4 +- tests/resources/README.md | 4 +- .../ref_json_reports/IrisU2D.khj | 2714 +++++++++++++++++ .../analysis_results/ref_reports/IrisU2D.txt | 1159 +++++++ tests/test_core.py | 1 + 5 files changed, 3878 insertions(+), 4 deletions(-) create mode 100644 tests/resources/analysis_results/ref_json_reports/IrisU2D.khj create mode 100644 tests/resources/analysis_results/ref_reports/IrisU2D.txt diff --git a/khiops/core/analysis_results.py b/khiops/core/analysis_results.py index 8cbb4cf4..bf980730 100644 --- a/khiops/core/analysis_results.py +++ b/khiops/core/analysis_results.py @@ -2178,9 +2178,9 @@ def __init__(self, json_data=None): # Numerical partition if self.partition_type == "Intervals": # Check the length of the partition - if len(json_partition) < 2: + if len(json_partition) < 1: raise KhiopsJSONError( - "'partition' for interval must have length at least 2" + "'partition' for interval must have length at least 1" ) # Initialize intervals diff --git a/tests/resources/README.md b/tests/resources/README.md index dba20a96..0c7ea1fd 100644 --- a/tests/resources/README.md +++ b/tests/resources/README.md @@ -1,6 +1,6 @@ Test Resources -------------- -The file `reference_paths.txt` contain the origin and destination of reports and dictionary files. The -origin is a path relative to the `LearningTest` directory that contain the Khiops tests. The +The file `reference_paths.txt` contain the origin and destination of reports and dictionary files. +The origin is a path relative to the `LearningTest` directory that contain the Khiops tests. The destination paths are relative to this directory. diff --git a/tests/resources/analysis_results/ref_json_reports/IrisU2D.khj b/tests/resources/analysis_results/ref_json_reports/IrisU2D.khj new file mode 100644 index 00000000..73582195 --- /dev/null +++ b/tests/resources/analysis_results/ref_json_reports/IrisU2D.khj @@ -0,0 +1,2714 @@ +{ + "tool": "Khiops", + "version": "10.5.0-a1", + "shortDescription": "", + "preparationReport": { + "reportType": "Preparation", + "summary": { + "dictionary": "Iris", + "variables": { + "types": [ + "Categorical", + "Numerical" + ], + "numbers": [ + 4, + 8 + ] + }, + "database": "..\/..\/..\/datasets\/Iris\/Iris.txt", + "samplePercentage": 70, + "samplingMode": "Include sample", + "selectionVariable": "", + "selectionValue": "", + "instances": 105, + "learningTask": "Unsupervised analysis", + "evaluatedVariables": 12, + "nativeVariables": 5, + "constructedVariables": 7, + "featureEngineering": { + "maxNumberOfConstructedVariables": 0, + "maxNumberOfTextFeatures": 0, + "maxNumberOfTrees": 0, + "maxNumberOfVariablePairs": 100 + }, + "discretization": "MODL", + "valueGrouping": "MODL" + }, + "variablesStatistics": [ + { + "rank": "R01", + "name": "Class", + "type": "Categorical", + "parts": 3, + "values": 3, + "mode": "Iris-setosa", + "modeFrequency": 38, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805 + }, + { + "rank": "R02", + "name": "Class1", + "type": "Categorical", + "parts": 2, + "values": 2, + "mode": "", + "modeFrequency": 67, + "missingNumber": 67, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "IfC(EQc(Class, \"Iris-setosa\"), \"setosa\", \"\")" + }, + { + "rank": "R03", + "name": "Class2", + "type": "Categorical", + "parts": 2, + "values": 2, + "mode": "", + "modeFrequency": 73, + "missingNumber": 73, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "IfC(EQc(Class, \"Iris-versicolor\"), 
\"versicolor\", \"\")" + }, + { + "rank": "R04", + "name": "Dummy1", + "type": "Numerical", + "parts": 1, + "values": 1, + "min": 0, + "max": 0, + "mean": 0, + "stdDev": 0, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "Copy(0)" + }, + { + "rank": "R05", + "name": "Dummy2", + "type": "Numerical", + "parts": 1, + "values": 105, + "min": 0.005121241265, + "max": 0.9859650261, + "mean": 0.5173966838, + "stdDev": 0.2650019122, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "Random()" + }, + { + "rank": "R06", + "name": "LowerPetalLength", + "type": "Numerical", + "parts": 4, + "values": 10, + "min": 1, + "max": 3, + "mean": 2.446666667, + "stdDev": 0.7433600251, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "If(LE(PetalLength, 3), PetalLength, 3)" + }, + { + "rank": "R07", + "name": "PetalLength", + "type": "Numerical", + "parts": 5, + "values": 36, + "min": 1, + "max": 6.9, + "mean": 3.686666667, + "stdDev": 1.80132579, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805 + }, + { + "rank": "R08", + "name": "PetalWidth", + "type": "Numerical", + "parts": 5, + "values": 21, + "min": 0.1, + "max": 2.5, + "mean": 1.175238095, + "stdDev": 0.7880996979, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805 + }, + { + "rank": "R09", + "name": "SPetalLength", + "type": "Categorical", + "parts": 5, + "values": 5, + "mode": "1", + "modeFrequency": 38, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "AsCategorical(Floor(PetalLength))" + }, + { + "rank": "R10", + "name": "SepalLength", + "type": "Numerical", + "parts": 2, + "values": 31, + "min": 4.3, + "max": 7.7, + "mean": 5.827619048, + "stdDev": 0.8375127846, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805 + }, + { + "rank": "R11", + "name": "SepalWidth", + "type": "Numerical", + "parts": 3, + "values": 23, + "min": 2, + "max": 4.4, + "mean": 3.081904762, + "stdDev": 0.4284592446, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805 + }, + { + "rank": "R12", + "name": "UpperPetalWidth", + "type": "Numerical", + "parts": 2, + "values": 11, + "min": 1.5, + "max": 2.5, + "mean": 1.692380952, + "stdDev": 0.2962287527, + "missingNumber": 0, + "sparseMissingNumber": 0, + "constructionCost": 3.17805, + "derivationRule": "If(GE(PetalWidth, 1.5), PetalWidth, 1.5)" + } + ], + "variablesDetailedStatistics": { + "R01": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + } + ], + "frequencies": [38,35,32] + }, + "inputValues": { + "values": ["Iris-setosa","Iris-virginica","Iris-versicolor"], + "frequencies": [38,35,32] + } + }, + "R02": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + } + ], + "frequencies": [67,38] + }, + "inputValues": { + "values": ["","setosa"], + "frequencies": [67,38] + } + }, + "R03": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": 
[ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + } + ], + "frequencies": [73,32] + }, + "inputValues": { + "values": ["","versicolor"], + "frequencies": [73,32] + } + }, + "R05": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Dummy2", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.00390625,1] + ] + } + ], + "frequencies": [105] + }, + "modlHistograms": { + "histogramNumber": 1, + "interpretableHistogramNumber": 1, + "truncationEpsilon": 0, + "removedSingularIntervalNumber": 0, + "granularities": [0], + "intervalNumbers": [1], + "peakIntervalNumbers": [0], + "spikeIntervalNumbers": [0], + "emptyIntervalNumbers": [0], + "levels": [0], + "informationRates": [0], + "histograms": [ + { + "bounds": [0.00390625,1], + "frequencies": [105] + } + ] + } + }, + "R06": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.95,1.25], + [1.25,1.75], + [1.75,2.95], + [2.95,3.05] + ] + } + ], + "frequencies": [4,32,2,67] + }, + "modlHistograms": { + "histogramNumber": 5, + "interpretableHistogramNumber": 4, + "truncationEpsilon": 0.1, + "removedSingularIntervalNumber": 0, + "granularities": [0,2,3,5,28], + "intervalNumbers": [1,3,5,4,12], + "peakIntervalNumbers": [0,0,1,1,5], + "spikeIntervalNumbers": [0,0,0,0,5], + "emptyIntervalNumbers": [0,0,1,0,4], + "levels": [0,0.01356921313,0.04765003824,0.06838973265,0.6853858332], + "informationRates": [0,19.84100917,69.67425722,100,1002.176506], + "histograms": [ + { + "bounds": [0.95,3.05], + "frequencies": [105] + }, + { + "bounds": [0.95,1.65,2.45,3.05], + "frequencies": [32,6,67] + }, + { + "bounds": [0.95,1.25,1.65,2.05,2.85,3.05], + "frequencies": [4,28,6,0,67] + }, + { + "bounds": [0.95,1.25,1.75,2.95,3.05], + "frequencies": [4,32,2,67] + }, + { + "bounds": [0.9375,1.299999997,1.300000012,1.399999991,1.400000006,1.499999985,1.5,1.599999994,1.600000009,1.699999988,1.700000003,2.999999985,3], + "frequencies": [4,4,0,9,0,10,0,5,0,4,2,67] + } + ] + } + }, + "R07": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.95,1.25], + [1.25,1.75], + [1.75,3.85], + [3.85,6.15], + [6.15,6.95] + ] + } + ], + "frequencies": [4,32,7,59,3] + }, + "modlHistograms": { + "histogramNumber": 5, + "interpretableHistogramNumber": 4, + "truncationEpsilon": 0.1, + "removedSingularIntervalNumber": 0, + "granularities": [0,3,5,7,29], + "intervalNumbers": [1,3,7,5,41], + "peakIntervalNumbers": [0,0,2,2,20], + "spikeIntervalNumbers": [0,0,0,0,20], + "emptyIntervalNumbers": [0,0,0,0,14], + "levels": [0,0.005142168327,0.006968300144,0.00863976387,0.2871300831], + "informationRates": [0,59.51746372,80.6538263,100,3323.355678], + "histograms": [ + { + "bounds": [0.95,6.95], + "frequencies": [105] + }, + { + "bounds": [0.95,1.65,3.25,6.95], + "frequencies": [32,7,66] + }, + { + "bounds": [0.95,1.25,1.65,2.05,3.25,4.45,5.65,6.95], + "frequencies": [4,28,6,1,17,37,12] + }, + { + "bounds": [0.95,1.25,1.75,3.85,6.15,6.95], + "frequencies": [4,32,7,59,3] + }, + { + "bounds": 
[0.875,1.299999997,1.300000012,1.399999991,1.400000006,1.499999985,1.5,1.599999994,1.600000009,1.699999988,1.700000003,1.899999991,1.900000006,3.499999985,3.5,3.899999991,3.900000006,3.999999985,4,4.099999994,4.100000009,4.499999985,4.5,4.699999988,4.700000003,4.799999997,4.800000012,4.899999991,4.900000006,4.999999985,5,5.099999994,5.100000009,5.499999985,5.5,5.599999994,5.600000009,6.099999994,6.100000009,6.699999988,6.700000003,7], + "frequencies": [4,4,0,9,0,10,0,5,0,4,0,2,2,2,1,3,0,4,0,3,3,5,0,4,0,3,0,3,0,3,0,5,5,3,0,6,6,3,0,2,1] + } + ] + } + }, + "R08": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.05,0.15], + [0.15,0.25], + [0.25,0.45], + [0.45,0.85], + [0.85,2.55] + ] + } + ], + "frequencies": [6,20,11,1,67] + }, + "modlHistograms": { + "histogramNumber": 5, + "interpretableHistogramNumber": 4, + "truncationEpsilon": 0.1, + "removedSingularIntervalNumber": 0, + "granularities": [0,1,2,3,30], + "intervalNumbers": [1,2,2,5,28], + "peakIntervalNumbers": [0,0,0,1,13], + "spikeIntervalNumbers": [0,0,0,1,13], + "emptyIntervalNumbers": [0,0,0,0,9], + "levels": [0,0.003138504791,0.003358793296,0.004913113283,0.5003837405], + "informationRates": [0,63.88016335,68.36384798,100,10184.65709], + "histograms": [ + { + "bounds": [0.05,2.55], + "frequencies": [105] + }, + { + "bounds": [0.05,0.45,2.55], + "frequencies": [37,68] + }, + { + "bounds": [0.05,0.25,2.55], + "frequencies": [26,79] + }, + { + "bounds": [0.05,0.15,0.25,0.45,0.85,2.55], + "frequencies": [6,20,11,1,67] + }, + { + "bounds": [0.09999999963,0.1000000001,0.1999999993,0.2000000002,0.2999999989,0.3000000007,0.3999999985,0.4000000004,0.9999999963,1,1.299999997,1.300000004,1.399999999,1.400000006,1.499999993,1.5,1.599999994,1.600000001,1.799999997,1.800000004,1.999999993,2,2.099999994,2.100000009,2.199999988,2.200000003,2.299999997,2.300000012,2.5], + "frequencies": [6,0,20,0,5,0,6,1,5,4,9,0,6,0,5,0,3,1,9,2,5,0,4,0,3,0,8,3] + } + ] + } + }, + "R09": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1"], + ["5"], + ["4"], + ["3"], + ["6"] + ], + "defaultGroupIndex": 4 + } + ], + "frequencies": [38,27,25,8,7] + }, + "inputValues": { + "values": ["1","5","4","3","6"], + "frequencies": [38,27,25,8,7] + } + }, + "R10": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.25,7], + [7,7.75] + ] + } + ], + "frequencies": [97,8] + }, + "modlHistograms": { + "histogramNumber": 2, + "interpretableHistogramNumber": 2, + "truncationEpsilon": 0, + "removedSingularIntervalNumber": 0, + "granularities": [0,2], + "intervalNumbers": [1,2], + "peakIntervalNumbers": [0,0], + "spikeIntervalNumbers": [0,0], + "emptyIntervalNumbers": [0,0], + "levels": [0,0.0003815098758], + "informationRates": [0,100], + "histograms": [ + { + "bounds": [4.25,7.75], + "frequencies": [105] + }, + { + "bounds": [4.25,7,7.75], + "frequencies": [97,8] + } + ] + } + }, + "R11": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.95,2.45], + [2.45,3.25], + [3.25,4.45] + ] + } + ], + "frequencies": [5,72,28] + }, + "modlHistograms": { + "histogramNumber": 4, + "interpretableHistogramNumber": 3, + 
"truncationEpsilon": 0.1, + "removedSingularIntervalNumber": 0, + "granularities": [0,1,2,29], + "intervalNumbers": [1,2,3,23], + "peakIntervalNumbers": [0,0,1,11], + "spikeIntervalNumbers": [0,0,0,11], + "emptyIntervalNumbers": [0,0,0,8], + "levels": [0,0.002413792626,0.009713296272,0.471063928], + "informationRates": [0,24.85039639,100,4849.681455], + "histograms": [ + { + "bounds": [1.95,4.45], + "frequencies": [105] + }, + { + "bounds": [1.95,3.25,4.45], + "frequencies": [77,28] + }, + { + "bounds": [1.95,2.45,3.25,4.45], + "frequencies": [5,72,28] + }, + { + "bounds": [1.9921875,2.499999985,2.5,2.599999994,2.600000009,2.699999988,2.700000003,2.799999997,2.800000012,2.899999991,2.900000006,2.999999985,3,3.099999994,3.100000009,3.199999988,3.200000003,3.399999991,3.400000006,3.499999985,3.5,3.799999997,3.800000012,4.40625], + "frequencies": [5,6,0,3,0,5,0,11,0,6,0,20,0,10,0,11,2,10,0,3,3,4,6] + } + ] + } + }, + "R12": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.45,1.55], + [1.55,2.55] + ] + } + ], + "frequencies": [67,38] + }, + "modlHistograms": { + "histogramNumber": 4, + "interpretableHistogramNumber": 3, + "truncationEpsilon": 0.1, + "removedSingularIntervalNumber": 0, + "granularities": [0,1,5,28], + "intervalNumbers": [1,2,2,14], + "peakIntervalNumbers": [0,0,0,6], + "spikeIntervalNumbers": [0,0,0,6], + "emptyIntervalNumbers": [0,0,0,4], + "levels": [0,0.02500256671,0.03972305577,0.6316260053], + "informationRates": [0,62.94220379,100,1590.074059], + "histograms": [ + { + "bounds": [1.45,2.55], + "frequencies": [105] + }, + { + "bounds": [1.45,1.65,2.55], + "frequencies": [70,35] + }, + { + "bounds": [1.45,1.55,2.55], + "frequencies": [67,38] + }, + { + "bounds": [1.499999985,1.5,1.599999994,1.600000009,1.799999997,1.800000012,1.999999985,2,2.099999994,2.100000009,2.199999988,2.200000003,2.299999997,2.300000012,2.5], + "frequencies": [67,0,3,1,9,2,5,0,4,0,3,0,8,3] + } + ] + } + } + } + }, + "bivariatePreparationReport": { + "reportType": "BivariatePreparation", + "summary": { + "dictionary": "Iris", + "variables": { + "types": [ + "Categorical", + "Numerical" + ], + "numbers": [ + 4, + 8 + ] + }, + "database": "..\/..\/..\/datasets\/Iris\/Iris.txt", + "samplePercentage": 70, + "samplingMode": "Include sample", + "selectionVariable": "", + "selectionValue": "", + "instances": 105, + "learningTask": "Unsupervised analysis", + "evaluatedVariablePairs": 55, + "informativeVariablePairs": 38 + }, + "variablesPairsStatistics": [ + { + "rank": "R01", + "name1": "Class", + "name2": "Class1", + "level": 0.286471, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 18.9311, + "dataCost": 110.25 + }, + { + "rank": "R02", + "name1": "Class", + "name2": "Class2", + "level": 0.270234, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 19.0156, + "dataCost": 110.25 + }, + { + "rank": "R03", + "name1": "Class", + "name2": "SPetalLength", + "level": 0.258511, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 41.7647, + "dataCost": 157.188 + }, + { + "rank": "R04", + "name1": "Class1", + "name2": "SPetalLength", + "level": 0.231831, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 27.2099, + "dataCost": 142.253 + }, + { + 
"rank": "R05", + "name1": "PetalLength", + "name2": "SPetalLength", + "level": 0.151582, + "variables": 2, + "parts1": 5, + "parts2": 5, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 69.091, + "dataCost": 386.913 + }, + { + "rank": "R06", + "name1": "Class2", + "name2": "SPetalLength", + "level": 0.142436, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 27.7273, + "dataCost": 158.704 + }, + { + "rank": "R07", + "name1": "Class", + "name2": "PetalWidth", + "level": 0.14197, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 31.1679, + "dataCost": 396.708 + }, + { + "rank": "R08", + "name1": "Class", + "name2": "PetalLength", + "level": 0.136908, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 31.1679, + "dataCost": 399.272 + }, + { + "rank": "R09", + "name1": "Class1", + "name2": "LowerPetalLength", + "level": 0.111506, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 386.913 + }, + { + "rank": "R10", + "name1": "Class1", + "name2": "PetalLength", + "level": 0.111506, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 386.913 + }, + { + "rank": "R11", + "name1": "Class1", + "name2": "PetalWidth", + "level": 0.111506, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 386.913 + }, + { + "rank": "R12", + "name1": "PetalWidth", + "name2": "SPetalLength", + "level": 0.109807, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 40.5555, + "dataCost": 438.232 + }, + { + "rank": "R13", + "name1": "Class", + "name2": "LowerPetalLength", + "level": 0.0982915, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 19.0436, + "dataCost": 430.955 + }, + { + "rank": "R14", + "name1": "LowerPetalLength", + "name2": "SPetalLength", + "level": 0.0887331, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 27.3225, + "dataCost": 462.959 + }, + { + "rank": "R15", + "name1": "PetalLength", + "name2": "PetalWidth", + "level": 0.0785935, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 29.9587, + "dataCost": 676.972 + }, + { + "rank": "R16", + "name1": "Class", + "name2": "UpperPetalWidth", + "level": 0.0721164, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 19.0868, + "dataCost": 444.17 + }, + { + "rank": "R17", + "name1": "PetalWidth", + "name2": "UpperPetalWidth", + "level": 0.0703191, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 29.9587, + "dataCost": 683.381 + }, + { + "rank": "R18", + "name1": "LowerPetalLength", + "name2": "PetalLength", + "level": 0.0701201, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 29.9587, + "dataCost": 683.535 + }, + { + "rank": "R19", + "name1": "Class2", + "name2": "PetalWidth", + "level": 0.0662843, + "variables": 2, + "parts1": 2, + "parts2": 3, + "cells": 5, + 
"constructionCost": 6.71557, + "preparationCost": 20.8147, + "dataCost": 396.708 + }, + { + "rank": "R20", + "name1": "SPetalLength", + "name2": "SepalLength", + "level": 0.0654694, + "variables": 2, + "parts1": 3, + "parts2": 4, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 49.4973, + "dataCost": 453.472 + }, + { + "rank": "R21", + "name1": "Class2", + "name2": "PetalLength", + "level": 0.0606416, + "variables": 2, + "parts1": 2, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 20.8147, + "dataCost": 399.272 + }, + { + "rank": "R22", + "name1": "LowerPetalLength", + "name2": "PetalWidth", + "level": 0.0598398, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 2, + "constructionCost": 6.71557, + "preparationCost": 13.838, + "dataCost": 707.618 + }, + { + "rank": "R23", + "name1": "Class", + "name2": "SepalLength", + "level": 0.059526, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 7, + "constructionCost": 6.71557, + "preparationCost": 31.1679, + "dataCost": 438.466 + }, + { + "rank": "R24", + "name1": "Class1", + "name2": "Class2", + "level": 0.0559199, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 13.6129, + "dataCost": 110.25 + }, + { + "rank": "R25", + "name1": "Class1", + "name2": "SepalLength", + "level": 0.0531576, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 413.664 + }, + { + "rank": "R26", + "name1": "SPetalLength", + "name2": "UpperPetalWidth", + "level": 0.0466723, + "variables": 2, + "parts1": 3, + "parts2": 2, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 31.8478, + "dataCost": 481.373 + }, + { + "rank": "R27", + "name1": "PetalLength", + "name2": "SepalLength", + "level": 0.0407398, + "variables": 2, + "parts1": 4, + "parts2": 4, + "cells": 8, + "constructionCost": 6.71557, + "preparationCost": 47.7303, + "dataCost": 688.519 + }, + { + "rank": "R28", + "name1": "PetalLength", + "name2": "UpperPetalWidth", + "level": 0.0401281, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 13.838, + "dataCost": 722.885 + }, + { + "rank": "R29", + "name1": "PetalWidth", + "name2": "SepalLength", + "level": 0.0303985, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 8, + "constructionCost": 6.71557, + "preparationCost": 29.9587, + "dataCost": 714.3 + }, + { + "rank": "R30", + "name1": "LowerPetalLength", + "name2": "SepalLength", + "level": 0.0253003, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 13.838, + "dataCost": 734.369 + }, + { + "rank": "R31", + "name1": "Class1", + "name2": "UpperPetalWidth", + "level": 0.0166012, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 430.424 + }, + { + "rank": "R32", + "name1": "SepalLength", + "name2": "UpperPetalWidth", + "level": 0.0164148, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 4, + "constructionCost": 6.71557, + "preparationCost": 13.838, + "dataCost": 741.251 + }, + { + "rank": "R33", + "name1": "Class1", + "name2": "SepalWidth", + "level": 0.00749643, + "variables": 2, + "parts1": 2, + "parts2": 3, + "cells": 5, + "constructionCost": 6.71557, + "preparationCost": 20.8147, + "dataCost": 427.509 + }, + { + "rank": "R34", + "name1": "Class2", + "name2": 
"LowerPetalLength", + "level": 0.0065114, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 13.7255, + "dataCost": 430.955 + }, + { + "rank": "R35", + "name1": "Class", + "name2": "SepalWidth", + "level": 0.00543684, + "variables": 2, + "parts1": 3, + "parts2": 2, + "cells": 6, + "constructionCost": 6.71557, + "preparationCost": 22.1365, + "dataCost": 474.893 + }, + { + "rank": "R36", + "name1": "LowerPetalLength", + "name2": "UpperPetalWidth", + "level": 0.00366071, + "variables": 2, + "parts1": 2, + "parts2": 2, + "cells": 3, + "constructionCost": 6.71557, + "preparationCost": 13.838, + "dataCost": 751.129 + }, + { + "rank": "R37", + "name1": "PetalWidth", + "name2": "SepalWidth", + "level": 0.00221737, + "variables": 2, + "parts1": 3, + "parts2": 2, + "cells": 6, + "constructionCost": 6.71557, + "preparationCost": 20.9273, + "dataCost": 745.158 + }, + { + "rank": "R38", + "name1": "SPetalLength", + "name2": "SepalWidth", + "level": 0.00143264, + "variables": 2, + "parts1": 3, + "parts2": 3, + "cells": 9, + "constructionCost": 6.71557, + "preparationCost": 40.2319, + "dataCost": 497.662 + }, + { + "rank": "R39", + "name1": "Class", + "name2": "Dummy2", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 8.64312, + "dataCost": 497.163 + }, + { + "rank": "R40", + "name1": "Class1", + "name2": "Dummy2", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 4.66344, + "dataCost": 453.12 + }, + { + "rank": "R41", + "name1": "Class2", + "name2": "Dummy2", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 4.66344, + "dataCost": 448.998 + }, + { + "rank": "R42", + "name1": "Class2", + "name2": "SepalLength", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 4.66344, + "dataCost": 448.998 + }, + { + "rank": "R43", + "name1": "Class2", + "name2": "SepalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 4.66344, + "dataCost": 448.998 + }, + { + "rank": "R44", + "name1": "Class2", + "name2": "UpperPetalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 4.66344, + "dataCost": 448.998 + }, + { + "rank": "R45", + "name1": "Dummy2", + "name2": "LowerPetalLength", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R46", + "name1": "Dummy2", + "name2": "PetalLength", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R47", + "name1": "Dummy2", + "name2": "PetalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R48", + "name1": "Dummy2", + "name2": "SPetalLength", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 15.5317, + "dataCost": 529.166 + }, + { + "rank": "R49", + "name1": "Dummy2", + "name2": "SepalLength", + 
"level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R50", + "name1": "Dummy2", + "name2": "SepalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R51", + "name1": "Dummy2", + "name2": "UpperPetalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R52", + "name1": "LowerPetalLength", + "name2": "SepalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R53", + "name1": "PetalLength", + "name2": "SepalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R54", + "name1": "SepalLength", + "name2": "SepalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + }, + { + "rank": "R55", + "name1": "SepalWidth", + "name2": "UpperPetalWidth", + "level": 0, + "variables": 0, + "parts1": 1, + "parts2": 1, + "cells": 1, + "constructionCost": 0.693147, + "preparationCost": 0, + "dataCost": 773.825 + } + ], + "variablesPairsDetailedStatistics": { + "R01": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-virginica","Iris-versicolor"], + ["Iris-setosa"] + ], + "defaultGroupIndex": 0 + }, + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + } + ], + "cellIds": ["C1","C4"], + "cellPartIndexes": [ + [0,0], + [1,1] + ], + "cellFrequencies": [67,38] + } + }, + "R02": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa","Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + } + ], + "cellIds": ["C1","C4"], + "cellPartIndexes": [ + [0,0], + [1,1] + ], + "cellFrequencies": [73,32] + } + }, + "R03": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1"], + ["5","6"], + ["4","3"] + ], + "defaultGroupIndex": 1 + } + ], + "cellIds": ["C1","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [38,32,2,3,30] + } + }, + "R04": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": 
"SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["5","4","3","6"], + ["1"] + ], + "defaultGroupIndex": 0 + } + ], + "cellIds": ["C1","C4"], + "cellPartIndexes": [ + [0,0], + [1,1] + ], + "cellFrequencies": [67,38] + } + }, + "R05": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3.95], + [3.95,4.95], + [4.95,5.95], + [5.95,6.9] + ] + }, + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1"], + ["5"], + ["4"], + ["3"], + ["6"] + ], + "defaultGroupIndex": 4 + } + ], + "cellIds": ["C1","C9","C13","C17","C25"], + "cellPartIndexes": [ + [0,0], + [3,1], + [2,2], + [1,3], + [4,4] + ], + "cellFrequencies": [38,27,25,8,7] + } + }, + "R06": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1","5","6"], + ["4","3"] + ], + "defaultGroupIndex": 0 + } + ], + "cellIds": ["C1","C2","C3","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1] + ], + "cellFrequencies": [70,2,3,30] + } + }, + "R07": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.75], + [1.75,2.5] + ] + } + ], + "cellIds": ["C1","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [38,2,31,33,1] + } + }, + "R08": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,4.85], + [4.85,6.9] + ] + } + ], + "cellIds": ["C1","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [38,1,29,34,3] + } + }, + "R09": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + } + ], + "cellIds": ["C2","C3"], + "cellPartIndexes": [ + [1,0], + [0,1] + ], + "cellFrequencies": [38,67] + } + }, + "R10": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,6.9] + ] + } + ], + "cellIds": ["C2","C3"], + "cellPartIndexes": [ + [1,0], + [0,1] + ], + "cellFrequencies": 
[38,67] + } + }, + "R11": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,2.5] + ] + } + ], + "cellIds": ["C2","C3"], + "cellPartIndexes": [ + [1,0], + [0,1] + ], + "cellFrequencies": [38,67] + } + }, + "R12": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.65], + [1.65,2.5] + ] + }, + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1"], + ["5","6"], + ["4","3"] + ], + "defaultGroupIndex": 1 + } + ], + "cellIds": ["C1","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [38,3,31,29,4] + } + }, + "R13": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-virginica","Iris-versicolor"], + ["Iris-setosa"] + ], + "defaultGroupIndex": 0 + }, + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + } + ], + "cellIds": ["C2","C3"], + "cellPartIndexes": [ + [1,0], + [0,1] + ], + "cellFrequencies": [38,67] + } + }, + "R14": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + }, + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["5","4","3","6"], + ["1"] + ], + "defaultGroupIndex": 0 + } + ], + "cellIds": ["C2","C3"], + "cellPartIndexes": [ + [1,0], + [0,1] + ], + "cellFrequencies": [67,38] + } + }, + "R15": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,4.75], + [4.75,6.9] + ] + }, + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.65], + [1.65,2.5] + ] + } + ], + "cellIds": ["C1","C5","C6","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,1], + [2,2] + ], + "cellFrequencies": [38,27,5,35] + } + }, + "R16": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa","Iris-versicolor"], + ["Iris-virginica"] + ], + "defaultGroupIndex": 0 + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.75], + [1.75,2.5] + ] + } + ], + "cellIds": ["C1","C2","C3","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1] + ], + "cellFrequencies": [69,2,1,33] + } + }, + "R17": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,1.55], + [1.55,2.05], + [2.05,2.5] + ] + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.55], + [1.55,2.05], + [2.05,2.5] + ] + } + ], 
+ "cellIds": ["C1","C5","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,2] + ], + "cellFrequencies": [67,20,18] + } + }, + "R18": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,1.45], + [1.45,2.4], + [2.4,3] + ] + }, + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,1.45], + [1.45,2.4], + [2.4,6.9] + ] + } + ], + "cellIds": ["C1","C5","C9"], + "cellPartIndexes": [ + [0,0], + [1,1], + [2,2] + ], + "cellFrequencies": [17,21,67] + } + }, + "R19": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.75], + [1.75,2.5] + ] + } + ], + "cellIds": ["C1","C3","C4","C5","C6"], + "cellPartIndexes": [ + [0,0], + [0,1], + [1,1], + [0,2], + [1,2] + ], + "cellFrequencies": [38,2,31,33,1] + } + }, + "R20": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["5","4"], + ["1","3"], + ["6"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.35], + [5.35,5.85], + [5.85,7.15], + [7.15,7.7] + ] + } + ], + "cellIds": ["C2","C4","C5","C7","C12"], + "cellPartIndexes": [ + [1,0], + [0,1], + [1,1], + [0,2], + [2,3] + ], + "cellFrequencies": [34,10,12,42,7] + } + }, + "R21": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,4.85], + [4.85,6.9] + ] + } + ], + "cellIds": ["C1","C3","C4","C5","C6"], + "cellPartIndexes": [ + [0,0], + [0,1], + [1,1], + [0,2], + [1,2] + ], + "cellFrequencies": [38,1,29,34,3] + } + }, + "R22": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + }, + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,2.5] + ] + } + ], + "cellIds": ["C1","C4"], + "cellPartIndexes": [ + [0,0], + [1,1] + ], + "cellFrequencies": [38,67] + } + }, + "R23": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.45], + [5.45,6.15], + [6.15,7.7] + ] + } + ], + "cellIds": ["C1","C3","C4","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [2,0], + [0,1], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [34,5,4,5,19,30,8] + } + }, + "R24": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value 
groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + } + ], + "cellIds": ["C1","C2","C3"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1] + ], + "cellFrequencies": [35,38,32] + } + }, + "R25": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.45], + [5.45,7.7] + ] + } + ], + "cellIds": ["C1","C2","C3","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1] + ], + "cellFrequencies": [5,34,62,4] + } + }, + "R26": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["1","3"], + ["5","6"], + ["4"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.55], + [1.55,2.5] + ] + } + ], + "cellIds": ["C1","C2","C3","C5","C6"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [1,1], + [2,1] + ], + "cellFrequencies": [46,2,19,32,6] + } + }, + "R27": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,3.55], + [3.55,4.6], + [4.6,5.95], + [5.95,6.9] + ] + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.45], + [5.45,5.85], + [5.85,7.15], + [7.15,7.7] + ] + } + ], + "cellIds": ["C1","C2","C5","C6","C7","C10","C11","C16"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1], + [2,1], + [1,2], + [2,2], + [3,3] + ], + "cellFrequencies": [37,2,5,10,2,7,35,7] + } + }, + "R28": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,4.75], + [4.75,6.9] + ] + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.65], + [1.65,2.5] + ] + } + ], + "cellIds": ["C1","C2","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [1,1] + ], + "cellFrequencies": [65,5,35] + } + }, + "R29": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.35], + [1.35,2.5] + ] + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.45], + [5.45,5.85], + [5.85,7.7] + ] + } + ], + "cellIds": ["C1","C2","C3","C4","C5","C6","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [0,1], + [1,1], + [2,1], + [1,2], + [2,2] + ], + "cellFrequencies": [34,3,2,4,10,3,5,44] + } + }, + "R30": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + }, + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.45], + [5.45,7.7] + ] + } + ], + "cellIds": ["C1","C2","C3","C4"], + 
"cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1] + ], + "cellFrequencies": [34,5,4,62] + } + }, + "R31": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.55], + [1.55,2.5] + ] + } + ], + "cellIds": ["C1","C2","C3"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1] + ], + "cellFrequencies": [29,38,38] + } + }, + "R32": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SepalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [4.3,5.85], + [5.85,7.7] + ] + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.55], + [1.55,2.5] + ] + } + ], + "cellIds": ["C1","C2","C3","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [0,1], + [1,1] + ], + "cellFrequencies": [54,13,2,36] + } + }, + "R33": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class1", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["setosa"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [2,2.85], + [2.85,3.35], + [3.35,4.4] + ] + } + ], + "cellIds": ["C1","C3","C4","C5","C6"], + "cellPartIndexes": [ + [0,0], + [0,1], + [1,1], + [0,2], + [1,2] + ], + "cellFrequencies": [30,32,17,5,21] + } + }, + "R34": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class2", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + [""], + ["versicolor"] + ], + "defaultGroupIndex": 1 + }, + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + } + ], + "cellIds": ["C1","C3","C4"], + "cellPartIndexes": [ + [0,0], + [0,1], + [1,1] + ], + "cellFrequencies": [38,35,32] + } + }, + "R35": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "Class", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["Iris-setosa"], + ["Iris-virginica"], + ["Iris-versicolor"] + ], + "defaultGroupIndex": 2 + }, + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [2,2.95], + [2.95,4.4] + ] + } + ], + "cellIds": ["C1","C2","C3","C4","C5","C6"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [0,1], + [1,1], + [2,1] + ], + "cellFrequencies": [1,13,22,37,22,10] + } + }, + "R36": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "LowerPetalLength", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1,2.4], + [2.4,3] + ] + }, + { + "variable": "UpperPetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [1.5,1.55], + [1.55,2.5] + ] + } + ], + "cellIds": ["C1","C2","C4"], + "cellPartIndexes": [ + [0,0], + [1,0], + [1,1] + ], + "cellFrequencies": [38,29,38] + } + }, + "R37": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "PetalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [0.1,0.75], + [0.75,1.45], + [1.45,2.5] + ] + }, + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + 
"partition": [ + [2,2.95], + [2.95,4.4] + ] + } + ], + "cellIds": ["C1","C2","C3","C4","C5","C6"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [0,1], + [1,1], + [2,1] + ], + "cellFrequencies": [1,21,14,37,3,29] + } + }, + "R38": { + "dataGrid": { + "isSupervised": false, + "dimensions": [ + { + "variable": "SPetalLength", + "type": "Categorical", + "partitionType": "Value groups", + "partition": [ + ["4","3","6"], + ["1"], + ["5"] + ], + "defaultGroupIndex": 0 + }, + { + "variable": "SepalWidth", + "type": "Numerical", + "partitionType": "Intervals", + "partition": [ + [2,2.95], + [2.95,3.25], + [3.25,4.4] + ] + } + ], + "cellIds": ["C1","C2","C3","C4","C5","C6","C7","C8","C9"], + "cellPartIndexes": [ + [0,0], + [1,0], + [2,0], + [0,1], + [1,1], + [2,1], + [0,2], + [1,2], + [2,2] + ], + "cellFrequencies": [26,1,9,10,15,16,4,22,2] + } + } + } + }, + "khiops_encoding": "ascii" +} diff --git a/tests/resources/analysis_results/ref_reports/IrisU2D.txt b/tests/resources/analysis_results/ref_reports/IrisU2D.txt new file mode 100644 index 00000000..7600a3c3 --- /dev/null +++ b/tests/resources/analysis_results/ref_reports/IrisU2D.txt @@ -0,0 +1,1159 @@ +Tool Khiops +Version 10.5.0-a1 +Short description + + +Report Preparation + +Dictionary Iris +Variables + Categorical 4 + Numerical 8 + Total 12 +Database ../../../datasets/Iris/Iris.txt +Sample percentage 70 +Sampling mode Include sample +Selection variable +Selection value +Instances 105 +Learning task Unsupervised analysis +Evaluated variables 12 +Informative variables 0 +Max number of constructed variables 0 +Max number of trees 0 +Max number of variable pairs 100 +Discretization MODL +Value grouping MODL + +Variable statistics +Rank Name Type Level Target parts Parts Values Min Max Mean StdDev Missing number Mode Mode frequency Construction cost Preparation cost Data cost Derivation rule +R01 Class Categorical 3 3 Iris-setosa 38 3.17805 +R02 Class1 Categorical 2 2 67 3.17805 IfC(EQc(Class, "Iris-setosa"), "setosa", "") +R03 Class2 Categorical 2 2 73 3.17805 IfC(EQc(Class, "Iris-versicolor"), "versicolor", "") +R04 Dummy1 Numerical 1 1 0 0 0 0 0 3.17805 Copy(0) +R05 Dummy2 Numerical 1 105 0.005121241265 0.9859650261 0.5173966838 0.2650019122 0 3.17805 Random() +R06 LowerPetalLength Numerical 4 10 1 3 2.446666667 0.7433600251 0 3.17805 If(LE(PetalLength, 3), PetalLength, 3) +R07 PetalLength Numerical 5 36 1 6.9 3.686666667 1.80132579 0 3.17805 +R08 PetalWidth Numerical 5 21 0.1 2.5 1.175238095 0.7880996979 0 3.17805 +R09 SPetalLength Categorical 5 5 1 38 3.17805 AsCategorical(Floor(PetalLength)) +R10 SepalLength Numerical 2 31 4.3 7.7 5.827619048 0.8375127846 0 3.17805 +R11 SepalWidth Numerical 3 23 2 4.4 3.081904762 0.4284592446 0 3.17805 +R12 UpperPetalWidth Numerical 2 11 1.5 2.5 1.692380952 0.2962287527 0 3.17805 If(GE(PetalWidth, 1.5), PetalWidth, 1.5) + +Detailed variable statistics + +Rank R01 Class Categorical + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +Cells +Value group Frequency +{Iris-setosa} 38 +{Iris-virginica} 35 +{Iris-versicolor} 32 + +Input values + Iris-setosa 38 + Iris-virginica 35 + Iris-versicolor 32 + +Rank R02 Class1 Categorical + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +Cells +Value group Frequency +{} 67 +{setosa} 38 + +Input values + 67 + setosa 38 + +Rank R03 Class2 Categorical + +Data grid Unsupervised +Dimensions +Class2 
Categorical Value groups + {} + {versicolor} versicolor * +Cells +Value group Frequency +{} 73 +{versicolor} 32 + +Input values + 73 + versicolor 32 + +Rank R05 Dummy2 Numerical + +Data grid Unsupervised +Dimensions +Dummy2 Numerical Intervals + ]-inf;+inf[ 0.00390625 1 +Cells +Interval Frequency +]-inf;+inf[ 105 + +Rank R06 LowerPetalLength Numerical + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;1.25] 0.95 1.25 + ]1.25;1.75] 1.25 1.75 + ]1.75;2.95] 1.75 2.95 + ]2.95;+inf[ 2.95 3.05 +Cells +Interval Frequency +]-inf;1.25] 4 +]1.25;1.75] 32 +]1.75;2.95] 2 +]2.95;+inf[ 67 + +Rank R07 PetalLength Numerical + +Data grid Unsupervised +Dimensions +PetalLength Numerical Intervals + ]-inf;1.25] 0.95 1.25 + ]1.25;1.75] 1.25 1.75 + ]1.75;3.85] 1.75 3.85 + ]3.85;6.15] 3.85 6.15 + ]6.15;+inf[ 6.15 6.95 +Cells +Interval Frequency +]-inf;1.25] 4 +]1.25;1.75] 32 +]1.75;3.85] 7 +]3.85;6.15] 59 +]6.15;+inf[ 3 + +Rank R08 PetalWidth Numerical + +Data grid Unsupervised +Dimensions +PetalWidth Numerical Intervals + ]-inf;0.15] 0.05 0.15 + ]0.15;0.25] 0.15 0.25 + ]0.25;0.45] 0.25 0.45 + ]0.45;0.85] 0.45 0.85 + ]0.85;+inf[ 0.85 2.55 +Cells +Interval Frequency +]-inf;0.15] 6 +]0.15;0.25] 20 +]0.25;0.45] 11 +]0.45;0.85] 1 +]0.85;+inf[ 67 + +Rank R09 SPetalLength Categorical + +Data grid Unsupervised +Dimensions +SPetalLength Categorical Value groups + {1} 1 + {5} 5 + {4} 4 + {3} 3 + {6} 6 * +Cells +Value group Frequency +{1} 38 +{5} 27 +{4} 25 +{3} 8 +{6} 7 + +Input values + 1 38 + 5 27 + 4 25 + 3 8 + 6 7 + +Rank R10 SepalLength Numerical + +Data grid Unsupervised +Dimensions +SepalLength Numerical Intervals + ]-inf;7] 4.25 7 + ]7;+inf[ 7 7.75 +Cells +Interval Frequency +]-inf;7] 97 +]7;+inf[ 8 + +Rank R11 SepalWidth Numerical + +Data grid Unsupervised +Dimensions +SepalWidth Numerical Intervals + ]-inf;2.45] 1.95 2.45 + ]2.45;3.25] 2.45 3.25 + ]3.25;+inf[ 3.25 4.45 +Cells +Interval Frequency +]-inf;2.45] 5 +]2.45;3.25] 72 +]3.25;+inf[ 28 + +Rank R12 UpperPetalWidth Numerical + +Data grid Unsupervised +Dimensions +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.45 1.55 + ]1.55;+inf[ 1.55 2.55 +Cells +Interval Frequency +]-inf;1.55] 67 +]1.55;+inf[ 38 + + +Report Bivariate preparation + +Dictionary Iris +Variables + Categorical 4 + Numerical 8 + Total 12 +Database ../../../datasets/Iris/Iris.txt +Sample percentage 70 +Sampling mode Include sample +Selection variable +Selection value +Instances 105 +Learning task Unsupervised analysis +Evaluated variable pairs 55 +Informative variable pairs 38 + +Variable pair statistics +Rank Name 1 Name 2 Level Variables Parts 1 Parts 2 Cells Construction cost Preparation cost Data cost +R01 Class Class1 0.286471 2 2 2 2 6.71557 18.9311 110.25 +R02 Class Class2 0.270234 2 2 2 2 6.71557 19.0156 110.25 +R03 Class SPetalLength 0.258511 2 3 3 5 6.71557 41.7647 157.188 +R04 Class1 SPetalLength 0.231831 2 2 2 2 6.71557 27.2099 142.253 +R05 PetalLength SPetalLength 0.151582 2 5 5 5 6.71557 69.091 386.913 +R06 Class2 SPetalLength 0.142436 2 2 2 4 6.71557 27.7273 158.704 +R07 Class PetalWidth 0.14197 2 3 3 5 6.71557 31.1679 396.708 +R08 Class PetalLength 0.136908 2 3 3 5 6.71557 31.1679 399.272 +R09 Class1 LowerPetalLength 0.111506 2 2 2 2 6.71557 13.7255 386.913 +R10 Class1 PetalLength 0.111506 2 2 2 2 6.71557 13.7255 386.913 +R11 Class1 PetalWidth 0.111506 2 2 2 2 6.71557 13.7255 386.913 +R12 PetalWidth SPetalLength 0.109807 2 3 3 5 6.71557 40.5555 438.232 +R13 Class LowerPetalLength 0.0982915 2 2 2 2 6.71557 19.0436 430.955 +R14 
LowerPetalLength SPetalLength 0.0887331 2 2 2 2 6.71557 27.3225 462.959 +R15 PetalLength PetalWidth 0.0785935 2 3 3 4 6.71557 29.9587 676.972 +R16 Class UpperPetalWidth 0.0721164 2 2 2 4 6.71557 19.0868 444.17 +R17 PetalWidth UpperPetalWidth 0.0703191 2 3 3 3 6.71557 29.9587 683.381 +R18 LowerPetalLength PetalLength 0.0701201 2 3 3 3 6.71557 29.9587 683.535 +R19 Class2 PetalWidth 0.0662843 2 2 3 5 6.71557 20.8147 396.708 +R20 SPetalLength SepalLength 0.0654694 2 3 4 5 6.71557 49.4973 453.472 +R21 Class2 PetalLength 0.0606416 2 2 3 5 6.71557 20.8147 399.272 +R22 LowerPetalLength PetalWidth 0.0598398 2 2 2 2 6.71557 13.838 707.618 +R23 Class SepalLength 0.059526 2 3 3 7 6.71557 31.1679 438.466 +R24 Class1 Class2 0.0559199 2 2 2 3 6.71557 13.6129 110.25 +R25 Class1 SepalLength 0.0531576 2 2 2 4 6.71557 13.7255 413.664 +R26 SPetalLength UpperPetalWidth 0.0466723 2 3 2 5 6.71557 31.8478 481.373 +R27 PetalLength SepalLength 0.0407398 2 4 4 8 6.71557 47.7303 688.519 +R28 PetalLength UpperPetalWidth 0.0401281 2 2 2 3 6.71557 13.838 722.885 +R29 PetalWidth SepalLength 0.0303985 2 3 3 8 6.71557 29.9587 714.3 +R30 LowerPetalLength SepalLength 0.0253003 2 2 2 4 6.71557 13.838 734.369 +R31 Class1 UpperPetalWidth 0.0166012 2 2 2 3 6.71557 13.7255 430.424 +R32 SepalLength UpperPetalWidth 0.0164148 2 2 2 4 6.71557 13.838 741.251 +R33 Class1 SepalWidth 0.00749643 2 2 3 5 6.71557 20.8147 427.509 +R34 Class2 LowerPetalLength 0.0065114 2 2 2 3 6.71557 13.7255 430.955 +R35 Class SepalWidth 0.00543684 2 3 2 6 6.71557 22.1365 474.893 +R36 LowerPetalLength UpperPetalWidth 0.00366071 2 2 2 3 6.71557 13.838 751.129 +R37 PetalWidth SepalWidth 0.00221737 2 3 2 6 6.71557 20.9273 745.158 +R38 SPetalLength SepalWidth 0.00143264 2 3 3 9 6.71557 40.2319 497.662 +R39 Class Dummy2 0 0 1 1 1 0.693147 8.64312 497.163 +R40 Class1 Dummy2 0 0 1 1 1 0.693147 4.66344 453.12 +R41 Class2 Dummy2 0 0 1 1 1 0.693147 4.66344 448.998 +R42 Class2 SepalLength 0 0 1 1 1 0.693147 4.66344 448.998 +R43 Class2 SepalWidth 0 0 1 1 1 0.693147 4.66344 448.998 +R44 Class2 UpperPetalWidth 0 0 1 1 1 0.693147 4.66344 448.998 +R45 Dummy2 LowerPetalLength 0 0 1 1 1 0.693147 0 773.825 +R46 Dummy2 PetalLength 0 0 1 1 1 0.693147 0 773.825 +R47 Dummy2 PetalWidth 0 0 1 1 1 0.693147 0 773.825 +R48 Dummy2 SPetalLength 0 0 1 1 1 0.693147 15.5317 529.166 +R49 Dummy2 SepalLength 0 0 1 1 1 0.693147 0 773.825 +R50 Dummy2 SepalWidth 0 0 1 1 1 0.693147 0 773.825 +R51 Dummy2 UpperPetalWidth 0 0 1 1 1 0.693147 0 773.825 +R52 LowerPetalLength SepalWidth 0 0 1 1 1 0.693147 0 773.825 +R53 PetalLength SepalWidth 0 0 1 1 1 0.693147 0 773.825 +R54 SepalLength SepalWidth 0 0 1 1 1 0.693147 0 773.825 +R55 SepalWidth UpperPetalWidth 0 0 1 1 1 0.693147 0 773.825 + +Detailed variable pair statistics + +Rank R01 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-virginica, Iris-versicolor} Iris-virginica Iris-versicolor * + {Iris-setosa} Iris-setosa +Class1 Categorical Value groups + {} + {setosa} setosa * +Cells +Cell id Class Class1 Frequency +C1 {Iris-virginica, Iris-versicolor} {} 67 +C4 {Iris-setosa} {setosa} 38 +Confusion matrix + {Iris-virginica, Iris-versicolor} {Iris-setosa} +{} 67 0 +{setosa} 0 38 + +Rank R02 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa, Iris-virginica} Iris-setosa Iris-virginica + {Iris-versicolor} Iris-versicolor * +Class2 Categorical Value groups + {} + {versicolor} versicolor * +Cells +Cell id Class Class2 Frequency +C1 {Iris-setosa, Iris-virginica} {} 73 +C4 {Iris-versicolor} 
{versicolor} 32 +Confusion matrix + {Iris-setosa, Iris-virginica} {Iris-versicolor} +{} 73 0 +{versicolor} 0 32 + +Rank R03 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +SPetalLength Categorical Value groups + {1} 1 + {5, 6} 5 6 * + {4, 3} 4 3 +Cells +Cell id Class SPetalLength Frequency +C1 {Iris-setosa} {1} 38 +C5 {Iris-virginica} {5, 6} 32 +C6 {Iris-versicolor} {5, 6} 2 +C8 {Iris-virginica} {4, 3} 3 +C9 {Iris-versicolor} {4, 3} 30 +Confusion matrix + {Iris-setosa} {Iris-virginica} {Iris-versicolor} +{1} 38 0 0 +{5, 6} 0 32 2 +{4, 3} 0 3 30 + +Rank R04 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +SPetalLength Categorical Value groups + {5, 4, 3, ...} 5 4 3 6 * + {1} 1 +Cells +Cell id Class1 SPetalLength Frequency +C1 {} {5, 4, 3, ...} 67 +C4 {setosa} {1} 38 +Confusion matrix + {} {setosa} +{5, 4, 3, ...} 67 0 +{1} 0 38 + +Rank R05 + +Data grid Unsupervised +Dimensions +PetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;3.95] 2.4 3.95 + ]3.95;4.95] 3.95 4.95 + ]4.95;5.95] 4.95 5.95 + ]5.95;+inf[ 5.95 6.9 +SPetalLength Categorical Value groups + {1} 1 + {5} 5 + {4} 4 + {3} 3 + {6} 6 * +Cells +Cell id PetalLength SPetalLength Frequency +C1 ]-inf;2.4] {1} 38 +C9 ]4.95;5.95] {5} 27 +C13 ]3.95;4.95] {4} 25 +C17 ]2.4;3.95] {3} 8 +C25 ]5.95;+inf[ {6} 7 +Confusion matrix + ]-inf;2.4] ]2.4;3.95] ]3.95;4.95] ]4.95;5.95] ]5.95;+inf[ +{1} 38 0 0 0 0 +{5} 0 0 0 27 0 +{4} 0 0 25 0 0 +{3} 0 8 0 0 0 +{6} 0 0 0 0 7 + +Rank R06 + +Data grid Unsupervised +Dimensions +Class2 Categorical Value groups + {} + {versicolor} versicolor * +SPetalLength Categorical Value groups + {1, 5, 6} 1 5 6 * + {4, 3} 4 3 +Cells +Cell id Class2 SPetalLength Frequency +C1 {} {1, 5, 6} 70 +C2 {versicolor} {1, 5, 6} 2 +C3 {} {4, 3} 3 +C4 {versicolor} {4, 3} 30 +Confusion matrix + {} {versicolor} +{1, 5, 6} 70 2 +{4, 3} 3 30 + +Rank R07 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.75] 0.75 1.75 + ]1.75;+inf[ 1.75 2.5 +Cells +Cell id Class PetalWidth Frequency +C1 {Iris-setosa} ]-inf;0.75] 38 +C5 {Iris-virginica} ]0.75;1.75] 2 +C6 {Iris-versicolor} ]0.75;1.75] 31 +C8 {Iris-virginica} ]1.75;+inf[ 33 +C9 {Iris-versicolor} ]1.75;+inf[ 1 +Confusion matrix + {Iris-setosa} {Iris-virginica} {Iris-versicolor} +]-inf;0.75] 38 0 0 +]0.75;1.75] 0 2 31 +]1.75;+inf[ 0 33 1 + +Rank R08 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +PetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;4.85] 2.4 4.85 + ]4.85;+inf[ 4.85 6.9 +Cells +Cell id Class PetalLength Frequency +C1 {Iris-setosa} ]-inf;2.4] 38 +C5 {Iris-virginica} ]2.4;4.85] 1 +C6 {Iris-versicolor} ]2.4;4.85] 29 +C8 {Iris-virginica} ]4.85;+inf[ 34 +C9 {Iris-versicolor} ]4.85;+inf[ 3 +Confusion matrix + {Iris-setosa} {Iris-virginica} {Iris-versicolor} +]-inf;2.4] 38 0 0 +]2.4;4.85] 0 1 29 +]4.85;+inf[ 0 34 3 + +Rank R09 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +Cells +Cell id Class1 LowerPetalLength Frequency +C2 {setosa} ]-inf;2.4] 38 +C3 {} ]2.4;+inf[ 67 +Confusion matrix 
+ {} {setosa} +]-inf;2.4] 0 38 +]2.4;+inf[ 67 0 + +Rank R10 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +PetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 6.9 +Cells +Cell id Class1 PetalLength Frequency +C2 {setosa} ]-inf;2.4] 38 +C3 {} ]2.4;+inf[ 67 +Confusion matrix + {} {setosa} +]-inf;2.4] 0 38 +]2.4;+inf[ 67 0 + +Rank R11 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;+inf[ 0.75 2.5 +Cells +Cell id Class1 PetalWidth Frequency +C2 {setosa} ]-inf;0.75] 38 +C3 {} ]0.75;+inf[ 67 +Confusion matrix + {} {setosa} +]-inf;0.75] 0 38 +]0.75;+inf[ 67 0 + +Rank R12 + +Data grid Unsupervised +Dimensions +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.65] 0.75 1.65 + ]1.65;+inf[ 1.65 2.5 +SPetalLength Categorical Value groups + {1} 1 + {5, 6} 5 6 * + {4, 3} 4 3 +Cells +Cell id PetalWidth SPetalLength Frequency +C1 ]-inf;0.75] {1} 38 +C5 ]0.75;1.65] {5, 6} 3 +C6 ]1.65;+inf[ {5, 6} 31 +C8 ]0.75;1.65] {4, 3} 29 +C9 ]1.65;+inf[ {4, 3} 4 +Confusion matrix + ]-inf;0.75] ]0.75;1.65] ]1.65;+inf[ +{1} 38 0 0 +{5, 6} 0 3 31 +{4, 3} 0 29 4 + +Rank R13 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-virginica, Iris-versicolor} Iris-virginica Iris-versicolor * + {Iris-setosa} Iris-setosa +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +Cells +Cell id Class LowerPetalLength Frequency +C2 {Iris-setosa} ]-inf;2.4] 38 +C3 {Iris-virginica, Iris-versicolor} ]2.4;+inf[ 67 +Confusion matrix + {Iris-virginica, Iris-versicolor} {Iris-setosa} +]-inf;2.4] 0 38 +]2.4;+inf[ 67 0 + +Rank R14 + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +SPetalLength Categorical Value groups + {5, 4, 3, ...} 5 4 3 6 * + {1} 1 +Cells +Cell id LowerPetalLength SPetalLength Frequency +C2 ]2.4;+inf[ {5, 4, 3, ...} 67 +C3 ]-inf;2.4] {1} 38 +Confusion matrix + ]-inf;2.4] ]2.4;+inf[ +{5, 4, 3, ...} 0 67 +{1} 38 0 + +Rank R15 + +Data grid Unsupervised +Dimensions +PetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;4.75] 2.4 4.75 + ]4.75;+inf[ 4.75 6.9 +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.65] 0.75 1.65 + ]1.65;+inf[ 1.65 2.5 +Cells +Cell id PetalLength PetalWidth Frequency +C1 ]-inf;2.4] ]-inf;0.75] 38 +C5 ]2.4;4.75] ]0.75;1.65] 27 +C6 ]4.75;+inf[ ]0.75;1.65] 5 +C9 ]4.75;+inf[ ]1.65;+inf[ 35 +Confusion matrix + ]-inf;2.4] ]2.4;4.75] ]4.75;+inf[ +]-inf;0.75] 38 0 0 +]0.75;1.65] 0 27 5 +]1.65;+inf[ 0 0 35 + +Rank R16 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa, Iris-versicolor} Iris-setosa Iris-versicolor * + {Iris-virginica} Iris-virginica +UpperPetalWidth Numerical Intervals + ]-inf;1.75] 1.5 1.75 + ]1.75;+inf[ 1.75 2.5 +Cells +Cell id Class UpperPetalWidth Frequency +C1 {Iris-setosa, Iris-versicolor} ]-inf;1.75] 69 +C2 {Iris-virginica} ]-inf;1.75] 2 +C3 {Iris-setosa, Iris-versicolor} ]1.75;+inf[ 1 +C4 {Iris-virginica} ]1.75;+inf[ 33 +Confusion matrix + {Iris-setosa, Iris-versicolor} {Iris-virginica} +]-inf;1.75] 69 2 +]1.75;+inf[ 1 33 + +Rank R17 + +Data grid Unsupervised +Dimensions +PetalWidth Numerical Intervals + ]-inf;1.55] 0.1 1.55 + ]1.55;2.05] 1.55 2.05 + ]2.05;+inf[ 2.05 2.5 +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.5 1.55 + ]1.55;2.05] 1.55 2.05 + ]2.05;+inf[ 2.05 2.5 +Cells +Cell id PetalWidth UpperPetalWidth Frequency +C1 ]-inf;1.55] 
]-inf;1.55] 67 +C5 ]1.55;2.05] ]1.55;2.05] 20 +C9 ]2.05;+inf[ ]2.05;+inf[ 18 +Confusion matrix + ]-inf;1.55] ]1.55;2.05] ]2.05;+inf[ +]-inf;1.55] 67 0 0 +]1.55;2.05] 0 20 0 +]2.05;+inf[ 0 0 18 + +Rank R18 + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;1.45] 1 1.45 + ]1.45;2.4] 1.45 2.4 + ]2.4;+inf[ 2.4 3 +PetalLength Numerical Intervals + ]-inf;1.45] 1 1.45 + ]1.45;2.4] 1.45 2.4 + ]2.4;+inf[ 2.4 6.9 +Cells +Cell id LowerPetalLength PetalLength Frequency +C1 ]-inf;1.45] ]-inf;1.45] 17 +C5 ]1.45;2.4] ]1.45;2.4] 21 +C9 ]2.4;+inf[ ]2.4;+inf[ 67 +Confusion matrix + ]-inf;1.45] ]1.45;2.4] ]2.4;+inf[ +]-inf;1.45] 17 0 0 +]1.45;2.4] 0 21 0 +]2.4;+inf[ 0 0 67 + +Rank R19 + +Data grid Unsupervised +Dimensions +Class2 Categorical Value groups + {} + {versicolor} versicolor * +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.75] 0.75 1.75 + ]1.75;+inf[ 1.75 2.5 +Cells +Cell id Class2 PetalWidth Frequency +C1 {} ]-inf;0.75] 38 +C3 {} ]0.75;1.75] 2 +C4 {versicolor} ]0.75;1.75] 31 +C5 {} ]1.75;+inf[ 33 +C6 {versicolor} ]1.75;+inf[ 1 +Confusion matrix + {} {versicolor} +]-inf;0.75] 38 0 +]0.75;1.75] 2 31 +]1.75;+inf[ 33 1 + +Rank R20 + +Data grid Unsupervised +Dimensions +SPetalLength Categorical Value groups + {5, 4} 5 4 + {1, 3} 1 3 + {6} 6 * +SepalLength Numerical Intervals + ]-inf;5.35] 4.3 5.35 + ]5.35;5.85] 5.35 5.85 + ]5.85;7.15] 5.85 7.15 + ]7.15;+inf[ 7.15 7.7 +Cells +Cell id SPetalLength SepalLength Frequency +C2 {1, 3} ]-inf;5.35] 34 +C4 {5, 4} ]5.35;5.85] 10 +C5 {1, 3} ]5.35;5.85] 12 +C7 {5, 4} ]5.85;7.15] 42 +C12 {6} ]7.15;+inf[ 7 +Confusion matrix + {5, 4} {1, 3} {6} +]-inf;5.35] 0 34 0 +]5.35;5.85] 10 12 0 +]5.85;7.15] 42 0 0 +]7.15;+inf[ 0 0 7 + +Rank R21 + +Data grid Unsupervised +Dimensions +Class2 Categorical Value groups + {} + {versicolor} versicolor * +PetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;4.85] 2.4 4.85 + ]4.85;+inf[ 4.85 6.9 +Cells +Cell id Class2 PetalLength Frequency +C1 {} ]-inf;2.4] 38 +C3 {} ]2.4;4.85] 1 +C4 {versicolor} ]2.4;4.85] 29 +C5 {} ]4.85;+inf[ 34 +C6 {versicolor} ]4.85;+inf[ 3 +Confusion matrix + {} {versicolor} +]-inf;2.4] 38 0 +]2.4;4.85] 1 29 +]4.85;+inf[ 34 3 + +Rank R22 + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;+inf[ 0.75 2.5 +Cells +Cell id LowerPetalLength PetalWidth Frequency +C1 ]-inf;2.4] ]-inf;0.75] 38 +C4 ]2.4;+inf[ ]0.75;+inf[ 67 +Confusion matrix + ]-inf;2.4] ]2.4;+inf[ +]-inf;0.75] 38 0 +]0.75;+inf[ 0 67 + +Rank R23 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +SepalLength Numerical Intervals + ]-inf;5.45] 4.3 5.45 + ]5.45;6.15] 5.45 6.15 + ]6.15;+inf[ 6.15 7.7 +Cells +Cell id Class SepalLength Frequency +C1 {Iris-setosa} ]-inf;5.45] 34 +C3 {Iris-versicolor} ]-inf;5.45] 5 +C4 {Iris-setosa} ]5.45;6.15] 4 +C5 {Iris-virginica} ]5.45;6.15] 5 +C6 {Iris-versicolor} ]5.45;6.15] 19 +C8 {Iris-virginica} ]6.15;+inf[ 30 +C9 {Iris-versicolor} ]6.15;+inf[ 8 +Confusion matrix + {Iris-setosa} {Iris-virginica} {Iris-versicolor} +]-inf;5.45] 34 0 5 +]5.45;6.15] 4 5 19 +]6.15;+inf[ 0 30 8 + +Rank R24 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +Class2 Categorical Value groups + {} + {versicolor} versicolor * +Cells +Cell id Class1 Class2 Frequency +C1 {} {} 35 +C2 {setosa} {} 38 +C3 {} 
{versicolor} 32 +Confusion matrix + {} {setosa} +{} 35 38 +{versicolor} 32 0 + +Rank R25 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +SepalLength Numerical Intervals + ]-inf;5.45] 4.3 5.45 + ]5.45;+inf[ 5.45 7.7 +Cells +Cell id Class1 SepalLength Frequency +C1 {} ]-inf;5.45] 5 +C2 {setosa} ]-inf;5.45] 34 +C3 {} ]5.45;+inf[ 62 +C4 {setosa} ]5.45;+inf[ 4 +Confusion matrix + {} {setosa} +]-inf;5.45] 5 34 +]5.45;+inf[ 62 4 + +Rank R26 + +Data grid Unsupervised +Dimensions +SPetalLength Categorical Value groups + {1, 3} 1 3 + {5, 6} 5 6 * + {4} 4 +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.5 1.55 + ]1.55;+inf[ 1.55 2.5 +Cells +Cell id SPetalLength UpperPetalWidth Frequency +C1 {1, 3} ]-inf;1.55] 46 +C2 {5, 6} ]-inf;1.55] 2 +C3 {4} ]-inf;1.55] 19 +C5 {5, 6} ]1.55;+inf[ 32 +C6 {4} ]1.55;+inf[ 6 +Confusion matrix + {1, 3} {5, 6} {4} +]-inf;1.55] 46 2 19 +]1.55;+inf[ 0 32 6 + +Rank R27 + +Data grid Unsupervised +Dimensions +PetalLength Numerical Intervals + ]-inf;3.55] 1 3.55 + ]3.55;4.6] 3.55 4.6 + ]4.6;5.95] 4.6 5.95 + ]5.95;+inf[ 5.95 6.9 +SepalLength Numerical Intervals + ]-inf;5.45] 4.3 5.45 + ]5.45;5.85] 5.45 5.85 + ]5.85;7.15] 5.85 7.15 + ]7.15;+inf[ 7.15 7.7 +Cells +Cell id PetalLength SepalLength Frequency +C1 ]-inf;3.55] ]-inf;5.45] 37 +C2 ]3.55;4.6] ]-inf;5.45] 2 +C5 ]-inf;3.55] ]5.45;5.85] 5 +C6 ]3.55;4.6] ]5.45;5.85] 10 +C7 ]4.6;5.95] ]5.45;5.85] 2 +C10 ]3.55;4.6] ]5.85;7.15] 7 +C11 ]4.6;5.95] ]5.85;7.15] 35 +C16 ]5.95;+inf[ ]7.15;+inf[ 7 +Confusion matrix + ]-inf;3.55] ]3.55;4.6] ]4.6;5.95] ]5.95;+inf[ +]-inf;5.45] 37 2 0 0 +]5.45;5.85] 5 10 2 0 +]5.85;7.15] 0 7 35 0 +]7.15;+inf[ 0 0 0 7 + +Rank R28 + +Data grid Unsupervised +Dimensions +PetalLength Numerical Intervals + ]-inf;4.75] 1 4.75 + ]4.75;+inf[ 4.75 6.9 +UpperPetalWidth Numerical Intervals + ]-inf;1.65] 1.5 1.65 + ]1.65;+inf[ 1.65 2.5 +Cells +Cell id PetalLength UpperPetalWidth Frequency +C1 ]-inf;4.75] ]-inf;1.65] 65 +C2 ]4.75;+inf[ ]-inf;1.65] 5 +C4 ]4.75;+inf[ ]1.65;+inf[ 35 +Confusion matrix + ]-inf;4.75] ]4.75;+inf[ +]-inf;1.65] 65 5 +]1.65;+inf[ 0 35 + +Rank R29 + +Data grid Unsupervised +Dimensions +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.35] 0.75 1.35 + ]1.35;+inf[ 1.35 2.5 +SepalLength Numerical Intervals + ]-inf;5.45] 4.3 5.45 + ]5.45;5.85] 5.45 5.85 + ]5.85;+inf[ 5.85 7.7 +Cells +Cell id PetalWidth SepalLength Frequency +C1 ]-inf;0.75] ]-inf;5.45] 34 +C2 ]0.75;1.35] ]-inf;5.45] 3 +C3 ]1.35;+inf[ ]-inf;5.45] 2 +C4 ]-inf;0.75] ]5.45;5.85] 4 +C5 ]0.75;1.35] ]5.45;5.85] 10 +C6 ]1.35;+inf[ ]5.45;5.85] 3 +C8 ]0.75;1.35] ]5.85;+inf[ 5 +C9 ]1.35;+inf[ ]5.85;+inf[ 44 +Confusion matrix + ]-inf;0.75] ]0.75;1.35] ]1.35;+inf[ +]-inf;5.45] 34 3 2 +]5.45;5.85] 4 10 3 +]5.85;+inf[ 0 5 44 + +Rank R30 + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +SepalLength Numerical Intervals + ]-inf;5.45] 4.3 5.45 + ]5.45;+inf[ 5.45 7.7 +Cells +Cell id LowerPetalLength SepalLength Frequency +C1 ]-inf;2.4] ]-inf;5.45] 34 +C2 ]2.4;+inf[ ]-inf;5.45] 5 +C3 ]-inf;2.4] ]5.45;+inf[ 4 +C4 ]2.4;+inf[ ]5.45;+inf[ 62 +Confusion matrix + ]-inf;2.4] ]2.4;+inf[ +]-inf;5.45] 34 5 +]5.45;+inf[ 4 62 + +Rank R31 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.5 1.55 + ]1.55;+inf[ 1.55 2.5 +Cells +Cell id Class1 UpperPetalWidth Frequency +C1 {} ]-inf;1.55] 29 +C2 {setosa} ]-inf;1.55] 38 +C3 {} ]1.55;+inf[ 38 
+Confusion matrix + {} {setosa} +]-inf;1.55] 29 38 +]1.55;+inf[ 38 0 + +Rank R32 + +Data grid Unsupervised +Dimensions +SepalLength Numerical Intervals + ]-inf;5.85] 4.3 5.85 + ]5.85;+inf[ 5.85 7.7 +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.5 1.55 + ]1.55;+inf[ 1.55 2.5 +Cells +Cell id SepalLength UpperPetalWidth Frequency +C1 ]-inf;5.85] ]-inf;1.55] 54 +C2 ]5.85;+inf[ ]-inf;1.55] 13 +C3 ]-inf;5.85] ]1.55;+inf[ 2 +C4 ]5.85;+inf[ ]1.55;+inf[ 36 +Confusion matrix + ]-inf;5.85] ]5.85;+inf[ +]-inf;1.55] 54 13 +]1.55;+inf[ 2 36 + +Rank R33 + +Data grid Unsupervised +Dimensions +Class1 Categorical Value groups + {} + {setosa} setosa * +SepalWidth Numerical Intervals + ]-inf;2.85] 2 2.85 + ]2.85;3.35] 2.85 3.35 + ]3.35;+inf[ 3.35 4.4 +Cells +Cell id Class1 SepalWidth Frequency +C1 {} ]-inf;2.85] 30 +C3 {} ]2.85;3.35] 32 +C4 {setosa} ]2.85;3.35] 17 +C5 {} ]3.35;+inf[ 5 +C6 {setosa} ]3.35;+inf[ 21 +Confusion matrix + {} {setosa} +]-inf;2.85] 30 0 +]2.85;3.35] 32 17 +]3.35;+inf[ 5 21 + +Rank R34 + +Data grid Unsupervised +Dimensions +Class2 Categorical Value groups + {} + {versicolor} versicolor * +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +Cells +Cell id Class2 LowerPetalLength Frequency +C1 {} ]-inf;2.4] 38 +C3 {} ]2.4;+inf[ 35 +C4 {versicolor} ]2.4;+inf[ 32 +Confusion matrix + {} {versicolor} +]-inf;2.4] 38 0 +]2.4;+inf[ 35 32 + +Rank R35 + +Data grid Unsupervised +Dimensions +Class Categorical Value groups + {Iris-setosa} Iris-setosa + {Iris-virginica} Iris-virginica + {Iris-versicolor} Iris-versicolor * +SepalWidth Numerical Intervals + ]-inf;2.95] 2 2.95 + ]2.95;+inf[ 2.95 4.4 +Cells +Cell id Class SepalWidth Frequency +C1 {Iris-setosa} ]-inf;2.95] 1 +C2 {Iris-virginica} ]-inf;2.95] 13 +C3 {Iris-versicolor} ]-inf;2.95] 22 +C4 {Iris-setosa} ]2.95;+inf[ 37 +C5 {Iris-virginica} ]2.95;+inf[ 22 +C6 {Iris-versicolor} ]2.95;+inf[ 10 +Confusion matrix + {Iris-setosa} {Iris-virginica} {Iris-versicolor} +]-inf;2.95] 1 13 22 +]2.95;+inf[ 37 22 10 + +Rank R36 + +Data grid Unsupervised +Dimensions +LowerPetalLength Numerical Intervals + ]-inf;2.4] 1 2.4 + ]2.4;+inf[ 2.4 3 +UpperPetalWidth Numerical Intervals + ]-inf;1.55] 1.5 1.55 + ]1.55;+inf[ 1.55 2.5 +Cells +Cell id LowerPetalLength UpperPetalWidth Frequency +C1 ]-inf;2.4] ]-inf;1.55] 38 +C2 ]2.4;+inf[ ]-inf;1.55] 29 +C4 ]2.4;+inf[ ]1.55;+inf[ 38 +Confusion matrix + ]-inf;2.4] ]2.4;+inf[ +]-inf;1.55] 38 29 +]1.55;+inf[ 0 38 + +Rank R37 + +Data grid Unsupervised +Dimensions +PetalWidth Numerical Intervals + ]-inf;0.75] 0.1 0.75 + ]0.75;1.45] 0.75 1.45 + ]1.45;+inf[ 1.45 2.5 +SepalWidth Numerical Intervals + ]-inf;2.95] 2 2.95 + ]2.95;+inf[ 2.95 4.4 +Cells +Cell id PetalWidth SepalWidth Frequency +C1 ]-inf;0.75] ]-inf;2.95] 1 +C2 ]0.75;1.45] ]-inf;2.95] 21 +C3 ]1.45;+inf[ ]-inf;2.95] 14 +C4 ]-inf;0.75] ]2.95;+inf[ 37 +C5 ]0.75;1.45] ]2.95;+inf[ 3 +C6 ]1.45;+inf[ ]2.95;+inf[ 29 +Confusion matrix + ]-inf;0.75] ]0.75;1.45] ]1.45;+inf[ +]-inf;2.95] 1 21 14 +]2.95;+inf[ 37 3 29 + +Rank R38 + +Data grid Unsupervised +Dimensions +SPetalLength Categorical Value groups + {4, 3, 6} 4 3 6 * + {1} 1 + {5} 5 +SepalWidth Numerical Intervals + ]-inf;2.95] 2 2.95 + ]2.95;3.25] 2.95 3.25 + ]3.25;+inf[ 3.25 4.4 +Cells +Cell id SPetalLength SepalWidth Frequency +C1 {4, 3, 6} ]-inf;2.95] 26 +C2 {1} ]-inf;2.95] 1 +C3 {5} ]-inf;2.95] 9 +C4 {4, 3, 6} ]2.95;3.25] 10 +C5 {1} ]2.95;3.25] 15 +C6 {5} ]2.95;3.25] 16 +C7 {4, 3, 6} ]3.25;+inf[ 4 +C8 {1} ]3.25;+inf[ 22 +C9 {5} ]3.25;+inf[ 2 +Confusion matrix + {4, 3, 6} {1} {5} +]-inf;2.95] 26 
1 9 +]2.95;3.25] 10 15 16 +]3.25;+inf[ 4 22 2 diff --git a/tests/test_core.py b/tests/test_core.py index 1bab6354..92ded09c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -70,6 +70,7 @@ def test_analysis_results(self): "IrisMAPLegacy", "IrisR", "IrisU", + "IrisU2D", "LargeSpiral", "Latin", "LatinGreek", From 75e9a89ffe755a85e14607a0f09d406302276c14 Mon Sep 17 00:00:00 2001 From: Thierry RAMORASOAVINA Date: Mon, 18 Mar 2024 15:42:25 +0100 Subject: [PATCH 13/37] Install conda in the `khiopspydev` docker image for all supported Python versions - the purpose is to run the unit tests on the supported python versions from 3.8 to 3.12 - miniconda3 was chosen to keep the docker image light - the conda envs follow the name pattern py$version (where version is the python3 version) - package installation and command execution within a conda env are performed without activating it (this avoids annoying little issues) --- .github/workflows/dev-docker.yml | 6 +++ .github/workflows/unit-tests.yml | 39 ++++++++++++------- packaging/docker/khiopspydev/Dockerfile.rocky | 18 ++++++++- .../docker/khiopspydev/Dockerfile.ubuntu | 16 +++++++- 4 files changed, 62 insertions(+), 17 deletions(-) diff --git a/.github/workflows/dev-docker.yml b/.github/workflows/dev-docker.yml index 41265080..522662fd 100644 --- a/.github/workflows/dev-docker.yml +++ b/.github/workflows/dev-docker.yml @@ -3,6 +3,7 @@ name: Dev Docker env: DEFAULT_KHIOPS_REVISION: main DEFAULT_SERVER_REVISION: main + DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 on: pull_request: paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml] @@ -20,6 +21,10 @@ on: type: boolean default: true description: Push to GH Registry + python-versions: + type: string + default: 3.8 3.9 3.10 3.11 3.12 + description: Python versions supported by khiops-python concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true @@ -60,6 +65,7 @@ jobs: "KHIOPS_REVISION=${{ env.KHIOPS_REVISION }}" "KHIOPSDEV_OS=${{ matrix.khiopsdev-os }}" "SERVER_REVISION=${{ env.SERVER_REVISION }}" + "PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}" tags: ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}:latest # Push only on manual request push: ${{ inputs.push || false }} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 296ee95a..9254cd82 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -20,6 +20,10 @@ concurrency: jobs: run: runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] container: image: ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:latest credentials: @@ -55,14 +59,19 @@ jobs: - name: Setup and Install Test Requirements if: success() || failure() run: | - mkdir -p -m u+rwx ${{ github.workspace }}/reports - pip install unittest-xml-reporting - pip install -r test-requirements.txt + mkdir -p -m u+rwx reports/py${{ matrix.python-version }} + # install within the conda environment without activating it + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} -c conda-forge unittest-xml-reporting + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} --file test-requirements.txt - name: Install khiops-python dependencies if: success() || failure() run: | - python setup.py egg_info - pip install `grep -v "^\[" khiops.egg-info/requires.txt` + # The following git
command is required, + # as the Git repository is in a directory the current user does not own; + # otherwise, Python versioneer fails to compute the current version correctly + git config --global --add safe.directory $(realpath .) + /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} python setup.py egg_info + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} `grep -v "^\[" khiops.egg-info/requires.txt` rm -rf khiops.egg-info - name: Prepare Unit Tests Environment if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-long-tests @@ -77,23 +86,23 @@ jobs: # This is needed so that the Git tag is parsed and the khiops-python # version is retrieved git config --global --add safe.directory $(realpath .) - coverage run -m xmlrunner -o "reports" -v - coverage report -m - coverage xml -o "reports/py-coverage.xml" + /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage run -m xmlrunner -o "reports/py${{ matrix.python-version }}" -v + /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage report -m + /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage xml -o "reports/py${{ matrix.python-version }}/py-coverage.xml" - name: Display Unit Test Reports uses: dorny/test-reporter@v1 with: - name: Unit Tests - path: reports/TEST-tests.*.*.xml + name: Unit Tests ${{ matrix.python-version }} + path: reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml reporter: java-junit path-replace-backslashes: 'true' # Necessary for windows paths - name: Upload Test Reports as Artifacts uses: actions/upload-artifact@v4 with: - name: test-reports + name: test-reports-${{ matrix.python-version }} path: |- - reports/TEST-tests.*.*.xml - reports/py-coverage.xml + reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml + reports/py${{ matrix.python-version }}/py-coverage.xml tests/resources/scenario_generation/*/ref/*._kh tests/resources/scenario_generation/*/output/*._kh tests/resources/*/output_reports/*.txt @@ -128,6 +137,10 @@ jobs: fetch-depth: 0 - name: Install khiops-python dev dependencies run: | + # The following git command is required, + # as the Git repository is in a directory the current user does not own; + # otherwise, Python versioneer fails to compute the current version correctly + git config --global --add safe.directory $(realpath .)
python setup.py egg_info pip install `grep -v "^\[" khiops.egg-info/requires.txt` rm -rf khiops.egg-info diff --git a/packaging/docker/khiopspydev/Dockerfile.rocky b/packaging/docker/khiopspydev/Dockerfile.rocky index df4d3dc9..7eef216e 100644 --- a/packaging/docker/khiopspydev/Dockerfile.rocky +++ b/packaging/docker/khiopspydev/Dockerfile.rocky @@ -8,8 +8,9 @@ LABEL description="Container for the development of khiops-python" # Reuse KHIOPSDEV_OS from previous stage ARG KHIOPSDEV_OS ARG KHIOPS_REVISION - -# Install dev tools; build and install Khiops; set mpich as the default MPI +# - Install dev tools and miniconda3 (for the unit tests) +# - Build and install Khiops +# - Set mpich as the default MPI RUN true \ && useradd -rm -d /home/rocky -s /bin/bash -g root -u 1000 rocky \ # Install git (for khiops-python version calculation), pandoc and pip \ @@ -18,6 +19,7 @@ RUN true \ && dnf install --enablerepo=devel -y \ git \ pandoc \ + wget \ # Install Python 3.11 if on Rocky 8 \ && if [ "$KHIOPSDEV_OS" = "rocky8" ]; then \ dnf install -y \ @@ -55,11 +57,23 @@ RUN true \ alternatives --install /usr/bin/python python /usr/bin/python3 1 \ && alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 ; \ fi \ + # Install miniconda3 to have multiple Python versions via Conda \ + && mkdir -p /root/miniconda3 && cd /root/miniconda3 \ + && wget https://repo.anaconda.com/miniconda/Miniconda3-py312_24.1.2-0-Linux-x86_64.sh -O ./Miniconda3-py312_24.1.2-0-Linux-x86_64.sh \ + && echo "b978856ec3c826eb495b60e3fffe621f670c101150ebcbdeede4f961f22dc438 Miniconda3-py312_24.1.2-0-Linux-x86_64.sh" | sha256sum --check \ + && bash ./Miniconda3-py312_24.1.2-0-Linux-x86_64.sh -b -u -p /root/miniconda3 \ + && rm -rf /root/miniconda3/Miniconda3-py312_24.1.2-0-Linux-x86_64.sh \ # Clean build files \ && dnf clean all \ && rm -rf ./khiops \ && true +# set up all the supported Python environments under conda (for the unit tests) +# relying on a variable containing all the versions +ARG PYTHON_VERSIONS +RUN for version in ${PYTHON_VERSIONS}; \ + do /root/miniconda3/bin/conda create -y -n py${version} python=${version}; done + RUN mkdir -p /scripts COPY ./run_service.sh /scripts/run_service.sh RUN chmod +x /scripts/run_service.sh diff --git a/packaging/docker/khiopspydev/Dockerfile.ubuntu b/packaging/docker/khiopspydev/Dockerfile.ubuntu index 00f395cf..493f5f5d 100644 --- a/packaging/docker/khiopspydev/Dockerfile.ubuntu +++ b/packaging/docker/khiopspydev/Dockerfile.ubuntu @@ -5,12 +5,12 @@ FROM ghcr.io/khiopsml/khiops/khiopsdev-${KHIOPSDEV_OS}:latest AS khiopsdev LABEL maintainer="khiops.team@orange.com" LABEL description="Container for the development of khiops-python" -# Install dev tools; build and install Khiops +# Install dev tools and miniconda3 (for the unit tests); build and install Khiops ARG KHIOPS_REVISION RUN true \ # Install git (for khiops-python version calculation) and pip \ && apt-get -y update \ - && apt-get -y --no-install-recommends install git python3-pip zip pandoc \ + && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget \ # Obtain the Khiops sources \ && git clone https://github.com/khiopsml/khiops.git \ && cd khiops \ @@ -24,6 +24,12 @@ RUN true \ && cd .. 
\ # Set python to python3 \ && update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \ + # Install miniconda3 to have multiple Python versions via Conda \ + && mkdir -p /root/miniconda3 && cd /root/miniconda3 \ + && wget https://repo.anaconda.com/miniconda/Miniconda3-py312_24.1.2-0-Linux-x86_64.sh -O ./Miniconda3-py312_24.1.2-0-Linux-x86_64.sh \ + && echo "b978856ec3c826eb495b60e3fffe621f670c101150ebcbdeede4f961f22dc438 Miniconda3-py312_24.1.2-0-Linux-x86_64.sh" | sha256sum --check \ + && bash ./Miniconda3-py312_24.1.2-0-Linux-x86_64.sh -b -u -p /root/miniconda3 \ + && rm -rf /root/miniconda3/Miniconda3-py312_24.1.2-0-Linux-x86_64.sh \ # Make sure that MPI is mpich \ && update-alternatives --set mpirun /usr/bin/mpirun.mpich \ # Clean build files \ @@ -32,6 +38,12 @@ RUN true \ && rm -rf ./khiops \ && true +# set up all the supported Python environments under conda (for the unit tests) +# relying on a variable containing all the versions +ARG PYTHON_VERSIONS +RUN for version in ${PYTHON_VERSIONS}; \ + do /root/miniconda3/bin/conda create -y -n py${version} python=${version}; done + RUN mkdir -p /scripts COPY ./run_service.sh /scripts/run_service.sh RUN chmod +x /scripts/run_service.sh && \ From 3c39334f7b23dcb3cd05482ea69c5611f1f1742c Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:25:53 +0200 Subject: [PATCH 14/37] Fix Python 3.8 test failures In Python 3.8, `scipy.sparse.lil_matrix` rows contain arrays of Python lists. If empty row, it contains a single-element array with an empty Python list element; its numpy array data has size 1. This patch tests directly on the size of the row and flattens the rows to ensure a homogeneous treatment. --- khiops/sklearn/tables.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/khiops/sklearn/tables.py b/khiops/sklearn/tables.py index a20631fe..aa2c44b2 100644 --- a/khiops/sklearn/tables.py +++ b/khiops/sklearn/tables.py @@ -9,7 +9,7 @@ import io import warnings from abc import ABC, abstractmethod -from collections.abc import Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence import numpy as np import pandas as pd @@ -1409,6 +1409,14 @@ def get_khiops_variable_name(self, column_id): variable_name = f"Var{column_id}" return variable_name + def _flatten(self, iterable): + if isinstance(iterable, Iterable): + for iterand in iterable: + if isinstance(iterand, Iterable): + yield from self._flatten(iterand) + else: + yield iterand + def _write_sparse_block(self, row_index, stream, target=None): assert row_index in range( self.matrix.shape[0] @@ -1420,7 +1428,7 @@ def _write_sparse_block(self, row_index, stream, target=None): # Empty row in the sparse matrix: use the first variable as missing data # TODO: remove this part once Khiops bug # https://github.com/KhiopsML/khiops/issues/235 is solved - if row.data.size == 0: + if row.size == 0: for variable_index in self.column_ids: stream.write(f"{variable_index + 1}: ") break @@ -1428,8 +1436,19 @@ def _write_sparse_block(self, row_index, stream, target=None): else: # Variable indices are not always sorted in `row.indices` # Khiops needs variable indices to be sorted - sorted_indices = np.sort(row.indices, axis=-1, kind="mergesort") - sorted_data = row.data[sorted_indices.argsort()] + sorted_indices = np.sort(row.nonzero()[1], axis=-1, kind="mergesort") + + # Flatten row for Python < 3.9 scipy.sparse.lil_matrix whose API + # is not homogeneous with other sparse 
matrices: it stores + # opaque Python lists as elements + # Thus: + # - if isinstance(self.matrix, sp.lil_matrix) and Python 3.8, then + # row.data is np.array([list([...])]) + # - else, row.data is np.array([...]) + # TODO: remove this flattening once Python 3.8 support is dropped + sorted_data = np.fromiter(self._flatten(row.data), row.data.dtype)[ + sorted_indices.argsort() + ] for variable_index, variable_value in zip(sorted_indices, sorted_data): stream.write(f"{variable_index + 1}:{variable_value} ") stream.write("\n") From 2bbeb1fca5ca530d80c5446ee21d3a9f04510e10 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:28:28 +0200 Subject: [PATCH 15/37] Print download URL in kh-download-datasets --- khiops/tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/khiops/tools.py b/khiops/tools.py index 3880e3e3..58d05e87 100644 --- a/khiops/tools.py +++ b/khiops/tools.py @@ -117,7 +117,7 @@ def kh_download_datasets_entry_point(): def download_datasets( - force_overwrite=False, version="10.1.1", _called_from_shell=False + force_overwrite=False, version="10.2.0", _called_from_shell=False ): """Downloads the Khiops sample datasets for a given version @@ -129,7 +129,7 @@ def download_datasets( ========== force_overwrite : bool, default ``False`` If ``True`` it always overwrites the local samples directory even if it exists. - version : str, default "10.1.1" + version : str, default "10.2.0" The version of the samples datasets. """ # Note: The hidden parameter _called_from_shell is just to change the user messages. @@ -159,6 +159,7 @@ def download_datasets( ) # Download the sample zip file and extract it to the home dataset dir + print(f"Downloading samples from {samples_zip_url}") with tempfile.NamedTemporaryFile() as temp_zip_file, urllib.request.urlopen( samples_zip_url ) as zip_request: From 6336cf7394463b39e56dd1f2ab0674e406403d98 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:47:51 +0200 Subject: [PATCH 16/37] Fix download samples dataset version in CI --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 6da1f3df..fbcef45c 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -109,7 +109,7 @@ jobs: - name: Download Sample Datasets run: | kh-download-datasets \ - --version ${{ inputs.khiops-core-version || env.DEFAULT_SAMPLES_VERSION }} + --version ${{ inputs.khiops-samples-version || env.DEFAULT_SAMPLES_VERSION }} - name: Test Conda Package Installation on Samples run: | kh-samples core -i train_predictor -e From e83eab1f64f375c30efd1581c2bce0331e517e39 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:20:09 +0200 Subject: [PATCH 17/37] Make samples dir check be executed only on access --- khiops/core/internals/runner.py | 119 +++++++++++++++++--------------- 1 file changed, 62 insertions(+), 57 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 83181012..846469e3 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -44,6 +44,55 @@ def _isdir_without_all_perms(dir_path): ) + +def get_dir_status(a_dir): + """Returns the status of a local or remote directory + + For a local directory, a real check is performed.
A remote directory is detected + but not checked. + """ + if fs.is_local_resource(a_dir): + # Remove initial slash on windows systems + # urllib's url2pathname does not work properly + a_dir_res = fs.create_resource(os.path.normpath(a_dir)) + a_dir_path = a_dir_res.uri_info.path + if platform.system() == "Windows": + if a_dir_path.startswith("/"): + a_dir_path = a_dir_path[1:] + + if not os.path.exists(a_dir_path): + status = "non-existent" + elif not os.path.isdir(a_dir_path): + status = "not-a-dir" + else: + status = "ok" + else: + status = "remote-path" + + assert status in ["non-existent", "not-a-dir", "ok", "remote-path"] + return status + + +def check_samples_dir(samples_dir): + # Warn if there are problems with the samples_dir + samples_dir_status = get_dir_status(samples_dir) + download_msg = ( + "Execute the kh-download-datasets script or " + "the khiops.tools.download_datasets function to download them." + ) + if samples_dir_status == "non-existent": + warnings.warn( + "Sample datasets location does not exist " + f"({samples_dir}). {download_msg}", + stacklevel=3, + ) + elif samples_dir_status == "not-a-dir": + warnings.warn( + "Sample datasets location is not a directory " + f"({samples_dir}). {download_msg}", + stacklevel=3, + ) + + def _extract_path_from_uri(uri): res = fs.create_resource(uri) if platform.system() == "Windows": @@ -69,30 +118,6 @@ def _extract_path_from_uri(uri): return path -def _dir_status(a_dir): - """Returns the status of a local or remote directory""" - if fs.is_local_resource(a_dir): - # Remove initial slash on windows systems - # urllib's url2pathname does not work properly - a_dir_res = fs.create_resource(os.path.normpath(a_dir)) - a_dir_path = a_dir_res.uri_info.path - if platform.system() == "Windows": - if a_dir_path.startswith("/"): - a_dir_path = a_dir_path[1:] - - if not os.path.exists(a_dir_path): - status = "non-existent" - elif not os.path.isdir(a_dir_path): - status = "not-a-dir" - else: - status = "ok" - else: - status = "remote-path" - - assert status in ["non-existent", "not-a-dir", "ok", "remote-path"] - return status - - def _get_system_cpu_cores(): """Portably obtains the number of cpu cores (no hyperthreading)""" # Set the cpu info command and arguments for each platform @@ -969,6 +994,7 @@ def __init__(self): self._khiops_bin_dir = None self._khiops_version = None self._samples_dir = None + self._samples_dir_checked = False # Call parent constructor super().__init__() @@ -1013,9 +1039,8 @@ def _start_khiops_environment_initialization(self): else: self.khiops_temp_dir = "" - # Initialize and check the default samples dir + # Initialize the default samples dir self._initialize_default_samples_dir() - self._check_samples_dir() def _initialize_mpi_command_args(self): """Creates the mpiexec call arguments for each platform""" @@ -1188,10 +1213,12 @@ def _initialize_default_samples_dir(self): ) else: public_samples_dir = None - if public_samples_dir is not None and _dir_status(public_samples_dir) in [ - "ok", - "remote", - ]: + + ok_statuses = ["ok", "remote"] + if ( + public_samples_dir is not None + and get_dir_status(public_samples_dir) in ok_statuses + ): self._samples_dir = public_samples_dir else: self._samples_dir = str(home_samples_dir) @@ -1203,32 +1230,6 @@ def _initialize_default_samples_dir(self): assert self._samples_dir is not None - def _check_samples_dir(self, samples_dir=None): - # Check the runners samples_dir if samples_dir is not specified - if samples_dir is None: - samples_dir_to_check = self._samples_dir - else: - 
samples_dir_to_check = samples_dir - - # Warn if there are problems with the samples_dir - samples_dir_status = _dir_status(samples_dir_to_check) - download_msg = ( - "Execute the kh-download-datasets script or " - "the khiops.tools.download_datasets function to download them." - ) - if samples_dir_status == "non-existent": - warnings.warn( - "Sample datasets location does not exist " - f"({samples_dir_to_check}). {download_msg}", - stacklevel=3, - ) - elif samples_dir_status == "not-a-dir": - warnings.warn( - "Sample datasets location is not a directory " - f"({samples_dir_to_check}). {download_msg}", - stacklevel=3, - ) - def _finish_khiops_environment_initialization(self): # Initialize Khiops binary directory self._initialize_khiops_bin_dir() @@ -1428,10 +1429,14 @@ def _tool_path(self, tool_name): def _set_samples_dir(self, samples_dir): """Checks and sets the samples directory""" - self._check_samples_dir(samples_dir) + check_samples_dir(samples_dir) super()._set_samples_dir(samples_dir) def _get_samples_dir(self): + # Check the samples dir once (the check emits only warnings) + if not self._samples_dir_checked: + check_samples_dir(self._samples_dir) + self._samples_dir_checked = True return self._samples_dir def raw_run(self, tool_name, command_line_args=None, use_mpi=True, trace=False): From a1ba662d8dce1694938ab7e24b08f040303da489 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Mon, 29 Apr 2024 15:08:57 +0200 Subject: [PATCH 18/37] Add sample datasets check to the release checklist --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5e639bdb..98e43bcf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -261,6 +261,7 @@ Checklist: - Update the API Docs if necessary - Update `CHANGELOG.md` - Update the default `khiops-core` version in [.github/workflows/conda.yml] + - Update the default value for `version` in the `download_datasets` function in [khiops/tools.py] - Git manipulations - Update your local repo and save your work: - `git stash # if necessary` From ec8a3b255982de42001e8a14c86bc3a973b5dcd9 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Thu, 6 Jun 2024 09:59:43 +0200 Subject: [PATCH 19/37] Simplify error reporting Also: Modify an integration test to restore the runner's state when there are unexpected errors. --- khiops/core/internals/runner.py | 114 +++++++++++++----------------- tests/test_khiops_integrations.py | 70 ++++++++++-------- 2 files changed, 89 insertions(+), 95 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 846469e3..9f1bba75 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -741,74 +741,60 @@ def _report_exit_status( ): """Reports the exit status of a Khiops execution""" # Note: - # We report stdout and stderr in both branches below because we use a log file - # and thus normally Khiops doesn't write anything to these streams. In - # practice MPI and the remote filesystems plugins may write to them to report - # anomalies.
- - # If the execution was correct, warn and report: - # - the stdout if it was not empty - # - the stderr if it was not empty - # - any warnings found in the log - if return_code == 0: - # Add Khiops log warnings to the warning message if any - warning_msg = "" - _, _, warning_messages = self._collect_errors(log_file_path) - if warning_messages: - warning_msg += "Warnings in log:\n" + "".join(warning_messages) - - # Add stdout to the warning message if non empty - if stdout: - if warning_msg: - warning_msg += "\n" - warning_msg += f"Contents of stdout:\n{stdout}" - - # Add stderr to the warning message if non empty - if stderr: - if warning_msg: - warning_msg += "\n" - warning_msg += f"Contents of stderr:\n{stderr}" - - # Report the message if there were any - if warning_msg: - warning_msg = ( - "Khiops ended correctly but there were minor issues: " + warning_msg - ) - warnings.warn(warning_msg.rstrip(), stacklevel=4) - # If the execution was incorrect raise an exception reporting: + # We report stdout and stderr below because we use a log file and thus + # normally Khiops doesn't write anything to these streams. In practice MPI and + # the remote filesystems plugins may write to them to report anomalies. + + # Report messages: # - The warnings in the log # - The errors and/or fatal errors in the log # - The stdout if not empty # - The stderr if not empty - else: - # Collect errors and warnings - errors, fatal_errors, warning_messages = self._collect_errors(log_file_path) - - # Create the message reporting the errors - error_msg = "" - if warning_messages: - error_msg += "Warnings in log:\n" + "".join(warning_messages) - if errors: - if error_msg: - error_msg += "\n" - error_msg += "Errors in log:\n" + "".join(errors) - if fatal_errors: - if error_msg: - error_msg += "\n" - error_msg += "Fatal errors in log:\n" + "".join(fatal_errors) - if stdout: - if error_msg: - error_msg += "\n" - error_msg += f"Contents of stdout:\n{stdout}" - if stderr: - if error_msg: - error_msg += "\n" - error_msg += f"Contents of stderr:\n{stderr}" - - # Raise an exception with the errors - raise KhiopsRuntimeError( - f"{tool_name} ended with return code {return_code}\n{error_msg}" - ) + # + # If there were any errors (fatal or not) or the return code is non-zero the + # reporting is via an exception. Otherwise we show the message as a warning. 
+ # + + # Create the message reporting the errors and warnings + error_msg = "" + errors, fatal_errors, warning_messages = self._collect_errors(log_file_path) + if warning_messages: + error_msg += "Warnings in log:\n" + "".join(warning_messages) + if errors: + if error_msg: + error_msg += "\n" + error_msg += "Errors in log:\n" + "".join(errors) + if fatal_errors: + if error_msg: + error_msg += "\n" + error_msg += "Fatal errors in log:\n" + "".join(fatal_errors) + + # Add stdout to the warning message if non empty + if stdout: + if error_msg: + error_msg += "\n" + error_msg += f"Contents of stdout:\n{stdout}" + + # Add stderr to the warning message if non empty + if stderr: + if error_msg: + error_msg += "\n" + error_msg += f"Contents of stderr:\n{stderr}" + + # Report the message to the user if there were any + if error_msg: + # Raise an exception if there were errors + if errors or fatal_errors or return_code != 0: + raise KhiopsRuntimeError( + f"{tool_name} execution had errors (return code {return_code}):\n" + f"{error_msg}" + ) + # Otherwise show the message as a warning + else: + error_msg = ( + f"Khiops ended correctly but there were minor issues:\n{error_msg}" + ) + warnings.warn(error_msg.rstrip()) def _collect_errors(self, log_file_path): # Collect errors any errors found in the log diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py index abc5d404..20d62094 100644 --- a/tests/test_khiops_integrations.py +++ b/tests/test_khiops_integrations.py @@ -138,38 +138,46 @@ def test_runner_with_custom_khiops_binary_directory(self): # Get default runner default_runner = kh.get_runner() - # Create a fresh local runner and initialize its default Khiops binary dir - runner = KhiopsLocalRunner() - runner._initialize_khiops_bin_dir() - - # Get runner's default Khiops binary directory - default_bin_dir = runner.khiops_bin_dir + # Test in a try block to restore the runner if there are unexpected errors + try: + # Create a fresh local runner and initialize its default Khiops binary dir + runner = KhiopsLocalRunner() + runner._initialize_khiops_bin_dir() + + # Get runner's default Khiops binary directory + default_bin_dir = runner.khiops_bin_dir + + # Create temporary directory + with tempfile.TemporaryDirectory() as tmp_khiops_bin_dir: + # Copy Khiops binaries into the temporary directory + for binary_file in os.listdir(default_bin_dir): + if binary_file.startswith("MODL"): + shutil.copy( + os.path.join(default_bin_dir, binary_file), + os.path.join(tmp_khiops_bin_dir, binary_file), + ) + + # Change runner's Khiops binary directory to the temporary directory + runner.khiops_bin_dir = tmp_khiops_bin_dir + + # Set current runner to the fresh runner + kh.set_runner(runner) + + # Test the core API works + # Call check_database (could be any other method) + with self.assertRaises(kh.KhiopsRuntimeError) as cm: + kh.check_database("a.kdic", "dict_name", "data.txt") + + # Test that MODL executable can be found and launched + # Note: The return code is not specified to support older khiops + # versions that returned 2 instead of 0 in this case. 
+ self.assertIn( + "khiops execution had errors (return code ", str(cm.exception) + ) - # Create temporary directory - with tempfile.TemporaryDirectory() as tmp_khiops_bin_dir: - # Copy Khiops binaries into the temporary directory - for binary_file in os.listdir(default_bin_dir): - if binary_file.startswith("MODL"): - shutil.copy( - os.path.join(default_bin_dir, binary_file), - os.path.join(tmp_khiops_bin_dir, binary_file), - ) - - # Change runner's Khiops binary directory to the temporary directory - runner.khiops_bin_dir = tmp_khiops_bin_dir - - # Set current runner to the fresh runner - kh.set_runner(runner) - - # Test the core API works - # Call check_database (could be any other method) - with self.assertRaises(kh.KhiopsRuntimeError) as cm: - kh.check_database("a.kdic", "dict_name", "data.txt") - # Test that MODL executable can be found and launched - self.assertIn("khiops ended with return code 2", str(cm.exception)) - - # Set current runner to the default runner - kh.set_runner(default_runner) + # Always set back to the default runner + finally: + kh.set_runner(default_runner) class KhiopsMultitableFitTests(unittest.TestCase): From 63aebdf3ca897da2fa0af4b2a13952e913fc5c08 Mon Sep 17 00:00:00 2001 From: Felipe Olmos <92923444+folmos-at-orange@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:48:21 +0200 Subject: [PATCH 20/37] Admit periods in pre-release version token --- khiops/core/internals/version.py | 23 ++++++++++++++++------- tests/test_core.py | 8 +++++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/khiops/core/internals/version.py b/khiops/core/internals/version.py index 59843701..e4303762 100644 --- a/khiops/core/internals/version.py +++ b/khiops/core/internals/version.py @@ -36,12 +36,12 @@ def __init__(self, version_str): self._version_str = version_str # Remove the "v" prefix if present - raw_parts = re.sub("^v", "", self._version_str).split(".") + raw_parts = re.sub("^v", "", self._version_str).split(".", maxsplit=2) # Check the Khiops version format: MAJOR.MINOR.PATCH[-PRE_RELEASE] - if len(raw_parts) != 3: + if len(raw_parts) < 3: self._raise_init_error( - "Version must have the format " "MAJOR.MINOR.PATCH[-PRE_RELEASE]", + "Version must have the format MAJOR.MINOR.PATCH[-PRE_RELEASE]", version_str, ) self._major, self._minor, patch_and_pre_release = raw_parts @@ -73,6 +73,11 @@ def __init__(self, version_str): "PATCH-PRE_RELEASE version part must contain a single '-'", version_str, ) + if patch_and_pre_release.count(".") > 1: + self._raise_init_error( + "PATCH-PRE_RELEASE version part must contain at most a single '.'", + version_str, + ) self._patch, _pre_release = patch_and_pre_release.split("-") # Store only the patch version part if there are only digits @@ -93,7 +98,11 @@ def __init__(self, version_str): ) # Store the rest of the prelease (if any) and check it is a number - self._pre_release_increment = _pre_release.replace(self._pre_release_id, "") + # We accept not having a "." in the pre-release increment for backward + # compatibility. + self._pre_release_increment = _pre_release.replace( + self._pre_release_id, "" + ).replace(".", "") if _is_simple_number(self._pre_release_increment): self._pre_release_increment = int(self._pre_release_increment) else: @@ -102,7 +111,7 @@ def __init__(self, version_str): ) def _raise_init_error(self, msg, version_str): - raise ValueError(f"{msg}. Version string: {version_str}.") + raise ValueError(f"{msg}. 
Version string: '{version_str}'.") @property def major(self): @@ -123,12 +132,12 @@ def patch(self): def pre_release(self): """str : The version's pre-release tag - Returns: either 'a', 'b' or 'rc' followed by a number or None. + Returns: either 'a', 'b' or 'rc' followed by '.' and a number or None. """ if self._pre_release_id is None: return None else: - return f"{self._pre_release_id}{self._pre_release_increment}" + return f"{self._pre_release_id}.{self._pre_release_increment}" def __repr__(self): return self._version_str diff --git a/tests/test_core.py b/tests/test_core.py index 92ded09c..1e6a9e13 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2160,6 +2160,7 @@ def test_version_comparisons(self): "9.0.1", "9.5.1-a1", "9.5.1-a2", + "9.5.1-a.3", "9.5.1", "10.0.0", "10.0.1", @@ -2193,11 +2194,16 @@ def test_version_comparisons(self): def test_invalid_versions(self): """Test invalid versions""" for version in [ + "a.b.c-4", + "...", + ".0.4", "ver10.0.0", "10", "10.0", - "10i.4.0", + "10.4.0-5.4," "10i.4.0", "10.4b.3", + "10.4.1-b..2", + "10.4.1.-b.", "10.2.@", "10.@.2", "10.1.2b", From aea667fbe88a097b9223b24cf561d5c38979188e Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 12 Jun 2024 19:03:02 +0200 Subject: [PATCH 21/37] Update Docker dev containers - use OpenMPI on native installations - get Khiops binaries either as native packages or as Conda packages related_to #183 --- packaging/docker/khiopspydev/Dockerfile.rocky | 36 ++++++++++--------- .../docker/khiopspydev/Dockerfile.ubuntu | 32 +++++++++-------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/packaging/docker/khiopspydev/Dockerfile.rocky b/packaging/docker/khiopspydev/Dockerfile.rocky index 7eef216e..6001b806 100644 --- a/packaging/docker/khiopspydev/Dockerfile.rocky +++ b/packaging/docker/khiopspydev/Dockerfile.rocky @@ -31,23 +31,21 @@ RUN true \ python3-setuptools \ python3-pip ; \ fi \ - # Obtain the Khiops sources \ - && git clone https://github.com/khiopsml/khiops.git \ - && cd khiops \ - && git checkout ${KHIOPS_REVISION} \ - # Make sure that MPI is mpich \ + # Get Linux distribution codename \ + && if [ -f /etc/os-release ]; then . /etc/os-release; fi \ + && IFS='.' read -ra VERSION <<< "$VERSION_ID" \ + && ROCKY_VERSION=${VERSION[0]} \ + # Obtain the Khiops native package \ + && KHIOPS_PKG_FILE=$KHIOPS_REVISION/khiops-core-openmpi-$(echo ${KHIOPS_REVISION} | tr '-' '_')-1.el$ROCKY_VERSION.x86_64.rpm \ + && wget -O KHIOPS_CORE.rpm "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_PKG_FILE}" \ + # Install the Khiops native package \ + && dnf install KHIOPS_CORE.rpm -y \ + && rm -f KHIOPS_CORE.rpm \ + # Make sure that MPI is openmpi \ && source /etc/profile.d/modules.sh \ && module unload mpi \ - # Hard-code MPICH module name \ - && module load mpi/mpich-x86_64 \ - # Build Khiops \ - # Note: We build the JARs and KNI because the `cmake --install` command below doesn't work \ - && cmake --preset linux-gcc-release -DTESTING=OFF -DBUILD_JARS=ON -DCMAKE_INSTALL_PREFIX= \ - && cmake --build --preset linux-gcc-release --parallel \ - --target MODL${MPI_SUFFIX} MODL_Coclustering${MPI_SUFFIX} \ - KhiopsNativeInterface norm_jar khiops_jar \ - && cmake --install ./build/linux-gcc-release \ - && cd .. 
\ + # Hard-code OpenMPI module name \ + && module load mpi/openmpi-x86_64 \ # Set python to python3.11 and pip to Pip 3.11 on Rocky 8 \ # Set python to python3 on Rocky 9 \ && if [ "$KHIOPSDEV_OS" = "rocky8" ]; then \ @@ -71,8 +69,12 @@ RUN true \ # set up all the supported Python environments under conda (for the unit tests) # relying on a variable containing all the versions ARG PYTHON_VERSIONS -RUN for version in ${PYTHON_VERSIONS}; \ - do /root/miniconda3/bin/conda create -y -n py${version} python=${version}; done +RUN /bin/bash -c 'for version in ${PYTHON_VERSIONS}; \ +do \ + /root/miniconda3/bin/conda create -y -n py${version} python=${version}; \ + /root/miniconda3/bin/conda create -y -n py${version}_conda python=${version}; \ + /root/miniconda3/bin/conda install -y -n py${version}_conda -c conda-forge -c khiops-dev khiops-core=$(echo ${KHIOPS_REVISION} | tr -d "-") ; \ +done' RUN mkdir -p /scripts COPY ./run_service.sh /scripts/run_service.sh diff --git a/packaging/docker/khiopspydev/Dockerfile.ubuntu b/packaging/docker/khiopspydev/Dockerfile.ubuntu index 493f5f5d..81f4556f 100644 --- a/packaging/docker/khiopspydev/Dockerfile.ubuntu +++ b/packaging/docker/khiopspydev/Dockerfile.ubuntu @@ -11,17 +11,15 @@ RUN true \ # Install git (for khiops-python version calculation) and pip \ && apt-get -y update \ && apt-get -y --no-install-recommends install git python3-pip zip pandoc wget \ - # Obtain the Khiops sources \ - && git clone https://github.com/khiopsml/khiops.git \ - && cd khiops \ - && git checkout ${KHIOPS_REVISION} \ - # Build and install khiops \ - # Note: We build the JARs and KNI because the `cmake --install` command below doesn't work \ - && cmake --preset linux-gcc-release -DTESTING=OFF -DBUILD_JARS=ON -DCMAKE_INSTALL_PREFIX= \ - && cmake --build --preset linux-gcc-release --parallel \ - --target MODL MODL_Coclustering KhiopsNativeInterface norm_jar khiops_jar \ - && cmake --install ./build/linux-gcc-release \ - && cd .. \ + # Get Linux distribution codename \ + && if [ -f /etc/os-release ]; then . 
/etc/os-release; fi \ + # Obtain the Khiops native package \ + && KHIOPS_PKG_FILE=$KHIOPS_REVISION/khiops-core-openmpi_$KHIOPS_REVISION-1-$VERSION_CODENAME.amd64.deb \ + && wget -O KHIOPS_CORE.deb "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_PKG_FILE}" \ + # Install the Khiops native package \ + && dpkg -i --force-all KHIOPS_CORE.deb \ + && apt-get -f -y install \ + && rm -f KHIOPS_CORE.deb \ # Set python to python3 \ && update-alternatives --install /usr/bin/python python /usr/bin/python3 1 \ # Install miniconda3 to have multiple Python versions via Conda \ @@ -30,8 +28,8 @@ RUN true \ && echo "b978856ec3c826eb495b60e3fffe621f670c101150ebcbdeede4f961f22dc438 Miniconda3-py312_24.1.2-0-Linux-x86_64.sh" | sha256sum --check \ && bash ./Miniconda3-py312_24.1.2-0-Linux-x86_64.sh -b -u -p /root/miniconda3 \ && rm -rf /root/miniconda3/Miniconda3-py312_24.1.2-0-Linux-x86_64.sh \ - # Make sure that MPI is mpich \ - && update-alternatives --set mpirun /usr/bin/mpirun.mpich \ + # Make sure that MPI is openmpi \ + && update-alternatives --set mpirun /usr/bin/mpirun.openmpi \ # Clean build files \ && rm -fr /var/lib/apt/lists/* \ && apt-get clean \ @@ -41,8 +39,12 @@ RUN true \ # set up all the supported Python environments under conda (for the unit tests) # relying on a variable containing all the versions ARG PYTHON_VERSIONS -RUN for version in ${PYTHON_VERSIONS}; \ - do /root/miniconda3/bin/conda create -y -n py${version} python=${version}; done +RUN /bin/bash -c 'for version in ${PYTHON_VERSIONS}; \ +do \ + /root/miniconda3/bin/conda create -y -n py${version} python=${version}; \ + /root/miniconda3/bin/conda create -y -n py${version}_conda python=${version}; \ + /root/miniconda3/bin/conda install -y -n py${version}_conda -c conda-forge -c khiops-dev khiops-core=$(echo ${KHIOPS_REVISION} | tr -d "-") ; \ +done' RUN mkdir -p /scripts COPY ./run_service.sh /scripts/run_service.sh From f4fdcba0b02e3602a9a3517539b07d7f468f6149 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 12 Jun 2024 19:05:24 +0200 Subject: [PATCH 22/37] Update Khiops binaries package version to 10.2.2b3 in the CI The update is also propagated to the `khiops-core` Conda dependency to facilitate Conda package manufacturing and testing workflow. related_to #183 --- .github/workflows/conda.yml | 6 +++--- .github/workflows/dev-docker.yml | 2 +- packaging/conda/meta.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index fbcef45c..67f78266 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -4,12 +4,12 @@ env: DEFAULT_SAMPLES_VERSION: 10.2.0 # Note: The default Khiops version must never be an alpha release as they are # ephemeral. To test alpha versions run the workflow manually. - DEFAULT_KHIOPS_CORE_VERSION: 10.2.1 + DEFAULT_KHIOPS_CORE_VERSION: 10.2.2b.3 on: workflow_dispatch: inputs: khiops-core-version: - default: 10.2.1 + default: 10.2.2b.3 description: khiops-core version for testing khiops-samples-version: default: 10.2.0 @@ -45,7 +45,7 @@ jobs: miniconda-version: latest python-version: '3.12' - name: Install Dependency Requirements for Building Conda Packages - run: conda install conda-build + run: conda install -y conda-build - name: Build the Conda Package # Note: The "khiops-dev" conda channel is needed to retrieve the "khiops-core" package. # The "test" part of the conda recipe needs this package. 
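
Note: the khiops-core version token above differs between ecosystems on purpose. The upstream Git tag and the native packages put a '-' before the pre-release part (10.2.2-b.3), while a Conda version cannot contain '-' (see the meta.yaml comment in PATCH 34), hence 10.2.2b.3 here. A minimal Python sketch of the correspondence, assuming the KhiopsVersion parsing rules introduced in PATCH 20 and the import path shown in that diff:

    from khiops.core.internals.version import KhiopsVersion

    git_tag = "10.2.2-b.3"                 # Git tag / native package form
    conda_form = git_tag.replace("-", "")  # -> "10.2.2b.3", Conda forbids '-'

    version = KhiopsVersion(git_tag)
    print(version.major, version.minor, version.patch)  # 10 2 2
    print(version.pre_release)                          # b.3
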
diff --git a/.github/workflows/dev-docker.yml b/.github/workflows/dev-docker.yml index 522662fd..8c17d4a3 100644 --- a/.github/workflows/dev-docker.yml +++ b/.github/workflows/dev-docker.yml @@ -1,7 +1,7 @@ --- name: Dev Docker env: - DEFAULT_KHIOPS_REVISION: main + DEFAULT_KHIOPS_REVISION: 10.2.2-b.3 DEFAULT_SERVER_REVISION: main DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12 on: diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 0e62f11e..40050dbb 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -25,7 +25,7 @@ requirements: - python run: - python - - khiops-core >=10.0.0,<11.0.0 + - khiops-core >=10.2.2b.3,<11.0.0 - pandas >=0.25.3 - scikit-learn >=0.22.2 run_constrained: From d67e0dc02763c74a1156585b4b13debcea87e5e8 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 12 Jun 2024 19:26:52 +0200 Subject: [PATCH 23/37] Also test on the `khiops-core`-based Conda environments These Conda environments use the `khiops-core` Conda package instead of the native Khiops packages. related_to #183 --- .github/workflows/unit-tests.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 9254cd82..555a45ed 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -60,9 +60,14 @@ jobs: if: success() || failure() run: | mkdir -p -m u+rwx reports/py${{ matrix.python-version }} - # install within the conda environment without activating it + mkdir -p -m u+rwx reports/py${{ matrix.python-version }}_conda + # install within the conda environments without activating them + # Native Khiops-based Conda environment /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} -c conda-forge unittest-xml-reporting /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} --file test-requirements.txt + # `khiops-core`-based Conda environment + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }}_conda -c conda-forge unittest-xml-reporting + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }}_conda --file test-requirements.txt - name: Install khiops-python dependencies if: success() || failure() run: | @@ -70,8 +75,12 @@ jobs: # as the Git repository is in a directory the current user does not own, # Python versioneer fails to compute the current version correctly otherwise git config --global --add safe.directory $(realpath .) + # Native Khiops-based Conda environment /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} python setup.py egg_info /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }} `grep -v "^\[" khiops.egg-info/requires.txt` + # `khiops-core`-based Conda environment + /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }}_conda python setup.py egg_info + /root/miniconda3/bin/conda install -y -n py${{ matrix.python-version }}_conda `grep -v "^\[" khiops.egg-info/requires.txt` rm -rf khiops.egg-info - name: Prepare Unit Tests Environment if: github.ref != 'dev' && github.ref != 'main' && ! inputs.run-long-tests @@ -86,14 +95,21 @@ jobs: # This is needed so that the Git tag is parsed and the khiops-python # version is retrieved git config --global --add safe.directory $(realpath .) 
+          # Native Khiops-based Conda environments
           /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage run -m xmlrunner -o "reports/py${{ matrix.python-version }}" -v
           /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage report -m
           /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }} coverage xml -o "reports/py${{ matrix.python-version }}/py-coverage.xml"
+          # `khiops-core`-based Conda environments
+          /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }}_conda coverage run -m xmlrunner -o "reports/py${{ matrix.python-version }}_conda" -v
+          /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }}_conda coverage report -m
+          /root/miniconda3/bin/conda run --no-capture-output -n py${{ matrix.python-version }}_conda coverage xml -o "reports/py${{ matrix.python-version }}_conda/py-coverage.xml"
       - name: Display Unit Test Reports
         uses: dorny/test-reporter@v1
         with:
           name: Unit Tests ${{ matrix.python-version }}
-          path: reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml
+          path: >-
+            reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml,
+            reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
           reporter: java-junit
           path-replace-backslashes: 'true'  # Necessary for windows paths
       - name: Upload Test Reports as Artifacts
@@ -103,6 +119,8 @@
           path: |-
             reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml
             reports/py${{ matrix.python-version }}/py-coverage.xml
+            reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
+            reports/py${{ matrix.python-version }}_conda/py-coverage.xml
             tests/resources/scenario_generation/*/ref/*._kh
             tests/resources/scenario_generation/*/output/*._kh
             tests/resources/*/output_reports/*.txt

From 4ecfdb2697eca9d22427b904b5d48e0d520692c1 Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Tue, 18 Jun 2024 16:14:13 +0200
Subject: [PATCH 24/37] Check inferred Conda env binary dir is really inside
 the Conda env

---
 khiops/core/internals/runner.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py
index 9f1bba75..14eadf4a 100644
--- a/khiops/core/internals/runner.py
+++ b/khiops/core/internals/runner.py
@@ -230,6 +230,33 @@ def _infer_env_bin_dir_for_conda_based_installations():
     return env_bin_dir
 
 
+def _check_conda_env_bin_dir(conda_env_bin_dir):
+    """Check that the inferred Conda environment binary directory really is one
+
+    A real Conda environment binary directory:
+    - should exist
+    - should not be directly under the root directory
+    - should coexist with the `conda-meta` directory under the same parent
+    """
+    conda_env_bin_dir_path = Path(conda_env_bin_dir)
+
+    # Conda env bin dir should end with `/bin`
+    assert conda_env_bin_dir_path.parts[-1] == "bin"
+
+    is_conda_env_bin_dir = False
+
+    # Conda env dir is not equal to its root dir
+    # Conda env bin dir exists, along with the `conda-meta` dir
+    conda_env_dir_path = conda_env_bin_dir_path.parent
+    if (
+        conda_env_dir_path != Path(conda_env_dir_path.root)
+        and conda_env_bin_dir_path.is_dir()
+        and conda_env_dir_path.joinpath("conda-meta").is_dir()
+    ):
+        is_conda_env_bin_dir = True
+    return is_conda_env_bin_dir
+
+
 def 
_infer_khiops_installation_method(trace=False): env_bin_dir = _infer_env_bin_dir_for_conda_based_installations() if trace: print(f"Environment binary dir: '{env_bin_dir}'") - if _modl_and_mpiexec_executables_exist(env_bin_dir): + if _check_conda_env_bin_dir( + env_bin_dir + ) and _modl_and_mpiexec_executables_exist(env_bin_dir): installation_method = "conda-based" else: installation_method = "binary+pip" From 87493a2fd8faf5c5c66001461d7d121447c42c8b Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:17:08 +0200 Subject: [PATCH 25/37] Use OpenMPI on Linux system-wide installs - use KHIOPS_PROC_NUMBER or the number of system cores directly in the mpiexec command; thus, oversubscription is avoided for OpenMPI - do not use MPI for <= 2 CPUs (as one is master anyway) - reset mpiexec command according to `max_core` change in the `KhiopsLocalRunner` closes #183 --- .github/workflows/unit-tests.yml | 5 +- khiops/core/internals/runner.py | 122 +++++++++++++++++------------- tests/test_core.py | 24 +++++- tests/test_khiops_integrations.py | 44 +++++------ 4 files changed, 116 insertions(+), 79 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 555a45ed..29bad24f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -91,6 +91,9 @@ jobs: KHIOPS_DOCKER_RUNNER_URL: https://localhost:11000 KHIOPS_DOCKER_RUNNER_SHARED_DIR: /tmp/sandbox KHIOPS_RUNNER_SERVICE_PATH: /scripts/run_service.sh + # This is needed so that OpenMPI's mpiexec can be run as root + OMPI_ALLOW_RUN_AS_ROOT: 1 + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 run: | # This is needed so that the Git tag is parsed and the khiops-python # version is retrieved @@ -170,5 +173,5 @@ jobs: # Make sure MPI support is not loaded through env modules # Note: As Docker container's shell is non-interactive, environment # modules are currently not initializing the shell anyway - if [[ -n "$MODULESHOME" ]]; then module unload mpi; fi + if [ -n "$MODULESHOME" ]; then module unload mpi; fi python -m unittest -v tests.test_khiops_integrations.KhiopsRunnerEnvironmentTests.test_runner_has_mpiexec_on_linux diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 14eadf4a..e4417c38 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -181,12 +181,9 @@ def _compute_max_cores_from_proc_number(proc_number): # if KHIOPS_PROC_NUMBER is 0 we set max_cores to the system's core number if proc_number == 0: max_cores = _get_system_cpu_cores() - # if KHIOPS_PROC_NUMBER is 1 we just set max_cores to 1 (no MPI) - elif proc_number == 1: - max_cores = 1 - # Otherwise we set max_cores to KHIOPS_PROC_NUMBER - 1 + # Otherwise we set max_cores to KHIOPS_PROC_NUMBER else: - max_cores = proc_number - 1 + max_cores = proc_number return max_cores @@ -472,6 +469,9 @@ def max_cores(self): @max_cores.setter def max_cores(self, core_number): + self._set_max_cores(core_number) + + def _set_max_cores(self, core_number): self.general_options.max_cores = core_number self.general_options.check() @@ -1017,16 +1017,20 @@ def __init__(self): # Initialize Khiops environment self._start_khiops_environment_initialization() + def _set_max_cores(self, core_number): + super()._set_max_cores(core_number) + self._initialize_mpi_command_args() + def _start_khiops_environment_initialization(self): # Set the Khiops process number according to the `KHIOPS_PROC_NUMBER` env var if "KHIOPS_PROC_NUMBER" in os.environ: 
            self.max_cores = _compute_max_cores_from_proc_number(
                 int(os.environ["KHIOPS_PROC_NUMBER"])
             )
-        # If not defined, set it to the number of system cores + 1
+        # If not defined, set it to the number of system cores
         else:
             self.max_cores = _get_system_cpu_cores()
-            os.environ["KHIOPS_PROC_NUMBER"] = str(self.max_cores + 1)
+            os.environ["KHIOPS_PROC_NUMBER"] = str(self.max_cores)
 
         # Set the Khiops memory limit
         if "KHIOPS_MEMORY_LIMIT" in os.environ:
@@ -1054,6 +1058,16 @@ def _start_khiops_environment_initialization(self):
         else:
             self.khiops_temp_dir = ""
 
+        # Set the OpenMPI variable OMPI_MCA_plm_rsh_agent to the empty string if not set
+        # This avoids errors on systems without ssh (eg. simple Docker containers)
+        installation_method = _infer_khiops_installation_method()
+        if (
+            platform.system() == "Linux"
+            and installation_method == "binary+pip"
+            and "OMPI_MCA_plm_rsh_agent" not in os.environ
+        ):
+            os.environ["OMPI_MCA_plm_rsh_agent"] = ""
+
         # Initialize the default samples dir
         self._initialize_default_samples_dir()
 
@@ -1154,60 +1168,61 @@ def _initialize_mpi_command_args(self):
             self._set_mpi_command_args_with_mpiexec(mpiexec_path)
         # If MPI is still not found, then do not use MPI and warn the user
         else:
-            self._set_empty_mpi_command_args_and_raise_warning()
-
-    def _set_empty_mpi_command_args_and_raise_warning(self):
-        self.mpi_command_args = []
-        warnings.warn(
-            "mpiexec is not in PATH, Khiops will run with just one CPU. "
-            "We recommend you to reinstall khiops. "
-            "Go to https://khiops.org for more information."
-        )
+            self.mpi_command_args = []
+            warnings.warn(
+                "mpiexec is not in PATH, Khiops will run with just one CPU. "
+                "We recommend you to reinstall khiops. "
+                "Go to https://khiops.org for more information."
+            )
 
     def _set_mpi_command_args_with_mpiexec(self, mpiexec_path):
-        self.mpi_command_args = [mpiexec_path]
-        mpi_command_args = os.environ.get("KHIOPS_MPI_COMMAND_ARGS")
-        if mpi_command_args is not None:
-            self.mpi_command_args += shlex.split(mpi_command_args)
-        elif platform.system() == "Linux":
-            self.mpi_command_args += [
-                "-bind-to",
-                "hwthread",
-                "-map-by",
-                "core",
-                "-n",
-                str(self.max_cores + 1),
-            ]
-        elif platform.system() == "Darwin":
-            # Note: The '-host localhost' arguments for arm64
-            # may be removed when mpich > 4.1.2 is released
-            if platform.processor() == "arm":
+        assert mpiexec_path is not None
+        # User-specified MPI command args take precedence over automatic setting
+        if "KHIOPS_MPI_COMMAND_ARGS" in os.environ:
+            self.mpi_command_args = [mpiexec_path] + shlex.split(
+                os.environ["KHIOPS_MPI_COMMAND_ARGS"]
+            )
+        # With only 1 or 2 processes run sequentially (without MPI)
+        elif self.max_cores in (1, 2):
+            self.mpi_command_args = []
+            warnings.warn(
+                f"Too few cores: {self.max_cores}. "
+                "To efficiently run Khiops in parallel at least 3 processes "
+                "are needed. Khiops will run in a single process." 
+ ) + # Otherwise, build the mpiexec command arguments + else: + self.mpi_command_args = [mpiexec_path] + if platform.system() == "Windows": + self.mpi_command_args += [ + "-al", + "spr:P", + "-n", + str(self.max_cores), + "/priority", + "1", + ] + elif platform.system() == "Linux": + self.mpi_command_args += [ + "-bind-to", + "hwthread", + "-map-by", + "core", + "-n", + str(self.max_cores), + ] + elif platform.system() == "Darwin": self.mpi_command_args += [ "-host", "localhost", "-n", - str(self.max_cores + 1), + str(self.max_cores), ] else: - self.mpi_command_args = [ - mpiexec_path, - "-n", - str(self.max_cores + 1), - ] - elif platform.system() == "Windows": - self.mpi_command_args += [ - "-al", - "spr:P", - "-n", - str(self.max_cores + 1), - "/priority", - "1", - ] - else: - raise KhiopsEnvironmentError( - f"Unsupported OS {platform.system()}. " - "Check the supported OSes at https://khiops.org." - ) + raise KhiopsEnvironmentError( + f"Unsupported OS {platform.system()}. " + "Check the supported OSes at https://khiops.org." + ) def _initialize_default_samples_dir(self): """See class docstring""" @@ -1268,6 +1283,7 @@ def _initialize_khiops_bin_dir(self): # System-wide installations else: self._initialize_default_system_wide_khiops_bin_dir() + assert self.khiops_bin_dir is not None def _initialize_default_system_wide_khiops_bin_dir(self): # Warn if both KHIOPS_HOME and KhiopsHome are set diff --git a/tests/test_core.py b/tests/test_core.py index 1e6a9e13..a397d599 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2599,7 +2599,7 @@ def test_khiops_environment_variables_basic(self): "variable": "KHIOPS_PROC_NUMBER", "value": 2, "runner_field": "max_cores", - "expected_field_value": 1, + "expected_field_value": 2, }, { "variable": "KHIOPS_PROC_NUMBER", @@ -2661,6 +2661,24 @@ def test_khiops_environment_variables_basic(self): else: os.environ[fixture["variable"]] = old_value + def test_mpi_command_is_updated_on_max_cores_update(self): + """Test MPI command is updated on max_cores update""" + # Create a fresh runner and initialize its env + with MockedRunnerContext(create_mocked_raw_run(False, False, 0)) as runner: + pass + + # Update max_cores + max_cores_updated_value = 100 + runner.max_cores = max_cores_updated_value + + # Check MPI command arguments contain the updated max_cores + # The number of cores in the MPI command is the value after '-n' + mpi_command_args = runner.mpi_command_args + max_cores_in_mpi_command = int( + mpi_command_args[mpi_command_args.index("-n") + 1] + ) + self.assertEqual(max_cores_in_mpi_command, max_cores_updated_value) + def test_undefined_khiops_proc_number_env_var(self): """Test default value for KHIOPS_PROC_NUMBER env var @@ -2677,8 +2695,8 @@ def test_undefined_khiops_proc_number_env_var(self): pass # Define default `KHIOPS_PROC_NUMBER` and check the `maxcores` attribute # is set accordingly - default_khiops_proc_number = _get_system_cpu_cores() + 1 - self.assertEqual(runner.max_cores, default_khiops_proc_number - 1) + default_khiops_proc_number = _get_system_cpu_cores() + self.assertEqual(runner.max_cores, default_khiops_proc_number) # Check default environment variable value is added self.assertTrue("KHIOPS_PROC_NUMBER" in os.environ) diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py index 20d62094..7b7c30c8 100644 --- a/tests/test_khiops_integrations.py +++ b/tests/test_khiops_integrations.py @@ -35,7 +35,7 @@ def test_runner_has_mpiexec_on_linux(self): # Check package is installed on supported platform: # 
Check /etc/os-release for Linux version
         linux_distribution = None
-        mpich_found = None
+        openmpi_found = None
         with open(
             os.path.join(os.sep, "etc", "os-release"), encoding="ascii"
         ) as os_release_info:
@@ -44,48 +44,48 @@ def test_runner_has_mpiexec_on_linux(self):
                     linux_distribution = entry.split("=")[-1].strip('"\n').lower()
                     break
 
-        # Check if MPICH is installed on the Debian Linux OS
+        # Check if OpenMPI is installed on the Debian Linux OS
         if linux_distribution == "ubuntu":
             with subprocess.Popen(
-                ["dpkg", "-l", "mpich"],
+                ["dpkg", "-l", "openmpi-bin"],
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.DEVNULL,
                 universal_newlines=True,
-            ) as mpich_query:
-                stdout, _ = mpich_query.communicate()
-                if mpich_query.returncode != 0:
-                    mpich_found = False
+            ) as openmpi_query:
+                stdout, _ = openmpi_query.communicate()
+                if openmpi_query.returncode != 0:
+                    openmpi_found = False
                 for line in stdout.splitlines():
-                    if all(field in line for field in ("ii", "mpich")):
-                        # MPICH installed
-                        mpich_found = True
+                    # openmpi installed
+                    if all(field in line for field in ("ii", "openmpi")):
+                        openmpi_found = True
                         break
                 else:
-                    mpich_found = False
+                    openmpi_found = False
 
-        # Check if MPICH is installed on the CentOS / Rocky Linux OS
+        # Check if openmpi is installed on the CentOS / Rocky Linux OS
         elif linux_distribution == "rocky linux":
             with subprocess.Popen(
-                ["yum", "list", "installed", "mpich"],
+                ["yum", "list", "installed", "openmpi"],
                 stdin=subprocess.DEVNULL,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.DEVNULL,
                 universal_newlines=True,
-            ) as mpich_query:
-                stdout, _ = mpich_query.communicate()
-                if mpich_query.returncode != 0:
-                    mpich_found = False
+            ) as openmpi_query:
+                stdout, _ = openmpi_query.communicate()
+                if openmpi_query.returncode != 0:
+                    openmpi_found = False
                 for line in stdout.splitlines():
-                    if line.startswith("mpich"):
-                        # MPICH installed
-                        mpich_found = True
+                    # openmpi installed
+                    if line.startswith("openmpi"):
+                        openmpi_found = True
                         break
                 else:
-                    mpich_found = False
+                    openmpi_found = False
         else:
             self.skipTest("Skipping test: platform not Ubuntu or Rocky Linux")
 
-        if mpich_found:
+        if openmpi_found:
             runner = kh.get_runner()
             if not runner.mpi_command_args:
                 self.fail("MPI support found, but MPI command args not set")

From 7dad777efbb4f54891a00249931fc8ed37550f46 Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Tue, 18 Jun 2024 15:41:55 +0200
Subject: [PATCH 26/37] Oversubscribe in the CI to use 4 cores

Thus, tests become more realistic. 
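
Context for the diffs below: as of PATCH 25 the runner skips mpiexec entirely when max_cores is 1 or 2, so the workflows pin KHIOPS_PROC_NUMBER to 4 and enable OpenMPI oversubscription in case the CI runner has fewer physical cores. A rough sketch of that decision rule, with an illustrative helper that is not the actual KhiopsLocalRunner code and shows only the '-n' part of the Linux branch:

    def build_mpi_command_args(max_cores, mpiexec_path="mpiexec"):
        # Khiops runs sequentially below 3 processes, so mpiexec is skipped
        if max_cores in (1, 2):
            return []
        # On Linux the runner now passes max_cores directly to -n
        return [mpiexec_path, "-n", str(max_cores)]

    assert build_mpi_command_args(2) == []
    assert build_mpi_command_args(4) == ["mpiexec", "-n", "4"]
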
--- .github/workflows/conda.yml | 8 ++++++++ .github/workflows/pip.yml | 7 +++++++ .github/workflows/unit-tests.yml | 10 ++++++++++ 3 files changed, 25 insertions(+) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 67f78266..5a43c5e0 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -111,6 +111,14 @@ jobs: kh-download-datasets \ --version ${{ inputs.khiops-samples-version || env.DEFAULT_SAMPLES_VERSION }} - name: Test Conda Package Installation on Samples + env: + # Force > 2 CPU cores to launch mpiexec + KHIOPS_PROC_NUMBER: 4 + # Oversubscribe for MPI 4.x + rmaps_base_oversubscribe: true + # Oversubscribe for MPI > 4.x + OMPI_MCA_rmaps_base_oversubscribe: true + PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe run: | kh-samples core -i train_predictor -e kh-samples core -i train_predictor_error_handling -e diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 900a9fb4..c73ee281 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -71,6 +71,13 @@ jobs: - name: Run tests env: KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples + # Force > 2 CPU cores to launch mpiexec + KHIOPS_PROC_NUMBER: 4 + # Oversubscribe for MPI 4.x + rmaps_base_oversubscribe: true + # Oversubscribe for MPI > 4.x + OMPI_MCA_rmaps_base_oversubscribe: true + PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe run: |- # Make sure MPI support is not loaded through env modules # Note: As the Docker container's shell is non-interactive, environment diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 29bad24f..bf7b9f45 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -94,6 +94,13 @@ jobs: # This is needed so that OpenMPI's mpiexec can be run as root OMPI_ALLOW_RUN_AS_ROOT: 1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 + # Force > 2 CPU cores to launch mpiexec + KHIOPS_PROC_NUMBER: 4 + # Oversubscribe for MPI 4.x + rmaps_base_oversubscribe: true + # Oversubscribe for MPI > 4.x + OMPI_MCA_rmaps_base_oversubscribe: true + PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe run: | # This is needed so that the Git tag is parsed and the khiops-python # version is retrieved @@ -169,6 +176,9 @@ jobs: run: | pip install -r test-requirements.txt - name: Launch proper MPI awareness test + env: + # Force > 2 CPU cores to launch mpiexec + KHIOPS_PROC_NUMBER: 4 run: |- # Make sure MPI support is not loaded through env modules # Note: As Docker container's shell is non-interactive, environment From bd92a2a4624ee40e2f6ae693162b9910bd15c5e7 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:50:24 +0200 Subject: [PATCH 27/37] Allow OpenMPI to run as root for all workflows --- .github/workflows/conda.yml | 3 +++ .github/workflows/pip.yml | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 5a43c5e0..0c185f29 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -114,6 +114,9 @@ jobs: env: # Force > 2 CPU cores to launch mpiexec KHIOPS_PROC_NUMBER: 4 + # This is needed so that OpenMPI's mpiexec can be run as root + OMPI_ALLOW_RUN_AS_ROOT: 1 + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 # Oversubscribe for MPI 4.x rmaps_base_oversubscribe: true # Oversubscribe for MPI > 4.x diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index c73ee281..ed05e318 100644 --- a/.github/workflows/pip.yml +++ 
b/.github/workflows/pip.yml @@ -71,6 +71,9 @@ jobs: - name: Run tests env: KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples + # This is needed so that OpenMPI's mpiexec can be run as root + OMPI_ALLOW_RUN_AS_ROOT: 1 + OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 # Force > 2 CPU cores to launch mpiexec KHIOPS_PROC_NUMBER: 4 # Oversubscribe for MPI 4.x @@ -82,7 +85,7 @@ jobs: # Make sure MPI support is not loaded through env modules # Note: As the Docker container's shell is non-interactive, environment # modules are currently not initializing the shell anyway - if [[ -n "$MODULESHOME" ]]; then module unload mpi; fi + if [ -n "$MODULESHOME" ]; then module unload mpi; fi # Print khiops installation status kh-status From 5d8d4ca41d377aa08e442db6c7d6fa3fce2839d3 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:48:23 +0200 Subject: [PATCH 28/37] Force `khiops` Conda package installation from the artefact channel in the CI --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 0c185f29..50f1dbc7 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -97,7 +97,7 @@ jobs: if: runner.os == 'Windows' run: | conda install --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION - conda install --channel ./khiops-conda/ khiops + conda install --override-channels --channel conda-forge --channel ./khiops-conda/ khiops # In Linux/macOS we need the conda-forge channel to install their pinned versions - name: Install the Khiops Conda package (Linux/macOS) if: runner.os != 'Windows' From 00a25403744c0b5d364889ee70ab81bf319ae704 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 20 Jun 2024 09:59:08 +0200 Subject: [PATCH 29/37] Drop MacOS 11 support, as GitHub runners are dropping it as well See https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners. --- .github/workflows/conda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 50f1dbc7..a107619c 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -72,7 +72,6 @@ jobs: - {os: ubuntu-22.04, json-image: '{"image": "rockylinux:9"}'} - {os: windows-2019, json-image: '{"image": null}'} - {os: windows-2022, json-image: '{"image": null}'} - - {os: macos-11, json-image: '{"image": null}'} - {os: macos-12, json-image: '{"image": null}'} - {os: macos-13, json-image: '{"image": null}'} - {os: macos-14, json-image: '{"image": null}'} From 540b857536c0e73bd49e587f81aab84238338977 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 20 Jun 2024 18:32:10 +0200 Subject: [PATCH 30/37] Remove OpenMPI-specific CI environment variables for Conda tests This is because OpenMPI is not used in Conda environments for Khiops Conda packages. MPICH is used instead. 
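
The variables dropped below only affect OpenMPI; the khiops-core Conda package bundles MPICH, which ignores them. In runner terms this is the same split drawn by _infer_khiops_installation_method (shown in PATCH 24): "conda-based" installs get MPICH, native Linux "binary+pip" installs get OpenMPI. A hedged sketch of guarding such settings in a test script; the helper is private and the guard itself is illustrative, while the variable names mirror the workflow files above:

    import os
    from khiops.core.internals.runner import _infer_khiops_installation_method

    if _infer_khiops_installation_method() == "binary+pip":
        # OpenMPI-only settings; MPICH (conda-based installs) ignores them
        os.environ.setdefault("OMPI_ALLOW_RUN_AS_ROOT", "1")
        os.environ.setdefault("OMPI_ALLOW_RUN_AS_ROOT_CONFIRM", "1")
        os.environ.setdefault("OMPI_MCA_rmaps_base_oversubscribe", "true")
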
---
 .github/workflows/conda.yml | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index a107619c..74297025 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -113,14 +113,6 @@ jobs:
         env:
           # Force > 2 CPU cores to launch mpiexec
           KHIOPS_PROC_NUMBER: 4
-          # This is needed so that OpenMPI's mpiexec can be run as root
-          OMPI_ALLOW_RUN_AS_ROOT: 1
-          OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
-          # Oversubscribe for MPI 4.x
-          rmaps_base_oversubscribe: true
-          # Oversubscribe for MPI > 4.x
-          OMPI_MCA_rmaps_base_oversubscribe: true
-          PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe
         run: |
           kh-samples core -i train_predictor -e
           kh-samples core -i train_predictor_error_handling -e

From 2ed28643511617c33b5cfe529f72052b136bdca0 Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Thu, 20 Jun 2024 19:03:31 +0200
Subject: [PATCH 31/37] Look up the OpenMPI environment module for Rocky local
 installations

---
 khiops/core/internals/runner.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py
index e4417c38..c5f7a794 100644
--- a/khiops/core/internals/runner.py
+++ b/khiops/core/internals/runner.py
@@ -1100,9 +1100,9 @@ def _initialize_mpi_command_args(self):
         mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or shutil.which(
             "mpiexec"
         )
-        # If mpiexec is not in the path, then try to load MPI environment module
-        # so that mpiexec is in the path
-        if mpiexec_path is None:
+        # If mpiexec is not in the path, and the installation method is local,
+        # then try to load MPI environment module so that mpiexec is in the path
+        if mpiexec_path is None and installation_method == "binary+pip":
             # If environment modules are installed, then load the MPI module
             module_init_script_path = os.path.join(
                 os.path.sep, "etc", "profile.d", "modules.sh"
@@ -1130,17 +1130,13 @@ def _initialize_mpi_command_args(self):
                 reverse=True,
             ):
                 # If MPI environment module is found, attempt to load it
-                if (
-                    re.search("mpich-[0-9]", line) is not None
-                    and platform.machine() in line
-                    or f"mpich-{platform.machine()}" in line
-                ):
-                    mpich_module = line
+                if f"openmpi-{platform.machine()}" in line:
+                    mpi_module = line
                     # Use 'type -P' to get the path to executable,
                     # as 'which' is non-portable shell
                     shell_command = shlex.split(
                         f"sh -c 'source {module_init_script_path} && "
-                        f"module unload mpi && module load {mpich_module} && "
+                        f"module unload mpi && module load {mpi_module} && "
                         "type -P mpiexec'"
                     )
                     with subprocess.Popen(

From 3094527b394b29049ad066c399e53be85178fc82 Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Fri, 28 Jun 2024 14:33:19 +0200
Subject: [PATCH 32/37] Honor recent MPI command simplifications in upstream
 Khiops binary packages

Thus, functional parity is kept with the `khiops-env` script which is
part of the native Khiops binary packages. 
closes #192 --- khiops/core/internals/runner.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index c5f7a794..f3d341df 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -1155,13 +1155,13 @@ def _initialize_mpi_command_args(self): break if mpiexec_path is not None: self._set_mpi_command_args_with_mpiexec( - mpiexec_path + mpiexec_path, installation_method ) break # If MPI is found, then set the path to mpiexec accordingly if mpiexec_path is not None: - self._set_mpi_command_args_with_mpiexec(mpiexec_path) + self._set_mpi_command_args_with_mpiexec(mpiexec_path, installation_method) # If MPI is still not found, then do not use MPI and warn the user else: self.mpi_command_args = [] @@ -1171,7 +1171,7 @@ def _initialize_mpi_command_args(self): "Go to https://khiops.org for more information." ) - def _set_mpi_command_args_with_mpiexec(self, mpiexec_path): + def _set_mpi_command_args_with_mpiexec(self, mpiexec_path, installation_method): assert mpiexec_path is not None # User-specified MPI command args take precendence over automatic setting if "KHIOPS_MPI_COMMAND_ARGS" in os.environ: @@ -1199,11 +1199,10 @@ def _set_mpi_command_args_with_mpiexec(self, mpiexec_path): "1", ] elif platform.system() == "Linux": + # For Linux native installations we use OpenMPI + if installation_method == "binary+pip": + self.mpi_command_args.append("--quiet") self.mpi_command_args += [ - "-bind-to", - "hwthread", - "-map-by", - "core", "-n", str(self.max_cores), ] From c698cd214920c9d144a78c1fa9554da2b179102b Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:13:28 +0200 Subject: [PATCH 33/37] Backport OpenMPI variables from `khiops-env` to mitigate errors Thus, PR https://github.com/KhiopsML/khiops/pull/313 that addresses issue https://github.com/KhiopsML/khiops/issues/307 is backported to the KhiopsLocalRunner Python code. closes #196 --- khiops/core/internals/runner.py | 84 ++++++++++++++++++++++++++++--- tests/test_khiops_integrations.py | 4 +- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index f3d341df..5fe03059 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -294,6 +294,62 @@ def _check_executable(bin_path): ) +def get_linux_distribution_name(): + """Detect Linux distribution name + + Parses the `NAME` variable defined in the `/etc/os-release` or + `/usr/lib/os-release` files and converts it to lowercase. 
+
+    Returns
+    -------
+    str
+        Name of the Linux distribution, converted to lowercase
+
+    Raises
+    ------
+    OSError
+        If neither `/etc/os-release` nor `/usr/lib/os-release` is found
+    """
+
+    def get_linux_distribution_from_os_release_file(os_release_file_path):
+        # The `NAME` variable is always defined according to the freedesktop.org
+        # standard:
+        # https://www.freedesktop.org/software/systemd/man/latest/os-release.html
+        with open(os_release_file_path, encoding="ascii") as os_release_info_file:
+            for entry in os_release_info_file:
+                if entry.startswith("NAME"):
+                    linux_distribution = entry.split("=")[-1].strip('"\n')
+                    break
+        return linux_distribution
+
+    assert platform.system() == "Linux"
+
+    # If Python version >= 3.10, use standard library support; see
+    # https://docs.python.org/3/library/platform.html#platform.freedesktop_os_release
+    python_ver_major, python_ver_minor, _ = platform.python_version_tuple()
+    if int(python_ver_major) >= 3 and int(python_ver_minor) >= 10:
+        linux_distribution = platform.freedesktop_os_release()["NAME"]
+
+    # If Python version < 3.10, determine the Linux distribution manually,
+    # but mimic the behavior of Python >= 3.10 standard library support
+    else:
+        # First try to parse /etc/os-release
+        try:
+            linux_distribution = get_linux_distribution_from_os_release_file(
+                os.path.join(os.sep, "etc", "os-release")
+            )
+        except FileNotFoundError:
+            # Fallback on parsing /usr/lib/os-release
+            try:
+                linux_distribution = get_linux_distribution_from_os_release_file(
+                    os.path.join(os.sep, "usr", "lib", "os-release")
+                )
+            # Mimic `platform.freedesktop_os_release` function behavior
+            except FileNotFoundError as error:
+                raise OSError from error
+    return linux_distribution.lower()
+
+
 class KhiopsRunner(ABC):
     """Abstract Khiops Python runner to be re-implemented"""
 
@@ -1058,15 +1114,27 @@ def _start_khiops_environment_initialization(self):
         else:
             self.khiops_temp_dir = ""
 
-        # Set the OpenMPI variable OMPI_MCA_plm_rsh_agent to the empty string if not set
-        # This avoids errors on systems without ssh (eg. simple Docker containers)
         installation_method = _infer_khiops_installation_method()
-        if (
-            platform.system() == "Linux"
-            and installation_method == "binary+pip"
-            and "OMPI_MCA_plm_rsh_agent" not in os.environ
-        ):
-            os.environ["OMPI_MCA_plm_rsh_agent"] = ""
+        if platform.system() == "Linux" and installation_method == "binary+pip":
+            # Set the OpenMPI variable OMPI_MCA_plm_rsh_agent to the empty string
+            # if not set
+            # This avoids errors on systems without ssh (e.g. 
simple Docker containers) + if "OMPI_MCA_plm_rsh_agent" not in os.environ: + os.environ["OMPI_MCA_plm_rsh_agent"] = "" + + # Set the OpenMPI variable OMPI_MCA_btl_vader_single_copy_mechanism + # to the "none" string value to remove the mpi message + # "Read -1, expected 65536, errno = 1" that appears on Docker + if "OMPI_MCA_btl_vader_single_copy_mechanism" not in os.environ: + os.environ["OMPI_MCA_btl_vader_single_copy_mechanism"] = "none" + + # Set the OpenMPI variable PSM3_DEVICES to the "self" string value to + # fix issue https://github.com/KhiopsML/khiops/issues/307 on Rocky + if ( + get_linux_distribution_name() == "rocky linux" + and "PSM3_DEVICES" not in os.environ + ): + os.environ["PSM3_DEVICES"] = "self" # Initialize the default samples dir self._initialize_default_samples_dir() diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py index 7b7c30c8..02a18392 100644 --- a/tests/test_khiops_integrations.py +++ b/tests/test_khiops_integrations.py @@ -15,7 +15,7 @@ import khiops.core as kh from khiops.core.exceptions import KhiopsEnvironmentError -from khiops.core.internals.runner import KhiopsLocalRunner +from khiops.core.internals.runner import KhiopsLocalRunner, get_linux_distribution_name from khiops.extras.docker import KhiopsDockerRunner from khiops.sklearn.estimators import KhiopsClassifier from tests.test_helper import KhiopsTestHelper @@ -34,7 +34,7 @@ def test_runner_has_mpiexec_on_linux(self): """Test that local runner has executable mpiexec on Linux if MPI is installed""" # Check package is installed on supported platform: # Check /etc/os-release for Linux version - linux_distribution = None + linux_distribution = get_linux_distribution_name() openmpi_found = None with open( os.path.join(os.sep, "etc", "os-release"), encoding="ascii" From e5bc7e560b927b710b098487761387d8c56c4104 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:32:39 +0200 Subject: [PATCH 34/37] Fix Conda package version extraction from Git tag The '-' pre-release version character is removed from the Conda version. --- packaging/conda/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 40050dbb..06de61a3 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -1,7 +1,8 @@ {% set metadata = load_setup_py_data(setup_file='../../setup.py', from_recipe_dir=True) %} package: name: {{ metadata.get('name') }} - version: {{ metadata.get('version') }} + # The Conda version cannot contain the '-' character, so we eliminate it + version: {{ metadata.get('version') | replace('-', '') }} source: path: ../../ From feea8d7253a24cc01458c5d0c25876835175cc2f Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 3 Jul 2024 14:03:31 +0200 Subject: [PATCH 35/37] Update CHANGELOG for release 10.2.2.0 --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c25ea3e..ef1f42bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,19 @@ - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1. - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists. +## 10.2.2.0 - 2024-07-03 + +### Added +- (`sklearn`) Support for sparse arrays in sklearn estimators. + +### Changed +- *Internals*: + - MPI backend from MPICH to OpenMPI for native + Pip-based Linux installations. 
+
+### Fixed
+- `core`
+  - Metric name search in estimator analysis report.
+
 ## 10.2.1.0 - 2024-03-26
 
 ### Added

From 10d9855ce40d457ee11020347d289660da6f2dac Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Wed, 3 Jul 2024 14:11:22 +0200
Subject: [PATCH 36/37] Update upstream Khiops dependency default versions to
 10.2.2 in the CI

---
 .github/workflows/conda.yml      | 4 ++--
 .github/workflows/dev-docker.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index 74297025..9f26b6be 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -4,12 +4,12 @@ env:
   DEFAULT_SAMPLES_VERSION: 10.2.0
   # Note: The default Khiops version must never be an alpha release as they are
   # ephemeral. To test alpha versions run the workflow manually.
-  DEFAULT_KHIOPS_CORE_VERSION: 10.2.2b.3
+  DEFAULT_KHIOPS_CORE_VERSION: 10.2.2
 on:
   workflow_dispatch:
     inputs:
       khiops-core-version:
-        default: 10.2.2b.3
+        default: 10.2.2
         description: khiops-core version for testing
       khiops-samples-version:
         default: 10.2.0
diff --git a/.github/workflows/dev-docker.yml b/.github/workflows/dev-docker.yml
index 8c17d4a3..d6995cb5 100644
--- a/.github/workflows/dev-docker.yml
+++ b/.github/workflows/dev-docker.yml
@@ -1,7 +1,7 @@
 ---
 name: Dev Docker
 env:
-  DEFAULT_KHIOPS_REVISION: 10.2.2-b.3
+  DEFAULT_KHIOPS_REVISION: 10.2.2
   DEFAULT_SERVER_REVISION: main
   DEFAULT_PYTHON_VERSIONS: 3.8 3.9 3.10 3.11 3.12
 on:

From 9e3b4e5397d32a2250636a89a650ae56d13516a3 Mon Sep 17 00:00:00 2001
From: Popescu V <136721202+popescu-v@users.noreply.github.com>
Date: Wed, 3 Jul 2024 14:47:33 +0200
Subject: [PATCH 37/37] Only deploy GitHub pages when explicitly instructed to
 do so

---
 .github/workflows/api-docs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/api-docs.yml b/.github/workflows/api-docs.yml
index 89d17b7a..419791a3 100644
--- a/.github/workflows/api-docs.yml
+++ b/.github/workflows/api-docs.yml
@@ -68,7 +68,7 @@ jobs:
           path: doc/_build/html/
   # Deploy only when the user explicitly (and manually) orders it
   deploy:
-    if: ${{ github.event_name == 'workflow_dispatch' || inputs.deploy-gh-pages == true }}
+    if: github.event_name == 'workflow_dispatch' && inputs.deploy-gh-pages == true
    runs-on: ubuntu-latest
    needs: build
    environment: