diff --git a/CMakeLists.txt b/CMakeLists.txt index af6c6fce..5d3a3016 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ endif () # include(FindArrowParquet) option(YGM_REQUIRE_ARROW_PARQUET "YGM requires Apache Arrow Parquet." OFF) -find_arrow_parquet() +find_or_install_arrow_parquet() # # Create the YGM target library diff --git a/cmake/FindArrowParquet.cmake b/cmake/FindArrowParquet.cmake index 8667df9d..b922d07f 100644 --- a/cmake/FindArrowParquet.cmake +++ b/cmake/FindArrowParquet.cmake @@ -1,44 +1,4 @@ -# Find Arrow and Parquet using find_package -function(find_arrow_parquet_config) - # Find Arrow >= 8.0. - # Start major version from 100 so that we do not have to update - # this code every time Arrow releases a major version. - foreach (MAJOR_VERSION RANGE 100 8 -1) - find_package(Arrow "${MAJOR_VERSION}.0" QUIET) - if (Arrow_FOUND) - break() - endif () - endforeach () - set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE) - - # Find Parquet - if (Arrow_FOUND) - find_package(Parquet QUIET PATHS ${Arrow_DIR}) - endif () - set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE) - - # Show Arrow and Parquet info - if (Arrow_FOUND AND Parquet_FOUND) - if (Arrow_FOUND) - message(STATUS ${PROJECT_NAME} " found Arrow") - message(STATUS "Arrow version: ${ARROW_VERSION}") - message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}") - endif () - - if (Parquet_FOUND) - message(STATUS ${PROJECT_NAME} " found Parquet") - message(STATUS "Parquet version: ${PARQUET_VERSION}") - message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}") - endif () - else () - if (YGM_REQUIRE_ARROW_PARQUET) - message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet >= 8.0 but Arrow Parquet was not found.") - else () - message(WARNING "${PROJECT_NAME} did not find Arrow Parquet >= 8.0. Building without Arrow Parquet.") - endif () - endif () -endfunction() - +include(PythonUtilities) # Find Arrow and Parquet installed along with pyarrow by pip. 
# @@ -60,7 +20,7 @@ endfunction() # If Arrow and Parquet are found, set Arrow_FOUND and Parquet_FOUND to TRUE. # Also, Arrow::arrow_shared and Parquet::parquet_shared are created as imported targets. # Those targets can be used to link Arrow and Parquet as find_package() is used. -function(find_pyarrow) +function(find_pip_installed_pyarrow) if (PIP_PYARROW_ROOT) # Find libarrow file(GLOB Arrow_LIBRARIES LIST_DIRECTORIES false "${PIP_PYARROW_ROOT}/libarrow.so.*") @@ -116,12 +76,6 @@ function(find_pyarrow) endif () message(STATUS "Arrow include dir: ${Arrow_INCLUDE_DIRS}") - else () # Arrow or Parquet not found - if (YGM_REQUIRE_ARROW_PARQUET) - message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found.") - else () - message(WARNING "${PROJECT_NAME} did not find Arrow Parquet. Building without Arrow Parquet.") - endif () endif () else () message(FATAL_ERROR "PIP_PYARROW_ROOT is not set. PIP_PYARROW_ROOT must be set to the root of the pyarrow installation.")
@@ -130,24 +84,149 @@ function(find_pyarrow)
 endfunction()
 
 
-# Find Arrow and Parquet using find_arrow or find_pyarrow
-# If PIP_PYARROW_ROOT is set, find_pyarrow is used.
+# Find the directory that contains the pyarrow Python module.
+# This function asks the Python 3 interpreter (through find_python3_module) and
+# **does not assume that pyarrow is installed by pip**.
 #
 # Output:
-# Arrow_FOUND and Parquet_FOUND are set to TRUE if Arrow and Parquet are found.
-function(find_arrow_parquet)
-    if (PIP_PYARROW_ROOT)
-        find_pyarrow()
-    else ()
-        find_arrow_parquet_config()
+# PYARROW_ROOT is set in the caller's scope to the directory of the module file; it is not touched if pyarrow is not found.
+function(find_pyarrow_package)
+    find_python3_module(pyarrow)
+    if (PYTHON3_MODULE_PATH)
+        get_filename_component(PYARROW_ROOT "${PYTHON3_MODULE_PATH}" DIRECTORY)
+        set(PYARROW_ROOT "${PYARROW_ROOT}" PARENT_SCOPE)
     endif ()
+endfunction()
+
+# Install pyarrow using pip in a Python virtual environment
+# Output:
+# PIP_PYARROW_ROOT is set to the root of the pyarrow installation (only if the installation succeeded).
+function(install_pyarrow_in_venv)
+    # Create the venv; PYTHON_VENV_ROOT is only set when the creation succeeded.
+    setup_python_venv()
+    if (NOT PYTHON_VENV_ROOT)
+        return()
+    endif ()
+
+    activate_python_venv("${PYTHON_VENV_ROOT}")
+    if (NOT PYTHON_VENV_ACTIVATED)
+        return()
+    endif ()
+
+    # Use only the Python 3 interpreter in the virtual environment
+    set(Python3_FIND_VIRTUALENV ONLY)
+
+    # Upgrade pip; the outcome is deliberately ignored because an old pip is not fatal.
+    upgrade_pip()
+
+    # Install pyarrow
+    pip_install_python_package("pyarrow==16.1.*")
+    if (PIP_INSTALL_SUCCEEDED)
+        # Forget any PYARROW_ROOT inherited from the caller so that only the
+        # result of the lookup below is reported.
+        unset(PYARROW_ROOT)
+        find_pyarrow_package()
+        if (PYARROW_ROOT)
+            set(PIP_PYARROW_ROOT "${PYARROW_ROOT}" PARENT_SCOPE)
+        endif ()
+    endif ()
+
+    deactivate_python_venv()
+endfunction()
+
+
+# Find Arrow and Parquet using find_package
+# Output (both are set in the caller's scope):
+# Arrow_FOUND is set to TRUE if Arrow is found.
+# Parquet_FOUND is set to TRUE if Parquet is found.
+# Version details are printed only when both packages are found.
+function(find_arrow_parquet_config)
+    # Find Arrow >= 8.0.
+    # Start major version from 100 so that we do not have to update
+    # this code every time Arrow releases a major version.
+    foreach (MAJOR_VERSION RANGE 100 8 -1)
+        find_package(Arrow "${MAJOR_VERSION}.0" QUIET)
+        if (Arrow_FOUND)
+            break()
+        endif ()
+    endforeach ()
     set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)
+
+    # Find Parquet, searching the directory of the Arrow package found above
+    if (Arrow_FOUND)
+        find_package(Parquet QUIET PATHS ${Arrow_DIR})
+    endif ()
     set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)
+
+    # Show Arrow and Parquet info
+    if (Arrow_FOUND AND Parquet_FOUND)
+        message(STATUS ${PROJECT_NAME} " found Arrow")
+        message(STATUS "Arrow version: ${ARROW_VERSION}")
+        message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}")
+
+        message(STATUS ${PROJECT_NAME} " found Parquet")
+        message(STATUS "Parquet version: ${PARQUET_VERSION}")
+        message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}")
+    endif ()
+endfunction()
+
+# Find Arrow and Parquet. If not found, install pyarrow using pip in a Python virtual environment.
+# Input:
+# PIP_PYARROW_ROOT (option) The root directory of a pyarrow installed by pip. If set, only this root is probed.
+# YGM_REQUIRE_ARROW_PARQUET (option) If TRUE, a fatal error is raised when Arrow Parquet is not found.
+# Search order: PIP_PYARROW_ROOT, find_package(), pyarrow visible to Python 3, pyarrow installed into a venv.
+# Output: Arrow_FOUND and Parquet_FOUND are set to TRUE in the caller's scope if Arrow and Parquet are found.
+function(find_or_install_arrow_parquet)
+    if (PIP_PYARROW_ROOT)
+        find_pip_installed_pyarrow()
+        if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+            if (YGM_REQUIRE_ARROW_PARQUET)
+                message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found in ${PIP_PYARROW_ROOT}.")
+            else ()
+                message(WARNING "${PROJECT_NAME} did not find Arrow Parquet in ${PIP_PYARROW_ROOT}. Building without Arrow Parquet.")
+            endif ()
+            return()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        find_arrow_parquet_config()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        find_pyarrow_package()
+        if (PYARROW_ROOT)
+            # Assume that the found pyarrow was installed by pip.
+            set(PIP_PYARROW_ROOT "${PYARROW_ROOT}")
+            find_pip_installed_pyarrow()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        unset(PIP_PYARROW_ROOT) # do not probe again a root that has just failed
+        install_pyarrow_in_venv()
+        if (PIP_PYARROW_ROOT)
+            find_pip_installed_pyarrow()
+        endif ()
+    endif ()
+
+    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
+        message(STATUS "${PROJECT_NAME} could not find Arrow Parquet.")
+        message(STATUS "If this is an unexpected result, try the following command to install pyarrow: export Python3_ROOT_DIR=/path/to/python3; /path/to/python3 -m pip install pyarrow")
+        if (YGM_REQUIRE_ARROW_PARQUET)
+            message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet.")
+        else ()
+            message(WARNING "${PROJECT_NAME} keep the build process without Arrow Parquet.")
+        endif ()
+        return()
+    endif ()
+
+    set(Arrow_FOUND TRUE PARENT_SCOPE)
+    set(Parquet_FOUND TRUE PARENT_SCOPE)
 endfunction()
 
 
 # Link Arrow and Parquet to the target
-# This function must be called after find_arrow_parquet().
+# This function must be called after find_or_install_arrow_parquet().
 function(link_arrow_parquet target)
     if (Arrow_FOUND AND Parquet_FOUND)
         target_link_libraries(${target} PUBLIC
@@ -155,4 +234,4 @@ function(link_arrow_parquet target)
     else ()
         message(WARNING "Arrow or Parquet not found. Not linking Arrow or Parquet.")
     endif ()
-endfunction()
\ No newline at end of file
+endfunction()
diff --git a/cmake/FindPython3Module.cmake b/cmake/FindPython3Module.cmake
new file mode 100644
index 00000000..9d3b4c8e
--- /dev/null
+++ b/cmake/FindPython3Module.cmake
@@ -0,0 +1,19 @@
+# Find a Python3 module using CMake's FindPython3 module.
+# NOTE(review): cmake/PythonUtilities.cmake defines a function with the same name; the file included last wins.
+# Input: module name to find
+# Python3_ROOT_DIR can be used as a hint to find Python3
+#
+# Output: PYTHON3_MODULE_PATH is set (in the caller's scope) to the path of the module if found
+function(find_python3_module module_name)
+    find_package(Python3 COMPONENTS Interpreter REQUIRED)
+
+    execute_process(
+        COMMAND "${Python3_EXECUTABLE}" -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); origin = spec.origin if spec else None; print(origin or ''); sys.exit(0 if origin else 1)"
+        OUTPUT_VARIABLE MODULE_PATH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+
+    if (Python3_FOUND AND MODULE_PATH)
+        set(PYTHON3_MODULE_PATH "${MODULE_PATH}" PARENT_SCOPE)
+    endif ()
+endfunction()
diff --git a/cmake/PythonUtilities.cmake b/cmake/PythonUtilities.cmake
new file mode 100644
index 00000000..709f1bf6
--- /dev/null
+++ b/cmake/PythonUtilities.cmake
@@ -0,0 +1,110 @@
+# Create a Python3 virtual environment (it is not activated here; see activate_python_venv)
+#
+# Output: PYTHON_VENV_ROOT is set to the path of the virtual environment
+# if created successfully
+function(setup_python_venv)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    set(PYTHON_VENV_ROOT "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-venv")
+    execute_process(
+        COMMAND "${Python3_EXECUTABLE}" -m venv "${PYTHON_VENV_ROOT}"
+        RESULT_VARIABLE result
+        OUTPUT_QUIET
+    )
+    if (result EQUAL "0")
+        message(STATUS "Created Python virtual environment in ${PYTHON_VENV_ROOT}")
+        set(PYTHON_VENV_ROOT "${PYTHON_VENV_ROOT}" PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Activate a Python3 virtual environment by pointing the VIRTUAL_ENV environment variable at it
+# Input: A path to the virtual environment
+# Output: PYTHON_VENV_ACTIVATED is set to TRUE if activated successfully
+#         PYTHON_VENV_PREVIOUS_ENV is set to the VIRTUAL_ENV value that was replaced (empty if none)
+function(activate_python_venv venv_path)
+    set(PYTHON_VENV_PREVIOUS_ENV "$ENV{VIRTUAL_ENV}" PARENT_SCOPE)
+    set(ENV{VIRTUAL_ENV} "${venv_path}")
+    set(PYTHON_VENV_ACTIVATED TRUE PARENT_SCOPE)
+endfunction()
+
+# Deactivate a Python3 virtual environment
+# If PYTHON_VENV_PREVIOUS_ENV (saved by activate_python_venv) is visible and not empty,
+# VIRTUAL_ENV is restored to it; otherwise VIRTUAL_ENV is unset.
+function(deactivate_python_venv)
+    if (NOT "${PYTHON_VENV_PREVIOUS_ENV}" STREQUAL "")
+        set(ENV{VIRTUAL_ENV} "${PYTHON_VENV_PREVIOUS_ENV}")
+    else()
+        unset(ENV{VIRTUAL_ENV})
+    endif()
+    set(PYTHON_VENV_ACTIVATED FALSE PARENT_SCOPE)
+endfunction()
+
+# Upgrade pip in the Python3 interpreter
+# Output: PIP_UPGRADE_SUCCEEDED is set to TRUE if pip was upgraded successfully
+function(upgrade_pip)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+        COMMAND "${Python3_EXECUTABLE}" -m pip install --upgrade pip
+        RESULT_VARIABLE result
+        OUTPUT_QUIET
+    )
+    if (result EQUAL "0")
+        set(PIP_UPGRADE_SUCCEEDED TRUE PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Install a Python3 package using pip (run as "<python3> -m pip install")
+#
+# Input: A package name (a pip requirement specifier such as "pyarrow==16.1.*" also works)
+# Output: PIP_INSTALL_SUCCEEDED is set to TRUE
+# if the package was installed successfully
+function(pip_install_python_package package_name)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+        COMMAND "${Python3_EXECUTABLE}" -m pip install ${package_name}
+        RESULT_VARIABLE result
+        OUTPUT_QUIET
+    )
+    if (result EQUAL "0")
+        message(STATUS "Installed ${package_name}")
+        set(PIP_INSTALL_SUCCEEDED TRUE PARENT_SCOPE)
+    endif()
+endfunction()
+
+# Find a Python3 module using CMake's FindPython3 module.
+# Input: module_name - name of the Python module to look up
+# Python3_ROOT_DIR can be used as a hint to find Python3
+# The lookup runs importlib.util.find_spec() in the interpreter and reads the origin of the spec.
+# Output: PYTHON3_MODULE_PATH is set (in the caller's scope) to the path of the module if found
+function(find_python3_module module_name)
+    find_package(Python3 COMPONENTS Interpreter QUIET)
+    if (NOT Python3_Interpreter_FOUND)
+        message(WARNING "Python3 interpreter not found")
+        return()
+    endif()
+
+    execute_process(
+        COMMAND "${Python3_EXECUTABLE}" -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); origin = spec.origin if spec else None; print(origin or ''); sys.exit(0 if origin else 1)"
+        OUTPUT_VARIABLE MODULE_PATH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        RESULT_VARIABLE result
+    )
+
+    if (result EQUAL "0")
+        set(PYTHON3_MODULE_PATH "${MODULE_PATH}" PARENT_SCOPE)
+        message(STATUS "Found Python module ${module_name} at ${MODULE_PATH}")
+    endif()
+endfunction()