Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find pyarrow module automatically for Arrow and Parquet #241

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 127 additions & 53 deletions cmake/FindArrowParquet.cmake
Original file line number Diff line number Diff line change
@@ -1,42 +1,4 @@
# Locate Arrow and Parquet through their CMake package configuration files.
#
# Arrow versions 16.0 down to 8.0 are tried, newest first; Parquet is searched
# for only once Arrow has been located.
#
# Input:
# YGM_REQUIRE_ARROW_PARQUET (option) If true, a missing package is a fatal error;
# otherwise only a warning is printed.
# Output (set in the caller's scope):
# Arrow_FOUND and Parquet_FOUND carry the results of the two lookups.
function(find_arrow_parquet_config)
  # Find Arrow >= 8.0, stopping at the first version that is accepted.
  foreach (arrow_version 16.0 15.0 14.0 13.0 12.0 11.0 10.0 9.0 8.0)
    find_package(Arrow ${arrow_version} QUIET)
    if (Arrow_FOUND)
      break()
    endif ()
  endforeach ()
  set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)

  # Find Parquet, adding the directory of the Arrow package config to the search paths.
  if (Arrow_FOUND)
    find_package(Parquet QUIET PATHS ${Arrow_DIR})
  endif ()
  set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)

  # Handle the failure case first: fatal when required, a warning otherwise.
  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    if (YGM_REQUIRE_ARROW_PARQUET)
      message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet >= 8.0 but Arrow Parquet was not found.")
    else ()
      message(WARNING "${PROJECT_NAME} did not find Arrow Parquet >= 8.0. Building without Arrow Parquet.")
    endif ()
    return()
  endif ()

  # Both packages are available here; report what was found.
  message(STATUS ${PROJECT_NAME} " found Arrow")
  message(STATUS "Arrow version: ${ARROW_VERSION}")
  message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}")

  message(STATUS ${PROJECT_NAME} " found Parquet")
  message(STATUS "Parquet version: ${PARQUET_VERSION}")
  message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}")
endfunction()

include(PythonUtilities)

# Find Arrow and Parquet installed along with pyarrow by pip.
#
Expand All @@ -58,7 +20,7 @@ endfunction()
# If Arrow and Parquet are found, set Arrow_FOUND and Parquet_FOUND to TRUE.
# Also, Arrow::arrow_shared and Parquet::parquet_shared are created as imported targets.
# Those targets can be used to link Arrow and Parquet as find_package() is used.
function(find_pyarrow)
function(find_pip_installed_pyarrow)
if (PIP_PYARROW_ROOT)
# Find libarrow
file(GLOB Arrow_LIBRARIES LIST_DIRECTORIES false "${PIP_PYARROW_ROOT}/libarrow.so.*")
Expand Down Expand Up @@ -114,12 +76,6 @@ function(find_pyarrow)
endif ()

message(STATUS "Arrow include dir: ${Arrow_INCLUDE_DIRS}")
else () # Arrow or Parquet not found
if (YGM_REQUIRE_ARROW_PARQUET)
message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found.")
else ()
message(WARNING "${PROJECT_NAME} did not find Arrow Parquet. Building without Arrow Parquet.")
endif ()
endif ()
else ()
message(FATAL_ERROR "PIP_PYARROW_ROOT is not set. PIP_PYARROW_ROOT must be set to the root of the pyarrow installation.")
Expand All @@ -128,19 +84,137 @@ function(find_pyarrow)
endfunction()


# Find Arrow and Parquet using find_arrow or find_pyarrow
# If PIP_PYARROW_ROOT is set, find_pyarrow is used.
# Find the directory where pyarrow is installed.
# This function asks the Python 3 interpreter (via find_python3_module) where
# the pyarrow module lives and
# **does not assume that pyarrow is installed by pip**.
#
# Output:
# PYARROW_ROOT is set, in the caller's scope, to the directory that contains the
# file reported for the pyarrow module. It is left untouched when pyarrow is not found.
# Arrow_FOUND and Parquet_FOUND are NOT modified by this function.
function(find_pyarrow_package)
  # find_python3_module() only sets PYTHON3_MODULE_PATH on success, so clear
  # any value visible from an enclosing scope to avoid treating it as a hit.
  unset(PYTHON3_MODULE_PATH)

  find_python3_module(pyarrow)
  if (PYTHON3_MODULE_PATH)
    # The module path points at a file inside the package; keep its directory.
    get_filename_component(pyarrow_dir "${PYTHON3_MODULE_PATH}" DIRECTORY)
    set(PYARROW_ROOT "${pyarrow_dir}" PARENT_SCOPE)
  endif ()
endfunction()

# Install pyarrow using pip inside a Python virtual environment.
#
# Steps: create the virtual environment (setup_python_venv), activate it,
# run pip (pip_install_python_package), look up the installed package
# (find_pyarrow_package) and finally deactivate the environment.
#
# Output:
# PIP_PYARROW_ROOT is set, in the caller's scope, to the root of the pyarrow
# installation when every step succeeded. It is left untouched otherwise.
function(install_pyarrow_in_venv)
  # The helpers below report their results only on success. Clear the result
  # variables first so values inherited from an enclosing scope (e.g. a
  # PYARROW_ROOT left by an earlier system-wide lookup) cannot be mistaken
  # for the outcome of this run.
  unset(PYTHON_VENV_ROOT)
  unset(PYTHON_VENV_ACTIVATED)
  unset(PIP_INSTALL_SUCCEEDED)
  unset(PYARROW_ROOT)

  setup_python_venv()
  if (NOT PYTHON_VENV_ROOT)
    return()
  endif ()

  activate_python_venv("${PYTHON_VENV_ROOT}")
  if (NOT PYTHON_VENV_ACTIVATED)
    return()
  endif ()

  # Use only the Python 3 interpreter in the virtual environment
  # NOTE(review): setup_python_venv() has already run find_package(Python3)
  # before this variable is set -- confirm that the lookups below really pick
  # the interpreter of the virtual environment.
  set(Python3_FIND_VIRTUALENV ONLY)

  pip_install_python_package(pyarrow)
  if (PIP_INSTALL_SUCCEEDED)
    find_pyarrow_package()
    if (PYARROW_ROOT)
      set(PIP_PYARROW_ROOT "${PYARROW_ROOT}" PARENT_SCOPE)
    endif ()
  endif ()

  # Always leave the environment deactivated, whether or not pip succeeded.
  deactivate_python_venv()
endfunction()


# Locate Arrow and Parquet through their CMake package configuration files.
#
# Arrow versions 16.0 down to 8.0 are tried, newest first; Parquet is searched
# for only once Arrow has been located. Nothing is reported when either
# package is missing; the caller decides how to react.
#
# Output (set in the caller's scope):
# Arrow_FOUND carries the result of the Arrow lookup.
# Parquet_FOUND carries the result of the Parquet lookup.
function(find_arrow_parquet_config)
  # Find Arrow >= 8.0, stopping at the first version that is accepted.
  foreach (arrow_version 16.0 15.0 14.0 13.0 12.0 11.0 10.0 9.0 8.0)
    find_package(Arrow ${arrow_version} QUIET)
    if (Arrow_FOUND)
      break()
    endif ()
  endforeach ()
  set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)

  # Find Parquet, adding the directory of the Arrow package config to the search paths.
  if (Arrow_FOUND)
    find_package(Parquet QUIET PATHS ${Arrow_DIR})
  endif ()
  set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)

  # Version details are printed only when both packages are available.
  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    return()
  endif ()

  message(STATUS ${PROJECT_NAME} " found Arrow")
  message(STATUS "Arrow version: ${ARROW_VERSION}")
  message(STATUS "Arrow SO version: ${ARROW_FULL_SO_VERSION}")

  message(STATUS ${PROJECT_NAME} " found Parquet")
  message(STATUS "Parquet version: ${PARQUET_VERSION}")
  message(STATUS "Parquet SO version: ${PARQUET_FULL_SO_VERSION}")
endfunction()

# Find Arrow and Parquet. If not found, try to install pyarrow using pip in a Python virtual environment.
#
# Search order:
# 1. PIP_PYARROW_ROOT, when given. A failure here ends the search.
# 2. CMake package configuration files (find_arrow_parquet_config).
# 3. A pyarrow package visible to the Python 3 interpreter (find_pyarrow_package).
# 4. A pyarrow package freshly installed by pip in a virtual environment (install_pyarrow_in_venv).
#
# Input:
# PIP_PYARROW_ROOT (option) The root directory of a pyarrow installed by pip.
# YGM_REQUIRE_ARROW_PARQUET (option) If TRUE, a fatal error is thrown when Arrow Parquet is not found.
# Output:
# Arrow_FOUND and Parquet_FOUND are defined and set to TRUE if Arrow and Parquet are found.
function(find_arrow_parquet)
  # 1. An explicitly given pyarrow root is authoritative: use it or give up.
  #    find_pip_installed_pyarrow() must only be called when PIP_PYARROW_ROOT
  #    is set, because it raises a fatal error otherwise.
  if (PIP_PYARROW_ROOT)
    find_pip_installed_pyarrow()
    if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
      if (YGM_REQUIRE_ARROW_PARQUET)
        message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet but Arrow Parquet was not found in ${PIP_PYARROW_ROOT}.")
      else ()
        message(WARNING "${PROJECT_NAME} did not find Arrow Parquet in ${PIP_PYARROW_ROOT}. Building without Arrow Parquet.")
      endif ()
      return()
    endif ()
  endif ()

  # 2. Regular find_package() lookup.
  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    find_arrow_parquet_config()
  endif ()
  set(Arrow_FOUND ${Arrow_FOUND} PARENT_SCOPE)
  set(Parquet_FOUND ${Parquet_FOUND} PARENT_SCOPE)

  # 3. A pyarrow package that the Python 3 interpreter can already import.
  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    find_pyarrow_package()
    if (PYARROW_ROOT)
      # Assume that the found pyarrow was installed by pip.
      set(PIP_PYARROW_ROOT ${PYARROW_ROOT})
      find_pip_installed_pyarrow()
    endif ()
  endif ()

  # 4. Last resort: install pyarrow with pip in a virtual environment.
  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    # Forget the root tried in step 3 so that only a root reported by the
    # installation step triggers another lookup.
    unset(PIP_PYARROW_ROOT)
    install_pyarrow_in_venv()
    if (PIP_PYARROW_ROOT)
      find_pip_installed_pyarrow()
    endif ()
  endif ()

  if (NOT Arrow_FOUND OR NOT Parquet_FOUND)
    message(STATUS "${PROJECT_NAME} could not find Arrow Parquet.")
    message(STATUS "If this is an unexpected result, try the following command to install pyarrow: export Python3_ROOT_DIR=/path/to/python3; /path/to/python3 -m pip install pyarrow")
    if (YGM_REQUIRE_ARROW_PARQUET)
      message(FATAL_ERROR "${PROJECT_NAME} requires Arrow Parquet.")
    else ()
      message(WARNING "${PROJECT_NAME} keep the build process without Arrow Parquet.")
    endif ()
    return()
  endif ()

  set(Arrow_FOUND TRUE PARENT_SCOPE)
  set(Parquet_FOUND TRUE PARENT_SCOPE)
endfunction()


Expand Down
18 changes: 18 additions & 0 deletions cmake/FindPython3Module.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Find a Python3 module using the interpreter located by CMake's FindPython3 module.
# Input: module_name - name of the module to find
# Python3_ROOT_DIR can be used as a hint to find Python3
#
# Output: PYTHON3_MODULE_PATH is set, in the caller's scope, to the path of the
# module if found. It is left untouched otherwise.
# A missing Python3 interpreter is a fatal error (find_package(... REQUIRED)).
function(find_python3_module module_name)
  find_package(Python3 COMPONENTS Interpreter REQUIRED)

  # The script prints the module's origin and exits with 0 when the module
  # spec is found; it prints an empty line and exits with 1 otherwise.
  # importlib.util is imported explicitly: importing the importlib package
  # alone does not guarantee that its util submodule is loaded.
  execute_process(
    COMMAND "${Python3_EXECUTABLE}" -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); print(spec.origin if spec else ''); sys.exit(0 if spec else 1)"
    OUTPUT_VARIABLE MODULE_PATH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    RESULT_VARIABLE result
  )

  # Trust the output only when the interpreter reported success.
  if (result EQUAL "0" AND MODULE_PATH)
    set(PYTHON3_MODULE_PATH "${MODULE_PATH}" PARENT_SCOPE)
  endif ()
endfunction()
84 changes: 84 additions & 0 deletions cmake/PythonUtilities.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Create a Python3 virtual environment in
# "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-venv" by running `python -m venv`.
# The environment is only created here, not activated; see activate_python_venv().
#
# Output: PYTHON_VENV_ROOT is set, in the caller's scope, to the path of the
#         virtual environment if created successfully. It is left untouched
#         (and a warning is printed) otherwise.
function(setup_python_venv)
  find_package(Python3 COMPONENTS Interpreter QUIET)
  if (NOT Python3_Interpreter_FOUND)
    message(WARNING "Python3 interpreter not found")
    return()
  endif()

  set(venv_dir "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-venv")
  execute_process(
    COMMAND "${Python3_EXECUTABLE}" -m venv "${venv_dir}"
    RESULT_VARIABLE result
    OUTPUT_QUIET
  )

  # Report the failure instead of returning silently, so the reason why no
  # virtual environment is available is visible in the configure log.
  if (NOT result EQUAL "0")
    message(WARNING "Failed to create Python virtual environment in ${venv_dir} (result: ${result})")
    return()
  endif()

  message(STATUS "Created Python virtual environment in ${venv_dir}")
  set(PYTHON_VENV_ROOT "${venv_dir}" PARENT_SCOPE)
endfunction()

# Activate a Python3 virtual environment by setting the VIRTUAL_ENV
# environment variable of the running CMake process.
# Input: venv_path - a path to the virtual environment
# Output: PYTHON_VENV_ACTIVATED is set to TRUE in the caller's scope.
#         No check is made that venv_path actually holds a virtual environment.
function(activate_python_venv venv_path)
  # Quoted so that the whole path is used as the single value of the variable.
  set(ENV{VIRTUAL_ENV} "${venv_path}")
  set(PYTHON_VENV_ACTIVATED TRUE PARENT_SCOPE)
endfunction()

# Deactivate a Python3 virtual environment: the VIRTUAL_ENV environment
# variable is removed from the running CMake process.
# Output: PYTHON_VENV_ACTIVATED is set to FALSE in the caller's scope
function(deactivate_python_venv)
  set(PYTHON_VENV_ACTIVATED FALSE PARENT_SCOPE)
  unset(ENV{VIRTUAL_ENV})
endfunction()

# Install a Python3 package by running `python -m pip install <package>`
# with the interpreter located by CMake's FindPython3 module.
#
# Input: package_name - name of the package to install
# Output: PIP_INSTALL_SUCCEEDED is set to TRUE, in the caller's scope,
#         if the package was installed successfully. It is left untouched
#         (and a warning is printed) otherwise.
function(pip_install_python_package package_name)
  find_package(Python3 COMPONENTS Interpreter QUIET)
  if (NOT Python3_Interpreter_FOUND)
    message(WARNING "Python3 interpreter not found")
    return()
  endif()

  execute_process(
    COMMAND "${Python3_EXECUTABLE}" -m pip install ${package_name}
    RESULT_VARIABLE result
    OUTPUT_QUIET
  )

  # Report the failure instead of returning silently, so a failed
  # installation can be diagnosed from the configure log.
  if (NOT result EQUAL "0")
    message(WARNING "Failed to install ${package_name} with pip (result: ${result})")
    return()
  endif()

  message(STATUS "Installed ${package_name}")
  set(PIP_INSTALL_SUCCEEDED TRUE PARENT_SCOPE)
endfunction()

# Look up a Python3 module with the interpreter located by CMake's FindPython3 module.
# Input: module_name - name of the module to find
# Python3_ROOT_DIR can be used as a hint to find Python3
#
# Output: PYTHON3_MODULE_PATH is set, in the caller's scope, to the path of the
# module if found. A warning is printed and nothing is set when no Python3
# interpreter is available.
function(find_python3_module module_name)
  find_package(Python3 COMPONENTS Interpreter QUIET)
  if (NOT Python3_Interpreter_FOUND)
    message(WARNING "Python3 interpreter not found")
    return()
  endif()

  # The script prints the module's origin and exits with 0 when the module
  # spec is found; it prints an empty line and exits with 1 otherwise.
  execute_process(
    COMMAND ${Python3_EXECUTABLE} -c "import importlib.util; import sys; module_name = '${module_name}'; spec = importlib.util.find_spec(module_name); print(spec.origin if spec else ''); sys.exit(0 if spec else 1)"
    RESULT_VARIABLE exit_code
    OUTPUT_VARIABLE module_origin
    OUTPUT_STRIP_TRAILING_WHITESPACE
  )

  # A non-zero exit code means the module is not available: report nothing.
  if (NOT exit_code EQUAL "0")
    return()
  endif()

  message(STATUS "Found Python module ${module_name} at ${module_origin}")
  set(PYTHON3_MODULE_PATH ${module_origin} PARENT_SCOPE)
endfunction()