From 215349874b59858aa133c5a18cb9415a7a66878d Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 16 Jul 2024 15:43:20 +0000 Subject: [PATCH] Update documentation --- _sources/building_and_running_tests.rst | 51 ++ _sources/building_the_project.rst | 457 ---------- .../building_the_project_with_adaptivecpp.rst | 171 ++++ _sources/building_the_project_with_dpcpp.rst | 475 +++++++++++ _sources/index.rst | 7 +- _sources/using_onemkl_with_cmake.rst | 102 +++ building_and_running_tests.html | 659 ++++++++++++++ building_the_project_with_adaptivecpp.html | 804 ++++++++++++++++++ ...ml => building_the_project_with_dpcpp.html | 726 ++++++++-------- create_new_backend.html | 5 +- domains/blas/asum.html | 5 +- domains/blas/axpby.html | 5 +- domains/blas/axpy.html | 5 +- domains/blas/axpy_batch.html | 5 +- domains/blas/blas-level-1-routines.html | 5 +- domains/blas/blas-level-2-routines.html | 5 +- domains/blas/blas-level-3-routines.html | 5 +- domains/blas/blas-like-extensions.html | 5 +- domains/blas/blas.html | 5 +- domains/blas/copy.html | 5 +- domains/blas/copy_batch.html | 5 +- domains/blas/dgmm_batch.html | 5 +- domains/blas/dot.html | 5 +- domains/blas/dotc.html | 5 +- domains/blas/dotu.html | 5 +- domains/blas/gbmv.html | 5 +- domains/blas/gemm.html | 5 +- domains/blas/gemm_batch.html | 5 +- domains/blas/gemm_bias.html | 5 +- domains/blas/gemmt.html | 5 +- domains/blas/gemv.html | 5 +- domains/blas/gemv_batch.html | 5 +- domains/blas/ger.html | 5 +- domains/blas/gerc.html | 5 +- domains/blas/geru.html | 5 +- domains/blas/hbmv.html | 5 +- domains/blas/hemm.html | 5 +- domains/blas/hemv.html | 5 +- domains/blas/her.html | 5 +- domains/blas/her2.html | 5 +- domains/blas/her2k.html | 5 +- domains/blas/herk.html | 5 +- domains/blas/hpmv.html | 5 +- domains/blas/hpr.html | 5 +- domains/blas/hpr2.html | 5 +- domains/blas/iamax.html | 5 +- domains/blas/iamin.html | 5 +- domains/blas/nrm2.html | 5 +- domains/blas/rot.html | 5 +- domains/blas/rotg.html | 5 +- 
domains/blas/rotm.html | 5 +- domains/blas/rotmg.html | 5 +- domains/blas/sbmv.html | 5 +- domains/blas/scal.html | 5 +- domains/blas/sdsdot.html | 5 +- domains/blas/spmv.html | 5 +- domains/blas/spr.html | 5 +- domains/blas/spr2.html | 5 +- domains/blas/swap.html | 5 +- domains/blas/symm.html | 5 +- domains/blas/symv.html | 5 +- domains/blas/syr.html | 5 +- domains/blas/syr2.html | 5 +- domains/blas/syr2k.html | 5 +- domains/blas/syrk.html | 5 +- domains/blas/syrk_batch.html | 5 +- domains/blas/tbmv.html | 5 +- domains/blas/tbsv.html | 5 +- domains/blas/tpmv.html | 5 +- domains/blas/tpsv.html | 5 +- domains/blas/trmm.html | 5 +- domains/blas/trmv.html | 5 +- domains/blas/trsm.html | 5 +- domains/blas/trsm_batch.html | 5 +- domains/blas/trsv.html | 5 +- domains/dense_linear_algebra.html | 5 +- domains/lapack/gebrd.html | 5 +- domains/lapack/gebrd_scratchpad_size.html | 5 +- domains/lapack/geqrf.html | 5 +- domains/lapack/geqrf_batch.html | 5 +- .../lapack/geqrf_batch_scratchpad_size.html | 5 +- domains/lapack/geqrf_scratchpad_size.html | 5 +- domains/lapack/gerqf.html | 5 +- domains/lapack/gerqf_scratchpad_size.html | 5 +- domains/lapack/gesvd.html | 5 +- domains/lapack/gesvd_scratchpad_size.html | 5 +- domains/lapack/getrf.html | 5 +- domains/lapack/getrf_batch.html | 5 +- .../lapack/getrf_batch_scratchpad_size.html | 5 +- domains/lapack/getrf_scratchpad_size.html | 5 +- domains/lapack/getri.html | 5 +- domains/lapack/getri_batch.html | 5 +- .../lapack/getri_batch_scratchpad_size.html | 5 +- domains/lapack/getri_scratchpad_size.html | 5 +- domains/lapack/getrs.html | 5 +- domains/lapack/getrs_batch.html | 5 +- .../lapack/getrs_batch_scratchpad_size.html | 5 +- domains/lapack/getrs_scratchpad_size.html | 5 +- domains/lapack/heevd.html | 5 +- domains/lapack/heevd_scratchpad_size.html | 5 +- domains/lapack/hegvd.html | 5 +- domains/lapack/hegvd_scratchpad_size.html | 5 +- domains/lapack/hetrd.html | 5 +- domains/lapack/hetrd_scratchpad_size.html | 5 +- 
domains/lapack/hetrf.html | 5 +- domains/lapack/hetrf_scratchpad_size.html | 5 +- .../lapack/lapack-like-extensions.inc.html | 5 +- .../lapack-linear-equation-routines.inc.html | 5 +- ...ingular-value-eigenvalue-routines.inc.html | 5 +- domains/lapack/lapack.html | 5 +- domains/lapack/orgbr.html | 5 +- domains/lapack/orgbr_scratchpad_size.html | 5 +- domains/lapack/orgqr.html | 5 +- domains/lapack/orgqr_batch.html | 5 +- .../lapack/orgqr_batch_scratchpad_size.html | 5 +- domains/lapack/orgqr_scratchpad_size.html | 5 +- domains/lapack/orgtr.html | 5 +- domains/lapack/orgtr_scratchpad_size.html | 5 +- domains/lapack/ormqr.html | 5 +- domains/lapack/ormqr_scratchpad_size.html | 5 +- domains/lapack/ormrq.html | 5 +- domains/lapack/ormrq_scratchpad_size.html | 5 +- domains/lapack/ormtr.html | 5 +- domains/lapack/ormtr_scratchpad_size.html | 5 +- domains/lapack/potrf.html | 5 +- domains/lapack/potrf_batch.html | 5 +- .../lapack/potrf_batch_scratchpad_size.html | 5 +- domains/lapack/potrf_scratchpad_size.html | 5 +- domains/lapack/potri.html | 5 +- domains/lapack/potri_scratchpad_size.html | 5 +- domains/lapack/potrs.html | 5 +- domains/lapack/potrs_batch.html | 5 +- .../lapack/potrs_batch_scratchpad_size.html | 5 +- domains/lapack/potrs_scratchpad_size.html | 5 +- domains/lapack/syevd.html | 5 +- domains/lapack/syevd_scratchpad_size.html | 5 +- domains/lapack/sygvd.html | 5 +- domains/lapack/sygvd_scratchpad_size.html | 5 +- domains/lapack/sytrd.html | 5 +- domains/lapack/sytrd_scratchpad_size.html | 5 +- domains/lapack/sytrf.html | 5 +- domains/lapack/sytrf_scratchpad_size.html | 5 +- domains/lapack/trtrs.html | 5 +- domains/lapack/trtrs_scratchpad_size.html | 5 +- domains/lapack/ungbr.html | 5 +- domains/lapack/ungbr_scratchpad_size.html | 5 +- domains/lapack/ungqr.html | 5 +- domains/lapack/ungqr_batch.html | 5 +- .../lapack/ungqr_batch_scratchpad_size.html | 5 +- domains/lapack/ungqr_scratchpad_size.html | 5 +- domains/lapack/ungtr.html | 5 +- 
domains/lapack/ungtr_scratchpad_size.html | 5 +- domains/lapack/unmqr.html | 5 +- domains/lapack/unmqr_scratchpad_size.html | 5 +- domains/lapack/unmrq.html | 5 +- domains/lapack/unmrq_scratchpad_size.html | 5 +- domains/lapack/unmtr.html | 5 +- domains/lapack/unmtr_scratchpad_size.html | 5 +- domains/matrix-storage.html | 5 +- genindex.html | 5 +- index.html | 32 +- introduction.html | 5 +- objects.inv | Bin 6170 -> 6738 bytes onemkl-datatypes.html | 11 +- search.html | 5 +- searchindex.js | 2 +- selecting_a_compiler.html | 11 +- using_onemkl_with_cmake.html | 722 ++++++++++++++++ 168 files changed, 4009 insertions(+), 986 deletions(-) create mode 100644 _sources/building_and_running_tests.rst delete mode 100644 _sources/building_the_project.rst create mode 100644 _sources/building_the_project_with_adaptivecpp.rst create mode 100644 _sources/building_the_project_with_dpcpp.rst create mode 100644 _sources/using_onemkl_with_cmake.rst create mode 100644 building_and_running_tests.html create mode 100644 building_the_project_with_adaptivecpp.html rename building_the_project.html => building_the_project_with_dpcpp.html (58%) create mode 100644 using_onemkl_with_cmake.html diff --git a/_sources/building_and_running_tests.rst b/_sources/building_and_running_tests.rst new file mode 100644 index 000000000..43d3431af --- /dev/null +++ b/_sources/building_and_running_tests.rst @@ -0,0 +1,51 @@ +.. _building_and_running_tests: + +Building and Running Tests +========================== + +The functional tests are enabled by default, and can be enabled/disabled +with the CMake build parameter ``-DBUILD_FUNCTIONAL_TESTS=True/False``. Only +the tests relevant to the enabled backends and target domains will be built. + +Building tests for BLAS and LAPACK domains requires additional libraries for +reference. + +* BLAS: Requires a reference BLAS library. +* LAPACK: Requires a reference LAPACK library. 
+ +For both BLAS and LAPACK, shared libraries supporting both 32 and 64 bit +indexing are required. + +A reference LAPACK implementation (including BLAS) can be built as +follows: + +.. code-block:: bash + + git clone https://github.com/Reference-LAPACK/lapack.git + cd lapack; mkdir -p build; cd build + cmake -DCMAKE_INSTALL_PREFIX=~/lapack -DCBLAS=True -DLAPACK=True -DLAPACKE=True -DBUILD_INDEX64=True -DBUILD_SHARED_LIBS=True .. + cmake --build . -j --target install + cmake -DCMAKE_INSTALL_PREFIX=~/lapack -DCBLAS=True -DLAPACK=True -DLAPACKE=True -DBUILD_INDEX64=False -DBUILD_SHARED_LIBS=True .. + cmake --build . -j --target install + +and then used in oneMKL by setting ``-DREF_BLAS_ROOT=/path/to/lapack/install`` +and ``-DREF_LAPACK_ROOT=/path/to/lapack/install``. + +You can re-run tests without re-building the entire project. + +To run the tests, either run test binaries individually, or use the ``ctest`` CMake test driver program. + +.. code-block:: bash + + # Run all tests + ctest + # Run only Gpu specific tests + ctest -R Gpu + # Exclude Cpu tests + ctest -E Cpu + +For more ``ctest`` options, refer to `ctest manual page <https://cmake.org/cmake/help/latest/manual/ctest.1.html>`_. + +If you encounter the issue ``BACKEND NOT FOUND EXCEPTION`` when running tests, +you may need to add your ``<oneMKL build directory>/lib`` to your +``LD_LIBRARY_PATH`` on Linux. diff --git a/_sources/building_the_project.rst b/_sources/building_the_project.rst deleted file mode 100644 index 216b57807..000000000 --- a/_sources/building_the_project.rst +++ /dev/null @@ -1,457 +0,0 @@ -.. _building_the_project: - -Building the Project -==================== - -.. _build_setup: - -Build Setup -########### - -#. - Install Intel(R) oneAPI DPC++ Compiler (select the variant as described in - :ref:`Selecting a Compiler`). - -#. - Clone this project to ````\ , where ```` - is the root directory of this repository. - -#. - Download and install the required dependencies manually and :ref:`Build with CMake `. - - -.. 
_build_setup_with_hipsycl: - -Build Setup with hipSYCL -######################## - -#. - Make sure that the dependencies of hipSYCL are fulfilled. For a detailed - description, see the - `hipSYCL installation readme `_. - -#. - Install hipSYCL with the prefered backends enabled. hipSYCL supports - various backends. You can customize support for the target system at - compile time by setting the appropriate configuration flags; see the - `hipSYCL documentation `_ - for instructions. - -#. - Install `AMD rocBLAS `_. - -#. - Clone this project to ````, where ```` is - the root directory of this repository. - -#. - Download and install the required dependencies manually and - :ref:`Build with CMake `. - - - -.. _building_with_cmake: - -Building with CMake -################### - -#. - Make sure you have completed `Build Setup <#build-setup>`_. - -#. - Build and install all required `dependencies <#software-requirements>`_. - -Building for oneMKL -^^^^^^^^^^^^^^^^^^^ - -* On Linux* - - .. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. [-DCMAKE_CXX_COMPILER=/bin/icpx] # required only if icpx is not found in environment variable PATH - [-DCMAKE_C_COMPILER=/bin/icx] # required only if icx is not found in environment variable PATH - [-DMKL_ROOT=] # required only if environment variable MKLROOT is not set - [-DREF_BLAS_ROOT=] # required only for testing - [-DREF_LAPACK_ROOT=] # required only for testing - cmake --build . - ctest - cmake --install . --prefix # required to have full package structure - -* On Windows* - - .. code-block:: bash - - # Inside - md build && cd build - cmake .. 
-G Ninja [-DCMAKE_CXX_COMPILER=\bin\icx] # required only if icx is not found in environment variable PATH - [-DCMAKE_C_COMPILER=\bin\icx] # required only if icx is not found in environment variable PATH - [-DMKL_ROOT=] # required only if environment variable MKLROOT is not set - [-DREF_BLAS_ROOT=] # required only for testing - [-DREF_LAPACK_ROOT=] # required only for testing - ninja - ctest - cmake --install . --prefix # required to have full package structure - -Building for CUDA (with hipSYCL) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* On Linux* - -With the cuBLAS backend: - -.. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. -DENABLE_CUBLAS_BACKEND=True \ - -DENABLE_MKLGPU_BACKEND=False # Disable all backends except for cuBLAS - -DENABLE_MKLCPU_BACKEND=False \ - -DENABLE_NETLIB_BACKEND=False \ - -DENABLE_ROCBLAS_BACKEND=False \ - -DHIPSYCL_TARGETS=cuda:sm_75 \ # Specify the targeted device architectures - -DONEMKL_SYCL_IMPLEMENTATION=hipSYCL \ - [-DREF_BLAS_ROOT=] # required only for testing - cmake --build . - ctest - cmake --install . --prefix # required to have full package structure - -To build with the cuRAND backend instead simply replace: - -.. code-block:: bash - - -DENABLE_CUBLAS_BACKEND=True \ - -With: - -.. code-block:: bash - - -DENABLE_CURAND_BACKEND=True \ - - -Building for CUDA (with clang++) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* On Linux* - -With the cuBLAS backend: - -.. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. [-DCMAKE_CXX_COMPILER=/bin/clang++] # required only if clang++ is not found in environment variable PATH - [-DCMAKE_C_COMPILER=/bin/clang] # required only if clang is not found in environment variable PATH - -DENABLE_CUBLAS_BACKEND=True \ - -DENABLE_MKLCPU_BACKEND=False # disable Intel MKL CPU backend - -DENABLE_MKLGPU_BACKEND=False # disable Intel MKL GPU backend - [-DREF_BLAS_ROOT=] # required only for testing - cmake --build . - ctest - cmake --install . 
--prefix # required to have full package structure - - -The CuFFT and CuRAND backends can be enabled in a similar way to the CuBLAS backend, by setting the corresponding CMake variable(s) to `True`: - -.. code-block:: bash - - -DENABLE_CUFFT_BACKEND=True \ - -DENABLE_CURAND_BACKEND=True \ - - -Building for ROCm (with hipSYCL) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -With the AMD rocBLAS backend: - -* On Linux* - -.. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. -DENABLE_CUBLAS_BACKEND=False \ - -DENABLE_MKLCPU_BACKEND=False/True # hipSYCL supports MKLCPU backend - -DENABLE_NETLIB_BACKEND=False/True # hipSYCL supports NETLIB backend - -DENABLE_MKLGPU_BACKEND=False # disable Intel MKL GPU backend - -DENABLE_ROCBLAS_BACKEND=True \ - -DTARGET_DOMAINS=blas # hipSYCL supports BLAS and RNG domains - -DHIPSYCL_TARGETS=omp\;hip:gfx906 # Specify the targetted device architectures - -DONEMKL_SYCL_IMPLEMENTATION=hipSYCL # Use the hipSYCL cmake integration - [-DREF_BLAS_ROOT=] # required only for testing - cmake --build . - ctest - cmake --install . --prefix # required to have full package structure - -To build with the rocRAND backend instead simply replace: - -.. code-block:: bash - - -DENABLE_ROCBLAS_BACKEND=True \ - -DTARGET_DOMAINS=blas - -With: - -.. code-block:: bash - - -DENABLE_ROCRAND_BACKEND=True \ - -DTARGET_DOMAINS=rng - -Building for ROCm (with clang++) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -With the AMD rocBLAS backend: - - -* On Linux* - -.. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. 
[-DCMAKE_CXX_COMPILER=/bin/clang++] # required only if clang++ is not found in environment variable PATH - [-DCMAKE_C_COMPILER=/bin/clang] # required only if clang is not found in environment variable PATH - -DENABLE_CUBLAS_BACKEND=False \ - -DENABLE_MKLCPU_BACKEND=False \ # disable Intel MKL CPU backend - -DENABLE_MKLGPU_BACKEND=False \ # disable Intel MKL GPU backend - -DENABLE_ROCBLAS_BACKEND=True \ - -DHIP_TARGETS=gfx90a \ # Specify the targetted device architectures - [-DREF_BLAS_ROOT=] # required only for testing - cmake --build . - export ONEAPI_DEVICE_SELECTOR="hip:gpu" - ctest - cmake --install . --prefix # required to have full package structure - -The rocRAND, rocFFT, and rocSOLVER backends can be enabled in a similar way to the rocBLAS backend, by setting the corresponding CMake variable(s) to `True`: - -.. code-block:: bash - - -DENABLE_ROCRAND_BACKEND=True \ - -DENABLE_ROCFFT_BACKEND=True \ - -DENABLE_ROCSOLVER_BACKEND=True \ - -**AMD GPU device architectures** - -The device architecture can be retrieved via the ``rocminfo`` tool. The architecture will be displayed in the ``Name:`` row. - -A few often-used architectures are listed below: - -.. list-table:: - :header-rows: 1 - - * - Architecture - - AMD GPU name - * - gfx90a - - AMD Instinct(TM) MI210/250/250X Accellerator - * - gfx908 - - AMD Instinct(TM) MI 100 Accelerator - * - gfx906 - - | AMD Radeon Instinct(TM) MI50/60 Accelerator - | AMD Radeon(TM) (Pro) VII Graphics Card - * - gfx900 - - | Radeon Instinct(TM) MI 25 Accelerator - | Radeon(TM) RX Vega 64/56 Graphics - -Building for portBLAS -^^^^^^^^^^^^^^^^^^^^^^ - -Note the portBLAS backend is experimental and currently only supports a -subset of the operations and features. The portBLAS backend cannot be enabled -with other backends and can only be used with the compile time dispatch. -The portBLAS backend uses the `portBLAS `_ -project as a header-only library. - -* On Linux* - -.. 
code-block:: bash - - # Inside - mkdir build && cd build - cmake .. -DENABLE_PORTBLAS_BACKEND=ON \ - -DENABLE_MKLCPU_BACKEND=OFF \ - -DENABLE_MKLGPU_BACKEND=OFF \ - -DTARGET_DOMAINS=blas \ - [-DREF_BLAS_ROOT=] \ # required only for testing - [-DPORTBLAS_DIR=] - cmake --build . - ./bin/test_main_blas_ct - cmake --install . --prefix - - -portBLAS will be downloaded automatically if not found. -By default, the portBLAS backend is not tuned for any specific device which -will impact performance. -portBLAS can be tuned for a specific hardware target by adding compiler -definitions in 2 ways: - -#. - Manually specify a tuning target with ``-DPORTBLAS_TUNING_TARGET=``. - The list of portBLAS targets can be found - `here `_. - This will automatically set ``-fsycl-targets`` if needed. - In case of ``AMD_GPU`` target, it is mandatory to set one or more device - architectures by means of ``HIP_TARGETS``, e.g., ``-DHIP_TARGETS=gfx90a``. - In case of ``NVIDIA_GPU`` target, it is possible to select a specific device - architecture by means of ``CUDA_TARGET``, e.g., ``-DCUDA_TARGET=sm_80``. -#. - If one target is set via ``-fsycl-targets`` the configuration step will - try to automatically detect the portBLAS tuning target. One can manually - specify ``-fsycl-targets`` via ``CMAKE_CXX_FLAGS``. See - `DPC++ User Manual `_ - for more information on ``-fsycl-targets``. - -Building for portFFT -^^^^^^^^^^^^^^^^^^^^^^ - -Note the portFFT backend is experimental and currently only supports a -subset of the operations and features. -The portFFT backend uses the `portFFT `_ -project as a header-only library. - -* On Linux* - -.. code-block:: bash - - # Inside - mkdir build && cd build - cmake .. -DENABLE_PORTFFT_BACKEND=ON \ - -DENABLE_MKLCPU_BACKEND=OFF \ - -DENABLE_MKLGPU_BACKEND=OFF \ - -DTARGET_DOMAINS=dft \ - [-DPORTFFT_REGISTERS_PER_WI=128] \ # Example portFFT tuning parameter - [-DREF_BLAS_ROOT=] \ # required only for testing - [-DPORTFFT_DIR=] - cmake --build . 
- ./bin/test_main_dft_ct - cmake --install . --prefix - - -portFFT will be downloaded automatically if not found. - -By default, the portFFT backend is not tuned for any specific device. The tuning flags are -detailed in the `portFFT `_ repository. -The tuning parameters can be set at configuration time, -with the above example showing how to set the tuning parameter -``PORTFFT_REGISTERS_PER_WI``. Note that some tuning configurations may be incompatible -with some targets. - -The portFFT library is compiled using the same ``-fsycl-targets`` as specified -by the ``CMAKE_CXX_FLAGS``. If none are found, it will compile for -``-fsycl-targets=nvptx64-nvidia-cuda,spir64``. To enable HIP targets, -``HIP_TARGETS`` must be specified. See -`DPC++ User Manual `_ -for more information on ``-fsycl-targets``. - - -Build Options -^^^^^^^^^^^^^ - -When building oneMKL the SYCL implementation can be specified by setting the -``ONEMKL_SYCL_IMPLEMENTATION`` option. Possible values are: - -* ``dpc++`` (default) for the - `Intel(R) oneAPI DPC++ Compiler `_ - and for the - `oneAPI DPC++ Compiler `_ compilers. -* ``hipsycl`` for the `hipSYCL `_ SYCL implementation. - -The following table provides details of CMake options and their default values: - -.. 
list-table:: - :header-rows: 1 - - * - CMake Option - - Supported Values - - Default Value - * - BUILD_SHARED_LIBS - - True, False - - True - * - ENABLE_MKLCPU_BACKEND - - True, False - - True - * - ENABLE_MKLGPU_BACKEND - - True, False - - True - * - ENABLE_CUBLAS_BACKEND - - True, False - - False - * - ENABLE_CUSOLVER_BACKEND - - True, False - - False - * - ENABLE_CUFFT_BACKEND - - True, False - - False - * - ENABLE_CURAND_BACKEND - - True, False - - False - * - ENABLE_NETLIB_BACKEND - - True, False - - False - * - ENABLE_ROCBLAS_BACKEND - - True, False - - False - * - ENABLE_ROCFFT_BACKEND - - True, False - - False - * - ENABLE_MKLCPU_THREAD_TBB - - True, False - - True - * - ENABLE_PORTBLAS_BACKEND - - True, False - - False - * - ENABLE_PORTFFT_BACKEND - - True, False - - False - * - BUILD_FUNCTIONAL_TESTS - - True, False - - True - * - BUILD_EXAMPLES - - True, False - - True - * - BUILD_DOC - - True, False - - False - * - TARGET_DOMAINS (list) - - blas, lapack, rng, dft - - All domains - -.. note:: - ``build_functional_tests`` and related CMake options affect all domains at a - global scope. - -  -.. note:: - When building with hipSYCL, you must additionally provide - ``-DHIPSYCL_TARGETS`` according to the targeted hardware. For the options, - see the tables in the hipSYCL-specific sections. - - -.. note:: - When building with clang++ for AMD backends, you must additionally set - ``ONEAPI_DEVICE_SELECTOR`` to ``hip:gpu`` and provide ``-DHIP_TARGETS`` according to - the targeted hardware. This backend has only been tested for the ``gfx90a`` - architecture (MI210) at the time of writing. - -.. note:: - When building with ``BUILD_FUNCTIONAL_TESTS=yes`` (default option) only single CUDA backend can be built - (`#270 `_). - -.. _project_cleanup: - -Project Cleanup -############### - -Most use-cases involve building the project without the need to cleanup the -build directory. 
However, if you wish to cleanup the build directory, you can -delete the ``build`` folder and create a new one. If you wish to cleanup the -build files but retain the build configuration, following commands will help -you do so. - -.. code-block:: sh - - # If you use "GNU/Unix Makefiles" for building, - make clean - - # If you use "Ninja" for building - ninja -t clean diff --git a/_sources/building_the_project_with_adaptivecpp.rst b/_sources/building_the_project_with_adaptivecpp.rst new file mode 100644 index 000000000..98c763b90 --- /dev/null +++ b/_sources/building_the_project_with_adaptivecpp.rst @@ -0,0 +1,171 @@ +.. _building_the_project_with_adaptivecpp: + +Building the Project with AdaptiveCpp +===================================== + +.. _build_setup_with_adaptivecpp: + +Environment Setup +################# + +#. + Build and install AdaptiveCpp. For a detailed description of available + AdaptiveCpp backends, their dependencies, and installation, see the + `AdaptiveCpp installation readme + `_. + +#. + Clone this project. The root directory of the cloned repository will be + referred to as ``<path to onemkl>``. + +#. + Download and install the `required dependencies + `_ + manually. + +Build Commands +############### + +In most cases, building oneMKL Interfaces is as simple as setting the compiler and +selecting the desired backends to build with. + +On Linux (other OSes are not supported with the AdaptiveCpp compiler): + +.. code-block:: bash + + # Inside <path to onemkl> + mkdir build && cd build + cmake .. -DONEMKL_SYCL_IMPLEMENTATION=hipsycl \ # Indicate that AdaptiveCpp is being used. + -DENABLE_MKLGPU_BACKEND=False \ # MKLGPU backend is not supported by AdaptiveCpp + -DENABLE_<BACKEND_NAME>_BACKEND=True \ # Enable backend(s) (optional) + -DENABLE_<BACKEND_NAME_2>_BACKEND=True \ # Multiple backends can be enabled at once. + -DHIPSYCL_TARGETS=omp\;hip:gfx90a,gfx906 \ # Set target architectures depending on supported devices. 
+ -DBUILD_FUNCTIONAL_TESTS=False \ # See page *Building and Running Tests* for more on building tests. True by default. + -DBUILD_EXAMPLES=False # Optional: True by default. + cmake --build . + cmake --install . --prefix <path_to_install_dir> # required to have full package structure + +Backends should be enabled by setting ``-DENABLE_<BACKEND_NAME>_BACKEND=True`` for +each desired backend. By default, the ``MKLGPU`` and ``MKLCPU`` backends are +enabled, but ``MKLGPU`` must be disabled with AdaptiveCpp. The supported +backends for the compilers are given in the table at `oneMKL supported +configurations table +`_, +and the CMake option names are given in the table below. Some backends may +require additional parameters to be set. See the relevant section below for +additional guidance. The target architectures must be specified with +``HIPSYCL_TARGETS``. See the `AdaptiveCpp documentation +`_. + +If a backend library supports multiple domains (e.g., BLAS, RNG), it may be +desirable to only enable selected domains. For this, the ``TARGET_DOMAINS`` +variable should be set. For further details, see :ref:`build_target_domains`. + +By default, the library also builds examples and tests. These can +be disabled by setting the parameters ``BUILD_FUNCTIONAL_TESTS`` and +``BUILD_EXAMPLES`` to ``False``. Building the functional tests may require additional +external libraries. See the section :ref:`building_and_running_tests` for more +information. + +The most important supported build options are: + +.. 
list-table:: + :header-rows: 1 + + * - CMake Option + - Supported Values + - Default Value + * - ENABLE_MKLCPU_BACKEND + - True, False + - True + * - ENABLE_CUBLAS_BACKEND + - True, False + - False + * - ENABLE_CURAND_BACKEND + - True, False + - False + * - ENABLE_NETLIB_BACKEND + - True, False + - False + * - ENABLE_ROCBLAS_BACKEND + - True, False + - False + * - ENABLE_ROCRAND_BACKEND + - True, False + - False + * - ENABLE_MKLCPU_THREAD_TBB + - True, False + - True + * - BUILD_FUNCTIONAL_TESTS + - True, False + - True + * - BUILD_EXAMPLES + - True, False + - True + * - TARGET_DOMAINS (list) + - blas, rng + - All supported domains + +Some additional build options are given in +:ref:`build_additional_options_dpcpp`. + +Backends +######## + +.. _build_for_cuda_adaptivecpp: + +Building for CUDA +~~~~~~~~~~~~~~~~~ + +The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND`` and +``ENABLE_CURAND_BACKEND``. + +The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. For +example, to target a Nvidia A100 (Ampere architecture), set +``-DHIPSYCL_TARGETS=cuda:sm_80``, where the figure ``80`` corresponds to a CUDA +compute capability of 8.0. The correspondence between compute capabilities and +Nvidia GPU products is given on the `Nvidia website +`_. Multiple architectures can be +enabled using a comma separated list. See the `AdaptiveCpp documentation +`_. + +No additional parameters are required for using CUDA libraries. In most cases, +the CUDA libraries should be found automatically by CMake. + +.. _build_for_rocm_adaptivecpp: + +Building for ROCm +~~~~~~~~~~~~~~~~~ + +The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND`` and +``ENABLE_ROCRAND_BACKEND``. + +The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. See +the `AdaptiveCpp documentation +`_. +For example, to target the MI200 series, set ``-DHIPSYCL_TARGETS=hip:gfx90a``. +Multiple architectures can be enabled using a comma separated list. 
For example, +``-DHIPSYCL_TARGETS=hip:gfx906,gfx90a``, and multiple APIs with a semicolon +(``-DHIPSYCL_TARGETS=omp\;hip:gfx906,gfx90a``). + +For common AMD GPU architectures, see the :ref:`build_for_ROCM_dpcpp` in the +DPC++ build guide. + +.. _project_cleanup: + +Project Cleanup +############### + +Most use-cases involve building the project without the need to clean up the +build directory. However, if you wish to clean up the build directory, you can +delete the ``build`` folder and create a new one. If you wish to clean up the +build files but retain the build configuration, following commands will help you +do so. + +.. code-block:: sh + + # If you use "GNU/Unix Makefiles" for building, + make clean + + # If you use "Ninja" for building + ninja -t clean diff --git a/_sources/building_the_project_with_dpcpp.rst b/_sources/building_the_project_with_dpcpp.rst new file mode 100644 index 000000000..365028237 --- /dev/null +++ b/_sources/building_the_project_with_dpcpp.rst @@ -0,0 +1,475 @@ +.. _building_the_project_with_dpcpp: + +Building the Project with DPC++ +=============================== + +This page describes building the oneMKL Interfaces with either the Intel(R) +oneAPI DPC++ Compiler or open-source oneAPI DPC++ Compiler. For guidance on +building the project with AdaptiveCpp, see +:ref:`building_the_project_with_adaptivecpp`. + +.. _build_setup_with_dpcpp: + +Environment Setup +################## + +#. + Install the required DPC++ compiler (Intel(R) DPC++ or Open DPC++ - see + :ref:`Selecting a Compiler`). + +#. + Clone this project. The root directory of the cloned repository will be + referred to as ````. + +#. + Build and install all `required dependencies + `_. + +.. _build_introduction_with_dpcpp: + +Build Commands +############### + +The build commands for various compilers and backends differ mostly in setting +the values of CMake options for compiler and backend. In this section, we +describe the common build commands. 
We will discuss backend-specific details in +the `Backends`_ section and provide examples in `CMake invocation examples`_. + +On Linux, the common form of the build command looks as follows (see `Building +for Windows`_ for building on Windows): + +.. code-block:: bash + + # Inside <path to onemkl> + mkdir build && cd build + cmake .. -DCMAKE_CXX_COMPILER=$CXX_COMPILER \ # Should be icpx or clang++ + -DCMAKE_C_COMPILER=$C_COMPILER \ # Should be icx or clang + -DENABLE_MKLCPU_BACKEND=False \ # Optional: The MKLCPU backend is True by default. + -DENABLE_MKLGPU_BACKEND=False \ # Optional: The MKLGPU backend is True by default. + -DENABLE_<BACKEND_NAME>_BACKEND=True \ # Enable any other backend(s) (optional) + -DENABLE_<BACKEND_NAME_2>_BACKEND=True \ # Multiple backends can be enabled at once. + -DBUILD_FUNCTIONAL_TESTS=False \ # See page *Building and Running Tests* for more on building tests. True by default. + -DBUILD_EXAMPLES=False # Optional: True by default. + cmake --build . + cmake --install . --prefix <path_to_install_dir> # required to have full package structure + +In the above, the ``$CXX_COMPILER`` and ``$C_COMPILER`` should be set to +``icpx`` and ``icx`` respectively when using the Intel(R) oneAPI DPC++ Compiler, +or ``clang++`` and ``clang`` respectively when using the Open DPC++ Compiler. + +Backends should be enabled by setting ``-DENABLE_<BACKEND_NAME>_BACKEND=True`` for +each desired backend. By default, only the ``MKLGPU`` and ``MKLCPU`` backends +are enabled. Multiple backends for multiple device vendors can be enabled at +once (albeit with limitations when using portBLAS and portFFT). The supported +backends for the compilers are given in the table at `oneMKL supported +configurations table +`_, +and the CMake option names are given in the table below. Some backends may +require additional parameters to be set. See the relevant section below for +additional guidance. 
+ +If a backend library supports multiple domains (e.g., BLAS, LAPACK, DFT, RNG, +sparse BLAS), it may be desirable to only enable selected domains. 
For this, the +``TARGET_DOMAINS`` variable should be set. See the section `TARGET_DOMAINS`_. + +By default, the library also additionally builds examples and tests. These can +be disabled by setting the parameters ``BUILD_FUNCTIONAL_TESTS`` and +``BUILD_EXAMPLES`` to ``False``. Building the functional tests requires +additional external libraries for the BLAS and LAPACK domains. See the section +:ref:`building_and_running_tests` for more information. + +The most important supported build options are: + +.. list-table:: + :header-rows: 1 + + * - CMake Option + - Supported Values + - Default Value + * - ENABLE_MKLCPU_BACKEND + - True, False + - True + * - ENABLE_MKLGPU_BACKEND + - True, False + - True + * - ENABLE_CUBLAS_BACKEND + - True, False + - False + * - ENABLE_CUSOLVER_BACKEND + - True, False + - False + * - ENABLE_CUFFT_BACKEND + - True, False + - False + * - ENABLE_CURAND_BACKEND + - True, False + - False + * - ENABLE_NETLIB_BACKEND + - True, False + - False + * - ENABLE_ROCBLAS_BACKEND + - True, False + - False + * - ENABLE_ROCFFT_BACKEND + - True, False + - False + * - ENABLE_ROCSOLVER_BACKEND + - True, False + - False + * - ENABLE_ROCRAND_BACKEND + - True, False + - False + * - ENABLE_MKLCPU_THREAD_TBB + - True, False + - True + * - ENABLE_PORTBLAS_BACKEND + - True, False + - False + * - ENABLE_PORTFFT_BACKEND + - True, False + - False + * - BUILD_FUNCTIONAL_TESTS + - True, False + - True + * - BUILD_EXAMPLES + - True, False + - True + * - TARGET_DOMAINS (list) + - blas, lapack, rng, dft, sparse_blas + - All domains + +Some additional build options are given in the section `Additional build options`_. + +.. _build_target_domains: + +TARGET_DOMAINS +^^^^^^^^^^^^^^ + +oneMKL supports multiple domains: BLAS, DFT, LAPACK, RNG and sparse BLAS. The +domains built by oneMKL can be selected using the ``TARGET_DOMAINS`` parameter. +In most cases, ``TARGET_DOMAINS`` is set automatically according to the domains +supported by the backend libraries enabled. 
However, while most backend +libraries support only one of these domains, some may support multiple. For +example, the ``MKLCPU`` backend supports every domain. To enable support for +only the BLAS domain in the oneMKL Interfaces whilst compiling with ``MKLCPU``, +``TARGET_DOMAINS`` could be set to ``blas``. To enable BLAS and DFT, +``-DTARGET_DOMAINS="blas dft"`` would be used. + + +Backends +######### + +.. _build_for_intel_onemkl_dpcpp: + +Building for Intel(R) oneMKL +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Intel(R) oneMKL backend supports multiple domains on both x86 CPUs and Intel +GPUs. The MKLCPU backend using Intel(R) oneMKL for x86 CPU is enabled by +default, and controlled with the parameter ``ENABLE_MKLCPU_BACKEND``. The MKLGPU +backend using Intel(R) oneMKL for Intel GPU is enabled by default, and +controlled with the parameter ``ENABLE_MKLGPU_BACKEND``. + +When using the Intel(R) oneAPI DPC++ Compiler, it is likely that Intel(R) oneMKL +will be found automatically. If it is not, the parameter ``MKL_ROOT`` can be set +to point to the installation prefix of Intel(R) oneMKL. Alternatively, the +``MKLROOT`` environment variable can be set, either manually or by using an +environment script provided by the package. + + +.. _build_for_CUDA_dpcpp: + +Building for CUDA +^^^^^^^^^^^^^^^^^ + +The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``, +``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and +``ENABLE_CUSOLVER_BACKEND``. + +No additional parameters are required for using CUDA libraries. In most cases, +the CUDA libraries should be found automatically by CMake. + +.. _build_for_ROCM_dpcpp: + +Building for ROCm +^^^^^^^^^^^^^^^^^ + +The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``, +``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND`` and +``ENABLE_ROCRAND_BACKEND``. + +For *RocBLAS*, *RocSOLVER* and *RocRAND*, the target device architecture must be +set. This can be set using the ``HIP_TARGETS`` parameter.
For example, to +enable a build for MI200 series GPUs, ``-DHIP_TARGETS=gfx90a`` should be set. +Currently, DPC++ can only build for a single HIP target at a time. This may +change in future versions. + +A few often-used architectures are listed below: + +.. list-table:: + :header-rows: 1 + + * - Architecture + - AMD GPU name + * - gfx90a + - AMD Instinct(TM) MI210/250/250X Accelerator + * - gfx908 + - AMD Instinct(TM) MI 100 Accelerator + * - gfx906 + - | AMD Radeon Instinct(TM) MI50/60 Accelerator + | AMD Radeon(TM) (Pro) VII Graphics Card + * - gfx900 + - | Radeon Instinct(TM) MI 25 Accelerator + | Radeon(TM) RX Vega 64/56 Graphics + +For a host with ROCm installed, the device architecture can be retrieved via the +``rocminfo`` tool. The architecture will be displayed in the ``Name:`` row. + +.. _build_for_portlibs_dpcpp: + +Pure SYCL backends: portBLAS and portFFT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`portBLAS `_ and `portFFT +`_ are experimental pure-SYCL +backends that work on all SYCL targets supported by the DPC++ compiler. Since +they support multiple targets, they cannot be enabled with other backends in the +same domain, or the MKLCPU or MKLGPU backends. Both libraries are experimental +and currently only support a subset of operations and features. + +For best performance, both libraries must be tuned. See the individual sections +for more details. + +Both portBLAS and portFFT are used as header-only libraries, and will be +downloaded automatically if not found. + +.. _build_for_portblas_dpcpp: + +Building for portBLAS +--------------------- + +`portBLAS `_ is +enabled by setting ``-DENABLE_PORTBLAS_BACKEND=True``. + +By default, the portBLAS backend is not tuned for any specific device. +This tuning is required to achieve best performance. +portBLAS can be tuned for a specific hardware target by adding compiler +definitions in 2 ways: + +#. + Manually specify a tuning target with ``-DPORTBLAS_TUNING_TARGET=``. 
+ The list of portBLAS targets can be found + `here `_. + This will automatically set ``-fsycl-targets`` if needed. +#. + If one target is set via ``-fsycl-targets``, the configuration step will + try to automatically detect the portBLAS tuning target. One can manually + specify ``-fsycl-targets`` via ``CMAKE_CXX_FLAGS``. See + `DPC++ User Manual `_ + for more information on ``-fsycl-targets``. + +portBLAS relies heavily on JIT compilation. This may cause time-outs on some +systems. To avoid this issue, use ahead-of-time compilation through tuning +targets or ``-fsycl-targets``. + +.. _build_for_portfft_dpcpp: + +Building for portFFT +--------------------- + +`portFFT `_ is enabled by setting +``-DENABLE_PORTFFT_BACKEND=True``. + +By default, the portFFT backend is not tuned for any specific device. The tuning +flags are detailed in the `portFFT +`_ repository, and can be set at +configuration time. Note that some tuning configurations may be incompatible +with some targets. + +The portFFT library is compiled using the same ``-fsycl-targets`` as specified +by the ``CMAKE_CXX_FLAGS``. If none are found, it will compile for +``-fsycl-targets=spir64``, and -if the compiler supports it- +``nvptx64-nvidia-cuda``. To enable HIP targets, ``HIP_TARGETS`` must be +specified. See `DPC++ User Manual +`_ for more information on +``-fsycl-targets``. + +.. _build_additional_options_dpcpp: + +Additional Build Options +########################## + +When building oneMKL the SYCL implementation can be specified by setting the +``ONEMKL_SYCL_IMPLEMENTATION`` option. Possible values are: + +* ``dpc++`` (default) for the `Intel(R) oneAPI DPC++ Compiler + `_ and for the `oneAPI + DPC++ Compiler `_ compilers. +* ``hipsycl`` for the `AdaptiveCpp `_ + SYCL implementation. +Please see :ref:`building_the_project_with_adaptivecpp` if using this option. + +The following table provides details of CMake options and their default values: + +.. 
list-table:: + :header-rows: 1 + + * - CMake Option + - Supported Values + - Default Value + * - BUILD_SHARED_LIBS + - True, False + - True + * - BUILD_DOC + - True, False + - False + + +.. note:: + When building with ``clang++`` for AMD backends, you must additionally set + ``ONEAPI_DEVICE_SELECTOR`` to ``hip:gpu`` and provide ``-DHIP_TARGETS`` + according to the targeted hardware. This backend has only been tested for the + ``gfx90a`` architecture (MI210) at the time of writing. + +.. note:: + When building with ``BUILD_FUNCTIONAL_TESTS=True`` (default option) only a single CUDA backend can be built + (`#270 `_). + + +.. _build_invocation_examples_dpcpp: + +CMake invocation examples +########################## + +Build oneMKL with support for Nvidia GPUs with tests +disabled using the Ninja build system: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -GNinja \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DENABLE_MKLGPU_BACKEND=False \ + -DENABLE_MKLCPU_BACKEND=False \ + -DENABLE_CUFFT_BACKEND=True \ + -DENABLE_CUBLAS_BACKEND=True \ + -DENABLE_CUSOLVER_BACKEND=True \ + -DENABLE_CURAND_BACKEND=True \ + -DBUILD_FUNCTIONAL_TESTS=False + +``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``) +and Intel GPU (``MKLGPU``) backends are enabled by default, but are disabled +here. The backends for Nvidia GPUs must all be explicitly enabled. The tests are +disabled, but the examples will still be built. + +Building oneMKL with support for AMD GPUs with tests +disabled: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DENABLE_MKLCPU_BACKEND=False \ + -DENABLE_MKLGPU_BACKEND=False \ + -DENABLE_ROCFFT_BACKEND=True \ + -DENABLE_ROCBLAS_BACKEND=True \ + -DENABLE_ROCSOLVER_BACKEND=True \ + -DHIP_TARGETS=gfx90a \ + -DBUILD_FUNCTIONAL_TESTS=False + +``$ONEMKL_DIR`` points at the oneMKL source directory.
The x86 CPU (``MKLCPU``) +and Intel GPU (``MKLGPU``) backends are enabled by default, but are disabled +here. The backends for AMD GPUs must all be explicitly enabled. The tests are +disabled, but the examples will still be built. + + +Build oneMKL for the DFT domain only with support for x86 CPU, Intel GPU, AMD +GPU and Nvidia GPU with testing enabled: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -DCMAKE_CXX_COMPILER=icpx \ + -DCMAKE_C_COMPILER=icx \ + -DENABLE_ROCFFT_BACKEND=True \ + -DENABLE_CUFFT_BACKEND=True \ + -DTARGET_DOMAINS=dft \ + -DBUILD_EXAMPLES=False + +Note that this is not a supported configuration, and requires Codeplay's oneAPI +for `AMD `_ and +`Nvidia `_ GPU +plugins. The MKLCPU and MKLGPU backends are enabled by +default, with backends for Nvidia GPU and AMD GPU explicitly enabled. +``-DTARGET_DOMAINS=dft`` causes only DFT backends to be built. If this was not +set, the backend libraries to enable the use of BLAS, LAPACK and RNG with MKLGPU +and MKLCPU would also be enabled. The build of examples is disabled. Since +functional testing was not disabled, tests would be built. + +.. _project_cleanup: + +Project Cleanup +############### + +Most use-cases involve building the project without the need to clean up the +build directory. However, if you wish to clean up the build directory, you can +delete the ``build`` folder and create a new one. If you wish to clean up the +build files but retain the build configuration, the following commands will help you +do so. + +.. code-block:: sh + + # If you use "GNU/Unix Makefiles" for building, + make clean + + # If you use "Ninja" for building + ninja -t clean + + +.. _build_for_windows_dpcpp: + +Building for Windows +#################### + +The Windows build is similar to the Linux build, albeit that `fewer backends are +supported `_. +Additionally, the Ninja build system must be used. For example: + +.. code-block:: bash + + # Inside <path to onemkl> + md build && cd build + cmake .. 
-G Ninja [-DCMAKE_CXX_COMPILER=\bin\icx] # required only if icx is not found in environment variable PATH + [-DCMAKE_C_COMPILER=\bin\icx] # required only if icx is not found in environment variable PATH + [-DMKL_ROOT=] # required only if environment variable MKLROOT is not set + [-DREF_BLAS_ROOT=] # required only for testing + [-DREF_LAPACK_ROOT=] # required only for testing + ninja + ctest + cmake --install . --prefix # required to have full package structure + +.. _build_common_problems_dpcpp: + +Build FAQ +######### + +clangrt builtins lib not found + Encountered when trying to build oneMKL with some ROCm libraries. There are + several possible solutions: * If building Open DPC++ from source, add + ``compiler-rt`` to the external projects compile option: + ``--llvm-external-projects compiler-rt``. * The *clangrt* from ROCm can be + used, depending on ROCm version: ``export + LIBRARY_PATH=/path/to/rocm-$rocm-version$/llvm/lib/clang/$clang-version$/lib/linux/:$LIBRARY_PATH`` + +Could NOT find CBLAS (missing: CBLAS file) + Encountered when tests are enabled along with the BLAS domain. The tests + require a reference BLAS implementation, but cannot find one. Either install + or build a BLAS library and set ``-DREF_BLAS_ROOT`` as described in + :ref:`building_and_running_tests`. Alternatively, the tests can be disabled by + setting ``-DBUILD_FUNCTIONAL_TESTS=False``. + +error: invalid target ID ''; format is a processor name followed by an optional colon-delimited list of features followed by an enable/disable sign (e.g.,'gfx908:sramecc+:xnack-') + The ``HIP_TARGETS`` parameter has not been set. Please see `Building for ROCm`_. + diff --git a/_sources/index.rst b/_sources/index.rst index e1a051524..51e4216ee 100644 --- a/_sources/index.rst +++ b/_sources/index.rst @@ -1,5 +1,5 @@ .. - Copyright 2020–2022 Intel Corporation + Copyright 2020-2024 Intel Corporation .. 
_onemkl: @@ -21,7 +21,10 @@ Contents :maxdepth: 2 selecting_a_compiler.rst - building_the_project.rst + building_the_project_with_dpcpp.rst + building_the_project_with_adaptivecpp.rst + building_and_running_tests.rst + using_onemkl_with_cmake.rst .. toctree:: :caption: Developer Reference diff --git a/_sources/using_onemkl_with_cmake.rst b/_sources/using_onemkl_with_cmake.rst new file mode 100644 index 000000000..5fb497362 --- /dev/null +++ b/_sources/using_onemkl_with_cmake.rst @@ -0,0 +1,102 @@ +.. _using_onemkl_interface_library_with_cmake: + +Using the oneMKL Interfaces in your project with CMake +============================================================= + +The CMake build tool can help you use oneMKL Interfaces in your own project. +Instead of manually linking and including directories, you can use the CMake targets +exported by the oneMKL Interfaces project. You can use oneMKL in one of two +forms, with the target names depending on the approach taken: + +* you can use a previously installed copy, either from a binary distribution or + built from source. This can be imported using CMake's ``find_package`` + command. See the section `using_from_installed_binary`_. +* or you can have CMake automatically download and build oneMKL as part of the + build process using CMake's FetchContent_ functionality. + See the section `using_with_fetchcontent`_. + + +.. _using_from_installed_binary: + +Using an installed oneMKL Interfaces +#################################### + +If the oneMKL Interfaces have been previously installed, either by building from +source or as a distributed binary, they can be consumed using CMake using +``find_package(oneMKL REQUIRED)``. The compiler used for the target library or +application should match that used to build oneMKL Interfaces. + +For example: + +.. code-block:: cmake + + find_package(oneMKL REQUIRED) + target_link_libraries(myTarget PRIVATE MKL::onemkl) + +Different targets can be used depending on the requirements of oneMKL. 
+To link against the entire library, the ``MKL::onemkl`` target should be used. +For specific domains, ``MKL::onemkl_`` should be used. +And for specific backends, ``MKL::onemkl__`` should be used. + +When using a binary, it may be useful to know the backends that were enabled +during the build. To check for the existence of backends, CMake's ``if(TARGET +)`` construct can be used. For example, with the ``cufft`` backend: + +.. code-block:: cmake + + if(TARGET MKL::onemkl_dft_cufft) + target_link_libraries(myTarget PRIVATE MKL::onemkl_dft_cufft) + else() + message(FATAL_ERROR "oneMKL Interfaces was not built with CuFFT backend") + endif() + + +If oneMKL Interfaces has been installed to a non-standard location, the +operating system may not find the backend libraries when they're lazily loaded +at runtime. To make sure they're found you may need to set +``LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH`` on Linux. + +.. _using_with_fetchcontent: + +Using CMake's FetchContent +########################## + + +The FetchContent_ functionality of CMake can be used to download, build and +install oneMKL Interfaces as part of the build. + +For example: + +.. code-block:: cmake + + include(FetchContent) + set(BUILD_FUNCTIONAL_TESTS False) + set(BUILD_EXAMPLES False) + set(ENABLE__BACKEND True) + FetchContent_Declare( + onemkl_interface_library + GIT_REPOSITORY https://github.com/oneapi-src/oneMKL.git + GIT_TAG develop + ) + FetchContent_MakeAvailable(onemkl_interface_library) + + target_link_libraries(myTarget PRIVATE onemkl) + +The build parameters should be appropriately set before +``FetchContent_Declare``. See :ref:`building_the_project_with_dpcpp` or +:ref:`building_the_project_with_adaptivecpp`. + +To link against the main library with run-time dispatching, use the target +``onemkl``. To link against particular domains, use the target +``onemkl_``. For example, ``onemkl_blas`` or ``onemkl_dft``. 
To link +against particular backends (as required for static dispatch of oneAPI calls to +a particular backend), use the target ``onemkl__``. For +example, ``onemkl_dft_cufft``. + +When using the run-time dispatch mechanism, it is likely that the operating +system will not find the backend libraries when they're loaded at runtime. To +make sure they're found you may need to set +``LD_LIBRARY_PATH=/lib:$LD_LIBRARY_PATH`` on Linux. + + +.. _FetchContent: https://cmake.org/cmake/help/latest/module/FetchContent.html diff --git a/building_and_running_tests.html b/building_and_running_tests.html new file mode 100644 index 000000000..889547485 --- /dev/null +++ b/building_and_running_tests.html @@ -0,0 +1,659 @@ + + + + + + + + + + + Building and Running Tests — oneAPI Math Kernel Library Interfaces 0.1 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+
+
+
+
+ + + + +
+
+ + + +
+ + + + + + + + + + + + + +
+ +
+ + + +
+ +
+
+ +
+
+ +
+ +
+ +
+ + +
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+ + + +
+

Building and Running Tests

+ +
+
+ +
+
+
+ + + + +
+ +
+

Building and Running Tests#

+

The functional tests are enabled by default, and can be enabled/disabled +with the CMake build parameter -DBUILD_FUNCTIONAL_TESTS=True/False. Only +the tests relevant to the enabled backends and target domains will be built.

+

Building tests for BLAS and LAPACK domains requires additional libraries for +reference.

+
    +
  • BLAS: Requires a reference BLAS library.

  • +
  • LAPACK: Requires a reference LAPACK library.

  • +
+

For both BLAS and LAPACK, shared libraries supporting both 32- and 64-bit +indexing are required.

+

A reference LAPACK implementation (including BLAS) can be built as +follows:

+
git clone https://github.com/Reference-LAPACK/lapack.git
+cd lapack; mkdir -p build; cd build
+cmake -DCMAKE_INSTALL_PREFIX=~/lapack -DCBLAS=True -DLAPACK=True -DLAPACKE=True -DBUILD_INDEX64=True -DBUILD_SHARED_LIBS=True ..
+cmake --build . -j --target install
+cmake -DCMAKE_INSTALL_PREFIX=~/lapack -DCBLAS=True -DLAPACK=True -DLAPACKE=True -DBUILD_INDEX64=False -DBUILD_SHARED_LIBS=True ..
+cmake --build . -j --target install
+
+
+

and then used in oneMKL by setting -DREF_BLAS_ROOT=/path/to/lapack/install +and -DREF_LAPACK_ROOT=/path/to/lapack/install.

+

You can re-run tests without re-building the entire project.

+

To run the tests, either run test binaries individually, or use the ctest CMake test driver program.

+
# Run all tests
+ctest
+# Run only Gpu specific tests
+ctest -R Gpu
+# Exclude Cpu tests
+ctest -E Cpu
+
+
+

For more ctest options, refer to ctest manual page.

+

If you encounter the issue BACKEND NOT FOUND EXCEPTION when running tests, +you may need to add your <oneMKL build directory>/lib to your +LD_LIBRARY_PATH on Linux.

+
+ + +
+ + + + + + + + +
+ + + +
+ + +
+
+ + +
+ + +
+
+
+ + + + + +
+
+ + \ No newline at end of file diff --git a/building_the_project_with_adaptivecpp.html b/building_the_project_with_adaptivecpp.html new file mode 100644 index 000000000..125bb0699 --- /dev/null +++ b/building_the_project_with_adaptivecpp.html @@ -0,0 +1,804 @@ + + + + + + + + + + + Building the Project with AdaptiveCpp — oneAPI Math Kernel Library Interfaces 0.1 documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+
+
+
+
+ + + + +
+
+ + + +
+ + + + + + + + + + + + + +
+ +
+ + + +
+ +
+
+ +
+
+ +
+ +
+ +
+ + +
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+ + + +
+

Building the Project with AdaptiveCpp

+ +
+ +
+
+ + + + +
+ +
+

Building the Project with AdaptiveCpp#

+
+

Environment Setup#

+
    +
  1. Build and install AdaptiveCpp. For a detailed description of available +AdaptiveCpp backends, their dependencies, and installation, see the +AdaptiveCpp installation readme.

  2. +
  3. Clone this project. The root directory of the cloned repository will be +referred to as <path to onemkl>.

  4. +
  5. Download and install the required dependencies +manually.

  6. +
+
+
+

Build Commands#

+

In most cases, building oneMKL Interfaces is as simple as setting the compiler and +selecting the desired backends to build with.

+

On Linux (other OSes are not supported with the AdaptiveCpp compiler):

+
# Inside <path to onemkl>
+mkdir build && cd build
+cmake .. -DONEMKL_SYCL_IMPLEMENTATION=hipsycl    \ # Indicate that AdaptiveCpp is being used.
+        -DENABLE_MKLGPU_BACKEND=False            \ # MKLGPU backend is not supported by AdaptiveCpp
+        -DENABLE_<BACKEND_NAME>_BACKEND=True     \ # Enable backend(s) (optional)
+        -DENABLE_<BACKEND_NAME_2>_BACKEND=True   \ # Multiple backends can be enabled at once.
+        -DHIPSYCL_TARGETS=omp\;hip:gfx90a,gfx906 \ # Set target architectures depending on supported devices.
+        -DBUILD_FUNCTIONAL_TESTS=False           \ # See page *Building and Running Tests* for more on building tests. True by default.
+        -DBUILD_EXAMPLES=False                   # Optional: True by default.
+cmake --build .
+cmake --install . --prefix <path_to_install_dir> # required to have full package structure
+
+
+

Backends should be enabled by setting -DENABLE_<BACKEND_NAME>_BACKEND=True for +each desired backend. By default, the MKLGPU and MKLCPU backends are +enabled, but MKLGPU must be disabled with AdaptiveCpp. The supported +backends for the compilers are given in the table at oneMKL supported +configurations table, +and the CMake option names are given in the table below. Some backends may +require additional parameters to be set. See the relevant section below for +additional guidance. The target architectures must be specified with +HIPSYCL_TARGETS. See the AdaptiveCpp documentation.

+

If a backend library supports multiple domains (i.e. BLAS, RNG), it may be +desirable to only enable selected domains. For this, the TARGET_DOMAINS +variable should be set. For further details, see the TARGET_DOMAINS section of the DPC++ build guide.

+

By default, the library also additionally builds examples and tests. These can +be disabled by setting the parameters BUILD_FUNCTIONAL_TESTS and +BUILD_EXAMPLES to False. Building the functional tests may require additional +external libraries. See the section Building and Running Tests for more +information.

+

The most important supported build options are:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

CMake Option

Supported Values

Default Value

ENABLE_MKLCPU_BACKEND

True, False

True

ENABLE_CUBLAS_BACKEND

True, False

False

ENABLE_CURAND_BACKEND

True, False

False

ENABLE_NETLIB_BACKEND

True, False

False

ENABLE_ROCBLAS_BACKEND

True, False

False

ENABLE_ROCRAND_BACKEND

True, False

False

ENABLE_MKLCPU_THREAD_TBB

True, False

True

BUILD_FUNCTIONAL_TESTS

True, False

True

BUILD_EXAMPLES

True, False

True

TARGET_DOMAINS (list)

blas, rng

All supported domains

+

Some additional build options are given in +Additional Build Options.

+
+
+

Backends#

+
+

Building for CUDA#

+

The CUDA backends can be enabled with ENABLE_CUBLAS_BACKEND and +ENABLE_CURAND_BACKEND.

+

The target architecture must be set using the HIPSYCL_TARGETS parameter. For +example, to target an Nvidia A100 (Ampere architecture), set +-DHIPSYCL_TARGETS=cuda:sm_80, where the figure 80 corresponds to a CUDA +compute capability of 8.0. The correspondence between compute capabilities and +Nvidia GPU products is given on the Nvidia website. Multiple architectures can be +enabled using a comma-separated list. See the AdaptiveCpp documentation.

+

No additional parameters are required for using CUDA libraries. In most cases, +the CUDA libraries should be found automatically by CMake.

+
+
+

Building for ROCm#

+

The ROCm backends can be enabled with ENABLE_ROCBLAS_BACKEND and +ENABLE_ROCRAND_BACKEND.

+

The target architecture must be set using the HIPSYCL_TARGETS parameter. See +the AdaptiveCpp documentation. +For example, to target the MI200 series, set -DHIPSYCL_TARGETS=hip:gfx90a. +Multiple architectures can be enabled using a comma-separated list, for example +-DHIPSYCL_TARGETS=hip:gfx906,gfx90a. Multiple APIs can be enabled by separating them with a semicolon, for example +-DHIPSYCL_TARGETS=omp\;hip:gfx906,gfx90a.

+

For common AMD GPU architectures, see the Building for ROCm section in the +DPC++ build guide.

+
+
+
+

Project Cleanup#

+

Most use-cases involve building the project without the need to clean up the +build directory. However, if you wish to clean up the build directory, you can +delete the build folder and create a new one. If you wish to clean up the +build files but retain the build configuration, the following commands will help you +do so.

+
# If you use "GNU/Unix Makefiles" for building,
+make clean
+
+# If you use "Ninja" for building
+ninja -t clean
+
+
+
+
+ + +
+ + + + + + + + +
+ + + + + + +
+
+ + +
+ + +
+
+
+ + + + + +
+
+ + \ No newline at end of file diff --git a/building_the_project.html b/building_the_project_with_dpcpp.html similarity index 58% rename from building_the_project.html rename to building_the_project_with_dpcpp.html index 4c055e64b..a69dd0fc4 100644 --- a/building_the_project.html +++ b/building_the_project_with_dpcpp.html @@ -8,7 +8,7 @@ - Building the Project — oneAPI Math Kernel Library Interfaces 0.1 documentation + Building the Project with DPC++ — oneAPI Math Kernel Library Interfaces 0.1 documentation @@ -40,11 +40,11 @@ - + - + @@ -159,7 +159,10 @@

Get Started

Developer Reference