From ce6d7d4d1873010ee3dfdf9dac8f8ad1667cf394 Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 14:47:50 +0100 Subject: [PATCH 1/7] Add OpenCL Continuous Integration. Closes #188 --- .travis.yml | 41 ++++++++++++++++++++++++++--------------- ci/README.md | 3 +++ ci/amd_sdk.sh | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 ci/README.md create mode 100644 ci/amd_sdk.sh diff --git a/.travis.yml b/.travis.yml index 74770551e..a206962d0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,29 +12,20 @@ matrix: compiler: gcc - os: linux - env: CHANNEL=devel + env: CHANNEL=stable BACKEND=OpenCL compiler: gcc - # For faster testing we don't test clang on linux, only on macOS - # - os: linux - # env: CHANNEL=stable - # compiler: clang - # - # - os: linux - # env: CHANNEL=devel - # compiler: clang + - os: linux + env: CHANNEL=devel + compiler: gcc # On OSX we only test against clang (gcc is mapped to clang by default) # Note: for OpenMP, Homebrew will build flame/blis with GCC-5 + # As BLIS is in OSX homebrew, this is an opportunity to test it as well - os: osx env: CHANNEL=stable BLIS=true compiler: clang - # For faster testing, we only test BLIS = true - # - os: osx - # env: CHANNEL=stable BLIS=false - # compiler: clang - allow_failures: # Ignore failures when building against the devel Nim branch # Also ignore OSX, due to very long build time and Homebrew/curl SSLRead errors @@ -52,6 +43,22 @@ before_install: # On MacOS flame/blis can be tested as it is an homebrew package - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install brewsci/science/blis; fi + - if [[ "$BACKEND" == "OpenCL" ]]; then + bash ci/amd_sdk.sh; + tar -xjf AMD-SDK.tar.bz2; + AMDAPPSDK=${HOME}/AMDAPPSDK; + export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors; + mkdir -p ${OPENCL_VENDOR_PATH}; + sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDK}; + echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd; + export LD_LIBRARY_PATH=${AMDAPPSDK}/lib/x86_64:${LD_LIBRARY_PATH}; + chmod +x ${AMDAPPSDK}/bin/x86_64/clinfo; + ${AMDAPPSDK}/bin/x86_64/clinfo; + + sudo add-apt-repository ppa:cnugteren/clblast; + sudo apt-get update; + sudo apt-get install clblast; + fi install: - export CHOOSENIM_NO_ANALYTICS=1 @@ -63,7 +70,11 @@ install: script: - nimble refresh - - nimble test + - if [[ "$BACKEND" == "OpenCL" ]]; then + nimble test_opencl; + else + nimble test; + fi branches: except: diff --git a/ci/README.md b/ci/README.md new file mode 100644 index 000000000..a64a1fd2a --- /dev/null +++ b/ci/README.md @@ -0,0 +1,3 @@ +# Continuous Integration + +Scripts needed for continuous integration of Arraymancer \ No newline at end of file diff --git a/ci/amd_sdk.sh b/ci/amd_sdk.sh new file mode 100644 index 000000000..acbe617b8 --- /dev/null +++ b/ci/amd_sdk.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Original script from https://github.com/gregvw/amd_sdk/ + +# Location from which get nonce and file name from +URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/" +URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/" + +NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"' +FILE_STRING='name="f"' +POSTID_STRING='name="post_id"' +NONCE2_STRING='name="amd_developer_central_nonce"' + +#For newest FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'` +FORM=`wget --no-check-certificate -qO - $URL | sed -n '/download-5/,/64-bit/p'` + +# Get nonce from form +NONCE1=`echo $FORM | awk -F ${NONCE1_STRING} '{print $2}'` +NONCE1=`echo $NONCE1 | awk -F'"' '{print $2}'` +echo $NONCE1 + +# get the postid +POSTID=`echo $FORM | awk -F ${POSTID_STRING} '{print $2}'` +POSTID=`echo $POSTID | awk -F'"' '{print $2}'` +echo $POSTID + +# get file name +FILE=`echo $FORM | awk -F ${FILE_STRING} '{print $2}'` +FILE=`echo $FILE | awk -F'"' '{print $2}'` +echo $FILE + +FORM=`wget --no-check-certificate -qO - $URLDOWN --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}"` + +NONCE2=`echo $FORM | awk -F ${NONCE2_STRING} '{print $2}'` +NONCE2=`echo $NONCE2 | awk -F'"' '{print $2}'` +echo $NONCE2 + +wget --no-check-certificate --content-disposition --trust-server-names $URLDOWN --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2; From e9daa2a83c5d235f351f544f83ce29f91db53a4b Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 14:58:31 +0100 Subject: [PATCH 2/7] Build was stuck on PPA prompt + cleanup AMD SDK download --- .travis.yml | 14 ++---------- ci/amd_sdk.sh | 38 ------------------------------- ci/opencl_amd_sdk.sh | 54 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 50 deletions(-) delete mode 100644 ci/amd_sdk.sh create mode 100644 ci/opencl_amd_sdk.sh diff --git a/.travis.yml b/.travis.yml index a206962d0..3667b71ff 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,18 +44,8 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install brewsci/science/blis; fi - if [[ "$BACKEND" == "OpenCL" ]]; then - bash ci/amd_sdk.sh; - tar -xjf AMD-SDK.tar.bz2; - AMDAPPSDK=${HOME}/AMDAPPSDK; - export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors; - mkdir -p ${OPENCL_VENDOR_PATH}; - sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDK}; - echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd; - export LD_LIBRARY_PATH=${AMDAPPSDK}/lib/x86_64:${LD_LIBRARY_PATH}; - chmod +x ${AMDAPPSDK}/bin/x86_64/clinfo; - ${AMDAPPSDK}/bin/x86_64/clinfo; - - sudo add-apt-repository ppa:cnugteren/clblast; + bash ci/opencl_amd_sdk.sh; + sudo add-apt-repository ppa:cnugteren/clblast -y; sudo apt-get update; sudo apt-get install clblast; fi diff --git a/ci/amd_sdk.sh b/ci/amd_sdk.sh deleted file mode 100644 index acbe617b8..000000000 --- a/ci/amd_sdk.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Original script from https://github.com/gregvw/amd_sdk/ - -# Location from which get nonce and file name from -URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/" -URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/" - -NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"' -FILE_STRING='name="f"' -POSTID_STRING='name="post_id"' -NONCE2_STRING='name="amd_developer_central_nonce"' - -#For newest FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'` -FORM=`wget --no-check-certificate -qO - $URL | sed -n '/download-5/,/64-bit/p'` - -# Get nonce from form -NONCE1=`echo $FORM | awk -F ${NONCE1_STRING} '{print $2}'` -NONCE1=`echo $NONCE1 | awk -F'"' '{print $2}'` -echo $NONCE1 - -# get the postid -POSTID=`echo $FORM | awk -F ${POSTID_STRING} '{print $2}'` -POSTID=`echo $POSTID | awk -F'"' '{print $2}'` -echo $POSTID - -# get file name -FILE=`echo $FORM | awk -F ${FILE_STRING} '{print $2}'` -FILE=`echo $FILE | awk -F'"' '{print $2}'` -echo $FILE - -FORM=`wget --no-check-certificate -qO - $URLDOWN --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}"` - -NONCE2=`echo $FORM | awk -F ${NONCE2_STRING} '{print $2}'` -NONCE2=`echo $NONCE2 | awk -F'"' '{print $2}'` -echo $NONCE2 - -wget --no-check-certificate --content-disposition --trust-server-names $URLDOWN --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2; diff --git a/ci/opencl_amd_sdk.sh b/ci/opencl_amd_sdk.sh new file mode 100644 index 000000000..e104fbddd --- /dev/null +++ b/ci/opencl_amd_sdk.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Original script from https://github.com/gregvw/amd_sdk/ + +export OPENCL_VENDOR_PATH=${AMDAPPSDKROOT}/etc/OpenCL/vendors +export LD_LIBRARY_PATH=${AMDAPPSDKROOT}/lib/x86_64:${LD_LIBRARY_PATH} +export CMAKE_LIBRARY_PATH=${AMDAPPSDKROOT}/lib/x86_64 + +if [ ! -e ${AMDAPPSDKROOT}/bin/x86_64/clinfo ]; then + # Location from which get nonce and file name from + URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/" + URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/" + + NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"' + FILE_STRING='name="f"' + POSTID_STRING='name="post_id"' + NONCE2_STRING='name="amd_developer_central_nonce"' + + # This gets the second latest (2.9.1 ATM, latest is 3.0) + # For newest: FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'` + FORM=`wget -qO - $URL | sed -n '/download-5/,/64-bit/p'` + + # Get nonce from form + NONCE1=`echo $FORM | awk -F ${NONCE1_STRING} '{print $2}'` + NONCE1=`echo $NONCE1 | awk -F'"' '{print $2}'` + echo $NONCE1 + + # get the postid + POSTID=`echo $FORM | awk -F ${POSTID_STRING} '{print $2}'` + POSTID=`echo $POSTID | awk -F'"' '{print $2}'` + echo $POSTID + + # get file name + FILE=`echo $FORM | awk -F ${FILE_STRING} '{print $2}'` + FILE=`echo $FILE | awk -F'"' '{print $2}'` + echo $FILE + + FORM=`wget -qO - $URLDOWN --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}"` + + NONCE2=`echo $FORM | awk -F ${NONCE2_STRING} '{print $2}'` + NONCE2=`echo $NONCE2 | awk -F'"' '{print $2}'` + echo $NONCE2 + + wget --content-disposition --trust-server-names $URLDOWN --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2; + + # Unpack and install + tar -xjf AMD-SDK.tar.bz2; + mkdir -p ${OPENCL_VENDOR_PATH}; + sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDKROOT}; + echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd; + chmod +x ${AMDAPPSDKROOT}/bin/x86_64/clinfo; +fi + +${AMDAPPSDKROOT}/bin/x86_64/clinfo \ No newline at end of file From 6ddae7ecb5a260f380f1a009dec04ffbd8c43566 Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 15:25:11 +0100 Subject: [PATCH 3/7] CLBlast is only available for xenial + AMD SDK config from VexCL has broken permission --- .travis.yml | 6 ++- ci/opencl_amd_sdk.sh | 97 +++++++++++++++++++++----------------------- 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3667b71ff..da6571369 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,7 +45,11 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install brewsci/science/blis; fi - if [[ "$BACKEND" == "OpenCL" ]]; then bash ci/opencl_amd_sdk.sh; - sudo add-apt-repository ppa:cnugteren/clblast -y; + + echo "deb http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list + echo "deb-src http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 48647B32AE252498951E6BD11F9D5FBEF38CB4E0 + sudo apt-get update; sudo apt-get install clblast; fi diff --git a/ci/opencl_amd_sdk.sh b/ci/opencl_amd_sdk.sh index e104fbddd..ae6f49bb7 100644 --- a/ci/opencl_amd_sdk.sh +++ b/ci/opencl_amd_sdk.sh @@ -2,53 +2,50 @@ # Original script from https://github.com/gregvw/amd_sdk/ -export OPENCL_VENDOR_PATH=${AMDAPPSDKROOT}/etc/OpenCL/vendors -export LD_LIBRARY_PATH=${AMDAPPSDKROOT}/lib/x86_64:${LD_LIBRARY_PATH} -export CMAKE_LIBRARY_PATH=${AMDAPPSDKROOT}/lib/x86_64 - -if [ ! -e ${AMDAPPSDKROOT}/bin/x86_64/clinfo ]; then - # Location from which get nonce and file name from - URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/" - URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/" - - NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"' - FILE_STRING='name="f"' - POSTID_STRING='name="post_id"' - NONCE2_STRING='name="amd_developer_central_nonce"' - - # This gets the second latest (2.9.1 ATM, latest is 3.0) - # For newest: FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'` - FORM=`wget -qO - $URL | sed -n '/download-5/,/64-bit/p'` - - # Get nonce from form - NONCE1=`echo $FORM | awk -F ${NONCE1_STRING} '{print $2}'` - NONCE1=`echo $NONCE1 | awk -F'"' '{print $2}'` - echo $NONCE1 - - # get the postid - POSTID=`echo $FORM | awk -F ${POSTID_STRING} '{print $2}'` - POSTID=`echo $POSTID | awk -F'"' '{print $2}'` - echo $POSTID - - # get file name - FILE=`echo $FORM | awk -F ${FILE_STRING} '{print $2}'` - FILE=`echo $FILE | awk -F'"' '{print $2}'` - echo $FILE - - FORM=`wget -qO - $URLDOWN --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}"` - - NONCE2=`echo $FORM | awk -F ${NONCE2_STRING} '{print $2}'` - NONCE2=`echo $NONCE2 | awk -F'"' '{print $2}'` - echo $NONCE2 - - wget --content-disposition --trust-server-names $URLDOWN --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2; - - # Unpack and install - tar -xjf AMD-SDK.tar.bz2; - mkdir -p ${OPENCL_VENDOR_PATH}; - sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDKROOT}; - echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd; - chmod +x ${AMDAPPSDKROOT}/bin/x86_64/clinfo; -fi - -${AMDAPPSDKROOT}/bin/x86_64/clinfo \ No newline at end of file +# Location from which get nonce and file name from +URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/" +URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/" + +NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"' +FILE_STRING='name="f"' +POSTID_STRING='name="post_id"' +NONCE2_STRING='name="amd_developer_central_nonce"' + +#For newest FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'` +FORM=`wget --no-check-certificate -qO - $URL | sed -n '/download-5/,/64-bit/p'` + +# Get nonce from form +NONCE1=`echo $FORM | awk -F ${NONCE1_STRING} '{print $2}'` +NONCE1=`echo $NONCE1 | awk -F'"' '{print $2}'` +echo $NONCE1 + +# get the postid +POSTID=`echo $FORM | awk -F ${POSTID_STRING} '{print $2}'` +POSTID=`echo $POSTID | awk -F'"' '{print $2}'` +echo $POSTID + +# get file name +FILE=`echo $FORM | awk -F ${FILE_STRING} '{print $2}'` +FILE=`echo $FILE | awk -F'"' '{print $2}'` +echo $FILE + +FORM=`wget --no-check-certificate -qO - $URLDOWN --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}"` + +NONCE2=`echo $FORM | awk -F ${NONCE2_STRING} '{print $2}'` +NONCE2=`echo $NONCE2 | awk -F'"' '{print $2}'` +echo $NONCE2 + +wget --no-check-certificate --content-disposition --trust-server-names $URLDOWN --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2; + +# unpacking and installing +tar -xjf AMD-SDK.tar.bz2 +AMDAPPSDK=${HOME}/AMDAPPSDK +export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors +mkdir -p ${OPENCL_VENDOR_PATH} +sh AMD-APP-SDK*.sh --tar -xf -C ${AMDAPPSDK} +echo libamdocl64.so > ${OPENCL_VENDOR_PATH}/amdocl64.icd +export LD_LIBRARY_PATH=${AMDAPPSDK}/lib/x86_64:${LD_LIBRARY_PATH} +chmod +x ${AMDAPPSDK}/bin/x86_64/clinfo + +# Checking OpenCL status +${AMDAPPSDK}/bin/x86_64/clinfo \ No newline at end of file From e05442aba50d669c8306df38527553b444e80c7f Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 15:31:28 +0100 Subject: [PATCH 4/7] PPA signing keys are a mess --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index da6571369..096696497 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,7 +48,7 @@ before_install: echo "deb http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list echo "deb-src http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 48647B32AE252498951E6BD11F9D5FBEF38CB4E0 + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1F9D5FBEF38CB4E0 sudo apt-get update; sudo apt-get install clblast; From 0810eb49a17e2c9e08f540af60d28f3813cd5994 Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 15:44:45 +0100 Subject: [PATCH 5/7] Using dpkg, installing xenial pkg on trusty is ridiculous. Thank you Travis ... --- .travis.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 096696497..60ef07622 100644 --- a/.travis.yml +++ b/.travis.yml @@ -46,12 +46,9 @@ before_install: - if [[ "$BACKEND" == "OpenCL" ]]; then bash ci/opencl_amd_sdk.sh; - echo "deb http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list - echo "deb-src http://ppa.launchpad.net/cnugteren/clblast/ubuntu xenial main" | sudo tee -a /etc/apt/sources.list - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1F9D5FBEF38CB4E0 - - sudo apt-get update; - sudo apt-get install clblast; + curl https://launchpad.net/~cnugteren/+archive/ubuntu/clblast/+files/libclblast_1.3.0-1ubuntu2_amd64.deb -Ssf > libclblast.deb; + sudo dpkg -i libclblast.deb; + sudo apt-get -f install; fi install: From d1aabea20e749523f0174a31ba45d247f9df5b5e Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 15:54:05 +0100 Subject: [PATCH 6/7] Use wget install of curl for the ubuntu redirection --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 60ef07622..164f4bdbf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -46,7 +46,7 @@ before_install: - if [[ "$BACKEND" == "OpenCL" ]]; then bash ci/opencl_amd_sdk.sh; - curl https://launchpad.net/~cnugteren/+archive/ubuntu/clblast/+files/libclblast_1.3.0-1ubuntu2_amd64.deb -Ssf > libclblast.deb; + wget https://launchpad.net/~cnugteren/+archive/ubuntu/clblast/+files/libclblast_1.3.0-1ubuntu2_amd64.deb -O libclblast.deb; sudo dpkg -i libclblast.deb; sudo apt-get -f install; fi From 507578078128ecc015e1dc58329100229155d137 Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Feb 2018 15:55:10 +0100 Subject: [PATCH 7/7] Wrong branch Revert "Add float64 support to matrix-vector multiplication" This reverts commit a9622052cd1132ef3a035419ab9d6f8f2664eef8. --- src/tensor/operators_blas_l1_opencl.nim | 2 +- src/tensor/operators_blas_l2l3_opencl.nim | 53 ++++++++++----------- tests/tensor/test_operators_blas_opencl.nim | 43 +++++++---------- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/src/tensor/operators_blas_l1_opencl.nim b/src/tensor/operators_blas_l1_opencl.nim index 9aca3e3bb..4d0416d91 100644 --- a/src/tensor/operators_blas_l1_opencl.nim +++ b/src/tensor/operators_blas_l1_opencl.nim @@ -27,7 +27,7 @@ import ./backend/metadataArray, # #################################################################### # BLAS Level 1 (Vector dot product, Addition, Scalar to Vector/Matrix) -template dotImpl(T: typedesc[SomeReal], clblast_proc: untyped): untyped = +template dotImpl(T: typedesc, clblast_proc: untyped): untyped = proc dot*(a, b: ClTensor[T]): T = ## Vector to Vector dot (scalar) product when compileOption("boundChecks"): diff --git a/src/tensor/operators_blas_l2l3_opencl.nim b/src/tensor/operators_blas_l2l3_opencl.nim index 18f5aaa1e..e42fb485c 100644 --- a/src/tensor/operators_blas_l2l3_opencl.nim +++ b/src/tensor/operators_blas_l2l3_opencl.nim @@ -7,37 +7,36 @@ import ./data_structure, ./private/[p_init_opencl, p_checks] -template l1l2_blas_Impl(T: typedesc[SomeReal], clblast_gemv_proc: untyped): untyped = - proc openCL_MV_y_eq_aAx_p_by( - alpha: T, a, x: ClTensor[T], - beta: T, y: var ClTensor[T]) = - # Matrix-Vector: y = alpha A matvecmul x + beta y - - # TODO: remove this contiguous layout constraint - if not a.isContiguous: - raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous") - - let - a_is_rowMajor = a.is_C_contiguous - layout = if a_is_rowMajor: CLBlastLayoutRowMajor - else: CLBlastLayoutColMajor - lda = if a_is_rowMajor: a.strides[0] - else: a.strides[1] - - check clblast_gemv_proc(layout, CLBlastTransposeNo, a.shape[0], a.shape[1], - alpha, - a.toClPointer, a.offset, lda, - x.toClpointer, x.offset, x.strides[0], - beta, - y.toClpointer, y.offset, y.strides[0], - unsafeAddr clQueue0, nil) - -l1l2_blas_Impl(float32, clblastSgemv) -l1l2_blas_Impl(float64, clblastDgemv) + +proc openCL_MV_y_eq_aAx_p_by( + alpha: float32, a, x: ClTensor[float32], + beta: float32, y: var ClTensor[float32]) = + # Matrix-Vector: y = alpha A matvecmul x + beta y + + # TODO: remove this contiguous layout constraint + if not a.isContiguous: + raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous") + + let + a_is_rowMajor = a.is_C_contiguous + layout = if a_is_rowMajor: CLBlastLayoutRowMajor + else: CLBlastLayoutColMajor + lda = if a_is_rowMajor: a.strides[0] + else: a.strides[1] + + check clblastSgemv(layout, CLBlastTransposeNo, a.shape[0], a.shape[1], + alpha, + a.toClPointer, a.offset, lda, + x.toClpointer, x.offset, x.strides[0], + beta, + y.toClpointer, y.offset, y.strides[0], + unsafeAddr clQueue0, nil) + proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] = ## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on CUDA + assert T is float32, "Only float32 is supported at the moment" assert b.rank == 1, "Only Matrix-Vector product is supported at the moment" if a.rank == 2 and b.rank == 1: diff --git a/tests/tensor/test_operators_blas_opencl.nim b/tests/tensor/test_operators_blas_opencl.nim index 78361398e..faaaf496f 100644 --- a/tests/tensor/test_operators_blas_opencl.nim +++ b/tests/tensor/test_operators_blas_opencl.nim @@ -17,27 +17,18 @@ import ../../src/arraymancer import unittest suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": - test "GEMV - General Matrix to Vector Multiplication - float32": + test "GEMV - General Matrix to Vector Multiplication": ## TODO: test with slices ## TODO: support and test non-contiguous tensors - let d = [[float32 1,-1,2], [float32 0.0,-3,1]].toTensor().opencl() - let e = [float32 2, 1, 0].toTensor().opencl() + let d = @[@[1.0'f32,-1,2],@[0.0'f32,-3,1]].toTensor().opencl() + let e = @[2.0'f32, 1, 0].toTensor().opencl() - check: (d * e).cpu == [float32 1, -3].toTensor() - - test "GEMV - General Matrix to Vector Multiplication - float64": - ## TODO: test with slices - ## TODO: support and test non-contiguous tensors - - let d = [[float64 1,-1,2], [float64 0.0,-3,1]].toTensor().opencl() - let e = [float64 2, 1, 0].toTensor().opencl() - - check: (d * e).cpu == [float64 1, -3].toTensor() + check: (d * e).cpu == [1.0'f32, -3].toTensor() test "Matrix and vector addition": - let u = @[float32 1, 3, -5].toTensor.opencl - let v = @[float32 1, 1, 1].toTensor.opencl + let u = @[1'f32, 3, -5].toTensor.opencl + let v = @[1'f32, 1, 1].toTensor.opencl check: (u + v).cpu == @[2'f32, 4, -4].toTensor() @@ -53,10 +44,10 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": discard a + b.cpu[0..1, 0..1].opencl test "Matrix and vector substraction": - let u = @[float32 1, 3, -5].toTensor.opencl - let v = @[float32 1, 1, 1].toTensor.opencl + let u = @[1'f32, 3, -5].toTensor.opencl + let v = @[1'f32, 1, 1].toTensor.opencl - check: (u - v).cpu == @[float32 0, 2, -6].toTensor() + check: (u - v).cpu == @[0'f32, 2, -6].toTensor() let a = @[7.0, 4.0, 3.0, 1.0, 8.0, 6.0, 8.0, 1.0, 6.0, 2.0].toTensor.reshape([5,2]).opencl let b = @[6.0, 6.0, 2.0, 0.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0].toTensor.reshape([5,2]).opencl @@ -77,8 +68,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": test "Matrix and Vector in-place addition": - var u = @[float64 1, 3, -5].toTensor().opencl() - let v = @[float64 4, -2, -1].toTensor().opencl() + var u = @[1'f64, 3, -5].toTensor().opencl() + let v = @[4'f64, -2, -1].toTensor().opencl() u += v @@ -116,8 +107,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": z += t2.cpu[0..1,0..1].opencl test "Matrix and Vector in-place substraction": - var u = @[float32 1, 3, -5].toTensor.opencl - let v = @[float32 1, 1, 1].toTensor.opencl + var u = @[1'f32, 3, -5].toTensor.opencl + let v = @[1'f32, 1, 1].toTensor.opencl u -= v @@ -142,8 +133,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": a += b.cpu[0..1,0..1].opencl test "Matrix and vector addition": - let u = @[float32 1, 3, -5].toTensor.opencl - let v = @[float32 1, 1, 1].toTensor.opencl + let u = @[1'f32, 3, -5].toTensor.opencl + let v = @[1'f32, 1, 1].toTensor.opencl check: (u + v).cpu == @[2'f32, 4, -4].toTensor() @@ -159,8 +150,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)": discard a + b.cpu[0..1, 0..1].opencl test "Matrix and vector substraction": - let u = @[float32 1, 3, -5].toTensor.opencl - let v = @[float32 1, 1, 1].toTensor.opencl + let u = @[1'f32, 3, -5].toTensor.opencl + let v = @[1'f32, 1, 1].toTensor.opencl check: (u - v).cpu == @[0'f32, 2, -6].toTensor()