diff --git a/.travis.yml b/.travis.yml
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,29 +12,20 @@ matrix:
       compiler: gcc
 
     - os: linux
-      env: CHANNEL=devel
+      env: CHANNEL=stable BACKEND=OpenCL
       compiler: gcc
 
-    # For faster testing we don't test clang on linux, only on macOS
-    # - os: linux
-    #   env: CHANNEL=stable
-    #   compiler: clang
-    #
-    # - os: linux
-    #   env: CHANNEL=devel
-    #   compiler: clang
+    - os: linux
+      env: CHANNEL=devel
+      compiler: gcc
 
     # On OSX we only test against clang (gcc is mapped to clang by default)
     # Note: for OpenMP, Homebrew will build flame/blis with GCC-5
+    # As BLIS is in OSX homebrew, this is an opportunity to test it as well
     - os: osx
       env: CHANNEL=stable BLIS=true
       compiler: clang
 
-    # For faster testing, we only test BLIS = true
-    # - os: osx
-    #   env: CHANNEL=stable BLIS=false
-    #   compiler: clang
-
   allow_failures:
     # Ignore failures when building against the devel Nim branch
     # Also ignore OSX, due to very long build time and Homebrew/curl SSLRead errors
@@ -52,6 +43,15 @@ before_install:
   # On MacOS flame/blis can be tested as it is an homebrew package
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi
   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install brewsci/science/blis; fi
+  # The SDK script is sourced, not run with `bash`: it exports OPENCL_VENDOR_PATH
+  # and LD_LIBRARY_PATH, which would be lost with a child process.
+  - if [[ "$BACKEND" == "OpenCL" ]]; then
+      source ci/opencl_amd_sdk.sh;
+
+      wget https://launchpad.net/~cnugteren/+archive/ubuntu/clblast/+files/libclblast_1.3.0-1ubuntu2_amd64.deb -O libclblast.deb;
+      sudo dpkg -i libclblast.deb;
+      sudo apt-get -f install -y;
+    fi
 
 install:
   - export CHOOSENIM_NO_ANALYTICS=1
@@ -63,7 +63,11 @@ install:
 
 script:
   - nimble refresh
-  - nimble test
+  - if [[ "$BACKEND" == "OpenCL" ]]; then
+      nimble test_opencl;
+    else
+      nimble test;
+    fi
 
 branches:
   except:
diff --git a/ci/README.md b/ci/README.md
new file mode 100644
index 000000000..a64a1fd2a
--- /dev/null
+++ b/ci/README.md
@@ -0,0 +1,3 @@
+# Continuous Integration
+
+Scripts needed for continuous integration of Arraymancer
\ No newline at end of file
diff --git a/ci/opencl_amd_sdk.sh b/ci/opencl_amd_sdk.sh
new file mode 100644
--- /dev/null
+++ b/ci/opencl_amd_sdk.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Downloads and unpacks the AMD APP SDK (OpenCL runtime) for CI builds.
+# Adapted from https://github.com/gregvw/amd_sdk/
+#
+# Source this script (`source ci/opencl_amd_sdk.sh`) instead of executing it:
+# it exports OPENCL_VENDOR_PATH and LD_LIBRARY_PATH, and exports made by a
+# child `bash`/`sh` process are lost as soon as that process exits.
+
+# Pages the download form values (nonces, post id, file name) are scraped from
+URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/"
+URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/"
+
+# Markers that precede each value in the HTML forms
+NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"'
+FILE_STRING='name="f"'
+POSTID_STRING='name="post_id"'
+NONCE2_STRING='name="amd_developer_central_nonce"'
+
+# NOTE(review): --no-check-certificate turns off TLS verification for every
+# wget call below (inherited from the upstream script) - confirm whether it is
+# still required on the CI image.
+# For the newest SDK use '/download-2/,/64-bit/p' as the sed range instead.
+FORM=$(wget --no-check-certificate -qO - "$URL" | sed -n '/download-5/,/64-bit/p')
+
+# `echo $FORM` is deliberately unquoted below: word splitting joins the form
+# onto one line, so awk sees a single record and $2 is the text that follows
+# the marker.
+
+# Nonce of the downloads page
+NONCE1=$(echo $FORM | awk -F "${NONCE1_STRING}" '{print $2}')
+NONCE1=$(echo $NONCE1 | awk -F'"' '{print $2}')
+echo $NONCE1
+
+# Post id
+POSTID=$(echo $FORM | awk -F "${POSTID_STRING}" '{print $2}')
+POSTID=$(echo $POSTID | awk -F'"' '{print $2}')
+echo $POSTID
+
+# File name
+FILE=$(echo $FORM | awk -F "${FILE_STRING}" '{print $2}')
+FILE=$(echo $FILE | awk -F'"' '{print $2}')
+echo $FILE
+
+# Post the scraped values to the license-agreement page; the reply carries the
+# second nonce
+FORM=$(wget --no-check-certificate -qO - "$URLDOWN" --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}")
+
+NONCE2=$(echo $FORM | awk -F "${NONCE2_STRING}" '{print $2}')
+NONCE2=$(echo $NONCE2 | awk -F'"' '{print $2}')
+echo $NONCE2
+
+# Stop with a clear message when scraping failed, rather than saving whatever
+# the server answers as AMD-SDK.tar.bz2.
+if [ -z "${FILE}" ] || [ -z "${NONCE2}" ]; then
+  echo "opencl_amd_sdk.sh: could not scrape the AMD SDK download form" >&2
+  # `return` works when sourced; fall back to `exit` when executed directly
+  return 1 2>/dev/null || exit 1
+fi
+
+wget --no-check-certificate --content-disposition --trust-server-names "$URLDOWN" --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2
+
+# Unpack and install under ${HOME}/AMDAPPSDK
+tar -xjf AMD-SDK.tar.bz2
+AMDAPPSDK=${HOME}/AMDAPPSDK
+export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors
+mkdir -p "${OPENCL_VENDOR_PATH}"
+sh AMD-APP-SDK*.sh --tar -xf -C "${AMDAPPSDK}"
+# Register the AMD ICD in the vendor directory exported above
+echo libamdocl64.so > "${OPENCL_VENDOR_PATH}/amdocl64.icd"
+# Prepend the SDK libraries; no trailing ':' when LD_LIBRARY_PATH was unset
+export LD_LIBRARY_PATH=${AMDAPPSDK}/lib/x86_64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+chmod +x "${AMDAPPSDK}/bin/x86_64/clinfo"
+
+# Check the OpenCL installation
+"${AMDAPPSDK}/bin/x86_64/clinfo"
diff --git a/src/tensor/operators_blas_l1_opencl.nim b/src/tensor/operators_blas_l1_opencl.nim
index 9aca3e3bb..4d0416d91 100644
--- a/src/tensor/operators_blas_l1_opencl.nim
+++ b/src/tensor/operators_blas_l1_opencl.nim
@@ -27,7 +27,7 @@ import ./backend/metadataArray,
 # ####################################################################
 # BLAS Level 1 (Vector dot product, Addition, Scalar to Vector/Matrix)
 
-template dotImpl(T: typedesc[SomeReal], clblast_proc: untyped): untyped =
+template dotImpl(T: typedesc, clblast_proc: untyped): untyped =
   proc dot*(a, b: ClTensor[T]): T =
     ## Vector to Vector dot (scalar) product
     when compileOption("boundChecks"):
diff --git a/src/tensor/operators_blas_l2l3_opencl.nim b/src/tensor/operators_blas_l2l3_opencl.nim
--- a/src/tensor/operators_blas_l2l3_opencl.nim
+++ b/src/tensor/operators_blas_l2l3_opencl.nim
@@ -7,37 +7,36 @@
 import ./data_structure,
        ./private/[p_init_opencl, p_checks]
 
-template l1l2_blas_Impl(T: typedesc[SomeReal], clblast_gemv_proc: untyped): untyped =
-  proc openCL_MV_y_eq_aAx_p_by(
-    alpha: T, a, x: ClTensor[T],
-    beta: T, y: var ClTensor[T]) =
-    # Matrix-Vector: y = alpha A matvecmul x + beta y
-
-    # TODO: remove this contiguous layout constraint
-    if not a.isContiguous:
-      raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")
-
-    let
-      a_is_rowMajor = a.is_C_contiguous
-      layout = if a_is_rowMajor: CLBlastLayoutRowMajor
-               else: CLBlastLayoutColMajor
-      lda = if a_is_rowMajor: a.strides[0]
-            else: a.strides[1]
-
-    check clblast_gemv_proc(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
-                alpha,
-                a.toClPointer, a.offset, lda,
-                x.toClpointer, x.offset, x.strides[0],
-                beta,
-                y.toClpointer, y.offset, y.strides[0],
-                unsafeAddr clQueue0, nil)
-
-l1l2_blas_Impl(float32, clblastSgemv)
-l1l2_blas_Impl(float64, clblastDgemv)
+
+proc openCL_MV_y_eq_aAx_p_by(
+  alpha: float32, a, x: ClTensor[float32],
+  beta: float32, y: var ClTensor[float32]) =
+  # Matrix-Vector: y = alpha A matvecmul x + beta y
+
+  # TODO: remove this contiguous layout constraint
+  if not a.isContiguous:
+    raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")
+
+  let
+    a_is_rowMajor = a.is_C_contiguous
+    layout = if a_is_rowMajor: CLBlastLayoutRowMajor
+             else: CLBlastLayoutColMajor
+    lda = if a_is_rowMajor: a.strides[0]
+          else: a.strides[1]
+
+  check clblastSgemv(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
+              alpha,
+              a.toClPointer, a.offset, lda,
+              x.toClpointer, x.offset, x.strides[0],
+              beta,
+              y.toClpointer, y.offset, y.strides[0],
+              unsafeAddr clQueue0, nil)
+
 
 
 proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] =
-  ## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on CUDA
+  ## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on OpenCL
+  assert T is float32, "Only float32 is supported at the moment"
   assert b.rank == 1, "Only Matrix-Vector product is supported at the moment"
 
   if a.rank == 2 and b.rank == 1:
diff --git a/tests/tensor/test_operators_blas_opencl.nim b/tests/tensor/test_operators_blas_opencl.nim
index 78361398e..faaaf496f 100644
--- a/tests/tensor/test_operators_blas_opencl.nim
+++ b/tests/tensor/test_operators_blas_opencl.nim
@@ -17,27 +17,18 @@ import ../../src/arraymancer
 import unittest
 
 suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
-  test "GEMV - General Matrix to Vector Multiplication - float32":
+  test "GEMV - General Matrix to Vector Multiplication":
     ## TODO: test with slices
     ## TODO: support and test non-contiguous tensors
 
-    let d = [[float32 1,-1,2], [float32 0.0,-3,1]].toTensor().opencl()
-    let e = [float32 2, 1, 0].toTensor().opencl()
+    let d = @[@[1.0'f32,-1,2],@[0.0'f32,-3,1]].toTensor().opencl()
+    let e = @[2.0'f32, 1, 0].toTensor().opencl()
 
-    check: (d * e).cpu == [float32 1, -3].toTensor()
-
-  test "GEMV - General Matrix to Vector Multiplication - float64":
-    ## TODO: test with slices
-    ## TODO: support and test non-contiguous tensors
-
-    let d = [[float64 1,-1,2], [float64 0.0,-3,1]].toTensor().opencl()
-    let e = [float64 2, 1, 0].toTensor().opencl()
-
-    check: (d * e).cpu == [float64 1, -3].toTensor()
+    check: (d * e).cpu == [1.0'f32, -3].toTensor()
 
   test "Matrix and vector addition":
-    let u = @[float32 1, 3, -5].toTensor.opencl
-    let v = @[float32 1, 1, 1].toTensor.opencl
+    let u = @[1'f32, 3, -5].toTensor.opencl
+    let v = @[1'f32, 1, 1].toTensor.opencl
 
     check: (u + v).cpu == @[2'f32, 4, -4].toTensor()
 
@@ -53,10 +44,10 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
       discard a + b.cpu[0..1, 0..1].opencl
 
   test "Matrix and vector substraction":
-    let u = @[float32 1, 3, -5].toTensor.opencl
-    let v = @[float32 1, 1, 1].toTensor.opencl
+    let u = @[1'f32, 3, -5].toTensor.opencl
+    let v = @[1'f32, 1, 1].toTensor.opencl
 
-    check: (u - v).cpu == @[float32 0, 2, -6].toTensor()
+    check: (u - v).cpu == @[0'f32, 2, -6].toTensor()
 
     let a = @[7.0, 4.0, 3.0, 1.0, 8.0, 6.0, 8.0, 1.0, 6.0, 2.0].toTensor.reshape([5,2]).opencl
     let b = @[6.0, 6.0, 2.0, 0.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0].toTensor.reshape([5,2]).opencl
@@ -77,8 +68,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
 
 
   test "Matrix and Vector in-place addition":
-    var u = @[float64 1, 3, -5].toTensor().opencl()
-    let v = @[float64 4, -2, -1].toTensor().opencl()
+    var u = @[1'f64, 3, -5].toTensor().opencl()
+    let v = @[4'f64, -2, -1].toTensor().opencl()
 
     u += v
 
@@ -116,8 +107,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
       z += t2.cpu[0..1,0..1].opencl
 
   test "Matrix and Vector in-place substraction":
-    var u = @[float32 1, 3, -5].toTensor.opencl
-    let v = @[float32 1, 1, 1].toTensor.opencl
+    var u = @[1'f32, 3, -5].toTensor.opencl
+    let v = @[1'f32, 1, 1].toTensor.opencl
 
     u -= v
 
@@ -142,8 +133,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
       a += b.cpu[0..1,0..1].opencl
 
   test "Matrix and vector addition":
-    let u = @[float32 1, 3, -5].toTensor.opencl
-    let v = @[float32 1, 1, 1].toTensor.opencl
+    let u = @[1'f32, 3, -5].toTensor.opencl
+    let v = @[1'f32, 1, 1].toTensor.opencl
 
     check: (u + v).cpu == @[2'f32, 4, -4].toTensor()
 
@@ -159,8 +150,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
       discard a + b.cpu[0..1, 0..1].opencl
 
   test "Matrix and vector substraction":
-    let u = @[float32 1, 3, -5].toTensor.opencl
-    let v = @[float32 1, 1, 1].toTensor.opencl
+    let u = @[1'f32, 3, -5].toTensor.opencl
+    let v = @[1'f32, 1, 1].toTensor.opencl
 
     check: (u - v).cpu == @[0'f32, 2, -6].toTensor()
 