Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenCL continuous integration #189

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,20 @@ matrix:
compiler: gcc

- os: linux
env: CHANNEL=devel
env: CHANNEL=stable BACKEND=OpenCL
compiler: gcc

# For faster testing we don't test clang on linux, only on macOS
# - os: linux
# env: CHANNEL=stable
# compiler: clang
#
# - os: linux
# env: CHANNEL=devel
# compiler: clang
- os: linux
env: CHANNEL=devel
compiler: gcc

# On OSX we only test against clang (gcc is mapped to clang by default)
# Note: for OpenMP, Homebrew will build flame/blis with GCC-5
# As BLIS is in OSX homebrew, this is an opportunity to test it as well
- os: osx
env: CHANNEL=stable BLIS=true
compiler: clang

# For faster testing, we only test BLIS = true
# - os: osx
# env: CHANNEL=stable BLIS=false
# compiler: clang

allow_failures:
# Ignore failures when building against the devel Nim branch
# Also ignore OSX, due to very long build time and Homebrew/curl SSLRead errors
Expand All @@ -52,6 +43,13 @@ before_install:
# On macOS, flame/blis can be tested because it is available as a Homebrew package
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install brewsci/science/blis; fi
- if [[ "$BACKEND" == "OpenCL" ]]; then
bash ci/opencl_amd_sdk.sh;

wget https://launchpad.net/~cnugteren/+archive/ubuntu/clblast/+files/libclblast_1.3.0-1ubuntu2_amd64.deb -O libclblast.deb;
sudo dpkg -i libclblast.deb;
sudo apt-get -f install;
fi

install:
- export CHOOSENIM_NO_ANALYTICS=1
Expand All @@ -63,7 +61,11 @@ install:

script:
- nimble refresh
- nimble test
- if [[ "$BACKEND" == "OpenCL" ]]; then
nimble test_opencl;
else
nimble test;
fi

branches:
except:
Expand Down
3 changes: 3 additions & 0 deletions ci/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Continuous Integration

Scripts needed for continuous integration of Arraymancer
51 changes: 51 additions & 0 deletions ci/opencl_amd_sdk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# Download and unpack the AMD APP SDK, register its OpenCL ICD and run
# `clinfo` to confirm that an OpenCL platform is visible.
#
# Original script from https://github.com/gregvw/amd_sdk/
#
# The download is gated behind two HTML forms:
#   1. the SDK page provides a nonce, a post id and a file name;
#   2. posting those to the license page provides a second nonce;
#   3. posting the second nonce and the file name returns the archive.
#
# NOTE(review): the `export`s near the end only affect this process and its
# children. A caller that runs this file with `bash` and needs
# OPENCL_VENDOR_PATH / LD_LIBRARY_PATH afterwards must `source` it instead,
# or set both variables again itself.

# Abort on the first failing command, unset variable or broken pipeline
# instead of carrying on with empty form values.
set -euo pipefail

# Pages from which the nonces and the file name are scraped
URL="https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/"
URLDOWN="https://developer.amd.com/amd-license-agreement-appsdk/"

NONCE1_STRING='name="amd_developer_central_downloads_page_nonce"'
FILE_STRING='name="f"'
POSTID_STRING='name="post_id"'
NONCE2_STRING='name="amd_developer_central_nonce"'

# Print the first double-quoted string that follows the marker $2 in the
# HTML text $1. Newlines are flattened first so that awk sees the whole form
# as one record; the text is passed quoted so the shell never glob-expands it.
form_value() {
  printf '%s' "$1" | tr '\n' ' ' \
    | awk -F "$2" '{print $2}' \
    | awk -F '"' '{print $2}'
}

# Exit with a message on stderr when the scraped value $2 (described by $1)
# is empty, which means the page layout changed or the request failed.
require_value() {
  if [ -z "$2" ]; then
    echo "opencl_amd_sdk.sh: could not extract $1 from the AMD download page" >&2
    exit 1
  fi
}

# NOTE(review): --no-check-certificate disables TLS verification for every
# request below, including the one that fetches the installer that is then
# executed. Kept as in the original script; confirm it is still required.

#For newest FORM=`wget -qO - $URL | sed -n '/download-2/,/64-bit/p'`
FORM=$(wget --no-check-certificate -qO - "$URL" | sed -n '/download-5/,/64-bit/p')

# Get nonce from form
NONCE1=$(form_value "$FORM" "$NONCE1_STRING")
require_value "the download page nonce" "$NONCE1"
echo "$NONCE1"

# get the postid
POSTID=$(form_value "$FORM" "$POSTID_STRING")
require_value "the post id" "$POSTID"
echo "$POSTID"

# get file name
FILE=$(form_value "$FORM" "$FILE_STRING")
require_value "the file name" "$FILE"
echo "$FILE"

# Accept the license form; the response carries the second nonce
FORM=$(wget --no-check-certificate -qO - "$URLDOWN" --post-data "amd_developer_central_downloads_page_nonce=${NONCE1}&f=${FILE}&post_id=${POSTID}")

NONCE2=$(form_value "$FORM" "$NONCE2_STRING")
require_value "the license page nonce" "$NONCE2"
echo "$NONCE2"

wget --no-check-certificate --content-disposition --trust-server-names "$URLDOWN" --post-data "amd_developer_central_nonce=${NONCE2}&f=${FILE}" -O AMD-SDK.tar.bz2

# unpacking and installing
tar -xjf AMD-SDK.tar.bz2
AMDAPPSDK="${HOME}/AMDAPPSDK"
export OPENCL_VENDOR_PATH="${AMDAPPSDK}/etc/OpenCL/vendors"
mkdir -p "${OPENCL_VENDOR_PATH}"
# The glob is deliberately unquoted: the installer name carries a version.
sh AMD-APP-SDK*.sh --tar -xf -C "${AMDAPPSDK}"
echo libamdocl64.so > "${OPENCL_VENDOR_PATH}/amdocl64.icd"
# `:-` keeps this line valid under `set -u` when LD_LIBRARY_PATH is unset
export LD_LIBRARY_PATH="${AMDAPPSDK}/lib/x86_64:${LD_LIBRARY_PATH:-}"
chmod +x "${AMDAPPSDK}/bin/x86_64/clinfo"

# Checking OpenCL status
"${AMDAPPSDK}/bin/x86_64/clinfo"
2 changes: 1 addition & 1 deletion src/tensor/operators_blas_l1_opencl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import ./backend/metadataArray,
# ####################################################################
# BLAS Level 1 (Vector dot product, Addition, Scalar to Vector/Matrix)

template dotImpl(T: typedesc[SomeReal], clblast_proc: untyped): untyped =
template dotImpl(T: typedesc, clblast_proc: untyped): untyped =
proc dot*(a, b: ClTensor[T]): T =
## Vector to Vector dot (scalar) product
when compileOption("boundChecks"):
Expand Down
53 changes: 26 additions & 27 deletions src/tensor/operators_blas_l2l3_opencl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,36 @@ import ./data_structure,
./private/[p_init_opencl, p_checks]


template l1l2_blas_Impl(T: typedesc[SomeReal], clblast_gemv_proc: untyped): untyped =
proc openCL_MV_y_eq_aAx_p_by(
alpha: T, a, x: ClTensor[T],
beta: T, y: var ClTensor[T]) =
# Matrix-Vector: y = alpha A matvecmul x + beta y

# TODO: remove this contiguous layout constraint
if not a.isContiguous:
raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

let
a_is_rowMajor = a.is_C_contiguous
layout = if a_is_rowMajor: CLBlastLayoutRowMajor
else: CLBlastLayoutColMajor
lda = if a_is_rowMajor: a.strides[0]
else: a.strides[1]

check clblast_gemv_proc(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
alpha,
a.toClPointer, a.offset, lda,
x.toClpointer, x.offset, x.strides[0],
beta,
y.toClpointer, y.offset, y.strides[0],
unsafeAddr clQueue0, nil)

l1l2_blas_Impl(float32, clblastSgemv)
l1l2_blas_Impl(float64, clblastDgemv)

proc openCL_MV_y_eq_aAx_p_by(
    alpha: float32, a, x: ClTensor[float32],
    beta: float32, y: var ClTensor[float32]) =
  ## Matrix-Vector product with accumulation:
  ## y = alpha * (A matvecmul x) + beta * y
  ##
  ## Raises ValueError when `a` is not contiguous.

  # TODO: remove this contiguous layout constraint
  if not a.isContiguous:
    raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

  # Storage order of `a` and the stride handed to CLBlast as its leading
  # dimension: strides[0] when row-major, strides[1] otherwise.
  let (layout, lda) =
    if a.is_C_contiguous: (CLBlastLayoutRowMajor, a.strides[0])
    else: (CLBlastLayoutColMajor, a.strides[1])

  check clblastSgemv(
    layout, CLBlastTransposeNo,
    a.shape[0], a.shape[1],
    alpha,
    a.toClPointer, a.offset, lda,
    x.toClpointer, x.offset, x.strides[0],
    beta,
    y.toClpointer, y.offset, y.strides[0],
    unsafeAddr clQueue0, nil)


proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] =
## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on OpenCL

assert T is float32, "Only float32 is supported at the moment"
assert b.rank == 1, "Only Matrix-Vector product is supported at the moment"

if a.rank == 2 and b.rank == 1:
Expand Down
43 changes: 17 additions & 26 deletions tests/tensor/test_operators_blas_opencl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,18 @@ import ../../src/arraymancer
import unittest

suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
test "GEMV - General Matrix to Vector Multiplication - float32":
test "GEMV - General Matrix to Vector Multiplication":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = [[float32 1,-1,2], [float32 0.0,-3,1]].toTensor().opencl()
let e = [float32 2, 1, 0].toTensor().opencl()
let d = @[@[1.0'f32,-1,2],@[0.0'f32,-3,1]].toTensor().opencl()
let e = @[2.0'f32, 1, 0].toTensor().opencl()

check: (d * e).cpu == [float32 1, -3].toTensor()

test "GEMV - General Matrix to Vector Multiplication - float64":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = [[float64 1,-1,2], [float64 0.0,-3,1]].toTensor().opencl()
let e = [float64 2, 1, 0].toTensor().opencl()

check: (d * e).cpu == [float64 1, -3].toTensor()
check: (d * e).cpu == [1.0'f32, -3].toTensor()

test "Matrix and vector addition":
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

Expand All @@ -53,10 +44,10 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl

check: (u - v).cpu == @[float32 0, 2, -6].toTensor()
check: (u - v).cpu == @[0'f32, 2, -6].toTensor()

let a = @[7.0, 4.0, 3.0, 1.0, 8.0, 6.0, 8.0, 1.0, 6.0, 2.0].toTensor.reshape([5,2]).opencl
let b = @[6.0, 6.0, 2.0, 0.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0].toTensor.reshape([5,2]).opencl
Expand All @@ -77,8 +68,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":


test "Matrix and Vector in-place addition":
var u = @[float64 1, 3, -5].toTensor().opencl()
let v = @[float64 4, -2, -1].toTensor().opencl()
var u = @[1'f64, 3, -5].toTensor().opencl()
let v = @[4'f64, -2, -1].toTensor().opencl()

u += v

Expand Down Expand Up @@ -116,8 +107,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
z += t2.cpu[0..1,0..1].opencl

test "Matrix and Vector in-place substraction":
var u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl
var u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl

u -= v

Expand All @@ -142,8 +133,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
a += b.cpu[0..1,0..1].opencl

test "Matrix and vector addition":
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

Expand All @@ -159,8 +150,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl

check: (u - v).cpu == @[0'f32, 2, -6].toTensor()

Expand Down