Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

limit number of available GPUs #61

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/main/scala/BIDMat/GMat.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1859,38 +1859,40 @@ object GMat {

}

/** Makes logical GPU `i` the current CUDA device.
 *  Logical indices (0..hasCUDA-1) are translated to physical device ids
 *  through Mat.cudaDeviceIndexMap, which findGPUs populated with only the
 *  usable devices. Returns the cudaError status code from the runtime. */
def setGPU(i:Int) = jcuda.runtime.JCuda.cudaSetDevice(Mat.cudaDeviceIndexMap(i))

/** Returns the LOGICAL index of the current CUDA device.
 *  cudaGetDevice reports the physical id; it is mapped back through
 *  Mat.cudaDeviceInverseIndexMap so callers only ever see logical indices. */
def getGPU:Int = {
  val ar = Array[Int](1)
  jcuda.runtime.JCuda.cudaGetDevice(ar)
  Mat.cudaDeviceInverseIndexMap(ar(0))
}

/** Enables bidirectional peer (P2P) access between logical device `i` and
 *  the current device, restoring the current device afterwards.
 *  Returns the pair of cudaError status codes (this->i, i->this). */
def connect(i:Int) = {
  // Peer-access calls take PHYSICAL device ids, so map the logical index.
  val v0 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(Mat.cudaDeviceIndexMap(i),0)
  val j = getGPU   // logical index of the current device
  setGPU(i)
  // BUG FIX: j is logical, so it must go through cudaDeviceIndexMap
  // (logical->physical), not cudaDeviceInverseIndexMap (physical->logical),
  // matching how canconnect maps the same value.
  val v1 = jcuda.runtime.JCuda.cudaDeviceEnablePeerAccess(Mat.cudaDeviceIndexMap(j),0)
  setGPU(j)
  (v0, v1)
}

/** Disables bidirectional peer (P2P) access between logical device `i` and
 *  the current device, restoring the current device afterwards.
 *  Returns the pair of cudaError status codes (this->i, i->this). */
def disconnect(i:Int) = {
  // Peer-access calls take PHYSICAL device ids, so map the logical index.
  val v0 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(Mat.cudaDeviceIndexMap(i))
  val j = getGPU   // logical index of the current device
  setGPU(i)
  // BUG FIX: j is logical, so it must go through cudaDeviceIndexMap
  // (logical->physical), not cudaDeviceInverseIndexMap (physical->logical),
  // matching how canconnect maps the same value.
  val v1 = jcuda.runtime.JCuda.cudaDeviceDisablePeerAccess(Mat.cudaDeviceIndexMap(j))
  setGPU(j)
  (v0, v1)
}

/** Queries whether peer (P2P) access is possible between logical device `i`
 *  and the current device, in both directions.
 *  Returns (canAccess current->i, canAccess i->current) as 0/1 ints. */
def canconnect(i:Int) = {
  val ar = Array[Int](1)
  val j = getGPU   // logical index of the current device
  // cudaDeviceCanAccessPeer takes PHYSICAL device ids; map both logical indices.
  val mi: Int = Mat.cudaDeviceIndexMap(i)
  val mj: Int = Mat.cudaDeviceIndexMap(j)
  jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, mi, mj)
  val v0 = ar(0)
  jcuda.runtime.JCuda.cudaDeviceCanAccessPeer(ar, mj, mi)
  (v0, ar(0))
}

Expand Down
37 changes: 33 additions & 4 deletions src/main/scala/BIDMat/Mat.scala
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,11 @@ object Mat {
var recycleGrow = 1.2 // For caching, amount to grow re-allocated matrices

var hasCUDA = 0 // Number of available CUDA GPUs


var cudaDeviceIndexMap = HashMap.empty[Int, Int] // from logical index to physical index

var cudaDeviceInverseIndexMap = HashMap.empty[Int, Int] // from physical index to logical index

var useBLAS = true;

var useMKL = true; // Use MKL libs
Expand Down Expand Up @@ -854,7 +858,11 @@ object Mat {

/** Convenience overloads for checkCUDA(verbose, numGPUs).
 *  numGPUs = -1 means "use all available GPUs" (no limit). */
def checkCUDA:Unit = checkCUDA(false);

def checkCUDA(verbose:Boolean):Unit = checkCUDA(verbose, -1);

def checkCUDA(numGPUs: Int):Unit = checkCUDA(false, numGPUs);

def checkCUDA(verbose:Boolean, numGPUs: Int):Unit = {
if (hasCUDA == 0) {
val os = System.getProperty("os.name");
try {
Expand Down Expand Up @@ -901,8 +909,7 @@ object Mat {
if (hasCUDA >= 0) {
try {
var cudanum = new Array[Int](1);
jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum);
hasCUDA = cudanum(0);
findGPUs(numGPUs)
printf("%d CUDA device%s found", hasCUDA, if (hasCUDA == 1) "" else "s");
if (hasCUDA > 0) {
jcuda.runtime.JCuda.cudaRuntimeGetVersion(cudanum);
Expand All @@ -926,6 +933,28 @@ object Mat {
}
}

/** Probes the physical CUDA devices and builds the logical<->physical index
 *  maps, assigning consecutive logical indices 0,1,... to each device that
 *  can be selected and used. Sets hasCUDA to the number of usable devices.
 *
 *  @param numGPUs maximum number of GPUs to expose; any negative value
 *                 (checkCUDA passes -1 as the default sentinel) means no limit.
 */
def findGPUs(numGPUs: Int): Unit = {
  val cudanum = new Array[Int](1)
  jcuda.runtime.JCuda.cudaGetDeviceCount(cudanum)
  // BUG FIX: the test was (numGPUs < -1), which excluded the -1 "no limit"
  // sentinel, so the default capped the scan at min(-1, count) and stopped
  // after probing a single device. Any negative value now means "all".
  val minNumGPUs = if (numGPUs < 0) cudanum(0) else Math.min(numGPUs, cudanum(0))
  var i = 0                 // next logical index to assign
  var j = 0                 // physical device index being probed
  val ptr: jcuda.Pointer = new jcuda.Pointer()
  // Condition-first loop also avoids probing device 0 when no devices exist
  // or when numGPUs == 0 (the original flag-driven loop always ran once).
  while (i < minNumGPUs && j < cudanum(0)) {
    // A device is usable if it can be selected and a trivial cudaFree succeeds
    // (cudaFree(0-pointer) forces context creation on the device).
    if ((jcuda.runtime.cudaError.cudaSuccess == jcuda.runtime.JCuda.cudaSetDevice(j)) &&
        (jcuda.runtime.cudaError.cudaSuccess == jcuda.runtime.JCuda.cudaFree(ptr))) {
      cudaDeviceIndexMap += (i -> j)
      cudaDeviceInverseIndexMap += (j -> i)
      println("Map logical device #" + i + " --> physical device #" + j)
      i += 1
    }
    j += 1
  }
  hasCUDA = i
}

def copyToIntArray[@specialized(Double, Float, Long, Byte, Short) T](data:Array[T], i0:Int, idata:Array[Int], d0:Int, n:Int)
(implicit numeric : Numeric[T]) = {
var i = 0
Expand Down