From f7da4ba5d94429d05567e641cd4c98e0ef409107 Mon Sep 17 00:00:00 2001 From: brodyh Date: Tue, 10 Feb 2015 20:11:47 -0800 Subject: [PATCH] added option to select gpu device based on free memory --- src/caffe/util/mpi.cpp | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/caffe/util/mpi.cpp b/src/caffe/util/mpi.cpp index ac1697421c1..62e70d606e2 100644 --- a/src/caffe/util/mpi.cpp +++ b/src/caffe/util/mpi.cpp @@ -2,11 +2,13 @@ #include #endif #include +#include +#include +#include #include "caffe/common.hpp" #include "caffe/util/mpi.hpp" - #ifdef USE_MPI // MPI: various checks for different function calls. #define MPI_CHECK(condition) \ @@ -23,6 +25,7 @@ namespace caffe { static bool distributed = false; +static int get_free_deviceid(int local_rank); void caffe_init_mpi(int* pargc, char*** pargv) { const char* local_rank_env = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); @@ -31,7 +34,8 @@ void caffe_init_mpi(int* pargc, char*** pargv) { if (local_rank_env) { #ifdef USE_MPI distributed = true; - int local_rank = std::atoi(local_rank_env); + int local_rank = (std::atoi(local_rank_env)); + local_rank = get_free_deviceid(local_rank); Caffe::SetDevice(local_rank); int provided, requested = MPI_THREAD_MULTIPLE; MPI_CHECK(MPI_Init_thread(pargc, pargv, requested, &provided)); @@ -60,6 +64,38 @@ void caffe_init_mpi(int* pargc, char*** pargv) { } +struct sort_pred { + bool operator()(const std::pair& left, + const std::pair& right) { + return left.first > right.first; + } +}; + + +// This is somewhat experimental because there is no guarantee all +// ranks will return the same info for free GPUs +static int get_free_deviceid(int local_rank) { + int num_devices, orig_device; + CUDA_CHECK(cudaGetDevice(&orig_device)); + CUDA_CHECK(cudaGetDeviceCount(&num_devices)); + std::vector > device_mems(num_devices); + for (int devid = 0; devid < num_devices; ++devid) { + Caffe::SetDevice(devid); + size_t free_mem = Caffe::DeviceMemoryFree(); + device_mems[devid] = std::pair(free_mem, devid); + } + std::sort(device_mems.begin(), device_mems.end(), sort_pred()); + Caffe::SetDevice(orig_device); + std::ostringstream info; + for (int devid = 0; devid < num_devices; ++devid) { + info << "(" << device_mems[devid].second << ", " + << device_mems[devid].first / (1024.*1024.) << ") "; + } + LOG(INFO) << "Rank "<< local_rank << " " << info.str(); + return device_mems[local_rank].second; +} + + void caffe_finalize_mpi() { #ifdef USE_MPI if (distributed)