Skip to content

Commit

Permalink
Clean up device memory and cuda error checks
Browse files Browse the repository at this point in the history
  • Loading branch information
denisalevi committed Mar 16, 2018
1 parent 215fd13 commit 688c14f
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 103 deletions.
72 changes: 27 additions & 45 deletions brian2cuda/brianlib/cuda_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,43 @@

// Define this to turn on error checking
#define BRIAN2CUDA_ERROR_CHECK
// Define this to make kernel calls block CPU execution
// Define this to synchronize device before checking errors
//#define BRIAN2CUDA_ERROR_CHECK_BLOCKING

// Define this to turn on memory checking
//#define BRIAN2CUDA_MEMORY_CHECK
// Define this to synchronize device before checking memory
//#define BRIAN2CUDA_MEMORY_CHECK_BLOCKING


// partly adapted from https://gist.github.com/ashwin/2652488
#define CudaSafeCall(err) __cudaSafeCall(err, __FILE__, __LINE__)
#define CudaCheckError() __cudaCheckError(__FILE__, __LINE__)
#define CudaCheckMemory(param) __cudaCheckMemory(param, __FILE__, __LINE__)
#define CUDA_SAFE_CALL(err) __cudaSafeCall(err, __FILE__, __LINE__)
#define CUDA_CHECK_ERROR() __cudaCheckError(__FILE__, __LINE__)
#define CUDA_CHECK_MEMORY() __cudaCheckMemory(__FILE__, __LINE__)


inline void __cudaSafeCall(cudaError err, const char *file, const int line)
{
#ifdef BRIAN2CUDA_ERROR_CHECK
if (cudaSuccess != err)
{
fprintf(stderr, "cudaSafeCall() failed at %s:%i : %s\n",
fprintf(stderr, "ERROR: CUDA_SAFE_CALL() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit( -1 );
exit(-1);
}
#endif

return;
}


inline void __cudaCheckError(const char *file, const int line)
{
#ifdef BRIAN2CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "cudaCheckError() failed at %s:%i : %s\n",
fprintf(stderr, "ERROR: CUDA_CHECK_ERROR() failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}
Expand All @@ -46,7 +49,7 @@ inline void __cudaCheckError(const char *file, const int line)
err = cudaDeviceSynchronize();
if(cudaSuccess != err)
{
fprintf(stderr, "cudaCheckError() with sync failed at %s:%i : %s\n",
fprintf(stderr, "ERROR: CUDA_CHECK_ERROR() with sync failed at %s:%i : %s\n",
file, line, cudaGetErrorString(err));
exit(-1);
}
Expand All @@ -56,52 +59,29 @@ inline void __cudaCheckError(const char *file, const int line)
return;
}

#endif


// Report cuda device memory usage
inline void __cudaCheckMemory(const char *msg, const char *file, const int line)
// Report device memory usage. The memory diff is reported with respect to the
// global brian::used_device_memory as reference, which was set in the last
// __cudaCheckMemory call.
inline void __cudaCheckMemory(const char *file, const int line)
{
#ifdef BRIAN2CUDA_MEMORY_CHECK
#ifdef BRIAN2CUDA_MEMORY_CHECK_BLOCKING
cudaDeviceSynchronize();
#endif
const double to_MB = 1.0 / (1024.0 * 1024.0);
size_t avail;
size_t total;
size_t avail, total, used, diff;
cudaMemGetInfo(&avail, &total);
size_t used = total - avail;
printf("INFO: cuda device memory usage in %s:%i (%s)\n"
"\t used: \t %f MB\n"
"\t avail: \t %f MB\n"
"\t total: \t %f MB\n",
file, line, msg,
double(used) * to_MB,
double(avail) * to_MB,
double(total) * to_MB);
#endif
}


// In this version the memory difference is always reported with respect to
// some reference memory, e.g. from a previous cudaMemGetInfo call
inline void __cudaCheckMemory(size_t &reference_memory, const char *file, const int line)
{
#ifdef BRIAN2CUDA_MEMORY_CHECK
#ifdef BRIAN2CUDA_MEMORY_CHECK_BLOCKING
cudaDeviceSynchronize();
#endif
const double to_MB = 1.0 / (1024.0 * 1024.0);
size_t avail;
size_t total;
cudaMemGetInfo(&avail, &total);
size_t used = total - avail;
size_t diff = used - reference_memory;
used = total - avail;
diff = used - brian::used_device_memory;
// print memory information only if device memory usage changed
// NOTE: Device memory is allocated in chunks. When allocating only little
// memory, the memory usage reported by cudaMemGetInfo might not change if
// the previously allocated chunk has enough free memory to be used for the
// newly requested allocation.
if (diff > 0)
{
if (reference_memory == 0)
diff = NAN;
printf("INFO: cuda device memory usage in %s:%i\n"
fprintf(stdout, "INFO: cuda device memory usage in %s:%i\n"
"\t used: \t %f MB\n"
"\t avail: \t %f MB\n"
"\t total: \t %f MB\n"
Expand All @@ -111,7 +91,9 @@ inline void __cudaCheckMemory(size_t &reference_memory, const char *file, const
double(avail) * to_MB,
double(total) * to_MB,
double(diff) * to_MB, diff);
reference_memory = used;
brian::used_device_memory = used;
}
#endif
}

#endif
19 changes: 19 additions & 0 deletions brian2cuda/templates/objects.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "objects.h"
#include "synapses_classes.h"
#include "brianlib/clocks.h"
#include "brianlib/cuda_utils.h"
#include "network.h"
#include <stdint.h>
#include <iostream>
Expand All @@ -14,6 +15,8 @@
#include <thrust/device_vector.h>
#include <curand.h>

size_t brian::used_device_memory = 0;

//////////////// clocks ///////////////////
{% for clock in clocks | sort(attribute='name') %}
Clock brian::{{clock.name}};
Expand Down Expand Up @@ -154,6 +157,11 @@ void _init_arrays()
{
using namespace brian;

std::clock_t start_timer = std::clock();

CUDA_CHECK_MEMORY();
size_t used_device_memory_start = used_device_memory;

cudaDeviceProp props;
cudaGetDeviceProperties(&props, 0);

Expand Down Expand Up @@ -267,6 +275,15 @@ void _init_arrays()
cudaMalloc((void**)&dev{{varname}}[0], sizeof({{c_data_type(var.dtype)}})*_num_{{varname}});
{{varname}} = new {{c_data_type(var.dtype)}}[{{var.size}}];
{% endfor %}

CUDA_CHECK_MEMORY();
const double to_MB = 1.0 / (1024.0 * 1024.0);
double tot_memory_MB = (used_device_memory - used_device_memory_start) * to_MB;
double time_passed = (double)(std::clock() - start_timer) / CLOCKS_PER_SEC;
std::cout << "INFO: _init_arrays() took " << time_passed << "s";
if (tot_memory_MB > 0)
std::cout << " and used " << tot_memory_MB << "MB of device memory.";
std::cout << std::endl;
}

void _load_arrays()
Expand Down Expand Up @@ -498,6 +515,8 @@ typedef {{curand_float_type}} randomNumber_t; // random number type

namespace brian {

extern size_t used_device_memory;

//////////////// clocks ///////////////////
{% for clock in clocks %}
extern Clock {{clock.name}};
Expand Down
4 changes: 3 additions & 1 deletion brian2cuda/templates/synapses.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ kernel_{{codeobj_name}}(

unsigned int tid = threadIdx.x;
unsigned int bid = blockIdx.x + bid_offset;
//TODO: do we need _idx here? if now, get also rid of scoping after scalar code
//TODO: do we need _idx here? if no, get also rid of scoping after scalar code
// scalar_code can depend on _idx (e.g. if the state update depends on a
// subexpression that is the same for all synapses, ?)
unsigned int _idx = bid * THREADS_PER_BLOCK + tid;
unsigned int _vectorisation_idx = _idx;
%KERNEL_VARIABLES%
Expand Down
14 changes: 14 additions & 0 deletions brian2cuda/templates/synapses_create_array.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
{% block extra_headers %}
{{ super() }}
#include<map>
#include "brianlib/cuda_utils.h"
{% endblock %}

{% block kernel %}
Expand All @@ -24,9 +25,21 @@
{% endblock %}

{% block profiling_start %}
std::clock_t start_timer = std::clock();

CUDA_CHECK_MEMORY();
size_t used_device_memory_start = used_device_memory;
{% endblock %}

{% block profiling_stop %}
CUDA_CHECK_MEMORY();
const double to_MB = 1.0 / (1024.0 * 1024.0);
double tot_memory_MB = (used_device_memory - used_device_memory_start) * to_MB;
double time_passed = (double)(std::clock() - start_timer) / CLOCKS_PER_SEC;
std::cout << "INFO: {{owner.name}} creation took " << time_passed << "s";
if (tot_memory_MB > 0)
std::cout << " and used " << tot_memory_MB << "MB of device memory.";
std::cout << std::endl;
{% endblock %}

{% block extra_maincode %}
Expand Down Expand Up @@ -76,6 +89,7 @@ const int32_t newsize = {{_dynamic__synaptic_pre}}.size();
{{varname}}.resize(newsize);
{% endif %}
{% endfor %}
CUDA_CHECK_MEMORY();

// update the total number of synapses
{{N}} = newsize;
Expand Down
13 changes: 13 additions & 0 deletions brian2cuda/templates/synapses_create_generator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include<iostream>
#include<curand.h>
#include<brianlib/curand_buffer.h>
#include "brianlib/cuda_utils.h"
#include<map>
{% endblock %}

Expand All @@ -27,9 +28,21 @@
{% endblock %}

{% block profiling_start %}
std::clock_t start_timer = std::clock();

CUDA_CHECK_MEMORY();
size_t used_device_memory_start = used_device_memory;
{% endblock %}

{% block profiling_stop %}
CUDA_CHECK_MEMORY();
const double to_MB = 1.0 / (1024.0 * 1024.0);
double tot_memory_MB = (used_device_memory - used_device_memory_start) * to_MB;
double time_passed = (double)(std::clock() - start_timer) / CLOCKS_PER_SEC;
std::cout << "INFO: {{owner.name}} creation took " << time_passed << "s";
if (tot_memory_MB > 0)
std::cout << " and used " << tot_memory_MB << "MB of memory.";
std::cout << std::endl;
{% endblock %}

{% block extra_maincode %}
Expand Down
Loading

0 comments on commit 688c14f

Please sign in to comment.