Skip to content

Commit

Permalink
Merge branch 'headless-gl-render' into 'main'
Browse files Browse the repository at this point in the history
Support headless OpenGL rendering, improve documentation

See merge request omniverse/warp!487
  • Loading branch information
mmacklin committed Jun 5, 2024
2 parents f2527ca + cfbd9d5 commit 659d00f
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 56 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
- Fixed `volume_sample_v` and `volume_store_*` adjoints
- Prevent `volume_store` from overwriting grid background values
- Improve validation of user-provided fields and values in warp.fem
- Support headless rendering of `OpenGLRenderer` via `pyglet.options["headless"] = True`
- `RegisteredGLBuffer` can fall back to CPU-bound copying if CUDA/OpenGL interop is not available

## [1.1.1] - 2024-05-24

Expand Down
4 changes: 2 additions & 2 deletions docs/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ featured physics engine. Warp is also integrated with IsaacGym and is
great for performing auxiliary tasks such as reward and observation
computations for reinforcement learning.

Why aren't assignments to Warp arrays aren'supported outside of kernels?
------------------------------------------------------------------------
Why aren't assignments to Warp arrays supported outside of kernels?
-------------------------------------------------------------------

For best performance, reading and writing data that is living on the GPU can
only be performed inside Warp CUDA kernels. Otherwise individual element accesses
Expand Down
11 changes: 11 additions & 0 deletions docs/modules/render.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,14 @@ Based on these renderers from ``warp.render``, the ``SimRendererUsd`` (which equ
.. autoclass:: SimRendererOpenGL
:members:

CUDA graphics interface
-----------------------

Warp provides a CUDA graphics interface that allows you to access OpenGL buffers from CUDA kernels. This is useful for manipulating OpenGL array buffers without having to copy them back and forth between the CPU and GPU.

See the `CUDA documentation on OpenGL Interoperability <https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GL.html>`_ for more information.

.. currentmodule:: warp.context

.. autoclass:: RegisteredGLBuffer
:members:
123 changes: 103 additions & 20 deletions warp/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -3752,34 +3752,87 @@ def wait_stream(stream: Stream, event: Event = None):

class RegisteredGLBuffer:
"""
Helper object to register a GL buffer with CUDA so that it can be mapped to a Warp array.
Helper class to register a GL buffer with CUDA so that it can be mapped to a Warp array.
Example usage::
import warp as wp
import numpy as np
from pyglet.gl import *
wp.init()
# create a GL buffer
gl_buffer_id = GLuint()
glGenBuffers(1, gl_buffer_id)
# copy some data to the GL buffer
glBindBuffer(GL_ARRAY_BUFFER, gl_buffer_id)
gl_data = np.arange(1024, dtype=np.float32)
glBufferData(GL_ARRAY_BUFFER, gl_data.nbytes, gl_data.ctypes.data, GL_DYNAMIC_DRAW)
glBindBuffer(GL_ARRAY_BUFFER, 0)
# register the GL buffer with CUDA
cuda_gl_buffer = wp.RegisteredGLBuffer(gl_buffer_id)
# map the GL buffer to a Warp array
arr = cuda_gl_buffer.map(dtype=wp.float32, shape=(1024,))
# launch a Warp kernel to manipulate or read the array
wp.launch(my_kernel, dim=1024, inputs=[arr])
# unmap the GL buffer
cuda_gl_buffer.unmap()
"""

# Specifies no hints about how this resource will be used.
# It is therefore assumed that this resource will be
# read from and written to by CUDA. This is the default value.
NONE = 0x00
"""
Flag that specifies no hints about how this resource will be used.
It is therefore assumed that this resource will be
read from and written to by CUDA. This is the default value.
"""

# Specifies that CUDA will not write to this resource.
READ_ONLY = 0x01
"""
Flag that specifies that CUDA will not write to this resource.
"""

# Specifies that CUDA will not read from this resource and will write over the
# entire contents of the resource, so none of the data previously
# stored in the resource will be preserved.
WRITE_DISCARD = 0x02
"""
Flag that specifies that CUDA will not read from this resource and will write over the
entire contents of the resource, so none of the data previously
stored in the resource will be preserved.
"""

def __init__(self, gl_buffer_id: int, device: Devicelike = None, flags: int = NONE):
"""Create a new RegisteredGLBuffer object.
__fallback_warning_shown = False

def __init__(self, gl_buffer_id: int, device: Devicelike = None, flags: int = NONE, fallback_to_copy: bool = True):
    """Register an OpenGL buffer with CUDA, optionally falling back to copy operations.

    Args:
        gl_buffer_id: The OpenGL buffer id (GLuint).
        device: The device to register the buffer with. If None, the current device will be used.
        flags: A combination of the flags constants :attr:`NONE`, :attr:`READ_ONLY`, and :attr:`WRITE_DISCARD`.
        fallback_to_copy: If True and CUDA/OpenGL interop is not available, fall back to copy operations between the Warp array and the OpenGL buffer. Otherwise, a ``RuntimeError`` will be raised.

    Raises:
        RuntimeError: If the buffer could not be registered with CUDA and ``fallback_to_copy`` is False.

    Note:
        The ``fallback_to_copy`` option (to use copy operations if CUDA graphics interop functionality is not available) requires pyglet version 2.0 or later. Install via ``pip install pyglet==2.*``.
    """
    # Define ``resource`` before any call that can raise: ``__del__`` reads
    # ``self.resource`` and would otherwise fail with an AttributeError on a
    # partially constructed object.
    self.resource = None
    # Staging arrays for the copy fallback; they stay None until the buffer is
    # mapped in fallback mode.
    self.warp_buffer = None
    self.warp_buffer_cpu = None

    self.gl_buffer_id = gl_buffer_id
    self.device = get_device(device)
    self.context = self.device.context
    self.flags = flags
    self.fallback_to_copy = fallback_to_copy

    # The native call yields None when the registration fails.
    self.resource = runtime.core.cuda_graphics_register_gl_buffer(self.context, gl_buffer_id, flags)
    if self.resource is not None:
        return

    if not self.fallback_to_copy:
        raise RuntimeError(f"Failed to register OpenGL buffer {gl_buffer_id} with CUDA")

    # Warn only once per process, no matter how many buffers fall back.
    if not RegisteredGLBuffer.__fallback_warning_shown:
        warp.utils.warn(
            "Could not register GL buffer since CUDA/OpenGL interoperability is not available. Falling back to copy operations between the Warp array and the OpenGL buffer.",
        )
        RegisteredGLBuffer.__fallback_warning_shown = True

def __del__(self):
if not self.resource:
Expand All @@ -3799,18 +3852,48 @@ def map(self, dtype, shape) -> warp.array:
Returns:
A Warp array object representing the mapped OpenGL buffer.
"""
runtime.core.cuda_graphics_map(self.context, self.resource)
ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_size_t)
ptr = ctypes.c_uint64(0)
size = ctypes.c_size_t(0)
runtime.core.cuda_graphics_device_ptr_and_size(
self.context, self.resource, ctypes.byref(ptr), ctypes.byref(size)
)
return warp.array(ptr=ptr.value, dtype=dtype, shape=shape, device=self.device)
if self.resource is not None:
runtime.core.cuda_graphics_map(self.context, self.resource)
ptr = ctypes.c_uint64(0)
size = ctypes.c_size_t(0)
runtime.core.cuda_graphics_device_ptr_and_size(
self.context, self.resource, ctypes.byref(ptr), ctypes.byref(size)
)
return warp.array(ptr=ptr.value, dtype=dtype, shape=shape, device=self.device)
elif self.fallback_to_copy:
if self.warp_buffer is None or self.warp_buffer.dtype != dtype or self.warp_buffer.shape != shape:
self.warp_buffer = warp.empty(shape, dtype, device=self.device)
self.warp_buffer_cpu = warp.empty(shape, dtype, device="cpu", pinned=True)

if self.flags == self.READ_ONLY or self.flags == self.NONE:
# copy from OpenGL buffer to Warp array
from pyglet import gl

gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self.gl_buffer_id)
nbytes = self.warp_buffer.size * warp.types.type_size_in_bytes(dtype)
gl.glGetBufferSubData(gl.GL_ARRAY_BUFFER, 0, nbytes, self.warp_buffer_cpu.ptr)
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
warp.copy(self.warp_buffer, self.warp_buffer_cpu)
return self.warp_buffer

return None

def unmap(self):
    """Unmap the OpenGL buffer.

    When the buffer is registered with CUDA, the graphics resource is unmapped.
    In copy-fallback mode, the contents of the Warp array are uploaded back to
    the OpenGL buffer, unless the flags are ``READ_ONLY``.

    Raises:
        RuntimeError: In copy-fallback mode, if no Warp array exists yet
            because the buffer has not been mapped.
    """
    if self.resource is not None:
        runtime.core.cuda_graphics_unmap(self.context, self.resource)
        return

    # No CUDA resource: only the copy fallback has any work to do.
    if not self.fallback_to_copy:
        return

    if self.warp_buffer is None:
        raise RuntimeError("RegisteredGLBuffer first has to be mapped")

    if self.flags == self.WRITE_DISCARD or self.flags == self.NONE:
        # copy from Warp array to OpenGL buffer
        from pyglet import gl

        # Fetch the host copy before touching GL state so that a failed
        # transfer cannot leave the buffer bound.
        buffer = self.warp_buffer.numpy()
        gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self.gl_buffer_id)
        try:
            gl.glBufferData(gl.GL_ARRAY_BUFFER, buffer.nbytes, buffer.ctypes.data, gl.GL_DYNAMIC_DRAW)
        finally:
            # Always restore the default binding, even if the upload raised.
            gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)


def zeros(
Expand Down
7 changes: 6 additions & 1 deletion warp/native/warp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2865,7 +2865,12 @@ void* cuda_graphics_register_gl_buffer(void* context, uint32_t gl_buffer, unsign
ContextGuard guard(context);

CUgraphicsResource *resource = new CUgraphicsResource;
check_cu(cuGraphicsGLRegisterBuffer_f(resource, gl_buffer, flags));
bool success = check_cu(cuGraphicsGLRegisterBuffer_f(resource, gl_buffer, flags));
if (!success)
{
delete resource;
return NULL;
}

return resource;
}
Expand Down
Loading

0 comments on commit 659d00f

Please sign in to comment.