Channels ▼
RSS

Web Development

CUDA, Supercomputing for the Masses: Part 18


The simpleVBO.cpp File

The Part 15 simplePBO.cpp file was modified in several ways to create the simpleVBO.cpp file shown below. The only significant change is to the display routine, which now utilizes both 3D vertex and color data as discussed below. The remaining changes are straightforward refactoring to support the CUDA 3.0 and pre-3.0 graphics interoperability APIs as well as two graphics interoperability buffers, colorVBO and vertexVBO. Note that colorVBO is created to hold a uchar4 color array and vertexVBO is created to hold a float4 array of vertex values.

The changes to simplePBO.cpp can be summarized as follows:

  • The call to launch_kernel now includes parameters to pass both vertex and color arrays.
  • createVBO was refactored to use a typedef mappedBuffer_t structure. This allows float4 and uchar4 arrays to be created as well as facilitating CUDA 3.0 and pre-3.0 graphics interoperability APIs.
  • cleanupCuda frees both vertexVBO and colorVBO structures.
  • runCuda maps and unmaps both vertexVBO and colorVBO objects as well as passing the appropriate pointers to launch_kernel.


// simpleVBO.cpp (Rob Farber)

// includes, GL
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glext.h>

// includes
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cuda_gl_interop.h>
#include <rendercheck_gl.h>

//#define USE_CUDA3

extern float animTime;

////////////////////////////////////////////////////////////////////////////////
// VBO specific code
#include <cuda_runtime.h>
#include <cutil_inline.h>

// constants
const unsigned int mesh_width = 128;
const unsigned int mesh_height = 128;
const unsigned int RestartIndex = 0xffffffff;

// Bookkeeping for one OpenGL buffer object that is shared with CUDA.
// Field order matters: vertexVBO and colorVBO are set up with positional
// aggregate initializers.
typedef struct {
  // OpenGL buffer object name; written by glGenBuffers in createVBO
  GLuint vbo;
  // size in bytes of one element; createVBO multiplies it by the mesh
  // dimensions to size the buffer
  GLuint typeSize;
#ifdef USE_CUDA3
  // handle filled in by cudaGraphicsGLRegisterBuffer (CUDA 3.0 interop API)
  struct cudaGraphicsResource *cudaResource;
#else
  // placeholder third member for the pre-3.0 API; no code visible in this
  // file reads or writes it after initialisation
  void* space;
#endif
} mappedBuffer_t;

// Kernel launcher defined outside this file (C linkage).
// runCuda passes the mapped vertexVBO pointer as pos, the mapped colorVBO
// pointer as posColor, the mesh dimensions, and the current animTime.
// NOTE(review): presumably writes one float4 position and one uchar4 color
// per mesh point -- confirm against the kernel source.
// The parameter names mesh_width/mesh_height shadow the file-level constants
// of the same name inside this declaration only.
extern "C" 
void launch_kernel(float4* pos, uchar4* posColor,
		   unsigned int mesh_width, unsigned int mesh_height, float time);

// vbo variables
// Initialiser order is {vbo, typeSize, cudaResource-or-space}.  The first
// member is a GLuint buffer name, so it starts as 0 (OpenGL's "no buffer"
// name) rather than the pointer constant NULL.
mappedBuffer_t vertexVBO = {0, sizeof(float4), NULL};
mappedBuffer_t colorVBO =  {0, sizeof(uchar4), NULL};

////////////////////////////////////////////////////////////////////////////////
//! Create VBO
//!
//! Generates an OpenGL buffer object large enough for
//! mesh_width * mesh_height elements of mbuf->typeSize bytes each, and
//! registers it with CUDA so it can later be mapped for writing.
//!
//! @param mbuf  descriptor with typeSize already set; on return vbo holds the
//!              new buffer name (and, with USE_CUDA3, cudaResource holds the
//!              registration handle)
//!
//! A failed CUDA registration is reported through cutilSafeCall (from
//! cutil_inline.h) instead of being silently ignored.
////////////////////////////////////////////////////////////////////////////////
void createVBO(mappedBuffer_t* mbuf)
{
  // create buffer object
  glGenBuffers(1, &(mbuf->vbo) );
  glBindBuffer(GL_ARRAY_BUFFER, mbuf->vbo);

  // allocate storage only: the data pointer is 0, so nothing is uploaded
  const unsigned int size = mesh_width * mesh_height * mbuf->typeSize;
  glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);

  glBindBuffer(GL_ARRAY_BUFFER, 0);

  // register buffer object with CUDA; without a successful registration the
  // later map calls cannot hand back a usable device pointer
#ifdef USE_CUDA3
  cutilSafeCall( cudaGraphicsGLRegisterBuffer( &(mbuf->cudaResource),
                                               mbuf->vbo,
                                               cudaGraphicsMapFlagsNone ) );
#else
  cutilSafeCall( cudaGLRegisterBufferObject(mbuf->vbo) );
#endif
}

////////////////////////////////////////////////////////////////////////////////
//! Delete VBO
//!
//! Unregisters the buffer from CUDA and then deletes the OpenGL buffer
//! object, resetting the descriptor so a second call is a no-op.
//!
//! @param mbuf  descriptor previously filled in by createVBO; may be NULL or
//!              already released
//!
//! CUDA return codes are intentionally not escalated here: cleanupCuda runs
//! this from an atexit handler, where terminating the program again is not an
//! option.
////////////////////////////////////////////////////////////////////////////////
void deleteVBO(mappedBuffer_t* mbuf)
{
  // nothing to release for a buffer that was never created or already freed
  if (mbuf == NULL || mbuf->vbo == 0)
    return;

  // Drop the CUDA registration first, while the buffer name is still valid;
  // the original deleted the GL buffer and only then unregistered it.
#ifdef USE_CUDA3
  if (mbuf->cudaResource != NULL) {
    cudaGraphicsUnregisterResource( mbuf->cudaResource );
    mbuf->cudaResource = NULL;
  }
#else
  cudaGLUnregisterBufferObject( mbuf->vbo );
#endif

  // No bind is needed before deletion (the former glBindBuffer(1, ...) passed
  // an invalid target and only raised a GL error).
  glDeleteBuffers(1, &(mbuf->vbo) );
  mbuf->vbo = 0;
}

void cleanupCuda()
{
  deleteVBO(&vertexVBO);
  deleteVBO(&colorVBO);
}

////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runCuda()
{
    // map OpenGL buffer object for writing from CUDA
    float4 *dptr;
    uchar4 *cptr;
    uint *iptr;
#ifdef USE_CUDA3
    size_t start;
    cudaGraphicsMapResources( 1, &vertexVBO.cudaResource, NULL );
    cudaGraphicsResourceGetMappedPointer( ( void ** )&dptr, &start, 
					  vertexVBO.cudaResource );
    cudaGraphicsMapResources( 1, &colorVBO.cudaResource, NULL );
    cudaGraphicsResourceGetMappedPointer( ( void ** )&cptr, &start, 
					  colorVBO.cudaResource );
#else
    cudaGLMapBufferObject((void**)&dptr, vertexVBO.vbo);
    cudaGLMapBufferObject((void**)&cptr, colorVBO.vbo);
#endif

    // execute the kernel
    launch_kernel(dptr, cptr, mesh_width, mesh_height, animTime);

    // unmap buffer object
#ifdef USE_CUDA3
    cudaGraphicsUnmapResources( 1, &vertexVBO.cudaResource, NULL );
    cudaGraphicsUnmapResources( 1, &colorVBO.cudaResource, NULL );
#else
    cudaGLUnmapBufferObject(vertexVBO.vbo);
    cudaGLUnmapBufferObject(colorVBO.vbo);
#endif
}

// One-time CUDA/OpenGL interop setup: chooses the CUDA device, creates both
// interop buffers, arranges for their cleanup at exit, and runs the kernel
// once so the buffers hold data before the first draw.
//
// argc/argv are the program's command-line arguments; they are scanned for a
// "device" flag.
void initCuda(int argc, char** argv)
{
  // The OpenGL context has to exist before this point so the device can be
  // set up for GL interop (NVIDIA notes this is needed for best interop
  // performance).  Without a "device" flag on the command line, fall back to
  // the device reported by cutGetMaxGflopsDeviceId(); otherwise let
  // cutilGLDeviceInit() handle the command-line selection.
  if( !cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
    cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
  } else {
    cutilGLDeviceInit(argc, argv);
  }

  // vertex positions first, then per-vertex colors
  createVBO(&vertexVBO);
  createVBO(&colorVBO);

  // guarantee both VBOs are released when the program exits
  atexit(cleanupCuda);

  // populate the buffers once up front
  runCuda();
}

// Draws the mesh held in vertexVBO/colorVBO.
//
// drawMode selects the primitive:
//   GL_LINE_STRIP   - one line strip per mesh row
//   GL_TRIANGLE_FAN - one four-vertex fan per mesh cell, drawn with a single
//                     glDrawElements call using NV primitive restart
//   anything else   - every mesh point as GL_POINTS
//
// On return the vertex and color client arrays are disabled and
// GL_ARRAY_BUFFER is unbound.
void renderCuda(int drawMode)
{
  // positions: 4 floats per vertex, tightly packed, read from vertexVBO
  glBindBuffer(GL_ARRAY_BUFFER, vertexVBO.vbo);
  glVertexPointer(4, GL_FLOAT, 0, 0);
  glEnableClientState(GL_VERTEX_ARRAY);

  // colors: 4 unsigned bytes per vertex, tightly packed, read from colorVBO
  glBindBuffer(GL_ARRAY_BUFFER, colorVBO.vbo);
  glColorPointer(4, GL_UNSIGNED_BYTE, 0, 0);
  glEnableClientState(GL_COLOR_ARRAY);

  switch(drawMode) {
  case GL_LINE_STRIP:
    // unsigned counter: the bound is an unsigned product
    for(unsigned int first = 0; first < mesh_width*mesh_height; first += mesh_width)
      glDrawArrays(GL_LINE_STRIP, (int)first, (int)mesh_width);
    break;
  case GL_TRIANGLE_FAN: {
    // Index list is built on first use and kept for the life of the program.
    static GLuint* qIndices=NULL;
    // 4 corner indices + 1 restart marker per mesh cell
    const int size = (int)(5*(mesh_height-1)*(mesh_width-1));

    if(qIndices == NULL) { // allocate and assign triangle-fan indices
      qIndices = (GLuint *) malloc(size*sizeof(GLuint));
      if(qIndices == NULL)
        break; // allocation failed: skip the surface now, retry on next call
      int index=0;
      for(unsigned int i=1; i < mesh_height; i++) {
        for(unsigned int j=1; j < mesh_width; j++) {
          qIndices[index++] = (i)*mesh_width + j;
          qIndices[index++] = (i)*mesh_width + j-1;
          qIndices[index++] = (i-1)*mesh_width + j-1;
          qIndices[index++] = (i-1)*mesh_width + j;
          qIndices[index++] = RestartIndex; // ends this cell's fan
        }
      }
    }
    glPrimitiveRestartIndexNV(RestartIndex);
    glEnableClientState(GL_PRIMITIVE_RESTART_NV);
    glDrawElements(GL_TRIANGLE_FAN, size, GL_UNSIGNED_INT, qIndices);
    glDisableClientState(GL_PRIMITIVE_RESTART_NV);
  } break;
  default:
    glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
    break;
  }

  glDisableClientState(GL_VERTEX_ARRAY);
  glDisableClientState(GL_COLOR_ARRAY);

  // do not leak the buffer binding into whatever draws next
  glBindBuffer(GL_ARRAY_BUFFER, 0);
}

As can be seen, the call to renderCuda has been modified to support several drawing modes based on a parameter drawMode. Once inside the renderCuda routine, the following code tells OpenGL to bind the buffer vbo as a float4 vertex array:


  glBindBuffer(GL_ARRAY_BUFFER, vertexVBO.vbo);
  glVertexPointer(4, GL_FLOAT, 0, 0);
  glEnableClientState(GL_VERTEX_ARRAY);

Similarly, the colorVBO buffer is bound as a uchar4 array. This requires the CUDA kernel to specify the color at each vertex in RGBA format (four unsigned bytes per vertex), matching the arguments passed to glColorPointer.


  glBindBuffer(GL_ARRAY_BUFFER, colorVBO.vbo);
  glColorPointer(4, GL_UNSIGNED_BYTE, 0, 0);
  glEnableClientState(GL_COLOR_ARRAY);

A switch statement based on drawMode is used to draw the image according to the user's input.

Drawing colored points requires only a single call:


    glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);

The most straightforward way to draw lines was utilized by looping over the rows of the mesh:


case GL_LINE_STRIP:
   for(int i=0 ; i < mesh_width*mesh_height; i+= mesh_width)
     glDrawArrays(GL_LINE_STRIP, i, mesh_width);
   break;

As discussed earlier in this article, primitive restart was utilized to render colored surfaces. This example takes the unusual approach of declaring the pointer to the qIndices array as a static pointer within the case statement that only gets allocated and initialized once during the life of the program. This was purposely done to help the reader experiment with alternative drawing methods, index layout, and drawing modes by keeping the allocation, initialization, and usage local to the drawing mode. As a general rule, such use of a static variable should be avoided.


  case GL_TRIANGLE_FAN: {
    static GLuint* qIndices=NULL;
    int size = 5*(mesh_height-1)*(mesh_width-1);

    if(qIndices == NULL) { // allocate and assign trianglefan indicies
      qIndices = (GLuint *) malloc(size*sizeof(GLint));
      int index=0;
      for(int i=1; i < mesh_height; i++) {
        for(int j=1; j < mesh_width; j++) {
          qIndices[index++] = (i)*mesh_width + j;
          qIndices[index++] = (i)*mesh_width + j-1;
          qIndices[index++] = (i-1)*mesh_width + j-1;
          qIndices[index++] = (i-1)*mesh_width + j;
          qIndices[index++] = RestartIndex;
        }
      }
    }
    glPrimitiveRestartIndexNV(RestartIndex);
    glEnableClientState(GL_PRIMITIVE_RESTART_NV);
    glDrawElements(GL_TRIANGLE_FAN, size, GL_UNSIGNED_INT, qIndices);
    glDisableClientState(GL_PRIMITIVE_RESTART_NV);
  } break;

Finally the OpenGL client state machine is informed that the vertex and color arrays are disabled and renderCuda returns.


  glDisableClientState(GL_VERTEX_ARRAY);
  glDisableClientState(GL_COLOR_ARRAY);


Related Reading


More Insights






Currently we allow the following HTML tags in comments:

Single tags

These tags can be used alone and don't need an ending tag.

<br> Defines a single line break

<hr> Defines a horizontal line

Matching tags

These require an ending tag - e.g. <i>italic text</i>

<a> Defines an anchor

<b> Defines bold text

<big> Defines big text

<blockquote> Defines a long quotation

<caption> Defines a table caption

<cite> Defines a citation

<code> Defines computer code text

<em> Defines emphasized text

<fieldset> Defines a border around elements in a form

<h1> This is heading 1

<h2> This is heading 2

<h3> This is heading 3

<h4> This is heading 4

<h5> This is heading 5

<h6> This is heading 6

<i> Defines italic text

<p> Defines a paragraph

<pre> Defines preformatted text

<q> Defines a short quotation

<samp> Defines sample computer code text

<small> Defines small text

<span> Defines a section in a document

<s> Defines strikethrough text

<strike> Defines strikethrough text

<strong> Defines strong text

<sub> Defines subscripted text

<sup> Defines superscripted text

<u> Defines underlined text

Dr. Dobb's encourages readers to engage in spirited, healthy debate, including taking us to task. However, Dr. Dobb's moderates all comments posted to our site, and reserves the right to modify or remove any content that it determines to be derogatory, offensive, inflammatory, vulgar, irrelevant/off-topic, racist or obvious marketing or spam. Dr. Dobb's further reserves the right to disable the profile of any commenter participating in said activities.

 
Disqus Tips To upload an avatar photo, first complete your Disqus profile. | View the list of supported HTML tags you can use to style comments. | Please read our commenting policy.
 
Dr. Dobb's TV