The simpleVBO.cpp File
The Part 15 simplePBO.cpp file was modified in several ways to create the simpleVBO.cpp file shown below. The only significant change is to the display routine to utilize both 3D vertex and color data as discussed below. The remaining changes are straight-forward refactoring to support the CUDA 3.0 and pre-3.0 graphics interoperability APIs as well as two graphics interoperability buffers colorVBO and vertexVBO. Note that colorVBO is created to hold a uchar4 color array and vertexVBO a float4 array of vertex values.
The changes to simplePBO.cpp can be summarized as follows:
- The call to launch_kernel now includes parameters to pass both vertex and color arrays.
- createVBO was refactored to use a typedef mappedBuffer_t structure. This allows float4 and uchar4 arrays to be created as well as facilitating CUDA 3.0 and pre-3.0 graphics interoperability APIs.
- cleanupCuda frees both vertexVBO and colorVBO structures.
- runCuda maps and unmaps both vertexVBO and colorVBO objects as well as passing the appropriate pointers to launch_kernel.
// simpleVBO.cpp (Rob Farber)
// includes, GL
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glext.h>
// includes
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cuda_gl_interop.h>
#include <rendercheck_gl.h>
//#define USE_CUDA3
extern float animTime;
////////////////////////////////////////////////////////////////////////////////
// VBO specific code
#include <cuda_runtime.h>
#include <cutil_inline.h>
// constants
// Mesh dimensions in vertices. Each VBO is sized for
// mesh_width * mesh_height elements (see createVBO).
const unsigned int mesh_width = 128;
const unsigned int mesh_height = 128;
// Index value handed to glPrimitiveRestartIndexNV in renderCuda; it is
// written after every group of four indices to end the current triangle fan.
const unsigned int RestartIndex = 0xffffffff;
// Describes one OpenGL buffer object that is shared with CUDA.
// The last member depends on which interop API is compiled in (USE_CUDA3).
typedef struct {
// OpenGL buffer object name produced by glGenBuffers.
GLuint vbo;
// Size in bytes of one element stored in the buffer (e.g. sizeof(float4)).
GLuint typeSize;
#ifdef USE_CUDA3
// Handle filled in by cudaGraphicsGLRegisterBuffer (CUDA 3.0 interop API).
struct cudaGraphicsResource *cudaResource;
#else
// Placeholder so the struct has three members in both configurations;
// not read anywhere in this file.
void* space;
#endif
} mappedBuffer_t;
// Kernel launcher with C linkage; its definition is not in this file.
// runCuda calls it with the mapped vertexVBO pointer (pos), the mapped
// colorVBO pointer (posColor), the mesh dimensions, and animTime.
extern "C"
void launch_kernel(float4* pos, uchar4* posColor,
unsigned int mesh_width, unsigned int mesh_height, float time);
// vbo variables
// Initializer order follows mappedBuffer_t: { vbo, typeSize, cudaResource|space }.
// The GLuint buffer name starts at 0 (an integer, not a pointer), so 0 is used
// rather than NULL; the trailing pointer member starts out NULL.
mappedBuffer_t vertexVBO = {0, sizeof(float4), NULL};   // float4 position per vertex
mappedBuffer_t colorVBO = {0, sizeof(uchar4), NULL};    // uchar4 color per vertex
////////////////////////////////////////////////////////////////////////////////
//! Create VBO
//! Generates an OpenGL buffer object for mbuf, allocates uninitialized storage
//! for mesh_width * mesh_height elements of mbuf->typeSize bytes each, and
//! registers the buffer with CUDA using whichever interop API is compiled in.
//! @param mbuf  descriptor whose typeSize is already set; vbo (and
//!              cudaResource under USE_CUDA3) are filled in here
////////////////////////////////////////////////////////////////////////////////
//void createVBO(GLuint* vbo, unsigned int typeSize)
void createVBO(mappedBuffer_t* mbuf)
{
    // total storage needed for one element per mesh vertex
    const unsigned int byteCount = mesh_width * mesh_height * mbuf->typeSize;

    // generate the buffer object and give it backing storage (no initial data)
    glGenBuffers(1, &mbuf->vbo);
    glBindBuffer(GL_ARRAY_BUFFER, mbuf->vbo);
    glBufferData(GL_ARRAY_BUFFER, byteCount, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // make the buffer visible to CUDA
#ifdef USE_CUDA3
    cudaGraphicsGLRegisterBuffer( &mbuf->cudaResource, mbuf->vbo,
                                  cudaGraphicsMapFlagsNone );
#else
    cudaGLRegisterBufferObject(mbuf->vbo);
#endif
}
////////////////////////////////////////////////////////////////////////////////
//! Delete VBO
//! Unregisters the buffer from CUDA and then deletes the OpenGL buffer object.
//! Safe to call on a descriptor whose buffer was never created or has already
//! been deleted (vbo == 0); in that case nothing is done.
//! @param mbuf  descriptor previously set up by createVBO; on return its vbo
//!              is 0 (and cudaResource is NULL under USE_CUDA3)
////////////////////////////////////////////////////////////////////////////////
//void deleteVBO(GLuint* vbo)
void deleteVBO(mappedBuffer_t* mbuf)
{
    // nothing to release
    if (mbuf == NULL || mbuf->vbo == 0)
        return;

    // Release the CUDA registration while the GL buffer still exists;
    // the GL object must not be deleted out from under CUDA.
#ifdef USE_CUDA3
    cudaGraphicsUnregisterResource( mbuf->cudaResource );
    mbuf->cudaResource = NULL;
#else
    cudaGLUnregisterBufferObject( mbuf->vbo );
#endif

    // Deleting a buffer does not require binding it first (the previous
    // glBindBuffer(1, ...) call passed an invalid target and only raised
    // GL_INVALID_ENUM).
    glDeleteBuffers(1, &(mbuf->vbo) );
    mbuf->vbo = 0;   // GLuint name, so 0 rather than NULL
}
void cleanupCuda()
{
deleteVBO(&vertexVBO);
deleteVBO(&colorVBO);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
//! Maps vertexVBO and colorVBO into the CUDA address space, calls
//! launch_kernel with the mapped pointers, the mesh dimensions and animTime,
//! then unmaps both buffers again.
////////////////////////////////////////////////////////////////////////////////
void runCuda()
{
    // pointers to the mapped buffer contents, filled in by the map calls below
    float4 *dptr = NULL;
    uchar4 *cptr = NULL;

    // map OpenGL buffer objects for writing from CUDA
#ifdef USE_CUDA3
    // receives the mapped size in bytes; required by the API but not used here
    size_t mappedBytes;
    cudaGraphicsMapResources( 1, &vertexVBO.cudaResource, NULL );
    cudaGraphicsResourceGetMappedPointer( ( void ** )&dptr, &mappedBytes,
                                          vertexVBO.cudaResource );
    cudaGraphicsMapResources( 1, &colorVBO.cudaResource, NULL );
    cudaGraphicsResourceGetMappedPointer( ( void ** )&cptr, &mappedBytes,
                                          colorVBO.cudaResource );
#else
    cudaGLMapBufferObject((void**)&dptr, vertexVBO.vbo);
    cudaGLMapBufferObject((void**)&cptr, colorVBO.vbo);
#endif

    // execute the kernel
    launch_kernel(dptr, cptr, mesh_width, mesh_height, animTime);

    // unmap buffer objects so OpenGL can use them again
#ifdef USE_CUDA3
    cudaGraphicsUnmapResources( 1, &vertexVBO.cudaResource, NULL );
    cudaGraphicsUnmapResources( 1, &colorVBO.cudaResource, NULL );
#else
    cudaGLUnmapBufferObject(vertexVBO.vbo);
    cudaGLUnmapBufferObject(colorVBO.vbo);
#endif
}
// Selects the CUDA device used for OpenGL interop, creates and registers both
// VBOs, arranges for them to be released at program exit, and runs the kernel
// once.
// @param argc, argv  command line; a "device" flag selects the CUDA device
void initCuda(int argc, char** argv)
{
    // NVIDIA notes that the OpenGL context should be initialized before the
    // GL device is set for CUDA in order to achieve optimal OpenGL/CUDA
    // interop performance -- NOTE(review): context creation is not in this
    // function; presumably the caller has already done it.
    const bool deviceRequested =
        cutCheckCmdLineFlag(argc, (const char**)argv, "device");

    if( !deviceRequested ) {
        // no device given: use the one with the highest Gflops/s
        cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
    } else {
        // use the device specified on the command line
        cutilGLDeviceInit(argc, argv);
    }

    createVBO(&vertexVBO);
    createVBO(&colorVBO);

    // make certain the VBOs get cleaned up on program exit
    atexit(cleanupCuda);

    runCuda();
}
void renderCuda(int drawMode)
{
glBindBuffer(GL_ARRAY_BUFFER, vertexVBO.vbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glBindBuffer(GL_ARRAY_BUFFER, colorVBO.vbo);
glColorPointer(4, GL_UNSIGNED_BYTE, 0, 0);
glEnableClientState(GL_COLOR_ARRAY);
switch(drawMode) {
case GL_LINE_STRIP:
for(int i=0 ; i < mesh_width*mesh_height; i+= mesh_width)
glDrawArrays(GL_LINE_STRIP, i, mesh_width);
break;
case GL_TRIANGLE_FAN: {
static GLuint* qIndices=NULL;
int size = 5*(mesh_height-1)*(mesh_width-1);
if(qIndices == NULL) { // allocate and assign trianglefan indicies
qIndices = (GLuint *) malloc(size*sizeof(GLint));
int index=0;
for(int i=1; i < mesh_height; i++) {
for(int j=1; j < mesh_width; j++) {
qIndices[index++] = (i)*mesh_width + j;
qIndices[index++] = (i)*mesh_width + j-1;
qIndices[index++] = (i-1)*mesh_width + j-1;
qIndices[index++] = (i-1)*mesh_width + j;
qIndices[index++] = RestartIndex;
}
}
}
glPrimitiveRestartIndexNV(RestartIndex);
glEnableClientState(GL_PRIMITIVE_RESTART_NV);
glDrawElements(GL_TRIANGLE_FAN, size, GL_UNSIGNED_INT, qIndices);
glDisableClientState(GL_PRIMITIVE_RESTART_NV);
} break;
default:
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
break;
}
glDisableClientState(GL_VERTEX_ARRAY);
glDisableClientState(GL_COLOR_ARRAY);
}
As can be seen, the call to renderCuda has been modified to support several drawing modes based on a parameter drawMode. Once inside the renderCuda routine, the following code tells OpenGL to bind the buffer vbo as a float4 vertex array:
glBindBuffer(GL_ARRAY_BUFFER, vertexVBO.vbo); glVertexPointer(4, GL_FLOAT, 0, 0); glEnableClientState(GL_VERTEX_ARRAY);
Similarly, the colorVBO buffer is bound as a uchar4 array. This requires that the CUDA kernel specify the color at each vertex as four unsigned bytes (RGBA), matching the arguments passed to glColorPointer.
glBindBuffer(GL_ARRAY_BUFFER, colorVBO.vbo); glColorPointer(4, GL_UNSIGNED_BYTE, 0, 0); glEnableClientState(GL_COLOR_ARRAY);
A switch statement based on drawMode is used to draw the image according to the user's input.
Drawing colored points requires only a single call:
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
The most straightforward way to draw lines was utilized by looping over the rows of the mesh:
case GL_LINE_STRIP:
// one line strip per mesh row; i is the index of the row's first vertex
for(int i=0 ; i < mesh_width*mesh_height; i+= mesh_width)
glDrawArrays(GL_LINE_STRIP, i, mesh_width);
break;
As discussed earlier in this article, primitive restart was utilized to render colored surfaces. This example takes the unusual approach of declaring the pointer to the qIndices array as a static pointer within the case statement that only gets allocated and initialized once during the life of the program. This was purposely done to help the reader experiment with alternative drawing methods, index layout, and drawing modes by keeping the allocation, initialization, and usage local to the drawing mode. As a general rule, such use of a static variable should be avoided.
case GL_TRIANGLE_FAN: {
// index list is built on first use and kept for later calls
static GLuint* qIndices=NULL;
// four corner indices plus one restart marker per mesh cell
int size = 5*(mesh_height-1)*(mesh_width-1);
if(qIndices == NULL) { // allocate and assign triangle fan indices
qIndices = (GLuint *) malloc(size*sizeof(GLint));
int index=0;
for(int i=1; i < mesh_height; i++) {
for(int j=1; j < mesh_width; j++) {
// corners of the cell whose last row/column is (i, j)
qIndices[index++] = (i)*mesh_width + j;
qIndices[index++] = (i)*mesh_width + j-1;
qIndices[index++] = (i-1)*mesh_width + j-1;
qIndices[index++] = (i-1)*mesh_width + j;
// restart marker ends this fan
qIndices[index++] = RestartIndex;
}
}
}
glPrimitiveRestartIndexNV(RestartIndex);
glEnableClientState(GL_PRIMITIVE_RESTART_NV);
glDrawElements(GL_TRIANGLE_FAN, size, GL_UNSIGNED_INT, qIndices);
glDisableClientState(GL_PRIMITIVE_RESTART_NV);
} break;
Finally the OpenGL client state machine is informed that the vertex and color arrays are disabled and renderCuda returns.
glDisableClientState(GL_VERTEX_ARRAY); glDisableClientState(GL_COLOR_ARRAY);


