Channels ▼
RSS

Parallel

The OpenACC Execution Model


A First Implementation

The following code is a complete C-language implementation of a serial, OpenMP, and OpenACC CGS test code. This code fills a matrix with random numbers and then performs a Classical Gram-Schmidt orthogonalization. The orthogonality of the results is checked. If desired, the user can call the printOctave() method to check the results with Octave or Matlab.

// Classical Gram-Schmidt Rob Farber

#include <stdio.h>
#include <stdlib.h>
#include <accelmath.h>
#include <omp.h>

#ifndef ROWS
#define ROWS 640
#endif
#ifndef COLS
#define COLS 640
#endif

// The _OPENACC variable defined by the OpenACC compiler.
#ifdef _OPENACC

// Use the OpenACC implementation if _OPENACC is defined
void gramSchmidt(restrict float Q[][COLS], const int rows, const int cols) 
{
#pragma acc data copy(Q[0:rows][0:cols])
  for(int k=0; k < cols; k++) {
    double tmp = 0.;
#pragma acc parallel reduction(+:tmp)
    for(int i=0; i < rows; i++) tmp +=  (Q[i][k] * Q[i][k]);
    tmp = sqrt(tmp);
    
#pragma acc parallel loop
    for(int i=0; i < rows; i++) Q[i][k] /= tmp;
    
#pragma acc parallel loop
    for(int j=k+1; j < cols; j++) {
      tmp=0.;
      for(int i=0; i < rows; i++) tmp += Q[i][k] * Q[i][j];
      for(int i=0; i < rows; i++) Q[i][j] -= tmp * Q[i][k];
    }
  }
}

#else 

// If _OPENACC is not defined then use OMP/serial code for performance testing
void gramSchmidt(restrict float Q[][COLS], const int rows, const int cols) 
{
  for(int k=0; k < cols; k++) {
    double tmp = 0.;
#pragma omp parallel for reduction(+:tmp)
    for(int i=0; i < rows; i++) tmp +=  (Q[i][k] * Q[i][k]);
    tmp = sqrt(tmp);

#pragma omp parallel for
    for(int i=0; i < rows; i++) Q[i][k] /= tmp;

#pragma omp parallel for reduction(+:tmp)
    for(int j=k+1; j < cols; j++) {
      tmp=0.;
      for(int i=0; i < rows; i++) tmp += Q[i][k] * Q[i][j];
      for(int i=0; i < rows; i++) Q[i][j] -= tmp * Q[i][k];
    }
  }
}

#endif

//Allows loading of the matrices into octave/matlab for testing
void printOctave(char* var, float A[][COLS], int rows, int cols)
{
  //  if((rows*cols) > 1000) return;
  printf("%s=[\n",var);
  for(int i=0; i < rows; i++) { 
    for(int j=0; j < cols; j++) printf("%c%e", (j==0)?' ':',', A[i][j]);
    printf(";"); 
  }
  printf("];\n");
}
// checks that the results are orthonogal where they should be
void checkOctave(restrict float A[][COLS], int rows, int cols)
{
  int found_error=0;
  for(int c1=0; c1 < cols; c1++)
    for(int c2=c1; c2 < cols; c2++) {
      double sum=0.;
      for(int i=0; i < rows; i++) sum += A[i][c1] * A[i][c2];
      if(c1 == c2) { // should be near 1 (unit length)
	if(sum < 0.9) {
	  printf("Failed unit length: %d %d %g\n", c1,c2,sum);
	  found_error = 1;
	  exit(1);
	}
      } else { // should be very small (orthogonal)
	if(sum > 0.1) {
	  printf("Failed orthonogonal  %d %d %g\n", c1,c2,sum);
	  found_error = 1;
	  exit(1);
	}
      }
  }
  if(!found_error) printf("Check OK!\n");
}

//Simple driver using random values
int main(int argc, char *argv[])
{
  int rows=ROWS;
  int cols=COLS;
  float (*A)[COLS] = (float(*)[COLS])malloc(sizeof(float)*rows*cols);

  // fill matrix A with random numbers
  for(int i=0; i < rows; i++)
    for(int j=0; j < cols; j++)
      A[i][j] = (float)rand()/ (float)RAND_MAX;
  printf("Done with init!\n");

  //printOctave("A", A, rows, cols);
  double startTime = omp_get_wtime();
  gramSchmidt(A,rows, cols);
  double endTime = omp_get_wtime();
  printf("runtime %d %d matrix %g\n",rows, cols, (endTime-startTime));
  checkOctave(A, rows, cols);
  //printOctave("Q", A, rows, cols);
  free(A);
}

Example 6: Source for cgs-01.c>

Note that the OpenACC implementation closely mirrors the OpenMP implementation. The only real difference between the OpenMP and OpenACC code (aside from slight syntax differences in the pragmas) is the data clause that tells the OpenACC compiler to copy the matrix Q to the accelerator at the beginning of the gramSchmidt() method and return the modified Q matrix just prior to the return. Since the scope of the k loop encompasses the entire gramSchmidt() method, no additional curly brackets are required. The j loop demonstrates how the parallel loop can enclose multiple work-sharing loops. (A final note on the copy pragma: do not be tempted to eliminate the "0:" in the copy clause as "copy(Q[rows][cols]" tells the OpenACC compiler to copy just one element. Some compilers will recognize this as an error and perform a copy operation on the entire array. For portability reasons, make certain the copy clause for C code correctly specifies the entire region.)

// Use the OpenACC implementation if _OPENACC is defined
void gramSchmidt(restrict float Q[][COLS], const int rows, const int cols) 
{
#pragma acc data copy(Q[0:rows][0:cols])
  for(int k=0; k < cols; k++) {
    double tmp = 0.;
#pragma acc parallel reduction(+:tmp)
    for(int i=0; i < rows; i++) tmp +=  (Q[i][k] * Q[i][k]);
    tmp = sqrt(tmp);
    
#pragma acc parallel loop
    for(int i=0; i < rows; i++) Q[i][k] /= tmp;
    
#pragma acc parallel loop
    for(int j=k+1; j < cols; j++) {
      tmp=0.;
      for(int i=0; i < rows; i++) tmp += Q[i][k] * Q[i][j];
      for(int i=0; i < rows; i++) Q[i][j] -= tmp * Q[i][k];
    }
  }
}

Example 7: The OpenACC gramSchmidt method.

Combining both OpenMP and OpenACC pragmas in the implementation of the gramSchmidt() method clearly demonstrates the similarity between the pragmas and that it is not necessary to write separate OpenMP and OpenACC methods. The following code compiles correctly for OpenMP and OpenACC without the need for an #ifdef _OPENACC preprocessor conditional:

void gramSchmidt(restrict float Q[][COLS], const int rows, const int cols) 
{
#pragma acc data copy(Q[0:rows][0:cols])
  for(int k=0; k < cols; k++) {
    double tmp = 0.;
#pragma omp parallel for reduction(+:tmp)
#pragma acc parallel reduction(+:tmp)
    for(int i=0; i < rows; i++) tmp +=  (Q[i][k] * Q[i][k]);
    tmp = sqrt(tmp);
    
#pragma omp parallel for
#pragma acc parallel loop
    for(int i=0; i < rows; i++) Q[i][k] /= tmp;
    
#pragma omp parallel for reduction(+:tmp)
#pragma acc parallel loop
    for(int j=k+1; j < cols; j++) {
      tmp=0.;
      for(int i=0; i < rows; i++) tmp += Q[i][k] * Q[i][j];
      for(int i=0; i < rows; i++) Q[i][j] -= tmp * Q[i][k];
    }
  }
}

Example 8: Combining OpenACC and OpenMP pragmas in a single method.


Related Reading


More Insights






Currently we allow the following HTML tags in comments:

Single tags

These tags can be used alone and don't need an ending tag.

<br> Defines a single line break

<hr> Defines a horizontal line

Matching tags

These require an ending tag - e.g. <i>italic text</i>

<a> Defines an anchor

<b> Defines bold text

<big> Defines big text

<blockquote> Defines a long quotation

<caption> Defines a table caption

<cite> Defines a citation

<code> Defines computer code text

<em> Defines emphasized text

<fieldset> Defines a border around elements in a form

<h1> This is heading 1

<h2> This is heading 2

<h3> This is heading 3

<h4> This is heading 4

<h5> This is heading 5

<h6> This is heading 6

<i> Defines italic text

<p> Defines a paragraph

<pre> Defines preformatted text

<q> Defines a short quotation

<samp> Defines sample computer code text

<small> Defines small text

<span> Defines a section in a document

<s> Defines strikethrough text

<strike> Defines strikethrough text

<strong> Defines strong text

<sub> Defines subscripted text

<sup> Defines superscripted text

<u> Defines underlined text

Dr. Dobb's encourages readers to engage in spirited, healthy debate, including taking us to task. However, Dr. Dobb's moderates all comments posted to our site, and reserves the right to modify or remove any content that it determines to be derogatory, offensive, inflammatory, vulgar, irrelevant/off-topic, racist or obvious marketing or spam. Dr. Dobb's further reserves the right to disable the profile of any commenter participating in said activities.

 
Disqus Tips To upload an avatar photo, first complete your Disqus profile. | View the list of supported HTML tags you can use to style comments. | Please read our commenting policy.
 

Video