#include <stdlib.h>
#include <climits>
#include <iostream>
#include <vector>
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#include "fatals.h"
#include "hrtime.h"
  8 
  9 using namespace tbb;
 10 using namespace std;
 11 
 12 /*------------------------------------------------------+
 13  | This may look like a class definition, but its not:  |
 14  | This is actually a TBB Parallel For BODY definition! |
 15  |                                                      |
 16  | Arguments are the data members of the "class", in    |
 17  | this case, p_array_a, p_array_b, and p_array_sum,    |
 18  | which are all of type int *                          |
 19  *------------------------------------------------------*/
 20 class ArraySummer {
 21 
 22   int * p_array_a;
 23   int * p_array_b;
 24   int * p_array_sum;
 25 
 26 public:
 27   // This empty constructor with an initialization list is used to setup calls to the function
 28   ArraySummer(int * p_a, int * p_b, int * p_sum) : p_array_a(p_a), p_array_b(p_b), p_array_sum(p_sum) { }
 29 
 30   /*----------------------------------------------------------+
 31    | Here is the actual body, that will be called in parallel |
 32    | by the TBB runtime. You MUST put this code inside the    |
 33    | class definition, since the compiler will be expanding   |
 34    | and inlining this code as part of the template process.  |
 35    |                                                          |
 36    | The blocked_range<int> is something like a list of       |
 37    | indexes corresponding to each invocation of the function |
 38    +----------------------------------------------------------*/
 39 
 40   void operator() ( const blocked_range<int>& r ) const {
 41     for ( int i = r.begin(); i != r.end(); i++ ) { // iterates over the entire chunk
 42       p_array_sum[i] = p_array_a[i] + p_array_b[i];
 43     }
 44   }
 45 
 46 };
 47 
 48 /*------------------------------------------------+
 49  | Here's the meat of the program -- sums two     |
 50  | Fibonacci-like arrays, using a for loop first  |
 51  | and then a parallel_for TBB template.          |
 52  +------------------------------------------------*/
 53 
 54 int main(size_t argc, char *argv[]) {
 55  hrtime starttime, endtime, singlethread_time, tbb_time; // for timing
 56  int * p_A;
 57  int * p_B;
 58  int * p_SUM_1T;
 59  int * p_SUM_TBB;
 60 
 61  /* This is the TBB runtime... */
 62  task_scheduler_init init;
 63 
 64  if( argc != 2 ) {
 65    fatal("Usage: %s <arraySize>\n",argv[0]);
 66  }
 67 
 68  int nElements = atoi( argv[1] );
 69  if( nElements <= 2 ) {
 70    fatal("Array size (%s) must be an integer > 2\n", argv[1]);
 71  }
 72 
 73  p_A       = new int[nElements];
 74  p_B       = new int[nElements];
 75  p_SUM_1T  = new int[nElements];
 76  p_SUM_TBB = new int[nElements];
 77 
 78  /* 
 79   * Initialize the data sets ... could do this in parallel too, but 
 80   * serial is easier to read
 81   */
 82  p_A[0] = p_B[0] = 0;
 83  p_A[1] = p_B[1] = 1;
 84  for( int i=2;i<nElements;i++) {
 85    p_A[i]   = (p_A[i-1] + p_A[i-2]) % (INT_MAX/2);
 86    p_B[i]   = p_A[i];
 87    p_SUM_1T[i] = 0;
 88    p_SUM_TBB[i] = 0;
 89  }
 90 
 91 
 92  /*
 93   * Time how long it takes to sum the arrays using a single thread
 94   */
 95  starttime = gethrtime();
 96 
 97  for( int i=0;i<nElements;i++ ) {
 98    p_SUM_1T[i] = p_A[i] + p_B[i];
 99  }
100 
101  endtime = gethrtime();
102  singlethread_time = endtime - starttime;
103 
104  /*
105   * Now sum the arrays again using TBB, again timing the execution
106   */
107  starttime = gethrtime();
108 
109  parallel_for(blocked_range<int>(0, nElements, 100),
110        ArraySummer( p_A, p_B, p_SUM_TBB ) );
111 
112  endtime = gethrtime();
113  tbb_time = endtime - starttime;
114 
115  /*
116   * Verify the sums match
117   */
118  for(int i=0;i<nElements;i++) {
119    if( p_SUM_1T[i] != p_SUM_TBB[i] ) {
120      cout << p_A[i] << " + " << p_B[i] << " = " << p_SUM_1T[i] << " AND " << p_SUM_TBB[i] <<  endl;
121    }
122  }
123 
124  /* 
125   * Print the times
126   */
127  cout << "1T summing time:  " << singlethread_time << " ticks" << endl;
128  cout << "TBB summing time: " << tbb_time << " ticks" << endl;
129 
130  delete [] p_A;
131  delete [] p_B;
132  delete [] p_SUM_1T;
133  delete [] p_SUM_TBB;
134 
135  return 0;
136 }
137