1 #include <iostream>
2 #include <stdlib.h>
3 #include "tbb/task_scheduler_init.h"
4 #include "tbb/parallel_for.h"
5 #include "tbb/blocked_range.h"
6 #include "fatals.h"
7 #include "hrtime.h"
8
9 using namespace tbb;
10 using namespace std;
11
12 /*------------------------------------------------------+
13 | This may look like a class definition, but its not: |
14 | This is actually a TBB Parallel For BODY definition! |
15 | |
16 | Arguments are the data members of the "class", in |
17 | this case, p_array_a, p_array_b, and p_array_sum, |
18 | which are all of type int * |
19 *------------------------------------------------------*/
20 class ArraySummer {
21
22 int * p_array_a;
23 int * p_array_b;
24 int * p_array_sum;
25
26 public:
27 // This empty constructor with an initialization list is used to setup calls to the function
28 ArraySummer(int * p_a, int * p_b, int * p_sum) : p_array_a(p_a), p_array_b(p_b), p_array_sum(p_sum) { }
29
30 /*----------------------------------------------------------+
31 | Here is the actual body, that will be called in parallel |
32 | by the TBB runtime. You MUST put this code inside the |
33 | class definition, since the compiler will be expanding |
34 | and inlining this code as part of the template process. |
35 | |
36 | The blocked_range<int> is something like a list of |
37 | indexes corresponding to each invocation of the function |
38 +----------------------------------------------------------*/
39
40 void operator() ( const blocked_range<int>& r ) const {
41 for ( int i = r.begin(); i != r.end(); i++ ) { // iterates over the entire chunk
42 p_array_sum[i] = p_array_a[i] + p_array_b[i];
43 }
44 }
45
46 };
47
48 /*------------------------------------------------+
49 | Here's the meat of the program -- sums two |
50 | Fibonacci-like arrays, using a for loop first |
51 | and then a parallel_for TBB template. |
52 +------------------------------------------------*/
53
54 int main(size_t argc, char *argv[]) {
55 hrtime starttime, endtime, singlethread_time, tbb_time; // for timing
56 int * p_A;
57 int * p_B;
58 int * p_SUM_1T;
59 int * p_SUM_TBB;
60
61 /* This is the TBB runtime... */
62 task_scheduler_init init;
63
64 if( argc != 2 ) {
65 fatal("Usage: %s <arraySize>\n",argv[0]);
66 }
67
68 int nElements = atoi( argv[1] );
69 if( nElements <= 2 ) {
70 fatal("Array size (%s) must be an integer > 2\n", argv[1]);
71 }
72
73 p_A = new int[nElements];
74 p_B = new int[nElements];
75 p_SUM_1T = new int[nElements];
76 p_SUM_TBB = new int[nElements];
77
78 /*
79 * Initialize the data sets ... could do this in parallel too, but
80 * serial is easier to read
81 */
82 p_A[0] = p_B[0] = 0;
83 p_A[1] = p_B[1] = 1;
84 for( int i=2;i<nElements;i++) {
85 p_A[i] = (p_A[i-1] + p_A[i-2]) % (INT_MAX/2);
86 p_B[i] = p_A[i];
87 p_SUM_1T[i] = 0;
88 p_SUM_TBB[i] = 0;
89 }
90
91
92 /*
93 * Time how long it takes to sum the arrays using a single thread
94 */
95 starttime = gethrtime();
96
97 for( int i=0;i<nElements;i++ ) {
98 p_SUM_1T[i] = p_A[i] + p_B[i];
99 }
100
101 endtime = gethrtime();
102 singlethread_time = endtime - starttime;
103
104 /*
105 * Now sum the arrays again using TBB, again timing the execution
106 */
107 starttime = gethrtime();
108
109 parallel_for(blocked_range<int>(0, nElements, 100),
110 ArraySummer( p_A, p_B, p_SUM_TBB ) );
111
112 endtime = gethrtime();
113 tbb_time = endtime - starttime;
114
115 /*
116 * Verify the sums match
117 */
118 for(int i=0;i<nElements;i++) {
119 if( p_SUM_1T[i] != p_SUM_TBB[i] ) {
120 cout << p_A[i] << " + " << p_B[i] << " = " << p_SUM_1T[i] << " AND " << p_SUM_TBB[i] << endl;
121 }
122 }
123
124 /*
125 * Print the times
126 */
127 cout << "1T summing time: " << singlethread_time << " ticks" << endl;
128 cout << "TBB summing time: " << tbb_time << " ticks" << endl;
129
130 delete [] p_A;
131 delete [] p_B;
132 delete [] p_SUM_1T;
133 delete [] p_SUM_TBB;
134
135 return 0;
136 }
137