gpu_dyn_inst.cc
/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Anthony Gutierrez
 */
#include "gpu-compute/gpu_dyn_inst.hh"

#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
                       GPUStaticInst *static_inst, uint64_t instSeqNum)
    : GPUExecContext(_cu, _wf), addr(computeUnit()->wfSize(), (Addr)0),
      n_reg(0), useContinuation(false),
      statusBitVector(0), _staticInst(static_inst), _seqNum(instSeqNum)
{
    tlbHitLevel.assign(computeUnit()->wfSize(), -1);
    d_data = new uint8_t[computeUnit()->wfSize() * 16];
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
        a_data[i] = 0;
        x_data[i] = 0;
    }
    for (int i = 0; i < (computeUnit()->wfSize() * 16); ++i) {
        d_data[i] = 0;
    }
}
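// Note on the allocations above (an inference from the sizes, not stated
// in the original source): each buffer is sized per lane, with wfSize()
// lanes per wavefront; d_data reserves 16 bytes of destination data per
// lane, while a_data and x_data reserve 8 bytes per lane for atomic
// operand and exchange data, respectively.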

GPUDynInst::~GPUDynInst()
{
    delete[] d_data;
    delete[] a_data;
    delete[] x_data;
}

void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
    _staticInst->execute(gpuDynInst);
}
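// A GPUDynInst is a thin dynamic wrapper around a GPUStaticInst: the
// methods below simply forward to the wrapped static instruction, while
// per-execution state (the per-lane addresses, data buffers, TLB hit
// levels, and the sequence number) lives in this object.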

int
GPUDynInst::numSrcRegOperands()
{
    return _staticInst->numSrcRegOperands();
}

int
GPUDynInst::numDstRegOperands()
{
    return _staticInst->numDstRegOperands();
}

int
GPUDynInst::getNumOperands()
{
    return _staticInst->getNumOperands();
}

bool
GPUDynInst::isVectorRegister(int operandIdx)
{
    return _staticInst->isVectorRegister(operandIdx);
}

bool
GPUDynInst::isScalarRegister(int operandIdx)
{
    return _staticInst->isScalarRegister(operandIdx);
}

bool
GPUDynInst::isCondRegister(int operandIdx)
{
    return _staticInst->isCondRegister(operandIdx);
}

int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
{
    return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
}

int
GPUDynInst::getOperandSize(int operandIdx)
{
    return _staticInst->getOperandSize(operandIdx);
}

bool
GPUDynInst::isDstOperand(int operandIdx)
{
    return _staticInst->isDstOperand(operandIdx);
}

bool
GPUDynInst::isSrcOperand(int operandIdx)
{
    return _staticInst->isSrcOperand(operandIdx);
}

const std::string&
GPUDynInst::disassemble() const
{
    return _staticInst->disassemble();
}

uint64_t
GPUDynInst::seqNum() const
{
    return _seqNum;
}

Enums::StorageClassType
GPUDynInst::executedAs()
{
    return _staticInst->executed_as;
}

// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
            cu->cu_id, simdId, wfSlotId, exec_mask.to_ullong());

    _staticInst->initiateAcc(gpuDynInst);
    time = 0;
}
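// A reading of the flow here, not stated explicitly in this file: the
// static instruction's initiateAcc() builds and issues the per-lane
// timing memory requests through the compute unit, and completeAcc()
// below is the matching hook invoked once the access has finished.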

void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
    _staticInst->completeAcc(gpuDynInst);
}

/**
 * accessor methods for the attributes of
 * the underlying GPU static instruction
 */
bool
GPUDynInst::isALU() const
{
    return _staticInst->isALU();
}

bool
GPUDynInst::isBranch() const
{
    return _staticInst->isBranch();
}

bool
GPUDynInst::isNop() const
{
    return _staticInst->isNop();
}

bool
GPUDynInst::isReturn() const
{
    return _staticInst->isReturn();
}

bool
GPUDynInst::isUnconditionalJump() const
{
    return _staticInst->isUnconditionalJump();
}

bool
GPUDynInst::isSpecialOp() const
{
    return _staticInst->isSpecialOp();
}

bool
GPUDynInst::isWaitcnt() const
{
    return _staticInst->isWaitcnt();
}

bool
GPUDynInst::isBarrier() const
{
    return _staticInst->isBarrier();
}

bool
GPUDynInst::isMemFence() const
{
    return _staticInst->isMemFence();
}

bool
GPUDynInst::isMemRef() const
{
    return _staticInst->isMemRef();
}

bool
GPUDynInst::isFlat() const
{
    return _staticInst->isFlat();
}

bool
GPUDynInst::isLoad() const
{
    return _staticInst->isLoad();
}

bool
GPUDynInst::isStore() const
{
    return _staticInst->isStore();
}

bool
GPUDynInst::isAtomic() const
{
    return _staticInst->isAtomic();
}

bool
GPUDynInst::isAtomicNoRet() const
{
    return _staticInst->isAtomicNoRet();
}

bool
GPUDynInst::isAtomicRet() const
{
    return _staticInst->isAtomicRet();
}

bool
GPUDynInst::isScalar() const
{
    return _staticInst->isScalar();
}

bool
GPUDynInst::readsSCC() const
{
    return _staticInst->readsSCC();
}

bool
GPUDynInst::writesSCC() const
{
    return _staticInst->writesSCC();
}

bool
GPUDynInst::readsVCC() const
{
    return _staticInst->readsVCC();
}

bool
GPUDynInst::writesVCC() const
{
    return _staticInst->writesVCC();
}

bool
GPUDynInst::isAtomicAnd() const
{
    return _staticInst->isAtomicAnd();
}

bool
GPUDynInst::isAtomicOr() const
{
    return _staticInst->isAtomicOr();
}

bool
GPUDynInst::isAtomicXor() const
{
    return _staticInst->isAtomicXor();
}

bool
GPUDynInst::isAtomicCAS() const
{
    return _staticInst->isAtomicCAS();
}

bool
GPUDynInst::isAtomicExch() const
{
    return _staticInst->isAtomicExch();
}

bool
GPUDynInst::isAtomicAdd() const
{
    return _staticInst->isAtomicAdd();
}

bool
GPUDynInst::isAtomicSub() const
{
    return _staticInst->isAtomicSub();
}

bool
GPUDynInst::isAtomicInc() const
{
    return _staticInst->isAtomicInc();
}

bool
GPUDynInst::isAtomicDec() const
{
    return _staticInst->isAtomicDec();
}

bool
GPUDynInst::isAtomicMax() const
{
    return _staticInst->isAtomicMax();
}

bool
GPUDynInst::isAtomicMin() const
{
    return _staticInst->isAtomicMin();
}

bool
GPUDynInst::isArgLoad() const
{
    return _staticInst->isArgLoad();
}

bool
GPUDynInst::isGlobalMem() const
{
    return _staticInst->isGlobalMem();
}

bool
GPUDynInst::isLocalMem() const
{
    return _staticInst->isLocalMem();
}

bool
GPUDynInst::isArgSeg() const
{
    return _staticInst->isArgSeg();
}

bool
GPUDynInst::isGlobalSeg() const
{
    return _staticInst->isGlobalSeg();
}

bool
GPUDynInst::isGroupSeg() const
{
    return _staticInst->isGroupSeg();
}

bool
GPUDynInst::isKernArgSeg() const
{
    return _staticInst->isKernArgSeg();
}

bool
GPUDynInst::isPrivateSeg() const
{
    return _staticInst->isPrivateSeg();
}

bool
GPUDynInst::isReadOnlySeg() const
{
    return _staticInst->isReadOnlySeg();
}

bool
GPUDynInst::isSpillSeg() const
{
    return _staticInst->isSpillSeg();
}

bool
GPUDynInst::isWorkitemScope() const
{
    return _staticInst->isWorkitemScope();
}

bool
GPUDynInst::isWavefrontScope() const
{
    return _staticInst->isWavefrontScope();
}

bool
GPUDynInst::isWorkgroupScope() const
{
    return _staticInst->isWorkgroupScope();
}

bool
GPUDynInst::isDeviceScope() const
{
    return _staticInst->isDeviceScope();
}

bool
GPUDynInst::isSystemScope() const
{
    return _staticInst->isSystemScope();
}

bool
GPUDynInst::isNoScope() const
{
    return _staticInst->isNoScope();
}

bool
GPUDynInst::isRelaxedOrder() const
{
    return _staticInst->isRelaxedOrder();
}

bool
GPUDynInst::isAcquire() const
{
    return _staticInst->isAcquire();
}

bool
GPUDynInst::isRelease() const
{
    return _staticInst->isRelease();
}

bool
GPUDynInst::isAcquireRelease() const
{
    return _staticInst->isAcquireRelease();
}

bool
GPUDynInst::isNoOrder() const
{
    return _staticInst->isNoOrder();
}

bool
GPUDynInst::isGloballyCoherent() const
{
    return _staticInst->isGloballyCoherent();
}

bool
GPUDynInst::isSystemCoherent() const
{
    return _staticInst->isSystemCoherent();
}

void
GPUDynInst::updateStats()
{
    if (_staticInst->isLocalMem()) {
        // access to LDS (shared) memory
        cu->dynamicLMemInstrCnt++;
    } else {
        // access to global memory

        // update PageDivergence histogram
        int number_pages_touched = cu->pagesTouched.size();
        assert(number_pages_touched);
        cu->pageDivergenceDist.sample(number_pages_touched);

        std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;

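        // A hypothetical walk-through of the bookkeeping below (added for
        // clarity, not part of the original source): suppose this
        // instruction touched page P with four accesses. If P is new,
        // insert() succeeds and pageAccesses records (1 instruction,
        // 4 accesses) for P; if P was seen before, insert() fails
        // (ret.second is false) and the existing pair is bumped to
        // (count + 1, accesses + 4) instead.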
        for (auto it : cu->pagesTouched) {
            // see if this page has been touched before. if not, this also
            // inserts the page into the table.
            ret = cu->pageAccesses
                .insert(ComputeUnit::pageDataStruct::value_type(it.first,
                        std::make_pair(1, it.second)));

            // if yes, then update the stats
            if (!ret.second) {
                ret.first->second.first++;
                ret.first->second.second += it.second;
            }
        }

        cu->pagesTouched.clear();

        // total number of dynamic memory instructions; atomics are
        // counted as a single memory instruction. This count is per
        // wavefront, not per work-item.
        cu->dynamicGMemInstrCnt++;
    }
}
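
// A minimal usage sketch (hypothetical caller code, not part of gem5's
// sources). It exercises only methods defined above; how cu, wf, and the
// static instruction are obtained is assumed.
//
//     GPUDynInstPtr di = std::make_shared<GPUDynInst>(cu, wf, staticInst,
//                                                     seqNum);
//     if (di->isMemRef()) {
//         di->initiateAcc(di);    // submit timing memory request(s)
//         // ... later, when the response returns: di->completeAcc(di);
//     } else {
//         di->execute(di);        // functionally execute the instruction
//     }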