mem_impl.hh
/*
 * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Steve Reinhardt
 */

#include "arch/hsail/generic_types.hh"
#include "gpu-compute/hsail_code.hh"

// defined in code.cc, but not worth sucking in all of code.h for this
// at this point
extern const char *segmentNames[];

namespace HsailISA
{
    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
    {
        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
                                     DestDataType::label,
                                     this->dest.disassemble(),
                                     this->addr.disassemble());
    }

    template<typename DestDataType, typename AddrRegOperandType>
    void
    LdaInst<DestDataType,
            AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename DestDataType::CType CType M5_VAR_USED;
        const VectorMask &mask = w->getPred();
        std::vector<Addr> addr_vec;
        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
        this->addr.calcVector(w, addr_vec);

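        // lda only materializes the per-lane effective address into the
        // destination register; no memory request is issued to any pipeline.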
        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            if (mask[lane]) {
                this->dest.set(w, lane, addr_vec[lane]);
            }
        }
        addr_vec.clear();
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_dest_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 3:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         MemDataType::label,
                                         this->dest_vect[0].disassemble(),
                                         this->dest_vect[1].disassemble(),
                                         this->dest_vect[2].disassemble(),
                                         this->dest_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad ld register dest operand, num vector operands: %d\n",
                  num_dest_operands);
            break;
        }
    }

    static Addr
    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
    {
        // what is the size of the object we are accessing?
        // NOTE: the compiler doesn't generate enough information
        // to do this yet... we have to just line up all the private
        // work-item spaces back to back for now
        /*
        StorageElement* se =
            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
        assert(se);

        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
            se->offset * w->computeUnit->wfSize() +
            lane * se->size;
        */

        // addressing strategy: interleave the private spaces of
        // work-items in a wavefront at 8-byte granularity.
        // this won't give perfect coalescing like the spill space
        // strategy, but it's better than nothing. The spill space
        // strategy won't work with private because the same address
        // may be accessed by different sized loads/stores.

        // Note: this assumes that the largest load/store to private
        // is 8 bytes. If it is larger, the stride will have to increase.

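        // Worked example (hypothetical numbers): with wfSize() == 64 and a
        // per-work-item offset addr == 21, addr_div8 == 2 and addr_mod8 == 5,
        // so lane 3 maps to privBase + 2*8*64 + 3*8 + 5 == privBase + 1053.
        // Lanes touching the same 8-byte chunk thus land 8 bytes apart,
        // giving the coalescer contiguous wavefront-wide runs per chunk.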
        Addr addr_div8 = addr / 8;
        Addr addr_mod8 = addr % 8;

        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
            addr_mod8 + w->privBase;

        assert(ret < w->privBase +
               (w->privSizePerItem * w->computeUnit->wfSize()));

        return ret;
    }

    template<typename MemDataType, typename DestDataType,
             typename AddrRegOperandType>
    void
    LdInst<MemDataType, DestDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename MemDataType::CType MemCType;
        const VectorMask &mask = w->getPred();

        // Kernarg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
            MemCType val;

            // I assume no vector ld for kernargs
            assert(num_dest_operands == 1);

            // assuming for the moment that we'll never do register
            // offsets into kernarg space... just to make life simpler
            uint64_t address = this->addr.calcUniform();

            val = *(MemCType*)&w->kernelArgs[address];

            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);

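            // broadcast the single, uniform kernarg value to every active
            // lane's destination register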
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    this->dest.set(w, lane, val);
                }
            }

            return;
        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    MemCType val = w->readCallArgMem<MemCType>(lane, address);

                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
                            (unsigned long long)val);

                    this->dest.set(w, lane, val);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        m->m_type = MemDataType::memType;
        m->v_type = DestDataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_dest_operands == 1) {
            m->dst_reg = this->dest.regIndex();
            m->n_reg = 1;
        } else {
            m->n_reg = num_dest_operands;
            for (int i = 0; i < num_dest_operands; ++i) {
                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
            }
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (m->addr[lane] < w->privSizePerItem) {
                    if (mask[lane]) {
                        // what is the size of the object we are accessing?
                        // find the base for this wavefront

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_dest_operands == 1);
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    // note: this calculation will NOT WORK if the compiler
                    // ever generates loads/stores to the same address with
                    // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(MemCType) + w->spillBase;

                        w->lastAddr[lane] = m->addr[lane];
                    }
                }
            }
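            // Worked example for the mapping above (hypothetical numbers):
            // with spillWidth == 256 and a 4-byte MemCType, per-item spill
            // offset 8 for lane 2 becomes spillBase + 8*256 + 2*4. Each
            // spill slot thus owns a wavefront-wide region in which the
            // lanes' copies sit sizeof(MemCType) apart, so a wavefront
            // access coalesces into one contiguous run.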

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_READONLY:
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
                    m->addr[lane] += w->roBase;
                }
            }

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);

                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(MemCType) + w->privBase;
                    }
                }
            }
            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          default:
            fatal("Load to unsupported segment %d %llx\n", this->segment,
                  m->addr[0]);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *w = gpuDynInst->wavefront();

        typedef typename OperationType::CType CType;

        const VectorMask &mask = w->getPred();

        // arg references are handled uniquely for now (no Memory Request
        // is used), so special-case them up front. Someday we should
        // make this more realistic, at which point we should get rid of this
        // block and fold this case into the switch below.
        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
            uint64_t address = this->addr.calcUniform();

            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    CType data = this->src.template get<CType>(w, lane);
                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
                    w->writeCallArgMem<CType>(lane, address, data);
                }
            }

            return;
        }

        GPUDynInstPtr m = gpuDynInst;

        m->exec_mask = w->execMask();

        this->addr.calcVector(w, m->addr);

        if (num_src_operands == 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    ((CType*)m->d_data)[lane] =
                        this->src.template get<CType>(w, lane);
                }
            }
        } else {
            for (int k = 0; k < num_src_operands; ++k) {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
                            this->src_vect[k].template get<CType>(w, lane);
                    }
                }
            }
        }
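        // d_data layout, per the indexing above: operand k of lane l lives
        // at index k * wfSize() + l, i.e., the store data is packed
        // operand-major so each vector element occupies one contiguous
        // wavefront-wide stripe.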

        m->m_type = OperationType::memType;
        m->v_type = OperationType::vgprType;

        m->statusBitVector = 0;
        m->equiv = this->equivClass;

        if (num_src_operands == 1) {
            m->n_reg = 1;
        } else {
            m->n_reg = num_src_operands;
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));

            // this is a complete hack to get around a compiler bug
            // (the compiler currently generates global accesses for private
            // addresses (starting from 0), so we need to add the private
            // offset)
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                if (mask[lane]) {
                    if (m->addr[lane] < w->privSizePerItem) {

                        // calcPrivAddr will fail if accesses are unaligned
                        assert(!((sizeof(CType) - 1) & m->addr[lane]));

                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
                                                     this);

                        m->addr[lane] = privAddr;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_SPILL:
            assert(num_src_operands == 1);
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->spillSizePerItem);

                        m->addr[lane] = m->addr[lane] * w->spillWidth +
                            lane * sizeof(CType) + w->spillBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_PRIVATE:
            m->pipeId = GLBMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(1));
            {
                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                    if (mask[lane]) {
                        assert(m->addr[lane] < w->privSizePerItem);
                        m->addr[lane] = m->addr[lane] +
                            lane * sizeof(CType) + w->privBase;
                    }
                }
            }

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            break;

          default:
            fatal("Store to unsupported segment %d\n", this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    template<typename OperationType, typename SrcDataType,
             typename AddrRegOperandType>
    void
    StInst<OperationType, SrcDataType,
           AddrRegOperandType>::generateDisassembly()
    {
        switch (num_src_operands) {
          case 1:
            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src.disassemble(),
                                         this->addr.disassemble());
            break;
          case 2:
            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->addr.disassemble());
            break;
          case 4:
            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
                                         this->opcode,
                                         segmentNames[this->segment],
                                         OperationType::label,
                                         this->src_vect[0].disassemble(),
                                         this->src_vect[1].disassemble(),
                                         this->src_vect[2].disassemble(),
                                         this->src_vect[3].disassemble(),
                                         this->addr.disassemble());
            break;
          default:
            fatal("Bad st register src operand, num vector operands: %d\n",
                  num_src_operands);
            break;
        }
    }

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::execute(GPUDynInstPtr gpuDynInst)
    {
        typedef typename DataType::CType CType;

        Wavefront *w = gpuDynInst->wavefront();

        GPUDynInstPtr m = gpuDynInst;

        this->addr.calcVector(w, m->addr);

        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
            ((CType *)m->a_data)[lane] =
                this->src[0].template get<CType>(w, lane);
        }

        // load second source operand for CAS
        if (NumSrcOperands > 1) {
            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
                ((CType*)m->x_data)[lane] =
                    this->src[1].template get<CType>(w, lane);
            }
        }
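        // a_data holds each lane's first source operand (the only one for
        // most atomics); x_data holds the second operand that only
        // compare-and-swap supplies.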

        assert(NumSrcOperands <= 2);

        m->m_type = DataType::memType;
        m->v_type = DataType::vgprType;

        m->exec_mask = w->execMask();
        m->statusBitVector = 0;
        m->equiv = 0; // atomics don't have an equivalence class operand
        m->n_reg = 1;

        if (HasDst) {
            m->dst_reg = this->dest.regIndex();
        }

        m->simdId = w->simdId;
        m->wfSlotId = w->wfSlotId;
        m->wfDynId = w->wfDynId;
        m->kern_id = w->kernId;
        m->cu_id = w->computeUnit->cu_id;
        m->latency.init(&w->computeUnit->shader->tick_cnt);

        switch (this->segment) {
          case Brig::BRIG_SEGMENT_GLOBAL:
            m->latency.set(w->computeUnit->shader->ticks(64));
            m->pipeId = GLBMEM_PIPE;

            w->computeUnit->globalMemoryPipe.issueRequest(m);
            w->outstandingReqsWrGm++;
            w->wrGmReqsInPipe--;
            w->outstandingReqsRdGm++;
            w->rdGmReqsInPipe--;
            break;

          case Brig::BRIG_SEGMENT_GROUP:
            m->pipeId = LDSMEM_PIPE;
            m->latency.set(w->computeUnit->shader->ticks(24));
            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
            w->outstandingReqsWrLm++;
            w->wrLmReqsInPipe--;
            w->outstandingReqsRdLm++;
            w->rdLmReqsInPipe--;
            break;

          default:
            fatal("Atomic op to unsupported segment %d\n",
                  this->segment);
        }

        w->outstandingReqs++;
        w->memReqsInPipe--;
    }

    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);

    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
             bool HasDst>
    void
    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
               HasDst>::generateDisassembly()
    {
        if (HasDst) {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->dest.disassemble(),
                         this->addr.disassemble());
        } else {
            this->disassembly =
                csprintf("%s_%s_%s_%s %s", this->opcode,
                         atomicOpToString(this->atomicOperation),
                         segmentNames[this->segment],
                         DataType::label, this->addr.disassemble());
        }

        for (int i = 0; i < NumSrcOperands; ++i) {
            this->disassembly += ",";
            this->disassembly += this->src[i].disassemble();
        }
    }
} // namespace HsailISA