gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pseudo_inst.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Marc Orr
34  */
35 
36 #include <csignal>
37 
38 #include "arch/hsail/insts/decl.hh"
39 #include "arch/hsail/insts/mem.hh"
40 
41 namespace HsailISA
42 {
43  // Pseudo (or magic) instructions are overloaded on the hsail call
44  // instruction, because of its flexible parameter signature.
45 
46  // To add a new magic instruction:
47  // 1. Add an entry to the enum.
48  // 2. Implement it in the switch statement below (Call::exec).
49  // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
50  // so its easy to call from an OpenCL kernel.
51 
52  // This enum should be identical to the enum in
53  // hsa/hsail-gpu-compute/util/magicinst.h
54  enum
55  {
77  };
78 
79  void
81  {
82  const VectorMask &mask = w->getPred();
83 
84  int op = 0;
85  bool got_op = false;
86 
87  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
88  if (mask[lane]) {
89  int src_val0 = src1.get<int>(w, lane, 0);
90  if (got_op) {
91  if (src_val0 != op) {
92  fatal("Multiple magic instructions per PC not "
93  "supported\n");
94  }
95  } else {
96  op = src_val0;
97  got_op = true;
98  }
99  }
100  }
101 
102  switch(op) {
103  case MAGIC_PRINT_WF_32:
104  MagicPrintWF32(w);
105  break;
106  case MAGIC_PRINT_WF_64:
107  MagicPrintWF64(w);
108  break;
109  case MAGIC_PRINT_LANE:
110  MagicPrintLane(w);
111  break;
112  case MAGIC_PRINT_LANE_64:
113  MagicPrintLane64(w);
114  break;
117  break;
118  case MAGIC_SIM_BREAK:
119  MagicSimBreak(w);
120  break;
121  case MAGIC_PREF_SUM:
122  MagicPrefixSum(w);
123  break;
124  case MAGIC_REDUCTION:
125  MagicReduction(w);
126  break;
128  MagicMaskLower(w);
129  break;
131  MagicMaskUpper(w);
132  break;
133  case MAGIC_JOIN_WF_BAR:
134  MagicJoinWFBar(w);
135  break;
136  case MAGIC_WAIT_WF_BAR:
137  MagicWaitWFBar(w);
138  break;
139  case MAGIC_PANIC:
140  MagicPanic(w);
141  break;
142 
143  // atomic instructions
145  MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
146  break;
147 
149  MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
150  break;
151 
153  MagicLoadGlobalU32Reg(w, gpuDynInst);
154  break;
155 
156  case MAGIC_XACT_CAS_LD:
157  MagicXactCasLd(w);
158  break;
159 
160  case MAGIC_MOST_SIG_THD:
162  break;
163 
166  break;
167 
168  case MAGIC_PRINT_WFID_32:
169  MagicPrintWF32ID(w);
170  break;
171 
172  case MAGIC_PRINT_WFID_64:
173  MagicPrintWFID64(w);
174  break;
175 
176  default: fatal("unrecognized magic instruction: %d\n", op);
177  }
178  }
179 
180  void
182  {
183  #if TRACING_ON
184  const VectorMask &mask = w->getPred();
185  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
186  if (mask[lane]) {
187  int src_val1 = src1.get<int>(w, lane, 1);
188  int src_val2 = src1.get<int>(w, lane, 2);
189  if (src_val2) {
190  DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
191  disassemble(), w->computeUnit->cu_id, w->simdId,
192  w->wfSlotId, lane, src_val1);
193  } else {
194  DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
195  disassemble(), w->computeUnit->cu_id, w->simdId,
196  w->wfSlotId, lane, src_val1);
197  }
198  }
199  }
200  #endif
201  }
202 
203  void
205  {
206  #if TRACING_ON
207  const VectorMask &mask = w->getPred();
208  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
209  if (mask[lane]) {
210  int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
211  int src_val2 = src1.get<int>(w, lane, 2);
212  if (src_val2) {
213  DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
214  disassemble(), w->computeUnit->cu_id, w->simdId,
215  w->wfSlotId, lane, src_val1);
216  } else {
217  DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
218  disassemble(), w->computeUnit->cu_id, w->simdId,
219  w->wfSlotId, lane, src_val1);
220  }
221  }
222  }
223  #endif
224  }
225 
226  void
228  {
229  #if TRACING_ON
230  const VectorMask &mask = w->getPred();
231  std::string res_str;
232  res_str = csprintf("krl_prt (%s)\n", disassemble());
233 
234  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
235  if (!(lane & 7)) {
236  res_str += csprintf("DB%03d: ", (int)w->wfDynId);
237  }
238 
239  if (mask[lane]) {
240  int src_val1 = src1.get<int>(w, lane, 1);
241  int src_val2 = src1.get<int>(w, lane, 2);
242 
243  if (src_val2) {
244  res_str += csprintf("%08x", src_val1);
245  } else {
246  res_str += csprintf("%08d", src_val1);
247  }
248  } else {
249  res_str += csprintf("xxxxxxxx");
250  }
251 
252  if ((lane & 7) == 7) {
253  res_str += csprintf("\n");
254  } else {
255  res_str += csprintf(" ");
256  }
257  }
258 
259  res_str += "\n\n";
260  DPRINTFN(res_str.c_str());
261  #endif
262  }
263 
264  void
266  {
267  #if TRACING_ON
268  const VectorMask &mask = w->getPred();
269  std::string res_str;
270  int src_val3 = -1;
271  res_str = csprintf("krl_prt (%s)\n", disassemble());
272 
273  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
274  if (!(lane & 7)) {
275  res_str += csprintf("DB%03d: ", (int)w->wfDynId);
276  }
277 
278  if (mask[lane]) {
279  int src_val1 = src1.get<int>(w, lane, 1);
280  int src_val2 = src1.get<int>(w, lane, 2);
281  src_val3 = src1.get<int>(w, lane, 3);
282 
283  if (src_val2) {
284  res_str += csprintf("%08x", src_val1);
285  } else {
286  res_str += csprintf("%08d", src_val1);
287  }
288  } else {
289  res_str += csprintf("xxxxxxxx");
290  }
291 
292  if ((lane & 7) == 7) {
293  res_str += csprintf("\n");
294  } else {
295  res_str += csprintf(" ");
296  }
297  }
298 
299  res_str += "\n\n";
300  if (w->wfDynId == src_val3) {
301  DPRINTFN(res_str.c_str());
302  }
303  #endif
304  }
305 
306  void
308  {
309  #if TRACING_ON
310  const VectorMask &mask = w->getPred();
311  std::string res_str;
312  res_str = csprintf("krl_prt (%s)\n", disassemble());
313 
314  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
315  if (!(lane & 3)) {
316  res_str += csprintf("DB%03d: ", (int)w->wfDynId);
317  }
318 
319  if (mask[lane]) {
320  int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
321  int src_val2 = src1.get<int>(w, lane, 2);
322 
323  if (src_val2) {
324  res_str += csprintf("%016x", src_val1);
325  } else {
326  res_str += csprintf("%016d", src_val1);
327  }
328  } else {
329  res_str += csprintf("xxxxxxxxxxxxxxxx");
330  }
331 
332  if ((lane & 3) == 3) {
333  res_str += csprintf("\n");
334  } else {
335  res_str += csprintf(" ");
336  }
337  }
338 
339  res_str += "\n\n";
340  DPRINTFN(res_str.c_str());
341  #endif
342  }
343 
344  void
346  {
347  #if TRACING_ON
348  const VectorMask &mask = w->getPred();
349  std::string res_str;
350  int src_val3 = -1;
351  res_str = csprintf("krl_prt (%s)\n", disassemble());
352 
353  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
354  if (!(lane & 3)) {
355  res_str += csprintf("DB%03d: ", (int)w->wfDynId);
356  }
357 
358  if (mask[lane]) {
359  int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
360  int src_val2 = src1.get<int>(w, lane, 2);
361  src_val3 = src1.get<int>(w, lane, 3);
362 
363  if (src_val2) {
364  res_str += csprintf("%016x", src_val1);
365  } else {
366  res_str += csprintf("%016d", src_val1);
367  }
368  } else {
369  res_str += csprintf("xxxxxxxxxxxxxxxx");
370  }
371 
372  if ((lane & 3) == 3) {
373  res_str += csprintf("\n");
374  } else {
375  res_str += csprintf(" ");
376  }
377  }
378 
379  res_str += "\n\n";
380  if (w->wfDynId == src_val3) {
381  DPRINTFN(res_str.c_str());
382  }
383  #endif
384  }
385 
386  void
388  {
389  #if TRACING_ON
390  const VectorMask &mask = w->getPred();
391  std::string res_str;
392  res_str = csprintf("krl_prt (%s)\n", disassemble());
393 
394  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
395  if (!(lane & 7)) {
396  res_str += csprintf("DB%03d: ", (int)w->wfDynId);
397  }
398 
399  if (mask[lane]) {
400  float src_val1 = src1.get<float>(w, lane, 1);
401  res_str += csprintf("%08f", src_val1);
402  } else {
403  res_str += csprintf("xxxxxxxx");
404  }
405 
406  if ((lane & 7) == 7) {
407  res_str += csprintf("\n");
408  } else {
409  res_str += csprintf(" ");
410  }
411  }
412 
413  res_str += "\n\n";
414  DPRINTFN(res_str.c_str());
415  #endif
416  }
417 
418  // raises a signal that GDB will catch
419  // when done with the break, type "signal 0" in gdb to continue
420  void
422  {
423  std::string res_str;
424  // print out state for this wavefront and then break
425  res_str = csprintf("Breakpoint encountered for wavefront %i\n",
426  w->wfSlotId);
427 
428  res_str += csprintf(" Kern ID: %i\n", w->kernId);
429  res_str += csprintf(" Phase ID: %i\n", w->simdId);
430  res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
431  res_str += csprintf(" Exec mask: ");
432 
433  for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
434  if (w->execMask(i))
435  res_str += "1";
436  else
437  res_str += "0";
438 
439  if ((i & 7) == 7)
440  res_str += " ";
441  }
442 
443  res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
444 
445  res_str += "\nHelpful debugging hints:\n";
446  res_str += " Check out w->s_reg / w->d_reg for register state\n";
447 
448  res_str += "\n\n";
449  DPRINTFN(res_str.c_str());
450  fflush(stdout);
451 
452  raise(SIGTRAP);
453  }
454 
455  void
457  {
458  const VectorMask &mask = w->getPred();
459  int res = 0;
460 
461  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462  if (mask[lane]) {
463  int src_val1 = src1.get<int>(w, lane, 1);
464  dest.set<int>(w, lane, res);
465  res += src_val1;
466  }
467  }
468  }
469 
470  void
472  {
473  // reduction magic instruction
474  // The reduction instruction takes up to 64 inputs (one from
475  // each thread in a WF) and sums them. It returns the sum to
476  // each thread in the WF.
477  const VectorMask &mask = w->getPred();
478  int res = 0;
479 
480  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
481  if (mask[lane]) {
482  int src_val1 = src1.get<int>(w, lane, 1);
483  res += src_val1;
484  }
485  }
486 
487  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
488  if (mask[lane]) {
489  dest.set<int>(w, lane, res);
490  }
491  }
492  }
493 
494  void
496  {
497  const VectorMask &mask = w->getPred();
498  int res = 0;
499 
500  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
501  if (mask[lane]) {
502  int src_val1 = src1.get<int>(w, lane, 1);
503 
504  if (src_val1) {
505  if (lane < (w->computeUnit->wfSize()/2)) {
506  res = res | ((uint32_t)(1) << lane);
507  }
508  }
509  }
510  }
511 
512  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
513  if (mask[lane]) {
514  dest.set<int>(w, lane, res);
515  }
516  }
517  }
518 
519  void
521  {
522  const VectorMask &mask = w->getPred();
523  int res = 0;
524  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
525  if (mask[lane]) {
526  int src_val1 = src1.get<int>(w, lane, 1);
527 
528  if (src_val1) {
529  if (lane >= (w->computeUnit->wfSize()/2)) {
530  res = res | ((uint32_t)(1) <<
531  (lane - (w->computeUnit->wfSize()/2)));
532  }
533  }
534  }
535  }
536 
537  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
538  if (mask[lane]) {
539  dest.set<int>(w, lane, res);
540  }
541  }
542  }
543 
544  void
546  {
547  const VectorMask &mask = w->getPred();
548  int max_cnt = 0;
549 
550  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
551  if (mask[lane]) {
552  w->barCnt[lane]++;
553 
554  if (w->barCnt[lane] > max_cnt) {
555  max_cnt = w->barCnt[lane];
556  }
557  }
558  }
559 
560  if (max_cnt > w->maxBarCnt) {
561  w->maxBarCnt = max_cnt;
562  }
563  }
564 
565  void
567  {
568  const VectorMask &mask = w->getPred();
569  int max_cnt = 0;
570 
571  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
572  if (mask[lane]) {
573  w->barCnt[lane]--;
574  }
575 
576  if (w->barCnt[lane] > max_cnt) {
577  max_cnt = w->barCnt[lane];
578  }
579  }
580 
581  if (max_cnt < w->maxBarCnt) {
582  w->maxBarCnt = max_cnt;
583  }
584 
585  w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
586  w->instructionBuffer.end());
587  if (w->pendingFetch)
588  w->dropFetch = true;
589  }
590 
591  void
593  {
594  const VectorMask &mask = w->getPred();
595 
596  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
597  if (mask[lane]) {
598  int src_val1 = src1.get<int>(w, lane, 1);
599  panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
600  src_val1, lane);
601  }
602  }
603  }
604 
605  void
607  {
608  // the address is in src1 | src2
609  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
610  int src_val1 = src1.get<int>(w, lane, 1);
611  int src_val2 = src1.get<int>(w, lane, 2);
612  Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
613 
614  m->addr[lane] = addr;
615  }
616 
617  }
618 
619  void
621  {
622  GPUDynInstPtr m = gpuDynInst;
623 
624  calcAddr(w, m);
625 
626  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
627  ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
628  }
629 
630  setFlag(AtomicNoReturn);
631  setFlag(AtomicAdd);
632  setFlag(NoScope);
633  setFlag(NoOrder);
634  setFlag(GlobalSegment);
635 
636  m->m_type = U32::memType;
637  m->v_type = U32::vgprType;
638 
639  m->exec_mask = w->execMask();
640  m->statusBitVector = 0;
641  m->equiv = 0; // atomics don't have an equivalence class operand
642  m->n_reg = 1;
643 
644  m->simdId = w->simdId;
645  m->wfSlotId = w->wfSlotId;
646  m->wfDynId = w->wfDynId;
647  m->latency.init(&w->computeUnit->shader->tick_cnt);
648 
649  m->pipeId = GLBMEM_PIPE;
650  m->latency.set(w->computeUnit->shader->ticks(64));
652  w->outstandingReqsWrGm++;
653  w->wrGmReqsInPipe--;
654  w->outstandingReqsRdGm++;
655  w->rdGmReqsInPipe--;
656  w->outstandingReqs++;
657  w->memReqsInPipe--;
658  }
659 
660  void
662  {
663  GPUDynInstPtr m = gpuDynInst;
664  calcAddr(w, m);
665 
666  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
667  ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
668  }
669 
670  setFlag(AtomicNoReturn);
671  setFlag(AtomicAdd);
672  setFlag(NoScope);
673  setFlag(NoOrder);
674  setFlag(GlobalSegment);
675 
676  m->m_type = U32::memType;
677  m->v_type = U32::vgprType;
678 
679  m->exec_mask = w->execMask();
680  m->statusBitVector = 0;
681  m->equiv = 0; // atomics don't have an equivalence class operand
682  m->n_reg = 1;
683 
684  m->simdId = w->simdId;
685  m->wfSlotId = w->wfSlotId;
686  m->wfDynId = w->wfDynId;
687  m->latency.init(&w->computeUnit->shader->tick_cnt);
688 
689  m->pipeId = GLBMEM_PIPE;
690  m->latency.set(w->computeUnit->shader->ticks(64));
692  w->outstandingReqsWrGm++;
693  w->wrGmReqsInPipe--;
694  w->outstandingReqsRdGm++;
695  w->rdGmReqsInPipe--;
696  w->outstandingReqs++;
697  w->memReqsInPipe--;
698  }
699 
700  void
702  {
703  GPUDynInstPtr m = gpuDynInst;
704  // calculate the address
705  calcAddr(w, m);
706 
707  setFlag(Load);
708  setFlag(NoScope);
709  setFlag(NoOrder);
710  setFlag(GlobalSegment);
711 
712  m->m_type = U32::memType; //MemDataType::memType;
713  m->v_type = U32::vgprType; //DestDataType::vgprType;
714 
715  m->exec_mask = w->execMask();
716  m->statusBitVector = 0;
717  m->equiv = 0;
718  m->n_reg = 1;
719 
720  // FIXME
721  //m->dst_reg = this->dest.regIndex();
722 
723  m->simdId = w->simdId;
724  m->wfSlotId = w->wfSlotId;
725  m->wfDynId = w->wfDynId;
726  m->latency.init(&w->computeUnit->shader->tick_cnt);
727 
728  m->pipeId = GLBMEM_PIPE;
729  m->latency.set(w->computeUnit->shader->ticks(1));
731  w->outstandingReqsRdGm++;
732  w->rdGmReqsInPipe--;
733  w->outstandingReqs++;
734  w->memReqsInPipe--;
735  }
736 
737  void
739  {
740  const VectorMask &mask = w->getPred();
741  int src_val1 = 0;
742 
743  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
744  if (mask[lane]) {
745  src_val1 = src1.get<int>(w, lane, 1);
746  break;
747  }
748  }
749 
750  if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
752  w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
753  }
754 
755  w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
756  .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
757  }
758 
759  void
761  {
762  const VectorMask &mask = w->getPred();
763  unsigned mst = true;
764 
765  for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
766  if (mask[lane]) {
767  dest.set<int>(w, lane, mst);
768  mst = false;
769  }
770  }
771  }
772 
773  void
775  {
776  const VectorMask &mask = w->getPred();
777  int res = 0;
778  bool got_res = false;
779 
780  for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
781  if (mask[lane]) {
782  if (!got_res) {
783  res = src1.get<int>(w, lane, 1);
784  got_res = true;
785  }
786  dest.set<int>(w, lane, res);
787  }
788  }
789  }
790 
791 } // namespace HsailISA
Tick ticks(int numCycles) const
Definition: shader.hh:91
VectorMask getPred()
Definition: wavefront.hh:338
void MagicMaskUpper(Wavefront *w)
Definition: pseudo_inst.cc:520
Bitfield< 7 > i
Definition: miscregs.hh:1378
std::map< unsigned, waveQueue > xactCasLoadMap
Bitfield< 0 > m
Definition: miscregs.hh:1577
#define panic(...)
Definition: misc.hh:153
void setFlag(Flags flag)
int maxBarCnt
Definition: wavefront.hh:254
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
ip6_addr_t addr
Definition: inet.hh:335
void MagicPrintWF64(Wavefront *w)
Definition: pseudo_inst.cc:307
static const vgpr_type vgprType
Definition: decl.hh:72
int wfSize() const
int simdId
Definition: wavefront.hh:165
void MagicPrintWF32ID(Wavefront *w)
Definition: pseudo_inst.cc:265
bool dropFetch
Definition: wavefront.hh:172
int kernId
Definition: wavefront.hh:163
int wfSlotId
Definition: wavefront.hh:162
#define DPRINTFN(...)
Definition: trace.hh:216
void MagicMostSigBroadcast(Wavefront *w)
Definition: pseudo_inst.cc:774
void MagicPrintWFID64(Wavefront *w)
Definition: pseudo_inst.cc:345
uint64_t wfDynId
Definition: wavefront.hh:282
void MagicSimBreak(Wavefront *w)
Definition: pseudo_inst.cc:421
GlobalMemPipeline globalMemoryPipe
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
void MagicMaskLower(Wavefront *w)
Definition: pseudo_inst.cc:495
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:169
void MagicWaitWFBar(Wavefront *w)
Definition: pseudo_inst.cc:566
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
void calcAddr(Wavefront *w, GPUDynInstPtr m)
Definition: pseudo_inst.cc:606
void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
Definition: pseudo_inst.cc:80
void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
Definition: pseudo_inst.cc:701
void MagicPanic(Wavefront *w)
Definition: pseudo_inst.cc:592
std::vector< int > barCnt
Definition: wavefront.hh:253
ComputeUnit * computeUnit
Definition: wavefront.hh:167
uint32_t rdGmReqsInPipe
Definition: wavefront.hh:223
#define fatal(...)
Definition: misc.hh:163
uint32_t outstandingReqsRdGm
Definition: wavefront.hh:219
ListOperand dest
Definition: decl.hh:1181
void MagicPrintWF32(Wavefront *w)
Definition: pseudo_inst.cc:227
ListOperand src1
Definition: decl.hh:1183
Bitfield< 0 > w
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
void MagicPrintLane(Wavefront *w)
Definition: pseudo_inst.cc:181
void MagicXactCasLd(Wavefront *w)
Definition: pseudo_inst.cc:738
void MagicReduction(Wavefront *w)
Definition: pseudo_inst.cc:471
uint32_t outstandingReqs
Definition: wavefront.hh:210
void MagicJoinWFBar(Wavefront *w)
Definition: pseudo_inst.cc:545
void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
Definition: pseudo_inst.cc:620
uint32_t outstandingReqsWrGm
Definition: wavefront.hh:215
Shader * shader
bool pendingFetch
Definition: wavefront.hh:171
uint32_t memReqsInPipe
Definition: wavefront.hh:213
void MagicPrintWFFloat(Wavefront *w)
Definition: pseudo_inst.cc:387
VectorMask execMask() const
Definition: wavefront.cc:828
void issueRequest(GPUDynInstPtr gpuDynInst)
issues a request to the pipeline - i.e., enqueue it in the request buffer.
uint32_t wrGmReqsInPipe
Definition: wavefront.hh:225
OperandType get(Wavefront *w, int lane, int arg_idx)
Definition: operand.hh:771
void MagicPrintLane64(Wavefront *w)
Definition: pseudo_inst.cc:204
void MagicMostSigThread(Wavefront *w)
Definition: pseudo_inst.cc:760
const std::string & disassemble()
Bitfield< 3, 0 > mask
Definition: types.hh:64
void MagicPrefixSum(Wavefront *w)
Definition: pseudo_inst.cc:456
Bitfield< 4 > op
Definition: types.hh:80
void MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
Definition: pseudo_inst.cc:661
uint64_t tick_cnt
Definition: shader.hh:161
static const Enums::MemType memType
Definition: decl.hh:71
void set(Wavefront *w, int lane, OperandType val)
Definition: operand.hh:778

Generated on Fri Jun 9 2017 13:03:39 for gem5 by doxygen 1.8.6