mem.hh
1 /*
2  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Steve Reinhardt
34  */
35 
36 #ifndef __ARCH_HSAIL_INSTS_MEM_HH__
37 #define __ARCH_HSAIL_INSTS_MEM_HH__
38 
39 #include <type_traits>
40 
41 #include "arch/hsail/insts/decl.hh"
42 #include "arch/hsail/insts/gpu_static_inst.hh"
43 #include "arch/hsail/operand.hh"
45 
46 namespace HsailISA
47 {
48  class MemInst
49  {
50  public:
51  MemInst() : size(0), addr_operand(nullptr) { }
52 
53  MemInst(Enums::MemType m_type)
54  {
55  if (m_type == Enums::M_U64 ||
56  m_type == Enums::M_S64 ||
57  m_type == Enums::M_F64) {
58  size = 8;
59  } else if (m_type == Enums::M_U32 ||
60  m_type == Enums::M_S32 ||
61  m_type == Enums::M_F32) {
62  size = 4;
63  } else if (m_type == Enums::M_U16 ||
64  m_type == Enums::M_S16 ||
65  m_type == Enums::M_F16) {
66  size = 2;
67  } else {
68  size = 1;
69  }
70 
71  addr_operand = nullptr;
72  }
73 
74  void
75  init_addr(AddrOperandBase *_addr_operand)
76  {
77  addr_operand = _addr_operand;
78  }
79 
80  private:
81  int size;
82  AddrOperandBase *addr_operand;
83 
84  public:
85  int getMemOperandSize() { return size; }
86  AddrOperandBase *getAddressOperand() { return addr_operand; }
87  };
88 
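 // Editor's note (illustrative sketch, not in the original source): MemInst
 // maps an HSAIL memory type to its operand size in bytes, e.g.
 //
 //     MemInst mi(Enums::M_U32);
 //     assert(mi.getMemOperandSize() == 4);  // M_F64 -> 8, M_S16 -> 2, M_U8 -> 1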
89  template<typename DestOperandType, typename AddrOperandType>
90  class LdaInstBase : public HsailGPUStaticInst
91  {
92  public:
93  typename DestOperandType::DestOperand dest;
94  AddrOperandType addr;
95 
96  LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
97  const char *_opcode)
98  : HsailGPUStaticInst(obj, _opcode)
99  {
100  using namespace Brig;
101 
102  setFlag(ALU);
103 
104  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
105  dest.init(op_offs, obj);
106  op_offs = obj->getOperandPtr(ib->operands, 1);
107  addr.init(op_offs, obj);
108  }
109 
110  int numSrcRegOperands() override
111  { return(this->addr.isVectorRegister()); }
112  int numDstRegOperands() override
113  { return dest.isVectorRegister(); }
114  bool isVectorRegister(int operandIndex) override
115  {
116  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
117  return((operandIndex == 0) ? dest.isVectorRegister() :
118  this->addr.isVectorRegister());
119  }
120  bool isCondRegister(int operandIndex) override
121  {
122  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
123  return((operandIndex == 0) ? dest.isCondRegister() :
124  this->addr.isCondRegister());
125  }
126  bool isScalarRegister(int operandIndex) override
127  {
128  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
129  return((operandIndex == 0) ? dest.isScalarRegister() :
130  this->addr.isScalarRegister());
131  }
132  bool isSrcOperand(int operandIndex) override
133  {
134  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
135  if (operandIndex > 0)
136  return(this->addr.isVectorRegister());
137  return false;
138  }
139  bool isDstOperand(int operandIndex) override {
140  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
141  return(operandIndex == 0);
142  }
143  int getOperandSize(int operandIndex) override
144  {
145  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
146  return((operandIndex == 0) ? dest.opSize() :
147  this->addr.opSize());
148  }
149  int
150  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
151  {
152  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
153  return((operandIndex == 0) ? dest.regIndex() :
154  this->addr.regIndex());
155  }
156  int getNumOperands() override
157  {
158  if (this->addr.isVectorRegister())
159  return 2;
160  return 1;
161  }
162  };
163 
164  template<typename DestDataType, typename AddrOperandType>
165  class LdaInst :
166  public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
167  public MemInst
168  {
169  public:
170  void generateDisassembly();
171 
172  LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
173  const char *_opcode)
174  : LdaInstBase<typename DestDataType::OperandType,
175  AddrOperandType>(ib, obj, _opcode)
176  {
177  init_addr(&this->addr);
178  }
179 
180  void execute(GPUDynInstPtr gpuDynInst);
181  };
182 
183  template<typename DataType>
184  GPUStaticInst*
185  decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
186  {
187  unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
188  BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
189 
190  if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
191  return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
192  } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
193  // V2/V4 not allowed
194  switch (regDataType.regKind) {
195  case Brig::BRIG_REGISTER_KIND_SINGLE:
196  return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
197  case Brig::BRIG_REGISTER_KIND_DOUBLE:
198  return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
199  default:
200  fatal("Bad ldas register operand type %d\n", regDataType.type);
201  }
202  } else {
203  fatal("Bad ldas register operand kind %d\n", regDataType.kind);
204  }
205  }
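 // Editor's note (illustrative, not in the original source): decodeLda picks
 // the address-operand template parameter from the BRIG operand kind of the
 // address, e.g. an ldas whose address lives in a 64-bit register decodes as
 //
 //     new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
 //
 // while a plain address operand uses NoRegAddrOperand.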
206 
207  template<typename MemOperandType, typename DestOperandType,
208  typename AddrOperandType>
209  class LdInstBase : public HsailGPUStaticInst
210  {
211  public:
212  Brig::BrigWidth8_t width;
213  typename DestOperandType::DestOperand dest;
214  AddrOperandType addr;
215 
216  Brig::BrigSegment segment;
217  Brig::BrigMemoryOrder memoryOrder;
218  Brig::BrigMemoryScope memoryScope;
219  unsigned int equivClass;
220 
221  LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
222  const char *_opcode)
223  : HsailGPUStaticInst(obj, _opcode)
224  {
225  using namespace Brig;
226 
227  setFlag(MemoryRef);
228  setFlag(Load);
229 
230  if (ib->opcode == BRIG_OPCODE_LD) {
231  const BrigInstMem *ldst = (const BrigInstMem*)ib;
232 
233  segment = (BrigSegment)ldst->segment;
234  memoryOrder = BRIG_MEMORY_ORDER_NONE;
235  memoryScope = BRIG_MEMORY_SCOPE_NONE;
236  equivClass = ldst->equivClass;
237 
238  width = ldst->width;
239  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
240  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
241  if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
242  dest.init(op_offs, obj);
243 
244  op_offs = obj->getOperandPtr(ib->operands, 1);
245  addr.init(op_offs, obj);
246  } else {
247  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
248 
249  segment = (BrigSegment)at->segment;
250  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
251  memoryScope = (BrigMemoryScope)at->memoryScope;
252  equivClass = 0;
253 
254  width = BRIG_WIDTH_1;
255  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
256  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
257 
258  if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
259  dest.init(op_offs, obj);
260 
261  op_offs = obj->getOperandPtr(ib->operands,1);
262  addr.init(op_offs, obj);
263  }
264 
265  switch (memoryOrder) {
266  case BRIG_MEMORY_ORDER_NONE:
267  setFlag(NoOrder);
268  break;
269  case BRIG_MEMORY_ORDER_RELAXED:
270  setFlag(RelaxedOrder);
271  break;
272  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
273  setFlag(Acquire);
274  break;
275  case BRIG_MEMORY_ORDER_SC_RELEASE:
276  setFlag(Release);
277  break;
278  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
279  setFlag(AcquireRelease);
280  break;
281  default:
282  fatal("LdInst has bad memory order type\n");
283  }
284 
285  switch (memoryScope) {
286  case BRIG_MEMORY_SCOPE_NONE:
287  setFlag(NoScope);
288  break;
289  case BRIG_MEMORY_SCOPE_WORKITEM:
290  setFlag(WorkitemScope);
291  break;
292  case BRIG_MEMORY_SCOPE_WORKGROUP:
293  setFlag(WorkgroupScope);
294  break;
295  case BRIG_MEMORY_SCOPE_AGENT:
296  setFlag(DeviceScope);
297  break;
298  case BRIG_MEMORY_SCOPE_SYSTEM:
299  setFlag(SystemScope);
300  break;
301  default:
302  fatal("LdInst has bad memory scope type\n");
303  }
304 
305  switch (segment) {
306  case BRIG_SEGMENT_GLOBAL:
307  setFlag(GlobalSegment);
308  break;
309  case BRIG_SEGMENT_GROUP:
310  setFlag(GroupSegment);
311  break;
312  case BRIG_SEGMENT_PRIVATE:
313  setFlag(PrivateSegment);
314  break;
315  case BRIG_SEGMENT_READONLY:
316  setFlag(ReadOnlySegment);
317  break;
318  case BRIG_SEGMENT_SPILL:
319  setFlag(SpillSegment);
320  break;
321  case BRIG_SEGMENT_FLAT:
322  setFlag(Flat);
323  break;
324  case BRIG_SEGMENT_KERNARG:
325  setFlag(KernArgSegment);
326  break;
327  case BRIG_SEGMENT_ARG:
328  setFlag(ArgSegment);
329  break;
330  default:
331  panic("Ld: segment %d not supported\n", segment);
332  }
333  }
334 
335  int numSrcRegOperands() override
336  { return(this->addr.isVectorRegister()); }
337  int numDstRegOperands() override { return dest.isVectorRegister(); }
338  int getNumOperands() override
339  {
340  if (this->addr.isVectorRegister())
341  return 2;
342  else
343  return 1;
344  }
345  bool isVectorRegister(int operandIndex) override
346  {
347  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
348  return((operandIndex == 0) ? dest.isVectorRegister() :
349  this->addr.isVectorRegister());
350  }
351  bool isCondRegister(int operandIndex) override
352  {
353  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
354  return((operandIndex == 0) ? dest.isCondRegister() :
355  this->addr.isCondRegister());
356  }
357  bool isScalarRegister(int operandIndex) override
358  {
359  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
360  return((operandIndex == 0) ? dest.isScalarRegister() :
361  this->addr.isScalarRegister());
362  }
363  bool isSrcOperand(int operandIndex) override
364  {
365  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
366  if (operandIndex > 0)
367  return(this->addr.isVectorRegister());
368  return false;
369  }
370  bool isDstOperand(int operandIndex) override
371  {
372  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
373  return(operandIndex == 0);
374  }
375  int getOperandSize(int operandIndex) override
376  {
377  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
378  return((operandIndex == 0) ? dest.opSize() :
379  this->addr.opSize());
380  }
381  int
382  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
383  {
384  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
385  return((operandIndex == 0) ? dest.regIndex() :
386  this->addr.regIndex());
387  }
388  };
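 // Editor's note (illustrative, not in the original source): the constructor
 // above maps BRIG modifiers onto generic instruction flags. A plain ld from
 // the group segment, for example, ends up with GroupSegment plus
 // NoOrder/NoScope, whereas an atomic-ld form carrying
 // BRIG_MEMORY_ORDER_SC_ACQUIRE and BRIG_SEGMENT_GLOBAL sets Acquire and
 // GlobalSegment instead.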
389 
390  template<typename MemDataType, typename DestDataType,
391  typename AddrOperandType>
392  class LdInst :
393  public LdInstBase<typename MemDataType::CType,
394  typename DestDataType::OperandType, AddrOperandType>,
395  public MemInst
396  {
397  typename DestDataType::OperandType::DestOperand dest_vect[4];
398  uint16_t num_dest_operands;
399  void generateDisassembly() override;
400 
401  public:
402  LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
403  const char *_opcode)
404  : LdInstBase<typename MemDataType::CType,
405  typename DestDataType::OperandType,
406  AddrOperandType>(ib, obj, _opcode),
407  MemInst(MemDataType::memType)
408  {
409  init_addr(&this->addr);
410 
411  unsigned op_offs = obj->getOperandPtr(ib->operands,0);
412  const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
413 
414  if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
415  const Brig::BrigOperandOperandList *brigRegVecOp =
416  (const Brig::BrigOperandOperandList*)brigOp;
417 
418  num_dest_operands =
419  *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
420 
421  assert(num_dest_operands <= 4);
422  } else {
423  num_dest_operands = 1;
424  }
425 
426  if (num_dest_operands > 1) {
427  assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
428 
429  for (int i = 0; i < num_dest_operands; ++i) {
430  dest_vect[i].init_from_vect(op_offs, obj, i);
431  }
432  }
433  }
434 
435  void
436  initiateAcc(GPUDynInstPtr gpuDynInst) override
437  {
438  typedef typename MemDataType::CType c0;
439 
440  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
441 
442  if (num_dest_operands > 1) {
443  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
444  if (gpuDynInst->exec_mask[i])
445  gpuDynInst->statusVector.push_back(num_dest_operands);
446  else
447  gpuDynInst->statusVector.push_back(0);
448  }
449 
450  for (int k = 0; k < num_dest_operands; ++k) {
451 
452  c0 *d = &((c0*)gpuDynInst->d_data)
453  [k * gpuDynInst->computeUnit()->wfSize()];
454 
455  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
456  if (gpuDynInst->exec_mask[i]) {
457  Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
458 
459  if (this->isLocalMem()) {
460  // load from shared memory
461  *d = gpuDynInst->wavefront()->ldsChunk->
462  read<c0>(vaddr);
463  } else {
464  Request *req = new Request(0, vaddr, sizeof(c0), 0,
465  gpuDynInst->computeUnit()->masterId(),
466  0, gpuDynInst->wfDynId);
467 
468  gpuDynInst->setRequestFlags(req);
469  PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
470  pkt->dataStatic(d);
471 
472  if (gpuDynInst->computeUnit()->shader->
473  separate_acquire_release &&
474  gpuDynInst->isAcquire()) {
475  // if this load has acquire semantics,
476  // set the response continuation function
477  // to perform an Acquire request
478  gpuDynInst->execContinuation =
479  &GPUStaticInst::execLdAcq;
480 
481  gpuDynInst->useContinuation = true;
482  } else {
483  // the request will be finished when
484  // the load completes
485  gpuDynInst->useContinuation = false;
486  }
487  // translation is performed in sendRequest()
488  gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
489  i, pkt);
490  }
491  }
492  ++d;
493  }
494  }
495 
496  gpuDynInst->updateStats();
497  }
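 // Editor's note (not in the original source): initiateAcc above issues one
 // ReadReq per active lane and per destination operand for global loads (so
 // a vector load records up to four outstanding replies per lane in
 // statusVector), while LDS loads are satisfied immediately from the
 // wavefront's ldsChunk.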
498 
499  void
500  completeAcc(GPUDynInstPtr gpuDynInst) override
501  {
502  typedef typename MemDataType::CType c1;
503 
504  constexpr bool is_vt_32 = DestDataType::vgprType == VT_32;
505 
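 // Editor's note (added comment; the original comment block at this point
 // was lost): the nested std::conditional below picks a writeback type c0
 // that matches the destination VGPR width and signedness, since the loaded
 // memory type c1 can be narrower than the register. For example, a u8 load
 // into a 32-bit unsigned VGPR gives c0 = uint32_t, an s16 load into a
 // 32-bit signed VGPR gives c0 = int32_t, and an f64 load into a 64-bit
 // VGPR gives c0 = double.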
517  typedef typename std::conditional<is_vt_32,
518  typename std::conditional<std::is_floating_point<c1>::value,
519  float, typename std::conditional<std::is_signed<c1>::value,
520  int32_t, uint32_t>::type>::type,
521  typename std::conditional<std::is_floating_point<c1>::value,
522  double, typename std::conditional<std::is_signed<c1>::value,
523  int64_t, uint64_t>::type>::type>::type c0;
524 
525 
526  Wavefront *w = gpuDynInst->wavefront();
527 
528  std::vector<uint32_t> regVec;
529  // iterate over number of destination register operands since
530  // this is a load
531  for (int k = 0; k < num_dest_operands; ++k) {
532  assert((sizeof(c1) * num_dest_operands)
533  <= MAX_WIDTH_FOR_MEM_INST);
534 
535  int dst = this->dest.regIndex() + k;
536  if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST)
537  dst = dest_vect[k].regIndex();
538  // virtual->physical VGPR mapping
539  int physVgpr = w->remap(dst, sizeof(c0), 1);
540  // save the physical VGPR index
541  regVec.push_back(physVgpr);
542 
543  c1 *p1 =
544  &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()];
545 
546  for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
547  if (gpuDynInst->exec_mask[i]) {
548  DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
549  "$%s%d <- %d global ld done (src = wavefront "
550  "ld inst)\n", w->computeUnit->cu_id, w->simdId,
551  w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
552  dst, *p1);
553  // write the value into the physical VGPR. This is a
554  // purely functional operation. No timing is modeled.
555  w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
556  *p1, i);
557  }
558  ++p1;
559  }
560  }
561 
562  // Schedule the write operation of the load data on the VRF.
563  // This simply models the timing aspect of the VRF write operation.
564  // It does not modify the physical VGPR.
565  int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
566  vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
567  sizeof(c0), gpuDynInst->time);
568 
569  if (this->isGlobalMem()) {
570  gpuDynInst->computeUnit()->globalMemoryPipe
571  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
572  } else {
573  assert(this->isLocalMem());
574  gpuDynInst->computeUnit()->localMemoryPipe
575  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
576  }
577  }
578 
579  private:
580  void
581  execLdAcq(GPUDynInstPtr gpuDynInst) override
582  {
583  // after the load has complete and if the load has acquire
584  // semantics, issue an acquire request.
585  if (!this->isLocalMem()) {
586  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
587  && gpuDynInst->isAcquire()) {
588  gpuDynInst->statusBitVector = VectorMask(1);
589  gpuDynInst->useContinuation = false;
590  // create request
591  Request *req = new Request(0, 0, 0, 0,
592  gpuDynInst->computeUnit()->masterId(),
593  0, gpuDynInst->wfDynId);
594  req->setFlags(Request::ACQUIRE);
595  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
596  }
597  }
598  }
599 
600  public:
601  bool isVectorRegister(int operandIndex) override
602  {
603  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
604  if ((num_dest_operands != getNumOperands()) &&
605  (operandIndex == (getNumOperands()-1)))
606  return(this->addr.isVectorRegister());
607  if (num_dest_operands > 1) {
608  return dest_vect[operandIndex].isVectorRegister();
609  }
610  else if (num_dest_operands == 1) {
611  return LdInstBase<typename MemDataType::CType,
612  typename DestDataType::OperandType,
613  AddrOperandType>::dest.isVectorRegister();
614  }
615  return false;
616  }
617  bool isCondRegister(int operandIndex) override
618  {
619  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
620  if ((num_dest_operands != getNumOperands()) &&
621  (operandIndex == (getNumOperands()-1)))
622  return(this->addr.isCondRegister());
623  if (num_dest_operands > 1)
624  return dest_vect[operandIndex].isCondRegister();
625  else if (num_dest_operands == 1)
626  return LdInstBase<typename MemDataType::CType,
627  typename DestDataType::OperandType,
628  AddrOperandType>::dest.isCondRegister();
629  return false;
630  }
631  bool isScalarRegister(int operandIndex) override
632  {
633  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
634  if ((num_dest_operands != getNumOperands()) &&
635  (operandIndex == (getNumOperands()-1)))
636  return(this->addr.isScalarRegister());
637  if (num_dest_operands > 1)
638  return dest_vect[operandIndex].isScalarRegister();
639  else if (num_dest_operands == 1)
640  return LdInstBase<typename MemDataType::CType,
641  typename DestDataType::OperandType,
642  AddrOperandType>::dest.isScalarRegister();
643  return false;
644  }
645  bool isSrcOperand(int operandIndex) override
646  {
647  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
648  if ((num_dest_operands != getNumOperands()) &&
649  (operandIndex == (getNumOperands()-1)))
650  return(this->addr.isVectorRegister());
651  return false;
652  }
653  bool isDstOperand(int operandIndex) override
654  {
655  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
656  if ((num_dest_operands != getNumOperands()) &&
657  (operandIndex == (getNumOperands()-1)))
658  return false;
659  return true;
660  }
661  int getOperandSize(int operandIndex) override
662  {
663  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
664  if ((num_dest_operands != getNumOperands()) &&
665  (operandIndex == (getNumOperands()-1)))
666  return(this->addr.opSize());
667  if (num_dest_operands > 1)
668  return(dest_vect[operandIndex].opSize());
669  else if (num_dest_operands == 1)
670  return(LdInstBase<typename MemDataType::CType,
671  typename DestDataType::OperandType,
672  AddrOperandType>::dest.opSize());
673  return 0;
674  }
675  int
676  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
677  {
678  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
679  if ((num_dest_operands != getNumOperands()) &&
680  (operandIndex == (getNumOperands()-1)))
681  return(this->addr.regIndex());
682  if (num_dest_operands > 1)
683  return(dest_vect[operandIndex].regIndex());
684  else if (num_dest_operands == 1)
685  return(LdInstBase<typename MemDataType::CType,
686  typename DestDataType::OperandType,
687  AddrOperandType>::dest.regIndex());
688  return -1;
689  }
690  int getNumOperands() override
691  {
692  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
693  return(num_dest_operands+1);
694  else
695  return(num_dest_operands);
696  }
697  void execute(GPUDynInstPtr gpuDynInst) override;
698  };
699 
700  template<typename MemDT, typename DestDT>
701  GPUStaticInst*
702  decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
703  {
704  unsigned op_offs = obj->getOperandPtr(ib->operands,1);
705  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
706 
707  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
708  return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
709  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
710  tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
711  switch (tmp.regKind) {
712  case Brig::BRIG_REGISTER_KIND_SINGLE:
713  return new LdInst<MemDT, DestDT,
714  SRegAddrOperand>(ib, obj, "ld");
715  case Brig::BRIG_REGISTER_KIND_DOUBLE:
716  return new LdInst<MemDT, DestDT,
717  DRegAddrOperand>(ib, obj, "ld");
718  default:
719  fatal("Bad ld register operand type %d\n", tmp.regKind);
720  }
721  } else {
722  fatal("Bad ld register operand kind %d\n", tmp.kind);
723  }
724  }
725 
726  template<typename MemDT>
727  GPUStaticInst*
728  decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
729  {
730  unsigned op_offs = obj->getOperandPtr(ib->operands,0);
731  BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
732 
733  assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
734  dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
735  switch(dest.regKind) {
736  case Brig::BRIG_REGISTER_KIND_SINGLE:
737  switch (ib->type) {
738  case Brig::BRIG_TYPE_B8:
739  case Brig::BRIG_TYPE_B16:
740  case Brig::BRIG_TYPE_B32:
741  return decodeLd2<MemDT, B32>(ib, obj);
742  case Brig::BRIG_TYPE_U8:
743  case Brig::BRIG_TYPE_U16:
744  case Brig::BRIG_TYPE_U32:
745  return decodeLd2<MemDT, U32>(ib, obj);
746  case Brig::BRIG_TYPE_S8:
747  case Brig::BRIG_TYPE_S16:
748  case Brig::BRIG_TYPE_S32:
749  return decodeLd2<MemDT, S32>(ib, obj);
750  case Brig::BRIG_TYPE_F16:
751  case Brig::BRIG_TYPE_F32:
752  return decodeLd2<MemDT, U32>(ib, obj);
753  default:
754  fatal("Bad ld register operand type %d, %d\n",
755  dest.regKind, ib->type);
756  };
757  case Brig::BRIG_REGISTER_KIND_DOUBLE:
758  switch (ib->type) {
759  case Brig::BRIG_TYPE_B64:
760  return decodeLd2<MemDT, B64>(ib, obj);
761  case Brig::BRIG_TYPE_U64:
762  return decodeLd2<MemDT, U64>(ib, obj);
763  case Brig::BRIG_TYPE_S64:
764  return decodeLd2<MemDT, S64>(ib, obj);
765  case Brig::BRIG_TYPE_F64:
766  return decodeLd2<MemDT, U64>(ib, obj);
767  default:
768  fatal("Bad ld register operand type %d, %d\n",
769  dest.regKind, ib->type);
770  };
771  default:
772  fatal("Bad ld register operand type %d, %d\n", dest.regKind,
773  ib->type);
774  }
775  }
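 // Editor's note (illustrative, not in the original source): for a u32 load
 // whose destination is a single (32-bit) register and whose address operand
 // is a 64-bit register, the dispatch above resolves to
 //
 //     decodeLd2<MemDT, U32>  ->  new LdInst<MemDT, U32, DRegAddrOperand>(ib, obj, "ld");
 //
 // where MemDT is the memory data type supplied by the caller.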
776 
777  template<typename MemDataType, typename SrcOperandType,
778  typename AddrOperandType>
779  class StInstBase : public HsailGPUStaticInst
780  {
781  public:
782  typename SrcOperandType::SrcOperand src;
783  AddrOperandType addr;
784 
785  Brig::BrigSegment segment;
786  Brig::BrigMemoryScope memoryScope;
787  Brig::BrigMemoryOrder memoryOrder;
788  unsigned int equivClass;
789 
790  StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
791  const char *_opcode)
792  : HsailGPUStaticInst(obj, _opcode)
793  {
794  using namespace Brig;
795 
796  setFlag(MemoryRef);
797  setFlag(Store);
798 
799  if (ib->opcode == BRIG_OPCODE_ST) {
800  const BrigInstMem *ldst = (const BrigInstMem*)ib;
801 
802  segment = (BrigSegment)ldst->segment;
803  memoryOrder = BRIG_MEMORY_ORDER_NONE;
804  memoryScope = BRIG_MEMORY_SCOPE_NONE;
805  equivClass = ldst->equivClass;
806 
807  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
808  const BrigOperand *baseOp = obj->getOperand(op_offs);
809 
810  if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
811  (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
812  src.init(op_offs, obj);
813  }
814 
815  op_offs = obj->getOperandPtr(ib->operands, 1);
816  addr.init(op_offs, obj);
817  } else {
818  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
819 
820  segment = (BrigSegment)at->segment;
821  memoryScope = (BrigMemoryScope)at->memoryScope;
822  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
823  equivClass = 0;
824 
825  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
826  addr.init(op_offs, obj);
827 
828  op_offs = obj->getOperandPtr(ib->operands, 1);
829  src.init(op_offs, obj);
830  }
831 
832  switch (memoryOrder) {
833  case BRIG_MEMORY_ORDER_NONE:
834  setFlag(NoOrder);
835  break;
836  case BRIG_MEMORY_ORDER_RELAXED:
837  setFlag(RelaxedOrder);
838  break;
839  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
840  setFlag(Acquire);
841  break;
842  case BRIG_MEMORY_ORDER_SC_RELEASE:
843  setFlag(Release);
844  break;
845  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
846  setFlag(AcquireRelease);
847  break;
848  default:
849  fatal("StInst has bad memory order type\n");
850  }
851 
852  switch (memoryScope) {
853  case BRIG_MEMORY_SCOPE_NONE:
854  setFlag(NoScope);
855  break;
856  case BRIG_MEMORY_SCOPE_WORKITEM:
857  setFlag(WorkitemScope);
858  break;
859  case BRIG_MEMORY_SCOPE_WORKGROUP:
860  setFlag(WorkgroupScope);
861  break;
862  case BRIG_MEMORY_SCOPE_AGENT:
863  setFlag(DeviceScope);
864  break;
865  case BRIG_MEMORY_SCOPE_SYSTEM:
866  setFlag(SystemScope);
867  break;
868  default:
869  fatal("StInst has bad memory scope type\n");
870  }
871 
872  switch (segment) {
873  case BRIG_SEGMENT_GLOBAL:
874  setFlag(GlobalSegment);
875  break;
876  case BRIG_SEGMENT_GROUP:
877  setFlag(GroupSegment);
878  break;
879  case BRIG_SEGMENT_PRIVATE:
880  setFlag(PrivateSegment);
881  break;
882  case BRIG_SEGMENT_READONLY:
883  setFlag(ReadOnlySegment);
884  break;
885  case BRIG_SEGMENT_SPILL:
886  setFlag(SpillSegment);
887  break;
888  case BRIG_SEGMENT_FLAT:
889  setFlag(Flat);
890  break;
891  case BRIG_SEGMENT_ARG:
892  setFlag(ArgSegment);
893  break;
894  default:
895  panic("St: segment %d not supported\n", segment);
896  }
897  }
898 
899  int numDstRegOperands() override { return 0; }
900  int numSrcRegOperands() override
901  {
902  return src.isVectorRegister() + this->addr.isVectorRegister();
903  }
904  int getNumOperands() override
905  {
906  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
907  return 2;
908  else
909  return 1;
910  }
911  bool isVectorRegister(int operandIndex) override
912  {
913  assert(operandIndex >= 0 && operandIndex < getNumOperands());
914  return !operandIndex ? src.isVectorRegister() :
915  this->addr.isVectorRegister();
916  }
917  bool isCondRegister(int operandIndex) override
918  {
919  assert(operandIndex >= 0 && operandIndex < getNumOperands());
920  return !operandIndex ? src.isCondRegister() :
921  this->addr.isCondRegister();
922  }
923  bool isScalarRegister(int operandIndex) override
924  {
925  assert(operandIndex >= 0 && operandIndex < getNumOperands());
926  return !operandIndex ? src.isScalarRegister() :
927  this->addr.isScalarRegister();
928  }
929  bool isSrcOperand(int operandIndex) override
930  {
931  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
932  return true;
933  }
934  bool isDstOperand(int operandIndex) override { return false; }
935  int getOperandSize(int operandIndex) override
936  {
937  assert(operandIndex >= 0 && operandIndex < getNumOperands());
938  return !operandIndex ? src.opSize() : this->addr.opSize();
939  }
940  int
941  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
942  {
943  assert(operandIndex >= 0 && operandIndex < getNumOperands());
944  return !operandIndex ? src.regIndex() : this->addr.regIndex();
945  }
946  };
947 
948 
949  template<typename MemDataType, typename SrcDataType,
950  typename AddrOperandType>
951  class StInst :
952  public StInstBase<MemDataType, typename SrcDataType::OperandType,
953  AddrOperandType>,
954  public MemInst
955  {
956  public:
957  typename SrcDataType::OperandType::SrcOperand src_vect[4];
958  uint16_t num_src_operands;
959  void generateDisassembly() override;
960 
961  StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
962  const char *_opcode, int srcIdx)
963  : StInstBase<MemDataType, typename SrcDataType::OperandType,
964  AddrOperandType>(ib, obj, _opcode),
965  MemInst(SrcDataType::memType)
966  {
967  init_addr(&this->addr);
968 
969  BrigRegOperandInfo rinfo;
970  unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
971  const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
972 
973  if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
974  const Brig::BrigOperandConstantBytes *op =
975  (Brig::BrigOperandConstantBytes*)baseOp;
976 
977  rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
978  Brig::BRIG_TYPE_NONE);
979  } else {
980  rinfo = findRegDataType(op_offs, obj);
981  }
982 
983  if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
984  const Brig::BrigOperandOperandList *brigRegVecOp =
985  (const Brig::BrigOperandOperandList*)baseOp;
986 
987  num_src_operands =
988  *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
989 
990  assert(num_src_operands <= 4);
991  } else {
992  num_src_operands = 1;
993  }
994 
995  if (num_src_operands > 1) {
996  assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
997 
998  for (int i = 0; i < num_src_operands; ++i) {
999  src_vect[i].init_from_vect(op_offs, obj, i);
1000  }
1001  }
1002  }
1003 
1004  void
1005  initiateAcc(GPUDynInstPtr gpuDynInst) override
1006  {
1007  // before performing a store, check if this store has
1008  // release semantics, and if so issue a release first
1009  if (!this->isLocalMem()) {
1010  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1011  && gpuDynInst->isRelease()) {
1012 
1013  gpuDynInst->statusBitVector = VectorMask(1);
1014  gpuDynInst->execContinuation = &GPUStaticInst::execSt;
1015  gpuDynInst->useContinuation = true;
1016  // create request
1017  Request *req = new Request(0, 0, 0, 0,
1018  gpuDynInst->computeUnit()->masterId(),
1019  0, gpuDynInst->wfDynId);
1020  req->setFlags(Request::RELEASE);
1021  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1022 
1023  return;
1024  }
1025  }
1026 
1027  // if there is no release semantic, perform stores immediately
1028  execSt(gpuDynInst);
1029  }
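 // Editor's note (not in the original source): for a store with release
 // semantics on a configuration with separate acquire/release, initiateAcc
 // above first injects a global memory fence marked Request::RELEASE and
 // defers the actual writes by installing execSt as the continuation;
 // otherwise execSt runs immediately.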
1030 
1031  // stores don't write anything back, so there is nothing
1032  // to do here. we only override this method to avoid the
1033  // fatal in the base class implementation
1034  void completeAcc(GPUDynInstPtr gpuDynInst) override { }
1035 
1036  private:
1037  // execSt may be called through a continuation
1038  // if the store had release semantics. see comment for
1039  // execSt in gpu_static_inst.hh
1040  void
1041  execSt(GPUDynInstPtr gpuDynInst) override
1042  {
1043  typedef typename MemDataType::CType c0;
1044 
1045  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1046 
1047  if (num_src_operands > 1) {
1048  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
1049  if (gpuDynInst->exec_mask[i])
1050  gpuDynInst->statusVector.push_back(num_src_operands);
1051  else
1052  gpuDynInst->statusVector.push_back(0);
1053  }
1054 
1055  for (int k = 0; k < num_src_operands; ++k) {
1056  c0 *d = &((c0*)gpuDynInst->d_data)
1057  [k * gpuDynInst->computeUnit()->wfSize()];
1058 
1059  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1060  if (gpuDynInst->exec_mask[i]) {
1061  Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
1062 
1063  if (this->isLocalMem()) {
1064  //store to shared memory
1065  gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
1066  *d);
1067  } else {
1068  Request *req =
1069  new Request(0, vaddr, sizeof(c0), 0,
1070  gpuDynInst->computeUnit()->masterId(),
1071  0, gpuDynInst->wfDynId);
1072 
1073  gpuDynInst->setRequestFlags(req);
1074  PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
1075  pkt->dataStatic<c0>(d);
1076 
1077  // translation is performed in sendRequest()
1078  // the request will be finished when the store completes
1079  gpuDynInst->useContinuation = false;
1080  gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
1081  i, pkt);
1082 
1083  }
1084  }
1085  ++d;
1086  }
1087  }
1088 
1089  gpuDynInst->updateStats();
1090  }
1091 
1092  public:
1093  bool isVectorRegister(int operandIndex) override
1094  {
1095  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1096  if (operandIndex == num_src_operands)
1097  return this->addr.isVectorRegister();
1098  if (num_src_operands > 1)
1099  return src_vect[operandIndex].isVectorRegister();
1100  else if (num_src_operands == 1)
1101  return StInstBase<MemDataType,
1102  typename SrcDataType::OperandType,
1103  AddrOperandType>::src.isVectorRegister();
1104  return false;
1105  }
1106  bool isCondRegister(int operandIndex) override
1107  {
1108  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1109  if (operandIndex == num_src_operands)
1110  return this->addr.isCondRegister();
1111  if (num_src_operands > 1)
1112  return src_vect[operandIndex].isCondRegister();
1113  else if (num_src_operands == 1)
1114  return StInstBase<MemDataType,
1115  typename SrcDataType::OperandType,
1116  AddrOperandType>::src.isCondRegister();
1117  return false;
1118  }
1119  bool isScalarRegister(int operandIndex) override
1120  {
1121  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1122  if (operandIndex == num_src_operands)
1123  return this->addr.isScalarRegister();
1124  if (num_src_operands > 1)
1125  return src_vect[operandIndex].isScalarRegister();
1126  else if (num_src_operands == 1)
1127  return StInstBase<MemDataType,
1128  typename SrcDataType::OperandType,
1129  AddrOperandType>::src.isScalarRegister();
1130  return false;
1131  }
1132  bool isSrcOperand(int operandIndex) override
1133  {
1134  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1135  return true;
1136  }
1137  bool isDstOperand(int operandIndex) override { return false; }
1138  int getOperandSize(int operandIndex) override
1139  {
1140  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1141  if (operandIndex == num_src_operands)
1142  return this->addr.opSize();
1143  if (num_src_operands > 1)
1144  return src_vect[operandIndex].opSize();
1145  else if (num_src_operands == 1)
1146  return StInstBase<MemDataType,
1147  typename SrcDataType::OperandType,
1148  AddrOperandType>::src.opSize();
1149  return 0;
1150  }
1151  int
1152  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
1153  {
1154  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1155  if (operandIndex == num_src_operands)
1156  return this->addr.regIndex();
1157  if (num_src_operands > 1)
1158  return src_vect[operandIndex].regIndex();
1159  else if (num_src_operands == 1)
1160  return StInstBase<MemDataType,
1161  typename SrcDataType::OperandType,
1162  AddrOperandType>::src.regIndex();
1163  return -1;
1164  }
1165  int getNumOperands() override
1166  {
1167  if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
1168  return num_src_operands + 1;
1169  else
1170  return num_src_operands;
1171  }
1172  void execute(GPUDynInstPtr gpuDynInst) override;
1173  };
1174 
1175  template<typename DataType, typename SrcDataType>
1176  GPUStaticInst*
1177  decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
1178  {
1179  int srcIdx = 0;
1180  int destIdx = 1;
1181  if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
1182  ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
1183  srcIdx = 1;
1184  destIdx = 0;
1185  }
1186  unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
1187 
1188  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1189 
1190  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1191  return new StInst<DataType, SrcDataType,
1192  NoRegAddrOperand>(ib, obj, "st", srcIdx);
1193  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1194  // V2/V4 not allowed
1195  switch (tmp.regKind) {
1196  case Brig::BRIG_REGISTER_KIND_SINGLE:
1197  return new StInst<DataType, SrcDataType,
1198  SRegAddrOperand>(ib, obj, "st", srcIdx);
1199  case Brig::BRIG_REGISTER_KIND_DOUBLE:
1200  return new StInst<DataType, SrcDataType,
1201  DRegAddrOperand>(ib, obj, "st", srcIdx);
1202  default:
1203  fatal("Bad st register operand type %d\n", tmp.type);
1204  }
1205  } else {
1206  fatal("Bad st register operand kind %d\n", tmp.kind);
1207  }
1208  }
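 // Editor's note (illustrative, not in the original source): for the
 // atomic-form opcodes (BRIG_OPCODE_ATOMIC / ATOMICNORET) the source and
 // destination operand slots are swapped (srcIdx = 1, destIdx = 0) before
 // the address operand's kind picks the StInst specialization, e.g.
 //
 //     new StInst<DataType, SrcDataType, SRegAddrOperand>(ib, obj, "st", srcIdx);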
1209 
1210  template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
1211  bool HasDst>
1212  class AtomicInstBase : public HsailGPUStaticInst
1213  {
1214  public:
1215  typename OperandType::DestOperand dest;
1216  typename OperandType::SrcOperand src[NumSrcOperands];
1217  AddrOperandType addr;
1218 
1219  Brig::BrigSegment segment;
1220  Brig::BrigMemoryOrder memoryOrder;
1221  Brig::BrigAtomicOperation atomicOperation;
1222  Brig::BrigMemoryScope memoryScope;
1223  Brig::BrigOpcode opcode;
1224 
1225  AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
1226  const char *_opcode)
1227  : HsailGPUStaticInst(obj, _opcode)
1228  {
1229  using namespace Brig;
1230 
1231  const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
1232 
1233  segment = (BrigSegment)at->segment;
1234  memoryScope = (BrigMemoryScope)at->memoryScope;
1235  memoryOrder = (BrigMemoryOrder)at->memoryOrder;
1236  atomicOperation = (BrigAtomicOperation)at->atomicOperation;
1237  opcode = (BrigOpcode)ib->opcode;
1238 
1241 
1242  setFlag(MemoryRef);
1243 
1244  if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
1245  setFlag(AtomicReturn);
1246  } else {
1247  setFlag(AtomicNoReturn);
1248  }
1249 
1250  switch (memoryOrder) {
1251  case BRIG_MEMORY_ORDER_NONE:
1252  setFlag(NoOrder);
1253  break;
1254  case BRIG_MEMORY_ORDER_RELAXED:
1255  setFlag(RelaxedOrder);
1256  break;
1257  case BRIG_MEMORY_ORDER_SC_ACQUIRE:
1258  setFlag(Acquire);
1259  break;
1260  case BRIG_MEMORY_ORDER_SC_RELEASE:
1261  setFlag(Release);
1262  break;
1263  case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
1264  setFlag(AcquireRelease);
1265  break;
1266  default:
1267  fatal("AtomicInst has bad memory order type\n");
1268  }
1269 
1270  switch (memoryScope) {
1271  case BRIG_MEMORY_SCOPE_NONE:
1272  setFlag(NoScope);
1273  break;
1274  case BRIG_MEMORY_SCOPE_WORKITEM:
1275  setFlag(WorkitemScope);
1276  break;
1277  case BRIG_MEMORY_SCOPE_WORKGROUP:
1278  setFlag(WorkgroupScope);
1279  break;
1280  case BRIG_MEMORY_SCOPE_AGENT:
1281  setFlag(DeviceScope);
1282  break;
1283  case BRIG_MEMORY_SCOPE_SYSTEM:
1284  setFlag(SystemScope);
1285  break;
1286  default:
1287  fatal("AtomicInst has bad memory scope type\n");
1288  }
1289 
1290  switch (atomicOperation) {
1291  case Brig::BRIG_ATOMIC_AND:
1292  setFlag(AtomicAnd);
1293  break;
1294  case Brig::BRIG_ATOMIC_OR:
1295  setFlag(AtomicOr);
1296  break;
1297  case Brig::BRIG_ATOMIC_XOR:
1298  setFlag(AtomicXor);
1299  break;
1300  case Brig::BRIG_ATOMIC_CAS:
1301  setFlag(AtomicCAS);
1302  break;
1303  case Brig::BRIG_ATOMIC_EXCH:
1304  setFlag(AtomicExch);
1305  break;
1306  case Brig::BRIG_ATOMIC_ADD:
1307  setFlag(AtomicAdd);
1308  break;
1309  case Brig::BRIG_ATOMIC_WRAPINC:
1310  setFlag(AtomicInc);
1311  break;
1312  case Brig::BRIG_ATOMIC_WRAPDEC:
1313  setFlag(AtomicDec);
1314  break;
1315  case Brig::BRIG_ATOMIC_MIN:
1316  setFlag(AtomicMin);
1317  break;
1318  case Brig::BRIG_ATOMIC_MAX:
1319  setFlag(AtomicMax);
1320  break;
1321  case Brig::BRIG_ATOMIC_SUB:
1322  setFlag(AtomicSub);
1323  break;
1324  default:
1325  fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
1326  }
1327 
1328  switch (segment) {
1329  case BRIG_SEGMENT_GLOBAL:
1330  setFlag(GlobalSegment);
1331  break;
1332  case BRIG_SEGMENT_GROUP:
1333  setFlag(GroupSegment);
1334  break;
1335  case BRIG_SEGMENT_FLAT:
1336  setFlag(Flat);
1337  break;
1338  default:
1339  panic("Atomic: segment %d not supported\n", segment);
1340  }
1341 
1342  if (HasDst) {
1343  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1344  dest.init(op_offs, obj);
1345 
1346  op_offs = obj->getOperandPtr(ib->operands, 1);
1347  addr.init(op_offs, obj);
1348 
1349  for (int i = 0; i < NumSrcOperands; ++i) {
1350  op_offs = obj->getOperandPtr(ib->operands, i + 2);
1351  src[i].init(op_offs, obj);
1352  }
1353  } else {
1354 
1355  unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
1356  addr.init(op_offs, obj);
1357 
1358  for (int i = 0; i < NumSrcOperands; ++i) {
1359  op_offs = obj->getOperandPtr(ib->operands, i + 1);
1360  src[i].init(op_offs, obj);
1361  }
1362  }
1363  }
1364 
1365  int numSrcRegOperands()
1366  {
1367  int operands = 0;
1368  for (int i = 0; i < NumSrcOperands; i++) {
1369  if (src[i].isVectorRegister()) {
1370  operands++;
1371  }
1372  }
1373  if (addr.isVectorRegister())
1374  operands++;
1375  return operands;
1376  }
1377  int numDstRegOperands() { return dest.isVectorRegister(); }
1378  int getNumOperands()
1379  {
1380  if (addr.isVectorRegister())
1381  return(NumSrcOperands + 2);
1382  return(NumSrcOperands + 1);
1383  }
1384  bool isVectorRegister(int operandIndex)
1385  {
1386  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1387  if (operandIndex < NumSrcOperands)
1388  return src[operandIndex].isVectorRegister();
1389  else if (operandIndex == NumSrcOperands)
1390  return(addr.isVectorRegister());
1391  else
1392  return dest.isVectorRegister();
1393  }
1394  bool isCondRegister(int operandIndex)
1395  {
1396  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1397  if (operandIndex < NumSrcOperands)
1398  return src[operandIndex].isCondRegister();
1399  else if (operandIndex == NumSrcOperands)
1400  return(addr.isCondRegister());
1401  else
1402  return dest.isCondRegister();
1403  }
1404  bool isScalarRegister(int operandIndex)
1405  {
1406  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1407  if (operandIndex < NumSrcOperands)
1408  return src[operandIndex].isScalarRegister();
1409  else if (operandIndex == NumSrcOperands)
1410  return(addr.isScalarRegister());
1411  else
1412  return dest.isScalarRegister();
1413  }
1414  bool isSrcOperand(int operandIndex)
1415  {
1416  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1417  if (operandIndex < NumSrcOperands)
1418  return true;
1419  else if (operandIndex == NumSrcOperands)
1420  return(addr.isVectorRegister());
1421  else
1422  return false;
1423  }
1424  bool isDstOperand(int operandIndex)
1425  {
1426  if (operandIndex <= NumSrcOperands)
1427  return false;
1428  else
1429  return true;
1430  }
1431  int getOperandSize(int operandIndex)
1432  {
1433  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1434  if (operandIndex < NumSrcOperands)
1435  return(src[operandIndex].opSize());
1436  else if (operandIndex == NumSrcOperands)
1437  return(addr.opSize());
1438  else
1439  return(dest.opSize());
1440  }
1441  int
1442  getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
1443  {
1444  assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
1445  if (operandIndex < NumSrcOperands)
1446  return(src[operandIndex].regIndex());
1447  else if (operandIndex == NumSrcOperands)
1448  return(addr.regIndex());
1449  else
1450  return(dest.regIndex());
1451  return -1;
1452  }
1453  };
1454 
1455  template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
1456  bool HasDst>
1457  class AtomicInst :
1458  public AtomicInstBase<typename MemDataType::OperandType,
1459  AddrOperandType, NumSrcOperands, HasDst>,
1460  public MemInst
1461  {
1462  public:
1463  void generateDisassembly() override;
1464 
1465  AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
1466  const char *_opcode)
1467  : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
1468  NumSrcOperands, HasDst>
1469  (ib, obj, _opcode),
1470  MemInst(MemDataType::memType)
1471  {
1472  init_addr(&this->addr);
1473  }
1474 
1475  void
1476  initiateAcc(GPUDynInstPtr gpuDynInst) override
1477  {
1478  // before doing the RMW, check if this atomic has
1479  // release semantics, and if so issue a release first
1480  if (!this->isLocalMem()) {
1481  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1482  && (gpuDynInst->isRelease()
1483  || gpuDynInst->isAcquireRelease())) {
1484 
1485  gpuDynInst->statusBitVector = VectorMask(1);
1486 
1487  gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
1488  gpuDynInst->useContinuation = true;
1489 
1490  // create request
1491  Request *req = new Request(0, 0, 0, 0,
1492  gpuDynInst->computeUnit()->masterId(),
1493  0, gpuDynInst->wfDynId);
1494  req->setFlags(Request::RELEASE);
1495  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1496 
1497  return;
1498  }
1499  }
1500 
1501  // if there is no release semantic, execute the RMW immediately
1502  execAtomic(gpuDynInst);
1503 
1504  }
1505 
1506  void
1507  completeAcc(GPUDynInstPtr gpuDynInst) override
1508  {
1509  // if this is not an atomic return op, then we
1510  // have nothing more to do.
1511  if (this->isAtomicRet()) {
1512  // the size of the src operands and the
1513  // memory being operated on must match
1514  // for HSAIL atomics - this assumption may
1515  // not apply to all ISAs
1516  typedef typename MemDataType::CType CType;
1517 
1518  Wavefront *w = gpuDynInst->wavefront();
1519  int dst = this->dest.regIndex();
1520  std::vector<uint32_t> regVec;
1521  // virtual->physical VGPR mapping
1522  int physVgpr = w->remap(dst, sizeof(CType), 1);
1523  regVec.push_back(physVgpr);
1524  CType *p1 = &((CType*)gpuDynInst->d_data)[0];
1525 
1526  for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
1527  if (gpuDynInst->exec_mask[i]) {
1528  DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
1529  "$%s%d <- %d global ld done (src = wavefront "
1530  "ld inst)\n", w->computeUnit->cu_id, w->simdId,
1531  w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d",
1532  dst, *p1);
1533  // write the value into the physical VGPR. This is a
1534  // purely functional operation. No timing is modeled.
1535  w->computeUnit->vrf[w->simdId]->write<CType>(physVgpr, *p1, i);
1536  }
1537  ++p1;
1538  }
1539 
1540  // Schedule the write operation of the load data on the VRF.
1541  // This simply models the timing aspect of the VRF write operation.
1542  // It does not modify the physical VGPR.
1543  int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
1544  vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
1545  sizeof(CType), gpuDynInst->time);
1546 
1547  if (this->isGlobalMem()) {
1548  gpuDynInst->computeUnit()->globalMemoryPipe
1549  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1550  } else {
1551  assert(this->isLocalMem());
1552  gpuDynInst->computeUnit()->localMemoryPipe
1553  .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
1554  }
1555  }
1556  }
1557 
1558  void execute(GPUDynInstPtr gpuDynInst) override;
1559 
1560  private:
1561  // execAtomic may be called through a continuation
1562  // if the RMW had release semantics. see comment for
1563  // execContinuation in gpu_dyn_inst.hh
1564  void
1565  execAtomic(GPUDynInstPtr gpuDynInst) override
1566  {
1567  gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
1568 
1569  typedef typename MemDataType::CType c0;
1570 
1571  c0 *d = &((c0*) gpuDynInst->d_data)[0];
1572  c0 *e = &((c0*) gpuDynInst->a_data)[0];
1573  c0 *f = &((c0*) gpuDynInst->x_data)[0];
1574 
1575  for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
1576  if (gpuDynInst->exec_mask[i]) {
1577  Addr vaddr = gpuDynInst->addr[i];
1578 
1579  if (this->isLocalMem()) {
1580  Wavefront *wavefront = gpuDynInst->wavefront();
1581  *d = wavefront->ldsChunk->read<c0>(vaddr);
1582 
1583  if (this->isAtomicAdd()) {
1584  wavefront->ldsChunk->write<c0>(vaddr,
1585  wavefront->ldsChunk->read<c0>(vaddr) + (*e));
1586  } else if (this->isAtomicSub()) {
1587  wavefront->ldsChunk->write<c0>(vaddr,
1588  wavefront->ldsChunk->read<c0>(vaddr) - (*e));
1589  } else if (this->isAtomicMax()) {
1590  wavefront->ldsChunk->write<c0>(vaddr,
1591  std::max(wavefront->ldsChunk->read<c0>(vaddr),
1592  (*e)));
1593  } else if (this->isAtomicMin()) {
1594  wavefront->ldsChunk->write<c0>(vaddr,
1595  std::min(wavefront->ldsChunk->read<c0>(vaddr),
1596  (*e)));
1597  } else if (this->isAtomicAnd()) {
1598  wavefront->ldsChunk->write<c0>(vaddr,
1599  wavefront->ldsChunk->read<c0>(vaddr) & (*e));
1600  } else if (this->isAtomicOr()) {
1601  wavefront->ldsChunk->write<c0>(vaddr,
1602  wavefront->ldsChunk->read<c0>(vaddr) | (*e));
1603  } else if (this->isAtomicXor()) {
1604  wavefront->ldsChunk->write<c0>(vaddr,
1605  wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
1606  } else if (this->isAtomicInc()) {
1607  wavefront->ldsChunk->write<c0>(vaddr,
1608  wavefront->ldsChunk->read<c0>(vaddr) + 1);
1609  } else if (this->isAtomicDec()) {
1610  wavefront->ldsChunk->write<c0>(vaddr,
1611  wavefront->ldsChunk->read<c0>(vaddr) - 1);
1612  } else if (this->isAtomicExch()) {
1613  wavefront->ldsChunk->write<c0>(vaddr, (*e));
1614  } else if (this->isAtomicCAS()) {
1615  wavefront->ldsChunk->write<c0>(vaddr,
1616  (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
1617  (*f) : wavefront->ldsChunk->read<c0>(vaddr));
1618  } else {
1619  fatal("Unrecognized or invalid HSAIL atomic op "
1620  "type.\n");
1621  }
1622  } else {
1623  Request *req =
1624  new Request(0, vaddr, sizeof(c0), 0,
1625  gpuDynInst->computeUnit()->masterId(),
1626  0, gpuDynInst->wfDynId,
1627  gpuDynInst->makeAtomicOpFunctor<c0>(e,
1628  f));
1629 
1630  gpuDynInst->setRequestFlags(req);
1631  PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
1632  pkt->dataStatic(d);
1633 
1634  if (gpuDynInst->computeUnit()->shader->
1635  separate_acquire_release &&
1636  (gpuDynInst->isAcquire())) {
1637  // if this atomic has acquire semantics,
1638  // schedule the continuation to perform an
1639  // acquire after the RMW completes
1640  gpuDynInst->execContinuation =
1641  &GPUStaticInst::execAtomicAcq;
1642 
1643  gpuDynInst->useContinuation = true;
1644  } else {
1645  // the request will be finished when the RMW completes
1646  gpuDynInst->useContinuation = false;
1647  }
1648  // translation is performed in sendRequest()
1649  gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
1650  pkt);
1651  }
1652  }
1653 
1654  ++d;
1655  ++e;
1656  ++f;
1657  }
1658 
1659  gpuDynInst->updateStats();
1660  }
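 // Editor's note (not in the original source): for LDS atomics the
 // read-modify-write above is performed functionally and in place on the
 // wavefront's ldsChunk, while global atomics build a SwapReq packet whose
 // atomic-op functor (created by makeAtomicOpFunctor from the a_data/x_data
 // operands) applies the operation at the memory side; the old value comes
 // back through d_data either way.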
1661 
1662  // execAtomicACq will always be called through a continuation.
1663  // see comment for execContinuation in gpu_dyn_inst.hh
1664  void
1665  execAtomicAcq(GPUDynInstPtr gpuDynInst) override
1666  {
1667  // after performing the RMW, check to see if this instruction
1668  // has acquire semantics, and if so, issue an acquire
1669  if (!this->isLocalMem()) {
1670  if (gpuDynInst->computeUnit()->shader->separate_acquire_release
1671  && gpuDynInst->isAcquire()) {
1672  gpuDynInst->statusBitVector = VectorMask(1);
1673 
1674  // the request will be finished when
1675  // the acquire completes
1676  gpuDynInst->useContinuation = false;
1677  // create request
1678  Request *req = new Request(0, 0, 0, 0,
1679  gpuDynInst->computeUnit()->masterId(),
1680  0, gpuDynInst->wfDynId);
1681  req->setFlags(Request::ACQUIRE);
1682  gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
1683  }
1684  }
1685  }
1686  };
1687 
1688  template<typename DataType, typename AddrOperandType, int NumSrcOperands>
1689  GPUStaticInst*
1690  constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1691  {
1692  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1693 
1694  if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
1695  return decodeLd<DataType>(ib, obj);
1696  } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
1697  switch (ib->type) {
1698  case Brig::BRIG_TYPE_B8:
1699  return decodeSt<S8,S8>(ib, obj);
1700  case Brig::BRIG_TYPE_B16:
1701  return decodeSt<S16,S16>(ib, obj);
1702  case Brig::BRIG_TYPE_B32:
1703  return decodeSt<S32,S32>(ib, obj);
1704  case Brig::BRIG_TYPE_B64:
1705  return decodeSt<S64,S64>(ib, obj);
1706  default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
1707  }
1708  } else {
1709  if (ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
1710  return new AtomicInst<DataType, AddrOperandType,
1711  NumSrcOperands, false>(ib, obj, "atomicnoret");
1712  else
1713  return new AtomicInst<DataType, AddrOperandType,
1714  NumSrcOperands, true>(ib, obj, "atomic");
1715  }
1716  }
1717 
1718  template<typename DataType, int NumSrcOperands>
1719  GPUStaticInst*
1720  decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
1721  {
1722  unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
1723  Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
1724 
1725  unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
1726 
1727  BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
1728 
1729  if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
1730  return constructAtomic<DataType, NoRegAddrOperand,
1731  NumSrcOperands>(ib, obj);
1732  } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
1733  // V2/V4 not allowed
1734  switch (tmp.regKind) {
1735  case Brig::BRIG_REGISTER_KIND_SINGLE:
1736  return constructAtomic<DataType, SRegAddrOperand,
1737  NumSrcOperands>(ib, obj);
1738  case Brig::BRIG_REGISTER_KIND_DOUBLE:
1739  return constructAtomic<DataType, DRegAddrOperand,
1740  NumSrcOperands>(ib, obj);
1741  default:
1742  fatal("Bad atomic register operand type %d\n", tmp.type);
1743  }
1744  } else {
1745  fatal("Bad atomic register operand kind %d\n", tmp.kind);
1746  }
1747  }
1748 
1749 
1750  template<typename DataType>
1751  GPUStaticInst*
1752  decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
1753  {
1754  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1755 
1756  if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1757  return decodeAtomicHelper<DataType, 2>(ib, obj);
1758  } else {
1759  return decodeAtomicHelper<DataType, 1>(ib, obj);
1760  }
1761  }
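 // Editor's note (not in the original source): atomic_cas is the only
 // operation with two source operands (the compare value and the swap
 // value), so the decode path above instantiates the helper with
 // NumSrcOperands = 2 for CAS and 1 for every other atomic operation.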
1762 
1763  template<typename DataType>
1764  GPUStaticInst*
1765  decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
1766  {
1767  const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
1768  if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
1769  return decodeAtomicHelper<DataType, 2>(ib, obj);
1770  } else {
1771  return decodeAtomicHelper<DataType, 1>(ib, obj);
1772  }
1773  }
1774 } // namespace HsailISA
1775 
1776 #endif // __ARCH_HSAIL_INSTS_MEM_HH__
Definition: mem.hh:1404
uint16_t num_dest_operands
Definition: mem.hh:398
Bitfield< 9 > e
Definition: miscregs.hh:1376
AddrOperandType addr
Definition: mem.hh:1217
unsigned getOperandPtr(int offs, int index) const
Definition: brig_object.cc:122
void completeAcc(GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:1034
bool isAtomicCAS() const
type
Definition: misc.hh:728
int getNumOperands() override
Definition: mem.hh:904
int numSrcRegOperands() override
Definition: mem.hh:110
int numDstRegOperands() override
Definition: mem.hh:899
void init_addr(AddrOperandBase *_addr_operand)
Definition: mem.hh:75
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:1132
SrcOperandType::SrcOperand src
Definition: mem.hh:782
std::vector< VectorRegisterFile * > vrf
BrigDataOffsetOperandList32_t elements
Definition: Brig.h:1523
The request should be marked with RELEASE.
Definition: request.hh:161
bool isAtomicExch() const
BrigAtomicOperation
Definition: Brig.h:270
GPUStaticInst * constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1690
GPUStaticInst * decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1720
Brig::BrigRegisterKind regKind
Definition: operand.hh:82
bool isAtomicAnd() const
Bitfield< 4 > width
Definition: miscregs.hh:1383
bool isAtomicOr() const
int getOperandSize(int operandIndex) override
Definition: mem.hh:935
GPUStaticInst * decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1765
LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode)
Definition: mem.hh:402
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:86
int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:941
virtual void execute(GPUDynInstPtr gpuDynInst)=0
BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj)
Definition: operand.cc:213
Bitfield< 4 > op
Definition: types.hh:80
int getNumOperands() override
Definition: mem.hh:690
GPUStaticInst * decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:702
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:363
uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0)
Definition: wavefront.cc:282
Brig::BrigMemoryOrder memoryOrder
Definition: mem.hh:1220
int getOperandSize(int operandIndex) override
Definition: mem.hh:661
LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode)
Definition: mem.hh:96
GPUStaticInst * decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:185
int getNumOperands() override
Definition: mem.hh:338
GPUStaticInst * decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
Definition: mem.hh:1177
bool isDstOperand(int operandIndex) override
Definition: mem.hh:370
Brig::BrigMemoryScope memoryScope
Definition: mem.hh:218
void setFlags(Flags flags)
Note that unlike other accessors, this function sets specific flags (ORs them in); it does not assign...
Definition: request.hh:595
bool isSrcOperand(int operandIndex) override
Definition: mem.hh:645
BrigWidth8_t width
Definition: Brig.h:1392
int getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
Definition: mem.hh:676
bool isVectorRegister(int operandIndex)
Definition: mem.hh:1384
bool isScalarRegister(int operandIndex) override
Definition: mem.hh:126
uint8_t BrigWidth8_t
Definition: Brig.h:146
bool isScalarRegister(int operandIndex) override
Definition: mem.hh:631
BrigOpcode
Definition: Brig.h:538
int getOperandSize(int operandIndex) override
Definition: mem.hh:1138
StInst(const Brig::BrigInstBase *ib, const BrigObject *obj, const char *_opcode, int srcIdx)
Definition: mem.hh:961
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:102
BrigOpcode16_t opcode
Definition: Brig.h:1321

Generated on Fri Jun 9 2017 13:03:38 for gem5 by doxygen 1.8.6