gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
main.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Steve Reinhardt
34  */
35 
36 #include "arch/hsail/insts/decl.hh"
37 #include "debug/GPUExec.hh"
40 
41 namespace HsailISA
42 {
43  template<> const char *B1::label = "b1";
44  template<> const char *B8::label = "b8";
45  template<> const char *B16::label = "b16";
46  template<> const char *B32::label = "b32";
47  template<> const char *B64::label = "b64";
48 
49  template<> const char *S8::label = "s8";
50  template<> const char *S16::label = "s16";
51  template<> const char *S32::label = "s32";
52  template<> const char *S64::label = "s64";
53 
54  template<> const char *U8::label = "u8";
55  template<> const char *U16::label = "u16";
56  template<> const char *U32::label = "u32";
57  template<> const char *U64::label = "u64";
58 
59  template<> const char *F32::label = "f32";
60  template<> const char *F64::label = "f64";
61 
62  const char*
64  {
65  using namespace Brig;
66 
67  switch (cmpOp) {
68  case BRIG_COMPARE_EQ:
69  return "eq";
70  case BRIG_COMPARE_NE:
71  return "ne";
72  case BRIG_COMPARE_LT:
73  return "lt";
74  case BRIG_COMPARE_LE:
75  return "le";
76  case BRIG_COMPARE_GT:
77  return "gt";
78  case BRIG_COMPARE_GE:
79  return "ge";
80  case BRIG_COMPARE_EQU:
81  return "equ";
82  case BRIG_COMPARE_NEU:
83  return "neu";
84  case BRIG_COMPARE_LTU:
85  return "ltu";
86  case BRIG_COMPARE_LEU:
87  return "leu";
88  case BRIG_COMPARE_GTU:
89  return "gtu";
90  case BRIG_COMPARE_GEU:
91  return "geu";
92  case BRIG_COMPARE_NUM:
93  return "num";
94  case BRIG_COMPARE_NAN:
95  return "nan";
96  case BRIG_COMPARE_SEQ:
97  return "seq";
98  case BRIG_COMPARE_SNE:
99  return "sne";
100  case BRIG_COMPARE_SLT:
101  return "slt";
102  case BRIG_COMPARE_SLE:
103  return "sle";
104  case BRIG_COMPARE_SGT:
105  return "sgt";
106  case BRIG_COMPARE_SGE:
107  return "sge";
108  case BRIG_COMPARE_SGEU:
109  return "sgeu";
110  case BRIG_COMPARE_SEQU:
111  return "sequ";
112  case BRIG_COMPARE_SNEU:
113  return "sneu";
114  case BRIG_COMPARE_SLTU:
115  return "sltu";
116  case BRIG_COMPARE_SLEU:
117  return "sleu";
118  case BRIG_COMPARE_SNUM:
119  return "snum";
120  case BRIG_COMPARE_SNAN:
121  return "snan";
122  case BRIG_COMPARE_SGTU:
123  return "sgtu";
124  default:
125  return "unknown";
126  }
127  }
128 
129  void
131  {
132  Wavefront *w = gpuDynInst->wavefront();
133 
134  const VectorMask &mask = w->getPred();
135 
136  // mask off completed work-items
137  for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
138  if (mask[lane]) {
139  w->initMask[lane] = 0;
140  }
141 
142  }
143 
144  // delete extra instructions fetched for completed work-items
145  w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
146  w->instructionBuffer.end());
147  if (w->pendingFetch) {
148  w->dropFetch = true;
149  }
150 
151  // if all work-items have completed, then wave-front is done
152  if (w->initMask.none()) {
154 
155  int32_t refCount = w->computeUnit->getLds().
156  decreaseRefCounter(w->dispatchId, w->wgId);
157 
158  DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
159  w->computeUnit->cu_id, w->wgId, refCount);
160 
161  // free the vector registers of the completed wavefront
164 
165  assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
166 
167  uint32_t endIndex = (w->startVgprIndex +
168  w->reservedVectorRegs - 1) %
169  w->computeUnit->vrf[w->simdId]->numRegs();
170 
171  w->computeUnit->vrf[w->simdId]->manager->
172  freeRegion(w->startVgprIndex, endIndex);
173 
174  w->reservedVectorRegs = 0;
175  w->startVgprIndex = 0;
177 
178  DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
179  w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
180 
181  if (!refCount) {
182  setFlag(SystemScope);
183  setFlag(Release);
184  setFlag(GlobalSegment);
185  // Notify Memory System of Kernel Completion
186  // Kernel End = isKernel + isRelease
188  GPUDynInstPtr local_mempacket = gpuDynInst;
189  local_mempacket->useContinuation = false;
190  local_mempacket->simdId = w->simdId;
191  local_mempacket->wfSlotId = w->wfSlotId;
192  local_mempacket->wfDynId = w->wfDynId;
193  w->computeUnit->injectGlobalMemFence(local_mempacket, true);
194  } else {
196  }
197  }
198  }
199 
200  void
202  {
203  Wavefront *w = gpuDynInst->wavefront();
204 
205  assert(w->barrierCnt == w->oldBarrierCnt);
206  w->barrierCnt = w->oldBarrierCnt + 1;
207  w->stalledAtBarrier = true;
208  }
209 } // namespace HsailISA
#define DPRINTF(x,...)
Definition: trace.hh:212
VectorMask getPred()
Definition: wavefront.hh:338
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
void execute(GPUDynInstPtr gpuDynInst)
Definition: main.cc:201
uint32_t barrierCnt
Definition: wavefront.hh:157
void setFlag(Flags flag)
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
VectorMask initMask
Definition: wavefront.hh:250
GpuDispatcher * dispatcher
Definition: shader.hh:165
int wfSize() const
int simdId
Definition: wavefront.hh:165
bool dropFetch
Definition: wavefront.hh:172
BrigCompareOperation
Definition: Brig.h:303
uint32_t dispatchId
Definition: wavefront.hh:208
int wfSlotId
Definition: wavefront.hh:162
bool stalledAtBarrier
Definition: wavefront.hh:256
static const char * label
Definition: decl.hh:73
uint64_t wfDynId
Definition: wavefront.hh:282
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:169
void execute(GPUDynInstPtr gpuDynInst)
Definition: main.cc:130
ComputeUnit * computeUnit
Definition: wavefront.hh:167
uint32_t wgId
Definition: wavefront.hh:199
const char * cmpOpToString(Brig::BrigCompareOperation cmpOp)
Definition: main.cc:63
Bitfield< 0 > w
Stats::Scalar completedWfs
Shader * shader
uint32_t oldBarrierCnt
Definition: wavefront.hh:156
LdsState & getLds() const
bool pendingFetch
Definition: wavefront.hh:171
int reservedVectorRegs
Definition: wavefront.hh:230
uint32_t startVgprIndex
Definition: wavefront.hh:233
void scheduleDispatch()
Definition: dispatcher.cc:344
std::vector< VectorRegisterFile * > vrf
Bitfield< 3, 0 > mask
Definition: types.hh:64
status_e status
Definition: wavefront.hh:160
std::vector< int > vectorRegsReserved

Generated on Fri Jun 9 2017 13:03:39 for gem5 by doxygen 1.8.6