gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
lds_state.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: John Kalamatianos, Joe Gross
34  */
35 
36 #ifndef __LDS_STATE_HH__
37 #define __LDS_STATE_HH__
38 
39 #include <array>
40 #include <queue>
41 #include <string>
42 #include <unordered_map>
43 #include <utility>
44 #include <vector>
45 
46 #include "enums/MemType.hh"
47 #include "gpu-compute/misc.hh"
48 #include "mem/mem_object.hh"
49 #include "mem/port.hh"
50 #include "params/LdsState.hh"
51 
52 class ComputeUnit;
53 
58 class LdsChunk
59 {
60  public:
61  LdsChunk(const uint32_t x_size):
62  chunk(x_size)
63  {
64  }
65 
66  LdsChunk() {}
67 
71  template<class T>
72  T
73  read(const uint32_t index)
74  {
75  fatal_if(!chunk.size(), "cannot read from an LDS chunk of size 0");
76  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
77  T *p0 = (T *) (&(chunk.at(index)));
78  return *p0;
79  }
80 
84  template<class T>
85  void
86  write(const uint32_t index, const T value)
87  {
88  fatal_if(!chunk.size(), "cannot write to an LDS chunk of size 0");
89  fatal_if(index >= chunk.size(), "out-of-bounds access to an LDS chunk");
90  T *p0 = (T *) (&(chunk.at(index)));
91  *p0 = value;
92  }
93 
98  size() const
99  {
100  return chunk.size();
101  }
102 
103  protected:
104  // the actual data store for this slice of the LDS
106 };
107 
108 // Local Data Share (LDS) State per Wavefront (contents of the LDS region
109 // allocated to the WorkGroup of this Wavefront)
110 class LdsState: public MemObject
111 {
112  protected:
113 
117  class TickEvent: public Event
118  {
119  protected:
120 
121  LdsState *ldsState = nullptr;
122 
124 
125  public:
126 
127  TickEvent(LdsState *_ldsState) :
128  ldsState(_ldsState)
129  {
130  }
131 
132  virtual void
133  process();
134 
135  void
137  {
138  mainEventQueue[0]->schedule(this, when);
139  }
140 
141  void
143  {
144  mainEventQueue[0]->deschedule(this);
145  }
146  };
147 
151  class CuSidePort: public SlavePort
152  {
153  public:
154  CuSidePort(const std::string &_name, LdsState *_ownerLds) :
155  SlavePort(_name, _ownerLds), ownerLds(_ownerLds)
156  {
157  }
158 
159  protected:
161 
162  virtual bool
164 
165  virtual Tick
167  {
168  return 0;
169  }
170 
171  virtual void
173 
174  virtual void
176  {
177  }
178 
179  virtual void
180  recvRetry();
181 
182  virtual void
183  recvRespRetry();
184 
185  virtual AddrRangeList
187  {
188  AddrRangeList ranges;
189  ranges.push_back(ownerLds->getAddrRange());
190  return ranges;
191  }
192 
193  template<typename T>
194  void
195  loadData(PacketPtr packet);
196 
197  template<typename T>
198  void
199  storeData(PacketPtr packet);
200 
201  template<typename T>
202  void
203  atomicOperation(PacketPtr packet);
204  };
205 
206  protected:
207 
208  // the lds reference counter
209  // The key is the workgroup ID and dispatch ID
210  // The value is the number of wavefronts that reference this LDS, as
211  // wavefronts are launched, the counter goes up for that workgroup and when
212  // they return it decreases, once it reaches 0 then this chunk of the LDS is
213  // returned to the available pool. However, it is deallocated on the 1->0
214  // transition, not whenever the counter is 0 as it always starts with 0 when
215  // the workgroup asks for space
216  std::unordered_map<uint32_t,
217  std::unordered_map<uint32_t, int32_t>> refCounter;
218 
219  // the map that allows workgroups to access their own chunk of the LDS
220  std::unordered_map<uint32_t,
221  std::unordered_map<uint32_t, LdsChunk>> chunkMap;
222 
223  // an event to allow the LDS to wake up at a specified time
225 
226  // the queue of packets that are going back to the CU after a
227  // read/write/atomic op
228  // TODO need to make this have a maximum size to create flow control
229  std::queue<std::pair<Tick, PacketPtr>> returnQueue;
230 
231  // whether or not there are pending responses
232  bool retryResp = false;
233 
234  bool
235  process();
236 
238  getDynInstr(PacketPtr packet);
239 
240  bool
241  processPacket(PacketPtr packet);
242 
243  unsigned
244  countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
245 
246  unsigned
248  unsigned *numBankAccesses);
249 
250  public:
251  typedef LdsStateParams Params;
252 
253  LdsState(const Params *params);
254 
255  // prevent copy construction
256  LdsState(const LdsState&) = delete;
257 
259  {
260  parent = nullptr;
261  }
262 
263  const Params *
264  params() const
265  {
266  return dynamic_cast<const Params *>(_params);
267  }
268 
269  bool
270  isRetryResp() const
271  {
272  return retryResp;
273  }
274 
275  void
276  setRetryResp(const bool value)
277  {
278  retryResp = value;
279  }
280 
281  // prevent assignment
282  LdsState &
283  operator=(const LdsState &) = delete;
284 
288  int
289  increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
290  {
291  int refCount = getRefCounter(dispatchId, wgId);
292  fatal_if(refCount < 0,
293  "reference count should not be below zero");
294  return ++refCounter[dispatchId][wgId];
295  }
296 
301  int
302  decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
303  {
304  int refCount = getRefCounter(dispatchId, wgId);
305 
306  fatal_if(refCount <= 0,
307  "reference count should not be below zero or at zero to"
308  "decrement");
309 
310  refCounter[dispatchId][wgId]--;
311 
312  if (refCounter[dispatchId][wgId] == 0) {
313  releaseSpace(dispatchId, wgId);
314  return 0;
315  } else {
316  return refCounter[dispatchId][wgId];
317  }
318  }
319 
323  int
324  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
325  {
326  auto dispatchIter = chunkMap.find(dispatchId);
327  fatal_if(dispatchIter == chunkMap.end(),
328  "could not locate this dispatch id [%d]", dispatchId);
329 
330  auto workgroup = dispatchIter->second.find(wgId);
331  fatal_if(workgroup == dispatchIter->second.end(),
332  "could not find this workgroup id within this dispatch id"
333  " did[%d] wgid[%d]", dispatchId, wgId);
334 
335  auto refCountIter = refCounter.find(dispatchId);
336  if (refCountIter == refCounter.end()) {
337  fatal("could not locate this dispatch id [%d]", dispatchId);
338  } else {
339  auto workgroup = refCountIter->second.find(wgId);
340  if (workgroup == refCountIter->second.end()) {
341  fatal("could not find this workgroup id within this dispatch id"
342  " did[%d] wgid[%d]", dispatchId, wgId);
343  } else {
344  return refCounter.at(dispatchId).at(wgId);
345  }
346  }
347 
348  fatal("should not reach this point");
349  return 0;
350  }
351 
356  LdsChunk *
357  reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
358  const uint32_t size)
359  {
360  if (chunkMap.find(dispatchId) != chunkMap.end()) {
361  fatal_if(
362  chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
363  "duplicate workgroup ID asking for space in the LDS "
364  "did[%d] wgid[%d]", dispatchId, wgId);
365  }
366 
368  "request would ask for more space than is available");
369 
370  bytesAllocated += size;
371 
372  chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
373  // make an entry for this workgroup
374  refCounter[dispatchId][wgId] = 0;
375 
376  return &chunkMap[dispatchId][wgId];
377  }
378 
379  bool
381 
382  Tick
384  {
385  // TODO set to max(lastCommand+1, curTick())
386  return returnQueue.empty() ? curTick() : returnQueue.back().first;
387  }
388 
389  void
390  setParent(ComputeUnit *x_parent);
391 
392  // accessors
393  ComputeUnit *
394  getParent() const
395  {
396  return parent;
397  }
398 
399  std::string
401  {
402  return _name;
403  }
404 
405  int
406  getBanks() const
407  {
408  return banks;
409  }
410 
411  ComputeUnit *
413  {
414  return parent;
415  }
416 
417  int
419  {
420  return bankConflictPenalty;
421  }
422 
426  std::size_t
427  ldsSize(const uint32_t x_wgId)
428  {
429  return chunkMap[x_wgId].size();
430  }
431 
432  AddrRange
433  getAddrRange() const
434  {
435  return range;
436  }
437 
438  virtual BaseSlavePort &
439  getSlavePort(const std::string& if_name, PortID idx)
440  {
441  if (if_name == "cuPort") {
442  // TODO need to set name dynamically at this point?
443  return cuPort;
444  } else {
445  fatal("cannot resolve the port name " + if_name);
446  }
447  }
448 
452  bool
453  canReserve(uint32_t x_size) const
454  {
455  return bytesAllocated + x_size <= maximumSize;
456  }
457 
458  private:
462  bool
463  releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
464  {
465  auto dispatchIter = chunkMap.find(x_dispatchId);
466 
467  if (dispatchIter == chunkMap.end()) {
468  fatal("dispatch id not found [%d]", x_dispatchId);
469  } else {
470  auto workgroupIter = dispatchIter->second.find(x_wgId);
471  if (workgroupIter == dispatchIter->second.end()) {
472  fatal("workgroup id [%d] not found in dispatch id [%d]",
473  x_wgId, x_dispatchId);
474  }
475  }
476 
477  fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
478  "releasing more space than was allocated");
479 
480  bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
481  chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
482  return true;
483  }
484 
485  // the port that connects this LDS to its owner CU
487 
488  ComputeUnit* parent = nullptr;
489 
490  std::string _name;
491 
492  // the number of bytes currently reserved by all workgroups
493  int bytesAllocated = 0;
494 
495  // the size of the LDS, the most bytes available
497 
498  // Address range of this memory
500 
501  // the penalty, in cycles, for each LDS bank conflict
503 
504  // the number of banks in the LDS underlying data store
505  int banks = 0;
506 };
507 
508 #endif // __LDS_STATE_HH__
void atomicOperation(PacketPtr packet)
LdsState * ownerLds
Definition: lds_state.hh:160
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:185
Bitfield< 30, 0 > index
an event to allow event-driven execution
Definition: lds_state.hh:117
std::string _name
Definition: lds_state.hh:490
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:329
std::string getName()
Definition: lds_state.hh:400
STL pair class.
Definition: stl.hh:61
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:229
int bankConflictPenalty
Definition: lds_state.hh:502
bool isRetryResp() const
Definition: lds_state.hh:270
std::vector< uint8_t > chunk
Definition: lds_state.hh:105
TickEvent(LdsState *_ldsState)
Definition: lds_state.hh:127
MemObject declaration.
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:228
AddrRange range
Definition: lds_state.hh:499
Port Object Declaration.
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:246
LdsState(const Params *params)
the default constructor that works with SWIG
Definition: lds_state.cc:49
vector< EventQueue * > mainEventQueue
Array for main event queues.
Definition: eventq.cc:59
LdsStateParams Params
Definition: lds_state.hh:251
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:197
A SlavePort is a specialisation of a port.
Definition: port.hh:331
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
Definition: lds_state.hh:58
ComputeUnit * parent
Definition: lds_state.hh:488
AddrRange getAddrRange() const
Definition: lds_state.hh:433
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to ...
Definition: port.hh:139
STL vector class.
Definition: stl.hh:40
CuSidePort(const std::string &_name, LdsState *_ownerLds)
Definition: lds_state.hh:154
void schedule(Tick when)
Definition: lds_state.hh:136
The AddrRange class encapsulates an address range, and supports a number of tests to check if two ran...
Definition: addr_range.hh:72
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
void loadData(PacketPtr packet)
virtual void recvRangeChange()
Definition: lds_state.hh:175
void setRetryResp(const bool value)
Definition: lds_state.hh:276
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:267
LdsState * ldsState
Definition: lds_state.hh:121
Tick curTick()
The current simulated tick.
Definition: core.hh:47
Tick earliestReturnTime() const
Definition: lds_state.hh:383
int getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
return the current reference count for this workgroup id
Definition: lds_state.hh:324
std::size_t ldsSize(const uint32_t x_wgId)
get the allocated size for this workgroup
Definition: lds_state.hh:427
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:397
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: lds_state.hh:186
uint64_t Tick
Tick count type.
Definition: types.hh:63
LdsState & operator=(const LdsState &)=delete
bool canReserve(uint32_t x_size) const
can this much space be reserved for a workgroup?
Definition: lds_state.hh:453
#define fatal(...)
Definition: misc.hh:163
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:96
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:255
int getBanks() const
Definition: lds_state.hh:406
std::unordered_map< uint32_t, std::unordered_map< uint32_t, int32_t > > refCounter
Definition: lds_state.hh:217
int increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
use the dynamic wave id to create or just increase the reference count
Definition: lds_state.hh:289
int decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
decrease the reference count after making sure it is in the list give back this chunk if the ref coun...
Definition: lds_state.hh:302
T read(const uint32_t index)
a read operation
Definition: lds_state.hh:73
int getBankConflictPenalty() const
Definition: lds_state.hh:418
virtual Tick recvAtomic(PacketPtr pkt)
Receive an atomic request packet from the master port.
Definition: lds_state.hh:166
LdsChunk(const uint32_t x_size)
Definition: lds_state.hh:61
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
ComputeUnit * getComputeUnit() const
Definition: lds_state.hh:412
TickEvent tickEvent
Definition: lds_state.hh:224
static SimObject * find(const char *name)
Find the SimObject with the given name and return a pointer to it.
Definition: sim_object.cc:179
int size()
Definition: pagetable.hh:146
std::unordered_map< uint32_t, std::unordered_map< uint32_t, LdsChunk > > chunkMap
Definition: lds_state.hh:221
void storeData(PacketPtr packet)
bool retryResp
Definition: lds_state.hh:232
virtual BaseSlavePort & getSlavePort(const std::string &if_name, PortID idx)
Get a slave port with a given name and index.
Definition: lds_state.hh:439
Definition: eventq.hh:185
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60
LdsChunk * reserveSpace(const uint32_t dispatchId, const uint32_t wgId, const uint32_t size)
assign a parent and request this amount of space be set aside for this wgid
Definition: lds_state.hh:357
int banks
Definition: lds_state.hh:505
const SimObjectParams * _params
Cached copy of the object parameters.
Definition: sim_object.hh:107
const Params * params() const
Definition: lds_state.hh:264
void write(const uint32_t index, const T value)
a write operation
Definition: lds_state.hh:86
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:181
int maximumSize
Definition: lds_state.hh:496
bool releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
give back the space
Definition: lds_state.hh:463
int bytesAllocated
Definition: lds_state.hh:493
CuSidePort cuPort
Definition: lds_state.hh:486
fatal_if(p->js_features.size() > 16,"Too many job slot feature registers specified (%i)\n", p->js_features.size())
ComputeUnit * getParent() const
Definition: lds_state.hh:394
LdsChunk()
Definition: lds_state.hh:66
bool process()
look for packets to return at this time
Definition: lds_state.cc:276
std::vector< uint8_t >::size_type size() const
get the size of this chunk
Definition: lds_state.hh:98
CuSidePort is the LDS Port closer to the CU side.
Definition: lds_state.hh:151
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:81
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:179

Generated on Fri Jun 9 2017 13:03:48 for gem5 by doxygen 1.8.6