gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
lds_state.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: John Kalamatianos, Joe Gross
34  */
35 
36 #include "gpu-compute/lds_state.hh"
37 
38 #include <array>
39 #include <cstdio>
40 #include <cstdlib>
41 
44 #include "gpu-compute/shader.hh"
45 
49 LdsState::LdsState(const Params *params) :
50  MemObject(params),
51  tickEvent(this),
52  cuPort(name() + ".port", this),
53  maximumSize(params->size),
54  range(params->range),
55  bankConflictPenalty(params->bankConflictPenalty),
56  banks(params->banks)
57 {
58  fatal_if(params->banks <= 0,
59  "Number of LDS banks should be positive number");
60  fatal_if((params->banks & (params->banks - 1)) != 0,
61  "Number of LDS banks should be a power of 2");
62  fatal_if(params->size <= 0,
63  "cannot allocate an LDS with a size less than 1");
64  fatal_if(params->size % 2,
65  "the LDS should be an even number");
66 }
67 
71 LdsState *
72 LdsStateParams::create()
73 {
74  return new LdsState(this);
75 }
76 
80 void
82 {
83  // check that this gets assigned to the same thing each time
84  fatal_if(!x_parent, "x_parent should not be nullptr");
85  fatal_if(x_parent == parent,
86  "should not be setting the parent twice");
87 
88  parent = x_parent;
89  _name = x_parent->name() + ".LdsState";
90 }
91 
95 unsigned
96 LdsState::countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
97 {
98  Packet::SenderState *baseSenderState = packet->senderState;
99  while (baseSenderState->predecessor) {
100  baseSenderState = baseSenderState->predecessor;
101  }
102  const ComputeUnit::LDSPort::SenderState *senderState =
103  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(baseSenderState);
104 
105  fatal_if(!senderState,
106  "did not get the right sort of sender state");
107 
108  GPUDynInstPtr gpuDynInst = senderState->getMemInst();
109 
110  return countBankConflicts(gpuDynInst, bankAccesses);
111 }
112 
113 // Count the total number of bank conflicts for the local memory packet
114 unsigned
116  unsigned *numBankAccesses)
117 {
118  int bank_conflicts = 0;
119  std::vector<int> bank;
120  // the number of LDS banks being touched by the memory instruction
121  int numBanks = std::min(parent->wfSize(), banks);
122  // if the wavefront size is larger than the number of LDS banks, we
123  // need to iterate over all work items to calculate the total
124  // number of bank conflicts
125  int groups = (parent->wfSize() > numBanks) ?
126  (parent->wfSize() / numBanks) : 1;
127  for (int i = 0; i < groups; i++) {
128  // Address Array holding all the work item addresses of an instruction
129  std::vector<Addr> addr_array;
130  addr_array.resize(numBanks, 0);
131  bank.clear();
132  bank.resize(banks, 0);
133  int max_bank = 0;
134 
135  // populate the address array for all active work items
136  for (int j = 0; j < numBanks; j++) {
137  if (gpuDynInst->exec_mask[(i*numBanks)+j]) {
138  addr_array[j] = gpuDynInst->addr[(i*numBanks)+j];
139  } else {
140  addr_array[j] = std::numeric_limits<Addr>::max();
141  }
142  }
143 
144  if (gpuDynInst->isLoad() || gpuDynInst->isStore()) {
145  // mask identical addresses
146  for (int j = 0; j < numBanks; ++j) {
147  for (int j0 = 0; j0 < j; j0++) {
148  if (addr_array[j] != std::numeric_limits<Addr>::max()
149  && addr_array[j] == addr_array[j0]) {
150  addr_array[j] = std::numeric_limits<Addr>::max();
151  }
152  }
153  }
154  }
155  // calculate bank conflicts
156  for (int j = 0; j < numBanks; ++j) {
157  if (addr_array[j] != std::numeric_limits<Addr>::max()) {
158  int bankId = addr_array[j] % banks;
159  bank[bankId]++;
160  max_bank = std::max(max_bank, bank[bankId]);
161  // Count the number of LDS banks accessed.
162  // Since we have masked identical addresses all remaining
163  // accesses will need to be serialized if they access
164  // the same bank (bank conflict).
165  (*numBankAccesses)++;
166  }
167  }
168  bank_conflicts += max_bank;
169  }
170  panic_if(bank_conflicts > parent->wfSize(),
171  "Max bank conflicts should match num of work items per instr");
172  return bank_conflicts;
173 }
174 
178 bool
180 {
181  return ownerLds->processPacket(packet);
182 }
183 
186 {
188  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
189  packet->senderState);
190  return ss->getMemInst();
191 }
192 
196 bool
198 {
199  unsigned bankAccesses = 0;
200  // the number of conflicts this packet will have when accessing the LDS
201  unsigned bankConflicts = countBankConflicts(packet, &bankAccesses);
202  // count the total number of physical LDS bank accessed
203  parent->ldsBankAccesses += bankAccesses;
204  // count the LDS bank conflicts. A number set to 1 indicates one
205  // access per bank maximum so there are no bank conflicts
206  parent->ldsBankConflictDist.sample(bankConflicts-1);
207 
208  GPUDynInstPtr dynInst = getDynInstr(packet);
209  // account for the LDS bank conflict overhead
210  int busLength = (dynInst->isLoad()) ? parent->loadBusLength() :
211  (dynInst->isStore()) ? parent->storeBusLength() :
213  // delay for accessing the LDS
214  Tick processingTime =
215  parent->shader->ticks(bankConflicts * bankConflictPenalty) +
216  parent->shader->ticks(busLength);
217  // choose (delay + last packet in queue) or (now + delay) as the time to
218  // return this
219  Tick doneAt = earliestReturnTime() + processingTime;
220  // then store it for processing
221  return returnQueuePush(std::make_pair(doneAt, packet));
222 }
223 
227 bool
229 {
230  // TODO add time limits (e.g. one packet per cycle) and queue size limits
231  // and implement flow control
232  returnQueue.push(thePair);
233 
234  // if there is no set wakeup time, look through the queue
235  if (!tickEvent.scheduled()) {
236  process();
237  }
238 
239  return true;
240 }
241 
245 void
247 {
248  fatal("not implemented");
249 }
250 
254 void
256 {
257  // TODO verify that this is the right way to do this
258  assert(ownerLds->isRetryResp());
259  ownerLds->setRetryResp(false);
260  ownerLds->process();
261 }
262 
266 void
268 {
269  fatal("not implemented");
270 }
271 
275 bool
277 {
278  Tick now = clockEdge();
279 
280  // send back completed packets
281  while (!returnQueue.empty() && returnQueue.front().first <= now) {
282  PacketPtr packet = returnQueue.front().second;
283 
285  dynamic_cast<ComputeUnit::LDSPort::SenderState *>(
286  packet->senderState);
287 
288  GPUDynInstPtr gpuDynInst = ss->getMemInst();
289 
290  gpuDynInst->initiateAcc(gpuDynInst);
291 
292  packet->makeTimingResponse();
293 
294  returnQueue.pop();
295 
296  bool success = cuPort.sendTimingResp(packet);
297 
298  if (!success) {
299  retryResp = true;
300  panic("have not handled timing responses being NACK'd when sent"
301  "back");
302  }
303  }
304 
305  // determine the next wakeup time
306  if (!returnQueue.empty()) {
307 
308  Tick next = returnQueue.front().first;
309 
310  if (tickEvent.scheduled()) {
311 
312  if (next < tickEvent.when()) {
313 
315  tickEvent.schedule(next);
316  }
317  } else {
318  tickEvent.schedule(next);
319  }
320  }
321 
322  return true;
323 }
324 
328 void
330 {
331  ldsState->process();
332 }
Tick ticks(int numCycles) const
Definition: shader.hh:91
LdsState * ownerLds
Definition: lds_state.hh:160
GPUDynInstPtr getDynInstr(PacketPtr packet)
Definition: lds_state.cc:185
std::string _name
Definition: lds_state.hh:490
virtual void process()
wake up at this time and perform specified actions
Definition: lds_state.cc:329
const std::string & name()
Definition: trace.cc:49
Bitfield< 7 > i
Definition: miscregs.hh:1378
STL pair class.
Definition: stl.hh:61
#define panic(...)
Definition: misc.hh:153
std::queue< std::pair< Tick, PacketPtr > > returnQueue
Definition: lds_state.hh:229
int bankConflictPenalty
Definition: lds_state.hh:502
void makeTimingResponse()
Definition: packet.hh:863
int storeBusLength()
bool returnQueuePush(std::pair< Tick, PacketPtr > thePair)
add this to the queue of packets to be returned
Definition: lds_state.cc:228
panic_if(!root,"Invalid expression\n")
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:381
int wfSize() const
virtual void recvFunctional(PacketPtr pkt)
receive a packet in functional mode
Definition: lds_state.cc:246
LdsState(const Params *params)
the default constructor that works with SWIG
Definition: lds_state.cc:49
LdsStateParams Params
Definition: lds_state.hh:251
bool processPacket(PacketPtr packet)
process an incoming packet, add it to the return queue
Definition: lds_state.cc:197
ComputeUnit * parent
Definition: lds_state.hh:488
GPUDynInstPtr getMemInst() const
Stats::Distribution ldsBankConflictDist
void schedule(Tick when)
Definition: lds_state.hh:136
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Tick clockEdge(Cycles cycles=Cycles(0)) const
Determine the tick when a cycle begins, by default the current one, but the argument also enables the...
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function...
Definition: port.cc:251
virtual void recvRetry()
receive a retry
Definition: lds_state.cc:267
Tick earliestReturnTime() const
Definition: lds_state.hh:383
SenderState * predecessor
Definition: packet.hh:379
Tick when() const
Get the time that the event is scheduled.
Definition: eventq.hh:397
uint64_t Tick
Tick count type.
Definition: types.hh:63
#define fatal(...)
Definition: misc.hh:163
unsigned countBankConflicts(PacketPtr packet, unsigned *bankAccesses)
derive the gpu mem packet from the packet and then count the bank conflicts
Definition: lds_state.cc:96
Bitfield< 21 > ss
Definition: miscregs.hh:1371
virtual void recvRespRetry()
receive a retry for a response
Definition: lds_state.cc:255
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a MemObject that sees the packet.
Definition: packet.hh:377
Bitfield< 24 > j
Definition: miscregs.hh:1369
Shader * shader
TickEvent tickEvent
Definition: lds_state.hh:224
int size()
Definition: pagetable.hh:146
virtual const std::string name() const
Definition: sim_object.hh:117
SenderState is information carried along with the packet, esp.
bool retryResp
Definition: lds_state.hh:232
SenderState * senderState
This packet's sender state.
Definition: packet.hh:454
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60
int loadBusLength()
int banks
Definition: lds_state.hh:505
CuSidePort cuPort
Definition: lds_state.hh:486
fatal_if(p->js_features.size() > 16,"Too many job slot feature registers specified (%i)\n", p->js_features.size())
Stats::Scalar ldsBankAccesses
bool process()
look for packets to return at this time
Definition: lds_state.cc:276
void setParent(ComputeUnit *x_parent)
set the parent and name based on the parent
Definition: lds_state.cc:81
void sample(const U &v, int n=1)
Add a value to the distribution n times.
Definition: statistics.hh:1869
virtual bool recvTimingReq(PacketPtr pkt)
receive the packet from the CU
Definition: lds_state.cc:179

Generated on Fri Jun 9 2017 13:03:48 for gem5 by doxygen 1.8.6