gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
global_memory_pipeline.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: John Kalamatianos, Sooraj Puthoor
34  */
35 
37 
#include "gpu-compute/global_memory_pipeline.hh"

#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"
45 
46 GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) :
47  computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size),
48  outOfOrderDataDelivery(p->out_of_order_data_delivery), inflightStores(0),
49  inflightLoads(0)
50 {
51 }
52 
53 void
55 {
56  computeUnit = cu;
58  _name = computeUnit->name() + ".GlobalMemPipeline";
59 }
60 
61 void
63 {
64  // apply any returned global memory operations
66 
67  bool accessVrf = true;
68  Wavefront *w = nullptr;
69 
70  // check the VRF to see if the operands of a load (or load component
71  // of an atomic) are accessible
72  if ((m) && (m->isLoad() || m->isAtomicRet())) {
73  w = m->wavefront();
74 
75  accessVrf =
76  w->computeUnit->vrf[w->simdId]->
77  vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE);
78  }
79 
80  if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
81  accessVrf && m->statusBitVector == VectorMask(0) &&
83  computeUnit->wfWait.at(m->pipeId).rdy())) {
84 
85  w = m->wavefront();
86 
87  m->completeAcc(m);
88 
89  completeRequest(m);
90 
91  // Decrement outstanding register count
92  computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
93 
94  if (m->isStore() || m->isAtomic()) {
96  m->time, -1);
97  }
98 
99  if (m->isLoad() || m->isAtomic()) {
101  m->time, -1);
102  }
103 
104  // Mark write bus busy for appropriate amount of time
105  computeUnit->glbMemToVrfBus.set(m->time);
107  w->computeUnit->wfWait.at(m->pipeId).set(m->time);
108  }
109 
110  // If pipeline has executed a global memory instruction
111  // execute global memory packets and issue global
112  // memory packets to DTLB
113  if (!gmIssuedRequests.empty()) {
115  if (mp->isLoad() || mp->isAtomic()) {
116  if (inflightLoads >= gmQueueSize) {
117  return;
118  } else {
119  ++inflightLoads;
120  }
121  } else if (mp->isStore()) {
122  if (inflightStores >= gmQueueSize) {
123  return;
124  } else {
125  ++inflightStores;
126  }
127  }
128 
129  mp->initiateAcc(mp);
130 
131  if (!outOfOrderDataDelivery && !mp->isMemFence()) {
141  gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(),
142  std::make_pair(mp, false)));
143  }
144 
145  gmIssuedRequests.pop();
146 
147  DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
148  computeUnit->cu_id, mp->simdId, mp->wfSlotId);
149  }
150 }
151 
154 {
156  if (!gmReturnedLoads.empty()) {
157  return gmReturnedLoads.front();
158  } else if (!gmReturnedStores.empty()) {
159  return gmReturnedStores.front();
160  }
161  } else {
162  if (!gmOrderedRespBuffer.empty()) {
163  auto mem_req = gmOrderedRespBuffer.begin();
164 
165  if (mem_req->second.second) {
166  return mem_req->second.first;
167  }
168  }
169  }
170 
171  return nullptr;
172 }
173 
174 void
176 {
177  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
178  assert(inflightLoads > 0);
179  --inflightLoads;
180  } else if (gpuDynInst->isStore()) {
181  assert(inflightStores > 0);
182  --inflightStores;
183  }
184 
186  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
187  assert(!gmReturnedLoads.empty());
188  gmReturnedLoads.pop();
189  } else if (gpuDynInst->isStore()) {
190  assert(!gmReturnedStores.empty());
191  gmReturnedStores.pop();
192  }
193  } else {
194  // we should only pop the oldest requst, and it
195  // should be marked as done if we are here
196  assert(gmOrderedRespBuffer.begin()->first == gpuDynInst->seqNum());
197  assert(gmOrderedRespBuffer.begin()->second.first == gpuDynInst);
198  assert(gmOrderedRespBuffer.begin()->second.second);
199  // remove this instruction from the buffer by its
200  // unique seq ID
201  gmOrderedRespBuffer.erase(gpuDynInst->seqNum());
202  }
203 }
204 
205 void
207 {
208  gmIssuedRequests.push(gpuDynInst);
209 }
210 
211 void
213 {
215  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
216  assert(isGMLdRespFIFOWrRdy());
217  gmReturnedLoads.push(gpuDynInst);
218  } else {
219  assert(isGMStRespFIFOWrRdy());
220  gmReturnedStores.push(gpuDynInst);
221  }
222  } else {
223  auto mem_req = gmOrderedRespBuffer.find(gpuDynInst->seqNum());
224  // if we are getting a response for this mem request,
225  // then it ought to already be in the ordered response
226  // buffer
227  assert(mem_req != gmOrderedRespBuffer.end());
228  mem_req->second.second = true;
229  }
230 }
231 
232 void
234 {
236  .name(name() + ".load_vrf_bank_conflict_cycles")
237  .desc("total number of cycles GM data are delayed before updating "
238  "the VRF")
239  ;
240 }
int coissue_return
Definition: shader.hh:125
#define DPRINTF(x,...)
Definition: trace.hh:212
bool rdy() const
Definition: misc.hh:70
std::queue< GPUDynInstPtr > gmIssuedRequests
void handleResponse(GPUDynInstPtr gpuDynInst)
this method handles responses sent to this GM pipeline by the CU.
WaitClass glbMemToVrfBus
Bitfield< 0 > m
Definition: miscregs.hh:1577
void completeRequest(GPUDynInstPtr gpuDynInst)
once a memory request is finished we remove it from the buffer.
GPUDynInstPtr getNextReadyResp()
find the next ready response to service.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
std::map< uint64_t, std::pair< GPUDynInstPtr, bool > > gmOrderedRespBuffer
GlobalMemPipeline(const ComputeUnitParams *params)
int simdId
Definition: wavefront.hh:165
std::queue< GPUDynInstPtr > gmReturnedStores
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
bool isGMLdRespFIFOWrRdy() const
Stats::Scalar loadVrfBankConflictCycles
ComputeUnit * computeUnit
Definition: wavefront.hh:167
Bitfield< 1 > mp
Definition: misc.hh:604
const std::string & name() const
uint32_t outstandingReqsRdGm
Definition: wavefront.hh:219
Bitfield< 0 > w
uint32_t outstandingReqs
Definition: wavefront.hh:210
int globalMemSize
Definition: shader.hh:133
uint32_t outstandingReqsWrGm
Definition: wavefront.hh:215
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:254
Shader * shader
virtual const std::string name() const
Definition: sim_object.hh:117
void ScheduleAdd(uint32_t *val, Tick when, int x)
Definition: shader.cc:312
void issueRequest(GPUDynInstPtr gpuDynInst)
issues a request to the pipeline - i.e., enqueue it in the request buffer.
std::vector< VectorRegisterFile * > vrf
std::queue< GPUDynInstPtr > gmReturnedLoads
void init(ComputeUnit *cu)
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:287
std::vector< WaitClass > wfWait
Bitfield< 0 > p
void set(uint32_t i)
Definition: misc.hh:60
bool isGMStRespFIFOWrRdy() const

Generated on Fri Jun 9 2017 13:03:47 for gem5 by doxygen 1.8.6