gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GPUCoalescer.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Sooraj Puthoor
34  */
35 
36 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
37 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
38 
39 #include <iostream>
40 #include <unordered_map>
41 
42 #include "base/statistics.hh"
43 #include "mem/protocol/HSAScope.hh"
44 #include "mem/protocol/HSASegment.hh"
45 #include "mem/protocol/PrefetchBit.hh"
46 #include "mem/protocol/RubyAccessMode.hh"
47 #include "mem/protocol/RubyRequestType.hh"
48 #include "mem/protocol/SequencerRequestType.hh"
49 #include "mem/request.hh"
53 
54 class DataBlock;
55 class CacheMsg;
56 class MachineID;
57 class CacheMemory;
58 
59 class RubyGPUCoalescerParams;
60 
61 HSAScope reqScopeToHSAScope(Request* req);
62 HSASegment reqSegmentToHSASegment(Request* req);
63 
65 {
67  RubyRequestType m_type;
69 
70  GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type,
71  Cycles _issue_time)
72  : pkt(_pkt), m_type(_m_type), issue_time(_issue_time)
73  {}
74 };
75 
77 {
78  public:
79  RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
80  : pkt(pkt), primaryType(p_type), secondaryType(s_type)
81  {
82  }
83 
84  RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL),
85  secondaryType(RubyRequestType_NULL)
86  {
87  }
88 
90  RubyRequestType primaryType;
91  RubyRequestType secondaryType;
92 };
93 
94 std::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj);
95 
96 class GPUCoalescer : public RubyPort
97 {
98  public:
99  typedef RubyGPUCoalescerParams Params;
100  GPUCoalescer(const Params *);
101  ~GPUCoalescer();
102 
103  // Public Methods
104  void wakeup(); // Used only for deadlock detection
105 
106  void printProgress(std::ostream& out) const;
107  void resetStats();
108  void collateStats();
109  void regStats();
110 
111  void writeCallback(Addr address, DataBlock& data);
112 
113  void writeCallback(Addr address,
114  MachineType mach,
115  DataBlock& data);
116 
117  void writeCallback(Addr address,
118  MachineType mach,
119  DataBlock& data,
120  Cycles initialRequestTime,
121  Cycles forwardRequestTime,
122  Cycles firstResponseTime,
123  bool isRegion);
124 
125  void writeCallback(Addr address,
126  MachineType mach,
127  DataBlock& data,
128  Cycles initialRequestTime,
129  Cycles forwardRequestTime,
130  Cycles firstResponseTime);
131 
132  void readCallback(Addr address, DataBlock& data);
133 
134  void readCallback(Addr address,
135  MachineType mach,
136  DataBlock& data);
137 
138  void readCallback(Addr address,
139  MachineType mach,
140  DataBlock& data,
141  Cycles initialRequestTime,
142  Cycles forwardRequestTime,
143  Cycles firstResponseTime);
144 
145  void readCallback(Addr address,
146  MachineType mach,
147  DataBlock& data,
148  Cycles initialRequestTime,
149  Cycles forwardRequestTime,
150  Cycles firstResponseTime,
151  bool isRegion);
152  /* atomics need their own callback because the data
153  might be const coming from SLICC */
154  void atomicCallback(Addr address,
155  MachineType mach,
156  const DataBlock& data);
157 
158  void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID);
159  void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID);
160 
161  // Alternate implementations in VIPER Coalescer
162  virtual RequestStatus makeRequest(PacketPtr pkt);
163 
164  int outstandingCount() const { return m_outstanding_count; }
165 
166  bool
168  {
169  return deadlockCheckEvent.scheduled();
170  }
171 
172  void
174  {
176  }
177 
178  bool empty() const;
179 
180  void print(std::ostream& out) const;
181  void checkCoherence(Addr address);
182 
183  void markRemoved();
184  void removeRequest(GPUCoalescerRequest* request);
185  void evictionCallback(Addr address);
186  void completeIssue();
187 
188  void insertKernel(int wavefront_id, PacketPtr pkt);
189 
190  void recordRequestType(SequencerRequestType requestType);
192 
195  { return *m_typeLatencyHist[t]; }
196 
198  { return m_missLatencyHist; }
200  { return *m_missTypeLatencyHist[t]; }
201 
203  { return *m_missMachLatencyHist[t]; }
204 
206  getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
207  { return *m_missTypeMachLatencyHist[r][t]; }
208 
210  { return *m_IssueToInitialDelayHist[t]; }
211 
213  getInitialToForwardDelayHist(const MachineType t) const
214  { return *m_InitialToForwardDelayHist[t]; }
215 
217  getForwardRequestToFirstResponseHist(const MachineType t) const
219 
221  getFirstResponseToCompletionDelayHist(const MachineType t) const
223 
224  // Changed to protected to enable inheritance by VIPER Coalescer
225  protected:
226  bool tryCacheAccess(Addr addr, RubyRequestType type,
227  Addr pc, RubyAccessMode access_mode,
228  int size, DataBlock*& data_ptr);
229  // Alternate implementations in VIPER Coalescer
230  virtual void issueRequest(PacketPtr pkt, RubyRequestType type);
231 
232  void kernelCallback(int wavfront_id);
233 
234  void hitCallback(GPUCoalescerRequest* request,
235  MachineType mach,
236  DataBlock& data,
237  bool success,
238  Cycles initialRequestTime,
239  Cycles forwardRequestTime,
240  Cycles firstResponseTime,
241  bool isRegion);
243  MachineType mach,
244  Cycles initialRequestTime,
245  Cycles forwardRequestTime,
246  Cycles firstResponseTime,
247  bool success, bool isRegion);
248  void completeHitCallback(std::vector<PacketPtr> & mylist, int len);
249  PacketPtr mapAddrToPkt(Addr address);
250 
251 
252  RequestStatus getRequestStatus(PacketPtr pkt,
253  RubyRequestType request_type);
254  bool insertRequest(PacketPtr pkt, RubyRequestType request_type);
255 
256  bool handleLlsc(Addr address, GPUCoalescerRequest* request);
257 
258  class IssueEvent : public Event
259  {
260  private:
262  public:
263  IssueEvent(GPUCoalescer *_seq);
264  void process();
265  const char *description() const;
266  };
267 
269 
270 
271  // Changed to protected to enable inheritance by VIPER Coalescer
272  protected:
275 
278 
279  // The cache access latency for this GPU data cache. This is assessed at the
280  // beginning of each access. This should be very similar to the
281  // implementation in Sequencer() as this is very much like a Sequencer.
283 
284  // We need to track both the primary and secondary request types.
285  // The secondary request type comprises a subset of RubyRequestTypes that
286  // are understood by the L1 Controller. A primary request type can be any
287  // RubyRequestType.
288  typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable;
291 
292  typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable;
295  // Global outstanding request count, across all request tables
298  std::unordered_map<int, PacketPtr> kernelEndList;
300 
305 
307 
309  {
310  private:
312 
313  public:
315  m_GPUCoalescer_ptr(_seq) {}
317  const char *description() const
318  {
319  return "GPUCoalescer deadlock check";
320  }
321  };
322 
325 
326  // m5 style stats for TCP hit/miss counts
331 
336 
341 
346 
349 
353 
358 
363 
369 
370 private:
371  // Private copy constructor and assignment operator
372  GPUCoalescer(const GPUCoalescer& obj);
373  GPUCoalescer& operator=(const GPUCoalescer& obj);
374 };
375 
376 inline std::ostream&  // returns the stream it was given, so insertions can be chained
377 operator<<(std::ostream& out, const GPUCoalescer& obj)  // stream-insertion overload for GPUCoalescer
378 {
379  obj.print(out);  // all formatting is delegated to GPUCoalescer::print()
380  out << std::flush;  // flush immediately rather than leaving the text buffered
381  return out;
382 }
383 
384 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__
385 
Stats::Histogram & getLatencyHist()
void recordMissLatency(GPUCoalescerRequest *request, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool success, bool isRegion)
void insertKernel(int wavefront_id, PacketPtr pkt)
void atomicCallback(Addr address, MachineType mach, const DataBlock &data)
Stats::Scalar CP_TCCStHits
RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type)
Definition: GPUCoalescer.hh:79
Stats::Scalar GPU_TCPStHits
int m_deadlock_threshold
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
Definition: types.hh:83
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this sequencer.
GPUCoalescerWakeupEvent(GPUCoalescer *_seq)
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
GPUCoalescer & operator=(const GPUCoalescer &obj)
RubyRequestType m_type
Definition: GPUCoalescer.hh:67
CoalescingTable reqCoalescer
virtual void issueRequest(PacketPtr pkt, RubyRequestType type)
GPUCoalescer(const Params *)
RequestTable m_readRequestTable
Declaration of a request, the overall memory request consisting of the parts of the request that are persistent throughout the transaction.
Stats::Scalar GPU_TCPLdHits
Stats::Scalar CP_StMiss
ip6_addr_t addr
Definition: inet.hh:335
Stats::Scalar GPU_TCCStHits
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:381
void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
void kernelCallback(int wavfront_id)
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
Stats::Scalar CP_LdMiss
Stats::Scalar GPU_TCPLdTransfers
RequestStatus getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
const char * description() const
Return a C string describing the event.
Stats::Scalar CP_TCPLdTransfers
virtual RequestStatus makeRequest(PacketPtr pkt)
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
bool tryCacheAccess(Addr addr, RubyRequestType type, Addr pc, RubyAccessMode access_mode, int size, DataBlock *&data_ptr)
std::ostream & operator<<(std::ostream &out, const GPUCoalescerRequest &obj)
std::unordered_map< Addr, GPUCoalescerRequest * > RequestTable
void completeIssue()
Declaration of Statistics objects.
IssueEvent(GPUCoalescer *_seq)
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2475
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
void descheduleDeadlockEvent()
STL vector class.
Definition: stl.hh:40
void deschedule(Event &event)
Definition: eventq.hh:734
Stats::Scalar CP_TCPStTransfers
const char data[]
Definition: circlebuf.cc:43
bool empty() const
RubyGPUCoalescerParams Params
Definition: GPUCoalescer.hh:99
const char * description() const
Return a C string describing the event.
std::unordered_map< Addr, std::vector< RequestDesc > > CoalescingTable
HSASegment reqSegmentToHSASegment(Request *req)
Definition: GPUCoalescer.cc:93
void markRemoved()
bool isDeadlockEventScheduled() const
Stats::Scalar GPU_LdMiss
CacheMemory * m_dataCache_ptr
void readCallback(Addr address, DataBlock &data)
bool insertRequest(PacketPtr pkt, RubyRequestType request_type)
RubyRequestType primaryType
Definition: GPUCoalescer.hh:90
void collateStats()
void writeCallback(Addr address, DataBlock &data)
bool assumingRfOCoherence
std::vector< Addr > newRequests
Stats::Scalar GPU_TCCLdHits
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
int m_store_waiting_on_load_cycles
A simple histogram stat.
Definition: statistics.hh:2551
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
GPUCoalescerWakeupEvent deadlockCheckEvent
MemObjectParams Params
Definition: mem_object.hh:63
void completeHitCallback(std::vector< PacketPtr > &mylist, int len)
void recordRequestType(SequencerRequestType requestType)
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
void checkCoherence(Addr address)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
HSAScope reqScopeToHSAScope(Request *req)
Definition: GPUCoalescer.cc:73
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
Definition: packet.hh:245
void hitCallback(GPUCoalescerRequest *request, MachineType mach, DataBlock &data, bool success, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, bool isRegion)
std::unordered_map< int, PacketPtr > kernelEndList
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Stats::Scalar GPU_StMiss
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Histogram & getOutstandReqHist()
Stats::Scalar CP_TCPLdHits
int outstandingCount() const
void regStats()
Register statistics for this object.
std::vector< Stats::Histogram * > m_typeLatencyHist
void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
Stats::Histogram & getMissLatencyHist()
Stats::Histogram & getTypeLatencyHist(uint32_t t)
type
Definition: misc.hh:728
int size()
Definition: pagetable.hh:146
PacketPtr mapAddrToPkt(Addr address)
std::vector< int > newKernelEnds
void removeRequest(GPUCoalescerRequest *request)
GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, Cycles _issue_time)
Definition: GPUCoalescer.hh:70
RubyRequestType secondaryType
Definition: GPUCoalescer.hh:91
void print(std::ostream &out) const
Definition: eventq.hh:185
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
RequestTable m_writeRequestTable
Bitfield< 18, 16 > len
Definition: miscregs.hh:1626
IssueEvent issueEvent
CacheMemory * m_instCache_ptr
void resetStats()
Reset statistics associated with this object.
PacketPtr pkt
Definition: GPUCoalescer.hh:89
int m_outstanding_count
Cycles m_data_cache_hit_latency
bool handleLlsc(Addr address, GPUCoalescerRequest *request)
IntReg pc
Definition: remote_gdb.hh:91
void printProgress(std::ostream &out) const
Bitfield< 5 > t
Definition: miscregs.hh:1382
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
bool m_runningGarnetStandalone
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
void evictionCallback(Addr address)
Stats::Scalar GPU_TCPStTransfers
bool m_deadlock_check_scheduled
Stats::Scalar CP_TCCLdHits

Generated on Fri Jun 9 2017 13:03:50 for gem5 by doxygen 1.8.6