GPUCoalescer.cc
1 /*
2  * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Sooraj Puthoor
34  */
35 
36 #include "base/misc.hh"
37 #include "base/str.hh"
38 #include "config/the_isa.hh"
39 
40 #if THE_ISA == X86_ISA
41 #include "arch/x86/insts/microldstop.hh"
42 
43 #endif // X86_ISA
44 #include "mem/ruby/system/GPUCoalescer.hh"
45 
46 #include "cpu/testers/rubytest/RubyTester.hh"
47 #include "debug/GPUCoalescer.hh"
48 #include "debug/MemoryAccess.hh"
49 #include "debug/ProtocolTrace.hh"
50 #include "debug/RubyPort.hh"
51 #include "debug/RubyStats.hh"
52 #include "gpu-compute/shader.hh"
53 #include "mem/packet.hh"
54 #include "mem/ruby/common/DataBlock.hh"
55 #include "mem/ruby/common/SubBlock.hh"
56 #include "mem/ruby/network/MessageBuffer.hh"
57 #include "mem/ruby/profiler/Profiler.hh"
58 #include "mem/ruby/slicc_interface/AbstractController.hh"
59 #include "mem/ruby/slicc_interface/RubyRequest.hh"
60 #include "mem/ruby/structures/CacheMemory.hh"
61 #include "mem/ruby/system/RubySystem.hh"
62 #include "params/RubyGPUCoalescer.hh"
63 
64 using namespace std;
65 
66 GPUCoalescer *
67 RubyGPUCoalescerParams::create()
68 {
69  return new GPUCoalescer(this);
70 }
71 
72 HSAScope
73 reqScopeToHSAScope(Request* req)
74 {
75  HSAScope accessScope = HSAScope_UNSPECIFIED;
76  if (req->isScoped()) {
77  if (req->isWavefrontScope()) {
78  accessScope = HSAScope_WAVEFRONT;
79  } else if (req->isWorkgroupScope()) {
80  accessScope = HSAScope_WORKGROUP;
81  } else if (req->isDeviceScope()) {
82  accessScope = HSAScope_DEVICE;
83  } else if (req->isSystemScope()) {
84  accessScope = HSAScope_SYSTEM;
85  } else {
86  fatal("Bad scope type");
87  }
88  }
89  return accessScope;
90 }
91 
92 HSASegment
93 reqSegmentToHSASegment(Request* req)
94 {
95  HSASegment accessSegment = HSASegment_GLOBAL;
96 
97  if (req->isGlobalSegment()) {
98  accessSegment = HSASegment_GLOBAL;
99  } else if (req->isGroupSegment()) {
100  accessSegment = HSASegment_GROUP;
101  } else if (req->isPrivateSegment()) {
102  accessSegment = HSASegment_PRIVATE;
103  } else if (req->isKernargSegment()) {
104  accessSegment = HSASegment_KERNARG;
105  } else if (req->isReadonlySegment()) {
106  accessSegment = HSASegment_READONLY;
107  } else if (req->isSpillSegment()) {
108  accessSegment = HSASegment_SPILL;
109  } else if (req->isArgSegment()) {
110  accessSegment = HSASegment_ARG;
111  } else {
112  fatal("Bad segment type");
113  }
114 
115  return accessSegment;
116 }
117 
118 GPUCoalescer::GPUCoalescer(const Params *p)
119  : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
120 {
121  m_store_waiting_on_load_cycles = 0;
122  m_store_waiting_on_store_cycles = 0;
123  m_load_waiting_on_store_cycles = 0;
124  m_load_waiting_on_load_cycles = 0;
125 
126  m_outstanding_count = 0;
127 
128  m_max_outstanding_requests = 0;
129  m_deadlock_threshold = 0;
130  m_instCache_ptr = nullptr;
131  m_dataCache_ptr = nullptr;
132 
133  m_instCache_ptr = p->icache;
134  m_dataCache_ptr = p->dcache;
135  m_max_outstanding_requests = p->max_outstanding_requests;
136  m_deadlock_threshold = p->deadlock_threshold;
137 
138  assert(m_max_outstanding_requests > 0);
139  assert(m_deadlock_threshold > 0);
140  assert(m_instCache_ptr);
141  assert(m_dataCache_ptr);
142 
143  m_data_cache_hit_latency = p->dcache_hit_latency;
144 
145  m_runningGarnetStandalone = p->garnet_standalone;
146  assumingRfOCoherence = p->assume_rfo;
147 }
148 
149 GPUCoalescer::~GPUCoalescer()
150 {
151 }
152 
153 void
154 GPUCoalescer::wakeup()
155 {
156  // Check for deadlock of any of the requests
157  Cycles current_time = curCycle();
158 
159  // Check across all outstanding requests
160  int total_outstanding = 0;
161 
162  RequestTable::iterator read = m_readRequestTable.begin();
163  RequestTable::iterator read_end = m_readRequestTable.end();
164  for (; read != read_end; ++read) {
165  GPUCoalescerRequest* request = read->second;
166  if (current_time - request->issue_time < m_deadlock_threshold)
167  continue;
168 
169  panic("Possible Deadlock detected. Aborting!\n"
170  "version: %d request.paddr: 0x%x m_readRequestTable: %d "
171  "current time: %u issue_time: %d difference: %d\n", m_version,
172  request->pkt->getAddr(), m_readRequestTable.size(),
173  current_time * clockPeriod(), request->issue_time * clockPeriod(),
174  (current_time - request->issue_time)*clockPeriod());
175  }
176 
177  RequestTable::iterator write = m_writeRequestTable.begin();
178  RequestTable::iterator write_end = m_writeRequestTable.end();
179  for (; write != write_end; ++write) {
180  GPUCoalescerRequest* request = write->second;
181  if (current_time - request->issue_time < m_deadlock_threshold)
182  continue;
183 
184  panic("Possible Deadlock detected. Aborting!\n"
185  "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
186  "current time: %u issue_time: %d difference: %d\n", m_version,
187  request->pkt->getAddr(), m_writeRequestTable.size(),
188  current_time * clockPeriod(), request->issue_time * clockPeriod(),
189  (current_time - request->issue_time) * clockPeriod());
190  }
191 
192  total_outstanding += m_writeRequestTable.size();
193  total_outstanding += m_readRequestTable.size();
194 
195  assert(m_outstanding_count == total_outstanding);
196 
197  if (m_outstanding_count > 0) {
198  // If there are still outstanding requests, keep checking
199  schedule(deadlockCheckEvent,
200  m_deadlock_threshold * clockPeriod() +
201  curTick());
202  }
203 }
204 
205 void
206 GPUCoalescer::resetStats()
207 {
208  m_latencyHist.reset();
209  m_missLatencyHist.reset();
210  for (int i = 0; i < RubyRequestType_NUM; i++) {
211  m_typeLatencyHist[i]->reset();
212  m_missTypeLatencyHist[i]->reset();
213  for (int j = 0; j < MachineType_NUM; j++) {
214  m_missTypeMachLatencyHist[i][j]->reset();
215  }
216  }
217 
218  for (int i = 0; i < MachineType_NUM; i++) {
219  m_missMachLatencyHist[i]->reset();
220 
221  m_IssueToInitialDelayHist[i]->reset();
222  m_InitialToForwardDelayHist[i]->reset();
223  m_ForwardToFirstResponseDelayHist[i]->reset();
224  m_FirstResponseToCompletionDelayHist[i]->reset();
225  }
226 }
227 
228 void
229 GPUCoalescer::printProgress(ostream& out) const
230 {
231 }
232 
233 RequestStatus
234 GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
235 {
236  Addr line_addr = makeLineAddress(pkt->getAddr());
237 
238  if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
239  return RequestStatus_BufferFull;
240  }
241 
242  if (m_controller->isBlocked(line_addr) &&
243  request_type != RubyRequestType_Locked_RMW_Write) {
244  return RequestStatus_Aliased;
245  }
246 
247  if ((request_type == RubyRequestType_ST) ||
248  (request_type == RubyRequestType_ATOMIC) ||
249  (request_type == RubyRequestType_ATOMIC_RETURN) ||
250  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
251  (request_type == RubyRequestType_RMW_Read) ||
252  (request_type == RubyRequestType_RMW_Write) ||
253  (request_type == RubyRequestType_Load_Linked) ||
254  (request_type == RubyRequestType_Store_Conditional) ||
255  (request_type == RubyRequestType_Locked_RMW_Read) ||
256  (request_type == RubyRequestType_Locked_RMW_Write) ||
257  (request_type == RubyRequestType_FLUSH)) {
258 
259  // Check if there is any outstanding read request for the same
260  // cache line.
261  if (m_readRequestTable.count(line_addr) > 0) {
262  m_store_waiting_on_load_cycles++;
263  return RequestStatus_Aliased;
264  }
265 
266  if (m_writeRequestTable.count(line_addr) > 0) {
267  // There is an outstanding write request for the cache line
268  m_store_waiting_on_store_cycles++;
269  return RequestStatus_Aliased;
270  }
271  } else {
272  // Check if there is any outstanding write request for the same
273  // cache line.
274  if (m_writeRequestTable.count(line_addr) > 0) {
275  m_load_waiting_on_store_cycles++;
276  return RequestStatus_Aliased;
277  }
278 
279  if (m_readRequestTable.count(line_addr) > 0) {
280  // There is an outstanding read request for the cache line
281  m_load_waiting_on_load_cycles++;
282  return RequestStatus_Aliased;
283  }
284  }
285 
286  return RequestStatus_Ready;
287 
288 }
289 
290 
291 
292 // sets the kernelEndList
293 void
294 GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
295 {
296  // Don't know if this will happen or is possible,
297  // but be careful so that it does not turn into
298  // a simulator hang in the future
299  DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
300  assert(kernelEndList.count(wavefront_id) == 0);
301 
302  kernelEndList[wavefront_id] = pkt;
303  DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
304  kernelEndList.size());
305 }
306 
307 
308 // Insert the request on the correct request table. Return true if
309 // the entry was already present.
310 bool
311 GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
312 {
313  assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
314  pkt->req->isLockedRMW() ||
315  !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));
316 
317  int total_outstanding M5_VAR_USED =
318  m_writeRequestTable.size() + m_readRequestTable.size();
319 
320  assert(m_outstanding_count == total_outstanding);
321 
322  // See if we should schedule a deadlock check
323  if (!deadlockCheckEvent.scheduled()) {
324  schedule(deadlockCheckEvent, m_deadlock_threshold + curTick());
325  }
326 
327  Addr line_addr = makeLineAddress(pkt->getAddr());
328  if ((request_type == RubyRequestType_ST) ||
329  (request_type == RubyRequestType_ATOMIC) ||
330  (request_type == RubyRequestType_ATOMIC_RETURN) ||
331  (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
332  (request_type == RubyRequestType_RMW_Read) ||
333  (request_type == RubyRequestType_RMW_Write) ||
334  (request_type == RubyRequestType_Load_Linked) ||
335  (request_type == RubyRequestType_Store_Conditional) ||
336  (request_type == RubyRequestType_Locked_RMW_Read) ||
337  (request_type == RubyRequestType_Locked_RMW_Write) ||
338  (request_type == RubyRequestType_FLUSH)) {
339 
340  pair<RequestTable::iterator, bool> r =
341  m_writeRequestTable.insert(RequestTable::value_type(line_addr,
342  (GPUCoalescerRequest*) NULL));
343  if (r.second) {
344  RequestTable::iterator i = r.first;
345  i->second = new GPUCoalescerRequest(pkt, request_type,
346  curCycle());
348  "Inserting write request for paddr %#x for type %d\n",
349  pkt->req->getPaddr(), i->second->m_type);
350  m_outstanding_count++;
351  } else {
352  return true;
353  }
354  } else {
355  pair<RequestTable::iterator, bool> r =
356  m_readRequestTable.insert(RequestTable::value_type(line_addr,
357  (GPUCoalescerRequest*) NULL));
358 
359  if (r.second) {
360  RequestTable::iterator i = r.first;
361  i->second = new GPUCoalescerRequest(pkt, request_type,
362  curCycle());
364  "Inserting read request for paddr %#x for type %d\n",
365  pkt->req->getPaddr(), i->second->m_type);
366  m_outstanding_count++;
367  } else {
368  return true;
369  }
370  }
371 
372  m_outstandReqHist.sample(m_outstanding_count);
373 
374  total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
375  assert(m_outstanding_count == total_outstanding);
376 
377  return false;
378 }
379 
380 void
381 GPUCoalescer::markRemoved()
382 {
383  m_outstanding_count--;
384  assert(m_outstanding_count ==
385  m_writeRequestTable.size() + m_readRequestTable.size());
386 }
387 
388 void
389 GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
390 {
391  assert(m_outstanding_count ==
392  m_writeRequestTable.size() + m_readRequestTable.size());
393 
394  Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
395  if ((srequest->m_type == RubyRequestType_ST) ||
396  (srequest->m_type == RubyRequestType_RMW_Read) ||
397  (srequest->m_type == RubyRequestType_RMW_Write) ||
398  (srequest->m_type == RubyRequestType_Load_Linked) ||
399  (srequest->m_type == RubyRequestType_Store_Conditional) ||
400  (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
401  (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
402  m_writeRequestTable.erase(line_addr);
403  } else {
404  m_readRequestTable.erase(line_addr);
405  }
406 
407  markRemoved();
408 }
409 
410 bool
411 GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
412 {
413  //
414  // The success flag indicates whether the LLSC operation was successful.
415  // LL ops will always succeed, but SC may fail if the cache line is no
416  // longer locked.
417  //
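 // Illustrative example (hypothetical scenario, not from the original
 // source): a Load_Linked locks line A for this coalescer's version;
 // if A is still locked when the matching Store_Conditional arrives,
 // the SC succeeds (extra data set to 1 below), otherwise it fails
 // (extra data set to 0).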
418  bool success = true;
419  if (request->m_type == RubyRequestType_Store_Conditional) {
420  if (!m_dataCache_ptr->isLocked(address, m_version)) {
421  //
422  // For failed SC requests, indicate the failure to the cpu by
423  // setting the extra data to zero.
424  //
425  request->pkt->req->setExtraData(0);
426  success = false;
427  } else {
428  //
429  // For successful SC requests, indicate the success to the cpu by
430  // setting the extra data to one.
431  //
432  request->pkt->req->setExtraData(1);
433  }
434  //
435  // Independent of success, all SC operations must clear the lock
436  //
437  m_dataCache_ptr->clearLocked(address);
438  } else if (request->m_type == RubyRequestType_Load_Linked) {
439  //
440  // Note: To fully follow Alpha LLSC semantics, should the LL clear any
441  // previously locked cache lines?
442  //
443  m_dataCache_ptr->setLocked(address, m_version);
444  } else if ((m_dataCache_ptr->isTagPresent(address)) &&
445  (m_dataCache_ptr->isLocked(address, m_version))) {
446  //
447  // Normal writes should clear the locked address
448  //
449  m_dataCache_ptr->clearLocked(address);
450  }
451  return success;
452 }
453 
454 void
455 GPUCoalescer::writeCallback(Addr address, DataBlock& data)
456 {
457  writeCallback(address, MachineType_NULL, data);
458 }
459 
460 void
461 GPUCoalescer::writeCallback(Addr address,
462  MachineType mach,
463  DataBlock& data)
464 {
465  writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
466 }
467 
468 void
469 GPUCoalescer::writeCallback(Addr address,
470  MachineType mach,
471  DataBlock& data,
472  Cycles initialRequestTime,
473  Cycles forwardRequestTime,
474  Cycles firstResponseTime)
475 {
476  writeCallback(address, mach, data,
477  initialRequestTime, forwardRequestTime, firstResponseTime,
478  false);
479 }
480 
481 void
482 GPUCoalescer::writeCallback(Addr address,
483  MachineType mach,
484  DataBlock& data,
485  Cycles initialRequestTime,
486  Cycles forwardRequestTime,
487  Cycles firstResponseTime,
488  bool isRegion)
489 {
490  assert(address == makeLineAddress(address));
491 
492  DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
493  assert(m_writeRequestTable.count(makeLineAddress(address)));
494 
495  RequestTable::iterator i = m_writeRequestTable.find(address);
496  assert(i != m_writeRequestTable.end());
497  GPUCoalescerRequest* request = i->second;
498 
499  m_writeRequestTable.erase(i);
500  markRemoved();
501 
502  assert((request->m_type == RubyRequestType_ST) ||
503  (request->m_type == RubyRequestType_ATOMIC) ||
504  (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
505  (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
506  (request->m_type == RubyRequestType_RMW_Read) ||
507  (request->m_type == RubyRequestType_RMW_Write) ||
508  (request->m_type == RubyRequestType_Load_Linked) ||
509  (request->m_type == RubyRequestType_Store_Conditional) ||
510  (request->m_type == RubyRequestType_Locked_RMW_Read) ||
511  (request->m_type == RubyRequestType_Locked_RMW_Write) ||
512  (request->m_type == RubyRequestType_FLUSH));
513 
514 
515  //
516  // For Alpha, properly handle LL, SC, and write requests with respect to
517  // locked cache blocks.
518  //
519  // Not valid for the Garnet_standalone protocol
520  //
521  bool success = true;
522  if (!m_runningGarnetStandalone)
523  success = handleLlsc(address, request);
524 
525  if (request->m_type == RubyRequestType_Locked_RMW_Read) {
526  m_controller->blockOnQueue(address, m_mandatory_q_ptr);
527  } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
528  m_controller->unblock(address);
529  }
530 
531  hitCallback(request, mach, data, success,
532  request->issue_time, forwardRequestTime, firstResponseTime,
533  isRegion);
534 }
535 
536 void
537 GPUCoalescer::readCallback(Addr address, DataBlock& data)
538 {
539  readCallback(address, MachineType_NULL, data);
540 }
541 
542 void
543 GPUCoalescer::readCallback(Addr address,
544  MachineType mach,
545  DataBlock& data)
546 {
547  readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
548 }
549 
550 void
551 GPUCoalescer::readCallback(Addr address,
552  MachineType mach,
553  DataBlock& data,
554  Cycles initialRequestTime,
555  Cycles forwardRequestTime,
556  Cycles firstResponseTime)
557 {
558 
559  readCallback(address, mach, data,
560  initialRequestTime, forwardRequestTime, firstResponseTime,
561  false);
562 }
563 
564 void
565 GPUCoalescer::readCallback(Addr address,
566  MachineType mach,
567  DataBlock& data,
568  Cycles initialRequestTime,
569  Cycles forwardRequestTime,
570  Cycles firstResponseTime,
571  bool isRegion)
572 {
573  assert(address == makeLineAddress(address));
574  assert(m_readRequestTable.count(makeLineAddress(address)));
575 
576  DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
577  RequestTable::iterator i = m_readRequestTable.find(address);
578  assert(i != m_readRequestTable.end());
579  GPUCoalescerRequest* request = i->second;
580 
581  m_readRequestTable.erase(i);
582  markRemoved();
583 
584  assert((request->m_type == RubyRequestType_LD) ||
585  (request->m_type == RubyRequestType_IFETCH));
586 
587  hitCallback(request, mach, data, true,
588  request->issue_time, forwardRequestTime, firstResponseTime,
589  isRegion);
590 }
591 
592 void
593 GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
594  MachineType mach,
595  DataBlock& data,
596  bool success,
597  Cycles initialRequestTime,
598  Cycles forwardRequestTime,
599  Cycles firstResponseTime,
600  bool isRegion)
601 {
602  PacketPtr pkt = srequest->pkt;
603  Addr request_address = pkt->getAddr();
604  Addr request_line_address = makeLineAddress(request_address);
605 
606  RubyRequestType type = srequest->m_type;
607 
608  // Set this cache entry to the most recently used
609  if (type == RubyRequestType_IFETCH) {
610  if (m_instCache_ptr->isTagPresent(request_line_address))
611  m_instCache_ptr->setMRU(request_line_address);
612  } else {
613  if (m_dataCache_ptr->isTagPresent(request_line_address))
614  m_dataCache_ptr->setMRU(request_line_address);
615  }
616 
617  recordMissLatency(srequest, mach,
618  initialRequestTime,
619  forwardRequestTime,
620  firstResponseTime,
621  success, isRegion);
622  // update the data
623  //
624  // MUST DO THIS FOR EACH REQUEST IN THE COALESCER
625  int len = reqCoalescer[request_line_address].size();
626  std::vector<PacketPtr> mylist;
627  for (int i = 0; i < len; ++i) {
628  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
629  assert(type == reqCoalescer[request_line_address][i].primaryType);
630  request_address = pkt->getAddr();
631  request_line_address = makeLineAddress(pkt->getAddr());
632  if (pkt->getPtr<uint8_t>()) {
633  if ((type == RubyRequestType_LD) ||
634  (type == RubyRequestType_ATOMIC) ||
635  (type == RubyRequestType_ATOMIC_RETURN) ||
636  (type == RubyRequestType_IFETCH) ||
637  (type == RubyRequestType_RMW_Read) ||
638  (type == RubyRequestType_Locked_RMW_Read) ||
639  (type == RubyRequestType_Load_Linked)) {
640  memcpy(pkt->getPtr<uint8_t>(),
641  data.getData(getOffset(request_address),
642  pkt->getSize()),
643  pkt->getSize());
644  } else {
645  data.setData(pkt->getPtr<uint8_t>(),
646  getOffset(request_address), pkt->getSize());
647  }
648  } else {
649  DPRINTF(MemoryAccess,
650  "WARNING. Data not transfered from Ruby to M5 for type " \
651  "%s\n",
652  RubyRequestType_to_string(type));
653  }
654 
655  // If using the RubyTester, update the RubyTester sender state's
656  // subBlock with the received data. The tester will later access
657  // this state.
658  // Note: RubyPort will access its sender state before the
659  // RubyTester.
660  if (m_usingRubyTester) {
661  RubyPort::SenderState *requestSenderState =
662  safe_cast<RubyPort::SenderState*>(pkt->senderState);
663  RubyTester::SenderState* testerSenderState =
664  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
665  testerSenderState->subBlock.mergeFrom(data);
666  }
667 
668  mylist.push_back(pkt);
669  }
670  delete srequest;
671  reqCoalescer.erase(request_line_address);
672  assert(!reqCoalescer.count(request_line_address));
673 
674 
675 
676  completeHitCallback(mylist, len);
677 }
678 
679 bool
680 GPUCoalescer::empty() const
681 {
682  return m_writeRequestTable.empty() && m_readRequestTable.empty();
683 }
684 
685 // Analyzes the packet to see if this request can be coalesced.
686 // If the request can be coalesced, it is added to the reqCoalescer table
687 // and makeRequest returns RequestStatus_Issued.
688 // If this is the first request to a cacheline, the request is added to both
689 // the newRequests queue and the reqCoalescer table; makeRequest
690 // returns RequestStatus_Issued.
691 // If there is a pending request to this cacheline and this request
692 // can't be coalesced, RequestStatus_Aliased is returned and
693 // the packet needs to be reissued.
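 // Worked example of the rules above (hypothetical requests, added for
 // illustration): two LDs to the same cache line in the same cycle
 // coalesce into a single Ruby request; an LD and an ST to that line in
 // the same cycle do not, and the second returns RequestStatus_Aliased;
 // a request to a line that already has an outstanding request from an
 // earlier cycle is likewise returned as Aliased and must be reissued.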
694 RequestStatus
695 GPUCoalescer::makeRequest(PacketPtr pkt)
696 {
697  // Check for GPU Barrier Kernel End or Kernel Begin
698  // Leave these to be handled by the child class
699  // Kernel End/Barrier = isFlush + isRelease
700  // Kernel Begin = isFlush + isAcquire
701  if (pkt->req->isKernel()) {
702  if (pkt->req->isAcquire()) {
703  // This is a Kernel Begin leave handling to
704  // virtual xCoalescer::makeRequest
705  return RequestStatus_Issued;
706  } else if (pkt->req->isRelease()) {
707  // This is a Kernel End leave handling to
708  // virtual xCoalescer::makeRequest
709  // If we are here then we didn't call
710  // a virtual version of this function
711  // so we will also schedule the callback
712  int wf_id = 0;
713  if (pkt->req->hasContextId()) {
714  wf_id = pkt->req->contextId();
715  }
716  insertKernel(wf_id, pkt);
717  newKernelEnds.push_back(wf_id);
718  if (!issueEvent.scheduled()) {
719  schedule(issueEvent, curTick());
720  }
721  return RequestStatus_Issued;
722  }
723  }
724 
725  // If number of outstanding requests greater than the max allowed,
726  // return RequestStatus_BufferFull. This logic can be extended to
727  // support proper backpressure.
728  if (m_outstanding_count >= m_max_outstanding_requests) {
729  return RequestStatus_BufferFull;
730  }
731 
732  RubyRequestType primary_type = RubyRequestType_NULL;
733  RubyRequestType secondary_type = RubyRequestType_NULL;
734 
735  if (pkt->isLLSC()) {
736  //
737  // Alpha LL/SC instructions need to be handled carefully by the cache
738  // coherence protocol to ensure they follow the proper semantics. In
739  // particular, by identifying the operations as atomic, the protocol
740  // should understand that migratory sharing optimizations should not
741  // be performed (i.e. a load between the LL and SC should not steal
742  // away exclusive permission).
743  //
744  if (pkt->isWrite()) {
745  primary_type = RubyRequestType_Store_Conditional;
746  } else {
747  assert(pkt->isRead());
748  primary_type = RubyRequestType_Load_Linked;
749  }
750  secondary_type = RubyRequestType_ATOMIC;
751  } else if (pkt->req->isLockedRMW()) {
752  //
753  // x86 locked instructions are translated to store cache coherence
754  // requests because these requests should always be treated as read
755  // exclusive operations and should leverage any migratory sharing
756  // optimization built into the protocol.
757  //
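 // For example (illustration only): the read half of an x86 locked RMW
 // is issued below as Locked_RMW_Read (primary) with ST as the
 // secondary type, and the write half as Locked_RMW_Write / ST.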
758  if (pkt->isWrite()) {
759  primary_type = RubyRequestType_Locked_RMW_Write;
760  } else {
761  assert(pkt->isRead());
762  primary_type = RubyRequestType_Locked_RMW_Read;
763  }
764  secondary_type = RubyRequestType_ST;
765  } else if (pkt->isAtomicOp()) {
766  //
767  // GPU Atomic Operation
768  //
769  primary_type = RubyRequestType_ATOMIC;
770  secondary_type = RubyRequestType_ATOMIC;
771  } else {
772  if (pkt->isRead()) {
773  if (pkt->req->isInstFetch()) {
774  primary_type = secondary_type = RubyRequestType_IFETCH;
775  } else {
776 #if THE_ISA == X86_ISA
777  uint32_t flags = pkt->req->getFlags();
778  bool storeCheck = flags &
779  (TheISA::StoreCheck << TheISA::FlagShift);
780 #else
781  bool storeCheck = false;
782 #endif // X86_ISA
783  if (storeCheck) {
784  primary_type = RubyRequestType_RMW_Read;
785  secondary_type = RubyRequestType_ST;
786  } else {
787  primary_type = secondary_type = RubyRequestType_LD;
788  }
789  }
790  } else if (pkt->isWrite()) {
791  //
792  // Note: M5 packets do not differentiate ST from RMW_Write
793  //
794  primary_type = secondary_type = RubyRequestType_ST;
795  } else if (pkt->isFlush()) {
796  primary_type = secondary_type = RubyRequestType_FLUSH;
797  } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
798  if (assumingRfOCoherence) {
799  // If we reached here, this request must be a memFence
800  // and the protocol implements RfO, so the coalescer can
801  // assume sequential consistency and schedule the callback
802  // immediately.
803  // Currently the code implements fence callbacks
804  // by reusing the mechanism for kernel completions.
805  // This should be fixed.
806  int wf_id = 0;
807  if (pkt->req->hasContextId()) {
808  wf_id = pkt->req->contextId();
809  }
810  insertKernel(wf_id, pkt);
811  newKernelEnds.push_back(wf_id);
812  if (!issueEvent.scheduled()) {
813  schedule(issueEvent, curTick());
814  }
815  return RequestStatus_Issued;
816  } else {
817  // If not RfO, return issued here and let the child coalescer
818  // take care of it.
819  return RequestStatus_Issued;
820  }
821  } else {
822  panic("Unsupported ruby packet type\n");
823  }
824  }
825 
826  // Check if there is any pending request to this cache line from
827  // previous cycles.
828  // If there is a pending request, return aliased. Since coalescing
829  // across time is not permitted, aliased requests are not coalesced.
830  // If a request for this address has already been issued, we must block
831  RequestStatus status = getRequestStatus(pkt, primary_type);
832  if (status != RequestStatus_Ready)
833  return status;
834 
835  Addr line_addr = makeLineAddress(pkt->getAddr());
836 
837  // Check if this request can be coalesced with previous
838  // requests from this cycle.
839  if (!reqCoalescer.count(line_addr)) {
840  // This is the first access to this cache line.
841  // A new request to the memory subsystem has to be
842  // made in the next cycle for this cache line, so
843  // add this line addr to the "newRequests" queue
844  newRequests.push_back(line_addr);
845 
846  // There was a request to this cache line in this cycle,
847  // let us see if we can coalesce this request with the previous
848  // requests from this cycle
849  } else if (primary_type !=
850  reqCoalescer[line_addr][0].primaryType) {
851  // can't coalesce loads, stores and atomics!
852  return RequestStatus_Aliased;
853  } else if (pkt->req->isLockedRMW() ||
854  reqCoalescer[line_addr][0].pkt->req->isLockedRMW()) {
855  // can't coalesce locked accesses, but can coalesce atomics!
856  return RequestStatus_Aliased;
857  } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
858  pkt->req->contextId() !=
859  reqCoalescer[line_addr][0].pkt->req->contextId()) {
860  // can't coalesce releases from different wavefronts
861  return RequestStatus_Aliased;
862  }
863 
864  // in addition to the packet, we need to save both request types
865  reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
866  if (!issueEvent.scheduled())
867  schedule(issueEvent, curTick());
868  // TODO: issue hardware prefetches here
869  return RequestStatus_Issued;
870 }
871 
872 void
873 GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
874 {
875 
876  int proc_id = -1;
877  if (pkt != NULL && pkt->req->hasContextId()) {
878  proc_id = pkt->req->contextId();
879  }
880 
881  // If valid, copy the pc to the ruby request
882  Addr pc = 0;
883  if (pkt->req->hasPC()) {
884  pc = pkt->req->getPC();
885  }
886 
887  // At the moment, setting scopes only counts
888  // for GPU spill space accesses
889  // (i.e., pkt->req->isStack()).
890  // This scope is REPLACE since it
891  // does not need to be flushed at the end
892  // of a kernel. Private and local may need
893  // to be visible at the end of the kernel.
894  HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
895  HSAScope accessScope = reqScopeToHSAScope(pkt->req);
896 
897  Addr line_addr = makeLineAddress(pkt->getAddr());
898 
899  // Creating WriteMask that records written bytes
900  // and atomic operations. This enables partial writes
901  // and partial reads of those writes
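 // Rough example (hypothetical offsets, assuming a 64-byte line): if one
 // coalesced packet writes 4 bytes at offset 0 and another writes 8 bytes
 // at offset 16, the loop below sets accessMask[0..3] and
 // accessMask[16..23] and copies those bytes into dataBlock; atomic
 // packets instead contribute an (offset, AtomicOpFunctor*) entry to
 // atomicOps.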
902  DataBlock dataBlock;
903  dataBlock.clear();
904  uint32_t blockSize = RubySystem::getBlockSizeBytes();
905  std::vector<bool> accessMask(blockSize,false);
906  std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
907  uint32_t tableSize = reqCoalescer[line_addr].size();
908  for (int i = 0; i < tableSize; i++) {
909  PacketPtr tmpPkt = reqCoalescer[line_addr][i].pkt;
910  uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
911  uint32_t tmpSize = tmpPkt->getSize();
912  if (tmpPkt->isAtomicOp()) {
913  std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
914  tmpPkt->getAtomicOp());
915  atomicOps.push_back(tmpAtomicOp);
916  } else if (tmpPkt->isWrite()) {
917  dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
918  tmpOffset, tmpSize);
919  }
920  for (int j = 0; j < tmpSize; j++) {
921  accessMask[tmpOffset + j] = true;
922  }
923  }
924  std::shared_ptr<RubyRequest> msg;
925  if (pkt->isAtomicOp()) {
926  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
927  pkt->getPtr<uint8_t>(),
928  pkt->getSize(), pc, secondary_type,
929  RubyAccessMode_Supervisor, pkt,
930  PrefetchBit_No, proc_id, 100,
931  blockSize, accessMask,
932  dataBlock, atomicOps,
933  accessScope, accessSegment);
934  } else {
935  msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
936  pkt->getPtr<uint8_t>(),
937  pkt->getSize(), pc, secondary_type,
938  RubyAccessMode_Supervisor, pkt,
939  PrefetchBit_No, proc_id, 100,
940  blockSize, accessMask,
941  dataBlock,
942  accessScope, accessSegment);
943  }
944  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
945  curTick(), m_version, "Coal", "Begin", "", "",
946  printAddress(msg->getPhysicalAddress()),
947  RubyRequestType_to_string(secondary_type));
948 
949  fatal_if(secondary_type == RubyRequestType_IFETCH,
950  "there should not be any I-Fetch requests in the GPU Coalescer");
951 
952  // Send the message to the cache controller
954  "should not have a latency of zero");
955 
956  assert(m_mandatory_q_ptr);
958 }
959 
960 template <class KEY, class VALUE>
961 std::ostream &
962 operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
963 {
964  out << "[";
965  for (auto i = map.begin(); i != map.end(); ++i)
966  out << " " << i->first << "=" << i->second;
967  out << " ]";
968 
969  return out;
970 }
971 
972 void
973 GPUCoalescer::print(ostream& out) const
974 {
975  out << "[GPUCoalescer: " << m_version
976  << ", outstanding requests: " << m_outstanding_count
977  << ", read request table: " << m_readRequestTable
978  << ", write request table: " << m_writeRequestTable
979  << "]";
980 }
981 
982 // This can be called from setState whenever coherence permissions are
983 // upgraded. When invoked, coherence violations will be checked for the
984 // given block.
985 void
986 GPUCoalescer::checkCoherence(Addr addr)
987 {
988 #ifdef CHECK_COHERENCE
989  m_ruby_system->checkGlobalCoherenceInvariant(addr);
990 #endif
991 }
992 
993 void
994 GPUCoalescer::recordRequestType(SequencerRequestType requestType) {
995  DPRINTF(RubyStats, "Recorded statistic: %s\n",
996  SequencerRequestType_to_string(requestType));
997 }
998 
999 GPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq)
1000  : Event(Progress_Event_Pri), seq(_seq)
1001 {
1002 }
1003 
1004 
1005 void
1006 GPUCoalescer::completeIssue()
1007 {
1008  // newRequests has the cacheline addresses of all the
1009  // requests which need to be issued to the memory subsystem
1010  // in this cycle
1011  int len = newRequests.size();
1012  DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
1013  for (int i = 0; i < len; ++i) {
1014  // Get the requests from reqCoalescer table. Get only the
1015  // first request for each cacheline, the remaining requests
1016  // can be coalesced with the first request. So, only
1017  // one request is issued per cacheline.
1018  RequestDesc info = reqCoalescer[newRequests[i]][0];
1019  PacketPtr pkt = info.pkt;
1020  DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
1021  i, pkt->req->getPaddr());
1022  // Insert this request to the read/writeRequestTables. These tables
1023  // are used to track aliased requests in makeRequest subroutine
1024  bool found = insertRequest(pkt, info.primaryType);
1025 
1026  if (found) {
1027  panic("GPUCoalescer::makeRequest should never be called if the "
1028  "request is already outstanding\n");
1029  }
1030 
1031  // Issue request to ruby subsystem
1032  issueRequest(pkt, info.secondaryType);
1033  }
1034  newRequests.clear();
1035 
1036  // have Kernel End releases been issued this cycle
1037  len = newKernelEnds.size();
1038  for (int i = 0; i < len; i++) {
1039  kernelCallback(newKernelEnds[i]);
1040  }
1041  newKernelEnds.clear();
1042 }
1043 
1044 void
1045 GPUCoalescer::IssueEvent::process()
1046 {
1047  seq->completeIssue();
1048 }
1049 
1050 const char *
1051 GPUCoalescer::IssueEvent::description() const
1052 {
1053  return "Issue coalesced request";
1054 }
1055 
1056 void
1057 GPUCoalescer::evictionCallback(Addr address)
1058 {
1059  ruby_eviction_callback(address);
1060 }
1061 
1062 void
1063 GPUCoalescer::kernelCallback(int wavefront_id)
1064 {
1065  assert(kernelEndList.count(wavefront_id));
1066 
1067  ruby_hit_callback(kernelEndList[wavefront_id]);
1068 
1069  kernelEndList.erase(wavefront_id);
1070 }
1071 
1072 void
1073 GPUCoalescer::atomicCallback(Addr address,
1074  MachineType mach,
1075  const DataBlock& data)
1076 {
1077  assert(address == makeLineAddress(address));
1078 
1079  DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
1080  assert(m_writeRequestTable.count(makeLineAddress(address)));
1081 
1082  RequestTable::iterator i = m_writeRequestTable.find(address);
1083  assert(i != m_writeRequestTable.end());
1084  GPUCoalescerRequest* srequest = i->second;
1085 
1086  m_writeRequestTable.erase(i);
1087  markRemoved();
1088 
1089  assert((srequest->m_type == RubyRequestType_ATOMIC) ||
1090  (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
1091  (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
1092 
1093 
1094  // Atomics don't write to cache, so there is no MRU update...
1095 
1096  recordMissLatency(srequest, mach,
1097  srequest->issue_time, Cycles(0), Cycles(0), true, false);
1098 
1099  PacketPtr pkt = srequest->pkt;
1100  Addr request_address = pkt->getAddr();
1101  Addr request_line_address = makeLineAddress(pkt->getAddr());
1102 
1103  int len = reqCoalescer[request_line_address].size();
1104  std::vector<PacketPtr> mylist;
1105  for (int i = 0; i < len; ++i) {
1106  PacketPtr pkt = reqCoalescer[request_line_address][i].pkt;
1107  assert(srequest->m_type ==
1108  reqCoalescer[request_line_address][i].primaryType);
1109  request_address = (pkt->getAddr());
1110  request_line_address = makeLineAddress(request_address);
1111  if (pkt->getPtr<uint8_t>() &&
1112  srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
1113  /* atomics are done in memory, and return the data *before* the atomic op... */
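 /* For example (hypothetical values): an atomic add of 5 to a word
  * holding 7 leaves 12 in memory, while the packet data copied below
  * receives the old value 7 (unless the type is ATOMIC_NO_RETURN). */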
1114  memcpy(pkt->getPtr<uint8_t>(),
1115  data.getData(getOffset(request_address),
1116  pkt->getSize()),
1117  pkt->getSize());
1118  } else {
1119  DPRINTF(MemoryAccess,
1120  "WARNING. Data not transfered from Ruby to M5 for type " \
1121  "%s\n",
1122  RubyRequestType_to_string(srequest->m_type));
1123  }
1124 
1125  // If using the RubyTester, update the RubyTester sender state's
1126  // subBlock with the received data. The tester will later access
1127  // this state.
1128  // Note: RubyPort will access its sender state before the
1129  // RubyTester.
1130  if (m_usingRubyTester) {
1131  RubyPort::SenderState *requestSenderState =
1132  safe_cast<RubyPort::SenderState*>(pkt->senderState);
1133  RubyTester::SenderState* testerSenderState =
1134  safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
1135  testerSenderState->subBlock.mergeFrom(data);
1136  }
1137 
1138  mylist.push_back(pkt);
1139  }
1140  delete srequest;
1141  reqCoalescer.erase(request_line_address);
1142  assert(!reqCoalescer.count(request_line_address));
1143 
1144  completeHitCallback(mylist, len);
1145 }
1146 
1147 void
1148 GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
1149 {
1150  if (myMachID == senderMachID) {
1151  CP_TCPLdHits++;
1152  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1153  CP_TCPLdTransfers++;
1154  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1155  CP_TCCLdHits++;
1156  } else {
1157  CP_LdMiss++;
1158  }
1159 }
1160 
1161 void
1162 GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
1163 {
1164  if (myMachID == senderMachID) {
1165  CP_TCPStHits++;
1166  } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
1167  CP_TCPStTransfers++;
1168  } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
1169  CP_TCCStHits++;
1170  } else {
1171  CP_StMiss++;
1172  }
1173 }
1174 
1175 void
1176 GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
1177 {
1178  for (int i = 0; i < len; ++i) {
1179  RubyPort::SenderState *ss =
1180  safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
1181  MemSlavePort *port = ss->port;
1182  assert(port != NULL);
1183 
1184  mylist[i]->senderState = ss->predecessor;
1185  delete ss;
1186  port->hitCallback(mylist[i]);
1187  trySendRetries();
1188  }
1189 
1190  testDrainComplete();
1191 }
1192 
1193 PacketPtr
1194 GPUCoalescer::mapAddrToPkt(Addr address)
1195 {
1196  RequestTable::iterator i = m_readRequestTable.find(address);
1197  assert(i != m_readRequestTable.end());
1198  GPUCoalescerRequest* request = i->second;
1199  return request->pkt;
1200 }
1201 
1202 void
1203 GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
1204  MachineType mach,
1205  Cycles initialRequestTime,
1206  Cycles forwardRequestTime,
1207  Cycles firstResponseTime,
1208  bool success, bool isRegion)
1209 {
1210  RubyRequestType type = srequest->m_type;
1211  Cycles issued_time = srequest->issue_time;
1212  Cycles completion_time = curCycle();
1213  assert(completion_time >= issued_time);
1214  Cycles total_lat = completion_time - issued_time;
1215 
1216  // cache stats (valid for RfO protocol only)
1217  if (mach == MachineType_TCP) {
1218  if (type == RubyRequestType_LD) {
1219  GPU_TCPLdHits++;
1220  } else {
1221  GPU_TCPStHits++;
1222  }
1223  } else if (mach == MachineType_L1Cache_wCC) {
1224  if (type == RubyRequestType_LD) {
1225  GPU_TCPLdTransfers++;
1226  } else {
1227  GPU_TCPStTransfers++;
1228  }
1229  } else if (mach == MachineType_TCC) {
1230  if (type == RubyRequestType_LD) {
1231  GPU_TCCLdHits++;
1232  } else {
1233  GPU_TCCStHits++;
1234  }
1235  } else {
1236  if (type == RubyRequestType_LD) {
1237  GPU_LdMiss++;
1238  } else {
1239  GPU_StMiss++;
1240  }
1241  }
1242 
1243  // Profile all access latency, even zero latency accesses
1244  m_latencyHist.sample(total_lat);
1245  m_typeLatencyHist[type]->sample(total_lat);
1246 
1247  // Profile the miss latency for all non-zero demand misses
1248  if (total_lat != Cycles(0)) {
1249  m_missLatencyHist.sample(total_lat);
1250  m_missTypeLatencyHist[type]->sample(total_lat);
1251 
1252  if (mach != MachineType_NUM) {
1253  m_missMachLatencyHist[mach]->sample(total_lat);
1254  m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
1255 
1256  if ((issued_time <= initialRequestTime) &&
1257  (initialRequestTime <= forwardRequestTime) &&
1258  (forwardRequestTime <= firstResponseTime) &&
1259  (firstResponseTime <= completion_time)) {
1260 
1261  m_IssueToInitialDelayHist[mach]->sample(
1262  initialRequestTime - issued_time);
1263  m_InitialToForwardDelayHist[mach]->sample(
1264  forwardRequestTime - initialRequestTime);
1265  m_ForwardToFirstResponseDelayHist[mach]->sample(
1266  firstResponseTime - forwardRequestTime);
1267  m_FirstResponseToCompletionDelayHist[mach]->sample(
1268  completion_time - firstResponseTime);
1269  }
1270  }
1271 
1272  }
1273 
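 // Worked example for the breakdown sampled above (hypothetical cycle
 // numbers): issued_time = 100, initialRequestTime = 104,
 // forwardRequestTime = 110, firstResponseTime = 118 and
 // completion_time = 120 give IssueToInitial = 4, InitialToForward = 6,
 // ForwardToFirstResponse = 8 and FirstResponseToCompletion = 2 cycles.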
1274  DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
1275  curTick(), m_version, "Coal",
1276  success ? "Done" : "SC_Failed", "", "",
1277  printAddress(srequest->pkt->getAddr()), total_lat);
1278 }
1279 
1280 void
1281 GPUCoalescer::regStats()
1282 {
1284 
1285  // These statistical variables are not for display.
1286  // The profiler will collate these across different
1287  // coalescers and display those collated statistics.
1288  m_outstandReqHist.init(10);
1289  m_latencyHist.init(10);
1290  m_missLatencyHist.init(10);
1291 
1292  for (int i = 0; i < RubyRequestType_NUM; i++) {
1293  m_typeLatencyHist.push_back(new Stats::Histogram());
1294  m_typeLatencyHist[i]->init(10);
1295 
1296  m_missTypeLatencyHist.push_back(new Stats::Histogram());
1297  m_missTypeLatencyHist[i]->init(10);
1298  }
1299 
1300  for (int i = 0; i < MachineType_NUM; i++) {
1301  m_missMachLatencyHist.push_back(new Stats::Histogram());
1302  m_missMachLatencyHist[i]->init(10);
1303 
1304  m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
1305  m_IssueToInitialDelayHist[i]->init(10);
1306 
1307  m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
1308  m_InitialToForwardDelayHist[i]->init(10);
1309 
1310  m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
1311  m_ForwardToFirstResponseDelayHist[i]->init(10);
1312 
1313  m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
1314  m_FirstResponseToCompletionDelayHist[i]->init(10);
1315  }
1316 
1317  for (int i = 0; i < RubyRequestType_NUM; i++) {
1318  m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
1319 
1320  for (int j = 0; j < MachineType_NUM; j++) {
1321  m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
1322  m_missTypeMachLatencyHist[i][j]->init(10);
1323  }
1324  }
1325 
1326  // GPU cache stats
1327  GPU_TCPLdHits
1328  .name(name() + ".gpu_tcp_ld_hits")
1329  .desc("loads that hit in the TCP")
1330  ;
1331  GPU_TCPLdTransfers
1332  .name(name() + ".gpu_tcp_ld_transfers")
1333  .desc("TCP to TCP load transfers")
1334  ;
1335  GPU_TCCLdHits
1336  .name(name() + ".gpu_tcc_ld_hits")
1337  .desc("loads that hit in the TCC")
1338  ;
1339  GPU_LdMiss
1340  .name(name() + ".gpu_ld_misses")
1341  .desc("loads that miss in the GPU")
1342  ;
1343 
1344  GPU_TCPStHits
1345  .name(name() + ".gpu_tcp_st_hits")
1346  .desc("stores that hit in the TCP")
1347  ;
1348  GPU_TCPStTransfers
1349  .name(name() + ".gpu_tcp_st_transfers")
1350  .desc("TCP to TCP store transfers")
1351  ;
1352  GPU_TCCStHits
1353  .name(name() + ".gpu_tcc_st_hits")
1354  .desc("stores that hit in the TCC")
1355  ;
1356  GPU_StMiss
1357  .name(name() + ".gpu_st_misses")
1358  .desc("stores that miss in the GPU")
1359  ;
1360 
1361  // CP cache stats
1362  CP_TCPLdHits
1363  .name(name() + ".cp_tcp_ld_hits")
1364  .desc("loads that hit in the TCP")
1365  ;
1366  CP_TCPLdTransfers
1367  .name(name() + ".cp_tcp_ld_transfers")
1368  .desc("TCP to TCP load transfers")
1369  ;
1370  CP_TCCLdHits
1371  .name(name() + ".cp_tcc_ld_hits")
1372  .desc("loads that hit in the TCC")
1373  ;
1374  CP_LdMiss
1375  .name(name() + ".cp_ld_misses")
1376  .desc("loads that miss in the GPU")
1377  ;
1378 
1379  CP_TCPStHits
1380  .name(name() + ".cp_tcp_st_hits")
1381  .desc("stores that hit in the TCP")
1382  ;
1383  CP_TCPStTransfers
1384  .name(name() + ".cp_tcp_st_transfers")
1385  .desc("TCP to TCP store transfers")
1386  ;
1387  CP_TCCStHits
1388  .name(name() + ".cp_tcc_st_hits")
1389  .desc("stores that hit in the TCC")
1390  ;
1391  CP_StMiss
1392  .name(name() + ".cp_st_misses")
1393  .desc("stores that miss in the GPU")
1394  ;
1395 }