gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compute_unit.hh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: John Kalamatianos, Anthony Gutierrez
34  */
35 
36 #ifndef __COMPUTE_UNIT_HH__
37 #define __COMPUTE_UNIT_HH__
38 
39 #include <deque>
40 #include <map>
41 #include <unordered_map>
42 #include <vector>
43 
44 #include "base/callback.hh"
45 #include "base/statistics.hh"
46 #include "base/types.hh"
47 #include "enums/PrefetchType.hh"
52 #include "gpu-compute/qstruct.hh"
55 #include "mem/mem_object.hh"
56 #include "mem/port.hh"
57 
58 static const int MAX_REGS_FOR_NON_VEC_MEM_INST = 1;
59 static const int MAX_WIDTH_FOR_MEM_INST = 32;
60 
61 class NDRange;
62 class Shader;
63 class VectorRegisterFile;
64 
65 struct ComputeUnitParams;
66 
68 {
69  OLDEST = 0,
71 };
72 
73 // List of execution units
75 {
76  SIMD0 = 0,
83 };
84 
86 {
91 };
92 
93 class ComputeUnit : public MemObject
94 {
95  public:
102 
103  // Buffers used to communicate between various pipeline stages
104 
105  // List of waves which are ready to be scheduled.
106  // Each execution resource has a ready list. readyList is
107  // used to communicate between scoreboardCheck stage and
108  // schedule stage
109  // TODO: make enum to index readyList
111 
112  // Stores the status of waves. A READY implies the
113  // wave is ready to be scheduled this cycle and
114  // is already present in the readyList. waveStatusList is
115  // used to communicate between scoreboardCheck stage and
116  // schedule stage
117  // TODO: convert std::pair to a class to increase readability
119 
120  // List of waves which will be dispatched to
121  // each execution resource. A FILLED implies
122  // dispatch list is non-empty and
123  // execution unit has something to execute
124  // this cycle. Currently, the dispatch list of
125  // an execution resource can hold only one wave because
126  // an execution resource can execute only one wave in a cycle.
127  // dispatchList is used to communicate between schedule
128  // and exec stage
129  // TODO: convert std::pair to a class to increase readability
131 
132  int rrNextMemID; // used by RR WF exec policy to cycle through WF's
134  typedef ComputeUnitParams Params;
136  int cu_id;
137 
138  // array of vector register files, one per SIMD
140  // Number of vector ALU units (SIMDs) in CU
141  int numSIMDs;
142  // number of pipe stages for bypassing data to next dependent single
143  // precision vector instruction inside the vector ALU pipeline
145  // number of pipe stages for bypassing data to next dependent double
146  // precision vector instruction inside the vector ALU pipeline
148  // number of cycles per issue period
150 
151  // Number of global and local memory execution resources in CU
154  // tracks the last cycle a vector instruction was executed on a SIMD
156 
157  // true if we allow a separate TLB per lane
159  // if 0, TLB prefetching is off.
161  // if fixed-stride prefetching, this is the stride.
163 
167  Enums::PrefetchType prefetchType;
169 
174 
175  /*
176  * For counting page accesses
177  *
178  * cuExitCallback inherits from Callback. When you register a callback
179  * function as an exit callback, it will get added to an exit callback
180  * queue, such that on simulation exit, all callbacks in the callback
181  * queue will have their process() function called.
182  */
184 
186  uint32_t barrier_id;
187  // vector of Vector ALU (MACC) pipelines
189  // minimum issue period per SIMD unit (in cycles)
191 
192  // Resource control for Vector Register File->Global Memory pipe buses
194  // Resource control for Vector Register File->Local Memory pipe buses
198  // Resource control for global memory to VRF data/address bus
200  // Resource control for local memory to VRF data/address bus
202 
203  uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes
204  uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes
205  uint32_t numCyclesPerStoreTransfer; // number of cycles per vector store
206  uint32_t numCyclesPerLoadTransfer; // number of cycles per vector load
207 
210 
211  // number of vector registers being reserved for each SIMD unit
213  // number of vector registers per SIMD unit
215  // Support for scheduling VGPR status update events
219 
220  void
221  registerEvent(uint32_t simdId,
222  uint32_t regIdx,
223  uint32_t operandSize,
224  uint64_t when,
225  uint8_t newStatus) {
226  regIdxVec.push_back(std::make_pair(simdId, regIdx));
227  timestampVec.push_back(when);
228  statusVec.push_back(newStatus);
229  if (operandSize > 4) {
230  regIdxVec.push_back(std::make_pair(simdId,
231  ((regIdx + 1) %
233  timestampVec.push_back(when);
234  statusVec.push_back(newStatus);
235  }
236  }
237 
238  void updateEvents();
239 
240  // this hash map will keep track of page divergence
241  // per memory instruction per wavefront. The hash map
242  // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.
243  std::map<Addr, int> pagesTouched;
244 
245  ComputeUnit(const Params *p);
246  ~ComputeUnit();
// Returns wavefrontSize (declared outside this view) unchanged.
251  int wfSize() const { return wavefrontSize; }
252 
253  void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);
254  void exec();
255  void initiateFetch(Wavefront *wavefront);
256  void fetch(PacketPtr pkt, Wavefront *wavefront);
257  void fillKernelState(Wavefront *w, NDRange *ndr);
258 
259  void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
260  NDRange *ndr);
261 
262  void StartWorkgroup(NDRange *ndr);
263  int ReadyWorkgroup(NDRange *ndr);
264 
// True when unitId lies in the inclusive range [SIMD0, SIMD3].
265  bool isVecAlu(int unitId) { return unitId >= SIMD0 && unitId <= SIMD3; }
// True when unitId equals GLBMEM_PIPE.
266  bool isGlbMem(int unitId) { return unitId == GLBMEM_PIPE; }
// True when unitId equals LDSMEM_PIPE.
267  bool isShrMem(int unitId) { return unitId == LDSMEM_PIPE; }
// Returns the GLBMEM_PIPE enumerator as an int.
268  int GlbMemUnitId() { return GLBMEM_PIPE; }
// Returns the LDSMEM_PIPE enumerator as an int.
269  int ShrMemUnitId() { return LDSMEM_PIPE; }
// Pre-increments nextGlbMemBus and returns the new value modulo
// numGlbMemUnits. The stored counter itself is not wrapped here.
// NOTE(review): assumes numGlbMemUnits is non-zero -- confirm where it is set.
270  int nextGlbRdBus() { return (++nextGlbMemBus) % numGlbMemUnits; }
// Pre-increments nextLocMemBus and returns the new value modulo
// numLocMemUnits. The stored counter itself is not wrapped here.
// NOTE(review): assumes numLocMemUnits is non-zero -- confirm where it is set.
271  int nextLocRdBus() { return (++nextLocMemBus) % numLocMemUnits; }
272  /* This function cycles through all the wavefronts in all the phases to
273  * check whether every wavefront associated with one barrier (denoted by
274  * _barrier_id) has reached the same barrier in the program (denoted by
275  * bcnt). When the number of wavefronts at the barrier matches bslots,
276  * it returns true.
277  */
278  int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
279  bool cedeSIMD(int simdId, int wfSlotId);
280 
281  template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);
282  virtual void init();
283  void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
284  void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);
285  void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
286  bool kernelLaunch=true,
287  RequestPtr req=nullptr);
288  void handleMemPacket(PacketPtr pkt, int memport_index);
289  bool processTimingPacket(PacketPtr pkt);
290  void processFetchReturn(PacketPtr pkt);
292 
294 
295  bool isDone() const;
296  bool isSimdDone(uint32_t) const;
297 
298  protected:
300 
302 
303  public:
326 
327  void updateInstStats(GPUDynInstPtr gpuDynInst);
328 
329  // the following stats compute the avg. TLB access latency per
330  // uncoalesced request (only for data)
334  // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.
336 
339 
340  // over all memory instructions executed over all wavefronts
341  // how many touched 0-4 pages, 4-8, ..., 60-64 pages
345 
347  // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active
348  // when the instruction is committed, this number is still incremented by 1
350  // Number of cycles among successive instruction executions across all
351  // wavefronts of the same CU
353  // number of individual vector operations executed
355  // Total cycles that something is running on the GPU
357  Stats::Formula vpc; // vector ops per cycle
358  Stats::Formula ipc; // vector instructions per cycle
362  // number of vector ALU instructions received
364  // number of times a WG can not start due to lack of free VGPRs in SIMDs
369  // flag per vector SIMD unit that is set when there is at least one
370  // wavefront that has a vector ALU instruction as the oldest in its
371  // Instruction Buffer: Defined in the Scoreboard stage, consumed
372  // by the Execute stage.
374  // number of available (oldest) LDS instructions that could have
375  // been issued to the LDS at a specific issue slot
377  // number of available Global memory instructions that could have
378  // been issued to TCP at a specific issue slot
380 
381  void
382  regStats();
383 
// Returns the lds member by non-const reference.
// NOTE(review): the method is const yet hands out a mutable LdsState&;
// presumably lds is a reference member -- confirm at its declaration.
384  LdsState &
385  getLds() const
386  {
387  return lds;
388  }
389 
390  int32_t
391  getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;
392 
// Returns the const member _cacheLineSize.
393  int cacheLineSize() const { return _cacheLineSize; }
394 
395  bool
396  sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));
397 
398  typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;
400 
401  class CUExitCallback : public Callback
402  {
403  private:
405 
406  public:
407  virtual ~CUExitCallback() { }
408 
410  {
411  computeUnit = _cu;
412  }
413 
414  virtual void
415  process();
416  };
417 
419 
421  class DataPort : public MasterPort
422  {
423  public:
424  DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
425  : MasterPort(_name, _cu), computeUnit(_cu),
426  index(_index) { }
427 
429 
431  {
435 
436  SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
437  Packet::SenderState *sender_state=nullptr)
438  : _gpuDynInst(gpuDynInst),
439  port_index(_port_index),
440  saved(sender_state) { }
441  };
442 
443  class MemReqEvent : public Event
444  {
445  private:
448 
449  public:
450  MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
451  : Event(), dataPort(_data_port), pkt(_pkt)
452  {
454  }
455 
456  void process();
457  const char *description() const;
458  };
459 
460  class MemRespEvent : public Event
461  {
462  private:
465 
466  public:
467  MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
468  : Event(), dataPort(_data_port), pkt(_pkt)
469  {
471  }
472 
473  void process();
474  const char *description() const;
475  };
476 
478 
479  protected:
481  int index;
482 
483  virtual bool recvTimingResp(PacketPtr pkt);
484  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
485  virtual void recvFunctional(PacketPtr pkt) { }
486  virtual void recvRangeChange() { }
487  virtual void recvReqRetry();
488 
489  virtual void
491  {
492  resp.clear();
493  snoop = true;
494  }
495 
496  };
497 
498  // Instruction cache access port
499  class SQCPort : public MasterPort
500  {
501  public:
502  SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
503  : MasterPort(_name, _cu), computeUnit(_cu),
504  index(_index) { }
505 
507 
509  {
512 
514  *sender_state=nullptr)
515  : wavefront(_wavefront), saved(sender_state) { }
516  };
517 
519 
520  protected:
522  int index;
523 
524  virtual bool recvTimingResp(PacketPtr pkt);
525  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
526  virtual void recvFunctional(PacketPtr pkt) { }
527  virtual void recvRangeChange() { }
528  virtual void recvReqRetry();
529 
530  virtual void
532  {
533  resp.clear();
534  snoop = true;
535  }
536  };
537 
539  class DTLBPort : public MasterPort
540  {
541  public:
542  DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
543  : MasterPort(_name, _cu), computeUnit(_cu),
544  index(_index), stalled(false)
545  { }
546 
// Reports whether this port is currently marked stalled. Const-qualified
// to match LDSPort::isStalled(); it only reads the flag.
547  bool isStalled() const { return stalled; }
// Marks the port as stalled.
548  void stallPort() { stalled = true; }
// Clears the stalled mark.
549  void unstallPort() { stalled = false; }
550 
556 
561  {
562  // the memInst that this is associated with
564 
565  // the lane in the memInst this is associated with, so we send
566  // the memory request down the right port
568 
569  // constructor used for packets involved in timing accesses
570  SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
571  : _gpuDynInst(gpuDynInst), portIndex(port_index) { }
572 
573  };
574 
575  protected:
577  int index;
578  bool stalled;
579 
580  virtual bool recvTimingResp(PacketPtr pkt);
581  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
582  virtual void recvFunctional(PacketPtr pkt) { }
583  virtual void recvRangeChange() { }
584  virtual void recvReqRetry();
585  };
586 
587  class ITLBPort : public MasterPort
588  {
589  public:
590  ITLBPort(const std::string &_name, ComputeUnit *_cu)
591  : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }
592 
593 
// Reports whether this port is currently marked stalled. Const-qualified
// to match LDSPort::isStalled(); it only reads the flag.
594  bool isStalled() const { return stalled; }
// Marks the port as stalled.
595  void stallPort() { stalled = true; }
// Clears the stalled mark.
596  void unstallPort() { stalled = false; }
597 
603 
608  {
609  // The wavefront associated with this request
611 
612  SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
613  };
614 
615  protected:
617  bool stalled;
618 
619  virtual bool recvTimingResp(PacketPtr pkt);
620  virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
621  virtual void recvFunctional(PacketPtr pkt) { }
622  virtual void recvRangeChange() { }
623  virtual void recvReqRetry();
624  };
625 
629  class LDSPort : public MasterPort
630  {
631  public:
632  LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
633  : MasterPort(_name, _cu, _id), computeUnit(_cu)
634  {
635  }
636 
// Reports whether this port is currently marked stalled.
637  bool isStalled() const { return stalled; }
// Marks the port as stalled.
638  void stallPort() { stalled = true; }
// Clears the stalled mark.
639  void unstallPort() { stalled = false; }
640 
645  std::queue<PacketPtr> retries;
646 
652  {
653  protected:
654  // The actual read/write/atomic request that goes with this command
656 
657  public:
659  _gpuDynInst(gpuDynInst)
660  {
661  }
662 
664  getMemInst() const
665  {
666  return _gpuDynInst;
667  }
668  };
669 
670  virtual bool
672 
673  protected:
674 
675  bool stalled = false;
676 
678 
679  virtual bool
681 
682  virtual Tick
683  recvAtomic(PacketPtr pkt) { return 0; }
684 
685  virtual void
687  {
688  }
689 
690  virtual void
692  {
693  }
694 
695  virtual void
696  recvReqRetry();
697  };
698 
702  LDSPort *ldsPort = nullptr;
703 
// Returns the raw ldsPort pointer. The member is initialised to nullptr and
// is only assigned when getMasterPort() is asked for the "ldsPort"
// interface, so callers may receive nullptr before that happens.
704  LDSPort *
705  getLdsPort() const
706  {
707  return ldsPort;
708  }
709 
714  // port to the TLB hierarchy (i.e., the L1 TLB)
716  // port to the SQC (i.e. the I-cache)
718  // port to the SQC TLB (there's a separate TLB for each I-cache)
720 
// Creates the master port named by if_name and returns a reference to it.
//  - "memory_port":      allocates a DataPort, stored in memPort[idx]
//  - "translation_port": allocates a DTLBPort, stored in tlbPort[idx]
//  - "sqc_port":         allocates an SQCPort, stored in sqcPort
//  - "sqc_tlb_port":     allocates an ITLBPort, stored in sqcTLBPort
//  - "ldsPort":          allocates the LDSPort; fatal() if one already exists
// Any other name ends in panic().
// NOTE(review): only the LDS branch guards against a repeated request; the
// other branches overwrite the stored pointer without deleting the previous
// object -- confirm each interface is requested once per index.
// NOTE(review): memPort[idx] and tlbPort[idx] are indexed without a visible
// bounds check -- presumably the containers are sized elsewhere; verify.
// NOTE(review): the first three branches format the port name with the same
// "%s-port%d" pattern, and the last two with "%s-port", so different port
// kinds can end up with identical names -- confirm this is intended.
721  virtual BaseMasterPort&
722  getMasterPort(const std::string &if_name, PortID idx)
723  {
724  if (if_name == "memory_port") {
725  memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
726  this, idx);
727  return *memPort[idx];
728  } else if (if_name == "translation_port") {
729  tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
730  this, idx);
731  return *tlbPort[idx];
732  } else if (if_name == "sqc_port") {
733  sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
734  this, idx);
735  return *sqcPort;
736  } else if (if_name == "sqc_tlb_port") {
737  sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
738  return *sqcTLBPort;
739  } else if (if_name == "ldsPort") {
740  if (ldsPort) {
741  fatal("an LDS port was already allocated");
742  }
743  ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
744  return *ldsPort;
745  } else {
746  panic("incorrect port name");
747  }
748  }
749 
750  // xact_cas_load()
752  {
753  public:
755  waveIdentifier(int _simdId, int _wfSlotId)
756  : simdId(_simdId), wfSlotId(_wfSlotId) { }
757 
758  int simdId;
759  int wfSlotId;
760  };
761 
762  class waveQueue
763  {
764  public:
766  };
767  std::map<unsigned, waveQueue> xactCasLoadMap;
768 
// Returns the current globalSeqNum and then increments it (post-increment),
// so the returned value is the one in place before this call.
769  uint64_t getAndIncSeqNum() { return globalSeqNum++; }
770 
771  private:
772  const int _cacheLineSize;
773  uint64_t globalSeqNum;
776 };
777 
778 #endif // __COMPUTE_UNIT_HH__
uint32_t numVecRegsPerSimd
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:167
void updatePageDivergenceDist(Addr addr)
Stats::Formula tlbLatency
const char * description() const
Return a C string describing the event.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Formula vpc
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Stats::Scalar flatLDSInsts
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Bitfield< 30, 0 > index
std::vector< bool > vectorAluInstAvail
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
void handleMemPacket(PacketPtr pkt, int memport_index)
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Generic callback class.
Definition: callback.hh:41
uint32_t numCyclesPerLoadTransfer
Stats::Formula ipc
bool isDone() const
Data TLB port.
WaitClass glbMemToVrfBus
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
Definition: compute_unit.hh:58
std::map< unsigned, waveQueue > xactCasLoadMap
bool debugSegFault
LdsState & lds
static const int MAX_WIDTH_FOR_MEM_INST
Definition: compute_unit.hh:59
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
#define panic(...)
Definition: misc.hh:153
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
void fillKernelState(Wavefront *w, NDRange *ndr)
Stats::Vector hitsPerTLBLevel
Stats::Scalar dynamicGMemInstrCnt
ScheduleStage scheduleStage
Definition: compute_unit.hh:98
Stats::Formula flatLDSInstsPerWF
int storeBusLength()
const char * __attribute__((weak)) m5MainCommands[]
int dpBypassLength()
Stats::Distribution controlFlowDivergenceDist
ITLBPort * sqcTLBPort
std::vector< std::vector< Wavefront * > > readyList
Stats::Scalar vectorMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
ip6_addr_t addr
Definition: inet.hh:335
int dpBypassPipeLength
MemObject declaration.
uint64_t globalSeqNum
void regStats()
Register statistics for this object.
int cacheLineSize() const
SenderState(Wavefront *_wavefront)
int wfSize() const
Port Object Declaration.
int spBypassPipeLength
CUExitCallback * cuExitCallback
Definition: shader.hh:76
A vector of scalar stats.
Definition: statistics.hh:2499
const char * description() const
Return a C string describing the event.
std::vector< DTLBPort * > tlbPort
std::vector< std::vector< Wavefront * > > wfList
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
Definition: lds_state.hh:58
void updateEvents()
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Scalar dynamicLMemInstrCnt
SenderState is information carried along with the packet throughout the TLB hierarchy.
Stats::Formula numALUInstsExecuted
Declaration of Statistics objects.
GPUDynInstPtr getMemInst() const
int spBypassLength()
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
GPUStaticInst * kernelLaunchInst
Stats::Scalar numInstrExecuted
void initiateFetch(Wavefront *wavefront)
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2475
SenderState(GPUDynInstPtr gpuDynInst)
virtual void recvFunctional(PacketPtr pkt)
Stats::Scalar vALUInsts
STL vector class.
Definition: stl.hh:40
Stats::Distribution ldsBankConflictDist
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
SenderState is information carried along with the packet throughout the TLB hierarchy.
static const FlagsType AutoDelete
Definition: eventq.hh:103
std::vector< WaitClass > vrfToLocalMemPipeBus
Stats::Formula vectorMemWritesPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
SQCPort * sqcPort
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
virtual Tick recvAtomic(PacketPtr pkt)
std::vector< WaitClass > aluPipe
uint32_t numCyclesPerStoreTransfer
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
ComputeUnit(const Params *p)
Definition: compute_unit.cc:59
bool localMemBarrier
std::deque< std::pair< PacketPtr, Wavefront * > > retries
GlobalMemPipeline globalMemoryPipe
uint32_t coalescerToVrfBusWidth
Stats::Formula vALUUtilization
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Distribution activeLanesPerLMemInstrDist
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Stats::Formula scalarMemWritesPerWF
Stats::Scalar numTimesWgBlockedDueVgprAlloc
bool functionalTLB
CUExitCallback(ComputeUnit *_cu)
Stats::Distribution execRateDist
Stats::Formula vectorMemReadsPerWF
int ShrMemUnitId()
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Data access Port.
bool isShrMem(int unitId)
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
virtual Tick recvAtomic(PacketPtr pkt)
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
the port intended to communicate between the CU and its LDS
std::list< waveIdentifier > waveIDQueue
Stats::Distribution pageDivergenceDist
ExecStage execStage
Definition: compute_unit.hh:99
virtual bool recvTimingResp(PacketPtr pkt)
get the result of packets sent to the LDS when they return
uint64_t Tick
Tick count type.
Definition: types.hh:63
Stats::Scalar tlbRequests
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
EXEC_UNIT
Definition: compute_unit.hh:74
A simple distribution stat.
Definition: statistics.hh:2523
ComputeUnit * computeUnit
std::vector< WaitClass > vrfToGlobalMemPipeBus
void updateInstStats(GPUDynInstPtr gpuDynInst)
MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
Stats::Scalar flatVMemInsts
Stats::Scalar numCASOps
ComputeUnit * computeUnit
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
#define fatal(...)
Definition: misc.hh:163
MasterID _masterId
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
std::vector< std::vector< Addr > > lastVaddrSimd
uint32_t vrfToCoalescerBusWidth
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
void StartWorkgroup(NDRange *ndr)
void setFlags(Flags _flags)
Accessor for flags.
Definition: eventq.hh:264
Stats::Formula sALUInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
bool isGlbMem(int unitId)
Stats::Scalar scalarMemWrites
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Stats::Scalar scalarMemReads
Bitfield< 0 > w
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
uint16_t MasterID
Definition: request.hh:85
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Stats::Scalar ldsNoFlatInsts
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
send a general request to the LDS make sure to look at the return value here as your request might be...
bool cedeSIMD(int simdId, int wfSlotId)
Stats::Scalar instCyclesVALU
Tick resp_tick_latency
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a MemObject that sees the packet.
Definition: packet.hh:377
Stats::Scalar completedWfs
bool xact_cas_mode
STL deque class.
Definition: stl.hh:47
A formula for statistics that is calculated when printed.
Definition: statistics.hh:2895
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Stats::Formula scalarMemReadsPerWF
Stats::Formula vALUInstsPerWF
Shader * shader
virtual BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx)
Get a master port with a given name and index.
Stats::Distribution activeLanesPerGMemInstrDist
virtual const std::string name() const
Definition: sim_object.hh:117
void doSmReturn(GPUDynInstPtr gpuDynInst)
LdsState & getLds() const
ComputeUnit * computeUnit
bool isStalled() const
Stats::Scalar tlbCycles
SenderState is information carried along with the packet, esp.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Stats::Scalar numVecOpsExecuted
std::vector< VectorRegisterFile * > vrf
Definition: eventq.hh:185
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
Definition: port.hh:115
int loadBusLength()
virtual void recvFunctional(PacketPtr pkt)
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
EXEC_POLICY
Definition: compute_unit.hh:67
int nextLocRdBus()
Stats::Scalar numFailedCASOps
int ReadyWorkgroup(NDRange *ndr)
virtual bool sendTimingReq(PacketPtr pkt)
attempt to send this packet, either the port is already stalled, the request is nack'd and must stall...
LDSPort * getLdsPort() const
std::map< Addr, int > pagesTouched
Stats::Scalar instCyclesSALU
virtual void process()
virtual process function that is invoked when the callback queue is executed.
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
WaitClass locMemToVrfBus
void fetch(PacketPtr pkt, Wavefront *wavefront)
TLB_CACHE
Definition: compute_unit.hh:85
FetchStage fetchStage
Definition: compute_unit.hh:96
Stats::Formula flatVMemInstsPerWF
bool isSimdDone(uint32_t) const
std::vector< uint8_t > statusVec
uint32_t barrier_id
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:181
std::vector< uint64_t > lastExecCycle
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
std::vector< WaitClass > wfWait
LocalMemPipeline localMemoryPipe
bool stalled
whether or not it is stalled
void processFetchReturn(PacketPtr pkt)
int GlbMemUnitId()
pageDataStruct pageAccesses
bool processTimingPacket(PacketPtr pkt)
Enums::PrefetchType prefetchType
Stats::Scalar sALUInsts
Stats::Scalar ldsBankAccesses
Tick req_tick_latency
Stats::Scalar totalCycles
LDSPort * ldsPort
The port to access the Local Data Store Can be connected to a LDS object.
waveIdentifier(int _simdId, int _wfSlotId)
std::vector< uint64_t > timestampVec
Stats::Scalar vectorMemReads
Bitfield< 0 > p
std::vector< Addr > lastVaddrCU
int nextGlbRdBus()
ComputeUnitParams Params
Stats::Formula ldsNoFlatInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
ComputeUnit * computeUnit
uint64_t getAndIncSeqNum()
Stats::Scalar threadCyclesVALU
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
MasterID masterId()
virtual void recvReqRetry()
the bus is telling the port that there is now space so retrying stalled requests should work now this...
std::vector< int > vectorRegsReserved
bool isVecAlu(int unitId)
EXEC_POLICY exec_policy
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:97
virtual void recvFunctional(PacketPtr pkt)
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
const int _cacheLineSize

Generated on Fri Jun 9 2017 13:03:47 for gem5 by doxygen 1.8.6