~swilson/gem5-docs/compute__unit_8hh_source.html

 /*

  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.

  * All rights reserved.

  *

  * For use for simulation and test purposes only

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions are met:

  *

  * 1. Redistributions of source code must retain the above copyright notice,

  * this list of conditions and the following disclaimer.

  *

  * 2. Redistributions in binary form must reproduce the above copyright notice,

  * this list of conditions and the following disclaimer in the documentation

  * and/or other materials provided with the distribution.

  *

  * 3. Neither the name of the copyright holder nor the names of its contributors

  * may be used to endorse or promote products derived from this software

  * without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

  * POSSIBILITY OF SUCH DAMAGE.

  *

  * Author: John Kalamatianos, Anthony Gutierrez

  */


 #ifndef __COMPUTE_UNIT_HH__

 #define __COMPUTE_UNIT_HH__


 #include <deque>

 #include <map>

 #include <unordered_map>

 #include <vector>


 #include "base/callback.hh"

 #include "base/statistics.hh"

 #include "base/types.hh"

 #include "enums/PrefetchType.hh"

 #include "gpu-compute/exec_stage.hh"

 #include "gpu-compute/fetch_stage.hh"

 #include "gpu-compute/global_memory_pipeline.hh"

 #include "gpu-compute/local_memory_pipeline.hh"

 #include "gpu-compute/qstruct.hh"

 #include "gpu-compute/schedule_stage.hh"

 #include "gpu-compute/scoreboard_check_stage.hh"

 #include "mem/mem_object.hh"

 #include "mem/port.hh"


 // Compile-time limits for memory instructions. Their users live outside this
 // header; judging by the names only (TODO confirm against the users), the
 // first bounds the register count of a non-vector memory instruction and the
 // second bounds the width of a memory instruction.
 static constexpr int MAX_REGS_FOR_NON_VEC_MEM_INST{1};
 static constexpr int MAX_WIDTH_FOR_MEM_INST{32};


 class NDRange;

 class Shader;

 class VectorRegisterFile;


 struct ComputeUnitParams;


 // Wavefront execution policy selector (stored in ComputeUnit::exec_policy).
 // RR is the policy that cycles through wavefronts (see rrNextMemID);
 // OLDEST presumably favours the oldest wavefront -- confirm in the scheduler.
 enum EXEC_POLICY
 {
     OLDEST = 0,
     RR = 1
 };


 // Execution units of a compute unit. SIMD0..SIMD3 form the range accepted
 // by ComputeUnit::isVecAlu(); GLBMEM_PIPE and LDSMEM_PIPE are the ids
 // returned by GlbMemUnitId() and ShrMemUnitId(). NUM_UNITS is the number of
 // enumerators that precede it.
 enum EXEC_UNIT
 {
     SIMD0 = 0,
     SIMD1 = 1,
     SIMD2 = 2,
     SIMD3 = 3,
     GLBMEM_PIPE = 4,
     LDSMEM_PIPE = 5,
     NUM_UNITS = 6
 };


 // Combined TLB / cache outcome codes. The enumerator names pair a TLB
 // hit-or-miss with a cache hit-or-miss; the users are outside this header.
 enum TLB_CACHE
 {
     TLB_MISS_CACHE_MISS = 0,
     TLB_MISS_CACHE_HIT = 1,
     TLB_HIT_CACHE_MISS = 2,
     TLB_HIT_CACHE_HIT = 3
 };


 class ComputeUnit : public MemObject

 {

   public:

     FetchStage fetchStage;

     ScoreboardCheckStage scoreboardCheckStage;

     ScheduleStage scheduleStage;

     ExecStage execStage;

     GlobalMemPipeline globalMemoryPipe;

     LocalMemPipeline localMemoryPipe;


     // Buffers used to communicate between various pipeline stages


     // List of waves which are ready to be scheduled.

     // Each execution resource has a ready list. readyList is

     // used to communicate between scoreboardCheck stage and

     // schedule stage

     // TODO: make enum to index readyList

     std::vector<std::vector<Wavefront*>> readyList;


     // Stores the status of waves. A READY implies the

     // wave is ready to be scheduled this cycle and

     // is already present in the readyList. waveStatusList is

     // used to communicate between scoreboardCheck stage and

     // schedule stage

     // TODO: convert std::pair to a class to increase readability

     std::vector<std::vector<std::pair<Wavefront*, WAVE_STATUS>>> waveStatusList;


     // List of waves which will be dispatched to

     // each execution resource. A FILLED implies

     // dispatch list is non-empty and

     // execution unit has something to execute

     // this cycle. Currently, the dispatch list of

     // an execution resource can hold only one wave because

     // an execution resource can execute only one wave in a cycle.

     // dispatchList is used to communicate between schedule

     // and exec stage

     // TODO: convert std::pair to a class to increase readability

     std::vector<std::pair<Wavefront*, DISPATCH_STATUS>> dispatchList;


     int rrNextMemID; // used by RR WF exec policy to cycle through WF's

     int rrNextALUWp;

     typedef ComputeUnitParams Params;

     std::vector<std::vector<Wavefront*>> wfList;

     int cu_id;


     // array of vector register files, one per SIMD

     std::vector<VectorRegisterFile*> vrf;

     // Number of vector ALU units (SIMDs) in CU

     int numSIMDs;

     // number of pipe stages for bypassing data to next dependent single

     // precision vector instruction inside the vector ALU pipeline

     int spBypassPipeLength;

     // number of pipe stages for bypassing data to next dependent double

     // precision vector instruction inside the vector ALU pipeline

     int dpBypassPipeLength;

     // number of cycles per issue period

     int issuePeriod;


     // Number of global and local memory execution resources in CU

     int numGlbMemUnits;

     int numLocMemUnits;

     // tracks the last cycle a vector instruction was executed on a SIMD

     std::vector<uint64_t> lastExecCycle;


     // true if we allow a separate TLB per lane

     bool perLaneTLB;

     // if 0, TLB prefetching is off.

     int prefetchDepth;

     // if fixed-stride prefetching, this is the stride.

     int prefetchStride;


     std::vector<Addr> lastVaddrCU;

     std::vector<std::vector<Addr>> lastVaddrSimd;

     std::vector<std::vector<std::vector<Addr>>> lastVaddrWF;

     Enums::PrefetchType prefetchType;

     EXEC_POLICY exec_policy;


     bool xact_cas_mode;

     bool debugSegFault;

     bool functionalTLB;

     bool localMemBarrier;


     /*

      * for Counting page accesses

      *

      * cuExitCallback inherits from Callback. When you register a callback

      * function as an exit callback, it will get added to an exit callback

      * queue, such that on simulation exit, all callbacks in the callback

      * queue will have their process() function called.

      */

     bool countPages;


     Shader *shader;

     uint32_t barrier_id;

     // vector of Vector ALU (MACC) pipelines

     std::vector<WaitClass> aluPipe;

     // minimum issue period per SIMD unit (in cycles)

     std::vector<WaitClass> wfWait;


     // Resource control for Vector Register File->Global Memory pipe buses

     std::vector<WaitClass> vrfToGlobalMemPipeBus;

     // Resource control for Vector Register File->Local Memory pipe buses

     std::vector<WaitClass> vrfToLocalMemPipeBus;

     int nextGlbMemBus;

     int nextLocMemBus;

     // Resource control for global memory to VRF data/address bus

     WaitClass glbMemToVrfBus;

     // Resource control for local memory to VRF data/address bus

     WaitClass locMemToVrfBus;


     uint32_t vrfToCoalescerBusWidth; // VRF->Coalescer data bus width in bytes

     uint32_t coalescerToVrfBusWidth; // Coalescer->VRF data bus width in bytes

     uint32_t numCyclesPerStoreTransfer;  // number of cycles per vector store

     uint32_t numCyclesPerLoadTransfer;  // number of cycles per vector load


     Tick req_tick_latency;

     Tick resp_tick_latency;


     // number of vector registers being reserved for each SIMD unit

     std::vector<int> vectorRegsReserved;

     // number of vector registers per SIMD unit

     uint32_t numVecRegsPerSimd;

     // Support for scheduling VGPR status update events

     std::vector<std::pair<uint32_t, uint32_t> > regIdxVec;

     std::vector<uint64_t> timestampVec;

     std::vector<uint8_t>  statusVec;


     /**
      * Queue a vector-register status update.
      *
      * One entry is appended to each of regIdxVec, timestampVec and
      * statusVec for register (simdId, regIdx). When operandSize is greater
      * than 4, a second entry is appended for the following register,
      * (regIdx + 1) modulo numVecRegsPerSimd, with the same time and status.
      *
      * @param simdId      SIMD unit the register belongs to
      * @param regIdx      index of the (first) register
      * @param operandSize operand size (presumably in bytes -- confirm)
      * @param when        timestamp stored with the update
      * @param newStatus   status value stored with the update
      */
     void
     registerEvent(uint32_t simdId,
                   uint32_t regIdx,
                   uint32_t operandSize,
                   uint64_t when,
                   uint8_t newStatus) {
         // an operand wider than 4 also occupies the next register
         const uint32_t regCount = (operandSize > 4) ? 2 : 1;

         for (uint32_t n = 0; n < regCount; ++n) {
             // the wrap-around is only evaluated for the second register
             const uint32_t reg =
                 (n == 0) ? regIdx : ((regIdx + 1) % numVecRegsPerSimd);

             regIdxVec.push_back(std::make_pair(simdId, reg));
             timestampVec.push_back(when);
             statusVec.push_back(newStatus);
         }
     }


     void updateEvents();


     // this hash map will keep track of page divergence

     // per memory instruction per wavefront. The hash map

     // is cleared in GPUDynInst::updateStats() in gpu_dyn_inst.cc.

     std::map<Addr, int> pagesTouched;


     ComputeUnit(const Params *p);

     ~ComputeUnit();

     int spBypassLength() { return spBypassPipeLength; };

     int dpBypassLength() { return dpBypassPipeLength; };

     int storeBusLength() { return numCyclesPerStoreTransfer; };

     int loadBusLength() { return numCyclesPerLoadTransfer; };

     int wfSize() const { return wavefrontSize; };


     void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs);

     void exec();

     void initiateFetch(Wavefront *wavefront);

     void fetch(PacketPtr pkt, Wavefront *wavefront);

     void fillKernelState(Wavefront *w, NDRange *ndr);


     void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,

                         NDRange *ndr);


     void StartWorkgroup(NDRange *ndr);

     int ReadyWorkgroup(NDRange *ndr);


     // true if unitId lies in the SIMD0..SIMD3 range of EXEC_UNIT
     bool
     isVecAlu(int unitId)
     {
         return SIMD0 <= unitId && unitId <= SIMD3;
     }

     // true if unitId is the global memory pipe
     bool
     isGlbMem(int unitId)
     {
         return GLBMEM_PIPE == unitId;
     }

     // true if unitId is the LDS memory pipe
     bool
     isShrMem(int unitId)
     {
         return LDSMEM_PIPE == unitId;
     }

     // EXEC_UNIT id of the global memory pipe
     int
     GlbMemUnitId()
     {
         return GLBMEM_PIPE;
     }

     // EXEC_UNIT id of the LDS memory pipe
     int
     ShrMemUnitId()
     {
         return LDSMEM_PIPE;
     }

     /**
      * Advance the global memory bus selector and return the new value.
      *
      * The wrapped value is stored back into nextGlbMemBus, so the counter
      * stays within [0, numGlbMemUnits) instead of growing until it hits
      * signed overflow. The sequence of returned values is the same as with
      * an ever-increasing counter. numGlbMemUnits must be greater than 0.
      */
     int
     nextGlbRdBus()
     {
         nextGlbMemBus = (nextGlbMemBus + 1) % numGlbMemUnits;
         return nextGlbMemBus;
     }

     /**
      * Advance the local memory bus selector and return the new value.
      *
      * Same scheme as nextGlbRdBus(): the wrapped value is stored back so
      * nextLocMemBus stays within [0, numLocMemUnits). numLocMemUnits must
      * be greater than 0.
      */
     int
     nextLocRdBus()
     {
         nextLocMemBus = (nextLocMemBus + 1) % numLocMemUnits;
         return nextLocMemBus;
     }

     /* This function cycles through all the wavefronts in all the phases to see

      * if all of the wavefronts which should be associated with one barrier

      * (denoted with _barrier_id), are all at the same barrier in the program

      * (denoted by bcnt). When the number at the barrier matches bslots, then

      * return true.

      */

     int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);

     bool cedeSIMD(int simdId, int wfSlotId);


     template<typename c0, typename c1> void doSmReturn(GPUDynInstPtr gpuDynInst);

     virtual void init();

     void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);

     void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt);

     void injectGlobalMemFence(GPUDynInstPtr gpuDynInst,

                               bool kernelLaunch=true,

                               RequestPtr req=nullptr);

     void handleMemPacket(PacketPtr pkt, int memport_index);

     bool processTimingPacket(PacketPtr pkt);

     void processFetchReturn(PacketPtr pkt);

     void updatePageDivergenceDist(Addr addr);


     // master id held in the protected _masterId member
     MasterID
     masterId()
     {
         return _masterId;
     }


     bool isDone() const;

     bool isSimdDone(uint32_t) const;


   protected:

     MasterID _masterId;


     LdsState &lds;


   public:

     Stats::Scalar vALUInsts;

     Stats::Formula vALUInstsPerWF;

     Stats::Scalar sALUInsts;

     Stats::Formula sALUInstsPerWF;

     Stats::Scalar instCyclesVALU;

     Stats::Scalar instCyclesSALU;

     Stats::Scalar threadCyclesVALU;

     Stats::Formula vALUUtilization;

     Stats::Scalar ldsNoFlatInsts;

     Stats::Formula ldsNoFlatInstsPerWF;

     Stats::Scalar flatVMemInsts;

     Stats::Formula flatVMemInstsPerWF;

     Stats::Scalar flatLDSInsts;

     Stats::Formula flatLDSInstsPerWF;

     Stats::Scalar vectorMemWrites;

     Stats::Formula vectorMemWritesPerWF;

     Stats::Scalar vectorMemReads;

     Stats::Formula vectorMemReadsPerWF;

     Stats::Scalar scalarMemWrites;

     Stats::Formula scalarMemWritesPerWF;

     Stats::Scalar scalarMemReads;

     Stats::Formula scalarMemReadsPerWF;


     void updateInstStats(GPUDynInstPtr gpuDynInst);


     // the following stats compute the avg. TLB access latency per

     // uncoalesced request (only for data)

     Stats::Scalar tlbRequests;

     Stats::Scalar tlbCycles;

     Stats::Formula tlbLatency;

     // hitsPerTLBLevel[x] are the hits in Level x TLB. x = 0 is the page table.

     Stats::Vector hitsPerTLBLevel;


     Stats::Scalar ldsBankAccesses;

     Stats::Distribution ldsBankConflictDist;


     // over all memory instructions executed over all wavefronts

     // how many touched 0-4 pages, 4-8, ..., 60-64 pages

     Stats::Distribution pageDivergenceDist;

     Stats::Scalar dynamicGMemInstrCnt;

     Stats::Scalar dynamicLMemInstrCnt;


     Stats::Scalar wgBlockedDueLdsAllocation;

     // Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are active

     // when the instruction is committed, this number is still incremented by 1

     Stats::Scalar numInstrExecuted;

     // Number of cycles among successive instruction executions across all

     // wavefronts of the same CU

     Stats::Distribution execRateDist;

     // number of individual vector operations executed

     Stats::Scalar numVecOpsExecuted;

     // Total cycles that something is running on the GPU

     Stats::Scalar totalCycles;

     Stats::Formula vpc; // vector ops per cycle

     Stats::Formula ipc; // vector instructions per cycle

     Stats::Distribution controlFlowDivergenceDist;

     Stats::Distribution activeLanesPerGMemInstrDist;

     Stats::Distribution activeLanesPerLMemInstrDist;

     // number of vector ALU instructions received

     Stats::Formula numALUInstsExecuted;

     // number of times a WG can not start due to lack of free VGPRs in SIMDs

     Stats::Scalar numTimesWgBlockedDueVgprAlloc;

     Stats::Scalar numCASOps;

     Stats::Scalar numFailedCASOps;

     Stats::Scalar completedWfs;

     // flag per vector SIMD unit that is set when there is at least one

     // WV that has a vector ALU instruction as the oldest in its

     // Instruction Buffer: Defined in the Scoreboard stage, consumed

     // by the Execute stage.

     std::vector<bool> vectorAluInstAvail;

     // number of available (oldest) LDS instructions that could have

     // been issued to the LDS at a specific issue slot

     int shrMemInstAvail;

     // number of available Global memory instructions that could have

     // been issued to TCP at a specific issue slot

     int glbMemInstAvail;


     void

     regStats();


     // reference to the LDS state object bound to the lds member
     LdsState &getLds() const { return lds; }


     int32_t

     getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const;


     // value of the private, constant _cacheLineSize member
     int
     cacheLineSize() const
     {
         return _cacheLineSize;
     }


     bool

     sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result));


     typedef std::unordered_map<Addr, std::pair<int, int>> pageDataStruct;

     pageDataStruct pageAccesses;


     /**
      * Exit callback of a compute unit. It derives from Callback and keeps
      * a pointer to the compute unit it was created for; process() is
      * defined outside this header.
      */
     class CUExitCallback : public Callback
     {
       private:
         // compute unit handed to the constructor
         ComputeUnit *computeUnit;

       public:
         CUExitCallback(ComputeUnit *_cu) : computeUnit(_cu) { }

         virtual ~CUExitCallback() { }

         virtual void process();
     };


     CUExitCallback *cuExitCallback;


     /**
      * Master port used for the compute unit's data memory accesses; the
      * instances are stored in ComputeUnit::memPort.
      *
      * NOTE(review): _index is kept in the index member only and is not
      * forwarded to the MasterPort constructor (LDSPort does forward its
      * id) -- confirm this is intended.
      */
     class DataPort : public MasterPort
     {
       public:
         DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
             : MasterPort(_name, _cu), computeUnit(_cu),
               index(_index) { }

         // The constructor does not set this flag, so it is given a
         // defined initial value here rather than being read indeterminate.
         bool snoopRangeSent = false;

         /**
          * Per-packet state attached to packets sent through this port:
          * the issuing instruction, the port index, and the sender state
          * that was saved when this one was attached.
          */
         struct SenderState : public Packet::SenderState
         {
             GPUDynInstPtr _gpuDynInst;
             int port_index;
             Packet::SenderState *saved;

             SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index,
                         Packet::SenderState *sender_state=nullptr)
                 : _gpuDynInst(gpuDynInst),
                   port_index(_port_index),
                   saved(sender_state) { }
         };

         /**
          * Event carrying a packet and the port it belongs to; flagged
          * Event::AutoDelete at construction. process() and description()
          * are defined outside this header.
          */
         class MemReqEvent : public Event
         {
           private:
             DataPort *dataPort;
             PacketPtr pkt;

           public:
             MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
                 : Event(), dataPort(_data_port), pkt(_pkt)
             {
               setFlags(Event::AutoDelete);
             }

             void process();
             const char *description() const;
         };

         /**
          * Response-side counterpart of MemReqEvent, with the same layout
          * and the same AutoDelete flag.
          */
         class MemRespEvent : public Event
         {
           private:
             DataPort *dataPort;
             PacketPtr pkt;

           public:
             MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
                 : Event(), dataPort(_data_port), pkt(_pkt)
             {
               setFlags(Event::AutoDelete);
             }

             void process();
             const char *description() const;
         };

         // (packet, instruction) pairs; presumably requests waiting to be
         // re-sent after a failed send -- confirm in recvReqRetry()
         std::deque<std::pair<PacketPtr, GPUDynInstPtr>> retries;

       protected:
         ComputeUnit *computeUnit;
         int index;

         // receive a timing response from the slave port
         virtual bool recvTimingResp(PacketPtr pkt);

         // atomic and functional accesses are no-ops on this port
         virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
         virtual void recvFunctional(PacketPtr pkt) { }

         // address range changes from the peer slave port are ignored
         virtual void recvRangeChange() { }

         virtual void recvReqRetry();

         // reports an empty range list and sets the snoop flag
         virtual void
         getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
         {
             resp.clear();
             snoop = true;
         }

     };


     /**
      * Instruction cache access port (the port to the SQC).
      *
      * NOTE(review): _index is kept in the index member only and is not
      * forwarded to the MasterPort constructor -- confirm this is intended.
      */
     class SQCPort : public MasterPort
     {
       public:
         SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
             : MasterPort(_name, _cu), computeUnit(_cu),
               index(_index) { }

         // The constructor does not set this flag, so it is given a
         // defined initial value here rather than being read indeterminate.
         bool snoopRangeSent = false;

         /**
          * Per-packet state attached to packets sent through this port:
          * the wavefront the packet belongs to and the sender state that
          * was saved when this one was attached.
          */
         struct SenderState : public Packet::SenderState
         {
             Wavefront *wavefront;
             Packet::SenderState *saved;

             SenderState(Wavefront *_wavefront, Packet::SenderState
                     *sender_state=nullptr)
                 : wavefront(_wavefront), saved(sender_state) { }
         };

         // (packet, wavefront) pairs; presumably requests waiting to be
         // re-sent after a failed send -- confirm in recvReqRetry()
         std::deque<std::pair<PacketPtr, Wavefront*>> retries;

       protected:
         ComputeUnit *computeUnit;
         int index;

         // receive a timing response from the slave port
         virtual bool recvTimingResp(PacketPtr pkt);

         // atomic and functional accesses are no-ops on this port
         virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
         virtual void recvFunctional(PacketPtr pkt) { }

         // address range changes from the peer slave port are ignored
         virtual void recvRangeChange() { }

         virtual void recvReqRetry();

         // reports an empty range list and sets the snoop flag
         virtual void
         getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
         {
             resp.clear();
             snoop = true;
         }
     };


     /**
      * Data TLB port: the port to the TLB hierarchy (i.e., the L1 TLB).
      * Instances are stored in ComputeUnit::tlbPort. The port carries a
      * stall flag that starts out false.
      */
     class DTLBPort : public MasterPort
     {
       public:
         DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
             : MasterPort(_name, _cu), computeUnit(_cu),
               index(_index), stalled(false)
         { }

         // const so that it can be queried through a const port, matching
         // LDSPort::isStalled()
         bool isStalled() const { return stalled; }
         void stallPort() { stalled = true; }
         void unstallPort() { stalled = false; }

         // packets queued on this port; presumably translation requests
         // that could not be sent -- confirm in recvReqRetry()
         std::deque<PacketPtr> retries;

         /**
          * SenderState is information carried along with the packet
          * throughout the TLB hierarchy.
          */
         struct SenderState: public Packet::SenderState
         {
             // the memInst that this is associated with
             GPUDynInstPtr _gpuDynInst;

             // the lane in the memInst this is associated with, so we send
             // the memory request down the right port
             int portIndex;

             // constructor used for packets involved in timing accesses
             SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
                 : _gpuDynInst(gpuDynInst), portIndex(port_index) { }

         };

       protected:
         ComputeUnit *computeUnit;
         int index;
         bool stalled;

         // receive a timing response from the slave port
         virtual bool recvTimingResp(PacketPtr pkt);

         // atomic and functional accesses are no-ops on this port
         virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
         virtual void recvFunctional(PacketPtr pkt) { }

         // address range changes from the peer slave port are ignored
         virtual void recvRangeChange() { }

         virtual void recvReqRetry();
     };


     /**
      * Port to the SQC TLB (there is a separate TLB for each I-cache).
      * The port carries a stall flag that starts out false.
      */
     class ITLBPort : public MasterPort
     {
       public:
         ITLBPort(const std::string &_name, ComputeUnit *_cu)
             : MasterPort(_name, _cu), computeUnit(_cu), stalled(false) { }

         // const so that it can be queried through a const port, matching
         // LDSPort::isStalled()
         bool isStalled() const { return stalled; }
         void stallPort() { stalled = true; }
         void unstallPort() { stalled = false; }

         // here we queue all the translation requests that were not
         // successfully sent
         std::deque<PacketPtr> retries;

         // per-packet state attached to packets sent through this port
         struct SenderState: public Packet::SenderState
         {
             // The wavefront associated with this request
             Wavefront *wavefront;

             SenderState(Wavefront *_wavefront) : wavefront(_wavefront) { }
         };

       protected:
         ComputeUnit *computeUnit;
         bool stalled;

         // receive a timing response from the slave port
         virtual bool recvTimingResp(PacketPtr pkt);

         // atomic and functional accesses are no-ops on this port
         virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
         virtual void recvFunctional(PacketPtr pkt) { }

         // address range changes from the peer slave port are ignored
         virtual void recvRangeChange() { }

         virtual void recvReqRetry();
     };


     /**
      * Master port of the compute unit towards the LDS. Unlike the other
      * ports in this class it forwards its id to the MasterPort
      * constructor. The stall flag starts out false.
      */
     class LDSPort : public MasterPort
     {
       public:
         LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
             : MasterPort(_name, _cu, _id), stalled(false), computeUnit(_cu)
         { }

         bool isStalled() const { return stalled; }
         void stallPort() { stalled = true; }
         void unstallPort() { stalled = false; }

         // packets queued on this port; presumably requests waiting to be
         // re-sent -- confirm in recvReqRetry()
         std::queue<PacketPtr> retries;

         // per-packet state that records the instruction behind a request
         class SenderState: public Packet::SenderState
         {
           protected:
             // The actual read/write/atomic request that goes with this
             // command
             GPUDynInstPtr _gpuDynInst;

           public:
             SenderState(GPUDynInstPtr gpuDynInst)
                 : _gpuDynInst(gpuDynInst) { }

             GPUDynInstPtr getMemInst() const { return _gpuDynInst; }
         };

         virtual bool sendTimingReq(PacketPtr pkt);

       protected:
         bool stalled;
         ComputeUnit *computeUnit;

         // receive a timing response from the slave port
         virtual bool recvTimingResp(PacketPtr pkt);

         // atomic and functional accesses are no-ops on this port
         virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
         virtual void recvFunctional(PacketPtr pkt) { }

         // address range changes from the peer slave port are ignored
         virtual void recvRangeChange() { }

         virtual void recvReqRetry();
     };


     LDSPort *ldsPort = nullptr;


     // LDS port pointer; nullptr until getMasterPort() has created the port
     LDSPort *getLdsPort() const { return ldsPort; }


     std::vector<DataPort*> memPort;

     // port to the TLB hierarchy (i.e., the L1 TLB)

     std::vector<DTLBPort*> tlbPort;

     // port to the SQC (i.e. the I-cache)

     SQCPort *sqcPort;

     // port to the SQC TLB (there's a separate TLB for each I-cache)

     ITLBPort *sqcTLBPort;


     /**
      * Create the master port named if_name and return a reference to it.
      *
      * Recognised names are "memory_port" and "translation_port" (one port
      * per index, stored in memPort / tlbPort), "sqc_port", "sqc_tlb_port"
      * and "ldsPort". Any other name panics.
      *
      * For the two indexed kinds idx must be a valid position in the
      * corresponding vector. An out-of-range idx is reported with fatal()
      * instead of writing outside the vector. A second request for
      * "ldsPort" is fatal as well.
      *
      * @param if_name name of the port to create
      * @param idx     port index; only used by the indexed kinds, the SQC
      *                port and the LDS port
      * @return reference to the newly created port
      */
     virtual BaseMasterPort&
     getMasterPort(const std::string &if_name, PortID idx)
     {
         if (if_name == "memory_port") {
             if (idx < 0 || idx >= static_cast<int>(memPort.size())) {
                 fatal("memory_port index %d is out of range", idx);
             }
             memPort[idx] = new DataPort(csprintf("%s-port%d", name(), idx),
                                         this, idx);
             return *memPort[idx];
         } else if (if_name == "translation_port") {
             if (idx < 0 || idx >= static_cast<int>(tlbPort.size())) {
                 fatal("translation_port index %d is out of range", idx);
             }
             tlbPort[idx] = new DTLBPort(csprintf("%s-port%d", name(), idx),
                                         this, idx);
             return *tlbPort[idx];
         } else if (if_name == "sqc_port") {
             sqcPort = new SQCPort(csprintf("%s-port%d", name(), idx),
                                   this, idx);
             return *sqcPort;
         } else if (if_name == "sqc_tlb_port") {
             sqcTLBPort = new ITLBPort(csprintf("%s-port", name()), this);
             return *sqcTLBPort;
         } else if (if_name == "ldsPort") {
             // only one LDS port may exist per compute unit
             if (ldsPort) {
                 fatal("an LDS port was already allocated");
             }
             ldsPort = new LDSPort(csprintf("%s-port", name()), this, idx);
             return *ldsPort;
         } else {
             panic("incorrect port name");
         }
     }


     // xact_cas_load()

     /**
      * Identifies a wavefront by the SIMD unit it runs on and its slot on
      * that unit. Elements of waveQueue::waveIDQueue are of this type.
      *
      * A default-constructed identifier holds -1 in both fields, so reading
      * it is well defined and it cannot be mistaken for SIMD 0 / slot 0.
      */
     class waveIdentifier
     {
       public:
         waveIdentifier() { }
         waveIdentifier(int _simdId, int _wfSlotId)
           : simdId(_simdId), wfSlotId(_wfSlotId) { }

         int simdId = -1;
         int wfSlotId = -1;
     };


     // Wrapper around a list of wave identifiers (SIMD id / wavefront slot
     // pairs). It is the mapped type of xactCasLoadMap.
     class waveQueue

     {

       public:

         // the identifiers, in list order
         std::list<waveIdentifier> waveIDQueue;

     };

     std::map<unsigned, waveQueue> xactCasLoadMap;


     // return the current global sequence number, then advance the counter
     uint64_t
     getAndIncSeqNum()
     {
         const uint64_t seqNum = globalSeqNum;
         ++globalSeqNum;
         return seqNum;
     }


   private:

     const int _cacheLineSize;

     uint64_t globalSeqNum;

     int wavefrontSize;

     GPUStaticInst *kernelLaunchInst;

 };


 #endif // __COMPUTE_UNIT_HH__

ComputeUnit::numVecRegsPerSimd
uint32_t numVecRegsPerSimd
Definition: compute_unit.hh:214

MasterPort
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
Definition: port.hh:167

SIMD2
Definition: compute_unit.hh:78

Wavefront
Definition: wavefront.hh:147

ComputeUnit::updatePageDivergenceDist
void updatePageDivergenceDist(Addr addr)
Definition: compute_unit.cc:1702

ComputeUnit::tlbLatency
Stats::Formula tlbLatency
Definition: compute_unit.hh:333

ComputeUnit::waveIdentifier::wfSlotId
int wfSlotId
Definition: compute_unit.hh:759

NUM_UNITS
Definition: compute_unit.hh:82

ComputeUnit::DataPort::MemRespEvent::description
const char * description() const
Return a C string describing the event.
Definition: compute_unit.cc:976

ComputeUnit::DataPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:432

ComputeUnit::DTLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: compute_unit.hh:583

ComputeUnit::SQCPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: compute_unit.hh:527

ComputeUnit::ITLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:596

GlobalMemPipeline
Definition: global_memory_pipeline.hh:58

ComputeUnit::rrNextMemID
int rrNextMemID
Definition: compute_unit.hh:132

ComputeUnit::vpc
Stats::Formula vpc
Definition: compute_unit.hh:357

ComputeUnit::SQCPort::SenderState::SenderState
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:513

ComputeUnit::flatLDSInsts
Stats::Scalar flatLDSInsts
Definition: compute_unit.hh:316

ComputeUnit::ITLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:602

MipsISA::index
Bitfield< 30, 0 > index
Definition: pra_constants.hh:46

ComputeUnit::vectorAluInstAvail
std::vector< bool > vectorAluInstAvail
Definition: compute_unit.hh:373

VectorRegisterFile
Definition: vector_register_file.hh:63

ComputeUnit::injectGlobalMemFence
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
Definition: compute_unit.cc:942

ComputeUnit::DataPort::MemReqEvent
Definition: compute_unit.hh:443

ComputeUnit::DataPort::MemReqEvent::MemReqEvent
MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
Definition: compute_unit.hh:450

ComputeUnit::handleMemPacket
void handleMemPacket(PacketPtr pkt, int memport_index)

ComputeUnit::DTLBPort::DTLBPort
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Definition: compute_unit.hh:542

ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:136

Callback
Generic callback class.
Definition: callback.hh:41

ComputeUnit::numCyclesPerLoadTransfer
uint32_t numCyclesPerLoadTransfer
Definition: compute_unit.hh:206

ComputeUnit::ipc
Stats::Formula ipc
Definition: compute_unit.hh:358

ComputeUnit::isDone
bool isDone() const
Definition: compute_unit.cc:1731

ComputeUnit::SQCPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:511

WaitClass
Definition: misc.hh:50

ComputeUnit::DTLBPort
Data TLB port.
Definition: compute_unit.hh:539

ComputeUnit::glbMemToVrfBus
WaitClass glbMemToVrfBus
Definition: compute_unit.hh:199

MAX_REGS_FOR_NON_VEC_MEM_INST
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
Definition: compute_unit.hh:58

ComputeUnit::xactCasLoadMap
std::map< unsigned, waveQueue > xactCasLoadMap
Definition: compute_unit.hh:767

ComputeUnit::debugSegFault
bool debugSegFault
Definition: compute_unit.hh:171

ComputeUnit::lds
LdsState & lds
Definition: compute_unit.hh:301

MAX_WIDTH_FOR_MEM_INST
static const int MAX_WIDTH_FOR_MEM_INST
Definition: compute_unit.hh:59

ComputeUnit::DTLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:548

ComputeUnit::waveStatusList
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
Definition: compute_unit.hh:118

panic
#define panic(...)
Definition: misc.hh:153

ComputeUnit::ITLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: compute_unit.cc:1368

ComputeUnit::fillKernelState
void fillKernelState(Wavefront *w, NDRange *ndr)
Definition: compute_unit.cc:179

ComputeUnit::hitsPerTLBLevel
Stats::Vector hitsPerTLBLevel
Definition: compute_unit.hh:335

qstruct.hh

ComputeUnit::dynamicGMemInstrCnt
Stats::Scalar dynamicGMemInstrCnt
Definition: compute_unit.hh:343

ComputeUnit::scheduleStage
ScheduleStage scheduleStage
Definition: compute_unit.hh:98

ComputeUnit::flatLDSInstsPerWF
Stats::Formula flatLDSInstsPerWF
Definition: compute_unit.hh:317

ComputeUnit::storeBusLength
int storeBusLength()
Definition: compute_unit.hh:249

__attribute__
const char * __attribute__((weak)) m5MainCommands[]

ComputeUnit::dpBypassLength
int dpBypassLength()
Definition: compute_unit.hh:248

ComputeUnit::controlFlowDivergenceDist
Stats::Distribution controlFlowDivergenceDist
Definition: compute_unit.hh:359

ComputeUnit::DataPort::MemRespEvent::dataPort
DataPort * dataPort
Definition: compute_unit.hh:463

ComputeUnit::sqcTLBPort
ITLBPort * sqcTLBPort
Definition: compute_unit.hh:719

ComputeUnit::nextLocMemBus
int nextLocMemBus
Definition: compute_unit.hh:197

ComputeUnit::readyList
std::vector< std::vector< Wavefront * > > readyList
Definition: compute_unit.hh:110

ComputeUnit::exec
void exec()
Definition: compute_unit.cc:532

LocalMemPipeline
Definition: local_memory_pipeline.hh:57

ComputeUnit::DTLBPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:563

ExecStage
Definition: exec_stage.hh:69

ComputeUnit::vectorMemWrites
Stats::Scalar vectorMemWrites
Definition: compute_unit.hh:318

ComputeUnit::DataPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:490

addr
ip6_addr_t addr
Definition: inet.hh:335

ComputeUnit::dpBypassPipeLength
int dpBypassPipeLength
Definition: compute_unit.hh:147

mem_object.hh
MemObject declaration.

ComputeUnit::globalSeqNum
uint64_t globalSeqNum
Definition: compute_unit.hh:773

ComputeUnit::regStats
void regStats()
Register statistics for this object.
Definition: compute_unit.cc:1398

ComputeUnit::cacheLineSize
int cacheLineSize() const
Definition: compute_unit.hh:393

ComputeUnit::DataPort::SenderState
Definition: compute_unit.hh:430

ComputeUnit::LDSPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:639

ComputeUnit::ITLBPort::SenderState::SenderState
SenderState(Wavefront *_wavefront)
Definition: compute_unit.hh:612

ComputeUnit::wfSize
int wfSize() const
Definition: compute_unit.hh:251

port.hh
Port Object Declaration.

ComputeUnit::spBypassPipeLength
int spBypassPipeLength
Definition: compute_unit.hh:144

ComputeUnit::CUExitCallback
Definition: compute_unit.hh:401

ComputeUnit::cuExitCallback
CUExitCallback * cuExitCallback
Definition: compute_unit.hh:418

Shader
Definition: shader.hh:76

Stats::Vector
A vector of scalar stats.
Definition: statistics.hh:2499

ComputeUnit::SQCPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:506

fetch_stage.hh

ComputeUnit::DataPort::MemReqEvent::description
const char * description() const
Return a C string describing the event.
Definition: compute_unit.cc:1248

ComputeUnit::tlbPort
std::vector< DTLBPort * > tlbPort
Definition: compute_unit.hh:715

ComputeUnit::prefetchDepth
int prefetchDepth
Definition: compute_unit.hh:160

ComputeUnit::wfList
std::vector< std::vector< Wavefront * > > wfList
Definition: compute_unit.hh:135

LdsChunk
This represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
Definition: lds_state.hh:58

ComputeUnit::updateEvents
void updateEvents()
Definition: compute_unit.cc:199

NDRange
Definition: ndrange.hh:42

ComputeUnit::SQCPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
Definition: compute_unit.cc:713

ComputeUnit::DataPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: compute_unit.hh:486

ScoreboardCheckStage
Definition: scoreboard_check_stage.hh:63

ComputeUnit::dynamicLMemInstrCnt
Stats::Scalar dynamicLMemInstrCnt
Definition: compute_unit.hh:344

ComputeUnit::DTLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:560

global_memory_pipeline.hh

ComputeUnit::numALUInstsExecuted
Stats::Formula numALUInstsExecuted
Definition: compute_unit.hh:363

statistics.hh
Declaration of Statistics objects.

ComputeUnit::LDSPort::SenderState::getMemInst
GPUDynInstPtr getMemInst() const
Definition: compute_unit.hh:664

ComputeUnit::spBypassLength
int spBypassLength()
Definition: compute_unit.hh:247

ComputeUnit::init
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
Definition: compute_unit.cc:548

ComputeUnit::kernelLaunchInst
GPUStaticInst * kernelLaunchInst
Definition: compute_unit.hh:775

ComputeUnit::numInstrExecuted
Stats::Scalar numInstrExecuted
Definition: compute_unit.hh:349

ComputeUnit::initiateFetch
void initiateFetch(Wavefront *wavefront)

Stats::Scalar
This is a simple scalar statistic, like a counter.
Definition: statistics.hh:2475

ComputeUnit::LDSPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.hh:658

ComputeUnit::DataPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:485

ComputeUnit::vALUInsts
Stats::Scalar vALUInsts
Definition: compute_unit.hh:304

std::vector
STL vector class.
Definition: stl.hh:40

local_memory_pipeline.hh

Request
Definition: request.hh:87

ComputeUnit::ldsBankConflictDist
Stats::Distribution ldsBankConflictDist
Definition: compute_unit.hh:338

ComputeUnit::getRefCounter
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
Definition: compute_unit.cc:1760

ComputeUnit::ITLBPort::SenderState
SenderState is information carried along with the packet throughout the TLB hierarchy.
Definition: compute_unit.hh:607

EventBase::AutoDelete
static const FlagsType AutoDelete
Definition: eventq.hh:103

ComputeUnit::vrfToLocalMemPipeBus
std::vector< WaitClass > vrfToLocalMemPipeBus
Definition: compute_unit.hh:195

ComputeUnit::vectorMemWritesPerWF
Stats::Formula vectorMemWritesPerWF
Definition: compute_unit.hh:319

ComputeUnit::DataPort::retries
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Definition: compute_unit.hh:477

ComputeUnit::wgBlockedDueLdsAllocation
Stats::Scalar wgBlockedDueLdsAllocation
Definition: compute_unit.hh:346

LDSMEM_PIPE
Definition: compute_unit.hh:81

ComputeUnit::sqcPort
SQCPort * sqcPort
Definition: compute_unit.hh:717

ComputeUnit::DataPort::SenderState::saved
Packet::SenderState * saved
Definition: compute_unit.hh:434

ComputeUnit::SQCPort::index
int index
Definition: compute_unit.hh:522

ComputeUnit::LDSPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:686

FetchStage
Definition: fetch_stage.hh:53

ComputeUnit::lastVaddrWF
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
Definition: compute_unit.hh:166

ComputeUnit::SQCPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:510

ComputeUnit::LDSPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:683

ComputeUnit::aluPipe
std::vector< WaitClass > aluPipe
Definition: compute_unit.hh:188

ComputeUnit::numCyclesPerStoreTransfer
uint32_t numCyclesPerStoreTransfer
Definition: compute_unit.hh:205

ComputeUnit::startWavefront
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
Definition: compute_unit.cc:226

ComputeUnit::ComputeUnit
ComputeUnit(const Params *p)
Definition: compute_unit.cc:59

ComputeUnit::localMemBarrier
bool localMemBarrier
Definition: compute_unit.hh:173

ComputeUnit::SQCPort::retries
std::deque< std::pair< PacketPtr, Wavefront * > > retries
Definition: compute_unit.hh:518

TLB_HIT_CACHE_MISS
Definition: compute_unit.hh:89

GLBMEM_PIPE
Definition: compute_unit.hh:80

ComputeUnit::globalMemoryPipe
GlobalMemPipeline globalMemoryPipe
Definition: compute_unit.hh:100

ComputeUnit::waveIdentifier
Definition: compute_unit.hh:751

ComputeUnit::coalescerToVrfBusWidth
uint32_t coalescerToVrfBusWidth
Definition: compute_unit.hh:204

ComputeUnit::vALUUtilization
Stats::Formula vALUUtilization
Definition: compute_unit.hh:311

GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48

ComputeUnit::LDSPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: compute_unit.hh:691

ComputeUnit::activeLanesPerLMemInstrDist
Stats::Distribution activeLanesPerLMemInstrDist
Definition: compute_unit.hh:361

ComputeUnit::ITLBPort::ITLBPort
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Definition: compute_unit.hh:590

ComputeUnit::scalarMemWritesPerWF
Stats::Formula scalarMemWritesPerWF
Definition: compute_unit.hh:323

ComputeUnit::numTimesWgBlockedDueVgprAlloc
Stats::Scalar numTimesWgBlockedDueVgprAlloc
Definition: compute_unit.hh:365

ComputeUnit::functionalTLB
bool functionalTLB
Definition: compute_unit.hh:172

ComputeUnit::CUExitCallback::CUExitCallback
CUExitCallback(ComputeUnit *_cu)
Definition: compute_unit.hh:409

ComputeUnit::SQCPort::SenderState
Definition: compute_unit.hh:508

ComputeUnit::execRateDist
Stats::Distribution execRateDist
Definition: compute_unit.hh:352

ComputeUnit::vectorMemReadsPerWF
Stats::Formula vectorMemReadsPerWF
Definition: compute_unit.hh:321

ComputeUnit::ShrMemUnitId
int ShrMemUnitId()
Definition: compute_unit.hh:269

ComputeUnit::sendSyncRequest
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Definition: compute_unit.cc:924

ComputeUnit::DataPort
Data access Port.
Definition: compute_unit.hh:421

ComputeUnit::isShrMem
bool isShrMem(int unitId)
Definition: compute_unit.hh:267

ComputeUnit::regIdxVec
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
Definition: compute_unit.hh:216

csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161

ComputeUnit::DTLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:581

ComputeUnit::DTLBPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
Definition: compute_unit.hh:570

ComputeUnit::DTLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:555

exec_stage.hh

ComputeUnit::DTLBPort::SenderState::portIndex
int portIndex
Definition: compute_unit.hh:567

ComputeUnit::countPages
bool countPages
Definition: compute_unit.hh:183

ComputeUnit::DataPort::index
int index
Definition: compute_unit.hh:481

ComputeUnit::LDSPort
the port intended to communicate between the CU and its LDS
Definition: compute_unit.hh:629

ComputeUnit::waveQueue::waveIDQueue
std::list< waveIdentifier > waveIDQueue
Definition: compute_unit.hh:765

RR
Definition: compute_unit.hh:70

ComputeUnit::pageDivergenceDist
Stats::Distribution pageDivergenceDist
Definition: compute_unit.hh:342

ComputeUnit::execStage
ExecStage execStage
Definition: compute_unit.hh:99

ComputeUnit::LDSPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
get the result of packets sent to the LDS when they return
Definition: compute_unit.cc:1817

GPUStaticInst
Definition: gpu_static_inst.hh:60

callback.hh

ComputeUnit::DataPort::MemRespEvent
Definition: compute_unit.hh:460

Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63

ComputeUnit::tlbRequests
Stats::Scalar tlbRequests
Definition: compute_unit.hh:331

ComputeUnit::waveIdentifier::waveIdentifier
waveIdentifier()
Definition: compute_unit.hh:754

ComputeUnit::DTLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:547

ComputeUnit::ITLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
Definition: compute_unit.cc:1315

ComputeUnit::ITLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:616

ComputeUnit::DTLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:582

EXEC_UNIT
EXEC_UNIT
Definition: compute_unit.hh:74

Stats::Distribution
A simple distribution stat.
Definition: statistics.hh:2523

ComputeUnit::LDSPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:677

ComputeUnit::LDSPort::SenderState::_gpuDynInst
GPUDynInstPtr _gpuDynInst
Definition: compute_unit.hh:655

ComputeUnit::vrfToGlobalMemPipeBus
std::vector< WaitClass > vrfToGlobalMemPipeBus
Definition: compute_unit.hh:193

ComputeUnit::updateInstStats
void updateInstStats(GPUDynInstPtr gpuDynInst)
Definition: compute_unit.cc:1669

ComputeUnit::DataPort::MemRespEvent::MemRespEvent
MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
Definition: compute_unit.hh:467

ComputeUnit::flatVMemInsts
Stats::Scalar flatVMemInsts
Definition: compute_unit.hh:314

OLDEST
Definition: compute_unit.hh:69

ComputeUnit::numCASOps
Stats::Scalar numCASOps
Definition: compute_unit.hh:366

ComputeUnit::DTLBPort::unstallPort
void unstallPort()
Definition: compute_unit.hh:549

ComputeUnit::DTLBPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:576

ComputeUnit::DTLBPort::recvReqRetry
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: compute_unit.cc:1284

fatal
#define fatal(...)
Definition: misc.hh:163

ComputeUnit::_masterId
MasterID _masterId
Definition: compute_unit.hh:299

ComputeUnit::memPort
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
Definition: compute_unit.hh:713

ComputeUnit::lastVaddrSimd
std::vector< std::vector< Addr > > lastVaddrSimd
Definition: compute_unit.hh:165

ComputeUnit::DataPort::MemReqEvent::pkt
PacketPtr pkt
Definition: compute_unit.hh:447

ComputeUnit::vrfToCoalescerBusWidth
uint32_t vrfToCoalescerBusWidth
Definition: compute_unit.hh:203

ComputeUnit::AllAtBarrier
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
Definition: compute_unit.cc:446

ComputeUnit::StartWorkgroup
void StartWorkgroup(NDRange *ndr)
Definition: compute_unit.cc:310

Event::setFlags
void setFlags(Flags _flags)
Accessor for flags.
Definition: eventq.hh:264

ComputeUnit::sALUInstsPerWF
Stats::Formula sALUInstsPerWF
Definition: compute_unit.hh:307

ComputeUnit::DataPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:484

std::list< AddrRange >

ComputeUnit::isGlbMem
bool isGlbMem(int unitId)
Definition: compute_unit.hh:266

ComputeUnit::SQCPort
Definition: compute_unit.hh:499

schedule_stage.hh

ComputeUnit::scalarMemWrites
Stats::Scalar scalarMemWrites
Definition: compute_unit.hh:322

ScheduleStage
Definition: schedule_stage.hh:56

ComputeUnit::pageDataStruct
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Definition: compute_unit.hh:398

ComputeUnit::scalarMemReads
Stats::Scalar scalarMemReads
Definition: compute_unit.hh:324

MipsISA::w
Bitfield< 0 > w
Definition: pra_constants.hh:280

types.hh
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.

ComputeUnit::numSIMDs
int numSIMDs
Definition: compute_unit.hh:141

Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142

MasterID
uint16_t MasterID
Definition: request.hh:85

ComputeUnit::SQCPort::SQCPort
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Definition: compute_unit.hh:502

ComputeUnit::ldsNoFlatInsts
Stats::Scalar ldsNoFlatInsts
Definition: compute_unit.hh:312

ComputeUnit::dispatchList
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
Definition: compute_unit.hh:130

Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245

ComputeUnit::CUExitCallback::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:404

ComputeUnit::sendToLds
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
Send a general request to the LDS. Make sure to look at the return value here, as your request might be...
Definition: compute_unit.cc:1797

ComputeUnit::cedeSIMD
bool cedeSIMD(int simdId, int wfSlotId)
Definition: compute_unit.cc:484

TLB_MISS_CACHE_MISS
Definition: compute_unit.hh:87

ComputeUnit::instCyclesVALU
Stats::Scalar instCyclesVALU
Definition: compute_unit.hh:308

ComputeUnit::resp_tick_latency
Tick resp_tick_latency
Definition: compute_unit.hh:209

ComputeUnit::DTLBPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
Definition: compute_unit.cc:1075

Packet::SenderState
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a MemObject that sees the packet.
Definition: packet.hh:377

ComputeUnit::completedWfs
Stats::Scalar completedWfs
Definition: compute_unit.hh:368

ComputeUnit::~ComputeUnit
~ComputeUnit()
Definition: compute_unit.cc:160

ComputeUnit::wavefrontSize
int wavefrontSize
Definition: compute_unit.hh:774

ComputeUnit::xact_cas_mode
bool xact_cas_mode
Definition: compute_unit.hh:170

std::deque
STL deque class.
Definition: stl.hh:47

Stats::Formula
A formula for statistics that is calculated when printed.
Definition: statistics.hh:2895

ComputeUnit::DataPort::SenderState::SenderState
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Definition: compute_unit.hh:436

ComputeUnit
Definition: compute_unit.hh:93

ComputeUnit::LDSPort::stallPort
void stallPort()
Definition: compute_unit.hh:638

TLB_HIT_CACHE_HIT
Definition: compute_unit.hh:90

ComputeUnit::scalarMemReadsPerWF
Stats::Formula scalarMemReadsPerWF
Definition: compute_unit.hh:325

ComputeUnit::vALUInstsPerWF
Stats::Formula vALUInstsPerWF
Definition: compute_unit.hh:305

ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:185

ComputeUnit::DataPort::MemRespEvent::process
void process()
Definition: compute_unit.cc:982

ComputeUnit::getMasterPort
virtual BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx)
Get a master port with a given name and index.
Definition: compute_unit.hh:722

ComputeUnit::glbMemInstAvail
int glbMemInstAvail
Definition: compute_unit.hh:379

ComputeUnit::DataPort::snoopRangeSent
bool snoopRangeSent
Definition: compute_unit.hh:428

ComputeUnit::activeLanesPerGMemInstrDist
Stats::Distribution activeLanesPerGMemInstrDist
Definition: compute_unit.hh:360

SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:117

ComputeUnit::doSmReturn
void doSmReturn(GPUDynInstPtr gpuDynInst)

ComputeUnit::waveIdentifier::simdId
int simdId
Definition: compute_unit.hh:758

ComputeUnit::getLds
LdsState & getLds() const
Definition: compute_unit.hh:385

ComputeUnit::ITLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:595

ComputeUnit::DataPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:480

ComputeUnit::nextGlbMemBus
int nextGlbMemBus
Definition: compute_unit.hh:196

ComputeUnit::waveQueue
Definition: compute_unit.hh:762

ComputeUnit::DTLBPort::index
int index
Definition: compute_unit.hh:577

ComputeUnit::LDSPort::isStalled
bool isStalled() const
Definition: compute_unit.hh:637

ComputeUnit::tlbCycles
Stats::Scalar tlbCycles
Definition: compute_unit.hh:332

SIMD0
Definition: compute_unit.hh:76

ComputeUnit::LDSPort::SenderState
SenderState is information carried along with the packet, especially the GPUDynInstPtr.
Definition: compute_unit.hh:651

ComputeUnit::issuePeriod
int issuePeriod
Definition: compute_unit.hh:149

TLB_MISS_CACHE_HIT
Definition: compute_unit.hh:88

ComputeUnit::LDSPort::retries
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Definition: compute_unit.hh:645

ComputeUnit::numVecOpsExecuted
Stats::Scalar numVecOpsExecuted
Definition: compute_unit.hh:354

ComputeUnit::vrf
std::vector< VectorRegisterFile * > vrf
Definition: compute_unit.hh:139

Event
Definition: eventq.hh:185

ComputeUnit::SQCPort::getDeviceAddressRanges
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
Definition: compute_unit.hh:531

MemObject
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60

BaseMasterPort
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
Definition: port.hh:115

ComputeUnit::loadBusLength
int loadBusLength()
Definition: compute_unit.hh:250

ComputeUnit::ITLBPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:621

ComputeUnit::resizeRegFiles
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)

EXEC_POLICY
EXEC_POLICY
Definition: compute_unit.hh:67

ComputeUnit::prefetchStride
int prefetchStride
Definition: compute_unit.hh:162

ComputeUnit::nextLocRdBus
int nextLocRdBus()
Definition: compute_unit.hh:271

ComputeUnit::numGlbMemUnits
int numGlbMemUnits
Definition: compute_unit.hh:152

ComputeUnit::numFailedCASOps
Stats::Scalar numFailedCASOps
Definition: compute_unit.hh:367

ComputeUnit::ReadyWorkgroup
int ReadyWorkgroup(NDRange *ndr)
Definition: compute_unit.cc:364

ComputeUnit::LDSPort::sendTimingReq
virtual bool sendTimingReq(PacketPtr pkt)
Attempt to send this packet: either the port is already stalled, the request is nack'd and must stall...
Definition: compute_unit.cc:1840

ComputeUnit::getLdsPort
LDSPort * getLdsPort() const
Definition: compute_unit.hh:705

ComputeUnit::DataPort::MemRespEvent::pkt
PacketPtr pkt
Definition: compute_unit.hh:464

ComputeUnit::pagesTouched
std::map< Addr, int > pagesTouched
Definition: compute_unit.hh:243

ComputeUnit::instCyclesSALU
Stats::Scalar instCyclesSALU
Definition: compute_unit.hh:309

ComputeUnit::CUExitCallback::process
virtual void process()
virtual process function that is invoked when the callback queue is executed.
Definition: compute_unit.cc:1713

ComputeUnit::SQCPort::recvReqRetry
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: compute_unit.cc:721

SIMD1
Definition: compute_unit.hh:77

ComputeUnit::locMemToVrfBus
WaitClass locMemToVrfBus
Definition: compute_unit.hh:201

ComputeUnit::fetch
void fetch(PacketPtr pkt, Wavefront *wavefront)

TLB_CACHE
TLB_CACHE
Definition: compute_unit.hh:85

ComputeUnit::fetchStage
FetchStage fetchStage
Definition: compute_unit.hh:96

ComputeUnit::flatVMemInstsPerWF
Stats::Formula flatVMemInstsPerWF
Definition: compute_unit.hh:315

ComputeUnit::isSimdDone
bool isSimdDone(uint32_t) const
Definition: compute_unit.cc:1766

ComputeUnit::statusVec
std::vector< uint8_t > statusVec
Definition: compute_unit.hh:218

ComputeUnit::barrier_id
uint32_t barrier_id
Definition: compute_unit.hh:186

ComputeUnit::CUExitCallback::~CUExitCallback
virtual ~CUExitCallback()
Definition: compute_unit.hh:407

PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:181

ComputeUnit::lastExecCycle
std::vector< uint64_t > lastExecCycle
Definition: compute_unit.hh:155

ComputeUnit::ITLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:594

ComputeUnit::registerEvent
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
Definition: compute_unit.hh:221

ComputeUnit::DataPort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
Definition: compute_unit.cc:619

ComputeUnit::wfWait
std::vector< WaitClass > wfWait
Definition: compute_unit.hh:190

ComputeUnit::localMemoryPipe
LocalMemPipeline localMemoryPipe
Definition: compute_unit.hh:101

SIMD3
Definition: compute_unit.hh:79

ComputeUnit::LDSPort::stalled
bool stalled
whether or not it is stalled
Definition: compute_unit.hh:675

ComputeUnit::ITLBPort::stalled
bool stalled
Definition: compute_unit.hh:617

ComputeUnit::processFetchReturn
void processFetchReturn(PacketPtr pkt)

ComputeUnit::perLaneTLB
bool perLaneTLB
Definition: compute_unit.hh:158

ComputeUnit::GlbMemUnitId
int GlbMemUnitId()
Definition: compute_unit.hh:268

ComputeUnit::pageAccesses
pageDataStruct pageAccesses
Definition: compute_unit.hh:399

ComputeUnit::processTimingPacket
bool processTimingPacket(PacketPtr pkt)

ComputeUnit::prefetchType
Enums::PrefetchType prefetchType
Definition: compute_unit.hh:167

ComputeUnit::sALUInsts
Stats::Scalar sALUInsts
Definition: compute_unit.hh:306

ComputeUnit::ldsBankAccesses
Stats::Scalar ldsBankAccesses
Definition: compute_unit.hh:337

ComputeUnit::rrNextALUWp
int rrNextALUWp
Definition: compute_unit.hh:133

ComputeUnit::req_tick_latency
Tick req_tick_latency
Definition: compute_unit.hh:208

ComputeUnit::totalCycles
Stats::Scalar totalCycles
Definition: compute_unit.hh:356

ComputeUnit::ldsPort
LDSPort * ldsPort
The port to access the Local Data Store. Can be connected to an LDS object.
Definition: compute_unit.hh:702

ComputeUnit::waveIdentifier::waveIdentifier
waveIdentifier(int _simdId, int _wfSlotId)
Definition: compute_unit.hh:755

ComputeUnit::timestampVec
std::vector< uint64_t > timestampVec
Definition: compute_unit.hh:217

ComputeUnit::vectorMemReads
Stats::Scalar vectorMemReads
Definition: compute_unit.hh:320

ComputeUnit::DataPort::SenderState::port_index
int port_index
Definition: compute_unit.hh:433

ComputeUnit::DTLBPort::stalled
bool stalled
Definition: compute_unit.hh:578

scoreboard_check_stage.hh

MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:325

ComputeUnit::lastVaddrCU
std::vector< Addr > lastVaddrCU
Definition: compute_unit.hh:164

ComputeUnit::nextGlbRdBus
int nextGlbRdBus()
Definition: compute_unit.hh:270

ComputeUnit::shrMemInstAvail
int shrMemInstAvail
Definition: compute_unit.hh:376

LdsState
Definition: lds_state.hh:110

ComputeUnit::ITLBPort
Definition: compute_unit.hh:587

ComputeUnit::Params
ComputeUnitParams Params
Definition: compute_unit.hh:134

ComputeUnit::ldsNoFlatInstsPerWF
Stats::Formula ldsNoFlatInstsPerWF
Definition: compute_unit.hh:313

ComputeUnit::SQCPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:525

ComputeUnit::ITLBPort::recvAtomic
virtual Tick recvAtomic(PacketPtr pkt)
Definition: compute_unit.hh:620

ComputeUnit::SQCPort::computeUnit
ComputeUnit * computeUnit
Definition: compute_unit.hh:521

ComputeUnit::getAndIncSeqNum
uint64_t getAndIncSeqNum()
Definition: compute_unit.hh:769

ComputeUnit::threadCyclesVALU
Stats::Scalar threadCyclesVALU
Definition: compute_unit.hh:310

ComputeUnit::ITLBPort::recvRangeChange
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Definition: compute_unit.hh:622

ComputeUnit::sendRequest
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
Definition: compute_unit.cc:744

ComputeUnit::DataPort::MemReqEvent::dataPort
DataPort * dataPort
Definition: compute_unit.hh:446

ComputeUnit::masterId
MasterID masterId()
Definition: compute_unit.hh:293

ComputeUnit::LDSPort::recvReqRetry
virtual void recvReqRetry()
The bus is telling the port that there is now space, so retrying stalled requests should work now. This...
Definition: compute_unit.cc:1882

ComputeUnit::vectorRegsReserved
std::vector< int > vectorRegsReserved
Definition: compute_unit.hh:212

ComputeUnit::isVecAlu
bool isVecAlu(int unitId)
Definition: compute_unit.hh:265

ComputeUnit::exec_policy
EXEC_POLICY exec_policy
Definition: compute_unit.hh:168

ComputeUnit::ITLBPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:610

ComputeUnit::LDSPort::LDSPort
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
Definition: compute_unit.hh:632

ComputeUnit::DataPort::recvReqRetry
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: compute_unit.cc:686

ComputeUnit::DataPort::MemReqEvent::process
void process()
Definition: compute_unit.cc:1254

ComputeUnit::scoreboardCheckStage
ScoreboardCheckStage scoreboardCheckStage
Definition: compute_unit.hh:97

ComputeUnit::SQCPort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Definition: compute_unit.hh:526

ComputeUnit::numLocMemUnits
int numLocMemUnits
Definition: compute_unit.hh:153

ComputeUnit::DataPort::DataPort
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Definition: compute_unit.hh:424

ComputeUnit::_cacheLineSize
const int _cacheLineSize
Definition: compute_unit.hh:772