36 #ifndef __COMPUTE_UNIT_HH__
37 #define __COMPUTE_UNIT_HH__
41 #include <unordered_map>
47 #include "enums/PrefetchType.hh"
65 struct ComputeUnitParams;
223 uint32_t operandSize,
226 regIdxVec.push_back(std::make_pair(simdId, regIdx));
229 if (operandSize > 4) {
230 regIdxVec.push_back(std::make_pair(simdId,
278 int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots);
279 bool cedeSIMD(
int simdId,
int wfSlotId);
286 bool kernelLaunch=
true,
391 getRefCounter(
const uint32_t dispatchId,
const uint32_t wgId)
const;
440 saved(sender_state) { }
514 *sender_state=
nullptr)
724 if (if_name ==
"memory_port") {
728 }
else if (if_name ==
"translation_port") {
732 }
else if (if_name ==
"sqc_port") {
736 }
else if (if_name ==
"sqc_tlb_port") {
739 }
else if (if_name ==
"ldsPort") {
741 fatal(
"an LDS port was already allocated");
746 panic(
"incorrect port name");
778 #endif // __COMPUTE_UNIT_HH__
uint32_t numVecRegsPerSimd
A MasterPort is a specialisation of a BaseMasterPort, which implements the default protocol for the t...
void updatePageDivergenceDist(Addr addr)
Stats::Formula tlbLatency
const char * description() const
Return a C string describing the event.
GPUDynInstPtr _gpuDynInst
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
SenderState(Wavefront *_wavefront, Packet::SenderState *sender_state=nullptr)
Stats::Scalar flatLDSInsts
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
std::vector< bool > vectorAluInstAvail
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst, bool kernelLaunch=true, RequestPtr req=nullptr)
MemReqEvent(DataPort *_data_port, PacketPtr _pkt)
void handleMemPacket(PacketPtr pkt, int memport_index)
DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
uint32_t numCyclesPerLoadTransfer
Packet::SenderState * saved
static const int MAX_REGS_FOR_NON_VEC_MEM_INST
std::map< unsigned, waveQueue > xactCasLoadMap
static const int MAX_WIDTH_FOR_MEM_INST
std::vector< std::vector< std::pair< Wavefront *, WAVE_STATUS > > > waveStatusList
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
void fillKernelState(Wavefront *w, NDRange *ndr)
Stats::Vector hitsPerTLBLevel
Stats::Scalar dynamicGMemInstrCnt
ScheduleStage scheduleStage
Stats::Formula flatLDSInstsPerWF
const char * __attribute__((weak)) m5MainCommands[]
Stats::Distribution controlFlowDivergenceDist
std::vector< std::vector< Wavefront * > > readyList
GPUDynInstPtr _gpuDynInst
Stats::Scalar vectorMemWrites
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
void regStats()
Register statistics for this object.
int cacheLineSize() const
SenderState(Wavefront *_wavefront)
CUExitCallback * cuExitCallback
A vector of scalar stats.
const char * description() const
Return a C string describing the event.
std::vector< DTLBPort * > tlbPort
std::vector< std::vector< Wavefront * > > wfList
this represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Scalar dynamicLMemInstrCnt
SenderState is information carried along with the packet throughout the TLB hierarchy.
Stats::Formula numALUInstsExecuted
Declaration of Statistics objects.
GPUDynInstPtr getMemInst() const
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
GPUStaticInst * kernelLaunchInst
Stats::Scalar numInstrExecuted
void initiateFetch(Wavefront *wavefront)
This is a simple scalar statistic, like a counter.
SenderState(GPUDynInstPtr gpuDynInst)
virtual void recvFunctional(PacketPtr pkt)
Stats::Distribution ldsBankConflictDist
int32_t getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
SenderState is information carried along with the packet throughout the TLB hierarchy.
static const FlagsType AutoDelete
std::vector< WaitClass > vrfToLocalMemPipeBus
Stats::Formula vectorMemWritesPerWF
std::deque< std::pair< PacketPtr, GPUDynInstPtr > > retries
Stats::Scalar wgBlockedDueLdsAllocation
Packet::SenderState * saved
virtual void recvFunctional(PacketPtr pkt)
std::vector< std::vector< std::vector< Addr > > > lastVaddrWF
virtual Tick recvAtomic(PacketPtr pkt)
std::vector< WaitClass > aluPipe
uint32_t numCyclesPerStoreTransfer
void startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk, NDRange *ndr)
ComputeUnit(const Params *p)
std::deque< std::pair< PacketPtr, Wavefront * > > retries
GlobalMemPipeline globalMemoryPipe
uint32_t coalescerToVrfBusWidth
Stats::Formula vALUUtilization
std::shared_ptr< GPUDynInst > GPUDynInstPtr
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
Stats::Distribution activeLanesPerLMemInstrDist
ITLBPort(const std::string &_name, ComputeUnit *_cu)
Stats::Formula scalarMemWritesPerWF
Stats::Scalar numTimesWgBlockedDueVgprAlloc
CUExitCallback(ComputeUnit *_cu)
Stats::Distribution execRateDist
Stats::Formula vectorMemReadsPerWF
void sendSyncRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
bool isShrMem(int unitId)
std::vector< std::pair< uint32_t, uint32_t > > regIdxVec
std::string csprintf(const char *format, const Args &...args)
virtual Tick recvAtomic(PacketPtr pkt)
SenderState(GPUDynInstPtr gpuDynInst, PortID port_index)
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
the port intended to communicate between the CU and its LDS
std::list< waveIdentifier > waveIDQueue
Stats::Distribution pageDivergenceDist
virtual bool recvTimingResp(PacketPtr pkt)
get the result of packets sent to the LDS when they return
uint64_t Tick
Tick count type.
Stats::Scalar tlbRequests
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
ComputeUnit * computeUnit
virtual void recvFunctional(PacketPtr pkt)
A simple distribution stat.
ComputeUnit * computeUnit
GPUDynInstPtr _gpuDynInst
std::vector< WaitClass > vrfToGlobalMemPipeBus
void updateInstStats(GPUDynInstPtr gpuDynInst)
MemRespEvent(DataPort *_data_port, PacketPtr _pkt)
Stats::Scalar flatVMemInsts
ComputeUnit * computeUnit
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
std::vector< DataPort * > memPort
The memory port for SIMD data accesses.
std::vector< std::vector< Addr > > lastVaddrSimd
uint32_t vrfToCoalescerBusWidth
int AllAtBarrier(uint32_t _barrier_id, uint32_t bcnt, uint32_t bslots)
void StartWorkgroup(NDRange *ndr)
void setFlags(Flags _flags)
Accessor for flags.
Stats::Formula sALUInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
bool isGlbMem(int unitId)
Stats::Scalar scalarMemWrites
std::unordered_map< Addr, std::pair< int, int > > pageDataStruct
Stats::Scalar scalarMemReads
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
SQCPort(const std::string &_name, ComputeUnit *_cu, PortID _index)
Stats::Scalar ldsNoFlatInsts
std::vector< std::pair< Wavefront *, DISPATCH_STATUS > > dispatchList
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
ComputeUnit * computeUnit
bool sendToLds(GPUDynInstPtr gpuDynInst) __attribute__((warn_unused_result))
send a general request to the LDS make sure to look at the return value here as your request might be...
bool cedeSIMD(int simdId, int wfSlotId)
Stats::Scalar instCyclesVALU
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
A virtual base opaque structure used to hold state associated with the packet (e.g., an MSHR), specific to a MemObject that sees the packet.
Stats::Scalar completedWfs
SenderState(GPUDynInstPtr gpuDynInst, PortID _port_index, Packet::SenderState *sender_state=nullptr)
Stats::Formula scalarMemReadsPerWF
Stats::Formula vALUInstsPerWF
virtual BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx)
Get a master port with a given name and index.
Stats::Distribution activeLanesPerGMemInstrDist
virtual const std::string name() const
void doSmReturn(GPUDynInstPtr gpuDynInst)
LdsState & getLds() const
ComputeUnit * computeUnit
SenderState is information carried along with the packet, esp.
std::queue< PacketPtr > retries
here we queue all the requests that were not successfully sent.
Stats::Scalar numVecOpsExecuted
std::vector< VectorRegisterFile * > vrf
virtual void getDeviceAddressRanges(AddrRangeList &resp, bool &snoop)
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
virtual void recvFunctional(PacketPtr pkt)
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
Stats::Scalar numFailedCASOps
int ReadyWorkgroup(NDRange *ndr)
virtual bool sendTimingReq(PacketPtr pkt)
attempt to send this packet, either the port is already stalled, the request is nack'd and must stall...
LDSPort * getLdsPort() const
std::map< Addr, int > pagesTouched
Stats::Scalar instCyclesSALU
virtual void process()
virtual process function that is invoked when the callback queue is executed.
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
void fetch(PacketPtr pkt, Wavefront *wavefront)
Stats::Formula flatVMemInstsPerWF
bool isSimdDone(uint32_t) const
std::vector< uint8_t > statusVec
virtual ~CUExitCallback()
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::vector< uint64_t > lastExecCycle
void registerEvent(uint32_t simdId, uint32_t regIdx, uint32_t operandSize, uint64_t when, uint8_t newStatus)
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.
std::vector< WaitClass > wfWait
LocalMemPipeline localMemoryPipe
bool stalled
whether or not it is stalled
void processFetchReturn(PacketPtr pkt)
pageDataStruct pageAccesses
bool processTimingPacket(PacketPtr pkt)
Enums::PrefetchType prefetchType
Stats::Scalar ldsBankAccesses
Stats::Scalar totalCycles
LDSPort * ldsPort
The port to access the Local Data Store Can be connected to a LDS object.
waveIdentifier(int _simdId, int _wfSlotId)
std::vector< uint64_t > timestampVec
Stats::Scalar vectorMemReads
std::vector< Addr > lastVaddrCU
Stats::Formula ldsNoFlatInstsPerWF
virtual Tick recvAtomic(PacketPtr pkt)
virtual Tick recvAtomic(PacketPtr pkt)
ComputeUnit * computeUnit
uint64_t getAndIncSeqNum()
Stats::Scalar threadCyclesVALU
virtual void recvRangeChange()
Called to receive an address range change from the peer slave port.
void sendRequest(GPUDynInstPtr gpuDynInst, int index, PacketPtr pkt)
virtual void recvReqRetry()
the bus is telling the port that there is now space so retrying stalled requests should work now this...
std::vector< int > vectorRegsReserved
bool isVecAlu(int unitId)
LDSPort(const std::string &_name, ComputeUnit *_cu, PortID _id)
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
ScoreboardCheckStage scoreboardCheckStage
virtual void recvFunctional(PacketPtr pkt)
DataPort(const std::string &_name, ComputeUnit *_cu, PortID _index)