36 #ifndef __WAVEFRONT_HH__
37 #define __WAVEFRONT_HH__
45 #include "arch/gpu_isa.hh"
48 #include "config/the_gpu_isa.hh"
53 #include "params/Wavefront.hh"
114 template<
typename CType>
118 return addr *
wfSize +
sizeof(CType) * lane;
132 template<
typename CType>
136 return mem + getLaneOffset<CType>(lane,
addr);
139 template<
typename CType>
143 *((CType*)(
mem + getLaneOffset<CType>(lane, addr))) =
val;
181 uint32_t
remap(uint32_t vgprIndex, uint32_t
size, uint8_t
mode=0);
307 template<
typename CType>
314 template<
typename CType>
332 void start(uint64_t _wfDynId, uint64_t _base_ptr);
349 uint32_t
rpc()
const;
355 void pc(uint32_t new_pc);
395 #endif // __WAVEFRONT_HH__
std::vector< uint32_t > oldVgpr
Stats::Scalar numTimesBlockedDueRAWDependencies
CallArgMem(int func_args_size_per_item, int wf_size)
void setContext(const void *in)
Sets the hardware context from a stream of bytes. This method is designed for HSAIL execution...
Stats::Scalar numTimesBlockedDueVrfPortAvail
std::deque< std::unique_ptr< ReconvergenceStackEntry > > reconvergenceStack
Stack containing Control Flow Graph nodes (i.e., kernel instructions) to be visited by the wavefront...
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
void pushToReconvergenceStack(uint32_t pc, uint32_t rpc, const VectorMask &exec_mask)
class ConditionRegisterState * condRegState
bool isOldestInstFlatMem()
This represents a slice of the overall LDS, intended to be associated with an individual workgroup ...
bool isOldestInstPrivMem()
Stats::Scalar numTimesBlockedDueWAXDependencies
This is a simple scalar statistic, like a counter.
bool instructionBufferHasBranch()
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Stats::Distribution srcRegOpDist
std::vector< uint32_t > workItemId[3]
uint32_t pc
PC of current instruction.
std::deque< GPUDynInstPtr > instructionBuffer
void initCallArgMem(int func_args_size_per_item, int wf_size)
void regStats()
Register statistics for this object.
std::vector< uint32_t > workItemFlatId
Wavefront(const Params *p)
void writeCallArgMem(int lane, int addr, CType val)
uint32_t getStaticContextSize() const
Returns the size of the static hardware context of a particular wavefront. This should be updated ever...
A simple distribution stat.
CType readCallArgMem(int lane, int addr)
std::vector< int > barCnt
void setParent(ComputeUnit *cu)
ComputeUnit * computeUnit
Stats::Distribution dstRegOpDist
bool isLmInstruction(GPUDynInstPtr ii)
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
uint32_t outstandingReqsWrLm
void getContext(const void *out)
Returns the hardware context as a stream of bytes. This method is designed for HSAIL execution...
uint32_t outstandingReqsRdGm
void computeActualWgSz(NDRange *ndr)
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,16,32,64}_t.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint32_t outstandingReqsRdLm
int getLaneOffset(int lane, int addr)
bool isOldestInstBarrier()
TheGpuISA::GPUISA & gpuISA()
uint32_t outstandingReqsWrGm
bool isGmInstruction(GPUDynInstPtr ii)
std::vector< Addr > lastAddr
TheGpuISA::GPUISA _gpuISA
VectorMask execMask() const
A reconvergence stack entry conveys the necessary state to implement control flow divergence...
void setLaneAddr(int lane, int addr, CType val)
uint8_t * getLaneAddr(int lane, int addr)
uint32_t spillSizePerItem
static const int MAX_NUM_INSTS_PER_WF
void start(uint64_t _wfDynId, uint64_t _base_ptr)
std::vector< uint64_t > oldDgpr
void popFromReconvergenceStack()
void resizeRegFiles(int num_cregs, int num_sregs, int num_dregs)
uint32_t remap(uint32_t vgprIndex, uint32_t size, uint8_t mode=0)
Abstract superclass for simulation objects.
uint32_t rpc
PC of the immediate post-dominator instruction, i.e., the value of pc for the first instruction that ...
bool waitingAtBarrier(int lane)
VectorMask execMask
Execution mask.