42 #include "debug/GPUDisp.hh"
43 #include "debug/GPUMem.hh"
44 #include "debug/HSAIL.hh"
54 clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
55 cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
56 hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
57 separate_acquire_release(p->separate_acquire_release), coissue_return(1),
58 trace_vgpr_all(1), n_cu((p->CUs).
size()), n_wf(p->n_wf),
59 globalMemSize(p->globalmem), nextSchedCu(0), sa_n(0), tick_cnt(0),
60 box_tick_cnt(0), start_tick_cnt(0)
65 for (
int i = 0;
i <
n_cu; ++
i) {
86 start = mem_state->getMmapEnd() -
length;
87 mem_state->setMmapEnd(start);
90 start = mem_state->getMmapEnd();
91 mem_state->setMmapEnd(start + length);
94 assert(mem_state->getStackBase() - mem_state->getMaxStackSize() >
95 mem_state->getMmapEnd());
98 DPRINTF(HSAIL,
"Shader::mmap start= %#x, %#x\n", start, length);
137 panic(
"Dispatcher wants to wakeup a different host");
142 ShaderParams::create()
154 for (
int i = 0;
i <
sa_n; ++
i) {
173 bool scheduledSomething =
false;
177 while (cuCount <
n_cu) {
185 scheduledSomething =
true;
186 DPRINTF(GPUDisp,
"Dispatching a workgroup to CU %d\n", curCu);
193 cuList[curCu]->StartWorkgroup(ndr);
216 return scheduledSomething;
227 bool suppress_func_errors,
int cu_id)
229 int block_size =
cuList.at(cu_id)->cacheLineSize();
240 fatal(
"unexcepted MemCmd\n");
244 Addr split_addr =
roundDown(tmp_addr + size - 1, block_size);
246 assert(split_addr <= tmp_addr || split_addr - tmp_addr < block_size);
249 if (split_addr > tmp_addr) {
266 if (suppress_func_errors) {
273 cuList[0]->memPort[0]->sendFunctional(new_pkt1);
274 cuList[0]->memPort[0]->sendFunctional(new_pkt2);
286 if (suppress_func_errors) {
292 cuList[0]->memPort[0]->sendFunctional(new_pkt);
302 for (
int i_cu = 0; i_cu <
n_cu; ++i_cu) {
303 if (!
cuList[i_cu]->isDone()) {
321 :
Event(CPU_Tick_Pri), shader(_shader)
329 if (shader->busy()) {
331 shader->schedule(
this,
curTick() + shader->ticks(1));
338 return "Shader tick";
343 MemCmd cmd,
bool suppress_func_errors)
345 uint8_t *data_buf = (uint8_t*)ptr;
348 !gen.
done(); gen.next()) {
350 cuList[0]->masterId(), 0, 0, 0);
353 data_buf += gen.size();
366 bool suppress_func_errors)
379 bool suppress_func_errors)
382 suppress_func_errors);
395 new TheISA::GpuTLB::TranslationState(mode,
gpuTc,
false);
404 cuList[cu_id]->tlbPort[0]->sendFunctional(pkt);
408 TheISA::GpuTLB::TranslationState *sender_state =
411 delete sender_state->tlbEntry;
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
std::vector< int32_t > sa_x
Tick ticks(int numCycles) const
std::vector< ComputeUnit * > cuList
void setSuppressFuncError()
void updateContext(int cid)
void doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data, bool suppress_func_errors, int cu_id)
void allocateMem(Addr vaddr, int64_t size, bool clobber=false)
virtual bool mmapGrowsDown() const
Does mmap region grow upward or downward from mmapEnd? Most platforms grow downward, but a few (such as Alpha) grow upward instead, so they can override this method to return false.
bool scheduled() const
Determine if the current event is scheduled.
GpuDispatcher * dispatcher
virtual Process * getProcessPtr()=0
std::vector< uint64_t > sa_when
T roundUp(const T &val, const U &align)
std::shared_ptr< MemState > memState
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Tick curTick()
The current simulated tick.
uint64_t Tick
Tick count type.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
const RequestPtr req
A pointer to the original request.
const char * description() const
Return a C string describing the event.
void splitOnVaddr(Addr split_addr, RequestPtr &req1, RequestPtr &req2)
Generate two requests as if this request had been split into two pieces.
virtual void init()
init() is called after all C++ SimObjects have been created and all ports are connected.
T roundDown(const T &val, const U &align)
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
void ScheduleAdd(uint32_t *val, Tick when, int x)
Declaration of the Packet class.
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
SenderState * senderState
This packet's sender state.
virtual int threadId() const =0
void schedule(Event &event, Tick when)
virtual Status status() const =0
Declaration and inline definition of ChunkGenerator object.
bool dispatch_workgroups(NDRange *ndr)
void hostWakeUp(BaseCPU *cpu)
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
void sendFunctional(PacketPtr pkt)
Send a functional request packet, where the data is instantly updated everywhere in the memory system...
void handshake(GpuDispatcher *dispatcher)
ProbePointArg< PacketInfo > Packet
Packet probe point.
std::vector< uint32_t * > sa_val