40 #include "debug/GPUDisp.hh"
51 pioAddr(p->pio_addr), pioSize(4096), pioDelay(p->pio_latency),
52 dispatchCount(0), dispatchActive(false), cpu(p->cpu),
53 shader(p->shader_pointer), driver(p->cl_driver), tickEvent(this)
67 .
name(
name() +
".num_kernel_launched")
68 .
desc(
"number of kernel launched")
86 fatal(
"Checkpointing not supported during active workgroup execution");
114 DPRINTF(GPUDisp,
"dispatcher registering addr range at %#x size %#x\n",
131 DPRINTF(GPUDisp,
" read register %#x size=%d\n", offset, pkt->
getSize());
142 char *curTaskPtr = (
char*)&
curTask;
144 memcpy(pkt->
getPtr<
const void*>(), curTaskPtr + offset, pkt->
getSize());
161 uint64_t data_val = 0;
165 data_val = pkt->
get<uint8_t>();
168 data_val = pkt->
get<uint16_t>();
171 data_val = pkt->
get<uint32_t>();
174 data_val = pkt->
get<uint64_t>();
180 DPRINTF(GPUDisp,
"write register %#x value %#x size=%d\n", offset, data_val,
184 static int nextId = 0;
194 uint64_t start =
curTick() / 1000;
197 &start,
sizeof(uint64_t), 0);
212 for (
int i = 0;
i < 3; ++
i) {
226 DPRINTF(GPUDisp,
"launching kernel %d\n",nextId);
240 char *curTaskPtr = (
char*)&
curTask;
241 memcpy(curTaskPtr + offset, pkt->
getPtr<
const void*>(), pkt->
getSize());
253 if (if_name ==
"translation_port") {
270 while (
execIds.size() > fail_count) {
282 DPRINTF(GPUDisp,
"kernel %d failed to launch\n", execId);
309 DPRINTF(GPUDisp,
"notify WgCompl %d\n",kern_id);
310 assert(
ndRangeMap[kern_id].dispatchId == kern_id);
329 sizeof(uint64_t), 0);
331 uint64_t end =
curTick() / 1000;
334 sizeof(uint64_t), 0);
362 panic(
"Cannot find host");
367 :
Event(CPU_Tick_Pri), dispatcher(_dispatcher)
380 return "GPU Dispatcher tick";
405 return shader->
cuList[0]->wfList[0][0]->getStaticContextSize();
AddrRangeList getAddrRanges() const
Every PIO device is obliged to provide an implementation that returns the address ranges the device responds to.
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, MemCmd cmd, bool suppress_func_errors)
AddrRange RangeSize(Addr start, Addr size)
Tick ticks(int numCycles) const
void set(T v, ByteOrder endian)
Set the value in the data pointer to v using the specified endianness.
std::vector< ComputeUnit * > cuList
const std::string & name()
ContextID contextId() const
Accessor function for context ID.
virtual void unserialize(CheckpointIn &cp)
Unserialize an object.
std::queue< int > execIds
void updateContext(int cid)
bool scheduled() const
Determine if the current event is scheduled.
uint32_t getStaticContextSize() const
Returns the size of the static hardware context of a wavefront.
void handshake(GpuDispatcher *_dispatcher)
void setFuncargsSize(int funcargs_size)
void accessUserVar(BaseCPU *cpu, uint64_t addr, int val, int off)
Tick write(PacketPtr pkt)
Pure virtual function that the device must implement.
T * getPtr()
Get a pointer to the data ptr.
virtual void serialize(CheckpointOut &cp) const
Serialize an object.
void deschedule(Event &event)
T get(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness.
#define UNSERIALIZE_SCALAR(scalar)
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
void notifyWgCompl(Wavefront *w)
virtual BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx)
Get a master port with a given name and index.
Tick when() const
Get the time that the event is scheduled.
void makeAtomicResponse()
TickEvent(GpuDispatcher *)
uint64_t Tick
Tick count type.
const RequestPtr req
A pointer to the original request.
volatile uint32_t * numDispLeft
static void setInstance(GpuDispatcher *_instance)
BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a master port with a given name and index.
std::queue< int > doneIds
Stats::Scalar num_kernelLaunched
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
static GpuDispatcher * getInstance()
Tick read(PacketPtr pkt)
Pure virtual function that the device must implement.
#define SERIALIZE_SCALAR(scalar)
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
virtual const std::string name() const
std::ostream CheckpointOut
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection to a slave port.
volatile bool * addrToNotify
T divCeil(const T &a, const U &b)
void schedule(Event &event, Tick when)
const char * description() const
Return a C string describing the event.
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
std::unordered_map< int, NDRange > ndRangeMap
static GpuDispatcher * instance
bool dispatch_workgroups(NDRange *ndr)
GpuDispatcher(const Params *p)
void hostWakeUp(BaseCPU *cpu)
void allocate()
Allocate memory for the packet.
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id)
void handshake(GpuDispatcher *dispatcher)