#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"
#endif // X86_ISA

#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "debug/RubyPort.hh"
#include "debug/RubyStats.hh"
#include "params/RubyGPUCoalescer.hh"
GPUCoalescer *
RubyGPUCoalescerParams::create()
{
    return new GPUCoalescer(this);
}
HSAScope
reqScopeToHSAScope(Request *req)
{
    HSAScope accessScope = HSAScope_UNSPECIFIED;
    if (req->isScoped()) {
        if (req->isWavefrontScope()) accessScope = HSAScope_WAVEFRONT;
        else if (req->isWorkgroupScope()) accessScope = HSAScope_WORKGROUP;
        else if (req->isDeviceScope()) accessScope = HSAScope_DEVICE;
        else if (req->isSystemScope()) accessScope = HSAScope_SYSTEM;
        else fatal("Bad scope type");
    }
    return accessScope;
}

HSASegment
reqSegmentToHSASegment(Request *req)
{
    HSASegment accessSegment = HSASegment_GLOBAL;
    if (req->isGlobalSegment()) accessSegment = HSASegment_GLOBAL;
    else if (req->isGroupSegment()) accessSegment = HSASegment_GROUP;
    else if (req->isPrivateSegment()) accessSegment = HSASegment_PRIVATE;
    else if (req->isKernargSegment()) accessSegment = HSASegment_KERNARG;
    else if (req->isReadonlySegment()) accessSegment = HSASegment_READONLY;
    else if (req->isSpillSegment()) accessSegment = HSASegment_SPILL;
    else if (req->isArgSegment()) accessSegment = HSASegment_ARG;
    else fatal("Bad segment type");
    return accessSegment;
}
GPUCoalescer::GPUCoalescer(const Params *p)
    : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
int total_outstanding = 0;

for (; read != read_end; ++read) {
    panic("Possible Deadlock detected. Aborting!\n"
          "version: %d request.paddr: 0x%x m_readRequestTable: %d "
          "current time: %u issue_time: %d difference: %d\n",
          m_version,

for (; write != write_end; ++write) {
    panic("Possible Deadlock detected. Aborting!\n"
          "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
          "current time: %u issue_time: %d difference: %d\n",
          m_version,
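// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 implementation): the wakeup
// path above walks the read and write request tables and panics when any
// entry has been outstanding longer than a deadlock threshold. The names
// PendingReq, OutstandingTable, and checkForDeadlock are stand-ins.
// -------------------------------------------------------------------------
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unordered_map>

struct PendingReq { uint64_t paddr; uint64_t issue_cycle; };
using OutstandingTable = std::unordered_map<uint64_t, PendingReq>;

void
checkForDeadlock(const OutstandingTable &table, uint64_t current_cycle,
                 uint64_t deadlock_threshold)
{
    for (const auto &entry : table) {
        const PendingReq &req = entry.second;
        assert(current_cycle >= req.issue_cycle);
        if (current_cycle - req.issue_cycle >= deadlock_threshold) {
            std::fprintf(stderr,
                "Possible deadlock: paddr 0x%llx outstanding for %llu cycles\n",
                (unsigned long long)req.paddr,
                (unsigned long long)(current_cycle - req.issue_cycle));
            std::abort();
        }
    }
}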
for (int i = 0; i < RubyRequestType_NUM; i++) {
    for (int j = 0; j < MachineType_NUM; j++) {

for (int i = 0; i < MachineType_NUM; i++) {
return RequestStatus_BufferFull;

    request_type != RubyRequestType_Locked_RMW_Write) {
    return RequestStatus_Aliased;

if ((request_type == RubyRequestType_ST) ||
    (request_type == RubyRequestType_ATOMIC) ||
    (request_type == RubyRequestType_ATOMIC_RETURN) ||
    (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
    (request_type == RubyRequestType_RMW_Read) ||
    (request_type == RubyRequestType_RMW_Write) ||
    (request_type == RubyRequestType_Load_Linked) ||
    (request_type == RubyRequestType_Store_Conditional) ||
    (request_type == RubyRequestType_Locked_RMW_Read) ||
    (request_type == RubyRequestType_Locked_RMW_Write) ||
    (request_type == RubyRequestType_FLUSH)) {

    return RequestStatus_Aliased;
    return RequestStatus_Aliased;
    return RequestStatus_Aliased;
    return RequestStatus_Aliased;

return RequestStatus_Ready;
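// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): getRequestStatus()-style
// logic that reports BufferFull when too many requests are outstanding,
// Aliased when another request to the same cache line is already pending, and
// Ready otherwise. The container and field names below are stand-ins.
// -------------------------------------------------------------------------
#include <cstdint>
#include <unordered_set>

enum class ReqStatus { Ready, Aliased, BufferFull };

struct CoalescerState {
    std::unordered_set<uint64_t> pendingReadLines;
    std::unordered_set<uint64_t> pendingWriteLines;
    int outstanding = 0;
    int maxOutstanding = 256;   // assumed limit, analogous to m_max_outstanding_requests
};

inline uint64_t lineAddress(uint64_t addr, uint64_t lineBytes = 64) {
    return addr & ~(lineBytes - 1);
}

ReqStatus getStatus(const CoalescerState &s, uint64_t addr, bool isStore)
{
    if (s.outstanding >= s.maxOutstanding)
        return ReqStatus::BufferFull;
    const uint64_t line = lineAddress(addr);
    // A store aliases with any pending access to the line; a load aliases
    // only with a pending write to the line.
    if (s.pendingWriteLines.count(line) ||
        (isStore && s.pendingReadLines.count(line)))
        return ReqStatus::Aliased;
    return ReqStatus::Ready;
}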
            kernelEndList.size());

if ((request_type == RubyRequestType_ST) ||
    (request_type == RubyRequestType_ATOMIC) ||
    (request_type == RubyRequestType_ATOMIC_RETURN) ||
    (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
    (request_type == RubyRequestType_RMW_Read) ||
    (request_type == RubyRequestType_RMW_Write) ||
    (request_type == RubyRequestType_Load_Linked) ||
    (request_type == RubyRequestType_Store_Conditional) ||
    (request_type == RubyRequestType_Locked_RMW_Read) ||
    (request_type == RubyRequestType_Locked_RMW_Write) ||
    (request_type == RubyRequestType_FLUSH)) {

    RequestTable::iterator i = r.first;
    DPRINTF(GPUCoalescer,
            "Inserting write request for paddr %#x for type %d\n",

    RequestTable::iterator i = r.first;
    DPRINTF(GPUCoalescer,
            "Inserting read request for paddr %#x for type %d\n",
if ((srequest->m_type == RubyRequestType_ST) ||
    (srequest->m_type == RubyRequestType_RMW_Read) ||
    (srequest->m_type == RubyRequestType_RMW_Write) ||
    (srequest->m_type == RubyRequestType_Load_Linked) ||
    (srequest->m_type == RubyRequestType_Store_Conditional) ||
    (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
    (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {

if (request->m_type == RubyRequestType_Store_Conditional) {
} else if (request->m_type == RubyRequestType_Load_Linked) {
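// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): how handleLlsc()-style
// logic might treat Load_Linked and Store_Conditional requests with a per-line
// monitor owned by a context. LockTable and the field names are stand-ins.
// -------------------------------------------------------------------------
#include <cstdint>
#include <unordered_map>

using LockTable = std::unordered_map<uint64_t, int>;  // line address -> context id

// Returns true when the access may proceed (always true except a failed SC).
bool handleLlscSketch(LockTable &locks, uint64_t lineAddr, int contextId,
                      bool isLoadLinked, bool isStoreConditional)
{
    if (isLoadLinked) {
        // LL establishes the monitor: remember which context holds the line.
        locks[lineAddr] = contextId;
        return true;
    }
    if (isStoreConditional) {
        // SC succeeds only if this context still holds the monitor; either
        // way the monitor is cleared afterwards.
        auto it = locks.find(lineAddr);
        bool success = (it != locks.end() && it->second == contextId);
        if (it != locks.end())
            locks.erase(it);
        return success;
    }
    return true;
}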
                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,
    initialRequestTime, forwardRequestTime, firstResponseTime,

                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,

assert((request->m_type == RubyRequestType_ST) ||
       (request->m_type == RubyRequestType_ATOMIC) ||
       (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
       (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
       (request->m_type == RubyRequestType_RMW_Read) ||
       (request->m_type == RubyRequestType_RMW_Write) ||
       (request->m_type == RubyRequestType_Load_Linked) ||
       (request->m_type == RubyRequestType_Store_Conditional) ||
       (request->m_type == RubyRequestType_Locked_RMW_Read) ||
       (request->m_type == RubyRequestType_Locked_RMW_Write) ||
       (request->m_type == RubyRequestType_FLUSH));

if (request->m_type == RubyRequestType_Locked_RMW_Read) {
} else if (request->m_type == RubyRequestType_Locked_RMW_Write) {

    request->issue_time, forwardRequestTime, firstResponseTime,

                          Cycles initialRequestTime,
                          Cycles forwardRequestTime,
    initialRequestTime, forwardRequestTime, firstResponseTime,

                          Cycles initialRequestTime,
                          Cycles forwardRequestTime,

assert((request->m_type == RubyRequestType_LD) ||
       (request->m_type == RubyRequestType_IFETCH));

    request->issue_time, forwardRequestTime, firstResponseTime,

                         Cycles initialRequestTime,
                         Cycles forwardRequestTime,
if (type == RubyRequestType_IFETCH) {

for (int i = 0; i < len; ++i) {
    assert(type == reqCoalescer[request_line_address][i].primaryType);
    request_address = pkt->getAddr();
    if (pkt->getPtr<uint8_t>()) {
        if ((type == RubyRequestType_LD) ||
            (type == RubyRequestType_ATOMIC) ||
            (type == RubyRequestType_ATOMIC_RETURN) ||
            (type == RubyRequestType_IFETCH) ||
            (type == RubyRequestType_RMW_Read) ||
            (type == RubyRequestType_Locked_RMW_Read) ||
            (type == RubyRequestType_Load_Linked)) {
            memcpy(pkt->getPtr<uint8_t>(),
        DPRINTF(MemoryAccess,
                "WARNING.  Data not transfered from Ruby to M5 for type " \
                "%s\n",
                RubyRequestType_to_string(type));
    mylist.push_back(pkt);
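// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): after a line fill
// returns, each coalesced packet that reads data gets the relevant bytes of
// the line-sized data block copied back at its offset within the line, much
// like the loop above. CoalescedPkt and its fields are stand-ins.
// -------------------------------------------------------------------------
#include <cstdint>
#include <cstring>
#include <vector>

struct CoalescedPkt {
    uint64_t addr;       // full byte address of this access
    unsigned size;       // access size in bytes
    uint8_t *data;       // destination buffer (may be null for no-data packets)
    bool needsData;      // true for loads / atomics-with-return, false for plain stores
};

void copyLineDataToPackets(const uint8_t *lineData, uint64_t lineAddr,
                           std::vector<CoalescedPkt> &pkts)
{
    for (auto &pkt : pkts) {
        if (!pkt.data || !pkt.needsData)
            continue;                       // nothing to return for this access
        uint64_t offset = pkt.addr - lineAddr;
        std::memcpy(pkt.data, lineData + offset, pkt.size);
    }
}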
return RequestStatus_Issued;
return RequestStatus_Issued;
return RequestStatus_BufferFull;

RubyRequestType primary_type = RubyRequestType_NULL;
RubyRequestType secondary_type = RubyRequestType_NULL;

primary_type = RubyRequestType_Store_Conditional;
primary_type = RubyRequestType_Load_Linked;
secondary_type = RubyRequestType_ATOMIC;
primary_type = RubyRequestType_Locked_RMW_Write;
primary_type = RubyRequestType_Locked_RMW_Read;
secondary_type = RubyRequestType_ST;
primary_type = RubyRequestType_ATOMIC;
secondary_type = RubyRequestType_ATOMIC;
primary_type = secondary_type = RubyRequestType_IFETCH;

#if THE_ISA == X86_ISA
bool storeCheck = flags &
#else
bool storeCheck = false;
#endif // X86_ISA

primary_type = RubyRequestType_RMW_Read;
secondary_type = RubyRequestType_ST;
primary_type = secondary_type = RubyRequestType_LD;
primary_type = secondary_type = RubyRequestType_ST;
primary_type = secondary_type = RubyRequestType_FLUSH;

return RequestStatus_Issued;
return RequestStatus_Issued;
panic("Unsupported ruby packet type\n");

if (status != RequestStatus_Ready)

} else if (primary_type !=
    return RequestStatus_Aliased;
return RequestStatus_Aliased;
return RequestStatus_Aliased;

reqCoalescer[line_addr].emplace_back(pkt, primary_type, secondary_type);
return RequestStatus_Issued;
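// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): how a packet's
// attributes could be decoded into the "primary" type tracked by the
// coalescer and the "secondary" type sent to the cache controller, then
// recorded per cache line. PktInfo, decodeTypes, and the table alias are
// stand-ins; the real reqCoalescer stores richer per-request descriptors.
// -------------------------------------------------------------------------
#include <cstdint>
#include <map>
#include <vector>

enum class RubyType { LD, ST, IFETCH, ATOMIC, Load_Linked, Store_Conditional,
                      Locked_RMW_Read, Locked_RMW_Write, FLUSH };

struct PktInfo {
    bool isLLSC, isLocked, isAtomic, isInstFetch, isFlush, isWrite;
};

struct TypePair { RubyType primary, secondary; };

TypePair decodeTypes(const PktInfo &p)
{
    if (p.isLLSC)
        // LL/SC pairs are presented to the protocol as an atomic.
        return {p.isWrite ? RubyType::Store_Conditional : RubyType::Load_Linked,
                RubyType::ATOMIC};
    if (p.isLocked)
        // x86-style locked RMW looks like a store to the protocol.
        return {p.isWrite ? RubyType::Locked_RMW_Write : RubyType::Locked_RMW_Read,
                RubyType::ST};
    if (p.isAtomic)    return {RubyType::ATOMIC, RubyType::ATOMIC};
    if (p.isInstFetch) return {RubyType::IFETCH, RubyType::IFETCH};
    if (p.isFlush)     return {RubyType::FLUSH, RubyType::FLUSH};
    return p.isWrite ? TypePair{RubyType::ST, RubyType::ST}
                     : TypePair{RubyType::LD, RubyType::LD};
}

// Requests to the same cache line are queued together until the line is issued.
using SimpleCoalescingTable = std::map<uint64_t, std::vector<TypePair>>;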
for (int i = 0; i < tableSize; i++) {
    uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
    uint32_t tmpSize = tmpPkt->getSize();
        atomicOps.push_back(tmpAtomicOp);
    } else if (tmpPkt->isWrite()) {
        dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;

std::shared_ptr<RubyRequest> msg;
                          RubyAccessMode_Supervisor, pkt,
                          PrefetchBit_No, proc_id, 100,
                          blockSize, accessMask,
                          dataBlock, atomicOps,
                          accessScope, accessSegment);
                          RubyAccessMode_Supervisor, pkt,
                          PrefetchBit_No, proc_id, 100,
                          blockSize, accessMask,
                          accessScope, accessSegment);

DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
         RubyRequestType_to_string(secondary_type));

fatal_if(secondary_type == RubyRequestType_IFETCH,
         "there should not be any I-Fetch requests in the GPU Coalescer");

    "should not have a latency of zero");
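// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): when a coalesced line
// request is issued, a per-byte access mask and a line-sized data block are
// assembled from every packet queued for that line, as the loop above hints.
// QueuedPkt, LineRequest, and kLineBytes are stand-ins.
// -------------------------------------------------------------------------
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

constexpr unsigned kLineBytes = 64;

struct QueuedPkt {
    uint64_t addr;
    unsigned size;
    const uint8_t *data;   // write data, or nullptr for reads
    bool isWrite;
};

struct LineRequest {
    std::array<bool, kLineBytes> accessMask{};   // which bytes are touched
    std::array<uint8_t, kLineBytes> dataBlock{}; // merged write data
};

LineRequest buildLineRequest(uint64_t lineAddr, const std::vector<QueuedPkt> &pkts)
{
    LineRequest req;
    for (const auto &pkt : pkts) {
        unsigned offset = static_cast<unsigned>(pkt.addr - lineAddr);
        if (pkt.isWrite && pkt.data)
            std::memcpy(req.dataBlock.data() + offset, pkt.data, pkt.size);
        for (unsigned j = 0; j < pkt.size; j++)
            req.accessMask[offset + j] = true;
    }
    return req;
}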
template <class KEY, class VALUE>
ostream &
operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
{
    for (auto i = map.begin(); i != map.end(); ++i)
        out << " " << i->first << "=" << i->second;
#ifdef CHECK_COHERENCE

DPRINTF(RubyStats, "Recorded statistic: %s\n",
        SequencerRequestType_to_string(requestType));

    : Event(Progress_Event_Pri), seq(_seq)
for (int i = 0; i < len; ++i) {

panic("GPUCoalescer::makeRequest should never be called if the "
      "request is already outstanding\n");

for (int i = 0; i < len; i++) {

seq->completeIssue();

return "Issue coalesced request";
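// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): the deferred-issue
// pattern suggested by IssueEvent above. makeRequest() only queues packets
// per cache line; an event scheduled for the same cycle later calls
// completeIssue(), which walks the newly touched lines and issues one
// coalesced request per line. The member and callback names are stand-ins.
// -------------------------------------------------------------------------
#include <cstdint>
#include <functional>
#include <map>
#include <vector>

struct CoalescerSketch {
    std::map<uint64_t, std::vector<int>> reqCoalescer;  // line -> queued pkt ids
    std::vector<uint64_t> newRequests;                  // lines first touched this cycle

    void makeRequest(uint64_t lineAddr, int pktId) {
        if (reqCoalescer[lineAddr].empty())
            newRequests.push_back(lineAddr);
        reqCoalescer[lineAddr].push_back(pktId);
    }

    // Called once by the scheduled issue event.
    void completeIssue() {
        for (uint64_t line : newRequests)
            issueLine(line, reqCoalescer[line]);
        newRequests.clear();
    }

    // Hook standing in for the protocol-facing issueRequest() path.
    std::function<void(uint64_t, const std::vector<int> &)> issueLine =
        [](uint64_t, const std::vector<int> &) {};
};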
assert((srequest->m_type == RubyRequestType_ATOMIC) ||
       (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
       (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));

    srequest->issue_time, Cycles(0), Cycles(0), true, false);

for (int i = 0; i < len; ++i) {
    assert(srequest->m_type ==
    request_address = (pkt->getAddr());
    if (pkt->getPtr<uint8_t>() &&
        srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
        memcpy(pkt->getPtr<uint8_t>(),
        DPRINTF(MemoryAccess,
                "WARNING.  Data not transfered from Ruby to M5 for type " \
                "%s\n",
                RubyRequestType_to_string(srequest->m_type));
    mylist.push_back(pkt);

if (myMachID == senderMachID) {
if (myMachID == senderMachID) {

for (int i = 0; i < len; ++i) {
    assert(port != NULL);
    port->hitCallback(mylist[i]);

return request->pkt;
                                  Cycles initialRequestTime,
                                  Cycles forwardRequestTime,
                                  Cycles firstResponseTime,
                                  bool success, bool isRegion)

assert(completion_time >= issued_time);
Cycles total_lat = completion_time - issued_time;

if (mach == MachineType_TCP) {
    if (type == RubyRequestType_LD) {
} else if (mach == MachineType_L1Cache_wCC) {
    if (type == RubyRequestType_LD) {
} else if (mach == MachineType_TCC) {
    if (type == RubyRequestType_LD) {

if (type == RubyRequestType_LD) {

if (total_lat != Cycles(0)) {

if (mach != MachineType_NUM) {

if ((issued_time <= initialRequestTime) &&
    (initialRequestTime <= forwardRequestTime) &&
    (forwardRequestTime <= firstResponseTime) &&
    (firstResponseTime <= completion_time)) {

        initialRequestTime - issued_time);
        forwardRequestTime - initialRequestTime);
        firstResponseTime - forwardRequestTime);
        completion_time - firstResponseTime);
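// -------------------------------------------------------------------------
// Illustrative sketch (assumption, not the gem5 code): recordMissLatency()-
// style bookkeeping that splits a miss latency into issue->initial,
// initial->forward, forward->firstResponse, and firstResponse->completion
// segments and samples each into its own histogram, mirroring the deltas
// above. The Histogram type here is a simple stand-in counter.
// -------------------------------------------------------------------------
#include <cstdint>
#include <map>

struct HistogramSketch {
    std::map<uint64_t, uint64_t> buckets;
    void sample(uint64_t v) { ++buckets[v]; }
};

struct MissLatencyStats {
    HistogramSketch total, issueToInitial, initialToForward,
                    forwardToFirstResp, firstRespToCompletion;
};

void recordMissLatencySketch(MissLatencyStats &stats,
                             uint64_t issued, uint64_t initial, uint64_t forward,
                             uint64_t firstResp, uint64_t completion)
{
    stats.total.sample(completion - issued);
    // Only record the breakdown when the timestamps are monotonically ordered;
    // otherwise a stage was skipped and the deltas would be meaningless.
    if (issued <= initial && initial <= forward &&
        forward <= firstResp && firstResp <= completion) {
        stats.issueToInitial.sample(initial - issued);
        stats.initialToForward.sample(forward - initial);
        stats.forwardToFirstResp.sample(firstResp - forward);
        stats.firstRespToCompletion.sample(completion - firstResp);
    }
}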
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
         success ? "Done" : "SC_Failed", "", "",
for (int i = 0; i < RubyRequestType_NUM; i++) {

for (int i = 0; i < MachineType_NUM; i++) {

for (int i = 0; i < RubyRequestType_NUM; i++) {
    for (int j = 0; j < MachineType_NUM; j++) {
    .desc("loads that hit in the TCP")
    .name(name() + ".gpu_tcp_ld_transfers")
    .desc("TCP to TCP load transfers")
    .desc("loads that hit in the TCC")
    .desc("loads that miss in the GPU")

    .desc("stores that hit in the TCP")
    .name(name() + ".gpu_tcp_st_transfers")
    .desc("TCP to TCP store transfers")
    .desc("stores that hit in the TCC")
    .desc("stores that miss in the GPU")

    .desc("loads that hit in the TCP")
    .name(name() + ".cp_tcp_ld_transfers")
    .desc("TCP to TCP load transfers")
    .desc("loads that hit in the TCC")
    .desc("loads that miss in the GPU")

    .desc("stores that hit in the TCP")
    .name(name() + ".cp_tcp_st_transfers")
    .desc("TCP to TCP store transfers")
    .desc("stores that hit in the TCC")
    .desc("stores that miss in the GPU")