40 #include "debug/GPUTLB.hh"
43 clock(p->clk_domain->clockPeriod()), TLBProbesPerCycle(p->probesPerCycle),
44 coalescingWindow(p->coalescingWindow),
45 disableCoalescing(p->disableCoalescing), probeTLBEvent(this),
49 for (
size_t i = 0;
i < p->port_slave_connection_count; ++
i) {
55 for (
size_t i = 0;
i < p->port_master_connection_count; ++
i) {
64 if (if_name ==
"slave") {
65 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
66 panic(
"TLBCoalescer::getSlavePort: unknown index %d\n", idx);
71 panic(
"TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
78 if (if_name ==
"master") {
79 if (idx >= static_cast<PortID>(
memSidePort.size())) {
80 panic(
"TLBCoalescer::getMasterPort: unknown index %d\n", idx);
85 panic(
"TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
101 TheISA::GpuTLB::TranslationState *incoming_state =
104 TheISA::GpuTLB::TranslationState *coalesced_state =
115 if (incoming_virt_page_addr != coalesced_virt_page_addr)
124 if (incoming_mode != coalesced_mode)
130 if (!incoming_state->prefetch)
131 coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
145 DPRINTF(GPUTLB,
"Update phys. addr. for %d coalesced reqs for page %#x\n",
148 TheISA::GpuTLB::TranslationState *sender_state =
151 TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
153 Addr first_entry_vaddr = tlb_entry->vaddr;
154 Addr first_entry_paddr = tlb_entry->paddr;
155 int page_size = tlb_entry->size();
157 int first_hit_level = sender_state->hitLevel;
158 bool valid = tlb_entry->valid;
164 phys_page_paddr &= ~(page_size - 1);
168 TheISA::GpuTLB::TranslationState *sender_state =
169 safe_cast<TheISA::GpuTLB::TranslationState*>(
174 if (!sender_state->prefetch)
175 sender_state->reqCnt.pop_back();
184 Addr paddr = phys_page_paddr;
185 paddr |= (local_pkt->
req->
getVaddr() & (page_size - 1));
193 sender_state->tlbEntry =
194 new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
200 sender_state->hitLevel = first_hit_level;
203 SlavePort *return_port = sender_state->ports.back();
204 sender_state->ports.pop_back();
234 bool didCoalesce =
false;
236 int coalescedReq_cnt = 0;
238 TheISA::GpuTLB::TranslationState *sender_state =
242 sender_state->ports.push_back(
this);
244 bool update_stats = !sender_state->prefetch;
254 if (!sender_state->reqCnt.empty())
255 req_cnt = sender_state->reqCnt.back();
257 sender_state->reqCnt.push_back(req_cnt);
261 req_cnt = sender_state->reqCnt.back();
262 DPRINTF(GPUTLB,
"receiving pkt w/ req_cnt %d\n", req_cnt);
272 if (!sender_state->issueTime)
273 sender_state->issueTime =
curTick();
286 for (
int i = 0;
i < coalescedReq_cnt; ++
i) {
292 DPRINTF(GPUTLB,
"Coalesced req %i w/ tick_index %d has %d reqs\n",
304 if (!coalescedReq_cnt || !didCoalesce) {
309 new_array.push_back(pkt);
312 DPRINTF(GPUTLB,
"coalescerFIFO[%d] now has %d coalesced reqs after "
313 "push\n", tick_index,
337 TheISA::GpuTLB::TranslationState *sender_state =
340 bool update_stats = !sender_state->prefetch;
343 coalescer->uncoalescedAccesses++;
350 int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
353 DPRINTF(GPUTLB,
"Warning! Functional access to addr %#x sees timing "
354 "req. pending\n", virt_page_addr);
357 coalescer->memSidePort[0]->sendFunctional(pkt);
373 coalescer->updatePhysAddresses(pkt);
382 if (!coalescer->probeTLBEvent.scheduled())
383 coalescer->schedule(coalescer->probeTLBEvent,
384 curTick() + coalescer->ticks(1));
390 fatal(
"Memory side recvFunctional() not implemented in TLB coalescer.\n");
394 :
Event(CPU_Tick_Pri), coalescer(_coalescer)
401 return "Probe the TLB below";
422 bool rejected =
false;
428 DPRINTF(GPUTLB,
"triggered TLBCoalescer IssueProbeEvent\n");
430 for (
auto iter = coalescer->coalescerFIFO.begin();
431 iter != coalescer->coalescerFIFO.end() && !rejected; ) {
432 int coalescedReq_cnt = iter->second.size();
434 int vector_index = 0;
436 DPRINTF(GPUTLB,
"coalescedReq_cnt is %d for tick_index %d\n",
437 coalescedReq_cnt, iter->first);
439 while (i < coalescedReq_cnt) {
441 PacketPtr first_packet = iter->second[vector_index][0];
449 coalescer->issuedTranslationsTable.count(virt_page_addr);
452 DPRINTF(GPUTLB,
"Cannot issue - There are pending reqs for "
453 "page %#x\n", virt_page_addr);
462 if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
463 DPRINTF(GPUTLB,
"Failed to send TLB request for page %#x",
471 TheISA::GpuTLB::TranslationState *tmp_sender_state =
472 safe_cast<TheISA::GpuTLB::TranslationState*>
475 bool update_stats = !tmp_sender_state->prefetch;
481 int req_cnt = tmp_sender_state->reqCnt.back();
482 coalescer->queuingCycles += (
curTick() * req_cnt);
484 DPRINTF(GPUTLB,
"%s sending pkt w/ req_cnt %d\n",
485 coalescer->name(), req_cnt);
489 int pkt_cnt = iter->second[vector_index].size();
490 coalescer->localqueuingCycles += (
curTick() * pkt_cnt);
493 DPRINTF(GPUTLB,
"Successfully sent TLB request for page %#x",
497 coalescer->issuedTranslationsTable[virt_page_addr]
498 = iter->second[vector_index];
501 iter->second.erase(iter->second.begin() + vector_index);
503 if (iter->second.empty())
504 assert(i == coalescedReq_cnt);
507 if (sent_probes == coalescer->TLBProbesPerCycle)
514 if (iter->second.empty()) {
515 coalescer->coalescerFIFO.erase(iter++);
523 :
Event(Maximum_Pri), coalescer(_coalescer)
530 return "Cleanup issuedTranslationsTable hashmap";
536 while (!coalescer->cleanupQueue.empty()) {
537 Addr cleanup_addr = coalescer->cleanupQueue.front();
538 coalescer->cleanupQueue.pop();
539 coalescer->issuedTranslationsTable.erase(cleanup_addr);
541 DPRINTF(GPUTLB,
"Cleanup - Delete coalescer entry with key %#x\n",
552 .
name(
name() +
".uncoalesced_accesses")
553 .
desc(
"Number of uncoalesced TLB accesses")
557 .
name(
name() +
".coalesced_accesses")
558 .
desc(
"Number of coalesced TLB accesses")
563 .
desc(
"Number of cycles spent in queue")
567 .
name(
name() +
".local_queuing_cycles")
568 .
desc(
"Number of cycles spent in queue for all incoming reqs")
573 .
desc(
"Avg. latency over all incoming pkts")
581 TLBCoalescerParams::create()
TLBCoalescer(const Params *p)
virtual void recvFunctional(PacketPtr pkt)
CoalescingFIFO coalescerFIFO
void updatePhysAddresses(PacketPtr pkt)
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be called on the slave port) and was unsuccessful.
const char * description() const
Return a C string describing the event.
void makeTimingResponse()
The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of each TLB.
Tick ticks(int numCycles) const
BaseSlavePort & getSlavePort(const std::string &if_name, PortID idx)
Get a slave port with a given name and index.
bool scheduled() const
Determine if the current event is scheduled.
CoalescingTable issuedTranslationsTable
A SlavePort is a specialisation of a port.
CleanupEvent(TLBCoalescer *_coalescer)
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to a master port.
Stats::Scalar localqueuingCycles
IssueProbeEvent(TLBCoalescer *_coalescer)
std::queue< Addr > cleanupQueue
bool sendTimingResp(PacketPtr pkt)
Attempt to send a timing response to the master port by calling its corresponding receive function.
Tick curTick()
The current simulated tick.
std::string csprintf(const char *format, const Args &...args)
BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx)
Get a master port with a given name and index.
The request is to an uncacheable address.
const char * description() const
Return a C string describing the event.
TLBCoalescerParams Params
const RequestPtr req
A pointer to the original request.
Stats::Formula localLatency
T roundDown(const T &val, const U &align)
Stats::Scalar coalescedAccesses
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::vector< CpuSidePort * > cpuSidePort
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
CleanupEvent cleanupEvent
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the master port.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
std::vector< MemSidePort * > memSidePort
virtual const std::string name() const
SenderState * senderState
This packet's sender state.
The MemObject class extends the ClockedObject with accessor functions to get its master and slave ports.
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection to a slave port.
IssueProbeEvent probeTLBEvent
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Stats::Scalar uncoalescedAccesses
void schedule(Event &event, Tick when)
virtual void recvReqRetry()
Stats::Scalar queuingCycles
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the master port.
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2)
void setPaddr(Addr paddr)
Set just the physical address.
void regStats()
Register statistics for this object.
void setFlags(Flags flags)
Note that unlike other accessors, this function sets specific flags (ORs them in); it does not assign its argument to the flags field.
void regStats() override
Register statistics for this object.
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the slave port.