52 #include "debug/GPUPrefetch.hh"
53 #include "debug/GPUTLB.hh"
64 cleanupEvent(this, false,
Event::Maximum_Pri), exitEvent(this)
72 clock = p->clk_domain->clockPeriod();
79 for (
int set = 0; set <
numSets; ++set) {
80 for (
int way = 0; way <
assoc; ++way) {
81 int x = set * assoc + way;
109 cprintf(
"Forcing maxCoalescedReqs to %d (TLB assoc.) \n",
assoc);
118 for (
size_t i = 0;
i < p->port_slave_connection_count; ++
i) {
124 for (
size_t i = 0;
i < p->port_master_connection_count; ++
i) {
140 if (if_name ==
"slave") {
141 if (idx >= static_cast<PortID>(
cpuSidePort.size())) {
142 panic(
"TLBCoalescer::getSlavePort: unknown index %d\n", idx);
147 panic(
"TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
154 if (if_name ==
"master") {
155 if (idx >= static_cast<PortID>(
memSidePort.size())) {
156 panic(
"TLBCoalescer::getMasterPort: unknown index %d\n", idx);
163 panic(
"TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
187 newEntry->vaddr = vpn;
193 GpuTLB::EntryList::iterator
203 for (; entry !=
entryList[set].end(); ++entry) {
204 int page_size = (*entry)->size();
206 if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
207 DPRINTF(GPUTLB,
"Matched vaddr %#x to entry starting at %#x "
208 "with size %#x.\n", va, (*entry)->vaddr, page_size);
228 auto entry =
lookupIt(va, update_lru);
239 DPRINTF(GPUTLB,
"Invalidating all entries.\n");
259 DPRINTF(GPUTLB,
"Invalidating all non global entries.\n");
264 if (!(*entryIt)->global) {
290 DPRINTF(GPUTLB,
"Addresses references internal memory.\n");
295 panic(
"CPUID memory space not yet implemented!\n");
612 return std::make_shared<GeneralProtection>(0);
626 assert(!(IOPort & ~0xFFFF));
628 if (IOPort == 0xCF8 && req->
getSize() == 4) {
631 }
else if ((IOPort & ~
mask(2)) == 0xCFC) {
637 if (
bits(configAddress, 31, 31)) {
639 mbits(configAddress, 30, 2) |
650 panic(
"Access to unrecognized internal address space %#x.\n",
665 bool tlb_hit =
false;
673 DPRINTF(GPUTLB,
"TLB Lookup for vaddr %#x.\n", vaddr);
677 DPRINTF(GPUTLB,
"In protected mode.\n");
679 assert(m5Reg.mode == LongMode);
683 DPRINTF(GPUTLB,
"Paging enabled.\n");
713 bool &delayedResponse,
bool timing,
int &latency)
725 delayedResponse =
false;
727 DPRINTF(GPUTLB,
"Translating vaddr %#x.\n", vaddr);
733 DPRINTF(GPUTLB,
"In protected mode.\n");
735 if (m5Reg.mode != LongMode) {
736 DPRINTF(GPUTLB,
"Not in long mode. Checking segment "
743 return std::make_shared<GeneralProtection>(0);
752 return std::make_shared<GeneralProtection>(0);
755 return std::make_shared<GeneralProtection>(0);
757 expandDown = attr.expandDown;
769 if ((csAttr.defaultSize && sizeOverride) ||
770 (!csAttr.defaultSize && !sizeOverride)) {
778 DPRINTF(GPUTLB,
"Checking an expand down segment.\n");
779 warn_once(
"Expand down segments are untested.\n");
781 if (offset <= limit || endOffset <= limit)
782 return std::make_shared<GeneralProtection>(0);
784 if (offset > limit || endOffset > limit)
785 return std::make_shared<GeneralProtection>(0);
791 DPRINTF(GPUTLB,
"Paging enabled.\n");
803 fatal(
"GpuTLB doesn't support full-system mode\n");
805 DPRINTF(GPUTLB,
"Handling a TLB miss for address %#x "
806 "at pc %#x.\n", vaddr, tc->
instAddr());
810 bool success = p->pTable->lookup(vaddr, newEntry);
818 if (p->fixupStackFault(vaddr))
819 success = p->pTable->lookup(vaddr, newEntry);
823 return std::make_shared<PageFault>(
vaddr,
true,
827 newEntry.
valid = success;
828 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
830 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n",
831 alignedVaddr, newEntry.pageStart());
833 entry =
insert(alignedVaddr, newEntry);
836 DPRINTF(GPUTLB,
"Miss was serviced.\n");
847 bool inUser = (m5Reg.cpl == 3 &&
851 bool badWrite = (!entry->writable && (inUser || cr0.wp));
858 return std::make_shared<PageFault>(
vaddr,
true,
mode,
862 if (storeCheck && badWrite) {
865 return std::make_shared<PageFault>(
vaddr,
true,
871 DPRINTF(GPUTLB,
"Entry found with paddr %#x, doing protection "
872 "checks.\n", entry->paddr);
874 int page_size = entry->size();
875 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
876 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
879 if (entry->uncacheable)
883 DPRINTF(GPUTLB,
"Paging disabled.\n");
884 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
889 DPRINTF(GPUTLB,
"In real mode.\n");
890 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, vaddr);
896 LocalApicBase localApicBase =
902 if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
917 bool delayedResponse;
927 bool delayedResponse;
931 delayedResponse,
true, latency);
933 if (!delayedResponse)
934 translation->
finish(fault, req, tc, mode);
960 .
name(
name() +
".local_TLB_accesses")
961 .
desc(
"Number of TLB accesses")
966 .
desc(
"Number of TLB hits")
971 .
desc(
"Number of TLB misses")
975 .
name(
name() +
".local_TLB_miss_rate")
976 .
desc(
"TLB miss rate")
981 .
desc(
"Cycles spent accessing this TLB level")
985 .
name(
name() +
".page_table_cycles")
986 .
desc(
"Cycles spent accessing the page table")
993 .
desc(
"Number of unique pages touched")
998 .
desc(
"Number of cycles spent in queue for all incoming reqs")
1003 .
desc(
"Avg. latency over incoming coalesced reqs")
1009 .
name(
name() +
".global_TLB_accesses")
1010 .
desc(
"Number of TLB accesses")
1015 .
desc(
"Number of TLB hits")
1019 .
name(
name() +
".global_TLB_misses")
1020 .
desc(
"Number of TLB misses")
1024 .
name(
name() +
".global_TLB_miss_rate")
1025 .
desc(
"TLB miss rate")
1031 .
name(
name() +
".avg_reuse_distance")
1032 .
desc(
"avg. reuse distance over all pages (in ticks)")
1054 bool update_stats = !sender_state->
prefetch;
1057 DPRINTF(GPUTLB,
"Translation req. for virt. page addr %#x\n",
1060 int req_cnt = sender_state->reqCnt.back();
1073 bool success =
tlbLookup(tmp_req, tmp_tc, update_stats);
1081 sender_state->tlbEntry =
1087 sender_state->hitLevel = sender_state->reqCnt.size();
1105 new TLBEvent(
this, virt_page_addr, lookup_outcome, pkt);
1108 panic(
"Virtual Page Address %#x already has a return event\n",
1115 DPRINTF(GPUTLB,
"schedule translationReturnEvent @ curTick %d\n",
1123 :
Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1124 outcome(tlb_outcome), pkt(_pkt)
1144 bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1146 if ((inUser && !tlb_entry->user) ||
1154 if (storeCheck && badWrite) {
1183 DPRINTF(GPUTLB,
"Translation Done - TLB Hit for addr %#x\n", vaddr);
1184 local_entry = sender_state->
tlbEntry;
1186 DPRINTF(GPUTLB,
"Translation Done - TLB Miss for addr %#x\n",
1192 new_entry = sender_state->
tlbEntry;
1194 local_entry = new_entry;
1197 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1200 local_entry =
insert(virt_page_addr, *new_entry);
1203 assert(local_entry);
1211 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1212 "while paddr was %#x.\n", local_entry->vaddr,
1213 local_entry->paddr);
1216 int page_size = local_entry->size();
1217 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1218 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
1228 if (local_entry->uncacheable) {
1255 DPRINTF(GPUTLB,
"Triggered TLBEvent for addr %#x\n", virtPageAddr);
1263 int req_cnt = tmp_sender_state->
reqCnt.back();
1264 bool update_stats = !tmp_sender_state->
prefetch;
1277 DPRINTF(GPUTLB,
"This is a TLB miss\n");
1298 DPRINTF(GPUTLB,
"Failed sending translation request to "
1299 "lower level TLB for addr %#x\n", virtPageAddr);
1303 DPRINTF(GPUTLB,
"Sent translation request to lower level "
1304 "TLB for addr %#x\n", virtPageAddr);
1308 DPRINTF(GPUTLB,
"Last level TLB - start a page walk for "
1309 "addr %#x\n", virtPageAddr);
1324 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1334 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1335 assert(alignedVaddr == virtPageAddr);
1338 success = p->pTable->lookup(vaddr, newEntry);
1340 if (p->fixupStackFault(vaddr)) {
1341 success = p->pTable->lookup(vaddr, newEntry);
1345 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1346 newEntry.pageStart());
1349 new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);
1365 tlb->translationReturn(virtPageAddr, outcome, pkt);
1371 return "trigger translationDoneEvent";
1383 return virtPageAddr;
1395 if (
tlb->outstandingReqs <
tlb->maxCoalescedReqs) {
1396 tlb->issueTLBLookup(pkt);
1398 tlb->outstandingReqs++;
1401 DPRINTF(GPUTLB,
"Reached maxCoalescedReqs number %d\n",
1402 tlb->outstandingReqs);
1428 DPRINTF(GPUTLB,
"Functional Translation Done - TLB hit for addr "
1431 local_entry = sender_state->
tlbEntry;
1433 DPRINTF(GPUTLB,
"Functional Translation Done - TLB miss for addr "
1439 new_entry = sender_state->
tlbEntry;
1441 local_entry = new_entry;
1446 DPRINTF(GPUTLB,
"allocating entry w/ addr %#x\n",
1449 local_entry =
insert(virt_page_addr, *new_entry);
1452 assert(local_entry);
1455 DPRINTF(GPUTLB,
"Entry found with vaddr %#x, doing protection checks "
1456 "while paddr was %#x.\n", local_entry->vaddr,
1457 local_entry->paddr);
1468 int page_size = local_entry->size();
1469 Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1470 DPRINTF(GPUTLB,
"Translated %#x -> %#x.\n", vaddr, paddr);
1474 if (local_entry->uncacheable)
1487 bool update_stats = !sender_state->
prefetch;
1493 tlb->updatePageFootprint(virt_page_addr);
1496 bool success =
tlb->tlbLookup(pkt->
req, tc, update_stats);
1502 tlb->globalNumTLBAccesses++;
1506 tlb->globalNumTLBHits++;
1512 tlb->globalNumTLBMisses++;
1513 if (
tlb->hasMemSidePort) {
1515 tlb->memSidePort[0]->sendFunctional(pkt);
1521 DPRINTF(GPUTLB,
"Doing a page walk for address %#x\n",
1530 assert(alignedVaddr == virt_page_addr);
1544 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1545 newEntry.pageStart());
1556 DPRINTF(GPUTLB,
"Mapping %#x to %#x\n", alignedVaddr,
1557 newEntry.pageStart());
1564 DPRINTF(GPUPrefetch,
"Prefetch failed %#x\n",
1574 DPRINTF(GPUPrefetch,
"Functional Hit for vaddr %#x\n",
1589 tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1620 DPRINTF(GPUTLB,
"MemSidePort recvTiming for virt_page_addr %#x\n",
1623 TLBEvent *tlb_event =
tlb->translationReturnEvent[virt_page_addr];
1650 delete old_tlb_event;
1678 ret =
TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1681 bool first_page_access = ret.second;
1683 if (first_page_access) {
1686 int accessed_before;
1687 accessed_before =
curTick() - ret.first->second.lastTimeAccessed;
1688 ret.first->second.totalReuseDistance += accessed_before;
1691 ret.first->second.accessesPerPage++;
1692 ret.first->second.lastTimeAccessed =
curTick();
1695 ret.first->second.localTLBAccesses
1703 std::ostream *page_stat_file =
nullptr;
1713 *page_stat_file <<
"page,max_access_distance,mean_access_distance, "
1714 <<
"stddev_distance" << std::endl;
1718 AccessPatternTable::iterator iter, iter_begin, iter_end;
1719 unsigned int sum_avg_reuse_distance_per_page = 0;
1723 sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724 iter->second.accessesPerPage;
1727 unsigned int tmp = iter->second.localTLBAccesses[0];
1728 unsigned int prev = tmp;
1730 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1735 prev = iter->second.localTLBAccesses[
i];
1738 iter->second.localTLBAccesses[
i] -= tmp;
1741 iter->second.sumDistance +=
1742 iter->second.localTLBAccesses[
i];
1745 iter->second.meanDistance =
1746 iter->second.sumDistance / iter->second.accessesPerPage;
1750 unsigned int max_distance = 0;
1751 unsigned int stddev_distance = 0;
1753 for (
int i = 0;
i < iter->second.localTLBAccesses.size(); ++
i) {
1754 unsigned int tmp_access_distance =
1755 iter->second.localTLBAccesses[
i];
1757 if (tmp_access_distance > max_distance) {
1758 max_distance = tmp_access_distance;
1762 tmp_access_distance - iter->second.meanDistance;
1763 stddev_distance += pow(diff, 2);
1768 sqrt(stddev_distance/iter->second.accessesPerPage);
1770 if (page_stat_file) {
1771 *page_stat_file << std::hex << iter->first <<
",";
1772 *page_stat_file << std::dec << max_distance <<
",";
1773 *page_stat_file << std::dec << iter->second.meanDistance
1775 *page_stat_file << std::dec << stddev_distance;
1776 *page_stat_file << std::endl;
1780 iter->second.localTLBAccesses.clear();
1795 X86GPUTLBParams::create()
Counter value() const
Return the current value of this stat as its base type.
AccessPatternTable TLBFootprint
unsigned int accessesPerPage
const Addr PhysAddrPrefixPciConfig
Stats::Formula globalTLBMissRate
std::ostream * stream() const
Get the underlying output stream.
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Stats::Scalar localCycles
virtual Addr instAddr()=0
decltype(nullptr) constexpr NoFault
const char * description() const
Return a C string describing the event.
virtual void serialize(CheckpointOut &cp) const
Serialize an object.
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
TLB TranslationState: this currently is somewhat of a bastardization of the usage of SenderState...
Stats::Scalar avgReuseDistance
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
void makeTimingResponse()
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Stats::Scalar accessCycles
Stats::Formula localTLBMissRate
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
unsigned int meanDistance
Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, int &latency)
virtual MiscReg readMiscRegNoEffect(int misc_reg) const =0
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
virtual Process * getProcessPtr()=0
MemSidePort is the TLB Port closer to the memory side. If this is a last level TLB then this port will...
bool hasMemSidePort
if true, then this is not the last level TLB
unsigned int totalReuseDistance
const Addr IntAddrPrefixCPUID
unsigned int lastTimeAccessed
void translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Bitfield< 14 > expandDown
Stats::Scalar localNumTLBMisses
ThreadContext is the external interface to all thread state for anything outside of the CPU...
std::vector< GpuTlbEntry > tlb
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to ...
GpuTlbEntry * lookup(Addr va, bool update_lru=true)
const Addr IntAddrPrefixMask
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Stats::Scalar numUniquePages
Stats::Scalar globalNumTLBMisses
std::queue< Addr > cleanupQueue
std::vector< MemSidePort * > memSidePort
bool accessDistance
Print out accessDistance stats.
Tick curTick()
The current simulated tick.
Fault translateInt(RequestPtr req, ThreadContext *tc)
void regStats()
Register statistics for this object.
std::string csprintf(const char *format, const Args &...args)
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Stats::Formula localLatency
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, GpuTlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
void invalidateNonGlobal()
Fault translate(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
void setConfigAddress(uint32_t addr)
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
The request is to an uncacheable address.
BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a master port with a given name and index.
This hash map will use the virtual page address as a key and will keep track of total number of acces...
const RequestPtr req
A pointer to the original request.
bool tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
void demapPage(Addr va, uint64_t asn)
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the master port.
TlbEntry(Addr asn, Addr _vaddr, Addr _paddr, bool uncacheable, bool read_only)
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
const Addr IntAddrPrefixMSR
T roundDown(const T &val, const U &align)
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
std::vector< CpuSidePort * > cpuSidePort
Stats::Scalar globalNumTLBAccesses
void updateOutcome(tlbOutcome _outcome)
static MiscRegIndex MISCREG_SEG_SEL(int index)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::vector< EntryList > freeList
const Request::FlagsType M5_VAR_USED SegmentFlagMask
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
virtual void finish(Fault fault, RequestPtr req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...
Tick ticks(int numCycles) const
const Addr IntAddrPrefixIO
Flags getFlags()
Accessor for flags.
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
EventWrapper< GpuTLB,&GpuTLB::cleanup > cleanupEvent
bool fixupStackFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the master port.
virtual void unserialize(CheckpointIn &cp)
Unserialize an object.
virtual const std::string name() const
Declarations of a non-full system Page Table.
static MiscRegIndex MISCREG_SEG_BASE(int index)
std::ostream CheckpointOut
EndBitUnion(PageTableEntry) struct TlbEntry Addr vaddr
SenderState * senderState
This packet's sender state.
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
virtual int contextId() const =0
GpuTlbEntry * insert(Addr vpn, GpuTlbEntry &entry)
virtual bool lookup(Addr vaddr, TheISA::TlbEntry &entry)=0
Lookup function.
void schedule(Event &event, Tick when)
Stats::Scalar pageTableCycles
Stats::Scalar localNumTLBHits
const Addr PhysAddrPrefixIO
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
This request is to a memory mapped register.
bool FA
true if this is a fully-associative TLB
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
void setPaddr(Addr paddr)
Set just the physical address.
Stats::Scalar localNumTLBAccesses
virtual void recvReqRetry()
void setFlags(Flags flags)
Note that unlike other accessors, this function sets specific flags (ORs them in); it does not assign...
BaseSlavePort & getSlavePort(const std::string &if_name, PortID idx=InvalidPortID)
Get a slave port with a given name and index.
Stats::Scalar globalNumTLBHits
std::shared_ptr< FaultBase > Fault
void regStats() override
Register statistics for this object.
std::vector< int > reqCnt
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
bool hasPaddr() const
Accessor for paddr.
void cprintf(const char *format, const Args &...args)
void updatePageFootprint(Addr virt_page_addr)