// Extracted from: ~swilson/gem5-docs/gpu__tlb_8cc_source.html

 /*

  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.

  * All rights reserved.

  *

  * For use for simulation and test purposes only

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions are met:

  *

  * 1. Redistributions of source code must retain the above copyright notice,

  * this list of conditions and the following disclaimer.

  *

  * 2. Redistributions in binary form must reproduce the above copyright notice,

  * this list of conditions and the following disclaimer in the documentation

  * and/or other materials provided with the distribution.

  *

  * 3. Neither the name of the copyright holder nor the names of its contributors

  * may be used to endorse or promote products derived from this software

  * without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

  * POSSIBILITY OF SUCH DAMAGE.

  *

  * Author: Lisa Hsu

  */


 #include "gpu-compute/gpu_tlb.hh"


 #include <cmath>

 #include <cstring>


 #include "arch/x86/faults.hh"

 #include "arch/x86/insts/microldstop.hh"

 #include "arch/x86/pagetable.hh"

 #include "arch/x86/pagetable_walker.hh"

 #include "arch/x86/regs/misc.hh"

 #include "arch/x86/x86_traits.hh"

 #include "base/bitfield.hh"

 #include "base/output.hh"

 #include "base/trace.hh"

 #include "cpu/base.hh"

 #include "cpu/thread_context.hh"

 #include "debug/GPUPrefetch.hh"

 #include "debug/GPUTLB.hh"

 #include "mem/packet_access.hh"

 #include "mem/page_table.hh"

 #include "mem/request.hh"

 #include "sim/process.hh"


 namespace X86ISA

 {


     /**
      * Construct a GPU TLB from its configuration parameters.
      *
      * The TLB storage holds `size` entries, viewed as `size / assoc` sets
      * of `assoc` ways each. Every way starts out on its set's free list;
      * the per-set entry lists start empty. One CpuSidePort is created per
      * connected slave port and one MemSidePort per connected master port.
      *
      * @param p Configuration parameters (size, assoc, latencies, port
      *          connection counts, ...).
      */
     GpuTLB::GpuTLB(const Params *p)
         : MemObject(p), configAddress(0), size(p->size),
           cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
     {
         assoc = p->assoc;
         assert(assoc <= size);
         numSets = size/assoc;
         allocationPolicy = p->allocationPolicy;
         hasMemSidePort = false;
         accessDistance = p->accessDistance;
         clock = p->clk_domain->clockPeriod();

         tlb.assign(size, GpuTlbEntry());

         freeList.resize(numSets);
         entryList.resize(numSets);

         // Hand every way of every set to that set's free list.
         for (int set = 0; set < numSets; ++set) {
             for (int way = 0; way < assoc; ++way) {
                 int x = set * assoc + way;
                 freeList[set].push_back(&tlb.at(x));
             }
         }

         // Fully associative when a single set holds all the entries.
         FA = (size == assoc);

         // Set index is extracted with a bit-wise AND against this mask.
         // NOTE(review): this presumably requires numSets to be a power of
         // two -- confirm against the configurations in use.
         setMask = numSets - 1;

         // The walker hookup below is compiled out, so give the pointer a
         // well-defined value instead of leaving it indeterminate for
         // getWalker() to return.
         walker = nullptr;

     #if 0
         // GpuTLB doesn't yet support full system
         walker = p->walker;
         walker->setTLB(this);
     #endif

         maxCoalescedReqs = p->maxOutstandingReqs;

         // Do not allow maxCoalescedReqs to be more than the TLB associativity
         if (maxCoalescedReqs > assoc) {
             maxCoalescedReqs = assoc;
             cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
         }

         outstandingReqs = 0;
         hitLatency = p->hitLatency;
         missLatency1 = p->missLatency1;
         missLatency2 = p->missLatency2;

         // create the slave ports based on the number of connected ports
         for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
             cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                                   name(), i), this, i));
         }

         // create the master ports based on the number of connected ports
         for (size_t i = 0; i < p->port_master_connection_count; ++i) {
             memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                                   name(), i), this, i));
         }
     }


     // Destructor. It only checks an invariant; it releases nothing itself.

     // FIXME: it is unclear whether this destructor is ever invoked.

     GpuTLB::~GpuTLB()

     {

         // No translation-return events may still be pending when the TLB

         // is destroyed: the translationReturnEvent map must be empty.

         assert(translationReturnEvent.empty());

     }


     /**
      * Return one of the CPU-side (slave) ports of this TLB.
      *
      * @param if_name Interface name; only "slave" is recognised.
      * @param idx     Index into the vector of CPU-side ports.
      * @return Reference to the requested port. Panics when the name is
      *         not "slave" or the index is not below the number of
      *         CPU-side ports.
      */
     BaseSlavePort&
     GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
     {
         if (if_name != "slave") {
             panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
         }

         if (idx >= static_cast<PortID>(cpuSidePort.size())) {
             panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
         }

         return *cpuSidePort[idx];
     }


     /**
      * Return one of the memory-side (master) ports of this TLB and record
      * that a memory-side port has been requested.
      *
      * @param if_name Interface name; only "master" is recognised.
      * @param idx     Index into the vector of memory-side ports.
      * @return Reference to the requested port. Panics when the name is
      *         not "master" or the index is not below the number of
      *         memory-side ports.
      */
     BaseMasterPort&
     GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
     {
         if (if_name != "master") {
             panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
         }

         if (idx >= static_cast<PortID>(memSidePort.size())) {
             panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
         }

         // Remember that something is attached on the memory side.
         hasMemSidePort = true;

         return *memSidePort[idx];
     }


     /**
      * Install a translation in the set selected by the page number.
      *
      * A way is taken from the set's free list when one is available;
      * otherwise the entry at the tail of the set's entry list is reused.
      * The chosen entry is overwritten with a copy of `entry`, tagged with
      * `vpn`, and placed at the head of the set's entry list.
      *
      * @param vpn   Virtual address stored in the new entry's vaddr field.
      * @param entry Translation to copy into the TLB.
      * @return Pointer to the TLB-resident copy.
      */
     GpuTlbEntry*
     GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
     {
         const int set = (vpn >> TheISA::PageShift) & setMask;
         auto &spare = freeList[set];
         auto &resident = entryList[set];

         GpuTlbEntry *slot = nullptr;

         if (spare.empty()) {
             // Set is full: recycle the entry at the back of the list.
             slot = resident.back();
             resident.pop_back();
         } else {
             slot = spare.front();
             spare.pop_front();
         }

         *slot = entry;
         slot->vaddr = vpn;
         resident.push_front(slot);

         return slot;
     }


     /**
      * Search the set selected by `va` for an entry whose page covers it.
      *
      * @param va         Virtual address to look up.
      * @param update_lru When true, a matching entry is moved to the head
      *                   of its set's entry list.
      * @return Iterator to the matching entry (the list head after an LRU
      *         update), or the set's end() iterator when nothing matches.
      */
     GpuTLB::EntryList::iterator
     GpuTLB::lookupIt(Addr va, bool update_lru)
     {
         int set = (va >> TheISA::PageShift) & setMask;

         // A fully associative TLB has a single set, index 0.
         if (FA) {
             assert(!set);
         }

         auto &resident = entryList[set];

         for (auto it = resident.begin(); it != resident.end(); ++it) {
             GpuTlbEntry *candidate = *it;
             int page_size = candidate->size();

             // Skip entries whose [vaddr, vaddr + page_size) misses va.
             if (candidate->vaddr > va ||
                 candidate->vaddr + page_size <= va) {
                 continue;
             }

             DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                     "with size %#x.\n", va, candidate->vaddr, page_size);

             if (!update_lru)
                 return it;

             // Promote the hit to the front of the list.
             resident.push_front(candidate);
             resident.erase(it);
             return resident.begin();
         }

         return resident.end();
     }


     /**
      * Pointer-returning wrapper around lookupIt().
      *
      * @param va         Virtual address to look up.
      * @param update_lru Forwarded to lookupIt().
      * @return The matching entry, or nullptr when the set has no match.
      */
     GpuTlbEntry*
     GpuTLB::lookup(Addr va, bool update_lru)
     {
         const int set = (va >> TheISA::PageShift) & setMask;
         auto found = lookupIt(va, update_lru);

         return (found == entryList[set].end()) ? nullptr : *found;
     }


     void

     GpuTLB::invalidateAll()

     {

         DPRINTF(GPUTLB, "Invalidating all entries.\n");


         for (int i = 0; i < numSets; ++i) {

             while (!entryList[i].empty()) {

                 GpuTlbEntry *entry = entryList[i].front();

                 entryList[i].pop_front();

                 freeList[i].push_back(entry);

             }

         }

     }


     // Store addr in the configAddress member.

     void

     GpuTLB::setConfigAddress(uint32_t addr)

     {

         configAddress = addr;

     }


     void

     GpuTLB::invalidateNonGlobal()

     {

         DPRINTF(GPUTLB, "Invalidating all non global entries.\n");


         for (int i = 0; i < numSets; ++i) {

             for (auto entryIt = entryList[i].begin();

                  entryIt != entryList[i].end();) {

                 if (!(*entryIt)->global) {

                     freeList[i].push_back(*entryIt);

                     entryList[i].erase(entryIt++);

                 } else {

                     ++entryIt;

                 }

             }

         }

     }


     /**
      * Remove the translation covering `va`, if one is resident, and put
      * its entry back on the set's free list. The LRU order is not touched
      * by the lookup.
      *
      * @param va  Virtual address whose mapping should be removed.
      * @param asn Not used by this implementation.
      */
     void
     GpuTLB::demapPage(Addr va, uint64_t asn)
     {
         const int set = (va >> TheISA::PageShift) & setMask;
         auto &resident = entryList[set];

         auto match = lookupIt(va, false);

         if (match == resident.end())
             return;

         freeList[set].push_back(*match);
         resident.erase(match);
     }


     /**
      * Translate a request that targets an internal (non-memory) address
      * space. The space is selected by the prefix bits of (vaddr >> 3).
      *
      *  - CPUID space: not implemented, panics.
      *  - MSR space: the request is flagged MMAPPED_IPR and its physical
      *    address becomes the matching MISCREG index scaled by
      *    sizeof(MiscReg). An unknown MSR number yields a
      *    GeneralProtection fault.
      *  - I/O space: port 0xCF8 with a 4-byte access maps to the PCI
      *    config address register; ports 0xCFC-0xCFF map to PCI config
      *    space when bit 31 of that register is set; everything else is
      *    an uncacheable access to the I/O physical range.
      *
      * @param req Request to translate; flags and paddr are updated.
      * @param tc  Thread context used to read MISCREG_PCI_CONFIG_ADDRESS.
      * @return NoFault on success, GeneralProtection for an unknown MSR.
      */
     Fault
     GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
     {
         DPRINTF(GPUTLB, "Addresses references internal memory.\n");
         Addr vaddr = req->getVaddr();
         const Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

         if (prefix == IntAddrPrefixCPUID) {
             panic("CPUID memory space not yet implemented!\n");
         }

         if (prefix == IntAddrPrefixMSR) {
             vaddr = vaddr >> 3;
             req->setFlags(Request::MMAPPED_IPR);
             Addr regNum = 0;

             switch (vaddr & ~IntAddrPrefixMask) {
               // Time stamp, APIC base, MTRR capabilities
               case 0x10: regNum = MISCREG_TSC; break;
               case 0x1B: regNum = MISCREG_APIC_BASE; break;
               case 0xFE: regNum = MISCREG_MTRRCAP; break;

               // SYSENTER
               case 0x174: regNum = MISCREG_SYSENTER_CS; break;
               case 0x175: regNum = MISCREG_SYSENTER_ESP; break;
               case 0x176: regNum = MISCREG_SYSENTER_EIP; break;

               // Machine check global registers
               case 0x179: regNum = MISCREG_MCG_CAP; break;
               case 0x17A: regNum = MISCREG_MCG_STATUS; break;
               case 0x17B: regNum = MISCREG_MCG_CTL; break;

               // Debug control / last branch and exception records
               case 0x1D9: regNum = MISCREG_DEBUG_CTL_MSR; break;
               case 0x1DB: regNum = MISCREG_LAST_BRANCH_FROM_IP; break;
               case 0x1DC: regNum = MISCREG_LAST_BRANCH_TO_IP; break;
               case 0x1DD: regNum = MISCREG_LAST_EXCEPTION_FROM_IP; break;
               case 0x1DE: regNum = MISCREG_LAST_EXCEPTION_TO_IP; break;

               // Variable-range MTRRs
               case 0x200: regNum = MISCREG_MTRR_PHYS_BASE_0; break;
               case 0x201: regNum = MISCREG_MTRR_PHYS_MASK_0; break;
               case 0x202: regNum = MISCREG_MTRR_PHYS_BASE_1; break;
               case 0x203: regNum = MISCREG_MTRR_PHYS_MASK_1; break;
               case 0x204: regNum = MISCREG_MTRR_PHYS_BASE_2; break;
               case 0x205: regNum = MISCREG_MTRR_PHYS_MASK_2; break;
               case 0x206: regNum = MISCREG_MTRR_PHYS_BASE_3; break;
               case 0x207: regNum = MISCREG_MTRR_PHYS_MASK_3; break;
               case 0x208: regNum = MISCREG_MTRR_PHYS_BASE_4; break;
               case 0x209: regNum = MISCREG_MTRR_PHYS_MASK_4; break;
               case 0x20A: regNum = MISCREG_MTRR_PHYS_BASE_5; break;
               case 0x20B: regNum = MISCREG_MTRR_PHYS_MASK_5; break;
               case 0x20C: regNum = MISCREG_MTRR_PHYS_BASE_6; break;
               case 0x20D: regNum = MISCREG_MTRR_PHYS_MASK_6; break;
               case 0x20E: regNum = MISCREG_MTRR_PHYS_BASE_7; break;
               case 0x20F: regNum = MISCREG_MTRR_PHYS_MASK_7; break;

               // Fixed-range MTRRs
               case 0x250: regNum = MISCREG_MTRR_FIX_64K_00000; break;
               case 0x258: regNum = MISCREG_MTRR_FIX_16K_80000; break;
               case 0x259: regNum = MISCREG_MTRR_FIX_16K_A0000; break;
               case 0x268: regNum = MISCREG_MTRR_FIX_4K_C0000; break;
               case 0x269: regNum = MISCREG_MTRR_FIX_4K_C8000; break;
               case 0x26A: regNum = MISCREG_MTRR_FIX_4K_D0000; break;
               case 0x26B: regNum = MISCREG_MTRR_FIX_4K_D8000; break;
               case 0x26C: regNum = MISCREG_MTRR_FIX_4K_E0000; break;
               case 0x26D: regNum = MISCREG_MTRR_FIX_4K_E8000; break;
               case 0x26E: regNum = MISCREG_MTRR_FIX_4K_F0000; break;
               case 0x26F: regNum = MISCREG_MTRR_FIX_4K_F8000; break;

               // PAT and default memory type
               case 0x277: regNum = MISCREG_PAT; break;
               case 0x2FF: regNum = MISCREG_DEF_TYPE; break;

               // Machine check banks: control
               case 0x400: regNum = MISCREG_MC0_CTL; break;
               case 0x404: regNum = MISCREG_MC1_CTL; break;
               case 0x408: regNum = MISCREG_MC2_CTL; break;
               case 0x40C: regNum = MISCREG_MC3_CTL; break;
               case 0x410: regNum = MISCREG_MC4_CTL; break;
               case 0x414: regNum = MISCREG_MC5_CTL; break;
               case 0x418: regNum = MISCREG_MC6_CTL; break;
               case 0x41C: regNum = MISCREG_MC7_CTL; break;

               // Machine check banks: status
               case 0x401: regNum = MISCREG_MC0_STATUS; break;
               case 0x405: regNum = MISCREG_MC1_STATUS; break;
               case 0x409: regNum = MISCREG_MC2_STATUS; break;
               case 0x40D: regNum = MISCREG_MC3_STATUS; break;
               case 0x411: regNum = MISCREG_MC4_STATUS; break;
               case 0x415: regNum = MISCREG_MC5_STATUS; break;
               case 0x419: regNum = MISCREG_MC6_STATUS; break;
               case 0x41D: regNum = MISCREG_MC7_STATUS; break;

               // Machine check banks: address
               case 0x402: regNum = MISCREG_MC0_ADDR; break;
               case 0x406: regNum = MISCREG_MC1_ADDR; break;
               case 0x40A: regNum = MISCREG_MC2_ADDR; break;
               case 0x40E: regNum = MISCREG_MC3_ADDR; break;
               case 0x412: regNum = MISCREG_MC4_ADDR; break;
               case 0x416: regNum = MISCREG_MC5_ADDR; break;
               case 0x41A: regNum = MISCREG_MC6_ADDR; break;
               case 0x41E: regNum = MISCREG_MC7_ADDR; break;

               // Machine check banks: misc
               case 0x403: regNum = MISCREG_MC0_MISC; break;
               case 0x407: regNum = MISCREG_MC1_MISC; break;
               case 0x40B: regNum = MISCREG_MC2_MISC; break;
               case 0x40F: regNum = MISCREG_MC3_MISC; break;
               case 0x413: regNum = MISCREG_MC4_MISC; break;
               case 0x417: regNum = MISCREG_MC5_MISC; break;
               case 0x41B: regNum = MISCREG_MC6_MISC; break;
               case 0x41F: regNum = MISCREG_MC7_MISC; break;

               // EFER and SYSCALL/SYSRET
               case 0xC0000080: regNum = MISCREG_EFER; break;
               case 0xC0000081: regNum = MISCREG_STAR; break;
               case 0xC0000082: regNum = MISCREG_LSTAR; break;
               case 0xC0000083: regNum = MISCREG_CSTAR; break;
               case 0xC0000084: regNum = MISCREG_SF_MASK; break;

               // Segment bases and TSC_AUX
               case 0xC0000100: regNum = MISCREG_FS_BASE; break;
               case 0xC0000101: regNum = MISCREG_GS_BASE; break;
               case 0xC0000102: regNum = MISCREG_KERNEL_GS_BASE; break;
               case 0xC0000103: regNum = MISCREG_TSC_AUX; break;

               // Performance counters
               case 0xC0010000: regNum = MISCREG_PERF_EVT_SEL0; break;
               case 0xC0010001: regNum = MISCREG_PERF_EVT_SEL1; break;
               case 0xC0010002: regNum = MISCREG_PERF_EVT_SEL2; break;
               case 0xC0010003: regNum = MISCREG_PERF_EVT_SEL3; break;
               case 0xC0010004: regNum = MISCREG_PERF_EVT_CTR0; break;
               case 0xC0010005: regNum = MISCREG_PERF_EVT_CTR1; break;
               case 0xC0010006: regNum = MISCREG_PERF_EVT_CTR2; break;
               case 0xC0010007: regNum = MISCREG_PERF_EVT_CTR3; break;

               // System configuration, IORRs, top of memory
               case 0xC0010010: regNum = MISCREG_SYSCFG; break;
               case 0xC0010016: regNum = MISCREG_IORR_BASE0; break;
               case 0xC0010017: regNum = MISCREG_IORR_BASE1; break;
               case 0xC0010018: regNum = MISCREG_IORR_MASK0; break;
               case 0xC0010019: regNum = MISCREG_IORR_MASK1; break;
               case 0xC001001A: regNum = MISCREG_TOP_MEM; break;
               case 0xC001001D: regNum = MISCREG_TOP_MEM2; break;

               // Virtualization / SMM
               case 0xC0010114: regNum = MISCREG_VM_CR; break;
               case 0xC0010115: regNum = MISCREG_IGNNE; break;
               case 0xC0010116: regNum = MISCREG_SMM_CTL; break;
               case 0xC0010117: regNum = MISCREG_VM_HSAVE_PA; break;

               default:
                 return std::make_shared<GeneralProtection>(0);
             }

             // Scale the index by the size of a MiscReg so that memory
             // dependence calculations never see two registers overlap.
             req->setPaddr(regNum * sizeof(MiscReg));
             return NoFault;
         }

         if (prefix == IntAddrPrefixIO) {
             // TODO If CPL > IOPL or in virtual mode, check the I/O
             // permission bitmap in the TSS.

             const Addr IOPort = vaddr & ~IntAddrPrefixMask;

             // The port number must fit the 16 bit I/O address space.
             assert(!(IOPort & ~0xFFFF));

             const bool isPciAddrReg = (IOPort == 0xCF8 &&
                                        req->getSize() == 4);
             const bool isPciDataReg = ((IOPort & ~mask(2)) == 0xCFC);

             if (isPciAddrReg) {
                 req->setFlags(Request::MMAPPED_IPR);
                 req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
             } else if (isPciDataReg) {
                 req->setFlags(Request::UNCACHEABLE);

                 const Addr pciCfgAddr =
                     tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

                 // Bit 31 of the config address register selects PCI
                 // config space over plain I/O space.
                 if (bits(pciCfgAddr, 31, 31)) {
                     req->setPaddr(PhysAddrPrefixPciConfig |
                                   mbits(pciCfgAddr, 30, 2) |
                                   (IOPort & mask(2)));
                 } else {
                     req->setPaddr(PhysAddrPrefixIO | IOPort);
                 }
             } else {
                 req->setFlags(Request::UNCACHEABLE);
                 req->setPaddr(PhysAddrPrefixIO | IOPort);
             }

             return NoFault;
         }

         panic("Access to unrecognized internal address space %#x.\n",
               prefix);
     }


     /**
      * Probe the TLB for the request's virtual address without performing
      * a translation.
      *
      * A lookup only happens in protected mode with paging enabled; in
      * any other mode the function reports a miss. The lookup itself
      * always requests an LRU update.
      *
      * @param req          Request whose vaddr is probed.
      * @param tc           Thread context used to read MISCREG_M5_REG.
      * @param update_stats When false, the access/hit/miss counters are
      *                     left untouched.
      * @return true when a matching entry is resident.
      */
     bool
     GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
     {
     #ifndef NDEBUG
         uint32_t flags = req->getFlags();
         int seg = flags & SegmentFlagMask;
     #endif

         assert(seg != SEGMENT_REG_MS);
         Addr vaddr = req->getVaddr();
         DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

         if (!m5Reg.prot)
             return false;

         DPRINTF(GPUTLB, "In protected mode.\n");
         // make sure we are in 64-bit mode
         assert(m5Reg.mode == LongMode);

         // Without paging there is nothing to look up.
         if (!m5Reg.paging)
             return false;

         DPRINTF(GPUTLB, "Paging enabled.\n");
         // update LRU stack on a hit
         GpuTlbEntry *entry = lookup(vaddr, true);
         const bool tlb_hit = (entry != nullptr);

         // Functional TLB access for memory initialization, i.e., memory
         // seeding or instr. seeding -> leave the statistics alone.
         if (!update_stats)
             return tlb_hit;

         localNumTLBAccesses++;

         if (tlb_hit) {
             localNumTLBHits++;
         } else {
             localNumTLBMisses++;
         }

         return tlb_hit;
     }


     /**
      * Translate the virtual address of a request into a physical address.
      *
      * Requests to the internal (SEGMENT_REG_MS) address space are handed
      * to translateInt(). Otherwise, in protected mode outside long mode
      * the segment checks are applied, and with paging enabled the TLB is
      * consulted. On a miss the process page table is queried (after a
      * stack fix-up attempt for non-execute accesses) and the result is
      * inserted into the TLB. Page protection checks follow.
      *
      * @param req             Request to translate; paddr and flags are
      *                        updated.
      * @param tc              Thread context providing the misc registers
      *                        and the process.
      * @param translation     Not used by this function.
      * @param mode             Access mode (read, write or execute).
      * @param delayedResponse Always set to false, on every path.
      * @param timing          When true, `latency` is updated on the
      *                        paging path.
      * @param latency         Receives hitLatency on a hit, missLatency1
      *                        on a miss, plus missLatency2 when the first
      *                        page-table lookup fails for a non-execute
      *                        access. Only written when `timing` is true
      *                        and paging is enabled.
      * @return NoFault on success, otherwise a GeneralProtection or
      *         PageFault fault.
      */
     Fault
     GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                       Translation *translation, Mode mode,
                       bool &delayedResponse, bool timing, int &latency)
     {
         uint32_t flags = req->getFlags();
         int seg = flags & SegmentFlagMask;
         bool storeCheck = flags & (StoreCheck << FlagShift);

         // Set the out-parameter before any return so that callers never
         // read an indeterminate value, including on the internal address
         // space path below.
         delayedResponse = false;

         // If this is true, we're dealing with a request
         // to a non-memory address space.
         if (seg == SEGMENT_REG_MS) {
             return translateInt(req, tc);
         }

         Addr vaddr = req->getVaddr();
         DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

         // If protected mode has been enabled...
         if (m5Reg.prot) {
             DPRINTF(GPUTLB, "In protected mode.\n");
             // If we're not in 64-bit mode, do protection/limit checks
             if (m5Reg.mode != LongMode) {
                 DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                         "protection.\n");

                 // Check for a null segment selector.
                 if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                     seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                     && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                     return std::make_shared<GeneralProtection>(0);
                 }

                 bool expandDown = false;
                 SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

                 if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                     if (!attr.writable && (mode == BaseTLB::Write ||
                         storeCheck))
                         return std::make_shared<GeneralProtection>(0);

                     if (!attr.readable && mode == BaseTLB::Read)
                         return std::make_shared<GeneralProtection>(0);

                     expandDown = attr.expandDown;

                 }

                 Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
                 Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
                 // This assumes we're not in 64 bit mode. If we were, the
                 // default address size is 64 bits, overridable to 32.
                 int size = 32;
                 bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
                 SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

                 if ((csAttr.defaultSize && sizeOverride) ||
                     (!csAttr.defaultSize && !sizeOverride)) {
                     size = 16;
                 }

                 Addr offset = bits(vaddr - base, size - 1, 0);
                 Addr endOffset = offset + req->getSize() - 1;

                 if (expandDown) {
                     DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                     warn_once("Expand down segments are untested.\n");

                     if (offset <= limit || endOffset <= limit)
                         return std::make_shared<GeneralProtection>(0);
                 } else {
                     if (offset > limit || endOffset > limit)
                         return std::make_shared<GeneralProtection>(0);
                 }
             }

             // If paging is enabled, do the translation.
             if (m5Reg.paging) {
                 DPRINTF(GPUTLB, "Paging enabled.\n");
                 // The vaddr already has the segment base applied.
                 GpuTlbEntry *entry = lookup(vaddr);
                 localNumTLBAccesses++;

                 if (!entry) {
                     localNumTLBMisses++;
                     if (timing) {
                         latency = missLatency1;
                     }

                     if (FullSystem) {
                         fatal("GpuTLB doesn't support full-system mode\n");
                     } else {
                         DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                                 "at pc %#x.\n", vaddr, tc->instAddr());

                         Process *p = tc->getProcessPtr();
                         GpuTlbEntry newEntry;
                         bool success = p->pTable->lookup(vaddr, newEntry);

                         if (!success && mode != BaseTLB::Execute) {
                             // penalize a "page fault" more
                             if (timing) {
                                 latency += missLatency2;
                             }

                             // Retry once if the stack could be grown to
                             // cover the address.
                             if (p->fixupStackFault(vaddr))
                                 success = p->pTable->lookup(vaddr, newEntry);
                         }

                         if (!success) {
                             return std::make_shared<PageFault>(vaddr, true,
                                                                mode, true,
                                                                false);
                         } else {
                             newEntry.valid = success;
                             Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                             DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                     alignedVaddr, newEntry.pageStart());

                             entry = insert(alignedVaddr, newEntry);
                         }

                         DPRINTF(GPUTLB, "Miss was serviced.\n");
                     }
                 } else {
                     localNumTLBHits++;

                     if (timing) {
                         latency = hitLatency;
                     }
                 }

                 // Do paging protection checks.
                 bool inUser = (m5Reg.cpl == 3 &&
                                !(flags & (CPL0FlagBit << FlagShift)));

                 CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
                 bool badWrite = (!entry->writable && (inUser || cr0.wp));

                 if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                      badWrite)) {
                     // The page must have been present to get into the TLB in
                     // the first place. We'll assume the reserved bits are
                     // fine even though we're not checking them.
                     return std::make_shared<PageFault>(vaddr, true, mode,
                                                        inUser, false);
                 }

                 if (storeCheck && badWrite) {
                     // This would fault if this were a write, so return a page
                     // fault that reflects that happening.
                     return std::make_shared<PageFault>(vaddr, true,
                                                        BaseTLB::Write,
                                                        inUser, false);
                 }

                 DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                         "checks.\n", entry->paddr);

                 // Combine the page frame with the offset inside the page.
                 int page_size = entry->size();
                 Addr paddr = entry->paddr | (vaddr & (page_size - 1));
                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
                 req->setPaddr(paddr);

                 if (entry->uncacheable)
                     req->setFlags(Request::UNCACHEABLE);
             } else {
                 //Use the address which already has segmentation applied.
                 DPRINTF(GPUTLB, "Paging disabled.\n");
                 DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
                 req->setPaddr(vaddr);
             }
         } else {
             // Real mode
             DPRINTF(GPUTLB, "In real mode.\n");
             DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
             req->setPaddr(vaddr);
         }

         // Check for an access to the local APIC
         if (FullSystem) {
             LocalApicBase localApicBase =
                 tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

             Addr baseAddr = localApicBase.base * PageBytes;
             Addr paddr = req->getPaddr();

             if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
                 // Force the access to be uncacheable.
                 req->setFlags(Request::UNCACHEABLE);
                 req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                                   paddr - baseAddr));
             }
         }

         return NoFault;
     }


     /**
      * Atomic-mode translation entry point.
      *
      * Forwards to translate() without a Translation object (nullptr) and
      * with the sixth argument set to false, then hands the resulting Fault
      * straight back to the caller.
      *
      * @param req     request to translate
      * @param tc      thread context the request belongs to
      * @param mode    access mode forwarded to translate()
      * @param latency forwarded by reference to translate()
      * @return the Fault produced by translate()
      */
     Fault
     GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                             int &latency)
     {
         // translate() needs somewhere to report a delayed response; the
         // value is not inspected on this path.
         bool delayed_response;

         Fault result = GpuTLB::translate(req, tc, nullptr, mode,
                                          delayed_response, false, latency);
         return result;
     }


     /**
      * Timing-mode translation entry point.
      *
      * Forwards to translate() with the supplied Translation object and the
      * sixth argument set to true. Unless translate() reports a delayed
      * response, the translation is completed immediately through
      * translation->finish().
      *
      * @param req         request to translate
      * @param tc          thread context the request belongs to
      * @param translation completion callback object; must not be null
      * @param mode        access mode forwarded to translate()
      * @param latency     forwarded by reference to translate()
      */
     void
     GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
             Translation *translation, Mode mode, int &latency)
     {
         bool delayed_response;
         assert(translation);

         Fault result = GpuTLB::translate(req, tc, translation, mode,
                                          delayed_response, true, latency);

         // A delayed response means finish() is not called from here.
         if (delayed_response)
             return;

         translation->finish(result, req, tc, mode);
     }


     /** Accessor for the walker pointer held by this TLB. */
     Walker*
     GpuTLB::getWalker()
     {
         return this->walker;
     }


     /** Checkpoint hook; this implementation writes nothing to cp. */
     void
     GpuTLB::serialize(CheckpointOut &cp) const
     {
         // Intentionally empty: no state is written.
     }


     /** Checkpoint-restore hook; this implementation reads nothing from cp. */
     void
     GpuTLB::unserialize(CheckpointIn &cp)
     {
         // Intentionally empty: no state is restored.
     }


     /**
      * Register the statistics of this TLB.
      *
      * Calls the base-class regStats(), assigns every stat a name (prefixed
      * with name()) and a description, and defines the derived formulas:
      * local/global miss rate (as a percentage of accesses) and the local
      * latency (localCycles per local access).
      */
     void
     GpuTLB::regStats()
     {
         MemObject::regStats();

         // ---- local counters ----
         localNumTLBAccesses.name(name() + ".local_TLB_accesses")
             .desc("Number of TLB accesses");

         localNumTLBHits.name(name() + ".local_TLB_hits")
             .desc("Number of TLB hits");

         localNumTLBMisses.name(name() + ".local_TLB_misses")
             .desc("Number of TLB misses");

         localTLBMissRate.name(name() + ".local_TLB_miss_rate")
             .desc("TLB miss rate");

         // ---- cycle accounting ----
         accessCycles.name(name() + ".access_cycles")
             .desc("Cycles spent accessing this TLB level");

         pageTableCycles.name(name() + ".page_table_cycles")
             .desc("Cycles spent accessing the page table");

         // Local miss rate, expressed as a percentage.
         localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

         numUniquePages.name(name() + ".unique_pages")
             .desc("Number of unique pages touched");

         localCycles.name(name() + ".local_cycles")
             .desc("Number of cycles spent in queue for all incoming reqs");

         localLatency.name(name() + ".local_latency")
             .desc("Avg. latency over incoming coalesced reqs");

         // Average latency per local access.
         localLatency = localCycles / localNumTLBAccesses;

         // ---- global counters ----
         globalNumTLBAccesses.name(name() + ".global_TLB_accesses")
             .desc("Number of TLB accesses");

         globalNumTLBHits.name(name() + ".global_TLB_hits")
             .desc("Number of TLB hits");

         globalNumTLBMisses.name(name() + ".global_TLB_misses")
             .desc("Number of TLB misses");

         globalTLBMissRate.name(name() + ".global_TLB_miss_rate")
             .desc("TLB miss rate");

         // Global miss rate, expressed as a percentage.
         globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

         avgReuseDistance.name(name() + ".avg_reuse_distance")
             .desc("avg. reuse distance over all pages (in ticks)");
     }


     /**
      * Perform the TLB lookup for an incoming translation packet and schedule
      * the TLBEvent that will act on the result.
      *
      * The packet's senderState must be a TranslationState. For non-prefetch
      * requests the access/latency statistics and the page footprint are
      * updated. On a hit, a copy of the matching entry is attached to the
      * sender state. Finally a TLBEvent carrying the outcome is recorded in
      * translationReturnEvent (keyed by the virtual page address) and
      * scheduled hitLatency cycles from now. Panics if an event is already
      * registered for that page.
      *
      * @param pkt translation request packet; must be non-null and carry a
      *            senderState
      */
     void
     GpuTLB::issueTLBLookup(PacketPtr pkt)
     {
         assert(pkt);
         assert(pkt->senderState);

         TranslationState *state =
             safe_cast<TranslationState*>(pkt->senderState);

         const Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                               TheISA::PageBytes);

         // Prefetches do not contribute to the statistics.
         const bool update_stats = !state->prefetch;
         ThreadContext *ctx = state->tc;

         DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
                 virt_page_addr);

         const int req_cnt = state->reqCnt.back();

         if (update_stats) {
             // Subtract the start tick now; translationReturn() adds the end
             // tick back, leaving the elapsed time in the stat.
             accessCycles -= (curTick() * req_cnt);
             localCycles -= curTick();
             updatePageFootprint(virt_page_addr);
             globalNumTLBAccesses += req_cnt;
         }

         RequestPtr request = pkt->req;

         // Access the TLB and figure out if it's a hit or a miss.
         const bool hit = tlbLookup(request, ctx, update_stats);
         const tlbOutcome lookup_outcome = hit ? TLB_HIT : TLB_MISS;

         if (!hit) {
             if (update_stats)
                 globalNumTLBMisses += req_cnt;
         } else {
             // Put a copy of the entry in the SenderState.
             GpuTlbEntry *found = lookup(request->getVaddr(), false);
             assert(found);

             state->tlbEntry =
                 new GpuTlbEntry(0, found->vaddr, found->paddr, found->valid);

             if (update_stats) {
                 // reqCnt has one element per level, so its size tells us
                 // which level we are in.
                 state->hitLevel = state->reqCnt.size();
                 globalNumTLBHits += req_cnt;
             }
         }

         // The lookup outcome is now known. Create the TLBEvent that will
         // take the follow-up action (finish on a hit, forward to the lower
         // level TLB on a miss, or start a page walk at the last level).
         TLBEvent *tlb_event =
             new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);

         if (translationReturnEvent.count(virt_page_addr)) {
             panic("Virtual Page Address %#x already has a return event\n",
                   virt_page_addr);
         }

         translationReturnEvent[virt_page_addr] = tlb_event;
         assert(tlb_event);

         const auto return_tick = curTick() + this->ticks(hitLatency);

         DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
                 return_tick);

         schedule(tlb_event, return_tick);
     }


     /**
      * Build a TLBEvent with CPU_Tick_Pri priority that remembers the owning
      * TLB, the virtual page address, the lookup outcome and the packet.
      */
     GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
                                PacketPtr _pkt)
         : Event(CPU_Tick_Pri),
           tlb(_tlb),
           virtPageAddr(_addr),
           outcome(tlb_outcome),
           pkt(_pkt)
     {
         // Nothing to do: all state is set up in the initializer list.
     }


     /**
      * Check the paging protection bits of a TLB entry against an access.
      *
      * This function has no way to report a fault to its caller (it returns
      * void), so any violation is treated as fatal. The original code used
      * assert(false), which is compiled out under NDEBUG and let violating
      * accesses proceed silently; panic() is used instead so the violation
      * is reported in every build.
      *
      * @param tc        thread context used to read MISCREG_M5_REG and CR0
      * @param pkt       packet whose request flags are inspected
      * @param tlb_entry entry whose user/writable bits are checked
      * @param mode      access mode; writes are checked against badWrite
      */
     void
     GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
             GpuTlbEntry * tlb_entry, Mode mode)
     {
         HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
         uint32_t flags = pkt->req->getFlags();
         bool storeCheck = flags & (StoreCheck << FlagShift);

         // User-mode access: CPL 3 and the request is not flagged as CPL0.
         bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
         CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

         // A write to a non-writable page is bad in user mode, or in any
         // mode when CR0.WP is set.
         bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));

         if ((inUser && !tlb_entry->user) ||
             (mode == BaseTLB::Write && badWrite)) {
             // The page must have been present to get into the TLB in
             // the first place. We'll assume the reserved bits are
             // fine even though we're not checking them.
             panic("Page protection violation for vaddr %#x\n",
                   pkt->req->getVaddr());
         }

         if (storeCheck && badWrite) {
             // This access would fault if it were a write. No fault can be
             // returned from here, so stop the simulation.
             panic("Store-check protection violation for vaddr %#x\n",
                   pkt->req->getVaddr());
         }
     }


     /**
      * Finish a timing translation and send the response up the hierarchy.
      *
      * On a hit the entry stored in the sender state is used directly. On
      * any other outcome the sender state must carry the entry produced by a
      * lower level or by the page walk; it is inserted into this TLB when
      * allocationPolicy is set. After the protection checks the physical
      * address is written into the request, the packet is turned into a
      * timing response if necessary and sent through cpuSidePort[0], and the
      * page address is queued for cleanup.
      *
      * @param virt_page_addr virtual page address of the translation
      * @param tlb_outcome    TLB_HIT, or any other value for a returning miss
      * @param pkt            translation packet; must be non-null
      */
     void
     GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
             PacketPtr pkt)
     {
         assert(pkt);

         const Addr vaddr = pkt->req->getVaddr();

         TranslationState *state =
             safe_cast<TranslationState*>(pkt->senderState);

         ThreadContext *tc = state->tc;
         Mode mode = state->tlbMode;

         // Entry carried by the sender state; may be replaced below by the
         // copy that insert() returns.
         GpuTlbEntry *entry = state->tlbEntry;

         if (tlb_outcome != TLB_HIT) {
             DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                     vaddr);

             // We are returning either from a page walk or from a hit at a
             // lower TLB level, so the sender state must be carrying the
             // entry.
             assert(entry);

             if (allocationPolicy) {
                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                         virt_page_addr);

                 entry = insert(virt_page_addr, *entry);
             }

             assert(entry);
         } else {
             DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
         }

         DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
                 "while paddr was %#x.\n", entry->vaddr,
                 entry->paddr);

         pagingProtectionChecks(tc, pkt, entry, mode);

         // Combine the page frame with the offset inside the page.
         int page_size = entry->size();
         const Addr page_offset = vaddr & (page_size - 1);
         const Addr paddr = entry->paddr | page_offset;
         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

         // The packet goes back through the cpu side slave port, so it has
         // to be a response.
         if (pkt->isRequest())
             pkt->makeTimingResponse();

         pkt->req->setPaddr(paddr);

         if (entry->uncacheable)
             pkt->req->setFlags(Request::UNCACHEABLE);

         // Send the packet back to the coalescer.
         cpuSidePort[0]->sendTimingResp(pkt);

         // Queue this page for the cleanup event.
         cleanupQueue.push(virt_page_addr);

         // Schedule the cleanup only once per cycle; several translations
         // may return in the same cycle. It is a maximum priority event and
         // must be on the same cycle as the cleanup event in TLBCoalescer to
         // avoid a race with IssueProbeEvent caused by
         // TLBCoalescer::MemSidePort::recvReqRetry.
         if (cleanupEvent.scheduled())
             return;

         schedule(cleanupEvent, curTick());
     }


     /**
      * Act on a fired TLBEvent.
      *
      * - TLB_HIT: finish the translation and close the timing statistics.
      * - TLB_MISS: close the timing statistics; then either forward the
      *   packet to the lower-level TLB through memSidePort[0] (queueing it in
      *   the port's retries list if the send fails) or, at the last level,
      *   re-schedule the event as a PAGE_WALK after missLatency2 cycles.
      * - PAGE_WALK: look the address up in the process page table (trying a
      *   stack fix-up for non-execute accesses), attach the resulting entry
      *   to the sender state and finish the translation.
      * - MISS_RETURN: a lower level answered; finish the translation.
      *
      * The trace messages in the page-walk path print virtPageAddr rather
      * than the debug-only alignedVaddr: alignedVaddr is declared under
      * #ifndef NDEBUG, so referencing it from DPRINTF would not compile in a
      * build that defines NDEBUG but keeps tracing enabled. The assert
      * guarantees both values are equal whenever alignedVaddr exists.
      *
      * @param virtPageAddr virtual page address the event was created for
      * @param outcome      current outcome stored in the event
      * @param pkt          translation packet; must be non-null
      */
     void
     GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
                               PacketPtr pkt)
     {
         DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

         assert(translationReturnEvent[virtPageAddr]);
         assert(pkt);

         TranslationState *sender_state =
             safe_cast<TranslationState*>(pkt->senderState);

         int req_cnt = sender_state->reqCnt.back();
         // Prefetches do not contribute to the statistics.
         bool update_stats = !sender_state->prefetch;

         if (outcome == TLB_HIT) {
             handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

             if (update_stats) {
                 // Adds the end tick; the start tick was subtracted in
                 // issueTLBLookup().
                 accessCycles += (req_cnt * curTick());
                 localCycles += curTick();
             }
         } else if (outcome == TLB_MISS) {
             DPRINTF(GPUTLB, "This is a TLB miss\n");

             if (update_stats) {
                 accessCycles += (req_cnt * curTick());
                 localCycles += curTick();
             }

             if (hasMemSidePort) {
                 // The one cycle added here represents the delay from when
                 // we get the reply back till when we propagate it to the
                 // coalescer above.
                 if (update_stats) {
                     accessCycles += (req_cnt * 1);
                     localCycles += 1;
                 }

                 if (!memSidePort[0]->sendTimingReq(pkt)) {
                     DPRINTF(GPUTLB, "Failed sending translation request to "
                             "lower level TLB for addr %#x\n", virtPageAddr);

                     memSidePort[0]->retries.push_back(pkt);
                 } else {
                     DPRINTF(GPUTLB, "Sent translation request to lower level "
                             "TLB for addr %#x\n", virtPageAddr);
                 }
             } else {
                 // This is the last level TLB. Start a page walk.
                 DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                         "addr %#x\n", virtPageAddr);

                 if (update_stats)
                     pageTableCycles -= (req_cnt * curTick());

                 // Reuse the already-registered event for the walk.
                 TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
                 assert(tlb_event);
                 tlb_event->updateOutcome(PAGE_WALK);
                 schedule(tlb_event, curTick() + ticks(missLatency2));
             }
         } else if (outcome == PAGE_WALK) {
             if (update_stats)
                 pageTableCycles += (req_cnt * curTick());

             // Need to access the page table and update the TLB.
             DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                     virtPageAddr);

             Process *p = sender_state->tc->getProcessPtr();
             TlbEntry newEntry;
             Addr vaddr = pkt->req->getVaddr();
     #ifndef NDEBUG
             Addr alignedVaddr = p->pTable->pageAlign(vaddr);
             assert(alignedVaddr == virtPageAddr);
     #endif
             bool success = p->pTable->lookup(vaddr, newEntry);
             if (!success && sender_state->tlbMode != BaseTLB::Execute) {
                 // Retry the lookup once if the stack fix-up succeeds.
                 if (p->fixupStackFault(vaddr))
                     success = p->pTable->lookup(vaddr, newEntry);
             }

             DPRINTF(GPUTLB, "Mapping %#x to %#x\n", virtPageAddr,
                     newEntry.pageStart());

             sender_state->tlbEntry =
                 new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);

             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
         } else if (outcome == MISS_RETURN) {
             handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
         } else {
             assert(false);
         }
     }


     void

     GpuTLB::TLBEvent::process()

     {

         tlb->translationReturn(virtPageAddr, outcome, pkt);

     }


     /** @return a fixed C string describing this event. */
     const char*
     GpuTLB::TLBEvent::description() const
     {
         const char *text = "trigger translationDoneEvent";
         return text;
     }


     /** Replace the outcome stored in this event. */
     void
     GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
     {
         this->outcome = _outcome;
     }


     /** @return the virtual page address this event was created for. */
     Addr
     GpuTLB::TLBEvent::getTLBEventVaddr()
     {
         return this->virtPageAddr;
     }


     /*
      * recvTimingReq receives a coalesced timing request from a
      * TLBCoalescer and hands it to issueTLBLookup().
      * The packet is rejected (false is returned) only when the number of
      * outstanding requests has reached maxCoalescedReqs.
      */
     bool
     GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
     {
         // Guard: no capacity left for another outstanding translation.
         if (tlb->outstandingReqs >= tlb->maxCoalescedReqs) {
             DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                     tlb->outstandingReqs);
             return false;
         }

         tlb->issueTLBLookup(pkt);

         // One more translation request is now in flight.
         tlb->outstandingReqs++;
         return true;
     }


     /**
      * Finish a functional translation by writing the physical address into
      * the packet's request.
      *
      * On a hit the entry stored in the sender state is used directly. On
      * any other outcome the sender state must carry the entry from a lower
      * level or from the page walk; it is inserted into this TLB when
      * allocationPolicy is set. Protection checks run only for non-prefetch
      * requests whose sender-state entry is valid.
      *
      * @param pkt         translation packet carrying a TranslationState
      * @param tlb_outcome TLB_HIT, or any other value for a returning miss
      */
     void
     GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
     {
         TranslationState *state =
             safe_cast<TranslationState*>(pkt->senderState);

         ThreadContext *tc = state->tc;
         Mode mode = state->tlbMode;
         const Addr vaddr = pkt->req->getVaddr();

         // Entry carried by the sender state; may be replaced below by the
         // copy that insert() returns.
         GpuTlbEntry *entry = state->tlbEntry;

         if (tlb_outcome != TLB_HIT) {
             DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                     "%#x\n", vaddr);

             // We are returning either from a page walk or from a hit at a
             // lower TLB level, so the sender state must be carrying the
             // entry.
             assert(entry);

             if (allocationPolicy) {
                 const Addr virt_page_addr =
                     roundDown(vaddr, TheISA::PageBytes);

                 DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                         virt_page_addr);

                 entry = insert(virt_page_addr, *entry);
             }

             assert(entry);
         } else {
             DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                     "%#x\n", vaddr);
         }

         DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
                 "while paddr was %#x.\n", entry->vaddr,
                 entry->paddr);

         // Do paging checks if it's a normal functional access. A prefetch
         // may touch something that would not pass protection; since there
         // is no demand access, that is not a violation. The entry is simply
         // placed in the TLB and a later demand access will be checked.
         const bool is_prefetch = state->prefetch;
         if (!is_prefetch && state->tlbEntry->valid)
             pagingProtectionChecks(tc, pkt, entry, mode);

         // Combine the page frame with the offset inside the page.
         int page_size = entry->size();
         const Addr page_offset = vaddr & (page_size - 1);
         const Addr paddr = entry->paddr | page_offset;
         DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

         pkt->req->setPaddr(paddr);

         if (entry->uncacheable) {
             pkt->req->setFlags(Request::UNCACHEABLE);
         }
     }


     // This is used for atomic translations. Everything has to happen
     // during the same cycle, i.e. within this call.
     //
     // The packet's senderState must be a TranslationState. The TLB is
     // looked up; on a miss the request is either forwarded functionally to
     // the lower-level TLB (memSidePort[0]) or, at the last level, resolved
     // with a lookup in the process page table. The resulting entry is
     // attached to the sender state and handleFuncTranslationReturn() writes
     // the physical address into the request. A failed prefetch returns
     // early without setting a physical address.
     void
     GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
     {
         TranslationState *sender_state =
             safe_cast<TranslationState*>(pkt->senderState);

         ThreadContext *tc = sender_state->tc;
         // Prefetches do not contribute to the statistics.
         bool update_stats = !sender_state->prefetch;

         Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                         TheISA::PageBytes);

         if (update_stats)
             tlb->updatePageFootprint(virt_page_addr);

         // Do the TLB lookup; update_stats is forwarded to tlbLookup().
         bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
         tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

         // functional mode means no coalescing
         // global metrics are the same as the local metrics
         if (update_stats) {
             tlb->globalNumTLBAccesses++;

             if (success) {
                 sender_state->hitLevel = sender_state->reqCnt.size();
                 tlb->globalNumTLBHits++;
             }
         }

         if (!success) {
             if (update_stats)
                 tlb->globalNumTLBMisses++;

             if (tlb->hasMemSidePort) {
                 // there is a TLB below -> propagate down the TLB hierarchy
                 tlb->memSidePort[0]->sendFunctional(pkt);

                 // If no valid translation from a prefetch, then just return
                 if (sender_state->prefetch && !pkt->req->hasPaddr())
                     return;
             } else {
                 // Need to access the page table and update the TLB
                 DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                         virt_page_addr);

                 Process *p = tc->getProcessPtr();
                 TlbEntry newEntry;

                 Addr vaddr = pkt->req->getVaddr();
     #ifndef NDEBUG
                 Addr alignedVaddr = p->pTable->pageAlign(vaddr);
                 assert(alignedVaddr == virt_page_addr);
     #endif

                 // Named differently from the outer 'success' (TLB lookup
                 // result) to avoid shadowing it.
                 bool walk_success = p->pTable->lookup(vaddr, newEntry);
                 if (!walk_success &&
                     sender_state->tlbMode != BaseTLB::Execute) {
                     // Retry the lookup once if the stack fix-up succeeds.
                     if (p->fixupStackFault(vaddr))
                         walk_success = p->pTable->lookup(vaddr, newEntry);
                 }

                 if (sender_state->prefetch && !walk_success) {
                     // A failed prefetch sends an empty TLB entry back so
                     // that it can be recognised as empty and handled
                     // accordingly. The trace prints virt_page_addr, which
                     // the assert above proves equal to alignedVaddr and
                     // which also exists when NDEBUG is defined.
                     DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                             virt_page_addr);

                     sender_state->tlbEntry = new GpuTlbEntry();

                     return;
                 }

                 // For a demand access no page fault is permitted after the
                 // second page table lookup. (A prefetch only reaches this
                 // point after a successful walk.)
                 assert(walk_success);

                 DPRINTF(GPUTLB, "Mapping %#x to %#x\n", virt_page_addr,
                         newEntry.pageStart());

                 sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
                                                          newEntry.paddr,
                                                          walk_success);
             }
         } else {
             // Print the address itself. The original passed the result of
             // an extra tlb->lookup() call, which printed a pointer and made
             // an additional lookup only when tracing was enabled.
             DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                     pkt->req->getVaddr());

             GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                              update_stats);

             assert(entry);

             sender_state->tlbEntry =
                 new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
         }

         // This is the function that populates pkt->req with the paddr of
         // the translation. If no translation happens (i.e. a prefetch
         // fails) the early returns above keep it from executing.
         tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
     }


     void

     GpuTLB::CpuSidePort::recvReqRetry()

     {

         // The CPUSidePort never sends anything but replies. No retries

         // expected.

         assert(false);

     }


     /**
      * @return an empty address range list; per the original note the
      *         ranges are currently not checked by the master.
      */
     AddrRangeList
     GpuTLB::CpuSidePort::getAddrRanges() const
     {
         return AddrRangeList();
     }


     /**
      * Timing response from the lower level.
      *
      * Finds the TLBEvent registered for the response's virtual page, marks
      * it as MISS_RETURN and schedules it one cycle from now.
      *
      * @param pkt response packet
      * @return always true
      */
     bool
     GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
     {
         const Addr page_addr = roundDown(pkt->req->getVaddr(),
                                          TheISA::PageBytes);

         DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
                 page_addr);

         // The event was registered when the lookup was issued.
         TLBEvent *pending = tlb->translationReturnEvent[page_addr];
         assert(pending);
         assert(page_addr == pending->getTLBEventVaddr());

         pending->updateOutcome(MISS_RETURN);

         const auto wakeup_tick = curTick() + tlb->ticks(1);
         tlb->schedule(pending, wakeup_tick);

         return true;
     }


     void

     GpuTLB::MemSidePort::recvReqRetry()

     {

         // No retries should reach the TLB. The retries

         // should only reach the TLBCoalescer.

         assert(false);

     }


     void

     GpuTLB::cleanup()

     {

         while (!cleanupQueue.empty()) {

             Addr cleanup_addr = cleanupQueue.front();

             cleanupQueue.pop();


             // delete TLBEvent

             TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];

             delete old_tlb_event;

             translationReturnEvent.erase(cleanup_addr);


             // update number of outstanding requests

             outstandingReqs--;

         }


         for (int i = 0; i < cpuSidePort.size(); ++i) {

             cpuSidePort[i]->sendRetryReq();

         }

     }


     /**
      * Record an access to a virtual page in TLBFootprint.
      *
      * The first access to a page increments numUniquePages; later accesses
      * add the ticks elapsed since the previous access to the page's
      * totalReuseDistance. The access count and last-access time are always
      * updated, and when accessDistance is enabled the current value of
      * localNumTLBAccesses is appended to the page's sample list.
      *
      * @param virt_page_addr virtual page address being accessed
      */
     void
     GpuTLB::updatePageFootprint(Addr virt_page_addr)
     {
         // Zeroed record used if this page has not been seen before.
         AccessInfo blank;
         blank.lastTimeAccessed = 0;
         blank.accessesPerPage = 0;
         blank.totalReuseDistance = 0;
         blank.sumDistance = 0;
         blank.meanDistance = 0;

         // insert() leaves an existing record untouched and reports whether
         // a new one was created.
         std::pair<AccessPatternTable::iterator, bool> inserted =
             TLBFootprint.insert(
                 AccessPatternTable::value_type(virt_page_addr, blank));

         auto &info = inserted.first->second;

         if (!inserted.second) {
             int ticks_since_last = curTick() - info.lastTimeAccessed;
             info.totalReuseDistance += ticks_since_last;
         } else {
             numUniquePages++;
         }

         info.accessesPerPage++;
         info.lastTimeAccessed = curTick();

         if (accessDistance)
             info.localTLBAccesses.push_back(localNumTLBAccesses.value());
     }


     /**
      * End-of-simulation hook: compute the average reuse distance and, when
      * accessDistance is enabled, dump per-page access-distance statistics.
      *
      * With accessDistance enabled a CSV file named after this object is
      * created in the simulation output directory with one row per page:
      * page address (hex), maximum access distance, mean access distance and
      * standard deviation of the access distance. The recorded samples of
      * each page are first converted in place from absolute access counts
      * to distances between consecutive accesses (the first sample becomes
      * 0).
      *
      * The squared deviations are accumulated in double precision. The
      * previous unsigned arithmetic wrapped around for every sample below
      * the mean and then converted an out-of-range double back to unsigned
      * int, which is undefined behaviour. The sum of the per-page averages
      * is kept in 64 bits so that it cannot wrap at 32 bits.
      *
      * TLBFootprint is cleared at the end.
      */
     void
     GpuTLB::exitCallback()
     {
         std::ostream *page_stat_file = nullptr;

         if (accessDistance) {
             // Print per page statistics to a separate file (.csv format).
             // simout is the gem5 output directory (default is m5out or the
             // one specified with -d).
             page_stat_file = simout.create(name().c_str())->stream();

             // print header
             *page_stat_file << "page,max_access_distance,mean_access_distance, "
                             << "stddev_distance" << std::endl;
         }

         // Sum over all pages of (total reuse distance / accesses).
         uint64_t sum_avg_reuse_distance_per_page = 0;

         // iterate through all pages seen by this TLB
         for (auto &page : TLBFootprint) {
             auto &info = page.second;

             sum_avg_reuse_distance_per_page +=
                 info.totalReuseDistance / info.accessesPerPage;

             if (!accessDistance)
                 continue;

             auto &samples = info.localTLBAccesses;

             // First pass: turn absolute access counts into distances. The
             // first sample has no predecessor and becomes 0; every later
             // one becomes the number of accesses strictly between it and
             // its predecessor. Also accumulate the sum for the mean.
             unsigned int prev = 0;
             for (size_t i = 0; i < samples.size(); ++i) {
                 const unsigned int raw = samples[i];
                 samples[i] = (i == 0) ? 0 : raw - (prev + 1);
                 prev = raw;
                 info.sumDistance += samples[i];
             }

             info.meanDistance = info.sumDistance / info.accessesPerPage;

             // Second pass (the mean is needed first): maximum and sum of
             // squared deviations.
             unsigned int max_distance = 0;
             double squared_dev_sum = 0.0;

             for (size_t i = 0; i < samples.size(); ++i) {
                 const unsigned int distance = samples[i];

                 if (distance > max_distance)
                     max_distance = distance;

                 // Signed difference in floating point: no unsigned wrap.
                 const double dev = static_cast<double>(distance) -
                                    static_cast<double>(info.meanDistance);
                 squared_dev_sum += dev * dev;
             }

             const unsigned int stddev_distance = static_cast<unsigned int>(
                 std::sqrt(squared_dev_sum / info.accessesPerPage));

             if (page_stat_file) {
                 *page_stat_file << std::hex << page.first << ",";
                 *page_stat_file << std::dec << max_distance << ",";
                 *page_stat_file << std::dec << info.meanDistance << ",";
                 *page_stat_file << std::dec << stddev_distance;
                 *page_stat_file << std::endl;
             }

             // erase the localTLBAccesses array
             samples.clear();
         }

         if (!TLBFootprint.empty()) {
             avgReuseDistance =
                 sum_avg_reuse_distance_per_page / TLBFootprint.size();
         }

         // clear the TLBFootprint map
         TLBFootprint.clear();
     }

 } // namespace X86ISA


 /** Factory: build a GpuTLB configured from this parameter object. */
 X86ISA::GpuTLB*
 X86GPUTLBParams::create()
 {
     X86ISA::GpuTLB *gpu_tlb = new X86ISA::GpuTLB(this);
     return gpu_tlb;
 }


X86ISA::GpuTLB::assoc
int assoc
Definition: gpu_tlb.hh:147

X86ISA::MISCREG_MTRR_FIX_4K_E0000
Definition: misc.hh:192

Stats::ScalarBase::value
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:677

X86ISA::GpuTLB::TLBFootprint
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:456

DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:212

X86ISA::GpuTLB::AccessInfo::accessesPerPage
unsigned int accessesPerPage
Definition: gpu_tlb.hh:438

X86ISA::MISCREG_IORR_MASK0
Definition: misc.hh:280

X86ISA::PhysAddrPrefixPciConfig
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:73

X86ISA::offset
offset
Definition: misc.hh:977

X86ISA::MISCREG_DEBUG_CTL_MSR
Definition: misc.hh:156

X86ISA::GpuTLB::Translation
Definition: gpu_tlb.hh:111

X86ISA::GpuTLB::globalTLBMissRate
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:216

simout
OutputDirectory simout
Definition: output.cc:65

OutputStream::stream
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:64

X86ISA::MISCREG_SF_MASK
Definition: misc.hh:252

X86ISA::GpuTLB::allocationPolicy
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:160

X86ISA::GpuTLB::localCycles
Stats::Scalar localCycles
Definition: gpu_tlb.hh:224

X86ISA::FlagShift
const int FlagShift
Definition: ldstflags.hh:52

ThreadContext::instAddr
virtual Addr instAddr()=0

NoFault
decltype(nullptr) constexpr NoFault
Definition: types.hh:189

X86ISA::MISCREG_MTRR_FIX_4K_E8000
Definition: misc.hh:193

X86ISA::MISCREG_TSC_AUX
Definition: misc.hh:256

X86ISA::MISCREG_MTRR_PHYS_BASE_6
Definition: misc.hh:170

X86ISA::StoreCheck
Definition: ldstflags.hh:56

X86ISA::MISCREG_MTRR_PHYS_MASK_2
Definition: misc.hh:177

X86ISA::GpuTLB::TLBEvent::description
const char * description() const
Return a C string describing the event.
Definition: gpu_tlb.cc:1369

X86ISA::GpuTLB::serialize
virtual void serialize(CheckpointOut &cp) const
Serialize an object.
Definition: gpu_tlb.cc:945

ArmISA::i
Bitfield< 7 > i
Definition: miscregs.hh:1378

std::pair
STL pair class.
Definition: stl.hh:61

X86ISA::MISCREG_KERNEL_GS_BASE
Definition: misc.hh:254

OutputDirectory::create
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:206

panic
#define panic(...)
Definition: misc.hh:153

X86ISA::GpuTLB::TranslationState
TLB TranslationState: this is currently somewhat of a bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:343

X86ISA::MISCREG_MTRR_FIX_4K_C0000
Definition: misc.hh:188

X86ISA::GpuTLB::avgReuseDistance
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:229

X86ISA::MISCREG_MC6_STATUS
Definition: misc.hh:219

X86ISA::GpuTLB::translationReturn
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:1252

BaseTLB::Write
Definition: tlb.hh:61

X86ISA::MISCREG_IORR_MASK1
Definition: misc.hh:281

X86ISA::SYS_SEGMENT_REG_IDTR
Definition: segment.hh:62

Packet::makeTimingResponse
void makeTimingResponse()
Definition: packet.hh:863

X86ISA::GpuTLB::handleFuncTranslationReturn
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1416

X86ISA::MISCREG_MTRR_FIX_16K_80000
Definition: misc.hh:186

faults.hh

X86ISA::GpuTLB::accessCycles
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:219

X86ISA::GpuTLB::localTLBMissRate
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:208

request.hh
Declaration of a request, the overall memory request consisting of the parts of the request that are ...

X86ISA::GpuTLB::invalidateAll
void invalidateAll()
Definition: gpu_tlb.cc:237

AlphaISA::PageShift
const Addr PageShift
Definition: isa_traits.hh:51

X86ISA::MISCREG_MTRR_PHYS_BASE_5
Definition: misc.hh:169

X86ISA::MISCREG_TSC
Definition: misc.hh:144

X86ISA::CPL0FlagBit
Definition: ldstflags.hh:54

X86ISA::GpuTLB::AccessInfo::meanDistance
unsigned int meanDistance
Definition: gpu_tlb.hh:452

X86ISA::GpuTLB::TLBEvent::getTLBEventVaddr
Addr getTLBEventVaddr()
Definition: gpu_tlb.cc:1381

X86ISA::MISCREG_MC5_MISC
Definition: misc.hh:240

X86ISA::MISCREG_MCG_STATUS
Definition: misc.hh:153

X86ISA::MISCREG_PERF_EVT_SEL1
Definition: misc.hh:260

X86ISA::GpuTLB::exitCallback
void exitCallback()
Definition: gpu_tlb.cc:1701

X86ISA::MISCREG_MTRR_PHYS_MASK_6
Definition: misc.hh:181

X86ISA::GpuTLB::translateAtomic
Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:914

X86ISA::GpuTLB::getWalker
Walker * getWalker()
Definition: gpu_tlb.cc:938

packet_access.hh

PageTableBase::pageAlign
Addr pageAlign(Addr a)
Definition: page_table.hh:109

ThreadContext::readMiscRegNoEffect
virtual MiscReg readMiscRegNoEffect(int misc_reg) const =0

X86ISA::GpuTLB::lookupIt
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:194

X86ISA::SEGMENT_REG_TSG
Definition: segment.hh:55

X86ISA::MISCREG_LAST_BRANCH_TO_IP
Definition: misc.hh:159

X86ISA::GpuTLB::numSets
int numSets
Definition: gpu_tlb.hh:148

base.hh

X86ISA::MISCREG_CSTAR
Definition: misc.hh:250

X86ISA::MISCREG_PAT
Definition: misc.hh:197

X86ISA::GpuTLB::TranslationState::hitLevel
int hitLevel
Definition: gpu_tlb.hh:367

FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:146

X86ISA::GpuTLB::setMask
Addr setMask
Definition: gpu_tlb.hh:154

ThreadContext::getProcessPtr
virtual Process * getProcessPtr()=0

X86ISA::MISCREG_MC0_STATUS
Definition: misc.hh:213

X86ISA::GpuTLB::MemSidePort
MemSidePort is the TLB Port closer to the memory side. If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:296

warn_once
#define warn_once(...)
Definition: misc.hh:226

X86ISA::MISCREG_PERF_EVT_CTR0
Definition: misc.hh:266

X86ISA::GpuTLB::hasMemSidePort
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:165

X86ISA::GpuTLB::AccessInfo::totalReuseDistance
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:440

X86ISA::IntAddrPrefixCPUID
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:68

X86ISA::MISCREG_MC3_MISC
Definition: misc.hh:238

thread_context.hh

X86ISA::GpuTLB::AccessInfo::lastTimeAccessed
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:437

X86ISA::MISCREG_MTRR_PHYS_MASK_4
Definition: misc.hh:179

X86ISA::GpuTLB::translateTiming
void translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:924

X86ISA::MISCREG_MC5_STATUS
Definition: misc.hh:218

X86ISA::PageBytes
const Addr PageBytes
Definition: isa_traits.hh:64

X86ISA::expandDown
Bitfield< 14 > expandDown
Definition: misc.hh:949

X86ISA::GpuTLB::AccessInfo::sumDistance
unsigned int sumDistance
Definition: gpu_tlb.hh:451

ArmISA::mode
Bitfield< 4, 0 > mode
Definition: miscregs.hh:1385

X86ISA::MISCREG_MC6_ADDR
Definition: misc.hh:230

X86ISA::MISCREG_PERF_EVT_CTR2
Definition: misc.hh:268

X86ISA::GpuTLB::localNumTLBMisses
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:207

X86ISA::GpuTLB::maxCoalescedReqs
int maxCoalescedReqs
Definition: gpu_tlb.hh:379

Packet::isRequest
bool isRequest() const
Definition: packet.hh:505

ThreadContext
ThreadContext is the external interface to all thread state for anything outside of the CPU...
Definition: thread_context.hh:93

X86ISA::GpuTLB::tlb
std::vector< GpuTlbEntry > tlb
Definition: gpu_tlb.hh:173

BaseSlavePort
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to ...
Definition: port.hh:139

X86ISA::GpuTLB::TranslationState::prefetch
bool prefetch
Definition: gpu_tlb.hh:357

X86ISA::GpuTLB::CpuSidePort
Definition: gpu_tlb.hh:269

X86ISA::MISCREG_MC0_MISC
Definition: misc.hh:235

X86ISA::GpuTLB::lookup
GpuTlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:224

X86ISA::MISCREG_MTRR_PHYS_MASK_0
Definition: misc.hh:175

X86ISA::GpuTLB::Mode
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:109

X86ISA::MISCREG_MTRR_FIX_4K_F0000
Definition: misc.hh:194

Request
Definition: request.hh:87

X86ISA::MISCREG_SYSENTER_EIP
Definition: misc.hh:150

X86ISA::Walker::setTLB
void setTLB(TLB *_tlb)
Definition: pagetable_walker.hh:190

X86ISA::MISCREG_MTRR_FIX_4K_C8000
Definition: misc.hh:189

trace.hh

X86ISA::IntAddrPrefixMask
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:67

X86ISA::MISCREG_SYSENTER_CS
Definition: misc.hh:148

X86ISA::GpuTLB::walker
Walker * walker
Definition: gpu_tlb.hh:137

X86ISA::GpuTLB::TLBEvent::TLBEvent
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: gpu_tlb.cc:1121

pagetable.hh

X86ISA::GpuTLB::MemSidePort::recvReqRetry
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: gpu_tlb.cc:1634

X86ISA::GpuTLB::MISS_RETURN
Definition: gpu_tlb.hh:252

X86ISA::GpuTLB::numUniquePages
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:222

X86ISA::GpuTlbEntry::valid
bool valid
Definition: gpu_tlb.hh:73

X86ISA::MISCREG_MTRR_PHYS_BASE_3
Definition: misc.hh:167

X86ISA::SEGMENT_REG_MS
Definition: segment.hh:57

X86ISA::MISCREG_IORR_BASE1
Definition: misc.hh:276

X86ISA::MISCREG_MC7_ADDR
Definition: misc.hh:231

X86ISA::GpuTLB::size
int size
Definition: gpu_tlb.hh:146

X86ISA::GpuTLB::globalNumTLBMisses
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:215

X86ISA::MISCREG_VM_CR
Definition: misc.hh:287

process.hh

X86ISA::GpuTLB::cleanupQueue
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:422

X86ISA::MISCREG_MC7_MISC
Definition: misc.hh:242

x86_traits.hh

X86ISA::GpuTLB::memSidePort
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:319

X86ISA::MISCREG_PCI_CONFIG_ADDRESS
Definition: misc.hh:397

X86ISA::GpuTLB::accessDistance
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:171

curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:47

X86ISA::MISCREG_MTRRCAP
Definition: misc.hh:146

X86ISA::GpuTLB::translateInt
Fault translateInt(RequestPtr req, ThreadContext *tc)
Definition: gpu_tlb.cc:288

X86ISA::MISCREG_EFER
Definition: misc.hh:246

X86ISA::MISCREG_PERF_EVT_SEL2
Definition: misc.hh:261

X86ISA::GpuTLB::regStats
void regStats()
Register statistics for this object.
Definition: gpu_tlb.cc:955

csprintf
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161

BaseTLB::Read
Definition: tlb.hh:61

X86ISA::MISCREG_SEG_ATTR
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Definition: misc.hh:534

X86ISA::GpuTLB::localLatency
Stats::Formula localLatency
Definition: gpu_tlb.hh:226

X86ISA::GpuTLB::TranslationState::tlbMode
Mode tlbMode
Definition: gpu_tlb.hh:346

X86ISA::GpuTLB::pagingProtectionChecks
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, GpuTlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:1133

X86ISA::MISCREG_MC4_ADDR
Definition: misc.hh:228

X86ISA::MISCREG_SEG_LIMIT
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
Definition: misc.hh:527

X86ISA::GpuTLB::outstandingReqs
int outstandingReqs
Definition: gpu_tlb.hh:383

X86ISA::GpuTLB::entryList
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
Definition: gpu_tlb.hh:189

X86ISA::GpuTLB::invalidateNonGlobal
void invalidateNonGlobal()
Definition: gpu_tlb.cc:257

X86ISA::MISCREG_MC4_STATUS
Definition: misc.hh:217

X86ISA::MISCREG_MCG_CAP
Definition: misc.hh:152

X86ISA::GpuTLB::TranslationState::tc
ThreadContext * tc
Definition: gpu_tlb.hh:348

X86ISA::MISCREG_FS_BASE
Definition: misc.hh:317

X86ISA::MISCREG_APIC_BASE
Definition: misc.hh:394

X86ISA::MISCREG_MC4_CTL
Definition: misc.hh:206

X86ISA::GpuTLB::translate
Fault translate(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:711

X86ISA::GpuTLB::CpuSidePort::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: gpu_tlb.cc:1601

X86ISA::MISCREG_MC3_ADDR
Definition: misc.hh:227

X86ISA::GpuTLB::setConfigAddress
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:251

X86ISA::MISCREG_MC3_STATUS
Definition: misc.hh:216

X86ISA::GpuTLB::MemSidePort::recvTimingResp
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: gpu_tlb.cc:1615

X86ISA::mask
mask
Definition: misc.hh:797

X86ISA::MISCREG_DEF_TYPE
Definition: misc.hh:199

Request::UNCACHEABLE
The request is to an uncacheable address.
Definition: request.hh:114

bitfield.hh

X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:85

X86ISA::GpuTLB::getMasterPort
BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a master port with a given name and index.
Definition: gpu_tlb.cc:152

X86ISA::GpuTLB::AccessInfo
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:435

X86ISA::MISCREG_LAST_BRANCH_FROM_IP
Definition: misc.hh:158

X86ISA::GpuTLB::clock
int clock
Definition: gpu_tlb.hh:89

X86ISA::GpuTLB::Params
X86GPUTLBParams Params
Definition: gpu_tlb.hh:105

Request::getPaddr
Addr getPaddr() const
Definition: request.hh:519

X86ISA::MISCREG_MC0_CTL
Definition: misc.hh:202

fatal
#define fatal(...)
Definition: misc.hh:163

Packet::req
const RequestPtr req
A pointer to the original request.
Definition: packet.hh:304

X86ISA::MISCREG_MC4_MISC
Definition: misc.hh:239

X86ISA::GpuTLB::tlbLookup
bool tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:663

X86ISA::GpuTLB::demapPage
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:275

X86ISA::GpuTLB::CpuSidePort::recvFunctional
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the master port.
Definition: gpu_tlb.cc:1481

X86ISA::TlbEntry
TlbEntry(Addr asn, Addr _vaddr, Addr _paddr, bool uncacheable, bool read_only)

X86ISA::MISCREG_PERF_EVT_SEL3
Definition: misc.hh:262

X86ISA::MISCREG_SMM_CTL
Definition: misc.hh:289

BaseTLB::Execute
Definition: tlb.hh:61

X86ISA::GpuTLB::translationReturnEvent
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:418

X86ISA::IntAddrPrefixMSR
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:69

roundDown
T roundDown(const T &val, const U &align)
Definition: intmath.hh:213

X86ISA::GpuTLB::hitLatency
int hitLatency
Definition: gpu_tlb.hh:199

X86ISA::GpuTLB::PAGE_WALK
Definition: gpu_tlb.hh:252

X86ISA::GpuTLB::handleTranslationReturn
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:1167

X86ISA::MISCREG_MCG_CTL
Definition: misc.hh:154

std::list< AddrRange >

X86ISA::GpuTLB::TLBEvent
Definition: gpu_tlb.hh:395

misc.hh

X86ISA::MISCREG_MTRR_PHYS_MASK_1
Definition: misc.hh:176

X86ISA::GpuTLB::cpuSidePort
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:317

X86ISA::MISCREG_MTRR_PHYS_MASK_5
Definition: misc.hh:180

X86ISA::MISCREG_LAST_EXCEPTION_TO_IP
Definition: misc.hh:161

microldstop.hh

X86ISA::GpuTLB::globalNumTLBAccesses
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:213

X86ISA::GpuTLB::TLB_MISS
Definition: gpu_tlb.hh:252

X86ISA::SEGMENT_REG_HS
Definition: segment.hh:53

X86ISA::GpuTLB::TLBEvent::updateOutcome
void updateOutcome(tlbOutcome _outcome)
Definition: gpu_tlb.cc:1375

X86ISA::MISCREG_SEG_SEL
static MiscRegIndex MISCREG_SEG_SEL(int index)
Definition: misc.hh:506

X86ISA::seg
Bitfield< 2, 0 > seg
Definition: types.hh:84

Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142

pagetable_walker.hh

X86ISA::MISCREG_MTRR_FIX_4K_D8000
Definition: misc.hh:191

X86ISA::GpuTLB::freeList
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:180

X86ISA::MISCREG_MC6_MISC
Definition: misc.hh:241

X86ISA::MISCREG_M5_REG
Definition: misc.hh:138

CheckpointIn
Definition: serialize.hh:340

X86ISA::AddrSizeFlagBit
Definition: ldstflags.hh:55

X86ISA::SegmentFlagMask
const Request::FlagsType M5_VAR_USED SegmentFlagMask
Definition: ldstflags.hh:51

safe_cast
T safe_cast(U ptr)
Definition: cast.hh:61

Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245

X86ISA::MISCREG_STAR
Definition: misc.hh:248

X86ISA::MISCREG_LSTAR
Definition: misc.hh:249

X86ISA::MISCREG_MC1_MISC
Definition: misc.hh:236

ArmISA::va
Bitfield< 8 > va
Definition: miscregs.hh:1473

X86ISA::MISCREG_PERF_EVT_CTR3
Definition: misc.hh:269

X86ISA::GpuTLB::TranslationState::tlbEntry
GpuTlbEntry * tlbEntry
Definition: gpu_tlb.hh:355

X86ISA::MISCREG_TOP_MEM
Definition: misc.hh:284

AlphaISA::PageBytes
const Addr PageBytes
Definition: isa_traits.hh:52

Process::pTable
PageTableBase * pTable
Definition: process.hh:178

X86ISA::MISCREG_MC7_CTL
Definition: misc.hh:209

X86ISA::GpuTLB::Translation::finish
virtual void finish(Fault fault, RequestPtr req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itself...

X86ISA::GpuTlbEntry
Definition: gpu_tlb.hh:65

X86ISA::GpuTLB::ticks
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:97

X86ISA::MISCREG_MC2_CTL
Definition: misc.hh:204

X86ISA::IntAddrPrefixIO
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:70

X86ISA::MISCREG_CS_ATTR
Definition: misc.hh:364

X86ISA::MISCREG_MC1_ADDR
Definition: misc.hh:225

X86ISA::MISCREG_MTRR_FIX_16K_A0000
Definition: misc.hh:187

Request::getFlags
Flags getFlags()
Accessor for flags.
Definition: request.hh:584

Stats::DataWrap::name
Derived & name(const std::string &name)
Set the name and mark this stat to print at the end of simulation.
Definition: statistics.hh:254

X86ISA::GpuTLB::cleanupEvent
EventWrapper< GpuTLB,&GpuTLB::cleanup > cleanupEvent
Definition: gpu_tlb.hh:428

Process::fixupStackFault
bool fixupStackFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
Definition: process.cc:338

output.hh

X86ISA::MISCREG_MTRR_FIX_64K_00000
Definition: misc.hh:185

X86ISA::GpuTLB::configAddress
uint32_t configAddress
Definition: gpu_tlb.hh:83

X86ISA::MISCREG_MC2_STATUS
Definition: misc.hh:215

X86ISA::GpuTLB::CpuSidePort::recvTimingReq
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the master port.
Definition: gpu_tlb.cc:1393

X86ISA::size
int size()
Definition: pagetable.hh:146

X86ISA::GpuTLB::unserialize
virtual void unserialize(CheckpointIn &cp)
Unserialize an object.
Definition: gpu_tlb.cc:950

SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:117

page_table.hh
Declarations of a non-full system Page Table.

X86ISA::SEGMENT_REG_LS
Definition: segment.hh:56

X86ISA::MISCREG_SEG_BASE
static MiscRegIndex MISCREG_SEG_BASE(int index)
Definition: misc.hh:513

X86ISA::MiscReg
uint64_t MiscReg
Definition: registers.hh:94

CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:67

X86ISA::vaddr
EndBitUnion(PageTableEntry) struct TlbEntry Addr vaddr
Definition: pagetable.hh:96

X86ISA::MISCREG_GS_BASE
Definition: misc.hh:318

X86ISA::MISCREG_MTRR_FIX_4K_F8000
Definition: misc.hh:195

X86ISA::MISCREG_MC2_ADDR
Definition: misc.hh:226

X86ISA::MISCREG_MTRR_PHYS_BASE_4
Definition: misc.hh:168

X86ISA::SEGMENT_REG_ES
Definition: segment.hh:47

X86ISA::MISCREG_SYSCFG
Definition: misc.hh:272

X86ISA::GpuTLB::missLatency2
int missLatency2
Definition: gpu_tlb.hh:201

X86ISA::GpuTLB::tlbOutcome
tlbOutcome
Definition: gpu_tlb.hh:252

X86ISA::MISCREG_IGNNE
Definition: misc.hh:288

Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:454

Event
Definition: eventq.hh:185

X86ISA::MISCREG_MC1_CTL
Definition: misc.hh:203

X86ISA::GpuTLB::cleanup
void cleanup()
Definition: gpu_tlb.cc:1642

MemObject
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60

Request::getVaddr
Addr getVaddr() const
Definition: request.hh:616

X86ISA::MISCREG_MC2_MISC
Definition: misc.hh:237

BaseMasterPort
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
Definition: port.hh:115

ThreadContext::contextId
virtual int contextId() const =0

X86ISA::MISCREG_MC5_ADDR
Definition: misc.hh:229

X86ISA::GpuTLB::TLB_HIT
Definition: gpu_tlb.hh:252

X86ISA::GpuTLB::insert
GpuTlbEntry * insert(Addr vpn, GpuTlbEntry &entry)
Definition: gpu_tlb.cc:168

X86ISA::GpuTLB::missLatency1
int missLatency1
Definition: gpu_tlb.hh:200

PageTableBase::lookup
virtual bool lookup(Addr vaddr, TheISA::TlbEntry &entry)=0
Lookup function.

X86ISA::MISCREG_MTRR_PHYS_BASE_7
Definition: misc.hh:171

EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:728

X86ISA::GpuTLB::pageTableCycles
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:221

X86ISA::MISCREG_MC5_CTL
Definition: misc.hh:207

X86ISA::MISCREG_TOP_MEM2
Definition: misc.hh:285

X86ISA::MISCREG_SYSENTER_ESP
Definition: misc.hh:149

Process
Definition: process.hh:63

X86ISA::GpuTLB::localNumTLBHits
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:206

X86ISA::MISCREG_LAST_EXCEPTION_FROM_IP
Definition: misc.hh:160

X86ISA::MISCREG_MTRR_PHYS_BASE_1
Definition: misc.hh:165

X86ISA::PhysAddrPrefixIO
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:72

X86ISA::MISCREG_VM_HSAVE_PA
Definition: misc.hh:290

X86ISA::MISCREG_MC7_STATUS
Definition: misc.hh:220

X86ISA::GpuTLB::~GpuTLB
~GpuTLB()
Definition: gpu_tlb.cc:131

X86ISA::MISCREG_IORR_BASE0
Definition: misc.hh:275

X86ISA::MISCREG_MTRR_PHYS_BASE_0
Definition: misc.hh:164

Stats::DataWrap::desc
Derived & desc(const std::string &_desc)
Set the description and mark this stat to print at the end of simulation.
Definition: statistics.hh:287

PortID
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:181

X86ISA::p
Bitfield< 0 > p
Definition: pagetable.hh:95

X86ISA::GpuTLB::GpuTLB
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:62

X86ISA::MISCREG_MC6_CTL
Definition: misc.hh:208

X86ISA::MISCREG_MTRR_PHYS_BASE_2
Definition: misc.hh:166

mbits
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:91

bits
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
Definition: bitfield.hh:67

Request::MMAPPED_IPR
This request is to a memory mapped register.
Definition: request.hh:126

Request::getSize
unsigned getSize() const
Definition: request.hh:552

X86ISA::MISCREG_MTRR_PHYS_MASK_7
Definition: misc.hh:182

ArmISA::attr
attr
Definition: miscregs.hh:1828

X86ISA::Walker
Definition: pagetable_walker.hh:58

X86ISA::MISCREG_MC0_ADDR
Definition: misc.hh:224

X86ISA::MISCREG_MC1_STATUS
Definition: misc.hh:214

X86ISA::GpuTLB::FA
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:153

X86ISA::x86LocalAPICAddress
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:93

Request::setPaddr
void setPaddr(Addr paddr)
Set just the physical address.
Definition: request.hh:487

X86ISA::GpuTLB::localNumTLBAccesses
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:205

X86ISA::x
Bitfield< 1 > x
Definition: types.hh:105

X86ISA::GpuTLB::CpuSidePort::recvReqRetry
virtual void recvReqRetry()
Definition: gpu_tlb.cc:1593

Request::setFlags
void setFlags(Flags flags)
Note that unlike other accessors, this function sets specific flags (ORs them in); it does not assign...
Definition: request.hh:595

X86ISA::GpuTLB::getSlavePort
BaseSlavePort & getSlavePort(const std::string &if_name, PortID idx=InvalidPortID)
Get a slave port with a given name and index.
Definition: gpu_tlb.cc:138

X86ISA::GpuTLB::globalNumTLBHits
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:214

Fault
std::shared_ptr< FaultBase > Fault
Definition: types.hh:184

X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:81

ClockedObject::regStats
void regStats() override
Register statistics for this object.
Definition: clocked_object.cc:147

X86ISA::MISCREG_MC3_CTL
Definition: misc.hh:205

X86ISA::GpuTLB::TranslationState::reqCnt
std::vector< int > reqCnt
Definition: gpu_tlb.hh:365

gpu_tlb.hh

X86ISA::GpuTLB::issueTLBLookup
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
Definition: gpu_tlb.cc:1043

Request::hasPaddr
bool hasPaddr() const
Accessor for paddr.
Definition: request.hh:513

X86ISA::GpuTLB
Definition: gpu_tlb.hh:76

X86ISA::MISCREG_PERF_EVT_SEL0
Definition: misc.hh:259

cprintf
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155

X86ISA::GpuTLB::updatePageFootprint
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1666

X86ISA::MISCREG_PERF_EVT_CTR1
Definition: misc.hh:267

X86ISA::MISCREG_MTRR_PHYS_MASK_3
Definition: misc.hh:178

X86ISA::GpuTLB::TLBEvent::process
void process()
Definition: gpu_tlb.cc:1363

X86ISA::MISCREG_MTRR_FIX_4K_D0000
Definition: misc.hh:190

X86ISA::MISCREG_CR0
Definition: misc.hh:106