gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gpu_tlb.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Lisa Hsu
34  */
35 
36 #include "gpu-compute/gpu_tlb.hh"
37 
38 #include <cmath>
39 #include <cstring>
40 
41 #include "arch/x86/faults.hh"
43 #include "arch/x86/pagetable.hh"
45 #include "arch/x86/regs/misc.hh"
46 #include "arch/x86/x86_traits.hh"
47 #include "base/bitfield.hh"
48 #include "base/output.hh"
49 #include "base/trace.hh"
50 #include "cpu/base.hh"
51 #include "cpu/thread_context.hh"
52 #include "debug/GPUPrefetch.hh"
53 #include "debug/GPUTLB.hh"
54 #include "mem/packet_access.hh"
55 #include "mem/page_table.hh"
56 #include "mem/request.hh"
57 #include "sim/process.hh"
58 
59 namespace X86ISA
60 {
61 
63  : MemObject(p), configAddress(0), size(p->size),
64  cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
65  {
66  assoc = p->assoc;
67  assert(assoc <= size);
68  numSets = size/assoc;
69  allocationPolicy = p->allocationPolicy;
70  hasMemSidePort = false;
71  accessDistance = p->accessDistance;
72  clock = p->clk_domain->clockPeriod();
73 
74  tlb.assign(size, GpuTlbEntry());
75 
76  freeList.resize(numSets);
77  entryList.resize(numSets);
78 
79  for (int set = 0; set < numSets; ++set) {
80  for (int way = 0; way < assoc; ++way) {
81  int x = set * assoc + way;
82  freeList[set].push_back(&tlb.at(x));
83  }
84  }
85 
86  FA = (size == assoc);
87 
96  setMask = numSets - 1;
97 
98  #if 0
99  // GpuTLB doesn't yet support full system
100  walker = p->walker;
101  walker->setTLB(this);
102  #endif
103 
104  maxCoalescedReqs = p->maxOutstandingReqs;
105 
106  // Do not allow maxCoalescedReqs to be more than the TLB associativity
107  if (maxCoalescedReqs > assoc) {
109  cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
110  }
111 
112  outstandingReqs = 0;
113  hitLatency = p->hitLatency;
114  missLatency1 = p->missLatency1;
115  missLatency2 = p->missLatency2;
116 
117  // create the slave ports based on the number of connected ports
118  for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
119  cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
120  name(), i), this, i));
121  }
122 
123  // create the master ports based on the number of connected ports
124  for (size_t i = 0; i < p->port_master_connection_count; ++i) {
125  memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
126  name(), i), this, i));
127  }
128  }
129 
130  // fixme: this is never called?
132  {
133  // make sure all the hash-maps are empty
134  assert(translationReturnEvent.empty());
135  }
136 
138  GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
139  {
140  if (if_name == "slave") {
141  if (idx >= static_cast<PortID>(cpuSidePort.size())) {
142  panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
143  }
144 
145  return *cpuSidePort[idx];
146  } else {
147  panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
148  }
149  }
150 
152  GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
153  {
154  if (if_name == "master") {
155  if (idx >= static_cast<PortID>(memSidePort.size())) {
156  panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
157  }
158 
159  hasMemSidePort = true;
160 
161  return *memSidePort[idx];
162  } else {
163  panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
164  }
165  }
166 
167  GpuTlbEntry*
169  {
170  GpuTlbEntry *newEntry = nullptr;
171 
176  int set = (vpn >> TheISA::PageShift) & setMask;
177 
178  if (!freeList[set].empty()) {
179  newEntry = freeList[set].front();
180  freeList[set].pop_front();
181  } else {
182  newEntry = entryList[set].back();
183  entryList[set].pop_back();
184  }
185 
186  *newEntry = entry;
187  newEntry->vaddr = vpn;
188  entryList[set].push_front(newEntry);
189 
190  return newEntry;
191  }
192 
193  GpuTLB::EntryList::iterator
194  GpuTLB::lookupIt(Addr va, bool update_lru)
195  {
196  int set = (va >> TheISA::PageShift) & setMask;
197 
198  if (FA) {
199  assert(!set);
200  }
201 
202  auto entry = entryList[set].begin();
203  for (; entry != entryList[set].end(); ++entry) {
204  int page_size = (*entry)->size();
205 
206  if ((*entry)->vaddr <= va && (*entry)->vaddr + page_size > va) {
207  DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
208  "with size %#x.\n", va, (*entry)->vaddr, page_size);
209 
210  if (update_lru) {
211  entryList[set].push_front(*entry);
212  entryList[set].erase(entry);
213  entry = entryList[set].begin();
214  }
215 
216  break;
217  }
218  }
219 
220  return entry;
221  }
222 
223  GpuTlbEntry*
224  GpuTLB::lookup(Addr va, bool update_lru)
225  {
226  int set = (va >> TheISA::PageShift) & setMask;
227 
228  auto entry = lookupIt(va, update_lru);
229 
230  if (entry == entryList[set].end())
231  return nullptr;
232  else
233  return *entry;
234  }
235 
236  void
238  {
239  DPRINTF(GPUTLB, "Invalidating all entries.\n");
240 
241  for (int i = 0; i < numSets; ++i) {
242  while (!entryList[i].empty()) {
243  GpuTlbEntry *entry = entryList[i].front();
244  entryList[i].pop_front();
245  freeList[i].push_back(entry);
246  }
247  }
248  }
249 
250  void
252  {
254  }
255 
256  void
258  {
259  DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
260 
261  for (int i = 0; i < numSets; ++i) {
262  for (auto entryIt = entryList[i].begin();
263  entryIt != entryList[i].end();) {
264  if (!(*entryIt)->global) {
265  freeList[i].push_back(*entryIt);
266  entryList[i].erase(entryIt++);
267  } else {
268  ++entryIt;
269  }
270  }
271  }
272  }
273 
274  void
275  GpuTLB::demapPage(Addr va, uint64_t asn)
276  {
277 
278  int set = (va >> TheISA::PageShift) & setMask;
279  auto entry = lookupIt(va, false);
280 
281  if (entry != entryList[set].end()) {
282  freeList[set].push_back(*entry);
283  entryList[set].erase(entry);
284  }
285  }
286 
287  Fault
289  {
290  DPRINTF(GPUTLB, "Addresses references internal memory.\n");
291  Addr vaddr = req->getVaddr();
292  Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;
293 
294  if (prefix == IntAddrPrefixCPUID) {
295  panic("CPUID memory space not yet implemented!\n");
296  } else if (prefix == IntAddrPrefixMSR) {
297  vaddr = vaddr >> 3;
299  Addr regNum = 0;
300 
301  switch (vaddr & ~IntAddrPrefixMask) {
302  case 0x10:
303  regNum = MISCREG_TSC;
304  break;
305  case 0x1B:
306  regNum = MISCREG_APIC_BASE;
307  break;
308  case 0xFE:
309  regNum = MISCREG_MTRRCAP;
310  break;
311  case 0x174:
312  regNum = MISCREG_SYSENTER_CS;
313  break;
314  case 0x175:
315  regNum = MISCREG_SYSENTER_ESP;
316  break;
317  case 0x176:
318  regNum = MISCREG_SYSENTER_EIP;
319  break;
320  case 0x179:
321  regNum = MISCREG_MCG_CAP;
322  break;
323  case 0x17A:
324  regNum = MISCREG_MCG_STATUS;
325  break;
326  case 0x17B:
327  regNum = MISCREG_MCG_CTL;
328  break;
329  case 0x1D9:
330  regNum = MISCREG_DEBUG_CTL_MSR;
331  break;
332  case 0x1DB:
334  break;
335  case 0x1DC:
336  regNum = MISCREG_LAST_BRANCH_TO_IP;
337  break;
338  case 0x1DD:
340  break;
341  case 0x1DE:
343  break;
344  case 0x200:
345  regNum = MISCREG_MTRR_PHYS_BASE_0;
346  break;
347  case 0x201:
348  regNum = MISCREG_MTRR_PHYS_MASK_0;
349  break;
350  case 0x202:
351  regNum = MISCREG_MTRR_PHYS_BASE_1;
352  break;
353  case 0x203:
354  regNum = MISCREG_MTRR_PHYS_MASK_1;
355  break;
356  case 0x204:
357  regNum = MISCREG_MTRR_PHYS_BASE_2;
358  break;
359  case 0x205:
360  regNum = MISCREG_MTRR_PHYS_MASK_2;
361  break;
362  case 0x206:
363  regNum = MISCREG_MTRR_PHYS_BASE_3;
364  break;
365  case 0x207:
366  regNum = MISCREG_MTRR_PHYS_MASK_3;
367  break;
368  case 0x208:
369  regNum = MISCREG_MTRR_PHYS_BASE_4;
370  break;
371  case 0x209:
372  regNum = MISCREG_MTRR_PHYS_MASK_4;
373  break;
374  case 0x20A:
375  regNum = MISCREG_MTRR_PHYS_BASE_5;
376  break;
377  case 0x20B:
378  regNum = MISCREG_MTRR_PHYS_MASK_5;
379  break;
380  case 0x20C:
381  regNum = MISCREG_MTRR_PHYS_BASE_6;
382  break;
383  case 0x20D:
384  regNum = MISCREG_MTRR_PHYS_MASK_6;
385  break;
386  case 0x20E:
387  regNum = MISCREG_MTRR_PHYS_BASE_7;
388  break;
389  case 0x20F:
390  regNum = MISCREG_MTRR_PHYS_MASK_7;
391  break;
392  case 0x250:
394  break;
395  case 0x258:
397  break;
398  case 0x259:
400  break;
401  case 0x268:
402  regNum = MISCREG_MTRR_FIX_4K_C0000;
403  break;
404  case 0x269:
405  regNum = MISCREG_MTRR_FIX_4K_C8000;
406  break;
407  case 0x26A:
408  regNum = MISCREG_MTRR_FIX_4K_D0000;
409  break;
410  case 0x26B:
411  regNum = MISCREG_MTRR_FIX_4K_D8000;
412  break;
413  case 0x26C:
414  regNum = MISCREG_MTRR_FIX_4K_E0000;
415  break;
416  case 0x26D:
417  regNum = MISCREG_MTRR_FIX_4K_E8000;
418  break;
419  case 0x26E:
420  regNum = MISCREG_MTRR_FIX_4K_F0000;
421  break;
422  case 0x26F:
423  regNum = MISCREG_MTRR_FIX_4K_F8000;
424  break;
425  case 0x277:
426  regNum = MISCREG_PAT;
427  break;
428  case 0x2FF:
429  regNum = MISCREG_DEF_TYPE;
430  break;
431  case 0x400:
432  regNum = MISCREG_MC0_CTL;
433  break;
434  case 0x404:
435  regNum = MISCREG_MC1_CTL;
436  break;
437  case 0x408:
438  regNum = MISCREG_MC2_CTL;
439  break;
440  case 0x40C:
441  regNum = MISCREG_MC3_CTL;
442  break;
443  case 0x410:
444  regNum = MISCREG_MC4_CTL;
445  break;
446  case 0x414:
447  regNum = MISCREG_MC5_CTL;
448  break;
449  case 0x418:
450  regNum = MISCREG_MC6_CTL;
451  break;
452  case 0x41C:
453  regNum = MISCREG_MC7_CTL;
454  break;
455  case 0x401:
456  regNum = MISCREG_MC0_STATUS;
457  break;
458  case 0x405:
459  regNum = MISCREG_MC1_STATUS;
460  break;
461  case 0x409:
462  regNum = MISCREG_MC2_STATUS;
463  break;
464  case 0x40D:
465  regNum = MISCREG_MC3_STATUS;
466  break;
467  case 0x411:
468  regNum = MISCREG_MC4_STATUS;
469  break;
470  case 0x415:
471  regNum = MISCREG_MC5_STATUS;
472  break;
473  case 0x419:
474  regNum = MISCREG_MC6_STATUS;
475  break;
476  case 0x41D:
477  regNum = MISCREG_MC7_STATUS;
478  break;
479  case 0x402:
480  regNum = MISCREG_MC0_ADDR;
481  break;
482  case 0x406:
483  regNum = MISCREG_MC1_ADDR;
484  break;
485  case 0x40A:
486  regNum = MISCREG_MC2_ADDR;
487  break;
488  case 0x40E:
489  regNum = MISCREG_MC3_ADDR;
490  break;
491  case 0x412:
492  regNum = MISCREG_MC4_ADDR;
493  break;
494  case 0x416:
495  regNum = MISCREG_MC5_ADDR;
496  break;
497  case 0x41A:
498  regNum = MISCREG_MC6_ADDR;
499  break;
500  case 0x41E:
501  regNum = MISCREG_MC7_ADDR;
502  break;
503  case 0x403:
504  regNum = MISCREG_MC0_MISC;
505  break;
506  case 0x407:
507  regNum = MISCREG_MC1_MISC;
508  break;
509  case 0x40B:
510  regNum = MISCREG_MC2_MISC;
511  break;
512  case 0x40F:
513  regNum = MISCREG_MC3_MISC;
514  break;
515  case 0x413:
516  regNum = MISCREG_MC4_MISC;
517  break;
518  case 0x417:
519  regNum = MISCREG_MC5_MISC;
520  break;
521  case 0x41B:
522  regNum = MISCREG_MC6_MISC;
523  break;
524  case 0x41F:
525  regNum = MISCREG_MC7_MISC;
526  break;
527  case 0xC0000080:
528  regNum = MISCREG_EFER;
529  break;
530  case 0xC0000081:
531  regNum = MISCREG_STAR;
532  break;
533  case 0xC0000082:
534  regNum = MISCREG_LSTAR;
535  break;
536  case 0xC0000083:
537  regNum = MISCREG_CSTAR;
538  break;
539  case 0xC0000084:
540  regNum = MISCREG_SF_MASK;
541  break;
542  case 0xC0000100:
543  regNum = MISCREG_FS_BASE;
544  break;
545  case 0xC0000101:
546  regNum = MISCREG_GS_BASE;
547  break;
548  case 0xC0000102:
549  regNum = MISCREG_KERNEL_GS_BASE;
550  break;
551  case 0xC0000103:
552  regNum = MISCREG_TSC_AUX;
553  break;
554  case 0xC0010000:
555  regNum = MISCREG_PERF_EVT_SEL0;
556  break;
557  case 0xC0010001:
558  regNum = MISCREG_PERF_EVT_SEL1;
559  break;
560  case 0xC0010002:
561  regNum = MISCREG_PERF_EVT_SEL2;
562  break;
563  case 0xC0010003:
564  regNum = MISCREG_PERF_EVT_SEL3;
565  break;
566  case 0xC0010004:
567  regNum = MISCREG_PERF_EVT_CTR0;
568  break;
569  case 0xC0010005:
570  regNum = MISCREG_PERF_EVT_CTR1;
571  break;
572  case 0xC0010006:
573  regNum = MISCREG_PERF_EVT_CTR2;
574  break;
575  case 0xC0010007:
576  regNum = MISCREG_PERF_EVT_CTR3;
577  break;
578  case 0xC0010010:
579  regNum = MISCREG_SYSCFG;
580  break;
581  case 0xC0010016:
582  regNum = MISCREG_IORR_BASE0;
583  break;
584  case 0xC0010017:
585  regNum = MISCREG_IORR_BASE1;
586  break;
587  case 0xC0010018:
588  regNum = MISCREG_IORR_MASK0;
589  break;
590  case 0xC0010019:
591  regNum = MISCREG_IORR_MASK1;
592  break;
593  case 0xC001001A:
594  regNum = MISCREG_TOP_MEM;
595  break;
596  case 0xC001001D:
597  regNum = MISCREG_TOP_MEM2;
598  break;
599  case 0xC0010114:
600  regNum = MISCREG_VM_CR;
601  break;
602  case 0xC0010115:
603  regNum = MISCREG_IGNNE;
604  break;
605  case 0xC0010116:
606  regNum = MISCREG_SMM_CTL;
607  break;
608  case 0xC0010117:
609  regNum = MISCREG_VM_HSAVE_PA;
610  break;
611  default:
612  return std::make_shared<GeneralProtection>(0);
613  }
614  //The index is multiplied by the size of a MiscReg so that
615  //any memory dependence calculations will not see these as
616  //overlapping.
617  req->setPaddr(regNum * sizeof(MiscReg));
618  return NoFault;
619  } else if (prefix == IntAddrPrefixIO) {
620  // TODO If CPL > IOPL or in virtual mode, check the I/O permission
621  // bitmap in the TSS.
622 
623  Addr IOPort = vaddr & ~IntAddrPrefixMask;
624  // Make sure the address fits in the expected 16 bit IO address
625  // space.
626  assert(!(IOPort & ~0xFFFF));
627 
628  if (IOPort == 0xCF8 && req->getSize() == 4) {
631  } else if ((IOPort & ~mask(2)) == 0xCFC) {
633 
636 
637  if (bits(configAddress, 31, 31)) {
639  mbits(configAddress, 30, 2) |
640  (IOPort & mask(2)));
641  } else {
642  req->setPaddr(PhysAddrPrefixIO | IOPort);
643  }
644  } else {
646  req->setPaddr(PhysAddrPrefixIO | IOPort);
647  }
648  return NoFault;
649  } else {
650  panic("Access to unrecognized internal address space %#x.\n",
651  prefix);
652  }
653  }
654 
662  bool
663  GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
664  {
665  bool tlb_hit = false;
666  #ifndef NDEBUG
667  uint32_t flags = req->getFlags();
668  int seg = flags & SegmentFlagMask;
669  #endif
670 
671  assert(seg != SEGMENT_REG_MS);
672  Addr vaddr = req->getVaddr();
673  DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);
674  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
675 
676  if (m5Reg.prot) {
677  DPRINTF(GPUTLB, "In protected mode.\n");
678  // make sure we are in 64-bit mode
679  assert(m5Reg.mode == LongMode);
680 
681  // If paging is enabled, do the translation.
682  if (m5Reg.paging) {
683  DPRINTF(GPUTLB, "Paging enabled.\n");
684  //update LRU stack on a hit
685  GpuTlbEntry *entry = lookup(vaddr, true);
686 
687  if (entry)
688  tlb_hit = true;
689 
690  if (!update_stats) {
691  // functional tlb access for memory initialization
692  // i.e., memory seeding or instr. seeding -> don't update
693  // TLB and stats
694  return tlb_hit;
695  }
696 
698 
699  if (!entry) {
701  } else {
702  localNumTLBHits++;
703  }
704  }
705  }
706 
707  return tlb_hit;
708  }
709 
710  Fault
712  Translation *translation, Mode mode,
713  bool &delayedResponse, bool timing, int &latency)
714  {
715  uint32_t flags = req->getFlags();
716  int seg = flags & SegmentFlagMask;
717  bool storeCheck = flags & (StoreCheck << FlagShift);
718 
719  // If this is true, we're dealing with a request
720  // to a non-memory address space.
721  if (seg == SEGMENT_REG_MS) {
722  return translateInt(req, tc);
723  }
724 
725  delayedResponse = false;
726  Addr vaddr = req->getVaddr();
727  DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);
728 
729  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
730 
731  // If protected mode has been enabled...
732  if (m5Reg.prot) {
733  DPRINTF(GPUTLB, "In protected mode.\n");
734  // If we're not in 64-bit mode, do protection/limit checks
735  if (m5Reg.mode != LongMode) {
736  DPRINTF(GPUTLB, "Not in long mode. Checking segment "
737  "protection.\n");
738 
739  // Check for a null segment selector.
740  if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
741  seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
742  && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
743  return std::make_shared<GeneralProtection>(0);
744  }
745 
746  bool expandDown = false;
747  SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));
748 
749  if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
750  if (!attr.writable && (mode == BaseTLB::Write ||
751  storeCheck))
752  return std::make_shared<GeneralProtection>(0);
753 
754  if (!attr.readable && mode == BaseTLB::Read)
755  return std::make_shared<GeneralProtection>(0);
756 
757  expandDown = attr.expandDown;
758 
759  }
760 
762  Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
763  // This assumes we're not in 64 bit mode. If we were, the
764  // default address size is 64 bits, overridable to 32.
765  int size = 32;
766  bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
767  SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);
768 
769  if ((csAttr.defaultSize && sizeOverride) ||
770  (!csAttr.defaultSize && !sizeOverride)) {
771  size = 16;
772  }
773 
774  Addr offset = bits(vaddr - base, size - 1, 0);
775  Addr endOffset = offset + req->getSize() - 1;
776 
777  if (expandDown) {
778  DPRINTF(GPUTLB, "Checking an expand down segment.\n");
779  warn_once("Expand down segments are untested.\n");
780 
781  if (offset <= limit || endOffset <= limit)
782  return std::make_shared<GeneralProtection>(0);
783  } else {
784  if (offset > limit || endOffset > limit)
785  return std::make_shared<GeneralProtection>(0);
786  }
787  }
788 
789  // If paging is enabled, do the translation.
790  if (m5Reg.paging) {
791  DPRINTF(GPUTLB, "Paging enabled.\n");
792  // The vaddr already has the segment base applied.
793  GpuTlbEntry *entry = lookup(vaddr);
795 
796  if (!entry) {
798  if (timing) {
799  latency = missLatency1;
800  }
801 
802  if (FullSystem) {
803  fatal("GpuTLB doesn't support full-system mode\n");
804  } else {
805  DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
806  "at pc %#x.\n", vaddr, tc->instAddr());
807 
808  Process *p = tc->getProcessPtr();
809  GpuTlbEntry newEntry;
810  bool success = p->pTable->lookup(vaddr, newEntry);
811 
812  if (!success && mode != BaseTLB::Execute) {
813  // penalize a "page fault" more
814  if (timing) {
815  latency += missLatency2;
816  }
817 
818  if (p->fixupStackFault(vaddr))
819  success = p->pTable->lookup(vaddr, newEntry);
820  }
821 
822  if (!success) {
823  return std::make_shared<PageFault>(vaddr, true,
824  mode, true,
825  false);
826  } else {
827  newEntry.valid = success;
828  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
829 
830  DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
831  alignedVaddr, newEntry.pageStart());
832 
833  entry = insert(alignedVaddr, newEntry);
834  }
835 
836  DPRINTF(GPUTLB, "Miss was serviced.\n");
837  }
838  } else {
839  localNumTLBHits++;
840 
841  if (timing) {
842  latency = hitLatency;
843  }
844  }
845 
846  // Do paging protection checks.
847  bool inUser = (m5Reg.cpl == 3 &&
848  !(flags & (CPL0FlagBit << FlagShift)));
849 
850  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
851  bool badWrite = (!entry->writable && (inUser || cr0.wp));
852 
853  if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
854  badWrite)) {
855  // The page must have been present to get into the TLB in
856  // the first place. We'll assume the reserved bits are
857  // fine even though we're not checking them.
858  return std::make_shared<PageFault>(vaddr, true, mode,
859  inUser, false);
860  }
861 
862  if (storeCheck && badWrite) {
863  // This would fault if this were a write, so return a page
864  // fault that reflects that happening.
865  return std::make_shared<PageFault>(vaddr, true,
867  inUser, false);
868  }
869 
870 
871  DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
872  "checks.\n", entry->paddr);
873 
874  int page_size = entry->size();
875  Addr paddr = entry->paddr | (vaddr & (page_size - 1));
876  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
877  req->setPaddr(paddr);
878 
879  if (entry->uncacheable)
881  } else {
882  //Use the address which already has segmentation applied.
883  DPRINTF(GPUTLB, "Paging disabled.\n");
884  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
885  req->setPaddr(vaddr);
886  }
887  } else {
888  // Real mode
889  DPRINTF(GPUTLB, "In real mode.\n");
890  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
891  req->setPaddr(vaddr);
892  }
893 
894  // Check for an access to the local APIC
895  if (FullSystem) {
896  LocalApicBase localApicBase =
898 
899  Addr baseAddr = localApicBase.base * PageBytes;
900  Addr paddr = req->getPaddr();
901 
902  if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
903  // Force the access to be uncacheable.
906  paddr - baseAddr));
907  }
908  }
909 
910  return NoFault;
911  };
912 
913  Fault
915  int &latency)
916  {
917  bool delayedResponse;
918 
919  return GpuTLB::translate(req, tc, nullptr, mode, delayedResponse, false,
920  latency);
921  }
922 
923  void
925  Translation *translation, Mode mode, int &latency)
926  {
927  bool delayedResponse;
928  assert(translation);
929 
930  Fault fault = GpuTLB::translate(req, tc, translation, mode,
931  delayedResponse, true, latency);
932 
933  if (!delayedResponse)
934  translation->finish(fault, req, tc, mode);
935  }
936 
937  Walker*
939  {
940  return walker;
941  }
942 
943 
944  void
946  {
947  }
948 
949  void
951  {
952  }
953 
954  void
956  {
958 
960  .name(name() + ".local_TLB_accesses")
961  .desc("Number of TLB accesses")
962  ;
963 
965  .name(name() + ".local_TLB_hits")
966  .desc("Number of TLB hits")
967  ;
968 
970  .name(name() + ".local_TLB_misses")
971  .desc("Number of TLB misses")
972  ;
973 
975  .name(name() + ".local_TLB_miss_rate")
976  .desc("TLB miss rate")
977  ;
978 
980  .name(name() + ".access_cycles")
981  .desc("Cycles spent accessing this TLB level")
982  ;
983 
985  .name(name() + ".page_table_cycles")
986  .desc("Cycles spent accessing the page table")
987  ;
988 
990 
992  .name(name() + ".unique_pages")
993  .desc("Number of unique pages touched")
994  ;
995 
997  .name(name() + ".local_cycles")
998  .desc("Number of cycles spent in queue for all incoming reqs")
999  ;
1000 
1001  localLatency
1002  .name(name() + ".local_latency")
1003  .desc("Avg. latency over incoming coalesced reqs")
1004  ;
1005 
1007 
1009  .name(name() + ".global_TLB_accesses")
1010  .desc("Number of TLB accesses")
1011  ;
1012 
1014  .name(name() + ".global_TLB_hits")
1015  .desc("Number of TLB hits")
1016  ;
1017 
1019  .name(name() + ".global_TLB_misses")
1020  .desc("Number of TLB misses")
1021  ;
1022 
1024  .name(name() + ".global_TLB_miss_rate")
1025  .desc("TLB miss rate")
1026  ;
1027 
1029 
1031  .name(name() + ".avg_reuse_distance")
1032  .desc("avg. reuse distance over all pages (in ticks)")
1033  ;
1034 
1035  }
1036 
1042  void
1044  {
1045  assert(pkt);
1046  assert(pkt->senderState);
1047 
1048  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1050 
1051  TranslationState *sender_state =
1053 
1054  bool update_stats = !sender_state->prefetch;
1055  ThreadContext * tmp_tc = sender_state->tc;
1056 
1057  DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
1058  virt_page_addr);
1059 
1060  int req_cnt = sender_state->reqCnt.back();
1061 
1062  if (update_stats) {
1063  accessCycles -= (curTick() * req_cnt);
1064  localCycles -= curTick();
1065  updatePageFootprint(virt_page_addr);
1066  globalNumTLBAccesses += req_cnt;
1067  }
1068 
1069  tlbOutcome lookup_outcome = TLB_MISS;
1070  RequestPtr tmp_req = pkt->req;
1071 
1072  // Access the TLB and figure out if it's a hit or a miss.
1073  bool success = tlbLookup(tmp_req, tmp_tc, update_stats);
1074 
1075  if (success) {
1076  lookup_outcome = TLB_HIT;
1077  // Put the entry in SenderState
1078  GpuTlbEntry *entry = lookup(tmp_req->getVaddr(), false);
1079  assert(entry);
1080 
1081  sender_state->tlbEntry =
1082  new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1083 
1084  if (update_stats) {
1085  // the reqCnt has an entry per level, so its size tells us
1086  // which level we are in
1087  sender_state->hitLevel = sender_state->reqCnt.size();
1088  globalNumTLBHits += req_cnt;
1089  }
1090  } else {
1091  if (update_stats)
1092  globalNumTLBMisses += req_cnt;
1093  }
1094 
1095  /*
1096  * We now know the TLB lookup outcome (if it's a hit or a miss), as well
1097  * as the TLB access latency.
1098  *
1099  * We create and schedule a new TLBEvent which will help us take the
1100  * appropriate actions (e.g., update TLB on a hit, send request to lower
1101  * level TLB on a miss, or start a page walk if this was the last-level
1102  * TLB)
1103  */
1104  TLBEvent *tlb_event =
1105  new TLBEvent(this, virt_page_addr, lookup_outcome, pkt);
1106 
1107  if (translationReturnEvent.count(virt_page_addr)) {
1108  panic("Virtual Page Address %#x already has a return event\n",
1109  virt_page_addr);
1110  }
1111 
1112  translationReturnEvent[virt_page_addr] = tlb_event;
1113  assert(tlb_event);
1114 
1115  DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
1116  curTick() + this->ticks(hitLatency));
1117 
1118  schedule(tlb_event, curTick() + this->ticks(hitLatency));
1119  }
1120 
1122  PacketPtr _pkt)
1123  : Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
1124  outcome(tlb_outcome), pkt(_pkt)
1125  {
1126  }
1127 
1132  void
1134  GpuTlbEntry * tlb_entry, Mode mode)
1135  {
1136  HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
1137  uint32_t flags = pkt->req->getFlags();
1138  bool storeCheck = flags & (StoreCheck << FlagShift);
1139 
1140  // Do paging protection checks.
1141  bool inUser = (m5Reg.cpl == 3 && !(flags & (CPL0FlagBit << FlagShift)));
1142  CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
1143 
1144  bool badWrite = (!tlb_entry->writable && (inUser || cr0.wp));
1145 
1146  if ((inUser && !tlb_entry->user) ||
1147  (mode == BaseTLB::Write && badWrite)) {
1148  // The page must have been present to get into the TLB in
1149  // the first place. We'll assume the reserved bits are
1150  // fine even though we're not checking them.
1151  assert(false);
1152  }
1153 
1154  if (storeCheck && badWrite) {
1155  // This would fault if this were a write, so return a page
1156  // fault that reflects that happening.
1157  assert(false);
1158  }
1159  }
1160 
1166  void
1167  GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
1168  PacketPtr pkt)
1169  {
1170 
1171  assert(pkt);
1172  Addr vaddr = pkt->req->getVaddr();
1173 
1174  TranslationState *sender_state =
1176 
1177  ThreadContext *tc = sender_state->tc;
1178  Mode mode = sender_state->tlbMode;
1179 
1180  GpuTlbEntry *local_entry, *new_entry;
1181 
1182  if (tlb_outcome == TLB_HIT) {
1183  DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
1184  local_entry = sender_state->tlbEntry;
1185  } else {
1186  DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
1187  vaddr);
1188 
1189  // We are returning either from a page walk or from a hit at a lower
1190  // TLB level. The senderState should be "carrying" a pointer to the
1191  // correct TLBEntry.
1192  new_entry = sender_state->tlbEntry;
1193  assert(new_entry);
1194  local_entry = new_entry;
1195 
1196  if (allocationPolicy) {
1197  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1198  virt_page_addr);
1199 
1200  local_entry = insert(virt_page_addr, *new_entry);
1201  }
1202 
1203  assert(local_entry);
1204  }
1205 
1211  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1212  "while paddr was %#x.\n", local_entry->vaddr,
1213  local_entry->paddr);
1214 
1215  pagingProtectionChecks(tc, pkt, local_entry, mode);
1216  int page_size = local_entry->size();
1217  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1218  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1219 
1220  // Since this packet will be sent through the cpu side slave port,
1221  // it must be converted to a response pkt if it is not one already
1222  if (pkt->isRequest()) {
1223  pkt->makeTimingResponse();
1224  }
1225 
1226  pkt->req->setPaddr(paddr);
1227 
1228  if (local_entry->uncacheable) {
1230  }
1231 
1232  //send packet back to coalescer
1233  cpuSidePort[0]->sendTimingResp(pkt);
1234  //schedule cleanup event
1235  cleanupQueue.push(virt_page_addr);
1236 
1237  // schedule this only once per cycle.
1238  // The check is required because we might have multiple translations
1239  // returning the same cycle
1240  // this is a maximum priority event and must be on the same cycle
1241  // as the cleanup event in TLBCoalescer to avoid a race with
1242  // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
1243  if (!cleanupEvent.scheduled())
1245  }
1246 
1251  void
1253  PacketPtr pkt)
1254  {
1255  DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);
1256 
1257  assert(translationReturnEvent[virtPageAddr]);
1258  assert(pkt);
1259 
1260  TranslationState *tmp_sender_state =
1262 
1263  int req_cnt = tmp_sender_state->reqCnt.back();
1264  bool update_stats = !tmp_sender_state->prefetch;
1265 
1266 
1267  if (outcome == TLB_HIT) {
1268  handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);
1269 
1270  if (update_stats) {
1271  accessCycles += (req_cnt * curTick());
1272  localCycles += curTick();
1273  }
1274 
1275  } else if (outcome == TLB_MISS) {
1276 
1277  DPRINTF(GPUTLB, "This is a TLB miss\n");
1278  if (update_stats) {
1279  accessCycles += (req_cnt*curTick());
1280  localCycles += curTick();
1281  }
1282 
1283  if (hasMemSidePort) {
1284  // the one cycle added here represents the delay from when we get
1285  // the reply back till when we propagate it to the coalescer
1286  // above.
1287  if (update_stats) {
1288  accessCycles += (req_cnt * 1);
1289  localCycles += 1;
1290  }
1291 
1297  if (!memSidePort[0]->sendTimingReq(pkt)) {
1298  DPRINTF(GPUTLB, "Failed sending translation request to "
1299  "lower level TLB for addr %#x\n", virtPageAddr);
1300 
1301  memSidePort[0]->retries.push_back(pkt);
1302  } else {
1303  DPRINTF(GPUTLB, "Sent translation request to lower level "
1304  "TLB for addr %#x\n", virtPageAddr);
1305  }
1306  } else {
1307  //this is the last level TLB. Start a page walk
1308  DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
1309  "addr %#x\n", virtPageAddr);
1310 
1311  if (update_stats)
1312  pageTableCycles -= (req_cnt*curTick());
1313 
1314  TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
1315  assert(tlb_event);
1316  tlb_event->updateOutcome(PAGE_WALK);
1317  schedule(tlb_event, curTick() + ticks(missLatency2));
1318  }
1319  } else if (outcome == PAGE_WALK) {
1320  if (update_stats)
1321  pageTableCycles += (req_cnt*curTick());
1322 
1323  // Need to access the page table and update the TLB
1324  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1325  virtPageAddr);
1326 
1327  TranslationState *sender_state =
1329 
1330  Process *p = sender_state->tc->getProcessPtr();
1331  TlbEntry newEntry;
1332  Addr vaddr = pkt->req->getVaddr();
1333  #ifndef NDEBUG
1334  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1335  assert(alignedVaddr == virtPageAddr);
1336  #endif
1337  bool success;
1338  success = p->pTable->lookup(vaddr, newEntry);
1339  if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1340  if (p->fixupStackFault(vaddr)) {
1341  success = p->pTable->lookup(vaddr, newEntry);
1342  }
1343  }
1344 
1345  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1346  newEntry.pageStart());
1347 
1348  sender_state->tlbEntry =
1349  new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);
1350 
1351  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1352  } else if (outcome == MISS_RETURN) {
1356  handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
1357  } else {
1358  assert(false);
1359  }
1360  }
1361 
1362  void
1364  {
1365  tlb->translationReturn(virtPageAddr, outcome, pkt);
1366  }
1367 
1368  const char*
1370  {
1371  return "trigger translationDoneEvent";
1372  }
1373 
1374  void
1376  {
1377  outcome = _outcome;
1378  }
1379 
1380  Addr
1382  {
1383  return virtPageAddr;
1384  }
1385 
1386  /*
1387  * recvTiming receives a coalesced timing request from a TLBCoalescer
1388  * and it calls issueTLBLookup()
1389  * It only rejects the packet if we have exceeded the max
1390  * outstanding number of requests for the TLB
1391  */
1392  bool
1394  {
1395  if (tlb->outstandingReqs < tlb->maxCoalescedReqs) {
1396  tlb->issueTLBLookup(pkt);
1397  // update number of outstanding translation requests
1398  tlb->outstandingReqs++;
1399  return true;
1400  } else {
1401  DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
1402  tlb->outstandingReqs);
1403  return false;
1404  }
1405  }
1406 
1415  void
1417  {
1418  TranslationState *sender_state =
1420 
1421  ThreadContext *tc = sender_state->tc;
1422  Mode mode = sender_state->tlbMode;
1423  Addr vaddr = pkt->req->getVaddr();
1424 
1425  GpuTlbEntry *local_entry, *new_entry;
1426 
1427  if (tlb_outcome == TLB_HIT) {
1428  DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
1429  "%#x\n", vaddr);
1430 
1431  local_entry = sender_state->tlbEntry;
1432  } else {
1433  DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
1434  "%#x\n", vaddr);
1435 
1436  // We are returning either from a page walk or from a hit at a lower
1437  // TLB level. The senderState should be "carrying" a pointer to the
1438  // correct TLBEntry.
1439  new_entry = sender_state->tlbEntry;
1440  assert(new_entry);
1441  local_entry = new_entry;
1442 
1443  if (allocationPolicy) {
1444  Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);
1445 
1446  DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
1447  virt_page_addr);
1448 
1449  local_entry = insert(virt_page_addr, *new_entry);
1450  }
1451 
1452  assert(local_entry);
1453  }
1454 
1455  DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
1456  "while paddr was %#x.\n", local_entry->vaddr,
1457  local_entry->paddr);
1458 
1459  // Do paging checks if it's a normal functional access. If it's for a
1460  // prefetch, then sometimes you can try to prefetch something that won't
1461  // pass protection. We don't actually want to fault becuase there is no
1462  // demand access to deem this a violation. Just put it in the TLB and
1463  // it will fault if indeed a future demand access touches it in
1464  // violation.
1465  if (!sender_state->prefetch && sender_state->tlbEntry->valid)
1466  pagingProtectionChecks(tc, pkt, local_entry, mode);
1467 
1468  int page_size = local_entry->size();
1469  Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
1470  DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
1471 
1472  pkt->req->setPaddr(paddr);
1473 
1474  if (local_entry->uncacheable)
1476  }
1477 
1478  // This is used for atomic translations. Need to
1479  // make it all happen during the same cycle.
1480  void
1482  {
1483  TranslationState *sender_state =
1485 
1486  ThreadContext *tc = sender_state->tc;
1487  bool update_stats = !sender_state->prefetch;
1488 
1489  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1491 
1492  if (update_stats)
1493  tlb->updatePageFootprint(virt_page_addr);
1494 
1495  // do the TLB lookup without updating the stats
1496  bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
1497  tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;
1498 
1499  // functional mode means no coalescing
1500  // global metrics are the same as the local metrics
1501  if (update_stats) {
1502  tlb->globalNumTLBAccesses++;
1503 
1504  if (success) {
1505  sender_state->hitLevel = sender_state->reqCnt.size();
1506  tlb->globalNumTLBHits++;
1507  }
1508  }
1509 
1510  if (!success) {
1511  if (update_stats)
1512  tlb->globalNumTLBMisses++;
1513  if (tlb->hasMemSidePort) {
1514  // there is a TLB below -> propagate down the TLB hierarchy
1515  tlb->memSidePort[0]->sendFunctional(pkt);
1516  // If no valid translation from a prefetch, then just return
1517  if (sender_state->prefetch && !pkt->req->hasPaddr())
1518  return;
1519  } else {
1520  // Need to access the page table and update the TLB
1521  DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
1522  virt_page_addr);
1523 
1524  Process *p = tc->getProcessPtr();
1525  TlbEntry newEntry;
1526 
1527  Addr vaddr = pkt->req->getVaddr();
1528  #ifndef NDEBUG
1529  Addr alignedVaddr = p->pTable->pageAlign(vaddr);
1530  assert(alignedVaddr == virt_page_addr);
1531  #endif
1532 
1533  bool success = p->pTable->lookup(vaddr, newEntry);
1534  if (!success && sender_state->tlbMode != BaseTLB::Execute) {
1535  if (p->fixupStackFault(vaddr))
1536  success = p->pTable->lookup(vaddr, newEntry);
1537  }
1538 
1539  if (!sender_state->prefetch) {
1540  // no PageFaults are permitted after
1541  // the second page table lookup
1542  assert(success);
1543 
1544  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1545  newEntry.pageStart());
1546 
1547  sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
1548  newEntry.paddr,
1549  success);
1550  } else {
1551  // If this was a prefetch, then do the normal thing if it
1552  // was a successful translation. Otherwise, send an empty
1553  // TLB entry back so that it can be figured out as empty and
1554  // handled accordingly.
1555  if (success) {
1556  DPRINTF(GPUTLB, "Mapping %#x to %#x\n", alignedVaddr,
1557  newEntry.pageStart());
1558 
1559  sender_state->tlbEntry = new GpuTlbEntry(0,
1560  newEntry.vaddr,
1561  newEntry.paddr,
1562  success);
1563  } else {
1564  DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
1565  alignedVaddr);
1566 
1567  sender_state->tlbEntry = new GpuTlbEntry();
1568 
1569  return;
1570  }
1571  }
1572  }
1573  } else {
1574  DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
1575  tlb->lookup(pkt->req->getVaddr()));
1576 
1577  GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
1578  update_stats);
1579 
1580  assert(entry);
1581 
1582  sender_state->tlbEntry =
1583  new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
1584  }
1585  // This is the function that would populate pkt->req with the paddr of
1586  // the translation. But if no translation happens (i.e Prefetch fails)
1587  // then the early returns in the above code wiill keep this function
1588  // from executing.
1589  tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
1590  }
1591 
1592  void
1594  {
1595  // The CPUSidePort never sends anything but replies. No retries
1596  // expected.
1597  assert(false);
1598  }
1599 
1602  {
1603  // currently not checked by the master
1604  AddrRangeList ranges;
1605 
1606  return ranges;
1607  }
1608 
1614  bool
1616  {
1617  Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
1619 
1620  DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
1621  virt_page_addr);
1622 
1623  TLBEvent *tlb_event = tlb->translationReturnEvent[virt_page_addr];
1624  assert(tlb_event);
1625  assert(virt_page_addr == tlb_event->getTLBEventVaddr());
1626 
1627  tlb_event->updateOutcome(MISS_RETURN);
1628  tlb->schedule(tlb_event, curTick()+tlb->ticks(1));
1629 
1630  return true;
1631  }
1632 
1633  void
1635  {
1636  // No retries should reach the TLB. The retries
1637  // should only reach the TLBCoalescer.
1638  assert(false);
1639  }
1640 
1641  void
1643  {
1644  while (!cleanupQueue.empty()) {
1645  Addr cleanup_addr = cleanupQueue.front();
1646  cleanupQueue.pop();
1647 
1648  // delete TLBEvent
1649  TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
1650  delete old_tlb_event;
1651  translationReturnEvent.erase(cleanup_addr);
1652 
1653  // update number of outstanding requests
1654  outstandingReqs--;
1655  }
1656 
1660  for (int i = 0; i < cpuSidePort.size(); ++i) {
1661  cpuSidePort[i]->sendRetryReq();
1662  }
1663  }
1664 
1665  void
1667  {
1668 
1670 
1671  AccessInfo tmp_access_info;
1672  tmp_access_info.lastTimeAccessed = 0;
1673  tmp_access_info.accessesPerPage = 0;
1674  tmp_access_info.totalReuseDistance = 0;
1675  tmp_access_info.sumDistance = 0;
1676  tmp_access_info.meanDistance = 0;
1677 
1678  ret = TLBFootprint.insert(AccessPatternTable::value_type(virt_page_addr,
1679  tmp_access_info));
1680 
1681  bool first_page_access = ret.second;
1682 
1683  if (first_page_access) {
1684  numUniquePages++;
1685  } else {
1686  int accessed_before;
1687  accessed_before = curTick() - ret.first->second.lastTimeAccessed;
1688  ret.first->second.totalReuseDistance += accessed_before;
1689  }
1690 
1691  ret.first->second.accessesPerPage++;
1692  ret.first->second.lastTimeAccessed = curTick();
1693 
1694  if (accessDistance) {
1695  ret.first->second.localTLBAccesses
1696  .push_back(localNumTLBAccesses.value());
1697  }
1698  }
1699 
1700  void
1702  {
1703  std::ostream *page_stat_file = nullptr;
1704 
1705  if (accessDistance) {
1706 
1707  // print per page statistics to a separate file (.csv format)
1708  // simout is the gem5 output directory (default is m5out or the one
1709  // specified with -d
1710  page_stat_file = simout.create(name().c_str())->stream();
1711 
1712  // print header
1713  *page_stat_file << "page,max_access_distance,mean_access_distance, "
1714  << "stddev_distance" << std::endl;
1715  }
1716 
1717  // update avg. reuse distance footprint
1718  AccessPatternTable::iterator iter, iter_begin, iter_end;
1719  unsigned int sum_avg_reuse_distance_per_page = 0;
1720 
1721  // iterate through all pages seen by this TLB
1722  for (iter = TLBFootprint.begin(); iter != TLBFootprint.end(); iter++) {
1723  sum_avg_reuse_distance_per_page += iter->second.totalReuseDistance /
1724  iter->second.accessesPerPage;
1725 
1726  if (accessDistance) {
1727  unsigned int tmp = iter->second.localTLBAccesses[0];
1728  unsigned int prev = tmp;
1729 
1730  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1731  if (i) {
1732  tmp = prev + 1;
1733  }
1734 
1735  prev = iter->second.localTLBAccesses[i];
1736  // update the localTLBAccesses value
1737  // with the actual differece
1738  iter->second.localTLBAccesses[i] -= tmp;
1739  // compute the sum of AccessDistance per page
1740  // used later for mean
1741  iter->second.sumDistance +=
1742  iter->second.localTLBAccesses[i];
1743  }
1744 
1745  iter->second.meanDistance =
1746  iter->second.sumDistance / iter->second.accessesPerPage;
1747 
1748  // compute std_dev and max (we need a second round because we
1749  // need to know the mean value
1750  unsigned int max_distance = 0;
1751  unsigned int stddev_distance = 0;
1752 
1753  for (int i = 0; i < iter->second.localTLBAccesses.size(); ++i) {
1754  unsigned int tmp_access_distance =
1755  iter->second.localTLBAccesses[i];
1756 
1757  if (tmp_access_distance > max_distance) {
1758  max_distance = tmp_access_distance;
1759  }
1760 
1761  unsigned int diff =
1762  tmp_access_distance - iter->second.meanDistance;
1763  stddev_distance += pow(diff, 2);
1764 
1765  }
1766 
1767  stddev_distance =
1768  sqrt(stddev_distance/iter->second.accessesPerPage);
1769 
1770  if (page_stat_file) {
1771  *page_stat_file << std::hex << iter->first << ",";
1772  *page_stat_file << std::dec << max_distance << ",";
1773  *page_stat_file << std::dec << iter->second.meanDistance
1774  << ",";
1775  *page_stat_file << std::dec << stddev_distance;
1776  *page_stat_file << std::endl;
1777  }
1778 
1779  // erase the localTLBAccesses array
1780  iter->second.localTLBAccesses.clear();
1781  }
1782  }
1783 
1784  if (!TLBFootprint.empty()) {
1786  sum_avg_reuse_distance_per_page / TLBFootprint.size();
1787  }
1788 
1789  //clear the TLBFootprint map
1790  TLBFootprint.clear();
1791  }
1792 } // namespace X86ISA
1793 
1795 X86GPUTLBParams::create()
1796 {
1797  return new X86ISA::GpuTLB(this);
1798 }
1799 
Counter value() const
Return the current value of this stat as its base type.
Definition: statistics.hh:677
AccessPatternTable TLBFootprint
Definition: gpu_tlb.hh:456
#define DPRINTF(x,...)
Definition: trace.hh:212
unsigned int accessesPerPage
Definition: gpu_tlb.hh:438
const Addr PhysAddrPrefixPciConfig
Definition: x86_traits.hh:73
offset
Definition: misc.hh:977
Stats::Formula globalTLBMissRate
Definition: gpu_tlb.hh:216
OutputDirectory simout
Definition: output.cc:65
std::ostream * stream() const
Get the output underlying output stream.
Definition: output.hh:64
bool allocationPolicy
Allocation Policy: true if we always allocate on a hit, false otherwise.
Definition: gpu_tlb.hh:160
Stats::Scalar localCycles
Definition: gpu_tlb.hh:224
const int FlagShift
Definition: ldstflags.hh:52
virtual Addr instAddr()=0
decltype(nullptr) constexpr NoFault
Definition: types.hh:189
const char * description() const
Return a C string describing the event.
Definition: gpu_tlb.cc:1369
virtual void serialize(CheckpointOut &cp) const
Serialize an object.
Definition: gpu_tlb.cc:945
Bitfield< 7 > i
Definition: miscregs.hh:1378
STL pair class.
Definition: stl.hh:61
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition: output.cc:206
#define panic(...)
Definition: misc.hh:153
TLB TranslationState: this currently is a somewhat bastardization of the usage of SenderState...
Definition: gpu_tlb.hh:343
Stats::Scalar avgReuseDistance
Definition: gpu_tlb.hh:229
void translationReturn(Addr virtPageAddr, tlbOutcome outcome, PacketPtr pkt)
A TLBEvent is scheduled after the TLB lookup and helps us take the appropriate actions: (e...
Definition: gpu_tlb.cc:1252
void makeTimingResponse()
Definition: packet.hh:863
void handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome outcome)
handleFuncTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault retu...
Definition: gpu_tlb.cc:1416
Stats::Scalar accessCycles
Definition: gpu_tlb.hh:219
Stats::Formula localTLBMissRate
Definition: gpu_tlb.hh:208
Declaration of a request, the overall memory request consisting of the parts of the request that are ...
void invalidateAll()
Definition: gpu_tlb.cc:237
const Addr PageShift
Definition: isa_traits.hh:51
unsigned int meanDistance
Definition: gpu_tlb.hh:452
void exitCallback()
Definition: gpu_tlb.cc:1701
Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, int &latency)
Definition: gpu_tlb.cc:914
Walker * getWalker()
Definition: gpu_tlb.cc:938
Addr pageAlign(Addr a)
Definition: page_table.hh:109
virtual MiscReg readMiscRegNoEffect(int misc_reg) const =0
EntryList::iterator lookupIt(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:194
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:146
virtual Process * getProcessPtr()=0
MemSidePort is the TLB Port closer to the memory side If this is a last level TLB then this port will...
Definition: gpu_tlb.hh:296
#define warn_once(...)
Definition: misc.hh:226
bool hasMemSidePort
if true, then this is not the last level TLB
Definition: gpu_tlb.hh:165
unsigned int totalReuseDistance
Definition: gpu_tlb.hh:440
const Addr IntAddrPrefixCPUID
Definition: x86_traits.hh:68
unsigned int lastTimeAccessed
Definition: gpu_tlb.hh:437
void translateTiming(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, int &latency)
Definition: gpu_tlb.cc:924
const Addr PageBytes
Definition: isa_traits.hh:64
Bitfield< 14 > expandDown
Definition: misc.hh:949
unsigned int sumDistance
Definition: gpu_tlb.hh:451
Bitfield< 4, 0 > mode
Definition: miscregs.hh:1385
Stats::Scalar localNumTLBMisses
Definition: gpu_tlb.hh:207
int maxCoalescedReqs
Definition: gpu_tlb.hh:379
bool isRequest() const
Definition: packet.hh:505
ThreadContext is the external interface to all thread state for anything outside of the CPU...
std::vector< GpuTlbEntry > tlb
Definition: gpu_tlb.hh:173
A BaseSlavePort is a protocol-agnostic slave port, responsible only for the structural connection to ...
Definition: port.hh:139
GpuTlbEntry * lookup(Addr va, bool update_lru=true)
Definition: gpu_tlb.cc:224
enum BaseTLB::Mode Mode
Definition: gpu_tlb.hh:109
void setTLB(TLB *_tlb)
const Addr IntAddrPrefixMask
Definition: x86_traits.hh:67
Walker * walker
Definition: gpu_tlb.hh:137
TLBEvent(GpuTLB *_tlb, Addr _addr, tlbOutcome outcome, PacketPtr _pkt)
Definition: gpu_tlb.cc:1121
virtual void recvReqRetry()
Called by the slave port if sendTimingReq was called on this master port (causing recvTimingReq to be...
Definition: gpu_tlb.cc:1634
Stats::Scalar numUniquePages
Definition: gpu_tlb.hh:222
Stats::Scalar globalNumTLBMisses
Definition: gpu_tlb.hh:215
std::queue< Addr > cleanupQueue
Definition: gpu_tlb.hh:422
std::vector< MemSidePort * > memSidePort
Definition: gpu_tlb.hh:319
bool accessDistance
Print out accessDistance stats.
Definition: gpu_tlb.hh:171
Tick curTick()
The current simulated tick.
Definition: core.hh:47
Fault translateInt(RequestPtr req, ThreadContext *tc)
Definition: gpu_tlb.cc:288
void regStats()
Register statistics for this object.
Definition: gpu_tlb.cc:955
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
static MiscRegIndex MISCREG_SEG_ATTR(int index)
Definition: misc.hh:534
Stats::Formula localLatency
Definition: gpu_tlb.hh:226
void pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt, GpuTlbEntry *tlb_entry, Mode mode)
Do Paging protection checks.
Definition: gpu_tlb.cc:1133
static MiscRegIndex MISCREG_SEG_LIMIT(int index)
Definition: misc.hh:527
int outstandingReqs
Definition: gpu_tlb.hh:383
std::vector< EntryList > entryList
An entryList per set is the equivalent of an LRU stack; it's used to guide replacement decisions...
Definition: gpu_tlb.hh:189
void invalidateNonGlobal()
Definition: gpu_tlb.cc:257
Fault translate(RequestPtr req, ThreadContext *tc, Translation *translation, Mode mode, bool &delayedResponse, bool timing, int &latency)
Definition: gpu_tlb.cc:711
virtual AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
Definition: gpu_tlb.cc:1601
void setConfigAddress(uint32_t addr)
Definition: gpu_tlb.cc:251
virtual bool recvTimingResp(PacketPtr pkt)
MemSidePort receives the packet back.
Definition: gpu_tlb.cc:1615
mask
Definition: misc.hh:797
The request is to an uncacheable address.
Definition: request.hh:114
Bitfield< 51, 12 > base
Definition: pagetable.hh:85
BaseMasterPort & getMasterPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a master port with a given name and index.
Definition: gpu_tlb.cc:152
This hash map will use the virtual page address as a key and will keep track of total number of acces...
Definition: gpu_tlb.hh:435
X86GPUTLBParams Params
Definition: gpu_tlb.hh:105
Addr getPaddr() const
Definition: request.hh:519
#define fatal(...)
Definition: misc.hh:163
const RequestPtr req
A pointer to the original request.
Definition: packet.hh:304
bool tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
TLB_lookup will only perform a TLB lookup returning true on a TLB hit and false on a TLB miss...
Definition: gpu_tlb.cc:663
void demapPage(Addr va, uint64_t asn)
Definition: gpu_tlb.cc:275
virtual void recvFunctional(PacketPtr pkt)
Receive a functional request packet from the master port.
Definition: gpu_tlb.cc:1481
TlbEntry(Addr asn, Addr _vaddr, Addr _paddr, bool uncacheable, bool read_only)
std::unordered_map< Addr, TLBEvent * > translationReturnEvent
Definition: gpu_tlb.hh:418
const Addr IntAddrPrefixMSR
Definition: x86_traits.hh:69
T roundDown(const T &val, const U &align)
Definition: intmath.hh:213
void handleTranslationReturn(Addr addr, tlbOutcome outcome, PacketPtr pkt)
handleTranslationReturn is called on a TLB hit, when a TLB miss returns or when a page fault returns...
Definition: gpu_tlb.cc:1167
std::vector< CpuSidePort * > cpuSidePort
Definition: gpu_tlb.hh:317
Stats::Scalar globalNumTLBAccesses
Definition: gpu_tlb.hh:213
void updateOutcome(tlbOutcome _outcome)
Definition: gpu_tlb.cc:1375
static MiscRegIndex MISCREG_SEG_SEL(int index)
Definition: misc.hh:506
Bitfield< 2, 0 > seg
Definition: types.hh:84
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
std::vector< EntryList > freeList
Definition: gpu_tlb.hh:180
const Request::FlagsType M5_VAR_USED SegmentFlagMask
Definition: ldstflags.hh:51
T safe_cast(U ptr)
Definition: cast.hh:61
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
Definition: packet.hh:245
Bitfield< 8 > va
Definition: miscregs.hh:1473
const Addr PageBytes
Definition: isa_traits.hh:52
PageTableBase * pTable
Definition: process.hh:178
virtual void finish(Fault fault, RequestPtr req, ThreadContext *tc, Mode mode)=0
The memory for this object may be dynamically allocated, and it may be responsible for cleaning itsle...
Tick ticks(int numCycles) const
Definition: gpu_tlb.hh:97
const Addr IntAddrPrefixIO
Definition: x86_traits.hh:70
Flags getFlags()
Accessor for flags.
Definition: request.hh:584
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:254
EventWrapper< GpuTLB,&GpuTLB::cleanup > cleanupEvent
Definition: gpu_tlb.hh:428
bool fixupStackFault(Addr vaddr)
Attempt to fix up a fault at vaddr by allocating a page on the stack.
Definition: process.cc:338
uint32_t configAddress
Definition: gpu_tlb.hh:83
virtual bool recvTimingReq(PacketPtr pkt)
Receive a timing request from the master port.
Definition: gpu_tlb.cc:1393
int size()
Definition: pagetable.hh:146
virtual void unserialize(CheckpointIn &cp)
Unserialize an object.
Definition: gpu_tlb.cc:950
virtual const std::string name() const
Definition: sim_object.hh:117
Declarations of a non-full system Page Table.
static MiscRegIndex MISCREG_SEG_BASE(int index)
Definition: misc.hh:513
uint64_t MiscReg
Definition: registers.hh:94
std::ostream CheckpointOut
Definition: serialize.hh:67
EndBitUnion(PageTableEntry) struct TlbEntry Addr vaddr
Definition: pagetable.hh:96
int missLatency2
Definition: gpu_tlb.hh:201
SenderState * senderState
This packet's sender state.
Definition: packet.hh:454
Definition: eventq.hh:185
void cleanup()
Definition: gpu_tlb.cc:1642
The MemObject class extends the ClockedObject with accessor functions to get its master and slave por...
Definition: mem_object.hh:60
Addr getVaddr() const
Definition: request.hh:616
A BaseMasterPort is a protocol-agnostic master port, responsible only for the structural connection t...
Definition: port.hh:115
virtual int contextId() const =0
GpuTlbEntry * insert(Addr vpn, GpuTlbEntry &entry)
Definition: gpu_tlb.cc:168
int missLatency1
Definition: gpu_tlb.hh:200
virtual bool lookup(Addr vaddr, TheISA::TlbEntry &entry)=0
Lookup function.
void schedule(Event &event, Tick when)
Definition: eventq.hh:728
Stats::Scalar pageTableCycles
Definition: gpu_tlb.hh:221
Stats::Scalar localNumTLBHits
Definition: gpu_tlb.hh:206
const Addr PhysAddrPrefixIO
Definition: x86_traits.hh:72
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:287
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition: types.hh:181
Bitfield< 0 > p
Definition: pagetable.hh:95
GpuTLB(const Params *p)
Definition: gpu_tlb.cc:62
T mbits(T val, int first, int last)
Mask off the given bits in place like bits() but without shifting.
Definition: bitfield.hh:91
T bits(T val, int first, int last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it...
Definition: bitfield.hh:67
This request is to a memory mapped register.
Definition: request.hh:126
unsigned getSize() const
Definition: request.hh:552
bool FA
true if this is a fully-associative TLB
Definition: gpu_tlb.hh:153
static Addr x86LocalAPICAddress(const uint8_t id, const uint16_t addr)
Definition: x86_traits.hh:93
void setPaddr(Addr paddr)
Set just the physical address.
Definition: request.hh:487
Stats::Scalar localNumTLBAccesses
Definition: gpu_tlb.hh:205
Bitfield< 1 > x
Definition: types.hh:105
virtual void recvReqRetry()
Definition: gpu_tlb.cc:1593
void setFlags(Flags flags)
Note that unlike other accessors, this function sets specific flags (ORs them in); it does not assign...
Definition: request.hh:595
BaseSlavePort & getSlavePort(const std::string &if_name, PortID idx=InvalidPortID)
Get a slave port with a given name and index.
Definition: gpu_tlb.cc:138
Stats::Scalar globalNumTLBHits
Definition: gpu_tlb.hh:214
std::shared_ptr< FaultBase > Fault
Definition: types.hh:184
Bitfield< 3 > addr
Definition: types.hh:81
void regStats() override
Register statistics for this object.
std::vector< int > reqCnt
Definition: gpu_tlb.hh:365
void issueTLBLookup(PacketPtr pkt)
Do the TLB lookup for this coalesced request and schedule another event <TLB access latency> c...
Definition: gpu_tlb.cc:1043
bool hasPaddr() const
Accessor for paddr.
Definition: request.hh:513
void cprintf(const char *format, const Args &...args)
Definition: cprintf.hh:155
void updatePageFootprint(Addr virt_page_addr)
Definition: gpu_tlb.cc:1666

Generated on Fri Jun 9 2017 13:03:47 for gem5 by doxygen 1.8.6